@desplega.ai/agent-swarm 1.85.0 → 1.86.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/openapi.json +1 -1
- package/package.json +8 -6
- package/src/be/db.ts +44 -0
- package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
- package/src/be/modelsdev-cache.json +152028 -0
- package/src/be/modelsdev-cache.ts +46 -0
- package/src/be/seed-pricing.ts +7 -44
- package/src/cli.tsx +12 -2
- package/src/commands/codex-session-runner.ts +132 -0
- package/src/commands/credential-wait.ts +2 -2
- package/src/commands/provider-credentials.ts +10 -5
- package/src/commands/runner.ts +3 -3
- package/src/prompts/base-prompt.ts +49 -3
- package/src/providers/claude-adapter.ts +83 -2
- package/src/providers/claude-managed-models.ts +18 -2
- package/src/providers/codex-adapter.ts +417 -97
- package/src/providers/codex-models.ts +9 -2
- package/src/providers/index.ts +28 -19
- package/src/providers/pricing-sources.md +7 -4
- package/src/providers/swarm-events-shared.ts +14 -0
- package/src/slack/HEURISTICS.md +5 -1
- package/src/slack/handlers.test.ts +35 -0
- package/src/slack/handlers.ts +79 -2
- package/src/tests/base-prompt.test.ts +46 -8
- package/src/tests/claude-managed-adapter.test.ts +4 -4
- package/src/tests/codex-adapter-otel.test.ts +4 -4
- package/src/tests/codex-adapter.test.ts +20 -7
- package/src/tests/codex-swarm-events.test.ts +35 -0
- package/src/tests/context-window.test.ts +1 -0
- package/src/tests/credential-check.test.ts +48 -29
- package/src/tests/entrypoint-config-env-export.test.ts +81 -0
- package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
- package/src/tests/migration-046-budgets.test.ts +6 -5
- package/src/tests/pricing-routes.test.ts +6 -5
- package/src/tests/provider-adapter.test.ts +10 -10
- package/src/tests/provider-command-format.test.ts +4 -4
- package/src/tests/session-costs-codex-recompute.test.ts +25 -0
- package/src/tools/send-task.ts +30 -9
- package/src/utils/context-window.ts +1 -0
- package/templates/schedules/daily-blocker-digest/config.json +13 -0
- package/templates/schedules/daily-blocker-digest/content.md +150 -0
- package/templates/schedules/daily-compounding-reflection/config.json +21 -0
- package/templates/schedules/daily-compounding-reflection/content.md +210 -0
- package/templates/schedules/daily-hn-briefing/config.json +13 -0
- package/templates/schedules/daily-hn-briefing/content.md +97 -0
- package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
- package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
- package/templates/schedules/gtm-weekly-review/config.json +13 -0
- package/templates/schedules/gtm-weekly-review/content.md +58 -0
- package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
- package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
- package/templates/schema.ts +26 -0
- package/templates/skills/agentmail-sending/config.json +13 -0
- package/templates/skills/agentmail-sending/content.md +48 -0
- package/templates/skills/artifacts/config.json +13 -0
- package/templates/skills/artifacts/content.md +87 -0
- package/templates/skills/browser-use-cloud/config.json +13 -0
- package/templates/skills/browser-use-cloud/content.md +155 -0
- package/templates/skills/desloppify/config.json +13 -0
- package/templates/skills/desloppify/content.md +201 -0
- package/templates/skills/exa-search/config.json +13 -0
- package/templates/skills/exa-search/content.md +106 -0
- package/templates/skills/jira-interaction/config.json +13 -0
- package/templates/skills/jira-interaction/content.md +252 -0
- package/templates/skills/kapso-whatsapp/config.json +13 -0
- package/templates/skills/kapso-whatsapp/content.md +369 -0
- package/templates/skills/kv-storage/config.json +13 -0
- package/templates/skills/kv-storage/content.md +111 -0
- package/templates/skills/linear-interaction/config.json +20 -0
- package/templates/skills/linear-interaction/content.md +230 -0
- package/templates/skills/pages/config.json +18 -0
- package/templates/skills/pages/content.md +85 -0
- package/templates/skills/profile-corruption-escalation/config.json +13 -0
- package/templates/skills/profile-corruption-escalation/content.md +105 -0
- package/templates/skills/scheduled-task-resilience/config.json +13 -0
- package/templates/skills/scheduled-task-resilience/content.md +95 -0
- package/templates/skills/sprite-cli/config.json +13 -0
- package/templates/skills/sprite-cli/content.md +133 -0
- package/templates/skills/turso-interaction/config.json +13 -0
- package/templates/skills/turso-interaction/content.md +192 -0
- package/templates/skills/workflow-iterate/config.json +18 -0
- package/templates/skills/workflow-iterate/content.md +399 -0
- package/templates/skills/workflow-structured-output/config.json +13 -0
- package/templates/skills/workflow-structured-output/content.md +101 -0
- package/templates/skills/x-api-interactions/config.json +13 -0
- package/templates/skills/x-api-interactions/content.md +109 -0
- package/templates/workflows/autopilot/config.json +13 -0
- package/templates/workflows/autopilot/content.md +58 -0
- package/templates/workflows/linear-drain-loop/config.json +21 -0
- package/templates/workflows/linear-drain-loop/content.md +72 -0
- package/templates/workflows/ralph-loop/config.json +13 -0
- package/templates/workflows/ralph-loop/content.md +75 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "schedule",
|
|
3
|
+
"name": "daily-compounding-reflection",
|
|
4
|
+
"displayName": "Daily Compounding Reflection",
|
|
5
|
+
"slug": "daily-compounding-reflection",
|
|
6
|
+
"title": "Daily Compounding Reflection",
|
|
7
|
+
"description": "Capture lessons from the day into memory, skills, and workflow improvements.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "schedules",
|
|
10
|
+
"placeholders": [
|
|
11
|
+
"SLACK_CHANNEL_ID",
|
|
12
|
+
"TIMEZONE"
|
|
13
|
+
],
|
|
14
|
+
"runAllSeedersCandidate": true,
|
|
15
|
+
"tags": [
|
|
16
|
+
"memory",
|
|
17
|
+
"skills",
|
|
18
|
+
"operations"
|
|
19
|
+
],
|
|
20
|
+
"must": true
|
|
21
|
+
}
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
# Daily Compounding Reflection
|
|
2
|
+
|
|
3
|
+
Capture lessons from the day into memory, skills, and workflow improvements.
|
|
4
|
+
|
|
5
|
+
## Schedule
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"cron": "10 2 * * *",
|
|
10
|
+
"timezone": "UTC",
|
|
11
|
+
"agentRole": "lead",
|
|
12
|
+
"enabled": true
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Scheduled Task
|
|
17
|
+
|
|
18
|
+
This is the full task prompt the schedule runs on each fire — including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
|
|
19
|
+
|
|
20
|
+
Task Type: Daily Evolution — "Compounding Engine"
|
|
21
|
+
|
|
22
|
+
You are Lead. This is the swarm's daily evolution routine. You are a real team working for Desplega Labs (desplega.ai) — the agent swarm, agent-fs, and related products. Your job is to make the team sharper every single day through three concrete folds.
|
|
23
|
+
|
|
24
|
+
The purpose is NOT to write a nice Slack post. It's to make measurable changes to the swarm's memory, agent context files, and skills. The Slack post is just the receipt.
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## Phase 0: Gather Context (DO NOT SKIP)
|
|
29
|
+
|
|
30
|
+
1. **Read today's blocker digest first.** Use `memory-search` with query "daily-blocker-digest" and read the latest entry. The `daily-blocker-digest` schedule runs 5 minutes before this. Any `RESOLVED-STALE` items in its output are direct evidence of our worst failure mode (trusting stale state). Fold 1 MUST write at least one lesson memory per RESOLVED-STALE item.
|
|
31
|
+
2. Use `get-tasks` with status "completed" (limit 25) to see what got done since the last reflection.
|
|
32
|
+
3. Use `get-tasks` with status "failed" (limit 10) to see what went wrong.
|
|
33
|
+
4. Use `memory-search` with query "daily evolution" to find the last reflection and track continuity.
|
|
34
|
+
5. Use `get-swarm` to see the current state of all agents (their profiles, SOUL.md, IDENTITY.md, etc.).
|
|
35
|
+
6. Review the last few days of completed tasks per agent to understand who did what and how well.
|
|
36
|
+
7. Use `skill-list` to see all current skills and their installation status.
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
## Fold 1: Memory Improvement
|
|
41
|
+
|
|
42
|
+
The swarm's memory is its institutional knowledge. It should grow smarter, not just bigger.
|
|
43
|
+
|
|
44
|
+
### 1A. Extract New Learnings
|
|
45
|
+
- Review completed tasks from the last 24h. For each non-trivial completion:
|
|
46
|
+
- Was there a reusable pattern, gotcha, or solution? → Write it as a shared memory
|
|
47
|
+
- Was there a codebase insight that other agents should know? → Write it as a shared memory
|
|
48
|
+
- Was there a process learning (what worked, what didn't)? → Write it as a shared memory
|
|
49
|
+
- Review failed tasks: What went wrong? Is there a preventable pattern? → Write a "lesson learned" memory
|
|
50
|
+
- **Blocker-digest input**: For each RESOLVED-STALE item caught by today's blocker digest, write a "how did this stay stale for N days?" post-mortem memory. The root cause is almost always an assumption that wasn't re-verified — codify the trigger that should have caught it.
|
|
51
|
+
|
|
52
|
+
### 1B. Curate Existing Memories
|
|
53
|
+
- Use `memory-search` with broad queries related to recent work areas to find existing memories
|
|
54
|
+
- Identify stale or outdated memories (e.g., references to files/tools that no longer exist) → Note them for cleanup
|
|
55
|
+
- Identify duplicate or overlapping memories → Consolidate into a single, better version
|
|
56
|
+
- Check if any memories contradict the current state of the codebase → Update or remove them
|
|
57
|
+
|
|
58
|
+
### 1C. Fill Knowledge Gaps
|
|
59
|
+
- Based on recent task patterns, are there areas where agents keep having to re-discover things?
|
|
60
|
+
- Are there common questions or lookups that should be pre-loaded as memories?
|
|
61
|
+
- Write 1-3 targeted memories that would have saved time in yesterday's work
|
|
62
|
+
|
|
63
|
+
**Track all memory changes** (created, updated, consolidated, flagged for removal).
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Fold 2: Agent Evolution
|
|
68
|
+
|
|
69
|
+
Each agent has context files that shape how they think and work: SOUL.md (personality, values, core identity), IDENTITY.md (role, expertise, working style), CLAUDE.md (operational rules, project instructions), and TOOLS.md (environment knowledge). These should evolve based on real performance.
|
|
70
|
+
|
|
71
|
+
### 2A. Performance Review (per agent)
|
|
72
|
+
For each active agent (Lead, Picateclas, Researcher, Reviewer, Tester, Jackknife):
|
|
73
|
+
- How many tasks did they complete in the last 24-48h?
|
|
74
|
+
- Did any tasks fail? What was the failure pattern?
|
|
75
|
+
- Did they need retries or corrections?
|
|
76
|
+
- Did they discover new capabilities or tools?
|
|
77
|
+
- Did any task reveal a gap in their knowledge or instructions?
|
|
78
|
+
|
|
79
|
+
### 2B. Identify Evolution Actions
|
|
80
|
+
Pick 1-3 agents to evolve today (rotate — don't always pick the same ones). For each:
|
|
81
|
+
|
|
82
|
+
**SOUL.md changes** — personality and values evolution:
|
|
83
|
+
- Did they demonstrate a new strength? Codify it.
|
|
84
|
+
- Did they show a weakness? Add a hard rule to prevent it.
|
|
85
|
+
- Has their role expanded or narrowed? Reflect it.
|
|
86
|
+
|
|
87
|
+
**IDENTITY.md changes** — role and expertise evolution:
|
|
88
|
+
- New areas of expertise demonstrated? Add them.
|
|
89
|
+
- Working style insights? Update quirks/preferences.
|
|
90
|
+
- New tools or repos they've mastered? Add to expertise.
|
|
91
|
+
|
|
92
|
+
**CLAUDE.md changes** — operational rules:
|
|
93
|
+
- New operational patterns discovered? Add as rules.
|
|
94
|
+
- Rules that proved too strict or too loose? Adjust.
|
|
95
|
+
- New project context that affects how they should work? Add it.
|
|
96
|
+
|
|
97
|
+
**TOOLS.md changes** — environment knowledge:
|
|
98
|
+
- New services, APIs, or tools discovered? Document them.
|
|
99
|
+
- Changed endpoints, ports, or configurations? Update them.
|
|
100
|
+
- Tips and tricks learned? Add them.
|
|
101
|
+
|
|
102
|
+
### 2C. Execute Evolution
|
|
103
|
+
For each evolution action:
|
|
104
|
+
- Use `update-profile` with the agent's ID and the updated field (soulMd, identityMd, claudeMd, toolsMd)
|
|
105
|
+
- Be surgical — don't rewrite entire files, update the specific section that changed
|
|
106
|
+
- Log exactly what you changed and why
|
|
107
|
+
|
|
108
|
+
**IMPORTANT**: When updating an agent's profile, you MUST first read their current profile from the `get-swarm` output, then make targeted edits. Do NOT overwrite their entire SOUL.md/IDENTITY.md with a template — that destroys accumulated evolution.
|
|
109
|
+
|
|
110
|
+
### 2D. Self-Evolution (Lead)
|
|
111
|
+
Don't forget yourself. Review your own performance:
|
|
112
|
+
- Did your task routing work well? Were tasks assigned to the right agents?
|
|
113
|
+
- Did your coordination cause any bottlenecks?
|
|
114
|
+
- Did the blocker digest catch stale-state items you should have caught sooner? → Add a verification rule to your own CLAUDE.md
|
|
115
|
+
- Update your own SOUL.md/IDENTITY.md/CLAUDE.md/TOOLS.md if needed
|
|
116
|
+
|
|
117
|
+
---
|
|
118
|
+
|
|
119
|
+
## Fold 3: Skill Evolution
|
|
120
|
+
|
|
121
|
+
Skills are the swarm's procedural knowledge — tested playbooks for how to do things. They compound by capturing what agents learn into reusable procedures.
|
|
122
|
+
|
|
123
|
+
### 3A. Identify Skill Candidates
|
|
124
|
+
- Review completed tasks: Was any task done by researching something that should have been a skill?
|
|
125
|
+
- Look for repeated patterns: Has the same type of task been done 3+ times with a stable approach?
|
|
126
|
+
- Check failed tasks: Did an agent waste context on research that a skill could have prevented?
|
|
127
|
+
- Review agent sessions: Did any agent ignore an existing skill and re-derive the same knowledge? (This is a skill discoverability problem.)
|
|
128
|
+
|
|
129
|
+
### 3B. Create New Skills
|
|
130
|
+
For each candidate:
|
|
131
|
+
1. Draft the skill content (procedure, examples, gotchas)
|
|
132
|
+
2. Use `skill-create` with clear name, description, and `agentAutoTrigger` field
|
|
133
|
+
3. Use `skill-install` to install it for relevant agents
|
|
134
|
+
4. The description and trigger fields are CRITICAL — they determine whether agents find and use the skill
|
|
135
|
+
|
|
136
|
+
### 3C. Update Existing Skills
|
|
137
|
+
- Use `skill-list` with `includeContent: true` to review current skills
|
|
138
|
+
- Are any skills outdated (e.g., referencing old APIs, wrong procedures)?
|
|
139
|
+
- Are any skills not being used? Check if the description/trigger is too vague
|
|
140
|
+
- Update skills with `skill-update` as needed
|
|
141
|
+
|
|
142
|
+
### 3D. Verify Skill Adoption
|
|
143
|
+
- Check recent task sessions: Are agents actually invoking skills via the `Skill` tool?
|
|
144
|
+
- If not, investigate why:
|
|
145
|
+
- Is the skill's description/trigger too vague? → Make it more specific
|
|
146
|
+
- Is the skill not installed for the right agents? → Install it
|
|
147
|
+
- Is the agent's prompt not mentioning skills? → Update their TOOLS.md
|
|
148
|
+
|
|
149
|
+
**Track all skill changes** (created, updated, installed/uninstalled).
|
|
150
|
+
|
|
151
|
+
---
|
|
152
|
+
|
|
153
|
+
## Phase 4: Post to Slack (THE RECEIPT)
|
|
154
|
+
|
|
155
|
+
Use `slack-post` with channelId "C0A4J7GB0UD". Format:
|
|
156
|
+
|
|
157
|
+
```
|
|
158
|
+
🧬 Daily Evolution — [date]
|
|
159
|
+
|
|
160
|
+
**Prelude — Blocker Digest:**
|
|
161
|
+
- [X] real blockers still pending humans
|
|
162
|
+
- [Y] RESOLVED-STALE items caught & removed from HEARTBEAT
|
|
163
|
+
- Worst offender: [item that lingered longest]
|
|
164
|
+
|
|
165
|
+
**Fold 1 — Memory:**
|
|
166
|
+
- [X] new memories written
|
|
167
|
+
- [X] memories curated/consolidated
|
|
168
|
+
- [X] stale memories flagged
|
|
169
|
+
- Key insight: [what the swarm learned today]
|
|
170
|
+
|
|
171
|
+
**Fold 2 — Agent Evolution:**
|
|
172
|
+
- [Agent Name]: [what was changed in which file and why]
|
|
173
|
+
- [Agent Name]: [what was changed in which file and why]
|
|
174
|
+
(or "All agents performing well — no evolution needed today" — but this should be RARE)
|
|
175
|
+
|
|
176
|
+
**Fold 3 — Skill Evolution:**
|
|
177
|
+
- [X] new skills created
|
|
178
|
+
- [X] existing skills updated
|
|
179
|
+
- [X] skills installed for agents
|
|
180
|
+
- Key action: [what procedural knowledge was captured or improved]
|
|
181
|
+
(or "No skill changes needed today" — acceptable if skills are current)
|
|
182
|
+
|
|
183
|
+
**Deferred:** [anything needing user input]
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
Keep it concise. The proof is in the changes, not the prose.
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Phase 5: Verify
|
|
191
|
+
|
|
192
|
+
Before calling `store-progress`:
|
|
193
|
+
1. Did you read the blocker digest memory? (Phase 0 step 1)
|
|
194
|
+
2. Did you write/update at least 1 memory? (Fold 1)
|
|
195
|
+
3. Did you call `update-profile` for at least 1 agent OR have a documented reason why no evolution was needed? (Fold 2)
|
|
196
|
+
4. Did you review skills and either create/update one OR document why no changes were needed? (Fold 3)
|
|
197
|
+
5. Did you post to Slack?
|
|
198
|
+
|
|
199
|
+
If you have zero changes across all three folds and zero deferred items, something went wrong — go back and look harder.
|
|
200
|
+
|
|
201
|
+
## Anti-patterns to avoid:
|
|
202
|
+
- ❌ Posting a Slack summary without making actual changes
|
|
203
|
+
- ❌ Claiming agent evolution without calling `update-profile`
|
|
204
|
+
- ❌ Rewriting an entire SOUL.md from scratch (destroys history)
|
|
205
|
+
- ❌ Only ever evolving 1 agent and ignoring the others
|
|
206
|
+
- ❌ Writing vague memories ("things went well") instead of specific ones
|
|
207
|
+
- ❌ Completing in under 2 minutes (impossible to do this properly that fast)
|
|
208
|
+
- ❌ Skipping Fold 1, Fold 2, or Fold 3 entirely
|
|
209
|
+
- ❌ Ignoring skill adoption problems (agents not using skills that exist)
|
|
210
|
+
- ❌ Ignoring RESOLVED-STALE items from the blocker digest — they're the highest-signal lessons we get each day
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "schedule",
|
|
3
|
+
"name": "daily-hn-briefing",
|
|
4
|
+
"displayName": "Daily Hacker News Briefing",
|
|
5
|
+
"slug": "daily-hn-briefing",
|
|
6
|
+
"title": "Daily Hacker News Briefing",
|
|
7
|
+
"description": "Demonstrate web research automation by summarizing relevant technology discussion.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "schedules",
|
|
10
|
+
"placeholders": ["SLACK_CHANNEL_ID", "TIMEZONE"],
|
|
11
|
+
"runAllSeedersCandidate": false,
|
|
12
|
+
"tags": ["research", "briefing", "browser"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# Daily Hacker News Briefing
|
|
2
|
+
|
|
3
|
+
Demonstrate web research automation by summarizing relevant technology discussion.
|
|
4
|
+
|
|
5
|
+
## Schedule
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"cron": "30 2 * * *",
|
|
10
|
+
"timezone": "UTC",
|
|
11
|
+
"agentRole": "lead",
|
|
12
|
+
"enabled": true
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Scheduled Task
|
|
17
|
+
|
|
18
|
+
This is the full task prompt the schedule runs on each fire — including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
|
|
19
|
+
|
|
20
|
+
Task Type: General (Browser Automation + Email Report)
|
|
21
|
+
Goal: Daily Hacker News briefing — scrape HN using browser automation, email the report, and archive it
|
|
22
|
+
|
|
23
|
+
Instructions:
|
|
24
|
+
|
|
25
|
+
1. Use qa-use browser commands (e.g., `/qa-use:explore`) to scrape the following HN pages **ONE AT A TIME, STRICTLY SEQUENTIAL**.
|
|
26
|
+
|
|
27
|
+
**CRITICAL — DO NOT PARALLELIZE.** Do NOT fan out parallel browser sessions, do NOT launch multiple Browser Use SDK flows concurrently, do NOT batch the URLs into a single multi-target call. This template has auto-failed on 2026-05-10 and 2026-05-11 because the worker scraped all 5 URLs in parallel and crossed the heartbeat-stale watchdog threshold before the flows completed. Strict serial execution is required.
|
|
28
|
+
|
|
29
|
+
**Workflow:** Scrape URL #1 → call `store-progress` with a one-line update (e.g., "Scraped HN page 1 — N stories found") → scrape URL #2 → `store-progress` → … → URL #5 → `store-progress`. After every individual URL scrape, you MUST call `store-progress` BEFORE starting the next URL. This keeps the session heartbeat fresh and prevents the watchdog from killing the task.
|
|
30
|
+
|
|
31
|
+
**Visit each URL one-by-one in this exact order:**
|
|
32
|
+
- https://news.ycombinator.com (page 1)
|
|
33
|
+
- https://news.ycombinator.com/?p=2 (page 2)
|
|
34
|
+
- https://news.ycombinator.com/?p=3 (page 3)
|
|
35
|
+
- https://news.ycombinator.com/newest
|
|
36
|
+
- https://news.ycombinator.com/show
|
|
37
|
+
|
|
38
|
+
You MUST visit all 5 URLs above. Do not skip any. Do not parallelize. Do not combine into a single browser call.
|
|
39
|
+
|
|
40
|
+
2. From ALL scraped pages, filter for stories relevant to these topics:
|
|
41
|
+
- AI / LLMs / foundation models
|
|
42
|
+
- Agentic coding / AI-powered development
|
|
43
|
+
- E2E testing / browser automation / QA
|
|
44
|
+
- Developer tools / DevOps
|
|
45
|
+
- Startups / SaaS relevant to Desplega's space
|
|
46
|
+
|
|
47
|
+
3. Format a quick-scan briefing. Organize by source section:
|
|
48
|
+
- **Front Page** (from pages 1-3)
|
|
49
|
+
- **New** (from /newest)
|
|
50
|
+
- **Show HN** (from /show)
|
|
51
|
+
|
|
52
|
+
Each item MUST include:
|
|
53
|
+
- HN title as a link
|
|
54
|
+
- Post date (e.g., "Feb 24" or "2h ago") — REQUIRED on every item
|
|
55
|
+
- Direct link to story (or HN comments link)
|
|
56
|
+
- 1 short line on why it's relevant
|
|
57
|
+
- Points/comments count if notable
|
|
58
|
+
|
|
59
|
+
Example format per item:
|
|
60
|
+
• Emdash — Open-source agentic dev environment (https://news.ycombinator.com/item?id=12345) · Feb 24 · 65 pts · 30 comments
|
|
61
|
+
Direct peer to Cursor/Windsurf — agentic-first IDE.
|
|
62
|
+
|
|
63
|
+
4. If any story is exceptionally relevant to Desplega (E2E testing, browser automation, AI agents), add a "Deep Dive" section with 2-3 sentences.
|
|
64
|
+
|
|
65
|
+
5. STORE THE REPORT: Save the full briefing as a markdown file at:
|
|
66
|
+
/workspace/shared/hn-briefings/YYYY-MM-DD.md
|
|
67
|
+
(using today's date). Create the directory if it doesn't exist. The file should include:
|
|
68
|
+
- Title: "# HN Briefing — [DATE]"
|
|
69
|
+
- Stats line: number of stories curated, number scraped, pages checked
|
|
70
|
+
- The full curated list (organized by section)
|
|
71
|
+
- Any deep dives
|
|
72
|
+
This creates a persistent archive we can reference later.
|
|
73
|
+
|
|
74
|
+
6. SEND EMAIL: Use AgentMail MCP tools to send the report as an email:
|
|
75
|
+
- From inbox: lead@agent-swarm.dev
|
|
76
|
+
- To: t@desplega.ai AND e@desplega.ai
|
|
77
|
+
- Subject: "HN Briefing — [TODAY'S DATE, e.g. Feb 25, 2026]"
|
|
78
|
+
- Body: Send as HTML email. Format the briefing nicely with:
|
|
79
|
+
- A header: "HN Briefing — [DATE]"
|
|
80
|
+
- Stats line (include "Sources: Front Page (3 pages), New, Show HN")
|
|
81
|
+
- Each section clearly labeled
|
|
82
|
+
- Each story as a bullet with clickable links
|
|
83
|
+
- Deep dives in a separate section if applicable
|
|
84
|
+
- Also include the plain text version in the text field
|
|
85
|
+
|
|
86
|
+
7. When everything above is done, call `store-progress` with the formatted briefing as the output.
|
|
87
|
+
|
|
88
|
+
IMPORTANT:
|
|
89
|
+
- Use qa-use browser automation to browse HN, don't use web search
|
|
90
|
+
- **Scrape URLs SERIALLY (one at a time) and call `store-progress` between every URL** — never parallelize, never fan-out. This prevents heartbeat-stale auto-fails.
|
|
91
|
+
- Only include stories from the last ~24 hours
|
|
92
|
+
- ALWAYS include the post date on each story — this is required
|
|
93
|
+
- Keep it scannable — clickable links, not walls of text
|
|
94
|
+
- Target 5-20 relevant stories (quality over quantity)
|
|
95
|
+
- You MUST scrape all 5 URLs (3 main pages + new + show) — this is required, but ONE AT A TIME
|
|
96
|
+
- The email is sent from lead@agent-swarm.dev using AgentMail MCP `send_message` tool
|
|
97
|
+
- Recipients: t@desplega.ai AND e@desplega.ai
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "schedule",
|
|
3
|
+
"name": "daily-workflow-health-audit",
|
|
4
|
+
"displayName": "Daily Workflow Health Audit",
|
|
5
|
+
"slug": "daily-workflow-health-audit",
|
|
6
|
+
"title": "Daily Workflow Health Audit",
|
|
7
|
+
"description": "Check scheduled jobs and workflows for repeated failures, stale runs, and silent drift.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "schedules",
|
|
10
|
+
"placeholders": ["SLACK_CHANNEL_ID", "TIMEZONE"],
|
|
11
|
+
"runAllSeedersCandidate": true,
|
|
12
|
+
"tags": ["workflows", "schedules", "reliability"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# Daily Workflow Health Audit
|
|
2
|
+
|
|
3
|
+
Check scheduled jobs and workflows for repeated failures, stale runs, and silent drift.
|
|
4
|
+
|
|
5
|
+
## Schedule
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"cron": "0 8 * * *",
|
|
10
|
+
"timezone": "UTC",
|
|
11
|
+
"agentRole": "lead",
|
|
12
|
+
"enabled": true
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Scheduled Task
|
|
17
|
+
|
|
18
|
+
This is the full task prompt the schedule runs on each fire — including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
|
|
19
|
+
|
|
20
|
+
Task Type: Daily Workflow + Schedule Health Audit
|
|
21
|
+
|
|
22
|
+
You are Lead. Run this audit and post a single Slack digest. **Source ask:** Eze in `C0A4J7GB0UD` thread ts `1779264760.065579` (2026-05-20). Cadence: daily at 08:00 UTC. Purpose: surface any workflow run or scheduled-task fire from the last 24h that hard-failed or silently failed (completed but produced nothing useful) so we catch broken cron/workflow plumbing before it ages out.
|
|
23
|
+
|
|
24
|
+
---
|
|
25
|
+
|
|
26
|
+
## Phase 1 — Query the six failure modes
|
|
27
|
+
|
|
28
|
+
Use `db-query` for each.
|
|
29
|
+
|
|
30
|
+
### 1A. Hard-failed workflow runs (last 24h)
|
|
31
|
+
|
|
32
|
+
```sql
|
|
33
|
+
SELECT wr.id, w.name AS workflowName, wr.status,
|
|
34
|
+
wr.finishedAt, wr.lastUpdatedAt,
|
|
35
|
+
SUBSTR(COALESCE(wr.error, ''), 1, 220) AS errSnippet
|
|
36
|
+
FROM workflow_runs wr
|
|
37
|
+
JOIN workflows w ON w.id = wr.workflowId
|
|
38
|
+
WHERE wr.status = 'failed'
|
|
39
|
+
AND datetime(COALESCE(wr.finishedAt, wr.lastUpdatedAt, wr.startedAt)) > datetime('now', '-24 hours')
|
|
40
|
+
ORDER BY wr.lastUpdatedAt DESC;
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
### 1B. Hard-failed schedule-spawned tasks (last 24h)
|
|
44
|
+
|
|
45
|
+
```sql
|
|
46
|
+
SELECT t.id, s.name AS scheduleName, t.status,
|
|
47
|
+
SUBSTR(COALESCE(t.failureReason, ''), 1, 220) AS reasonSnippet,
|
|
48
|
+
SUBSTR(COALESCE(t.output, ''), 1, 220) AS outSnippet,
|
|
49
|
+
t.lastUpdatedAt
|
|
50
|
+
FROM agent_tasks t
|
|
51
|
+
LEFT JOIN scheduled_tasks s ON s.id = t.scheduleId
|
|
52
|
+
WHERE t.status = 'failed'
|
|
53
|
+
AND t.scheduleId IS NOT NULL
|
|
54
|
+
AND datetime(t.lastUpdatedAt) > datetime('now', '-24 hours')
|
|
55
|
+
ORDER BY t.lastUpdatedAt DESC;
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
### 1C. Halted >24h workflow runs (silent stuck)
|
|
59
|
+
|
|
60
|
+
```sql
|
|
61
|
+
SELECT wr.id, w.name AS workflowName, wr.status, wr.lastUpdatedAt
|
|
62
|
+
FROM workflow_runs wr
|
|
63
|
+
JOIN workflows w ON w.id = wr.workflowId
|
|
64
|
+
WHERE wr.status IN ('running', 'waiting')
|
|
65
|
+
AND datetime(wr.lastUpdatedAt) < datetime('now', '-24 hours')
|
|
66
|
+
ORDER BY wr.lastUpdatedAt ASC;
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### 1D. Silent: schedule-spawned task completed with empty/sentinel output
|
|
70
|
+
|
|
71
|
+
```sql
|
|
72
|
+
SELECT t.id, s.name AS scheduleName, t.status,
|
|
73
|
+
SUBSTR(COALESCE(t.output, ''), 1, 220) AS outSnippet,
|
|
74
|
+
LENGTH(TRIM(COALESCE(t.output, ''))) AS outLen,
|
|
75
|
+
t.lastUpdatedAt
|
|
76
|
+
FROM agent_tasks t
|
|
77
|
+
LEFT JOIN scheduled_tasks s ON s.id = t.scheduleId
|
|
78
|
+
WHERE t.status = 'completed'
|
|
79
|
+
AND t.scheduleId IS NOT NULL
|
|
80
|
+
AND datetime(t.lastUpdatedAt) > datetime('now', '-24 hours')
|
|
81
|
+
AND (
|
|
82
|
+
t.output IS NULL
|
|
83
|
+
OR TRIM(t.output) = ''
|
|
84
|
+
OR TRIM(t.output) = '⚡ Running shell command'
|
|
85
|
+
OR LENGTH(TRIM(t.output)) < 10
|
|
86
|
+
)
|
|
87
|
+
ORDER BY t.lastUpdatedAt DESC;
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### 1E. Cron didn't fire (nextRunAt more than 1 hour in the past)
|
|
91
|
+
|
|
92
|
+
```sql
|
|
93
|
+
SELECT s.id, s.name, s.cronExpression, s.lastRunAt, s.nextRunAt, s.consecutiveErrors,
|
|
94
|
+
SUBSTR(COALESCE(s.lastErrorMessage, ''), 1, 220) AS lastErrSnippet
|
|
95
|
+
FROM scheduled_tasks s
|
|
96
|
+
WHERE s.enabled = 1
|
|
97
|
+
AND s.scheduleType = 'recurring'
|
|
98
|
+
AND s.nextRunAt IS NOT NULL
|
|
99
|
+
AND datetime(s.nextRunAt) < datetime('now', '-1 hour')
|
|
100
|
+
ORDER BY s.nextRunAt ASC;
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### 1F. Schedules with consecutive errors (defensive)
|
|
104
|
+
|
|
105
|
+
```sql
|
|
106
|
+
SELECT s.id, s.name, s.cronExpression, s.consecutiveErrors, s.lastErrorAt,
|
|
107
|
+
SUBSTR(COALESCE(s.lastErrorMessage, ''), 1, 220) AS lastErrSnippet
|
|
108
|
+
FROM scheduled_tasks s
|
|
109
|
+
WHERE s.enabled = 1
|
|
110
|
+
AND s.consecutiveErrors >= 3
|
|
111
|
+
ORDER BY s.consecutiveErrors DESC;
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
### 1G. Totals (for the "all clear" denominator)
|
|
115
|
+
|
|
116
|
+
```sql
|
|
117
|
+
SELECT
|
|
118
|
+
(SELECT COUNT(*) FROM workflow_runs WHERE datetime(lastUpdatedAt) > datetime('now','-24 hours')) AS workflowRuns24h,
|
|
119
|
+
(SELECT COUNT(*) FROM agent_tasks WHERE scheduleId IS NOT NULL AND datetime(lastUpdatedAt) > datetime('now','-24 hours')) AS scheduledFires24h;
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
---
|
|
123
|
+
|
|
124
|
+
## Phase 2 — Render the digest
|
|
125
|
+
|
|
126
|
+
Each bullet must include a clickable URL.
|
|
127
|
+
|
|
128
|
+
- Workflow run URL: `https://app.agent-swarm.dev/workflow-runs/<id>` → Slack format: `<https://app.agent-swarm.dev/workflow-runs/<id>|workflow:<workflowName>>`
|
|
129
|
+
- Task URL: `https://app.agent-swarm.dev/tasks/<id>` → Slack format: `<https://app.agent-swarm.dev/tasks/<id>|schedule:<scheduleName>>`
|
|
130
|
+
|
|
131
|
+
Truncate error/output snippets to 200 chars + `…` if longer. Replace newlines with ` ⏎ `.
|
|
132
|
+
|
|
133
|
+
### Template
|
|
134
|
+
|
|
135
|
+
If TOTAL issues across 1A–1F is zero:
|
|
136
|
+
|
|
137
|
+
```
|
|
138
|
+
:white_check_mark: *Daily Workflow + Schedule Health Audit* — <YYYY-MM-DD>
|
|
139
|
+
|
|
140
|
+
<@U08NY4B5R2M> All clear — <workflowRuns24h> workflow runs + <scheduledFires24h> scheduled fires in the last 24h, all produced expected output.
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
Otherwise:
|
|
144
|
+
|
|
145
|
+
```
|
|
146
|
+
:stethoscope: *Daily Workflow + Schedule Health Audit* — <YYYY-MM-DD>
|
|
147
|
+
|
|
148
|
+
<@U08NY4B5R2M> Audit window: last 24h. Totals: <workflowRuns24h> workflow runs · <scheduledFires24h> scheduled fires · *<TOTAL_ISSUES> issues*
|
|
149
|
+
|
|
150
|
+
*Hard failures — workflow runs* (<N1A>)
|
|
151
|
+
• <url|workflow:name> — failed <relative-time>
|
|
152
|
+
↳ <errSnippet>
|
|
153
|
+
|
|
154
|
+
*Hard failures — scheduled tasks* (<N1B>)
|
|
155
|
+
• <url|schedule:name> — failed <relative-time>
|
|
156
|
+
↳ <reasonSnippet OR outSnippet OR "(no failureReason set)">
|
|
157
|
+
|
|
158
|
+
*Silent: halted >24h* (<N1C>)
|
|
159
|
+
• <url|workflow:name> — status=<status>, no progress since <timestamp>
|
|
160
|
+
|
|
161
|
+
*Silent: empty output* (<N1D>)
|
|
162
|
+
• <url|schedule:name> — completed, output=<"empty" | first-N-chars>
|
|
163
|
+
|
|
164
|
+
*Cron didn't fire on time* (<N1E>)
|
|
165
|
+
• schedule:<name> (cron `<expr>`) — nextRunAt=<past-timestamp>, lastRunAt=<timestamp or "never">
|
|
166
|
+
|
|
167
|
+
*Schedules with ≥3 consecutive errors* (<N1F>)
|
|
168
|
+
• schedule:<name> — consecutiveErrors=<n>, last error: <lastErrSnippet>
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Omit any section whose count is 0. Cap the message at 4000 chars (Slack's recommended maximum for message text) — if longer, keep the top 5 items per section and add `…and <K> more` lines.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Phase 3 — Post to Slack and complete
|
|
176
|
+
|
|
177
|
+
1. Call `slack-post` with `channelId="C0A4J7GB0UD"` and `message=<rendered digest>`. **Do NOT** thread under the design thread `1779264760.065579` — daily fires are top-level so they're easy to scan.
|
|
178
|
+
2. Call `store-progress` with `status: "completed"` and a one-paragraph `output` summary:
|
|
179
|
+
- `Issues found: hard-fail-wf=<N1A>, hard-fail-task=<N1B>, halted-24h=<N1C>, silent-empty=<N1D>, cron-stuck=<N1E>, consec-err=<N1F>.`
|
|
180
|
+
- `Totals: workflowRuns24h=<X>, scheduledFires24h=<Y>.`
|
|
181
|
+
- `Slack message ts: <ts from slack-post response>.`
|
|
182
|
+
|
|
183
|
+
## Anti-patterns
|
|
184
|
+
|
|
185
|
+
- ❌ Posting a separate Slack message per failure mode — ONE digest.
|
|
186
|
+
- ❌ Raw IDs without clickable URLs.
|
|
187
|
+
- ❌ Dumping full `error` / `output` content — truncate to 200 chars + `…` per item (the SQL fetches 220 chars only so truncation can be detected).
|
|
188
|
+
- ❌ Threading the daily digest under the original 1779264760.065579 design thread.
|
|
189
|
+
- ❌ Skipping the "all clear" message when zero issues — the heartbeat itself is the signal that the audit ran.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "schedule",
|
|
3
|
+
"name": "gtm-weekly-review",
|
|
4
|
+
"displayName": "Weekly GTM Metrics Review",
|
|
5
|
+
"slug": "gtm-weekly-review",
|
|
6
|
+
"title": "Weekly GTM Metrics Review",
|
|
7
|
+
"description": "Summarize product, marketing, or sales signals into an operator-friendly weekly review.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "schedules",
|
|
10
|
+
"placeholders": ["SLACK_CHANNEL_ID", "TIMEZONE"],
|
|
11
|
+
"runAllSeedersCandidate": false,
|
|
12
|
+
"tags": ["gtm", "metrics", "reporting"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# Weekly GTM Metrics Review
|
|
2
|
+
|
|
3
|
+
Summarize product, marketing, or sales signals into an operator-friendly weekly review.
|
|
4
|
+
|
|
5
|
+
## Schedule
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"cron": "20 3 * * 1",
|
|
10
|
+
"timezone": "UTC",
|
|
11
|
+
"agentRole": "lead",
|
|
12
|
+
"enabled": true
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Scheduled Task
|
|
17
|
+
|
|
18
|
+
This is the full task prompt the schedule runs on each fire — including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
|
|
19
|
+
|
|
20
|
+
Task Type: Research
|
|
21
|
+
Topic: Weekly GTM Metrics Review for agent-swarm
|
|
22
|
+
|
|
23
|
+
Goal: Check current GitHub stars, traffic, Google Search Console performance, and content metrics for the GTM campaign.
|
|
24
|
+
|
|
25
|
+
Instructions:
|
|
26
|
+
1. Check GitHub metrics: `gh api repos/desplega-ai/agent-swarm` (stars, forks, issues)
|
|
27
|
+
2. Check traffic: `gh api repos/desplega-ai/agent-swarm/traffic/views` and `gh api repos/desplega-ai/agent-swarm/traffic/clones`
|
|
28
|
+
3. Check referrers: `gh api repos/desplega-ai/agent-swarm/traffic/popular/referrers`
|
|
29
|
+
4. Check popular content: `gh api repos/desplega-ai/agent-swarm/traffic/popular/paths`
|
|
30
|
+
|
|
31
|
+
5. **Pull Google Search Console data** using the `gsc-analytics` skill (already installed on this agent). Do NOT write Python auth code — use the `gsc` CLI at `/workspace/repos/agent-work/gsc/gsc`. The setup script already wires up `GOOGLE_APPLICATION_CREDENTIALS`, so no extra env setup is needed.
|
|
32
|
+
|
|
33
|
+
Pull the weekly snapshot for each of the 4 Desplega properties:
|
|
34
|
+
```bash
|
|
35
|
+
GSC=/workspace/repos/agent-work/gsc/gsc
|
|
36
|
+
for site in desplega.ai agent-swarm.dev desplega.sh agent-fs.dev; do
|
|
37
|
+
echo "=== $site ==="
|
|
38
|
+
$GSC analytics "sc-domain:$site" --top 20 --json > "/tmp/gsc-$site.json"
|
|
39
|
+
jq '{current, previous, window, prior,
|
|
40
|
+
top_queries: [.topQueries[:10][] | {q: .keys[0], c: .clicks, i: .impressions, ctr: .ctr, pos: .position}],
|
|
41
|
+
top_pages: [.topPages[:10][] | {p: .keys[0], c: .clicks, i: .impressions, ctr: .ctr, pos: .position}]
|
|
42
|
+
}' "/tmp/gsc-$site.json"
|
|
43
|
+
done
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
The `analytics` subcommand returns headline KPIs (clicks, impressions, CTR, avg position) PLUS a week-over-week (WoW) comparison against the prior 7 days — this is what powers the "this week vs last week" section of the report.
|
|
47
|
+
|
|
48
|
+
6. Review the GTM plan at /workspace/shared/thoughts/shared/research/gtm-state-assessment.md
|
|
49
|
+
7. Compile a brief report with:
|
|
50
|
+
- Current star count, weekly change
|
|
51
|
+
- Top traffic sources
|
|
52
|
+
- **GSC summary**: total clicks/impressions across all domains, top performing queries, queries with growth potential (high impressions, low CTR or position 5-20)
|
|
53
|
+
- What's working, what to try next
|
|
54
|
+
- **SEO opportunities**: queries where we're close to page 1, content gaps to fill
|
|
55
|
+
|
|
56
|
+
Save report to /workspace/shared/thoughts/shared/research/gtm-weekly-{date}.md
|
|
57
|
+
|
|
58
|
+
This is part of the GTM: Agent Swarm → 100k GitHub Stars epic.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "schedule",
|
|
3
|
+
"name": "weekly-dependabot-triage",
|
|
4
|
+
"displayName": "Weekly Dependency Triage",
|
|
5
|
+
"slug": "weekly-dependabot-triage",
|
|
6
|
+
"title": "Weekly Dependency Triage",
|
|
7
|
+
"description": "Review dependency update PRs, group safe patches, and flag risky upgrades.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "schedules",
|
|
10
|
+
"placeholders": ["SLACK_CHANNEL_ID", "REPO_URL", "TIMEZONE"],
|
|
11
|
+
"runAllSeedersCandidate": false,
|
|
12
|
+
"tags": ["github", "dependencies", "maintenance"]
|
|
13
|
+
}
|