@desplega.ai/agent-swarm 1.85.0 → 1.86.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. package/README.md +1 -0
  2. package/openapi.json +1 -1
  3. package/package.json +8 -6
  4. package/src/be/db.ts +44 -0
  5. package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
  6. package/src/be/modelsdev-cache.json +152028 -0
  7. package/src/be/modelsdev-cache.ts +46 -0
  8. package/src/be/seed-pricing.ts +7 -44
  9. package/src/cli.tsx +12 -2
  10. package/src/commands/codex-session-runner.ts +132 -0
  11. package/src/commands/credential-wait.ts +2 -2
  12. package/src/commands/provider-credentials.ts +10 -5
  13. package/src/commands/runner.ts +3 -3
  14. package/src/prompts/base-prompt.ts +49 -3
  15. package/src/providers/claude-adapter.ts +83 -2
  16. package/src/providers/claude-managed-models.ts +18 -2
  17. package/src/providers/codex-adapter.ts +417 -97
  18. package/src/providers/codex-models.ts +9 -2
  19. package/src/providers/index.ts +28 -19
  20. package/src/providers/pricing-sources.md +7 -4
  21. package/src/providers/swarm-events-shared.ts +14 -0
  22. package/src/slack/HEURISTICS.md +5 -1
  23. package/src/slack/handlers.test.ts +35 -0
  24. package/src/slack/handlers.ts +79 -2
  25. package/src/tests/base-prompt.test.ts +46 -8
  26. package/src/tests/claude-managed-adapter.test.ts +4 -4
  27. package/src/tests/codex-adapter-otel.test.ts +4 -4
  28. package/src/tests/codex-adapter.test.ts +20 -7
  29. package/src/tests/codex-swarm-events.test.ts +35 -0
  30. package/src/tests/context-window.test.ts +1 -0
  31. package/src/tests/credential-check.test.ts +48 -29
  32. package/src/tests/entrypoint-config-env-export.test.ts +81 -0
  33. package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
  34. package/src/tests/migration-046-budgets.test.ts +6 -5
  35. package/src/tests/pricing-routes.test.ts +6 -5
  36. package/src/tests/provider-adapter.test.ts +10 -10
  37. package/src/tests/provider-command-format.test.ts +4 -4
  38. package/src/tests/session-costs-codex-recompute.test.ts +25 -0
  39. package/src/tools/send-task.ts +30 -9
  40. package/src/utils/context-window.ts +1 -0
  41. package/templates/schedules/daily-blocker-digest/config.json +13 -0
  42. package/templates/schedules/daily-blocker-digest/content.md +150 -0
  43. package/templates/schedules/daily-compounding-reflection/config.json +21 -0
  44. package/templates/schedules/daily-compounding-reflection/content.md +210 -0
  45. package/templates/schedules/daily-hn-briefing/config.json +13 -0
  46. package/templates/schedules/daily-hn-briefing/content.md +97 -0
  47. package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
  48. package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
  49. package/templates/schedules/gtm-weekly-review/config.json +13 -0
  50. package/templates/schedules/gtm-weekly-review/content.md +58 -0
  51. package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
  52. package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
  53. package/templates/schema.ts +26 -0
  54. package/templates/skills/agentmail-sending/config.json +13 -0
  55. package/templates/skills/agentmail-sending/content.md +48 -0
  56. package/templates/skills/artifacts/config.json +13 -0
  57. package/templates/skills/artifacts/content.md +87 -0
  58. package/templates/skills/browser-use-cloud/config.json +13 -0
  59. package/templates/skills/browser-use-cloud/content.md +155 -0
  60. package/templates/skills/desloppify/config.json +13 -0
  61. package/templates/skills/desloppify/content.md +201 -0
  62. package/templates/skills/exa-search/config.json +13 -0
  63. package/templates/skills/exa-search/content.md +106 -0
  64. package/templates/skills/jira-interaction/config.json +13 -0
  65. package/templates/skills/jira-interaction/content.md +252 -0
  66. package/templates/skills/kapso-whatsapp/config.json +13 -0
  67. package/templates/skills/kapso-whatsapp/content.md +369 -0
  68. package/templates/skills/kv-storage/config.json +13 -0
  69. package/templates/skills/kv-storage/content.md +111 -0
  70. package/templates/skills/linear-interaction/config.json +20 -0
  71. package/templates/skills/linear-interaction/content.md +230 -0
  72. package/templates/skills/pages/config.json +18 -0
  73. package/templates/skills/pages/content.md +85 -0
  74. package/templates/skills/profile-corruption-escalation/config.json +13 -0
  75. package/templates/skills/profile-corruption-escalation/content.md +105 -0
  76. package/templates/skills/scheduled-task-resilience/config.json +13 -0
  77. package/templates/skills/scheduled-task-resilience/content.md +95 -0
  78. package/templates/skills/sprite-cli/config.json +13 -0
  79. package/templates/skills/sprite-cli/content.md +133 -0
  80. package/templates/skills/turso-interaction/config.json +13 -0
  81. package/templates/skills/turso-interaction/content.md +192 -0
  82. package/templates/skills/workflow-iterate/config.json +18 -0
  83. package/templates/skills/workflow-iterate/content.md +399 -0
  84. package/templates/skills/workflow-structured-output/config.json +13 -0
  85. package/templates/skills/workflow-structured-output/content.md +101 -0
  86. package/templates/skills/x-api-interactions/config.json +13 -0
  87. package/templates/skills/x-api-interactions/content.md +109 -0
  88. package/templates/workflows/autopilot/config.json +13 -0
  89. package/templates/workflows/autopilot/content.md +58 -0
  90. package/templates/workflows/linear-drain-loop/config.json +21 -0
  91. package/templates/workflows/linear-drain-loop/content.md +72 -0
  92. package/templates/workflows/ralph-loop/config.json +13 -0
  93. package/templates/workflows/ralph-loop/content.md +75 -0
@@ -0,0 +1,45 @@
1
+ # Weekly Dependency Triage
2
+
3
+ Review dependency update PRs, group safe patches, and flag risky upgrades.
4
+
5
+ ## Schedule
6
+
7
+ ```json
8
+ {
9
+ "cron": "40 3 * * 0",
10
+ "timezone": "UTC",
11
+ "agentRole": "lead",
12
+ "enabled": true
13
+ }
14
+ ```
15
+
16
+ ## Scheduled Task
17
+
18
+ This is the full task prompt the schedule runs on each fire — including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
19
+
20
+ Triage dependabot PRs from https://github.com/desplega-ai/desplega.ai/pulls
21
+
22
+ ## Instructions
23
+
24
+ 1. **List all open dependabot PRs** in desplega-ai/desplega.ai using `gh pr list`
25
+ 2. **Only paths we care about**: `/be` and `/new-fe`. Close all other dependabot PRs (ones that don't touch these paths).
26
+ 3. **DO NOT touch non-dependabot PRs** — leave them as-is.
27
+ 4. **Create two unified PRs** that merge all dependabot bumps into one PR each:
28
+ - One for `/be` changes — branch name format: `YYYY-MM-DD-dependabot-be` (use today's date)
29
+ - One for `/new-fe` changes — branch name format: `YYYY-MM-DD-dependabot-fe` (use today's date)
30
+ - Each unified PR should be based on latest `main` and include all the dependency bumps from the individual dependabot PRs for that path.
31
+ - After creating the unified PRs, close the individual dependabot PRs that were merged into them.
32
+ 5. **Return the URLs** of the two final unified PRs.
33
+ 6. If there are no open dependabot PRs, just report that and complete.
34
+
35
+ ## Approach
36
+
37
+ - Clone the repo, checkout main, pull latest
38
+ - For each path (be, new-fe): create a branch, cherry-pick or merge the dependabot changes, push, create PR
39
+ - Close individual dependabot PRs after unifying
40
+ - Be careful: some dependabot PRs may have conflicts — handle gracefully
41
+
42
+ ## Important
43
+ - The PR title should be descriptive, e.g. "chore(be): consolidate dependabot bumps YYYY-MM-DD"
44
+ - Add @tarasyarema as reviewer on both PRs
45
+ - Post the final PR URLs back to Slack
@@ -35,3 +35,29 @@ export interface TemplateResponse {
35
35
  heartbeatMd: string;
36
36
  };
37
37
  }
38
+
39
+ export type AgentAssetKind = "skill" | "schedule" | "workflow";
40
+ export type AgentAssetCategory = "skills" | "schedules" | "workflows";
41
+
42
+ export interface AgentAssetConfig {
43
+ kind: AgentAssetKind;
44
+ name: string;
45
+ displayName: string;
46
+ slug: string;
47
+ title: string;
48
+ description: string;
49
+ version: string;
50
+ category: AgentAssetCategory;
51
+ placeholders: string[];
52
+ runAllSeedersCandidate: boolean;
53
+ /** Marks an asset as an essential, recommended-for-every-swarm building block. */
54
+ must?: boolean;
55
+ tags: string[];
56
+ }
57
+
58
+ export interface AgentAssetResponse {
59
+ config: AgentAssetConfig;
60
+ body: string;
61
+ }
62
+
63
+ export const ASSET_CATEGORIES: AgentAssetCategory[] = ["skills", "schedules", "workflows"];
@@ -0,0 +1,13 @@
1
+ {
2
+ "kind": "skill",
3
+ "name": "agentmail-sending",
4
+ "displayName": "AgentMail Sending",
5
+ "slug": "agentmail-sending",
6
+ "title": "AgentMail Sending",
7
+ "description": "Generic rules for sending email through an agent-accessible mailbox API.",
8
+ "version": "1.0.0",
9
+ "category": "skills",
10
+ "placeholders": ["COMPANY_SIGNATURE"],
11
+ "runAllSeedersCandidate": false,
12
+ "tags": ["email", "communications"]
13
+ }
@@ -0,0 +1,48 @@
1
+ # AgentMail Sending Rules
2
+
3
+ These rules are MANDATORY for all agents sending email via AgentMail. Violating them will result in blank emails reaching real people.
4
+
5
+ ## Rule 1: TEXT ONLY — Never Pass `html` Parameter
6
+
7
+ **AgentMail has a critical bug (as of 2026-03-25):** When both `text` and `html` parameters are passed to `send_message` or `reply_to_message`, the HTML body content is silently dropped. The resulting email has an empty `<div dir="ltr"></div>`. Email clients (Gmail, etc.) prefer the HTML version over plain text, so recipients see a completely blank email.
8
+
9
+ **What to do:**
10
+ - ONLY pass the `text` parameter
11
+ - NEVER pass the `html` parameter
12
+ - This applies to BOTH `send_message` and `reply_to_message`
13
+
14
+ **Why this matters:** This bug caused real outbound prospect emails to arrive blank, burning contacts permanently. It is not a cosmetic issue — it's a data loss / reputation issue.
15
+
16
+ ## Rule 2: Always BCC t@desplega.ai on Outbound Emails
17
+
18
+ All outbound emails to external recipients (anyone outside @agent-swarm.dev) MUST include `t@desplega.ai` as BCC. This gives the human founder visibility into what emails the swarm is sending.
19
+
20
+ **How:**
21
+ ```
22
+ send_message({
23
+ inboxId: "lead@agent-swarm.dev",
24
+ to: ["recipient@example.com"],
25
+ bcc: ["t@desplega.ai"],
26
+ subject: "...",
27
+ text: "..."
28
+ })
29
+ ```
30
+
31
+ **Exception:** Internal emails between agent inboxes (@agent-swarm.dev) or to t@desplega.ai / e@desplega.ai directly do NOT need BCC.
32
+
33
+ ## Rule 3: Always Include Signature
34
+
35
+ Use the `email-signature` skill to append the proper plain text signature to every outgoing email. See that skill for the template.
36
+
37
+ ## Rule 4: Human Approval Before Sending to Prospects
38
+
39
+ Never send outreach/cold emails to external prospects without explicit human approval. Draft the emails, present them for review, and only send after receiving "approved" or equivalent confirmation.
40
+
41
+ ## Summary Checklist
42
+
43
+ Before every `send_message` or `reply_to_message` call:
44
+ - [ ] Only `text` param, NO `html` param
45
+ - [ ] BCC `t@desplega.ai` if recipient is external
46
+ - [ ] Plain text signature appended
47
+ - [ ] Human-approved if it's outreach/cold email
48
+
@@ -0,0 +1,13 @@
1
+ {
2
+ "kind": "skill",
3
+ "name": "artifacts",
4
+ "displayName": "Artifacts",
5
+ "slug": "artifacts",
6
+ "title": "Artifacts",
7
+ "description": "Store logs, screenshots, exports, and generated files as durable task artifacts.",
8
+ "version": "1.0.0",
9
+ "category": "skills",
10
+ "placeholders": [],
11
+ "runAllSeedersCandidate": true,
12
+ "tags": ["artifacts", "evidence", "qa"]
13
+ }
@@ -0,0 +1,87 @@
1
+ # Artifacts
2
+
3
+ Artifacts are files, screenshots, recordings, logs, and reports that outlive a session and can be referenced by other agents, humans, or future tasks. The agent-swarm supports two artifact stores: **agent-fs** (structured, searchable, shareable) and the **shared workspace filesystem** (`/workspace/shared/`).
4
+
5
+ ## When to Create Artifacts
6
+
7
+ - Your task produces a deliverable humans should review (report, screenshot, recording, data export).
8
+ - Another agent or future session needs to pick up where you left off.
9
+ - You want to attach evidence to a PR, Linear ticket, or Slack message.
10
+ - The output is too large for `store-progress.output`.
11
+
12
+ ## Agent-fs (Preferred for Human-Shareable Artifacts)
13
+
14
+ agent-fs is a persistent, searchable file system shared across the swarm.
15
+
16
+ ```bash
17
+ # Write to personal drive
18
+ agent-fs write thoughts/research/2026-05-28-topic.md --content "..." -m "description"
19
+
20
+ # Write to shared drive (humans + other agents can see)
21
+ agent-fs --org 648a5f3c-35c8-4f11-8673-b89de52cd6bd write \
22
+ thoughts/c06cca59-187e-4aa6-8472-8ac6caf177af/research/2026-05-28-topic.md \
23
+ --content "..." -m "research findings"
24
+ ```
25
+
26
+ Verify the write succeeded (agent-fs writes can fail silently with empty payloads):
27
+ ```bash
28
+ agent-fs stat <path> --json | jq '.size'
29
+ # If size < 200 bytes on a non-trivial artifact, the write FAILED — re-do it.
30
+ ```
31
+
32
+ ### Sharing agent-fs files with humans
33
+
34
+ Build the URL from the live host env var:
35
+ ```
36
+ ${AGENT_FS_LIVE_URL}/file/~/<org_id>/<drive_id>/<file_path>
37
+ ```
38
+
39
+ `AGENT_FS_LIVE_URL` defaults to `https://live.agent-fs.dev`. Get `org_id` and `drive_id` from `agent-fs stat <path> --json`.
40
+
41
+ ## Shared Filesystem
42
+
43
+ For non-text artifacts or files other agents need to access during the same session:
44
+
45
+ - `/workspace/shared/downloads/<agent-id>/` — downloaded files
46
+ - `/workspace/shared/misc/<agent-id>/` — other shared files
47
+
48
+ ## Binary Artifacts (PNG, MP4)
49
+
50
+ **agent-fs write is text-only and mangles binaries** (inserts UTF-8 replacement characters). For PNG/MP4 uploads use the binary upload path:
51
+
52
+ ```bash
53
+ # Use binary-safe upload, NOT agent-fs write
54
+ # For QA screenshots: use qa-use's built-in screenshot capture
55
+ # For custom screenshots: Playwright, ffmpeg, or system screenshot tools
56
+ ```
57
+
58
+ For QA screenshots attached to PRs, see the QA evidence convention in TOOLS.md.
59
+
60
+ ## Naming Conventions
61
+
62
+ Name paths predictably by task, date, and artifact type:
63
+
64
+ ```
65
+ thoughts/<agent-id>/research/YYYY-MM-DD-<topic>.md
66
+ thoughts/<agent-id>/plans/YYYY-MM-DD-<topic>.md
67
+ thoughts/<agent-id>/qa/<topic>-screenshots/<filename>.png
68
+ misc/<agent-id>/<task-id>-<description>.ext
69
+ ```
70
+
71
+ ## Attaching Artifacts
72
+
73
+ - **PR body:** Embed `![caption](https://live.agent-fs.dev/file/~/...)` image URLs as markdown (include the `https://` scheme, otherwise the link is treated as a relative path).
74
+ - **Slack messages:** Link to agent-fs URLs (they're public, no auth required).
75
+ - **`store-progress`:** Use the `attachments` field with `kind: "agent-fs"` and the path.
76
+ - **Linear comments:** Paste the live.agent-fs.dev URL in the comment body.
77
+
78
+ ## What NOT to Store in Artifacts
79
+
80
+ - Secrets, API keys, OAuth tokens
81
+ - Raw customer data without approval
82
+ - Oversized files without approval (check file size before uploading)
83
+ - Ephemeral progress notes (put those in `store-progress.progress` instead)
84
+
85
+ ## Trade-offs
86
+
87
+ **agent-fs vs shared filesystem:** agent-fs is persistent, versioned, and searchable across sessions. The shared filesystem is faster for same-session handoffs between agents but doesn't survive container restarts. Use agent-fs for anything that needs to outlive the current session or be reviewed by humans.
@@ -0,0 +1,13 @@
1
+ {
2
+ "kind": "skill",
3
+ "name": "browser-use-cloud",
4
+ "displayName": "Browser Use Cloud",
5
+ "slug": "browser-use-cloud",
6
+ "title": "Browser Use Cloud",
7
+ "description": "Run browser automation tasks with bounded polling and explicit artifacts.",
8
+ "version": "1.0.0",
9
+ "category": "skills",
10
+ "placeholders": ["BROWSER_USE_API_KEY"],
11
+ "runAllSeedersCandidate": false,
12
+ "tags": ["browser", "automation", "qa"]
13
+ }
@@ -0,0 +1,155 @@
1
+ # Browser Use Cloud (universal IP-block / web-UI workaround)
2
+
3
+ The swarm container runs on a datacenter IP. A lot of public sites — YouTube,
4
+ Cloudflare-protected pages, login walls — block that IP outright. When direct
5
+ HTTP (curl, WebFetch, yt-dlp, `youtube-transcript-api`) returns a bot
6
+ challenge or a JS-only shell, the cleanest fallback is **Browser Use Cloud**
7
+ — a hosted real-browser service that runs an LLM-driven agent inside Chrome.
8
+ It loads pages, clicks things, scrolls, reads the DOM, and returns the
9
+ extracted content as text. Different IP, full JS, real browser fingerprint.
10
+
11
+ ## When to use this vs. alternatives
12
+
13
+ | Situation | Use this? |
14
+ |---|---|
15
+ | YouTube transcript / metadata | **Yes.** YouTube blocks the swarm IP for `yt-dlp`, `youtube-transcript-api`, and the free transcript sites (Cloudflare). Browser Use is the only path without cookies. |
16
+ | Cloudflare-protected site (`youtubetranscript.com`, `youtubetotranscript.com`, similar) | **Yes.** WebFetch returns the JS challenge HTML. Browser Use solves the challenge automatically. |
17
+ | Login-walled content the user authorizes you to access | **Yes** — pass the credentials in the task prompt; Browser Use can fill forms. |
18
+ | Plain server-rendered HTML | **No.** Use `curl` / WebFetch — Browser Use is overkill and ~$0.05+/task. |
19
+ | Site has a public API or RSS | **No.** Always prefer the API. |
20
+ | `steipete/summarize` on a non-IP-blocked URL | **No.** Use summarize directly — it's free and faster. |
21
+
22
+ `steipete/summarize` is installed in the swarm (`npm i -g @steipete/summarize`)
23
+ and works fine for non-IP-blocked URLs and local files. It fails on YouTube
24
+ from the swarm because every YouTube-extraction path it uses (`web` caption
25
+ fetch, `yt-dlp` audio download, `apify` if no token) hits the same datacenter
26
+ IP wall.
27
+
28
+ ## Auth & base URL
29
+
30
+ - **Base URL:** `https://api.browser-use.com/api/v2`
31
+ - **Auth header:** `X-Browser-Use-API-Key: <key>` (NOT `Authorization: Bearer`
32
+ — that returns 404 on all endpoints, easy time-sink)
33
+ - **Stored in swarm config:** key `BROWSER_USE_API_KEY` (global, secret).
34
+ Fetch via `get-config key=BROWSER_USE_API_KEY includeSecrets=true`.
35
+
36
+ ## Core endpoints
37
+
38
+ | Endpoint | Purpose |
39
+ |---|---|
40
+ | `POST /api/v2/tasks` | Create a task. Body: `{"task": "<natural-language instructions>"}`. Returns `{id, sessionId}`. |
41
+ | `GET /api/v2/tasks/{id}` | Poll status + output. Fields: `status` (`started` → `finished` / `failed`), `steps[]`, `output` (the agent's final answer as a string). |
42
+ | `GET /api/v2/tasks` | List recent tasks (useful to inspect prior runs / patterns). |
43
+
44
+ The v1 endpoints (`/api/v1/...`) return `{"detail":"Not Found"}` — **always
45
+ use v2**.
46
+
47
+ ## Quickstart — YouTube transcript
48
+
49
+ ```bash
50
+ K=$(swarm-get-config BROWSER_USE_API_KEY) # or pull via get-config MCP tool
51
+
52
+ # 1. Create the task
53
+ TASK=$(curl -s -X POST "https://api.browser-use.com/api/v2/tasks" \
54
+ -H "X-Browser-Use-API-Key: $K" \
55
+ -H "Content-Type: application/json" \
56
+ -d '{"task":"Open https://www.youtube.com/watch?v=VIDEO_ID . Dismiss any cookie/consent dialog. Note the video title and channel name. Open the transcript: click \"...more\" in the description, then click \"Show transcript\" (or use the three-dot menu under the video and choose \"Show transcript\"). The transcript panel will appear on the right. Scroll the transcript panel fully to the bottom so every line loads. Then extract and output the COMPLETE transcript text verbatim, all lines, in order. Also output the video title and channel at the top."}')
57
+
58
+ TASK_ID=$(echo "$TASK" | jq -r .id)
59
+ echo "task: $TASK_ID"
60
+
61
+ # 2. Poll until finished (typical: 2-4 minutes for a ~15min video)
62
+ while true; do
63
+ R=$(curl -s "https://api.browser-use.com/api/v2/tasks/$TASK_ID" \
64
+ -H "X-Browser-Use-API-Key: $K")
65
+ S=$(echo "$R" | jq -r .status)
66
+ echo "$(date +%H:%M:%S) status=$S steps=$(echo "$R" | jq '.steps | length')"
67
+ [ "$S" = "finished" ] || [ "$S" = "failed" ] && break
68
+ sleep 30
69
+ done
70
+
71
+ # 3. Extract output — note: line breaks inside `output` arrive as the literal
72
+ # two characters backslash + "n", not as real newlines. Unescape before writing to disk.
73
+ echo "$R" | jq -r '.output' | python3 -c "import sys; sys.stdout.write(sys.stdin.read().replace('\\\\n','\n'))" \
74
+ > /workspace/personal/tmp/transcript.md
75
+ ```
76
+
77
+ **Polling cadence:** 30s is plenty. A 10-15min YouTube video transcript run
78
+ takes ~2-4 min and ~15-20 agent steps. Don't busy-poll faster — every poll
79
+ costs a request and Browser Use rate-limits aggressively.
80
+
81
+ ## Writing good task prompts
82
+
83
+ The Browser Use agent is an LLM driving a browser — it follows instructions
84
+ literally and sometimes loops if the page changes mid-task. Give it:
85
+
86
+ 1. **Exact start URL** — full `https://...` link, not "search for X".
87
+ 2. **Pre-emptive dismissals** — "Dismiss any cookie/consent dialog" prevents
88
+ it from getting stuck on EU cookie banners.
89
+ 3. **A concrete click path** with fallbacks — `"click 'Show transcript' (or
90
+ use the three-dot menu under the video and choose 'Show transcript')"` —
91
+ sites move things around; give it two ways.
92
+ 4. **Explicit scroll instructions** for lazy-loaded content — `"scroll the
93
+ transcript panel fully to the bottom so every line loads"`. Without this
94
+ you get the first ~50 lines and silent truncation.
95
+ 5. **The exact output format** — `"output the COMPLETE transcript text
96
+ verbatim, all lines, in order"`. If you want JSON, say "respond with valid
97
+ JSON only, no prose".
98
+ 6. **Don't ask it to summarize** unless that's the goal. You want the raw
99
+ content; do the LLM work yourself in a separate step with full control of
100
+ the model.
101
+
102
+ ## Output shape gotchas
103
+
104
+ - `output` is a **single string**. Multi-line content has `\n` *escaped as
105
+ two characters* inside that string (because the API returns JSON). When you
106
+ pipe to a file, unescape with `replace('\\n', '\n')` or `printf '%b'` —
107
+ otherwise the file looks like one giant line of `\n[0:01]...\n[0:03]...`.
108
+ - `steps` is an array of `{action, nextGoal, ...}` — useful for debugging a
109
+ failed run, ignore on success.
110
+ - If `status: failed`, look at the last step's `error` field. Common cause:
111
+ the agent couldn't find the click target (page changed, A/B test, region
112
+ variation) — refine the click path and re-run.
113
+
114
+ ## Cost & limits
115
+
116
+ - **Paid per task.** Roughly $0.05-$0.15 per task in practice; longer tasks
117
+ with more steps cost more. Don't loop this; use it once you've actually
118
+ hit a block.
119
+ - **Rate-limited.** Don't fire >1 task per ~10s.
120
+ - **No streaming.** You always poll for the final `output`.
121
+
122
+ ## Worked example (the one that prompted this skill)
123
+
124
+ Task: get the transcript for `https://www.youtube.com/watch?v=t-G67yKAHBQ`.
125
+
126
+ What failed first:
127
+ - `yt-dlp` (all `--extractor-args "youtube:player_client=..."` variants: `tv`,
128
+ `ios`, `web_safari`, `mweb`, `android`): all returned "Sign in to confirm
129
+ you're not a bot".
130
+ - `youtube-transcript-api` (Python lib): `RequestBlocked` — same IP issue.
131
+ - `summarize "<url>" --extract --youtube web|auto|yt-dlp`: failed too — its
132
+ `web` mode hits YouTube's HTML directly (gets the JS shell footer), and
133
+ `yt-dlp` mode chains the same blocked tool.
134
+ - `curl` + `WebFetch` against `youtubetranscript.com` /
135
+ `youtubetotranscript.com` / `tactiq`: Cloudflare interstitial / App Check
136
+ rejection / 403.
137
+
138
+ What worked:
139
+ - One Browser Use Cloud task with the prompt above. ~17 steps, ~3 min,
140
+ returned a clean 17.7 KB timestamped transcript ("How to Build a
141
+ Self-Improving Company with AI", YC Root Access).
142
+
143
+ ## Tips
144
+
145
+ - **Always save the `output` to a file under `/workspace/...`** before
146
+ uploading to Slack — `slack-upload-file` will not read `/tmp/`. Use
147
+ `/workspace/personal/tmp/` or `/workspace/shared/misc/<your-agent-id>/`.
148
+ - **For long videos** (>1h), warn the user this takes ~10 min and costs more
149
+ per run. Consider chunking by timestamp range if you only need part of it.
150
+ - **For non-YouTube sites**, the same recipe applies — just change the URL
151
+ and the click path. Browser Use is the swarm's general "I need a real
152
+ browser" tool.
153
+ - **Don't store the API key in scripts you commit.** Pull it fresh from
154
+ `get-config` each run.
155
+
@@ -0,0 +1,13 @@
1
+ {
2
+ "kind": "skill",
3
+ "name": "desloppify",
4
+ "displayName": "Code Health Scan",
5
+ "slug": "desloppify",
6
+ "title": "Code Health Scan",
7
+ "description": "Run a multi-language code health scan and publish prioritized findings.",
8
+ "version": "1.0.0",
9
+ "category": "skills",
10
+ "placeholders": ["REPO_URL"],
11
+ "runAllSeedersCandidate": false,
12
+ "tags": ["code-quality", "audit", "reporting"]
13
+ }
@@ -0,0 +1,201 @@
1
+ # desloppify — Code-health scan workflow (swarm edition)
2
+
3
+ `desloppify` is a multi-language codebase health scanner (peteromallet/desloppify). This skill is the swarm-adapted SKILL.md: it codifies the **install recipe with the tree-sitter pin**, the surface-only **scan → status → next → triage** workflow we run for repos like `agent-swarm`, and the **publish-to-agent-fs** step so humans (Eze) can see the findings.
4
+
5
+ > **Default mode for swarm workers: surface-only.** Run Phase 1 + early Phase 2, publish a memo to agent-fs, stop. Do **not** run Phase 3 (queue-grinding refactors) unless the task explicitly asks.
6
+
7
+ ## When to use this skill
8
+
9
+ - A task asks you to run desloppify, do a code-health scan, get a health score, or surface debt themes on a repo.
10
+ - A task references `peteromallet/desloppify` or the upstream `docs/SKILL.md`.
11
+ - You need to triage tech debt on a TS / Python / multi-lang repo (agent-swarm, landing, agent-swarm-landing, desplega-ai, etc.).
12
+
13
+ If the task is "fix this one bug" or "rename X" → not this skill. Desloppify is for batch debt-surfacing, not point fixes.
14
+
15
+ ## Step 1 — Detect
16
+
17
+ ```bash
18
+ command -v desloppify >/dev/null 2>&1 && desloppify --version || echo "NOT INSTALLED"
19
+ ```
20
+
21
+ If `--version` prints cleanly, **also verify the tree-sitter pin** (a busted pin is worse than no install — the scan crashes mid-run):
22
+
23
+ ```bash
24
+ pipx runpip desloppify show tree-sitter-language-pack | grep Version
25
+ # Expect: Version: 1.6.2 (or anything < 1.8)
26
+ # If 1.8.x → see Step 3 (re-pin).
27
+ ```
28
+
29
+ ## Step 2 — Install (first run only)
30
+
31
+ Use `pipx`. This is the working recipe verified in PR #463 (Dockerfile.worker) and across multiple sprite smokes:
32
+
33
+ ```bash
34
+ pipx install 'desloppify[full]==0.9.15'
35
+ pipx inject --force desloppify 'tree-sitter-language-pack<1.8'
36
+
37
+ # Verify the pin landed:
38
+ pipx runpip desloppify show tree-sitter-language-pack | grep Version
39
+ # → must show 1.6.2 (or another <1.8)
40
+ ```
41
+
42
+ **Why the pin:** `tree-sitter-language-pack` 1.8.0 has an ABI mismatch with `tree-sitter` that crashes desloppify in `cohesion.py:52` → `extractors.py:90` with a `TypeError`. Upstream fix is [peteromallet/desloppify#605](https://github.com/peteromallet/desloppify/pull/605) — open, unmerged. Until that ships, we cap locally. (See task `616b4bba-43de-40b5-af3b-4e219b622218` for the full repro.)
43
+
44
+ If `pipx` isn't installed, run `python3 -m pip install --user pipx && python3 -m pipx ensurepath`, then start a new shell. If `pipx install` fails with build errors, jump to **Sprite escape hatch** below.
45
+
46
+ ## Step 3 — Re-pin (installed but broken)
47
+
48
+ If desloppify is on PATH but `tree-sitter-language-pack` is ≥ 1.8, you do **not** need a full reinstall:
49
+
50
+ ```bash
51
+ pipx inject --force desloppify 'tree-sitter-language-pack<1.8'
52
+ pipx runpip desloppify show tree-sitter-language-pack | grep Version
53
+ ```
54
+
55
+ The `--force` is required — without it pipx refuses to downgrade. Confirm the version drops, then scan.
56
+
57
+ ## Step 4 — Scan
58
+
59
+ Clone or `cd` into the target repo. For TS repos you usually don't need `node_modules` for the scan, but if a finding requires resolving imports, run `bun install` (or `npm install`) first.
60
+
61
+ **Monorepo note:** if the repo has multiple programs in sibling folders (e.g. `frontend/`, `backend/`), scan each separately — never scan the parent:
62
+
63
+ ```bash
64
+ desloppify --lang typescript scan --path ./frontend
65
+ desloppify --lang python scan --path ./backend
66
+ ```
67
+
68
+ Single-program repo:
69
+
70
+ ```bash
71
+ desloppify scan --path .
72
+ ```
73
+
74
+ Capture **exit code**, **duration**, and any warnings. A clean run is exit 0 with no traceback. Typical scan time: 30–90s for ~200K LOC.
75
+
76
+ ## Step 5 — Status + next + triage
77
+
78
+ ```bash
79
+ desloppify status # overall / strict / objective / verified scores + dimension health
80
+ desloppify next --count 15 # top-priority execution items (cluster-aware)
81
+ desloppify show --status open --count 50 # broader open backlog if you want to slice manually
82
+ ```
83
+
84
+ What to capture for the memo:
85
+
86
+ - **Status snapshot** — overall, strict, objective, verified scores. Note if subjective is at 0% (unassessed) — that means the strict number is a measurement artifact, not a reflection of reality.
87
+ - **Item counts by detector** — `desloppify status` shows this. Look for which detectors dominate (test_coverage, smells, duplication, security, orphaned, …).
88
+ - **Top 10–15 findings** from `next` — identifier · kind · severity · the one-line "why".
89
+ - **Your themes** (2–4 bullets) — what jumps out across the findings. E.g. "godfile in src/be/db.ts", "MCP-tool boilerplate dup cluster", "UI mega-pages with 20+ hooks". Use your own read, not `desloppify plan triage --complete`.
90
+ - **Phase 3 candidates** (2–3) — what you'd nominate to actually grind, if asked.
91
+ - **False positives** — anything desloppify flagged that's actually intentional in the swarm context (CLI entrypoints, deferred-registration MCP tools, dynamically loaded plugins, sanity fixtures, etc.).
92
+
93
+ **Do not** run `desloppify plan triage --complete`, `desloppify plan commit-log record`, or any Phase 3 commands in surface-only mode. Those mutate desloppify's local state and create commitments we don't intend to follow through on.
94
+
95
+ ## Step 6 — Publish findings to agent-fs
96
+
97
+ Save the scan memo so Eze and the swarm can see it. Use the shared org (`648a5f3c-35c8-4f11-8673-b89de52cd6bd`) and namespace the path under your own agent ID:
98
+
99
+ ```bash
100
+ DATE=$(date +%Y-%m-%d)
101
+ agent-fs --org 648a5f3c-35c8-4f11-8673-b89de52cd6bd write \
102
+ thoughts/$AGENT_ID/research/$DATE-desloppify-<repo>-scan.md \
103
+ --content "$(cat <<'EOF'
104
+ # desloppify scan — <repo> @ <SHA>
105
+
106
+ ## Install + scan
107
+ - recipe: pipx install desloppify[full]==0.9.15 + tree-sitter pin
108
+ - exit: 0 / duration: 47s / 919 files / 210K LOC
109
+
110
+ ## Status
111
+ overall <N>/100 · strict <N>/100 · objective <N>/100 · verified <N>/100
112
+ dimension health: File X% · Code Y% · Dup Z% · Security S% · Test T%
113
+
114
+ ## Top 10–15 findings
115
+ - src/be/db.ts · godfile · T1 · 9441 LOC / complexity 457 / 48 issues
116
+ - ...
117
+
118
+ ## Themes
119
+ 1. ...
120
+ 2. ...
121
+
122
+ ## Phase 3 candidates
123
+ 1. Split src/be/db.ts by domain
124
+ 2. ...
125
+
126
+ ## False positives (don't act on)
127
+ - src/cli.tsx (CLI entrypoint)
128
+ - ...
129
+ EOF
130
+ )" -m "desloppify scan memo for <repo>"
131
+ ```
132
+
133
+ Also drop a private copy at `/workspace/personal/memory/desloppify-<repo>-scan-$DATE.md` so it's indexed for future memory-search.
134
+
135
+ ## Step 7 — Slack reply (if the task came from Slack)
136
+
137
+ Single concise reply on the originating thread (use `slack-reply` with your taskId). Include:
138
+
139
+ - ✅/❌ + scan exit + duration
140
+ - Status snapshot (overall / strict / objective / verified / dimension health)
141
+ - Top themes (2–4 bullets)
142
+ - Phase 3 candidates (2–3)
143
+ - agent-fs path to the full memo
144
+ - False positives flagged
145
+
146
+ **Slack block limit is 3000 chars per message.** If your reply trips `invalid_blocks`, split it into Part 1 / Part 2 rather than cramming everything into one message — a readable two-part reply beats a truncated or rejected single one.
147
+
148
+ ## Sprite escape hatch — when local pipx is broken
149
+
150
+ If `pipx install` fails (system-package conflicts, missing build deps, weird Python ABI), or your worker container's desloppify install is corrupt and re-pinning doesn't help, **spin a fresh sprite** instead of fighting the local env. See the `sprite-cli` skill for full details.
151
+
152
+ ```bash
153
+ sprite create desloppify-scan
154
+ sprite exec -s desloppify-scan -- bash -c '
155
+ set -e
156
+ sudo apt-get update -qq
157
+ sudo apt-get install -y pipx python3-venv git
158
+ pipx ensurepath
159
+ export PATH="$HOME/.local/bin:$PATH"
160
+ pipx install "desloppify[full]==0.9.15"
161
+ pipx inject --force desloppify "tree-sitter-language-pack<1.8"
162
+ pipx runpip desloppify show tree-sitter-language-pack | grep Version
163
+ git clone --depth=1 https://github.com/desplega-ai/<repo>.git /tmp/repo
164
+ cd /tmp/repo
165
+ desloppify scan --path .
166
+ desloppify status
167
+ desloppify next --count 15
168
+ '
169
+ # … capture output, then:
170
+ sprite destroy desloppify-scan --force
171
+ ```
172
+
173
+ **Always destroy the sprite when done.** Sprites are paid resources.
174
+
175
+ ## Stop conditions
176
+
177
+ - If desloppify crashes despite a verified `<1.8` pin → STOP, paste the traceback, escalate. Don't bisect tree-sitter versions further.
178
+ - If the scan produces 0 findings → suspect a `--path` problem (you may be scanning a parent dir of a monorepo). Re-scan with explicit per-program paths.
179
+ - If you are asked to "fix the findings" → that's Phase 3. Confirm scope with the requester before doing it — Phase 3 is queue-grinding refactors and we historically do *not* default to it.
180
+
181
+ ## Quick reference (cheat sheet)
182
+
183
+ ```bash
184
+ # Detect + install (idempotent)
185
+ command -v desloppify || { pipx install 'desloppify[full]==0.9.15' && pipx inject --force desloppify 'tree-sitter-language-pack<1.8'; }
186
+ pipx runpip desloppify show tree-sitter-language-pack | grep Version # must be <1.8
187
+
188
+ # Surface workflow
189
+ desloppify scan --path .
190
+ desloppify status
191
+ desloppify next --count 15
192
+
193
+ # Publish + report
194
+ agent-fs --org 648a5f3c-35c8-4f11-8673-b89de52cd6bd write thoughts/$AGENT_ID/research/$(date +%F)-desloppify-<repo>-scan.md --content "..." -m "scan memo"
195
+ # slack-reply on the originating thread
196
+ ```
197
+
198
+ ## Upstream reference
199
+
200
+ Full Phase 3 / review workflow / plan commands are in the upstream SKILL.md: <https://github.com/peteromallet/desloppify/blob/main/docs/SKILL.md>. Reach for it when you're explicitly asked to grind the queue (rare). Default swarm mode stops after the memo + Slack reply.
201
+
@@ -0,0 +1,13 @@
1
+ {
2
+ "kind": "skill",
3
+ "name": "exa-search",
4
+ "displayName": "Exa Search",
5
+ "slug": "exa-search",
6
+ "title": "Exa Search",
7
+ "description": "Use web search APIs for cited research without over-fetching irrelevant pages.",
8
+ "version": "1.0.0",
9
+ "category": "skills",
10
+ "placeholders": ["EXA_API_KEY"],
11
+ "runAllSeedersCandidate": false,
12
+ "tags": ["research", "search", "web"]
13
+ }