@desplega.ai/agent-swarm 1.85.0 → 1.86.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/openapi.json +1 -1
- package/package.json +8 -6
- package/src/be/db.ts +44 -0
- package/src/be/migrations/078_backfill_gpt_5_5_pricing.sql +15 -0
- package/src/be/modelsdev-cache.json +152028 -0
- package/src/be/modelsdev-cache.ts +46 -0
- package/src/be/seed-pricing.ts +7 -44
- package/src/cli.tsx +12 -2
- package/src/commands/codex-session-runner.ts +132 -0
- package/src/commands/credential-wait.ts +2 -2
- package/src/commands/provider-credentials.ts +10 -5
- package/src/commands/runner.ts +3 -3
- package/src/prompts/base-prompt.ts +49 -3
- package/src/providers/claude-adapter.ts +83 -2
- package/src/providers/claude-managed-models.ts +18 -2
- package/src/providers/codex-adapter.ts +417 -97
- package/src/providers/codex-models.ts +9 -2
- package/src/providers/index.ts +28 -19
- package/src/providers/pricing-sources.md +7 -4
- package/src/providers/swarm-events-shared.ts +14 -0
- package/src/slack/HEURISTICS.md +5 -1
- package/src/slack/handlers.test.ts +35 -0
- package/src/slack/handlers.ts +79 -2
- package/src/tests/base-prompt.test.ts +46 -8
- package/src/tests/claude-managed-adapter.test.ts +4 -4
- package/src/tests/codex-adapter-otel.test.ts +4 -4
- package/src/tests/codex-adapter.test.ts +20 -7
- package/src/tests/codex-swarm-events.test.ts +35 -0
- package/src/tests/context-window.test.ts +1 -0
- package/src/tests/credential-check.test.ts +48 -29
- package/src/tests/entrypoint-config-env-export.test.ts +81 -0
- package/src/tests/follow-up-redelivery-guard.test.ts +165 -0
- package/src/tests/migration-046-budgets.test.ts +6 -5
- package/src/tests/pricing-routes.test.ts +6 -5
- package/src/tests/provider-adapter.test.ts +10 -10
- package/src/tests/provider-command-format.test.ts +4 -4
- package/src/tests/session-costs-codex-recompute.test.ts +25 -0
- package/src/tools/send-task.ts +30 -9
- package/src/utils/context-window.ts +1 -0
- package/templates/schedules/daily-blocker-digest/config.json +13 -0
- package/templates/schedules/daily-blocker-digest/content.md +150 -0
- package/templates/schedules/daily-compounding-reflection/config.json +21 -0
- package/templates/schedules/daily-compounding-reflection/content.md +210 -0
- package/templates/schedules/daily-hn-briefing/config.json +13 -0
- package/templates/schedules/daily-hn-briefing/content.md +97 -0
- package/templates/schedules/daily-workflow-health-audit/config.json +13 -0
- package/templates/schedules/daily-workflow-health-audit/content.md +189 -0
- package/templates/schedules/gtm-weekly-review/config.json +13 -0
- package/templates/schedules/gtm-weekly-review/content.md +58 -0
- package/templates/schedules/weekly-dependabot-triage/config.json +13 -0
- package/templates/schedules/weekly-dependabot-triage/content.md +45 -0
- package/templates/schema.ts +26 -0
- package/templates/skills/agentmail-sending/config.json +13 -0
- package/templates/skills/agentmail-sending/content.md +48 -0
- package/templates/skills/artifacts/config.json +13 -0
- package/templates/skills/artifacts/content.md +87 -0
- package/templates/skills/browser-use-cloud/config.json +13 -0
- package/templates/skills/browser-use-cloud/content.md +155 -0
- package/templates/skills/desloppify/config.json +13 -0
- package/templates/skills/desloppify/content.md +201 -0
- package/templates/skills/exa-search/config.json +13 -0
- package/templates/skills/exa-search/content.md +106 -0
- package/templates/skills/jira-interaction/config.json +13 -0
- package/templates/skills/jira-interaction/content.md +252 -0
- package/templates/skills/kapso-whatsapp/config.json +13 -0
- package/templates/skills/kapso-whatsapp/content.md +369 -0
- package/templates/skills/kv-storage/config.json +13 -0
- package/templates/skills/kv-storage/content.md +111 -0
- package/templates/skills/linear-interaction/config.json +20 -0
- package/templates/skills/linear-interaction/content.md +230 -0
- package/templates/skills/pages/config.json +18 -0
- package/templates/skills/pages/content.md +85 -0
- package/templates/skills/profile-corruption-escalation/config.json +13 -0
- package/templates/skills/profile-corruption-escalation/content.md +105 -0
- package/templates/skills/scheduled-task-resilience/config.json +13 -0
- package/templates/skills/scheduled-task-resilience/content.md +95 -0
- package/templates/skills/sprite-cli/config.json +13 -0
- package/templates/skills/sprite-cli/content.md +133 -0
- package/templates/skills/turso-interaction/config.json +13 -0
- package/templates/skills/turso-interaction/content.md +192 -0
- package/templates/skills/workflow-iterate/config.json +18 -0
- package/templates/skills/workflow-iterate/content.md +399 -0
- package/templates/skills/workflow-structured-output/config.json +13 -0
- package/templates/skills/workflow-structured-output/content.md +101 -0
- package/templates/skills/x-api-interactions/config.json +13 -0
- package/templates/skills/x-api-interactions/content.md +109 -0
- package/templates/workflows/autopilot/config.json +13 -0
- package/templates/workflows/autopilot/content.md +58 -0
- package/templates/workflows/linear-drain-loop/config.json +21 -0
- package/templates/workflows/linear-drain-loop/content.md +72 -0
- package/templates/workflows/ralph-loop/config.json +13 -0
- package/templates/workflows/ralph-loop/content.md +75 -0
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Weekly Dependency Triage
|
|
2
|
+
|
|
3
|
+
Review dependency update PRs, group safe patches, and flag risky upgrades.
|
|
4
|
+
|
|
5
|
+
## Schedule
|
|
6
|
+
|
|
7
|
+
```json
|
|
8
|
+
{
|
|
9
|
+
"cron": "40 3 * * 0",
|
|
10
|
+
"timezone": "UTC",
|
|
11
|
+
"agentRole": "lead",
|
|
12
|
+
"enabled": true
|
|
13
|
+
}
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
## Scheduled Task
|
|
17
|
+
|
|
18
|
+
This is the full task prompt the schedule runs each time it fires, including the accumulated operational learnings baked into it. Adapt the swarm-specific references (channel IDs, agent names, repo paths) to your environment before enabling.
|
|
19
|
+
|
|
20
|
+
Triage dependabot PRs from https://github.com/desplega-ai/desplega.ai/pulls
|
|
21
|
+
|
|
22
|
+
## Instructions
|
|
23
|
+
|
|
24
|
+
1. **List all open dependabot PRs** in desplega-ai/desplega.ai using `gh pr list`
|
|
25
|
+
2. **Only paths we care about**: `/be` and `/new-fe`. Close all other dependabot PRs (ones that don't touch these paths).
|
|
26
|
+
3. **DO NOT touch non-dependabot PRs** — leave them as-is.
|
|
27
|
+
4. **Create two unified PRs** that merge all dependabot bumps into one PR each:
|
|
28
|
+
- One for `/be` changes — branch name format: `YYYY-MM-DD-dependabot-be` (use today's date)
|
|
29
|
+
- One for `/new-fe` changes — branch name format: `YYYY-MM-DD-dependabot-fe` (use today's date)
|
|
30
|
+
- Each unified PR should be based on latest `main` and include all the dependency bumps from the individual dependabot PRs for that path.
|
|
31
|
+
- After creating the unified PRs, close the individual dependabot PRs whose bumps were consolidated into them.
|
|
32
|
+
5. **Return the URLs** of the two final unified PRs.
|
|
33
|
+
6. If there are no open dependabot PRs, just report that and complete.
|
|
34
|
+
|
|
35
|
+
## Approach
|
|
36
|
+
|
|
37
|
+
- Clone the repo, checkout main, pull latest
|
|
38
|
+
- For each path (be, new-fe): create a branch, cherry-pick or merge the dependabot changes, push, create PR
|
|
39
|
+
- Close individual dependabot PRs after unifying
|
|
40
|
+
- Be careful: some dependabot PRs may have conflicts — handle gracefully
|
|
41
|
+
|
|
42
|
+
## Important
|
|
43
|
+
- The PR title should be descriptive, e.g. "chore(be): consolidate dependabot bumps YYYY-MM-DD"
|
|
44
|
+
- Add @tarasyarema as a reviewer on both PRs
|
|
45
|
+
- Post the final PR URLs back to Slack
|
package/templates/schema.ts
CHANGED
|
@@ -35,3 +35,29 @@ export interface TemplateResponse {
|
|
|
35
35
|
heartbeatMd: string;
|
|
36
36
|
};
|
|
37
37
|
}
|
|
38
|
+
|
|
39
|
+
export type AgentAssetKind = "skill" | "schedule" | "workflow";
|
|
40
|
+
export type AgentAssetCategory = "skills" | "schedules" | "workflows";
|
|
41
|
+
|
|
42
|
+
export interface AgentAssetConfig {
|
|
43
|
+
kind: AgentAssetKind;
|
|
44
|
+
name: string;
|
|
45
|
+
displayName: string;
|
|
46
|
+
slug: string;
|
|
47
|
+
title: string;
|
|
48
|
+
description: string;
|
|
49
|
+
version: string;
|
|
50
|
+
category: AgentAssetCategory;
|
|
51
|
+
placeholders: string[];
|
|
52
|
+
runAllSeedersCandidate: boolean;
|
|
53
|
+
/** Marks an asset as an essential, recommended-for-every-swarm building block. */
|
|
54
|
+
must?: boolean;
|
|
55
|
+
tags: string[];
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
export interface AgentAssetResponse {
|
|
59
|
+
config: AgentAssetConfig;
|
|
60
|
+
body: string;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
export const ASSET_CATEGORIES: AgentAssetCategory[] = ["skills", "schedules", "workflows"];
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "skill",
|
|
3
|
+
"name": "agentmail-sending",
|
|
4
|
+
"displayName": "AgentMail Sending",
|
|
5
|
+
"slug": "agentmail-sending",
|
|
6
|
+
"title": "AgentMail Sending",
|
|
7
|
+
"description": "Generic rules for sending email through an agent-accessible mailbox API.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "skills",
|
|
10
|
+
"placeholders": ["COMPANY_SIGNATURE"],
|
|
11
|
+
"runAllSeedersCandidate": false,
|
|
12
|
+
"tags": ["email", "communications"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# AgentMail Sending Rules
|
|
2
|
+
|
|
3
|
+
These rules are MANDATORY for all agents sending email via AgentMail. Violating them will result in blank emails reaching real people.
|
|
4
|
+
|
|
5
|
+
## Rule 1: TEXT ONLY — Never Pass `html` Parameter
|
|
6
|
+
|
|
7
|
+
**AgentMail has a critical bug (as of 2026-03-25):** When both `text` and `html` parameters are passed to `send_message` or `reply_to_message`, the HTML body content is silently dropped. The resulting email has an empty `<div dir="ltr"></div>`. Email clients (Gmail, etc.) prefer the HTML version over plain text, so recipients see a completely blank email.
|
|
8
|
+
|
|
9
|
+
**What to do:**
|
|
10
|
+
- ONLY pass the `text` parameter
|
|
11
|
+
- NEVER pass the `html` parameter
|
|
12
|
+
- This applies to BOTH `send_message` and `reply_to_message`
|
|
13
|
+
|
|
14
|
+
**Why this matters:** This bug caused real outbound prospect emails to arrive blank, burning contacts permanently. It is not a cosmetic issue — it's a data loss / reputation issue.
|
|
15
|
+
|
|
16
|
+
## Rule 2: Always BCC t@desplega.ai on Outbound Emails
|
|
17
|
+
|
|
18
|
+
All outbound emails to external recipients (anyone outside @agent-swarm.dev) MUST include `t@desplega.ai` as BCC. This gives the human founder visibility into what emails the swarm is sending.
|
|
19
|
+
|
|
20
|
+
**How:**
|
|
21
|
+
```
|
|
22
|
+
send_message({
|
|
23
|
+
inboxId: "lead@agent-swarm.dev",
|
|
24
|
+
to: ["recipient@example.com"],
|
|
25
|
+
bcc: ["t@desplega.ai"],
|
|
26
|
+
subject: "...",
|
|
27
|
+
text: "..."
|
|
28
|
+
})
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**Exception:** Internal emails between agent inboxes (@agent-swarm.dev) or to t@desplega.ai / e@desplega.ai directly do NOT need BCC.
|
|
32
|
+
|
|
33
|
+
## Rule 3: Always Include Signature
|
|
34
|
+
|
|
35
|
+
Use the `email-signature` skill to append the proper plain text signature to every outgoing email. See that skill for the template.
|
|
36
|
+
|
|
37
|
+
## Rule 4: Human Approval Before Sending to Prospects
|
|
38
|
+
|
|
39
|
+
Never send outreach/cold emails to external prospects without explicit human approval. Draft the emails, present them for review, and only send after receiving "approved" or equivalent confirmation.
|
|
40
|
+
|
|
41
|
+
## Summary Checklist
|
|
42
|
+
|
|
43
|
+
Before every `send_message` or `reply_to_message` call:
|
|
44
|
+
- [ ] Only `text` param, NO `html` param
|
|
45
|
+
- [ ] BCC `t@desplega.ai` if recipient is external
|
|
46
|
+
- [ ] Plain text signature appended
|
|
47
|
+
- [ ] Human-approved if it's outreach/cold email
|
|
48
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "skill",
|
|
3
|
+
"name": "artifacts",
|
|
4
|
+
"displayName": "Artifacts",
|
|
5
|
+
"slug": "artifacts",
|
|
6
|
+
"title": "Artifacts",
|
|
7
|
+
"description": "Store logs, screenshots, exports, and generated files as durable task artifacts.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "skills",
|
|
10
|
+
"placeholders": [],
|
|
11
|
+
"runAllSeedersCandidate": true,
|
|
12
|
+
"tags": ["artifacts", "evidence", "qa"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# Artifacts
|
|
2
|
+
|
|
3
|
+
Artifacts are files, screenshots, recordings, logs, and reports that outlive a session and can be referenced by other agents, humans, or future tasks. The agent-swarm supports two artifact stores: **agent-fs** (structured, searchable, shareable) and the **shared workspace filesystem** (`/workspace/shared/`).
|
|
4
|
+
|
|
5
|
+
## When to Create Artifacts
|
|
6
|
+
|
|
7
|
+
- Your task produces a deliverable humans should review (report, screenshot, recording, data export).
|
|
8
|
+
- Another agent or future session needs to pick up where you left off.
|
|
9
|
+
- You want to attach evidence to a PR, Linear ticket, or Slack message.
|
|
10
|
+
- The output is too large for `store-progress.output`.
|
|
11
|
+
|
|
12
|
+
## Agent-fs (Preferred for Human-Shareable Artifacts)
|
|
13
|
+
|
|
14
|
+
agent-fs is a persistent, searchable file system shared across the swarm.
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
# Write to personal drive
|
|
18
|
+
agent-fs write thoughts/research/2026-05-28-topic.md --content "..." -m "description"
|
|
19
|
+
|
|
20
|
+
# Write to shared drive (humans + other agents can see)
|
|
21
|
+
agent-fs --org 648a5f3c-35c8-4f11-8673-b89de52cd6bd write \
|
|
22
|
+
thoughts/c06cca59-187e-4aa6-8472-8ac6caf177af/research/2026-05-28-topic.md \
|
|
23
|
+
--content "..." -m "research findings"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Verify the write succeeded (agent-fs writes can fail silently with empty payloads):
|
|
27
|
+
```bash
|
|
28
|
+
agent-fs stat <path> --json | jq '.size'
|
|
29
|
+
# If size < 200 bytes on a non-trivial artifact, the write FAILED — re-do it.
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Sharing agent-fs files with humans
|
|
33
|
+
|
|
34
|
+
Build the URL from the live host env var:
|
|
35
|
+
```
|
|
36
|
+
${AGENT_FS_LIVE_URL}/file/~/<org_id>/<drive_id>/<file_path>
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
`AGENT_FS_LIVE_URL` defaults to `https://live.agent-fs.dev`. Get `org_id` and `drive_id` from `agent-fs stat <path> --json`.
|
|
40
|
+
|
|
41
|
+
## Shared Filesystem
|
|
42
|
+
|
|
43
|
+
For non-text artifacts or files other agents need to access during the same session:
|
|
44
|
+
|
|
45
|
+
- `/workspace/shared/downloads/<agent-id>/` — downloaded files
|
|
46
|
+
- `/workspace/shared/misc/<agent-id>/` — other shared files
|
|
47
|
+
|
|
48
|
+
## Binary Artifacts (PNG, MP4)
|
|
49
|
+
|
|
50
|
+
**agent-fs write is text-only and mangles binaries** (inserts UTF-8 replacement characters). For PNG/MP4 uploads use the binary upload path:
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
# Use binary-safe upload, NOT agent-fs write
|
|
54
|
+
# For QA screenshots: use qa-use's built-in screenshot capture
|
|
55
|
+
# For custom screenshots: Playwright, ffmpeg, or system screenshot tools
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
For QA screenshots attached to PRs, see the QA evidence convention in TOOLS.md.
|
|
59
|
+
|
|
60
|
+
## Naming Conventions
|
|
61
|
+
|
|
62
|
+
Name paths predictably by task, date, and artifact type:
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
thoughts/<agent-id>/research/YYYY-MM-DD-<topic>.md
|
|
66
|
+
thoughts/<agent-id>/plans/YYYY-MM-DD-<topic>.md
|
|
67
|
+
thoughts/<agent-id>/qa/<topic>-screenshots/<filename>.png
|
|
68
|
+
misc/<agent-id>/<task-id>-<description>.ext
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Attaching Artifacts
|
|
72
|
+
|
|
73
|
+
- **PR body:** Embed image URLs as markdown images, e.g. `![description](<image-url>)`.
|
|
74
|
+
- **Slack messages:** Link to agent-fs URLs (they're public, no auth required).
|
|
75
|
+
- **`store-progress`:** Use the `attachments` field with `kind: "agent-fs"` and the path.
|
|
76
|
+
- **Linear comments:** Paste the live.agent-fs.dev URL in the comment body.
|
|
77
|
+
|
|
78
|
+
## What NOT to Store in Artifacts
|
|
79
|
+
|
|
80
|
+
- Secrets, API keys, OAuth tokens
|
|
81
|
+
- Raw customer data without approval
|
|
82
|
+
- Oversized files without approval (check file size before uploading)
|
|
83
|
+
- Ephemeral progress notes (put those in `store-progress.progress` instead)
|
|
84
|
+
|
|
85
|
+
## Trade-offs
|
|
86
|
+
|
|
87
|
+
**agent-fs vs shared filesystem:** agent-fs is persistent, versioned, and searchable across sessions. The shared filesystem is faster for same-session handoffs between agents but doesn't survive container restarts. Use agent-fs for anything that needs to outlive the current session or be reviewed by humans.
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "skill",
|
|
3
|
+
"name": "browser-use-cloud",
|
|
4
|
+
"displayName": "Browser Use Cloud",
|
|
5
|
+
"slug": "browser-use-cloud",
|
|
6
|
+
"title": "Browser Use Cloud",
|
|
7
|
+
"description": "Run browser automation tasks with bounded polling and explicit artifacts.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "skills",
|
|
10
|
+
"placeholders": ["BROWSER_USE_API_KEY"],
|
|
11
|
+
"runAllSeedersCandidate": false,
|
|
12
|
+
"tags": ["browser", "automation", "qa"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
# Browser Use Cloud (universal IP-block / web-UI workaround)
|
|
2
|
+
|
|
3
|
+
The swarm container runs on a datacenter IP. A lot of public sites — YouTube,
|
|
4
|
+
Cloudflare-protected pages, login walls — block that IP outright. When direct
|
|
5
|
+
HTTP (curl, WebFetch, yt-dlp, `youtube-transcript-api`) returns a bot
|
|
6
|
+
challenge or a JS-only shell, the cleanest fallback is **Browser Use Cloud**
|
|
7
|
+
— a hosted real-browser service that runs an LLM-driven agent inside Chrome.
|
|
8
|
+
It loads pages, clicks things, scrolls, reads the DOM, and returns the
|
|
9
|
+
extracted content as text. Different IP, full JS, real browser fingerprint.
|
|
10
|
+
|
|
11
|
+
## When to use this vs. alternatives
|
|
12
|
+
|
|
13
|
+
| Situation | Use this? |
|
|
14
|
+
|---|---|
|
|
15
|
+
| YouTube transcript / metadata | **Yes.** YouTube blocks the swarm IP for `yt-dlp`, `youtube-transcript-api`, and the free transcript sites (Cloudflare). Browser Use is the only path without cookies. |
|
|
16
|
+
| Cloudflare-protected site (`youtubetranscript.com`, `youtubetotranscript.com`, similar) | **Yes.** WebFetch returns the JS challenge HTML. Browser Use solves the challenge automatically. |
|
|
17
|
+
| Login-walled content the user authorizes you to access | **Yes** — pass the credentials in the task prompt; Browser Use can fill forms. |
|
|
18
|
+
| Plain server-rendered HTML | **No.** Use `curl` / WebFetch — Browser Use is overkill and costs ~$0.05+/task. |
|
|
19
|
+
| Site has a public API or RSS | **No.** Always prefer the API. |
|
|
20
|
+
| `steipete/summarize` on a non-IP-blocked URL | **No.** Use summarize directly — it's free and faster. |
|
|
21
|
+
|
|
22
|
+
`steipete/summarize` is installed in the swarm (`npm i -g @steipete/summarize`)
|
|
23
|
+
and works fine for non-IP-blocked URLs and local files. It fails on YouTube
|
|
24
|
+
from the swarm because every YouTube-extraction path it uses (`web` caption
|
|
25
|
+
fetch, `yt-dlp` audio download, `apify` if no token) hits the same datacenter
|
|
26
|
+
IP wall.
|
|
27
|
+
|
|
28
|
+
## Auth & base URL
|
|
29
|
+
|
|
30
|
+
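- **Base URL:** `https://api.browser-use.com/api/v2` (the endpoint paths listed under "Core endpoints" already include the `/api/v2` prefix — do not prepend it twice)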
|
|
31
|
+
- **Auth header:** `X-Browser-Use-API-Key: <key>` (NOT `Authorization: Bearer`
|
|
32
|
+
— that returns 404 on all endpoints, easy time-sink)
|
|
33
|
+
- **Stored in swarm config:** key `BROWSER_USE_API_KEY` (global, secret).
|
|
34
|
+
Fetch via `get-config key=BROWSER_USE_API_KEY includeSecrets=true`.
|
|
35
|
+
|
|
36
|
+
## Core endpoints
|
|
37
|
+
|
|
38
|
+
| Endpoint | Purpose |
|
|
39
|
+
|---|---|
|
|
40
|
+
| `POST /api/v2/tasks` | Create a task. Body: `{"task": "<natural-language instructions>"}`. Returns `{id, sessionId}`. |
|
|
41
|
+
| `GET /api/v2/tasks/{id}` | Poll status + output. Fields: `status` (`started` → `finished` / `failed`), `steps[]`, `output` (the agent's final answer as a string). |
|
|
42
|
+
| `GET /api/v2/tasks` | List recent tasks (useful to inspect prior runs / patterns). |
|
|
43
|
+
|
|
44
|
+
The v1 endpoints (`/api/v1/...`) return `{"detail":"Not Found"}` — **always
|
|
45
|
+
use v2**.
|
|
46
|
+
|
|
47
|
+
## Quickstart — YouTube transcript
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
K=$(swarm-get-config BROWSER_USE_API_KEY) # or pull via get-config MCP tool
|
|
51
|
+
|
|
52
|
+
# 1. Create the task
|
|
53
|
+
TASK=$(curl -s -X POST "https://api.browser-use.com/api/v2/tasks" \
|
|
54
|
+
-H "X-Browser-Use-API-Key: $K" \
|
|
55
|
+
-H "Content-Type: application/json" \
|
|
56
|
+
-d '{"task":"Open https://www.youtube.com/watch?v=VIDEO_ID . Dismiss any cookie/consent dialog. Note the video title and channel name. Open the transcript: click \"...more\" in the description, then click \"Show transcript\" (or use the three-dot menu under the video and choose \"Show transcript\"). The transcript panel will appear on the right. Scroll the transcript panel fully to the bottom so every line loads. Then extract and output the COMPLETE transcript text verbatim, all lines, in order. Also output the video title and channel at the top."}')
|
|
57
|
+
|
|
58
|
+
TASK_ID=$(echo "$TASK" | jq -r .id)
|
|
59
|
+
echo "task: $TASK_ID"
|
|
60
|
+
|
|
61
|
+
# 2. Poll until finished (typical: 2-4 minutes for a ~15min video)
|
|
62
|
+
while true; do
|
|
63
|
+
R=$(curl -s "https://api.browser-use.com/api/v2/tasks/$TASK_ID" \
|
|
64
|
+
-H "X-Browser-Use-API-Key: $K")
|
|
65
|
+
S=$(echo "$R" | jq -r .status)
|
|
66
|
+
echo "$(date +%H:%M:%S) status=$S steps=$(echo "$R" | jq '.steps | length')"
|
|
67
|
+
[ "$S" = "finished" ] || [ "$S" = "failed" ] && break
|
|
68
|
+
sleep 30
|
|
69
|
+
done
|
|
70
|
+
|
|
71
|
+
# 3. Extract output — note: \n in the JSON is the literal two chars "\\n",
|
|
72
|
+
# not a newline. Unescape before writing to disk.
|
|
73
|
+
echo "$R" | jq -r '.output' | python3 -c "import sys; sys.stdout.write(sys.stdin.read().replace('\\\\n','\n'))" \
|
|
74
|
+
> /workspace/personal/tmp/transcript.md
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
**Polling cadence:** 30s is plenty. A 10-15min YouTube video transcript run
|
|
78
|
+
takes ~2-4 min and ~15-20 agent steps. Don't busy-poll faster — every poll
|
|
79
|
+
costs a request and Browser Use rate-limits aggressively.
|
|
80
|
+
|
|
81
|
+
## Writing good task prompts
|
|
82
|
+
|
|
83
|
+
The Browser Use agent is an LLM driving a browser — it follows instructions
|
|
84
|
+
literally and sometimes loops if the page changes mid-task. Give it:
|
|
85
|
+
|
|
86
|
+
1. **Exact start URL** — full `https://...` link, not "search for X".
|
|
87
|
+
2. **Pre-emptive dismissals** — "Dismiss any cookie/consent dialog" prevents
|
|
88
|
+
it from getting stuck on EU cookie banners.
|
|
89
|
+
3. **A concrete click path** with fallbacks — `"click 'Show transcript' (or
|
|
90
|
+
use the three-dot menu under the video and choose 'Show transcript')"` —
|
|
91
|
+
sites move things around; give it two ways.
|
|
92
|
+
4. **Explicit scroll instructions** for lazy-loaded content — `"scroll the
|
|
93
|
+
transcript panel fully to the bottom so every line loads"`. Without this
|
|
94
|
+
you get the first ~50 lines and silent truncation.
|
|
95
|
+
5. **The exact output format** — `"output the COMPLETE transcript text
|
|
96
|
+
verbatim, all lines, in order"`. If you want JSON, say "respond with valid
|
|
97
|
+
JSON only, no prose".
|
|
98
|
+
6. **Don't ask it to summarize** unless that's the goal. You want the raw
|
|
99
|
+
content; do the LLM work yourself in a separate step with full control of
|
|
100
|
+
the model.
|
|
101
|
+
|
|
102
|
+
## Output shape gotchas
|
|
103
|
+
|
|
104
|
+
- `output` is a **single string**. Multi-line content has `\n` *escaped as
|
|
105
|
+
two characters* inside that string (because the API returns JSON). When you
|
|
106
|
+
pipe to a file, unescape with `replace('\\n', '\n')` or `printf '%b'` —
|
|
107
|
+
otherwise the file looks like one giant line of `\n[0:01]...\n[0:03]...`.
|
|
108
|
+
- `steps` is an array of `{action, nextGoal, ...}` — useful for debugging a
|
|
109
|
+
failed run, ignore on success.
|
|
110
|
+
- If `status: failed`, look at the last step's `error` field. Common cause:
|
|
111
|
+
the agent couldn't find the click target (page changed, A/B test, region
|
|
112
|
+
variation) — refine the click path and re-run.
|
|
113
|
+
|
|
114
|
+
## Cost & limits
|
|
115
|
+
|
|
116
|
+
- **Paid per task.** Roughly $0.05-$0.15 per task in practice; longer tasks
|
|
117
|
+
with more steps cost more. Don't loop this; use it once you've actually
|
|
118
|
+
hit a block.
|
|
119
|
+
- **Rate-limited.** Don't fire >1 task per ~10s.
|
|
120
|
+
- **No streaming.** You always poll for the final `output`.
|
|
121
|
+
|
|
122
|
+
## Worked example (the one that prompted this skill)
|
|
123
|
+
|
|
124
|
+
Task: get the transcript for `https://www.youtube.com/watch?v=t-G67yKAHBQ`.
|
|
125
|
+
|
|
126
|
+
What failed first:
|
|
127
|
+
- `yt-dlp` (all `--extractor-args "youtube:player_client=..."` variants: `tv`,
|
|
128
|
+
`ios`, `web_safari`, `mweb`, `android`): all returned "Sign in to confirm
|
|
129
|
+
you're not a bot".
|
|
130
|
+
- `youtube-transcript-api` (Python lib): `RequestBlocked` — same IP issue.
|
|
131
|
+
- `summarize "<url>" --extract --youtube web|auto|yt-dlp`: failed too — its
|
|
132
|
+
`web` mode hits YouTube's HTML directly (gets the JS shell footer), and
|
|
133
|
+
`yt-dlp` mode chains the same blocked tool.
|
|
134
|
+
- `curl` + `WebFetch` against `youtubetranscript.com` /
|
|
135
|
+
`youtubetotranscript.com` / `tactiq`: Cloudflare interstitial / App Check
|
|
136
|
+
rejection / 403.
|
|
137
|
+
|
|
138
|
+
What worked:
|
|
139
|
+
- One Browser Use Cloud task with the prompt above. ~17 steps, ~3 min,
|
|
140
|
+
returned a clean 17.7 KB timestamped transcript ("How to Build a
|
|
141
|
+
Self-Improving Company with AI", YC Root Access).
|
|
142
|
+
|
|
143
|
+
## Tips
|
|
144
|
+
|
|
145
|
+
- **Always save the `output` to a file under `/workspace/...`** before
|
|
146
|
+
uploading to Slack — `slack-upload-file` will not read `/tmp/`. Use
|
|
147
|
+
`/workspace/personal/tmp/` or `/workspace/shared/misc/<your-agent-id>/`.
|
|
148
|
+
- **For long videos** (>1h), warn the user this takes ~10 min and costs more
|
|
149
|
+
per run. Consider chunking by timestamp range if you only need part of it.
|
|
150
|
+
- **For non-YouTube sites**, the same recipe applies — just change the URL
|
|
151
|
+
and the click path. Browser Use is the swarm's general "I need a real
|
|
152
|
+
browser" tool.
|
|
153
|
+
- **Don't store the API key in scripts you commit.** Pull it fresh from
|
|
154
|
+
`get-config` each run.
|
|
155
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "skill",
|
|
3
|
+
"name": "desloppify",
|
|
4
|
+
"displayName": "Code Health Scan",
|
|
5
|
+
"slug": "desloppify",
|
|
6
|
+
"title": "Code Health Scan",
|
|
7
|
+
"description": "Run a multi-language code health scan and publish prioritized findings.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "skills",
|
|
10
|
+
"placeholders": ["REPO_URL"],
|
|
11
|
+
"runAllSeedersCandidate": false,
|
|
12
|
+
"tags": ["code-quality", "audit", "reporting"]
|
|
13
|
+
}
|
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
# desloppify — Code-health scan workflow (swarm edition)
|
|
2
|
+
|
|
3
|
+
`desloppify` is a multi-language codebase health scanner (peteromallet/desloppify). This skill is the swarm-adapted SKILL.md: it codifies the **install recipe with the tree-sitter pin**, the surface-only **scan → status → next → triage** workflow we run for repos like `agent-swarm`, and the **publish-to-agent-fs** step so humans (Eze) can see the findings.
|
|
4
|
+
|
|
5
|
+
> **Default mode for swarm workers: surface-only.** Run Phase 1 + early Phase 2, publish a memo to agent-fs, stop. Do **not** run Phase 3 (queue-grinding refactors) unless the task explicitly asks.
|
|
6
|
+
|
|
7
|
+
## When to use this skill
|
|
8
|
+
|
|
9
|
+
- A task asks you to run desloppify, do a code-health scan, get a health score, or surface debt themes on a repo.
|
|
10
|
+
- A task references `peteromallet/desloppify` or the upstream `docs/SKILL.md`.
|
|
11
|
+
- You need to triage tech debt on a TS / Python / multi-lang repo (agent-swarm, landing, agent-swarm-landing, desplega-ai, etc.).
|
|
12
|
+
|
|
13
|
+
If the task is "fix this one bug" or "rename X" → not this skill. Desloppify is for batch debt-surfacing, not point fixes.
|
|
14
|
+
|
|
15
|
+
## Step 1 — Detect
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
command -v desloppify >/dev/null 2>&1 && desloppify --version || echo "NOT INSTALLED"
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
If `--version` prints cleanly, **also verify the tree-sitter pin** (a busted pin is worse than no install — the scan crashes mid-run):
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
pipx runpip desloppify show tree-sitter-language-pack | grep Version
|
|
25
|
+
# Expect: Version: 1.6.2 (or anything < 1.8)
|
|
26
|
+
# If 1.8 or newer → see Step 3 (re-pin).
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Step 2 — Install (first run only)
|
|
30
|
+
|
|
31
|
+
Use `pipx`. This is the working recipe verified in PR #463 (Dockerfile.worker) and across multiple sprite smoke tests:
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pipx install 'desloppify[full]==0.9.15'
|
|
35
|
+
pipx inject --force desloppify 'tree-sitter-language-pack<1.8'
|
|
36
|
+
|
|
37
|
+
# Verify the pin landed:
|
|
38
|
+
pipx runpip desloppify show tree-sitter-language-pack | grep Version
|
|
39
|
+
# → must show 1.6.2 (or another <1.8)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**Why the pin:** `tree-sitter-language-pack` 1.8.0 has an ABI mismatch with `tree-sitter` that crashes desloppify in `cohesion.py:52` → `extractors.py:90` with a `TypeError`. Upstream fix is [peteromallet/desloppify#605](https://github.com/peteromallet/desloppify/pull/605) — open, unmerged. Until that ships, we cap locally. (See task `616b4bba-43de-40b5-af3b-4e219b622218` for the full repro.)
|
|
43
|
+
|
|
44
|
+
If `pipx` isn't installed: `python3 -m pip install --user pipx && python3 -m pipx ensurepath` and start a new shell. If `pipx install` fails with build errors, jump to **Sprite escape hatch** below.
|
|
45
|
+
|
|
46
|
+
## Step 3 — Re-pin (installed but broken)
|
|
47
|
+
|
|
48
|
+
If desloppify is on PATH but `tree-sitter-language-pack` is ≥ 1.8, you do **not** need a full reinstall:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
pipx inject --force desloppify 'tree-sitter-language-pack<1.8'
|
|
52
|
+
pipx runpip desloppify show tree-sitter-language-pack | grep Version
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
The `--force` is required — without it pipx refuses to downgrade. Confirm the version drops, then scan.
|
|
56
|
+
|
|
57
|
+
## Step 4 — Scan
|
|
58
|
+
|
|
59
|
+
Clone or `cd` into the target repo. For TS repos you usually don't need `node_modules` for the scan, but if a finding requires resolving imports, run `bun install` (or `npm install`) first.
|
|
60
|
+
|
|
61
|
+
**Monorepo note:** if the repo has multiple programs in sibling folders (e.g. `frontend/`, `backend/`), scan each separately — never scan the parent:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
desloppify --lang typescript scan --path ./frontend
|
|
65
|
+
desloppify --lang python scan --path ./backend
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Single-program repo:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
desloppify scan --path .
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Capture **exit code**, **duration**, and any warnings. A clean run is exit 0 with no traceback. Typical scan time: 30–90s for ~200K LOC.
|
|
75
|
+
|
|
76
|
+
## Step 5 — Status + next + triage
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
desloppify status # overall / strict / objective / verified scores + dimension health
|
|
80
|
+
desloppify next --count 15 # top-priority execution items (cluster-aware)
|
|
81
|
+
desloppify show --status open --count 50 # broader open backlog if you want to slice manually
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
What to capture for the memo:
|
|
85
|
+
|
|
86
|
+
- **Status snapshot** — overall, strict, objective, verified scores. Note if subjective is at 0% (unassessed) — that means the strict number is a measurement artifact, not a reflection of reality.
|
|
87
|
+
- **Item counts by detector** — `desloppify status` shows this. Look for which detectors dominate (test_coverage, smells, duplication, security, orphaned, …).
|
|
88
|
+
- **Top 10–15 findings** from `next` — identifier · kind · severity · the one-line "why".
|
|
89
|
+
- **Your themes** (2–4 bullets) — what jumps out across the findings. E.g. "godfile in src/be/db.ts", "MCP-tool boilerplate dup cluster", "UI mega-pages with 20+ hooks". Use your own read, not `desloppify plan triage --complete`.
|
|
90
|
+
- **Phase 3 candidates** (2–3) — what you'd nominate to actually grind, if asked.
|
|
91
|
+
- **False positives** — anything desloppify flagged that's actually intentional in the swarm context (CLI entrypoints, deferred-registration MCP tools, dynamically loaded plugins, sanity fixtures, etc.).
|
|
92
|
+
|
|
93
|
+
**Do not** run `desloppify plan triage --complete`, `desloppify plan commit-log record`, or any Phase 3 commands in surface-only mode. Those mutate desloppify's local state and create commitments we don't intend to follow through on.
|
|
94
|
+
|
|
95
|
+
## Step 6 — Publish findings to agent-fs
|
|
96
|
+
|
|
97
|
+
Save the scan memo so Eze and the swarm can see it. Use the shared org (`648a5f3c-35c8-4f11-8673-b89de52cd6bd`) and namespace the path under your own agent ID:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
DATE=$(date +%Y-%m-%d)
|
|
101
|
+
agent-fs --org 648a5f3c-35c8-4f11-8673-b89de52cd6bd write \
|
|
102
|
+
thoughts/$AGENT_ID/research/$DATE-desloppify-<repo>-scan.md \
|
|
103
|
+
--content "$(cat <<'EOF'
|
|
104
|
+
# desloppify scan — <repo> @ <SHA>
|
|
105
|
+
|
|
106
|
+
## Install + scan
|
|
107
|
+
- recipe: pipx install desloppify[full]==0.9.15 + tree-sitter pin
|
|
108
|
+
- exit: 0 / duration: 47s / 919 files / 210K LOC
|
|
109
|
+
|
|
110
|
+
## Status
|
|
111
|
+
overall <N>/100 · strict <N>/100 · objective <N>/100 · verified <N>/100
|
|
112
|
+
dimension health: File X% · Code Y% · Dup Z% · Security S% · Test T%
|
|
113
|
+
|
|
114
|
+
## Top 10–15 findings
|
|
115
|
+
- src/be/db.ts · godfile · T1 · 9441 LOC / complexity 457 / 48 issues
|
|
116
|
+
- ...
|
|
117
|
+
|
|
118
|
+
## Themes
|
|
119
|
+
1. ...
|
|
120
|
+
2. ...
|
|
121
|
+
|
|
122
|
+
## Phase 3 candidates
|
|
123
|
+
1. Split src/be/db.ts by domain
|
|
124
|
+
2. ...
|
|
125
|
+
|
|
126
|
+
## False positives (don't act on)
|
|
127
|
+
- src/cli.tsx (CLI entrypoint)
|
|
128
|
+
- ...
|
|
129
|
+
EOF
|
|
130
|
+
)" -m "desloppify scan memo for <repo>"
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Also drop a private copy at `/workspace/personal/memory/desloppify-<repo>-scan-$DATE.md` so it's indexed for future memory-search.
|
|
134
|
+
|
|
135
|
+
## Step 7 — Slack reply (if the task came from Slack)
|
|
136
|
+
|
|
137
|
+
Single concise reply on the originating thread (use `slack-reply` with your taskId). Include:
|
|
138
|
+
|
|
139
|
+
- ✅/❌ + scan exit + duration
|
|
140
|
+
- Status snapshot (overall / strict / objective / dimension health)
|
|
141
|
+
- Top themes (2–4 bullets)
|
|
142
|
+
- Phase 3 candidates (2–3)
|
|
143
|
+
- agent-fs path to the full memo
|
|
144
|
+
- False positives flagged
|
|
145
|
+
|
|
146
|
+
**Slack limits the text of a single block to 3000 chars.** If your reply trips `invalid_blocks`, split it into Part 1 / Part 2 messages. Don't try to cram everything into one message — readability > brevity.
|
|
147
|
+
|
|
148
|
+
## Sprite escape hatch — when local pipx is broken
|
|
149
|
+
|
|
150
|
+
If `pipx install` fails (system-package conflicts, missing build deps, weird Python ABI), or your worker container's desloppify install is corrupt and re-pinning doesn't help, **spin a fresh sprite** instead of fighting the local env. See the `sprite-cli` skill for full details.
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
sprite create desloppify-scan
|
|
154
|
+
sprite exec -s desloppify-scan -- bash -c '
|
|
155
|
+
set -e
|
|
156
|
+
sudo apt-get update -qq
|
|
157
|
+
sudo apt-get install -y pipx python3-venv git
|
|
158
|
+
pipx ensurepath
|
|
159
|
+
export PATH="$HOME/.local/bin:$PATH"
|
|
160
|
+
pipx install "desloppify[full]==0.9.15"
|
|
161
|
+
pipx inject --force desloppify "tree-sitter-language-pack<1.8"
|
|
162
|
+
pipx runpip desloppify show tree-sitter-language-pack | grep Version
|
|
163
|
+
git clone --depth=1 https://github.com/desplega-ai/<repo>.git /tmp/repo
|
|
164
|
+
cd /tmp/repo
|
|
165
|
+
desloppify scan --path .
|
|
166
|
+
desloppify status
|
|
167
|
+
desloppify next --count 15
|
|
168
|
+
'
|
|
169
|
+
# … capture output, then:
|
|
170
|
+
sprite destroy desloppify-scan --force
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
**Always destroy the sprite when done.** Sprites are paid resources.
|
|
174
|
+
|
|
175
|
+
## Stop conditions
|
|
176
|
+
|
|
177
|
+
- If desloppify crashes despite a verified `<1.8` pin → STOP, paste the traceback, escalate. Don't bisect tree-sitter versions further.
|
|
178
|
+
- If the scan produces 0 findings → suspect a `--path` problem (you may be scanning a parent dir of a monorepo). Re-scan with explicit per-program paths.
|
|
179
|
+
- If the worker is asked to "fix the findings" → that's Phase 3. Confirm scope with the requester before doing it — Phase 3 is queue-grinding refactors and we historically do *not* default to it.
|
|
180
|
+
|
|
181
|
+
## Quick reference (cheat sheet)
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
# Detect + install (idempotent)
|
|
185
|
+
command -v desloppify || { pipx install 'desloppify[full]==0.9.15' && pipx inject --force desloppify 'tree-sitter-language-pack<1.8'; }
|
|
186
|
+
pipx runpip desloppify show tree-sitter-language-pack | grep Version # must be <1.8
|
|
187
|
+
|
|
188
|
+
# Surface workflow
|
|
189
|
+
desloppify scan --path .
|
|
190
|
+
desloppify status
|
|
191
|
+
desloppify next --count 15
|
|
192
|
+
|
|
193
|
+
# Publish + report
|
|
194
|
+
agent-fs --org 648a5f3c-35c8-4f11-8673-b89de52cd6bd write thoughts/$AGENT_ID/research/$(date +%F)-desloppify-<repo>-scan.md --content "..." -m "scan memo"
|
|
195
|
+
# slack-reply on the originating thread
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Upstream reference
|
|
199
|
+
|
|
200
|
+
Full Phase 3 / review workflow / plan commands are in the upstream SKILL.md: <https://github.com/peteromallet/desloppify/blob/main/docs/SKILL.md>. Reach for it when you're explicitly asked to grind the queue (rare). Default swarm mode stops after the memo + Slack reply.
|
|
201
|
+
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"kind": "skill",
|
|
3
|
+
"name": "exa-search",
|
|
4
|
+
"displayName": "Exa Search",
|
|
5
|
+
"slug": "exa-search",
|
|
6
|
+
"title": "Exa Search",
|
|
7
|
+
"description": "Use web search APIs for cited research without over-fetching irrelevant pages.",
|
|
8
|
+
"version": "1.0.0",
|
|
9
|
+
"category": "skills",
|
|
10
|
+
"placeholders": ["EXA_API_KEY"],
|
|
11
|
+
"runAllSeedersCandidate": false,
|
|
12
|
+
"tags": ["research", "search", "web"]
|
|
13
|
+
}
|