@aion0/forge 0.8.9 → 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/RELEASE_NOTES.md CHANGED
@@ -1,14 +1,11 @@
1
- # Forge v0.8.9
1
+ # Forge v0.9.1
2
2
 
3
- Released: 2026-05-21
3
+ Released: 2026-05-22
4
4
 
5
- ## Changes since v0.8.8
6
-
7
- ### Bug Fixes
8
- - fix(install): bundle CLI to cli/mw.mjs at prepack — no runtime tsx needed
5
+ ## Changes since v0.9.0
9
6
 
10
7
  ### Other
11
- - fix(install): bundle CLI to cli/mw.mjs at prepack no runtime tsx needed
8
+ - fix(settings): CLI/API profile add doesn't show + leaks into Agents
12
9
 
13
10
 
14
- **Full Changelog**: https://github.com/aiwatching/forge/compare/v0.8.8...v0.8.9
11
+ **Full Changelog**: https://github.com/aiwatching/forge/compare/v0.9.0...v0.9.1
@@ -34,6 +34,7 @@ export async function POST(req: Request) {
34
34
  schedule_kind: ['once', 'cron', 'manual'].includes(body.schedule_kind) ? body.schedule_kind : 'period',
35
35
  schedule_at: body.schedule_at ? String(body.schedule_at) : null,
36
36
  schedule_cron: body.schedule_cron ? String(body.schedule_cron) : null,
37
+ max_per_tick: Number.isFinite(Number(body.max_per_tick)) ? Number(body.max_per_tick) : undefined,
37
38
  mark_existing_as_seen: body.mark_existing_as_seen !== false,
38
39
  });
39
40
  return NextResponse.json({ job });
package/cli/mw.mjs CHANGED
@@ -8218,6 +8218,7 @@ var init_settings = __esm({
8218
8218
  skillsRepoUrl: "https://raw.githubusercontent.com/aiwatching/forge-skills/main",
8219
8219
  connectorsRepoUrl: "https://raw.githubusercontent.com/aiwatching/forge-connectors/main",
8220
8220
  workflowRepoUrl: "https://raw.githubusercontent.com/aiwatching/forge-workflow/main",
8221
+ maxConcurrentPipelines: 5,
8221
8222
  displayName: "Forge",
8222
8223
  displayEmail: "",
8223
8224
  favoriteProjects: [],
@@ -203,6 +203,7 @@ interface Settings {
203
203
  telegramModel: string;
204
204
  skipPermissions: boolean;
205
205
  notificationRetentionDays: number;
206
+ maxConcurrentPipelines: number;
206
207
  _secretStatus?: Record<string, boolean>;
207
208
  }
208
209
 
@@ -230,6 +231,7 @@ export default function SettingsModal({ onClose }: { onClose: () => void }) {
230
231
  telegramModel: 'sonnet',
231
232
  skipPermissions: false,
232
233
  notificationRetentionDays: 30,
234
+ maxConcurrentPipelines: 5,
233
235
  });
234
236
  const [secretStatus, setSecretStatus] = useState<Record<string, boolean>>({});
235
237
  const [newRoot, setNewRoot] = useState('');
@@ -520,6 +522,30 @@ export default function SettingsModal({ onClose }: { onClose: () => void }) {
520
522
  </div>
521
523
  </div>
522
524
 
525
+ {/* Pipeline concurrency cap */}
526
+ <div className="space-y-2">
527
+ <label className="text-xs text-[var(--text-secondary)] font-semibold uppercase">
528
+ Pipeline concurrency
529
+ </label>
530
+ <div className="flex items-center gap-2">
531
+ <span className="text-[10px] text-[var(--text-secondary)]">Max concurrent pipelines</span>
532
+ <select
533
+ value={settings.maxConcurrentPipelines || 5}
534
+ onChange={e => setSettings({ ...settings, maxConcurrentPipelines: Number(e.target.value) })}
535
+ className="text-xs bg-[var(--bg-tertiary)] border border-[var(--border)] rounded px-2 py-1 text-[var(--text-primary)]"
536
+ >
537
+ {[1, 2, 3, 5, 8, 10, 15, 20].map(n => (
538
+ <option key={n} value={n}>{n}</option>
539
+ ))}
540
+ </select>
541
+ </div>
542
+ <p className="text-[10px] text-[var(--text-secondary)] leading-snug">
543
+ Global cap on running + pending pipelines (cancels new Job dispatches when reached;
544
+ the items roll over to the next tick). Per-Job <code>max_per_tick</code> (default 5, capped 10)
545
+ stacks on top.
546
+ </p>
547
+ </div>
548
+
523
549
  {/* Remote Access (Cloudflare Tunnel) */}
524
550
  <div className="space-y-2">
525
551
  <label className="text-xs text-[var(--text-secondary)] font-semibold uppercase">
@@ -1207,12 +1233,14 @@ function AgentsSection({ settings, setSettings }: { settings: any; setSettings:
1207
1233
  }
1208
1234
 
1209
1235
  // Add configured but not detected agents. Skip rows that are
1210
- // profiles (CLI profile with `base`, or API profile with
1211
- // `type: 'api'`) — those have their own Profiles section below
1212
- // and don't belong in the Agents list.
1236
+ // profiles (CLI profile with `cliType`/`base`, or API profile
1237
+ // with `type: 'api'`) — those have their own Profiles section
1238
+ // below and don't belong in the Agents list. `cliType` is the
1239
+ // current canonical field (written by AddProfileForm); `base`
1240
+ // is the legacy name (kept for backward compat).
1213
1241
  for (const [id, cfg] of Object.entries(configured) as [string, any][]) {
1214
1242
  if (merged.find(a => a.id === id)) continue;
1215
- if (cfg.type === 'api' || cfg.base) continue;
1243
+ if (cfg.type === 'api' || cfg.cliType || cfg.base) continue;
1216
1244
  merged.push({
1217
1245
  id,
1218
1246
  name: cfg.name ?? id,
@@ -1510,7 +1538,7 @@ function AgentsSection({ settings, setSettings }: { settings: any; setSettings:
1510
1538
  className={inputClass}
1511
1539
  >
1512
1540
  <option value="">Default (no profile)</option>
1513
- {Object.entries(settings.agents || {}).filter(([, cfg]: [string, any]) => cfg.base || cfg.type === 'profile').map(([pid, cfg]: [string, any]) => (
1541
+ {Object.entries(settings.agents || {}).filter(([, cfg]: [string, any]) => cfg.cliType || cfg.base || cfg.type === 'profile' || cfg.type === 'api').map(([pid, cfg]: [string, any]) => (
1514
1542
  <option key={pid} value={pid}>{cfg.name || pid}{cfg.model ? ` (${cfg.model})` : ''}</option>
1515
1543
  ))}
1516
1544
  </select>
@@ -1595,8 +1623,9 @@ function AgentsSection({ settings, setSettings }: { settings: any; setSettings:
1595
1623
  <span className="text-[8px] text-[var(--text-secondary)]">Shared across workspace and terminal — override model, env vars, API endpoint</span>
1596
1624
  </div>
1597
1625
 
1598
- {/* All profiles (CLI + API) */}
1599
- {Object.entries(settings.agents || {}).filter(([, cfg]: [string, any]) => cfg.base || cfg.type === 'api').map(([id, cfg]: [string, any]) => (
1626
+ {/* All profiles (CLI + API). CLI profile marker is `cliType`
1627
+ (new) or `base` (legacy); API profile is `type: 'api'`. */}
1628
+ {Object.entries(settings.agents || {}).filter(([, cfg]: [string, any]) => cfg.cliType || cfg.base || cfg.type === 'api').map(([id, cfg]: [string, any]) => (
1600
1629
  <ProfileRow key={id} id={id} cfg={cfg} inputClass={inputClass}
1601
1630
  onUpdate={(updated) => setSettings({ ...settings, agents: { ...settings.agents, [id]: updated } })}
1602
1631
  onDelete={() => {
@@ -13,6 +13,11 @@ Job is a separate primitive from Task and Pipeline. The CLI keeps
13
13
  `forge task` (single agent invocation) and `forge pipeline` (DAG of tasks)
14
14
  unchanged. `forge jobs` is new.
15
15
 
16
+ > **Tip**: for common watchers (GitLab MR comments, Mantis bugs) prefer
17
+ > **From recipe…** in the Jobs form — it pre-fills source connector,
18
+ > dedup field, and pipeline wiring so you only fill 3-4 high-level
19
+ > params. See `22-recipes.md` for the catalog.
20
+
16
21
  ## Anatomy
17
22
 
18
23
  | Field | Purpose |
@@ -0,0 +1,124 @@
1
+ # Recipes & GitLab MR auto-fix
2
+
3
+ A **Recipe** is a parameterized template for a Job + its dispatched pipeline. Instead of hand-filling 12 fields in the Jobs form, you pick a recipe, fill 3-4 high-level params, and Forge instantiates the rest.
4
+
5
+ Recipes live under `~/.forge/data/recipes/*.yaml`. The canonical catalog is `aiwatching/forge-workflow` on GitHub; Marketplace ↓ Sync pulls the latest registry.
6
+
7
+ ## Built-in recipes
8
+
9
+ | Name | What it does | Source tool |
10
+ |---|---|---|
11
+ | `gitlab-mr-watch` | Watch ONE specific MR for new review comments → run `mr-review-fix` | `gitlab.get_mr` |
12
+ | `gitlab-my-mrs-watch` | Watch all MRs assigned to you → run `mr-review-fix` per MR | `gitlab.list_my_mrs` |
13
+ | `mantis-bug-fix` | Manual fire — fix a single Mantis bug → run `mantis-bug-fix-and-mr` | `mantis.get_bug` |
14
+ | `mantis-bug-watch` | Periodic — fix bugs matching a filter | `mantis.search_bugs` |
15
+
16
+ ## Creating a Job from a recipe
17
+
18
+ Extension → Jobs tab → **+ From recipe…** → pick one → fill the form → Create.
19
+
20
+ For `gitlab-mr-watch` you fill:
21
+ - **Local project name** (picks a directory under `projectRoots`)
22
+ - **MR URL** — paste the full URL like `https://gitlab.example.com/namespace/repo/-/merge_requests/123`. Forge auto-extracts namespace/repo + iid.
23
+ - **Poll interval** (minutes, default 5)
24
+ - **Triage policy** (free-form, optional — ships with a sensible default)
25
+
26
+ ## Special param types
27
+
28
+ - `project_picker` — dropdown of dirs under your configured `projectRoots`
29
+ - `gitlab_mr_url` — single MR URL; Forge parses out `<name>__path` (`namespace/repo`) + `<name>__iid` (numeric iid) at instantiation time. Reference them in templates via `{{params.mr_url__path}}` / `{{params.mr_url__iid}}`.
30
+ - `number` — coerces string → number
31
+ - `boolean` — coerces truthy strings
32
+
33
+ ## The `mr-review-fix` pipeline — 5 nodes
34
+
35
+ 1. **ingest** (shell, no worktree) — `glab api` pulls MR meta + ALL notes + diff into `/tmp/mr-<iid>-*.{json,txt}`. Emits `SOURCE_BRANCH`, `MR_TITLE`, `MR_AUTHOR`, `LATEST_COMMENTER`, file paths.
36
+ 2. **triage** (AI, no worktree, no shell) — reads all notes holistically, decides action.
37
+ 3. **fix** (AI, branch `mr-<iid>` worktree) — performs the git work (edit, commit, push); runs only when triage chose `act`.
38
+ 4. **reply** (shell) — posts a comment on the MR describing the outcome; adds a label; closes the MR if needed.
39
+ 5. **cleanup** (shell) — Teams notify + worktree removal + tmp file cleanup.
40
+
41
+ ### Four triage actions
42
+
43
+ | ACTION | Meaning | Reply posted? | Label | MR state |
44
+ |---|---|---|---|---|
45
+ | `act` | Comments name a concrete fix → apply it | yes (with diff summary) | `forge:fix-applied` | push fixup to source_branch |
46
+ | `skip` | Substantive but no change needed (lgtm, nits, questions) | yes (explanation) | `forge:no-fix-needed` | unchanged |
47
+ | `close` | MR itself shouldn't merge (duplicate, wrong direction, already-fixed-elsewhere) | yes (rationale) | `forge:no-merge` | **closed** via `glab api PUT state_event=close` |
48
+ | `noop` | Latest non-system comment is Forge's own or this Job's user → nothing changed since last run | **no** (silent) | none | unchanged |
49
+
50
+ The `noop` case is the loop-breaker. Without it, Forge's auto-reply bumps `user_notes_count`, the next tick sees a new state, fires again, posts another reply — endless self-conversation.
51
+
52
+ ### Required GitLab labels (one-time)
53
+
54
+ Create these in **GitLab → Project → Labels → New label** (any colors):
55
+
56
+ ```
57
+ forge:fix-applied (suggest green)
58
+ forge:no-fix-needed (suggest blue)
59
+ forge:no-merge (suggest red)
60
+ ```
61
+
62
+ If a label is missing, the label-add step logs a failure but the rest of the pipeline still runs.
63
+
64
+ ### Triage prompt — when to customize
65
+
66
+ The recipe ships with a default `triage_instructions` covering common cases. Overwrite when you need project-specific rules. Examples:
67
+
68
+ ```
69
+ Only act if the reviewer is from team @backend. Skip everything else.
70
+ ```
71
+
72
+ ```
73
+ Act ONLY on Code Review Bot 'error' findings; skip all human comments.
74
+ ```
75
+
76
+ ```
77
+ Treat any comment containing "MUST" as actionable; everything else SKIP.
78
+ ```
79
+
80
+ Keep the noop clause at the bottom of any custom policy:
81
+
82
+ ```
83
+ NOOP when the latest non-system comment is authored by @<your-username>
84
+ or starts with '🤖 Forge:'.
85
+ ```
86
+
87
+ ## Per-MR vs per-comment dispatch
88
+
89
+ Older recipe versions used `gitlab.list_mr_notes` and fired ONE pipeline per new comment — 50 historical comments on first Fire meant 50 pipelines. Current recipes use `gitlab.get_mr` + dedup on `user_notes_count`:
90
+
91
+ - Fetches the MR object once; the scheduler wraps it as a 1-item list
92
+ - `user_notes_count` only changes when comments are added/removed → label flips, status changes, assignee changes don't false-trigger
93
+ - First Fire registers the current count as "seen" via `__mark_existing_as_seen: true` — no backfill stampede
94
+
95
+ ## Marketplace ↔ local copy split
96
+
97
+ - **Marketplace** (Settings → Marketplace → Templates → Recipes / Pipelines) lists ONLY items from the registry (`forge-workflow` repo). Workflows you edited by hand or created via "Import as new copy" don't appear here.
98
+ - **Pipelines tab** lists ALL local workflow yamls — registry-sourced installs, your custom edits, anything in `~/.forge/data/flows/`.
99
+ - **Reinstall** button overwrites a local copy with the current registry version (use after a version bump).
100
+ - Local copies are **independent** of marketplace — Forge will never silently overwrite a local file you've edited.
101
+
102
+ ## Cleanup recipes
103
+
104
+ ```bash
105
+ # Delete a Job + its dedup state + any pending pipeline runs
106
+ sqlite3 ~/.forge/data/workflow.db <<'SQL'
107
+ DELETE FROM jobs WHERE name='mr-watch-FortiNAC-14636';
108
+ DELETE FROM job_seen WHERE job_id NOT IN (SELECT id FROM jobs);
109
+ DELETE FROM pipelines WHERE workflow_name='mr-review-fix' AND status IN ('pending','running');
110
+ SQL
111
+
112
+ # Force re-trigger on next tick — clears dedup so any state looks "new"
113
+ sqlite3 ~/.forge/data/workflow.db "DELETE FROM job_seen WHERE job_id IN (SELECT id FROM jobs WHERE source_tool='get_mr');"
114
+ ```
115
+
116
+ ## Where things live on disk
117
+
118
+ | Path | Purpose |
119
+ |---|---|
120
+ | `~/.forge/data/recipes/*.yaml` | Local recipe copies |
121
+ | `~/.forge/data/flows/*.yaml` | Local workflow copies (mr-review-fix, mantis-bug-fix-and-mr, custom) |
122
+ | `~/.forge/data/workflow.db` | Job rows, dedup keys, run history |
123
+ | `~/.forge/data/pipelines/*.json` | Per-run state for each pipeline execution |
124
+ | `~/.forge/data/workflow-cache.json` | Last-synced registry from `forge-workflow` repo |
@@ -51,6 +51,7 @@ The token is valid for 24 hours. Store it in a variable and reuse for all API ca
51
51
  | `18-chrome-mcp.md` | Connect Forge Claude Code sessions to a real Chrome via chrome-devtools-mcp — dev-time browser access for connector authoring |
52
52
  | `19-jobs.md` | Jobs — scheduled connector polls that dedup and fan out to Pipeline / Chat |
53
53
  | `20-mantis-bug-fix.md` | Mantis → Bug Fix → MR builtin pipeline (mantis-bug-fix-and-mr) |
54
+ | `22-recipes.md` | Recipes (parameterized Job templates) + the `mr-review-fix` pipeline — gitlab_mr_url param, 4-action triage (act/skip/close/noop), GitLab label setup, per-MR vs per-comment dispatch, Marketplace ↔ local-copy split |
54
55
 
55
56
  ## Matching questions to docs
56
57
 
@@ -81,3 +82,4 @@ The token is valid for 24 hours. Store it in a variable and reuse for all API ca
81
82
  - Chrome MCP / chrome-devtools-mcp / dev-time browser / CDP / remote debugging → `18-chrome-mcp.md`
82
83
  - Job / scheduled job / connector poll / dedup / periodic fetch / Teams poll / Mantis bug poll → `19-jobs.md`
83
84
  - Mantis bug fix pipeline / mantis-bug-fix-and-mr / open MR for Mantis bug / notify Teams from pipeline / connector-tool endpoint → `20-mantis-bug-fix.md`
85
+ - Recipe / parameterized job / "From recipe" form / mr-review-fix / MR review auto-fix / triage instructions / act vs skip vs close vs noop / forge:no-fix-needed / forge:no-merge label / gitlab_mr_url param / per-MR vs per-comment / Marketplace local-only filter → `22-recipes.md`
@@ -40,7 +40,7 @@ import { getDataDir } from '../dirs';
40
40
  import { createJob } from './store';
41
41
  import type { Job, PipelineDispatchParams, ChatDispatchParams } from './types';
42
42
 
43
- export type ParamType = 'string' | 'number' | 'boolean' | 'select' | 'project_picker' | 'gitlab_mr_url';
43
+ export type ParamType = 'string' | 'number' | 'boolean' | 'select' | 'project_picker' | 'gitlab_mr_url' | 'csv_ids';
44
44
 
45
45
  export interface RecipeParam {
46
46
  name: string;
@@ -164,7 +164,23 @@ function renderString(input: string, ctx: RenderContext): string {
164
164
 
165
165
  function renderDeep(value: any, ctx: RenderContext): any {
166
166
  if (value == null) return value;
167
- if (typeof value === 'string') return renderString(value, ctx);
167
+ if (typeof value === 'string') {
168
+ // When the WHOLE string is one placeholder like "{{params.foo}}"
169
+ // AND it resolves to a non-string (array / object / number),
170
+ // return the typed value directly instead of JSON-stringifying.
171
+ // Lets a recipe plug an array into source_input.items, etc.
172
+ const single = value.trim().match(/^\{\{\s*([a-zA-Z0-9_]+)\.([a-zA-Z0-9_.]+)\s*\}\}$/);
173
+ if (single && KNOWN_NAMESPACES.has(single[1])) {
174
+ const root: any = (ctx as any)[single[1]];
175
+ let cur: any = root;
176
+ for (const seg of single[2].split('.')) {
177
+ if (cur == null || typeof cur !== 'object') { cur = undefined; break; }
178
+ cur = cur[seg];
179
+ }
180
+ if (cur !== undefined && typeof cur !== 'string') return cur;
181
+ }
182
+ return renderString(value, ctx);
183
+ }
168
184
  if (Array.isArray(value)) return value.map((v) => renderDeep(v, ctx));
169
185
  if (typeof value === 'object') {
170
186
  const out: Record<string, any> = {};
@@ -218,6 +234,18 @@ export function instantiateRecipe(name: string, rawParams: Record<string, any>):
218
234
  }
219
235
  params[`${p.name}__path`] = m[1];
220
236
  params[`${p.name}__iid`] = parseInt(m[2], 10);
237
+ } else if (p.type === 'csv_ids' && val !== '') {
238
+ // Comma-separated numeric ids → derived `<name>__items` array of
239
+ // `{ id }` objects, suitable as a Job's source_input.items for
240
+ // an `inline` source. Lets a batch recipe ask the user "give me
241
+ // a list of bug ids" and have the scheduler iterate without
242
+ // needing a per-connector transformer tool.
243
+ const ids = String(val).split(',').map(s => s.trim()).filter(Boolean)
244
+ .map(s => parseInt(s, 10)).filter(Number.isFinite);
245
+ if (ids.length === 0) {
246
+ return { ok: false, error: `param "${p.name}" must be a comma-separated list of numeric ids — got: ${String(val).slice(0, 80)}` };
247
+ }
248
+ params[`${p.name}__items`] = ids.map(id => ({ id }));
221
249
  }
222
250
  params[p.name] = val;
223
251
  }
@@ -252,6 +280,9 @@ export function instantiateRecipe(name: string, rawParams: Record<string, any>):
252
280
  dispatch_type: rendered.dispatch_type || 'pipeline',
253
281
  dispatch_params: (rendered.dispatch_params || {}) as PipelineDispatchParams | ChatDispatchParams,
254
282
  skills: Array.isArray(rendered.skills) ? rendered.skills : [],
283
+ max_per_tick: typeof rendered.max_per_tick === 'number'
284
+ ? rendered.max_per_tick
285
+ : (rendered.max_per_tick != null && rendered.max_per_tick !== '' ? Number(rendered.max_per_tick) : undefined),
255
286
  });
256
287
  return { ok: true, job };
257
288
  } catch (e) {
@@ -9,11 +9,39 @@
9
9
 
10
10
  import {
11
11
  ensureSchema, getDueJobs, hasInflightRun, startRun, finishRun,
12
- markSeen, recordDispatch, getJob, updateJob,
12
+ markSeen, isSeen, recordDispatch, getJob, updateJob,
13
13
  } from './store';
14
14
  import type { Job, JobRunStatus, PipelineDispatchParams, ChatDispatchParams } from './types';
15
15
  import { dispatchTool } from '@/lib/chat/tool-dispatcher';
16
16
  import { dispatchToPipeline, dispatchToChat, dispatchToChatSummary } from './dispatcher';
17
+ import { getDb } from '@/src/core/db/database';
18
+ import { getDbPath } from '@/src/config';
19
+
20
+ /** Count pipelines currently running or pending. Used as the global
21
+ * concurrency budget — paired with settings.maxConcurrentPipelines. */
22
+ function countActivePipelines(): number {
23
+ try {
24
+ const r = getDb(getDbPath()).prepare(
25
+ `SELECT COUNT(*) AS n FROM pipeline_runs WHERE status IN ('running', 'pending')`,
26
+ ).get() as { n: number } | undefined;
27
+ return r?.n ?? 0;
28
+ } catch {
29
+ return 0;
30
+ }
31
+ }
32
+
33
+ /** Read settings.maxConcurrentPipelines (default 5, ceiling 20). */
34
+ async function getMaxConcurrentPipelines(): Promise<number> {
35
+ try {
36
+ const { loadSettings } = await import('@/lib/settings');
37
+ const s = loadSettings();
38
+ const v = (s as any).maxConcurrentPipelines;
39
+ if (!Number.isFinite(v)) return 5;
40
+ return Math.min(Math.max(Math.trunc(v), 1), 20);
41
+ } catch {
42
+ return 5;
43
+ }
44
+ }
17
45
 
18
46
  const TICK_INTERVAL_MS = 60_000;
19
47
 
@@ -166,32 +194,43 @@ export async function executeRun(job: Job, runId: string): Promise<void> {
166
194
  const { __mark_existing_as_seen, ...sourceInput } = job.source_input as any;
167
195
  logLine('info', `tick start trigger=${(__mark_existing_as_seen ? 'backfill' : 'normal')} dispatch=${job.dispatch_type}`);
168
196
  logLine('info', `source input: ${JSON.stringify(sourceInput)}`);
169
- logLine('info', `calling connector ${callName}…`);
170
-
171
- const toolResult = await dispatchTool(
172
- { id: `job-${runId}`, name: callName, input: sourceInput },
173
- // We JSON.parse the response — the 8KB LLM-friendly truncation
174
- // would break parsing on any moderately large list (Todos, big MR
175
- // searches, etc.). Ask for the raw body.
176
- { noTruncation: true },
177
- );
178
-
179
- const respBytes = toolResult.content?.length ?? 0;
180
- logLine(toolResult.is_error ? 'error' : 'info',
181
- `connector returned ${toolResult.is_error ? 'is_error=true ' : ''}${respBytes} bytes`);
182
- logLine('info', `response preview:\n${truncate(toolResult.content || '(empty)', 600)}`, false);
183
-
184
- if (toolResult.is_error) {
185
- throw new Error(`connector ${callName} failed: ${toolResult.content.slice(0, 500)}`);
186
- }
187
197
 
188
- // ── Parse + extract items ────────────────────────────────────
189
- const parsed = safeParseJson(toolResult.content);
190
- if (parsed === undefined) {
191
- const note = `Connector returned non-JSON content (${respBytes} bytes). Preview: ${toolResult.content.slice(0, 200)}`;
192
- logLine('warn', note);
193
- persist({ status: 'ok', notes: note });
194
- return;
198
+ // ── Source path A: inline ───────────────────────────────────
199
+ // When source_connector === 'inline', the recipe / Job baked the
200
+ // items list directly into source_input.items at instantiation —
201
+ // no tool dispatch, no extension round-trip. Common case: a
202
+ // user-typed list of ids being fanned out into pipeline runs.
203
+ // Skips the entire dispatchTool / JSON parse layer.
204
+ let parsed: unknown;
205
+ let respBytes = 0;
206
+ if (job.source_connector === 'inline') {
207
+ logLine('info', `inline source — skipping tool dispatch`);
208
+ parsed = { items: Array.isArray(sourceInput.items) ? sourceInput.items : [] };
209
+ respBytes = JSON.stringify(parsed).length;
210
+ } else {
211
+ logLine('info', `calling connector ${callName}…`);
212
+ const toolResult = await dispatchTool(
213
+ { id: `job-${runId}`, name: callName, input: sourceInput },
214
+ // We JSON.parse the response — the 8KB LLM-friendly truncation
215
+ // would break parsing on any moderately large list (Todos, big MR
216
+ // searches, etc.). Ask for the raw body.
217
+ { noTruncation: true },
218
+ );
219
+ respBytes = toolResult.content?.length ?? 0;
220
+ logLine(toolResult.is_error ? 'error' : 'info',
221
+ `connector returned ${toolResult.is_error ? 'is_error=true ' : ''}${respBytes} bytes`);
222
+ logLine('info', `response preview:\n${truncate(toolResult.content || '(empty)', 600)}`, false);
223
+
224
+ if (toolResult.is_error) {
225
+ throw new Error(`connector ${callName} failed: ${toolResult.content.slice(0, 500)}`);
226
+ }
227
+ parsed = safeParseJson(toolResult.content);
228
+ if (parsed === undefined) {
229
+ const note = `Connector returned non-JSON content (${respBytes} bytes). Preview: ${toolResult.content.slice(0, 200)}`;
230
+ logLine('warn', note);
231
+ persist({ status: 'ok', notes: note });
232
+ return;
233
+ }
195
234
  }
196
235
  logLine('info', `parsed JSON; type=${Array.isArray(parsed) ? 'array' : typeof parsed}`);
197
236
 
@@ -291,7 +330,25 @@ export async function executeRun(job: Job, runId: string): Promise<void> {
291
330
  }
292
331
 
293
332
  // ── Per-item dispatch ────────────────────────────────────────
294
- let dedupHits = 0, missingKey = 0;
333
+ //
334
+ // Two budgets gate dispatch:
335
+ // (a) Per-Job `max_per_tick` (default 5, capped 1-10) — how many
336
+ // NEW items this job is allowed to fan out per tick. Anything
337
+ // over that stays unmarked → rolls over to next tick.
338
+ // (b) Global concurrent-pipeline cap (settings.maxConcurrentPipelines,
339
+ // default 5, max 20) — counted across ALL running/pending
340
+ // pipelines, not just this job's. Prevents one job from
341
+ // monopolizing all slots.
342
+ // Why both: a single job with max_per_tick=10 can still go over if
343
+ // there are already 15 pipelines from OTHER jobs in flight.
344
+ const budget = (() => {
345
+ const v = (job as any).max_per_tick;
346
+ if (!Number.isFinite(v) || v == null) return 5;
347
+ return Math.min(Math.max(Math.trunc(v), 1), 10);
348
+ })();
349
+ const globalCap = await getMaxConcurrentPipelines();
350
+ let dispatchedThisTick = 0;
351
+ let dedupHits = 0, missingKey = 0, deferred = 0;
295
352
  for (const [idx, item] of itemsArr.entries()) {
296
353
  const key = pickDedupKey(item, job.dedup_field);
297
354
  if (!key) {
@@ -299,14 +356,31 @@ export async function executeRun(job: Job, runId: string): Promise<void> {
299
356
  logLine('warn', `[${idx}] item missing dedup_field "${job.dedup_field}" — skipping`);
300
357
  continue;
301
358
  }
302
- const isNew = markSeen(job.id, key);
303
- if (!isNew) {
359
+ // Read-only dedup check — don't mark yet, in case we hit budget.
360
+ if (isSeen(job.id, key)) {
304
361
  dedupHits++;
305
- // Don't mirror to console — too chatty in the typical "0 new" case
306
362
  logLine('info', `[${idx}] ${key} — already seen, skip`, false);
307
363
  continue;
308
364
  }
365
+ // Per-Job budget?
366
+ if (dispatchedThisTick >= budget) {
367
+ deferred++;
368
+ continue;
369
+ }
370
+ // Global cap? Re-check each iteration since other jobs' pipelines
371
+ // may finish or new ones start while we loop.
372
+ if (job.dispatch_type === 'pipeline') {
373
+ const inFlight = countActivePipelines();
374
+ if (inFlight >= globalCap) {
375
+ deferred++;
376
+ logLine('info', `[${idx}] ${key} — global pipeline cap (${globalCap}) reached; deferring to next tick`);
377
+ continue;
378
+ }
379
+ }
380
+ // OK to commit dedup + dispatch.
381
+ markSeen(job.id, key);
309
382
  itemsNew++;
383
+ dispatchedThisTick++;
310
384
  const preview = renderItemPreview(item);
311
385
  logLine('info', `[${idx}] ${key} — new — dispatching ${job.dispatch_type}…`);
312
386
  const dispatchStart = Date.now();
@@ -359,7 +433,11 @@ export async function executeRun(job: Job, runId: string): Promise<void> {
359
433
  else if (missingKey === itemsSeen) note = `All ${itemsSeen} items lacked the dedup_field "${job.dedup_field}". Check the field name vs the connector's response shape.`;
360
434
  }
361
435
 
362
- logLine('info', `tick done in ${Date.now() - t0}ms — ${itemsSeen} seen, ${itemsNew} new, ${itemsDispatched} dispatched, ${dedupHits} dedup hits` + (missingKey ? `, ${missingKey} missing-key` : ''));
436
+ if (deferred > 0) {
437
+ const baseNote = note ? note + ' ' : '';
438
+ note = `${baseNote}${deferred} item(s) deferred to next tick (per-Job budget ${budget} or global cap ${globalCap} reached).`;
439
+ }
440
+ logLine('info', `tick done in ${Date.now() - t0}ms — ${itemsSeen} seen, ${itemsNew} new, ${itemsDispatched} dispatched, ${dedupHits} dedup hits` + (deferred ? `, ${deferred} deferred` : '') + (missingKey ? `, ${missingKey} missing-key` : ''));
363
441
  persist({ status: 'ok', notes: note });
364
442
  } catch (e) {
365
443
  runError = e instanceof Error ? e.message : String(e);
package/lib/jobs/store.ts CHANGED
@@ -40,6 +40,12 @@ export function ensureSchema(): void {
40
40
  schedule_kind TEXT NOT NULL DEFAULT 'period',
41
41
  schedule_at TEXT,
42
42
  schedule_cron TEXT,
43
+ /** Per-tick dispatch budget. The scheduler dispatches at most
44
+ this many NEW items per run; the rest stay unmarked and get
45
+ picked up on the next tick. Hard ceiling enforced in code
46
+ to protect against catastrophic fan-out (e.g. mantis search
47
+ returning 200 bugs and spawning 200 worktrees). */
48
+ max_per_tick INTEGER NOT NULL DEFAULT 5,
43
49
  last_run_at TEXT,
44
50
  next_run_at TEXT,
45
51
  created_at TEXT NOT NULL DEFAULT (datetime('now')),
@@ -88,6 +94,7 @@ export function ensureSchema(): void {
88
94
  try { db().exec(`ALTER TABLE jobs ADD COLUMN schedule_kind TEXT NOT NULL DEFAULT 'period'`); } catch {}
89
95
  try { db().exec(`ALTER TABLE jobs ADD COLUMN schedule_at TEXT`); } catch {}
90
96
  try { db().exec(`ALTER TABLE jobs ADD COLUMN schedule_cron TEXT`); } catch {}
97
+ try { db().exec(`ALTER TABLE jobs ADD COLUMN max_per_tick INTEGER NOT NULL DEFAULT 5`); } catch {}
91
98
  ensured = true;
92
99
  }
93
100
 
@@ -110,6 +117,7 @@ function rowToJob(r: any): Job {
110
117
  schedule_kind: (r.schedule_kind as 'period' | 'once' | 'cron' | 'manual') || 'period',
111
118
  schedule_at: toIsoUTC(r.schedule_at),
112
119
  schedule_cron: r.schedule_cron || null,
120
+ max_per_tick: typeof r.max_per_tick === 'number' ? r.max_per_tick : 5,
113
121
  last_run_at: toIsoUTC(r.last_run_at),
114
122
  next_run_at: toIsoUTC(r.next_run_at),
115
123
  created_at: toIsoUTC(r.created_at) || r.created_at,
@@ -167,6 +175,16 @@ export function getJob(id: string): Job | null {
167
175
  return r ? rowToJob(r) : null;
168
176
  }
169
177
 
178
+ /** Per-tick dispatch budget — clamped [1, 10]. Centralized so the
179
+ * ceiling is one constant, not duplicated across createJob/updateJob/
180
+ * scheduler. Catastrophic fan-out (mantis returns 200 bugs → 200
181
+ * worktrees → disk full) was the motivating incident. */
182
+ export const MAX_PER_TICK_CEILING = 10;
183
+ function clampMaxPerTick(v: number | undefined): number {
184
+ if (!Number.isFinite(v) || v == null) return 5;
185
+ return Math.min(Math.max(Math.trunc(v as number), 1), MAX_PER_TICK_CEILING);
186
+ }
187
+
170
188
  export function createJob(input: CreateJobInput): Job {
171
189
  ensureSchema();
172
190
  const id = randomUUID().slice(0, 12);
@@ -175,8 +193,8 @@ export function createJob(input: CreateJobInput): Job {
175
193
  source_connector, source_tool, source_input,
176
194
  items_path, dedup_field,
177
195
  dispatch_type, dispatch_params, skills,
178
- schedule_kind, schedule_at, schedule_cron)
179
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
196
+ schedule_kind, schedule_at, schedule_cron, max_per_tick)
197
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
180
198
  `).run(
181
199
  id,
182
200
  input.name,
@@ -193,6 +211,7 @@ export function createJob(input: CreateJobInput): Job {
193
211
  input.schedule_kind || 'period',
194
212
  input.schedule_at || null,
195
213
  input.schedule_cron || null,
214
+ clampMaxPerTick(input.max_per_tick),
196
215
  );
197
216
 
198
217
  // Backfill guard: if mark_existing_as_seen is true (default), we don't pre-seed
@@ -232,6 +251,7 @@ export function updateJob(id: string, patch: Partial<{
232
251
  schedule_kind: 'period' | 'once' | 'cron' | 'manual';
233
252
  schedule_at: string | null;
234
253
  schedule_cron: string | null;
254
+ max_per_tick: number;
235
255
  }>): boolean {
236
256
  ensureSchema();
237
257
  const sets: string[] = []; const vals: any[] = [];
@@ -249,6 +269,7 @@ export function updateJob(id: string, patch: Partial<{
249
269
  if (patch.schedule_kind !== undefined) { sets.push('schedule_kind = ?'); vals.push(patch.schedule_kind); }
250
270
  if (patch.schedule_at !== undefined) { sets.push('schedule_at = ?'); vals.push(patch.schedule_at); }
251
271
  if (patch.schedule_cron !== undefined) { sets.push('schedule_cron = ?'); vals.push(patch.schedule_cron); }
272
+ if (patch.max_per_tick !== undefined) { sets.push('max_per_tick = ?'); vals.push(clampMaxPerTick(patch.max_per_tick)); }
252
273
  if (sets.length === 0) return false;
253
274
  sets.push("updated_at = datetime('now')");
254
275
  vals.push(id);
@@ -335,6 +356,15 @@ export function markSeen(jobId: string, key: string): boolean {
335
356
  return r.changes > 0;
336
357
  }
337
358
 
359
/**
 * Report whether a dedup key is already recorded for a job.
 *
 * Pure lookup: issues a single SELECT against `job_seen` and never writes, so
 * an item that might be deferred to a later tick (budget cap) is not marked
 * as seen prematurely.
 *
 * @param jobId Job whose dedup set is consulted.
 * @param key   Dedup key to look for.
 * @returns true when a matching row exists.
 */
export function isSeen(jobId: string, key: string): boolean {
  ensureSchema();
  const lookup = db().prepare(`SELECT 1 FROM job_seen WHERE job_id = ? AND dedup_key = ? LIMIT 1`);
  const row = lookup.get(jobId, key);
  return Boolean(row);
}
367
+
338
368
  export function resetDedup(jobId: string): number {
339
369
  ensureSchema();
340
370
  const r = db().prepare('DELETE FROM job_seen WHERE job_id = ?').run(jobId);
package/lib/jobs/types.ts CHANGED
@@ -84,6 +84,12 @@ export interface Job {
84
84
  /** Cron expression (5 fields); only used when schedule_kind === 'cron'. */
85
85
  schedule_cron: string | null;
86
86
 
87
+ /** Per-tick dispatch budget. Default 5, hard ceiling 10 in the
88
+ * scheduler. Source can return 200 items; only this many spawn
89
+ * pipelines per tick — the rest stay un-dedup-marked and roll
90
+ * over to the next tick. Protects disk/RAM from fan-out blow-up. */
91
+ max_per_tick: number;
92
+
87
93
  last_run_at: string | null;
88
94
  next_run_at: string | null;
89
95
  created_at: string;
@@ -144,6 +150,9 @@ export interface CreateJobInput {
144
150
  /** Cron expression, required when schedule_kind === 'cron'. */
145
151
  schedule_cron?: string | null;
146
152
 
153
+ /** Per-tick dispatch budget (default 5, capped 1-10 in scheduler). */
154
+ max_per_tick?: number;
155
+
147
156
  /** Default true: first tick records existing items as seen without dispatching. */
148
157
  mark_existing_as_seen?: boolean;
149
158
  }
package/lib/settings.ts CHANGED
@@ -76,6 +76,13 @@ export interface Settings {
76
76
  * shape as connectorsRepoUrl. Default: `aiwatching/forge-workflow`.
77
77
  */
78
78
  workflowRepoUrl: string;
79
+ /**
80
+ * Maximum concurrent pipeline runs (running + pending). When a Job's
81
+ * scheduler tick would push the total above this, additional items
82
+ * are deferred to the next tick instead of dispatched.
83
+ * Default 5; ceiling 20 enforced in scheduler.
84
+ */
85
+ maxConcurrentPipelines: number;
79
86
  displayName: string;
80
87
  displayEmail: string;
81
88
  favoriteProjects: string[];
@@ -132,6 +139,7 @@ const defaults: Settings = {
132
139
  skillsRepoUrl: 'https://raw.githubusercontent.com/aiwatching/forge-skills/main',
133
140
  connectorsRepoUrl: 'https://raw.githubusercontent.com/aiwatching/forge-connectors/main',
134
141
  workflowRepoUrl: 'https://raw.githubusercontent.com/aiwatching/forge-workflow/main',
142
+ maxConcurrentPipelines: 5,
135
143
  displayName: 'Forge',
136
144
  displayEmail: '',
137
145
  favoriteProjects: [],
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@aion0/forge",
3
- "version": "0.8.9",
3
+ "version": "0.9.1",
4
4
  "description": "Unified AI workflow platform — multi-model task orchestration, persistent sessions, web terminal, remote access",
5
5
  "type": "module",
6
6
  "scripts": {
@@ -0,0 +1,223 @@
1
+ /**
2
+ * Mantis connector regression suite.
3
+ *
4
+ * pnpm tsx scripts/test-mantis.ts # all cases
5
+ * pnpm tsx scripts/test-mantis.ts --case source # one case
6
+ * pnpm tsx scripts/test-mantis.ts --bail # stop on first fail
7
+ *
8
+ * Hits the LIVE Mantis via the running Forge's dispatchTool. The
9
+ * browser extension must be paired + logged into Mantis. Each case:
10
+ *
11
+ * 1. Calls mantis.<tool> with `args`.
12
+ * 2. Validates response shape (parseable, has `bugs`).
13
+ * 3. Runs row-level assertions (e.g. every result.source matches
14
+ * the filter).
15
+ *
16
+ * No DB writes, no Job dispatch — pure connector smoke tests.
17
+ */
18
+ import { dispatchTool } from '@/lib/chat/tool-dispatcher';
19
+
20
+ // ─── Test case shape ──────────────────────────────────────────
21
/** One response check: returns null on pass, or a string giving the failure reason. */
type CaseAssertion = (resp: any) => string | null;

/** A single connector smoke test: which tool to call, with what input, and how to judge the reply. */
interface Case {
  /** Label printed by the runner; also the text `--case` is matched against. */
  name: string;
  /** Tool name handed to dispatchTool. */
  tool: string;
  /** Input object passed to the tool. */
  args: Record<string, unknown>;
  /** Checks applied to the parsed response. */
  assertions?: CaseAssertion[];
  /** For inspecting a case under debug — print the response keys. */
  dump?: boolean;
}
29
+
30
+ // ─── Assertion helpers ────────────────────────────────────────
31
/**
 * Build an assertion that the response carries an array or object under `key`.
 *
 * `null` is rejected explicitly: `typeof null` is 'object', so a bare typeof
 * test would let a null field pass as present.
 *
 * @param key Property expected on the response.
 * @returns Assertion yielding null on pass, or a "missing field" reason.
 */
const has = (key: string) => (r: any) => {
  const value = r?.[key];
  return value !== null && typeof value === 'object' ? null : `missing field '${key}'`;
};
33
+
34
/**
 * Build an assertion that `key` holds an array with at least one element.
 *
 * @param key Property expected on the response.
 * @returns Assertion yielding null on pass; otherwise a reason that includes
 *          the observed length ('n/a' when the value has none).
 */
const nonEmpty = (key: string) => (r: any) => {
  const value = r?.[key];
  if (Array.isArray(value) && value.length > 0) {
    return null;
  }
  return `expected non-empty '${key}', got len=${value?.length ?? 'n/a'}`;
};
36
+
37
/**
 * Build an assertion that every row in `r.bugs` has `row[col]` containing at
 * least one of `needles` (case-insensitive substring match).
 *
 * A missing, non-array or empty `bugs` list is reported as a failure
 * ("no bugs to check") rather than a vacuous pass. A null response or a null
 * row is reported as a failure instead of throwing.
 *
 * @param col     Column (property name) inspected on each row.
 * @param needles Accepted substrings; one match per row is enough.
 * @returns Assertion yielding null on pass, or a reason naming the first
 *          offending row.
 */
const everyRowMatches = (col: string, needles: string[]) => (r: any) => {
  const rows: unknown = r?.bugs;
  if (!Array.isArray(rows) || rows.length === 0) return `no bugs to check`;
  const wanted = needles.map(n => n.toLowerCase());
  // findIndex rather than find: a null/undefined row is itself a mismatch and
  // must not be confused with "nothing found".
  const badIndex = rows.findIndex((b: any) => {
    // `??` keeps legitimate falsy cell values such as 0 comparable as text.
    const cell = String(b?.[col] ?? '').toLowerCase();
    return !wanted.some(n => cell.includes(n));
  });
  if (badIndex === -1) return null;
  const bad: any = rows[badIndex];
  return `bug ${bad?.id} ${col}="${bad?.[col]}" doesn't match any of ${JSON.stringify(needles)}`;
};
48
+
49
/** Status name → numeric id used in the `show_status` query parameter. */
const STATUS_TO_ID: Record<string, number> = {
  new: 10, feedback: 20, acknowledged: 30, confirmed: 40,
  assigned: 50, resolved: 80, closed: 90,
};

/**
 * Verify the URL Mantis was hit with carries show_status for the requested
 * states. Mantis applies status filter server-side via show_status=<id>;
 * per-row b.status is unreliable on customized themes that put handler name
 * in the Status column, so we trust the URL instead.
 *
 * Ids are read from `r._filter_url`; both a comma-separated value and a
 * repeated `show_status` parameter are accepted, and a trailing `#fragment`
 * is ignored.
 *
 * @param states Status names (case-insensitive) that must appear in the URL.
 * @returns Assertion yielding null on pass, or a reason listing the missing
 *          ids. A state name absent from STATUS_TO_ID is reported as a
 *          failure instead of being silently dropped, which would let the
 *          check pass vacuously.
 */
const urlHasStatus = (states: string[]) => (r: any) => {
  const unknown: string[] = [];
  const wantIds: number[] = [];
  for (const state of states) {
    const id = STATUS_TO_ID[state.toLowerCase()];
    // typeof check also rejects inherited members such as 'constructor'.
    if (typeof id === 'number') wantIds.push(id);
    else unknown.push(state);
  }
  if (unknown.length > 0) return `unknown status name(s): ${unknown.join(',')}`;

  const url: string = typeof r?._filter_url === 'string' ? r._filter_url : '';
  const queryStart = url.indexOf('?');
  const query = queryStart === -1 ? '' : url.slice(queryStart + 1).split('#')[0];
  const raw = new URLSearchParams(query).getAll('show_status');
  const got = raw
    .flatMap(v => v.split(','))
    .map(s => parseInt(s, 10))
    .filter(n => Number.isFinite(n));
  const missing = wantIds.filter(w => !got.includes(w));
  return missing.length === 0
    ? null
    : `URL missing show_status for ${missing.join(',')} — got "${raw.length > 0 ? raw.join(',') : null}"`;
};
66
+
67
/**
 * Assertion that the response carries no truthy `_error` field.
 *
 * Uses optional chaining so a null/undefined response is treated as "no
 * error reported" instead of throwing.
 *
 * @returns null on pass, or a reason quoting at most the first 200
 *          characters of the error.
 */
const noError = (r: any) =>
  r?._error ? `connector reported _error: ${String(r._error).slice(0, 200)}` : null;
70
+
71
/**
 * Build an assertion that `r[key]` does not exceed `max`.
 *
 * An array is measured by its length; any other value is used as the count
 * directly. The count must be an actual number: with a loose comparison,
 * values such as `true` or `''` would coerce to 1 or 0 and pass by accident.
 *
 * @param key Property holding the list or the count.
 * @param max Largest accepted count (inclusive).
 * @returns Assertion yielding null on pass, or a reason with the observed value.
 */
const countLessOrEq = (key: string, max: number) => (r: any) => {
  const value = r?.[key];
  const n = Array.isArray(value) ? value.length : value;
  return typeof n === 'number' && n <= max ? null : `expected ${key} ≤ ${max}, got ${n}`;
};
75
+
76
+ // ─── Cases ────────────────────────────────────────────────────
77
/**
 * Build a case for the one tool every entry below targets, so each entry
 * lists only what varies: its label, its input and its checks.
 */
const searchBugsCase = (
  name: string,
  args: Record<string, unknown>,
  assertions: Case['assertions'],
): Case => ({ name, tool: 'mantis.search_bugs', args, assertions });

/** The regression suite, in execution order. */
const CASES: Case[] = [
  searchBugsCase(
    'baseline / project_name resolves',
    { project_name: 'FortiNAC', status: 'assigned', limit: 5 },
    [noError, has('bugs'), countLessOrEq('bugs', 5)],
  ),
  searchBugsCase(
    'status=assigned filter',
    { project_name: 'FortiNAC', status: 'assigned', limit: 10 },
    [noError, urlHasStatus(['assigned'])],
  ),
  searchBugsCase(
    'source=QA filter (client-side, was broken pre-v0.13.0)',
    { project_name: 'FortiNAC', status: 'assigned', source: 'QA', limit: 10 },
    [noError, everyRowMatches('source', ['QA'])],
  ),
  searchBugsCase(
    'source=DEV filter — different value, same path',
    { project_name: 'FortiNAC', status: 'assigned', source: 'DEV', limit: 10 },
    [noError, everyRowMatches('source', ['DEV'])],
  ),
  searchBugsCase(
    'fix_schedule filter',
    { project_name: 'FortiNAC', status: 'assigned', fix_schedule: '8.0.0', limit: 10 },
    [noError, everyRowMatches('fix_schedule', ['8.0.0'])],
  ),
  searchBugsCase(
    'combined: status + project + source + fix_schedule',
    {
      project_name: 'FortiNAC', status: 'assigned',
      fix_schedule: '8.0.0', source: 'QA', limit: 5,
    },
    [
      noError,
      everyRowMatches('source', ['QA']),
      everyRowMatches('fix_schedule', ['8.0.0']),
      urlHasStatus(['assigned']),
    ],
  ),
  searchBugsCase(
    'empty-match: source=NONEXISTENT → 0 bugs, no error',
    { project_name: 'FortiNAC', status: 'assigned', source: 'NONEXISTENT_VALUE_XYZ', limit: 5 },
    [
      noError,
      (r: any) => r.bugs?.length === 0 ? null : `expected 0 bugs, got ${r.bugs?.length}`,
    ],
  ),
  searchBugsCase(
    'resolution=open via extra_params (URL-layer)',
    {
      project_name: 'FortiNAC', status: 'assigned',
      extra_params: { 'resolution[]': 10 }, limit: 5,
    },
    [noError, has('bugs')],
  ),
  // NOTE(review): despite its name, this case only calls mantis.search_bugs
  // and checks that one bug came back; no get_bug call is made here.
  searchBugsCase(
    'get_bug round-trip on bug from search',
    { project_name: 'FortiNAC', status: 'assigned', limit: 1 },
    [noError, nonEmpty('bugs')],
  ),
];
147
+
148
+ // ─── Runner ───────────────────────────────────────────────────
149
// Command-line flags, read once at start-up.
const args = process.argv.slice(2);

/** Position of `--case` in the argument list, or -1 when the flag is absent. */
const caseFlagIndex = args.indexOf('--case');

/** Text following `--case`; null when the flag was not given. */
const caseFilter = caseFlagIndex < 0 ? null : args[caseFlagIndex + 1];

/** True when `--bail` was passed: stop after the first failing case. */
const bail = args.indexOf('--bail') !== -1;
155
+
156
/** Wrap text in the given SGR escape code, followed by a reset. */
const sgr = (code: number) => (s: string) => `\x1b[${code}m${s}\x1b[0m`;

/** Terminal colour/emphasis helpers used by the runner output. */
const C = {
  red: sgr(31),
  green: sgr(32),
  yellow: sgr(33),
  dim: sgr(2),
  bold: sgr(1),
};
163
+
164
/**
 * Execute one case: call the tool, parse its reply as JSON, then run every
 * assertion against the parsed response.
 *
 * Never rejects for a failing case. A tool error, a thrown exception, invalid
 * JSON, or an assertion that itself throws are all turned into entries in
 * `failures`, so one bad case cannot abort the whole run.
 *
 * @param c Case to execute.
 * @returns `pass` is true when no failure was recorded; `resp` is the parsed
 *          response, or null when the call or the parsing failed.
 */
async function runCase(c: Case): Promise<{ pass: boolean; failures: string[]; resp: any }> {
  let resp: any = null;
  try {
    const r = await dispatchTool(
      { id: `test-${Date.now()}`, name: c.tool, input: c.args },
      { noTruncation: true },
    );
    if (r.is_error) {
      return { pass: false, failures: [`is_error=true: ${r.content.slice(0, 300)}`], resp: null };
    }
    resp = JSON.parse(r.content);
  } catch (e: unknown) {
    // Anything can be thrown; only Error instances carry a message.
    const reason = e instanceof Error ? e.message : String(e);
    return { pass: false, failures: [`exception: ${reason}`], resp: null };
  }

  const failures: string[] = [];
  for (const a of c.assertions ?? []) {
    try {
      const failure = a(resp);
      if (failure) failures.push(failure);
    } catch (e: unknown) {
      // An assertion tripping over an unexpected response shape is a failure
      // of this case, not a reason to stop running the remaining ones.
      failures.push(`assertion threw: ${e instanceof Error ? e.message : String(e)}`);
    }
  }
  return { pass: failures.length === 0, failures, resp };
}
185
+
186
/**
 * Entry point: pick the cases to run (all, or those whose name contains the
 * `--case` text, case-insensitively), run them one after another, print a
 * PASS/FAIL line per case plus a summary, then exit.
 *
 * Exit codes: 0 when every executed case passed, 1 when any failed, 2 when
 * the filter matched no case.
 */
async function main(): Promise<void> {
  let cases = CASES;
  if (caseFilter) {
    const needle = caseFilter.toLowerCase();
    cases = CASES.filter(c => c.name.toLowerCase().includes(needle));
  }
  if (cases.length === 0) {
    console.error(`no cases match '${caseFilter}'`);
    console.error(`available: ${CASES.map(c => c.name).join(', ')}`);
    process.exit(2);
  }

  const tally = { passed: 0, failed: 0 };
  const startedAll = Date.now();

  for (const c of cases) {
    process.stdout.write(`${C.dim('▶')} ${c.name.padEnd(60)} `);
    const t0 = Date.now();
    const outcome = await runCase(c);
    const ms = Date.now() - t0;

    if (outcome.pass) {
      // '?' stands in when the response carries no bugs array to count.
      const n = outcome.resp?.bugs?.length ?? '?';
      const plural = n === 1 ? '' : 's';
      console.log(`${C.green('PASS')} ${C.dim(`(${ms}ms, ${n} bug${plural})`)}`);
      tally.passed += 1;
      continue;
    }

    console.log(`${C.red('FAIL')} ${C.dim(`(${ms}ms)`)}`);
    outcome.failures.forEach(f => console.log(` ${C.red('×')} ${f}`));
    tally.failed += 1;

    // Extra detail is printed only for cases flagged with `dump`.
    if (c.dump && outcome.resp) {
      console.log(C.dim(' response keys: ' + Object.keys(outcome.resp).join(', ')));
      if (outcome.resp._filter_diagnostics) {
        console.log(C.dim(' _filter_diagnostics: ' + JSON.stringify(outcome.resp._filter_diagnostics)));
      }
    }
    if (bail) break;
  }

  const totalMs = Date.now() - startedAll;
  const { passed, failed } = tally;
  console.log('');
  console.log(`${C.bold(`${passed}/${passed + failed} passed`)} in ${(totalMs / 1000).toFixed(1)}s`);
  process.exit(failed === 0 ? 0 : 1);
}

void main();