npm - @aion0/forge - Versions diffs - 0.5.22 → 0.5.24 - Mend

@aion0/forge 0.5.22 → 0.5.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

package/RELEASE_NOTES.md +16 -29
package/app/api/smith-templates/route.ts +81 -0
package/components/SettingsModal.tsx +6 -1
package/components/WorkspaceView.tsx +841 -83
package/lib/claude-sessions.ts +2 -1
package/lib/forge-mcp-server.ts +247 -33
package/lib/help-docs/11-workspace.md +722 -166
package/lib/telegram-bot.ts +1 -1
package/lib/workspace/orchestrator.ts +292 -76
package/lib/workspace/presets.ts +535 -58
package/lib/workspace/requests.ts +287 -0
package/lib/workspace/session-monitor.ts +4 -3
package/lib/workspace/types.ts +1 -0
package/lib/workspace/watch-manager.ts +1 -1
package/lib/workspace-standalone.ts +1 -1
package/package.json +1 -1
package/scripts/bench/README.md +66 -0
package/scripts/bench/results/.gitignore +2 -0
package/scripts/bench/run.ts +635 -0
package/scripts/bench/tasks/01-text-utils/task.md +26 -0
package/scripts/bench/tasks/01-text-utils/validator.sh +46 -0
package/scripts/bench/tasks/02-pagination/setup.sh +19 -0
package/scripts/bench/tasks/02-pagination/task.md +48 -0
package/scripts/bench/tasks/02-pagination/validator.sh +69 -0
package/scripts/bench/tasks/03-bug-fix/setup.sh +82 -0
package/scripts/bench/tasks/03-bug-fix/task.md +30 -0
package/scripts/bench/tasks/03-bug-fix/validator.sh +29 -0
package/templates/smith-lead.json +45 -0

package/scripts/bench/tasks/01-text-utils/validator.sh ADDED Viewed

@@ -0,0 +1,46 @@
+#!/usr/bin/env bash
+# Validator for text utility task.
+# Usage: validator.sh [PROJECT_ROOT]
+# Runs in harness_test project root. Exits 0 = pass, non-zero = fail.
+# Prints PASS on success, or a "FAIL: <reason>" line for the first failed check.
+set -e
+PROJECT_ROOT="${1:-/Users/zliu/IdeaProjects/harness_test}"
+cd "$PROJECT_ROOT/src" || { echo "FAIL: src/ directory not found"; exit 1; }
+# 1. Check files exist
+[ -f utils/text.js ] || { echo "FAIL: utils/text.js missing"; exit 1; }
+[ -f utils/text.test.js ] || { echo "FAIL: utils/text.test.js missing"; exit 1; }
+# 2. Check exports
+grep -q "export.*capitalize" utils/text.js || { echo "FAIL: capitalize not exported"; exit 1; }
+grep -q "export.*reverseWords" utils/text.js || { echo "FAIL: reverseWords not exported"; exit 1; }
+# 3. Run tests
+# The pipeline's own status is tee's, so node's exit code is read from PIPESTATUS.
+node --test utils/text.test.js 2>&1 | tee /tmp/text-test-output.txt
+TEST_EXIT=${PIPESTATUS[0]}
+if [ "$TEST_EXIT" != "0" ]; then
+  echo "FAIL: tests failed (exit=$TEST_EXIT)"
+  exit 1
+fi
+# 4. Additional smoke test — behavior verification independent of agent's tests
+node -e "
+import('./utils/text.js').then(m => {
+  const assert = require('node:assert/strict');
+  // Record whether fn threw BEFORE asserting. Calling assert.fail inside the
+  // same try block would be caught by the catch that inspects the error type,
+  // replacing the intended message with a generic one.
+  const expectTypeError = (label, fn) => {
+    let threw = false;
+    let error;
+    try { fn(); } catch (e) { threw = true; error = e; }
+    assert.ok(threw, 'expected ' + label + ' to throw TypeError, but nothing was thrown');
+    assert.ok(error instanceof TypeError, 'expected ' + label + ' to throw TypeError, got: ' + String(error));
+  };
+  // capitalize
+  assert.equal(m.capitalize('hello'), 'Hello', 'capitalize basic');
+  assert.equal(m.capitalize('a'), 'A', 'capitalize single char');
+  expectTypeError('capitalize(empty string)', () => m.capitalize(''));
+  expectTypeError('capitalize(null)', () => m.capitalize(null));
+  expectTypeError('capitalize(123)', () => m.capitalize(123));
+  // reverseWords
+  assert.equal(m.reverseWords('hello world'), 'world hello');
+  assert.equal(m.reverseWords('  a  b  c  '), 'c b a');
+  assert.equal(m.reverseWords(''), '');
+  assert.equal(m.reverseWords('single'), 'single');
+  expectTypeError('reverseWords(null)', () => m.reverseWords(null));
+  console.log('SMOKE_TEST_PASSED');
+}).catch(err => { console.error('SMOKE_TEST_FAILED:', err.message); process.exit(1); });
+" || { echo "FAIL: smoke test failed"; exit 1; }
+echo "PASS"
+exit 0

package/scripts/bench/tasks/02-pagination/setup.sh ADDED Viewed

@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+# Create a basic user list module without pagination.
+set -e
+PROJECT="${1:-/Users/zliu/IdeaProjects/harness_test}"
+mkdir -p "$PROJECT/src/api"
+cat > "$PROJECT/src/api/users.js" <<'EOF'
+const USERS = Array.from({ length: 127 }, (_, i) => ({
+  id: i + 1,
+  name: `User ${i + 1}`,
+  email: `user${i + 1}@example.com`,
+}));
+export function listUsers() {
+  return USERS;
+}
+EOF
+echo "Setup complete: created src/api/users.js with 127 users and a listUsers() function."

package/scripts/bench/tasks/02-pagination/task.md ADDED Viewed

@@ -0,0 +1,48 @@
+# Task: Add Pagination to User List
+The file `src/api/users.js` currently has a `listUsers()` function that returns all users. Add pagination support.
+## Requirements
+Replace `listUsers()` with a paginated version (the export must keep the name `listUsers`):
+```
+listUsers({ page = 1, pageSize = 20 } = {})
+```
+**Return format**:
+```js
+{
+  items: [...],       // users on the current page
+  total: 127,         // total number of users
+  page: 1,            // current page (1-indexed)
+  pageSize: 20,       // page size (after validation)
+  totalPages: 7,      // Math.ceil(total / pageSize)
+  hasNext: true,      // true if more pages exist
+  hasPrev: false      // true if page > 1
+}
+```
+## Validation Rules
+- `page` must be an integer ≥ 1. If it is invalid (not a number, NaN, a non-integer such as 1.5, or < 1), throw `RangeError`.
+- `pageSize` must be an integer in the range [1, 100]. If it is invalid, throw `RangeError`.
+- If `page` exceeds the available pages, return an empty `items` array but still return the correct `total`, `page`, `pageSize`, `totalPages`, `hasNext: false`, `hasPrev: true`.
+## Test File
+Also create `src/api/users.test.js` using `node:test` and `node:assert/strict` covering:
+- Default params return page 1 with 20 items
+- Page 2 returns items 21-40
+- Last page (page 7) returns items 121-127
+- Page 8 returns empty items but correct metadata
+- Custom pageSize (e.g., 50)
+- Invalid page (0, -1, 'abc', 1.5, NaN) throws RangeError
+- Invalid pageSize (0, 101, 'abc', 1.5) throws RangeError
+## Constraints
+- Keep ES module syntax
+- No external deps
+- Preserve the existing USERS array
+- Tests must pass via: `cd src && node --test api/users.test.js`

package/scripts/bench/tasks/02-pagination/validator.sh ADDED Viewed

@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# Validator for the pagination task.
+# Usage: validator.sh [PROJECT]
+# Checks that src/api/users.js exports listUsers, runs the agent's own tests,
+# then runs an independent smoke test against the paginated API.
+# Exits 0 = pass, non-zero = fail; prints PASS or a "FAIL: <reason>" line.
+set -e
+PROJECT="${1:-/Users/zliu/IdeaProjects/harness_test}"
+# Guarded so a missing directory yields a FAIL line rather than a bare set -e exit.
+cd "$PROJECT/src" || { echo "FAIL: src/ directory not found"; exit 1; }
+[ -f api/users.js ] || { echo "FAIL: api/users.js missing"; exit 1; }
+[ -f api/users.test.js ] || { echo "FAIL: api/users.test.js missing"; exit 1; }
+grep -q "export function listUsers\|export const listUsers\|export { listUsers" api/users.js || { echo "FAIL: listUsers not exported"; exit 1; }
+# Run agent's tests
+# The pipeline's own status is tee's, so node's exit code is read from PIPESTATUS.
+node --test api/users.test.js 2>&1 | tee /tmp/paginate-test-output.txt
+TEST_EXIT=${PIPESTATUS[0]}
+[ "$TEST_EXIT" = "0" ] || { echo "FAIL: agent tests failed"; exit 1; }
+# Independent smoke test
+node -e "
+import('./api/users.js').then(m => {
+  const assert = require('node:assert/strict');
+  // Record whether fn threw BEFORE asserting. Calling assert.fail inside the
+  // same try block would be caught by the catch that inspects the error type,
+  // so a missing throw would be reported as a wrong error type.
+  const expectRangeError = (label, fn) => {
+    let threw = false;
+    let error;
+    try { fn(); } catch (e) { threw = true; error = e; }
+    assert.ok(threw, 'expected RangeError for ' + label + ', but nothing was thrown');
+    assert.ok(error instanceof RangeError, label + ' should throw RangeError, got: ' + String(error));
+  };
+  // Default: page 1, 20 items
+  let r = m.listUsers();
+  assert.equal(r.items.length, 20, 'default pageSize 20');
+  assert.equal(r.items[0].id, 1);
+  assert.equal(r.total, 127);
+  assert.equal(r.page, 1);
+  assert.equal(r.pageSize, 20);
+  assert.equal(r.totalPages, 7);
+  assert.equal(r.hasNext, true);
+  assert.equal(r.hasPrev, false);
+  // Page 2
+  r = m.listUsers({ page: 2 });
+  assert.equal(r.items[0].id, 21);
+  assert.equal(r.items.length, 20);
+  assert.equal(r.hasPrev, true);
+  // Last page (127 / 20 = 6.35 → 7 pages; page 7 has 7 items)
+  r = m.listUsers({ page: 7 });
+  assert.equal(r.items.length, 7, 'last page has 7 items');
+  assert.equal(r.items[0].id, 121);
+  assert.equal(r.hasNext, false);
+  // Page 8 (beyond) — empty but correct metadata
+  r = m.listUsers({ page: 8 });
+  assert.equal(r.items.length, 0, 'page beyond: empty items');
+  assert.equal(r.total, 127);
+  assert.equal(r.totalPages, 7);
+  assert.equal(r.hasNext, false);
+  // Custom pageSize
+  r = m.listUsers({ page: 1, pageSize: 50 });
+  assert.equal(r.items.length, 50);
+  assert.equal(r.totalPages, 3);
+  // Invalid page
+  for (const p of [0, -1, 'abc', 1.5, NaN]) {
+    expectRangeError('page=' + p, () => m.listUsers({ page: p }));
+  }
+  // Invalid pageSize
+  for (const ps of [0, 101, 'abc', 1.5]) {
+    expectRangeError('pageSize=' + ps, () => m.listUsers({ page: 1, pageSize: ps }));
+  }
+  console.log('SMOKE_TEST_PASSED');
+}).catch(err => { console.error('SMOKE_TEST_FAILED:', err.message); process.exit(1); });
+" || { echo "FAIL: smoke test failed"; exit 1; }
+echo "PASS"
+exit 0

package/scripts/bench/tasks/03-bug-fix/setup.sh ADDED Viewed

@@ -0,0 +1,82 @@
+#!/usr/bin/env bash
+# Create a date range calculator with 2 bugs.
+set -e
+PROJECT="${1:-/Users/zliu/IdeaProjects/harness_test}"
+mkdir -p "$PROJECT/src/lib" "$PROJECT/src/lib/__tests__"
+cat > "$PROJECT/src/lib/dateRange.js" <<'EOF'
+// Compute the inclusive number of days between two YYYY-MM-DD dates.
+// Returns a positive integer. If end is before start, throws RangeError.
+export function daysBetween(startStr, endStr) {
+  if (typeof startStr !== 'string' || typeof endStr !== 'string') {
+    throw new TypeError('daysBetween expects two YYYY-MM-DD strings');
+  }
+  const start = new Date(startStr);
+  const end = new Date(endStr);
+  if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) {
+    throw new TypeError('invalid date format');
+  }
+  if (end < start) throw new RangeError('end before start');
+  // BUG: missing +1 to be inclusive of both endpoints
+  return Math.floor((end - start) / (1000 * 60 * 60 * 24));
+}
+// Return array of YYYY-MM-DD strings from start to end (inclusive).
+export function dateRange(startStr, endStr) {
+  const days = daysBetween(startStr, endStr);
+  const result = [];
+  const current = new Date(startStr);
+  // BUG: loop condition uses < instead of <=, excluding final day
+  for (let i = 0; i < days; i++) {
+    result.push(current.toISOString().slice(0, 10));
+    current.setDate(current.getDate() + 1);
+  }
+  return result;
+}
+EOF
+cat > "$PROJECT/src/lib/__tests__/dateRange.test.js" <<'EOF'
+import { test } from 'node:test';
+import assert from 'node:assert/strict';
+import { daysBetween, dateRange } from '../dateRange.js';
+test('daysBetween: same day returns 1', () => {
+  assert.equal(daysBetween('2026-01-01', '2026-01-01'), 1);
+});
+test('daysBetween: one day apart returns 2', () => {
+  assert.equal(daysBetween('2026-01-01', '2026-01-02'), 2);
+});
+test('daysBetween: one week', () => {
+  assert.equal(daysBetween('2026-01-01', '2026-01-07'), 7);
+});
+test('daysBetween: end before start throws RangeError', () => {
+  assert.throws(() => daysBetween('2026-01-05', '2026-01-01'), RangeError);
+});
+test('daysBetween: non-string throws TypeError', () => {
+  assert.throws(() => daysBetween(20260101, '2026-01-02'), TypeError);
+});
+test('dateRange: single day returns array with one date', () => {
+  assert.deepEqual(dateRange('2026-01-01', '2026-01-01'), ['2026-01-01']);
+});
+test('dateRange: three days', () => {
+  assert.deepEqual(
+    dateRange('2026-01-01', '2026-01-03'),
+    ['2026-01-01', '2026-01-02', '2026-01-03']
+  );
+});
+test('dateRange: includes both endpoints', () => {
+  const r = dateRange('2026-03-30', '2026-04-02');
+  assert.equal(r.length, 4);
+  assert.equal(r[0], '2026-03-30');
+  assert.equal(r[r.length - 1], '2026-04-02');
+});
+EOF
+echo "Setup complete: created src/lib/dateRange.js (with 2 bugs) and tests that currently fail."

package/scripts/bench/tasks/03-bug-fix/task.md ADDED Viewed

@@ -0,0 +1,30 @@
+# Task: Fix Bugs in dateRange Module
+The file `src/lib/dateRange.js` has 2 bugs. The existing test file `src/lib/__tests__/dateRange.test.js` describes the expected behavior.
+## Your job
+1. Run the existing tests — several will fail. Identify what's wrong.
+2. Fix both bugs in `src/lib/dateRange.js`.
+3. Do NOT modify the test file. The tests correctly express the expected behavior.
+4. Do NOT change the function signatures or add new functions.
+5. After fixing, all tests must pass.
+## Verify
+```bash
+cd src && node --test lib/__tests__/dateRange.test.js
+```
+All tests should pass.
+## Hints
+- `daysBetween('2026-01-01', '2026-01-01')` should return `1` (inclusive count)
+- `dateRange('2026-01-01', '2026-01-03')` should return all 3 days including both endpoints
+## Constraints
+- Minimal diff — fix only what's broken
+- Keep the functions pure
+- No new dependencies

package/scripts/bench/tasks/03-bug-fix/validator.sh ADDED Viewed

@@ -0,0 +1,29 @@
+#!/usr/bin/env bash
+# Validator for the dateRange bug-fix task.
+# Usage: validator.sh [PROJECT]
+# Checks that the module and its test file still exist, runs the test file,
+# then runs an independent smoke test on daysBetween and dateRange.
+# Exits 0 = pass, non-zero = fail; prints PASS or a "FAIL: <reason>" line.
+set -e
+PROJECT="${1:-/Users/zliu/IdeaProjects/harness_test}"
+# Guarded so a missing directory yields a FAIL line rather than a bare set -e exit.
+cd "$PROJECT/src" || { echo "FAIL: src/ directory not found"; exit 1; }
+[ -f lib/dateRange.js ] || { echo "FAIL: lib/dateRange.js missing (agent deleted it?)"; exit 1; }
+[ -f lib/__tests__/dateRange.test.js ] || { echo "FAIL: test file missing (agent deleted it?)"; exit 1; }
+# Run the existing (unmodified) tests
+# NOTE(review): only the file's presence is checked above; nothing here verifies
+# that its contents are unchanged. The smoke test below is the independent check.
+# The pipeline's own status is tee's, so node's exit code is read from PIPESTATUS.
+node --test lib/__tests__/dateRange.test.js 2>&1 | tee /tmp/bugfix-test-output.txt
+TEST_EXIT=${PIPESTATUS[0]}
+[ "$TEST_EXIT" = "0" ] || { echo "FAIL: tests still failing after fix"; exit 1; }
+# Extra smoke: verify functions exist and behave
+node -e "
+import('./lib/dateRange.js').then(m => {
+  const assert = require('node:assert/strict');
+  assert.equal(m.daysBetween('2026-01-01', '2026-01-01'), 1);
+  assert.equal(m.daysBetween('2026-01-01', '2026-01-10'), 10);
+  const r = m.dateRange('2026-01-01', '2026-01-05');
+  assert.equal(r.length, 5, 'should include both endpoints');
+  assert.equal(r[0], '2026-01-01');
+  assert.equal(r[4], '2026-01-05');
+  console.log('SMOKE_TEST_PASSED');
+}).catch(err => { console.error('SMOKE_TEST_FAILED:', err.message); process.exit(1); });
+" || { echo "FAIL: smoke test failed"; exit 1; }
+echo "PASS"
+exit 0

package/templates/smith-lead.json ADDED Viewed

@@ -0,0 +1,45 @@
+{
+  "name": "Lead",
+  "icon": "👑",
+  "description": "Primary coordinator — SOP-driven requirement intake, delegation, gap coverage, and quality gate",
+  "config": {
+    "label": "Lead",
+    "icon": "👑",
+    "backend": "cli",
+    "agentId": "claude",
+    "primary": true,
+    "persistentSession": true,
+    "workDir": "./",
+    "outputs": ["docs/lead/"],
+    "plugins": ["playwright", "shell-command"],
+    "role": "You are the Lead — primary coordinator of this workspace.\n\nYour context automatically includes a \"Workspace Team\" section showing all agents, their roles, status, and missing standard roles. Read it before every action.\n\n## SOP: Requirement Intake\n\nWhen you receive a requirement (from user input or inbox message):\n\n```\n1. Read the Workspace Team section in your context\n2. Classify the requirement:\n   - Single task → one request document\n   - Multi-module → break into independent request documents, group in a batch\n3. Route based on available roles:\n\n   HAS Architect?\n   └─ YES → create_request with full description → Architect breaks it down further\n   └─ NO → you break it down yourself, then:\n\n        HAS Engineer?\n        └─ YES → create_request for each module (status: open)\n                 Engineers claim via claim_request\n        └─ NO → implement it yourself in src/\n                 Record files_changed in docs/lead/impl-notes.md\n\n4. After implementation (by you or Engineer):\n\n   HAS QA?\n   └─ YES → update_response(section: engineer) triggers auto-notify to QA\n   └─ NO → you test it:\n          - Read acceptance_criteria from the request\n          - Write tests in tests/ or run manually\n          - Record results: update_response(section: qa, result: passed/failed)\n\n5. After testing:\n\n   HAS Reviewer?\n   └─ YES → auto-notified when QA passes\n   └─ NO → you review it:\n          - Check code quality, security, PRD compliance\n          - Record: update_response(section: review, result: approved/changes_requested)\n          - If changes_requested → send_message to Engineer or fix yourself\n```\n\n## SOP: Monitoring & Coordination\n\nWhile work is in progress:\n\n```\n1. get_status → check all agents' smith/task status\n2. 
list_requests → check request progress\n\nIF agent taskStatus = failed:\n   → Read their error from get_status\n   → send_message asking what went wrong\n   → If no response or unfixable: handle the request yourself\n\nIF agent taskStatus = running for too long:\n   → send_message to check progress\n\nIF request stuck in one status:\n   → Check which agent should be handling it\n   → send_message to nudge, or cover it yourself\n\nIF multiple Engineers exist and request unclaimed:\n   → send_message to available Engineer suggesting they claim_request\n```\n\n## SOP: Quality Gate (before declaring done)\n\n```\n1. list_requests(batch: current_batch)\n2. ALL requests status = done?\n   └─ NO → identify stuck ones, apply Monitoring SOP\n   └─ YES → continue\n3. Any request with review.result = changes_requested?\n   └─ YES → verify changes were made, re-review if no Reviewer\n4. Any request with qa.result = failed?\n   └─ YES → verify fix was applied, re-test if no QA\n5. Write summary to docs/lead/delivery-summary.md:\n   - Requirements covered\n   - Requests created and their final status\n   - Roles you covered due to gaps\n   - Any open issues\n```\n\n## Gap Coverage Reference\n\nWhen you cover a missing role, follow that role's standards:\n\n| Missing Role | What You Do | Output |\n|---|---|---|\n| PM/Architect | Break requirements into modules with acceptance_criteria | request documents via create_request |\n| Engineer | Read request → implement in src/ → update_response(section: engineer) | source code + files_changed |\n| QA | Read acceptance_criteria → write/run tests → update_response(section: qa) | test results in tests/ or docs/qa/ |\n| Reviewer | Read code changes → check quality/security → update_response(section: review) | review findings |\n\n## Rules\n\n- Workspace Team is in your context — don't call get_agents redundantly at start, just read it\n- DO call get_agents/get_status when you need live status updates mid-task\n- Every delegated task MUST 
go through request documents (create_request) — never just send_message with vague instructions\n- Each request needs concrete acceptance_criteria that QA can verify\n- Do NOT duplicate work an active agent is already doing — check status first\n- When covering a gap, be thorough — don't half-do it just because it's not your \"main\" role",
+    "steps": [
+      {
+        "id": "intake",
+        "label": "Intake & Analyze",
+        "prompt": "Read the Workspace Team section in your context. Identify: (1) which standard roles are present and missing, (2) incoming requirements from upstream input or inbox. For each requirement, decide scope: single task or multi-module. List what you will delegate vs handle yourself."
+      },
+      {
+        "id": "delegate",
+        "label": "Create Requests & Route",
+        "prompt": "For each module/task: create_request with title, description, acceptance_criteria, and batch name. If Architect exists, create high-level requests for them to break down. If only Engineers exist, create implementation-ready requests. If no one to delegate to, note which requests you will implement yourself. Verify with list_requests that all were created."
+      },
+      {
+        "id": "cover-gaps",
+        "label": "Cover Missing Roles",
+        "prompt": "Handle all work for missing roles. If no Engineer: implement code, then update_response(section: engineer). If no QA: write/run tests against acceptance_criteria, then update_response(section: qa). If no Reviewer: review code changes for quality and security, then update_response(section: review). Use get_status between tasks to check if other agents have completed their work."
+      },
+      {
+        "id": "monitor",
+        "label": "Monitor & Unblock",
+        "prompt": "Run get_status and list_requests. For each stuck/failed agent: diagnose and send_message to unblock, or take over the request yourself. For unclaimed requests: nudge available agents. Continue until all requests show progress."
+      },
+      {
+        "id": "gate",
+        "label": "Quality Gate & Summary",
+        "prompt": "list_requests for the current batch. Verify ALL requests are status=done with review.result=approved and qa.result=passed. If any are not: apply gap coverage. Write docs/lead/delivery-summary.md with: requirements covered, request statuses, roles you covered, open issues."
+      }
+    ]
+  },
+  "exportedAt": 1743724800000
+}