@gethmy/agent 1.12.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/dist/cli.js +349 -251
  2. package/dist/index.js +349 -251
  3. package/package.json +2 -2
package/dist/cli.js CHANGED
@@ -1158,6 +1158,294 @@ var TAG3 = "http";
1158
1158
  var init_http_server = __esm(() => {
1159
1159
  init_log();
1160
1160
  });
1161
+ // ../harmony-shared/dist/branchRef.js
1162
// Patterns for reading a branch reference out of free-form card text.
var BRANCH_REF_PATTERN, SAFE_GIT_REF_PATTERN;
var init_branchRef = __esm(() => {
  // Captures the text between backticks in a "Branch: `name`" marker.
  BRANCH_REF_PATTERN = /Branch:\s*`([^`]+)`/;
  // Allow-list for ref names: ASCII letters, digits, "/", "_", "." and "-".
  // Two extra guards on top of the character allow-list:
  //   - no leading "-", so the value can never be parsed as a command-line option;
  //   - no "..", which git reserves for revision ranges and forbids inside ref names.
  // NOTE(review): presumably the validated name is later passed to git commands — confirm at the call sites.
  SAFE_GIT_REF_PATTERN = /^(?!-)(?!.*\.\.)[a-zA-Z0-9/_.-]+$/;
});
1167
+
1168
+ // ../harmony-shared/dist/cardLinks.js
1169
+ var init_cardLinks = () => {};
1170
+ // ../harmony-shared/dist/classification.js
1171
/**
 * Returns the entry that follows `tier` in MODEL_TIERS.
 *
 * The result saturates at the last entry, so escalating the top tier returns it
 * unchanged. A value that is not in MODEL_TIERS has index -1 and therefore maps
 * to the first entry.
 *
 * @param {*} tier - Tier to escalate from.
 * @returns {*} The next entry of MODEL_TIERS, capped at the last one.
 */
function escalateTier(tier) {
  const lastIndex = MODEL_TIERS.length - 1;
  const nextIndex = MODEL_TIERS.indexOf(tier) + 1;
  return MODEL_TIERS[nextIndex > lastIndex ? lastIndex : nextIndex];
}
1175
/**
 * Type guard: reports whether `v` is a string listed in MODEL_TIERS.
 *
 * @param {*} v - Candidate value.
 * @returns {boolean} True only for strings that appear in MODEL_TIERS.
 */
function isModelTier(v) {
  if (typeof v !== "string")
    return false;
  return MODEL_TIERS.includes(v);
}
1178
+ var MODEL_TIERS;
1179
+ var init_classification = __esm(() => {
1180
+ MODEL_TIERS = ["simple", "advanced", "research"];
1181
+ });
1182
+
1183
+ // ../harmony-shared/dist/commentSerializer.js
1184
/**
 * Makes a value safe to embed in a single-line, pipe-delimited, bracketed header.
 *
 * Each of `]`, CR, LF, `|`, `<` and `>` is replaced with a space, and the result
 * is trimmed. Null, undefined and non-string values are coerced instead of
 * throwing, so a missing field renders as the placeholder.
 *
 * @param {*} value - Raw field value.
 * @returns {string} The cleaned text, or "—" when nothing is left.
 */
function sanitizeHeaderField(value) {
  const cleaned = String(value ?? "").replace(/[\]\r\n|<>]/g, " ").trim();
  return cleaned || "—";
}
1187
/**
 * Produces the author column for a comment header.
 *
 * Agent-authored comments always get the fixed label "AI agent". Otherwise the
 * author's `full_name` is used, falling back to "teammate" when it is missing or
 * empty, and the result is passed through sanitizeHeaderField.
 *
 * @param {object} c - Comment; reads `author_type` and `author.full_name`.
 * @returns {string} Label for the header.
 */
function authorLabel(c) {
  const writtenByAgent = c.author_type === "agent";
  if (writtenByAgent)
    return "AI agent";
  const displayName = c.author?.full_name;
  return sanitizeHeaderField(displayName ? displayName : "teammate");
}
1193
/**
 * Collects the ids of comments that must survive truncation of a long thread.
 *
 * A comment is kept when its `comment_type` is "decision", or when it links to
 * another comment through `supersedes_id` or `confirms_id`; in the linking case
 * the referenced id is kept as well.
 *
 * @param {Iterable<object>} comments - Comments to scan.
 * @returns {Set} Ids to keep.
 */
function criticalIds(comments) {
  const keep = new Set();
  for (const { id, comment_type, supersedes_id, confirms_id } of comments) {
    if (comment_type === "decision")
      keep.add(id);
    // Both link kinds are handled the same way: keep the linker and its target.
    for (const linkedId of [supersedes_id, confirms_id]) {
      if (linkedId) {
        keep.add(id);
        keep.add(linkedId);
      }
    }
  }
  return keep;
}
1209
/**
 * Renders a comment thread, plus optional activity entries, as one Markdown section.
 *
 * Comments with `deleted_at` set are dropped and the rest are ordered by
 * `created_at`. Each comment becomes a bracketed, pipe-delimited header line
 * followed by its body inside a `<comment-body>` fence; `<` and `>` in the body
 * are HTML-escaped so the body cannot close the fence. Comments are referred to
 * as `#N`, their 1-based position among the visible comments.
 *
 * @param {object[]} comments - Reads `id`, `created_at`, `deleted_at`, `body`,
 *   `author_type`, `comment_type`, `edited_at`, `supersedes_id`, `confirms_id`
 *   and `resolved_at` from each entry.
 * @param {object} [options]
 * @param {string} [options.heading="Conversation"] - Section title.
 * @param {boolean} [options.includeInstructions=true] - Append CONFLICT_INSTRUCTION.
 * @param {object[]} [options.activity=[]] - Entries with `at`, `text` and an
 *   optional `actor`, interleaved with the comments by timestamp.
 * @param {number} [options.maxComments] - When set and exceeded, only the most
 *   recent `maxComments` comments plus those returned by criticalIds are
 *   rendered, and a one-line omission notice is added.
 * @returns {string} The rendered section, or "" when no comment is visible.
 */
function serializeCommentThread(comments, options = {}) {
  const { heading = "Conversation", includeInstructions = true, activity = [], maxComments } = options;
  // filter() already returns a fresh array, so sorting it leaves the caller's array untouched.
  const visible = comments.filter((c) => !c.deleted_at).sort((a, b) => a.created_at.localeCompare(b.created_at));
  if (visible.length === 0)
    return "";
  // Numbering comes from the full visible list so "#N" stays stable when older comments are elided.
  const indexById = new Map();
  visible.forEach((c, i) => {
    indexById.set(c.id, i + 1);
  });
  let rendered = visible;
  let elidedCount = 0;
  if (maxComments && visible.length > maxComments) {
    const keep = criticalIds(visible);
    const recentThreshold = visible.length - maxComments;
    rendered = visible.filter((c, i) => i >= recentThreshold || keep.has(c.id));
    elidedCount = visible.length - rendered.length;
  }
  // Ids outside the thread fall back to their first 8 characters. The result is
  // always sanitized because it is embedded in the header, and the id is coerced
  // so a non-string id cannot throw.
  const ref = (id) => {
    const n = indexById.get(id);
    return sanitizeHeaderField(n ? `#${n}` : `#${String(id).slice(0, 8)}`);
  };
  const lines = [];
  if (elidedCount > 0) {
    // Stamped with the oldest comment's time so the notice sorts to the front of the thread.
    lines.push({
      at: visible[0]?.created_at ?? "",
      text: `(${elidedCount} earlier comment(s) omitted for brevity)`
    });
  }
  for (const c of rendered) {
    const tags = [];
    if (c.edited_at)
      tags.push("edited");
    if (c.supersedes_id)
      tags.push(`supersedes ${ref(c.supersedes_id)}`);
    if (c.confirms_id)
      tags.push(`confirms ${ref(c.confirms_id)}`);
    if (c.resolved_at)
      tags.push("resolved");
    const tagStr = tags.length ? ` | ${tags.join(" | ")}` : "";
    const header = `[${ref(c.id)} | ${sanitizeHeaderField(c.author_type)} | ${authorLabel(c)} | ${sanitizeHeaderField(c.comment_type)} | ${sanitizeHeaderField(c.created_at)}${tagStr}]`;
    // A missing body renders as an empty fence instead of throwing.
    const fencedBody = String(c.body ?? "").trim().replaceAll("<", "&lt;").replaceAll(">", "&gt;");
    lines.push({
      at: c.created_at,
      text: `${header}\n<comment-body>\n${fencedBody}\n</comment-body>`
    });
  }
  for (const a of activity) {
    const actor = a.actor ? `${a.actor} ` : "";
    lines.push({ at: a.at, text: `· (system) ${a.at} — ${actor}${a.text}` });
  }
  lines.sort((a, b) => a.at.localeCompare(b.at));
  const body = lines.map((l) => l.text).join("\n\n");
  const instruction = includeInstructions ? `\n\n${CONFLICT_INSTRUCTION}` : "";
  return `## ${heading} (oldest → newest)\n\n${body}${instruction}`;
}
1273
+ var CONFLICT_INSTRUCTION;
1274
+ var init_commentSerializer = __esm(() => {
1275
+ CONFLICT_INSTRUCTION = "When two comments conflict, prefer the latest created_at, UNLESS a later " + "comment explicitly confirms or restates the earlier finding. Evaluate " + "substance, not just recency. Cite the comment id(s) you relied on.";
1276
+ });
1277
+
1278
+ // ../harmony-shared/dist/constants.js
1279
+ var TIMINGS;
1280
+ var init_constants = __esm(() => {
1281
+ TIMINGS = {
1282
+ SEARCH_DEBOUNCE: 300,
1283
+ AUTOSAVE_DEBOUNCE: 1000,
1284
+ TOAST_DURATION: 3000,
1285
+ QUERY_STALE_TIME: 1000 * 60 * 5,
1286
+ QUERY_GC_TIME: 1000 * 60 * 60 * 24
1287
+ };
1288
+ });
1289
+ // ../harmony-shared/dist/logger.js
1290
+ var init_logger = () => {};
1291
+ // ../harmony-shared/dist/projectTemplates.js
1292
+ var init_projectTemplates = () => {};
1293
+
1294
+ // ../harmony-shared/dist/reviewMethodology.js
1295
+ var REVIEW_SYSTEM_PROMPT = `You are a senior code reviewer. Follow this two-pass methodology strictly.
1296
+ Report findings; do NOT fix them. This is a read-only review.
1297
+
1298
+ Review the diff through five lenses on every pass: functionality, security,
1299
+ performance, code quality, and best practices. For every finding, set
1300
+ \`relatedToDiff\`: true when the change under review introduced or exposed it,
1301
+ false when it is a pre-existing issue you happened to notice. Only diff-caused
1302
+ findings gate the verdict — pre-existing ones are reported for context and never
1303
+ block.
1304
+
1305
+ ## Two-Pass Review
1306
+
1307
+ ### Pass 1 — CRITICAL (highest severity)
1308
+
1309
+ **SQL & Data Safety**
1310
+ - String interpolation in SQL — use parameterized queries / prepared statements
1311
+ - TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE
1312
+
1313
+ **Race Conditions & Concurrency**
1314
+ - Read-check-write without uniqueness constraint or duplicate key handling
1315
+ - Status transitions without atomic WHERE old_status UPDATE SET new_status
1316
+ - Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)
1317
+
1318
+ **Security & Access Control**
1319
+ - Hardcoded secrets, API keys, or credentials committed to source
1320
+ - New endpoints, mutations, or service-role/RLS-exempt queries missing an auth or ownership check
1321
+ - Over-broad CORS, missing input validation on a trust boundary, injection beyond SQL (command, path, template)
1322
+
1323
+ **LLM Output Trust Boundary**
1324
+ - LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())
1325
+ - Structured tool output accepted without type/shape checks before database writes
1326
+
1327
+ **Enum & Value Completeness**
1328
+ - When the diff introduces a new enum/status/type value, trace it through every consumer
1329
+ - Check allowlists, filter arrays, and case/if-elsif chains for the new value
1330
+ - Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff
1331
+
1332
+ ### Pass 2 — INFORMATIONAL (lower severity)
1333
+
1334
+ **Functionality & Edge Cases**
1335
+ - Logic errors, off-by-one, unhandled null/undefined, wrong API or library usage
1336
+ - Conditional side effects: code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)
1337
+
1338
+ **Performance**
1339
+ - O(n²) algorithms and O(n*m) lookups (Array.find in a loop instead of a Map/index)
1340
+ - N+1 queries, unbounded fetches missing pagination, repeated work that should be cached/memoized
1341
+ - Unnecessary React re-renders (unstable props/deps, inline object/array literals); leaked subscriptions, timers, or listeners
1342
+ - Inline styles re-parsed every render
1343
+
1344
+ **Code Quality**
1345
+ - Dead code: variables assigned but never read, unreachable branches
1346
+ - Duplication that should be extracted, over-long functions, unclear naming
1347
+ - \`any\` / unchecked casts that defeat the type system
1348
+ - Comments/docstrings describing old behavior after code changed
1349
+
1350
+ **Best Practices & Conventions**
1351
+ - Deviations from established project conventions and framework idioms / anti-patterns
1352
+ - React hook dependency arrays that are wrong, missing, or over-broad
1353
+ - Accessibility gaps on new UI: missing labels, roles, alt text, or keyboard paths
1354
+
1355
+ **Test Gaps**
1356
+ - Missing negative-path tests for new error handling
1357
+ - Security enforcement features without integration tests
1358
+
1359
+ **Completeness Gaps**
1360
+ - Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add
1361
+
1362
+ ## Severity Classification
1363
+
1364
+ - **critical**: SQL safety, race conditions, XSS, secrets/auth/injection holes, LLM trust boundary violations, enum completeness gaps causing runtime errors
1365
+ - **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects, performance regressions on a hot path
1366
+ - **minor**: Dead code, stale comments, test gaps, naming/duplication, minor view issues, cosmetic completeness gaps
1367
+
1368
+ ## Suppressions — DO NOT flag these
1369
+
1370
+ - Redundancy that aids readability (e.g., present? redundant with length > 20)
1371
+ - "Add a comment explaining why this threshold was chosen" — thresholds change, comments rot
1372
+ - Consistency-only changes (wrapping a value to match how another constant is guarded)
1373
+ - Regex edge cases when input is constrained and the edge case never occurs in practice
1374
+ - Eval threshold changes — these are tuned empirically
1375
+ - Harmless no-ops (e.g., .reject on an element never in the array)
1376
+ - Pre-existing issues unrelated to the diff, beyond a single noted finding (set relatedToDiff:false; never block on them)
1377
+ - ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging`, REVIEW_ACCEPTANCE_CHECKS = `## Acceptance Checks
1378
+
1379
+ Before judging code quality, verify the change actually satisfies the card.
1380
+ Derive one acceptance check per concrete requirement in the card description and
1381
+ one per subtask (the stated acceptance criteria). For each, assign a status from
1382
+ hard evidence — cite the file:line you read or the dev-server behaviour you
1383
+ observed that proves it:
1384
+
1385
+ - **pass** — implemented and verified by code you read or behaviour you observed
1386
+ - **partial** — started but incomplete (a missing branch, an edge case, or one of several bundled requirements)
1387
+ - **fail** — required but absent, or implemented incorrectly
1388
+ - **unverifiable** — cannot be confirmed from the diff or a running app (state why)
1389
+
1390
+ Do NOT mark a check "pass" on the implementing agent's say-so or a subtask's
1391
+ checkbox alone — only on evidence you found yourself. Any \`fail\` or \`partial\`
1392
+ check is an unaddressed requirement and forces a rejected verdict.`, QA_VISUAL_CHECKLIST = `## Visual QA Checklist
1393
+
1394
+ For each page affected by the changes:
1395
+
1396
+ 1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.
1397
+ 2. **Interactive elements** — Click every button, link, and control. Does each do what it says?
1398
+ 3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.
1399
+ 4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.
1400
+ 5. **States** — Check empty state, loading state, error state, overflow state.
1401
+ 6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.
1402
+ 7. **Responsiveness** — If the change is visual, check mobile viewport (375px).
1403
+
1404
+ ### SPA-Specific (React/Vite)
1405
+ - Use snapshot for navigation — client-side routes may not appear in link lists.
1406
+ - Check for stale state: navigate away and back — does data refresh correctly?
1407
+ - Test browser back/forward — does the app handle history correctly?
1408
+ - Watch for hydration errors or layout shifts after dynamic content loads.`, REVIEW_VERDICT_SCHEMA = `{
1409
+ "verdict": "approved" | "rejected",
1410
+ "summary": "Brief overall assessment",
1411
+ "scopeCheck": {
1412
+ "status": "clean" | "drift" | "missing",
1413
+ "notes": "Optional explanation of scope issues"
1414
+ },
1415
+ "acceptanceChecks": [
1416
+ {
1417
+ "criterion": "The requirement or subtask being verified",
1418
+ "status": "pass" | "partial" | "fail" | "unverifiable",
1419
+ "evidence": "file:line or observed behaviour that proves the status"
1420
+ }
1421
+ ],
1422
+ "findings": [
1423
+ {
1424
+ "severity": "critical" | "major" | "minor",
1425
+ "category": "sql-safety | race-condition | security | llm-trust | enum-completeness | functional | performance | code-quality | best-practices | accessibility | visual | ux | console | scope | other",
1426
+ "title": "Short title",
1427
+ "description": "Detailed description of the issue",
1428
+ "location": "file:line (if applicable)",
1429
+ "relatedToDiff": true
1430
+ }
1431
+ ]
1432
+ }`, REVIEW_DECISION_RULES = `Counting only findings with \`relatedToDiff: true\`:
1433
+ - **rejected**: Any acceptance check that is \`fail\` or \`partial\`, any \`critical\` finding, unaddressed requirements, or 2+ \`major\` findings.
1434
+ - **approved**: Every acceptance check \`pass\` (or \`unverifiable\` with a stated reason), no critical findings, at most 1 major finding; minor findings OK.`;
1435
+ // ../harmony-shared/dist/types.js
1436
+ var init_types2 = () => {};
1437
+
1438
+ // ../harmony-shared/dist/index.js
1439
+ var init_dist = __esm(() => {
1440
+ init_branchRef();
1441
+ init_cardLinks();
1442
+ init_classification();
1443
+ init_commentSerializer();
1444
+ init_constants();
1445
+ init_logger();
1446
+ init_projectTemplates();
1447
+ init_types2();
1448
+ });
1161
1449
 
1162
1450
  // src/pm.ts
1163
1451
  import { execFileSync as execFileSync2 } from "node:child_process";
@@ -1419,9 +1707,8 @@ function checkoutExistingBranch(basePath, branchName) {
1419
1707
  function extractBranchFromDescription(description) {
1420
1708
  if (!description)
1421
1709
  return null;
1422
- const match = description.match(/Branch:\s*`([^`]+)`/);
1423
- const branch = match?.[1] ?? null;
1424
- if (branch && !/^[a-zA-Z0-9/_.-]+$/.test(branch)) {
1710
+ const branch = description.match(BRANCH_REF_PATTERN)?.[1] ?? null;
1711
+ if (branch && !SAFE_GIT_REF_PATTERN.test(branch)) {
1425
1712
  log.warn(TAG6, `Extracted branch name contains unsafe characters: ${branch}`);
1426
1713
  return null;
1427
1714
  }
@@ -1429,6 +1716,7 @@ function extractBranchFromDescription(description) {
1429
1716
  }
1430
1717
  var TAG6 = "review-worktree";
1431
1718
  var init_review_worktree = __esm(() => {
1719
+ init_dist();
1432
1720
  init_log();
1433
1721
  init_pm();
1434
1722
  init_worktree();
@@ -3375,6 +3663,22 @@ function buildFindingComments(findings) {
3375
3663
  bodies.push(current);
3376
3664
  return bodies;
3377
3665
  }
3666
/**
 * Builds the one-line acceptance summary shown in a review comment, e.g.
 * "Acceptance: 3/5 pass (1 fail, 1 partial)".
 *
 * The denominator is the total number of checks. Only the non-zero fail,
 * partial and unverifiable counts are listed, in that order.
 *
 * @param {Array<{status: string}>|undefined|null} checks - Acceptance checks.
 * @returns {string} The summary line, or "" when there are no checks.
 */
function acceptanceSummaryLine(checks) {
  if (!checks || checks.length === 0)
    return "";
  const counts = { pass: 0, partial: 0, fail: 0, unverifiable: 0 };
  for (const c of checks) {
    const status = c?.status;
    // Tally only the four known buckets: an unexpected status would otherwise
    // write NaN under an arbitrary key, and a null entry would throw.
    if (Object.prototype.hasOwnProperty.call(counts, status))
      counts[status]++;
  }
  const flagged = ["fail", "partial", "unverifiable"]
    .filter((status) => counts[status] > 0)
    .map((status) => `${counts[status]} ${status}`);
  const detail = flagged.length ? ` (${flagged.join(", ")})` : "";
  return `Acceptance: ${counts.pass}/${checks.length} pass${detail}`;
}
3378
3682
  function tailRunLog(path, bytes = RUN_LOG_TAIL_BYTES) {
3379
3683
  try {
3380
3684
  const size = statSync(path).size;
@@ -3388,14 +3692,32 @@ function tailRunLog(path, bytes = RUN_LOG_TAIL_BYTES) {
3388
3692
  }
3389
3693
  }
3390
3694
  function extractResult(parsed) {
3391
- const verdict = parsed.verdict === "approved" || parsed.verdict === "rejected" ? parsed.verdict : "rejected";
3695
+ let verdict = parsed.verdict === "approved" || parsed.verdict === "rejected" ? parsed.verdict : "rejected";
3392
3696
  const findings = Array.isArray(parsed.findings) ? parsed.findings.filter((f) => typeof f === "object" && f !== null && ("title" in f)).map((f) => ({
3393
3697
  severity: f.severity === "critical" ? "critical" : f.severity === "minor" ? "minor" : "major",
3394
3698
  title: String(f.title ?? "Untitled finding"),
3395
3699
  description: String(f.description ?? ""),
3396
3700
  category: f.category ? String(f.category) : undefined,
3397
- location: f.location ? String(f.location) : undefined
3701
+ location: f.location ? String(f.location) : undefined,
3702
+ relatedToDiff: f.relatedToDiff !== false
3398
3703
  })) : [];
3704
+ const acceptanceChecks = Array.isArray(parsed.acceptanceChecks) ? parsed.acceptanceChecks.filter((c) => typeof c === "object" && c !== null && ("criterion" in c)).map((c) => ({
3705
+ criterion: String(c.criterion ?? "Unnamed criterion"),
3706
+ status: ["pass", "partial", "fail", "unverifiable"].includes(c.status) ? c.status : "unverifiable",
3707
+ evidence: c.evidence ? String(c.evidence) : undefined
3708
+ })) : undefined;
3709
+ const unmet = (acceptanceChecks ?? []).filter((c) => c.status === "fail" || c.status === "partial");
3710
+ if (verdict === "approved" && unmet.length > 0) {
3711
+ verdict = "rejected";
3712
+ findings.unshift({
3713
+ severity: "major",
3714
+ title: `Unmet acceptance criteria (${unmet.length})`,
3715
+ description: unmet.map((c) => `- [${c.status}] ${c.criterion}${c.evidence ? ` — ${c.evidence}` : ""}`).join(`
3716
+ `),
3717
+ category: "scope",
3718
+ relatedToDiff: true
3719
+ });
3720
+ }
3399
3721
  const scopeCheck = parsed.scopeCheck && typeof parsed.scopeCheck === "object" && "status" in parsed.scopeCheck ? {
3400
3722
  status: ["clean", "drift", "missing"].includes(parsed.scopeCheck.status) ? parsed.scopeCheck.status : "clean",
3401
3723
  notes: parsed.scopeCheck.notes ? String(parsed.scopeCheck.notes) : undefined
@@ -3404,6 +3726,7 @@ function extractResult(parsed) {
3404
3726
  verdict,
3405
3727
  summary: String(parsed.summary ?? "").slice(0, 2000),
3406
3728
  scopeCheck,
3729
+ acceptanceChecks,
3407
3730
  findings
3408
3731
  };
3409
3732
  }
@@ -3577,6 +3900,7 @@ ${runLogTail}
3577
3900
  const body = [
3578
3901
  "**Review — approved.**",
3579
3902
  result.summary || "",
3903
+ acceptanceSummaryLine(result.acceptanceChecks),
3580
3904
  scopeLine,
3581
3905
  result.findings.length > 0 ? `${result.findings.length} minor finding(s) noted.` : "",
3582
3906
  prUrl ? `PR: ${prUrl}` : ""
@@ -3592,10 +3916,11 @@ ${runLogTail}
3592
3916
  });
3593
3917
  log.info(TAG17, `#${card.short_id} approved${prUrl ? ` — PR: ${prUrl}` : ""} — labeled "${config.review.approvedLabel}"`);
3594
3918
  } else {
3595
- const criticalFindings = result.findings.filter((f) => f.severity === "critical").slice(0, MAX_FINDINGS);
3596
- const majorFindings = result.findings.filter((f) => f.severity === "major").slice(0, MAX_FINDINGS);
3919
+ const reworkFindings = result.findings.filter((f) => f.relatedToDiff !== false);
3920
+ const criticalFindings = reworkFindings.filter((f) => f.severity === "critical").slice(0, MAX_FINDINGS);
3921
+ const majorFindings = reworkFindings.filter((f) => f.severity === "major").slice(0, MAX_FINDINGS);
3597
3922
  const linkedFindings = [...criticalFindings, ...majorFindings];
3598
- const minorFindings = result.findings.filter((f) => f.severity === "minor").slice(0, MAX_FINDINGS);
3923
+ const minorFindings = reworkFindings.filter((f) => f.severity === "minor").slice(0, MAX_FINDINGS);
3599
3924
  if (currentCycle >= maxCycles) {
3600
3925
  log.warn(TAG17, `#${card.short_id} reached max review cycles (${maxCycles}), moving to Done with note`);
3601
3926
  await moveCardToColumn(client, card, config.review.moveToColumn);
@@ -3663,6 +3988,7 @@ ${runLogTail}
3663
3988
  const body = [
3664
3989
  "**Review — rejected.**",
3665
3990
  result.summary || "",
3991
+ acceptanceSummaryLine(result.acceptanceChecks),
3666
3992
  scopeLine,
3667
3993
  `${criticalFindings.length} critical, ${majorFindings.length} major, ${minorFindings.length} minor finding(s).`
3668
3994
  ].filter(Boolean).join(`
@@ -3734,239 +4060,6 @@ var init_review_completion = __esm(() => {
3734
4060
  init_types();
3735
4061
  init_worktree();
3736
4062
  });
3737
- // ../harmony-shared/dist/cardLinks.js
3738
- var init_cardLinks = () => {};
3739
- // ../harmony-shared/dist/classification.js
3740
- function escalateTier(tier) {
3741
- const i = MODEL_TIERS.indexOf(tier);
3742
- return MODEL_TIERS[Math.min(i + 1, MODEL_TIERS.length - 1)];
3743
- }
3744
- function isModelTier(v) {
3745
- return typeof v === "string" && MODEL_TIERS.includes(v);
3746
- }
3747
- var MODEL_TIERS;
3748
- var init_classification = __esm(() => {
3749
- MODEL_TIERS = ["simple", "advanced", "research"];
3750
- });
3751
-
3752
- // ../harmony-shared/dist/commentSerializer.js
3753
- function sanitizeHeaderField(value) {
3754
- return value.replace(/[\]\r\n|<>]/g, " ").trim() || "—";
3755
- }
3756
- function authorLabel(c) {
3757
- if (c.author_type === "agent")
3758
- return "AI agent";
3759
- const raw = c.author?.full_name || "teammate";
3760
- return sanitizeHeaderField(raw);
3761
- }
3762
- function criticalIds(comments) {
3763
- const keep = new Set;
3764
- for (const c of comments) {
3765
- if (c.comment_type === "decision")
3766
- keep.add(c.id);
3767
- if (c.supersedes_id) {
3768
- keep.add(c.id);
3769
- keep.add(c.supersedes_id);
3770
- }
3771
- if (c.confirms_id) {
3772
- keep.add(c.id);
3773
- keep.add(c.confirms_id);
3774
- }
3775
- }
3776
- return keep;
3777
- }
3778
- function serializeCommentThread(comments, options = {}) {
3779
- const { heading = "Conversation", includeInstructions = true, activity = [], maxComments } = options;
3780
- const visible = comments.filter((c) => !c.deleted_at).slice().sort((a, b) => a.created_at.localeCompare(b.created_at));
3781
- if (visible.length === 0)
3782
- return "";
3783
- const indexById = new Map;
3784
- visible.forEach((c, i) => {
3785
- indexById.set(c.id, i + 1);
3786
- });
3787
- let rendered = visible;
3788
- let elidedCount = 0;
3789
- if (maxComments && visible.length > maxComments) {
3790
- const keep = criticalIds(visible);
3791
- const recentThreshold = visible.length - maxComments;
3792
- rendered = visible.filter((c, i) => i >= recentThreshold || keep.has(c.id));
3793
- elidedCount = visible.length - rendered.length;
3794
- }
3795
- const ref = (id) => {
3796
- const n = indexById.get(id);
3797
- return n ? `#${n}` : `#${id.slice(0, 8)}`;
3798
- };
3799
- const lines = [];
3800
- if (elidedCount > 0) {
3801
- lines.push({
3802
- at: visible[0]?.created_at ?? "",
3803
- text: `(${elidedCount} earlier comment(s) omitted for brevity)`
3804
- });
3805
- }
3806
- for (const c of rendered) {
3807
- const tags = [];
3808
- if (c.edited_at)
3809
- tags.push("edited");
3810
- if (c.supersedes_id)
3811
- tags.push(`supersedes ${ref(c.supersedes_id)}`);
3812
- if (c.confirms_id)
3813
- tags.push(`confirms ${ref(c.confirms_id)}`);
3814
- if (c.resolved_at)
3815
- tags.push("resolved");
3816
- const tagStr = tags.length ? ` | ${tags.join(" | ")}` : "";
3817
- const header = `[${sanitizeHeaderField(ref(c.id))} | ${sanitizeHeaderField(c.author_type)} | ${authorLabel(c)} | ${sanitizeHeaderField(c.comment_type)} | ${sanitizeHeaderField(c.created_at)}${tagStr}]`;
3818
- const fencedBody = c.body.trim().replaceAll("<", "&lt;").replaceAll(">", "&gt;");
3819
- lines.push({
3820
- at: c.created_at,
3821
- text: `${header}
3822
- <comment-body>
3823
- ${fencedBody}
3824
- </comment-body>`
3825
- });
3826
- }
3827
- for (const a of activity) {
3828
- const actor = a.actor ? `${a.actor} ` : "";
3829
- lines.push({ at: a.at, text: `· (system) ${a.at} — ${actor}${a.text}` });
3830
- }
3831
- lines.sort((a, b) => a.at.localeCompare(b.at));
3832
- const body = lines.map((l) => l.text).join(`
3833
-
3834
- `);
3835
- const instruction = includeInstructions ? `
3836
-
3837
- ${CONFLICT_INSTRUCTION}` : "";
3838
- return `## ${heading} (oldest → newest)
3839
-
3840
- ${body}${instruction}`;
3841
- }
3842
- var CONFLICT_INSTRUCTION;
3843
- var init_commentSerializer = __esm(() => {
3844
- CONFLICT_INSTRUCTION = "When two comments conflict, prefer the latest created_at, UNLESS a later " + "comment explicitly confirms or restates the earlier finding. Evaluate " + "substance, not just recency. Cite the comment id(s) you relied on.";
3845
- });
3846
-
3847
- // ../harmony-shared/dist/constants.js
3848
- var TIMINGS;
3849
- var init_constants = __esm(() => {
3850
- TIMINGS = {
3851
- SEARCH_DEBOUNCE: 300,
3852
- AUTOSAVE_DEBOUNCE: 1000,
3853
- TOAST_DURATION: 3000,
3854
- QUERY_STALE_TIME: 1000 * 60 * 5,
3855
- QUERY_GC_TIME: 1000 * 60 * 60 * 24
3856
- };
3857
- });
3858
- // ../harmony-shared/dist/logger.js
3859
- var init_logger = () => {};
3860
- // ../harmony-shared/dist/projectTemplates.js
3861
- var init_projectTemplates = () => {};
3862
-
3863
- // ../harmony-shared/dist/reviewMethodology.js
3864
- var REVIEW_SYSTEM_PROMPT = `You are a senior code reviewer. Follow this two-pass methodology strictly.
3865
- Report findings; do NOT fix them. This is a read-only review.
3866
-
3867
- ## Two-Pass Review
3868
-
3869
- ### Pass 1 — CRITICAL (highest severity)
3870
-
3871
- **SQL & Data Safety**
3872
- - String interpolation in SQL — use parameterized queries / prepared statements
3873
- - TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE
3874
-
3875
- **Race Conditions & Concurrency**
3876
- - Read-check-write without uniqueness constraint or duplicate key handling
3877
- - Status transitions without atomic WHERE old_status UPDATE SET new_status
3878
- - Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)
3879
-
3880
- **LLM Output Trust Boundary**
3881
- - LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())
3882
- - Structured tool output accepted without type/shape checks before database writes
3883
-
3884
- **Enum & Value Completeness**
3885
- - When the diff introduces a new enum/status/type value, trace it through every consumer
3886
- - Check allowlists, filter arrays, and case/if-elsif chains for the new value
3887
- - Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff
3888
-
3889
- ### Pass 2 — INFORMATIONAL (lower severity)
3890
-
3891
- **Conditional Side Effects**
3892
- - Code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)
3893
-
3894
- **Dead Code & Consistency**
3895
- - Variables assigned but never read
3896
- - Comments/docstrings describing old behavior after code changed
3897
-
3898
- **Test Gaps**
3899
- - Missing negative-path tests for new error handling
3900
- - Security enforcement features without integration tests
3901
-
3902
- **Completeness Gaps**
3903
- - Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add
3904
-
3905
- **View/Frontend**
3906
- - O(n*m) lookups in views (Array.find in a loop instead of Map/index)
3907
- - Inline styles re-parsed every render
3908
-
3909
- ## Severity Classification
3910
-
3911
- - **critical**: SQL safety, race conditions, XSS, LLM trust boundary violations, enum completeness gaps causing runtime errors
3912
- - **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects
3913
- - **minor**: Dead code, stale comments, test gaps, minor view issues, cosmetic completeness gaps
3914
-
3915
- ## Suppressions — DO NOT flag these
3916
-
3917
- - Redundancy that aids readability (e.g., present? redundant with length > 20)
3918
- - "Add a comment explaining why this threshold was chosen" — thresholds change, comments rot
3919
- - Consistency-only changes (wrapping a value to match how another constant is guarded)
3920
- - Regex edge cases when input is constrained and the edge case never occurs in practice
3921
- - Eval threshold changes — these are tuned empirically
3922
- - Harmless no-ops (e.g., .reject on an element never in the array)
3923
- - ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging`, QA_VISUAL_CHECKLIST = `## Visual QA Checklist
3924
-
3925
- For each page affected by the changes:
3926
-
3927
- 1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.
3928
- 2. **Interactive elements** — Click every button, link, and control. Does each do what it says?
3929
- 3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.
3930
- 4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.
3931
- 5. **States** — Check empty state, loading state, error state, overflow state.
3932
- 6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.
3933
- 7. **Responsiveness** — If the change is visual, check mobile viewport (375px).
3934
-
3935
- ### SPA-Specific (React/Vite)
3936
- - Use snapshot for navigation — client-side routes may not appear in link lists.
3937
- - Check for stale state: navigate away and back — does data refresh correctly?
3938
- - Test browser back/forward — does the app handle history correctly?
3939
- - Watch for hydration errors or layout shifts after dynamic content loads.`, REVIEW_VERDICT_SCHEMA = `{
3940
- "verdict": "approved" | "rejected",
3941
- "summary": "Brief overall assessment",
3942
- "scopeCheck": {
3943
- "status": "clean" | "drift" | "missing",
3944
- "notes": "Optional explanation of scope issues"
3945
- },
3946
- "findings": [
3947
- {
3948
- "severity": "critical" | "major" | "minor",
3949
- "category": "sql-safety | race-condition | llm-trust | enum-completeness | visual | functional | ux | console | scope | other",
3950
- "title": "Short title",
3951
- "description": "Detailed description of the issue",
3952
- "location": "file:line (if applicable)"
3953
- }
3954
- ]
3955
- }`, REVIEW_DECISION_RULES = `- **rejected**: Any \`critical\` finding, unaddressed requirements, or 2+ \`major\` findings.
3956
- - **approved**: No critical findings, at most 1 major finding with minor findings OK.`;
3957
- // ../harmony-shared/dist/types.js
3958
- var init_types2 = () => {};
3959
-
3960
- // ../harmony-shared/dist/index.js
3961
- var init_dist = __esm(() => {
3962
- init_cardLinks();
3963
- init_classification();
3964
- init_commentSerializer();
3965
- init_constants();
3966
- init_logger();
3967
- init_projectTemplates();
3968
- init_types2();
3969
- });
3970
4063
 
3971
4064
  // src/review-knowledge.ts
3972
4065
  var init_review_knowledge = __esm(() => {
@@ -3980,6 +4073,8 @@ You are thorough, specific, and cite file:line locations for every finding.
3980
4073
 
3981
4074
  ${REVIEW_SYSTEM_PROMPT}
3982
4075
 
4076
+ ${REVIEW_ACCEPTANCE_CHECKS}
4077
+
3983
4078
  ${QA_VISUAL_CHECKLIST}`;
3984
4079
  }
3985
4080
  function buildReviewUserPrompt(enriched, branchName, worktreePath, previewUrl, diffSummary, baseBranch) {
@@ -4014,19 +4109,22 @@ you have Read, Grep, Glob, and read-only Bash:
4014
4109
 
4015
4110
  Follow these steps in order:
4016
4111
 
4017
- ### Step 1: Scope Check
4018
- Compare the diff against the card description and subtasks above.
4019
- - Are all requirements from the description addressed?
4020
- - Are all subtasks implemented?
4021
- - Is there scope creep changes unrelated to the card requirements?
4022
- Flag any missing requirements or scope drift.
4112
+ ### Step 1: Acceptance Checks
4113
+ Per the Acceptance Checks methodology in your system instructions, derive one
4114
+ check per requirement in the description and one per subtask above, then assign
4115
+ each a status (pass / partial / fail / unverifiable) backed by evidence you read
4116
+ yourself never the agent's say-so or a checkbox. Emit these as
4117
+ \`acceptanceChecks\`. Separately, set \`scopeCheck\` to flag scope creep —
4118
+ changes unrelated to the card's requirements.
4023
4119
 
4024
- ### Step 2: Code Review (Two-Pass)
4025
- Apply the two-pass review from your system instructions:
4026
- - **Pass 1 (CRITICAL)**: SQL safety, race conditions, LLM trust boundary, enum completeness.
4027
- - **Pass 2 (INFORMATIONAL)**: Conditional side effects, dead code, test gaps, completeness gaps, view issues.
4120
+ ### Step 2: Code Review (Two-Pass, five lenses)
4121
+ Apply the two-pass review from your system instructions, looking through all
4122
+ five lenses (functionality, security, performance, code quality, best practices):
4123
+ - **Pass 1 (CRITICAL)**: SQL safety, race conditions, security/auth/injection, LLM trust boundary, enum completeness.
4124
+ - **Pass 2 (INFORMATIONAL)**: functionality/edge cases, performance, code quality, best practices/accessibility, test gaps, completeness gaps.
4028
4125
 
4029
4126
  For enum completeness checks, use Grep and Read to trace new values through consumers OUTSIDE the diff.
4127
+ Set \`relatedToDiff\` on every finding — only diff-caused findings gate the verdict.
4030
4128
 
4031
4129
  ### Step 3: Visual QA
4032
4130
  Use the \`/browse\` skill to navigate to ${previewUrl} and apply the visual QA checklist: