@gethmy/agent 1.12.0 → 1.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/dist/cli.js +349 -251
  2. package/dist/index.js +349 -251
  3. package/package.json +2 -2
package/dist/index.js CHANGED
@@ -1157,6 +1157,294 @@ var TAG3 = "http";
1157
1157
  var init_http_server = __esm(() => {
1158
1158
  init_log();
1159
1159
  });
1160
// ../harmony-shared/dist/branchRef.js
/**
 * Shared patterns for reading a branch name out of free text.
 *
 * BRANCH_REF_PATTERN   - captures the backtick-quoted value that follows
 *                        "Branch:" (capture group 1 is the branch name).
 * SAFE_GIT_REF_PATTERN - allow-list for a branch name: letters, digits,
 *                        "/", "_", "." and "-" only, and the name must not
 *                        start with "-".
 *
 * Both bindings stay undefined until init_branchRef() has run.
 */
var BRANCH_REF_PATTERN, SAFE_GIT_REF_PATTERN;
var init_branchRef = __esm(() => {
  BRANCH_REF_PATTERN = /Branch:\s*`([^`]+)`/;
  // The leading (?!-) rejects names such as "--upload-pack=x": a value that
  // begins with a dash can be parsed as an option when it is passed to a
  // command line, and git itself forbids a leading dash in branch names.
  SAFE_GIT_REF_PATTERN = /^(?!-)[a-zA-Z0-9/_.-]+$/;
});
1166
+
1167
// ../harmony-shared/dist/cardLinks.js
/** Initialiser for the cardLinks module; it has nothing to set up, so this is a no-op. */
var init_cardLinks = () => undefined;
1169
// ../harmony-shared/dist/classification.js
/**
 * Ordered list of model tiers, lowest first. Assigned by
 * init_classification(); undefined before that call.
 */
var MODEL_TIERS;
var init_classification = __esm(() => {
  MODEL_TIERS = ["simple", "advanced", "research"];
});

/**
 * Returns the tier one step above `tier`, staying on the last tier when
 * `tier` is already the last one. A value that is not in MODEL_TIERS has
 * index -1, so it maps to the first tier.
 */
function escalateTier(tier) {
  const lastIndex = MODEL_TIERS.length - 1;
  const nextIndex = MODEL_TIERS.indexOf(tier) + 1;
  return MODEL_TIERS[nextIndex > lastIndex ? lastIndex : nextIndex];
}

/** True only when `v` is a string that appears in MODEL_TIERS. */
function isModelTier(v) {
  if (typeof v !== "string") {
    return false;
  }
  return MODEL_TIERS.includes(v);
}
1181
+
1182
// ../harmony-shared/dist/commentSerializer.js
/**
 * Makes a value safe to place inside a bracketed, pipe-separated header line.
 *
 * The characters "]", "|", "<", ">" and line breaks are each replaced with a
 * space, then the result is trimmed. An empty result becomes "—" so a header
 * never has a blank field.
 *
 * @param {*} value - Field value; null/undefined are treated as empty and any
 *   other non-string is converted with String().
 * @returns {string} The sanitised, non-empty field text.
 */
function sanitizeHeaderField(value) {
  // Coerce before calling .replace: a missing or non-string field would
  // otherwise throw and abort rendering of the whole thread.
  const text = typeof value === "string" ? value : String(value ?? "");
  return text.replace(/[\]\r\n|<>]/g, " ").trim() || "—";
}
1186
/**
 * Display label for a comment's author: the fixed text "AI agent" for agent
 * comments, otherwise the author's sanitised full name, falling back to
 * "teammate" when no name is present.
 */
function authorLabel(c) {
  const isAgent = c.author_type === "agent";
  return isAgent ? "AI agent" : sanitizeHeaderField(c.author?.full_name || "teammate");
}
1192
/**
 * Collects the ids of comments that should be kept when a thread is
 * shortened: every "decision" comment, plus both ends of each
 * supersedes/confirms link (the linking comment and the comment it targets).
 *
 * @returns {Set} Set of comment ids.
 */
function criticalIds(comments) {
  const keep = new Set();
  for (const comment of comments) {
    if (comment.comment_type === "decision") {
      keep.add(comment.id);
    }
    for (const linkedId of [comment.supersedes_id, comment.confirms_id]) {
      if (linkedId) {
        keep.add(comment.id);
        keep.add(linkedId);
      }
    }
  }
  return keep;
}
1208
/**
 * Renders a comment thread as a text section, oldest entry first.
 *
 * Deleted comments are dropped and the rest are ordered by created_at. Each
 * comment becomes a bracketed header line followed by its body, with "<" and
 * ">" escaped, wrapped in <comment-body> tags. Activity entries are merged in
 * by timestamp as "(system)" lines.
 *
 * Options:
 *   heading             - section title (default "Conversation").
 *   includeInstructions - append CONFLICT_INSTRUCTION (default true).
 *   activity            - extra { at, actor, text } entries (default []).
 *   maxComments         - when set and exceeded, keep only the newest
 *                         maxComments comments plus those named by
 *                         criticalIds(), and add an "omitted" marker line.
 *
 * @returns {string} The rendered section, or "" when no comment is visible.
 */
function serializeCommentThread(comments, options = {}) {
  const { heading = "Conversation", includeInstructions = true, activity = [], maxComments } = options;
  const visible = comments
    .filter((comment) => !comment.deleted_at)
    .sort((left, right) => left.created_at.localeCompare(right.created_at));
  if (visible.length === 0) {
    return "";
  }

  // 1-based position of every visible comment, used for "#n" references.
  const positionById = new Map(visible.map((comment, index) => [comment.id, index + 1]));
  const ref = (id) => {
    const position = positionById.get(id);
    // Ids that are not in the visible thread fall back to an 8-char prefix.
    return position ? `#${position}` : `#${id.slice(0, 8)}`;
  };

  let rendered = visible;
  if (maxComments && visible.length > maxComments) {
    const pinned = criticalIds(visible);
    const firstRecent = visible.length - maxComments;
    rendered = visible.filter((comment, index) => index >= firstRecent || pinned.has(comment.id));
  }
  const omitted = visible.length - rendered.length;

  const entries = [];
  if (omitted > 0) {
    // Stamped with the oldest timestamp so the marker sorts to the front.
    entries.push({
      at: visible[0]?.created_at ?? "",
      text: `(${omitted} earlier comment(s) omitted for brevity)`
    });
  }

  for (const comment of rendered) {
    const tags = [
      comment.edited_at ? "edited" : null,
      comment.supersedes_id ? `supersedes ${ref(comment.supersedes_id)}` : null,
      comment.confirms_id ? `confirms ${ref(comment.confirms_id)}` : null,
      comment.resolved_at ? "resolved" : null
    ].filter((tag) => tag !== null);
    const fields = [
      sanitizeHeaderField(ref(comment.id)),
      sanitizeHeaderField(comment.author_type),
      authorLabel(comment),
      sanitizeHeaderField(comment.comment_type),
      sanitizeHeaderField(comment.created_at),
      ...tags
    ];
    // Escape angle brackets so body text cannot close the wrapper tag.
    const escapedBody = comment.body.trim().replaceAll("<", "&lt;").replaceAll(">", "&gt;");
    entries.push({
      at: comment.created_at,
      text: `[${fields.join(" | ")}]\n<comment-body>\n${escapedBody}\n</comment-body>`
    });
  }

  for (const event of activity) {
    const actorPrefix = event.actor ? `${event.actor} ` : "";
    entries.push({ at: event.at, text: `· (system) ${event.at} — ${actorPrefix}${event.text}` });
  }

  entries.sort((left, right) => left.at.localeCompare(right.at));
  const body = entries.map((entry) => entry.text).join("\n\n");
  const instruction = includeInstructions ? `\n\n${CONFLICT_INSTRUCTION}` : "";
  return `## ${heading} (oldest → newest)\n\n${body}${instruction}`;
}
1272
/**
 * Guidance text appended to a serialised thread, telling the reader how to
 * resolve conflicting comments. Assigned by init_commentSerializer();
 * undefined before that call.
 */
var CONFLICT_INSTRUCTION;
var init_commentSerializer = __esm(() => {
  CONFLICT_INSTRUCTION = [
    "When two comments conflict, prefer the latest created_at, UNLESS a later ",
    "comment explicitly confirms or restates the earlier finding. Evaluate ",
    "substance, not just recency. Cite the comment id(s) you relied on."
  ].join("");
});
1276
+
1277
// ../harmony-shared/dist/constants.js
/**
 * Shared timing values, all in milliseconds. Assigned by init_constants();
 * undefined before that call.
 */
var TIMINGS;
var init_constants = __esm(() => {
  const SECOND = 1000;
  const MINUTE = SECOND * 60;
  const DAY = MINUTE * 60 * 24;
  TIMINGS = {
    SEARCH_DEBOUNCE: 300,
    AUTOSAVE_DEBOUNCE: SECOND,
    TOAST_DURATION: 3 * SECOND,
    QUERY_STALE_TIME: MINUTE * 5, // five minutes
    QUERY_GC_TIME: DAY // twenty-four hours
  };
});
1288
// ../harmony-shared/dist/logger.js
/** Initialiser for the logger module; it has nothing to set up, so this is a no-op. */
var init_logger = () => undefined;
1290
// ../harmony-shared/dist/projectTemplates.js
/** Initialiser for the projectTemplates module; it has nothing to set up, so this is a no-op. */
var init_projectTemplates = () => undefined;
1292
+
1293
+ // ../harmony-shared/dist/reviewMethodology.js
1294
+ var REVIEW_SYSTEM_PROMPT = `You are a senior code reviewer. Follow this two-pass methodology strictly.
1295
+ Report findings; do NOT fix them. This is a read-only review.
1296
+
1297
+ Review the diff through five lenses on every pass: functionality, security,
1298
+ performance, code quality, and best practices. For every finding, set
1299
+ \`relatedToDiff\`: true when the change under review introduced or exposed it,
1300
+ false when it is a pre-existing issue you happened to notice. Only diff-caused
1301
+ findings gate the verdict — pre-existing ones are reported for context and never
1302
+ block.
1303
+
1304
+ ## Two-Pass Review
1305
+
1306
+ ### Pass 1 — CRITICAL (highest severity)
1307
+
1308
+ **SQL & Data Safety**
1309
+ - String interpolation in SQL — use parameterized queries / prepared statements
1310
+ - TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE
1311
+
1312
+ **Race Conditions & Concurrency**
1313
+ - Read-check-write without uniqueness constraint or duplicate key handling
1314
+ - Status transitions without atomic WHERE old_status UPDATE SET new_status
1315
+ - Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)
1316
+
1317
+ **Security & Access Control**
1318
+ - Hardcoded secrets, API keys, or credentials committed to source
1319
+ - New endpoints, mutations, or service-role/RLS-exempt queries missing an auth or ownership check
1320
+ - Over-broad CORS, missing input validation on a trust boundary, injection beyond SQL (command, path, template)
1321
+
1322
+ **LLM Output Trust Boundary**
1323
+ - LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())
1324
+ - Structured tool output accepted without type/shape checks before database writes
1325
+
1326
+ **Enum & Value Completeness**
1327
+ - When the diff introduces a new enum/status/type value, trace it through every consumer
1328
+ - Check allowlists, filter arrays, and case/if-elsif chains for the new value
1329
+ - Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff
1330
+
1331
+ ### Pass 2 — INFORMATIONAL (lower severity)
1332
+
1333
+ **Functionality & Edge Cases**
1334
+ - Logic errors, off-by-one, unhandled null/undefined, wrong API or library usage
1335
+ - Conditional side effects: code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)
1336
+
1337
+ **Performance**
1338
+ - O(n²) algorithms and O(n*m) lookups (Array.find in a loop instead of a Map/index)
1339
+ - N+1 queries, unbounded fetches missing pagination, repeated work that should be cached/memoized
1340
+ - Unnecessary React re-renders (unstable props/deps, inline object/array literals); leaked subscriptions, timers, or listeners
1341
+ - Inline styles re-parsed every render
1342
+
1343
+ **Code Quality**
1344
+ - Dead code: variables assigned but never read, unreachable branches
1345
+ - Duplication that should be extracted, over-long functions, unclear naming
1346
+ - \`any\` / unchecked casts that defeat the type system
1347
+ - Comments/docstrings describing old behavior after code changed
1348
+
1349
+ **Best Practices & Conventions**
1350
+ - Deviations from established project conventions and framework idioms / anti-patterns
1351
+ - React hook dependency arrays that are wrong, missing, or over-broad
1352
+ - Accessibility gaps on new UI: missing labels, roles, alt text, or keyboard paths
1353
+
1354
+ **Test Gaps**
1355
+ - Missing negative-path tests for new error handling
1356
+ - Security enforcement features without integration tests
1357
+
1358
+ **Completeness Gaps**
1359
+ - Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add
1360
+
1361
+ ## Severity Classification
1362
+
1363
+ - **critical**: SQL safety, race conditions, XSS, secrets/auth/injection holes, LLM trust boundary violations, enum completeness gaps causing runtime errors
1364
+ - **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects, performance regressions on a hot path
1365
+ - **minor**: Dead code, stale comments, test gaps, naming/duplication, minor view issues, cosmetic completeness gaps
1366
+
1367
+ ## Suppressions — DO NOT flag these
1368
+
1369
+ - Redundancy that aids readability (e.g., present? redundant with length > 20)
1370
+ - "Add a comment explaining why this threshold was chosen" — thresholds change, comments rot
1371
+ - Consistency-only changes (wrapping a value to match how another constant is guarded)
1372
+ - Regex edge cases when input is constrained and the edge case never occurs in practice
1373
+ - Eval threshold changes — these are tuned empirically
1374
+ - Harmless no-ops (e.g., .reject on an element never in the array)
1375
+ - Pre-existing issues unrelated to the diff, beyond a single noted finding (set relatedToDiff:false; never block on them)
1376
+ - ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging`, REVIEW_ACCEPTANCE_CHECKS = `## Acceptance Checks
1377
+
1378
+ Before judging code quality, verify the change actually satisfies the card.
1379
+ Derive one acceptance check per concrete requirement in the card description and
1380
+ one per subtask (the stated acceptance criteria). For each, assign a status from
1381
+ hard evidence — cite the file:line you read or the dev-server behaviour you
1382
+ observed that proves it:
1383
+
1384
+ - **pass** — implemented and verified by code you read or behaviour you observed
1385
+ - **partial** — started but incomplete (a missing branch, an edge case, or one of several bundled requirements)
1386
+ - **fail** — required but absent, or implemented incorrectly
1387
+ - **unverifiable** — cannot be confirmed from the diff or a running app (state why)
1388
+
1389
+ Do NOT mark a check "pass" on the implementing agent's say-so or a subtask's
1390
+ checkbox alone — only on evidence you found yourself. Any \`fail\` or \`partial\`
1391
+ check is an unaddressed requirement and forces a rejected verdict.`, QA_VISUAL_CHECKLIST = `## Visual QA Checklist
1392
+
1393
+ For each page affected by the changes:
1394
+
1395
+ 1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.
1396
+ 2. **Interactive elements** — Click every button, link, and control. Does each do what it says?
1397
+ 3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.
1398
+ 4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.
1399
+ 5. **States** — Check empty state, loading state, error state, overflow state.
1400
+ 6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.
1401
+ 7. **Responsiveness** — If the change is visual, check mobile viewport (375px).
1402
+
1403
+ ### SPA-Specific (React/Vite)
1404
+ - Use snapshot for navigation — client-side routes may not appear in link lists.
1405
+ - Check for stale state: navigate away and back — does data refresh correctly?
1406
+ - Test browser back/forward — does the app handle history correctly?
1407
+ - Watch for hydration errors or layout shifts after dynamic content loads.`, REVIEW_VERDICT_SCHEMA = `{
1408
+ "verdict": "approved" | "rejected",
1409
+ "summary": "Brief overall assessment",
1410
+ "scopeCheck": {
1411
+ "status": "clean" | "drift" | "missing",
1412
+ "notes": "Optional explanation of scope issues"
1413
+ },
1414
+ "acceptanceChecks": [
1415
+ {
1416
+ "criterion": "The requirement or subtask being verified",
1417
+ "status": "pass" | "partial" | "fail" | "unverifiable",
1418
+ "evidence": "file:line or observed behaviour that proves the status"
1419
+ }
1420
+ ],
1421
+ "findings": [
1422
+ {
1423
+ "severity": "critical" | "major" | "minor",
1424
+ "category": "sql-safety | race-condition | security | llm-trust | enum-completeness | functional | performance | code-quality | best-practices | accessibility | visual | ux | console | scope | other",
1425
+ "title": "Short title",
1426
+ "description": "Detailed description of the issue",
1427
+ "location": "file:line (if applicable)",
1428
+ "relatedToDiff": true
1429
+ }
1430
+ ]
1431
+ }`, REVIEW_DECISION_RULES = `Counting only findings with \`relatedToDiff: true\`:
1432
+ - **rejected**: Any acceptance check that is \`fail\` or \`partial\`, any \`critical\` finding, unaddressed requirements, or 2+ \`major\` findings.
1433
+ - **approved**: Every acceptance check \`pass\` (or \`unverifiable\` with a stated reason), no critical findings, at most 1 major finding; minor findings OK.`;
1434
// ../harmony-shared/dist/types.js
/** Initialiser for the types module; it has nothing to set up, so this is a no-op. */
var init_types2 = () => undefined;
1436
+
1437
// ../harmony-shared/dist/index.js
/**
 * Entry-point initialiser for the shared package: runs each sub-module
 * initialiser once, in the order listed.
 */
var init_dist = __esm(() => {
  const initializers = [
    init_branchRef,
    init_cardLinks,
    init_classification,
    init_commentSerializer,
    init_constants,
    init_logger,
    init_projectTemplates,
    init_types2
  ];
  for (const initialize of initializers) {
    initialize();
  }
});
1160
1448
 
1161
1449
  // src/pm.ts
1162
1450
  import { execFileSync as execFileSync2 } from "node:child_process";
@@ -1418,9 +1706,8 @@ function checkoutExistingBranch(basePath, branchName) {
1418
1706
  function extractBranchFromDescription(description) {
1419
1707
  if (!description)
1420
1708
  return null;
1421
- const match = description.match(/Branch:\s*`([^`]+)`/);
1422
- const branch = match?.[1] ?? null;
1423
- if (branch && !/^[a-zA-Z0-9/_.-]+$/.test(branch)) {
1709
+ const branch = description.match(BRANCH_REF_PATTERN)?.[1] ?? null;
1710
+ if (branch && !SAFE_GIT_REF_PATTERN.test(branch)) {
1424
1711
  log.warn(TAG6, `Extracted branch name contains unsafe characters: ${branch}`);
1425
1712
  return null;
1426
1713
  }
@@ -1428,6 +1715,7 @@ function extractBranchFromDescription(description) {
1428
1715
  }
1429
1716
  var TAG6 = "review-worktree";
1430
1717
  var init_review_worktree = __esm(() => {
1718
+ init_dist();
1431
1719
  init_log();
1432
1720
  init_pm();
1433
1721
  init_worktree();
@@ -3374,6 +3662,22 @@ function buildFindingComments(findings) {
3374
3662
  bodies.push(current);
3375
3663
  return bodies;
3376
3664
  }
3665
/**
 * Builds the one-line acceptance summary shown in a review comment, e.g.
 * "Acceptance: 3/5 pass (1 fail, 1 unverifiable)".
 *
 * @param {Array<{status: string}>|undefined} checks - Acceptance checks.
 * @returns {string} The summary line, or "" when there are no checks.
 */
function acceptanceSummaryLine(checks) {
  if (!Array.isArray(checks) || checks.length === 0) {
    return "";
  }
  const counts = { pass: 0, partial: 0, fail: 0, unverifiable: 0 };
  const known = Object.keys(counts);
  for (const check of checks) {
    // An unrecognised or missing status is counted as "unverifiable" so it
    // still appears in the summary instead of being tallied under a stray
    // key and silently dropped.
    const status = known.includes(check?.status) ? check.status : "unverifiable";
    counts[status]++;
  }
  // Only non-pass buckets with at least one entry are listed, worst first.
  const flagged = ["fail", "partial", "unverifiable"]
    .filter((status) => counts[status] > 0)
    .map((status) => `${counts[status]} ${status}`);
  const detail = flagged.length ? ` (${flagged.join(", ")})` : "";
  return `Acceptance: ${counts.pass}/${checks.length} pass${detail}`;
}
3377
3681
  function tailRunLog(path, bytes = RUN_LOG_TAIL_BYTES) {
3378
3682
  try {
3379
3683
  const size = statSync(path).size;
@@ -3387,14 +3691,32 @@ function tailRunLog(path, bytes = RUN_LOG_TAIL_BYTES) {
3387
3691
  }
3388
3692
  }
3389
3693
  function extractResult(parsed) {
3390
- const verdict = parsed.verdict === "approved" || parsed.verdict === "rejected" ? parsed.verdict : "rejected";
3694
+ let verdict = parsed.verdict === "approved" || parsed.verdict === "rejected" ? parsed.verdict : "rejected";
3391
3695
  const findings = Array.isArray(parsed.findings) ? parsed.findings.filter((f) => typeof f === "object" && f !== null && ("title" in f)).map((f) => ({
3392
3696
  severity: f.severity === "critical" ? "critical" : f.severity === "minor" ? "minor" : "major",
3393
3697
  title: String(f.title ?? "Untitled finding"),
3394
3698
  description: String(f.description ?? ""),
3395
3699
  category: f.category ? String(f.category) : undefined,
3396
- location: f.location ? String(f.location) : undefined
3700
+ location: f.location ? String(f.location) : undefined,
3701
+ relatedToDiff: f.relatedToDiff !== false
3397
3702
  })) : [];
3703
+ const acceptanceChecks = Array.isArray(parsed.acceptanceChecks) ? parsed.acceptanceChecks.filter((c) => typeof c === "object" && c !== null && ("criterion" in c)).map((c) => ({
3704
+ criterion: String(c.criterion ?? "Unnamed criterion"),
3705
+ status: ["pass", "partial", "fail", "unverifiable"].includes(c.status) ? c.status : "unverifiable",
3706
+ evidence: c.evidence ? String(c.evidence) : undefined
3707
+ })) : undefined;
3708
+ const unmet = (acceptanceChecks ?? []).filter((c) => c.status === "fail" || c.status === "partial");
3709
+ if (verdict === "approved" && unmet.length > 0) {
3710
+ verdict = "rejected";
3711
+ findings.unshift({
3712
+ severity: "major",
3713
+ title: `Unmet acceptance criteria (${unmet.length})`,
3714
+ description: unmet.map((c) => `- [${c.status}] ${c.criterion}${c.evidence ? ` — ${c.evidence}` : ""}`).join(`
3715
+ `),
3716
+ category: "scope",
3717
+ relatedToDiff: true
3718
+ });
3719
+ }
3398
3720
  const scopeCheck = parsed.scopeCheck && typeof parsed.scopeCheck === "object" && "status" in parsed.scopeCheck ? {
3399
3721
  status: ["clean", "drift", "missing"].includes(parsed.scopeCheck.status) ? parsed.scopeCheck.status : "clean",
3400
3722
  notes: parsed.scopeCheck.notes ? String(parsed.scopeCheck.notes) : undefined
@@ -3403,6 +3725,7 @@ function extractResult(parsed) {
3403
3725
  verdict,
3404
3726
  summary: String(parsed.summary ?? "").slice(0, 2000),
3405
3727
  scopeCheck,
3728
+ acceptanceChecks,
3406
3729
  findings
3407
3730
  };
3408
3731
  }
@@ -3576,6 +3899,7 @@ ${runLogTail}
3576
3899
  const body = [
3577
3900
  "**Review — approved.**",
3578
3901
  result.summary || "",
3902
+ acceptanceSummaryLine(result.acceptanceChecks),
3579
3903
  scopeLine,
3580
3904
  result.findings.length > 0 ? `${result.findings.length} minor finding(s) noted.` : "",
3581
3905
  prUrl ? `PR: ${prUrl}` : ""
@@ -3591,10 +3915,11 @@ ${runLogTail}
3591
3915
  });
3592
3916
  log.info(TAG17, `#${card.short_id} approved${prUrl ? ` — PR: ${prUrl}` : ""} — labeled "${config.review.approvedLabel}"`);
3593
3917
  } else {
3594
- const criticalFindings = result.findings.filter((f) => f.severity === "critical").slice(0, MAX_FINDINGS);
3595
- const majorFindings = result.findings.filter((f) => f.severity === "major").slice(0, MAX_FINDINGS);
3918
+ const reworkFindings = result.findings.filter((f) => f.relatedToDiff !== false);
3919
+ const criticalFindings = reworkFindings.filter((f) => f.severity === "critical").slice(0, MAX_FINDINGS);
3920
+ const majorFindings = reworkFindings.filter((f) => f.severity === "major").slice(0, MAX_FINDINGS);
3596
3921
  const linkedFindings = [...criticalFindings, ...majorFindings];
3597
- const minorFindings = result.findings.filter((f) => f.severity === "minor").slice(0, MAX_FINDINGS);
3922
+ const minorFindings = reworkFindings.filter((f) => f.severity === "minor").slice(0, MAX_FINDINGS);
3598
3923
  if (currentCycle >= maxCycles) {
3599
3924
  log.warn(TAG17, `#${card.short_id} reached max review cycles (${maxCycles}), moving to Done with note`);
3600
3925
  await moveCardToColumn(client, card, config.review.moveToColumn);
@@ -3662,6 +3987,7 @@ ${runLogTail}
3662
3987
  const body = [
3663
3988
  "**Review — rejected.**",
3664
3989
  result.summary || "",
3990
+ acceptanceSummaryLine(result.acceptanceChecks),
3665
3991
  scopeLine,
3666
3992
  `${criticalFindings.length} critical, ${majorFindings.length} major, ${minorFindings.length} minor finding(s).`
3667
3993
  ].filter(Boolean).join(`
@@ -3733,239 +4059,6 @@ var init_review_completion = __esm(() => {
3733
4059
  init_types();
3734
4060
  init_worktree();
3735
4061
  });
3736
- // ../harmony-shared/dist/cardLinks.js
3737
- var init_cardLinks = () => {};
3738
- // ../harmony-shared/dist/classification.js
3739
- function escalateTier(tier) {
3740
- const i = MODEL_TIERS.indexOf(tier);
3741
- return MODEL_TIERS[Math.min(i + 1, MODEL_TIERS.length - 1)];
3742
- }
3743
- function isModelTier(v) {
3744
- return typeof v === "string" && MODEL_TIERS.includes(v);
3745
- }
3746
- var MODEL_TIERS;
3747
- var init_classification = __esm(() => {
3748
- MODEL_TIERS = ["simple", "advanced", "research"];
3749
- });
3750
-
3751
- // ../harmony-shared/dist/commentSerializer.js
3752
- function sanitizeHeaderField(value) {
3753
- return value.replace(/[\]\r\n|<>]/g, " ").trim() || "—";
3754
- }
3755
- function authorLabel(c) {
3756
- if (c.author_type === "agent")
3757
- return "AI agent";
3758
- const raw = c.author?.full_name || "teammate";
3759
- return sanitizeHeaderField(raw);
3760
- }
3761
- function criticalIds(comments) {
3762
- const keep = new Set;
3763
- for (const c of comments) {
3764
- if (c.comment_type === "decision")
3765
- keep.add(c.id);
3766
- if (c.supersedes_id) {
3767
- keep.add(c.id);
3768
- keep.add(c.supersedes_id);
3769
- }
3770
- if (c.confirms_id) {
3771
- keep.add(c.id);
3772
- keep.add(c.confirms_id);
3773
- }
3774
- }
3775
- return keep;
3776
- }
3777
- function serializeCommentThread(comments, options = {}) {
3778
- const { heading = "Conversation", includeInstructions = true, activity = [], maxComments } = options;
3779
- const visible = comments.filter((c) => !c.deleted_at).slice().sort((a, b) => a.created_at.localeCompare(b.created_at));
3780
- if (visible.length === 0)
3781
- return "";
3782
- const indexById = new Map;
3783
- visible.forEach((c, i) => {
3784
- indexById.set(c.id, i + 1);
3785
- });
3786
- let rendered = visible;
3787
- let elidedCount = 0;
3788
- if (maxComments && visible.length > maxComments) {
3789
- const keep = criticalIds(visible);
3790
- const recentThreshold = visible.length - maxComments;
3791
- rendered = visible.filter((c, i) => i >= recentThreshold || keep.has(c.id));
3792
- elidedCount = visible.length - rendered.length;
3793
- }
3794
- const ref = (id) => {
3795
- const n = indexById.get(id);
3796
- return n ? `#${n}` : `#${id.slice(0, 8)}`;
3797
- };
3798
- const lines = [];
3799
- if (elidedCount > 0) {
3800
- lines.push({
3801
- at: visible[0]?.created_at ?? "",
3802
- text: `(${elidedCount} earlier comment(s) omitted for brevity)`
3803
- });
3804
- }
3805
- for (const c of rendered) {
3806
- const tags = [];
3807
- if (c.edited_at)
3808
- tags.push("edited");
3809
- if (c.supersedes_id)
3810
- tags.push(`supersedes ${ref(c.supersedes_id)}`);
3811
- if (c.confirms_id)
3812
- tags.push(`confirms ${ref(c.confirms_id)}`);
3813
- if (c.resolved_at)
3814
- tags.push("resolved");
3815
- const tagStr = tags.length ? ` | ${tags.join(" | ")}` : "";
3816
- const header = `[${sanitizeHeaderField(ref(c.id))} | ${sanitizeHeaderField(c.author_type)} | ${authorLabel(c)} | ${sanitizeHeaderField(c.comment_type)} | ${sanitizeHeaderField(c.created_at)}${tagStr}]`;
3817
- const fencedBody = c.body.trim().replaceAll("<", "&lt;").replaceAll(">", "&gt;");
3818
- lines.push({
3819
- at: c.created_at,
3820
- text: `${header}
3821
- <comment-body>
3822
- ${fencedBody}
3823
- </comment-body>`
3824
- });
3825
- }
3826
- for (const a of activity) {
3827
- const actor = a.actor ? `${a.actor} ` : "";
3828
- lines.push({ at: a.at, text: `· (system) ${a.at} — ${actor}${a.text}` });
3829
- }
3830
- lines.sort((a, b) => a.at.localeCompare(b.at));
3831
- const body = lines.map((l) => l.text).join(`
3832
-
3833
- `);
3834
- const instruction = includeInstructions ? `
3835
-
3836
- ${CONFLICT_INSTRUCTION}` : "";
3837
- return `## ${heading} (oldest → newest)
3838
-
3839
- ${body}${instruction}`;
3840
- }
3841
- var CONFLICT_INSTRUCTION;
3842
- var init_commentSerializer = __esm(() => {
3843
- CONFLICT_INSTRUCTION = "When two comments conflict, prefer the latest created_at, UNLESS a later " + "comment explicitly confirms or restates the earlier finding. Evaluate " + "substance, not just recency. Cite the comment id(s) you relied on.";
3844
- });
3845
-
3846
- // ../harmony-shared/dist/constants.js
3847
- var TIMINGS;
3848
- var init_constants = __esm(() => {
3849
- TIMINGS = {
3850
- SEARCH_DEBOUNCE: 300,
3851
- AUTOSAVE_DEBOUNCE: 1000,
3852
- TOAST_DURATION: 3000,
3853
- QUERY_STALE_TIME: 1000 * 60 * 5,
3854
- QUERY_GC_TIME: 1000 * 60 * 60 * 24
3855
- };
3856
- });
3857
- // ../harmony-shared/dist/logger.js
3858
- var init_logger = () => {};
3859
- // ../harmony-shared/dist/projectTemplates.js
3860
- var init_projectTemplates = () => {};
3861
-
3862
- // ../harmony-shared/dist/reviewMethodology.js
3863
- var REVIEW_SYSTEM_PROMPT = `You are a senior code reviewer. Follow this two-pass methodology strictly.
3864
- Report findings; do NOT fix them. This is a read-only review.
3865
-
3866
- ## Two-Pass Review
3867
-
3868
- ### Pass 1 — CRITICAL (highest severity)
3869
-
3870
- **SQL & Data Safety**
3871
- - String interpolation in SQL — use parameterized queries / prepared statements
3872
- - TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE
3873
-
3874
- **Race Conditions & Concurrency**
3875
- - Read-check-write without uniqueness constraint or duplicate key handling
3876
- - Status transitions without atomic WHERE old_status UPDATE SET new_status
3877
- - Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)
3878
-
3879
- **LLM Output Trust Boundary**
3880
- - LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())
3881
- - Structured tool output accepted without type/shape checks before database writes
3882
-
3883
- **Enum & Value Completeness**
3884
- - When the diff introduces a new enum/status/type value, trace it through every consumer
3885
- - Check allowlists, filter arrays, and case/if-elsif chains for the new value
3886
- - Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff
3887
-
3888
- ### Pass 2 — INFORMATIONAL (lower severity)
3889
-
3890
- **Conditional Side Effects**
3891
- - Code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)
3892
-
3893
- **Dead Code & Consistency**
3894
- - Variables assigned but never read
3895
- - Comments/docstrings describing old behavior after code changed
3896
-
3897
- **Test Gaps**
3898
- - Missing negative-path tests for new error handling
3899
- - Security enforcement features without integration tests
3900
-
3901
- **Completeness Gaps**
3902
- - Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add
3903
-
3904
- **View/Frontend**
3905
- - O(n*m) lookups in views (Array.find in a loop instead of Map/index)
3906
- - Inline styles re-parsed every render
3907
-
3908
- ## Severity Classification
3909
-
3910
- - **critical**: SQL safety, race conditions, XSS, LLM trust boundary violations, enum completeness gaps causing runtime errors
3911
- - **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects
3912
- - **minor**: Dead code, stale comments, test gaps, minor view issues, cosmetic completeness gaps
3913
-
3914
- ## Suppressions — DO NOT flag these
3915
-
3916
- - Redundancy that aids readability (e.g., present? redundant with length > 20)
3917
- - "Add a comment explaining why this threshold was chosen" — thresholds change, comments rot
3918
- - Consistency-only changes (wrapping a value to match how another constant is guarded)
3919
- - Regex edge cases when input is constrained and the edge case never occurs in practice
3920
- - Eval threshold changes — these are tuned empirically
3921
- - Harmless no-ops (e.g., .reject on an element never in the array)
3922
- - ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging`, QA_VISUAL_CHECKLIST = `## Visual QA Checklist
3923
-
3924
- For each page affected by the changes:
3925
-
3926
- 1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.
3927
- 2. **Interactive elements** — Click every button, link, and control. Does each do what it says?
3928
- 3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.
3929
- 4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.
3930
- 5. **States** — Check empty state, loading state, error state, overflow state.
3931
- 6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.
3932
- 7. **Responsiveness** — If the change is visual, check mobile viewport (375px).
3933
-
3934
- ### SPA-Specific (React/Vite)
3935
- - Use snapshot for navigation — client-side routes may not appear in link lists.
3936
- - Check for stale state: navigate away and back — does data refresh correctly?
3937
- - Test browser back/forward — does the app handle history correctly?
3938
- - Watch for hydration errors or layout shifts after dynamic content loads.`, REVIEW_VERDICT_SCHEMA = `{
3939
- "verdict": "approved" | "rejected",
3940
- "summary": "Brief overall assessment",
3941
- "scopeCheck": {
3942
- "status": "clean" | "drift" | "missing",
3943
- "notes": "Optional explanation of scope issues"
3944
- },
3945
- "findings": [
3946
- {
3947
- "severity": "critical" | "major" | "minor",
3948
- "category": "sql-safety | race-condition | llm-trust | enum-completeness | visual | functional | ux | console | scope | other",
3949
- "title": "Short title",
3950
- "description": "Detailed description of the issue",
3951
- "location": "file:line (if applicable)"
3952
- }
3953
- ]
3954
- }`, REVIEW_DECISION_RULES = `- **rejected**: Any \`critical\` finding, unaddressed requirements, or 2+ \`major\` findings.
3955
- - **approved**: No critical findings, at most 1 major finding with minor findings OK.`;
3956
- // ../harmony-shared/dist/types.js
3957
- var init_types2 = () => {};
3958
-
3959
- // ../harmony-shared/dist/index.js
3960
- var init_dist = __esm(() => {
3961
- init_cardLinks();
3962
- init_classification();
3963
- init_commentSerializer();
3964
- init_constants();
3965
- init_logger();
3966
- init_projectTemplates();
3967
- init_types2();
3968
- });
3969
4062
 
3970
4063
  // src/review-knowledge.ts
3971
4064
  var init_review_knowledge = __esm(() => {
@@ -3979,6 +4072,8 @@ You are thorough, specific, and cite file:line locations for every finding.
3979
4072
 
3980
4073
  ${REVIEW_SYSTEM_PROMPT}
3981
4074
 
4075
+ ${REVIEW_ACCEPTANCE_CHECKS}
4076
+
3982
4077
  ${QA_VISUAL_CHECKLIST}`;
3983
4078
  }
3984
4079
  function buildReviewUserPrompt(enriched, branchName, worktreePath, previewUrl, diffSummary, baseBranch) {
@@ -4013,19 +4108,22 @@ you have Read, Grep, Glob, and read-only Bash:
4013
4108
 
4014
4109
  Follow these steps in order:
4015
4110
 
4016
- ### Step 1: Scope Check
4017
- Compare the diff against the card description and subtasks above.
4018
- - Are all requirements from the description addressed?
4019
- - Are all subtasks implemented?
4020
- - Is there scope creep changes unrelated to the card requirements?
4021
- Flag any missing requirements or scope drift.
4111
+ ### Step 1: Acceptance Checks
4112
+ Per the Acceptance Checks methodology in your system instructions, derive one
4113
+ check per requirement in the description and one per subtask above, then assign
4114
+ each a status (pass / partial / fail / unverifiable) backed by evidence you read
4115
+ yourself never the agent's say-so or a checkbox. Emit these as
4116
+ \`acceptanceChecks\`. Separately, set \`scopeCheck\` to flag scope creep —
4117
+ changes unrelated to the card's requirements.
4022
4118
 
4023
- ### Step 2: Code Review (Two-Pass)
4024
- Apply the two-pass review from your system instructions:
4025
- - **Pass 1 (CRITICAL)**: SQL safety, race conditions, LLM trust boundary, enum completeness.
4026
- - **Pass 2 (INFORMATIONAL)**: Conditional side effects, dead code, test gaps, completeness gaps, view issues.
4119
+ ### Step 2: Code Review (Two-Pass, five lenses)
4120
+ Apply the two-pass review from your system instructions, looking through all
4121
+ five lenses (functionality, security, performance, code quality, best practices):
4122
+ - **Pass 1 (CRITICAL)**: SQL safety, race conditions, security/auth/injection, LLM trust boundary, enum completeness.
4123
+ - **Pass 2 (INFORMATIONAL)**: functionality/edge cases, performance, code quality, best practices/accessibility, test gaps, completeness gaps.
4027
4124
 
4028
4125
  For enum completeness checks, use Grep and Read to trace new values through consumers OUTSIDE the diff.
4126
+ Set \`relatedToDiff\` on every finding — only diff-caused findings gate the verdict.
4029
4127
 
4030
4128
  ### Step 3: Visual QA
4031
4129
  Use the \`/browse\` skill to navigate to ${previewUrl} and apply the visual QA checklist: