@gethmy/agent 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +349 -251
- package/dist/index.js +349 -251
- package/package.json +2 -2
package/dist/index.js
CHANGED
|
@@ -1157,6 +1157,294 @@ var TAG3 = "http";
|
|
|
1157
1157
|
var init_http_server = __esm(() => {
|
|
1158
1158
|
init_log();
|
|
1159
1159
|
});
|
|
1160
|
+
// ../harmony-shared/dist/branchRef.js
|
|
1161
|
+
// Patterns used to pull a branch reference out of free text and to vet it
// before use (extractBranchFromDescription matches the first and tests the second).
var BRANCH_REF_PATTERN, SAFE_GIT_REF_PATTERN;
var init_branchRef = __esm(() => {
  // Captures the back-ticked name in a "Branch: `name`" marker.
  BRANCH_REF_PATTERN = /Branch:\s*`([^`]+)`/;
  // Allow-list for extracted branch names: letters, digits, "/", "_", "." and "-".
  // Two extra guards close gaps the character class alone leaves open:
  //   (?!-)       a leading "-" would be read by git as an option, not a ref
  //   (?!.*\.\.)  ".." is never valid inside a ref name and is range syntax
  // Both shapes are rejected by `git check-ref-format --branch`, so no valid
  // branch name is lost by refusing them here.
  SAFE_GIT_REF_PATTERN = /^(?!-)(?!.*\.\.)[a-zA-Z0-9/_.-]+$/;
});
|
|
1166
|
+
|
|
1167
|
+
// ../harmony-shared/dist/cardLinks.js
|
|
1168
|
+
// Empty initializer for the cardLinks module: init_dist calls it, and it does nothing.
var init_cardLinks = () => {};
|
|
1169
|
+
// ../harmony-shared/dist/classification.js
|
|
1170
|
+
/**
 * Returns the entry that follows `tier` in MODEL_TIERS, saturating at the last entry.
 * A value that is not listed has index -1 and therefore resolves to the first entry.
 *
 * @param {string} tier - Current tier.
 * @returns {string} The next tier up, or the last tier when already at the top.
 */
function escalateTier(tier) {
  const lastIndex = MODEL_TIERS.length - 1;
  const nextIndex = MODEL_TIERS.indexOf(tier) + 1;
  return MODEL_TIERS[nextIndex > lastIndex ? lastIndex : nextIndex];
}
|
|
1174
|
+
/**
 * Type guard: true only for a string that is one of the MODEL_TIERS entries.
 *
 * @param {unknown} v - Candidate value.
 * @returns {boolean}
 */
function isModelTier(v) {
  if (typeof v !== "string") {
    return false;
  }
  return MODEL_TIERS.includes(v);
}
|
|
1177
|
+
// Tier names in the order escalateTier walks them; assigned when the module initializer runs.
var MODEL_TIERS;
var init_classification = __esm(() => {
  MODEL_TIERS = [
    "simple",
    "advanced",
    "research"
  ];
});
|
|
1181
|
+
|
|
1182
|
+
// ../harmony-shared/dist/commentSerializer.js
|
|
1183
|
+
/**
 * Makes a value safe to place inside a single-line `[a | b | c]` comment header.
 * Characters that could end the header or imitate markup (`]`, CR, LF, `|`, `<`, `>`)
 * become spaces; an empty result is rendered as an em dash.
 *
 * @param {unknown} value - Header field; non-strings are coerced, null/undefined count as empty.
 * @returns {string} The sanitized field, never empty.
 */
function sanitizeHeaderField(value) {
  // Coerce first so one missing field cannot abort serialization of the whole thread.
  const text = typeof value === "string" ? value : String(value ?? "");
  return text.replace(/[\]\r\n|<>]/g, " ").trim() || "—";
}

/**
 * Display name for a comment's author.
 *
 * @param {object} c - Comment; reads `author_type` and `author.full_name`.
 * @returns {string} "AI agent" for agent comments, otherwise the sanitized full name
 *   (or "teammate" when no name is present).
 */
function authorLabel(c) {
  if (c.author_type === "agent") {
    return "AI agent";
  }
  return sanitizeHeaderField(c.author?.full_name || "teammate");
}

/**
 * Collects the ids of comments that must survive truncation: every decision, plus both
 * ends of each supersedes/confirms link.
 *
 * @param {object[]} comments - Comments to scan.
 * @returns {Set} Ids to keep regardless of age.
 */
function criticalIds(comments) {
  const keep = new Set();
  for (const c of comments) {
    if (c.comment_type === "decision") {
      keep.add(c.id);
    }
    for (const linkedId of [c.supersedes_id, c.confirms_id]) {
      if (linkedId) {
        keep.add(c.id);
        keep.add(linkedId);
      }
    }
  }
  return keep;
}

/**
 * Renders a comment thread, optionally interleaved with activity entries, as a text block.
 *
 * Deleted comments are dropped and the rest are ordered by `created_at`. Each comment gets a
 * one-line header followed by its body wrapped in `<comment-body>` tags. When `maxComments`
 * is set and exceeded, only the newest `maxComments` comments plus the ones returned by
 * criticalIds are rendered, and a note states how many were omitted.
 *
 * @param {object[]} comments - Comments with `id`, `body`, `created_at`, `author_type`,
 *   `comment_type` and optional `deleted_at`, `edited_at`, `resolved_at`, `supersedes_id`,
 *   `confirms_id`, `author`.
 * @param {object} [options]
 * @param {string} [options.heading="Conversation"] - Title used in the `##` heading.
 * @param {boolean} [options.includeInstructions=true] - Append CONFLICT_INSTRUCTION.
 * @param {object[]} [options.activity=[]] - Entries with `at`, `text` and optional `actor`.
 * @param {number} [options.maxComments] - Soft cap on rendered comments.
 * @returns {string} The rendered thread, or "" when no comment is visible.
 */
function serializeCommentThread(comments, options = {}) {
  const { heading = "Conversation", includeInstructions = true, activity = [], maxComments } = options;
  // filter() already returns a fresh array, so sorting it leaves the caller's array untouched.
  const visible = comments
    .filter((c) => !c.deleted_at)
    .sort((a, b) => a.created_at.localeCompare(b.created_at));
  if (visible.length === 0) {
    return "";
  }

  // 1-based position of every visible comment; headers and cross-references use it as "#n".
  const indexById = new Map(visible.map((c, i) => [c.id, i + 1]));

  let rendered = visible;
  let elidedCount = 0;
  if (maxComments && visible.length > maxComments) {
    const keep = criticalIds(visible);
    const recentThreshold = visible.length - maxComments;
    rendered = visible.filter((c, i) => i >= recentThreshold || keep.has(c.id));
    elidedCount = visible.length - rendered.length;
  }

  // Ids that are not part of the visible thread fall back to their first eight characters.
  const ref = (id) => {
    const n = indexById.get(id);
    return n ? `#${n}` : `#${id.slice(0, 8)}`;
  };

  const lines = [];
  if (elidedCount > 0) {
    // Stamped with the oldest timestamp so the note sorts to the top of the thread.
    lines.push({
      at: visible[0]?.created_at ?? "",
      text: `(${elidedCount} earlier comment(s) omitted for brevity)`
    });
  }

  for (const c of rendered) {
    const tags = [];
    if (c.edited_at) {
      tags.push("edited");
    }
    if (c.supersedes_id) {
      tags.push(`supersedes ${sanitizeHeaderField(ref(c.supersedes_id))}`);
    }
    if (c.confirms_id) {
      tags.push(`confirms ${sanitizeHeaderField(ref(c.confirms_id))}`);
    }
    if (c.resolved_at) {
      tags.push("resolved");
    }
    const tagStr = tags.length ? ` | ${tags.join(" | ")}` : "";
    const header = `[${sanitizeHeaderField(ref(c.id))} | ${sanitizeHeaderField(c.author_type)} | ${authorLabel(c)} | ${sanitizeHeaderField(c.comment_type)} | ${sanitizeHeaderField(c.created_at)}${tagStr}]`;
    // Neutralize angle brackets so a body cannot close <comment-body> itself or open tags of
    // its own. The replacements are spelled with \u0026 (an ampersand) so they evaluate to
    // the "lt"/"gt" entities at runtime yet cannot be collapsed back into bare brackets by
    // tooling that decodes HTML entities in source text.
    const fencedBody = c.body.trim().replaceAll("<", "\u0026lt;").replaceAll(">", "\u0026gt;");
    lines.push({
      at: c.created_at,
      text: `${header}\n<comment-body>\n${fencedBody}\n</comment-body>`
    });
  }

  for (const a of activity) {
    const actor = a.actor ? `${a.actor} ` : "";
    lines.push({ at: a.at, text: `· (system) ${a.at} — ${actor}${a.text}` });
  }

  // Comments and activity share one timeline.
  lines.sort((a, b) => a.at.localeCompare(b.at));
  const body = lines.map((l) => l.text).join("\n\n");
  const instruction = includeInstructions ? `\n\n${CONFLICT_INSTRUCTION}` : "";
  return `## ${heading} (oldest → newest)\n\n${body}${instruction}`;
}
|
|
1272
|
+
// Guidance appended after a serialized thread when serializeCommentThread is called with
// includeInstructions enabled; assigned when the module initializer runs.
var CONFLICT_INSTRUCTION;
var init_commentSerializer = __esm(() => {
  CONFLICT_INSTRUCTION = [
    "When two comments conflict, prefer the latest created_at, UNLESS a later ",
    "comment explicitly confirms or restates the earlier finding. Evaluate ",
    "substance, not just recency. Cite the comment id(s) you relied on."
  ].join("");
});
|
|
1276
|
+
|
|
1277
|
+
// ../harmony-shared/dist/constants.js
|
|
1278
|
+
// Shared timing constants; assigned when the module initializer runs.
// NOTE(review): the values look like milliseconds — confirm against the consumers.
var TIMINGS;
var init_constants = __esm(() => {
  const second = 1000;
  const minute = second * 60;
  const hour = minute * 60;
  TIMINGS = {
    SEARCH_DEBOUNCE: 300,
    AUTOSAVE_DEBOUNCE: second,
    TOAST_DURATION: 3 * second,
    QUERY_STALE_TIME: minute * 5,
    QUERY_GC_TIME: hour * 24
  };
});
|
|
1288
|
+
// ../harmony-shared/dist/logger.js
|
|
1289
|
+
// Empty initializer for the logger module: init_dist calls it, and it does nothing.
var init_logger = () => {};
|
|
1290
|
+
// ../harmony-shared/dist/projectTemplates.js
|
|
1291
|
+
// Empty initializer for the projectTemplates module: init_dist calls it, and it does nothing.
var init_projectTemplates = () => {};
|
|
1292
|
+
|
|
1293
|
+
// ../harmony-shared/dist/reviewMethodology.js
|
|
1294
|
+
// Review methodology shared by the review prompts. Each constant is a markdown or
// JSON-like fragment that is interpolated into a larger prompt elsewhere in the bundle.

// Two-pass checklist, severity scale and suppression list. The XSS bullet lives under
// "Security & Access Control" rather than "Race Conditions & Concurrency", so the checklist
// agrees with the severity scale and the "security" finding category.
var REVIEW_SYSTEM_PROMPT = `You are a senior code reviewer. Follow this two-pass methodology strictly.
Report findings; do NOT fix them. This is a read-only review.

Review the diff through five lenses on every pass: functionality, security,
performance, code quality, and best practices. For every finding, set
\`relatedToDiff\`: true when the change under review introduced or exposed it,
false when it is a pre-existing issue you happened to notice. Only diff-caused
findings gate the verdict — pre-existing ones are reported for context and never
block.

## Two-Pass Review

### Pass 1 — CRITICAL (highest severity)

**SQL & Data Safety**
- String interpolation in SQL — use parameterized queries / prepared statements
- TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE

**Race Conditions & Concurrency**
- Read-check-write without uniqueness constraint or duplicate key handling
- Status transitions without atomic WHERE old_status UPDATE SET new_status

**Security & Access Control**
- Hardcoded secrets, API keys, or credentials committed to source
- New endpoints, mutations, or service-role/RLS-exempt queries missing an auth or ownership check
- Over-broad CORS, missing input validation on a trust boundary, injection beyond SQL (command, path, template)
- Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)

**LLM Output Trust Boundary**
- LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())
- Structured tool output accepted without type/shape checks before database writes

**Enum & Value Completeness**
- When the diff introduces a new enum/status/type value, trace it through every consumer
- Check allowlists, filter arrays, and case/if-elsif chains for the new value
- Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff

### Pass 2 — INFORMATIONAL (lower severity)

**Functionality & Edge Cases**
- Logic errors, off-by-one, unhandled null/undefined, wrong API or library usage
- Conditional side effects: code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)

**Performance**
- O(n²) algorithms and O(n*m) lookups (Array.find in a loop instead of a Map/index)
- N+1 queries, unbounded fetches missing pagination, repeated work that should be cached/memoized
- Unnecessary React re-renders (unstable props/deps, inline object/array literals); leaked subscriptions, timers, or listeners
- Inline styles re-parsed every render

**Code Quality**
- Dead code: variables assigned but never read, unreachable branches
- Duplication that should be extracted, over-long functions, unclear naming
- \`any\` / unchecked casts that defeat the type system
- Comments/docstrings describing old behavior after code changed

**Best Practices & Conventions**
- Deviations from established project conventions and framework idioms / anti-patterns
- React hook dependency arrays that are wrong, missing, or over-broad
- Accessibility gaps on new UI: missing labels, roles, alt text, or keyboard paths

**Test Gaps**
- Missing negative-path tests for new error handling
- Security enforcement features without integration tests

**Completeness Gaps**
- Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add

## Severity Classification

- **critical**: SQL safety, race conditions, XSS, secrets/auth/injection holes, LLM trust boundary violations, enum completeness gaps causing runtime errors
- **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects, performance regressions on a hot path
- **minor**: Dead code, stale comments, test gaps, naming/duplication, minor view issues, cosmetic completeness gaps

## Suppressions — DO NOT flag these

- Redundancy that aids readability (e.g., present? redundant with length > 20)
- "Add a comment explaining why this threshold was chosen" — thresholds change, comments rot
- Consistency-only changes (wrapping a value to match how another constant is guarded)
- Regex edge cases when input is constrained and the edge case never occurs in practice
- Eval threshold changes — these are tuned empirically
- Harmless no-ops (e.g., .reject on an element never in the array)
- Pre-existing issues unrelated to the diff, beyond a single noted finding (set relatedToDiff:false; never block on them)
- ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging`;

// How to derive and grade acceptance checks from the card description and its subtasks.
var REVIEW_ACCEPTANCE_CHECKS = `## Acceptance Checks

Before judging code quality, verify the change actually satisfies the card.
Derive one acceptance check per concrete requirement in the card description and
one per subtask (the stated acceptance criteria). For each, assign a status from
hard evidence — cite the file:line you read or the dev-server behaviour you
observed that proves it:

- **pass** — implemented and verified by code you read or behaviour you observed
- **partial** — started but incomplete (a missing branch, an edge case, or one of several bundled requirements)
- **fail** — required but absent, or implemented incorrectly
- **unverifiable** — cannot be confirmed from the diff or a running app (state why)

Do NOT mark a check "pass" on the implementing agent's say-so or a subtask's
checkbox alone — only on evidence you found yourself. Any \`fail\` or \`partial\`
check is an unaddressed requirement and forces a rejected verdict.`;

// Manual checklist for changes that affect rendered pages.
var QA_VISUAL_CHECKLIST = `## Visual QA Checklist

For each page affected by the changes:

1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.
2. **Interactive elements** — Click every button, link, and control. Does each do what it says?
3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.
4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.
5. **States** — Check empty state, loading state, error state, overflow state.
6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.
7. **Responsiveness** — If the change is visual, check mobile viewport (375px).

### SPA-Specific (React/Vite)
- Use snapshot for navigation — client-side routes may not appear in link lists.
- Check for stale state: navigate away and back — does data refresh correctly?
- Test browser back/forward — does the app handle history correctly?
- Watch for hydration errors or layout shifts after dynamic content loads.`;

// Shape of the verdict object the reviewer is asked to emit; extractResult reads these fields.
var REVIEW_VERDICT_SCHEMA = `{
"verdict": "approved" | "rejected",
"summary": "Brief overall assessment",
"scopeCheck": {
"status": "clean" | "drift" | "missing",
"notes": "Optional explanation of scope issues"
},
"acceptanceChecks": [
{
"criterion": "The requirement or subtask being verified",
"status": "pass" | "partial" | "fail" | "unverifiable",
"evidence": "file:line or observed behaviour that proves the status"
}
],
"findings": [
{
"severity": "critical" | "major" | "minor",
"category": "sql-safety | race-condition | security | llm-trust | enum-completeness | functional | performance | code-quality | best-practices | accessibility | visual | ux | console | scope | other",
"title": "Short title",
"description": "Detailed description of the issue",
"location": "file:line (if applicable)",
"relatedToDiff": true
}
]
}`;

// Rules that turn acceptance checks and diff-related findings into a verdict.
var REVIEW_DECISION_RULES = `Counting only findings with \`relatedToDiff: true\`:
- **rejected**: Any acceptance check that is \`fail\` or \`partial\`, any \`critical\` finding, unaddressed requirements, or 2+ \`major\` findings.
- **approved**: Every acceptance check \`pass\` (or \`unverifiable\` with a stated reason), no critical findings, at most 1 major finding; minor findings OK.`;
|
|
1434
|
+
// ../harmony-shared/dist/types.js
|
|
1435
|
+
// Empty initializer for the types module: init_dist calls it, and it does nothing.
var init_types2 = () => {};
|
|
1436
|
+
|
|
1437
|
+
// ../harmony-shared/dist/index.js
|
|
1438
|
+
// Initializer for the shared package's index module: runs each sub-module initializer
// in the order listed below.
var init_dist = __esm(() => {
  init_branchRef();
  init_cardLinks();
  init_classification();
  init_commentSerializer();
  init_constants();
  init_logger();
  init_projectTemplates();
  init_types2();
});
|
|
1160
1448
|
|
|
1161
1449
|
// src/pm.ts
|
|
1162
1450
|
import { execFileSync as execFileSync2 } from "node:child_process";
|
|
@@ -1418,9 +1706,8 @@ function checkoutExistingBranch(basePath, branchName) {
|
|
|
1418
1706
|
function extractBranchFromDescription(description) {
|
|
1419
1707
|
if (!description)
|
|
1420
1708
|
return null;
|
|
1421
|
-
const
|
|
1422
|
-
|
|
1423
|
-
if (branch && !/^[a-zA-Z0-9/_.-]+$/.test(branch)) {
|
|
1709
|
+
const branch = description.match(BRANCH_REF_PATTERN)?.[1] ?? null;
|
|
1710
|
+
if (branch && !SAFE_GIT_REF_PATTERN.test(branch)) {
|
|
1424
1711
|
log.warn(TAG6, `Extracted branch name contains unsafe characters: ${branch}`);
|
|
1425
1712
|
return null;
|
|
1426
1713
|
}
|
|
@@ -1428,6 +1715,7 @@ function extractBranchFromDescription(description) {
|
|
|
1428
1715
|
}
|
|
1429
1716
|
var TAG6 = "review-worktree";
|
|
1430
1717
|
var init_review_worktree = __esm(() => {
|
|
1718
|
+
init_dist();
|
|
1431
1719
|
init_log();
|
|
1432
1720
|
init_pm();
|
|
1433
1721
|
init_worktree();
|
|
@@ -3374,6 +3662,22 @@ function buildFindingComments(findings) {
|
|
|
3374
3662
|
bodies.push(current);
|
|
3375
3663
|
return bodies;
|
|
3376
3664
|
}
|
|
3665
|
+
/**
 * Builds the one-line acceptance summary shown in review comments, e.g.
 * "Acceptance: 3/5 pass (1 fail, 1 unverifiable)".
 *
 * @param {Array<{status?: string}>|null|undefined} checks - Acceptance checks to tally.
 * @returns {string} The summary line, or "" when there are no checks.
 */
function acceptanceSummaryLine(checks) {
  if (!checks || checks.length === 0) {
    return "";
  }
  const counts = { pass: 0, partial: 0, fail: 0, unverifiable: 0 };
  const known = Object.keys(counts);
  for (const check of checks) {
    // Anything outside the four known statuses is tallied as "unverifiable" — the same
    // fallback extractResult applies — so the breakdown always adds up to the total.
    const status = known.includes(check?.status) ? check.status : "unverifiable";
    counts[status] += 1;
  }
  // Only non-passing buckets are spelled out, most severe first.
  const flagged = ["fail", "partial", "unverifiable"]
    .filter((status) => counts[status] > 0)
    .map((status) => `${counts[status]} ${status}`);
  const detail = flagged.length ? ` (${flagged.join(", ")})` : "";
  return `Acceptance: ${counts.pass}/${checks.length} pass${detail}`;
}
|
|
3377
3681
|
function tailRunLog(path, bytes = RUN_LOG_TAIL_BYTES) {
|
|
3378
3682
|
try {
|
|
3379
3683
|
const size = statSync(path).size;
|
|
@@ -3387,14 +3691,32 @@ function tailRunLog(path, bytes = RUN_LOG_TAIL_BYTES) {
|
|
|
3387
3691
|
}
|
|
3388
3692
|
}
|
|
3389
3693
|
function extractResult(parsed) {
|
|
3390
|
-
|
|
3694
|
+
let verdict = parsed.verdict === "approved" || parsed.verdict === "rejected" ? parsed.verdict : "rejected";
|
|
3391
3695
|
const findings = Array.isArray(parsed.findings) ? parsed.findings.filter((f) => typeof f === "object" && f !== null && ("title" in f)).map((f) => ({
|
|
3392
3696
|
severity: f.severity === "critical" ? "critical" : f.severity === "minor" ? "minor" : "major",
|
|
3393
3697
|
title: String(f.title ?? "Untitled finding"),
|
|
3394
3698
|
description: String(f.description ?? ""),
|
|
3395
3699
|
category: f.category ? String(f.category) : undefined,
|
|
3396
|
-
location: f.location ? String(f.location) : undefined
|
|
3700
|
+
location: f.location ? String(f.location) : undefined,
|
|
3701
|
+
relatedToDiff: f.relatedToDiff !== false
|
|
3397
3702
|
})) : [];
|
|
3703
|
+
const acceptanceChecks = Array.isArray(parsed.acceptanceChecks) ? parsed.acceptanceChecks.filter((c) => typeof c === "object" && c !== null && ("criterion" in c)).map((c) => ({
|
|
3704
|
+
criterion: String(c.criterion ?? "Unnamed criterion"),
|
|
3705
|
+
status: ["pass", "partial", "fail", "unverifiable"].includes(c.status) ? c.status : "unverifiable",
|
|
3706
|
+
evidence: c.evidence ? String(c.evidence) : undefined
|
|
3707
|
+
})) : undefined;
|
|
3708
|
+
const unmet = (acceptanceChecks ?? []).filter((c) => c.status === "fail" || c.status === "partial");
|
|
3709
|
+
if (verdict === "approved" && unmet.length > 0) {
|
|
3710
|
+
verdict = "rejected";
|
|
3711
|
+
findings.unshift({
|
|
3712
|
+
severity: "major",
|
|
3713
|
+
title: `Unmet acceptance criteria (${unmet.length})`,
|
|
3714
|
+
description: unmet.map((c) => `- [${c.status}] ${c.criterion}${c.evidence ? ` — ${c.evidence}` : ""}`).join(`
|
|
3715
|
+
`),
|
|
3716
|
+
category: "scope",
|
|
3717
|
+
relatedToDiff: true
|
|
3718
|
+
});
|
|
3719
|
+
}
|
|
3398
3720
|
const scopeCheck = parsed.scopeCheck && typeof parsed.scopeCheck === "object" && "status" in parsed.scopeCheck ? {
|
|
3399
3721
|
status: ["clean", "drift", "missing"].includes(parsed.scopeCheck.status) ? parsed.scopeCheck.status : "clean",
|
|
3400
3722
|
notes: parsed.scopeCheck.notes ? String(parsed.scopeCheck.notes) : undefined
|
|
@@ -3403,6 +3725,7 @@ function extractResult(parsed) {
|
|
|
3403
3725
|
verdict,
|
|
3404
3726
|
summary: String(parsed.summary ?? "").slice(0, 2000),
|
|
3405
3727
|
scopeCheck,
|
|
3728
|
+
acceptanceChecks,
|
|
3406
3729
|
findings
|
|
3407
3730
|
};
|
|
3408
3731
|
}
|
|
@@ -3576,6 +3899,7 @@ ${runLogTail}
|
|
|
3576
3899
|
const body = [
|
|
3577
3900
|
"**Review — approved.**",
|
|
3578
3901
|
result.summary || "",
|
|
3902
|
+
acceptanceSummaryLine(result.acceptanceChecks),
|
|
3579
3903
|
scopeLine,
|
|
3580
3904
|
result.findings.length > 0 ? `${result.findings.length} minor finding(s) noted.` : "",
|
|
3581
3905
|
prUrl ? `PR: ${prUrl}` : ""
|
|
@@ -3591,10 +3915,11 @@ ${runLogTail}
|
|
|
3591
3915
|
});
|
|
3592
3916
|
log.info(TAG17, `#${card.short_id} approved${prUrl ? ` — PR: ${prUrl}` : ""} — labeled "${config.review.approvedLabel}"`);
|
|
3593
3917
|
} else {
|
|
3594
|
-
const
|
|
3595
|
-
const
|
|
3918
|
+
const reworkFindings = result.findings.filter((f) => f.relatedToDiff !== false);
|
|
3919
|
+
const criticalFindings = reworkFindings.filter((f) => f.severity === "critical").slice(0, MAX_FINDINGS);
|
|
3920
|
+
const majorFindings = reworkFindings.filter((f) => f.severity === "major").slice(0, MAX_FINDINGS);
|
|
3596
3921
|
const linkedFindings = [...criticalFindings, ...majorFindings];
|
|
3597
|
-
const minorFindings =
|
|
3922
|
+
const minorFindings = reworkFindings.filter((f) => f.severity === "minor").slice(0, MAX_FINDINGS);
|
|
3598
3923
|
if (currentCycle >= maxCycles) {
|
|
3599
3924
|
log.warn(TAG17, `#${card.short_id} reached max review cycles (${maxCycles}), moving to Done with note`);
|
|
3600
3925
|
await moveCardToColumn(client, card, config.review.moveToColumn);
|
|
@@ -3662,6 +3987,7 @@ ${runLogTail}
|
|
|
3662
3987
|
const body = [
|
|
3663
3988
|
"**Review — rejected.**",
|
|
3664
3989
|
result.summary || "",
|
|
3990
|
+
acceptanceSummaryLine(result.acceptanceChecks),
|
|
3665
3991
|
scopeLine,
|
|
3666
3992
|
`${criticalFindings.length} critical, ${majorFindings.length} major, ${minorFindings.length} minor finding(s).`
|
|
3667
3993
|
].filter(Boolean).join(`
|
|
@@ -3733,239 +4059,6 @@ var init_review_completion = __esm(() => {
|
|
|
3733
4059
|
init_types();
|
|
3734
4060
|
init_worktree();
|
|
3735
4061
|
});
|
|
3736
|
-
// ../harmony-shared/dist/cardLinks.js
|
|
3737
|
-
var init_cardLinks = () => {};
|
|
3738
|
-
// ../harmony-shared/dist/classification.js
|
|
3739
|
-
function escalateTier(tier) {
|
|
3740
|
-
const i = MODEL_TIERS.indexOf(tier);
|
|
3741
|
-
return MODEL_TIERS[Math.min(i + 1, MODEL_TIERS.length - 1)];
|
|
3742
|
-
}
|
|
3743
|
-
function isModelTier(v) {
|
|
3744
|
-
return typeof v === "string" && MODEL_TIERS.includes(v);
|
|
3745
|
-
}
|
|
3746
|
-
var MODEL_TIERS;
|
|
3747
|
-
var init_classification = __esm(() => {
|
|
3748
|
-
MODEL_TIERS = ["simple", "advanced", "research"];
|
|
3749
|
-
});
|
|
3750
|
-
|
|
3751
|
-
// ../harmony-shared/dist/commentSerializer.js
|
|
3752
|
-
function sanitizeHeaderField(value) {
|
|
3753
|
-
return value.replace(/[\]\r\n|<>]/g, " ").trim() || "—";
|
|
3754
|
-
}
|
|
3755
|
-
function authorLabel(c) {
|
|
3756
|
-
if (c.author_type === "agent")
|
|
3757
|
-
return "AI agent";
|
|
3758
|
-
const raw = c.author?.full_name || "teammate";
|
|
3759
|
-
return sanitizeHeaderField(raw);
|
|
3760
|
-
}
|
|
3761
|
-
function criticalIds(comments) {
|
|
3762
|
-
const keep = new Set;
|
|
3763
|
-
for (const c of comments) {
|
|
3764
|
-
if (c.comment_type === "decision")
|
|
3765
|
-
keep.add(c.id);
|
|
3766
|
-
if (c.supersedes_id) {
|
|
3767
|
-
keep.add(c.id);
|
|
3768
|
-
keep.add(c.supersedes_id);
|
|
3769
|
-
}
|
|
3770
|
-
if (c.confirms_id) {
|
|
3771
|
-
keep.add(c.id);
|
|
3772
|
-
keep.add(c.confirms_id);
|
|
3773
|
-
}
|
|
3774
|
-
}
|
|
3775
|
-
return keep;
|
|
3776
|
-
}
|
|
3777
|
-
function serializeCommentThread(comments, options = {}) {
|
|
3778
|
-
const { heading = "Conversation", includeInstructions = true, activity = [], maxComments } = options;
|
|
3779
|
-
const visible = comments.filter((c) => !c.deleted_at).slice().sort((a, b) => a.created_at.localeCompare(b.created_at));
|
|
3780
|
-
if (visible.length === 0)
|
|
3781
|
-
return "";
|
|
3782
|
-
const indexById = new Map;
|
|
3783
|
-
visible.forEach((c, i) => {
|
|
3784
|
-
indexById.set(c.id, i + 1);
|
|
3785
|
-
});
|
|
3786
|
-
let rendered = visible;
|
|
3787
|
-
let elidedCount = 0;
|
|
3788
|
-
if (maxComments && visible.length > maxComments) {
|
|
3789
|
-
const keep = criticalIds(visible);
|
|
3790
|
-
const recentThreshold = visible.length - maxComments;
|
|
3791
|
-
rendered = visible.filter((c, i) => i >= recentThreshold || keep.has(c.id));
|
|
3792
|
-
elidedCount = visible.length - rendered.length;
|
|
3793
|
-
}
|
|
3794
|
-
const ref = (id) => {
|
|
3795
|
-
const n = indexById.get(id);
|
|
3796
|
-
return n ? `#${n}` : `#${id.slice(0, 8)}`;
|
|
3797
|
-
};
|
|
3798
|
-
const lines = [];
|
|
3799
|
-
if (elidedCount > 0) {
|
|
3800
|
-
lines.push({
|
|
3801
|
-
at: visible[0]?.created_at ?? "",
|
|
3802
|
-
text: `(${elidedCount} earlier comment(s) omitted for brevity)`
|
|
3803
|
-
});
|
|
3804
|
-
}
|
|
3805
|
-
for (const c of rendered) {
|
|
3806
|
-
const tags = [];
|
|
3807
|
-
if (c.edited_at)
|
|
3808
|
-
tags.push("edited");
|
|
3809
|
-
if (c.supersedes_id)
|
|
3810
|
-
tags.push(`supersedes ${ref(c.supersedes_id)}`);
|
|
3811
|
-
if (c.confirms_id)
|
|
3812
|
-
tags.push(`confirms ${ref(c.confirms_id)}`);
|
|
3813
|
-
if (c.resolved_at)
|
|
3814
|
-
tags.push("resolved");
|
|
3815
|
-
const tagStr = tags.length ? ` | ${tags.join(" | ")}` : "";
|
|
3816
|
-
const header = `[${sanitizeHeaderField(ref(c.id))} | ${sanitizeHeaderField(c.author_type)} | ${authorLabel(c)} | ${sanitizeHeaderField(c.comment_type)} | ${sanitizeHeaderField(c.created_at)}${tagStr}]`;
|
|
3817
|
-
const fencedBody = c.body.trim().replaceAll("<", "<").replaceAll(">", ">");
|
|
3818
|
-
lines.push({
|
|
3819
|
-
at: c.created_at,
|
|
3820
|
-
text: `${header}
|
|
3821
|
-
<comment-body>
|
|
3822
|
-
${fencedBody}
|
|
3823
|
-
</comment-body>`
|
|
3824
|
-
});
|
|
3825
|
-
}
|
|
3826
|
-
for (const a of activity) {
|
|
3827
|
-
const actor = a.actor ? `${a.actor} ` : "";
|
|
3828
|
-
lines.push({ at: a.at, text: `· (system) ${a.at} — ${actor}${a.text}` });
|
|
3829
|
-
}
|
|
3830
|
-
lines.sort((a, b) => a.at.localeCompare(b.at));
|
|
3831
|
-
const body = lines.map((l) => l.text).join(`
|
|
3832
|
-
|
|
3833
|
-
`);
|
|
3834
|
-
const instruction = includeInstructions ? `
|
|
3835
|
-
|
|
3836
|
-
${CONFLICT_INSTRUCTION}` : "";
|
|
3837
|
-
return `## ${heading} (oldest → newest)
|
|
3838
|
-
|
|
3839
|
-
${body}${instruction}`;
|
|
3840
|
-
}
|
|
3841
|
-
var CONFLICT_INSTRUCTION;
|
|
3842
|
-
var init_commentSerializer = __esm(() => {
|
|
3843
|
-
CONFLICT_INSTRUCTION = "When two comments conflict, prefer the latest created_at, UNLESS a later " + "comment explicitly confirms or restates the earlier finding. Evaluate " + "substance, not just recency. Cite the comment id(s) you relied on.";
|
|
3844
|
-
});
|
|
3845
|
-
|
|
3846
|
-
// ../harmony-shared/dist/constants.js
|
|
3847
|
-
var TIMINGS;
|
|
3848
|
-
var init_constants = __esm(() => {
|
|
3849
|
-
TIMINGS = {
|
|
3850
|
-
SEARCH_DEBOUNCE: 300,
|
|
3851
|
-
AUTOSAVE_DEBOUNCE: 1000,
|
|
3852
|
-
TOAST_DURATION: 3000,
|
|
3853
|
-
QUERY_STALE_TIME: 1000 * 60 * 5,
|
|
3854
|
-
QUERY_GC_TIME: 1000 * 60 * 60 * 24
|
|
3855
|
-
};
|
|
3856
|
-
});
|
|
3857
|
-
// ../harmony-shared/dist/logger.js
|
|
3858
|
-
var init_logger = () => {};
|
|
3859
|
-
// ../harmony-shared/dist/projectTemplates.js
|
|
3860
|
-
var init_projectTemplates = () => {};
|
|
3861
|
-
|
|
3862
|
-
// ../harmony-shared/dist/reviewMethodology.js
|
|
3863
|
-
var REVIEW_SYSTEM_PROMPT = `You are a senior code reviewer. Follow this two-pass methodology strictly.
|
|
3864
|
-
Report findings; do NOT fix them. This is a read-only review.
|
|
3865
|
-
|
|
3866
|
-
## Two-Pass Review
|
|
3867
|
-
|
|
3868
|
-
### Pass 1 — CRITICAL (highest severity)
|
|
3869
|
-
|
|
3870
|
-
**SQL & Data Safety**
|
|
3871
|
-
- String interpolation in SQL — use parameterized queries / prepared statements
|
|
3872
|
-
- TOCTOU races: check-then-set patterns that should be atomic WHERE + UPDATE
|
|
3873
|
-
|
|
3874
|
-
**Race Conditions & Concurrency**
|
|
3875
|
-
- Read-check-write without uniqueness constraint or duplicate key handling
|
|
3876
|
-
- Status transitions without atomic WHERE old_status UPDATE SET new_status
|
|
3877
|
-
- Unsafe HTML rendering (dangerouslySetInnerHTML, v-html) on user-controlled data (XSS)
|
|
3878
|
-
|
|
3879
|
-
**LLM Output Trust Boundary**
|
|
3880
|
-
- LLM-generated values written to DB without format validation (EMAIL_REGEXP, URI.parse, .trim())
|
|
3881
|
-
- Structured tool output accepted without type/shape checks before database writes
|
|
3882
|
-
|
|
3883
|
-
**Enum & Value Completeness**
|
|
3884
|
-
- When the diff introduces a new enum/status/type value, trace it through every consumer
|
|
3885
|
-
- Check allowlists, filter arrays, and case/if-elsif chains for the new value
|
|
3886
|
-
- Use Grep to find all references to sibling values and Read each match — look OUTSIDE the diff
|
|
3887
|
-
|
|
3888
|
-
### Pass 2 — INFORMATIONAL (lower severity)
|
|
3889
|
-
|
|
3890
|
-
**Conditional Side Effects**
|
|
3891
|
-
- Code paths that branch but forget a side effect on one branch (e.g., promoting without attaching URL)
|
|
3892
|
-
|
|
3893
|
-
**Dead Code & Consistency**
|
|
3894
|
-
- Variables assigned but never read
|
|
3895
|
-
- Comments/docstrings describing old behavior after code changed
|
|
3896
|
-
|
|
3897
|
-
**Test Gaps**
|
|
3898
|
-
- Missing negative-path tests for new error handling
|
|
3899
|
-
- Security enforcement features without integration tests
|
|
3900
|
-
|
|
3901
|
-
**Completeness Gaps**
|
|
3902
|
-
- Partial enum handling, incomplete error paths, missing edge cases that are straightforward to add
|
|
3903
|
-
|
|
3904
|
-
**View/Frontend**
|
|
3905
|
-
- O(n*m) lookups in views (Array.find in a loop instead of Map/index)
|
|
3906
|
-
- Inline styles re-parsed every render
|
|
3907
|
-
|
|
3908
|
-
## Severity Classification
|
|
3909
|
-
|
|
3910
|
-
- **critical**: SQL safety, race conditions, XSS, LLM trust boundary violations, enum completeness gaps causing runtime errors
|
|
3911
|
-
- **major**: Missing requirements, broken functionality, significant completeness gaps, conditional side effects
|
|
3912
|
-
- **minor**: Dead code, stale comments, test gaps, minor view issues, cosmetic completeness gaps
|
|
3913
|
-
|
|
3914
|
-
## Suppressions — DO NOT flag these
|
|
3915
|
-
|
|
3916
|
-
- Redundancy that aids readability (e.g., present? redundant with length > 20)
|
|
3917
|
-
- "Add a comment explaining why this threshold was chosen" — thresholds change, comments rot
|
|
3918
|
-
- Consistency-only changes (wrapping a value to match how another constant is guarded)
|
|
3919
|
-
- Regex edge cases when input is constrained and the edge case never occurs in practice
|
|
3920
|
-
- Eval threshold changes — these are tuned empirically
|
|
3921
|
-
- Harmless no-ops (e.g., .reject on an element never in the array)
|
|
3922
|
-
- ANYTHING already addressed in the diff you are reviewing — read the FULL diff before flagging`, QA_VISUAL_CHECKLIST = `## Visual QA Checklist
|
|
3923
|
-
|
|
3924
|
-
For each page affected by the changes:
|
|
3925
|
-
|
|
3926
|
-
1. **Visual scan** — Screenshot the page. Check for layout breaks, broken images, alignment issues, z-index problems.
|
|
3927
|
-
2. **Interactive elements** — Click every button, link, and control. Does each do what it says?
|
|
3928
|
-
3. **Forms** — Fill and submit. Test empty submission, invalid data, edge cases.
|
|
3929
|
-
4. **Navigation** — Check all paths in/out. Breadcrumbs, back button, deep links.
|
|
3930
|
-
5. **States** — Check empty state, loading state, error state, overflow state.
|
|
3931
|
-
6. **Console** — Check for JS exceptions, failed network requests (4xx/5xx), CORS errors after interactions.
|
|
3932
|
-
7. **Responsiveness** — If the change is visual, check mobile viewport (375px).
|
|
3933
|
-
|
|
3934
|
-
### SPA-Specific (React/Vite)
|
|
3935
|
-
- Use snapshot for navigation — client-side routes may not appear in link lists.
|
|
3936
|
-
- Check for stale state: navigate away and back — does data refresh correctly?
|
|
3937
|
-
- Test browser back/forward — does the app handle history correctly?
|
|
3938
|
-
- Watch for hydration errors or layout shifts after dynamic content loads.`, REVIEW_VERDICT_SCHEMA = `{
|
|
3939
|
-
"verdict": "approved" | "rejected",
|
|
3940
|
-
"summary": "Brief overall assessment",
|
|
3941
|
-
"scopeCheck": {
|
|
3942
|
-
"status": "clean" | "drift" | "missing",
|
|
3943
|
-
"notes": "Optional explanation of scope issues"
|
|
3944
|
-
},
|
|
3945
|
-
"findings": [
|
|
3946
|
-
{
|
|
3947
|
-
"severity": "critical" | "major" | "minor",
|
|
3948
|
-
"category": "sql-safety | race-condition | llm-trust | enum-completeness | visual | functional | ux | console | scope | other",
|
|
3949
|
-
"title": "Short title",
|
|
3950
|
-
"description": "Detailed description of the issue",
|
|
3951
|
-
"location": "file:line (if applicable)"
|
|
3952
|
-
}
|
|
3953
|
-
]
|
|
3954
|
-
}`, REVIEW_DECISION_RULES = `- **rejected**: Any \`critical\` finding, unaddressed requirements, or 2+ \`major\` findings.
|
|
3955
|
-
- **approved**: No critical findings, at most 1 major finding with minor findings OK.`;
|
|
3956
|
-
// ../harmony-shared/dist/types.js
|
|
3957
|
-
var init_types2 = () => {};
|
|
3958
|
-
|
|
3959
|
-
// ../harmony-shared/dist/index.js
|
|
3960
|
-
var init_dist = __esm(() => {
|
|
3961
|
-
init_cardLinks();
|
|
3962
|
-
init_classification();
|
|
3963
|
-
init_commentSerializer();
|
|
3964
|
-
init_constants();
|
|
3965
|
-
init_logger();
|
|
3966
|
-
init_projectTemplates();
|
|
3967
|
-
init_types2();
|
|
3968
|
-
});
|
|
3969
4062
|
|
|
3970
4063
|
// src/review-knowledge.ts
|
|
3971
4064
|
var init_review_knowledge = __esm(() => {
|
|
@@ -3979,6 +4072,8 @@ You are thorough, specific, and cite file:line locations for every finding.
|
|
|
3979
4072
|
|
|
3980
4073
|
${REVIEW_SYSTEM_PROMPT}
|
|
3981
4074
|
|
|
4075
|
+
${REVIEW_ACCEPTANCE_CHECKS}
|
|
4076
|
+
|
|
3982
4077
|
${QA_VISUAL_CHECKLIST}`;
|
|
3983
4078
|
}
|
|
3984
4079
|
function buildReviewUserPrompt(enriched, branchName, worktreePath, previewUrl, diffSummary, baseBranch) {
|
|
@@ -4013,19 +4108,22 @@ you have Read, Grep, Glob, and read-only Bash:
|
|
|
4013
4108
|
|
|
4014
4109
|
Follow these steps in order:
|
|
4015
4110
|
|
|
4016
|
-
### Step 1:
|
|
4017
|
-
|
|
4018
|
-
|
|
4019
|
-
|
|
4020
|
-
|
|
4021
|
-
|
|
4111
|
+
### Step 1: Acceptance Checks
|
|
4112
|
+
Per the Acceptance Checks methodology in your system instructions, derive one
|
|
4113
|
+
check per requirement in the description and one per subtask above, then assign
|
|
4114
|
+
each a status (pass / partial / fail / unverifiable) backed by evidence you read
|
|
4115
|
+
yourself — never the agent's say-so or a checkbox. Emit these as
|
|
4116
|
+
\`acceptanceChecks\`. Separately, set \`scopeCheck\` to flag scope creep —
|
|
4117
|
+
changes unrelated to the card's requirements.
|
|
4022
4118
|
|
|
4023
|
-
### Step 2: Code Review (Two-Pass)
|
|
4024
|
-
Apply the two-pass review from your system instructions
|
|
4025
|
-
|
|
4026
|
-
- **Pass
|
|
4119
|
+
### Step 2: Code Review (Two-Pass, five lenses)
|
|
4120
|
+
Apply the two-pass review from your system instructions, looking through all
|
|
4121
|
+
five lenses (functionality, security, performance, code quality, best practices):
|
|
4122
|
+
- **Pass 1 (CRITICAL)**: SQL safety, race conditions, security/auth/injection, LLM trust boundary, enum completeness.
|
|
4123
|
+
- **Pass 2 (INFORMATIONAL)**: functionality/edge cases, performance, code quality, best practices/accessibility, test gaps, completeness gaps.
|
|
4027
4124
|
|
|
4028
4125
|
For enum completeness checks, use Grep and Read to trace new values through consumers OUTSIDE the diff.
|
|
4126
|
+
Set \`relatedToDiff\` on every finding — only diff-caused findings gate the verdict.
|
|
4029
4127
|
|
|
4030
4128
|
### Step 3: Visual QA
|
|
4031
4129
|
Use the \`/browse\` skill to navigate to ${previewUrl} and apply the visual QA checklist:
|