kc-beta 0.7.3 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -4
- package/bin/kc-beta.js +20 -6
- package/package.json +3 -2
- package/src/agent/engine.js +493 -132
- package/src/agent/pipelines/_advance-hints.js +92 -0
- package/src/agent/pipelines/_milestone-derive.js +387 -17
- package/src/agent/pipelines/initializer.js +4 -1
- package/src/agent/pipelines/skill-authoring.js +30 -1
- package/src/agent/skill-loader.js +433 -111
- package/src/agent/tools/agent-tool.js +2 -2
- package/src/agent/tools/consult-skill.js +127 -0
- package/src/agent/tools/copy-to-workspace.js +4 -3
- package/src/agent/tools/dashboard-render.js +48 -1
- package/src/agent/tools/document-parse.js +31 -2
- package/src/agent/tools/phase-advance.js +17 -13
- package/src/agent/tools/release.js +378 -8
- package/src/agent/tools/sandbox-exec.js +65 -8
- package/src/agent/tools/worker-llm-call.js +95 -15
- package/src/agent/tools/workspace-file.js +7 -7
- package/src/agent/workspace.js +25 -4
- package/src/cli/components.js +4 -1
- package/src/cli/index.js +97 -1
- package/src/config.js +20 -3
- package/src/marathon/driver.js +217 -0
- package/src/marathon/prompts.js +93 -0
- package/template/.env.template +16 -0
- package/template/AGENT.md +182 -7
- package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
- package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md +1 -0
- package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
- package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
- package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
- package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
- package/template/skills/{zh/meta → en}/document-chunking/SKILL.md +1 -0
- package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
- package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
- package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
- package/template/skills/en/skill-creator/SKILL.md +2 -1
- package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/SKILL.md +58 -4
- package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
- package/template/skills/en/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/version-control → version-control}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +51 -6
- package/template/skills/phase_skills.yaml +112 -0
- package/template/skills/zh/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +15 -0
- package/template/skills/zh/compliance-judgment/SKILL.md +83 -0
- package/template/skills/zh/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
- package/template/skills/zh/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
- package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
- package/template/skills/zh/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
- package/template/skills/zh/document-chunking/SKILL.md +40 -0
- package/template/skills/zh/document-parsing/SKILL.md +102 -0
- package/template/skills/zh/entity-extraction/SKILL.md +121 -0
- package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/quality-control → quality-control}/SKILL.md +10 -0
- package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +40 -0
- package/template/skills/zh/skill-creator/SKILL.md +205 -200
- package/template/skills/zh/skill-to-workflow/SKILL.md +243 -0
- package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
- package/template/skills/zh/tree-processing/SKILL.md +126 -0
- package/template/skills/zh/{meta-meta/version-control → version-control}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +49 -4
- package/template/workflows/common/llm_client.py +168 -0
- package/template/workflows/common/utils.py +132 -0
- package/template/CLAUDE.md +0 -150
- package/template/skills/en/meta/compliance-judgment/SKILL.md +0 -82
- package/template/skills/en/meta/document-chunking/SKILL.md +0 -32
- package/template/skills/en/meta/entity-extraction/SKILL.md +0 -120
- package/template/skills/zh/meta/document-parsing/SKILL.md +0 -101
- package/template/skills/zh/meta/tree-processing/SKILL.md +0 -121
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +0 -188
- /package/template/skills/en/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
- /package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
- /package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
- /package/template/skills/en/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
- /package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
- /package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
- /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
- /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
- /package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
- /package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
- /package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
- /package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
- /package/template/skills/en/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
- /package/template/skills/zh/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
- /package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
- /package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
- /package/template/skills/zh/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
- /package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
- /package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
- /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
- /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
- /package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
- /package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
- /package/template/skills/zh/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
- /package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
- /package/template/skills/zh/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
|
@@ -85,13 +85,19 @@ export class ReleaseTool extends BaseTool {
|
|
|
85
85
|
return new ToolResult(`release template missing at ${TEMPLATE_DIR}`, true);
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
-
// 1
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
88
|
+
// v0.8.1 P9-C: defer the snapshot (git tag) until AFTER the bundle
|
|
89
|
+
// is written + verified. v0.8.0 ordered snapshot-first to "lock in
|
|
90
|
+
// commit + tag regardless of bundle outcome," but E2E #11 资管 v0.8
|
|
91
|
+
// audit found `release-v1` tags with no corresponding bundle dir —
|
|
92
|
+
// tag without bundle confuses downstream consumers. New order:
|
|
93
|
+
// 1. Build bundle (catalog read, copy template, write fixtures, manifest, README)
|
|
94
|
+
// 2. Verify bundle (manifest.json + README.md exist + non-empty)
|
|
95
|
+
// 3. ONLY THEN snapshot (creates the git tag) + back-fill manifest
|
|
96
|
+
// with snapshot tag/commit
|
|
97
|
+
// If verification fails, a `.failed_release` marker is written into
|
|
98
|
+
// the bundle dir and NO tag is created.
|
|
99
|
+
let snapshotTag = null;
|
|
100
|
+
let snapshotCommit = null;
|
|
95
101
|
|
|
96
102
|
// 2. Read catalog and filter
|
|
97
103
|
const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
|
|
@@ -185,8 +191,23 @@ export class ReleaseTool extends BaseTool {
|
|
|
185
191
|
// file through and emitted a stub on miss. We try to populate from
|
|
186
192
|
// known QC artifact shapes here; if nothing matches, fall through
|
|
187
193
|
// to the existing stub fallback.
|
|
194
|
+
// v0.7.5 G-H3: aggregator now runs if calibSrc is MISSING **or** has
|
|
195
|
+
// empty `historical_accuracy`. v0.7.4 audit (both 贷款 + 资管) shipped
|
|
196
|
+
// empty stubs despite QC data on disk — root cause was the v0.7.2
|
|
197
|
+
// gate only checked file existence; a stub written earlier (e.g., on
|
|
198
|
+
// finalization phase entry) kept the aggregator from firing later.
|
|
188
199
|
const calibSrc = path.join(this._workspace.cwd, "confidence_calibration.json");
|
|
189
|
-
|
|
200
|
+
let shouldAggregate = !fs.existsSync(calibSrc);
|
|
201
|
+
if (!shouldAggregate) {
|
|
202
|
+
try {
|
|
203
|
+
const existing = JSON.parse(fs.readFileSync(calibSrc, "utf-8"));
|
|
204
|
+
const ha = existing?.historical_accuracy;
|
|
205
|
+
if (!ha || (typeof ha === "object" && Object.keys(ha).length === 0)) {
|
|
206
|
+
shouldAggregate = true;
|
|
207
|
+
}
|
|
208
|
+
} catch { shouldAggregate = true; } // corrupt → re-aggregate
|
|
209
|
+
}
|
|
210
|
+
if (shouldAggregate) {
|
|
190
211
|
const aggregated = this._aggregateAccuracyFromOutput();
|
|
191
212
|
if (aggregated && Object.keys(aggregated.historical_accuracy).length > 0) {
|
|
192
213
|
fs.writeFileSync(calibSrc, JSON.stringify(aggregated, null, 2) + "\n", "utf-8");
|
|
@@ -247,6 +268,14 @@ export class ReleaseTool extends BaseTool {
|
|
|
247
268
|
.replace(/\{RULES_LIST\}/g, rulesList);
|
|
248
269
|
fs.writeFileSync(path.join(bundleAbs, "README.md"), readme, "utf-8");
|
|
249
270
|
|
|
271
|
+
// v0.7.5 G-H4: sweep any leftover `.tmpl` files from the bundle dir.
|
|
272
|
+
// template/release/v1/ contains manifest.json.tmpl, catalog.json.tmpl,
|
|
273
|
+
// README.md.tmpl. _copyDir's exclude list (line 119) only filters
|
|
274
|
+
// README.md.tmpl; the other two ride along and persist alongside their
|
|
275
|
+
// populated counterparts. Audit (v0.7.4 贷款) confirmed this regression
|
|
276
|
+
// of v0.7.2 G1d which only handled the v1/ scaffold case.
|
|
277
|
+
this._sweepTmplFiles(bundleAbs);
|
|
278
|
+
|
|
250
279
|
// v0.7.2 1d: clean up the template scaffold dir if a customized
|
|
251
280
|
// release was just written alongside it. Both v0.7.1 audit runs
|
|
252
281
|
// shipped with `output/releases/v1/` (template-derived, .tmpl
|
|
@@ -271,6 +300,77 @@ export class ReleaseTool extends BaseTool {
|
|
|
271
300
|
}
|
|
272
301
|
}
|
|
273
302
|
|
|
303
|
+
// v0.8.1 P9-C: bundle verification + transactional snapshot.
|
|
304
|
+
// The manifest + README were written above. Verify they exist with
|
|
305
|
+
// substance (≥200 bytes README, valid JSON manifest with `slug` field).
|
|
306
|
+
// If verification fails, write `.failed_release` marker and skip
|
|
307
|
+
// the git-tag step — no tag-without-bundle.
|
|
308
|
+
const manifestPath = path.join(bundleAbs, "manifest.json");
|
|
309
|
+
const readmePath = path.join(bundleAbs, "README.md");
|
|
310
|
+
let verifyError = null;
|
|
311
|
+
try {
|
|
312
|
+
const mStat = fs.statSync(manifestPath);
|
|
313
|
+
const rStat = fs.statSync(readmePath);
|
|
314
|
+
if (!mStat.isFile() || mStat.size < 50) verifyError = "manifest.json missing or too small";
|
|
315
|
+
else if (!rStat.isFile() || rStat.size < 200) verifyError = "README.md missing or too small";
|
|
316
|
+
else {
|
|
317
|
+
const m = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
|
|
318
|
+
if (m.slug !== slug) verifyError = `manifest.slug=${m.slug} doesn't match expected ${slug}`;
|
|
319
|
+
}
|
|
320
|
+
} catch (e) {
|
|
321
|
+
verifyError = `bundle verification threw: ${e.message}`;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
if (verifyError) {
|
|
325
|
+
try {
|
|
326
|
+
fs.writeFileSync(
|
|
327
|
+
path.join(bundleAbs, ".failed_release"),
|
|
328
|
+
JSON.stringify({
|
|
329
|
+
failed_at: new Date().toISOString(),
|
|
330
|
+
reason: verifyError,
|
|
331
|
+
label,
|
|
332
|
+
slug,
|
|
333
|
+
}, null, 2),
|
|
334
|
+
);
|
|
335
|
+
} catch { /* best-effort */ }
|
|
336
|
+
return new ToolResult(
|
|
337
|
+
`Release bundle verification failed (${verifyError}). NO git tag created. ` +
|
|
338
|
+
`See .failed_release marker in ${bundleRel}/ for details. Fix the bundle issue and re-run.`,
|
|
339
|
+
true,
|
|
340
|
+
);
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// Bundle verified. NOW snapshot — creates the durable git tag.
|
|
344
|
+
const snapResult = await this._snapshot.execute({
|
|
345
|
+
label: `release-${slug}`,
|
|
346
|
+
notes: `Release ${label} bundle source`,
|
|
347
|
+
});
|
|
348
|
+
if (snapResult.isError) {
|
|
349
|
+
// Bundle exists but tagging failed. Surface but don't roll back —
|
|
350
|
+
// the bundle is still usable; the user can manually tag later.
|
|
351
|
+
return new ToolResult(
|
|
352
|
+
`Release '${label}' bundled at ${bundleRel} but snapshot tag FAILED: ${snapResult.content}. ` +
|
|
353
|
+
`Bundle is valid; create the snapshot tag manually if needed.`,
|
|
354
|
+
);
|
|
355
|
+
}
|
|
356
|
+
const meta = this._readSnapshotMeta(`release-${slug}`);
|
|
357
|
+
snapshotTag = meta.tag;
|
|
358
|
+
snapshotCommit = meta.commit;
|
|
359
|
+
|
|
360
|
+
// Back-fill the manifest with the now-known snapshot tag/commit.
|
|
361
|
+
try {
|
|
362
|
+
const m = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
|
|
363
|
+
m.snapshot_tag = snapshotTag;
|
|
364
|
+
m.snapshot_commit = snapshotCommit;
|
|
365
|
+
fs.writeFileSync(manifestPath, JSON.stringify(m, null, 2) + "\n");
|
|
366
|
+
// Also back-fill the README's snapshot placeholders if still placeholder.
|
|
367
|
+
const readme = fs.readFileSync(readmePath, "utf-8");
|
|
368
|
+
const updated = readme
|
|
369
|
+
.replace(/\(no tag — git unavailable\)/g, snapshotTag || "")
|
|
370
|
+
.replace(/\(unknown\)/g, snapshotCommit || "(unknown)");
|
|
371
|
+
if (updated !== readme) fs.writeFileSync(readmePath, updated);
|
|
372
|
+
} catch { /* best-effort back-fill */ }
|
|
373
|
+
|
|
274
374
|
// Bundle dir is in output/ (gitignored). Snapshot manifest in snapshots/ IS tracked.
|
|
275
375
|
const lines = [
|
|
276
376
|
`Release '${label}' bundled at ${bundleRel}`,
|
|
@@ -303,6 +403,25 @@ export class ReleaseTool extends BaseTool {
|
|
|
303
403
|
}
|
|
304
404
|
}
|
|
305
405
|
|
|
406
|
+
/**
|
|
407
|
+
* v0.7.5 G-H4: recursively remove any `*.tmpl` files from a directory.
|
|
408
|
+
* Used after populating a release bundle to drop template stubs that
|
|
409
|
+
* weren't filtered by the initial copy's exclude list. Idempotent.
|
|
410
|
+
*/
|
|
411
|
+
_sweepTmplFiles(dir) {
|
|
412
|
+
try {
|
|
413
|
+
if (!fs.existsSync(dir) || !fs.statSync(dir).isDirectory()) return;
|
|
414
|
+
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
|
|
415
|
+
const entryPath = path.join(dir, entry.name);
|
|
416
|
+
if (entry.isDirectory()) {
|
|
417
|
+
this._sweepTmplFiles(entryPath);
|
|
418
|
+
} else if (entry.isFile() && entry.name.endsWith(".tmpl")) {
|
|
419
|
+
try { fs.unlinkSync(entryPath); } catch { /* best-effort */ }
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
} catch { /* best-effort */ }
|
|
423
|
+
}
|
|
424
|
+
|
|
306
425
|
_findLatestWorkflow(ruleId) {
|
|
307
426
|
// Canonical: workflows/<ruleId>/workflow_v#.py (subdirectory layout)
|
|
308
427
|
const wfDir = path.join(this._workspace.cwd, "workflows", ruleId);
|
|
@@ -338,10 +457,95 @@ export class ReleaseTool extends BaseTool {
|
|
|
338
457
|
}
|
|
339
458
|
} catch { /* manifest unreadable; skip */ }
|
|
340
459
|
}
|
|
460
|
+
|
|
461
|
+
// v0.7.5 G-H2: master / grouped workflow pattern. Agent shipped a
|
|
462
|
+
// single workflow folder (e.g., workflows/master/ or workflows/
|
|
463
|
+
// bank_wm_compliance/) declaring `source_rules: [R001, R002, ...]`
|
|
464
|
+
// in its SKILL.md / workflow.md / config.json. The manifest writer
|
|
465
|
+
// should credit this rule_id as covered by that workflow.
|
|
466
|
+
//
|
|
467
|
+
// Walk workflows/ subdirs looking for a source_rules declaration
|
|
468
|
+
// that includes this ruleId. Return the first matching workflow file.
|
|
469
|
+
// Audit (v0.7.4 贷款 session) confirmed manifest under-counted:
|
|
470
|
+
// catalog had 15 rules; manifest only listed R001 because R002-R015
|
|
471
|
+
// weren't found as standalone workflows.
|
|
472
|
+
for (const entry of fs.readdirSync(flatRoot, { withFileTypes: true })) {
|
|
473
|
+
if (!entry.isDirectory()) continue;
|
|
474
|
+
if (entry.name === ruleId) continue; // already checked above
|
|
475
|
+
const subDir = path.join(flatRoot, entry.name);
|
|
476
|
+
const declaredRules = this._readWorkflowSourceRules(subDir);
|
|
477
|
+
if (declaredRules.includes(ruleId)) {
|
|
478
|
+
// Find the workflow entry file in this dir
|
|
479
|
+
const subFiles = fs.readdirSync(subDir);
|
|
480
|
+
const versioned = subFiles.filter((f) => /^workflow_v\d+\.py$/.test(f)).sort();
|
|
481
|
+
if (versioned.length > 0) return path.join(subDir, versioned[versioned.length - 1]);
|
|
482
|
+
const any = subFiles.find((f) => f.endsWith(".py"));
|
|
483
|
+
if (any) return path.join(subDir, any);
|
|
484
|
+
}
|
|
485
|
+
}
|
|
341
486
|
}
|
|
342
487
|
return null;
|
|
343
488
|
}
|
|
344
489
|
|
|
490
|
+
/**
|
|
491
|
+
* v0.7.5 G-H2: read a workflow directory's source_rules declaration.
|
|
492
|
+
* Checks SKILL.md / workflow.md frontmatter (`source_rules: [...]`)
|
|
493
|
+
* and config.json (`source_rules`, `rules`, or `rule_ids` field).
|
|
494
|
+
* Returns array of canonical rule IDs.
|
|
495
|
+
*/
|
|
496
|
+
_readWorkflowSourceRules(workflowDir) {
|
|
497
|
+
const ids = new Set();
|
|
498
|
+
try {
|
|
499
|
+
const files = fs.readdirSync(workflowDir);
|
|
500
|
+
|
|
501
|
+
// Frontmatter sources
|
|
502
|
+
for (const fname of files) {
|
|
503
|
+
if (!/^(skill|workflow)\.md$/i.test(fname)) continue;
|
|
504
|
+
let content;
|
|
505
|
+
try { content = fs.readFileSync(path.join(workflowDir, fname), "utf-8"); } catch { continue; }
|
|
506
|
+
const fmMatch = content.match(/^---\n([\s\S]*?)\n---/);
|
|
507
|
+
if (!fmMatch) continue;
|
|
508
|
+
const fm = fmMatch[1];
|
|
509
|
+
// Inline form
|
|
510
|
+
const inlineMatch = fm.match(/^source_rules\s*:\s*\[([^\]]*)\]\s*$/m);
|
|
511
|
+
if (inlineMatch) {
|
|
512
|
+
inlineMatch[1].split(",").map(s => s.trim().replace(/^["']|["']$/g, ""))
|
|
513
|
+
.filter(Boolean).forEach(s => {
|
|
514
|
+
const m = s.match(/^R0*(\d+)$/i);
|
|
515
|
+
if (m) ids.add(`R${String(parseInt(m[1], 10)).padStart(3, "0")}`);
|
|
516
|
+
});
|
|
517
|
+
}
|
|
518
|
+
// Block form
|
|
519
|
+
const blockMatch = fm.match(/^source_rules\s*:\s*\n((?:[ \t]+-\s+\S+\s*\n?)+)/m);
|
|
520
|
+
if (blockMatch) {
|
|
521
|
+
blockMatch[1].split("\n").forEach(line => {
|
|
522
|
+
const m = line.match(/^[ \t]+-\s+["']?(R0*\d+)["']?\s*$/i);
|
|
523
|
+
if (m) {
|
|
524
|
+
const n = m[1].match(/R0*(\d+)/i);
|
|
525
|
+
if (n) ids.add(`R${String(parseInt(n[1], 10)).padStart(3, "0")}`);
|
|
526
|
+
}
|
|
527
|
+
});
|
|
528
|
+
}
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
// Config.json sources
|
|
532
|
+
const configPath = path.join(workflowDir, "config.json");
|
|
533
|
+
if (fs.existsSync(configPath)) {
|
|
534
|
+
try {
|
|
535
|
+
const data = JSON.parse(fs.readFileSync(configPath, "utf-8"));
|
|
536
|
+
const rules = Array.isArray(data?.source_rules) ? data.source_rules :
|
|
537
|
+
Array.isArray(data?.rules) ? data.rules :
|
|
538
|
+
Array.isArray(data?.rule_ids) ? data.rule_ids : [];
|
|
539
|
+
for (const r of rules) {
|
|
540
|
+
const m = String(r).match(/^R0*(\d+)$/i);
|
|
541
|
+
if (m) ids.add(`R${String(parseInt(m[1], 10)).padStart(3, "0")}`);
|
|
542
|
+
}
|
|
543
|
+
} catch { /* ignore */ }
|
|
544
|
+
}
|
|
545
|
+
} catch { /* dir unreadable */ }
|
|
546
|
+
return [...ids];
|
|
547
|
+
}
|
|
548
|
+
|
|
345
549
|
_resolveFixture(rel) {
|
|
346
550
|
// Try samples/ first (workspace, then project), then plain workspace path
|
|
347
551
|
const candidates = [];
|
|
@@ -449,10 +653,175 @@ export class ReleaseTool extends BaseTool {
|
|
|
449
653
|
}
|
|
450
654
|
}
|
|
451
655
|
|
|
656
|
+
// 3) v0.8 P0-C: production_qc_results.json + qc_results_v*.json shapes
|
|
657
|
+
// (资管 + 贷款 v0.7.5 audits both shipped empty historical_accuracy
|
|
658
|
+
// because the v0.7.2 aggregator only recognized rule_stats / full_test_results).
|
|
659
|
+
if (tally.size === 0) {
|
|
660
|
+
const qcFiles = files
|
|
661
|
+
.filter((f) =>
|
|
662
|
+
/^production_qc(?:_results)?(?:_v\d+)?\.json$/i.test(f.name) ||
|
|
663
|
+
/^qc_results(?:_v\d+)?\.json$/i.test(f.name)
|
|
664
|
+
)
|
|
665
|
+
.sort((a, b) => a.name.localeCompare(b.name));
|
|
666
|
+
for (const f of qcFiles.slice(0, 5)) {
|
|
667
|
+
try {
|
|
668
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
669
|
+
const results = d.results;
|
|
670
|
+
if (!results) continue;
|
|
671
|
+
|
|
672
|
+
// Shape 3a (资管): nested rule-keyed map
|
|
673
|
+
// {results: {<rid>: {<doc_id>: {verdict, ...}}}}
|
|
674
|
+
if (typeof results === "object" && !Array.isArray(results)) {
|
|
675
|
+
for (const [rid, docs] of Object.entries(results)) {
|
|
676
|
+
if (!isRuleId(rid) || !docs || typeof docs !== "object") continue;
|
|
677
|
+
for (const r of Object.values(docs)) {
|
|
678
|
+
if (!r || typeof r !== "object") continue;
|
|
679
|
+
const verdict = (r.verdict || "").toString().toUpperCase();
|
|
680
|
+
if (verdict === "PASS") bump(rid, "pass");
|
|
681
|
+
else if (verdict === "FAIL") bump(rid, "fail");
|
|
682
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA" || verdict === "WARNING") bump(rid, "na");
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
if (tally.size > 0) sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
686
|
+
}
|
|
687
|
+
// Shape 3b (贷款): per-doc rollup list with failed_rules
|
|
688
|
+
// {results: [{filename, actual, correct, failed_rules: [...]}], total_tested: N}
|
|
689
|
+
// For each rule: failures counted from failed_rules union; passes
|
|
690
|
+
// inferred as (total_tested - failures) for rules that appear in the catalog.
|
|
691
|
+
else if (Array.isArray(results)) {
|
|
692
|
+
const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
|
|
693
|
+
let catalogRules = [];
|
|
694
|
+
try {
|
|
695
|
+
const cat = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
696
|
+
const list = Array.isArray(cat) ? cat : Array.isArray(cat?.rules) ? cat.rules : [];
|
|
697
|
+
catalogRules = list.map((r) => r?.id || r?.rule_id).filter((x) => isRuleId(x));
|
|
698
|
+
} catch { /* catalog optional */ }
|
|
699
|
+
|
|
700
|
+
const failCountByRule = new Map();
|
|
701
|
+
let docCount = 0;
|
|
702
|
+
for (const row of results) {
|
|
703
|
+
if (!row || typeof row !== "object") continue;
|
|
704
|
+
docCount += 1;
|
|
705
|
+
const failed = Array.isArray(row.failed_rules) ? row.failed_rules : [];
|
|
706
|
+
for (const rid of failed) {
|
|
707
|
+
if (!isRuleId(rid)) continue;
|
|
708
|
+
failCountByRule.set(rid, (failCountByRule.get(rid) || 0) + 1);
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
if (docCount > 0) {
|
|
712
|
+
const ruleSet = new Set([...catalogRules, ...failCountByRule.keys()]);
|
|
713
|
+
for (const rid of ruleSet) {
|
|
714
|
+
const fails = failCountByRule.get(rid) || 0;
|
|
715
|
+
const passes = Math.max(0, docCount - fails);
|
|
716
|
+
const t = tally.get(rid) || { pass: 0, fail: 0, na: 0, n: 0 };
|
|
717
|
+
t.pass += passes; t.fail += fails; t.n += docCount;
|
|
718
|
+
tally.set(rid, t);
|
|
719
|
+
}
|
|
720
|
+
if (tally.size > 0) sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
} catch { /* try next file */ }
|
|
724
|
+
if (tally.size > 0) break;
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
|
|
728
|
+
// 4) v0.8.1 P9-A: top-level fail_by_rule + pass_by_rule maps (贷款
|
|
729
|
+
// v0.8 production_qc_report.json shape). Direct per-rule counts —
|
|
730
|
+
// no per-doc rollup, no verdict literals to scan.
|
|
731
|
+
// {accuracy, total_checks, fail_by_rule: {<rid>: N}, pass_by_rule: {<rid>: N}}
|
|
732
|
+
if (tally.size === 0) {
|
|
733
|
+
for (const f of files) {
|
|
734
|
+
if (!/qc|prod|report|result/i.test(f.name)) continue;
|
|
735
|
+
try {
|
|
736
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
737
|
+
const failMap = d?.fail_by_rule;
|
|
738
|
+
const passMap = d?.pass_by_rule;
|
|
739
|
+
if (
|
|
740
|
+
failMap && typeof failMap === "object" && !Array.isArray(failMap) &&
|
|
741
|
+
passMap && typeof passMap === "object" && !Array.isArray(passMap)
|
|
742
|
+
) {
|
|
743
|
+
const allRules = new Set([...Object.keys(failMap), ...Object.keys(passMap)]);
|
|
744
|
+
let matched = false;
|
|
745
|
+
for (const rid of allRules) {
|
|
746
|
+
if (!isRuleId(rid)) continue;
|
|
747
|
+
const fails = Number(failMap[rid]) || 0;
|
|
748
|
+
const passes = Number(passMap[rid]) || 0;
|
|
749
|
+
if (fails + passes === 0) continue;
|
|
750
|
+
const t = tally.get(rid) || { pass: 0, fail: 0, na: 0, n: 0 };
|
|
751
|
+
t.pass += passes;
|
|
752
|
+
t.fail += fails;
|
|
753
|
+
t.n += passes + fails;
|
|
754
|
+
tally.set(rid, t);
|
|
755
|
+
matched = true;
|
|
756
|
+
}
|
|
757
|
+
if (matched) {
|
|
758
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
759
|
+
break;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
} catch { /* skip non-JSON */ }
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
// 5) Fallback (belt-and-suspenders per v0.8 plan Risk #7):
|
|
767
|
+
// walk any output/*.json with a top-level rule_id-keyed shape that has
|
|
768
|
+
// verdict-like leaf objects. Catches future schema drift before the
|
|
769
|
+
// next audit cycle.
|
|
770
|
+
if (tally.size === 0) {
|
|
771
|
+
for (const f of files) {
|
|
772
|
+
if (!/qc|verdict|result/i.test(f.name)) continue;
|
|
773
|
+
try {
|
|
774
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
775
|
+
const root = d?.results || d;
|
|
776
|
+
if (!root || typeof root !== "object" || Array.isArray(root)) continue;
|
|
777
|
+
let matched = false;
|
|
778
|
+
for (const [rid, val] of Object.entries(root)) {
|
|
779
|
+
if (!isRuleId(rid) || !val || typeof val !== "object") continue;
|
|
780
|
+
// val might be {verdict, ...} OR {<doc>: {verdict, ...}}
|
|
781
|
+
const probe = val.verdict ? [val] : Object.values(val);
|
|
782
|
+
for (const r of probe) {
|
|
783
|
+
if (!r || typeof r !== "object") continue;
|
|
784
|
+
const verdict = (r.verdict || "").toString().toUpperCase();
|
|
785
|
+
if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
|
|
786
|
+
else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
|
|
787
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
|
|
788
|
+
}
|
|
789
|
+
}
|
|
790
|
+
if (matched) {
|
|
791
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path) + " (fallback shape)");
|
|
792
|
+
break;
|
|
793
|
+
}
|
|
794
|
+
} catch { /* skip non-JSON */ }
|
|
795
|
+
}
|
|
796
|
+
}
|
|
797
|
+
|
|
452
798
|
if (tally.size === 0) return null;
|
|
453
799
|
|
|
800
|
+
// v0.8.1 P9-D: filter tally to rule_ids in the current catalog.
|
|
801
|
+
// E2E #11 资管 v0.8 audit: confidence_calibration aggregated from
|
|
802
|
+
// an abandoned 39-rule pipeline included only 2 of 4 final samples.
|
|
803
|
+
// Filtering to catalog.json keeps the calibration scoped to the
|
|
804
|
+
// rules that actually ship in the release.
|
|
805
|
+
let catalogRuleIds = null;
|
|
806
|
+
try {
|
|
807
|
+
const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
|
|
808
|
+
if (fs.existsSync(catalogPath)) {
|
|
809
|
+
const cat = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
810
|
+
const list = Array.isArray(cat) ? cat : Array.isArray(cat?.rules) ? cat.rules : [];
|
|
811
|
+
catalogRuleIds = new Set(
|
|
812
|
+
list.map((r) => r?.id || r?.rule_id).filter((x) => isRuleId(x))
|
|
813
|
+
);
|
|
814
|
+
if (catalogRuleIds.size === 0) catalogRuleIds = null;
|
|
815
|
+
}
|
|
816
|
+
} catch { /* skip filter if catalog missing/malformed */ }
|
|
817
|
+
|
|
454
818
|
const historical_accuracy = {};
|
|
819
|
+
const droppedRules = [];
|
|
455
820
|
for (const [rid, t] of tally.entries()) {
|
|
821
|
+
if (catalogRuleIds && !catalogRuleIds.has(rid)) {
|
|
822
|
+
droppedRules.push(rid);
|
|
823
|
+
continue;
|
|
824
|
+
}
|
|
456
825
|
const fired = t.pass + t.fail;
|
|
457
826
|
historical_accuracy[rid] = {
|
|
458
827
|
pass_rate: fired > 0 ? +(t.pass / fired).toFixed(4) : null,
|
|
@@ -466,6 +835,7 @@ export class ReleaseTool extends BaseTool {
|
|
|
466
835
|
historical_accuracy,
|
|
467
836
|
computed_at: new Date().toISOString(),
|
|
468
837
|
source_files: sourceFiles,
|
|
838
|
+
...(droppedRules.length > 0 ? { dropped_off_catalog: droppedRules } : {}),
|
|
469
839
|
};
|
|
470
840
|
}
|
|
471
841
|
|
|
@@ -25,16 +25,38 @@ function detectSharedFileWrites(command) {
|
|
|
25
25
|
* Execute shell commands in the workspace directory.
|
|
26
26
|
* Uses child_process.spawn so pipes, redirects, && all work.
|
|
27
27
|
* Output (stdout + stderr combined) is capped at 10K chars.
|
|
28
|
+
*
|
|
29
|
+
* v0.8 P1-F timeout model:
|
|
30
|
+
* - Default: KC_EXEC_DEFAULT_TIMEOUT_MS (env) or 120000ms (2 min)
|
|
31
|
+
* - Hard cap: KC_EXEC_MAX_TIMEOUT_MS (env) or 600000ms (10 min)
|
|
32
|
+
* - Per-call `timeout_ms` overrides default, clamped to [1000, max]
|
|
33
|
+
* - Legacy `KC_EXEC_TIMEOUT` (seconds) still accepted as a deprecation
|
|
34
|
+
* alias for the default; emits a warning to stderr on first read.
|
|
28
35
|
*/
|
|
29
36
|
export class SandboxExecTool extends BaseTool {
|
|
30
37
|
/**
|
|
31
38
|
* @param {import('../workspace.js').Workspace} workspace
|
|
32
|
-
* @param {number} [
|
|
39
|
+
* @param {object|number} [opts] — either a config object (new) OR
|
|
40
|
+
* a number meaning the legacy timeout-in-seconds (old). The number
|
|
41
|
+
* form is preserved for callers that haven't been updated yet.
|
|
42
|
+
* @param {number} [opts.defaultTimeoutMs] — default 120000
|
|
43
|
+
* @param {number} [opts.maxTimeoutMs] — default 600000
|
|
33
44
|
*/
|
|
34
|
-
constructor(workspace,
|
|
45
|
+
constructor(workspace, opts = {}) {
|
|
35
46
|
super();
|
|
36
47
|
this._workspace = workspace;
|
|
37
|
-
|
|
48
|
+
|
|
49
|
+
// Legacy: opts is a bare number = seconds. Convert to ms.
|
|
50
|
+
if (typeof opts === "number") {
|
|
51
|
+
this._defaultTimeoutMs = opts * 1000;
|
|
52
|
+
this._maxTimeoutMs = Math.max(this._defaultTimeoutMs, 600_000);
|
|
53
|
+
} else {
|
|
54
|
+
this._defaultTimeoutMs = opts.defaultTimeoutMs ?? 120_000;
|
|
55
|
+
this._maxTimeoutMs = opts.maxTimeoutMs ?? 600_000;
|
|
56
|
+
}
|
|
57
|
+
// Floor: keep at least 1s. Cap: max can't be below default.
|
|
58
|
+
this._defaultTimeoutMs = Math.max(1000, this._defaultTimeoutMs);
|
|
59
|
+
this._maxTimeoutMs = Math.max(this._defaultTimeoutMs, this._maxTimeoutMs);
|
|
38
60
|
}
|
|
39
61
|
|
|
40
62
|
get name() { return "sandbox_exec"; }
|
|
@@ -47,7 +69,10 @@ export class SandboxExecTool extends BaseTool {
|
|
|
47
69
|
"Pipes, redirects, and chained commands (&&) are supported. " +
|
|
48
70
|
"stdout + stderr combined are capped at 10,000 chars; longer output is truncated. " +
|
|
49
71
|
"For reading individual files larger than ~10 KB (e.g. regulation documents), " +
|
|
50
|
-
"prefer workspace_file (operation=read) which has a larger 50 KB cap."
|
|
72
|
+
"prefer workspace_file (operation=read) which has a larger 50 KB cap. " +
|
|
73
|
+
`Default timeout ${Math.round(this._defaultTimeoutMs / 1000)}s; pass timeout_ms ` +
|
|
74
|
+
`to extend up to ${Math.round(this._maxTimeoutMs / 1000)}s for known-slow commands ` +
|
|
75
|
+
`(LLM batch processing, document parsing, large regression runs).`
|
|
51
76
|
);
|
|
52
77
|
}
|
|
53
78
|
|
|
@@ -64,6 +89,10 @@ export class SandboxExecTool extends BaseTool {
|
|
|
64
89
|
enum: ["workspace", "project"],
|
|
65
90
|
description: "Working directory. 'workspace' (default) = KC's workspace. 'project' = user's project directory.",
|
|
66
91
|
},
|
|
92
|
+
timeout_ms: {
|
|
93
|
+
type: "integer",
|
|
94
|
+
description: `Optional per-call timeout in milliseconds. Default ${this._defaultTimeoutMs}ms; clamped to [1000, ${this._maxTimeoutMs}]. Pass for commands you expect to take longer than the default (LLM batches, parsing, regressions).`,
|
|
95
|
+
},
|
|
67
96
|
},
|
|
68
97
|
required: ["command"],
|
|
69
98
|
};
|
|
@@ -76,6 +105,22 @@ export class SandboxExecTool extends BaseTool {
|
|
|
76
105
|
return new ToolResult("No command provided", true);
|
|
77
106
|
}
|
|
78
107
|
|
|
108
|
+
// v0.8 P1-F: per-call timeout clamping
|
|
109
|
+
let effectiveTimeoutMs = this._defaultTimeoutMs;
|
|
110
|
+
let clampedMessage = null;
|
|
111
|
+
if (Number.isFinite(input.timeout_ms) && input.timeout_ms > 0) {
|
|
112
|
+
const requested = Math.floor(input.timeout_ms);
|
|
113
|
+
if (requested < 1000) {
|
|
114
|
+
effectiveTimeoutMs = 1000;
|
|
115
|
+
clampedMessage = `timeout_ms=${requested} below 1000ms floor; using 1000ms.`;
|
|
116
|
+
} else if (requested > this._maxTimeoutMs) {
|
|
117
|
+
effectiveTimeoutMs = this._maxTimeoutMs;
|
|
118
|
+
clampedMessage = `timeout_ms=${requested} above ${this._maxTimeoutMs}ms ceiling; clamped to ${this._maxTimeoutMs}ms.`;
|
|
119
|
+
} else {
|
|
120
|
+
effectiveTimeoutMs = requested;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
|
|
79
124
|
const effectiveCwd = (cwdScope === "project" && this._workspace.projectDir)
|
|
80
125
|
? this._workspace.projectDir
|
|
81
126
|
: this._workspace.cwd;
|
|
@@ -86,7 +131,7 @@ export class SandboxExecTool extends BaseTool {
|
|
|
86
131
|
const sharedHits = detectSharedFileWrites(command);
|
|
87
132
|
|
|
88
133
|
try {
|
|
89
|
-
const { output, code } = await this._run(command, effectiveCwd);
|
|
134
|
+
const { output, code } = await this._run(command, effectiveCwd, effectiveTimeoutMs);
|
|
90
135
|
let result = output;
|
|
91
136
|
if (result.length > MAX_OUTPUT) {
|
|
92
137
|
result = result.slice(0, MAX_OUTPUT) + "\n[truncated]";
|
|
@@ -101,10 +146,20 @@ export class SandboxExecTool extends BaseTool {
|
|
|
101
146
|
` Under concurrent subagents this races — use workspace_file or rule_catalog instead.\n\n`;
|
|
102
147
|
result = prefix + result;
|
|
103
148
|
}
|
|
149
|
+
if (clampedMessage) {
|
|
150
|
+
result = `[note] ${clampedMessage}\n\n` + result;
|
|
151
|
+
}
|
|
104
152
|
return new ToolResult(result, code !== 0);
|
|
105
153
|
} catch (err) {
|
|
106
154
|
if (err.message === "timeout") {
|
|
107
|
-
|
|
155
|
+
const seconds = Math.round(effectiveTimeoutMs / 1000);
|
|
156
|
+
const hint = effectiveTimeoutMs < this._maxTimeoutMs
|
|
157
|
+
? ` Pass timeout_ms (up to ${this._maxTimeoutMs}) for known-slow commands.`
|
|
158
|
+
: ` Already at max timeout (${this._maxTimeoutMs}ms); consider splitting the command into smaller batches or running it via a subagent.`;
|
|
159
|
+
return new ToolResult(
|
|
160
|
+
`Command timed out after ${seconds}s (${effectiveTimeoutMs}ms).${hint}`,
|
|
161
|
+
true,
|
|
162
|
+
);
|
|
108
163
|
}
|
|
109
164
|
return new ToolResult(`Execution error: ${err.message}`, true);
|
|
110
165
|
}
|
|
@@ -112,9 +167,11 @@ export class SandboxExecTool extends BaseTool {
|
|
|
112
167
|
|
|
113
168
|
/**
|
|
114
169
|
* @param {string} command
|
|
170
|
+
* @param {string} cwd
|
|
171
|
+
* @param {number} timeoutMs
|
|
115
172
|
* @returns {Promise<{output: string, code: number}>}
|
|
116
173
|
*/
|
|
117
|
-
_run(command, cwd) {
|
|
174
|
+
_run(command, cwd, timeoutMs) {
|
|
118
175
|
return new Promise((resolve, reject) => {
|
|
119
176
|
const controller = new AbortController();
|
|
120
177
|
const proc = spawn("sh", ["-c", command], {
|
|
@@ -130,7 +187,7 @@ export class SandboxExecTool extends BaseTool {
|
|
|
130
187
|
const timer = setTimeout(() => {
|
|
131
188
|
controller.abort();
|
|
132
189
|
reject(new Error("timeout"));
|
|
133
|
-
},
|
|
190
|
+
}, timeoutMs);
|
|
134
191
|
|
|
135
192
|
proc.on("close", (code) => {
|
|
136
193
|
clearTimeout(timer);
|