kc-beta 0.7.5 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +47 -0
- package/package.json +3 -2
- package/src/agent/context.js +17 -1
- package/src/agent/engine.js +467 -100
- package/src/agent/llm-client.js +24 -1
- package/src/agent/pipelines/_advance-hints.js +92 -0
- package/src/agent/pipelines/_milestone-derive.js +325 -20
- package/src/agent/pipelines/skill-authoring.js +49 -3
- package/src/agent/tools/agent-tool.js +2 -2
- package/src/agent/tools/consult-skill.js +15 -0
- package/src/agent/tools/dashboard-render.js +48 -1
- package/src/agent/tools/document-parse.js +31 -2
- package/src/agent/tools/phase-advance.js +17 -13
- package/src/agent/tools/release.js +343 -7
- package/src/agent/tools/sandbox-exec.js +65 -8
- package/src/agent/tools/worker-llm-call.js +95 -15
- package/src/agent/workspace.js +25 -4
- package/src/cli/components.js +4 -1
- package/src/cli/index.js +125 -8
- package/src/config.js +19 -2
- package/src/marathon/driver.js +217 -0
- package/src/marathon/prompts.js +93 -0
- package/template/.env.template +17 -1
- package/template/AGENT.md +2 -2
- package/template/skills/en/auto-model-selection/SKILL.md +55 -35
- package/template/skills/en/bootstrap-workspace/SKILL.md +27 -0
- package/template/skills/en/compliance-judgment/SKILL.md +14 -0
- package/template/skills/en/confidence-system/SKILL.md +30 -8
- package/template/skills/en/corner-case-management/SKILL.md +53 -33
- package/template/skills/en/cross-document-verification/SKILL.md +88 -83
- package/template/skills/en/dashboard-reporting/SKILL.md +91 -66
- package/template/skills/en/dashboard-reporting/scripts/generate_dashboard.py +1 -1
- package/template/skills/en/data-sensibility/SKILL.md +19 -12
- package/template/skills/en/document-chunking/SKILL.md +99 -15
- package/template/skills/en/entity-extraction/SKILL.md +14 -4
- package/template/skills/en/quality-control/SKILL.md +23 -0
- package/template/skills/en/rule-extraction/SKILL.md +92 -94
- package/template/skills/en/rule-extraction/references/chunking-strategies.md +7 -78
- package/template/skills/en/skill-authoring/SKILL.md +85 -2
- package/template/skills/en/skill-creator/SKILL.md +25 -3
- package/template/skills/en/skill-to-workflow/SKILL.md +73 -1
- package/template/skills/en/task-decomposition/SKILL.md +1 -1
- package/template/skills/en/tree-processing/SKILL.md +1 -1
- package/template/skills/en/version-control/SKILL.md +15 -0
- package/template/skills/en/work-decomposition/SKILL.md +52 -32
- package/template/skills/phase_skills.yaml +5 -0
- package/template/skills/zh/auto-model-selection/SKILL.md +54 -33
- package/template/skills/zh/bootstrap-workspace/SKILL.md +27 -0
- package/template/skills/zh/compliance-judgment/SKILL.md +51 -37
- package/template/skills/zh/compliance-judgment/references/output-format.md +62 -62
- package/template/skills/zh/confidence-system/SKILL.md +34 -9
- package/template/skills/zh/corner-case-management/SKILL.md +71 -104
- package/template/skills/zh/cross-document-verification/SKILL.md +90 -195
- package/template/skills/zh/cross-document-verification/references/contradiction-taxonomy.md +36 -36
- package/template/skills/zh/dashboard-reporting/SKILL.md +82 -232
- package/template/skills/zh/dashboard-reporting/scripts/generate_dashboard.py +1 -1
- package/template/skills/zh/data-sensibility/SKILL.md +13 -0
- package/template/skills/zh/document-chunking/SKILL.md +101 -18
- package/template/skills/zh/document-parsing/SKILL.md +65 -65
- package/template/skills/zh/document-parsing/references/parser-catalog.md +26 -26
- package/template/skills/zh/entity-extraction/SKILL.md +78 -68
- package/template/skills/zh/evolution-loop/references/convergence-guide.md +38 -38
- package/template/skills/zh/quality-control/SKILL.md +23 -0
- package/template/skills/zh/quality-control/references/qa-layers.md +65 -65
- package/template/skills/zh/quality-control/references/sampling-strategies.md +49 -49
- package/template/skills/zh/rule-extraction/SKILL.md +199 -188
- package/template/skills/zh/rule-extraction/references/chunking-strategies.md +5 -78
- package/template/skills/zh/skill-authoring/SKILL.md +136 -58
- package/template/skills/zh/skill-authoring/references/skill-format-spec.md +39 -39
- package/template/skills/zh/skill-creator/SKILL.md +215 -201
- package/template/skills/zh/skill-creator/references/schemas.md +60 -60
- package/template/skills/zh/skill-to-workflow/SKILL.md +73 -1
- package/template/skills/zh/skill-to-workflow/references/worker-llm-catalog.md +24 -24
- package/template/skills/zh/task-decomposition/SKILL.md +1 -1
- package/template/skills/zh/task-decomposition/references/decision-matrix.md +54 -54
- package/template/skills/zh/tree-processing/SKILL.md +67 -63
- package/template/skills/zh/version-control/SKILL.md +15 -0
- package/template/skills/zh/version-control/references/trace-id-spec.md +34 -34
- package/template/skills/zh/work-decomposition/SKILL.md +52 -30
- package/template/workflows/common/llm_client.py +168 -0
- package/template/workflows/common/utils.py +132 -0
|
@@ -382,8 +382,8 @@ export class AgentTool extends BaseTool {
|
|
|
382
382
|
* B8: List currently-running sub-agents. Called by engine's phase-advance
|
|
383
383
|
* path to emit a `stale_subagents` pipeline event — the main agent's next
|
|
384
384
|
* turn sees the list and decides whether to kill each. Soft signal, not
|
|
385
|
-
* an automated kill
|
|
386
|
-
*
|
|
385
|
+
* an automated kill: coupling the subagent lifecycle to phase advance
|
|
386
|
+
* would amplify blast radius if a transition happened unexpectedly.
|
|
387
387
|
*/
|
|
388
388
|
getRunningTaskIds() {
|
|
389
389
|
return Array.from(this._runningTasks.keys());
|
|
@@ -63,6 +63,21 @@ export class ConsultSkillTool extends BaseTool {
|
|
|
63
63
|
const name = (input?.name || "").trim();
|
|
64
64
|
if (!name) return new ToolResult("name required (e.g. consult_skill({name: 'work-decomposition'}))", true);
|
|
65
65
|
|
|
66
|
+
// v0.8 P0-A: defensive null-check. v0.7.5 shipped with an init-order bug
|
|
67
|
+
// where ConsultSkillTool received undefined skillLoader and threw
|
|
68
|
+
// "Cannot read properties of undefined (reading 'getPhaseSkillSet')"
|
|
69
|
+
// on every invocation (资管 audit § 9.1, 5/5 failure rate). The init-order
|
|
70
|
+
// fix is in engine.js:238; this guard prevents an uncaught exception if
|
|
71
|
+
// the bug recurs from any future constructor reorder.
|
|
72
|
+
if (!this._skillLoader || typeof this._skillLoader.getPhaseSkillSet !== "function") {
|
|
73
|
+
return new ToolResult(
|
|
74
|
+
"consult_skill is misconfigured: skillLoader unavailable. This is an engine-side bug — " +
|
|
75
|
+
"surface to the developer user. The agent should fall back to reading skill bodies " +
|
|
76
|
+
"directly from <workspace>/skills/<name>/SKILL.md or the system prompt's always-loaded section.",
|
|
77
|
+
true,
|
|
78
|
+
);
|
|
79
|
+
}
|
|
80
|
+
|
|
66
81
|
const phase = this._getCurrentPhase ? this._getCurrentPhase() : null;
|
|
67
82
|
const { alwaysLoaded, available } = this._skillLoader.getPhaseSkillSet(phase);
|
|
68
83
|
|
|
@@ -81,11 +81,57 @@ export class DashboardRenderTool extends BaseTool {
|
|
|
81
81
|
metrics.evolution_iterations = fs.readdirSync(evoDir).filter((f) => f.endsWith(".json")).length;
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
+
// v0.8 P1-G: QC counter now reads from multiple known agent-write
|
|
85
|
+
// locations + counts per-doc reviews. Pre-v0.8 read only output/qc/*.json
|
|
86
|
+
// top-level; 资管 v0.7.5 wrote output/results/production_qc_results.json
|
|
87
|
+
// so the dashboard showed `QC Batches: 0` despite 126 pairs of data.
|
|
88
|
+
let qcBatches = 0;
|
|
89
|
+
let qcDocsReviewed = 0;
|
|
90
|
+
|
|
91
|
+
// (a) Top-level batch files in output/qc/ (贷款 v0.7.5 shape)
|
|
84
92
|
const qcDir = path.join(ws, "output", "qc");
|
|
85
93
|
if (fs.existsSync(qcDir)) {
|
|
86
|
-
|
|
94
|
+
for (const f of fs.readdirSync(qcDir).filter((f) => f.endsWith(".json"))) {
|
|
95
|
+
qcBatches++;
|
|
96
|
+
try {
|
|
97
|
+
const data = JSON.parse(fs.readFileSync(path.join(qcDir, f), "utf-8"));
|
|
98
|
+
const n = Number(data?.documents_reviewed);
|
|
99
|
+
if (Number.isFinite(n) && n > qcDocsReviewed) qcDocsReviewed = n;
|
|
100
|
+
} catch { /* skip malformed */ }
|
|
101
|
+
}
|
|
87
102
|
}
|
|
88
103
|
|
|
104
|
+
// (b) Per-doc reviews at output/qc/reviews/ (贷款 detail shape)
|
|
105
|
+
const reviewsDir = path.join(ws, "output", "qc", "reviews");
|
|
106
|
+
if (fs.existsSync(reviewsDir)) {
|
|
107
|
+
const reviewFiles = fs.readdirSync(reviewsDir).filter((f) => f.endsWith(".json"));
|
|
108
|
+
qcDocsReviewed = Math.max(qcDocsReviewed, reviewFiles.length);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
// (c) production_qc_results.json shape (资管 v0.7.5)
|
|
112
|
+
const productionQc = path.join(ws, "output", "results", "production_qc_results.json");
|
|
113
|
+
if (fs.existsSync(productionQc)) {
|
|
114
|
+
qcBatches++;
|
|
115
|
+
try {
|
|
116
|
+
const data = JSON.parse(fs.readFileSync(productionQc, "utf-8"));
|
|
117
|
+
const totalDocs = Number(data?.total_docs);
|
|
118
|
+
if (Number.isFinite(totalDocs)) qcDocsReviewed = Math.max(qcDocsReviewed, totalDocs);
|
|
119
|
+
// Otherwise, dedup doc keys from nested results
|
|
120
|
+
if (!Number.isFinite(totalDocs) && data?.results && typeof data.results === "object") {
|
|
121
|
+
const docSet = new Set();
|
|
122
|
+
for (const docs of Object.values(data.results)) {
|
|
123
|
+
if (docs && typeof docs === "object") {
|
|
124
|
+
for (const k of Object.keys(docs)) docSet.add(k);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
if (docSet.size > 0) qcDocsReviewed = Math.max(qcDocsReviewed, docSet.size);
|
|
128
|
+
}
|
|
129
|
+
} catch { /* skip */ }
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
metrics.qc_batches = qcBatches;
|
|
133
|
+
metrics.qc_docs_reviewed = qcDocsReviewed;
|
|
134
|
+
|
|
89
135
|
return metrics;
|
|
90
136
|
}
|
|
91
137
|
|
|
@@ -126,6 +172,7 @@ th { color: #737373; font-size: 0.85em; }
|
|
|
126
172
|
<div class="metric"><span class="value">${total}</span><br><span class="label">Results</span></div>
|
|
127
173
|
<div class="metric"><span class="value">${metrics.evolution_iterations}</span><br><span class="label">Evolution Cycles</span></div>
|
|
128
174
|
<div class="metric"><span class="value">${metrics.qc_batches}</span><br><span class="label">QC Batches</span></div>
|
|
175
|
+
<div class="metric"><span class="value">${metrics.qc_docs_reviewed || 0}</span><br><span class="label">Docs Reviewed</span></div>
|
|
129
176
|
</div>
|
|
130
177
|
<h2>Confidence Distribution</h2>
|
|
131
178
|
<div class="card">
|
|
@@ -12,14 +12,43 @@ const MIN_CHARS_PER_PAGE = 50;
|
|
|
12
12
|
* Level 3: OCR models via SiliconFlow — fallback via vision models
|
|
13
13
|
*/
|
|
14
14
|
export class DocumentParseTool extends BaseTool {
|
|
15
|
-
|
|
15
|
+
/**
|
|
16
|
+
* @param {object} workspace
|
|
17
|
+
* @param {object} opts
|
|
18
|
+
* @param {string} [opts.mineruApiUrl]
|
|
19
|
+
* @param {string} [opts.mineruApiKey]
|
|
20
|
+
* @param {string} [opts.llmApiKey]
|
|
21
|
+
* @param {string} [opts.llmBaseUrl]
|
|
22
|
+
* @param {string} [opts.ocrModel] — static fallback (legacy)
|
|
23
|
+
* @param {() => string} [opts.getOcrModel] — v0.8.1 P9-B: live-read
|
|
24
|
+
* callback. If provided, takes precedence over `ocrModel`. The
|
|
25
|
+
* constructor used to capture vlmTier1 once at engine startup, but
|
|
26
|
+
* workspace_env_overlay (P1-B) fires AFTER tool construction in
|
|
27
|
+
* some flows (e.g. agent edits .env mid-run, OR overlay applies on
|
|
28
|
+
* a subagent's engine but parent already cached the gc default).
|
|
29
|
+
* E2E #11 资管 v0.8 audit found document_parse errors quoting
|
|
30
|
+
* Qwen3-VL-235B-A22B-Instruct (gc default) even though .env set
|
|
31
|
+
* OCR_MODEL_TIER1=zai-org/GLM-4.6V — the overlay applied 5 min
|
|
32
|
+
* after first failed call. Live-read fixes the race.
|
|
33
|
+
*/
|
|
34
|
+
constructor(workspace, { mineruApiUrl, mineruApiKey, llmApiKey, llmBaseUrl, ocrModel, getOcrModel } = {}) {
|
|
16
35
|
super();
|
|
17
36
|
this._workspace = workspace;
|
|
18
37
|
this._mineruApiUrl = mineruApiUrl || "";
|
|
19
38
|
this._mineruApiKey = mineruApiKey || "";
|
|
20
39
|
this._vlmApiKey = llmApiKey || "";
|
|
21
40
|
this._vlmBaseUrl = (llmBaseUrl || "").replace(/\/+$/, "");
|
|
22
|
-
this.
|
|
41
|
+
this._ocrModelStatic = ocrModel || "";
|
|
42
|
+
this._getOcrModel = typeof getOcrModel === "function" ? getOcrModel : null;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
/** Read ocrModel live (P9-B) or fall back to the static value captured at construction. */
|
|
46
|
+
get _ocrModel() {
|
|
47
|
+
if (this._getOcrModel) {
|
|
48
|
+
try { return this._getOcrModel() || this._ocrModelStatic; }
|
|
49
|
+
catch { return this._ocrModelStatic; }
|
|
50
|
+
}
|
|
51
|
+
return this._ocrModelStatic;
|
|
23
52
|
}
|
|
24
53
|
|
|
25
54
|
get name() { return "document_parse"; }
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { BaseTool, ToolResult } from "./base.js";
|
|
2
2
|
import { Phase } from "../pipelines/index.js";
|
|
3
|
+
import { getPrescriptiveHint } from "../pipelines/_advance-hints.js";
|
|
3
4
|
|
|
4
5
|
const VALID_PHASES = new Set(Object.values(Phase));
|
|
5
6
|
|
|
@@ -72,12 +73,12 @@ export class PhaseAdvanceTool extends BaseTool {
|
|
|
72
73
|
|
|
73
74
|
const beforePhase = this._getCurrentPhase();
|
|
74
75
|
// H1: short-circuit the "already in target" case with an informational
|
|
75
|
-
// message —
|
|
76
|
-
//
|
|
77
|
-
//
|
|
76
|
+
// message — agent was trying to advance correctly, engine was already
|
|
77
|
+
// there (from a prior pipeline_event-driven advance or an earlier
|
|
78
|
+
// explicit call). Treat as success, not refusal.
|
|
78
79
|
if (beforePhase && beforePhase === to) {
|
|
79
80
|
return new ToolResult(
|
|
80
|
-
`Already in phase ${to} (engine
|
|
81
|
+
`Already in phase ${to} (engine was already there from a prior advance). Proceed with phase-appropriate work.`,
|
|
81
82
|
);
|
|
82
83
|
}
|
|
83
84
|
|
|
@@ -126,18 +127,21 @@ export class PhaseAdvanceTool extends BaseTool {
|
|
|
126
127
|
// exactly which milestones the gate is reading and can satisfy them.
|
|
127
128
|
// E2E #6 v070 showed the generic "check /status" hint wasn't concrete
|
|
128
129
|
// enough — agents forced through. Naming the gap inline reduces that.
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
130
|
+
// v0.8 P0-E: prescriptive refusal hint — name the artifacts the agent
|
|
131
|
+
// needs to produce, derived from the same paths _milestone-derive.js
|
|
132
|
+
// walks. Replaces the v0.7.x descriptive "check /status" message that
|
|
133
|
+
// 资管 + 贷款 v0.7.5 audits showed agents force-bypassing.
|
|
134
|
+
const prescriptive = getPrescriptiveHint(
|
|
135
|
+
beforePhase,
|
|
136
|
+
advanceResult?.engineCounts,
|
|
137
|
+
advanceResult?.engineCounts || "",
|
|
138
|
+
);
|
|
132
139
|
|
|
133
140
|
return new ToolResult(
|
|
134
141
|
`Did not advance to ${to} (currently in ${beforePhase || "?"}). ` +
|
|
135
|
-
`Likely cause: source-phase exit criteria not met
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
`the engine derives milestones from filesystem facts (rule_skills/<id>/SKILL.md, check.py, ` +
|
|
139
|
-
`workflows/<id>/*.py, output/results/*.json, etc.). ` +
|
|
140
|
-
`If the transition is non-adjacent or this phase truly is done despite the gate, ` +
|
|
142
|
+
`Likely cause: source-phase exit criteria not met.\n\n` +
|
|
143
|
+
prescriptive +
|
|
144
|
+
`\n\nIf the transition is non-adjacent or this phase truly is done despite the gate, ` +
|
|
141
145
|
`re-call with the documented schema flag. The engine logged the precise reason in ` +
|
|
142
146
|
`events.jsonl as 'phase_advance_refused'.`,
|
|
143
147
|
false,
|
|
@@ -85,13 +85,19 @@ export class ReleaseTool extends BaseTool {
|
|
|
85
85
|
return new ToolResult(`release template missing at ${TEMPLATE_DIR}`, true);
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
-
// 1
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
88
|
+
// v0.8.1 P9-C: defer the snapshot (git tag) until AFTER the bundle
|
|
89
|
+
// is written + verified. v0.8.0 ordered snapshot-first to "lock in
|
|
90
|
+
// commit + tag regardless of bundle outcome," but E2E #11 资管 v0.8
|
|
91
|
+
// audit found `release-v1` tags with no corresponding bundle dir —
|
|
92
|
+
// tag without bundle confuses downstream consumers. New order:
|
|
93
|
+
// 1. Build bundle (catalog read, copy template, write fixtures, manifest, README)
|
|
94
|
+
// 2. Verify bundle (manifest.json + README.md exist + non-empty)
|
|
95
|
+
// 3. ONLY THEN snapshot (creates the git tag) + back-fill manifest
|
|
96
|
+
// with snapshot tag/commit
|
|
97
|
+
// If verification fails, a `.failed_release` marker is written into
|
|
98
|
+
// the bundle dir and NO tag is created.
|
|
99
|
+
let snapshotTag = null;
|
|
100
|
+
let snapshotCommit = null;
|
|
95
101
|
|
|
96
102
|
// 2. Read catalog and filter
|
|
97
103
|
const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
|
|
@@ -294,6 +300,77 @@ export class ReleaseTool extends BaseTool {
|
|
|
294
300
|
}
|
|
295
301
|
}
|
|
296
302
|
|
|
303
|
+
// v0.8.1 P9-C: bundle verification + transactional snapshot.
|
|
304
|
+
// The manifest + README were written above. Verify they exist with
|
|
305
|
+
// substance (README ≥ 200 bytes; manifest ≥ 50 bytes and valid JSON whose `slug` matches the expected slug).
|
|
306
|
+
// If verification fails, write `.failed_release` marker and skip
|
|
307
|
+
// the git-tag step — no tag-without-bundle.
|
|
308
|
+
const manifestPath = path.join(bundleAbs, "manifest.json");
|
|
309
|
+
const readmePath = path.join(bundleAbs, "README.md");
|
|
310
|
+
let verifyError = null;
|
|
311
|
+
try {
|
|
312
|
+
const mStat = fs.statSync(manifestPath);
|
|
313
|
+
const rStat = fs.statSync(readmePath);
|
|
314
|
+
if (!mStat.isFile() || mStat.size < 50) verifyError = "manifest.json missing or too small";
|
|
315
|
+
else if (!rStat.isFile() || rStat.size < 200) verifyError = "README.md missing or too small";
|
|
316
|
+
else {
|
|
317
|
+
const m = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
|
|
318
|
+
if (m.slug !== slug) verifyError = `manifest.slug=${m.slug} doesn't match expected ${slug}`;
|
|
319
|
+
}
|
|
320
|
+
} catch (e) {
|
|
321
|
+
verifyError = `bundle verification threw: ${e.message}`;
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
if (verifyError) {
|
|
325
|
+
try {
|
|
326
|
+
fs.writeFileSync(
|
|
327
|
+
path.join(bundleAbs, ".failed_release"),
|
|
328
|
+
JSON.stringify({
|
|
329
|
+
failed_at: new Date().toISOString(),
|
|
330
|
+
reason: verifyError,
|
|
331
|
+
label,
|
|
332
|
+
slug,
|
|
333
|
+
}, null, 2),
|
|
334
|
+
);
|
|
335
|
+
} catch { /* best-effort */ }
|
|
336
|
+
return new ToolResult(
|
|
337
|
+
`Release bundle verification failed (${verifyError}). NO git tag created. ` +
|
|
338
|
+
`See .failed_release marker in ${bundleRel}/ for details. Fix the bundle issue and re-run.`,
|
|
339
|
+
true,
|
|
340
|
+
);
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
// Bundle verified. NOW snapshot — creates the durable git tag.
|
|
344
|
+
const snapResult = await this._snapshot.execute({
|
|
345
|
+
label: `release-${slug}`,
|
|
346
|
+
notes: `Release ${label} bundle source`,
|
|
347
|
+
});
|
|
348
|
+
if (snapResult.isError) {
|
|
349
|
+
// Bundle exists but tagging failed. Surface but don't roll back —
|
|
350
|
+
// the bundle is still usable; the user can manually tag later.
|
|
351
|
+
return new ToolResult(
|
|
352
|
+
`Release '${label}' bundled at ${bundleRel} but snapshot tag FAILED: ${snapResult.content}. ` +
|
|
353
|
+
`Bundle is valid; create the snapshot tag manually if needed.`,
|
|
354
|
+
);
|
|
355
|
+
}
|
|
356
|
+
const meta = this._readSnapshotMeta(`release-${slug}`);
|
|
357
|
+
snapshotTag = meta.tag;
|
|
358
|
+
snapshotCommit = meta.commit;
|
|
359
|
+
|
|
360
|
+
// Back-fill the manifest with the now-known snapshot tag/commit.
|
|
361
|
+
try {
|
|
362
|
+
const m = JSON.parse(fs.readFileSync(manifestPath, "utf-8"));
|
|
363
|
+
m.snapshot_tag = snapshotTag;
|
|
364
|
+
m.snapshot_commit = snapshotCommit;
|
|
365
|
+
fs.writeFileSync(manifestPath, JSON.stringify(m, null, 2) + "\n");
|
|
366
|
+
// Also back-fill the README's snapshot placeholders if still placeholder.
|
|
367
|
+
const readme = fs.readFileSync(readmePath, "utf-8");
|
|
368
|
+
const updated = readme
|
|
369
|
+
.replace(/\(no tag — git unavailable\)/g, snapshotTag || "")
|
|
370
|
+
.replace(/\(unknown\)/g, snapshotCommit || "(unknown)");
|
|
371
|
+
if (updated !== readme) fs.writeFileSync(readmePath, updated);
|
|
372
|
+
} catch { /* best-effort back-fill */ }
|
|
373
|
+
|
|
297
374
|
// Bundle dir is in output/ (gitignored). Snapshot manifest in snapshots/ IS tracked.
|
|
298
375
|
const lines = [
|
|
299
376
|
`Release '${label}' bundled at ${bundleRel}`,
|
|
@@ -576,10 +653,268 @@ export class ReleaseTool extends BaseTool {
|
|
|
576
653
|
}
|
|
577
654
|
}
|
|
578
655
|
|
|
656
|
+
// 3) v0.8 P0-C: production_qc_results.json + qc_results_v*.json shapes
|
|
657
|
+
// (资管 + 贷款 v0.7.5 audits both shipped empty historical_accuracy
|
|
658
|
+
// because the v0.7.2 aggregator only recognized rule_stats / full_test_results).
|
|
659
|
+
if (tally.size === 0) {
|
|
660
|
+
const qcFiles = files
|
|
661
|
+
.filter((f) =>
|
|
662
|
+
/^production_qc(?:_results)?(?:_v\d+)?\.json$/i.test(f.name) ||
|
|
663
|
+
/^qc_results(?:_v\d+)?\.json$/i.test(f.name)
|
|
664
|
+
)
|
|
665
|
+
.sort((a, b) => a.name.localeCompare(b.name));
|
|
666
|
+
for (const f of qcFiles.slice(0, 5)) {
|
|
667
|
+
try {
|
|
668
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
669
|
+
const results = d.results;
|
|
670
|
+
if (!results) continue;
|
|
671
|
+
|
|
672
|
+
// Shape 3a (资管): nested rule-keyed map
|
|
673
|
+
// {results: {<rid>: {<doc_id>: {verdict, ...}}}}
|
|
674
|
+
if (typeof results === "object" && !Array.isArray(results)) {
|
|
675
|
+
for (const [rid, docs] of Object.entries(results)) {
|
|
676
|
+
if (!isRuleId(rid) || !docs || typeof docs !== "object") continue;
|
|
677
|
+
for (const r of Object.values(docs)) {
|
|
678
|
+
if (!r || typeof r !== "object") continue;
|
|
679
|
+
const verdict = (r.verdict || "").toString().toUpperCase();
|
|
680
|
+
if (verdict === "PASS") bump(rid, "pass");
|
|
681
|
+
else if (verdict === "FAIL") bump(rid, "fail");
|
|
682
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA" || verdict === "WARNING") bump(rid, "na");
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
if (tally.size > 0) sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
686
|
+
}
|
|
687
|
+
// Shape 3b (贷款): per-doc rollup list with failed_rules
|
|
688
|
+
// {results: [{filename, actual, correct, failed_rules: [...]}], total_tested: N}
|
|
689
|
+
// For each rule: failures counted from failed_rules union; passes
|
|
690
|
+
// inferred as (rows counted - failures), applied to every rule in the catalog or in any failed_rules list.
|
|
691
|
+
else if (Array.isArray(results)) {
|
|
692
|
+
const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
|
|
693
|
+
let catalogRules = [];
|
|
694
|
+
try {
|
|
695
|
+
const cat = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
696
|
+
const list = Array.isArray(cat) ? cat : Array.isArray(cat?.rules) ? cat.rules : [];
|
|
697
|
+
catalogRules = list.map((r) => r?.id || r?.rule_id).filter((x) => isRuleId(x));
|
|
698
|
+
} catch { /* catalog optional */ }
|
|
699
|
+
|
|
700
|
+
const failCountByRule = new Map();
|
|
701
|
+
let docCount = 0;
|
|
702
|
+
for (const row of results) {
|
|
703
|
+
if (!row || typeof row !== "object") continue;
|
|
704
|
+
docCount += 1;
|
|
705
|
+
const failed = Array.isArray(row.failed_rules) ? row.failed_rules : [];
|
|
706
|
+
for (const rid of failed) {
|
|
707
|
+
if (!isRuleId(rid)) continue;
|
|
708
|
+
failCountByRule.set(rid, (failCountByRule.get(rid) || 0) + 1);
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
if (docCount > 0) {
|
|
712
|
+
const ruleSet = new Set([...catalogRules, ...failCountByRule.keys()]);
|
|
713
|
+
for (const rid of ruleSet) {
|
|
714
|
+
const fails = failCountByRule.get(rid) || 0;
|
|
715
|
+
const passes = Math.max(0, docCount - fails);
|
|
716
|
+
const t = tally.get(rid) || { pass: 0, fail: 0, na: 0, n: 0 };
|
|
717
|
+
t.pass += passes; t.fail += fails; t.n += docCount;
|
|
718
|
+
tally.set(rid, t);
|
|
719
|
+
}
|
|
720
|
+
if (tally.size > 0) sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
721
|
+
}
|
|
722
|
+
}
|
|
723
|
+
} catch { /* try next file */ }
|
|
724
|
+
if (tally.size > 0) break;
|
|
725
|
+
}
|
|
726
|
+
}
|
|
727
|
+
|
|
728
|
+
// 4) v0.8.1 P9-A: top-level fail_by_rule + pass_by_rule maps (贷款
|
|
729
|
+
// v0.8 production_qc_report.json shape). Direct per-rule counts —
|
|
730
|
+
// no per-doc rollup, no verdict literals to scan.
|
|
731
|
+
// {accuracy, total_checks, fail_by_rule: {<rid>: N}, pass_by_rule: {<rid>: N}}
|
|
732
|
+
if (tally.size === 0) {
|
|
733
|
+
for (const f of files) {
|
|
734
|
+
if (!/qc|prod|report|result/i.test(f.name)) continue;
|
|
735
|
+
try {
|
|
736
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
737
|
+
const failMap = d?.fail_by_rule;
|
|
738
|
+
const passMap = d?.pass_by_rule;
|
|
739
|
+
if (
|
|
740
|
+
failMap && typeof failMap === "object" && !Array.isArray(failMap) &&
|
|
741
|
+
passMap && typeof passMap === "object" && !Array.isArray(passMap)
|
|
742
|
+
) {
|
|
743
|
+
const allRules = new Set([...Object.keys(failMap), ...Object.keys(passMap)]);
|
|
744
|
+
let matched = false;
|
|
745
|
+
for (const rid of allRules) {
|
|
746
|
+
if (!isRuleId(rid)) continue;
|
|
747
|
+
const fails = Number(failMap[rid]) || 0;
|
|
748
|
+
const passes = Number(passMap[rid]) || 0;
|
|
749
|
+
if (fails + passes === 0) continue;
|
|
750
|
+
const t = tally.get(rid) || { pass: 0, fail: 0, na: 0, n: 0 };
|
|
751
|
+
t.pass += passes;
|
|
752
|
+
t.fail += fails;
|
|
753
|
+
t.n += passes + fails;
|
|
754
|
+
tally.set(rid, t);
|
|
755
|
+
matched = true;
|
|
756
|
+
}
|
|
757
|
+
if (matched) {
|
|
758
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
759
|
+
break;
|
|
760
|
+
}
|
|
761
|
+
}
|
|
762
|
+
} catch { /* skip non-JSON */ }
|
|
763
|
+
}
|
|
764
|
+
}
|
|
765
|
+
|
|
766
|
+
// 5) v0.8.2 P13-A: doc-keyed → rules-keyed nested shape.
|
|
767
|
+
// 贷款 v0.8.1 wrote skill_test_v*_results.json + v2_hybrid_results.json
|
|
768
|
+
// + run_all_checks.json all with this shape:
|
|
769
|
+
// {
|
|
770
|
+
// "<doc_filename>": {
|
|
771
|
+
// "channel": "...", "expected": "PASS"|"FAIL",
|
|
772
|
+
// "rules": {
|
|
773
|
+
// "R01": {"rule_id": "R01", "verdict": "PASS", "confidence": 0.95, "method": "regex"},
|
|
774
|
+
// "R02": {...}
|
|
775
|
+
// }
|
|
776
|
+
// },
|
|
777
|
+
// ...
|
|
778
|
+
// }
|
|
779
|
+
// The optional outer "results" wrapper from v2_full_regression.json
|
|
780
|
+
// (which nests this further) is unwrapped via d.results || d.
|
|
781
|
+
if (tally.size === 0) {
|
|
782
|
+
for (const f of files) {
|
|
783
|
+
if (!/qc|verdict|result|test/i.test(f.name)) continue;
|
|
784
|
+
try {
|
|
785
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
786
|
+
const root = d?.results || d;
|
|
787
|
+
if (!root || typeof root !== "object" || Array.isArray(root)) continue;
|
|
788
|
+
let matched = false;
|
|
789
|
+
for (const docKey of Object.keys(root)) {
|
|
790
|
+
const docEntry = root[docKey];
|
|
791
|
+
if (!docEntry || typeof docEntry !== "object") continue;
|
|
792
|
+
const rulesMap = docEntry.rules;
|
|
793
|
+
if (!rulesMap || typeof rulesMap !== "object" || Array.isArray(rulesMap)) continue;
|
|
794
|
+
for (const rid of Object.keys(rulesMap)) {
|
|
795
|
+
if (!isRuleId(rid)) continue;
|
|
796
|
+
const r = rulesMap[rid];
|
|
797
|
+
if (!r || typeof r !== "object") continue;
|
|
798
|
+
const verdict = (r.verdict || r.result_type || r.status || "").toString().toUpperCase();
|
|
799
|
+
if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
|
|
800
|
+
else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
|
|
801
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
if (matched) {
|
|
805
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
806
|
+
break;
|
|
807
|
+
}
|
|
808
|
+
} catch { /* skip non-JSON */ }
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
// 6) v0.8.3 P22-B6: top-level array of {doc_id, results: [{rule_id, status}]}.
|
|
813
|
+
// 资管 v0.8.2 wrote `output/skill_test_v*.json` + `workflow_v*_results.json`
|
|
814
|
+
// + `evolution_round*.json` all with this shape:
|
|
815
|
+
// [
|
|
816
|
+
// {
|
|
817
|
+
// "doc_id": "<doc-filename>",
|
|
818
|
+
// "results": [
|
|
819
|
+
// {"rule_id": "R01-01", "status": "WARNING", "found_fields": {...}},
|
|
820
|
+
// {"rule_id": "R01-02", "status": "PASS", ...},
|
|
821
|
+
// ...
|
|
822
|
+
// ]
|
|
823
|
+
// },
|
|
824
|
+
// ...
|
|
825
|
+
// ]
|
|
826
|
+
// Distinct from Shape 5: top-level is an ARRAY (not object), and the
|
|
827
|
+
// per-rule data lives in `results: [...]` (an array of rule outcomes)
|
|
828
|
+
// rather than `rules: {<rule>: ...}` (object keyed by rule).
|
|
829
|
+
if (tally.size === 0) {
|
|
830
|
+
for (const f of files) {
|
|
831
|
+
if (!/qc|verdict|result|test|evolution|workflow/i.test(f.name)) continue;
|
|
832
|
+
try {
|
|
833
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
834
|
+
if (!Array.isArray(d)) continue;
|
|
835
|
+
let matched = false;
|
|
836
|
+
for (const docEntry of d) {
|
|
837
|
+
if (!docEntry || typeof docEntry !== "object") continue;
|
|
838
|
+
const results = docEntry.results;
|
|
839
|
+
if (!Array.isArray(results)) continue;
|
|
840
|
+
for (const r of results) {
|
|
841
|
+
if (!r || typeof r !== "object") continue;
|
|
842
|
+
const rid = r.rule_id || r.ruleId || r.id;
|
|
843
|
+
if (!isRuleId(rid)) continue;
|
|
844
|
+
const verdict = (r.status || r.verdict || r.result_type || "").toString().toUpperCase();
|
|
845
|
+
if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
|
|
846
|
+
else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
|
|
847
|
+
else if (verdict === "WARNING") { bump(rid, "pass"); matched = true; } // WARNING counts as pass (per existing shape conventions); NOTE(review): Shape 3a buckets WARNING as "na" — confirm which is intended
|
|
848
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
|
|
849
|
+
}
|
|
850
|
+
}
|
|
851
|
+
if (matched) {
|
|
852
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path));
|
|
853
|
+
break;
|
|
854
|
+
}
|
|
855
|
+
} catch { /* skip non-JSON */ }
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
|
|
859
|
+
// 7) Fallback (belt-and-suspenders per v0.8 plan Risk #7):
|
|
860
|
+
// walk output/*.json files whose name matches qc|verdict|result, with a top-level rule_id-keyed shape that has
|
|
861
|
+
// verdict-like leaf objects. Catches future schema drift before the
|
|
862
|
+
// next audit cycle.
|
|
863
|
+
if (tally.size === 0) {
|
|
864
|
+
for (const f of files) {
|
|
865
|
+
if (!/qc|verdict|result/i.test(f.name)) continue;
|
|
866
|
+
try {
|
|
867
|
+
const d = JSON.parse(fs.readFileSync(f.path, "utf-8"));
|
|
868
|
+
const root = d?.results || d;
|
|
869
|
+
if (!root || typeof root !== "object" || Array.isArray(root)) continue;
|
|
870
|
+
let matched = false;
|
|
871
|
+
for (const [rid, val] of Object.entries(root)) {
|
|
872
|
+
if (!isRuleId(rid) || !val || typeof val !== "object") continue;
|
|
873
|
+
// val might be {verdict, ...} OR {<doc>: {verdict, ...}}
|
|
874
|
+
const probe = val.verdict ? [val] : Object.values(val);
|
|
875
|
+
for (const r of probe) {
|
|
876
|
+
if (!r || typeof r !== "object") continue;
|
|
877
|
+
const verdict = (r.verdict || "").toString().toUpperCase();
|
|
878
|
+
if (verdict === "PASS") { bump(rid, "pass"); matched = true; }
|
|
879
|
+
else if (verdict === "FAIL") { bump(rid, "fail"); matched = true; }
|
|
880
|
+
else if (verdict === "NOT_APPLICABLE" || verdict === "NA") { bump(rid, "na"); matched = true; }
|
|
881
|
+
}
|
|
882
|
+
}
|
|
883
|
+
if (matched) {
|
|
884
|
+
sourceFiles.push(path.relative(this._workspace.cwd, f.path) + " (fallback shape)");
|
|
885
|
+
break;
|
|
886
|
+
}
|
|
887
|
+
} catch { /* skip non-JSON */ }
|
|
888
|
+
}
|
|
889
|
+
}
|
|
890
|
+
|
|
579
891
|
if (tally.size === 0) return null;
|
|
580
892
|
|
|
893
|
+
// v0.8.1 P9-D: filter tally to rule_ids in the current catalog.
|
|
894
|
+
// E2E #11 资管 v0.8 audit: confidence_calibration aggregated from
|
|
895
|
+
// an abandoned 39-rule pipeline included only 2 of 4 final samples.
|
|
896
|
+
// Filtering to catalog.json keeps the calibration scoped to the
|
|
897
|
+
// rules that actually ship in the release.
|
|
898
|
+
let catalogRuleIds = null;
|
|
899
|
+
try {
|
|
900
|
+
const catalogPath = path.join(this._workspace.cwd, "rules", "catalog.json");
|
|
901
|
+
if (fs.existsSync(catalogPath)) {
|
|
902
|
+
const cat = JSON.parse(fs.readFileSync(catalogPath, "utf-8"));
|
|
903
|
+
const list = Array.isArray(cat) ? cat : Array.isArray(cat?.rules) ? cat.rules : [];
|
|
904
|
+
catalogRuleIds = new Set(
|
|
905
|
+
list.map((r) => r?.id || r?.rule_id).filter((x) => isRuleId(x))
|
|
906
|
+
);
|
|
907
|
+
if (catalogRuleIds.size === 0) catalogRuleIds = null;
|
|
908
|
+
}
|
|
909
|
+
} catch { /* skip filter if catalog missing/malformed */ }
|
|
910
|
+
|
|
581
911
|
const historical_accuracy = {};
|
|
912
|
+
const droppedRules = [];
|
|
582
913
|
for (const [rid, t] of tally.entries()) {
|
|
914
|
+
if (catalogRuleIds && !catalogRuleIds.has(rid)) {
|
|
915
|
+
droppedRules.push(rid);
|
|
916
|
+
continue;
|
|
917
|
+
}
|
|
583
918
|
const fired = t.pass + t.fail;
|
|
584
919
|
historical_accuracy[rid] = {
|
|
585
920
|
pass_rate: fired > 0 ? +(t.pass / fired).toFixed(4) : null,
|
|
@@ -593,6 +928,7 @@ export class ReleaseTool extends BaseTool {
|
|
|
593
928
|
historical_accuracy,
|
|
594
929
|
computed_at: new Date().toISOString(),
|
|
595
930
|
source_files: sourceFiles,
|
|
931
|
+
...(droppedRules.length > 0 ? { dropped_off_catalog: droppedRules } : {}),
|
|
596
932
|
};
|
|
597
933
|
}
|
|
598
934
|
|