kc-beta 0.7.2 → 0.7.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -8
- package/bin/kc-beta.js +20 -6
- package/package.json +1 -1
- package/src/agent/engine.js +138 -55
- package/src/agent/pipelines/_milestone-derive.js +140 -4
- package/src/agent/pipelines/initializer.js +4 -1
- package/src/agent/skill-loader.js +433 -111
- package/src/agent/tools/consult-skill.js +112 -0
- package/src/agent/tools/copy-to-workspace.js +18 -12
- package/src/agent/tools/release.js +128 -1
- package/src/agent/tools/sandbox-exec.js +4 -1
- package/src/agent/tools/task-board.js +194 -0
- package/src/agent/tools/workspace-file.js +57 -43
- package/src/config.js +6 -4
- package/template/AGENT.md +182 -7
- package/template/skills/en/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +1 -0
- package/template/skills/{zh/meta → en}/compliance-judgment/SKILL.md +1 -0
- package/template/skills/en/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
- package/template/skills/en/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
- package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
- package/template/skills/en/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
- package/template/skills/{zh/meta → en}/document-chunking/SKILL.md +1 -0
- package/template/skills/en/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
- package/template/skills/{zh/meta → en}/entity-extraction/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/quality-control → quality-control}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +60 -0
- package/template/skills/en/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +1 -0
- package/template/skills/en/skill-creator/SKILL.md +2 -1
- package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/SKILL.md +5 -4
- package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
- package/template/skills/en/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/version-control → version-control}/SKILL.md +1 -0
- package/template/skills/en/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +37 -2
- package/template/skills/phase_skills.yaml +107 -0
- package/template/skills/zh/{meta-meta/auto-model-selection → auto-model-selection}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/bootstrap-workspace → bootstrap-workspace}/SKILL.md +1 -0
- package/template/skills/{en/meta → zh}/compliance-judgment/SKILL.md +1 -0
- package/template/skills/zh/{meta/confidence-system → confidence-system}/SKILL.md +1 -0
- package/template/skills/zh/{meta/corner-case-management → corner-case-management}/SKILL.md +1 -0
- package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/SKILL.md +1 -0
- package/template/skills/zh/{meta/data-sensibility → data-sensibility}/SKILL.md +1 -0
- package/template/skills/{en/meta → zh}/document-chunking/SKILL.md +1 -0
- package/template/skills/zh/{meta/document-parsing → document-parsing}/SKILL.md +1 -0
- package/template/skills/{en/meta → zh}/entity-extraction/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/quality-control → quality-control}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/SKILL.md +48 -0
- package/template/skills/zh/{meta-meta/rule-graph → rule-graph}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/SKILL.md +1 -0
- package/template/skills/zh/skill-creator/SKILL.md +2 -1
- package/template/skills/zh/skill-to-workflow/SKILL.md +190 -0
- package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/SKILL.md +1 -0
- package/template/skills/zh/{meta/tree-processing → tree-processing}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/version-control → version-control}/SKILL.md +1 -0
- package/template/skills/zh/{meta-meta/work-decomposition → work-decomposition}/SKILL.md +37 -2
- package/template/CLAUDE.md +0 -137
- package/template/skills/zh/meta-meta/skill-to-workflow/SKILL.md +0 -188
- /package/template/skills/en/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
- /package/template/skills/en/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
- /package/template/skills/en/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
- /package/template/skills/en/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
- /package/template/skills/en/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
- /package/template/skills/en/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
- /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
- /package/template/skills/en/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
- /package/template/skills/en/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
- /package/template/skills/en/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
- /package/template/skills/en/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
- /package/template/skills/en/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
- /package/template/skills/en/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
- /package/template/skills/zh/{meta/compliance-judgment → compliance-judgment}/references/output-format.md +0 -0
- /package/template/skills/zh/{meta/cross-document-verification → cross-document-verification}/references/contradiction-taxonomy.md +0 -0
- /package/template/skills/zh/{meta-meta/dashboard-reporting → dashboard-reporting}/scripts/generate_dashboard.py +0 -0
- /package/template/skills/zh/{meta/document-parsing → document-parsing}/references/parser-catalog.md +0 -0
- /package/template/skills/zh/{meta-meta/evolution-loop → evolution-loop}/references/convergence-guide.md +0 -0
- /package/template/skills/zh/{meta-meta/pdf-review-dashboard → pdf-review-dashboard}/scripts/generate_review.js +0 -0
- /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/qa-layers.md +0 -0
- /package/template/skills/zh/{meta-meta/quality-control → quality-control}/references/sampling-strategies.md +0 -0
- /package/template/skills/zh/{meta-meta/rule-extraction → rule-extraction}/references/chunking-strategies.md +0 -0
- /package/template/skills/zh/{meta-meta/skill-authoring → skill-authoring}/references/skill-format-spec.md +0 -0
- /package/template/skills/zh/{meta-meta/skill-to-workflow → skill-to-workflow}/references/worker-llm-catalog.md +0 -0
- /package/template/skills/zh/{meta-meta/task-decomposition → task-decomposition}/references/decision-matrix.md +0 -0
- /package/template/skills/zh/{meta-meta/version-control → version-control}/references/trace-id-spec.md +0 -0
package/README.md
CHANGED
|
@@ -216,28 +216,41 @@ Quality Thresholds, Language.
|
|
|
216
216
|
|
|
217
217
|
## Status
|
|
218
218
|
|
|
219
|
-
**v0.
|
|
219
|
+
**v0.7.4 — phase-control fix + codex review re-attempt.** Architectural
|
|
220
|
+
payload from v0.6.0+ is still in place:
|
|
220
221
|
|
|
221
222
|
- Parallel ralph-loop (up to 8 concurrent workers) with a heap-safety
|
|
222
223
|
conformance gate
|
|
223
224
|
- Native chunker + RAG (onion-peeler + CJK bigram keyword index +
|
|
224
225
|
one-shot LLM bundle classifier, ported from the AMC verification app)
|
|
225
|
-
-
|
|
226
|
-
|
|
226
|
+
- Agent-owned task board: the agent reads the rule list from
|
|
227
|
+
`describeState`, decides decomposition (per-rule / grouped / range),
|
|
228
|
+
and calls `TaskCreate` / `TaskUpdate` / `TaskComplete` to drive the
|
|
229
|
+
Ralph loop **within the current phase only** (v0.7.4). Source-context
|
|
230
|
+
auto-attach pulls rule NL + evidence chunks + sibling rules into each
|
|
231
|
+
task's prompt.
|
|
232
|
+
- Phase boundaries = user checkpoints: the Ralph loop exits at every
|
|
233
|
+
phase transition, returning control to the user. The engine doesn't
|
|
234
|
+
auto-advance; phase advance is explicit (agent's `phase_advance` tool
|
|
235
|
+
call or user re-prompt). Marathon-style end-to-end autonomy lives
|
|
236
|
+
outside the engine.
|
|
227
237
|
- Workspace file locking for shared coordination files (`rules/catalog.json`,
|
|
228
|
-
`rules/manifest.json`, `
|
|
238
|
+
`rules/manifest.json`, `refs/manifest.json`, `tasks.json`,
|
|
239
|
+
`session-state.json`) — every writer goes through `withFileLock`.
|
|
229
240
|
- `agent_tool` gets `wait` / `poll` / `list` / `kill` operations +
|
|
230
241
|
`stale_subagents` phase-advance signal
|
|
231
|
-
-
|
|
242
|
+
- FINALIZATION phase packages the session into a shippable deliverable
|
|
232
243
|
(canonical `rule_skills/` layout + README + coverage report + final
|
|
233
244
|
dashboard)
|
|
245
|
+
- Filesystem-derived phase milestones (v0.7.0+): the engine reads disk
|
|
246
|
+
artifacts for advance criteria, never trusts tool-call assertions
|
|
234
247
|
- Input stays active during streaming (type-ahead queue), arrow keys +
|
|
235
248
|
history recall, CTX smoothing + peak, per-provider context-limit caps,
|
|
236
249
|
`/tools`, `/parallelism`, and more
|
|
237
250
|
|
|
238
|
-
See [DEV_LOG.md](./DEV_LOG.md) for the
|
|
239
|
-
[docs/
|
|
240
|
-
|
|
251
|
+
See [DEV_LOG.md](./DEV_LOG.md) for the per-release change breakdowns and
|
|
252
|
+
[docs/update_design_v7.md](./docs/update_design_v7.md) for the v0.7.x
|
|
253
|
+
plan and patch notes.
|
|
241
254
|
|
|
242
255
|
Bug reports and PRs welcome at <https://github.com/kitchen-engineer42/kc-cli>.
|
|
243
256
|
|
package/bin/kc-beta.js
CHANGED
|
@@ -34,17 +34,31 @@ if (parallelismOverride !== null) {
|
|
|
34
34
|
// their own output.
|
|
35
35
|
const __filename = fileURLToPath(import.meta.url);
|
|
36
36
|
const __dirname = dirname(__filename);
|
|
37
|
-
function
|
|
37
|
+
/**
 * Read the `version` field of the package.json located one directory above
 * this script.
 *
 * Never throws: any read/parse failure, or a missing/falsy `version`,
 * yields the string "unknown".
 *
 * @returns {string} The package version, or "unknown".
 */
function readPkgVersion() {
  const fallback = "unknown";
  try {
    const manifestPath = resolve(__dirname, "..", "package.json");
    const manifestText = readFileSync(manifestPath, "utf-8");
    // Property access stays inside the try so a manifest that parses to
    // `null` still falls back instead of throwing.
    const declared = JSON.parse(manifestText).version;
    return declared ? declared : fallback;
  } catch {
    return fallback;
  }
}
|
|
43
|
+
/**
 * Write the one-line startup banner (CLI version + path of this script) to
 * stderr. Best-effort: any failure while building or writing the banner is
 * ignored so it can never block startup.
 */
function printBanner() {
  try {
    const banner = `⏵⏵ KC Agent CLI v${readPkgVersion()} · ${__filename}\n`;
    process.stderr.write(banner);
  } catch {
    /* silent */
  }
}
|
|
44
|
-
const
|
|
45
|
-
|
|
49
|
+
const isVersion = args.includes("--version") || args.includes("-v");
|
|
50
|
+
const isHelp = args.includes("--help") || args.includes("-h");
|
|
51
|
+
const suppressBanner = isVersion || isHelp;
|
|
46
52
|
if (!suppressBanner) printBanner();
|
|
47
53
|
|
|
54
|
+
// v0.7.5 G-F1: `--version` prints version and exits. Previously the flag
|
|
55
|
+
// suppressed the banner but fell through to TUI launch (audit confirmed
|
|
56
|
+
// during v0.7.4 testing). Print + exit before the subcommand dispatch.
|
|
57
|
+
if (isVersion) {
|
|
58
|
+
process.stdout.write(`${readPkgVersion()}\n`);
|
|
59
|
+
process.exit(0);
|
|
60
|
+
}
|
|
61
|
+
|
|
48
62
|
(async () => {
|
|
49
63
|
if (subcommand === "onboard" || subcommand === "setup") {
|
|
50
64
|
const { onboard } = await import("../src/cli/onboard.js");
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kc-beta",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.5",
|
|
4
4
|
"description": "KC Agent — LLM document verification agent (pure Node.js CLI). Dual-licensed: PolyForm Noncommercial 1.0.0 for personal/noncommercial use; commercial license required for enterprise production. See LICENSE and LICENSE-COMMERCIAL.md.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
package/src/agent/engine.js
CHANGED
|
@@ -37,6 +37,8 @@ import { EvolutionCycleTool } from "./tools/evolution-cycle.js";
|
|
|
37
37
|
import { TierDowngradeTool } from "./tools/tier-downgrade.js";
|
|
38
38
|
import { AgentTool } from "./tools/agent-tool.js";
|
|
39
39
|
import { WebSearchTool } from "./tools/web-search.js";
|
|
40
|
+
import { TaskCreateTool, TaskUpdateTool, TaskCompleteTool } from "./tools/task-board.js";
|
|
41
|
+
import { ConsultSkillTool } from "./tools/consult-skill.js";
|
|
40
42
|
import { SkillLoader } from "./skill-loader.js";
|
|
41
43
|
import { TaskManager } from "./task-manager.js";
|
|
42
44
|
import { Scheduler } from "./scheduler.js";
|
|
@@ -233,6 +235,18 @@ export class AgentEngine {
|
|
|
233
235
|
// Skill discovery (Claude Code pattern: index in context, full content on demand)
|
|
234
236
|
this._skillLoader = new SkillLoader(config.language);
|
|
235
237
|
|
|
238
|
+
// v0.7.5 G-D1: populate <workspace>/skills/ with the initial phase's
|
|
239
|
+
// available skill set. Symlink with copy fallback. Re-populated on
|
|
240
|
+
// every phase advance/retreat (see _advancePhase).
|
|
241
|
+
try {
|
|
242
|
+
const res = this._skillLoader.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
|
|
243
|
+
this.eventLog?.append?.("skills_populated", {
|
|
244
|
+
phase: res.phase,
|
|
245
|
+
populated: res.populated,
|
|
246
|
+
failures: res.failures,
|
|
247
|
+
});
|
|
248
|
+
} catch { /* best-effort; skills/ population is not a critical-path failure */ }
|
|
249
|
+
|
|
236
250
|
// Register tools for initial phase
|
|
237
251
|
this.toolRegistry = new ToolRegistry();
|
|
238
252
|
this._registerToolsForPhase(this.currentPhase);
|
|
@@ -475,6 +489,27 @@ export class AgentEngine {
|
|
|
475
489
|
() => this.currentPhase,
|
|
476
490
|
),
|
|
477
491
|
new WebSearchTool(this.config.tavilyApiKey),
|
|
492
|
+
// v0.7.4 (re-applied from v0.7.3 G2b): TaskCreate /
|
|
493
|
+
// TaskUpdate / TaskComplete — agent populates the
|
|
494
|
+
// Ralph-loop queue for the CURRENT phase only. Phase
|
|
495
|
+
// boundaries exit the loop (v0.7.4 G0c). Skipped for
|
|
496
|
+
// subagents (taskManager null in subagent scope).
|
|
497
|
+
...(this.taskManager ? [
|
|
498
|
+
new TaskCreateTool(this.workspace, this.taskManager),
|
|
499
|
+
new TaskUpdateTool(this.workspace, this.taskManager),
|
|
500
|
+
new TaskCompleteTool(this.workspace, this.taskManager),
|
|
501
|
+
] : []),
|
|
502
|
+
// v0.7.5: consult_skill loads a meta-skill body into conversation
|
|
503
|
+
// history on demand. Always-loaded skills are already in the
|
|
504
|
+
// system prompt via SkillLoader.formatForContext; this tool covers
|
|
505
|
+
// the "available" set for the current phase. Both main + subagents
|
|
506
|
+
// register their own — each has its own skillLoader + phase.
|
|
507
|
+
new ConsultSkillTool(
|
|
508
|
+
this.workspace,
|
|
509
|
+
this._skillLoader,
|
|
510
|
+
() => this.currentPhase,
|
|
511
|
+
this.eventLog,
|
|
512
|
+
),
|
|
478
513
|
],
|
|
479
514
|
// Distillation+ only (DISTILL mode)
|
|
480
515
|
distill: [
|
|
@@ -1185,11 +1220,11 @@ export class AgentEngine {
|
|
|
1185
1220
|
}
|
|
1186
1221
|
this._totalTurns = (this._totalTurns || 0) + 1;
|
|
1187
1222
|
|
|
1188
|
-
//
|
|
1189
|
-
//
|
|
1190
|
-
// tool
|
|
1191
|
-
|
|
1192
|
-
|
|
1223
|
+
// v0.7.4 G0b: removed `_maybeAutoAdvance()` auto-fire here.
|
|
1224
|
+
// Phase advance is now 100% explicit (agent's `phase_advance`
|
|
1225
|
+
// tool, or user re-prompt). v0.7.3 phase-control regression
|
|
1226
|
+
// was caused by this edge-triggered auto-advance firing mid-
|
|
1227
|
+
// session and chaining into next phase without user check-in.
|
|
1193
1228
|
|
|
1194
1229
|
this.eventLog.append("turn_complete", {});
|
|
1195
1230
|
this.saveState();
|
|
@@ -1278,23 +1313,45 @@ export class AgentEngine {
|
|
|
1278
1313
|
|
|
1279
1314
|
this.eventLog.append("tool_result", {
|
|
1280
1315
|
name: tc.name,
|
|
1316
|
+
input: inputData,
|
|
1281
1317
|
output: result.content || "",
|
|
1282
1318
|
isError: result.isError,
|
|
1283
1319
|
traceId: offload?.traceId || null,
|
|
1284
1320
|
});
|
|
1285
1321
|
|
|
1286
|
-
//
|
|
1287
|
-
//
|
|
1288
|
-
//
|
|
1289
|
-
//
|
|
1290
|
-
|
|
1322
|
+
// v0.7.5 (G-F4): added `input` above so events.jsonl carries the
|
|
1323
|
+
// tool inputs (v0.7.4 G1c only patched the AgentEvent yield path,
|
|
1324
|
+
// missed the persistence path — audit confirmed 0/453 + 0/946
|
|
1325
|
+
// tool_result events had `input` in v0.7.4 sessions).
|
|
1326
|
+
|
|
1327
|
+
// D3a: trace skill invocations. v0.7.5 (G-C6): only fire on
|
|
1328
|
+
// READS of meta-skill paths. Writes to rule_skills/<id>/SKILL.md
|
|
1329
|
+
// during skill_authoring are NOT skill invocations — they're the
|
|
1330
|
+
// agent producing its own deliverable. The old "(unknown)" spam
|
|
1331
|
+
// (100% of events in v0.7.1 + v0.7.4 sessions) is gone.
|
|
1332
|
+
//
|
|
1333
|
+
// Note: meta-skill body reads now happen via consult_skill, which
|
|
1334
|
+
// emits skill_invoked itself (with the real skill name). This
|
|
1335
|
+
// path-matching emission stays only as a fallback for any agent
|
|
1336
|
+
// that reads a SKILL.md path directly (out of pattern).
|
|
1291
1337
|
try {
|
|
1338
|
+
const isRead =
|
|
1339
|
+
(tc.name === "workspace_file" && inputData?.operation === "read") ||
|
|
1340
|
+
(tc.name === "sandbox_exec" && /\b(cat|head|tail|less|grep|view|read)\b/.test(
|
|
1341
|
+
String(inputData?.command || "")
|
|
1342
|
+
));
|
|
1292
1343
|
if (
|
|
1293
1344
|
!result.isError &&
|
|
1345
|
+
isRead &&
|
|
1294
1346
|
(tc.name === "workspace_file" || tc.name === "sandbox_exec")
|
|
1295
1347
|
) {
|
|
1296
1348
|
const p = String(inputData?.path || inputData?.command || "");
|
|
1297
|
-
|
|
1349
|
+
// v0.7.5 flat layout: skills/<name>/SKILL.md (workspace scope)
|
|
1350
|
+
// OR template/skills/<lang>/<name>/SKILL.md (template scope, rare)
|
|
1351
|
+
// Deep layout backward-compat preserved for any stragglers.
|
|
1352
|
+
const skillMatch = p.match(
|
|
1353
|
+
/(?:template\/)?skills\/(?:[a-z]+\/)?(?:(?:meta-meta|meta|skill-creator)\/)?([a-zA-Z0-9_-]+)\/SKILL\.md\b/
|
|
1354
|
+
) || p.match(/\bSKILL\.md\b/);
|
|
1298
1355
|
if (skillMatch) {
|
|
1299
1356
|
const skillName = skillMatch[1] || "(unknown)";
|
|
1300
1357
|
this.eventLog.append("skill_invoked", {
|
|
@@ -1308,6 +1365,7 @@ export class AgentEngine {
|
|
|
1308
1365
|
yield new AgentEvent({
|
|
1309
1366
|
type: "tool_result",
|
|
1310
1367
|
name: tc.name,
|
|
1368
|
+
input: inputData,
|
|
1311
1369
|
output: historyContent,
|
|
1312
1370
|
isError: result.isError,
|
|
1313
1371
|
});
|
|
@@ -1374,12 +1432,9 @@ export class AgentEngine {
|
|
|
1374
1432
|
}
|
|
1375
1433
|
}
|
|
1376
1434
|
|
|
1377
|
-
//
|
|
1378
|
-
//
|
|
1379
|
-
//
|
|
1380
|
-
// just need to act on it eagerly.
|
|
1381
|
-
const ev = this._maybeAutoAdvance();
|
|
1382
|
-
if (ev) yield ev;
|
|
1435
|
+
// v0.7.4 G0b: removed post-tool `_maybeAutoAdvance()` call.
|
|
1436
|
+
// Phase advance is now 100% explicit. See `_runTaskLoopSerial`
|
|
1437
|
+
// phase-change-exit guard for the loop-level checkpoint.
|
|
1383
1438
|
|
|
1384
1439
|
} catch (err) {
|
|
1385
1440
|
// A8: If the LLM client tagged the stream termination reason, pass
|
|
@@ -1529,6 +1584,20 @@ export class AgentEngine {
|
|
|
1529
1584
|
this.workspace.setPhase(this.currentPhase);
|
|
1530
1585
|
this._createTasksForPhase(this.currentPhase);
|
|
1531
1586
|
|
|
1587
|
+
// v0.7.5 G-D2: re-populate <workspace>/skills/ with the new phase's
|
|
1588
|
+
// available set. Symlinks are wiped + recreated. Agent's `ls skills/`
|
|
1589
|
+
// and any read-by-path reflects the current phase's allowlist.
|
|
1590
|
+
try {
|
|
1591
|
+
const res = this._skillLoader?.populateWorkspaceSkills(this.workspace.cwd, this.currentPhase);
|
|
1592
|
+
if (res) {
|
|
1593
|
+
this.eventLog.append("skills_populated", {
|
|
1594
|
+
phase: res.phase,
|
|
1595
|
+
populated: res.populated,
|
|
1596
|
+
failures: res.failures,
|
|
1597
|
+
});
|
|
1598
|
+
}
|
|
1599
|
+
} catch { /* best-effort */ }
|
|
1600
|
+
|
|
1532
1601
|
// v0.7.0 N (#94): give the entered pipeline a chance to do
|
|
1533
1602
|
// phase-entry setup. Used by finalization to copy the release
|
|
1534
1603
|
// template into output/releases/v1/. Other pipelines are no-ops.
|
|
@@ -2097,7 +2166,26 @@ export class AgentEngine {
|
|
|
2097
2166
|
// Run the initial turn (user's request)
|
|
2098
2167
|
yield* this.runTurn(userMessage);
|
|
2099
2168
|
|
|
2100
|
-
//
|
|
2169
|
+
// v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
|
|
2170
|
+
// marathon test. The strict capture-BEFORE form lets every user
|
|
2171
|
+
// prompt advance only one phase, which blocks unattended overnight
|
|
2172
|
+
// sessions. v0.7.4-style capture-AFTER (below) allows the agent
|
|
2173
|
+
// to chain multiple phase_advance calls within the initial runTurn,
|
|
2174
|
+
// then exits the while loop on subsequent phase changes.
|
|
2175
|
+
//
|
|
2176
|
+
// TODO: after the overnight E2E results come in (2026-05-14), decide:
|
|
2177
|
+
// (a) re-enable F5 strict and build marathon as a separate mode
|
|
2178
|
+
// (external driver pattern, e.g., /loop-kc command) — locked
|
|
2179
|
+
// earlier decision per harness-research § 7
|
|
2180
|
+
// (b) keep capture-AFTER permanently and accept multi-phase prompts
|
|
2181
|
+
//
|
|
2182
|
+
// To re-enable F5: move `const startingPhase = this.currentPhase;`
|
|
2183
|
+
// to BEFORE the `yield* this.runTurn(userMessage);` above, and add
|
|
2184
|
+
// the matching `if (this.currentPhase !== startingPhase) { return; }`
|
|
2185
|
+
// block between runTurn and the while loop.
|
|
2186
|
+
const startingPhase = this.currentPhase;
|
|
2187
|
+
|
|
2188
|
+
// Auto-continue through pending tasks (within current phase only)
|
|
2101
2189
|
while (this.taskManager.getNextPending()) {
|
|
2102
2190
|
// v0.7.0 #93: budget-aware compact threshold. The old
|
|
2103
2191
|
// `messages.length > 15` was message-count-based and frozen
|
|
@@ -2158,26 +2246,20 @@ export class AgentEngine {
|
|
|
2158
2246
|
},
|
|
2159
2247
|
});
|
|
2160
2248
|
|
|
2161
|
-
//
|
|
2162
|
-
//
|
|
2163
|
-
//
|
|
2164
|
-
//
|
|
2165
|
-
|
|
2166
|
-
|
|
2167
|
-
|
|
2168
|
-
|
|
2169
|
-
|
|
2170
|
-
|
|
2171
|
-
|
|
2172
|
-
|
|
2173
|
-
|
|
2174
|
-
|
|
2175
|
-
type: "pipeline_event",
|
|
2176
|
-
data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
|
|
2177
|
-
});
|
|
2178
|
-
}
|
|
2179
|
-
}
|
|
2180
|
-
}
|
|
2249
|
+
// v0.7.4 G0c: phase boundary = user checkpoint. Exit the
|
|
2250
|
+
// loop if the agent advanced phase during this task —
|
|
2251
|
+
// even if pre-created tasks for the new phase are queued.
|
|
2252
|
+
// User sees current state and explicitly re-prompts to
|
|
2253
|
+
// begin the next phase. Marathon-style end-to-end
|
|
2254
|
+
// autonomy belongs to an external driver (Claude Code
|
|
2255
|
+
// /loop pattern), not the engine.
|
|
2256
|
+
if (this.currentPhase !== startingPhase) {
|
|
2257
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2258
|
+
reason: "phase_changed",
|
|
2259
|
+
from: startingPhase,
|
|
2260
|
+
to: this.currentPhase,
|
|
2261
|
+
});
|
|
2262
|
+
break;
|
|
2181
2263
|
}
|
|
2182
2264
|
}
|
|
2183
2265
|
}
|
|
@@ -2203,6 +2285,12 @@ export class AgentEngine {
|
|
|
2203
2285
|
// Initial turn: main agent reads user request, creates tasks.
|
|
2204
2286
|
yield* this.runTurn(userMessage);
|
|
2205
2287
|
|
|
2288
|
+
// v0.7.5 G-F5 — TEMPORARILY DISABLED 2026-05-13 for overnight
|
|
2289
|
+
// marathon test. See _runTaskLoopSerial above for full rationale.
|
|
2290
|
+
// To re-enable F5: move `startingPhase` capture BEFORE the
|
|
2291
|
+
// initial runTurn, add post-runTurn exit check matching serial.
|
|
2292
|
+
const startingPhase = this.currentPhase;
|
|
2293
|
+
|
|
2206
2294
|
const agentTool = this._buildTools.core.find((t) => t?.name === "agent_tool");
|
|
2207
2295
|
if (!agentTool) {
|
|
2208
2296
|
// Shouldn't happen (agent_tool is core), but fall back safely.
|
|
@@ -2227,6 +2315,9 @@ export class AgentEngine {
|
|
|
2227
2315
|
const inFlight = new Map();
|
|
2228
2316
|
|
|
2229
2317
|
const dispatch = async () => {
|
|
2318
|
+
// v0.7.4 G0c: stop dispatching if phase changed since loop start.
|
|
2319
|
+
// In-flight workers complete naturally; queue stays untouched.
|
|
2320
|
+
if (this.currentPhase !== startingPhase) return;
|
|
2230
2321
|
while (inFlight.size < parallelism) {
|
|
2231
2322
|
const task = this.taskManager.claimNextPending(`pool${inFlight.size}`);
|
|
2232
2323
|
if (!task) return;
|
|
@@ -2362,23 +2453,15 @@ export class AgentEngine {
|
|
|
2362
2453
|
|
|
2363
2454
|
this.saveState();
|
|
2364
2455
|
|
|
2365
|
-
//
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
|
|
2369
|
-
|
|
2370
|
-
|
|
2371
|
-
|
|
2372
|
-
|
|
2373
|
-
|
|
2374
|
-
if (advanced) {
|
|
2375
|
-
yield new AgentEvent({
|
|
2376
|
-
type: "pipeline_event",
|
|
2377
|
-
data: { type: "phase_ready", nextPhase: next, message: "all phase tasks done; exit criteria met" },
|
|
2378
|
-
});
|
|
2379
|
-
}
|
|
2380
|
-
}
|
|
2381
|
-
}
|
|
2456
|
+
// v0.7.4 G0c: if phase changed during the parallel run, log the
|
|
2457
|
+
// checkpoint event for the audit trail. No auto-advance — that
|
|
2458
|
+
// belongs to the agent (phase_advance tool) or user re-prompt.
|
|
2459
|
+
if (this.currentPhase !== startingPhase) {
|
|
2460
|
+
this.eventLog.append("ralph_loop_exit", {
|
|
2461
|
+
reason: "phase_changed",
|
|
2462
|
+
from: startingPhase,
|
|
2463
|
+
to: this.currentPhase,
|
|
2464
|
+
});
|
|
2382
2465
|
}
|
|
2383
2466
|
}
|
|
2384
2467
|
|
|
@@ -80,6 +80,59 @@ function readJsonSafe(p) {
|
|
|
80
80
|
try { return JSON.parse(fs.readFileSync(p, "utf-8")); } catch { return null; }
|
|
81
81
|
}
|
|
82
82
|
|
|
83
|
+
/**
 * Read a file as UTF-8 text without ever throwing.
 *
 * @param {string} p - Path of the file to read.
 * @returns {string} The file contents, or "" when the read fails for any
 *   reason (missing file, directory, permissions, ...).
 */
function readFileSafe(p) {
  let text = "";
  try {
    text = fs.readFileSync(p, "utf-8");
  } catch {
    // Unreadable: keep the empty-string default.
  }
  return text;
}
|
|
86
|
+
|
|
87
|
+
/**
 * v0.7.5 G-H1: extract `source_rules: [...]` from YAML frontmatter.
 *
 * Supports both inline and block list forms:
 *   source_rules: [R001, R005, R007]
 *   source_rules:
 *     - R001
 *     - R005
 *
 * Block items may be indented or flush with the key (both are valid YAML).
 * LF and CRLF line endings are accepted, as is a leading UTF-8 BOM.
 *
 * Used by milestone derivation to credit grouped/thematic skill folders
 * + master workflows where the agent declares which rules are covered.
 *
 * @param {string} content - Full text of a markdown file that may start
 *   with a `---` fenced frontmatter section.
 * @returns {string[]} Rule IDs in declaration order, normalised to `R` plus
 *   a number zero-padded to at least three digits (e.g. ["R001", "R005"]).
 *   Entries that are not of the form R<digits> are dropped. Returns [] when
 *   there is no frontmatter or no `source_rules` list.
 */
function parseSourceRulesFromFrontmatter(content) {
  if (!content || typeof content !== "string") return [];

  // The fence must open the document. Tolerate a BOM, trailing blanks after
  // the fence, and CRLF line endings so files saved on Windows still count.
  const fmMatch = content.match(/^\uFEFF?---[ \t]*\r?\n([\s\S]*?)\r?\n---/);
  if (!fmMatch) return [];
  const fm = fmMatch[1];

  // Inline form: source_rules: [R001, R005, "R007"]
  const inlineMatch = fm.match(/^source_rules\s*:\s*\[([^\]]*)\]\s*$/m);
  if (inlineMatch) {
    const items = inlineMatch[1]
      .split(",")
      .map((s) => s.trim().replace(/^["']|["']$/g, ""));
    return normalizeDeclaredRuleIds(items);
  }

  // Block form: source_rules:\n  - R001\n  - R005
  // `[ \t]*` (not `+`): YAML allows sequence entries at the key's own column.
  const blockMatch = fm.match(/^source_rules\s*:\s*\n((?:[ \t]*-\s+\S+\s*\n?)+)/m);
  if (blockMatch) {
    const items = blockMatch[1].split("\n").map((line) => {
      const m = line.match(/^[ \t]*-\s+["']?([^"'\s]+)["']?\s*$/);
      return m ? m[1] : null;
    });
    return normalizeDeclaredRuleIds(items);
  }

  return [];
}

/**
 * Shared tail of both list forms: drop empty entries, let `canonicalRuleId`
 * rewrite each entry when it recognises it (falling back to the raw text),
 * keep only R<digits> IDs, and zero-pad the numeric part to three digits.
 *
 * @param {Array<string|null>} rawIds - Raw list entries, quotes already stripped.
 * @returns {string[]} Normalised rule IDs, order and duplicates preserved.
 */
function normalizeDeclaredRuleIds(rawIds) {
  return rawIds
    .filter(Boolean)
    .map((s) => canonicalRuleId(s) || s)
    .filter((rid) => /^R\d+$/i.test(rid))
    .map((rid) => `R${String(Number.parseInt(String(rid).slice(1), 10)).padStart(3, "0")}`);
}
|
|
135
|
+
|
|
83
136
|
function sha256OfFile(p) {
|
|
84
137
|
try {
|
|
85
138
|
const buf = fs.readFileSync(p);
|
|
@@ -239,6 +292,26 @@ export function deriveSkillAuthoringMilestones(workspace) {
|
|
|
239
292
|
}
|
|
240
293
|
}
|
|
241
294
|
}
|
|
295
|
+
|
|
296
|
+
// v0.7.5 G-H1: also credit rule_ids declared in SKILL.md frontmatter
|
|
297
|
+
// `source_rules:` field. Agents using grouped/thematic skill folders
|
|
298
|
+
// (e.g., S01_compliance/, custodian_checks/) declare which rules
|
|
299
|
+
// their grouped check covers via frontmatter; engine derivation
|
|
300
|
+
// credits each declared rule_id. Audit found 资管 v0.7.4 session
|
|
301
|
+
// forced through skill_authoring → skill_testing because its 10 S*
|
|
302
|
+
// grouped folders weren't credited (rulesCovered=0/94).
|
|
303
|
+
if (hasSkillMd) {
|
|
304
|
+
try {
|
|
305
|
+
const skillMdFile = listChildFiles(skillPath).find(
|
|
306
|
+
(f) => f.name.toLowerCase() === "skill.md",
|
|
307
|
+
);
|
|
308
|
+
if (skillMdFile) {
|
|
309
|
+
const content = readFileSafe(path.join(skillPath, skillMdFile.name));
|
|
310
|
+
const sourceRules = parseSourceRulesFromFrontmatter(content);
|
|
311
|
+
for (const rid of sourceRules) ruleIdsCovered.add(rid);
|
|
312
|
+
}
|
|
313
|
+
} catch { /* best-effort */ }
|
|
314
|
+
}
|
|
242
315
|
}
|
|
243
316
|
|
|
244
317
|
return {
|
|
@@ -362,6 +435,37 @@ export function deriveDistillationMilestones(workspace) {
|
|
|
362
435
|
const cwd = cwdOf(workspace);
|
|
363
436
|
const wfRoot = path.join(cwd, "workflows");
|
|
364
437
|
const workflowsCreated = [];
|
|
438
|
+
// v0.7.5 G-H1: also track rule IDs covered by workflows. Grouped/master
|
|
439
|
+
// workflows (e.g., 贷款 v0.7.4's master + R001 template) cover multiple
|
|
440
|
+
// rules; declare them via SKILL.md frontmatter `source_rules: [...]`.
|
|
441
|
+
// Engine credits each declared rule_id so workflowsCovered milestone
|
|
442
|
+
// matches catalog reality.
|
|
443
|
+
const ruleIdsCovered = new Set();
|
|
444
|
+
|
|
445
|
+
// Adds every rule ID a workflow directory declares to `ruleIdsCovered`.
// Two sources are consulted:
//   1. `source_rules` frontmatter in any child file named SKILL.md or
//      workflow.md (case-insensitive);
//   2. `config.json` in the same directory: the first of `source_rules`,
//      `rules`, `rule_ids` that is an array is used, and each entry is
//      added only when `canonicalRuleId` returns a truthy ID for it.
const creditWorkflowSourceRules = (workflowDir) => {
  for (const entry of listChildFiles(workflowDir)) {
    if (!/^(skill|workflow)\.md$/i.test(entry.name)) continue;
    const text = readFileSafe(path.join(workflowDir, entry.name));
    parseSourceRulesFromFrontmatter(text).forEach((rid) => {
      ruleIdsCovered.add(rid);
    });
  }

  const configPath = path.join(workflowDir, "config.json");
  if (!fileExists(configPath)) return;

  const data = readJsonSafe(configPath);
  const declared =
    [data?.source_rules, data?.rules, data?.rule_ids].find((v) => Array.isArray(v)) ?? [];
  declared.forEach((raw) => {
    const canon = canonicalRuleId(String(raw));
    if (canon) ruleIdsCovered.add(canon);
  });
};
|
|
365
469
|
|
|
366
470
|
if (dirExists(wfRoot)) {
|
|
367
471
|
// Two layouts seen in E2E #5:
|
|
@@ -375,16 +479,39 @@ export function deriveDistillationMilestones(workspace) {
|
|
|
375
479
|
const sub = path.join(wfRoot, e.name);
|
|
376
480
|
const hasPy = listChildFiles(sub).some((f) =>
|
|
377
481
|
/workflow.*\.py$/i.test(f.name) || /^check.*\.py$/i.test(f.name));
|
|
378
|
-
if (hasPy)
|
|
482
|
+
if (hasPy) {
|
|
483
|
+
workflowsCreated.push(e.name);
|
|
484
|
+
// Dir name might itself be a rule_id
|
|
485
|
+
const canon = canonicalRuleId(e.name);
|
|
486
|
+
if (canon) ruleIdsCovered.add(canon);
|
|
487
|
+
// Plus any frontmatter / config-declared source_rules
|
|
488
|
+
creditWorkflowSourceRules(sub);
|
|
489
|
+
}
|
|
379
490
|
continue;
|
|
380
491
|
}
|
|
381
492
|
if (e.isFile()) {
|
|
382
493
|
const m1 = e.name.match(/^(.+)_workflow\.py$/i);
|
|
383
|
-
if (m1) {
|
|
494
|
+
if (m1) {
|
|
495
|
+
workflowsCreated.push(m1[1]);
|
|
496
|
+
const canon = canonicalRuleId(m1[1]);
|
|
497
|
+
if (canon) ruleIdsCovered.add(canon);
|
|
498
|
+
continue;
|
|
499
|
+
}
|
|
384
500
|
const m2 = e.name.match(/^(.+)\.json$/i);
|
|
385
501
|
if (m2) {
|
|
386
502
|
const data = readJsonSafe(path.join(wfRoot, e.name));
|
|
387
|
-
if (data && (data.rule_id || data.entry || data.type))
|
|
503
|
+
if (data && (data.rule_id || data.entry || data.type)) {
|
|
504
|
+
workflowsCreated.push(m2[1]);
|
|
505
|
+
const canon = canonicalRuleId(data.rule_id || m2[1]);
|
|
506
|
+
if (canon) ruleIdsCovered.add(canon);
|
|
507
|
+
// Manifest-declared source_rules
|
|
508
|
+
const rules = Array.isArray(data.source_rules) ? data.source_rules :
|
|
509
|
+
Array.isArray(data.rules) ? data.rules : [];
|
|
510
|
+
for (const r of rules) {
|
|
511
|
+
const c2 = canonicalRuleId(String(r));
|
|
512
|
+
if (c2) ruleIdsCovered.add(c2);
|
|
513
|
+
}
|
|
514
|
+
}
|
|
388
515
|
continue;
|
|
389
516
|
}
|
|
390
517
|
}
|
|
@@ -408,7 +535,16 @@ export function deriveDistillationMilestones(workspace) {
|
|
|
408
535
|
}
|
|
409
536
|
}
|
|
410
537
|
|
|
411
|
-
return {
|
|
538
|
+
return {
|
|
539
|
+
workflowsCreated,
|
|
540
|
+
workflowsTested,
|
|
541
|
+
// v0.7.5 G-H1: rule_ids the engine credits as having workflow coverage
|
|
542
|
+
// (either via dir name being a canonical rule_id, or via SKILL.md /
|
|
543
|
+
// workflow.md frontmatter, or a config.json / JSON manifest rule list).
|
|
544
|
+
// Pipelines that check workflow coverage against the catalog should
|
|
545
|
+
// prefer ruleIdsCovered over workflowsCreated for grouped patterns.
|
|
546
|
+
ruleIdsCovered: [...ruleIdsCovered],
|
|
547
|
+
};
|
|
412
548
|
}
|
|
413
549
|
|
|
414
550
|
// ───────────────────────────────────────────────────────────────────
|
|
@@ -9,7 +9,10 @@ import { deriveBootstrapMilestones } from "./_milestone-derive.js";
|
|
|
9
9
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
10
10
|
const AGENT_MD_TEMPLATE = path.resolve(__dirname, "../../../template/AGENT.md");
|
|
11
11
|
|
|
12
|
-
|
|
12
|
+
// v0.7.5: `skills` added to required dirs. Populated by SkillLoader
|
|
13
|
+
// .populateWorkspaceSkills() at bootstrap + on every phase transition
|
|
14
|
+
// with the phase's `available` skill set (per phase_skills.yaml).
|
|
15
|
+
const REQUIRED_DIRS = ["rules", "samples", "input", "output", "logs", "workflows", "rule_skills", "skills"];
|
|
13
16
|
|
|
14
17
|
const DEFAULT_ENV = `# === KC Agent Project Configuration ===
|
|
15
18
|
|