cap-pro 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/README.md +26 -0
- package/.claude-plugin/marketplace.json +24 -0
- package/.claude-plugin/plugin.json +24 -0
- package/LICENSE +21 -0
- package/README.ja-JP.md +834 -0
- package/README.ko-KR.md +823 -0
- package/README.md +806 -0
- package/README.pt-BR.md +452 -0
- package/README.zh-CN.md +800 -0
- package/agents/cap-architect.md +269 -0
- package/agents/cap-brainstormer.md +207 -0
- package/agents/cap-curator.md +276 -0
- package/agents/cap-debugger.md +365 -0
- package/agents/cap-designer.md +246 -0
- package/agents/cap-historian.md +464 -0
- package/agents/cap-migrator.md +291 -0
- package/agents/cap-prototyper.md +197 -0
- package/agents/cap-validator.md +308 -0
- package/bin/install.js +5433 -0
- package/cap/bin/cap-tools.cjs +853 -0
- package/cap/bin/lib/arc-scanner.cjs +344 -0
- package/cap/bin/lib/cap-affinity-engine.cjs +862 -0
- package/cap/bin/lib/cap-anchor.cjs +228 -0
- package/cap/bin/lib/cap-annotation-writer.cjs +340 -0
- package/cap/bin/lib/cap-checkpoint.cjs +434 -0
- package/cap/bin/lib/cap-cluster-detect.cjs +945 -0
- package/cap/bin/lib/cap-cluster-display.cjs +52 -0
- package/cap/bin/lib/cap-cluster-format.cjs +245 -0
- package/cap/bin/lib/cap-cluster-helpers.cjs +295 -0
- package/cap/bin/lib/cap-cluster-io.cjs +212 -0
- package/cap/bin/lib/cap-completeness.cjs +540 -0
- package/cap/bin/lib/cap-deps.cjs +583 -0
- package/cap/bin/lib/cap-design-families.cjs +332 -0
- package/cap/bin/lib/cap-design.cjs +966 -0
- package/cap/bin/lib/cap-divergence-detector.cjs +400 -0
- package/cap/bin/lib/cap-doctor.cjs +752 -0
- package/cap/bin/lib/cap-feature-map-internals.cjs +19 -0
- package/cap/bin/lib/cap-feature-map-migrate.cjs +335 -0
- package/cap/bin/lib/cap-feature-map-monorepo.cjs +885 -0
- package/cap/bin/lib/cap-feature-map-shard.cjs +315 -0
- package/cap/bin/lib/cap-feature-map.cjs +1943 -0
- package/cap/bin/lib/cap-fitness-score.cjs +1075 -0
- package/cap/bin/lib/cap-impact-analysis.cjs +652 -0
- package/cap/bin/lib/cap-learn-review.cjs +1072 -0
- package/cap/bin/lib/cap-learning-signals.cjs +627 -0
- package/cap/bin/lib/cap-loader.cjs +227 -0
- package/cap/bin/lib/cap-logger.cjs +57 -0
- package/cap/bin/lib/cap-memory-bridge.cjs +764 -0
- package/cap/bin/lib/cap-memory-confidence.cjs +452 -0
- package/cap/bin/lib/cap-memory-dir.cjs +987 -0
- package/cap/bin/lib/cap-memory-engine.cjs +698 -0
- package/cap/bin/lib/cap-memory-extends.cjs +398 -0
- package/cap/bin/lib/cap-memory-graph.cjs +790 -0
- package/cap/bin/lib/cap-memory-migrate.cjs +2015 -0
- package/cap/bin/lib/cap-memory-pin.cjs +183 -0
- package/cap/bin/lib/cap-memory-platform.cjs +490 -0
- package/cap/bin/lib/cap-memory-prune.cjs +707 -0
- package/cap/bin/lib/cap-memory-schema.cjs +812 -0
- package/cap/bin/lib/cap-migrate-tags.cjs +309 -0
- package/cap/bin/lib/cap-migrate.cjs +540 -0
- package/cap/bin/lib/cap-pattern-apply.cjs +1203 -0
- package/cap/bin/lib/cap-pattern-pipeline.cjs +1034 -0
- package/cap/bin/lib/cap-plugin-manifest.cjs +80 -0
- package/cap/bin/lib/cap-realtime-affinity.cjs +399 -0
- package/cap/bin/lib/cap-reconcile.cjs +570 -0
- package/cap/bin/lib/cap-research-gate.cjs +218 -0
- package/cap/bin/lib/cap-scope-filter.cjs +402 -0
- package/cap/bin/lib/cap-semantic-pipeline.cjs +1038 -0
- package/cap/bin/lib/cap-session-extract.cjs +987 -0
- package/cap/bin/lib/cap-session.cjs +445 -0
- package/cap/bin/lib/cap-snapshot-linkage.cjs +963 -0
- package/cap/bin/lib/cap-stack-docs.cjs +646 -0
- package/cap/bin/lib/cap-tag-observer.cjs +371 -0
- package/cap/bin/lib/cap-tag-scanner.cjs +1766 -0
- package/cap/bin/lib/cap-telemetry.cjs +466 -0
- package/cap/bin/lib/cap-test-audit.cjs +1438 -0
- package/cap/bin/lib/cap-thread-migrator.cjs +307 -0
- package/cap/bin/lib/cap-thread-synthesis.cjs +545 -0
- package/cap/bin/lib/cap-thread-tracker.cjs +519 -0
- package/cap/bin/lib/cap-trace.cjs +399 -0
- package/cap/bin/lib/cap-trust-mode.cjs +336 -0
- package/cap/bin/lib/cap-ui-design-editor.cjs +642 -0
- package/cap/bin/lib/cap-ui-mind-map.cjs +712 -0
- package/cap/bin/lib/cap-ui-thread-nav.cjs +693 -0
- package/cap/bin/lib/cap-ui.cjs +1245 -0
- package/cap/bin/lib/cap-upgrade.cjs +1028 -0
- package/cap/bin/lib/cli/arg-helpers.cjs +49 -0
- package/cap/bin/lib/cli/frontmatter-router.cjs +31 -0
- package/cap/bin/lib/cli/init-router.cjs +68 -0
- package/cap/bin/lib/cli/phase-router.cjs +102 -0
- package/cap/bin/lib/cli/state-router.cjs +61 -0
- package/cap/bin/lib/cli/template-router.cjs +37 -0
- package/cap/bin/lib/cli/uat-router.cjs +29 -0
- package/cap/bin/lib/cli/validation-router.cjs +26 -0
- package/cap/bin/lib/cli/verification-router.cjs +31 -0
- package/cap/bin/lib/cli/workstream-router.cjs +39 -0
- package/cap/bin/lib/commands.cjs +961 -0
- package/cap/bin/lib/config.cjs +467 -0
- package/cap/bin/lib/convention-reader.cjs +258 -0
- package/cap/bin/lib/core.cjs +1241 -0
- package/cap/bin/lib/feature-aggregator.cjs +423 -0
- package/cap/bin/lib/frontmatter.cjs +337 -0
- package/cap/bin/lib/init.cjs +1443 -0
- package/cap/bin/lib/manifest-generator.cjs +383 -0
- package/cap/bin/lib/milestone.cjs +253 -0
- package/cap/bin/lib/model-profiles.cjs +69 -0
- package/cap/bin/lib/monorepo-context.cjs +226 -0
- package/cap/bin/lib/monorepo-migrator.cjs +509 -0
- package/cap/bin/lib/phase.cjs +889 -0
- package/cap/bin/lib/profile-output.cjs +989 -0
- package/cap/bin/lib/profile-pipeline.cjs +540 -0
- package/cap/bin/lib/roadmap.cjs +330 -0
- package/cap/bin/lib/security.cjs +394 -0
- package/cap/bin/lib/session-manager.cjs +292 -0
- package/cap/bin/lib/skeleton-generator.cjs +179 -0
- package/cap/bin/lib/state.cjs +1032 -0
- package/cap/bin/lib/template.cjs +231 -0
- package/cap/bin/lib/test-detector.cjs +62 -0
- package/cap/bin/lib/uat.cjs +283 -0
- package/cap/bin/lib/verify.cjs +889 -0
- package/cap/bin/lib/workspace-detector.cjs +371 -0
- package/cap/bin/lib/workstream.cjs +492 -0
- package/cap/commands/gsd/workstreams.md +63 -0
- package/cap/references/arc-standard.md +315 -0
- package/cap/references/cap-agent-architecture.md +101 -0
- package/cap/references/cap-gitignore-template +9 -0
- package/cap/references/cap-zero-deps.md +158 -0
- package/cap/references/checkpoints.md +778 -0
- package/cap/references/continuation-format.md +249 -0
- package/cap/references/contract-test-templates.md +312 -0
- package/cap/references/feature-map-template.md +25 -0
- package/cap/references/git-integration.md +295 -0
- package/cap/references/git-planning-commit.md +38 -0
- package/cap/references/model-profiles.md +174 -0
- package/cap/references/phase-numbering.md +126 -0
- package/cap/references/planning-config.md +202 -0
- package/cap/references/property-test-templates.md +316 -0
- package/cap/references/security-test-templates.md +347 -0
- package/cap/references/session-template.json +8 -0
- package/cap/references/tdd.md +263 -0
- package/cap/references/user-profiling.md +681 -0
- package/cap/references/verification-patterns.md +612 -0
- package/cap/templates/UAT.md +265 -0
- package/cap/templates/claude-md.md +175 -0
- package/cap/templates/codebase/architecture.md +255 -0
- package/cap/templates/codebase/concerns.md +310 -0
- package/cap/templates/codebase/conventions.md +307 -0
- package/cap/templates/codebase/integrations.md +280 -0
- package/cap/templates/codebase/stack.md +186 -0
- package/cap/templates/codebase/structure.md +285 -0
- package/cap/templates/codebase/testing.md +480 -0
- package/cap/templates/config.json +44 -0
- package/cap/templates/context.md +352 -0
- package/cap/templates/continue-here.md +78 -0
- package/cap/templates/copilot-instructions.md +7 -0
- package/cap/templates/debug-subagent-prompt.md +91 -0
- package/cap/templates/discussion-log.md +63 -0
- package/cap/templates/milestone-archive.md +123 -0
- package/cap/templates/milestone.md +115 -0
- package/cap/templates/phase-prompt.md +610 -0
- package/cap/templates/planner-subagent-prompt.md +117 -0
- package/cap/templates/project.md +186 -0
- package/cap/templates/requirements.md +231 -0
- package/cap/templates/research-project/ARCHITECTURE.md +204 -0
- package/cap/templates/research-project/FEATURES.md +147 -0
- package/cap/templates/research-project/PITFALLS.md +200 -0
- package/cap/templates/research-project/STACK.md +120 -0
- package/cap/templates/research-project/SUMMARY.md +170 -0
- package/cap/templates/research.md +552 -0
- package/cap/templates/roadmap.md +202 -0
- package/cap/templates/state.md +176 -0
- package/cap/templates/summary.md +364 -0
- package/cap/templates/user-preferences.md +498 -0
- package/cap/templates/verification-report.md +322 -0
- package/cap/workflows/add-phase.md +112 -0
- package/cap/workflows/add-tests.md +351 -0
- package/cap/workflows/add-todo.md +158 -0
- package/cap/workflows/audit-milestone.md +340 -0
- package/cap/workflows/audit-uat.md +109 -0
- package/cap/workflows/autonomous.md +891 -0
- package/cap/workflows/check-todos.md +177 -0
- package/cap/workflows/cleanup.md +152 -0
- package/cap/workflows/complete-milestone.md +767 -0
- package/cap/workflows/diagnose-issues.md +231 -0
- package/cap/workflows/discovery-phase.md +289 -0
- package/cap/workflows/discuss-phase-assumptions.md +653 -0
- package/cap/workflows/discuss-phase.md +1049 -0
- package/cap/workflows/do.md +104 -0
- package/cap/workflows/execute-phase.md +846 -0
- package/cap/workflows/execute-plan.md +514 -0
- package/cap/workflows/fast.md +105 -0
- package/cap/workflows/forensics.md +265 -0
- package/cap/workflows/health.md +181 -0
- package/cap/workflows/help.md +660 -0
- package/cap/workflows/insert-phase.md +130 -0
- package/cap/workflows/list-phase-assumptions.md +178 -0
- package/cap/workflows/list-workspaces.md +56 -0
- package/cap/workflows/manager.md +362 -0
- package/cap/workflows/map-codebase.md +377 -0
- package/cap/workflows/milestone-summary.md +223 -0
- package/cap/workflows/new-milestone.md +486 -0
- package/cap/workflows/new-project.md +1250 -0
- package/cap/workflows/new-workspace.md +237 -0
- package/cap/workflows/next.md +97 -0
- package/cap/workflows/node-repair.md +92 -0
- package/cap/workflows/note.md +156 -0
- package/cap/workflows/pause-work.md +176 -0
- package/cap/workflows/plan-milestone-gaps.md +273 -0
- package/cap/workflows/plan-phase.md +857 -0
- package/cap/workflows/plant-seed.md +169 -0
- package/cap/workflows/pr-branch.md +129 -0
- package/cap/workflows/profile-user.md +449 -0
- package/cap/workflows/progress.md +507 -0
- package/cap/workflows/quick.md +757 -0
- package/cap/workflows/remove-phase.md +155 -0
- package/cap/workflows/remove-workspace.md +90 -0
- package/cap/workflows/research-phase.md +82 -0
- package/cap/workflows/resume-project.md +326 -0
- package/cap/workflows/review.md +228 -0
- package/cap/workflows/session-report.md +146 -0
- package/cap/workflows/settings.md +283 -0
- package/cap/workflows/ship.md +228 -0
- package/cap/workflows/stats.md +60 -0
- package/cap/workflows/transition.md +671 -0
- package/cap/workflows/ui-phase.md +298 -0
- package/cap/workflows/ui-review.md +161 -0
- package/cap/workflows/update.md +323 -0
- package/cap/workflows/validate-phase.md +170 -0
- package/cap/workflows/verify-phase.md +254 -0
- package/cap/workflows/verify-work.md +637 -0
- package/commands/cap/annotate.md +165 -0
- package/commands/cap/brainstorm.md +393 -0
- package/commands/cap/checkpoint.md +106 -0
- package/commands/cap/completeness.md +94 -0
- package/commands/cap/continue.md +72 -0
- package/commands/cap/debug.md +588 -0
- package/commands/cap/deps.md +169 -0
- package/commands/cap/design.md +479 -0
- package/commands/cap/init.md +354 -0
- package/commands/cap/iterate.md +249 -0
- package/commands/cap/learn.md +459 -0
- package/commands/cap/memory.md +275 -0
- package/commands/cap/migrate-feature-map.md +91 -0
- package/commands/cap/migrate-memory.md +108 -0
- package/commands/cap/migrate-tags.md +91 -0
- package/commands/cap/migrate.md +131 -0
- package/commands/cap/prototype.md +510 -0
- package/commands/cap/reconcile.md +121 -0
- package/commands/cap/review.md +360 -0
- package/commands/cap/save.md +72 -0
- package/commands/cap/scan.md +404 -0
- package/commands/cap/start.md +356 -0
- package/commands/cap/status.md +118 -0
- package/commands/cap/test-audit.md +262 -0
- package/commands/cap/test.md +394 -0
- package/commands/cap/trace.md +133 -0
- package/commands/cap/ui.md +167 -0
- package/hooks/dist/cap-check-update.js +115 -0
- package/hooks/dist/cap-context-monitor.js +185 -0
- package/hooks/dist/cap-learn-review-hook.js +114 -0
- package/hooks/dist/cap-learning-hook.js +192 -0
- package/hooks/dist/cap-memory.js +299 -0
- package/hooks/dist/cap-prompt-guard.js +97 -0
- package/hooks/dist/cap-statusline.js +157 -0
- package/hooks/dist/cap-tag-observer.js +115 -0
- package/hooks/dist/cap-version-check.js +112 -0
- package/hooks/dist/cap-workflow-guard.js +175 -0
- package/hooks/hooks.json +55 -0
- package/package.json +58 -0
- package/scripts/base64-scan.sh +262 -0
- package/scripts/build-hooks.js +93 -0
- package/scripts/cap-removal-checklist.md +202 -0
- package/scripts/prompt-injection-scan.sh +199 -0
- package/scripts/run-tests.cjs +181 -0
- package/scripts/secret-scan.sh +227 -0
|
@@ -0,0 +1,1034 @@
|
|
|
1
|
+
// @cap-context CAP F-071 Extract Patterns via Heuristics and LLM — pure-compute pipeline that turns
|
|
2
|
+
// @cap-history(sessions:2, edits:17, since:2026-05-05, learned:2026-05-06) Frequently modified — 2 sessions, 17 edits
|
|
3
|
+
// raw F-070 learning signals into actionable P-NNN patterns. Stage 1 is deterministic
|
|
4
|
+
// (TF-IDF / RegEx clustering / frequency); Stage 2 is the LLM stage triggered when a
|
|
5
|
+
// candidate hits the threshold (≥ 3 similar overrides OR ≥ 1 regret). All LLM-bound
|
|
6
|
+
// payload is counts + hashes only — no raw signal records, no user text, no paths.
|
|
7
|
+
// @cap-decision(F-071/D1) LLM call mechanism — Host-LLM via Skill-Briefing pattern. The pipeline writes
|
|
8
|
+
// an aggregate briefing to .cap/learning/queue/P-NNN.md; the /cap:learn skill instructs
|
|
9
|
+
// the outer agent (Claude running the session) to read the briefing and write the result
|
|
10
|
+
// to .cap/learning/patterns/P-NNN.json. There is NO HTTPS client, NO API key, NO SDK
|
|
11
|
+
// dependency. This mirrors how /cap:prototype hands a task to cap-prototyper.
|
|
12
|
+
// @cap-decision(F-071/D2) Trigger — manual via /cap:learn skill. NOT auto on /cap:scan, NOT on Stop-Hook.
|
|
13
|
+
// Auto-triggering would burn through the user's LLM budget without consent.
|
|
14
|
+
// @cap-decision(F-071/D3) LLM input shape — Counts + Hashes only. No FEATURE-MAP context, no
|
|
15
|
+
// tag-description text, no raw signal records. The strict path. The briefing schema is:
|
|
16
|
+
// { candidateId, signalType, count, byFeature: [{featureId, count}], topContextHashes:
|
|
17
|
+
// [{hash, count}] }. Anything beyond this MUST go through hashContext first or be denied.
|
|
18
|
+
// @cap-decision(F-071/D4) TF-IDF tokens are tuples, not free text — `${signalType}|${featureId}|${
|
|
19
|
+
// targetFileHash || decisionId}`. The privacy boundary already hashed the path, so the
|
|
20
|
+
// token-string is hash-clean by construction. Documents are sessions (groupBy sessionId).
|
|
21
|
+
// TF · IDF ranks within-session; absolute count provides the AC-2 threshold path
|
|
22
|
+
// (count >= 3 override / >= 1 regret) regardless of TF-IDF rank.
|
|
23
|
+
// @cap-decision(F-071/D5) P-NNN allocation is compute-on-read from filenames. AC-6 demands "sequential,
|
|
24
|
+
// never renumbered" — gaps are fine; allocator returns max(existing IDs) + 1, scanning
|
|
25
|
+
// .cap/learning/patterns/P-*.json AND .cap/learning/queue/P-*.md (queue burns IDs too,
|
|
26
|
+
// because a deferred candidate retains its assigned ID across sessions). No .next-id
|
|
27
|
+
// file: that drifts when developers manually delete a pattern file or move things around.
|
|
28
|
+
// @cap-decision(F-071/D7) "Similar overrides" means the same (signalType, featureId, contextKey) tuple
|
|
29
|
+
// — i.e. SAME feature AND SAME target file (or decisionId for regret). 3 overrides spread
|
|
30
|
+
// across 3 different featureIds do NOT trigger Stage 2; 3 edits across 3 different files
|
|
31
|
+
// of the same feature do NOT trigger Stage 2. STRICT match.
|
|
32
|
+
// Why: early-phase self-learning needs cluster cohesion — Stage 2's LLM can only distill
|
|
33
|
+
// a meaningful L2/L3 pattern from semantically similar records. Loose (featureId-only)
|
|
34
|
+
// matching would produce heterogeneous clusters that the LLM cannot synthesise honestly,
|
|
35
|
+
// and would burn the 3-call budget on low-signal candidates. F-074 unlearn would then
|
|
36
|
+
// auto-retract them, wasting the budget round-trip. F-072 fitness scoring + F-074 will
|
|
37
|
+
// surface coverage gaps over time; if strict turns out to be too narrow, loose-mode is
|
|
38
|
+
// an additive future change (a parallel candidate class), not a refactor.
|
|
39
|
+
// Confirmed by user before ship — see PIN-2 in the F-071 test-audit report.
|
|
40
|
+
// @cap-constraint Zero external dependencies: node:fs, node:path only. We re-use cap-telemetry.cjs for
|
|
41
|
+
// hashContext (privacy primitive) and readBudget / getLlmUsage (budget primitive), and
|
|
42
|
+
// cap-learning-signals.cjs#getSignals as the SOLE input source. We never read JSONL
|
|
43
|
+
// files directly; the F-070 query API is the contract.
|
|
44
|
+
// @cap-risk(F-071/AC-3) PRIVACY BOUNDARY — every place that constructs an LLM-bound briefing payload
|
|
45
|
+
// carries this tag. The briefing must contain ONLY hex hashes and integer counts. Any
|
|
46
|
+
// future contributor adding a `description`, `summary`, `path`, or `signalRaw` field
|
|
47
|
+
// violates AC-3. Tests perform byte-level needle-search on the briefing markdown.
|
|
48
|
+
// @cap-risk(F-071/AC-4) BUDGET BOUNDARY — promotion to Stage 2 must be gated by readBudget +
|
|
49
|
+
// getLlmUsage. A regression that bypasses the gate would silently burn through the
|
|
50
|
+
// user's wallet. The gate is in promoteCandidates(); tests pre-load recordLlmCall
|
|
51
|
+
// entries and assert overflow lands in the queue with deferred:budget.
|
|
52
|
+
|
|
53
|
+
'use strict';
|
|
54
|
+
|
|
55
|
+
// @cap-feature(feature:F-071, primary:true) Pattern Pipeline — heuristic Stage 1 + LLM-briefing Stage 2.
|
|
56
|
+
|
|
57
|
+
const fs = require('node:fs');
|
|
58
|
+
const path = require('node:path');
|
|
59
|
+
|
|
60
|
+
const telemetry = require('./cap-telemetry.cjs');
|
|
61
|
+
const learningSignals = require('./cap-learning-signals.cjs');
|
|
62
|
+
|
|
63
|
+
// -----------------------------------------------------------------------------
|
|
64
|
+
// Constants — kept top-of-file so tests and downstream consumers (F-072/F-073)
|
|
65
|
+
// reference exactly one place.
|
|
66
|
+
// -----------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
const CAP_DIR = '.cap';
|
|
69
|
+
const LEARNING_DIR = 'learning';
|
|
70
|
+
const CANDIDATES_DIR = 'candidates';
|
|
71
|
+
const PATTERNS_DIR = 'patterns';
|
|
72
|
+
const QUEUE_DIR = 'queue';
|
|
73
|
+
|
|
74
|
+
// AC-2: thresholds. Centralised so a future tuning lives in one place and the
|
|
75
|
+
// adversarial tests can verify exact behaviour.
|
|
76
|
+
const THRESHOLD_OVERRIDE_COUNT = 3;
|
|
77
|
+
const THRESHOLD_REGRET_COUNT = 1;
|
|
78
|
+
|
|
79
|
+
// AC-1: TF-IDF top-K within each session. K=5 covers the high-signal head;
|
|
80
|
+
// anything below is noise or single-occurrence.
|
|
81
|
+
const TFIDF_TOP_K_PER_SESSION = 5;
|
|
82
|
+
|
|
83
|
+
// Length cap for any string field that might land in a briefing or pattern record.
|
|
84
|
+
// Mirrors cap-telemetry.cjs#ID_MAX so a hostile caller cannot smuggle a prompt
|
|
85
|
+
// through e.g. a manipulated featureId or contextHash field.
|
|
86
|
+
const ID_MAX = 200;
|
|
87
|
+
|
|
88
|
+
// P-NNN ID format.
|
|
89
|
+
const PATTERN_ID_PREFIX = 'P-';
|
|
90
|
+
const PATTERN_ID_PAD = 3;
|
|
91
|
+
|
|
92
|
+
/**
|
|
93
|
+
* @typedef {Object} HeuristicCandidate
|
|
94
|
+
* @property {string} candidateId - Stable hash of the (signalType + featureId + contextKey) tuple. Used as the briefing dedup key.
|
|
95
|
+
* @property {'override'|'memory-ref'|'regret'} signalType
|
|
96
|
+
* @property {string|null} featureId - Most-frequent featureId across the records that produced this candidate.
|
|
97
|
+
* @property {number} count - Total record count contributing to this candidate.
|
|
98
|
+
* @property {number} score - Maximum TF-IDF score for this candidate's token across all sessions.
|
|
99
|
+
* Separate from `count`: F-072 (fitness) and F-073 (review) can sort by either depending on what
|
|
100
|
+
* they need. Magnitude (TF-IDF) reveals "rare-but-concentrated" patterns; count reveals "loud"
|
|
101
|
+
* patterns. The orchestrator default-sorts by count for stable strong-cluster-first ordering.
|
|
102
|
+
* @property {Array<{featureId: string|null, count: number}>} byFeature - Per-feature breakdown, sorted descending by count.
|
|
103
|
+
* @property {Array<{hash: string, count: number}>} topContextHashes - Top-N context hashes that produced this candidate, sorted descending by count.
|
|
104
|
+
* @property {{kind:'L1', target:string, from:number, to:number, rationale:string}} suggestion - Heuristic-only L1 proposal — Stage 2 may upgrade this to L2/L3.
|
|
105
|
+
*/
|
|
106
|
+
|
|
107
|
+
/**
|
|
108
|
+
* @typedef {Object} PatternRecord
|
|
109
|
+
* @property {string} id - 'P-NNN'.
|
|
110
|
+
* @property {string} createdAt - ISO timestamp.
|
|
111
|
+
* @property {'L1'|'L2'|'L3'} level
|
|
112
|
+
* @property {string|null} featureRef - Feature ID this pattern targets (e.g. 'F-070').
|
|
113
|
+
* @property {'heuristic'|'llm'} source - Whether this was promoted via Stage 2 (llm) or persisted heuristic-only (heuristic).
|
|
114
|
+
* @property {boolean} degraded - True when LLM stage was unavailable and the heuristic-only suggestion is final.
|
|
115
|
+
* @property {number} confidence - 0..1.
|
|
116
|
+
* @property {Object} suggestion - Shape depends on `level` (L1: parameter tweak, L2: rule, L3: prompt-template patch).
|
|
117
|
+
* @property {{candidateId:string, signalType:string, count:number, topContextHashes:Array<{hash:string,count:number}>}} evidence
|
|
118
|
+
*/
|
|
119
|
+
|
|
120
|
+
// -----------------------------------------------------------------------------
|
|
121
|
+
// Internal helpers — directory + IO
|
|
122
|
+
// -----------------------------------------------------------------------------
|
|
123
|
+
|
|
124
|
+
/**
 * Best-effort recursive mkdir. IO errors are deliberately swallowed:
 * public-boundary callers treat directory creation as best-effort, and a
 * persistent IO problem will surface on the next actual write instead.
 * @param {string} dir - Absolute directory path to create.
 */
function ensureDir(dir) {
  try {
    if (fs.existsSync(dir)) return;
    fs.mkdirSync(dir, { recursive: true });
  } catch (_e) {
    // Intentionally ignored — best-effort creation; see note above.
  }
}
|
|
131
|
+
|
|
132
|
+
/**
 * Absolute path of the learning root: `<projectRoot>/.cap/learning`.
 * @param {string} projectRoot
 * @returns {string}
 */
function learningRoot(projectRoot) {
  const capRoot = path.join(projectRoot, CAP_DIR);
  return path.join(capRoot, LEARNING_DIR);
}
|
|
135
|
+
|
|
136
|
+
/**
 * Absolute path of the heuristic-candidate directory under the learning root.
 * @param {string} projectRoot
 * @returns {string}
 */
function candidatesDir(projectRoot) {
  const root = learningRoot(projectRoot);
  return path.join(root, CANDIDATES_DIR);
}
|
|
139
|
+
|
|
140
|
+
/**
 * Absolute path of the persisted-pattern directory under the learning root.
 * @param {string} projectRoot
 * @returns {string}
 */
function patternsDir(projectRoot) {
  const root = learningRoot(projectRoot);
  return path.join(root, PATTERNS_DIR);
}
|
|
143
|
+
|
|
144
|
+
/**
 * Absolute path of the deferred-briefing queue directory under the learning root.
 * @param {string} projectRoot
 * @returns {string}
 */
function queueDir(projectRoot) {
  const root = learningRoot(projectRoot);
  return path.join(root, QUEUE_DIR);
}
|
|
147
|
+
|
|
148
|
+
// -----------------------------------------------------------------------------
|
|
149
|
+
// Read-side wiring for F-074 applied-state — closes the V5 self-learning loop.
|
|
150
|
+
//
|
|
151
|
+
// @cap-decision(F-071/D9) Read .cap/learning/applied-state.json directly with a tiny inline helper
|
|
152
|
+
// instead of `require('./cap-pattern-apply.cjs')`. cap-pattern-apply already requires
|
|
153
|
+
// cap-pattern-pipeline, so importing it here would create a circular dependency.
|
|
154
|
+
// Schema is owned by F-074 and documented at cap-pattern-apply#readAppliedState
|
|
155
|
+
// (F-074/D2): { version:1, l1:{ '<featureId>/<KEY>': value }, l2:[], l3:[] }.
|
|
156
|
+
// -----------------------------------------------------------------------------
|
|
157
|
+
|
|
158
|
+
const APPLIED_STATE_RELATIVE = path.join(CAP_DIR, LEARNING_DIR, 'applied-state.json');
|
|
159
|
+
|
|
160
|
+
/**
 * Look up the L1 override value for a given featureId+key in
 * `.cap/learning/applied-state.json`. Returns `null` when the file is missing,
 * malformed, the key is absent, or the value fails the validator. Pure read,
 * never throws.
 *
 * @cap-risk(F-071/D9) The applied-state file is hand-editable. A user (or a
 * buggy pattern) could stuff a string, NaN, or negative number into the L1
 * map. The validator is the trust boundary — anything that fails it falls
 * back to the constant default. The strict integer check exists so a
 * malformed file cannot weaken promotion gates (e.g. `to: -1` would
 * otherwise allow every cluster through).
 *
 * @param {string} projectRoot
 * @param {string} featureId - 'F-070' style; null/non-string returns null.
 * @param {string} key - Sub-key, e.g. 'threshold'. Combined as `${featureId}/${key}` per F-074/D2.
 * @param {(v: unknown) => boolean} validator - True when the value is acceptable. Mandatory.
 * @returns {*} The validated value, or null.
 */
function readAppliedL1(projectRoot, featureId, key, validator) {
  const nonEmptyString = (s) => typeof s === 'string' && s.length > 0;
  if (!nonEmptyString(projectRoot)) return null;
  if (!nonEmptyString(featureId)) return null;
  if (!nonEmptyString(key)) return null;
  if (typeof validator !== 'function') return null;

  // Missing file and malformed JSON are equivalent: no override applied.
  let parsed;
  try {
    const raw = fs.readFileSync(path.join(projectRoot, APPLIED_STATE_RELATIVE), 'utf8');
    parsed = JSON.parse(raw);
  } catch (_e) {
    return null;
  }

  const l1 = parsed && parsed.l1;
  if (!l1 || typeof l1 !== 'object' || Array.isArray(l1)) return null;

  const value = l1[`${featureId}/${key}`];
  if (value === undefined) return null;
  return validator(value) ? value : null;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Strict positive-integer validator for threshold values. Rejects strings,
 * floats, NaN, ±Infinity, negatives, and zero. A threshold of 0 would mean
 * "every cluster promotes immediately", which is semantically broken —
 * refuse it at the boundary.
 *
 * Note: `Number.isInteger` already returns false for non-numbers, NaN, and
 * ±Infinity, so the previous `typeof v === 'number'` and `Number.isFinite(v)`
 * checks were redundant and have been dropped; behavior is unchanged.
 *
 * @param {unknown} v
 * @returns {boolean}
 */
function isPositiveIntegerThreshold(v) {
  return Number.isInteger(v) && v > 0;
}
|
|
211
|
+
|
|
212
|
+
/**
 * Compute the effective promotion threshold for a (signalType, featureId)
 * pair, honouring any L1 override applied via F-074. Falls back to the module
 * constant when no override is applicable.
 *
 * Lookup precedence:
 *   1. applied-state.json#l1[`${featureId}/threshold`] — when projectRoot + featureId provided
 *   2. THRESHOLD_REGRET_COUNT (regret) / THRESHOLD_OVERRIDE_COUNT (override, anything else)
 *
 * memory-ref candidates never promote; callers gate them out before reaching here.
 *
 * @param {string|null|undefined} projectRoot
 * @param {string} signalType
 * @param {string|null|undefined} featureId
 * @returns {number}
 */
function getEffectiveThreshold(projectRoot, signalType, featureId) {
  const override = readAppliedL1(projectRoot, featureId, 'threshold', isPositiveIntegerThreshold);
  if (override !== null) return override;
  if (signalType === 'regret') return THRESHOLD_REGRET_COUNT;
  return THRESHOLD_OVERRIDE_COUNT;
}
|
|
232
|
+
|
|
233
|
+
/**
 * Truncate a string to at most ID_MAX characters; returns null for
 * non-strings and empty strings.
 * @param {unknown} v
 * @returns {string|null}
 */
function capId(v) {
  if (typeof v === 'string' && v.length > 0) {
    return v.slice(0, ID_MAX);
  }
  return null;
}
|
|
240
|
+
|
|
241
|
+
// -----------------------------------------------------------------------------
|
|
242
|
+
// TF-IDF tokenizer — operates on hash-tuples, NOT free text.
|
|
243
|
+
//
|
|
244
|
+
// @cap-decision(F-071/D4) Tokens are tuples like `${signalType}|${featureId}|${contextKey}`. Documents
|
|
245
|
+
// are sessions. The privacy boundary in F-070 already hashed paths and decision
|
|
246
|
+
// fields, so token-strings are hash-clean by construction. This is the unusual bit:
|
|
247
|
+
// standard TF-IDF runs on word tokens; we run it on structured hash-tuples. The same
|
|
248
|
+
// math still applies (TF · IDF ranks token rarity within a session), just over a
|
|
249
|
+
// different alphabet.
|
|
250
|
+
// -----------------------------------------------------------------------------
|
|
251
|
+
|
|
252
|
+
/**
 * Build the stable tuple-token `${signalType}|${featureId}|${contextKey}` for
 * a signal record (per F-071/D4 the tokens are hash-tuples, not free text).
 * The contextKey distinguishes different "instances" of the same problem
 * within the same featureId: targetFileHash for overrides, decisionId for
 * regrets, memoryFileHash for memory-refs — each falling back to contextHash
 * and finally the literal 'unknown'. Missing or invalid fields degrade to
 * 'unknown' / 'unassigned' rather than throwing.
 *
 * @param {object} record
 * @returns {string}
 */
function buildToken(record) {
  const r = record || {};
  const signalType = capId(r.signalType) || 'unknown';
  const featureId = capId(r.featureId) || 'unassigned';

  let contextKey;
  switch (signalType) {
    case 'override':
      contextKey = capId(r.targetFileHash) || capId(r.contextHash) || capId(r.subType);
      break;
    case 'regret':
      contextKey = capId(r.decisionId) || capId(r.contextHash);
      break;
    default:
      // memory-ref (and any unrecognised type)
      contextKey = capId(r.memoryFileHash) || capId(r.contextHash);
      break;
  }
  return `${signalType}|${featureId}|${contextKey || 'unknown'}`;
}
|
|
275
|
+
|
|
276
|
+
/**
 * Bucket signal records by sessionId. Records without a usable sessionId all
 * land in the single synthetic `__no-session__` bucket, so they still
 * contribute to global counts — but TF-IDF treats the bucket as one session,
 * which is the safe default (under-counts rather than over-promotes).
 *
 * @param {Array<object>} records
 * @returns {Map<string, Array<object>>}
 */
function groupBySession(records) {
  const buckets = new Map();
  for (const record of records || []) {
    let sid = '__no-session__';
    if (record && typeof record.sessionId === 'string' && record.sessionId.length > 0) {
      sid = record.sessionId;
    }
    const bucket = buckets.get(sid);
    if (bucket === undefined) {
      buckets.set(sid, [record]);
    } else {
      bucket.push(record);
    }
  }
  return buckets;
}
|
|
295
|
+
|
|
296
|
+
/**
 * Compute TF-IDF scores for tokens within each session. Returns one flat entry per
 * (token × session) pair: { token, sessionId, tfidf, count }.
 *
 * TF = count of token in session.
 * IDF = log(totalSessions / sessionsContainingToken), floored at a small epsilon so a
 * single-session corpus (IDF = log(1) = 0) still ranks by raw frequency.
 *
 * @param {Array<object>} records
 * @returns {{ tokenScores: Array<{token:string, sessionId:string, tfidf:number, count:number}>, sessionsByToken: Map<string,Set<string>>, recordsByToken: Map<string, Array<object>> }}
 */
function computeTfIdf(records) {
  const sessions = groupBySession(records);
  const totalSessions = Math.max(1, sessions.size);
  const sessionsByToken = new Map();
  const recordsByToken = new Map();

  // First pass: per-session token frequencies plus the two reverse indexes.
  /** @type {Map<string, Map<string, number>>} */
  const perSessionCounts = new Map();
  for (const [sessionId, sessionRecords] of sessions) {
    /** @type {Map<string, number>} */
    const counts = new Map();
    for (const record of sessionRecords) {
      const token = buildToken(record);
      counts.set(token, (counts.get(token) || 0) + 1);
      let sids = sessionsByToken.get(token);
      if (!sids) {
        sids = new Set();
        sessionsByToken.set(token, sids);
      }
      sids.add(sessionId);
      let recs = recordsByToken.get(token);
      if (!recs) {
        recs = [];
        recordsByToken.set(token, recs);
      }
      recs.push(record);
    }
    perSessionCounts.set(sessionId, counts);
  }

  // Second pass: score each (session, token) pair.
  const tokenScores = [];
  for (const [sessionId, counts] of perSessionCounts) {
    for (const [token, tf] of counts) {
      const docFreq = sessionsByToken.get(token).size;
      // IDF with a small floor so single-session corpora still rank.
      const idf = Math.max(0.01, Math.log(totalSessions / Math.max(1, docFreq)));
      tokenScores.push({ token, sessionId, tfidf: tf * idf, count: tf });
    }
  }

  return { tokenScores, sessionsByToken, recordsByToken };
}
|
|
343
|
+
|
|
344
|
+
/**
 * Pick the top-K tokens per session by TF-IDF, then deduplicate into a flat set —
 * a token reaching top-K in ANY session is selected. Downstream code attaches global
 * counts and applies the AC-2 threshold or the absolute-count fallback.
 *
 * @param {Array<{token:string, sessionId:string, tfidf:number}>} tokenScores
 * @param {number} k
 * @returns {Set<string>}
 */
function topKTokensPerSession(tokenScores, k) {
  /** @type {Map<string, Array<{token:string, tfidf:number}>>} */
  const perSession = new Map();
  for (const entry of tokenScores) {
    const list = perSession.get(entry.sessionId);
    if (list) {
      list.push(entry);
    } else {
      perSession.set(entry.sessionId, [entry]);
    }
  }
  const picked = new Set();
  for (const list of perSession.values()) {
    const ranked = [...list].sort((a, b) => b.tfidf - a.tfidf);
    const limit = Math.min(k, ranked.length);
    for (let i = 0; i < limit; i++) {
      picked.add(ranked[i].token);
    }
  }
  return picked;
}
|
|
370
|
+
|
|
371
|
+
// -----------------------------------------------------------------------------
|
|
372
|
+
// Heuristic stage — Stage 1
|
|
373
|
+
// -----------------------------------------------------------------------------
|
|
374
|
+
|
|
375
|
+
// @cap-todo(ac:F-071/AC-1) Stage-1 deterministic heuristic engine: TF-IDF + RegEx-Cluster + Frequency
|
|
376
|
+
// on signal records. Writes per-candidate JSON to .cap/learning/candidates/.
|
|
377
|
+
/**
 * Run Stage 1 — the deterministic heuristic engine — over all signals across the three F-070
 * collectors. Returns a list of HeuristicCandidate objects sorted by descending score, and writes
 * one `.cap/learning/candidates/<candidateId>.json` per candidate.
 *
 * Pure compute over the F-070 query API — never reads JSONL files directly. AC-7 budget reading is
 * NOT performed here; that's the orchestrator's job (Step 4 of /cap:learn).
 *
 * @param {string} projectRoot
 * @param {Object} [options]
 * @param {string} [options.sessionId] - Optional filter — only consider records from this session.
 * @param {number} [options.topK] - Override TFIDF_TOP_K_PER_SESSION (mostly for tests).
 * @param {boolean} [options.persist] - When false, candidates are returned but not written to disk. Default true.
 * @returns {{ candidates: HeuristicCandidate[], errors: string[] }}
 */
function runHeuristicStage(projectRoot, options) {
  const opts = options || {};
  const errors = [];
  if (typeof projectRoot !== 'string' || projectRoot.length === 0) {
    return { candidates: [], errors: ['projectRoot is required'] };
  }
  const persist = opts.persist !== false;
  const topK = typeof opts.topK === 'number' && opts.topK > 0 ? opts.topK : TFIDF_TOP_K_PER_SESSION;

  // Collect all three signal types via the F-070 query API. The range filter is honoured iff
  // sessionId is supplied — otherwise we operate on the full corpus. AC-1 doesn't restrict
  // the range; consumers wanting a window pass sessionId or a future range.
  // Each getSignals call is isolated in its own try/catch so one failing collector
  // degrades to an error entry instead of aborting the whole stage.
  const range = opts.sessionId ? { sessionId: opts.sessionId } : undefined;
  let overrides = [];
  let memoryRefs = [];
  let regrets = [];
  try {
    overrides = learningSignals.getSignals(projectRoot, 'override', range) || [];
  } catch (e) {
    errors.push(`getSignals(override) failed: ${e && e.message ? e.message : 'unknown'}`);
  }
  try {
    memoryRefs = learningSignals.getSignals(projectRoot, 'memory-ref', range) || [];
  } catch (e) {
    errors.push(`getSignals(memory-ref) failed: ${e && e.message ? e.message : 'unknown'}`);
  }
  try {
    regrets = learningSignals.getSignals(projectRoot, 'regret', range) || [];
  } catch (e) {
    errors.push(`getSignals(regret) failed: ${e && e.message ? e.message : 'unknown'}`);
  }

  const allRecords = [...overrides, ...memoryRefs, ...regrets];
  if (allRecords.length === 0) {
    return { candidates: [], errors };
  }

  // TF-IDF on the union — but we then walk each token and inspect its records' actual signalType.
  // That keeps memory-ref counts visible alongside override / regret counts in the same ranking.
  const { tokenScores, recordsByToken } = computeTfIdf(allRecords);
  const topTokens = topKTokensPerSession(tokenScores, topK);

  // Map<token, maxTfidf> — used by candidate() to populate the persisted `score` field separately
  // from the record `count`. We keep both because F-072 (fitness) and F-073 (review) may want to
  // sort by either; pre-computing the per-token max keeps candidate() pure.
  // @cap-decision(F-071/D6) `score` (TF-IDF magnitude) and `count` (record count) are persisted as
  //                         separate fields. Splitting was a PIN-decision before ship — F-072 will pick.
  /** @type {Map<string, number>} */
  const maxTfidfByToken = new Map();
  for (const s of tokenScores) {
    const cur = maxTfidfByToken.get(s.token) || 0;
    if (s.tfidf > cur) maxTfidfByToken.set(s.token, s.tfidf);
  }

  // ALSO include any token whose absolute count meets the AC-2 threshold, even if it didn't make
  // it into the per-session top-K. This is the "frequency" arm of AC-1's heuristic engine.
  // @cap-todo(ac:F-071/AC-1) Frequency-analysis arm: tokens with count >= threshold are considered
  //                          regardless of TF-IDF rank.
  // @cap-decision(F-071/D9) Effective threshold respects per-featureId L1 overrides from F-074
  //                         applied-state.json. The token's first record carries the featureId; if a user
  //                         applied P-NNN that proposed `F-070/threshold: 4`, the F-070 cluster needs 4
  //                         records (not 3) to reach the frequency arm.
  // NOTE: all records under one token share signalType/featureId by construction of the
  // tuple-token, so reading recsArr[0] is representative of the whole cluster.
  for (const [token, recs] of recordsByToken.entries()) {
    const recsArr = recs;
    const sigType = (recsArr[0] && recsArr[0].signalType) || 'unknown';
    const featureIdForToken = recsArr[0] && capId(recsArr[0].featureId);
    const requiredCount = getEffectiveThreshold(projectRoot, sigType, featureIdForToken);
    if (recsArr.length >= requiredCount) topTokens.add(token);
  }

  /** @type {HeuristicCandidate[]} */
  const candidates = [];
  for (const token of topTokens) {
    const recs = recordsByToken.get(token) || [];
    if (recs.length === 0) continue;

    // RegEx-cluster arm: group regret tokens by decisionId family. The token already encodes
    // featureId, so a "family" is simply (signalType + featureId) — same family already shares
    // a candidate. The clustering effect is implicit in the tuple-token construction.
    // @cap-todo(ac:F-071/AC-1) RegEx-Cluster arm — the `signalType|featureId|contextKey` tuple IS
    //                          the cluster key. Tokens are members of the same cluster iff they share
    //                          the (signalType, featureId) prefix; the contextKey distinguishes
    //                          instances within the cluster.

    candidate(candidates, token, recs, maxTfidfByToken.get(token) || 0, projectRoot);
  }

  // @cap-decision(F-071/D9) Post-collection effective-threshold filter. The TF-IDF arm could still
  //                         bubble up a "rare-but-concentrated" cluster whose count is below an applied
  //                         threshold; in the V5 loop the user has explicitly said "I don't want F-X
  //                         candidates until 4 records accumulate", so we drop them here instead of
  //                         surfacing them in the review board where they'd just produce noise. Stage 2
  //                         promotion (`checkThreshold`) is also threshold-aware as defense-in-depth.
  const filtered = candidates.filter((c) => {
    if (c.signalType === 'memory-ref') return true; // memory-ref carries positive signal — never filtered.
    const required = getEffectiveThreshold(projectRoot, c.signalType, c.featureId);
    return Number(c.count) >= required;
  });

  // Sort by count descending so the orchestrator processes the loudest clusters first.
  // F-072 / F-073 may resort by score (TF-IDF magnitude) when "rare-but-concentrated" matters more
  // than "loud" — both fields are persisted on the candidate.
  filtered.sort((a, b) => b.count - a.count);
  // Replace the contents of `candidates` in place (truncate + refill) so the array
  // identity returned to callers is the same one that was accumulated above.
  candidates.length = 0;
  for (const c of filtered) candidates.push(c);

  if (persist && candidates.length > 0) {
    ensureDir(candidatesDir(projectRoot));
    for (const c of candidates) {
      try {
        const fp = path.join(candidatesDir(projectRoot), `${c.candidateId}.json`);
        fs.writeFileSync(fp, JSON.stringify(c, null, 2) + '\n', 'utf8');
      } catch (e) {
        // A single failed write is reported but does not abort persisting the rest.
        errors.push(`persist candidate ${c.candidateId} failed: ${e && e.message ? e.message : 'unknown'}`);
      }
    }
  }

  return { candidates, errors };
}
|
|
512
|
+
|
|
513
|
+
/**
 * Build a HeuristicCandidate from a token and its contributing records and push it onto the
 * accumulator. Internal helper for runHeuristicStage.
 *
 * @param {HeuristicCandidate[]} acc
 * @param {string} token
 * @param {Array<object>} recs
 * @param {number} tfidfScore - Maximum TF-IDF score for this token across all sessions.
 * @param {string} [projectRoot] - Forwarded to buildHeuristicSuggestion so the L1 `from` reflects
 *                                any applied F-074 threshold override; absent => fallback to constants. (F-071/D9)
 */
function candidate(acc, token, recs, tfidfScore, projectRoot) {
  const signalType = recs[0].signalType;

  // Per-feature breakdown, sorted descending by count.
  /** @type {Map<string|null, number>} */
  const featureCounts = new Map();
  for (const rec of recs) {
    const fid = capId(rec.featureId);
    featureCounts.set(fid, (featureCounts.get(fid) || 0) + 1);
  }
  const byFeature = Array.from(featureCounts, ([featureId, count]) => ({ featureId, count }));
  byFeature.sort((a, b) => b.count - a.count);

  // Top context hashes — the contextHash field is the F-070 dedup key; we count occurrences.
  /** @type {Map<string, number>} */
  const hashCounts = new Map();
  for (const rec of recs) {
    // @cap-risk(F-071/AC-3) Only the contextHash hex string is taken — never the targetFile,
    //                       never the decisionId, never any free-text field. The privacy gate
    //                       in F-070 already hashed those at the source.
    const hash = capId(rec.contextHash);
    if (!hash) continue;
    hashCounts.set(hash, (hashCounts.get(hash) || 0) + 1);
  }
  const topContextHashes = Array.from(hashCounts, ([hash, count]) => ({ hash, count }))
    .sort((a, b) => b.count - a.count)
    .slice(0, 5);

  // candidateId = stable hash of the token. Re-using telemetry.hashContext keeps the hash function
  // identical to the F-070 / F-061 privacy gate — single source of truth.
  const candidateId = telemetry.hashContext(token);

  const dominantFeature = byFeature[0] && byFeature[0].featureId;

  // Heuristic-only L1 suggestion — a parameter tweak the user could apply WITHOUT an LLM call.
  // This is the "graceful degradation" payload (AC-5): if Stage 2 is skipped, this still ships.
  const suggestion = buildHeuristicSuggestion(signalType, recs, dominantFeature, projectRoot);

  acc.push({
    candidateId,
    signalType,
    featureId: dominantFeature,
    count: recs.length,
    // TF-IDF magnitude — persisted separately from `count` per @cap-decision(F-071/D6).
    score: tfidfScore,
    byFeature,
    topContextHashes,
    suggestion,
  });
}
|
|
576
|
+
|
|
577
|
+
// @cap-risk(F-071/AC-1) L1 oscillation: each run raises threshold by `to = recs.length + 1`. Two
|
|
578
|
+
// consecutive runs on a 4-record cluster with threshold 3 propose 4, then on a
|
|
579
|
+
// 4-record cluster with threshold 4 propose 5, … unbounded climb. The dampener
|
|
580
|
+
// lives in F-072 (fitness scoring): a low-fitness pattern is auto-retracted by
|
|
581
|
+
// F-074, breaking the loop. If F-072 is removed or skipped, this heuristic
|
|
582
|
+
// becomes unstable. Do not loosen `to = recs.length + 1` without F-072 in place.
|
|
583
|
+
/**
 * Build a heuristic-only L1 suggestion. The shape mirrors the L1 example in the F-071 brief:
 * { kind:'L1', target, from, to, rationale }.
 *
 * @param {string} signalType
 * @param {Array<object>} recs
 * @param {string|null} featureId
 * @param {string} [projectRoot] - When provided, `from` reflects the effective threshold (any
 *                                applied F-074 override), not just the constant default. (F-071/D9)
 * @returns {{kind:'L1', target:string, from:number, to:number, rationale:string}}
 */
function buildHeuristicSuggestion(signalType, recs, featureId, projectRoot) {
  // Default proposal: raise the AC-2 threshold so the same cluster wouldn't promote next time.
  // `from` anchors at the current (effective) threshold; `to` proposes count + 1 so the cluster
  // must grow further before re-triggering.
  const target = featureId ? `${featureId}/threshold` : 'F-071/threshold';
  const from = getEffectiveThreshold(projectRoot, signalType, featureId);
  const to = recs.length + 1;
  // @cap-risk(F-071/AC-3) The rationale is a pure-structural string — count + featureId.
  //                       No raw paths, no decision text. Safe to persist.
  const featureSuffix = featureId ? ` on ${featureId}` : '';
  const rationale = `Cluster of ${recs.length} ${signalType} signals${featureSuffix} would not have triggered if threshold had been ${to}.`;
  return { kind: 'L1', target, from, to, rationale };
}
|
|
608
|
+
|
|
609
|
+
// -----------------------------------------------------------------------------
|
|
610
|
+
// Threshold check — AC-2
|
|
611
|
+
// -----------------------------------------------------------------------------
|
|
612
|
+
|
|
613
|
+
// @cap-todo(ac:F-071/AC-2) Stage-2 trigger: candidate hits threshold (>=3 similar overrides OR >=1 regret).
|
|
614
|
+
/**
 * Decide whether a candidate qualifies for Stage 2. Only 'override' and 'regret' candidates can
 * qualify — memory-ref (and any unknown type) never triggers Stage 2: memory-ref tells you a
 * memory is *valuable*, not that something is *wrong*; promoting it would waste the LLM budget
 * on positive-signal data.
 *
 * Override candidates additionally must share `featureId` across all records (the candidate token
 * already encodes featureId, so this is implicit when the candidate was built from a single token).
 *
 * @cap-decision(F-071/D9) Optional `projectRoot` consults applied-state.json for a per-featureId
 *                         override. Backwards-compatible: when projectRoot is omitted, behaviour falls
 *                         through to the module constants exactly as before, so existing callers
 *                         (and the AC-2 unit tests) keep working unchanged.
 *
 * @param {HeuristicCandidate} candidate
 * @param {string} [projectRoot]
 * @returns {boolean}
 */
function checkThreshold(candidate, projectRoot) {
  if (!candidate || typeof candidate !== 'object') return false;
  const { signalType } = candidate;
  // Whitelist check: anything that is not override/regret (memory-ref included) is rejected.
  if (signalType !== 'override' && signalType !== 'regret') return false;
  const required = getEffectiveThreshold(projectRoot, signalType, candidate.featureId);
  return Number(candidate.count) >= required;
}
|
|
638
|
+
|
|
639
|
+
// -----------------------------------------------------------------------------
|
|
640
|
+
// P-NNN allocation — compute-on-read from filenames
|
|
641
|
+
// -----------------------------------------------------------------------------
|
|
642
|
+
|
|
643
|
+
// @cap-todo(ac:F-071/AC-6) P-NNN allocation: sequential, never renumbered. Compute-on-read.
|
|
644
|
+
/**
 * Allocate the next P-NNN id by scanning .cap/learning/patterns/P-*.json AND
 * .cap/learning/queue/P-*.md filenames. Returns 'P-001' when no files exist.
 *
 * AC-6 contract: "sequential, never renumbered" — gaps are fine. We return max(existing IDs) + 1.
 * If P-005 exists in the queue and P-001/P-002 in patterns, next is P-006. Pattern files and queue
 * files share the ID namespace because a deferred candidate retains its assigned ID across sessions.
 *
 * @param {string} projectRoot
 * @returns {string} 'P-NNN'
 */
function allocatePatternId(projectRoot) {
  const numbers = listExistingPatternIds(projectRoot)
    .map(parsePatternId)
    .filter((n) => n != null);
  const highest = numbers.length > 0 ? Math.max(...numbers) : 0;
  return formatPatternId(highest + 1);
}
|
|
664
|
+
|
|
665
|
+
/**
 * List every P-NNN id present in patterns/ (json) or queue/ (md). De-duplicated.
 *
 * @param {string} projectRoot
 * @returns {string[]}
 */
function listExistingPatternIds(projectRoot) {
  const found = new Set();
  // Scan one directory for files named P-<digits><suffix>; unreadable dirs are skipped silently.
  const collect = (dir, suffix) => {
    if (!fs.existsSync(dir)) return;
    let names;
    try {
      names = fs.readdirSync(dir);
    } catch (_e) {
      return;
    }
    for (const name of names) {
      if (!name.endsWith(suffix)) continue;
      const stem = name.slice(0, name.length - suffix.length);
      if (/^P-\d+$/.test(stem)) found.add(stem);
    }
  };
  collect(patternsDir(projectRoot), '.json');
  collect(queueDir(projectRoot), '.md');
  return Array.from(found);
}
|
|
691
|
+
|
|
692
|
+
/**
 * Parse a 'P-NNN' id into its numeric component.
 *
 * @param {string|null|undefined} id
 * @returns {number|null} The integer part, or null when the id does not match /^P-\d+$/.
 */
function parsePatternId(id) {
  const match = /^P-(\d+)$/.exec(id || '');
  if (!match) return null;
  return Number.parseInt(match[1], 10);
}
|
|
696
|
+
|
|
697
|
+
/**
 * Format a numeric pattern id as 'P-NNN' (zero-padded to PATTERN_ID_PAD digits).
 *
 * @param {number} n
 * @returns {string}
 */
function formatPatternId(n) {
  const digits = String(n).padStart(PATTERN_ID_PAD, '0');
  return PATTERN_ID_PREFIX + digits;
}
|
|
700
|
+
|
|
701
|
+
// -----------------------------------------------------------------------------
|
|
702
|
+
// Briefing builder — Stage 2 input (counts + hashes only)
|
|
703
|
+
// -----------------------------------------------------------------------------
|
|
704
|
+
|
|
705
|
+
// @cap-todo(ac:F-071/AC-3) PRIVACY-CRITICAL — LLM input is counts + hashes only. Constructs the
|
|
706
|
+
// structured aggregate { candidateId, signalType, count, byFeature,
|
|
707
|
+
// topContextHashes } and writes it to .cap/learning/queue/P-NNN.md as the
|
|
708
|
+
// briefing the outer agent will read.
|
|
709
|
+
// @cap-risk(F-071/AC-3) This is THE place where LLM-bound payload is constructed. Any new field
|
|
710
|
+
// added here MUST be a count or a hex hash. No paths, no decision text,
|
|
711
|
+
// no record verbatim, no targetFile string. The adversarial test injects
|
|
712
|
+
// SECRET_NEEDLE values into every input field and asserts zero needle bytes
|
|
713
|
+
// in the briefing markdown.
|
|
714
|
+
/**
 * Build a briefing for Stage 2 and persist it to .cap/learning/queue/P-NNN.md.
 *
 * The briefing is the ONLY artifact the outer agent (LLM) reads. It MUST contain only counts and
 * hex hashes — never raw paths, decision text, or record verbatim. The structured payload is also
 * returned for testing and for the orchestrator to forward to the agent.
 *
 * Returns null on invalid inputs or when the briefing file cannot be written.
 *
 * @param {HeuristicCandidate} candidate
 * @param {string} projectRoot
 * @param {Object} [options]
 * @param {string} [options.id] - Pre-allocated P-NNN id (optional; allocated if omitted).
 * @param {boolean} [options.deferred] - When true, the briefing carries a `deferred: budget` marker.
 * @returns {{ id: string, briefingPath: string, payload: object }|null}
 */
function buildBriefing(candidate, projectRoot, options) {
  if (!candidate || typeof candidate !== 'object') return null;
  if (typeof projectRoot !== 'string' || projectRoot.length === 0) return null;

  const opts = options || {};
  const id = opts.id || allocatePatternId(projectRoot);
  const deferred = opts.deferred === true;

  // @cap-risk(F-071/AC-3) Build the payload from STRUCTURED COUNTS + HEX HASHES only.
  //                       Validate every hash is hex via /^[0-9a-f]+$/ — anything else is dropped
  //                       defensively. This guards against a bug upstream (e.g. a future contributor
  //                       passing the raw path through here by mistake).
  // @cap-risk(F-071/AC-3) featureId is structured metadata, but the briefing enforces strict shape
  //                       /^F-\d{3,}$/ — anything else collapses to null. A future contributor who
  //                       tries to smuggle text via a hand-crafted featureId (e.g. by writing the
  //                       record with a non-conforming string) will see the field disappear from
  //                       the briefing rather than leak. The featureId-as-smuggle-channel attack is
  //                       proven impossible in tests (cap-pattern-pipeline-adversarial.test.cjs).
  const safeFeature = (s) => {
    const v = capId(s);
    if (v == null) return null;
    return /^F-\d{3,}$/.test(v) ? v : null;
  };
  // Hex-only, bounded length (sha-256 hex is 64 chars).
  const isHexHash = (h) => typeof h === 'string' && /^[0-9a-f]+$/.test(h) && h.length <= 64;

  // NOTE(review): the Number.isFinite filter below is vacuous — count is always a finite number
  // after Math.max(0, Number(...) || 0) — kept as belt-and-braces, does not change behavior.
  const byFeature = (Array.isArray(candidate.byFeature) ? candidate.byFeature : [])
    .map((row) => ({ featureId: safeFeature(row && row.featureId), count: Math.max(0, Number(row && row.count) || 0) }))
    .filter((row) => Number.isFinite(row.count));
  const topContextHashes = (Array.isArray(candidate.topContextHashes) ? candidate.topContextHashes : [])
    .filter((row) => row && isHexHash(row.hash))
    .map((row) => ({ hash: row.hash, count: Math.max(0, Number(row.count) || 0) }));

  const payload = {
    // A non-hex candidateId is re-hashed rather than passed through — same smuggle-channel defense.
    candidateId: typeof candidate.candidateId === 'string' && /^[0-9a-f]+$/.test(candidate.candidateId)
      ? candidate.candidateId
      : telemetry.hashContext(String(candidate.candidateId || 'unknown')),
    // signalType collapses to the literal 'unknown' unless it is one of the two promotable types.
    signalType: candidate.signalType === 'override' || candidate.signalType === 'regret'
      ? candidate.signalType
      : 'unknown',
    count: Math.max(0, Number(candidate.count) || 0),
    byFeature,
    topContextHashes,
  };

  ensureDir(queueDir(projectRoot));
  const briefingPath = path.join(queueDir(projectRoot), `${id}.md`);

  // Markdown body — pure counts + hashes. The frontmatter carries the deferred marker (AC-4).
  const md = renderBriefingMarkdown(id, payload, deferred);
  try {
    fs.writeFileSync(briefingPath, md, 'utf8');
  } catch (_e) {
    // Write failure is signalled by the null return; the caller decides how to report it.
    return null;
  }

  return { id, briefingPath, payload };
}
|
|
785
|
+
|
|
786
|
+
/**
|
|
787
|
+
* Render the briefing markdown. Frontmatter + sections; the payload is the only source of content.
|
|
788
|
+
*
|
|
789
|
+
* @param {string} id
|
|
790
|
+
* @param {object} payload
|
|
791
|
+
* @param {boolean} deferred
|
|
792
|
+
* @returns {string}
|
|
793
|
+
*/
|
|
794
|
+
function renderBriefingMarkdown(id, payload, deferred) {
|
|
795
|
+
const lines = [];
|
|
796
|
+
lines.push('---');
|
|
797
|
+
lines.push(`id: ${id}`);
|
|
798
|
+
lines.push(`signalType: ${payload.signalType}`);
|
|
799
|
+
lines.push(`count: ${payload.count}`);
|
|
800
|
+
lines.push(`candidateId: ${payload.candidateId}`);
|
|
801
|
+
if (deferred) lines.push('deferred: budget');
|
|
802
|
+
lines.push('---');
|
|
803
|
+
lines.push('');
|
|
804
|
+
lines.push(`# Pattern Briefing ${id}`);
|
|
805
|
+
lines.push('');
|
|
806
|
+
lines.push('Counts + hashes only. No raw signals, no user text, no file paths. (F-071/AC-3)');
|
|
807
|
+
lines.push('');
|
|
808
|
+
lines.push('## Aggregate');
|
|
809
|
+
lines.push('');
|
|
810
|
+
lines.push(`- signalType: \`${payload.signalType}\``);
|
|
811
|
+
lines.push(`- count: ${payload.count}`);
|
|
812
|
+
lines.push(`- candidateId: \`${payload.candidateId}\``);
|
|
813
|
+
lines.push('');
|
|
814
|
+
lines.push('## By Feature');
|
|
815
|
+
lines.push('');
|
|
816
|
+
if (payload.byFeature.length === 0) {
|
|
817
|
+
lines.push('_(none)_');
|
|
818
|
+
} else {
|
|
819
|
+
for (const row of payload.byFeature) {
|
|
820
|
+
lines.push(`- \`${row.featureId == null ? '(unassigned)' : row.featureId}\` — ${row.count}`);
|
|
821
|
+
}
|
|
822
|
+
}
|
|
823
|
+
lines.push('');
|
|
824
|
+
lines.push('## Top Context Hashes');
|
|
825
|
+
lines.push('');
|
|
826
|
+
if (payload.topContextHashes.length === 0) {
|
|
827
|
+
lines.push('_(none)_');
|
|
828
|
+
} else {
|
|
829
|
+
for (const row of payload.topContextHashes) {
|
|
830
|
+
lines.push(`- \`${row.hash}\` — ${row.count}`);
|
|
831
|
+
}
|
|
832
|
+
}
|
|
833
|
+
lines.push('');
|
|
834
|
+
lines.push('## Task');
|
|
835
|
+
lines.push('');
|
|
836
|
+
lines.push('Choose ONE of L1 / L2 / L3 and write the result to');
|
|
837
|
+
lines.push(`\`.cap/learning/patterns/${id}.json\` matching the documented schema.`);
|
|
838
|
+
lines.push('');
|
|
839
|
+
return lines.join('\n');
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
// -----------------------------------------------------------------------------
|
|
843
|
+
// Pattern persistence — write/read P-NNN.json
|
|
844
|
+
// -----------------------------------------------------------------------------
|
|
845
|
+
|
|
846
|
+
// @cap-todo(ac:F-071/AC-5) Graceful degradation — when LLM stage cannot run, persist the heuristic
|
|
847
|
+
// L1 suggestion with degraded:true. Marked via markDegraded() helper.
|
|
848
|
+
// @cap-todo(ac:F-071/AC-6) PatternRecord schema persisted here: id, level, featureRef, source,
|
|
849
|
+
// degraded, confidence, suggestion, evidence.
|
|
850
|
+
/**
 * Persist a PatternRecord to .cap/learning/patterns/P-NNN.json. Lazy-creates the directory.
 *
 * @param {string} projectRoot
 * @param {PatternRecord} pattern - Must carry a well-formed `id` ('P-NNN'); rejected otherwise.
 * @returns {boolean} true on successful write, false on invalid input or write failure.
 */
function recordPatternSuggestion(projectRoot, pattern) {
  // Guard clauses: reject bad roots, non-object patterns, and malformed ids up front.
  if (typeof projectRoot !== 'string' || projectRoot.length === 0) return false;
  if (!pattern || typeof pattern !== 'object') return false;
  if (typeof pattern.id !== 'string' || !/^P-\d+$/.test(pattern.id)) return false;

  ensureDir(patternsDir(projectRoot));
  const filePath = path.join(patternsDir(projectRoot), `${pattern.id}.json`);
  try {
    fs.writeFileSync(filePath, `${JSON.stringify(pattern, null, 2)}\n`, 'utf8');
    return true;
  } catch (_e) {
    return false;
  }
}
|
|
871
|
+
|
|
872
|
+
/**
 * Persist a heuristic-only PatternRecord (degraded path). Helper used by the orchestrator's
 * AC-5 fallback when an outer agent doesn't process the briefing in this session.
 *
 * @cap-decision(F-071/D8) Clobber protection: if `patterns/<id>.json` already exists with
 * `source !== 'heuristic'` (i.e. an LLM stage actually produced a pattern for this id), the
 * degraded fallback MUST NOT overwrite it. Returns `{ written: false, reason: 'llm-pattern-exists' }`
 * so the orchestrator knows to log instead of silently clobbering. Without this guard, a slow
 * Stage-2 LLM result followed by a Step-5 fallback in the same session could silently lose the
 * higher-quality LLM pattern. Foot-gun for F-072/F-073 wirers — closed pre-ship per Stage-2 review.
 * @cap-risk(F-071/AC-5) Two heuristic-only runs over the same id WILL overwrite (latest-wins is the
 * intended degraded contract). The guard only blocks heuristic-over-llm clobber, not heuristic-
 * over-heuristic refresh.
 *
 * @param {string} projectRoot
 * @param {string} id - 'P-NNN'
 * @param {HeuristicCandidate} candidate
 * @returns {boolean | { written: boolean, reason?: string, prior?: { source: string, level: string } }}
 * - `true` when the degraded record was written (back-compat with prior boolean callers).
 * - `false` when the candidate was nullish or the write itself failed.
 * - `{ written: false, reason: 'llm-pattern-exists', prior }` when an LLM pattern was preserved.
 */
function markDegraded(projectRoot, id, candidate) {
  if (!candidate) return false;

  // Clobber-protection: inspect any prior record at this id and refuse to replace an LLM one.
  try {
    const priorPath = path.join(patternsDir(projectRoot), `${id}.json`);
    if (fs.existsSync(priorPath)) {
      const prior = JSON.parse(fs.readFileSync(priorPath, 'utf8'));
      const priorIsLlm = Boolean(prior && prior.source && prior.source !== 'heuristic');
      if (priorIsLlm) {
        return {
          written: false,
          reason: 'llm-pattern-exists',
          prior: { source: prior.source, level: prior.level },
        };
      }
    }
  } catch (_e) {
    // Unreadable/malformed prior record → latest-wins; proceed with the degraded write.
  }

  // Evidence carried verbatim from the heuristic candidate so reviewers can trace the signal.
  const evidence = {
    candidateId: candidate.candidateId,
    signalType: candidate.signalType,
    count: candidate.count,
    topContextHashes: candidate.topContextHashes || [],
  };

  /** @type {PatternRecord} */
  const record = {
    id,
    createdAt: new Date().toISOString(),
    level: 'L1',
    featureRef: candidate.featureId || null,
    source: 'heuristic',
    degraded: true,
    confidence: 0.5,
    suggestion: candidate.suggestion,
    evidence,
  };
  return recordPatternSuggestion(projectRoot, record);
}
|
|
933
|
+
|
|
934
|
+
/**
 * List all persisted PatternRecords from `.cap/learning/patterns/P-*.json`. Tolerant to a
 * missing directory and to malformed files — both are skipped silently.
 *
 * @param {string} projectRoot
 * @returns {Array<PatternRecord>} Records in ascending numeric-id order (stable for consumers).
 */
function listPatterns(projectRoot) {
  const dir = patternsDir(projectRoot);
  if (!fs.existsSync(dir)) return [];

  let fileNames;
  try {
    fileNames = fs.readdirSync(dir);
  } catch (_e) {
    return [];
  }

  const records = [];
  for (const name of fileNames) {
    if (!name.endsWith('.json')) continue;
    if (!/^P-\d+\.json$/.test(name)) continue;
    try {
      const parsed = JSON.parse(fs.readFileSync(path.join(dir, name), 'utf8'));
      if (parsed && typeof parsed === 'object') records.push(parsed);
    } catch (_e) {
      // Skip — a malformed pattern file must not crash the listing.
    }
  }

  // Deterministic order: ascending numeric pattern id.
  records.sort((a, b) => (parsePatternId(a.id) || 0) - (parsePatternId(b.id) || 0));
  return records;
}
|
|
970
|
+
|
|
971
|
+
// -----------------------------------------------------------------------------
|
|
972
|
+
// Budget gate — AC-4 / AC-7
|
|
973
|
+
// -----------------------------------------------------------------------------
|
|
974
|
+
|
|
975
|
+
// @cap-todo(ac:F-071/AC-4) Budget hard-limit: 3 LLM calls per session by default. Overflow lands in
|
|
976
|
+
// .cap/learning/queue/ with deferred:budget. Re-uses readBudget +
|
|
977
|
+
// getLlmUsage from cap-telemetry.cjs — single source of truth.
|
|
978
|
+
// @cap-todo(ac:F-071/AC-7) Budget override from .cap/learning/config.json#llmBudgetPerSession.
|
|
979
|
+
// Honoured automatically because we delegate to telemetry.readBudget().
|
|
980
|
+
// @cap-risk(F-071/AC-4) The budget gate is THE reason we can ship Stage 2. A regression that
|
|
981
|
+
// bypasses readBudget / getLlmUsage would burn through the user's wallet
|
|
982
|
+
// silently. Every promotion path in this module routes through this function.
|
|
983
|
+
/**
 * Compute the remaining LLM-call budget for a session. Returns remaining=0 when the session
 * is at or over the budget cap. Delegates to telemetry.readBudget / telemetry.getLlmUsage so
 * there is a single source of truth for budget accounting.
 *
 * @param {string} projectRoot
 * @param {string|null} sessionId - When null/absent, usage is treated as 0.
 * @returns {{ budget: number, used: number, remaining: number, source: 'config'|'default' }}
 */
function getSessionBudgetState(projectRoot, sessionId) {
  const { budget, source } = telemetry.readBudget(projectRoot);

  let used = 0;
  if (sessionId) {
    try {
      used = (telemetry.getLlmUsage(projectRoot, { sessionId }) || []).length;
    } catch (_e) {
      // Telemetry read failure → count nothing; budgeting must never throw.
      used = 0;
    }
  }

  return { budget, used, remaining: Math.max(0, budget - used), source };
}
|
|
1005
|
+
|
|
1006
|
+
// -----------------------------------------------------------------------------
|
|
1007
|
+
// Exports — keep this list minimal. F-072 / F-073 should consume only these.
|
|
1008
|
+
// -----------------------------------------------------------------------------
|
|
1009
|
+
|
|
1010
|
+
// Module surface. Keep this list minimal — F-072 / F-073 should consume only these names.
module.exports = {
  // Constants — exported for tests only; not part of the stable public surface.
  CAP_DIR,
  LEARNING_DIR,
  CANDIDATES_DIR,
  PATTERNS_DIR,
  QUEUE_DIR,
  THRESHOLD_OVERRIDE_COUNT,
  THRESHOLD_REGRET_COUNT,
  TFIDF_TOP_K_PER_SESSION,
  // Public API — the orchestration entry points and pattern persistence helpers.
  runHeuristicStage,
  checkThreshold,
  allocatePatternId,
  buildBriefing,
  recordPatternSuggestion,
  markDegraded,
  listPatterns,
  getSessionBudgetState,
  getEffectiveThreshold,
  // Path helpers — exported for tests / consumers; kept private from public docs.
  candidatesDir,
  patternsDir,
  queueDir,
};
|