opencode-swarm-plugin 0.38.0 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env +2 -0
- package/.hive/eval-results.json +26 -0
- package/.hive/issues.jsonl +27 -0
- package/.hive/memories.jsonl +23 -1
- package/.opencode/eval-history.jsonl +12 -0
- package/CHANGELOG.md +182 -0
- package/README.md +29 -12
- package/bin/swarm.test.ts +881 -0
- package/bin/swarm.ts +686 -0
- package/dist/compaction-hook.d.ts +8 -1
- package/dist/compaction-hook.d.ts.map +1 -1
- package/dist/compaction-observability.d.ts +173 -0
- package/dist/compaction-observability.d.ts.map +1 -0
- package/dist/compaction-prompt-scoring.d.ts +124 -0
- package/dist/compaction-prompt-scoring.d.ts.map +1 -0
- package/dist/eval-capture.d.ts +174 -1
- package/dist/eval-capture.d.ts.map +1 -1
- package/dist/eval-gates.d.ts +84 -0
- package/dist/eval-gates.d.ts.map +1 -0
- package/dist/eval-history.d.ts +117 -0
- package/dist/eval-history.d.ts.map +1 -0
- package/dist/eval-learning.d.ts +216 -0
- package/dist/eval-learning.d.ts.map +1 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +80 -1
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +16098 -651
- package/dist/plugin.js +16012 -756
- package/dist/post-compaction-tracker.d.ts +133 -0
- package/dist/post-compaction-tracker.d.ts.map +1 -0
- package/dist/schemas/task.d.ts +3 -3
- package/dist/swarm-orchestrate.d.ts +23 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts +25 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm.d.ts +4 -0
- package/dist/swarm.d.ts.map +1 -1
- package/evals/README.md +702 -105
- package/evals/compaction-prompt.eval.ts +149 -0
- package/evals/coordinator-behavior.eval.ts +8 -8
- package/evals/fixtures/compaction-prompt-cases.ts +305 -0
- package/evals/lib/compaction-loader.test.ts +248 -0
- package/evals/lib/compaction-loader.ts +320 -0
- package/evals/lib/data-loader.test.ts +345 -0
- package/evals/lib/data-loader.ts +107 -6
- package/evals/scorers/compaction-prompt-scorers.ts +145 -0
- package/evals/scorers/compaction-scorers.ts +13 -13
- package/evals/scorers/coordinator-discipline.evalite-test.ts +166 -2
- package/evals/scorers/coordinator-discipline.ts +348 -15
- package/evals/scorers/index.test.ts +146 -0
- package/evals/scorers/index.ts +104 -0
- package/evals/swarm-decomposition.eval.ts +9 -2
- package/examples/commands/swarm.md +291 -21
- package/examples/plugin-wrapper-template.ts +117 -0
- package/package.json +7 -5
- package/scripts/migrate-unknown-sessions.ts +349 -0
- package/src/compaction-capture.integration.test.ts +257 -0
- package/src/compaction-hook.test.ts +42 -0
- package/src/compaction-hook.ts +315 -86
- package/src/compaction-observability.integration.test.ts +139 -0
- package/src/compaction-observability.test.ts +187 -0
- package/src/compaction-observability.ts +324 -0
- package/src/compaction-prompt-scorers.test.ts +299 -0
- package/src/compaction-prompt-scoring.ts +298 -0
- package/src/eval-capture.test.ts +626 -1
- package/src/eval-capture.ts +286 -2
- package/src/eval-gates.test.ts +306 -0
- package/src/eval-gates.ts +218 -0
- package/src/eval-history.test.ts +508 -0
- package/src/eval-history.ts +214 -0
- package/src/eval-learning.test.ts +378 -0
- package/src/eval-learning.ts +360 -0
- package/src/eval-runner.test.ts +96 -0
- package/src/eval-runner.ts +356 -0
- package/src/hive.ts +34 -0
- package/src/index.ts +115 -2
- package/src/memory.test.ts +110 -0
- package/src/memory.ts +34 -0
- package/src/post-compaction-tracker.test.ts +251 -0
- package/src/post-compaction-tracker.ts +237 -0
- package/src/swarm-decompose.ts +2 -2
- package/src/swarm-orchestrate.ts +2 -2
- package/src/swarm-prompts.ts +2 -2
- package/src/swarm-review.ts +3 -3
- package/dist/beads.d.ts +0 -386
- package/dist/beads.d.ts.map +0 -1
- package/dist/schemas/bead-events.d.ts +0 -698
- package/dist/schemas/bead-events.d.ts.map +0 -1
- package/dist/schemas/bead.d.ts +0 -255
- package/dist/schemas/bead.d.ts.map +0 -1
- /package/evals/{evalite.config.ts → evalite.config.ts.bak} +0 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Result from a gate check
|
|
3
|
+
*/
|
|
4
|
+
export interface GateResult {
|
|
5
|
+
/** Whether the gate passed */
|
|
6
|
+
passed: boolean;
|
|
7
|
+
/** Current phase */
|
|
8
|
+
phase: "bootstrap" | "stabilization" | "production";
|
|
9
|
+
/** Human-readable message */
|
|
10
|
+
message: string;
|
|
11
|
+
/** Baseline score (mean of history) */
|
|
12
|
+
baseline?: number;
|
|
13
|
+
/** Current score */
|
|
14
|
+
currentScore: number;
|
|
15
|
+
/** Regression percentage (negative = improvement) */
|
|
16
|
+
regressionPercent?: number;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Configuration for gate thresholds
|
|
20
|
+
*/
|
|
21
|
+
export interface GateConfig {
|
|
22
|
+
/** Regression threshold for stabilization phase (default: 0.1 = 10%) */
|
|
23
|
+
stabilizationThreshold?: number;
|
|
24
|
+
/** Regression threshold for production phase (default: 0.05 = 5%) */
|
|
25
|
+
productionThreshold?: number;
|
|
26
|
+
}
|
|
27
|
+
/**
|
|
28
|
+
* Default regression thresholds by phase
|
|
29
|
+
*/
|
|
30
|
+
export declare const DEFAULT_THRESHOLDS: {
|
|
31
|
+
readonly stabilization: 0.1;
|
|
32
|
+
readonly production: 0.05;
|
|
33
|
+
};
|
|
34
|
+
/**
|
|
35
|
+
* Check if the current eval score passes the quality gate
|
|
36
|
+
*
|
|
37
|
+
* Progressive gates adapt based on data maturity:
|
|
38
|
+
* - **Bootstrap (<10 runs)**: Always pass, focus on collecting baseline data
|
|
39
|
+
* - **Stabilization (10-50 runs)**: Warn on >10% regression (default), but pass
|
|
40
|
+
* - **Production (>50 runs + variance <0.1)**: Fail on >5% regression (default)
|
|
41
|
+
*
|
|
42
|
+
* **Baseline calculation**: Mean of all historical scores for this eval (not just last run).
|
|
43
|
+
*
|
|
44
|
+
* **Regression formula**: `(baseline - current) / baseline`
|
|
45
|
+
* - Positive = regression (score dropped)
|
|
46
|
+
* - Negative = improvement
|
|
47
|
+
* - Returns 0 if baseline is 0 (avoids division by zero)
|
|
48
|
+
*
|
|
49
|
+
* **Variance threshold (0.1)**: High variance keeps eval in stabilization phase even with >50 runs.
|
|
50
|
+
* This prevents premature production gates when scores are still unstable.
|
|
51
|
+
*
|
|
52
|
+
* **CI Integration**: Production gates can fail PRs. Use `swarm eval status` to check phase before merging.
|
|
53
|
+
*
|
|
54
|
+
* @param projectPath - Absolute path to project root (contains `.opencode/eval-history.jsonl`)
|
|
55
|
+
* @param evalName - Name of the eval (e.g., "swarm-decomposition", "coordinator-behavior")
|
|
56
|
+
* @param currentScore - Current score to check (typically 0-1 range)
|
|
57
|
+
* @param config - Optional threshold configuration (defaults: stabilization=0.1, production=0.05)
|
|
58
|
+
* @returns Gate check result with pass/fail, phase, baseline, regression details
|
|
59
|
+
*
|
|
60
|
+
* @example
|
|
61
|
+
* ```typescript
|
|
62
|
+
* import { checkGate } from "./eval-gates.js";
|
|
63
|
+
*
|
|
64
|
+
* const result = checkGate("/path/to/project", "swarm-decomposition", 0.89);
|
|
65
|
+
*
|
|
66
|
+
* if (!result.passed) {
|
|
67
|
+
* console.error(`❌ Gate FAILED: ${result.message}`);
|
|
68
|
+
* process.exit(1); // Fail CI
|
|
69
|
+
* }
|
|
70
|
+
*
|
|
71
|
+
* console.log(`✅ ${result.phase} phase: ${result.message}`);
|
|
72
|
+
* ```
|
|
73
|
+
*
|
|
74
|
+
* @example
|
|
75
|
+
* ```typescript
|
|
76
|
+
* // Custom thresholds for sensitive eval
|
|
77
|
+
* const result = checkGate("/path", "critical-eval", 0.92, {
|
|
78
|
+
* stabilizationThreshold: 0.05, // 5% threshold in stabilization
|
|
79
|
+
* productionThreshold: 0.02, // 2% threshold in production
|
|
80
|
+
* });
|
|
81
|
+
* ```
|
|
82
|
+
*/
|
|
83
|
+
export declare function checkGate(projectPath: string, evalName: string, currentScore: number, config?: GateConfig): GateResult;
|
|
84
|
+
//# sourceMappingURL=eval-gates.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-gates.d.ts","sourceRoot":"","sources":["../src/eval-gates.ts"],"names":[],"mappings":"AAYA;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,8BAA8B;IAC9B,MAAM,EAAE,OAAO,CAAC;IAChB,oBAAoB;IACpB,KAAK,EAAE,WAAW,GAAG,eAAe,GAAG,YAAY,CAAC;IACpD,6BAA6B;IAC7B,OAAO,EAAE,MAAM,CAAC;IAChB,uCAAuC;IACvC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,oBAAoB;IACpB,YAAY,EAAE,MAAM,CAAC;IACrB,qDAAqD;IACrD,iBAAiB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,UAAU;IAC1B,wEAAwE;IACxE,sBAAsB,CAAC,EAAE,MAAM,CAAC;IAChC,qEAAqE;IACrE,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAED;;GAEG;AACH,eAAO,MAAM,kBAAkB;;;CAGrB,CAAC;AAoCX;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgDG;AACH,wBAAgB,SAAS,CACxB,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,EACpB,MAAM,CAAC,EAAE,UAAU,GACjB,UAAU,CAiFZ"}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Progressive phases based on run count and variance
|
|
3
|
+
*/
|
|
4
|
+
export type Phase = "bootstrap" | "stabilization" | "production";
|
|
5
|
+
/**
|
|
6
|
+
* Single eval run record
|
|
7
|
+
*/
|
|
8
|
+
export interface EvalRunRecord {
|
|
9
|
+
/** ISO-8601 timestamp */
|
|
10
|
+
timestamp: string;
|
|
11
|
+
/** Name of the eval (e.g., "swarm-decomposition") */
|
|
12
|
+
eval_name: string;
|
|
13
|
+
/** Score (0-1 range typically) */
|
|
14
|
+
score: number;
|
|
15
|
+
/** Run count (monotonically increasing per eval) */
|
|
16
|
+
run_count: number;
|
|
17
|
+
}
|
|
18
|
+
/**
|
|
19
|
+
* Default path for eval history
|
|
20
|
+
*/
|
|
21
|
+
export declare const DEFAULT_EVAL_HISTORY_PATH = ".opencode/eval-history.jsonl";
|
|
22
|
+
/**
|
|
23
|
+
* Variance threshold for production phase
|
|
24
|
+
*/
|
|
25
|
+
export declare const VARIANCE_THRESHOLD = 0.1;
|
|
26
|
+
/**
|
|
27
|
+
* Run count thresholds for phase transitions
|
|
28
|
+
*/
|
|
29
|
+
export declare const BOOTSTRAP_THRESHOLD = 10;
|
|
30
|
+
export declare const STABILIZATION_THRESHOLD = 50;
|
|
31
|
+
/**
|
|
32
|
+
* Get the eval history file path
|
|
33
|
+
*/
|
|
34
|
+
export declare function getEvalHistoryPath(projectPath: string): string;
|
|
35
|
+
/**
|
|
36
|
+
* Ensure the eval history directory exists
|
|
37
|
+
*/
|
|
38
|
+
export declare function ensureEvalHistoryDir(projectPath: string): void;
|
|
39
|
+
/**
|
|
40
|
+
* Record an eval run to JSONL history
|
|
41
|
+
*
|
|
42
|
+
* Appends atomically to `.opencode/eval-history.jsonl`. Each line is a complete JSON object
|
|
43
|
+
* representing one eval run (timestamp, eval name, score, run count).
|
|
44
|
+
*
|
|
45
|
+
* **Auto-creates directory** if `.opencode/` doesn't exist.
|
|
46
|
+
*
|
|
47
|
+
* **Thread-safe**: Uses `appendFileSync` for atomic writes (safe for concurrent eval runs).
|
|
48
|
+
*
|
|
49
|
+
* **Integration**: Called automatically by evalite runner after each eval completes.
|
|
50
|
+
* Also callable manually for custom eval tracking.
|
|
51
|
+
*
|
|
52
|
+
* @param projectPath - Absolute path to project root
|
|
53
|
+
* @param run - Eval run record with timestamp, eval_name, score, run_count
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```typescript
|
|
57
|
+
* import { recordEvalRun } from "./eval-history.js";
|
|
58
|
+
*
|
|
59
|
+
* recordEvalRun("/path/to/project", {
|
|
60
|
+
* timestamp: new Date().toISOString(),
|
|
61
|
+
* eval_name: "swarm-decomposition",
|
|
62
|
+
* score: 0.92,
|
|
63
|
+
* run_count: 15,
|
|
64
|
+
* });
|
|
65
|
+
* ```
|
|
66
|
+
*/
|
|
67
|
+
export declare function recordEvalRun(projectPath: string, run: EvalRunRecord): void;
|
|
68
|
+
/**
|
|
69
|
+
* Get score history for a specific eval
|
|
70
|
+
*
|
|
71
|
+
* Returns runs in chronological order (oldest first)
|
|
72
|
+
*/
|
|
73
|
+
export declare function getScoreHistory(projectPath: string, evalName: string): EvalRunRecord[];
|
|
74
|
+
/**
|
|
75
|
+
* Calculate statistical variance of scores
|
|
76
|
+
*
|
|
77
|
+
* Variance = mean of squared deviations from the mean
|
|
78
|
+
* Formula: Σ((x - μ)²) / n
|
|
79
|
+
*/
|
|
80
|
+
export declare function calculateVariance(scores: number[]): number;
|
|
81
|
+
/**
|
|
82
|
+
* Get the current phase for an eval based on run count and score variance
|
|
83
|
+
*
|
|
84
|
+
* Progressive phase logic ensures quality gates adapt to data maturity:
|
|
85
|
+
*
|
|
86
|
+
* - **Bootstrap (<10 runs)**: No gates, just collect baseline data
|
|
87
|
+
* - **Stabilization (10-50 runs)**: Warn on >10% regression (but pass)
|
|
88
|
+
* - **Production (>50 runs AND variance <0.1)**: Fail on >5% regression
|
|
89
|
+
*
|
|
90
|
+
* **Variance check**: If >50 runs but variance ≥0.1, stays in stabilization.
|
|
91
|
+
* This prevents premature production gates when scores are still unstable.
|
|
92
|
+
*
|
|
93
|
+
* **Why variance matters**: An eval with wildly fluctuating scores isn't ready for
|
|
94
|
+
* strict gates. Variance threshold (0.1) ensures the eval is consistent before
|
|
95
|
+
* enforcing production-level quality control.
|
|
96
|
+
*
|
|
97
|
+
* @param projectPath - Absolute path to project root (contains `.opencode/eval-history.jsonl`)
|
|
98
|
+
* @param evalName - Name of the eval (e.g., "swarm-decomposition")
|
|
99
|
+
* @returns Current phase: "bootstrap" | "stabilization" | "production"
|
|
100
|
+
*
|
|
101
|
+
* @example
|
|
102
|
+
* ```typescript
|
|
103
|
+
* import { getPhase } from "./eval-history.js";
|
|
104
|
+
*
|
|
105
|
+
* const phase = getPhase("/path/to/project", "swarm-decomposition");
|
|
106
|
+
*
|
|
107
|
+
* if (phase === "production") {
|
|
108
|
+
* console.log("🚀 Production phase - strict gates enabled");
|
|
109
|
+
* } else if (phase === "stabilization") {
|
|
110
|
+
* console.log("⚙️ Stabilization phase - warnings only");
|
|
111
|
+
* } else {
|
|
112
|
+
* console.log("🌱 Bootstrap phase - collecting data");
|
|
113
|
+
* }
|
|
114
|
+
* ```
|
|
115
|
+
*/
|
|
116
|
+
export declare function getPhase(projectPath: string, evalName: string): Phase;
|
|
117
|
+
//# sourceMappingURL=eval-history.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-history.d.ts","sourceRoot":"","sources":["../src/eval-history.ts"],"names":[],"mappings":"AAaA;;GAEG;AACH,MAAM,MAAM,KAAK,GAAG,WAAW,GAAG,eAAe,GAAG,YAAY,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,yBAAyB;IACzB,SAAS,EAAE,MAAM,CAAC;IAClB,qDAAqD;IACrD,SAAS,EAAE,MAAM,CAAC;IAClB,kCAAkC;IAClC,KAAK,EAAE,MAAM,CAAC;IACd,oDAAoD;IACpD,SAAS,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,yBAAyB,iCAAiC,CAAC;AAExE;;GAEG;AACH,eAAO,MAAM,kBAAkB,MAAM,CAAC;AAEtC;;GAEG;AACH,eAAO,MAAM,mBAAmB,KAAK,CAAC;AACtC,eAAO,MAAM,uBAAuB,KAAK,CAAC;AAE1C;;GAEG;AACH,wBAAgB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,MAAM,CAE9D;AAED;;GAEG;AACH,wBAAgB,oBAAoB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAM9D;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AACH,wBAAgB,aAAa,CAC3B,WAAW,EAAE,MAAM,EACnB,GAAG,EAAE,aAAa,GACjB,IAAI,CAKN;AAoBD;;;;GAIG;AACH,wBAAgB,eAAe,CAC7B,WAAW,EAAE,MAAM,EACnB,QAAQ,EAAE,MAAM,GACf,aAAa,EAAE,CAIjB;AAED;;;;;GAKG;AACH,wBAAgB,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,GAAG,MAAM,CAa1D;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkCG;AACH,wBAAgB,QAAQ,CAAC,WAAW,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,KAAK,CAqBrE"}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Eval-to-Learning Feedback Loop
|
|
3
|
+
*
|
|
4
|
+
* Automatically stores eval failures to semantic memory for learning.
|
|
5
|
+
* When eval scores drop significantly from rolling average (default >15%),
|
|
6
|
+
* stores context to semantic-memory with tags for future prompt generation.
|
|
7
|
+
*
|
|
8
|
+
* ## Usage
|
|
9
|
+
*
|
|
10
|
+
* ```typescript
|
|
11
|
+
* import { learnFromEvalFailure } from "./eval-learning";
|
|
12
|
+
* import { getMemoryAdapter } from "./memory-tools";
|
|
13
|
+
* import { getScoreHistory } from "./eval-history";
|
|
14
|
+
*
|
|
15
|
+
* const memoryAdapter = await getMemoryAdapter();
|
|
16
|
+
* const history = getScoreHistory(projectPath, "compaction-test");
|
|
17
|
+
*
|
|
18
|
+
* const result = await learnFromEvalFailure(
|
|
19
|
+
* "compaction-test",
|
|
20
|
+
* currentScore,
|
|
21
|
+
* history,
|
|
22
|
+
* memoryAdapter
|
|
23
|
+
* );
|
|
24
|
+
*
|
|
25
|
+
* if (result.triggered) {
|
|
26
|
+
* console.log(`📉 Regression detected: ${(result.drop_percentage * 100).toFixed(1)}% drop`);
|
|
27
|
+
* console.log(`Memory ID: ${result.memory_id}`);
|
|
28
|
+
* }
|
|
29
|
+
* ```
|
|
30
|
+
*
|
|
31
|
+
* ## Integration Points
|
|
32
|
+
*
|
|
33
|
+
* - **After each eval run**: Call to detect regressions automatically
|
|
34
|
+
* - **Memory tags**: `eval-failure`, `{eval-name}`, `regression`
|
|
35
|
+
* - **Future prompts**: Query memories with these tags for context
|
|
36
|
+
* - **Scorer context**: Optional detail about which scorer failed
|
|
37
|
+
*
|
|
38
|
+
* ## Customization
|
|
39
|
+
*
|
|
40
|
+
* ```typescript
|
|
41
|
+
* const customConfig = {
|
|
42
|
+
* dropThreshold: 0.10, // 10% threshold (more sensitive)
|
|
43
|
+
* windowSize: 10, // Last 10 runs for baseline
|
|
44
|
+
* };
|
|
45
|
+
*
|
|
46
|
+
* await learnFromEvalFailure(
|
|
47
|
+
* "test",
|
|
48
|
+
* score,
|
|
49
|
+
* history,
|
|
50
|
+
* adapter,
|
|
51
|
+
* { config: customConfig }
|
|
52
|
+
* );
|
|
53
|
+
* ```
|
|
54
|
+
*
|
|
55
|
+
* @module eval-learning
|
|
56
|
+
*/
|
|
57
|
+
import type { EvalRunRecord } from "./eval-history";
|
|
58
|
+
import type { MemoryAdapter } from "./memory-tools";
|
|
59
|
+
/**
|
|
60
|
+
* Configuration for eval-to-learning feedback
|
|
61
|
+
*/
|
|
62
|
+
export interface EvalLearningConfig {
|
|
63
|
+
/** Threshold for significant drop (0-1, default 0.15 = 15%) */
|
|
64
|
+
dropThreshold: number;
|
|
65
|
+
/** Rolling average window size (default 5 runs) */
|
|
66
|
+
windowSize: number;
|
|
67
|
+
}
|
|
68
|
+
/**
|
|
69
|
+
* Default configuration
|
|
70
|
+
*/
|
|
71
|
+
export declare const DEFAULT_EVAL_LEARNING_CONFIG: EvalLearningConfig;
|
|
72
|
+
/**
|
|
73
|
+
* Result from learning check
|
|
74
|
+
*/
|
|
75
|
+
export interface LearningResult {
|
|
76
|
+
/** Whether the check triggered memory storage */
|
|
77
|
+
triggered: boolean;
|
|
78
|
+
/** Baseline score from rolling average */
|
|
79
|
+
baseline: number;
|
|
80
|
+
/** Current score */
|
|
81
|
+
current: number;
|
|
82
|
+
/** Drop percentage (0-1, e.g., 0.20 = 20% drop) */
|
|
83
|
+
drop_percentage: number;
|
|
84
|
+
/** Memory ID if stored */
|
|
85
|
+
memory_id?: string;
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Calculate rolling average of recent scores
|
|
89
|
+
*
|
|
90
|
+
* Uses last N runs (default 5) to establish baseline.
|
|
91
|
+
* If history shorter than window, uses all available.
|
|
92
|
+
*
|
|
93
|
+
* @param history - Score history (chronological order)
|
|
94
|
+
* @param windowSize - Number of recent runs to average (default 5)
|
|
95
|
+
* @returns Average score (0 if empty)
|
|
96
|
+
*/
|
|
97
|
+
export declare function calculateRollingAverage(history: EvalRunRecord[], windowSize?: number): number;
|
|
98
|
+
/**
|
|
99
|
+
* Check if current score is a significant drop from baseline
|
|
100
|
+
*
|
|
101
|
+
* Significant = drop exceeds threshold (default 15%).
|
|
102
|
+
* Formula: (baseline - current) / baseline >= threshold
|
|
103
|
+
*
|
|
104
|
+
* @param currentScore - Current eval score
|
|
105
|
+
* @param baseline - Baseline score (rolling average)
|
|
106
|
+
* @param threshold - Drop threshold (default 0.15 = 15%)
|
|
107
|
+
* @returns True if drop is significant
|
|
108
|
+
*/
|
|
109
|
+
export declare function isSignificantDrop(currentScore: number, baseline: number, threshold?: number): boolean;
|
|
110
|
+
/**
|
|
111
|
+
* Format failure context for semantic memory storage
|
|
112
|
+
*
|
|
113
|
+
* Creates human-readable description of the failure with
|
|
114
|
+
* quantified metrics and optional scorer context.
|
|
115
|
+
*
|
|
116
|
+
* @param evalName - Name of eval that failed
|
|
117
|
+
* @param currentScore - Current score
|
|
118
|
+
* @param baseline - Baseline score
|
|
119
|
+
* @param scorerContext - Optional context about which scorer failed
|
|
120
|
+
* @returns Formatted context string
|
|
121
|
+
*/
|
|
122
|
+
export declare function formatFailureContext(evalName: string, currentScore: number, baseline: number, scorerContext?: string): string;
|
|
123
|
+
/**
|
|
124
|
+
* Main learning function - automatically stores eval failures to semantic memory
|
|
125
|
+
*
|
|
126
|
+
* **Closed-loop learning**: When eval scores drop significantly from baseline,
|
|
127
|
+
* this function stores failure context to semantic memory. Future prompt generation
|
|
128
|
+
* queries these memories for context, preventing repeated mistakes.
|
|
129
|
+
*
|
|
130
|
+
* **Trigger condition**: Score drops >15% (default) from rolling average baseline.
|
|
131
|
+
* Uses last 5 runs (default) to establish baseline, not just previous run.
|
|
132
|
+
*
|
|
133
|
+
* **What gets stored**:
|
|
134
|
+
* - Eval name, baseline score, current score, drop percentage
|
|
135
|
+
* - Scorer-specific context (which scorer failed, why)
|
|
136
|
+
* - Timestamp and metadata for querying
|
|
137
|
+
* - Tags: `eval-failure`, `{eval-name}`, `regression`
|
|
138
|
+
*
|
|
139
|
+
* **Future use**: Before generating prompts for the same eval, query semantic memory
|
|
140
|
+
* with tags to inject learnings from past failures.
|
|
141
|
+
*
|
|
142
|
+
* **Integration points**:
|
|
143
|
+
* - After each eval run (in evalite runner or CI)
|
|
144
|
+
* - In `checkGate()` when regression detected
|
|
145
|
+
* - Manual calls for custom eval tracking
|
|
146
|
+
*
|
|
147
|
+
* @param evalName - Name of eval (e.g., "compaction-test", "coordinator-behavior")
|
|
148
|
+
* @param currentScore - Current eval score (typically 0-1 range)
|
|
149
|
+
* @param history - Score history in chronological order (oldest first)
|
|
150
|
+
* @param memoryAdapter - Semantic memory adapter (from `getMemoryAdapter()`)
|
|
151
|
+
* @param options - Optional config (thresholds, window size) and scorer context
|
|
152
|
+
* @param options.config - Custom thresholds (dropThreshold, windowSize)
|
|
153
|
+
* @param options.scorerContext - Details about which scorer failed (for context)
|
|
154
|
+
* @returns Learning result with trigger status, baseline, drop percentage, memory ID
|
|
155
|
+
*
|
|
156
|
+
* @example
|
|
157
|
+
* ```typescript
|
|
158
|
+
* import { learnFromEvalFailure } from "./eval-learning.js";
|
|
159
|
+
* import { getMemoryAdapter } from "./memory-tools.js";
|
|
160
|
+
* import { getScoreHistory } from "./eval-history.js";
|
|
161
|
+
*
|
|
162
|
+
* const memoryAdapter = await getMemoryAdapter();
|
|
163
|
+
* const history = getScoreHistory("/path/to/project", "coordinator-behavior");
|
|
164
|
+
*
|
|
165
|
+
* const result = await learnFromEvalFailure(
|
|
166
|
+
* "coordinator-behavior",
|
|
167
|
+
* 0.68, // Current score
|
|
168
|
+
* history,
|
|
169
|
+
* memoryAdapter,
|
|
170
|
+
* { scorerContext: "violationCount: 5 violations (coordinator edited files)" }
|
|
171
|
+
* );
|
|
172
|
+
*
|
|
173
|
+
* if (result.triggered) {
|
|
174
|
+
* console.log(`📉 Regression detected: ${(result.drop_percentage * 100).toFixed(1)}% drop`);
|
|
175
|
+
* console.log(`Stored to memory: ${result.memory_id}`);
|
|
176
|
+
* }
|
|
177
|
+
* ```
|
|
178
|
+
*
|
|
179
|
+
* @example
|
|
180
|
+
* ```typescript
|
|
181
|
+
* // Custom threshold (more sensitive)
|
|
182
|
+
* const result = await learnFromEvalFailure(
|
|
183
|
+
* "critical-eval",
|
|
184
|
+
* 0.85,
|
|
185
|
+
* history,
|
|
186
|
+
* memoryAdapter,
|
|
187
|
+
* {
|
|
188
|
+
* config: {
|
|
189
|
+
* dropThreshold: 0.10, // 10% threshold (default is 15%)
|
|
190
|
+
* windowSize: 10, // Last 10 runs for baseline (default is 5)
|
|
191
|
+
* },
|
|
192
|
+
* }
|
|
193
|
+
* );
|
|
194
|
+
* ```
|
|
195
|
+
*/
|
|
196
|
+
export declare function learnFromEvalFailure(evalName: string, currentScore: number, history: EvalRunRecord[], memoryAdapter: MemoryAdapter, options?: {
|
|
197
|
+
config?: EvalLearningConfig;
|
|
198
|
+
scorerContext?: string;
|
|
199
|
+
}): Promise<LearningResult>;
|
|
200
|
+
/**
|
|
201
|
+
* Create custom learning config with specific threshold
|
|
202
|
+
*
|
|
203
|
+
* Helper for common use case: custom drop threshold.
|
|
204
|
+
*
|
|
205
|
+
* @param dropThreshold - Drop threshold (0-1)
|
|
206
|
+
* @param windowSize - Optional window size (default 5)
|
|
207
|
+
* @returns Custom config
|
|
208
|
+
*
|
|
209
|
+
* @example
|
|
210
|
+
* ```typescript
|
|
211
|
+
* const config = createLearningConfig(0.10); // 10% threshold
|
|
212
|
+
* await learnFromEvalFailure("test", score, history, adapter, { config });
|
|
213
|
+
* ```
|
|
214
|
+
*/
|
|
215
|
+
export declare function createLearningConfig(dropThreshold: number, windowSize?: number): EvalLearningConfig;
|
|
216
|
+
//# sourceMappingURL=eval-learning.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"eval-learning.d.ts","sourceRoot":"","sources":["../src/eval-learning.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuDG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,gBAAgB,CAAC;AAMpD;;GAEG;AACH,MAAM,WAAW,kBAAkB;IAClC,+DAA+D;IAC/D,aAAa,EAAE,MAAM,CAAC;IACtB,mDAAmD;IACnD,UAAU,EAAE,MAAM,CAAC;CACnB;AAED;;GAEG;AACH,eAAO,MAAM,4BAA4B,EAAE,kBAG1C,CAAC;AAMF;;GAEG;AACH,MAAM,WAAW,cAAc;IAC9B,iDAAiD;IACjD,SAAS,EAAE,OAAO,CAAC;IACnB,0CAA0C;IAC1C,QAAQ,EAAE,MAAM,CAAC;IACjB,oBAAoB;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,mDAAmD;IACnD,eAAe,EAAE,MAAM,CAAC;IACxB,0BAA0B;IAC1B,SAAS,CAAC,EAAE,MAAM,CAAC;CACnB;AAMD;;;;;;;;;GASG;AACH,wBAAgB,uBAAuB,CACtC,OAAO,EAAE,aAAa,EAAE,EACxB,UAAU,GAAE,MAAU,GACpB,MAAM,CAQR;AAED;;;;;;;;;;GAUG;AACH,wBAAgB,iBAAiB,CAChC,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAChB,SAAS,GAAE,MAAa,GACtB,OAAO,CAQT;AAED;;;;;;;;;;;GAWG;AACH,wBAAgB,oBAAoB,CACnC,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,EACpB,QAAQ,EAAE,MAAM,EAChB,aAAa,CAAC,EAAE,MAAM,GACpB,MAAM,CAsBR;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAwEG;AACH,wBAAsB,oBAAoB,CACzC,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,MAAM,EACpB,OAAO,EAAE,aAAa,EAAE,EACxB,aAAa,EAAE,aAAa,EAC5B,OAAO,CAAC,EAAE;IACT,MAAM,CAAC,EAAE,kBAAkB,CAAC;IAC5B,aAAa,CAAC,EAAE,MAAM,CAAC;CACvB,GACC,OAAO,CAAC,cAAc,CAAC,CAqDzB;AAMD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,oBAAoB,CACnC,aAAa,EAAE,MAAM,EACrB,UAAU,CAAC,EAAE,MAAM,GACjB,kBAAkB,CAKpB"}
|
package/dist/hive.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hive.d.ts","sourceRoot":"","sources":["../src/hive.ts"],"names":[],"mappings":"AAgBA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAKL,KAAK,WAAW,EAIjB,MAAM,YAAY,CAAC;AAepB;;;;;GAKG;AACH,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAE/D;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,IAAI,MAAM,CAEhD;AAGD,eAAO,MAAM,wBAAwB,gCAA0B,CAAC;AAChE,eAAO,MAAM,wBAAwB,gCAA0B,CAAC;AAuChE;;GAEG;AACH,qBAAa,SAAU,SAAQ,KAAK;aAGhB,OAAO,EAAE,MAAM;aACf,QAAQ,CAAC,EAAE,MAAM;aACjB,MAAM,CAAC,EAAE,MAAM;gBAH/B,OAAO,EAAE,MAAM,EACC,OAAO,EAAE,MAAM,EACf,QAAQ,CAAC,EAAE,MAAM,YAAA,EACjB,MAAM,CAAC,EAAE,MAAM,YAAA;CAKlC;AAGD,eAAO,MAAM,SAAS,kBAAY,CAAC;AAEnC;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,KAAK;aAG1B,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBADpC,OAAO,EAAE,MAAM,EACC,QAAQ,EAAE,CAAC,CAAC,QAAQ;CAKvC;AAGD,eAAO,MAAM,mBAAmB,4BAAsB,CAAC;AAMvD;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,kCAAkC;IAClC,MAAM,EAAE,OAAO,CAAC;IAChB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,sCAAsC;IACtC,QAAQ,EAAE,OAAO,CAAC;IAClB,sCAAsC;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;GASG;AACH,wBAAgB,yBAAyB,CAAC,WAAW,EAAE,MAAM,GAAG,oBAAoB,CAgBnF;AAED;;;;;;;;GAQG;AACH,wBAAsB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAyBtF;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAO7D;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAC,CAAC,CA6CxG;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,mBAAmB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC;IACtE,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC,CAmGD;AAoFD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAiB7E;AAGD,eAAO,MAAM,eAAe,uBAAiB,CAAC;AA+E9C;;GAEG;AACH,eAAO,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;CA+CtB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
1
|
+
{"version":3,"file":"hive.d.ts","sourceRoot":"","sources":["../src/hive.ts"],"names":[],"mappings":"AAgBA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,EAKL,KAAK,WAAW,EAIjB,MAAM,YAAY,CAAC;AAepB;;;;;GAKG;AACH,wBAAgB,uBAAuB,CAAC,SAAS,EAAE,MAAM,GAAG,IAAI,CAE/D;AAED;;;GAGG;AACH,wBAAgB,uBAAuB,IAAI,MAAM,CAEhD;AAGD,eAAO,MAAM,wBAAwB,gCAA0B,CAAC;AAChE,eAAO,MAAM,wBAAwB,gCAA0B,CAAC;AAuChE;;GAEG;AACH,qBAAa,SAAU,SAAQ,KAAK;aAGhB,OAAO,EAAE,MAAM;aACf,QAAQ,CAAC,EAAE,MAAM;aACjB,MAAM,CAAC,EAAE,MAAM;gBAH/B,OAAO,EAAE,MAAM,EACC,OAAO,EAAE,MAAM,EACf,QAAQ,CAAC,EAAE,MAAM,YAAA,EACjB,MAAM,CAAC,EAAE,MAAM,YAAA;CAKlC;AAGD,eAAO,MAAM,SAAS,kBAAY,CAAC;AAEnC;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,KAAK;aAG1B,QAAQ,EAAE,CAAC,CAAC,QAAQ;gBADpC,OAAO,EAAE,MAAM,EACC,QAAQ,EAAE,CAAC,CAAC,QAAQ;CAKvC;AAGD,eAAO,MAAM,mBAAmB,4BAAsB,CAAC;AAMvD;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,kCAAkC;IAClC,MAAM,EAAE,OAAO,CAAC;IAChB,4CAA4C;IAC5C,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,sCAAsC;IACtC,QAAQ,EAAE,OAAO,CAAC;IAClB,sCAAsC;IACtC,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;GASG;AACH,wBAAgB,yBAAyB,CAAC,WAAW,EAAE,MAAM,GAAG,oBAAoB,CAgBnF;AAED;;;;;;;;GAQG;AACH,wBAAsB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC,eAAe,CAAC,CAyBtF;AAED;;;;;;;GAOG;AACH,wBAAgB,mBAAmB,CAAC,WAAW,EAAE,MAAM,GAAG,IAAI,CAO7D;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,kBAAkB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC;IAAC,MAAM,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,MAAM,CAAA;CAAC,CAAC,CA6CxG;AAED;;;;;;;;;;;GAWG;AACH,wBAAsB,mBAAmB,CAAC,WAAW,EAAE,MAAM,GAAG,OAAO,CAAC;IACtE,QAAQ,EAAE,MAAM,CAAC;IACjB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC,CAmGD;AAoFD;;;;;;GAMG;AACH,wBAAsB,cAAc,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAiB7E;AAGD,eAAO,MAAM,eAAe,uBAAiB,CAAC;AA+E9C;;GAEG;AACH,eAAO,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;CA+CtB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,gBAAgB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmM3B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;;;;;;;;;;;;;;;;;;CAiDrB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,WAAW;;;;;;;;;;;;;;;;;;;CAiFtB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;;;CA+CrB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;CA8CrB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,UAAU;;;;CAwBrB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;;;;;;;;;;;;;;;;;;;;CAmFrB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,SAAS;;;;;;;;CAyLpB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,gBAAgB;;;;;;;;;;CA8C3B,CAAC;AAMH,eAAO,MAAM,SAAS;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAWrB,CAAC;AAkCF;;GAEG;AACH,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;;;;CAMvB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAM5B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,WAAW;;;;;;;;;;;;;;;;;;;;;;;;;CAMtB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,YAAY;;;;;;;;;;;;;;;;;;;CAMvB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,WAAW;;;;;;;;;;CAMtB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,WAAW;;;;;;;;CAMtB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,WAAW;;;;CAMtB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;CAMrB,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,iBAAiB;;;;;;;;;;CAM5B,CAAC;AAEH;;GAEG;AACH,eAAO,MAAM,UAAU;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAUtB,CAAC"}
|
package/dist/index.d.ts
CHANGED
|
@@ -751,6 +751,8 @@ export declare const allTools: {
|
|
|
751
751
|
user_cancelled: "user_cancelled";
|
|
752
752
|
}>>;
|
|
753
753
|
failure_details: import("zod").ZodOptional<import("zod").ZodString>;
|
|
754
|
+
project_path: import("zod").ZodOptional<import("zod").ZodString>;
|
|
755
|
+
epic_id: import("zod").ZodOptional<import("zod").ZodString>;
|
|
754
756
|
};
|
|
755
757
|
execute(args: {
|
|
756
758
|
bead_id: string;
|
|
@@ -763,6 +765,8 @@ export declare const allTools: {
|
|
|
763
765
|
strategy?: "file-based" | "feature-based" | "risk-based" | "research-based" | undefined;
|
|
764
766
|
failure_mode?: "timeout" | "unknown" | "conflict" | "validation" | "tool_failure" | "context_overflow" | "dependency_blocked" | "user_cancelled" | undefined;
|
|
765
767
|
failure_details?: string | undefined;
|
|
768
|
+
project_path?: string | undefined;
|
|
769
|
+
epic_id?: string | undefined;
|
|
766
770
|
}, context: import("@opencode-ai/plugin").ToolContext): Promise<string>;
|
|
767
771
|
};
|
|
768
772
|
readonly swarm_research_phase: {
|
|
@@ -1619,6 +1623,8 @@ export { guardrailOutput, truncateWithBoundaries, createMetrics, DEFAULT_GUARDRA
|
|
|
1619
1623
|
* Includes:
|
|
1620
1624
|
* - SWARM_COMPACTION_CONTEXT - Prompt text for swarm state preservation
|
|
1621
1625
|
* - createCompactionHook - Factory function for the compaction hook
|
|
1626
|
+
* - scanSessionMessages - Scan session for swarm state
|
|
1627
|
+
* - ScannedSwarmState - Scanned state interface
|
|
1622
1628
|
*
|
|
1623
1629
|
* Usage:
|
|
1624
1630
|
* ```typescript
|
|
@@ -1629,7 +1635,40 @@ export { guardrailOutput, truncateWithBoundaries, createMetrics, DEFAULT_GUARDRA
|
|
|
1629
1635
|
* };
|
|
1630
1636
|
* ```
|
|
1631
1637
|
*/
|
|
1632
|
-
export { SWARM_COMPACTION_CONTEXT, createCompactionHook } from "./compaction-hook";
|
|
1638
|
+
export { SWARM_COMPACTION_CONTEXT, createCompactionHook, scanSessionMessages, type ScannedSwarmState, } from "./compaction-hook";
|
|
1639
|
+
/**
|
|
1640
|
+
* Re-export compaction-observability module
|
|
1641
|
+
*
|
|
1642
|
+
* Includes:
|
|
1643
|
+
* - CompactionPhase - Enum of compaction phases
|
|
1644
|
+
* - createMetricsCollector - Create a metrics collector
|
|
1645
|
+
* - recordPhaseStart, recordPhaseComplete - Phase timing
|
|
1646
|
+
* - recordPatternExtracted, recordPatternSkipped - Pattern tracking
|
|
1647
|
+
* - getMetricsSummary - Get metrics summary
|
|
1648
|
+
*
|
|
1649
|
+
* Types:
|
|
1650
|
+
* - CompactionMetrics - Mutable metrics collector
|
|
1651
|
+
* - CompactionMetricsSummary - Read-only summary snapshot
|
|
1652
|
+
*
|
|
1653
|
+
* Features:
|
|
1654
|
+
* - Phase timing breakdown (START, GATHER, DETECT, INJECT, COMPLETE)
|
|
1655
|
+
* - Pattern extraction tracking with reasons
|
|
1656
|
+
* - Success rate calculation
|
|
1657
|
+
* - Debug mode for verbose details
|
|
1658
|
+
* - JSON serializable for persistence
|
|
1659
|
+
*
|
|
1660
|
+
* Usage:
|
|
1661
|
+
* ```typescript
|
|
1662
|
+
* import { createMetricsCollector, CompactionPhase, recordPhaseStart } from "opencode-swarm-plugin";
|
|
1663
|
+
*
|
|
1664
|
+
* const metrics = createMetricsCollector({ session_id: "abc123" });
|
|
1665
|
+
* recordPhaseStart(metrics, CompactionPhase.DETECT);
|
|
1666
|
+
* // ... work ...
|
|
1667
|
+
* recordPhaseComplete(metrics, CompactionPhase.DETECT);
|
|
1668
|
+
* const summary = getMetricsSummary(metrics);
|
|
1669
|
+
* ```
|
|
1670
|
+
*/
|
|
1671
|
+
export { CompactionPhase, createMetricsCollector, recordPhaseStart, recordPhaseComplete, recordPatternExtracted, recordPatternSkipped, getMetricsSummary, type CompactionMetrics, type CompactionMetricsSummary, } from "./compaction-observability";
|
|
1633
1672
|
/**
|
|
1634
1673
|
* Re-export memory module
|
|
1635
1674
|
*
|
|
@@ -1645,6 +1684,46 @@ export { SWARM_COMPACTION_CONTEXT, createCompactionHook } from "./compaction-hoo
|
|
|
1645
1684
|
*/
|
|
1646
1685
|
export { memoryTools, createMemoryAdapter, resetMemoryCache, type MemoryAdapter, type StoreArgs, type FindArgs, type IdArgs, type ListArgs, type StoreResult, type FindResult, type StatsResult, type HealthResult, type OperationResult, } from "./memory-tools";
|
|
1647
1686
|
export type { Memory, SearchResult, SearchOptions } from "swarm-mail";
|
|
1687
|
+
/**
|
|
1688
|
+
* Re-export eval-history module
|
|
1689
|
+
*
|
|
1690
|
+
* Includes:
|
|
1691
|
+
* - recordEvalRun - Record eval run to JSONL history
|
|
1692
|
+
* - getScoreHistory - Get score history for a specific eval
|
|
1693
|
+
* - getPhase - Get current phase based on run count and variance
|
|
1694
|
+
* - calculateVariance - Calculate statistical variance of scores
|
|
1695
|
+
* - ensureEvalHistoryDir - Ensure history directory exists
|
|
1696
|
+
* - getEvalHistoryPath - Get path to eval history file
|
|
1697
|
+
*
|
|
1698
|
+
* Constants:
|
|
1699
|
+
* - DEFAULT_EVAL_HISTORY_PATH - Default path (.opencode/eval-history.jsonl)
|
|
1700
|
+
* - VARIANCE_THRESHOLD - Variance threshold for production phase (0.1)
|
|
1701
|
+
* - BOOTSTRAP_THRESHOLD - Run count for bootstrap phase (10)
|
|
1702
|
+
* - STABILIZATION_THRESHOLD - Run count for stabilization phase (50)
|
|
1703
|
+
*
|
|
1704
|
+
* Types:
|
|
1705
|
+
* - Phase - Progressive phases (bootstrap | stabilization | production)
|
|
1706
|
+
* - EvalRunRecord - Single eval run record
|
|
1707
|
+
*/
|
|
1708
|
+
export { recordEvalRun, getScoreHistory, getPhase, calculateVariance, ensureEvalHistoryDir, getEvalHistoryPath, DEFAULT_EVAL_HISTORY_PATH, VARIANCE_THRESHOLD, BOOTSTRAP_THRESHOLD, STABILIZATION_THRESHOLD, type Phase, type EvalRunRecord, } from "./eval-history";
|
|
1709
|
+
/**
|
|
1710
|
+
* Re-export eval-gates module
|
|
1711
|
+
*
|
|
1712
|
+
* Includes:
|
|
1713
|
+
* - checkGate - Check if current score passes quality gate
|
|
1714
|
+
* - DEFAULT_THRESHOLDS - Default regression thresholds by phase
|
|
1715
|
+
*
|
|
1716
|
+
* Types:
|
|
1717
|
+
* - GateResult - Result from gate check
|
|
1718
|
+
* - GateConfig - Configuration for gate thresholds
|
|
1719
|
+
*
|
|
1720
|
+
* Features:
|
|
1721
|
+
* - Phase-based regression thresholds (Bootstrap: none, Stabilization: 10%, Production: 5%)
|
|
1722
|
+
* - Configurable thresholds via GateConfig
|
|
1723
|
+
* - Clear pass/fail messages with baseline comparison
|
|
1724
|
+
* - Handles edge cases (division by zero, no history)
|
|
1725
|
+
*/
|
|
1726
|
+
export { checkGate, DEFAULT_THRESHOLDS, type GateResult, type GateConfig, } from "./eval-gates";
|
|
1648
1727
|
/**
|
|
1649
1728
|
* Re-export logger infrastructure
|
|
1650
1729
|
*
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,OAAO,KAAK,EAAE,MAAM,EAAsB,MAAM,qBAAqB,CAAC;AA6CtE;;;;;;;;;;;;;;;;;GAiBG;AACH,QAAA,MAAM,WAAW,EAAE,MA0QlB,CAAC;AAEF;;;;;;;GAOG;AACH,eAAe,WAAW,CAAC;AAM3B;;GAEG;AACH,cAAc,WAAW,CAAC;AAE1B;;;;;;;;;;;GAWG;AACH,cAAc,QAAQ,CAAC;AAEvB;;;;;;;;;;;;GAYG;AACH,OAAO,EACL,cAAc,EACd,cAAc,EACd,4BAA4B,EAC5B,4BAA4B,EAC5B,oBAAoB,EACpB,4BAA4B,EAC5B,4BAA4B,EAC5B,mBAAmB,EACnB,sBAAsB,EACtB,oBAAoB,EACpB,KAAK,cAAc,GACpB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;GAeG;AACH,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,4BAA4B,EAC5B,iBAAiB,EACjB,KAAK,cAAc,GACpB,MAAM,cAAc,CAAC;AAEtB;;;;;GAKG;AACH,OAAO,EAAE,KAAK,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD;;;;;;GAMG;AACH,OAAO,EACL,eAAe,EACf,mBAAmB,EACnB,eAAe,EACf,eAAe,GAChB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EACL,UAAU,EACV,UAAU,EACV,kBAAkB,EAClB,mBAAmB,EACnB,qBAAqB,EACrB,sBAAsB,EACtB,iBAAiB,EAEjB,UAAU,EACV,cAAc,EACd,wBAAwB,EACxB,KAAK,qBAAqB,EAC1B,KAAK,kBAAkB,GACxB,MAAM,SAAS,CAAC;AAMjB;;;;;;;GAOG;AACH,eAAO,MAAM,QAAQ
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,OAAO,KAAK,EAAE,MAAM,EAAsB,MAAM,qBAAqB,CAAC;AA6CtE;;;;;;;;;;;;;;;;;GAiBG;AACH,QAAA,MAAM,WAAW,EAAE,MA0QlB,CAAC;AAEF;;;;;;;GAOG;AACH,eAAe,WAAW,CAAC;AAM3B;;GAEG;AACH,cAAc,WAAW,CAAC;AAE1B;;;;;;;;;;;GAWG;AACH,cAAc,QAAQ,CAAC;AAEvB;;;;;;;;;;;;GAYG;AACH,OAAO,EACL,cAAc,EACd,cAAc,EACd,4BAA4B,EAC5B,4BAA4B,EAC5B,oBAAoB,EACpB,4BAA4B,EAC5B,4BAA4B,EAC5B,mBAAmB,EACnB,sBAAsB,EACtB,oBAAoB,EACpB,KAAK,cAAc,GACpB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;GAeG;AACH,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,4BAA4B,EAC5B,iBAAiB,EACjB,KAAK,cAAc,GACpB,MAAM,cAAc,CAAC;AAEtB;;;;;GAKG;AACH,OAAO,EAAE,KAAK,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD;;;;;;GAMG;AACH,OAAO,EACL,eAAe,EACf,mBAAmB,EACnB,eAAe,EACf,eAAe,GAChB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EACL,UAAU,EACV,UAAU,EACV,kBAAkB,EAClB,mBAAmB,EACnB,qBAAqB,EACrB,sBAAsB,EACtB,iBAAiB,EAEjB,UAAU,EACV,cAAc,EACd,wBAAwB,EACxB,KAAK,qBAAqB,EAC1B,KAAK,kBAAkB,GACxB,MAAM,SAAS,CAAC;AAMjB;;;;;;;GAOG;AACH,eAAO,MAAM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAWX,CAAC;AAEX;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,QAAQ,CAAC;AAEhD;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,aAAa,EACb,yBAAyB,EACzB,UAAU,EACV,UAAU,EACV,YAAY,EACZ,eAAe,EACf,qBAAqB,EACrB,yBAAyB,EACzB,sBAAsB,EACtB,KAAK,eAAe,EACpB,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,kBAAkB,GACxB,MAAM,WAAW,CAAC;AAEnB;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,SAAS,EACT,eAAe,EACf,aAAa,EACb,mBAAmB,EACnB,gBAAgB,EAChB,eAAe,EACf,eAAe,EACf,WAAW,EACX,sBAAsB,EACtB,cAAc,EACd,KAAK,QAAQ,EACb,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9D;;;;;;;;;;;;;;GAcG;AACH,OAAO,EACL,WAAW,EACX,cAAc,EACd,QAAQ,EACR,UAAU,EACV,gBAAgB,EAChB,yBAAyB,EACzB,qBAAqB,EACrB,wBAAwB,EACxB,kBAAkB,EAClB,KAAK,KAAK,EACV,KAAK,aAAa,EAClB,KAAK,QAAQ,GACd,MAAM,UAAU,CAAC;AAElB;;;;;;;;;;;;;;;;;;;GAmBG;AACH,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAExD;;;;;;;;;;;;GAYG;AACH,OAAO,EACL,oBAAoB,EACpB,iBAAiB,EACjB,iBAAiB,EACjB,mBAAmB,EACnB,mBAAmB,EACnB,wBAAwB,EACxB,sBAAsB,EACtB,4BAA4B,EAC5B,8BAA8B,EAC9B,KAAK,cAAc,EACnB,KAAK,oBAAoB,EACzB,KAAK,qBAAqB,EAC1B,KAAK,yBAAyB,GAC/B,MAAM,mBAAmB,CAAC;AAE3B;;;;;;;;;;;GAWG;AACH,OAAO,EACL,iBAAiB,EACjB,aAAa,EACb,qBAAqB,EACrB,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,EACjB,KAAK,eAAe,GACrB,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,aAAa,EACb,wBAAwB,EACxB,KAAK,eAAe,EACpB,KAAK,eAAe,EACpB,KAAK,gBAAgB,GACtB,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;GAiBG;AACH,OAAO,EACL,wBAAwB,EACxB,oBAAoB,EACpB,mBAAmB,EACnB,KAAK,iBAAiB,GACvB,MAAM,mBAAmB,CAAC;AAE3B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,gBAAgB,EAChB,mBAAmB,EACnB,sBAAsB,EACtB,oBAAoB,EACpB,iBAAiB,EACjB,KAAK,iBAAiB,EACtB,KAAK,wBAAwB,GAC9B,MAAM,4BAA4B,CAAC;AAEpC;;;;;;;;;;;;GAYG;AACH,OAAO,EACL,WAAW,EACX,mBAAmB,EACnB,gBAAgB,EAChB,KAAK,aAAa,EAClB,KAAK,SAAS,EACd,KAAK,QAAQ,EACb,KAAK,MAAM,EACX,KAAK,QAAQ,EACb,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,WAAW,EAChB,KAAK,YAAY,EACjB,KAAK,eAAe,GACrB,MAAM,gBAAgB,CAAC;AACxB,YAAY,EAAE,MAAM,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEtE;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,OAAO,EACL,aAAa,EACb,eAAe,EACf,QAAQ,EACR,iBAAiB,EACjB,oBAAoB,EACpB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EAClB,mBAAmB,EACnB,uBAAuB,EACvB,KAAK,KAAK,EACV,KAAK,aAAa,GACnB,MAAM,gBAAgB,CAAC;AAExB;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EACL,SAAS,EACT,kBAAkB,EAClB,KAAK,UAAU,EACf,KAAK,UAAU,GAChB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAEhE;;;;;;;;;;;GAWG;AACH,OAAO,EACL,gBAAgB,EAChB,oBAAoB,EACpB,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,WAAW,GACjB,MAAM,kBAAkB,CAAC"}
|