opencode-swarm-plugin 0.43.0 → 0.44.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cass.characterization.test.ts +422 -0
- package/bin/swarm.serve.test.ts +6 -4
- package/bin/swarm.test.ts +68 -0
- package/bin/swarm.ts +81 -8
- package/dist/compaction-prompt-scoring.js +139 -0
- package/dist/contributor-tools.d.ts +42 -0
- package/dist/contributor-tools.d.ts.map +1 -0
- package/dist/eval-capture.js +12811 -0
- package/dist/hive.d.ts.map +1 -1
- package/dist/index.d.ts +12 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +7728 -62590
- package/dist/plugin.js +23833 -78695
- package/dist/sessions/agent-discovery.d.ts +59 -0
- package/dist/sessions/agent-discovery.d.ts.map +1 -0
- package/dist/sessions/index.d.ts +10 -0
- package/dist/sessions/index.d.ts.map +1 -0
- package/dist/swarm-orchestrate.d.ts.map +1 -1
- package/dist/swarm-prompts.d.ts.map +1 -1
- package/dist/swarm-review.d.ts.map +1 -1
- package/package.json +17 -5
- package/.changeset/swarm-insights-data-layer.md +0 -63
- package/.hive/analysis/eval-failure-analysis-2025-12-25.md +0 -331
- package/.hive/analysis/session-data-quality-audit.md +0 -320
- package/.hive/eval-results.json +0 -483
- package/.hive/issues.jsonl +0 -138
- package/.hive/memories.jsonl +0 -729
- package/.opencode/eval-history.jsonl +0 -327
- package/.turbo/turbo-build.log +0 -9
- package/CHANGELOG.md +0 -2255
- package/SCORER-ANALYSIS.md +0 -598
- package/docs/analysis/subagent-coordination-patterns.md +0 -902
- package/docs/analysis-socratic-planner-pattern.md +0 -504
- package/docs/planning/ADR-001-monorepo-structure.md +0 -171
- package/docs/planning/ADR-002-package-extraction.md +0 -393
- package/docs/planning/ADR-003-performance-improvements.md +0 -451
- package/docs/planning/ADR-004-message-queue-features.md +0 -187
- package/docs/planning/ADR-005-devtools-observability.md +0 -202
- package/docs/planning/ADR-007-swarm-enhancements-worktree-review.md +0 -168
- package/docs/planning/ADR-008-worker-handoff-protocol.md +0 -293
- package/docs/planning/ADR-009-oh-my-opencode-patterns.md +0 -353
- package/docs/planning/ROADMAP.md +0 -368
- package/docs/semantic-memory-cli-syntax.md +0 -123
- package/docs/swarm-mail-architecture.md +0 -1147
- package/docs/testing/context-recovery-test.md +0 -470
- package/evals/ARCHITECTURE.md +0 -1189
- package/evals/README.md +0 -768
- package/evals/compaction-prompt.eval.ts +0 -149
- package/evals/compaction-resumption.eval.ts +0 -289
- package/evals/coordinator-behavior.eval.ts +0 -307
- package/evals/coordinator-session.eval.ts +0 -154
- package/evals/evalite.config.ts.bak +0 -15
- package/evals/example.eval.ts +0 -31
- package/evals/fixtures/compaction-cases.ts +0 -350
- package/evals/fixtures/compaction-prompt-cases.ts +0 -311
- package/evals/fixtures/coordinator-sessions.ts +0 -328
- package/evals/fixtures/decomposition-cases.ts +0 -105
- package/evals/lib/compaction-loader.test.ts +0 -248
- package/evals/lib/compaction-loader.ts +0 -320
- package/evals/lib/data-loader.evalite-test.ts +0 -289
- package/evals/lib/data-loader.test.ts +0 -345
- package/evals/lib/data-loader.ts +0 -281
- package/evals/lib/llm.ts +0 -115
- package/evals/scorers/compaction-prompt-scorers.ts +0 -145
- package/evals/scorers/compaction-scorers.ts +0 -305
- package/evals/scorers/coordinator-discipline.evalite-test.ts +0 -539
- package/evals/scorers/coordinator-discipline.ts +0 -325
- package/evals/scorers/index.test.ts +0 -146
- package/evals/scorers/index.ts +0 -328
- package/evals/scorers/outcome-scorers.evalite-test.ts +0 -27
- package/evals/scorers/outcome-scorers.ts +0 -349
- package/evals/swarm-decomposition.eval.ts +0 -121
- package/examples/commands/swarm.md +0 -745
- package/examples/plugin-wrapper-template.ts +0 -2426
- package/examples/skills/hive-workflow/SKILL.md +0 -212
- package/examples/skills/skill-creator/SKILL.md +0 -223
- package/examples/skills/swarm-coordination/SKILL.md +0 -292
- package/global-skills/cli-builder/SKILL.md +0 -344
- package/global-skills/cli-builder/references/advanced-patterns.md +0 -244
- package/global-skills/learning-systems/SKILL.md +0 -644
- package/global-skills/skill-creator/LICENSE.txt +0 -202
- package/global-skills/skill-creator/SKILL.md +0 -352
- package/global-skills/skill-creator/references/output-patterns.md +0 -82
- package/global-skills/skill-creator/references/workflows.md +0 -28
- package/global-skills/swarm-coordination/SKILL.md +0 -995
- package/global-skills/swarm-coordination/references/coordinator-patterns.md +0 -235
- package/global-skills/swarm-coordination/references/strategies.md +0 -138
- package/global-skills/system-design/SKILL.md +0 -213
- package/global-skills/testing-patterns/SKILL.md +0 -430
- package/global-skills/testing-patterns/references/dependency-breaking-catalog.md +0 -586
- package/opencode-swarm-plugin-0.30.7.tgz +0 -0
- package/opencode-swarm-plugin-0.31.0.tgz +0 -0
- package/scripts/cleanup-test-memories.ts +0 -346
- package/scripts/init-skill.ts +0 -222
- package/scripts/migrate-unknown-sessions.ts +0 -349
- package/scripts/validate-skill.ts +0 -204
- package/src/agent-mail.ts +0 -1724
- package/src/anti-patterns.test.ts +0 -1167
- package/src/anti-patterns.ts +0 -448
- package/src/compaction-capture.integration.test.ts +0 -257
- package/src/compaction-hook.test.ts +0 -838
- package/src/compaction-hook.ts +0 -1204
- package/src/compaction-observability.integration.test.ts +0 -139
- package/src/compaction-observability.test.ts +0 -187
- package/src/compaction-observability.ts +0 -324
- package/src/compaction-prompt-scorers.test.ts +0 -475
- package/src/compaction-prompt-scoring.ts +0 -300
- package/src/dashboard.test.ts +0 -611
- package/src/dashboard.ts +0 -462
- package/src/error-enrichment.test.ts +0 -403
- package/src/error-enrichment.ts +0 -219
- package/src/eval-capture.test.ts +0 -1015
- package/src/eval-capture.ts +0 -929
- package/src/eval-gates.test.ts +0 -306
- package/src/eval-gates.ts +0 -218
- package/src/eval-history.test.ts +0 -508
- package/src/eval-history.ts +0 -214
- package/src/eval-learning.test.ts +0 -378
- package/src/eval-learning.ts +0 -360
- package/src/eval-runner.test.ts +0 -223
- package/src/eval-runner.ts +0 -402
- package/src/export-tools.test.ts +0 -476
- package/src/export-tools.ts +0 -257
- package/src/hive.integration.test.ts +0 -2241
- package/src/hive.ts +0 -1628
- package/src/index.ts +0 -935
- package/src/learning.integration.test.ts +0 -1815
- package/src/learning.ts +0 -1079
- package/src/logger.test.ts +0 -189
- package/src/logger.ts +0 -135
- package/src/mandate-promotion.test.ts +0 -473
- package/src/mandate-promotion.ts +0 -239
- package/src/mandate-storage.integration.test.ts +0 -601
- package/src/mandate-storage.test.ts +0 -578
- package/src/mandate-storage.ts +0 -794
- package/src/mandates.ts +0 -540
- package/src/memory-tools.test.ts +0 -195
- package/src/memory-tools.ts +0 -344
- package/src/memory.integration.test.ts +0 -334
- package/src/memory.test.ts +0 -158
- package/src/memory.ts +0 -527
- package/src/model-selection.test.ts +0 -188
- package/src/model-selection.ts +0 -68
- package/src/observability-tools.test.ts +0 -359
- package/src/observability-tools.ts +0 -871
- package/src/output-guardrails.test.ts +0 -438
- package/src/output-guardrails.ts +0 -381
- package/src/pattern-maturity.test.ts +0 -1160
- package/src/pattern-maturity.ts +0 -525
- package/src/planning-guardrails.test.ts +0 -491
- package/src/planning-guardrails.ts +0 -438
- package/src/plugin.ts +0 -23
- package/src/post-compaction-tracker.test.ts +0 -251
- package/src/post-compaction-tracker.ts +0 -237
- package/src/query-tools.test.ts +0 -636
- package/src/query-tools.ts +0 -324
- package/src/rate-limiter.integration.test.ts +0 -466
- package/src/rate-limiter.ts +0 -774
- package/src/replay-tools.test.ts +0 -496
- package/src/replay-tools.ts +0 -240
- package/src/repo-crawl.integration.test.ts +0 -441
- package/src/repo-crawl.ts +0 -610
- package/src/schemas/cell-events.test.ts +0 -347
- package/src/schemas/cell-events.ts +0 -807
- package/src/schemas/cell.ts +0 -257
- package/src/schemas/evaluation.ts +0 -166
- package/src/schemas/index.test.ts +0 -199
- package/src/schemas/index.ts +0 -286
- package/src/schemas/mandate.ts +0 -232
- package/src/schemas/swarm-context.ts +0 -115
- package/src/schemas/task.ts +0 -161
- package/src/schemas/worker-handoff.test.ts +0 -302
- package/src/schemas/worker-handoff.ts +0 -131
- package/src/skills.integration.test.ts +0 -1192
- package/src/skills.test.ts +0 -643
- package/src/skills.ts +0 -1549
- package/src/storage.integration.test.ts +0 -341
- package/src/storage.ts +0 -884
- package/src/structured.integration.test.ts +0 -817
- package/src/structured.test.ts +0 -1046
- package/src/structured.ts +0 -762
- package/src/swarm-decompose.test.ts +0 -188
- package/src/swarm-decompose.ts +0 -1302
- package/src/swarm-deferred.integration.test.ts +0 -157
- package/src/swarm-deferred.test.ts +0 -38
- package/src/swarm-insights.test.ts +0 -214
- package/src/swarm-insights.ts +0 -459
- package/src/swarm-mail.integration.test.ts +0 -970
- package/src/swarm-mail.ts +0 -739
- package/src/swarm-orchestrate.integration.test.ts +0 -282
- package/src/swarm-orchestrate.test.ts +0 -548
- package/src/swarm-orchestrate.ts +0 -3084
- package/src/swarm-prompts.test.ts +0 -1270
- package/src/swarm-prompts.ts +0 -2077
- package/src/swarm-research.integration.test.ts +0 -701
- package/src/swarm-research.test.ts +0 -698
- package/src/swarm-research.ts +0 -472
- package/src/swarm-review.integration.test.ts +0 -285
- package/src/swarm-review.test.ts +0 -879
- package/src/swarm-review.ts +0 -709
- package/src/swarm-strategies.ts +0 -407
- package/src/swarm-worktree.test.ts +0 -501
- package/src/swarm-worktree.ts +0 -575
- package/src/swarm.integration.test.ts +0 -2377
- package/src/swarm.ts +0 -38
- package/src/tool-adapter.integration.test.ts +0 -1221
- package/src/tool-availability.ts +0 -461
- package/tsconfig.json +0 -28
package/src/learning.ts
DELETED
|
@@ -1,1079 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Learning Module - Confidence decay, feedback scoring, and outcome tracking
|
|
3
|
-
*
|
|
4
|
-
* Implements patterns from cass-memory for learning from swarm outcomes:
|
|
5
|
-
* - Confidence decay: evaluation criteria weights fade unless revalidated
|
|
6
|
-
* - Feedback events: track helpful/harmful signals from task outcomes
|
|
7
|
-
* - Outcome scoring: implicit feedback from duration, errors, retries
|
|
8
|
-
*
|
|
9
|
-
* @see https://github.com/Dicklesworthstone/cass_memory_system/blob/main/src/scoring.ts
|
|
10
|
-
* @see https://github.com/Dicklesworthstone/cass_memory_system/blob/main/src/outcome.ts
|
|
11
|
-
*/
|
|
12
|
-
import { z } from "zod";
|
|
13
|
-
|
|
14
|
-
// ============================================================================
|
|
15
|
-
// Schemas
|
|
16
|
-
// ============================================================================
|
|
17
|
-
|
|
18
|
-
// The three classifications a feedback signal can carry.
const FEEDBACK_TYPE_VALUES = ["helpful", "harmful", "neutral"] as const;

/**
 * Enum schema for the classification of a feedback event:
 * `"helpful"`, `"harmful"` or `"neutral"`.
 */
export const FeedbackTypeSchema = z.enum(FEEDBACK_TYPE_VALUES);
export type FeedbackType = z.infer<typeof FeedbackTypeSchema>;
|
|
23
|
-
|
|
24
|
-
/**
 * Schema for one feedback event about an evaluation criterion.
 *
 * An event is recorded once a criterion's verdict (for example "type_safe")
 * is later proven correct or incorrect, so that the criterion's future
 * weight can be adjusted.
 */
export const FeedbackEventSchema = z.object({
  /** Unique identifier of this feedback event. */
  id: z.string(),
  /** Name of the criterion the feedback applies to. */
  criterion: z.string(),
  /** Helpful / harmful / neutral classification of the criterion. */
  type: FeedbackTypeSchema,
  /** When the feedback was recorded (ISO-8601 by convention; only validated as a string). */
  timestamp: z.string(),
  /** Optional explanation of why this feedback was given. */
  context: z.string().optional(),
  /** Optional ID of the bead this feedback relates to. */
  bead_id: z.string().optional(),
  /** Value before decay, in [0, 1]; defaults to 1 (full weight). */
  raw_value: z.number().min(0).max(1).default(1),
});
export type FeedbackEvent = z.infer<typeof FeedbackEventSchema>;
|
|
47
|
-
|
|
48
|
-
/**
 * Schema for the learned weight of a criterion, together with the
 * bookkeeping used for decay tracking.
 */
export const CriterionWeightSchema = z.object({
  /** Criterion name, e.g. "type_safe". */
  criterion: z.string(),
  /** Current (post-decay) weight in [0, 1]. */
  weight: z.number().min(0).max(1),
  /** How many helpful feedback events were seen. */
  helpful_count: z.number().int().min(0),
  /** How many harmful feedback events were seen. */
  harmful_count: z.number().int().min(0),
  /** Timestamp of the last validation (ISO-8601 by convention), if any. */
  last_validated: z.string().optional(),
  /** Decay half-life in days; must be positive, defaults to 90. */
  half_life_days: z.number().positive().default(90),
});
export type CriterionWeight = z.infer<typeof CriterionWeightSchema>;
|
|
66
|
-
|
|
67
|
-
// Categories of error that can be recorded while a subtask executes.
const ERROR_TYPE_VALUES = [
  "validation",
  "timeout",
  "conflict",
  "tool_failure",
  "unknown",
] as const;

/**
 * Enum schema for the kinds of error that can occur during subtask execution.
 */
export const ErrorTypeSchema = z.enum(ERROR_TYPE_VALUES);
export type ErrorType = z.infer<typeof ErrorTypeSchema>;
|
|
78
|
-
|
|
79
|
-
/**
 * Schema for one entry of the error accumulator.
 *
 * Entries are collected while a subtask runs and can be fed into retry
 * prompts so that agents learn from earlier failures.
 */
export const ErrorEntrySchema = z.object({
  /** Unique identifier of this error entry. */
  id: z.string(),
  /** ID of the bead the error relates to. */
  bead_id: z.string(),
  /** Category of the error. */
  error_type: ErrorTypeSchema,
  /** Human-readable description of the error. */
  message: z.string(),
  /** Stack trace for debugging, when available. */
  stack_trace: z.string().optional(),
  /** Name of the tool that failed, when applicable. */
  tool_name: z.string().optional(),
  /** When the error occurred (ISO-8601 by convention; only validated as a string). */
  timestamp: z.string(),
  /** Whether the error has been resolved; defaults to false. */
  resolved: z.boolean().default(false),
  /** Optional description of what was happening when the error occurred. */
  context: z.string().optional(),
});
export type ErrorEntry = z.infer<typeof ErrorEntrySchema>;
|
|
106
|
-
|
|
107
|
-
// Names of the decomposition approaches that can be recorded for a task.
const DECOMPOSITION_STRATEGY_VALUES = [
  "file-based",
  "feature-based",
  "risk-based",
  "research-based",
] as const;

/**
 * Enum schema identifying which decomposition strategy was used.
 */
export const DecompositionStrategySchema = z.enum(
  DECOMPOSITION_STRATEGY_VALUES,
);
export type DecompositionStrategy = z.infer<typeof DecompositionStrategySchema>;
|
|
117
|
-
|
|
118
|
-
// Classification labels for why a subtask failed.
const FAILURE_MODE_VALUES = [
  "timeout",
  "conflict",
  "validation",
  "tool_failure",
  "context_overflow",
  "dependency_blocked",
  "user_cancelled",
  "unknown",
] as const;

/**
 * Enum schema for the failure mode taxonomy.
 *
 * NOTE(review): the previous comment described this as "imported from
 * evaluation.ts", but the values are declared locally here — confirm the two
 * lists are kept in sync.
 */
export const FailureModeSchema = z.enum(FAILURE_MODE_VALUES);
export type FailureMode = z.infer<typeof FailureModeSchema>;
|
|
132
|
-
|
|
133
|
-
/**
 * Schema for the outcome signals of a completed subtask.
 *
 * These are implicit signals used to score decomposition quality without
 * asking the user for explicit feedback.
 */
export const OutcomeSignalsSchema = z.object({
  /** Bead ID of the subtask. */
  bead_id: z.string(),
  /** Elapsed time in milliseconds (non-negative integer). */
  duration_ms: z.number().int().min(0),
  /** How many errors were encountered (non-negative integer). */
  error_count: z.number().int().min(0),
  /** How many retries were attempted (non-negative integer). */
  retry_count: z.number().int().min(0),
  /** Whether the subtask ultimately succeeded. */
  success: z.boolean(),
  /** Files that were modified; defaults to an empty list. */
  files_touched: z.array(z.string()).default([]),
  /** When the outcome was recorded (ISO-8601 by convention; only validated as a string). */
  timestamp: z.string(),
  /** Decomposition strategy used for the task, if known. */
  strategy: DecompositionStrategySchema.optional(),
  /** Failure classification; meant for `success === false` (not enforced by the schema). */
  failure_mode: FailureModeSchema.optional(),
  /** Free-text detail about the failure. */
  failure_details: z.string().optional(),
});
export type OutcomeSignals = z.infer<typeof OutcomeSignalsSchema>;
|
|
162
|
-
|
|
163
|
-
/**
 * Schema for an outcome after scoring: the original signals plus the
 * feedback type inferred from them.
 */
export const ScoredOutcomeSchema = z.object({
  /** The outcome signals that were scored. */
  signals: OutcomeSignalsSchema,
  /** Feedback classification inferred from the signals. */
  type: FeedbackTypeSchema,
  /** Score after decay, in [0, 1]. */
  decayed_value: z.number().min(0).max(1),
  /** Human-readable explanation of how the score was reached. */
  reasoning: z.string(),
});
export type ScoredOutcome = z.infer<typeof ScoredOutcomeSchema>;
|
|
177
|
-
|
|
178
|
-
// ============================================================================
|
|
179
|
-
// Configuration
|
|
180
|
-
// ============================================================================
|
|
181
|
-
|
|
182
|
-
/**
 * Tunable parameters of the learning module.
 */
export interface LearningConfig {
  /** Confidence-decay half-life, in days. */
  halfLifeDays: number;
  /** Number of feedback events required before weights are adjusted. */
  minFeedbackForAdjustment: number;
  /** Harmful ratio above which a criterion is deprecated. */
  maxHarmfulRatio: number;
  /** Durations (ms) below this count as a "fast" completion. */
  fastCompletionThresholdMs: number;
  /** Durations (ms) above this count as a "slow" completion. */
  slowCompletionThresholdMs: number;
  /** Largest error count that is still scored as moderately helpful. */
  maxErrorsForHelpful: number;
}

/**
 * Defaults used whenever a caller does not supply its own configuration:
 * 90-day half-life, 3 events before adjusting, 30% harmful ratio,
 * 5-minute "fast" and 30-minute "slow" thresholds, and up to 2 errors.
 */
export const DEFAULT_LEARNING_CONFIG: LearningConfig = {
  halfLifeDays: 90,
  minFeedbackForAdjustment: 3,
  maxHarmfulRatio: 0.3,
  fastCompletionThresholdMs: 1000 * 60 * 5, // 5 minutes
  slowCompletionThresholdMs: 1000 * 60 * 30, // 30 minutes
  maxErrorsForHelpful: 2,
};
|
|
208
|
-
|
|
209
|
-
// ============================================================================
|
|
210
|
-
// Core Functions
|
|
211
|
-
// ============================================================================
|
|
212
|
-
|
|
213
|
-
/**
 * Calculate a decay factor using the half-life formula.
 *
 * The factor halves every `halfLifeDays` days: `0.5 ^ (ageDays / halfLife)`.
 * Timestamps in the future are treated as age 0 (factor 1).
 *
 * @param timestamp - When the event occurred (ISO-8601)
 * @param now - Current time (defaults to the moment of the call)
 * @param halfLifeDays - Half-life in days (default: 90). Non-positive or NaN
 *   values fall back to a 1-day half-life.
 * @returns Decay factor between 0 and 1. Returns 0 when the age cannot be
 *   determined (unparseable `timestamp` or invalid `now`), so that a
 *   malformed event carries no weight instead of propagating NaN.
 *
 * @example
 * // Event from 90 days ago with 90-day half-life
 * calculateDecayedValue("2024-09-08T00:00:00Z", new Date("2024-12-07"), 90)
 * // Returns ~0.5
 */
export function calculateDecayedValue(
  timestamp: string,
  now: Date = new Date(),
  halfLifeDays: number = 90,
): number {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  // Prevent division by zero. Testing `> 0` (instead of `<= 0`) also
  // rejects NaN, which would otherwise slip through and yield NaN.
  const safeHalfLife = halfLifeDays > 0 ? halfLifeDays : 1;

  // An invalid Date reports NaN from getTime(); the subtraction keeps it NaN.
  const ageMs = now.getTime() - new Date(timestamp).getTime();
  if (Number.isNaN(ageMs)) {
    return 0;
  }

  const ageDays = Math.max(0, ageMs / MS_PER_DAY);
  return Math.pow(0.5, ageDays / safeHalfLife);
}
|
|
243
|
-
|
|
244
|
-
/**
 * Calculate a criterion's weight from its feedback events.
 *
 * Each event's `raw_value` is multiplied by its decay factor. Helpful and
 * harmful values are summed separately and the weight is
 * `helpful / (helpful + harmful)`, floored at 0.1. Events of any other type
 * (e.g. "neutral") are neither counted nor summed. When there is no
 * helpful/harmful mass at all, the weight is 1.0.
 *
 * An event whose decayed value is not a finite number (for example because
 * its timestamp cannot be parsed) is still counted but contributes 0, so a
 * single malformed event can no longer turn the totals into NaN and silently
 * reset the weight to 1.0.
 *
 * The reported criterion name is taken from the first event; the events are
 * not checked to all belong to the same criterion.
 *
 * @param events - Feedback events for this criterion
 * @param config - Learning configuration (supplies the half-life)
 * @returns Weight record with counts and the most recent helpful timestamp;
 *   for an empty `events` array, a default record named "unknown" with
 *   weight 1.0
 */
export function calculateCriterionWeight(
  events: FeedbackEvent[],
  config: LearningConfig = DEFAULT_LEARNING_CONFIG,
): CriterionWeight {
  // Return early with default weight if events array is empty
  if (events.length === 0) {
    return {
      criterion: "unknown",
      weight: 1.0,
      helpful_count: 0,
      harmful_count: 0,
      last_validated: undefined,
      half_life_days: config.halfLifeDays,
    };
  }

  const now = new Date();
  let helpfulSum = 0;
  let harmfulSum = 0;
  let helpfulCount = 0;
  let harmfulCount = 0;
  let lastValidated: string | undefined;
  let lastValidatedMs = Number.NEGATIVE_INFINITY;

  for (const event of events) {
    const decayed = calculateDecayedValue(
      event.timestamp,
      now,
      config.halfLifeDays,
    );
    const value = event.raw_value * decayed;
    // Guard the running sums against NaN/Infinity from malformed events.
    const contribution = Number.isFinite(value) ? value : 0;

    if (event.type === "helpful") {
      helpfulSum += contribution;
      helpfulCount++;
      // Compare instants, not strings: lexicographic order is only correct
      // when every timestamp uses the same format and offset. Unparseable
      // timestamps give NaN, which never compares greater and is skipped.
      const eventMs = Date.parse(event.timestamp);
      if (eventMs > lastValidatedMs) {
        lastValidatedMs = eventMs;
        lastValidated = event.timestamp;
      }
    } else if (event.type === "harmful") {
      harmfulSum += contribution;
      harmfulCount++;
    }
  }

  // Calculate weight: helpful / (helpful + harmful), with minimum of 0.1
  const total = helpfulSum + harmfulSum;
  const weight = total > 0 ? Math.max(0.1, helpfulSum / total) : 1.0;

  return {
    criterion: events[0].criterion,
    weight,
    helpful_count: helpfulCount,
    harmful_count: harmfulCount,
    last_validated: lastValidated,
    half_life_days: config.halfLifeDays,
  };
}
|
|
310
|
-
|
|
311
|
-
/**
 * Score implicit feedback from task outcome signals.
 *
 * Four component scores (each 0-1, higher = better) are combined into a
 * weighted average:
 * - Success (weight 0.4): 1.0 if succeeded, 0.0 otherwise
 * - Duration (0.2): 1.0 below the fast threshold, 0.2 above the slow
 *   threshold, 0.6 in between
 * - Errors (0.2): 1.0 for none, 0.6 up to `maxErrorsForHelpful`, else 0.2
 * - Retries (0.2): 1.0 for none, 0.7 for one, else 0.3
 *
 * An average of at least 0.7 is "helpful", at most 0.4 is "harmful", and
 * anything in between is "neutral".
 *
 * The duration wording in `reasoning` ("Fast" / "Moderate" / "Slow") reflects
 * the measured duration. Previously every helpful outcome was labelled
 * "Fast completion" and every harmful one "Slow completion", even when the
 * duration said otherwise.
 *
 * @param signals - Outcome signals from completed subtask
 * @param config - Learning configuration
 * @returns Scored outcome; `decayed_value` is the weighted average multiplied
 *   by the decay factor for `signals.timestamp`
 */
export function scoreImplicitFeedback(
  signals: OutcomeSignals,
  config: LearningConfig = DEFAULT_LEARNING_CONFIG,
): ScoredOutcome {
  const now = new Date();
  const decayed = calculateDecayedValue(
    signals.timestamp,
    now,
    config.halfLifeDays,
  );

  // Duration classification; the fast check takes precedence over slow.
  const isFast = signals.duration_ms < config.fastCompletionThresholdMs;
  const isSlow =
    !isFast && signals.duration_ms > config.slowCompletionThresholdMs;

  // Score components (each 0-1, higher = better)
  const durationScore = isFast ? 1.0 : isSlow ? 0.2 : 0.6;

  const errorScore =
    signals.error_count === 0
      ? 1.0
      : signals.error_count <= config.maxErrorsForHelpful
        ? 0.6
        : 0.2;

  const retryScore =
    signals.retry_count === 0 ? 1.0 : signals.retry_count === 1 ? 0.7 : 0.3;

  const successScore = signals.success ? 1.0 : 0.0;

  // Weighted average (success matters most)
  const rawScore =
    successScore * 0.4 +
    durationScore * 0.2 +
    errorScore * 0.2 +
    retryScore * 0.2;

  // Shared fragments of the reasoning text.
  const seconds = Math.round(signals.duration_ms / 1000);
  const pace = isFast ? "Fast" : isSlow ? "Slow" : "Moderate";
  const counts = `${signals.error_count} errors, ${signals.retry_count} retries`;
  const result = signals.success ? "succeeded" : "failed";

  // Determine feedback type
  let type: FeedbackType;
  let reasoning: string;

  if (rawScore >= 0.7) {
    type = "helpful";
    reasoning = `${pace} completion (${seconds}s), ${counts}, ${result}`;
  } else if (rawScore <= 0.4) {
    type = "harmful";
    reasoning = `${pace} completion (${seconds}s), ${counts}, ${result}`;
  } else {
    type = "neutral";
    reasoning = `Mixed signals: ${seconds}s, ${counts}`;
  }

  return {
    signals,
    type,
    decayed_value: rawScore * decayed,
    reasoning,
  };
}
|
|
392
|
-
|
|
393
|
-
/**
 * Turn a scored outcome into a feedback event for one criterion.
 *
 * The resulting event can be stored and later fed into criterion weight
 * calculation. Its id is built from the bead ID, the criterion name and the
 * current epoch milliseconds.
 *
 * NOTE(review): `raw_value` receives the outcome's already-decayed value
 * while `timestamp` keeps the original outcome time — if the weight
 * calculation decays by timestamp again, the decay is applied twice; confirm
 * this is intended.
 *
 * @param outcome - Scored outcome
 * @param criterion - Which criterion this feedback applies to
 * @returns Feedback event
 */
export function outcomeToFeedback(
  outcome: ScoredOutcome,
  criterion: string,
): FeedbackEvent {
  const { signals, type, reasoning, decayed_value } = outcome;
  const beadId = signals.bead_id;
  const eventId = `${beadId}-${criterion}-${Date.now()}`;

  return {
    id: eventId,
    criterion,
    type,
    timestamp: signals.timestamp,
    context: reasoning,
    bead_id: beadId,
    raw_value: decayed_value,
  };
}
|
|
417
|
-
|
|
418
|
-
/**
 * Apply criterion weights to evaluation scores.
 *
 * Each raw score is multiplied by the learned weight of its criterion, so
 * criteria with low confidence have reduced impact. A criterion with no
 * entry in `weights` keeps its raw score (weight 1.0).
 *
 * The result is built with `Object.fromEntries`, which always creates own
 * properties. Assigning `result[name] = …` on a plain object would, for a
 * criterion named "__proto__", replace the result's prototype and silently
 * drop that criterion.
 *
 * @param criteria - Map of criterion name to raw score (0-1)
 * @param weights - Map of criterion name to weight
 * @returns For every criterion: the raw score, the weighted score and the
 *   weight that was applied
 */
export function applyWeights(
  criteria: Record<string, number>,
  weights: Record<string, CriterionWeight>,
): Record<string, { raw: number; weighted: number; weight: number }> {
  return Object.fromEntries(
    Object.entries(criteria).map(([name, rawScore]) => {
      const weight = weights[name]?.weight ?? 1.0;
      return [name, { raw: rawScore, weighted: rawScore * weight, weight }];
    }),
  );
}
|
|
448
|
-
|
|
449
|
-
/**
 * Decide whether a criterion should be deprecated.
 *
 * Deprecation requires both: at least `minFeedbackForAdjustment` helpful
 * plus harmful events, and a harmful share strictly greater than
 * `maxHarmfulRatio`.
 *
 * @param weight - Criterion weight with feedback counts
 * @param config - Learning configuration
 * @returns `true` when the criterion should be deprecated
 */
export function shouldDeprecateCriterion(
  weight: CriterionWeight,
  config: LearningConfig = DEFAULT_LEARNING_CONFIG,
): boolean {
  const { helpful_count: helpful, harmful_count: harmful } = weight;
  const feedbackTotal = helpful + harmful;

  const hasEnoughFeedback = !(feedbackTotal < config.minFeedbackForAdjustment);
  return hasEnoughFeedback && harmful / feedbackTotal > config.maxHarmfulRatio;
}
|
|
471
|
-
|
|
472
|
-
// ============================================================================
|
|
473
|
-
// Storage Helpers
|
|
474
|
-
// ============================================================================
|
|
475
|
-
|
|
476
|
-
/**
 * Persistence contract for feedback events.
 *
 * Backends are interchangeable (file system, SQLite, ...); every operation
 * is asynchronous.
 */
export interface FeedbackStorage {
  /** Persist one feedback event. */
  store(event: FeedbackEvent): Promise<void>;
  /** Fetch every event recorded for the given criterion. */
  getByCriterion(criterion: string): Promise<FeedbackEvent[]>;
  /** Fetch every event recorded for the given bead. */
  getByBead(beadId: string): Promise<FeedbackEvent[]>;
  /** Fetch every stored event. */
  getAll(): Promise<FeedbackEvent[]>;
}
|
|
491
|
-
|
|
492
|
-
/**
 * In-memory feedback storage (for testing and short-lived sessions).
 *
 * Memory use is bounded: once more than `maxSize` events are held, the
 * oldest-inserted events are discarded first (FIFO). Reads do not refresh an
 * event's position, so this is not an LRU cache.
 */
export class InMemoryFeedbackStorage implements FeedbackStorage {
  private events: FeedbackEvent[] = [];
  private readonly maxSize: number;

  /**
   * @param maxSize - Maximum number of events to retain (default: 10000).
   *   Fractional values are rounded down and negative values are treated as
   *   0. NaN falls back to the default, since it would otherwise disable the
   *   bound entirely. `Infinity` keeps the storage unbounded.
   */
  constructor(maxSize: number = 10000) {
    this.maxSize = Number.isNaN(maxSize)
      ? 10000
      : Math.max(0, Math.floor(maxSize));
  }

  /** Append an event, then drop the oldest events beyond `maxSize`. */
  async store(event: FeedbackEvent): Promise<void> {
    this.events.push(event);

    // Evict in place from the front (oldest first) instead of reallocating.
    const overflow = this.events.length - this.maxSize;
    if (overflow > 0) {
      this.events.splice(0, overflow);
    }
  }

  /** Return the retained events whose `criterion` equals the argument. */
  async getByCriterion(criterion: string): Promise<FeedbackEvent[]> {
    return this.events.filter((e) => e.criterion === criterion);
  }

  /** Return the retained events whose `bead_id` equals the argument. */
  async getByBead(beadId: string): Promise<FeedbackEvent[]> {
    return this.events.filter((e) => e.bead_id === beadId);
  }

  /** Return a shallow copy of all retained events, oldest first. */
  async getAll(): Promise<FeedbackEvent[]> {
    return [...this.events];
  }
}
|
|
526
|
-
|
|
527
|
-
// ============================================================================
|
|
528
|
-
// 3-Strike Detection
|
|
529
|
-
// ============================================================================
|
|
530
|
-
|
|
531
|
-
/**
 * Schema for the strike record of a bead.
 *
 * Strikes count consecutive failed fix attempts. Three strikes signal a
 * likely architectural problem: the system should STOP and question the
 * architecture instead of attempting a fourth fix.
 */
export const StrikeRecordSchema = z.object({
  /** ID of the bead the strikes belong to. */
  bead_id: z.string(),
  /** Consecutive failure count, an integer from 0 to 3. */
  strike_count: z.number().int().min(0).max(3),
  /** One entry per strike describing the failed attempt. */
  failures: z.array(
    z.object({
      /** The fix that was attempted. */
      attempt: z.string(),
      /** The reason it failed. */
      reason: z.string(),
      /** When it failed (ISO-8601 by convention; only validated as a string). */
      timestamp: z.string(),
    }),
  ),
  /** Time of the first recorded strike (ISO-8601 by convention), if any. */
  first_strike_at: z.string().optional(),
  /** Time of the most recent strike (ISO-8601 by convention), if any. */
  last_strike_at: z.string().optional(),
});
export type StrikeRecord = z.infer<typeof StrikeRecordSchema>;
|
|
559
|
-
|
|
560
|
-
/**
 * Persistence contract for strike records, keyed by bead ID.
 */
export interface StrikeStorage {
  /** Save a strike record. */
  store(record: StrikeRecord): Promise<void>;
  /** Look up the strike record for a bead; resolves to `null` when there is none. */
  get(beadId: string): Promise<StrikeRecord | null>;
  /** List every stored strike record. */
  getAll(): Promise<StrikeRecord[]>;
  /** Remove the strike record for a bead. */
  clear(beadId: string): Promise<void>;
}
|
|
573
|
-
|
|
574
|
-
/**
 * Map-backed strike storage that keeps one record per bead ID in memory.
 */
export class InMemoryStrikeStorage implements StrikeStorage {
  private strikes = new Map<string, StrikeRecord>();

  /** Save the record under its `bead_id`, replacing any previous one. */
  async store(record: StrikeRecord): Promise<void> {
    const { bead_id: key } = record;
    this.strikes.set(key, record);
  }

  /** Return the record for `beadId`, or `null` when none is stored. */
  async get(beadId: string): Promise<StrikeRecord | null> {
    const found = this.strikes.get(beadId);
    return found === undefined ? null : found;
  }

  /** Return all stored records as a new array. */
  async getAll(): Promise<StrikeRecord[]> {
    return [...this.strikes.values()];
  }

  /** Drop the record for `beadId`; does nothing if there is none. */
  async clear(beadId: string): Promise<void> {
    this.strikes.delete(beadId);
  }
}
|
|
596
|
-
|
|
597
|
-
/**
 * Add a strike to a bead's record
 *
 * Records a failed fix attempt and increments the strike count, which is
 * capped at 3. The failure itself is always appended, so `failures` may hold
 * more entries than `strike_count` once the cap has been reached.
 *
 * The record read from storage is never mutated: a fresh record object is
 * built and stored, so records returned by earlier calls keep their values
 * and a rejected `storage.store()` leaves no half-updated state behind.
 *
 * @param beadId - Cell ID
 * @param attempt - Description of what was attempted
 * @param reason - Why it failed
 * @param storage - Strike storage. When omitted, a brand-new
 *   InMemoryStrikeStorage is created for this call only, so the strike is not
 *   visible to later calls; pass a shared storage to accumulate strikes.
 * @returns Updated strike record
 */
export async function addStrike(
  beadId: string,
  attempt: string,
  reason: string,
  storage: StrikeStorage = new InMemoryStrikeStorage(),
): Promise<StrikeRecord> {
  const existing = await storage.get(beadId);
  const now = new Date().toISOString();

  const previousCount = existing?.strike_count ?? 0;
  const previousFailures = existing?.failures ?? [];

  const record: StrikeRecord = {
    // Keep whatever the stored record already carries (including bead_id).
    ...(existing ?? { bead_id: beadId }),
    strike_count: Math.min(3, previousCount + 1),
    failures: [...previousFailures, { attempt, reason, timestamp: now }],
    // A missing or empty first_strike_at is (re)initialised to this strike.
    first_strike_at: existing?.first_strike_at || now,
    last_strike_at: now,
  };

  await storage.store(record);
  return record;
}
|
|
634
|
-
|
|
635
|
-
/**
 * Look up how many strikes a bead currently has.
 *
 * @param beadId - Cell ID
 * @param storage - Strike storage to read from. When omitted, a new empty
 *   InMemoryStrikeStorage is created for this call.
 * @returns The stored strike count, or 0 when the bead has no record
 */
export async function getStrikes(
  beadId: string,
  storage: StrikeStorage = new InMemoryStrikeStorage(),
): Promise<number> {
  const found = await storage.get(beadId);
  if (found === null || found === undefined) {
    return 0;
  }
  return found.strike_count ?? 0;
}
|
|
649
|
-
|
|
650
|
-
/**
 * Tell whether a bead has reached the 3-strike limit.
 *
 * @param beadId - Cell ID
 * @param storage - Strike storage to read from. When omitted, a new empty
 *   InMemoryStrikeStorage is created for this call.
 * @returns True when the bead's strike count is 3 or more
 */
export async function isStrikedOut(
  beadId: string,
  storage: StrikeStorage = new InMemoryStrikeStorage(),
): Promise<boolean> {
  return (await getStrikes(beadId, storage)) >= 3;
}
|
|
664
|
-
|
|
665
|
-
/**
 * Build the architecture-review prompt for a struck-out bead
 *
 * Once a bead has 3 strikes, the returned markdown lists every recorded
 * failure and asks the reader to question the architecture rather than
 * attempt Fix #4.
 *
 * @param beadId - Cell ID
 * @param storage - Strike storage to read from. When omitted, a new empty
 *   InMemoryStrikeStorage is created for this call.
 * @returns The prompt, or an empty string when the bead has no record or
 *   fewer than 3 strikes
 */
export async function getArchitecturePrompt(
  beadId: string,
  storage: StrikeStorage = new InMemoryStrikeStorage(),
): Promise<string> {
  const record = await storage.get(beadId);
  const struckOut = Boolean(record) && record!.strike_count >= 3;
  if (!struckOut) {
    return "";
  }

  // One numbered markdown line per recorded failure.
  const numberedFailures: string[] = [];
  record!.failures.forEach((failure, index) => {
    numberedFailures.push(
      `${index + 1}. **${failure.attempt}** - Failed: ${failure.reason}`,
    );
  });

  const sections = [
    "## Architecture Review Required",
    "",
    "This bead (`" + beadId + "`) has failed 3 consecutive fix attempts:",
    "",
    numberedFailures.join("\n"),
    "",
    "This pattern suggests an **architectural problem**, not a bug.",
    "",
    "**Questions to consider:**",
    "- Is the current approach fundamentally sound?",
    "- Should we refactor the architecture instead?",
    "- Are we fixing symptoms instead of root cause?",
    "",
    "**Options:**",
    "1. **Refactor architecture** (describe new approach)",
    "2. **Continue with Fix #4** (explain why this time is different)",
    "3. **Abandon this approach entirely**",
    "",
    "**DO NOT attempt Fix #4 without answering these questions.**",
    "", // the prompt ends with a trailing newline
  ];

  return sections.join("\n");
}
|
|
710
|
-
|
|
711
|
-
/**
 * Remove a bead's strike record (e.g., once a fix has succeeded)
 *
 * Delegates to `storage.clear` and resolves when it has finished.
 *
 * @param beadId - Cell ID
 * @param storage - Strike storage to clear from. When omitted, a new empty
 *   InMemoryStrikeStorage is created for this call, so nothing persistent
 *   is affected.
 */
export async function clearStrikes(
  beadId: string,
  storage: StrikeStorage = new InMemoryStrikeStorage(),
): Promise<void> {
  const target = storage;
  await target.clear(beadId);
}
|
|
723
|
-
|
|
724
|
-
// ============================================================================
|
|
725
|
-
// Error Accumulator
|
|
726
|
-
// ============================================================================
|
|
727
|
-
|
|
728
|
-
/**
 * Persistence contract for error entries
 *
 * The counterpart of FeedbackStorage for errors tracked during execution.
 */
export interface ErrorStorage {
  /** Save an error entry. */
  store(entry: ErrorEntry): Promise<void>;
  /** List every error recorded for a bead. */
  getByBead(beadId: string): Promise<ErrorEntry[]>;
  /** List the errors for a bead that are not yet resolved. */
  getUnresolvedByBead(beadId: string): Promise<ErrorEntry[]>;
  /** Flag the error with the given ID as resolved. */
  markResolved(id: string): Promise<void>;
  /** List every stored error. */
  getAll(): Promise<ErrorEntry[]>;
}
|
|
745
|
-
|
|
746
|
-
/**
 * Array-backed error storage
 *
 * Collects the errors raised during subtask execution so they can be fed
 * into retry prompts. Entries are kept in insertion order.
 */
export class InMemoryErrorStorage implements ErrorStorage {
  private errors: ErrorEntry[] = [];

  /** Append an entry to the end of the list. */
  async store(entry: ErrorEntry): Promise<void> {
    this.errors.push(entry);
  }

  /** Return every entry whose `bead_id` equals `beadId`. */
  async getByBead(beadId: string): Promise<ErrorEntry[]> {
    const matches: ErrorEntry[] = [];
    for (const entry of this.errors) {
      if (entry.bead_id === beadId) {
        matches.push(entry);
      }
    }
    return matches;
  }

  /** Return the entries for `beadId` whose `resolved` flag is not set. */
  async getUnresolvedByBead(beadId: string): Promise<ErrorEntry[]> {
    const matches: ErrorEntry[] = [];
    for (const entry of this.errors) {
      if (entry.bead_id !== beadId || entry.resolved) {
        continue;
      }
      matches.push(entry);
    }
    return matches;
  }

  /** Set `resolved` on the first entry with this ID; unknown IDs are ignored. */
  async markResolved(id: string): Promise<void> {
    for (const entry of this.errors) {
      if (entry.id === id) {
        entry.resolved = true;
        return;
      }
    }
  }

  /** Return a shallow copy of all stored entries. */
  async getAll(): Promise<ErrorEntry[]> {
    return this.errors.slice();
  }
}
|
|
777
|
-
|
|
778
|
-
/**
 * Error accumulator for tracking errors during subtask execution
 *
 * Implements patterns from "Patterns for Building AI Agents" p.40:
 * - Examines and corrects errors when something goes wrong
 * - Feeds error context into retry prompts
 * - Tracks error patterns for learning
 *
 * All persistence is delegated to an ErrorStorage; when none is supplied a
 * private InMemoryErrorStorage is used.
 */
export class ErrorAccumulator {
  private storage: ErrorStorage;

  /** Per-instance counter that keeps IDs distinct within one millisecond. */
  private errorSeq = 0;

  /**
   * @param storage - Backing store for error entries (defaults to a new
   *   InMemoryErrorStorage owned by this accumulator)
   */
  constructor(storage?: ErrorStorage) {
    this.storage = storage ?? new InMemoryErrorStorage();
  }

  /**
   * Build an ID for a new error entry.
   *
   * The timestamp alone is not unique: two errors of the same type on the
   * same bead recorded within one millisecond would share an ID, and
   * resolving one would then resolve the wrong entry. The sequence number
   * separates IDs created by this instance; the random suffix makes clashes
   * between accumulators sharing one storage very unlikely.
   */
  private nextErrorId(beadId: string, errorType: ErrorType): string {
    const seq = this.errorSeq++;
    const nonce = Math.random().toString(36).slice(2, 8);
    return `${beadId}-${errorType}-${Date.now()}-${seq}-${nonce}`;
  }

  /**
   * Record an error during subtask execution
   *
   * The entry is validated with ErrorEntrySchema before it is stored, so a
   * schema violation throws and nothing is persisted.
   *
   * @param beadId - Cell ID where error occurred
   * @param errorType - Category of error
   * @param message - Human-readable error message
   * @param options - Additional context (stack trace, tool name, etc.)
   * @returns The created (validated) error entry
   */
  async recordError(
    beadId: string,
    errorType: ErrorType,
    message: string,
    options?: {
      stack_trace?: string;
      tool_name?: string;
      context?: string;
    },
  ): Promise<ErrorEntry> {
    const entry: ErrorEntry = {
      id: this.nextErrorId(beadId, errorType),
      bead_id: beadId,
      error_type: errorType,
      message,
      stack_trace: options?.stack_trace,
      tool_name: options?.tool_name,
      timestamp: new Date().toISOString(),
      resolved: false,
      context: options?.context,
    };

    const validated = ErrorEntrySchema.parse(entry);
    await this.storage.store(validated);

    return validated;
  }

  /**
   * Get all errors for a bead (resolved and unresolved)
   */
  async getErrors(beadId: string): Promise<ErrorEntry[]> {
    return this.storage.getByBead(beadId);
  }

  /**
   * Get only unresolved errors for a bead
   */
  async getUnresolvedErrors(beadId: string): Promise<ErrorEntry[]> {
    return this.storage.getUnresolvedByBead(beadId);
  }

  /**
   * Mark an error as resolved
   */
  async resolveError(errorId: string): Promise<void> {
    await this.storage.markResolved(errorId);
  }

  /**
   * Format errors as context for retry prompts
   *
   * Groups errors by type (in order of first occurrence) and renders them as
   * a markdown section for the agent to learn from.
   *
   * @param beadId - Bead to get error context for
   * @param includeResolved - Include resolved errors (default: false)
   * @returns Formatted error context string, or "" when there are no errors
   */
  async getErrorContext(
    beadId: string,
    includeResolved = false,
  ): Promise<string> {
    const errors = includeResolved
      ? await this.getErrors(beadId)
      : await this.getUnresolvedErrors(beadId);

    if (errors.length === 0) {
      return "";
    }

    // Group errors by type; a Map keeps first-seen order for the headings.
    const byType = new Map<ErrorType, ErrorEntry[]>();
    for (const err of errors) {
      const bucket = byType.get(err.error_type);
      if (bucket) {
        bucket.push(err);
      } else {
        byType.set(err.error_type, [err]);
      }
    }

    // Format as structured feedback
    const lines = [
      "## Previous Errors",
      "",
      "The following errors were encountered during execution:",
      "",
    ];

    // Longest stack-trace prefix shown inline.
    const stackPreviewLength = 100;

    for (const [type, typeErrors] of byType) {
      lines.push(
        `### ${type} (${typeErrors.length} error${typeErrors.length > 1 ? "s" : ""})`,
      );
      lines.push("");

      for (const err of typeErrors) {
        lines.push(`- **${err.message}**`);
        if (err.context) {
          lines.push(` - Context: ${err.context}`);
        }
        if (err.tool_name) {
          lines.push(` - Tool: ${err.tool_name}`);
        }
        if (err.stack_trace) {
          // Only add the ellipsis when the trace really was cut short.
          const truncated = err.stack_trace.length > stackPreviewLength;
          const preview = truncated
            ? `${err.stack_trace.slice(0, stackPreviewLength)}...`
            : err.stack_trace;
          lines.push(` - Stack: \`${preview}\``);
        }
        lines.push(
          ` - Time: ${new Date(err.timestamp).toLocaleString()}${err.resolved ? " (resolved)" : ""}`,
        );
        lines.push("");
      }
    }

    lines.push(
      "**Action Required**: Address these errors before proceeding. Consider:",
    );
    lines.push("- What caused each error?");
    lines.push("- How can you prevent similar errors?");
    lines.push("- Are there patterns across error types?");
    lines.push("");

    return lines.join("\n");
  }

  /**
   * Get error statistics for outcome tracking
   *
   * @param beadId - Bead to get stats for
   * @returns Total and unresolved counts, plus a count per error type
   *   (only types that actually occurred appear as keys)
   */
  async getErrorStats(beadId: string): Promise<{
    total: number;
    unresolved: number;
    by_type: Record<ErrorType, number>;
  }> {
    // The two reads are independent, so issue them together.
    const [allErrors, unresolved] = await Promise.all([
      this.getErrors(beadId),
      this.getUnresolvedErrors(beadId),
    ]);

    const byType = {} as Record<ErrorType, number>;
    for (const err of allErrors) {
      byType[err.error_type] = (byType[err.error_type] ?? 0) + 1;
    }

    return {
      total: allErrors.length,
      unresolved: unresolved.length,
      by_type: byType,
    };
  }
}
|
|
958
|
-
|
|
959
|
-
// ============================================================================
|
|
960
|
-
// Semantic Memory Integration Helpers
|
|
961
|
-
// ============================================================================
|
|
962
|
-
|
|
963
|
-
/**
 * Build the memory-store instruction for a successfully completed task
 *
 * @param beadId - Cell ID that completed
 * @param summary - Completion summary, reported as the key insight
 * @param filesTouched - Files modified; rendered as "none" when empty
 * @param strategy - Decomposition strategy used (if applicable); when absent
 *   the metadata tag falls back to "completion"
 * @returns Memory store instruction object
 */
export function formatMemoryStoreOnSuccess(
  beadId: string,
  summary: string,
  filesTouched: string[],
  strategy?: DecompositionStrategy,
): {
  information: string;
  metadata: string;
  instruction: string;
} {
  let strategyInfo = "";
  if (strategy) {
    strategyInfo = ` using ${strategy} strategy`;
  }

  const fileList = filesTouched.join(", ") || "none";
  const informationLines = [
    `Task "${beadId}" completed successfully${strategyInfo}.`,
    `Key insight: ${summary}`,
    `Files touched: ${fileList}`,
  ];
  const tags = ["swarm", "success", beadId, strategy || "completion"];

  return {
    information: informationLines.join("\n"),
    metadata: tags.join(", "),
    instruction:
      "Store this successful completion in semantic-memory for future reference",
  };
}
|
|
993
|
-
|
|
994
|
-
/**
 * Build the memory-store instruction for an architectural problem (3-strike)
 *
 * @param beadId - Cell ID that struck out
 * @param failures - Failure attempts, rendered as a numbered list in order
 * @returns Memory store instruction object
 */
export function formatMemoryStoreOn3Strike(
  beadId: string,
  failures: Array<{ attempt: string; reason: string }>,
): {
  information: string;
  metadata: string;
  instruction: string;
} {
  const numbered: string[] = [];
  for (let index = 0; index < failures.length; index++) {
    const { attempt, reason } = failures[index];
    numbered.push(`${index + 1}. ${attempt} - Failed: ${reason}`);
  }

  const informationLines = [
    `Architecture problem detected in ${beadId}: Task failed after 3 attempts.`,
    "Attempts:",
    numbered.join("\n"),
    "",
    "This indicates a structural issue requiring human decision, not another fix attempt.",
  ];

  return {
    information: informationLines.join("\n"),
    metadata: ["architecture", "3-strike", beadId, "failure"].join(", "),
    instruction:
      "Store this architectural problem in semantic-memory to avoid similar patterns in future",
  };
}
|
|
1024
|
-
|
|
1025
|
-
/**
 * Build the memory-query instruction used before task decomposition
 *
 * @param task - Task description, used verbatim as the query
 * @param limit - Max results to return (default: 3)
 * @returns Memory query instruction object
 */
export function formatMemoryQueryForDecomposition(
  task: string,
  limit: number = 3,
): {
  query: string;
  limit: number;
  instruction: string;
} {
  const instruction =
    "Query semantic-memory for relevant past learnings about similar tasks before decomposition";

  return { query: task, limit: limit, instruction: instruction };
}
|
|
1047
|
-
|
|
1048
|
-
/**
 * Build the hint that asks for helpful semantic-memory entries to be validated
 *
 * @param beadId - Cell ID of the task that completed with help from past learnings
 * @returns Memory validation hint (instruction plus a context sentence naming the task)
 */
export function formatMemoryValidationHint(beadId: string): {
  instruction: string;
  context: string;
} {
  const instruction =
    "If any semantic-memory entries helped with this task, validate them to reset decay timer";
  const context = `Task ${beadId} completed successfully with assistance from past learnings`;

  return { instruction: instruction, context: context };
}
|
|
1064
|
-
|
|
1065
|
-
// ============================================================================
|
|
1066
|
-
// Exports
|
|
1067
|
-
// ============================================================================
|
|
1068
|
-
|
|
1069
|
-
/**
 * Registry object exposing this module's schemas under their own names.
 */
export const learningSchemas = {
  FeedbackTypeSchema: FeedbackTypeSchema,
  FeedbackEventSchema: FeedbackEventSchema,
  CriterionWeightSchema: CriterionWeightSchema,
  OutcomeSignalsSchema: OutcomeSignalsSchema,
  ScoredOutcomeSchema: ScoredOutcomeSchema,
  DecompositionStrategySchema: DecompositionStrategySchema,
  ErrorTypeSchema: ErrorTypeSchema,
  ErrorEntrySchema: ErrorEntrySchema,
  StrikeRecordSchema: StrikeRecordSchema,
};
|