opencode-swarm-plugin 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,438 @@
1
+ /**
2
+ * Learning Module - Confidence decay, feedback scoring, and outcome tracking
3
+ *
4
+ * Implements patterns from cass-memory for learning from swarm outcomes:
5
+ * - Confidence decay: evaluation criteria weights fade unless revalidated
6
+ * - Feedback events: track helpful/harmful signals from task outcomes
7
+ * - Outcome scoring: implicit feedback from duration, errors, retries
8
+ *
9
+ * @see https://github.com/Dicklesworthstone/cass_memory_system/blob/main/src/scoring.ts
10
+ * @see https://github.com/Dicklesworthstone/cass_memory_system/blob/main/src/outcome.ts
11
+ */
12
+ import { z } from "zod";
13
+
14
+ // ============================================================================
15
+ // Schemas
16
+ // ============================================================================
17
+
/**
 * The kinds of signal a feedback event can carry.
 *
 * Kept as a literal tuple so the schema and the inferred union type are
 * derived from a single list.
 */
const FEEDBACK_TYPES = ["helpful", "harmful", "neutral"] as const;

export const FeedbackTypeSchema = z.enum(FEEDBACK_TYPES);

/** Union of the accepted feedback type strings. */
export type FeedbackType = z.infer<typeof FeedbackTypeSchema>;
23
+
/**
 * Schema for one piece of feedback about an evaluation criterion.
 *
 * An event says that a criterion (for example "type_safe") turned out to be
 * helpful, harmful, or neutral at a given moment. Events are the input from
 * which criterion weights are later computed.
 */
export const FeedbackEventSchema = z.object({
  // Identifier of this event.
  id: z.string(),
  // Name of the criterion the feedback is about.
  criterion: z.string(),
  // Direction of the signal: helpful, harmful, or neutral.
  type: FeedbackTypeSchema,
  // Moment the feedback was recorded; expected to be an ISO-8601 string
  // (only "is a string" is enforced here).
  timestamp: z.string(),
  // Optional free-text explanation of why the feedback was given.
  context: z.optional(z.string()),
  // Optional ID of the bead the feedback relates to.
  bead_id: z.optional(z.string()),
  // Strength of the signal before decay, within [0, 1]; defaults to 1 (full weight).
  raw_value: z.number().gte(0).lte(1).default(1),
});

/** Parsed (output) shape of {@link FeedbackEventSchema}. */
export type FeedbackEvent = z.infer<typeof FeedbackEventSchema>;
47
+
/**
 * Schema for the learned weight of a single criterion, together with the
 * feedback tallies and decay settings it was derived from.
 */
export const CriterionWeightSchema = z.object({
  // Criterion name, e.g. "type_safe".
  criterion: z.string(),
  // Current weight after decay, within [0, 1].
  weight: z.number().gte(0).lte(1),
  // How many helpful feedback events were seen (non-negative integer).
  helpful_count: z.number().int().nonnegative(),
  // How many harmful feedback events were seen (non-negative integer).
  harmful_count: z.number().int().nonnegative(),
  // Optional ISO-8601 timestamp of the most recent validation.
  last_validated: z.optional(z.string()),
  // Half-life used for decay, in days; must be > 0 and defaults to 90.
  half_life_days: z.number().gt(0).default(90),
});

/** Parsed (output) shape of {@link CriterionWeightSchema}. */
export type CriterionWeight = z.infer<typeof CriterionWeightSchema>;
66
+
/**
 * Schema for the measurable signals collected when a subtask finishes.
 *
 * These signals are scored implicitly, so decomposition quality can be
 * estimated without asking the user for explicit feedback.
 */
export const OutcomeSignalsSchema = z.object({
  // ID of the subtask bead.
  bead_id: z.string(),
  // Elapsed time in milliseconds (non-negative integer).
  duration_ms: z.number().int().nonnegative(),
  // Count of errors hit while working on the subtask.
  error_count: z.number().int().nonnegative(),
  // Count of retry attempts.
  retry_count: z.number().int().nonnegative(),
  // True when the subtask ultimately succeeded.
  success: z.boolean(),
  // Paths of the files that were modified; defaults to an empty list.
  files_touched: z.string().array().default([]),
  // When the outcome was recorded; expected to be an ISO-8601 string.
  timestamp: z.string(),
});

/** Parsed (output) shape of {@link OutcomeSignalsSchema}. */
export type OutcomeSignals = z.infer<typeof OutcomeSignalsSchema>;
90
+
/**
 * Schema for an outcome after implicit scoring: the original signals plus
 * the inferred feedback type, its decayed value, and a short explanation.
 */
export const ScoredOutcomeSchema = z.object({
  // The signals that were scored.
  signals: OutcomeSignalsSchema,
  // Feedback type inferred from the signals.
  type: FeedbackTypeSchema,
  // Score after decay, within [0, 1].
  decayed_value: z.number().gte(0).lte(1),
  // Human-readable explanation of how the score came about.
  reasoning: z.string(),
});

/** Parsed (output) shape of {@link ScoredOutcomeSchema}. */
export type ScoredOutcome = z.infer<typeof ScoredOutcomeSchema>;
105
+
106
+ // ============================================================================
107
+ // Configuration
108
+ // ============================================================================
109
+
/**
 * Tunable settings shared by the decay, scoring, and deprecation functions
 * in this module.
 */
export interface LearningConfig {
  /** Number of days after which a feedback value has decayed to half. */
  halfLifeDays: number;
  /**
   * Minimum number of helpful + harmful events a criterion needs before
   * deprecation is considered at all.
   */
  minFeedbackForAdjustment: number;
  /** A criterion is deprecated once its harmful share is strictly above this ratio. */
  maxHarmfulRatio: number;
  /** Durations strictly below this many milliseconds count as a fast completion. */
  fastCompletionThresholdMs: number;
  /** Durations strictly above this many milliseconds count as a slow completion. */
  slowCompletionThresholdMs: number;
  /**
   * Highest non-zero error count that still earns the middle error score;
   * anything above it earns the lowest one.
   */
  maxErrorsForHelpful: number;
}
127
+
/** Milliseconds in one minute; used to spell out the duration thresholds. */
const MS_PER_MINUTE = 60 * 1000;

/**
 * Settings used whenever a caller does not supply its own configuration:
 * 90-day half-life, at least 3 feedback events before deprecation is
 * considered, deprecation above a 30% harmful share, "fast" under 5 minutes,
 * "slow" over 30 minutes, and up to 2 errors tolerated for the middle score.
 */
export const DEFAULT_LEARNING_CONFIG: LearningConfig = {
  halfLifeDays: 90,
  minFeedbackForAdjustment: 3,
  maxHarmfulRatio: 0.3,
  fastCompletionThresholdMs: 5 * MS_PER_MINUTE,
  slowCompletionThresholdMs: 30 * MS_PER_MINUTE,
  maxErrorsForHelpful: 2,
};
136
+
137
+ // ============================================================================
138
+ // Core Functions
139
+ // ============================================================================
140
+
/**
 * Calculate decayed value using half-life formula
 *
 * Value decays by 50% every `halfLifeDays` days.
 * Formula: 0.5^(age/halfLife)
 *
 * The result is always a finite number in [0, 1]:
 * - Events dated in the future are treated as age 0 (no decay, returns 1).
 * - If `timestamp` or `now` cannot be interpreted as a date, 0 is returned so
 *   that a malformed event carries no weight instead of propagating NaN.
 * - A half-life that is zero, negative, or NaN is treated as instantaneous
 *   decay: 1 for an event of age 0, otherwise 0.
 *
 * @param timestamp - When the event occurred (ISO-8601)
 * @param now - Current time
 * @param halfLifeDays - Half-life in days (default: 90)
 * @returns Decayed value between 0 and 1
 *
 * @example
 * // Event from 90 days ago with 90-day half-life
 * calculateDecayedValue("2024-09-08T00:00:00Z", new Date("2024-12-07"), 90)
 * // Returns ~0.5
 */
export function calculateDecayedValue(
  timestamp: string,
  now: Date = new Date(),
  halfLifeDays: number = 90,
): number {
  const MS_PER_DAY = 24 * 60 * 60 * 1000;

  const eventTime = new Date(timestamp).getTime();
  const nowTime = now.getTime();

  // getTime() yields NaN for unparseable dates; without this guard the NaN
  // would flow through Math.max/Math.pow and out to every caller.
  if (Number.isNaN(eventTime) || Number.isNaN(nowTime)) {
    return 0;
  }

  // Clamp at zero so future-dated events never produce a value above 1.
  const ageDays = Math.max(0, (nowTime - eventTime) / MS_PER_DAY);

  // `!(x > 0)` also catches NaN. Dividing by such a half-life would give
  // NaN (0 / 0) or values greater than 1 (negative exponent).
  if (!(halfLifeDays > 0)) {
    return ageDays === 0 ? 1 : 0;
  }

  return Math.pow(0.5, ageDays / halfLifeDays);
}
168
+
/**
 * Calculate weighted criterion score from feedback events
 *
 * Each event contributes `raw_value * decay(timestamp)`. Helpful and harmful
 * contributions are summed separately and the weight is the helpful share,
 * `helpful / (helpful + harmful)`, floored at 0.1. Neutral events are ignored.
 * When there is no usable helpful or harmful signal the weight is 1.0.
 *
 * A contribution that is not a finite number (for example because the event
 * timestamp cannot be parsed) is counted as 0. The event still increments its
 * helpful/harmful counter, but it can no longer turn the sums into NaN and
 * thereby reset the weight to 1.0.
 *
 * `last_validated` is the timestamp string of the chronologically latest
 * helpful event whose timestamp can be parsed; it is undefined if there is none.
 *
 * @param events - Feedback events for this criterion; the criterion name is
 *   taken from the first event ("unknown" when the list is empty)
 * @param config - Learning configuration (only `halfLifeDays` is read)
 * @param now - Reference time for decay (default: current time)
 * @returns Criterion weight record with the weight, feedback counts,
 *   last validation time, and the half-life that was applied
 */
export function calculateCriterionWeight(
  events: FeedbackEvent[],
  config: LearningConfig = DEFAULT_LEARNING_CONFIG,
  now: Date = new Date(),
): CriterionWeight {
  let helpfulSum = 0;
  let harmfulSum = 0;
  let helpfulCount = 0;
  let harmfulCount = 0;
  let lastValidated: string | undefined;
  let lastValidatedMs = Number.NEGATIVE_INFINITY;

  for (const event of events) {
    const decayed = calculateDecayedValue(
      event.timestamp,
      now,
      config.halfLifeDays,
    );
    const contribution = event.raw_value * decayed;
    // A single NaN would poison the sums and make `total > 0` false below.
    const value = Number.isFinite(contribution) ? contribution : 0;

    if (event.type === "helpful") {
      helpfulSum += value;
      helpfulCount++;
      // Compare instants, not strings: ISO-8601 strings with different
      // offsets or precision do not sort chronologically as text.
      // NaN (unparseable) never compares greater, so it is skipped.
      const eventMs = Date.parse(event.timestamp);
      if (eventMs > lastValidatedMs) {
        lastValidatedMs = eventMs;
        lastValidated = event.timestamp;
      }
    } else if (event.type === "harmful") {
      harmfulSum += value;
      harmfulCount++;
    }
  }

  // Calculate weight: helpful / (helpful + harmful), with minimum of 0.1
  const total = helpfulSum + harmfulSum;
  const weight = total > 0 ? Math.max(0.1, helpfulSum / total) : 1.0;

  return {
    criterion: events[0]?.criterion ?? "unknown",
    weight,
    helpful_count: helpfulCount,
    harmful_count: harmfulCount,
    last_validated: lastValidated,
    half_life_days: config.halfLifeDays,
  };
}
223
+
/**
 * Score implicit feedback from task outcome signals
 *
 * Four component scores (each 0-1, higher = better) are combined:
 * - Duration: below the fast threshold = 1.0, above the slow threshold = 0.2,
 *   otherwise 0.6
 * - Errors: none = 1.0, up to `maxErrorsForHelpful` = 0.6, more = 0.2
 * - Retries: none = 1.0, one = 0.7, more = 0.3
 * - Success: succeeded = 1.0, failed = 0.0
 *
 * The combined score weights success at 0.4 and the other three at 0.2 each.
 * A score of 0.7 or more is "helpful", 0.4 or less is "harmful", anything in
 * between is "neutral".
 *
 * The reasoning text labels the completion as Fast / Moderate / Slow
 * according to the actual duration thresholds, not according to the
 * resulting feedback type.
 *
 * `decayed_value` is the combined score multiplied by the time decay of
 * `signals.timestamp`, forced into [0, 1] (0 when the product is not a finite
 * number) so that it always satisfies the scored-outcome schema.
 *
 * @param signals - Outcome signals from completed subtask
 * @param config - Learning configuration
 * @param now - Reference time for decay (default: current time)
 * @returns Scored outcome with feedback type and reasoning
 */
export function scoreImplicitFeedback(
  signals: OutcomeSignals,
  config: LearningConfig = DEFAULT_LEARNING_CONFIG,
  now: Date = new Date(),
): ScoredOutcome {
  const decayed = calculateDecayedValue(
    signals.timestamp,
    now,
    config.halfLifeDays,
  );

  // Score components (each 0-1, higher = better)
  const isFast = signals.duration_ms < config.fastCompletionThresholdMs;
  const isSlow = signals.duration_ms > config.slowCompletionThresholdMs;
  const durationScore = isFast ? 1.0 : isSlow ? 0.2 : 0.6;

  const errorScore =
    signals.error_count === 0
      ? 1.0
      : signals.error_count <= config.maxErrorsForHelpful
        ? 0.6
        : 0.2;

  const retryScore =
    signals.retry_count === 0 ? 1.0 : signals.retry_count === 1 ? 0.7 : 0.3;

  const successScore = signals.success ? 1.0 : 0.0;

  // Weighted average (success matters most)
  const rawScore =
    successScore * 0.4 +
    durationScore * 0.2 +
    errorScore * 0.2 +
    retryScore * 0.2;

  // Shared pieces of the explanation text.
  const speedLabel = isFast ? "Fast" : isSlow ? "Slow" : "Moderate";
  const seconds = Math.round(signals.duration_ms / 1000);
  const counts = `${signals.error_count} errors, ${signals.retry_count} retries`;
  const result = signals.success ? "succeeded" : "failed";

  // Determine feedback type
  let type: FeedbackType;
  let reasoning: string;

  if (rawScore >= 0.7) {
    type = "helpful";
    reasoning = `${speedLabel} completion (${seconds}s), ${counts}, ${result}`;
  } else if (rawScore <= 0.4) {
    type = "harmful";
    reasoning = `${speedLabel} completion (${seconds}s), ${counts}, ${result}`;
  } else {
    type = "neutral";
    reasoning = `Mixed signals: ${seconds}s, ${counts}`;
  }

  // Keep the value inside the range the schema promises, even if the decay
  // factor is NaN or above 1 because of a bad timestamp or half-life.
  const product = rawScore * decayed;
  const decayedValue = Number.isFinite(product)
    ? Math.min(1, Math.max(0, product))
    : 0;

  return {
    signals,
    type,
    decayed_value: decayedValue,
    reasoning,
  };
}
305
+
/**
 * Turn a scored outcome into a storable feedback event for one criterion.
 *
 * The event inherits the outcome's feedback type, timestamp, and bead ID;
 * the outcome's reasoning becomes the event context. The event ID is built
 * from the bead ID, the criterion, and the current epoch milliseconds.
 *
 * Note: `raw_value` is filled with the outcome's `decayed_value`, i.e. a
 * score that has already had time decay applied.
 *
 * @param outcome - Scored outcome
 * @param criterion - Which criterion this feedback applies to
 * @returns Feedback event
 */
export function outcomeToFeedback(
  outcome: ScoredOutcome,
  criterion: string,
): FeedbackEvent {
  const { signals, type, reasoning, decayed_value: scoredValue } = outcome;
  const beadId = signals.bead_id;

  const event: FeedbackEvent = {
    id: `${beadId}-${criterion}-${Date.now()}`,
    criterion,
    type,
    timestamp: signals.timestamp,
    context: reasoning,
    bead_id: beadId,
    raw_value: scoredValue,
  };

  return event;
}
330
+
/**
 * Scale raw evaluation scores by their learned criterion weights.
 *
 * Every criterion in `criteria` yields an entry holding the raw score, the
 * weight that was applied, and their product. Criteria without a learned
 * weight are left at full strength (weight 1.0).
 *
 * @param criteria - Map of criterion name to raw score (0-1)
 * @param weights - Map of criterion name to weight
 * @returns Map of criterion name to `{ raw, weighted, weight }`
 */
export function applyWeights(
  criteria: Record<string, number>,
  weights: Record<string, CriterionWeight>,
): Record<string, { raw: number; weighted: number; weight: number }> {
  type WeightedScore = { raw: number; weighted: number; weight: number };

  const scored: Record<string, WeightedScore> = {};

  Object.entries(criteria).forEach(([criterionName, raw]) => {
    const learned = weights[criterionName];
    // Fall back to full weight when nothing has been learned for this name.
    const factor =
      learned != null && learned.weight != null ? learned.weight : 1.0;

    scored[criterionName] = {
      raw,
      weighted: raw * factor,
      weight: factor,
    };
  });

  return scored;
}
361
+
/**
 * Decide whether a criterion has earned deprecation.
 *
 * Deprecation requires both of the following:
 * - the helpful and harmful counts together reach
 *   `config.minFeedbackForAdjustment`, and
 * - the harmful share of those counts is strictly greater than
 *   `config.maxHarmfulRatio`.
 *
 * @param weight - Criterion weight with feedback counts
 * @param config - Learning configuration
 * @returns Whether the criterion should be deprecated
 */
export function shouldDeprecateCriterion(
  weight: CriterionWeight,
  config: LearningConfig = DEFAULT_LEARNING_CONFIG,
): boolean {
  const { helpful_count: helpful, harmful_count: harmful } = weight;
  const feedbackTotal = helpful + harmful;
  const hasEnoughFeedback = feedbackTotal >= config.minFeedbackForAdjustment;

  return hasEnoughFeedback && harmful / feedbackTotal > config.maxHarmfulRatio;
}
384
+
385
+ // ============================================================================
386
+ // Storage Helpers
387
+ // ============================================================================
388
+
/**
 * Persistence contract for feedback events.
 *
 * All operations are asynchronous so that a backend is free to use the file
 * system, SQLite, or any other store.
 */
export interface FeedbackStorage {
  /** Persist a single feedback event. */
  store(event: FeedbackEvent): Promise<void>;
  /** Fetch every stored event recorded for the given criterion name. */
  getByCriterion(criterion: string): Promise<FeedbackEvent[]>;
  /** Fetch every stored event attached to the given bead ID. */
  getByBead(beadId: string): Promise<FeedbackEvent[]>;
  /** Fetch every stored event. */
  getAll(): Promise<FeedbackEvent[]>;
}
404
+
/**
 * Feedback storage backed by a plain in-process array.
 *
 * Nothing is persisted, which makes it suitable for tests and short-lived
 * sessions. Events are kept in insertion order and stored by reference.
 */
export class InMemoryFeedbackStorage implements FeedbackStorage {
  private events: FeedbackEvent[] = [];

  /** Append the event to the in-memory list. */
  async store(event: FeedbackEvent): Promise<void> {
    this.events.push(event);
  }

  /** Collect, in insertion order, the events whose criterion equals `criterion`. */
  async getByCriterion(criterion: string): Promise<FeedbackEvent[]> {
    const matches: FeedbackEvent[] = [];
    for (const stored of this.events) {
      if (stored.criterion === criterion) {
        matches.push(stored);
      }
    }
    return matches;
  }

  /** Collect, in insertion order, the events whose bead ID equals `beadId`. */
  async getByBead(beadId: string): Promise<FeedbackEvent[]> {
    const matches: FeedbackEvent[] = [];
    for (const stored of this.events) {
      if (stored.bead_id === beadId) {
        matches.push(stored);
      }
    }
    return matches;
  }

  /** Return a new array holding every stored event; the internal list is not exposed. */
  async getAll(): Promise<FeedbackEvent[]> {
    return this.events.slice();
  }
}
427
+
428
+ // ============================================================================
429
+ // Exports
430
+ // ============================================================================
431
+
/**
 * All zod schemas defined by this module, gathered under one object and
 * keyed by their own names.
 */
export const learningSchemas = {
  FeedbackTypeSchema: FeedbackTypeSchema,
  FeedbackEventSchema: FeedbackEventSchema,
  CriterionWeightSchema: CriterionWeightSchema,
  OutcomeSignalsSchema: OutcomeSignalsSchema,
  ScoredOutcomeSchema: ScoredOutcomeSchema,
};