@symerian/symi 3.0.18 → 3.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/canvas-host/a2ui/.bundle.hash +1 -1
- package/package.json +1 -1
- package/extensions/copilot-proxy/README.md +0 -24
- package/extensions/copilot-proxy/index.ts +0 -154
- package/extensions/copilot-proxy/node_modules/.bin/symi +0 -21
- package/extensions/copilot-proxy/package.json +0 -15
- package/extensions/copilot-proxy/symi.plugin.json +0 -9
- package/extensions/device-pair/index.ts +0 -642
- package/extensions/device-pair/symi.plugin.json +0 -20
- package/extensions/diagnostics-otel/index.ts +0 -15
- package/extensions/diagnostics-otel/node_modules/.bin/acorn +0 -21
- package/extensions/diagnostics-otel/node_modules/.bin/symi +0 -21
- package/extensions/diagnostics-otel/package.json +0 -27
- package/extensions/diagnostics-otel/src/service.test.ts +0 -290
- package/extensions/diagnostics-otel/src/service.ts +0 -666
- package/extensions/diagnostics-otel/symi.plugin.json +0 -8
- package/extensions/google-antigravity-auth/README.md +0 -24
- package/extensions/google-antigravity-auth/index.ts +0 -424
- package/extensions/google-antigravity-auth/node_modules/.bin/symi +0 -21
- package/extensions/google-antigravity-auth/package.json +0 -15
- package/extensions/google-antigravity-auth/symi.plugin.json +0 -9
- package/extensions/google-gemini-cli-auth/README.md +0 -35
- package/extensions/google-gemini-cli-auth/index.ts +0 -75
- package/extensions/google-gemini-cli-auth/node_modules/.bin/symi +0 -21
- package/extensions/google-gemini-cli-auth/oauth.test.ts +0 -162
- package/extensions/google-gemini-cli-auth/oauth.ts +0 -636
- package/extensions/google-gemini-cli-auth/package.json +0 -15
- package/extensions/google-gemini-cli-auth/symi.plugin.json +0 -9
- package/extensions/learning-loop/index.ts +0 -159
- package/extensions/learning-loop/node_modules/.bin/symi +0 -21
- package/extensions/learning-loop/package.json +0 -18
- package/extensions/learning-loop/src/analytics/gateway-methods.ts +0 -230
- package/extensions/learning-loop/src/analytics/metrics-aggregator.ts +0 -153
- package/extensions/learning-loop/src/capture/run-tracker.ts +0 -181
- package/extensions/learning-loop/src/capture/serializer.ts +0 -74
- package/extensions/learning-loop/src/db.ts +0 -583
- package/extensions/learning-loop/src/feedback/explicit-feedback.ts +0 -58
- package/extensions/learning-loop/src/feedback/implicit-signals.ts +0 -89
- package/extensions/learning-loop/src/graph/edge-inference.ts +0 -189
- package/extensions/learning-loop/src/graph/graph-retrieval.ts +0 -144
- package/extensions/learning-loop/src/graph/graph-store.ts +0 -183
- package/extensions/learning-loop/src/hooks.ts +0 -244
- package/extensions/learning-loop/src/injection/cache.ts +0 -73
- package/extensions/learning-loop/src/injection/context-injector.ts +0 -104
- package/extensions/learning-loop/src/injection/prompt-builder.ts +0 -43
- package/extensions/learning-loop/src/learning/embedding-bridge.ts +0 -54
- package/extensions/learning-loop/src/learning/learning-extractor.ts +0 -217
- package/extensions/learning-loop/src/learning/learning-store.ts +0 -158
- package/extensions/learning-loop/src/learning/retrieval.ts +0 -87
- package/extensions/learning-loop/src/math/confidence-intervals.ts +0 -62
- package/extensions/learning-loop/src/math/ewma.ts +0 -51
- package/extensions/learning-loop/src/math/weighted-scorer.ts +0 -42
- package/extensions/learning-loop/src/schema.ts +0 -176
- package/extensions/learning-loop/src/scoring/normalization.ts +0 -32
- package/extensions/learning-loop/src/scoring/quality-engine.ts +0 -78
- package/extensions/learning-loop/src/scoring/signal-extractors.ts +0 -155
- package/extensions/learning-loop/src/test/context-injector.test.ts +0 -142
- package/extensions/learning-loop/src/test/fixes.test.ts +0 -1286
- package/extensions/learning-loop/src/test/graph.test.ts +0 -711
- package/extensions/learning-loop/src/test/integration.test.ts +0 -312
- package/extensions/learning-loop/src/test/learning-store.test.ts +0 -191
- package/extensions/learning-loop/src/test/math.test.ts +0 -148
- package/extensions/learning-loop/src/test/quality-engine.test.ts +0 -231
- package/extensions/learning-loop/src/test/run-tracker.test.ts +0 -143
- package/extensions/learning-loop/src/types.ts +0 -281
- package/extensions/learning-loop/symi.plugin.json +0 -46
- package/extensions/llm-task/README.md +0 -97
- package/extensions/llm-task/index.ts +0 -6
- package/extensions/llm-task/package.json +0 -12
- package/extensions/llm-task/src/llm-task-tool.test.ts +0 -138
- package/extensions/llm-task/src/llm-task-tool.ts +0 -249
- package/extensions/llm-task/symi.plugin.json +0 -21
- package/extensions/memory-lancedb/config.ts +0 -161
- package/extensions/memory-lancedb/index.test.ts +0 -330
- package/extensions/memory-lancedb/index.ts +0 -670
- package/extensions/memory-lancedb/node_modules/.bin/arrow2csv +0 -21
- package/extensions/memory-lancedb/node_modules/.bin/openai +0 -21
- package/extensions/memory-lancedb/node_modules/.bin/symi +0 -21
- package/extensions/memory-lancedb/package.json +0 -20
- package/extensions/memory-lancedb/symi.plugin.json +0 -71
- package/extensions/minimax-portal-auth/README.md +0 -33
- package/extensions/minimax-portal-auth/index.ts +0 -161
- package/extensions/minimax-portal-auth/node_modules/.bin/symi +0 -21
- package/extensions/minimax-portal-auth/oauth.ts +0 -247
- package/extensions/minimax-portal-auth/package.json +0 -15
- package/extensions/minimax-portal-auth/symi.plugin.json +0 -9
- package/extensions/model-equalizer/index.ts +0 -80
- package/extensions/model-equalizer/skills/model-equalizer/SKILL.md +0 -58
- package/extensions/model-equalizer/src/detection.ts +0 -62
- package/extensions/model-equalizer/src/enhancer.ts +0 -63
- package/extensions/model-equalizer/src/test/detection.test.ts +0 -218
- package/extensions/model-equalizer/src/test/enhancer.test.ts +0 -137
- package/extensions/model-equalizer/src/test/integration.test.ts +0 -185
- package/extensions/model-equalizer/src/types.ts +0 -24
- package/extensions/model-equalizer/symi.plugin.json +0 -12
- package/extensions/phone-control/index.ts +0 -421
- package/extensions/phone-control/symi.plugin.json +0 -10
- package/extensions/pipeline/README.md +0 -75
- package/extensions/pipeline/SKILL.md +0 -97
- package/extensions/pipeline/index.ts +0 -18
- package/extensions/pipeline/package.json +0 -11
- package/extensions/pipeline/src/pipeline-tool.test.ts +0 -345
- package/extensions/pipeline/src/pipeline-tool.ts +0 -266
- package/extensions/pipeline/src/windows-spawn.test.ts +0 -148
- package/extensions/pipeline/src/windows-spawn.ts +0 -193
- package/extensions/pipeline/symi.plugin.json +0 -10
- package/extensions/qwen-portal-auth/README.md +0 -24
- package/extensions/qwen-portal-auth/index.ts +0 -134
- package/extensions/qwen-portal-auth/oauth.ts +0 -190
- package/extensions/qwen-portal-auth/symi.plugin.json +0 -9
- package/extensions/talk-voice/index.ts +0 -150
- package/extensions/talk-voice/symi.plugin.json +0 -10
- package/extensions/thread-ownership/index.test.ts +0 -180
- package/extensions/thread-ownership/index.ts +0 -133
- package/extensions/thread-ownership/symi.plugin.json +0 -28
|
@@ -1,312 +0,0 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import os from "node:os";
|
|
3
|
-
import path from "node:path";
|
|
4
|
-
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
5
|
-
import { createMetricsAggregator } from "../analytics/metrics-aggregator.js";
|
|
6
|
-
import { createRunTracker } from "../capture/run-tracker.js";
|
|
7
|
-
import { normalizeCompletedRun } from "../capture/serializer.js";
|
|
8
|
-
import { createDatabaseManager } from "../db.js";
|
|
9
|
-
import { createExplicitFeedbackHandler } from "../feedback/explicit-feedback.js";
|
|
10
|
-
import { createImplicitSignals } from "../feedback/implicit-signals.js";
|
|
11
|
-
import { createContextInjector } from "../injection/context-injector.js";
|
|
12
|
-
import { createLearningExtractor } from "../learning/learning-extractor.js";
|
|
13
|
-
import { createLearningStore } from "../learning/learning-store.js";
|
|
14
|
-
import { createQualityEngine } from "../scoring/quality-engine.js";
|
|
15
|
-
import type { LearningLoopConfig } from "../types.js";
|
|
16
|
-
|
|
17
|
-
const CONFIG: LearningLoopConfig = {
|
|
18
|
-
capture: { embedPrompts: false, maxRuns: 1000 },
|
|
19
|
-
scoring: {
|
|
20
|
-
weights: {
|
|
21
|
-
taskCompletion: 0.35,
|
|
22
|
-
toolEfficiency: 0.25,
|
|
23
|
-
responseAppropriateLength: 0.1,
|
|
24
|
-
latencyRelative: 0.1,
|
|
25
|
-
userFeedback: 0.2,
|
|
26
|
-
},
|
|
27
|
-
},
|
|
28
|
-
injection: { maxLearnings: 5, minRelevance: 0.1, maxTokens: 500, cacheTtlMs: 60000 },
|
|
29
|
-
decay: { halfLifeDays: 30 },
|
|
30
|
-
};
|
|
31
|
-
|
|
32
|
-
const logger = {
|
|
33
|
-
info: () => {},
|
|
34
|
-
warn: () => {},
|
|
35
|
-
error: () => {},
|
|
36
|
-
debug: () => {},
|
|
37
|
-
};
|
|
38
|
-
|
|
39
|
-
describe("Integration: Full Lifecycle", () => {
|
|
40
|
-
let tmpDir: string;
|
|
41
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
42
|
-
|
|
43
|
-
beforeEach(() => {
|
|
44
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "learning-loop-integration-"));
|
|
45
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: CONFIG, logger });
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
afterEach(() => {
|
|
49
|
-
db.close();
|
|
50
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
it("should capture, score, and persist a full run lifecycle", () => {
|
|
54
|
-
const tracker = createRunTracker();
|
|
55
|
-
const qualityEngine = createQualityEngine({ config: CONFIG, db });
|
|
56
|
-
const learningStore = createLearningStore({ db });
|
|
57
|
-
const metrics = createMetricsAggregator({ db });
|
|
58
|
-
|
|
59
|
-
// Simulate llm_input
|
|
60
|
-
tracker.onLlmInput({
|
|
61
|
-
runId: "run-integration-1",
|
|
62
|
-
sessionId: "sess-1",
|
|
63
|
-
provider: "openai",
|
|
64
|
-
model: "gpt-4",
|
|
65
|
-
prompt: "Write a function to sort an array",
|
|
66
|
-
sessionKey: "sk-1",
|
|
67
|
-
agentId: "agent-1",
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
// Simulate after_tool_call
|
|
71
|
-
tracker.onToolCall({
|
|
72
|
-
toolName: "code_search",
|
|
73
|
-
durationMs: 50,
|
|
74
|
-
success: true,
|
|
75
|
-
paramHash: "h1",
|
|
76
|
-
sessionKey: "sk-1",
|
|
77
|
-
});
|
|
78
|
-
tracker.onToolCall({
|
|
79
|
-
toolName: "file_write",
|
|
80
|
-
durationMs: 30,
|
|
81
|
-
success: true,
|
|
82
|
-
paramHash: "h2",
|
|
83
|
-
sessionKey: "sk-1",
|
|
84
|
-
});
|
|
85
|
-
|
|
86
|
-
// Simulate llm_output
|
|
87
|
-
tracker.onLlmOutput({
|
|
88
|
-
runId: "run-integration-1",
|
|
89
|
-
sessionId: "sess-1",
|
|
90
|
-
assistantTexts: ["Here is a sort function implementation..."],
|
|
91
|
-
usage: { input: 100, output: 200, total: 300 },
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
// Simulate agent_end
|
|
95
|
-
const completedRaw = tracker.finalize({
|
|
96
|
-
sessionKey: "sk-1",
|
|
97
|
-
agentId: "agent-1",
|
|
98
|
-
sessionId: "sess-1",
|
|
99
|
-
success: true,
|
|
100
|
-
durationMs: 1500,
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
expect(completedRaw).not.toBeNull();
|
|
104
|
-
const completed = normalizeCompletedRun(completedRaw!);
|
|
105
|
-
|
|
106
|
-
// Score the run
|
|
107
|
-
const score = qualityEngine.scoreRun(completed);
|
|
108
|
-
expect(score.score).toBeGreaterThan(0);
|
|
109
|
-
expect(score.score).toBeLessThanOrEqual(1);
|
|
110
|
-
expect(score.signals).toHaveLength(5);
|
|
111
|
-
|
|
112
|
-
// Persist
|
|
113
|
-
db.insertRun(completed, score);
|
|
114
|
-
metrics.recordRun(completed, score);
|
|
115
|
-
|
|
116
|
-
// Verify persistence
|
|
117
|
-
const savedRun = db.getRun("run-integration-1");
|
|
118
|
-
expect(savedRun).toBeDefined();
|
|
119
|
-
expect(savedRun!.provider).toBe("openai");
|
|
120
|
-
expect(savedRun!.quality_score).toBeCloseTo(score.score, 5);
|
|
121
|
-
|
|
122
|
-
const savedToolCalls = db.getToolCalls("run-integration-1");
|
|
123
|
-
expect(savedToolCalls).toHaveLength(2);
|
|
124
|
-
|
|
125
|
-
// Verify metrics
|
|
126
|
-
const leaderboard = db.getModelLeaderboard();
|
|
127
|
-
expect(leaderboard.length).toBeGreaterThan(0);
|
|
128
|
-
expect(leaderboard[0]!.provider).toBe("openai");
|
|
129
|
-
expect(leaderboard[0]!.runCount).toBe(1);
|
|
130
|
-
|
|
131
|
-
// Verify stats
|
|
132
|
-
const stats = db.getStats();
|
|
133
|
-
expect(stats.runCount).toBe(1);
|
|
134
|
-
});
|
|
135
|
-
|
|
136
|
-
it("should handle explicit feedback submission", () => {
|
|
137
|
-
const tracker = createRunTracker();
|
|
138
|
-
const qualityEngine = createQualityEngine({ config: CONFIG, db });
|
|
139
|
-
const feedbackHandler = createExplicitFeedbackHandler({ db });
|
|
140
|
-
|
|
141
|
-
// Create a run first
|
|
142
|
-
tracker.onLlmInput({
|
|
143
|
-
runId: "run-fb-1",
|
|
144
|
-
sessionId: "sess-1",
|
|
145
|
-
provider: "claude",
|
|
146
|
-
model: "claude-3",
|
|
147
|
-
prompt: "Test prompt",
|
|
148
|
-
sessionKey: "sk-1",
|
|
149
|
-
});
|
|
150
|
-
tracker.onLlmOutput({
|
|
151
|
-
runId: "run-fb-1",
|
|
152
|
-
sessionId: "sess-1",
|
|
153
|
-
assistantTexts: ["Response"],
|
|
154
|
-
usage: { input: 10, output: 20, total: 30 },
|
|
155
|
-
});
|
|
156
|
-
|
|
157
|
-
const completed = normalizeCompletedRun(
|
|
158
|
-
tracker.finalize({ sessionKey: "sk-1", success: true, durationMs: 500 })!,
|
|
159
|
-
);
|
|
160
|
-
const score = qualityEngine.scoreRun(completed);
|
|
161
|
-
db.insertRun(completed, score);
|
|
162
|
-
|
|
163
|
-
// Submit feedback
|
|
164
|
-
const feedback = feedbackHandler.submitFeedback("run-fb-1", 5);
|
|
165
|
-
expect(feedback).not.toBeNull();
|
|
166
|
-
expect(feedback!.source).toBe("explicit");
|
|
167
|
-
expect(feedback!.score).toBe(5);
|
|
168
|
-
|
|
169
|
-
// Verify feedback persisted
|
|
170
|
-
const savedFeedback = db.getFeedback("run-fb-1");
|
|
171
|
-
expect(savedFeedback).toHaveLength(1);
|
|
172
|
-
});
|
|
173
|
-
|
|
174
|
-
it("should detect implicit feedback signals", () => {
|
|
175
|
-
const implicit = createImplicitSignals({ db, config: CONFIG });
|
|
176
|
-
const tracker = createRunTracker();
|
|
177
|
-
const qualityEngine = createQualityEngine({ config: CONFIG, db });
|
|
178
|
-
|
|
179
|
-
// Create and persist a run
|
|
180
|
-
tracker.onLlmInput({
|
|
181
|
-
runId: "run-impl-1",
|
|
182
|
-
sessionId: "sess-1",
|
|
183
|
-
provider: "openai",
|
|
184
|
-
model: "gpt-4",
|
|
185
|
-
prompt: "How do I sort an array in JavaScript?",
|
|
186
|
-
sessionKey: "sk-1",
|
|
187
|
-
});
|
|
188
|
-
tracker.onLlmOutput({
|
|
189
|
-
runId: "run-impl-1",
|
|
190
|
-
sessionId: "sess-1",
|
|
191
|
-
assistantTexts: ["Use Array.sort()"],
|
|
192
|
-
usage: { input: 10, output: 20, total: 30 },
|
|
193
|
-
});
|
|
194
|
-
const completed = normalizeCompletedRun(
|
|
195
|
-
tracker.finalize({ sessionKey: "sk-1", success: true, durationMs: 200 })!,
|
|
196
|
-
);
|
|
197
|
-
const score = qualityEngine.scoreRun(completed);
|
|
198
|
-
db.insertRun(completed, score);
|
|
199
|
-
|
|
200
|
-
// Record the prompt for implicit tracking
|
|
201
|
-
implicit.recordPrompt("run-impl-1", "How do I sort an array in JavaScript?");
|
|
202
|
-
|
|
203
|
-
// Simulate topic change (positive signal)
|
|
204
|
-
const feedback = implicit.analyzeMessage("Now let me work on the database schema", Date.now());
|
|
205
|
-
expect(feedback).not.toBeNull();
|
|
206
|
-
expect(feedback!.score).toBe(1.0); // topic change = positive
|
|
207
|
-
});
|
|
208
|
-
|
|
209
|
-
it("should prune old runs when exceeding maxRuns", () => {
|
|
210
|
-
const qualityEngine = createQualityEngine({ config: CONFIG, db });
|
|
211
|
-
|
|
212
|
-
// Insert runs
|
|
213
|
-
for (let i = 0; i < 5; i++) {
|
|
214
|
-
const run = {
|
|
215
|
-
runId: `prune-run-${i}`,
|
|
216
|
-
sessionId: "sess-1",
|
|
217
|
-
sessionKey: "sk-1",
|
|
218
|
-
agentId: "agent-1",
|
|
219
|
-
provider: "openai",
|
|
220
|
-
model: "gpt-4",
|
|
221
|
-
promptHash: `hash-${i}`,
|
|
222
|
-
promptLength: 100,
|
|
223
|
-
responseLength: 200,
|
|
224
|
-
responseToolCallCount: 0,
|
|
225
|
-
usage: { input: 50, output: 100, total: 150 },
|
|
226
|
-
toolCalls: [],
|
|
227
|
-
success: true,
|
|
228
|
-
error: null,
|
|
229
|
-
durationMs: 1000,
|
|
230
|
-
startedAt: Date.now() - 5000 + i * 1000,
|
|
231
|
-
completedAt: Date.now() - 4000 + i * 1000,
|
|
232
|
-
};
|
|
233
|
-
const score = qualityEngine.scoreRun(run);
|
|
234
|
-
db.insertRun(run, score);
|
|
235
|
-
}
|
|
236
|
-
|
|
237
|
-
expect(db.getRunCount()).toBe(5);
|
|
238
|
-
|
|
239
|
-
// Prune to max 3
|
|
240
|
-
const pruned = db.pruneOldRuns(3);
|
|
241
|
-
expect(pruned).toBe(2);
|
|
242
|
-
expect(db.getRunCount()).toBe(3);
|
|
243
|
-
});
|
|
244
|
-
|
|
245
|
-
it("should track EWMA state across runs", () => {
|
|
246
|
-
// Update EWMA for latency
|
|
247
|
-
const state1 = db.updateEwmaState("openai", "gpt-4", "latency", 1000, 0.1);
|
|
248
|
-
expect(state1.value).toBe(1000);
|
|
249
|
-
expect(state1.count).toBe(1);
|
|
250
|
-
|
|
251
|
-
const state2 = db.updateEwmaState("openai", "gpt-4", "latency", 2000, 0.1);
|
|
252
|
-
// 0.1 * 2000 + 0.9 * 1000 = 1100
|
|
253
|
-
expect(state2.value).toBeCloseTo(1100, 5);
|
|
254
|
-
expect(state2.count).toBe(2);
|
|
255
|
-
|
|
256
|
-
// Verify persistence
|
|
257
|
-
const loaded = db.getEwmaState("openai", "gpt-4", "latency");
|
|
258
|
-
expect(loaded.value).toBeCloseTo(1100, 5);
|
|
259
|
-
});
|
|
260
|
-
|
|
261
|
-
it("should track metrics across multiple models", () => {
|
|
262
|
-
const qualityEngine = createQualityEngine({ config: CONFIG, db });
|
|
263
|
-
const metrics = createMetricsAggregator({ db });
|
|
264
|
-
|
|
265
|
-
// Run with model A
|
|
266
|
-
const runA = {
|
|
267
|
-
runId: "model-a-1",
|
|
268
|
-
sessionId: "sess-1",
|
|
269
|
-
sessionKey: "sk-1",
|
|
270
|
-
agentId: "agent-1",
|
|
271
|
-
provider: "openai",
|
|
272
|
-
model: "gpt-4",
|
|
273
|
-
promptHash: "hA",
|
|
274
|
-
promptLength: 100,
|
|
275
|
-
responseLength: 200,
|
|
276
|
-
responseToolCallCount: 0,
|
|
277
|
-
usage: { input: 50, output: 100, total: 150 },
|
|
278
|
-
toolCalls: [],
|
|
279
|
-
success: true,
|
|
280
|
-
error: null,
|
|
281
|
-
durationMs: 1000,
|
|
282
|
-
startedAt: Date.now() - 1000,
|
|
283
|
-
completedAt: Date.now(),
|
|
284
|
-
};
|
|
285
|
-
|
|
286
|
-
// Run with model B
|
|
287
|
-
const runB = {
|
|
288
|
-
...runA,
|
|
289
|
-
runId: "model-b-1",
|
|
290
|
-
provider: "claude",
|
|
291
|
-
model: "claude-3",
|
|
292
|
-
promptHash: "hB",
|
|
293
|
-
durationMs: 500,
|
|
294
|
-
};
|
|
295
|
-
|
|
296
|
-
const scoreA = qualityEngine.scoreRun(runA);
|
|
297
|
-
db.insertRun(runA, scoreA);
|
|
298
|
-
metrics.recordRun(runA, scoreA);
|
|
299
|
-
|
|
300
|
-
const scoreB = qualityEngine.scoreRun(runB);
|
|
301
|
-
db.insertRun(runB, scoreB);
|
|
302
|
-
metrics.recordRun(runB, scoreB);
|
|
303
|
-
|
|
304
|
-
// Verify leaderboard has both models
|
|
305
|
-
const leaderboard = metrics.getLeaderboard();
|
|
306
|
-
expect(leaderboard).toHaveLength(2);
|
|
307
|
-
|
|
308
|
-
// Verify model-specific metrics
|
|
309
|
-
const modelMetrics = metrics.getModelMetrics();
|
|
310
|
-
expect(modelMetrics).toHaveLength(2);
|
|
311
|
-
});
|
|
312
|
-
});
|
|
@@ -1,191 +0,0 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import os from "node:os";
|
|
3
|
-
import path from "node:path";
|
|
4
|
-
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
5
|
-
import { createDatabaseManager } from "../db.js";
|
|
6
|
-
import { createLearningStore } from "../learning/learning-store.js";
|
|
7
|
-
import type { LearningLoopConfig, CompletedRun, QualityScore } from "../types.js";
|
|
8
|
-
|
|
9
|
-
const TEST_CONFIG: LearningLoopConfig = {
|
|
10
|
-
capture: { embedPrompts: false, maxRuns: 1000 },
|
|
11
|
-
scoring: {
|
|
12
|
-
weights: {
|
|
13
|
-
taskCompletion: 0.35,
|
|
14
|
-
toolEfficiency: 0.25,
|
|
15
|
-
responseAppropriateLength: 0.1,
|
|
16
|
-
latencyRelative: 0.1,
|
|
17
|
-
userFeedback: 0.2,
|
|
18
|
-
},
|
|
19
|
-
},
|
|
20
|
-
injection: { maxLearnings: 5, minRelevance: 0.3, maxTokens: 500, cacheTtlMs: 60000 },
|
|
21
|
-
decay: { halfLifeDays: 30 },
|
|
22
|
-
};
|
|
23
|
-
|
|
24
|
-
const logger = {
|
|
25
|
-
info: () => {},
|
|
26
|
-
warn: () => {},
|
|
27
|
-
error: () => {},
|
|
28
|
-
};
|
|
29
|
-
|
|
30
|
-
function makeRun(id: string): CompletedRun {
|
|
31
|
-
return {
|
|
32
|
-
runId: id,
|
|
33
|
-
sessionId: "sess-1",
|
|
34
|
-
sessionKey: "sk-1",
|
|
35
|
-
agentId: "agent-1",
|
|
36
|
-
provider: "openai",
|
|
37
|
-
model: "gpt-4",
|
|
38
|
-
promptHash: "hash-" + id,
|
|
39
|
-
promptLength: 100,
|
|
40
|
-
responseLength: 200,
|
|
41
|
-
responseToolCallCount: 0,
|
|
42
|
-
usage: { input: 50, output: 100, total: 150 },
|
|
43
|
-
toolCalls: [],
|
|
44
|
-
success: true,
|
|
45
|
-
error: null,
|
|
46
|
-
durationMs: 1000,
|
|
47
|
-
startedAt: Date.now() - 1000,
|
|
48
|
-
completedAt: Date.now(),
|
|
49
|
-
};
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
const testScore: QualityScore = {
|
|
53
|
-
score: 0.85,
|
|
54
|
-
signals: [],
|
|
55
|
-
algorithmVersion: 1,
|
|
56
|
-
};
|
|
57
|
-
|
|
58
|
-
describe("LearningStore", () => {
|
|
59
|
-
let tmpDir: string;
|
|
60
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
61
|
-
let store: ReturnType<typeof createLearningStore>;
|
|
62
|
-
|
|
63
|
-
beforeEach(() => {
|
|
64
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "learning-loop-test-"));
|
|
65
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: TEST_CONFIG, logger });
|
|
66
|
-
store = createLearningStore({ db });
|
|
67
|
-
|
|
68
|
-
// Insert a run for foreign key constraint
|
|
69
|
-
db.insertRun(makeRun("run-1"), testScore);
|
|
70
|
-
});
|
|
71
|
-
|
|
72
|
-
afterEach(() => {
|
|
73
|
-
db.close();
|
|
74
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
75
|
-
});
|
|
76
|
-
|
|
77
|
-
it("should add and retrieve a learning", () => {
|
|
78
|
-
const id = store.addLearning({
|
|
79
|
-
runId: "run-1",
|
|
80
|
-
category: "tool_pattern",
|
|
81
|
-
content: "Use search before read for better results",
|
|
82
|
-
embedding: null,
|
|
83
|
-
confidence: 0.8,
|
|
84
|
-
});
|
|
85
|
-
|
|
86
|
-
expect(id).not.toBeNull();
|
|
87
|
-
|
|
88
|
-
const learning = store.getLearning(id!);
|
|
89
|
-
expect(learning).not.toBeNull();
|
|
90
|
-
expect(learning!.category).toBe("tool_pattern");
|
|
91
|
-
expect(learning!.content).toContain("search before read");
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
it("should deduplicate identical content", () => {
|
|
95
|
-
const id1 = store.addLearning({
|
|
96
|
-
runId: "run-1",
|
|
97
|
-
category: "tool_pattern",
|
|
98
|
-
content: "Exact duplicate content",
|
|
99
|
-
embedding: null,
|
|
100
|
-
confidence: 0.8,
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
const id2 = store.addLearning({
|
|
104
|
-
runId: "run-1",
|
|
105
|
-
category: "tool_pattern",
|
|
106
|
-
content: "Exact duplicate content",
|
|
107
|
-
embedding: null,
|
|
108
|
-
confidence: 0.8,
|
|
109
|
-
});
|
|
110
|
-
|
|
111
|
-
expect(id1).not.toBeNull();
|
|
112
|
-
expect(id2).toBeNull();
|
|
113
|
-
});
|
|
114
|
-
|
|
115
|
-
it("should list learnings by category", () => {
|
|
116
|
-
store.addLearning({
|
|
117
|
-
runId: "run-1",
|
|
118
|
-
category: "tool_pattern",
|
|
119
|
-
content: "Pattern A",
|
|
120
|
-
embedding: null,
|
|
121
|
-
confidence: 0.8,
|
|
122
|
-
});
|
|
123
|
-
store.addLearning({
|
|
124
|
-
runId: "run-1",
|
|
125
|
-
category: "error_recovery",
|
|
126
|
-
content: "Recovery B",
|
|
127
|
-
embedding: null,
|
|
128
|
-
confidence: 0.7,
|
|
129
|
-
});
|
|
130
|
-
|
|
131
|
-
const patterns = store.listLearnings({ category: "tool_pattern" });
|
|
132
|
-
expect(patterns).toHaveLength(1);
|
|
133
|
-
expect(patterns[0]!.category).toBe("tool_pattern");
|
|
134
|
-
|
|
135
|
-
const all = store.listLearnings();
|
|
136
|
-
expect(all).toHaveLength(2);
|
|
137
|
-
});
|
|
138
|
-
|
|
139
|
-
it("should increment applied count", () => {
|
|
140
|
-
const id = store.addLearning({
|
|
141
|
-
runId: "run-1",
|
|
142
|
-
category: "tool_pattern",
|
|
143
|
-
content: "Test learning",
|
|
144
|
-
embedding: null,
|
|
145
|
-
confidence: 0.8,
|
|
146
|
-
})!;
|
|
147
|
-
|
|
148
|
-
store.recordApplication(id);
|
|
149
|
-
store.recordApplication(id);
|
|
150
|
-
|
|
151
|
-
const learning = store.getLearning(id);
|
|
152
|
-
expect(learning!.appliedCount).toBe(2);
|
|
153
|
-
});
|
|
154
|
-
|
|
155
|
-
it("should remove learnings", () => {
|
|
156
|
-
const id = store.addLearning({
|
|
157
|
-
runId: "run-1",
|
|
158
|
-
category: "anti_pattern",
|
|
159
|
-
content: "Bad pattern to remove",
|
|
160
|
-
embedding: null,
|
|
161
|
-
confidence: 0.5,
|
|
162
|
-
})!;
|
|
163
|
-
|
|
164
|
-
store.removeLearning(id);
|
|
165
|
-
expect(store.getLearning(id)).toBeNull();
|
|
166
|
-
});
|
|
167
|
-
|
|
168
|
-
it("should deduplicate by cosine similarity when embeddings provided", () => {
|
|
169
|
-
const embedding1 = [0.1, 0.2, 0.3, 0.4, 0.5];
|
|
170
|
-
const embedding2 = [0.1, 0.2, 0.3, 0.4, 0.5]; // identical = cosine 1.0
|
|
171
|
-
|
|
172
|
-
const id1 = store.addLearning({
|
|
173
|
-
runId: "run-1",
|
|
174
|
-
category: "tool_pattern",
|
|
175
|
-
content: "First version of similar learning",
|
|
176
|
-
embedding: embedding1,
|
|
177
|
-
confidence: 0.8,
|
|
178
|
-
});
|
|
179
|
-
|
|
180
|
-
const id2 = store.addLearning({
|
|
181
|
-
runId: "run-1",
|
|
182
|
-
category: "tool_pattern",
|
|
183
|
-
content: "Slightly different but same embedding",
|
|
184
|
-
embedding: embedding2,
|
|
185
|
-
confidence: 0.8,
|
|
186
|
-
});
|
|
187
|
-
|
|
188
|
-
expect(id1).not.toBeNull();
|
|
189
|
-
expect(id2).toBeNull(); // deduplicated
|
|
190
|
-
});
|
|
191
|
-
});
|
|
@@ -1,148 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { wilsonScoreLowerBound, wilsonScoreInterval } from "../math/confidence-intervals.js";
|
|
3
|
-
import { createEWMA, updateEWMA, getEWMAValue } from "../math/ewma.js";
|
|
4
|
-
import { computeWeightedScore } from "../math/weighted-scorer.js";
|
|
5
|
-
|
|
6
|
-
describe("EWMA", () => {
|
|
7
|
-
it("should initialize with zero value and count", () => {
|
|
8
|
-
const state = createEWMA();
|
|
9
|
-
expect(state.value).toBe(0);
|
|
10
|
-
expect(state.count).toBe(0);
|
|
11
|
-
});
|
|
12
|
-
|
|
13
|
-
it("should return fallback when no observations exist", () => {
|
|
14
|
-
const state = createEWMA();
|
|
15
|
-
expect(getEWMAValue(state, 42)).toBe(42);
|
|
16
|
-
});
|
|
17
|
-
|
|
18
|
-
it("should set first observation directly", () => {
|
|
19
|
-
const state = createEWMA();
|
|
20
|
-
const updated = updateEWMA(state, 100);
|
|
21
|
-
expect(updated.value).toBe(100);
|
|
22
|
-
expect(updated.count).toBe(1);
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
it("should apply exponential weighting on subsequent observations", () => {
|
|
26
|
-
let state = createEWMA();
|
|
27
|
-
state = updateEWMA(state, 100, 0.1);
|
|
28
|
-
state = updateEWMA(state, 200, 0.1);
|
|
29
|
-
// 0.1 * 200 + 0.9 * 100 = 110
|
|
30
|
-
expect(state.value).toBeCloseTo(110, 5);
|
|
31
|
-
expect(state.count).toBe(2);
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
it("should converge toward recent values with high alpha", () => {
|
|
35
|
-
let state = createEWMA();
|
|
36
|
-
state = updateEWMA(state, 100, 0.9);
|
|
37
|
-
state = updateEWMA(state, 200, 0.9);
|
|
38
|
-
// 0.9 * 200 + 0.1 * 100 = 190
|
|
39
|
-
expect(state.value).toBeCloseTo(190, 5);
|
|
40
|
-
});
|
|
41
|
-
|
|
42
|
-
it("should ignore non-finite observations", () => {
|
|
43
|
-
let state = createEWMA();
|
|
44
|
-
state = updateEWMA(state, 100);
|
|
45
|
-
const unchanged = updateEWMA(state, NaN);
|
|
46
|
-
expect(unchanged.value).toBe(100);
|
|
47
|
-
expect(unchanged.count).toBe(1);
|
|
48
|
-
});
|
|
49
|
-
|
|
50
|
-
it("should clamp alpha to [0, 1]", () => {
|
|
51
|
-
let state = createEWMA();
|
|
52
|
-
state = updateEWMA(state, 100, 2.0);
|
|
53
|
-
state = updateEWMA(state, 200, 2.0);
|
|
54
|
-
// alpha clamped to 1.0: 1.0 * 200 + 0.0 * 100 = 200
|
|
55
|
-
expect(state.value).toBeCloseTo(200, 5);
|
|
56
|
-
});
|
|
57
|
-
});
|
|
58
|
-
|
|
59
|
-
describe("Wilson Score Interval", () => {
|
|
60
|
-
it("should return 0 for zero total", () => {
|
|
61
|
-
expect(wilsonScoreLowerBound(0, 0)).toBe(0);
|
|
62
|
-
});
|
|
63
|
-
|
|
64
|
-
it("should return 0 for zero successes", () => {
|
|
65
|
-
const lb = wilsonScoreLowerBound(0, 10);
|
|
66
|
-
expect(lb).toBeGreaterThanOrEqual(0);
|
|
67
|
-
expect(lb).toBeLessThan(0.05);
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
it("should return close to 1 for all successes with many trials", () => {
|
|
71
|
-
const lb = wilsonScoreLowerBound(1000, 1000);
|
|
72
|
-
expect(lb).toBeGreaterThan(0.99);
|
|
73
|
-
});
|
|
74
|
-
|
|
75
|
-
it("should produce lower bound < upper bound", () => {
|
|
76
|
-
const [lower, upper] = wilsonScoreInterval(7, 10);
|
|
77
|
-
expect(lower).toBeLessThan(upper);
|
|
78
|
-
expect(lower).toBeGreaterThanOrEqual(0);
|
|
79
|
-
expect(upper).toBeLessThanOrEqual(1);
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
it("should increase lower bound with more observations at same rate", () => {
|
|
83
|
-
const lb10 = wilsonScoreLowerBound(8, 10);
|
|
84
|
-
const lb100 = wilsonScoreLowerBound(80, 100);
|
|
85
|
-
expect(lb100).toBeGreaterThan(lb10);
|
|
86
|
-
});
|
|
87
|
-
|
|
88
|
-
it("should return [0, 0] for zero total in interval", () => {
|
|
89
|
-
const [lower, upper] = wilsonScoreInterval(0, 0);
|
|
90
|
-
expect(lower).toBe(0);
|
|
91
|
-
expect(upper).toBe(0);
|
|
92
|
-
});
|
|
93
|
-
});
|
|
94
|
-
|
|
95
|
-
describe("Weighted Scorer", () => {
|
|
96
|
-
it("should return 0 for empty signals", () => {
|
|
97
|
-
expect(computeWeightedScore([])).toBe(0);
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
it("should return 0 when all confidences are 0", () => {
|
|
101
|
-
expect(
|
|
102
|
-
computeWeightedScore([
|
|
103
|
-
{ value: 1.0, weight: 0.5, confidence: 0 },
|
|
104
|
-
{ value: 0.8, weight: 0.5, confidence: 0 },
|
|
105
|
-
]),
|
|
106
|
-
).toBe(0);
|
|
107
|
-
});
|
|
108
|
-
|
|
109
|
-
it("should compute correct weighted average for uniform confidence", () => {
|
|
110
|
-
const score = computeWeightedScore([
|
|
111
|
-
{ value: 1.0, weight: 0.5, confidence: 1.0 },
|
|
112
|
-
{ value: 0.0, weight: 0.5, confidence: 1.0 },
|
|
113
|
-
]);
|
|
114
|
-
expect(score).toBeCloseTo(0.5, 5);
|
|
115
|
-
});
|
|
116
|
-
|
|
117
|
-
it("should weight by confidence", () => {
|
|
118
|
-
const score = computeWeightedScore([
|
|
119
|
-
{ value: 1.0, weight: 0.5, confidence: 1.0 },
|
|
120
|
-
{ value: 0.0, weight: 0.5, confidence: 0.0 }, // excluded
|
|
121
|
-
]);
|
|
122
|
-
expect(score).toBeCloseTo(1.0, 5);
|
|
123
|
-
});
|
|
124
|
-
|
|
125
|
-
it("should clamp values to [0, 1]", () => {
|
|
126
|
-
const score = computeWeightedScore([{ value: 2.0, weight: 1.0, confidence: 1.0 }]);
|
|
127
|
-
expect(score).toBeCloseTo(1.0, 5);
|
|
128
|
-
});
|
|
129
|
-
|
|
130
|
-
it("should handle mixed confidences correctly", () => {
|
|
131
|
-
// w1*c1*v1 + w2*c2*v2 / (w1*c1 + w2*c2)
|
|
132
|
-
// 0.35*1.0*1.0 + 0.25*0.5*0.6 / (0.35*1.0 + 0.25*0.5)
|
|
133
|
-
// = 0.35 + 0.075 / (0.35 + 0.125) = 0.425 / 0.475 = 0.8947...
|
|
134
|
-
const score = computeWeightedScore([
|
|
135
|
-
{ value: 1.0, weight: 0.35, confidence: 1.0 },
|
|
136
|
-
{ value: 0.6, weight: 0.25, confidence: 0.5 },
|
|
137
|
-
]);
|
|
138
|
-
expect(score).toBeCloseTo(0.425 / 0.475, 4);
|
|
139
|
-
});
|
|
140
|
-
|
|
141
|
-
it("should skip signals with non-finite values", () => {
|
|
142
|
-
const score = computeWeightedScore([
|
|
143
|
-
{ value: NaN, weight: 0.5, confidence: 1.0 },
|
|
144
|
-
{ value: 0.8, weight: 0.5, confidence: 1.0 },
|
|
145
|
-
]);
|
|
146
|
-
expect(score).toBeCloseTo(0.8, 5);
|
|
147
|
-
});
|
|
148
|
-
});
|