@symerian/symi 3.0.18 → 3.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-info.json +3 -3
- package/dist/canvas-host/a2ui/.bundle.hash +1 -1
- package/package.json +1 -1
- package/extensions/copilot-proxy/README.md +0 -24
- package/extensions/copilot-proxy/index.ts +0 -154
- package/extensions/copilot-proxy/node_modules/.bin/symi +0 -21
- package/extensions/copilot-proxy/package.json +0 -15
- package/extensions/copilot-proxy/symi.plugin.json +0 -9
- package/extensions/device-pair/index.ts +0 -642
- package/extensions/device-pair/symi.plugin.json +0 -20
- package/extensions/diagnostics-otel/index.ts +0 -15
- package/extensions/diagnostics-otel/node_modules/.bin/acorn +0 -21
- package/extensions/diagnostics-otel/node_modules/.bin/symi +0 -21
- package/extensions/diagnostics-otel/package.json +0 -27
- package/extensions/diagnostics-otel/src/service.test.ts +0 -290
- package/extensions/diagnostics-otel/src/service.ts +0 -666
- package/extensions/diagnostics-otel/symi.plugin.json +0 -8
- package/extensions/google-antigravity-auth/README.md +0 -24
- package/extensions/google-antigravity-auth/index.ts +0 -424
- package/extensions/google-antigravity-auth/node_modules/.bin/symi +0 -21
- package/extensions/google-antigravity-auth/package.json +0 -15
- package/extensions/google-antigravity-auth/symi.plugin.json +0 -9
- package/extensions/google-gemini-cli-auth/README.md +0 -35
- package/extensions/google-gemini-cli-auth/index.ts +0 -75
- package/extensions/google-gemini-cli-auth/node_modules/.bin/symi +0 -21
- package/extensions/google-gemini-cli-auth/oauth.test.ts +0 -162
- package/extensions/google-gemini-cli-auth/oauth.ts +0 -636
- package/extensions/google-gemini-cli-auth/package.json +0 -15
- package/extensions/google-gemini-cli-auth/symi.plugin.json +0 -9
- package/extensions/learning-loop/index.ts +0 -159
- package/extensions/learning-loop/node_modules/.bin/symi +0 -21
- package/extensions/learning-loop/package.json +0 -18
- package/extensions/learning-loop/src/analytics/gateway-methods.ts +0 -230
- package/extensions/learning-loop/src/analytics/metrics-aggregator.ts +0 -153
- package/extensions/learning-loop/src/capture/run-tracker.ts +0 -181
- package/extensions/learning-loop/src/capture/serializer.ts +0 -74
- package/extensions/learning-loop/src/db.ts +0 -583
- package/extensions/learning-loop/src/feedback/explicit-feedback.ts +0 -58
- package/extensions/learning-loop/src/feedback/implicit-signals.ts +0 -89
- package/extensions/learning-loop/src/graph/edge-inference.ts +0 -189
- package/extensions/learning-loop/src/graph/graph-retrieval.ts +0 -144
- package/extensions/learning-loop/src/graph/graph-store.ts +0 -183
- package/extensions/learning-loop/src/hooks.ts +0 -244
- package/extensions/learning-loop/src/injection/cache.ts +0 -73
- package/extensions/learning-loop/src/injection/context-injector.ts +0 -104
- package/extensions/learning-loop/src/injection/prompt-builder.ts +0 -43
- package/extensions/learning-loop/src/learning/embedding-bridge.ts +0 -54
- package/extensions/learning-loop/src/learning/learning-extractor.ts +0 -217
- package/extensions/learning-loop/src/learning/learning-store.ts +0 -158
- package/extensions/learning-loop/src/learning/retrieval.ts +0 -87
- package/extensions/learning-loop/src/math/confidence-intervals.ts +0 -62
- package/extensions/learning-loop/src/math/ewma.ts +0 -51
- package/extensions/learning-loop/src/math/weighted-scorer.ts +0 -42
- package/extensions/learning-loop/src/schema.ts +0 -176
- package/extensions/learning-loop/src/scoring/normalization.ts +0 -32
- package/extensions/learning-loop/src/scoring/quality-engine.ts +0 -78
- package/extensions/learning-loop/src/scoring/signal-extractors.ts +0 -155
- package/extensions/learning-loop/src/test/context-injector.test.ts +0 -142
- package/extensions/learning-loop/src/test/fixes.test.ts +0 -1286
- package/extensions/learning-loop/src/test/graph.test.ts +0 -711
- package/extensions/learning-loop/src/test/integration.test.ts +0 -312
- package/extensions/learning-loop/src/test/learning-store.test.ts +0 -191
- package/extensions/learning-loop/src/test/math.test.ts +0 -148
- package/extensions/learning-loop/src/test/quality-engine.test.ts +0 -231
- package/extensions/learning-loop/src/test/run-tracker.test.ts +0 -143
- package/extensions/learning-loop/src/types.ts +0 -281
- package/extensions/learning-loop/symi.plugin.json +0 -46
- package/extensions/llm-task/README.md +0 -97
- package/extensions/llm-task/index.ts +0 -6
- package/extensions/llm-task/package.json +0 -12
- package/extensions/llm-task/src/llm-task-tool.test.ts +0 -138
- package/extensions/llm-task/src/llm-task-tool.ts +0 -249
- package/extensions/llm-task/symi.plugin.json +0 -21
- package/extensions/memory-lancedb/config.ts +0 -161
- package/extensions/memory-lancedb/index.test.ts +0 -330
- package/extensions/memory-lancedb/index.ts +0 -670
- package/extensions/memory-lancedb/node_modules/.bin/arrow2csv +0 -21
- package/extensions/memory-lancedb/node_modules/.bin/openai +0 -21
- package/extensions/memory-lancedb/node_modules/.bin/symi +0 -21
- package/extensions/memory-lancedb/package.json +0 -20
- package/extensions/memory-lancedb/symi.plugin.json +0 -71
- package/extensions/minimax-portal-auth/README.md +0 -33
- package/extensions/minimax-portal-auth/index.ts +0 -161
- package/extensions/minimax-portal-auth/node_modules/.bin/symi +0 -21
- package/extensions/minimax-portal-auth/oauth.ts +0 -247
- package/extensions/minimax-portal-auth/package.json +0 -15
- package/extensions/minimax-portal-auth/symi.plugin.json +0 -9
- package/extensions/model-equalizer/index.ts +0 -80
- package/extensions/model-equalizer/skills/model-equalizer/SKILL.md +0 -58
- package/extensions/model-equalizer/src/detection.ts +0 -62
- package/extensions/model-equalizer/src/enhancer.ts +0 -63
- package/extensions/model-equalizer/src/test/detection.test.ts +0 -218
- package/extensions/model-equalizer/src/test/enhancer.test.ts +0 -137
- package/extensions/model-equalizer/src/test/integration.test.ts +0 -185
- package/extensions/model-equalizer/src/types.ts +0 -24
- package/extensions/model-equalizer/symi.plugin.json +0 -12
- package/extensions/phone-control/index.ts +0 -421
- package/extensions/phone-control/symi.plugin.json +0 -10
- package/extensions/pipeline/README.md +0 -75
- package/extensions/pipeline/SKILL.md +0 -97
- package/extensions/pipeline/index.ts +0 -18
- package/extensions/pipeline/package.json +0 -11
- package/extensions/pipeline/src/pipeline-tool.test.ts +0 -345
- package/extensions/pipeline/src/pipeline-tool.ts +0 -266
- package/extensions/pipeline/src/windows-spawn.test.ts +0 -148
- package/extensions/pipeline/src/windows-spawn.ts +0 -193
- package/extensions/pipeline/symi.plugin.json +0 -10
- package/extensions/qwen-portal-auth/README.md +0 -24
- package/extensions/qwen-portal-auth/index.ts +0 -134
- package/extensions/qwen-portal-auth/oauth.ts +0 -190
- package/extensions/qwen-portal-auth/symi.plugin.json +0 -9
- package/extensions/talk-voice/index.ts +0 -150
- package/extensions/talk-voice/symi.plugin.json +0 -10
- package/extensions/thread-ownership/index.test.ts +0 -180
- package/extensions/thread-ownership/index.ts +0 -133
- package/extensions/thread-ownership/symi.plugin.json +0 -28
|
@@ -1,231 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { combineSignals } from "../scoring/normalization.js";
|
|
3
|
-
import {
|
|
4
|
-
extractTaskCompletion,
|
|
5
|
-
extractToolEfficiency,
|
|
6
|
-
extractResponseAppropriateness,
|
|
7
|
-
extractLatencyRelative,
|
|
8
|
-
extractUserFeedback,
|
|
9
|
-
} from "../scoring/signal-extractors.js";
|
|
10
|
-
import type { CompletedRun } from "../types.js";
|
|
11
|
-
|
|
12
|
-
/**
 * Builds a `CompletedRun` fixture for the tests in this file.
 *
 * Every field receives a fixed default describing a successful, tool-free run;
 * `overrides` is spread last, so any field can be replaced per test.
 *
 * @param overrides - Fields to replace on the default fixture.
 * @returns A fully populated run record.
 */
function makeRun(overrides: Partial<CompletedRun> = {}): CompletedRun {
  // Read the clock once: two separate Date.now() calls can straddle a
  // millisecond tick, leaving completedAt - startedAt out of step with the
  // default durationMs of 1000.
  const now = Date.now();
  return {
    runId: "test-run",
    sessionId: "sess-1",
    sessionKey: "sk-1",
    agentId: "agent-1",
    provider: "openai",
    model: "gpt-4",
    promptHash: "abc123",
    promptLength: 100,
    responseLength: 200,
    responseToolCallCount: 0,
    usage: { input: 50, output: 100, total: 150 },
    toolCalls: [],
    success: true,
    error: null,
    durationMs: 1000,
    startedAt: now - 1000,
    completedAt: now,
    ...overrides,
  };
}
|
|
34
|
-
|
|
35
|
-
describe("Signal Extractors", () => {
  // Weight passed to each extractor under test.
  const TASK_WEIGHT = 0.35;
  const TOOL_WEIGHT = 0.25;
  const LENGTH_WEIGHT = 0.1;
  const LATENCY_WEIGHT = 0.1;
  const FEEDBACK_WEIGHT = 0.2;

  /** Builds one tool-call record; the call succeeded unless `error` is given. */
  const toolCall = (
    toolName: string,
    durationMs: number,
    paramHash: string,
    error: string | null = null,
  ) => ({ toolName, durationMs, success: error === null, error, paramHash });

  describe("Task Completion", () => {
    it("should return 1.0 for success without error", () => {
      const cleanRun = makeRun({ success: true, error: null });
      const result = extractTaskCompletion(cleanRun, TASK_WEIGHT);
      expect(result.value).toBe(1.0);
      expect(result.confidence).toBe(1.0);
    });

    it("should return 0.5 for success with error", () => {
      const warnedRun = makeRun({ success: true, error: "warning" });
      expect(extractTaskCompletion(warnedRun, TASK_WEIGHT).value).toBe(0.5);
    });

    it("should return 0.0 for failure", () => {
      const failedRun = makeRun({ success: false });
      expect(extractTaskCompletion(failedRun, TASK_WEIGHT).value).toBe(0.0);
    });
  });

  describe("Tool Efficiency", () => {
    it("should return 1.0 with no tool calls", () => {
      const result = extractToolEfficiency(makeRun({ toolCalls: [] }), TOOL_WEIGHT);
      expect(result.value).toBe(1.0);
      expect(result.confidence).toBe(0.3);
    });

    it("should return 1.0 for all successful unique tool calls", () => {
      const run = makeRun({
        toolCalls: [toolCall("a", 10, "h1"), toolCall("b", 20, "h2")],
      });
      expect(extractToolEfficiency(run, TOOL_WEIGHT).value).toBe(1.0);
    });

    it("should penalize errors", () => {
      const run = makeRun({
        toolCalls: [toolCall("a", 10, "h1", "fail"), toolCall("b", 20, "h2")],
      });
      // One failed call out of two, no repeats: 1.0 - 0.6*(1/2) - 0.4*(0/2) = 0.7
      expect(extractToolEfficiency(run, TOOL_WEIGHT).value).toBeCloseTo(0.7, 5);
    });

    it("should penalize redundant calls", () => {
      const run = makeRun({
        toolCalls: [toolCall("a", 10, "h1"), toolCall("a", 10, "h1")],
      });
      // No failures, one repeated call out of two: 1.0 - 0.6*(0/2) - 0.4*(1/2) = 0.8
      expect(extractToolEfficiency(run, TOOL_WEIGHT).value).toBeCloseTo(0.8, 5);
    });

    it("should increase confidence with more tool calls", () => {
      const singleCallRun = makeRun({ toolCalls: [toolCall("a", 10, "h1")] });
      const fiveCallRun = makeRun({
        toolCalls: [0, 1, 2, 3, 4].map((i) => toolCall(`t${i}`, 10, `h${i}`)),
      });

      const few = extractToolEfficiency(singleCallRun, TOOL_WEIGHT);
      const many = extractToolEfficiency(fiveCallRun, TOOL_WEIGHT);
      expect(many.confidence).toBeGreaterThan(few.confidence);
    });
  });

  describe("Response Appropriateness", () => {
    it("should return 0.5 for zero prompt length", () => {
      const run = makeRun({ promptLength: 0 });
      expect(extractResponseAppropriateness(run, LENGTH_WEIGHT).value).toBe(0.5);
    });

    it("should return ~1.0 for equal prompt and response length", () => {
      const run = makeRun({ promptLength: 100, responseLength: 100 });
      // Equal lengths give ratio = 1.0, so ln(1) = 0 and exp(-0) = 1.0
      expect(extractResponseAppropriateness(run, LENGTH_WEIGHT).value).toBeCloseTo(1.0, 5);
    });

    it("should penalize very long responses", () => {
      const run = makeRun({ promptLength: 10, responseLength: 10000 });
      expect(extractResponseAppropriateness(run, LENGTH_WEIGHT).value).toBeLessThan(0.5);
    });
  });

  describe("Latency Relative", () => {
    it("should return 1.0 for zero duration", () => {
      const run = makeRun({ durationMs: 0 });
      const result = extractLatencyRelative(run, LATENCY_WEIGHT, { value: 0, count: 0 });
      expect(result.value).toBe(1.0);
    });

    it("should return 1.0 when faster than EWMA", () => {
      const run = makeRun({ durationMs: 500 });
      const result = extractLatencyRelative(run, LATENCY_WEIGHT, { value: 1000, count: 10 });
      expect(result.value).toBe(1.0);
    });

    it("should degrade when slower than EWMA", () => {
      const run = makeRun({ durationMs: 2000 });
      const result = extractLatencyRelative(run, LATENCY_WEIGHT, { value: 1000, count: 10 });
      expect(result.value).toBeCloseTo(0.5, 5);
    });
  });

  describe("User Feedback", () => {
    it("should return confidence 0 when no feedback", () => {
      expect(extractUserFeedback(FEEDBACK_WEIGHT).confidence).toBe(0.0);
    });

    it("should map explicit 5 to 1.0", () => {
      const result = extractUserFeedback(FEEDBACK_WEIGHT, { source: "explicit", score: 5 });
      expect(result.value).toBe(1.0);
      expect(result.confidence).toBe(1.0);
    });

    it("should map explicit 1 to 0.0", () => {
      const result = extractUserFeedback(FEEDBACK_WEIGHT, { source: "explicit", score: 1 });
      expect(result.value).toBe(0.0);
    });

    it("should map explicit 3 to 0.5", () => {
      const result = extractUserFeedback(FEEDBACK_WEIGHT, { source: "explicit", score: 3 });
      expect(result.value).toBe(0.5);
    });

    it("should use lower confidence for implicit feedback", () => {
      const result = extractUserFeedback(FEEDBACK_WEIGHT, { source: "implicit", score: 0.8 });
      expect(result.confidence).toBe(0.6);
    });
  });
});
|
|
205
|
-
|
|
206
|
-
describe("Signal Combination", () => {
  /** Fresh fully-confident task-completion signal (a new object on every call). */
  const taskCompletionSignal = () => ({
    name: "taskCompletion" as const,
    value: 1.0,
    confidence: 1.0,
    weight: 0.35,
  });

  it("should produce score in [0, 1]", () => {
    const combined = combineSignals([
      taskCompletionSignal(),
      { name: "toolEfficiency", value: 0.8, confidence: 0.7, weight: 0.25 },
      { name: "responseAppropriateLength", value: 0.9, confidence: 0.5, weight: 0.1 },
      { name: "latencyRelative", value: 0.7, confidence: 0.3, weight: 0.1 },
      { name: "userFeedback", value: 0.5, confidence: 0.0, weight: 0.2 },
    ]);

    expect(combined.score).toBeGreaterThanOrEqual(0);
    expect(combined.score).toBeLessThanOrEqual(1);
    expect(combined.algorithmVersion).toBe(1);
  });

  it("should exclude zero-confidence signals", () => {
    const withoutFeedback = combineSignals([taskCompletionSignal()]);
    const withFeedback = combineSignals([
      taskCompletionSignal(),
      { name: "userFeedback", value: 0.0, confidence: 0.0, weight: 0.2 },
    ]);

    // The feedback signal carries zero confidence, so both scores must agree.
    expect(withFeedback.score).toBeCloseTo(withoutFeedback.score, 5);
  });
});
|
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect, beforeEach } from "vitest";
|
|
2
|
-
import { createRunTracker } from "../capture/run-tracker.js";
|
|
3
|
-
|
|
4
|
-
describe("RunTracker", () => {
  let tracker: ReturnType<typeof createRunTracker>;

  type LlmInput = Parameters<typeof tracker.onLlmInput>[0];
  type FinalizeParams = Parameters<typeof tracker.finalize>[0];

  /**
   * Opens run "run-1" on session key "sk-1" with the shared input payload.
   * `overrides` replaces or adds individual input fields for a single test.
   */
  const startRun = (overrides: Partial<LlmInput> = {}): void => {
    tracker.onLlmInput({
      runId: "run-1",
      sessionId: "sess-1",
      provider: "openai",
      model: "gpt-4",
      prompt: "Hello",
      sessionKey: "sk-1",
      ...overrides,
    });
  };

  /**
   * Finalizes a run and fails the test when the tracker returns nothing, so
   * callers receive a narrowed result instead of sprinkling `!` assertions.
   */
  const finalizeRun = (params: FinalizeParams) => {
    const completed = tracker.finalize(params);
    expect(completed).not.toBeNull();
    if (completed == null) {
      throw new Error("finalize returned no run");
    }
    return completed;
  };

  beforeEach(() => {
    tracker = createRunTracker();
  });

  it("should start with no active runs", () => {
    expect(tracker.getActiveRunCount()).toBe(0);
  });

  it("should create a run on llm_input", () => {
    startRun({ prompt: "Hello world", agentId: "agent-1" });
    expect(tracker.getActiveRunCount()).toBe(1);
  });

  it("should bind session to run", () => {
    startRun();
    expect(tracker.getRunIdForSession("sk-1")).toBe("run-1");
  });

  it("should accumulate tool calls", () => {
    startRun();

    tracker.onToolCall({
      toolName: "search",
      durationMs: 50,
      success: true,
      paramHash: "h1",
      sessionKey: "sk-1",
    });

    tracker.onToolCall({
      toolName: "read",
      durationMs: 30,
      success: false,
      error: "not found",
      paramHash: "h2",
      sessionKey: "sk-1",
    });

    const completed = finalizeRun({
      sessionKey: "sk-1",
      success: true,
      durationMs: 500,
    });

    expect(completed.toolCalls).toHaveLength(2);
    expect(completed.toolCalls[0]?.toolName).toBe("search");
    expect(completed.toolCalls[1]?.success).toBe(false);
  });

  it("should capture usage from llm_output", () => {
    startRun({ provider: "claude", model: "claude-3", prompt: "Test prompt" });

    tracker.onLlmOutput({
      runId: "run-1",
      sessionId: "sess-1",
      assistantTexts: ["Response text here"],
      usage: { input: 10, output: 20, total: 30 },
    });

    const completed = finalizeRun({
      sessionKey: "sk-1",
      success: true,
      durationMs: 200,
    });

    expect(completed.responseLength).toBe("Response text here".length);
    expect(completed.usage.input).toBe(10);
    expect(completed.usage.output).toBe(20);
  });

  it("should return null for finalize without llm_input", () => {
    const result = tracker.finalize({
      sessionKey: "sk-unknown",
      success: true,
    });
    expect(result).toBeNull();
  });

  it("should clean up after finalize", () => {
    startRun();

    tracker.finalize({ sessionKey: "sk-1", success: true });
    expect(tracker.getActiveRunCount()).toBe(0);
    expect(tracker.getRunIdForSession("sk-1")).toBeUndefined();
  });

  it("should clear all state", () => {
    startRun();
    tracker.clear();
    expect(tracker.getActiveRunCount()).toBe(0);
  });
});
|
|
@@ -1,281 +0,0 @@
|
|
|
1
|
-
// Identifier aliases. Each is a plain `string` alias, so they document intent
// at use sites but are freely interchangeable as far as the type checker goes.
export type RunId = string;
export type SessionId = string;
export type SessionKey = string;
export type AgentId = string;
|
|
5
|
-
|
|
6
|
-
/**
 * Usage counters attached to a run. Every counter is optional.
 *
 * NOTE(review): presumably token counts (`MetricsBucket` aggregates
 * `tokenInputSum` / `tokenOutputSum`) — confirm against the capture code.
 */
export type UsageData = {
  input?: number;
  output?: number;
  cacheRead?: number;
  cacheWrite?: number;
  total?: number;
};
|
|
13
|
-
|
|
14
|
-
/**
 * One tool invocation recorded on a run.
 *
 * `durationMs` and `error` are nullable rather than optional, so both keys are
 * always present on a record.
 */
export type ToolCallRecord = {
  toolName: string;
  durationMs: number | null;
  success: boolean;
  error: string | null;
  // NOTE(review): presumably a hash of the call parameters — confirm where it is produced.
  paramHash: string;
};
|
|
21
|
-
|
|
22
|
-
/**
 * A run record in which only `runId`, `toolCalls` and `startedAt` are
 * required; every other field is optional.
 *
 * NOTE(review): presumably the in-progress shape that is later completed into
 * a `CompletedRun` — confirm against the run tracker.
 */
export type PartialRun = {
  runId: RunId;
  sessionId?: SessionId;
  sessionKey?: SessionKey;
  agentId?: AgentId;
  provider?: string;
  model?: string;
  promptHash?: string;
  promptLength?: number;
  responseLength?: number;
  responseToolCallCount?: number;
  usage?: UsageData;
  toolCalls: ToolCallRecord[];
  success?: boolean;
  error?: string;
  durationMs?: number;
  startedAt: number;
};
|
|
40
|
-
|
|
41
|
-
/**
 * A run record with every field required.
 *
 * Carries the same fields as `PartialRun` plus `completedAt`. `error` is
 * `string | null` here (always present), whereas `PartialRun.error` is an
 * optional `string`.
 */
export type CompletedRun = {
  runId: RunId;
  // Identifier fields use the same aliases as PartialRun; the aliases are
  // plain strings, so this is type-identical to the previous `string` fields.
  sessionId: SessionId;
  sessionKey: SessionKey;
  agentId: AgentId;
  provider: string;
  model: string;
  promptHash: string;
  promptLength: number;
  responseLength: number;
  responseToolCallCount: number;
  usage: UsageData;
  toolCalls: ToolCallRecord[];
  success: boolean;
  error: string | null;
  durationMs: number;
  startedAt: number;
  completedAt: number;
};
|
|
60
|
-
|
|
61
|
-
/**
 * Names of the quality signals. The five literals are the same as the keys of
 * `ScoringWeights`.
 */
export type QualitySignalName =
  | "taskCompletion"
  | "toolEfficiency"
  | "responseAppropriateLength"
  | "latencyRelative"
  | "userFeedback";
|
|
67
|
-
|
|
68
|
-
/**
 * A single named quality measurement together with its confidence and weight.
 *
 * NOTE(review): `value` and `confidence` look like they are meant to lie in
 * [0, 1] — confirm against the signal extractors.
 */
export type QualitySignal = {
  name: QualitySignalName;
  value: number;
  confidence: number;
  weight: number;
};
|
|
74
|
-
|
|
75
|
-
/**
 * A combined score together with the signals attached to it and the version
 * of the scoring algorithm.
 */
export type QualityScore = {
  score: number;
  signals: QualitySignal[];
  // NOTE(review): presumably set from ALGORITHM_VERSION — confirm in the scoring code.
  algorithmVersion: number;
};
|
|
80
|
-
|
|
81
|
-
/** The four categories a `LearningRecord` can be filed under. */
export type LearningCategory =
  | "tool_pattern"
  | "error_recovery"
  | "model_affinity"
  | "anti_pattern";
|
|
86
|
-
|
|
87
|
-
/**
 * A learning tied to the run identified by `runId`.
 *
 * `embedding` is `null` when no vector is attached to the record.
 */
export type LearningRecord = {
  id: string;
  runId: RunId;
  category: LearningCategory;
  content: string;
  embedding: number[] | null;
  confidence: number;
  appliedCount: number;
  // NOTE(review): timestamps are presumably epoch milliseconds — confirm against the store.
  createdAt: number;
  updatedAt: number;
};
|
|
98
|
-
|
|
99
|
-
/**
 * A feedback entry for a run, tagged as either explicit or implicit.
 *
 * NOTE(review): the scale of `score` appears to depend on `source` — confirm
 * the expected ranges against the feedback modules.
 */
export type FeedbackRecord = {
  id: string;
  runId: RunId;
  source: "explicit" | "implicit";
  score: number;
  createdAt: number;
};
|
|
106
|
-
|
|
107
|
-
/**
 * Aggregate counters for one provider/model pair in one `bucketHour`.
 *
 * NOTE(review): the paired `*Sum` / `*SumSq` fields presumably hold running
 * sums and sums of squares so mean and variance can be derived without the
 * individual samples — confirm against the metrics aggregator.
 */
export type MetricsBucket = {
  provider: string;
  model: string;
  // NOTE(review): string-typed hour key; exact format is not visible here.
  bucketHour: string;
  runCount: number;
  successCount: number;
  qualitySum: number;
  qualitySumSq: number;
  latencySum: number;
  latencySumSq: number;
  tokenInputSum: number;
  tokenOutputSum: number;
};
|
|
120
|
-
|
|
121
|
-
/**
 * Snake_case row shape for a run.
 *
 * Mirrors the fields of `CompletedRun` with the usage counters flattened into
 * `usage_*` columns, plus nullable `quality_score` and `algorithm_version`.
 * `success` is a `number` here, whereas `CompletedRun.success` is a boolean.
 *
 * NOTE(review): presumably the database row with `success` stored as 0/1 —
 * confirm against the db module.
 */
export type RunRow = {
  run_id: string;
  session_id: string;
  session_key: string;
  agent_id: string;
  provider: string;
  model: string;
  prompt_hash: string;
  prompt_length: number;
  response_length: number;
  response_tool_call_count: number;
  usage_input: number;
  usage_output: number;
  usage_cache_read: number;
  usage_cache_write: number;
  usage_total: number;
  success: number;
  error: string | null;
  duration_ms: number;
  quality_score: number | null;
  algorithm_version: number | null;
  started_at: number;
  completed_at: number;
};
|
|
145
|
-
|
|
146
|
-
/**
 * Snake_case row shape for a tool call.
 *
 * Mirrors `ToolCallRecord` and adds a numeric `id` and the owning `run_id`.
 * `success` is a `number` here, whereas `ToolCallRecord.success` is a boolean.
 */
export type ToolCallRow = {
  id: number;
  run_id: string;
  tool_name: string;
  duration_ms: number | null;
  success: number;
  error: string | null;
  param_hash: string;
};
|
|
155
|
-
|
|
156
|
-
/**
 * Snake_case row shape for a learning.
 *
 * Mirrors `LearningRecord`, except that `category` is widened to `string` and
 * `embedding` is `string | null` instead of `number[] | null`.
 *
 * NOTE(review): the embedding is presumably stored serialized — confirm the
 * encoding against the learning store.
 */
export type LearningRow = {
  id: string;
  run_id: string;
  category: string;
  content: string;
  embedding: string | null;
  confidence: number;
  applied_count: number;
  created_at: number;
  updated_at: number;
};
|
|
167
|
-
|
|
168
|
-
/**
 * Snake_case row shape for a feedback entry.
 *
 * Mirrors `FeedbackRecord`, with `source` widened from the
 * "explicit" | "implicit" union to `string`.
 */
export type FeedbackRow = {
  id: string;
  run_id: string;
  source: string;
  score: number;
  created_at: number;
};
|
|
175
|
-
|
|
176
|
-
/** Snake_case row shape carrying the same fields as `MetricsBucket`. */
export type MetricsBucketRow = {
  provider: string;
  model: string;
  bucket_hour: string;
  run_count: number;
  success_count: number;
  quality_sum: number;
  quality_sum_sq: number;
  latency_sum: number;
  latency_sum_sq: number;
  token_input_sum: number;
  token_output_sum: number;
};
|
|
189
|
-
|
|
190
|
-
/** One numeric weight per quality signal; the keys match `QualitySignalName`. */
export type ScoringWeights = {
  taskCompletion: number;
  toolEfficiency: number;
  responseAppropriateLength: number;
  latencyRelative: number;
  userFeedback: number;
};

/** Settings of the `injection` config section. */
export type InjectionConfig = {
  maxLearnings: number;
  minRelevance: number;
  maxTokens: number;
  cacheTtlMs: number;
};

/** Settings of the `capture` config section. */
export type CaptureConfig = {
  embedPrompts: boolean;
  maxRuns: number;
};

/** Settings of the `decay` config section. */
export type DecayConfig = {
  halfLifeDays: number;
};

/** Fully resolved plugin configuration: every section and field is present. */
export type LearningLoopConfig = {
  capture: CaptureConfig;
  scoring: { weights: ScoringWeights };
  injection: InjectionConfig;
  decay: DecayConfig;
};

// Defaults applied to anything the plugin config leaves out.
// The five scoring weights add up to 1.0.
const DEFAULT_CONFIG: LearningLoopConfig = {
  capture: { embedPrompts: false, maxRuns: 10_000 },
  scoring: {
    weights: {
      taskCompletion: 0.35,
      toolEfficiency: 0.25,
      responseAppropriateLength: 0.1,
      latencyRelative: 0.1,
      userFeedback: 0.2,
    },
  },
  injection: {
    maxLearnings: 5,
    minRelevance: 0.3,
    maxTokens: 500,
    cacheTtlMs: 60_000,
  },
  decay: { halfLifeDays: 30 },
};

/**
 * Returns a fresh copy of `defaults` overlaid with the usable entries of
 * `overrides`.
 *
 * An override is used only when its key exists in `defaults` and its `typeof`
 * matches the default's. That skips explicit `undefined`, `null` and
 * wrong-typed values, all of which a plain object spread would copy over the
 * default. A non-object `overrides` (missing, `null`, a string, an array) is
 * ignored entirely.
 */
function mergeSection<T extends object>(defaults: T, overrides: unknown): T {
  const merged = { ...defaults };
  if (typeof overrides !== "object" || overrides === null || Array.isArray(overrides)) {
    return merged;
  }

  const source = overrides as Record<string, unknown>;
  for (const key of Object.keys(defaults) as (keyof T & string)[]) {
    const candidate = source[key];
    if (typeof candidate === typeof defaults[key]) {
      merged[key] = candidate as T[keyof T & string];
    }
  }
  return merged;
}

/**
 * Resolves the untyped plugin configuration into a complete
 * `LearningLoopConfig`, filling every missing field from the defaults.
 *
 * Each call returns newly created section objects, so mutating a returned
 * config can never alter the shared defaults or a config returned by another
 * call (the previous `{ ...DEFAULT_CONFIG }` shortcut handed out the default
 * sections themselves).
 *
 * @param pluginConfig - Raw plugin configuration; may be omitted.
 * @returns The merged configuration.
 */
export function resolveConfig(pluginConfig?: Record<string, unknown>): LearningLoopConfig {
  const raw = pluginConfig ?? {};

  // `scoring.weights` sits one level deeper than the other sections.
  const scoring = raw.scoring;
  const weightOverrides =
    typeof scoring === "object" && scoring !== null
      ? (scoring as Record<string, unknown>).weights
      : undefined;

  return {
    capture: mergeSection(DEFAULT_CONFIG.capture, raw.capture),
    scoring: {
      weights: mergeSection(DEFAULT_CONFIG.scoring.weights, weightOverrides),
    },
    injection: mergeSection(DEFAULT_CONFIG.injection, raw.injection),
    decay: mergeSection(DEFAULT_CONFIG.decay, raw.decay),
  };
}
|
|
260
|
-
|
|
261
|
-
// Single-letter edge type codes. What each letter stands for is not stated in
// this file — NOTE(review): document the six codes here once confirmed.
export type EdgeType = "T" | "S" | "C" | "U" | "X" | "R";
|
|
262
|
-
|
|
263
|
-
/**
 * A typed, weighted edge from `sourceId` to `targetId`.
 *
 * NOTE(review): the endpoint ids are presumably `LearningRecord` ids — confirm
 * against the graph store.
 */
export interface LearningEdge {
  id: number;
  sourceId: string;
  targetId: string;
  edgeType: EdgeType;
  weight: number;
  createdAt: number;
}
|
|
271
|
-
|
|
272
|
-
/**
 * Snake_case row shape carrying the same fields as `LearningEdge`, with
 * `edge_type` widened from `EdgeType` to `string`.
 */
export interface EdgeRow {
  id: number;
  source_id: string;
  target_id: string;
  edge_type: string;
  weight: number;
  created_at: number;
}
|
|
280
|
-
|
|
281
|
-
// Version number of the scoring algorithm.
// NOTE(review): presumably the value written to QualityScore.algorithmVersion — confirm.
export const ALGORITHM_VERSION = 1;
|
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"id": "learning-loop",
|
|
3
|
-
"configSchema": {
|
|
4
|
-
"type": "object",
|
|
5
|
-
"additionalProperties": false,
|
|
6
|
-
"properties": {
|
|
7
|
-
"capture": {
|
|
8
|
-
"type": "object",
|
|
9
|
-
"properties": {
|
|
10
|
-
"embedPrompts": { "type": "boolean" },
|
|
11
|
-
"maxRuns": { "type": "number" }
|
|
12
|
-
}
|
|
13
|
-
},
|
|
14
|
-
"scoring": {
|
|
15
|
-
"type": "object",
|
|
16
|
-
"properties": {
|
|
17
|
-
"weights": {
|
|
18
|
-
"type": "object",
|
|
19
|
-
"properties": {
|
|
20
|
-
"taskCompletion": { "type": "number" },
|
|
21
|
-
"toolEfficiency": { "type": "number" },
|
|
22
|
-
"responseAppropriateLength": { "type": "number" },
|
|
23
|
-
"latencyRelative": { "type": "number" },
|
|
24
|
-
"userFeedback": { "type": "number" }
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
}
|
|
28
|
-
},
|
|
29
|
-
"injection": {
|
|
30
|
-
"type": "object",
|
|
31
|
-
"properties": {
|
|
32
|
-
"maxLearnings": { "type": "number" },
|
|
33
|
-
"minRelevance": { "type": "number" },
|
|
34
|
-
"maxTokens": { "type": "number" },
|
|
35
|
-
"cacheTtlMs": { "type": "number" }
|
|
36
|
-
}
|
|
37
|
-
},
|
|
38
|
-
"decay": {
|
|
39
|
-
"type": "object",
|
|
40
|
-
"properties": {
|
|
41
|
-
"halfLifeDays": { "type": "number" }
|
|
42
|
-
}
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
}
|
|
46
|
-
}
|