@symerian/symi 3.0.17 → 3.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{audio-preflight-CBDFctZN.js → audio-preflight-BfmZbg4Y.js} +4 -4
- package/dist/{audio-preflight-gsZSpG-6.js → audio-preflight-DcuC-liM.js} +4 -4
- package/dist/build-info.json +3 -3
- package/dist/bundled/boot-md/handler.js +8 -8
- package/dist/bundled/session-memory/handler.js +7 -7
- package/dist/canvas-host/a2ui/.bundle.hash +1 -1
- package/dist/{chrome-nPMY1XTJ.js → chrome-Bo7cbvFK.js} +5 -5
- package/dist/{chrome-BjVab8gM.js → chrome-DYp18Q0t.js} +5 -5
- package/dist/{deliver-D-QFqm31.js → deliver-ChSIbiMM.js} +1 -1
- package/dist/{deliver-B4-bcot9.js → deliver-DEgRQM4J.js} +1 -1
- package/dist/extensionAPI.js +7 -7
- package/dist/{image-CDwtQjmt.js → image-Bx-hvoNJ.js} +1 -1
- package/dist/{image-CcS-vzTA.js → image-CQl_mjWk.js} +1 -1
- package/dist/llm-slug-generator.js +7 -7
- package/dist/{manager-BnEdHzmO.js → manager-D_pn0urG.js} +1 -1
- package/dist/{manager-09r0qPze.js → manager-YQxK2t0C.js} +1 -1
- package/dist/{pi-embedded-CWsY69-4.js → pi-embedded-CLw_ZzEZ.js} +16 -16
- package/dist/{pi-embedded-helpers-BBMy-lqr.js → pi-embedded-helpers-B5I53aw6.js} +4 -4
- package/dist/{pi-embedded-helpers-ChEYbgVj.js → pi-embedded-helpers-sUAEIC9X.js} +4 -4
- package/dist/plugin-sdk/{accounts-BfyWsC_i.js → accounts-CWFytwbR.js} +3 -3
- package/dist/plugin-sdk/{active-listener-DcJW7xAT.js → active-listener-BkZ4jHrL.js} +2 -2
- package/dist/plugin-sdk/{agent-scope-ChbGV6of.js → agent-scope-C9gfY_Gk.js} +2 -2
- package/dist/plugin-sdk/{audio-preflight-D3GtNLqW.js → audio-preflight-HKbdzXLZ.js} +21 -21
- package/dist/plugin-sdk/{bindings-CN2Qmefj.js → bindings-BaKIqPPy.js} +2 -2
- package/dist/plugin-sdk/{channel-web-DTyqujjA.js → channel-web-D5nWiTH1.js} +18 -18
- package/dist/plugin-sdk/{chrome-BKzAKr3K.js → chrome-klTSnz-9.js} +3 -3
- package/dist/plugin-sdk/{chunk-DhDkBujV.js → chunk-BbrYSny_.js} +1 -1
- package/dist/plugin-sdk/{command-format-CVrYFyZS.js → command-format-BN6tyZt6.js} +1 -1
- package/dist/plugin-sdk/{commands-registry-17yfZkHZ.js → commands-registry-CTzKKtY6.js} +4 -4
- package/dist/plugin-sdk/{config-7wk65zKC.js → config-Crv2qEdJ.js} +9 -9
- package/dist/plugin-sdk/{consolidate-exbAW0ml.js → consolidate-DT1QH65Q.js} +2 -2
- package/dist/plugin-sdk/{deliver-TxAcw7J5.js → deliver-7rOvAlrc.js} +12 -12
- package/dist/plugin-sdk/{diagnostic-Debx4frd.js → diagnostic-0nsxhWp7.js} +1 -1
- package/dist/plugin-sdk/{fs-safe-wBYbAkJF.js → fs-safe-DfWYBeWF.js} +1 -1
- package/dist/plugin-sdk/{gemini-auth-7U2pm2Ky.js → gemini-auth-C0N0_u49.js} +1 -1
- package/dist/plugin-sdk/{image-BtDVmYA5.js → image-WOSl2apK.js} +4 -4
- package/dist/plugin-sdk/index.js +43 -43
- package/dist/plugin-sdk/{ir-CKMvRrGW.js → ir-9J84MTls.js} +4 -4
- package/dist/plugin-sdk/{local-roots-c_gaPs01.js → local-roots-OLRDbvyY.js} +3 -3
- package/dist/plugin-sdk/{login-DUym1Jy0.js → login-C7x4q0i2.js} +7 -7
- package/dist/plugin-sdk/{login-qr-B-WBdvrX.js → login-qr-Dv5_MoAW.js} +9 -9
- package/dist/plugin-sdk/{manager-B71SCzos.js → manager-C83tK17x.js} +8 -8
- package/dist/plugin-sdk/{manifest-registry-Dnic6Chh.js → manifest-registry-CJMV-PI7.js} +1 -1
- package/dist/plugin-sdk/{markdown-tables-Dur7OTlM.js → markdown-tables-DXNKz5y_.js} +1 -1
- package/dist/plugin-sdk/{message-channel-BrAhJJV_.js → message-channel-aGy1HbQQ.js} +1 -1
- package/dist/plugin-sdk/{model-selection-B9qaVQSJ.js → model-selection-C-3-tpe7.js} +4 -4
- package/dist/plugin-sdk/{outbound-DB1wDM8b.js → outbound-DquCeSy5.js} +6 -6
- package/dist/plugin-sdk/{pi-auth-json-ZO118hoy.js → pi-auth-json-D9PDCXGn.js} +1 -1
- package/dist/plugin-sdk/{pi-embedded-helpers-s_U0Un7j.js → pi-embedded-helpers-D3ygfH7l.js} +16 -16
- package/dist/plugin-sdk/{plugins-DF81oSaI.js → plugins-DOwnSg9D.js} +4 -4
- package/dist/plugin-sdk/{pw-ai-CTwP02uv.js → pw-ai-rlengLjb.js} +8 -8
- package/dist/plugin-sdk/{qmd-manager-CBaSGant.js → qmd-manager-BzxFjRFa.js} +4 -4
- package/dist/plugin-sdk/{registry-CZVURNhF.js → registry-5iFfixlB.js} +2 -2
- package/dist/plugin-sdk/{replies-hwRbkU3z.js → replies-BXOzO_H5.js} +7 -7
- package/dist/plugin-sdk/{reply-prefix-CaXmzZlx.js → reply-prefix-INAKTqCU.js} +1 -1
- package/dist/plugin-sdk/{resolve-outbound-target-fxVSOBmk.js → resolve-outbound-target-DvbxHtqp.js} +2 -2
- package/dist/plugin-sdk/{resolve-route-ClCyiOeu.js → resolve-route-URXlY3AK.js} +3 -3
- package/dist/plugin-sdk/{runner-Cq5jvwQ7.js → runner-Bv0_DWoH.js} +9 -9
- package/dist/plugin-sdk/{session-B_TkB65Y.js → session-C3r8l7ou.js} +4 -4
- package/dist/plugin-sdk/{skill-commands-0LF9HTGr.js → skill-commands-KjLUGIdZ.js} +5 -5
- package/dist/plugin-sdk/{skills-BIT_O7J0.js → skills-BrsD4L5c.js} +7 -7
- package/dist/plugin-sdk/{sqlite-Bx5Y5U5X.js → sqlite-CjW7ME1H.js} +1 -1
- package/dist/plugin-sdk/{subsystem-CXqYeDy-.js → subsystem-DcOg1xJr.js} +1 -1
- package/dist/plugin-sdk/{synthesis-DtsYAj1E.js → synthesis-CY7YAasV.js} +38 -38
- package/dist/plugin-sdk/{target-errors-B8mokOeH.js → target-errors-BVWJGWFq.js} +2 -2
- package/dist/plugin-sdk/{thinking-Ca0DhqzO.js → thinking-CtsTDPOi.js} +3 -3
- package/dist/plugin-sdk/{tokens-CvlONEqh.js → tokens-8lqOTZCB.js} +1 -1
- package/dist/plugin-sdk/{tool-images-DpBaWEHT.js → tool-images-Cl_rGIUZ.js} +2 -2
- package/dist/plugin-sdk/{tool-loop-detection-BOvUFa0f.js → tool-loop-detection-Da4WUT_P.js} +2 -2
- package/dist/plugin-sdk/{unified-runner-CnM7lyNd.js → unified-runner-nwMnsZyj.js} +60 -60
- package/dist/plugin-sdk/web-BlweOZDp.js +54 -0
- package/dist/plugin-sdk/{whatsapp-actions-CvnfsFJm.js → whatsapp-actions-DpfaGYs7.js} +21 -21
- package/dist/{pw-ai-BW8_KeDf.js → pw-ai-BqxJG-Wh.js} +1 -1
- package/dist/{pw-ai-j9IE1K0-.js → pw-ai-C-NSGye0.js} +1 -1
- package/dist/{runner-8ALr2UII.js → runner-COGFTeDw.js} +1 -1
- package/dist/{runner-C4-9kFdR.js → runner-DhCi2lT1.js} +1 -1
- package/dist/{synthesis-Cph3LhA1.js → synthesis-CXZu24Vx.js} +7 -7
- package/dist/{synthesis-Cus0A2dL.js → synthesis-DrPxcMlQ.js} +7 -7
- package/dist/{unified-runner-CX80YMTk.js → unified-runner-iByUazvW.js} +16 -16
- package/dist/{web-ChozvJ7I.js → web-EsMQBIYf.js} +7 -7
- package/dist/{web-DFlsbXmQ.js → web-PPg5y6xI.js} +7 -7
- package/package.json +1 -1
- package/dist/plugin-sdk/web-CIPJBHAU.js +0 -54
- package/extensions/copilot-proxy/README.md +0 -24
- package/extensions/copilot-proxy/index.ts +0 -154
- package/extensions/copilot-proxy/node_modules/.bin/symi +0 -21
- package/extensions/copilot-proxy/package.json +0 -15
- package/extensions/copilot-proxy/symi.plugin.json +0 -9
- package/extensions/device-pair/index.ts +0 -642
- package/extensions/device-pair/symi.plugin.json +0 -20
- package/extensions/diagnostics-otel/index.ts +0 -15
- package/extensions/diagnostics-otel/node_modules/.bin/acorn +0 -21
- package/extensions/diagnostics-otel/node_modules/.bin/symi +0 -21
- package/extensions/diagnostics-otel/package.json +0 -27
- package/extensions/diagnostics-otel/src/service.test.ts +0 -290
- package/extensions/diagnostics-otel/src/service.ts +0 -666
- package/extensions/diagnostics-otel/symi.plugin.json +0 -8
- package/extensions/google-antigravity-auth/README.md +0 -24
- package/extensions/google-antigravity-auth/index.ts +0 -424
- package/extensions/google-antigravity-auth/node_modules/.bin/symi +0 -21
- package/extensions/google-antigravity-auth/package.json +0 -15
- package/extensions/google-antigravity-auth/symi.plugin.json +0 -9
- package/extensions/google-gemini-cli-auth/README.md +0 -35
- package/extensions/google-gemini-cli-auth/index.ts +0 -75
- package/extensions/google-gemini-cli-auth/node_modules/.bin/symi +0 -21
- package/extensions/google-gemini-cli-auth/oauth.test.ts +0 -162
- package/extensions/google-gemini-cli-auth/oauth.ts +0 -636
- package/extensions/google-gemini-cli-auth/package.json +0 -15
- package/extensions/google-gemini-cli-auth/symi.plugin.json +0 -9
- package/extensions/learning-loop/index.ts +0 -159
- package/extensions/learning-loop/node_modules/.bin/symi +0 -21
- package/extensions/learning-loop/package.json +0 -18
- package/extensions/learning-loop/src/analytics/gateway-methods.ts +0 -230
- package/extensions/learning-loop/src/analytics/metrics-aggregator.ts +0 -153
- package/extensions/learning-loop/src/capture/run-tracker.ts +0 -181
- package/extensions/learning-loop/src/capture/serializer.ts +0 -74
- package/extensions/learning-loop/src/db.ts +0 -583
- package/extensions/learning-loop/src/feedback/explicit-feedback.ts +0 -58
- package/extensions/learning-loop/src/feedback/implicit-signals.ts +0 -89
- package/extensions/learning-loop/src/graph/edge-inference.ts +0 -189
- package/extensions/learning-loop/src/graph/graph-retrieval.ts +0 -144
- package/extensions/learning-loop/src/graph/graph-store.ts +0 -183
- package/extensions/learning-loop/src/hooks.ts +0 -244
- package/extensions/learning-loop/src/injection/cache.ts +0 -73
- package/extensions/learning-loop/src/injection/context-injector.ts +0 -104
- package/extensions/learning-loop/src/injection/prompt-builder.ts +0 -43
- package/extensions/learning-loop/src/learning/embedding-bridge.ts +0 -54
- package/extensions/learning-loop/src/learning/learning-extractor.ts +0 -217
- package/extensions/learning-loop/src/learning/learning-store.ts +0 -158
- package/extensions/learning-loop/src/learning/retrieval.ts +0 -87
- package/extensions/learning-loop/src/math/confidence-intervals.ts +0 -62
- package/extensions/learning-loop/src/math/ewma.ts +0 -51
- package/extensions/learning-loop/src/math/weighted-scorer.ts +0 -42
- package/extensions/learning-loop/src/schema.ts +0 -176
- package/extensions/learning-loop/src/scoring/normalization.ts +0 -32
- package/extensions/learning-loop/src/scoring/quality-engine.ts +0 -78
- package/extensions/learning-loop/src/scoring/signal-extractors.ts +0 -155
- package/extensions/learning-loop/src/test/context-injector.test.ts +0 -142
- package/extensions/learning-loop/src/test/fixes.test.ts +0 -1286
- package/extensions/learning-loop/src/test/graph.test.ts +0 -711
- package/extensions/learning-loop/src/test/integration.test.ts +0 -312
- package/extensions/learning-loop/src/test/learning-store.test.ts +0 -191
- package/extensions/learning-loop/src/test/math.test.ts +0 -148
- package/extensions/learning-loop/src/test/quality-engine.test.ts +0 -231
- package/extensions/learning-loop/src/test/run-tracker.test.ts +0 -143
- package/extensions/learning-loop/src/types.ts +0 -281
- package/extensions/learning-loop/symi.plugin.json +0 -46
- package/extensions/llm-task/README.md +0 -97
- package/extensions/llm-task/index.ts +0 -6
- package/extensions/llm-task/package.json +0 -12
- package/extensions/llm-task/src/llm-task-tool.test.ts +0 -138
- package/extensions/llm-task/src/llm-task-tool.ts +0 -249
- package/extensions/llm-task/symi.plugin.json +0 -21
- package/extensions/memory-lancedb/config.ts +0 -161
- package/extensions/memory-lancedb/index.test.ts +0 -330
- package/extensions/memory-lancedb/index.ts +0 -670
- package/extensions/memory-lancedb/node_modules/.bin/arrow2csv +0 -21
- package/extensions/memory-lancedb/node_modules/.bin/openai +0 -21
- package/extensions/memory-lancedb/node_modules/.bin/symi +0 -21
- package/extensions/memory-lancedb/package.json +0 -20
- package/extensions/memory-lancedb/symi.plugin.json +0 -71
- package/extensions/minimax-portal-auth/README.md +0 -33
- package/extensions/minimax-portal-auth/index.ts +0 -161
- package/extensions/minimax-portal-auth/node_modules/.bin/symi +0 -21
- package/extensions/minimax-portal-auth/oauth.ts +0 -247
- package/extensions/minimax-portal-auth/package.json +0 -15
- package/extensions/minimax-portal-auth/symi.plugin.json +0 -9
- package/extensions/model-equalizer/index.ts +0 -80
- package/extensions/model-equalizer/skills/model-equalizer/SKILL.md +0 -58
- package/extensions/model-equalizer/src/detection.ts +0 -62
- package/extensions/model-equalizer/src/enhancer.ts +0 -63
- package/extensions/model-equalizer/src/test/detection.test.ts +0 -218
- package/extensions/model-equalizer/src/test/enhancer.test.ts +0 -137
- package/extensions/model-equalizer/src/test/integration.test.ts +0 -185
- package/extensions/model-equalizer/src/types.ts +0 -24
- package/extensions/model-equalizer/symi.plugin.json +0 -12
- package/extensions/phone-control/index.ts +0 -421
- package/extensions/phone-control/symi.plugin.json +0 -10
- package/extensions/pipeline/README.md +0 -75
- package/extensions/pipeline/SKILL.md +0 -97
- package/extensions/pipeline/index.ts +0 -18
- package/extensions/pipeline/package.json +0 -11
- package/extensions/pipeline/src/pipeline-tool.test.ts +0 -345
- package/extensions/pipeline/src/pipeline-tool.ts +0 -266
- package/extensions/pipeline/src/windows-spawn.test.ts +0 -148
- package/extensions/pipeline/src/windows-spawn.ts +0 -193
- package/extensions/pipeline/symi.plugin.json +0 -10
- package/extensions/qwen-portal-auth/README.md +0 -24
- package/extensions/qwen-portal-auth/index.ts +0 -134
- package/extensions/qwen-portal-auth/oauth.ts +0 -190
- package/extensions/qwen-portal-auth/symi.plugin.json +0 -9
- package/extensions/talk-voice/index.ts +0 -150
- package/extensions/talk-voice/symi.plugin.json +0 -10
- package/extensions/thread-ownership/index.test.ts +0 -180
- package/extensions/thread-ownership/index.ts +0 -133
- package/extensions/thread-ownership/symi.plugin.json +0 -28
- package/skills/1password/SKILL.md +0 -71
- package/skills/1password/references/cli-examples.md +0 -29
- package/skills/1password/references/get-started.md +0 -17
- package/skills/apple-notes/SKILL.md +0 -78
- package/skills/apple-reminders/SKILL.md +0 -119
- package/skills/bear-notes/SKILL.md +0 -108
- package/skills/blogwatcher/SKILL.md +0 -70
- package/skills/blucli/SKILL.md +0 -48
- package/skills/bluebubbles/SKILL.md +0 -132
- package/skills/camsnap/SKILL.md +0 -46
- package/skills/canvas/SKILL.md +0 -204
- package/skills/connect-email/SKILL.md +0 -142
- package/skills/document-generation/SKILL.md +0 -83
- package/skills/eightctl/SKILL.md +0 -51
- package/skills/food-order/SKILL.md +0 -49
- package/skills/gemini/SKILL.md +0 -44
- package/skills/gh-issues/SKILL.md +0 -865
- package/skills/gifgrep/SKILL.md +0 -80
- package/skills/github/SKILL.md +0 -164
- package/skills/gog/SKILL.md +0 -117
- package/skills/goplaces/SKILL.md +0 -53
- package/skills/healthcheck/SKILL.md +0 -246
- package/skills/himalaya/SKILL.md +0 -258
- package/skills/himalaya/references/configuration.md +0 -184
- package/skills/himalaya/references/message-composition.md +0 -199
- package/skills/imsg/SKILL.md +0 -122
- package/skills/long-task/SKILL.md +0 -58
- package/skills/long-task/scripts/detach-task.sh +0 -187
- package/skills/nano-banana-pro/SKILL.md +0 -59
- package/skills/nano-banana-pro/scripts/generate_image.py +0 -184
- package/skills/nano-pdf/SKILL.md +0 -39
- package/skills/notion/SKILL.md +0 -173
- package/skills/obsidian/SKILL.md +0 -82
- package/skills/openai-image-gen/SKILL.md +0 -90
- package/skills/openai-image-gen/scripts/gen.py +0 -240
- package/skills/openai-whisper/SKILL.md +0 -39
- package/skills/openai-whisper-api/SKILL.md +0 -53
- package/skills/openai-whisper-api/scripts/transcribe.sh +0 -85
- package/skills/openhue/SKILL.md +0 -113
- package/skills/oracle/SKILL.md +0 -126
- package/skills/ordercli/SKILL.md +0 -79
- package/skills/peekaboo/SKILL.md +0 -191
- package/skills/reactions-extensive/SKILL.md +0 -30
- package/skills/reactions-minimal/SKILL.md +0 -31
- package/skills/safe-edit/SKILL.md +0 -51
- package/skills/sag/SKILL.md +0 -88
- package/skills/sherpa-onnx-tts/SKILL.md +0 -104
- package/skills/sherpa-onnx-tts/bin/sherpa-onnx-tts +0 -178
- package/skills/songsee/SKILL.md +0 -50
- package/skills/sonoscli/SKILL.md +0 -66
- package/skills/spotify-player/SKILL.md +0 -65
- package/skills/symihub/SKILL.md +0 -78
- package/skills/things-mac/SKILL.md +0 -87
- package/skills/tmux/SKILL.md +0 -153
- package/skills/tmux/scripts/find-sessions.sh +0 -112
- package/skills/tmux/scripts/wait-for-text.sh +0 -83
- package/skills/trello/SKILL.md +0 -96
- package/skills/video-frames/SKILL.md +0 -47
- package/skills/video-frames/scripts/frame.sh +0 -81
- package/skills/voice-call/SKILL.md +0 -46
- package/skills/wacli/SKILL.md +0 -73
- package/skills/weather/SKILL.md +0 -113
- package/skills/xurl/SKILL.md +0 -462
|
@@ -1,1286 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Comprehensive tests for the six follow-up fixes:
|
|
3
|
-
* A. Shared reconstructCompletedRun utility
|
|
4
|
-
* B. Pruning preserves learnings (sentinel __pruned__ row) + pruneStaleLearnings
|
|
5
|
-
* C. Lazy bridge available() state tracking
|
|
6
|
-
* D. Tool pattern matching with Jaccard similarity
|
|
7
|
-
* E. Feedback rescoring updates DB (explicit + implicit paths)
|
|
8
|
-
* F. Embeddings populated when bridge available + backfill DB methods
|
|
9
|
-
*/
|
|
10
|
-
|
|
11
|
-
import fs from "node:fs";
|
|
12
|
-
import os from "node:os";
|
|
13
|
-
import path from "node:path";
|
|
14
|
-
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
15
|
-
import { reconstructCompletedRun, normalizeCompletedRun } from "../capture/serializer.js";
|
|
16
|
-
import { createDatabaseManager } from "../db.js";
|
|
17
|
-
import { createExplicitFeedbackHandler } from "../feedback/explicit-feedback.js";
|
|
18
|
-
import { createEmbeddingBridge, type EmbeddingBridge } from "../learning/embedding-bridge.js";
|
|
19
|
-
import { createLearningExtractor } from "../learning/learning-extractor.js";
|
|
20
|
-
import { createLearningStore } from "../learning/learning-store.js";
|
|
21
|
-
import { createQualityEngine } from "../scoring/quality-engine.js";
|
|
22
|
-
import type { CompletedRun, LearningLoopConfig, RunRow, ToolCallRow } from "../types.js";
|
|
23
|
-
|
|
24
|
-
const TEST_CONFIG: LearningLoopConfig = {
|
|
25
|
-
capture: { embedPrompts: false, maxRuns: 1000 },
|
|
26
|
-
scoring: {
|
|
27
|
-
weights: {
|
|
28
|
-
taskCompletion: 0.35,
|
|
29
|
-
toolEfficiency: 0.25,
|
|
30
|
-
responseAppropriateLength: 0.1,
|
|
31
|
-
latencyRelative: 0.1,
|
|
32
|
-
userFeedback: 0.2,
|
|
33
|
-
},
|
|
34
|
-
},
|
|
35
|
-
injection: { maxLearnings: 5, minRelevance: 0.1, maxTokens: 500, cacheTtlMs: 60000 },
|
|
36
|
-
decay: { halfLifeDays: 30 },
|
|
37
|
-
};
|
|
38
|
-
|
|
39
|
-
const logger = {
|
|
40
|
-
info: () => {},
|
|
41
|
-
warn: () => {},
|
|
42
|
-
error: () => {},
|
|
43
|
-
debug: () => {},
|
|
44
|
-
};
|
|
45
|
-
|
|
46
|
-
function makeRun(overrides: Partial<CompletedRun> = {}): CompletedRun {
|
|
47
|
-
return {
|
|
48
|
-
runId: `run_${Date.now()}_${Math.random().toString(36).slice(2, 6)}`,
|
|
49
|
-
sessionId: "sess-1",
|
|
50
|
-
sessionKey: "sk-1",
|
|
51
|
-
agentId: "agent-1",
|
|
52
|
-
provider: "openai",
|
|
53
|
-
model: "gpt-4",
|
|
54
|
-
promptHash: "hash-default",
|
|
55
|
-
promptLength: 100,
|
|
56
|
-
responseLength: 200,
|
|
57
|
-
responseToolCallCount: 2,
|
|
58
|
-
usage: { input: 50, output: 100, cacheRead: 0, cacheWrite: 0, total: 150 },
|
|
59
|
-
toolCalls: [],
|
|
60
|
-
success: true,
|
|
61
|
-
error: null,
|
|
62
|
-
durationMs: 1000,
|
|
63
|
-
startedAt: Date.now() - 1000,
|
|
64
|
-
completedAt: Date.now(),
|
|
65
|
-
...overrides,
|
|
66
|
-
};
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
const testScore = { score: 0.85, signals: [], algorithmVersion: 1 } as const;
|
|
70
|
-
|
|
71
|
-
// ---------------------------------------------------------------------------
|
|
72
|
-
// A. reconstructCompletedRun shared utility
|
|
73
|
-
// ---------------------------------------------------------------------------
|
|
74
|
-
describe("reconstructCompletedRun", () => {
|
|
75
|
-
let tmpDir: string;
|
|
76
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
77
|
-
|
|
78
|
-
beforeEach(() => {
|
|
79
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ll-fixes-reconstruct-"));
|
|
80
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: TEST_CONFIG, logger });
|
|
81
|
-
});
|
|
82
|
-
|
|
83
|
-
afterEach(() => {
|
|
84
|
-
db.close();
|
|
85
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
86
|
-
});
|
|
87
|
-
|
|
88
|
-
it("should roundtrip a run through DB and reconstruct identically", () => {
|
|
89
|
-
const original = makeRun({
|
|
90
|
-
runId: "roundtrip-1",
|
|
91
|
-
provider: "claude",
|
|
92
|
-
model: "claude-3-opus",
|
|
93
|
-
promptHash: "abc123",
|
|
94
|
-
promptLength: 500,
|
|
95
|
-
responseLength: 1200,
|
|
96
|
-
responseToolCallCount: 3,
|
|
97
|
-
usage: { input: 200, output: 400, cacheRead: 50, cacheWrite: 10, total: 660 },
|
|
98
|
-
toolCalls: [
|
|
99
|
-
{ toolName: "Read", durationMs: 15, success: true, error: null, paramHash: "ph1" },
|
|
100
|
-
{
|
|
101
|
-
toolName: "Edit",
|
|
102
|
-
durationMs: 42,
|
|
103
|
-
success: false,
|
|
104
|
-
error: "file not found",
|
|
105
|
-
paramHash: "ph2",
|
|
106
|
-
},
|
|
107
|
-
{ toolName: "Bash", durationMs: 100, success: true, error: null, paramHash: "ph3" },
|
|
108
|
-
],
|
|
109
|
-
success: true,
|
|
110
|
-
error: null,
|
|
111
|
-
durationMs: 2500,
|
|
112
|
-
startedAt: 1700000000000,
|
|
113
|
-
completedAt: 1700000002500,
|
|
114
|
-
});
|
|
115
|
-
|
|
116
|
-
db.insertRun(original, { score: 0.8, signals: [], algorithmVersion: 1 });
|
|
117
|
-
const row = db.getRun("roundtrip-1")!;
|
|
118
|
-
const toolCalls = db.getToolCalls("roundtrip-1");
|
|
119
|
-
const reconstructed = reconstructCompletedRun(row, toolCalls);
|
|
120
|
-
|
|
121
|
-
expect(reconstructed.runId).toBe(original.runId);
|
|
122
|
-
expect(reconstructed.provider).toBe(original.provider);
|
|
123
|
-
expect(reconstructed.model).toBe(original.model);
|
|
124
|
-
expect(reconstructed.promptHash).toBe(original.promptHash);
|
|
125
|
-
expect(reconstructed.promptLength).toBe(original.promptLength);
|
|
126
|
-
expect(reconstructed.responseLength).toBe(original.responseLength);
|
|
127
|
-
expect(reconstructed.responseToolCallCount).toBe(original.responseToolCallCount);
|
|
128
|
-
expect(reconstructed.usage).toEqual(original.usage);
|
|
129
|
-
expect(reconstructed.success).toBe(original.success);
|
|
130
|
-
expect(reconstructed.error).toBe(original.error);
|
|
131
|
-
expect(reconstructed.durationMs).toBe(original.durationMs);
|
|
132
|
-
expect(reconstructed.startedAt).toBe(original.startedAt);
|
|
133
|
-
expect(reconstructed.completedAt).toBe(original.completedAt);
|
|
134
|
-
|
|
135
|
-
expect(reconstructed.toolCalls).toHaveLength(3);
|
|
136
|
-
expect(reconstructed.toolCalls[0]!.toolName).toBe("Read");
|
|
137
|
-
expect(reconstructed.toolCalls[1]!.success).toBe(false);
|
|
138
|
-
expect(reconstructed.toolCalls[1]!.error).toBe("file not found");
|
|
139
|
-
expect(reconstructed.toolCalls[2]!.durationMs).toBe(100);
|
|
140
|
-
});
|
|
141
|
-
|
|
142
|
-
it("should handle a run with no tool calls", () => {
|
|
143
|
-
const original = makeRun({ runId: "no-tools", toolCalls: [], responseToolCallCount: 0 });
|
|
144
|
-
db.insertRun(original, testScore);
|
|
145
|
-
|
|
146
|
-
const row = db.getRun("no-tools")!;
|
|
147
|
-
const toolCalls = db.getToolCalls("no-tools");
|
|
148
|
-
const reconstructed = reconstructCompletedRun(row, toolCalls);
|
|
149
|
-
|
|
150
|
-
expect(reconstructed.toolCalls).toHaveLength(0);
|
|
151
|
-
});
|
|
152
|
-
|
|
153
|
-
it("should reconstruct error run with null error field correctly", () => {
|
|
154
|
-
const original = makeRun({ runId: "failed-run", success: false, error: "timeout" });
|
|
155
|
-
db.insertRun(original, testScore);
|
|
156
|
-
|
|
157
|
-
const row = db.getRun("failed-run")!;
|
|
158
|
-
const reconstructed = reconstructCompletedRun(row, []);
|
|
159
|
-
|
|
160
|
-
expect(reconstructed.success).toBe(false);
|
|
161
|
-
expect(reconstructed.error).toBe("timeout");
|
|
162
|
-
});
|
|
163
|
-
|
|
164
|
-
it("should convert success from integer to boolean", () => {
|
|
165
|
-
// DB stores success as 0/1 integer; reconstructCompletedRun must return boolean
|
|
166
|
-
const run = makeRun({ runId: "bool-check", success: true });
|
|
167
|
-
db.insertRun(run, testScore);
|
|
168
|
-
|
|
169
|
-
const row = db.getRun("bool-check")!;
|
|
170
|
-
// DB row has integer
|
|
171
|
-
expect(typeof row.success).toBe("number");
|
|
172
|
-
|
|
173
|
-
const reconstructed = reconstructCompletedRun(row, []);
|
|
174
|
-
expect(typeof reconstructed.success).toBe("boolean");
|
|
175
|
-
expect(reconstructed.success).toBe(true);
|
|
176
|
-
});
|
|
177
|
-
|
|
178
|
-
it("should be usable from explicit-feedback path (integration)", () => {
|
|
179
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
180
|
-
const run = makeRun({
|
|
181
|
-
runId: "fb-reconstruct",
|
|
182
|
-
toolCalls: [{ toolName: "Glob", durationMs: 5, success: true, error: null, paramHash: "g1" }],
|
|
183
|
-
responseToolCallCount: 1,
|
|
184
|
-
});
|
|
185
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
186
|
-
|
|
187
|
-
// The explicit feedback handler internally uses reconstructCompletedRun
|
|
188
|
-
const handler = createExplicitFeedbackHandler({ db, qualityEngine });
|
|
189
|
-
const fb = handler.submitFeedback("fb-reconstruct", 4);
|
|
190
|
-
|
|
191
|
-
expect(fb).not.toBeNull();
|
|
192
|
-
expect(fb!.score).toBe(4);
|
|
193
|
-
|
|
194
|
-
// Score should have been updated via the reconstructed run
|
|
195
|
-
const updatedRow = db.getRun("fb-reconstruct")!;
|
|
196
|
-
expect(updatedRow.quality_score).not.toBeNull();
|
|
197
|
-
});
|
|
198
|
-
});
|
|
199
|
-
|
|
200
|
-
// ---------------------------------------------------------------------------
|
|
201
|
-
// B. Pruning preserves learnings (sentinel row)
|
|
202
|
-
// ---------------------------------------------------------------------------
|
|
203
|
-
describe("Pruning preserves learnings", () => {
|
|
204
|
-
let tmpDir: string;
|
|
205
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
206
|
-
|
|
207
|
-
beforeEach(() => {
|
|
208
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ll-fixes-prune-"));
|
|
209
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: TEST_CONFIG, logger });
|
|
210
|
-
});
|
|
211
|
-
|
|
212
|
-
afterEach(() => {
|
|
213
|
-
db.close();
|
|
214
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
215
|
-
});
|
|
216
|
-
|
|
217
|
-
it("should set run_id to '__pruned__' on learnings when their run is pruned", () => {
|
|
218
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
219
|
-
const learningStore = createLearningStore({ db });
|
|
220
|
-
|
|
221
|
-
const run = makeRun({ runId: "run-to-prune" });
|
|
222
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
223
|
-
|
|
224
|
-
const learningId = learningStore.addLearning({
|
|
225
|
-
runId: "run-to-prune",
|
|
226
|
-
category: "tool_pattern",
|
|
227
|
-
content: "This learning should survive pruning",
|
|
228
|
-
embedding: null,
|
|
229
|
-
confidence: 0.8,
|
|
230
|
-
});
|
|
231
|
-
expect(learningId).not.toBeNull();
|
|
232
|
-
|
|
233
|
-
const pruned = db.pruneOldRuns(0);
|
|
234
|
-
expect(pruned).toBe(1);
|
|
235
|
-
expect(db.getRunCount()).toBe(0);
|
|
236
|
-
|
|
237
|
-
const row = db.getLearning(learningId!);
|
|
238
|
-
expect(row).toBeDefined();
|
|
239
|
-
expect(row!.run_id).toBe("__pruned__");
|
|
240
|
-
});
|
|
241
|
-
|
|
242
|
-
it("should preserve multiple learnings across multiple pruned runs", () => {
|
|
243
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
244
|
-
const learningStore = createLearningStore({ db });
|
|
245
|
-
|
|
246
|
-
// Insert 3 runs with learnings
|
|
247
|
-
const learningIds: string[] = [];
|
|
248
|
-
for (let i = 0; i < 3; i++) {
|
|
249
|
-
const run = makeRun({
|
|
250
|
-
runId: `multi-prune-${i}`,
|
|
251
|
-
completedAt: Date.now() - (3 - i) * 1000,
|
|
252
|
-
});
|
|
253
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
254
|
-
const id = learningStore.addLearning({
|
|
255
|
-
runId: `multi-prune-${i}`,
|
|
256
|
-
category: "tool_pattern",
|
|
257
|
-
content: `Learning from run ${i} unique content`,
|
|
258
|
-
embedding: null,
|
|
259
|
-
confidence: 0.8,
|
|
260
|
-
});
|
|
261
|
-
if (id) learningIds.push(id);
|
|
262
|
-
}
|
|
263
|
-
expect(learningIds).toHaveLength(3);
|
|
264
|
-
|
|
265
|
-
// Prune down to 1 run (keeps the newest)
|
|
266
|
-
const pruned = db.pruneOldRuns(1);
|
|
267
|
-
expect(pruned).toBe(2);
|
|
268
|
-
expect(db.getRunCount()).toBe(1);
|
|
269
|
-
|
|
270
|
-
// All 3 learnings should still exist; 2 with __pruned__, 1 with original run_id
|
|
271
|
-
for (const id of learningIds) {
|
|
272
|
-
const row = db.getLearning(id);
|
|
273
|
-
expect(row).toBeDefined();
|
|
274
|
-
}
|
|
275
|
-
|
|
276
|
-
const prunedLearnings = learningIds
|
|
277
|
-
.map((id) => db.getLearning(id)!)
|
|
278
|
-
.filter((r) => r.run_id === "__pruned__");
|
|
279
|
-
expect(prunedLearnings).toHaveLength(2);
|
|
280
|
-
});
|
|
281
|
-
|
|
282
|
-
it("should not create sentinel if no learnings exist for pruned runs", () => {
|
|
283
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
284
|
-
|
|
285
|
-
// Insert runs without learnings
|
|
286
|
-
for (let i = 0; i < 3; i++) {
|
|
287
|
-
const run = makeRun({
|
|
288
|
-
runId: `no-learning-${i}`,
|
|
289
|
-
completedAt: Date.now() - (3 - i) * 1000,
|
|
290
|
-
});
|
|
291
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
292
|
-
}
|
|
293
|
-
|
|
294
|
-
// Pruning should still work (sentinel is created but has no learnings referencing it)
|
|
295
|
-
const pruned = db.pruneOldRuns(1);
|
|
296
|
-
expect(pruned).toBe(2);
|
|
297
|
-
expect(db.getRunCount()).toBe(1);
|
|
298
|
-
});
|
|
299
|
-
|
|
300
|
-
it("should handle pruning idempotently (sentinel already exists)", () => {
|
|
301
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
302
|
-
const learningStore = createLearningStore({ db });
|
|
303
|
-
|
|
304
|
-
// First round: insert 2 runs with learnings, prune to 1
|
|
305
|
-
for (let i = 0; i < 2; i++) {
|
|
306
|
-
const run = makeRun({
|
|
307
|
-
runId: `idem-round1-${i}`,
|
|
308
|
-
completedAt: Date.now() - (2 - i) * 1000,
|
|
309
|
-
});
|
|
310
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
311
|
-
learningStore.addLearning({
|
|
312
|
-
runId: `idem-round1-${i}`,
|
|
313
|
-
category: "tool_pattern",
|
|
314
|
-
content: `Idempotent learning round 1 run ${i}`,
|
|
315
|
-
embedding: null,
|
|
316
|
-
confidence: 0.8,
|
|
317
|
-
});
|
|
318
|
-
}
|
|
319
|
-
db.pruneOldRuns(1);
|
|
320
|
-
expect(db.getRunCount()).toBe(1);
|
|
321
|
-
|
|
322
|
-
// Second round: insert 2 more runs, prune to 1 again
|
|
323
|
-
for (let i = 0; i < 2; i++) {
|
|
324
|
-
const run = makeRun({
|
|
325
|
-
runId: `idem-round2-${i}`,
|
|
326
|
-
completedAt: Date.now() + (i + 1) * 1000,
|
|
327
|
-
});
|
|
328
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
329
|
-
learningStore.addLearning({
|
|
330
|
-
runId: `idem-round2-${i}`,
|
|
331
|
-
category: "error_recovery",
|
|
332
|
-
content: `Idempotent learning round 2 run ${i}`,
|
|
333
|
-
embedding: null,
|
|
334
|
-
confidence: 0.7,
|
|
335
|
-
});
|
|
336
|
-
}
|
|
337
|
-
const pruned = db.pruneOldRuns(1);
|
|
338
|
-
expect(pruned).toBe(2);
|
|
339
|
-
expect(db.getRunCount()).toBe(1);
|
|
340
|
-
|
|
341
|
-
// All 4 learnings should exist
|
|
342
|
-
const allLearnings = db.getAllLearnings(100);
|
|
343
|
-
expect(allLearnings).toHaveLength(4);
|
|
344
|
-
});
|
|
345
|
-
|
|
346
|
-
it("should return 0 when nothing to prune", () => {
|
|
347
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
348
|
-
const run = makeRun({ runId: "keep-me" });
|
|
349
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
350
|
-
|
|
351
|
-
const pruned = db.pruneOldRuns(10);
|
|
352
|
-
expect(pruned).toBe(0);
|
|
353
|
-
expect(db.getRunCount()).toBe(1);
|
|
354
|
-
});
|
|
355
|
-
|
|
356
|
-
it("should exclude sentinel from getRunCount", () => {
|
|
357
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
358
|
-
const learningStore = createLearningStore({ db });
|
|
359
|
-
|
|
360
|
-
const run = makeRun({ runId: "count-test" });
|
|
361
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
362
|
-
learningStore.addLearning({
|
|
363
|
-
runId: "count-test",
|
|
364
|
-
category: "tool_pattern",
|
|
365
|
-
content: "count test learning",
|
|
366
|
-
embedding: null,
|
|
367
|
-
confidence: 0.8,
|
|
368
|
-
});
|
|
369
|
-
|
|
370
|
-
expect(db.getRunCount()).toBe(1);
|
|
371
|
-
|
|
372
|
-
db.pruneOldRuns(0);
|
|
373
|
-
// Sentinel exists in DB but getRunCount should return 0
|
|
374
|
-
expect(db.getRunCount()).toBe(0);
|
|
375
|
-
});
|
|
376
|
-
|
|
377
|
-
it("should remove stale learnings (old, unapplied, low-confidence)", () => {
|
|
378
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
379
|
-
const longAgo = Date.now() - 120 * 24 * 60 * 60 * 1000;
|
|
380
|
-
|
|
381
|
-
const run = makeRun({ runId: "r1" });
|
|
382
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
383
|
-
|
|
384
|
-
db.insertLearning({
|
|
385
|
-
id: "stale-1",
|
|
386
|
-
runId: "r1",
|
|
387
|
-
category: "tool_pattern",
|
|
388
|
-
content: "stale learning",
|
|
389
|
-
embedding: null,
|
|
390
|
-
confidence: 0.3,
|
|
391
|
-
appliedCount: 0,
|
|
392
|
-
createdAt: longAgo,
|
|
393
|
-
updatedAt: longAgo,
|
|
394
|
-
});
|
|
395
|
-
|
|
396
|
-
const pruned = db.pruneStaleLearnings(90 * 24 * 60 * 60 * 1000);
|
|
397
|
-
expect(pruned).toBe(1);
|
|
398
|
-
expect(db.getLearning("stale-1")).toBeUndefined();
|
|
399
|
-
});
|
|
400
|
-
|
|
401
|
-
it("should preserve active learnings (applied or high-confidence)", () => {
|
|
402
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
403
|
-
const longAgo = Date.now() - 120 * 24 * 60 * 60 * 1000;
|
|
404
|
-
|
|
405
|
-
const run1 = makeRun({ runId: "r1" });
|
|
406
|
-
db.insertRun(run1, qualityEngine.scoreRun(run1));
|
|
407
|
-
const run2 = makeRun({ runId: "r2" });
|
|
408
|
-
db.insertRun(run2, qualityEngine.scoreRun(run2));
|
|
409
|
-
|
|
410
|
-
db.insertLearning({
|
|
411
|
-
id: "active-hc",
|
|
412
|
-
runId: "r1",
|
|
413
|
-
category: "tool_pattern",
|
|
414
|
-
content: "high confidence learning",
|
|
415
|
-
embedding: null,
|
|
416
|
-
confidence: 0.9,
|
|
417
|
-
appliedCount: 0,
|
|
418
|
-
createdAt: longAgo,
|
|
419
|
-
updatedAt: longAgo,
|
|
420
|
-
});
|
|
421
|
-
|
|
422
|
-
db.insertLearning({
|
|
423
|
-
id: "active-applied",
|
|
424
|
-
runId: "r2",
|
|
425
|
-
category: "error_recovery",
|
|
426
|
-
content: "applied learning",
|
|
427
|
-
embedding: null,
|
|
428
|
-
confidence: 0.3,
|
|
429
|
-
appliedCount: 5,
|
|
430
|
-
createdAt: longAgo,
|
|
431
|
-
updatedAt: longAgo,
|
|
432
|
-
});
|
|
433
|
-
|
|
434
|
-
const pruned = db.pruneStaleLearnings(90 * 24 * 60 * 60 * 1000);
|
|
435
|
-
expect(pruned).toBe(0);
|
|
436
|
-
expect(db.getLearning("active-hc")).toBeDefined();
|
|
437
|
-
expect(db.getLearning("active-applied")).toBeDefined();
|
|
438
|
-
});
|
|
439
|
-
|
|
440
|
-
it("should not prune learnings that are old but recently updated", () => {
|
|
441
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
442
|
-
const longAgo = Date.now() - 120 * 24 * 60 * 60 * 1000;
|
|
443
|
-
|
|
444
|
-
const run = makeRun({ runId: "r-recent" });
|
|
445
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
446
|
-
|
|
447
|
-
db.insertLearning({
|
|
448
|
-
id: "recently-updated",
|
|
449
|
-
runId: "r-recent",
|
|
450
|
-
category: "tool_pattern",
|
|
451
|
-
content: "old but recently updated",
|
|
452
|
-
embedding: null,
|
|
453
|
-
confidence: 0.3,
|
|
454
|
-
appliedCount: 0,
|
|
455
|
-
createdAt: longAgo,
|
|
456
|
-
updatedAt: Date.now(), // recently updated
|
|
457
|
-
});
|
|
458
|
-
|
|
459
|
-
const pruned = db.pruneStaleLearnings(90 * 24 * 60 * 60 * 1000);
|
|
460
|
-
expect(pruned).toBe(0);
|
|
461
|
-
});
|
|
462
|
-
});
|
|
463
|
-
|
|
464
|
-
// ---------------------------------------------------------------------------
|
|
465
|
-
// C. Lazy bridge available() state tracking
|
|
466
|
-
// ---------------------------------------------------------------------------
|
|
467
|
-
describe("Lazy bridge available() state tracking", () => {
|
|
468
|
-
it("should return true (optimistic) before bridge resolves", () => {
|
|
469
|
-
// Simulate the lazy bridge pattern from index.ts
|
|
470
|
-
let bridgeResolved = false;
|
|
471
|
-
let bridgeHasProvider = false;
|
|
472
|
-
|
|
473
|
-
const lazyAvailable = () => (bridgeResolved ? bridgeHasProvider : true);
|
|
474
|
-
|
|
475
|
-
// Before any embed call, available() is optimistic
|
|
476
|
-
expect(lazyAvailable()).toBe(true);
|
|
477
|
-
});
|
|
478
|
-
|
|
479
|
-
it("should return true after resolving with a real provider", async () => {
|
|
480
|
-
let bridgeResolved = false;
|
|
481
|
-
let bridgeHasProvider = false;
|
|
482
|
-
let bridgePromise: Promise<EmbeddingBridge> | null = null;
|
|
483
|
-
|
|
484
|
-
const realBridge: EmbeddingBridge = {
|
|
485
|
-
embed: async () => [0.1, 0.2],
|
|
486
|
-
embedBatch: async (texts) => texts.map(() => [0.1, 0.2]),
|
|
487
|
-
available: () => true,
|
|
488
|
-
};
|
|
489
|
-
|
|
490
|
-
async function initAndTrack(): Promise<EmbeddingBridge> {
|
|
491
|
-
bridgeResolved = true;
|
|
492
|
-
bridgeHasProvider = realBridge.available();
|
|
493
|
-
return realBridge;
|
|
494
|
-
}
|
|
495
|
-
|
|
496
|
-
const lazyBridge: EmbeddingBridge = {
|
|
497
|
-
async embed(text) {
|
|
498
|
-
if (!bridgePromise) bridgePromise = initAndTrack();
|
|
499
|
-
return (await bridgePromise).embed(text);
|
|
500
|
-
},
|
|
501
|
-
async embedBatch(texts) {
|
|
502
|
-
if (!bridgePromise) bridgePromise = initAndTrack();
|
|
503
|
-
return (await bridgePromise).embedBatch(texts);
|
|
504
|
-
},
|
|
505
|
-
available: () => (bridgeResolved ? bridgeHasProvider : true),
|
|
506
|
-
};
|
|
507
|
-
|
|
508
|
-
// Before use: optimistic
|
|
509
|
-
expect(lazyBridge.available()).toBe(true);
|
|
510
|
-
|
|
511
|
-
// Trigger initialization
|
|
512
|
-
const result = await lazyBridge.embed("test");
|
|
513
|
-
expect(result).toEqual([0.1, 0.2]);
|
|
514
|
-
|
|
515
|
-
// After resolve: accurately true
|
|
516
|
-
expect(lazyBridge.available()).toBe(true);
|
|
517
|
-
});
|
|
518
|
-
|
|
519
|
-
it("should return false after resolving with no provider", async () => {
|
|
520
|
-
let bridgeResolved = false;
|
|
521
|
-
let bridgeHasProvider = false;
|
|
522
|
-
let bridgePromise: Promise<EmbeddingBridge> | null = null;
|
|
523
|
-
|
|
524
|
-
const nullBridge: EmbeddingBridge = {
|
|
525
|
-
embed: async () => null,
|
|
526
|
-
embedBatch: async (texts) => texts.map(() => null),
|
|
527
|
-
available: () => false,
|
|
528
|
-
};
|
|
529
|
-
|
|
530
|
-
async function initAndTrack(): Promise<EmbeddingBridge> {
|
|
531
|
-
bridgeResolved = true;
|
|
532
|
-
bridgeHasProvider = nullBridge.available();
|
|
533
|
-
return nullBridge;
|
|
534
|
-
}
|
|
535
|
-
|
|
536
|
-
const lazyBridge: EmbeddingBridge = {
|
|
537
|
-
async embed(text) {
|
|
538
|
-
if (!bridgePromise) bridgePromise = initAndTrack();
|
|
539
|
-
return (await bridgePromise).embed(text);
|
|
540
|
-
},
|
|
541
|
-
async embedBatch(texts) {
|
|
542
|
-
if (!bridgePromise) bridgePromise = initAndTrack();
|
|
543
|
-
return (await bridgePromise).embedBatch(texts);
|
|
544
|
-
},
|
|
545
|
-
available: () => (bridgeResolved ? bridgeHasProvider : true),
|
|
546
|
-
};
|
|
547
|
-
|
|
548
|
-
// Before use: optimistic
|
|
549
|
-
expect(lazyBridge.available()).toBe(true);
|
|
550
|
-
|
|
551
|
-
// Trigger initialization
|
|
552
|
-
const result = await lazyBridge.embed("test");
|
|
553
|
-
expect(result).toBeNull();
|
|
554
|
-
|
|
555
|
-
// After resolve: accurately false
|
|
556
|
-
expect(lazyBridge.available()).toBe(false);
|
|
557
|
-
});
|
|
558
|
-
|
|
559
|
-
it("should only initialize once even with concurrent calls", async () => {
|
|
560
|
-
let initCount = 0;
|
|
561
|
-
let bridgeResolved = false;
|
|
562
|
-
let bridgeHasProvider = false;
|
|
563
|
-
let bridgePromise: Promise<EmbeddingBridge> | null = null;
|
|
564
|
-
|
|
565
|
-
const realBridge: EmbeddingBridge = {
|
|
566
|
-
embed: async () => [1.0],
|
|
567
|
-
embedBatch: async (texts) => texts.map(() => [1.0]),
|
|
568
|
-
available: () => true,
|
|
569
|
-
};
|
|
570
|
-
|
|
571
|
-
async function initAndTrack(): Promise<EmbeddingBridge> {
|
|
572
|
-
initCount++;
|
|
573
|
-
bridgeResolved = true;
|
|
574
|
-
bridgeHasProvider = realBridge.available();
|
|
575
|
-
return realBridge;
|
|
576
|
-
}
|
|
577
|
-
|
|
578
|
-
const lazyBridge: EmbeddingBridge = {
|
|
579
|
-
async embed(text) {
|
|
580
|
-
if (!bridgePromise) bridgePromise = initAndTrack();
|
|
581
|
-
return (await bridgePromise).embed(text);
|
|
582
|
-
},
|
|
583
|
-
async embedBatch(texts) {
|
|
584
|
-
if (!bridgePromise) bridgePromise = initAndTrack();
|
|
585
|
-
return (await bridgePromise).embedBatch(texts);
|
|
586
|
-
},
|
|
587
|
-
available: () => (bridgeResolved ? bridgeHasProvider : true),
|
|
588
|
-
};
|
|
589
|
-
|
|
590
|
-
// Fire multiple concurrent calls
|
|
591
|
-
const results = await Promise.all([
|
|
592
|
-
lazyBridge.embed("a"),
|
|
593
|
-
lazyBridge.embed("b"),
|
|
594
|
-
lazyBridge.embedBatch(["c", "d"]),
|
|
595
|
-
]);
|
|
596
|
-
|
|
597
|
-
expect(initCount).toBe(1);
|
|
598
|
-
expect(results[0]).toEqual([1.0]);
|
|
599
|
-
expect(results[1]).toEqual([1.0]);
|
|
600
|
-
expect(results[2]).toEqual([[1.0], [1.0]]);
|
|
601
|
-
});
|
|
602
|
-
});
|
|
603
|
-
|
|
604
|
-
// ---------------------------------------------------------------------------
|
|
605
|
-
// D. createEmbeddingBridge unit tests
|
|
606
|
-
// ---------------------------------------------------------------------------
|
|
607
|
-
describe("createEmbeddingBridge", () => {
|
|
608
|
-
it("should return available=true with a real provider", () => {
|
|
609
|
-
const mockProvider = {
|
|
610
|
-
embedQuery: async (text: string) => [0.1, 0.2, 0.3],
|
|
611
|
-
embedBatch: async (texts: string[]) => texts.map(() => [0.1, 0.2, 0.3]),
|
|
612
|
-
};
|
|
613
|
-
const bridge = createEmbeddingBridge({ provider: mockProvider, logger });
|
|
614
|
-
expect(bridge.available()).toBe(true);
|
|
615
|
-
});
|
|
616
|
-
|
|
617
|
-
it("should return available=false with null provider", () => {
|
|
618
|
-
const bridge = createEmbeddingBridge({ provider: null, logger });
|
|
619
|
-
expect(bridge.available()).toBe(false);
|
|
620
|
-
});
|
|
621
|
-
|
|
622
|
-
it("should return null from embed when provider is null", async () => {
|
|
623
|
-
const bridge = createEmbeddingBridge({ provider: null, logger });
|
|
624
|
-
const result = await bridge.embed("test");
|
|
625
|
-
expect(result).toBeNull();
|
|
626
|
-
});
|
|
627
|
-
|
|
628
|
-
it("should return all nulls from embedBatch when provider is null", async () => {
|
|
629
|
-
const bridge = createEmbeddingBridge({ provider: null, logger });
|
|
630
|
-
const results = await bridge.embedBatch(["a", "b", "c"]);
|
|
631
|
-
expect(results).toEqual([null, null, null]);
|
|
632
|
-
});
|
|
633
|
-
|
|
634
|
-
it("should return vector from embed when provider exists", async () => {
|
|
635
|
-
const mockProvider = {
|
|
636
|
-
embedQuery: async (text: string) => [0.5, 0.6],
|
|
637
|
-
embedBatch: async (texts: string[]) => texts.map(() => [0.5, 0.6]),
|
|
638
|
-
};
|
|
639
|
-
const bridge = createEmbeddingBridge({ provider: mockProvider, logger });
|
|
640
|
-
const result = await bridge.embed("test");
|
|
641
|
-
expect(result).toEqual([0.5, 0.6]);
|
|
642
|
-
});
|
|
643
|
-
|
|
644
|
-
it("should gracefully return null on embed error", async () => {
|
|
645
|
-
const mockProvider = {
|
|
646
|
-
embedQuery: async () => {
|
|
647
|
-
throw new Error("API timeout");
|
|
648
|
-
},
|
|
649
|
-
embedBatch: async (texts: string[]) => texts.map(() => [0.1]),
|
|
650
|
-
};
|
|
651
|
-
const bridge = createEmbeddingBridge({ provider: mockProvider, logger });
|
|
652
|
-
const result = await bridge.embed("test");
|
|
653
|
-
expect(result).toBeNull();
|
|
654
|
-
});
|
|
655
|
-
|
|
656
|
-
it("should gracefully return nulls on embedBatch error", async () => {
|
|
657
|
-
const mockProvider = {
|
|
658
|
-
embedQuery: async () => [0.1],
|
|
659
|
-
embedBatch: async () => {
|
|
660
|
-
throw new Error("batch limit exceeded");
|
|
661
|
-
},
|
|
662
|
-
};
|
|
663
|
-
const bridge = createEmbeddingBridge({ provider: mockProvider, logger });
|
|
664
|
-
const results = await bridge.embedBatch(["a", "b"]);
|
|
665
|
-
expect(results).toEqual([null, null]);
|
|
666
|
-
});
|
|
667
|
-
});
|
|
668
|
-
|
|
669
|
-
// ---------------------------------------------------------------------------
|
|
670
|
-
// E. Tool pattern matching with Jaccard similarity
|
|
671
|
-
// ---------------------------------------------------------------------------
|
|
672
|
-
describe("Tool pattern matching with Jaccard", () => {
|
|
673
|
-
let tmpDir: string;
|
|
674
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
675
|
-
|
|
676
|
-
beforeEach(() => {
|
|
677
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ll-fixes-jaccard-"));
|
|
678
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: TEST_CONFIG, logger });
|
|
679
|
-
});
|
|
680
|
-
|
|
681
|
-
afterEach(() => {
|
|
682
|
-
db.close();
|
|
683
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
684
|
-
});
|
|
685
|
-
|
|
686
|
-
it("should extract a tool pattern when same tools appear in different order", async () => {
|
|
687
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
688
|
-
const learningStore = createLearningStore({ db });
|
|
689
|
-
const extractor = createLearningExtractor({ db, learningStore, config: TEST_CONFIG });
|
|
690
|
-
|
|
691
|
-
const toolsA = [
|
|
692
|
-
{ toolName: "Read", durationMs: 10, success: true, error: null, paramHash: "a" },
|
|
693
|
-
{ toolName: "Edit", durationMs: 20, success: true, error: null, paramHash: "b" },
|
|
694
|
-
{ toolName: "Bash", durationMs: 30, success: true, error: null, paramHash: "c" },
|
|
695
|
-
];
|
|
696
|
-
const toolsB = [
|
|
697
|
-
{ toolName: "Bash", durationMs: 15, success: true, error: null, paramHash: "d" },
|
|
698
|
-
{ toolName: "Read", durationMs: 25, success: true, error: null, paramHash: "e" },
|
|
699
|
-
{ toolName: "Edit", durationMs: 35, success: true, error: null, paramHash: "f" },
|
|
700
|
-
];
|
|
701
|
-
|
|
702
|
-
for (let i = 0; i < 3; i++) {
|
|
703
|
-
const run = makeRun({
|
|
704
|
-
runId: `jaccard-run-${i}`,
|
|
705
|
-
toolCalls: i % 2 === 0 ? toolsA : toolsB,
|
|
706
|
-
responseToolCallCount: 3,
|
|
707
|
-
});
|
|
708
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
709
|
-
db.updateRunScore(run.runId, 0.9);
|
|
710
|
-
}
|
|
711
|
-
|
|
712
|
-
const currentRun = makeRun({
|
|
713
|
-
runId: "jaccard-current",
|
|
714
|
-
toolCalls: [
|
|
715
|
-
{ toolName: "Edit", durationMs: 12, success: true, error: null, paramHash: "g" },
|
|
716
|
-
{ toolName: "Bash", durationMs: 22, success: true, error: null, paramHash: "h" },
|
|
717
|
-
{ toolName: "Read", durationMs: 32, success: true, error: null, paramHash: "i" },
|
|
718
|
-
],
|
|
719
|
-
responseToolCallCount: 3,
|
|
720
|
-
});
|
|
721
|
-
db.insertRun(currentRun, qualityEngine.scoreRun(currentRun));
|
|
722
|
-
db.updateRunScore(currentRun.runId, 0.9);
|
|
723
|
-
|
|
724
|
-
await extractor.extract(currentRun, { score: 0.9, signals: [], algorithmVersion: 1 });
|
|
725
|
-
|
|
726
|
-
const learnings = learningStore.listLearnings({ category: "tool_pattern" });
|
|
727
|
-
expect(learnings.length).toBeGreaterThanOrEqual(1);
|
|
728
|
-
expect(learnings[0]!.content).toContain("Read");
|
|
729
|
-
expect(learnings[0]!.content).toContain("Edit");
|
|
730
|
-
expect(learnings[0]!.content).toContain("Bash");
|
|
731
|
-
});
|
|
732
|
-
|
|
733
|
-
it("should NOT extract a pattern when tool sets differ significantly (low Jaccard)", async () => {
|
|
734
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
735
|
-
const learningStore = createLearningStore({ db });
|
|
736
|
-
const extractor = createLearningExtractor({ db, learningStore, config: TEST_CONFIG });
|
|
737
|
-
|
|
738
|
-
for (let i = 0; i < 3; i++) {
|
|
739
|
-
const run = makeRun({
|
|
740
|
-
runId: `low-jaccard-${i}`,
|
|
741
|
-
toolCalls: [
|
|
742
|
-
{ toolName: `UniqueA${i}`, durationMs: 10, success: true, error: null, paramHash: "x" },
|
|
743
|
-
{ toolName: `UniqueB${i}`, durationMs: 20, success: true, error: null, paramHash: "y" },
|
|
744
|
-
],
|
|
745
|
-
responseToolCallCount: 2,
|
|
746
|
-
});
|
|
747
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
748
|
-
db.updateRunScore(run.runId, 0.9);
|
|
749
|
-
}
|
|
750
|
-
|
|
751
|
-
const currentRun = makeRun({
|
|
752
|
-
runId: "low-jaccard-current",
|
|
753
|
-
toolCalls: [
|
|
754
|
-
{ toolName: "Diff1", durationMs: 10, success: true, error: null, paramHash: "z" },
|
|
755
|
-
{ toolName: "Diff2", durationMs: 20, success: true, error: null, paramHash: "w" },
|
|
756
|
-
],
|
|
757
|
-
responseToolCallCount: 2,
|
|
758
|
-
});
|
|
759
|
-
db.insertRun(currentRun, qualityEngine.scoreRun(currentRun));
|
|
760
|
-
db.updateRunScore(currentRun.runId, 0.9);
|
|
761
|
-
|
|
762
|
-
await extractor.extract(currentRun, { score: 0.9, signals: [], algorithmVersion: 1 });
|
|
763
|
-
|
|
764
|
-
const learnings = learningStore.listLearnings({ category: "tool_pattern" });
|
|
765
|
-
expect(learnings.length).toBe(0);
|
|
766
|
-
});
|
|
767
|
-
|
|
768
|
-
it("should match with high overlap (4/5 shared tools = Jaccard 0.8)", async () => {
|
|
769
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
770
|
-
const learningStore = createLearningStore({ db });
|
|
771
|
-
const extractor = createLearningExtractor({ db, learningStore, config: TEST_CONFIG });
|
|
772
|
-
|
|
773
|
-
// Base tool set: A, B, C, D, E
|
|
774
|
-
const baseTools = ["ToolA", "ToolB", "ToolC", "ToolD", "ToolE"];
|
|
775
|
-
// Current: A, B, C, D, F => intersection=4, union=6, Jaccard=4/6=0.667 < 0.8
|
|
776
|
-
// But if current is A, B, C, D, E too => Jaccard=1.0
|
|
777
|
-
// Let's test the exact boundary: 4 shared out of 5 each = 4/6 = 0.667 < 0.8
|
|
778
|
-
|
|
779
|
-
for (let i = 0; i < 3; i++) {
|
|
780
|
-
const run = makeRun({
|
|
781
|
-
runId: `overlap-${i}`,
|
|
782
|
-
toolCalls: baseTools.map((t) => ({
|
|
783
|
-
toolName: t,
|
|
784
|
-
durationMs: 10,
|
|
785
|
-
success: true,
|
|
786
|
-
error: null,
|
|
787
|
-
paramHash: `h${i}`,
|
|
788
|
-
})),
|
|
789
|
-
responseToolCallCount: 5,
|
|
790
|
-
});
|
|
791
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
792
|
-
db.updateRunScore(run.runId, 0.9);
|
|
793
|
-
}
|
|
794
|
-
|
|
795
|
-
// Current run shares 4 of 5 tools + 1 different => Jaccard = 4/6 = 0.667
|
|
796
|
-
const currentRun = makeRun({
|
|
797
|
-
runId: "overlap-current",
|
|
798
|
-
toolCalls: ["ToolA", "ToolB", "ToolC", "ToolD", "ToolX"].map((t) => ({
|
|
799
|
-
toolName: t,
|
|
800
|
-
durationMs: 10,
|
|
801
|
-
success: true,
|
|
802
|
-
error: null,
|
|
803
|
-
paramHash: "hc",
|
|
804
|
-
})),
|
|
805
|
-
responseToolCallCount: 5,
|
|
806
|
-
});
|
|
807
|
-
db.insertRun(currentRun, qualityEngine.scoreRun(currentRun));
|
|
808
|
-
db.updateRunScore(currentRun.runId, 0.9);
|
|
809
|
-
|
|
810
|
-
await extractor.extract(currentRun, { score: 0.9, signals: [], algorithmVersion: 1 });
|
|
811
|
-
|
|
812
|
-
// 4/6 = 0.667 < 0.8, so should NOT match
|
|
813
|
-
const learnings = learningStore.listLearnings({ category: "tool_pattern" });
|
|
814
|
-
expect(learnings.length).toBe(0);
|
|
815
|
-
});
|
|
816
|
-
|
|
817
|
-
it("should not extract patterns for low-score runs", async () => {
|
|
818
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
819
|
-
const learningStore = createLearningStore({ db });
|
|
820
|
-
const extractor = createLearningExtractor({ db, learningStore, config: TEST_CONFIG });
|
|
821
|
-
|
|
822
|
-
const tools = [
|
|
823
|
-
{ toolName: "Read", durationMs: 10, success: true, error: null, paramHash: "a" },
|
|
824
|
-
{ toolName: "Edit", durationMs: 20, success: true, error: null, paramHash: "b" },
|
|
825
|
-
];
|
|
826
|
-
|
|
827
|
-
for (let i = 0; i < 3; i++) {
|
|
828
|
-
const run = makeRun({
|
|
829
|
-
runId: `lowscore-${i}`,
|
|
830
|
-
toolCalls: tools,
|
|
831
|
-
responseToolCallCount: 2,
|
|
832
|
-
});
|
|
833
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
834
|
-
db.updateRunScore(run.runId, 0.9);
|
|
835
|
-
}
|
|
836
|
-
|
|
837
|
-
const currentRun = makeRun({
|
|
838
|
-
runId: "lowscore-current",
|
|
839
|
-
toolCalls: tools,
|
|
840
|
-
responseToolCallCount: 2,
|
|
841
|
-
});
|
|
842
|
-
db.insertRun(currentRun, qualityEngine.scoreRun(currentRun));
|
|
843
|
-
|
|
844
|
-
// Extract with a score below 0.7 threshold
|
|
845
|
-
await extractor.extract(currentRun, { score: 0.5, signals: [], algorithmVersion: 1 });
|
|
846
|
-
|
|
847
|
-
const learnings = learningStore.listLearnings({ category: "tool_pattern" });
|
|
848
|
-
expect(learnings.length).toBe(0);
|
|
849
|
-
});
|
|
850
|
-
});
|
|
851
|
-
|
|
852
|
-
// ---------------------------------------------------------------------------
|
|
853
|
-
// F. Feedback rescoring updates DB
|
|
854
|
-
// ---------------------------------------------------------------------------
|
|
855
|
-
describe("Feedback rescoring updates DB", () => {
|
|
856
|
-
let tmpDir: string;
|
|
857
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
858
|
-
|
|
859
|
-
beforeEach(() => {
|
|
860
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ll-fixes-feedback-"));
|
|
861
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: TEST_CONFIG, logger });
|
|
862
|
-
});
|
|
863
|
-
|
|
864
|
-
afterEach(() => {
|
|
865
|
-
db.close();
|
|
866
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
867
|
-
});
|
|
868
|
-
|
|
869
|
-
it("should update quality_score via updateRunScore", () => {
|
|
870
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
871
|
-
const run = makeRun({ runId: "rescore-1" });
|
|
872
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
873
|
-
|
|
874
|
-
db.updateRunScore("rescore-1", 0.99);
|
|
875
|
-
const row = db.getRun("rescore-1");
|
|
876
|
-
expect(row).toBeDefined();
|
|
877
|
-
expect(row!.quality_score).toBeCloseTo(0.99, 5);
|
|
878
|
-
});
|
|
879
|
-
|
|
880
|
-
it("should rescore via explicit feedback handler with qualityEngine", () => {
|
|
881
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
882
|
-
const run = makeRun({ runId: "fb-rescore-1" });
|
|
883
|
-
const initialScore = qualityEngine.scoreRun(run);
|
|
884
|
-
db.insertRun(run, initialScore);
|
|
885
|
-
|
|
886
|
-
const handler = createExplicitFeedbackHandler({ db, qualityEngine });
|
|
887
|
-
const fb = handler.submitFeedback("fb-rescore-1", 5);
|
|
888
|
-
expect(fb).not.toBeNull();
|
|
889
|
-
|
|
890
|
-
const row = db.getRun("fb-rescore-1");
|
|
891
|
-
expect(row).toBeDefined();
|
|
892
|
-
expect(row!.quality_score).not.toBeCloseTo(initialScore.score, 5);
|
|
893
|
-
});
|
|
894
|
-
|
|
895
|
-
it("should clamp feedback scores to 1-5 range", () => {
|
|
896
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
897
|
-
const run = makeRun({ runId: "clamp-test" });
|
|
898
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
899
|
-
|
|
900
|
-
const handler = createExplicitFeedbackHandler({ db });
|
|
901
|
-
|
|
902
|
-
const fb1 = handler.submitFeedback("clamp-test", -10);
|
|
903
|
-
expect(fb1!.score).toBe(1);
|
|
904
|
-
|
|
905
|
-
const fb2 = handler.submitFeedback("clamp-test", 100);
|
|
906
|
-
expect(fb2!.score).toBe(5);
|
|
907
|
-
});
|
|
908
|
-
|
|
909
|
-
it("should return null when submitting feedback for nonexistent run", () => {
|
|
910
|
-
const handler = createExplicitFeedbackHandler({ db });
|
|
911
|
-
const fb = handler.submitFeedback("nonexistent-run", 3);
|
|
912
|
-
expect(fb).toBeNull();
|
|
913
|
-
});
|
|
914
|
-
|
|
915
|
-
it("should persist feedback record in DB", () => {
|
|
916
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
917
|
-
const run = makeRun({ runId: "persist-fb" });
|
|
918
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
919
|
-
|
|
920
|
-
const handler = createExplicitFeedbackHandler({ db });
|
|
921
|
-
handler.submitFeedback("persist-fb", 4);
|
|
922
|
-
|
|
923
|
-
const feedbacks = db.getFeedback("persist-fb");
|
|
924
|
-
expect(feedbacks).toHaveLength(1);
|
|
925
|
-
expect(feedbacks[0]!.source).toBe("explicit");
|
|
926
|
-
expect(feedbacks[0]!.score).toBe(4);
|
|
927
|
-
});
|
|
928
|
-
|
|
929
|
-
it("should handle feedback without qualityEngine (no rescore)", () => {
|
|
930
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
931
|
-
const run = makeRun({ runId: "no-qe" });
|
|
932
|
-
const score = qualityEngine.scoreRun(run);
|
|
933
|
-
db.insertRun(run, score);
|
|
934
|
-
|
|
935
|
-
// Handler without qualityEngine
|
|
936
|
-
const handler = createExplicitFeedbackHandler({ db });
|
|
937
|
-
const fb = handler.submitFeedback("no-qe", 2);
|
|
938
|
-
expect(fb).not.toBeNull();
|
|
939
|
-
|
|
940
|
-
// Score should not have changed
|
|
941
|
-
const row = db.getRun("no-qe")!;
|
|
942
|
-
expect(row.quality_score).toBeCloseTo(score.score, 5);
|
|
943
|
-
});
|
|
944
|
-
|
|
945
|
-
it("should rescore lower with bad feedback (score 1)", () => {
|
|
946
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
947
|
-
const run = makeRun({ runId: "bad-fb" });
|
|
948
|
-
const initialScore = qualityEngine.scoreRun(run);
|
|
949
|
-
db.insertRun(run, initialScore);
|
|
950
|
-
|
|
951
|
-
const handler = createExplicitFeedbackHandler({ db, qualityEngine });
|
|
952
|
-
handler.submitFeedback("bad-fb", 1);
|
|
953
|
-
|
|
954
|
-
const row = db.getRun("bad-fb")!;
|
|
955
|
-
// With feedback of 1 (worst), score should decrease
|
|
956
|
-
expect(row.quality_score).toBeLessThan(initialScore.score);
|
|
957
|
-
});
|
|
958
|
-
});
|
|
959
|
-
|
|
960
|
-
// ---------------------------------------------------------------------------
|
|
961
|
-
// G. Embeddings populated when bridge available + backfill
|
|
962
|
-
// ---------------------------------------------------------------------------
|
|
963
|
-
describe("Embeddings populated when bridge available", () => {
|
|
964
|
-
let tmpDir: string;
|
|
965
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
966
|
-
|
|
967
|
-
beforeEach(() => {
|
|
968
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ll-fixes-embed-"));
|
|
969
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: TEST_CONFIG, logger });
|
|
970
|
-
});
|
|
971
|
-
|
|
972
|
-
afterEach(() => {
|
|
973
|
-
db.close();
|
|
974
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
975
|
-
});
|
|
976
|
-
|
|
977
|
-
it("should store embeddings when bridge returns vectors", async () => {
|
|
978
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
979
|
-
const learningStore = createLearningStore({ db });
|
|
980
|
-
|
|
981
|
-
const mockBridge: EmbeddingBridge = {
|
|
982
|
-
embed: async () => [0.1, 0.2, 0.3],
|
|
983
|
-
embedBatch: async (texts) => texts.map(() => [0.1, 0.2, 0.3]),
|
|
984
|
-
available: () => true,
|
|
985
|
-
};
|
|
986
|
-
|
|
987
|
-
const extractor = createLearningExtractor({
|
|
988
|
-
db,
|
|
989
|
-
learningStore,
|
|
990
|
-
config: TEST_CONFIG,
|
|
991
|
-
embeddingBridge: mockBridge,
|
|
992
|
-
});
|
|
993
|
-
|
|
994
|
-
const tools = [
|
|
995
|
-
{ toolName: "Read", durationMs: 10, success: true, error: null, paramHash: "a" },
|
|
996
|
-
{ toolName: "Edit", durationMs: 20, success: true, error: null, paramHash: "b" },
|
|
997
|
-
];
|
|
998
|
-
for (let i = 0; i < 3; i++) {
|
|
999
|
-
const run = makeRun({
|
|
1000
|
-
runId: `embed-prior-${i}`,
|
|
1001
|
-
toolCalls: tools,
|
|
1002
|
-
responseToolCallCount: 2,
|
|
1003
|
-
});
|
|
1004
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
1005
|
-
db.updateRunScore(run.runId, 0.9);
|
|
1006
|
-
}
|
|
1007
|
-
|
|
1008
|
-
const currentRun = makeRun({
|
|
1009
|
-
runId: "embed-current",
|
|
1010
|
-
toolCalls: tools,
|
|
1011
|
-
responseToolCallCount: 2,
|
|
1012
|
-
});
|
|
1013
|
-
db.insertRun(currentRun, qualityEngine.scoreRun(currentRun));
|
|
1014
|
-
db.updateRunScore(currentRun.runId, 0.9);
|
|
1015
|
-
|
|
1016
|
-
const ids = await extractor.extract(currentRun, {
|
|
1017
|
-
score: 0.9,
|
|
1018
|
-
signals: [],
|
|
1019
|
-
algorithmVersion: 1,
|
|
1020
|
-
});
|
|
1021
|
-
|
|
1022
|
-
expect(ids.length).toBeGreaterThan(0);
|
|
1023
|
-
const row = db.getLearning(ids[0]!);
|
|
1024
|
-
expect(row).toBeDefined();
|
|
1025
|
-
expect(row!.embedding).not.toBeNull();
|
|
1026
|
-
const parsed = JSON.parse(row!.embedding!) as number[];
|
|
1027
|
-
expect(parsed).toEqual([0.1, 0.2, 0.3]);
|
|
1028
|
-
});
|
|
1029
|
-
|
|
1030
|
-
it("should leave embeddings null when no bridge is provided", async () => {
|
|
1031
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
1032
|
-
const learningStore = createLearningStore({ db });
|
|
1033
|
-
|
|
1034
|
-
const extractor = createLearningExtractor({
|
|
1035
|
-
db,
|
|
1036
|
-
learningStore,
|
|
1037
|
-
config: TEST_CONFIG,
|
|
1038
|
-
});
|
|
1039
|
-
|
|
1040
|
-
const tools = [
|
|
1041
|
-
{ toolName: "Read", durationMs: 10, success: true, error: null, paramHash: "a" },
|
|
1042
|
-
{ toolName: "Edit", durationMs: 20, success: true, error: null, paramHash: "b" },
|
|
1043
|
-
];
|
|
1044
|
-
for (let i = 0; i < 3; i++) {
|
|
1045
|
-
const run = makeRun({
|
|
1046
|
-
runId: `no-embed-prior-${i}`,
|
|
1047
|
-
toolCalls: tools,
|
|
1048
|
-
responseToolCallCount: 2,
|
|
1049
|
-
});
|
|
1050
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
1051
|
-
db.updateRunScore(run.runId, 0.9);
|
|
1052
|
-
}
|
|
1053
|
-
|
|
1054
|
-
const currentRun = makeRun({
|
|
1055
|
-
runId: "no-embed-current",
|
|
1056
|
-
toolCalls: tools,
|
|
1057
|
-
responseToolCallCount: 2,
|
|
1058
|
-
});
|
|
1059
|
-
db.insertRun(currentRun, qualityEngine.scoreRun(currentRun));
|
|
1060
|
-
db.updateRunScore(currentRun.runId, 0.9);
|
|
1061
|
-
|
|
1062
|
-
const ids = await extractor.extract(currentRun, {
|
|
1063
|
-
score: 0.9,
|
|
1064
|
-
signals: [],
|
|
1065
|
-
algorithmVersion: 1,
|
|
1066
|
-
});
|
|
1067
|
-
|
|
1068
|
-
expect(ids.length).toBeGreaterThan(0);
|
|
1069
|
-
const row = db.getLearning(ids[0]!);
|
|
1070
|
-
expect(row).toBeDefined();
|
|
1071
|
-
expect(row!.embedding).toBeNull();
|
|
1072
|
-
});
|
|
1073
|
-
|
|
1074
|
-
it("should leave embeddings null when bridge returns null (no provider)", async () => {
|
|
1075
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
1076
|
-
const learningStore = createLearningStore({ db });
|
|
1077
|
-
|
|
1078
|
-
const nullBridge: EmbeddingBridge = {
|
|
1079
|
-
embed: async () => null,
|
|
1080
|
-
embedBatch: async (texts) => texts.map(() => null),
|
|
1081
|
-
available: () => false,
|
|
1082
|
-
};
|
|
1083
|
-
|
|
1084
|
-
const extractor = createLearningExtractor({
|
|
1085
|
-
db,
|
|
1086
|
-
learningStore,
|
|
1087
|
-
config: TEST_CONFIG,
|
|
1088
|
-
embeddingBridge: nullBridge,
|
|
1089
|
-
});
|
|
1090
|
-
|
|
1091
|
-
const tools = [
|
|
1092
|
-
{ toolName: "Read", durationMs: 10, success: true, error: null, paramHash: "a" },
|
|
1093
|
-
{ toolName: "Edit", durationMs: 20, success: true, error: null, paramHash: "b" },
|
|
1094
|
-
];
|
|
1095
|
-
for (let i = 0; i < 3; i++) {
|
|
1096
|
-
const run = makeRun({
|
|
1097
|
-
runId: `null-bridge-${i}`,
|
|
1098
|
-
toolCalls: tools,
|
|
1099
|
-
responseToolCallCount: 2,
|
|
1100
|
-
});
|
|
1101
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
1102
|
-
db.updateRunScore(run.runId, 0.9);
|
|
1103
|
-
}
|
|
1104
|
-
|
|
1105
|
-
const currentRun = makeRun({
|
|
1106
|
-
runId: "null-bridge-current",
|
|
1107
|
-
toolCalls: tools,
|
|
1108
|
-
responseToolCallCount: 2,
|
|
1109
|
-
});
|
|
1110
|
-
db.insertRun(currentRun, qualityEngine.scoreRun(currentRun));
|
|
1111
|
-
db.updateRunScore(currentRun.runId, 0.9);
|
|
1112
|
-
|
|
1113
|
-
const ids = await extractor.extract(currentRun, {
|
|
1114
|
-
score: 0.9,
|
|
1115
|
-
signals: [],
|
|
1116
|
-
algorithmVersion: 1,
|
|
1117
|
-
});
|
|
1118
|
-
|
|
1119
|
-
expect(ids.length).toBeGreaterThan(0);
|
|
1120
|
-
const row = db.getLearning(ids[0]!);
|
|
1121
|
-
expect(row).toBeDefined();
|
|
1122
|
-
expect(row!.embedding).toBeNull();
|
|
1123
|
-
});
|
|
1124
|
-
});
|
|
1125
|
-
|
|
1126
|
-
// ---------------------------------------------------------------------------
|
|
1127
|
-
// H. Backfill DB methods and flow
|
|
1128
|
-
// ---------------------------------------------------------------------------
|
|
1129
|
-
describe("Embedding backfill", () => {
|
|
1130
|
-
let tmpDir: string;
|
|
1131
|
-
let db: ReturnType<typeof createDatabaseManager>;
|
|
1132
|
-
|
|
1133
|
-
beforeEach(() => {
|
|
1134
|
-
tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "ll-fixes-backfill-"));
|
|
1135
|
-
db = createDatabaseManager({ stateDir: tmpDir, config: TEST_CONFIG, logger });
|
|
1136
|
-
});
|
|
1137
|
-
|
|
1138
|
-
afterEach(() => {
|
|
1139
|
-
db.close();
|
|
1140
|
-
fs.rmSync(tmpDir, { recursive: true, force: true });
|
|
1141
|
-
});
|
|
1142
|
-
|
|
1143
|
-
function insertLearningWithRun(id: string, embedding: number[] | null) {
|
|
1144
|
-
const qualityEngine = createQualityEngine({ config: TEST_CONFIG, db });
|
|
1145
|
-
const run = makeRun({ runId: `run-for-${id}` });
|
|
1146
|
-
db.insertRun(run, qualityEngine.scoreRun(run));
|
|
1147
|
-
db.insertLearning({
|
|
1148
|
-
id,
|
|
1149
|
-
runId: `run-for-${id}`,
|
|
1150
|
-
category: "tool_pattern",
|
|
1151
|
-
content: `Learning ${id}`,
|
|
1152
|
-
embedding,
|
|
1153
|
-
confidence: 0.8,
|
|
1154
|
-
appliedCount: 0,
|
|
1155
|
-
createdAt: Date.now(),
|
|
1156
|
-
updatedAt: Date.now(),
|
|
1157
|
-
});
|
|
1158
|
-
}
|
|
1159
|
-
|
|
1160
|
-
it("should find learnings without embeddings", () => {
|
|
1161
|
-
insertLearningWithRun("bf-1", null);
|
|
1162
|
-
insertLearningWithRun("bf-2", null);
|
|
1163
|
-
insertLearningWithRun("bf-3", [0.1, 0.2]);
|
|
1164
|
-
|
|
1165
|
-
const rows = db.getLearningsWithoutEmbeddings(100);
|
|
1166
|
-
expect(rows).toHaveLength(2);
|
|
1167
|
-
expect(rows.map((r) => r.id).sort()).toEqual(["bf-1", "bf-2"]);
|
|
1168
|
-
});
|
|
1169
|
-
|
|
1170
|
-
it("should respect the limit parameter", () => {
|
|
1171
|
-
insertLearningWithRun("lim-1", null);
|
|
1172
|
-
insertLearningWithRun("lim-2", null);
|
|
1173
|
-
insertLearningWithRun("lim-3", null);
|
|
1174
|
-
|
|
1175
|
-
const rows = db.getLearningsWithoutEmbeddings(2);
|
|
1176
|
-
expect(rows).toHaveLength(2);
|
|
1177
|
-
});
|
|
1178
|
-
|
|
1179
|
-
it("should update a learning embedding", () => {
|
|
1180
|
-
insertLearningWithRun("upd-1", null);
|
|
1181
|
-
|
|
1182
|
-
db.updateLearningEmbedding("upd-1", [0.5, 0.6, 0.7]);
|
|
1183
|
-
|
|
1184
|
-
const row = db.getLearning("upd-1");
|
|
1185
|
-
expect(row).toBeDefined();
|
|
1186
|
-
expect(row!.embedding).not.toBeNull();
|
|
1187
|
-
expect(JSON.parse(row!.embedding!)).toEqual([0.5, 0.6, 0.7]);
|
|
1188
|
-
});
|
|
1189
|
-
|
|
1190
|
-
it("should not return learnings that already have embeddings", () => {
|
|
1191
|
-
insertLearningWithRun("has-embed", [1.0, 2.0, 3.0]);
|
|
1192
|
-
|
|
1193
|
-
const rows = db.getLearningsWithoutEmbeddings(100);
|
|
1194
|
-
expect(rows).toHaveLength(0);
|
|
1195
|
-
});
|
|
1196
|
-
|
|
1197
|
-
it("should return empty array when all learnings have embeddings", () => {
|
|
1198
|
-
insertLearningWithRun("all-1", [0.1]);
|
|
1199
|
-
insertLearningWithRun("all-2", [0.2]);
|
|
1200
|
-
|
|
1201
|
-
const rows = db.getLearningsWithoutEmbeddings(100);
|
|
1202
|
-
expect(rows).toHaveLength(0);
|
|
1203
|
-
});
|
|
1204
|
-
|
|
1205
|
-
it("should simulate full backfill loop with batching", async () => {
|
|
1206
|
-
// Insert 5 learnings without embeddings
|
|
1207
|
-
for (let i = 0; i < 5; i++) {
|
|
1208
|
-
insertLearningWithRun(`batch-${i}`, null);
|
|
1209
|
-
}
|
|
1210
|
-
|
|
1211
|
-
const mockBridge: EmbeddingBridge = {
|
|
1212
|
-
embed: async () => [0.1],
|
|
1213
|
-
embedBatch: async (texts) => texts.map((_, idx) => [idx * 0.1]),
|
|
1214
|
-
available: () => true,
|
|
1215
|
-
};
|
|
1216
|
-
|
|
1217
|
-
// Simulate the backfill loop from index.ts
|
|
1218
|
-
const BATCH_SIZE = 2;
|
|
1219
|
-
let totalBackfilled = 0;
|
|
1220
|
-
|
|
1221
|
-
while (true) {
|
|
1222
|
-
const rows = db.getLearningsWithoutEmbeddings(BATCH_SIZE);
|
|
1223
|
-
if (rows.length === 0) break;
|
|
1224
|
-
|
|
1225
|
-
const texts = rows.map((r) => r.content);
|
|
1226
|
-
const embeddings = await mockBridge.embedBatch(texts);
|
|
1227
|
-
|
|
1228
|
-
for (let i = 0; i < rows.length; i++) {
|
|
1229
|
-
const emb = embeddings[i];
|
|
1230
|
-
if (emb) {
|
|
1231
|
-
db.updateLearningEmbedding(rows[i]!.id, emb);
|
|
1232
|
-
totalBackfilled++;
|
|
1233
|
-
}
|
|
1234
|
-
}
|
|
1235
|
-
}
|
|
1236
|
-
|
|
1237
|
-
expect(totalBackfilled).toBe(5);
|
|
1238
|
-
expect(db.getLearningsWithoutEmbeddings(100)).toHaveLength(0);
|
|
1239
|
-
|
|
1240
|
-
// Verify all have embeddings now
|
|
1241
|
-
for (let i = 0; i < 5; i++) {
|
|
1242
|
-
const row = db.getLearning(`batch-${i}`);
|
|
1243
|
-
expect(row!.embedding).not.toBeNull();
|
|
1244
|
-
}
|
|
1245
|
-
});
|
|
1246
|
-
|
|
1247
|
-
it("should handle partial failures in backfill (some null embeddings)", async () => {
|
|
1248
|
-
insertLearningWithRun("partial-0", null);
|
|
1249
|
-
insertLearningWithRun("partial-1", null);
|
|
1250
|
-
insertLearningWithRun("partial-2", null);
|
|
1251
|
-
|
|
1252
|
-
const mockBridge: EmbeddingBridge = {
|
|
1253
|
-
embed: async () => [0.1],
|
|
1254
|
-
embedBatch: async (texts) =>
|
|
1255
|
-
// Return null for the middle one
|
|
1256
|
-
texts.map((_, i) => (i === 1 ? null : [0.1 * i])),
|
|
1257
|
-
available: () => true,
|
|
1258
|
-
};
|
|
1259
|
-
|
|
1260
|
-
const rows = db.getLearningsWithoutEmbeddings(100);
|
|
1261
|
-
const embeddings = await mockBridge.embedBatch(rows.map((r) => r.content));
|
|
1262
|
-
|
|
1263
|
-
let backfilled = 0;
|
|
1264
|
-
for (let i = 0; i < rows.length; i++) {
|
|
1265
|
-
const emb = embeddings[i];
|
|
1266
|
-
if (emb) {
|
|
1267
|
-
db.updateLearningEmbedding(rows[i]!.id, emb);
|
|
1268
|
-
backfilled++;
|
|
1269
|
-
}
|
|
1270
|
-
}
|
|
1271
|
-
|
|
1272
|
-
expect(backfilled).toBe(2);
|
|
1273
|
-
// One learning still has no embedding
|
|
1274
|
-
const remaining = db.getLearningsWithoutEmbeddings(100);
|
|
1275
|
-
expect(remaining).toHaveLength(1);
|
|
1276
|
-
});
|
|
1277
|
-
|
|
1278
|
-
it("should overwrite existing embedding via updateLearningEmbedding", () => {
|
|
1279
|
-
insertLearningWithRun("overwrite", [0.1, 0.2]);
|
|
1280
|
-
|
|
1281
|
-
db.updateLearningEmbedding("overwrite", [0.9, 0.8]);
|
|
1282
|
-
|
|
1283
|
-
const row = db.getLearning("overwrite");
|
|
1284
|
-
expect(JSON.parse(row!.embedding!)).toEqual([0.9, 0.8]);
|
|
1285
|
-
});
|
|
1286
|
-
});
|