visionclaw 0.1.195-beta.0 → 0.1.195-dev.feat-e2e-test-system.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/command-handlers.d.ts.map +1 -1
- package/dist/agent/command-handlers.js +17 -0
- package/dist/agent/command-handlers.js.map +1 -1
- package/dist/builtin-skills/catalog/equity-research/SKILL.md +256 -0
- package/dist/builtin-skills/catalog/financial-modeling/SKILL.md +186 -0
- package/dist/builtin-skills/catalog/investment-banking/SKILL.md +213 -0
- package/dist/builtin-skills/catalog/private-equity/SKILL.md +282 -0
- package/dist/builtin-skills/catalog/wealth-management/SKILL.md +252 -0
- package/dist/channels/interface.d.ts +9 -0
- package/dist/channels/interface.d.ts.map +1 -1
- package/dist/channels/manager.d.ts.map +1 -1
- package/dist/channels/manager.js +3 -0
- package/dist/channels/manager.js.map +1 -1
- package/dist/channels/telegram.d.ts.map +1 -1
- package/dist/channels/telegram.js +7 -1
- package/dist/channels/telegram.js.map +1 -1
- package/dist/config/types.d.ts +6 -0
- package/dist/config/types.d.ts.map +1 -1
- package/dist/config/types.js +10 -0
- package/dist/config/types.js.map +1 -1
- package/dist/e2e/artifacts.d.ts +8 -0
- package/dist/e2e/artifacts.d.ts.map +1 -0
- package/dist/e2e/artifacts.js +35 -0
- package/dist/e2e/artifacts.js.map +1 -0
- package/dist/e2e/cleanup.d.ts +8 -0
- package/dist/e2e/cleanup.d.ts.map +1 -0
- package/dist/e2e/cleanup.js +108 -0
- package/dist/e2e/cleanup.js.map +1 -0
- package/dist/e2e/cli.d.ts +4 -0
- package/dist/e2e/cli.d.ts.map +1 -0
- package/dist/e2e/cli.js +16 -0
- package/dist/e2e/cli.js.map +1 -0
- package/dist/e2e/index.d.ts +5 -0
- package/dist/e2e/index.d.ts.map +1 -0
- package/dist/e2e/index.js +4 -0
- package/dist/e2e/index.js.map +1 -0
- package/dist/e2e/local-test-server.d.ts +7 -0
- package/dist/e2e/local-test-server.d.ts.map +1 -0
- package/dist/e2e/local-test-server.js +75 -0
- package/dist/e2e/local-test-server.js.map +1 -0
- package/dist/e2e/oauth-setup-store.d.ts +28 -0
- package/dist/e2e/oauth-setup-store.d.ts.map +1 -0
- package/dist/e2e/oauth-setup-store.js +56 -0
- package/dist/e2e/oauth-setup-store.js.map +1 -0
- package/dist/e2e/parser.d.ts +4 -0
- package/dist/e2e/parser.d.ts.map +1 -0
- package/dist/e2e/parser.js +52 -0
- package/dist/e2e/parser.js.map +1 -0
- package/dist/e2e/registry.d.ts +3 -0
- package/dist/e2e/registry.d.ts.map +1 -0
- package/dist/e2e/registry.js +44 -0
- package/dist/e2e/registry.js.map +1 -0
- package/dist/e2e/reporter.d.ts +6 -0
- package/dist/e2e/reporter.d.ts.map +1 -0
- package/dist/e2e/reporter.js +56 -0
- package/dist/e2e/reporter.js.map +1 -0
- package/dist/e2e/runner.d.ts +4 -0
- package/dist/e2e/runner.d.ts.map +1 -0
- package/dist/e2e/runner.js +116 -0
- package/dist/e2e/runner.js.map +1 -0
- package/dist/e2e/setup-google-guest.d.ts +19 -0
- package/dist/e2e/setup-google-guest.d.ts.map +1 -0
- package/dist/e2e/setup-google-guest.js +205 -0
- package/dist/e2e/setup-google-guest.js.map +1 -0
- package/dist/e2e/suite-utils.d.ts +19 -0
- package/dist/e2e/suite-utils.d.ts.map +1 -0
- package/dist/e2e/suite-utils.js +60 -0
- package/dist/e2e/suite-utils.js.map +1 -0
- package/dist/e2e/suites/agent.d.ts +3 -0
- package/dist/e2e/suites/agent.d.ts.map +1 -0
- package/dist/e2e/suites/agent.js +33 -0
- package/dist/e2e/suites/agent.js.map +1 -0
- package/dist/e2e/suites/browser.d.ts +3 -0
- package/dist/e2e/suites/browser.d.ts.map +1 -0
- package/dist/e2e/suites/browser.js +58 -0
- package/dist/e2e/suites/browser.js.map +1 -0
- package/dist/e2e/suites/cua.d.ts +3 -0
- package/dist/e2e/suites/cua.d.ts.map +1 -0
- package/dist/e2e/suites/cua.js +68 -0
- package/dist/e2e/suites/cua.js.map +1 -0
- package/dist/e2e/suites/google.d.ts +3 -0
- package/dist/e2e/suites/google.d.ts.map +1 -0
- package/dist/e2e/suites/google.js +145 -0
- package/dist/e2e/suites/google.js.map +1 -0
- package/dist/e2e/suites/memory.d.ts +3 -0
- package/dist/e2e/suites/memory.d.ts.map +1 -0
- package/dist/e2e/suites/memory.js +50 -0
- package/dist/e2e/suites/memory.js.map +1 -0
- package/dist/e2e/suites/obs.d.ts +3 -0
- package/dist/e2e/suites/obs.d.ts.map +1 -0
- package/dist/e2e/suites/obs.js +29 -0
- package/dist/e2e/suites/obs.js.map +1 -0
- package/dist/e2e/suites/self.d.ts +3 -0
- package/dist/e2e/suites/self.d.ts.map +1 -0
- package/dist/e2e/suites/self.js +65 -0
- package/dist/e2e/suites/self.js.map +1 -0
- package/dist/e2e/suites/upgrade.d.ts +3 -0
- package/dist/e2e/suites/upgrade.d.ts.map +1 -0
- package/dist/e2e/suites/upgrade.js +31 -0
- package/dist/e2e/suites/upgrade.js.map +1 -0
- package/dist/e2e/types.d.ts +91 -0
- package/dist/e2e/types.d.ts.map +1 -0
- package/dist/e2e/types.js +2 -0
- package/dist/e2e/types.js.map +1 -0
- package/dist/index.js.map +1 -1
- package/dist/service/daemon.d.ts +1 -0
- package/dist/service/daemon.d.ts.map +1 -1
- package/dist/service/daemon.js +110 -15
- package/dist/service/daemon.js.map +1 -1
- package/dist/tools/upgrade.d.ts +8 -0
- package/dist/tools/upgrade.d.ts.map +1 -1
- package/dist/tools/upgrade.js +64 -8
- package/dist/tools/upgrade.js.map +1 -1
- package/dist-agent/bundle.cjs +32037 -30064
- package/package.json +1 -1
- package/dist/agent/applied-credential-signature.d.ts +0 -53
- package/dist/agent/applied-credential-signature.d.ts.map +0 -1
- package/dist/agent/applied-credential-signature.js +0 -137
- package/dist/agent/applied-credential-signature.js.map +0 -1
- package/dist/agent/engines/claude/cli-resolver.d.ts +0 -16
- package/dist/agent/engines/claude/cli-resolver.d.ts.map +0 -1
- package/dist/agent/engines/claude/cli-resolver.js +0 -83
- package/dist/agent/engines/claude/cli-resolver.js.map +0 -1
- package/dist/agent/engines/claude/session-browser-policy.d.ts +0 -9
- package/dist/agent/engines/claude/session-browser-policy.d.ts.map +0 -1
- package/dist/agent/engines/claude/session-browser-policy.js +0 -49
- package/dist/agent/engines/claude/session-browser-policy.js.map +0 -1
- package/dist/agent/engines/claude/session.d.ts +0 -304
- package/dist/agent/engines/claude/session.d.ts.map +0 -1
- package/dist/agent/engines/claude/session.js +0 -1233
- package/dist/agent/engines/claude/session.js.map +0 -1
- package/dist/agent/engines/client-factory.d.ts +0 -63
- package/dist/agent/engines/client-factory.d.ts.map +0 -1
- package/dist/agent/engines/client-factory.js +0 -382
- package/dist/agent/engines/client-factory.js.map +0 -1
- package/dist/agent/engines/engine-factory.d.ts +0 -5
- package/dist/agent/engines/engine-factory.d.ts.map +0 -1
- package/dist/agent/engines/engine-factory.js +0 -7
- package/dist/agent/engines/engine-factory.js.map +0 -1
- package/dist/agent/engines/engine.d.ts +0 -8
- package/dist/agent/engines/engine.d.ts.map +0 -1
- package/dist/agent/engines/engine.js +0 -15
- package/dist/agent/engines/engine.js.map +0 -1
- package/dist/agent/engines/openai/file-session.d.ts +0 -19
- package/dist/agent/engines/openai/file-session.d.ts.map +0 -1
- package/dist/agent/engines/openai/file-session.js +0 -78
- package/dist/agent/engines/openai/file-session.js.map +0 -1
- package/dist/agent/engines/openai/file-tools.d.ts +0 -35
- package/dist/agent/engines/openai/file-tools.d.ts.map +0 -1
- package/dist/agent/engines/openai/file-tools.js +0 -194
- package/dist/agent/engines/openai/file-tools.js.map +0 -1
- package/dist/agent/engines/openai/session.d.ts +0 -55
- package/dist/agent/engines/openai/session.d.ts.map +0 -1
- package/dist/agent/engines/openai/session.js +0 -447
- package/dist/agent/engines/openai/session.js.map +0 -1
- package/dist/agent/engines/openai/tools.d.ts +0 -15
- package/dist/agent/engines/openai/tools.d.ts.map +0 -1
- package/dist/agent/engines/openai/tools.js +0 -221
- package/dist/agent/engines/openai/tools.js.map +0 -1
- package/dist/agent/engines/pi/session.d.ts +0 -54
- package/dist/agent/engines/pi/session.d.ts.map +0 -1
- package/dist/agent/engines/pi/session.js +0 -397
- package/dist/agent/engines/pi/session.js.map +0 -1
- package/dist/agent/engines/pi/tools.d.ts +0 -19
- package/dist/agent/engines/pi/tools.d.ts.map +0 -1
- package/dist/agent/engines/pi/tools.js +0 -127
- package/dist/agent/engines/pi/tools.js.map +0 -1
- package/dist/agent/engines/session-types.d.ts +0 -153
- package/dist/agent/engines/session-types.d.ts.map +0 -1
- package/dist/agent/engines/session-types.js +0 -2
- package/dist/agent/engines/session-types.js.map +0 -1
- package/dist/agent/engines/system-prompt-log.d.ts +0 -9
- package/dist/agent/engines/system-prompt-log.d.ts.map +0 -1
- package/dist/agent/engines/system-prompt-log.js +0 -46
- package/dist/agent/engines/system-prompt-log.js.map +0 -1
- package/dist/agent/model-provider.d.ts +0 -103
- package/dist/agent/model-provider.d.ts.map +0 -1
- package/dist/agent/model-provider.js +0 -540
- package/dist/agent/model-provider.js.map +0 -1
- package/dist/agent/transcript/transcript-backfill.d.ts +0 -54
- package/dist/agent/transcript/transcript-backfill.d.ts.map +0 -1
- package/dist/agent/transcript/transcript-backfill.js +0 -604
- package/dist/agent/transcript/transcript-backfill.js.map +0 -1
- package/dist/agent/transcript/transcript-indexer.d.ts +0 -273
- package/dist/agent/transcript/transcript-indexer.d.ts.map +0 -1
- package/dist/agent/transcript/transcript-indexer.js +0 -1217
- package/dist/agent/transcript/transcript-indexer.js.map +0 -1
- package/dist/agent/transcript/transcript-memory-migrations.d.ts +0 -25
- package/dist/agent/transcript/transcript-memory-migrations.d.ts.map +0 -1
- package/dist/agent/transcript/transcript-memory-migrations.js +0 -87
- package/dist/agent/transcript/transcript-memory-migrations.js.map +0 -1
- package/dist/agent/transcript-memory-migrations.d.ts +0 -25
- package/dist/agent/transcript-memory-migrations.d.ts.map +0 -1
- package/dist/agent/transcript-memory-migrations.js +0 -87
- package/dist/agent/transcript-memory-migrations.js.map +0 -1
- package/dist/agent/tunnel-credential-handler.d.ts +0 -90
- package/dist/agent/tunnel-credential-handler.d.ts.map +0 -1
- package/dist/agent/tunnel-credential-handler.js +0 -162
- package/dist/agent/tunnel-credential-handler.js.map +0 -1
- package/dist/agent/usage/usage-backfill-handler.d.ts +0 -18
- package/dist/agent/usage/usage-backfill-handler.d.ts.map +0 -1
- package/dist/agent/usage/usage-backfill-handler.js +0 -69
- package/dist/agent/usage/usage-backfill-handler.js.map +0 -1
- package/dist/agent/usage/usage-gate.d.ts +0 -25
- package/dist/agent/usage/usage-gate.d.ts.map +0 -1
- package/dist/agent/usage/usage-gate.js +0 -83
- package/dist/agent/usage/usage-gate.js.map +0 -1
- package/dist/agent/usage/usage-handler.d.ts +0 -7
- package/dist/agent/usage/usage-handler.d.ts.map +0 -1
- package/dist/agent/usage/usage-handler.js +0 -28
- package/dist/agent/usage/usage-handler.js.map +0 -1
- package/dist/agent/usage/usage-report-builder.d.ts +0 -26
- package/dist/agent/usage/usage-report-builder.d.ts.map +0 -1
- package/dist/agent/usage/usage-report-builder.js +0 -80
- package/dist/agent/usage/usage-report-builder.js.map +0 -1
- package/dist/agent/usage/usage-report-queue.d.ts +0 -26
- package/dist/agent/usage/usage-report-queue.d.ts.map +0 -1
- package/dist/agent/usage/usage-report-queue.js +0 -199
- package/dist/agent/usage/usage-report-queue.js.map +0 -1
- package/dist/agent/usage/usage-report-types.d.ts +0 -41
- package/dist/agent/usage/usage-report-types.d.ts.map +0 -1
- package/dist/agent/usage/usage-report-types.js +0 -2
- package/dist/agent/usage/usage-report-types.js.map +0 -1
- package/dist/agent/usage/usage-reporter.d.ts +0 -31
- package/dist/agent/usage/usage-reporter.d.ts.map +0 -1
- package/dist/agent/usage/usage-reporter.js +0 -102
- package/dist/agent/usage/usage-reporter.js.map +0 -1
- package/dist/agent/usage-backfill-handler.d.ts +0 -18
- package/dist/agent/usage-backfill-handler.d.ts.map +0 -1
- package/dist/agent/usage-backfill-handler.js +0 -69
- package/dist/agent/usage-backfill-handler.js.map +0 -1
- package/dist/agent/usage-gate.d.ts +0 -25
- package/dist/agent/usage-gate.d.ts.map +0 -1
- package/dist/agent/usage-gate.js +0 -83
- package/dist/agent/usage-gate.js.map +0 -1
- package/dist/agent/usage-report-builder.d.ts +0 -26
- package/dist/agent/usage-report-builder.d.ts.map +0 -1
- package/dist/agent/usage-report-builder.js +0 -80
- package/dist/agent/usage-report-builder.js.map +0 -1
- package/dist/agent/usage-report-queue.d.ts +0 -26
- package/dist/agent/usage-report-queue.d.ts.map +0 -1
- package/dist/agent/usage-report-queue.js +0 -199
- package/dist/agent/usage-report-queue.js.map +0 -1
- package/dist/agent/usage-report-types.d.ts +0 -41
- package/dist/agent/usage-report-types.d.ts.map +0 -1
- package/dist/agent/usage-report-types.js +0 -2
- package/dist/agent/usage-report-types.js.map +0 -1
- package/dist/agent/usage-reporter.d.ts +0 -31
- package/dist/agent/usage-reporter.d.ts.map +0 -1
- package/dist/agent/usage-reporter.js +0 -102
- package/dist/agent/usage-reporter.js.map +0 -1
- package/dist/agent/wake-cycle-tool-tracker.d.ts +0 -39
- package/dist/agent/wake-cycle-tool-tracker.d.ts.map +0 -1
- package/dist/agent/wake-cycle-tool-tracker.js +0 -72
- package/dist/agent/wake-cycle-tool-tracker.js.map +0 -1
- package/dist/billing/payg-handler.d.ts +0 -29
- package/dist/billing/payg-handler.d.ts.map +0 -1
- package/dist/billing/payg-handler.js +0 -92
- package/dist/billing/payg-handler.js.map +0 -1
- package/dist/billing/payment-handler.d.ts +0 -24
- package/dist/billing/payment-handler.d.ts.map +0 -1
- package/dist/billing/payment-handler.js +0 -101
- package/dist/billing/payment-handler.js.map +0 -1
- package/dist/builtin-skills/catalog/phone-adb-automation/SKILL.md +0 -412
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_input.sh +0 -132
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_launch.sh +0 -166
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_screenshot.sh +0 -87
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_security_kbd.py +0 -174
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_setup.sh +0 -274
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_swipe.sh +0 -111
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_tap.sh +0 -87
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_ui_parse.py +0 -176
- package/dist/builtin-skills/catalog/phone-adb-automation/phone_wake_unlock.sh +0 -67
- package/dist/builtin-skills/transcribe-audio/SKILL.md +0 -122
- package/dist/data-processing/convert-demo-cli.d.ts +0 -7
- package/dist/data-processing/convert-demo-cli.d.ts.map +0 -1
- package/dist/data-processing/convert-demo-cli.js +0 -30
- package/dist/data-processing/convert-demo-cli.js.map +0 -1
- package/dist/data-processing/convert-demo.d.ts +0 -26
- package/dist/data-processing/convert-demo.d.ts.map +0 -1
- package/dist/data-processing/convert-demo.js +0 -233
- package/dist/data-processing/convert-demo.js.map +0 -1
- package/dist/obs/rdp/icons/icons/app_windows.svg +0 -4
- package/dist/obs/rdp/icons/icons/clip_get.svg +0 -4
- package/dist/obs/rdp/icons/icons/clip_send.svg +0 -4
- package/dist/obs/rdp/icons/icons/clip_shared.svg +0 -4
- package/dist/obs/rdp/icons/icons/clipboard.svg +0 -4
- package/dist/obs/rdp/icons/icons/clipboard_shared.svg +0 -4
- package/dist/obs/rdp/icons/icons/control.svg +0 -4
- package/dist/obs/rdp/icons/icons/desktop.svg +0 -4
- package/dist/obs/rdp/icons/icons/display.svg +0 -4
- package/dist/obs/rdp/icons/icons/launchpad.svg +0 -4
- package/dist/obs/rdp/icons/icons/mission_control.svg +0 -4
- package/dist/obs/rdp/icons/icons/screenshot.svg +0 -4
- package/dist/obs/rdp/icons/icons/zoom_actual.svg +0 -4
- package/dist/obs/rdp/icons/icons/zoom_fit.svg +0 -4
- package/dist/obs/rdp/icons/icons/zoom_in.svg +0 -4
- package/dist/obs/rdp/icons/icons/zoom_out.svg +0 -4
- package/dist/obs/tunnel-telemetry.d.ts +0 -46
- package/dist/obs/tunnel-telemetry.d.ts.map +0 -1
- package/dist/obs/tunnel-telemetry.js +0 -70
- package/dist/obs/tunnel-telemetry.js.map +0 -1
- package/dist/onboarding/cloudflared-cert.d.ts +0 -15
- package/dist/onboarding/cloudflared-cert.d.ts.map +0 -1
- package/dist/onboarding/cloudflared-cert.js +0 -57
- package/dist/onboarding/cloudflared-cert.js.map +0 -1
- package/dist/onboarding/playwriter-extension.d.ts +0 -19
- package/dist/onboarding/playwriter-extension.d.ts.map +0 -1
- package/dist/onboarding/playwriter-extension.js +0 -246
- package/dist/onboarding/playwriter-extension.js.map +0 -1
- package/dist/realtime/websocket.d.ts +0 -7
- package/dist/realtime/websocket.d.ts.map +0 -1
- package/dist/realtime/websocket.js +0 -65
- package/dist/realtime/websocket.js.map +0 -1
- package/dist/service/gbox-tun.d.ts +0 -14
- package/dist/service/gbox-tun.d.ts.map +0 -1
- package/dist/service/gbox-tun.js +0 -315
- package/dist/service/gbox-tun.js.map +0 -1
- package/dist/skills/installed.d.ts +0 -11
- package/dist/skills/installed.d.ts.map +0 -1
- package/dist/skills/installed.js +0 -35
- package/dist/skills/installed.js.map +0 -1
- package/dist/tools/coordinate-resolver.d.ts +0 -30
- package/dist/tools/coordinate-resolver.d.ts.map +0 -1
- package/dist/tools/coordinate-resolver.js +0 -104
- package/dist/tools/coordinate-resolver.js.map +0 -1
- package/dist/utils/playwriter-relay.d.ts +0 -9
- package/dist/utils/playwriter-relay.d.ts.map +0 -1
- package/dist/utils/playwriter-relay.js +0 -77
- package/dist/utils/playwriter-relay.js.map +0 -1
- package/dist/utils/wechat-monitor.d.ts +0 -21
- package/dist/utils/wechat-monitor.d.ts.map +0 -1
- package/dist/utils/wechat-monitor.js +0 -88
- package/dist/utils/wechat-monitor.js.map +0 -1
- package/dist-agent/realtime/assets/index.html +0 -1058
- package/dist-agent/realtime/assets/samples/alloy.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/ash.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/ballad.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/cedar.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/coral.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/echo.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/marin.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/sage.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/shimmer.mp3 +0 -0
- package/dist-agent/realtime/assets/samples/verse.mp3 +0 -0
|
@@ -1,1217 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Transcript-based structured memory: indexes wake cycle summaries into
|
|
3
|
-
* a SQLite database with FTS5 full-text search and sqlite-vec vector
|
|
4
|
-
* similarity search. Each wake cycle is summarized by an LLM after
|
|
5
|
-
* completion and stored for later retrieval.
|
|
6
|
-
*
|
|
7
|
-
* IMPORTANT — file terminology:
|
|
8
|
-
* - "source file" (DB column `source_file`): the JSONL file that contains
|
|
9
|
-
* the raw data for a wake cycle. This is EITHER:
|
|
10
|
-
* • A **Claude SDK transcript** (`~/.claude/projects/<slug>/<sessionId>.jsonl`)
|
|
11
|
-
* for live-indexed cycles, OR
|
|
12
|
-
* • A **VisionClaw runtime log** (`~/.visionclaw/profiles/<profile>/logs/YYYY-MM-DD.jsonl`)
|
|
13
|
-
* for backfill-indexed cycles.
|
|
14
|
-
* These two formats are different:
|
|
15
|
-
* - Claude transcripts: `{type:"user"|"assistant", message:{...}}`
|
|
16
|
-
* - VisionClaw logs: `{timestamp, level, category, message, data}`
|
|
17
|
-
*
|
|
18
|
-
* This module handles:
|
|
19
|
-
* - SQLite schema creation (wake_cycles + FTS5 + vec0 virtual tables)
|
|
20
|
-
* - Reading source file lines (both Claude transcripts and VisionClaw logs)
|
|
21
|
-
* - Generating summaries via Gemini Flash
|
|
22
|
-
* - Generating embeddings via Gemini Embedding
|
|
23
|
-
* - Inserting indexed records
|
|
24
|
-
* - Keyword (FTS5), semantic (vec0 KNN), and hybrid search
|
|
25
|
-
* - Retrieval of key excerpts and original source file lines
|
|
26
|
-
*/
|
|
27
|
-
import { DatabaseSync } from "node:sqlite";
|
|
28
|
-
import fs from "node:fs";
|
|
29
|
-
import readline from "node:readline";
|
|
30
|
-
import { createGunzip } from "node:zlib";
|
|
31
|
-
import { pipeline } from "node:stream/promises";
|
|
32
|
-
import { PassThrough } from "node:stream";
|
|
33
|
-
import path from "node:path";
|
|
34
|
-
import { GoogleGenAI } from "@google/genai";
|
|
35
|
-
import * as sqliteVec from "sqlite-vec";
|
|
36
|
-
import { getConfigDir } from "../../config/index.js";
|
|
37
|
-
import { resolveGeminiApiKey, GEMINI_SUMMARY_MODEL, GEMINI_EMBEDDING_MODEL, } from "../../gemini-credentials.js";
|
|
38
|
-
import { logger } from "../../logger.js";
|
|
39
|
-
import { runTranscriptMemoryMigrations } from "./transcript-memory-migrations.js";
|
|
40
|
-
import { deriveArchivePath } from "../session-trimmer.js";
|
|
41
|
-
/**
 * Register the sqlite-vec extension on a database handle.
 *
 * The imported module namespace is validated first so that an unexpected
 * module shape produces a descriptive error instead of a bare TypeError.
 *
 * @param db - Database handle passed straight through to `sqliteVec.load()`.
 * @throws {Error} When the imported module has no callable `load()`.
 */
function loadSqliteVec(db) {
    const vecModule = sqliteVec;
    const exposesLoader = typeof vecModule === "object" &&
        vecModule !== null &&
        typeof vecModule.load === "function";
    if (!exposesLoader) {
        throw new Error("sqlite-vec module did not expose a callable load() function");
    }
    vecModule.load(db);
}
|
|
50
|
-
// ---------------------------------------------------------------------------
|
|
51
|
-
// DB singleton
|
|
52
|
-
// ---------------------------------------------------------------------------
|
|
53
|
-
// Shared database handle; stays null until getDb() has opened the database.
let db = null;
// Prepared statements, populated by getDb(). The two vector statements
// (stmtInsertVec, stmtSearchSemantic) are only prepared when the sqlite-vec
// extension loaded successfully.
let stmtInsert = null;
let stmtInsertVec = null;
let stmtSearchKeyword = null;
let stmtSearchSemantic = null;
let stmtGetById = null;
// True once sqlite-vec has been loaded on the shared handle.
let vecEnabled = false;
/** Number of float components in one Gemini Embedding vector. */
const EMBEDDING_DIM = 768;
/** Size in bytes of one embedding when stored as packed float32 values. */
const EMBEDDING_BYTE_LENGTH = Float32Array.BYTES_PER_ELEMENT * EMBEDDING_DIM;
|
|
63
|
-
/**
 * Return `stmt` unchanged, or fail loudly when it has not been prepared yet.
 *
 * @param stmt - A prepared statement, or a falsy placeholder such as null.
 * @param name - Label included in the error message to identify the statement.
 * @returns The same `stmt` that was passed in.
 * @throws {Error} When `stmt` is falsy.
 */
function requireStatement(stmt, name) {
    if (stmt) {
        return stmt;
    }
    throw new Error(`Transcript memory statement not initialized: ${name}`);
}
|
|
69
|
-
/**
 * Return the shared transcript-memory database, opening and initializing it
 * on first use.
 *
 * Initialization (in order): enable WAL, try to load sqlite-vec, create the
 * `wake_cycles` table, rename the legacy `transcript_file` column, run the
 * explicit SQL migrations, create the FTS5 index and its sync triggers,
 * create/backfill the vec0 table when sqlite-vec is available, and prepare
 * the module-level statements.
 *
 * The handle is only published to the module-level `db` after every step has
 * succeeded. If a step throws, the partially prepared state is reset and the
 * handle is closed, so the next call retries from scratch instead of
 * returning a half-initialized database whose statements are still null.
 *
 * @returns The initialized database handle.
 * @throws Whatever the failing initialization step threw.
 */
function getDb() {
    if (db)
        return db;
    const dbPath = path.join(getConfigDir(), "transcript-memory.db");
    // Keep the handle local until setup has fully succeeded (see catch below).
    const handle = new DatabaseSync(dbPath, { allowExtension: true });
    try {
        handle.exec("PRAGMA journal_mode = WAL");
        // Load sqlite-vec extension (best-effort — fall back to keyword-only)
        try {
            loadSqliteVec(handle);
            vecEnabled = true;
        }
        catch (err) {
            logger.warn(`[transcript-indexer] sqlite-vec extension not loaded, semantic search disabled: ${err instanceof Error ? err.message : String(err)}`);
            vecEnabled = false;
        }
        // Main table
        handle.exec(`
            CREATE TABLE IF NOT EXISTS wake_cycles (
                wake_cycle_id TEXT PRIMARY KEY,
                session_type TEXT NOT NULL,
                timestamp_start TEXT NOT NULL,
                timestamp_end TEXT NOT NULL,
                summary TEXT NOT NULL,
                key_excerpts TEXT,
                source_file TEXT NOT NULL,
                start_line INTEGER NOT NULL,
                end_line INTEGER NOT NULL,
                embedding BLOB
            )
        `);
        // Migration: rename old column `transcript_file` → `source_file`.
        // RENAME COLUMN requires SQLite >= 3.25.0. Databases that never had
        // the old column (or were already migrated) skip the ALTER entirely,
        // so reaching the catch means the rename itself failed.
        try {
            const cols = handle.prepare("PRAGMA table_info(wake_cycles)").all();
            const hasOldCol = cols.some((c) => c.name === "transcript_file");
            if (hasOldCol) {
                handle.exec("ALTER TABLE wake_cycles RENAME COLUMN transcript_file TO source_file");
                logger.system("[transcript-indexer] Migrated column transcript_file → source_file");
            }
        }
        catch (renameErr) {
            // Still best-effort, but leave a trace instead of failing silently.
            logger.warn(`[transcript-indexer] Column rename transcript_file → source_file failed (continuing): ${renameErr instanceof Error ? renameErr.message : String(renameErr)}`);
        }
        // Run explicit SQL migrations (adds usage-reporting columns, etc.)
        runTranscriptMemoryMigrations(handle);
        // FTS5 virtual table for keyword search, in external-content mode
        // backed by the main table.
        handle.exec(`
            CREATE VIRTUAL TABLE IF NOT EXISTS wake_cycles_fts USING fts5(
                wake_cycle_id,
                summary,
                content=wake_cycles,
                content_rowid=rowid
            )
        `);
        // Triggers to keep FTS5 in sync with the main table.
        // NOTE(review): rows are written with INSERT OR REPLACE below. SQLite
        // documents that rows removed by REPLACE conflict resolution only fire
        // delete triggers when recursive triggers are enabled — confirm
        // whether stale FTS entries for replaced rows matter here.
        handle.exec(`
            CREATE TRIGGER IF NOT EXISTS wake_cycles_ai AFTER INSERT ON wake_cycles BEGIN
                INSERT INTO wake_cycles_fts(rowid, wake_cycle_id, summary)
                VALUES (new.rowid, new.wake_cycle_id, new.summary);
            END
        `);
        handle.exec(`
            CREATE TRIGGER IF NOT EXISTS wake_cycles_ad AFTER DELETE ON wake_cycles BEGIN
                INSERT INTO wake_cycles_fts(wake_cycles_fts, rowid, wake_cycle_id, summary)
                VALUES ('delete', old.rowid, old.wake_cycle_id, old.summary);
            END
        `);
        handle.exec(`
            CREATE TRIGGER IF NOT EXISTS wake_cycles_au AFTER UPDATE ON wake_cycles BEGIN
                INSERT INTO wake_cycles_fts(wake_cycles_fts, rowid, wake_cycle_id, summary)
                VALUES ('delete', old.rowid, old.wake_cycle_id, old.summary);
                INSERT INTO wake_cycles_fts(rowid, wake_cycle_id, summary)
                VALUES (new.rowid, new.wake_cycle_id, new.summary);
            END
        `);
        // vec0 virtual table for vector similarity search (requires sqlite-vec)
        if (vecEnabled) {
            handle.exec(`
                CREATE VIRTUAL TABLE IF NOT EXISTS wake_cycles_vec USING vec0(
                    wake_cycle_id TEXT PRIMARY KEY,
                    embedding float[${EMBEDDING_DIM}]
                )
            `);
            stmtInsertVec = handle.prepare(`
                INSERT OR REPLACE INTO wake_cycles_vec (wake_cycle_id, embedding)
                VALUES (?, ?)
            `);
            stmtSearchSemantic = handle.prepare(`
                SELECT
                    v.wake_cycle_id,
                    v.distance
                FROM wake_cycles_vec v
                WHERE v.embedding MATCH ?
                    AND k = ?
                ORDER BY v.distance
            `);
            // One-time migration: sync existing embeddings from wake_cycles to vec0.
            // Records inserted before sqlite-vec was available have embeddings in the
            // main table but not in the vec0 table. This backfills them.
            try {
                const missingVec = handle.prepare(`
                    SELECT wc.wake_cycle_id, wc.embedding
                    FROM wake_cycles wc
                    LEFT JOIN wake_cycles_vec v ON v.wake_cycle_id = wc.wake_cycle_id
                    WHERE wc.embedding IS NOT NULL AND v.wake_cycle_id IS NULL
                `).all();
                if (missingVec.length > 0) {
                    let skippedIncompatible = 0;
                    for (const row of missingVec) {
                        // Only blobs of exactly EMBEDDING_DIM float32 values fit the vec0 column.
                        if (row.embedding.byteLength !== EMBEDDING_BYTE_LENGTH) {
                            skippedIncompatible += 1;
                            continue;
                        }
                        stmtInsertVec.run(row.wake_cycle_id, new Uint8Array(row.embedding.buffer, row.embedding.byteOffset, row.embedding.byteLength));
                    }
                    const syncedCount = missingVec.length - skippedIncompatible;
                    if (syncedCount > 0) {
                        logger.system(`[transcript-indexer] Synced ${syncedCount} embedding(s) to vec0 table`);
                    }
                    if (skippedIncompatible > 0) {
                        logger.warn(`[transcript-indexer] Skipped ${skippedIncompatible} incompatible embedding(s) during vec0 sync; they need re-embedding`);
                    }
                }
            }
            catch (syncErr) {
                logger.warn(`[transcript-indexer] vec0 sync failed (non-fatal): ${syncErr instanceof Error ? syncErr.message : String(syncErr)}`);
            }
        }
        // Prepared statements
        stmtInsert = handle.prepare(`
            INSERT OR REPLACE INTO wake_cycles
                (wake_cycle_id, session_type, timestamp_start, timestamp_end,
                summary, key_excerpts, source_file, start_line, end_line, embedding,
                tool_calls_json, trigger_type, trigger_channel, outcome,
                tool_call_count, tool_calls_truncated, reported_tool_call_count,
                usage_reported)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        `);
        stmtSearchKeyword = handle.prepare(`
            SELECT
                wc.wake_cycle_id,
                wc.session_type,
                wc.timestamp_start,
                wc.timestamp_end,
                wc.summary,
                bm25(wake_cycles_fts) AS relevance_score
            FROM wake_cycles_fts fts
            JOIN wake_cycles wc ON wc.rowid = fts.rowid
            WHERE wake_cycles_fts MATCH ?
            ORDER BY bm25(wake_cycles_fts)
            LIMIT ?
        `);
        stmtGetById = handle.prepare(`SELECT * FROM wake_cycles WHERE wake_cycle_id = ?`);
    }
    catch (initErr) {
        // Roll back module state so a later call starts over cleanly.
        stmtInsert = null;
        stmtInsertVec = null;
        stmtSearchKeyword = null;
        stmtSearchSemantic = null;
        stmtGetById = null;
        vecEnabled = false;
        try {
            handle.close();
        }
        catch {
            // Closing is best-effort here; the original failure is what matters.
        }
        throw initErr;
    }
    db = handle;
    return db;
}
|
|
226
|
-
// ---------------------------------------------------------------------------
|
|
227
|
-
// Source file reading (Claude transcripts or VisionClaw logs)
|
|
228
|
-
// ---------------------------------------------------------------------------
|
|
229
|
-
/**
 * Count lines in a file asynchronously via streaming.
 *
 * @param filePath - Path of the file to count.
 * @returns The number of lines readline yields for the file, or 0 when the
 *   file does not exist — including when it disappears between the existence
 *   check and the stream actually opening it.
 * @throws Any stream error other than ENOENT.
 */
export async function countFileLines(filePath) {
    if (!fs.existsSync(filePath))
        return 0;
    const inputStream = fs.createReadStream(filePath, { encoding: "utf-8" });
    const rl = readline.createInterface({
        input: inputStream,
        crlfDelay: Infinity,
    });
    let count = 0;
    try {
        for await (const _line of rl) {
            count++;
        }
    }
    catch (err) {
        // The file can be removed or rotated after existsSync() returned true;
        // honor the "missing file counts as 0" contract in that case as well.
        if (err !== null && typeof err === "object" && err.code === "ENOENT") {
            return 0;
        }
        throw err;
    }
    finally {
        // Release the interface and file descriptor on every exit path.
        rl.close();
        inputStream.destroy();
    }
    return count;
}
|
|
247
|
-
/**
 * Read a line range from a JSONL source file (Claude transcript or
 * VisionClaw log). Lines are 1-indexed; the range is [startLine, endLine)
 * (exclusive end). Plain files and gzip files are both read via streaming:
 * when `filePath` does not exist, `${filePath}.gz` is tried instead, and any
 * path ending in ".gz" is decompressed on the fly.
 *
 * @param filePath - Path of the source file (with or without ".gz").
 * @param startLine - First line to return (1-indexed, inclusive).
 * @param endLine - Line at which to stop (1-indexed, exclusive).
 * @param maxLines - Optional cap on lines returned. When the range contains
 *   more lines than this, lines are sampled at an even stride across the
 *   range (always starting with the first line of the range).
 * @returns The selected lines, or an empty array when neither the file nor
 *   its ".gz" sibling exists.
 */
export async function readSourceFileLines(filePath, startLine, endLine, maxLines) {
    // Try the exact path first, then the .gz variant.
    let actualPath = filePath;
    if (!fs.existsSync(actualPath)) {
        actualPath = `${filePath}.gz`;
        if (!fs.existsSync(actualPath)) {
            return [];
        }
    }
    const isGz = actualPath.endsWith(".gz");
    let inputStream;
    if (isGz) {
        const passthrough = new PassThrough();
        void pipeline(fs.createReadStream(actualPath), createGunzip(), passthrough).catch(() => {
            // Intentionally ignored here: pipeline() tears down the whole chain
            // on failure or on the early destroy() below; read errors are
            // expected to reach the consumer through `passthrough`.
        });
        inputStream = passthrough;
    }
    else {
        inputStream = fs.createReadStream(actualPath, { encoding: "utf-8" });
    }
    const rl = readline.createInterface({
        input: inputStream,
        crlfDelay: Infinity,
    });
    const lines = [];
    let lineNum = 1;
    try {
        for await (const line of rl) {
            if (lineNum >= endLine) {
                break;
            }
            if (lineNum >= startLine) {
                lines.push(line);
            }
            lineNum++;
        }
    }
    finally {
        // Stopping before EOF only closes the readline interface; the
        // underlying stream must be destroyed explicitly, otherwise its file
        // descriptor stays open.
        rl.close();
        inputStream.destroy();
    }
    // If maxLines is set and we have more lines than the limit, sample evenly
    // so the summary input spans the whole cycle rather than only its start.
    if (maxLines && maxLines > 0 && lines.length > maxLines) {
        const step = lines.length / maxLines;
        return Array.from({ length: maxLines }, (_unused, i) => lines[Math.floor(i * step)]);
    }
    return lines;
}
|
|
315
|
-
/**
 * Locate and read the transcript lines of a wake cycle, looking first at the
 * live transcript and then at date-stamped archives. Used for backfill
 * reconstruction when the original source_file may have been archived due to
 * transcript rollover.
 *
 * Lookup order (the first location that yields any line wins):
 *   1. `sourceFile` itself
 *   2. the archive stamped with the wake cycle's local date
 *   3. the archive for the previous day, then the next day
 *      (timezone / day-boundary cases)
 *
 * @param sourceFile - Original source file path from the wake cycle record.
 * @param startLine - 1-indexed start line.
 * @param endLine - 1-indexed exclusive end line.
 * @param wakeCycleTimestamp - ISO timestamp of the wake cycle's start; decides
 *   which date-stamped archives are searched.
 * @returns The lines found, or [] when the timestamp is unparseable or no
 *   candidate location yields lines.
 */
export async function readWakeCycleTranscriptLines(sourceFile, startLine, endLine, wakeCycleTimestamp) {
    const liveLines = await readSourceFileLines(sourceFile, startLine, endLine);
    if (liveLines.length > 0) {
        return liveLines;
    }
    const cycleDate = new Date(wakeCycleTimestamp);
    if (Number.isNaN(cycleDate.getTime())) {
        return [];
    }
    // YYYY-MM-DD in local time, matching the archive naming.
    const toDateStamp = (d) => {
        const twoDigits = (n) => String(n).padStart(2, "0");
        return `${d.getFullYear()}-${twoDigits(d.getMonth() + 1)}-${twoDigits(d.getDate())}`;
    };
    const shiftedByDays = (dayOffset) => {
        const shifted = new Date(cycleDate);
        shifted.setDate(shifted.getDate() + dayOffset);
        return shifted;
    };
    // Same day first, then the day before, then the day after.
    for (const dayOffset of [0, -1, 1]) {
        const archivePath = deriveArchivePath(sourceFile, toDateStamp(shiftedByDays(dayOffset)));
        const archivedLines = await readSourceFileLines(archivePath, startLine, endLine);
        if (archivedLines.length > 0) {
            return archivedLines;
        }
    }
    // Not found in any expected location.
    return [];
}
|
|
366
|
-
/** Maximum tool calls to include in a single usage report. */
const MAX_TOOL_CALLS_PER_CYCLE = 500;
/**
 * Tell whether `timestamp` lies inside the cycle window (bounds inclusive).
 * Returns true when any of the three timestamps cannot be parsed, so that
 * unparseable data is kept rather than filtered out.
 */
function isTimestampWithinCycle(timestamp, context) {
    const timestampMs = Date.parse(timestamp);
    const startMs = Date.parse(context.cycleStartedAt);
    const endMs = Date.parse(context.cycleEndedAt);
    if ([timestampMs, startMs, endMs].some((ms) => Number.isNaN(ms))) {
        return true;
    }
    return timestampMs >= startMs && timestampMs <= endMs;
}
/** Return the object-typed content blocks of a transcript message, or []. */
function contentBlocksOf(message) {
    const content = message?.content;
    if (!Array.isArray(content)) {
        return [];
    }
    // Non-object entries (null, strings, ...) can never be tool blocks.
    return content.filter((block) => typeof block === "object" && block !== null);
}
/**
 * Chronological comparator on `startedAt`. Compares parsed instants so that
 * timestamps written with different offsets or precision still order
 * correctly; falls back to a string comparison when either side is unparseable.
 */
function compareByStartedAt(a, b) {
    const aMs = Date.parse(a.startedAt);
    const bMs = Date.parse(b.startedAt);
    if (Number.isNaN(aMs) || Number.isNaN(bMs)) {
        return String(a.startedAt).localeCompare(String(b.startedAt));
    }
    return aMs - bMs;
}
/**
 * Extract tool call facts from raw Claude SDK transcript JSONL lines.
 *
 * `tool_use` blocks (assistant messages) are paired with `tool_result` blocks
 * (user messages) via `tool_use_id`. A tool_use that never receives a result
 * is closed at `cycleEndedAt` with `success: false` and
 * `errorKind: "missing_tool_result"`. Results with no pending tool_use are
 * ignored.
 *
 * A line's own `timestamp` (when it is a string) becomes the running
 * timestamp for that line and the following ones; until one is seen the
 * running timestamp is `cycleStartedAt`. tool_use blocks whose running
 * timestamp falls outside the cycle window are ignored. Blank lines,
 * malformed JSON, non-object lines and `isMeta` lines are skipped.
 *
 * @param lines - Raw JSONL transcript lines (Claude transcript format only).
 * @param context - `{ cycleStartedAt, cycleEndedAt }` timestamps.
 * @returns `{ toolCalls, toolCallCount, truncated, reportedToolCallCount }`.
 *   `toolCalls` is sorted by start time and capped at
 *   MAX_TOOL_CALLS_PER_CYCLE; `toolCallCount` is the uncapped count;
 *   `reportedToolCallCount` is null unless the list was truncated.
 */
export function extractToolCallsFromLines(lines, context) {
    const inFlight = new Map();
    const completed = [];
    let currentTimestamp = context.cycleStartedAt;
    for (const line of lines) {
        if (!line.trim()) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            continue; // malformed line
        }
        if (typeof parsed !== "object" || parsed === null) {
            continue;
        }
        if (parsed.isMeta === true) {
            continue;
        }
        if (typeof parsed.timestamp === "string") {
            currentTimestamp = parsed.timestamp;
        }
        if (parsed.type === "assistant") {
            const parentRaw = parsed.parent_tool_use_id;
            const parentToolUseId = typeof parentRaw === "string" && parentRaw.length > 0
                ? parentRaw
                : undefined;
            for (const block of contentBlocksOf(parsed.message)) {
                if (block.type !== "tool_use") {
                    continue;
                }
                const toolUseId = block.id ?? block.tool_use_id;
                const toolName = block.name;
                if (!toolUseId || !toolName || !isTimestampWithinCycle(currentTimestamp, context)) {
                    continue;
                }
                inFlight.set(toolUseId, {
                    toolUseId,
                    toolName,
                    parentToolUseId,
                    startedAt: currentTimestamp,
                });
            }
        }
        else if (parsed.type === "user") {
            for (const block of contentBlocksOf(parsed.message)) {
                if (block.type !== "tool_result") {
                    continue;
                }
                const toolUseId = block.tool_use_id;
                const pending = toolUseId ? inFlight.get(toolUseId) : undefined;
                if (!pending) {
                    continue; // unmatched result — skip
                }
                inFlight.delete(toolUseId);
                const isError = block.is_error === true;
                completed.push({
                    toolName: pending.toolName,
                    toolUseId: pending.toolUseId,
                    parentToolUseId: pending.parentToolUseId,
                    startedAt: pending.startedAt,
                    endedAt: currentTimestamp,
                    success: !isError,
                    ...(isError ? { errorKind: "tool_error" } : {}),
                });
            }
        }
    }
    // Close any remaining in-flight tool calls as missing results.
    for (const pending of inFlight.values()) {
        completed.push({
            toolName: pending.toolName,
            toolUseId: pending.toolUseId,
            parentToolUseId: pending.parentToolUseId,
            startedAt: pending.startedAt,
            endedAt: context.cycleEndedAt,
            success: false,
            errorKind: "missing_tool_result",
        });
    }
    // The sort is stable, so calls with equal start times keep transcript order.
    completed.sort(compareByStartedAt);
    const originalCount = completed.length;
    const truncated = originalCount > MAX_TOOL_CALLS_PER_CYCLE;
    const toolCalls = truncated
        ? completed.slice(0, MAX_TOOL_CALLS_PER_CYCLE)
        : completed;
    return {
        toolCalls,
        toolCallCount: originalCount,
        truncated,
        reportedToolCallCount: truncated ? toolCalls.length : null,
    };
}
|
|
498
|
-
// ---------------------------------------------------------------------------
|
|
499
|
-
// Claude transcript parsing / filtering for summary generation
|
|
500
|
-
// ---------------------------------------------------------------------------
|
|
501
|
-
/**
 * Base names of "housekeeping" tools. MCP tools are often prefixed by the SDK
 * (e.g. `mcp__visionclaw__memory`), so a tool matches when its name equals one
 * of these or ends with `__<name>`.
 */
const TRIVIAL_TOOL_SUFFIXES = [
    "memory",
    "memory_transcript_search",
    "memory_transcript_detail",
    "finish",
    "wait",
    "switch_session",
];
/** Claude Agent SDK tool names that are trivial and matched exactly. */
const TRIVIAL_EXACT_NAMES = new Set(["ToolSearch"]);
/**
 * Report whether `name` denotes a trivial/housekeeping tool: either an exact
 * SDK name, a bare housekeeping name, or an MCP-prefixed variant of one.
 */
function isTrivialTool(name) {
    return (TRIVIAL_EXACT_NAMES.has(name) ||
        TRIVIAL_TOOL_SUFFIXES.some((suffix) => name === suffix || name.endsWith(`__${suffix}`)));
}
|
|
533
|
-
/** True for any non-null value whose typeof is "object" (arrays included). */
function isObjectRecord(value) {
    return typeof value === "object" && value !== null;
}
/** Matches strings made only of base64 alphabet characters and padding. */
const BASE64_CHARS_ONLY = /^[A-Za-z0-9+/=]+$/;
/**
 * Return a deep copy of a parsed transcript value with binary payloads
 * replaced by short placeholders:
 *   - objects with `type: "image"` / `type: "image_url"` become placeholder strings
 *   - a `data` property holding a base64-looking string longer than 200 chars
 *     becomes "[base64 omitted]"
 * Primitives are returned unchanged; arrays and objects are rebuilt recursively.
 */
function sanitizeTranscriptValue(value) {
    if (Array.isArray(value)) {
        return value.map((item) => sanitizeTranscriptValue(item));
    }
    if (!isObjectRecord(value)) {
        return value;
    }
    if (value.type === "image") {
        const sourceType = isObjectRecord(value.source) ? value.source.type : undefined;
        if (sourceType === "base64") {
            return "[image: base64 omitted]";
        }
        if (sourceType === "url") {
            return "[image: url omitted]";
        }
        return "[image omitted]";
    }
    if (value.type === "image_url") {
        return "[image_url omitted]";
    }
    // Object.fromEntries defines own properties, so a JSON key such as
    // "__proto__" is copied as data instead of rewriting the copy's prototype.
    return Object.fromEntries(Object.entries(value).map(([key, child]) => {
        const looksLikeBase64 = key === "data" &&
            typeof child === "string" &&
            child.length > 200 &&
            BASE64_CHARS_ONLY.test(child);
        return [key, looksLikeBase64 ? "[base64 omitted]" : sanitizeTranscriptValue(child)];
    }));
}
/** Cut `text` to `maxChars` characters, marking the cut when one happens. */
function truncateForSummary(text, maxChars) {
    return text.length > maxChars
        ? text.slice(0, maxChars) + "...[truncated]"
        : text;
}
/**
 * Condense raw JSONL lines from a Claude SDK transcript into plain text for
 * summary generation. Only the Claude transcript format
 * (`{type:"user"|"assistant", message:{...}}`) is understood, NOT the
 * VisionClaw log format.
 *
 * Output, one entry per line:
 *   - `[User] …`       user text (string content or text blocks); images become "[Image omitted]"
 *   - `[Assistant] …`  assistant text blocks
 *   - `[Tool Call] name(input)`  sanitized JSON input, cut at 500 chars
 *   - `[Tool Error] …` tool results flagged `is_error`, cut at 300 chars
 * Successful tool results, `system` lines, `isMeta` lines, blank lines and
 * malformed lines are left out.
 *
 * @param lines - Raw JSONL transcript lines.
 * @returns The condensed transcript, entries joined with "\n".
 */
export function parseTranscriptForSummary(lines) {
    const parts = [];
    for (const line of lines) {
        if (!line.trim()) {
            continue;
        }
        let parsed;
        try {
            parsed = JSON.parse(line);
        }
        catch {
            continue; // malformed line
        }
        if (!isObjectRecord(parsed) || parsed.isMeta === true) {
            continue;
        }
        const content = isObjectRecord(parsed.message) ? parsed.message.content : undefined;
        if (parsed.type === "assistant") {
            if (!Array.isArray(content)) {
                continue;
            }
            for (const block of content) {
                if (!isObjectRecord(block)) {
                    continue;
                }
                if (block.type === "text" && typeof block.text === "string") {
                    parts.push(`[Assistant] ${block.text}`);
                }
                else if (block.type === "tool_use") {
                    const sanitizedInput = sanitizeTranscriptValue(block.input);
                    const inputStr = sanitizedInput ? JSON.stringify(sanitizedInput) : "";
                    parts.push(`[Tool Call] ${block.name ?? "unknown"}(${truncateForSummary(inputStr, 500)})`);
                }
            }
        }
        else if (parsed.type === "user") {
            if (typeof content === "string") {
                if (content) {
                    parts.push(`[User] ${content}`);
                }
                continue;
            }
            if (!Array.isArray(content)) {
                continue;
            }
            for (const block of content) {
                if (!isObjectRecord(block)) {
                    continue;
                }
                if (block.type === "text" && typeof block.text === "string") {
                    parts.push(`[User] ${block.text}`);
                }
                else if (block.type === "image" || block.type === "image_url") {
                    parts.push("[User] [Image omitted]");
                }
                else if (block.type === "tool_result" && block.is_error === true) {
                    // Only failed results are kept, to keep the summary concise.
                    // JSON.stringify yields undefined for a missing `content`,
                    // hence the "" fallback.
                    const raw = typeof block.content === "string"
                        ? block.content
                        : (JSON.stringify(sanitizeTranscriptValue(block.content)) ?? "");
                    parts.push(`[Tool Error] ${truncateForSummary(raw, 300)}`);
                }
            }
        }
        // Every other line type (e.g. "system") contributes nothing.
    }
    return parts.join("\n");
}
|
|
681
|
-
/**
 * Decide whether a wake cycle did nothing but housekeeping: it must contain at
 * least one tool call, and every tool call must be trivial (memory reads,
 * transcript search, finish, wait, switch_session, ...). Such cycles should
 * be skipped from indexing.
 *
 * Operates on the raw JSONL lines of a Claude SDK transcript file. Blank
 * lines are ignored, and a line that fails to parse or to process is skipped.
 *
 * @param lines - Raw JSONL transcript lines.
 * @returns true only if tool calls were seen and none was substantive.
 */
export function isOnlyTrivialToolCalls(lines) {
    let sawToolUse = false;
    for (const rawLine of lines) {
        if (rawLine.trim() === "") {
            continue;
        }
        try {
            const record = JSON.parse(rawLine);
            const blocks = record.type === "assistant" ? record.message?.content : undefined;
            if (!Array.isArray(blocks)) {
                continue;
            }
            for (const block of blocks) {
                if (block.type !== "tool_use") {
                    continue;
                }
                sawToolUse = true;
                const toolName = block.name;
                // An unnamed or non-trivial tool call makes the cycle substantive.
                if (!toolName || !isTrivialTool(toolName)) {
                    return false;
                }
            }
        }
        catch {
            // Skip lines that cannot be parsed or processed
        }
    }
    // With no substantive call found, the cycle is trivial only if it had calls.
    return sawToolUse;
}
|
|
720
|
-
// ---------------------------------------------------------------------------
|
|
721
|
-
// Summary generation via Gemini
|
|
722
|
-
// ---------------------------------------------------------------------------
|
|
723
|
-
const SUMMARY_PROMPT = `You are analyzing a transcript of an AI agent's wake cycle. Generate a structured summary.
|
|
724
|
-
|
|
725
|
-
Instructions:
|
|
726
|
-
- Focus on: what task was performed, key decisions made, user interactions, and outcomes
|
|
727
|
-
- For "summary": write a detailed paragraph (200-400 words) covering the main activities and results
|
|
728
|
-
- For "key_excerpts": extract 3-8 verbatim quotes from the transcript that capture the most important information (user requests, key decisions, important results)
|
|
729
|
-
- Omit routine/trivial details (heartbeat checks with no action, viewing memory with no changes)
|
|
730
|
-
|
|
731
|
-
Respond with valid JSON only, no markdown fences:
|
|
732
|
-
{"summary": "...", "key_excerpts": ["...", "..."]}
|
|
733
|
-
|
|
734
|
-
TRANSCRIPT:
|
|
735
|
-
`;
|
|
736
|
-
/**
 * Ask Gemini for a structured summary of a condensed wake-cycle transcript.
 *
 * The transcript is cut to 30,000 characters before being appended to
 * SUMMARY_PROMPT. The first `{ ... }` span of the reply is parsed as JSON.
 * When the reply has no such span, is not valid JSON, or carries a
 * non-string `summary`, the first 2,000 characters of the raw reply are used
 * as the summary instead.
 *
 * @param condensedTranscript - Output of the transcript condenser.
 * @param config - Configuration from which the Gemini API key is resolved.
 * @returns `{ summary, key_excerpts }` where `summary` is always a string and
 *   `key_excerpts` is always an array of strings (possibly empty).
 * @throws Whatever key resolution or the Gemini request throws.
 */
export async function generateSummary(condensedTranscript, config) {
    const maxInputChars = 30_000;
    const maxFallbackChars = 2000;
    const apiKey = resolveGeminiApiKey(config);
    const ai = new GoogleGenAI({ apiKey });
    const truncatedInput = condensedTranscript.length > maxInputChars
        ? condensedTranscript.slice(0, maxInputChars) +
            "\n...[transcript truncated]"
        : condensedTranscript;
    const response = await ai.models.generateContent({
        model: GEMINI_SUMMARY_MODEL,
        contents: SUMMARY_PROMPT + truncatedInput,
    });
    const text = response.text ?? "";
    const fallbackSummary = text.slice(0, maxFallbackChars);
    // Greedy match: from the first "{" to the last "}" of the reply.
    const jsonMatch = /\{[\s\S]*\}/.exec(text);
    if (!jsonMatch) {
        return { summary: fallbackSummary, key_excerpts: [] };
    }
    let parsed;
    try {
        parsed = JSON.parse(jsonMatch[0]);
    }
    catch {
        return { summary: fallbackSummary, key_excerpts: [] };
    }
    // The model's JSON is untrusted: callers read `summary.length` and persist
    // both fields, so only strings are let through.
    return {
        summary: typeof parsed.summary === "string" ? parsed.summary : fallbackSummary,
        key_excerpts: Array.isArray(parsed.key_excerpts)
            ? parsed.key_excerpts.filter((excerpt) => typeof excerpt === "string")
            : [],
    };
}
|
|
766
|
-
// ---------------------------------------------------------------------------
|
|
767
|
-
// Embedding generation via Gemini
|
|
768
|
-
// ---------------------------------------------------------------------------
|
|
769
|
-
/**
 * Produce an embedding vector for `text` via Gemini. Best-effort: any failure
 * (key resolution, request error, wrong dimensionality) is logged as a
 * warning and reported as null rather than thrown.
 *
 * @param text - Text to embed.
 * @param config - Configuration from which the Gemini API key is resolved.
 * @returns A Float32Array of EMBEDDING_DIM values, or null when no usable
 *   embedding was obtained.
 */
export async function generateEmbedding(text, config) {
    try {
        const client = new GoogleGenAI({ apiKey: resolveGeminiApiKey(config) });
        const response = await client.models.embedContent({
            model: GEMINI_EMBEDDING_MODEL,
            contents: text,
            config: { outputDimensionality: EMBEDDING_DIM },
        });
        const vector = response.embeddings?.[0]?.values;
        if (!vector || vector.length === 0) {
            return null;
        }
        if (vector.length === EMBEDDING_DIM) {
            return new Float32Array(vector);
        }
        // Routed through the catch below so the mismatch is logged like any other failure.
        throw new Error(`Unexpected embedding dimension: got ${vector.length}, expected ${EMBEDDING_DIM}`);
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        logger.warn(`[transcript-indexer] Embedding generation failed (non-fatal): ${reason}`);
        return null;
    }
}
|
|
791
|
-
// ---------------------------------------------------------------------------
|
|
792
|
-
// Public API: index a wake cycle
|
|
793
|
-
// ---------------------------------------------------------------------------
|
|
794
|
-
/** Max transcript lines handed to the summarizer for one wake cycle. Longer
 * cycles are sampled down to this many lines to bound the summary input. */
const MAX_LINES_FOR_SUMMARY = 2_000;
/**
 * Pick `maxLines` entries from `lines` at evenly spaced offsets. Returns
 * `lines` itself when it is already within the limit.
 */
function sampleLinesEvenly(lines, maxLines) {
    if (lines.length <= maxLines) {
        return lines;
    }
    const step = lines.length / maxLines;
    return Array.from({ length: maxLines }, (_, i) => lines[Math.floor(i * step)]);
}
/**
 * Index the current wake cycle's Claude transcript. Called after each wake
 * cycle completes (fire-and-forget from the main loop).
 *
 * Steps:
 *   1.   Read the Claude transcript lines for the wake cycle range
 *   1.5  Extract tool call facts for usage metering (from ALL lines, so that
 *        tool_use / tool_result pairs are never split by sampling)
 *   1.6  Skip cycles that only contain trivial tool calls (also on all lines)
 *   2.   Sample down to MAX_LINES_FOR_SUMMARY lines, parse and condense
 *   3.   Call Gemini Flash to generate summary + key excerpts
 *   4.   Generate embedding for the summary (best-effort)
 *   5.   Store in SQLite (wake_cycles + FTS5 auto-synced via triggers)
 *
 * @param options - `{ wakeCycleId, sessionType, sourceFile, startLine,
 *   endLine, timestampStart, timestampEnd, trigger, messages, config, outcome }`.
 *   `startLine`/`endLine` are 1-indexed with an exclusive end.
 * @returns Resolves with no value once the cycle is stored or skipped.
 */
export async function indexWakeCycle(options) {
    const { sessionType, sourceFile, startLine, endLine, timestampStart, timestampEnd, trigger, messages, config, outcome, } = options;
    // TODO: These skip paths also skip usage persistence. A future improvement
    // should split usage-record persistence from memory summary indexing so
    // that even trivial/short cycles have their usage metadata stored.
    // Skip trivial wake cycles
    const lineCount = endLine - startLine;
    if (trigger === "heartbeat" && messages.length === 0 && lineCount < 10) {
        logger.debug("[transcript-indexer] Skipping trivial heartbeat wake cycle");
        return;
    }
    if (lineCount <= 0) {
        logger.debug("[transcript-indexer] Skipping empty wake cycle (no new lines)");
        return;
    }
    // Step 1: Read every line of the range. The reader buffers the whole range
    // even when asked to sample, so reading it unsampled costs no extra memory.
    const allLines = await readSourceFileLines(sourceFile, startLine, endLine);
    if (allLines.length === 0) {
        logger.debug("[transcript-indexer] No source file lines read, skipping");
        return;
    }
    // Step 1.5: Extract tool call facts for usage metering. This must see the
    // unsampled lines: sampling would drop tool_use or tool_result lines and
    // turn completed calls into spurious "missing_tool_result" entries.
    const toolCallExtraction = extractToolCallsFromLines(allLines, {
        cycleStartedAt: timestampStart,
        cycleEndedAt: timestampEnd,
    });
    const toolCallsJson = JSON.stringify(toolCallExtraction.toolCalls);
    const firstMessage = messages.length > 0 ? messages[0] : undefined;
    // TODO: Skip paths below still skip usage persistence. Eventually separate
    // usage-first persistence (tool calls, trigger, outcome) from memory
    // summary indexing (summary, embedding, FTS).
    // Step 1.6: Skip cycles that only contain trivial/housekeeping tool calls.
    // Checked on all lines so a substantive call cannot be sampled away.
    if (isOnlyTrivialToolCalls(allLines)) {
        logger.debug("[transcript-indexer] Skipping wake cycle with only trivial tool calls");
        return;
    }
    // Step 2: Sample for the summarizer, then parse and condense
    const lines = sampleLinesEvenly(allLines, MAX_LINES_FOR_SUMMARY);
    const condensed = parseTranscriptForSummary(lines);
    if (condensed.length < 50) {
        logger.debug("[transcript-indexer] Condensed text too short, skipping");
        return;
    }
    // Step 3: Generate summary via Gemini Flash
    const { summary, key_excerpts } = await generateSummary(condensed, config);
    // Step 4: Generate embedding (best-effort, non-blocking failure)
    const embeddingVec = await generateEmbedding(summary, config);
    const embeddingBlob = embeddingVec
        ? Buffer.from(embeddingVec.buffer, embeddingVec.byteOffset, embeddingVec.byteLength)
        : null;
    // Step 5: Store in SQLite
    const wakeCycleId = options.wakeCycleId;
    const wakeTriggerType = trigger === "heartbeat"
        ? "heartbeat"
        : (firstMessage?.channel ?? "message");
    const wakeTriggerChannel = trigger === "message" ? (firstMessage?.channel ?? null) : null;
    // getDb() is called for its side effect before the prepared statement is used.
    getDb();
    const insertStmt = requireStatement(stmtInsert, "stmtInsert");
    insertStmt.run(wakeCycleId, sessionType, timestampStart, timestampEnd, summary, key_excerpts.length > 0 ? JSON.stringify(key_excerpts) : null, sourceFile, startLine, endLine, embeddingBlob, toolCallsJson, wakeTriggerType, wakeTriggerChannel, outcome ?? null, toolCallExtraction.toolCallCount, toolCallExtraction.truncated ? 1 : 0, toolCallExtraction.reportedToolCallCount, 0);
    // Also insert into vec0 table for semantic search
    if (vecEnabled && stmtInsertVec && embeddingBlob) {
        try {
            stmtInsertVec.run(wakeCycleId, new Uint8Array(embeddingBlob.buffer, embeddingBlob.byteOffset, embeddingBlob.byteLength));
        }
        catch (vecErr) {
            logger.warn(`[transcript-indexer] vec0 insert failed (non-fatal): ${vecErr instanceof Error ? vecErr.message : String(vecErr)}`);
        }
    }
    logger.system(`[transcript-indexer] Indexed wake cycle ${wakeCycleId}: ${lines.length} lines → ${summary.length} char summary`);
}
|
|
881
|
-
/**
 * Insert a pre-built wake cycle record into the database.
 * Used by the backfill CLI to insert records with known timestamps and IDs.
 *
 * Usage-related columns are written as empty (no tool calls, no trigger, no
 * outcome) because that data is not available for these records. When vector
 * search is enabled and the record has an embedding, the embedding is also
 * written to the vec0 table; a failure there is logged and otherwise ignored.
 *
 * @param record - `{ wakeCycleId, sessionType, timestampStart, timestampEnd,
 *   summary, keyExcerpts, sourceFile, startLine, endLine, embeddingBlob }`.
 * @param record.sourceFile — path to the source JSONL file. For backfilled
 *   data this is a VisionClaw runtime log file; for live data it is a Claude
 *   SDK transcript file.
 */
export function insertWakeCycleRecord(record) {
    // getDb() is called for its side effect before the prepared statement is used.
    getDb();
    const insertStmt = requireStatement(stmtInsert, "stmtInsert");
    const keyExcerptsJson = record.keyExcerpts && record.keyExcerpts.length > 0
        ? JSON.stringify(record.keyExcerpts)
        : null;
    insertStmt.run(record.wakeCycleId, record.sessionType, record.timestampStart, record.timestampEnd, record.summary, keyExcerptsJson, record.sourceFile, record.startLine, record.endLine, record.embeddingBlob, null, // tool_calls_json — not available for backfill-inserted records
    null, // trigger_type
    null, // trigger_channel
    null, // outcome
    0, // tool_call_count
    0, // tool_calls_truncated
    null, // reported_tool_call_count
    0); // NOTE(review): last column presumably usage_reported — confirm against stmtInsert
    // Also insert into vec0 table for semantic search
    if (vecEnabled && stmtInsertVec && record.embeddingBlob) {
        try {
            const blob = record.embeddingBlob;
            stmtInsertVec.run(record.wakeCycleId, new Uint8Array(blob.buffer, blob.byteOffset, blob.byteLength));
        }
        catch (vecErr) {
            // Best-effort: the record itself is already stored. Log the failure
            // the same way the live indexing path does instead of hiding it.
            logger.warn(`[transcript-indexer] vec0 insert failed (non-fatal): ${vecErr instanceof Error ? vecErr.message : String(vecErr)}`);
        }
    }
}
|
|
913
|
-
/**
 * Count the rows of the wake_cycles table.
 *
 * @returns The row count, or 0 when the query yields no row.
 */
export function getWakeCycleCount() {
    const countRow = getDb()
        .prepare("SELECT COUNT(*) as count FROM wake_cycles")
        .get();
    return countRow?.count ?? 0;
}
|
|
921
|
-
/**
 * List wake cycles, oldest first, regardless of their usage_reported state.
 *
 * @param limit - Maximum number of rows (default 5,000); values below 1 are
 *   raised to 1.
 * @returns The matching wake_cycles rows.
 */
export function listWakeCyclesForUsageBackfill(limit = 5_000) {
    const rowLimit = Math.max(1, limit);
    const statement = getDb().prepare(`
SELECT wake_cycle_id, session_type, timestamp_start, timestamp_end,
summary, key_excerpts, source_file, start_line, end_line, embedding,
tool_calls_json, usage_reported, trigger_type, trigger_channel,
outcome, tool_call_count, tool_calls_truncated, reported_tool_call_count
FROM wake_cycles
ORDER BY timestamp_start ASC
LIMIT ?
`);
    return statement.all(rowLimit);
}
|
|
933
|
-
/**
 * List wake cycles whose usage_reported flag is still 0, oldest first, i.e.
 * cycles that have not yet been queued into usage.db.
 * Used by the background UsageReporter.
 *
 * @param limit - Maximum number of rows (default 100); values below 1 are
 *   raised to 1.
 * @returns The matching wake_cycles rows.
 */
export function listUnreportedWakeCycles(limit = 100) {
    const rowLimit = Math.max(1, limit);
    const statement = getDb().prepare(`
SELECT wake_cycle_id, session_type, timestamp_start, timestamp_end,
summary, key_excerpts, source_file, start_line, end_line, embedding,
tool_calls_json, usage_reported, trigger_type, trigger_channel,
outcome, tool_call_count, tool_calls_truncated, reported_tool_call_count
FROM wake_cycles
WHERE usage_reported = 0
ORDER BY timestamp_start ASC
LIMIT ?
`);
    return statement.all(rowLimit);
}
|
|
950
|
-
/**
 * Set usage_reported = 1 on a wake cycle, marking it as successfully queued
 * into usage.db.
 *
 * @param wakeCycleId - ID of the wake cycle row to update.
 */
export function markWakeCycleUsageReported(wakeCycleId) {
    const statement = getDb().prepare("UPDATE wake_cycles SET usage_reported = 1 WHERE wake_cycle_id = ?");
    statement.run(wakeCycleId);
}
|
|
957
|
-
/**
 * Store a usage report error on a wake cycle together with the current time
 * (ISO string) as the attempt timestamp.
 *
 * @param wakeCycleId - ID of the wake cycle row to update.
 * @param error - Error text to store in usage_report_error.
 */
export function markWakeCycleUsageReportError(wakeCycleId, error) {
    const statement = getDb().prepare("UPDATE wake_cycles SET usage_report_error = ?, usage_report_attempted_at = ? WHERE wake_cycle_id = ?");
    const attemptedAt = new Date().toISOString();
    statement.run(error, attemptedAt, wakeCycleId);
}
|
|
964
|
-
/**
 * Overwrite tool_calls_json on an existing wake cycle row.
 * Used to persist reconstructed tool calls from archived transcripts.
 *
 * @param wakeCycleId - ID of the wake cycle row to update.
 * @param toolCallsJson - Serialized tool calls to store.
 */
export function updateWakeCycleToolCallsJson(wakeCycleId, toolCallsJson) {
    const statement = getDb().prepare("UPDATE wake_cycles SET tool_calls_json = ? WHERE wake_cycle_id = ?");
    statement.run(toolCallsJson, wakeCycleId);
}
|
|
972
|
-
/**
 * Tell whether a row with the given wake cycle ID is present in the database.
 *
 * @param wakeCycleId - ID to look up.
 * @returns true when the lookup statement returns a row.
 */
export function wakeCycleExists(wakeCycleId) {
    // getDb() runs first, before the prepared statement is required.
    getDb();
    const lookup = requireStatement(stmtGetById, "stmtGetById");
    return lookup.get(wakeCycleId) !== undefined;
}
|
|
981
|
-
/**
 * Tell callers whether semantic/vector search can be used.
 *
 * @returns The current value of the module-level `vecEnabled` flag, read
 *   after `getDb()` has been invoked.
 */
export function isSemanticSearchAvailable() {
    // Touch the database first so the flag is read after getDb() has run.
    getDb();
    const available = vecEnabled;
    return available;
}
|
|
988
|
-
/**
 * Synchronous search entry point for wake cycle summaries.
 *
 * Dispatches on `options.type` ("keyword" by default):
 * - keyword: handled by searchKeyword().
 * - semantic / hybrid: routed to the synchronous searchSemantic() /
 *   searchHybrid() stubs, which currently delegate to searchKeyword().
 *   Callers that need real vector or fused results must use
 *   searchSemanticAsync() / searchHybridAsync().
 *
 * @param options - Search options; `type` selects the route and the whole
 *   object is forwarded to the chosen search function.
 * @returns Whatever the selected search function returns.
 */
export function searchWakeCycles(options) {
    const { type = "keyword" } = options;
    const wantsVectors = type === "semantic" || type === "hybrid";
    if (wantsVectors) {
        // isSemanticSearchAvailable() calls getDb() before reading vecEnabled,
        // which indicates the flag is populated during DB setup. Do the same
        // here so a not-yet-opened database does not produce a misleading
        // "sqlite-vec not loaded" fallback message.
        getDb();
        if (!vecEnabled) {
            logger.debug("[transcript-indexer] Semantic search requested but sqlite-vec not loaded, falling back to keyword");
            return searchKeyword(options);
        }
    }
    switch (type) {
        case "semantic":
            return searchSemantic(options);
        case "hybrid":
            return searchHybrid(options);
        default:
            return searchKeyword(options);
    }
}
|
|
1012
|
-
/**
 * Keyword search over wake cycle summaries using the cached keyword statement
 * (presumably an FTS5 MATCH ranked by BM25; the statement is prepared
 * elsewhere in this file).
 *
 * @param options - `query` (whitespace-separated terms), optional `timeRange`
 *   and `sessionType` post-filters, and `limit` (default 5).
 * @returns At most `limit` rows whose `relevance_score` has been passed
 *   through normalizeBm25Score(); an empty array when the query has no terms.
 */
function searchKeyword(options) {
    const { query, timeRange, sessionType, limit = 5 } = options;
    getDb();
    // Build the match expression: each term is stripped of double quotes,
    // wrapped in quotes, and the terms are OR-ed together.
    const quotedTerms = [];
    for (const word of query.split(/\s+/)) {
        if (word.length > 0) {
            quotedTerms.push(`"${word.replace(/"/g, "")}"`);
        }
    }
    const matchExpression = quotedTerms.join(" OR ");
    if (!matchExpression) {
        return [];
    }
    // Over-fetch (3x) whenever a post-filter is present so that filtering
    // still has a chance of leaving `limit` rows.
    const hasPostFilter = Boolean(timeRange ?? sessionType);
    const fetchLimit = hasPostFilter ? limit * 3 : limit;
    const rawRows = requireStatement(stmtSearchKeyword, "stmtSearchKeyword").all(matchExpression, fetchLimit);
    // Replace the raw score with its normalized form (higher = more relevant).
    const scoredRows = [];
    for (const rawRow of rawRows) {
        scoredRows.push({
            ...rawRow,
            relevance_score: normalizeBm25Score(rawRow.relevance_score),
        });
    }
    return applyFilters(scoredRows, timeRange, sessionType, limit);
}
|
|
1038
|
-
/**
 * Synchronous placeholder for semantic search.
 *
 * Real semantic search needs an embedding for the query, which is produced
 * asynchronously, so this synchronous variant simply returns keyword results.
 * Use searchSemanticAsync() for actual vector search.
 *
 * @param options - Search options, forwarded unchanged to searchKeyword().
 * @returns The keyword search results.
 */
function searchSemantic(options) {
    const keywordHits = searchKeyword(options);
    return keywordHits;
}
|
|
1047
|
-
/**
 * Async semantic search: embeds the query, runs a nearest-neighbour lookup
 * through the cached semantic statement, then hydrates each hit from the
 * main table.
 *
 * Falls back to searchKeyword() when vector search is not enabled, the
 * semantic statement is missing, no `config` was supplied, or the query
 * embedding could not be generated.
 *
 * @param options - `query`, optional `timeRange` / `sessionType` post-filters,
 *   `limit` (default 5) and `config` (passed to generateEmbedding()).
 * @returns At most `limit` results with `relevance_score = 1 / (1 + distance)`.
 */
export async function searchSemanticAsync(options) {
    const { query, timeRange, sessionType, limit = 5, config } = options;
    // Run getDb() BEFORE inspecting vecEnabled / stmtSearchSemantic.
    // isSemanticSearchAvailable() does the same, which indicates those module
    // globals are populated during DB setup; checking them first would make
    // the first call on a cold (or previously closed) database silently
    // degrade to keyword search.
    getDb();
    if (!vecEnabled || !stmtSearchSemantic || !config) {
        return searchKeyword(options);
    }
    // Generate embedding for the query
    const queryEmbedding = await generateEmbedding(query, config);
    if (!queryEmbedding) {
        logger.debug("[transcript-indexer] Query embedding generation failed, falling back to keyword");
        return searchKeyword(options);
    }
    // Bind the embedding as raw bytes, viewing exactly the typed array's
    // window of its underlying buffer.
    const queryBlob = new Uint8Array(queryEmbedding.buffer, queryEmbedding.byteOffset, queryEmbedding.byteLength);
    // Over-fetch (3x) when a post-filter is present so filtering can still
    // leave `limit` rows.
    const fetchLimit = timeRange ?? sessionType ? limit * 3 : limit;
    const vecRows = stmtSearchSemantic.all(queryBlob, fetchLimit);
    // Hydrate with full metadata from the main table; hits whose row cannot
    // be found are dropped.
    const results = [];
    const getByIdStmt = requireStatement(stmtGetById, "stmtGetById");
    for (const vecRow of vecRows) {
        const row = getByIdStmt.get(vecRow.wake_cycle_id);
        if (row) {
            results.push({
                wake_cycle_id: row.wake_cycle_id,
                session_type: row.session_type,
                timestamp_start: row.timestamp_start,
                timestamp_end: row.timestamp_end,
                summary: row.summary,
                // Map distance to a score where lower distance = higher score.
                relevance_score: 1 / (1 + vecRow.distance),
            });
        }
    }
    return applyFilters(results, timeRange, sessionType, limit);
}
|
|
1086
|
-
/**
 * Async hybrid search: collects keyword and semantic results and merges them
 * with Reciprocal Rank Fusion (score = sum of 1/(k + rank) per result list).
 *
 * The keyword search runs synchronously first; the semantic search is then
 * awaited. Each is asked for twice the requested `limit` so the fusion step
 * has extra candidates to choose from.
 *
 * @param options - Search options; `limit` defaults to 5.
 * @returns The top `limit` fused results.
 */
export async function searchHybridAsync(options) {
    const { limit = 5 } = options;
    // Each search gets its own widened copy of the options.
    const widen = () => ({ ...options, limit: limit * 2 });
    const fromKeyword = searchKeyword(widen());
    const fromSemantic = await searchSemanticAsync(widen());
    return fuseWithRRF(fromKeyword, fromSemantic, limit);
}
|
|
1100
|
-
/**
 * Merge two ranked result lists with Reciprocal Rank Fusion (RRF).
 *
 * Every result contributes 1 / (k + position) to its wake cycle's fused
 * score, where position is 1-based within its own list. When an ID appears
 * in both lists, the keyword record supplies the returned fields.
 *
 * @param keywordResults - Ranked keyword hits (best first).
 * @param semanticResults - Ranked semantic hits (best first).
 * @param limit - Maximum number of fused results to return.
 * @param k - RRF smoothing constant (default 60).
 * @returns Up to `limit` records, highest fused score first, each with
 *   `relevance_score` replaced by its fused score.
 */
function fuseWithRRF(keywordResults, semanticResults, limit, k = 60) {
    const fusedScores = new Map();
    const recordsById = new Map();
    const contribution = (position) => 1 / (k + position + 1);
    // Keyword hits: accumulate the score and always keep the keyword record.
    for (const [position, hit] of keywordResults.entries()) {
        const id = hit.wake_cycle_id;
        fusedScores.set(id, (fusedScores.get(id) ?? 0) + contribution(position));
        recordsById.set(id, hit);
    }
    // Semantic hits: accumulate the score, but only supply the record when
    // no record is stored for that ID yet.
    for (const [position, hit] of semanticResults.entries()) {
        const id = hit.wake_cycle_id;
        fusedScores.set(id, (fusedScores.get(id) ?? 0) + contribution(position));
        if (!recordsById.has(id)) {
            recordsById.set(id, hit);
        }
    }
    // Highest fused score first, then keep only the top `limit` entries.
    const ranked = Array.from(fusedScores)
        .sort(([, left], [, right]) => right - left)
        .slice(0, limit);
    const output = [];
    for (const [id, score] of ranked) {
        const record = recordsById.get(id);
        if (!record) {
            throw new Error(`Missing fused record for wake cycle ${id}`);
        }
        output.push({ ...record, relevance_score: score });
    }
    return output;
}
|
|
1136
|
-
/**
 * Synchronous placeholder for hybrid search.
 *
 * Hybrid search depends on asynchronous embedding generation, so this
 * synchronous variant only returns keyword results. Use searchHybridAsync()
 * for fused keyword + semantic results.
 *
 * @param options - Search options, forwarded unchanged to searchKeyword().
 * @returns The keyword search results.
 */
function searchHybrid(options) {
    const keywordHits = searchKeyword(options);
    return keywordHits;
}
|
|
1145
|
-
/**
 * Narrow search results by optional time range and session type, then cap
 * the result count.
 *
 * @param results - Candidate rows; the array is not modified.
 * @param timeRange - Optional `{ start, end }`; a row is kept only when
 *   `timestamp_start >= start` and `timestamp_end <= end`.
 * @param sessionType - Optional session type; a row is kept only when its
 *   `session_type` is strictly equal to it.
 * @param limit - Maximum number of rows to return.
 * @returns A new array holding at most `limit` matching rows.
 */
function applyFilters(results, timeRange, sessionType, limit) {
    // Collect only the predicates that are actually requested.
    const predicates = [];
    if (timeRange) {
        predicates.push((row) => row.timestamp_start >= timeRange.start &&
            row.timestamp_end <= timeRange.end);
    }
    if (sessionType) {
        predicates.push((row) => row.session_type === sessionType);
    }
    // Apply each predicate as its own filtering pass, in order.
    const surviving = predicates.reduce((rows, keep) => rows.filter((row) => keep(row)), results);
    return surviving.slice(0, limit);
}
|
|
1159
|
-
/**
 * Convert a raw BM25 score into a positive score where higher means more
 * relevant, using score = 1 / (1 + |bm25|).
 *
 * The result is 1 for a raw score of 0 and shrinks toward 0 as the magnitude
 * of the raw score grows.
 *
 * @param bm25 - Raw BM25 score (the sign is ignored).
 * @returns The normalized score in the range (0, 1].
 */
export function normalizeBm25Score(bm25) {
    const magnitude = Math.abs(bm25);
    return 1 / (magnitude + 1);
}
|
|
1167
|
-
// ---------------------------------------------------------------------------
|
|
1168
|
-
// Public API: get details
|
|
1169
|
-
// ---------------------------------------------------------------------------
|
|
1170
|
-
/**
 * Fetch the stored `key_excerpts` value for one wake cycle.
 *
 * @param wakeCycleId - ID passed to the cached get-by-id statement.
 * @returns The row's `key_excerpts`, or `null` when no row was found.
 */
export function getWakeCycleExcerpts(wakeCycleId) {
    getDb();
    const record = requireStatement(stmtGetById, "stmtGetById").get(wakeCycleId);
    return record ? record.key_excerpts : null;
}
|
|
1181
|
-
/**
 * Load the original source text that a wake cycle was indexed from.
 *
 * Looks up the row, then asks readSourceFileLines() for the span recorded in
 * `source_file` / `start_line` / `end_line`. Per the file's original note,
 * the source is a Claude transcript or VisionClaw log and .gz files are
 * decompressed by the reader (not visible here; confirm in
 * readSourceFileLines). No line cap is applied in this function.
 *
 * @param wakeCycleId - ID passed to the cached get-by-id statement.
 * @returns `null` when the row does not exist; a fixed placeholder message
 *   when the reader yields no lines; otherwise the lines joined with "\n".
 */
export async function getWakeCycleFullTranscript(wakeCycleId) {
    getDb();
    const record = requireStatement(stmtGetById, "stmtGetById").get(wakeCycleId);
    if (!record) {
        return null;
    }
    const sourceLines = await readSourceFileLines(record.source_file, record.start_line, record.end_line);
    return sourceLines.length === 0
        ? "[Source file no longer available. The summary and key excerpts above are the only preserved content.]"
        : sourceLines.join("\n");
}
|
|
1199
|
-
// ---------------------------------------------------------------------------
|
|
1200
|
-
// Cleanup
|
|
1201
|
-
// ---------------------------------------------------------------------------
|
|
1202
|
-
/**
 * Close the database connection and reset the module's cached state.
 * Intended to be called during graceful shutdown.
 *
 * Does nothing when no connection is open. Otherwise closes the handle,
 * clears every cached prepared statement, and turns the vector-search flag
 * off.
 */
export function closeTranscriptMemoryDb() {
    if (!db) {
        return;
    }
    db.close();
    db = null;
    // Drop all cached statements that belonged to the closed connection.
    stmtInsert = stmtInsertVec = stmtSearchKeyword = stmtSearchSemantic = stmtGetById = null;
    vecEnabled = false;
}
|
|
1217
|
-
//# sourceMappingURL=transcript-indexer.js.map
|