nlm-memory 0.5.0 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +89 -34
- package/dist/cli/digest.d.ts +20 -0
- package/dist/cli/digest.js +142 -0
- package/dist/cli/digest.js.map +1 -0
- package/dist/cli/nlm.d.ts +1 -0
- package/dist/cli/nlm.js +25 -1
- package/dist/cli/nlm.js.map +1 -1
- package/dist/core/digest/compose.d.ts +38 -0
- package/dist/core/digest/compose.js +93 -0
- package/dist/core/digest/compose.js.map +1 -0
- package/dist/core/digest/hook-liveness.d.ts +32 -0
- package/dist/core/digest/hook-liveness.js +54 -0
- package/dist/core/digest/hook-liveness.js.map +1 -0
- package/dist/http/app.js +2 -1
- package/dist/http/app.js.map +1 -1
- package/dist/mcp/server.js +20 -1
- package/dist/mcp/server.js.map +1 -1
- package/dist/ui/assets/{index-C8cpwbYJ.css → index-Beo8psd-.css} +1 -1
- package/dist/ui/assets/{index-CB50QnL-.js → index-CSPTTeeM.js} +8 -8
- package/dist/ui/index.html +2 -2
- package/package.json +26 -1
- package/.agents/plugins/marketplace.json +0 -20
- package/.github/workflows/ci.yml +0 -30
- package/docs/methodology/re-derivation-rate.md +0 -112
- package/docs/methodology/useful-hit-rate.md +0 -79
- package/docs/plans/2026-05-20-fts5-lexical-recall.md +0 -1088
- package/docs/plans/2026-05-20-recall-daemon-wedge-fix.md +0 -662
- package/docs/plans/2026-05-20-recall-hook-design.md +0 -131
- package/docs/plans/2026-05-20-recall-hook-implementation.md +0 -1222
- package/docs/plans/desktop-product.md +0 -69
- package/docs/plans/factstore-design.md +0 -236
- package/logs/CHANGELOG/CHANGELOG-2026.md +0 -1575
- package/logs/CHANGELOG/CHANGELOG.md +0 -209
- package/migrations/000_initial_schema.sql +0 -174
- package/migrations/001_entity_type_rename.sql +0 -17
- package/migrations/002_adapter_state_extend.sql +0 -12
- package/migrations/003_session_embeddings.sql +0 -11
- package/migrations/004_facts.sql +0 -46
- package/migrations/005_sources.sql +0 -31
- package/migrations/006_providers.sql +0 -33
- package/migrations/007_source_tokens.sql +0 -17
- package/migrations/008_fts_rebuild.sql +0 -9
- package/migrations/009_session_embedding_chunks.sql +0 -46
- package/migrations/010_sources_opencode.sql +0 -30
- package/migrations/011_sources_hermes_agent.sql +0 -30
- package/migrations/012_sources_aider.sql +0 -30
- package/migrations/013_adapter_state_failure_count.sql +0 -12
- package/migrations/014_sources_cursor.sql +0 -30
- package/migrations/015_sources_windsurf.sql +0 -30
- package/plugin-hermes-agent/README.md +0 -49
- package/plugin-hermes-agent/__init__.py +0 -75
- package/plugin-hermes-agent/plugin.yaml +0 -15
- package/scripts/backfill-citations.mjs +0 -0
- package/scripts/build-codex-plugin.mjs +0 -61
- package/scripts/deepseek-probe.mjs +0 -67
- package/scripts/extract-triples.mjs +0 -207
- package/scripts/longmemeval/embedding-cache.ts +0 -77
- package/scripts/longmemeval/fetch-dataset.sh +0 -25
- package/scripts/longmemeval/run-harness.ts +0 -315
- package/scripts/longmemeval/scorer.ts +0 -99
- package/scripts/longmemeval/tsconfig.json +0 -9
- package/scripts/longmemeval/types.ts +0 -35
- package/scripts/nlm-daily-digest.py +0 -239
- package/scripts/nlm-daily-digest.sh +0 -28
- package/src/cli/classify-parity.ts +0 -257
- package/src/cli/launchctl-helpers.ts +0 -49
- package/src/cli/nlm.ts +0 -1078
- package/src/core/actions/actions-log.ts +0 -118
- package/src/core/actions/overlay.ts +0 -117
- package/src/core/adapters/aider.ts +0 -205
- package/src/core/adapters/claude-code.ts +0 -293
- package/src/core/adapters/common.ts +0 -54
- package/src/core/adapters/cursor.ts +0 -486
- package/src/core/adapters/from-source.ts +0 -67
- package/src/core/adapters/hermes-agent.ts +0 -240
- package/src/core/adapters/hermes.ts +0 -277
- package/src/core/adapters/jsonl-generic.ts +0 -208
- package/src/core/adapters/opencode.ts +0 -281
- package/src/core/adapters/pi.ts +0 -264
- package/src/core/adapters/windsurf.ts +0 -386
- package/src/core/classifier/prompt.ts +0 -200
- package/src/core/dataset/build-dataset.ts +0 -463
- package/src/core/embedding/chunk-body.ts +0 -76
- package/src/core/embedding/embed-backfill.ts +0 -210
- package/src/core/embedding/embed-normalize.ts +0 -135
- package/src/core/facts/backfill-facts.ts +0 -254
- package/src/core/facts/extract-facts.ts +0 -50
- package/src/core/hook/citation-detect.ts +0 -124
- package/src/core/hook/cite-memo.ts +0 -68
- package/src/core/hook/claude-settings.ts +0 -187
- package/src/core/hook/gate.ts +0 -25
- package/src/core/hook/hook-log.ts +0 -41
- package/src/core/hook/memo-sweep.ts +0 -164
- package/src/core/hook/memo.ts +0 -67
- package/src/core/hook/pointer-block.ts +0 -26
- package/src/core/hook/select.ts +0 -32
- package/src/core/hook/transcript.ts +0 -121
- package/src/core/ingest/ingest-session.ts +0 -111
- package/src/core/providers/provider-models.ts +0 -100
- package/src/core/providers/provider-registry.ts +0 -196
- package/src/core/recall/citation-log.ts +0 -108
- package/src/core/recall/filter.ts +0 -27
- package/src/core/recall/index.ts +0 -6
- package/src/core/recall/match-fields.ts +0 -40
- package/src/core/recall/query-log.ts +0 -149
- package/src/core/recall/query-shape.ts +0 -66
- package/src/core/recall/recall-service.ts +0 -320
- package/src/core/recall/recent-log.ts +0 -59
- package/src/core/recall/tokenize.ts +0 -18
- package/src/core/recall/useful-scan.ts +0 -336
- package/src/core/recall-facts/fact-query-log.ts +0 -150
- package/src/core/recall-facts/fact-recall-service.ts +0 -327
- package/src/core/scheduler/scan-once.ts +0 -142
- package/src/core/scheduler/scheduler.ts +0 -225
- package/src/core/sources/source-registry.ts +0 -278
- package/src/core/storage/db-restore.ts +0 -133
- package/src/core/storage/live-status.ts +0 -45
- package/src/core/storage/migrate.ts +0 -72
- package/src/core/storage/sqlite-fact-store.ts +0 -304
- package/src/core/storage/sqlite-session-store.ts +0 -810
- package/src/hook/hook-auth.ts +0 -18
- package/src/hook/prompt-recall-hook.ts +0 -180
- package/src/hook/session-end-hook.ts +0 -81
- package/src/hook/session-start-hook.ts +0 -168
- package/src/hook/stop-hook.ts +0 -239
- package/src/http/app.ts +0 -1215
- package/src/install/claude-code.ts +0 -128
- package/src/install/codex.ts +0 -367
- package/src/install/cursor.ts +0 -68
- package/src/install/hermes-agent.ts +0 -76
- package/src/install/hermes.ts +0 -78
- package/src/install/nlm-dir-perms.ts +0 -55
- package/src/install/ollama.ts +0 -284
- package/src/install/setup.ts +0 -489
- package/src/install/windsurf.ts +0 -68
- package/src/llm/classifier-box.ts +0 -64
- package/src/llm/deepseek-client.ts +0 -150
- package/src/llm/env-autoload.ts +0 -55
- package/src/llm/ollama-client.ts +0 -189
- package/src/mcp/server.ts +0 -534
- package/src/ports/fact-store.ts +0 -102
- package/src/ports/llm-client.ts +0 -52
- package/src/ports/logger.ts +0 -16
- package/src/ports/session-store.ts +0 -45
- package/src/ports/transcript-adapter.ts +0 -55
- package/src/shared/types.ts +0 -149
- package/src/ui/App.tsx +0 -58
- package/src/ui/components/PromoteOpenButton.tsx +0 -65
- package/src/ui/components/SessionDrawer.tsx +0 -199
- package/src/ui/components/SideNav.tsx +0 -162
- package/src/ui/components/Skeleton.tsx +0 -107
- package/src/ui/index.html +0 -13
- package/src/ui/lib/actions.ts +0 -30
- package/src/ui/lib/api.ts +0 -92
- package/src/ui/lib/dataset.ts +0 -141
- package/src/ui/lib/registries.ts +0 -155
- package/src/ui/lib/view-settings.ts +0 -41
- package/src/ui/main.tsx +0 -15
- package/src/ui/pages/Live.tsx +0 -229
- package/src/ui/pages/Pulse.tsx +0 -415
- package/src/ui/pages/Recall.tsx +0 -190
- package/src/ui/pages/River.tsx +0 -354
- package/src/ui/pages/Search.tsx +0 -386
- package/src/ui/pages/Stub.tsx +0 -9
- package/src/ui/pages/Thread.tsx +0 -473
- package/src/ui/pages/settings/Classifier.tsx +0 -227
- package/src/ui/pages/settings/Data.tsx +0 -190
- package/src/ui/pages/settings/Index.tsx +0 -65
- package/src/ui/pages/settings/Labels.tsx +0 -224
- package/src/ui/pages/settings/Providers.tsx +0 -305
- package/src/ui/pages/settings/SettingsSubnav.tsx +0 -28
- package/src/ui/pages/settings/Sources.tsx +0 -326
- package/src/ui/pages/settings/Views.tsx +0 -96
- package/src/ui/styles.css +0 -1890
- package/src/ui/tsconfig.json +0 -21
- package/src/ui/vite.config.ts +0 -19
- package/tests/fixtures/claude_code/short_session.jsonl +0 -2
- package/tests/fixtures/claude_code/standard_iso.jsonl +0 -4
- package/tests/fixtures/claude_code/tool_heavy.jsonl +0 -8
- package/tests/fixtures/claude_code/with_subagent.jsonl +0 -7
- package/tests/fixtures/facts.ts +0 -17
- package/tests/fixtures/golden-corpus.ts +0 -85
- package/tests/fixtures/hermes/paired_request_dump.json +0 -24
- package/tests/fixtures/hermes/paired_session.json +0 -23
- package/tests/fixtures/hermes/request_dump.json +0 -28
- package/tests/fixtures/hermes/session_iso.json +0 -38
- package/tests/fixtures/hermes/session_unix.json +0 -38
- package/tests/fixtures/hermes/system_only.json +0 -18
- package/tests/fixtures/pi/error-connection-abort.jsonl +0 -8
- package/tests/fixtures/pi/short-successful.jsonl +0 -5
- package/tests/fixtures/pi/with-custom-message.jsonl +0 -6
- package/tests/fixtures/sessions.ts +0 -22
- package/tests/integration/backfill-facts.test.ts +0 -362
- package/tests/integration/citation-explicit.test.ts +0 -111
- package/tests/integration/cite-event.test.ts +0 -169
- package/tests/integration/cite-memo.test.ts +0 -87
- package/tests/integration/db-restore.test.ts +0 -153
- package/tests/integration/embed-backfill.test.ts +0 -176
- package/tests/integration/fact-supersedence.test.ts +0 -313
- package/tests/integration/fts-index.test.ts +0 -60
- package/tests/integration/getbyids-sqlite.test.ts +0 -100
- package/tests/integration/hermes-agent-hooks.test.ts +0 -248
- package/tests/integration/hook-claude-settings.test.ts +0 -218
- package/tests/integration/hook-log.test.ts +0 -54
- package/tests/integration/hook-memo.test.ts +0 -68
- package/tests/integration/hook-pre-compact.test.ts +0 -105
- package/tests/integration/hook-subagent-start.test.ts +0 -102
- package/tests/integration/http.test.ts +0 -401
- package/tests/integration/keyword-search-fts.test.ts +0 -66
- package/tests/integration/mcp-recall-logging.test.ts +0 -88
- package/tests/integration/mcp.test.ts +0 -260
- package/tests/integration/memo-sweep.test.ts +0 -91
- package/tests/integration/prompt-recall-hook.test.ts +0 -88
- package/tests/integration/provider-registry.test.ts +0 -107
- package/tests/integration/recall-golden.test.ts +0 -59
- package/tests/integration/recall-sqlite.test.ts +0 -169
- package/tests/integration/scheduler.test.ts +0 -391
- package/tests/integration/session-end-hook.test.ts +0 -48
- package/tests/integration/session-start-hook.test.ts +0 -126
- package/tests/integration/source-registry.test.ts +0 -122
- package/tests/integration/sqlite-fact-store.test.ts +0 -346
- package/tests/integration/stop-hook.test.ts +0 -560
- package/tests/integration/wal-checkpoint.test.ts +0 -49
- package/tests/unit/cli/launchctl-helpers.test.ts +0 -60
- package/tests/unit/core/adapters/aider.test.ts +0 -230
- package/tests/unit/core/adapters/claude-code.test.ts +0 -118
- package/tests/unit/core/adapters/cursor.test.ts +0 -485
- package/tests/unit/core/adapters/hermes-agent.test.ts +0 -329
- package/tests/unit/core/adapters/hermes.test.ts +0 -81
- package/tests/unit/core/adapters/jsonl-generic.test.ts +0 -142
- package/tests/unit/core/adapters/opencode.test.ts +0 -354
- package/tests/unit/core/adapters/pi.test.ts +0 -110
- package/tests/unit/core/adapters/windsurf.test.ts +0 -416
- package/tests/unit/core/classifier/prompt.test.ts +0 -126
- package/tests/unit/core/embedding/chunk-body.test.ts +0 -100
- package/tests/unit/core/facts/extract-facts.test.ts +0 -117
- package/tests/unit/core/filter.test.ts +0 -40
- package/tests/unit/core/hook/citation-detect-cite-session.test.ts +0 -96
- package/tests/unit/core/hook/citation-detect.test.ts +0 -124
- package/tests/unit/core/hook/gate.test.ts +0 -29
- package/tests/unit/core/hook/pointer-block.test.ts +0 -22
- package/tests/unit/core/hook/select.test.ts +0 -66
- package/tests/unit/core/match-fields.test.ts +0 -39
- package/tests/unit/core/mcp-cite-session.test.ts +0 -51
- package/tests/unit/core/providers/provider-models.test.ts +0 -101
- package/tests/unit/core/query-shape.test.ts +0 -92
- package/tests/unit/core/recall-facts/fact-recall-service.test.ts +0 -258
- package/tests/unit/core/recall-service.test.ts +0 -200
- package/tests/unit/core/storage/live-status.test.ts +0 -54
- package/tests/unit/core/tokenize.test.ts +0 -32
- package/tests/unit/core/useful-scan.test.ts +0 -537
- package/tests/unit/llm/embed.test.ts +0 -93
- package/tests/unit/llm/ollama-client.test.ts +0 -124
- package/tests/unit/scripts/longmemeval-scorer.test.ts +0 -114
- package/tsconfig.json +0 -31
- package/tsconfig.test.json +0 -11
- package/vitest.config.ts +0 -22
|
@@ -1,416 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* WindsurfAdapter unit tests.
|
|
3
|
-
*
|
|
4
|
-
* Each test builds a fake Windsurf workspaceStorage directory tree with
|
|
5
|
-
* in-memory SQLite DBs written to temp files so the adapter can open them
|
|
6
|
-
* in readonly mode.
|
|
7
|
-
*/
|
|
8
|
-
|
|
9
|
-
import { mkdirSync, mkdtempSync, rmSync } from "node:fs";
|
|
10
|
-
import { tmpdir } from "node:os";
|
|
11
|
-
import { join } from "node:path";
|
|
12
|
-
import Database from "better-sqlite3";
|
|
13
|
-
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
|
14
|
-
import { WindsurfAdapter } from "../../../../src/core/adapters/windsurf.js";
|
|
15
|
-
|
|
16
|
-
// ── Schema helpers ────────────────────────────────────────────────────────────
|
|
17
|
-
|
|
18
|
-
const CHAT_KEY = "workbench.panel.aichat.view.aichat.chatdata";
|
|
19
|
-
|
|
20
|
-
function createWorkspaceDb(dbPath: string): Database.Database {
|
|
21
|
-
const db = new Database(dbPath);
|
|
22
|
-
db.exec(`
|
|
23
|
-
CREATE TABLE IF NOT EXISTS ItemTable (
|
|
24
|
-
key TEXT PRIMARY KEY,
|
|
25
|
-
value TEXT
|
|
26
|
-
);
|
|
27
|
-
`);
|
|
28
|
-
return db;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
interface Bubble {
|
|
32
|
-
type: "user" | "ai";
|
|
33
|
-
text?: string;
|
|
34
|
-
rawText?: string;
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
interface Tab {
|
|
38
|
-
tabId: string;
|
|
39
|
-
chatTitle?: string;
|
|
40
|
-
lastSendTime?: number;
|
|
41
|
-
bubbles?: Bubble[];
|
|
42
|
-
}
|
|
43
|
-
|
|
44
|
-
function writeChatData(db: Database.Database, tabs: Tab[]): void {
|
|
45
|
-
db.prepare(`INSERT OR REPLACE INTO ItemTable (key, value) VALUES (?, ?)`).run(
|
|
46
|
-
CHAT_KEY,
|
|
47
|
-
JSON.stringify({ tabs }),
|
|
48
|
-
);
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
// ── Fixtures ──────────────────────────────────────────────────────────────────
|
|
52
|
-
|
|
53
|
-
let tmp: string;
|
|
54
|
-
let userDir: string;
|
|
55
|
-
let wsStorageDir: string;
|
|
56
|
-
let adapter: WindsurfAdapter;
|
|
57
|
-
|
|
58
|
-
beforeEach(() => {
|
|
59
|
-
tmp = mkdtempSync(join(tmpdir(), "nlm-windsurf-"));
|
|
60
|
-
userDir = join(tmp, "Windsurf", "User");
|
|
61
|
-
wsStorageDir = join(userDir, "workspaceStorage");
|
|
62
|
-
mkdirSync(wsStorageDir, { recursive: true });
|
|
63
|
-
adapter = new WindsurfAdapter({ userDir });
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
afterEach(() => {
|
|
67
|
-
rmSync(tmp, { recursive: true, force: true });
|
|
68
|
-
});
|
|
69
|
-
|
|
70
|
-
function addWorkspace(name: string, tabs: Tab[]): string {
|
|
71
|
-
const wsDir = join(wsStorageDir, name);
|
|
72
|
-
mkdirSync(wsDir, { recursive: true });
|
|
73
|
-
const dbPath = join(wsDir, "state.vscdb");
|
|
74
|
-
const db = createWorkspaceDb(dbPath);
|
|
75
|
-
writeChatData(db, tabs);
|
|
76
|
-
db.close();
|
|
77
|
-
return dbPath;
|
|
78
|
-
}
|
|
79
|
-
|
|
80
|
-
// ── detect() ─────────────────────────────────────────────────────────────────
|
|
81
|
-
|
|
82
|
-
describe("detect()", () => {
|
|
83
|
-
it("returns enabled when userDir exists", () => {
|
|
84
|
-
const result = adapter.detect();
|
|
85
|
-
expect(result.enabled).toBe(true);
|
|
86
|
-
expect(result.path).toBe(userDir);
|
|
87
|
-
expect(result.hint).toBeNull();
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
it("returns disabled when userDir is absent", () => {
|
|
91
|
-
const noDir = new WindsurfAdapter({ userDir: join(tmp, "nonexistent") });
|
|
92
|
-
const result = noDir.detect();
|
|
93
|
-
expect(result.enabled).toBe(false);
|
|
94
|
-
expect(result.path).toBeNull();
|
|
95
|
-
expect(result.hint).toMatch(/Windsurf/);
|
|
96
|
-
});
|
|
97
|
-
});
|
|
98
|
-
|
|
99
|
-
// ── discover() ───────────────────────────────────────────────────────────────
|
|
100
|
-
|
|
101
|
-
describe("discover()", () => {
|
|
102
|
-
it("returns empty array when workspaceStorage has no DBs", async () => {
|
|
103
|
-
expect(await adapter.discover()).toEqual([]);
|
|
104
|
-
});
|
|
105
|
-
|
|
106
|
-
it("returns prefixed tabIds across workspace DBs", async () => {
|
|
107
|
-
addWorkspace("ws1", [
|
|
108
|
-
{ tabId: "tab-aaa", bubbles: [{ type: "user", text: "Hello" }] },
|
|
109
|
-
]);
|
|
110
|
-
addWorkspace("ws2", [
|
|
111
|
-
{ tabId: "tab-bbb", bubbles: [{ type: "user", text: "Hi" }] },
|
|
112
|
-
{ tabId: "tab-ccc", bubbles: [{ type: "ai", text: "Bye" }] },
|
|
113
|
-
]);
|
|
114
|
-
|
|
115
|
-
const ids = await adapter.discover();
|
|
116
|
-
expect(ids.sort()).toEqual(["ws_tab-aaa", "ws_tab-bbb", "ws_tab-ccc"].sort());
|
|
117
|
-
});
|
|
118
|
-
|
|
119
|
-
it("skips tabs with no bubbles", async () => {
|
|
120
|
-
addWorkspace("ws1", [
|
|
121
|
-
{ tabId: "empty-tab", bubbles: [] },
|
|
122
|
-
{ tabId: "good-tab", bubbles: [{ type: "user", text: "Hello" }] },
|
|
123
|
-
]);
|
|
124
|
-
|
|
125
|
-
const ids = await adapter.discover();
|
|
126
|
-
expect(ids).toEqual(["ws_good-tab"]);
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
it("deduplicates tabIds appearing in multiple workspaces", async () => {
|
|
130
|
-
// Edge case: same tab ID in two workspace DBs (migration artifact)
|
|
131
|
-
addWorkspace("ws1", [{ tabId: "dup-tab", bubbles: [{ type: "user", text: "A" }] }]);
|
|
132
|
-
addWorkspace("ws2", [{ tabId: "dup-tab", bubbles: [{ type: "user", text: "B" }] }]);
|
|
133
|
-
|
|
134
|
-
const ids = await adapter.discover();
|
|
135
|
-
expect(ids.filter((id) => id === "ws_dup-tab").length).toBe(1);
|
|
136
|
-
});
|
|
137
|
-
|
|
138
|
-
it("filters by since using lastSendTime", async () => {
|
|
139
|
-
const old = Date.now() - 10 * 24 * 3600_000;
|
|
140
|
-
const recent = Date.now();
|
|
141
|
-
addWorkspace("ws1", [
|
|
142
|
-
{ tabId: "old-tab", lastSendTime: old, bubbles: [{ type: "user", text: "Old" }] },
|
|
143
|
-
{ tabId: "new-tab", lastSendTime: recent, bubbles: [{ type: "user", text: "New" }] },
|
|
144
|
-
]);
|
|
145
|
-
|
|
146
|
-
const cutoff = new Date(Date.now() - 5 * 24 * 3600_000);
|
|
147
|
-
const ids = await adapter.discover({ since: cutoff });
|
|
148
|
-
expect(ids).toEqual(["ws_new-tab"]);
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
it("includes tab with lastSendTime=0 even when since is set (zero means unknown age)", async () => {
|
|
152
|
-
addWorkspace("ws1", [
|
|
153
|
-
{ tabId: "zero-ts-tab", lastSendTime: 0, bubbles: [{ type: "user", text: "Hi" }] },
|
|
154
|
-
]);
|
|
155
|
-
|
|
156
|
-
const cutoff = new Date(); // very recent cutoff that would exclude everything with a real ts
|
|
157
|
-
const ids = await adapter.discover({ since: cutoff });
|
|
158
|
-
expect(ids).toContain("ws_zero-ts-tab");
|
|
159
|
-
});
|
|
160
|
-
});
|
|
161
|
-
|
|
162
|
-
// ── parseSession() ────────────────────────────────────────────────────────────
|
|
163
|
-
|
|
164
|
-
describe("parseSession()", () => {
|
|
165
|
-
it("returns null for unknown tabId", async () => {
|
|
166
|
-
addWorkspace("ws1", [{ tabId: "real-tab", bubbles: [{ type: "user", text: "Hello" }] }]);
|
|
167
|
-
expect(await adapter.parseSession("ghost-tab")).toBeNull();
|
|
168
|
-
});
|
|
169
|
-
|
|
170
|
-
it("returns null for tab with no usable bubbles", async () => {
|
|
171
|
-
addWorkspace("ws1", [{ tabId: "empty-tab", bubbles: [] }]);
|
|
172
|
-
expect(await adapter.parseSession("empty-tab")).toBeNull();
|
|
173
|
-
});
|
|
174
|
-
|
|
175
|
-
it("extracts user and assistant turns", async () => {
|
|
176
|
-
addWorkspace("ws1", [
|
|
177
|
-
{
|
|
178
|
-
tabId: "chat-tab",
|
|
179
|
-
chatTitle: "My chat",
|
|
180
|
-
bubbles: [
|
|
181
|
-
{ type: "user", text: "Hello AI" },
|
|
182
|
-
{ type: "ai", rawText: "Hi human" },
|
|
183
|
-
],
|
|
184
|
-
},
|
|
185
|
-
]);
|
|
186
|
-
|
|
187
|
-
const chunk = await adapter.parseSession("chat-tab");
|
|
188
|
-
expect(chunk).not.toBeNull();
|
|
189
|
-
expect(chunk!.turnCount).toBe(2);
|
|
190
|
-
expect(chunk!.text).toContain("user: Hello AI");
|
|
191
|
-
expect(chunk!.text).toContain("assistant: Hi human");
|
|
192
|
-
});
|
|
193
|
-
|
|
194
|
-
it("prefers rawText over text for bubble content", async () => {
|
|
195
|
-
addWorkspace("ws1", [
|
|
196
|
-
{
|
|
197
|
-
tabId: "rawtext-tab",
|
|
198
|
-
bubbles: [
|
|
199
|
-
{ type: "user", rawText: "Raw question", text: "text version" },
|
|
200
|
-
],
|
|
201
|
-
},
|
|
202
|
-
]);
|
|
203
|
-
|
|
204
|
-
const chunk = await adapter.parseSession("rawtext-tab");
|
|
205
|
-
expect(chunk!.text).toContain("Raw question");
|
|
206
|
-
expect(chunk!.text).not.toContain("text version");
|
|
207
|
-
});
|
|
208
|
-
|
|
209
|
-
it("uses chatTitle as label", async () => {
|
|
210
|
-
addWorkspace("ws1", [
|
|
211
|
-
{
|
|
212
|
-
tabId: "titled-tab",
|
|
213
|
-
chatTitle: "Refactoring session",
|
|
214
|
-
bubbles: [{ type: "user", text: "Let's refactor" }],
|
|
215
|
-
},
|
|
216
|
-
]);
|
|
217
|
-
|
|
218
|
-
const chunk = await adapter.parseSession("titled-tab");
|
|
219
|
-
expect(chunk!.label).toBe("Refactoring session");
|
|
220
|
-
});
|
|
221
|
-
|
|
222
|
-
it("falls back to first user turn as label when chatTitle is absent", async () => {
|
|
223
|
-
addWorkspace("ws1", [
|
|
224
|
-
{
|
|
225
|
-
tabId: "notitle-tab",
|
|
226
|
-
bubbles: [{ type: "user", text: "What is a monad?" }],
|
|
227
|
-
},
|
|
228
|
-
]);
|
|
229
|
-
|
|
230
|
-
const chunk = await adapter.parseSession("notitle-tab");
|
|
231
|
-
expect(chunk!.label).toBe("What is a monad?");
|
|
232
|
-
});
|
|
233
|
-
|
|
234
|
-
it("sets correct id prefix and runtimeSessionId", async () => {
|
|
235
|
-
addWorkspace("ws1", [
|
|
236
|
-
{ tabId: "id-check", bubbles: [{ type: "user", text: "Hello" }] },
|
|
237
|
-
]);
|
|
238
|
-
|
|
239
|
-
const chunk = await adapter.parseSession("id-check");
|
|
240
|
-
expect(chunk!.runtimeSessionId).toBe("id-check");
|
|
241
|
-
expect(chunk!.id).toMatch(/^ws_/);
|
|
242
|
-
});
|
|
243
|
-
|
|
244
|
-
it("sets sourcePath to dbPath::tabId", async () => {
|
|
245
|
-
const dbPath = addWorkspace("ws1", [
|
|
246
|
-
{ tabId: "path-tab", bubbles: [{ type: "user", text: "Hello" }] },
|
|
247
|
-
]);
|
|
248
|
-
|
|
249
|
-
const chunk = await adapter.parseSession("path-tab");
|
|
250
|
-
expect(chunk!.sourcePath).toBe(`${dbPath}::path-tab`);
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
it("finds tab in second workspace when not in first", async () => {
|
|
254
|
-
addWorkspace("ws1", [{ tabId: "tab-ws1", bubbles: [{ type: "user", text: "In ws1" }] }]);
|
|
255
|
-
addWorkspace("ws2", [{ tabId: "tab-ws2", bubbles: [{ type: "user", text: "In ws2" }] }]);
|
|
256
|
-
|
|
257
|
-
const chunk = await adapter.parseSession("tab-ws2");
|
|
258
|
-
expect(chunk).not.toBeNull();
|
|
259
|
-
expect(chunk!.text).toContain("In ws2");
|
|
260
|
-
});
|
|
261
|
-
|
|
262
|
-
it("skips bubbles with empty text", async () => {
|
|
263
|
-
addWorkspace("ws1", [
|
|
264
|
-
{
|
|
265
|
-
tabId: "sparse-tab",
|
|
266
|
-
bubbles: [
|
|
267
|
-
{ type: "user", text: "" },
|
|
268
|
-
{ type: "user", text: "Real question" },
|
|
269
|
-
{ type: "ai", text: "Real answer" },
|
|
270
|
-
],
|
|
271
|
-
},
|
|
272
|
-
]);
|
|
273
|
-
|
|
274
|
-
const chunk = await adapter.parseSession("sparse-tab");
|
|
275
|
-
expect(chunk!.turnCount).toBe(2);
|
|
276
|
-
});
|
|
277
|
-
|
|
278
|
-
it("populates byteRange[1] equal to transcript byte length", async () => {
|
|
279
|
-
addWorkspace("ws1", [
|
|
280
|
-
{
|
|
281
|
-
tabId: "bytes-tab",
|
|
282
|
-
bubbles: [
|
|
283
|
-
{ type: "user", text: "Hello" },
|
|
284
|
-
{ type: "ai", text: "Hi" },
|
|
285
|
-
],
|
|
286
|
-
},
|
|
287
|
-
]);
|
|
288
|
-
|
|
289
|
-
const chunk = await adapter.parseSession("bytes-tab");
|
|
290
|
-
expect(chunk!.byteRange[1]).toBe(Buffer.byteLength(chunk!.text, "utf8"));
|
|
291
|
-
});
|
|
292
|
-
});
|
|
293
|
-
|
|
294
|
-
// ── global DB agent sessions (wsg_) ──────────────────────────────────────────
|
|
295
|
-
|
|
296
|
-
describe("global DB agent sessions (wsg_)", () => {
|
|
297
|
-
let globalDb: Database.Database;
|
|
298
|
-
let globalDbPath: string;
|
|
299
|
-
|
|
300
|
-
beforeEach(() => {
|
|
301
|
-
const globalDir = join(userDir, "globalStorage");
|
|
302
|
-
mkdirSync(globalDir, { recursive: true });
|
|
303
|
-
globalDbPath = join(globalDir, "state.vscdb");
|
|
304
|
-
globalDb = new Database(globalDbPath);
|
|
305
|
-
});
|
|
306
|
-
|
|
307
|
-
afterEach(() => {
|
|
308
|
-
try { globalDb.close(); } catch { /* already closed */ }
|
|
309
|
-
});
|
|
310
|
-
|
|
311
|
-
function addCursorDiskKVSession(
|
|
312
|
-
composerId: string,
|
|
313
|
-
opts: { name?: string; createdAt?: string; lastUpdatedAt?: string; conversation?: Array<{ type?: number; role?: string; text: string }> } = {},
|
|
314
|
-
): void {
|
|
315
|
-
globalDb.exec(`CREATE TABLE IF NOT EXISTS cursorDiskKV (key TEXT PRIMARY KEY, value TEXT);`);
|
|
316
|
-
const data = {
|
|
317
|
-
composerId,
|
|
318
|
-
name: opts.name,
|
|
319
|
-
createdAt: opts.createdAt ?? new Date(Date.now() - 3600_000).toISOString(),
|
|
320
|
-
lastUpdatedAt: opts.lastUpdatedAt ?? new Date().toISOString(),
|
|
321
|
-
conversation: opts.conversation ?? [],
|
|
322
|
-
};
|
|
323
|
-
globalDb.prepare(`INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)`).run(
|
|
324
|
-
`composerData:${composerId}`,
|
|
325
|
-
JSON.stringify(data),
|
|
326
|
-
);
|
|
327
|
-
globalDb.close();
|
|
328
|
-
}
|
|
329
|
-
|
|
330
|
-
function addItemTableSession(
|
|
331
|
-
composerId: string,
|
|
332
|
-
opts: { name?: string; conversation?: Array<{ type?: number; role?: string; text: string }> } = {},
|
|
333
|
-
): void {
|
|
334
|
-
globalDb.exec(`CREATE TABLE IF NOT EXISTS ItemTable (key TEXT PRIMARY KEY, value TEXT);`);
|
|
335
|
-
const data = {
|
|
336
|
-
composerId,
|
|
337
|
-
name: opts.name,
|
|
338
|
-
conversation: opts.conversation ?? [],
|
|
339
|
-
};
|
|
340
|
-
// Use an agent-style key so the fallback LIKE query matches
|
|
341
|
-
globalDb.prepare(`INSERT INTO ItemTable (key, value) VALUES (?, ?)`).run(
|
|
342
|
-
`cascade:${composerId}`,
|
|
343
|
-
JSON.stringify(data),
|
|
344
|
-
);
|
|
345
|
-
globalDb.close();
|
|
346
|
-
}
|
|
347
|
-
|
|
348
|
-
it("discover() returns wsg_ ids from cursorDiskKV global sessions", async () => {
|
|
349
|
-
addCursorDiskKVSession("agent-aaa", {
|
|
350
|
-
conversation: [{ type: 1, text: "Hello" }],
|
|
351
|
-
});
|
|
352
|
-
|
|
353
|
-
const ids = await adapter.discover();
|
|
354
|
-
expect(ids).toContain("wsg_agent-aaa");
|
|
355
|
-
});
|
|
356
|
-
|
|
357
|
-
it("discover() returns wsg_ ids from ItemTable fallback", async () => {
|
|
358
|
-
addItemTableSession("flow-bbb", {
|
|
359
|
-
conversation: [{ role: "user", text: "Hi" }],
|
|
360
|
-
});
|
|
361
|
-
|
|
362
|
-
const ids = await adapter.discover();
|
|
363
|
-
expect(ids).toContain("wsg_flow-bbb");
|
|
364
|
-
});
|
|
365
|
-
|
|
366
|
-
it("parseSession(wsg_<id>) extracts turns via cursorDiskKV", async () => {
|
|
367
|
-
addCursorDiskKVSession("agent-parse", {
|
|
368
|
-
name: "My flow",
|
|
369
|
-
conversation: [
|
|
370
|
-
{ type: 1, text: "Build a widget" },
|
|
371
|
-
{ type: 2, text: "Built!" },
|
|
372
|
-
],
|
|
373
|
-
});
|
|
374
|
-
|
|
375
|
-
const chunk = await adapter.parseSession("wsg_agent-parse");
|
|
376
|
-
expect(chunk).not.toBeNull();
|
|
377
|
-
expect(chunk!.turnCount).toBe(2);
|
|
378
|
-
expect(chunk!.text).toContain("user: Build a widget");
|
|
379
|
-
expect(chunk!.text).toContain("assistant: Built!");
|
|
380
|
-
expect(chunk!.label).toBe("My flow");
|
|
381
|
-
expect(chunk!.id).toMatch(/^wsg_/);
|
|
382
|
-
expect(chunk!.runtimeSessionId).toBe("agent-parse");
|
|
383
|
-
});
|
|
384
|
-
|
|
385
|
-
it("parseSession(wsg_<id>) returns null when conversation is empty", async () => {
|
|
386
|
-
addCursorDiskKVSession("empty-agent", { conversation: [] });
|
|
387
|
-
expect(await adapter.parseSession("wsg_empty-agent")).toBeNull();
|
|
388
|
-
});
|
|
389
|
-
|
|
390
|
-
it("discover() wsg_ filters by since using lastUpdatedAt", async () => {
|
|
391
|
-
const recentDb = new Database(globalDbPath);
|
|
392
|
-
recentDb.exec(`CREATE TABLE IF NOT EXISTS cursorDiskKV (key TEXT PRIMARY KEY, value TEXT);`);
|
|
393
|
-
const old = new Date(Date.now() - 10 * 24 * 3600_000).toISOString();
|
|
394
|
-
const recent = new Date().toISOString();
|
|
395
|
-
const oldData = { composerId: "old-agent", lastUpdatedAt: old, conversation: [{ type: 1, text: "Old" }] };
|
|
396
|
-
const newData = { composerId: "new-agent", lastUpdatedAt: recent, conversation: [{ type: 1, text: "New" }] };
|
|
397
|
-
recentDb.prepare(`INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)`).run("composerData:old-agent", JSON.stringify(oldData));
|
|
398
|
-
recentDb.prepare(`INSERT INTO cursorDiskKV (key, value) VALUES (?, ?)`).run("composerData:new-agent", JSON.stringify(newData));
|
|
399
|
-
recentDb.close();
|
|
400
|
-
|
|
401
|
-
const cutoff = new Date(Date.now() - 5 * 24 * 3600_000);
|
|
402
|
-
const ids = await adapter.discover({ since: cutoff });
|
|
403
|
-
expect(ids).not.toContain("wsg_old-agent");
|
|
404
|
-
expect(ids).toContain("wsg_new-agent");
|
|
405
|
-
});
|
|
406
|
-
});
|
|
407
|
-
|
|
408
|
-
// ── metadata ──────────────────────────────────────────────────────────────────
|
|
409
|
-
|
|
410
|
-
describe("adapter metadata", () => {
|
|
411
|
-
it("has correct name, runtimeVersion, and transcriptKind", () => {
|
|
412
|
-
expect(adapter.name).toBe("windsurf");
|
|
413
|
-
expect(adapter.runtimeVersion).toBe("windsurf/1.0");
|
|
414
|
-
expect(adapter.transcriptKind).toBe("windsurf-sqlite");
|
|
415
|
-
});
|
|
416
|
-
});
|
|
@@ -1,126 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* coerceClassifyResult — defensive parser over raw LLM JSON output. Focuses
|
|
3
|
-
* on the Phase B.2 facts[] additions; existing fields are covered by the
|
|
4
|
-
* end-to-end OllamaClient tests.
|
|
5
|
-
*/
|
|
6
|
-
|
|
7
|
-
import { describe, expect, it } from "vitest";
|
|
8
|
-
import {
|
|
9
|
-
CLASSIFIER_SYSTEM_PROMPT,
|
|
10
|
-
PREDICATE_VOCABULARY,
|
|
11
|
-
coerceClassifyResult,
|
|
12
|
-
} from "../../../../src/core/classifier/prompt.js";
|
|
13
|
-
|
|
14
|
-
describe("coerceClassifyResult — facts", () => {
|
|
15
|
-
function baseFields() {
|
|
16
|
-
return {
|
|
17
|
-
label: "L",
|
|
18
|
-
summary: "S",
|
|
19
|
-
entities: [],
|
|
20
|
-
decisions: [],
|
|
21
|
-
open: [],
|
|
22
|
-
confidence: 0.8,
|
|
23
|
-
};
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
it("returns an empty facts array when the key is missing entirely", () => {
|
|
27
|
-
expect(coerceClassifyResult(baseFields()).facts).toEqual([]);
|
|
28
|
-
});
|
|
29
|
-
|
|
30
|
-
it("returns an empty facts array when facts is not an array", () => {
|
|
31
|
-
expect(
|
|
32
|
-
coerceClassifyResult({ ...baseFields(), facts: "not-an-array" }).facts,
|
|
33
|
-
).toEqual([]);
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
it("normalizes subject + predicate to lowercase and trims value", () => {
|
|
37
|
-
const out = coerceClassifyResult({
|
|
38
|
-
...baseFields(),
|
|
39
|
-
facts: [
|
|
40
|
-
{ kind: "decision", subject: "NLM-Memory-TS", predicate: "Framework", value: " Hono " },
|
|
41
|
-
],
|
|
42
|
-
});
|
|
43
|
-
expect(out.facts).toEqual([
|
|
44
|
-
{ kind: "decision", subject: "nlm-memory-ts", predicate: "framework", value: "Hono" },
|
|
45
|
-
]);
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
it("drops facts with predicates outside the closed vocabulary (no 'other' escape hatch)", () => {
|
|
49
|
-
const out = coerceClassifyResult({
|
|
50
|
-
...baseFields(),
|
|
51
|
-
facts: [
|
|
52
|
-
{ kind: "decision", subject: "x", predicate: "color-of-the-bikeshed", value: "blue" },
|
|
53
|
-
{ kind: "decision", subject: "x", predicate: "framework", value: "Hono" },
|
|
54
|
-
],
|
|
55
|
-
});
|
|
56
|
-
expect(out.facts.map((f) => f.predicate)).toEqual(["framework"]);
|
|
57
|
-
});
|
|
58
|
-
|
|
59
|
-
it("PREDICATE_VOCABULARY does not include 'other'", () => {
|
|
60
|
-
// Removed in Phase B.5 after pilot showed `other` was 43% of writes and
|
|
61
|
-
// almost all slop. Off-vocab facts now get dropped by the coercer rather
|
|
62
|
-
// than forced into a catch-all bucket.
|
|
63
|
-
expect(PREDICATE_VOCABULARY).not.toContain("other");
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
it("drops facts missing required fields (subject, predicate, value)", () => {
|
|
67
|
-
const out = coerceClassifyResult({
|
|
68
|
-
...baseFields(),
|
|
69
|
-
facts: [
|
|
70
|
-
{ kind: "decision", subject: "", predicate: "framework", value: "Hono" },
|
|
71
|
-
{ kind: "decision", subject: "x", predicate: "", value: "Hono" },
|
|
72
|
-
{ kind: "decision", subject: "x", predicate: "framework", value: "" },
|
|
73
|
-
{ kind: "decision", subject: "good", predicate: "framework", value: "Hono" },
|
|
74
|
-
],
|
|
75
|
-
});
|
|
76
|
-
expect(out.facts).toEqual([
|
|
77
|
-
{ kind: "decision", subject: "good", predicate: "framework", value: "Hono" },
|
|
78
|
-
]);
|
|
79
|
-
});
|
|
80
|
-
|
|
81
|
-
it("drops facts with an invalid kind", () => {
|
|
82
|
-
const out = coerceClassifyResult({
|
|
83
|
-
...baseFields(),
|
|
84
|
-
facts: [
|
|
85
|
-
{ kind: "garbage", subject: "x", predicate: "framework", value: "Hono" },
|
|
86
|
-
{ kind: "attribute", subject: "x", predicate: "framework", value: "Hono" },
|
|
87
|
-
],
|
|
88
|
-
});
|
|
89
|
-
expect(out.facts.map((f) => f.kind)).toEqual(["attribute"]);
|
|
90
|
-
});
|
|
91
|
-
|
|
92
|
-
it("clamps sourceQuote to 500 chars and trims whitespace", () => {
|
|
93
|
-
const long = " ".repeat(10) + "a".repeat(600) + " ".repeat(10);
|
|
94
|
-
const out = coerceClassifyResult({
|
|
95
|
-
...baseFields(),
|
|
96
|
-
facts: [
|
|
97
|
-
{ kind: "decision", subject: "x", predicate: "framework", value: "Hono", sourceQuote: long },
|
|
98
|
-
],
|
|
99
|
-
});
|
|
100
|
-
expect(out.facts[0]?.sourceQuote).toBe("a".repeat(500));
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
it("omits sourceQuote when blank or non-string", () => {
|
|
104
|
-
const out = coerceClassifyResult({
|
|
105
|
-
...baseFields(),
|
|
106
|
-
facts: [
|
|
107
|
-
{ kind: "decision", subject: "a", predicate: "framework", value: "v", sourceQuote: " " },
|
|
108
|
-
{ kind: "decision", subject: "b", predicate: "framework", value: "v", sourceQuote: 42 },
|
|
109
|
-
],
|
|
110
|
-
});
|
|
111
|
-
expect(out.facts[0]?.sourceQuote).toBeUndefined();
|
|
112
|
-
expect(out.facts[1]?.sourceQuote).toBeUndefined();
|
|
113
|
-
});
|
|
114
|
-
});
|
|
115
|
-
|
|
116
|
-
describe("CLASSIFIER_SYSTEM_PROMPT", () => {
|
|
117
|
-
it("includes the facts field in the requested JSON shape", () => {
|
|
118
|
-
expect(CLASSIFIER_SYSTEM_PROMPT).toContain('"facts"');
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
it("inlines the predicate vocabulary so the LLM sees the closed list", () => {
|
|
122
|
-
for (const p of PREDICATE_VOCABULARY) {
|
|
123
|
-
expect(CLASSIFIER_SYSTEM_PROMPT).toContain(p);
|
|
124
|
-
}
|
|
125
|
-
});
|
|
126
|
-
});
|
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
import { describe, expect, it } from "vitest";
|
|
2
|
-
import {
|
|
3
|
-
chunkSessionText,
|
|
4
|
-
MAX_CHUNK_CHARS,
|
|
5
|
-
OVERLAP_CHARS,
|
|
6
|
-
} from "../../../../src/core/embedding/chunk-body.js";
|
|
7
|
-
|
|
8
|
-
describe("chunkSessionText", () => {
|
|
9
|
-
it("returns empty array when label, summary, and body are all blank", () => {
|
|
10
|
-
expect(chunkSessionText({})).toEqual([]);
|
|
11
|
-
expect(chunkSessionText({ label: "", summary: " ", body: "" })).toEqual([]);
|
|
12
|
-
});
|
|
13
|
-
|
|
14
|
-
it("returns a header-only chunk when body is empty", () => {
|
|
15
|
-
const chunks = chunkSessionText({ label: "Meeting notes", summary: "Q4 plan" });
|
|
16
|
-
expect(chunks).toEqual(["Meeting notes Q4 plan"]);
|
|
17
|
-
});
|
|
18
|
-
|
|
19
|
-
it("returns one chunk when header + body fits in maxChars", () => {
|
|
20
|
-
const chunks = chunkSessionText(
|
|
21
|
-
{ label: "L", summary: "S", body: "hello world" },
|
|
22
|
-
{ maxChars: 100, overlap: 10 },
|
|
23
|
-
);
|
|
24
|
-
expect(chunks).toEqual(["L S hello world"]);
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
it("splits body into multiple chunks with overlap when over maxChars", () => {
|
|
28
|
-
// body 250 chars, maxChars=100, overlap=20, no header → step=80
|
|
29
|
-
// chunk 0: body[0..100], chunk 1: body[80..180], chunk 2: body[160..250]
|
|
30
|
-
const body = "x".repeat(250);
|
|
31
|
-
const chunks = chunkSessionText(
|
|
32
|
-
{ body },
|
|
33
|
-
{ maxChars: 100, overlap: 20 },
|
|
34
|
-
);
|
|
35
|
-
expect(chunks).toHaveLength(3);
|
|
36
|
-
expect(chunks[0]!.length).toBe(100);
|
|
37
|
-
expect(chunks[1]!.length).toBe(100);
|
|
38
|
-
expect(chunks[2]!.length).toBe(90);
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
it("preserves overlap content between adjacent chunks", () => {
|
|
42
|
-
// Recognisable letters so we can confirm the boundary overlaps.
|
|
43
|
-
const body =
|
|
44
|
-
"A".repeat(50) +
|
|
45
|
-
"B".repeat(50) +
|
|
46
|
-
"C".repeat(50) +
|
|
47
|
-
"D".repeat(50); // 200 chars
|
|
48
|
-
const chunks = chunkSessionText(
|
|
49
|
-
{ body },
|
|
50
|
-
{ maxChars: 80, overlap: 20 },
|
|
51
|
-
);
|
|
52
|
-
// chunk 0: body[0..80] → AAA...AAA BBB...BBB BB (50 A + 30 B)
|
|
53
|
-
// chunk 1: body[60..140] → 40 B + 40 C overlapping the last 20 B from chunk 0
|
|
54
|
-
expect(chunks[0]!.slice(-10)).toBe("B".repeat(10));
|
|
55
|
-
expect(chunks[1]!.slice(0, 20)).toBe("B".repeat(20)); // overlap
|
|
56
|
-
});
|
|
57
|
-
|
|
58
|
-
it("accounts for header in first-chunk budget", () => {
|
|
59
|
-
const header = "h".repeat(20);
|
|
60
|
-
const body = "b".repeat(200);
|
|
61
|
-
const chunks = chunkSessionText(
|
|
62
|
-
{ label: header, body },
|
|
63
|
-
{ maxChars: 100, overlap: 10 },
|
|
64
|
-
);
|
|
65
|
-
// First chunk: 20-char header + space + body[0..79] = 100 chars total
|
|
66
|
-
// Second chunk: body[69..169] (90 chars body budget - 10 overlap from start of body)
|
|
67
|
-
expect(chunks[0]!.startsWith(header + " ")).toBe(true);
|
|
68
|
-
expect(chunks[0]!.length).toBeLessThanOrEqual(100);
|
|
69
|
-
});
|
|
70
|
-
|
|
71
|
-
it("respects defaults (MAX_CHUNK_CHARS, OVERLAP_CHARS) when no opts passed", () => {
|
|
72
|
-
const body = "y".repeat(MAX_CHUNK_CHARS * 2 + 1000);
|
|
73
|
-
const chunks = chunkSessionText({ body });
|
|
74
|
-
expect(chunks.length).toBeGreaterThan(1);
|
|
75
|
-
for (const c of chunks) {
|
|
76
|
-
expect(c.length).toBeLessThanOrEqual(MAX_CHUNK_CHARS);
|
|
77
|
-
}
|
|
78
|
-
// Overlap default sanity: consecutive chunks should share OVERLAP_CHARS
|
|
79
|
-
expect(OVERLAP_CHARS).toBeGreaterThan(0);
|
|
80
|
-
});
|
|
81
|
-
|
|
82
|
-
it("throws on invalid options", () => {
|
|
83
|
-
expect(() => chunkSessionText({ body: "x" }, { maxChars: 0 })).toThrow();
|
|
84
|
-
expect(() => chunkSessionText({ body: "x" }, { maxChars: 100, overlap: -1 })).toThrow();
|
|
85
|
-
expect(() => chunkSessionText({ body: "x" }, { maxChars: 100, overlap: 100 })).toThrow();
|
|
86
|
-
});
|
|
87
|
-
|
|
88
|
-
it("trims whitespace at chunk boundaries", () => {
|
|
89
|
-
const body = "alpha " + "z".repeat(200);
|
|
90
|
-
const chunks = chunkSessionText({ body }, { maxChars: 100, overlap: 20 });
|
|
91
|
-
for (const c of chunks) {
|
|
92
|
-
expect(c).toBe(c.trim());
|
|
93
|
-
}
|
|
94
|
-
});
|
|
95
|
-
|
|
96
|
-
it("returns at least one chunk for tiny non-empty input", () => {
|
|
97
|
-
expect(chunkSessionText({ body: "x" })).toEqual(["x"]);
|
|
98
|
-
expect(chunkSessionText({ label: "x" })).toEqual(["x"]);
|
|
99
|
-
});
|
|
100
|
-
});
|