@andespindola/brainlink 0.1.0-beta.12 → 0.1.0-beta.120

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/AGENTS.md +8 -5
  2. package/CHANGELOG.md +26 -2
  3. package/CONTRIBUTING.md +2 -2
  4. package/COPYRIGHT.md +5 -0
  5. package/README.md +138 -18
  6. package/SECURITY.md +1 -1
  7. package/dist/application/analyze-vault.js +1 -9
  8. package/dist/application/build-context.js +56 -1
  9. package/dist/application/dedupe-notes.js +226 -0
  10. package/dist/application/frontend/client-css.js +93 -45
  11. package/dist/application/frontend/client-html.js +34 -25
  12. package/dist/application/frontend/client-js.js +2637 -181
  13. package/dist/application/frontend/client-worker-js.js +66 -0
  14. package/dist/application/get-graph-layout.js +2 -2
  15. package/dist/application/get-graph-node.js +3 -3
  16. package/dist/application/get-graph-summary.js +3 -3
  17. package/dist/application/get-graph.js +3 -3
  18. package/dist/application/import-legacy-sqlite.js +296 -0
  19. package/dist/application/index-vault.js +250 -24
  20. package/dist/application/list-agents.js +3 -3
  21. package/dist/application/list-links.js +5 -5
  22. package/dist/application/offline-pack-backup.js +44 -0
  23. package/dist/application/search-graph-node-ids.js +3 -3
  24. package/dist/application/search-knowledge.js +6 -6
  25. package/dist/application/server/routes.js +90 -1
  26. package/dist/application/start-server.js +75 -4
  27. package/dist/application/watch-vault.js +23 -2
  28. package/dist/benchmarks/large-vault.js +1 -1
  29. package/dist/cli/commands/agent-commands.js +7 -0
  30. package/dist/cli/commands/write-commands.js +818 -8
  31. package/dist/domain/context.js +53 -11
  32. package/dist/domain/graph-layout.js +177 -3
  33. package/dist/domain/middle-out.js +18 -0
  34. package/dist/infrastructure/config.js +38 -0
  35. package/dist/infrastructure/file-index.js +358 -0
  36. package/dist/infrastructure/file-system-vault.js +15 -0
  37. package/dist/infrastructure/index-state.js +56 -0
  38. package/dist/infrastructure/private-pack-codec.js +71 -10
  39. package/dist/infrastructure/search-packs.js +313 -17
  40. package/dist/mcp/server.js +11 -1
  41. package/dist/mcp/tools.js +62 -0
  42. package/docs/AGENT_USAGE.md +96 -17
  43. package/docs/ARCHITECTURE.md +22 -27
  44. package/docs/QUICKSTART.md +7 -0
  45. package/package.json +6 -4
  46. package/dist/infrastructure/sqlite/document-writer.js +0 -51
  47. package/dist/infrastructure/sqlite/graph-reader.js +0 -267
  48. package/dist/infrastructure/sqlite/recovery.js +0 -83
  49. package/dist/infrastructure/sqlite/schema.js +0 -114
  50. package/dist/infrastructure/sqlite/search-reader.js +0 -188
  51. package/dist/infrastructure/sqlite/types.js +0 -1
  52. package/dist/infrastructure/sqlite-index.js +0 -38
@@ -18,7 +18,7 @@ The correct dependency direction is:
18
18
  agent -> Brainlink CLI -> Markdown vault + derived index
19
19
  ```
20
20
 
21
- Agents should never depend on the internal SQLite schema as a public API.
21
+ Agents should never depend on internal index persistence files as a public API.
22
22
 
23
23
  The installed CLI exposes two equivalent binaries:
24
24
 
@@ -52,6 +52,8 @@ Use `blink config where` and `blink config doctor` to inspect active paths and e
52
52
 
53
53
  You can also set `defaultAgent` in `brainlink.config.json` / `.brainlink.json` (for example `"defaultAgent": "coding-agent"`). When set, CLI commands and MCP calls reuse it when `--agent`/`agent` is not passed.
54
54
  You can set `agentProfiles` to define per-agent defaults for `defaultSearchMode`, `defaultSearchLimit` and `defaultContextTokens`.
55
+ You can tune search-pack compression with `searchPack.rowChunkSize`, `searchPack.compressionLevel` and `searchPack.useDictionary`.
56
+ Guardrails for benchmark acceptance are configured with `searchPack.guardrailMinSavingsPercent` and `searchPack.guardrailMaxLatencyRegressionPercent`.
55
57
 
56
58
  `autoIndexOnWrite` (default: `true`) controls whether `add` and MCP write tools index right after writing.
57
59
 
@@ -180,16 +182,16 @@ Required write behavior:
180
182
  Good linked note:
181
183
 
182
184
  ```bash
183
- blink add "SQLite Index Rebuild" \
185
+ blink add "Index Rebuild" \
184
186
  --agent coding-agent \
185
- --content "Legacy derived indexes without agent columns are rebuilt because SQLite is disposable. Related: [[Architecture]], [[Agent Namespaces]]. #sqlite #architecture #decision"
187
+ --content "Derived index artifacts are rebuildable and disposable. Related: [[Architecture]], [[Agent Namespaces]]. #index #architecture #decision"
186
188
  blink validate --agent coding-agent
187
189
  ```
188
190
 
189
191
  Poor disconnected note:
190
192
 
191
193
  ```bash
192
- blink add "SQLite Index Rebuild" \
194
+ blink add "Index Rebuild" \
193
195
  --agent coding-agent \
194
196
  --content "We rebuild old indexes now."
195
197
  ```
@@ -377,6 +379,18 @@ blink migrate-vault --from ~/.brainlink/vault --to ./team-vault --report ./migra
377
379
 
378
380
  Use `--dry-run` to preview `copied`, `conflicted`, `unchanged` before writing files.
379
381
 
382
+ ### Import Legacy SQLite DB
383
+
384
+ ```bash
385
+ blink db-import --vault ./team-vault
386
+ blink db-import --vault ./team-vault --db ./legacy/brainlink.db
387
+ blink db-import --vault ./team-vault --db ./legacy/brainlink.db --table legacy_notes --dry-run
388
+ ```
389
+
390
+ `db-import` migrates rows from a legacy SQLite memory database into Markdown notes in the current vault and indexes the result by default.
391
+ Without `--db`, Brainlink auto-detects common legacy database paths.
392
+ Use `--agent` to force a namespace, `--limit` for staged migration, `--dry-run` to preview writes, and `--no-index` to postpone indexing.
393
+
380
394
  ### Install Agent Integration
381
395
 
382
396
  ```bash
@@ -390,6 +404,7 @@ blink agent status
390
404
  ```
391
405
 
392
406
  `agent install` configures Brainlink MCP in `~/.codex/config.toml` so compatible agents can use Brainlink by default.
407
+ `agent install` and `agent upgrade` automatically apply the `fully-auto` MCP bootstrap policy (`enforceBootstrap=true`, `enforceContextFirst=true`, `autoBootstrapOnRead=true`, `autoBootstrapOnStartup=true`) so all plug-and-play Brainlink features start enabled.
393
408
  Use `agent upgrade` on legacy installations to reapply the latest defaults and run self-test diagnostics.
394
409
  Use `agent policy --preset fully-auto` to keep startup/read auto-bootstrap enabled, or `agent policy --preset strict` to force explicit bootstrap calls.
395
410
 
@@ -417,6 +432,25 @@ This creates a slugged Markdown file with frontmatter and a heading.
417
432
 
418
433
  The CLI blocks common secret patterns by default. Do not use `--allow-sensitive` unless the vault is intentionally protected.
419
434
  Brainlink also auto-connects notes that have no `[[wiki links]]` by adding a fallback edge to an agent hub note, so new memory does not stay disconnected.
435
+ `add` also returns `possibleDuplicates` (exact hash + semantic candidates) so agents can decide duplicate resolution immediately.
436
+
437
+ ### Detect Duplicate Notes
438
+
439
+ ```bash
440
+ blink dedupe --vault ./vault --json
441
+ blink dedupe --vault ./vault --agent coding-agent --limit 20 --min-score 0.92 --json
442
+ blink dedupe --vault ./vault --no-semantic --json
443
+ ```
444
+
445
+ ### Resolve Duplicate Notes
446
+
447
+ ```bash
448
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action merge --json
449
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action link --json
450
+ blink dedupe-resolve --vault ./vault --left agents/shared/a.md --right agents/shared/b.md --action ignore --json
451
+ ```
452
+
453
+ `dedupe-resolve` keeps connectivity: non-merge actions still create a low-priority related edge (`#related-to`).
420
454
 
421
455
  For agent-private memory:
422
456
 
@@ -446,6 +480,37 @@ This scans Markdown files and rebuilds:
446
480
  - links
447
481
  - full-text search records
448
482
 
483
+ ### Benchmark Indexing in Real Time
484
+
485
+ ```bash
486
+ blink bench --vault ./vault
487
+ blink bench --vault ./vault --watch
488
+ blink bench --vault ./vault --watch --debounce 500
489
+ blink bench --vault ./vault --json
490
+ ```
491
+
492
+ `bench` runs indexing with realtime phase events and prints a run summary with:
493
+
494
+ - indexed totals (documents, chunks, links)
495
+ - elapsed time and changed document count
496
+ - pack rebuild status and reason
497
+ - pack compression metrics (`inputBytes`, `outputBytes`, ratio/saved percentage)
498
+ - objective guardrails (`guardrailMinSavingsPercent`, `guardrailMaxLatencyRegressionPercent`)
499
+
500
+ Use `--watch` for continuous benchmark runs while editing notes. Watch mode is supported only for local filesystem vaults.
501
+ If pack manifest metadata is missing but encrypted `.blpk` files are present, Brainlink repairs manifest metadata before deciding rebuild policy to avoid unnecessary full repacks on small updates.
502
+
503
+ ### Create Offline Pack Backup
504
+
505
+ ```bash
506
+ blink pack-backup --vault ./vault
507
+ blink pack-backup --vault ./vault --output ./vault/.brainlink/backups/custom.blpkbak.gz
508
+ blink pack-backup --vault ./vault --json
509
+ ```
510
+
511
+ `pack-backup` creates an offline artifact with second-stage compression on top of encrypted `.blpk` packs.
512
+ This is outside the online retrieval path (`index`, `search`, `context`), which keeps a single compression stage.
513
+
449
514
  ### Search Knowledge
450
515
 
451
516
  ```bash
@@ -460,11 +525,12 @@ If `--mode`/`--limit` are omitted, Brainlink resolves those values from the acti
460
525
 
461
526
  Search modes:
462
527
 
463
- - `hybrid`: default; combines SQLite FTS and local embedding similarity.
464
- - `fts`: lexical SQLite full-text search only.
465
- - `semantic`: local deterministic embedding similarity with SQLite bucket candidate narrowing.
528
+ - `hybrid`: default; combines lexical matching and local embedding similarity.
529
+ - `fts`: lexical full-text matching only.
530
+ - `semantic`: local deterministic embedding similarity.
466
531
 
467
- Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/brainlink.db` changes.
532
+ Hybrid results are cached in-memory for a short TTL and invalidated when `.brainlink/index.json` changes.
533
+ Context assembly uses middle-out ordering inside each note: the highest-scoring chunk is selected first, then nearby chunks are expanded while token budget allows.
468
534
 
469
535
  ### Build Agent Context
470
536
 
@@ -523,15 +589,26 @@ shared: 30 documents
523
589
  ```bash
524
590
  blink server --host 127.0.0.1 --port 4321
525
591
  blink server --vault ./vault --host 127.0.0.1 --port 4321
592
+ blink server --vault ./vault --host 127.0.0.1 --port 4321 --no-open
526
593
  ```
527
594
 
528
595
  This starts a local frontend for inspecting the knowledge graph.
596
+ By default it tries to open the graph in a native desktop GUI window:
597
+ - macOS: Swift + WebKit
598
+ - Windows: PowerShell WinForms WebBrowser
599
+ - Linux: optional Python GTK + WebKit2 (requires `python3` + `gi` + `WebKit2`)
600
+
601
+ On Linux, native GUI is disabled by default for better startup performance. Enable it with `BRAINLINK_LINUX_NATIVE_GUI=1`.
602
+ If native GUI launch is unavailable, it falls back to dedicated app-window mode and then to the default browser.
603
+ Use `--no-open` to keep the server headless.
604
+ When native GUI is active, the GUI window closes automatically when the `blink server` process stops.
529
605
 
530
606
  Without `--vault`, the graph UI serves `$HOME/.brainlink/vault`.
531
607
 
532
- The frontend includes an agent selector. Selecting an agent calls the same read APIs with `agent=<agent-id>` and renders that namespace instead of merging every agent into one graph.
608
+ The frontend includes an agent selector that shows only the agent id. Selecting an agent calls the same read APIs with `agent=<agent-id>` and renders that namespace instead of merging every agent into one graph.
533
609
 
534
- Graph navigation controls include zoom in, zoom out, fit visible nodes and reset-to-fit-all nodes. Mouse wheel zoom is anchored to the cursor. Totals for notes, links and tags stay visible as floating metrics under the Brainlink title, and node details open on click in a modal (tags, outgoing links, backlinks and Markdown content).
610
+ Graph navigation controls include zoom in, zoom out, fit visible nodes and reset-to-fit-all nodes. Mouse wheel zoom (including `cmd+scroll` and `ctrl+scroll`) is anchored to the cursor. Keyboard shortcuts are `+` (zoom in), `-` (zoom out) and `0` (reset fit). Double-click on canvas zooms in at cursor position. Totals for notes, links and tags stay visible as floating metrics under the Brainlink title, and node details open on click in a modal (tags, outgoing links, backlinks and Markdown content). Vaults above 1000 notes also expose stable hierarchy groups of up to 1000 direct nodes, with recursive parent groups when a level exceeds 1000 groups; zoom-out renders those groups as normal graph nodes, and zoom-in expands the focused group into its child graph without drawing a background glow layer.
611
+ During graph filtering, Brainlink keeps hub context nodes visible (`Memory Hub`/`MOC`/high-degree fallback) so filtered views still show relationship anchors.
535
612
 
536
613
  The command reindexes by default, then serves:
537
614
 
@@ -589,6 +666,8 @@ Available MCP tools:
589
666
  - `brainlink_recommendations`
590
667
  - `brainlink_context`
591
668
  - `brainlink_search`
669
+ - `brainlink_dedupe`
670
+ - `brainlink_resolve_duplicate`
592
671
  - `brainlink_add_note`
593
672
  - `brainlink_add_file`
594
673
  - `brainlink_index`
@@ -634,8 +713,8 @@ GET /api/validate
634
713
 
635
714
  The HTTP API is read-only. Use the CLI for writes and indexing.
636
715
 
637
- Brainlink maintains an automatic SQLite rollback snapshot at `.brainlink/brainlink.db.backup`. When `.brainlink/brainlink.db` is corrupted, Brainlink restores from snapshot automatically or recreates a clean index if no snapshot exists yet.
638
- Indexing also writes private encrypted search packs at `.brainlink/search-packs/*.blpk`; when SQLite cannot be opened, Brainlink falls back to pack-based search automatically.
716
+ Indexing writes private encrypted search packs at `.brainlink/search-packs/*.blpk` for resilient retrieval and portability.
717
+ Pack search now uses compressed-space prefiltering (token bloom index per pack) before decrypting/reading pack payloads.
639
718
  Pack decryption keys are resolved from `$BRAINLINK_HOME/keys` (or `BRAINLINK_SEARCH_PACK_KEY` when explicitly set).
640
719
 
641
720
  ## Agent Integration Contract
@@ -669,9 +748,9 @@ Non-goals:
669
748
  ## Operational Rules
670
749
 
671
750
  - Re-run `index` after modifying notes.
672
- - Treat `.brainlink/brainlink.db` as disposable.
673
- - Commit Markdown notes, not local database files.
674
- - Do not manually edit the database.
751
+ - Treat `.brainlink/index.json` and `.brainlink/search-packs/` as disposable.
752
+ - Commit Markdown notes, not local index files.
753
+ - Do not manually edit generated index artifacts.
675
754
  - Keep generated context short enough for the target model.
676
755
  - Prefer specific queries over broad queries.
677
756
  - Write explicit `[[wiki links]]` when durable memory should be connected.
@@ -701,9 +780,9 @@ Weak retrieval usually means:
701
780
 
702
781
  ## Current Limits
703
782
 
704
- - Search supports FTS, local semantic embeddings, SQLite semantic buckets and hybrid ranking.
783
+ - Search supports FTS, local semantic embeddings and hybrid ranking.
705
784
  - Local embeddings are deterministic and provider-free; remote embedding providers are not implemented yet.
706
785
  - MCP integration is available through the `brainlink-mcp` stdio server.
707
786
  - HTTP API is local and unauthenticated.
708
- - Bucket vaults support S3-compatible `s3://bucket/prefix` URIs and use a local cache for SQLite indexes.
787
+ - Bucket vaults support S3-compatible `s3://bucket/prefix` URIs and use local cache/index artifacts.
709
788
  - Watch mode depends on platform filesystem watcher behavior and is only supported for local filesystem vaults.
@@ -8,7 +8,7 @@ CLI -> application use cases -> domain functions -> infrastructure adapters
8
8
 
9
9
  The core rule is simple:
10
10
 
11
- Domain code must not know about the CLI, filesystem, or SQLite.
11
+ Domain code must not know about the CLI, filesystem, or index persistence format.
12
12
 
13
13
  ## Modules
14
14
 
@@ -53,14 +53,11 @@ src/
53
53
  types.ts
54
54
 
55
55
  infrastructure/
56
- sqlite/
57
- document-writer.ts
58
- graph-reader.ts
59
- schema.ts
60
- search-reader.ts
56
+ file-index.ts
61
57
  file-system-vault.ts
58
+ private-pack-codec.ts
59
+ search-packs.ts
62
60
  session-state.ts
63
- sqlite-index.ts
64
61
 
65
62
  mcp/
66
63
  main.ts
@@ -80,7 +77,6 @@ The domain layer contains pure knowledge rules:
80
77
  - extract `#tags`
81
78
  - split documents into chunks
82
79
  - create deterministic local embeddings
83
- - create deterministic embedding buckets for semantic candidate retrieval
84
80
  - calculate cosine similarity
85
81
  - estimate token counts
86
82
  - select context sections
@@ -116,12 +112,11 @@ The infrastructure layer handles side effects:
116
112
  - mirroring S3-compatible bucket Markdown into a local cache
117
113
  - writing Markdown notes
118
114
  - creating `.brainlink`
119
- - writing and querying SQLite
120
- - running FTS, semantic and hybrid retrieval
121
- - narrowing semantic candidates through SQLite embedding buckets before cosine scoring
115
+ - writing and querying file-based indexes
116
+ - running lexical, semantic and hybrid retrieval
122
117
 
123
- SQLite is an index, not the canonical storage model. For bucket vaults, Markdown
124
- objects in the bucket remain canonical and SQLite is still local derived data.
118
+
119
+ Index artifacts are rebuildable and are not canonical storage. For bucket vaults, Markdown objects in the bucket remain canonical and local index files are derived data.
125
120
 
126
121
  ## Indexing Flow
127
122
 
@@ -132,11 +127,9 @@ read markdown files
132
127
  -> resolve links
133
128
  -> split chunks
134
129
  -> create chunk embeddings
135
- -> reset SQLite index
130
+ -> reset file index
136
131
  -> persist documents, chunks and links
137
- -> populate FTS records
138
- -> persist embedding vectors
139
- -> persist embedding buckets
132
+ -> persist chunks, links and embeddings in file index
140
133
  ```
141
134
 
142
135
  ## Retrieval Flow
@@ -145,8 +138,10 @@ read markdown files
145
138
  question
146
139
  -> selected mode: fts | semantic | hybrid
147
140
  -> optional query embedding
148
- -> FTS query and/or embedding bucket candidate lookup
141
+ -> optional compressed pack prefilter (token bloom)
142
+ -> lexical scoring and/or semantic cosine scoring
149
143
  -> cosine similarity over candidate chunks
144
+ -> middle-out context expansion around strongest chunk
150
145
  -> ranked chunks with textScore and semanticScore
151
146
  -> token-budget selection
152
147
  -> Markdown context package
@@ -163,7 +158,7 @@ server command
163
158
  -> browser renders graph canvas
164
159
  ```
165
160
 
166
- The graph UI is intentionally read-only. Markdown remains the write interface and SQLite remains a derived index.
161
+ The graph UI is intentionally read-only. Markdown remains the write interface and index artifacts remain derived data.
167
162
 
168
163
  ## HTTP API Flow
169
164
 
@@ -171,7 +166,7 @@ The graph UI is intentionally read-only. Markdown remains the write interface an
171
166
  HTTP request
172
167
  -> route handler
173
168
  -> application use case
174
- -> filesystem and SQLite adapters
169
+ -> filesystem and index adapters
175
170
  -> JSON response
176
171
  ```
177
172
 
@@ -282,11 +277,10 @@ vault/agents/<agent-id>/**/*.md
282
277
 
283
278
  Rebuildable:
284
279
 
285
- - `.brainlink/brainlink.db`
280
+ - `.brainlink/index.json`
281
+ - `.brainlink/search-packs/*.blpk`
286
282
  - `$BRAINLINK_HOME/bucket-cache`
287
- - FTS records
288
283
  - local embedding vectors
289
- - local embedding bucket index
290
284
  - chunks
291
285
  - resolved links
292
286
 
@@ -296,13 +290,14 @@ Rebuildable:
296
290
 
297
291
  Markdown keeps the system portable, inspectable, Git-friendly, and compatible with Obsidian-like workflows.
298
292
 
299
- ### SQLite As Local Index
293
+ ### File Index As Local Index
300
294
 
301
- SQLite gives fast local search, local vector storage and rebuildable retrieval without forcing users to run external infrastructure.
295
+ Brainlink uses a local JSON index plus encrypted pack exports for fast rebuildable retrieval without external infrastructure.
302
296
  Hybrid retrieval also uses a short-lived in-memory cache keyed by vault/query/agent and invalidated by index file mtime to reduce repeated query latency.
303
- Brainlink also writes a local rollback snapshot (`.brainlink/brainlink.db.backup`) after successful indexing. On corruption detection (`quick_check`/SQLite malformed errors), Brainlink restores from snapshot automatically before reopening the index.
304
- Indexing additionally exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks. Search falls back to these packs when SQLite is unavailable, preserving retrieval continuity in degraded mode.
297
+ Indexing exports private encrypted pack files (`.brainlink/search-packs/*.blpk`) from indexed chunks for fast retrieval and recovery continuity.
298
+ Pack manifests include compressed-space token bloom metadata so retrieval can skip unrelated packs before decryption.
305
299
  Pack encryption keys are resolved from `$BRAINLINK_HOME/keys` or from `BRAINLINK_SEARCH_PACK_KEY` when configured.
300
+ Legacy `.jsonl.gz` search packs are auto-upgraded to `.blpk` the first time a retrieval flow runs.
306
301
 
307
302
  ### CLI First
308
303
 
@@ -102,3 +102,10 @@ S3 target:
102
102
  ```bash
103
103
  blink migrate-vault --from ~/.brainlink/vault --to "s3://my-memory-bucket/brainlink" --dry-run
104
104
  ```
105
+
106
+ Legacy SQLite import:
107
+
108
+ ```bash
109
+ blink db-import --vault ./team-vault
110
+ blink db-import --vault ./team-vault --db ./legacy/brainlink.db --dry-run
111
+ ```
package/package.json CHANGED
@@ -1,10 +1,10 @@
1
1
  {
2
2
  "name": "@andespindola/brainlink",
3
- "version": "0.1.0-beta.12",
3
+ "version": "0.1.0-beta.120",
4
4
  "description": "Local-first knowledge memory for agents with Markdown, backlinks, indexing and context retrieval.",
5
5
  "type": "module",
6
6
  "license": "MIT",
7
- "author": "Anderson Espindola",
7
+ "author": "Substructa",
8
8
  "homepage": "https://github.com/andersonflima/brainlink#readme",
9
9
  "repository": {
10
10
  "type": "git",
@@ -32,6 +32,7 @@
32
32
  "dist",
33
33
  "assets",
34
34
  "README.md",
35
+ "COPYRIGHT.md",
35
36
  "LICENSE",
36
37
  "CHANGELOG.md",
37
38
  "CONTRIBUTING.md",
@@ -58,12 +59,13 @@
58
59
  "dependencies": {
59
60
  "@aws-sdk/client-s3": "^3.1038.0",
60
61
  "@modelcontextprotocol/sdk": "^1.29.0",
61
- "better-sqlite3": "^12.9.0",
62
62
  "commander": "^14.0.2",
63
63
  "zod": "^4.3.6"
64
64
  },
65
+ "overrides": {
66
+ "qs": "6.15.2"
67
+ },
65
68
  "devDependencies": {
66
- "@types/better-sqlite3": "^7.6.13",
67
69
  "@types/node": "^24.9.2",
68
70
  "tsx": "^4.21.0",
69
71
  "typescript": "^5.9.3",
@@ -1,51 +0,0 @@
1
- import { createEmbeddingBuckets } from '../../domain/embeddings.js';
2
- const toTitleKey = (title) => title.toLowerCase();
3
- export const createIndexWriter = (database) => ({
4
- reset: () => {
5
- database.exec(`
6
- DELETE FROM embedding_buckets;
7
- DELETE FROM chunks_fts;
8
- DELETE FROM links;
9
- DELETE FROM chunks;
10
- DELETE FROM documents;
11
- `);
12
- },
13
- saveDocuments: (documents) => {
14
- const insertDocument = database.prepare(`
15
- INSERT INTO documents (id, agent_id, title, path, content, tags_json, frontmatter_json, created_at, updated_at)
16
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
17
- `);
18
- const insertChunk = database.prepare(`
19
- INSERT INTO chunks (id, document_id, ordinal, content, token_count, embedding_provider, embedding_json)
20
- VALUES (?, ?, ?, ?, ?, ?, ?)
21
- `);
22
- const insertChunkFts = database.prepare(`
23
- INSERT INTO chunks_fts (chunk_id, document_id, agent_id, title, content)
24
- VALUES (?, ?, ?, ?, ?)
25
- `);
26
- const insertEmbeddingBucket = database.prepare(`
27
- INSERT OR IGNORE INTO embedding_buckets (bucket, chunk_id)
28
- VALUES (?, ?)
29
- `);
30
- const insertLink = database.prepare(`
31
- INSERT INTO links (from_document_id, to_title, to_title_key, to_document_id, weight, priority)
32
- VALUES (?, ?, ?, ?, ?, ?)
33
- `);
34
- const transaction = database.transaction(() => {
35
- documents.forEach(({ document, chunks, links }) => {
36
- insertDocument.run(document.id, document.agentId, document.title, document.path, document.content, JSON.stringify(document.tags), JSON.stringify(document.frontmatter), document.createdAt, document.updatedAt);
37
- chunks.forEach((chunk) => {
38
- insertChunk.run(chunk.id, chunk.documentId, chunk.ordinal, chunk.content, chunk.tokenCount, chunk.embeddingProvider, JSON.stringify(chunk.embedding));
39
- insertChunkFts.run(chunk.id, chunk.documentId, document.agentId, document.title, chunk.content);
40
- createEmbeddingBuckets(chunk.embedding).forEach((bucket) => insertEmbeddingBucket.run(bucket, chunk.id));
41
- });
42
- });
43
- documents.forEach(({ links }) => {
44
- links.forEach((link) => {
45
- insertLink.run(link.fromDocumentId, link.toTitle, toTitleKey(link.toTitle), link.toDocumentId, link.weight, link.priority);
46
- });
47
- });
48
- });
49
- transaction();
50
- }
51
- });