context-mode 1.0.88 → 1.0.90
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/.openclaw-plugin/openclaw.plugin.json +1 -1
- package/.openclaw-plugin/package.json +1 -1
- package/README.md +184 -60
- package/build/adapters/antigravity/index.d.ts +3 -5
- package/build/adapters/antigravity/index.js +7 -35
- package/build/adapters/base.d.ts +27 -0
- package/build/adapters/base.js +59 -0
- package/build/adapters/claude-code/index.d.ts +9 -25
- package/build/adapters/claude-code/index.js +27 -141
- package/build/adapters/claude-code-base.d.ts +49 -0
- package/build/adapters/claude-code-base.js +113 -0
- package/build/adapters/client-map.js +5 -0
- package/build/adapters/codex/hooks.d.ts +21 -14
- package/build/adapters/codex/hooks.js +22 -15
- package/build/adapters/codex/index.d.ts +6 -10
- package/build/adapters/codex/index.js +13 -43
- package/build/adapters/copilot-base.d.ts +78 -0
- package/build/adapters/copilot-base.js +281 -0
- package/build/adapters/cursor/index.d.ts +3 -5
- package/build/adapters/cursor/index.js +6 -34
- package/build/adapters/detect.d.ts +7 -0
- package/build/adapters/detect.js +57 -56
- package/build/adapters/gemini-cli/index.d.ts +3 -5
- package/build/adapters/gemini-cli/index.js +7 -35
- package/build/adapters/jetbrains-copilot/config.d.ts +8 -0
- package/build/adapters/jetbrains-copilot/config.js +8 -0
- package/build/adapters/jetbrains-copilot/hooks.d.ts +51 -0
- package/build/adapters/jetbrains-copilot/hooks.js +82 -0
- package/build/adapters/jetbrains-copilot/index.d.ts +24 -0
- package/build/adapters/jetbrains-copilot/index.js +119 -0
- package/build/adapters/kiro/hooks.d.ts +14 -0
- package/build/adapters/kiro/hooks.js +23 -0
- package/build/adapters/kiro/index.d.ts +3 -5
- package/build/adapters/kiro/index.js +10 -38
- package/build/adapters/openclaw/index.d.ts +3 -4
- package/build/adapters/openclaw/index.js +6 -22
- package/build/adapters/opencode/index.d.ts +2 -3
- package/build/adapters/opencode/index.js +5 -16
- package/build/adapters/qwen-code/index.d.ts +39 -0
- package/build/adapters/qwen-code/index.js +199 -0
- package/build/adapters/types.d.ts +1 -1
- package/build/adapters/vscode-copilot/index.d.ts +16 -46
- package/build/adapters/vscode-copilot/index.js +29 -320
- package/build/adapters/zed/index.d.ts +3 -5
- package/build/adapters/zed/index.js +7 -35
- package/build/cli.js +113 -47
- package/build/lifecycle.d.ts +23 -0
- package/build/lifecycle.js +54 -13
- package/build/opencode-plugin.d.ts +19 -7
- package/build/opencode-plugin.js +19 -7
- package/build/pi-extension.js +24 -7
- package/build/runtime.js +24 -9
- package/build/security.d.ts +17 -1
- package/build/security.js +40 -6
- package/build/server.js +129 -21
- package/build/session/analytics.d.ts +8 -7
- package/build/session/analytics.js +95 -75
- package/build/session/db.d.ts +10 -1
- package/build/session/db.js +67 -8
- package/build/session/extract.js +10 -2
- package/build/session/project-attribution.d.ts +73 -0
- package/build/session/project-attribution.js +231 -0
- package/build/store.d.ts +7 -0
- package/build/store.js +117 -18
- package/build/truncate.d.ts +6 -0
- package/build/truncate.js +51 -29
- package/build/types.d.ts +8 -0
- package/cli.bundle.mjs +157 -136
- package/configs/antigravity/GEMINI.md +31 -36
- package/configs/claude-code/CLAUDE.md +31 -37
- package/configs/codex/AGENTS.md +35 -49
- package/configs/cursor/context-mode.mdc +24 -25
- package/configs/gemini-cli/GEMINI.md +30 -36
- package/configs/jetbrains-copilot/copilot-instructions.md +59 -0
- package/configs/jetbrains-copilot/hooks.json +16 -0
- package/configs/jetbrains-copilot/mcp.json +8 -0
- package/configs/kilo/AGENTS.md +30 -36
- package/configs/kiro/KIRO.md +30 -36
- package/configs/kiro/agent.json +1 -1
- package/configs/openclaw/AGENTS.md +30 -36
- package/configs/opencode/AGENTS.md +30 -36
- package/configs/pi/AGENTS.md +31 -36
- package/configs/qwen-code/QWEN.md +63 -0
- package/configs/vscode-copilot/copilot-instructions.md +30 -36
- package/configs/zed/AGENTS.md +31 -36
- package/hooks/codex/posttooluse.mjs +7 -7
- package/hooks/codex/pretooluse.mjs +3 -3
- package/hooks/codex/sessionstart.mjs +2 -1
- package/hooks/core/formatters.mjs +24 -0
- package/hooks/core/routing.mjs +40 -15
- package/hooks/core/tool-naming.mjs +2 -0
- package/hooks/cursor/posttooluse.mjs +7 -7
- package/hooks/cursor/pretooluse.mjs +3 -3
- package/hooks/cursor/sessionstart.mjs +2 -1
- package/hooks/cursor/stop.mjs +2 -2
- package/hooks/ensure-deps.mjs +22 -10
- package/hooks/gemini-cli/aftertool.mjs +8 -8
- package/hooks/gemini-cli/beforetool.mjs +3 -2
- package/hooks/gemini-cli/precompress.mjs +2 -2
- package/hooks/gemini-cli/sessionstart.mjs +12 -4
- package/hooks/jetbrains-copilot/posttooluse.mjs +61 -0
- package/hooks/jetbrains-copilot/precompact.mjs +54 -0
- package/hooks/jetbrains-copilot/pretooluse.mjs +27 -0
- package/hooks/jetbrains-copilot/sessionstart.mjs +119 -0
- package/hooks/kiro/posttooluse.mjs +6 -7
- package/hooks/kiro/pretooluse.mjs +3 -2
- package/hooks/posttooluse.mjs +8 -8
- package/hooks/precompact.mjs +3 -4
- package/hooks/pretooluse.mjs +43 -20
- package/hooks/routing-block.mjs +35 -33
- package/hooks/session-attribution.bundle.mjs +1 -0
- package/hooks/session-db.bundle.mjs +27 -8
- package/hooks/session-extract.bundle.mjs +2 -1
- package/hooks/session-helpers.mjs +44 -3
- package/hooks/session-loaders.mjs +37 -0
- package/hooks/session-snapshot.bundle.mjs +14 -14
- package/hooks/sessionstart.mjs +5 -5
- package/hooks/userpromptsubmit.mjs +26 -9
- package/hooks/vscode-copilot/posttooluse.mjs +8 -8
- package/hooks/vscode-copilot/precompact.mjs +2 -2
- package/hooks/vscode-copilot/pretooluse.mjs +3 -2
- package/hooks/vscode-copilot/sessionstart.mjs +2 -2
- package/insight/server.mjs +262 -32
- package/insight/src/lib/api.ts +2 -1
- package/insight/src/routes/index.tsx +16 -3
- package/insight/src/routes/search.tsx +1 -1
- package/openclaw.plugin.json +1 -1
- package/package.json +11 -2
- package/server.bundle.mjs +117 -99
- package/skills/ctx-insight/SKILL.md +1 -1
package/build/store.js
CHANGED
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
var _a;
|
|
11
11
|
import { loadDatabase, applyWALPragmas, closeDB, cleanOrphanedWALFiles, withRetry, deleteDBFiles, isSQLiteCorruptionError } from "./db-base.js";
|
|
12
12
|
import { readFileSync, readdirSync, unlinkSync, existsSync, statSync } from "node:fs";
|
|
13
|
+
import { createHash } from "node:crypto";
|
|
13
14
|
import { tmpdir } from "node:os";
|
|
14
15
|
import { join } from "node:path";
|
|
15
16
|
// ─────────────────────────────────────────────────────────
|
|
@@ -33,12 +34,30 @@ const STOPWORDS = new Set([
|
|
|
33
34
|
// ─────────────────────────────────────────────────────────
|
|
34
35
|
// Helpers
|
|
35
36
|
// ─────────────────────────────────────────────────────────
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
/**
 * Collapse tokens that differ only by letter case, keeping the spelling of
 * whichever variant appeared first and preserving first-appearance order.
 *
 * Used when building FTS5 queries: repeating a term in another casing adds
 * nothing to recall, so the compiled query is kept as short as possible.
 *
 * @param tokens - Iterable of string tokens.
 * @returns A new array holding the first occurrence of each distinct
 *          lower-cased token.
 */
function dedupeTokens(tokens) {
    // Map keeps insertion order, so values() yields first occurrences in
    // the order they were encountered.
    const firstSpelling = new Map();
    for (const token of tokens) {
        const folded = token.toLowerCase();
        if (firstSpelling.has(folded))
            continue;
        firstSpelling.set(folded, token);
    }
    return [...firstSpelling.values()];
}
|
|
55
|
+
export function sanitizeQuery(query, mode = "AND") {
|
|
56
|
+
const words = dedupeTokens(query
|
|
38
57
|
.replace(/['"(){}[\]*:^~]/g, " ")
|
|
39
58
|
.split(/\s+/)
|
|
40
59
|
.filter((w) => w.length > 0 &&
|
|
41
|
-
!["AND", "OR", "NOT", "NEAR"].includes(w.toUpperCase()));
|
|
60
|
+
!["AND", "OR", "NOT", "NEAR"].includes(w.toUpperCase())));
|
|
42
61
|
if (words.length === 0)
|
|
43
62
|
return '""';
|
|
44
63
|
// Filter stopwords to improve BM25 ranking — common terms like "update",
|
|
@@ -48,11 +67,11 @@ function sanitizeQuery(query, mode = "AND") {
|
|
|
48
67
|
const final = meaningful.length > 0 ? meaningful : words;
|
|
49
68
|
return final.map((w) => `"${w}"`).join(mode === "OR" ? " OR " : " ");
|
|
50
69
|
}
|
|
51
|
-
function sanitizeTrigramQuery(query, mode = "AND") {
|
|
70
|
+
export function sanitizeTrigramQuery(query, mode = "AND") {
|
|
52
71
|
const cleaned = query.replace(/["'(){}[\]*:^~]/g, "").trim();
|
|
53
72
|
if (cleaned.length < 3)
|
|
54
73
|
return "";
|
|
55
|
-
const words = cleaned.split(/\s+/).filter((w) => w.length >= 3);
|
|
74
|
+
const words = dedupeTokens(cleaned.split(/\s+/).filter((w) => w.length >= 3));
|
|
56
75
|
if (words.length === 0)
|
|
57
76
|
return "";
|
|
58
77
|
const meaningful = words.filter((w) => !STOPWORDS.has(w.toLowerCase()));
|
|
@@ -280,6 +299,13 @@ export class ContentStore {
|
|
|
280
299
|
// search performance. SQLite's built-in 'optimize' merges b-tree segments.
|
|
281
300
|
#insertCount = 0;
|
|
282
301
|
static OPTIMIZE_EVERY = 50;
|
|
302
|
+
// Fuzzy correction cache (process-local LRU). fuzzyCorrect() hits the vocab
|
|
303
|
+
// DB and runs levenshtein against every candidate within length tolerance,
|
|
304
|
+
// which is CPU-linear in |candidates|. Repeated queries ("erro", "erro" …)
|
|
305
|
+
// recompute the same answer. The vocabulary table is insert-only, so cache
|
|
306
|
+
// entries only become stale when new words enter — we clear on actual insert.
|
|
307
|
+
#fuzzyCache = new Map();
|
|
308
|
+
static FUZZY_CACHE_SIZE = 256;
|
|
283
309
|
constructor(dbPath) {
|
|
284
310
|
const Database = loadDatabase();
|
|
285
311
|
this.#dbPath =
|
|
@@ -332,7 +358,9 @@ export class ContentStore {
|
|
|
332
358
|
label TEXT NOT NULL,
|
|
333
359
|
chunk_count INTEGER NOT NULL DEFAULT 0,
|
|
334
360
|
code_chunk_count INTEGER NOT NULL DEFAULT 0,
|
|
335
|
-
indexed_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
361
|
+
indexed_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
362
|
+
file_path TEXT,
|
|
363
|
+
content_hash TEXT
|
|
336
364
|
);
|
|
337
365
|
|
|
338
366
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks USING fts5(
|
|
@@ -357,11 +385,20 @@ export class ContentStore {
|
|
|
357
385
|
|
|
358
386
|
CREATE INDEX IF NOT EXISTS idx_sources_label ON sources(label);
|
|
359
387
|
`);
|
|
388
|
+
// Stale detection columns — safe for existing DBs (ALTER is O(1) in SQLite)
|
|
389
|
+
try {
|
|
390
|
+
this.#db.exec("ALTER TABLE sources ADD COLUMN file_path TEXT");
|
|
391
|
+
}
|
|
392
|
+
catch { /* already exists */ }
|
|
393
|
+
try {
|
|
394
|
+
this.#db.exec("ALTER TABLE sources ADD COLUMN content_hash TEXT");
|
|
395
|
+
}
|
|
396
|
+
catch { /* already exists */ }
|
|
360
397
|
}
|
|
361
398
|
#prepareStatements() {
|
|
362
399
|
// Write path
|
|
363
|
-
this.#stmtInsertSourceEmpty = this.#db.prepare("INSERT INTO sources (label, chunk_count, code_chunk_count) VALUES (?, 0, 0)");
|
|
364
|
-
this.#stmtInsertSource = this.#db.prepare("INSERT INTO sources (label, chunk_count, code_chunk_count) VALUES (?, ?, ?)");
|
|
400
|
+
this.#stmtInsertSourceEmpty = this.#db.prepare("INSERT INTO sources (label, chunk_count, code_chunk_count, file_path, content_hash) VALUES (?, 0, 0, ?, ?)");
|
|
401
|
+
this.#stmtInsertSource = this.#db.prepare("INSERT INTO sources (label, chunk_count, code_chunk_count, file_path, content_hash) VALUES (?, ?, ?, ?, ?)");
|
|
365
402
|
this.#stmtInsertChunk = this.#db.prepare("INSERT INTO chunks (title, content, source_id, content_type) VALUES (?, ?, ?, ?)");
|
|
366
403
|
this.#stmtInsertChunkTrigram = this.#db.prepare("INSERT INTO chunks_trigram (title, content, source_id, content_type) VALUES (?, ?, ?, ?)");
|
|
367
404
|
this.#stmtInsertVocab = this.#db.prepare("INSERT OR IGNORE INTO vocabulary (word) VALUES (?)");
|
|
@@ -551,7 +588,7 @@ export class ContentStore {
|
|
|
551
588
|
ORDER BY c.rowid`);
|
|
552
589
|
this.#stmtSourceChunkCount = this.#db.prepare("SELECT chunk_count FROM sources WHERE id = ?");
|
|
553
590
|
this.#stmtChunkContent = this.#db.prepare("SELECT content FROM chunks WHERE source_id = ?");
|
|
554
|
-
this.#stmtSourceMeta = this.#db.prepare("SELECT label, chunk_count, code_chunk_count, indexed_at FROM sources WHERE label = ?");
|
|
591
|
+
this.#stmtSourceMeta = this.#db.prepare("SELECT label, chunk_count, code_chunk_count, indexed_at, file_path, content_hash FROM sources WHERE label = ?");
|
|
555
592
|
this.#stmtStats = this.#db.prepare(`
|
|
556
593
|
SELECT
|
|
557
594
|
(SELECT COUNT(*) FROM sources) AS sources,
|
|
@@ -572,7 +609,10 @@ export class ContentStore {
|
|
|
572
609
|
const text = content ?? readFileSync(path, "utf-8");
|
|
573
610
|
const label = source ?? path ?? "untitled";
|
|
574
611
|
const chunks = this.#chunkMarkdown(text);
|
|
575
|
-
|
|
612
|
+
// Stale detection: store file_path + SHA-256 for file-backed sources
|
|
613
|
+
const filePath = path ?? undefined;
|
|
614
|
+
const contentHash = filePath ? createHash("sha256").update(text).digest("hex") : undefined;
|
|
615
|
+
return withRetry(() => this.#insertChunks(chunks, label, text, filePath, contentHash));
|
|
576
616
|
}
|
|
577
617
|
// ── Index Plain Text ──
|
|
578
618
|
/**
|
|
@@ -619,7 +659,7 @@ export class ContentStore {
|
|
|
619
659
|
* into both FTS5 tables within a transaction and extracts vocabulary.
|
|
620
660
|
* Uses cached prepared statements from #prepareStatements().
|
|
621
661
|
*/
|
|
622
|
-
#insertChunks(chunks, label, text) {
|
|
662
|
+
#insertChunks(chunks, label, text, filePath, contentHash) {
|
|
623
663
|
const codeChunks = chunks.filter((c) => c.hasCode).length;
|
|
624
664
|
// Atomic dedup + insert: delete previous source with same label,
|
|
625
665
|
// then insert new content — all within a single transaction.
|
|
@@ -629,10 +669,10 @@ export class ContentStore {
|
|
|
629
669
|
this.#stmtDeleteChunksTrigramByLabel.run(label);
|
|
630
670
|
this.#stmtDeleteSourcesByLabel.run(label);
|
|
631
671
|
if (chunks.length === 0) {
|
|
632
|
-
const info = this.#stmtInsertSourceEmpty.run(label);
|
|
672
|
+
const info = this.#stmtInsertSourceEmpty.run(label, filePath ?? null, contentHash ?? null);
|
|
633
673
|
return Number(info.lastInsertRowid);
|
|
634
674
|
}
|
|
635
|
-
const info = this.#stmtInsertSource.run(label, chunks.length, codeChunks);
|
|
675
|
+
const info = this.#stmtInsertSource.run(label, chunks.length, codeChunks, filePath ?? null, contentHash ?? null);
|
|
636
676
|
const sourceId = Number(info.lastInsertRowid);
|
|
637
677
|
for (const chunk of chunks) {
|
|
638
678
|
const ct = chunk.hasCode ? "code" : "prose";
|
|
@@ -733,20 +773,38 @@ export class ContentStore {
|
|
|
733
773
|
const word = query.toLowerCase().trim();
|
|
734
774
|
if (word.length < 3)
|
|
735
775
|
return null;
|
|
776
|
+
// Cache hit: promote to tail (Map preserves insertion order → LRU).
|
|
777
|
+
if (this.#fuzzyCache.has(word)) {
|
|
778
|
+
const cached = this.#fuzzyCache.get(word) ?? null;
|
|
779
|
+
this.#fuzzyCache.delete(word);
|
|
780
|
+
this.#fuzzyCache.set(word, cached);
|
|
781
|
+
return cached;
|
|
782
|
+
}
|
|
736
783
|
const maxDist = maxEditDistance(word.length);
|
|
737
784
|
const candidates = this.#stmtFuzzyVocab.all(word.length - maxDist, word.length + maxDist);
|
|
738
785
|
let bestWord = null;
|
|
739
786
|
let bestDist = maxDist + 1;
|
|
787
|
+
let exactMatch = false;
|
|
740
788
|
for (const { word: candidate } of candidates) {
|
|
741
|
-
if (candidate === word)
|
|
742
|
-
|
|
789
|
+
if (candidate === word) {
|
|
790
|
+
exactMatch = true;
|
|
791
|
+
break;
|
|
792
|
+
}
|
|
743
793
|
const dist = levenshtein(word, candidate);
|
|
744
794
|
if (dist < bestDist) {
|
|
745
795
|
bestDist = dist;
|
|
746
796
|
bestWord = candidate;
|
|
747
797
|
}
|
|
748
798
|
}
|
|
749
|
-
|
|
799
|
+
const result = exactMatch ? null : bestDist <= maxDist ? bestWord : null;
|
|
800
|
+
// Evict the oldest entry before insert if we hit the size cap.
|
|
801
|
+
if (this.#fuzzyCache.size >= _a.FUZZY_CACHE_SIZE) {
|
|
802
|
+
const oldestKey = this.#fuzzyCache.keys().next().value;
|
|
803
|
+
if (oldestKey !== undefined)
|
|
804
|
+
this.#fuzzyCache.delete(oldestKey);
|
|
805
|
+
}
|
|
806
|
+
this.#fuzzyCache.set(word, result);
|
|
807
|
+
return result;
|
|
750
808
|
}
|
|
751
809
|
// ── Reciprocal Rank Fusion (Cormack et al. 2009) ──
|
|
752
810
|
#rrfSearch(query, limit, source, contentType, sourceMatchMode = "like") {
|
|
@@ -818,6 +876,8 @@ export class ContentStore {
|
|
|
818
876
|
}
|
|
819
877
|
// ── Unified Fallback Search ──
|
|
820
878
|
searchWithFallback(query, limit = 3, source, contentType, sourceMatchMode = "like") {
|
|
879
|
+
// Step 0: Auto-refresh stale file-backed sources before searching
|
|
880
|
+
this.#refreshStaleSources();
|
|
821
881
|
// Step 1: RRF fusion (porter OR + trigram OR → merge)
|
|
822
882
|
const rrfResults = this.#rrfSearch(query, limit, source, contentType, sourceMatchMode);
|
|
823
883
|
if (rrfResults.length > 0) {
|
|
@@ -844,12 +904,44 @@ export class ContentStore {
|
|
|
844
904
|
}
|
|
845
905
|
return [];
|
|
846
906
|
}
|
|
907
|
+
/** Number of sources auto-refreshed in the last searchWithFallback call. */
|
|
908
|
+
lastRefreshCount = 0;
|
|
909
|
+
/**
|
|
910
|
+
* Check all file-backed sources for staleness and auto re-index changed files.
|
|
911
|
+
* Uses mtime as a fast gate — only computes SHA-256 when mtime has advanced
|
|
912
|
+
* past indexed_at. Gracefully skips deleted files and non-file sources.
|
|
913
|
+
*/
|
|
914
|
+
#refreshStaleSources() {
|
|
915
|
+
this.lastRefreshCount = 0;
|
|
916
|
+
const sources = this.#db.prepare("SELECT label, file_path, content_hash, indexed_at FROM sources WHERE file_path IS NOT NULL").all();
|
|
917
|
+
for (const src of sources) {
|
|
918
|
+
try {
|
|
919
|
+
if (!existsSync(src.file_path))
|
|
920
|
+
continue; // file deleted — keep cached results
|
|
921
|
+
const mtime = statSync(src.file_path).mtime;
|
|
922
|
+
const indexedAt = new Date(src.indexed_at + "Z");
|
|
923
|
+
if (mtime <= indexedAt)
|
|
924
|
+
continue; // file unchanged — fast path
|
|
925
|
+
// mtime advanced — check hash to confirm real change (not just touch)
|
|
926
|
+
const newContent = readFileSync(src.file_path, "utf-8");
|
|
927
|
+
const newHash = createHash("sha256").update(newContent).digest("hex");
|
|
928
|
+
if (newHash === src.content_hash)
|
|
929
|
+
continue; // content identical — skip
|
|
930
|
+
// File genuinely changed — re-index
|
|
931
|
+
this.index({ path: src.file_path, source: src.label });
|
|
932
|
+
this.lastRefreshCount++;
|
|
933
|
+
}
|
|
934
|
+
catch {
|
|
935
|
+
// Graceful degradation — never break search for stale detection
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
}
|
|
847
939
|
// ── Sources ──
|
|
848
940
|
getSourceMeta(label) {
|
|
849
941
|
const row = this.#stmtSourceMeta.get(label);
|
|
850
942
|
if (!row)
|
|
851
943
|
return null;
|
|
852
|
-
return { label: row.label, chunkCount: row.chunk_count, codeChunkCount: row.code_chunk_count, indexedAt: row.indexed_at };
|
|
944
|
+
return { label: row.label, chunkCount: row.chunk_count, codeChunkCount: row.code_chunk_count, indexedAt: row.indexed_at, filePath: row.file_path ?? null, contentHash: row.content_hash ?? null };
|
|
853
945
|
}
|
|
854
946
|
listSources() {
|
|
855
947
|
return this.#stmtListSources.all();
|
|
@@ -955,11 +1047,18 @@ export class ContentStore {
|
|
|
955
1047
|
.split(/[^\p{L}\p{N}_-]+/u)
|
|
956
1048
|
.filter((w) => w.length >= 3 && !STOPWORDS.has(w));
|
|
957
1049
|
const unique = [...new Set(words)];
|
|
1050
|
+
let inserted = 0;
|
|
958
1051
|
this.#db.transaction(() => {
|
|
959
1052
|
for (const word of unique) {
|
|
960
|
-
this.#stmtInsertVocab.run(word);
|
|
1053
|
+
const info = this.#stmtInsertVocab.run(word);
|
|
1054
|
+
inserted += info.changes;
|
|
961
1055
|
}
|
|
962
1056
|
})();
|
|
1057
|
+
// Invalidate fuzzy cache when new vocab words actually land. INSERT OR
|
|
1058
|
+
// IGNORE reports changes=0 for duplicates, so re-indexing identical
|
|
1059
|
+
// content does not thrash the cache during iterative workflows.
|
|
1060
|
+
if (inserted > 0)
|
|
1061
|
+
this.#fuzzyCache.clear();
|
|
963
1062
|
}
|
|
964
1063
|
// ── Chunking ──
|
|
965
1064
|
#chunkMarkdown(text, maxChunkBytes = MAX_CHUNK_BYTES) {
|
package/build/truncate.d.ts
CHANGED
|
@@ -11,6 +11,9 @@
|
|
|
11
11
|
* "... [truncated]" is appended. The result is NOT guaranteed to be valid
|
|
12
12
|
* JSON after truncation — it is suitable only for display/logging.
|
|
13
13
|
*
|
|
14
|
+
* The returned string is always <= `maxBytes` bytes. When `maxBytes` is
|
|
15
|
+
* smaller than the marker, the marker itself is byte-safely truncated.
|
|
16
|
+
*
|
|
14
17
|
* @param value - Any JSON-serializable value.
|
|
15
18
|
* @param maxBytes - Maximum byte length of the returned string.
|
|
16
19
|
* @param indent - JSON indentation spaces (default 2). Pass 0 for compact.
|
|
@@ -30,6 +33,9 @@ export declare function escapeXML(str: string): string;
|
|
|
30
33
|
* byte-safe slice with an ellipsis appended. Useful for single-value fields
|
|
31
34
|
* (e.g., tool response strings) where head+tail splitting is not needed.
|
|
32
35
|
*
|
|
36
|
+
* The returned string is always <= `maxBytes` bytes. When `maxBytes` is
|
|
37
|
+
* smaller than the ellipsis marker, the marker itself is byte-safely truncated.
|
|
38
|
+
*
|
|
33
39
|
* @param str - Input string.
|
|
34
40
|
* @param maxBytes - Hard byte cap.
|
|
35
41
|
*/
|
package/build/truncate.js
CHANGED
|
@@ -6,6 +6,43 @@
|
|
|
6
6
|
* consumer can import them without pulling in the full store or executor.
|
|
7
7
|
*/
|
|
8
8
|
// ─────────────────────────────────────────────────────────
|
|
9
|
+
// Internal: byte-safe prefix
|
|
10
|
+
// ─────────────────────────────────────────────────────────
|
|
11
|
+
/**
 * Longest leading slice of `str` whose UTF-8 encoding fits in `maxBytes`.
 *
 * - A budget of zero or less yields "".
 * - A string that already fits is returned unchanged.
 * - Otherwise the cut point (in UTF-16 code units) is located by bisection,
 *   and if the slice would end on a high surrogate the cut moves back one
 *   unit so the result never ends with half of a surrogate pair.
 *
 * @param str - Input string.
 * @param maxBytes - Byte budget for the UTF-8 encoded result.
 * @returns A prefix of `str` encoding to at most `maxBytes` bytes.
 */
function byteSafePrefix(str, maxBytes) {
    if (maxBytes <= 0)
        return "";
    if (Buffer.byteLength(str) <= maxBytes)
        return str;
    // True when the first `units` code units encode within the budget.
    const fits = (units) => Buffer.byteLength(str.slice(0, units)) <= maxBytes;
    let keep = 0;
    let ceiling = str.length;
    while (keep < ceiling) {
        // Upper midpoint, so the search always makes progress when `fits`.
        const probe = keep + Math.ceil((ceiling - keep) / 2);
        if (fits(probe)) {
            keep = probe;
        }
        else {
            ceiling = probe - 1;
        }
    }
    // Step back when the final unit is a high surrogate (0xD800–0xDBFF),
    // so the prefix ends on a whole code point.
    const lastUnit = keep > 0 ? str.charCodeAt(keep - 1) : 0;
    const cut = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? keep - 1 : keep;
    return str.slice(0, cut);
}
|
|
45
|
+
// ─────────────────────────────────────────────────────────
|
|
9
46
|
// JSON truncation
|
|
10
47
|
// ─────────────────────────────────────────────────────────
|
|
11
48
|
/**
|
|
@@ -14,6 +51,9 @@
|
|
|
14
51
|
* "... [truncated]" is appended. The result is NOT guaranteed to be valid
|
|
15
52
|
* JSON after truncation — it is suitable only for display/logging.
|
|
16
53
|
*
|
|
54
|
+
* The returned string is always <= `maxBytes` bytes. When `maxBytes` is
|
|
55
|
+
* smaller than the marker, the marker itself is byte-safely truncated.
|
|
56
|
+
*
|
|
17
57
|
* @param value - Any JSON-serializable value.
|
|
18
58
|
* @param maxBytes - Maximum byte length of the returned string.
|
|
19
59
|
* @param indent - JSON indentation spaces (default 2). Pass 0 for compact.
|
|
@@ -22,24 +62,13 @@ export function truncateJSON(value, maxBytes, indent = 2) {
|
|
|
22
62
|
const serialized = JSON.stringify(value, null, indent) ?? "null";
|
|
23
63
|
if (Buffer.byteLength(serialized) <= maxBytes)
|
|
24
64
|
return serialized;
|
|
25
|
-
// Find the largest character slice that stays within maxBytes once encoded.
|
|
26
|
-
// Buffer.byteLength is O(n) but we only call it once per truncation.
|
|
27
65
|
const marker = "... [truncated]";
|
|
28
66
|
const markerBytes = Buffer.byteLength(marker);
|
|
29
|
-
|
|
30
|
-
//
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
const mid = (lo + hi + 1) >> 1;
|
|
35
|
-
if (Buffer.byteLength(serialized.slice(0, mid)) <= budget) {
|
|
36
|
-
lo = mid;
|
|
37
|
-
}
|
|
38
|
-
else {
|
|
39
|
-
hi = mid - 1;
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
return serialized.slice(0, lo) + marker;
|
|
67
|
+
// Degenerate budget: can't fit serialized content + marker. Fit as much of
|
|
68
|
+
// the marker as we can so the return still honors `maxBytes`.
|
|
69
|
+
if (maxBytes <= markerBytes)
|
|
70
|
+
return byteSafePrefix(marker, maxBytes);
|
|
71
|
+
return byteSafePrefix(serialized, maxBytes - markerBytes) + marker;
|
|
43
72
|
}
|
|
44
73
|
// ─────────────────────────────────────────────────────────
|
|
45
74
|
// XML / HTML escaping
|
|
@@ -68,6 +97,9 @@ export function escapeXML(str) {
|
|
|
68
97
|
* byte-safe slice with an ellipsis appended. Useful for single-value fields
|
|
69
98
|
* (e.g., tool response strings) where head+tail splitting is not needed.
|
|
70
99
|
*
|
|
100
|
+
* The returned string is always <= `maxBytes` bytes. When `maxBytes` is
|
|
101
|
+
* smaller than the ellipsis marker, the marker itself is byte-safely truncated.
|
|
102
|
+
*
|
|
71
103
|
* @param str - Input string.
|
|
72
104
|
* @param maxBytes - Hard byte cap.
|
|
73
105
|
*/
|
|
@@ -76,17 +108,7 @@ export function capBytes(str, maxBytes) {
|
|
|
76
108
|
return str;
|
|
77
109
|
const marker = "...";
|
|
78
110
|
const markerBytes = Buffer.byteLength(marker);
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
while (lo < hi) {
|
|
83
|
-
const mid = (lo + hi + 1) >> 1;
|
|
84
|
-
if (Buffer.byteLength(str.slice(0, mid)) <= budget) {
|
|
85
|
-
lo = mid;
|
|
86
|
-
}
|
|
87
|
-
else {
|
|
88
|
-
hi = mid - 1;
|
|
89
|
-
}
|
|
90
|
-
}
|
|
91
|
-
return str.slice(0, lo) + marker;
|
|
111
|
+
if (maxBytes <= markerBytes)
|
|
112
|
+
return byteSafePrefix(marker, maxBytes);
|
|
113
|
+
return byteSafePrefix(str, maxBytes - markerBytes) + marker;
|
|
92
114
|
}
|
package/build/types.d.ts
CHANGED
|
@@ -29,6 +29,14 @@ export interface SessionEvent {
|
|
|
29
29
|
data: string;
|
|
30
30
|
priority: number;
|
|
31
31
|
data_hash: string;
|
|
32
|
+
/**
|
|
33
|
+
* Best-effort project attribution for this event.
|
|
34
|
+
* Empty string means unattributed/unknown.
|
|
35
|
+
*/
|
|
36
|
+
project_dir?: string;
|
|
37
|
+
attribution_source?: string;
|
|
38
|
+
/** 0..1 confidence score for project attribution. */
|
|
39
|
+
attribution_confidence?: number;
|
|
32
40
|
}
|
|
33
41
|
/**
|
|
34
42
|
* Result returned by PolyglotExecutor after running a code snippet.
|