claude-transcript-viewer 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +215 -0
- package/dist/api/search.d.ts +50 -0
- package/dist/api/search.js +181 -0
- package/dist/api/search.js.map +1 -0
- package/dist/api/snippets.d.ts +2 -0
- package/dist/api/snippets.js +49 -0
- package/dist/api/snippets.js.map +1 -0
- package/dist/config.d.ts +14 -0
- package/dist/config.js +52 -0
- package/dist/config.js.map +1 -0
- package/dist/db/chunks.d.ts +14 -0
- package/dist/db/chunks.js +43 -0
- package/dist/db/chunks.js.map +1 -0
- package/dist/db/conversations.d.ts +16 -0
- package/dist/db/conversations.js +40 -0
- package/dist/db/conversations.js.map +1 -0
- package/dist/db/index.d.ts +6 -0
- package/dist/db/index.js +40 -0
- package/dist/db/index.js.map +1 -0
- package/dist/db/schema.d.ts +5 -0
- package/dist/db/schema.js +71 -0
- package/dist/db/schema.js.map +1 -0
- package/dist/embeddings/client.d.ts +16 -0
- package/dist/embeddings/client.js +155 -0
- package/dist/embeddings/client.js.map +1 -0
- package/dist/indexer/changeDetection.d.ts +16 -0
- package/dist/indexer/changeDetection.js +81 -0
- package/dist/indexer/changeDetection.js.map +1 -0
- package/dist/indexer/chunker.d.ts +5 -0
- package/dist/indexer/chunker.js +44 -0
- package/dist/indexer/chunker.js.map +1 -0
- package/dist/indexer/fileUtils.d.ts +2 -0
- package/dist/indexer/fileUtils.js +9 -0
- package/dist/indexer/fileUtils.js.map +1 -0
- package/dist/indexer/index.d.ts +19 -0
- package/dist/indexer/index.js +267 -0
- package/dist/indexer/index.js.map +1 -0
- package/dist/indexer/parser.d.ts +12 -0
- package/dist/indexer/parser.js +45 -0
- package/dist/indexer/parser.js.map +1 -0
- package/dist/server.d.ts +2 -0
- package/dist/server.js +1851 -0
- package/dist/server.js.map +1 -0
- package/package.json +62 -0
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { getDatabase } from './index.js';
|
|
2
|
+
/**
 * Write a conversation row, replacing an existing row on a uniqueness conflict
 * (INSERT OR REPLACE).
 *
 * @param c - Conversation record; its id, project, title, created_at,
 *   file_path, content_hash and source_mtime fields are bound in that order.
 */
export function insertConversation(c) {
    const upsert = getDatabase().prepare(`INSERT OR REPLACE INTO conversations
(id, project, title, created_at, file_path, content_hash, source_mtime)
VALUES (?, ?, ?, ?, ?, ?, ?)`);
    const { id, project, title, created_at, file_path, content_hash, source_mtime } = c;
    upsert.run(id, project, title, created_at, file_path, content_hash, source_mtime);
}
|
|
9
|
+
/**
 * Fetch one conversation by id.
 *
 * @param id - Value matched against the `id` column.
 * @returns The matching row, or `null` when the query yields nothing.
 */
export function getConversation(id) {
    const lookup = getDatabase().prepare('SELECT * FROM conversations WHERE id = ?');
    const row = lookup.get(id);
    if (row === undefined || row === null) {
        return null;
    }
    return row;
}
|
|
15
|
+
/**
 * Fetch one conversation by the path of its source file.
 *
 * @param path - Value matched against the `file_path` column.
 * @returns The matching row, or `null` when the query yields nothing.
 */
export function getConversationByPath(path) {
    const lookup = getDatabase().prepare('SELECT * FROM conversations WHERE file_path = ?');
    const row = lookup.get(path);
    if (row === undefined || row === null) {
        return null;
    }
    return row;
}
|
|
21
|
+
/**
 * Delete the conversation row with the given id.
 *
 * @param id - Value matched against the `id` column.
 */
export function deleteConversation(id) {
    const removal = getDatabase().prepare('DELETE FROM conversations WHERE id = ?');
    removal.run(id);
}
|
|
24
|
+
/**
 * List conversations ordered by `created_at`, newest first.
 *
 * @param project - When truthy, only rows whose `project` column equals this
 *   value are returned; otherwise every row is returned.
 * @returns Array of conversation rows.
 */
export function listConversations(project) {
    const db = getDatabase();
    if (!project) {
        const everything = db.prepare('SELECT * FROM conversations ORDER BY created_at DESC');
        return everything.all();
    }
    const scoped = db.prepare('SELECT * FROM conversations WHERE project = ? ORDER BY created_at DESC');
    return scoped.all(project);
}
|
|
35
|
+
/**
 * Return the most recently created conversations.
 *
 * @param limit - Value bound to the query's LIMIT clause (default 10).
 * @returns Array of conversation rows, newest `created_at` first.
 */
export function getRecentConversations(limit = 10) {
    const newestFirst = getDatabase().prepare('SELECT * FROM conversations ORDER BY created_at DESC LIMIT ?');
    return newestFirst.all(limit);
}
|
|
40
|
+
//# sourceMappingURL=conversations.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"conversations.js","sourceRoot":"","sources":["../../src/db/conversations.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,YAAY,CAAC;AAazC,MAAM,UAAU,kBAAkB,CAAC,CAAe;IAChD,WAAW,EAAE;SACV,OAAO,CACN;;oCAE8B,CAC/B;SACA,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC,SAAS,EAAE,CAAC,CAAC,YAAY,EAAE,CAAC,CAAC,YAAY,CAAC,CAAC;AAC9F,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,EAAU;IACxC,MAAM,MAAM,GAAG,WAAW,EAAE;SACzB,OAAO,CAAC,0CAA0C,CAAC;SACnD,GAAG,CAAC,EAAE,CAA6B,CAAC;IACvC,OAAO,MAAM,IAAI,IAAI,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,IAAY;IAChD,MAAM,MAAM,GAAG,WAAW,EAAE;SACzB,OAAO,CAAC,iDAAiD,CAAC;SAC1D,GAAG,CAAC,IAAI,CAA6B,CAAC;IACzC,OAAO,MAAM,IAAI,IAAI,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,EAAU;IAC3C,WAAW,EAAE,CAAC,OAAO,CAAC,wCAAwC,CAAC,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;AAC1E,CAAC;AAED,MAAM,UAAU,iBAAiB,CAAC,OAAgB;IAChD,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,IAAI,OAAO,EAAE,CAAC;QACZ,OAAO,EAAE;aACN,OAAO,CAAC,wEAAwE,CAAC;aACjF,GAAG,CAAC,OAAO,CAAmB,CAAC;IACpC,CAAC;IACD,OAAO,EAAE;SACN,OAAO,CAAC,sDAAsD,CAAC;SAC/D,GAAG,EAAoB,CAAC;AAC7B,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,QAAgB,EAAE;IACvD,OAAO,WAAW,EAAE;SACjB,OAAO,CAAC,8DAA8D,CAAC;SACvE,GAAG,CAAC,KAAK,CAAmB,CAAC;AAClC,CAAC"}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
/**
 * Opens the SQLite database at `dbPath`, makes it the module-wide connection
 * and returns it.
 */
export declare function createDatabase(dbPath: string): Database.Database;
|
|
3
|
+
/**
 * Returns the module-wide connection.
 *
 * @throws Error when no database has been created yet.
 */
export declare function getDatabase(): Database.Database;
|
|
4
|
+
/** Closes the module-wide connection, if one is open. */
export declare function closeDatabase(): void;
|
|
5
|
+
/** Stores `value` under `key` in the `metadata` table, replacing any existing entry. */
export declare function setMetadata(key: string, value: string): void;
|
|
6
|
+
/** Reads the value stored under `key` in the `metadata` table; `null` when there is none. */
export declare function getMetadata(key: string): string | null;
|
package/dist/db/index.js
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import * as sqliteVec from 'sqlite-vec';
|
|
3
|
+
import { SCHEMA_SQL, SCHEMA_VERSION, FTS_TABLE_SQL, VEC_TABLE_SQL, TRIGGER_SQL } from './schema.js';
|
|
4
|
+
// Module-wide connection handle: assigned by createDatabase, reset to null by closeDatabase.
let db = null;
|
|
5
|
+
/**
 * Open (or create) the SQLite database at `dbPath`, prepare it for use and
 * install it as the module-wide connection.
 *
 * Preparation loads the sqlite-vec extension, enables WAL journaling and
 * foreign keys, runs the schema / FTS / vector / trigger SQL and records
 * SCHEMA_VERSION under the `schema_version` metadata key.
 *
 * Any previously installed connection is closed first. If opening or preparing
 * the new database fails, the new connection is closed, the module-wide handle
 * stays cleared (so getDatabase() throws instead of handing out a closed or
 * half-initialised connection) and the original error is rethrown.
 *
 * @param dbPath - Path passed to the better-sqlite3 `Database` constructor.
 * @returns The ready-to-use connection.
 */
export function createDatabase(dbPath) {
    if (db) {
        // Clear the shared handle before closing so a later failure cannot
        // leave it pointing at a closed connection.
        const previous = db;
        db = null;
        previous.close();
    }
    const next = new Database(dbPath);
    try {
        // Load sqlite-vec extension for vector search
        sqliteVec.load(next);
        next.pragma('journal_mode = WAL');
        next.pragma('foreign_keys = ON');
        next.exec(SCHEMA_SQL);
        next.exec(FTS_TABLE_SQL);
        next.exec(VEC_TABLE_SQL);
        next.exec(TRIGGER_SQL);
        next.prepare('INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)')
            .run('schema_version', String(SCHEMA_VERSION));
    }
    catch (err) {
        // Release the file handle of the connection that failed to initialise;
        // a failure while closing must not mask the original error.
        try {
            next.close();
        }
        catch {
            // ignore: the initialisation error below is the one worth reporting
        }
        throw err;
    }
    db = next;
    return db;
}
|
|
22
|
+
/**
 * Return the module-wide connection.
 *
 * @returns The connection installed by createDatabase.
 * @throws Error when no connection is currently installed.
 */
export function getDatabase() {
    if (db) {
        return db;
    }
    throw new Error('Database not initialized. Call createDatabase first.');
}
|
|
27
|
+
/**
 * Close the module-wide connection and forget it. Does nothing when no
 * connection is installed.
 */
export function closeDatabase() {
    if (!db) {
        return;
    }
    db.close();
    db = null;
}
|
|
33
|
+
/**
 * Store a key/value pair in the `metadata` table, replacing an existing entry
 * for the same key.
 *
 * @param key - Metadata key.
 * @param value - Value to store.
 */
export function setMetadata(key, value) {
    const upsert = getDatabase().prepare('INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)');
    upsert.run(key, value);
}
|
|
36
|
+
/**
 * Read one value from the `metadata` table.
 *
 * @param key - Metadata key to look up.
 * @returns The stored value, or `null` when there is no row or its value is null.
 */
export function getMetadata(key) {
    const lookup = getDatabase().prepare('SELECT value FROM metadata WHERE key = ?');
    const row = lookup.get(key);
    if (row === undefined || row === null) {
        return null;
    }
    const { value } = row;
    return value === undefined || value === null ? null : value;
}
|
|
40
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/db/index.ts"],"names":[],"mappings":"AAAA,OAAO,QAAQ,MAAM,gBAAgB,CAAC;AACtC,OAAO,KAAK,SAAS,MAAM,YAAY,CAAC;AACxC,OAAO,EAAE,UAAU,EAAE,cAAc,EAAE,aAAa,EAAE,aAAa,EAAE,WAAW,EAAE,MAAM,aAAa,CAAC;AAEpG,IAAI,EAAE,GAA6B,IAAI,CAAC;AAExC,MAAM,UAAU,cAAc,CAAC,MAAc;IAC3C,IAAI,EAAE,EAAE,CAAC;QACP,EAAE,CAAC,KAAK,EAAE,CAAC;IACb,CAAC;IACD,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;IAE1B,8CAA8C;IAC9C,SAAS,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAEnB,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;IAChC,EAAE,CAAC,MAAM,CAAC,mBAAmB,CAAC,CAAC;IAC/B,EAAE,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IACpB,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACvB,EAAE,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACvB,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACrB,EAAE,CAAC,OAAO,CAAC,4DAA4D,CAAC;SACrE,GAAG,CAAC,gBAAgB,EAAE,MAAM,CAAC,cAAc,CAAC,CAAC,CAAC;IACjD,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,WAAW;IACzB,IAAI,CAAC,EAAE;QAAE,MAAM,IAAI,KAAK,CAAC,sDAAsD,CAAC,CAAC;IACjF,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,UAAU,aAAa;IAC3B,IAAI,EAAE,EAAE,CAAC;QAAC,EAAE,CAAC,KAAK,EAAE,CAAC;QAAC,EAAE,GAAG,IAAI,CAAC;IAAC,CAAC;AACpC,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW,EAAE,KAAa;IACpD,WAAW,EAAE,CAAC,OAAO,CAAC,4DAA4D,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAC;AACtG,CAAC;AAED,MAAM,UAAU,WAAW,CAAC,GAAW;IACrC,MAAM,GAAG,GAAG,WAAW,EAAE,CAAC,OAAO,CAAC,0CAA0C,CAAC,CAAC,GAAG,CAAC,GAAG,CAAkC,CAAC;IACxH,OAAO,GAAG,EAAE,KAAK,IAAI,IAAI,CAAC;AAC5B,CAAC"}
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
/** Version number of the database schema defined by the SQL constants in this module. */
export declare const SCHEMA_VERSION = 1;
|
|
2
|
+
/**
 * DDL for the regular tables and their indexes: `metadata` (key/value),
 * `conversations`, and `chunks` (which references `conversations(id)` with
 * ON DELETE CASCADE). Every statement uses IF NOT EXISTS.
 */
export declare const SCHEMA_SQL = "\nCREATE TABLE IF NOT EXISTS metadata (\n key TEXT PRIMARY KEY,\n value TEXT\n);\n\nCREATE TABLE IF NOT EXISTS conversations (\n id TEXT PRIMARY KEY,\n project TEXT NOT NULL,\n title TEXT,\n created_at DATETIME,\n file_path TEXT NOT NULL,\n content_hash TEXT NOT NULL,\n source_mtime INTEGER NOT NULL,\n indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP\n);\n\nCREATE INDEX IF NOT EXISTS idx_conversations_project ON conversations(project);\nCREATE INDEX IF NOT EXISTS idx_conversations_created ON conversations(created_at);\n\nCREATE TABLE IF NOT EXISTS chunks (\n id INTEGER PRIMARY KEY,\n conversation_id TEXT NOT NULL,\n chunk_index INTEGER NOT NULL,\n page_number INTEGER,\n role TEXT NOT NULL,\n content TEXT NOT NULL,\n embedding BLOB,\n FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE\n);\n\nCREATE INDEX IF NOT EXISTS idx_chunks_conversation ON chunks(conversation_id);\nCREATE INDEX IF NOT EXISTS idx_chunks_role ON chunks(role);\n";
|
|
3
|
+
/**
 * DDL for `chunks_fts`, an FTS5 virtual table over `chunks.content`
 * (external content keyed by `chunks.id`) using the trigram tokenizer.
 */
export declare const FTS_TABLE_SQL = "\nCREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(\n content,\n content='chunks',\n content_rowid='id',\n tokenize='trigram'\n);\n";
|
|
4
|
+
/** DDL for `chunks_vec`, a vec0 virtual table holding one 1024-dimension float embedding per row. */
export declare const VEC_TABLE_SQL = "\nCREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec USING vec0(\n embedding float[1024]\n);\n";
|
|
5
|
+
/**
 * Triggers on `chunks` (after insert / delete / update) that mirror each row
 * into `chunks_fts` and, when the row has a non-NULL embedding, `chunks_vec`.
 */
export declare const TRIGGER_SQL = "\n-- FTS sync triggers\nCREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN\n INSERT INTO chunks_fts(rowid, content) VALUES (NEW.id, NEW.content);\n INSERT INTO chunks_vec(rowid, embedding) SELECT NEW.id, NEW.embedding WHERE NEW.embedding IS NOT NULL;\nEND;\n\nCREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN\n INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.id, OLD.content);\n DELETE FROM chunks_vec WHERE rowid = OLD.id;\nEND;\n\nCREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN\n INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.id, OLD.content);\n INSERT INTO chunks_fts(rowid, content) VALUES (NEW.id, NEW.content);\n DELETE FROM chunks_vec WHERE rowid = OLD.id;\n INSERT INTO chunks_vec(rowid, embedding) SELECT NEW.id, NEW.embedding WHERE NEW.embedding IS NOT NULL;\nEND;\n";
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
// Version number of the schema defined by the SQL constants below.
export const SCHEMA_VERSION = 1;
|
|
2
|
+
export const SCHEMA_SQL = `
|
|
3
|
+
CREATE TABLE IF NOT EXISTS metadata (
|
|
4
|
+
key TEXT PRIMARY KEY,
|
|
5
|
+
value TEXT
|
|
6
|
+
);
|
|
7
|
+
|
|
8
|
+
CREATE TABLE IF NOT EXISTS conversations (
|
|
9
|
+
id TEXT PRIMARY KEY,
|
|
10
|
+
project TEXT NOT NULL,
|
|
11
|
+
title TEXT,
|
|
12
|
+
created_at DATETIME,
|
|
13
|
+
file_path TEXT NOT NULL,
|
|
14
|
+
content_hash TEXT NOT NULL,
|
|
15
|
+
source_mtime INTEGER NOT NULL,
|
|
16
|
+
indexed_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
|
17
|
+
);
|
|
18
|
+
|
|
19
|
+
CREATE INDEX IF NOT EXISTS idx_conversations_project ON conversations(project);
|
|
20
|
+
CREATE INDEX IF NOT EXISTS idx_conversations_created ON conversations(created_at);
|
|
21
|
+
|
|
22
|
+
CREATE TABLE IF NOT EXISTS chunks (
|
|
23
|
+
id INTEGER PRIMARY KEY,
|
|
24
|
+
conversation_id TEXT NOT NULL,
|
|
25
|
+
chunk_index INTEGER NOT NULL,
|
|
26
|
+
page_number INTEGER,
|
|
27
|
+
role TEXT NOT NULL,
|
|
28
|
+
content TEXT NOT NULL,
|
|
29
|
+
embedding BLOB,
|
|
30
|
+
FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE
|
|
31
|
+
);
|
|
32
|
+
|
|
33
|
+
CREATE INDEX IF NOT EXISTS idx_chunks_conversation ON chunks(conversation_id);
|
|
34
|
+
CREATE INDEX IF NOT EXISTS idx_chunks_role ON chunks(role);
|
|
35
|
+
`;
|
|
36
|
+
export const FTS_TABLE_SQL = `
|
|
37
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
38
|
+
content,
|
|
39
|
+
content='chunks',
|
|
40
|
+
content_rowid='id',
|
|
41
|
+
tokenize='trigram'
|
|
42
|
+
);
|
|
43
|
+
`;
|
|
44
|
+
// Vector table for semantic search
// Dimensions: 1024 for qwen3-small, 2048 for qwen3-medium, 4096 for qwen3-large
// Using 1024 as default (small model is fastest)
// NOTE(review): the dimension is fixed in this DDL; presumably embeddings of a
// different size cannot be stored in this table - confirm against sqlite-vec.
export const VEC_TABLE_SQL = `
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_vec USING vec0(
embedding float[1024]
);
`;
|
|
52
|
+
export const TRIGGER_SQL = `
|
|
53
|
+
-- FTS sync triggers
|
|
54
|
+
CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
|
|
55
|
+
INSERT INTO chunks_fts(rowid, content) VALUES (NEW.id, NEW.content);
|
|
56
|
+
INSERT INTO chunks_vec(rowid, embedding) SELECT NEW.id, NEW.embedding WHERE NEW.embedding IS NOT NULL;
|
|
57
|
+
END;
|
|
58
|
+
|
|
59
|
+
CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
|
|
60
|
+
INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.id, OLD.content);
|
|
61
|
+
DELETE FROM chunks_vec WHERE rowid = OLD.id;
|
|
62
|
+
END;
|
|
63
|
+
|
|
64
|
+
CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
|
|
65
|
+
INSERT INTO chunks_fts(chunks_fts, rowid, content) VALUES('delete', OLD.id, OLD.content);
|
|
66
|
+
INSERT INTO chunks_fts(rowid, content) VALUES (NEW.id, NEW.content);
|
|
67
|
+
DELETE FROM chunks_vec WHERE rowid = OLD.id;
|
|
68
|
+
INSERT INTO chunks_vec(rowid, embedding) SELECT NEW.id, NEW.embedding WHERE NEW.embedding IS NOT NULL;
|
|
69
|
+
END;
|
|
70
|
+
`;
|
|
71
|
+
//# sourceMappingURL=schema.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"schema.js","sourceRoot":"","sources":["../../src/db/schema.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,cAAc,GAAG,CAAC,CAAC;AAEhC,MAAM,CAAC,MAAM,UAAU,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAiCzB,CAAC;AAEF,MAAM,CAAC,MAAM,aAAa,GAAG;;;;;;;CAO5B,CAAC;AAEF,mCAAmC;AACnC,gFAAgF;AAChF,iDAAiD;AACjD,MAAM,CAAC,MAAM,aAAa,GAAG;;;;CAI5B,CAAC;AAEF,MAAM,CAAC,MAAM,WAAW,GAAG;;;;;;;;;;;;;;;;;;CAkB1B,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** Result of embedding a single text. */
export interface EmbeddingResponse {
    /** Embedding vector returned by the service. */
    embedding: number[];
    /** Token count reported by the service; 0 when the service reports none. */
    tokens: number;
}
|
|
5
|
+
/** Model description read from the service's `/health` response. */
export interface ModelInfo {
    /** Model name; `'unknown'` when the response names none. */
    model: string;
    /** Embedding dimension; 0 when the response reports none. */
    dim: number;
}
|
|
9
|
+
/**
 * Client for an embedding service. Methods that return `T | null` resolve to
 * `null` when the request fails or the response cannot be used.
 */
export interface EmbeddingClient {
    /** Resolves to true when `GET /health` answers with status 200. */
    isHealthy(): Promise<boolean>;
    /** Embeds one text. */
    embed(text: string): Promise<EmbeddingResponse | null>;
    /** Embeds several texts in one request; an empty input resolves to an empty array. */
    embedBatch(texts: string[]): Promise<EmbeddingResponse[] | null>;
    /** Reads model name and embedding dimension from `GET /health`. */
    getModelInfo(): Promise<ModelInfo | null>;
    /** Releases client resources. */
    close(): void;
}
|
|
16
|
+
/**
 * Creates an embedding client.
 *
 * @param endpoint - An `http://` URL, or otherwise a Unix socket path.
 * @param timeout - Per-request timeout in milliseconds (default 5000).
 */
export declare function createEmbeddingClient(endpoint: string, timeout?: number): EmbeddingClient;
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
import * as http from 'http';
|
|
2
|
+
/**
 * Send one HTTP request over TCP or a Unix domain socket and buffer the reply.
 *
 * Transport problems never reject the returned promise: it resolves to
 * `{ statusCode, body }` once the response has been read to the end, or to
 * `null` on a request error, a timeout, or a response that is cut off before
 * it ends.
 *
 * @param options - `socketPath`, or `hostname` plus `port`, selects the target;
 *   `path` and `method` describe the request; `body` (optional string) is sent
 *   with JSON content headers; `timeout` is handed to http.request (default 5000).
 * @returns Promise of `{ statusCode: number, body: string }` or `null`.
 */
async function makeRequest(options) {
    return new Promise((resolve) => {
        const { socketPath, hostname, port, path, method, body, timeout = 5000 } = options;
        const reqOptions = {
            path,
            method,
            headers: body
                ? {
                    'Content-Type': 'application/json',
                    'Content-Length': Buffer.byteLength(body),
                }
                : {},
            timeout,
        };
        // Support both Unix socket and HTTP
        if (socketPath) {
            reqOptions.socketPath = socketPath;
        }
        else if (hostname && port) {
            reqOptions.hostname = hostname;
            reqOptions.port = port;
        }
        // Several events can report the same failure (for example 'timeout'
        // followed by the 'error' caused by destroy()); only the first counts.
        let settled = false;
        const settle = (outcome) => {
            if (settled) {
                return;
            }
            settled = true;
            resolve(outcome);
        };
        const req = http.request(reqOptions, (res) => {
            // Decode at the stream level so a multi-byte UTF-8 character that is
            // split across two chunks is not corrupted by string concatenation.
            res.setEncoding('utf8');
            let responseBody = '';
            res.on('data', (chunk) => {
                responseBody += chunk;
            });
            res.on('end', () => {
                settle({ statusCode: res.statusCode || 0, body: responseBody });
            });
            // A response that dies after the headers arrived never emits 'end';
            // without these handlers the promise would stay pending forever.
            res.on('error', () => settle(null));
            res.on('close', () => settle(null));
        });
        req.on('error', () => settle(null));
        req.on('timeout', () => {
            req.destroy();
            settle(null);
        });
        if (body) {
            req.write(body);
        }
        req.end();
    });
}
|
|
42
|
+
/**
 * Client for an embedding service reachable over TCP (an `http://` endpoint)
 * or over a Unix domain socket (any other endpoint string).
 *
 * Network-facing methods do not throw on failure: they resolve to `null`
 * (`false` for isHealthy) when the request fails, the status is not 200, or
 * the body is not the expected JSON.
 */
class EmbeddingClientImpl {
    socketPath;
    hostname;
    port;
    timeout;
    batchEndpoint;
    /**
     * @param endpoint - `http://host[:port]` URL, or a Unix socket path.
     * @param timeout - Per-request timeout in milliseconds (default 5000).
     */
    constructor(endpoint, timeout = 5000) {
        this.timeout = timeout;
        // Parse endpoint - could be Unix socket path or HTTP URL
        if (endpoint.startsWith('http://')) {
            const url = new URL(endpoint);
            this.hostname = url.hostname;
            // url.port is '' when the URL carries no port (the URL parser also
            // drops an explicit default port), in which case 8000 is used.
            this.port = parseInt(url.port, 10) || 8000;
            // qwen3-embeddings-mlx uses /embed_batch, not /embed/batch
            this.batchEndpoint = '/embed_batch';
        }
        else {
            // Unix socket path
            this.socketPath = endpoint;
            this.batchEndpoint = '/embed/batch';
        }
    }
    /**
     * Convert one raw embedding payload into an EmbeddingResponse.
     *
     * @returns The normalised response, or `null` when the payload carries no
     *   `embedding` array (so callers never receive `embedding: undefined`).
     */
    static toEmbeddingResponse(raw) {
        if (!raw || !Array.isArray(raw.embedding)) {
            return null;
        }
        return {
            embedding: raw.embedding,
            tokens: raw.tokens || raw.tokens_processed || 0,
        };
    }
    /** Connection fields for makeRequest: the socket path, or hostname and port. */
    getRequestBase() {
        if (this.socketPath) {
            return { socketPath: this.socketPath };
        }
        return { hostname: this.hostname, port: this.port };
    }
    /** Resolves to true when `GET /health` answers with status 200. */
    async isHealthy() {
        const result = await makeRequest({
            ...this.getRequestBase(),
            path: '/health',
            method: 'GET',
            timeout: this.timeout,
        });
        return result !== null && result.statusCode === 200;
    }
    /**
     * Embed a single text via `POST /embed`.
     *
     * @returns The embedding, or `null` on any failure, including a 200
     *   response whose JSON has no `embedding` array.
     */
    async embed(text) {
        const result = await makeRequest({
            ...this.getRequestBase(),
            path: '/embed',
            method: 'POST',
            body: JSON.stringify({ text }),
            timeout: this.timeout,
        });
        if (!result || result.statusCode !== 200) {
            return null;
        }
        try {
            return EmbeddingClientImpl.toEmbeddingResponse(JSON.parse(result.body));
        }
        catch {
            return null;
        }
    }
    /**
     * Embed several texts in one request to the batch endpoint.
     *
     * @returns One response per entry of the service's `embeddings` array;
     *   `[]` for an empty input (no request is made); `null` on any failure,
     *   including a payload where any entry lacks an `embedding` array.
     */
    async embedBatch(texts) {
        if (texts.length === 0) {
            return [];
        }
        const result = await makeRequest({
            ...this.getRequestBase(),
            path: this.batchEndpoint,
            method: 'POST',
            body: JSON.stringify({ texts }),
            timeout: this.timeout,
        });
        if (!result || result.statusCode !== 200) {
            return null;
        }
        try {
            const parsed = JSON.parse(result.body);
            if (!parsed || !Array.isArray(parsed.embeddings)) {
                return null;
            }
            const responses = parsed.embeddings.map((e) => EmbeddingClientImpl.toEmbeddingResponse(e));
            // One unusable entry makes the whole batch unusable.
            return responses.includes(null) ? null : responses;
        }
        catch {
            return null;
        }
    }
    /**
     * Read the model name and embedding dimension from `GET /health`.
     *
     * @returns `{ model, dim }`, with `'unknown'` / `0` for values the response
     *   does not report, or `null` on any failure.
     */
    async getModelInfo() {
        const result = await makeRequest({
            ...this.getRequestBase(),
            path: '/health',
            method: 'GET',
            timeout: this.timeout,
        });
        if (!result || result.statusCode !== 200) {
            return null;
        }
        try {
            const parsed = JSON.parse(result.body);
            // qwen3-embeddings-mlx uses model_name and embedding_dim
            return {
                model: parsed.model || parsed.model_name || parsed.default_model || 'unknown',
                dim: parsed.dim || parsed.embedding_dim || parsed.dimensions || 0,
            };
        }
        catch {
            return null;
        }
    }
    close() {
        // No persistent connections to close in this implementation
    }
}
|
|
152
|
+
/**
 * Build an embedding client for the given endpoint.
 *
 * @param endpoint - An `http://` URL, or otherwise a Unix socket path.
 * @param timeout - Per-request timeout in milliseconds (default 5000).
 * @returns A new EmbeddingClientImpl.
 */
export function createEmbeddingClient(endpoint, timeout = 5000) {
    const client = new EmbeddingClientImpl(endpoint, timeout);
    return client;
}
|
|
155
|
+
//# sourceMappingURL=client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../src/embeddings/client.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,IAAI,MAAM,MAAM,CAAC;AA8B7B,KAAK,UAAU,WAAW,CAAC,OAAuB;IAIhD,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;QAC7B,MAAM,EAAE,UAAU,EAAE,QAAQ,EAAE,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,GAAG,IAAI,EAAE,GAAG,OAAO,CAAC;QAEnF,MAAM,UAAU,GAAwB;YACtC,IAAI;YACJ,MAAM;YACN,OAAO,EAAE,IAAI;gBACX,CAAC,CAAC;oBACE,cAAc,EAAE,kBAAkB;oBAClC,gBAAgB,EAAE,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC;iBAC1C;gBACH,CAAC,CAAC,EAAE;YACN,OAAO;SACR,CAAC;QAEF,oCAAoC;QACpC,IAAI,UAAU,EAAE,CAAC;YACf,UAAU,CAAC,UAAU,GAAG,UAAU,CAAC;QACrC,CAAC;aAAM,IAAI,QAAQ,IAAI,IAAI,EAAE,CAAC;YAC5B,UAAU,CAAC,QAAQ,GAAG,QAAQ,CAAC;YAC/B,UAAU,CAAC,IAAI,GAAG,IAAI,CAAC;QACzB,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,GAAG,EAAE,EAAE;YAC3C,IAAI,YAAY,GAAG,EAAE,CAAC;YACtB,GAAG,CAAC,EAAE,CAAC,MAAM,EAAE,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,YAAY,IAAI,KAAK,CAAC,CAAC,CAAC;YACnD,GAAG,CAAC,EAAE,CAAC,KAAK,EAAE,GAAG,EAAE;gBACjB,OAAO,CAAC,EAAE,UAAU,EAAE,GAAG,CAAC,UAAU,IAAI,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;YACnE,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;QAEH,GAAG,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;QACrC,GAAG,CAAC,EAAE,CAAC,SAAS,EAAE,GAAG,EAAE;YACrB,GAAG,CAAC,OAAO,EAAE,CAAC;YACd,OAAO,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;QAEH,IAAI,IAAI,EAAE,CAAC;YACT,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClB,CAAC;QACD,GAAG,CAAC,GAAG,EAAE,CAAC;IACZ,CAAC,CAAC,CAAC;AACL,CAAC;AAED,MAAM,mBAAmB;IACf,UAAU,CAAU;IACpB,QAAQ,CAAU;IAClB,IAAI,CAAU;IACd,OAAO,CAAS;IAChB,aAAa,CAAS;IAE9B,YAAY,QAAgB,EAAE,OAAO,GAAG,IAAI;QAC1C,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QAEvB,yDAAyD;QACzD,IAAI,QAAQ,CAAC,UAAU,CAAC,SAAS,CAAC,EAAE,CAAC;YACnC,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,QAAQ,CAAC,CAAC;YAC9B,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC;YAC7B,IAAI,CAAC,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,IAAI,CAAC,IAAI,IAAI,CAAC;YACvC,2DAA2D;YAC3D,IAAI,CAAC,aAAa,GAAG,cAAc,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,mBAAmB;YACnB,IAAI,CAAC,UAAU,GAAG,QAAQ,CAAC;YAC3B,IAAI,CAAC,aAAa
,GAAG,cAAc,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,cAAc;QACpB,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;YACpB,OAAO,EAAE,UAAU,EAAE,IAAI,CAAC,UAAU,EAAE,CAAC;QACzC,CAAC;QACD,OAAO,EAAE,QAAQ,EAAE,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,SAAS;QACb,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC;YAC/B,GAAG,IAAI,CAAC,cAAc,EAAE;YACxB,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,KAAK;YACb,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,OAAO,MAAM,KAAK,IAAI,IAAI,MAAM,CAAC,UAAU,KAAK,GAAG,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,IAAY;QACtB,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC;YAC/B,GAAG,IAAI,CAAC,cAAc,EAAE;YACxB,IAAI,EAAE,QAAQ;YACd,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,EAAE,CAAC;YAC9B,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACvC,OAAO;gBACL,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,MAAM,EAAE,MAAM,CAAC,MAAM,IAAI,MAAM,CAAC,gBAAgB,IAAI,CAAC;aACtD,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,KAAe;QAC9B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC;YAC/B,GAAG,IAAI,CAAC,cAAc,EAAE;YACxB,IAAI,EAAE,IAAI,CAAC,aAAa;YACxB,MAAM,EAAE,MAAM;YACd,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,EAAE,KAAK,EAAE,CAAC;YAC/B,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;QAEH,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACvC,OAAO,MAAM,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAsE,EAAE,EAAE,CAAC,CAAC;gBACxG,SAAS,EAAE,CAAC,CAAC,SAAS;gBACtB,MAAM,EAAE,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,gBAAgB,IAAI,CAAC;aAC5C,CAAC,CAAC,CAAC;QACN,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK,CAAC,YAAY;QAChB,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC;YAC/B,GAAG,IAAI,CAAC,cAAc,EAAE;YACxB,IAAI,EAAE,SAAS;YACf,MAAM,EAAE,KAAK;YACb,OAAO,EAAE,IAAI,CAAC,OAAO;SACtB,CAAC,CAAC;Q
AEH,IAAI,CAAC,MAAM,IAAI,MAAM,CAAC,UAAU,KAAK,GAAG,EAAE,CAAC;YACzC,OAAO,IAAI,CAAC;QACd,CAAC;QAED,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACvC,yDAAyD;YACzD,OAAO;gBACL,KAAK,EAAE,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,UAAU,IAAI,MAAM,CAAC,aAAa,IAAI,SAAS;gBAC7E,GAAG,EAAE,MAAM,CAAC,GAAG,IAAI,MAAM,CAAC,aAAa,IAAI,MAAM,CAAC,UAAU,IAAI,CAAC;aAClE,CAAC;QACJ,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC;IAED,KAAK;QACH,4DAA4D;IAC9D,CAAC;CACF;AAED,MAAM,UAAU,qBAAqB,CACnC,QAAgB,EAChB,OAAO,GAAG,IAAI;IAEd,OAAO,IAAI,mBAAmB,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;AACpD,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/** Differences between the .jsonl files on disk and the indexed conversations. */
export interface ChangeSet {
    /** Paths of files on disk that have no indexed conversation. */
    added: string[];
    /** Paths of indexed files whose mtime and content hash both differ from the index. */
    modified: string[];
    /** Ids (not paths) of indexed conversations whose file no longer exists. */
    deleted: string[];
}
|
|
6
|
+
/**
 * Detect changes between the file system and the indexed database.
 *
 * Uses a two-phase check:
 * 1. Quick mtime comparison to skip unchanged files
 * 2. Hash comparison only when mtime differs
 *
 * Reads the index through the module-wide database connection, so the
 * database must have been created before this is called.
 *
 * @param sourceDir - Directory to scan (recursively) for .jsonl files
 * @returns ChangeSet whose `added` and `modified` lists hold file paths and
 *   whose `deleted` list holds conversation ids
 */
export declare function detectChanges(sourceDir: string): ChangeSet;
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { readdirSync, existsSync } from 'fs';
|
|
2
|
+
import { join } from 'path';
|
|
3
|
+
import { getDatabase } from '../db/index.js';
|
|
4
|
+
import { getFileHash, getFileMtime } from './fileUtils.js';
|
|
5
|
+
/**
 * Compare the .jsonl files under `sourceDir` with the rows of the
 * `conversations` table and report what differs.
 *
 * A file on disk with no indexed row is "added". An indexed file is
 * "modified" only when its mtime differs from the stored one AND its content
 * hash differs too (the hash is computed only after the mtime check fails).
 * An indexed row is "deleted" when its path was not found by the scan and
 * does not exist on disk.
 *
 * @param sourceDir - Directory scanned recursively for .jsonl files
 * @returns `{ added, modified, deleted }`: `added` and `modified` hold file
 *   paths, `deleted` holds conversation ids
 */
export function detectChanges(sourceDir) {
    const db = getDatabase();
    const added = [];
    const modified = [];
    const deleted = [];
    const onDisk = findJsonlFiles(sourceDir);
    const onDiskLookup = new Set(onDisk);
    const rows = db
        .prepare('SELECT id, file_path, content_hash, source_mtime FROM conversations')
        .all();
    // file_path -> what the index knows about that file
    const known = new Map();
    for (const row of rows) {
        known.set(row.file_path, { id: row.id, hash: row.content_hash, mtime: row.source_mtime });
    }
    for (const filePath of onDisk) {
        const record = known.get(filePath);
        if (!record) {
            added.push(filePath);
            continue;
        }
        // Same mtime: treat as unchanged without reading the file.
        if (getFileMtime(filePath) === record.mtime) {
            continue;
        }
        if (getFileHash(filePath) !== record.hash) {
            modified.push(filePath);
        }
    }
    for (const [filePath, record] of known) {
        if (onDiskLookup.has(filePath) || existsSync(filePath)) {
            continue;
        }
        deleted.push(record.id);
    }
    return { added, modified, deleted };
}
|
|
58
|
+
/**
 * Collect every regular file whose name ends in `.jsonl` under a directory,
 * descending into subdirectories depth-first in directory-listing order.
 *
 * @param dir - Directory to scan
 * @returns Array of file paths, each built by joining onto `dir` (so they are
 *   absolute only when `dir` is)
 */
function findJsonlFiles(dir) {
    const collect = (currentDir) => readdirSync(currentDir, { withFileTypes: true }).flatMap((entry) => {
        const fullPath = join(currentDir, entry.name);
        if (entry.isDirectory()) {
            return collect(fullPath);
        }
        const wanted = entry.isFile() && entry.name.endsWith('.jsonl');
        return wanted ? [fullPath] : [];
    });
    return collect(dir);
}
|
|
81
|
+
//# sourceMappingURL=changeDetection.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"changeDetection.js","sourceRoot":"","sources":["../../src/indexer/changeDetection.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,IAAI,CAAC;AAC7C,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,WAAW,EAAE,MAAM,gBAAgB,CAAC;AAC7C,OAAO,EAAE,WAAW,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAe3D;;;;;;;;;GASG;AACH,MAAM,UAAU,aAAa,CAAC,SAAiB;IAC7C,MAAM,EAAE,GAAG,WAAW,EAAE,CAAC;IACzB,MAAM,OAAO,GAAc,EAAE,KAAK,EAAE,EAAE,EAAE,QAAQ,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;IAEpE,8DAA8D;IAC9D,MAAM,KAAK,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;IACxC,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC;IAE/B,kDAAkD;IAClD,MAAM,OAAO,GAAG,EAAE;SACf,OAAO,CACN,qEAAqE,CACtE;SACA,GAAG,EAAqB,CAAC;IAE5B,sDAAsD;IACtD,MAAM,YAAY,GAAG,IAAI,GAAG,CAI1B,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC;QACjB,CAAC,CAAC,SAAS;QACX,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,YAAY,EAAE,KAAK,EAAE,CAAC,CAAC,YAAY,EAAE;KAC1D,CAAC,CACH,CAAC;IAEF,0BAA0B;IAC1B,KAAK,MAAM,QAAQ,IAAI,KAAK,EAAE,CAAC;QAC7B,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QAE5C,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,qCAAqC;YACrC,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;aAAM,CAAC;YACN,8CAA8C;YAC9C,MAAM,YAAY,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;YAE5C,kDAAkD;YAClD,IAAI,YAAY,KAAK,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACpC,qCAAqC;gBACrC,MAAM,WAAW,GAAG,WAAW,CAAC,QAAQ,CAAC,CAAC;gBAC1C,IAAI,WAAW,KAAK,QAAQ,CAAC,IAAI,EAAE,CAAC;oBAClC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;gBAClC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,wDAAwD;IACxD,KAAK,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,IAAI,YAAY,EAAE,CAAC;QAC5C,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YACpD,OAAO,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QAChC,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;GAKG;AACH,SAAS,cAAc,CAAC,GAAW;IACjC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,SAAS,IAAI,CAAC,UAAkB;QAC9B,MAAM,OAAO,GAAG,WAAW,CAAC,UAAU,EAAE,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QAEjE,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,EAAE,
KAAK,CAAC,IAAI,CAAC,CAAC;YAE9C,IAAI,KAAK,CAAC,WAAW,EAAE,EAAE,CAAC;gBACxB,IAAI,CAAC,QAAQ,CAAC,CAAC;YACjB,CAAC;iBAAM,IAAI,KAAK,CAAC,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC3D,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;YACvB,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,OAAO,KAAK,CAAC;AACf,CAAC"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// Characters counted per token when converting token budgets to character budgets.
const CHARS_PER_TOKEN = 4;
/**
 * Split text into overlapping chunks of at most `opts.maxTokens` tokens
 * (estimated as CHARS_PER_TOKEN characters per token), preferring to cut at
 * paragraph, code-fence, line, sentence or word boundaries.
 *
 * @param text - Text to split; surrounding whitespace is trimmed first.
 * @param opts - `maxTokens`: chunk size budget; `overlap`: number of tokens
 *   each chunk repeats from the end of the previous one.
 * @returns `[]` for blank input, a single-element array when the text fits in
 *   one chunk, otherwise the list of trimmed, non-empty chunks.
 */
export function chunkText(text, opts) {
    const trimmed = text.trim();
    if (!trimmed)
        return [];
    const maxChars = opts.maxTokens * CHARS_PER_TOKEN;
    const overlapChars = opts.overlap * CHARS_PER_TOKEN;
    if (trimmed.length <= maxChars)
        return [trimmed];
    const chunks = [];
    let pos = 0;
    while (pos < trimmed.length) {
        let end = Math.min(pos + maxChars, trimmed.length);
        if (end < trimmed.length) {
            end = findBreakPoint(trimmed, pos, end);
        }
        const chunk = trimmed.slice(pos, end).trim();
        if (chunk)
            chunks.push(chunk);
        // The chunk that reaches the end of the text is the last one. Rewinding
        // by the overlap here would re-emit ever shorter suffixes of the text,
        // one character at a time.
        if (end >= trimmed.length)
            break;
        // Step back by the overlap, but always advance at least one character.
        pos = Math.max(pos + 1, end - overlapChars);
    }
    return chunks.length ? chunks : [trimmed];
}
/**
 * Pick where to end a chunk that would otherwise be cut at `end`.
 *
 * Looks inside `text.slice(start, end)` for the last occurrence of each
 * separator, in order of preference, and accepts the first one found beyond
 * the first 30% of the segment, so chunks do not become too short.
 *
 * @param text - Full text being chunked.
 * @param start - Index where the current chunk starts.
 * @param end - Index where the chunk would end without a better break.
 * @returns Index just past the chosen separator, or `end` when none qualifies.
 */
function findBreakPoint(text, start, end) {
    const segment = text.slice(start, end);
    const minPos = Math.floor(segment.length * 0.3);
    // Most preferred first: paragraph, end of code fence, line, sentence, word.
    const separators = ['\n\n', '```\n', '\n', '. ', ' '];
    for (const separator of separators) {
        const idx = segment.lastIndexOf(separator);
        if (idx > minPos)
            return start + idx + separator.length;
    }
    return end;
}
|
|
44
|
+
//# sourceMappingURL=chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../src/indexer/chunker.ts"],"names":[],"mappings":"AAKA,MAAM,eAAe,GAAG,CAAC,CAAC;AAE1B,MAAM,UAAU,SAAS,CAAC,IAAY,EAAE,IAAkB;IACxD,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,IAAI,CAAC,SAAS,GAAG,eAAe,CAAC;IAClD,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,GAAG,eAAe,CAAC;IAEpD,IAAI,OAAO,CAAC,MAAM,IAAI,QAAQ;QAAE,OAAO,CAAC,OAAO,CAAC,CAAC;IAEjD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,GAAG,GAAG,CAAC,CAAC;IAEZ,OAAO,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;QAC5B,IAAI,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,QAAQ,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;QAEnD,IAAI,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;YACzB,GAAG,GAAG,cAAc,CAAC,OAAO,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC;QAC1C,CAAC;QAED,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,GAAG,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7C,IAAI,KAAK;YAAE,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAE9B,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,YAAY,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC;AAC5C,CAAC;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa,EAAE,GAAW;IAC9D,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,MAAM,GAAG,GAAG,CAAC,CAAC;IAEhD,MAAM,OAAO,GAAG,OAAO,CAAC,WAAW,CAAC,MAAM,CAAC,CAAC;IAC5C,IAAI,OAAO,GAAG,MAAM;QAAE,OAAO,KAAK,GAAG,OAAO,GAAG,CAAC,CAAC;IAEjD,MAAM,QAAQ,GAAG,OAAO,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IAC9C,IAAI,QAAQ,GAAG,MAAM;QAAE,OAAO,KAAK,GAAG,QAAQ,GAAG,CAAC,CAAC;IAEnD,MAAM,OAAO,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAC1C,IAAI,OAAO,GAAG,MAAM;QAAE,OAAO,KAAK,GAAG,OAAO,GAAG,CAAC,CAAC;IAEjD,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;IAC9C,IAAI,WAAW,GAAG,MAAM;QAAE,OAAO,KAAK,GAAG,WAAW,GAAG,CAAC,CAAC;IAEzD,MAAM,OAAO,GAAG,OAAO,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACzC,IAAI,OAAO,GAAG,MAAM;QAAE,OAAO,KAAK,GAAG,OAAO,GAAG,CAAC,CAAC;IAEjD,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
import { createHash } from 'crypto';
|
|
2
|
+
import { readFileSync, statSync } from 'fs';
|
|
3
|
+
/**
 * Compute the SHA-256 digest of a file's contents.
 *
 * The file is read synchronously and in full before hashing.
 *
 * @param {string} path Path of the file to hash.
 * @returns {string} Lowercase hexadecimal SHA-256 digest of the file bytes.
 */
export function getFileHash(path) {
    const hasher = createHash('sha256');
    const contents = readFileSync(path);
    hasher.update(contents);
    return hasher.digest('hex');
}
|
|
6
|
+
/**
 * Read a file's last-modified time as whole seconds.
 *
 * @param {string} path Path of the file to stat.
 * @returns {number} The file's `mtimeMs` divided by 1000 and rounded down.
 */
export function getFileMtime(path) {
    const { mtimeMs } = statSync(path);
    const seconds = mtimeMs / 1000;
    return Math.floor(seconds);
}
|
|
9
|
+
//# sourceMappingURL=fileUtils.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fileUtils.js","sourceRoot":"","sources":["../../src/indexer/fileUtils.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,YAAY,EAAE,QAAQ,EAAE,MAAM,IAAI,CAAC;AAE5C,MAAM,UAAU,WAAW,CAAC,IAAY;IACtC,OAAO,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACvE,CAAC;AAED,MAAM,UAAU,YAAY,CAAC,IAAY;IACvC,OAAO,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;AACnD,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Options accepted by `runIndexer`.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; this declaration does not show how the indexer consumes them.
 * Confirm against the implementation.
 */
export interface IndexOptions {
    /** Required. Presumably the directory scanned for files to index. */
    sourceDir: string;
    /** Required. Presumably the path of the database the index is written to. */
    databasePath: string;
    /** Optional. Presumably a socket path for reaching the embedding service. */
    embedSocketPath?: string;
    /** Optional. Presumably a URL for reaching the embedding service. */
    embedUrl?: string;
    /** Optional. Presumably the number of items processed per batch. */
    batchSize?: number;
    /** Optional. Presumably enables more detailed logging. */
    verbose?: boolean;
}
|
|
9
|
+
/**
 * Summary returned by `runIndexer`.
 *
 * NOTE(review): the field descriptions below are inferred from the field
 * names only; confirm against the implementation.
 */
export interface IndexStats {
    /** Presumably the count of newly indexed items. */
    added: number;
    /** Presumably the count of items re-indexed because they changed. */
    modified: number;
    /** Presumably the count of items removed from the index. */
    deleted: number;
    /** Presumably the count of text chunks produced. */
    chunks: number;
    /** Presumably messages for failures encountered during the run. */
    errors: string[];
}
|
|
16
|
+
/**
 * Run the full indexing process.
 *
 * @param options - Settings for this run, as an `IndexOptions` object.
 * @returns A promise that resolves to the `IndexStats` for the run.
 */
export declare function runIndexer(options: IndexOptions): Promise<IndexStats>;
|