chattercatcher 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1,150 +1,3 @@
1
- var __defProp = Object.defineProperty;
2
- var __getOwnPropNames = Object.getOwnPropertyNames;
3
- var __esm = (fn, res) => function __init() {
4
- return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
5
- };
6
- var __export = (target, all) => {
7
- for (var name in all)
8
- __defProp(target, name, { get: all[name], enumerable: true });
9
- };
10
-
11
- // src/config/paths.ts
12
- import os2 from "os";
13
- import path2 from "path";
14
- function getChatterCatcherHome() {
15
- return process.env.CHATTERCATCHER_HOME || path2.join(os2.homedir(), ".chattercatcher");
16
- }
17
- function resolveHomePath(value) {
18
- if (value === "~") {
19
- return os2.homedir();
20
- }
21
- if (value.startsWith("~/") || value.startsWith("~\\")) {
22
- return path2.join(os2.homedir(), value.slice(2));
23
- }
24
- return path2.resolve(value);
25
- }
26
- function getConfigPath() {
27
- return path2.join(getChatterCatcherHome(), "config.json");
28
- }
29
- function getSecretsPath() {
30
- return path2.join(getChatterCatcherHome(), "secrets.json");
31
- }
32
- var init_paths = __esm({
33
- "src/config/paths.ts"() {
34
- "use strict";
35
- }
36
- });
37
-
38
- // src/rag/lancedb-store.ts
39
- var lancedb_store_exports = {};
40
- __export(lancedb_store_exports, {
41
- LanceDbVectorStore: () => LanceDbVectorStore,
42
- getLanceDbPath: () => getLanceDbPath
43
- });
44
- import fs6 from "fs/promises";
45
- import path9 from "path";
46
- function getLanceDbPath(config) {
47
- return path9.join(resolveHomePath(config.storage.dataDir), "vector", "lancedb");
48
- }
49
- function toRow(record) {
50
- return {
51
- id: record.id,
52
- vector: record.vector,
53
- text: record.evidence.text,
54
- source_json: JSON.stringify(record.evidence.source)
55
- };
56
- }
57
- function toLanceData(rows) {
58
- return rows.map((row) => ({
59
- id: row.id,
60
- vector: row.vector,
61
- text: row.text,
62
- source_json: row.source_json
63
- }));
64
- }
65
- function escapeSqlString(value) {
66
- return value.replace(/'/g, "''");
67
- }
68
- function toEvidence(row) {
69
- const distance = row._distance ?? 0;
70
- const vectorScore = 1 / (1 + Math.max(0, distance));
71
- return {
72
- id: row.id,
73
- text: row.text,
74
- score: vectorScore,
75
- vectorScore,
76
- source: JSON.parse(row.source_json)
77
- };
78
- }
79
- var DEFAULT_TABLE_NAME, LanceDbVectorStore;
80
- var init_lancedb_store = __esm({
81
- "src/rag/lancedb-store.ts"() {
82
- "use strict";
83
- init_paths();
84
- DEFAULT_TABLE_NAME = "message_chunks";
85
- LanceDbVectorStore = class _LanceDbVectorStore {
86
- constructor(connection, tableName) {
87
- this.connection = connection;
88
- this.tableName = tableName;
89
- }
90
- connection;
91
- tableName;
92
- static async connect(uri, tableName = DEFAULT_TABLE_NAME) {
93
- await fs6.mkdir(uri, { recursive: true });
94
- const lancedb = await import("@lancedb/lancedb");
95
- const connection = await lancedb.connect(uri);
96
- return new _LanceDbVectorStore(connection, tableName);
97
- }
98
- static async connectFromConfig(config, tableName = DEFAULT_TABLE_NAME) {
99
- return _LanceDbVectorStore.connect(getLanceDbPath(config), tableName);
100
- }
101
- close() {
102
- this.connection.close();
103
- }
104
- async upsert(records) {
105
- if (records.length === 0) {
106
- return;
107
- }
108
- const rows = records.map(toRow);
109
- const data = toLanceData(rows);
110
- const table = await this.ensureTable(data);
111
- const ids = rows.map((row) => `'${escapeSqlString(row.id)}'`).join(", ");
112
- await table.delete(`id IN (${ids})`);
113
- await table.add(data);
114
- }
115
- async search(vector, limit) {
116
- const table = await this.openTableIfExists();
117
- if (!table) {
118
- return [];
119
- }
120
- const rows = await table.vectorSearch(vector).limit(limit).toArray();
121
- return rows.map(toEvidence);
122
- }
123
- async count() {
124
- const table = await this.openTableIfExists();
125
- if (!table) {
126
- return 0;
127
- }
128
- return table.countRows();
129
- }
130
- async ensureTable(initialRows) {
131
- const table = await this.openTableIfExists();
132
- if (table) {
133
- return table;
134
- }
135
- return this.connection.createTable(this.tableName, initialRows);
136
- }
137
- async openTableIfExists() {
138
- const tableNames = await this.connection.tableNames();
139
- if (!tableNames.includes(this.tableName)) {
140
- return null;
141
- }
142
- return this.connection.openTable(this.tableName);
143
- }
144
- };
145
- }
146
- });
147
-
148
1
  // src/config/schema.ts
149
2
  import os from "os";
150
3
  import path from "path";
@@ -211,7 +64,30 @@ function createDefaultSecrets() {
211
64
  // src/config/store.ts
212
65
  import fs from "fs/promises";
213
66
  import path3 from "path";
214
- init_paths();
67
+
68
+ // src/config/paths.ts
69
+ import os2 from "os";
70
+ import path2 from "path";
71
+ function getChatterCatcherHome() {
72
+ return process.env.CHATTERCATCHER_HOME || path2.join(os2.homedir(), ".chattercatcher");
73
+ }
74
+ function resolveHomePath(value) {
75
+ if (value === "~") {
76
+ return os2.homedir();
77
+ }
78
+ if (value.startsWith("~/") || value.startsWith("~\\")) {
79
+ return path2.join(os2.homedir(), value.slice(2));
80
+ }
81
+ return path2.resolve(value);
82
+ }
83
+ function getConfigPath() {
84
+ return path2.join(getChatterCatcherHome(), "config.json");
85
+ }
86
+ function getSecretsPath() {
87
+ return path2.join(getChatterCatcherHome(), "secrets.json");
88
+ }
89
+
90
+ // src/config/store.ts
215
91
  async function readJsonFile(filePath, fallback) {
216
92
  try {
217
93
  const raw = await fs.readFile(filePath, "utf8");
@@ -275,7 +151,6 @@ function resolveEmbeddingApiKey(input) {
275
151
  }
276
152
 
277
153
  // src/data/deletion.ts
278
- init_paths();
279
154
  import fs2 from "fs/promises";
280
155
  import path4 from "path";
281
156
  function emptyResult(targetType, targetId) {
@@ -401,7 +276,6 @@ async function deleteLocalData(input) {
401
276
  }
402
277
 
403
278
  // src/db/database.ts
404
- init_paths();
405
279
  import Database from "better-sqlite3";
406
280
  import fs3 from "fs";
407
281
  import path5 from "path";
@@ -462,6 +336,18 @@ function migrateDatabase(database) {
462
336
  tokenize = 'unicode61'
463
337
  );
464
338
 
339
+ CREATE TABLE IF NOT EXISTS message_chunk_embeddings (
340
+ chunk_id TEXT NOT NULL REFERENCES message_chunks(id) ON DELETE CASCADE,
341
+ model TEXT NOT NULL,
342
+ dimension INTEGER NOT NULL,
343
+ embedding_json TEXT NOT NULL,
344
+ updated_at TEXT NOT NULL,
345
+ PRIMARY KEY (chunk_id, model)
346
+ );
347
+
348
+ CREATE INDEX IF NOT EXISTS message_chunk_embeddings_model_idx
349
+ ON message_chunk_embeddings(model, dimension);
350
+
465
351
  CREATE TABLE IF NOT EXISTS file_jobs (
466
352
  id TEXT PRIMARY KEY,
467
353
  source_path TEXT NOT NULL,
@@ -481,8 +367,7 @@ function migrateDatabase(database) {
481
367
  }
482
368
 
483
369
  // src/doctor/checks.ts
484
- init_paths();
485
- import fs7 from "fs/promises";
370
+ import fs6 from "fs/promises";
486
371
 
487
372
  // src/files/jobs.ts
488
373
  import crypto from "crypto";
@@ -624,12 +509,10 @@ var FileJobRepository = class {
624
509
  };
625
510
 
626
511
  // src/gateway/runtime.ts
627
- init_paths();
628
512
  import fs5 from "fs";
629
513
  import path8 from "path";
630
514
 
631
515
  // src/logs/reader.ts
632
- init_paths();
633
516
  import fs4 from "fs/promises";
634
517
  import { watch } from "fs";
635
518
  import path7 from "path";
@@ -1378,6 +1261,119 @@ var MessageFtsRetriever = class {
1378
1261
  }
1379
1262
  };
1380
1263
 
1264
+ // src/rag/embedding.ts
1265
+ function cosineSimilarity(left, right) {
1266
+ if (left.length === 0 || right.length === 0 || left.length !== right.length) {
1267
+ return 0;
1268
+ }
1269
+ let dot = 0;
1270
+ let leftNorm = 0;
1271
+ let rightNorm = 0;
1272
+ for (let index = 0; index < left.length; index += 1) {
1273
+ const leftValue = left[index] ?? 0;
1274
+ const rightValue = right[index] ?? 0;
1275
+ dot += leftValue * rightValue;
1276
+ leftNorm += leftValue * leftValue;
1277
+ rightNorm += rightValue * rightValue;
1278
+ }
1279
+ if (leftNorm === 0 || rightNorm === 0) {
1280
+ return 0;
1281
+ }
1282
+ return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
1283
+ }
1284
+
1285
+ // src/rag/sqlite-vector-store.ts
1286
+ function parseEmbeddingJson(value) {
1287
+ try {
1288
+ const parsed = JSON.parse(value);
1289
+ return Array.isArray(parsed) && parsed.every((item) => typeof item === "number") ? parsed : [];
1290
+ } catch {
1291
+ return [];
1292
+ }
1293
+ }
1294
+ function toEvidenceSource2(row) {
1295
+ return {
1296
+ type: "message",
1297
+ label: row.chatName,
1298
+ sender: row.senderName,
1299
+ timestamp: row.sentAt
1300
+ };
1301
+ }
1302
+ var SqliteVectorStore = class {
1303
+ constructor(database, options) {
1304
+ this.database = database;
1305
+ this.options = options;
1306
+ }
1307
+ database;
1308
+ options;
1309
+ async upsert(records) {
1310
+ if (records.length === 0) {
1311
+ return;
1312
+ }
1313
+ const updatedAt = (/* @__PURE__ */ new Date()).toISOString();
1314
+ const statement = this.database.prepare(`
1315
+ INSERT INTO message_chunk_embeddings (chunk_id, model, dimension, embedding_json, updated_at)
1316
+ VALUES (@chunkId, @model, @dimension, @embeddingJson, @updatedAt)
1317
+ ON CONFLICT(chunk_id, model)
1318
+ DO UPDATE SET
1319
+ dimension = excluded.dimension,
1320
+ embedding_json = excluded.embedding_json,
1321
+ updated_at = excluded.updated_at
1322
+ `);
1323
+ const transaction = this.database.transaction((input) => {
1324
+ for (const record of input) {
1325
+ statement.run({
1326
+ chunkId: record.id,
1327
+ model: this.options.model,
1328
+ dimension: record.vector.length,
1329
+ embeddingJson: JSON.stringify(record.vector),
1330
+ updatedAt
1331
+ });
1332
+ }
1333
+ });
1334
+ transaction(records);
1335
+ }
1336
+ async search(vector, limit) {
1337
+ if (limit <= 0) {
1338
+ return [];
1339
+ }
1340
+ const rows = this.database.prepare(
1341
+ `
1342
+ SELECT
1343
+ mc.id AS chunkId,
1344
+ mc.text AS text,
1345
+ c.name AS chatName,
1346
+ m.sender_name AS senderName,
1347
+ m.sent_at AS sentAt,
1348
+ e.embedding_json AS embeddingJson
1349
+ FROM message_chunk_embeddings e
1350
+ JOIN message_chunks mc ON mc.id = e.chunk_id
1351
+ JOIN messages m ON m.id = mc.message_id
1352
+ JOIN chats c ON c.id = m.chat_id
1353
+ WHERE e.model = ?
1354
+ `
1355
+ ).all(this.options.model);
1356
+ return rows.flatMap((row) => {
1357
+ const storedVector = parseEmbeddingJson(row.embeddingJson);
1358
+ if (storedVector.length === 0) {
1359
+ return [];
1360
+ }
1361
+ const vectorScore = cosineSimilarity(vector, storedVector);
1362
+ return {
1363
+ id: row.chunkId,
1364
+ text: row.text,
1365
+ score: vectorScore,
1366
+ vectorScore,
1367
+ source: toEvidenceSource2(row)
1368
+ };
1369
+ }).sort((left, right) => right.vectorScore - left.vectorScore).slice(0, limit);
1370
+ }
1371
+ count() {
1372
+ const row = this.database.prepare("SELECT COUNT(*) AS count FROM message_chunk_embeddings WHERE model = ?").get(this.options.model);
1373
+ return row.count;
1374
+ }
1375
+ };
1376
+
1381
1377
  // src/rag/vector-retriever.ts
1382
1378
  var VectorRetriever = class {
1383
1379
  constructor(embedding, store, limit = 8) {
@@ -1402,10 +1398,10 @@ async function createHybridRetriever(input) {
1402
1398
  const retrievers = [new MessageFtsRetriever(input.messages, { excludeMessageIds: input.excludeMessageIds })];
1403
1399
  const closers = [];
1404
1400
  if (hasEmbeddingConfig(input.config, input.secrets)) {
1405
- const { LanceDbVectorStore: LanceDbVectorStore2 } = await Promise.resolve().then(() => (init_lancedb_store(), lancedb_store_exports));
1406
- const vectorStore = await LanceDbVectorStore2.connectFromConfig(input.config);
1401
+ const vectorStore = new SqliteVectorStore(input.database, {
1402
+ model: input.config.embedding.model
1403
+ });
1407
1404
  retrievers.push(new VectorRetriever(createEmbeddingModel(input.config, input.secrets), vectorStore));
1408
- closers.push(() => vectorStore.close());
1409
1405
  }
1410
1406
  return {
1411
1407
  retriever: new HybridRetriever(retrievers),
@@ -1435,7 +1431,7 @@ async function runDoctor(config, secrets, options = {}) {
1435
1431
  checks.push(checkEmbeddingConfig(config, secrets));
1436
1432
  checks.push(await checkSqlite(config));
1437
1433
  checks.push(await checkFilePipeline(config));
1438
- checks.push(await checkLanceDb(config));
1434
+ checks.push(await checkSqliteVectorIndex(config));
1439
1435
  checks.push(checkRagPolicy());
1440
1436
  if (options.online) {
1441
1437
  checks.push(await checkChatModel(config, secrets));
@@ -1446,8 +1442,8 @@ async function runDoctor(config, secrets, options = {}) {
1446
1442
  async function checkHomeDirectory() {
1447
1443
  const home = getChatterCatcherHome();
1448
1444
  try {
1449
- await fs7.mkdir(home, { recursive: true });
1450
- await fs7.access(home);
1445
+ await fs6.mkdir(home, { recursive: true });
1446
+ await fs6.access(home);
1451
1447
  return pass("\u914D\u7F6E\u76EE\u5F55", home);
1452
1448
  } catch (error) {
1453
1449
  return fail("\u914D\u7F6E\u76EE\u5F55", error instanceof Error ? error.message : String(error));
@@ -1468,7 +1464,7 @@ function checkLlmConfig(config, secrets) {
1468
1464
  }
1469
1465
  function checkEmbeddingConfig(config, secrets) {
1470
1466
  if (!hasEmbeddingConfig(config, secrets)) {
1471
- return warn("Embedding \u914D\u7F6E", "\u672A\u914D\u7F6E\u5B8C\u6574\uFF1BRAG \u4F1A\u4F7F\u7528 SQLite FTS\uFF0C\u65E0\u6CD5\u4F7F\u7528 LanceDB \u8BED\u4E49\u68C0\u7D22\u3002");
1467
+ return warn("Embedding \u914D\u7F6E", "\u672A\u914D\u7F6E\u5B8C\u6574\uFF1BRAG \u4F1A\u4F7F\u7528 SQLite FTS\uFF0C\u65E0\u6CD5\u542F\u7528 SQLite embedding \u8BED\u4E49\u68C0\u7D22\u3002");
1472
1468
  }
1473
1469
  return pass("Embedding \u914D\u7F6E", `${config.embedding.model} @ ${config.embedding.baseUrl || config.llm.baseUrl}`);
1474
1470
  }
@@ -1502,17 +1498,22 @@ async function checkFilePipeline(config) {
1502
1498
  database?.close();
1503
1499
  }
1504
1500
  }
1505
- async function checkLanceDb(config) {
1506
- let store = null;
1501
+ async function checkSqliteVectorIndex(config) {
1502
+ let database = null;
1507
1503
  try {
1508
- const { getLanceDbPath: getLanceDbPath2, LanceDbVectorStore: LanceDbVectorStore2 } = await Promise.resolve().then(() => (init_lancedb_store(), lancedb_store_exports));
1509
- store = await LanceDbVectorStore2.connectFromConfig(config);
1510
- const count = await store.count();
1511
- return pass("LanceDB", `${getLanceDbPath2(config)}\uFF1Bvectors=${count}`);
1504
+ database = openDatabase(config);
1505
+ const defaultModel = config.embedding.model || "default";
1506
+ const vectorStore = new SqliteVectorStore(database, { model: defaultModel });
1507
+ const vectors = vectorStore.count();
1508
+ const availableModels = database.prepare("SELECT COUNT(DISTINCT model) AS count FROM message_chunk_embeddings").get();
1509
+ return pass(
1510
+ "SQLite embedding \u5411\u91CF\u7D22\u5F15",
1511
+ `${getDatabasePath(config)}\uFF1Bvectors=${vectors}\uFF1Bmodels=${availableModels.count}${config.embedding.model ? `\uFF1Bactive_model=${config.embedding.model}` : "\uFF1Bactive_model=\u672A\u914D\u7F6E"}`
1512
+ );
1512
1513
  } catch (error) {
1513
- return fail("LanceDB", error instanceof Error ? error.message : String(error));
1514
+ return fail("SQLite embedding \u5411\u91CF\u7D22\u5F15", error instanceof Error ? error.message : String(error));
1514
1515
  } finally {
1515
- store?.close();
1516
+ database?.close();
1516
1517
  }
1517
1518
  }
1518
1519
  function checkRagPolicy() {
@@ -1553,9 +1554,8 @@ function formatDoctorChecks(checks) {
1553
1554
  }
1554
1555
 
1555
1556
  // src/export/data-export.ts
1556
- init_paths();
1557
- import fs8 from "fs/promises";
1558
- import path10 from "path";
1557
+ import fs7 from "fs/promises";
1558
+ import path9 from "path";
1559
1559
  function parseJsonObject(value) {
1560
1560
  try {
1561
1561
  const parsed = JSON.parse(value);
@@ -1574,11 +1574,11 @@ function parseJsonArray(value) {
1574
1574
  }
1575
1575
  function defaultExportPath(config, exportedAt) {
1576
1576
  const fileName = `chattercatcher-export-${exportedAt.replace(/[:.]/g, "-")}.json`;
1577
- return path10.join(resolveHomePath(config.storage.dataDir), "exports", fileName);
1577
+ return path9.join(resolveHomePath(config.storage.dataDir), "exports", fileName);
1578
1578
  }
1579
1579
  async function exportLocalData(input) {
1580
1580
  const exportedAt = input.exportedAt ?? (/* @__PURE__ */ new Date()).toISOString();
1581
- const outputPath = path10.resolve(input.outputPath ?? defaultExportPath(input.config, exportedAt));
1581
+ const outputPath = path9.resolve(input.outputPath ?? defaultExportPath(input.config, exportedAt));
1582
1582
  const chats = input.database.prepare(
1583
1583
  `
1584
1584
  SELECT
@@ -1665,8 +1665,8 @@ async function exportLocalData(input) {
1665
1665
  fileJobs
1666
1666
  }
1667
1667
  };
1668
- await fs8.mkdir(path10.dirname(outputPath), { recursive: true });
1669
- await fs8.writeFile(outputPath, `${JSON.stringify(payload, null, 2)}
1668
+ await fs7.mkdir(path9.dirname(outputPath), { recursive: true });
1669
+ await fs7.writeFile(outputPath, `${JSON.stringify(payload, null, 2)}
1670
1670
  `, "utf8");
1671
1671
  return {
1672
1672
  outputPath,
@@ -1678,8 +1678,8 @@ async function exportLocalData(input) {
1678
1678
  }
1679
1679
 
1680
1680
  // src/export/data-restore.ts
1681
- import fs9 from "fs/promises";
1682
- import path11 from "path";
1681
+ import fs8 from "fs/promises";
1682
+ import path10 from "path";
1683
1683
  function asObject(value) {
1684
1684
  return value && typeof value === "object" && !Array.isArray(value) ? value : {};
1685
1685
  }
@@ -1727,8 +1727,8 @@ function clearDatabase(database) {
1727
1727
  database.prepare("DELETE FROM chats").run();
1728
1728
  }
1729
1729
  async function restoreLocalData(input) {
1730
- const inputPath = path11.resolve(input.inputPath);
1731
- const payload = parsePayload(await fs9.readFile(inputPath, "utf8"));
1730
+ const inputPath = path10.resolve(input.inputPath);
1731
+ const payload = parsePayload(await fs8.readFile(inputPath, "utf8"));
1732
1732
  const mode = input.replace ? "replace" : "merge";
1733
1733
  const restore = input.database.transaction(() => {
1734
1734
  if (input.replace) {
@@ -2110,6 +2110,7 @@ var FeishuQuestionHandler = class {
2110
2110
  const { retriever, close } = await createHybridRetriever({
2111
2111
  config: this.options.config,
2112
2112
  secrets: this.options.secrets,
2113
+ database: this.options.database,
2113
2114
  messages: new MessageRepository(this.options.database),
2114
2115
  excludeMessageIds: options.excludeMessageIds
2115
2116
  });
@@ -2470,10 +2471,9 @@ function normalizeFeishuReceiveMessageEvent(payload) {
2470
2471
  }
2471
2472
 
2472
2473
  // src/feishu/resource-downloader.ts
2473
- init_paths();
2474
2474
  import * as lark3 from "@larksuiteoapi/node-sdk";
2475
- import fs10 from "fs/promises";
2476
- import path12 from "path";
2475
+ import fs9 from "fs/promises";
2476
+ import path11 from "path";
2477
2477
  var RESOURCE_TYPE_BY_KIND = {
2478
2478
  file: "file",
2479
2479
  image: "image",
@@ -2511,10 +2511,10 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
2511
2511
  }
2512
2512
  async download(input) {
2513
2513
  const resourceType = RESOURCE_TYPE_BY_KIND[input.attachment.kind];
2514
- const targetDir = path12.join(this.dataDir, "files", "feishu");
2515
- await fs10.mkdir(targetDir, { recursive: true });
2514
+ const targetDir = path11.join(this.dataDir, "files", "feishu");
2515
+ await fs9.mkdir(targetDir, { recursive: true });
2516
2516
  const fileName = buildStoredFileName(input);
2517
- const storedPath = path12.join(targetDir, fileName);
2517
+ const storedPath = path11.join(targetDir, fileName);
2518
2518
  const payload = {
2519
2519
  params: { type: resourceType },
2520
2520
  path: { message_id: input.messageId, file_key: input.attachment.fileKey }
@@ -2536,31 +2536,30 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
2536
2536
  };
2537
2537
 
2538
2538
  // src/files/ingest.ts
2539
- init_paths();
2540
2539
  import crypto3 from "crypto";
2541
- import fs12 from "fs/promises";
2542
- import path14 from "path";
2543
-
2544
- // src/files/parser.ts
2545
2540
  import fs11 from "fs/promises";
2546
2541
  import path13 from "path";
2542
+
2543
+ // src/files/parser.ts
2544
+ import fs10 from "fs/promises";
2545
+ import path12 from "path";
2547
2546
  import mammoth from "mammoth";
2548
2547
  import { PDFParse } from "pdf-parse";
2549
2548
  var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([".txt", ".md", ".markdown", ".json", ".csv", ".tsv", ".log"]);
2550
2549
  var DOCX_EXTENSIONS = /* @__PURE__ */ new Set([".docx"]);
2551
2550
  var PDF_EXTENSIONS = /* @__PURE__ */ new Set([".pdf"]);
2552
2551
  function isSupportedParseFile(filePath) {
2553
- const extension = path13.extname(filePath).toLowerCase();
2552
+ const extension = path12.extname(filePath).toLowerCase();
2554
2553
  return TEXT_EXTENSIONS.has(extension) || DOCX_EXTENSIONS.has(extension) || PDF_EXTENSIONS.has(extension);
2555
2554
  }
2556
2555
  function describeSupportedParseTypes() {
2557
2556
  return "txt\u3001md\u3001json\u3001csv\u3001tsv\u3001log\u3001docx\u3001pdf";
2558
2557
  }
2559
2558
  async function parseFileToText(filePath) {
2560
- const extension = path13.extname(filePath).toLowerCase();
2559
+ const extension = path12.extname(filePath).toLowerCase();
2561
2560
  if (TEXT_EXTENSIONS.has(extension)) {
2562
2561
  return {
2563
- text: await fs11.readFile(filePath, "utf8"),
2562
+ text: await fs10.readFile(filePath, "utf8"),
2564
2563
  parser: "text",
2565
2564
  warnings: []
2566
2565
  };
@@ -2574,7 +2573,7 @@ async function parseFileToText(filePath) {
2574
2573
  };
2575
2574
  }
2576
2575
  if (PDF_EXTENSIONS.has(extension)) {
2577
- const buffer = await fs11.readFile(filePath);
2576
+ const buffer = await fs10.readFile(filePath);
2578
2577
  const parser = new PDFParse({ data: buffer });
2579
2578
  try {
2580
2579
  const result = await parser.getText();
@@ -2596,7 +2595,7 @@ function isSupportedTextFile(filePath) {
2596
2595
  }
2597
2596
  function ensureSupportedTextFile(filePath) {
2598
2597
  if (!isSupportedTextFile(filePath)) {
2599
- const extension = path14.extname(filePath).toLowerCase();
2598
+ const extension = path13.extname(filePath).toLowerCase();
2600
2599
  throw new Error(`\u6682\u4E0D\u652F\u6301\u8BE5\u6587\u4EF6\u7C7B\u578B\uFF1A${extension || "\u65E0\u6269\u5C55\u540D"}\u3002\u5F53\u524D\u652F\u6301 ${describeSupportedParseTypes()}\u3002`);
2601
2600
  }
2602
2601
  }
@@ -2605,12 +2604,12 @@ function stableStoredName(sourcePath, fileName) {
2605
2604
  return `${digest}-${fileName}`;
2606
2605
  }
2607
2606
  async function ingestLocalFile(input) {
2608
- const sourcePath = path14.resolve(input.filePath);
2609
- const fileName = path14.basename(sourcePath);
2607
+ const sourcePath = path13.resolve(input.filePath);
2608
+ const fileName = path13.basename(sourcePath);
2610
2609
  const jobId = input.jobs?.start({ sourcePath, fileName });
2611
2610
  try {
2612
2611
  ensureSupportedTextFile(sourcePath);
2613
- const stat = await fs12.stat(sourcePath);
2612
+ const stat = await fs11.stat(sourcePath);
2614
2613
  if (!stat.isFile()) {
2615
2614
  throw new Error(`\u4E0D\u662F\u6587\u4EF6\uFF1A${sourcePath}`);
2616
2615
  }
@@ -2619,10 +2618,10 @@ async function ingestLocalFile(input) {
2619
2618
  if (!text) {
2620
2619
  throw new Error(`\u6587\u4EF6\u6CA1\u6709\u53EF\u7D22\u5F15\u6587\u672C\uFF1A${sourcePath}`);
2621
2620
  }
2622
- const fileDir = path14.join(resolveHomePath(input.config.storage.dataDir), "files");
2623
- await fs12.mkdir(fileDir, { recursive: true });
2624
- const storedPath = path14.join(fileDir, stableStoredName(sourcePath, fileName));
2625
- await fs12.copyFile(sourcePath, storedPath);
2621
+ const fileDir = path13.join(resolveHomePath(input.config.storage.dataDir), "files");
2622
+ await fs11.mkdir(fileDir, { recursive: true });
2623
+ const storedPath = path13.join(fileDir, stableStoredName(sourcePath, fileName));
2624
+ await fs11.copyFile(sourcePath, storedPath);
2626
2625
  const messageId = input.messages.ingest({
2627
2626
  platform: "local-file",
2628
2627
  platformChatId: "local-files",
@@ -2753,27 +2752,6 @@ var GatewayIngestor = class {
2753
2752
  }
2754
2753
  };
2755
2754
 
2756
- // src/rag/embedding.ts
2757
- function cosineSimilarity(left, right) {
2758
- if (left.length === 0 || right.length === 0 || left.length !== right.length) {
2759
- return 0;
2760
- }
2761
- let dot = 0;
2762
- let leftNorm = 0;
2763
- let rightNorm = 0;
2764
- for (let index = 0; index < left.length; index += 1) {
2765
- const leftValue = left[index] ?? 0;
2766
- const rightValue = right[index] ?? 0;
2767
- dot += leftValue * rightValue;
2768
- leftNorm += leftValue * leftValue;
2769
- rightNorm += rightValue * rightValue;
2770
- }
2771
- if (leftNorm === 0 || rightNorm === 0) {
2772
- return 0;
2773
- }
2774
- return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
2775
- }
2776
-
2777
2755
  // src/rag/indexer.ts
2778
2756
  async function indexMessageChunks(input) {
2779
2757
  const chunks = input.messageIds ? input.messages.listMessageChunksByMessageIds(input.messageIds, input.limit ?? 1e4) : input.messages.listAllMessageChunks(input.limit ?? 1e4);
@@ -2794,7 +2772,7 @@ async function indexMessageChunks(input) {
2794
2772
  id: chunk.chunkId,
2795
2773
  text: chunk.text,
2796
2774
  score: 1,
2797
- source: toEvidenceSource2(chunk)
2775
+ source: toEvidenceSource3(chunk)
2798
2776
  }
2799
2777
  });
2800
2778
  }
@@ -2804,7 +2782,7 @@ async function indexMessageChunks(input) {
2804
2782
  vectors: records.length
2805
2783
  };
2806
2784
  }
2807
- function toEvidenceSource2(chunk) {
2785
+ function toEvidenceSource3(chunk) {
2808
2786
  if (chunk.messageType === "file") {
2809
2787
  return {
2810
2788
  type: "file",
@@ -2820,9 +2798,6 @@ function toEvidenceSource2(chunk) {
2820
2798
  };
2821
2799
  }
2822
2800
 
2823
- // src/index.ts
2824
- init_lancedb_store();
2825
-
2826
2801
  // src/rag/manual-index.ts
2827
2802
  async function processMessagesNow(input) {
2828
2803
  const startedAt = (/* @__PURE__ */ new Date()).toISOString();
@@ -2836,25 +2811,23 @@ async function processMessagesNow(input) {
2836
2811
  finishedAt: (/* @__PURE__ */ new Date()).toISOString()
2837
2812
  };
2838
2813
  }
2839
- const { LanceDbVectorStore: LanceDbVectorStore2 } = await Promise.resolve().then(() => (init_lancedb_store(), lancedb_store_exports));
2840
- const vectorStore = await LanceDbVectorStore2.connectFromConfig(input.config);
2841
- try {
2842
- const stats = await indexMessageChunks({
2843
- messages: new MessageRepository(input.database),
2844
- embedding: createEmbeddingModel(input.config, input.secrets),
2845
- store: vectorStore,
2846
- limit: input.limit
2847
- });
2848
- return {
2849
- status: "completed",
2850
- chunks: stats.chunks,
2851
- vectors: stats.vectors,
2852
- startedAt,
2853
- finishedAt: (/* @__PURE__ */ new Date()).toISOString()
2854
- };
2855
- } finally {
2856
- vectorStore.close();
2857
- }
2814
+ const vectorStore = new SqliteVectorStore(input.database, {
2815
+ model: input.config.embedding.model
2816
+ });
2817
+ const embedding = input.embedding ?? createEmbeddingModel(input.config, input.secrets);
2818
+ const stats = await indexMessageChunks({
2819
+ messages: new MessageRepository(input.database),
2820
+ embedding,
2821
+ store: vectorStore,
2822
+ limit: input.limit
2823
+ });
2824
+ return {
2825
+ status: "completed",
2826
+ chunks: stats.chunks,
2827
+ vectors: stats.vectors,
2828
+ startedAt,
2829
+ finishedAt: (/* @__PURE__ */ new Date()).toISOString()
2830
+ };
2858
2831
  }
2859
2832
 
2860
2833
  // src/rag/vector-store.ts
@@ -3274,7 +3247,7 @@ function createWebApp(config) {
3274
3247
  note: "\u95EE\u7B54\u5FC5\u987B\u5148\u68C0\u7D22\u8BC1\u636E\uFF0C\u7981\u6B62\u5168\u91CF\u4E0A\u4E0B\u6587\u5806\u53E0\u3002",
3275
3248
  retrieval: {
3276
3249
  keyword: "SQLite FTS5",
3277
- vector: "LanceDB",
3250
+ vector: "SQLite embedding",
3278
3251
  hybrid: true
3279
3252
  }
3280
3253
  },
@@ -3338,7 +3311,6 @@ export {
3338
3311
  FileJobRepository,
3339
3312
  GatewayIngestor,
3340
3313
  HybridRetriever,
3341
- LanceDbVectorStore,
3342
3314
  MemoryVectorStore,
3343
3315
  MessageFtsRetriever,
3344
3316
  MessageRepository,
@@ -3375,7 +3347,6 @@ export {
3375
3347
  getGatewayLogPath,
3376
3348
  getGatewayPidPath,
3377
3349
  getGatewayRuntimeState,
3378
- getLanceDbPath,
3379
3350
  getLogsDirectory,
3380
3351
  hasEmbeddingConfig,
3381
3352
  indexMessageChunks,