chattercatcher 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -25
- package/dist/cli.js +321 -281
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +3 -16
- package/dist/index.js +222 -251
- package/dist/index.js.map +1 -1
- package/docs/DEVELOPMENT_PLAN.md +1 -1
- package/docs/TECHNICAL_ARCHITECTURE.md +10 -10
- package/package.json +1 -2
package/dist/index.js
CHANGED
|
@@ -1,150 +1,3 @@
|
|
|
1
|
-
var __defProp = Object.defineProperty;
|
|
2
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
3
|
-
var __esm = (fn, res) => function __init() {
|
|
4
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
5
|
-
};
|
|
6
|
-
var __export = (target, all) => {
|
|
7
|
-
for (var name in all)
|
|
8
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
-
};
|
|
10
|
-
|
|
11
|
-
// src/config/paths.ts
|
|
12
|
-
import os2 from "os";
|
|
13
|
-
import path2 from "path";
|
|
14
|
-
function getChatterCatcherHome() {
|
|
15
|
-
return process.env.CHATTERCATCHER_HOME || path2.join(os2.homedir(), ".chattercatcher");
|
|
16
|
-
}
|
|
17
|
-
function resolveHomePath(value) {
|
|
18
|
-
if (value === "~") {
|
|
19
|
-
return os2.homedir();
|
|
20
|
-
}
|
|
21
|
-
if (value.startsWith("~/") || value.startsWith("~\\")) {
|
|
22
|
-
return path2.join(os2.homedir(), value.slice(2));
|
|
23
|
-
}
|
|
24
|
-
return path2.resolve(value);
|
|
25
|
-
}
|
|
26
|
-
function getConfigPath() {
|
|
27
|
-
return path2.join(getChatterCatcherHome(), "config.json");
|
|
28
|
-
}
|
|
29
|
-
function getSecretsPath() {
|
|
30
|
-
return path2.join(getChatterCatcherHome(), "secrets.json");
|
|
31
|
-
}
|
|
32
|
-
var init_paths = __esm({
|
|
33
|
-
"src/config/paths.ts"() {
|
|
34
|
-
"use strict";
|
|
35
|
-
}
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
// src/rag/lancedb-store.ts
|
|
39
|
-
var lancedb_store_exports = {};
|
|
40
|
-
__export(lancedb_store_exports, {
|
|
41
|
-
LanceDbVectorStore: () => LanceDbVectorStore,
|
|
42
|
-
getLanceDbPath: () => getLanceDbPath
|
|
43
|
-
});
|
|
44
|
-
import fs6 from "fs/promises";
|
|
45
|
-
import path9 from "path";
|
|
46
|
-
function getLanceDbPath(config) {
|
|
47
|
-
return path9.join(resolveHomePath(config.storage.dataDir), "vector", "lancedb");
|
|
48
|
-
}
|
|
49
|
-
function toRow(record) {
|
|
50
|
-
return {
|
|
51
|
-
id: record.id,
|
|
52
|
-
vector: record.vector,
|
|
53
|
-
text: record.evidence.text,
|
|
54
|
-
source_json: JSON.stringify(record.evidence.source)
|
|
55
|
-
};
|
|
56
|
-
}
|
|
57
|
-
function toLanceData(rows) {
|
|
58
|
-
return rows.map((row) => ({
|
|
59
|
-
id: row.id,
|
|
60
|
-
vector: row.vector,
|
|
61
|
-
text: row.text,
|
|
62
|
-
source_json: row.source_json
|
|
63
|
-
}));
|
|
64
|
-
}
|
|
65
|
-
function escapeSqlString(value) {
|
|
66
|
-
return value.replace(/'/g, "''");
|
|
67
|
-
}
|
|
68
|
-
function toEvidence(row) {
|
|
69
|
-
const distance = row._distance ?? 0;
|
|
70
|
-
const vectorScore = 1 / (1 + Math.max(0, distance));
|
|
71
|
-
return {
|
|
72
|
-
id: row.id,
|
|
73
|
-
text: row.text,
|
|
74
|
-
score: vectorScore,
|
|
75
|
-
vectorScore,
|
|
76
|
-
source: JSON.parse(row.source_json)
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
var DEFAULT_TABLE_NAME, LanceDbVectorStore;
|
|
80
|
-
var init_lancedb_store = __esm({
|
|
81
|
-
"src/rag/lancedb-store.ts"() {
|
|
82
|
-
"use strict";
|
|
83
|
-
init_paths();
|
|
84
|
-
DEFAULT_TABLE_NAME = "message_chunks";
|
|
85
|
-
LanceDbVectorStore = class _LanceDbVectorStore {
|
|
86
|
-
constructor(connection, tableName) {
|
|
87
|
-
this.connection = connection;
|
|
88
|
-
this.tableName = tableName;
|
|
89
|
-
}
|
|
90
|
-
connection;
|
|
91
|
-
tableName;
|
|
92
|
-
static async connect(uri, tableName = DEFAULT_TABLE_NAME) {
|
|
93
|
-
await fs6.mkdir(uri, { recursive: true });
|
|
94
|
-
const lancedb = await import("@lancedb/lancedb");
|
|
95
|
-
const connection = await lancedb.connect(uri);
|
|
96
|
-
return new _LanceDbVectorStore(connection, tableName);
|
|
97
|
-
}
|
|
98
|
-
static async connectFromConfig(config, tableName = DEFAULT_TABLE_NAME) {
|
|
99
|
-
return _LanceDbVectorStore.connect(getLanceDbPath(config), tableName);
|
|
100
|
-
}
|
|
101
|
-
close() {
|
|
102
|
-
this.connection.close();
|
|
103
|
-
}
|
|
104
|
-
async upsert(records) {
|
|
105
|
-
if (records.length === 0) {
|
|
106
|
-
return;
|
|
107
|
-
}
|
|
108
|
-
const rows = records.map(toRow);
|
|
109
|
-
const data = toLanceData(rows);
|
|
110
|
-
const table = await this.ensureTable(data);
|
|
111
|
-
const ids = rows.map((row) => `'${escapeSqlString(row.id)}'`).join(", ");
|
|
112
|
-
await table.delete(`id IN (${ids})`);
|
|
113
|
-
await table.add(data);
|
|
114
|
-
}
|
|
115
|
-
async search(vector, limit) {
|
|
116
|
-
const table = await this.openTableIfExists();
|
|
117
|
-
if (!table) {
|
|
118
|
-
return [];
|
|
119
|
-
}
|
|
120
|
-
const rows = await table.vectorSearch(vector).limit(limit).toArray();
|
|
121
|
-
return rows.map(toEvidence);
|
|
122
|
-
}
|
|
123
|
-
async count() {
|
|
124
|
-
const table = await this.openTableIfExists();
|
|
125
|
-
if (!table) {
|
|
126
|
-
return 0;
|
|
127
|
-
}
|
|
128
|
-
return table.countRows();
|
|
129
|
-
}
|
|
130
|
-
async ensureTable(initialRows) {
|
|
131
|
-
const table = await this.openTableIfExists();
|
|
132
|
-
if (table) {
|
|
133
|
-
return table;
|
|
134
|
-
}
|
|
135
|
-
return this.connection.createTable(this.tableName, initialRows);
|
|
136
|
-
}
|
|
137
|
-
async openTableIfExists() {
|
|
138
|
-
const tableNames = await this.connection.tableNames();
|
|
139
|
-
if (!tableNames.includes(this.tableName)) {
|
|
140
|
-
return null;
|
|
141
|
-
}
|
|
142
|
-
return this.connection.openTable(this.tableName);
|
|
143
|
-
}
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
});
|
|
147
|
-
|
|
148
1
|
// src/config/schema.ts
|
|
149
2
|
import os from "os";
|
|
150
3
|
import path from "path";
|
|
@@ -211,7 +64,30 @@ function createDefaultSecrets() {
|
|
|
211
64
|
// src/config/store.ts
|
|
212
65
|
import fs from "fs/promises";
|
|
213
66
|
import path3 from "path";
|
|
214
|
-
|
|
67
|
+
|
|
68
|
+
// src/config/paths.ts
|
|
69
|
+
import os2 from "os";
|
|
70
|
+
import path2 from "path";
|
|
71
|
+
function getChatterCatcherHome() {
|
|
72
|
+
return process.env.CHATTERCATCHER_HOME || path2.join(os2.homedir(), ".chattercatcher");
|
|
73
|
+
}
|
|
74
|
+
function resolveHomePath(value) {
|
|
75
|
+
if (value === "~") {
|
|
76
|
+
return os2.homedir();
|
|
77
|
+
}
|
|
78
|
+
if (value.startsWith("~/") || value.startsWith("~\\")) {
|
|
79
|
+
return path2.join(os2.homedir(), value.slice(2));
|
|
80
|
+
}
|
|
81
|
+
return path2.resolve(value);
|
|
82
|
+
}
|
|
83
|
+
function getConfigPath() {
|
|
84
|
+
return path2.join(getChatterCatcherHome(), "config.json");
|
|
85
|
+
}
|
|
86
|
+
function getSecretsPath() {
|
|
87
|
+
return path2.join(getChatterCatcherHome(), "secrets.json");
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
// src/config/store.ts
|
|
215
91
|
async function readJsonFile(filePath, fallback) {
|
|
216
92
|
try {
|
|
217
93
|
const raw = await fs.readFile(filePath, "utf8");
|
|
@@ -275,7 +151,6 @@ function resolveEmbeddingApiKey(input) {
|
|
|
275
151
|
}
|
|
276
152
|
|
|
277
153
|
// src/data/deletion.ts
|
|
278
|
-
init_paths();
|
|
279
154
|
import fs2 from "fs/promises";
|
|
280
155
|
import path4 from "path";
|
|
281
156
|
function emptyResult(targetType, targetId) {
|
|
@@ -401,7 +276,6 @@ async function deleteLocalData(input) {
|
|
|
401
276
|
}
|
|
402
277
|
|
|
403
278
|
// src/db/database.ts
|
|
404
|
-
init_paths();
|
|
405
279
|
import Database from "better-sqlite3";
|
|
406
280
|
import fs3 from "fs";
|
|
407
281
|
import path5 from "path";
|
|
@@ -462,6 +336,18 @@ function migrateDatabase(database) {
|
|
|
462
336
|
tokenize = 'unicode61'
|
|
463
337
|
);
|
|
464
338
|
|
|
339
|
+
CREATE TABLE IF NOT EXISTS message_chunk_embeddings (
|
|
340
|
+
chunk_id TEXT NOT NULL REFERENCES message_chunks(id) ON DELETE CASCADE,
|
|
341
|
+
model TEXT NOT NULL,
|
|
342
|
+
dimension INTEGER NOT NULL,
|
|
343
|
+
embedding_json TEXT NOT NULL,
|
|
344
|
+
updated_at TEXT NOT NULL,
|
|
345
|
+
PRIMARY KEY (chunk_id, model)
|
|
346
|
+
);
|
|
347
|
+
|
|
348
|
+
CREATE INDEX IF NOT EXISTS message_chunk_embeddings_model_idx
|
|
349
|
+
ON message_chunk_embeddings(model, dimension);
|
|
350
|
+
|
|
465
351
|
CREATE TABLE IF NOT EXISTS file_jobs (
|
|
466
352
|
id TEXT PRIMARY KEY,
|
|
467
353
|
source_path TEXT NOT NULL,
|
|
@@ -481,8 +367,7 @@ function migrateDatabase(database) {
|
|
|
481
367
|
}
|
|
482
368
|
|
|
483
369
|
// src/doctor/checks.ts
|
|
484
|
-
|
|
485
|
-
import fs7 from "fs/promises";
|
|
370
|
+
import fs6 from "fs/promises";
|
|
486
371
|
|
|
487
372
|
// src/files/jobs.ts
|
|
488
373
|
import crypto from "crypto";
|
|
@@ -624,12 +509,10 @@ var FileJobRepository = class {
|
|
|
624
509
|
};
|
|
625
510
|
|
|
626
511
|
// src/gateway/runtime.ts
|
|
627
|
-
init_paths();
|
|
628
512
|
import fs5 from "fs";
|
|
629
513
|
import path8 from "path";
|
|
630
514
|
|
|
631
515
|
// src/logs/reader.ts
|
|
632
|
-
init_paths();
|
|
633
516
|
import fs4 from "fs/promises";
|
|
634
517
|
import { watch } from "fs";
|
|
635
518
|
import path7 from "path";
|
|
@@ -1378,6 +1261,119 @@ var MessageFtsRetriever = class {
|
|
|
1378
1261
|
}
|
|
1379
1262
|
};
|
|
1380
1263
|
|
|
1264
|
+
// src/rag/embedding.ts
|
|
1265
|
+
function cosineSimilarity(left, right) {
|
|
1266
|
+
if (left.length === 0 || right.length === 0 || left.length !== right.length) {
|
|
1267
|
+
return 0;
|
|
1268
|
+
}
|
|
1269
|
+
let dot = 0;
|
|
1270
|
+
let leftNorm = 0;
|
|
1271
|
+
let rightNorm = 0;
|
|
1272
|
+
for (let index = 0; index < left.length; index += 1) {
|
|
1273
|
+
const leftValue = left[index] ?? 0;
|
|
1274
|
+
const rightValue = right[index] ?? 0;
|
|
1275
|
+
dot += leftValue * rightValue;
|
|
1276
|
+
leftNorm += leftValue * leftValue;
|
|
1277
|
+
rightNorm += rightValue * rightValue;
|
|
1278
|
+
}
|
|
1279
|
+
if (leftNorm === 0 || rightNorm === 0) {
|
|
1280
|
+
return 0;
|
|
1281
|
+
}
|
|
1282
|
+
return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
// src/rag/sqlite-vector-store.ts
|
|
1286
|
+
function parseEmbeddingJson(value) {
|
|
1287
|
+
try {
|
|
1288
|
+
const parsed = JSON.parse(value);
|
|
1289
|
+
return Array.isArray(parsed) && parsed.every((item) => typeof item === "number") ? parsed : [];
|
|
1290
|
+
} catch {
|
|
1291
|
+
return [];
|
|
1292
|
+
}
|
|
1293
|
+
}
|
|
1294
|
+
function toEvidenceSource2(row) {
|
|
1295
|
+
return {
|
|
1296
|
+
type: "message",
|
|
1297
|
+
label: row.chatName,
|
|
1298
|
+
sender: row.senderName,
|
|
1299
|
+
timestamp: row.sentAt
|
|
1300
|
+
};
|
|
1301
|
+
}
|
|
1302
|
+
var SqliteVectorStore = class {
|
|
1303
|
+
constructor(database, options) {
|
|
1304
|
+
this.database = database;
|
|
1305
|
+
this.options = options;
|
|
1306
|
+
}
|
|
1307
|
+
database;
|
|
1308
|
+
options;
|
|
1309
|
+
async upsert(records) {
|
|
1310
|
+
if (records.length === 0) {
|
|
1311
|
+
return;
|
|
1312
|
+
}
|
|
1313
|
+
const updatedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
1314
|
+
const statement = this.database.prepare(`
|
|
1315
|
+
INSERT INTO message_chunk_embeddings (chunk_id, model, dimension, embedding_json, updated_at)
|
|
1316
|
+
VALUES (@chunkId, @model, @dimension, @embeddingJson, @updatedAt)
|
|
1317
|
+
ON CONFLICT(chunk_id, model)
|
|
1318
|
+
DO UPDATE SET
|
|
1319
|
+
dimension = excluded.dimension,
|
|
1320
|
+
embedding_json = excluded.embedding_json,
|
|
1321
|
+
updated_at = excluded.updated_at
|
|
1322
|
+
`);
|
|
1323
|
+
const transaction = this.database.transaction((input) => {
|
|
1324
|
+
for (const record of input) {
|
|
1325
|
+
statement.run({
|
|
1326
|
+
chunkId: record.id,
|
|
1327
|
+
model: this.options.model,
|
|
1328
|
+
dimension: record.vector.length,
|
|
1329
|
+
embeddingJson: JSON.stringify(record.vector),
|
|
1330
|
+
updatedAt
|
|
1331
|
+
});
|
|
1332
|
+
}
|
|
1333
|
+
});
|
|
1334
|
+
transaction(records);
|
|
1335
|
+
}
|
|
1336
|
+
async search(vector, limit) {
|
|
1337
|
+
if (limit <= 0) {
|
|
1338
|
+
return [];
|
|
1339
|
+
}
|
|
1340
|
+
const rows = this.database.prepare(
|
|
1341
|
+
`
|
|
1342
|
+
SELECT
|
|
1343
|
+
mc.id AS chunkId,
|
|
1344
|
+
mc.text AS text,
|
|
1345
|
+
c.name AS chatName,
|
|
1346
|
+
m.sender_name AS senderName,
|
|
1347
|
+
m.sent_at AS sentAt,
|
|
1348
|
+
e.embedding_json AS embeddingJson
|
|
1349
|
+
FROM message_chunk_embeddings e
|
|
1350
|
+
JOIN message_chunks mc ON mc.id = e.chunk_id
|
|
1351
|
+
JOIN messages m ON m.id = mc.message_id
|
|
1352
|
+
JOIN chats c ON c.id = m.chat_id
|
|
1353
|
+
WHERE e.model = ?
|
|
1354
|
+
`
|
|
1355
|
+
).all(this.options.model);
|
|
1356
|
+
return rows.flatMap((row) => {
|
|
1357
|
+
const storedVector = parseEmbeddingJson(row.embeddingJson);
|
|
1358
|
+
if (storedVector.length === 0) {
|
|
1359
|
+
return [];
|
|
1360
|
+
}
|
|
1361
|
+
const vectorScore = cosineSimilarity(vector, storedVector);
|
|
1362
|
+
return {
|
|
1363
|
+
id: row.chunkId,
|
|
1364
|
+
text: row.text,
|
|
1365
|
+
score: vectorScore,
|
|
1366
|
+
vectorScore,
|
|
1367
|
+
source: toEvidenceSource2(row)
|
|
1368
|
+
};
|
|
1369
|
+
}).sort((left, right) => right.vectorScore - left.vectorScore).slice(0, limit);
|
|
1370
|
+
}
|
|
1371
|
+
count() {
|
|
1372
|
+
const row = this.database.prepare("SELECT COUNT(*) AS count FROM message_chunk_embeddings WHERE model = ?").get(this.options.model);
|
|
1373
|
+
return row.count;
|
|
1374
|
+
}
|
|
1375
|
+
};
|
|
1376
|
+
|
|
1381
1377
|
// src/rag/vector-retriever.ts
|
|
1382
1378
|
var VectorRetriever = class {
|
|
1383
1379
|
constructor(embedding, store, limit = 8) {
|
|
@@ -1402,10 +1398,10 @@ async function createHybridRetriever(input) {
|
|
|
1402
1398
|
const retrievers = [new MessageFtsRetriever(input.messages, { excludeMessageIds: input.excludeMessageIds })];
|
|
1403
1399
|
const closers = [];
|
|
1404
1400
|
if (hasEmbeddingConfig(input.config, input.secrets)) {
|
|
1405
|
-
const
|
|
1406
|
-
|
|
1401
|
+
const vectorStore = new SqliteVectorStore(input.database, {
|
|
1402
|
+
model: input.config.embedding.model
|
|
1403
|
+
});
|
|
1407
1404
|
retrievers.push(new VectorRetriever(createEmbeddingModel(input.config, input.secrets), vectorStore));
|
|
1408
|
-
closers.push(() => vectorStore.close());
|
|
1409
1405
|
}
|
|
1410
1406
|
return {
|
|
1411
1407
|
retriever: new HybridRetriever(retrievers),
|
|
@@ -1435,7 +1431,7 @@ async function runDoctor(config, secrets, options = {}) {
|
|
|
1435
1431
|
checks.push(checkEmbeddingConfig(config, secrets));
|
|
1436
1432
|
checks.push(await checkSqlite(config));
|
|
1437
1433
|
checks.push(await checkFilePipeline(config));
|
|
1438
|
-
checks.push(await
|
|
1434
|
+
checks.push(await checkSqliteVectorIndex(config));
|
|
1439
1435
|
checks.push(checkRagPolicy());
|
|
1440
1436
|
if (options.online) {
|
|
1441
1437
|
checks.push(await checkChatModel(config, secrets));
|
|
@@ -1446,8 +1442,8 @@ async function runDoctor(config, secrets, options = {}) {
|
|
|
1446
1442
|
async function checkHomeDirectory() {
|
|
1447
1443
|
const home = getChatterCatcherHome();
|
|
1448
1444
|
try {
|
|
1449
|
-
await
|
|
1450
|
-
await
|
|
1445
|
+
await fs6.mkdir(home, { recursive: true });
|
|
1446
|
+
await fs6.access(home);
|
|
1451
1447
|
return pass("\u914D\u7F6E\u76EE\u5F55", home);
|
|
1452
1448
|
} catch (error) {
|
|
1453
1449
|
return fail("\u914D\u7F6E\u76EE\u5F55", error instanceof Error ? error.message : String(error));
|
|
@@ -1468,7 +1464,7 @@ function checkLlmConfig(config, secrets) {
|
|
|
1468
1464
|
}
|
|
1469
1465
|
function checkEmbeddingConfig(config, secrets) {
|
|
1470
1466
|
if (!hasEmbeddingConfig(config, secrets)) {
|
|
1471
|
-
return warn("Embedding \u914D\u7F6E", "\u672A\u914D\u7F6E\u5B8C\u6574\uFF1BRAG \u4F1A\u4F7F\u7528 SQLite FTS\uFF0C\u65E0\u6CD5\
|
|
1467
|
+
return warn("Embedding \u914D\u7F6E", "\u672A\u914D\u7F6E\u5B8C\u6574\uFF1BRAG \u4F1A\u4F7F\u7528 SQLite FTS\uFF0C\u65E0\u6CD5\u542F\u7528 SQLite embedding \u8BED\u4E49\u68C0\u7D22\u3002");
|
|
1472
1468
|
}
|
|
1473
1469
|
return pass("Embedding \u914D\u7F6E", `${config.embedding.model} @ ${config.embedding.baseUrl || config.llm.baseUrl}`);
|
|
1474
1470
|
}
|
|
@@ -1502,17 +1498,22 @@ async function checkFilePipeline(config) {
|
|
|
1502
1498
|
database?.close();
|
|
1503
1499
|
}
|
|
1504
1500
|
}
|
|
1505
|
-
async function
|
|
1506
|
-
let
|
|
1501
|
+
async function checkSqliteVectorIndex(config) {
|
|
1502
|
+
let database = null;
|
|
1507
1503
|
try {
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
const
|
|
1511
|
-
|
|
1504
|
+
database = openDatabase(config);
|
|
1505
|
+
const defaultModel = config.embedding.model || "default";
|
|
1506
|
+
const vectorStore = new SqliteVectorStore(database, { model: defaultModel });
|
|
1507
|
+
const vectors = vectorStore.count();
|
|
1508
|
+
const availableModels = database.prepare("SELECT COUNT(DISTINCT model) AS count FROM message_chunk_embeddings").get();
|
|
1509
|
+
return pass(
|
|
1510
|
+
"SQLite embedding \u5411\u91CF\u7D22\u5F15",
|
|
1511
|
+
`${getDatabasePath(config)}\uFF1Bvectors=${vectors}\uFF1Bmodels=${availableModels.count}${config.embedding.model ? `\uFF1Bactive_model=${config.embedding.model}` : "\uFF1Bactive_model=\u672A\u914D\u7F6E"}`
|
|
1512
|
+
);
|
|
1512
1513
|
} catch (error) {
|
|
1513
|
-
return fail("
|
|
1514
|
+
return fail("SQLite embedding \u5411\u91CF\u7D22\u5F15", error instanceof Error ? error.message : String(error));
|
|
1514
1515
|
} finally {
|
|
1515
|
-
|
|
1516
|
+
database?.close();
|
|
1516
1517
|
}
|
|
1517
1518
|
}
|
|
1518
1519
|
function checkRagPolicy() {
|
|
@@ -1553,9 +1554,8 @@ function formatDoctorChecks(checks) {
|
|
|
1553
1554
|
}
|
|
1554
1555
|
|
|
1555
1556
|
// src/export/data-export.ts
|
|
1556
|
-
|
|
1557
|
-
import
|
|
1558
|
-
import path10 from "path";
|
|
1557
|
+
import fs7 from "fs/promises";
|
|
1558
|
+
import path9 from "path";
|
|
1559
1559
|
function parseJsonObject(value) {
|
|
1560
1560
|
try {
|
|
1561
1561
|
const parsed = JSON.parse(value);
|
|
@@ -1574,11 +1574,11 @@ function parseJsonArray(value) {
|
|
|
1574
1574
|
}
|
|
1575
1575
|
function defaultExportPath(config, exportedAt) {
|
|
1576
1576
|
const fileName = `chattercatcher-export-${exportedAt.replace(/[:.]/g, "-")}.json`;
|
|
1577
|
-
return
|
|
1577
|
+
return path9.join(resolveHomePath(config.storage.dataDir), "exports", fileName);
|
|
1578
1578
|
}
|
|
1579
1579
|
async function exportLocalData(input) {
|
|
1580
1580
|
const exportedAt = input.exportedAt ?? (/* @__PURE__ */ new Date()).toISOString();
|
|
1581
|
-
const outputPath =
|
|
1581
|
+
const outputPath = path9.resolve(input.outputPath ?? defaultExportPath(input.config, exportedAt));
|
|
1582
1582
|
const chats = input.database.prepare(
|
|
1583
1583
|
`
|
|
1584
1584
|
SELECT
|
|
@@ -1665,8 +1665,8 @@ async function exportLocalData(input) {
|
|
|
1665
1665
|
fileJobs
|
|
1666
1666
|
}
|
|
1667
1667
|
};
|
|
1668
|
-
await
|
|
1669
|
-
await
|
|
1668
|
+
await fs7.mkdir(path9.dirname(outputPath), { recursive: true });
|
|
1669
|
+
await fs7.writeFile(outputPath, `${JSON.stringify(payload, null, 2)}
|
|
1670
1670
|
`, "utf8");
|
|
1671
1671
|
return {
|
|
1672
1672
|
outputPath,
|
|
@@ -1678,8 +1678,8 @@ async function exportLocalData(input) {
|
|
|
1678
1678
|
}
|
|
1679
1679
|
|
|
1680
1680
|
// src/export/data-restore.ts
|
|
1681
|
-
import
|
|
1682
|
-
import
|
|
1681
|
+
import fs8 from "fs/promises";
|
|
1682
|
+
import path10 from "path";
|
|
1683
1683
|
function asObject(value) {
|
|
1684
1684
|
return value && typeof value === "object" && !Array.isArray(value) ? value : {};
|
|
1685
1685
|
}
|
|
@@ -1727,8 +1727,8 @@ function clearDatabase(database) {
|
|
|
1727
1727
|
database.prepare("DELETE FROM chats").run();
|
|
1728
1728
|
}
|
|
1729
1729
|
async function restoreLocalData(input) {
|
|
1730
|
-
const inputPath =
|
|
1731
|
-
const payload = parsePayload(await
|
|
1730
|
+
const inputPath = path10.resolve(input.inputPath);
|
|
1731
|
+
const payload = parsePayload(await fs8.readFile(inputPath, "utf8"));
|
|
1732
1732
|
const mode = input.replace ? "replace" : "merge";
|
|
1733
1733
|
const restore = input.database.transaction(() => {
|
|
1734
1734
|
if (input.replace) {
|
|
@@ -2110,6 +2110,7 @@ var FeishuQuestionHandler = class {
|
|
|
2110
2110
|
const { retriever, close } = await createHybridRetriever({
|
|
2111
2111
|
config: this.options.config,
|
|
2112
2112
|
secrets: this.options.secrets,
|
|
2113
|
+
database: this.options.database,
|
|
2113
2114
|
messages: new MessageRepository(this.options.database),
|
|
2114
2115
|
excludeMessageIds: options.excludeMessageIds
|
|
2115
2116
|
});
|
|
@@ -2470,10 +2471,9 @@ function normalizeFeishuReceiveMessageEvent(payload) {
|
|
|
2470
2471
|
}
|
|
2471
2472
|
|
|
2472
2473
|
// src/feishu/resource-downloader.ts
|
|
2473
|
-
init_paths();
|
|
2474
2474
|
import * as lark3 from "@larksuiteoapi/node-sdk";
|
|
2475
|
-
import
|
|
2476
|
-
import
|
|
2475
|
+
import fs9 from "fs/promises";
|
|
2476
|
+
import path11 from "path";
|
|
2477
2477
|
var RESOURCE_TYPE_BY_KIND = {
|
|
2478
2478
|
file: "file",
|
|
2479
2479
|
image: "image",
|
|
@@ -2511,10 +2511,10 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
|
|
|
2511
2511
|
}
|
|
2512
2512
|
async download(input) {
|
|
2513
2513
|
const resourceType = RESOURCE_TYPE_BY_KIND[input.attachment.kind];
|
|
2514
|
-
const targetDir =
|
|
2515
|
-
await
|
|
2514
|
+
const targetDir = path11.join(this.dataDir, "files", "feishu");
|
|
2515
|
+
await fs9.mkdir(targetDir, { recursive: true });
|
|
2516
2516
|
const fileName = buildStoredFileName(input);
|
|
2517
|
-
const storedPath =
|
|
2517
|
+
const storedPath = path11.join(targetDir, fileName);
|
|
2518
2518
|
const payload = {
|
|
2519
2519
|
params: { type: resourceType },
|
|
2520
2520
|
path: { message_id: input.messageId, file_key: input.attachment.fileKey }
|
|
@@ -2536,31 +2536,30 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
|
|
|
2536
2536
|
};
|
|
2537
2537
|
|
|
2538
2538
|
// src/files/ingest.ts
|
|
2539
|
-
init_paths();
|
|
2540
2539
|
import crypto3 from "crypto";
|
|
2541
|
-
import fs12 from "fs/promises";
|
|
2542
|
-
import path14 from "path";
|
|
2543
|
-
|
|
2544
|
-
// src/files/parser.ts
|
|
2545
2540
|
import fs11 from "fs/promises";
|
|
2546
2541
|
import path13 from "path";
|
|
2542
|
+
|
|
2543
|
+
// src/files/parser.ts
|
|
2544
|
+
import fs10 from "fs/promises";
|
|
2545
|
+
import path12 from "path";
|
|
2547
2546
|
import mammoth from "mammoth";
|
|
2548
2547
|
import { PDFParse } from "pdf-parse";
|
|
2549
2548
|
var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([".txt", ".md", ".markdown", ".json", ".csv", ".tsv", ".log"]);
|
|
2550
2549
|
var DOCX_EXTENSIONS = /* @__PURE__ */ new Set([".docx"]);
|
|
2551
2550
|
var PDF_EXTENSIONS = /* @__PURE__ */ new Set([".pdf"]);
|
|
2552
2551
|
function isSupportedParseFile(filePath) {
|
|
2553
|
-
const extension =
|
|
2552
|
+
const extension = path12.extname(filePath).toLowerCase();
|
|
2554
2553
|
return TEXT_EXTENSIONS.has(extension) || DOCX_EXTENSIONS.has(extension) || PDF_EXTENSIONS.has(extension);
|
|
2555
2554
|
}
|
|
2556
2555
|
function describeSupportedParseTypes() {
|
|
2557
2556
|
return "txt\u3001md\u3001json\u3001csv\u3001tsv\u3001log\u3001docx\u3001pdf";
|
|
2558
2557
|
}
|
|
2559
2558
|
async function parseFileToText(filePath) {
|
|
2560
|
-
const extension =
|
|
2559
|
+
const extension = path12.extname(filePath).toLowerCase();
|
|
2561
2560
|
if (TEXT_EXTENSIONS.has(extension)) {
|
|
2562
2561
|
return {
|
|
2563
|
-
text: await
|
|
2562
|
+
text: await fs10.readFile(filePath, "utf8"),
|
|
2564
2563
|
parser: "text",
|
|
2565
2564
|
warnings: []
|
|
2566
2565
|
};
|
|
@@ -2574,7 +2573,7 @@ async function parseFileToText(filePath) {
|
|
|
2574
2573
|
};
|
|
2575
2574
|
}
|
|
2576
2575
|
if (PDF_EXTENSIONS.has(extension)) {
|
|
2577
|
-
const buffer = await
|
|
2576
|
+
const buffer = await fs10.readFile(filePath);
|
|
2578
2577
|
const parser = new PDFParse({ data: buffer });
|
|
2579
2578
|
try {
|
|
2580
2579
|
const result = await parser.getText();
|
|
@@ -2596,7 +2595,7 @@ function isSupportedTextFile(filePath) {
|
|
|
2596
2595
|
}
|
|
2597
2596
|
function ensureSupportedTextFile(filePath) {
|
|
2598
2597
|
if (!isSupportedTextFile(filePath)) {
|
|
2599
|
-
const extension =
|
|
2598
|
+
const extension = path13.extname(filePath).toLowerCase();
|
|
2600
2599
|
throw new Error(`\u6682\u4E0D\u652F\u6301\u8BE5\u6587\u4EF6\u7C7B\u578B\uFF1A${extension || "\u65E0\u6269\u5C55\u540D"}\u3002\u5F53\u524D\u652F\u6301 ${describeSupportedParseTypes()}\u3002`);
|
|
2601
2600
|
}
|
|
2602
2601
|
}
|
|
@@ -2605,12 +2604,12 @@ function stableStoredName(sourcePath, fileName) {
|
|
|
2605
2604
|
return `${digest}-${fileName}`;
|
|
2606
2605
|
}
|
|
2607
2606
|
async function ingestLocalFile(input) {
|
|
2608
|
-
const sourcePath =
|
|
2609
|
-
const fileName =
|
|
2607
|
+
const sourcePath = path13.resolve(input.filePath);
|
|
2608
|
+
const fileName = path13.basename(sourcePath);
|
|
2610
2609
|
const jobId = input.jobs?.start({ sourcePath, fileName });
|
|
2611
2610
|
try {
|
|
2612
2611
|
ensureSupportedTextFile(sourcePath);
|
|
2613
|
-
const stat = await
|
|
2612
|
+
const stat = await fs11.stat(sourcePath);
|
|
2614
2613
|
if (!stat.isFile()) {
|
|
2615
2614
|
throw new Error(`\u4E0D\u662F\u6587\u4EF6\uFF1A${sourcePath}`);
|
|
2616
2615
|
}
|
|
@@ -2619,10 +2618,10 @@ async function ingestLocalFile(input) {
|
|
|
2619
2618
|
if (!text) {
|
|
2620
2619
|
throw new Error(`\u6587\u4EF6\u6CA1\u6709\u53EF\u7D22\u5F15\u6587\u672C\uFF1A${sourcePath}`);
|
|
2621
2620
|
}
|
|
2622
|
-
const fileDir =
|
|
2623
|
-
await
|
|
2624
|
-
const storedPath =
|
|
2625
|
-
await
|
|
2621
|
+
const fileDir = path13.join(resolveHomePath(input.config.storage.dataDir), "files");
|
|
2622
|
+
await fs11.mkdir(fileDir, { recursive: true });
|
|
2623
|
+
const storedPath = path13.join(fileDir, stableStoredName(sourcePath, fileName));
|
|
2624
|
+
await fs11.copyFile(sourcePath, storedPath);
|
|
2626
2625
|
const messageId = input.messages.ingest({
|
|
2627
2626
|
platform: "local-file",
|
|
2628
2627
|
platformChatId: "local-files",
|
|
@@ -2753,27 +2752,6 @@ var GatewayIngestor = class {
|
|
|
2753
2752
|
}
|
|
2754
2753
|
};
|
|
2755
2754
|
|
|
2756
|
-
// src/rag/embedding.ts
|
|
2757
|
-
function cosineSimilarity(left, right) {
|
|
2758
|
-
if (left.length === 0 || right.length === 0 || left.length !== right.length) {
|
|
2759
|
-
return 0;
|
|
2760
|
-
}
|
|
2761
|
-
let dot = 0;
|
|
2762
|
-
let leftNorm = 0;
|
|
2763
|
-
let rightNorm = 0;
|
|
2764
|
-
for (let index = 0; index < left.length; index += 1) {
|
|
2765
|
-
const leftValue = left[index] ?? 0;
|
|
2766
|
-
const rightValue = right[index] ?? 0;
|
|
2767
|
-
dot += leftValue * rightValue;
|
|
2768
|
-
leftNorm += leftValue * leftValue;
|
|
2769
|
-
rightNorm += rightValue * rightValue;
|
|
2770
|
-
}
|
|
2771
|
-
if (leftNorm === 0 || rightNorm === 0) {
|
|
2772
|
-
return 0;
|
|
2773
|
-
}
|
|
2774
|
-
return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
|
|
2775
|
-
}
|
|
2776
|
-
|
|
2777
2755
|
// src/rag/indexer.ts
|
|
2778
2756
|
async function indexMessageChunks(input) {
|
|
2779
2757
|
const chunks = input.messageIds ? input.messages.listMessageChunksByMessageIds(input.messageIds, input.limit ?? 1e4) : input.messages.listAllMessageChunks(input.limit ?? 1e4);
|
|
@@ -2794,7 +2772,7 @@ async function indexMessageChunks(input) {
|
|
|
2794
2772
|
id: chunk.chunkId,
|
|
2795
2773
|
text: chunk.text,
|
|
2796
2774
|
score: 1,
|
|
2797
|
-
source:
|
|
2775
|
+
source: toEvidenceSource3(chunk)
|
|
2798
2776
|
}
|
|
2799
2777
|
});
|
|
2800
2778
|
}
|
|
@@ -2804,7 +2782,7 @@ async function indexMessageChunks(input) {
|
|
|
2804
2782
|
vectors: records.length
|
|
2805
2783
|
};
|
|
2806
2784
|
}
|
|
2807
|
-
function
|
|
2785
|
+
function toEvidenceSource3(chunk) {
|
|
2808
2786
|
if (chunk.messageType === "file") {
|
|
2809
2787
|
return {
|
|
2810
2788
|
type: "file",
|
|
@@ -2820,9 +2798,6 @@ function toEvidenceSource2(chunk) {
|
|
|
2820
2798
|
};
|
|
2821
2799
|
}
|
|
2822
2800
|
|
|
2823
|
-
// src/index.ts
|
|
2824
|
-
init_lancedb_store();
|
|
2825
|
-
|
|
2826
2801
|
// src/rag/manual-index.ts
|
|
2827
2802
|
async function processMessagesNow(input) {
|
|
2828
2803
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -2836,25 +2811,23 @@ async function processMessagesNow(input) {
|
|
|
2836
2811
|
finishedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2837
2812
|
};
|
|
2838
2813
|
}
|
|
2839
|
-
const
|
|
2840
|
-
|
|
2841
|
-
|
|
2842
|
-
|
|
2843
|
-
|
|
2844
|
-
|
|
2845
|
-
|
|
2846
|
-
|
|
2847
|
-
|
|
2848
|
-
|
|
2849
|
-
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
}
|
|
2856
|
-
vectorStore.close();
|
|
2857
|
-
}
|
|
2814
|
+
const vectorStore = new SqliteVectorStore(input.database, {
|
|
2815
|
+
model: input.config.embedding.model
|
|
2816
|
+
});
|
|
2817
|
+
const embedding = input.embedding ?? createEmbeddingModel(input.config, input.secrets);
|
|
2818
|
+
const stats = await indexMessageChunks({
|
|
2819
|
+
messages: new MessageRepository(input.database),
|
|
2820
|
+
embedding,
|
|
2821
|
+
store: vectorStore,
|
|
2822
|
+
limit: input.limit
|
|
2823
|
+
});
|
|
2824
|
+
return {
|
|
2825
|
+
status: "completed",
|
|
2826
|
+
chunks: stats.chunks,
|
|
2827
|
+
vectors: stats.vectors,
|
|
2828
|
+
startedAt,
|
|
2829
|
+
finishedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2830
|
+
};
|
|
2858
2831
|
}
|
|
2859
2832
|
|
|
2860
2833
|
// src/rag/vector-store.ts
|
|
@@ -3274,7 +3247,7 @@ function createWebApp(config) {
|
|
|
3274
3247
|
note: "\u95EE\u7B54\u5FC5\u987B\u5148\u68C0\u7D22\u8BC1\u636E\uFF0C\u7981\u6B62\u5168\u91CF\u4E0A\u4E0B\u6587\u5806\u53E0\u3002",
|
|
3275
3248
|
retrieval: {
|
|
3276
3249
|
keyword: "SQLite FTS5",
|
|
3277
|
-
vector: "
|
|
3250
|
+
vector: "SQLite embedding",
|
|
3278
3251
|
hybrid: true
|
|
3279
3252
|
}
|
|
3280
3253
|
},
|
|
@@ -3338,7 +3311,6 @@ export {
|
|
|
3338
3311
|
FileJobRepository,
|
|
3339
3312
|
GatewayIngestor,
|
|
3340
3313
|
HybridRetriever,
|
|
3341
|
-
LanceDbVectorStore,
|
|
3342
3314
|
MemoryVectorStore,
|
|
3343
3315
|
MessageFtsRetriever,
|
|
3344
3316
|
MessageRepository,
|
|
@@ -3375,7 +3347,6 @@ export {
|
|
|
3375
3347
|
getGatewayLogPath,
|
|
3376
3348
|
getGatewayPidPath,
|
|
3377
3349
|
getGatewayRuntimeState,
|
|
3378
|
-
getLanceDbPath,
|
|
3379
3350
|
getLogsDirectory,
|
|
3380
3351
|
hasEmbeddingConfig,
|
|
3381
3352
|
indexMessageChunks,
|