chattercatcher 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +12 -25
- package/dist/cli.js +321 -281
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +3 -16
- package/dist/index.js +222 -251
- package/dist/index.js.map +1 -1
- package/docs/DEVELOPMENT_PLAN.md +1 -1
- package/docs/TECHNICAL_ARCHITECTURE.md +10 -10
- package/package.json +1 -2
package/dist/cli.js
CHANGED
|
@@ -1,160 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
var __defProp = Object.defineProperty;
|
|
3
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
4
|
-
var __esm = (fn, res) => function __init() {
|
|
5
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
6
|
-
};
|
|
7
|
-
var __export = (target, all) => {
|
|
8
|
-
for (var name in all)
|
|
9
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
10
|
-
};
|
|
11
|
-
|
|
12
|
-
// src/config/paths.ts
|
|
13
|
-
import os2 from "os";
|
|
14
|
-
import path2 from "path";
|
|
15
|
-
function getChatterCatcherHome() {
|
|
16
|
-
return process.env.CHATTERCATCHER_HOME || path2.join(os2.homedir(), ".chattercatcher");
|
|
17
|
-
}
|
|
18
|
-
function resolveHomePath(value) {
|
|
19
|
-
if (value === "~") {
|
|
20
|
-
return os2.homedir();
|
|
21
|
-
}
|
|
22
|
-
if (value.startsWith("~/") || value.startsWith("~\\")) {
|
|
23
|
-
return path2.join(os2.homedir(), value.slice(2));
|
|
24
|
-
}
|
|
25
|
-
return path2.resolve(value);
|
|
26
|
-
}
|
|
27
|
-
function getConfigPath() {
|
|
28
|
-
return path2.join(getChatterCatcherHome(), "config.json");
|
|
29
|
-
}
|
|
30
|
-
function getSecretsPath() {
|
|
31
|
-
return path2.join(getChatterCatcherHome(), "secrets.json");
|
|
32
|
-
}
|
|
33
|
-
var init_paths = __esm({
|
|
34
|
-
"src/config/paths.ts"() {
|
|
35
|
-
"use strict";
|
|
36
|
-
}
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
// src/rag/lancedb-store.ts
|
|
40
|
-
var lancedb_store_exports = {};
|
|
41
|
-
__export(lancedb_store_exports, {
|
|
42
|
-
LanceDbVectorStore: () => LanceDbVectorStore,
|
|
43
|
-
getLanceDbPath: () => getLanceDbPath
|
|
44
|
-
});
|
|
45
|
-
import fs6 from "fs/promises";
|
|
46
|
-
import path9 from "path";
|
|
47
|
-
function getLanceDbPath(config) {
|
|
48
|
-
return path9.join(resolveHomePath(config.storage.dataDir), "vector", "lancedb");
|
|
49
|
-
}
|
|
50
|
-
function toRow(record) {
|
|
51
|
-
return {
|
|
52
|
-
id: record.id,
|
|
53
|
-
vector: record.vector,
|
|
54
|
-
text: record.evidence.text,
|
|
55
|
-
source_json: JSON.stringify(record.evidence.source)
|
|
56
|
-
};
|
|
57
|
-
}
|
|
58
|
-
function toLanceData(rows) {
|
|
59
|
-
return rows.map((row) => ({
|
|
60
|
-
id: row.id,
|
|
61
|
-
vector: row.vector,
|
|
62
|
-
text: row.text,
|
|
63
|
-
source_json: row.source_json
|
|
64
|
-
}));
|
|
65
|
-
}
|
|
66
|
-
function escapeSqlString(value) {
|
|
67
|
-
return value.replace(/'/g, "''");
|
|
68
|
-
}
|
|
69
|
-
function toEvidence(row) {
|
|
70
|
-
const distance = row._distance ?? 0;
|
|
71
|
-
const vectorScore = 1 / (1 + Math.max(0, distance));
|
|
72
|
-
return {
|
|
73
|
-
id: row.id,
|
|
74
|
-
text: row.text,
|
|
75
|
-
score: vectorScore,
|
|
76
|
-
vectorScore,
|
|
77
|
-
source: JSON.parse(row.source_json)
|
|
78
|
-
};
|
|
79
|
-
}
|
|
80
|
-
var DEFAULT_TABLE_NAME, LanceDbVectorStore;
|
|
81
|
-
var init_lancedb_store = __esm({
|
|
82
|
-
"src/rag/lancedb-store.ts"() {
|
|
83
|
-
"use strict";
|
|
84
|
-
init_paths();
|
|
85
|
-
DEFAULT_TABLE_NAME = "message_chunks";
|
|
86
|
-
LanceDbVectorStore = class _LanceDbVectorStore {
|
|
87
|
-
constructor(connection, tableName) {
|
|
88
|
-
this.connection = connection;
|
|
89
|
-
this.tableName = tableName;
|
|
90
|
-
}
|
|
91
|
-
connection;
|
|
92
|
-
tableName;
|
|
93
|
-
static async connect(uri, tableName = DEFAULT_TABLE_NAME) {
|
|
94
|
-
await fs6.mkdir(uri, { recursive: true });
|
|
95
|
-
const lancedb = await import("@lancedb/lancedb");
|
|
96
|
-
const connection = await lancedb.connect(uri);
|
|
97
|
-
return new _LanceDbVectorStore(connection, tableName);
|
|
98
|
-
}
|
|
99
|
-
static async connectFromConfig(config, tableName = DEFAULT_TABLE_NAME) {
|
|
100
|
-
return _LanceDbVectorStore.connect(getLanceDbPath(config), tableName);
|
|
101
|
-
}
|
|
102
|
-
close() {
|
|
103
|
-
this.connection.close();
|
|
104
|
-
}
|
|
105
|
-
async upsert(records) {
|
|
106
|
-
if (records.length === 0) {
|
|
107
|
-
return;
|
|
108
|
-
}
|
|
109
|
-
const rows = records.map(toRow);
|
|
110
|
-
const data2 = toLanceData(rows);
|
|
111
|
-
const table = await this.ensureTable(data2);
|
|
112
|
-
const ids = rows.map((row) => `'${escapeSqlString(row.id)}'`).join(", ");
|
|
113
|
-
await table.delete(`id IN (${ids})`);
|
|
114
|
-
await table.add(data2);
|
|
115
|
-
}
|
|
116
|
-
async search(vector, limit) {
|
|
117
|
-
const table = await this.openTableIfExists();
|
|
118
|
-
if (!table) {
|
|
119
|
-
return [];
|
|
120
|
-
}
|
|
121
|
-
const rows = await table.vectorSearch(vector).limit(limit).toArray();
|
|
122
|
-
return rows.map(toEvidence);
|
|
123
|
-
}
|
|
124
|
-
async count() {
|
|
125
|
-
const table = await this.openTableIfExists();
|
|
126
|
-
if (!table) {
|
|
127
|
-
return 0;
|
|
128
|
-
}
|
|
129
|
-
return table.countRows();
|
|
130
|
-
}
|
|
131
|
-
async ensureTable(initialRows) {
|
|
132
|
-
const table = await this.openTableIfExists();
|
|
133
|
-
if (table) {
|
|
134
|
-
return table;
|
|
135
|
-
}
|
|
136
|
-
return this.connection.createTable(this.tableName, initialRows);
|
|
137
|
-
}
|
|
138
|
-
async openTableIfExists() {
|
|
139
|
-
const tableNames = await this.connection.tableNames();
|
|
140
|
-
if (!tableNames.includes(this.tableName)) {
|
|
141
|
-
return null;
|
|
142
|
-
}
|
|
143
|
-
return this.connection.openTable(this.tableName);
|
|
144
|
-
}
|
|
145
|
-
};
|
|
146
|
-
}
|
|
147
|
-
});
|
|
148
2
|
|
|
149
3
|
// src/cli.ts
|
|
150
4
|
import { input, password, select, confirm, number } from "@inquirer/prompts";
|
|
151
5
|
import { Command } from "commander";
|
|
152
|
-
import
|
|
6
|
+
import fs13 from "fs/promises";
|
|
153
7
|
|
|
154
8
|
// package.json
|
|
155
9
|
var package_default = {
|
|
156
10
|
name: "chattercatcher",
|
|
157
|
-
version: "0.1.
|
|
11
|
+
version: "0.1.9",
|
|
158
12
|
description: "\u672C\u5730\u4F18\u5148\u7684\u98DE\u4E66/Lark \u5BB6\u5EAD\u7FA4\u77E5\u8BC6\u5E93\u673A\u5668\u4EBA",
|
|
159
13
|
type: "module",
|
|
160
14
|
main: "dist/index.js",
|
|
@@ -198,7 +52,6 @@ var package_default = {
|
|
|
198
52
|
license: "MIT",
|
|
199
53
|
dependencies: {
|
|
200
54
|
"@inquirer/prompts": "^8.4.2",
|
|
201
|
-
"@lancedb/lancedb": "0.23.0",
|
|
202
55
|
"@larksuiteoapi/node-sdk": "^1.62.0",
|
|
203
56
|
"better-sqlite3": "^12.9.0",
|
|
204
57
|
commander: "^14.0.3",
|
|
@@ -287,8 +140,29 @@ function createDefaultSecrets() {
|
|
|
287
140
|
});
|
|
288
141
|
}
|
|
289
142
|
|
|
143
|
+
// src/config/paths.ts
|
|
144
|
+
import os2 from "os";
|
|
145
|
+
import path2 from "path";
|
|
146
|
+
/**
 * Resolve the ChatterCatcher home directory.
 *
 * A non-empty `CHATTERCATCHER_HOME` environment variable takes precedence;
 * otherwise the directory is `.chattercatcher` under the user's home directory.
 *
 * @returns {string} The home directory path (the override is returned verbatim).
 */
function getChatterCatcherHome() {
  const override = process.env.CHATTERCATCHER_HOME;
  if (override) {
    return override;
  }
  return path2.join(os2.homedir(), ".chattercatcher");
}
|
|
149
|
+
/**
 * Expand a leading `~` in a path and return an absolute path.
 *
 * - `"~"` alone maps to the user's home directory.
 * - A `"~/"` or `"~\"` prefix is replaced by the home directory.
 * - Anything else is resolved with `path.resolve` (relative to the cwd).
 *
 * @param {string} value - Path that may start with a tilde.
 * @returns {string} The expanded path.
 */
function resolveHomePath(value) {
  if (value === "~") {
    return os2.homedir();
  }
  const hasTildePrefix = ["~/", "~\\"].some((prefix) => value.startsWith(prefix));
  return hasTildePrefix
    ? path2.join(os2.homedir(), value.slice(2))
    : path2.resolve(value);
}
|
|
158
|
+
/**
 * Path of `config.json` inside the ChatterCatcher home directory.
 *
 * @returns {string} `<home>/config.json`, where `<home>` comes from getChatterCatcherHome().
 */
function getConfigPath() {
  const homeDir = getChatterCatcherHome();
  return path2.join(homeDir, "config.json");
}
|
|
161
|
+
/**
 * Path of `secrets.json` inside the ChatterCatcher home directory.
 *
 * @returns {string} `<home>/secrets.json`, where `<home>` comes from getChatterCatcherHome().
 */
function getSecretsPath() {
  const homeDir = getChatterCatcherHome();
  return path2.join(homeDir, "secrets.json");
}
|
|
164
|
+
|
|
290
165
|
// src/config/store.ts
|
|
291
|
-
init_paths();
|
|
292
166
|
async function readJsonFile(filePath, fallback) {
|
|
293
167
|
try {
|
|
294
168
|
const raw = await fs.readFile(filePath, "utf8");
|
|
@@ -351,11 +225,7 @@ function resolveEmbeddingApiKey(input2) {
|
|
|
351
225
|
return explicit || input2.llmApiKey;
|
|
352
226
|
}
|
|
353
227
|
|
|
354
|
-
// src/cli.ts
|
|
355
|
-
init_paths();
|
|
356
|
-
|
|
357
228
|
// src/data/deletion.ts
|
|
358
|
-
init_paths();
|
|
359
229
|
import fs2 from "fs/promises";
|
|
360
230
|
import path4 from "path";
|
|
361
231
|
function emptyResult(targetType, targetId) {
|
|
@@ -481,7 +351,6 @@ async function deleteLocalData(input2) {
|
|
|
481
351
|
}
|
|
482
352
|
|
|
483
353
|
// src/db/database.ts
|
|
484
|
-
init_paths();
|
|
485
354
|
import Database from "better-sqlite3";
|
|
486
355
|
import fs3 from "fs";
|
|
487
356
|
import path5 from "path";
|
|
@@ -542,6 +411,18 @@ function migrateDatabase(database) {
|
|
|
542
411
|
tokenize = 'unicode61'
|
|
543
412
|
);
|
|
544
413
|
|
|
414
|
+
CREATE TABLE IF NOT EXISTS message_chunk_embeddings (
|
|
415
|
+
chunk_id TEXT NOT NULL REFERENCES message_chunks(id) ON DELETE CASCADE,
|
|
416
|
+
model TEXT NOT NULL,
|
|
417
|
+
dimension INTEGER NOT NULL,
|
|
418
|
+
embedding_json TEXT NOT NULL,
|
|
419
|
+
updated_at TEXT NOT NULL,
|
|
420
|
+
PRIMARY KEY (chunk_id, model)
|
|
421
|
+
);
|
|
422
|
+
|
|
423
|
+
CREATE INDEX IF NOT EXISTS message_chunk_embeddings_model_idx
|
|
424
|
+
ON message_chunk_embeddings(model, dimension);
|
|
425
|
+
|
|
545
426
|
CREATE TABLE IF NOT EXISTS file_jobs (
|
|
546
427
|
id TEXT PRIMARY KEY,
|
|
547
428
|
source_path TEXT NOT NULL,
|
|
@@ -561,8 +442,7 @@ function migrateDatabase(database) {
|
|
|
561
442
|
}
|
|
562
443
|
|
|
563
444
|
// src/doctor/checks.ts
|
|
564
|
-
|
|
565
|
-
import fs7 from "fs/promises";
|
|
445
|
+
import fs6 from "fs/promises";
|
|
566
446
|
|
|
567
447
|
// src/files/jobs.ts
|
|
568
448
|
import crypto from "crypto";
|
|
@@ -704,12 +584,10 @@ var FileJobRepository = class {
|
|
|
704
584
|
};
|
|
705
585
|
|
|
706
586
|
// src/gateway/runtime.ts
|
|
707
|
-
init_paths();
|
|
708
587
|
import fs5 from "fs";
|
|
709
588
|
import path8 from "path";
|
|
710
589
|
|
|
711
590
|
// src/logs/reader.ts
|
|
712
|
-
init_paths();
|
|
713
591
|
import fs4 from "fs/promises";
|
|
714
592
|
import { watch } from "fs";
|
|
715
593
|
import path7 from "path";
|
|
@@ -1458,6 +1336,119 @@ var MessageFtsRetriever = class {
|
|
|
1458
1336
|
}
|
|
1459
1337
|
};
|
|
1460
1338
|
|
|
1339
|
+
// src/rag/embedding.ts
|
|
1340
|
+
/**
 * Cosine similarity of two equally sized numeric vectors.
 *
 * Returns 0 (rather than throwing or yielding NaN) whenever the similarity is
 * undefined: empty input, differing lengths, a zero-norm vector, or a result
 * that is not a finite number (NaN/Infinity components, or squared norms that
 * overflow to Infinity). Callers sort by this value, and a NaN score would
 * make the sort comparator inconsistent.
 *
 * @param {ArrayLike<number>} left - First vector.
 * @param {ArrayLike<number>} right - Second vector.
 * @returns {number} A finite similarity score, or 0 when it cannot be computed.
 */
function cosineSimilarity(left, right) {
  // Equal lengths plus a non-empty left side implies a non-empty right side.
  if (left.length === 0 || left.length !== right.length) {
    return 0;
  }
  let dot = 0;
  let leftNorm = 0;
  let rightNorm = 0;
  for (let index2 = 0; index2 < left.length; index2 += 1) {
    // Holes / nullish entries count as 0 so sparse arrays do not produce NaN.
    const leftValue = left[index2] ?? 0;
    const rightValue = right[index2] ?? 0;
    dot += leftValue * rightValue;
    leftNorm += leftValue * leftValue;
    rightNorm += rightValue * rightValue;
  }
  if (leftNorm === 0 || rightNorm === 0) {
    return 0;
  }
  const similarity = dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
  // NaN/Infinity inputs or overflowed norms end up here as a non-finite value.
  return Number.isFinite(similarity) ? similarity : 0;
}
|
|
1359
|
+
|
|
1360
|
+
// src/rag/sqlite-vector-store.ts
|
|
1361
|
+
/**
 * Decode a stored `embedding_json` value into a numeric vector.
 *
 * @param {string} value - JSON text expected to contain an array of numbers.
 * @returns {number[]} The parsed vector, or `[]` when the text is not valid
 *   JSON, is not an array, or contains any element that is not a finite number.
 */
function parseEmbeddingJson(value) {
  let parsed;
  try {
    parsed = JSON.parse(value);
  } catch {
    // Malformed JSON is reported as an empty vector instead of throwing.
    return [];
  }
  if (!Array.isArray(parsed)) {
    return [];
  }
  // Number.isFinite rejects non-numbers and also Infinity, which JSON.parse
  // yields for overflowing literals such as 1e999.
  return parsed.every((item) => Number.isFinite(item)) ? parsed : [];
}
|
|
1369
|
+
/**
 * Build the evidence `source` descriptor for a message-chunk search row.
 *
 * @param {{ chatName: *, senderName: *, sentAt: * }} row - Row carrying the
 *   chat name, sender name and sent-at timestamp columns.
 * @returns {{ type: "message", label: *, sender: *, timestamp: * }}
 */
function toEvidenceSource2(row) {
  const { chatName, senderName, sentAt } = row;
  return { type: "message", label: chatName, sender: senderName, timestamp: sentAt };
}
|
|
1377
|
+
/**
 * Vector store that keeps chunk embeddings in the `message_chunk_embeddings`
 * SQLite table, scoped to a single embedding model (`options.model`).
 *
 * `database` must provide `prepare()` and `transaction()`.
 * NOTE(review): presumably a better-sqlite3 Database handle — confirm against callers.
 */
var SqliteVectorStore = class {
  /**
   * @param {object} database - SQLite handle used for every statement.
   * @param {{ model: string }} options - Embedding model name the rows are keyed by.
   */
  constructor(database, options) {
    this.database = database;
    this.options = options;
  }
  database;
  options;
  /**
   * Insert or replace the embeddings of the given records for the current model.
   * All rows are written through a single `database.transaction(...)` call.
   *
   * @param {Array<{ id: string, vector: number[] }>} records - `id` is the chunk id.
   * @returns {Promise<void>}
   */
  async upsert(records) {
    if (records.length === 0) {
      return;
    }
    // One timestamp for the whole batch.
    const updatedAt = (/* @__PURE__ */ new Date()).toISOString();
    const statement = this.database.prepare(`
      INSERT INTO message_chunk_embeddings (chunk_id, model, dimension, embedding_json, updated_at)
      VALUES (@chunkId, @model, @dimension, @embeddingJson, @updatedAt)
      ON CONFLICT(chunk_id, model)
      DO UPDATE SET
        dimension = excluded.dimension,
        embedding_json = excluded.embedding_json,
        updated_at = excluded.updated_at
    `);
    const transaction = this.database.transaction((input2) => {
      for (const record of input2) {
        statement.run({
          chunkId: record.id,
          model: this.options.model,
          dimension: record.vector.length,
          embeddingJson: JSON.stringify(record.vector),
          updatedAt
        });
      }
    });
    transaction(records);
  }
  /**
   * Score every stored embedding of the current model against `vector` with
   * cosine similarity and return the best `limit` matches, highest score first.
   *
   * Rows are skipped when their stored vector cannot be parsed, when its length
   * differs from the query vector (no meaningful similarity exists, so they
   * would only surface as zero-score evidence), or when the score is not a
   * finite number (it would make the sort comparator inconsistent).
   *
   * @param {number[]} vector - Query embedding.
   * @param {number} limit - Maximum number of results; values <= 0 yield `[]`.
   * @returns {Promise<Array<{ id: string, text: string, score: number, vectorScore: number, source: object }>>}
   */
  async search(vector, limit) {
    if (limit <= 0) {
      return [];
    }
    const rows = this.database.prepare(
      `
        SELECT
          mc.id AS chunkId,
          mc.text AS text,
          c.name AS chatName,
          m.sender_name AS senderName,
          m.sent_at AS sentAt,
          e.embedding_json AS embeddingJson
        FROM message_chunk_embeddings e
        JOIN message_chunks mc ON mc.id = e.chunk_id
        JOIN messages m ON m.id = mc.message_id
        JOIN chats c ON c.id = m.chat_id
        WHERE e.model = ?
      `
    ).all(this.options.model);
    const scored = [];
    for (const row of rows) {
      const storedVector = parseEmbeddingJson(row.embeddingJson);
      // Unparseable or differently sized vectors cannot be compared with the query.
      if (storedVector.length === 0 || storedVector.length !== vector.length) {
        continue;
      }
      const vectorScore = cosineSimilarity(vector, storedVector);
      if (!Number.isFinite(vectorScore)) {
        continue;
      }
      scored.push({
        id: row.chunkId,
        text: row.text,
        score: vectorScore,
        vectorScore,
        source: toEvidenceSource2(row)
      });
    }
    return scored.sort((left, right) => right.vectorScore - left.vectorScore).slice(0, limit);
  }
  /**
   * Number of embeddings stored for the current model (synchronous).
   *
   * @returns {number}
   */
  count() {
    const row = this.database.prepare("SELECT COUNT(*) AS count FROM message_chunk_embeddings WHERE model = ?").get(this.options.model);
    return row.count;
  }
};
|
|
1451
|
+
|
|
1461
1452
|
// src/rag/vector-retriever.ts
|
|
1462
1453
|
var VectorRetriever = class {
|
|
1463
1454
|
constructor(embedding, store, limit = 8) {
|
|
@@ -1482,10 +1473,10 @@ async function createHybridRetriever(input2) {
|
|
|
1482
1473
|
const retrievers = [new MessageFtsRetriever(input2.messages, { excludeMessageIds: input2.excludeMessageIds })];
|
|
1483
1474
|
const closers = [];
|
|
1484
1475
|
if (hasEmbeddingConfig(input2.config, input2.secrets)) {
|
|
1485
|
-
const
|
|
1486
|
-
|
|
1476
|
+
const vectorStore = new SqliteVectorStore(input2.database, {
|
|
1477
|
+
model: input2.config.embedding.model
|
|
1478
|
+
});
|
|
1487
1479
|
retrievers.push(new VectorRetriever(createEmbeddingModel(input2.config, input2.secrets), vectorStore));
|
|
1488
|
-
closers.push(() => vectorStore.close());
|
|
1489
1480
|
}
|
|
1490
1481
|
return {
|
|
1491
1482
|
retriever: new HybridRetriever(retrievers),
|
|
@@ -1515,7 +1506,7 @@ async function runDoctor(config, secrets, options = {}) {
|
|
|
1515
1506
|
checks.push(checkEmbeddingConfig(config, secrets));
|
|
1516
1507
|
checks.push(await checkSqlite(config));
|
|
1517
1508
|
checks.push(await checkFilePipeline(config));
|
|
1518
|
-
checks.push(await
|
|
1509
|
+
checks.push(await checkSqliteVectorIndex(config));
|
|
1519
1510
|
checks.push(checkRagPolicy());
|
|
1520
1511
|
if (options.online) {
|
|
1521
1512
|
checks.push(await checkChatModel(config, secrets));
|
|
@@ -1526,8 +1517,8 @@ async function runDoctor(config, secrets, options = {}) {
|
|
|
1526
1517
|
async function checkHomeDirectory() {
|
|
1527
1518
|
const home = getChatterCatcherHome();
|
|
1528
1519
|
try {
|
|
1529
|
-
await
|
|
1530
|
-
await
|
|
1520
|
+
await fs6.mkdir(home, { recursive: true });
|
|
1521
|
+
await fs6.access(home);
|
|
1531
1522
|
return pass("\u914D\u7F6E\u76EE\u5F55", home);
|
|
1532
1523
|
} catch (error) {
|
|
1533
1524
|
return fail("\u914D\u7F6E\u76EE\u5F55", error instanceof Error ? error.message : String(error));
|
|
@@ -1548,7 +1539,7 @@ function checkLlmConfig(config, secrets) {
|
|
|
1548
1539
|
}
|
|
1549
1540
|
/**
 * Doctor check for the embedding configuration.
 *
 * Returns a `pass` result naming the model and endpoint when
 * hasEmbeddingConfig(config, secrets) is truthy, otherwise a `warn` result.
 * The endpoint falls back to the LLM base URL when no embedding base URL is set.
 *
 * @param {object} config - Reads `config.embedding.model`, `config.embedding.baseUrl`, `config.llm.baseUrl`.
 * @param {object} secrets - Forwarded to hasEmbeddingConfig.
 * @returns {*} Whatever `pass(...)` / `warn(...)` return.
 */
function checkEmbeddingConfig(config, secrets) {
  const checkName = "Embedding \u914D\u7F6E";
  if (hasEmbeddingConfig(config, secrets)) {
    return pass(checkName, `${config.embedding.model} @ ${config.embedding.baseUrl || config.llm.baseUrl}`);
  }
  return warn(checkName, "\u672A\u914D\u7F6E\u5B8C\u6574\uFF1BRAG \u4F1A\u4F7F\u7528 SQLite FTS\uFF0C\u65E0\u6CD5\u542F\u7528 SQLite embedding \u8BED\u4E49\u68C0\u7D22\u3002");
}
|
|
@@ -1582,17 +1573,22 @@ async function checkFilePipeline(config) {
|
|
|
1582
1573
|
database?.close();
|
|
1583
1574
|
}
|
|
1584
1575
|
}
|
|
1585
|
-
async function
|
|
1586
|
-
let
|
|
1576
|
+
/**
 * Doctor check that inspects the SQLite embedding index.
 *
 * Opens the database, counts the embeddings stored for the configured model
 * (the literal "default" is used when none is configured) and the number of
 * distinct models present, and reports them in a `pass` result. Any thrown
 * error becomes a `fail` result. The database is closed in all cases.
 *
 * @param {object} config - Reads `config.embedding.model`; also forwarded to
 *   openDatabase and getDatabasePath.
 * @returns {Promise<*>} Whatever `pass(...)` / `fail(...)` return.
 */
async function checkSqliteVectorIndex(config) {
  const checkName = "SQLite embedding \u5411\u91CF\u7D22\u5F15";
  let database = null;
  try {
    database = openDatabase(config);
    const store = new SqliteVectorStore(database, { model: config.embedding.model || "default" });
    const vectorCount = store.count();
    const modelStats = database.prepare("SELECT COUNT(DISTINCT model) AS count FROM message_chunk_embeddings").get();
    const detail = `${getDatabasePath(config)}\uFF1Bvectors=${vectorCount}\uFF1Bmodels=${modelStats.count}${config.embedding.model ? `\uFF1Bactive_model=${config.embedding.model}` : "\uFF1Bactive_model=\u672A\u914D\u7F6E"}`;
    return pass(checkName, detail);
  } catch (error) {
    const reason = error instanceof Error ? error.message : String(error);
    return fail(checkName, reason);
  } finally {
    database?.close();
  }
}
|
|
1598
1594
|
function checkRagPolicy() {
|
|
@@ -1633,9 +1629,8 @@ function formatDoctorChecks(checks) {
|
|
|
1633
1629
|
}
|
|
1634
1630
|
|
|
1635
1631
|
// src/export/data-export.ts
|
|
1636
|
-
|
|
1637
|
-
import
|
|
1638
|
-
import path10 from "path";
|
|
1632
|
+
import fs7 from "fs/promises";
|
|
1633
|
+
import path9 from "path";
|
|
1639
1634
|
function parseJsonObject(value) {
|
|
1640
1635
|
try {
|
|
1641
1636
|
const parsed = JSON.parse(value);
|
|
@@ -1654,11 +1649,11 @@ function parseJsonArray(value) {
|
|
|
1654
1649
|
}
|
|
1655
1650
|
/**
 * Default location of an export file: `<dataDir>/exports/chattercatcher-export-<stamp>.json`.
 *
 * `:` and `.` in the timestamp are replaced by `-` to form the file name.
 *
 * @param {object} config - Reads `config.storage.dataDir` (expanded via resolveHomePath).
 * @param {string} exportedAt - Timestamp string embedded in the file name.
 * @returns {string} The export file path.
 */
function defaultExportPath(config, exportedAt) {
  const stamp = exportedAt.replace(/[:.]/g, "-");
  const dataDir = resolveHomePath(config.storage.dataDir);
  return path9.join(dataDir, "exports", `chattercatcher-export-${stamp}.json`);
}
|
|
1659
1654
|
async function exportLocalData(input2) {
|
|
1660
1655
|
const exportedAt = input2.exportedAt ?? (/* @__PURE__ */ new Date()).toISOString();
|
|
1661
|
-
const outputPath =
|
|
1656
|
+
const outputPath = path9.resolve(input2.outputPath ?? defaultExportPath(input2.config, exportedAt));
|
|
1662
1657
|
const chats = input2.database.prepare(
|
|
1663
1658
|
`
|
|
1664
1659
|
SELECT
|
|
@@ -1745,8 +1740,8 @@ async function exportLocalData(input2) {
|
|
|
1745
1740
|
fileJobs
|
|
1746
1741
|
}
|
|
1747
1742
|
};
|
|
1748
|
-
await
|
|
1749
|
-
await
|
|
1743
|
+
await fs7.mkdir(path9.dirname(outputPath), { recursive: true });
|
|
1744
|
+
await fs7.writeFile(outputPath, `${JSON.stringify(payload, null, 2)}
|
|
1750
1745
|
`, "utf8");
|
|
1751
1746
|
return {
|
|
1752
1747
|
outputPath,
|
|
@@ -1758,8 +1753,8 @@ async function exportLocalData(input2) {
|
|
|
1758
1753
|
}
|
|
1759
1754
|
|
|
1760
1755
|
// src/export/data-restore.ts
|
|
1761
|
-
import
|
|
1762
|
-
import
|
|
1756
|
+
import fs8 from "fs/promises";
|
|
1757
|
+
import path10 from "path";
|
|
1763
1758
|
function asObject(value) {
|
|
1764
1759
|
return value && typeof value === "object" && !Array.isArray(value) ? value : {};
|
|
1765
1760
|
}
|
|
@@ -1807,8 +1802,8 @@ function clearDatabase(database) {
|
|
|
1807
1802
|
database.prepare("DELETE FROM chats").run();
|
|
1808
1803
|
}
|
|
1809
1804
|
async function restoreLocalData(input2) {
|
|
1810
|
-
const inputPath =
|
|
1811
|
-
const payload = parsePayload(await
|
|
1805
|
+
const inputPath = path10.resolve(input2.inputPath);
|
|
1806
|
+
const payload = parsePayload(await fs8.readFile(inputPath, "utf8"));
|
|
1812
1807
|
const mode = input2.replace ? "replace" : "merge";
|
|
1813
1808
|
const restore = input2.database.transaction(() => {
|
|
1814
1809
|
if (input2.replace) {
|
|
@@ -2190,6 +2185,7 @@ var FeishuQuestionHandler = class {
|
|
|
2190
2185
|
const { retriever, close } = await createHybridRetriever({
|
|
2191
2186
|
config: this.options.config,
|
|
2192
2187
|
secrets: this.options.secrets,
|
|
2188
|
+
database: this.options.database,
|
|
2193
2189
|
messages: new MessageRepository(this.options.database),
|
|
2194
2190
|
excludeMessageIds: options.excludeMessageIds
|
|
2195
2191
|
});
|
|
@@ -2400,10 +2396,9 @@ function createFeishuGateway(options) {
|
|
|
2400
2396
|
}
|
|
2401
2397
|
|
|
2402
2398
|
// src/feishu/resource-downloader.ts
|
|
2403
|
-
init_paths();
|
|
2404
2399
|
import * as lark3 from "@larksuiteoapi/node-sdk";
|
|
2405
|
-
import
|
|
2406
|
-
import
|
|
2400
|
+
import fs9 from "fs/promises";
|
|
2401
|
+
import path11 from "path";
|
|
2407
2402
|
var RESOURCE_TYPE_BY_KIND = {
|
|
2408
2403
|
file: "file",
|
|
2409
2404
|
image: "image",
|
|
@@ -2441,10 +2436,10 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
|
|
|
2441
2436
|
}
|
|
2442
2437
|
async download(input2) {
|
|
2443
2438
|
const resourceType = RESOURCE_TYPE_BY_KIND[input2.attachment.kind];
|
|
2444
|
-
const targetDir =
|
|
2445
|
-
await
|
|
2439
|
+
const targetDir = path11.join(this.dataDir, "files", "feishu");
|
|
2440
|
+
await fs9.mkdir(targetDir, { recursive: true });
|
|
2446
2441
|
const fileName = buildStoredFileName(input2);
|
|
2447
|
-
const storedPath =
|
|
2442
|
+
const storedPath = path11.join(targetDir, fileName);
|
|
2448
2443
|
const payload = {
|
|
2449
2444
|
params: { type: resourceType },
|
|
2450
2445
|
path: { message_id: input2.messageId, file_key: input2.attachment.fileKey }
|
|
@@ -2466,31 +2461,30 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
|
|
|
2466
2461
|
};
|
|
2467
2462
|
|
|
2468
2463
|
// src/files/ingest.ts
|
|
2469
|
-
init_paths();
|
|
2470
2464
|
import crypto3 from "crypto";
|
|
2471
|
-
import fs12 from "fs/promises";
|
|
2472
|
-
import path14 from "path";
|
|
2473
|
-
|
|
2474
|
-
// src/files/parser.ts
|
|
2475
2465
|
import fs11 from "fs/promises";
|
|
2476
2466
|
import path13 from "path";
|
|
2467
|
+
|
|
2468
|
+
// src/files/parser.ts
|
|
2469
|
+
import fs10 from "fs/promises";
|
|
2470
|
+
import path12 from "path";
|
|
2477
2471
|
import mammoth from "mammoth";
|
|
2478
2472
|
import { PDFParse } from "pdf-parse";
|
|
2479
2473
|
var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([".txt", ".md", ".markdown", ".json", ".csv", ".tsv", ".log"]);
|
|
2480
2474
|
var DOCX_EXTENSIONS = /* @__PURE__ */ new Set([".docx"]);
|
|
2481
2475
|
var PDF_EXTENSIONS = /* @__PURE__ */ new Set([".pdf"]);
|
|
2482
2476
|
/**
 * Whether the file's extension (case-insensitive) is in one of the text,
 * DOCX or PDF extension sets.
 *
 * @param {string} filePath - Path whose extension is inspected.
 * @returns {boolean}
 */
function isSupportedParseFile(filePath) {
  const ext = path12.extname(filePath).toLowerCase();
  if (TEXT_EXTENSIONS.has(ext)) {
    return true;
  }
  if (DOCX_EXTENSIONS.has(ext)) {
    return true;
  }
  return PDF_EXTENSIONS.has(ext);
}
|
|
2486
2480
|
/**
 * Human-readable list of the parseable file types, joined with the
 * ideographic comma (U+3001) for use in user-facing messages.
 *
 * @returns {string}
 */
function describeSupportedParseTypes() {
  const typeNames = ["txt", "md", "json", "csv", "tsv", "log", "docx", "pdf"];
  return typeNames.join("\u3001");
}
|
|
2489
2483
|
async function parseFileToText(filePath) {
|
|
2490
|
-
const extension =
|
|
2484
|
+
const extension = path12.extname(filePath).toLowerCase();
|
|
2491
2485
|
if (TEXT_EXTENSIONS.has(extension)) {
|
|
2492
2486
|
return {
|
|
2493
|
-
text: await
|
|
2487
|
+
text: await fs10.readFile(filePath, "utf8"),
|
|
2494
2488
|
parser: "text",
|
|
2495
2489
|
warnings: []
|
|
2496
2490
|
};
|
|
@@ -2504,7 +2498,7 @@ async function parseFileToText(filePath) {
|
|
|
2504
2498
|
};
|
|
2505
2499
|
}
|
|
2506
2500
|
if (PDF_EXTENSIONS.has(extension)) {
|
|
2507
|
-
const buffer = await
|
|
2501
|
+
const buffer = await fs10.readFile(filePath);
|
|
2508
2502
|
const parser = new PDFParse({ data: buffer });
|
|
2509
2503
|
try {
|
|
2510
2504
|
const result = await parser.getText();
|
|
@@ -2526,7 +2520,7 @@ function isSupportedTextFile(filePath) {
|
|
|
2526
2520
|
}
|
|
2527
2521
|
/**
 * Guard that throws when isSupportedTextFile(filePath) is falsy.
 *
 * @param {string} filePath - Path of the file about to be ingested.
 * @throws {Error} With a message naming the lower-cased extension (or a
 *   "no extension" placeholder) and the supported types.
 */
function ensureSupportedTextFile(filePath) {
  if (isSupportedTextFile(filePath)) {
    return;
  }
  const ext = path13.extname(filePath).toLowerCase();
  throw new Error(`\u6682\u4E0D\u652F\u6301\u8BE5\u6587\u4EF6\u7C7B\u578B\uFF1A${ext || "\u65E0\u6269\u5C55\u540D"}\u3002\u5F53\u524D\u652F\u6301 ${describeSupportedParseTypes()}\u3002`);
}
|
|
@@ -2535,12 +2529,12 @@ function stableStoredName(sourcePath, fileName) {
|
|
|
2535
2529
|
return `${digest}-${fileName}`;
|
|
2536
2530
|
}
|
|
2537
2531
|
async function ingestLocalFile(input2) {
|
|
2538
|
-
const sourcePath =
|
|
2539
|
-
const fileName =
|
|
2532
|
+
const sourcePath = path13.resolve(input2.filePath);
|
|
2533
|
+
const fileName = path13.basename(sourcePath);
|
|
2540
2534
|
const jobId = input2.jobs?.start({ sourcePath, fileName });
|
|
2541
2535
|
try {
|
|
2542
2536
|
ensureSupportedTextFile(sourcePath);
|
|
2543
|
-
const stat = await
|
|
2537
|
+
const stat = await fs11.stat(sourcePath);
|
|
2544
2538
|
if (!stat.isFile()) {
|
|
2545
2539
|
throw new Error(`\u4E0D\u662F\u6587\u4EF6\uFF1A${sourcePath}`);
|
|
2546
2540
|
}
|
|
@@ -2549,10 +2543,10 @@ async function ingestLocalFile(input2) {
|
|
|
2549
2543
|
if (!text) {
|
|
2550
2544
|
throw new Error(`\u6587\u4EF6\u6CA1\u6709\u53EF\u7D22\u5F15\u6587\u672C\uFF1A${sourcePath}`);
|
|
2551
2545
|
}
|
|
2552
|
-
const fileDir =
|
|
2553
|
-
await
|
|
2554
|
-
const storedPath =
|
|
2555
|
-
await
|
|
2546
|
+
const fileDir = path13.join(resolveHomePath(input2.config.storage.dataDir), "files");
|
|
2547
|
+
await fs11.mkdir(fileDir, { recursive: true });
|
|
2548
|
+
const storedPath = path13.join(fileDir, stableStoredName(sourcePath, fileName));
|
|
2549
|
+
await fs11.copyFile(sourcePath, storedPath);
|
|
2556
2550
|
const messageId = input2.messages.ingest({
|
|
2557
2551
|
platform: "local-file",
|
|
2558
2552
|
platformChatId: "local-files",
|
|
@@ -2835,8 +2829,9 @@ var GatewayIngestor = class {
|
|
|
2835
2829
|
|
|
2836
2830
|
// src/gateway/detached.ts
|
|
2837
2831
|
import { spawn } from "child_process";
|
|
2838
|
-
import
|
|
2839
|
-
import
|
|
2832
|
+
import fs12 from "fs";
|
|
2833
|
+
import path14 from "path";
|
|
2834
|
+
var START_FAILURE_GRACE_MS = 250;
|
|
2840
2835
|
function buildGatewayForegroundSpawnCommand(argv = process.argv) {
|
|
2841
2836
|
const [command = process.execPath, ...rawArgs] = argv;
|
|
2842
2837
|
const args = [...rawArgs];
|
|
@@ -2851,7 +2846,37 @@ function buildGatewayForegroundSpawnCommand(argv = process.argv) {
|
|
|
2851
2846
|
args: [...args, "gateway", "start", "--foreground"]
|
|
2852
2847
|
};
|
|
2853
2848
|
}
|
|
2854
|
-
function
|
|
2849
|
+
/**
 * Turn an early child-process failure event into a short description.
 *
 * @param {{ type: "error", error: Error } | { type: string, code?: number|null, signal?: string|null }} event
 * @returns {string} The error message for `error` events; otherwise
 *   `signal=<name>` when a signal is present, else `exitCode=<code>`
 *   (`unknown` when the code is null/undefined).
 */
function describeImmediateChildFailure(event) {
  if (event.type !== "error") {
    if (event.signal) {
      return `signal=${event.signal}`;
    }
    return `exitCode=${event.code ?? "unknown"}`;
  }
  return event.error.message;
}
|
|
2855
|
+
/**
 * Watch a freshly spawned child for an early `error` or `exit` event.
 *
 * Resolves with `{ type: "error", error }` or `{ type: "exit", code, signal }`
 * for whichever event fires first, or with `null` when neither fires within
 * `graceMs` milliseconds. The promise never rejects. Both listeners and the
 * timer are removed once the outcome is known.
 *
 * @param {object} child - Emitter exposing `once`/`off` (e.g. a spawned child process).
 * @param {number} [graceMs=START_FAILURE_GRACE_MS] - How long to wait before treating the start as successful.
 * @returns {Promise<null | { type: "error", error: Error } | { type: "exit", code: number|null, signal: string|null }>}
 */
function waitForImmediateChildFailure(child, graceMs = START_FAILURE_GRACE_MS) {
  return new Promise((resolve) => {
    let done = false;
    let graceTimer;
    // Only the first outcome wins; later calls are ignored.
    function finish(outcome) {
      if (done) {
        return;
      }
      done = true;
      clearTimeout(graceTimer);
      child.off("error", handleError);
      child.off("exit", handleExit);
      resolve(outcome);
    }
    function handleError(error) {
      finish({ type: "error", error });
    }
    function handleExit(code, signal) {
      finish({ type: "exit", code, signal });
    }
    child.once("error", handleError);
    child.once("exit", handleExit);
    graceTimer = setTimeout(() => finish(null), graceMs);
  });
}
|
|
2879
|
+
async function startDetachedGateway(input2) {
|
|
2855
2880
|
const status = getGatewayStatus(input2.config, input2.secrets);
|
|
2856
2881
|
const logFile = getGatewayLogPath();
|
|
2857
2882
|
if (status.connection === "running") {
|
|
@@ -2862,7 +2887,7 @@ function startDetachedGateway(input2) {
|
|
|
2862
2887
|
...status.pid ? { pid: status.pid } : {}
|
|
2863
2888
|
};
|
|
2864
2889
|
}
|
|
2865
|
-
|
|
2890
|
+
fs12.mkdirSync(path14.dirname(logFile), { recursive: true });
|
|
2866
2891
|
let out;
|
|
2867
2892
|
let err;
|
|
2868
2893
|
let stdioClosed = false;
|
|
@@ -2872,22 +2897,31 @@ function startDetachedGateway(input2) {
|
|
|
2872
2897
|
}
|
|
2873
2898
|
stdioClosed = true;
|
|
2874
2899
|
if (typeof out === "number") {
|
|
2875
|
-
|
|
2900
|
+
fs12.closeSync(out);
|
|
2876
2901
|
}
|
|
2877
2902
|
if (typeof err === "number") {
|
|
2878
|
-
|
|
2903
|
+
fs12.closeSync(err);
|
|
2879
2904
|
}
|
|
2880
2905
|
};
|
|
2881
2906
|
try {
|
|
2882
|
-
out =
|
|
2883
|
-
err =
|
|
2907
|
+
out = fs12.openSync(logFile, "a");
|
|
2908
|
+
err = fs12.openSync(logFile, "a");
|
|
2884
2909
|
const foreground = buildGatewayForegroundSpawnCommand(input2.argv);
|
|
2885
2910
|
const child = spawn(foreground.command, foreground.args, {
|
|
2886
2911
|
detached: true,
|
|
2887
2912
|
stdio: ["ignore", out, err],
|
|
2888
2913
|
windowsHide: true
|
|
2889
2914
|
});
|
|
2915
|
+
const immediateFailure = await waitForImmediateChildFailure(child);
|
|
2890
2916
|
closeStdio();
|
|
2917
|
+
if (immediateFailure) {
|
|
2918
|
+
return {
|
|
2919
|
+
started: false,
|
|
2920
|
+
message: `\u98DE\u4E66 Gateway \u542F\u52A8\u5931\u8D25\uFF1A${describeImmediateChildFailure(immediateFailure)}\u3002\u8BF7\u67E5\u770B\u65E5\u5FD7\uFF1A${logFile}`,
|
|
2921
|
+
pid: child.pid,
|
|
2922
|
+
logFile
|
|
2923
|
+
};
|
|
2924
|
+
}
|
|
2891
2925
|
child.unref();
|
|
2892
2926
|
return {
|
|
2893
2927
|
started: true,
|
|
@@ -2921,7 +2955,7 @@ async function indexMessageChunks(input2) {
|
|
|
2921
2955
|
id: chunk.chunkId,
|
|
2922
2956
|
text: chunk.text,
|
|
2923
2957
|
score: 1,
|
|
2924
|
-
source:
|
|
2958
|
+
source: toEvidenceSource3(chunk)
|
|
2925
2959
|
}
|
|
2926
2960
|
});
|
|
2927
2961
|
}
|
|
@@ -2931,7 +2965,7 @@ async function indexMessageChunks(input2) {
|
|
|
2931
2965
|
vectors: records.length
|
|
2932
2966
|
};
|
|
2933
2967
|
}
|
|
2934
|
-
function
|
|
2968
|
+
function toEvidenceSource3(chunk) {
|
|
2935
2969
|
if (chunk.messageType === "file") {
|
|
2936
2970
|
return {
|
|
2937
2971
|
type: "file",
|
|
@@ -2960,25 +2994,23 @@ async function processMessagesNow(input2) {
|
|
|
2960
2994
|
finishedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2961
2995
|
};
|
|
2962
2996
|
}
|
|
2963
|
-
const
|
|
2964
|
-
|
|
2965
|
-
|
|
2966
|
-
|
|
2967
|
-
|
|
2968
|
-
|
|
2969
|
-
|
|
2970
|
-
|
|
2971
|
-
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
}
|
|
2980
|
-
vectorStore.close();
|
|
2981
|
-
}
|
|
2997
|
+
const vectorStore = new SqliteVectorStore(input2.database, {
|
|
2998
|
+
model: input2.config.embedding.model
|
|
2999
|
+
});
|
|
3000
|
+
const embedding = input2.embedding ?? createEmbeddingModel(input2.config, input2.secrets);
|
|
3001
|
+
const stats = await indexMessageChunks({
|
|
3002
|
+
messages: new MessageRepository(input2.database),
|
|
3003
|
+
embedding,
|
|
3004
|
+
store: vectorStore,
|
|
3005
|
+
limit: input2.limit
|
|
3006
|
+
});
|
|
3007
|
+
return {
|
|
3008
|
+
status: "completed",
|
|
3009
|
+
chunks: stats.chunks,
|
|
3010
|
+
vectors: stats.vectors,
|
|
3011
|
+
startedAt,
|
|
3012
|
+
finishedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
3013
|
+
};
|
|
2982
3014
|
}
|
|
2983
3015
|
|
|
2984
3016
|
// src/update/npm-updater.ts
|
|
@@ -3480,7 +3512,7 @@ function createWebApp(config) {
|
|
|
3480
3512
|
note: "\u95EE\u7B54\u5FC5\u987B\u5148\u68C0\u7D22\u8BC1\u636E\uFF0C\u7981\u6B62\u5168\u91CF\u4E0A\u4E0B\u6587\u5806\u53E0\u3002",
|
|
3481
3513
|
retrieval: {
|
|
3482
3514
|
keyword: "SQLite FTS5",
|
|
3483
|
-
vector: "
|
|
3515
|
+
vector: "SQLite embedding",
|
|
3484
3516
|
hybrid: true
|
|
3485
3517
|
}
|
|
3486
3518
|
},
|
|
@@ -3688,8 +3720,7 @@ async function startGatewayForegroundCommand() {
|
|
|
3688
3720
|
mode: "gateway"
|
|
3689
3721
|
});
|
|
3690
3722
|
const database = openDatabase(config);
|
|
3691
|
-
const
|
|
3692
|
-
const vectorStore = LanceDbVectorStore2 ? await LanceDbVectorStore2.connectFromConfig(config) : null;
|
|
3723
|
+
const vectorStore = hasEmbeddingConfig(config, secrets) ? new SqliteVectorStore(database, { model: config.embedding.model }) : null;
|
|
3693
3724
|
const gatewayRuntime = createFeishuGateway({
|
|
3694
3725
|
config,
|
|
3695
3726
|
secrets,
|
|
@@ -3711,7 +3742,6 @@ async function startGatewayForegroundCommand() {
|
|
|
3711
3742
|
});
|
|
3712
3743
|
const cleanup = () => {
|
|
3713
3744
|
gatewayRuntime.stop();
|
|
3714
|
-
vectorStore?.close();
|
|
3715
3745
|
database.close();
|
|
3716
3746
|
removeGatewayPidRecord();
|
|
3717
3747
|
};
|
|
@@ -3739,7 +3769,7 @@ async function startGatewayCommand(options = {}) {
|
|
|
3739
3769
|
}
|
|
3740
3770
|
const config = await loadConfig();
|
|
3741
3771
|
const secrets = await loadSecrets();
|
|
3742
|
-
const result = startDetachedGateway({ config, secrets });
|
|
3772
|
+
const result = await startDetachedGateway({ config, secrets });
|
|
3743
3773
|
console.log(result.message);
|
|
3744
3774
|
if (result.pid) {
|
|
3745
3775
|
console.log(`PID\uFF1A${result.pid}`);
|
|
@@ -3789,7 +3819,7 @@ async function deleteDataCommand(targetType, targetId, options) {
|
|
|
3789
3819
|
if (result.skippedStoredFiles.length > 0) {
|
|
3790
3820
|
console.log(`\u8DF3\u8FC7\u975E\u6570\u636E\u76EE\u5F55\u6587\u4EF6\uFF1A${result.skippedStoredFiles.join("\uFF1B")}`);
|
|
3791
3821
|
}
|
|
3792
|
-
console.log("SQLite FTS \u5DF2\u540C\u6B65\u5220\u9664\uFF1B\u5982\u4F7F\u7528
|
|
3822
|
+
console.log("SQLite FTS \u5DF2\u540C\u6B65\u5220\u9664\uFF1B\u5982\u4F7F\u7528 SQLite embedding \u8BED\u4E49\u68C0\u7D22\uFF0C\u8BF7\u8FD0\u884C chattercatcher index rebuild\u3002");
|
|
3793
3823
|
} finally {
|
|
3794
3824
|
database.close();
|
|
3795
3825
|
}
|
|
@@ -3803,55 +3833,63 @@ index.command("status").description("\u67E5\u770B\u7D22\u5F15\u72B6\u6001").acti
|
|
|
3803
3833
|
const config = await loadConfig();
|
|
3804
3834
|
const secrets = await loadSecrets();
|
|
3805
3835
|
const database = openDatabase(config);
|
|
3806
|
-
|
|
3807
|
-
|
|
3808
|
-
|
|
3809
|
-
|
|
3810
|
-
|
|
3811
|
-
|
|
3812
|
-
|
|
3813
|
-
|
|
3814
|
-
|
|
3815
|
-
|
|
3816
|
-
|
|
3817
|
-
|
|
3818
|
-
|
|
3819
|
-
|
|
3820
|
-
|
|
3821
|
-
|
|
3822
|
-
|
|
3823
|
-
|
|
3824
|
-
|
|
3825
|
-
|
|
3826
|
-
|
|
3827
|
-
|
|
3828
|
-
|
|
3836
|
+
try {
|
|
3837
|
+
const messages = new MessageRepository(database);
|
|
3838
|
+
const vectorStore = new SqliteVectorStore(database, { model: config.embedding.model });
|
|
3839
|
+
const vectors = vectorStore.count();
|
|
3840
|
+
console.log(JSON.stringify(
|
|
3841
|
+
{
|
|
3842
|
+
database: getDatabasePath(config),
|
|
3843
|
+
chats: messages.getChatCount(),
|
|
3844
|
+
messages: messages.getMessageCount(),
|
|
3845
|
+
embeddings: {
|
|
3846
|
+
backend: "SQLite embedding \u5411\u91CF\u7D22\u5F15",
|
|
3847
|
+
configured: hasEmbeddingConfig(config, secrets),
|
|
3848
|
+
model: config.embedding.model,
|
|
3849
|
+
vectors,
|
|
3850
|
+
status: hasEmbeddingConfig(config, secrets) ? "SQLite embedding \u5411\u91CF\u7D22\u5F15\u5DF2\u53EF\u7528\u4E8E\u8BED\u4E49\u68C0\u7D22" : "SQLite embedding \u5411\u91CF\u7D22\u5F15\u5DF2\u63A5\u5165\uFF1B\u9700\u914D\u7F6E embedding \u540E\u542F\u7528\u8BED\u4E49\u68C0\u7D22"
|
|
3851
|
+
},
|
|
3852
|
+
retrieval: {
|
|
3853
|
+
keyword: "SQLite FTS5",
|
|
3854
|
+
vector: "SQLite embedding \u5411\u91CF\u7D22\u5F15",
|
|
3855
|
+
hybrid: "\u542F\u7528\uFF1ASQLite FTS + SQLite embedding \u5411\u91CF\u68C0\u7D22",
|
|
3856
|
+
rag: "\u5F3A\u5236\u5148\u68C0\u7D22\u8BC1\u636E\u518D\u56DE\u7B54\uFF0C\u7981\u6B62\u5168\u91CF\u4E0A\u4E0B\u6587\u5806\u53E0"
|
|
3857
|
+
}
|
|
3858
|
+
},
|
|
3859
|
+
null,
|
|
3860
|
+
2
|
|
3861
|
+
));
|
|
3862
|
+
} finally {
|
|
3863
|
+
database.close();
|
|
3864
|
+
}
|
|
3829
3865
|
});
|
|
3830
|
-
index.command("rebuild").description("\u91CD\u5EFA
|
|
3866
|
+
index.command("rebuild").description("\u91CD\u5EFA\u8BED\u4E49\u5411\u91CF\u7D22\u5F15").option("--limit <number>", "\u6700\u591A\u7D22\u5F15\u7684 chunk \u6570", "10000").action(async (options) => {
|
|
3831
3867
|
const config = await loadConfig();
|
|
3832
3868
|
const secrets = await loadSecrets();
|
|
3833
3869
|
if (!hasEmbeddingConfig(config, secrets)) {
|
|
3834
|
-
console.log("Embedding \u914D\u7F6E\u4E0D\u5B8C\u6574\uFF0C\u65E0\u6CD5\u91CD\u5EFA\u5411\u91CF\u7D22\u5F15\u3002\u8BF7\u8FD0\u884C chattercatcher setup \u6216 chattercatcher settings\u3002");
|
|
3870
|
+
console.log("Embedding \u914D\u7F6E\u4E0D\u5B8C\u6574\uFF0C\u65E0\u6CD5\u91CD\u5EFA SQLite embedding \u5411\u91CF\u7D22\u5F15\u3002\u8BF7\u8FD0\u884C chattercatcher setup \u6216 chattercatcher settings\u3002");
|
|
3835
3871
|
return;
|
|
3836
3872
|
}
|
|
3837
3873
|
const database = openDatabase(config);
|
|
3838
|
-
const
|
|
3839
|
-
const vectorStore = await LanceDbVectorStore2.connectFromConfig(config);
|
|
3874
|
+
const limit = Number(options.limit);
|
|
3840
3875
|
try {
|
|
3841
|
-
const
|
|
3842
|
-
|
|
3843
|
-
|
|
3844
|
-
|
|
3845
|
-
limit: Number(
|
|
3876
|
+
const result = await processMessagesNow({
|
|
3877
|
+
config,
|
|
3878
|
+
secrets,
|
|
3879
|
+
database,
|
|
3880
|
+
limit: Number.isFinite(limit) ? limit : 1e4
|
|
3846
3881
|
});
|
|
3847
|
-
|
|
3882
|
+
if (result.status === "skipped") {
|
|
3883
|
+
console.log(`\u5904\u7406\u8DF3\u8FC7\uFF1A${result.reason}`);
|
|
3884
|
+
return;
|
|
3885
|
+
}
|
|
3886
|
+
console.log(`SQLite embedding \u5411\u91CF\u7D22\u5F15\u5B8C\u6210\uFF1Achunks=${result.chunks}, vectors=${result.vectors}`);
|
|
3848
3887
|
} finally {
|
|
3849
|
-
vectorStore.close();
|
|
3850
3888
|
database.close();
|
|
3851
3889
|
}
|
|
3852
3890
|
});
|
|
3853
3891
|
var processCommand = program.command("process").description("\u7ACB\u5373\u5904\u7406\u540E\u53F0\u4EFB\u52A1");
|
|
3854
|
-
processCommand.command("messages").description("\u7ACB\u5373\u5904\u7406\u6D88\u606F\u7D22\u5F15\u4EFB\u52A1\uFF0C\u628A\u6D88\u606F chunks \u5199\u5165
|
|
3892
|
+
processCommand.command("messages").description("\u7ACB\u5373\u5904\u7406\u6D88\u606F\u7D22\u5F15\u4EFB\u52A1\uFF0C\u628A\u6D88\u606F chunks \u5199\u5165 SQLite embedding \u5411\u91CF\u7D22\u5F15").option("--limit <number>", "\u6700\u591A\u5904\u7406\u7684 chunk \u6570", "10000").action(async (options) => {
|
|
3855
3893
|
const config = await loadConfig();
|
|
3856
3894
|
const secrets = await loadSecrets();
|
|
3857
3895
|
const database = openDatabase(config);
|
|
@@ -3885,7 +3923,7 @@ files.command("add").description("\u628A\u672C\u5730\u6587\u4EF6\u89E3\u6790\u30
|
|
|
3885
3923
|
`\u5DF2\u5BFC\u5165\u6587\u4EF6\uFF1A${result.fileName}\uFF0C\u89E3\u6790\u5668=${result.parser}\uFF0C\u5B57\u7B26\u6570=${result.characters}\uFF0C\u6D88\u606FID=${result.messageId}`
|
|
3886
3924
|
);
|
|
3887
3925
|
}
|
|
3888
|
-
console.log("\u6587\u4EF6\u5DF2\u8FDB\u5165 SQLite FTS \u68C0\u7D22\uFF1B\u5982\u5DF2\u914D\u7F6E embedding\uFF0C\u53EF\u8FD0\u884C chattercatcher index rebuild \u66F4\u65B0
|
|
3926
|
+
console.log("\u6587\u4EF6\u5DF2\u8FDB\u5165 SQLite FTS \u68C0\u7D22\uFF1B\u5982\u5DF2\u914D\u7F6E embedding\uFF0C\u53EF\u8FD0\u884C chattercatcher index rebuild \u66F4\u65B0 SQLite embedding \u5411\u91CF\u7D22\u5F15\u3002");
|
|
3889
3927
|
} finally {
|
|
3890
3928
|
database.close();
|
|
3891
3929
|
}
|
|
@@ -4014,7 +4052,7 @@ program.command("restore").description("\u4ECE ChatterCatcher \u5BFC\u51FA\u6587
|
|
|
4014
4052
|
console.log(`\u6062\u590D\u5B8C\u6210\uFF1A${result.inputPath}`);
|
|
4015
4053
|
console.log(`\u6A21\u5F0F\uFF1A${result.mode === "replace" ? "\u66FF\u6362" : "\u5408\u5E76"}`);
|
|
4016
4054
|
console.log(`\u5305\u542B\uFF1A\u7FA4\u804A=${result.chats}\uFF0C\u6D88\u606F=${result.messages}\uFF0Cchunks=${result.chunks}\uFF0C\u6587\u4EF6\u4EFB\u52A1=${result.fileJobs}`);
|
|
4017
|
-
console.log("SQLite FTS \u5DF2\u91CD\u5EFA\uFF1B\u5982\u4F7F\u7528
|
|
4055
|
+
console.log("SQLite FTS \u5DF2\u91CD\u5EFA\uFF1B\u5982\u4F7F\u7528 SQLite embedding \u8BED\u4E49\u68C0\u7D22\uFF0C\u8BF7\u8FD0\u884C chattercatcher index rebuild\u3002");
|
|
4018
4056
|
} finally {
|
|
4019
4057
|
database.close();
|
|
4020
4058
|
}
|
|
@@ -4044,7 +4082,7 @@ dev.command("ingest-feishu-event").description("\u4ECE JSON \u6587\u4EF6\u6A21\u
|
|
|
4044
4082
|
const config = await loadConfig();
|
|
4045
4083
|
const database = openDatabase(config);
|
|
4046
4084
|
try {
|
|
4047
|
-
const raw = await
|
|
4085
|
+
const raw = await fs13.readFile(options.file, "utf8");
|
|
4048
4086
|
const payload = JSON.parse(raw);
|
|
4049
4087
|
const result = new GatewayIngestor(database).ingestFeishuEvent(payload);
|
|
4050
4088
|
if (!result.accepted) {
|
|
@@ -4063,6 +4101,7 @@ dev.command("search").description("\u901A\u8FC7\u672C\u5730 FTS \u68C0\u7D22\u6D
|
|
|
4063
4101
|
const { retriever, close } = await createHybridRetriever({
|
|
4064
4102
|
config,
|
|
4065
4103
|
secrets,
|
|
4104
|
+
database,
|
|
4066
4105
|
messages: new MessageRepository(database)
|
|
4067
4106
|
});
|
|
4068
4107
|
const evidence = await retriever.retrieve(question);
|
|
@@ -4083,6 +4122,7 @@ dev.command("ask").description("\u901A\u8FC7\u672C\u5730\u68C0\u7D22\u8BC1\u636E
|
|
|
4083
4122
|
const { retriever, close } = await createHybridRetriever({
|
|
4084
4123
|
config,
|
|
4085
4124
|
secrets,
|
|
4125
|
+
database,
|
|
4086
4126
|
messages: new MessageRepository(database)
|
|
4087
4127
|
});
|
|
4088
4128
|
try {
|