chattercatcher 0.1.3 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +180 -238
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +6 -12
- package/dist/index.js +169 -245
- package/dist/index.js.map +1 -1
- package/package.json +1 -2
package/dist/index.js
CHANGED
|
@@ -1,150 +1,3 @@
|
|
|
1
|
-
var __defProp = Object.defineProperty;
|
|
2
|
-
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
3
|
-
var __esm = (fn, res) => function __init() {
|
|
4
|
-
return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
|
|
5
|
-
};
|
|
6
|
-
var __export = (target, all) => {
|
|
7
|
-
for (var name in all)
|
|
8
|
-
__defProp(target, name, { get: all[name], enumerable: true });
|
|
9
|
-
};
|
|
10
|
-
|
|
11
|
-
// src/config/paths.ts
|
|
12
|
-
import os2 from "os";
|
|
13
|
-
import path2 from "path";
|
|
14
|
-
function getChatterCatcherHome() {
|
|
15
|
-
return process.env.CHATTERCATCHER_HOME || path2.join(os2.homedir(), ".chattercatcher");
|
|
16
|
-
}
|
|
17
|
-
function resolveHomePath(value) {
|
|
18
|
-
if (value === "~") {
|
|
19
|
-
return os2.homedir();
|
|
20
|
-
}
|
|
21
|
-
if (value.startsWith("~/") || value.startsWith("~\\")) {
|
|
22
|
-
return path2.join(os2.homedir(), value.slice(2));
|
|
23
|
-
}
|
|
24
|
-
return path2.resolve(value);
|
|
25
|
-
}
|
|
26
|
-
function getConfigPath() {
|
|
27
|
-
return path2.join(getChatterCatcherHome(), "config.json");
|
|
28
|
-
}
|
|
29
|
-
function getSecretsPath() {
|
|
30
|
-
return path2.join(getChatterCatcherHome(), "secrets.json");
|
|
31
|
-
}
|
|
32
|
-
var init_paths = __esm({
|
|
33
|
-
"src/config/paths.ts"() {
|
|
34
|
-
"use strict";
|
|
35
|
-
}
|
|
36
|
-
});
|
|
37
|
-
|
|
38
|
-
// src/rag/lancedb-store.ts
|
|
39
|
-
var lancedb_store_exports = {};
|
|
40
|
-
__export(lancedb_store_exports, {
|
|
41
|
-
LanceDbVectorStore: () => LanceDbVectorStore,
|
|
42
|
-
getLanceDbPath: () => getLanceDbPath
|
|
43
|
-
});
|
|
44
|
-
import fs5 from "fs/promises";
|
|
45
|
-
import path8 from "path";
|
|
46
|
-
function getLanceDbPath(config) {
|
|
47
|
-
return path8.join(resolveHomePath(config.storage.dataDir), "vector", "lancedb");
|
|
48
|
-
}
|
|
49
|
-
function toRow(record) {
|
|
50
|
-
return {
|
|
51
|
-
id: record.id,
|
|
52
|
-
vector: record.vector,
|
|
53
|
-
text: record.evidence.text,
|
|
54
|
-
source_json: JSON.stringify(record.evidence.source)
|
|
55
|
-
};
|
|
56
|
-
}
|
|
57
|
-
function toLanceData(rows) {
|
|
58
|
-
return rows.map((row) => ({
|
|
59
|
-
id: row.id,
|
|
60
|
-
vector: row.vector,
|
|
61
|
-
text: row.text,
|
|
62
|
-
source_json: row.source_json
|
|
63
|
-
}));
|
|
64
|
-
}
|
|
65
|
-
function escapeSqlString(value) {
|
|
66
|
-
return value.replace(/'/g, "''");
|
|
67
|
-
}
|
|
68
|
-
function toEvidence(row) {
|
|
69
|
-
const distance = row._distance ?? 0;
|
|
70
|
-
const vectorScore = 1 / (1 + Math.max(0, distance));
|
|
71
|
-
return {
|
|
72
|
-
id: row.id,
|
|
73
|
-
text: row.text,
|
|
74
|
-
score: vectorScore,
|
|
75
|
-
vectorScore,
|
|
76
|
-
source: JSON.parse(row.source_json)
|
|
77
|
-
};
|
|
78
|
-
}
|
|
79
|
-
var DEFAULT_TABLE_NAME, LanceDbVectorStore;
|
|
80
|
-
var init_lancedb_store = __esm({
|
|
81
|
-
"src/rag/lancedb-store.ts"() {
|
|
82
|
-
"use strict";
|
|
83
|
-
init_paths();
|
|
84
|
-
DEFAULT_TABLE_NAME = "message_chunks";
|
|
85
|
-
LanceDbVectorStore = class _LanceDbVectorStore {
|
|
86
|
-
constructor(connection, tableName) {
|
|
87
|
-
this.connection = connection;
|
|
88
|
-
this.tableName = tableName;
|
|
89
|
-
}
|
|
90
|
-
connection;
|
|
91
|
-
tableName;
|
|
92
|
-
static async connect(uri, tableName = DEFAULT_TABLE_NAME) {
|
|
93
|
-
await fs5.mkdir(uri, { recursive: true });
|
|
94
|
-
const lancedb = await import("@lancedb/lancedb");
|
|
95
|
-
const connection = await lancedb.connect(uri);
|
|
96
|
-
return new _LanceDbVectorStore(connection, tableName);
|
|
97
|
-
}
|
|
98
|
-
static async connectFromConfig(config, tableName = DEFAULT_TABLE_NAME) {
|
|
99
|
-
return _LanceDbVectorStore.connect(getLanceDbPath(config), tableName);
|
|
100
|
-
}
|
|
101
|
-
close() {
|
|
102
|
-
this.connection.close();
|
|
103
|
-
}
|
|
104
|
-
async upsert(records) {
|
|
105
|
-
if (records.length === 0) {
|
|
106
|
-
return;
|
|
107
|
-
}
|
|
108
|
-
const rows = records.map(toRow);
|
|
109
|
-
const data = toLanceData(rows);
|
|
110
|
-
const table = await this.ensureTable(data);
|
|
111
|
-
const ids = rows.map((row) => `'${escapeSqlString(row.id)}'`).join(", ");
|
|
112
|
-
await table.delete(`id IN (${ids})`);
|
|
113
|
-
await table.add(data);
|
|
114
|
-
}
|
|
115
|
-
async search(vector, limit) {
|
|
116
|
-
const table = await this.openTableIfExists();
|
|
117
|
-
if (!table) {
|
|
118
|
-
return [];
|
|
119
|
-
}
|
|
120
|
-
const rows = await table.vectorSearch(vector).limit(limit).toArray();
|
|
121
|
-
return rows.map(toEvidence);
|
|
122
|
-
}
|
|
123
|
-
async count() {
|
|
124
|
-
const table = await this.openTableIfExists();
|
|
125
|
-
if (!table) {
|
|
126
|
-
return 0;
|
|
127
|
-
}
|
|
128
|
-
return table.countRows();
|
|
129
|
-
}
|
|
130
|
-
async ensureTable(initialRows) {
|
|
131
|
-
const table = await this.openTableIfExists();
|
|
132
|
-
if (table) {
|
|
133
|
-
return table;
|
|
134
|
-
}
|
|
135
|
-
return this.connection.createTable(this.tableName, initialRows);
|
|
136
|
-
}
|
|
137
|
-
async openTableIfExists() {
|
|
138
|
-
const tableNames = await this.connection.tableNames();
|
|
139
|
-
if (!tableNames.includes(this.tableName)) {
|
|
140
|
-
return null;
|
|
141
|
-
}
|
|
142
|
-
return this.connection.openTable(this.tableName);
|
|
143
|
-
}
|
|
144
|
-
};
|
|
145
|
-
}
|
|
146
|
-
});
|
|
147
|
-
|
|
148
1
|
// src/config/schema.ts
|
|
149
2
|
import os from "os";
|
|
150
3
|
import path from "path";
|
|
@@ -211,7 +64,30 @@ function createDefaultSecrets() {
|
|
|
211
64
|
// src/config/store.ts
|
|
212
65
|
import fs from "fs/promises";
|
|
213
66
|
import path3 from "path";
|
|
214
|
-
|
|
67
|
+
|
|
68
|
+
// src/config/paths.ts
|
|
69
|
+
import os2 from "os";
|
|
70
|
+
import path2 from "path";
|
|
71
|
+
/**
 * Resolve the directory where ChatterCatcher keeps its configuration and state.
 *
 * @returns {string} The CHATTERCATCHER_HOME environment variable when it holds a
 *   non-empty string; otherwise `.chattercatcher` inside the current user's home
 *   directory.
 */
function getChatterCatcherHome() {
  // `||` means an empty CHATTERCATCHER_HOME also falls back to the default location.
  return process.env.CHATTERCATCHER_HOME || path2.join(os2.homedir(), ".chattercatcher");
}
|
|
74
|
+
/**
 * Expand a leading `~` to the current user's home directory and return a
 * usable filesystem path.
 *
 * @param {string} value - A path that may be exactly `~`, start with `~/` or
 *   `~\`, or be any other relative or absolute path.
 * @returns {string} The home directory for `~`; the home directory joined with
 *   the remainder for `~/…` and `~\…`; otherwise `path.resolve(value)`, which
 *   makes relative paths absolute against the current working directory.
 */
function resolveHomePath(value) {
  if (value === "~") {
    return os2.homedir();
  }
  // Both separator styles are accepted after the tilde; the two-character prefix is dropped.
  if (value.startsWith("~/") || value.startsWith("~\\")) {
    return path2.join(os2.homedir(), value.slice(2));
  }
  return path2.resolve(value);
}
|
|
83
|
+
/**
 * Build the path of the main configuration file.
 *
 * @returns {string} `config.json` inside the directory returned by
 *   `getChatterCatcherHome()`.
 */
function getConfigPath() {
  return path2.join(getChatterCatcherHome(), "config.json");
}
|
|
86
|
+
/**
 * Build the path of the secrets file.
 *
 * @returns {string} `secrets.json` inside the directory returned by
 *   `getChatterCatcherHome()`.
 */
function getSecretsPath() {
  return path2.join(getChatterCatcherHome(), "secrets.json");
}
|
|
89
|
+
|
|
90
|
+
// src/config/store.ts
|
|
215
91
|
async function readJsonFile(filePath, fallback) {
|
|
216
92
|
try {
|
|
217
93
|
const raw = await fs.readFile(filePath, "utf8");
|
|
@@ -275,7 +151,6 @@ function resolveEmbeddingApiKey(input) {
|
|
|
275
151
|
}
|
|
276
152
|
|
|
277
153
|
// src/data/deletion.ts
|
|
278
|
-
init_paths();
|
|
279
154
|
import fs2 from "fs/promises";
|
|
280
155
|
import path4 from "path";
|
|
281
156
|
function emptyResult(targetType, targetId) {
|
|
@@ -401,7 +276,6 @@ async function deleteLocalData(input) {
|
|
|
401
276
|
}
|
|
402
277
|
|
|
403
278
|
// src/db/database.ts
|
|
404
|
-
init_paths();
|
|
405
279
|
import Database from "better-sqlite3";
|
|
406
280
|
import fs3 from "fs";
|
|
407
281
|
import path5 from "path";
|
|
@@ -462,6 +336,13 @@ function migrateDatabase(database) {
|
|
|
462
336
|
tokenize = 'unicode61'
|
|
463
337
|
);
|
|
464
338
|
|
|
339
|
+
CREATE TABLE IF NOT EXISTS message_chunk_vectors (
|
|
340
|
+
chunk_id TEXT PRIMARY KEY REFERENCES message_chunks(id) ON DELETE CASCADE,
|
|
341
|
+
vector_json TEXT NOT NULL,
|
|
342
|
+
evidence_json TEXT NOT NULL,
|
|
343
|
+
updated_at TEXT NOT NULL
|
|
344
|
+
);
|
|
345
|
+
|
|
465
346
|
CREATE TABLE IF NOT EXISTS file_jobs (
|
|
466
347
|
id TEXT PRIMARY KEY,
|
|
467
348
|
source_path TEXT NOT NULL,
|
|
@@ -481,8 +362,7 @@ function migrateDatabase(database) {
|
|
|
481
362
|
}
|
|
482
363
|
|
|
483
364
|
// src/doctor/checks.ts
|
|
484
|
-
|
|
485
|
-
import fs6 from "fs/promises";
|
|
365
|
+
import fs5 from "fs/promises";
|
|
486
366
|
|
|
487
367
|
// src/files/jobs.ts
|
|
488
368
|
import crypto from "crypto";
|
|
@@ -624,7 +504,6 @@ var FileJobRepository = class {
|
|
|
624
504
|
};
|
|
625
505
|
|
|
626
506
|
// src/gateway/runtime.ts
|
|
627
|
-
init_paths();
|
|
628
507
|
import fs4 from "fs";
|
|
629
508
|
import path7 from "path";
|
|
630
509
|
function getGatewayPidPath() {
|
|
@@ -1254,6 +1133,82 @@ var MessageFtsRetriever = class {
|
|
|
1254
1133
|
}
|
|
1255
1134
|
};
|
|
1256
1135
|
|
|
1136
|
+
// src/rag/embedding.ts
|
|
1137
|
+
/**
 * Compute the cosine similarity of two numeric vectors.
 *
 * @param {ArrayLike<number>} left - First vector.
 * @param {ArrayLike<number>} right - Second vector.
 * @returns {number} A value in [-1, 1], or 0 when the vectors cannot be compared:
 *   either one is empty, their lengths differ, either has zero magnitude, or the
 *   computation does not produce a finite number.
 */
function cosineSimilarity(left, right) {
  if (left.length === 0 || right.length === 0 || left.length !== right.length) {
    return 0;
  }
  let dot = 0;
  let leftNorm = 0;
  let rightNorm = 0;
  for (let index = 0; index < left.length; index += 1) {
    // Missing (null/undefined) components count as 0.
    const leftValue = left[index] ?? 0;
    const rightValue = right[index] ?? 0;
    dot += leftValue * rightValue;
    leftNorm += leftValue * leftValue;
    rightNorm += rightValue * rightValue;
  }
  if (leftNorm === 0 || rightNorm === 0) {
    return 0;
  }
  const similarity = dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
  // A NaN/Infinity component would otherwise leak a NaN score to callers that sort by it.
  return Number.isFinite(similarity) ? similarity : 0;
}
|
|
1156
|
+
|
|
1157
|
+
// src/rag/sqlite-vector-store.ts
|
|
1158
|
+
/**
 * Vector store that keeps embeddings in the `message_chunk_vectors` table of
 * the database handle it is given and ranks them with `cosineSimilarity`.
 *
 * The handle is used through `prepare(sql)` (returning statements with
 * `run`/`all`/`get`) and `transaction(fn)`.
 */
var SQLiteVectorStore = class {
  /**
   * @param {object} database - Open database handle; the store never closes it.
   */
  constructor(database) {
    this.database = database;
  }
  database;

  /**
   * Insert or replace the vector and evidence payload of each record, keyed by
   * `record.id`. All records are written inside one transaction and share one
   * `updated_at` timestamp.
   *
   * @param {Array<{id: string, vector: number[], evidence: object}>} records
   * @returns {Promise<void>}
   */
  async upsert(records) {
    if (records.length === 0) {
      return;
    }
    const statement = this.database.prepare(`
      INSERT INTO message_chunk_vectors (chunk_id, vector_json, evidence_json, updated_at)
      VALUES (@chunkId, @vectorJson, @evidenceJson, @updatedAt)
      ON CONFLICT(chunk_id) DO UPDATE SET
        vector_json = excluded.vector_json,
        evidence_json = excluded.evidence_json,
        updated_at = excluded.updated_at
    `);
    const upsertMany = this.database.transaction((items) => {
      const updatedAt = (/* @__PURE__ */ new Date()).toISOString();
      for (const item of items) {
        statement.run({
          chunkId: item.id,
          vectorJson: JSON.stringify(item.vector),
          evidenceJson: JSON.stringify(item.evidence),
          updatedAt
        });
      }
    });
    upsertMany(records);
  }

  /**
   * Return the stored evidence entries most similar to `vector`.
   *
   * Every stored vector is scored (full table scan); the best `limit` entries
   * are returned in descending score order with `score` and `vectorScore` set
   * to the cosine similarity.
   *
   * @param {number[]} vector - Query embedding.
   * @param {number} limit - Maximum number of results; values <= 0 yield `[]`.
   * @returns {Promise<object[]>}
   */
  async search(vector, limit) {
    if (limit <= 0) {
      return [];
    }
    const rows = this.database.prepare("SELECT chunk_id, vector_json, evidence_json FROM message_chunk_vectors").all();
    const ranked = rows.map((row) => ({
      row,
      vectorScore: cosineSimilarity(vector, JSON.parse(row.vector_json))
    })).sort((left, right) => right.vectorScore - left.vectorScore).slice(0, limit);
    // Evidence is parsed only for the rows that are actually returned, so rows
    // outside the top `limit` cost no extra parsing and cannot fail the query.
    return ranked.map(({ row, vectorScore }) => ({
      ...JSON.parse(row.evidence_json),
      score: vectorScore,
      vectorScore
    }));
  }

  /**
   * @returns {Promise<number>} Number of rows in `message_chunk_vectors`.
   */
  async count() {
    const row = this.database.prepare("SELECT COUNT(*) AS count FROM message_chunk_vectors").get();
    return row.count;
  }

  /**
   * No-op: the store does not close the database handle it was constructed with.
   */
  close() {
  }
};
|
|
1211
|
+
|
|
1257
1212
|
// src/rag/vector-retriever.ts
|
|
1258
1213
|
var VectorRetriever = class {
|
|
1259
1214
|
constructor(embedding, store, limit = 8) {
|
|
@@ -1278,8 +1233,7 @@ async function createHybridRetriever(input) {
|
|
|
1278
1233
|
const retrievers = [new MessageFtsRetriever(input.messages, { excludeMessageIds: input.excludeMessageIds })];
|
|
1279
1234
|
const closers = [];
|
|
1280
1235
|
if (hasEmbeddingConfig(input.config, input.secrets)) {
|
|
1281
|
-
const
|
|
1282
|
-
const vectorStore = await LanceDbVectorStore2.connectFromConfig(input.config);
|
|
1236
|
+
const vectorStore = new SQLiteVectorStore(input.messages.database);
|
|
1283
1237
|
retrievers.push(new VectorRetriever(createEmbeddingModel(input.config, input.secrets), vectorStore));
|
|
1284
1238
|
closers.push(() => vectorStore.close());
|
|
1285
1239
|
}
|
|
@@ -1311,7 +1265,7 @@ async function runDoctor(config, secrets, options = {}) {
|
|
|
1311
1265
|
checks.push(checkEmbeddingConfig(config, secrets));
|
|
1312
1266
|
checks.push(await checkSqlite(config));
|
|
1313
1267
|
checks.push(await checkFilePipeline(config));
|
|
1314
|
-
checks.push(await
|
|
1268
|
+
checks.push(await checkSqliteVector(config));
|
|
1315
1269
|
checks.push(checkRagPolicy());
|
|
1316
1270
|
if (options.online) {
|
|
1317
1271
|
checks.push(await checkChatModel(config, secrets));
|
|
@@ -1322,8 +1276,8 @@ async function runDoctor(config, secrets, options = {}) {
|
|
|
1322
1276
|
async function checkHomeDirectory() {
|
|
1323
1277
|
const home = getChatterCatcherHome();
|
|
1324
1278
|
try {
|
|
1325
|
-
await
|
|
1326
|
-
await
|
|
1279
|
+
await fs5.mkdir(home, { recursive: true });
|
|
1280
|
+
await fs5.access(home);
|
|
1327
1281
|
return pass("\u914D\u7F6E\u76EE\u5F55", home);
|
|
1328
1282
|
} catch (error) {
|
|
1329
1283
|
return fail("\u914D\u7F6E\u76EE\u5F55", error instanceof Error ? error.message : String(error));
|
|
@@ -1344,7 +1298,7 @@ function checkLlmConfig(config, secrets) {
|
|
|
1344
1298
|
}
|
|
1345
1299
|
function checkEmbeddingConfig(config, secrets) {
|
|
1346
1300
|
if (!hasEmbeddingConfig(config, secrets)) {
|
|
1347
|
-
return warn("Embedding \u914D\u7F6E", "\u672A\u914D\u7F6E\u5B8C\u6574\uFF1BRAG \u4F1A\u4F7F\u7528 SQLite FTS\uFF0C\u65E0\u6CD5\u4F7F\u7528
|
|
1301
|
+
return warn("Embedding \u914D\u7F6E", "\u672A\u914D\u7F6E\u5B8C\u6574\uFF1BRAG \u4F1A\u4F7F\u7528 SQLite FTS\uFF0C\u65E0\u6CD5\u4F7F\u7528 SQLite Vector \u8BED\u4E49\u68C0\u7D22\u3002");
|
|
1348
1302
|
}
|
|
1349
1303
|
return pass("Embedding \u914D\u7F6E", `${config.embedding.model} @ ${config.embedding.baseUrl || config.llm.baseUrl}`);
|
|
1350
1304
|
}
|
|
@@ -1378,17 +1332,17 @@ async function checkFilePipeline(config) {
|
|
|
1378
1332
|
database?.close();
|
|
1379
1333
|
}
|
|
1380
1334
|
}
|
|
1381
|
-
async function
|
|
1382
|
-
let
|
|
1335
|
+
async function checkSqliteVector(config) {
|
|
1336
|
+
let database = null;
|
|
1383
1337
|
try {
|
|
1384
|
-
|
|
1385
|
-
store =
|
|
1338
|
+
database = openDatabase(config);
|
|
1339
|
+
const store = new SQLiteVectorStore(database);
|
|
1386
1340
|
const count = await store.count();
|
|
1387
|
-
return pass("
|
|
1341
|
+
return pass("SQLite Vector", `${getDatabasePath(config)}\uFF1Bvectors=${count}`);
|
|
1388
1342
|
} catch (error) {
|
|
1389
|
-
return fail("
|
|
1343
|
+
return fail("SQLite Vector", error instanceof Error ? error.message : String(error));
|
|
1390
1344
|
} finally {
|
|
1391
|
-
|
|
1345
|
+
database?.close();
|
|
1392
1346
|
}
|
|
1393
1347
|
}
|
|
1394
1348
|
function checkRagPolicy() {
|
|
@@ -1429,9 +1383,8 @@ function formatDoctorChecks(checks) {
|
|
|
1429
1383
|
}
|
|
1430
1384
|
|
|
1431
1385
|
// src/export/data-export.ts
|
|
1432
|
-
|
|
1433
|
-
import
|
|
1434
|
-
import path9 from "path";
|
|
1386
|
+
import fs6 from "fs/promises";
|
|
1387
|
+
import path8 from "path";
|
|
1435
1388
|
function parseJsonObject(value) {
|
|
1436
1389
|
try {
|
|
1437
1390
|
const parsed = JSON.parse(value);
|
|
@@ -1450,11 +1403,11 @@ function parseJsonArray(value) {
|
|
|
1450
1403
|
}
|
|
1451
1404
|
function defaultExportPath(config, exportedAt) {
|
|
1452
1405
|
const fileName = `chattercatcher-export-${exportedAt.replace(/[:.]/g, "-")}.json`;
|
|
1453
|
-
return
|
|
1406
|
+
return path8.join(resolveHomePath(config.storage.dataDir), "exports", fileName);
|
|
1454
1407
|
}
|
|
1455
1408
|
async function exportLocalData(input) {
|
|
1456
1409
|
const exportedAt = input.exportedAt ?? (/* @__PURE__ */ new Date()).toISOString();
|
|
1457
|
-
const outputPath =
|
|
1410
|
+
const outputPath = path8.resolve(input.outputPath ?? defaultExportPath(input.config, exportedAt));
|
|
1458
1411
|
const chats = input.database.prepare(
|
|
1459
1412
|
`
|
|
1460
1413
|
SELECT
|
|
@@ -1541,8 +1494,8 @@ async function exportLocalData(input) {
|
|
|
1541
1494
|
fileJobs
|
|
1542
1495
|
}
|
|
1543
1496
|
};
|
|
1544
|
-
await
|
|
1545
|
-
await
|
|
1497
|
+
await fs6.mkdir(path8.dirname(outputPath), { recursive: true });
|
|
1498
|
+
await fs6.writeFile(outputPath, `${JSON.stringify(payload, null, 2)}
|
|
1546
1499
|
`, "utf8");
|
|
1547
1500
|
return {
|
|
1548
1501
|
outputPath,
|
|
@@ -1554,8 +1507,8 @@ async function exportLocalData(input) {
|
|
|
1554
1507
|
}
|
|
1555
1508
|
|
|
1556
1509
|
// src/export/data-restore.ts
|
|
1557
|
-
import
|
|
1558
|
-
import
|
|
1510
|
+
import fs7 from "fs/promises";
|
|
1511
|
+
import path9 from "path";
|
|
1559
1512
|
function asObject(value) {
|
|
1560
1513
|
return value && typeof value === "object" && !Array.isArray(value) ? value : {};
|
|
1561
1514
|
}
|
|
@@ -1603,8 +1556,8 @@ function clearDatabase(database) {
|
|
|
1603
1556
|
database.prepare("DELETE FROM chats").run();
|
|
1604
1557
|
}
|
|
1605
1558
|
async function restoreLocalData(input) {
|
|
1606
|
-
const inputPath =
|
|
1607
|
-
const payload = parsePayload(await
|
|
1559
|
+
const inputPath = path9.resolve(input.inputPath);
|
|
1560
|
+
const payload = parsePayload(await fs7.readFile(inputPath, "utf8"));
|
|
1608
1561
|
const mode = input.replace ? "replace" : "merge";
|
|
1609
1562
|
const restore = input.database.transaction(() => {
|
|
1610
1563
|
if (input.replace) {
|
|
@@ -2346,10 +2299,9 @@ function normalizeFeishuReceiveMessageEvent(payload) {
|
|
|
2346
2299
|
}
|
|
2347
2300
|
|
|
2348
2301
|
// src/feishu/resource-downloader.ts
|
|
2349
|
-
init_paths();
|
|
2350
2302
|
import * as lark3 from "@larksuiteoapi/node-sdk";
|
|
2351
|
-
import
|
|
2352
|
-
import
|
|
2303
|
+
import fs8 from "fs/promises";
|
|
2304
|
+
import path10 from "path";
|
|
2353
2305
|
var RESOURCE_TYPE_BY_KIND = {
|
|
2354
2306
|
file: "file",
|
|
2355
2307
|
image: "image",
|
|
@@ -2387,10 +2339,10 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
|
|
|
2387
2339
|
}
|
|
2388
2340
|
async download(input) {
|
|
2389
2341
|
const resourceType = RESOURCE_TYPE_BY_KIND[input.attachment.kind];
|
|
2390
|
-
const targetDir =
|
|
2391
|
-
await
|
|
2342
|
+
const targetDir = path10.join(this.dataDir, "files", "feishu");
|
|
2343
|
+
await fs8.mkdir(targetDir, { recursive: true });
|
|
2392
2344
|
const fileName = buildStoredFileName(input);
|
|
2393
|
-
const storedPath =
|
|
2345
|
+
const storedPath = path10.join(targetDir, fileName);
|
|
2394
2346
|
const payload = {
|
|
2395
2347
|
params: { type: resourceType },
|
|
2396
2348
|
path: { message_id: input.messageId, file_key: input.attachment.fileKey }
|
|
@@ -2412,31 +2364,30 @@ var FeishuResourceDownloader = class _FeishuResourceDownloader {
|
|
|
2412
2364
|
};
|
|
2413
2365
|
|
|
2414
2366
|
// src/files/ingest.ts
|
|
2415
|
-
init_paths();
|
|
2416
2367
|
import crypto3 from "crypto";
|
|
2417
|
-
import fs11 from "fs/promises";
|
|
2418
|
-
import path13 from "path";
|
|
2419
|
-
|
|
2420
|
-
// src/files/parser.ts
|
|
2421
2368
|
import fs10 from "fs/promises";
|
|
2422
2369
|
import path12 from "path";
|
|
2370
|
+
|
|
2371
|
+
// src/files/parser.ts
|
|
2372
|
+
import fs9 from "fs/promises";
|
|
2373
|
+
import path11 from "path";
|
|
2423
2374
|
import mammoth from "mammoth";
|
|
2424
2375
|
import { PDFParse } from "pdf-parse";
|
|
2425
2376
|
var TEXT_EXTENSIONS = /* @__PURE__ */ new Set([".txt", ".md", ".markdown", ".json", ".csv", ".tsv", ".log"]);
|
|
2426
2377
|
var DOCX_EXTENSIONS = /* @__PURE__ */ new Set([".docx"]);
|
|
2427
2378
|
var PDF_EXTENSIONS = /* @__PURE__ */ new Set([".pdf"]);
|
|
2428
2379
|
function isSupportedParseFile(filePath) {
|
|
2429
|
-
const extension =
|
|
2380
|
+
const extension = path11.extname(filePath).toLowerCase();
|
|
2430
2381
|
return TEXT_EXTENSIONS.has(extension) || DOCX_EXTENSIONS.has(extension) || PDF_EXTENSIONS.has(extension);
|
|
2431
2382
|
}
|
|
2432
2383
|
function describeSupportedParseTypes() {
|
|
2433
2384
|
return "txt\u3001md\u3001json\u3001csv\u3001tsv\u3001log\u3001docx\u3001pdf";
|
|
2434
2385
|
}
|
|
2435
2386
|
async function parseFileToText(filePath) {
|
|
2436
|
-
const extension =
|
|
2387
|
+
const extension = path11.extname(filePath).toLowerCase();
|
|
2437
2388
|
if (TEXT_EXTENSIONS.has(extension)) {
|
|
2438
2389
|
return {
|
|
2439
|
-
text: await
|
|
2390
|
+
text: await fs9.readFile(filePath, "utf8"),
|
|
2440
2391
|
parser: "text",
|
|
2441
2392
|
warnings: []
|
|
2442
2393
|
};
|
|
@@ -2450,7 +2401,7 @@ async function parseFileToText(filePath) {
|
|
|
2450
2401
|
};
|
|
2451
2402
|
}
|
|
2452
2403
|
if (PDF_EXTENSIONS.has(extension)) {
|
|
2453
|
-
const buffer = await
|
|
2404
|
+
const buffer = await fs9.readFile(filePath);
|
|
2454
2405
|
const parser = new PDFParse({ data: buffer });
|
|
2455
2406
|
try {
|
|
2456
2407
|
const result = await parser.getText();
|
|
@@ -2472,7 +2423,7 @@ function isSupportedTextFile(filePath) {
|
|
|
2472
2423
|
}
|
|
2473
2424
|
function ensureSupportedTextFile(filePath) {
|
|
2474
2425
|
if (!isSupportedTextFile(filePath)) {
|
|
2475
|
-
const extension =
|
|
2426
|
+
const extension = path12.extname(filePath).toLowerCase();
|
|
2476
2427
|
throw new Error(`\u6682\u4E0D\u652F\u6301\u8BE5\u6587\u4EF6\u7C7B\u578B\uFF1A${extension || "\u65E0\u6269\u5C55\u540D"}\u3002\u5F53\u524D\u652F\u6301 ${describeSupportedParseTypes()}\u3002`);
|
|
2477
2428
|
}
|
|
2478
2429
|
}
|
|
@@ -2481,12 +2432,12 @@ function stableStoredName(sourcePath, fileName) {
|
|
|
2481
2432
|
return `${digest}-${fileName}`;
|
|
2482
2433
|
}
|
|
2483
2434
|
async function ingestLocalFile(input) {
|
|
2484
|
-
const sourcePath =
|
|
2485
|
-
const fileName =
|
|
2435
|
+
const sourcePath = path12.resolve(input.filePath);
|
|
2436
|
+
const fileName = path12.basename(sourcePath);
|
|
2486
2437
|
const jobId = input.jobs?.start({ sourcePath, fileName });
|
|
2487
2438
|
try {
|
|
2488
2439
|
ensureSupportedTextFile(sourcePath);
|
|
2489
|
-
const stat = await
|
|
2440
|
+
const stat = await fs10.stat(sourcePath);
|
|
2490
2441
|
if (!stat.isFile()) {
|
|
2491
2442
|
throw new Error(`\u4E0D\u662F\u6587\u4EF6\uFF1A${sourcePath}`);
|
|
2492
2443
|
}
|
|
@@ -2495,10 +2446,10 @@ async function ingestLocalFile(input) {
|
|
|
2495
2446
|
if (!text) {
|
|
2496
2447
|
throw new Error(`\u6587\u4EF6\u6CA1\u6709\u53EF\u7D22\u5F15\u6587\u672C\uFF1A${sourcePath}`);
|
|
2497
2448
|
}
|
|
2498
|
-
const fileDir =
|
|
2499
|
-
await
|
|
2500
|
-
const storedPath =
|
|
2501
|
-
await
|
|
2449
|
+
const fileDir = path12.join(resolveHomePath(input.config.storage.dataDir), "files");
|
|
2450
|
+
await fs10.mkdir(fileDir, { recursive: true });
|
|
2451
|
+
const storedPath = path12.join(fileDir, stableStoredName(sourcePath, fileName));
|
|
2452
|
+
await fs10.copyFile(sourcePath, storedPath);
|
|
2502
2453
|
const messageId = input.messages.ingest({
|
|
2503
2454
|
platform: "local-file",
|
|
2504
2455
|
platformChatId: "local-files",
|
|
@@ -2630,15 +2581,14 @@ var GatewayIngestor = class {
|
|
|
2630
2581
|
};
|
|
2631
2582
|
|
|
2632
2583
|
// src/logs/reader.ts
|
|
2633
|
-
|
|
2634
|
-
import fs12 from "fs/promises";
|
|
2584
|
+
import fs11 from "fs/promises";
|
|
2635
2585
|
import { watch } from "fs";
|
|
2636
|
-
import
|
|
2586
|
+
import path13 from "path";
|
|
2637
2587
|
function getLogsDirectory() {
|
|
2638
|
-
return
|
|
2588
|
+
return path13.join(getChatterCatcherHome(), "logs");
|
|
2639
2589
|
}
|
|
2640
2590
|
function resolveLogPath(fileName, logsDir = getLogsDirectory()) {
|
|
2641
|
-
return
|
|
2591
|
+
return path13.isAbsolute(fileName) ? fileName : path13.join(logsDir, fileName);
|
|
2642
2592
|
}
|
|
2643
2593
|
function normalizeLineCount(value, fallback = 200) {
|
|
2644
2594
|
const parsed = Number(value ?? fallback);
|
|
@@ -2647,7 +2597,7 @@ function normalizeLineCount(value, fallback = 200) {
|
|
|
2647
2597
|
async function listLogFiles(logsDir = getLogsDirectory()) {
|
|
2648
2598
|
let entries;
|
|
2649
2599
|
try {
|
|
2650
|
-
entries = await
|
|
2600
|
+
entries = await fs11.readdir(logsDir, { withFileTypes: true });
|
|
2651
2601
|
} catch (error) {
|
|
2652
2602
|
if (error.code === "ENOENT") {
|
|
2653
2603
|
return [];
|
|
@@ -2656,8 +2606,8 @@ async function listLogFiles(logsDir = getLogsDirectory()) {
|
|
|
2656
2606
|
}
|
|
2657
2607
|
const files = await Promise.all(
|
|
2658
2608
|
entries.filter((entry) => entry.isFile() && entry.name.endsWith(".log")).map(async (entry) => {
|
|
2659
|
-
const filePath =
|
|
2660
|
-
const stats = await
|
|
2609
|
+
const filePath = path13.join(logsDir, entry.name);
|
|
2610
|
+
const stats = await fs11.stat(filePath);
|
|
2661
2611
|
return {
|
|
2662
2612
|
name: entry.name,
|
|
2663
2613
|
path: filePath,
|
|
@@ -2674,11 +2624,11 @@ function tailLines(content, lines) {
|
|
|
2674
2624
|
return parts.slice(-lines).join("\n");
|
|
2675
2625
|
}
|
|
2676
2626
|
async function readLogTail(input) {
|
|
2677
|
-
const stats = await
|
|
2678
|
-
const content = await
|
|
2627
|
+
const stats = await fs11.stat(input.filePath);
|
|
2628
|
+
const content = await fs11.readFile(input.filePath, "utf8");
|
|
2679
2629
|
return {
|
|
2680
2630
|
file: {
|
|
2681
|
-
name:
|
|
2631
|
+
name: path13.basename(input.filePath),
|
|
2682
2632
|
path: input.filePath,
|
|
2683
2633
|
updatedAt: stats.mtime,
|
|
2684
2634
|
bytes: stats.size
|
|
@@ -2700,18 +2650,18 @@ async function readLatestLogTail(input = {}) {
|
|
|
2700
2650
|
return readLogTail({ filePath: latest.path, lines: input.lines });
|
|
2701
2651
|
}
|
|
2702
2652
|
async function followLogFile(input) {
|
|
2703
|
-
let offset = (await
|
|
2704
|
-
const directory =
|
|
2705
|
-
const fileName =
|
|
2653
|
+
let offset = (await fs11.stat(input.filePath)).size;
|
|
2654
|
+
const directory = path13.dirname(input.filePath);
|
|
2655
|
+
const fileName = path13.basename(input.filePath);
|
|
2706
2656
|
async function readAppended() {
|
|
2707
|
-
const stats = await
|
|
2657
|
+
const stats = await fs11.stat(input.filePath);
|
|
2708
2658
|
if (stats.size < offset) {
|
|
2709
2659
|
offset = 0;
|
|
2710
2660
|
}
|
|
2711
2661
|
if (stats.size === offset) {
|
|
2712
2662
|
return;
|
|
2713
2663
|
}
|
|
2714
|
-
const handle = await
|
|
2664
|
+
const handle = await fs11.open(input.filePath, "r");
|
|
2715
2665
|
try {
|
|
2716
2666
|
const length = stats.size - offset;
|
|
2717
2667
|
const buffer = Buffer.alloc(length);
|
|
@@ -2733,27 +2683,6 @@ async function followLogFile(input) {
|
|
|
2733
2683
|
return () => watcher.close();
|
|
2734
2684
|
}
|
|
2735
2685
|
|
|
2736
|
-
// src/rag/embedding.ts
|
|
2737
|
-
function cosineSimilarity(left, right) {
|
|
2738
|
-
if (left.length === 0 || right.length === 0 || left.length !== right.length) {
|
|
2739
|
-
return 0;
|
|
2740
|
-
}
|
|
2741
|
-
let dot = 0;
|
|
2742
|
-
let leftNorm = 0;
|
|
2743
|
-
let rightNorm = 0;
|
|
2744
|
-
for (let index = 0; index < left.length; index += 1) {
|
|
2745
|
-
const leftValue = left[index] ?? 0;
|
|
2746
|
-
const rightValue = right[index] ?? 0;
|
|
2747
|
-
dot += leftValue * rightValue;
|
|
2748
|
-
leftNorm += leftValue * leftValue;
|
|
2749
|
-
rightNorm += rightValue * rightValue;
|
|
2750
|
-
}
|
|
2751
|
-
if (leftNorm === 0 || rightNorm === 0) {
|
|
2752
|
-
return 0;
|
|
2753
|
-
}
|
|
2754
|
-
return dot / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
|
|
2755
|
-
}
|
|
2756
|
-
|
|
2757
2686
|
// src/rag/indexer.ts
|
|
2758
2687
|
async function indexMessageChunks(input) {
|
|
2759
2688
|
const chunks = input.messageIds ? input.messages.listMessageChunksByMessageIds(input.messageIds, input.limit ?? 1e4) : input.messages.listAllMessageChunks(input.limit ?? 1e4);
|
|
@@ -2800,9 +2729,6 @@ function toEvidenceSource2(chunk) {
|
|
|
2800
2729
|
};
|
|
2801
2730
|
}
|
|
2802
2731
|
|
|
2803
|
-
// src/index.ts
|
|
2804
|
-
init_lancedb_store();
|
|
2805
|
-
|
|
2806
2732
|
// src/rag/manual-index.ts
|
|
2807
2733
|
async function processMessagesNow(input) {
|
|
2808
2734
|
const startedAt = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -2816,8 +2742,7 @@ async function processMessagesNow(input) {
|
|
|
2816
2742
|
finishedAt: (/* @__PURE__ */ new Date()).toISOString()
|
|
2817
2743
|
};
|
|
2818
2744
|
}
|
|
2819
|
-
const
|
|
2820
|
-
const vectorStore = await LanceDbVectorStore2.connectFromConfig(input.config);
|
|
2745
|
+
const vectorStore = new SQLiteVectorStore(input.database);
|
|
2821
2746
|
try {
|
|
2822
2747
|
const stats = await indexMessageChunks({
|
|
2823
2748
|
messages: new MessageRepository(input.database),
|
|
@@ -3254,7 +3179,7 @@ function createWebApp(config) {
|
|
|
3254
3179
|
note: "\u95EE\u7B54\u5FC5\u987B\u5148\u68C0\u7D22\u8BC1\u636E\uFF0C\u7981\u6B62\u5168\u91CF\u4E0A\u4E0B\u6587\u5806\u53E0\u3002",
|
|
3255
3180
|
retrieval: {
|
|
3256
3181
|
keyword: "SQLite FTS5",
|
|
3257
|
-
vector: "
|
|
3182
|
+
vector: "SQLite Vector",
|
|
3258
3183
|
hybrid: true
|
|
3259
3184
|
}
|
|
3260
3185
|
},
|
|
@@ -3318,12 +3243,12 @@ export {
|
|
|
3318
3243
|
FileJobRepository,
|
|
3319
3244
|
GatewayIngestor,
|
|
3320
3245
|
HybridRetriever,
|
|
3321
|
-
LanceDbVectorStore,
|
|
3322
3246
|
MemoryVectorStore,
|
|
3323
3247
|
MessageFtsRetriever,
|
|
3324
3248
|
MessageRepository,
|
|
3325
3249
|
OpenAICompatibleChatModel,
|
|
3326
3250
|
OpenAICompatibleEmbeddingModel,
|
|
3251
|
+
SQLiteVectorStore,
|
|
3327
3252
|
VectorRetriever,
|
|
3328
3253
|
appConfigSchema,
|
|
3329
3254
|
appSecretsSchema,
|
|
@@ -3354,7 +3279,6 @@ export {
|
|
|
3354
3279
|
getFeishuQuestionDecision,
|
|
3355
3280
|
getGatewayPidPath,
|
|
3356
3281
|
getGatewayRuntimeState,
|
|
3357
|
-
getLanceDbPath,
|
|
3358
3282
|
getLogsDirectory,
|
|
3359
3283
|
hasEmbeddingConfig,
|
|
3360
3284
|
indexMessageChunks,
|