jinzd-ai-cli 0.4.89 → 0.4.90
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{batch-UMQYXVKG.js → batch-3MJ56YAA.js} +2 -2
- package/dist/chat-index-QKFH7ZP6.js +17 -0
- package/dist/chunk-5S3PIG5O.js +453 -0
- package/dist/{chunk-3O3U3L5W.js → chunk-AB2LA33A.js} +1 -1
- package/dist/{chunk-4WVXTADR.js → chunk-BJXGZFE6.js} +1 -1
- package/dist/{chunk-FVRLRIKC.js → chunk-DJGP7AR6.js} +2 -2
- package/dist/{chunk-TKYNTXKB.js → chunk-EEEAFWNK.js} +1 -1
- package/dist/{chunk-ABPT6XCI.js → chunk-G65IDWVP.js} +2 -2
- package/dist/chunk-JV5N65KN.js +50 -0
- package/dist/{chunk-GTKJUEBS.js → chunk-MO7MWNWC.js} +6 -4
- package/dist/{chunk-XMA222FQ.js → chunk-PASCDYMH.js} +17 -63
- package/dist/{chunk-E7YC4GWV.js → chunk-WPQ4D6T3.js} +1 -1
- package/dist/electron-server.js +127 -202
- package/dist/{hub-4P2BH57W.js → hub-B7NJSCWF.js} +1 -1
- package/dist/index.js +10 -10
- package/dist/{run-tests-5TO5G3YH.js → run-tests-2DYVHTIH.js} +2 -2
- package/dist/{run-tests-TGGXTOFF.js → run-tests-37FEBJTR.js} +1 -1
- package/dist/{semantic-MYAXLDCZ.js → semantic-3KJPAUW6.js} +3 -2
- package/dist/{server-NG7AEAD5.js → server-FCTPLKGO.js} +112 -6
- package/dist/{server-U2BBLP4Y.js → server-S6JYNMMF.js} +3 -3
- package/dist/{task-orchestrator-ODU45UQG.js → task-orchestrator-K6HDX4YE.js} +3 -3
- package/dist/{vector-store-UR7IARXB.js → vector-store-NDUFLNGN.js} +2 -1
- package/dist/web/client/app.js +201 -0
- package/dist/web/client/index.html +24 -0
- package/package.json +1 -1
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import {
|
|
3
3
|
ConfigManager
|
|
4
|
-
} from "./chunk-
|
|
4
|
+
} from "./chunk-AB2LA33A.js";
|
|
5
5
|
import "./chunk-2ZD3YTVM.js";
|
|
6
|
-
import "./chunk-
|
|
6
|
+
import "./chunk-WPQ4D6T3.js";
|
|
7
7
|
|
|
8
8
|
// src/cli/batch.ts
|
|
9
9
|
import Anthropic from "@anthropic-ai/sdk";
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import {
|
|
2
|
+
buildChatIndex,
|
|
3
|
+
chunkSession,
|
|
4
|
+
clearChatIndex,
|
|
5
|
+
getChatIndexStatus,
|
|
6
|
+
loadChatIndex,
|
|
7
|
+
searchChatMemory
|
|
8
|
+
} from "./chunk-5S3PIG5O.js";
|
|
9
|
+
import "./chunk-JV5N65KN.js";
|
|
10
|
+
export {
|
|
11
|
+
buildChatIndex,
|
|
12
|
+
chunkSession,
|
|
13
|
+
clearChatIndex,
|
|
14
|
+
getChatIndexStatus,
|
|
15
|
+
loadChatIndex,
|
|
16
|
+
searchChatMemory
|
|
17
|
+
};
|
|
@@ -0,0 +1,453 @@
|
|
|
1
|
+
import {
|
|
2
|
+
EMBEDDING_DIM,
|
|
3
|
+
embed,
|
|
4
|
+
embedOne
|
|
5
|
+
} from "./chunk-JV5N65KN.js";
|
|
6
|
+
|
|
7
|
+
// src/memory/chat-index.ts
|
|
8
|
+
import fs from "fs";
|
|
9
|
+
import path from "path";
|
|
10
|
+
import os from "os";
|
|
11
|
+
import crypto from "crypto";
|
|
12
|
+
|
|
13
|
+
// src/security/redactor.ts
|
|
14
|
+
// Built-in secret-detection patterns used by redactString. Each entry pairs a
// human-readable `kind` (substituted into the redaction placeholder) with a
// global regex. Patterns with capture groups isolate the secret portion so
// surrounding key/prefix text can be preserved in the redacted output.
var DEFAULT_PATTERNS = [
  // password: xxx / password = xxx / password="xxx"
  // Covers YAML / JSON / shell-ish / env-file forms.
  { kind: "password", regex: /\b(password|passwd|pwd)\s*[:=]\s*["']?([^\s"',;{}]{4,200})["']?/gi },
  // PGPASSWORD=xxx (explicit bash env-var form, separate rule because no quotes usually)
  { kind: "pgpassword-env", regex: /\b(PGPASSWORD)=([^\s"']{4,200})/g },
  // JDBC/PG/MySQL/Mongo connection strings with inline credentials
  // postgresql://user:pass@host/db → redact pass
  { kind: "db-uri-password", regex: /(\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|mssql):\/\/[^:\s]+:)([^@\s]+)(@)/gi },
  // Anthropic API keys
  { kind: "anthropic-key", regex: /(sk-ant-[a-zA-Z0-9_-]{90,})/g },
  // OpenAI / generic sk- keys — requires length ≥32 to avoid eating short identifiers
  { kind: "openai-key", regex: /(sk-(?:proj-)?[a-zA-Z0-9_-]{32,})/g },
  // GitHub personal access tokens
  { kind: "github-pat", regex: /\b(ghp_[a-zA-Z0-9]{36})\b/g },
  { kind: "github-oauth", regex: /\b(gho_[a-zA-Z0-9]{36})\b/g },
  { kind: "github-install", regex: /\b(ghs_[a-zA-Z0-9]{36})\b/g },
  // Slack tokens
  { kind: "slack-bot", regex: /\b(xoxb-\d+-\d+-[a-zA-Z0-9]+)\b/g },
  { kind: "slack-user", regex: /\b(xoxp-\d+-\d+-\d+-[a-zA-Z0-9]+)\b/g },
  // AWS access key IDs (AKIA...) and secret access keys are context-dependent;
  // we only catch the ID because secret key alone is indistinguishable from random base64.
  { kind: "aws-access-key-id", regex: /\b(AKIA[0-9A-Z]{16})\b/g },
  // Google API keys
  { kind: "google-api-key", regex: /\b(AIza[0-9A-Za-z_-]{35})\b/g },
  // Generic "api_key": "..." / "apiKey": "..." / api-key=xxx
  { kind: "api-key", regex: /\b(api[_-]?key)\s*[:=]\s*["']?([a-zA-Z0-9_\-.]{16,200})["']?/gi },
  // Generic token: xxx (only when value looks token-shaped; avoids eating human prose)
  { kind: "token", regex: /\b(token|access[_-]?token|bearer[_-]?token)\s*[:=]\s*["']?([a-zA-Z0-9_\-.]{20,300})["']?/gi },
  // Bearer <token> in Authorization headers
  { kind: "bearer", regex: /\b(Authorization:\s*Bearer\s+)([a-zA-Z0-9_\-.=]{20,500})/g },
  // Private key PEM blocks — catch the header+footer together
  { kind: "private-key", regex: /-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----/g }
];
|
|
48
|
+
// Substitute the rule's kind into the placeholder template.
function render(placeholder, kind) {
  return placeholder.replace("{kind}", kind);
}
/**
 * Redact secrets in `input` according to `options`.
 *
 * options:
 *   enabled       — when false, input passes through untouched.
 *   placeholder   — template for the replacement (default "[REDACTED:{kind}]").
 *   patterns      — { kind, regex } rules (default DEFAULT_PATTERNS).
 *   customRegexes — extra "/body/flags" strings; invalid ones are skipped.
 *
 * Returns { redacted, hits } where each hit records the kind, position and
 * the matched secret text.
 *
 * Fix vs. previous version: patterns with more than two capture groups (e.g.
 * db-uri-password's trailing "(@)") used to have groups 3+ silently dropped
 * from the replacement, so "user:pass@host" became "user:[REDACTED]host".
 * Trailing groups are now re-appended after the placeholder. The capture
 * count is also computed once per pattern (via the `source + "|"` empty-
 * alternative idiom) instead of re-exec'ing the regex on every match.
 */
function redactString(input, options) {
  if (!options.enabled || !input) return { redacted: input, hits: [] };
  const placeholder = options.placeholder ?? "[REDACTED:{kind}]";
  const patterns = [
    ...options.patterns ?? DEFAULT_PATTERNS,
    ...(options.customRegexes ?? []).flatMap((src, i) => {
      try {
        const flags = src.match(/^\/.*\/([gimsuy]*)$/)?.[1] ?? "";
        const body = src.replace(/^\/(.*)\/[gimsuy]*$/, "$1");
        // Force the global flag so replace() handles every occurrence.
        const regex = new RegExp(body, flags.includes("g") ? flags : flags + "g");
        return [{ kind: `custom-${i}`, regex }];
      } catch {
        // Malformed user-supplied regex — skip it rather than fail redaction.
        return [];
      }
    })
  ];
  let redacted = input;
  const hits = [];
  for (const { kind, regex } of patterns) {
    // Fresh instance so a shared /g regex's lastIndex can't leak between calls.
    const rx = new RegExp(regex.source, regex.flags);
    // Appending an empty alternative makes the pattern match "", exposing the
    // full capture-group count without depending on any particular input.
    const captureCount = new RegExp(`${rx.source}|`, rx.flags).exec("").length - 1;
    redacted = redacted.replace(rx, (...args) => {
      // replace() callback args: match, p1..pN, offset, fullString.
      const match = args[0];
      const groups = args.slice(1, 1 + captureCount);
      const offset = args[1 + captureCount];
      const [g1, g2] = groups;
      if (captureCount >= 2 && typeof g2 === "string") {
        // Two-group form: keep group 1 (key/prefix), redact group 2 (secret),
        // and preserve any structural trailing groups (e.g. the "@" in URIs).
        hits.push({ kind, start: offset + match.indexOf(g2, g1?.length ?? 0), length: g2.length, secret: g2 });
        const tail = groups.slice(2).filter((g) => typeof g === "string").join("");
        return `${g1}${render(placeholder, kind)}${tail}`;
      }
      // Single-group (or group-2 unmatched) form: redact the whole match.
      hits.push({ kind, start: offset, length: match.length, secret: g1 ?? match });
      return render(placeholder, kind);
    });
  }
  return { redacted, hits };
}
|
|
88
|
+
/**
 * Recursively redact every string inside a JSON-like value (objects, arrays,
 * strings, primitives). Returns { value, hits } where `value` is a rebuilt
 * copy (the input is never mutated) and `hits` aggregates the redaction hits
 * from every string encountered. With options.enabled === false the value is
 * returned as-is with no hits.
 */
function redactJson(value, options) {
  if (!options.enabled) return { value, hits: [] };
  const collected = [];
  const visit = (node) => {
    if (typeof node === "string") {
      const result = redactString(node, options);
      collected.push(...result.hits);
      return result.redacted;
    }
    if (Array.isArray(node)) {
      return node.map((item) => visit(item));
    }
    if (node !== null && typeof node === "object") {
      const rebuilt = {};
      for (const key of Object.keys(node)) {
        rebuilt[key] = visit(node[key]);
      }
      return rebuilt;
    }
    // Numbers, booleans, null, undefined — nothing to redact.
    return node;
  };
  return { value: visit(value), hits: collected };
}
|
|
110
|
+
|
|
111
|
+
// src/memory/chat-index.ts
|
|
112
|
+
// On-disk layout of the chat-memory index under ~/.aicli/memory-index:
// chunks.json holds chunk metadata (JSON); vectors.vec holds packed float32
// embeddings in the binary format written by writeVectorsFile.
var MEMORY_DIR_NAME = "memory-index";
var CHUNKS_FILE = "chunks.json";
var VECTORS_FILE = "vectors.vec";
// Magic number (0x41434D56) identifying the .vec binary format.
var VEC_MAGIC = 1094929750;
var VEC_VERSION = 1;
// Header: 4 x uint32 little-endian — magic, version, vector count, dimension.
var VEC_HEADER_BYTES = 16;
// ~/.aicli/memory-index — directory holding both index files.
function memoryIndexDir() {
  return path.join(os.homedir(), ".aicli", MEMORY_DIR_NAME);
}
// Absolute path of the chunk-metadata JSON file.
function chunksPath() {
  return path.join(memoryIndexDir(), CHUNKS_FILE);
}
// Absolute path of the packed embedding-vectors file.
function vectorsPath() {
  return path.join(memoryIndexDir(), VECTORS_FILE);
}
// ~/.aicli/history — directory of per-session transcripts named <id>.json.
function historyDir() {
  return path.join(os.homedir(), ".aicli", "history");
}
|
|
130
|
+
// Packing bounds for chunkSession: chunks are packed up to MAX_CHUNK_CHARS
// and dropped entirely when shorter than MIN_CHUNK_CHARS.
var MAX_CHUNK_CHARS = 1200;
var MIN_CHUNK_CHARS = 40;
// Pull the plain text out of a chat message. String content passes through
// unchanged; array content contributes only its `text`-type parts, joined
// with newlines. Anything else yields the empty string.
function extractMessageText(msg) {
  const { content } = msg;
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";
  const pieces = [];
  for (const part of content) {
    if (part && part.type === "text" && typeof part.text === "string") {
      pieces.push(part.text);
    }
  }
  return pieces.join("\n");
}
|
|
139
|
+
// Split one chat session into embedding-sized chunks. Consecutive messages
// are greedily packed until adding another would exceed MAX_CHUNK_CHARS;
// packed text shorter than MIN_CHUNK_CHARS is dropped. Every chunk's text is
// passed through the secret redactor before it can be persisted.
function chunkSession(session) {
  const chunks = [];
  // Accumulator for the chunk currently being packed (null = none open).
  let pending = null;
  // Finalize `pending` into `chunks`, or discard it when too short.
  const flush = () => {
    if (!pending) return;
    const rawText = pending.parts.join("\n").trim();
    if (rawText.length < MIN_CHUNK_CHARS) {
      pending = null;
      return;
    }
    // Redact secrets before anything leaves this function.
    const { redacted } = redactString(rawText, { enabled: true });
    // Deterministic 16-hex-char id: sha1 over session id, message span and
    // redacted length — stable across rebuilds of unchanged sessions.
    const id = crypto.createHash("sha1").update(`${session.id}|${pending.start}|${pending.end}|${redacted.length}`).digest("hex").slice(0, 16);
    chunks.push({
      id,
      sessionId: session.id,
      sessionTitle: session.title,
      provider: session.provider,
      model: session.model,
      startMessageIdx: pending.start,
      endMessageIdx: pending.end,
      text: redacted,
      timestamp: pending.latestTs,
      roles: pending.roles
    });
    pending = null;
  };
  for (let i = 0; i < session.messages.length; i++) {
    const m = session.messages[i];
    // Only user/assistant/system messages are indexed; other roles skipped.
    if (m.role !== "user" && m.role !== "assistant" && m.role !== "system") continue;
    const text = extractMessageText(m).trim();
    if (!text) continue;
    const ts = m.timestamp ?? (/* @__PURE__ */ new Date()).toISOString();
    // Role tag keeps speaker attribution inside the embedded text itself.
    const prefix = m.role === "user" ? "[USER] " : m.role === "assistant" ? "[AI] " : "[SYS] ";
    const part = `${prefix}${text}`;
    if (!pending) {
      pending = { start: i, end: i, parts: [part], roles: [m.role], latestTs: ts };
      continue;
    }
    // Projected chunk size if this part were appended (+1 per join newline).
    const projected = pending.parts.reduce((n, p) => n + p.length + 1, 0) + part.length;
    if (projected > MAX_CHUNK_CHARS) {
      // Would overflow: close the current chunk and start a new one here.
      flush();
      pending = { start: i, end: i, parts: [part], roles: [m.role], latestTs: ts };
    } else {
      pending.parts.push(part);
      pending.end = i;
      pending.roles.push(m.role);
      pending.latestTs = ts;
    }
  }
  // Close whatever is still open at end-of-session.
  flush();
  return chunks;
}
|
|
191
|
+
/**
 * Persist the packed embedding matrix (one EMBEDDING_DIM float32 vector per
 * chunk) to vectors.vec. Writes a 16-byte header (magic, version, count,
 * dim) followed by the raw float data, via tmp-file + rename so readers
 * never observe a half-written file. Throws when chunk and vector counts
 * disagree.
 */
function writeVectorsFile(chunks, vectors) {
  if (vectors.length !== chunks.length * EMBEDDING_DIM) {
    throw new Error(
      `writeVectorsFile: length mismatch \u2014 ${chunks.length} chunks vs ${vectors.length / EMBEDDING_DIM} vectors`
    );
  }
  fs.mkdirSync(memoryIndexDir(), { recursive: true });
  const buf = Buffer.alloc(VEC_HEADER_BYTES + vectors.byteLength);
  // Header: 4 x uint32 LE.
  buf.writeUInt32LE(VEC_MAGIC, 0);
  buf.writeUInt32LE(VEC_VERSION, 4);
  buf.writeUInt32LE(chunks.length, 8);
  buf.writeUInt32LE(EMBEDDING_DIM, 12);
  // Copy the Float32Array's backing bytes directly after the header.
  Buffer.from(vectors.buffer, vectors.byteOffset, vectors.byteLength).copy(buf, VEC_HEADER_BYTES);
  const target = vectorsPath();
  const tmp = `${target}.tmp`;
  fs.writeFileSync(tmp, buf);
  fs.renameSync(tmp, target);
}
|
|
211
|
+
/**
 * Load the packed embedding matrix from vectors.vec. Returns a Float32Array
 * of expectedCount * EMBEDDING_DIM floats, or null whenever the file is
 * missing, unreadable, truncated, or its header (magic / version / dim /
 * count) disagrees with what the caller expects — any of which means the
 * index must be rebuilt.
 */
function readVectorsFile(expectedCount) {
  const file = vectorsPath();
  if (!fs.existsSync(file)) return null;
  let raw;
  try {
    raw = fs.readFileSync(file);
  } catch {
    return null;
  }
  if (raw.length < VEC_HEADER_BYTES) return null;
  const magic = raw.readUInt32LE(0);
  const version = raw.readUInt32LE(4);
  const count = raw.readUInt32LE(8);
  const dim = raw.readUInt32LE(12);
  if (magic !== VEC_MAGIC) return null;
  if (version !== VEC_VERSION) return null;
  if (dim !== EMBEDDING_DIM) return null;
  if (count !== expectedCount) return null;
  const expectedBytes = VEC_HEADER_BYTES + count * dim * 4;
  if (raw.length !== expectedBytes) return null;
  // slice() copies the byte range, so the result is 4-byte aligned and
  // independent of Node's shared Buffer pool.
  return new Float32Array(
    raw.buffer.slice(raw.byteOffset + VEC_HEADER_BYTES, raw.byteOffset + expectedBytes)
  );
}
|
|
233
|
+
// Persist the chunk-metadata index as pretty-printed JSON, using a tmp-file
// write followed by rename so readers never see a partial file.
function writeIndexFile(idx) {
  fs.mkdirSync(memoryIndexDir(), { recursive: true });
  const target = chunksPath();
  const tmpFile = `${target}.tmp`;
  fs.writeFileSync(tmpFile, JSON.stringify(idx, null, 2), "utf-8");
  fs.renameSync(tmpFile, target);
}
|
|
241
|
+
// Load the chunk-metadata index from chunks.json. Returns the parsed index,
// or null when the file is missing, unparseable, or not schema version 1.
function readIndexFile() {
  const file = chunksPath();
  if (!fs.existsSync(file)) return null;
  try {
    const parsed = JSON.parse(fs.readFileSync(file, "utf-8"));
    // Only version 1 of the on-disk schema is understood.
    return parsed.version === 1 ? parsed : null;
  } catch {
    return null;
  }
}
|
|
253
|
+
// Load both halves of the chat index. Returns { idx, vectors } only when the
// metadata parses AND the vector file's count matches the chunk count;
// otherwise null (callers then treat the index as absent).
function loadChatIndex() {
  const idx = readIndexFile();
  if (idx === null) return null;
  const vectors = readVectorsFile(idx.chunks.length);
  return vectors === null ? null : { idx, vectors };
}
|
|
260
|
+
// Best-effort removal of both index files. Missing or undeletable files are
// ignored — the worst case is a stale index that the next build replaces.
function clearChatIndex() {
  for (const target of [chunksPath(), vectorsPath()]) {
    try {
      if (fs.existsSync(target)) fs.unlinkSync(target);
    } catch {
      // ignore — deletion is opportunistic
    }
  }
}
|
|
270
|
+
// Enumerate session transcript files in ~/.aicli/history. Returns
// { id, path, mtime } per *.json file; entries that vanish or fail to stat
// mid-scan are silently skipped. An absent history dir yields [].
function listSessionFiles() {
  const dir = historyDir();
  if (!fs.existsSync(dir)) return [];
  const sessions = [];
  for (const entry of fs.readdirSync(dir)) {
    if (!entry.endsWith(".json")) continue;
    const fullPath = path.join(dir, entry);
    try {
      const { mtimeMs } = fs.statSync(fullPath);
      sessions.push({ id: entry.replace(/\.json$/, ""), path: fullPath, mtime: mtimeMs });
    } catch {
      // file disappeared or is unreadable — skip it
    }
  }
  return sessions;
}
|
|
286
|
+
// Read and validate one session transcript. A usable session must parse as
// JSON and carry a truthy `id` plus a `messages` array; anything else
// (including read/parse failures) yields null.
function readSession(p) {
  try {
    const session = JSON.parse(fs.readFileSync(p, "utf-8"));
    if (!session.id) return null;
    if (!Array.isArray(session.messages)) return null;
    return session;
  } catch {
    return null;
  }
}
|
|
295
|
+
// Build (or incrementally rebuild) the chat-memory index. Unless
// options.full is set, sessions whose mtime is unchanged reuse their cached
// chunks and vectors; only new/changed sessions are re-chunked and
// re-embedded. Reports progress via options.onProgress and returns a stats
// summary.
async function buildChatIndex(options = {}) {
  const t0 = Date.now();
  // Default progress callback is a no-op.
  const onProgress = options.onProgress ?? (() => {
  });
  onProgress({ stage: "scanning" });
  const files = listSessionFiles();
  // options.full forces a from-scratch rebuild by ignoring the old index.
  const existing = options.full ? null : loadChatIndex();
  const prevMtimes = existing?.idx.sessionMtimes ?? {};
  const prevChunksBySession = /* @__PURE__ */ new Map();
  const prevVectorsByChunkId = /* @__PURE__ */ new Map();
  if (existing) {
    // Index the previous build's chunks by session, and slice out each
    // chunk's vector so unchanged sessions can be reused without re-embedding.
    for (let i = 0; i < existing.idx.chunks.length; i++) {
      const c = existing.idx.chunks[i];
      const arr = prevChunksBySession.get(c.sessionId) ?? [];
      arr.push(c);
      prevChunksBySession.set(c.sessionId, arr);
      prevVectorsByChunkId.set(
        c.id,
        existing.vectors.slice(i * EMBEDDING_DIM, (i + 1) * EMBEDDING_DIM)
      );
    }
  }
  onProgress({ stage: "chunking" });
  const stats = {
    sessionsScanned: files.length,
    sessionsIndexed: 0,
    sessionsSkipped: 0,
    chunksTotal: 0,
    chunksAdded: 0,
    chunksRemoved: 0,
    durationMs: 0
  };
  const newMtimes = {};
  const finalChunks = [];
  // NOTE(review): finalVectors is populated for cached sessions but never
  // read again — the flat output array below is rebuilt from the two maps.
  const finalVectors = [];
  const toEmbed = [];
  for (const f of files) {
    newMtimes[f.id] = f.mtime;
    const prevMtime = prevMtimes[f.id];
    if (prevMtime === f.mtime && prevChunksBySession.has(f.id)) {
      // Session unchanged since last build: reuse cached chunks + vectors.
      stats.sessionsSkipped++;
      const cached = prevChunksBySession.get(f.id);
      for (const c of cached) {
        const v = prevVectorsByChunkId.get(c.id);
        if (!v) continue;
        finalChunks.push(c);
        finalVectors.push(v);
      }
      continue;
    }
    // New or modified session: re-read, re-chunk and queue for embedding.
    const sess = readSession(f.path);
    if (!sess) continue;
    stats.sessionsIndexed++;
    const chunks = chunkSession(sess);
    for (const c of chunks) {
      finalChunks.push(c);
      toEmbed.push(c);
      stats.chunksAdded++;
    }
  }
  if (existing) {
    // Count chunks belonging to sessions that no longer exist on disk.
    for (const prevId of Object.keys(prevMtimes)) {
      if (!(prevId in newMtimes)) {
        const removed = prevChunksBySession.get(prevId) ?? [];
        stats.chunksRemoved += removed.length;
      }
    }
  }
  stats.chunksTotal = finalChunks.length;
  // Embed in small batches so progress can be reported between batches.
  const BATCH = 16;
  onProgress({ stage: "embedding", processed: 0, total: toEmbed.length });
  const newVectorsByChunkId = /* @__PURE__ */ new Map();
  for (let i = 0; i < toEmbed.length; i += BATCH) {
    const batch = toEmbed.slice(i, i + BATCH);
    const vecs = await embed(batch.map((c) => c.text));
    for (let j = 0; j < batch.length; j++) {
      newVectorsByChunkId.set(batch[j].id, vecs[j]);
    }
    onProgress({ stage: "embedding", processed: Math.min(i + BATCH, toEmbed.length), total: toEmbed.length });
  }
  // Assemble the flat vector matrix in finalChunks order, preferring freshly
  // embedded vectors and falling back to cached ones.
  const flat = new Float32Array(finalChunks.length * EMBEDDING_DIM);
  for (let i = 0; i < finalChunks.length; i++) {
    const c = finalChunks[i];
    const v = newVectorsByChunkId.get(c.id) ?? prevVectorsByChunkId.get(c.id);
    if (!v || v.length !== EMBEDDING_DIM) {
      // No vector available — this chunk's row stays zero-filled.
      continue;
    }
    flat.set(v, i * EMBEDDING_DIM);
  }
  onProgress({ stage: "saving" });
  const idx = {
    version: 1,
    built: (/* @__PURE__ */ new Date()).toISOString(),
    model: "Xenova/paraphrase-multilingual-MiniLM-L12-v2",
    sessionMtimes: newMtimes,
    chunks: finalChunks
  };
  writeIndexFile(idx);
  writeVectorsFile(finalChunks, flat);
  stats.durationMs = Date.now() - t0;
  onProgress({ stage: "done" });
  return stats;
}
|
|
398
|
+
// Semantic search over the chat-memory index. Embeds the (redacted) query,
// scores every chunk by dot product against its stored vector, and returns
// up to topK { chunk, score } results with score >= minScore, best first.
// Returns [] when no index exists.
async function searchChatMemory(query, options = {}) {
  const topK = options.topK ?? 5;
  const minScore = options.minScore ?? 0.25;
  const loaded = loadChatIndex();
  if (!loaded || loaded.idx.chunks.length === 0) return [];
  const { idx, vectors } = loaded;
  // Redact the query too, so secrets never reach the embedding model.
  const { redacted } = redactString(query, { enabled: true });
  const qvec = await embedOne(redacted);
  const candidates = [];
  for (let i = 0; i < idx.chunks.length; i++) {
    const c = idx.chunks[i];
    // Optional per-session include / exclude filters.
    if (options.sessionId && c.sessionId !== options.sessionId) continue;
    if (options.excludeSessionId && c.sessionId === options.excludeSessionId) continue;
    // Dot product against row i of the flat vector matrix. Embeddings are
    // produced with normalize: true, so this equals cosine similarity.
    let score = 0;
    const base = i * EMBEDDING_DIM;
    for (let d = 0; d < EMBEDDING_DIM; d++) {
      score += vectors[base + d] * qvec[d];
    }
    if (score < minScore) continue;
    candidates.push({ chunk: c, score });
  }
  candidates.sort((a, b) => b.score - a.score);
  return candidates.slice(0, topK);
}
|
|
422
|
+
// Report the current state of the chat index: whether it exists, chunk and
// session counts, file sizes, and (when present) build timestamp and model.
function getChatIndexStatus() {
  const status = {
    exists: false,
    chunks: 0,
    sessions: 0,
    vecFileSizeBytes: 0,
    chunksFileSizeBytes: 0
  };
  try {
    const vecFile = vectorsPath();
    const metaFile = chunksPath();
    if (fs.existsSync(vecFile)) status.vecFileSizeBytes = fs.statSync(vecFile).size;
    if (fs.existsSync(metaFile)) status.chunksFileSizeBytes = fs.statSync(metaFile).size;
  } catch {
    // sizes are informational only — ignore stat failures
  }
  const idx = readIndexFile();
  if (idx === null) return status;
  status.exists = true;
  status.chunks = idx.chunks.length;
  status.sessions = Object.keys(idx.sessionMtimes).length;
  status.built = idx.built;
  status.model = idx.model;
  return status;
}
|
|
444
|
+
|
|
445
|
+
export {
|
|
446
|
+
redactJson,
|
|
447
|
+
chunkSession,
|
|
448
|
+
loadChatIndex,
|
|
449
|
+
clearChatIndex,
|
|
450
|
+
buildChatIndex,
|
|
451
|
+
searchChatMemory,
|
|
452
|
+
getChatIndexStatus
|
|
453
|
+
};
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
import {
|
|
3
3
|
schemaToJsonSchema,
|
|
4
4
|
truncateForPersist
|
|
5
|
-
} from "./chunk-
|
|
5
|
+
} from "./chunk-G65IDWVP.js";
|
|
6
6
|
import {
|
|
7
7
|
AuthError,
|
|
8
8
|
ProviderError,
|
|
@@ -21,7 +21,7 @@ import {
|
|
|
21
21
|
MCP_PROTOCOL_VERSION,
|
|
22
22
|
MCP_TOOL_PREFIX,
|
|
23
23
|
VERSION
|
|
24
|
-
} from "./chunk-
|
|
24
|
+
} from "./chunk-WPQ4D6T3.js";
|
|
25
25
|
|
|
26
26
|
// src/providers/claude.ts
|
|
27
27
|
import Anthropic from "@anthropic-ai/sdk";
|
|
@@ -23,7 +23,7 @@ import {
|
|
|
23
23
|
} from "./chunk-6VRJGH25.js";
|
|
24
24
|
import {
|
|
25
25
|
runTestsTool
|
|
26
|
-
} from "./chunk-
|
|
26
|
+
} from "./chunk-EEEAFWNK.js";
|
|
27
27
|
import {
|
|
28
28
|
CONFIG_DIR_NAME,
|
|
29
29
|
DEFAULT_MAX_TOOL_OUTPUT_CHARS_CAP,
|
|
@@ -31,7 +31,7 @@ import {
|
|
|
31
31
|
SUBAGENT_ALLOWED_TOOLS,
|
|
32
32
|
SUBAGENT_DEFAULT_MAX_ROUNDS,
|
|
33
33
|
SUBAGENT_MAX_ROUNDS_LIMIT
|
|
34
|
-
} from "./chunk-
|
|
34
|
+
} from "./chunk-WPQ4D6T3.js";
|
|
35
35
|
|
|
36
36
|
// src/tools/types.ts
|
|
37
37
|
function isFileWriteTool(name) {
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// src/symbols/embedder.ts
import path from "path";
import os from "os";
import fs from "fs";
// Sentence-embedding model loaded via @huggingface/transformers.
var EMBEDDING_MODEL_ID = "Xenova/paraphrase-multilingual-MiniLM-L12-v2";
// Output width of the model's pooled embedding; vector-file readers and
// dot-product code elsewhere assume this exact dimensionality.
var EMBEDDING_DIM = 384;
// Lazily-initialized shared promise for the feature-extraction pipeline.
var pipelinePromise = null;
// ~/.aicli/models — local cache directory for downloaded model files.
function cacheDir() {
  return path.join(os.homedir(), ".aicli", "models");
}
|
|
11
|
+
/**
 * Memoized loader for the transformers.js feature-extraction pipeline: the
 * first caller kicks off the (potentially slow) model load and all callers
 * share the same promise.
 *
 * Fix: previously a failed load (e.g. a transient network error while
 * downloading the model) left a permanently rejected promise cached, so
 * every subsequent embedding call failed forever. The memo is now cleared on
 * failure so the next call retries; the original caller still observes the
 * rejection.
 */
async function getEmbedder() {
  if (pipelinePromise) return pipelinePromise;
  const loading = (async () => {
    const mod = await import("@huggingface/transformers");
    const dir = cacheDir();
    fs.mkdirSync(dir, { recursive: true });
    mod.env.cacheDir = dir;
    mod.env.allowRemoteModels = true;
    mod.env.allowLocalModels = true;
    const pipe = await mod.pipeline("feature-extraction", EMBEDDING_MODEL_ID, {
      // Keep the ONNX session in float32; int8 quantization exists but the
      // quality drop on short code identifiers is noticeable.
      dtype: "fp32"
    });
    return pipe;
  })();
  pipelinePromise = loading;
  // Reset the memo if this load fails, allowing a later retry. The guard
  // avoids clobbering a newer in-flight load.
  loading.catch(() => {
    if (pipelinePromise === loading) pipelinePromise = null;
  });
  return loading;
}
|
|
29
|
+
// Embed a batch of strings. Returns one Float32Array of EMBEDDING_DIM floats
// per input, using mean pooling with L2 normalization. Empty input short-
// circuits without touching the pipeline.
async function embed(texts) {
  if (texts.length === 0) return [];
  const pipe = await getEmbedder();
  const out = await pipe(texts, { pooling: "mean", normalize: true });
  const dim = EMBEDDING_DIM;
  const rows = [];
  for (let row = 0; row < texts.length; row++) {
    // View row `row` of the flat output tensor, then .slice() so each result
    // owns its own copy of the data.
    const view = new Float32Array(out.data.buffer, out.data.byteOffset + row * dim * 4, dim);
    rows.push(view.slice());
  }
  return rows;
}
|
|
41
|
+
// Convenience wrapper: embed a single string and unwrap the one-element batch.
async function embedOne(text) {
  const vectors = await embed([text]);
  return vectors[0];
}
|
|
45
|
+
|
|
46
|
+
export {
|
|
47
|
+
EMBEDDING_DIM,
|
|
48
|
+
embed,
|
|
49
|
+
embedOne
|
|
50
|
+
};
|
|
@@ -2,13 +2,15 @@ import {
|
|
|
2
2
|
loadIndex
|
|
3
3
|
} from "./chunk-BJAT4GNC.js";
|
|
4
4
|
import {
|
|
5
|
-
EMBEDDING_DIM,
|
|
6
|
-
embed,
|
|
7
|
-
embedOne,
|
|
8
5
|
loadVectorStore,
|
|
9
6
|
saveVectorStore,
|
|
10
7
|
searchVectorStore
|
|
11
|
-
} from "./chunk-
|
|
8
|
+
} from "./chunk-PASCDYMH.js";
|
|
9
|
+
import {
|
|
10
|
+
EMBEDDING_DIM,
|
|
11
|
+
embed,
|
|
12
|
+
embedOne
|
|
13
|
+
} from "./chunk-JV5N65KN.js";
|
|
12
14
|
|
|
13
15
|
// src/symbols/semantic.ts
|
|
14
16
|
function pathTokens(absFile, root) {
|