079project 1.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GroupStarter.cjs +211 -35
- package/README.md +3 -3
- package/crawler/agent.cjs +97 -0
- package/crawler/index.cjs +515 -0
- package/crawler/storage.cjs +163 -0
- package/groupmanager.cjs +2 -1
- package/loggerworker.cjs +202 -0
- package/main_Serve.cjs +1132 -115
- package/main_Study.cjs +1749 -365
- package/package.json +2 -1
- package/robots/seeds.txt +2 -0
- package/schedule.cjs +745 -0
- package/wikitext/wikitext-103-all.txt +0 -0
- package/wikitext/.gitattributes +0 -27
- package/wikitext/README.md +0 -344
- package/wikitext/describtion.txt +0 -1
package/main_Study.cjs
CHANGED
|
@@ -29,7 +29,8 @@ const pool = workerpool.pool(path.join(__dirname, 'memeMergeWorker.cjs'), {
|
|
|
29
29
|
});
|
|
30
30
|
const natural = require('natural');
|
|
31
31
|
const STOP_WORDS = natural.stopwords; // 英文停用词
|
|
32
|
-
|
|
32
|
+
const { CrawlerManager } = require('./crawler/index.cjs');
|
|
33
|
+
const { AdversaryScheduler } = require('./schedule.cjs');
|
|
33
34
|
console.log(`[WORKERS] 工作池已创建,最大工作进程数: ${MAX_WORKERS}`);
|
|
34
35
|
protobuf.load(runtimeProtoPath, (err, root) => {
|
|
35
36
|
if (err) throw err;
|
|
@@ -54,6 +55,407 @@ const modelDefaults = {
|
|
|
54
55
|
edgeWeight: 1
|
|
55
56
|
};
|
|
56
57
|
const currentModelParams = { ...modelDefaults };
|
|
58
|
+
// ...existing code...
|
|
59
|
+
|
|
60
|
+
// 统一发布到 Redis(沿用现有 RuntimeMessage)
|
|
61
|
+
/**
 * Serialize a runtime with the RuntimeMessage protobuf type and publish it on
 * the Redis channel `AI-model-<__dirname>`.
 *
 * Best-effort: every failure (verification error, encode error, publish
 * rejection) is caught and logged as a warning; nothing is thrown to the caller.
 *
 * @param {object} runtime - Runtime instance handed to `runtimeToPlain`.
 * @returns {Promise<void>}
 */
async function publishRuntimeToRedis(runtime) {
  try {
    // The protobuf type is loaded elsewhere; without it nothing can be encoded.
    if (!RuntimeMessage) {
      return;
    }

    const clientReady = redisClient && redisClient.isOpen;
    if (!clientReady) {
      console.warn('[REDIS] 客户端未连接,跳过发布');
      return;
    }

    const payload = runtimeToPlain(runtime);

    // `verify` yields a non-empty message when the payload is invalid.
    const problem = RuntimeMessage.verify(payload);
    if (problem) {
      throw new Error(problem);
    }

    const encoded = RuntimeMessage.encode(RuntimeMessage.create(payload)).finish();
    const channel = `AI-model-${__dirname}`;
    await redisClient.publish(channel, encoded);
    console.log('[REDIS] 已发布运行时状态');
  } catch (e) {
    console.warn('[REDIS] 发布失败:', e.message);
  }
}
|
|
79
|
+
// ...existing code...
|
|
80
|
+
const crypto = require('crypto');
|
|
81
|
+
// ...existing code...
|
|
82
|
+
|
|
83
|
+
// 差量复制辅助(递归分区哈希)
|
|
84
|
+
/**
 * Helpers for delta replication: SHA-1 digests of individual graph points,
 * grouped into fixed-size buckets of sorted point IDs, plus a single summary
 * digest over all bucket digests.
 */
const DELTA = {
  // Number of point IDs hashed together into one bucket.
  BUCKET_SIZE: 256,

  /**
   * @param {Buffer|string} buf - Data to digest.
   * @returns {string} Hex-encoded SHA-1 digest.
   */
  hash(buf) {
    const hasher = crypto.createHash('sha1');
    hasher.update(buf);
    return hasher.digest('hex');
  },

  /**
   * Digest one point from its `pointID` and an order-normalised copy of its
   * `connect` list, so that the edge order does not influence the result.
   *
   * @param {{pointID: *, connect?: Array<Array>}|null|undefined} point
   * @returns {string} `'0'` for a missing point, otherwise a hex SHA-1 digest.
   */
  hashPoint(point) {
    if (!point) {
      return '0';
    }
    // Sort a copy; the point's own connect array is left untouched.
    const edges = (point.connect || []).slice();
    edges.sort((left, right) => {
      // Primary key: entry[1] compared as strings.
      if (left[1] !== right[1]) {
        return String(left[1]).localeCompare(String(right[1]));
      }
      // Secondary key: entry[2], with falsy values treated as 0.
      const leftKind = left[2] || 0;
      const rightKind = right[2] || 0;
      if (leftKind !== rightKind) {
        return leftKind - rightKind;
      }
      // Final key: entry[0], with falsy values treated as 0.
      return (left[0] || 0) - (right[0] || 0);
    });
    const serialized = JSON.stringify([point.pointID, edges]);
    return DELTA.hash(Buffer.from(serialized));
  },

  /**
   * Build the hash index of a graph.
   *
   * @param {{points: Map}} graph - Object exposing a `points` Map of id -> point.
   * @returns {{nodeHash: Map, buckets: Array<{start: *, end: *, ids: Array, hash: string}>, summary: string}}
   *   Per-point digests, per-bucket digests (each bucket holds up to
   *   BUCKET_SIZE consecutive sorted IDs) and a digest over all bucket digests.
   */
  buildGraphIndex(graph) {
    // Default sort order of the keys defines the bucket boundaries.
    const sortedIds = Array.from(graph.points.keys()).sort();
    const nodeHash = new Map();
    const buckets = [];

    let offset = 0;
    while (offset < sortedIds.length) {
      const ids = sortedIds.slice(offset, offset + DELTA.BUCKET_SIZE);
      const digests = ids.map((id) => {
        const digest = DELTA.hashPoint(graph.points.get(id));
        nodeHash.set(id, digest);
        return digest;
      });
      buckets.push({
        start: ids[0],
        end: ids[ids.length - 1],
        ids,
        hash: DELTA.hash(Buffer.from(digests.join(''))),
      });
      offset += DELTA.BUCKET_SIZE;
    }

    const bucketDigests = buckets.map((bucket) => bucket.hash);
    const summary = DELTA.hash(Buffer.from(bucketDigests.join('')));
    return { nodeHash, buckets, summary };
  },

  /**
   * Shallow, strict (`===`) element-wise comparison of two arrays.
   *
   * @param {*} a
   * @param {*} b
   * @returns {boolean} True for the same reference, or for two arrays of equal
   *   length whose elements are pairwise identical; false otherwise.
   */
  equalArray(a, b) {
    if (a === b) {
      return true;
    }
    if (!(Array.isArray(a) && Array.isArray(b))) {
      return false;
    }
    if (a.length !== b.length) {
      return false;
    }
    let index = 0;
    while (index < a.length) {
      if (a[index] !== b[index]) {
        return false;
      }
      index += 1;
    }
    return true;
  },
};
|
|
124
|
+
|
|
125
|
+
// 基于递归分区哈希的差量克隆
|
|
126
|
+
/**
 * Build a fresh clone of `srcRuntime`, or bring an existing clone up to date
 * by copying only the parts whose hashes changed.
 *
 * Full path (`prevClone` falsy): constructs a new `Runtime` from a shallow
 * copy of `srcRuntime.config`, calls `registerClone()`, copies the vocabulary,
 * both graphs and the KVM, and records hash indexes on `clone.__deltaIndexes`.
 *
 * Delta path: mutates and returns `prevClone`. The vocabulary is recopied only
 * when its hash differs; graph points are recopied only inside buckets whose
 * hash differs from the index stored on the clone; KVM entries are recopied
 * only when missing or different.
 *
 * @param {object|null|undefined} prevClone - Clone returned by an earlier call, or a falsy value.
 * @param {object} srcRuntime - Runtime to copy from (reads config, spider,
 *   vocabManager, wordGraph, graph and kvm).
 * @returns {Promise<object>} The new clone, or `prevClone` after the update.
 */
async function deltaCloneRuntime(prevClone, srcRuntime) {
  // Deep-copy a point's edge list (one level: each edge entry is copied).
  const copyEdges = (point) => (Array.isArray(point.connect) ? point.connect.map((e) => [...e]) : []);
  // KVM values are stored as arrays; null/undefined become [], scalars become [String(v)].
  const toKvmArray = (v) => (Array.isArray(v) ? v : (v == null ? [] : [String(v)]));
  const hashVocab = (vocab) => DELTA.hash(Buffer.from(JSON.stringify(vocab)));

  if (!prevClone) {
    const clone = new Runtime({ ...srcRuntime.config });
    clone.registerClone();
    // The spider is shared by reference, not copied.
    clone.spider = srcRuntime.spider;

    clone.vocabManager.vocab = [...srcRuntime.vocabManager.vocab];
    clone.vocabManager.updateMappings();

    for (const [id, p] of srcRuntime.wordGraph.points.entries()) {
      clone.wordGraph.addPoint(id, copyEdges(p));
    }
    for (const [id, p] of srcRuntime.graph.points.entries()) {
      clone.graph.addPoint(id, copyEdges(p));
    }
    for (const [k, v] of srcRuntime.kvm.memory.entries()) {
      clone.kvm.set(k, [...toKvmArray(v)]);
    }

    // Remember what was copied so the next call can skip unchanged buckets.
    clone.__deltaIndexes = {
      graph: DELTA.buildGraphIndex(clone.graph),
      wordGraph: DELTA.buildGraphIndex(clone.wordGraph),
      vocabHash: hashVocab(clone.vocabManager.vocab)
    };
    return clone;
  }

  const clone = prevClone;
  clone.spider = srcRuntime.spider;

  const srcVocabHash = hashVocab(srcRuntime.vocabManager.vocab);
  if (!clone.__deltaIndexes || clone.__deltaIndexes.vocabHash !== srcVocabHash) {
    clone.vocabManager.vocab = [...srcRuntime.vocabManager.vocab];
    clone.vocabManager.updateMappings();
  }

  const syncGraph = (cloneGraph, srcGraph, name) => {
    // Index recorded after the previous sync; empty when none exists.
    const prevIdx = (clone.__deltaIndexes && clone.__deltaIndexes[name]) || { nodeHash: new Map(), buckets: [] };
    const srcIdx = DELTA.buildGraphIndex(srcGraph);

    // Drop points that no longer exist in the source.
    for (const id of Array.from(cloneGraph.points.keys())) {
      if (!srcGraph.points.has(id)) cloneGraph.points.delete(id);
    }

    for (const b of srcIdx.buckets) {
      // A bucket is skipped only when the same ID range and size hashed identically before.
      const prevBucket = prevIdx.buckets.find(
        (x) => x.start === b.start && x.end === b.end && x.ids.length === b.ids.length
      );
      if (prevBucket && prevBucket.hash === b.hash) continue;

      for (const id of b.ids) {
        const srcPoint = srcGraph.points.get(id);
        const srcH = srcIdx.nodeHash.get(id);
        const prevH = prevIdx.nodeHash.get(id);
        if (srcH === prevH && cloneGraph.points.has(id)) continue;
        cloneGraph.points.set(id, {
          pointID: id,
          connect: copyEdges(srcPoint)
        });
      }
    }

    clone.__deltaIndexes = clone.__deltaIndexes || {};
    clone.__deltaIndexes[name] = DELTA.buildGraphIndex(cloneGraph);
  };

  syncGraph(clone.wordGraph, srcRuntime.wordGraph, 'wordGraph');
  syncGraph(clone.graph, srcRuntime.graph, 'graph');

  // Remove KVM keys that disappeared from the source.
  for (const k of Array.from(clone.kvm.memory.keys())) {
    if (!srcRuntime.kvm.memory.has(k)) clone.kvm.memory.delete(k);
  }
  for (const [k, v] of srcRuntime.kvm.memory.entries()) {
    const nv = toKvmArray(v);
    // A missing key must always be copied: comparing against a `[]` fallback
    // would treat "absent" and "present but empty" as equal and drop the key.
    const present = clone.kvm.memory.has(k);
    const ov = clone.kvm.memory.get(k) || [];
    if (!present || !DELTA.equalArray(ov, nv)) {
      clone.kvm.set(k, [...nv]);
    }
  }

  clone.__deltaIndexes.vocabHash = srcVocabHash;
  return clone;
}
|
|
206
|
+
|
|
207
|
+
// 按需检索器:在用户输入(processInput)时触发网络检索与增量学习
|
|
208
|
+
/**
 * On-demand researcher: turns the words of a user input into a search prompt,
 * asks the global crawler (`global.__crawler`) to search and crawl for it, and
 * feeds the most recent crawled documents back into the runtime.
 *
 * Runs are serialized: at most one search/ingest cycle is in flight, and
 * prompts that arrive during a run or during the cool-down window are merged
 * into a single pending prompt (the latest one wins).
 */
class OnlineResearcher {
  /**
   * @param {object} runtime - Runtime that receives ingested documents. Uses
   *   `ingestTextDocument`, `updateAttentionLinks` and, if present, `filterStopWords`.
   * @param {object} [options] - Overrides for the defaults listed below.
   */
  constructor(runtime, options = {}) {
    this.runtime = runtime;
    this.opts = Object.assign({
      cooldownMs: 25 * 1000, // cool-down window between triggered runs
      maxCrawl: 8,           // maximum pages per crawl
      perQuery: 6,
      maxEnqueue: 20,
      minPromptLen: 3,       // prompts with fewer keywords are dropped
      ingestMinLen: 6,       // `minLen` handed to ingestTextDocument
      recentCapacity: 64,    // size of the recent-prompt dedup window
    }, options);
    this.running = false;
    this.lastRunAt = 0;
    this.pendingPrompt = null;
    this.timer = null;
    // `recent` gives O(1) membership; `recentQueue` remembers insertion order for eviction.
    this.recent = new Set();
    this.recentQueue = [];
  }

  /**
   * Turn a word list into a normalised prompt string.
   *
   * @param {Array} words - Raw words; a non-array is treated as empty.
   * @returns {string} Up to 16 distinct lower-cased keywords joined by spaces,
   *   or `''` when fewer than `minPromptLen` keywords remain after filtering.
   */
  _normalizePromptFromWords(words) {
    const ws = (Array.isArray(words) ? words : [])
      .map((w) => String(w || '').toLowerCase().trim())
      .filter(Boolean);
    // Length is judged after stop-word removal (when the runtime offers it).
    const filtered = this.runtime.filterStopWords ? this.runtime.filterStopWords(ws) : ws;
    if (filtered.length < this.opts.minPromptLen) return '';
    // Keep only the first keywords so the prompt does not grow too long.
    return Array.from(new Set(filtered)).slice(0, 16).join(' ');
  }

  /**
   * Record `key` in the bounded recent-prompt window.
   *
   * @param {string} key
   * @returns {boolean} True when the key was already in the window.
   */
  _seen(key) {
    if (this.recent.has(key)) return true;
    this.recent.add(key);
    this.recentQueue.push(key);
    if (this.recentQueue.length > this.opts.recentCapacity) {
      const old = this.recentQueue.shift();
      this.recent.delete(old);
    }
    return false;
  }

  /**
   * Schedule a search for the given words. Does nothing when the crawler is
   * not initialised, the prompt is too short, or the prompt was seen recently.
   *
   * @param {Array} words - Words of the user input.
   * @returns {void}
   */
  scheduleFromWords(words) {
    if (!global.__crawler) return; // crawler not initialised yet
    const prompt = this._normalizePromptFromWords(words);
    if (!prompt) return;

    // Approximate dedup: the same prompt is not fetched twice within the window.
    const key = prompt.slice(0, 64);
    if (this._seen(key)) return;

    const now = Date.now();
    const remain = this.opts.cooldownMs - (now - this.lastRunAt);

    // While running or cooling down, merge into the single pending prompt.
    if (this.running || remain > 0) {
      this.pendingPrompt = prompt;
      if (!this.timer) {
        this.timer = setTimeout(() => {
          this.timer = null;
          // A run is still in flight: leave the prompt queued. The `finally`
          // block of _doSearchAndIngest picks it up, so starting here would
          // launch a second, concurrent crawl.
          if (this.running) return;
          const p = this.pendingPrompt;
          this.pendingPrompt = null;
          if (p) this._doSearchAndIngest(p);
        }, Math.max(100, remain));
      }
      return;
    }

    this._doSearchAndIngest(prompt);
  }

  /**
   * Run one search + crawl + ingest cycle. Errors are logged, never thrown.
   * When a prompt was queued meanwhile, it is started as soon as this run ends.
   *
   * @param {string} prompt - Normalised prompt.
   * @returns {Promise<void>}
   */
  async _doSearchAndIngest(prompt) {
    this.running = true;
    this.lastRunAt = Date.now();
    try {
      const report = await global.__crawler.directedSearch(prompt, {
        vertical: 'general',
        perQuery: this.opts.perQuery,
        maxEnqueue: this.opts.maxEnqueue,
        crawl: true,
        maxCrawl: this.opts.maxCrawl
      });
      // Learn incrementally right after the crawl, from the latest documents only.
      const docs = global.__crawler.loadRecentDocs(24);
      let fed = 0;
      for (const d of docs) {
        // NOTE(review): assumes ingestTextDocument resolves to a number — confirm.
        fed += await this.runtime.ingestTextDocument(d.text, {
          addNewWords: true,
          minLen: this.opts.ingestMinLen
        });
      }
      if (fed > 0) {
        this.runtime.updateAttentionLinks();
        console.log(`[RESEARCH] "${prompt}" 抓取并增量学习完成: fed=${fed}, crawled=${report?.crawled ?? 0}`);
      } else {
        console.log(`[RESEARCH] "${prompt}" 未产生有效增量`);
      }
    } catch (e) {
      console.warn('[RESEARCH] 抓取/增量学习失败:', e.message);
    } finally {
      this.running = false;
      // Bursts were merged into one pending prompt: handle the latest one now.
      if (this.pendingPrompt) {
        const p = this.pendingPrompt;
        this.pendingPrompt = null;
        this._doSearchAndIngest(p);
      }
    }
  }
}
|
|
318
|
+
// ...existing code...
|
|
319
|
+
// 轮换管理器:在克隆上训练,评估通过后原子切换,避免占用在线副本
|
|
320
|
+
/**
 * Rotation manager: trains on a clone of one controller's runtime, evaluates
 * the clone against the original and, when the gain is large enough, hands the
 * clone to the next controller via `updateRuntime`. The online copy is never
 * trained directly.
 *
 * Steps cycle through A -> B, B -> C, C -> A.
 */
class RotationManager {
  /**
   * @param {object} ctrlA - Controller A (uses `runtime`, `isLearning`, `updateRuntime`).
   * @param {object} ctrlB - Controller B.
   * @param {object} ctrlC - Controller C.
   * @param {object} [options] - Overrides for the defaults listed below.
   */
  constructor(ctrlA, ctrlB, ctrlC, options = {}) {
    this.ctrlA = ctrlA;
    this.ctrlB = ctrlB;
    this.ctrlC = ctrlC;
    this.isRotating = false;
    this.timer = null;
    this.opts = Object.assign({
      cycleMs: 30 * 60 * 1000, // interval between steps: 30 minutes by default
      cooldownMs: 60 * 1000,   // pause at the end of each step
      learnIters: 3,           // training iterations on the clone
      minImprove: 0.005        // minimum relative gain (0.5%) required to commit
    }, options);
    this.sequence = [
      { from: 'A', to: 'B' },
      { from: 'B', to: 'C' },
      { from: 'C', to: 'A' }
    ];
    this.idx = 0;
  }

  /**
   * Run one step immediately, then one step every `cycleMs`. Calling it again
   * while a timer is registered does nothing.
   *
   * @returns {void}
   */
  start() {
    if (this.timer) return;
    const runStep = async () => {
      // Steps never overlap, and none starts during shutdown.
      if (this.isRotating || isShuttingDown) return;
      this.isRotating = true;
      try {
        const step = this.sequence[this.idx % this.sequence.length];
        await this._runOne(step.from, step.to);
        this.idx++;
      } catch (e) {
        console.warn('[ROTATE] 步骤失败:', e.message);
      } finally {
        this.isRotating = false;
      }
    };
    // Run once right away, then periodically.
    runStep();
    this.timer = registerInterval(runStep, this.opts.cycleMs);
    console.log(`[ROTATE] 轮换学习已启动:每${Math.round(this.opts.cycleMs / 60000)}分钟步进一次`);
  }

  /**
   * Cancel the periodic timer, if any.
   *
   * @returns {void}
   */
  stop() {
    if (this.timer) {
      clearInterval(this.timer);
    }
    this.timer = null;
  }

  /**
   * @param {string} name - 'A' or 'B'; any other value selects controller C.
   * @returns {object} The matching controller.
   */
  _getCtrl(name) {
    return name === 'A' ? this.ctrlA : (name === 'B' ? this.ctrlB : this.ctrlC);
  }

  /**
   * Execute one rotation step: clone `fromName`'s runtime, train and evaluate
   * the clone, and commit it to `toName` when the relative gain reaches
   * `minImprove`. Returns early (without the cool-down) when a controller is
   * missing, one of them is learning, or clone training fails.
   *
   * @param {string} fromName - Source controller name.
   * @param {string} toName - Target controller name.
   * @returns {Promise<void>}
   */
  async _runOne(fromName, toName) {
    const fromCtrl = this._getCtrl(fromName);
    const toCtrl = this._getCtrl(toName);
    if (!fromCtrl || !toCtrl) return;

    if (fromCtrl.isLearning || toCtrl.isLearning) {
      console.log(`[ROTATE] ${fromName} 或 ${toName} 正在学习,跳过本步`);
      return;
    }

    console.log(`[ROTATE] ${fromName} -> ${toName} 开始克隆与训练(离线)`);

    // 1) Identify patterns and clone.
    const association = new AssociationLayer(fromCtrl.runtime);
    association.patterns = association.identifyPatterns();
    const systemClone = await association.cloneSystem();
    systemClone.spider = fromCtrl.runtime.spider;
    association.applyPatternsToClone(systemClone);

    // 2) Warm up the clone with a small sample of vocabulary entries.
    const sampleWords = Array.from(new Set(systemClone.vocabManager.vocab.slice(4, 64)))
      .filter((w) => w.length > 1)
      .slice(0, 16);
    if (sampleWords.length) {
      try {
        await systemClone.processInput(sampleWords, { addNewWords: false });
      } catch (e) {
        // Warm-up is best-effort: keep going, but leave a trace of the failure.
        console.warn('[ROTATE] 克隆预热失败,已忽略:', e.message);
      }
    }

    // 3) Train on the clone, not on the online copy.
    try {
      await systemClone.startSelfLearning(this.opts.learnIters);
    } catch (e) {
      console.warn('[ROTATE] 克隆训练失败:', e.message);
      return;
    }

    // 4) Evaluate: compare the source runtime with the clone.
    const { origScore, cloneScore } = await this._evaluateTwo(fromCtrl.runtime, systemClone);
    // Relative gain; with a zero baseline any positive clone score counts as 100%.
    const improve = origScore > 0 ? (cloneScore - origScore) / origScore : (cloneScore > 0 ? 1 : 0);

    console.log(`[ROTATE] 评估:orig=${origScore.toFixed(4)} clone=${cloneScore.toFixed(4)} improve=${(improve * 100).toFixed(2)}%`);

    if (improve >= this.opts.minImprove) {
      // 5) Commit: swap the target controller's runtime for the clone.
      toCtrl.updateRuntime(systemClone);
      console.log(`[ROTATE] 已将 ${fromName} 的成果切换到 ${toName}`);

      // When the target is A, also publish the new runtime to Redis.
      if (toName === 'A') {
        await publishRuntimeToRedis(systemClone);
      }
    } else {
      console.log('[ROTATE] 改进不足,放弃本次提交');
    }

    // 6) Cool-down.
    await new Promise((r) => setTimeout(r, this.opts.cooldownMs));
  }

  /**
   * Score both runtimes on the test articles provided by a
   * `ReinforcementLearner` built on `baseRuntime`.
   *
   * @param {object} baseRuntime - Runtime currently in use.
   * @param {object} cloneRuntime - Trained clone.
   * @returns {Promise<{origScore: number, cloneScore: number}>} Sum of
   *   `evaluateSystem` results over all articles; vocabulary sizes when there
   *   are no articles; `{0, 0}` when evaluation throws.
   */
  async _evaluateTwo(baseRuntime, cloneRuntime) {
    try {
      // Reuse the RL learner's test data and scoring method.
      const rl = new ReinforcementLearner(baseRuntime);
      const articles = rl.loadTestArticles();
      if (!articles.length) {
        // No test set: fall back to comparing vocabulary sizes.
        const s1 = baseRuntime.vocabManager.getsize();
        const s2 = cloneRuntime.vocabManager.getsize();
        return { origScore: s1, cloneScore: s2 };
      }
      let origCoverages = 0;
      let cloneCoverages = 0;
      for (const article of articles) {
        origCoverages += rl.evaluateSystem(baseRuntime, article);
        cloneCoverages += rl.evaluateSystem(cloneRuntime, article);
      }
      // The score is the plain sum over all articles (no averaging or variance term).
      return { origScore: origCoverages, cloneScore: cloneCoverages };
    } catch (e) {
      console.warn('[ROTATE] 评估失败,回退到零分:', e.message);
      return { origScore: 0, cloneScore: 0 };
    }
  }
}
|
|
458
|
+
// ...existing code...
|
|
57
459
|
async function batchAddPoints(graph, pointsArr, batchSize = 500) {
|
|
58
460
|
if (!graph || !pointsArr || !Array.isArray(pointsArr)) {
|
|
59
461
|
console.error('[BATCH] 无效的参数:', {
|
|
@@ -79,6 +481,7 @@ async function batchAddPoints(graph, pointsArr, batchSize = 500) {
|
|
|
79
481
|
await new Promise(resolve => setImmediate(resolve));
|
|
80
482
|
}
|
|
81
483
|
}
|
|
484
|
+
/*
|
|
82
485
|
// 反触发机制
|
|
83
486
|
async function antiTrigger(onContinue, onExit) {
|
|
84
487
|
const PORT = global.config.emitExitport || 8641;
|
|
@@ -145,6 +548,7 @@ async function antiTrigger(onContinue, onExit) {
|
|
|
145
548
|
}, 10000);
|
|
146
549
|
}
|
|
147
550
|
}
|
|
551
|
+
*/
|
|
148
552
|
function verifySystemConsistency(sourceRuntime, targetRuntime) {
|
|
149
553
|
console.log('[VERIFY] 开始验证系统一致性...');
|
|
150
554
|
console.log(`[VERIFY] 源词表大小: ${sourceRuntime.vocabManager.vocab.length}, 目标: ${targetRuntime.vocabManager.vocab.length}`);
|
|
@@ -163,12 +567,104 @@ const readline = require('readline');
|
|
|
163
567
|
|
|
164
568
|
app.use(bodyParser.json());
|
|
165
569
|
app.use(bodyParser.urlencoded({ extended: true }));
|
|
570
|
+
// ...existing code...
|
|
166
571
|
app.use(express.static(path.join(__dirname, 'public')));
|
|
167
572
|
|
|
168
573
|
// 持久化路径
|
|
169
574
|
const SAVE_PATH = path.join(__dirname, 'runtime_data.json');
|
|
170
575
|
|
|
171
|
-
|
|
576
|
+
// 新增:会话管理器(以对话为尺度管理记忆)
|
|
577
|
+
/**
 * Session manager: tracks conversations as sessions so that memory can be
 * managed per conversation. Keeps at most `maxSessions` sessions, evicting the
 * least recently active ones; the current session is never evicted.
 */
class SessionManager {
  /**
   * @param {object} [options]
   * @param {number} [options.idleMs=600000] - Idle time after which the next
   *   activity check starts a new session.
   * @param {number} [options.maxSessions=200] - Upper bound on retained sessions.
   */
  constructor({ idleMs = 10 * 60 * 1000, maxSessions = 200 } = {}) {
    this.idleMs = idleMs;
    this.maxSessions = maxSessions;
    this.sessions = new Map(); // sessionId -> { start, lastActivity, messageCount, ...meta }
    this.current = null;
  }

  /** @returns {number} Current time in milliseconds (single clock source for timestamps). */
  now() { return Date.now(); }

  /** @returns {string} New session ID built from the wall clock and a random number. */
  _newId() { return `S${Date.now()}_${Math.floor(Math.random() * 1e6)}`; }

  /**
   * Create a session, make it current and enforce the session cap.
   *
   * @param {object} [meta] - Extra fields merged into the session record.
   * @returns {string} The new session ID.
   */
  startNewSession(meta = {}) {
    const id = this._newId();
    const ts = this.now();
    this.sessions.set(id, {
      start: ts,
      lastActivity: ts,
      messageCount: 0,
      ...meta
    });
    this.current = id;
    this._truncateIfNeeded();
    return id;
  }

  /**
   * Return the current session ID, starting a new session when there is none
   * or when the current one has been idle for longer than `idleMs`.
   *
   * @returns {string}
   */
  ensureActive() {
    const ts = this.now();
    if (!this.current || !this.sessions.has(this.current)) {
      return this.startNewSession();
    }
    const s = this.sessions.get(this.current);
    if (ts - s.lastActivity > this.idleMs) {
      return this.startNewSession({ reason: 'idle-timeout' });
    }
    return this.current;
  }

  /**
   * Switch to the given session, creating it when unknown. A falsy ID falls
   * back to `ensureActive()`.
   *
   * @param {string} [sessionId]
   * @returns {string} The session ID now in use.
   */
  useSession(sessionId) {
    if (!sessionId) return this.ensureActive();
    const isNew = !this.sessions.has(sessionId);
    if (isNew) {
      const ts = this.now();
      this.sessions.set(sessionId, { start: ts, lastActivity: ts, messageCount: 0 });
    }
    this.current = sessionId;
    // Caller-supplied IDs create sessions too, so the cap is enforced here as
    // well; otherwise the map grows without bound.
    if (isNew) this._truncateIfNeeded();
    return sessionId;
  }

  /**
   * Refresh a session's `lastActivity`. Unknown IDs are ignored.
   *
   * @param {string} [sessionId=this.current]
   * @returns {void}
   */
  touch(sessionId = this.current) {
    if (!sessionId) return;
    const s = this.sessions.get(sessionId);
    if (s) s.lastActivity = this.now();
  }

  /**
   * Increment a session's message counter and refresh its `lastActivity`.
   * Unknown IDs are ignored.
   *
   * @param {string} [sessionId=this.current]
   * @returns {void}
   */
  incMessage(sessionId = this.current) {
    if (!sessionId) return;
    const s = this.sessions.get(sessionId);
    if (s) {
      s.messageCount = (s.messageCount || 0) + 1;
      s.lastActivity = this.now();
    }
  }

  /** @returns {string} Same as `ensureActive()`. */
  getActiveSessionId() {
    return this.ensureActive();
  }

  /**
   * @param {number} [limit=this.maxSessions]
   * @returns {string[]} Up to `limit` session IDs, most recently active first.
   */
  getRecentSessionIds(limit = this.maxSessions) {
    const entries = Array.from(this.sessions.entries());
    entries.sort((a, b) => (b[1].lastActivity || 0) - (a[1].lastActivity || 0));
    return entries.slice(0, limit).map(([id]) => id);
  }

  /**
   * Evict the least recently active sessions until at most `maxSessions`
   * remain. The current session is kept even when its timestamp ties with, or
   * is older than, other sessions, so `current` never dangles after eviction.
   *
   * @returns {void}
   */
  _truncateIfNeeded() {
    if (this.sessions.size <= this.maxSessions) return;
    const keep = new Set();
    if (this.current != null && this.sessions.has(this.current)) {
      keep.add(this.current);
    }
    for (const id of this.getRecentSessionIds(this.sessions.size)) {
      if (keep.size >= this.maxSessions) break;
      keep.add(id);
    }
    // Iterate over a snapshot of the keys while deleting.
    for (const id of Array.from(this.sessions.keys())) {
      if (!keep.has(id)) this.sessions.delete(id);
    }
  }

  /**
   * @returns {{config: {idleMs: number, maxSessions: number}, current: (string|null), sessions: Array}}
   *   Snapshot of the manager; `sessions` is an array of `[id, record]` pairs.
   */
  export() {
    return {
      config: { idleMs: this.idleMs, maxSessions: this.maxSessions },
      current: this.current,
      sessions: Array.from(this.sessions.entries())
    };
  }

  /**
   * Restore state produced by `export()`. A falsy argument is ignored; a
   * non-array `sessions` field results in an empty session map.
   *
   * @param {object} obj
   * @returns {void}
   */
  import(obj) {
    if (!obj) return;
    const { config, current, sessions } = obj;
    if (config) {
      this.idleMs = config.idleMs ?? this.idleMs;
      this.maxSessions = config.maxSessions ?? this.maxSessions;
    }
    this.sessions = new Map(Array.isArray(sessions) ? sessions : []);
    this.current = current || null;
  }
}
|
|
667
|
+
// ...existing code...
|
|
172
668
|
|
|
173
669
|
|
|
174
670
|
class SnapshotManager {
|
|
@@ -206,7 +702,6 @@ class SnapshotManager {
|
|
|
206
702
|
}
|
|
207
703
|
|
|
208
704
|
async createSnapshot(name = 'auto') {
|
|
209
|
-
// 防止并发创建
|
|
210
705
|
if (this.isCreatingSnapshot) {
|
|
211
706
|
console.log('[SNAPSHOT] 另一个快照正在创建中,跳过');
|
|
212
707
|
return null;
|
|
@@ -222,31 +717,39 @@ class SnapshotManager {
|
|
|
222
717
|
|
|
223
718
|
console.log(`[SNAPSHOT] 开始创建快照: ${snapshotId}`);
|
|
224
719
|
|
|
225
|
-
//
|
|
720
|
+
// 优先使用分区图的全量导出(避免仅导出窗口)
|
|
721
|
+
let memesAll = [];
|
|
722
|
+
if (this.runtime.graph && typeof this.runtime.graph.exportAllPoints === 'function') {
|
|
723
|
+
try {
|
|
724
|
+
memesAll = await this.runtime.graph.exportAllPoints();
|
|
725
|
+
} catch (e) {
|
|
726
|
+
console.warn('[SNAPSHOT] 分区图导出失败,回退窗口:', e.message);
|
|
727
|
+
memesAll = this.runtime.graph.getAllPoints();
|
|
728
|
+
}
|
|
729
|
+
} else {
|
|
730
|
+
memesAll = this.runtime.graph.getAllPoints();
|
|
731
|
+
}
|
|
732
|
+
|
|
226
733
|
const snapshotData = {
|
|
227
734
|
id: snapshotId,
|
|
228
735
|
timestamp,
|
|
229
736
|
name,
|
|
230
737
|
createDate: new Date().toISOString(),
|
|
231
|
-
memes:
|
|
738
|
+
memes: memesAll,
|
|
232
739
|
wordGraph: Array.from(this.runtime.wordGraph.points.values()),
|
|
233
740
|
kvm: Array.from(this.runtime.kvm.memory.entries()),
|
|
234
741
|
vocab: this.runtime.vocabManager.vocab,
|
|
235
|
-
wordAccessLog: Array.from(this.runtime.wordAccessLog
|
|
742
|
+
wordAccessLog: Array.from(this.runtime.wordAccessLog.entries()).map(([w, per]) =>
|
|
743
|
+
[w, per instanceof Map ? Array.from(per.entries()) : (Array.isArray(per) ? [['legacy', per.length]] : [])]
|
|
744
|
+
),
|
|
745
|
+
sessions: this.runtime.session.export()
|
|
236
746
|
};
|
|
237
747
|
|
|
238
|
-
// 写入临时文件,然后原子重命名以确保数据完整性
|
|
239
748
|
const tempPath = `${filePath}.temp`;
|
|
240
749
|
await fs.promises.writeFile(tempPath, JSON.stringify(snapshotData), 'utf-8');
|
|
241
750
|
await fs.promises.rename(tempPath, filePath);
|
|
242
751
|
|
|
243
|
-
|
|
244
|
-
const snapshotInfo = {
|
|
245
|
-
id: snapshotId,
|
|
246
|
-
timestamp,
|
|
247
|
-
name,
|
|
248
|
-
path: filePath
|
|
249
|
-
};
|
|
752
|
+
const snapshotInfo = { id: snapshotId, timestamp, name, path: filePath };
|
|
250
753
|
this.snapshotList.unshift(snapshotInfo);
|
|
251
754
|
|
|
252
755
|
console.timeEnd('snapshotCreation');
|
|
@@ -260,11 +763,11 @@ class SnapshotManager {
|
|
|
260
763
|
}
|
|
261
764
|
}
|
|
262
765
|
|
|
766
|
+
|
|
263
767
|
async restoreSnapshot(snapshotId) {
|
|
264
768
|
console.log(`[SNAPSHOT] 开始从快照恢复: ${snapshotId}`);
|
|
265
769
|
console.time('snapshotRestore');
|
|
266
770
|
|
|
267
|
-
// 查找快照
|
|
268
771
|
const snapshot = this.snapshotList.find(s => s.id === snapshotId);
|
|
269
772
|
if (!snapshot) {
|
|
270
773
|
console.error(`[SNAPSHOT] 快照不存在: ${snapshotId}`);
|
|
@@ -272,37 +775,27 @@ class SnapshotManager {
|
|
|
272
775
|
}
|
|
273
776
|
|
|
274
777
|
try {
|
|
275
|
-
// 读取快照文件
|
|
276
|
-
console.log(`[SNAPSHOT] 从文件读取数据: ${snapshot.path}`);
|
|
277
778
|
const dataStr = await fs.promises.readFile(snapshot.path, 'utf-8');
|
|
278
779
|
const data = JSON.parse(dataStr);
|
|
279
780
|
|
|
280
|
-
// 在恢复前创建自动备份
|
|
281
781
|
await this.createSnapshot(`auto_before_restore_${snapshotId}`);
|
|
282
782
|
|
|
283
|
-
//
|
|
284
|
-
console.log('[SNAPSHOT] 清空当前运行时...');
|
|
285
|
-
this.runtime.graph = new GraphDB();
|
|
783
|
+
// 清空当前运行时(词图/KVM 内存)
|
|
286
784
|
this.runtime.wordGraph = new GraphDB();
|
|
287
785
|
this.runtime.kvm = new KVM();
|
|
288
786
|
this.runtime.wordAccessLog = new Map();
|
|
289
787
|
|
|
290
|
-
//
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
this.runtime.graph.addPoint(point.pointID, point.connect);
|
|
298
|
-
}
|
|
299
|
-
// 让事件循环有机会处理其他事件
|
|
300
|
-
await new Promise(resolve => setImmediate(resolve));
|
|
788
|
+
// 恢复模因图:走分区导入(覆盖分区存储)
|
|
789
|
+
if (data.memes && this.runtime.graph && typeof this.runtime.graph.importAllPoints === 'function') {
|
|
790
|
+
await this.runtime.graph.importAllPoints(data.memes);
|
|
791
|
+
} else if (data.memes) {
|
|
792
|
+
// 窗口回退(不推荐)
|
|
793
|
+
for (const point of data.memes) {
|
|
794
|
+
await this.runtime.graph.addPoint(point.pointID, point.connect);
|
|
301
795
|
}
|
|
302
796
|
}
|
|
303
797
|
|
|
304
798
|
// 恢复词图
|
|
305
|
-
console.log('[SNAPSHOT] 恢复词语网络...');
|
|
306
799
|
if (data.wordGraph) {
|
|
307
800
|
const BATCH_SIZE = 1000;
|
|
308
801
|
for (let i = 0; i < data.wordGraph.length; i += BATCH_SIZE) {
|
|
@@ -315,29 +808,39 @@ class SnapshotManager {
|
|
|
315
808
|
}
|
|
316
809
|
|
|
317
810
|
// 恢复KVM
|
|
318
|
-
console.log('[SNAPSHOT] 恢复键值存储...');
|
|
319
811
|
if (data.kvm) {
|
|
320
812
|
const BATCH_SIZE = 1000;
|
|
321
813
|
for (let i = 0; i < data.kvm.length; i += BATCH_SIZE) {
|
|
322
814
|
const batch = data.kvm.slice(i, i + BATCH_SIZE);
|
|
323
|
-
for (const [k, v] of batch)
|
|
324
|
-
this.runtime.kvm.set(k, v);
|
|
325
|
-
}
|
|
815
|
+
for (const [k, v] of batch) this.runtime.kvm.set(k, v);
|
|
326
816
|
await new Promise(resolve => setImmediate(resolve));
|
|
327
817
|
}
|
|
328
818
|
}
|
|
329
819
|
|
|
330
820
|
// 恢复词表
|
|
331
|
-
console.log('[SNAPSHOT] 恢复词表...');
|
|
332
821
|
if (data.vocab) {
|
|
333
822
|
this.runtime.vocabManager.vocab = data.vocab;
|
|
334
823
|
this.runtime.vocabManager.updateMappings();
|
|
335
824
|
}
|
|
336
825
|
|
|
337
826
|
// 恢复词访问日志
|
|
338
|
-
console.log('[SNAPSHOT] 恢复词访问日志...');
|
|
339
827
|
if (data.wordAccessLog) {
|
|
340
|
-
|
|
828
|
+
const restored = new Map();
|
|
829
|
+
for (const [word, per] of data.wordAccessLog) {
|
|
830
|
+
if (Array.isArray(per) && per.length > 0 && Array.isArray(per[0])) {
|
|
831
|
+
restored.set(word, new Map(per));
|
|
832
|
+
} else if (Array.isArray(per)) {
|
|
833
|
+
restored.set(word, new Map([['legacy', per.length]]));
|
|
834
|
+
} else {
|
|
835
|
+
restored.set(word, new Map());
|
|
836
|
+
}
|
|
837
|
+
}
|
|
838
|
+
this.runtime.wordAccessLog = restored;
|
|
839
|
+
}
|
|
840
|
+
if (data.sessions) {
|
|
841
|
+
this.runtime.session.import(data.sessions);
|
|
842
|
+
} else {
|
|
843
|
+
this.runtime.session.startNewSession({ reason: 'snapshot-legacy' });
|
|
341
844
|
}
|
|
342
845
|
|
|
343
846
|
console.timeEnd('snapshotRestore');
|
|
@@ -645,59 +1148,769 @@ class GraphDB {
|
|
|
645
1148
|
if (direction === 2 && current !== fromID) continue; // 只指向对方
|
|
646
1149
|
if (closedSet.has(neighborID)) continue;
|
|
647
1150
|
|
|
648
|
-
const tentativeGScore = (gScore.get(current) || Infinity) + weight;
|
|
649
|
-
if (!openSet.has(neighborID)) {
|
|
650
|
-
openSet.add(neighborID);
|
|
651
|
-
} else if (tentativeGScore >= (gScore.get(neighborID) || Infinity)) {
|
|
652
|
-
continue;
|
|
653
|
-
}
|
|
1151
|
+
const tentativeGScore = (gScore.get(current) || Infinity) + weight;
|
|
1152
|
+
if (!openSet.has(neighborID)) {
|
|
1153
|
+
openSet.add(neighborID);
|
|
1154
|
+
} else if (tentativeGScore >= (gScore.get(neighborID) || Infinity)) {
|
|
1155
|
+
continue;
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
cameFrom.set(neighborID, current);
|
|
1159
|
+
gScore.set(neighborID, tentativeGScore);
|
|
1160
|
+
fScore.set(neighborID, tentativeGScore + this.heuristic(neighborID, toID));
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
return null;
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1166
|
+
// 获取所有点
|
|
1167
|
+
getAllPoints() {
|
|
1168
|
+
return Array.from(this.points.values());
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
// 启发式函数:简单的常数启发式
|
|
1172
|
+
heuristic(pointID, toID) {
|
|
1173
|
+
return 1; // 简化的启发式函数
|
|
1174
|
+
}
|
|
1175
|
+
existEdge(pointID, neighborID) {
|
|
1176
|
+
const point = this.points.get(pointID);
|
|
1177
|
+
if (!point) {
|
|
1178
|
+
return { exist: false, weight: undefined, type: undefined };
|
|
1179
|
+
}
|
|
1180
|
+
const connectArr = point.connect || [];
|
|
1181
|
+
const found = connectArr.find(([_, id]) => id === neighborID);
|
|
1182
|
+
return {
|
|
1183
|
+
exist: connectArr.some(([_, id]) => id === neighborID),
|
|
1184
|
+
weight: found ? found[0] : undefined,
|
|
1185
|
+
type: found ? found[2] : undefined
|
|
1186
|
+
};
|
|
1187
|
+
}
|
|
1188
|
+
existPoint(pointID) {
|
|
1189
|
+
return { exist: this.points.has(pointID), connect: this.points.get(pointID)?.connect || [] };
|
|
1190
|
+
}
|
|
1191
|
+
deleteEdge(pointID, neighborID) {
|
|
1192
|
+
if (this.existEdge(pointID, neighborID).exist) {
|
|
1193
|
+
this.points.get(pointID).connect = this.points.get(pointID).connect.filter(([_, id]) => id !== neighborID);
|
|
1194
|
+
this.points.get(neighborID).connect = this.points.get(neighborID).connect.filter(([_, id]) => id !== pointID);
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1197
|
+
deletePoint(pointID) {
|
|
1198
|
+
if (this.existPoint(pointID).exist) {
|
|
1199
|
+
this.points.delete(pointID);
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
}
|
|
1203
|
+
// 简易日志辅助
|
|
1204
|
+
/**
 * Log helper: forwards the arguments to `console.log`, prefixed with the '[PART]' tag.
 *
 * @param {...*} args - Values to log.
 * @returns {void}
 */
function logPart(...args) {
  const tagged = ['[PART]'].concat(args);
  console.log.apply(console, tagged);
}
|
|
1205
|
+
/**
 * Promise-based delay built on `setTimeout`.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
1206
|
+
|
|
1207
|
+
// 存储适配层(FS/LMDB/Level 多后端,按需加载)
|
|
1208
|
+
/**
 * Storage adapter for partitioned graph data.
 *
 * Three backends are supported and loaded on demand:
 *   - 'fs'    : one `p_<pid>.jsonl` file per partition (one point per line)
 *               plus one `p_<pid>.events.jsonl` file for boundary events;
 *   - 'lmdb'  : optional `lmdb` dependency;
 *   - 'level' : optional `level` dependency.
 * Whenever the requested backend cannot be loaded or initialised the adapter
 * degrades to 'fs', so after construction `this.backend` is always one of the
 * three names above and `this.ready` is true.
 */
class GraphStorageAdapter {
    /**
     * @param {object} [options]
     * @param {string} [options.baseDir] - Directory holding all partition
     *   data; defaults to `<module dir>/graph_parts`. Created if missing.
     * @param {string} [options.backend='fs'] - 'fs', 'lmdb' or 'level'.
     */
    constructor({ baseDir, backend = 'fs' } = {}) {
        this.baseDir = baseDir || path.join(__dirname, 'graph_parts');
        this.backend = backend;
        this.ready = false;

        // Make sure the base directory exists before any backend touches it.
        fs.mkdirSync(this.baseDir, { recursive: true });

        // Optional dependencies: only required when explicitly requested.
        this.lmdb = null;
        this.level = null;

        if (backend === 'lmdb') {
            try {
                this.lmdb = require('lmdb');
            } catch (e) {
                console.warn('[PART][ADAPTER] LMDB 不可用,降级为 FS:', e.message);
                this.backend = 'fs';
            }
        }
        if (backend === 'level') {
            try {
                this.level = require('level');
            } catch (e) {
                console.warn('[PART][ADAPTER] level 不可用,降级为 FS:', e.message);
                this.backend = 'fs';
            }
        }

        this._initBackend();
    }

    /**
     * Opens the selected backend. Any failure, an unknown backend name, or a
     * binding whose API does not match what this adapter calls, degrades to FS.
     */
    _initBackend() {
        if (this.backend === 'lmdb' && this.lmdb) {
            try {
                const storeDir = path.join(this.baseDir, 'lmdb');
                fs.mkdirSync(storeDir, { recursive: true });
                this.env = this.lmdb.open({
                    path: storeDir,
                    mapSize: 1024n * 1024n * 1024n * 64n,
                    compression: true,
                });
                // NOTE(review): the LMDB code paths below rely on
                // `env.beginTxn()` and `lmdb.Cursors.Cursor`. Verify that the
                // installed `lmdb` package exposes them; when it does not we
                // degrade here instead of crashing on the first load/save.
                if (typeof this.env?.beginTxn !== 'function'
                    || typeof this.lmdb.Cursors?.Cursor !== 'function') {
                    throw new Error('installed lmdb binding lacks the beginTxn/Cursors API used by this adapter');
                }
            } catch (e) {
                console.warn('[PART][ADAPTER] LMDB 初始化失败,降级 FS:', e.message);
                try {
                    // Best-effort release of a half-opened environment.
                    Promise.resolve(this.env?.close?.()).catch(() => { });
                } catch { /* nothing else to clean up */ }
                this.env = null;
                this.backend = 'fs';
            }
        } else if (this.backend === 'level' && this.level) {
            try {
                const dbDir = path.join(this.baseDir, 'leveldb');
                fs.mkdirSync(dbDir, { recursive: true });
                this.db = new this.level.Level(dbDir, { valueEncoding: 'json' });
            } catch (e) {
                console.warn('[PART][ADAPTER] level 初始化失败,降级 FS:', e.message);
                this.db = null;
                this.backend = 'fs';
            }
        } else if (this.backend !== 'fs') {
            // An unknown backend would otherwise make every load return an
            // empty partition and every save a silent no-op.
            console.warn(`[PART][ADAPTER] backend "${this.backend}" is not available, falling back to FS`);
            this.backend = 'fs';
        }
        this.ready = true;
    }

    /** Path of the FS file that stores the points of partition `pid`. */
    _partFile(pid) { return path.join(this.baseDir, `p_${pid}.jsonl`); }

    /** Path of the FS file that stores the boundary events of partition `pid`. */
    _eventFile(pid) { return path.join(this.baseDir, `p_${pid}.events.jsonl`); }

    /**
     * Normalises one entry produced by a level iterator. Depending on the
     * library version entries are `[key, value]` tuples or `{ key, value }`
     * objects; both shapes are accepted.
     */
    _entryOf(entry) {
        if (Array.isArray(entry)) return { key: entry[0], value: entry[1] };
        return { key: entry?.key, value: entry?.value };
    }

    /**
     * Loads one partition.
     *
     * @param {number|string} pid - Partition id.
     * @returns {Promise<{points: Map<*, {pointID: *, connect: Array}>}>} The
     *   stored points keyed by `pointID`; an empty map when nothing is stored.
     */
    async loadPartition(pid) {
        if (this.backend === 'fs') {
            const file = this._partFile(pid);
            const points = new Map();
            if (!fs.existsSync(file)) return { points };

            let malformed = 0;
            const acceptLine = (line) => {
                if (!line.trim()) return;
                try {
                    const obj = JSON.parse(line);
                    if (obj && obj.pointID) {
                        points.set(obj.pointID, { pointID: obj.pointID, connect: obj.connect || [] });
                    }
                } catch {
                    malformed++;
                }
            };

            let pending = '';
            for await (const chunk of fs.createReadStream(file, { encoding: 'utf-8' })) {
                const lines = (pending + chunk).split('\n');
                pending = lines.pop(); // possibly incomplete last line
                for (const line of lines) acceptLine(line);
            }
            // A final record without a trailing newline is still a record.
            acceptLine(pending);

            if (malformed > 0) {
                console.warn(`[PART][ADAPTER] skipped ${malformed} malformed line(s) in ${file}`);
            }
            return { points };
        }

        if (this.backend === 'lmdb' && this.env) {
            const points = new Map();
            const txn = this.env.beginTxn({ readOnly: true });
            try {
                const cursor = new this.lmdb.Cursors.Cursor(txn, this.env.openDB({ name: `p_${pid}`, create: true }));
                try {
                    for (let found = cursor.goToFirst(); found; found = cursor.goToNext()) {
                        try {
                            const obj = JSON.parse(Buffer.from(cursor.getCurrentBinary()).toString('utf-8'));
                            if (obj && obj.pointID) points.set(obj.pointID, obj);
                        } catch { /* skip an undecodable record, keep the rest */ }
                    }
                } finally {
                    cursor.close();
                }
            } catch (e) {
                console.warn('[PART][ADAPTER][LMDB] loadPartition err:', e.message);
            } finally {
                txn.abort();
            }
            return { points };
        }

        if (this.backend === 'level' && this.db) {
            const points = new Map();
            try {
                for await (const entry of this.db.iterator({ gte: `p:${pid}:`, lt: `p:${pid};` })) {
                    const { value } = this._entryOf(entry);
                    if (value && value.pointID) points.set(value.pointID, value);
                }
            } catch (e) {
                console.warn('[PART][ADAPTER][LEVEL] loadPartition err:', e.message);
            }
            return { points };
        }

        return { points: new Map() };
    }

    /**
     * Saves one partition, replacing whatever was stored for it before.
     *
     * @param {number|string} pid - Partition id.
     * @param {Map<*, {pointID: *, connect: Array}>} pointsMap - Points to
     *   store; anything that is not a Map is ignored.
     * @returns {Promise<void>} Rejects on FS/level write failures.
     */
    async savePartition(pid, pointsMap) {
        if (!(pointsMap instanceof Map)) return;

        if (this.backend === 'fs') {
            const file = this._partFile(pid);
            const tmp = `${file}.tmp`;
            // Write to a temp file and rename so readers never see a
            // half-written partition.
            const ws = fs.createWriteStream(tmp, { encoding: 'utf-8' });
            try {
                await new Promise((resolve, reject) => {
                    ws.once('error', reject);
                    ws.once('finish', resolve);
                    for (const p of pointsMap.values()) {
                        ws.write(JSON.stringify({ pointID: p.pointID, connect: p.connect || [] }) + '\n');
                    }
                    ws.end();
                });
                await fs.promises.rename(tmp, file);
            } catch (e) {
                ws.destroy();
                // Best-effort cleanup; the temp file may not exist at all.
                await fs.promises.unlink(tmp).catch(() => { });
                throw e;
            }
            return;
        }

        if (this.backend === 'lmdb' && this.env) {
            const dbi = this.env.openDB({ name: `p_${pid}`, create: true });
            const txn = this.env.beginTxn();
            try {
                // Simplified full overwrite: clear everything, then re-insert.
                const cur = new this.lmdb.Cursors.Cursor(txn, dbi);
                for (let found = cur.goToFirst(); found; found = cur.goToNext()) {
                    txn.del(dbi, cur.getCurrentString());
                }
                cur.close();
                for (const p of pointsMap.values()) {
                    txn.put(dbi, p.pointID, JSON.stringify(p));
                }
                txn.commit();
            } catch (e) {
                try { txn.abort(); } catch { /* already closed */ }
                console.warn('[PART][ADAPTER][LMDB] savePartition err:', e.message);
            }
            return;
        }

        if (this.backend === 'level' && this.db) {
            const prefix = `p:${pid}:`;
            const ops = [];
            const live = new Set();
            for (const p of pointsMap.values()) {
                const key = `${prefix}${p.pointID}`;
                live.add(key);
                ops.push({ type: 'put', key, value: p });
            }
            // Full overwrite: keys of points that are no longer in the map
            // must go, otherwise deleted points reappear on the next load.
            for await (const entry of this.db.iterator({ gte: prefix, lt: `p:${pid};`, values: false })) {
                const { key } = this._entryOf(entry);
                if (key !== undefined && !live.has(key)) ops.push({ type: 'del', key });
            }
            if (ops.length) await this.db.batch(ops);
            return;
        }
    }

    /**
     * Appends one boundary (cross-partition edge) event to partition `pid`.
     *
     * @param {number|string} pid - Partition whose event log receives it.
     * @param {{type: string}} event - Event object; ignored without a `type`.
     * @returns {Promise<void>}
     */
    async appendEdgeEvent(pid, event) {
        if (!event || !event.type) return;

        if (this.backend === 'fs') {
            // Synchronous on purpose: the event is on disk before the caller
            // continues, even when the returned promise is not awaited.
            fs.appendFileSync(this._eventFile(pid), JSON.stringify(event) + '\n', 'utf-8');
            return;
        }

        if (this.backend === 'lmdb' && this.env) {
            const dbi = this.env.openDB({ name: `e_${pid}`, create: true });
            const txn = this.env.beginTxn();
            try {
                const key = `e:${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
                txn.put(dbi, key, JSON.stringify(event));
                txn.commit();
            } catch (e) {
                try { txn.abort(); } catch { /* already closed */ }
                console.warn('[PART][ADAPTER][LMDB] appendEdgeEvent err:', e.message);
            }
            return;
        }

        if (this.backend === 'level' && this.db) {
            const key = `e:${pid}:${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
            await this.db.put(key, event);
            return;
        }
    }

    /**
     * Reads and removes boundary events of partition `pid`.
     *
     * Events accepted by `filterFn` (all events when it is null) are removed
     * from storage and returned, up to `limit`; everything else stays stored.
     *
     * @param {number|string} pid - Partition id.
     * @param {?function(object): boolean} [filterFn=null] - Selects events.
     * @param {number} [limit=2000] - Maximum number of events to consume.
     * @returns {Promise<object[]>} The consumed events.
     */
    async consumeEdgeEvents(pid, filterFn = null, limit = 2000) {
        const events = [];

        if (this.backend === 'fs') {
            const file = this._eventFile(pid);
            if (!fs.existsSync(file)) return events;

            const lines = fs.readFileSync(file, 'utf-8').split(/\r?\n/).filter(Boolean);
            const remain = [];
            for (const line of lines) {
                try {
                    const e = JSON.parse(line);
                    const ok = filterFn ? filterFn(e) : true;
                    if (ok && events.length < limit) {
                        events.push(e);
                    } else {
                        remain.push(line);
                    }
                } catch {
                    // Keep undecodable lines so nothing is lost silently.
                    remain.push(line);
                }
            }
            // Nothing consumed -> the log is unchanged, skip the rewrite.
            if (events.length === 0) return events;

            // Write the unconsumed events to a temp file, then swap it in.
            const tmp = `${file}.tmp`;
            fs.writeFileSync(tmp, remain.join('\n') + (remain.length ? '\n' : ''), 'utf-8');
            await fs.promises.rename(tmp, file);
            return events;
        }

        if (this.backend === 'lmdb' && this.env) {
            const dbi = this.env.openDB({ name: `e_${pid}`, create: true });
            const txn = this.env.beginTxn();
            const consumed = [];
            const toDel = [];
            try {
                const cur = new this.lmdb.Cursors.Cursor(txn, dbi);
                for (let found = cur.goToFirst(); found; found = cur.goToNext()) {
                    const k = cur.getCurrentString();
                    let e;
                    try {
                        e = JSON.parse(Buffer.from(cur.getCurrentBinary()).toString('utf-8'));
                    } catch {
                        continue; // leave an undecodable record in place
                    }
                    const ok = filterFn ? filterFn(e) : true;
                    if (ok && consumed.length < limit) {
                        consumed.push(e);
                        toDel.push(k);
                    }
                }
                cur.close();
                for (const k of toDel) txn.del(dbi, k);
                txn.commit();
                // Hand events out only once their removal is committed, so a
                // failed transaction cannot deliver the same event twice.
                events.push(...consumed);
            } catch (e) {
                try { txn.abort(); } catch { /* already closed */ }
                console.warn('[PART][ADAPTER][LMDB] consumeEdgeEvents err:', e.message);
            }
            return events;
        }

        if (this.backend === 'level' && this.db) {
            try {
                const consumed = [];
                const toDel = [];
                for await (const entry of this.db.iterator({ gte: `e:${pid}:`, lt: `e:${pid};` })) {
                    const { key, value } = this._entryOf(entry);
                    const ok = filterFn ? filterFn(value) : true;
                    if (ok && consumed.length < limit) {
                        consumed.push(value);
                        toDel.push(key);
                    }
                }
                const ops = toDel.map((k) => ({ type: 'del', key: k }));
                if (ops.length) await this.db.batch(ops);
                events.push(...consumed);
            } catch (e) {
                console.warn('[PART][ADAPTER][LEVEL] consumeEdgeEvents err:', e.message);
            }
            return events;
        }

        return events;
    }

    /**
     * Lists the ids of all stored partitions.
     *
     * Only the FS backend can enumerate them (from `p_<n>.jsonl` file names);
     * other backends return an empty list and callers fall back to their own
     * strategy.
     *
     * @returns {Promise<number[]>} Partition ids in ascending order.
     */
    async listPartitionIds() {
        if (this.backend !== 'fs') return [];
        const ids = [];
        for (const name of fs.readdirSync(this.baseDir)) {
            const m = /^p_(\d+)\.jsonl$/.exec(name);
            if (m) ids.push(Number(m[1]));
        }
        return ids.sort((a, b) => a - b);
    }
}
|
|
1507
|
+
|
|
1508
|
+
// 分区器(哈希 -> 分区ID)
|
|
1509
|
+
/**
 * Maps point ids to partition ids by hashing, and enumerates the partitions
 * adjacent to a given one on the ring `0 .. partitions - 1`.
 */
class GraphPartitioner {
    /**
     * @param {object} [options]
     * @param {number} [options.partitions=64] - Requested partition count.
     *   It is truncated to an integer and raised to at least 4; a value that
     *   is not a finite number falls back to 64.
     */
    constructor({ partitions = 64 } = {}) {
        const requested = Math.floor(Number(partitions));
        this.partitions = Number.isFinite(requested) ? Math.max(4, requested) : 64;
    }

    /**
     * Partition that owns `pointID`.
     *
     * @param {*} pointID - Point id; it is stringified before hashing.
     * @returns {number} An integer in `[0, partitions)`; 0 for falsy ids.
     */
    idOf(pointID) {
        if (!pointID) return 0;
        const digest = crypto.createHash('sha1').update(String(pointID)).digest();
        // The first 4 digest bytes form an unsigned 32-bit integer.
        return digest.readUInt32BE(0) % this.partitions;
    }

    /**
     * Partitions within `radius` steps of `pid` on the ring, `pid` included.
     *
     * @param {number} pid - Centre partition.
     * @param {number} [radius=1] - Steps to each side. Radii larger than the
     *   ring simply yield the whole ring.
     * @returns {number[]} Distinct partition ids in ascending order.
     */
    neighborsOf(pid, radius = 1) {
        const total = this.partitions;
        // True modulo: `%` alone goes negative once the offset exceeds `total`.
        const wrap = (value) => ((value % total) + total) % total;
        // Half the ring to each side already covers every partition.
        const reach = Math.min(radius, Math.ceil(total / 2));
        const out = new Set([pid]);
        for (let r = 1; r <= reach; r++) {
            out.add(wrap(pid - r));
            out.add(wrap(pid + r));
        }
        return [...out].sort((a, b) => a - b);
    }
}
|
|
1529
|
+
// 分区图 + 滑动窗口 + 边界事件消费
|
|
1530
|
+
/**
 * Graph store split into hash partitions with a sliding window of loaded
 * partitions.
 *
 * Each point lives in the partition chosen by the partitioner, together with
 * its adjacency list `connect` of `[weight, targetID, direction]` tuples.
 * Only a bounded number of partitions is kept in memory; the least recently
 * used ones outside the focus window are written back (when dirty) and
 * dropped. `this.points` is a merged view over the loaded partitions only.
 */
class PartitionedGraphDB {
    /**
     * @param {object} [options]
     * @param {number} [options.partitions=64] - Partition count.
     * @param {number} [options.maxLoadedPartitions=8] - Memory budget in
     *   partitions (at least 2).
     * @param {number} [options.windowRadius=1] - Ring distance around the
     *   focus partition that is protected from eviction (at least 0).
     * @param {string} [options.baseDir] - Storage directory.
     * @param {string} [options.backend='fs'] - Storage backend name.
     */
    constructor({
        partitions = 64,
        maxLoadedPartitions = 8,
        windowRadius = 1,
        baseDir = path.join(__dirname, 'graph_parts'),
        backend = 'fs'
    } = {}) {
        this.partitioner = new GraphPartitioner({ partitions });
        this.adapter = new GraphStorageAdapter({ baseDir, backend });
        this.maxLoadedPartitions = Math.max(2, maxLoadedPartitions);
        this.windowRadius = Math.max(0, windowRadius);

        // Loaded partitions: pid -> { points: Map, dirty, lastAccess }
        this.loaded = new Map();
        // Legacy-compatible merged view (points of loaded partitions only).
        this.points = new Map();
        // LRU bookkeeping.
        this.accessTick = 0;
        this.centerPid = null;

        // Partitions whose load is currently in flight.
        this.loading = new Set();
    }

    // ---------- internal: load / save / evict ----------

    /**
     * Returns the in-memory bundle of partition `pid`, loading it if needed.
     *
     * On load, pending boundary events of the partition are replayed into
     * its adjacency lists before the bundle becomes visible, so the merged
     * view also contains points that only exist because of an event.
     *
     * @param {number} pid - Partition id.
     * @returns {Promise<{points: Map, dirty: boolean, lastAccess: number}>}
     */
    async ensureLoaded(pid) {
        if (this.loaded.has(pid)) {
            this._touch(pid);
            return this.loaded.get(pid);
        }
        if (this.loading.has(pid)) {
            // Another caller is loading this partition: wait for it.
            while (this.loading.has(pid)) { await sleep(10); }
            // That load may have failed, or the bundle may already have been
            // evicted again; retry instead of handing out `undefined`.
            return this.loaded.get(pid) ?? this.ensureLoaded(pid);
        }
        this.loading.add(pid);
        try {
            const part = await this.adapter.loadPartition(pid);
            const bundle = {
                points: part.points || new Map(),
                dirty: false,
                lastAccess: ++this.accessTick
            };

            // Replay boundary events that concern this partition.
            const events = await this.adapter.consumeEdgeEvents(pid, (e) =>
                e && e.type === 'cross-edge' && (e.toPid === pid || e.fromPid === pid), 5000);
            if (events.length) {
                for (const e of events) this._applyEdgeEvent(bundle, e, pid);
                bundle.dirty = true;
            }

            this.loaded.set(pid, bundle);
            // Merge into the global view (after replay, so nothing is missed).
            for (const [id, p] of bundle.points.entries()) this.points.set(id, p);

            // Respect the memory budget, but never drop the bundle that is
            // about to be handed to the caller.
            await this._evictIfNeeded(pid);
            return bundle;
        } finally {
            this.loading.delete(pid);
        }
    }

    /** Writes partition `pid` back to storage when it has unsaved changes. */
    async savePartitionIfDirty(pid) {
        const entry = this.loaded.get(pid);
        if (!entry || !entry.dirty) return;
        await this.adapter.savePartition(pid, entry.points);
        entry.dirty = false;
    }

    /**
     * Evicts least-recently-used partitions until the budget is met.
     * Partitions inside the focus window are never evicted.
     *
     * @param {number} [keepPid] - Additional partition to protect.
     */
    async _evictIfNeeded(keepPid) {
        if (this.loaded.size <= this.maxLoadedPartitions) return;
        const avoid = new Set(this.partitioner.neighborsOf(this.centerPid ?? 0, this.windowRadius));
        if (keepPid !== undefined) avoid.add(keepPid);
        // Candidates in ascending lastAccess order (oldest first).
        const list = Array.from(this.loaded.entries())
            .filter(([pid]) => !avoid.has(pid))
            .sort((a, b) => a[1].lastAccess - b[1].lastAccess);
        while (this.loaded.size > this.maxLoadedPartitions && list.length) {
            const [pid, entry] = list.shift();
            await this.savePartitionIfDirty(pid);
            // Remove the partition's points from the global view.
            for (const [id] of entry.points.entries()) this.points.delete(id);
            this.loaded.delete(pid);
            logPart('evicted partition', pid);
        }
    }

    /** Marks partition `pid` as just used (LRU). */
    _touch(pid) {
        const entry = this.loaded.get(pid);
        if (entry) entry.lastAccess = ++this.accessTick;
    }

    /**
     * Replays one boundary event into a bundle.
     *
     * Event shape: `{ type:'cross-edge', from, to, weight, direction,
     * fromPid, toPid }`. The edge is stored on the `from` point only; the
     * `to` point belongs to another partition and is never created here.
     *
     * @param {{points: Map}} targetBundle - Bundle receiving the edge.
     * @param {object} e - Event to apply.
     * @param {number} [bundlePid] - Partition id of `targetBundle`; when
     *   given, events whose `from` point is owned elsewhere are ignored.
     */
    _applyEdgeEvent(targetBundle, e, bundlePid) {
        if (!e || e.type !== 'cross-edge') return;
        const ownerPid = this.partitioner.idOf(e.from);
        // A cross-partition event must name the real owner of `from`.
        if (e.toPid !== e.fromPid && e.fromPid !== ownerPid) return;
        if (bundlePid !== undefined && ownerPid !== bundlePid) return;

        const pts = targetBundle.points;
        if (!pts.has(e.from)) pts.set(e.from, { pointID: e.from, connect: [] });
        const pFrom = pts.get(e.from);
        if (!pFrom.connect.some(([, id, d]) => id === e.to && d === e.direction)) {
            pFrom.connect.push([e.weight, e.to, e.direction]);
        }
    }

    // ---------- sliding window ----------

    /**
     * Centres the window on the partition of `pointID` and loads the
     * partitions within `windowRadius` of it.
     */
    async focusOnPoint(pointID) {
        const pid = this.partitioner.idOf(pointID);
        this.centerPid = pid;
        const toLoad = this.partitioner.neighborsOf(pid, this.windowRadius);
        for (const id of toLoad) await this.ensureLoaded(id);
        await this._evictIfNeeded();
    }

    // ---------- legacy-compatible API: points / edges ----------

    /**
     * Ensures a point exists and adds the given outgoing edges to it.
     *
     * @param {*} pointID - Point id.
     * @param {Array<[number, *, number]>} [connect=[]] - Edges to add.
     * @returns {Promise<void>}
     */
    addPoint(pointID, connect = []) {
        const pid = this.partitioner.idOf(pointID);
        return this.ensureLoaded(pid).then((bundle) => {
            if (!bundle.points.has(pointID)) bundle.points.set(pointID, { pointID, connect: [] });
            const p = bundle.points.get(pointID);
            this.points.set(pointID, p);
            for (const [w, nid, dir] of connect) this._addEdgeInternal(pid, p, w, nid, dir, bundle);
            bundle.dirty = true;
        });
    }

    /**
     * Stores the edge `fromPoint -> toID` on `fromPoint`'s adjacency list.
     *
     * The adjacency list lives in the partition of the source point, which
     * the caller has already loaded, so the edge is written there directly
     * whether or not the target is in another partition. (Deferring
     * cross-partition edges to the event log left them invisible to
     * `existEdge`, `getNeighbors` and `selectPath` until the next reload.)
     *
     * @param {number} fromPid - Partition of the source (kept for callers).
     * @param {{pointID: *, connect: Array}} fromPoint - Source point.
     * @param {number} weight - Edge weight; non-finite values become 1.
     * @param {*} toID - Target point id.
     * @param {number} direction - 0, 1 or 2; anything else becomes 0.
     * @param {{dirty: boolean}} bundleOfFrom - Bundle holding `fromPoint`.
     */
    _addEdgeInternal(fromPid, fromPoint, weight, toID, direction, bundleOfFrom) {
        const w = (typeof weight === 'number' && Number.isFinite(weight)) ? weight : 1;
        const d = (direction === 0 || direction === 1 || direction === 2) ? direction : 0;
        const known = fromPoint.connect.some(([, id, dd]) => id === toID && dd === d);
        if (known) return;
        fromPoint.connect.push([w, toID, d]);
        bundleOfFrom.dirty = true;
    }

    /** Adds an edge in both directions (direction code 0). */
    addBidirectionalEdge(id1, id2, weight = 1) {
        return this.addEdge(id1, id2, weight, 0);
    }

    /**
     * Adds an edge, creating the source point if needed. With direction 0
     * the reverse edge is added as well (creating the target point).
     *
     * @returns {Promise<void>}
     */
    async addEdge(fromID, toID, weight = 1, direction = 0) {
        const fromPid = this.partitioner.idOf(fromID);
        const fromBundle = await this.ensureLoaded(fromPid);
        if (!fromBundle.points.has(fromID)) {
            fromBundle.points.set(fromID, { pointID: fromID, connect: [] });
            this.points.set(fromID, fromBundle.points.get(fromID));
        }
        this._addEdgeInternal(fromPid, fromBundle.points.get(fromID), weight, toID, direction, fromBundle);

        if (direction === 0) {
            // Bidirectional: also store the reverse edge.
            const toPid = this.partitioner.idOf(toID);
            const toBundle = await this.ensureLoaded(toPid);
            if (!toBundle.points.has(toID)) {
                toBundle.points.set(toID, { pointID: toID, connect: [] });
                this.points.set(toID, toBundle.points.get(toID));
            }
            this._addEdgeInternal(toPid, toBundle.points.get(toID), weight, fromID, 0, toBundle);
        }
    }

    /**
     * Sets the weight of an existing edge, or adds the edge when the source
     * point exists but has no such edge. Does nothing for unknown sources.
     *
     * @returns {Promise<void>}
     */
    async updateEdge(fromID, toID, newWeight, direction = 0) {
        const fromPid = this.partitioner.idOf(fromID);
        const b = await this.ensureLoaded(fromPid);
        const p = b.points.get(fromID);
        if (!p) return;
        const idx = p.connect.findIndex(([, id, d]) => id === toID && d === direction);
        if (idx >= 0) {
            p.connect[idx][0] = newWeight;
            b.dirty = true;
        } else {
            this._addEdgeInternal(fromPid, p, newWeight, toID, direction, b);
        }
    }

    /**
     * Looks up an edge among the loaded partitions only.
     *
     * @returns {{exist: boolean, weight: *, type: *}}
     */
    existEdge(fromID, toID) {
        const missing = { exist: false, weight: undefined, type: undefined };
        const entry = this.loaded.get(this.partitioner.idOf(fromID));
        if (!entry) return missing;
        const p = entry.points.get(fromID);
        if (!p) return missing;
        const found = p.connect.find(([, id]) => id === toID);
        return { exist: !!found, weight: found ? found[0] : undefined, type: found ? found[2] : undefined };
    }

    /**
     * Looks up a point in the loaded window only.
     *
     * @returns {{exist: boolean, connect: Array}}
     */
    existPoint(pointID) {
        const p = this.points.get(pointID);
        return { exist: !!p, connect: p ? p.connect : [] };
    }

    /** Removes all edges `a -> b` when `a`'s partition is loaded. */
    deleteEdge(a, b) {
        const entry = this.loaded.get(this.partitioner.idOf(a));
        if (!entry) return;
        const p = entry.points.get(a);
        if (!p) return;
        const before = p.connect.length;
        p.connect = p.connect.filter(([_, id]) => id !== b);
        entry.dirty = entry.dirty || (p.connect.length !== before);
    }

    /** Removes a point when its partition is loaded. */
    deletePoint(pointID) {
        const entry = this.loaded.get(this.partitioner.idOf(pointID));
        if (!entry) return;
        if (entry.points.has(pointID)) {
            entry.points.delete(pointID);
            this.points.delete(pointID);
            entry.dirty = true;
        }
    }

    /** Points of the loaded window only (legacy `getAllPoints`). */
    getAllPoints() {
        return Array.from(this.points.values());
    }

    /**
     * Exports the points of all partitions, for snapshots/publishing.
     *
     * Partitions that are loaded are taken from memory, so unsaved changes
     * are included; the others are read from storage. When the backend
     * cannot enumerate partitions, only the loaded window is exported.
     *
     * @returns {Promise<Array<{pointID: *, connect: Array}>>}
     */
    async exportAllPoints() {
        const storedIds = await this.adapter.listPartitionIds();
        if (storedIds.length === 0) {
            // Fallback: export the window.
            return this.getAllPoints();
        }
        const pids = [...new Set([...storedIds, ...this.loaded.keys()])].sort((a, b) => a - b);
        const out = [];
        for (const pid of pids) {
            const resident = this.loaded.get(pid);
            const points = resident ? resident.points : (await this.adapter.loadPartition(pid)).points;
            for (const p of points.values()) out.push({ pointID: p.pointID, connect: p.connect || [] });
        }
        return out;
    }

    /**
     * Bulk import: distributes a legacy point list over the partitions and
     * overwrites every partition that receives at least one point.
     *
     * @param {Array<{pointID: *, connect?: Array}>} pointsArr
     * @returns {Promise<void>}
     */
    async importAllPoints(pointsArr) {
        if (!Array.isArray(pointsArr)) return;
        // Bucket by owning partition.
        const buckets = new Map();
        for (const p of pointsArr) {
            if (!p || p.pointID == null) continue; // nothing to key the point by
            const pid = this.partitioner.idOf(p.pointID);
            if (!buckets.has(pid)) buckets.set(pid, new Map());
            buckets.get(pid).set(p.pointID, {
                pointID: p.pointID,
                connect: Array.isArray(p.connect) ? p.connect.slice() : []
            });
        }
        for (const [pid, map] of buckets.entries()) {
            await this.adapter.savePartition(pid, map);
            // Refresh the in-memory mirror of partitions that are loaded.
            const entry = this.loaded.get(pid);
            if (entry) {
                for (const [id] of entry.points.entries()) this.points.delete(id);
                entry.points = map;
                entry.dirty = false;
                entry.lastAccess = ++this.accessTick;
                for (const [id, p] of map.entries()) this.points.set(id, p);
            }
        }
    }

    /** First `maxNeighbors` edges of a point in the loaded window. */
    getNeighbors(pointID, maxNeighbors = 50) {
        const p = this.points.get(pointID);
        if (!p) return [];
        return p.connect.slice(0, maxNeighbors);
    }

    /**
     * Simplified A* over the loaded window; partitions of points that are
     * not loaded yet are pulled in while the search advances.
     *
     * @param {*} fromID - Start point.
     * @param {*} toID - Goal point.
     * @returns {Promise<Array|null>} Point ids from start to goal, or null
     *   when no path is found within the iteration budget.
     */
    async selectPath(fromID, toID) {
        if (fromID === toID) return [fromID];
        await this.focusOnPoint(fromID);

        const reconstruct = (came, cur) => {
            const route = [];
            let t = cur;
            while (came.has(t)) { route.push(t); t = came.get(t); }
            route.push(fromID);
            return route.reverse();
        };

        const open = new Set([fromID]);
        const came = new Map();
        const g = new Map([[fromID, 0]]);
        const f = new Map([[fromID, 1]]);
        const closed = new Set();

        const heuristic = () => 1;
        let iter = 0;
        const MAX_ITERS = 5000;

        while (open.size && iter++ < MAX_ITERS) {
            // Pick the open node with the smallest f.
            let cur = null; let minF = Infinity;
            for (const id of open) {
                const val = f.get(id) ?? Infinity;
                if (val < minF) { minF = val; cur = id; }
            }
            if (cur == null) break;
            if (cur === toID) return reconstruct(came, cur);

            open.delete(cur);
            closed.add(cur);

            // Unknown point: slide the window to its partition.
            if (!this.points.has(cur)) {
                await this.focusOnPoint(cur);
            }

            let neighbors = this.getNeighbors(cur, 50);
            if (neighbors.length === 0) {
                // Prefetch the adjacent partitions, then look again.
                const ring = this.partitioner.neighborsOf(this.partitioner.idOf(cur), 1);
                for (const rid of ring) await this.ensureLoaded(rid);
                neighbors = this.getNeighbors(cur, 50);
            }

            // `??`, not `||`: the start node's cost is 0, which is valid.
            const base = g.get(cur) ?? Infinity;
            for (const [w, nb] of neighbors) {
                if (closed.has(nb)) continue;
                const tentative = base + w;
                if (open.has(nb) && tentative >= (g.get(nb) ?? Infinity)) continue;
                open.add(nb);
                came.set(nb, cur);
                g.set(nb, tentative);
                f.set(nb, tentative + heuristic());
            }
        }
        return null;
    }

    /** Writes every loaded partition with unsaved changes back to storage. */
    async flushAll() {
        for (const [pid] of this.loaded.entries()) await this.savePartitionIfDirty(pid);
    }
}
|
|
700
|
-
|
|
701
1914
|
class KVM {
|
|
702
1915
|
constructor() {
|
|
703
1916
|
this.memory = new Map();
|
|
@@ -795,22 +2008,36 @@ class Runtime {
|
|
|
795
2008
|
// 运行时负责AI核心的调度、模因转换、信号传递与主流程控制
|
|
796
2009
|
constructor(config = {}) {
|
|
797
2010
|
this.config = config;
|
|
798
|
-
|
|
2011
|
+
// 使用分区图作为模因图;词图仍用内存图
|
|
2012
|
+
this.graph = new PartitionedGraphDB({
|
|
2013
|
+
partitions: this.config.partitions || 64,
|
|
2014
|
+
maxLoadedPartitions: this.config.maxLoadedPartitions || 8,
|
|
2015
|
+
windowRadius: this.config.windowRadius || 1,
|
|
2016
|
+
baseDir: path.join(__dirname, 'graph_parts'),
|
|
2017
|
+
backend: this.config.graphBackend || 'lmdb' // 可选 'fs' | 'lmdb' | 'level'
|
|
2018
|
+
});
|
|
799
2019
|
this.wordGraph = new GraphDB();
|
|
800
2020
|
this.kvm = new KVM();
|
|
801
2021
|
|
|
802
2022
|
this.transformer = null;
|
|
803
2023
|
this.vocabManager = global.vocabmanager;
|
|
804
2024
|
this.spider = new Spider();
|
|
2025
|
+
// 新:按需检索器(基于全局爬虫)
|
|
2026
|
+
this.researcher = new OnlineResearcher(this);
|
|
2027
|
+
// 新:以“会话”为尺度的访问日志与会话管理
|
|
2028
|
+
this.session = new SessionManager({
|
|
2029
|
+
idleMs: this.config.sessionIdleMs || 10 * 60 * 1000,
|
|
2030
|
+
maxSessions: this.config.memoryRecentSessions || 200
|
|
2031
|
+
});
|
|
2032
|
+
// Map<word, Map<sessionId, count>>
|
|
805
2033
|
this.wordAccessLog = new Map();
|
|
2034
|
+
this.config.spiderMix = this.config.spiderMix || { onlineWeight: 0.5, offlineWeight: 0.5 };
|
|
806
2035
|
this.initWordGraph();
|
|
807
|
-
this.forgetTimer = setInterval(() => this.forgetWords(), 350 * 1000);
|
|
808
|
-
this.MAX_MEME_WORDS = 100;
|
|
809
|
-
this.MIN_OVERLAP = 2;
|
|
810
|
-
|
|
811
|
-
this.activationStats = new Map(); // 记录激活关系
|
|
2036
|
+
this.forgetTimer = setInterval(() => this.forgetWords(), 350 * 1000);
|
|
2037
|
+
this.MAX_MEME_WORDS = 100;
|
|
2038
|
+
this.MIN_OVERLAP = 2;
|
|
2039
|
+
this.activationStats = new Map();
|
|
812
2040
|
this.isclone = false;
|
|
813
|
-
// 添加系统资源监控
|
|
814
2041
|
this.systemLoad = {
|
|
815
2042
|
lastCpuUsage: process.cpuUsage(),
|
|
816
2043
|
lastCheckTime: Date.now(),
|
|
@@ -818,12 +2045,61 @@ class Runtime {
|
|
|
818
2045
|
batchSizeMultiplier: 1
|
|
819
2046
|
};
|
|
820
2047
|
this.memeBarrier = new memeBarrier(this);
|
|
2048
|
+
}
|
|
2049
|
+
// 新增:应用可调参数(含 spiderMix / decayK / maxLen 等)
|
|
2050
|
+
applyTunableParams(partial = {}) {
|
|
2051
|
+
this.config = this.config || {};
|
|
2052
|
+
if (partial.spiderMix) {
|
|
2053
|
+
const ow = Math.max(0, Math.min(1, Number(partial.spiderMix.onlineWeight ?? this.config.spiderMix.onlineWeight ?? 0.5)));
|
|
2054
|
+
this.config.spiderMix = { onlineWeight: ow, offlineWeight: Math.max(0, Math.min(1, 1 - ow)) };
|
|
2055
|
+
}
|
|
2056
|
+
if (typeof partial.decayK === 'number') this.config.decayK = Math.max(0.1, Math.min(2.0, partial.decayK));
|
|
2057
|
+
if (typeof partial.maxLen === 'number') this.config.maxLen = Math.max(8, Math.min(64, Math.round(partial.maxLen)));
|
|
2058
|
+
if (typeof partial.edgeWeight === 'number') {
|
|
2059
|
+
for (const p of this.graph.getAllPoints()) for (const e of p.connect) e[0] = Math.max(0.1, Math.min(5, partial.edgeWeight));
|
|
2060
|
+
}
|
|
2061
|
+
// 可选:调节 crawler 抓取强度(若存在)
|
|
2062
|
+
if (global.__crawler) {
|
|
2063
|
+
if (typeof partial.perQuery === 'number') global.__crawler.__tune_perQuery = Math.max(2, Math.min(16, Math.round(partial.perQuery)));
|
|
2064
|
+
if (typeof partial.maxCrawl === 'number') global.__crawler.__tune_maxCrawl = Math.max(2, Math.min(24, Math.round(partial.maxCrawl)));
|
|
2065
|
+
}
|
|
2066
|
+
return {
|
|
2067
|
+
decayK: this.config.decayK,
|
|
2068
|
+
maxLen: this.config.maxLen,
|
|
2069
|
+
spiderMix: this.config.spiderMix,
|
|
2070
|
+
crawler: {
|
|
2071
|
+
perQuery: global.__crawler?.__tune_perQuery ?? 8,
|
|
2072
|
+
maxCrawl: global.__crawler?.__tune_maxCrawl ?? 12
|
|
2073
|
+
}
|
|
2074
|
+
};
|
|
821
2075
|
}
|
|
822
2076
|
// 添加到Runtime类内部
|
|
823
2077
|
filterStopWords(words) {
|
|
824
2078
|
return words.filter(word => !STOP_WORDS.includes(word.toLowerCase()));
|
|
825
2079
|
}
|
|
826
|
-
|
|
2080
|
+
async ingestTextDocument(raw, { addNewWords = true, minLen = 8 } = {}) {
|
|
2081
|
+
if (!raw) return 0;
|
|
2082
|
+
// 去除 meta,正文在空行后
|
|
2083
|
+
const parts = String(raw).split(/\r?\n\r?\n/);
|
|
2084
|
+
const body = parts.length > 1 ? parts.slice(1).join('\n') : parts[0];
|
|
2085
|
+
const sentences = body.split(/\r?\n+/).map(s => s.trim()).filter(Boolean);
|
|
2086
|
+
let fed = 0;
|
|
2087
|
+
for (const line of sentences) {
|
|
2088
|
+
// 分词 -> 归一化 -> 停用词过滤 -> processInput
|
|
2089
|
+
const words = line
|
|
2090
|
+
.toLowerCase()
|
|
2091
|
+
.replace(/[^a-z\s\u4e00-\u9fa5]/g, ' ')
|
|
2092
|
+
.split(/\s+/)
|
|
2093
|
+
.filter(w => w.length >= 2);
|
|
2094
|
+
if (!words.length) continue;
|
|
2095
|
+
const normalized = this.spider ? this.spider.lemmatizeWords(words) : words;
|
|
2096
|
+
const filtered = this.filterStopWords ? this.filterStopWords(normalized) : normalized;
|
|
2097
|
+
if (filtered.length < minLen) continue;
|
|
2098
|
+
this.processInput(filtered, { addNewWords });
|
|
2099
|
+
fed++;
|
|
2100
|
+
}
|
|
2101
|
+
return fed;
|
|
2102
|
+
}
|
|
827
2103
|
// 新增资源监控方法
|
|
828
2104
|
monitorSystemLoad() {
|
|
829
2105
|
const now = Date.now();
|
|
@@ -858,12 +2134,12 @@ class Runtime {
|
|
|
858
2134
|
return this.systemLoad.batchSizeMultiplier;
|
|
859
2135
|
}
|
|
860
2136
|
// 清理定时器
|
|
2137
|
+
// 清理定时器/刷盘
|
|
861
2138
|
cleanup() {
|
|
862
|
-
if (this.forgetTimer)
|
|
863
|
-
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
this.memeBarrier.stop();
|
|
2139
|
+
if (this.forgetTimer) clearInterval(this.forgetTimer);
|
|
2140
|
+
if (this.memeBarrier) this.memeBarrier.stop();
|
|
2141
|
+
if (this.graph && this.graph.flushAll) {
|
|
2142
|
+
this.graph.flushAll().catch(() => { });
|
|
867
2143
|
}
|
|
868
2144
|
}
|
|
869
2145
|
// Runtime类中添加监控函数
|
|
@@ -872,7 +2148,7 @@ class Runtime {
|
|
|
872
2148
|
for (const meme of memes) {
|
|
873
2149
|
const words = this.kvm.get(meme.pointID) || [];
|
|
874
2150
|
if (words.length > this.MAX_MEME_WORDS * 0.8) { // 如果接近最大限制
|
|
875
|
-
|
|
2151
|
+
// console.log(`[MONITOR] 检测到大模因: ${meme.pointID}, 词数: ${words.length}`);
|
|
876
2152
|
this.splitMemeIfNeeded(meme.pointID); // 尝试分裂
|
|
877
2153
|
}
|
|
878
2154
|
}
|
|
@@ -926,12 +2202,14 @@ class Runtime {
|
|
|
926
2202
|
}
|
|
927
2203
|
|
|
928
2204
|
// 记录词语点被访问
|
|
929
|
-
logWordAccess(word) {
|
|
930
|
-
const
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
2205
|
+
logWordAccess(word, sessionId) {
|
|
2206
|
+
const sid = sessionId || this.session.getActiveSessionId();
|
|
2207
|
+
let perSession = this.wordAccessLog.get(word);
|
|
2208
|
+
if (!perSession) {
|
|
2209
|
+
perSession = new Map();
|
|
2210
|
+
this.wordAccessLog.set(word, perSession);
|
|
2211
|
+
}
|
|
2212
|
+
perSession.set(sid, (perSession.get(sid) || 0) + 1);
|
|
935
2213
|
}
|
|
936
2214
|
registerClone() {
|
|
937
2215
|
Runtime.cloneRegistry.add(this);
|
|
@@ -945,56 +2223,59 @@ class Runtime {
|
|
|
945
2223
|
this.kvm.memory.clear();
|
|
946
2224
|
if (this.wordAccessLog) this.wordAccessLog.clear();
|
|
947
2225
|
if (this.forgetTimer) clearInterval(this.forgetTimer);
|
|
948
|
-
// 其它属性也可清空
|
|
949
2226
|
}
|
|
2227
|
+
// 将遗忘策略改为“最近N个会话窗口”
|
|
950
2228
|
forgetWords() {
|
|
951
|
-
const now = Date.now();
|
|
952
|
-
const windowMs = 350 * 1000;
|
|
953
|
-
|
|
954
|
-
// 只保留窗口内访问
|
|
955
|
-
for (const [key, times] of this.wordAccessLog.entries()) {
|
|
956
|
-
const recent = times.filter(t => now - t <= windowMs);
|
|
957
|
-
this.wordAccessLog.set(key, recent);
|
|
958
|
-
}
|
|
959
|
-
|
|
960
2229
|
// 保护:收集所有被KVM引用的词
|
|
961
2230
|
const protectedWords = new Set();
|
|
962
2231
|
for (const [, words] of this.kvm.memory.entries()) {
|
|
963
|
-
if (Array.isArray(words))
|
|
964
|
-
for (const w of words) protectedWords.add(w);
|
|
965
|
-
}
|
|
2232
|
+
if (Array.isArray(words)) for (const w of words) protectedWords.add(w);
|
|
966
2233
|
}
|
|
967
2234
|
|
|
968
|
-
// 若词表过小,直接跳过遗忘,避免崩塌
|
|
969
2235
|
const vocabSize = this.vocabManager.vocab.length;
|
|
970
2236
|
if (vocabSize < 1000) {
|
|
971
2237
|
console.log('[FORGET] 词表过小,跳过本轮遗忘');
|
|
972
2238
|
return;
|
|
973
2239
|
}
|
|
974
2240
|
|
|
975
|
-
|
|
2241
|
+
const recentSessions = this.session.getRecentSessionIds(this.config.memoryRecentSessions || 200);
|
|
2242
|
+
const recentSet = new Set(recentSessions);
|
|
2243
|
+
|
|
976
2244
|
const stats = [];
|
|
977
|
-
for (const [word,
|
|
2245
|
+
for (const [word, perSession] of this.wordAccessLog.entries()) {
|
|
978
2246
|
if (!this.wordGraph.points.has(word)) continue;
|
|
979
2247
|
if (!this.vocabManager.word2idx.has(word)) continue;
|
|
980
2248
|
if (protectedWords.has(word)) continue;
|
|
981
|
-
|
|
2249
|
+
|
|
2250
|
+
// 统计最近会话窗口内的使用次数
|
|
2251
|
+
let count = 0;
|
|
2252
|
+
if (perSession instanceof Map) {
|
|
2253
|
+
for (const [sid, c] of perSession.entries()) {
|
|
2254
|
+
if (recentSet.has(sid)) count += c || 0;
|
|
2255
|
+
}
|
|
2256
|
+
} else if (Array.isArray(perSession)) {
|
|
2257
|
+
// 兼容旧格式(时间戳数组),视作一个遗留会话
|
|
2258
|
+
count = perSession.length;
|
|
2259
|
+
}
|
|
2260
|
+
stats.push({ word, count });
|
|
982
2261
|
}
|
|
983
2262
|
if (stats.length === 0) return;
|
|
984
2263
|
|
|
985
2264
|
stats.sort((a, b) => a.count - b.count);
|
|
986
2265
|
|
|
987
|
-
//
|
|
988
|
-
const maxForgetRate = 0.001;
|
|
2266
|
+
// 每轮最多遗忘 0.1%(更保守)
|
|
2267
|
+
const maxForgetRate = 0.001;
|
|
989
2268
|
const n = Math.max(1, Math.floor(stats.length * maxForgetRate));
|
|
2269
|
+
const toForget = stats.slice(0, n).filter(s => s.count === 0).map(s => s.word);
|
|
2270
|
+
|
|
2271
|
+
if (toForget.length === 0) return;
|
|
990
2272
|
|
|
991
|
-
const toForget = stats.slice(0, n).map(s => s.word);
|
|
992
2273
|
for (const word of toForget) {
|
|
993
2274
|
this.wordGraph.points.delete(word);
|
|
994
2275
|
this.vocabManager.vocab = this.vocabManager.vocab.filter(w => w !== word);
|
|
995
2276
|
this.vocabManager.updateMappings();
|
|
996
2277
|
this.wordAccessLog.delete(word);
|
|
997
|
-
console.log(`[FORGET]
|
|
2278
|
+
console.log(`[FORGET] 淘汰词语点(无最近会话使用): ${word}`);
|
|
998
2279
|
|
|
999
2280
|
// 同步清理所有模因节点的词表
|
|
1000
2281
|
for (const [memeID, words] of this.kvm.memory.entries()) {
|
|
@@ -1010,9 +2291,6 @@ class Runtime {
|
|
|
1010
2291
|
}
|
|
1011
2292
|
}
|
|
1012
2293
|
}
|
|
1013
|
-
|
|
1014
|
-
// 不在这里调用 runMainLoop,避免循环内触发再次遗忘
|
|
1015
|
-
// this.runMainLoop();
|
|
1016
2294
|
}
|
|
1017
2295
|
// 修改 initWordGraph 方法
|
|
1018
2296
|
initWordGraph() {
|
|
@@ -1080,19 +2358,15 @@ class Runtime {
|
|
|
1080
2358
|
visitCount++;
|
|
1081
2359
|
activatedOrder.push(id);
|
|
1082
2360
|
|
|
1083
|
-
// 仅在是“词”时记录访问,避免把模因ID写入词访问日志
|
|
1084
2361
|
if (this.wordGraph.points.has(id)) {
|
|
1085
2362
|
this.logWordAccess(id);
|
|
1086
2363
|
}
|
|
1087
2364
|
|
|
1088
|
-
|
|
1089
|
-
|
|
1090
|
-
|
|
1091
|
-
|
|
1092
|
-
|
|
1093
|
-
if (!visited.has(neighborID)) {
|
|
1094
|
-
next.push({ id: neighborID, value: value - decayK * weight });
|
|
1095
|
-
}
|
|
2365
|
+
// 改为通过 graph.getNeighbors 访问(窗口内)
|
|
2366
|
+
const neighbors = this.graph.getNeighbors(id, 50);
|
|
2367
|
+
for (const [weight, neighborID] of neighbors) {
|
|
2368
|
+
if (!visited.has(neighborID)) {
|
|
2369
|
+
next.push({ id: neighborID, value: value - decayK * weight });
|
|
1096
2370
|
}
|
|
1097
2371
|
}
|
|
1098
2372
|
}
|
|
@@ -1147,7 +2421,11 @@ class Runtime {
|
|
|
1147
2421
|
processInput(wordsArr, { addNewWords = true } = {}) {
|
|
1148
2422
|
wordsArr = this.filterStopWords(wordsArr);
|
|
1149
2423
|
if (wordsArr.length === 0) { console.log('[FILTER] 输入全为停用词,已全部过滤'); return; }
|
|
1150
|
-
|
|
2424
|
+
// console.log('Processing input:', wordsArr);
|
|
2425
|
+
// 异步触发在线检索(不阻塞)
|
|
2426
|
+
if (triggerResearch && this.researcher) {
|
|
2427
|
+
try { this.researcher.scheduleFromWords(wordsArr); } catch (_) { }
|
|
2428
|
+
}
|
|
1151
2429
|
// 批量处理新词添加
|
|
1152
2430
|
if (addNewWords) {
|
|
1153
2431
|
// 一次性检查哪些词不在词表中
|
|
@@ -1233,7 +2511,7 @@ class Runtime {
|
|
|
1233
2511
|
const overlap = wordsArr.filter(w => memeWords.includes(w)).length;
|
|
1234
2512
|
if (overlap >= this.MIN_OVERLAP && memeWords.length + wordsArr.length <= this.MAX_MEME_WORDS) {
|
|
1235
2513
|
this.kvm.set(minMemeID, Array.from(new Set([...memeWords, ...wordsArr])));
|
|
1236
|
-
|
|
2514
|
+
/// console.log(`Merged to existing meme: ${minMemeID}`);
|
|
1237
2515
|
} else {
|
|
1238
2516
|
// 创建新模因,使用有向连接
|
|
1239
2517
|
const newID = 'meme_' + Date.now();
|
|
@@ -1243,9 +2521,9 @@ class Runtime {
|
|
|
1243
2521
|
// 单向连接到最近的模因 (方向:2表示指向对方)
|
|
1244
2522
|
if (minMemeID) {
|
|
1245
2523
|
this.graph.addDirectionalEdge(newID, minMemeID, minDistance, 2);
|
|
1246
|
-
|
|
2524
|
+
// console.log(`[LINK] 新模因 ${newID} 单向连接到最近模因 ${minMemeID}`);
|
|
1247
2525
|
}
|
|
1248
|
-
|
|
2526
|
+
// console.log(`Created new meme: ${newID}`);
|
|
1249
2527
|
}
|
|
1250
2528
|
} else {
|
|
1251
2529
|
// 创建新模因
|
|
@@ -1256,9 +2534,9 @@ class Runtime {
|
|
|
1256
2534
|
// 如果有较近的模因,仍然创建单向连接
|
|
1257
2535
|
if (minMemeID) {
|
|
1258
2536
|
this.graph.addDirectionalEdge(newID, minMemeID, Math.min(minDistance, 5), 2);
|
|
1259
|
-
|
|
2537
|
+
// console.log(`[LINK] 新模因 ${newID} 单向连接到最近模因 ${minMemeID}`);
|
|
1260
2538
|
}
|
|
1261
|
-
|
|
2539
|
+
// console.log(`Created new meme: ${newID}`);
|
|
1262
2540
|
}
|
|
1263
2541
|
}
|
|
1264
2542
|
// 新增批量添加边的辅助方法
|
|
@@ -1542,7 +2820,7 @@ class Runtime {
|
|
|
1542
2820
|
this.kvm.memory.delete(memeB.pointID);
|
|
1543
2821
|
memesToDelete.add(memeB.pointID);
|
|
1544
2822
|
|
|
1545
|
-
|
|
2823
|
+
// console.log(`Merged memes: ${memeA.pointID} <- ${memeB.pointID}`);
|
|
1546
2824
|
// 合并后立即尝试分裂
|
|
1547
2825
|
this.splitMemeIfNeeded(memeA.pointID);
|
|
1548
2826
|
} else {
|
|
@@ -1559,7 +2837,7 @@ class Runtime {
|
|
|
1559
2837
|
// 如果没有双向边,则添加双向边
|
|
1560
2838
|
if (!(existAtoB.exist && existAtoB.type === 0) && !(existBtoA.exist && existBtoA.type === 0)) {
|
|
1561
2839
|
this.graph.addBidirectionalEdge(memeA.pointID, memeB.pointID, avgDist);
|
|
1562
|
-
|
|
2840
|
+
// console.log(`[LINK] 添加双向边: ${memeA.pointID} <-> ${memeB.pointID} (avgDist=${avgDist})`);
|
|
1563
2841
|
}
|
|
1564
2842
|
}
|
|
1565
2843
|
}
|
|
@@ -1590,14 +2868,14 @@ class Runtime {
|
|
|
1590
2868
|
const newID = newIDs[i];
|
|
1591
2869
|
this.graph.addPoint(newID, []);
|
|
1592
2870
|
this.kvm.set(newID, chunk);
|
|
1593
|
-
|
|
2871
|
+
// console.log(`[SPLIT-FORCE] 新建模因: ${newID} 词数: ${chunk.length}`);
|
|
1594
2872
|
}
|
|
1595
2873
|
}
|
|
1596
2874
|
|
|
1597
2875
|
// 删除原模因
|
|
1598
2876
|
this.graph.points.delete(memeID);
|
|
1599
2877
|
this.kvm.memory.delete(memeID);
|
|
1600
|
-
|
|
2878
|
+
// console.log(`[SPLIT-FORCE] 删除原模因: ${memeID}`);
|
|
1601
2879
|
return;
|
|
1602
2880
|
}
|
|
1603
2881
|
|
|
@@ -1645,12 +2923,12 @@ class Runtime {
|
|
|
1645
2923
|
const newID = 'meme_' + Date.now() + '_' + Math.floor(Math.random() * 10000);
|
|
1646
2924
|
this.graph.addPoint(newID, []);
|
|
1647
2925
|
this.kvm.set(newID, comp);
|
|
1648
|
-
|
|
2926
|
+
// console.log(`[SPLIT] 新建模因: ${newID} 词数: ${comp.length}`);
|
|
1649
2927
|
}
|
|
1650
2928
|
// 删除原节点
|
|
1651
2929
|
this.graph.points.delete(memeID);
|
|
1652
2930
|
this.kvm.memory.delete(memeID);
|
|
1653
|
-
|
|
2931
|
+
// console.log(`[SPLIT] 删除原模因: ${memeID}`);
|
|
1654
2932
|
}
|
|
1655
2933
|
}
|
|
1656
2934
|
}
|
|
@@ -1670,6 +2948,7 @@ class AssociationLayer {
|
|
|
1670
2948
|
constructor(runtime) {
|
|
1671
2949
|
this.runtime = runtime;
|
|
1672
2950
|
this.patterns = []; // 存储发现的模因边关系模式
|
|
2951
|
+
this._prevClone = null; // 差量克隆基线
|
|
1673
2952
|
}
|
|
1674
2953
|
|
|
1675
2954
|
// 识别模因层的边关系模式
|
|
@@ -1726,83 +3005,33 @@ class AssociationLayer {
|
|
|
1726
3005
|
// ...前面的代码...
|
|
1727
3006
|
|
|
1728
3007
|
// 创建系统副本
|
|
3008
|
+
// 创建系统副本(差量克隆)
|
|
1729
3009
|
async cloneSystem() {
|
|
1730
|
-
console.log('[CLONE]
|
|
1731
|
-
const clone = new Runtime();
|
|
1732
|
-
clone.registerClone();
|
|
1733
|
-
|
|
3010
|
+
console.log('[CLONE] 差量克隆系统(递归分区哈希)');
|
|
1734
3011
|
try {
|
|
1735
|
-
|
|
1736
|
-
clone.
|
|
1737
|
-
|
|
1738
|
-
|
|
1739
|
-
|
|
1740
|
-
|
|
1741
|
-
|
|
1742
|
-
|
|
1743
|
-
|
|
3012
|
+
const clone = await deltaCloneRuntime(this._prevClone, this.runtime);
|
|
3013
|
+
clone.registerClone();
|
|
3014
|
+
this._prevClone = clone;
|
|
3015
|
+
return clone;
|
|
3016
|
+
} catch (err) {
|
|
3017
|
+
console.warn('[CLONE] 差量克隆失败,回退全量:', err.message);
|
|
3018
|
+
const fallback = new Runtime();
|
|
3019
|
+
fallback.registerClone();
|
|
3020
|
+
fallback.spider = this.runtime.spider;
|
|
3021
|
+
fallback.vocabManager.vocab = [...this.runtime.vocabManager.vocab];
|
|
3022
|
+
fallback.vocabManager.updateMappings();
|
|
1744
3023
|
for (const [key, value] of this.runtime.wordGraph.points.entries()) {
|
|
1745
|
-
|
|
1746
|
-
value.connect.map(conn => [...conn]) : []);
|
|
3024
|
+
fallback.wordGraph.addPoint(key, Array.isArray(value.connect) ? value.connect.map(conn => [...conn]) : []);
|
|
1747
3025
|
}
|
|
1748
|
-
|
|
1749
|
-
// 4. 复制模因网络
|
|
1750
|
-
console.log('[CLONE] 开始复制模因网络...');
|
|
1751
3026
|
for (const [key, value] of this.runtime.graph.points.entries()) {
|
|
1752
|
-
|
|
1753
|
-
value.connect.map(conn => [...conn]) : []);
|
|
3027
|
+
fallback.graph.addPoint(key, Array.isArray(value.connect) ? value.connect.map(conn => [...conn]) : []);
|
|
1754
3028
|
}
|
|
1755
|
-
|
|
1756
|
-
// 5. 复制KVM - 确保一致性和类型
|
|
1757
|
-
console.log('[CLONE] 开始复制KVM...');
|
|
1758
|
-
let nonEmptyCount = 0;
|
|
1759
|
-
let totalWordCount = 0;
|
|
1760
|
-
|
|
1761
3029
|
for (const [key, value] of this.runtime.kvm.memory.entries()) {
|
|
1762
|
-
|
|
1763
|
-
if (Array.isArray(value)) {
|
|
1764
|
-
// 确保数组中每个元素都是字符串且归一化
|
|
1765
|
-
const normalizedWords = value.map(word =>
|
|
1766
|
-
typeof word === 'string' ? word.toLowerCase().trim() : String(word)
|
|
1767
|
-
);
|
|
1768
|
-
|
|
1769
|
-
// 应用词形归一化
|
|
1770
|
-
const lemmatizedWords = clone.spider.lemmatizeWords(normalizedWords);
|
|
1771
|
-
clone.kvm.set(key, lemmatizedWords);
|
|
1772
|
-
|
|
1773
|
-
if (lemmatizedWords.length > 0) {
|
|
1774
|
-
nonEmptyCount++;
|
|
1775
|
-
totalWordCount += lemmatizedWords.length;
|
|
1776
|
-
}
|
|
1777
|
-
} else if (value != null) {
|
|
1778
|
-
// 非数组值转换为单元素数组
|
|
1779
|
-
const singleWord = String(value).toLowerCase().trim();
|
|
1780
|
-
const lemmatizedWord = clone.spider.lemmatize(singleWord);
|
|
1781
|
-
clone.kvm.set(key, [lemmatizedWord]);
|
|
1782
|
-
|
|
1783
|
-
nonEmptyCount++;
|
|
1784
|
-
totalWordCount++;
|
|
1785
|
-
} else {
|
|
1786
|
-
// null或undefined情况,设为空数组
|
|
1787
|
-
clone.kvm.set(key, []);
|
|
1788
|
-
}
|
|
3030
|
+
fallback.kvm.set(key, Array.isArray(value) ? [...value] : (value == null ? [] : [String(value)]));
|
|
1789
3031
|
}
|
|
1790
|
-
|
|
1791
|
-
console.log(`[CLONE] KVM复制完成: ${nonEmptyCount}个非空模因,${totalWordCount}个词语`);
|
|
1792
|
-
|
|
1793
|
-
// 6. 复制其他配置和参数
|
|
1794
|
-
console.log('[CLONE] 开始复制词表和其他属性...');
|
|
1795
|
-
clone.MAX_MEME_WORDS = this.runtime.MAX_MEME_WORDS;
|
|
1796
|
-
clone.MIN_OVERLAP = this.runtime.MIN_OVERLAP;
|
|
1797
|
-
clone.config = { ...this.runtime.config };
|
|
1798
|
-
|
|
1799
|
-
console.log('[CLONE] 系统副本创建完成');
|
|
1800
|
-
return clone;
|
|
1801
|
-
} catch (error) {
|
|
1802
|
-
console.error('[CLONE ERROR]', error);
|
|
1803
|
-
return clone;
|
|
3032
|
+
return fallback;
|
|
1804
3033
|
}
|
|
1805
|
-
}
|
|
3034
|
+
}
|
|
1806
3035
|
|
|
1807
3036
|
applyPatternsToClone(systemClone) {
|
|
1808
3037
|
console.log('[CLONE] 应用关系模式到副本');
|
|
@@ -1859,7 +3088,7 @@ class AssociationLayer {
|
|
|
1859
3088
|
meme.connect[connIdx][0] = newWeight;
|
|
1860
3089
|
meme.connect[connIdx][2] = direction;
|
|
1861
3090
|
|
|
1862
|
-
|
|
3091
|
+
// console.log(`[CLONE] 修改边权重: ${meme.pointID}->${meme.connect[connIdx][1]}, ${oldWeight}->${newWeight.toFixed(2)}, 保留方向: ${direction}`);
|
|
1863
3092
|
modified++;
|
|
1864
3093
|
}
|
|
1865
3094
|
}
|
|
@@ -2571,10 +3800,10 @@ class controller {
|
|
|
2571
3800
|
}
|
|
2572
3801
|
// 处理用户输入
|
|
2573
3802
|
async handleInput(text) {
|
|
3803
|
+
const sid = this.runtime.session.ensureActive();
|
|
3804
|
+
this.runtime.session.incMessage(sid);
|
|
2574
3805
|
const words = text.toLowerCase().split(' ').filter(w => w.length > 0);
|
|
2575
|
-
this.runtime.processInput(words, { addNewWords: false });
|
|
2576
|
-
// 用模因网络参与推理
|
|
2577
|
-
//console.log('[DEBUG] 当前所有模因节点:', this.runtime.kvm.memory);
|
|
3806
|
+
this.runtime.processInput(words, { addNewWords: false, triggerResearch: true });
|
|
2578
3807
|
return await this.runtime.generateResponseWithMemes(words);
|
|
2579
3808
|
}
|
|
2580
3809
|
// 启动自主学习
|
|
@@ -2648,15 +3877,23 @@ let saveQueued = false;
|
|
|
2648
3877
|
|
|
2649
3878
|
// 保存所有点、图、词表等到硬盘
|
|
2650
3879
|
/**
 * Snapshot the runtime into the in-memory save cache and flag it for writing.
 *
 * Nothing is written to disk here: the snapshot is stored in
 * `latestRuntimeData` and `saveQueued` is set to true.
 *
 * The word access log is serialised as `[word, [[sessionId, count], ...]]`.
 * Legacy array entries collapse into a single `['legacy', length]` pair and
 * anything else becomes an empty list. A runtime without a word access log or
 * session manager is tolerated (empty log, `sessions` undefined), matching the
 * guard loadAll applies to `runtime.wordAccessLog` on restore.
 *
 * @param {object} runtime - Runtime to snapshot.
 */
function saveAll(runtime) {
    const accessLog = runtime.wordAccessLog instanceof Map ? runtime.wordAccessLog : new Map();

    const serializedWordAccess = Array.from(accessLog.entries(), ([word, per]) => {
        if (per instanceof Map) return [word, Array.from(per.entries())];
        if (Array.isArray(per)) return [word, [['legacy', per.length]]]; // old timestamp-array format
        return [word, []];
    });

    // Only the in-memory cache is refreshed here.
    latestRuntimeData = {
        memes: runtime.graph.getAllPoints(),
        wordGraph: Array.from(runtime.wordGraph.points.values()),
        kvm: Array.from(runtime.kvm.memory.entries()),
        vocab: runtime.vocabManager.vocab,
        wordAccessLog: serializedWordAccess,
        // Session bookkeeping is persisted alongside the graph data.
        sessions: runtime.session?.export?.()
    };
    saveQueued = true;
}
|
|
2662
3899
|
// 定时真正写入硬盘,只保存最新的
|
|
@@ -2668,6 +3905,28 @@ setInterval(() => {
|
|
|
2668
3905
|
saveQueued = false;
|
|
2669
3906
|
}
|
|
2670
3907
|
}, 10000); // 每10秒最多写盘一次
|
|
3908
|
+
// Every 20 seconds, feed recently crawled documents into controller A's runtime.
// The round is skipped with probability (1 - spiderMix.onlineWeight), so the
// configured online weight controls how often online documents are ingested.
// NOTE(review): main() registers a second, unconditional ingestion interval over
// the same crawler documents — confirm that both loops are intended.
setInterval(async () => {
    try {
        if (!global.__crawler || !global.ctrlA) return;
        const runtime = global.ctrlA.runtime;

        // `??` rather than `||`: a configured weight of 0 must mean "never
        // ingest", not silently fall back to 0.5. A missing or non-numeric
        // weight falls back to 0.5.
        const configured = Number(runtime.config?.spiderMix?.onlineWeight ?? 0.5);
        const onlineWeight = Number.isNaN(configured) ? 0.5 : configured;

        // Math.random() is in [0, 1): weight 0 always skips, weight 1 never skips.
        if (Math.random() >= onlineWeight) return;

        const docs = global.__crawler.loadRecentDocs(12);
        if (!docs.length) return;

        let fed = 0;
        for (const doc of docs) {
            fed += await runtime.ingestTextDocument(doc.text, { addNewWords: true, minLen: 6 });
        }

        if (fed > 0) {
            console.log(`[INGEST] 在线文档本轮投喂 ${fed} 段 (mix=${onlineWeight.toFixed(2)})`);
            runtime.updateAttentionLinks();
        }
    } catch (e) {
        console.warn('[INGEST] 失败:', e.message);
    }
}, 20_000);
|
|
3929
|
+
// 从硬盘恢复
|
|
2671
3930
|
// 从硬盘恢复
|
|
2672
3931
|
function loadAll(runtime) {
|
|
2673
3932
|
if (!fs.existsSync(SAVE_PATH)) return;
|
|
@@ -2678,19 +3937,34 @@ function loadAll(runtime) {
|
|
|
2678
3937
|
runtime.wordGraph.addPoint(point.pointID, point.connect);
|
|
2679
3938
|
}
|
|
2680
3939
|
}
|
|
2681
|
-
// 3. 修改 loadAll 中的数据恢复
|
|
2682
3940
|
if (data.kvm) {
|
|
2683
3941
|
for (const [k, v] of data.kvm) {
|
|
2684
3942
|
runtime.kvm.set(k, Array.isArray(v) ? v : [String(v)]);
|
|
2685
|
-
// 确保一定是数组
|
|
2686
3943
|
}
|
|
2687
3944
|
}
|
|
2688
3945
|
if (data.vocab) {
|
|
2689
3946
|
runtime.vocabManager.vocab = data.vocab;
|
|
2690
3947
|
runtime.vocabManager.updateMappings();
|
|
2691
3948
|
}
|
|
3949
|
+
// 恢复会话
|
|
3950
|
+
if (data.sessions) {
|
|
3951
|
+
runtime.session.import(data.sessions);
|
|
3952
|
+
}
|
|
3953
|
+
// 恢复词访问日志(新格式 Map<word, Map<sessionId, count>>)
|
|
2692
3954
|
if (data.wordAccessLog && runtime.wordAccessLog) {
|
|
2693
|
-
|
|
3955
|
+
const restored = new Map();
|
|
3956
|
+
for (const [word, per] of data.wordAccessLog) {
|
|
3957
|
+
if (Array.isArray(per) && per.length > 0 && Array.isArray(per[0])) {
|
|
3958
|
+
// 新格式:[[sid, count], ...]
|
|
3959
|
+
restored.set(word, new Map(per));
|
|
3960
|
+
} else if (Array.isArray(per)) {
|
|
3961
|
+
// 旧格式的 timestamps 数组 -> 合并为 legacy 会话
|
|
3962
|
+
restored.set(word, new Map([['legacy', per.length]]));
|
|
3963
|
+
} else {
|
|
3964
|
+
restored.set(word, new Map());
|
|
3965
|
+
}
|
|
3966
|
+
}
|
|
3967
|
+
runtime.wordAccessLog = restored;
|
|
2694
3968
|
}
|
|
2695
3969
|
console.log(`[LOAD] 系统状态已从 ${SAVE_PATH} 恢复`);
|
|
2696
3970
|
}
|
|
@@ -2698,138 +3972,16 @@ function loadAll(runtime) {
|
|
|
2698
3972
|
|
|
2699
3973
|
|
|
2700
3974
|
/**
 * Start rotating learning across the three global controllers (A, B, C).
 *
 * The RotationManager is kept on `global.__rotation` and returned, so it can
 * be stopped later. If an earlier manager is still registered there, it is
 * stopped first (when it exposes stop()), so calling this function twice does
 * not leave two rotations running side by side.
 *
 * @returns {RotationManager} The manager that was started.
 */
function scheduleCrossLearning() {
    global.__rotation?.stop?.();

    const rot = new RotationManager(global.ctrlA, global.ctrlB, global.ctrlC, {
        cycleMs: 15 * 60 * 1000, // adjust as needed
        cooldownMs: 60 * 1000,
        learnIters: 3,
        minImprove: 0.005
    });
    global.__rotation = rot;
    rot.start();
    return rot;
}
|
|
2832
3983
|
|
|
3984
|
+
|
|
2833
3985
|
// 新增:测试克隆系统的词汇匹配能力
|
|
2834
3986
|
function testCloneMatching(runtime) {
|
|
2835
3987
|
// 从词表中随机选取10个词
|
|
@@ -2968,7 +4120,7 @@ function optimizeMemory() {
|
|
|
2968
4120
|
}
|
|
2969
4121
|
}
|
|
2970
4122
|
async function main() {
|
|
2971
|
-
|
|
4123
|
+
console.log('Starting AI system...');
|
|
2972
4124
|
redisClient = redis.createClient();
|
|
2973
4125
|
// 创建三个全局控制器副本
|
|
2974
4126
|
const ctrlA = new controller();
|
|
@@ -2983,11 +4135,38 @@ async function main() {
|
|
|
2983
4135
|
loadAll(ctrlA.runtime);
|
|
2984
4136
|
loadAll(ctrlB.runtime);
|
|
2985
4137
|
loadAll(ctrlC.runtime);
|
|
2986
|
-
|
|
4138
|
+
const crawler = new CrawlerManager({
|
|
4139
|
+
concurrency: 5,
|
|
4140
|
+
perHostDelayMs: 2000,
|
|
4141
|
+
requestTimeoutMs: 12000,
|
|
4142
|
+
allowLang: ['en', 'zh'],
|
|
4143
|
+
seedsFile: path.join(__dirname, 'robots', 'seeds.txt'),
|
|
4144
|
+
proxiesFile: path.join(__dirname, 'crawler', 'proxies.txt')
|
|
4145
|
+
});
|
|
4146
|
+
global.__crawler = crawler;
|
|
4147
|
+
crawler.start();
|
|
4148
|
+
setInterval(async () => {
|
|
4149
|
+
try {
|
|
4150
|
+
const docs = crawler.loadRecentDocs(12);
|
|
4151
|
+
if (!docs.length) return;
|
|
4152
|
+
let fed = 0;
|
|
4153
|
+
for (const d of docs) {
|
|
4154
|
+
fed += await ctrlA.runtime.ingestTextDocument(d.text, { addNewWords: true, minLen: 6 });
|
|
4155
|
+
}
|
|
4156
|
+
if (fed > 0) {
|
|
4157
|
+
console.log(`[INGEST] 在线文档本轮投喂 ${fed} 段`);
|
|
4158
|
+
// 适当刷新注意力连接
|
|
4159
|
+
ctrlA.runtime.updateAttentionLinks();
|
|
4160
|
+
}
|
|
4161
|
+
} catch (e) {
|
|
4162
|
+
console.warn('[INGEST] 失败:', e.message);
|
|
4163
|
+
}
|
|
4164
|
+
}, 20_000); // 每20秒消费一批
|
|
2987
4165
|
// 用A副本初始化语料和模因
|
|
2988
4166
|
console.time('articleProcessing');
|
|
2989
4167
|
const articles = ctrlA.runtime.buildVocabFromSpider();
|
|
2990
4168
|
console.log(`Spider: 加载文章数: ${articles.length}`);
|
|
4169
|
+
// 周期性消费在线文档 -> 投喂到 ctrlA.runtime
|
|
2991
4170
|
|
|
2992
4171
|
// 修复:在首次使用前定义 lemmaCsvPath
|
|
2993
4172
|
const BATCH_SIZE = 20;
|
|
@@ -3040,13 +4219,171 @@ async function main() {
|
|
|
3040
4219
|
//每12分钟尝试启动memebarrier
|
|
3041
4220
|
ctrlA.runtime.memeBarrier.start();
|
|
3042
4221
|
}, 1000 * 60 * 12);
|
|
4222
|
+
|
|
4223
|
+
|
|
4224
|
+
// 可选:启动(默认不开启,避免未配置API Key)
|
|
4225
|
+
if (String(process.env.ADV_AUTOSTART || '').toLowerCase() === 'true') {
|
|
4226
|
+
adv.start();
|
|
4227
|
+
}
|
|
4228
|
+
// 新增:serve 侧参数调优 API(默认不启用自动调参,仅手动设置)
|
|
4229
|
+
app.get('/api/tune/get', (req, res) => {
|
|
4230
|
+
try {
|
|
4231
|
+
const rt = global.ctrlA?.runtime;
|
|
4232
|
+
if (!rt) return res.status(500).json({ ok: false, error: 'runtime missing' });
|
|
4233
|
+
res.json({
|
|
4234
|
+
ok: true,
|
|
4235
|
+
params: {
|
|
4236
|
+
decayK: rt.config?.decayK ?? 1,
|
|
4237
|
+
maxLen: rt.config?.maxLen ?? 16,
|
|
4238
|
+
spiderMix: rt.config?.spiderMix ?? { onlineWeight: 0.5, offlineWeight: 0.5 },
|
|
4239
|
+
crawler: {
|
|
4240
|
+
perQuery: global.__crawler?.__tune_perQuery ?? 8,
|
|
4241
|
+
maxCrawl: global.__crawler?.__tune_maxCrawl ?? 12
|
|
4242
|
+
}
|
|
4243
|
+
}
|
|
4244
|
+
});
|
|
4245
|
+
} catch (e) {
|
|
4246
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4247
|
+
}
|
|
4248
|
+
});
|
|
4249
|
+
|
|
4250
|
+
app.post('/api/tune/set', (req, res) => {
|
|
4251
|
+
try {
|
|
4252
|
+
const rt = global.ctrlA?.runtime;
|
|
4253
|
+
if (!rt) return res.status(500).json({ ok: false, error: 'runtime missing' });
|
|
4254
|
+
const snap = applyServeTunableParams(rt, req.body || {});
|
|
4255
|
+
res.json({ ok: true, snapshot: snap });
|
|
4256
|
+
} catch (e) {
|
|
4257
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4258
|
+
}
|
|
4259
|
+
});
|
|
4260
|
+
// API
|
|
4261
|
+
// 初始化对抗学习调度器时传递 promptMode/targetWeights 可选参数
|
|
4262
|
+
// ...existing code...
|
|
4263
|
+
const adv = new AdversaryScheduler(ctrlA.runtime, {
|
|
4264
|
+
providerSpec: process.env.ADV_MODEL || 'ollama:llama3.1:405b',
|
|
4265
|
+
judgeMode: process.env.ADV_JUDGE || 'llm',
|
|
4266
|
+
intervalMs: Number(process.env.ADV_INTERVAL || 60_000),
|
|
4267
|
+
batchSize: Number(process.env.ADV_BATCH || 3),
|
|
4268
|
+
promptMode: process.env.ADV_PROMPT_MODE || 'mixed',
|
|
4269
|
+
targetWeights: {
|
|
4270
|
+
decayK: Number(process.env.TUNE_W_DECAYK || 1.0),
|
|
4271
|
+
maxLen: Number(process.env.TUNE_W_MAXLEN || 0.7),
|
|
4272
|
+
onlineWeight: Number(process.env.TUNE_W_ONLINE || 0.8),
|
|
4273
|
+
edgeWeight: Number(process.env.TUNE_W_EDGE || 0.4),
|
|
4274
|
+
perQuery: Number(process.env.TUNE_W_PERQ || 0.5),
|
|
4275
|
+
maxCrawl: Number(process.env.TUNE_W_MAXC || 0.5),
|
|
4276
|
+
}
|
|
4277
|
+
});
|
|
4278
|
+
// ...existing code...
|
|
4279
|
+
global.__adversary = adv;
|
|
4280
|
+
|
|
4281
|
+
// 对抗学习控制 API 增补 promptMode / targets
|
|
4282
|
+
app.post('/api/adversary/start', (req, res) => {
|
|
4283
|
+
try {
|
|
4284
|
+
const { provider, judgeMode, intervalMs, batchSize, promptMode, targetWeights } = req.body || {};
|
|
4285
|
+
if (provider) {
|
|
4286
|
+
const neo = new AdversaryScheduler(global.ctrlA.runtime, {
|
|
4287
|
+
providerSpec: provider,
|
|
4288
|
+
judgeMode: judgeMode || adv.opts.judgeMode,
|
|
4289
|
+
intervalMs: Number(intervalMs || adv.opts.intervalMs),
|
|
4290
|
+
batchSize: Number(batchSize || adv.opts.batchSize),
|
|
4291
|
+
promptMode: promptMode || adv.opts.promptMode,
|
|
4292
|
+
targetWeights: targetWeights || adv.opts.targetWeights
|
|
4293
|
+
});
|
|
4294
|
+
global.__adversary?.stop?.();
|
|
4295
|
+
global.__adversary = neo;
|
|
4296
|
+
global.__adversary.start();
|
|
4297
|
+
} else {
|
|
4298
|
+
if (promptMode) adv.setPromptMode(promptMode);
|
|
4299
|
+
if (targetWeights) adv.setTargets(targetWeights);
|
|
4300
|
+
adv.start();
|
|
4301
|
+
}
|
|
4302
|
+
res.json({ ok: true, status: global.__adversary.getStatus() });
|
|
4303
|
+
} catch (e) {
|
|
4304
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4305
|
+
}
|
|
4306
|
+
});
|
|
4307
|
+
|
|
4308
|
+
app.post('/api/adversary/stop', (req, res) => {
|
|
4309
|
+
try { global.__adversary?.stop?.(); res.json({ ok: true }); }
|
|
4310
|
+
catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4311
|
+
});
|
|
4312
|
+
|
|
4313
|
+
app.get('/api/adversary/status', (req, res) => {
|
|
4314
|
+
try { res.json({ ok: true, status: global.__adversary?.getStatus?.() || { running: false } }); }
|
|
4315
|
+
catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4316
|
+
});
|
|
4317
|
+
|
|
4318
|
+
// 触发一次性对抗评估(可指定 prompts 数组)
|
|
4319
|
+
app.post('/api/adversary/once', async (req, res) => {
|
|
4320
|
+
try {
|
|
4321
|
+
const prompts = Array.isArray(req.body?.prompts) ? req.body.prompts.slice(0, 5) : null;
|
|
4322
|
+
const report = await global.__adversary.evaluateOnce(prompts);
|
|
4323
|
+
res.json({ ok: true, report });
|
|
4324
|
+
} catch (e) {
|
|
4325
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4326
|
+
}
|
|
4327
|
+
});
|
|
4328
|
+
app.get('/api/graph/partitions/status', async (req, res) => {
|
|
4329
|
+
try {
|
|
4330
|
+
const g = global.ctrlA?.runtime?.graph;
|
|
4331
|
+
if (!g || !(g instanceof PartitionedGraphDB)) {
|
|
4332
|
+
return res.json({ ok: true, mode: 'in-memory', loaded: 0 });
|
|
4333
|
+
}
|
|
4334
|
+
const loaded = Array.from(g.loaded.keys());
|
|
4335
|
+
res.json({
|
|
4336
|
+
ok: true,
|
|
4337
|
+
mode: 'partitioned',
|
|
4338
|
+
partitions: g.partitioner.partitions,
|
|
4339
|
+
loaded,
|
|
4340
|
+
maxLoaded: g.maxLoadedPartitions,
|
|
4341
|
+
windowRadius: g.windowRadius,
|
|
4342
|
+
centerPid: g.centerPid
|
|
4343
|
+
});
|
|
4344
|
+
} catch (e) {
|
|
4345
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4346
|
+
}
|
|
4347
|
+
});
|
|
4348
|
+
|
|
4349
|
+
app.post('/api/graph/partitions/flush', async (req, res) => {
|
|
4350
|
+
try {
|
|
4351
|
+
const g = global.ctrlA?.runtime?.graph;
|
|
4352
|
+
if (g && g.flushAll) await g.flushAll();
|
|
4353
|
+
res.json({ ok: true });
|
|
4354
|
+
} catch (e) {
|
|
4355
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4356
|
+
}
|
|
4357
|
+
});
|
|
4358
|
+
|
|
4359
|
+
app.post('/api/graph/prefetch', async (req, res) => {
|
|
4360
|
+
try {
|
|
4361
|
+
const { node } = req.body || {};
|
|
4362
|
+
const g = global.ctrlA?.runtime?.graph;
|
|
4363
|
+
if (!node || !(g instanceof PartitionedGraphDB)) return res.status(400).json({ ok: false, error: 'node 必填/或非分区图' });
|
|
4364
|
+
await g.focusOnPoint(String(node));
|
|
4365
|
+
res.json({ ok: true });
|
|
4366
|
+
} catch (e) {
|
|
4367
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4368
|
+
}
|
|
4369
|
+
});
|
|
3043
4370
|
// API路由 - 只做学习,不返回结果
|
|
3044
4371
|
app.post('/api/chat', async (req, res) => {
|
|
3045
4372
|
try {
|
|
3046
|
-
const { message } = req.body;
|
|
3047
|
-
|
|
3048
|
-
ctrlA.runtime.
|
|
3049
|
-
|
|
4373
|
+
const { message, sessionId } = req.body || {};
|
|
4374
|
+
const headerSid = req.headers['x-session-id'];
|
|
4375
|
+
const sid = sessionId || headerSid || global.ctrlA.runtime.session.ensureActive();
|
|
4376
|
+
|
|
4377
|
+
global.ctrlA.runtime.session.useSession(sid);
|
|
4378
|
+
global.ctrlA.runtime.session.incMessage(sid);
|
|
4379
|
+
|
|
4380
|
+
const words = String(message || '').toLowerCase().split(/\s+/).filter(w => w.length > 0);
|
|
4381
|
+
// 开启按需检索
|
|
4382
|
+
global.ctrlA.runtime.processInput(words, { triggerResearch: true });
|
|
4383
|
+
global.ctrlA.runtime.updateAttentionLinks();
|
|
4384
|
+
|
|
4385
|
+
res.set('X-Session-Id', sid);
|
|
4386
|
+
res.status(204).end();
|
|
3050
4387
|
} catch (error) {
|
|
3051
4388
|
res.status(500).json({ error: error.message });
|
|
3052
4389
|
console.error('Error in /api/chat:', error);
|
|
@@ -3107,7 +4444,20 @@ async function main() {
|
|
|
3107
4444
|
res.status(500).json({ success: false, error: error.message });
|
|
3108
4445
|
}
|
|
3109
4446
|
});
|
|
3110
|
-
|
|
4447
|
+
app.post('/api/crawler/start', (req, res) => {
|
|
4448
|
+
try { global.__crawler?.start(); res.json({ ok: true }); } catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4449
|
+
});
|
|
4450
|
+
app.post('/api/crawler/stop', (req, res) => {
|
|
4451
|
+
try { global.__crawler?.stop(); res.json({ ok: true }); } catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4452
|
+
});
|
|
4453
|
+
app.post('/api/crawler/seed', (req, res) => {
|
|
4454
|
+
const { urls } = req.body || {};
|
|
4455
|
+
const n = global.__crawler?.addSeeds(Array.isArray(urls) ? urls : []) || 0;
|
|
4456
|
+
res.json({ ok: true, added: n });
|
|
4457
|
+
});
|
|
4458
|
+
app.get('/api/crawler/status', (req, res) => {
|
|
4459
|
+
res.json({ ok: true, stats: global.__crawler?.stats() || {} });
|
|
4460
|
+
});
|
|
3111
4461
|
|
|
3112
4462
|
// 模型默认参数
|
|
3113
4463
|
const modelDefaults = {
|
|
@@ -3160,6 +4510,49 @@ async function main() {
|
|
|
3160
4510
|
console.log(`\nAI system running on port ${global.config.masterPortOfMain}`);
|
|
3161
4511
|
console.log(`API available at http://localhost:${global.config.masterPortOfMain}/api/`);
|
|
3162
4512
|
});
|
|
4513
|
+
app.post('/api/search-crawl', async (req, res) => {
|
|
4514
|
+
try {
|
|
4515
|
+
const { prompt, vertical, perQuery, maxEnqueue, crawl, maxCrawl, ingest = true, minLen = 6 } = req.body || {};
|
|
4516
|
+
if (!prompt || String(prompt).trim().length < 2) {
|
|
4517
|
+
return res.status(400).json({ ok: false, error: 'prompt 不能为空' });
|
|
4518
|
+
}
|
|
4519
|
+
if (!global.__crawler) {
|
|
4520
|
+
return res.status(500).json({ ok: false, error: 'crawler 未初始化' });
|
|
4521
|
+
}
|
|
4522
|
+
const report = await global.__crawler.directedSearch(String(prompt), {
|
|
4523
|
+
vertical: vertical || 'general',
|
|
4524
|
+
perQuery: perQuery || 8,
|
|
4525
|
+
maxEnqueue: maxEnqueue || 30,
|
|
4526
|
+
crawl: crawl !== false,
|
|
4527
|
+
maxCrawl: maxCrawl || 12
|
|
4528
|
+
});
|
|
4529
|
+
|
|
4530
|
+
let fed = 0;
|
|
4531
|
+
if (ingest !== false) {
|
|
4532
|
+
const docs = global.__crawler.loadRecentDocs(24);
|
|
4533
|
+
for (const d of docs) {
|
|
4534
|
+
fed += await global.ctrlA.runtime.ingestTextDocument(d.text, { addNewWords: true, minLen });
|
|
4535
|
+
}
|
|
4536
|
+
if (fed > 0) global.ctrlA.runtime.updateAttentionLinks();
|
|
4537
|
+
}
|
|
4538
|
+
|
|
4539
|
+
res.json({ ok: true, report, ingestedSegments: fed });
|
|
4540
|
+
} catch (e) {
|
|
4541
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4542
|
+
}
|
|
4543
|
+
});
|
|
4544
|
+
|
|
4545
|
+
// 仅添加待抓取URL(不立即抓)
|
|
4546
|
+
app.post('/api/crawler/query', async (req, res) => {
|
|
4547
|
+
try {
|
|
4548
|
+
const { prompt, vertical } = req.body || {};
|
|
4549
|
+
if (!prompt) return res.status(400).json({ ok: false, error: 'prompt 不能为空' });
|
|
4550
|
+
const report = await global.__crawler.directedSearch(String(prompt), { vertical: vertical || 'general', crawl: false });
|
|
4551
|
+
res.json({ ok: true, report });
|
|
4552
|
+
} catch (e) {
|
|
4553
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4554
|
+
}
|
|
4555
|
+
});
|
|
3163
4556
|
// 启动Redis客户端
|
|
3164
4557
|
redisClient.on('error', (err) => {
|
|
3165
4558
|
console.error('Redis error:', err);
|
|
@@ -3208,16 +4601,7 @@ function applyModelParams(runtime) {
|
|
|
3208
4601
|
}
|
|
3209
4602
|
// 如果直接运行此文件,启动主函数
|
|
3210
4603
|
if (require.main === module) {
|
|
3211
|
-
|
|
3212
|
-
() => main().catch(console.error), // onContinue
|
|
3213
|
-
() => {
|
|
3214
|
-
// onExit: 保存数据
|
|
3215
|
-
try {
|
|
3216
|
-
if (global.ctrl && global.ctrl.runtime) saveAll(global.ctrl.runtime);
|
|
3217
|
-
} catch (e) { }
|
|
3218
|
-
}
|
|
3219
|
-
);
|
|
3220
|
-
*/
|
|
4604
|
+
|
|
3221
4605
|
main().catch(console.error)
|
|
3222
4606
|
}
|
|
3223
4607
|
|