079project 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crawler/agent.cjs +97 -0
- package/crawler/index.cjs +515 -0
- package/crawler/storage.cjs +163 -0
- package/groupmanager.cjs +2 -1
- package/main_Serve.cjs +1136 -210
- package/main_Study.cjs +1584 -349
- package/package.json +2 -1
- package/robots/seeds.txt +2 -0
- package/schedule.cjs +745 -0
- package/todo-list.txt +0 -86
package/main_Study.cjs
CHANGED
|
@@ -29,7 +29,8 @@ const pool = workerpool.pool(path.join(__dirname, 'memeMergeWorker.cjs'), {
|
|
|
29
29
|
});
|
|
30
30
|
const natural = require('natural');
|
|
31
31
|
const STOP_WORDS = natural.stopwords; // 英文停用词
|
|
32
|
-
|
|
32
|
+
const { CrawlerManager } = require('./crawler/index.cjs');
|
|
33
|
+
const { AdversaryScheduler } = require('./schedule.cjs');
|
|
33
34
|
console.log(`[WORKERS] 工作池已创建,最大工作进程数: ${MAX_WORKERS}`);
|
|
34
35
|
protobuf.load(runtimeProtoPath, (err, root) => {
|
|
35
36
|
if (err) throw err;
|
|
@@ -54,6 +55,407 @@ const modelDefaults = {
|
|
|
54
55
|
edgeWeight: 1
|
|
55
56
|
};
|
|
56
57
|
const currentModelParams = { ...modelDefaults };
|
|
58
|
+
// ...existing code...
|
|
59
|
+
|
|
60
|
+
/**
 * Publish a runtime's state on the Redis channel `AI-model-<__dirname>`,
 * reusing the existing RuntimeMessage protobuf type.
 *
 * The runtime is converted with `runtimeToPlain`, verified against
 * RuntimeMessage, encoded, and published as a binary buffer. The call is
 * best-effort: it returns silently while RuntimeMessage is not set, skips
 * with a warning when the Redis client is missing or not open, and logs
 * (never rethrows) any verification or publish error.
 *
 * @param {object} runtime - Runtime instance to serialise and publish.
 * @returns {Promise<void>}
 */
async function publishRuntimeToRedis(runtime) {
    try {
        if (!RuntimeMessage) {
            return;
        }

        const clientReady = redisClient && redisClient.isOpen;
        if (!clientReady) {
            console.warn('[REDIS] 客户端未连接,跳过发布');
            return;
        }

        const payload = runtimeToPlain(runtime);
        const verifyError = RuntimeMessage.verify(payload);
        if (verifyError) {
            throw new Error(verifyError);
        }

        const encoded = RuntimeMessage.encode(RuntimeMessage.create(payload)).finish();
        await redisClient.publish(`AI-model-${__dirname}`, encoded);
        console.log('[REDIS] 已发布运行时状态');
    } catch (err) {
        console.warn('[REDIS] 发布失败:', err.message);
    }
}
|
|
79
|
+
// ...existing code...
|
|
80
|
+
const crypto = require('crypto');
|
|
81
|
+
// ...existing code...
|
|
82
|
+
|
|
83
|
+
/**
 * Delta-replication helpers.
 *
 * A graph is summarised at three levels: one SHA-1 per point, one SHA-1 per
 * bucket of up to BUCKET_SIZE consecutive (sorted) point IDs, and one SHA-1
 * over all bucket hashes. Comparing two summaries lets a sync skip buckets
 * whose contents did not change.
 */
const DELTA = {
    // Number of point IDs grouped into one bucket.
    BUCKET_SIZE: 256,

    /**
     * @param {Buffer|string} buf - Data to digest.
     * @returns {string} Hex-encoded SHA-1 of `buf`.
     */
    hash(buf) {
        return crypto.createHash('sha1').update(buf).digest('hex');
    },

    /**
     * Hash a single point from its ID and its edge list.
     *
     * Edges are sorted first so the hash does not depend on the order in
     * which they are stored. The ordering is by neighbour ID (edge[1]), then
     * edge[2], then edge[0]. Neighbour IDs are compared by UTF-16 code unit
     * rather than with `localeCompare`: locale collation may report distinct
     * IDs as equal, which would leave their relative order (and therefore
     * the hash) dependent on input order.
     *
     * @param {{pointID: *, connect?: Array<Array>}|null|undefined} point
     * @returns {string} '0' for a missing point, otherwise a SHA-1 hex digest.
     */
    hashPoint(point) {
        if (!point) return '0';
        const edges = (point.connect || []).slice().sort((a, b) => {
            const idA = String(a[1]);
            const idB = String(b[1]);
            if (idA !== idB) return idA < idB ? -1 : 1;
            const byThird = (a[2] || 0) - (b[2] || 0);
            if (byThird !== 0) return byThird;
            return (a[0] || 0) - (b[0] || 0);
        });
        return DELTA.hash(Buffer.from(JSON.stringify([point.pointID, edges])));
    },

    /**
     * Build the hash index of a graph.
     *
     * @param {{points: Map}} graph - Object exposing a `points` Map keyed by point ID.
     * @returns {{nodeHash: Map, buckets: Array<{start: *, end: *, ids: Array, hash: string}>, summary: string}}
     *   `nodeHash` maps each ID to its point hash; each bucket records its
     *   first/last ID, its IDs and the hash of their concatenated point
     *   hashes; `summary` is the hash of all bucket hashes concatenated.
     */
    buildGraphIndex(graph) {
        // Default sort (string order) gives a deterministic bucket layout.
        const ids = Array.from(graph.points.keys()).sort();
        const nodeHash = new Map();
        const buckets = [];
        for (let i = 0; i < ids.length; i += DELTA.BUCKET_SIZE) {
            const chunk = ids.slice(i, i + DELTA.BUCKET_SIZE);
            const hashes = chunk.map((id) => {
                const h = DELTA.hashPoint(graph.points.get(id));
                nodeHash.set(id, h);
                return h;
            });
            buckets.push({
                start: chunk[0],
                end: chunk[chunk.length - 1],
                ids: chunk,
                hash: DELTA.hash(Buffer.from(hashes.join(''))),
            });
        }
        const summary = DELTA.hash(Buffer.from(buckets.map((b) => b.hash).join('')));
        return { nodeHash, buckets, summary };
    },

    /**
     * Shallow array equality (elements compared with `===`).
     *
     * @param {*} a
     * @param {*} b
     * @returns {boolean} true when both are the same reference, or both are
     *   arrays of equal length whose elements are pairwise identical.
     */
    equalArray(a, b) {
        if (a === b) return true;
        if (!Array.isArray(a) || !Array.isArray(b)) return false;
        if (a.length !== b.length) return false;
        return a.every((item, i) => item === b[i]);
    },
};
|
|
124
|
+
|
|
125
|
+
/**
 * Create, or incrementally refresh, a clone of `srcRuntime` using the DELTA
 * bucket hashes to avoid re-copying unchanged graph regions.
 *
 * Without `prevClone` a new Runtime is built from the source config and the
 * vocabulary, both graphs and the KVM are copied in full. With `prevClone`
 * the same object is updated in place: points and KVM keys missing from the
 * source are removed, and only points in buckets whose hash changed since
 * the previous sync are re-copied. Edge arrays and KVM arrays are always
 * copied, never shared with the source.
 *
 * NOTE(review): bucket skipping compares the source against the index
 * recorded at the end of the previous sync, not against the clone's current
 * contents. If the clone was mutated in between, those local changes survive
 * in buckets the source did not touch — confirm this is intended.
 *
 * @param {object|null|undefined} prevClone - Clone returned by an earlier call, if any.
 * @param {object} srcRuntime - Runtime to copy from.
 * @returns {Promise<object>} The new clone, or `prevClone` after updating it.
 */
async function deltaCloneRuntime(prevClone, srcRuntime) {
    // Deep-copy an edge list (one level: each edge array is duplicated).
    const copyEdges = (connect) => (Array.isArray(connect) ? connect.map((e) => [...e]) : []);
    // KVM values are stored as arrays: null/undefined -> [], scalar -> [String(v)].
    const toValueArray = (v) => (Array.isArray(v) ? v : (v == null ? [] : [String(v)]));
    const hashVocab = (vocab) => DELTA.hash(Buffer.from(JSON.stringify(vocab)));

    if (!prevClone) {
        const clone = new Runtime({ ...srcRuntime.config });
        clone.registerClone();
        clone.spider = srcRuntime.spider;

        clone.vocabManager.vocab = [...srcRuntime.vocabManager.vocab];
        clone.vocabManager.updateMappings();

        for (const [id, p] of srcRuntime.wordGraph.points.entries()) {
            clone.wordGraph.addPoint(id, copyEdges(p.connect));
        }
        for (const [id, p] of srcRuntime.graph.points.entries()) {
            clone.graph.addPoint(id, copyEdges(p.connect));
        }
        for (const [k, v] of srcRuntime.kvm.memory.entries()) {
            clone.kvm.set(k, [...toValueArray(v)]);
        }

        // Remember what was copied so the next call can diff against it.
        clone.__deltaIndexes = {
            graph: DELTA.buildGraphIndex(clone.graph),
            wordGraph: DELTA.buildGraphIndex(clone.wordGraph),
            vocabHash: hashVocab(clone.vocabManager.vocab),
        };
        return clone;
    }

    const clone = prevClone;
    clone.spider = srcRuntime.spider;

    // Vocabulary is small enough to hash whole; re-copy only when it changed.
    const srcVocabHash = hashVocab(srcRuntime.vocabManager.vocab);
    if (!clone.__deltaIndexes || clone.__deltaIndexes.vocabHash !== srcVocabHash) {
        clone.vocabManager.vocab = [...srcRuntime.vocabManager.vocab];
        clone.vocabManager.updateMappings();
    }

    const syncGraph = (cloneGraph, srcGraph, name) => {
        const prevIdx = (clone.__deltaIndexes && clone.__deltaIndexes[name]) || { nodeHash: new Map(), buckets: [] };
        const srcIdx = DELTA.buildGraphIndex(srcGraph);

        // Drop points that no longer exist in the source.
        for (const id of Array.from(cloneGraph.points.keys())) {
            if (!srcGraph.points.has(id)) cloneGraph.points.delete(id);
        }

        for (const bucket of srcIdx.buckets) {
            // A bucket is unchanged only if the same ID range with the same
            // size produced the same hash last time.
            const prevBucket = prevIdx.buckets.find(
                (x) => x.start === bucket.start && x.end === bucket.end && x.ids.length === bucket.ids.length
            );
            if (prevBucket && prevBucket.hash === bucket.hash) continue;

            for (const id of bucket.ids) {
                const unchanged = srcIdx.nodeHash.get(id) === prevIdx.nodeHash.get(id);
                if (unchanged && cloneGraph.points.has(id)) continue;
                const srcPoint = srcGraph.points.get(id);
                cloneGraph.points.set(id, { pointID: id, connect: copyEdges(srcPoint.connect) });
            }
        }

        clone.__deltaIndexes = clone.__deltaIndexes || {};
        clone.__deltaIndexes[name] = DELTA.buildGraphIndex(cloneGraph);
    };

    syncGraph(clone.wordGraph, srcRuntime.wordGraph, 'wordGraph');
    syncGraph(clone.graph, srcRuntime.graph, 'graph');

    for (const k of Array.from(clone.kvm.memory.keys())) {
        if (!srcRuntime.kvm.memory.has(k)) clone.kvm.memory.delete(k);
    }
    for (const [k, v] of srcRuntime.kvm.memory.entries()) {
        const next = toValueArray(v);
        // A key absent from the clone must be created even when its value is
        // empty; comparing against a default [] would otherwise skip it.
        const missing = !clone.kvm.memory.has(k);
        if (missing || !DELTA.equalArray(clone.kvm.memory.get(k) || [], next)) {
            clone.kvm.set(k, [...next]);
        }
    }

    clone.__deltaIndexes.vocabHash = srcVocabHash;
    return clone;
}
|
|
206
|
+
|
|
207
|
+
/**
 * On-demand researcher: when user input arrives, triggers a directed web
 * search through `global.__crawler` and feeds the most recent crawled
 * documents into the runtime as incremental learning.
 *
 * Runs are rate-limited by a cooldown and never overlap: while a run is in
 * flight or the cooldown is active, only the most recent prompt is kept and
 * executed later.
 */
class OnlineResearcher {
    /**
     * @param {object} runtime - Runtime providing `ingestTextDocument`,
     *   `updateAttentionLinks` and, optionally, `filterStopWords`.
     * @param {object} [options] - Overrides for the defaults below.
     */
    constructor(runtime, options = {}) {
        this.runtime = runtime;
        this.opts = Object.assign({
            cooldownMs: 25 * 1000, // minimum gap between run starts
            maxCrawl: 8,           // max pages fetched per run
            perQuery: 6,
            maxEnqueue: 20,
            minPromptLen: 3,       // prompts with fewer keywords are ignored
            ingestMinLen: 6,       // minimum token length passed to ingest
            recentCapacity: 64,    // size of the recent-prompt dedupe window
            recentDocs: 24,        // how many recent documents to ingest per run
        }, options);
        this.running = false;
        this.lastRunAt = 0;
        this.pendingPrompt = null;
        this.timer = null;
        this.recent = new Set();
        this.recentQueue = [];
    }

    /**
     * Turn a word list into a search prompt: lower-cased, trimmed, stop words
     * removed (when the runtime offers `filterStopWords`), de-duplicated and
     * capped at 16 keywords.
     *
     * @param {Array} words
     * @returns {string} The prompt, or '' when fewer than `minPromptLen`
     *   keywords remain after filtering.
     */
    _normalizePromptFromWords(words) {
        const ws = (Array.isArray(words) ? words : [])
            .map((w) => String(w || '').toLowerCase().trim())
            .filter(Boolean);
        // Length is judged after stop-word removal.
        const filtered = this.runtime.filterStopWords ? this.runtime.filterStopWords(ws) : ws;
        if (filtered.length < this.opts.minPromptLen) return '';
        return Array.from(new Set(filtered)).slice(0, 16).join(' ');
    }

    /**
     * Record `key` in the bounded recent-prompt window.
     *
     * @param {string} key
     * @returns {boolean} true if the key was already in the window.
     */
    _seen(key) {
        if (this.recent.has(key)) return true;
        this.recent.add(key);
        this.recentQueue.push(key);
        if (this.recentQueue.length > this.opts.recentCapacity) {
            const oldest = this.recentQueue.shift();
            this.recent.delete(oldest);
        }
        return false;
    }

    /**
     * Schedule a research run for the given input words.
     *
     * Does nothing when the crawler is not initialised, the prompt is too
     * short, or the same prompt (first 64 chars) is still in the dedupe
     * window. Note that a prompt is recorded as seen as soon as it is
     * scheduled, even if a later prompt replaces it before it runs.
     *
     * @param {Array} words
     * @returns {void}
     */
    scheduleFromWords(words) {
        if (!global.__crawler) return;
        const prompt = this._normalizePromptFromWords(words);
        if (!prompt) return;

        const key = prompt.slice(0, 64);
        if (this._seen(key)) return;

        const remain = this.opts.cooldownMs - (Date.now() - this.lastRunAt);

        // Busy or cooling down: keep only the latest prompt for later.
        if (this.running || remain > 0) {
            this.pendingPrompt = prompt;
            if (!this.timer) {
                this.timer = setTimeout(() => {
                    this.timer = null;
                    // A run can outlast the timer. Starting another one here
                    // would make two runs overlap, so leave the prompt
                    // pending; the active run picks it up when it finishes.
                    if (this.running) return;
                    const p = this.pendingPrompt;
                    this.pendingPrompt = null;
                    if (p) this._doSearchAndIngest(p);
                }, Math.max(100, remain));
            }
            return;
        }

        this._doSearchAndIngest(prompt);
    }

    /**
     * Run one directed search and ingest the crawler's recent documents.
     * Errors are logged, never thrown. When the run ends, a prompt that
     * arrived meanwhile is started immediately.
     *
     * @param {string} prompt
     * @returns {Promise<void>}
     */
    async _doSearchAndIngest(prompt) {
        this.running = true;
        this.lastRunAt = Date.now();
        try {
            const report = await global.__crawler.directedSearch(prompt, {
                vertical: 'general',
                perQuery: this.opts.perQuery,
                maxEnqueue: this.opts.maxEnqueue,
                crawl: true,
                maxCrawl: this.opts.maxCrawl
            });
            // Learn right after crawling, reading only the latest documents.
            const docs = global.__crawler.loadRecentDocs(this.opts.recentDocs ?? 24);
            let fed = 0;
            for (const d of docs) {
                const added = await this.runtime.ingestTextDocument(d.text, {
                    addNewWords: true,
                    minLen: this.opts.ingestMinLen
                });
                // Guard against a non-numeric result turning the total into NaN.
                fed += Number(added) || 0;
            }
            if (fed > 0) {
                this.runtime.updateAttentionLinks();
                console.log(`[RESEARCH] "${prompt}" 抓取并增量学习完成: fed=${fed}, crawled=${report?.crawled ?? 0}`);
            } else {
                console.log(`[RESEARCH] "${prompt}" 未产生有效增量`);
            }
        } catch (e) {
            console.warn('[RESEARCH] 抓取/增量学习失败:', e.message);
        } finally {
            this.running = false;
            // Bursts of prompts were merged; run the most recent one now.
            if (this.pendingPrompt) {
                const p = this.pendingPrompt;
                this.pendingPrompt = null;
                this._doSearchAndIngest(p);
            }
        }
    }
}
|
|
318
|
+
// ...existing code...
|
|
319
|
+
/**
 * Rotation manager: trains on a clone of one controller's runtime,
 * evaluates the clone against the original, and only then swaps it into the
 * next controller, so the online runtimes are never trained on directly.
 *
 * Steps cycle through A -> B, B -> C, C -> A.
 */
class RotationManager {
    /**
     * @param {object} ctrlA - Controller A (exposes `runtime`, `isLearning`, `updateRuntime`).
     * @param {object} ctrlB - Controller B.
     * @param {object} ctrlC - Controller C.
     * @param {object} [options] - Overrides for the defaults below.
     */
    constructor(ctrlA, ctrlB, ctrlC, options = {}) {
        this.ctrlA = ctrlA;
        this.ctrlB = ctrlB;
        this.ctrlC = ctrlC;
        this.isRotating = false;
        this.timer = null;
        this.opts = Object.assign({
            cycleMs: 30 * 60 * 1000, // interval between steps (30 minutes)
            cooldownMs: 60 * 1000,   // pause at the end of each completed step
            learnIters: 3,           // self-learning iterations run on the clone
            minImprove: 0.005        // minimum relative gain (0.5%) to commit
        }, options);
        this.sequence = [
            { from: 'A', to: 'B' },
            { from: 'B', to: 'C' },
            { from: 'C', to: 'A' }
        ];
        this.idx = 0;
    }

    /**
     * Start rotating: run one step immediately, then one every `cycleMs`.
     * Calling it again while a timer exists is a no-op. A tick is skipped
     * while a step is still running or the process is shutting down. The
     * sequence index advances whenever a step returns without throwing,
     * including when `_runOne` returned early.
     *
     * @returns {void}
     */
    start() {
        if (this.timer) return;
        const runStep = async () => {
            if (this.isRotating || isShuttingDown) return;
            this.isRotating = true;
            try {
                const step = this.sequence[this.idx % this.sequence.length];
                await this._runOne(step.from, step.to);
                this.idx++;
            } catch (e) {
                console.warn('[ROTATE] 步骤失败:', e.message);
            } finally {
                this.isRotating = false;
            }
        };
        runStep();
        this.timer = registerInterval(runStep, this.opts.cycleMs);
        console.log(`[ROTATE] 轮换学习已启动:每${Math.round(this.opts.cycleMs / 60000)}分钟步进一次`);
    }

    /**
     * Stop the periodic timer. A step already in progress is not interrupted.
     *
     * @returns {void}
     */
    stop() {
        if (this.timer) {
            clearInterval(this.timer);
        }
        this.timer = null;
    }

    /**
     * @param {string} name - 'A' or 'B'; any other value selects controller C.
     * @returns {object} The matching controller.
     */
    _getCtrl(name) {
        if (name === 'A') return this.ctrlA;
        if (name === 'B') return this.ctrlB;
        return this.ctrlC;
    }

    /**
     * Run one rotation step: clone `fromName`'s runtime, train the clone,
     * compare it with the original, and install it into `toName` when the
     * relative improvement reaches `minImprove`.
     *
     * Returns early (without cooldown) when a controller is missing, either
     * controller is learning, or training on the clone fails.
     *
     * @param {string} fromName
     * @param {string} toName
     * @returns {Promise<void>}
     */
    async _runOne(fromName, toName) {
        const fromCtrl = this._getCtrl(fromName);
        const toCtrl = this._getCtrl(toName);
        if (!fromCtrl || !toCtrl) return;

        if (fromCtrl.isLearning || toCtrl.isLearning) {
            console.log(`[ROTATE] ${fromName} 或 ${toName} 正在学习,跳过本步`);
            return;
        }

        console.log(`[ROTATE] ${fromName} -> ${toName} 开始克隆与训练(离线)`);

        // 1) Identify patterns and clone the source runtime.
        const association = new AssociationLayer(fromCtrl.runtime);
        association.patterns = association.identifyPatterns();
        const systemClone = await association.cloneSystem();
        systemClone.spider = fromCtrl.runtime.spider;
        association.applyPatternsToClone(systemClone);

        // 2) Warm the clone up with a few vocabulary words (entries 4..63,
        //    longer than one character, at most 16).
        const sampleWords = Array.from(new Set(systemClone.vocabManager.vocab.slice(4, 64)))
            .filter((w) => w.length > 1)
            .slice(0, 16);
        if (sampleWords.length) {
            try {
                await systemClone.processInput(sampleWords, { addNewWords: false });
            } catch (e) {
                // Warm-up is best-effort: report the failure but keep going.
                console.warn('[ROTATE] 克隆预热失败,继续训练:', e.message);
            }
        }

        // 3) Train on the clone, never on the online runtime.
        try {
            await systemClone.startSelfLearning(this.opts.learnIters);
        } catch (e) {
            console.warn('[ROTATE] 克隆训练失败:', e.message);
            return;
        }

        // 4) Evaluate the original against the clone.
        const { origScore, cloneScore } = await this._evaluateTwo(fromCtrl.runtime, systemClone);
        // With a zero baseline, any positive clone score counts as 100% gain.
        const improve = origScore > 0 ? (cloneScore - origScore) / origScore : (cloneScore > 0 ? 1 : 0);

        console.log(`[ROTATE] 评估:orig=${origScore.toFixed(4)} clone=${cloneScore.toFixed(4)} improve=${(improve * 100).toFixed(2)}%`);

        if (improve >= this.opts.minImprove) {
            // 5) Commit: swap the clone in as the target controller's runtime.
            toCtrl.updateRuntime(systemClone);
            console.log(`[ROTATE] 已将 ${fromName} 的成果切换到 ${toName}`);

            // Publish to Redis only when the target is A.
            if (toName === 'A') {
                await publishRuntimeToRedis(systemClone);
            }
        } else {
            console.log('[ROTATE] 改进不足,放弃本次提交');
        }

        // 6) Cool down before the step is reported as finished.
        await new Promise((r) => setTimeout(r, this.opts.cooldownMs));
    }

    /**
     * Score the base runtime and the clone on the same test articles.
     *
     * Each score is the plain sum of `evaluateSystem` results over all
     * articles returned by `ReinforcementLearner.loadTestArticles()`. When
     * there are no articles, vocabulary sizes are used instead. On any error
     * both scores are reported as 0.
     *
     * @param {object} baseRuntime
     * @param {object} cloneRuntime
     * @returns {Promise<{origScore: number, cloneScore: number}>}
     */
    async _evaluateTwo(baseRuntime, cloneRuntime) {
        try {
            const rl = new ReinforcementLearner(baseRuntime);
            const articles = rl.loadTestArticles();
            if (!articles.length) {
                // No test set: fall back to comparing vocabulary sizes.
                return {
                    origScore: baseRuntime.vocabManager.getsize(),
                    cloneScore: cloneRuntime.vocabManager.getsize()
                };
            }
            let origTotal = 0;
            let cloneTotal = 0;
            for (const article of articles) {
                origTotal += rl.evaluateSystem(baseRuntime, article);
                cloneTotal += rl.evaluateSystem(cloneRuntime, article);
            }
            // Totals, not averages: both cover the same articles, so the
            // relative improvement computed by the caller is unaffected.
            return { origScore: origTotal, cloneScore: cloneTotal };
        } catch (e) {
            console.warn('[ROTATE] 评估失败,回退到零分:', e.message);
            return { origScore: 0, cloneScore: 0 };
        }
    }
}
|
|
458
|
+
// ...existing code...
|
|
57
459
|
async function batchAddPoints(graph, pointsArr, batchSize = 500) {
|
|
58
460
|
if (!graph || !pointsArr || !Array.isArray(pointsArr)) {
|
|
59
461
|
console.error('[BATCH] 无效的参数:', {
|
|
@@ -300,7 +702,6 @@ class SnapshotManager {
|
|
|
300
702
|
}
|
|
301
703
|
|
|
302
704
|
async createSnapshot(name = 'auto') {
|
|
303
|
-
// 防止并发创建
|
|
304
705
|
if (this.isCreatingSnapshot) {
|
|
305
706
|
console.log('[SNAPSHOT] 另一个快照正在创建中,跳过');
|
|
306
707
|
return null;
|
|
@@ -316,36 +717,39 @@ class SnapshotManager {
|
|
|
316
717
|
|
|
317
718
|
console.log(`[SNAPSHOT] 开始创建快照: ${snapshotId}`);
|
|
318
719
|
|
|
319
|
-
//
|
|
720
|
+
// 优先使用分区图的全量导出(避免仅导出窗口)
|
|
721
|
+
let memesAll = [];
|
|
722
|
+
if (this.runtime.graph && typeof this.runtime.graph.exportAllPoints === 'function') {
|
|
723
|
+
try {
|
|
724
|
+
memesAll = await this.runtime.graph.exportAllPoints();
|
|
725
|
+
} catch (e) {
|
|
726
|
+
console.warn('[SNAPSHOT] 分区图导出失败,回退窗口:', e.message);
|
|
727
|
+
memesAll = this.runtime.graph.getAllPoints();
|
|
728
|
+
}
|
|
729
|
+
} else {
|
|
730
|
+
memesAll = this.runtime.graph.getAllPoints();
|
|
731
|
+
}
|
|
732
|
+
|
|
320
733
|
const snapshotData = {
|
|
321
734
|
id: snapshotId,
|
|
322
735
|
timestamp,
|
|
323
736
|
name,
|
|
324
737
|
createDate: new Date().toISOString(),
|
|
325
|
-
memes:
|
|
738
|
+
memes: memesAll,
|
|
326
739
|
wordGraph: Array.from(this.runtime.wordGraph.points.values()),
|
|
327
740
|
kvm: Array.from(this.runtime.kvm.memory.entries()),
|
|
328
741
|
vocab: this.runtime.vocabManager.vocab,
|
|
329
|
-
// 修正:序列化为 [word, [[sessionId,count], ...]]
|
|
330
742
|
wordAccessLog: Array.from(this.runtime.wordAccessLog.entries()).map(([w, per]) =>
|
|
331
743
|
[w, per instanceof Map ? Array.from(per.entries()) : (Array.isArray(per) ? [['legacy', per.length]] : [])]
|
|
332
744
|
),
|
|
333
|
-
// 新增:保存会话状态
|
|
334
745
|
sessions: this.runtime.session.export()
|
|
335
746
|
};
|
|
336
747
|
|
|
337
|
-
// 写入临时文件,然后原子重命名以确保数据完整性
|
|
338
748
|
const tempPath = `${filePath}.temp`;
|
|
339
749
|
await fs.promises.writeFile(tempPath, JSON.stringify(snapshotData), 'utf-8');
|
|
340
750
|
await fs.promises.rename(tempPath, filePath);
|
|
341
751
|
|
|
342
|
-
|
|
343
|
-
const snapshotInfo = {
|
|
344
|
-
id: snapshotId,
|
|
345
|
-
timestamp,
|
|
346
|
-
name,
|
|
347
|
-
path: filePath
|
|
348
|
-
};
|
|
752
|
+
const snapshotInfo = { id: snapshotId, timestamp, name, path: filePath };
|
|
349
753
|
this.snapshotList.unshift(snapshotInfo);
|
|
350
754
|
|
|
351
755
|
console.timeEnd('snapshotCreation');
|
|
@@ -359,11 +763,11 @@ class SnapshotManager {
|
|
|
359
763
|
}
|
|
360
764
|
}
|
|
361
765
|
|
|
766
|
+
|
|
362
767
|
async restoreSnapshot(snapshotId) {
|
|
363
768
|
console.log(`[SNAPSHOT] 开始从快照恢复: ${snapshotId}`);
|
|
364
769
|
console.time('snapshotRestore');
|
|
365
770
|
|
|
366
|
-
// 查找快照
|
|
367
771
|
const snapshot = this.snapshotList.find(s => s.id === snapshotId);
|
|
368
772
|
if (!snapshot) {
|
|
369
773
|
console.error(`[SNAPSHOT] 快照不存在: ${snapshotId}`);
|
|
@@ -371,37 +775,27 @@ class SnapshotManager {
|
|
|
371
775
|
}
|
|
372
776
|
|
|
373
777
|
try {
|
|
374
|
-
// 读取快照文件
|
|
375
|
-
console.log(`[SNAPSHOT] 从文件读取数据: ${snapshot.path}`);
|
|
376
778
|
const dataStr = await fs.promises.readFile(snapshot.path, 'utf-8');
|
|
377
779
|
const data = JSON.parse(dataStr);
|
|
378
780
|
|
|
379
|
-
// 在恢复前创建自动备份
|
|
380
781
|
await this.createSnapshot(`auto_before_restore_${snapshotId}`);
|
|
381
782
|
|
|
382
|
-
//
|
|
383
|
-
console.log('[SNAPSHOT] 清空当前运行时...');
|
|
384
|
-
this.runtime.graph = new GraphDB();
|
|
783
|
+
// 清空当前运行时(词图/KVM 内存)
|
|
385
784
|
this.runtime.wordGraph = new GraphDB();
|
|
386
785
|
this.runtime.kvm = new KVM();
|
|
387
786
|
this.runtime.wordAccessLog = new Map();
|
|
388
787
|
|
|
389
|
-
//
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
this.runtime.graph.addPoint(point.pointID, point.connect);
|
|
397
|
-
}
|
|
398
|
-
// 让事件循环有机会处理其他事件
|
|
399
|
-
await new Promise(resolve => setImmediate(resolve));
|
|
788
|
+
// 恢复模因图:走分区导入(覆盖分区存储)
|
|
789
|
+
if (data.memes && this.runtime.graph && typeof this.runtime.graph.importAllPoints === 'function') {
|
|
790
|
+
await this.runtime.graph.importAllPoints(data.memes);
|
|
791
|
+
} else if (data.memes) {
|
|
792
|
+
// 窗口回退(不推荐)
|
|
793
|
+
for (const point of data.memes) {
|
|
794
|
+
await this.runtime.graph.addPoint(point.pointID, point.connect);
|
|
400
795
|
}
|
|
401
796
|
}
|
|
402
797
|
|
|
403
798
|
// 恢复词图
|
|
404
|
-
console.log('[SNAPSHOT] 恢复词语网络...');
|
|
405
799
|
if (data.wordGraph) {
|
|
406
800
|
const BATCH_SIZE = 1000;
|
|
407
801
|
for (let i = 0; i < data.wordGraph.length; i += BATCH_SIZE) {
|
|
@@ -414,27 +808,22 @@ class SnapshotManager {
|
|
|
414
808
|
}
|
|
415
809
|
|
|
416
810
|
// 恢复KVM
|
|
417
|
-
console.log('[SNAPSHOT] 恢复键值存储...');
|
|
418
811
|
if (data.kvm) {
|
|
419
812
|
const BATCH_SIZE = 1000;
|
|
420
813
|
for (let i = 0; i < data.kvm.length; i += BATCH_SIZE) {
|
|
421
814
|
const batch = data.kvm.slice(i, i + BATCH_SIZE);
|
|
422
|
-
for (const [k, v] of batch)
|
|
423
|
-
this.runtime.kvm.set(k, v);
|
|
424
|
-
}
|
|
815
|
+
for (const [k, v] of batch) this.runtime.kvm.set(k, v);
|
|
425
816
|
await new Promise(resolve => setImmediate(resolve));
|
|
426
817
|
}
|
|
427
818
|
}
|
|
428
819
|
|
|
429
820
|
// 恢复词表
|
|
430
|
-
console.log('[SNAPSHOT] 恢复词表...');
|
|
431
821
|
if (data.vocab) {
|
|
432
822
|
this.runtime.vocabManager.vocab = data.vocab;
|
|
433
823
|
this.runtime.vocabManager.updateMappings();
|
|
434
824
|
}
|
|
435
825
|
|
|
436
826
|
// 恢复词访问日志
|
|
437
|
-
console.log('[SNAPSHOT] 恢复词访问日志...');
|
|
438
827
|
if (data.wordAccessLog) {
|
|
439
828
|
const restored = new Map();
|
|
440
829
|
for (const [word, per] of data.wordAccessLog) {
|
|
@@ -448,13 +837,12 @@ class SnapshotManager {
|
|
|
448
837
|
}
|
|
449
838
|
this.runtime.wordAccessLog = restored;
|
|
450
839
|
}
|
|
451
|
-
// 恢复会话信息
|
|
452
840
|
if (data.sessions) {
|
|
453
841
|
this.runtime.session.import(data.sessions);
|
|
454
842
|
} else {
|
|
455
|
-
// 无会话信息时,创建一个遗留会话
|
|
456
843
|
this.runtime.session.startNewSession({ reason: 'snapshot-legacy' });
|
|
457
844
|
}
|
|
845
|
+
|
|
458
846
|
console.timeEnd('snapshotRestore');
|
|
459
847
|
console.log(`[SNAPSHOT] 成功从快照恢复: ${snapshotId}`);
|
|
460
848
|
return true;
|
|
@@ -739,80 +1127,790 @@ class GraphDB {
|
|
|
739
1127
|
}
|
|
740
1128
|
if (current === null) break;
|
|
741
1129
|
|
|
742
|
-
openSet.delete(current);
|
|
743
|
-
closedSet.add(current);
|
|
1130
|
+
openSet.delete(current);
|
|
1131
|
+
closedSet.add(current);
|
|
1132
|
+
|
|
1133
|
+
if (current === toID) {
|
|
1134
|
+
// reconstruct the path
|
|
1135
|
+
const path = [];
|
|
1136
|
+
let temp = current;
|
|
1137
|
+
while (cameFrom.has(temp)) {
|
|
1138
|
+
path.push(temp);
|
|
1139
|
+
temp = cameFrom.get(temp);
|
|
1140
|
+
}
|
|
1141
|
+
path.push(fromID);
|
|
1142
|
+
return path.reverse();
|
|
1143
|
+
}
|
|
1144
|
+
|
|
1145
|
+
for (const [weight, neighborID, direction] of this.points.get(current).connect) {
|
|
1146
|
+
// 只考虑允许的方向
|
|
1147
|
+
if (direction === 1 && neighborID !== current) continue; // 只指向自己
|
|
1148
|
+
if (direction === 2 && current !== fromID) continue; // 只指向对方
|
|
1149
|
+
if (closedSet.has(neighborID)) continue;
|
|
1150
|
+
|
|
1151
|
+
const tentativeGScore = (gScore.get(current) || Infinity) + weight;
|
|
1152
|
+
if (!openSet.has(neighborID)) {
|
|
1153
|
+
openSet.add(neighborID);
|
|
1154
|
+
} else if (tentativeGScore >= (gScore.get(neighborID) || Infinity)) {
|
|
1155
|
+
continue;
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
cameFrom.set(neighborID, current);
|
|
1159
|
+
gScore.set(neighborID, tentativeGScore);
|
|
1160
|
+
fScore.set(neighborID, tentativeGScore + this.heuristic(neighborID, toID));
|
|
1161
|
+
}
|
|
1162
|
+
}
|
|
1163
|
+
return null;
|
|
1164
|
+
}
|
|
1165
|
+
|
|
1166
|
+
// 获取所有点
|
|
1167
|
+
getAllPoints() {
|
|
1168
|
+
return Array.from(this.points.values());
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
// 启发式函数:简单的常数启发式
|
|
1172
|
+
heuristic(pointID, toID) {
|
|
1173
|
+
return 1; // 简化的启发式函数
|
|
1174
|
+
}
|
|
1175
|
+
existEdge(pointID, neighborID) {
|
|
1176
|
+
const point = this.points.get(pointID);
|
|
1177
|
+
if (!point) {
|
|
1178
|
+
return { exist: false, weight: undefined, type: undefined };
|
|
1179
|
+
}
|
|
1180
|
+
const connectArr = point.connect || [];
|
|
1181
|
+
const found = connectArr.find(([_, id]) => id === neighborID);
|
|
1182
|
+
return {
|
|
1183
|
+
exist: connectArr.some(([_, id]) => id === neighborID),
|
|
1184
|
+
weight: found ? found[0] : undefined,
|
|
1185
|
+
type: found ? found[2] : undefined
|
|
1186
|
+
};
|
|
1187
|
+
}
|
|
1188
|
+
existPoint(pointID) {
|
|
1189
|
+
return { exist: this.points.has(pointID), connect: this.points.get(pointID)?.connect || [] };
|
|
1190
|
+
}
|
|
1191
|
+
deleteEdge(pointID, neighborID) {
|
|
1192
|
+
if (this.existEdge(pointID, neighborID).exist) {
|
|
1193
|
+
this.points.get(pointID).connect = this.points.get(pointID).connect.filter(([_, id]) => id !== neighborID);
|
|
1194
|
+
this.points.get(neighborID).connect = this.points.get(neighborID).connect.filter(([_, id]) => id !== pointID);
|
|
1195
|
+
}
|
|
1196
|
+
}
|
|
1197
|
+
deletePoint(pointID) {
|
|
1198
|
+
if (this.existPoint(pointID).exist) {
|
|
1199
|
+
this.points.delete(pointID);
|
|
1200
|
+
}
|
|
1201
|
+
}
|
|
1202
|
+
}
|
|
1203
|
+
// 简易日志辅助
|
|
1204
|
+
/**
 * Log a message to the console, prefixed with the `[PART]` tag.
 *
 * @param {...*} args - Values forwarded to `console.log` after the tag.
 * @returns {void}
 */
function logPart(...args) {
    const tagged = ['[PART]', ...args];
    console.log(...tagged);
}
|
|
1205
|
+
/**
 * Wait for `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<undefined>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((resolve) => {
        setTimeout(resolve, ms);
    });
}
|
|
1206
|
+
|
|
1207
|
+
// 存储适配层(FS/LMDB/Level 多后端,按需加载)
|
|
1208
|
+
class GraphStorageAdapter {
|
|
1209
|
+
constructor({ baseDir, backend = 'fs' } = {}) {
|
|
1210
|
+
this.baseDir = baseDir || path.join(__dirname, 'graph_parts');
|
|
1211
|
+
this.backend = backend;
|
|
1212
|
+
this.ready = false;
|
|
1213
|
+
|
|
1214
|
+
// 尝试创建目录
|
|
1215
|
+
fs.mkdirSync(this.baseDir, { recursive: true });
|
|
1216
|
+
|
|
1217
|
+
// 可选依赖
|
|
1218
|
+
this.lmdb = null;
|
|
1219
|
+
this.level = null;
|
|
1220
|
+
|
|
1221
|
+
if (backend === 'lmdb') {
|
|
1222
|
+
try {
|
|
1223
|
+
this.lmdb = require('lmdb');
|
|
1224
|
+
} catch (e) {
|
|
1225
|
+
console.warn('[PART][ADAPTER] LMDB 不可用,降级为 FS:', e.message);
|
|
1226
|
+
this.backend = 'fs';
|
|
1227
|
+
}
|
|
1228
|
+
}
|
|
1229
|
+
if (backend === 'level') {
|
|
1230
|
+
try {
|
|
1231
|
+
this.level = require('level');
|
|
1232
|
+
} catch (e) {
|
|
1233
|
+
console.warn('[PART][ADAPTER] level 不可用,降级为 FS:', e.message);
|
|
1234
|
+
this.backend = 'fs';
|
|
1235
|
+
}
|
|
1236
|
+
}
|
|
1237
|
+
|
|
1238
|
+
// 初始化后端
|
|
1239
|
+
this._initBackend();
|
|
1240
|
+
}
|
|
1241
|
+
|
|
1242
|
+
_initBackend() {
|
|
1243
|
+
if (this.backend === 'fs') {
|
|
1244
|
+
// FS: 每个分区一个 .jsonl(节点),边界事件一个独立 .jsonl
|
|
1245
|
+
this.ready = true;
|
|
1246
|
+
return;
|
|
1247
|
+
}
|
|
1248
|
+
if (this.backend === 'lmdb' && this.lmdb) {
|
|
1249
|
+
try {
|
|
1250
|
+
const storeDir = path.join(this.baseDir, 'lmdb');
|
|
1251
|
+
fs.mkdirSync(storeDir, { recursive: true });
|
|
1252
|
+
this.env = this.lmdb.open({
|
|
1253
|
+
path: storeDir,
|
|
1254
|
+
mapSize: 1024n * 1024n * 1024n * 64n,
|
|
1255
|
+
compression: true,
|
|
1256
|
+
});
|
|
1257
|
+
this.ready = true;
|
|
1258
|
+
} catch (e) {
|
|
1259
|
+
console.warn('[PART][ADAPTER] LMDB 初始化失败,降级 FS:', e.message);
|
|
1260
|
+
this.backend = 'fs';
|
|
1261
|
+
this.ready = true;
|
|
1262
|
+
}
|
|
1263
|
+
return;
|
|
1264
|
+
}
|
|
1265
|
+
if (this.backend === 'level' && this.level) {
|
|
1266
|
+
try {
|
|
1267
|
+
const dbDir = path.join(this.baseDir, 'leveldb');
|
|
1268
|
+
fs.mkdirSync(dbDir, { recursive: true });
|
|
1269
|
+
this.db = new this.level.Level(dbDir, { valueEncoding: 'json' });
|
|
1270
|
+
this.ready = true;
|
|
1271
|
+
} catch (e) {
|
|
1272
|
+
console.warn('[PART][ADAPTER] level 初始化失败,降级 FS:', e.message);
|
|
1273
|
+
this.backend = 'fs';
|
|
1274
|
+
this.ready = true;
|
|
1275
|
+
}
|
|
1276
|
+
return;
|
|
1277
|
+
}
|
|
1278
|
+
this.ready = true;
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1281
|
+
// 分区文件名(FS)
|
|
1282
|
+
_partFile(pid) { return path.join(this.baseDir, `p_${pid}.jsonl`); }
|
|
1283
|
+
_eventFile(pid) { return path.join(this.baseDir, `p_${pid}.events.jsonl`); }
|
|
1284
|
+
|
|
1285
|
+
// 读取分区(返回 { points: Map<string,{pointID,connect:[]}> })
|
|
1286
|
+
async loadPartition(pid) {
|
|
1287
|
+
if (this.backend === 'fs') {
|
|
1288
|
+
const file = this._partFile(pid);
|
|
1289
|
+
const out = new Map();
|
|
1290
|
+
if (!fs.existsSync(file)) return { points: out };
|
|
1291
|
+
const rs = fs.createReadStream(file, { encoding: 'utf-8' });
|
|
1292
|
+
let buf = '';
|
|
1293
|
+
for await (const chunk of rs) {
|
|
1294
|
+
buf += chunk;
|
|
1295
|
+
let idx;
|
|
1296
|
+
while ((idx = buf.indexOf('\n')) >= 0) {
|
|
1297
|
+
const line = buf.slice(0, idx);
|
|
1298
|
+
buf = buf.slice(idx + 1);
|
|
1299
|
+
if (!line.trim()) continue;
|
|
1300
|
+
try {
|
|
1301
|
+
const obj = JSON.parse(line);
|
|
1302
|
+
if (obj && obj.pointID) {
|
|
1303
|
+
out.set(obj.pointID, { pointID: obj.pointID, connect: obj.connect || [] });
|
|
1304
|
+
}
|
|
1305
|
+
} catch { /* ignore */ }
|
|
1306
|
+
}
|
|
1307
|
+
}
|
|
1308
|
+
return { points: out };
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
if (this.backend === 'lmdb' && this.env) {
|
|
1312
|
+
const points = new Map();
|
|
1313
|
+
const txn = this.env.beginTxn({ readOnly: true });
|
|
1314
|
+
try {
|
|
1315
|
+
const cursor = new this.lmdb.Cursors.Cursor(txn, this.env.openDB({ name: `p_${pid}`, create: true }));
|
|
1316
|
+
for (let found = cursor.goToFirst(); found; found = cursor.goToNext()) {
|
|
1317
|
+
const key = cursor.getCurrentString();
|
|
1318
|
+
const val = cursor.getCurrentBinary();
|
|
1319
|
+
try {
|
|
1320
|
+
const obj = JSON.parse(Buffer.from(val).toString('utf-8'));
|
|
1321
|
+
if (obj && obj.pointID) points.set(obj.pointID, obj);
|
|
1322
|
+
} catch { }
|
|
1323
|
+
}
|
|
1324
|
+
cursor.close();
|
|
1325
|
+
} catch { }
|
|
1326
|
+
txn.abort();
|
|
1327
|
+
return { points };
|
|
1328
|
+
}
|
|
1329
|
+
|
|
1330
|
+
if (this.backend === 'level' && this.db) {
|
|
1331
|
+
const points = new Map();
|
|
1332
|
+
try {
|
|
1333
|
+
for await (const { key, value } of this.db.iterator({ gte: `p:${pid}:`, lt: `p:${pid};` })) {
|
|
1334
|
+
const obj = value;
|
|
1335
|
+
if (obj && obj.pointID) points.set(obj.pointID, obj);
|
|
1336
|
+
}
|
|
1337
|
+
} catch { }
|
|
1338
|
+
return { points };
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1341
|
+
return { points: new Map() };
|
|
1342
|
+
}
|
|
1343
|
+
|
|
1344
|
+
|
|
1345
|
+
|
|
1346
|
+
// 保存分区(全量覆盖写)
|
|
1347
|
+
async savePartition(pid, pointsMap) {
|
|
1348
|
+
if (!(pointsMap instanceof Map)) return;
|
|
1349
|
+
if (this.backend === 'fs') {
|
|
1350
|
+
const file = this._partFile(pid);
|
|
1351
|
+
const tmp = `${file}.tmp`;
|
|
1352
|
+
const ws = fs.createWriteStream(tmp, { encoding: 'utf-8' });
|
|
1353
|
+
for (const [, p] of pointsMap.entries()) {
|
|
1354
|
+
ws.write(JSON.stringify({ pointID: p.pointID, connect: p.connect || [] }) + '\n');
|
|
1355
|
+
}
|
|
1356
|
+
await new Promise((res, rej) => ws.end(res));
|
|
1357
|
+
await fs.promises.rename(tmp, file);
|
|
1358
|
+
return;
|
|
1359
|
+
}
|
|
1360
|
+
|
|
1361
|
+
if (this.backend === 'lmdb' && this.env) {
|
|
1362
|
+
const dbi = this.env.openDB({ name: `p_${pid}`, create: true });
|
|
1363
|
+
const txn = this.env.beginTxn();
|
|
1364
|
+
try {
|
|
1365
|
+
// 先清空:简化实现
|
|
1366
|
+
const cur = new this.lmdb.Cursors.Cursor(txn, dbi);
|
|
1367
|
+
for (let found = cur.goToFirst(); found; found = cur.goToNext()) {
|
|
1368
|
+
const k = cur.getCurrentString();
|
|
1369
|
+
txn.del(dbi, k);
|
|
1370
|
+
}
|
|
1371
|
+
cur.close();
|
|
1372
|
+
for (const [, p] of pointsMap.entries()) {
|
|
1373
|
+
txn.put(dbi, p.pointID, JSON.stringify(p));
|
|
1374
|
+
}
|
|
1375
|
+
txn.commit();
|
|
1376
|
+
} catch (e) {
|
|
1377
|
+
try { txn.abort(); } catch { }
|
|
1378
|
+
console.warn('[PART][ADAPTER][LMDB] savePartition err:', e.message);
|
|
1379
|
+
}
|
|
1380
|
+
return;
|
|
1381
|
+
}
|
|
1382
|
+
|
|
1383
|
+
if (this.backend === 'level' && this.db) {
|
|
1384
|
+
const ops = [];
|
|
1385
|
+
// 简化:清理旧 key 不容易,直接覆盖同 key
|
|
1386
|
+
for (const [, p] of pointsMap.entries()) {
|
|
1387
|
+
ops.push({ type: 'put', key: `p:${pid}:${p.pointID}`, value: p });
|
|
1388
|
+
}
|
|
1389
|
+
await this.db.batch(ops);
|
|
1390
|
+
return;
|
|
1391
|
+
}
|
|
1392
|
+
}
|
|
1393
|
+
|
|
1394
|
+
// 追加边界事件(跨分区边)
|
|
1395
|
+
async appendEdgeEvent(pid, event) {
|
|
1396
|
+
if (!event || !event.type) return;
|
|
1397
|
+
if (this.backend === 'fs') {
|
|
1398
|
+
const file = this._eventFile(pid);
|
|
1399
|
+
fs.appendFileSync(file, JSON.stringify(event) + '\n', 'utf-8');
|
|
1400
|
+
return;
|
|
1401
|
+
}
|
|
1402
|
+
if (this.backend === 'lmdb' && this.env) {
|
|
1403
|
+
const dbi = this.env.openDB({ name: `e_${pid}`, create: true });
|
|
1404
|
+
const txn = this.env.beginTxn();
|
|
1405
|
+
try {
|
|
1406
|
+
const key = `e:${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
|
|
1407
|
+
txn.put(dbi, key, JSON.stringify(event));
|
|
1408
|
+
txn.commit();
|
|
1409
|
+
} catch (e) {
|
|
1410
|
+
try { txn.abort(); } catch { }
|
|
1411
|
+
}
|
|
1412
|
+
return;
|
|
1413
|
+
}
|
|
1414
|
+
if (this.backend === 'level' && this.db) {
|
|
1415
|
+
const key = `e:${pid}:${Date.now()}_${Math.random().toString(36).slice(2, 10)}`;
|
|
1416
|
+
await this.db.put(key, event);
|
|
1417
|
+
return;
|
|
1418
|
+
}
|
|
1419
|
+
}
|
|
1420
|
+
|
|
1421
|
+
// 读取并消费边界事件(与该分区相关的)
|
|
1422
|
+
async consumeEdgeEvents(pid, filterFn = null, limit = 2000) {
|
|
1423
|
+
const events = [];
|
|
1424
|
+
if (this.backend === 'fs') {
|
|
1425
|
+
const file = this._eventFile(pid);
|
|
1426
|
+
if (!fs.existsSync(file)) return events;
|
|
1427
|
+
|
|
1428
|
+
const tmp = `${file}.tmp`;
|
|
1429
|
+
// 将不消费的事件写入 tmp,再覆盖原文件;已消费事件返回
|
|
1430
|
+
const lines = fs.readFileSync(file, 'utf-8').split(/\r?\n/).filter(Boolean);
|
|
1431
|
+
const remain = [];
|
|
1432
|
+
for (const line of lines) {
|
|
1433
|
+
try {
|
|
1434
|
+
const e = JSON.parse(line);
|
|
1435
|
+
const ok = filterFn ? filterFn(e) : true;
|
|
1436
|
+
if (ok && events.length < limit) {
|
|
1437
|
+
events.push(e);
|
|
1438
|
+
} else {
|
|
1439
|
+
remain.push(line);
|
|
1440
|
+
}
|
|
1441
|
+
} catch {
|
|
1442
|
+
remain.push(line);
|
|
1443
|
+
}
|
|
1444
|
+
}
|
|
1445
|
+
fs.writeFileSync(tmp, remain.join('\n') + (remain.length ? '\n' : ''), 'utf-8');
|
|
1446
|
+
await fs.promises.rename(tmp, file);
|
|
1447
|
+
return events;
|
|
1448
|
+
}
|
|
1449
|
+
|
|
1450
|
+
if (this.backend === 'lmdb' && this.env) {
|
|
1451
|
+
const dbi = this.env.openDB({ name: `e_${pid}`, create: true });
|
|
1452
|
+
const txn = this.env.beginTxn();
|
|
1453
|
+
const toDel = [];
|
|
1454
|
+
try {
|
|
1455
|
+
const cur = new this.lmdb.Cursors.Cursor(txn, dbi);
|
|
1456
|
+
for (let found = cur.goToFirst(); found; found = cur.goToNext()) {
|
|
1457
|
+
const k = cur.getCurrentString();
|
|
1458
|
+
const v = cur.getCurrentBinary();
|
|
1459
|
+
const e = JSON.parse(Buffer.from(v).toString('utf-8'));
|
|
1460
|
+
const ok = filterFn ? filterFn(e) : true;
|
|
1461
|
+
if (ok && events.length < limit) {
|
|
1462
|
+
events.push(e);
|
|
1463
|
+
toDel.push(k);
|
|
1464
|
+
}
|
|
1465
|
+
}
|
|
1466
|
+
cur.close();
|
|
1467
|
+
for (const k of toDel) txn.del(dbi, k);
|
|
1468
|
+
txn.commit();
|
|
1469
|
+
} catch (e) {
|
|
1470
|
+
try { txn.abort(); } catch { }
|
|
1471
|
+
}
|
|
1472
|
+
return events;
|
|
1473
|
+
}
|
|
1474
|
+
|
|
1475
|
+
if (this.backend === 'level' && this.db) {
|
|
1476
|
+
// 简化:扫描全库 keys 读取该 pid 的事件
|
|
1477
|
+
try {
|
|
1478
|
+
const toDel = [];
|
|
1479
|
+
for await (const { key, value } of this.db.iterator({ gte: `e:${pid}:`, lt: `e:${pid};` })) {
|
|
1480
|
+
const ok = filterFn ? filterFn(value) : true;
|
|
1481
|
+
if (ok && events.length < limit) {
|
|
1482
|
+
events.push(value);
|
|
1483
|
+
toDel.push(key);
|
|
1484
|
+
}
|
|
1485
|
+
}
|
|
1486
|
+
// 删除已消费
|
|
1487
|
+
const ops = toDel.map(k => ({ type: 'del', key: k }));
|
|
1488
|
+
if (ops.length) await this.db.batch(ops);
|
|
1489
|
+
} catch { }
|
|
1490
|
+
return events;
|
|
1491
|
+
}
|
|
1492
|
+
|
|
1493
|
+
return events;
|
|
1494
|
+
}
|
|
1495
|
+
|
|
1496
|
+
// 枚举所有分区 ID(FS 模式)
|
|
1497
|
+
async listPartitionIds() {
|
|
1498
|
+
if (this.backend === 'fs') {
|
|
1499
|
+
const files = fs.readdirSync(this.baseDir).filter(f => /^p_\d+\.jsonl$/.test(f));
|
|
1500
|
+
const ids = files.map(f => Number(f.match(/^p_(\d+)\.jsonl$/)[1])).sort((a, b) => a - b);
|
|
1501
|
+
return ids;
|
|
1502
|
+
}
|
|
1503
|
+
// LMDB/level 不易列举,约定 0..N-1 尝试加载
|
|
1504
|
+
return [];
|
|
1505
|
+
}
|
|
1506
|
+
}
|
|
1507
|
+
|
|
1508
|
+
// 分区器(哈希 -> 分区ID)
|
|
1509
|
+
/**
 * Maps point IDs to partition IDs by hashing, and computes the ring
 * neighbourhood of a partition.
 */
class GraphPartitioner {
    /**
     * @param {Object} [options]
     * @param {number} [options.partitions=64] - Requested partition count.
     *   It is floored to an integer and raised to at least 4; a value that is
     *   not a finite number falls back to 64.
     */
    constructor({ partitions = 64 } = {}) {
        const requested = Math.floor(Number(partitions));
        // NaN/Infinity would make every idOf() result NaN, so use the default instead.
        this.partitions = Number.isFinite(requested) ? Math.max(4, requested) : 64;
    }

    /**
     * Partition that owns a point.
     *
     * @param {*} pointID - Point identifier; it is hashed via String(pointID).
     * @returns {number} Integer in [0, partitions); falsy IDs map to 0.
     */
    idOf(pointID) {
        if (!pointID) return 0;
        const digest = crypto.createHash('sha1').update(String(pointID)).digest();
        // The first 4 digest bytes, read as an unsigned 32-bit integer.
        return digest.readUInt32BE(0) % this.partitions;
    }

    /**
     * Partitions within `radius` steps of `pid` on the ring 0..partitions-1.
     *
     * @param {number} pid - Centre partition.
     * @param {number} [radius=1] - Steps taken in each direction.
     * @returns {number[]} Unique partition IDs, ascending, including `pid`.
     */
    neighborsOf(pid, radius = 1) {
        const count = this.partitions;
        // True modulo: stays in [0, count) even when the offset exceeds one full turn.
        const wrap = (value) => ((value % count) + count) % count;
        // Beyond `count` steps the whole ring is already covered.
        const reach = Math.min(radius, count);
        const out = new Set([pid]);
        for (let r = 1; r <= reach; r++) {
            out.add(wrap(pid - r));
            out.add(wrap(pid + r));
        }
        return Array.from(out).sort((a, b) => a - b);
    }
}
|
|
1529
|
+
// 分区图 + 滑动窗口 + 边界事件消费
|
|
1530
|
+
class PartitionedGraphDB {
|
|
1531
|
+
constructor({
|
|
1532
|
+
partitions = 64,
|
|
1533
|
+
maxLoadedPartitions = 8,
|
|
1534
|
+
windowRadius = 1,
|
|
1535
|
+
baseDir = path.join(__dirname, 'graph_parts'),
|
|
1536
|
+
backend = 'fs'
|
|
1537
|
+
} = {}) {
|
|
1538
|
+
this.partitioner = new GraphPartitioner({ partitions });
|
|
1539
|
+
this.adapter = new GraphStorageAdapter({ baseDir, backend });
|
|
1540
|
+
this.maxLoadedPartitions = Math.max(2, maxLoadedPartitions);
|
|
1541
|
+
this.windowRadius = Math.max(0, windowRadius);
|
|
1542
|
+
|
|
1543
|
+
// 已加载分区:pid -> { points: Map, dirty, lastAccess }
|
|
1544
|
+
this.loaded = new Map();
|
|
1545
|
+
// 兼容旧代码:合并视图(仅包含已加载分区的点)
|
|
1546
|
+
this.points = new Map();
|
|
1547
|
+
// LRU
|
|
1548
|
+
this.accessTick = 0;
|
|
1549
|
+
this.centerPid = null;
|
|
1550
|
+
|
|
1551
|
+
// 并发保护
|
|
1552
|
+
this.loading = new Set();
|
|
1553
|
+
}
|
|
1554
|
+
|
|
1555
|
+
// ---------- 内部:加载/保存/淘汰 ----------
|
|
1556
|
+
async ensureLoaded(pid) {
|
|
1557
|
+
if (this.loaded.has(pid)) {
|
|
1558
|
+
this._touch(pid);
|
|
1559
|
+
return this.loaded.get(pid);
|
|
1560
|
+
}
|
|
1561
|
+
if (this.loading.has(pid)) {
|
|
1562
|
+
// 等待已有加载完成
|
|
1563
|
+
while (this.loading.has(pid)) { await sleep(10); }
|
|
1564
|
+
return this.loaded.get(pid);
|
|
1565
|
+
}
|
|
1566
|
+
this.loading.add(pid);
|
|
1567
|
+
try {
|
|
1568
|
+
const part = await this.adapter.loadPartition(pid);
|
|
1569
|
+
const bundle = {
|
|
1570
|
+
points: part.points || new Map(),
|
|
1571
|
+
dirty: false,
|
|
1572
|
+
lastAccess: ++this.accessTick
|
|
1573
|
+
};
|
|
1574
|
+
this.loaded.set(pid, bundle);
|
|
1575
|
+
// 合并到全局视图
|
|
1576
|
+
for (const [id, p] of bundle.points.entries()) this.points.set(id, p);
|
|
1577
|
+
|
|
1578
|
+
// 消费边界事件:把指向本分区的事件落库
|
|
1579
|
+
const events = await this.adapter.consumeEdgeEvents(pid, (e) =>
|
|
1580
|
+
e && e.type === 'cross-edge' && (e.toPid === pid || e.fromPid === pid), 5000);
|
|
1581
|
+
if (events.length) {
|
|
1582
|
+
for (const e of events) this._applyEdgeEvent(bundle, e);
|
|
1583
|
+
bundle.dirty = true;
|
|
1584
|
+
}
|
|
1585
|
+
|
|
1586
|
+
// 控制内存:若超容量,执行淘汰
|
|
1587
|
+
await this._evictIfNeeded();
|
|
1588
|
+
return bundle;
|
|
1589
|
+
} finally {
|
|
1590
|
+
this.loading.delete(pid);
|
|
1591
|
+
}
|
|
1592
|
+
}
|
|
1593
|
+
|
|
1594
|
+
async savePartitionIfDirty(pid) {
|
|
1595
|
+
const entry = this.loaded.get(pid);
|
|
1596
|
+
if (!entry) return;
|
|
1597
|
+
if (!entry.dirty) return;
|
|
1598
|
+
await this.adapter.savePartition(pid, entry.points);
|
|
1599
|
+
entry.dirty = false;
|
|
1600
|
+
}
|
|
1601
|
+
|
|
1602
|
+
async _evictIfNeeded() {
|
|
1603
|
+
if (this.loaded.size <= this.maxLoadedPartitions) return;
|
|
1604
|
+
// 淘汰最近最少访问的分区(除中心窗口)
|
|
1605
|
+
const avoid = new Set(this.partitioner.neighborsOf(this.centerPid ?? 0, this.windowRadius));
|
|
1606
|
+
// 构建按 lastAccess 升序
|
|
1607
|
+
const list = Array.from(this.loaded.entries())
|
|
1608
|
+
.filter(([pid]) => !avoid.has(pid))
|
|
1609
|
+
.sort((a, b) => a[1].lastAccess - b[1].lastAccess);
|
|
1610
|
+
while (this.loaded.size > this.maxLoadedPartitions && list.length) {
|
|
1611
|
+
const [pid, entry] = list.shift();
|
|
1612
|
+
await this.savePartitionIfDirty(pid);
|
|
1613
|
+
// 从全局视图移除
|
|
1614
|
+
for (const [id] of entry.points.entries()) this.points.delete(id);
|
|
1615
|
+
this.loaded.delete(pid);
|
|
1616
|
+
logPart('evicted partition', pid);
|
|
1617
|
+
}
|
|
1618
|
+
}
|
|
1619
|
+
|
|
1620
|
+
_touch(pid) {
|
|
1621
|
+
const entry = this.loaded.get(pid);
|
|
1622
|
+
if (entry) entry.lastAccess = ++this.accessTick;
|
|
1623
|
+
}
|
|
1624
|
+
|
|
1625
|
+
_applyEdgeEvent(targetBundle, e) {
|
|
1626
|
+
// 事件格式:{ type:'cross-edge', from:'id', to:'id', weight, direction, fromPid, toPid }
|
|
1627
|
+
if (!e || e.type !== 'cross-edge') return;
|
|
1628
|
+
const ensurePoint = (m, id) => {
|
|
1629
|
+
if (!m.has(id)) m.set(id, { pointID: id, connect: [] });
|
|
1630
|
+
return m.get(id);
|
|
1631
|
+
};
|
|
1632
|
+
const mp = targetBundle.points;
|
|
1633
|
+
const pFrom = ensurePoint(mp, e.from);
|
|
1634
|
+
const pTo = ensurePoint(mp, e.to);
|
|
1635
|
+
// 在 from 中落边(若 from 属于本分区)
|
|
1636
|
+
if (e.toPid === e.fromPid) {
|
|
1637
|
+
// 同分区事件(理论上不会在事件日志里)
|
|
1638
|
+
if (!pFrom.connect.some(([w, id, d]) => id === e.to && d === e.direction)) {
|
|
1639
|
+
pFrom.connect.push([e.weight, e.to, e.direction]);
|
|
1640
|
+
}
|
|
1641
|
+
} else {
|
|
1642
|
+
// 当前 bundle 即为 toPid 或 fromPid 的载体
|
|
1643
|
+
if (e.toPid === this.partitioner.idOf(pTo.pointID)) {
|
|
1644
|
+
// 对于目标分区,至少要保证可被 selectPath 遍历;保留边终点即可(可选:反向提示边)
|
|
1645
|
+
// 不在 pTo 里写边(避免双写),仅保证 from 的边会在 from 分区生效
|
|
1646
|
+
}
|
|
1647
|
+
if (e.fromPid === this.partitioner.idOf(pFrom.pointID)) {
|
|
1648
|
+
if (!pFrom.connect.some(([w, id, d]) => id === e.to && d === e.direction)) {
|
|
1649
|
+
pFrom.connect.push([e.weight, e.to, e.direction]);
|
|
1650
|
+
}
|
|
1651
|
+
}
|
|
1652
|
+
}
|
|
1653
|
+
}
|
|
1654
|
+
|
|
1655
|
+
// ---------- 滑动窗口 ----------
|
|
1656
|
+
async focusOnPoint(pointID) {
|
|
1657
|
+
const pid = this.partitioner.idOf(pointID);
|
|
1658
|
+
this.centerPid = pid;
|
|
1659
|
+
const toLoad = this.partitioner.neighborsOf(pid, this.windowRadius);
|
|
1660
|
+
for (const id of toLoad) await this.ensureLoaded(id);
|
|
1661
|
+
await this._evictIfNeeded();
|
|
1662
|
+
}
|
|
1663
|
+
|
|
1664
|
+
// ---------- 兼容 API:点/边 操作 ----------
|
|
1665
|
+
addPoint(pointID, connect = []) {
|
|
1666
|
+
const pid = this.partitioner.idOf(pointID);
|
|
1667
|
+
const ensure = (bundle) => {
|
|
1668
|
+
if (!bundle.points.has(pointID)) bundle.points.set(pointID, { pointID, connect: [] });
|
|
1669
|
+
this.points.set(pointID, bundle.points.get(pointID));
|
|
1670
|
+
return bundle.points.get(pointID);
|
|
1671
|
+
};
|
|
1672
|
+
return this.ensureLoaded(pid).then(bundle => {
|
|
1673
|
+
const p = ensure(bundle);
|
|
1674
|
+
// 添加本地边;跨分区写事件
|
|
1675
|
+
for (const [w, nid, dir] of connect) this._addEdgeInternal(pid, p, w, nid, dir, bundle);
|
|
1676
|
+
bundle.dirty = true;
|
|
1677
|
+
});
|
|
1678
|
+
}
|
|
1679
|
+
|
|
1680
|
+
_addEdgeInternal(fromPid, fromPoint, weight, toID, direction, bundleOfFrom) {
|
|
1681
|
+
const toPid = this.partitioner.idOf(toID);
|
|
1682
|
+
const w = (typeof weight === 'number' && isFinite(weight)) ? weight : 1;
|
|
1683
|
+
const d = (direction === 0 || direction === 1 || direction === 2) ? direction : 0;
|
|
1684
|
+
|
|
1685
|
+
if (toPid === fromPid) {
|
|
1686
|
+
// 同分区直接写
|
|
1687
|
+
if (!fromPoint.connect.some(([ww, id, dd]) => id === toID && dd === d)) {
|
|
1688
|
+
fromPoint.connect.push([w, toID, d]);
|
|
1689
|
+
bundleOfFrom.dirty = true;
|
|
1690
|
+
}
|
|
1691
|
+
} else {
|
|
1692
|
+
// 跨分区 -> 记录边界事件至 fromPid(或 toPid 都可,这里记录到 fromPid,toPid 加载时也会消费相关事件)
|
|
1693
|
+
this.adapter.appendEdgeEvent(fromPid, {
|
|
1694
|
+
type: 'cross-edge',
|
|
1695
|
+
from: fromPoint.pointID,
|
|
1696
|
+
to: toID,
|
|
1697
|
+
weight: w,
|
|
1698
|
+
direction: d,
|
|
1699
|
+
fromPid,
|
|
1700
|
+
toPid
|
|
1701
|
+
});
|
|
1702
|
+
// 同时对“已加载且包含 toPid 的 bundle”进行即时应用(若存在)
|
|
1703
|
+
const toBundle = this.loaded.get(toPid);
|
|
1704
|
+
if (toBundle) {
|
|
1705
|
+
// 在 from 分区已经写入 from->to 事件;对于 to 分区无需写边(避免双写),可选择记录提示(此处略)
|
|
1706
|
+
}
|
|
1707
|
+
}
|
|
1708
|
+
}
|
|
1709
|
+
|
|
1710
|
+
addBidirectionalEdge(id1, id2, weight = 1) {
|
|
1711
|
+
return this.addEdge(id1, id2, weight, 0);
|
|
1712
|
+
}
|
|
744
1713
|
|
|
745
|
-
|
|
746
|
-
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
|
|
753
|
-
|
|
754
|
-
|
|
1714
|
+
async addEdge(fromID, toID, weight = 1, direction = 0) {
|
|
1715
|
+
const fromPid = this.partitioner.idOf(fromID);
|
|
1716
|
+
const fromBundle = await this.ensureLoaded(fromPid);
|
|
1717
|
+
if (!fromBundle.points.has(fromID)) {
|
|
1718
|
+
fromBundle.points.set(fromID, { pointID: fromID, connect: [] });
|
|
1719
|
+
this.points.set(fromID, fromBundle.points.get(fromID));
|
|
1720
|
+
}
|
|
1721
|
+
const fromPoint = fromBundle.points.get(fromID);
|
|
1722
|
+
this._addEdgeInternal(fromPid, fromPoint, weight, toID, direction, fromBundle);
|
|
1723
|
+
|
|
1724
|
+
if (direction === 0) {
|
|
1725
|
+
// 双向边:反向写入
|
|
1726
|
+
const toPid = this.partitioner.idOf(toID);
|
|
1727
|
+
const toBundle = await this.ensureLoaded(toPid);
|
|
1728
|
+
if (!toBundle.points.has(toID)) {
|
|
1729
|
+
toBundle.points.set(toID, { pointID: toID, connect: [] });
|
|
1730
|
+
this.points.set(toID, toBundle.points.get(toID));
|
|
755
1731
|
}
|
|
1732
|
+
const toPoint = toBundle.points.get(toID);
|
|
1733
|
+
this._addEdgeInternal(toPid, toPoint, weight, fromID, 0, toBundle);
|
|
1734
|
+
}
|
|
1735
|
+
}
|
|
756
1736
|
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
1737
|
+
async updateEdge(fromID, toID, newWeight, direction = 0) {
|
|
1738
|
+
const fromPid = this.partitioner.idOf(fromID);
|
|
1739
|
+
const b = await this.ensureLoaded(fromPid);
|
|
1740
|
+
const p = b.points.get(fromID);
|
|
1741
|
+
if (!p) return;
|
|
1742
|
+
const idx = p.connect.findIndex(([w, id, d]) => id === toID && d === direction);
|
|
1743
|
+
if (idx >= 0) {
|
|
1744
|
+
p.connect[idx][0] = newWeight;
|
|
1745
|
+
b.dirty = true;
|
|
1746
|
+
} else {
|
|
1747
|
+
// 不存在则添加
|
|
1748
|
+
this._addEdgeInternal(fromPid, p, newWeight, toID, direction, b);
|
|
1749
|
+
}
|
|
1750
|
+
}
|
|
762
1751
|
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
1752
|
+
existEdge(fromID, toID) {
|
|
1753
|
+
const fromPid = this.partitioner.idOf(fromID);
|
|
1754
|
+
const entry = this.loaded.get(fromPid);
|
|
1755
|
+
if (!entry) return { exist: false, weight: undefined, type: undefined };
|
|
1756
|
+
const p = entry.points.get(fromID);
|
|
1757
|
+
if (!p) return { exist: false, weight: undefined, type: undefined };
|
|
1758
|
+
const found = p.connect.find(([w, id]) => id === toID);
|
|
1759
|
+
return { exist: !!found, weight: found ? found[0] : undefined, type: found ? found[2] : undefined };
|
|
1760
|
+
}
|
|
769
1761
|
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
1762
|
+
existPoint(pointID) {
|
|
1763
|
+
// 仅检查已加载窗口
|
|
1764
|
+
const p = this.points.get(pointID);
|
|
1765
|
+
return { exist: !!p, connect: p ? p.connect : [] };
|
|
1766
|
+
}
|
|
1767
|
+
|
|
1768
|
+
deleteEdge(a, b) {
|
|
1769
|
+
const pid = this.partitioner.idOf(a);
|
|
1770
|
+
const entry = this.loaded.get(pid);
|
|
1771
|
+
if (!entry) return;
|
|
1772
|
+
const p = entry.points.get(a);
|
|
1773
|
+
if (!p) return;
|
|
1774
|
+
const before = p.connect.length;
|
|
1775
|
+
p.connect = p.connect.filter(([_, id]) => id !== b);
|
|
1776
|
+
entry.dirty = entry.dirty || (p.connect.length !== before);
|
|
1777
|
+
}
|
|
1778
|
+
|
|
1779
|
+
deletePoint(pointID) {
|
|
1780
|
+
const pid = this.partitioner.idOf(pointID);
|
|
1781
|
+
const entry = this.loaded.get(pid);
|
|
1782
|
+
if (!entry) return;
|
|
1783
|
+
if (entry.points.has(pointID)) {
|
|
1784
|
+
entry.points.delete(pointID);
|
|
1785
|
+
this.points.delete(pointID);
|
|
1786
|
+
entry.dirty = true;
|
|
774
1787
|
}
|
|
775
|
-
return null;
|
|
776
1788
|
}
|
|
777
1789
|
|
|
778
|
-
//
|
|
1790
|
+
// 仅遍历窗口内点(兼容旧 getAllPoints 调用)
|
|
779
1791
|
getAllPoints() {
|
|
780
1792
|
return Array.from(this.points.values());
|
|
781
1793
|
}
|
|
782
1794
|
|
|
783
|
-
//
|
|
784
|
-
|
|
785
|
-
|
|
1795
|
+
// 导出全量点(跨所有分区),用于快照/发布
|
|
1796
|
+
async exportAllPoints() {
|
|
1797
|
+
const out = [];
|
|
1798
|
+
// 尝试枚举 FS 分区;其他后端可按 0..N-1 遍历或仅导出已加载窗口
|
|
1799
|
+
const ids = await this.adapter.listPartitionIds();
|
|
1800
|
+
if (ids.length === 0) {
|
|
1801
|
+
// 回退:导出窗口
|
|
1802
|
+
return this.getAllPoints();
|
|
1803
|
+
}
|
|
1804
|
+
for (const pid of ids) {
|
|
1805
|
+
const part = await this.adapter.loadPartition(pid);
|
|
1806
|
+
for (const [, p] of part.points.entries()) out.push({ pointID: p.pointID, connect: p.connect || [] });
|
|
1807
|
+
}
|
|
1808
|
+
return out;
|
|
786
1809
|
}
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
1810
|
+
|
|
1811
|
+
// 批量导入(将 legacy 点集落到分区)
|
|
1812
|
+
async importAllPoints(pointsArr) {
|
|
1813
|
+
if (!Array.isArray(pointsArr)) return;
|
|
1814
|
+
// 分桶
|
|
1815
|
+
const buckets = new Map();
|
|
1816
|
+
for (const p of pointsArr) {
|
|
1817
|
+
const pid = this.partitioner.idOf(p.pointID);
|
|
1818
|
+
if (!buckets.has(pid)) buckets.set(pid, new Map());
|
|
1819
|
+
const bm = buckets.get(pid);
|
|
1820
|
+
bm.set(p.pointID, { pointID: p.pointID, connect: Array.isArray(p.connect) ? p.connect.slice() : [] });
|
|
1821
|
+
}
|
|
1822
|
+
// 写入并更新窗口视图(懒加载)
|
|
1823
|
+
for (const [pid, map] of buckets.entries()) {
|
|
1824
|
+
await this.adapter.savePartition(pid, map);
|
|
1825
|
+
// 若已加载该分区,刷新内存镜像
|
|
1826
|
+
if (this.loaded.has(pid)) {
|
|
1827
|
+
const entry = this.loaded.get(pid);
|
|
1828
|
+
// 从全局视图移除旧
|
|
1829
|
+
for (const [id] of entry.points.entries()) this.points.delete(id);
|
|
1830
|
+
entry.points = map;
|
|
1831
|
+
entry.dirty = false;
|
|
1832
|
+
entry.lastAccess = ++this.accessTick;
|
|
1833
|
+
for (const [id, p] of map.entries()) this.points.set(id, p);
|
|
1834
|
+
}
|
|
791
1835
|
}
|
|
792
|
-
const connectArr = point.connect || [];
|
|
793
|
-
const found = connectArr.find(([_, id]) => id === neighborID);
|
|
794
|
-
return {
|
|
795
|
-
exist: connectArr.some(([_, id]) => id === neighborID),
|
|
796
|
-
weight: found ? found[0] : undefined,
|
|
797
|
-
type: found ? found[2] : undefined
|
|
798
|
-
};
|
|
799
1836
|
}
|
|
800
|
-
|
|
801
|
-
|
|
1837
|
+
|
|
1838
|
+
// 聚合邻居(窗口内),供传播使用
|
|
1839
|
+
getNeighbors(pointID, maxNeighbors = 50) {
|
|
1840
|
+
const p = this.points.get(pointID);
|
|
1841
|
+
if (!p) return [];
|
|
1842
|
+
return p.connect.slice(0, maxNeighbors);
|
|
802
1843
|
}
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
1844
|
+
|
|
1845
|
+
// A* 简化:仅在窗口内搜索;跳出窗口时,尝试预取邻接分区后再继续
|
|
1846
|
+
async selectPath(fromID, toID) {
|
|
1847
|
+
if (fromID === toID) return [fromID];
|
|
1848
|
+
// 优先保证焦点加载
|
|
1849
|
+
await this.focusOnPoint(fromID);
|
|
1850
|
+
|
|
1851
|
+
const reconstruct = (came, cur) => {
|
|
1852
|
+
const path = [];
|
|
1853
|
+
let t = cur;
|
|
1854
|
+
while (came.has(t)) { path.push(t); t = came.get(t); }
|
|
1855
|
+
path.push(fromID);
|
|
1856
|
+
return path.reverse();
|
|
1857
|
+
};
|
|
1858
|
+
|
|
1859
|
+
const open = new Set([fromID]);
|
|
1860
|
+
const came = new Map();
|
|
1861
|
+
const g = new Map([[fromID, 0]]);
|
|
1862
|
+
const f = new Map([[fromID, 1]]);
|
|
1863
|
+
const closed = new Set();
|
|
1864
|
+
|
|
1865
|
+
const heuristic = () => 1;
|
|
1866
|
+
let iter = 0;
|
|
1867
|
+
const MAX_ITERS = 5000;
|
|
1868
|
+
|
|
1869
|
+
while (open.size && iter++ < MAX_ITERS) {
|
|
1870
|
+
// 取 f 最小
|
|
1871
|
+
let cur = null; let minF = Infinity;
|
|
1872
|
+
for (const id of open) {
|
|
1873
|
+
const val = f.get(id) ?? Infinity;
|
|
1874
|
+
if (val < minF) { minF = val; cur = id; }
|
|
1875
|
+
}
|
|
1876
|
+
if (cur == null) break;
|
|
1877
|
+
if (cur === toID) return reconstruct(came, cur);
|
|
1878
|
+
|
|
1879
|
+
open.delete(cur);
|
|
1880
|
+
closed.add(cur);
|
|
1881
|
+
|
|
1882
|
+
// 若遇到未知点,尝试加载其分区(滑动窗口)
|
|
1883
|
+
if (!this.points.has(cur)) {
|
|
1884
|
+
await this.focusOnPoint(cur);
|
|
1885
|
+
}
|
|
1886
|
+
|
|
1887
|
+
const neighbors = this.getNeighbors(cur, 50);
|
|
1888
|
+
// 如果邻居为空,尝试边界事件预取(根据邻居 ID 的分区预取)
|
|
1889
|
+
if (neighbors.length === 0) {
|
|
1890
|
+
const pid = this.partitioner.idOf(cur);
|
|
1891
|
+
const ring = this.partitioner.neighborsOf(pid, 1);
|
|
1892
|
+
for (const rid of ring) await this.ensureLoaded(rid);
|
|
1893
|
+
}
|
|
1894
|
+
|
|
1895
|
+
for (const [w, nb] of neighbors) {
|
|
1896
|
+
if (closed.has(nb)) continue;
|
|
1897
|
+
const tentative = (g.get(cur) || Infinity) + w;
|
|
1898
|
+
if (!open.has(nb)) open.add(nb);
|
|
1899
|
+
else if (tentative >= (g.get(nb) || Infinity)) continue;
|
|
1900
|
+
|
|
1901
|
+
came.set(nb, cur);
|
|
1902
|
+
g.set(nb, tentative);
|
|
1903
|
+
f.set(nb, tentative + heuristic());
|
|
1904
|
+
}
|
|
807
1905
|
}
|
|
1906
|
+
return null;
|
|
808
1907
|
}
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
|
|
812
|
-
|
|
1908
|
+
|
|
1909
|
+
// 刷盘所有已加载分区
|
|
1910
|
+
async flushAll() {
|
|
1911
|
+
for (const [pid] of this.loaded.entries()) await this.savePartitionIfDirty(pid);
|
|
813
1912
|
}
|
|
814
1913
|
}
|
|
815
|
-
|
|
816
1914
|
class KVM {
|
|
817
1915
|
constructor() {
|
|
818
1916
|
this.memory = new Map();
|
|
@@ -910,14 +2008,22 @@ class Runtime {
|
|
|
910
2008
|
// 运行时负责AI核心的调度、模因转换、信号传递与主流程控制
|
|
911
2009
|
constructor(config = {}) {
|
|
912
2010
|
this.config = config;
|
|
913
|
-
|
|
2011
|
+
// 使用分区图作为模因图;词图仍用内存图
|
|
2012
|
+
this.graph = new PartitionedGraphDB({
|
|
2013
|
+
partitions: this.config.partitions || 64,
|
|
2014
|
+
maxLoadedPartitions: this.config.maxLoadedPartitions || 8,
|
|
2015
|
+
windowRadius: this.config.windowRadius || 1,
|
|
2016
|
+
baseDir: path.join(__dirname, 'graph_parts'),
|
|
2017
|
+
backend: this.config.graphBackend || 'lmdb' // 可选 'fs' | 'lmdb' | 'level'
|
|
2018
|
+
});
|
|
914
2019
|
this.wordGraph = new GraphDB();
|
|
915
2020
|
this.kvm = new KVM();
|
|
916
2021
|
|
|
917
2022
|
this.transformer = null;
|
|
918
2023
|
this.vocabManager = global.vocabmanager;
|
|
919
2024
|
this.spider = new Spider();
|
|
920
|
-
|
|
2025
|
+
// 新:按需检索器(基于全局爬虫)
|
|
2026
|
+
this.researcher = new OnlineResearcher(this);
|
|
921
2027
|
// 新:以“会话”为尺度的访问日志与会话管理
|
|
922
2028
|
this.session = new SessionManager({
|
|
923
2029
|
idleMs: this.config.sessionIdleMs || 10 * 60 * 1000,
|
|
@@ -925,7 +2031,7 @@ class Runtime {
|
|
|
925
2031
|
});
|
|
926
2032
|
// Map<word, Map<sessionId, count>>
|
|
927
2033
|
this.wordAccessLog = new Map();
|
|
928
|
-
|
|
2034
|
+
this.config.spiderMix = this.config.spiderMix || { onlineWeight: 0.5, offlineWeight: 0.5 };
|
|
929
2035
|
this.initWordGraph();
|
|
930
2036
|
this.forgetTimer = setInterval(() => this.forgetWords(), 350 * 1000);
|
|
931
2037
|
this.MAX_MEME_WORDS = 100;
|
|
@@ -939,12 +2045,61 @@ class Runtime {
|
|
|
939
2045
|
batchSizeMultiplier: 1
|
|
940
2046
|
};
|
|
941
2047
|
this.memeBarrier = new memeBarrier(this);
|
|
2048
|
+
}
|
|
2049
|
+
// 新增:应用可调参数(含 spiderMix / decayK / maxLen 等)
|
|
2050
|
+
applyTunableParams(partial = {}) {
|
|
2051
|
+
this.config = this.config || {};
|
|
2052
|
+
if (partial.spiderMix) {
|
|
2053
|
+
const ow = Math.max(0, Math.min(1, Number(partial.spiderMix.onlineWeight ?? this.config.spiderMix.onlineWeight ?? 0.5)));
|
|
2054
|
+
this.config.spiderMix = { onlineWeight: ow, offlineWeight: Math.max(0, Math.min(1, 1 - ow)) };
|
|
2055
|
+
}
|
|
2056
|
+
if (typeof partial.decayK === 'number') this.config.decayK = Math.max(0.1, Math.min(2.0, partial.decayK));
|
|
2057
|
+
if (typeof partial.maxLen === 'number') this.config.maxLen = Math.max(8, Math.min(64, Math.round(partial.maxLen)));
|
|
2058
|
+
if (typeof partial.edgeWeight === 'number') {
|
|
2059
|
+
for (const p of this.graph.getAllPoints()) for (const e of p.connect) e[0] = Math.max(0.1, Math.min(5, partial.edgeWeight));
|
|
2060
|
+
}
|
|
2061
|
+
// 可选:调节 crawler 抓取强度(若存在)
|
|
2062
|
+
if (global.__crawler) {
|
|
2063
|
+
if (typeof partial.perQuery === 'number') global.__crawler.__tune_perQuery = Math.max(2, Math.min(16, Math.round(partial.perQuery)));
|
|
2064
|
+
if (typeof partial.maxCrawl === 'number') global.__crawler.__tune_maxCrawl = Math.max(2, Math.min(24, Math.round(partial.maxCrawl)));
|
|
2065
|
+
}
|
|
2066
|
+
return {
|
|
2067
|
+
decayK: this.config.decayK,
|
|
2068
|
+
maxLen: this.config.maxLen,
|
|
2069
|
+
spiderMix: this.config.spiderMix,
|
|
2070
|
+
crawler: {
|
|
2071
|
+
perQuery: global.__crawler?.__tune_perQuery ?? 8,
|
|
2072
|
+
maxCrawl: global.__crawler?.__tune_maxCrawl ?? 12
|
|
2073
|
+
}
|
|
2074
|
+
};
|
|
942
2075
|
}
|
|
943
2076
|
// 添加到Runtime类内部
|
|
944
2077
|
filterStopWords(words) {
|
|
945
2078
|
return words.filter(word => !STOP_WORDS.includes(word.toLowerCase()));
|
|
946
2079
|
}
|
|
947
|
-
|
|
2080
|
+
async ingestTextDocument(raw, { addNewWords = true, minLen = 8 } = {}) {
|
|
2081
|
+
if (!raw) return 0;
|
|
2082
|
+
// 去除 meta,正文在空行后
|
|
2083
|
+
const parts = String(raw).split(/\r?\n\r?\n/);
|
|
2084
|
+
const body = parts.length > 1 ? parts.slice(1).join('\n') : parts[0];
|
|
2085
|
+
const sentences = body.split(/\r?\n+/).map(s => s.trim()).filter(Boolean);
|
|
2086
|
+
let fed = 0;
|
|
2087
|
+
for (const line of sentences) {
|
|
2088
|
+
// 分词 -> 归一化 -> 停用词过滤 -> processInput
|
|
2089
|
+
const words = line
|
|
2090
|
+
.toLowerCase()
|
|
2091
|
+
.replace(/[^a-z\s\u4e00-\u9fa5]/g, ' ')
|
|
2092
|
+
.split(/\s+/)
|
|
2093
|
+
.filter(w => w.length >= 2);
|
|
2094
|
+
if (!words.length) continue;
|
|
2095
|
+
const normalized = this.spider ? this.spider.lemmatizeWords(words) : words;
|
|
2096
|
+
const filtered = this.filterStopWords ? this.filterStopWords(normalized) : normalized;
|
|
2097
|
+
if (filtered.length < minLen) continue;
|
|
2098
|
+
this.processInput(filtered, { addNewWords });
|
|
2099
|
+
fed++;
|
|
2100
|
+
}
|
|
2101
|
+
return fed;
|
|
2102
|
+
}
|
|
948
2103
|
// 新增资源监控方法
|
|
949
2104
|
monitorSystemLoad() {
|
|
950
2105
|
const now = Date.now();
|
|
@@ -979,12 +2134,12 @@ class Runtime {
|
|
|
979
2134
|
return this.systemLoad.batchSizeMultiplier;
|
|
980
2135
|
}
|
|
981
2136
|
// 清理定时器
|
|
2137
|
+
// 清理定时器/刷盘
|
|
982
2138
|
cleanup() {
|
|
983
|
-
if (this.forgetTimer)
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
this.memeBarrier.stop();
|
|
2139
|
+
if (this.forgetTimer) clearInterval(this.forgetTimer);
|
|
2140
|
+
if (this.memeBarrier) this.memeBarrier.stop();
|
|
2141
|
+
if (this.graph && this.graph.flushAll) {
|
|
2142
|
+
this.graph.flushAll().catch(() => { });
|
|
988
2143
|
}
|
|
989
2144
|
}
|
|
990
2145
|
// Runtime类中添加监控函数
|
|
@@ -993,7 +2148,7 @@ class Runtime {
|
|
|
993
2148
|
for (const meme of memes) {
|
|
994
2149
|
const words = this.kvm.get(meme.pointID) || [];
|
|
995
2150
|
if (words.length > this.MAX_MEME_WORDS * 0.8) { // 如果接近最大限制
|
|
996
|
-
|
|
2151
|
+
// console.log(`[MONITOR] 检测到大模因: ${meme.pointID}, 词数: ${words.length}`);
|
|
997
2152
|
this.splitMemeIfNeeded(meme.pointID); // 尝试分裂
|
|
998
2153
|
}
|
|
999
2154
|
}
|
|
@@ -1062,14 +2217,14 @@ class Runtime {
|
|
|
1062
2217
|
// 不自动销毁
|
|
1063
2218
|
}
|
|
1064
2219
|
|
|
1065
|
-
dispose() {
|
|
2220
|
+
dispose() {
|
|
1066
2221
|
this.graph.points.clear();
|
|
1067
2222
|
this.wordGraph.points.clear();
|
|
1068
2223
|
this.kvm.memory.clear();
|
|
1069
2224
|
if (this.wordAccessLog) this.wordAccessLog.clear();
|
|
1070
2225
|
if (this.forgetTimer) clearInterval(this.forgetTimer);
|
|
1071
2226
|
}
|
|
1072
|
-
|
|
2227
|
+
// 将遗忘策略改为“最近N个会话窗口”
|
|
1073
2228
|
forgetWords() {
|
|
1074
2229
|
// 保护:收集所有被KVM引用的词
|
|
1075
2230
|
const protectedWords = new Set();
|
|
@@ -1203,19 +2358,15 @@ dispose() {
|
|
|
1203
2358
|
visitCount++;
|
|
1204
2359
|
activatedOrder.push(id);
|
|
1205
2360
|
|
|
1206
|
-
// 仅在是“词”时记录访问,避免把模因ID写入词访问日志
|
|
1207
2361
|
if (this.wordGraph.points.has(id)) {
|
|
1208
2362
|
this.logWordAccess(id);
|
|
1209
2363
|
}
|
|
1210
2364
|
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1214
|
-
|
|
1215
|
-
|
|
1216
|
-
if (!visited.has(neighborID)) {
|
|
1217
|
-
next.push({ id: neighborID, value: value - decayK * weight });
|
|
1218
|
-
}
|
|
2365
|
+
// 改为通过 graph.getNeighbors 访问(窗口内)
|
|
2366
|
+
const neighbors = this.graph.getNeighbors(id, 50);
|
|
2367
|
+
for (const [weight, neighborID] of neighbors) {
|
|
2368
|
+
if (!visited.has(neighborID)) {
|
|
2369
|
+
next.push({ id: neighborID, value: value - decayK * weight });
|
|
1219
2370
|
}
|
|
1220
2371
|
}
|
|
1221
2372
|
}
|
|
@@ -1270,7 +2421,11 @@ dispose() {
|
|
|
1270
2421
|
processInput(wordsArr, { addNewWords = true } = {}) {
|
|
1271
2422
|
wordsArr = this.filterStopWords(wordsArr);
|
|
1272
2423
|
if (wordsArr.length === 0) { console.log('[FILTER] 输入全为停用词,已全部过滤'); return; }
|
|
1273
|
-
|
|
2424
|
+
// console.log('Processing input:', wordsArr);
|
|
2425
|
+
// 异步触发在线检索(不阻塞)
|
|
2426
|
+
if (triggerResearch && this.researcher) {
|
|
2427
|
+
try { this.researcher.scheduleFromWords(wordsArr); } catch (_) { }
|
|
2428
|
+
}
|
|
1274
2429
|
// 批量处理新词添加
|
|
1275
2430
|
if (addNewWords) {
|
|
1276
2431
|
// 一次性检查哪些词不在词表中
|
|
@@ -1356,7 +2511,7 @@ dispose() {
|
|
|
1356
2511
|
const overlap = wordsArr.filter(w => memeWords.includes(w)).length;
|
|
1357
2512
|
if (overlap >= this.MIN_OVERLAP && memeWords.length + wordsArr.length <= this.MAX_MEME_WORDS) {
|
|
1358
2513
|
this.kvm.set(minMemeID, Array.from(new Set([...memeWords, ...wordsArr])));
|
|
1359
|
-
|
|
2514
|
+
/// console.log(`Merged to existing meme: ${minMemeID}`);
|
|
1360
2515
|
} else {
|
|
1361
2516
|
// 创建新模因,使用有向连接
|
|
1362
2517
|
const newID = 'meme_' + Date.now();
|
|
@@ -1366,9 +2521,9 @@ dispose() {
|
|
|
1366
2521
|
// 单向连接到最近的模因 (方向:2表示指向对方)
|
|
1367
2522
|
if (minMemeID) {
|
|
1368
2523
|
this.graph.addDirectionalEdge(newID, minMemeID, minDistance, 2);
|
|
1369
|
-
|
|
2524
|
+
// console.log(`[LINK] 新模因 ${newID} 单向连接到最近模因 ${minMemeID}`);
|
|
1370
2525
|
}
|
|
1371
|
-
|
|
2526
|
+
// console.log(`Created new meme: ${newID}`);
|
|
1372
2527
|
}
|
|
1373
2528
|
} else {
|
|
1374
2529
|
// 创建新模因
|
|
@@ -1379,9 +2534,9 @@ dispose() {
|
|
|
1379
2534
|
// 如果有较近的模因,仍然创建单向连接
|
|
1380
2535
|
if (minMemeID) {
|
|
1381
2536
|
this.graph.addDirectionalEdge(newID, minMemeID, Math.min(minDistance, 5), 2);
|
|
1382
|
-
|
|
2537
|
+
// console.log(`[LINK] 新模因 ${newID} 单向连接到最近模因 ${minMemeID}`);
|
|
1383
2538
|
}
|
|
1384
|
-
|
|
2539
|
+
// console.log(`Created new meme: ${newID}`);
|
|
1385
2540
|
}
|
|
1386
2541
|
}
|
|
1387
2542
|
// 新增批量添加边的辅助方法
|
|
@@ -1665,7 +2820,7 @@ dispose() {
|
|
|
1665
2820
|
this.kvm.memory.delete(memeB.pointID);
|
|
1666
2821
|
memesToDelete.add(memeB.pointID);
|
|
1667
2822
|
|
|
1668
|
-
|
|
2823
|
+
// console.log(`Merged memes: ${memeA.pointID} <- ${memeB.pointID}`);
|
|
1669
2824
|
// 合并后立即尝试分裂
|
|
1670
2825
|
this.splitMemeIfNeeded(memeA.pointID);
|
|
1671
2826
|
} else {
|
|
@@ -1682,7 +2837,7 @@ dispose() {
|
|
|
1682
2837
|
// 如果没有双向边,则添加双向边
|
|
1683
2838
|
if (!(existAtoB.exist && existAtoB.type === 0) && !(existBtoA.exist && existBtoA.type === 0)) {
|
|
1684
2839
|
this.graph.addBidirectionalEdge(memeA.pointID, memeB.pointID, avgDist);
|
|
1685
|
-
|
|
2840
|
+
// console.log(`[LINK] 添加双向边: ${memeA.pointID} <-> ${memeB.pointID} (avgDist=${avgDist})`);
|
|
1686
2841
|
}
|
|
1687
2842
|
}
|
|
1688
2843
|
}
|
|
@@ -1713,14 +2868,14 @@ dispose() {
|
|
|
1713
2868
|
const newID = newIDs[i];
|
|
1714
2869
|
this.graph.addPoint(newID, []);
|
|
1715
2870
|
this.kvm.set(newID, chunk);
|
|
1716
|
-
|
|
2871
|
+
// console.log(`[SPLIT-FORCE] 新建模因: ${newID} 词数: ${chunk.length}`);
|
|
1717
2872
|
}
|
|
1718
2873
|
}
|
|
1719
2874
|
|
|
1720
2875
|
// 删除原模因
|
|
1721
2876
|
this.graph.points.delete(memeID);
|
|
1722
2877
|
this.kvm.memory.delete(memeID);
|
|
1723
|
-
|
|
2878
|
+
// console.log(`[SPLIT-FORCE] 删除原模因: ${memeID}`);
|
|
1724
2879
|
return;
|
|
1725
2880
|
}
|
|
1726
2881
|
|
|
@@ -1768,12 +2923,12 @@ dispose() {
|
|
|
1768
2923
|
const newID = 'meme_' + Date.now() + '_' + Math.floor(Math.random() * 10000);
|
|
1769
2924
|
this.graph.addPoint(newID, []);
|
|
1770
2925
|
this.kvm.set(newID, comp);
|
|
1771
|
-
|
|
2926
|
+
// console.log(`[SPLIT] 新建模因: ${newID} 词数: ${comp.length}`);
|
|
1772
2927
|
}
|
|
1773
2928
|
// 删除原节点
|
|
1774
2929
|
this.graph.points.delete(memeID);
|
|
1775
2930
|
this.kvm.memory.delete(memeID);
|
|
1776
|
-
|
|
2931
|
+
// console.log(`[SPLIT] 删除原模因: ${memeID}`);
|
|
1777
2932
|
}
|
|
1778
2933
|
}
|
|
1779
2934
|
}
|
|
@@ -1793,6 +2948,7 @@ class AssociationLayer {
|
|
|
1793
2948
|
constructor(runtime) {
|
|
1794
2949
|
this.runtime = runtime;
|
|
1795
2950
|
this.patterns = []; // 存储发现的模因边关系模式
|
|
2951
|
+
this._prevClone = null; // 差量克隆基线
|
|
1796
2952
|
}
|
|
1797
2953
|
|
|
1798
2954
|
// 识别模因层的边关系模式
|
|
@@ -1849,83 +3005,33 @@ class AssociationLayer {
|
|
|
1849
3005
|
// ...前面的代码...
|
|
1850
3006
|
|
|
1851
3007
|
// 创建系统副本
|
|
3008
|
+
// 创建系统副本(差量克隆)
|
|
1852
3009
|
async cloneSystem() {
|
|
1853
|
-
console.log('[CLONE]
|
|
1854
|
-
const clone = new Runtime();
|
|
1855
|
-
clone.registerClone();
|
|
1856
|
-
|
|
3010
|
+
console.log('[CLONE] 差量克隆系统(递归分区哈希)');
|
|
1857
3011
|
try {
|
|
1858
|
-
|
|
1859
|
-
clone.
|
|
1860
|
-
|
|
1861
|
-
|
|
1862
|
-
|
|
1863
|
-
|
|
1864
|
-
|
|
1865
|
-
|
|
1866
|
-
|
|
3012
|
+
const clone = await deltaCloneRuntime(this._prevClone, this.runtime);
|
|
3013
|
+
clone.registerClone();
|
|
3014
|
+
this._prevClone = clone;
|
|
3015
|
+
return clone;
|
|
3016
|
+
} catch (err) {
|
|
3017
|
+
console.warn('[CLONE] 差量克隆失败,回退全量:', err.message);
|
|
3018
|
+
const fallback = new Runtime();
|
|
3019
|
+
fallback.registerClone();
|
|
3020
|
+
fallback.spider = this.runtime.spider;
|
|
3021
|
+
fallback.vocabManager.vocab = [...this.runtime.vocabManager.vocab];
|
|
3022
|
+
fallback.vocabManager.updateMappings();
|
|
1867
3023
|
for (const [key, value] of this.runtime.wordGraph.points.entries()) {
|
|
1868
|
-
|
|
1869
|
-
value.connect.map(conn => [...conn]) : []);
|
|
3024
|
+
fallback.wordGraph.addPoint(key, Array.isArray(value.connect) ? value.connect.map(conn => [...conn]) : []);
|
|
1870
3025
|
}
|
|
1871
|
-
|
|
1872
|
-
// 4. 复制模因网络
|
|
1873
|
-
console.log('[CLONE] 开始复制模因网络...');
|
|
1874
3026
|
for (const [key, value] of this.runtime.graph.points.entries()) {
|
|
1875
|
-
|
|
1876
|
-
value.connect.map(conn => [...conn]) : []);
|
|
3027
|
+
fallback.graph.addPoint(key, Array.isArray(value.connect) ? value.connect.map(conn => [...conn]) : []);
|
|
1877
3028
|
}
|
|
1878
|
-
|
|
1879
|
-
// 5. 复制KVM - 确保一致性和类型
|
|
1880
|
-
console.log('[CLONE] 开始复制KVM...');
|
|
1881
|
-
let nonEmptyCount = 0;
|
|
1882
|
-
let totalWordCount = 0;
|
|
1883
|
-
|
|
1884
3029
|
for (const [key, value] of this.runtime.kvm.memory.entries()) {
|
|
1885
|
-
|
|
1886
|
-
if (Array.isArray(value)) {
|
|
1887
|
-
// 确保数组中每个元素都是字符串且归一化
|
|
1888
|
-
const normalizedWords = value.map(word =>
|
|
1889
|
-
typeof word === 'string' ? word.toLowerCase().trim() : String(word)
|
|
1890
|
-
);
|
|
1891
|
-
|
|
1892
|
-
// 应用词形归一化
|
|
1893
|
-
const lemmatizedWords = clone.spider.lemmatizeWords(normalizedWords);
|
|
1894
|
-
clone.kvm.set(key, lemmatizedWords);
|
|
1895
|
-
|
|
1896
|
-
if (lemmatizedWords.length > 0) {
|
|
1897
|
-
nonEmptyCount++;
|
|
1898
|
-
totalWordCount += lemmatizedWords.length;
|
|
1899
|
-
}
|
|
1900
|
-
} else if (value != null) {
|
|
1901
|
-
// 非数组值转换为单元素数组
|
|
1902
|
-
const singleWord = String(value).toLowerCase().trim();
|
|
1903
|
-
const lemmatizedWord = clone.spider.lemmatize(singleWord);
|
|
1904
|
-
clone.kvm.set(key, [lemmatizedWord]);
|
|
1905
|
-
|
|
1906
|
-
nonEmptyCount++;
|
|
1907
|
-
totalWordCount++;
|
|
1908
|
-
} else {
|
|
1909
|
-
// null或undefined情况,设为空数组
|
|
1910
|
-
clone.kvm.set(key, []);
|
|
1911
|
-
}
|
|
3030
|
+
fallback.kvm.set(key, Array.isArray(value) ? [...value] : (value == null ? [] : [String(value)]));
|
|
1912
3031
|
}
|
|
1913
|
-
|
|
1914
|
-
console.log(`[CLONE] KVM复制完成: ${nonEmptyCount}个非空模因,${totalWordCount}个词语`);
|
|
1915
|
-
|
|
1916
|
-
// 6. 复制其他配置和参数
|
|
1917
|
-
console.log('[CLONE] 开始复制词表和其他属性...');
|
|
1918
|
-
clone.MAX_MEME_WORDS = this.runtime.MAX_MEME_WORDS;
|
|
1919
|
-
clone.MIN_OVERLAP = this.runtime.MIN_OVERLAP;
|
|
1920
|
-
clone.config = { ...this.runtime.config };
|
|
1921
|
-
|
|
1922
|
-
console.log('[CLONE] 系统副本创建完成');
|
|
1923
|
-
return clone;
|
|
1924
|
-
} catch (error) {
|
|
1925
|
-
console.error('[CLONE ERROR]', error);
|
|
1926
|
-
return clone;
|
|
3032
|
+
return fallback;
|
|
1927
3033
|
}
|
|
1928
|
-
}
|
|
3034
|
+
}
|
|
1929
3035
|
|
|
1930
3036
|
applyPatternsToClone(systemClone) {
|
|
1931
3037
|
console.log('[CLONE] 应用关系模式到副本');
|
|
@@ -1982,7 +3088,7 @@ class AssociationLayer {
|
|
|
1982
3088
|
meme.connect[connIdx][0] = newWeight;
|
|
1983
3089
|
meme.connect[connIdx][2] = direction;
|
|
1984
3090
|
|
|
1985
|
-
|
|
3091
|
+
// console.log(`[CLONE] 修改边权重: ${meme.pointID}->${meme.connect[connIdx][1]}, ${oldWeight}->${newWeight.toFixed(2)}, 保留方向: ${direction}`);
|
|
1986
3092
|
modified++;
|
|
1987
3093
|
}
|
|
1988
3094
|
}
|
|
@@ -2697,7 +3803,7 @@ class controller {
|
|
|
2697
3803
|
const sid = this.runtime.session.ensureActive();
|
|
2698
3804
|
this.runtime.session.incMessage(sid);
|
|
2699
3805
|
const words = text.toLowerCase().split(' ').filter(w => w.length > 0);
|
|
2700
|
-
this.runtime.processInput(words, { addNewWords: false });
|
|
3806
|
+
this.runtime.processInput(words, { addNewWords: false, triggerResearch: true });
|
|
2701
3807
|
return await this.runtime.generateResponseWithMemes(words);
|
|
2702
3808
|
}
|
|
2703
3809
|
// 启动自主学习
|
|
@@ -2799,6 +3905,27 @@ setInterval(() => {
|
|
|
2799
3905
|
saveQueued = false;
|
|
2800
3906
|
}
|
|
2801
3907
|
}, 10000); // 每10秒最多写盘一次
|
|
3908
|
+
setInterval(async () => {
|
|
3909
|
+
try {
|
|
3910
|
+
if (!global.__crawler || !global.ctrlA) return;
|
|
3911
|
+
const mix = global.ctrlA.runtime.config.spiderMix || { onlineWeight: 0.5, offlineWeight: 0.5 };
|
|
3912
|
+
// 以 1 - onlineWeight 的概率跳过本轮(控制强度)
|
|
3913
|
+
if (Math.random() > (mix.onlineWeight || 0.5)) return;
|
|
3914
|
+
|
|
3915
|
+
const docs = global.__crawler.loadRecentDocs(12);
|
|
3916
|
+
if (!docs.length) return;
|
|
3917
|
+
let fed = 0;
|
|
3918
|
+
for (const d of docs) {
|
|
3919
|
+
fed += await global.ctrlA.runtime.ingestTextDocument(d.text, { addNewWords: true, minLen: 6 });
|
|
3920
|
+
}
|
|
3921
|
+
if (fed > 0) {
|
|
3922
|
+
console.log(`[INGEST] 在线文档本轮投喂 ${fed} 段 (mix=${mix.onlineWeight.toFixed(2)})`);
|
|
3923
|
+
global.ctrlA.runtime.updateAttentionLinks();
|
|
3924
|
+
}
|
|
3925
|
+
} catch (e) {
|
|
3926
|
+
console.warn('[INGEST] 失败:', e.message);
|
|
3927
|
+
}
|
|
3928
|
+
}, 20_000);
|
|
2802
3929
|
// 从硬盘恢复
|
|
2803
3930
|
// 从硬盘恢复
|
|
2804
3931
|
function loadAll(runtime) {
|
|
@@ -2845,138 +3972,16 @@ function loadAll(runtime) {
|
|
|
2845
3972
|
|
|
2846
3973
|
|
|
2847
3974
|
function scheduleCrossLearning() {
|
|
2848
|
-
const
|
|
2849
|
-
|
|
2850
|
-
|
|
2851
|
-
|
|
2852
|
-
|
|
2853
|
-
|
|
2854
|
-
|
|
2855
|
-
global.ctrlA.startSelfLearning(3).then(() => {
|
|
2856
|
-
setTimeout(async () => {
|
|
2857
|
-
console.log('[CROSS] A将成果传递给B');
|
|
2858
|
-
const associationLayer = new AssociationLayer(global.ctrlA.runtime);
|
|
2859
|
-
|
|
2860
|
-
// 归一化一致性测试
|
|
2861
|
-
const testWord = "testing";
|
|
2862
|
-
const normalizedWord = global.ctrlA.runtime.spider.lemmatize(testWord);
|
|
2863
|
-
console.log(`[CROSS-CHECK] 词归一化测试: "${testWord}" -> "${normalizedWord}"`);
|
|
2864
|
-
//const associationLayer = new AssociationLayer(global.ctrlA.runtime);
|
|
2865
|
-
// 这里要加上
|
|
2866
|
-
associationLayer.patterns = associationLayer.identifyPatterns();
|
|
2867
|
-
// 克隆
|
|
2868
|
-
const systemClone = await associationLayer.cloneSystem();
|
|
2869
|
-
systemClone.spider = global.ctrlA.runtime.spider;
|
|
2870
|
-
associationLayer.applyPatternsToClone(systemClone);
|
|
2871
|
-
|
|
2872
|
-
verifySystemConsistency(global.ctrlA.runtime, systemClone);
|
|
2873
|
-
|
|
2874
|
-
// 预热克隆系统
|
|
2875
|
-
console.log('[CROSS] 预热克隆系统...');
|
|
2876
|
-
const sampleWords = Array.from(
|
|
2877
|
-
new Set([...systemClone.vocabManager.vocab].slice(4, 20))
|
|
2878
|
-
).filter(w => w.length > 1);
|
|
2879
|
-
await systemClone.processInput(sampleWords, { addNewWords: false });
|
|
2880
|
-
|
|
2881
|
-
await systemClone.startSelfLearning(3);
|
|
2882
|
-
global.ctrlB.updateRuntime(systemClone);
|
|
2883
|
-
|
|
2884
|
-
verifySystemConsistency(global.ctrlA.runtime, global.ctrlB.runtime);
|
|
2885
|
-
|
|
2886
|
-
console.log('[CROSS] 已将A的系统更新到B');
|
|
2887
|
-
testCloneMatching(global.ctrlB.runtime);
|
|
2888
|
-
}, learnTime);
|
|
2889
|
-
});
|
|
2890
|
-
}, cycle);
|
|
2891
|
-
|
|
2892
|
-
// B -> C
|
|
2893
|
-
registerInterval(() => {
|
|
2894
|
-
if (global.ctrlA.isLearning || global.ctrlB.isLearning || global.ctrlC.isLearning || isShuttingDown) return;
|
|
2895
|
-
global.ctrlB.startSelfLearning(3).then(() => {
|
|
2896
|
-
setTimeout(async () => {
|
|
2897
|
-
console.log('[CROSS] B将成果传递给C');
|
|
2898
|
-
const associationLayer = new AssociationLayer(global.ctrlB.runtime);
|
|
2899
|
-
|
|
2900
|
-
const testWord = "testing";
|
|
2901
|
-
const normalizedWord = global.ctrlB.runtime.spider.lemmatize(testWord);
|
|
2902
|
-
console.log(`[CROSS-CHECK] 词归一化测试: "${testWord}" -> "${normalizedWord}"`);
|
|
2903
|
-
// 这里要加上
|
|
2904
|
-
associationLayer.patterns = associationLayer.identifyPatterns();
|
|
2905
|
-
const systemClone = await associationLayer.cloneSystem();
|
|
2906
|
-
systemClone.spider = global.ctrlB.runtime.spider;
|
|
2907
|
-
associationLayer.applyPatternsToClone(systemClone);
|
|
2908
|
-
|
|
2909
|
-
verifySystemConsistency(global.ctrlB.runtime, systemClone);
|
|
2910
|
-
|
|
2911
|
-
console.log('[CROSS] 预热克隆系统...');
|
|
2912
|
-
const sampleWords = Array.from(
|
|
2913
|
-
new Set([...systemClone.vocabManager.vocab].slice(4, 20))
|
|
2914
|
-
).filter(w => w.length > 1);
|
|
2915
|
-
await systemClone.processInput(sampleWords, { addNewWords: false });
|
|
2916
|
-
|
|
2917
|
-
await systemClone.startSelfLearning(3);
|
|
2918
|
-
global.ctrlC.updateRuntime(systemClone);
|
|
2919
|
-
|
|
2920
|
-
verifySystemConsistency(global.ctrlB.runtime, global.ctrlC.runtime);
|
|
2921
|
-
|
|
2922
|
-
console.log('[CROSS] 已将B的系统更新到C');
|
|
2923
|
-
testCloneMatching(global.ctrlC.runtime);
|
|
2924
|
-
}, learnTime);
|
|
2925
|
-
});
|
|
2926
|
-
}, cycle);
|
|
2927
|
-
|
|
2928
|
-
// C -> A
|
|
2929
|
-
registerInterval(() => {
|
|
2930
|
-
if (global.ctrlA.isLearning || global.ctrlB.isLearning || global.ctrlC.isLearning || isShuttingDown) return;
|
|
2931
|
-
global.ctrlC.startSelfLearning(3).then(() => {
|
|
2932
|
-
setTimeout(async () => {
|
|
2933
|
-
console.log('[CROSS] C将成果传递给A');
|
|
2934
|
-
const associationLayer = new AssociationLayer(global.ctrlC.runtime);
|
|
2935
|
-
|
|
2936
|
-
const testWord = "testing";
|
|
2937
|
-
const normalizedWord = global.ctrlC.runtime.spider.lemmatize(testWord);
|
|
2938
|
-
console.log(`[CROSS-CHECK] 词归一化测试: "${testWord}" -> "${normalizedWord}"`);
|
|
2939
|
-
// 这里要加上
|
|
2940
|
-
associationLayer.patterns = associationLayer.identifyPatterns();
|
|
2941
|
-
const systemClone = await associationLayer.cloneSystem();
|
|
2942
|
-
systemClone.spider = global.ctrlC.runtime.spider;
|
|
2943
|
-
associationLayer.applyPatternsToClone(systemClone);
|
|
2944
|
-
|
|
2945
|
-
verifySystemConsistency(global.ctrlC.runtime, systemClone);
|
|
2946
|
-
|
|
2947
|
-
console.log('[CROSS] 预热克隆系统...');
|
|
2948
|
-
const sampleWords = Array.from(
|
|
2949
|
-
new Set([...systemClone.vocabManager.vocab].slice(4, 20))
|
|
2950
|
-
).filter(w => w.length > 1);
|
|
2951
|
-
await systemClone.processInput(sampleWords, { addNewWords: false });
|
|
2952
|
-
|
|
2953
|
-
await systemClone.startSelfLearning(3);
|
|
2954
|
-
global.ctrlA.updateRuntime(systemClone);
|
|
2955
|
-
|
|
2956
|
-
verifySystemConsistency(global.ctrlC.runtime, global.ctrlA.runtime);
|
|
2957
|
-
|
|
2958
|
-
console.log('[CROSS] 已将C的系统更新到A');
|
|
2959
|
-
testCloneMatching(global.ctrlA.runtime);
|
|
2960
|
-
}, learnTime);
|
|
2961
|
-
});
|
|
2962
|
-
console.log('Publishing runtime state to Redis...');
|
|
2963
|
-
if (!RuntimeMessage) return; // protobuf未加载完成
|
|
2964
|
-
if (!redisClient || !redisClient.isOpen) {
|
|
2965
|
-
console.warn('[REDIS] 客户端未连接,跳过发布');
|
|
2966
|
-
return;
|
|
2967
|
-
}
|
|
2968
|
-
if (!RuntimeMessage) return;
|
|
2969
|
-
if (global.ctrlA.runtime.isLearning || global.ctrlA.runtime.isMainLoopRunning) return;
|
|
2970
|
-
const plainObj = runtimeToPlain(global.ctrlA.runtime);
|
|
2971
|
-
const errMsg = RuntimeMessage.verify(plainObj);
|
|
2972
|
-
if (errMsg) throw Error(errMsg);
|
|
2973
|
-
const message = RuntimeMessage.create(plainObj);
|
|
2974
|
-
const buffer = RuntimeMessage.encode(message).finish();
|
|
2975
|
-
redisClient.publish(`AI-model-${__dirname}`, buffer);
|
|
2976
|
-
console.log('已发布运行时状态到Redis');
|
|
2977
|
-
}, cycle);
|
|
3975
|
+
const rot = new RotationManager(global.ctrlA, global.ctrlB, global.ctrlC, {
|
|
3976
|
+
cycleMs: 15 * 60 * 1000, // 可根据需要调整
|
|
3977
|
+
cooldownMs: 60 * 1000,
|
|
3978
|
+
learnIters: 3,
|
|
3979
|
+
minImprove: 0.005
|
|
3980
|
+
});
|
|
3981
|
+
rot.start();
|
|
2978
3982
|
}
|
|
2979
3983
|
|
|
3984
|
+
|
|
2980
3985
|
// 新增:测试克隆系统的词汇匹配能力
|
|
2981
3986
|
function testCloneMatching(runtime) {
|
|
2982
3987
|
// 从词表中随机选取10个词
|
|
@@ -3115,7 +4120,7 @@ function optimizeMemory() {
|
|
|
3115
4120
|
}
|
|
3116
4121
|
}
|
|
3117
4122
|
async function main() {
|
|
3118
|
-
|
|
4123
|
+
console.log('Starting AI system...');
|
|
3119
4124
|
redisClient = redis.createClient();
|
|
3120
4125
|
// 创建三个全局控制器副本
|
|
3121
4126
|
const ctrlA = new controller();
|
|
@@ -3130,11 +4135,38 @@ async function main() {
|
|
|
3130
4135
|
loadAll(ctrlA.runtime);
|
|
3131
4136
|
loadAll(ctrlB.runtime);
|
|
3132
4137
|
loadAll(ctrlC.runtime);
|
|
3133
|
-
|
|
4138
|
+
const crawler = new CrawlerManager({
|
|
4139
|
+
concurrency: 5,
|
|
4140
|
+
perHostDelayMs: 2000,
|
|
4141
|
+
requestTimeoutMs: 12000,
|
|
4142
|
+
allowLang: ['en', 'zh'],
|
|
4143
|
+
seedsFile: path.join(__dirname, 'robots', 'seeds.txt'),
|
|
4144
|
+
proxiesFile: path.join(__dirname, 'crawler', 'proxies.txt')
|
|
4145
|
+
});
|
|
4146
|
+
global.__crawler = crawler;
|
|
4147
|
+
crawler.start();
|
|
4148
|
+
setInterval(async () => {
|
|
4149
|
+
try {
|
|
4150
|
+
const docs = crawler.loadRecentDocs(12);
|
|
4151
|
+
if (!docs.length) return;
|
|
4152
|
+
let fed = 0;
|
|
4153
|
+
for (const d of docs) {
|
|
4154
|
+
fed += await ctrlA.runtime.ingestTextDocument(d.text, { addNewWords: true, minLen: 6 });
|
|
4155
|
+
}
|
|
4156
|
+
if (fed > 0) {
|
|
4157
|
+
console.log(`[INGEST] 在线文档本轮投喂 ${fed} 段`);
|
|
4158
|
+
// 适当刷新注意力连接
|
|
4159
|
+
ctrlA.runtime.updateAttentionLinks();
|
|
4160
|
+
}
|
|
4161
|
+
} catch (e) {
|
|
4162
|
+
console.warn('[INGEST] 失败:', e.message);
|
|
4163
|
+
}
|
|
4164
|
+
}, 20_000); // 每20秒消费一批
|
|
3134
4165
|
// 用A副本初始化语料和模因
|
|
3135
4166
|
console.time('articleProcessing');
|
|
3136
4167
|
const articles = ctrlA.runtime.buildVocabFromSpider();
|
|
3137
4168
|
console.log(`Spider: 加载文章数: ${articles.length}`);
|
|
4169
|
+
// 周期性消费在线文档 -> 投喂到 ctrlA.runtime
|
|
3138
4170
|
|
|
3139
4171
|
// 修复:在首次使用前定义 lemmaCsvPath
|
|
3140
4172
|
const BATCH_SIZE = 20;
|
|
@@ -3187,30 +4219,177 @@ async function main() {
|
|
|
3187
4219
|
//每12分钟尝试启动memebarrier
|
|
3188
4220
|
ctrlA.runtime.memeBarrier.start();
|
|
3189
4221
|
}, 1000 * 60 * 12);
|
|
3190
|
-
// API路由 - 只做学习,不返回结果
|
|
3191
|
-
app.post('/api/chat', async (req, res) => {
|
|
3192
|
-
try {
|
|
3193
|
-
const { message, sessionId } = req.body || {};
|
|
3194
|
-
// 支持从Header透传会话
|
|
3195
|
-
const headerSid = req.headers['x-session-id'];
|
|
3196
|
-
const sid = sessionId || headerSid || global.ctrlA.runtime.session.ensureActive();
|
|
3197
4222
|
|
|
3198
|
-
// 使用/续接会话并计数
|
|
3199
|
-
global.ctrlA.runtime.session.useSession(sid);
|
|
3200
|
-
global.ctrlA.runtime.session.incMessage(sid);
|
|
3201
4223
|
|
|
3202
|
-
|
|
3203
|
-
|
|
3204
|
-
|
|
4224
|
+
// 可选:启动(默认不开启,避免未配置API Key)
|
|
4225
|
+
if (String(process.env.ADV_AUTOSTART || '').toLowerCase() === 'true') {
|
|
4226
|
+
adv.start();
|
|
4227
|
+
}
|
|
4228
|
+
// 新增:serve 侧参数调优 API(默认不启用自动调参,仅手动设置)
|
|
4229
|
+
app.get('/api/tune/get', (req, res) => {
|
|
4230
|
+
try {
|
|
4231
|
+
const rt = global.ctrlA?.runtime;
|
|
4232
|
+
if (!rt) return res.status(500).json({ ok: false, error: 'runtime missing' });
|
|
4233
|
+
res.json({
|
|
4234
|
+
ok: true,
|
|
4235
|
+
params: {
|
|
4236
|
+
decayK: rt.config?.decayK ?? 1,
|
|
4237
|
+
maxLen: rt.config?.maxLen ?? 16,
|
|
4238
|
+
spiderMix: rt.config?.spiderMix ?? { onlineWeight: 0.5, offlineWeight: 0.5 },
|
|
4239
|
+
crawler: {
|
|
4240
|
+
perQuery: global.__crawler?.__tune_perQuery ?? 8,
|
|
4241
|
+
maxCrawl: global.__crawler?.__tune_maxCrawl ?? 12
|
|
4242
|
+
}
|
|
4243
|
+
}
|
|
4244
|
+
});
|
|
4245
|
+
} catch (e) {
|
|
4246
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4247
|
+
}
|
|
4248
|
+
});
|
|
4249
|
+
|
|
4250
|
+
app.post('/api/tune/set', (req, res) => {
|
|
4251
|
+
try {
|
|
4252
|
+
const rt = global.ctrlA?.runtime;
|
|
4253
|
+
if (!rt) return res.status(500).json({ ok: false, error: 'runtime missing' });
|
|
4254
|
+
const snap = applyServeTunableParams(rt, req.body || {});
|
|
4255
|
+
res.json({ ok: true, snapshot: snap });
|
|
4256
|
+
} catch (e) {
|
|
4257
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4258
|
+
}
|
|
4259
|
+
});
|
|
4260
|
+
// API
|
|
4261
|
+
// 初始化对抗学习调度器时传递 promptMode/targetWeights 可选参数
|
|
4262
|
+
// ...existing code...
|
|
4263
|
+
const adv = new AdversaryScheduler(ctrlA.runtime, {
|
|
4264
|
+
providerSpec: process.env.ADV_MODEL || 'ollama:llama3.1:405b',
|
|
4265
|
+
judgeMode: process.env.ADV_JUDGE || 'llm',
|
|
4266
|
+
intervalMs: Number(process.env.ADV_INTERVAL || 60_000),
|
|
4267
|
+
batchSize: Number(process.env.ADV_BATCH || 3),
|
|
4268
|
+
promptMode: process.env.ADV_PROMPT_MODE || 'mixed',
|
|
4269
|
+
targetWeights: {
|
|
4270
|
+
decayK: Number(process.env.TUNE_W_DECAYK || 1.0),
|
|
4271
|
+
maxLen: Number(process.env.TUNE_W_MAXLEN || 0.7),
|
|
4272
|
+
onlineWeight: Number(process.env.TUNE_W_ONLINE || 0.8),
|
|
4273
|
+
edgeWeight: Number(process.env.TUNE_W_EDGE || 0.4),
|
|
4274
|
+
perQuery: Number(process.env.TUNE_W_PERQ || 0.5),
|
|
4275
|
+
maxCrawl: Number(process.env.TUNE_W_MAXC || 0.5),
|
|
4276
|
+
}
|
|
4277
|
+
});
|
|
4278
|
+
// ...existing code...
|
|
4279
|
+
global.__adversary = adv;
|
|
3205
4280
|
|
|
3206
|
-
|
|
3207
|
-
|
|
3208
|
-
|
|
3209
|
-
|
|
3210
|
-
|
|
4281
|
+
// 对抗学习控制 API 增补 promptMode / targets
|
|
4282
|
+
app.post('/api/adversary/start', (req, res) => {
|
|
4283
|
+
try {
|
|
4284
|
+
const { provider, judgeMode, intervalMs, batchSize, promptMode, targetWeights } = req.body || {};
|
|
4285
|
+
if (provider) {
|
|
4286
|
+
const neo = new AdversaryScheduler(global.ctrlA.runtime, {
|
|
4287
|
+
providerSpec: provider,
|
|
4288
|
+
judgeMode: judgeMode || adv.opts.judgeMode,
|
|
4289
|
+
intervalMs: Number(intervalMs || adv.opts.intervalMs),
|
|
4290
|
+
batchSize: Number(batchSize || adv.opts.batchSize),
|
|
4291
|
+
promptMode: promptMode || adv.opts.promptMode,
|
|
4292
|
+
targetWeights: targetWeights || adv.opts.targetWeights
|
|
4293
|
+
});
|
|
4294
|
+
global.__adversary?.stop?.();
|
|
4295
|
+
global.__adversary = neo;
|
|
4296
|
+
global.__adversary.start();
|
|
4297
|
+
} else {
|
|
4298
|
+
if (promptMode) adv.setPromptMode(promptMode);
|
|
4299
|
+
if (targetWeights) adv.setTargets(targetWeights);
|
|
4300
|
+
adv.start();
|
|
4301
|
+
}
|
|
4302
|
+
res.json({ ok: true, status: global.__adversary.getStatus() });
|
|
4303
|
+
} catch (e) {
|
|
4304
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
3211
4305
|
}
|
|
3212
4306
|
});
|
|
3213
4307
|
|
|
4308
|
+
app.post('/api/adversary/stop', (req, res) => {
|
|
4309
|
+
try { global.__adversary?.stop?.(); res.json({ ok: true }); }
|
|
4310
|
+
catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4311
|
+
});
|
|
4312
|
+
|
|
4313
|
+
app.get('/api/adversary/status', (req, res) => {
|
|
4314
|
+
try { res.json({ ok: true, status: global.__adversary?.getStatus?.() || { running: false } }); }
|
|
4315
|
+
catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4316
|
+
});
|
|
4317
|
+
|
|
4318
|
+
// 触发一次性对抗评估(可指定 prompts 数组)
|
|
4319
|
+
app.post('/api/adversary/once', async (req, res) => {
|
|
4320
|
+
try {
|
|
4321
|
+
const prompts = Array.isArray(req.body?.prompts) ? req.body.prompts.slice(0, 5) : null;
|
|
4322
|
+
const report = await global.__adversary.evaluateOnce(prompts);
|
|
4323
|
+
res.json({ ok: true, report });
|
|
4324
|
+
} catch (e) {
|
|
4325
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4326
|
+
}
|
|
4327
|
+
});
|
|
4328
|
+
app.get('/api/graph/partitions/status', async (req, res) => {
|
|
4329
|
+
try {
|
|
4330
|
+
const g = global.ctrlA?.runtime?.graph;
|
|
4331
|
+
if (!g || !(g instanceof PartitionedGraphDB)) {
|
|
4332
|
+
return res.json({ ok: true, mode: 'in-memory', loaded: 0 });
|
|
4333
|
+
}
|
|
4334
|
+
const loaded = Array.from(g.loaded.keys());
|
|
4335
|
+
res.json({
|
|
4336
|
+
ok: true,
|
|
4337
|
+
mode: 'partitioned',
|
|
4338
|
+
partitions: g.partitioner.partitions,
|
|
4339
|
+
loaded,
|
|
4340
|
+
maxLoaded: g.maxLoadedPartitions,
|
|
4341
|
+
windowRadius: g.windowRadius,
|
|
4342
|
+
centerPid: g.centerPid
|
|
4343
|
+
});
|
|
4344
|
+
} catch (e) {
|
|
4345
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4346
|
+
}
|
|
4347
|
+
});
|
|
4348
|
+
|
|
4349
|
+
app.post('/api/graph/partitions/flush', async (req, res) => {
|
|
4350
|
+
try {
|
|
4351
|
+
const g = global.ctrlA?.runtime?.graph;
|
|
4352
|
+
if (g && g.flushAll) await g.flushAll();
|
|
4353
|
+
res.json({ ok: true });
|
|
4354
|
+
} catch (e) {
|
|
4355
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4356
|
+
}
|
|
4357
|
+
});
|
|
4358
|
+
|
|
4359
|
+
app.post('/api/graph/prefetch', async (req, res) => {
|
|
4360
|
+
try {
|
|
4361
|
+
const { node } = req.body || {};
|
|
4362
|
+
const g = global.ctrlA?.runtime?.graph;
|
|
4363
|
+
if (!node || !(g instanceof PartitionedGraphDB)) return res.status(400).json({ ok: false, error: 'node 必填/或非分区图' });
|
|
4364
|
+
await g.focusOnPoint(String(node));
|
|
4365
|
+
res.json({ ok: true });
|
|
4366
|
+
} catch (e) {
|
|
4367
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4368
|
+
}
|
|
4369
|
+
});
|
|
4370
|
+
// API路由 - 只做学习,不返回结果
|
|
4371
|
+
app.post('/api/chat', async (req, res) => {
|
|
4372
|
+
try {
|
|
4373
|
+
const { message, sessionId } = req.body || {};
|
|
4374
|
+
const headerSid = req.headers['x-session-id'];
|
|
4375
|
+
const sid = sessionId || headerSid || global.ctrlA.runtime.session.ensureActive();
|
|
4376
|
+
|
|
4377
|
+
global.ctrlA.runtime.session.useSession(sid);
|
|
4378
|
+
global.ctrlA.runtime.session.incMessage(sid);
|
|
4379
|
+
|
|
4380
|
+
const words = String(message || '').toLowerCase().split(/\s+/).filter(w => w.length > 0);
|
|
4381
|
+
// 开启按需检索
|
|
4382
|
+
global.ctrlA.runtime.processInput(words, { triggerResearch: true });
|
|
4383
|
+
global.ctrlA.runtime.updateAttentionLinks();
|
|
4384
|
+
|
|
4385
|
+
res.set('X-Session-Id', sid);
|
|
4386
|
+
res.status(204).end();
|
|
4387
|
+
} catch (error) {
|
|
4388
|
+
res.status(500).json({ error: error.message });
|
|
4389
|
+
console.error('Error in /api/chat:', error);
|
|
4390
|
+
}
|
|
4391
|
+
});
|
|
4392
|
+
|
|
3214
4393
|
app.get('/api/status', (req, res) => {
|
|
3215
4394
|
res.json({ status: 'running', timestamp: new Date().toISOString() });
|
|
3216
4395
|
});
|
|
@@ -3265,7 +4444,20 @@ app.post('/api/chat', async (req, res) => {
|
|
|
3265
4444
|
res.status(500).json({ success: false, error: error.message });
|
|
3266
4445
|
}
|
|
3267
4446
|
});
|
|
3268
|
-
|
|
4447
|
+
app.post('/api/crawler/start', (req, res) => {
|
|
4448
|
+
try { global.__crawler?.start(); res.json({ ok: true }); } catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4449
|
+
});
|
|
4450
|
+
app.post('/api/crawler/stop', (req, res) => {
|
|
4451
|
+
try { global.__crawler?.stop(); res.json({ ok: true }); } catch (e) { res.status(500).json({ ok: false, error: e.message }); }
|
|
4452
|
+
});
|
|
4453
|
+
app.post('/api/crawler/seed', (req, res) => {
|
|
4454
|
+
const { urls } = req.body || {};
|
|
4455
|
+
const n = global.__crawler?.addSeeds(Array.isArray(urls) ? urls : []) || 0;
|
|
4456
|
+
res.json({ ok: true, added: n });
|
|
4457
|
+
});
|
|
4458
|
+
app.get('/api/crawler/status', (req, res) => {
|
|
4459
|
+
res.json({ ok: true, stats: global.__crawler?.stats() || {} });
|
|
4460
|
+
});
|
|
3269
4461
|
|
|
3270
4462
|
// 模型默认参数
|
|
3271
4463
|
const modelDefaults = {
|
|
@@ -3318,6 +4510,49 @@ app.post('/api/chat', async (req, res) => {
|
|
|
3318
4510
|
console.log(`\nAI system running on port ${global.config.masterPortOfMain}`);
|
|
3319
4511
|
console.log(`API available at http://localhost:${global.config.masterPortOfMain}/api/`);
|
|
3320
4512
|
});
|
|
4513
|
+
app.post('/api/search-crawl', async (req, res) => {
|
|
4514
|
+
try {
|
|
4515
|
+
const { prompt, vertical, perQuery, maxEnqueue, crawl, maxCrawl, ingest = true, minLen = 6 } = req.body || {};
|
|
4516
|
+
if (!prompt || String(prompt).trim().length < 2) {
|
|
4517
|
+
return res.status(400).json({ ok: false, error: 'prompt 不能为空' });
|
|
4518
|
+
}
|
|
4519
|
+
if (!global.__crawler) {
|
|
4520
|
+
return res.status(500).json({ ok: false, error: 'crawler 未初始化' });
|
|
4521
|
+
}
|
|
4522
|
+
const report = await global.__crawler.directedSearch(String(prompt), {
|
|
4523
|
+
vertical: vertical || 'general',
|
|
4524
|
+
perQuery: perQuery || 8,
|
|
4525
|
+
maxEnqueue: maxEnqueue || 30,
|
|
4526
|
+
crawl: crawl !== false,
|
|
4527
|
+
maxCrawl: maxCrawl || 12
|
|
4528
|
+
});
|
|
4529
|
+
|
|
4530
|
+
let fed = 0;
|
|
4531
|
+
if (ingest !== false) {
|
|
4532
|
+
const docs = global.__crawler.loadRecentDocs(24);
|
|
4533
|
+
for (const d of docs) {
|
|
4534
|
+
fed += await global.ctrlA.runtime.ingestTextDocument(d.text, { addNewWords: true, minLen });
|
|
4535
|
+
}
|
|
4536
|
+
if (fed > 0) global.ctrlA.runtime.updateAttentionLinks();
|
|
4537
|
+
}
|
|
4538
|
+
|
|
4539
|
+
res.json({ ok: true, report, ingestedSegments: fed });
|
|
4540
|
+
} catch (e) {
|
|
4541
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4542
|
+
}
|
|
4543
|
+
});
|
|
4544
|
+
|
|
4545
|
+
// 仅添加待抓取URL(不立即抓)
|
|
4546
|
+
app.post('/api/crawler/query', async (req, res) => {
|
|
4547
|
+
try {
|
|
4548
|
+
const { prompt, vertical } = req.body || {};
|
|
4549
|
+
if (!prompt) return res.status(400).json({ ok: false, error: 'prompt 不能为空' });
|
|
4550
|
+
const report = await global.__crawler.directedSearch(String(prompt), { vertical: vertical || 'general', crawl: false });
|
|
4551
|
+
res.json({ ok: true, report });
|
|
4552
|
+
} catch (e) {
|
|
4553
|
+
res.status(500).json({ ok: false, error: e.message });
|
|
4554
|
+
}
|
|
4555
|
+
});
|
|
3321
4556
|
// 启动Redis客户端
|
|
3322
4557
|
redisClient.on('error', (err) => {
|
|
3323
4558
|
console.error('Redis error:', err);
|