079project 6.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cache/38/9a0e6a4756f17b0edebad6a7be1eed.json +1 -0
- package/079project_frontend/README.md +70 -0
- package/079project_frontend/package-lock.json +17310 -0
- package/079project_frontend/package.json +40 -0
- package/079project_frontend/public/favicon.ico +0 -0
- package/079project_frontend/public/index.html +43 -0
- package/079project_frontend/public/logo192.png +0 -0
- package/079project_frontend/public/logo512.png +0 -0
- package/079project_frontend/public/manifest.json +25 -0
- package/079project_frontend/public/robots.txt +3 -0
- package/079project_frontend/src/App.css +515 -0
- package/079project_frontend/src/App.js +286 -0
- package/079project_frontend/src/App.test.js +8 -0
- package/079project_frontend/src/api/client.js +103 -0
- package/079project_frontend/src/components/AuthGate.js +153 -0
- package/079project_frontend/src/components/ConfigPanel.js +643 -0
- package/079project_frontend/src/index.css +21 -0
- package/079project_frontend/src/index.js +17 -0
- package/079project_frontend/src/logo.svg +1 -0
- package/079project_frontend/src/reportWebVitals.js +13 -0
- package/079project_frontend/src/setupTests.js +5 -0
- package/README.en.md +234 -0
- package/README.md +0 -0
- package/auth_frontend_server.cjs +312 -0
- package/main.cjs +2259 -83
- package/memeMergeWorker.cjs +256 -0
- package/package.json +28 -15
- package/robots/wikitext-something.txt +1 -39254
- package/tools_install.js +136 -0
- package/model_RNN.py +0 -209
package/main.cjs
CHANGED
|
@@ -24,6 +24,11 @@ const natural = safeRequire('natural');
|
|
|
24
24
|
const csvParse = safeRequire('csv-parse/sync');
|
|
25
25
|
const umap = safeRequire('umap-js');
|
|
26
26
|
const axios = safeRequire('axios');
|
|
27
|
+
const cheerio = safeRequire('cheerio');
|
|
28
|
+
const pdfParse = safeRequire('pdf-parse');
|
|
29
|
+
// 安全引用 ml-matrix,兼容不同导出结构
|
|
30
|
+
const MatrixLib = safeRequire('ml-matrix');
|
|
31
|
+
const Matrix = MatrixLib?.Matrix ?? MatrixLib ?? null;
|
|
27
32
|
|
|
28
33
|
const STOP_WORDS = natural?.stopwords ?? [];
|
|
29
34
|
const DEFAULT_CHANNEL = process.env.AI_REDIS_CHANNEL || 'AI-model-workspace';
|
|
@@ -55,10 +60,16 @@ const CONFIG = (() => {
|
|
|
55
60
|
return !(normalized === '0' || normalized === 'false' || normalized === 'off' || normalized === 'no');
|
|
56
61
|
};
|
|
57
62
|
const robotsLimitRaw = args['robots-limit'] || process.env.AI_ROBOTS_LIMIT || 200;
|
|
63
|
+
const aiCountRaw = args['ai-count'] || process.env.AI_COUNT || process.env.AI_NUM || 7;
|
|
64
|
+
const groupCountRaw = args['group-count'] || process.env.AI_GROUP_COUNT || process.env.GROUP_COUNT || 3;
|
|
58
65
|
return {
|
|
59
66
|
baseDir: path.resolve(args['base-dir'] || process.env.AI_BASE_DIR || path.join(__dirname, 'runtime_store')),
|
|
67
|
+
gatewayHost: String(args['gateway-host'] || process.env.AI_GATEWAY_HOST || '127.0.0.1'),
|
|
60
68
|
portGateway: Number(args.port || process.env.CONTROLLER_PORT || 5080),
|
|
61
69
|
portStudy: Number(args['study-port'] || process.env.AI_STUDY_PORT || 5081),
|
|
70
|
+
aiCount: Math.max(3, Number(aiCountRaw) || 7),
|
|
71
|
+
groupCount: Math.max(1, Number(groupCountRaw) || 3),
|
|
72
|
+
groupSize: 7,
|
|
62
73
|
redisUrl: process.env.REDIS_URL || args['redis-url'] || 'redis://127.0.0.1:6379',
|
|
63
74
|
redisChannel: args.channel || DEFAULT_CHANNEL,
|
|
64
75
|
snapshotDir: args['snapshot-dir'] || path.join(__dirname, 'snapshots'),
|
|
@@ -70,7 +81,13 @@ const CONFIG = (() => {
|
|
|
70
81
|
robotsDir: path.resolve(args['robots-dir'] || process.env.AI_ROBOTS_DIR || path.join(__dirname, 'robots')),
|
|
71
82
|
lemmaCsv: path.resolve(args['lemma-csv'] || process.env.AI_LEMMA_CSV || path.join(__dirname, 'lemma.csv')),
|
|
72
83
|
robotsWarmupLimit: Math.max(0, Number(robotsLimitRaw) || 0),
|
|
73
|
-
robotsAutoload: boolFrom(args['robots-autoload'] ?? process.env.AI_ROBOTS_AUTOLOAD, true)
|
|
84
|
+
robotsAutoload: boolFrom(args['robots-autoload'] ?? process.env.AI_ROBOTS_AUTOLOAD, true),
|
|
85
|
+
// Feature toggles via CLI/env
|
|
86
|
+
disableBarrier: boolFrom(args['disable-memebarrier'] ?? process.env.AI_DISABLE_MEMEBARRIER, false) === true,
|
|
87
|
+
disableRL: boolFrom(args['disable-rl'] ?? process.env.AI_DISABLE_RL, false) === true,
|
|
88
|
+
disableADV: boolFrom(args['disable-adv'] ?? process.env.AI_DISABLE_ADV, false) === true,
|
|
89
|
+
disableLearning: boolFrom(args['disable-learning'] ?? process.env.AI_DISABLE_LEARNING, false) === true,
|
|
90
|
+
exportDir: path.resolve(args['export-dir'] || process.env.AI_EXPORT_DIR || path.join(__dirname, 'runtime_store'))
|
|
74
91
|
};
|
|
75
92
|
})();
|
|
76
93
|
|
|
@@ -232,6 +249,56 @@ class LmdbStore {
|
|
|
232
249
|
}
|
|
233
250
|
}
|
|
234
251
|
|
|
252
|
+
/**
 * Key-prefixing view over a shared key/value store.
 *
 * Every key passed to this wrapper is stored in the backing store as
 * `ns:<namespace>:<key>`, so several logical stores can share one physical
 * store without key collisions.
 */
class NamespacedStore {
  /**
   * @param {object} store - Backing store. This class calls its `get`, `put`,
   *   `delete`, `entries` and (when present) `flush` methods.
   * @param {string} namespace - Namespace label; surrounding whitespace is trimmed.
   * @throws {Error} When the namespace is empty after trimming.
   */
  constructor(store, namespace) {
    this.store = store;
    this.ns = String(namespace || '').trim();
    if (!this.ns) {
      throw new Error('NamespacedStore requires a namespace');
    }
    this.prefix = `ns:${this.ns}:`;
  }

  /** Maps a caller-visible key to the prefixed key used in the backing store. */
  _k(key) {
    return `${this.prefix}${String(key)}`;
  }

  /** Reads the value stored under `key` inside this namespace. */
  get(key) {
    return this.store.get(this._k(key));
  }

  /** Writes `value` under `key` inside this namespace; returns the backing store's result. */
  put(key, value) {
    return this.store.put(this._k(key), value);
  }

  /** Deletes `key` inside this namespace; returns the backing store's result. */
  delete(key) {
    return this.store.delete(this._k(key));
  }

  /**
   * Lists `[key, value]` pairs of this namespace whose key starts with `prefix`.
   * Returned keys have the namespace prefix stripped.
   *
   * @param {string} [prefix=''] - Key prefix inside the namespace; null/undefined mean "all".
   * @returns {Array<[string, *]>}
   */
  entries(prefix = '') {
    // `?? ''` keeps an explicit null from being searched as the literal text "null".
    const list = this.store.entries(this._k(prefix ?? '')) ?? [];
    const out = [];
    for (const [key, value] of list) {
      const k = String(key);
      // Defensive: drop anything the backing store returned from outside this namespace.
      if (!k.startsWith(this.prefix)) {
        continue;
      }
      out.push([k.slice(this.prefix.length), value]);
    }
    return out;
  }

  /**
   * Flushes the backing store when it supports flushing.
   * @returns {*} Whatever the backing store's `flush` returns, or undefined when it has none.
   */
  flush() {
    if (typeof this.store.flush === 'function') {
      return this.store.flush();
    }
    return undefined;
  }

  /** Intentionally a no-op: the underlying store lifecycle is managed centrally. */
  close() {
    // underlying store lifecycle is managed centrally
  }
}
|
|
301
|
+
|
|
235
302
|
process.on('exit', () => {
|
|
236
303
|
// In this single-file mode stores are closed elsewhere.
|
|
237
304
|
});
|
|
@@ -587,6 +654,11 @@ class MemeGraph {
|
|
|
587
654
|
}
|
|
588
655
|
}
|
|
589
656
|
|
|
657
|
+
// 返回当前图中所有模因节点的ID列表,供扫描器等模块使用
|
|
658
|
+
getAllPoints() {
|
|
659
|
+
return Array.from(this.meta.keys());
|
|
660
|
+
}
|
|
661
|
+
|
|
590
662
|
_persistNode(memeId) {
|
|
591
663
|
const neighbors = this.nodes.get(memeId) || new Map();
|
|
592
664
|
const payload = {
|
|
@@ -753,9 +825,16 @@ class TensorEngine {
|
|
|
753
825
|
const start = rowPtr[row];
|
|
754
826
|
const end = rowPtr[row + 1];
|
|
755
827
|
for (let idx = start; idx < end; idx++) {
|
|
756
|
-
|
|
828
|
+
const w = values[idx];
|
|
829
|
+
const j = colIdx[idx];
|
|
830
|
+
const xv = (j >= 0 && j < x.length) ? x[j] : 0;
|
|
831
|
+
if (!Number.isFinite(w) || !Number.isFinite(xv)) {
|
|
832
|
+
// 忽略非有限值,避免传播 NaN
|
|
833
|
+
continue;
|
|
834
|
+
}
|
|
835
|
+
acc += w * xv;
|
|
757
836
|
}
|
|
758
|
-
result[row] = acc;
|
|
837
|
+
result[row] = Number.isFinite(acc) ? acc : 0;
|
|
759
838
|
}
|
|
760
839
|
return result;
|
|
761
840
|
}
|
|
@@ -797,11 +876,28 @@ class TensorEngine {
|
|
|
797
876
|
iteratePropagation(csr, seeds, steps, actFn, decayK, damp = 0.02) {
|
|
798
877
|
let state = Float32Array.from(seeds);
|
|
799
878
|
const next = new Float32Array(seeds.length);
|
|
879
|
+
const safeAct = (x) => {
|
|
880
|
+
let y;
|
|
881
|
+
try {
|
|
882
|
+
y = actFn(x);
|
|
883
|
+
} catch (_) {
|
|
884
|
+
y = 0;
|
|
885
|
+
}
|
|
886
|
+
return Number.isFinite(y) ? y : 0;
|
|
887
|
+
};
|
|
800
888
|
for (let s = 0; s < steps; s++) {
|
|
801
889
|
this.spmm(csr, state, next);
|
|
802
890
|
for (let i = 0; i < next.length; i++) {
|
|
803
|
-
const
|
|
804
|
-
|
|
891
|
+
const si = Number.isFinite(state[i]) ? state[i] : 0;
|
|
892
|
+
const ni = Number.isFinite(next[i]) ? next[i] : 0;
|
|
893
|
+
const raw = si + ni - (Number.isFinite(decayK) ? decayK : 0) * si * (Number.isFinite(damp) ? damp : 0.02);
|
|
894
|
+
state[i] = safeAct(raw);
|
|
895
|
+
}
|
|
896
|
+
}
|
|
897
|
+
// 最终保障结果为有限值
|
|
898
|
+
for (let i = 0; i < state.length; i++) {
|
|
899
|
+
if (!Number.isFinite(state[i])) {
|
|
900
|
+
state[i] = 0;
|
|
805
901
|
}
|
|
806
902
|
}
|
|
807
903
|
return state;
|
|
@@ -1027,6 +1123,7 @@ class OnlineResearcher {
|
|
|
1027
1123
|
this.cooldownMs = cooldownMs;
|
|
1028
1124
|
this.cacheSize = cacheSize;
|
|
1029
1125
|
this.cache = new Map();
|
|
1126
|
+
this.enabled = true;
|
|
1030
1127
|
}
|
|
1031
1128
|
|
|
1032
1129
|
_normalize(words) {
|
|
@@ -1062,7 +1159,39 @@ class OnlineResearcher {
|
|
|
1062
1159
|
}
|
|
1063
1160
|
|
|
1064
1161
|
async lookup(input, options = {}) {
|
|
1065
|
-
|
|
1162
|
+
if (this.enabled === false || options.disableRemote === true) {
|
|
1163
|
+
return this._fallback(Array.isArray(input) ? input : tokenize(String(input || '')));
|
|
1164
|
+
}
|
|
1165
|
+
// 如果输入里包含 URL(或显式要求 crawl),优先走站内递归抓取
|
|
1166
|
+
const rawText = Array.isArray(input) ? input.join(' ') : String(input || '');
|
|
1167
|
+
const crawlReq = options?.crawl || null;
|
|
1168
|
+
const urlFromInput = extractFirstUrl(rawText);
|
|
1169
|
+
const shouldCrawl = options?.mode === 'crawl' || Boolean(crawlReq?.startUrl) || Boolean(urlFromInput);
|
|
1170
|
+
if (shouldCrawl) {
|
|
1171
|
+
const startUrl = String(crawlReq?.startUrl || urlFromInput || '').trim();
|
|
1172
|
+
if (!startUrl) {
|
|
1173
|
+
return { ok: false, source: 'crawl', reason: 'startUrl-required' };
|
|
1174
|
+
}
|
|
1175
|
+
const crawlOptions = {
|
|
1176
|
+
maxPages: clampInt(crawlReq?.maxPages, 1, 500, 60),
|
|
1177
|
+
maxDepth: clampInt(crawlReq?.maxDepth, 0, 10, 3),
|
|
1178
|
+
includePdf: crawlReq?.includePdf !== false,
|
|
1179
|
+
sameSite: crawlReq?.sameSite !== false,
|
|
1180
|
+
timeoutMs: clampInt(crawlReq?.timeoutMs, 1000, 60000, 12000),
|
|
1181
|
+
maxBytesPerPage: clampInt(crawlReq?.maxBytesPerPage, 8 * 1024, 10 * 1024 * 1024, 2 * 1024 * 1024),
|
|
1182
|
+
maxPdfBytes: clampInt(crawlReq?.maxPdfBytes, 64 * 1024, 40 * 1024 * 1024, 20 * 1024 * 1024),
|
|
1183
|
+
userAgent: typeof crawlReq?.userAgent === 'string' && crawlReq.userAgent.trim() ? crawlReq.userAgent.trim() : '079ProjectCrawler/1.0'
|
|
1184
|
+
};
|
|
1185
|
+
const crawler = new SiteCrawler({ axios, cheerio, pdfParse });
|
|
1186
|
+
const result = await crawler.crawl(startUrl, crawlOptions);
|
|
1187
|
+
if (!options?.forceRemote) {
|
|
1188
|
+
const key = this._normalize(tokenize(`crawl ${startUrl}`));
|
|
1189
|
+
this._remember(key, result);
|
|
1190
|
+
}
|
|
1191
|
+
return result;
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
const words = Array.isArray(input) ? input : tokenize(rawText);
|
|
1066
1195
|
if (words.length === 0) {
|
|
1067
1196
|
return { ok: false, source: 'local', reason: 'empty-query' };
|
|
1068
1197
|
}
|
|
@@ -1133,6 +1262,240 @@ class OnlineResearcher {
|
|
|
1133
1262
|
}
|
|
1134
1263
|
}
|
|
1135
1264
|
|
|
1265
|
+
/**
 * Coerces `value` to an integer clamped to `[min, max]`.
 *
 * @param {*} value - Candidate value (number or numeric string).
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @param {number} fallback - Returned as-is when `value` is missing or not a finite number.
 * @returns {number} The truncated, clamped integer, or `fallback`.
 */
const clampInt = (value, min, max, fallback) => {
  // Number(null), Number('') and Number(false) are 0 and Number(true) is 1, which would
  // silently clamp an "unset" option to `min`; treat all of them as "not provided".
  if (value === null || value === undefined || typeof value === 'boolean') return fallback;
  if (typeof value === 'string' && value.trim() === '') return fallback;
  const n = Number(value);
  if (!Number.isFinite(n)) return fallback;
  return Math.max(min, Math.min(max, Math.trunc(n)));
};
|
|
1270
|
+
|
|
1271
|
+
/**
 * Finds the first URL in free text.
 *
 * An explicit `http://` / `https://` URL is preferred; otherwise a bare
 * `www.` domain (optionally with a path) is accepted and returned with an
 * `https://` prefix. Trailing sentence punctuation and unbalanced closing
 * brackets are not part of the returned URL.
 *
 * @param {*} text - Input text; falsy values are treated as empty.
 * @returns {string|null} The URL, or null when none is found.
 */
const extractFirstUrl = (text) => {
  const s = String(text || '').trim();
  if (!s) return null;

  // The URL character class below legitimately contains '.', ',', ')' etc., so a URL at the
  // end of a sentence or inside parentheses drags that punctuation along. Peel it off,
  // keeping a closing bracket only when it is balanced inside the URL (e.g. ".../Foo_(bar)").
  const stripTrailingPunctuation = (url) => {
    let end = url.length;
    while (end > 0) {
      const ch = url[end - 1];
      if (".,;:!?'".includes(ch)) {
        end -= 1;
        continue;
      }
      if (ch === ')' || ch === ']') {
        const open = ch === ')' ? '(' : '[';
        const head = url.slice(0, end);
        const opens = head.split(open).length - 1;
        const closes = head.split(ch).length - 1;
        if (closes > opens) {
          end -= 1;
          continue;
        }
      }
      break;
    }
    return url.slice(0, end);
  };

  // Prefer an explicit http(s):// URL.
  const m = s.match(/https?:\/\/[\w\-._~:/?#[\]@!$&'()*+,;=%]+/i);
  if (m && m[0]) {
    const cleaned = stripTrailingPunctuation(m[0]);
    // Require at least one host character after the scheme (rejects e.g. "https://...").
    if (/^https?:\/\/[^\s/?#]/i.test(cleaned)) return cleaned;
  }
  // Also accept a bare domain: www.example.com/path
  const m2 = s.match(/\b(?:www\.)[a-z0-9\-]+(?:\.[a-z0-9\-]+)+(?:\/[\w\-._~:/?#[\]@!$&'()*+,;=%]*)?/i);
  if (m2 && m2[0]) return `https://${stripTrailingPunctuation(m2[0])}`;
  return null;
};
|
|
1282
|
+
|
|
1283
|
+
/**
 * Breadth-first crawler that fetches HTML pages and PDFs starting from one URL
 * and returns their extracted text.
 *
 * All collaborators are injected and optional: without `cheerio` a regex
 * fallback is used for HTML, without `pdfParse` PDFs yield empty text with a
 * warning, and without `axios` every fetch fails and is recorded in `errors`.
 *
 * NOTE(review): the crawler fetches whatever http(s) URL it is given and the
 * links it discovers; if start URLs can come from untrusted input, consider
 * restricting reachable hosts (internal addresses) at the call site.
 */
class SiteCrawler {
  /**
   * @param {object} [deps]
   * @param {object} [deps.axios] - HTTP client; only `get(url, config)` is used.
   * @param {object} [deps.cheerio] - HTML parser; only `load(html)` is used.
   * @param {Function} [deps.pdfParse] - `async (buffer) => ({ text, numpages })`.
   */
  constructor({ axios, cheerio, pdfParse } = {}) {
    this.axios = axios;
    this.cheerio = cheerio;
    this.pdfParse = pdfParse;
  }

  /**
   * Canonicalizes a URL for de-duplication: drops the fragment and common
   * tracking parameters (`utm_*`, `fbclid`, `gclid`).
   *
   * @param {string} url - Absolute URL.
   * @returns {string|null} Normalized URL, or null when it cannot be parsed
   *   or is not http(s).
   */
  _normalizeUrl(url) {
    try {
      const u = new URL(url);
      // Only http(s) can be fetched by the crawler; reject tel:, data:, ftp:, file:, ...
      if (u.protocol !== 'http:' && u.protocol !== 'https:') return null;
      u.hash = '';
      // Snapshot the keys first: deleting while iterating the live view skips entries.
      for (const key of Array.from(u.searchParams.keys())) {
        if (/^(utm_|fbclid$|gclid$)/i.test(key)) u.searchParams.delete(key);
      }
      return u.toString();
    } catch {
      return null;
    }
  }

  /**
   * Tells whether two URLs share protocol and hostname, ignoring a leading `www.`.
   *
   * @param {string} baseUrl
   * @param {string} targetUrl
   * @returns {boolean} False when either URL cannot be parsed.
   */
  _sameSite(baseUrl, targetUrl) {
    try {
      const base = new URL(baseUrl);
      const t = new URL(targetUrl);
      const baseHost = base.hostname.replace(/^www\./i, '');
      const tHost = t.hostname.replace(/^www\./i, '');
      return base.protocol === t.protocol && baseHost === tHost;
    } catch {
      return false;
    }
  }

  /**
   * Collects absolute link targets from an HTML document.
   *
   * With cheerio: `a[href]`, `link[href]` and `iframe[src]`. Without it (or if
   * it throws): every quoted `href=` / `src=` attribute found by regex.
   *
   * @param {string} html - Raw HTML.
   * @param {string} baseUrl - Base used to resolve relative links.
   * @returns {string[]} Absolute URLs (may contain duplicates and non-http schemes).
   */
  _extractLinksHtml(html, baseUrl) {
    const out = [];
    if (!html) return out;
    const base = String(baseUrl || '');
    // Shared by both extraction paths so they filter identically.
    const push = (href) => {
      const h = String(href || '').trim();
      if (!h || h.startsWith('#') || /^(javascript:|mailto:)/i.test(h)) return;
      try {
        out.push(new URL(h, base).toString());
      } catch {
        // Unresolvable href: not a usable link, skip it.
      }
    };
    if (this.cheerio) {
      try {
        const $ = this.cheerio.load(html);
        $('a[href]').each((_, el) => push($(el).attr('href')));
        $('link[href]').each((_, el) => push($(el).attr('href')));
        $('iframe[src]').each((_, el) => push($(el).attr('src')));
        return out;
      } catch {
        // Discard partial results so the regex fallback does not duplicate them.
        out.length = 0;
      }
    }
    // Simple regex fallback. The capture allows '#' so "page#section" links are
    // kept (pure "#fragment" links are dropped by push()).
    const re = /\b(?:href|src)\s*=\s*['"]([^'"]+)['"]/gi;
    let m;
    while ((m = re.exec(html))) {
      push(m[1]);
    }
    return out;
  }

  /**
   * Extracts whitespace-collapsed visible text from HTML.
   *
   * With cheerio, `script`/`noscript`/`style` are removed first; the fallback
   * simply strips tags.
   *
   * @param {string} html
   * @returns {string}
   */
  _extractTextHtml(html) {
    if (!html) return '';
    if (this.cheerio) {
      try {
        const $ = this.cheerio.load(html);
        $('script,noscript,style').remove();
        const text = $('body').text() || $.root().text() || '';
        return text.replace(/\s+/g, ' ').trim();
      } catch {
        // fall through to the tag-stripping fallback
      }
    }
    return String(html).replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  }

  /**
   * GETs a URL and returns its body as a Buffer.
   *
   * @param {string} url
   * @param {object} [opts]
   * @param {number} [opts.timeoutMs=12000]
   * @param {string} [opts.userAgent='079ProjectCrawler/1.0']
   * @param {string} [opts.responseType='arraybuffer']
   * @param {number} [opts.maxBytes] - Passed as axios `maxContentLength` / `maxBodyLength`.
   * @returns {Promise<{status: number, contentType: string, bytes: number, data: Buffer}>}
   * @throws {Error} `axios-not-installed` when no HTTP client was injected;
   *   otherwise whatever the client throws (statuses outside 200-399 are rejected).
   */
  async _fetch(url, { timeoutMs, userAgent, responseType, maxBytes } = {}) {
    if (!this.axios) {
      throw new Error('axios-not-installed');
    }
    const resp = await this.axios.get(url, {
      timeout: timeoutMs ?? 12000,
      responseType: responseType || 'arraybuffer',
      maxContentLength: maxBytes,
      maxBodyLength: maxBytes,
      headers: {
        'User-Agent': userAgent || '079ProjectCrawler/1.0',
        'Accept': 'text/html,application/xhtml+xml,application/pdf;q=0.9,*/*;q=0.8'
      },
      validateStatus: (s) => s >= 200 && s < 400
    });
    const ctype = String(resp.headers?.['content-type'] || '').toLowerCase();
    const buf = Buffer.isBuffer(resp.data) ? resp.data : Buffer.from(resp.data);
    return { status: resp.status, contentType: ctype, bytes: buf.length, data: buf };
  }

  /**
   * Extracts text from a PDF buffer using the injected `pdfParse`.
   *
   * @param {Buffer} buffer
   * @returns {Promise<{text: string, pages?: number, warning?: string}>}
   *   Empty text for a non-Buffer input or when `pdfParse` is unavailable.
   */
  async _parsePdf(buffer) {
    if (!buffer || !Buffer.isBuffer(buffer)) return { text: '' };
    if (!this.pdfParse) {
      return { text: '', warning: 'pdf-parse-not-installed' };
    }
    const r = await this.pdfParse(buffer);
    const text = String(r?.text || '').replace(/\s+/g, ' ').trim();
    return { text, pages: r?.numpages };
  }

  /**
   * Crawls breadth-first from `startUrl`.
   *
   * @param {string} startUrl - Absolute http(s) URL.
   * @param {object} [options]
   * @param {number} [options.maxPages=60] - Stop once this many page records exist.
   * @param {number} [options.maxDepth=3] - Links are followed only from pages shallower than this.
   * @param {boolean} [options.includePdf=true] - When false, PDFs are recorded as skipped.
   * @param {boolean} [options.sameSite=true] - Restrict to the start URL's protocol and host.
   * @param {number} [options.timeoutMs=12000]
   * @param {number} [options.maxBytesPerPage=2097152]
   * @param {number} [options.maxPdfBytes=20971520] - Applied to URLs ending in `.pdf`.
   * @param {string} [options.userAgent='079ProjectCrawler/1.0']
   * @returns {Promise<object>} `{ ok: false, source, reason }` for an invalid start URL,
   *   otherwise `{ ok: true, source: 'crawl', startUrl, stats, pages, text, errors }` where
   *   each page's text is capped at 20000 characters, `text` is the concatenation of page
   *   texts (collection stops once it exceeds 60000 characters) and `errors` holds at
   *   most the first 20 failures.
   */
  async crawl(startUrl, options = {}) {
    const normalizedStart = this._normalizeUrl(startUrl);
    if (!normalizedStart) {
      return { ok: false, source: 'crawl', reason: 'invalid-url' };
    }
    const {
      maxPages = 60,
      maxDepth = 3,
      includePdf = true,
      sameSite = true,
      timeoutMs = 12000,
      maxBytesPerPage = 2 * 1024 * 1024,
      maxPdfBytes = 20 * 1024 * 1024,
      userAgent = '079ProjectCrawler/1.0'
    } = options;

    const visited = new Set();
    // URLs ever placed on the queue; stops a link that appears on many pages
    // from being queued once per occurrence.
    const enqueued = new Set([normalizedStart]);
    const queue = [{ url: normalizedStart, depth: 0 }];
    const pages = [];
    const errors = [];

    while (queue.length && pages.length < maxPages) {
      const task = queue.shift();
      const url = this._normalizeUrl(task.url);
      if (!url) continue;
      if (visited.has(url)) continue;
      if (sameSite && !this._sameSite(normalizedStart, url)) continue;
      visited.add(url);

      try {
        const isPdfByExt = /\.pdf(?:$|\?)/i.test(url);
        if (isPdfByExt && !includePdf) {
          // Known PDF that the caller excluded: record it without downloading it.
          pages.push({ url, kind: 'pdf', skipped: true, reason: 'includePdf=false' });
          continue;
        }
        const maxBytes = isPdfByExt ? maxPdfBytes : maxBytesPerPage;
        const fetched = await this._fetch(url, { timeoutMs, userAgent, maxBytes });
        const ctype = fetched.contentType;
        const isPdf = isPdfByExt || ctype.includes('application/pdf');

        if (isPdf) {
          if (!includePdf) {
            // PDF only detectable from the response's content type.
            pages.push({ url, kind: 'pdf', skipped: true, reason: 'includePdf=false' });
            continue;
          }
          const parsed = await this._parsePdf(fetched.data);
          pages.push({
            url,
            kind: 'pdf',
            bytes: fetched.bytes,
            contentType: ctype || 'application/pdf',
            pages: parsed.pages,
            text: (parsed.text || '').slice(0, 20000)
          });
          continue;
        }

        const html = fetched.data.toString('utf8');
        const text = this._extractTextHtml(html);
        const links = this._extractLinksHtml(html, url);
        pages.push({
          url,
          kind: 'html',
          bytes: fetched.bytes,
          contentType: ctype || 'text/html',
          linksFound: links.length,
          text: text.slice(0, 20000)
        });

        if (task.depth < maxDepth) {
          for (const link of links) {
            const n = this._normalizeUrl(link);
            if (!n) continue;
            if (visited.has(n) || enqueued.has(n)) continue;
            if (sameSite && !this._sameSite(normalizedStart, n)) continue;
            enqueued.add(n);
            queue.push({ url: n, depth: task.depth + 1 });
          }
        }
      } catch (e) {
        // One failing page must not abort the crawl; keep the reason for the report.
        errors.push({ url, error: e?.message || String(e) });
      }
    }

    // Concatenate page texts, tracking the joined length incrementally instead of
    // re-joining every chunk on each iteration.
    const chunks = [];
    let aggregatedLength = 0;
    for (const p of pages) {
      if (!p?.text) continue;
      const chunk = `URL: ${p.url}\n${p.text}`;
      aggregatedLength += chunk.length + (chunks.length ? 2 : 0); // 2 = '\n\n' separator
      chunks.push(chunk);
      if (aggregatedLength > 60000) break;
    }
    const aggregated = chunks.join('\n\n');

    return {
      ok: true,
      source: 'crawl',
      startUrl: normalizedStart,
      stats: { visited: visited.size, returned: pages.length, queued: queue.length, errors: errors.length },
      pages,
      text: aggregated,
      errors: errors.slice(0, 20)
    };
  }
}
|
|
1498
|
+
|
|
1136
1499
|
class SessionManager {
|
|
1137
1500
|
constructor(store, { idleMs = 10 * 60 * 1000, maxSessions = 200 } = {}) {
|
|
1138
1501
|
this.store = store;
|
|
@@ -1239,6 +1602,49 @@ class SnapshotManager {
|
|
|
1239
1602
|
}
|
|
1240
1603
|
}
|
|
1241
1604
|
|
|
1605
|
+
// Graph export builder: turns the linear-algebra structures of a window (node id
// list + CSR adjacency, plus an optional activation vector) into a plain
// `{ Nodes, Edges }` graph object intended for consumption on the Go side.
class GraphExportBuilder {
  /**
   * @param {object} windowInfo - Window to export.
   * @param {Array} windowInfo.ids - Node ids; index i corresponds to CSR row/column i.
   * @param {object} [windowInfo.csr] - `{ rowPtr, colIdx, values, nRows }`; when absent no edges are emitted.
   * @param {ArrayLike<number>|null} [activation=null] - Per-node activation aligned with `ids`.
   * @returns {{Nodes: object[], Edges: object[]}} Nodes carry `id, x, y, z, value, attrs`;
   *   edges carry `from, to, weight, dir`.
   */
  static fromWindow(windowInfo, activation = null) {
    const ids = windowInfo.ids;
    const n = ids.length;

    // Simple layout: x is the node's activation (0 when missing or non-finite),
    // y is a deterministic spread derived from the index, z is always 0.
    const nodes = [];
    for (let i = 0; i < n; i++) {
      const value = Number.isFinite(activation?.[i]) ? activation[i] : 0;
      nodes.push({
        id: String(ids[i]),
        x: value,
        y: ((i * 9973) % 101) / 100,
        z: 0,
        value,
        attrs: {}
      });
    }

    const edges = [];
    const csr = windowInfo.csr;
    if (csr) {
      const { rowPtr, colIdx, values, nRows } = csr;
      // Rows beyond the id list have no node to attach to.
      const rows = Math.min(Number(nRows) || 0, n);
      for (let r = 0; r < rows; r++) {
        const start = rowPtr[r];
        const end = rowPtr[r + 1];
        for (let k = start; k < end; k++) {
          const c = colIdx[k];
          // Skip entries pointing outside the window instead of emitting an
          // edge to a node literally named "undefined".
          if (!(c >= 0 && c < n)) continue;
          const w = values[k];
          edges.push({
            from: String(ids[r]),
            to: String(ids[c]),
            weight: Number.isFinite(w) ? w : 0,
            dir: 1
          });
        }
      }
    }
    return { Nodes: nodes, Edges: edges };
  }
}
|
|
1647
|
+
|
|
1242
1648
|
class RuntimeState {
|
|
1243
1649
|
constructor({ kvmStore, memeStore, sessionStore, params, config }) {
|
|
1244
1650
|
this.kvm = new KVMStore(kvmStore);
|
|
@@ -1251,11 +1657,64 @@ class RuntimeState {
|
|
|
1251
1657
|
this.config = { ...(config || {}) };
|
|
1252
1658
|
this.config.robotsDir = this.config.robotsDir || path.join(__dirname, 'robots');
|
|
1253
1659
|
this.config.lemmaCsv = this.config.lemmaCsv || path.join(__dirname, 'lemma.csv');
|
|
1254
|
-
|
|
1660
|
+
// 在线搜索配置:支持运行时开关与 endpoint 库
|
|
1661
|
+
this.config.search = {
|
|
1662
|
+
enabled: this.config.search?.enabled ?? true,
|
|
1663
|
+
endpoints: Array.isArray(this.config.search?.endpoints) ? this.config.search.endpoints : [],
|
|
1664
|
+
active: typeof this.config.search?.active === 'string' ? this.config.search.active : (this.config.searchEndpoint || '')
|
|
1665
|
+
};
|
|
1666
|
+
// 兼容旧字段:searchEndpoint
|
|
1667
|
+
if (!this.config.search.active && this.config.searchEndpoint) {
|
|
1668
|
+
this.config.search.active = String(this.config.searchEndpoint);
|
|
1669
|
+
}
|
|
1670
|
+
// 默认库:如果没配置 endpoints 但 active 有值,则塞入库里
|
|
1671
|
+
if (this.config.search.active && !this.config.search.endpoints.includes(this.config.search.active)) {
|
|
1672
|
+
this.config.search.endpoints.push(this.config.search.active);
|
|
1673
|
+
}
|
|
1674
|
+
this.researcher = new OnlineResearcher(this, { endpoint: this.config.search.active });
|
|
1675
|
+
this.researcher.enabled = Boolean(this.config.search.enabled);
|
|
1255
1676
|
this.corpusStats = { ingested: 0, lastIngest: null };
|
|
1256
1677
|
this.robotsCorpus = null;
|
|
1257
1678
|
}
|
|
1258
1679
|
|
|
1680
|
+
getSearchConfig() {
|
|
1681
|
+
return {
|
|
1682
|
+
enabled: Boolean(this.config.search?.enabled),
|
|
1683
|
+
active: String(this.config.search?.active || ''),
|
|
1684
|
+
endpoints: Array.isArray(this.config.search?.endpoints) ? this.config.search.endpoints.slice() : []
|
|
1685
|
+
};
|
|
1686
|
+
}
|
|
1687
|
+
|
|
1688
|
+
setSearchConfig(patch = {}) {
|
|
1689
|
+
if (!this.config.search) {
|
|
1690
|
+
this.config.search = { enabled: true, endpoints: [], active: '' };
|
|
1691
|
+
}
|
|
1692
|
+
if (typeof patch.enabled === 'boolean') {
|
|
1693
|
+
this.config.search.enabled = patch.enabled;
|
|
1694
|
+
}
|
|
1695
|
+
if (Array.isArray(patch.endpoints)) {
|
|
1696
|
+
const cleaned = patch.endpoints
|
|
1697
|
+
.map((x) => String(x || '').trim())
|
|
1698
|
+
.filter(Boolean);
|
|
1699
|
+
this.config.search.endpoints = Array.from(new Set(cleaned));
|
|
1700
|
+
}
|
|
1701
|
+
if (typeof patch.active === 'string') {
|
|
1702
|
+
const next = patch.active.trim();
|
|
1703
|
+
this.config.search.active = next;
|
|
1704
|
+
if (next && Array.isArray(this.config.search.endpoints) && !this.config.search.endpoints.includes(next)) {
|
|
1705
|
+
this.config.search.endpoints.push(next);
|
|
1706
|
+
}
|
|
1707
|
+
}
|
|
1708
|
+
// 同步到 researcher
|
|
1709
|
+
if (this.researcher) {
|
|
1710
|
+
this.researcher.endpoint = this.config.search.active || '';
|
|
1711
|
+
this.researcher.enabled = Boolean(this.config.search.enabled);
|
|
1712
|
+
}
|
|
1713
|
+
// 兼容旧字段
|
|
1714
|
+
this.config.searchEndpoint = this.config.search.active;
|
|
1715
|
+
return this.getSearchConfig();
|
|
1716
|
+
}
|
|
1717
|
+
|
|
1259
1718
|
cloneParams() {
|
|
1260
1719
|
return { ...this.params };
|
|
1261
1720
|
}
|
|
@@ -1541,6 +2000,20 @@ class RuntimeState {
|
|
|
1541
2000
|
}
|
|
1542
2001
|
}
|
|
1543
2002
|
}
|
|
2003
|
+
|
|
2004
|
+
// 将当前窗口或指定种子集合导出为 Go 侧 Graph 结构并写入文件
|
|
2005
|
+
exportGraphToFile({ seeds = null, radius = 2, file = null } = {}) {
|
|
2006
|
+
const usedSeeds = seeds instanceof Map ? seeds : this.mapWordsToMemes(Array.isArray(seeds) ? seeds : []);
|
|
2007
|
+
const win = this.graph.buildWindow(Array.from(usedSeeds.keys()), radius);
|
|
2008
|
+
const act = this.tensor.iteratePropagation(win.csr, this._buildSeedVector(win, usedSeeds), this.params.iteration || 5, this._activation(), this.params.decayK, 0.02);
|
|
2009
|
+
const graphObj = GraphExportBuilder.fromWindow(win, act);
|
|
2010
|
+
const dir = CONFIG.exportDir || path.join(__dirname, 'runtime_store');
|
|
2011
|
+
ensureDir(dir);
|
|
2012
|
+
const name = `graph_export_${new Date().toISOString().replace(/[:.]/g, '-')}.json`;
|
|
2013
|
+
const outFile = file ? path.resolve(file) : path.join(dir, name);
|
|
2014
|
+
fs.writeFileSync(outFile, JSON.stringify(graphObj));
|
|
2015
|
+
return outFile;
|
|
2016
|
+
}
|
|
1544
2017
|
}
|
|
1545
2018
|
|
|
1546
2019
|
class Controller extends EventEmitter {
|
|
@@ -1592,14 +2065,38 @@ class ControllerPool extends EventEmitter {
|
|
|
1592
2065
|
constructor(baseStores) {
|
|
1593
2066
|
super();
|
|
1594
2067
|
this.baseStores = baseStores;
|
|
1595
|
-
this.controllers = {
|
|
1596
|
-
|
|
1597
|
-
|
|
1598
|
-
|
|
1599
|
-
};
|
|
1600
|
-
|
|
1601
|
-
|
|
1602
|
-
|
|
2068
|
+
this.controllers = {};
|
|
2069
|
+
this.groups = {}; // groupId -> controllerNames[]
|
|
2070
|
+
const groupCount = Math.max(1, Number(baseStores?.config?.groupCount ?? CONFIG.groupCount) || 3);
|
|
2071
|
+
const groupSize = Math.max(1, Number(baseStores?.config?.groupSize ?? CONFIG.groupSize) || 7);
|
|
2072
|
+
this.groupIds = Array.from({ length: groupCount }, (_, i) => `G${i + 1}`);
|
|
2073
|
+
|
|
2074
|
+
for (const groupId of this.groupIds) {
|
|
2075
|
+
const names = [];
|
|
2076
|
+
const sharedStores = {
|
|
2077
|
+
kvmStore: new NamespacedStore(baseStores.kvmStore, `${groupId}:kvm`),
|
|
2078
|
+
memeStore: new NamespacedStore(baseStores.memeStore, `${groupId}:graph`),
|
|
2079
|
+
sessionStore: new NamespacedStore(baseStores.sessionStore, `${groupId}:session`)
|
|
2080
|
+
};
|
|
2081
|
+
for (let i = 0; i < groupSize; i++) {
|
|
2082
|
+
const name = `${groupId}_AI${i + 1}`;
|
|
2083
|
+
const stores = {
|
|
2084
|
+
...baseStores,
|
|
2085
|
+
...sharedStores,
|
|
2086
|
+
config: { ...(baseStores.config || {}), controllerName: name, groupId, groupIndex: i + 1 }
|
|
2087
|
+
};
|
|
2088
|
+
this.controllers[name] = new Controller(name, new RuntimeState(stores));
|
|
2089
|
+
names.push(name);
|
|
2090
|
+
}
|
|
2091
|
+
this.groups[groupId] = names;
|
|
2092
|
+
}
|
|
2093
|
+
|
|
2094
|
+
// Rotation/compat: keep serving/standby/validation pointing to first group first three
|
|
2095
|
+
const g1 = this.groupIds[0] || 'G1';
|
|
2096
|
+
const g1Names = this.groups[g1] || Object.keys(this.controllers);
|
|
2097
|
+
this.serving = this.controllers[g1Names[0]];
|
|
2098
|
+
this.standby = this.controllers[g1Names[1] || g1Names[0]];
|
|
2099
|
+
this.validation = this.controllers[g1Names[2] || g1Names[0]];
|
|
1603
2100
|
}
|
|
1604
2101
|
|
|
1605
2102
|
getActive() {
|
|
@@ -1607,7 +2104,29 @@ class ControllerPool extends EventEmitter {
|
|
|
1607
2104
|
}
|
|
1608
2105
|
|
|
1609
2106
|
getByName(name) {
|
|
1610
|
-
|
|
2107
|
+
const n = String(name || '').trim();
|
|
2108
|
+
if (!n) return null;
|
|
2109
|
+
// Backward compatibility: A/B/C map to G1 first 3
|
|
2110
|
+
if (n === 'A' || n === 'B' || n === 'C') {
|
|
2111
|
+
const g1 = this.groupIds[0] || 'G1';
|
|
2112
|
+
const list = this.groups[g1] || [];
|
|
2113
|
+
if (n === 'A') return this.controllers[list[0]] || null;
|
|
2114
|
+
if (n === 'B') return this.controllers[list[1]] || null;
|
|
2115
|
+
if (n === 'C') return this.controllers[list[2]] || null;
|
|
2116
|
+
}
|
|
2117
|
+
return this.controllers[n] || null;
|
|
2118
|
+
}
|
|
2119
|
+
|
|
2120
|
+
listControllerNames() {
|
|
2121
|
+
return Object.keys(this.controllers);
|
|
2122
|
+
}
|
|
2123
|
+
|
|
2124
|
+
listGroupIds() {
|
|
2125
|
+
return this.groupIds.slice();
|
|
2126
|
+
}
|
|
2127
|
+
|
|
2128
|
+
listControllersInGroup(groupId) {
|
|
2129
|
+
return (this.groups[String(groupId || '').trim()] || []).slice();
|
|
1611
2130
|
}
|
|
1612
2131
|
|
|
1613
2132
|
async hotSwap(snapshot) {
|
|
@@ -1633,6 +2152,23 @@ class ControllerPool extends EventEmitter {
|
|
|
1633
2152
|
return results;
|
|
1634
2153
|
}
|
|
1635
2154
|
|
|
2155
|
+
async ingestDocumentTo(name, doc) {
|
|
2156
|
+
const ctrl = this.getByName(name);
|
|
2157
|
+
if (!ctrl) {
|
|
2158
|
+
return { ok: false, reason: 'controller-not-found' };
|
|
2159
|
+
}
|
|
2160
|
+
return ctrl.runtime.ingestDocument(doc);
|
|
2161
|
+
}
|
|
2162
|
+
|
|
2163
|
+
async ingestDocumentToGroup(groupId, doc) {
|
|
2164
|
+
const names = this.listControllersInGroup(groupId);
|
|
2165
|
+
if (!names.length) {
|
|
2166
|
+
return { ok: false, reason: 'group-not-found' };
|
|
2167
|
+
}
|
|
2168
|
+
const results = await Promise.all(names.map((name) => this.ingestDocumentTo(name, doc)));
|
|
2169
|
+
return results;
|
|
2170
|
+
}
|
|
2171
|
+
|
|
1636
2172
|
async forgetMemes(criteria) {
|
|
1637
2173
|
const results = await Promise.all(Object.values(this.controllers).map((ctrl) => ctrl.runtime.forgetMemes(criteria)));
|
|
1638
2174
|
return results;
|
|
@@ -1729,6 +2265,7 @@ class StudyEngine {
|
|
|
1729
2265
|
this.redis = redisSync;
|
|
1730
2266
|
this.running = false;
|
|
1731
2267
|
this.queue = [];
|
|
2268
|
+
this.metrics = { enqueued: 0, processed: 0, lastTickAt: 0, lastError: null };
|
|
1732
2269
|
this.poolWorker = workerpool.pool(CONFIG.workerFile, {
|
|
1733
2270
|
minWorkers: 1,
|
|
1734
2271
|
maxWorkers: CONFIG.maxWorkers,
|
|
@@ -1746,6 +2283,7 @@ class StudyEngine {
|
|
|
1746
2283
|
|
|
1747
2284
|
enqueueDocument(doc) {
|
|
1748
2285
|
this.queue.push(doc);
|
|
2286
|
+
this.metrics.enqueued += 1;
|
|
1749
2287
|
}
|
|
1750
2288
|
|
|
1751
2289
|
async _tick() {
|
|
@@ -1754,12 +2292,24 @@ class StudyEngine {
|
|
|
1754
2292
|
}
|
|
1755
2293
|
const doc = this.queue.shift();
|
|
1756
2294
|
try {
|
|
2295
|
+
this.metrics.lastTickAt = Date.now();
|
|
1757
2296
|
if (doc) {
|
|
1758
2297
|
await this.pool.ingestDocument(doc);
|
|
1759
2298
|
}
|
|
1760
|
-
|
|
2299
|
+
// memeMergeWorker.cjs 并未导出 ingestDocument,这里调用会触发:Unknown method "ingestDocument"。
|
|
2300
|
+
// 保留 workerpool 管线:用 batchLemmatize 做一次轻量预处理/校验。
|
|
2301
|
+
try {
|
|
2302
|
+
const text = String(doc?.text || '');
|
|
2303
|
+
const tokens = tokenize(text);
|
|
2304
|
+
await this.poolWorker.exec('batchLemmatize', [[tokens], this.pool.getActive().runtime?.config?.lemmaCsv]);
|
|
2305
|
+
} catch (_e) {
|
|
2306
|
+
// ignore
|
|
2307
|
+
}
|
|
2308
|
+
this.metrics.processed += 1;
|
|
2309
|
+
this.metrics.lastError = null;
|
|
1761
2310
|
} catch (err) {
|
|
1762
2311
|
console.error('[Study] worker ingest failed:', err.message);
|
|
2312
|
+
this.metrics.lastError = String(err.message || err);
|
|
1763
2313
|
}
|
|
1764
2314
|
const snapshot = this.pool.standby.snapshot();
|
|
1765
2315
|
await this.redis.publish(snapshot);
|
|
@@ -1833,11 +2383,13 @@ const textToMiniEmbedding = (text, dim = 64) => {
|
|
|
1833
2383
|
class ShardManager {
|
|
1834
2384
|
constructor(pool) {
|
|
1835
2385
|
this.pool = pool;
|
|
1836
|
-
this.shards = {
|
|
1837
|
-
|
|
1838
|
-
|
|
1839
|
-
|
|
1840
|
-
|
|
2386
|
+
this.shards = {};
|
|
2387
|
+
const names = typeof pool?.listControllerNames === 'function'
|
|
2388
|
+
? pool.listControllerNames()
|
|
2389
|
+
: Object.keys(pool?.controllers || {});
|
|
2390
|
+
for (const name of names) {
|
|
2391
|
+
this.shards[name] = new ShardDescriptor(name);
|
|
2392
|
+
}
|
|
1841
2393
|
this.lastEmbedding = new Float32Array(64);
|
|
1842
2394
|
}
|
|
1843
2395
|
|
|
@@ -1864,7 +2416,7 @@ class ShardManager {
|
|
|
1864
2416
|
}
|
|
1865
2417
|
|
|
1866
2418
|
record(controllerName, requestEmbedding, replyText, latency) {
|
|
1867
|
-
const shard = this.shards[controllerName];
|
|
2419
|
+
const shard = this.shards[controllerName] || null;
|
|
1868
2420
|
if (!shard) {
|
|
1869
2421
|
return;
|
|
1870
2422
|
}
|
|
@@ -1898,15 +2450,285 @@ const buildVariants = (text, count = 0) => {
|
|
|
1898
2450
|
return variants;
|
|
1899
2451
|
};
|
|
1900
2452
|
|
|
2453
|
+
/** Clamp a number into [0, 1]; NaN stays NaN. */
const clamp01 = (x) => Math.min(1, Math.max(0, x));
/** Clamp a number into [-1, 1]; NaN stays NaN. */
const clamp11 = (x) => Math.min(1, Math.max(-1, x));
|
|
2455
|
+
|
|
2456
|
+
/**
 * Build a deterministic pseudo-random generator (xorshift32).
 *
 * @param {number} seed - Coerced to an unsigned 32-bit integer. A seed that
 *   coerces to 0 is replaced by 0x9e3779b9, because an all-zero xorshift state
 *   would stay zero forever.
 * @returns {function(): number} Generator returning floats in [0, 1).
 */
const makeRng = (seed) => {
  let state = (seed >>> 0) || 0x9e3779b9;
  return () => {
    state ^= state << 13;
    state ^= state >>> 17;
    state ^= state << 5;
    // Divide by 2^32, not 2^32 - 1: the state can be 0xffffffff, and the result
    // must stay strictly below 1 so that Math.floor(rng() * n) is a valid index.
    return (state >>> 0) / 0x100000000;
  };
};
|
|
2466
|
+
|
|
2467
|
+
/**
 * Chooses one reply among the replies produced by several controllers.
 *
 * The selection is a random-forest-like vote: every "tree" draws a random
 * subset of features and random weights, optionally sub-samples the
 * candidates, and votes for the candidate with the highest weighted score.
 * Candidates are ranked by votes, then mean score, then consensus, then
 * affinity. The winner's embedding is blended into a running global persona
 * and a per-controller persona, which feed back into later picks.
 *
 * Relies on helpers defined elsewhere in the file: tokenize,
 * textToMiniEmbedding, cosineSim, hashStrSimple, makeRng, clamp01, clamp11.
 */
class PersonaForestAverager {
  /**
   * @param {object} [options]
   * @param {boolean} [options.enabled=true] - `false` makes pick() return null.
   * @param {number} [options.trees=32] - Number of voting rounds (minimum 8).
   * @param {number} [options.featureSubspace=4] - Features drawn per tree (minimum 2).
   * @param {number} [options.sampleRate=0.85] - Probability of keeping a candidate in a tree's bag.
   * @param {number} [options.personaMomentum=0.92] - Weight of the previous persona when blending.
   * @param {number} [options.targetReplyLen=220] - Preferred reply length when history gives none (minimum 20).
   * @param {number} [options.maxHistory=60] - History window size exposed to callers (minimum 10).
   */
  constructor(options = {}) {
    this.enabled = options.enabled !== false;
    this.trees = Math.max(8, Number(options.trees ?? 32) || 32);
    this.featureSubspace = Math.max(2, Number(options.featureSubspace ?? 4) || 4);
    this.sampleRate = clamp01(Number(options.sampleRate ?? 0.85) || 0.85);
    this.personaMomentum = clamp01(Number(options.personaMomentum ?? 0.92) || 0.92);
    this.targetReplyLen = Math.max(20, Number(options.targetReplyLen ?? 220) || 220);
    this.maxHistory = Math.max(10, Number(options.maxHistory ?? 60) || 60);
    this.globalPersona = null; // Float32Array once the first winner is recorded
    this.controllerPersona = new Map(); // controller name -> Float32Array
  }

  /**
   * Exponential moving average of two vectors.
   *
   * @param {?Float32Array} prev - Previous persona, or null.
   * @param {?ArrayLike<number>} next - New embedding; when missing, `prev` is returned unchanged.
   * @param {number} momentum - Weight given to `prev`.
   * @returns {?Float32Array} Blended vector.
   */
  _blendPersona(prev, next, momentum) {
    if (!next) return prev;
    // A persona of a different dimension cannot be blended element-wise
    // (the missing tail would become NaN and be zeroed), so restart from `next`.
    if (!prev || prev.length !== next.length) return Float32Array.from(next);
    const out = new Float32Array(prev.length);
    for (let i = 0; i < prev.length; i++) {
      const v = (momentum * prev[i]) + ((1 - momentum) * next[i]);
      out[i] = Number.isFinite(v) ? v : 0;
    }
    return out;
  }

  /**
   * Mean of the vectors whose length equals `dim`; others are ignored.
   *
   * @param {Array<ArrayLike<number>>} vecs
   * @param {number} dim
   * @returns {?Float32Array} Centroid, or null when no vector qualifies.
   */
  _centroid(vecs, dim) {
    if (!Array.isArray(vecs) || vecs.length === 0) return null;
    const out = new Float32Array(dim);
    let count = 0;
    for (const v of vecs) {
      if (!v || v.length !== dim) continue;
      for (let i = 0; i < dim; i++) out[i] += v[i];
      count += 1;
    }
    if (!count) return null;
    for (let i = 0; i < dim; i++) out[i] /= count;
    return out;
  }

  /**
   * Fraction of `aTokens` (duplicates included) that also occur in `bTokens`.
   *
   * @param {string[]} aTokens
   * @param {string[]} bTokens
   * @returns {number} Value in [0, 1]; 0 when either list is empty.
   */
  _tokenOverlapRatio(aTokens, bTokens) {
    if (!aTokens.length || !bTokens.length) return 0;
    const setB = new Set(bTokens);
    let hit = 0;
    for (const t of aTokens) if (setB.has(t)) hit += 1;
    return hit / Math.max(1, aTokens.length);
  }

  /**
   * Mean affinity of a controller's variants, mapped from [-1, 1] to [0, 1].
   * Variants without a response or without a finite numeric affinity are skipped.
   *
   * @param {object} controller - Controller result carrying an optional `variants` array.
   * @returns {number} Stability in [0, 1]; 0.5 when there is nothing to average.
   */
  _variantStability(controller) {
    const variants = controller?.variants;
    if (!Array.isArray(variants) || variants.length === 0) return 0.5;
    let sum = 0;
    let n = 0;
    for (const v of variants) {
      if (!v || !v.response || typeof v.affinity !== 'number') continue;
      if (!Number.isFinite(v.affinity)) continue;
      sum += clamp11(v.affinity);
      n += 1;
    }
    if (!n) return 0.5;
    return clamp01((sum / n + 1) / 2);
  }

  /**
   * Turn layer results into candidate records (one per controller with a non-empty reply).
   * When a controller carries no finite affinity, it is estimated as the cosine
   * similarity between the request embedding and the reply embedding.
   */
  _collectCandidates(layerResults, requestEmbedding, inputText, dim) {
    // Built lazily: only needed for controllers without a finite affinity. Falling
    // back to an embedding of the request text avoids handing null to cosineSim.
    let requestVector = requestEmbedding;
    const candidates = [];
    for (const layer of layerResults || []) {
      for (const controller of layer.controllers || []) {
        const base = controller?.base;
        const reply = base?.reply;
        if (!base || !reply) continue;
        const emb = textToMiniEmbedding(String(reply), dim);
        let affinity;
        if (Number.isFinite(controller.affinity)) {
          affinity = clamp11(controller.affinity);
        } else {
          if (!requestVector) requestVector = textToMiniEmbedding(inputText, dim);
          affinity = clamp11(cosineSim(requestVector, emb));
        }
        candidates.push({
          layer: layer.layer,
          controller: controller.controller,
          reply: String(reply),
          latency: base.latency,
          sessionId: base.sessionId,
          affinity,
          emb,
          controllerRef: controller
        });
      }
    }
    return candidates;
  }

  /**
   * Attach a `features` object (all values normalised to [0, 1]) to every candidate.
   */
  _attachFeatures(candidates, inputTokens, history, dim) {
    const consensusCentroid = this._centroid(candidates.map((c) => c.emb), dim);
    const lastAgg = Array.isArray(history) && history.length ? history[history.length - 1]?.aggregate : null;
    const lastReplyEmb = lastAgg?.reply ? textToMiniEmbedding(String(lastAgg.reply), dim) : null;
    const lastReplyLen = typeof lastAgg?.reply === 'string' ? lastAgg.reply.length : null;
    // Prefer the length of the previous aggregate reply when it is long enough.
    const targetLen = Number.isFinite(lastReplyLen) && lastReplyLen > 20 ? lastReplyLen : this.targetReplyLen;

    for (const c of candidates) {
      const replyTokens = tokenize(c.reply);
      const overlap = this._tokenOverlapRatio(inputTokens, replyTokens);
      const consensus = consensusCentroid ? clamp11(cosineSim(c.emb, consensusCentroid)) : 0;
      const personaCoherence = this.globalPersona ? clamp11(cosineSim(c.emb, this.globalPersona)) : 0;
      const ctrlPersona = this.controllerPersona.get(c.controller);
      const controllerCoherence = ctrlPersona ? clamp11(cosineSim(c.emb, ctrlPersona)) : 0;
      const novelty = lastReplyEmb ? clamp01(1 - ((clamp11(cosineSim(c.emb, lastReplyEmb)) + 1) / 2)) : 0.5;
      const lenPenalty = -clamp01(Math.abs(c.reply.length - targetLen) / Math.max(40, targetLen));
      const stability = this._variantStability(c.controllerRef);
      const latencyNorm = Number.isFinite(c.latency) ? clamp01(1 - (Number(c.latency) / 2000)) : 0.5;
      c.features = {
        affinity: clamp01((c.affinity + 1) / 2),
        overlap,
        consensus: clamp01((consensus + 1) / 2),
        persona: clamp01((personaCoherence + 1) / 2),
        controllerPersona: clamp01((controllerCoherence + 1) / 2),
        novelty,
        lenPenalty: clamp01(1 + lenPenalty), // [-1, 0] -> [0, 1]
        stability,
        latency: latencyNorm
      };
    }
  }

  /**
   * Pick the winning reply.
   *
   * @param {object} args
   * @param {object} args.payload - Request; `text` and `sessionId` are read.
   * @param {Array} args.layerResults - `[{ layer, controllers: [{ controller, base, affinity, variants }] }]`.
   * @param {?ArrayLike<number>} args.requestEmbedding - Request embedding; its length sets the embedding dimension (64 when absent).
   * @param {Array} [args.history] - Previous dispatch records; only the last entry's `aggregate.reply` is read.
   * @returns {?object} The winning candidate, or null when disabled or when no
   *   controller produced a reply. `method` is 'forest-single' for a lone
   *   candidate and 'persona-forest' otherwise.
   */
  pick({ payload, layerResults, requestEmbedding, history = [] }) {
    if (!this.enabled) {
      return null;
    }
    const dim = requestEmbedding?.length ?? 64;
    const inputText = String(payload?.text || '');
    const inputTokens = tokenize(inputText);

    const candidates = this._collectCandidates(layerResults, requestEmbedding, inputText, dim);
    if (candidates.length === 0) return null;
    if (candidates.length === 1) {
      const only = candidates[0];
      this._updatePersonas(only);
      return { ...only, score: 0, method: 'forest-single' };
    }

    this._attachFeatures(candidates, inputTokens, history, dim);

    const featureNames = Object.keys(candidates[0].features);
    // Seeding from session, text and candidate count keeps the vote reproducible for one request.
    const seedText = `${payload?.sessionId || ''}|${hashStrSimple(inputText)}|${candidates.length}`;
    const rng = makeRng(hashStrSimple(seedText));

    const scores = new Map();
    const votes = new Map();
    for (const c of candidates) {
      scores.set(c, 0);
      votes.set(c, 0);
    }

    const subspaceSize = Math.min(this.featureSubspace, featureNames.length);
    const lastFeatureIndex = featureNames.length - 1;
    const pickSubspace = () => {
      const chosen = new Set();
      while (chosen.size < subspaceSize) {
        // Clamp the index so a generator that yields exactly 1 cannot select an undefined feature.
        const idx = Math.min(lastFeatureIndex, Math.floor(rng() * featureNames.length));
        chosen.add(featureNames[idx]);
      }
      return Array.from(chosen);
    };

    for (let t = 0; t < this.trees; t++) {
      const subspace = pickSubspace();
      // Random weight per feature, biased toward affinity / consensus / persona.
      const weights = {};
      for (const f of subspace) {
        let w = (rng() * 2 - 1);
        if (f === 'affinity' || f === 'consensus' || f === 'persona') w *= 1.4;
        if (f === 'controllerPersona' || f === 'stability') w *= 1.1;
        if (f === 'novelty') w *= 0.6;
        weights[f] = w;
      }

      // Bagging: vote among a random subset, unless fewer than two candidates survive.
      const bag = candidates.filter(() => rng() <= this.sampleRate);
      const bagCandidates = bag.length >= 2 ? bag : candidates;

      // Score every candidate once; the soft score of all candidates feeds tie-breaks.
      const treeScores = new Map();
      for (const c of candidates) {
        let s = 0;
        for (const f of subspace) {
          const v = c.features[f];
          s += weights[f] * (Number.isFinite(v) ? v : 0);
        }
        treeScores.set(c, s);
        scores.set(c, (scores.get(c) || 0) + s);
      }

      let best = null;
      let bestScore = -Infinity;
      for (const c of bagCandidates) {
        const s = treeScores.get(c);
        if (s > bestScore) {
          bestScore = s;
          best = c;
        }
      }
      if (best) {
        votes.set(best, (votes.get(best) || 0) + 1);
      }
    }

    const ranked = candidates
      .map((c) => ({
        c,
        votes: votes.get(c) || 0,
        score: (scores.get(c) || 0) / Math.max(1, this.trees)
      }))
      .sort((a, b) => (b.votes - a.votes) || (b.score - a.score) || (b.c.features.consensus - a.c.features.consensus) || (b.c.features.affinity - a.c.features.affinity));

    const winner = ranked[0]?.c;
    if (!winner) return null;

    this._updatePersonas(winner);
    return {
      layer: winner.layer,
      controller: winner.controller,
      affinity: winner.affinity,
      reply: winner.reply,
      latency: winner.latency,
      sessionId: winner.sessionId,
      score: ranked[0].score,
      votes: ranked[0].votes,
      method: 'persona-forest'
    };
  }

  /**
   * Blend the winner's embedding into the global and per-controller personas.
   * The per-controller map is capped at 64 entries.
   *
   * @param {object} winner - Candidate carrying `emb` and `controller`.
   */
  _updatePersonas(winner) {
    if (!winner?.emb) return;
    const next = winner.emb;
    this.globalPersona = this._blendPersona(this.globalPersona, next, this.personaMomentum);
    const prevCtrl = this.controllerPersona.get(winner.controller) || null;
    const blended = this._blendPersona(prevCtrl, next, this.personaMomentum);

    // Re-insert so Map iteration order runs from least to most recently updated.
    this.controllerPersona.delete(winner.controller);
    this.controllerPersona.set(winner.controller, blended);

    // Evict the stalest entries first; the entry just written is never dropped
    // and the outcome does not depend on Math.random().
    while (this.controllerPersona.size > 64) {
      const oldest = this.controllerPersona.keys().next().value;
      this.controllerPersona.delete(oldest);
    }
  }
}
|
|
2703
|
+
|
|
1901
2704
|
class SparkArray {
|
|
1902
|
-
|
|
2705
|
+
/**
|
|
2706
|
+
* @param {ControllerPool} pool
|
|
2707
|
+
* @param {ShardManager} shardManager
|
|
2708
|
+
* @param {Object} [options]
|
|
2709
|
+
* @param {number} [options.numAI] - 对话AI数量,默认7
|
|
2710
|
+
* @param {Array} [options.layers] - 自定义层结构
|
|
2711
|
+
*/
|
|
2712
|
+
constructor(pool, shardManager, options = {}) {
|
|
1903
2713
|
this.pool = pool;
|
|
1904
2714
|
this.shardManager = shardManager;
|
|
1905
|
-
this.
|
|
1906
|
-
|
|
1907
|
-
|
|
1908
|
-
|
|
1909
|
-
|
|
2715
|
+
this.groupId = String(options.groupId || (typeof pool?.listGroupIds === 'function' ? (pool.listGroupIds()[0] || 'G1') : 'G1'));
|
|
2716
|
+
const available = typeof pool?.listControllersInGroup === 'function'
|
|
2717
|
+
? pool.listControllersInGroup(this.groupId)
|
|
2718
|
+
: (typeof pool?.listControllerNames === 'function' ? pool.listControllerNames() : Object.keys(pool?.controllers || {}));
|
|
2719
|
+
const wanted = 7; // 每个工作组固定 7 个 AI
|
|
2720
|
+
const numAI = Math.max(1, Math.min(available.length || wanted, wanted));
|
|
2721
|
+
// 组内小 SparkArray:固定 7 个 AI(不足则截断)
|
|
2722
|
+
this.layers = Array.from({ length: numAI }, (_, i) => ({
|
|
2723
|
+
name: `${this.groupId}:a${i + 1}`,
|
|
2724
|
+
controllers: [available[i]],
|
|
2725
|
+
strategy: 'max'
|
|
2726
|
+
}));
|
|
2727
|
+
// 如果外部传入自定义层结构则覆盖
|
|
2728
|
+
if (Array.isArray(options.layers) && options.layers.length > 0) {
|
|
2729
|
+
this.layers = options.layers.map((layer) => ({ strategy: 'max', ...layer }));
|
|
2730
|
+
}
|
|
2731
|
+
this.personaForest = new PersonaForestAverager(options.personaForest || {});
|
|
1910
2732
|
this.history = [];
|
|
1911
2733
|
}
|
|
1912
2734
|
|
|
@@ -1934,14 +2756,24 @@ class SparkArray {
|
|
|
1934
2756
|
|
|
1935
2757
|
for (const layer of layers) {
|
|
1936
2758
|
const controllers = [];
|
|
1937
|
-
for (const
|
|
2759
|
+
for (const controllerSpec of layer.controllers) {
|
|
2760
|
+
const controllerName = typeof controllerSpec === 'string'
|
|
2761
|
+
? controllerSpec
|
|
2762
|
+
: String(controllerSpec?.name || controllerSpec?.controller || '').trim();
|
|
2763
|
+
const weightRaw = typeof controllerSpec === 'object' && controllerSpec
|
|
2764
|
+
? (controllerSpec.weight ?? controllerSpec.w)
|
|
2765
|
+
: 1;
|
|
2766
|
+
const weight = Math.max(1, Math.min(8, Math.round(Number(weightRaw) || 1)));
|
|
1938
2767
|
const ctrl = this.pool.getByName(controllerName);
|
|
1939
2768
|
if (!ctrl || !ctrl.online) {
|
|
1940
2769
|
continue;
|
|
1941
2770
|
}
|
|
1942
2771
|
let baseResult;
|
|
1943
2772
|
try {
|
|
1944
|
-
|
|
2773
|
+
const weightedText = weight <= 1
|
|
2774
|
+
? String(payload.text || '')
|
|
2775
|
+
: Array.from({ length: weight }, () => String(payload.text || '')).join(' ');
|
|
2776
|
+
baseResult = await ctrl.respond({ ...payload, text: weightedText });
|
|
1945
2777
|
} catch (err) {
|
|
1946
2778
|
controllers.push({
|
|
1947
2779
|
controller: controllerName,
|
|
@@ -1977,12 +2809,33 @@ class SparkArray {
|
|
|
1977
2809
|
layerResults.push({ layer: layer.name, controllers });
|
|
1978
2810
|
}
|
|
1979
2811
|
|
|
1980
|
-
const aggregate = this._aggregate(layerResults);
|
|
2812
|
+
const aggregate = this._aggregate(layerResults, payload, requestEmbedding);
|
|
1981
2813
|
this._recordHistory(payload, aggregate, layerResults);
|
|
1982
2814
|
return { aggregate, layers: layerResults };
|
|
1983
2815
|
}
|
|
1984
2816
|
|
|
1985
|
-
|
|
2817
|
+
// 多轮“大 SparkArray”:用上一轮 aggregate 作为下一轮输入以修补缝隙
|
|
2818
|
+
async dispatchBig(payload, options = {}) {
|
|
2819
|
+
const rounds = Math.max(1, Math.min(10, Number(options.bigRounds ?? options.bigSparkRounds ?? 1) || 1));
|
|
2820
|
+
const history = [];
|
|
2821
|
+
let current = { ...(payload || {}) };
|
|
2822
|
+
const originalText = String(current.text || '');
|
|
2823
|
+
for (let i = 0; i < rounds; i++) {
|
|
2824
|
+
const r = await this.dispatch(current, options);
|
|
2825
|
+
history.push(r);
|
|
2826
|
+
const reply = r?.aggregate?.reply ? String(r.aggregate.reply) : '';
|
|
2827
|
+
if (!reply.trim()) {
|
|
2828
|
+
break;
|
|
2829
|
+
}
|
|
2830
|
+
// 以“原问题 + 上轮回答”为下一轮输入,长度做截断避免爆炸
|
|
2831
|
+
const combined = `${originalText}\n${reply}`;
|
|
2832
|
+
current = { ...current, text: combined.slice(-4000) };
|
|
2833
|
+
}
|
|
2834
|
+
const last = history[history.length - 1] || { aggregate: null, layers: [] };
|
|
2835
|
+
return { ...last, rounds: history };
|
|
2836
|
+
}
|
|
2837
|
+
|
|
2838
|
+
_aggregate(layerResults, payload = {}, requestEmbedding = null) {
|
|
1986
2839
|
let best = null;
|
|
1987
2840
|
for (const layer of layerResults) {
|
|
1988
2841
|
for (const controller of layer.controllers) {
|
|
@@ -1996,11 +2849,37 @@ class SparkArray {
|
|
|
1996
2849
|
affinity: controller.affinity,
|
|
1997
2850
|
reply: controller.base.reply,
|
|
1998
2851
|
latency: controller.base.latency,
|
|
1999
|
-
sessionId: controller.base.sessionId
|
|
2852
|
+
sessionId: controller.base.sessionId,
|
|
2853
|
+
method: 'max-affinity'
|
|
2000
2854
|
};
|
|
2001
2855
|
}
|
|
2002
2856
|
}
|
|
2003
2857
|
}
|
|
2858
|
+
|
|
2859
|
+
// 随机森林式“中途平均/投票”:在不改变对外结构的前提下,优先选择更稳定且共识更强的回复
|
|
2860
|
+
try {
|
|
2861
|
+
const picked = this.personaForest.pick({
|
|
2862
|
+
payload: { text: payload?.text || '', sessionId: payload?.sessionId },
|
|
2863
|
+
layerResults,
|
|
2864
|
+
requestEmbedding: requestEmbedding || textToMiniEmbedding(payload?.text || '', 64),
|
|
2865
|
+
history: this.history.slice(-this.personaForest.maxHistory)
|
|
2866
|
+
});
|
|
2867
|
+
if (picked && picked.reply) {
|
|
2868
|
+
return {
|
|
2869
|
+
layer: picked.layer,
|
|
2870
|
+
controller: picked.controller,
|
|
2871
|
+
affinity: picked.affinity,
|
|
2872
|
+
reply: picked.reply,
|
|
2873
|
+
latency: picked.latency,
|
|
2874
|
+
sessionId: picked.sessionId,
|
|
2875
|
+
method: picked.method,
|
|
2876
|
+
score: picked.score,
|
|
2877
|
+
votes: picked.votes
|
|
2878
|
+
};
|
|
2879
|
+
}
|
|
2880
|
+
} catch (err) {
|
|
2881
|
+
// fall back to old behavior
|
|
2882
|
+
}
|
|
2004
2883
|
return best;
|
|
2005
2884
|
}
|
|
2006
2885
|
|
|
@@ -2017,54 +2896,862 @@ class SparkArray {
|
|
|
2017
2896
|
}
|
|
2018
2897
|
}
|
|
2019
2898
|
|
|
2020
|
-
class
|
|
2021
|
-
constructor(pool, shardManager,
|
|
2899
|
+
/**
 * Spark array across work groups: every group runs its own SparkArray and the
 * group aggregates are then merged by a PersonaForestAverager vote.
 */
class BigSparkArray {
  /**
   * @param {object} pool - Controller pool; `listGroupIds()` is used when present.
   * @param {object} shardManager - Passed through to every group's SparkArray.
   * @param {object} [options]
   * @param {Array} [options.groupIds] - Groups to use; defaults to `pool.listGroupIds()` or ['G1'].
   * @param {object} [options.groupWeights] - Map groupId -> weight, rounded and clamped to 1..8 (default 1).
   * @param {object} [options.groupOptions] - Extra options spread into every SparkArray.
   * @param {object} [options.personaForest] - Options for the cross-group averager.
   */
  constructor(pool, shardManager, options = {}) {
    this.pool = pool;
    this.shardManager = shardManager;
    const groupIds = Array.isArray(options.groupIds) && options.groupIds.length
      ? options.groupIds
      : (typeof pool?.listGroupIds === 'function' ? pool.listGroupIds() : ['G1']);
    this.groups = groupIds.map((groupId) => ({
      groupId,
      weight: Math.max(1, Math.min(8, Math.round(Number(options?.groupWeights?.[groupId] ?? 1) || 1))),
      spark: new SparkArray(pool, shardManager, { groupId, ...(options.groupOptions || {}) })
    }));
    this.personaForest = new PersonaForestAverager(options.personaForest || {});
    this.history = [];
  }

  /** @returns {Array} `{ groupId, weight, layers }` for every group. */
  getLayers() {
    return this.groups.map((g) => ({ groupId: g.groupId, weight: g.weight, layers: g.spark.getLayers() }));
  }

  /**
   * Update the layers of one group.
   *
   * @param {Array|object} patch - An array (legacy form) updates the first
   *   group; `{ groupId, layers }` updates the named group. Anything else is ignored.
   */
  updateLayers(patch) {
    if (Array.isArray(patch)) {
      this.groups[0]?.spark?.updateLayers(patch);
      return;
    }
    const groupId = String(patch?.groupId || '').trim();
    const layers = patch?.layers;
    if (groupId && Array.isArray(layers)) {
      const g = this.groups.find((x) => x.groupId === groupId);
      if (g) g.spark.updateLayers(layers);
    }
  }

  /**
   * Fallback aggregate: the group with a non-empty reply and the highest affinity
   * (a non-finite affinity counts as 0; the first group wins ties).
   */
  _fallbackAggregate(groupResults) {
    let best = null;
    // Tracked separately: the stored aggregate keeps its raw affinity, which may
    // be NaN/undefined and would make every later `>` comparison false.
    let bestAffinity = -Infinity;
    for (const gr of groupResults) {
      const agg = gr?.result?.aggregate;
      if (!agg || !agg.reply) continue;
      const a = Number.isFinite(agg.affinity) ? agg.affinity : 0;
      if (!best || a > bestAffinity) {
        best = { ...agg, controller: gr.groupId, layer: 'groups' };
        bestAffinity = a;
      }
    }
    return best;
  }

  /**
   * Dispatch the payload to every group (one after another) and merge the group aggregates.
   *
   * @param {object} payload - Request; `text` and `sessionId` are read.
   * @param {object} [options] - Passed to every group's dispatch().
   * @returns {Promise<{aggregate: ?object, groups: Array}>}
   */
  async dispatch(payload, options = {}) {
    const requestEmbedding = textToMiniEmbedding(payload?.text || '', 64);
    const groupResults = [];

    for (const g of this.groups) {
      const groupPayload = { ...(payload || {}) };
      // Group weight is expressed by repeating the request text `weight` times.
      const w = Math.max(1, g.weight);
      if (w > 1) {
        const t = String(groupPayload.text || '');
        groupPayload.text = Array.from({ length: w }, () => t).join(' ');
      }
      const result = await g.spark.dispatch(groupPayload, options);
      groupResults.push({ groupId: g.groupId, result });
    }

    // Present each group's aggregate as one candidate "controller" of a single layer.
    const layerResults = [
      {
        layer: 'groups',
        controllers: groupResults.map((gr) => {
          const agg = gr?.result?.aggregate;
          return {
            controller: gr.groupId,
            base: agg ? { reply: agg.reply, latency: agg.latency, sessionId: agg.sessionId } : null,
            affinity: agg ? clamp11(Number(agg.affinity ?? 0)) : 0,
            variants: []
          };
        })
      }
    ];

    let aggregate = null;
    try {
      const picked = this.personaForest.pick({
        payload: { text: payload?.text || '', sessionId: payload?.sessionId },
        layerResults,
        requestEmbedding,
        history: this.history.slice(-this.personaForest.maxHistory)
      });
      if (picked && picked.reply) {
        aggregate = {
          layer: 'groups',
          controller: picked.controller,
          affinity: picked.affinity,
          reply: picked.reply,
          latency: picked.latency,
          sessionId: picked.sessionId,
          method: picked.method,
          score: picked.score,
          votes: picked.votes
        };
      }
    } catch (_e) {
      // Best effort: a failing vote falls through to the max-affinity fallback below.
    }
    if (!aggregate) {
      aggregate = this._fallbackAggregate(groupResults);
    }

    const out = { aggregate, groups: groupResults };
    this.history.push({ ts: Date.now(), request: { text: payload?.text, sessionId: payload?.sessionId }, aggregate, groups: groupResults });
    // Keep at most the 100 most recent records.
    if (this.history.length > 100) this.history.shift();
    return out;
  }

  /**
   * Multi-round dispatch: each round's aggregate reply is appended to the
   * original text (capped to the last 4000 characters) and dispatched again.
   *
   * @param {object} payload
   * @param {object} [options] - `bigRounds` / `bigSparkRounds`: rounds, clamped to 1..10, default 1.
   * @returns {Promise<object>} Last round's result plus `rounds` with every round's result.
   */
  async dispatchBig(payload, options = {}) {
    const rounds = Math.max(1, Math.min(10, Number(options.bigRounds ?? options.bigSparkRounds ?? 1) || 1));
    const history = [];
    let current = { ...(payload || {}) };
    const originalText = String(current.text || '');
    for (let i = 0; i < rounds; i++) {
      const r = await this.dispatch(current, options);
      history.push(r);
      const reply = r?.aggregate?.reply ? String(r.aggregate.reply) : '';
      if (!reply.trim()) break;
      const combined = `${originalText}\n${reply}`;
      current = { ...current, text: combined.slice(-4000) };
    }
    const last = history[history.length - 1] || { aggregate: null, groups: [] };
    return { ...last, rounds: history };
  }
}
|
|
3027
|
+
|
|
3028
|
+
// 模因阻断模块 - 识别并隔离恶性模因(参考实验版)
|
|
3029
|
+
/**
 * Periodically scans the runtime's meme graph, scores every point for
 * "maliciousness" and cuts the outgoing edges of the worst offenders.
 *
 * Each entry of a point's `connect` array is read as `[weight, targetPointID, direction]`;
 * direction 2 is treated as an outgoing edge and direction 1 as the matching
 * incoming edge on the target.
 */
class MemeBarrier {
  /**
   * @param {object} runtime - Must expose `graph.getAllPoints()` and `graph.points` (a Map);
   *   `wordAccessLog` (Map of pointID -> Map of counts) is optional.
   * @param {object} [options]
   * @param {number} [options.scanIntervalMs=10000] - Delay between scans once started.
   * @param {number} [options.maliciousThreshold=0.7] - Minimum score that triggers isolation.
   * @param {number} [options.maxIsolatePerScan=5] - Upper bound of isolations per scan.
   */
  constructor(runtime, {
    scanIntervalMs = 10_000,
    maliciousThreshold = 0.7,
    maxIsolatePerScan = 5
  } = {}) {
    this.runtime = runtime;
    this.scanIntervalMs = scanIntervalMs;
    this.maliciousThreshold = maliciousThreshold;
    this.maxIsolatePerScan = maxIsolatePerScan;
    this.timer = null;
    // Initialised here so the flag is a boolean even before start() is called.
    this.running = false;
    this.stats = { scans: 0, isolated: 0, lastScanTime: 0, lastIsolated: [] };
  }

  /** Start the periodic scan; a second call while running is a no-op. */
  start() {
    if (this.timer) return;
    this.timer = setInterval(() => {
      try { this.scanNetwork(); } catch (e) { console.error('[MemeBarrier] 扫描错误:', e); }
    }, this.scanIntervalMs);
    this.running = true;
    console.log('[MemeBarrier] 已启动');
  }

  /** Stop the periodic scan. */
  stop() {
    if (this.timer) { clearInterval(this.timer); this.timer = null; }
    this.running = false;
    console.log('[MemeBarrier] 已停止');
  }

  /**
   * @param {object} meme - Graph point.
   * @param {number} score - Maliciousness score.
   * @returns {string} Short human-readable summary used in logs and stats.
   */
  generateReason(meme, score) {
    const conn = meme.connect || [];
    return `score=${score.toFixed(3)} conn=${conn.length}`;
  }

  /**
   * Score a point: 0.5 * access growth + 0.3 * out-degree skew + 0.2 * self-loop ratio.
   *
   * @param {object} meme - Graph point with `pointID` and optional `connect`.
   * @param {number} [avgConn] - Average connection count of the graph; computed
   *   on demand when omitted. Callers scoring many points should pass it once.
   * @returns {number} Score in [0, 1].
   */
  evaluateMaliciousness(meme, avgConn = this.getAverageConnections()) {
    const conn = meme.connect || [];
    const degree = conn.length;
    const selfLoops = conn.filter(([, pid]) => pid === meme.pointID).length;
    const outDegree = conn.filter(([, , dir]) => dir === 2).length;
    // Access growth, approximated from the total access count (log-scaled, capped at 1).
    let growth = 0;
    try {
      const access = this.runtime.wordAccessLog?.get(meme.pointID);
      if (access && access.size) {
        const total = Array.from(access.values()).reduce((a, b) => a + b, 0);
        growth = Math.min(1, Math.log1p(total) / 10);
      }
    } catch (_) {
      // Best effort: an unreadable access log simply contributes no growth.
    }
    const outSkew = avgConn > 0 ? Math.min(1, outDegree / (avgConn * 3)) : 0;
    const selfSkew = degree > 0 ? Math.min(1, selfLoops / degree) : 0;
    return 0.5 * growth + 0.3 * outSkew + 0.2 * selfSkew;
  }

  /**
   * @param {Array} [points] - Points to average over; defaults to all graph points.
   * @returns {number} Mean length of `connect` over the points, 0 for an empty graph.
   */
  getAverageConnections(points = this.runtime.graph.getAllPoints()) {
    if (!points.length) return 0;
    const sum = points.reduce((acc, p) => acc + (p.connect?.length || 0), 0);
    return sum / points.length;
  }

  /**
   * Cut every outgoing edge of a point, together with the matching incoming
   * edge stored on each target.
   *
   * @param {*} memeID - Key of the point in `graph.points`.
   * @param {number} score - Score recorded in the stats.
   * @param {string} reason - Reason recorded in the stats and the log.
   * @returns {boolean} true when at least one edge was cut.
   */
  isolateMeme(memeID, score, reason) {
    const point = this.runtime.graph.points.get(memeID);
    if (!point) return false;
    const conn = point.connect || [];
    let cut = 0;
    // Walk backwards so splice() does not shift entries still to be visited.
    for (let i = conn.length - 1; i >= 0; i--) {
      const [, pid, dir] = conn[i];
      if (dir !== 2) continue;
      conn.splice(i, 1);
      cut++;
      const target = this.runtime.graph.points.get(pid);
      if (target && target.connect) {
        for (let j = target.connect.length - 1; j >= 0; j--) {
          const [, tpid, tdir] = target.connect[j];
          if (tpid === memeID && tdir === 1) {
            target.connect.splice(j, 1);
          }
        }
      }
    }
    // Nothing cut means the point was already isolated: do not count or log it
    // again, otherwise every later scan would re-report the same point.
    if (cut === 0) return false;
    this.stats.isolated++;
    this.stats.lastIsolated.unshift({ memeID, score, reason, cut });
    this.stats.lastIsolated = this.stats.lastIsolated.slice(0, 20);
    console.log(`[MemeBarrier] 隔离 ${memeID}: cut=${cut} ${reason}`);
    return true;
  }

  /** Score all points and isolate the highest-scoring ones above the threshold. */
  scanNetwork() {
    const points = this.runtime.graph.getAllPoints();
    this.stats.scans++;
    this.stats.lastScanTime = Date.now();
    if (!points.length) return;
    // The graph is not modified while scoring, so the average is computed once
    // instead of once per point.
    const avgConn = this.getAverageConnections(points);
    const scored = points.map((p) => {
      const s = this.evaluateMaliciousness(p, avgConn);
      return { id: p.pointID, meme: p, score: s, reason: this.generateReason(p, s) };
    }).sort((a, b) => b.score - a.score);
    let isolated = 0;
    for (const item of scored) {
      // Sorted descending: the first score below the threshold ends the scan.
      if (item.score < this.maliciousThreshold) break;
      if (this.isolateMeme(item.id, item.score, item.reason)) {
        isolated++;
        if (isolated >= this.maxIsolatePerScan) break;
      }
    }
    if (isolated > 0) {
      console.log(`[MemeBarrier] 本次扫描隔离 ${isolated} 个可疑模因`);
    }
  }

  /** @returns {object} Copy of the counters; `lastIsolated` is copied too. */
  getStats() { return { ...this.stats, lastIsolated: [...this.stats.lastIsolated] }; }
}
|
|
3143
|
+
|
|
3144
|
+
// Reinforcement-learning module: an evaluate -> cluster -> adjust loop built on
// plain linear algebra plus optional external libraries.
class ReinforcementLearner {
  /**
   * @param {object} pool Controller pool; `pool.getActive()` must return the active controller.
   * @param {object} [options]
   * @param {string} [options.testsDir] Directory scanned for `*.txt` evaluation documents.
   * @param {number} [options.maxDocs=64] Maximum number of documents evaluated per run.
   * @param {number} [options.topKWords=30] Stored on the instance; not read inside this class.
   * @param {number} [options.improvementThreshold=0.01] Minimum (learnedAvg - baseAvg) gain
   *   that counts as an improvement in `_adjustParams`.
   * @param {number} [options.iterations=5] Stored on the instance; not read inside this class.
   * @param {boolean} [options.useUmap=true] Stored on the instance; not read inside this class.
   */
  constructor(pool, {
    testsDir = path.join(__dirname, 'tests'),
    maxDocs = 64,
    topKWords = 30,
    improvementThreshold = 0.01,
    iterations = 5,
    useUmap = true
  } = {}) {
    this.pool = pool;
    this.testsDir = testsDir;
    this.maxDocs = maxDocs;
    this.topKWords = topKWords;
    this.iterations = iterations;
    this.useUmap = useUmap;
    this.improvementThreshold = improvementThreshold;
    this.history = [];
    // Reuse the module-level, safely-required Matrix reference (may be null).
    this.Matrix = Matrix;
    this.kmeans = safeRequire('ml-kmeans');
    this.numeric = safeRequire('numeric');
  }

  /**
   * List up to `maxDocs` `.txt` files of the tests directory in locale order.
   * @returns {string[]} File names (not full paths); empty when the directory is missing.
   */
  _listTestFiles() {
    if (!fs.existsSync(this.testsDir)) {
      return [];
    }
    return fs.readdirSync(this.testsDir)
      .filter((n) => n.toLowerCase().endsWith('.txt'))
      .sort((a, b) => a.localeCompare(b))
      .slice(0, this.maxDocs);
  }

  /**
   * Read one test document as UTF-8.
   * @param {string} file File name relative to `testsDir`.
   * @returns {string} File content, or '' when it cannot be read (best effort by design).
   */
  _readText(file) {
    try {
      return fs.readFileSync(path.join(this.testsDir, file), 'utf8');
    } catch (err) {
      return '';
    }
  }

  /**
   * Score a reply as 0.7 * seed coverage + 0.3 * lexical uniqueness.
   *
   * Coverage is, per seed meme, the fraction of the meme's linked words that
   * occur in the reply, averaged over all seeds. Each distinct reply word is
   * counted once so that a repetitive reply cannot push coverage above 1.
   *
   * @param {string} reply Reply text.
   * @param {Map<*, *>} seeds Map keyed by meme id (values are not used here).
   * @returns {number} Quality score; 0 for an empty reply.
   */
  _scoreReplyQuality(reply, seeds) {
    const words = tokenize(reply);
    if (!words.length) return 0;
    const distinct = new Set(words);
    const kvm = this.pool.getActive().runtime.kvm;
    let coverage = 0;
    for (const memeId of seeds.keys()) {
      const linked = kvm.getMemeWords(memeId);
      if (!linked || linked.size === 0) continue;
      let hit = 0;
      for (const w of distinct) {
        if (linked.has(w)) hit += 1;
      }
      coverage += hit / linked.size;
    }
    coverage = coverage / Math.max(1, seeds.size);
    const uniq = distinct.size / words.length;
    return 0.7 * coverage + 0.3 * uniq;
  }

  /**
   * Turn a document into a plain numeric feature array via the 128-dim mini embedding.
   * @param {string} text
   * @returns {number[]}
   */
  _buildFeatureForDoc(text) {
    const vec = textToMiniEmbedding(text, 128);
    return Array.from(vec);
  }

  /**
   * Project feature rows to 2-D coordinates.
   *
   * Strategy: PCA through `numeric.eig` when both Matrix and numeric are
   * available (falling back to the built-in PCA if that fails); a random
   * projection when only Matrix is available; the built-in DimReducer PCA
   * when no external library is present.
   *
   * @param {number[][]} features One row per document.
   * @returns {number[][]} One `[x, y]` pair per input row; `[]` for empty input.
   */
  _reduceFeatures(features) {
    if (!Array.isArray(features) || features.length === 0) return [];
    if (!this.Matrix) {
      // No external library: use the existing DimReducer PCA.
      return this._projectWithBuiltinPca(features);
    }
    if (this.numeric) {
      try {
        return this._projectWithNumericPca(features);
      } catch (err) {
        // numeric.eig did not converge or failed: fall back to the built-in PCA.
        return this._projectWithBuiltinPca(features);
      }
    }
    // Fallback: approximate reduction through a random projection.
    const X = new this.Matrix(features);
    const D = X.columns;
    const rp1 = Array.from({ length: D }, () => Math.random() - 0.5);
    const rp2 = Array.from({ length: D }, () => Math.random() - 0.5);
    const coords = [];
    for (let i = 0; i < X.rows; i++) {
      const row = X.getRow(i);
      const x = row.reduce((acc, v, j) => acc + v * rp1[j], 0);
      const y = row.reduce((acc, v, j) => acc + v * rp2[j], 0);
      coords.push([x, y]);
    }
    return coords;
  }

  /**
   * PCA via covariance + eigen-decomposition (requires Matrix and numeric).
   * Throws when the decomposition is unusable so the caller can fall back.
   * @param {number[][]} features Non-empty feature rows.
   * @returns {number[][]} 2-D coordinates.
   */
  _projectWithNumericPca(features) {
    const rowCount = features.length;
    const colCount = features[0].length;
    // Centre every COLUMN (feature); the mean vector therefore has one entry
    // per column. Computed by hand so it does not depend on the Matrix API's
    // 'row' / 'column' naming.
    const mean = new Array(colCount).fill(0);
    for (const row of features) {
      for (let j = 0; j < colCount; j++) mean[j] += row[j] / rowCount;
    }
    const centeredRows = features.map((row) => row.map((v, j) => v - mean[j]));
    const centered = new this.Matrix(centeredRows);
    const cov = centered.transpose().mmul(centered).div(Math.max(1, rowCount - 1));
    const eig = this.numeric.eig(cov.to2DArray());
    const vectors = eig.E?.x || eig.E; // columns are eigenvectors
    if (!vectors) throw new Error('eig-vectors-missing');
    // Eigenvalues are not guaranteed to be ordered: pick the two largest when
    // they are exposed, otherwise keep the first two columns.
    let first = 0;
    let second = 1;
    const lambdas = eig.lambda?.x;
    if (Array.isArray(lambdas) && lambdas.length >= 2) {
      const order = lambdas.map((_, i) => i).sort((a, b) => lambdas[b] - lambdas[a]);
      [first, second] = order;
    }
    const v1 = vectors.map((row) => row[first] ?? 0);
    const v2 = vectors.map((row) => row[second] ?? 0);
    const dot = (row, axis) => row.reduce((acc, v, j) => acc + v * (axis[j] ?? 0), 0);
    const coords = centeredRows.map((row) => [dot(row, v1), dot(row, v2)]);
    if (coords.some(([x, y]) => !Number.isFinite(x) || !Number.isFinite(y))) {
      throw new Error('eig-projection-not-finite');
    }
    return coords;
  }

  /**
   * PCA through the file's own DimReducer (no external library needed).
   * @param {number[][]} features Feature rows.
   * @returns {number[][]} 2-D coordinates as returned by `project2D(...).coords`.
   */
  _projectWithBuiltinPca(features) {
    const reducer = new DimReducer();
    const emb = {
      data: Float32Array.from(features.flat()),
      nRows: features.length,
      nCols: features[0]?.length || 1
    };
    return reducer.project2D(emb, 'pca').coords;
  }

  /**
   * Assign every coordinate to one of `k` clusters.
   * Uses ml-kmeans when usable; otherwise cuts the x axis into `k` equal slices.
   * @param {number[][]} coords 2-D coordinates.
   * @param {number} [k=3] Number of clusters.
   * @returns {number[]} Cluster index per coordinate.
   */
  _cluster(coords, k = 3) {
    // ml-kmeans exports either the function itself or an object holding it,
    // depending on the installed version.
    const kmeansFn = typeof this.kmeans === 'function'
      ? this.kmeans
      : (this.kmeans?.kmeans ?? this.kmeans?.default);
    if (typeof kmeansFn === 'function') {
      try {
        const out = kmeansFn(coords, k);
        if (Array.isArray(out?.clusters)) return out.clusters;
      } catch (err) {
        // Fall through to the simple threshold clustering below.
      }
    }
    // Simple clustering: equal-width slices along the x coordinate.
    const xs = coords.map((c) => c[0]);
    const min = Math.min(...xs);
    const max = Math.max(...xs);
    const step = (max - min) / Math.max(1, k);
    return coords.map((c) => Math.min(k - 1, Math.max(0, Math.floor((c[0] - min) / Math.max(step, 1e-6)))));
  }

  /**
   * Nudge the active runtime's parameters according to the evaluation result
   * and apply them to the active controller.
   * @param {{baseAvg: number, learnedAvg: number}} evalStats
   * @returns {object} The parameter object that was applied.
   */
  _adjustParams(evalStats) {
    const active = this.pool.getActive();
    const params = active.runtime.cloneParams();
    const baseScore = evalStats.baseAvg;
    const learnedScore = evalStats.learnedAvg;
    if (learnedScore - baseScore >= this.improvementThreshold) {
      params.iteration = Math.min(12, (params.iteration || 5) + 1);
      params.decayK = Math.max(0.5, (params.decayK || 1) - 0.05);
    } else {
      params.iteration = Math.max(3, (params.iteration || 5) - 1);
      params.decayK = Math.min(2.0, (params.decayK || 1) + 0.05);
    }
    active.applyParams(params);
    return params;
  }

  /**
   * Run one test document through the active controller and score the reply.
   * @param {string} file File name relative to `testsDir`.
   * @returns {Promise<{file: string, score: number, words: string[], reply: string}>}
   */
  async evaluateOnce(file) {
    const text = this._readText(file);
    const ctrl = this.pool.getActive();
    const response = await ctrl.respond({ text });
    const seeds = new Map(response.seeds);
    const score = this._scoreReplyQuality(response.reply, seeds);
    return { file, score, words: tokenize(text), reply: response.reply };
  }

  /**
   * Evaluate all test documents, cluster the replies, and adjust parameters.
   *
   * When no test document exists nothing is evaluated and the parameters are
   * left untouched: adjusting them without evidence would only make them drift.
   *
   * @param {number} [cycles=3] Accepted for interface compatibility; a call
   *   currently performs a single evaluation pass regardless of this value.
   * @returns {Promise<{ok: boolean, evalStats: object, params: object, skipped?: string}>}
   */
  async learn(cycles = 3) {
    const files = this._listTestFiles();
    if (files.length === 0) {
      return {
        ok: true,
        skipped: 'no-test-files',
        evalStats: { baseAvg: 0, learnedAvg: 0, clusters: [] },
        params: this.pool.getActive().runtime.cloneParams()
      };
    }
    const evals = [];
    for (const file of files) {
      evals.push(await this.evaluateOnce(file));
    }
    const features = evals.map((e) => this._buildFeatureForDoc(e.reply));
    const coords = this._reduceFeatures(features);
    const k = Math.min(4, Math.max(2, Math.floor(Math.sqrt(files.length))));
    const clusters = this._cluster(coords, k);
    const grouped = new Map();
    for (let i = 0; i < evals.length; i++) {
      const g = clusters[i] ?? 0;
      if (!grouped.has(g)) grouped.set(g, []);
      grouped.get(g).push(evals[i]);
    }
    const clusterStats = Array.from(grouped.entries()).map(([gid, arr]) => ({
      gid,
      avg: arr.reduce((a, b) => a + b.score, 0) / Math.max(1, arr.length),
      n: arr.length
    }));
    const baseAvg = evals.reduce((a, b) => a + b.score, 0) / Math.max(1, evals.length);
    const learnedAvg = clusterStats.reduce((a, b) => a + b.avg, 0) / Math.max(1, clusterStats.length);
    const evalStats = { baseAvg, learnedAvg, clusters: clusterStats };
    const newParams = this._adjustParams(evalStats);
    this.history.push({ ts: Date.now(), evals, evalStats, params: newParams });
    return { ok: true, evalStats, params: newParams };
  }

  /**
   * @returns {object|null} The most recent history entry, or null when none exists.
   */
  latest() {
    return this.history[this.history.length - 1] || null;
  }
}
|
|
3336
|
+
|
|
3337
|
+
// Adversarial-learning module: simulates perturbations, generates adversarial
// samples, evaluates robustness and repairs the graph.
class AdversarialLearner {
  /**
   * @param {object} pool Controller pool; `pool.getActive()` must return the active controller.
   * @param {object} [options]
   * @param {number} [options.maxAdversaries=64] Upper bound on adversarial probes per attack round.
   * @param {number} [options.noiseLevel=0.2] Per-token perturbation probability
   *   (first half: deletion, second half: synonym replacement).
   * @param {Map<string, string[]>|null} [options.synonymMap=null] Token -> synonyms.
   * @param {number} [options.attackRounds=3] Number of attack rounds.
   * @param {number} [options.defenseRounds=3] Number of defense rounds.
   * @param {number} [options.benchLimit=50] Maximum number of samples used per run.
   */
  constructor(pool, {
    maxAdversaries = 64,
    noiseLevel = 0.2,
    synonymMap = null,
    attackRounds = 3,
    defenseRounds = 3,
    benchLimit = 50
  } = {}) {
    this.pool = pool;
    this.maxAdversaries = maxAdversaries;
    this.noiseLevel = noiseLevel;
    this.synonymMap = synonymMap || new Map();
    this.attackRounds = attackRounds;
    this.defenseRounds = defenseRounds;
    this.benchLimit = benchLimit;
    // Seeded generator when seedrandom is installed, Math.random otherwise.
    const seedrandom = safeRequire('seedrandom');
    this.rng = seedrandom ? seedrandom('phoenix-adv') : Math.random;
    this.history = [];
    this.Matrix = safeRequire('ml-matrix');
  }

  /**
   * Draw the next random number from the configured generator.
   * @returns {number} Value in [0, 1).
   */
  _random() {
    return (typeof this.rng === 'function') ? this.rng() : Math.random();
  }

  /**
   * Produce a perturbed copy of a token list: random deletions, synonym
   * replacements and, occasionally, one appended noise token.
   * @param {string[]} tokens
   * @returns {string[]} New token array; the input is not modified.
   */
  _perturbTokens(tokens) {
    const out = [];
    const half = this.noiseLevel / 2;
    for (const t of tokens) {
      const r = this._random();
      if (r < half) {
        // Drop the token.
        continue;
      }
      if (r < this.noiseLevel) {
        // Synonym replacement.
        const syns = this.synonymMap.get(t);
        if (Array.isArray(syns) && syns.length) {
          // r lies in [half, noiseLevel); rescale it to [0, 1) so that every
          // synonym can be chosen instead of (almost) always the first one.
          const unit = half > 0 ? (r - half) / half : 0;
          const idx = Math.min(syns.length - 1, Math.max(0, Math.floor(unit * syns.length)));
          out.push(syns[idx]);
          continue;
        }
      }
      out.push(t);
    }
    // Randomly append one noise token.
    if (this._random() < this.noiseLevel) {
      out.push('noise_token_' + Math.floor(this._random() * 1000));
    }
    return out;
  }

  /**
   * Build between 2 and 8 perturbed variants of a text (one per three tokens).
   * @param {string} text
   * @returns {string[]} Adversarial texts (tokens joined with single spaces).
   */
  _generateAdversaries(text) {
    const base = tokenize(text);
    const count = Math.min(8, Math.max(2, Math.floor(base.length / 3)));
    const adversaries = [];
    for (let i = 0; i < count; i++) {
      adversaries.push(this._perturbTokens(base).join(' '));
    }
    return adversaries;
  }

  /**
   * Send a text to the active controller and measure the reply quality.
   * @param {string} text
   * @returns {Promise<{text: string, reply: string, quality: number, latency: *, seeds: Map}>}
   */
  async _probe(text) {
    const ctrl = this.pool.getActive();
    const result = await ctrl.respond({ text });
    const seeds = new Map(result.seeds);
    const quality = this._qualityMetric(result.reply, seeds);
    return { text, reply: result.reply, quality, latency: result.latency, seeds };
  }

  /**
   * Reply quality: (0.6 * seed coverage + 0.4 * uniqueness), multiplied by
   * 0.9 when the reply is longer than 50 tokens.
   * @param {string} reply
   * @param {Map<*, *>} seeds Map keyed by meme id.
   * @returns {number} 0 for an empty reply.
   */
  _qualityMetric(reply, seeds) {
    const base = tokenize(reply);
    if (!base.length) return 0;
    const linkedHit = this._linkedCoverage(base, seeds);
    const uniq = new Set(base).size / Math.max(1, base.length);
    const lenPenalty = base.length > 50 ? 0.9 : 1.0;
    return (0.6 * linkedHit + 0.4 * uniq) * lenPenalty;
  }

  /**
   * Average, over all seeds, of the fraction of a meme's linked words that
   * occur in `words`. Each distinct word counts once so the value stays <= 1.
   * @param {string[]} words Reply tokens.
   * @param {Map<*, *>} seeds Map keyed by meme id.
   * @returns {number}
   */
  _linkedCoverage(words, seeds) {
    const distinct = new Set(words);
    const kvm = this.pool.getActive().runtime.kvm;
    let coverage = 0;
    for (const memeId of seeds.keys()) {
      const linked = kvm.getMemeWords(memeId);
      if (!linked || linked.size === 0) continue;
      let hit = 0;
      for (const w of distinct) {
        if (linked.has(w)) hit += 1;
      }
      coverage += hit / linked.size;
    }
    return coverage / Math.max(1, seeds.size);
  }

  /**
   * Run the attack rounds, then the defense rounds, and record a summary.
   *
   * Attack: probe perturbed variants of every sample, at most `maxAdversaries`
   * probes per round. Defense: probe the original samples and, for replies of
   * quality < 0.25, chain-link the memes mapped from the reply's first words.
   *
   * @param {Array<*>} samples Sample texts; entries are coerced to strings and blank ones dropped.
   * @returns {Promise<{ok: boolean, summary: object}>}
   */
  async attackAndDefend(samples) {
    const bench = (Array.isArray(samples) ? samples : [])
      .slice(0, this.benchLimit)
      .map((s) => (typeof s === 'string' ? s : String(s ?? '')))
      .filter((s) => s.trim().length > 0);
    const probeCap = Number.isFinite(this.maxAdversaries) && this.maxAdversaries > 0
      ? this.maxAdversaries
      : Infinity;
    const rounds = [];
    // Attack phase.
    for (let r = 0; r < this.attackRounds; r++) {
      const results = [];
      for (const s of bench) {
        if (results.length >= probeCap) break;
        for (const a of this._generateAdversaries(s)) {
          if (results.length >= probeCap) break;
          results.push(await this._probe(a));
        }
      }
      rounds.push({ phase: 'attack', results });
    }
    // Defense phase: repair the graph for low-quality replies by linking
    // consecutive memes. Every round owns its fixes list; sharing one array
    // across rounds would make the summary count each fix once per round.
    for (let d = 0; d < this.defenseRounds; d++) {
      const fixes = [];
      for (const s of bench) {
        const res = await this._probe(s);
        if (res.quality < 0.25) {
          const words = tokenize(res.reply).slice(0, 12);
          const runtime = this.pool.getActive().runtime;
          const ids = Array.from(runtime.mapWordsToMemes(words).keys());
          for (let i = 0; i < ids.length - 1; i++) {
            runtime.graph.link(ids[i], ids[i + 1], 0.5, 0);
          }
          fixes.push({ sample: s, addedEdges: Math.max(0, ids.length - 1) });
        }
      }
      rounds.push({ phase: 'defense', fixes });
    }
    const summary = this._summarize(rounds);
    this.history.push({ ts: Date.now(), rounds, summary });
    return { ok: true, summary };
  }

  /**
   * Aggregate rounds into probe count, mean attack quality and number of fixes.
   * @param {Array<object>} rounds Entries with `phase` 'attack' (`results`) or 'defense' (`fixes`).
   * @returns {{attack: {n: number, avgQuality: number}, defense: {fixes: number}}}
   */
  _summarize(rounds) {
    const stats = { attack: { n: 0, avgQuality: 0 }, defense: { fixes: 0 } };
    let qsum = 0;
    let qcnt = 0;
    for (const r of rounds) {
      if (r.phase === 'attack') {
        stats.attack.n += r.results.length;
        for (const item of r.results) {
          qsum += item.quality;
          qcnt += 1;
        }
      } else if (r.phase === 'defense') {
        stats.defense.fixes += (r.fixes || []).length;
      }
    }
    stats.attack.avgQuality = qcnt ? (qsum / qcnt) : 0;
    return stats;
  }

  /**
   * @returns {object|null} The most recent history entry, or null when none exists.
   */
  latest() { return this.history[this.history.length - 1] || null; }
}
|
|
3483
|
+
|
|
3484
|
+
|
|
3485
|
+
class GatewayServer {
|
|
3486
|
+
constructor(pool, shardManager, snapshotManager, rotation, redisSync, study, sparkArray, learners = {}) {
|
|
3487
|
+
this.pool = pool;
|
|
3488
|
+
this.shards = shardManager;
|
|
3489
|
+
this.snapshots = snapshotManager;
|
|
3490
|
+
this.rotation = rotation;
|
|
3491
|
+
this.redisSync = redisSync;
|
|
3492
|
+
this.study = study;
|
|
3493
|
+
this.spark = sparkArray || new SparkArray(pool, shardManager);
|
|
3494
|
+
this.rl = learners.rl || new ReinforcementLearner(this.pool, {});
|
|
3495
|
+
this.adv = learners.adv || new AdversarialLearner(this.pool, {});
|
|
3496
|
+
this.rlDisabled = false;
|
|
3497
|
+
this.advDisabled = false;
|
|
3498
|
+
this.dialogLearningEnabled = true;
|
|
3499
|
+
this.dialogCounters = { total: 0, lastRL: 0, lastADV: 0 };
|
|
3500
|
+
this.dialogThresholds = { rlEvery: 20, advEvery: 30 };
|
|
3501
|
+
// 创建并启动 MemeBarrier(可通过 CLI 关闭)
|
|
3502
|
+
if (!CONFIG.disableBarrier) {
|
|
3503
|
+
try {
|
|
3504
|
+
this.barrier = new MemeBarrier(this.pool.getActive().runtime, {
|
|
3505
|
+
maliciousThreshold: modelDefaults.maliciousThreshold
|
|
3506
|
+
});
|
|
3507
|
+
this.barrier.start();
|
|
3508
|
+
} catch (e) {
|
|
3509
|
+
console.warn('[Gateway] MemeBarrier init failed:', e.message);
|
|
3510
|
+
}
|
|
3511
|
+
} else {
|
|
3512
|
+
this.barrier = null;
|
|
3513
|
+
console.log('[Gateway] MemeBarrier disabled by config');
|
|
3514
|
+
}
|
|
3515
|
+
this.app = express();
|
|
3516
|
+
this.app.use(bodyParser.json({ limit: '10mb' }));
|
|
3517
|
+
this.app.use(bodyParser.urlencoded({ extended: true, limit: '10mb' }));
|
|
3518
|
+
|
|
3519
|
+
// Auth: protect /api/* by default (can be disabled via env for dev).
|
|
3520
|
+
this.auth = {
|
|
3521
|
+
enabled: String(process.env.AI_AUTH_ENABLED || 'true').toLowerCase() !== 'false',
|
|
3522
|
+
jwtSecret: String(process.env.AI_AUTH_JWT_SECRET || process.env.AUTH_JWT_SECRET || 'dev-secret-change-me'),
|
|
3523
|
+
publicPaths: new Set([
|
|
3524
|
+
'/api/system/status'
|
|
3525
|
+
])
|
|
3526
|
+
};
|
|
3527
|
+
this._setupAuthMiddleware();
|
|
3528
|
+
// Compatibility shim: Express 4.0 relies on deprecated res._headers (removed in modern Node).
|
|
3529
|
+
// We monkey-patch setHeader to maintain res._headers and res.headers so fresh(req,resHeaders) works.
|
|
3530
|
+
this.app.use((req, res, next) => {
|
|
3531
|
+
if (!res._headers) {
|
|
3532
|
+
res._headers = {}; // initialize
|
|
3533
|
+
}
|
|
3534
|
+
// Maintain a plain object reference for (res._headers || res.headers) legacy check.
|
|
3535
|
+
res.headers = res._headers;
|
|
3536
|
+
const originalSetHeader = res.setHeader;
|
|
3537
|
+
if (!res.__patchedSetHeader) {
|
|
3538
|
+
res.setHeader = function(name, value) {
|
|
3539
|
+
originalSetHeader.call(this, name, value);
|
|
3540
|
+
const lower = String(name).toLowerCase();
|
|
3541
|
+
if (!this._headers) {
|
|
3542
|
+
this._headers = {};
|
|
3543
|
+
}
|
|
3544
|
+
this._headers[lower] = value;
|
|
3545
|
+
this.headers = this._headers;
|
|
3546
|
+
};
|
|
3547
|
+
res.__patchedSetHeader = true;
|
|
3548
|
+
}
|
|
3549
|
+
next();
|
|
3550
|
+
});
|
|
3551
|
+
// 根据 CLI 关闭学习模块(但允许运行时重新开启)
|
|
3552
|
+
this.rlDisabled = this.rlDisabled || CONFIG.disableLearning || CONFIG.disableRL;
|
|
3553
|
+
this.advDisabled = this.advDisabled || CONFIG.disableLearning || CONFIG.disableADV;
|
|
3554
|
+
// 如果 CLI 禁用学习,则默认关闭对话触发(可运行时打开)
|
|
3555
|
+
this.dialogLearningEnabled = !(CONFIG.disableLearning === true);
|
|
3556
|
+
this._setupRoutes();
|
|
3557
|
+
}
|
|
3558
|
+
|
|
3559
|
+
_setupAuthMiddleware() {
|
|
3560
|
+
let jwt;
|
|
3561
|
+
try {
|
|
3562
|
+
jwt = require('jsonwebtoken');
|
|
3563
|
+
} catch (_e) {
|
|
3564
|
+
jwt = null;
|
|
3565
|
+
}
|
|
3566
|
+
|
|
3567
|
+
const parseBearer = (req) => {
|
|
3568
|
+
const h = req.headers.authorization || '';
|
|
3569
|
+
const m = /^Bearer\s+(.+)$/i.exec(String(h));
|
|
3570
|
+
return m ? m[1] : '';
|
|
3571
|
+
};
|
|
3572
|
+
|
|
3573
|
+
this.app.use((req, res, next) => {
|
|
3574
|
+
if (!this.auth.enabled) return next();
|
|
3575
|
+
if (req.method === 'OPTIONS') return next();
|
|
3576
|
+
if (!req.path.startsWith('/api/')) return next();
|
|
3577
|
+
if (this.auth.publicPaths.has(req.path)) return next();
|
|
3578
|
+
|
|
3579
|
+
const token = parseBearer(req);
|
|
3580
|
+
if (!token) {
|
|
3581
|
+
res.status(401).json({ ok: false, error: 'unauthorized' });
|
|
3582
|
+
return;
|
|
3583
|
+
}
|
|
3584
|
+
if (!jwt) {
|
|
3585
|
+
res.status(500).json({ ok: false, error: 'auth-lib-missing' });
|
|
3586
|
+
return;
|
|
3587
|
+
}
|
|
3588
|
+
try {
|
|
3589
|
+
const payload = jwt.verify(token, this.auth.jwtSecret);
|
|
3590
|
+
req.user = payload;
|
|
3591
|
+
return next();
|
|
3592
|
+
} catch (e) {
|
|
3593
|
+
res.status(401).json({ ok: false, error: 'invalid-token', message: e.message });
|
|
3594
|
+
}
|
|
2058
3595
|
});
|
|
3596
|
+
}
|
|
3597
|
+
|
|
3598
|
+
_getRuntimeFeatureState() {
|
|
3599
|
+
return {
|
|
3600
|
+
memebarrier: {
|
|
3601
|
+
enabled: Boolean(this.barrier && this.barrier.running),
|
|
3602
|
+
available: !CONFIG.disableBarrier,
|
|
3603
|
+
threshold: this.barrier ? this.barrier.maliciousThreshold : modelDefaults.maliciousThreshold
|
|
3604
|
+
},
|
|
3605
|
+
learning: {
|
|
3606
|
+
// learning 是总开关:关闭时同时禁用 RL/ADV 的自动触发与端点
|
|
3607
|
+
enabled: !(this.rlDisabled && this.advDisabled),
|
|
3608
|
+
cliDisabled: Boolean(CONFIG.disableLearning)
|
|
3609
|
+
},
|
|
3610
|
+
rl: {
|
|
3611
|
+
enabled: !this.rlDisabled,
|
|
3612
|
+
cliDisabled: Boolean(CONFIG.disableLearning || CONFIG.disableRL)
|
|
3613
|
+
},
|
|
3614
|
+
adv: {
|
|
3615
|
+
enabled: !this.advDisabled,
|
|
3616
|
+
cliDisabled: Boolean(CONFIG.disableLearning || CONFIG.disableADV)
|
|
3617
|
+
},
|
|
3618
|
+
dialogLearning: {
|
|
3619
|
+
enabled: Boolean(this.dialogLearningEnabled)
|
|
3620
|
+
},
|
|
3621
|
+
dialogThresholds: { ...this.dialogThresholds },
|
|
3622
|
+
dialogCounters: { ...this.dialogCounters }
|
|
3623
|
+
};
|
|
3624
|
+
}
|
|
3625
|
+
|
|
3626
|
+
_applyRuntimeFeaturePatch(patch = {}) {
|
|
3627
|
+
const out = { applied: {}, warnings: [] };
|
|
3628
|
+
// MemeBarrier
|
|
3629
|
+
if (typeof patch.memebarrierEnabled === 'boolean') {
|
|
3630
|
+
if (patch.memebarrierEnabled) {
|
|
3631
|
+
if (!this.barrier) {
|
|
3632
|
+
this.barrier = new MemeBarrier(this.pool.getActive().runtime, {
|
|
3633
|
+
maliciousThreshold: modelDefaults.maliciousThreshold
|
|
3634
|
+
});
|
|
3635
|
+
}
|
|
3636
|
+
this.barrier.start();
|
|
3637
|
+
} else {
|
|
3638
|
+
if (this.barrier) this.barrier.stop();
|
|
3639
|
+
}
|
|
3640
|
+
out.applied.memebarrierEnabled = patch.memebarrierEnabled;
|
|
3641
|
+
}
|
|
3642
|
+
if (typeof patch.maliciousThreshold === 'number' && Number.isFinite(patch.maliciousThreshold)) {
|
|
3643
|
+
if (!this.barrier) {
|
|
3644
|
+
this.barrier = new MemeBarrier(this.pool.getActive().runtime, {
|
|
3645
|
+
maliciousThreshold: patch.maliciousThreshold
|
|
3646
|
+
});
|
|
3647
|
+
}
|
|
3648
|
+
this.barrier.maliciousThreshold = patch.maliciousThreshold;
|
|
3649
|
+
out.applied.maliciousThreshold = patch.maliciousThreshold;
|
|
3650
|
+
}
|
|
3651
|
+
|
|
3652
|
+
// Learning/RL/ADV
|
|
3653
|
+
// 注意:CLI disable 表示“启动默认禁用”,但允许运行时打开;若你希望完全锁死,可再加一个 CONFIG.lockRuntimeToggles。
|
|
3654
|
+
if (typeof patch.learningEnabled === 'boolean') {
|
|
3655
|
+
if (patch.learningEnabled) {
|
|
3656
|
+
// 仅解除总禁用:不强行打开 RL/ADV,由各自开关决定
|
|
3657
|
+
// 如果之前是因为 disableLearning 置为 true,这里也允许解除。
|
|
3658
|
+
if (CONFIG.disableLearning) {
|
|
3659
|
+
out.warnings.push('learning was CLI-disabled; runtime override enabled');
|
|
3660
|
+
}
|
|
3661
|
+
// 不做事,交给 rlEnabled/advEnabled 或保持现状
|
|
3662
|
+
} else {
|
|
3663
|
+
this.rlDisabled = true;
|
|
3664
|
+
this.advDisabled = true;
|
|
3665
|
+
this.dialogLearningEnabled = false;
|
|
3666
|
+
}
|
|
3667
|
+
out.applied.learningEnabled = patch.learningEnabled;
|
|
3668
|
+
}
|
|
3669
|
+
if (typeof patch.rlEnabled === 'boolean') {
|
|
3670
|
+
if (patch.rlEnabled) {
|
|
3671
|
+
if (CONFIG.disableLearning || CONFIG.disableRL) {
|
|
3672
|
+
out.warnings.push('rl was CLI-disabled; runtime override enabled');
|
|
3673
|
+
}
|
|
3674
|
+
this.rlDisabled = false;
|
|
3675
|
+
} else {
|
|
3676
|
+
this.rlDisabled = true;
|
|
3677
|
+
}
|
|
3678
|
+
out.applied.rlEnabled = patch.rlEnabled;
|
|
3679
|
+
}
|
|
3680
|
+
if (typeof patch.advEnabled === 'boolean') {
|
|
3681
|
+
if (patch.advEnabled) {
|
|
3682
|
+
if (CONFIG.disableLearning || CONFIG.disableADV) {
|
|
3683
|
+
out.warnings.push('adv was CLI-disabled; runtime override enabled');
|
|
3684
|
+
}
|
|
3685
|
+
this.advDisabled = false;
|
|
3686
|
+
} else {
|
|
3687
|
+
this.advDisabled = true;
|
|
3688
|
+
}
|
|
3689
|
+
out.applied.advEnabled = patch.advEnabled;
|
|
3690
|
+
}
|
|
3691
|
+
|
|
3692
|
+
if (typeof patch.dialogLearningEnabled === 'boolean') {
|
|
3693
|
+
this.dialogLearningEnabled = patch.dialogLearningEnabled;
|
|
3694
|
+
out.applied.dialogLearningEnabled = patch.dialogLearningEnabled;
|
|
3695
|
+
}
|
|
3696
|
+
|
|
3697
|
+
// Dialog thresholds
|
|
3698
|
+
if (typeof patch.rlEvery === 'number' && Number.isFinite(patch.rlEvery) && patch.rlEvery > 0) {
|
|
3699
|
+
this.dialogThresholds.rlEvery = patch.rlEvery;
|
|
3700
|
+
out.applied.rlEvery = patch.rlEvery;
|
|
3701
|
+
}
|
|
3702
|
+
if (typeof patch.advEvery === 'number' && Number.isFinite(patch.advEvery) && patch.advEvery > 0) {
|
|
3703
|
+
this.dialogThresholds.advEvery = patch.advEvery;
|
|
3704
|
+
out.applied.advEvery = patch.advEvery;
|
|
3705
|
+
}
|
|
3706
|
+
return out;
|
|
3707
|
+
}
|
|
3708
|
+
|
|
3709
|
+
_setupRoutes() {
|
|
3710
|
+
// Main AI 进程不再托管前端;根路径返回提示。
|
|
3711
|
+
this.app.get('/', (req, res) => {
|
|
3712
|
+
res.status(404).send('UI moved to auth_frontend_server.cjs (default :5081)');
|
|
3713
|
+
});
|
|
3714
|
+
|
|
3715
|
+
// 前端托管已迁移到独立进程(auth_frontend_server.cjs)。
|
|
3716
|
+
// 这里保留纯 API(/api、/robots 等)以减少主 AI 进程职责。
|
|
2059
3717
|
this.app.post('/api/chat', async (req, res) => {
|
|
2060
3718
|
try {
|
|
2061
3719
|
const controller = this.pool.getActive();
|
|
2062
3720
|
const result = await controller.respond(req.body || {});
|
|
2063
3721
|
res.json({ ok: true, result });
|
|
3722
|
+
this._onDialogCompleted(result, req.body || {});
|
|
2064
3723
|
} catch (err) {
|
|
2065
3724
|
res.status(500).json({ ok: false, error: err.message });
|
|
2066
3725
|
}
|
|
2067
3726
|
});
|
|
3727
|
+
|
|
3728
|
+
// 运行时配置/开关(React 控制台使用)
|
|
3729
|
+
this.app.get('/api/runtime/features', (req, res) => {
|
|
3730
|
+
res.json({ ok: true, features: this._getRuntimeFeatureState() });
|
|
3731
|
+
});
|
|
3732
|
+
this.app.patch('/api/runtime/features', (req, res) => {
|
|
3733
|
+
try {
|
|
3734
|
+
const result = this._applyRuntimeFeaturePatch(req.body || {});
|
|
3735
|
+
res.json({ ok: true, result, features: this._getRuntimeFeatureState() });
|
|
3736
|
+
} catch (err) {
|
|
3737
|
+
res.status(500).json({ ok: false, error: err.message });
|
|
3738
|
+
}
|
|
3739
|
+
});
|
|
3740
|
+
|
|
3741
|
+
this.app.get('/api/study/status', (req, res) => {
|
|
3742
|
+
const q = this.study?.queue?.length ?? 0;
|
|
3743
|
+
res.json({
|
|
3744
|
+
ok: true,
|
|
3745
|
+
running: Boolean(this.study?.running),
|
|
3746
|
+
queue: q,
|
|
3747
|
+
metrics: this.study?.metrics || null
|
|
3748
|
+
});
|
|
3749
|
+
});
|
|
3750
|
+
|
|
3751
|
+
this.app.post('/api/learn/dialog/reset', (req, res) => {
|
|
3752
|
+
this.dialogCounters = { total: 0, lastRL: 0, lastADV: 0 };
|
|
3753
|
+
res.json({ ok: true, dialogCounters: this.dialogCounters });
|
|
3754
|
+
});
|
|
2068
3755
|
this.app.post('/api/array/chat', async (req, res) => {
|
|
2069
3756
|
try {
|
|
2070
3757
|
const payload = req.body || {};
|
|
@@ -2072,8 +3759,59 @@ class GatewayServer {
|
|
|
2072
3759
|
res.status(400).json({ ok: false, error: 'text required' });
|
|
2073
3760
|
return;
|
|
2074
3761
|
}
|
|
2075
|
-
|
|
3762
|
+
// Reinforcement Learning endpoints
|
|
3763
|
+
this.app.post('/api/learn/reinforce', async (req, res) => {
|
|
3764
|
+
try {
|
|
3765
|
+
if (this.rlDisabled) {
|
|
3766
|
+
res.status(503).json({ ok: false, error: 'rl-disabled' });
|
|
3767
|
+
return;
|
|
3768
|
+
}
|
|
3769
|
+
const cycles = Number(req.body?.cycles ?? 3) || 3;
|
|
3770
|
+
const out = await this.rl.learn(cycles);
|
|
3771
|
+
res.json({ ok: true, result: out });
|
|
3772
|
+
} catch (err) {
|
|
3773
|
+
res.status(500).json({ ok: false, error: err.message });
|
|
3774
|
+
}
|
|
3775
|
+
});
|
|
3776
|
+
this.app.get('/api/learn/reinforce/latest', (req, res) => {
|
|
3777
|
+
res.json({ ok: true, latest: this.rl.latest() });
|
|
3778
|
+
});
|
|
3779
|
+
// Adversarial Learning endpoints
|
|
3780
|
+
this.app.post('/api/learn/adversarial', async (req, res) => {
|
|
3781
|
+
try {
|
|
3782
|
+
if (this.advDisabled) {
|
|
3783
|
+
res.status(503).json({ ok: false, error: 'adv-disabled' });
|
|
3784
|
+
return;
|
|
3785
|
+
}
|
|
3786
|
+
const samples = Array.isArray(req.body?.samples) ? req.body.samples : [];
|
|
3787
|
+
if (!samples.length) {
|
|
3788
|
+
res.status(400).json({ ok: false, error: 'samples required' });
|
|
3789
|
+
return;
|
|
3790
|
+
}
|
|
3791
|
+
const out = await this.adv.attackAndDefend(samples);
|
|
3792
|
+
res.json({ ok: true, result: out });
|
|
3793
|
+
} catch (err) {
|
|
3794
|
+
res.status(500).json({ ok: false, error: err.message });
|
|
3795
|
+
}
|
|
3796
|
+
});
|
|
3797
|
+
this.app.get('/api/learn/adversarial/latest', (req, res) => {
|
|
3798
|
+
res.json({ ok: true, latest: this.adv.latest() });
|
|
3799
|
+
});
|
|
3800
|
+
this.app.post('/api/learn/thresholds', (req, res) => {
|
|
3801
|
+
const { rlEvery, advEvery } = req.body || {};
|
|
3802
|
+
if (Number.isFinite(rlEvery) && rlEvery > 0) this.dialogThresholds.rlEvery = rlEvery;
|
|
3803
|
+
if (Number.isFinite(advEvery) && advEvery > 0) this.dialogThresholds.advEvery = advEvery;
|
|
3804
|
+
res.json({ ok: true, thresholds: this.dialogThresholds });
|
|
3805
|
+
});
|
|
3806
|
+
const opts = payload.options || {};
|
|
3807
|
+
const bigRounds = Number(opts.bigRounds ?? opts.bigSparkRounds ?? 1) || 1;
|
|
3808
|
+
const result = bigRounds > 1
|
|
3809
|
+
? await this.spark.dispatchBig(payload, opts)
|
|
3810
|
+
: await this.spark.dispatch(payload, opts);
|
|
2076
3811
|
res.json({ ok: true, result });
|
|
3812
|
+
const agg = result?.aggregate;
|
|
3813
|
+
const dialog = agg ? { reply: agg.reply, latency: agg.latency, sessionId: agg.sessionId, seeds: [] } : null;
|
|
3814
|
+
this._onDialogCompleted(dialog, payload || {});
|
|
2077
3815
|
} catch (err) {
|
|
2078
3816
|
res.status(500).json({ ok: false, error: err.message });
|
|
2079
3817
|
}
|
|
@@ -2091,7 +3829,7 @@ class GatewayServer {
|
|
|
2091
3829
|
res.json({ ok: true, params: this.pool.getActive().runtime.cloneParams() });
|
|
2092
3830
|
});
|
|
2093
3831
|
this.app.get('/api/array/layers', (req, res) => {
|
|
2094
|
-
res.json({ ok: true, layers: this.spark.getLayers(), history: this.spark.history.slice(-20) });
|
|
3832
|
+
res.json({ ok: true, layers: this.spark.getLayers(), history: (this.spark.history || []).slice(-20) });
|
|
2095
3833
|
});
|
|
2096
3834
|
this.app.post('/api/array/layers', (req, res) => {
|
|
2097
3835
|
try {
|
|
@@ -2102,7 +3840,7 @@ class GatewayServer {
|
|
|
2102
3840
|
}
|
|
2103
3841
|
});
|
|
2104
3842
|
this.app.get('/api/array/history', (req, res) => {
|
|
2105
|
-
res.json({ ok: true, history: this.spark.history.slice(-20) });
|
|
3843
|
+
res.json({ ok: true, history: (this.spark.history || []).slice(-20) });
|
|
2106
3844
|
});
|
|
2107
3845
|
this.app.get('/api/snapshots', (req, res) => {
|
|
2108
3846
|
res.json({ ok: true, list: this.snapshots.list() });
|
|
@@ -2132,10 +3870,53 @@ class GatewayServer {
|
|
|
2132
3870
|
load: os.loadavg(),
|
|
2133
3871
|
memory: process.memoryUsage(),
|
|
2134
3872
|
controllers: this.pool.listMetrics(),
|
|
3873
|
+
groups: typeof this.pool.listGroupIds === 'function'
|
|
3874
|
+
? this.pool.listGroupIds().map((gid) => ({ gid, controllers: this.pool.listControllersInGroup(gid) }))
|
|
3875
|
+
: [],
|
|
2135
3876
|
rotation: { running: this.rotation.running, cycleMs: this.rotation.cycleMs },
|
|
2136
3877
|
redis: { channel: this.redisSync.channel }
|
|
2137
3878
|
});
|
|
2138
3879
|
});
|
|
3880
|
+
|
|
3881
|
+
// 只读系统配置(组数/组大小等;修改需要重启进程)
|
|
3882
|
+
this.app.get('/api/system/config', (req, res) => {
|
|
3883
|
+
res.json({
|
|
3884
|
+
ok: true,
|
|
3885
|
+
config: {
|
|
3886
|
+
groupCount: CONFIG.groupCount,
|
|
3887
|
+
groupSize: CONFIG.groupSize,
|
|
3888
|
+
groupIds: typeof this.pool.listGroupIds === 'function' ? this.pool.listGroupIds() : [],
|
|
3889
|
+
gatewayHost: CONFIG.gatewayHost,
|
|
3890
|
+
portGateway: CONFIG.portGateway,
|
|
3891
|
+
portStudy: CONFIG.portStudy
|
|
3892
|
+
}
|
|
3893
|
+
});
|
|
3894
|
+
});
|
|
3895
|
+
|
|
3896
|
+
// 组信息:用于前端展示与按组操作
|
|
3897
|
+
this.app.get('/api/groups', (req, res) => {
|
|
3898
|
+
const groupIds = typeof this.pool.listGroupIds === 'function' ? this.pool.listGroupIds() : [];
|
|
3899
|
+
res.json({
|
|
3900
|
+
ok: true,
|
|
3901
|
+
groupSize: CONFIG.groupSize,
|
|
3902
|
+
groupCount: CONFIG.groupCount,
|
|
3903
|
+
groups: groupIds.map((gid) => ({ gid, controllers: this.pool.listControllersInGroup(gid) }))
|
|
3904
|
+
});
|
|
3905
|
+
});
|
|
3906
|
+
|
|
3907
|
+
this.app.get('/api/groups/:gid/metrics', (req, res) => {
|
|
3908
|
+
const gid = String(req.params.gid || '').trim();
|
|
3909
|
+
const list = typeof this.pool.listControllersInGroup === 'function' ? this.pool.listControllersInGroup(gid) : [];
|
|
3910
|
+
if (!list.length) {
|
|
3911
|
+
res.status(404).json({ ok: false, error: 'group-not-found' });
|
|
3912
|
+
return;
|
|
3913
|
+
}
|
|
3914
|
+
const metrics = list
|
|
3915
|
+
.map((name) => this.pool.getByName(name))
|
|
3916
|
+
.filter(Boolean)
|
|
3917
|
+
.map((ctrl) => ctrl.metrics());
|
|
3918
|
+
res.json({ ok: true, gid, controllers: metrics });
|
|
3919
|
+
});
|
|
2139
3920
|
this.app.get('/api/system/ai/:name', (req, res) => {
|
|
2140
3921
|
const ctrl = this.pool.getByName(req.params.name);
|
|
2141
3922
|
if (!ctrl) {
|
|
@@ -2193,9 +3974,288 @@ class GatewayServer {
|
|
|
2193
3974
|
res.status(500).json({ ok: false, error: err.message });
|
|
2194
3975
|
}
|
|
2195
3976
|
});
|
|
3977
|
+
|
|
3978
|
+
// In-site recursive crawl: same-origin HTML plus optional PDF text extraction
// (only for websites you are authorized to crawl).
//
// Request body: { startUrl, options?, ingest?, groupId?, source? }
//   - startUrl: required; trimmed before use. 400 'startUrl required' when empty.
//   - options:  extra crawl settings forwarded to the runtime under `crawl`.
//   - ingest:   when truthy, every crawled page with non-blank text is ingested,
//               into group `groupId` when given, otherwise via pool.ingestDocument.
//   - source:   overrides the per-page source label (default: page url, then 'crawl').
// Response: { ok, result, ingested } where `ingested` is null (ingest not requested),
// { docs: 0 }, or { docs, details } with at most the first 20 per-document results.
this.app.post('/api/corpus/crawl', async (req, res) => {
  try {
    const { startUrl, options, ingest, groupId, source } = req.body || {};
    const url = String(startUrl || '').trim();
    if (!url) {
      res.status(400).json({ ok: false, error: 'startUrl required' });
      return;
    }
    // Only a plain object contributes crawl settings; spreading a string or an
    // array would inject numeric index keys into the crawl configuration.
    const crawlOptions = options && typeof options === 'object' && !Array.isArray(options) ? options : {};
    const runtime = this.pool.getActive().runtime;
    // startUrl is written last so `options.startUrl` can never replace the
    // trimmed, validated url that is also passed as the lookup target.
    const result = await runtime.onlineLookup(url, { mode: 'crawl', crawl: { ...crawlOptions, startUrl: url } });

    let ingested = null;
    if (ingest) {
      // Tolerate a result whose `pages` is missing or not an array.
      const pages = Array.isArray(result?.pages) ? result.pages : [];
      const docs = pages
        .filter((p) => p && p.text && String(p.text).trim())
        .map((p) => ({
          text: String(p.text),
          source: String(source || p.url || 'crawl')
        }));
      if (docs.length) {
        const details = [];
        // Ingest sequentially, one document at a time.
        for (const doc of docs) {
          const r = groupId
            ? await this.pool.ingestDocumentToGroup(String(groupId), doc)
            : await this.pool.ingestDocument(doc);
          details.push({ source: doc.source, result: r });
        }
        // Cap the per-document details in the response at 20 entries.
        ingested = { docs: docs.length, details: details.slice(0, 20) };
      } else {
        ingested = { docs: 0 };
      }
    }

    res.json({ ok: true, result, ingested });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4015
|
+
|
|
4016
|
+
// Online search configuration (runtime toggle + endpoint library).
// GET /api/search/config — returns the active runtime's search configuration.
this.app.get('/api/search/config', (req, res) => {
  try {
    const active = this.pool.getActive();
    const config = active.runtime.getSearchConfig();
    res.json({ ok: true, config });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4025
|
+
// PUT /api/search/config — passes the request body (or {} when absent) to the
// active runtime's setSearchConfig and returns whatever configuration it reports back.
this.app.put('/api/search/config', (req, res) => {
  try {
    const active = this.pool.getActive();
    const patch = req.body || {};
    const config = active.runtime.setSearchConfig(patch);
    res.json({ ok: true, config });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4034
|
+
// POST /api/search/endpoints/add — body { url }.
// Appends `url` to the configured search endpoints (duplicates collapsed, first
// occurrence order kept) and returns the updated configuration.
// Responds 400 'url required' when the trimmed url is empty.
this.app.post('/api/search/endpoints/add', (req, res) => {
  try {
    const url = String(req.body?.url || '').trim();
    if (!url) {
      res.status(400).json({ ok: false, error: 'url required' });
      return;
    }
    const runtime = this.pool.getActive().runtime;
    const current = runtime.getSearchConfig();
    const known = current.endpoints || [];
    // A Set keeps insertion order, so existing endpoints stay in place.
    const endpoints = [...new Set([...known, url])];
    const config = runtime.setSearchConfig({ endpoints });
    res.json({ ok: true, config });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4050
|
+
// POST /api/search/endpoints/remove — body { url }.
// Removes `url` from the configured search endpoints. When the removed url was the
// active endpoint, the first remaining endpoint (or '') becomes active.
// Responds 400 'url required' when the trimmed url is empty.
this.app.post('/api/search/endpoints/remove', (req, res) => {
  try {
    const url = String(req.body?.url || '').trim();
    if (!url) {
      res.status(400).json({ ok: false, error: 'url required' });
      return;
    }
    const runtime = this.pool.getActive().runtime;
    const current = runtime.getSearchConfig();
    const endpoints = (current.endpoints || []).filter((entry) => entry !== url);
    // Only touch `active` when the endpoint being removed is the active one.
    const patch = current.active === url
      ? { endpoints, active: endpoints[0] || '' }
      : { endpoints };
    const config = runtime.setSearchConfig(patch);
    res.json({ ok: true, config });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4070
|
+
|
|
4071
|
+
// Test cases: add / list / refresh at runtime (used to refresh the RL testsDir vocabulary).
// GET /api/tests/list — lists the *.txt files (case-insensitive extension, sorted)
// found in the tests directory. The directory is the active runtime's
// config.testsDir, falling back to `<__dirname>/tests`; a missing directory yields [].
this.app.get('/api/tests/list', (req, res) => {
  try {
    const runtime = this.pool.getActive().runtime;
    const testsDir = runtime?.config?.testsDir || path.join(__dirname, 'tests');
    const isTextCase = (entry) => /\.txt$/i.test(entry);
    const files = fs.existsSync(testsDir)
      ? fs.readdirSync(testsDir).filter(isTextCase).sort()
      : [];
    res.json({ ok: true, directory: testsDir, files });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4087
|
+
|
|
4088
|
+
// POST /api/tests/case — body { name, content }.
// Writes `content` as a UTF-8 text file into the tests directory (created when
// missing) and returns the stored file name and full path.
// Responds 400 'name required' when the trimmed name is empty.
this.app.post('/api/tests/case', (req, res) => {
  try {
    const body = req.body;
    const requestedName = String(body?.name || '').trim();
    const content = String(body?.content || '');
    if (!requestedName) {
      res.status(400).json({ ok: false, error: 'name required' });
      return;
    }
    // Every run of characters outside [a-zA-Z0-9._-] collapses into one underscore,
    // so the stored name cannot contain path separators.
    const safeName = requestedName.replace(/[^a-zA-Z0-9._-]+/g, '_');
    let filename = safeName;
    if (!safeName.toLowerCase().endsWith('.txt')) {
      filename = `${safeName}.txt`;
    }
    const runtime = this.pool.getActive().runtime;
    const testsDir = runtime?.config?.testsDir || path.join(__dirname, 'tests');
    fs.mkdirSync(testsDir, { recursive: true });
    const filePath = path.join(testsDir, filename);
    // An existing file with the same name is overwritten.
    fs.writeFileSync(filePath, content, 'utf8');
    res.json({ ok: true, file: filename, path: filePath });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4108
|
+
|
|
4109
|
+
// POST /api/tests/refresh
// Goal: let the RL module pick up the latest tests vocabulary at runtime.
// Approach: call rl.refreshTests({ testsDir }) when it exists, else rl.setTestsDir(testsDir),
// else replace this.rl with a fresh ReinforcementLearner pointed at testsDir.
// Every successful path reports refreshed: true.
this.app.post('/api/tests/refresh', async (req, res) => {
  try {
    const testsDir = this.pool.getActive().runtime?.config?.testsDir || path.join(__dirname, 'tests');
    const learner = this.rl;
    const canRefresh = Boolean(learner) && typeof learner.refreshTests === 'function';
    const canRetarget = Boolean(learner) && typeof learner.setTestsDir === 'function';
    if (canRefresh) {
      await Promise.resolve(this.rl.refreshTests({ testsDir }));
    } else if (canRetarget) {
      await Promise.resolve(this.rl.setTestsDir(testsDir));
    } else {
      // Most conservative option: swap in a new RL learner (the main runtime is untouched).
      // NOTE(review): the original comment says an enabled learner should stay enabled,
      // but no code here carries any enabled state over — confirm whether that is needed.
      this.rl = new ReinforcementLearner(this.pool, { testsDir });
    }
    res.json({ ok: true, refreshed: true, testsDir });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4132
|
+
// Deserialized export: the active runtime turns the current graph window into a
// Graph and writes it to a file.
// POST /api/export/graph — body { seeds, radius, file }, forwarded unchanged to
// runtime.exportGraphToFile. The response carries the file's base name, its path
// and its UTF-8 contents (returned so they can be copied straight to the Go side).
// NOTE(review): `file` comes directly from the request body — confirm that
// exportGraphToFile restricts where it is allowed to write.
this.app.post('/api/export/graph', (req, res) => {
  try {
    const body = req.body || {};
    const exportRequest = { seeds: body.seeds, radius: body.radius, file: body.file };
    const runtime = this.pool.getActive().runtime;
    const outFile = runtime.exportGraphToFile(exportRequest);
    const content = fs.readFileSync(outFile, 'utf8');
    const fileName = path.basename(outFile);
    res.json({ ok: true, file: fileName, path: outFile, content });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4145
|
+
|
|
4146
|
+
// Per-group graph export: fetch the graph structure of one work group
// (introduced for the BigSparkArray setup).
// POST /api/export/graph/group — body { groupId, seeds, radius, file }.
// The export runs on the runtime of the FIRST controller listed for the group.
// Errors: 400 'groupId required', 404 'group-not-found', 404 'controller-not-found'.
this.app.post('/api/export/graph/group', (req, res) => {
  try {
    const body = req.body || {};
    const gid = String(body.groupId || '').trim();
    if (!gid) {
      res.status(400).json({ ok: false, error: 'groupId required' });
      return;
    }
    let members = [];
    if (typeof this.pool.listControllersInGroup === 'function') {
      members = this.pool.listControllersInGroup(gid);
    }
    if (!members.length) {
      res.status(404).json({ ok: false, error: 'group-not-found' });
      return;
    }
    const [firstName] = members;
    const ctrl = this.pool.getByName(firstName);
    if (!ctrl) {
      res.status(404).json({ ok: false, error: 'controller-not-found' });
      return;
    }
    const exportRequest = { seeds: body.seeds, radius: body.radius, file: body.file };
    const outFile = ctrl.runtime.exportGraphToFile(exportRequest);
    const content = fs.readFileSync(outFile, 'utf8');
    res.json({ ok: true, groupId: gid, file: path.basename(outFile), path: outFile, content });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4173
|
+
|
|
4174
|
+
// Robots: trigger a retrain at runtime (re-ingest the robots corpus and
// optionally enqueue each document for study).
// POST /api/robots/retrain — body { limit?, offset?, shuffle?, files?, enqueueStudy? }.
//   - files may be an array or a single non-blank string (wrapped into an array).
// Responds 404 'no-documents' when nothing is collected; otherwise returns the
// number of documents ingested plus summed memes/edges and the count of failed results.
this.app.post('/api/robots/retrain', async (req, res) => {
  try {
    const options = req.body || {};
    const runtime = this.pool.getActive().runtime;

    let files;
    if (Array.isArray(options.files)) {
      files = options.files;
    } else if (typeof options.files === 'string' && options.files.trim()) {
      files = [options.files.trim()];
    }
    const docs = runtime.collectRobotsDocuments({
      limit: options.limit,
      offset: options.offset,
      shuffle: Boolean(options.shuffle),
      files
    });
    if (!docs.length) {
      res.status(404).json({ ok: false, error: 'no-documents' });
      return;
    }

    const totals = { ingested: 0, memes: 0, edges: 0, failedControllers: 0 };
    const groups = typeof this.pool.listGroupIds === 'function' ? this.pool.listGroupIds() : [];
    for (const doc of docs) {
      // Shard by group by default: each work group ingests a different part of the corpus.
      // The shard key combines the document's source/file/id with its first 256 characters.
      const label = String(doc?.source || doc?.file || doc?.id || '');
      const head = String(doc?.text || '').slice(0, 256);
      const key = `${label}|${head}`;
      const gi = groups.length ? (hashStrSimple(key) % groups.length) : 0;
      const targetGroup = groups[gi] || groups[0];

      let results;
      if (targetGroup) {
        results = await this.pool.ingestDocumentToGroup(targetGroup, doc);
      } else {
        // No groups available: fall back to the pool-wide ingest.
        results = await this.pool.ingestDocument(doc);
      }
      totals.ingested += 1;

      const outcomes = Array.isArray(results) ? results : [results];
      for (const item of outcomes) {
        const succeeded = item && item.ok !== false;
        if (succeeded) {
          totals.memes += Number(item.memes || 0) || 0;
          totals.edges += Number(item.edges || 0) || 0;
        } else {
          totals.failedControllers += 1;
        }
      }

      if (options.enqueueStudy && this.study) {
        this.study.enqueueDocument(doc);
      }
    }
    res.json({ ok: true, ...totals });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
2196
4221
|
this.app.get('/robots/list', (req, res) => {
|
|
2197
4222
|
try {
|
|
2198
4223
|
const runtime = this.pool.getActive().runtime;
|
|
4224
|
+
// MemeBarrier control and statistics endpoints.
// POST /api/memebarrier/start — body { maliciousThreshold? }.
// Lazily creates this.barrier on the active runtime, applies the requested threshold
// when it is a finite number, starts the barrier and reports the threshold in effect.
// NOTE(review): these registrations appear to sit between the `try {` of the
// '/robots/list' handler and its `res.json` call — confirm they are not being
// (re-)registered from inside that handler on every '/robots/list' request.
this.app.post('/api/memebarrier/start', (req, res) => {
  try {
    // Validate once so creation and update follow the same rule: only a finite
    // number from the request may replace the default threshold.
    const requested = req.body?.maliciousThreshold;
    const hasThreshold = typeof requested === 'number' && Number.isFinite(requested);
    if (!this.barrier) {
      this.barrier = new MemeBarrier(this.pool.getActive().runtime, {
        maliciousThreshold: hasThreshold ? requested : modelDefaults.maliciousThreshold
      });
    }
    if (hasThreshold) {
      this.barrier.maliciousThreshold = requested;
    }
    this.barrier.start();
    res.json({ ok: true, running: true, threshold: this.barrier.maliciousThreshold });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4241
|
+
// POST /api/memebarrier/stop — stops the barrier when one exists.
// Always answers { ok: true, running: false } unless stop() throws.
this.app.post('/api/memebarrier/stop', (req, res) => {
  try {
    const barrier = this.barrier;
    if (barrier) {
      barrier.stop();
    }
    res.json({ ok: true, running: false });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
4251
|
+
// GET /api/memebarrier/stats — returns barrier.getStats(), or stats: null when no
// barrier has been created yet.
this.app.get('/api/memebarrier/stats', (req, res) => {
  try {
    let stats = null;
    if (this.barrier) {
      stats = this.barrier.getStats();
    }
    res.json({ ok: true, stats });
  } catch (err) {
    res.status(500).json({ ok: false, error: err.message });
  }
});
|
|
2199
4259
|
res.json({
|
|
2200
4260
|
ok: true,
|
|
2201
4261
|
directory: runtime.config?.robotsDir,
|
|
@@ -2223,9 +4283,14 @@ class GatewayServer {
|
|
|
2223
4283
|
return;
|
|
2224
4284
|
}
|
|
2225
4285
|
const summary = [];
|
|
4286
|
+
const groups = typeof this.pool.listGroupIds === 'function' ? this.pool.listGroupIds() : [];
|
|
2226
4287
|
for (const doc of docs) {
|
|
2227
|
-
const
|
|
2228
|
-
const
|
|
4288
|
+
const key = String(doc?.source || doc?.file || doc?.id || '') + '|' + String(doc?.text || '').slice(0, 256);
|
|
4289
|
+
const gi = groups.length ? (hashStrSimple(key) % groups.length) : 0;
|
|
4290
|
+
const targetGroup = groups[gi] || groups[0];
|
|
4291
|
+
const results = targetGroup ? await this.pool.ingestDocumentToGroup(targetGroup, doc) : await this.pool.ingestDocument(doc);
|
|
4292
|
+
const list = Array.isArray(results) ? results : [results];
|
|
4293
|
+
const aggregated = list.reduce(
|
|
2229
4294
|
(acc, item) => {
|
|
2230
4295
|
if (item && item.ok !== false) {
|
|
2231
4296
|
const memes = Number(item.memes || 0);
|
|
@@ -2264,9 +4329,35 @@ class GatewayServer {
|
|
|
2264
4329
|
});
|
|
2265
4330
|
}
|
|
2266
4331
|
|
|
2267
|
-
|
|
2268
|
-
|
|
2269
|
-
|
|
4332
|
+
_onDialogCompleted(result, payload) {
|
|
4333
|
+
try {
|
|
4334
|
+
this.dialogCounters.total += 1;
|
|
4335
|
+
const total = this.dialogCounters.total;
|
|
4336
|
+
if (!this.dialogLearningEnabled) {
|
|
4337
|
+
return;
|
|
4338
|
+
}
|
|
4339
|
+
if (!this.rlDisabled && (total - this.dialogCounters.lastRL >= this.dialogThresholds.rlEvery)) {
|
|
4340
|
+
this.dialogCounters.lastRL = total;
|
|
4341
|
+
Promise.resolve().then(() => this.rl.learn(1)).catch((e) => console.warn('[Learn] RL trigger failed:', e.message));
|
|
4342
|
+
}
|
|
4343
|
+
if (!this.advDisabled && (total - this.dialogCounters.lastADV >= this.dialogThresholds.advEvery)) {
|
|
4344
|
+
this.dialogCounters.lastADV = total;
|
|
4345
|
+
const text = typeof payload?.text === 'string' ? payload.text : (Array.isArray(payload?.tokens) ? payload.tokens.join(' ') : '');
|
|
4346
|
+
const samples = [];
|
|
4347
|
+
if (text && text.trim()) samples.push(text.trim());
|
|
4348
|
+
if (result?.reply && typeof result.reply === 'string') samples.push(result.reply);
|
|
4349
|
+
if (samples.length) {
|
|
4350
|
+
Promise.resolve().then(() => this.adv.attackAndDefend(samples)).catch((e) => console.warn('[Learn] ADV trigger failed:', e.message));
|
|
4351
|
+
}
|
|
4352
|
+
}
|
|
4353
|
+
} catch (e) {
|
|
4354
|
+
console.warn('[Learn] dialog hook error:', e.message);
|
|
4355
|
+
}
|
|
4356
|
+
}
|
|
4357
|
+
|
|
4358
|
+
listen(port, host = '127.0.0.1') {
|
|
4359
|
+
this.app.listen(port, host, () => {
|
|
4360
|
+
console.log(`[Gateway] listening on ${host}:${port}`);
|
|
2270
4361
|
});
|
|
2271
4362
|
}
|
|
2272
4363
|
}
|
|
@@ -2289,8 +4380,24 @@ const bootstrap = async () => {
|
|
|
2289
4380
|
study.start();
|
|
2290
4381
|
const snapshots = new SnapshotManager(pool.getActive().runtime, CONFIG.snapshotDir);
|
|
2291
4382
|
const shards = new ShardManager(pool);
|
|
2292
|
-
const spark = new
|
|
2293
|
-
|
|
4383
|
+
const spark = new BigSparkArray(pool, shards, { groupIds: pool.listGroupIds() });
// Try to auto-restore the latest snapshot so warmup/pretraining can be skipped.
// Snapshot names are sorted in descending locale order and the first one is restored
// (presumably names sort chronologically — confirm against SnapshotManager).
// Any failure is logged and startup continues.
let __restoredFromSnapshot = false;
try {
  const available = snapshots.list().sort((a, b) => b.localeCompare(a));
  if (available.length > 0) {
    const latest = available[0];
    await snapshots.restore(latest);
    // Set before the standby/validation sync: a failure there still counts as restored.
    __restoredFromSnapshot = true;
    console.log(`[Bootstrap] Restored latest snapshot: ${latest}`);
    // Keep standby/validation in sync with the serving controller.
    const snap = pool.getActive().snapshot();
    await pool.standby.applySnapshot(snap);
    await pool.validation.applySnapshot(snap);
  }
} catch (err) {
  console.warn('[Bootstrap] Snapshot restore skipped:', err.message);
}
|
|
4400
|
+
if (CONFIG.robotsAutoload && CONFIG.robotsWarmupLimit > 0 && !__restoredFromSnapshot) {
|
|
2294
4401
|
try {
|
|
2295
4402
|
const preloadDocs = pool.getActive().runtime.collectRobotsDocuments({
|
|
2296
4403
|
limit: CONFIG.robotsWarmupLimit,
|
|
@@ -2298,8 +4405,13 @@ const bootstrap = async () => {
|
|
|
2298
4405
|
});
|
|
2299
4406
|
if (preloadDocs.length) {
|
|
2300
4407
|
console.log(`[Bootstrap] Preloading ${preloadDocs.length} robots documents...`);
|
|
2301
|
-
|
|
2302
|
-
|
|
4408
|
+
const groups = pool.listGroupIds();
|
|
4409
|
+
for (let i = 0; i < preloadDocs.length; i++) {
|
|
4410
|
+
const doc = preloadDocs[i];
|
|
4411
|
+
const key = String(doc?.source || doc?.file || doc?.id || '') + '|' + String(doc?.text || '').slice(0, 256);
|
|
4412
|
+
const idx = groups.length ? (hashStrSimple(key) % groups.length) : 0;
|
|
4413
|
+
const targetGroup = groups[idx] || groups[0] || 'G1';
|
|
4414
|
+
await pool.ingestDocumentToGroup(targetGroup, { ...doc, source: doc?.source || `robots:${doc?.file || 'unknown'}` });
|
|
2303
4415
|
}
|
|
2304
4416
|
console.log('[Bootstrap] Robots corpus preload completed.');
|
|
2305
4417
|
} else {
|
|
@@ -2309,10 +4421,74 @@ const bootstrap = async () => {
|
|
|
2309
4421
|
console.warn('[Bootstrap] Robots preload skipped:', err.message);
|
|
2310
4422
|
}
|
|
2311
4423
|
}
|
|
2312
|
-
|
|
2313
|
-
|
|
2314
|
-
|
|
2315
|
-
|
|
4424
|
+
|
|
4425
|
+
// Extra: shard the test cases in the tests directory across groups by hash, so
// each group receives a different "training set".
// Each *.txt file under `<__dirname>/tests` is ingested into the group selected by
// hashing `tests:<filename>`; 'G1' is the fallback when no group id is available.
// Any failure aborts the remaining files and is logged.
try {
  const testsDir = path.join(__dirname, 'tests');
  const files = fs.existsSync(testsDir)
    ? fs.readdirSync(testsDir).filter((f) => /\.txt$/i.test(f))
    : [];
  if (files.length) {
    const groups = pool.listGroupIds();
    for (const name of files) {
      const text = fs.readFileSync(path.join(testsDir, name), 'utf8');
      const key = `tests:${name}`;
      const idx = groups.length ? (hashStrSimple(key) % groups.length) : 0;
      const targetGroup = groups[idx] || groups[0] || 'G1';
      await pool.ingestDocumentToGroup(targetGroup, { text, source: key });
    }
    console.log(`[Bootstrap] Sharded tests corpus into ${groups.length} groups.`);
  }
} catch (err) {
  console.warn('[Bootstrap] Tests sharded preload skipped:', err.message);
}
|
|
4446
|
+
// Initialise the learning modules and start the gateway.
const rl = new ReinforcementLearner(pool, { testsDir: path.join(__dirname, 'tests') });
const adv = new AdversarialLearner(pool, {});
const gateway = new GatewayServer(pool, shards, snapshots, rotation, redisSync, study, spark, { rl, adv });
gateway.listen(CONFIG.portGateway, CONFIG.gatewayHost);
// Lightweight start: run reinforcement learning and adversarial learning once as a
// warmup. This runs in the background; bootstrap does not wait for it.
(async () => {
  // Best-effort flag setter: an assignment failure is deliberately ignored.
  const disableOnGateway = (flag) => {
    try { gateway[flag] = true; } catch (_) {}
  };

  // RL warmup (can be switched off via configuration).
  const rlEnabled = !(CONFIG.disableLearning || CONFIG.disableRL);
  if (rlEnabled) {
    try {
      await rl.learn(1);
    } catch (e) {
      console.warn('[Bootstrap] RL warmup failed:', e.message);
      // A failed warmup disables RL so that later triggers and endpoints short-circuit.
      disableOnGateway('rlDisabled');
    }
  } else {
    disableOnGateway('rlDisabled');
    console.log('[Bootstrap] RL disabled by config');
  }

  // ADV warmup (can be switched off via configuration).
  const advEnabled = !(CONFIG.disableLearning || CONFIG.disableADV);
  if (advEnabled) {
    try {
      // Pick a few shuffled samples from the robots corpus as adversarial probes.
      const runtime = pool.getActive().runtime;
      const docs = runtime.collectRobotsDocuments({ limit: 3, shuffle: true });
      const texts = docs.map((d) => d.text);
      const samples = texts.filter(Boolean).slice(0, 3);
      if (samples.length) {
        await adv.attackAndDefend(samples);
      }
    } catch (e) {
      // Unlike RL, a failed ADV warmup is only logged; ADV stays enabled.
      console.warn('[Bootstrap] Adversarial warmup failed:', e.message);
    }
  } else {
    disableOnGateway('advDisabled');
    console.log('[Bootstrap] ADV disabled by config');
  }
})();
|
|
4484
|
+
// On SIGINT: try to save an 'autosave' snapshot, log the outcome, then exit with
// code 0 whether or not the snapshot was written.
const handleSigint = async () => {
  console.log('Received SIGINT, saving snapshot...');
  try {
    const savedPath = await snapshots.create('autosave');
    const savedName = path.basename(savedPath);
    console.log(`Snapshot saved: ${savedName}`);
  } catch (err) {
    console.warn('[Shutdown] Failed to save snapshot:', err.message);
  }
  process.exit(0);
};
process.on('SIGINT', handleSigint);
|
|
2318
4494
|
};
|