079project 4.0.0 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/forwarder.js CHANGED
@@ -1104,13 +1104,6 @@ function updateParamSliders(params) {
1104
1104
  `);
1105
1105
  });
1106
1106
 
1107
- // 目标端口列表
1108
- const AI_PORTS = [
1109
- process.env.AI_PORT_A || process.argv[3],
1110
- process.env.AI_PORT_B || process.argv[4],
1111
- process.env.AI_PORT_C || process.argv[5]
1112
- ];
1113
- const Study_Port=process.env.AI_STUDY_PORT || process.argv[6]; // 学习模块端口
1114
1107
 
1115
1108
  const systemStats = {
1116
1109
  requestsTotal: 0,
@@ -1121,7 +1114,167 @@ const systemStats = {
1121
1114
  aiResponseTimes: { [process.argv[3]]: [], [process.argv[4]]: [], [process.argv[5]]: [] },
1122
1115
  lastErrors: []
1123
1116
  };
1117
+ // ...existing code...
1118
+
1119
+ // ========== 分片/冷-热池调度(forwarder 层) ==========
1120
/**
 * Metadata record for one logical shard: the backend ports that belong to it,
 * optional routing hints (embedding, tags) and simple usage counters.
 */
class ShardDescriptor {
  /**
   * @param {Object} [opts]
   * @param {string} [opts.id] Logical id (e.g. "text_base", "law_1").
   * @param {number[]} [opts.ports] Backend ports that belong to this shard.
   * @param {number[]} [opts.embedding] Semantic centroid vector (optional; null when absent).
   * @param {string[]} [opts.tags] Domain tags such as ['code', 'cn', 'law'].
   */
  constructor(opts = {}) {
    // Tolerate a missing or null options object instead of throwing on property access.
    const { id, ports, embedding, tags } = opts || {};
    this.id = id;
    // Copy every array so later mutation of the caller's arrays cannot
    // silently change this shard's state.
    this.ports = Array.isArray(ports) ? [...ports] : [];
    this.embedding = Array.isArray(embedding) ? [...embedding] : null;
    this.tags = Array.isArray(tags) ? [...tags] : [];
    this.lastUsedTs = 0;
    this.usageCount = 0;
    // Round-1 simplification: every shard is treated as "hot"; only routing is performed.
    this.loaded = true;
  }

  /** Records one use of this shard: refreshes the timestamp and bumps the counter. */
  touch() {
    this.lastUsedTs = Date.now();
    this.usageCount++;
  }
}
1143
+
1144
+ // 非TF的极简 embedding:词袋hash到固定维度
1145
/**
 * Hashes a string to an unsigned 32-bit integer.
 *
 * Each UTF-16 code unit is mixed into the running state by XOR-ing the state
 * with (state << 5) + codeUnit + (state >>> 2).
 *
 * @param {string} str Text to hash.
 * @param {number} [seed=1315423911] Initial state, coerced to uint32.
 * @returns {number} Hash in the range [0, 2^32 - 1].
 */
function hashStrSimple(str, seed = 1315423911) {
  let state = seed >>> 0;
  let pos = 0;
  while (pos < str.length) {
    const mixed = ((state << 5) + str.charCodeAt(pos) + (state >>> 2)) >>> 0;
    state = state ^ mixed;
    pos += 1;
  }
  // XOR leaves a signed 32-bit value; convert back to unsigned before returning.
  return state >>> 0;
}
1152
+
1153
/**
 * Builds a tiny bag-of-words embedding: every token is hashed into one of
 * `dim` buckets, the bucket counts are accumulated, and the resulting vector
 * is L2-normalised.
 *
 * @param {string} text Input text (passed through basicClean, then lower-cased).
 * @param {number} [dim=64] Number of buckets / length of the returned vector.
 * @returns {number[]} Plain array of length `dim`; all zeros when no token is found.
 */
function textToMiniEmbedding(text, dim = 64) {
  const buckets = new Float32Array(dim);
  const tokens = basicClean(text)
    .toLowerCase()
    .split(/[^a-z0-9_\-\u4e00-\u9fa5]+/)
    .filter(Boolean);
  if (tokens.length === 0) {
    return Array.from(buckets);
  }

  tokens.forEach((token) => {
    buckets[hashStrSimple(token) % dim] += 1;
  });

  // L2 normalisation; a zero norm falls back to 1 so the division is a no-op.
  let sumOfSquares = 0;
  buckets.forEach((value) => {
    sumOfSquares += value * value;
  });
  const norm = Math.sqrt(sumOfSquares) || 1;

  // Mapping on the typed array keeps the float32 rounding of an in-place divide.
  return Array.from(buckets.map((value) => value / norm));
}
1168
+
1169
/**
 * Cosine similarity of two equal-length numeric vectors.
 *
 * @param {ArrayLike<number>} a First vector.
 * @param {ArrayLike<number>} b Second vector.
 * @returns {number} dot(a, b) / sqrt(|a|^2 * |b|^2); 0 when either vector is
 *   missing, the lengths differ, or either squared norm is zero/NaN.
 */
function cosineSim(a, b) {
  if (!a || !b) return 0;
  if (a.length !== b.length) return 0;

  let dotProduct = 0;
  let sqNormA = 0;
  let sqNormB = 0;
  let k = 0;
  while (k < a.length) {
    const x = a[k];
    const y = b[k];
    dotProduct += x * y;
    sqNormA += x * x;
    sqNormB += y * y;
    k += 1;
  }

  const degenerate = !sqNormA || !sqNormB;
  return degenerate ? 0 : dotProduct / Math.sqrt(sqNormA * sqNormB);
}
1180
+
1181
/**
 * Groups the forwarder's backend ports into logical shards and chooses which
 * shards should serve a request, based on embedding similarity, tag hints and
 * a small usage-based boost.
 */
class ShardManager {
  /**
   * Splits the given ports, in order, into 1-4 shards named `shard_0`, `shard_1`, ...
   * (e.g. 3 ports => 1 shard; 12 ports => 4 shards of 3).
   *
   * @param {Array<string|number|undefined>} allPorts Candidate ports; falsy and
   *   non-numeric entries are ignored.
   */
  constructor(allPorts) {
    this.shards = new Map();      // id -> ShardDescriptor
    this.portToShard = new Map(); // numeric port -> shard id (reverse lookup)
    this.dim = 64;

    const ports = (Array.isArray(allPorts) ? allPorts : [])
      .filter(p => !!p)
      .map(p => Number(p))
      // A non-numeric port string would otherwise become a NaN "port".
      .filter(p => !Number.isNaN(p));
    const total = ports.length;
    if (!total) return;

    const shardCount = Math.min(4, Math.max(1, Math.floor(total / 3)));
    const baseSize = Math.floor(total / shardCount);
    // Spread the remainder over the first shards so that no port is dropped
    // when `total` is not a multiple of `shardCount` (e.g. 7 ports => 4 + 3).
    const remainder = total % shardCount;

    let offset = 0;
    for (let s = 0; s < shardCount; s++) {
      const size = baseSize + (s < remainder ? 1 : 0);
      const slice = ports.slice(offset, offset + size);
      offset += size;
      const id = `shard_${s}`;
      const desc = new ShardDescriptor({
        id,
        ports: slice,
        embedding: null,
        tags: [] // may be filled in later through updateShardMeta
      });
      this.shards.set(id, desc);
      for (const p of slice) this.portToShard.set(p, id);
    }
  }

  /**
   * @returns {Array<{id: string, ports: number[], tags: string[], loaded: boolean,
   *   lastUsedTs: number, usageCount: number}>} Snapshot of every shard; the
   *   arrays are copies, so callers cannot mutate shard state through them.
   */
  listShards() {
    return Array.from(this.shards.values()).map(s => ({
      id: s.id,
      ports: [...s.ports],
      tags: [...s.tags],
      loaded: s.loaded,
      lastUsedTs: s.lastUsedTs,
      usageCount: s.usageCount
    }));
  }

  /**
   * Manually replaces a shard's semantic centroid and/or tags.
   * Non-array values are ignored, leaving the corresponding field untouched.
   *
   * @param {string} id Shard id.
   * @param {{embedding?: number[], tags?: string[]}} [meta]
   * @returns {boolean} false when no shard has this id, true otherwise.
   */
  updateShardMeta(id, { embedding, tags } = {}) {
    const s = this.shards.get(id);
    if (!s) return false;
    if (Array.isArray(embedding)) s.embedding = embedding.slice();
    if (Array.isArray(tags)) s.tags = tags.slice();
    return true;
  }

  /**
   * Picks the shards to use for the current request and marks them as used.
   *
   * Score = cosine similarity with the shard embedding (0 when the shard has none)
   *       + 0.1 per tag shared with `hints`
   *       + 0.01 * ln(1 + usageCount).
   *
   * @param {number[]} queryEmb Embedding of the current message.
   * @param {Object} [opt]
   * @param {number} [opt.topK=2] How many shards to return (at least 1).
   * @param {string[]} [opt.hints] Extra hints such as 'code' or 'zh'.
   * @returns {ShardDescriptor[]} Selected shards, best score first; empty when
   *   no loaded shard has ports.
   */
  selectShards(queryEmb, { topK = 2, hints = [] } = {}) {
    const entries = Array.from(this.shards.values()).filter(s => s.loaded && s.ports.length);
    if (!entries.length) return [];

    const hintList = Array.isArray(hints) ? hints : [];
    const scores = entries.map(s => {
      let sim = 0;
      if (s.embedding) sim = cosineSim(queryEmb, s.embedding);
      let tagBonus = 0;
      if (hintList.length && s.tags && s.tags.length) {
        const inter = s.tags.filter(t => hintList.includes(t));
        tagBonus = inter.length ? 0.1 * inter.length : 0;
      }
      // Slight preference for shards that have been used before.
      const usageBoost = Math.log(1 + s.usageCount) * 0.01;
      return { shard: s, score: sim + tagBonus + usageBoost };
    });
    scores.sort((a, b) => b.score - a.score);

    // A NaN/zero topK falls back to 1 so at least one shard is always returned.
    const limit = Math.max(1, Number(topK) || 1);
    const out = scores.slice(0, limit).map(x => x.shard);
    const now = Date.now();
    for (const s of out) { s.lastUsedTs = now; s.usageCount++; }
    return out;
  }

  /**
   * Reverse lookup used for monitoring.
   *
   * @param {string|number} port
   * @returns {string|null} Id of the shard that owns the port, or null.
   */
  shardOfPort(port) {
    const id = this.portToShard.get(Number(port));
    return id || null;
  }
}
1267
+
1268
+ // 初始化 ShardManager
1269
// Backend AI ports: each slot is read from the AI_PORT_<suffix> environment
// variable first, falling back to the matching command-line argument
// (argv[3], argv[4], argv[5]).
const AI_PORTS = ['A', 'B', 'C'].map(
  (suffix, slot) => process.env[`AI_PORT_${suffix}`] || process.argv[3 + slot]
);

// Port of the study module: AI_STUDY_PORT, otherwise argv[6].
const Study_Port = process.env.AI_STUDY_PORT || process.argv[6];

// Single shard manager instance built over the configured AI ports.
const shardManager = new ShardManager(AI_PORTS);
1277
+ // ========== 冷/热池调度结束 ==========
1125
1278
  // 新增:统一从各后端响应中提取文本
1126
1279
  function extractText(resp) {
1127
1280
  if (!resp) return '';
@@ -1169,28 +1322,30 @@ function perturb(arr) {
1169
1322
  */
1170
1323
  // 修改 requestAI,返回 null 表示离线
1171
1324
  // 修改 requestAI 函数,增加重试和超时处理
1172
/**
 * Sends one chat message to a backend AI instance on localhost, retrying on failure.
 *
 * Every successful call appends its latency (ms) to systemStats.aiResponseTimes[port].
 * After the final failed attempt the error message is appended to
 * systemStats.lastErrors. There is a 1 second pause between attempts.
 *
 * @param {number|string} port Port of the backend instance.
 * @param {string} message Text sent as `{ message }` to POST /api/chat.
 * @param {number} [retries=3] Maximum number of attempts.
 * @param {string|null} [shardId=null] Shard the port belongs to; used only in the warning log.
 * @returns {Promise<*>} The response body, or null when the instance is unavailable.
 */
async function requestAI(port, message, retries = 3, shardId = null) {
  const url = `http://localhost:${port}/api/chat`;
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const start = Date.now();
      const response = await axios.post(url, { message }, { timeout: 10000 }); // 10 s timeout
      const latency = Date.now() - start;
      systemStats.aiResponseTimes[port] = systemStats.aiResponseTimes[port] || [];
      systemStats.aiResponseTimes[port].push(latency);
      // shardId could also be written to the stats here; the stats shape is left unchanged for now.
      return response.data;
    } catch (error) {
      console.warn(`[WARN] 请求 AI 实例 ${port} (shard=${shardId || 'unknown'}) 失败 (尝试 ${attempt}/${retries}): ${error.message}`);
      if (attempt === retries) {
        systemStats.lastErrors.push({ port, error: error.message });
        return null; // null signals "service unavailable" to the caller
      }
      // Wait one second before the next attempt.
      await new Promise(resolve => setTimeout(resolve, 1000));
    }
  }
  // Reached only when `retries` allows no attempt at all (e.g. 0): keep the
  // "null means unavailable" contract instead of returning undefined.
  return null;
}
1192
1346
 
1193
1347
 
1348
+ // ...existing code...
1194
1349
  app.post('/api/chat', async (req, res) => {
1195
1350
  const { message } = req.body;
1196
1351
  if (!message) return res.status(400).json({ error: 'No message' });
@@ -1205,27 +1360,49 @@ app.post('/api/chat', async (req, res) => {
1205
1360
  let results = [];
1206
1361
 
1207
1362
  try {
1208
- // 星火阵列多轮交互(每一轮并发请求3个AI)
1363
+ // === 核心:为当前请求生成embedding并选择合适 shard ===
1364
+ const queryEmb = textToMiniEmbedding(message, shardManager.dim);
1365
+ // 可加一点简单的“领域 hint”:例如包含 code / law / zh 等关键词
1366
+ const hints = [];
1367
+ const lower = message.toLowerCase();
1368
+ if (/[{};()=]/.test(message) || /code|function|class|import|def /.test(lower)) hints.push('code');
1369
+ if (/[,。?!]/.test(message) || /的|了|吗/.test(message)) hints.push('zh');
1370
+ // 暂时不细分,topK=2
1371
+ const selectedShards = shardManager.selectShards(queryEmb, { topK: 2, hints });
1372
+
1373
+ // 从被选中的 shard 中收集端口
1374
+ let candidatePorts = [];
1375
+ for (const s of selectedShards) candidatePorts.push(...s.ports);
1376
+ // 去重复
1377
+ candidatePorts = Array.from(new Set(candidatePorts));
1378
+
1379
+ // 如果还为空,兜底用全部 AI_PORTS
1380
+ if (!candidatePorts.length) {
1381
+ candidatePorts = AI_PORTS.filter(p => !!p);
1382
+ }
1383
+
1384
+ // 星火阵列多轮交互(每一轮并发请求若干 AI)
1209
1385
  for (let round = 0; round <= communicateCount; round++) {
1386
+ const msgArr = inputs.map(arr => (arr || words).join(' '));
1387
+ // 对不超过 candidatePorts.length 的前几个输入进行并发请求
1210
1388
  results = await Promise.all(
1211
- AI_PORTS.map((port, i) => requestAI(port, (inputs[i] || words).join(' ')))
1389
+ candidatePorts.map((port, idx) =>
1390
+ requestAI(port, msgArr[idx % msgArr.length], 3, shardManager.shardOfPort(port))
1391
+ )
1212
1392
  );
1213
1393
 
1214
- // 下一轮输入为本轮输出分词(先提取文本,再 split)
1215
1394
  if (round < communicateCount) {
1216
- console.log(results);
1217
1395
  inputs = results.map(r => extractText(r).toLowerCase().split(/\s+/).filter(Boolean));
1218
1396
  }
1219
1397
  }
1220
1398
 
1221
- // 只保留有效响应(提取为字符串)
1222
1399
  const texts = results.map(extractText).filter(t => typeof t === 'string' && t.length > 0);
1223
1400
  if (texts.length === 0) {
1224
1401
  systemStats.requestsFailed++;
1225
1402
  return res.status(502).json({ error: '所有AI实例均不可用或无响应' });
1226
1403
  }
1227
1404
 
1228
- // 统计词频
1405
+ // ...后面词频融合 + SERIALIZER_API 部分保持不变...
1229
1406
  const freq = {};
1230
1407
  texts.forEach(t => t.split(/\s+/).forEach(w => { if (w) freq[w] = (freq[w] || 0) + 1; }));
1231
1408
  const sorted = Object.entries(freq).sort((a, b) => b[1] - a[1]);
@@ -1247,7 +1424,18 @@ app.post('/api/chat', async (req, res) => {
1247
1424
  res.status(500).json({ error: err.message });
1248
1425
  }
1249
1426
  });
1427
// GET /api/shards — reports the current shard layout and usage counters.
app.get('/api/shards', (_req, res) => {
  const shards = shardManager.listShards();
  res.json({ ok: true, shards });
});
1250
1430
 
1431
+ // 更新某个 shard 的标签/embedding(embedding 可手工填或从文件导入)
1432
// POST /api/shards/:id/meta — updates a shard's tags and/or embedding
// (the embedding may be filled in by hand or imported from a file).
//
// Body: { embedding?: number[], tags?: string[] }. Non-array values are
// ignored, as before. Array values come from an untrusted request body and are
// validated before being stored:
//   - embedding must hold exactly `shardManager.dim` finite numbers, because a
//     vector of any other length can never match a query embedding;
//   - tags must contain only strings.
// Responses: 400 on a malformed array, 404 when the shard id is unknown,
// otherwise { ok: true }.
app.post('/api/shards/:id/meta', (req, res) => {
  const { id } = req.params;
  const { embedding, tags } = req.body || {};

  if (Array.isArray(embedding)) {
    const wellFormed =
      embedding.length === shardManager.dim &&
      embedding.every(v => typeof v === 'number' && Number.isFinite(v));
    if (!wellFormed) {
      return res.status(400).json({
        ok: false,
        error: `embedding must be an array of ${shardManager.dim} finite numbers`
      });
    }
  }
  if (Array.isArray(tags) && !tags.every(t => typeof t === 'string')) {
    return res.status(400).json({ ok: false, error: 'tags must be an array of strings' });
  }

  const ok = shardManager.updateShardMeta(id, { embedding, tags });
  if (!ok) return res.status(404).json({ ok: false, error: 'shard not found' });
  res.json({ ok: true });
});
1251
1439
  //==============================================================================
1252
1440
  // 新增功能:模型参数调节
1253
1441
  //==============================================================================