079project 8.0.0 → 9.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +165 -0
- package/README.en.md +81 -1
- package/README.md +85 -1
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/dump.rdb +0 -0
- package/groupWorker.cjs +253 -0
- package/inferenceWorker.cjs +94 -0
- package/main.cjs +1263 -173
- package/mainFailedOfJing1Xi4Hua4Zhi4Duan3Yu3.cjs +6320 -0
- package/optimization.cjs +720 -0
- package/package.json +3 -2
- package/test_automatic/answer.csv +401 -0
- package/test_automatic/generate_daily_qa.py +645 -0
- package/test_automatic/question.csv +401 -0
- package/test_automatic.cjs +441 -0
|
@@ -0,0 +1,441 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const http = require('http');
|
|
6
|
+
const { spawn } = require('child_process');
|
|
7
|
+
|
|
8
|
+
// Fallback values used by main() when the matching CLI option is not supplied.

// Address of the gateway the test requests are sent to.
const DEFAULT_HOST = '127.0.0.1';
const DEFAULT_PORT = 5080;

// Script that is spawned as the system under test (sits next to this file).
const DEFAULT_MAIN = path.join(__dirname, 'main.cjs');

// Bundled question/answer fixtures; rows are paired by position.
const FIXTURE_DIR = path.join(__dirname, 'test_automatic');
const DEFAULT_Q = path.join(FIXTURE_DIR, 'question.csv');
const DEFAULT_A = path.join(FIXTURE_DIR, 'answer.csv');
|
|
13
|
+
|
|
14
|
+
/**
 * Minimal CLI parser for `--key=value` and bare `--flag` tokens.
 *
 * @param {string[]} argv - Raw arguments (e.g. `process.argv.slice(2)`).
 * @returns {Object} Option name -> string value (everything after the first
 *   `=`), or `true` for a bare flag. Tokens that do not start with `--` are
 *   collected, in order, under the `_` key.
 */
const parseArgs = (argv) => {
  const parsed = { _: [] };
  for (const token of argv) {
    if (token.startsWith('--')) {
      const sep = token.indexOf('=');
      const hasValue = sep !== -1;
      const name = hasValue ? token.slice(2, sep) : token.slice(2);
      parsed[name] = hasValue ? token.slice(sep + 1) : true;
    } else {
      parsed._.push(token);
    }
  }
  return parsed;
};
|
|
30
|
+
|
|
31
|
+
/**
 * Convert a CLI string into a boolean, number or (trimmed) string.
 *
 * Booleans, `null` and `undefined` are returned untouched. Otherwise the
 * value is stringified and trimmed; `true/on/yes` and `false/off/no`
 * (case-insensitive) become booleans, plain decimal literals such as `-3.5`
 * become numbers, and anything else is returned as the trimmed string.
 *
 * @param {*} value - Raw value.
 * @returns {*} The coerced scalar.
 */
const coerceScalar = (value) => {
  if (typeof value === 'boolean' || value == null) return value;

  const text = String(value).trim();
  if (text === '') return text;

  switch (text.toLowerCase()) {
    case 'true':
    case 'on':
    case 'yes':
      return true;
    case 'false':
    case 'off':
    case 'no':
      return false;
    default:
      break;
  }

  // Only simple decimals are numeric; exponent/hex forms stay strings.
  return /^-?\d+(?:\.\d+)?$/.test(text) ? Number(text) : text;
};
|
|
43
|
+
|
|
44
|
+
/**
 * Minimal CSV reader: one or more columns, double-quoted fields (with `""`
 * as an escaped quote), first non-blank record is the header.
 *
 * The whole file is scanned in a single pass so that a line break inside a
 * quoted field stays part of that field instead of starting a new record.
 *
 * Behaviour:
 *  - records that contain only whitespace are skipped;
 *  - every cell is trimmed;
 *  - data records whose cells are all empty are skipped;
 *  - an empty header name is replaced by `col<N>` (1-based);
 *  - missing cells become `''`; surplus cells are joined with `,` and
 *    appended to the last column.
 *
 * @param {string} filePath - Path of the UTF-8 CSV file.
 * @returns {Object[]} One object per data record, keyed by header name.
 * @throws {Error} Whatever `fs.readFileSync` throws when the file is unreadable.
 */
const parseCsvFile = (filePath) => {
  const content = fs.readFileSync(filePath, 'utf8');

  const records = [];
  let cells = [];
  let cell = '';
  let inQuotes = false;
  // True once the current record holds anything other than whitespace.
  let hasContent = false;

  const endRecord = () => {
    cells.push(cell);
    if (hasContent) {
      records.push(cells.map((c) => c.trim()));
    }
    cells = [];
    cell = '';
    hasContent = false;
  };

  for (let i = 0; i < content.length; i++) {
    const ch = content[i];

    if (inQuotes) {
      if (ch !== '"') {
        // Includes \r and \n: quoted line breaks belong to the field.
        cell += ch;
      } else if (content[i + 1] === '"') {
        cell += '"';
        i += 1;
      } else {
        inQuotes = false;
      }
      continue;
    }

    if (ch === '\n' || (ch === '\r' && content[i + 1] === '\n')) {
      if (ch === '\r') i += 1;
      endRecord();
    } else if (ch === ',') {
      cells.push(cell);
      cell = '';
      hasContent = true;
    } else if (ch === '"') {
      inQuotes = true;
      hasContent = true;
    } else {
      cell += ch;
      if (!hasContent && ch.trim() !== '') hasContent = true;
    }
  }
  // Flush the final record (file may not end with a newline).
  endRecord();

  if (records.length === 0) return [];

  const [header, ...dataRecords] = records;
  const keyAt = (index) => header[index] || `col${index + 1}`;

  const rows = [];
  for (const cols of dataRecords) {
    if (cols.every((c) => !c)) continue;

    const row = {};
    header.forEach((_name, index) => {
      row[keyAt(index)] = cols[index] ?? '';
    });

    // Fold surplus cells into the last column so nothing is lost when the
    // record has more cells than the header.
    if (cols.length > header.length) {
      const extra = cols.slice(header.length).join(',');
      const lastKey = keyAt(header.length - 1);
      row[lastKey] = row[lastKey] ? `${row[lastKey]},${extra}` : extra;
    }
    rows.push(row);
  }
  return rows;
};
|
|
104
|
+
|
|
105
|
+
/**
 * Canonicalise text for comparison: lower-case it, turn line breaks/tabs
 * into spaces, collapse whitespace runs to one space and trim the ends.
 *
 * @param {*} text - Any value; falsy values are treated as an empty string.
 * @returns {string} The normalised text.
 */
const normalizeText = (text) => {
  const lowered = String(text || '').toLowerCase();
  const withoutBreaks = lowered.replace(/[\r\n\t]+/g, ' ');
  const singleSpaced = withoutBreaks.replace(/\s+/g, ' ');
  return singleSpaced.trim();
};
|
|
112
|
+
|
|
113
|
+
/**
 * Split text into comparison tokens.
 *
 * The text is normalised first, then cut at every run of characters other
 * than `a-z`, `0-9`, `_`, `-` and CJK ideographs (U+4E00..U+9FFF). Because
 * ideographs are kept rather than used as separators, an unbroken run of
 * them forms a single token.
 *
 * @param {*} text - Text to tokenize.
 * @returns {string[]} Non-empty tokens in order of appearance.
 */
const tokenize = (text) => {
  const normalized = normalizeText(text);
  if (normalized === '') return [];

  const pieces = normalized.split(/[^a-z0-9_\-\u4e00-\u9fff]+/);
  return pieces.filter((piece) => piece.length > 0);
};
|
|
120
|
+
|
|
121
|
+
/**
 * Jaccard index |A ∩ B| / |A ∪ B| of two token lists (duplicates ignored).
 *
 * @param {Array} a - First token list; a non-array counts as empty.
 * @param {Array} b - Second token list; a non-array counts as empty.
 * @returns {number} Value in [0, 1]; 1 when both sets are empty, 0 when
 *   exactly one of them is.
 */
const jaccard = (a, b) => {
  const left = new Set(Array.isArray(a) ? a : []);
  const right = new Set(Array.isArray(b) ? b : []);

  if (left.size === 0 || right.size === 0) {
    return left.size === 0 && right.size === 0 ? 1 : 0;
  }

  const shared = [...left].filter((token) => right.has(token)).length;
  const union = left.size + right.size - shared;
  return union > 0 ? shared / union : 0;
};
|
|
131
|
+
|
|
132
|
+
/**
 * Levenshtein edit distance between two strings, compared per UTF-16 code
 * unit, using two rolling rows.
 *
 * Both inputs are cut to their first `maxLen` characters before the table
 * is filled, so pathological inputs cannot stall the run; the result is
 * therefore the distance between the truncated strings.
 *
 * @param {*} a - First text (falsy -> '').
 * @param {*} b - Second text (falsy -> '').
 * @param {number} [maxLen=2000] - Maximum number of characters considered.
 * @returns {number} The edit distance.
 */
const levenshtein = (a, b, maxLen = 2000) => {
  const left = String(a || '');
  const right = String(b || '');
  if (left === right) return 0;
  if (left.length === 0) return Math.min(right.length, maxLen);
  if (right.length === 0) return Math.min(left.length, maxLen);

  // Guard against extremely long texts.
  const source = left.slice(0, maxLen);
  const target = right.slice(0, maxLen);
  const width = target.length;

  // `above` is the previous table row, `row` the one being filled.
  let above = Array.from({ length: width + 1 }, (_unused, j) => j);
  let row = new Array(width + 1);

  for (let i = 1; i <= source.length; i++) {
    const code = source.charCodeAt(i - 1);
    row[0] = i;
    for (let j = 1; j <= width; j++) {
      const substitution = above[j - 1] + (code === target.charCodeAt(j - 1) ? 0 : 1);
      row[j] = Math.min(above[j] + 1, row[j - 1] + 1, substitution);
    }
    [above, row] = [row, above];
  }
  return above[width];
};
|
|
163
|
+
|
|
164
|
+
/**
 * Score how close a reply is to the expected answer, in [0, 1].
 *
 * Blend of two signals computed on the normalised texts:
 *  - token Jaccard overlap (weight 0.65): closeness of content;
 *  - character similarity `1 - editDistance / length` (weight 0.35):
 *    closeness of wording.
 *
 * The edit distance only looks at the first `LEV_CAP` characters of each
 * text, so the length it is divided by is capped the same way. Dividing by
 * the untruncated length would shrink the ratio and report long, unrelated
 * texts as highly similar.
 *
 * @param {*} response - Reply produced by the system under test.
 * @param {*} answer - Expected answer.
 * @returns {number} Similarity score between 0 and 1.
 */
const similarityScore = (response, answer) => {
  const LEV_CAP = 2000;

  const r = normalizeText(response);
  const a = normalizeText(answer);

  const tokenOverlap = jaccard(tokenize(r), tokenize(a));

  const dist = levenshtein(r, a, LEV_CAP);
  const comparedLen = Math.max(Math.min(Math.max(r.length, a.length), LEV_CAP), 1);
  const charSimilarity = Math.min(1, Math.max(0, 1 - dist / comparedLen));

  return 0.65 * tokenOverlap + 0.35 * charSimilarity;
};
|
|
174
|
+
|
|
175
|
+
/**
 * Send one HTTP request with an optional JSON body and collect the response.
 *
 * @param {string} method - HTTP method, e.g. 'GET' or 'POST'.
 * @param {string} url - Absolute `http://` URL.
 * @param {*} body - Value serialised with JSON.stringify; `null`/`undefined`
 *   sends no body.
 * @param {{timeoutMs?: number}} [options] - `timeoutMs` is passed as the
 *   request's `timeout` option (default 20000); when the 'timeout' event
 *   fires the request is destroyed.
 * @returns {Promise<{status: number, json: *, raw: string}>} `json` is the
 *   parsed body, `{}` for an empty body, or `null` when the body is not
 *   valid JSON; `raw` is the body text.
 *   Rejects on an invalid URL, a request error, a timeout
 *   ('request-timeout'), or when the response ends before it is complete
 *   ('response-aborted').
 */
const httpJson = (method, url, body, { timeoutMs = 20_000 } = {}) => {
  return new Promise((resolve, reject) => {
    const u = new URL(url);
    const payload = body == null ? '' : JSON.stringify(body);

    const req = http.request(
      {
        method,
        hostname: u.hostname,
        port: u.port,
        path: u.pathname + (u.search || ''),
        headers: {
          'Content-Type': 'application/json',
          'Content-Length': Buffer.byteLength(payload)
        },
        timeout: timeoutMs
      },
      (res) => {
        let data = '';
        res.setEncoding('utf8');
        res.on('data', (chunk) => {
          data += chunk;
        });
        res.on('end', () => {
          let parsed = null;
          try {
            parsed = data ? JSON.parse(data) : {};
          } catch (_e) {
            // Non-JSON body: callers still get the status and raw text.
            parsed = null;
          }
          resolve({ status: res.statusCode, json: parsed, raw: data });
        });
        // A response that breaks off mid-body never emits 'end'; without
        // these handlers the promise would stay pending forever.
        res.on('error', reject);
        res.on('close', () => {
          // No-op after a normal 'end' (the promise is already resolved).
          if (!res.complete) reject(new Error('response-aborted'));
        });
      }
    );

    req.on('error', reject);
    req.on('timeout', () => {
      req.destroy(new Error('request-timeout'));
    });

    if (payload) req.write(payload);
    req.end();
  });
};
|
|
213
|
+
|
|
214
|
+
/**
 * Poll `GET <baseUrl>/api/system/status` until the server answers.
 *
 * Any response with a status in [200, 500) counts as ready; request
 * failures (connection refused, timeout, ...) are treated as "not yet" and
 * retried after `intervalMs`.
 *
 * @param {string} baseUrl - e.g. `http://127.0.0.1:5080`.
 * @param {{timeoutMs?: number, intervalMs?: number}} [options] - Overall
 *   polling budget (default 60000) and pause between probes (default 800).
 * @returns {Promise<boolean>} `true` once a probe succeeds, `false` when the
 *   budget runs out first.
 */
const waitForReady = async (baseUrl, { timeoutMs = 60_000, intervalMs = 800 } = {}) => {
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  const probe = async () => {
    try {
      const { status } = await httpJson('GET', `${baseUrl}/api/system/status`, null, { timeoutMs: 5_000 });
      return Boolean(status) && status >= 200 && status < 500;
    } catch (_err) {
      // Server not accepting requests yet.
      return false;
    }
  };

  const startedAt = Date.now();
  while (Date.now() - startedAt < timeoutMs) {
    if (await probe()) return true;
    await pause(intervalMs);
  }
  return false;
};
|
|
229
|
+
|
|
230
|
+
/**
 * Entry point: start main.cjs, replay every question against `/api/chat`,
 * score each reply against the expected answer and exit with a status code.
 *
 * CLI options consumed here (all `--key=value`):
 *   host | gateway-host, port | gateway-port | portGateway, main,
 *   questions, answers, pass (score threshold, default 0.65), model-json
 *   (JSON object merged into the runtime params patch).
 * `--model.<name>=<value>` entries are collected into the params patch sent
 * to `/api/model/params`; every other `--option` is forwarded to main.cjs
 * unchanged. Positional arguments are ignored.
 *
 * Exit codes: 0 average score >= threshold, 5 average below threshold,
 * 2 a required file is missing, 3 main.cjs did not become ready (or exited),
 * 4 no question/answer pairs. Unexpected errors propagate to the caller
 * after the child process has been shut down.
 *
 * @returns {Promise<void>} Does not resolve normally; the process exits.
 */
const main = async () => {
  const DEFAULT_PASS = 0.65;
  const rawArgs = process.argv.slice(2);
  const args = parseArgs(rawArgs);

  const host = String(args.host || args['gateway-host'] || DEFAULT_HOST);
  const port = Number(args.port || args['gateway-port'] || args['portGateway'] || DEFAULT_PORT) || DEFAULT_PORT;
  const baseUrl = `http://${host}:${port}`;

  const mainFile = path.resolve(String(args.main || DEFAULT_MAIN));
  const qFile = path.resolve(String(args.questions || DEFAULT_Q));
  const aFile = path.resolve(String(args.answers || DEFAULT_A));

  // A bare `--pass`, `--pass=` or a non-numeric value would otherwise turn
  // into 1, 0 or NaN and silently change (or break) the verdict.
  let passThreshold = DEFAULT_PASS;
  if (args.pass != null) {
    const candidate = typeof args.pass === 'string' && args.pass.trim() !== '' ? Number(args.pass) : NaN;
    if (Number.isFinite(candidate)) {
      passThreshold = candidate;
    } else {
      console.warn(`[test_automatic] invalid --pass value, using default ${DEFAULT_PASS}`);
    }
  }

  // Options consumed by this script; everything else is passed through to main.cjs.
  const reserved = new Set([
    'main', 'host', 'gateway-host', 'port', 'gateway-port', 'portGateway',
    'questions', 'answers', 'pass', 'model-json'
  ]);

  const forwardArgs = [];
  const modelPatch = {};

  for (const raw of rawArgs) {
    if (!raw.startsWith('--')) continue;
    const eq = raw.indexOf('=');
    const key = raw.slice(2).split('=')[0];
    if (reserved.has(key)) continue;

    // --model.xxx=yyy becomes a runtime params patch (sent to /api/model/params).
    if (key.startsWith('model.')) {
      const value = eq === -1 ? true : raw.slice(eq + 1);
      modelPatch[key.slice('model.'.length)] = coerceScalar(value);
      continue;
    }
    forwardArgs.push(raw);
  }

  if (args['model-json']) {
    try {
      const obj = JSON.parse(String(args['model-json']));
      if (obj && typeof obj === 'object') {
        Object.assign(modelPatch, obj);
      }
    } catch (e) {
      console.warn('[test_automatic] model-json parse failed:', e.message);
    }
  }

  if (!fs.existsSync(mainFile)) {
    console.error('[test_automatic] main.cjs not found:', mainFile);
    process.exit(2);
  }
  if (!fs.existsSync(qFile)) {
    console.error('[test_automatic] question.csv not found:', qFile);
    process.exit(2);
  }
  if (!fs.existsSync(aFile)) {
    console.error('[test_automatic] answer.csv not found:', aFile);
    process.exit(2);
  }

  // Load the fixtures before starting the server so that unusable input
  // fails fast and never leaves a child process behind.
  const pickCol = (row, preferredKeys) => {
    if (!row || typeof row !== 'object') return '';
    for (const k of preferredKeys) {
      if (row[k] != null && String(row[k]).trim()) return String(row[k]);
    }
    // No preferred column present: fall back to the first column.
    const keys = Object.keys(row);
    return keys.length ? String(row[keys[0]] ?? '') : '';
  };

  const questions = parseCsvFile(qFile).map((r) => pickCol(r, ['question', 'q', 'text', 'content']));
  const answers = parseCsvFile(aFile).map((r) => pickCol(r, ['answer', 'a', 'text', 'content']));

  const N = Math.min(questions.length, answers.length);
  if (N === 0) {
    console.error('[test_automatic] empty questions/answers');
    process.exit(4);
  }
  if (questions.length !== answers.length) {
    console.warn(`[test_automatic] warning: questions(${questions.length}) != answers(${answers.length}), using N=${N}`);
  }

  console.log(`[test_automatic] starting main: ${mainFile}`);
  console.log(`[test_automatic] baseUrl: ${baseUrl}`);

  const child = spawn(process.execPath, [mainFile, ...forwardArgs], {
    stdio: 'inherit',
    cwd: path.dirname(mainFile),
    env: { ...process.env }
  });

  // Track termination explicitly: `child.killed` only says a signal was
  // sent, and `exitCode` stays null when the child dies from a signal.
  let childGone = false;
  const childExited = new Promise((resolve) => {
    child.once('exit', () => {
      childGone = true;
      resolve();
    });
    child.once('error', (err) => {
      console.error('[test_automatic] main process error:', err.message);
      childGone = true;
      resolve();
    });
  });
  const hasExited = () => childGone || child.exitCode != null || child.signalCode != null;

  const waitForExit = (ms) => new Promise((resolve) => {
    if (hasExited()) {
      resolve(true);
      return;
    }
    const timer = setTimeout(() => resolve(hasExited()), ms);
    childExited.then(() => {
      clearTimeout(timer);
      resolve(true);
    });
  });

  // Ask the child to stop (SIGINT), escalate to SIGKILL after 6 seconds.
  const shutdown = async () => {
    if (hasExited()) return;
    try {
      child.kill('SIGINT');
    } catch (_e) {
      // best effort: the process may have gone away in the meantime
    }
    if (await waitForExit(6_000)) return;
    try {
      child.kill('SIGKILL');
    } catch (_e) {
      // best effort
    }
  };

  const runSuite = async () => {
    const ready = await Promise.race([
      waitForReady(baseUrl, { timeoutMs: 60_000, intervalMs: 800 }),
      childExited.then(() => false)
    ]);
    if (hasExited()) {
      console.error('[test_automatic] main exited before the test run could start');
      return 3;
    }
    if (!ready) {
      console.error('[test_automatic] main not ready within timeout');
      return 3;
    }

    // Apply runtime parameters without restarting main.
    if (Object.keys(modelPatch).length) {
      console.log('[test_automatic] applying /api/model/params patch:', modelPatch);
      try {
        const resp = await httpJson('POST', `${baseUrl}/api/model/params`, modelPatch, { timeoutMs: 15_000 });
        if (!resp.status || resp.status >= 400) {
          console.warn('[test_automatic] apply model params returned status:', resp.status);
        }
      } catch (e) {
        console.warn('[test_automatic] apply model params failed:', e.message);
      }
    }

    const results = [];
    let sum = 0;
    let passed = 0;

    for (let i = 0; i < N; i++) {
      const q = String(questions[i] || '').trim();
      const a = String(answers[i] || '').trim();
      if (!q) {
        results.push({ i: i + 1, ok: false, reason: 'empty-question' });
        continue;
      }

      let reply = '';
      let status = 0;
      const started = Date.now();
      try {
        const r = await httpJson('POST', `${baseUrl}/api/chat`, { text: q }, { timeoutMs: 30_000 });
        status = r.status || 0;
        reply = r?.json?.result?.reply ?? '';
        if (!reply && r?.json?.result?.result?.reply) {
          // Some responses nest the reply one level deeper.
          reply = r.json.result.result.reply;
        }
      } catch (e) {
        // Failed requests add 0 to the sum but are still part of N.
        results.push({ i: i + 1, ok: false, question: q, expected: a, error: e.message });
        console.log(`[${i + 1}/${N}] error=${e.message}`);
        continue;
      }

      const latency = Date.now() - started;
      const score = similarityScore(reply, a);
      const ok = score >= passThreshold;
      sum += score;
      if (ok) passed += 1;

      results.push({
        i: i + 1,
        ok,
        score: Number(score.toFixed(4)),
        status,
        latency,
        question: q,
        expected: a,
        reply
      });

      console.log(`[${i + 1}/${N}] score=${score.toFixed(3)} ok=${ok} latency=${latency}ms`);
    }

    const avg = sum / N;
    console.log('');
    console.log('[test_automatic] done');
    console.log(`[test_automatic] avgScore=${avg.toFixed(4)} pass=${passed}/${N} (threshold=${passThreshold})`);

    // Print the lowest-scoring samples (at most 5).
    const worst = results
      .filter((r) => r && r.score != null)
      .sort((x, y) => x.score - y.score)
      .slice(0, 5);
    for (const w of worst) {
      console.log('---');
      console.log(`#${w.i} score=${w.score}`);
      console.log('Q:', w.question);
      console.log('A:', w.expected);
      console.log('R:', w.reply);
    }

    // The average score decides the exit code: >= threshold -> 0, else 5.
    return avg >= passThreshold ? 0 : 5;
  };

  let exitCode;
  try {
    exitCode = await runSuite();
  } finally {
    // Runs on unexpected errors too, so the child is never orphaned.
    await shutdown();
  }
  process.exit(exitCode);
};
|
|
437
|
+
|
|
438
|
+
// Script entry: run main(); any rejection that escapes it is reported and
// turned into exit code 1.
const reportFatal = (error) => {
  console.error('[test_automatic] fatal:', error);
  process.exit(1);
};

main().catch(reportFatal);
|