079project 2.0.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crawler/agent.cjs +97 -0
- package/crawler/index.cjs +515 -0
- package/crawler/storage.cjs +163 -0
- package/groupmanager.cjs +2 -1
- package/main_Serve.cjs +1136 -210
- package/main_Study.cjs +1584 -349
- package/package.json +2 -1
- package/robots/seeds.txt +2 -0
- package/schedule.cjs +745 -0
- package/todo-list.txt +0 -86
package/schedule.cjs
ADDED
|
@@ -0,0 +1,745 @@
|
|
|
1
|
+
// ...existing code...
|
|
2
|
+
const fs = require('fs');
|
|
3
|
+
const path = require('path');
|
|
4
|
+
const axios = require('axios');
|
|
5
|
+
|
|
6
|
+
/**
 * Returns a promise that settles after the given delay.
 * @param {number} ms - Delay handed straight to setTimeout.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
|
|
7
|
+
/**
 * Current wall-clock time as an ISO-8601 string.
 * @returns {string} Result of Date#toISOString for "now".
 */
function nowISO() {
  const stamp = new Date();
  return stamp.toISOString();
}
|
|
8
|
+
/**
 * Parses JSON without throwing.
 * @param {string} s - Text to parse.
 * @returns {*} The parsed value, or null when parsing fails.
 */
function safeJSON(s) {
  let parsed = null;
  try {
    parsed = JSON.parse(s);
  } catch {
    parsed = null;
  }
  return parsed;
}
|
|
9
|
+
/**
 * Removes duplicates while keeping first-seen order.
 * @param {Iterable<*>} arr - Values to de-duplicate.
 * @returns {Array<*>} New array with each distinct value once.
 */
function uniq(arr) {
  const distinct = new Set(arr);
  return [...distinct];
}
|
|
10
|
+
|
|
11
|
+
// ================ Added: PromptBank / PromptSampler / ParamTuner =================
|
|
12
|
+
|
|
13
|
+
/**
 * PromptBank: a library of prompt templates grouped by category.
 * Categories:
 *  - explain, compare, critique, reason, plan, summarize, rewrite, code, qa, checklist, step
 *
 * Templates contain `{name}` placeholders that `build` substitutes.
 */
class PromptBank {
  constructor() {
    this.templates = {
      explain: [
        'Explain the core concepts and common misconceptions of {topic} in a formal tone.',
        'Explain the basics of {topic} in simple terms.',
        'How would you explain {topic} to a 10-year-old?'
      ],
      compare: [
        'Compare and contrast {a} and {b} in terms of cost, performance, and usability.',
        '{a} vs {b}: What are the pros and cons of each?',
        'Provide a detailed comparison between {a} and {b}, highlighting their key differences.'
      ],
      critique: [
        'Evaluate the following statement for factual accuracy and logical consistency, and suggest improvements: {statement}',
        'Identify the flaws or weaknesses in this response and provide a better version: {answer}'
      ],
      reason: [
        'Explain the causes and effects of {topic} in a logical chain.',
        'Why is {topic} often misunderstood? Provide two common scenarios of misinterpretation.'
      ],
      plan: [
        'Create a 30-minute learning plan for {topic}, including objectives, outline, resources, and exercises.',
        'Break down {topic} into five actionable tasks and provide completion criteria for each.'
      ],
      summarize: [
        'Summarize the following content in three sentences and include key points: {content}',
        'Use the "current state - problem - solution" structure to summarize: {content}'
      ],
      rewrite: [
        'Rewrite this text to make it more conversational: {content}',
        'Rewrite this content in a style suitable for Twitter or Instagram (under 100 words): {content}'
      ],
      code: [
        'Describe the implementation steps for {topic} using pseudocode (avoid specific programming languages).',
        'Write an input/output example and a checklist for edge case validation for {topic}.'
      ],
      qa: [
        'What is {topic}? What problems does it solve?',
        'What prior knowledge is required to learn {topic}?'
      ],
      checklist: [
        'Create a checklist of things to verify before starting {topic} (up to five items).',
        'List common reasons for failure in {topic}, ranked by frequency.'
      ],
      step: [
        'Break down {topic} into a process of no more than seven steps, with one sentence per step.',
        'What are the key steps to complete {topic}? Rank them by priority.'
      ]
    };

    // Round-robin order is the insertion order of the template categories.
    this.categoryWheel = Object.keys(this.templates);
    this._wheelIdx = 0;
  }

  /**
   * Returns the next category in round-robin order and advances the wheel.
   * @returns {string} Category name.
   */
  nextCategory() {
    const slot = this._wheelIdx % this.categoryWheel.length;
    this._wheelIdx += 1;
    return this.categoryWheel[slot];
  }

  /**
   * Substitutes `{name}` placeholders in a template.
   * @param {string} template - Template text.
   * @param {object} vars - Placeholder values; null/undefined/missing entries become ''.
   * @returns {string} The filled-in template.
   */
  build(template, vars) {
    const fill = (_match, key) => {
      if (!vars) return '';
      const raw = vars[key];
      if (raw === null || raw === undefined) return '';
      return String(raw);
    };
    return template.replace(/{(\w+)}/g, fill);
  }

  /**
   * Picks one template of the given category uniformly at random.
   * @param {string} cat - Category name.
   * @returns {string|null} A template, or null when the category is unknown or empty.
   */
  randomTemplate(cat) {
    const candidates = this.templates[cat] || [];
    if (!candidates.length) return null;
    const pick = Math.floor(Math.random() * candidates.length);
    return candidates[pick];
  }
}
|
|
92
|
+
|
|
93
|
+
/**
 * PromptSampler: draws topic material from several runtime sources and uses it
 * to fill PromptBank templates.
 * Sources:
 *  - session words (runtime.wordAccessLog)
 *  - vocabulary (runtime.vocabManager.vocab)
 *  - KVM meme words (runtime.kvm.memory)
 *  - recently crawled documents (global.__crawler)
 *  - local spider article snippets (runtime.spider)
 */
class PromptSampler {
  /**
   * @param {object} runtime - Host runtime; every read is optional-chained, so it may be null.
   * @param {object} [bank] - Template source exposing nextCategory/randomTemplate/build.
   */
  constructor(runtime, bank = new PromptBank()) {
    this.runtime = runtime;
    this.bank = bank;
    // Keys (first 120 chars) of prompts this instance has already produced.
    this.seen = new Set();
  }

  /**
   * Most-accessed session words, highest count first.
   * @param {number} [limit=8] - Maximum number of words returned.
   * @returns {string[]} Words; empty when wordAccessLog is not a Map or on any error.
   */
  sampleTopicsFromSessions(limit = 8) {
    const out = [];
    try {
      const wal = this.runtime?.wordAccessLog;
      if (!(wal instanceof Map)) return out;
      const items = Array.from(wal.entries())
        .map(([w, per]) => {
          let cnt = 0;
          if (per instanceof Map) {
            for (const c of per.values()) cnt += (c || 0);
          } else if (Array.isArray(per)) {
            cnt = per.length;
          }
          return { w, cnt };
        })
        .filter((x) => x.w && x.cnt > 0)
        .sort((a, b) => b.cnt - a.cnt)
        .slice(0, 50);
      for (const it of items) out.push(it.w);
      return uniq(out).slice(0, limit);
    } catch {
      // Best effort: a malformed log simply yields no topics.
      return out;
    }
  }

  /**
   * Random pairs of distinct vocabulary words (first four entries skipped, words of length <= 2 dropped).
   * @param {number} [limit=6] - Upper bound on draws; fewer pairs come back when a draw picks the same word twice.
   * @returns {Array<[string, string]>}
   */
  samplePairsFromVocab(limit = 6) {
    const vocab = this.runtime?.vocabManager?.vocab || [];
    const pool = vocab.slice(4).filter((w) => w && w.length > 2);
    const pairs = [];
    const draws = Math.min(limit, Math.floor(pool.length / 2));
    for (let i = 0; i < draws; i++) {
      const a = pool[Math.floor(Math.random() * pool.length)];
      const b = pool[Math.floor(Math.random() * pool.length)];
      if (a && b && a !== b) pairs.push([a, b]);
    }
    return pairs;
  }

  /**
   * KVM entries whose value is an array of at least three words.
   * @param {number} [limit=6] - Maximum number of memes returned.
   * @returns {Array<{meme: *, words: Array}>} Each with up to ten distinct words.
   */
  sampleKvmMemes(limit = 6) {
    const out = [];
    try {
      const kv = this.runtime?.kvm?.memory;
      if (!(kv instanceof Map)) return out;
      for (const [k, v] of kv.entries()) {
        if (Array.isArray(v) && v.length >= 3) {
          out.push({ meme: k, words: uniq(v).slice(0, 10) });
          if (out.length >= limit) break;
        }
      }
      return out;
    } catch {
      return out;
    }
  }

  /**
   * One random line (from the first 20 non-empty lines) of each recent crawler document.
   * @param {number} [limit=6] - Maximum number of snippets returned.
   * @returns {string[]} Snippets longer than 20 chars, truncated to 240 chars.
   */
  sampleCrawlerSnippets(limit = 6) {
    const out = [];
    try {
      const docs = global.__crawler?.loadRecentDocs?.(24) || [];
      for (const d of docs) {
        if (!d || !d.text) continue;
        const lines = String(d.text).split(/\r?\n/).map((s) => s.trim()).filter(Boolean);
        if (!lines.length) continue;
        const pick = lines[Math.floor(Math.random() * Math.min(20, lines.length))];
        if (pick && pick.length > 20) out.push(pick.slice(0, 240));
        if (out.length >= limit) break;
      }
      return out;
    } catch {
      return out;
    }
  }

  /**
   * Random spider articles, stringified and truncated to 240 chars.
   * @param {number} [limit=6] - Maximum number of draws.
   * @returns {string[]}
   */
  sampleSpiderSnippets(limit = 6) {
    const out = [];
    try {
      const arts = this.runtime?.spider?.fetchArticles?.() || [];
      for (let i = 0; i < Math.min(limit, arts.length); i++) {
        const a = arts[Math.floor(Math.random() * arts.length)];
        if (!a) continue;
        out.push(String(a).slice(0, 240));
      }
      return out;
    } catch {
      return out;
    }
  }

  /**
   * Gathers placeholder values for one template from the source selected by `mode`.
   * @param {string} mode - sessions | kvm | crawler | spider | vocab | mixed (anything else behaves as mixed).
   * @param {string} tpl - Template being filled; only inspected for which placeholders it contains.
   * @returns {object} Possibly partial map of placeholder values.
   */
  _collectVars(mode, tpl) {
    const vars = {};
    const pickOne = (list) => list[Math.floor(Math.random() * list.length)];
    switch (mode) {
      case 'sessions': {
        const topics = this.sampleTopicsFromSessions(10);
        if (topics.length >= 1) vars.topic = pickOne(topics);
        if (/({a}|{b})/.test(tpl) && topics.length >= 2) {
          vars.a = topics[0];
          vars.b = topics[1];
        }
        break;
      }
      case 'kvm': {
        const memes = this.sampleKvmMemes(6);
        if (memes.length) {
          const m = pickOne(memes);
          vars.topic = m.words.join('、');
          if (/({content}|{answer}|{statement})/.test(tpl)) vars.content = m.words.join(' ');
        }
        break;
      }
      case 'crawler': {
        const snips = this.sampleCrawlerSnippets(10);
        if (snips.length) {
          vars.content = snips[0];
          vars.topic = tokenizeZhEn(snips[0]).slice(0, 6).join(' ');
        }
        break;
      }
      case 'spider': {
        const snips = this.sampleSpiderSnippets(10);
        if (snips.length) {
          vars.content = snips[0];
          vars.topic = tokenizeZhEn(snips[0]).slice(0, 6).join(' ');
        }
        break;
      }
      case 'vocab': {
        const pairs = this.samplePairsFromVocab(6);
        if (pairs.length) {
          const [a, b] = pickOne(pairs);
          vars.a = a;
          vars.b = b;
          vars.topic = `${a} 与 ${b}`;
        } else {
          const topics = (this.runtime?.vocabManager?.vocab || []).slice(4, 200);
          vars.topic = pickOne(topics) || '通用AI';
        }
        break;
      }
      case 'mixed':
      default: {
        // Each source gets a 20% share of the draws.
        const roll = Math.random();
        if (roll < 0.2) {
          vars.topic = this.sampleTopicsFromSessions(6)[0] || '通用AI';
        } else if (roll < 0.4) {
          const p = this.samplePairsFromVocab(1)[0];
          if (p) {
            vars.a = p[0];
            vars.b = p[1];
            vars.topic = `${p[0]} 与 ${p[1]}`;
          }
        } else if (roll < 0.6) {
          const m = this.sampleKvmMemes(1)[0];
          if (m) vars.topic = m.words.slice(0, 6).join('、');
        } else if (roll < 0.8) {
          const c = this.sampleCrawlerSnippets(1)[0];
          if (c) {
            vars.content = c;
            vars.topic = tokenizeZhEn(c).slice(0, 6).join(' ');
          }
        } else {
          const s = this.sampleSpiderSnippets(1)[0];
          if (s) {
            vars.content = s;
            vars.topic = tokenizeZhEn(s).slice(0, 6).join(' ');
          }
        }
      }
    }
    return vars;
  }

  /**
   * Fills in any placeholder the template needs but the chosen source did not provide.
   * @param {string} tpl - Template being filled.
   * @param {object} vars - Mutated in place.
   */
  _fillMissingVars(tpl, vars) {
    if (/({a}|{b})/.test(tpl) && (!vars.a || !vars.b)) {
      const pts = this.samplePairsFromVocab(1)[0] || ['A', 'B'];
      vars.a = vars.a || pts[0];
      vars.b = vars.b || pts[1];
    }
    if (/({statement}|{answer})/.test(tpl) && !vars.statement && !vars.answer) {
      const s = this.sampleSpiderSnippets(1)[0] || this.sampleCrawlerSnippets(1)[0] || '这是一个供评审的示例回答。';
      vars.statement = vars.statement || s;
      vars.answer = vars.answer || s;
    }
    if (/({content})/.test(tpl) && !vars.content) {
      vars.content = (this.sampleSpiderSnippets(1)[0] || this.sampleCrawlerSnippets(1)[0] || '(示例文本)').slice(0, 240);
    }
    if (!vars.topic) vars.topic = '通用AI';
  }

  /**
   * Builds up to `n` prompts not produced before by this sampler.
   *
   * The number of attempts is bounded. Once the available sources are exhausted
   * (every candidate is already in `seen`, or the bank yields no template) the
   * method returns fewer than `n` prompts, possibly none, instead of spinning
   * forever and blocking the event loop.
   *
   * @param {number} [n=3] - Desired number of prompts.
   * @param {string} [mode='mixed'] - Source selector, see `_collectVars`.
   * @returns {string[]} Prompts, each truncated to 280 chars.
   */
  buildMixedPrompts(n = 3, mode = 'mixed') {
    const prompts = [];
    const add = (p) => {
      const key = (p || '').slice(0, 120);
      if (!p || this.seen.has(key)) return;
      this.seen.add(key);
      prompts.push(p.slice(0, 280));
    };

    // Generous cap: duplicates and empty templates consume attempts without adding output.
    const maxAttempts = Math.max(20, n * 10);
    for (let attempt = 0; attempt < maxAttempts && prompts.length < n; attempt++) {
      const cat = this.bank.nextCategory();
      const tpl = this.bank.randomTemplate(cat) || '';
      const vars = this._collectVars(mode, tpl);
      this._fillMissingVars(tpl, vars);

      const built = this.bank.build(tpl, vars);
      if (built) add(built);
    }
    return prompts;
  }
}
|
|
287
|
+
|
|
288
|
+
/**
 * ParamTuner: applies bounded, incremental adjustments to runtime parameters.
 * Supported targets:
 *  - runtime.config.decayK / runtime.config.maxLen
 *  - runtime.config.spiderMix: { onlineWeight, offlineWeight }
 *  - the weight (element 0) of every edge returned by runtime.graph.getAllPoints()
 *  - crawler intensity hints stored on global.__crawler (__tune_perQuery / __tune_maxCrawl)
 */
class ParamTuner {
  /**
   * @param {object} runtime - Host runtime whose `config` (and optionally `graph`) is tuned.
   * @param {object} [options] - Overrides. `bounds` and `step` are merged key-by-key
   *   with the defaults, so overriding one bound keeps the others.
   */
  constructor(runtime, options = {}) {
    this.runtime = runtime;
    const defaults = {
      bounds: {
        decayK: [0.1, 2.0],
        maxLen: [8, 64],
        onlineWeight: [0, 1],
        edgeWeight: [0.1, 5],
        perQuery: [2, 16],
        maxCrawl: [2, 24]
      },
      step: {
        decayK: 0.05,
        maxLen: 2,
        onlineWeight: 0.1,
        edgeWeight: 0.1,
        perQuery: 1,
        maxCrawl: 2
      }
    };
    const extra = options || {};
    // A shallow merge would replace the whole `bounds` object and leave
    // applyDelta clipping against `undefined` for the keys not overridden.
    this.opts = Object.assign({}, defaults, extra, {
      bounds: Object.assign({}, defaults.bounds, extra.bounds),
      step: Object.assign({}, defaults.step, extra.step)
    });
  }

  /**
   * Clamps `v` into the inclusive range [lo, hi].
   * @param {number} v
   * @param {[number, number]} range
   * @returns {number}
   */
  _clip(v, [lo, hi]) { return Math.max(lo, Math.min(hi, v)); }

  /**
   * Adds each numeric field of `delta` to the matching parameter and clips the
   * result to its configured bounds. Non-numeric or absent fields are ignored.
   * @param {object} delta - Any of decayK, maxLen, onlineWeight, edgeWeight, perQuery, maxCrawl.
   */
  applyDelta(delta) {
    const rt = this.runtime;
    if (!rt || !delta) return;
    if (!rt.config) rt.config = {};
    const bounds = this.opts.bounds;

    // decayK
    if (typeof delta.decayK === 'number') {
      const cur = rt.config.decayK || 1;
      rt.config.decayK = this._clip(cur + delta.decayK, bounds.decayK);
    }
    // maxLen (kept integral)
    if (typeof delta.maxLen === 'number') {
      const cur = rt.config.maxLen || 16;
      rt.config.maxLen = Math.round(this._clip(cur + delta.maxLen, bounds.maxLen));
    }
    // spiderMix: online and offline weights always sum to 1
    if (typeof delta.onlineWeight === 'number') {
      const mix = rt.config.spiderMix || { onlineWeight: 0.5, offlineWeight: 0.5 };
      // 0 is a legal weight (the default lower bound), so only fall back to 0.5
      // when the stored value is not a usable number.
      const current = Number.isFinite(mix.onlineWeight) ? mix.onlineWeight : 0.5;
      const next = this._clip(current + delta.onlineWeight, bounds.onlineWeight);
      rt.config.spiderMix = {
        onlineWeight: next,
        offlineWeight: this._clip(1 - next, [0, 1])
      };
    }
    // edgeWeight: uniform nudge of every edge in the graph
    if (typeof delta.edgeWeight === 'number' && rt.graph) {
      for (const p of rt.graph.getAllPoints()) {
        const edges = Array.isArray(p?.connect) ? p.connect : [];
        for (const e of edges) {
          e[0] = this._clip(e[0] + delta.edgeWeight, bounds.edgeWeight);
        }
      }
    }
    // crawler intensity, only when a crawler is registered globally
    if (global.__crawler) {
      if (typeof delta.perQuery === 'number') {
        global.__crawler.__tune_perQuery = this._clip((global.__crawler.__tune_perQuery || 8) + delta.perQuery, bounds.perQuery);
      }
      if (typeof delta.maxCrawl === 'number') {
        global.__crawler.__tune_maxCrawl = this._clip((global.__crawler.__tune_maxCrawl || 12) + delta.maxCrawl, bounds.maxCrawl);
      }
    }
  }

  /**
   * Current values of the tunable parameters, with defaults for anything unset.
   * @returns {{decayK: number, maxLen: number, spiderMix: object, crawler: {perQuery: number, maxCrawl: number}}}
   */
  getSnapshot() {
    const rt = this.runtime || {};
    const mix = (rt.config && rt.config.spiderMix) || { onlineWeight: 0.5, offlineWeight: 0.5 };
    return {
      decayK: rt.config?.decayK ?? 1,
      maxLen: rt.config?.maxLen ?? 16,
      spiderMix: mix,
      crawler: {
        perQuery: global.__crawler?.__tune_perQuery ?? 8,
        maxCrawl: global.__crawler?.__tune_maxCrawl ?? 12
      }
    };
  }
}
|
|
377
|
+
|
|
378
|
+
// ================ Providers (existing ones kept; optional Azure OpenAI added) =====================
|
|
379
|
+
|
|
380
|
+
/**
 * Chat-completions client for an OpenAI-style HTTP API, used both to produce
 * comparison answers and to grade answer pairs.
 */
class OpenAIProvider {
  /**
   * @param {object} [cfg]
   * @param {string} [cfg.apiKey] - Bearer token; defaults to process.env.OPENAI_API_KEY.
   * @param {string} [cfg.baseURL] - API root.
   * @param {string} [cfg.model] - Model name sent with every request.
   */
  constructor({ apiKey = process.env.OPENAI_API_KEY, baseURL = 'https://api.openai.com/v1', model = 'gpt-4.5' } = {}) {
    this.apiKey = apiKey;
    this.baseURL = baseURL;
    this.model = model;

    const headers = { Authorization: `Bearer ${apiKey}`, 'Content-Type': 'application/json' };
    this.http = axios.create({ baseURL, timeout: 30000, headers });
  }

  /**
   * Sends one user prompt and returns the first choice's text.
   * @param {string} prompt
   * @returns {Promise<string>} Trimmed reply; '' when the response carries no content.
   */
  async generate(prompt) {
    const messages = [
      { role: 'system', content: 'You are a helpful assistant.' },
      { role: 'user', content: prompt }
    ];
    const payload = { model: this.model, messages, temperature: 0.7 };
    const { data: reply } = await this.http.post('/chat/completions', payload);
    const text = reply?.choices?.[0]?.message?.content || '';
    return String(text).trim();
  }

  /**
   * Asks the model to grade two answers to the same prompt on a 0-10 scale.
   * Only `this.generate` is used for transport.
   * @param {string} prompt - The question both answers respond to.
   * @param {string} ours - Answer A.
   * @param {string} theirs - Answer B.
   * @returns {Promise<{ours: number, theirs: number, comment: string}>}
   *   Scores fall back to 0 and comment to '' when the reply is not the expected JSON.
   */
  async judge(prompt, ours, theirs) {
    const lines = [];
    lines.push('你是一个严格的评审。请依据以下标准对两段回答进行0-10的评分:');
    lines.push('1) 事实正确性 2) 完整性 3) 清晰度 4) 人类表达风格。给出一个JSON对象:{ "ours": <0-10>, "theirs": <0-10>, "comment": "<简短原因>" }。');
    lines.push(`问题: ${prompt}`);
    lines.push(`回答A(我方): ${ours}`);
    lines.push(`回答B(对照): ${theirs}`);
    lines.push('只输出JSON,不要附加解释。');

    const raw = await this.generate(lines.join('\n'));
    const verdict = safeJSON(raw) || {};
    const toScore = (value) => Number(value ?? 0) || 0;
    return {
      ours: toScore(verdict.ours),
      theirs: toScore(verdict.theirs),
      comment: String(verdict.comment || '').slice(0, 400)
    };
  }
}
|
|
422
|
+
|
|
423
|
+
/**
 * Chat-completions client for an Azure OpenAI deployment.
 * Requests go to `<endpoint>/openai/deployments/<deployment>` with an `api-key`
 * header and an `api-version` query parameter.
 */
class AzureOpenAIProvider {
  /**
   * @param {object} [cfg]
   * @param {string} [cfg.endpoint] - Resource endpoint; defaults to process.env.AZURE_OPENAI_ENDPOINT.
   * @param {string} [cfg.apiKey] - Defaults to process.env.AZURE_OPENAI_API_KEY.
   * @param {string} [cfg.deployment] - Deployment name; defaults to
   *   process.env.AZURE_OPENAI_DEPLOYMENT, then 'gpt-4o'.
   */
  constructor({
    endpoint = process.env.AZURE_OPENAI_ENDPOINT,
    apiKey = process.env.AZURE_OPENAI_API_KEY,
    deployment = process.env.AZURE_OPENAI_DEPLOYMENT || 'gpt-4o'
  } = {}) {
    // Trailing slashes are stripped so the path join below yields a single '/'.
    this.endpoint = (endpoint || '').replace(/\/+$/, '');
    this.deployment = deployment;
    this.http = axios.create({
      baseURL: `${this.endpoint}/openai/deployments/${deployment}`,
      timeout: 30000,
      headers: { 'api-key': apiKey, 'Content-Type': 'application/json' },
      params: { 'api-version': '2024-02-15-preview' }
    });
  }

  /**
   * Sends one user prompt to the deployment and returns the first choice's text.
   * @param {string} prompt
   * @returns {Promise<string>} Trimmed reply; '' when the response carries no content.
   */
  async generate(prompt) {
    const body = {
      messages: [
        { role: 'system', content: 'You are a helpful assistant.' },
        { role: 'user', content: prompt }
      ],
      temperature: 0.7
    };
    const { data } = await this.http.post('/chat/completions', body);
    const msg = data?.choices?.[0]?.message?.content || '';
    return String(msg).trim();
  }

  /**
   * Grades two answers using the OpenAI-style judge prompt and parsing.
   *
   * The shared judge logic is invoked with `this` bound to the Azure provider,
   * so its request goes through this instance's `generate`, i.e. the Azure
   * deployment URL, `api-key` header and `api-version`. Constructing a separate
   * OpenAIProvider against the bare endpoint would send OpenAI Bearer
   * credentials to a path Azure does not serve.
   *
   * @param {string} prompt - The question both answers respond to.
   * @param {string} ours - Answer A.
   * @param {string} theirs - Answer B.
   * @returns {Promise<{ours: number, theirs: number, comment: string}>}
   */
  async judge(prompt, ours, theirs) {
    return OpenAIProvider.prototype.judge.call(this, prompt, ours, theirs);
  }
}
|
|
456
|
+
|
|
457
|
+
/**
 * Client for an Ollama server's `/api/generate` endpoint, used both to produce
 * comparison answers and to grade answer pairs.
 */
class OllamaProvider {
  /**
   * @param {object} [cfg]
   * @param {string} [cfg.host] - Server root; defaults to process.env.OLLAMA_HOST, then the local default port.
   * @param {string} [cfg.model] - Model tag sent with every request.
   */
  constructor({ host = process.env.OLLAMA_HOST || 'http://127.0.0.1:11434', model = 'llama3.1:405b' } = {}) {
    // Trailing slashes are stripped before the host becomes the base URL.
    const trimmedHost = host.replace(/\/+$/, '');
    this.host = trimmedHost;
    this.model = model;
    this.http = axios.create({ baseURL: trimmedHost, timeout: 60000 });
  }

  /**
   * Runs a single non-streaming generation.
   * @param {string} prompt
   * @returns {Promise<string>} Trimmed `response` field; '' when absent.
   */
  async generate(prompt) {
    const request = {
      model: this.model,
      prompt,
      stream: false,
      options: { temperature: 0.7 }
    };
    const { data: reply } = await this.http.post('/api/generate', request);
    const text = reply?.response || '';
    return String(text).trim();
  }

  /**
   * Asks the model to grade two answers to the same prompt on a 0-10 scale.
   * @param {string} prompt - The question both answers respond to.
   * @param {string} ours - Answer A.
   * @param {string} theirs - Answer B.
   * @returns {Promise<{ours: number, theirs: number, comment: string}>}
   *   Scores fall back to 0 and comment to '' when the reply is not the expected JSON.
   */
  async judge(prompt, ours, theirs) {
    const lines = [];
    lines.push('你是一个严格的评审。依据:事实正确性/完整性/清晰度/人类表达风格,对两段回答打0-10分。');
    lines.push('输出JSON: { "ours": <0-10>, "theirs": <0-10>, "comment": "<简短原因>" }。');
    lines.push(`问题: ${prompt}`);
    lines.push(`回答A(我方): ${ours}`);
    lines.push(`回答B(对照): ${theirs}`);
    lines.push('只输出JSON,勿加解释。');

    const raw = await this.generate(lines.join('\n'));
    const verdict = safeJSON(raw) || {};
    const toScore = (value) => Number(value ?? 0) || 0;
    return {
      ours: toScore(verdict.ours),
      theirs: toScore(verdict.theirs),
      comment: String(verdict.comment || '').slice(0, 400)
    };
  }
}
|
|
491
|
+
|
|
492
|
+
/**
 * Creates a provider from a spec string.
 *
 * Accepted forms: `openai:MODEL`, `ollama:MODEL`, `azure:DEPLOYMENT`. The name
 * part may be omitted, in which case each provider's default is used.
 *
 * The spec is split on the FIRST colon only, so model tags that contain colons
 * themselves (e.g. `ollama:llama3.1:405b`) are passed through intact.
 * `String#split(':', 2)` would discard everything after the second colon.
 *
 * @param {string} spec - Provider spec.
 * @returns {{type: string, p: object|null}} `type` is 'openai' | 'ollama' | 'azure' | 'none';
 *   `p` is null for 'none' (missing, non-string, or unknown spec).
 */
function buildProvider(spec) {
  if (!spec || typeof spec !== 'string') return { type: 'none', p: null };

  const sep = spec.indexOf(':');
  const kind = sep === -1 ? spec : spec.slice(0, sep);
  const name = sep === -1 ? '' : spec.slice(sep + 1);

  switch (kind) {
    case 'openai':
      return { type: 'openai', p: new OpenAIProvider({ model: name || 'gpt-4.5' }) };
    case 'ollama':
      return { type: 'ollama', p: new OllamaProvider({ model: name || 'llama3.1:405b' }) };
    case 'azure':
      return { type: 'azure', p: new AzureOpenAIProvider({ deployment: name || process.env.AZURE_OPENAI_DEPLOYMENT }) };
    default:
      return { type: 'none', p: null };
  }
}
|
|
508
|
+
|
|
509
|
+
/**
 * Splits text into lowercase tokens.
 * Every character other than a-z, 0-9, the \u4e00-\u9fa5 range and whitespace
 * is replaced by a space; the result is split on whitespace runs.
 * @param {*} s - Input; falsy values are treated as ''.
 * @returns {string[]} Non-empty tokens in order of appearance.
 */
function tokenizeZhEn(s) {
  const lowered = String(s || '').toLowerCase();
  const cleaned = lowered.replace(/[^a-z0-9\u4e00-\u9fa5\s]/g, ' ');
  const tokens = [];
  for (const piece of cleaned.split(/\s+/)) {
    if (piece) tokens.push(piece);
  }
  return tokens;
}
|
|
515
|
+
|
|
516
|
+
/**
 * Model-free scoring of two answers against a prompt.
 *
 * Each answer's raw score is: number of its distinct tokens that also occur in
 * the prompt, plus min(10, sqrt(token count)), plus 3 x (distinct tokens /
 * token count). The raw score is then clamped to [0, 10].
 *
 * @param {string} prompt - The question.
 * @param {string} ours - Answer A.
 * @param {string} theirs - Answer B.
 * @returns {{ours: number, theirs: number, comment: string}} comment is always 'heuristic'.
 */
function heuristicScore(prompt, ours, theirs) {
  const promptVocab = new Set(tokenizeZhEn(prompt));

  const rate = (answer) => {
    const tokens = tokenizeZhEn(answer);
    const distinct = new Set(tokens);
    let overlap = 0;
    for (const tok of distinct) {
      if (promptVocab.has(tok)) overlap += 1;
    }
    const diversity = distinct.size / Math.max(1, tokens.length);
    return overlap + Math.min(10, Math.sqrt(tokens.length)) + 3 * diversity;
  };

  // The divide-then-multiply is kept so floating-point results match exactly.
  const clamp = (x) => Math.max(0, Math.min(10, (x / 10) * 10));

  const rawOurs = rate(ours);
  const rawTheirs = rate(theirs);
  return { ours: clamp(rawOurs), theirs: clamp(rawTheirs), comment: 'heuristic' };
}
|
|
534
|
+
|
|
535
|
+
/**
 * AdversaryScheduler: periodically pits the local runtime against an external
 * model. Each round samples prompts, collects both answers, scores them (LLM
 * judge or heuristic), logs the result as JSONL and, when enabled, nudges
 * runtime parameters and feeds words from better external answers back into
 * the runtime's graph/KVM.
 */
class AdversaryScheduler {
  /**
   * @param {object} runtime - Host runtime (processInput, generateResponseWithMemes, graph, kvm, ...).
   * @param {object} [options] - Shallow overrides of the defaults below.
   */
  constructor(runtime, options = {}) {
    this.runtime = runtime;
    this.opts = Object.assign({
      providerSpec: process.env.ADV_MODEL || 'ollama:llama3.1:405b',
      judgeMode: (process.env.ADV_JUDGE || 'llm'),
      intervalMs: 60_000,
      batchSize: 3,
      maxPromptLen: 220,
      promptMode: process.env.ADV_PROMPT_MODE || 'mixed', // mixed|sessions|kvm|crawler|spider|vocab
      logDir: path.join(__dirname, 'adversary_logs'),
      adjustParams: true,
      // Relative importance of each tunable parameter.
      targetWeights: { decayK: 1.0, maxLen: 0.7, onlineWeight: 0.8, edgeWeight: 0.4, perQuery: 0.5, maxCrawl: 0.5 },
      // Per-round step size (sign = direction applied when we are behind).
      step: { decayK: -0.05, maxLen: +2, onlineWeight: +0.1, edgeWeight: +0.05, perQuery: +1, maxCrawl: +2 },
      // Weight of the newest round in the exponential moving average.
      smoothAlpha: 0.6
    }, options);

    const { p, type } = buildProvider(this.opts.providerSpec);
    this.providerType = type;
    this.provider = p;

    this.running = false;
    this.timer = null;
    this.stats = {
      rounds: 0,
      lastAt: 0,
      lastReport: null,
      provider: this.opts.providerSpec,
      judgeMode: this.opts.judgeMode,
      promptMode: this.opts.promptMode,
      movingDiff: 0 // exponential moving average of (oursAvg - theirsAvg)
    };

    this.sampler = new PromptSampler(runtime);
    this.tuner = new ParamTuner(runtime);
    fs.mkdirSync(this.opts.logDir, { recursive: true });
  }

  /**
   * Swaps the runtime and rebuilds the sampler and tuner around it.
   * @param {object} runtime
   */
  setRuntime(runtime) {
    this.runtime = runtime;
    this.sampler = new PromptSampler(runtime);
    this.tuner = new ParamTuner(runtime);
  }

  /**
   * Selects the prompt source for subsequent rounds.
   * @param {string} mode - Falsy values reset to 'mixed'.
   */
  setPromptMode(mode) {
    this.opts.promptMode = mode || 'mixed';
    // Keep the status report in sync; otherwise getStatus() keeps showing the
    // mode that was active at construction time.
    this.stats.promptMode = this.opts.promptMode;
  }

  /**
   * Merges new target weights over the current ones.
   * @param {object} targetWeights
   */
  setTargets(targetWeights) {
    this.opts.targetWeights = Object.assign({}, this.opts.targetWeights, targetWeights || {});
  }

  /**
   * @returns {object} Copy of the stats plus `running` and the tuner's parameter snapshot.
   */
  getStatus() {
    return Object.assign({}, this.stats, {
      running: this.running,
      params: this.tuner.getSnapshot()
    });
  }

  /**
   * Starts the periodic evaluation loop: one round immediately, then one every
   * `intervalMs`. No-op when already running or when no provider is configured.
   *
   * A round that is still in progress when the next tick fires is not
   * duplicated. Rounds involve several slow model calls and would otherwise
   * overlap and race on stats and tuned parameters.
   */
  start() {
    if (this.running) return;
    if (!this.provider) {
      console.warn('[ADVERSARY] 未配置外部模型,无法启动');
      return;
    }
    this.running = true;
    let inFlight = false;
    const loop = async () => {
      if (!this.running || inFlight) return;
      inFlight = true;
      try {
        const report = await this.evaluateOnce();
        this.stats.lastReport = report;
        this.stats.lastAt = Date.now();
        this.stats.rounds++;
      } catch (e) {
        console.warn('[ADVERSARY] 本轮失败:', e?.message ?? e);
      } finally {
        inFlight = false;
      }
    };
    loop();
    this.timer = setInterval(loop, this.opts.intervalMs);
    console.log(`[ADVERSARY] started provider=${this.opts.providerSpec}, mode=${this.opts.judgeMode}, prompts=${this.opts.promptMode}`);
  }

  /** Stops the loop; a round already in progress is allowed to finish. */
  stop() {
    this.running = false;
    if (this.timer) clearInterval(this.timer);
    this.timer = null;
    console.log('[ADVERSARY] stopped');
  }

  /**
   * Runs one evaluation round.
   * @param {string[]|null} [explicitPrompts] - Prompts to use instead of sampling.
   * @returns {Promise<{n: number, oursAvg: number, theirsAvg: number, diff: number, movingDiff: number, items: object[]}>}
   * @throws {Error} 'provider not ready' when no provider is configured; provider
   *   generation errors also propagate.
   */
  async evaluateOnce(explicitPrompts = null) {
    if (!this.provider) throw new Error('provider not ready');
    let prompts = explicitPrompts && explicitPrompts.length
      ? explicitPrompts
      : this.sampler.buildMixedPrompts(this.opts.batchSize, this.opts.promptMode);
    if (!prompts.length) prompts = this.samplePrompts(this.opts.batchSize); // legacy fallback

    const items = [];
    for (const prompt of prompts) {
      const ours = await this.answerByRuntime(prompt);
      const theirs = await this.provider.generate(prompt);
      let judge = null;
      if (this.opts.judgeMode === 'llm') {
        // A failing LLM judge degrades to the local heuristic rather than aborting the round.
        try { judge = await this.provider.judge(prompt, ours, theirs); }
        catch { judge = heuristicScore(prompt, ours, theirs); }
      } else {
        judge = heuristicScore(prompt, ours, theirs);
      }
      items.push({ prompt, ours, theirs, judge });
      this.appendLog({ t: nowISO(), prompt, ours, theirs, judge, provider: this.opts.providerSpec });
      await this.applyFeedback({ prompt, ours, theirs, judge });
      await sleep(250);
    }

    const avg = (arr) => arr.reduce((a, b) => a + b, 0) / Math.max(1, arr.length);
    const oursAvg = avg(items.map((i) => i.judge.ours));
    const theirsAvg = avg(items.map((i) => i.judge.theirs));
    const diff = oursAvg - theirsAvg;

    // Exponential moving average; the very first round seeds it directly.
    this.stats.movingDiff = this.stats.rounds === 0
      ? diff
      : this.opts.smoothAlpha * diff + (1 - this.opts.smoothAlpha) * this.stats.movingDiff;

    // When movingDiff < 0 (we are behind) step in the configured direction,
    // otherwise step back; crawler intensity is only ever raised.
    if (this.opts.adjustParams) {
      const behind = this.stats.movingDiff < 0;
      const dir = behind ? 1 : -1;
      const gain = Math.max(0.2, Math.min(1.0, Math.abs(this.stats.movingDiff))); // 0.2..1.0
      const s = this.opts.step;
      const tw = this.opts.targetWeights;
      const delta = {
        decayK: dir * (s.decayK || -0.05) * (tw.decayK || 1) * gain,
        maxLen: dir * (s.maxLen || 2) * (tw.maxLen || 1) * gain,
        onlineWeight: dir * (s.onlineWeight || 0.1) * (tw.onlineWeight || 1) * gain,
        edgeWeight: dir * (s.edgeWeight || 0.05) * (tw.edgeWeight || 1) * 0.5 * gain,
        perQuery: (behind ? (s.perQuery || 1) : 0) * (tw.perQuery || 1) * 0.5,
        maxCrawl: (behind ? (s.maxCrawl || 2) : 0) * (tw.maxCrawl || 1) * 0.5
      };
      this.tuner.applyDelta(delta);
      this.appendLog({ t: nowISO(), type: 'param-adjust', delta, snapshot: this.tuner.getSnapshot() });
    }

    return { n: items.length, oursAvg, theirsAvg, diff, movingDiff: this.stats.movingDiff, items };
  }

  /**
   * Legacy fallback prompt source: builds prompts from KVM entries and
   * vocabulary groups, plus two fixed prompts, and returns `n` of them at random.
   * @param {number} [n=3]
   * @returns {string[]} Prompts truncated to `maxPromptLen`.
   */
  samplePrompts(n = 3) {
    const pool = [];
    const kvm = this.runtime?.kvm?.memory || this.runtime?.kvm?.exportEntries?.() || [];
    const raw = kvm instanceof Map ? Array.from(kvm.entries()) : kvm;
    const entries = Array.isArray(raw) ? raw : [];
    for (let i = 0; i < Math.min(50, entries.length); i++) {
      // Entries are expected to be [key, words] pairs; anything else is skipped.
      const entry = entries[i];
      const ws = Array.isArray(entry) && Array.isArray(entry[1]) ? entry[1] : [];
      if (ws.length >= 3) pool.push(`请用通俗语言解释 ${uniq(ws).slice(0, 6).join('、')} 之间的关系。`);
    }
    const vocab = this.runtime?.vocabManager?.vocab || [];
    for (let i = 0; i < 10 && i < vocab.length; i += 5) {
      const group = uniq(vocab.slice(i, i + 10).filter((w) => w && w.length > 2)).slice(0, 6);
      if (group.length >= 3) pool.push(`围绕 ${group.join('、')},给出一段深入的科普说明。`);
    }
    pool.push('给我一个关于边界事件和滑动窗口分区图存储的通俗解释,并举例。');
    pool.push('比较 llama3.1-405b 与传统GPT在长文结构化总结上的差异与优劣。');

    const shuffled = pool.map((p) => p.slice(0, this.opts.maxPromptLen));
    // Fisher-Yates: a random sort comparator gives a biased order.
    for (let i = shuffled.length - 1; i > 0; i--) {
      const j = Math.floor(Math.random() * (i + 1));
      [shuffled[i], shuffled[j]] = [shuffled[j], shuffled[i]];
    }
    return shuffled.slice(0, n);
  }

  /**
   * Produces the local runtime's answer for a prompt.
   * @param {string} prompt
   * @returns {Promise<string>}
   */
  async answerByRuntime(prompt) {
    const words = tokenizeZhEn(prompt);
    // Priming the runtime is best effort; generation proceeds even if it fails.
    try { this.runtime.processInput(words, { addNewWords: false }); } catch {}
    const ans = await this.runtime.generateResponseWithMemes(words, this.runtime?.config?.maxLen || 16);
    return typeof ans === 'string' ? ans : String(ans || '');
  }

  /**
   * Appends one JSON line to today's log file (`YYYY-MM-DD.jsonl` in logDir).
   * Logging failures are deliberately ignored so they never break a round.
   * @param {object} obj
   */
  appendLog(obj) {
    try {
      const f = path.join(this.opts.logDir, `${new Date().toISOString().slice(0, 10)}.jsonl`);
      fs.appendFileSync(f, JSON.stringify(obj) + '\n', 'utf-8');
    } catch {}
  }

  /**
   * When the external answer beat ours by more than 0.5, stores up to 20 of its
   * distinct tokens as a new meme and links it to prompt words already present
   * in the word graph. Errors from the runtime are swallowed (best effort).
   * @param {{prompt: string, ours: string, theirs: string, judge: object}} round
   */
  async applyFeedback({ prompt, ours, theirs, judge }) {
    if (!this.opts.adjustParams || !judge) return;
    const better = (judge.theirs || 0) > (judge.ours || 0) + 0.5;
    if (!better) return;

    const addWords = uniq(tokenizeZhEn(theirs)).filter((w) => w.length >= 2).slice(0, 20);
    if (addWords.length < 3) return;

    const memeId = `meme_adv_${Date.now()}_${Math.floor(Math.random() * 1e4)}`;
    try {
      this.runtime.graph.addPoint(memeId, []);
      this.runtime.kvm.set(memeId, addWords);
      const baseWords = tokenizeZhEn(prompt).slice(0, 6);
      for (const mw of baseWords) {
        const exist = this.runtime.wordGraph.points.get(mw);
        if (exist) {
          this.runtime.graph.addEdge(memeId, mw, Math.max(1, Math.ceil(addWords.length / 5)), 2);
        }
      }
    } catch {}
  }
}
|
|
736
|
+
|
|
737
|
+
module.exports = {
|
|
738
|
+
AdversaryScheduler,
|
|
739
|
+
OpenAIProvider,
|
|
740
|
+
OllamaProvider,
|
|
741
|
+
AzureOpenAIProvider,
|
|
742
|
+
PromptBank,
|
|
743
|
+
PromptSampler,
|
|
744
|
+
ParamTuner
|
|
745
|
+
};
|