079project 2.0.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/crawler/agent.cjs +97 -0
- package/crawler/index.cjs +515 -0
- package/crawler/storage.cjs +163 -0
- package/forwarder.js +106 -37
- package/groupmanager.cjs +2 -1
- package/main_Serve.cjs +1281 -270
- package/main_Study.cjs +1731 -375
- package/package.json +6 -1
- package/robots/seeds.txt +2 -0
- package/schedule.cjs +745 -0
- package/todo-list.txt +0 -86
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const crypto = require('crypto');
|
|
4
|
+
const { planQueries, extractSerpLinks, relevanceScore } = require('./agent.cjs');
|
|
5
|
+
// ...existing code...
|
|
6
|
+
|
|
7
|
+
/**
 * Produce a random dotted-quad IPv4 string.
 *
 * Each of the four octets is drawn independently with Math.random() and
 * falls in the inclusive range 1..254.
 *
 * @returns {string} Address such as "23.201.7.118".
 */
function randomIPv4() {
  const octets = [];
  for (let i = 0; i < 4; i += 1) {
    // floor(r * 254) is 0..253; shifting by one keeps 0 and 255 out.
    octets.push(Math.floor(Math.random() * 254) + 1);
  }
  return octets.join('.');
}
|
|
10
|
+
/**
 * File-backed persistence for the crawler.
 *
 * Keeps the crawl frontier, the visited/enqueued URL hashes and a few
 * counters in `<baseDir>/state.json`, and stores cleaned page text as
 * `.txt` files under `<baseDir>/docs/<YYYY-MM-DD>/`.
 * All file-system access is synchronous.
 */
class CrawlerStorage {
  /**
   * @param {string} [baseDir] Root directory for state and documents.
   *   Defaults to `crawler_data` next to this module's parent directory.
   */
  constructor(baseDir = path.join(__dirname, '..', 'crawler_data')) {
    this.baseDir = baseDir;
    this.stateFile = path.join(this.baseDir, 'state.json');
    this.docsDir = path.join(this.baseDir, 'docs');
    this.ensureDirs();
    this.state = this._emptyState();
    this._loadState();
  }

  /**
   * Build a fresh, fully-populated state object.
   * @returns {{frontier: string[], visited: Object, enqueued: Object, stats: Object}}
   */
  _emptyState() {
    return {
      frontier: [], // queue of URLs waiting to be fetched
      visited: {}, // urlHash -> timestamp (ms) set by markVisited
      enqueued: {}, // urlHash -> 1 for every URL that was ever queued
      stats: { fetched: 0, saved: 0, deduped: 0, errors: 0, lastSave: 0 }
    };
  }

  /** Create the base and document directories if they do not exist yet. */
  ensureDirs() {
    fs.mkdirSync(this.baseDir, { recursive: true });
    fs.mkdirSync(this.docsDir, { recursive: true });
  }

  /**
   * @param {*} str Value to hash (coerced with String()).
   * @returns {string} Hex SHA-1 digest, used as the dedup key for URLs.
   */
  _hash(str) {
    return crypto.createHash('sha1').update(String(str)).digest('hex');
  }

  /**
   * Load state.json into `this.state`, if present.
   *
   * The parsed object is merged over the defaults field by field, so a
   * partial, older or hand-edited file cannot leave `frontier`, `visited`,
   * `enqueued` or `stats` missing or of the wrong shape (every other method
   * dereferences them unconditionally). Unknown extra keys are kept.
   * Failures are logged and leave the current state untouched.
   */
  _loadState() {
    try {
      if (!fs.existsSync(this.stateFile)) return;
      const loaded = JSON.parse(fs.readFileSync(this.stateFile, 'utf-8'));
      const isPlainMap = (v) => Boolean(v) && typeof v === 'object' && !Array.isArray(v);
      if (!isPlainMap(loaded)) return;
      const defaults = this._emptyState();
      this.state = {
        ...loaded,
        frontier: Array.isArray(loaded.frontier) ? loaded.frontier : defaults.frontier,
        visited: isPlainMap(loaded.visited) ? loaded.visited : defaults.visited,
        enqueued: isPlainMap(loaded.enqueued) ? loaded.enqueued : defaults.enqueued,
        stats: { ...defaults.stats, ...(isPlainMap(loaded.stats) ? loaded.stats : {}) }
      };
    } catch (e) {
      console.warn('[CRAWLER][STATE] load failed:', e.message);
    }
  }

  /**
   * Write `this.state` to state.json (pretty-printed).
   *
   * `stats.lastSave` is stamped before serialising so the value on disk
   * matches the value in memory; it is rolled back if the write fails.
   * Failures are logged, never thrown.
   */
  _saveState() {
    let previousLastSave;
    let stamped = false;
    try {
      previousLastSave = this.state.stats.lastSave;
      this.state.stats.lastSave = Date.now();
      stamped = true;
      fs.writeFileSync(this.stateFile, JSON.stringify(this.state, null, 2), 'utf-8');
    } catch (e) {
      if (stamped) this.state.stats.lastSave = previousLastSave;
      console.warn('[CRAWLER][STATE] save failed:', e.message);
    }
  }

  /**
   * Normalise one candidate URL and append it to the frontier when it is an
   * http(s) URL that has been neither queued nor visited before.
   * Does not persist; callers save once per batch.
   *
   * @param {*} candidate Raw URL value.
   * @returns {boolean} true when the URL was added.
   */
  _enqueue(candidate) {
    const url = String(candidate || '').trim();
    if (!/^https?:\/\//i.test(url)) return false;
    const h = this._hash(url);
    if (this.state.enqueued[h] || this.state.visited[h]) return false;
    this.state.frontier.push(url);
    this.state.enqueued[h] = 1;
    return true;
  }

  /**
   * Queue seed URLs (deduplicated).
   *
   * @param {Iterable<string>} [urls]
   * @returns {number} How many URLs were actually added.
   */
  addSeed(urls = []) {
    let n = 0;
    for (const u of urls || []) {
      if (this._enqueue(u)) n++;
    }
    if (n > 0) this._saveState();
    return n;
  }

  /**
   * Pop the next not-yet-visited URL from the front of the frontier.
   * The removal is not persisted here; the next save writes it out.
   *
   * @returns {string|null} The URL, or null when the frontier is exhausted.
   */
  nextUrl() {
    while (this.state.frontier.length > 0) {
      const url = this.state.frontier.shift();
      if (url && !this.state.visited[this._hash(url)]) return url;
    }
    return null;
  }

  /**
   * Record `url` as visited (with the current timestamp) and persist.
   * @param {string} url
   */
  markVisited(url) {
    this.state.visited[this._hash(url)] = Date.now();
    this._saveState();
  }

  /**
   * Queue newly discovered links (deduplicated), using the same
   * normalisation as addSeed.
   *
   * @param {Iterable<string>} [links]
   * @param {number} [limitPerBatch=200] Maximum links accepted per call;
   *   a limit of 0 or less accepts none.
   * @returns {number} How many links were actually added.
   */
  enqueueLinks(links = [], limitPerBatch = 200) {
    let n = 0;
    for (const l of links || []) {
      if (n >= limitPerBatch) break;
      if (this._enqueue(l)) n++;
    }
    if (n > 0) this._saveState();
    return n;
  }

  /**
   * Save cleaned page text as a .txt file with a small metadata header
   * (URL, Title, FetchedAt, Lang, Depth, Parent).
   *
   * @param {{url?: string, title?: string, lang?: string, depth?: number,
   *          parent?: string, text?: string}} doc
   * @returns {string|null} Path of the written file, or null when the body
   *   is empty or the write failed (failures bump `stats.errors`).
   */
  saveDocument(doc) {
    try {
      const source = doc || {};
      // Reject empty bodies first so no empty day directory is left behind.
      const body = String(source.text || '').trim();
      if (!body) return null;

      const now = new Date();
      const pad2 = (v) => String(v).padStart(2, '0');
      const dir = path.join(
        this.docsDir,
        `${now.getFullYear()}-${pad2(now.getMonth() + 1)}-${pad2(now.getDate())}`
      );
      fs.mkdirSync(dir, { recursive: true });

      // The millisecond prefix keeps names sortable; loadRecentDocs relies on it.
      const name = `${Date.now()}_${Math.floor(Math.random() * 1e6)}.txt`;
      const file = path.join(dir, name);
      const meta = [
        `URL: ${source.url || ''}`,
        `Title: ${source.title || ''}`,
        `FetchedAt: ${now.toISOString()}`,
        `Lang: ${source.lang || ''}`,
        `Depth: ${source.depth ?? 0}`,
        `Parent: ${source.parent || ''}`
      ].join('\n');

      fs.writeFileSync(file, meta + '\n\n' + body, 'utf-8');
      this.state.stats.saved++;
      this._saveState();
      return file;
    } catch (e) {
      console.warn('[CRAWLER][DOC] save failed:', e.message);
      this.state.stats.errors++;
      this._saveState();
      return null;
    }
  }

  /**
   * @returns {Object} Copy of the counters plus the current frontier length
   *   and number of visited URLs.
   */
  stats() {
    return Object.assign({}, this.state.stats, {
      frontier: this.state.frontier.length,
      visited: Object.keys(this.state.visited).length
    });
  }

  /**
   * Read back saved documents, newest day directory and newest file name
   * first, for downstream consumption.
   *
   * @param {number} [maxFiles=20] Upper bound on returned documents;
   *   0 or less yields an empty array.
   * @returns {Array<{path: string, text: string}>}
   */
  loadRecentDocs(maxFiles = 20) {
    const out = [];
    if (!(maxFiles > 0) || !fs.existsSync(this.docsDir)) return out;

    const days = fs.readdirSync(this.docsDir)
      .filter((entry) => /^\d{4}-\d{2}-\d{2}$/.test(entry))
      .sort()
      .reverse();

    for (const day of days) {
      const dir = path.join(this.docsDir, day);
      let files;
      try {
        files = fs.readdirSync(dir).filter((f) => f.endsWith('.txt')).sort().reverse();
      } catch (e) {
        // A date-named entry that is not a readable directory must not abort the scan.
        console.warn('[CRAWLER][DOC] list failed:', e.message);
        continue;
      }
      for (const f of files) {
        const p = path.join(dir, f);
        try {
          out.push({ path: p, text: fs.readFileSync(p, 'utf-8') });
        } catch (e) {
          console.warn('[CRAWLER][DOC] read failed:', e.message);
          continue;
        }
        if (out.length >= maxFiles) return out;
      }
    }
    return out;
  }
}
|
|
162
|
+
|
|
163
|
+
module.exports = { CrawlerStorage };
|
package/forwarder.js
CHANGED
|
@@ -285,6 +285,43 @@ app.get('/', (req, res) => {
|
|
|
285
285
|
<button type="submit" class="btn btn-primary">应用参数</button>
|
|
286
286
|
<button type="button" id="resetParams" class="btn btn-secondary">重置默认值</button>
|
|
287
287
|
</div>
|
|
288
|
+
<hr class="my-4"/>
|
|
289
|
+
<h6 class="mb-3">激活与传递函数</h6>
|
|
290
|
+
<div class="row g-3">
|
|
291
|
+
<div class="col-md-6">
|
|
292
|
+
<label for="activationType" class="form-label">Activation Function</label>
|
|
293
|
+
<select id="activationType" class="form-select">
|
|
294
|
+
<option value="identity">identity</option>
|
|
295
|
+
<option value="relu" selected>relu</option>
|
|
296
|
+
<option value="leaky_relu">leaky_relu</option>
|
|
297
|
+
<option value="tanh">tanh</option>
|
|
298
|
+
<option value="sigmoid">sigmoid</option>
|
|
299
|
+
<option value="elu">elu</option>
|
|
300
|
+
<option value="softplus">softplus</option>
|
|
301
|
+
<option value="gelu">gelu</option>
|
|
302
|
+
<option value="custom">custom</option>
|
|
303
|
+
</select>
|
|
304
|
+
<small class="text-muted">Node 侧将对节点聚合与扩散值应用该激活</small>
|
|
305
|
+
<textarea id="activationCustom" class="form-control mt-2" rows="3" style="display:none;" placeholder="Custom activation JS. Example: return x > 0 ? x : 0;"></textarea>
|
|
306
|
+
</div>
|
|
307
|
+
<div class="col-md-6">
|
|
308
|
+
<label for="transferType" class="form-label">Transfer Function</label>
|
|
309
|
+
<select id="transferType" class="form-select">
|
|
310
|
+
<option value="linear" selected>linear</option>
|
|
311
|
+
<option value="exp">exp</option>
|
|
312
|
+
<option value="inverse">inverse</option>
|
|
313
|
+
<option value="capped">capped</option>
|
|
314
|
+
<option value="custom">custom</option>
|
|
315
|
+
</select>
|
|
316
|
+
<small class="text-muted">传递函数决定沿边传播的信号衰减方式</small>
|
|
317
|
+
<textarea id="transferCustom" class="form-control mt-2" rows="3" style="display:none;" placeholder="Custom transfer JS. Args: value, weight, decayK, ctx. Example: return value * Math.exp(-(decayK*weight));"></textarea>
|
|
318
|
+
</div>
|
|
319
|
+
</div>
|
|
320
|
+
|
|
321
|
+
<div class="d-flex justify-content-between mt-4">
|
|
322
|
+
<button type="submit" class="btn btn-primary">应用参数</button>
|
|
323
|
+
<button type="button" id="resetParams" class="btn btn-secondary">重置默认值</button>
|
|
324
|
+
</div>
|
|
288
325
|
|
|
289
326
|
</form>
|
|
290
327
|
</div>
|
|
@@ -534,6 +571,7 @@ app.get('/', (req, res) => {
|
|
|
534
571
|
document.getElementById('confirmDeleteSnapshot').addEventListener('click', function() {
|
|
535
572
|
deleteSnapshot();
|
|
536
573
|
});
|
|
574
|
+
bindActivationEditors(); // 新增:绑定选择器与自定义编辑显隐
|
|
537
575
|
});
|
|
538
576
|
|
|
539
577
|
// 发送消息
|
|
@@ -595,6 +633,8 @@ app.get('/', (req, res) => {
|
|
|
595
633
|
}
|
|
596
634
|
|
|
597
635
|
// 加载模型参数
|
|
636
|
+
|
|
637
|
+
// 加载模型参数(保持接口不变)
|
|
598
638
|
function loadModelParams() {
|
|
599
639
|
fetch('/api/model/params')
|
|
600
640
|
.then(response => response.json())
|
|
@@ -672,6 +712,13 @@ function updateParamSliders(params) {
|
|
|
672
712
|
document.getElementById('waitingTimeRange').value = params.waitingTime;
|
|
673
713
|
document.getElementById('waitingTimeValue').textContent = params.waitingTime;
|
|
674
714
|
}
|
|
715
|
+
if (params.activationType) document.getElementById('activationType').value = params.activationType;
|
|
716
|
+
if (params.transferType) document.getElementById('transferType').value = params.transferType;
|
|
717
|
+
if (typeof params.activationCustom === 'string') document.getElementById('activationCustom').value = params.activationCustom;
|
|
718
|
+
if (typeof params.transferCustom === 'string') document.getElementById('transferCustom').value = params.transferCustom;
|
|
719
|
+
|
|
720
|
+
// 触发一次显隐同步
|
|
721
|
+
if (typeof bindActivationEditors === 'function') bindActivationEditors();
|
|
675
722
|
} // 添加这个花括号
|
|
676
723
|
|
|
677
724
|
function bindSliderEvents() {
|
|
@@ -705,43 +752,47 @@ function bindSliderEvents() {
|
|
|
705
752
|
});
|
|
706
753
|
}
|
|
707
754
|
// 更新模型参数
|
|
708
|
-
|
|
709
|
-
|
|
710
|
-
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
755
|
+
// 更新模型参数:增加激活/传递函数字段
|
|
756
|
+
/**
 * Collect the model parameters from the form controls and POST them as JSON
 * to /api/model/params, then report the outcome with alert().
 *
 * Besides the numeric sliders this also sends the activation / transfer
 * function selection and their custom source text.
 * The response body is parsed as JSON regardless of HTTP status and is
 * expected to carry { success, error? }.
 */
function updateModelParams() {
    // Small readers so every field is parsed the same way (radix always 10).
    const valueOf = (id) => document.getElementById(id).value;
    const intOf = (id) => parseInt(valueOf(id), 10);
    const floatOf = (id) => parseFloat(valueOf(id));

    const params = {
        decayFactor: floatOf('decayFactorRange'),
        maxMemeWords: intOf('maxMemeWordsRange'),
        minOverlapThreshold: intOf('minOverlapThresholdRange'),
        maliciousThreshold: floatOf('maliciousThresholdRange'),
        learningIterations: intOf('learningIterationsRange'),
        iteration: intOf('iterationRange'),
        threshold: intOf('thresholdRange'),
        decay: floatOf('decayRange'),
        decayK: floatOf('decayKRange'),
        maxLen: intOf('maxLenRange'),
        edgeWeight: floatOf('edgeWeightRange'),
        communicateCount: intOf('communicateCountRange'),
        // The slider id carries the same "Range" suffix as every other
        // control (updateParamSliders writes to 'waitingTimeRange').
        waitingTime: intOf('waitingTimeRange'),
        // Activation / transfer function selection and custom code.
        activationType: valueOf('activationType'),
        transferType: valueOf('transferType'),
        activationCustom: valueOf('activationCustom'),
        transferCustom: valueOf('transferCustom')
    };

    fetch('/api/model/params', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(params),
    })
    .then(response => response.json())
    .then(data => {
        if (data.success) {
            alert('参数更新成功!');
        } else {
            alert('参数更新失败: ' + (data.error || '未知错误'));
        }
    })
    .catch(error => {
        console.error('Error updating parameters:', error);
        alert('更新参数时发生错误: ' + error.message);
    });
}
|
|
739
|
-
})
|
|
740
|
-
.catch(error => {
|
|
741
|
-
console.error('Error updating parameters:', error);
|
|
742
|
-
alert('更新参数时发生错误: ' + error.message);
|
|
743
|
-
});
|
|
744
|
-
}
|
|
745
796
|
|
|
746
797
|
// 重置模型参数
|
|
747
798
|
function resetModelParams() {
|
|
@@ -776,6 +827,19 @@ function updateModelParams() {
|
|
|
776
827
|
.catch(error => console.error('Error loading system status:', error));
|
|
777
828
|
}
|
|
778
829
|
|
|
830
|
+
/**
 * Show the custom-code textarea next to the activation / transfer selects
 * only while the matching select is set to "custom".
 *
 * This is invoked on page load and again after every parameter refresh, so
 * it must be safe to call repeatedly: the change listener is attached to
 * each select only once (tracked with a data attribute), while the
 * visibility sync runs on every call.
 * Does nothing if any of the four elements is missing from the page.
 */
function bindActivationEditors() {
    const actSel = document.getElementById('activationType');
    const actTxt = document.getElementById('activationCustom');
    const trSel = document.getElementById('transferType');
    const trTxt = document.getElementById('transferCustom');
    if (!actSel || !actTxt || !trSel || !trTxt) return;

    const sync = () => {
        actTxt.style.display = (actSel.value === 'custom') ? '' : 'none';
        trTxt.style.display = (trSel.value === 'custom') ? '' : 'none';
    };

    [actSel, trSel].forEach((sel) => {
        // A fresh closure is never deduplicated by addEventListener, so
        // remember that this select is already wired up.
        if (sel.dataset.customEditorBound === '1') return;
        sel.addEventListener('change', sync);
        sel.dataset.customEditorBound = '1';
    });

    sync();
}
|
|
779
843
|
function updateParamSliders(params) {
|
|
780
844
|
// 定义一个更新滑块的辅助函数
|
|
781
845
|
function updateSlider(paramName, sliderId, valueId) {
|
|
@@ -1080,7 +1144,12 @@ const modelDefaults = {
|
|
|
1080
1144
|
minOverlapThreshold: 2,
|
|
1081
1145
|
maliciousThreshold: 0.7,
|
|
1082
1146
|
learningIterations: 3,
|
|
1083
|
-
communicateCount: 1
|
|
1147
|
+
communicateCount: 1,
|
|
1148
|
+
// 新增:激活与传递函数选择
|
|
1149
|
+
activationType: 'relu',
|
|
1150
|
+
transferType: 'linear',
|
|
1151
|
+
activationCustom: '',
|
|
1152
|
+
transferCustom: ''
|
|
1084
1153
|
};
|
|
1085
1154
|
|
|
1086
1155
|
// 当前应用的模型参数
|