079project 2.0.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,163 @@
1
+ const fs = require('fs');
2
+ const path = require('path');
3
+ const crypto = require('crypto');
4
+ const { planQueries, extractSerpLinks, relevanceScore } = require('./agent.cjs');
5
+ // ...existing code...
6
+
7
/**
 * Build a random dotted-quad string in IPv4 notation.
 *
 * Each of the four octets is drawn independently from the range 1..254, so
 * neither 0 nor 255 ever appears in any position.
 *
 * @returns {string} Four decimal octets joined by dots, e.g. "12.200.7.91".
 */
function randomIPv4() {
  const octets = [];
  while (octets.length < 4) {
    octets.push(1 + Math.floor(Math.random() * 254));
  }
  return octets.join('.');
}
10
/**
 * File-backed storage for the crawler.
 *
 * Keeps the crawl state (pending URL queue, visited/enqueued hash sets and
 * counters) in `<baseDir>/state.json` and stores cleaned page text as `.txt`
 * files under `<baseDir>/docs/<YYYY-MM-DD>/`. All I/O is synchronous.
 */
class CrawlerStorage {
  /**
   * @param {string} [baseDir] Root directory for the state file and documents.
   *   Defaults to `crawler_data` next to this module's parent directory.
   */
  constructor(baseDir = path.join(__dirname, '..', 'crawler_data')) {
    this.baseDir = baseDir;
    this.stateFile = path.join(this.baseDir, 'state.json');
    this.docsDir = path.join(this.baseDir, 'docs');
    this.ensureDirs();
    this.state = {
      frontier: [], // queue of URLs waiting to be fetched
      visited: {}, // urlHash -> timestamp of the visit
      enqueued: {}, // urlHash -> 1 for every URL that was ever queued
      stats: { fetched: 0, saved: 0, deduped: 0, errors: 0, lastSave: 0 }
    };
    this._loadState();
  }

  /** Create the base and document directories if they do not exist yet. */
  ensureDirs() {
    fs.mkdirSync(this.baseDir, { recursive: true });
    fs.mkdirSync(this.docsDir, { recursive: true });
  }

  /**
   * @param {*} str Value to hash (coerced to string).
   * @returns {string} Hex SHA-1 digest, used as the de-duplication key.
   */
  _hash(str) {
    return crypto.createHash('sha1').update(String(str)).digest('hex');
  }

  /**
   * Coerce a candidate to a trimmed string and accept it only when it starts
   * with http:// or https:// (case-insensitive).
   *
   * @param {*} candidate Raw URL candidate.
   * @returns {string} The trimmed URL, or '' when the candidate is rejected.
   */
  _normalizeUrl(candidate) {
    const url = String(candidate || '').trim();
    return /^https?:\/\//i.test(url) ? url : '';
  }

  /**
   * Shared queueing logic for addSeed/enqueueLinks: push every acceptable URL
   * that was neither queued nor visited before, persisting once at the end.
   *
   * @param {Iterable<*>} candidates Raw URL candidates.
   * @param {number} [limit=Infinity] Maximum number of URLs to add.
   * @returns {number} How many URLs were added.
   */
  _enqueue(candidates, limit = Infinity) {
    let added = 0;
    for (const candidate of candidates ?? []) {
      // Checked before adding so that a limit of 0 really adds nothing.
      if (added >= limit) break;
      const url = this._normalizeUrl(candidate);
      if (!url) continue;
      const h = this._hash(url);
      if (this.state.enqueued[h] || this.state.visited[h]) continue;
      this.state.frontier.push(url);
      this.state.enqueued[h] = 1;
      added++;
    }
    if (added > 0) this._saveState();
    return added;
  }

  /**
   * Restore the state from disk, if a state file exists.
   *
   * The parsed content is merged over the defaults instead of replacing them,
   * so a partial or malformed file cannot leave `frontier`, `visited`,
   * `enqueued` or `stats` missing. Unknown extra keys are kept as they are.
   * Failures are logged and the current (default) state is kept.
   */
  _loadState() {
    try {
      if (!fs.existsSync(this.stateFile)) return;
      const loaded = JSON.parse(fs.readFileSync(this.stateFile, 'utf-8'));
      const isDict = (v) => Boolean(v) && typeof v === 'object' && !Array.isArray(v);
      if (!isDict(loaded)) return;
      const defaults = this.state;
      this.state = {
        ...loaded,
        frontier: Array.isArray(loaded.frontier) ? loaded.frontier : defaults.frontier,
        visited: isDict(loaded.visited) ? loaded.visited : defaults.visited,
        enqueued: isDict(loaded.enqueued) ? loaded.enqueued : defaults.enqueued,
        stats: { ...defaults.stats, ...(isDict(loaded.stats) ? loaded.stats : {}) }
      };
    } catch (e) {
      console.warn('[CRAWLER][STATE] load failed:', e.message);
    }
  }

  /**
   * Persist the state to disk. Failures are logged, never thrown.
   *
   * `lastSave` is stamped before serialising so the file records the time of
   * this save; it is rolled back when the write fails. The JSON is written to
   * a temporary file and renamed over the state file so that an interrupted
   * write cannot leave a truncated `state.json` behind.
   */
  _saveState() {
    const stats = this.state.stats;
    const previousSave = stats.lastSave;
    stats.lastSave = Date.now();
    try {
      const tmpFile = `${this.stateFile}.tmp`;
      fs.writeFileSync(tmpFile, JSON.stringify(this.state, null, 2), 'utf-8');
      fs.renameSync(tmpFile, this.stateFile);
    } catch (e) {
      stats.lastSave = previousSave;
      console.warn('[CRAWLER][STATE] save failed:', e.message);
    }
  }

  /**
   * Queue seed URLs, skipping anything that is not http(s) or already known.
   *
   * @param {Iterable<*>} [urls] Seed URL candidates.
   * @returns {number} How many URLs were added to the frontier.
   */
  addSeed(urls = []) {
    return this._enqueue(urls);
  }

  /**
   * Take the next unvisited URL off the front of the queue.
   *
   * The removal is only persisted by the next state save.
   *
   * @returns {string|null} The URL, or null when the frontier is exhausted.
   */
  nextUrl() {
    while (this.state.frontier.length > 0) {
      const url = this.state.frontier.shift();
      if (!url) continue;
      if (this.state.visited[this._hash(url)]) continue;
      return url;
    }
    return null;
  }

  /**
   * Record the URL as visited (with the current time) and persist the state.
   *
   * @param {string} url URL that has been processed.
   */
  markVisited(url) {
    this.state.visited[this._hash(url)] = Date.now();
    this._saveState();
  }

  /**
   * Queue newly discovered links with de-duplication. Links are normalised
   * the same way as seeds (string coercion, trimming, http(s) only).
   *
   * @param {Iterable<*>} [links] Link candidates.
   * @param {number} [limitPerBatch=200] Maximum number of links to add per call.
   * @returns {number} How many links were added to the frontier.
   */
  enqueueLinks(links = [], limitPerBatch = 200) {
    return this._enqueue(links, limitPerBatch);
  }

  /**
   * Save a cleaned document as a text file: a metadata header (URL, title,
   * fetch time, language, depth, parent), a blank line, then the trimmed text.
   *
   * @param {{url?: string, title?: string, lang?: string, depth?: number,
   *   parent?: string, text?: string}} doc Document to store.
   * @returns {string|null} Path of the written file; null when the text is
   *   empty (nothing is written or counted) or when saving failed (the
   *   `errors` counter is incremented and the reason is logged).
   */
  saveDocument(doc) {
    try {
      // Reject empty documents before touching the file system.
      const body = (doc.text || '').trim();
      if (!body) return null;

      const now = new Date();
      const dayName = [
        now.getFullYear(),
        String(now.getMonth() + 1).padStart(2, '0'),
        String(now.getDate()).padStart(2, '0')
      ].join('-');
      const dir = path.join(this.docsDir, dayName);
      fs.mkdirSync(dir, { recursive: true });

      // Millisecond timestamp first so that names sort chronologically.
      const file = path.join(dir, `${now.getTime()}_${Math.floor(Math.random() * 1e6)}.txt`);
      const meta = [
        `URL: ${doc.url || ''}`,
        `Title: ${doc.title || ''}`,
        `FetchedAt: ${now.toISOString()}`,
        `Lang: ${doc.lang || ''}`,
        `Depth: ${doc.depth ?? 0}`,
        `Parent: ${doc.parent || ''}`
      ].join('\n');
      fs.writeFileSync(file, meta + '\n\n' + body, 'utf-8');
      this.state.stats.saved++;
      this._saveState();
      return file;
    } catch (e) {
      console.warn('[CRAWLER][DOC] save failed:', e.message);
      this.state.stats.errors++;
      this._saveState();
      return null;
    }
  }

  /**
   * @returns {object} A copy of the counters, with `frontier` (queue length)
   *   and `visited` (number of visited URLs) added.
   */
  stats() {
    return {
      ...this.state.stats,
      frontier: this.state.frontier.length,
      visited: Object.keys(this.state.visited).length
    };
  }

  /**
   * Read saved documents back, newest day directory and newest file name
   * first, until `maxFiles` documents have been collected.
   *
   * Unreadable directories or files are logged and skipped.
   *
   * @param {number} [maxFiles=20] Maximum number of documents to return.
   * @returns {Array<{path: string, text: string}>} File path and full content
   *   (metadata header included) of each document.
   */
  loadRecentDocs(maxFiles = 20) {
    if (maxFiles <= 0) return [];
    if (!fs.existsSync(this.docsDir)) return [];

    const newestFirst = (names) => names.sort().reverse();
    const days = newestFirst(
      fs.readdirSync(this.docsDir).filter((name) => /^\d{4}-\d{2}-\d{2}$/.test(name))
    );
    const out = [];
    for (const day of days) {
      const dir = path.join(this.docsDir, day);
      let files;
      try {
        files = newestFirst(fs.readdirSync(dir).filter((name) => name.endsWith('.txt')));
      } catch (e) {
        console.warn('[CRAWLER][DOCS] list failed:', dir, e.message);
        continue;
      }
      for (const name of files) {
        const p = path.join(dir, name);
        try {
          out.push({ path: p, text: fs.readFileSync(p, 'utf-8') });
        } catch (e) {
          console.warn('[CRAWLER][DOCS] read failed:', p, e.message);
          continue;
        }
        if (out.length >= maxFiles) return out;
      }
    }
    return out;
  }
}
162
+
163
+ module.exports = { CrawlerStorage };
package/forwarder.js CHANGED
@@ -285,6 +285,43 @@ app.get('/', (req, res) => {
285
285
  <button type="submit" class="btn btn-primary">应用参数</button>
286
286
  <button type="button" id="resetParams" class="btn btn-secondary">重置默认值</button>
287
287
  </div>
288
+ <hr class="my-4"/>
289
+ <h6 class="mb-3">激活与传递函数</h6>
290
+ <div class="row g-3">
291
+ <div class="col-md-6">
292
+ <label for="activationType" class="form-label">Activation Function</label>
293
+ <select id="activationType" class="form-select">
294
+ <option value="identity">identity</option>
295
+ <option value="relu" selected>relu</option>
296
+ <option value="leaky_relu">leaky_relu</option>
297
+ <option value="tanh">tanh</option>
298
+ <option value="sigmoid">sigmoid</option>
299
+ <option value="elu">elu</option>
300
+ <option value="softplus">softplus</option>
301
+ <option value="gelu">gelu</option>
302
+ <option value="custom">custom</option>
303
+ </select>
304
+ <small class="text-muted">Node 侧将对节点聚合与扩散值应用该激活</small>
305
+ <textarea id="activationCustom" class="form-control mt-2" rows="3" style="display:none;" placeholder="Custom activation JS. Example: return x > 0 ? x : 0;"></textarea>
306
+ </div>
307
+ <div class="col-md-6">
308
+ <label for="transferType" class="form-label">Transfer Function</label>
309
+ <select id="transferType" class="form-select">
310
+ <option value="linear" selected>linear</option>
311
+ <option value="exp">exp</option>
312
+ <option value="inverse">inverse</option>
313
+ <option value="capped">capped</option>
314
+ <option value="custom">custom</option>
315
+ </select>
316
+ <small class="text-muted">传递函数决定沿边传播的信号衰减方式</small>
317
+ <textarea id="transferCustom" class="form-control mt-2" rows="3" style="display:none;" placeholder="Custom transfer JS. Args: value, weight, decayK, ctx. Example: return value * Math.exp(-(decayK*weight));"></textarea>
318
+ </div>
319
+ </div>
320
+
321
+ <div class="d-flex justify-content-between mt-4">
322
+ <button type="submit" class="btn btn-primary">应用参数</button>
323
+ <button type="button" id="resetParams" class="btn btn-secondary">重置默认值</button>
324
+ </div>
288
325
 
289
326
  </form>
290
327
  </div>
@@ -534,6 +571,7 @@ app.get('/', (req, res) => {
534
571
  document.getElementById('confirmDeleteSnapshot').addEventListener('click', function() {
535
572
  deleteSnapshot();
536
573
  });
574
+ bindActivationEditors(); // 新增:绑定选择器与自定义编辑显隐
537
575
  });
538
576
 
539
577
  // 发送消息
@@ -595,6 +633,8 @@ app.get('/', (req, res) => {
595
633
  }
596
634
 
597
635
  // 加载模型参数
636
+
637
+ // 加载模型参数(保持接口不变)
598
638
  function loadModelParams() {
599
639
  fetch('/api/model/params')
600
640
  .then(response => response.json())
@@ -672,6 +712,13 @@ function updateParamSliders(params) {
672
712
  document.getElementById('waitingTimeRange').value = params.waitingTime;
673
713
  document.getElementById('waitingTimeValue').textContent = params.waitingTime;
674
714
  }
715
+ if (params.activationType) document.getElementById('activationType').value = params.activationType;
716
+ if (params.transferType) document.getElementById('transferType').value = params.transferType;
717
+ if (typeof params.activationCustom === 'string') document.getElementById('activationCustom').value = params.activationCustom;
718
+ if (typeof params.transferCustom === 'string') document.getElementById('transferCustom').value = params.transferCustom;
719
+
720
+ // 触发一次显隐同步
721
+ if (typeof bindActivationEditors === 'function') bindActivationEditors();
675
722
  } // 添加这个花括号
676
723
 
677
724
  function bindSliderEvents() {
@@ -705,43 +752,47 @@ function bindSliderEvents() {
705
752
  });
706
753
  }
707
754
  // 更新模型参数
708
- function updateModelParams() {
709
- const params = {
710
- decayFactor: parseFloat(document.getElementById('decayFactorRange').value),
711
- maxMemeWords: parseInt(document.getElementById('maxMemeWordsRange').value),
712
- minOverlapThreshold: parseInt(document.getElementById('minOverlapThresholdRange').value),
713
- maliciousThreshold: parseFloat(document.getElementById('maliciousThresholdRange').value),
714
- learningIterations: parseInt(document.getElementById('learningIterationsRange').value),
715
- iteration: parseInt(document.getElementById('iterationRange').value),
716
- threshold: parseInt(document.getElementById('thresholdRange').value),
717
- decay: parseFloat(document.getElementById('decayRange').value),
718
- decayK: parseFloat(document.getElementById('decayKRange').value),
719
- maxLen: parseInt(document.getElementById('maxLenRange').value),
720
- edgeWeight: parseFloat(document.getElementById('edgeWeightRange').value),
721
- communicateCount: parseInt(document.getElementById('communicateCountRange').value),
722
- waitingTime: parseInt(document.getElementById('waitingTime').value) // 添加这个参数
723
- };
724
-
725
- fetch('/api/model/params', {
726
- method: 'POST',
727
- headers: {
728
- 'Content-Type': 'application/json',
729
- },
730
- body: JSON.stringify(params),
731
- })
732
- .then(response => response.json())
733
- .then(data => {
734
- if (data.success) {
735
- alert('参数更新成功!');
736
- } else {
737
- alert('参数更新失败: ' + (data.error || '未知错误'));
755
+ // 更新模型参数:增加激活/传递函数字段
756
/**
 * Collect the model parameters from the settings form, including the
 * activation/transfer function selection, and POST them as JSON to
 * /api/model/params. The outcome is reported to the user with alert().
 *
 * Note: no template literals are used here, on the assumption that this
 * script is embedded in a server-side template string (TODO confirm).
 */
function updateModelParams() {
  const valueOf = (id) => document.getElementById(id).value;
  // Always pass the radix so values such as "08" are parsed as decimal.
  const intOf = (id) => parseInt(valueOf(id), 10);
  const floatOf = (id) => parseFloat(valueOf(id));

  const params = {
    decayFactor: floatOf('decayFactorRange'),
    maxMemeWords: intOf('maxMemeWordsRange'),
    minOverlapThreshold: intOf('minOverlapThresholdRange'),
    maliciousThreshold: floatOf('maliciousThresholdRange'),
    learningIterations: intOf('learningIterationsRange'),
    iteration: intOf('iterationRange'),
    threshold: intOf('thresholdRange'),
    decay: floatOf('decayRange'),
    decayK: floatOf('decayKRange'),
    maxLen: intOf('maxLenRange'),
    edgeWeight: floatOf('edgeWeightRange'),
    communicateCount: intOf('communicateCountRange'),
    // Activation / transfer function selection and custom source text.
    activationType: valueOf('activationType'),
    transferType: valueOf('transferType'),
    activationCustom: valueOf('activationCustom'),
    transferCustom: valueOf('transferCustom')
  };

  // The waiting-time slider follows the "...Range" id convention used by the
  // other controls; the bare 'waitingTime' id is kept as a fallback. When
  // neither element exists the field is left out instead of aborting the
  // whole update with a TypeError.
  const waitingTimeInput =
    document.getElementById('waitingTimeRange') || document.getElementById('waitingTime');
  if (waitingTimeInput) {
    params.waitingTime = parseInt(waitingTimeInput.value, 10);
  }

  fetch('/api/model/params', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json' },
    body: JSON.stringify(params),
  })
    .then(response => response.json())
    .then(data => {
      if (data.success) {
        alert('参数更新成功!');
      } else {
        alert('参数更新失败: ' + (data.error || '未知错误'));
      }
    })
    .catch(error => {
      console.error('Error updating parameters:', error);
      alert('更新参数时发生错误: ' + error.message);
    });
}
739
- })
740
- .catch(error => {
741
- console.error('Error updating parameters:', error);
742
- alert('更新参数时发生错误: ' + error.message);
743
- });
744
- }
745
796
 
746
797
  // 重置模型参数
747
798
  function resetModelParams() {
@@ -776,6 +827,19 @@ function updateModelParams() {
776
827
  .catch(error => console.error('Error loading system status:', error));
777
828
  }
778
829
 
830
/**
 * Show the custom-code textarea of the activation and transfer selectors only
 * while "custom" is selected.
 *
 * Safe to call repeatedly: the change listener is attached to each select
 * only once (a marker property on the element records that), while the
 * visibility is re-synchronised on every call. A selector/textarea pair whose
 * elements are missing from the page is skipped.
 */
function bindActivationEditors() {
  const editorPairs = [
    ['activationType', 'activationCustom'],
    ['transferType', 'transferCustom']
  ];

  editorPairs.forEach(([selectId, textareaId]) => {
    const select = document.getElementById(selectId);
    const textarea = document.getElementById(textareaId);
    if (!select || !textarea) return;

    const sync = () => {
      textarea.style.display = (select.value === 'custom') ? '' : 'none';
    };

    // Each call creates a new closure, so addEventListener would not
    // de-duplicate it; guard explicitly to avoid piling up handlers.
    if (!select.__customEditorBound) {
      select.addEventListener('change', sync);
      select.__customEditorBound = true;
    }
    sync();
  });
}
779
843
  function updateParamSliders(params) {
780
844
  // 定义一个更新滑块的辅助函数
781
845
  function updateSlider(paramName, sliderId, valueId) {
@@ -1080,7 +1144,12 @@ const modelDefaults = {
1080
1144
  minOverlapThreshold: 2,
1081
1145
  maliciousThreshold: 0.7,
1082
1146
  learningIterations: 3,
1083
- communicateCount: 1 // 新增
1147
+ communicateCount: 1,
1148
+ // 新增:激活与传递函数选择
1149
+ activationType: 'relu',
1150
+ transferType: 'linear',
1151
+ activationCustom: '',
1152
+ transferCustom: ''
1084
1153
  };
1085
1154
 
1086
1155
  // 当前应用的模型参数
package/groupmanager.cjs CHANGED
@@ -51,7 +51,8 @@ const REQUIRED_DIRS = [
51
51
  'robots',
52
52
  'tests',
53
53
  'public',
54
- 'snapshots'
54
+ 'snapshots',
55
+ 'crawler'
55
56
  ];
56
57
 
57
58
  // 检查与准备目标 groups 文件夹