079project 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/GroupStarter.cjs +647 -0
- package/LICENSE +165 -0
- package/PropagateSignalUseJsWorker.js +92 -0
- package/README.md +102 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/README.md +52 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/README.zh_CN.md +59 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/RedisService.exe +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/cygcrypto-3.dll +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/cyggcc_s-seh-1.dll +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/cygssl-3.dll +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/cygstdc++-6.dll +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/cygwin1.dll +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/cygz.dll +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/dump.rdb +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/install_redis_service.bat +100 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis-benchmark.exe +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis-check-aof.exe +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis-check-rdb.exe +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis-cli.exe +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis-full.conf +376 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis-sentinel.exe +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis-server.exe +0 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/redis.conf +2348 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/sentinel.conf +361 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/start.bat +4 -0
- package/Redis-8.0.3-Windows-x64-cygwin-with-Service/uninstall_redis_service.bat +30 -0
- package/boot.py +51 -0
- package/chat_Client.js +29 -0
- package/controller.cjs +118 -0
- package/enhancedForwarder.js +378 -0
- package/forwarder.js +1456 -0
- package/groupmanager.cjs +143 -0
- package/howToStart.txt +8 -0
- package/lemma.csv +210 -0
- package/load.py +35 -0
- package/mainManager.cjs +81 -0
- package/mainStarter.cjs +535 -0
- package/main_Serve.cjs +2745 -0
- package/main_Study.cjs +3230 -0
- package/memeMergeWorker.cjs +55 -0
- package/model_RNN.py +117 -0
- package/note.txt +5 -0
- package/notebook.txt +8 -0
- package/npminstall-debug.log +206 -0
- package/package.json +48 -0
- package/public/chat_straight.html +90 -0
- package/public/index.html +247 -0
- package/public/indexmain.html +136 -0
- package/public/monitor.html +194 -0
- package/robots/wikitext-something.txt +25 -0
- package/runtime.proto +24 -0
- package/runtime_data.json +766294 -0
- package/serializer_seq2seq.h5 +0 -0
- package/start.js +46 -0
- package/tests/test_FIrststep1.txt +1224 -0
- package/tests/test_FIrststep2.txt +2956 -0
- package/tests/test_FIrststep3.txt +1224 -0
- package/tests/test_FIrststep4.txt +1396 -0
- package/tests/test_FIrststep5.txt +2852 -0
- package/tests/test_FIrststep6.txt +1516 -0
- package/tests/test_FirstStep7.txt +1748 -0
- package/tests/test_Firstsetp8.txt +2672 -0
- package/tokenizer.json +1 -0
- package/vocabularySplitter.js +253 -0
- package/wikitext/.gitattributes +27 -0
- package/wikitext/README.md +344 -0
- package/wikitext/describtion.txt +1 -0
package/groupmanager.cjs
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
const fs = require('fs');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
|
|
4
|
+
/**
 * Minimal command-line flag parser.
 *
 * Every token that starts with `--` becomes a key (without the dashes). When
 * the following token is non-empty and is not itself a `--flag`, it is consumed
 * as that key's string value; otherwise the key is set to `true`. Tokens that
 * are neither a flag nor a consumed value are ignored.
 *
 * @param {string[]} argv - Raw arguments, e.g. `process.argv.slice(2)`.
 * @returns {Object<string, string|boolean>} Map of flag name to its value.
 */
function parseArgs(argv) {
  const parsed = {};
  let index = 0;
  while (index < argv.length) {
    const token = argv[index];
    index += 1;
    if (!token.startsWith('--')) {
      continue;
    }
    const name = token.slice(2);
    const next = argv[index];
    if (next && !next.startsWith('--')) {
      parsed[name] = next;
      index += 1; // the value token has been consumed
    } else {
      parsed[name] = true;
    }
  }
  return parsed;
}
|
|
17
|
+
|
|
18
|
+
// Resolve the run configuration from the command line. Every directory flag
// falls back to a default location when it is not supplied.
const args = parseArgs(process.argv.slice(2));
const SOURCE_ROOT = path.resolve(args['source-root'] || __dirname);
const TARGET_GROUPS_DIR = path.resolve(args['groups-dir'] || path.join(__dirname, 'groups'));
const ROBOTS_DIR = path.resolve(args['robots-dir'] || path.join(SOURCE_ROOT, 'robots'));
const TESTS_DIR = path.resolve(args['tests-dir'] || path.join(SOURCE_ROOT, 'tests'));
const PUBLIC_DIR = path.resolve(args['public-dir'] || path.join(SOURCE_ROOT, 'public'));
const SNAPSHOTS_DIR = path.resolve(args['snapshots-dir'] || path.join(SOURCE_ROOT, 'snapshots'));

// Shard size in KB (default 250). A non-numeric, non-positive, or value-less
// --shard-size-kb would otherwise turn into NaN / 1 / <= 0 and silently produce
// either one giant shard or one shard per line, so such input falls back to the
// default with a warning.
const rawShardSizeKb = args['shard-size-kb'];
const parsedShardSizeKb = typeof rawShardSizeKb === 'string' ? Number(rawShardSizeKb) : NaN;
const shardSizeKbIsValid = Number.isFinite(parsedShardSizeKb) && parsedShardSizeKb > 0;
if (rawShardSizeKb !== undefined && !shardSizeKbIsValid) {
  console.warn('[WARN] invalid --shard-size-kb value, falling back to 250:', rawShardSizeKb);
}
const MAX_SIZE_KB = shardSizeKbIsValid ? parsedShardSizeKb : 250;

// --clean is on when the flag is present and its value is not "false" (any case).
const CLEAN = Boolean(args['clean'] && String(args['clean']).toLowerCase() !== 'false');

console.log('[CFG] source-root =', SOURCE_ROOT);
console.log('[CFG] robots-dir =', ROBOTS_DIR);
console.log('[CFG] tests-dir =', TESTS_DIR);
console.log('[CFG] public-dir =', PUBLIC_DIR);
console.log('[CFG] snapshots =', SNAPSHOTS_DIR);
console.log('[CFG] groups-dir =', TARGET_GROUPS_DIR);
console.log('[CFG] shard-size =', MAX_SIZE_KB, 'KB');
|
|
35
|
+
|
|
36
|
+
// Files and folders that are copied into every generated group.
// File names are joined onto the source root as-is, so they must match the
// names on disk exactly (including letter case on case-sensitive file systems).
const REQUIRED_FILES = [
  'main_Serve.cjs',
  'main_Study.cjs',
  'forwarder.js',
  'controller.cjs',
  'model_RNN.py',
  // Was misspelled 'propagateSignalUseJsWroker.js', which never matched the
  // shipped worker file and therefore was never copied.
  'PropagateSignalUseJsWorker.js',
  'memeMergeWorker.cjs',
  'boot.py',
  'runtime.proto',
  'lemma.csv',
];
const REQUIRED_DIRS = [
  'robots',
  'tests',
  'public',
  'snapshots',
];
|
|
56
|
+
|
|
57
|
+
// Prepare the target groups folder: create it when it is missing; otherwise,
// when --clean is set, delete the group_<n> folders left by a previous run.
if (!fs.existsSync(TARGET_GROUPS_DIR)) {
  fs.mkdirSync(TARGET_GROUPS_DIR, { recursive: true });
  console.log('已创建 groups 文件夹:', TARGET_GROUPS_DIR);
} else if (CLEAN) {
  // Only entries named exactly group_<digits> are removed; anything else in
  // the folder is left untouched.
  const staleGroups = fs.readdirSync(TARGET_GROUPS_DIR).filter((entry) => /^group_\d+$/.test(entry));
  for (const entry of staleGroups) {
    const stalePath = path.join(TARGET_GROUPS_DIR, entry);
    try {
      fs.rmSync(stalePath, { recursive: true, force: true });
    } catch (err) {
      // Cleanup stays best-effort, but a failure is reported instead of hidden
      // so that leftover data in a group folder does not go unnoticed.
      console.warn('[WARN] failed to remove old group folder:', stalePath, err.message);
    }
  }
  console.log('已清理旧分片 group_*');
}
|
|
71
|
+
|
|
72
|
+
// Gather every non-empty line from all .txt files in the robots folder.
// The script aborts with exit code 1 when the folder itself is missing and
// only warns when it contains no .txt file.
if (!fs.existsSync(ROBOTS_DIR)) {
  console.error('[ERROR] robots 目录不存在:', ROBOTS_DIR);
  process.exit(1);
}
const robotFiles = fs.readdirSync(ROBOTS_DIR).filter((name) => name.toLowerCase().endsWith('.txt'));
if (robotFiles.length === 0) {
  console.warn('[WARN] robots 目录未发现 .txt 文件:', ROBOTS_DIR);
}
let allLines = [];
robotFiles.forEach((name) => {
  const content = fs.readFileSync(path.join(ROBOTS_DIR, name), 'utf-8');
  // Accept both LF and CRLF line endings; empty lines are dropped.
  content.split(/\r?\n/).forEach((text) => {
    if (text.length > 0) {
      allLines.push(text);
    }
  });
});
|
|
89
|
+
|
|
90
|
+
// Split the collected lines into shards of at most MAX_SIZE bytes each, one
// sentence per line. A line is never split: a single line larger than the
// limit ends up alone in its own shard.
const MAX_SIZE = MAX_SIZE_KB * 1024;
let shards = [];
let currentShard = [];
let currentSize = 0;
for (const line of allLines) {
  // UTF-8 byte length of the line plus one byte for its newline.
  const lineSize = Buffer.byteLength(line, 'utf-8') + 1;
  const wouldOverflow = currentShard.length > 0 && currentSize + lineSize > MAX_SIZE;
  if (wouldOverflow) {
    // Close the current shard and start a fresh one for this line.
    shards.push(currentShard);
    currentShard = [];
    currentSize = 0;
  }
  currentShard.push(line);
  currentSize += lineSize;
}
if (currentShard.length > 0) {
  shards.push(currentShard);
}

console.log(`共分割为 ${shards.length} 个 group,每组约${MAX_SIZE_KB}KB`);
|
|
109
|
+
|
|
110
|
+
// Populate one group_<i> folder per shard. Each group receives the required
// program files, the top-level files of the tests/public/snapshots folders,
// and a robots folder holding only that shard's lines (group_<i>.txt).
// Copying stays best-effort (one failure does not abort the run), but every
// failure is reported instead of being swallowed silently.

// Maps a required folder name to the (possibly overridden) source folder.
const DIR_MAP = { robots: ROBOTS_DIR, tests: TESTS_DIR, public: PUBLIC_DIR, snapshots: SNAPSHOTS_DIR };

// Look up the required files once: the result is the same for every group,
// and a missing file is reported a single time rather than once per group.
const requiredFileSources = [];
for (const file of REQUIRED_FILES) {
  const src = path.join(SOURCE_ROOT, file);
  if (fs.existsSync(src)) {
    requiredFileSources.push({ file, src });
  } else {
    console.warn('[WARN] required file not found, skipped:', src);
  }
}

for (let i = 0; i < shards.length; i++) {
  const groupFolder = path.join(TARGET_GROUPS_DIR, `group_${i}`);
  if (!fs.existsSync(groupFolder)) fs.mkdirSync(groupFolder, { recursive: true });

  // Copy the required files (taken from SOURCE_ROOT).
  for (const { file, src } of requiredFileSources) {
    const dest = path.join(groupFolder, file);
    try {
      fs.copyFileSync(src, dest);
    } catch (err) {
      console.warn('[WARN] failed to copy file:', src, '->', dest, err.message);
    }
  }

  // Copy the required folders (source locations follow the CLI overrides).
  for (const dir of REQUIRED_DIRS) {
    const srcDir = DIR_MAP[dir] || path.join(SOURCE_ROOT, dir);
    const destDir = path.join(groupFolder, dir);
    if (!fs.existsSync(srcDir)) continue; // a missing source folder is simply skipped
    if (!fs.existsSync(destDir)) fs.mkdirSync(destDir, { recursive: true });
    if (dir === 'robots') {
      // The robots folder is not copied: it gets only this group's shard.
      const robotsTxt = path.join(destDir, `group_${i}.txt`);
      try {
        fs.writeFileSync(robotsTxt, shards[i].join('\n'));
      } catch (err) {
        console.warn('[WARN] failed to write shard file:', robotsTxt, err.message);
      }
    } else {
      // Only regular files directly inside the folder are copied; nested
      // folders are not descended into.
      for (const f of fs.readdirSync(srcDir)) {
        const srcFile = path.join(srcDir, f);
        const destFile = path.join(destDir, f);
        try {
          if (fs.statSync(srcFile).isFile()) fs.copyFileSync(srcFile, destFile);
        } catch (err) {
          console.warn('[WARN] failed to copy file:', srcFile, '->', destFile, err.message);
        }
      }
    }
  }
  console.log(`已创建并初始化 ${groupFolder}`);
}
console.log('所有 group 初始化完成');
|
package/howToStart.txt
ADDED
package/lemma.csv
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
abandon,abandons,abandoned,abandoned,abandoning
|
|
2
|
+
accept,accepts,accepted,accepted,accepting
|
|
3
|
+
accompany,accompanies,accompanied,accompanied,accompanying
|
|
4
|
+
achieve,achieves,achieved,achieved,achieving
|
|
5
|
+
act,acts,acted,acted,acting
|
|
6
|
+
add,adds,added,added,adding
|
|
7
|
+
admire,admires,admired,admired,admiring
|
|
8
|
+
admit,admits,admitted,admitted,admitting
|
|
9
|
+
advise,advises,advised,advised,advising
|
|
10
|
+
afford,affords,afforded,afforded,affording
|
|
11
|
+
agree,agrees,agreed,agreed,agreeing
|
|
12
|
+
allow,allows,allowed,allowed,allowing
|
|
13
|
+
appear,appears,appeared,appeared,appearing
|
|
14
|
+
applaud,applauds,applauded,applauded,applauding
|
|
15
|
+
apply,applies,applied,applied,applying
|
|
16
|
+
approach,approaches,approached,approached,approaching
|
|
17
|
+
approve,approves,approved,approved,approving
|
|
18
|
+
argue,argues,argued,argued,arguing
|
|
19
|
+
arrange,arranges,arranged,arranged,arranging
|
|
20
|
+
arrest,arrests,arrested,arrested,arresting
|
|
21
|
+
ask,asks,asked,asked,asking
|
|
22
|
+
attach,attaches,attached,attached,attaching
|
|
23
|
+
attack,attacks,attacked,attacked,attacking
|
|
24
|
+
attempt,attempts,attempted,attempted,attempting
|
|
25
|
+
attend,attends,attended,attended,attending
|
|
26
|
+
attract,attracts,attracted,attracted,attracting
|
|
27
|
+
avoid,avoids,avoided,avoided,avoiding
|
|
28
|
+
be,is,was,been,being
|
|
29
|
+
be,are,were,been,being
|
|
30
|
+
be,am,was,been,being
|
|
31
|
+
bear,bears,bore,borne,bearing
|
|
32
|
+
beat,beats,beat,beaten,beating
|
|
33
|
+
become,becomes,became,become,becoming
|
|
34
|
+
begin,begins,began,begun,beginning
|
|
35
|
+
behave,behaves,behaved,behaved,behaving
|
|
36
|
+
belong,belongs,belonged,belonged,belonging
|
|
37
|
+
bend,bends,bent,bent,bending
|
|
38
|
+
bet,bets,bet,bet,betting
|
|
39
|
+
bid,bids,bid,bid,bidding
|
|
40
|
+
bind,binds,bound,bound,binding
|
|
41
|
+
bite,bites,bit,bitten,biting
|
|
42
|
+
bleed,bleeds,bled,bled,bleeding
|
|
43
|
+
blow,blows,blew,blown,blowing
|
|
44
|
+
break,breaks,broke,broken,breaking
|
|
45
|
+
breed,breeds,bred,bred,breeding
|
|
46
|
+
bring,brings,brought,brought,bringing
|
|
47
|
+
broadcast,broadcasts,broadcast,broadcast,broadcasting
|
|
48
|
+
build,builds,built,built,building
|
|
49
|
+
burn,burns,burnt/burned,burnt/burned,burning
|
|
50
|
+
burst,bursts,burst,burst,bursting
|
|
51
|
+
buy,buys,bought,bought,buying
|
|
52
|
+
call,calls,called,called,calling
|
|
53
|
+
catch,catches,caught,caught,catching
|
|
54
|
+
cause,causes,caused,caused,causing
|
|
55
|
+
change,changes,changed,changed,changing
|
|
56
|
+
charge,charges,charged,charged,charging
|
|
57
|
+
check,checks,checked,checked,checking
|
|
58
|
+
choose,chooses,chose,chosen,choosing
|
|
59
|
+
circle,circles,circled,circled,circling
|
|
60
|
+
claim,claims,claimed,claimed,claiming
|
|
61
|
+
clap,claps,clapped,clapped,clapping
|
|
62
|
+
clean,cleans,cleaned,cleaned,cleaning
|
|
63
|
+
clear,clears,cleared,cleared,clearing
|
|
64
|
+
climb,climbs,climbed,climbed,climbing
|
|
65
|
+
close,closes,closed,closed,closing
|
|
66
|
+
collect,collects,collected,collected,collecting
|
|
67
|
+
come,comes,came,come,coming
|
|
68
|
+
cost,costs,cost,cost,costing
|
|
69
|
+
count,counts,counted,counted,counting
|
|
70
|
+
cover,covers,covered,covered,covering
|
|
71
|
+
crash,crashes,crashed,crashed,crashing
|
|
72
|
+
creep,creeps,crept,crept,creeping
|
|
73
|
+
cut,cuts,cut,cut,cutting
|
|
74
|
+
dance,dances,danced,danced,dancing
|
|
75
|
+
deal,deals,dealt,dealt,dealing
|
|
76
|
+
decide,decides,decided,decided,deciding
|
|
77
|
+
declare,declares,declared,declared,declaring
|
|
78
|
+
decorate,decorates,decorated,decorated,decorating
|
|
79
|
+
delay,delays,delayed,delayed,delaying
|
|
80
|
+
depend,depends,depended,depended,depending
|
|
81
|
+
describe,describes,described,described,describing
|
|
82
|
+
destroy,destroys,destroyed,destroyed,destroying
|
|
83
|
+
develop,develops,developed,developed,developing
|
|
84
|
+
die,dies,died,died,dying
|
|
85
|
+
disagree,disagrees,disagreed,disagreed,disagreeing
|
|
86
|
+
disappear,disappears,disappeared,disappeared,disappearing
|
|
87
|
+
discover,discovers,discovered,discovered,discovering
|
|
88
|
+
discuss,discusses,discussed,discussed,discussing
|
|
89
|
+
divide,divides,divided,divided,dividing
|
|
90
|
+
do,does,did,done,doing
|
|
91
|
+
draw,draws,drew,drawn,drawing
|
|
92
|
+
dream,dreams,dreamt/dreamed,dreamt/dreamed,dreaming
|
|
93
|
+
drink,drinks,drank,drunk,drinking
|
|
94
|
+
drive,drives,drove,driven,driving
|
|
95
|
+
drop,drops,dropped,dropped,dropping
|
|
96
|
+
dry,dries,dried,dried,drying
|
|
97
|
+
eat,eats,ate,eaten,eating
|
|
98
|
+
fall,falls,fell,fallen,falling
|
|
99
|
+
feel,feels,felt,felt,feeling
|
|
100
|
+
fight,fights,fought,fought,fighting
|
|
101
|
+
find,finds,found,found,finding
|
|
102
|
+
finish,finishes,finished,finished,finishing
|
|
103
|
+
fish,fishes,fished,fished,fishing
|
|
104
|
+
fit,fits,fit,fit,fitting
|
|
105
|
+
fly,flies,flew,flown,flying
|
|
106
|
+
forget,forgets,forgot,forgotten,forgetting
|
|
107
|
+
forgive,forgives,forgave,forgiven,forgiving
|
|
108
|
+
freeze,freezes,froze,frozen,freezing
|
|
109
|
+
get,gets,got,got,getting
|
|
110
|
+
give,gives,gave,given,giving
|
|
111
|
+
go,goes,went,gone,going
|
|
112
|
+
grow,grows,grew,grown,growing
|
|
113
|
+
hang,hangs,hung,hung,hanging
|
|
114
|
+
have,has,had,had,having
|
|
115
|
+
hear,hears,heard,heard,hearing
|
|
116
|
+
hide,hides,hid,hidden,hiding
|
|
117
|
+
hit,hits,hit,hit,hitting
|
|
118
|
+
hold,holds,held,held,holding
|
|
119
|
+
hurt,hurts,hurt,hurt,hurting
|
|
120
|
+
keep,keeps,kept,kept,keeping
|
|
121
|
+
kick,kicks,kicked,kicked,kicking
|
|
122
|
+
kill,kills,killed,killed,killing
|
|
123
|
+
kiss,kisses,kissed,kissed,kissing
|
|
124
|
+
know,knows,knew,known,knowing
|
|
125
|
+
lay,lays,laid,laid,laying
|
|
126
|
+
lead,leads,led,led,leading
|
|
127
|
+
learn,learns,learned/learnt,learned/learnt,learning
|
|
128
|
+
leave,leaves,left,left,leaving
|
|
129
|
+
lend,lends,lent,lent,lending
|
|
130
|
+
let,lets,let,let,letting
|
|
131
|
+
lie,lies,lay,lain,lying
|
|
132
|
+
light,lights,lit/lighted,lit/lighted,lighting
|
|
133
|
+
like,likes,liked,liked,liking
|
|
134
|
+
lose,loses,lost,lost,losing
|
|
135
|
+
make,makes,made,made,making
|
|
136
|
+
mean,means,meant,meant,meaning
|
|
137
|
+
meet,meets,met,met,meeting
|
|
138
|
+
miss,misses,missed,missed,missing
|
|
139
|
+
move,moves,moved,moved,moving
|
|
140
|
+
need,needs,needed,needed,needing
|
|
141
|
+
paint,paints,painted,painted,painting
|
|
142
|
+
pay,pays,paid,paid,paying
|
|
143
|
+
play,plays,played,played,playing
|
|
144
|
+
prefer,prefers,preferred,preferred,preferring
|
|
145
|
+
prepare,prepares,prepared,prepared,preparing
|
|
146
|
+
present,presents,presented,presented,presenting
|
|
147
|
+
print,prints,printed,printed,printing
|
|
148
|
+
produce,produces,produced,produced,producing
|
|
149
|
+
promise,promises,promised,promised,promising
|
|
150
|
+
put,puts,put,put,putting
|
|
151
|
+
read,reads,read,read,reading
|
|
152
|
+
realize,realizes,realized,realized,realizing
|
|
153
|
+
receive,receives,received,received,receiving
|
|
154
|
+
recognize,recognizes,recognized,recognized,recognizing
|
|
155
|
+
remember,remembers,remembered,remembered,remembering
|
|
156
|
+
repeat,repeats,repeated,repeated,repeating
|
|
157
|
+
replace,replaces,replaced,replaced,replacing
|
|
158
|
+
report,reports,reported,reported,reporting
|
|
159
|
+
request,requests,requested,requested,requesting
|
|
160
|
+
rest,rests,rested,rested,resting
|
|
161
|
+
return,returns,returned,returned,returning
|
|
162
|
+
ring,rings,rang,rung,ringing
|
|
163
|
+
rise,rises,rose,risen,rising
|
|
164
|
+
run,runs,ran,run,running
|
|
165
|
+
say,says,said,said,saying
|
|
166
|
+
see,sees,saw,seen,seeing
|
|
167
|
+
seek,seeks,sought,sought,seeking
|
|
168
|
+
sell,sells,sold,sold,selling
|
|
169
|
+
send,sends,sent,sent,sending
|
|
170
|
+
set,sets,set,set,setting
|
|
171
|
+
settle,settles,settled,settled,settling
|
|
172
|
+
shake,shakes,shook,shaken,shaking
|
|
173
|
+
shine,shines,shone,shone,shining
|
|
174
|
+
shoot,shoots,shot,shot,shooting
|
|
175
|
+
show,shows,showed,shown/showed,showing
|
|
176
|
+
shut,shuts,shut,shut,shutting
|
|
177
|
+
sing,sings,sang,sung,singing
|
|
178
|
+
sink,sinks,sank,sunk,sinking
|
|
179
|
+
sit,sits,sat,sat,sitting
|
|
180
|
+
sleep,sleeps,slept,slept,sleeping
|
|
181
|
+
slip,slips,slipped,slipped,slipping
|
|
182
|
+
smell,smells,smelt/smelled,smelt/smelled,smelling
|
|
183
|
+
speak,speaks,spoke,spoken,speaking
|
|
184
|
+
spend,spends,spent,spent,spending
|
|
185
|
+
stand,stands,stood,stood,standing
|
|
186
|
+
start,starts,started,started,starting
|
|
187
|
+
stay,stays,stayed,stayed,staying
|
|
188
|
+
steal,steals,stole,stolen,stealing
|
|
189
|
+
stick,sticks,stuck,stuck,sticking
|
|
190
|
+
stick,sticks,stuck,stuck,sticking
|
|
191
|
+
stop,stops,stopped,stopped,stopping
|
|
192
|
+
study,studies,studied,studied,studying
|
|
193
|
+
succeed,succeeds,succeeded,succeeded,succeeding
|
|
194
|
+
suffer,suffers,suffered,suffered,suffering
|
|
195
|
+
suggest,suggests,suggested,suggested,suggesting
|
|
196
|
+
take,takes,took,taken,taking
|
|
197
|
+
teach,teaches,taught,taught,teaching
|
|
198
|
+
tear,tears,tore,torn,tearing
|
|
199
|
+
tell,tells,told,told,telling
|
|
200
|
+
think,thinks,thought,thought,thinking
|
|
201
|
+
throw,throws,threw,thrown,throwing
|
|
202
|
+
understand,understands,understood,understood,understanding
|
|
203
|
+
wake,wakes,woke,woken,waking
|
|
204
|
+
wear,wears,wore,worn,wearing
|
|
205
|
+
win,wins,won,won,winning
|
|
206
|
+
write,writes,wrote,written,writing
|
|
207
|
+
write,writes,wrote,written,writing
|
|
208
|
+
yawn,yawns,yawned,yawned,yawning
|
|
209
|
+
zip,zips,zipped,zipped,zipping
|
|
210
|
+
zoom,zooms,zoomed,zoomed,zooming
|
package/load.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
# Export the "text" column of the WikiText-103 parquet files into one plain-text
# file, with articles separated by a blank line.
input_dir = "wikitext/wikitext-103-raw-v1"
output_file = "robots/wikitext-103-all.txt"
max_articles = None  # set to a positive integer to cap the number of exported articles

# Create the folder that output_file lives in (derived from the path so the two
# cannot drift apart).
os.makedirs(os.path.dirname(output_file) or ".", exist_ok=True)

file_list = [
    "train-00000-of-00002.parquet",
    "train-00001-of-00002.parquet",
    "validation-00000-of-00001.parquet",
    "test-00000-of-00001.parquet",
]

idx = 0  # number of articles written so far
with open(output_file, "w", encoding="utf-8") as fout:
    for fname in file_list:
        # Stop before loading another (large) parquet file once the cap is hit;
        # previously only the inner loop was left and every remaining file was
        # still read from disk.
        if max_articles and idx >= max_articles:
            break
        fpath = os.path.join(input_dir, fname)
        if not os.path.exists(fpath):
            print(f"[WARN] input file not found, skipped: {fpath}")
            continue
        # Only the "text" column is used, so only that column is loaded, and it
        # is iterated directly instead of building a Series per row.
        df = pd.read_parquet(fpath, columns=["text"])
        for raw_text in df["text"]:
            if max_articles and idx >= max_articles:
                break
            # Collapse doubled newlines and trim surrounding whitespace.
            text = str(raw_text).replace('\n\n', '\n').strip()
            if not text:
                continue
            fout.write(text + "\n\n")  # blank line separates entries
            idx += 1
            if idx % 10000 == 0:
                print(f"已导出 {idx} 篇...")

print("导出完成!")
|
package/mainManager.cjs
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
const { spawn } = require('child_process');
|
|
2
|
+
const path = require('path');
|
|
3
|
+
const fs = require('fs');
|
|
4
|
+
|
|
5
|
+
// 本脚本用于“划分大组”:基于不同数据源目录,调用 groupmanager.cjs 生成
|
|
6
|
+
// groups_text / groups_image / groups_audio 三个大组。
|
|
7
|
+
|
|
8
|
+
/**
 * Minimal command-line flag parser.
 *
 * Every token that starts with `--` becomes a key (without the dashes). When
 * the following token is non-empty and is not itself a `--flag`, it is consumed
 * as that key's string value; otherwise the key is set to `true`. Tokens that
 * are neither a flag nor a consumed value are ignored.
 *
 * @param {string[]} argv - Raw arguments, e.g. `process.argv.slice(2)`.
 * @returns {Object<string, string|boolean>} Map of flag name to its value.
 */
function parseArgs(argv) {
  const parsed = {};
  let index = 0;
  while (index < argv.length) {
    const token = argv[index];
    index += 1;
    if (!token.startsWith('--')) {
      continue;
    }
    const name = token.slice(2);
    const next = argv[index];
    if (next && !next.startsWith('--')) {
      parsed[name] = next;
      index += 1; // the value token has been consumed
    } else {
      parsed[name] = true;
    }
  }
  return parsed;
}
|
|
20
|
+
|
|
21
|
+
const args = parseArgs(process.argv.slice(2));

// Resolves the path given for a flag, or the fallback when the flag is absent.
const resolveFlag = (flag, fallback) => path.resolve(args[flag] || fallback);

// Each large group may get its own source folder; when none is given, the
// folder containing this script is used.
const SOURCES = {
  text: resolveFlag('text-root', __dirname),
  image: resolveFlag('image-root', __dirname),
  audio: resolveFlag('audio-root', __dirname),
};

// Output folder of each large group; defaults to groups_<kind> next to this script.
const OUTPUTS = {
  text: resolveFlag('text-groups', path.join(__dirname, 'groups_text')),
  image: resolveFlag('image-groups', path.join(__dirname, 'groups_image')),
  audio: resolveFlag('audio-groups', path.join(__dirname, 'groups_audio')),
};

// Shard size is kept as a string because it is forwarded as a CLI argument.
const SHARD = String(args['shard-size-kb'] || 250);

// --clean is on when the flag is present and its value is not "false" (any case).
const cleanFlag = args['clean'];
const CLEAN = cleanFlag ? String(cleanFlag).toLowerCase() !== 'false' : false;
|
|
38
|
+
|
|
39
|
+
/**
 * Runs groupmanager.cjs as a child process to build one large group.
 *
 * The robots/tests/public/snapshots folders are taken from directly under
 * `sourceRoot`, and the module-level SHARD / CLEAN settings are forwarded.
 *
 * @param {string} kind - Label of the group ('text', 'image', 'audio'); used in logs and error messages.
 * @param {string} sourceRoot - Folder that holds the source files for this group.
 * @param {string} outDir - Folder that receives the generated group_<n> folders.
 * @returns {Promise<void>} Resolves when the child exits with code 0; rejects
 *   when it exits with any other code or cannot be started at all.
 */
function runGroupManager(kind, sourceRoot, outDir) {
  const script = path.join(__dirname, 'groupmanager.cjs');
  // Named childArgs so the module-level `args` is not shadowed.
  const childArgs = [
    script,
    '--source-root', sourceRoot,
    '--groups-dir', outDir,
    '--robots-dir', path.join(sourceRoot, 'robots'),
    '--tests-dir', path.join(sourceRoot, 'tests'),
    '--public-dir', path.join(sourceRoot, 'public'),
    '--snapshots-dir', path.join(sourceRoot, 'snapshots'),
    '--shard-size-kb', SHARD,
  ];
  if (CLEAN) childArgs.push('--clean', 'true');
  console.log(`[MAIN-MANAGER] 生成 ${kind} 大组: node ${childArgs.join(' ')}`);
  return new Promise((resolve, reject) => {
    // process.execPath runs the child with the same Node.js binary as this
    // script instead of whatever `node` happens to be first on PATH.
    const child = spawn(process.execPath, childArgs, { stdio: 'inherit' });
    // Without an 'error' listener a failed spawn raises an unhandled 'error'
    // event instead of rejecting this promise.
    child.on('error', reject);
    child.on('close', (code) => {
      if (code === 0) {
        resolve();
      } else {
        reject(new Error(`${kind} 分组失败 code=${code}`));
      }
    });
  });
}
|
|
64
|
+
|
|
65
|
+
/**
 * Entry point: makes sure every output folder exists, then builds the text,
 * image and audio groups one after another (each run finishes before the next
 * one starts).
 *
 * @returns {Promise<void>} Rejects with the first group-manager failure.
 */
async function main() {
  // Make sure the output root folders exist.
  Object.values(OUTPUTS).forEach((outDir) => {
    fs.mkdirSync(outDir, { recursive: true });
  });

  // Build the three large groups sequentially, in this fixed order.
  for (const kind of ['text', 'image', 'audio']) {
    await runGroupManager(kind, SOURCES[kind], OUTPUTS[kind]);
  }

  console.log('[MAIN-MANAGER] 三个大组已准备完成:', OUTPUTS);
  console.log('[MAIN-MANAGER] 后续可用 mainStarter.cjs 启动各自 GroupStarter');
}

// Any failure is logged and turned into a non-zero exit code.
main().catch((err) => {
  console.error('[MAIN-MANAGER] 失败:', err.message);
  process.exit(1);
});
|