079project 6.0.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.cache/38/9a0e6a4756f17b0edebad6a7be1eed.json +1 -0
- package/079project_frontend/README.md +70 -0
- package/079project_frontend/package-lock.json +17310 -0
- package/079project_frontend/package.json +40 -0
- package/079project_frontend/public/favicon.ico +0 -0
- package/079project_frontend/public/index.html +43 -0
- package/079project_frontend/public/logo192.png +0 -0
- package/079project_frontend/public/logo512.png +0 -0
- package/079project_frontend/public/manifest.json +25 -0
- package/079project_frontend/public/robots.txt +3 -0
- package/079project_frontend/src/App.css +515 -0
- package/079project_frontend/src/App.js +286 -0
- package/079project_frontend/src/App.test.js +8 -0
- package/079project_frontend/src/api/client.js +103 -0
- package/079project_frontend/src/components/AuthGate.js +153 -0
- package/079project_frontend/src/components/ConfigPanel.js +643 -0
- package/079project_frontend/src/index.css +21 -0
- package/079project_frontend/src/index.js +17 -0
- package/079project_frontend/src/logo.svg +1 -0
- package/079project_frontend/src/reportWebVitals.js +13 -0
- package/079project_frontend/src/setupTests.js +5 -0
- package/README.en.md +234 -0
- package/README.md +0 -0
- package/auth_frontend_server.cjs +312 -0
- package/main.cjs +2259 -83
- package/memeMergeWorker.cjs +256 -0
- package/package.json +28 -15
- package/robots/wikitext-something.txt +1 -39254
- package/tools_install.js +136 -0
- package/model_RNN.py +0 -209
package/tools_install.js
ADDED
@@ -0,0 +1,136 @@
+'use strict';
+
+const { spawn } = require('child_process');
+const path = require('path');
+
+const CNPM_CMD = process.platform === 'win32' ? 'cnpm.cmd' : 'cnpm';
+const RETRY_LIMIT = 10;
+
+const args = process.argv.slice(2);
+if (args.length === 0) {
+  console.error('[tools_install] Usage: node tools_install.js <target-file> [-- <args...>]');
+  process.exit(1);
+}
+
+const splitIndex = args.indexOf('--');
+const targetPath = path.resolve(splitIndex === -1 ? args[0] : args[0]);
+const targetArgs = splitIndex === -1 ? args.slice(1) : args.slice(splitIndex + 1);
+
+const attemptedInstalls = new Set();
+let retryCount = 0;
+
+function runTarget() {
+  return new Promise((resolve) => {
+    const child = spawn(process.execPath, [targetPath, ...targetArgs], {
+      stdio: 'inherit'
+    });
+    child.on('exit', (code) => {
+      resolve({ code, signal: null });
+    });
+    child.on('error', (err) => {
+      console.error('[tools_install] Failed to start target process:', err.message);
+      resolve({ code: 1, signal: 'spawn-error' });
+    });
+  });
+}
+
+function parseMissingModule(stderrText) {
+  const regex = /Error: Cannot find module '([^']+)'/;
+  const match = regex.exec(stderrText);
+  if (match && match[1]) {
+    const moduleName = match[1];
+    if (moduleName.startsWith('.') || moduleName.startsWith('/')) {
+      return null;
+    }
+    return moduleName;
+  }
+  return null;
+}
+
+function installDependency(moduleName) {
+  return new Promise((resolve) => {
+    console.log(`[tools_install] Installing missing dependency: ${moduleName}`);
+    const installer = spawn(CNPM_CMD, ['install', moduleName], {
+      stdio: 'inherit',
+      shell: process.platform === 'win32'
+    });
+    installer.on('exit', (code) => {
+      if (code === 0) {
+        console.log(`[tools_install] Installed ${moduleName} successfully.`);
+        resolve(true);
+      } else {
+        console.error(`[tools_install] Failed to install ${moduleName} (exit code ${code}).`);
+        resolve(false);
+      }
+    });
+    installer.on('error', (err) => {
+      console.error('[tools_install] Failed to launch cnpm:', err.message);
+      if (err.code === 'ENOENT' || err.code === 'EINVAL') {
+        console.error('[tools_install] Hint: install cnpm globally via "npm install -g cnpm --registry=https://registry.npmmirror.com"');
+      }
+      resolve(false);
+    });
+  });
+}
+
+async function main() {
+  while (retryCount <= RETRY_LIMIT) {
+    const child = spawn(process.execPath, [targetPath, ...targetArgs], {
+      stdio: ['inherit', 'pipe', 'pipe']
+    });
+
+    let stderrBuffer = '';
+    child.stderr.on('data', (chunk) => {
+      const text = chunk.toString();
+      stderrBuffer += text;
+      process.stderr.write(chunk);
+    });
+    child.stdout.on('data', (chunk) => {
+      process.stdout.write(chunk);
+    });
+
+    const { code, missingModule } = await new Promise((resolve) => {
+      child.on('exit', (exitCode) => {
+        resolve({ code: exitCode, missingModule: parseMissingModule(stderrBuffer) });
+      });
+      child.on('error', (err) => {
+        console.error('[tools_install] Target process error:', err.message);
+        resolve({ code: 1, missingModule: null });
+      });
+    });
+
+    if (code === 0) {
+      process.exit(0);
+    }
+
+    if (!missingModule) {
+      console.error(`[tools_install] Target exited with code ${code}. No missing module detected.`);
+      process.exit(code || 1);
+    }
+
+    if (attemptedInstalls.has(missingModule)) {
+      console.error(`[tools_install] Already attempted to install ${missingModule}. Aborting.`);
+      process.exit(code || 1);
+    }
+
+    if (retryCount >= RETRY_LIMIT) {
+      console.error('[tools_install] Retry limit reached. Aborting.');
+      process.exit(code || 1);
+    }
+
+    attemptedInstalls.add(missingModule);
+    retryCount += 1;
+    const success = await installDependency(missingModule);
+    if (!success) {
+      process.exit(1);
+    }
+  }
+
+  console.error('[tools_install] Exceeded retry loop unexpectedly.');
+  process.exit(1);
+}
+
+main().catch((err) => {
+  console.error('[tools_install] Fatal error:', err);
+  process.exit(1);
+});
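The new tools_install.js is a small self-healing launcher: it runs a target script under node, scans stderr for "Cannot find module '<name>'" errors, installs the missing package with cnpm, and re-runs the target, aborting after 10 retries or a repeated module. A minimal sketch of invoking it programmatically, assuming main.cjs (shipped in this package) is the wrapped entry point; the "--verbose" flag is purely a hypothetical example argument:

  // Equivalent to: node tools_install.js main.cjs -- --verbose
  const { spawn } = require('child_process');

  const wrapped = spawn(
    process.execPath,
    ['tools_install.js', 'main.cjs', '--', '--verbose'],
    { stdio: 'inherit' }
  );

  // Propagate the wrapper's exit code (it exits 0 only when the target succeeds).
  wrapped.on('exit', (code) => process.exit(code === null ? 1 : code));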
package/model_RNN.py
DELETED
@@ -1,209 +0,0 @@
-# -*- coding: utf-8 -*-
-import os
-import numpy as np
-from flask import Flask, request, jsonify
-from tensorflow.keras.models import Model, load_model
-from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
-from tensorflow.keras.preprocessing.text import Tokenizer
-from tensorflow.keras.preprocessing.sequence import pad_sequences
-import random
-import json
-import re
-import glob
-import time
-
-app = Flask(__name__)
-
-# Paths where the model and tokenizer are saved
-BASE_DIR = os.path.dirname(os.path.abspath(__file__))
-MODEL_PATH = os.path.join(BASE_DIR, 'serializer_seq2seq.h5')
-TOKENIZER_PATH = os.path.join(BASE_DIR, 'tokenizer.json')
-
-# Corpus directory (overridable via environment variable); defaults to robots/ next to this file
-ROBOTS_DIR = os.environ.get('ROBOTS_DIR', os.path.join(BASE_DIR, 'robots'))
-
-# Hyperparameters (overridable via environment variables)
-max_words = int(os.environ.get('MAX_WORDS', '5000'))
-max_len = int(os.environ.get('MAX_LEN', '12'))
-embedding_dim = int(os.environ.get('EMB_DIM', '64'))
-lstm_units = int(os.environ.get('LSTM_UNITS', '64'))
-epochs = int(os.environ.get('EPOCHS', '40'))
-batch_size = int(os.environ.get('BATCH_SIZE', '32'))
-max_files = int(os.environ.get('MAX_FILES', '64'))
-max_lines = int(os.environ.get('MAX_LINES', '800'))
-min_line_len = int(os.environ.get('MIN_LINE_LEN', '6'))
-force_retrain = os.environ.get('FORCE_RETRAIN', '0') == '1'
-
-random.seed(42)
-
-def clean_line(s: str) -> str:
-    if not s:
-        return ''
-    s = s.strip()
-    # Strip URLs, repeated whitespace, and control characters
-    s = re.sub(r'https?://\S+', ' ', s)
-    s = re.sub(r'[\t\r\n]+', ' ', s)
-    s = re.sub(r'\s{2,}', ' ', s)
-    # Cap the length
-    if len(s) > 256:
-        s = s[:256]
-    return s.strip()
-
-def load_corpus(robots_dir: str):
-    if not os.path.isdir(robots_dir):
-        print(f'[CORPUS] Directory does not exist: {robots_dir}')
-        return []
-    files = sorted(glob.glob(os.path.join(robots_dir, '*.txt')))[:max_files]
-    lines = []
-    for fp in files:
-        try:
-            with open(fp, 'r', encoding='utf-8', errors='ignore') as f:
-                for ln in f:
-                    if len(lines) >= max_lines:
-                        break
-                    ln = clean_line(ln)
-                    if len(ln) >= min_line_len:
-                        lines.append(ln)
-        except Exception as e:
-            print(f'[CORPUS] Failed to read {fp}: {e}')
-        if len(lines) >= max_lines:
-            break
-    print(f'[CORPUS] Collected lines: {len(lines)} (files={len(files)})')
-    return lines
-
-def make_pairs(sentences):
-    pairs_in = []
-    pairs_out = []
-    for s in sentences:
-        toks = s.split()
-        if len(toks) == 0:
-            continue
-        shuffled = toks[:]
-        random.shuffle(shuffled)
-        pairs_in.append(' '.join(shuffled))
-        pairs_out.append(' '.join(toks))
-    return pairs_in, pairs_out
-
-def build_or_load_tokenizer(inputs, outputs):
-    if os.path.exists(TOKENIZER_PATH) and not force_retrain:
-        try:
-            with open(TOKENIZER_PATH, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-            tokenizer = Tokenizer.from_json(json.dumps(data))
-            print('[TOKENIZER] Loaded existing tokenizer')
-            return tokenizer
-        except Exception as e:
-            print('[TOKENIZER] Load failed, recreating:', e)
-
-    tokenizer = Tokenizer(num_words=max_words, filters='', lower=True, oov_token='<UNK>')
-    tokenizer.fit_on_texts(inputs + outputs)
-    with open(TOKENIZER_PATH, 'w', encoding='utf-8') as f:
-        f.write(tokenizer.to_json())
-    print('[TOKENIZER] Created and saved')
-    return tokenizer
-
-def prepare_sequences(tokenizer, inputs, outputs):
-    X = tokenizer.texts_to_sequences(inputs)
-    Y = tokenizer.texts_to_sequences(outputs)
-    X = pad_sequences(X, maxlen=max_len, padding='post')
-    Y = pad_sequences(Y, maxlen=max_len, padding='post')
-    return X, Y
-
-def build_model(vocab_size):
-    encoder_inputs = Input(shape=(max_len,))
-    x = Embedding(vocab_size, embedding_dim, mask_zero=True)(encoder_inputs)
-    encoder_outputs, state_h, state_c = LSTM(lstm_units, return_state=True)(x)
-    encoder_states = [state_h, state_c]
-
-    decoder_inputs = Input(shape=(max_len,))
-    y = Embedding(vocab_size, embedding_dim, mask_zero=True)(decoder_inputs)
-    decoder_lstm = LSTM(lstm_units, return_sequences=True, return_state=True)
-    decoder_outputs, _, _ = decoder_lstm(y, initial_state=encoder_states)
-    decoder_dense = Dense(vocab_size, activation='softmax')
-    decoder_outputs = decoder_dense(decoder_outputs)
-
-    model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
-    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
-    return model
-
-def train_if_needed():
-    # Load the corpus
-    sentences = load_corpus(ROBOTS_DIR)
-    if not sentences:
-        # Fall back to a few simple sentences
-        sentences = [
-            "i am a person",
-            "you are a student",
-            "he is a teacher",
-            "she is my friend",
-            "this is a book",
-            "we are happy",
-            "they are here",
-            "it is raining",
-            "the cat is black"
-        ]
-        print('[CORPUS] Using built-in fallback corpus')
-
-    inputs, outputs = make_pairs(sentences)
-    tokenizer = build_or_load_tokenizer(inputs, outputs)
-    X, Y = prepare_sequences(tokenizer, inputs, outputs)
-    vocab_size = len(tokenizer.word_index) + 1
-
-    # If a model already exists and retraining is not forced => load it directly
-    if os.path.exists(MODEL_PATH) and not force_retrain:
-        print('[MODEL] Loading existing model:', MODEL_PATH)
-        model = load_model(MODEL_PATH)
-        return model, tokenizer
-
-    print('[MODEL] Training a new model...')
-    model = build_model(vocab_size)
-
-    decoder_input_data = np.zeros_like(Y)
-    decoder_input_data[:, 1:] = Y[:, :-1]
-    decoder_target_data = np.expand_dims(Y, -1)
-
-    model.fit(
-        [X, decoder_input_data],
-        decoder_target_data,
-        batch_size=batch_size,
-        epochs=epochs,
-        verbose=2
-    )
-    model.save(MODEL_PATH)
-    print('[MODEL] Training finished; model saved')
-    return model, tokenizer
-
-model, tokenizer = train_if_needed()
-print('[READY] Model & tokenizer ready')
-
-def predict_sequence(input_words):
-    seq = tokenizer.texts_to_sequences([' '.join(input_words)])[0]
-    seq = pad_sequences([seq], maxlen=max_len, padding='post')
-    decoder_input = np.zeros((1, max_len), dtype='int32')
-    output_sentence = []
-    for i in range(max_len):
-        preds = model.predict([seq, decoder_input], verbose=0)
-        next_token = np.argmax(preds[0, i])
-        if next_token == 0:
-            break
-        w = tokenizer.index_word.get(next_token, '')
-        if w and w not in output_sentence:
-            output_sentence.append(w)
-        decoder_input[0, i] = next_token
-    return ' '.join(output_sentence).strip()
-
-@app.route('/api/serialize', methods=['POST'])
-def serialize():
-    data = request.json
-    words = data.get('words')
-    if not words or not isinstance(words, list):
-        return jsonify({'error': 'words must be a list'}), 400
-    sentence = predict_sequence(words)
-    return jsonify({'sentence': sentence})
-
-@app.route('/health', methods=['GET'])
-def health():
-    return jsonify({'ok': True, 'time': time.time()})
-
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5008)
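The deleted model_RNN.py was a standalone Flask service on port 5008 that trained or loaded a small seq2seq LSTM over lines from robots/*.txt and exposed POST /api/serialize (JSON body {"words": [...]}, response {"sentence": "..."}) plus GET /health. A minimal sketch of a Node-side call to that now-removed endpoint, assuming a locally running instance and Node 18+ global fetch; the host and example words are illustrative only:

  // Route, payload shape, and port come from the deleted source above;
  // the host and sample words are assumptions for illustration.
  async function serializeWords(words) {
    const res = await fetch('http://127.0.0.1:5008/api/serialize', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ words })
    });
    if (!res.ok) throw new Error(`serialize failed with HTTP ${res.status}`);
    const { sentence } = await res.json();
    return sentence;
  }

  serializeWords(['cat', 'black', 'the', 'is']).then(console.log).catch(console.error);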