079project 6.0.0 → 8.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,136 @@
+ 'use strict';
+
+ const { spawn } = require('child_process');
+ const path = require('path');
+
+ const CNPM_CMD = process.platform === 'win32' ? 'cnpm.cmd' : 'cnpm';
+ const RETRY_LIMIT = 10;
+
+ const args = process.argv.slice(2);
+ if (args.length === 0) {
+   console.error('[tools_install] Usage: node tools_install.js <target-file> [-- <args...>]');
+   process.exit(1);
+ }
+
+ const splitIndex = args.indexOf('--');
+ const targetPath = path.resolve(args[0]);
+ const targetArgs = splitIndex === -1 ? args.slice(1) : args.slice(splitIndex + 1);
+
+ const attemptedInstalls = new Set();
+ let retryCount = 0;
+
+ function runTarget() {
+   return new Promise((resolve) => {
+     const child = spawn(process.execPath, [targetPath, ...targetArgs], {
+       stdio: 'inherit'
+     });
+     child.on('exit', (code) => {
+       resolve({ code, signal: null });
+     });
+     child.on('error', (err) => {
+       console.error('[tools_install] Failed to start target process:', err.message);
+       resolve({ code: 1, signal: 'spawn-error' });
+     });
+   });
+ }
+
+ function parseMissingModule(stderrText) {
+   const regex = /Error: Cannot find module '([^']+)'/;
+   const match = regex.exec(stderrText);
+   if (match && match[1]) {
+     const moduleName = match[1];
+     if (moduleName.startsWith('.') || moduleName.startsWith('/')) {
+       return null;
+     }
+     return moduleName;
+   }
+   return null;
+ }
+
+ function installDependency(moduleName) {
+   return new Promise((resolve) => {
+     console.log(`[tools_install] Installing missing dependency: ${moduleName}`);
+     const installer = spawn(CNPM_CMD, ['install', moduleName], {
+       stdio: 'inherit',
+       shell: process.platform === 'win32'
+     });
+     installer.on('exit', (code) => {
+       if (code === 0) {
+         console.log(`[tools_install] Installed ${moduleName} successfully.`);
+         resolve(true);
+       } else {
+         console.error(`[tools_install] Failed to install ${moduleName} (exit code ${code}).`);
+         resolve(false);
+       }
+     });
+     installer.on('error', (err) => {
+       console.error('[tools_install] Failed to launch cnpm:', err.message);
+       if (err.code === 'ENOENT' || err.code === 'EINVAL') {
+         console.error('[tools_install] Hint: install cnpm globally via "npm install -g cnpm --registry=https://registry.npmmirror.com"');
+       }
+       resolve(false);
+     });
+   });
+ }
+
+ async function main() {
+   while (retryCount <= RETRY_LIMIT) {
+     const child = spawn(process.execPath, [targetPath, ...targetArgs], {
+       stdio: ['inherit', 'pipe', 'pipe']
+     });
+
+     let stderrBuffer = '';
+     child.stderr.on('data', (chunk) => {
+       const text = chunk.toString();
+       stderrBuffer += text;
+       process.stderr.write(chunk);
+     });
+     child.stdout.on('data', (chunk) => {
+       process.stdout.write(chunk);
+     });
+
+     const { code, missingModule } = await new Promise((resolve) => {
+       child.on('exit', (exitCode) => {
+         resolve({ code: exitCode, missingModule: parseMissingModule(stderrBuffer) });
+       });
+       child.on('error', (err) => {
+         console.error('[tools_install] Target process error:', err.message);
+         resolve({ code: 1, missingModule: null });
+       });
+     });
+
+     if (code === 0) {
+       process.exit(0);
+     }
+
+     if (!missingModule) {
+       console.error(`[tools_install] Target exited with code ${code}. No missing module detected.`);
+       process.exit(code || 1);
+     }
+
+     if (attemptedInstalls.has(missingModule)) {
+       console.error(`[tools_install] Already attempted to install ${missingModule}. Aborting.`);
+       process.exit(code || 1);
+     }
+
+     if (retryCount >= RETRY_LIMIT) {
+       console.error('[tools_install] Retry limit reached. Aborting.');
+       process.exit(code || 1);
+     }
+
+     attemptedInstalls.add(missingModule);
+     retryCount += 1;
+     const success = await installDependency(missingModule);
+     if (!success) {
+       process.exit(1);
+     }
+   }
+
+   console.error('[tools_install] Exceeded retry loop unexpectedly.');
+   process.exit(1);
+ }
+
+ main().catch((err) => {
+   console.error('[tools_install] Fatal error:', err);
+   process.exit(1);
+ });
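
The script added above wraps a target Node program: it runs the target, watches its stderr for Node's standard "Cannot find module" error, installs the named package with cnpm, and re-runs the target up to RETRY_LIMIT times. As a sketch of the detection step only, the snippet below runs a fabricated stderr excerpt through the same regular expression used by parseMissingModule(); the module name left-pad and the path are purely illustrative.

    // Hypothetical stderr excerpt; only the regex behaviour is shown here.
    const sample = [
      "Error: Cannot find module 'left-pad'",
      "Require stack:",
      "- /app/target.js"
    ].join('\n');
    const match = /Error: Cannot find module '([^']+)'/.exec(sample);
    console.log(match && match[1]); // prints: left-pad
    // parseMissingModule() additionally discards names starting with '.' or '/',
    // so only bare package names ever reach the cnpm install step.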
package/model_RNN.py DELETED
@@ -1,209 +0,0 @@
- # -*- coding: utf-8 -*-
- import os
- import numpy as np
- from flask import Flask, request, jsonify
- from tensorflow.keras.models import Model, load_model
- from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
- from tensorflow.keras.preprocessing.text import Tokenizer, tokenizer_from_json
- from tensorflow.keras.preprocessing.sequence import pad_sequences
- import random
- import json
- import re
- import glob
- import time
-
- app = Flask(__name__)
-
- # Paths where the model and tokenizer are saved
- BASE_DIR = os.path.dirname(os.path.abspath(__file__))
- MODEL_PATH = os.path.join(BASE_DIR, 'serializer_seq2seq.h5')
- TOKENIZER_PATH = os.path.join(BASE_DIR, 'tokenizer.json')
-
- # Corpus directory (overridable via environment variable); defaults to robots/ next to this file
- ROBOTS_DIR = os.environ.get('ROBOTS_DIR', os.path.join(BASE_DIR, 'robots'))
-
- # Hyperparameters (overridable via environment variables)
- max_words = int(os.environ.get('MAX_WORDS', '5000'))
- max_len = int(os.environ.get('MAX_LEN', '12'))
- embedding_dim = int(os.environ.get('EMB_DIM', '64'))
- lstm_units = int(os.environ.get('LSTM_UNITS', '64'))
- epochs = int(os.environ.get('EPOCHS', '40'))
- batch_size = int(os.environ.get('BATCH_SIZE', '32'))
- max_files = int(os.environ.get('MAX_FILES', '64'))
- max_lines = int(os.environ.get('MAX_LINES', '800'))
- min_line_len = int(os.environ.get('MIN_LINE_LEN', '6'))
- force_retrain = os.environ.get('FORCE_RETRAIN', '0') == '1'
-
- random.seed(42)
-
- def clean_line(s: str) -> str:
-     if not s:
-         return ''
-     s = s.strip()
-     # Strip URLs, repeated whitespace and control characters
-     s = re.sub(r'https?://\S+', ' ', s)
-     s = re.sub(r'[\t\r\n]+', ' ', s)
-     s = re.sub(r'\s{2,}', ' ', s)
-     # Cap the length
-     if len(s) > 256:
-         s = s[:256]
-     return s.strip()
-
- def load_corpus(robots_dir: str):
-     if not os.path.isdir(robots_dir):
-         print(f'[CORPUS] Directory does not exist: {robots_dir}')
-         return []
-     files = sorted(glob.glob(os.path.join(robots_dir, '*.txt')))[:max_files]
-     lines = []
-     for fp in files:
-         try:
-             with open(fp, 'r', encoding='utf-8', errors='ignore') as f:
-                 for ln in f:
-                     if len(lines) >= max_lines:
-                         break
-                     ln = clean_line(ln)
-                     if len(ln) >= min_line_len:
-                         lines.append(ln)
-         except Exception as e:
-             print(f'[CORPUS] Failed to read {fp}: {e}')
-         if len(lines) >= max_lines:
-             break
-     print(f'[CORPUS] Collected lines: {len(lines)} (files={len(files)})')
-     return lines
-
- def make_pairs(sentences):
-     pairs_in = []
-     pairs_out = []
-     for s in sentences:
-         toks = s.split()
-         if len(toks) == 0:
-             continue
-         shuffled = toks[:]
-         random.shuffle(shuffled)
-         pairs_in.append(' '.join(shuffled))
-         pairs_out.append(' '.join(toks))
-     return pairs_in, pairs_out
-
- def build_or_load_tokenizer(inputs, outputs):
-     if os.path.exists(TOKENIZER_PATH) and not force_retrain:
-         try:
-             with open(TOKENIZER_PATH, 'r', encoding='utf-8') as f:
-                 data = json.load(f)
-             tokenizer = tokenizer_from_json(json.dumps(data))
-             print('[TOKENIZER] Loaded existing tokenizer')
-             return tokenizer
-         except Exception as e:
-             print('[TOKENIZER] Failed to load, recreating:', e)
-
-     tokenizer = Tokenizer(num_words=max_words, filters='', lower=True, oov_token='<UNK>')
-     tokenizer.fit_on_texts(inputs + outputs)
-     with open(TOKENIZER_PATH, 'w', encoding='utf-8') as f:
-         f.write(tokenizer.to_json())
-     print('[TOKENIZER] Created and saved a new tokenizer')
-     return tokenizer
-
- def prepare_sequences(tokenizer, inputs, outputs):
-     X = tokenizer.texts_to_sequences(inputs)
-     Y = tokenizer.texts_to_sequences(outputs)
-     X = pad_sequences(X, maxlen=max_len, padding='post')
-     Y = pad_sequences(Y, maxlen=max_len, padding='post')
-     return X, Y
-
- def build_model(vocab_size):
-     encoder_inputs = Input(shape=(max_len,))
-     x = Embedding(vocab_size, embedding_dim, mask_zero=True)(encoder_inputs)
-     encoder_outputs, state_h, state_c = LSTM(lstm_units, return_state=True)(x)
-     encoder_states = [state_h, state_c]
-
-     decoder_inputs = Input(shape=(max_len,))
-     y = Embedding(vocab_size, embedding_dim, mask_zero=True)(decoder_inputs)
-     decoder_lstm = LSTM(lstm_units, return_sequences=True, return_state=True)
-     decoder_outputs, _, _ = decoder_lstm(y, initial_state=encoder_states)
-     decoder_dense = Dense(vocab_size, activation='softmax')
-     decoder_outputs = decoder_dense(decoder_outputs)
-
-     model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
-     model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
-     return model
-
- def train_if_needed():
-     # Load the corpus
-     sentences = load_corpus(ROBOTS_DIR)
-     if not sentences:
-         # Fall back to a small built-in sentence set
-         sentences = [
-             "i am a person",
-             "you are a student",
-             "he is a teacher",
-             "she is my friend",
-             "this is a book",
-             "we are happy",
-             "they are here",
-             "it is raining",
-             "the cat is black"
-         ]
-         print('[CORPUS] Using built-in fallback corpus')
-
-     inputs, outputs = make_pairs(sentences)
-     tokenizer = build_or_load_tokenizer(inputs, outputs)
-     X, Y = prepare_sequences(tokenizer, inputs, outputs)
-     vocab_size = len(tokenizer.word_index) + 1
-
-     # If a model already exists and retraining is not forced, load it directly
-     if os.path.exists(MODEL_PATH) and not force_retrain:
-         print('[MODEL] Loading existing model:', MODEL_PATH)
-         model = load_model(MODEL_PATH)
-         return model, tokenizer
-
-     print('[MODEL] Training a new model...')
-     model = build_model(vocab_size)
-
-     decoder_input_data = np.zeros_like(Y)
-     decoder_input_data[:, 1:] = Y[:, :-1]
-     decoder_target_data = np.expand_dims(Y, -1)
-
-     model.fit(
-         [X, decoder_input_data],
-         decoder_target_data,
-         batch_size=batch_size,
-         epochs=epochs,
-         verbose=2
-     )
-     model.save(MODEL_PATH)
-     print('[MODEL] Training finished; model saved')
-     return model, tokenizer
-
- model, tokenizer = train_if_needed()
- print('[READY] Model & tokenizer ready')
-
- def predict_sequence(input_words):
-     seq = tokenizer.texts_to_sequences([' '.join(input_words)])[0]
-     seq = pad_sequences([seq], maxlen=max_len, padding='post')
-     decoder_input = np.zeros((1, max_len), dtype='int32')
-     output_sentence = []
-     for i in range(max_len):
-         preds = model.predict([seq, decoder_input], verbose=0)
-         next_token = np.argmax(preds[0, i])
-         if next_token == 0:
-             break
-         w = tokenizer.index_word.get(next_token, '')
-         if w and w not in output_sentence:
-             output_sentence.append(w)
-         decoder_input[0, i] = next_token
-     return ' '.join(output_sentence).strip()
-
- @app.route('/api/serialize', methods=['POST'])
- def serialize():
-     data = request.get_json(silent=True) or {}
-     words = data.get('words')
-     if not words or not isinstance(words, list):
-         return jsonify({'error': 'words must be a list'}), 400
-     sentence = predict_sequence(words)
-     return jsonify({'sentence': sentence})
-
- @app.route('/health', methods=['GET'])
- def health():
-     return jsonify({'ok': True, 'time': time.time()})
-
- if __name__ == '__main__':
-     app.run(host='0.0.0.0', port=5008)
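
For context on what was removed: model_RNN.py trained or loaded a small seq2seq model at import time and served it over Flask on port 5008. Below is a minimal sketch of how a client would have called the removed /api/serialize route, assuming Node 18+ for the global fetch and a locally running instance; the word list is illustrative only.

    // Hypothetical client call against the removed /api/serialize endpoint.
    // Assumes the Flask service from model_RNN.py is running on localhost:5008.
    fetch('http://localhost:5008/api/serialize', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ words: ['cat', 'the', 'black', 'is'] })
    })
      .then((res) => res.json())
      .then((body) => console.log(body)); // { sentence: ... } on success, { error: ... } with HTTP 400 otherwise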