@hupan56/wlkj 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.js +213 -0
- package/package.json +11 -0
- package/templates/cli.js +198 -0
- package/templates/qoder/commands/wl-code.md +43 -0
- package/templates/qoder/commands/wl-commit.md +30 -0
- package/templates/qoder/commands/wl-init.md +80 -0
- package/templates/qoder/commands/wl-insight.md +51 -0
- package/templates/qoder/commands/wl-prd.md +199 -0
- package/templates/qoder/commands/wl-report.md +166 -0
- package/templates/qoder/commands/wl-search.md +52 -0
- package/templates/qoder/commands/wl-spec.md +18 -0
- package/templates/qoder/commands/wl-status.md +51 -0
- package/templates/qoder/commands/wl-task.md +71 -0
- package/templates/qoder/commands/wl-test.md +42 -0
- package/templates/qoder/config.toml +5 -0
- package/templates/qoder/config.yaml +141 -0
- package/templates/qoder/hooks/inject-workflow-state.py +117 -0
- package/templates/qoder/hooks/session-start.py +204 -0
- package/templates/qoder/rules/wl-pipeline.md +105 -0
- package/templates/qoder/scripts/add_session.py +245 -0
- package/templates/qoder/scripts/benchmark.py +209 -0
- package/templates/qoder/scripts/build_style_index.py +268 -0
- package/templates/qoder/scripts/code_index.py +41 -0
- package/templates/qoder/scripts/collect_prds.py +31 -0
- package/templates/qoder/scripts/common/__init__.py +0 -0
- package/templates/qoder/scripts/common/active_task.py +230 -0
- package/templates/qoder/scripts/common/atomicio.py +172 -0
- package/templates/qoder/scripts/common/developer.py +161 -0
- package/templates/qoder/scripts/common/eval_api.py +144 -0
- package/templates/qoder/scripts/common/feishu.py +278 -0
- package/templates/qoder/scripts/common/filelock.py +211 -0
- package/templates/qoder/scripts/common/identity.py +285 -0
- package/templates/qoder/scripts/common/mentions.py +134 -0
- package/templates/qoder/scripts/common/paths.py +311 -0
- package/templates/qoder/scripts/common/reqid.py +218 -0
- package/templates/qoder/scripts/common/search_engine.py +205 -0
- package/templates/qoder/scripts/common/task_utils.py +342 -0
- package/templates/qoder/scripts/common/terms.py +234 -0
- package/templates/qoder/scripts/common/utf8.py +38 -0
- package/templates/qoder/scripts/context_pack.py +196 -0
- package/templates/qoder/scripts/eval_prd.py +225 -0
- package/templates/qoder/scripts/export.py +487 -0
- package/templates/qoder/scripts/git_sync.py +1087 -0
- package/templates/qoder/scripts/handoff.py +22 -0
- package/templates/qoder/scripts/init_developer.py +76 -0
- package/templates/qoder/scripts/init_doctor.py +527 -0
- package/templates/qoder/scripts/install_qoderwork.py +339 -0
- package/templates/qoder/scripts/learn.py +67 -0
- package/templates/qoder/scripts/notify.py +5 -0
- package/templates/qoder/scripts/parse_prds.py +33 -0
- package/templates/qoder/scripts/report.py +281 -0
- package/templates/qoder/scripts/role.py +39 -0
- package/templates/qoder/scripts/run_weekly_update.bat +17 -0
- package/templates/qoder/scripts/run_weekly_update.sh +20 -0
- package/templates/qoder/scripts/search_index.py +352 -0
- package/templates/qoder/scripts/setup.py +453 -0
- package/templates/qoder/scripts/setup_weekly_cron.bat +22 -0
- package/templates/qoder/scripts/setup_weekly_cron.sh +19 -0
- package/templates/qoder/scripts/status.py +389 -0
- package/templates/qoder/scripts/syncgate.py +330 -0
- package/templates/qoder/scripts/task.py +954 -0
- package/templates/qoder/scripts/team.py +29 -0
- package/templates/qoder/scripts/team_sync.py +419 -0
- package/templates/qoder/scripts/workspace_init.py +102 -0
- package/templates/qoder/settings.json +53 -0
- package/templates/qoder/skills/design-review/SKILL.md +25 -0
- package/templates/qoder/skills/prd-generator/SKILL.md +180 -0
- package/templates/qoder/skills/prd-review/SKILL.md +36 -0
- package/templates/qoder/skills/prototype-generator/SKILL.md +141 -0
- package/templates/qoder/skills/spec-coder/SKILL.md +69 -0
- package/templates/qoder/skills/spec-generator/SKILL.md +67 -0
- package/templates/qoder/skills/test-generator/SKILL.md +72 -0
- package/templates/qoder/skills/wl-commit/SKILL.md +76 -0
- package/templates/qoder/skills/wl-init/SKILL.md +67 -0
- package/templates/qoder/skills/wl-insight/SKILL.md +81 -0
- package/templates/qoder/skills/wl-report/SKILL.md +87 -0
- package/templates/qoder/skills/wl-search/SKILL.md +75 -0
- package/templates/qoder/skills/wl-status/SKILL.md +61 -0
- package/templates/qoder/skills/wl-task/SKILL.md +58 -0
- package/templates/qoder/templates/prd-full-template.md +103 -0
- package/templates/qoder/templates/prd-quick-template.md +69 -0
- package/templates/qoder/templates/prototype-app.html +344 -0
- package/templates/qoder/templates/prototype-web.html +310 -0
- package/templates/root/AGENTS.md +182 -0
- package/templates/root/README-pipeline.md +56 -0
- package/templates/root/ROLES.md +85 -0
- package/templates/root/新手指南.md +186 -0
|
@@ -0,0 +1,1087 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Weekly Knowledge Graph Update
|
|
5
|
+
|
|
6
|
+
Usage:
|
|
7
|
+
python git_sync.py # Full update: git + PRD + index
|
|
8
|
+
python git_sync.py --sync-only # Git pull only
|
|
9
|
+
python git_sync.py --index-only # Re-index only (no git pull)
|
|
10
|
+
python git_sync.py --prd-only # Collect + parse PRDs only
|
|
11
|
+
python git_sync.py --project fywl-ics # Sync single project
|
|
12
|
+
|
|
13
|
+
What it does (Friday Update):
|
|
14
|
+
1. Git pull latest code for each project
|
|
15
|
+
2. Collect PRDs from all user workspaces
|
|
16
|
+
3. Parse PRDs to extract business rules and features
|
|
17
|
+
4. Build incremental indexes (only changed files)
|
|
18
|
+
5. Build PRD ↔ Code mapping
|
|
19
|
+
|
|
20
|
+
Config: .qoder/config.yaml -> git_sync section
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
import os
|
|
24
|
+
import sys
|
|
25
|
+
import json
|
|
26
|
+
import hashlib
|
|
27
|
+
import subprocess
|
|
28
|
+
import re
|
|
29
|
+
import shutil
|
|
30
|
+
from datetime import datetime
|
|
31
|
+
|
|
32
|
+
# UTF-8 stdio (defensive: must not crash when stdout is captured/replaced).
# First try UTF-8 with errors='replace'; if that is rejected, retry with the
# encoding only; if that fails as well, leave stdout untouched.
try:
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
except (AttributeError, TypeError, OSError):
    # IOError is an alias of OSError on Python 3, so OSError alone covers it.
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except Exception:
        # Best effort only.
        pass
|
|
40
|
+
|
|
41
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
42
|
+
from common.terms import BUSINESS_PATH_MAP, CN_TO_EN, PRD_STOP_WORDS
|
|
43
|
+
|
|
44
|
+
# Root directory: three levels above this file, i.e. the grandparent of the
# directory that contains the script (resolved lexically).
BASE = os.path.abspath(
    os.path.join(os.path.abspath(__file__), os.pardir, os.pardir, os.pardir)
)

# Data layout below the root.
DATA_DIR = os.path.join(BASE, 'data')
CODE_DIR = os.path.join(DATA_DIR, 'code')
INDEX_DIR = os.path.join(DATA_DIR, 'index')
PRD_DIR = os.path.join(DATA_DIR, 'docs', 'prd')
WORKSPACE = os.path.join(BASE, 'workspace', 'members')
CONFIG_PATH = os.path.join(BASE, '.qoder', 'config.yaml')

# Simple lock to avoid two concurrent index writers (cron + manual run).
# A lock file older than LOCK_STALE_SECONDS (two hours) is treated as stale.
LOCK_FILE = os.path.join(INDEX_DIR, '.sync-lock')
LOCK_STALE_SECONDS = 2 * 60 * 60

# Collected at runtime; non-empty => exit code 1 so cron/bat can detect failure
FAILURES = []
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def fail(msg):
    """Register *msg* as a failure of this run and report it on stdout.

    The message is appended to the module-level ``FAILURES`` list and then
    printed with an ``ERROR: `` prefix.
    """
    FAILURES.append(msg)
    report = 'ERROR: ' + msg
    print(report)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def acquire_lock():
    """Try to take the sync lock file.

    The lock is created with ``O_CREAT | O_EXCL`` so that creation is atomic:
    when two processes start at the same time only one of them can create the
    file. (A separate "exists?" check followed by a plain ``open(..., 'w')``
    lets both processes believe they own the lock.)

    A lock file whose mtime is older than ``LOCK_STALE_SECONDS`` is treated as
    stale, removed, and acquisition is retried once.

    Returns:
        True if this process now holds the lock, False otherwise.
    """
    os.makedirs(INDEX_DIR, exist_ok=True)

    # Two attempts at most: the second one only runs after a stale lock was
    # cleared (or after the holder released the lock while we were looking).
    for _attempt in range(2):
        try:
            fd = os.open(LOCK_FILE, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o666)
        except FileExistsError:
            pass
        else:
            with os.fdopen(fd, 'w', encoding='utf-8') as f:
                f.write('{} pid={}\n'.format(datetime.now().isoformat(), os.getpid()))
            return True

        try:
            age = datetime.now().timestamp() - os.path.getmtime(LOCK_FILE)
        except OSError:
            # The lock vanished between the create attempt and the stat call.
            continue

        if age < LOCK_STALE_SECONDS:
            print('Another sync appears to be running (lock age {:.0f}s).'.format(age))
            print('If you are sure it is not, delete: ' + LOCK_FILE)
            return False

        print('Removing stale lock ({:.0f}s old)'.format(age))
        try:
            os.remove(LOCK_FILE)
        except FileNotFoundError:
            # Someone else already cleaned it up; just retry the create.
            pass

    # Lost the race for the lock on both attempts.
    print('Another sync acquired the lock first: ' + LOCK_FILE)
    return False
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def release_lock():
    """Delete the sync lock file.

    Any ``OSError`` (for example the file not existing) is ignored, so the
    function can be called unconditionally during shutdown.
    """
    try:
        os.unlink(LOCK_FILE)
    except OSError:
        # Nothing to release, or the file cannot be removed: best effort.
        return
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def load_json(path, default=None, required=False):
    """Load a JSON file, with explicit handling of unreadable/corrupt files.

    Args:
        path: File to read.
        default: Value returned when the file does not exist or could not be
            parsed (and ``required`` is False). ``None`` means ``{}``.
        required: When True, a corrupt file is a hard error: the lock is
            released and the process exits with status 1. Silently continuing
            would let an empty dict overwrite the real index.

    Returns:
        The parsed JSON value, or the default described above.

    On a read/parse error the file is copied to ``<path>.corrupt`` (best
    effort) and the problem is recorded through ``fail()``.
    """
    if os.path.isfile(path):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        # ValueError covers json.JSONDecodeError *and* UnicodeDecodeError;
        # catching only JSONDecodeError lets a file with invalid UTF-8 bytes
        # escape as an unhandled traceback instead of being treated as corrupt.
        except (ValueError, OSError) as e:
            backup = path + '.corrupt'
            try:
                shutil.copy2(path, backup)
            except OSError:
                backup = '(backup failed)'
            fail('corrupt JSON {}: {} (backed up to {})'.format(
                os.path.basename(path), e, backup))
            if required:
                print('Aborting: refusing to rebuild on top of a corrupt index.')
                print('Fix or delete the file, then run: python git_sync.py --full')
                release_lock()
                sys.exit(1)
    return default if default is not None else {}
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def save_json(path, data):
    """Atomic, deterministic JSON write (temp file + replace, sorted keys).

    The data is serialized to ``<path>.tmp`` first and then moved over *path*
    with ``os.replace``, so readers never observe a half-written file.

    Args:
        path: Destination file. Its parent directory is created if missing;
            a bare filename (no directory part) is written to the current
            working directory.
        data: JSON-serializable value.

    Raises:
        TypeError/ValueError: if *data* cannot be serialized.
        OSError: on I/O failure.
        In both cases the temp file is removed and an existing *path* is
        left untouched.
    """
    parent = os.path.dirname(path)
    if parent:
        # os.makedirs('') raises FileNotFoundError, so skip it for bare names.
        os.makedirs(parent, exist_ok=True)

    tmp = path + '.tmp'
    try:
        with open(tmp, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False, sort_keys=True)
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a partial temp file behind; re-raise the real error.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def file_md5(path):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in binary mode in 64 KiB chunks, so it is never loaded
    into memory as a whole.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def load_config():
    """Load the configuration mapping from ``CONFIG_PATH`` (YAML).

    Returns:
        The parsed mapping, or ``{}`` when the file does not exist, PyYAML is
        not importable, the file cannot be read/parsed, or its top-level value
        is not a mapping. Every case except "file does not exist" prints a
        warning, so a broken config no longer disappears silently.
    """
    if not os.path.exists(CONFIG_PATH):
        return {}

    try:
        import yaml
    except ImportError:
        print('WARNING: PyYAML is not installed; ignoring ' + CONFIG_PATH)
        return {}

    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f) or {}
    # ValueError includes UnicodeDecodeError for files that are not UTF-8.
    except (OSError, ValueError, yaml.YAMLError) as e:
        print('WARNING: could not load {}: {}'.format(CONFIG_PATH, e))
        return {}

    if not isinstance(config, dict):
        # Callers use config.get(...); a list/scalar root would crash them.
        print('WARNING: {} does not contain a mapping; ignoring it'.format(CONFIG_PATH))
        return {}
    return config
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
# ============================================================
|
|
140
|
+
# Part 1: Git Sync (Code Pull)
|
|
141
|
+
# ============================================================
|
|
142
|
+
|
|
143
|
+
def git_pull_project(project_name, branch=None):
    """Git pull a single project on its configured branch.

    Args:
        project_name: Directory name of the project below ``CODE_DIR``.
        branch: Expected branch. When given, the checkout must already be on
            that branch; otherwise the project is reported as failed.

    Returns:
        List of paths (relative to the project root) that differ between the
        commit before and after the pull; ``[]`` when nothing changed or the
        directory is not a git repo; ``None`` on failure (already recorded
        through ``fail()``).
    """
    project_dir = os.path.join(CODE_DIR, project_name)
    if not os.path.isdir(os.path.join(project_dir, '.git')):
        print(f' {project_name}: Not a git repo, skipping')
        return []

    def git(*args):
        """Run ``git <args>`` in the project directory and return the result."""
        cmd = ['git'] + list(args)
        try:
            return subprocess.run(cmd, cwd=project_dir,
                                  capture_output=True, text=True, encoding='utf-8',
                                  errors='replace')
        except OSError as e:
            # git is not installed / not executable: report it like any other
            # failed command instead of crashing with a traceback.
            return subprocess.CompletedProcess(cmd, 127, stdout='', stderr=str(e))

    # Verify we are on the configured branch (config.yaml git_sync.projects)
    result = git('rev-parse', '--abbrev-ref', 'HEAD')
    current_branch = result.stdout.strip() if result.returncode == 0 else ''
    if branch and current_branch != branch:
        fail(f'{project_name}: on branch "{current_branch}", expected "{branch}". '
             f'Checkout the right branch manually, then re-run.')
        return None

    # Commit before the pull. The full hash is kept for comparison and diff;
    # the 8-character form is only used in messages.
    result = git('rev-parse', 'HEAD')
    if result.returncode != 0:
        fail(f'{project_name}: rev-parse failed - {result.stderr.strip()[:120]}')
        return None
    old_full = result.stdout.strip()
    old_commit = old_full[:8]

    print(f' {project_name}: Fetching...')
    result = git('fetch', 'origin')
    if result.returncode != 0:
        fail(f'{project_name}: fetch failed - {result.stderr.strip()[:120]}')
        return None

    print(f' {project_name}: Pulling...')
    pull_args = ['pull', 'origin'] + ([branch] if branch else [])
    result = git(*pull_args)
    if result.returncode != 0:
        fail(f'{project_name}: pull failed - {result.stderr.strip()[:120]}')
        return None

    result = git('rev-parse', 'HEAD')
    if result.returncode != 0:
        fail(f'{project_name}: rev-parse failed - {result.stderr.strip()[:120]}')
        return None
    new_full = result.stdout.strip()
    new_commit = new_full[:8]

    if old_full == new_full:
        print(f' {project_name}: Already up to date ({new_commit})')
        return []

    # Changed files.
    #   -z            NUL-separated, unquoted paths. Without it git C-quotes
    #                 non-ASCII names (core.quotePath), so they would not
    #                 match the files on disk.
    #   --no-renames  list a rename as delete + add, so the old path is also
    #                 reported and its index entries can be dropped.
    result = git('diff', '--name-only', '--no-renames', '-z', old_full, new_full)
    if result.returncode != 0:
        fail(f'{project_name}: diff failed - run git_sync.py --full to reindex')
        return None
    changed = [path for path in result.stdout.split('\0') if path]
    print(f' {project_name}: {old_commit} -> {new_commit}, {len(changed)} files changed')
    return changed
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def git_sync_all(project_filter=None, config=None):
    """Pull every project directory below ``CODE_DIR``.

    Args:
        project_filter: When set, only the project with exactly this directory
            name is synced.
        config: Parsed configuration mapping. Expected branches are read from
            ``git_sync.projects.<name>.branch``; missing or empty sections are
            tolerated.

    Returns:
        ``{project_name: [changed files]}`` containing only projects whose
        pull produced changes. Failed projects are recorded via ``fail()``
        inside ``git_pull_project`` and are absent from the result.
    """
    print('\n=== Git Sync ===')
    print(f'Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n')

    if not os.path.exists(CODE_DIR):
        fail('data/code/ not found')
        return {}

    branch_cfg = {}
    if config:
        # A key that is present but empty in YAML loads as None, so
        # ``config.get('git_sync', {})`` is not enough to guarantee a dict.
        sync_cfg = config.get('git_sync') or {}
        projects = sync_cfg.get('projects') if isinstance(sync_cfg, dict) else None
        if isinstance(projects, dict):
            for name, proj in projects.items():
                if isinstance(proj, dict) and proj.get('branch'):
                    branch_cfg[name] = proj['branch']

    changed_map = {}

    for project_name in sorted(os.listdir(CODE_DIR)):
        if not os.path.isdir(os.path.join(CODE_DIR, project_name)):
            continue
        if project_filter and project_name != project_filter:
            continue

        changed = git_pull_project(project_name, branch_cfg.get(project_name))
        # None (failure) and [] (no change) are both skipped here.
        if changed:
            changed_map[project_name] = changed

    if not changed_map:
        print('\nNo projects had changes.')
    else:
        total = sum(len(fs) for fs in changed_map.values())
        print(f'\nChanged: {total} files in {len(changed_map)} projects')

    return changed_map
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
# ============================================================
|
|
240
|
+
# Part 2: PRD Collection
|
|
241
|
+
# ============================================================
|
|
242
|
+
|
|
243
|
+
def collect_prds():
    """Copy PRD drafts from every member workspace into ``PRD_DIR``.

    For each ``<WORKSPACE>/<user>/drafts`` directory, files named
    ``REQ-*.md``, ``PRD-*.md`` or ``prd-*.md`` are considered. A file is
    copied when it has not been tracked before or when its MD5 differs from
    the tracked one (content hash rather than mtime, because mtime is not
    preserved by git and a fresh clone would otherwise re-collect everything).

    A filename already tracked for a different user is a collision: it is
    reported through ``fail()`` and skipped instead of being overwritten.

    Returns:
        Number of files copied (new + updated); 0 when the workspace
        directory does not exist.
    """
    print('\n=== Collecting PRDs ===\n')

    if not os.path.isdir(WORKSPACE):
        print('Workspace not found')
        return 0

    os.makedirs(PRD_DIR, exist_ok=True)
    track_file = os.path.join(INDEX_DIR, '.prd-collected.json')
    collected = load_json(track_file)

    counts = {'NEW': 0, 'UPDATED': 0}

    for user_name in sorted(os.listdir(WORKSPACE)):
        drafts_dir = os.path.join(WORKSPACE, user_name, 'drafts')
        if not os.path.isdir(drafts_dir):
            continue

        for f in sorted(os.listdir(drafts_dir)):
            wanted = f.startswith(('REQ-', 'PRD-', 'prd-')) and f.endswith('.md')
            if not wanted:
                continue
            filepath = os.path.join(drafts_dir, f)
            digest = file_md5(filepath)

            prev = collected.get(f)
            if prev and prev.get('user') and prev['user'] != user_name:
                fail(f'PRD filename collision: {f} exists from "{prev["user"]}" '
                     f'and "{user_name}". Rename one of them (REQ numbers must be unique).')
                continue

            # Decide what happened to this file since the last collection.
            if prev is None:
                label = 'NEW'
            elif prev.get('md5') != digest:
                label = 'UPDATED'
            else:
                continue

            shutil.copy2(filepath, os.path.join(PRD_DIR, f))
            collected[f] = {
                'user': user_name,
                'md5': digest,
                'collected_at': datetime.now().strftime('%Y-%m-%d %H:%M')
            }
            print(f' {label}: {f} (by {user_name})')
            counts[label] += 1

    save_json(track_file, collected)

    total_new = counts['NEW']
    total_updated = counts['UPDATED']
    print(f'\nCollected: {total_new} new, {total_updated} updated')
    print(f'Total in prd/: {len(os.listdir(PRD_DIR)) if os.path.isdir(PRD_DIR) else 0}')
    return total_new + total_updated
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
# ============================================================
|
|
310
|
+
# Part 3: PRD Parsing
|
|
311
|
+
# ============================================================
|
|
312
|
+
|
|
313
|
+
def extract_chinese_terms(text):
    """Extract up to 30 distinct business terms from *text*.

    A term is a run of 2-4 consecutive characters in the range matched by the
    pattern below; terms listed in ``PRD_STOP_WORDS`` (from common.terms) are
    dropped.

    Terms are returned in order of first appearance. De-duplicating through a
    ``set`` would make both the order and the choice of the 30 survivors
    depend on string-hash randomization, i.e. differ from run to run.
    """
    raw = re.findall(r'[一-鿿]{2,4}', text)
    # dict.fromkeys de-duplicates while keeping first-seen order.
    unique = dict.fromkeys(t for t in raw if t not in PRD_STOP_WORDS)
    return list(unique)[:30]
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def parse_prd_file(filepath):
    """Parse a single PRD markdown file into an index record.

    Returns a dict with:
        file      base name of the file
        title     first markdown heading, else the file name without '.md'
        sections  text of up to 20 level 1-3 headings
        features  up to 20 numbered list items ("1. ..."), bold markers
                  stripped, each cut to 100 characters
        rules     up to 15 rule-like sentences, each cut to 200 characters
        keywords  up to 50 keywords: the Chinese terms plus their CN_TO_EN
                  translation when one exists, in deterministic order
        cn_terms  the Chinese terms from extract_chinese_terms()
        mtime     modification time of the file
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        text = f.read()

    filename = os.path.basename(filepath)

    # Extract title
    title_match = re.search(r'^#\s+(.+)', text, re.MULTILINE)
    title = title_match.group(1).strip() if title_match else filename.replace('.md', '')

    # Extract sections
    sections = re.findall(r'^#{1,3}\s+(.+)', text, re.MULTILINE)

    # Extract features (numbered items).
    # The pattern is anchored to the end of the line. With a lazy group
    # followed only by an optional ``\**`` the group stops after a single
    # character, so every candidate fails the length filter below and the
    # feature list is always empty.
    features = []
    for item in re.findall(r'^[ \t]*\d+\.[ \t]+(.+)$', text, re.MULTILINE):
        item = item.strip().strip('*').strip()  # drop surrounding bold markers
        if len(item) > 3:
            features.append(item[:100])
    features = features[:20]

    # Extract business rules
    rules = []
    rule_patterns = [
        r'规则[::]\s*(.+)',
        r'(?:必须|不能|不允许|需要|应当|默认).{5,}',
        r'当.{2,20}时[,,].{5,}',
    ]
    for p in rule_patterns:
        rules.extend(re.findall(p, text))
    rules = [r.strip()[:200] for r in rules][:15]

    # Extract keywords (CN -> EN mapping from common.terms).
    # A dict is used as an ordered set so the result is stable across runs.
    cn_terms = extract_chinese_terms(text)

    keywords = {}
    for term in cn_terms:
        keywords[term] = None
        if term in CN_TO_EN:
            keywords[CN_TO_EN[term]] = None

    return {
        'file': filename,
        'title': title,
        'sections': sections[:20],
        'features': features,
        'rules': rules,
        'keywords': list(keywords)[:50],
        'cn_terms': cn_terms[:30],
        'mtime': os.path.getmtime(filepath),
    }
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
def build_prd_index(keyword_index=None):
    """Build prd-index.json from all ``.md`` files in ``PRD_DIR``.

    Each PRD is parsed with ``parse_prd_file`` and linked to code: for every
    PRD keyword present in the keyword index, the first three files listed
    for that keyword are grouped by their leading path component (the
    project) and stored under ``related_code``.

    Args:
        keyword_index: Optional pre-loaded keyword index, to avoid loading the
            large JSON file a second time. When None it is loaded from
            ``INDEX_DIR/keyword-index.json``.

    Returns:
        ``{filename: record}``; ``{}`` when ``PRD_DIR`` does not exist.
        A PRD that fails to parse is reported on stdout and left out.
    """
    print('\n=== Building PRD Index ===\n')

    if not os.path.isdir(PRD_DIR):
        print('PRD directory not found')
        return {}

    # Load keyword index for code matching (reuse the one passed in, if any)
    if keyword_index is None:
        ki_path = os.path.join(INDEX_DIR, 'keyword-index.json')
        keyword_index = load_json(ki_path)

    prd_index = {}

    for f in sorted(os.listdir(PRD_DIR)):
        if not f.endswith('.md'):
            continue

        filepath = os.path.join(PRD_DIR, f)
        try:
            prd = parse_prd_file(filepath)

            # Find related code
            related = {}
            for kw in prd['keywords']:
                if kw in keyword_index:
                    for cf in keyword_index[kw][:3]:
                        proj = cf.split('/')[0] if '/' in cf else cf.split('\\')[0]
                        related.setdefault(proj, set()).add(cf)
            # Sorted: list(set) order changes between runs, which would make
            # the saved index differ on every rebuild.
            prd['related_code'] = {p: sorted(fs) for p, fs in related.items()}

            prd_index[f] = prd

            print(f' {f}')
            print(f' Title: {prd["title"][:50]}')
            print(f' Features: {len(prd["features"])}, Rules: {len(prd["rules"])}')
            if prd['related_code']:
                for proj, files in prd['related_code'].items():
                    print(f' -> {proj}: {len(files)} files')

        except Exception as e:
            # Best effort per file: one unreadable PRD must not stop the rest.
            print(f' {f}: Error - {str(e)[:80]}')

    # Save index
    save_json(os.path.join(INDEX_DIR, 'prd-index.json'), prd_index)

    print(f'\nPRD Index: {len(prd_index)} PRDs indexed')
    return prd_index
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
# ============================================================
|
|
426
|
+
# Part 4: Incremental Code Index Update
|
|
427
|
+
# ============================================================
|
|
428
|
+
|
|
429
|
+
def parse_java_file(filepath):
    """Extract index entities from a Java source file.

    Returns:
        A list with one dict:
            file      path relative to CODE_DIR, '/'-separated
            class     first class/interface/enum name found, or None
            apis      string literals of @GetMapping/@PostMapping/@PutMapping/
                      @DeleteMapping/@RequestMapping annotations
            methods   up to 20 names of public/private/protected methods
            keywords  the class name split into its CamelCase words
        or ``[]`` when the file cannot be read or its relative path cannot be
        computed.
    """
    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
        rel = os.path.relpath(filepath, CODE_DIR).replace(os.sep, '/')
    except (OSError, ValueError):
        # Deliberate best effort: skip unreadable files. relpath raises
        # ValueError on Windows for paths on different drives. A bare
        # ``except`` here would also swallow KeyboardInterrupt and real bugs.
        return []

    class_match = re.search(
        r'(?:public\s+)?(?:abstract\s+)?(?:class|interface|enum)\s+(\w+)', content)
    class_name = class_match.group(1) if class_match else None

    apis = re.findall(
        r'@(?:GetMapping|PostMapping|PutMapping|DeleteMapping|RequestMapping)\s*\(?["\']([^"\']+)["\']',
        content
    )

    methods = re.findall(r'(?:public|private|protected)\s+\w+\s+(\w+)\s*\(', content)

    entity = {
        'file': rel,
        'class': class_name,
        'apis': apis,
        'methods': methods[:20],
        'keywords': re.findall('[A-Z]?[a-z]+', class_name or '') if class_name else []
    }
    return [entity]
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def parse_frontend_file(filepath):
    """Extract index entities from a frontend source file.

    Returns:
        A list with one dict:
            file       path relative to CODE_DIR, '/'-separated
            component  value of the first ``name: '<word>'`` entry, else the
                       file's base name up to its first dot
            api_calls  first string literal of axios./request./fetch./http.
                       method calls
            routes     string values of ``path: '...'`` entries
            type       'vue' for ``.vue`` files, otherwise 'js/ts'
        or ``[]`` when the file cannot be read or its relative path cannot be
        computed.
    """
    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()
        rel = os.path.relpath(filepath, CODE_DIR).replace(os.sep, '/')
    except (OSError, ValueError):
        # Deliberate best effort: skip unreadable files. relpath raises
        # ValueError on Windows for paths on different drives. A bare
        # ``except`` here would also swallow KeyboardInterrupt and real bugs.
        return []

    name_match = re.search(r'name:\s*["\'](\w+)["\']', content)
    component_name = name_match.group(1) if name_match else os.path.basename(filepath).split('.')[0]

    api_calls = re.findall(r'(?:axios|request|fetch|http)\.\w+\s*\(?["\']([^"\']+)["\']', content)
    routes = re.findall(r'path:\s*["\']([^"\']+)["\']', content)

    entity = {
        'file': rel,
        'component': component_name,
        'api_calls': api_calls,
        'routes': routes,
        'type': 'vue' if filepath.endswith('.vue') else 'js/ts'
    }
    return [entity]
|
|
487
|
+
|
|
488
|
+
|
|
489
|
+
def build_file_keys_map(keyword_index):
    """Invert the keyword index into ``{file: set(keywords)}``.

    The result lets remove_file_from_indexes look up the keywords of one file
    directly instead of scanning every keyword.

    Args:
        keyword_index: ``{keyword: [files]}``.

    Returns:
        ``{file: {keywords that list this file}}``.
    """
    reverse = {}
    for keyword in keyword_index:
        for path in keyword_index[keyword]:
            if path not in reverse:
                reverse[path] = set()
            reverse[path].add(keyword)
    return reverse
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def remove_file_from_indexes(filepath, keyword_index, api_index, file_keys_map=None):
    """Remove a file's entries from the keyword and api indexes, in place.

    module-map.json stores per-project COUNTS (not file lists) and is
    recomputed by rebuild_module_summary() after each update, so it is not
    touched here.

    Args:
        filepath: Index-relative path of the file; OS separators are
            converted to '/'.
        keyword_index: ``{keyword: [files]}``; keywords whose list becomes
            empty are deleted.
        api_index: ``{endpoint: file}``; endpoints mapped to the file are
            deleted.
        file_keys_map: Optional reverse index ``{file: set(keywords)}``. When
            given, the file's entry is popped from it and only those keywords
            are visited; when None every keyword is scanned.
    """
    rel = filepath.replace(os.sep, '/')

    # Pick the keywords to visit: the reverse-index entry, or all of them.
    if file_keys_map is None:
        candidates = list(keyword_index)
    else:
        candidates = file_keys_map.pop(rel, set())

    emptied = []
    for kw in candidates:
        listed = keyword_index.get(kw)
        if not listed or rel not in listed:
            continue
        listed.remove(rel)
        if not listed:
            emptied.append(kw)
    for kw in emptied:
        del keyword_index[kw]

    # Remove from api_index
    stale_apis = [api for api in api_index if api_index[api] == rel]
    for api in stale_apis:
        del api_index[api]
|
|
541
|
+
|
|
542
|
+
|
|
543
|
+
# File extensions handled by the indexer, grouped by category.
INDEXED_EXTS_JAVA = (
    '.java',
)
INDEXED_EXTS_FRONTEND = (
    '.vue',
    '.js',
    '.ts',
    '.jsx',
    '.tsx',
)
INDEXED_EXTS_CONFIG = (
    '.xml',
    '.yml',
    '.yaml',
    '.properties',
)
# Directory names that are pruned when walking a project tree.
SKIP_DIRS = [
    'node_modules',
    'target',
    'build',
    'dist',
    '__pycache__',
]
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def rebuild_module_summary(api_index, file_stats=None):
    """Recompute the per-project counts stored in module-map.json.

    Schema (counts only):
        {project: {files: int, classes: int, components: int, apis: int}}

    Args:
        api_index: endpoint -> file map; an endpoint counts for a project
            when its file path starts with ``<project>/``.
        file_stats: Optional pre-computed ``{project: {files, classes,
            components}}``. When non-empty it is used as-is and the disk is
            not walked; otherwise the counts are gathered by walking
            ``CODE_DIR``.

    Returns:
        The module map described above (``{}`` if ``CODE_DIR`` is missing on
        the walk path).
    """
    def api_count(project):
        marker = project + '/'
        return sum(1 for target in api_index.values() if target.startswith(marker))

    if file_stats:
        # Fast path: reuse the statistics the caller already has.
        return {
            project: {
                'files': stats.get('files', 0),
                'classes': stats.get('classes', 0),
                'components': stats.get('components', 0),
                'apis': api_count(project),
            }
            for project, stats in file_stats.items()
        }

    # Fallback path: count files by walking every project directory.
    summary = {}
    if not os.path.isdir(CODE_DIR):
        return summary

    for project in sorted(os.listdir(CODE_DIR)):
        project_dir = os.path.join(CODE_DIR, project)
        if not os.path.isdir(project_dir):
            continue

        java_files = frontend_files = config_files = 0
        for _root, dirs, files in os.walk(project_dir):
            # Prune hidden directories and build/dependency output in place.
            dirs[:] = [d for d in dirs if not (d.startswith('.') or d in SKIP_DIRS)]
            for name in files:
                ext = os.path.splitext(name)[1]
                if ext in INDEXED_EXTS_JAVA:
                    java_files += 1
                elif ext in INDEXED_EXTS_FRONTEND:
                    frontend_files += 1
                elif ext in INDEXED_EXTS_CONFIG:
                    config_files += 1

        summary[project] = {
            'files': java_files + frontend_files + config_files,
            'classes': java_files,
            'components': frontend_files,
            'apis': api_count(project),
        }
    return summary
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
def normalize_keyword_index(keyword_index):
    """Replace every file list with its sorted, de-duplicated form, in place.

    Keeps the written index deterministic and diff-friendly. The set of keys
    and their order are unchanged; nothing is returned.
    """
    keyword_index.update({
        keyword: sorted(set(files))
        for keyword, files in keyword_index.items()
    })
|
|
608
|
+
|
|
609
|
+
|
|
610
|
+
def index_one_file(filepath, rel, ext, keyword_index, api_index):
    """Parse one source file and add its entries to the indexes, in place.

    Shared by the incremental and the full build so both produce identical
    entries.

    Java files: every mapped API path is stored as ``api_index[api] = rel``;
    every class-name keyword of at least two characters is lower-cased and
    linked to *rel*.
    Frontend files: every API call is linked under the key ``'api:' + api``,
    and the lower-cased component name is linked to *rel*.

    Args:
        filepath: Path of the file on disk.
        rel: Path to store in the indexes.
        ext: File extension including the dot.
        keyword_index: ``{keyword: [files]}``.
        api_index: ``{endpoint: file}``.

    Returns:
        True if *ext* is a Java or frontend extension, False otherwise.
    """
    def link(key):
        # Register rel under key unless it is already listed.
        bucket = keyword_index.setdefault(key, [])
        if rel not in bucket:
            bucket.append(rel)

    if ext in INDEXED_EXTS_JAVA:
        for entity in parse_java_file(filepath):
            for api in entity.get('apis', []):
                api_index[api] = rel
            for word in entity.get('keywords', []):
                lowered = word.lower()
                if len(lowered) >= 2:
                    link(lowered)
        return True

    if ext in INDEXED_EXTS_FRONTEND:
        for entity in parse_frontend_file(filepath):
            for api in entity.get('api_calls', []):
                link('api:' + api)
            component = entity.get('component', '')
            if component:
                link(component.lower())
        return True

    return False
|
|
642
|
+
|
|
643
|
+
|
|
644
|
+
def update_indexes_incremental(changed_projects):
    """Re-index only the files reported as changed, then persist all indexes.

    Args:
        changed_projects: mapping of project name -> collection of changed
            file paths, each relative to that project's directory under
            ``CODE_DIR``.

    Returns:
        Number of changed files for which ``index_one_file`` returned True.
    """
    print('\n=== Incremental Index Update ===\n')

    os.makedirs(INDEX_DIR, exist_ok=True)

    # required=True: refuse to "rebuild" on top of a corrupt/empty base.
    keyword_index = load_json(os.path.join(INDEX_DIR, 'keyword-index.json'), required=True)
    api_index = load_json(os.path.join(INDEX_DIR, 'api-index.json'), required=True)

    # Optional reverse index {file: keys}, handed to remove_file_from_indexes.
    # A missing file and an empty map are both normalised to None, so the
    # callee is never given an empty dict as if it were a usable reverse index.
    fkm_path = os.path.join(INDEX_DIR, '.file-keys.json')
    raw_fkm = load_json(fkm_path) if os.path.isfile(fkm_path) else None
    # Sets in memory; converted back to sorted lists when persisted.
    file_keys_map = {f: set(ks) for f, ks in raw_fkm.items()} if raw_fkm else None

    total_files = 0

    for project_name, changed_files in changed_projects.items():
        project_dir = os.path.join(CODE_DIR, project_name)
        if not os.path.isdir(project_dir):
            continue

        print(f' [{project_name}] Updating {len(changed_files)} files...')

        for changed_file in changed_files:
            filepath = os.path.join(project_dir, changed_file)
            rel = (project_name + '/' + changed_file).replace(os.sep, '/')

            # Drop the file's previous entries before (possibly) re-adding it.
            remove_file_from_indexes(rel, keyword_index, api_index, file_keys_map=file_keys_map)

            # A path that no longer exists on disk was deleted: removal is all
            # that is needed.
            if not os.path.isfile(filepath):
                continue

            ext = os.path.splitext(changed_file)[1]
            if index_one_file(filepath, rel, ext, keyword_index, api_index):
                total_files += 1
                # Make sure the file has an entry in the in-memory reverse
                # index; the authoritative map is rebuilt below.
                if file_keys_map is not None:
                    file_keys_map.setdefault(rel, set())

    normalize_keyword_index(keyword_index)

    # Rebuild the reverse index from the final keyword index rather than
    # patching it incrementally.
    fkm_new = build_file_keys_map(keyword_index)
    module_map = rebuild_module_summary(api_index)

    save_json(os.path.join(INDEX_DIR, 'module-map.json'), module_map)
    save_json(os.path.join(INDEX_DIR, 'keyword-index.json'), keyword_index)
    save_json(os.path.join(INDEX_DIR, 'api-index.json'), api_index)
    save_json(os.path.join(INDEX_DIR, '.file-keys.json'),
              {f: sorted(ks) for f, ks in fkm_new.items()})

    print(f'\n Updated: {total_files} files indexed')
    print(f' Keywords: {len(keyword_index)}')
    print(f' APIs: {len(api_index)}')

    return total_files
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def _index_project(project_name, project_dir, skip_dirs):
    """Index every file of one project (worker used by the full build).

    Walks ``project_dir``, pruning hidden directories and any directory
    named in ``skip_dirs``. Java and frontend files are parsed through
    ``index_one_file`` so that the full build and the incremental update
    produce entries from one shared implementation; config files are only
    counted.

    Args:
        project_name: name echoed back in the result tuple.
        project_dir: root directory of the project to walk.
        skip_dirs: directory names that must not be descended into.

    Returns:
        ``(project_name, keyword_dict, api_dict, stats)`` where ``stats`` is
        ``{'files': ..., 'classes': ..., 'components': ...}`` holding the
        number of counted files, Java files and frontend files.
    """
    local_ki = {}
    local_api = {}
    proj_count = proj_java = proj_fe = 0
    # Index paths are expressed relative to the parent of the project
    # directory, i.e. they start with the project directory's own name.
    base_dir = os.path.dirname(project_dir)

    for root, dirs, files in os.walk(project_dir):
        # In-place assignment prunes the walk itself.
        dirs[:] = [d for d in dirs if not d.startswith('.') and d not in skip_dirs]
        for f in sorted(files):
            ext = os.path.splitext(f)[1]
            is_java = ext in INDEXED_EXTS_JAVA
            is_frontend = not is_java and ext in INDEXED_EXTS_FRONTEND
            if not (is_java or is_frontend or ext in INDEXED_EXTS_CONFIG):
                continue

            if is_java or is_frontend:
                filepath = os.path.join(root, f)
                rel = os.path.relpath(filepath, base_dir).replace(os.sep, '/')
                index_one_file(filepath, rel, ext, local_ki, local_api)

            proj_count += 1
            if is_java:
                proj_java += 1
            elif is_frontend:
                proj_fe += 1

    stats = {'files': proj_count, 'classes': proj_java, 'components': proj_fe}
    return project_name, local_ki, local_api, stats
|
|
755
|
+
|
|
756
|
+
|
|
757
|
+
def build_full_indexes():
    """Rebuild every code index from scratch and write it under ``INDEX_DIR``.

    Each directory directly under ``CODE_DIR`` is treated as one project and
    indexed by ``_index_project``. With two or more projects the work is
    spread over a process pool (one task per project); if that fails for any
    reason the build falls back to a serial pass.

    Returns:
        Total number of files counted across all projects, or 0 when
        ``CODE_DIR`` does not exist (a failure is recorded via ``fail``).
    """
    print('\n=== Full Index Build ===\n')

    if not os.path.isdir(CODE_DIR):
        fail('data/code/ not found - cannot build indexes')
        return 0

    os.makedirs(INDEX_DIR, exist_ok=True)

    # Collect the projects to process, in sorted (deterministic) order.
    projects = []
    for project_name in sorted(os.listdir(CODE_DIR)):
        project_dir = os.path.join(CODE_DIR, project_name)
        if os.path.isdir(project_dir):
            projects.append((project_name, project_dir))

    keyword_index = {}
    api_index = {}
    file_stats = {}

    def _merge(pname, local_ki, local_api, stats):
        # Fold one project's result into the global indexes.
        for kw, files in local_ki.items():
            keyword_index.setdefault(kw, []).extend(files)
        api_index.update(local_api)
        file_stats[pname] = stats

    # Projects are independent of each other, so they are indexed in
    # parallel; files inside one project are still processed serially.
    use_parallel = len(projects) >= 2
    if use_parallel:
        try:
            from concurrent.futures import ProcessPoolExecutor
            import multiprocessing as mp
            workers = min(len(projects), mp.cpu_count())
            print(f' Parallel indexing with {workers} workers...')
            with ProcessPoolExecutor(max_workers=workers) as pool:
                futures = [
                    pool.submit(_index_project, pname, pdir, SKIP_DIRS)
                    for pname, pdir in projects
                ]
                # Results are consumed in submission order so merging is
                # deterministic.
                for fut in futures:
                    pname, local_ki, local_api, stats = fut.result()
                    _merge(pname, local_ki, local_api, stats)
                    print(f' [{pname}] Files: {stats["files"]}')
        except Exception as e:
            # Deliberate best-effort: any parallel failure falls back to the
            # serial path below.
            print(f' Parallel failed ({e}), falling back to serial...')
            use_parallel = False
            # Discard whatever was merged before the failure; the serial pass
            # re-processes every project and must start from a clean slate,
            # otherwise already-merged projects would be counted twice.
            keyword_index.clear()
            api_index.clear()
            file_stats.clear()

    if not use_parallel:
        # Serial path: single project, or fallback after a parallel failure.
        for project_name, project_dir in projects:
            print(f' [{project_name}] Scanning...')
            pname, local_ki, local_api, stats = _index_project(project_name, project_dir, SKIP_DIRS)
            _merge(pname, local_ki, local_api, stats)
            print(f' Files: {stats["files"]}')

    # Derive the total from the per-project stats so each project is counted
    # exactly once.
    total_files = sum(stats['files'] for stats in file_stats.values())

    normalize_keyword_index(keyword_index)
    # Hand over the stats gathered above so the summary does not need to walk
    # the tree a second time.
    module_map = rebuild_module_summary(api_index, file_stats=file_stats)

    save_json(os.path.join(INDEX_DIR, 'module-map.json'), module_map)
    save_json(os.path.join(INDEX_DIR, 'keyword-index.json'), keyword_index)
    save_json(os.path.join(INDEX_DIR, 'api-index.json'), api_index)

    # Persist the reverse index {file: [keys]} that the incremental update
    # loads on its next run.
    fkm = build_file_keys_map(keyword_index)
    save_json(os.path.join(INDEX_DIR, '.file-keys.json'),
              {f: sorted(ks) for f, ks in fkm.items()})

    print(f'\n Total: {total_files} files, {len(keyword_index)} keywords, {len(api_index)} APIs')
    return total_files
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
# ============================================================
# Part 5: PRD ↔ Code Mapping
# ============================================================
# Business term -> code path mapping comes from common.terms (BUSINESS_PATH_MAP)
# so that index building and searching share the same semantics.

# Minimum length a keyword or pattern must have before it may be matched as a
# substring of the other; anything shorter only counts on an exact match
# (substring matching on short keywords like "in"/"sa" pollutes the mapping).
MIN_PATTERN_FUZZY_LEN = 4
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
def build_prd_code_mapping(keyword_index=None):
    """Build the bidirectional PRD <-> code mapping and save it to disk.

    For every PRD in ``prd-index.json`` three strategies collect related
    code files from the keyword index:

    1. business terms found in the PRD text are expanded through
       ``BUSINESS_PATH_MAP`` into patterns matched against index keys,
    2. the PRD's own keywords are looked up directly,
    3. Chinese terms are translated through ``CN_TO_EN`` and looked up.

    Args:
        keyword_index: optional pre-loaded keyword index; loaded from
            ``INDEX_DIR`` when omitted.

    Returns:
        ``{'prd_to_code': {...}, 'code_to_prd': {...}}``; the same structure
        is written to ``prd-code-map.json``.
    """
    print('\n=== Building PRD to Code Mapping ===\n')

    prd_index = load_json(os.path.join(INDEX_DIR, 'prd-index.json'))
    if keyword_index is None:
        keyword_index = load_json(os.path.join(INDEX_DIR, 'keyword-index.json'))

    # Lower-case every index key once instead of once per (PRD, pattern) pair.
    lowered_entries = [(kw.lower(), files) for kw, files in keyword_index.items()]
    # pattern -> set of files; a pattern's matches depend only on the keyword
    # index, so they are computed once and shared by all PRDs.
    pattern_cache = {}

    def _files_for_pattern(pat_lower):
        # Up to five files of every index key that matches the pattern.
        cached = pattern_cache.get(pat_lower)
        if cached is not None:
            return cached
        matched = set()
        pat_is_fuzzy = len(pat_lower) >= MIN_PATTERN_FUZZY_LEN
        for kw_lower, files in lowered_entries:
            if kw_lower == pat_lower or (
                len(kw_lower) >= MIN_PATTERN_FUZZY_LEN and kw_lower in pat_lower
            ) or (
                pat_is_fuzzy and pat_lower in kw_lower
            ):
                matched.update(files[:5])
        pattern_cache[pat_lower] = matched
        return matched

    mapping = {'prd_to_code': {}, 'code_to_prd': {}}

    for prd_file, prd in prd_index.items():
        related_files = set()
        title = prd.get('title', '')
        cn_terms = prd.get('cn_terms', [])
        keywords = prd.get('keywords', [])

        # Strategy 1: business term -> path pattern matching.
        all_text = title + ' ' + ' '.join(cn_terms) + ' ' + ' '.join(keywords)
        for cn_term, en_patterns in BUSINESS_PATH_MAP.items():
            if cn_term not in all_text:
                continue
            for pattern in en_patterns:
                pat_lower = pattern.strip('/-').lower()
                if not pat_lower:
                    continue
                related_files.update(_files_for_pattern(pat_lower))

        # Strategy 2: direct keyword matching (first 20 keywords only).
        for kw in keywords[:20]:
            kw_lower = kw.lower()
            if kw_lower in keyword_index:
                related_files.update(keyword_index[kw_lower][:3])

        # Strategy 3: CN -> EN translation through the shared map.
        for term in cn_terms:
            if term in CN_TO_EN:
                en = CN_TO_EN[term].lower()
                if en in keyword_index:
                    related_files.update(keyword_index[en][:5])

        # Sort for determinism and cap the list.
        related_files = sorted(related_files)[:30]

        mapping['prd_to_code'][prd_file] = {
            'title': title,
            'files': related_files,
            'features': prd.get('features', [])[:10]
        }

        # Reverse mapping. Each PRD key is visited once and its file list is
        # already unique, so a plain append cannot create duplicates.
        for f in related_files:
            mapping['code_to_prd'].setdefault(f, []).append(prd_file)

    save_json(os.path.join(INDEX_DIR, 'prd-code-map.json'), mapping)

    print(f' PRD -> Code: {len(mapping["prd_to_code"])} PRDs mapped')
    print(f' Code -> PRD: {len(mapping["code_to_prd"])} files with PRD links')

    # Show the first three PRDs as a quick visual sanity check.
    for prd_file, info in list(mapping['prd_to_code'].items())[:3]:
        print(f'\n {prd_file}:')
        print(f' {info["title"][:50]}')
        print(f' -> {len(info["files"])} related files')

    return mapping
|
|
923
|
+
|
|
924
|
+
|
|
925
|
+
# ============================================================
# Part 6: Index Verification (accuracy checks)
# ============================================================

# An entry count that drops by more than this fraction compared with the
# previous run is treated as an anomaly (guards against an index that was
# silently emptied or only partially built).
MAX_SHRINK_RATIO = 0.3
# Minimum fraction of sampled index entries whose file must exist on disk.
MIN_SAMPLE_HIT = 0.8
|
|
933
|
+
|
|
934
|
+
|
|
935
|
+
def verify_indexes(config):
    """Run post-build sanity checks on the saved indexes.

    Checks performed (each problem is reported through ``fail``):

    1. every project configured under ``git_sync.projects`` appears in the
       module map with a non-zero file count;
    2. the keyword / API entry counts have not dropped by more than
       ``MAX_SHRINK_RATIO`` relative to the previous ``.index-meta.json``
       (only enforced when the previous count exceeded 50);
    3. a sample of up to 20 keyword-index entries point at files that
       exist under ``CODE_DIR`` (at least ``MIN_SAMPLE_HIT`` of them).

    Args:
        config: loaded configuration mapping.

    Returns:
        Dict with ``checked_at`` and ``checks``; the caller stores it in
        ``.index-meta.json``.
    """
    print('\n=== Verifying Indexes ===\n')
    checks = {}
    result = {'checked_at': datetime.now().strftime('%Y-%m-%d %H:%M'), 'checks': checks}

    def _load(filename):
        return load_json(os.path.join(INDEX_DIR, filename))

    module_map = _load('module-map.json')
    keyword_index = _load('keyword-index.json')
    api_index = _load('api-index.json')

    # Check 1: every configured project has been indexed.
    git_sync_cfg = config.get('git_sync', {}) or {}
    configured = list((git_sync_cfg.get('projects', {}) or {}).keys())
    for proj in configured:
        info = module_map.get(proj)
        files = 0
        if isinstance(info, dict):
            files = info.get('files', 0)
        if files:
            print(f' [OK] {proj}: {files} files indexed')
        else:
            fail(f'verify: 项目 {proj} 不在索引中或 files=0')
    checks['projects'] = {p: (module_map.get(p) or {}).get('files', 0) for p in configured}

    # Check 2: detect a sudden collapse in entry counts versus the last meta.
    prev_counts = _load('.index-meta.json').get('counts', {})
    counts = {'keywords': len(keyword_index), 'apis': len(api_index)}
    for name, now in counts.items():
        prev = prev_counts.get(name, 0)
        collapsed = prev > 50 and now < prev * (1 - MAX_SHRINK_RATIO)
        if collapsed:
            fail(f'verify: {name} 条目数从 {prev} 暴跌到 {now} (>30%), 索引可能损坏')
        else:
            print(f' [OK] {name}: {now} (prev {prev})')
    checks['counts'] = counts

    # Check 3: sample evenly spaced keywords and confirm that the first file
    # of each really exists on disk.
    step = max(1, len(keyword_index) // 20)
    sample = []
    for pos, kw in enumerate(sorted(keyword_index)):
        files = keyword_index[kw]
        if pos % step != 0 or not files:
            continue
        sample.append(files[0])
        if len(sample) >= 20:
            break
    if sample:
        hits = sum(os.path.isfile(os.path.join(CODE_DIR, f)) for f in sample)
        ratio = hits / len(sample)
        if ratio >= MIN_SAMPLE_HIT:
            print(f' [OK] 抽样 {len(sample)} 条, {hits} 条文件存在 ({ratio:.0%})')
        else:
            fail(f'verify: 抽样 {len(sample)} 条仅 {hits} 条文件存在 ({ratio:.0%}), 索引与磁盘脱节')
        checks['sample_hit_ratio'] = round(ratio, 2)

    return result
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
def main():
    """Command-line entry point for the knowledge-graph update.

    Recognised arguments: ``--sync-only``, ``--index-only``, ``--prd-only``,
    ``--full`` and ``--project NAME``. The steps run in order: git sync,
    PRD collection, PRD index, code index (full or incremental), PRD <-> code
    mapping, verification, and finally writing ``.index-meta.json``.

    Exits with status 2 when the lock cannot be acquired and with status 1
    when any failure was recorded in ``FAILURES``.
    """
    config = load_config()

    argv = sys.argv
    sync_only = '--sync-only' in argv
    index_only = '--index-only' in argv
    prd_only = '--prd-only' in argv
    full_build = '--full' in argv

    # '--project' must be followed by a value; the last occurrence wins.
    project_filter = None
    for pos in range(1, len(argv) - 1):
        if argv[pos] == '--project':
            project_filter = argv[pos + 1]

    banner = '=' * 50
    print(banner)
    print('QODER Knowledge Graph Update')
    print(f'Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
    print(banner)

    if not acquire_lock():
        sys.exit(2)

    try:
        ki_path = os.path.join(INDEX_DIR, 'keyword-index.json')
        changed_projects = {}

        # The keyword index is loaded lazily and shared between the PRD index
        # and the PRD <-> code mapping so the JSON is not parsed repeatedly.
        shared_ki = None

        # Step 1: Git sync
        if not index_only and not prd_only:
            changed_projects = git_sync_all(project_filter, config)

        # Step 2: Collect PRDs
        if not sync_only and not index_only:
            collect_prds()

        # Step 3: Parse PRDs and build the PRD index (shared keyword index)
        if not sync_only:
            if shared_ki is None and os.path.isfile(ki_path):
                shared_ki = load_json(ki_path)
            build_prd_index(keyword_index=shared_ki)

        # Step 4: Update code indexes
        if not sync_only and not prd_only:
            if full_build or index_only:
                build_full_indexes()
                # The full build rewrote keyword-index.json: drop the cache.
                shared_ki = None
            elif changed_projects:
                update_indexes_incremental(changed_projects)
                # The incremental update rewrote it as well.
                shared_ki = None
            else:
                print('\nNo code changes to index.')

        # Step 5: Build the PRD <-> code mapping (reload the index if dropped)
        if not sync_only:
            if shared_ki is None and os.path.isfile(ki_path):
                shared_ki = load_json(ki_path)
            build_prd_code_mapping(keyword_index=shared_ki)

        # Step 5.5: Verify index accuracy
        verify_result = None
        if not sync_only and not prd_only:
            verify_result = verify_indexes(config)

        # Step 6: Write meta (project counts come from the saved module map,
        # keyword count from the already loaded index when available)
        module_map = load_json(os.path.join(INDEX_DIR, 'module-map.json'))
        if shared_ki is None:
            shared_ki = load_json(ki_path)
        api_idx = load_json(os.path.join(INDEX_DIR, 'api-index.json'))
        project_files = {p: info.get('files', 0) for p, info in module_map.items()}
        meta = {
            'last_sync': datetime.now().strftime('%Y-%m-%d %H:%M'),
            'projects': project_files,
            'counts': {
                'keywords': len(shared_ki),
                'apis': len(api_idx),
            },
            'verify': verify_result,
            'failures': FAILURES,
        }
        save_json(os.path.join(INDEX_DIR, '.index-meta.json'), meta)
    finally:
        release_lock()

    print('\n' + '=' * 50)
    if FAILURES:
        print(f'Update finished with {len(FAILURES)} ERROR(S):')
        for msg in FAILURES:
            print(' - ' + msg)
        print(banner)
        sys.exit(1)
    print('Update complete!')
    print(banner)
|
|
1084
|
+
|
|
1085
|
+
|
|
1086
|
+
# Run the update only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
|