@hupan56/wlkj 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/bin/cli.js +213 -0
  2. package/package.json +11 -0
  3. package/templates/cli.js +198 -0
  4. package/templates/qoder/commands/wl-code.md +43 -0
  5. package/templates/qoder/commands/wl-commit.md +30 -0
  6. package/templates/qoder/commands/wl-init.md +80 -0
  7. package/templates/qoder/commands/wl-insight.md +51 -0
  8. package/templates/qoder/commands/wl-prd.md +199 -0
  9. package/templates/qoder/commands/wl-report.md +166 -0
  10. package/templates/qoder/commands/wl-search.md +52 -0
  11. package/templates/qoder/commands/wl-spec.md +18 -0
  12. package/templates/qoder/commands/wl-status.md +51 -0
  13. package/templates/qoder/commands/wl-task.md +71 -0
  14. package/templates/qoder/commands/wl-test.md +42 -0
  15. package/templates/qoder/config.toml +5 -0
  16. package/templates/qoder/config.yaml +141 -0
  17. package/templates/qoder/hooks/inject-workflow-state.py +117 -0
  18. package/templates/qoder/hooks/session-start.py +204 -0
  19. package/templates/qoder/rules/wl-pipeline.md +105 -0
  20. package/templates/qoder/scripts/add_session.py +245 -0
  21. package/templates/qoder/scripts/benchmark.py +209 -0
  22. package/templates/qoder/scripts/build_style_index.py +268 -0
  23. package/templates/qoder/scripts/code_index.py +41 -0
  24. package/templates/qoder/scripts/collect_prds.py +31 -0
  25. package/templates/qoder/scripts/common/__init__.py +0 -0
  26. package/templates/qoder/scripts/common/active_task.py +230 -0
  27. package/templates/qoder/scripts/common/atomicio.py +172 -0
  28. package/templates/qoder/scripts/common/developer.py +161 -0
  29. package/templates/qoder/scripts/common/eval_api.py +144 -0
  30. package/templates/qoder/scripts/common/feishu.py +278 -0
  31. package/templates/qoder/scripts/common/filelock.py +211 -0
  32. package/templates/qoder/scripts/common/identity.py +285 -0
  33. package/templates/qoder/scripts/common/mentions.py +134 -0
  34. package/templates/qoder/scripts/common/paths.py +311 -0
  35. package/templates/qoder/scripts/common/reqid.py +218 -0
  36. package/templates/qoder/scripts/common/search_engine.py +205 -0
  37. package/templates/qoder/scripts/common/task_utils.py +342 -0
  38. package/templates/qoder/scripts/common/terms.py +234 -0
  39. package/templates/qoder/scripts/common/utf8.py +38 -0
  40. package/templates/qoder/scripts/context_pack.py +196 -0
  41. package/templates/qoder/scripts/eval_prd.py +225 -0
  42. package/templates/qoder/scripts/export.py +487 -0
  43. package/templates/qoder/scripts/git_sync.py +1087 -0
  44. package/templates/qoder/scripts/handoff.py +22 -0
  45. package/templates/qoder/scripts/init_developer.py +76 -0
  46. package/templates/qoder/scripts/init_doctor.py +527 -0
  47. package/templates/qoder/scripts/install_qoderwork.py +339 -0
  48. package/templates/qoder/scripts/learn.py +67 -0
  49. package/templates/qoder/scripts/notify.py +5 -0
  50. package/templates/qoder/scripts/parse_prds.py +33 -0
  51. package/templates/qoder/scripts/report.py +281 -0
  52. package/templates/qoder/scripts/role.py +39 -0
  53. package/templates/qoder/scripts/run_weekly_update.bat +17 -0
  54. package/templates/qoder/scripts/run_weekly_update.sh +20 -0
  55. package/templates/qoder/scripts/search_index.py +352 -0
  56. package/templates/qoder/scripts/setup.py +453 -0
  57. package/templates/qoder/scripts/setup_weekly_cron.bat +22 -0
  58. package/templates/qoder/scripts/setup_weekly_cron.sh +19 -0
  59. package/templates/qoder/scripts/status.py +389 -0
  60. package/templates/qoder/scripts/syncgate.py +330 -0
  61. package/templates/qoder/scripts/task.py +954 -0
  62. package/templates/qoder/scripts/team.py +29 -0
  63. package/templates/qoder/scripts/team_sync.py +419 -0
  64. package/templates/qoder/scripts/workspace_init.py +102 -0
  65. package/templates/qoder/settings.json +53 -0
  66. package/templates/qoder/skills/design-review/SKILL.md +25 -0
  67. package/templates/qoder/skills/prd-generator/SKILL.md +180 -0
  68. package/templates/qoder/skills/prd-review/SKILL.md +36 -0
  69. package/templates/qoder/skills/prototype-generator/SKILL.md +141 -0
  70. package/templates/qoder/skills/spec-coder/SKILL.md +69 -0
  71. package/templates/qoder/skills/spec-generator/SKILL.md +67 -0
  72. package/templates/qoder/skills/test-generator/SKILL.md +72 -0
  73. package/templates/qoder/skills/wl-commit/SKILL.md +76 -0
  74. package/templates/qoder/skills/wl-init/SKILL.md +67 -0
  75. package/templates/qoder/skills/wl-insight/SKILL.md +81 -0
  76. package/templates/qoder/skills/wl-report/SKILL.md +87 -0
  77. package/templates/qoder/skills/wl-search/SKILL.md +75 -0
  78. package/templates/qoder/skills/wl-status/SKILL.md +61 -0
  79. package/templates/qoder/skills/wl-task/SKILL.md +58 -0
  80. package/templates/qoder/templates/prd-full-template.md +103 -0
  81. package/templates/qoder/templates/prd-quick-template.md +69 -0
  82. package/templates/qoder/templates/prototype-app.html +344 -0
  83. package/templates/qoder/templates/prototype-web.html +310 -0
  84. package/templates/root/AGENTS.md +182 -0
  85. package/templates/root/README-pipeline.md +56 -0
  86. package/templates/root/ROLES.md +85 -0
  87. package/templates/root/新手指南.md +186 -0
@@ -0,0 +1,1087 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Weekly Knowledge Graph Update
5
+
6
+ Usage:
7
+ python git_sync.py # Full update: git + PRD + index
8
+ python git_sync.py --sync-only # Git pull only
9
+ python git_sync.py --index-only # Re-index only (no git pull)
10
+ python git_sync.py --prd-only # Collect + parse PRDs only
11
+ python git_sync.py --project fywl-ics # Sync single project
12
+
13
+ What it does (Friday Update):
14
+ 1. Git pull latest code for each project
15
+ 2. Collect PRDs from all user workspaces
16
+ 3. Parse PRDs to extract business rules and features
17
+ 4. Build incremental indexes (only changed files)
18
+ 5. Build PRD ↔ Code mapping
19
+
20
+ Config: .qoder/config.yaml -> git_sync section
21
+ """
22
+
23
+ import os
24
+ import sys
25
+ import json
26
+ import hashlib
27
+ import subprocess
28
+ import re
29
+ import shutil
30
+ from datetime import datetime
31
+
32
# UTF-8 stdio (defensive: must not crash when stdout has been captured).
# First ask for UTF-8 with errors='replace' so unencodable characters are
# substituted instead of raising; if that call is unavailable or rejected,
# retry with the encoding only, and finally give up silently so the script
# still runs with whatever stdout it was given.
try:
    sys.stdout.reconfigure(encoding='utf-8', errors='replace')
except (AttributeError, TypeError, OSError, IOError):
    try:
        sys.stdout.reconfigure(encoding='utf-8')
    except Exception:
        pass
40
+
41
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
42
+ from common.terms import BUSINESS_PATH_MAP, CN_TO_EN, PRD_STOP_WORDS
43
+
44
# Root of the workspace: two directory levels above the directory that
# contains this script.
BASE = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
DATA_DIR = os.path.join(BASE, 'data')
CODE_DIR = os.path.join(DATA_DIR, 'code')          # one sub-directory per synced project
INDEX_DIR = os.path.join(DATA_DIR, 'index')        # generated JSON indexes and the lock file
PRD_DIR = os.path.join(DATA_DIR, 'docs', 'prd')    # PRDs collected from member workspaces
WORKSPACE = os.path.join(BASE, 'workspace', 'members')
CONFIG_PATH = os.path.join(BASE, '.qoder', 'config.yaml')

# Simple lock to avoid two concurrent index writers (cron + manual run)
LOCK_FILE = os.path.join(INDEX_DIR, '.sync-lock')
# A lock older than this (2 hours) is treated as stale and removed.
LOCK_STALE_SECONDS = 2 * 60 * 60

# Collected at runtime; non-empty => exit code 1 so cron/bat can detect failure
FAILURES = []
58
+
59
+
60
def fail(msg):
    """Record *msg* in the module-level FAILURES list and echo it to stdout.

    Args:
        msg: Human-readable description of what went wrong.
    """
    FAILURES.append(msg)
    line = 'ERROR: ' + msg
    print(line)
63
+
64
+
65
def acquire_lock():
    """Try to take the sync lock by atomically creating LOCK_FILE.

    The lock file is created with O_CREAT | O_EXCL so that two processes
    starting at the same moment cannot both succeed (the previous
    "check isfile, then open for writing" sequence allowed exactly that).
    A lock older than LOCK_STALE_SECONDS is considered abandoned and is
    removed, after which creation is retried once.

    Returns:
        True if this process now holds the lock, False if another sync
        holds a fresh lock (or won the race for it).
    """
    os.makedirs(INDEX_DIR, exist_ok=True)

    # At most two attempts: the second runs only after a stale lock was
    # removed or the lock disappeared while it was being inspected.
    for _ in range(2):
        try:
            fd = os.open(LOCK_FILE, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
        except FileExistsError:
            try:
                age = datetime.now().timestamp() - os.path.getmtime(LOCK_FILE)
            except OSError:
                # The holder released the lock between our two calls; retry.
                continue
            if age < LOCK_STALE_SECONDS:
                print('Another sync appears to be running (lock age {:.0f}s).'.format(age))
                print('If you are sure it is not, delete: ' + LOCK_FILE)
                return False
            print('Removing stale lock ({:.0f}s old)'.format(age))
            try:
                os.remove(LOCK_FILE)
            except FileNotFoundError:
                pass  # someone else already cleaned it up
            continue

        with os.fdopen(fd, 'w', encoding='utf-8') as f:
            f.write('{} pid={}\n'.format(datetime.now().isoformat(), os.getpid()))
        return True

    # Another process re-created the lock right after the stale one was removed.
    print('Another sync acquired the lock first: ' + LOCK_FILE)
    return False
78
+
79
+
80
def release_lock():
    """Delete the sync lock file; a missing or undeletable file is ignored."""
    try:
        os.unlink(LOCK_FILE)
    except OSError:
        return
85
+
86
+
87
def load_json(path, default=None, required=False):
    """Load a JSON file, treating an unreadable/corrupt file as a failure.

    Args:
        path: File to read.
        default: Value returned when the file is missing, or unreadable and
            not required. ``None`` means an empty dict.
        required: When True, a corrupt file aborts the run (lock released,
            exit code 1): silently continuing would let an empty dict
            overwrite the real index.

    Returns:
        The decoded JSON value, or the default.
    """
    if os.path.isfile(path):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except (ValueError, OSError) as e:
            # ValueError covers both json.JSONDecodeError and
            # UnicodeDecodeError: a file with invalid UTF-8 is just as
            # corrupt as one with invalid JSON and must take this path
            # instead of crashing with a traceback.
            backup = path + '.corrupt'
            try:
                shutil.copy2(path, backup)
            except OSError:
                backup = '(backup failed)'
            fail('corrupt JSON {}: {} (backed up to {})'.format(
                os.path.basename(path), e, backup))
            if required:
                print('Aborting: refusing to rebuild on top of a corrupt index.')
                print('Fix or delete the file, then run: python git_sync.py --full')
                release_lock()
                sys.exit(1)
    return default if default is not None else {}
108
+
109
+
110
def save_json(path, data):
    """Atomic, deterministic JSON write (temp file + replace, sorted keys).

    The data is first written to ``path + '.tmp'`` and then moved over
    *path* with os.replace, so readers never see a half-written file.
    If serialisation or the move fails, the temp file is removed and the
    previous content of *path* is left untouched.

    Args:
        path: Destination file; parent directories are created as needed.
        data: JSON-serialisable value.

    Raises:
        TypeError / ValueError: *data* is not JSON-serialisable.
        OSError: the file could not be written or replaced.
    """
    parent = os.path.dirname(path)
    if parent:
        # dirname is '' for a bare filename, and os.makedirs('') raises.
        os.makedirs(parent, exist_ok=True)

    tmp = path + '.tmp'
    try:
        with open(tmp, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2, ensure_ascii=False, sort_keys=True)
        os.replace(tmp, path)
    except BaseException:
        # Do not leave a truncated temp file behind.
        try:
            os.remove(tmp)
        except OSError:
            pass
        raise
117
+
118
+
119
def file_md5(path):
    """Return the hex MD5 digest of the file at *path*, read in 64 KiB chunks."""
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        while True:
            block = stream.read(65536)
            if not block:
                break
            digest.update(block)
    return digest.hexdigest()
125
+
126
+
127
def load_config():
    """Load the configuration mapping from CONFIG_PATH (config.yaml).

    Loading stays best-effort - any problem yields an empty dict - but the
    reason is now printed instead of being swallowed by a bare ``except``,
    so a broken config no longer silently disables configured settings.

    Returns:
        The parsed mapping, or {} when the file is missing, unreadable,
        invalid, empty, or does not contain a mapping at top level.
    """
    if not os.path.exists(CONFIG_PATH):
        return {}

    try:
        import yaml
    except ImportError:
        print('WARNING: PyYAML is not installed; ignoring ' + CONFIG_PATH)
        return {}

    try:
        with open(CONFIG_PATH, 'r', encoding='utf-8') as f:
            config = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError) as e:
        # ValueError covers UnicodeDecodeError for a non-UTF-8 file.
        print('WARNING: cannot read {}: {}'.format(CONFIG_PATH, e))
        return {}

    if not config:
        return {}
    if not isinstance(config, dict):
        # Callers use config.get(...); a list/scalar root would crash them.
        print('WARNING: {} does not contain a mapping; ignoring it'.format(CONFIG_PATH))
        return {}
    return config
137
+
138
+
139
+ # ============================================================
140
+ # Part 1: Git Sync (Code Pull)
141
+ # ============================================================
142
+
143
def git_pull_project(project_name, branch=None):
    """Git pull a single project on its configured branch.

    Args:
        project_name: Directory name under CODE_DIR.
        branch: Expected branch; when given, the pull is refused unless the
            working copy is currently on that branch.

    Returns:
        List of repository-relative paths that changed ([] = no change or
        not a git repo), or None on failure (the failure is recorded via
        fail()).
    """
    project_dir = os.path.join(CODE_DIR, project_name)
    if not os.path.isdir(os.path.join(project_dir, '.git')):
        print(f' {project_name}: Not a git repo, skipping')
        return []

    def git(*args):
        return subprocess.run(['git'] + list(args), cwd=project_dir,
                              capture_output=True, text=True, encoding='utf-8',
                              errors='replace')

    # Verify we are on the configured branch (config.yaml git_sync.projects)
    result = git('rev-parse', '--abbrev-ref', 'HEAD')
    current_branch = result.stdout.strip() if result.returncode == 0 else ''
    if branch and current_branch != branch:
        fail(f'{project_name}: on branch "{current_branch}", expected "{branch}". '
             f'Checkout the right branch manually, then re-run.')
        return None

    # Get current commit before pull. The full hash is kept for the diff
    # (an 8-character prefix can be ambiguous); the short form is display-only.
    result = git('rev-parse', 'HEAD')
    if result.returncode != 0:
        fail(f'{project_name}: rev-parse failed - {result.stderr.strip()[:120]}')
        return None
    old_full = result.stdout.strip()
    old_commit = old_full[:8]

    print(f' {project_name}: Fetching...')
    result = git('fetch', 'origin')
    if result.returncode != 0:
        fail(f'{project_name}: fetch failed - {result.stderr.strip()[:120]}')
        return None

    print(f' {project_name}: Pulling...')
    pull_args = ['pull', 'origin'] + ([branch] if branch else [])
    result = git(*pull_args)
    if result.returncode != 0:
        fail(f'{project_name}: pull failed - {result.stderr.strip()[:120]}')
        return None

    result = git('rev-parse', 'HEAD')
    if result.returncode != 0:
        fail(f'{project_name}: rev-parse failed - {result.stderr.strip()[:120]}')
        return None
    new_full = result.stdout.strip()
    new_commit = new_full[:8]

    if old_full == new_full:
        print(f' {project_name}: Already up to date ({new_commit})')
        return []

    # Get changed files.
    #   -z            NUL-separated, unquoted paths: without it git C-quotes
    #                 non-ASCII names ("\346\226\260...") and the file would
    #                 later look deleted to the indexer.
    #   --no-renames  report a rename as delete + add so the OLD path is
    #                 listed too and can be dropped from the indexes.
    result = git('diff', '--name-only', '--no-renames', '-z', old_full, new_full)
    if result.returncode != 0:
        fail(f'{project_name}: diff failed - run git_sync.py --full to reindex')
        return None
    changed = [p for p in result.stdout.split('\0') if p]
    print(f' {project_name}: {old_commit} -> {new_commit}, {len(changed)} files changed')
    return changed
201
+
202
+
203
def git_sync_all(project_filter=None, config=None):
    """Pull every project directory under CODE_DIR.

    Args:
        project_filter: When set, only the project with exactly this
            directory name is synced.
        config: Parsed config mapping; per-project branches are read from
            ``git_sync.projects.<name>.branch``. Missing, empty or
            malformed sections are ignored.

    Returns:
        {project_name: [changed paths]} for projects whose pull brought in
        changes. Projects that failed or were already up to date are absent.
    """
    print('\n=== Git Sync ===')
    print(f'Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n')

    if not os.path.exists(CODE_DIR):
        fail('data/code/ not found')
        return {}

    # Walk the config defensively: "git_sync:" with no value parses to None,
    # which previously raised AttributeError on the chained .get().
    branch_cfg = {}
    sync_cfg = config.get('git_sync') if isinstance(config, dict) else None
    projects_cfg = sync_cfg.get('projects') if isinstance(sync_cfg, dict) else None
    if isinstance(projects_cfg, dict):
        for name, proj in projects_cfg.items():
            if isinstance(proj, dict) and proj.get('branch'):
                branch_cfg[name] = proj['branch']

    changed_map = {}
    matched_filter = False

    for project_name in sorted(os.listdir(CODE_DIR)):
        if not os.path.isdir(os.path.join(CODE_DIR, project_name)):
            continue
        if project_filter and project_name != project_filter:
            continue
        matched_filter = True

        changed = git_pull_project(project_name, branch_cfg.get(project_name))
        # None (failure) and [] (no change) are both skipped here; failures
        # were already recorded by git_pull_project.
        if changed:
            changed_map[project_name] = changed

    if project_filter and not matched_filter:
        # A mistyped --project would otherwise look like "nothing changed".
        print(f'WARNING: no project named "{project_filter}" under data/code/')

    if not changed_map:
        print('\nNo projects had changes.')
    else:
        total = sum(len(fs) for fs in changed_map.values())
        print(f'\nChanged: {total} files in {len(changed_map)} projects')

    return changed_map
237
+
238
+
239
+ # ============================================================
240
+ # Part 2: PRD Collection
241
+ # ============================================================
242
+
243
def collect_prds():
    """Copy new or changed PRD drafts from member workspaces into PRD_DIR.

    A draft qualifies when its name starts with ``REQ-``, ``PRD-`` or
    ``prd-`` and ends with ``.md``. Change detection compares the MD5 of
    the file content with the digest stored in ``.prd-collected.json``
    (mtime is not used because git does not preserve it). If a file name
    was previously collected from a different member, it is reported via
    fail() and skipped rather than overwritten.

    Returns:
        Number of files copied (new + updated); 0 if the workspace
        directory does not exist.
    """
    print('\n=== Collecting PRDs ===\n')

    if not os.path.isdir(WORKSPACE):
        print('Workspace not found')
        return 0

    os.makedirs(PRD_DIR, exist_ok=True)
    track_file = os.path.join(INDEX_DIR, '.prd-collected.json')
    collected = load_json(track_file)

    tally = {'NEW': 0, 'UPDATED': 0}
    wanted_prefixes = ('REQ-', 'PRD-', 'prd-')

    for member in sorted(os.listdir(WORKSPACE)):
        # If the member entry is not a directory, neither is its drafts path.
        drafts = os.path.join(WORKSPACE, member, 'drafts')
        if not os.path.isdir(drafts):
            continue

        for name in sorted(os.listdir(drafts)):
            if not (name.startswith(wanted_prefixes) and name.endswith('.md')):
                continue

            source = os.path.join(drafts, name)
            digest = file_md5(source)
            known = collected.get(name)

            if known and known.get('user') and known['user'] != member:
                fail(f'PRD filename collision: {name} exists from "{known["user"]}" '
                     f'and "{member}". Rename one of them (REQ numbers must be unique).')
                continue

            if known is None:
                label = 'NEW'
            elif known.get('md5') != digest:
                label = 'UPDATED'
            else:
                continue  # unchanged since last collection

            shutil.copy2(source, os.path.join(PRD_DIR, name))
            collected[name] = {
                'user': member,
                'md5': digest,
                'collected_at': datetime.now().strftime('%Y-%m-%d %H:%M')
            }
            print(f' {label}: {name} (by {member})')
            tally[label] += 1

    save_json(track_file, collected)

    total_new = tally['NEW']
    total_updated = tally['UPDATED']
    print(f'\nCollected: {total_new} new, {total_updated} updated')
    print(f'Total in prd/: {len(os.listdir(PRD_DIR)) if os.path.isdir(PRD_DIR) else 0}')
    return total_new + total_updated
307
+
308
+
309
+ # ============================================================
310
+ # Part 3: PRD Parsing
311
+ # ============================================================
312
+
313
def extract_chinese_terms(text):
    """Extract up to 30 distinct CJK terms from *text*, in document order.

    Terms are runs of 2-4 characters in the U+4E00..U+9FFF range, minus the
    stop words in PRD_STOP_WORDS (from common.terms). Duplicates are removed
    while keeping first-occurrence order: the previous ``list(set(...))[:30]``
    kept a hash-order-dependent subset, so the same PRD could yield
    different terms on different runs.
    """
    raw = re.findall(r'[一-鿿]{2,4}', text)
    unique = dict.fromkeys(t for t in raw if t not in PRD_STOP_WORDS)
    return list(unique)[:30]
317
+
318
+
319
def parse_prd_file(filepath):
    """Parse a single PRD markdown file into an index record.

    Returns:
        dict with keys ``file``, ``title``, ``sections`` (<=20 headings),
        ``features`` (<=20 numbered-list items), ``rules`` (<=15 matched
        rule sentences), ``keywords`` (<=50), ``cn_terms`` (<=30) and
        ``mtime``.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        text = f.read()

    filename = os.path.basename(filepath)

    # Extract title: first level-1 heading, else the file name without ".md"
    title_match = re.search(r'^#\s+(.+)', text, re.MULTILINE)
    title = title_match.group(1).strip() if title_match else filename.replace('.md', '')

    # Extract sections (headings of level 1-3)
    sections = re.findall(r'^#{1,3}\s+(.+)', text, re.MULTILINE)

    # Extract features (numbered items). The pattern is anchored to the end
    # of the line: the former r'...(.+?)\**' had nothing after the lazy
    # group, so it captured a single character and every feature was then
    # discarded by the "> 3 characters" filter below.
    features = re.findall(r'^[ \t]*\d+\.[ \t]+\**(.+?)\**[ \t]*$', text, re.MULTILINE)
    features = [f.strip()[:100] for f in features if len(f.strip()) > 3][:20]

    # Extract business rules
    rules = []
    rule_patterns = [
        r'规则[::]\s*(.+)',
        r'(?:必须|不能|不允许|需要|应当|默认).{5,}',
        r'当.{2,20}时[,,].{5,}',
    ]
    for p in rule_patterns:
        rules.extend(re.findall(p, text))
    rules = [r.strip()[:200] for r in rules][:15]

    # Extract keywords (CN -> EN mapping from common.terms). An insertion-
    # ordered dict is used instead of a set so that the [:50] cut keeps the
    # same keywords on every run.
    cn_terms = extract_chinese_terms(text)

    keywords = {}
    for term in cn_terms:
        keywords[term] = None
        if term in CN_TO_EN:
            keywords[CN_TO_EN[term]] = None

    return {
        'file': filename,
        'title': title,
        'sections': sections[:20],
        'features': features,
        'rules': rules,
        'keywords': list(keywords)[:50],
        'cn_terms': cn_terms[:30],
        'mtime': os.path.getmtime(filepath),
    }
367
+
368
+
369
def build_prd_index(keyword_index=None):
    """Build prd-index.json from every ``.md`` file in PRD_DIR.

    Each PRD is parsed with parse_prd_file() and linked to code: for every
    PRD keyword present in the keyword index, the first three files listed
    for that keyword are recorded, grouped by project (first path segment).

    Args:
        keyword_index: Optional pre-loaded keyword index, to avoid loading
            the large keyword-index.json a second time. When None it is
            loaded from INDEX_DIR.

    Returns:
        {filename: parsed PRD record incl. 'related_code'}; {} when PRD_DIR
        does not exist. A PRD that fails to parse is reported on stdout and
        left out.
    """
    print('\n=== Building PRD Index ===\n')

    if not os.path.isdir(PRD_DIR):
        print('PRD directory not found')
        return {}

    # Load keyword index for code matching (reuse the one passed in if any)
    if keyword_index is None:
        ki_path = os.path.join(INDEX_DIR, 'keyword-index.json')
        keyword_index = load_json(ki_path)

    prd_index = {}

    for f in sorted(os.listdir(PRD_DIR)):
        if not f.endswith('.md'):
            continue

        filepath = os.path.join(PRD_DIR, f)
        try:
            prd = parse_prd_file(filepath)

            # Find related code
            related = {}
            for kw in prd['keywords']:
                if kw in keyword_index:
                    for cf in keyword_index[kw][:3]:
                        proj = cf.split('/')[0] if '/' in cf else cf.split('\\')[0]
                        related.setdefault(proj, set()).add(cf)
            # sorted(): set iteration order varies between runs, which made
            # the saved index differ even when nothing had changed.
            prd['related_code'] = {p: sorted(fs) for p, fs in related.items()}

            prd_index[f] = prd

            print(f' {f}')
            print(f' Title: {prd["title"][:50]}')
            print(f' Features: {len(prd["features"])}, Rules: {len(prd["rules"])}')
            if prd['related_code']:
                for proj, files in prd['related_code'].items():
                    print(f' -> {proj}: {len(files)} files')

        except Exception as e:
            print(f' {f}: Error - {str(e)[:80]}')

    # Save index
    save_json(os.path.join(INDEX_DIR, 'prd-index.json'), prd_index)

    print(f'\nPRD Index: {len(prd_index)} PRDs indexed')
    return prd_index
423
+
424
+
425
+ # ============================================================
426
+ # Part 4: Incremental Code Index Update
427
+ # ============================================================
428
+
429
def parse_java_file(filepath):
    """Extract a regex-based summary of one Java source file.

    Returns:
        A list with one dict (``file``, ``class``, ``apis``, ``methods``,
        ``keywords``), or [] when the file cannot be read.

        * ``class``    - first class/interface/enum name found, or None
        * ``apis``     - string arguments of Spring ``@*Mapping`` annotations
        * ``methods``  - up to 20 method names with an access modifier
        * ``keywords`` - the class name split into its camel-case words
    """
    entities = []
    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        class_match = re.search(r'(?:public\s+)?(?:abstract\s+)?(?:class|interface|enum)\s+(\w+)', content)
        class_name = class_match.group(1) if class_match else None

        apis = re.findall(
            r'@(?:GetMapping|PostMapping|PutMapping|DeleteMapping|RequestMapping)\s*\(?["\']([^"\']+)["\']',
            content
        )

        methods = re.findall(r'(?:public|private|protected)\s+\w+\s+(\w+)\s*\(', content)

        rel = os.path.relpath(filepath, CODE_DIR).replace(os.sep, '/')

        entity = {
            'file': rel,
            'class': class_name,
            'apis': apis,
            'methods': methods[:20],
            'keywords': re.findall('[A-Z]?[a-z]+', class_name) if class_name else []
        }
        entities.append(entity)
    except (OSError, ValueError):
        # Best-effort: an unreadable file (OSError) or a path that cannot be
        # made relative to CODE_DIR (ValueError, e.g. another drive on
        # Windows) is skipped. Unlike the former bare "except:", this no
        # longer swallows KeyboardInterrupt or programming errors.
        pass
    return entities
459
+
460
+
461
def parse_frontend_file(filepath):
    """Extract a regex-based summary of one frontend source file.

    Returns:
        A list with one dict (``file``, ``component``, ``api_calls``,
        ``routes``, ``type``), or [] when the file cannot be read.

        * ``component`` - value of the first ``name: '...'`` entry, else the
          file name up to its first dot
        * ``api_calls`` - string arguments of ``axios|request|fetch|http``
          method calls
        * ``routes``    - values of ``path: '...'`` entries
        * ``type``      - ``'vue'`` for ``.vue`` files, otherwise ``'js/ts'``
    """
    entities = []
    try:
        with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
            content = f.read()

        rel = os.path.relpath(filepath, CODE_DIR).replace(os.sep, '/')

        name_match = re.search(r'name:\s*["\'](\w+)["\']', content)
        component_name = name_match.group(1) if name_match else os.path.basename(filepath).split('.')[0]

        api_calls = re.findall(r'(?:axios|request|fetch|http)\.\w+\s*\(?["\']([^"\']+)["\']', content)
        routes = re.findall(r'path:\s*["\']([^"\']+)["\']', content)

        entity = {
            'file': rel,
            'component': component_name,
            'api_calls': api_calls,
            'routes': routes,
            'type': 'vue' if filepath.endswith('.vue') else 'js/ts'
        }
        entities.append(entity)
    except (OSError, ValueError):
        # Best-effort: skip files that cannot be read (OSError) or whose path
        # cannot be made relative to CODE_DIR (ValueError). The former bare
        # "except:" also hid KeyboardInterrupt and programming errors.
        pass
    return entities
487
+
488
+
489
def build_file_keys_map(keyword_index):
    """Invert the keyword index into ``{file: set(keywords)}``.

    The reverse map lets remove_file_from_indexes() look up the keywords
    of one file directly instead of scanning every keyword.

    Args:
        keyword_index: ``{keyword: [file, ...]}``.

    Returns:
        ``{file: {keyword, ...}}`` covering every file in the index.
    """
    reverse = {}
    for keyword in keyword_index:
        for path in keyword_index[keyword]:
            if path in reverse:
                reverse[path].add(keyword)
            else:
                reverse[path] = {keyword}
    return reverse
500
+
501
+
502
def remove_file_from_indexes(filepath, keyword_index, api_index, file_keys_map=None):
    """Drop every reference to one file from the keyword and API indexes.

    Both indexes are modified in place. Keywords whose file list becomes
    empty are deleted. module-map.json is not touched here: it stores
    per-project counts and is recomputed by rebuild_module_summary().

    Args:
        filepath: Index-relative path of the file (OS separators are
            normalised to '/').
        keyword_index: ``{keyword: [file, ...]}``.
        api_index: ``{endpoint: file}``.
        file_keys_map: Optional reverse map ``{file: set(keywords)}``. When
            given, the file's entry is popped from it and only those
            keywords are visited; when None, every keyword is scanned.
    """
    target = filepath.replace(os.sep, '/')

    # Decide which keywords to inspect: the reverse-map entry if we have a
    # reverse map, otherwise a snapshot of all keywords.
    if file_keys_map is None:
        candidates = list(keyword_index)
    else:
        candidates = file_keys_map.pop(target, set())

    emptied = []
    for keyword in candidates:
        postings = keyword_index.get(keyword)
        if not postings or target not in postings:
            continue
        postings.remove(target)
        if not postings:
            emptied.append(keyword)
    for keyword in emptied:
        del keyword_index[keyword]

    # Remove the endpoints owned by this file from api_index
    owned = [endpoint for endpoint, owner in api_index.items() if owner == target]
    for endpoint in owned:
        del api_index[endpoint]
541
+
542
+
543
# File extensions the indexer recognises, by category.
INDEXED_EXTS_JAVA = ('.java',)                                  # parsed with parse_java_file
INDEXED_EXTS_FRONTEND = ('.vue', '.js', '.ts', '.jsx', '.tsx')  # parsed with parse_frontend_file
INDEXED_EXTS_CONFIG = ('.xml', '.yml', '.yaml', '.properties')  # counted in file totals only
# Directory names pruned while walking a project (dot-directories are skipped too).
SKIP_DIRS = ['node_modules', 'target', 'build', 'dist', '__pycache__']
547
+
548
+
549
def rebuild_module_summary(api_index, file_stats=None):
    """Compute the per-project counts stored in module-map.json.

    Schema (counts only):
        {project: {files: int, classes: int, components: int, apis: int}}

    Args:
        api_index: ``{endpoint: file}``; an endpoint belongs to a project
            when its file path starts with ``"<project>/"``.
        file_stats: Optional ``{project: {files, classes, components}}``
            gathered during a full build. When given (and non-empty) it is
            reused as-is; otherwise CODE_DIR is walked and files are counted
            by extension.

    Returns:
        The module map; {} when falling back to a walk and CODE_DIR does
        not exist.
    """
    def count_apis(project):
        marker = project + '/'
        return sum(1 for owner in api_index.values() if owner.startswith(marker))

    # Fast path: reuse the statistics of a full build, no directory walk
    if file_stats:
        return {
            project: {
                'files': stats.get('files', 0),
                'classes': stats.get('classes', 0),
                'components': stats.get('components', 0),
                'apis': count_apis(project),
            }
            for project, stats in file_stats.items()
        }

    # Fallback path: count files on disk
    summary = {}
    if not os.path.isdir(CODE_DIR):
        return summary

    for project in sorted(os.listdir(CODE_DIR)):
        project_root = os.path.join(CODE_DIR, project)
        if not os.path.isdir(project_root):
            continue

        n_java = n_frontend = n_config = 0
        for _current, subdirs, names in os.walk(project_root):
            # Prune hidden and build/vendor directories in place
            subdirs[:] = [d for d in subdirs if not d.startswith('.') and d not in SKIP_DIRS]
            for name in names:
                suffix = os.path.splitext(name)[1]
                if suffix in INDEXED_EXTS_JAVA:
                    n_java += 1
                elif suffix in INDEXED_EXTS_FRONTEND:
                    n_frontend += 1
                elif suffix in INDEXED_EXTS_CONFIG:
                    n_config += 1

        summary[project] = {
            'files': n_java + n_frontend + n_config,
            'classes': n_java,
            'components': n_frontend,
            'apis': count_apis(project),
        }
    return summary
602
+
603
+
604
def normalize_keyword_index(keyword_index):
    """Replace each keyword's file list, in place, with a sorted list of its
    unique entries so the saved index is deterministic (diff-friendly)."""
    for keyword, paths in keyword_index.items():
        keyword_index[keyword] = sorted(set(paths))
608
+
609
+
610
def index_one_file(filepath, rel, ext, keyword_index, api_index):
    """Parse one source file and merge its entries into the given indexes.

    Java files contribute their mapped endpoints to *api_index* and the
    lower-cased class-name words (length >= 2) to *keyword_index*.
    Frontend files contribute ``'api:' + url`` keys and the lower-cased
    component name to *keyword_index*. Other extensions are ignored.

    Args:
        filepath: Path of the file on disk.
        rel: Index-relative path recorded in the indexes.
        ext: File extension including the dot.
        keyword_index: ``{keyword: [file, ...]}``, updated in place.
        api_index: ``{endpoint: file}``, updated in place.

    Returns:
        True if *ext* is a Java or frontend extension, else False.
    """
    def add_posting(key):
        # Append rel to the key's file list unless it is already there
        postings = keyword_index.setdefault(key, [])
        if rel not in postings:
            postings.append(rel)

    if ext in INDEXED_EXTS_JAVA:
        for entity in parse_java_file(filepath):
            for endpoint in entity.get('apis', []):
                api_index[endpoint] = rel
            for word in entity.get('keywords', []):
                lowered = word.lower()
                if len(lowered) >= 2:
                    add_posting(lowered)
        return True

    if ext in INDEXED_EXTS_FRONTEND:
        for entity in parse_frontend_file(filepath):
            for call in entity.get('api_calls', []):
                add_posting('api:' + call)
            component = entity.get('component', '')
            if component:
                add_posting(component.lower())
        return True

    return False
642
+
643
+
644
def update_indexes_incremental(changed_projects):
    """Update the saved indexes for the files that changed in each project.

    For every changed path the old entries are removed from the keyword and
    API indexes; if the file still exists it is re-parsed and re-added.
    Afterwards the keyword index is normalised, the reverse map and module
    summary are rebuilt, and all four JSON files are written.

    Args:
        changed_projects: ``{project_name: [repo-relative path, ...]}``.

    Returns:
        Number of files that were (re-)indexed.
    """
    print('\n=== Incremental Index Update ===\n')

    os.makedirs(INDEX_DIR, exist_ok=True)

    # required=True: refusing to "rebuild" on top of a corrupt/empty base
    keyword_index = load_json(os.path.join(INDEX_DIR, 'keyword-index.json'), required=True)
    api_index = load_json(os.path.join(INDEX_DIR, 'api-index.json'), required=True)

    # Load the reverse index (if present) to speed up remove_file_from_indexes.
    # An empty or unreadable .file-keys.json comes back as {} from load_json;
    # it must be treated as "no reverse index" (None). Otherwise the fast
    # path would find no keywords for any file and remove nothing, leaving
    # stale entries in keyword-index.json.
    fkm_path = os.path.join(INDEX_DIR, '.file-keys.json')
    file_keys_map = None
    if os.path.isfile(fkm_path):
        loaded = load_json(fkm_path)
        if isinstance(loaded, dict) and loaded:
            # Sets in memory; converted back to sorted lists when saved
            file_keys_map = {f: set(ks) for f, ks in loaded.items()}

    total_files = 0

    for project_name, changed_files in changed_projects.items():
        project_dir = os.path.join(CODE_DIR, project_name)
        if not os.path.isdir(project_dir):
            continue

        print(f' [{project_name}] Updating {len(changed_files)} files...')

        for changed_file in changed_files:
            filepath = os.path.join(project_dir, changed_file)
            rel = (project_name + '/' + changed_file).replace(os.sep, '/')

            # Remove old entries (reverse index if available, else full scan)
            remove_file_from_indexes(rel, keyword_index, api_index, file_keys_map=file_keys_map)

            # Skip if file deleted
            if not os.path.isfile(filepath):
                continue

            ext = os.path.splitext(changed_file)[1]
            if index_one_file(filepath, rel, ext, keyword_index, api_index):
                total_files += 1
                # Keep an entry for the file in the in-memory reverse index
                if file_keys_map is not None:
                    file_keys_map.setdefault(rel, set())

    normalize_keyword_index(keyword_index)

    # Rebuild the reverse index from the final keyword index: keyword/file
    # relations changed above, and a rebuild is the most reliable way to
    # keep the two files consistent.
    fkm_new = build_file_keys_map(keyword_index)
    module_map = rebuild_module_summary(api_index)

    save_json(os.path.join(INDEX_DIR, 'module-map.json'), module_map)
    save_json(os.path.join(INDEX_DIR, 'keyword-index.json'), keyword_index)
    save_json(os.path.join(INDEX_DIR, 'api-index.json'), api_index)
    save_json(os.path.join(INDEX_DIR, '.file-keys.json'),
              {f: sorted(ks) for f, ks in fkm_new.items()})

    print(f'\n Updated: {total_files} files indexed')
    print(f' Keywords: {len(keyword_index)}')
    print(f' APIs: {len(api_index)}')

    return total_files
705
+
706
+
707
def _index_project(project_name, project_dir, skip_dirs):
    """Index every file of one project (worker for the full build).

    Walks *project_dir* and parses each file through index_one_file(), so a
    full build and an incremental update produce identical entries from one
    implementation instead of two copies of the same loops.

    Args:
        project_name: Name returned unchanged as the first tuple element.
        project_dir: Directory of the project; recorded paths are relative
            to its parent, i.e. they start with the project directory name.
        skip_dirs: Directory names to prune (dot-directories are always
            pruned).

    Returns:
        ``(project_name, keyword_dict, api_dict, stats)`` where *stats* is
        ``{'files', 'classes', 'components'}``: all recognised files, Java
        files, and frontend files respectively.
    """
    local_ki = {}
    local_api = {}
    proj_count = proj_java = proj_fe = 0
    code_root = os.path.dirname(project_dir)

    for root, dirs, files in os.walk(project_dir):
        # sorted(): when two files declare the same endpoint the last one
        # visited wins in local_api, so the visiting order must not depend
        # on the order in which the OS happens to list directories.
        dirs[:] = sorted(d for d in dirs if not d.startswith('.') and d not in skip_dirs)
        for f in sorted(files):
            ext = os.path.splitext(f)[1]
            filepath = os.path.join(root, f)
            rel = os.path.relpath(filepath, code_root).replace(os.sep, '/')
            if index_one_file(filepath, rel, ext, local_ki, local_api):
                proj_count += 1
                # index_one_file accepts only Java or frontend extensions
                if ext in INDEXED_EXTS_JAVA:
                    proj_java += 1
                else:
                    proj_fe += 1
            elif ext in INDEXED_EXTS_CONFIG:
                # Config files are counted but not parsed
                proj_count += 1

    stats = {'files': proj_count, 'classes': proj_java, 'components': proj_fe}
    return project_name, local_ki, local_api, stats
755
+
756
+
757
def build_full_indexes():
    """Build the keyword, API and module indexes from scratch.

    Every directory directly under CODE_DIR is treated as one project and
    indexed with ``_index_project``. With two or more projects the work is
    fanned out to a process pool (one task per project); if anything in the
    parallel path raises, all parallel results are discarded and every
    project is re-indexed serially.

    Per-project results are collected first and merged only once a complete
    set is available. Merging while the futures are still being drained
    would leave partially merged data behind when a later future fails, and
    the serial fallback would then add every project a second time
    (duplicate file entries, inflated file totals).

    Writes module-map.json, keyword-index.json, api-index.json and
    .file-keys.json into INDEX_DIR.

    Returns:
        The total number of files counted across all projects, or 0 when
        CODE_DIR does not exist (a failure is recorded via ``fail``).
    """
    print('\n=== Full Index Build ===\n')

    if not os.path.isdir(CODE_DIR):
        fail('data/code/ not found - cannot build indexes')
        return 0

    os.makedirs(INDEX_DIR, exist_ok=True)

    # Collect the projects to process (sorted for deterministic output).
    projects = []
    for project_name in sorted(os.listdir(CODE_DIR)):
        project_dir = os.path.join(CODE_DIR, project_name)
        if os.path.isdir(project_dir):
            projects.append((project_name, project_dir))

    # Each entry is the (name, keyword_index, api_index, stats) tuple
    # returned by _index_project. None means "no complete result set yet".
    results = None

    # Projects are independent, so index them in parallel when there is
    # more than one; files inside a single project are still walked serially.
    if len(projects) >= 2:
        try:
            from concurrent.futures import ProcessPoolExecutor
            import multiprocessing as mp
            workers = min(len(projects), mp.cpu_count())
            print(f' Parallel indexing with {workers} workers...')
            collected = []
            with ProcessPoolExecutor(max_workers=workers) as pool:
                futures = [
                    pool.submit(_index_project, pname, pdir, SKIP_DIRS)
                    for pname, pdir in projects
                ]
                for fut in futures:
                    outcome = fut.result()
                    pname, stats = outcome[0], outcome[3]
                    print(f' [{pname}] Files: {stats["files"]}')
                    collected.append(outcome)
            # Only adopt the parallel results once every project succeeded.
            results = collected
        except Exception as e:
            # Best-effort robustness: any parallel failure falls back to the
            # serial path below, starting again from a clean slate.
            print(f' Parallel failed ({e}), falling back to serial...')

    if results is None:
        # Serial path: single project, no projects, or parallel fallback.
        results = []
        for project_name, project_dir in projects:
            print(f' [{project_name}] Scanning...')
            outcome = _index_project(project_name, project_dir, SKIP_DIRS)
            stats = outcome[3]
            print(f' Files: {stats["files"]}')
            results.append(outcome)

    # Merge the per-project results into the global indexes exactly once.
    keyword_index = {}
    api_index = {}
    file_stats = {}
    total_files = 0
    for pname, local_ki, local_api, stats in results:
        for kw, files in local_ki.items():
            keyword_index.setdefault(kw, []).extend(files)
        api_index.update(local_api)
        file_stats[pname] = stats
        total_files += stats['files']

    # Called for its effect on keyword_index; the return value is not used.
    normalize_keyword_index(keyword_index)
    # Reuse the stats gathered above instead of walking the tree again.
    module_map = rebuild_module_summary(api_index, file_stats=file_stats)

    save_json(os.path.join(INDEX_DIR, 'module-map.json'), module_map)
    save_json(os.path.join(INDEX_DIR, 'keyword-index.json'), keyword_index)
    save_json(os.path.join(INDEX_DIR, 'api-index.json'), api_index)

    # Persist the reverse index {file: [keywords]} for the incremental updater.
    fkm = build_file_keys_map(keyword_index)
    save_json(os.path.join(INDEX_DIR, '.file-keys.json'),
              {f: sorted(ks) for f, ks in fkm.items()})

    print(f'\n Total: {total_files} files, {len(keyword_index)} keywords, {len(api_index)} APIs')
    return total_files
833
+
834
+
835
+ # ============================================================
836
+ # Part 5: PRD ↔ Code Mapping
837
+ # ============================================================
838
+ # Business term -> code path mapping comes from common.terms (BUSINESS_PATH_MAP)
839
+ # so that index building and searching share the same semantics.
840
+
841
+ # Below this length, only exact matches count (substring matching on short
842
+ # keywords like "in"/"sa" pollutes the mapping)
843
+ MIN_PATTERN_FUZZY_LEN = 4
844
+
845
+
846
def build_prd_code_mapping(keyword_index=None):
    """Build the bidirectional PRD <-> code-file mapping and save it.

    For every PRD in prd-index.json, candidate code files are gathered by
    three strategies (business-term path patterns, direct keyword lookup,
    and CN -> EN term translation), then sorted and capped at 30 entries.
    The result is written to prd-code-map.json in INDEX_DIR.

    Args:
        keyword_index: Optional pre-loaded keyword index. When None it is
            loaded from keyword-index.json in INDEX_DIR.

    Returns:
        A dict with the keys 'prd_to_code' and 'code_to_prd'.
    """
    print('\n=== Building PRD to Code Mapping ===\n')

    prd_index = load_json(os.path.join(INDEX_DIR, 'prd-index.json'))
    if keyword_index is None:
        keyword_index = load_json(os.path.join(INDEX_DIR, 'keyword-index.json'))

    def _pattern_hit(key, pat):
        # Exact match always counts; substring matching in either direction
        # is only allowed when the contained string is long enough.
        if key == pat:
            return True
        if len(key) >= MIN_PATTERN_FUZZY_LEN and key in pat:
            return True
        return len(pat) >= MIN_PATTERN_FUZZY_LEN and pat in key

    prd_to_code = {}
    code_to_prd = {}
    mapping = {'prd_to_code': prd_to_code, 'code_to_prd': code_to_prd}

    for prd_file, prd in prd_index.items():
        title = prd.get('title', '')
        cn_terms = prd.get('cn_terms', [])
        keywords = prd.get('keywords', [])
        hits = set()

        # Strategy 1: business term -> path pattern matching.
        haystack = title + ' ' + ' '.join(cn_terms) + ' ' + ' '.join(keywords)
        for cn_term, en_patterns in BUSINESS_PATH_MAP.items():
            if cn_term not in haystack:
                continue
            for pattern in en_patterns:
                pat = pattern.strip('/-').lower()
                if not pat:
                    continue
                for key, paths in keyword_index.items():
                    if _pattern_hit(key.lower(), pat):
                        hits.update(paths[:5])

        # Strategy 2: direct keyword lookup (first 20 keywords only).
        for word in keywords[:20]:
            lowered = word.lower()
            if lowered in keyword_index:
                hits.update(keyword_index[lowered][:3])

        # Strategy 3: CN -> EN translation through the shared term map.
        for term in cn_terms:
            if term not in CN_TO_EN:
                continue
            translated = CN_TO_EN[term].lower()
            if translated in keyword_index:
                hits.update(keyword_index[translated][:5])

        # The set already de-duplicates; sort for determinism, then cap.
        related = sorted(hits)[:30]

        prd_to_code[prd_file] = {
            'title': title,
            'files': related,
            'features': prd.get('features', [])[:10]
        }

        # Reverse direction: code file -> PRDs that reference it.
        for path in related:
            linked = code_to_prd.setdefault(path, [])
            if prd_file not in linked:
                linked.append(prd_file)

    save_json(os.path.join(INDEX_DIR, 'prd-code-map.json'), mapping)

    print(f' PRD -> Code: {len(mapping["prd_to_code"])} PRDs mapped')
    print(f' Code -> PRD: {len(mapping["code_to_prd"])} files with PRD links')

    # Show a short preview of the first three mapped PRDs.
    preview = list(mapping['prd_to_code'].items())[:3]
    for prd_file, info in preview:
        print(f'\n {prd_file}:')
        print(f' {info["title"][:50]}')
        print(f' -> {len(info["files"])} related files')

    return mapping
923
+
924
+
925
+ # ============================================================
926
+ # Part 6: Index Verification (accuracy checks)
927
+ # ============================================================
928
+
929
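+ # An entry count that drops by more than this ratio versus the previous run is treated as an anomaly (guards against a silently emptied or crippled build)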
930
+ MAX_SHRINK_RATIO = 0.3
931
+ # Minimum fraction of sampled index entries whose file must exist on disk
932
+ MIN_SAMPLE_HIT = 0.8
933
+
934
+
935
def verify_indexes(config):
    """Run post-build sanity checks on the index files in INDEX_DIR.

    Checks performed (each problem is reported through ``fail``):
      1. Every project configured under ``git_sync.projects`` appears in
         module-map.json with a non-zero ``files`` count.
      2. The keyword and API entry counts have not dropped by more than
         MAX_SHRINK_RATIO compared with the counts stored in the previous
         .index-meta.json (only checked when the previous count was > 50).
      3. Up to 20 evenly spaced keyword-index entries are sampled and the
         first file of each must exist under CODE_DIR; the hit ratio must
         be at least MIN_SAMPLE_HIT.

    Args:
        config: Loaded configuration mapping; only ``git_sync.projects`` is
            read here.

    Returns:
        A dict with 'checked_at' and 'checks' (per-project file counts,
        current entry counts and, when a sample was taken, the hit ratio).
    """
    print('\n=== Verifying Indexes ===\n')
    result = {'checked_at': datetime.now().strftime('%Y-%m-%d %H:%M'), 'checks': {}}

    module_map = load_json(os.path.join(INDEX_DIR, 'module-map.json'))
    keyword_index = load_json(os.path.join(INDEX_DIR, 'keyword-index.json'))
    api_index = load_json(os.path.join(INDEX_DIR, 'api-index.json'))

    # Check 1: every configured project has been indexed.
    configured = list(((config.get('git_sync', {}) or {}).get('projects', {}) or {}).keys())
    # The count is computed once per project and reused for the report, so
    # a malformed (non-dict) module-map entry is handled the same way in
    # both places instead of raising when the report is assembled.
    project_files = {}
    for proj in configured:
        info = module_map.get(proj)
        files = info.get('files', 0) if isinstance(info, dict) else 0
        project_files[proj] = files
        if not files:
            fail(f'verify: 项目 {proj} 不在索引中或 files=0')
        else:
            print(f' [OK] {proj}: {files} files indexed')
    result['checks']['projects'] = project_files

    # Check 2: detect a sudden drop in entry counts versus the previous meta.
    prev_meta = load_json(os.path.join(INDEX_DIR, '.index-meta.json'))
    # `or {}` also covers a meta file that stores "counts": null.
    prev_counts = prev_meta.get('counts') or {}
    counts = {'keywords': len(keyword_index), 'apis': len(api_index)}
    for name, now in counts.items():
        prev = prev_counts.get(name, 0)
        if prev > 50 and now < prev * (1 - MAX_SHRINK_RATIO):
            fail(f'verify: {name} 条目数从 {prev} 暴跌到 {now} (>30%), 索引可能损坏')
        else:
            print(f' [OK] {name}: {now} (prev {prev})')
    result['checks']['counts'] = counts

    # Check 3: sample index entries and confirm the files exist on disk.
    sample, step = [], max(1, len(keyword_index) // 20)
    for i, (kw, files) in enumerate(sorted(keyword_index.items())):
        if i % step == 0 and files:
            sample.append(files[0])
        if len(sample) >= 20:
            break
    if sample:
        hits = sum(1 for f in sample if os.path.isfile(os.path.join(CODE_DIR, f)))
        ratio = hits / len(sample)
        if ratio < MIN_SAMPLE_HIT:
            fail(f'verify: 抽样 {len(sample)} 条仅 {hits} 条文件存在 ({ratio:.0%}), 索引与磁盘脱节')
        else:
            print(f' [OK] 抽样 {len(sample)} 条, {hits} 条文件存在 ({ratio:.0%})')
        result['checks']['sample_hit_ratio'] = round(ratio, 2)

    return result
989
+
990
+
991
def main():
    """Command-line entry point for the knowledge-graph update.

    Recognised flags (read from sys.argv): --sync-only, --index-only,
    --prd-only, --full and --project <name>. The steps run under a lock,
    in this order: git sync, PRD collection, PRD index build, code index
    update, PRD <-> code mapping, index verification, meta file write.

    Exits with status 2 when the lock cannot be acquired and with status 1
    when any failure was recorded in FAILURES.
    """
    config = load_config()

    argv = sys.argv
    sync_only = '--sync-only' in argv
    index_only = '--index-only' in argv
    prd_only = '--prd-only' in argv
    full_build = '--full' in argv

    # The value following the last '--project' flag wins; a trailing
    # '--project' with no value is ignored.
    project_filter = None
    for pos in range(1, len(argv) - 1):
        if argv[pos] == '--project':
            project_filter = argv[pos + 1]

    banner = '=' * 50
    print(banner)
    print('QODER Knowledge Graph Update')
    print(f'Time: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
    print(banner)

    if not acquire_lock():
        sys.exit(2)

    try:
        ki_path = os.path.join(INDEX_DIR, 'keyword-index.json')

        def _ensure_ki(cached):
            # Lazy load: read keyword-index.json only when nothing is cached
            # and the file exists; otherwise hand back what we already have.
            if cached is None and os.path.isfile(ki_path):
                return load_json(ki_path)
            return cached

        changed_projects = {}

        # Shared keyword index so the PRD index build and the PRD <-> code
        # mapping do not each reload the same JSON file.
        shared_ki = None

        # Step 1: git sync.
        if not index_only and not prd_only:
            changed_projects = git_sync_all(project_filter, config)

        # Step 2: collect PRDs.
        if not sync_only and not index_only:
            collect_prds()

        # Step 3: parse PRDs and build the PRD index.
        if not sync_only:
            shared_ki = _ensure_ki(shared_ki)
            build_prd_index(keyword_index=shared_ki)

        # Step 4: update the code indexes.
        if not sync_only and not prd_only:
            if full_build or index_only:
                build_full_indexes()
                # keyword-index.json was rewritten: drop the cached copy.
                shared_ki = None
            elif changed_projects:
                update_indexes_incremental(changed_projects)
                # The incremental update changed it too: drop the cache.
                shared_ki = None
            else:
                print('\nNo code changes to index.')

        # Step 5: build the PRD <-> code mapping (reloading if invalidated).
        if not sync_only:
            shared_ki = _ensure_ki(shared_ki)
            build_prd_code_mapping(keyword_index=shared_ki)

        # Step 5.5: verify the indexes.
        verify_result = None
        if not sync_only and not prd_only:
            verify_result = verify_indexes(config)

        # Step 6: write the meta file (project counts come from module-map,
        # entry counts from the already loaded keyword index when available).
        module_map = load_json(os.path.join(INDEX_DIR, 'module-map.json'))
        if shared_ki is None:
            shared_ki = load_json(ki_path)
        api_idx = load_json(os.path.join(INDEX_DIR, 'api-index.json'))

        per_project = {p: info.get('files', 0) for p, info in module_map.items()}
        entry_counts = {
            'keywords': len(shared_ki),
            'apis': len(api_idx),
        }
        meta = {
            'last_sync': datetime.now().strftime('%Y-%m-%d %H:%M'),
            'projects': per_project,
            'counts': entry_counts,
            'verify': verify_result,
            'failures': FAILURES,
        }
        save_json(os.path.join(INDEX_DIR, '.index-meta.json'), meta)
    finally:
        release_lock()

    print('\n' + banner)
    if FAILURES:
        print(f'Update finished with {len(FAILURES)} ERROR(S):')
        for msg in FAILURES:
            print(' - ' + msg)
        print(banner)
        sys.exit(1)
    print('Update complete!')
    print(banner)
1084
+
1085
+
1086
+ if __name__ == '__main__':
1087
+ main()