ai-scaffold-pro 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,877 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ gen_references.py - 项目结构扫描器
4
+
5
+ 扫描项目模块结构,输出 JSON 中间数据供 AI 生成完整参考文档。
6
+ 不依赖任何特定语言或框架的源码解析——语义理解由 AI 完成。
7
+
8
+ 用法:
9
+ python gen_references.py # 全量扫描
10
+ python gen_references.py --module libcore # 单模块扫描
11
+ python gen_references.py --output scan.json # 指定输出文件
12
+ python gen_references.py --refresh # 仅刷新已有文档对应的扫描数据
13
+ python gen_references.py --diff # 增量模式:对比上次扫描,输出变更列表
14
+ python gen_references.py --lightweight # 轻量模式:跳过文件列表/目录树/资源(配合 CodeGraph 使用)
15
+ """
16
+
17
+ import argparse
18
+ import json
19
+ import os
20
+ import re
21
+ import sys
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Optional
24
+
25
+
26
# Module-level settings. The values below are import-time defaults based on
# the current working directory; main() rebinds all four from the CLI arguments.
PROJECT_ROOT: Path = Path.cwd()  # root directory of the project being scanned
REFERENCES_DIR: Path = Path.cwd() / "references"  # directory that receives the output files
SCAN_OUTPUT: Path = Path.cwd() / "references" / "_scan.json"  # full-scan JSON file
MAX_DEPTH: int = 4  # depth limit passed to _build_tree when rendering directory trees
31
+
32
+
33
def _detect_output_dir(project_root: Path) -> Path:
    """Pick the directory that should hold the generated references.

    The first existing entry among ``.qoder``, ``.claude``, ``.codex`` and
    ``.opencode`` (checked in that order) under *project_root* wins and its
    ``references`` child is returned. When none exists, the result is
    ``<project_root>/references``.
    """
    tool_dir_names = (".qoder", ".claude", ".codex", ".opencode")
    existing = (
        project_root / dir_name
        for dir_name in tool_dir_names
        if (project_root / dir_name).exists()
    )
    base_dir = next(existing, project_root)
    return base_dir / "references"
41
+
42
# File suffixes (including the leading dot) that count as source code.
# Used when counting/listing source files and when rendering directory trees.
SOURCE_EXTENSIONS = {
    ".kt", ".java", ".swift", ".m", ".h", ".dart", ".ts", ".tsx",
    ".js", ".jsx", ".py", ".go", ".rs", ".cpp", ".c", ".cs",
    ".vue", ".svelte",
}

# Suffixes of configuration/resource files.
# NOTE(review): not referenced by the code visible in this file section.
RESOURCE_EXTENSIONS = {
    ".xml", ".json", ".yaml", ".yml", ".properties", ".plist",
    ".xib", ".storyboard",
}

# Suffixes of binary assets (images and fonts).
# NOTE(review): not referenced by the code visible in this file section.
ASSET_EXTENSIONS = {
    ".png", ".jpg", ".jpeg", ".svg", ".webp", ".gif", ".ico",
    ".ttf", ".otf", ".woff", ".woff2",
}
57
+
58
+
59
+ # === 项目检测 ===
60
+
61
def detect_project_type() -> dict:
    """Detect the platform and build system of the project at PROJECT_ROOT.

    Marker files are probed in priority order and the first one that exists
    decides the result. When the deciding marker is ``package.json`` its
    ``dependencies``/``devDependencies`` are inspected so that React Native
    and Next.js projects are reported specifically instead of as generic
    "Node". (Previously that refinement sat after the loop and could never
    run, because the loop had already returned for ``package.json``.)

    Returns:
        A dict with the keys ``platform`` and ``build``; both are
        ``"Unknown"`` when no marker file is present.
    """
    indicators = [
        ("settings.gradle", {"platform": "Android", "build": "Gradle"}),
        ("settings.gradle.kts", {"platform": "Android", "build": "Gradle Kotlin DSL"}),
        ("build.gradle", {"platform": "JVM", "build": "Gradle"}),
        ("build.gradle.kts", {"platform": "JVM", "build": "Gradle Kotlin DSL"}),
        ("pom.xml", {"platform": "JVM", "build": "Maven"}),
        ("hvigor-config.json5", {"platform": "HarmonyOS", "build": "Hvigor"}),
        ("Package.swift", {"platform": "iOS", "build": "SPM"}),
        ("Podfile", {"platform": "iOS", "build": "CocoaPods"}),
        ("pubspec.yaml", {"platform": "Flutter", "build": "Dart Pub"}),
        ("package.json", {"platform": "Node", "build": "npm/yarn"}),
        ("Cargo.toml", {"platform": "Rust", "build": "Cargo"}),
        ("go.mod", {"platform": "Go", "build": "Go Modules"}),
        ("pyproject.toml", {"platform": "Python", "build": "Python"}),
    ]
    for filename, info in indicators:
        marker = PROJECT_ROOT / filename
        if not marker.exists():
            continue
        if filename != "package.json":
            return info

        # package.json: distinguish well-known frameworks from plain Node.
        try:
            data = json.loads(marker.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Unreadable or malformed manifest: keep the generic answer.
            return info
        if not isinstance(data, dict):
            return info
        deps: Dict[str, Any] = {}
        for section_name in ("dependencies", "devDependencies"):
            section = data.get(section_name)
            if isinstance(section, dict):
                deps.update(section)
        if "react-native" in deps:
            return {"platform": "React Native", "build": "npm/yarn"}
        if "next" in deps:
            return {"platform": "Next.js", "build": "npm/yarn"}
        return info

    return {"platform": "Unknown", "build": "Unknown"}
96
+
97
+
98
+ # === 模块发现 ===
99
+
100
def discover_modules(lightweight: bool = False) -> List[dict]:
    """Discover the project's modules, adapting to the build system in use.

    Probing order: Gradle settings file, Maven ``pom.xml``, npm/yarn
    workspaces declared in ``package.json``, Cargo ``Cargo.toml``. Any other
    layout (including a plain Go module) is reported as a single module
    named ``root`` that covers PROJECT_ROOT.

    Args:
        lightweight: forwarded to the module scanners; skips file-level
            scanning when true.
    """
    for settings_name in ("settings.gradle", "settings.gradle.kts"):
        settings_file = PROJECT_ROOT / settings_name
        if settings_file.exists():
            return _parse_gradle_modules(settings_file, lightweight=lightweight)

    pom_file = PROJECT_ROOT / "pom.xml"
    if pom_file.exists():
        return _parse_maven_modules(pom_file, lightweight=lightweight)

    manifest = PROJECT_ROOT / "package.json"
    if manifest.exists():
        try:
            manifest_data = json.loads(manifest.read_text(encoding="utf-8"))
            declared = manifest_data.get("workspaces", [])
            if declared:
                return _parse_npm_workspaces(declared, lightweight=lightweight)
        except json.JSONDecodeError:
            # A malformed manifest simply means "no workspaces"; keep probing.
            pass

    cargo_file = PROJECT_ROOT / "Cargo.toml"
    if cargo_file.exists():
        return _parse_cargo_workspace(cargo_file, lightweight=lightweight)

    # Go modules and every unrecognised layout are single-module projects.
    return [_scan_module_dir(PROJECT_ROOT, "root", lightweight=lightweight)]
138
+
139
+
140
def _parse_gradle_modules(settings_path: Path, lightweight: bool = False) -> List[dict]:
    """Extract the modules declared in a Gradle settings file.

    Handles the Groovy forms ``include ':a'`` / ``include ':a', ':b'`` /
    ``includeFlat 'a'`` as well as the Kotlin DSL form ``include(":a", ":b")``,
    including statements continued over several lines. Text after ``//`` on a
    line is ignored. The previous single regex only saw the first quoted
    module directly after the keyword and could not match the parenthesised
    Kotlin form at all.

    Args:
        settings_path: path of ``settings.gradle`` or ``settings.gradle.kts``.
        lightweight: forwarded to ``_scan_module_dir``.

    Returns:
        One scan dict per declared module, in declaration order.
    """
    include_re = re.compile(r"^\s*(?:include|includeFlat)\b(.*)$")
    quoted_re = re.compile(r"""['"]([^'"]+)['"]""")

    content = settings_path.read_text(encoding="utf-8", errors="ignore")
    lines = content.splitlines()

    declared: List[str] = []
    idx = 0
    while idx < len(lines):
        matched = include_re.match(lines[idx])
        idx += 1
        if matched is None:
            continue
        args = matched.group(1).split("//", 1)[0]
        # Keep reading while the statement is visibly unfinished: a trailing
        # comma (Groovy) or an unbalanced opening parenthesis (Kotlin DSL).
        while idx < len(lines) and (
            args.rstrip().endswith(",") or args.count("(") > args.count(")")
        ):
            args += " " + lines[idx].split("//", 1)[0]
            idx += 1
        declared.extend(quoted_re.findall(args))

    modules = []
    for quoted in declared:
        raw = quoted.lstrip(":")
        if not raw:
            continue
        is_aar = "libaar" in raw
        name = raw.replace(":", "_").replace("/", "_")
        # Gradle's ':' path separator maps onto directory nesting.
        module_path = PROJECT_ROOT / raw.replace(":", os.sep)
        modules.append(_scan_module_dir(module_path, name, is_aar=is_aar, lightweight=lightweight))
    return modules
158
+
159
+
160
def _parse_maven_modules(pom_path: Path, lightweight: bool = False) -> List[dict]:
    """Extract the ``<module>`` entries of a Maven ``pom.xml``.

    XML comments are removed first so that commented-out modules are not
    reported. The module's relative path is used for scanning; the reported
    name has ``/`` replaced by ``_``, matching what the Gradle and Cargo
    parsers in this file do for nested modules.

    Args:
        pom_path: path of the aggregator ``pom.xml``.
        lightweight: forwarded to ``_scan_module_dir``.
    """
    content = pom_path.read_text(encoding="utf-8", errors="ignore")
    content = re.sub(r"<!--.*?-->", "", content, flags=re.DOTALL)

    modules = []
    for entry in re.finditer(r"<module>([^<]+)</module>", content):
        relative = entry.group(1).strip()
        if not relative:
            continue
        name = relative.replace("/", "_")
        modules.append(_scan_module_dir(PROJECT_ROOT / relative, name, lightweight=lightweight))
    return modules
169
+
170
+
171
def _parse_npm_workspaces(workspaces: Any, lightweight: bool = False) -> List[dict]:
    """Resolve npm/yarn workspace globs into scanned modules.

    *workspaces* is the value of the ``workspaces`` key of ``package.json``:
    either a list of glob patterns or a dict whose ``packages`` entry holds
    that list. Any other shape yields a single ``root`` module. Only matched
    directories that contain their own ``package.json`` are reported, named
    after the directory.
    """
    if isinstance(workspaces, list):
        globs = workspaces
    elif isinstance(workspaces, dict):
        globs = workspaces.get("packages", [])
    else:
        return [_scan_module_dir(PROJECT_ROOT, "root", lightweight=lightweight)]

    return [
        _scan_module_dir(package_dir, package_dir.name, lightweight=lightweight)
        for glob_pattern in globs
        for package_dir in sorted(PROJECT_ROOT.glob(glob_pattern))
        if package_dir.is_dir() and (package_dir / "package.json").exists()
    ]
187
+
188
+
189
def _parse_cargo_workspace(cargo_path: Path, lightweight: bool = False) -> List[dict]:
    """Extract the workspace members declared in a ``Cargo.toml``.

    Reads every ``members = [...]`` array (the key Cargo actually uses; the
    previous pattern looked for ``member`` and therefore never matched).
    Entries containing glob characters are expanded against PROJECT_ROOT and
    kept when the directory holds a ``Cargo.toml``; plain entries are kept
    when the path exists. Each member directory is reported once.

    Args:
        cargo_path: path of the root ``Cargo.toml``.
        lightweight: forwarded to ``_scan_module_dir``.

    Returns:
        One scan dict per member, or a single ``root`` module when no member
        could be resolved.
    """
    content = cargo_path.read_text(encoding="utf-8", errors="ignore")
    members_re = re.compile(r"^\s*members\s*=\s*\[([^\]]*)\]", re.MULTILINE)
    quoted_re = re.compile(r"""["']([^"']+)["']""")

    modules = []
    seen = set()
    for array in members_re.finditer(content):
        for member in quoted_re.findall(array.group(1)):
            if any(ch in member for ch in "*?"):
                try:
                    candidates = sorted(
                        p for p in PROJECT_ROOT.glob(member)
                        if (p / "Cargo.toml").is_file()
                    )
                except (NotImplementedError, ValueError):
                    # pathlib rejects absolute or empty patterns; skip them.
                    continue
                named = [
                    (p, p.relative_to(PROJECT_ROOT).as_posix().replace("/", "_"))
                    for p in candidates
                ]
            else:
                named = [(PROJECT_ROOT / member, member.replace("/", "_"))]

            for module_path, name in named:
                if module_path in seen or not module_path.exists():
                    continue
                seen.add(module_path)
                modules.append(_scan_module_dir(module_path, name, lightweight=lightweight))

    if modules:
        return modules
    return [_scan_module_dir(PROJECT_ROOT, "root", lightweight=lightweight)]
199
+
200
+
201
+ # === 模块扫描 ===
202
+
203
def _scan_module_dir(module_path: Path, name: str, is_aar: bool = False, lightweight: bool = False) -> dict:
    """Collect the structural description of one module directory.

    Args:
        module_path: directory of the module.
        name: name to report for the module.
        is_aar: when true the module is only recorded, never scanned.
        lightweight: when true only the build configuration is read; file
            lists, directory tree, resources and assets stay empty.

    Returns:
        A dict with module metadata. For AAR modules and for paths that do
        not exist, every field except name/path/is_aar keeps its empty
        default.
    """
    if _is_relative(module_path):
        shown_path = str(module_path.relative_to(PROJECT_ROOT))
    else:
        shown_path = str(module_path)

    info = {
        "name": name,
        "path": shown_path,
        "is_aar": is_aar,
        "is_application": False,
        "has_source": False,
        "source_dirs": [],
        "file_count": 0,
        "file_list": [],
        "resource_dirs": [],
        "asset_files": [],
        "tree": "",
        "dependencies": [],
        "build_config": {},
    }

    if is_aar or not module_path.exists():
        return info

    # The build configuration is needed in both modes and is independent of
    # the file-system scan below.
    build_config = _parse_build_config(module_path)
    info["build_config"] = build_config
    info["dependencies"] = build_config.get("dependencies", [])
    info["is_application"] = build_config.get("plugin_type") == "application"
    if lightweight:
        return info

    # Source roots and the files below them.
    src_roots = _find_source_roots(module_path)
    for src_root in src_roots:
        scanned = _scan_source_dir(src_root)
        info["source_dirs"].append(scanned)
        info["file_count"] += scanned["file_count"]
        info["file_list"].extend(scanned["file_list"])
    info["has_source"] = info["file_count"] > 0

    # Directory tree: one header line per root followed by its subtree.
    if src_roots:
        tree_lines = []
        for src_root in src_roots:
            header = src_root.relative_to(PROJECT_ROOT) if _is_relative(src_root) else src_root
            tree_lines.append(f"{header}/")
            _build_tree(src_root, "", MAX_DEPTH, 0, tree_lines)
        info["tree"] = "\n".join(tree_lines)

    # Android-style resource and asset directories.
    main_dir = module_path / "src" / "main"
    if (main_dir / "res").exists():
        info["resource_dirs"] = _scan_resource_dir(main_dir / "res")
    if (main_dir / "assets").exists():
        info["asset_files"] = _scan_assets_dir(main_dir / "assets")

    return info
266
+
267
+
268
def _is_relative(path: Path) -> bool:
    """Tell whether *path* can be expressed relative to PROJECT_ROOT.

    Relies on ``Path.relative_to``, which raises ``ValueError`` when *path*
    is not located under the root.
    """
    try:
        path.relative_to(PROJECT_ROOT)
    except ValueError:
        return False
    else:
        return True
275
+
276
+
277
def _find_source_roots(module_path: Path) -> List[Path]:
    """Find the source root directories of a module.

    Several ecosystem conventions are probed in a fixed order (Gradle,
    generic ``src/main``, Xcode, Flutter, Rust, Go, Python, Node, HarmonyOS)
    with ``src/`` as the last resort. Each directory is returned at most
    once: previously a module matching two conventions that point at the
    same directory (for example ``Cargo.toml`` plus ``package.json`` with a
    shared ``src/``) got that directory twice, and callers then counted its
    files twice.

    Returns:
        The discovered roots in probing order, without duplicates.
    """
    roots: List[Path] = []

    def add_root(candidate: Path) -> None:
        # Keep first-seen order while refusing exact duplicates.
        if candidate not in roots:
            roots.append(candidate)

    # Android/Gradle standard layout: src/main/java, src/main/kotlin.
    for variant in ["src/main/java", "src/main/kotlin"]:
        candidate = module_path / variant
        if candidate.exists() and any(candidate.rglob("*")):
            add_root(candidate)

    # src/main holding source in another language (only when the
    # java/kotlin roots above were not found).
    src_main = module_path / "src" / "main"
    if src_main.exists() and not roots:
        has_code = any(
            f.is_file() and f.suffix in SOURCE_EXTENSIONS
            for f in src_main.rglob("*")
        )
        if has_code:
            add_root(src_main)

    # iOS: an *.xcodeproj next to the sources marks the module directory.
    if next(module_path.glob("*.xcodeproj"), None) is not None:
        add_root(module_path)

    # Flutter: lib/
    lib_dir = module_path / "lib"
    if (module_path / "pubspec.yaml").exists() and lib_dir.exists():
        add_root(lib_dir)

    # Rust: src/
    rust_src = module_path / "src"
    if (module_path / "Cargo.toml").exists() and rust_src.exists():
        add_root(rust_src)

    # Go: the whole module directory.
    if (module_path / "go.mod").exists():
        add_root(module_path)

    # Python: the whole module directory.
    if (module_path / "pyproject.toml").exists() or (module_path / "setup.py").exists():
        add_root(module_path)

    # Node/TS: the first of src/, lib/, app/ that exists.
    if (module_path / "package.json").exists():
        for dirname in ["src", "lib", "app"]:
            node_dir = module_path / dirname
            if node_dir.exists():
                add_root(node_dir)
                break

    # HarmonyOS: src/main/ets/
    ets_dir = module_path / "src" / "main" / "ets"
    if ets_dir.exists():
        add_root(ets_dir)

    # Fallback: plain src/
    if not roots:
        src = module_path / "src"
        if src.exists():
            add_root(src)

    return roots
341
+
342
+
343
def _scan_source_dir(src_root: Path) -> dict:
    """Summarise the source files below one source root.

    Returns:
        A dict with ``root`` (relative to PROJECT_ROOT when possible),
        ``file_count``, the sorted ``file_list`` (paths relative to
        *src_root*) and ``top_packages``: dotted names built from up to the
        first three directory levels, with entries dropped when the
        previously kept entry is a dotted prefix of them.
    """
    relative_files = [
        str(entry.relative_to(src_root))
        for entry in src_root.rglob("*")
        if entry.is_file() and entry.suffix in SOURCE_EXTENSIONS
    ]

    # Derive a package name from the directory part of every file.
    package_names = set()
    for rel_file in relative_files:
        dir_parts = Path(rel_file).parent.parts
        if len(dir_parts) >= 3:
            package_names.add(".".join(dir_parts[:3]))
        elif dir_parts:
            package_names.add(dir_parts[0])

    # Sorted order puts a prefix directly before its extensions, so comparing
    # against the last kept entry is enough to collapse them.
    collapsed = []
    for candidate in sorted(package_names):
        if collapsed and candidate.startswith(collapsed[-1] + "."):
            continue
        collapsed.append(candidate)

    if _is_relative(src_root):
        root_label = str(src_root.relative_to(PROJECT_ROOT))
    else:
        root_label = str(src_root)

    return {
        "root": root_label,
        "file_count": len(relative_files),
        "file_list": sorted(relative_files),
        "top_packages": collapsed,
    }
372
+
373
+
374
def _build_tree(path: Path, prefix: str, max_depth: int, depth: int, lines: List[str]):
    """Append an ASCII-art listing of *path* to *lines*.

    Directories come first, then source files; hidden directories and files
    whose suffix is not in SOURCE_EXTENSIONS are left out. Every directory
    line carries the number of source files found anywhere below it.
    Recursion stops once *depth* reaches *max_depth*; directories that cannot
    be listed are skipped silently.

    Args:
        path: directory to list.
        prefix: text placed before the connector on every emitted line.
        max_depth: maximum recursion depth.
        depth: depth of *path* (0 for the starting directory).
        lines: output list, extended in place.
    """
    if depth >= max_depth:
        return
    try:
        entries = sorted(path.iterdir(), key=lambda p: (not p.is_dir(), p.name))
    except (PermissionError, OSError):
        return

    subdirs = [entry for entry in entries if entry.is_dir() and not entry.name.startswith(".")]
    sources = [entry for entry in entries if entry.is_file() and entry.suffix in SOURCE_EXTENSIONS]
    ordered = subdirs + sources
    last_position = len(ordered) - 1

    for position, entry in enumerate(ordered):
        # NOTE(review): the child indent strings are narrower than the
        # four-character connectors as this file is rendered here - confirm
        # against the upstream source before relying on column alignment.
        if position == last_position:
            connector, child_indent = "└── ", " "
        else:
            connector, child_indent = "├── ", "│ "

        if not entry.is_dir():
            lines.append(f"{prefix}{connector}{entry.name}")
            continue

        source_count = sum(
            1
            for nested in entry.rglob("*")
            if nested.is_file() and nested.suffix in SOURCE_EXTENSIONS
        )
        lines.append(f"{prefix}{connector}{entry.name}/ ({source_count}个文件)")
        _build_tree(entry, prefix + child_indent, max_depth, depth + 1, lines)
396
+
397
+
398
def _scan_resource_dir(res_dir: Path) -> list:
    """List the files of every direct subdirectory of a resource directory.

    Returns:
        One ``{"dir", "count", "files"}`` dict per subdirectory, ordered by
        path; ``files`` holds the sorted names of the regular files directly
        inside it. Files lying directly in *res_dir* are ignored.
    """
    summary = []
    for subdir in sorted(res_dir.iterdir()):
        if not subdir.is_dir():
            continue
        file_names = sorted(child.name for child in subdir.iterdir() if child.is_file())
        summary.append({"dir": subdir.name, "count": len(file_names), "files": file_names})
    return summary
406
+
407
+
408
def _scan_assets_dir(assets_dir: Path) -> list:
    """Return every file below *assets_dir* as a path relative to it.

    The directory is walked recursively; the result follows the sort order
    of the full paths and contains regular files only.
    """
    return [
        str(entry.relative_to(assets_dir))
        for entry in sorted(assets_dir.rglob("*"))
        if entry.is_file()
    ]
415
+
416
+
417
def _parse_cargo_dependencies(content: str) -> List[dict]:
    """Extract dependency entries from the dependency tables of a Cargo.toml.

    Only keys inside tables whose name ends in ``dependencies`` (for example
    ``[dependencies]``, ``[dev-dependencies]``, ``[workspace.dependencies]``)
    and ``[<...>dependencies.<name>]`` sub-tables are reported, so package
    metadata such as ``name``/``version``/``edition`` is no longer mistaken
    for a dependency. ``version`` is included when it can be read from a
    plain string or an inline table.
    """
    header_re = re.compile(r"\[+\s*([^\]]+?)\s*\]+")
    subtable_re = re.compile(r"(?:.+\.)?(?:dev-|build-)?dependencies\.([\w-]+)$")
    key_value_re = re.compile(r"([\w-]+)((?:\.[\w-]+)*)\s*=\s*(.+)$")
    quoted_re = re.compile(r"""["']([^"']+)["']""")
    inline_version_re = re.compile(r"""\bversion\s*=\s*["']([^"']+)["']""")

    deps: List[dict] = []
    in_dep_table = False
    table_dep: Optional[dict] = None  # entry being filled from a sub-table

    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        if line.startswith("["):
            header = header_re.match(line)
            if header is None:
                continue
            section = header.group(1)
            in_dep_table = section.endswith("dependencies")
            table_dep = None
            sub = subtable_re.match(section)
            if sub:
                table_dep = {"name": sub.group(1), "external": True}
                deps.append(table_dep)
            continue

        pair = key_value_re.match(line)
        if pair is None:
            continue
        key, suffix, value = pair.group(1), pair.group(2), pair.group(3)

        if table_dep is not None:
            # Inside [dependencies.<name>]: only the version is of interest.
            if key == "version" and not suffix:
                found = quoted_re.match(value)
                if found:
                    table_dep["version"] = found.group(1)
            continue
        if not in_dep_table:
            continue

        version = None
        if not suffix or suffix == ".version":
            if value[0] in "\"'":
                found = quoted_re.match(value)
                version = found.group(1) if found else None
            elif value.startswith("{"):
                found = inline_version_re.search(value)
                version = found.group(1) if found else None
        entry: Dict[str, Any] = {"name": key}
        if version is not None:
            entry["version"] = version
        entry["external"] = True
        deps.append(entry)
    return deps


def _parse_build_config(module_path: Path) -> dict:
    """Extract build configuration and dependencies of one module.

    Reads whichever of these files exist directly in *module_path*:
    ``build.gradle``/``build.gradle.kts`` (the first that exists),
    ``package.json``, ``Cargo.toml`` and ``go.mod``.

    Fixes over the previous version: Gradle dependencies written in Kotlin
    DSL call syntax (``implementation(project(":x"))``, ``api("g:a:v")``) are
    recognised; Cargo dependencies are read from dependency tables only; a
    ``package.json`` that is unreadable or not a JSON object is ignored
    instead of raising; single-line ``require path vX`` directives in
    ``go.mod`` are recognised.

    Returns:
        A dict with at least ``dependencies`` (list of dicts) and
        ``plugin_type`` (``"library"`` or ``"application"``). For Gradle
        modules ``view_binding`` and, when declared, ``resource_prefix`` are
        added.
    """
    config: Dict[str, Any] = {"dependencies": [], "plugin_type": "library"}
    deps: List[dict] = config["dependencies"]

    # Gradle
    gradle_file = next(
        (
            module_path / candidate
            for candidate in ("build.gradle", "build.gradle.kts")
            if (module_path / candidate).exists()
        ),
        None,
    )
    if gradle_file is not None:
        content = gradle_file.read_text(encoding="utf-8", errors="ignore")
        if "com.android.application" in content:
            config["plugin_type"] = "application"

        # Project dependencies; the optional "(" covers Kotlin DSL calls.
        for m in re.finditer(
            r"(implementation|api|compileOnly|runtimeOnly)\s*\(?\s*project\(\s*['\"][:']?([^'\"]+)['\"]",
            content,
        ):
            deps.append({"name": m.group(2).lstrip(":"), "type": m.group(1)})

        # External dependencies given as "group:artifact:version".
        for m in re.finditer(
            r"(implementation|api)\s*\(?\s*['\"]([^'\"]+:[^'\"]+:[^'\"]+)['\"]",
            content,
        ):
            deps.append({"name": m.group(2), "type": m.group(1), "external": True})

        # ViewBinding: "true" within 50 characters of the first mention.
        marker = content.find("viewBinding")
        config["view_binding"] = marker != -1 and "true" in content[marker:marker + 50]

        # resourcePrefix
        rp = re.search(r'resourcePrefix\s+["\']([^"\']+)["\']', content)
        if rp:
            config["resource_prefix"] = rp.group(1)

    # package.json
    pkg_json = module_path / "package.json"
    if pkg_json.exists():
        try:
            data = json.loads(pkg_json.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            data = None
        if isinstance(data, dict):
            if "main" in data or "bin" in data:
                config["plugin_type"] = "application"
            for dep_type in ("dependencies", "devDependencies", "peerDependencies"):
                section = data.get(dep_type)
                if not isinstance(section, dict):
                    continue
                for dep_name, ver in section.items():
                    deps.append({
                        "name": dep_name, "version": ver, "type": dep_type, "external": True
                    })

    # Cargo.toml
    cargo = module_path / "Cargo.toml"
    if cargo.exists():
        deps.extend(_parse_cargo_dependencies(cargo.read_text(encoding="utf-8", errors="ignore")))

    # go.mod
    go_mod = module_path / "go.mod"
    if go_mod.exists():
        content = go_mod.read_text(encoding="utf-8", errors="ignore")
        # Indented "path version" lines, as found inside require ( ... ) blocks.
        for m in re.finditer(r'^\s+(\S+)\s+(v\S+)', content, re.MULTILINE):
            deps.append({"name": m.group(1), "version": m.group(2), "external": True})
        # Single-line form: require path version
        for m in re.finditer(r'^require[ \t]+(\S+)[ \t]+(v\S+)', content, re.MULTILINE):
            deps.append({"name": m.group(1), "version": m.group(2), "external": True})

    return config
487
+
488
+
489
+ # === Diff 工具函数 ===
490
+
491
def _normalize_dep(dep):
    """Reduce a dependency entry to the value used when comparing scans.

    Dict entries are represented by their ``name`` (falling back to the
    dict's string form when the key is missing), so ``type`` and ``external``
    markers are ignored; everything else is converted with ``str``.
    """
    if not isinstance(dep, dict):
        return str(dep)
    return dep.get("name", str(dep))
496
+
497
+
498
def _diff_file_lists(old_files, new_files):
    """Compare two file lists and return ``(added, removed)``.

    A removed file and an added file that share the same parent directory
    and the same stem (compared case-insensitively) are treated as an
    in-place rename: the pair is dropped from both result lists. Each added
    file can pair with only one removed file.
    """
    before, after = set(old_files), set(new_files)
    pending_added = sorted(after - before)
    still_removed = []

    for gone in sorted(before - after):
        gone_path = Path(gone)
        gone_key = (str(gone_path.parent), gone_path.stem.lower())
        # Position of the first added file with the same directory and stem.
        twin = next(
            (
                position
                for position, candidate in enumerate(pending_added)
                if (str(Path(candidate).parent), Path(candidate).stem.lower()) == gone_key
            ),
            None,
        )
        if twin is None:
            still_removed.append(gone)
        else:
            # Matched as a rename, so it no longer counts as added.
            del pending_added[twin]

    return pending_added, still_removed
525
+
526
+
527
def _diff_module_files(old_mod, new_mod):
    """Compare the ``file_list`` of two scans of the same module.

    Returns:
        ``None`` when both scans contain the same set of files, otherwise a
        dict with the number of distinct files before/after (``total_old``,
        ``total_new``) and the ``added``/``removed`` lists computed by
        ``_diff_file_lists``.
    """
    previous_list = old_mod.get("file_list", [])
    current_list = new_mod.get("file_list", [])
    previous_set, current_set = set(previous_list), set(current_list)

    if previous_set == current_set:
        return None  # no file changes

    added, removed = _diff_file_lists(previous_list, current_list)
    return {
        "total_old": len(previous_set),
        "total_new": len(current_set),
        "added": added,
        "removed": removed,
    }
545
+
546
+
547
def _diff_dependencies(old_deps, new_deps):
    """Compare two dependency lists by normalised name.

    Returns:
        ``None`` when no name was added or removed, otherwise
        ``{"added": [...], "removed": [...]}`` with the names in sorted order.
    """
    previous = sorted(map(_normalize_dep, old_deps))
    current = sorted(map(_normalize_dep, new_deps))

    gained = [name for name in current if name not in previous]
    lost = [name for name in previous if name not in current]

    if gained or lost:
        return {"added": gained, "removed": lost}
    return None
558
+
559
+
560
def _detect_renames(old_modules, new_modules):
    """Detect module renames by comparing file lists.

    A module that disappeared is paired with a newly appeared module when
    more than half of the files (relative to the larger of the two lists)
    are shared; the candidate with the highest ratio wins. Names are
    processed in sorted order so the pairing is reproducible between runs -
    iterating the raw sets made the outcome depend on string hash ordering
    whenever several candidates matched.

    Args:
        old_modules: module dicts of the previous scan.
        new_modules: module dicts of the current scan.

    Returns:
        ``(renames, matched_old, matched_new)``: a list of
        ``{"from", "to", "similarity"}`` dicts plus the sets of old and new
        names that took part in a rename.
    """
    renames = []
    old_by_name = {m["name"]: m for m in old_modules}
    new_by_name = {m["name"]: m for m in new_modules}

    removed_names = sorted(set(old_by_name) - set(new_by_name))
    added_names = sorted(set(new_by_name) - set(old_by_name))

    matched_old = set()
    matched_new = set()

    for old_name in removed_names:
        old_files = set(old_by_name[old_name].get("file_list", []))
        if not old_files:
            continue
        best_match = None
        best_ratio = 0
        for new_name in added_names:
            if new_name in matched_new:
                continue
            new_files = set(new_by_name[new_name].get("file_list", []))
            if not new_files:
                continue
            common = len(old_files & new_files)
            ratio = common / max(len(old_files), len(new_files))
            # Strict comparison: on a tie the alphabetically first name stays.
            if ratio > 0.5 and ratio > best_ratio:
                best_ratio = ratio
                best_match = new_name
        if best_match:
            renames.append({"from": old_name, "to": best_match, "similarity": round(best_ratio, 2)})
            matched_old.add(old_name)
            matched_new.add(best_match)

    return renames, matched_old, matched_new
595
+
596
+
597
def _diff_scans(old_scan, new_scan):
    """Compare two complete scan results.

    Returns:
        A dict with ``diff_version``, both scan times, the new scan's
        ``project_type``, a ``summary`` of counters and ``modules``: a
        per-module detail dict whose ``status`` is one of ``added``,
        ``removed``, ``renamed``, ``renamed_from`` or ``changed``. Modules
        present in both scans without file or dependency changes are not
        listed.
    """
    old_list = old_scan.get("modules", [])
    new_list = new_scan.get("modules", [])
    old_modules = {mod["name"]: mod for mod in old_list}
    new_modules = {mod["name"]: mod for mod in new_list}
    old_names, new_names = set(old_modules), set(new_modules)

    # Module-level rename detection; renamed modules are neither added nor removed.
    renames, renamed_old, renamed_new = _detect_renames(old_list, new_list)

    modules_added = sorted(new_names - old_names - renamed_new)
    modules_removed = sorted(old_names - new_names - renamed_old)
    modules_common = sorted(old_names & new_names)

    modules_detail = {}

    for name in modules_added:
        appeared = new_modules[name]
        modules_detail[name] = {
            "status": "added",
            "file_count": appeared["file_count"],
            "has_source": appeared["has_source"],
        }

    for name in modules_removed:
        modules_detail[name] = {
            "status": "removed",
            "file_count": old_modules[name]["file_count"],
        }

    for rename in renames:
        source_name, target_name = rename["from"], rename["to"]
        similarity = rename["similarity"]
        modules_detail[source_name] = {
            "status": "renamed",
            "new_name": target_name,
            "similarity": similarity,
        }
        # The new name gets an entry as well so it can be looked up directly.
        modules_detail[target_name] = {
            "status": "renamed_from",
            "old_name": source_name,
            "similarity": similarity,
        }

    # Modules present in both scans: check files and dependencies.
    for name in modules_common:
        before, after = old_modules[name], new_modules[name]
        changes = {}

        file_diff = _diff_module_files(before, after)
        if file_diff:
            changes["files"] = file_diff

        dep_diff = _diff_dependencies(
            before.get("dependencies", []),
            after.get("dependencies", []),
        )
        if dep_diff:
            changes["dependencies"] = dep_diff

        if changes:
            modules_detail[name] = {"status": "changed", **changes}

    changed_total = sum(
        1 for detail in modules_detail.values() if detail.get("status") == "changed"
    )

    return {
        "diff_version": "1.0",
        "old_scan_time": old_scan.get("scan_time", "unknown"),
        "new_scan_time": new_scan.get("scan_time", "unknown"),
        "project_type": new_scan.get("project_type", {}),
        "summary": {
            "modules_added": len(modules_added),
            "modules_removed": len(modules_removed),
            "modules_renamed": len(renames),
            "modules_changed": changed_total,
            "modules_unchanged": len(modules_common) - changed_total,
        },
        "modules": modules_detail,
    }
688
+
689
+
690
def _build_diff_summary(diff_result):
    """Render a diff result (as produced by ``_diff_scans``) as readable text.

    The report has a header with the summary counters, one line per
    added/removed/renamed/changed module sorted by name, a blank line, and a
    closing recommendation: the modules whose documentation must be
    regenerated (structural or dependency changes), else the modules with
    file-only changes, else a "nothing changed" note.
    """
    stats = diff_result["summary"]
    modules = diff_result["modules"]

    report = [
        "增量扫描 Diff 报告",
        f" 模块: +{stats['modules_added']} 新增, -{stats['modules_removed']} 删除, "
        f"~{stats['modules_renamed']} 重命名, Δ{stats['modules_changed']} 变更, "
        f"={stats['modules_unchanged']} 未变",
    ]

    for name in sorted(modules):
        detail = modules[name]
        status = detail["status"]
        if status in ("added", "removed"):
            sign = "+" if status == "added" else "-"
            report.append(f" [{sign}] {name} ({detail['file_count']} 文件)")
        elif status == "renamed":
            report.append(f" [~] {detail['new_name']} ← {name} (相似度 {detail['similarity']:.0%})")
        elif status == "changed":
            fragments = []
            if "files" in detail:
                file_changes = detail["files"]
                fragments.append(f"文件 +{len(file_changes['added'])}/-{len(file_changes['removed'])}")
            if "dependencies" in detail:
                dep_changes = detail["dependencies"]
                fragments.append(f"依赖 +{len(dep_changes['added'])}/-{len(dep_changes['removed'])}")
            report.append(f" [Δ] {name}: {', '.join(fragments)}")

    # Structural changes and dependency changes require regenerated docs.
    structural = ("added", "removed", "renamed", "renamed_from")
    needs_refresh = {
        name
        for name, detail in modules.items()
        if detail["status"] in structural
        or (detail["status"] == "changed" and "dependencies" in detail)
    }

    report.append("")
    if needs_refresh:
        report.append(f"需要重新生成文档的模块: {', '.join(sorted(needs_refresh))}")
        return "\n".join(report)

    files_only = sorted(
        name
        for name, detail in modules.items()
        if detail["status"] == "changed" and "files" in detail
    )
    if files_only:
        report.append(f"仅文件变更(可增量更新): {', '.join(files_only)}")
    else:
        report.append("无变更,references 无需更新")
    return "\n".join(report)
740
+
741
+
742
+ # === 主流程 ===
743
+
744
def main():
    """Command-line entry point: scan the project and write JSON output.

    Two modes exist. With ``--diff`` and an existing, readable ``_scan.json``
    the project is rescanned, compared with the stored scan, the diff is
    written (``--output`` or ``<output-dir>/_diff.json``) and ``_scan.json``
    is replaced by the new scan. Otherwise a full scan is written to
    ``--output`` or ``_scan.json``. When the stored scan is missing, or -
    new in this version - cannot be read or parsed as a JSON object,
    ``--diff`` falls back to the full scan instead of aborting with a
    traceback.

    Side effects: rebinds the module globals PROJECT_ROOT, REFERENCES_DIR,
    SCAN_OUTPUT and MAX_DEPTH, creates the output directory, prints progress
    to stdout and exits with status 1 when ``--module`` names an unknown
    module.
    """
    global PROJECT_ROOT, REFERENCES_DIR, SCAN_OUTPUT, MAX_DEPTH

    parser = argparse.ArgumentParser(description="项目结构扫描器 - 输出 JSON 供 AI 生成完整参考文档")
    parser.add_argument("--module", type=str, help="仅扫描指定模块")
    parser.add_argument("--output", type=str, help="输出文件路径(默认 <output-dir>/_scan.json)")
    parser.add_argument("--refresh", action="store_true", help="仅刷新已有文档对应的模块")
    parser.add_argument("--diff", action="store_true",
                        help="增量模式:对比当前项目结构与上次扫描结果,输出变更列表")
    parser.add_argument("--project-root", type=str, default=None,
                        help="项目根目录(默认为当前工作目录)")
    parser.add_argument("--output-dir", type=str, default=None,
                        help="输出目录(默认自动检测 .qoder/.claude/.codex/.opencode,否则使用 references/)")
    parser.add_argument("--max-depth", type=int, default=4,
                        help="目录树最大深度(默认 4)")
    parser.add_argument("--lightweight", action="store_true",
                        help="轻量模式:跳过文件列表、目录树、资源/资产扫描(配合 CodeGraph 使用)")
    args = parser.parse_args()

    # Resolve the global settings from the CLI arguments.
    PROJECT_ROOT = Path(args.project_root).resolve() if args.project_root else Path.cwd()
    if args.output_dir:
        REFERENCES_DIR = Path(args.output_dir).resolve()
    else:
        REFERENCES_DIR = _detect_output_dir(PROJECT_ROOT)
    SCAN_OUTPUT = REFERENCES_DIR / "_scan.json"
    MAX_DEPTH = args.max_depth

    REFERENCES_DIR.mkdir(parents=True, exist_ok=True)

    def collect_modules() -> List[dict]:
        """Discover modules and apply the --module filter (exits if unknown)."""
        found = discover_modules(lightweight=args.lightweight)
        if args.module:
            found = [m for m in found if m["name"] == args.module]
            if not found:
                print(f"错误: 未找到模块 '{args.module}'")
                sys.exit(1)
        return found

    def build_scan(project_type: dict, modules: List[dict]) -> dict:
        """Assemble the JSON-serialisable scan document."""
        return {
            "project_root": str(PROJECT_ROOT),
            "project_type": project_type,
            "scan_time": _current_time(),
            "module_count": len(modules),
            "modules": modules,
        }

    # Load the previous scan when running in diff mode.
    old_scan = None
    if args.diff:
        if not SCAN_OUTPUT.exists():
            print("首次运行,无历史扫描数据,将执行全量扫描...")
        else:
            print("加载上次扫描数据...")
            try:
                loaded = json.loads(SCAN_OUTPUT.read_text(encoding="utf-8"))
            except (OSError, ValueError):
                loaded = None
            if isinstance(loaded, dict):
                old_scan = loaded
            else:
                print("历史扫描数据无法读取或解析,将执行全量扫描...")

    if old_scan is not None:
        # === Incremental diff mode ===
        # NOTE(review): combined with --module the new scan holds only that
        # module, so every other module is reported as removed and the stored
        # scan is replaced by the partial one - confirm this is intended.
        print("扫描当前项目结构...")
        project_type = detect_project_type()
        modules = collect_modules()
        new_scan = build_scan(project_type, modules)

        diff_result = _diff_scans(old_scan, new_scan)

        output_path = Path(args.output) if args.output else REFERENCES_DIR / "_diff.json"
        output_path.parent.mkdir(parents=True, exist_ok=True)
        output_path.write_text(
            json.dumps(diff_result, ensure_ascii=False, indent=2),
            encoding="utf-8"
        )

        # Keep _scan.json in step with the state the diff was computed from.
        SCAN_OUTPUT.write_text(
            json.dumps(new_scan, ensure_ascii=False, indent=2),
            encoding="utf-8"
        )

        print(_build_diff_summary(diff_result))
        print(f"\nDiff 输出: {output_path}")
        print(f"扫描数据已更新: {SCAN_OUTPUT}")
        return

    # === Full scan mode (default, and fallback for --diff) ===
    print("扫描项目结构...")
    project_type = detect_project_type()
    modules = collect_modules()

    if args.refresh:
        # Keep only modules that already have a generated <name>.md document.
        existing = {f.stem for f in REFERENCES_DIR.glob("*.md") if not f.stem.startswith("_")}
        modules = [m for m in modules if m["name"] in existing]

    scan_result = build_scan(project_type, modules)

    output_path = Path(args.output) if args.output else SCAN_OUTPUT
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(scan_result, ensure_ascii=False, indent=2), encoding="utf-8")

    total_files = sum(m["file_count"] for m in modules)
    has_source = sum(1 for m in modules if m["has_source"])
    print(f"扫描完成: {len(modules)} 个模块, {has_source} 个有源码, {total_files} 个源文件")
    print(f"项目类型: {project_type['platform']} / {project_type['build']}")
    print(f"输出: {output_path}")
    if args.lightweight:
        print("(轻量模式:跳过文件列表/目录/资源扫描,仅保留模块元数据和依赖)")

    if not SCAN_OUTPUT.exists() or args.output:
        print(f"\n下一步: AI 读取 {output_path} + 各模块源码 → 生成完整参考文档")
    else:
        print(f"\n提示: 如需增量更新,运行 python gen_references.py --diff")
868
+
869
+
870
def _current_time():
    """Return the current local time as a ``YYYY-MM-DD HH:MM:SS`` string."""
    from datetime import datetime

    now = datetime.now()
    return now.isoformat(sep=" ", timespec="seconds")
874
+
875
+
876
# Run the scanner only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()