@haaaiawd/anws 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/templates/.agents/skills/nexus-query/SKILL.md +114 -0
- package/templates/.agents/skills/nexus-query/scripts/extract_ast.py +706 -0
- package/templates/.agents/skills/nexus-query/scripts/git_detective.py +194 -0
- package/templates/.agents/skills/nexus-query/scripts/languages.json +127 -0
- package/templates/.agents/skills/nexus-query/scripts/query_graph.py +556 -0
- package/templates/.agents/skills/nexus-query/scripts/requirements.txt +6 -0
- package/templates/.agents/skills/runtime-inspector/SKILL.md +8 -2
- package/templates/.agents/skills/sequential-thinking/SKILL.md +44 -7
- package/templates/.agents/skills/task-planner/SKILL.md +25 -1
- package/templates/.agents/skills/task-planner/references/TASK_TEMPLATE.md +25 -6
- package/templates/.agents/workflows/blueprint.md +9 -1
- package/templates/.agents/workflows/challenge.md +7 -6
- package/templates/.agents/workflows/design-system.md +14 -5
- package/templates/.agents/workflows/explore.md +42 -8
- package/templates/.agents/workflows/forge.md +6 -1
- package/templates/.agents/workflows/probe.md +105 -35
- package/templates/.agents/workflows/quickstart.md +2 -0
|
@@ -0,0 +1,706 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
extract_ast.py — 多语言代码仓库 AST 结构提取器
|
|
4
|
+
|
|
5
|
+
用途:基于 Tree-sitter 提取代码仓库的模块/类/函数结构,输出 JSON 到 stdout
|
|
6
|
+
支持:Python, JavaScript, TypeScript, TSX, Java, Go, Rust, C#, C/C++, Kotlin, Ruby, Swift, PHP, Lua ...
|
|
7
|
+
用法:python extract_ast.py <repo_path> [--max-nodes 500]
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
import json
|
|
12
|
+
import argparse
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Optional, cast
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Directory names skipped during traversal: VCS metadata, caches, virtualenvs,
# build output, vendored dependencies, and editor/tool state. Matched against
# every component of a path relative to the repo root.
EXCLUDE_DIRS = {'.git', '__pycache__', '.venv', 'venv', 'node_modules',
                'dist', 'build', '.mypy_cache', '.pytest_cache', 'site-packages',
                '.nexus-map', '.tox', '.eggs', 'target', 'cmake-build-debug',
                '.vs', 'out', '_build', 'vendor', '.ruff_cache', '.godot',
                '.idea', '.vscode', '.nox'}

# File-name suffixes skipped during traversal (generated engine artifacts).
EXCLUDE_FILE_SUFFIXES = ('.import', '.vulkan.cache')

# -- Built-in language configuration: loaded from languages.json next to this script --
_LANGUAGES_JSON = Path(__file__).parent / 'languages.json'
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _load_builtin_languages() -> tuple[dict[str, str], dict[str, dict[str, str]], dict[str, str]]:
    """Load the bundled extension map, Tree-sitter queries, and known-unsupported extensions from languages.json."""
    try:
        raw = json.loads(_LANGUAGES_JSON.read_text(encoding='utf-8'))
    except (FileNotFoundError, json.JSONDecodeError) as exc:
        sys.stderr.write(f"[ERROR] Failed to load {_LANGUAGES_JSON}: {exc}\n")
        sys.exit(1)

    ext_map: dict[str, str] = raw.get('extensions', {})
    unsupported_map: dict[str, str] = raw.get('unsupported_extensions', {})

    # Normalize the query table: every language ends up with both a
    # 'struct' and an 'imports' entry, defaulting to the empty string.
    normalized: dict[str, dict[str, str]] = {
        name: {
            'struct': spec.get('struct', ''),
            'imports': spec.get('imports', ''),
        }
        for name, spec in raw.get('queries', {}).items()
    }

    return ext_map, normalized, unsupported_map
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Module-level cache of the bundled language configuration, loaded once at
# import time. Exits the process if languages.json is missing or malformed.
BUILTIN_EXTENSION_MAP, BUILTIN_LANG_QUERIES, BUILTIN_KNOWN_UNSUPPORTED_EXTENSIONS = (
    _load_builtin_languages()
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _should_skip_path(repo_path: Path, path: Path) -> bool:
    """Return True when *path* sits under an excluded directory or carries an excluded file suffix."""
    relative = path.relative_to(repo_path)
    for component in relative.parts:
        if component in EXCLUDE_DIRS:
            return True
    # str.endswith accepts a tuple, so one call covers every excluded suffix.
    return path.is_file() and path.name.endswith(EXCLUDE_FILE_SUFFIXES)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def write_filtered_file_tree(repo_path: Path, output_path: Path) -> None:
    """Write a sorted, newline-separated listing of the repo to *output_path*.

    Directories are suffixed with '/'; the same exclude rules as AST
    collection apply. Parent directories of *output_path* are created.
    """
    entries: list[str] = []
    for candidate in sorted(repo_path.rglob('*')):
        if _should_skip_path(repo_path, candidate):
            continue
        marker = '/' if candidate.is_dir() else ''
        entries.append(candidate.relative_to(repo_path).as_posix() + marker)

    output_path.parent.mkdir(parents=True, exist_ok=True)
    body = '\n'.join(entries) + '\n' if entries else ''
    output_path.write_text(body, encoding='utf-8')
|
|
77
|
+
|
|
78
|
+
def _normalize_extension(ext: str) -> str:
|
|
79
|
+
normalized = ext.strip().lower()
|
|
80
|
+
if not normalized:
|
|
81
|
+
raise ValueError('extension must not be empty')
|
|
82
|
+
if not normalized.startswith('.'):
|
|
83
|
+
normalized = f'.{normalized}'
|
|
84
|
+
return normalized
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _copy_lang_queries(source: dict[str, dict[str, str]]) -> dict[str, dict[str, str]]:
|
|
88
|
+
return {
|
|
89
|
+
lang: {
|
|
90
|
+
'struct': query_parts.get('struct', ''),
|
|
91
|
+
'imports': query_parts.get('imports', ''),
|
|
92
|
+
}
|
|
93
|
+
for lang, query_parts in source.items()
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _apply_cli_customizations(
|
|
98
|
+
cli_extensions: list[str] | None,
|
|
99
|
+
cli_queries: list[list[str]] | None,
|
|
100
|
+
) -> tuple[
|
|
101
|
+
dict[str, str],
|
|
102
|
+
dict[str, dict[str, str]],
|
|
103
|
+
list[str],
|
|
104
|
+
dict[str, str],
|
|
105
|
+
]:
|
|
106
|
+
"""
|
|
107
|
+
从命令行参数应用语言自定义(--add-extension 和 --add-query)。
|
|
108
|
+
返回 (extension_override, query_override, warnings)
|
|
109
|
+
"""
|
|
110
|
+
extension_override: dict[str, str] = {}
|
|
111
|
+
query_override: dict[str, dict[str, str]] = {}
|
|
112
|
+
warnings: list[str] = []
|
|
113
|
+
custom_query_languages: dict[str, str] = {}
|
|
114
|
+
|
|
115
|
+
if cli_extensions:
|
|
116
|
+
for item in cli_extensions:
|
|
117
|
+
if '=' not in item:
|
|
118
|
+
warnings.append(f'ignored invalid extension mapping {item!r}, expected EXT=LANG')
|
|
119
|
+
continue
|
|
120
|
+
ext_part, lang_part = item.split('=', 1)
|
|
121
|
+
try:
|
|
122
|
+
ext = _normalize_extension(ext_part)
|
|
123
|
+
lang = lang_part.strip().lower()
|
|
124
|
+
if not lang:
|
|
125
|
+
warnings.append(f'ignored empty language name for extension {ext_part!r}')
|
|
126
|
+
continue
|
|
127
|
+
extension_override[ext] = lang
|
|
128
|
+
except ValueError as e:
|
|
129
|
+
warnings.append(f'ignored invalid extension {ext_part!r}: {e}')
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
if cli_queries:
|
|
133
|
+
for query_item in cli_queries:
|
|
134
|
+
if len(query_item) != 3:
|
|
135
|
+
warnings.append(f'ignored malformed query: expected 3 parts, got {len(query_item)}')
|
|
136
|
+
continue
|
|
137
|
+
lang, query_type, query_str = query_item
|
|
138
|
+
lang = lang.strip().lower()
|
|
139
|
+
if not lang:
|
|
140
|
+
warnings.append('ignored empty language name in query')
|
|
141
|
+
continue
|
|
142
|
+
if query_type not in ('struct', 'imports'):
|
|
143
|
+
warnings.append(f'ignored unknown query type {query_type!r} for language {lang!r}')
|
|
144
|
+
continue
|
|
145
|
+
|
|
146
|
+
if lang not in query_override:
|
|
147
|
+
query_override[lang] = {'struct': '', 'imports': ''}
|
|
148
|
+
query_override[lang][query_type] = query_str
|
|
149
|
+
custom_query_languages[lang] = '<cli>'
|
|
150
|
+
|
|
151
|
+
return extension_override, query_override, warnings, custom_query_languages
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _load_language_customizations(
    repo_path: Path,
    explicit_config_path: Optional[str],
    cli_extension_override: dict[str, str],
    cli_query_override: dict[str, dict[str, str]],
    cli_warnings: list[str],
    cli_custom_query_languages: dict[str, str],
) -> tuple[
    dict[str, str],
    dict[str, dict[str, str]],
    dict[str, str],
    list[str],
    list[str],
    dict[str, str],
]:
    """
    Load and merge language customizations.

    Precedence: CLI --language-config file > CLI --add-* flags > built-in config.

    Config problems (missing file, bad JSON, non-object root) are reported
    as warnings and the merge result so far is returned — never fatal.

    Returns (extension_map, lang_queries, known_unsupported_extensions,
    warnings, loaded_config_paths, custom_query_languages).
    """
    # Start from copies of the built-ins so the module-level tables stay pristine.
    extension_map = dict(BUILTIN_EXTENSION_MAP)
    lang_queries = _copy_lang_queries(BUILTIN_LANG_QUERIES)
    known_unsupported_extensions = dict(BUILTIN_KNOWN_UNSUPPORTED_EXTENSIONS)
    warnings: list[str] = list(cli_warnings)
    loaded_config_paths: list[str] = []
    custom_query_languages: dict[str, str] = dict(cli_custom_query_languages)

    # Merge the CLI-flag customizations first.
    extension_map.update(cli_extension_override)
    for lang, query_parts in cli_query_override.items():
        if lang in lang_queries:
            # Override only the parts that were actually provided.
            if query_parts.get('struct'):
                lang_queries[lang]['struct'] = query_parts['struct']
            if query_parts.get('imports'):
                lang_queries[lang]['imports'] = query_parts['imports']
        else:
            lang_queries[lang] = query_parts

    # Then apply the --language-config file (if given); it has the highest precedence.
    if explicit_config_path:
        config_path = Path(explicit_config_path)
        # Relative config paths are resolved against the repo root.
        resolved_path = config_path if config_path.is_absolute() else (repo_path / config_path)

        try:
            config_data = json.loads(resolved_path.read_text(encoding='utf-8'))
        except FileNotFoundError:
            warnings.append(f'language config not found: {resolved_path}')
            return extension_map, lang_queries, known_unsupported_extensions, warnings, loaded_config_paths, custom_query_languages
        except json.JSONDecodeError as exc:
            warnings.append(f'language config parse error in {resolved_path}: {exc}')
            return extension_map, lang_queries, known_unsupported_extensions, warnings, loaded_config_paths, custom_query_languages
        except OSError as exc:
            warnings.append(f'language config read error in {resolved_path}: {exc}')
            return extension_map, lang_queries, known_unsupported_extensions, warnings, loaded_config_paths, custom_query_languages

        if not isinstance(config_data, dict):
            warnings.append(f'language config ignored because root value is not an object: {resolved_path}')
            return extension_map, lang_queries, known_unsupported_extensions, warnings, loaded_config_paths, custom_query_languages

        loaded_config_paths.append(str(resolved_path))

        # Extension mappings from --language-config.
        extensions = config_data.get('extensions', {})
        if isinstance(extensions, dict):
            for raw_ext, raw_lang in extensions.items():
                if isinstance(raw_ext, str) and isinstance(raw_lang, str) and raw_lang.strip():
                    try:
                        ext = _normalize_extension(raw_ext)
                        lang = raw_lang.strip().lower()
                        extension_map[ext] = lang
                        # A newly supported extension is no longer "known unsupported".
                        known_unsupported_extensions.pop(ext, None)
                    except ValueError:
                        # Empty extension after normalization: silently skipped.
                        pass

        # Query overrides from --language-config; replaces both parts wholesale.
        queries = config_data.get('queries', {})
        if isinstance(queries, dict):
            for raw_lang, raw_query_parts in queries.items():
                if isinstance(raw_lang, str) and raw_lang.strip() and isinstance(raw_query_parts, dict):
                    lang = raw_lang.strip().lower()
                    struct_query = raw_query_parts.get('struct', '')
                    imports_query = raw_query_parts.get('imports', '')
                    if isinstance(struct_query, str) and isinstance(imports_query, str):
                        lang_queries[lang] = {
                            'struct': struct_query,
                            'imports': imports_query,
                        }
                        # Record where the custom query came from for reporting.
                        custom_query_languages[lang] = str(resolved_path)

        # Known-unsupported extensions from --language-config.
        unsupported_extensions = config_data.get('unsupported_extensions', {})
        if isinstance(unsupported_extensions, dict):
            for raw_ext, raw_lang in unsupported_extensions.items():
                if isinstance(raw_ext, str) and isinstance(raw_lang, str) and raw_lang.strip():
                    try:
                        ext = _normalize_extension(raw_ext)
                        lang = raw_lang.strip().lower()
                        known_unsupported_extensions[ext] = lang
                        # Marking an extension unsupported removes any supported mapping.
                        extension_map.pop(ext, None)
                    except ValueError:
                        pass

    return extension_map, lang_queries, known_unsupported_extensions, warnings, loaded_config_paths, custom_query_languages
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _load_languages(
    extension_map: dict[str, str],
    lang_queries: dict[str, dict[str, str]],
    requested: Optional[list[str]] = None,
) -> dict[str, Any]:
    """
    Load Tree-sitter Language objects and return a {lang_name: Language} dict.

    Prefers tree-sitter-language-pack (160+ languages); falls back to the
    single-language tree-sitter-python package when it is unavailable.
    Exits the process if neither is installed or no requested grammar loads.
    """
    try:
        from tree_sitter_language_pack import get_language as _get

        # Thin wrapper so both branches expose the same get_language(name) shape.
        def get_language(name: str) -> Any:
            return _get(cast(Any, name))
    except ImportError:
        # Fallback: Python-only single-language package.
        try:
            import tree_sitter_python
            from tree_sitter import Language

            def get_language(name: str) -> Any:
                if name == 'python':
                    return Language(tree_sitter_python.language())
                # Mirror the language pack's lookup failure for other names.
                raise LookupError(name)
        except ImportError:
            sys.stderr.write(
                "[ERROR] 缺少 tree-sitter 语言支持。\n"
                "请运行: pip install tree-sitter-language-pack\n"
            )
            sys.exit(1)

    # Without an explicit request list, try every language that has either
    # an extension mapping or a bundled query.
    targets = requested if requested else sorted(set(extension_map.values()) | set(lang_queries.keys()))
    languages: dict[str, Any] = {}
    for name in targets:
        try:
            languages[name] = get_language(name)
        except (LookupError, KeyError):
            # Grammar for this language is not installed; skip gracefully.
            pass

    if not languages:
        sys.stderr.write("[ERROR] 没有可用的语言解析器,请安装 tree-sitter-language-pack\n")
        sys.exit(1)
    return languages
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _file_module_id(repo_path: Path, file_path: Path) -> str:
|
|
310
|
+
"""将文件路径转换为点分隔的模块 ID。
|
|
311
|
+
例:src/nexus/api/routes.py → src.nexus.api.routes
|
|
312
|
+
src/core/parser.hpp → src.core.parser
|
|
313
|
+
"""
|
|
314
|
+
rel = file_path.relative_to(repo_path)
|
|
315
|
+
parts = list(rel.parts)
|
|
316
|
+
stem = Path(parts[-1]).stem # 去掉扩展名
|
|
317
|
+
parts[-1] = stem
|
|
318
|
+
# Python 特殊处理:__init__ 合并到包路径
|
|
319
|
+
if stem == '__init__' and len(parts) > 1:
|
|
320
|
+
parts = parts[:-1]
|
|
321
|
+
return '.'.join(parts) if parts else stem
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def extract_file(
    repo_path: Path,
    file_path: Path,
    lang_name: str,
    language: Any,
    lang_queries: dict[str, dict[str, str]],
) -> tuple[list[dict], list[dict], list[str]]:
    """Parse a single source file and return (nodes, edges, errors).

    Emits one Module node for the file, Class/Function nodes found by the
    language's 'struct' query (with 'contains' edges for nesting), and
    'imports' edges found by the 'imports' query. All failures are collected
    into *errors*; this function never raises.
    """
    from tree_sitter import Parser as TSParser, Query, QueryCursor

    nodes: list[dict] = []
    edges: list[dict] = []
    errors: list[str] = []

    try:
        source = file_path.read_bytes()
    except OSError as e:
        errors.append(f"{file_path}: read error: {e}")
        return nodes, edges, errors

    try:
        parser = TSParser(language)
        tree = parser.parse(source)
    except Exception as e:
        errors.append(f"{file_path}: parse error: {e}")
        return nodes, edges, errors

    # Normalize Windows separators so node paths are stable across platforms.
    rel_path = str(file_path.relative_to(repo_path)).replace('\\', '/')
    module_id = _file_module_id(repo_path, file_path)
    line_count = source.count(b'\n') + 1

    # Module node (file level).
    nodes.append({
        'id': module_id,
        'type': 'Module',
        'label': module_id.split('.')[-1],
        'path': rel_path,
        'lines': line_count,
        'lang': lang_name,
    })

    queries = lang_queries.get(lang_name, {})

    # -- Structure: classes / functions ------------------------------------
    struct_q_text = queries.get('struct', '')
    if struct_q_text.strip():
        try:
            struct_query = Query(language, struct_q_text)
            class_ranges: list[tuple[int, int, str]] = []

            for pattern_idx, captures in QueryCursor(struct_query).matches(tree.root_node):
                capture_names = list(captures.keys())
                # A match is treated as a class when any capture name contains 'class'.
                is_class = any('class' in k for k in capture_names)
                def_key = 'class.def' if is_class else 'func.def'
                name_key = 'class.name' if is_class else 'func.name'

                def_nodes = captures.get(def_key, [])
                name_nodes = captures.get(name_key, [])
                if not def_nodes or not name_nodes:
                    continue

                def_node = def_nodes[0]
                name_node = name_nodes[0]
                name = source[name_node.start_byte:name_node.end_byte].decode('utf-8', 'replace')

                if is_class:
                    node_id = f"{module_id}.{name}"
                    nodes.append({
                        'id': node_id,
                        'type': 'Class',
                        'label': name,
                        'path': rel_path,
                        'parent': module_id,
                        'start_line': def_node.start_point[0] + 1,
                        'end_line': def_node.end_point[0] + 1,
                    })
                    # Remember the class's byte span so later function matches can
                    # be attributed to it. NOTE(review): this assumes class matches
                    # are yielded before their member functions — confirm against
                    # the query cursor's match ordering.
                    class_ranges.append((def_node.start_byte, def_node.end_byte, node_id))
                    edges.append({'source': module_id, 'target': node_id, 'type': 'contains'})
                else:
                    parent_id = module_id
                    # Attribute the function to the first class span that fully encloses it.
                    for cls_start, cls_end, cls_id in class_ranges:
                        if cls_start <= def_node.start_byte and def_node.end_byte <= cls_end:
                            parent_id = cls_id
                            break
                    node_id = f"{parent_id}.{name}"
                    nodes.append({
                        'id': node_id,
                        'type': 'Function',
                        'label': name,
                        'path': rel_path,
                        'parent': parent_id,
                        'start_line': def_node.start_point[0] + 1,
                        'end_line': def_node.end_point[0] + 1,
                    })
                    edges.append({'source': parent_id, 'target': node_id, 'type': 'contains'})

        except Exception as e:
            errors.append(f"{file_path}: struct query error: {e}")

    # -- Imports: 'imports' edges ------------------------------------------
    import_q_text = queries.get('imports', '')
    if import_q_text.strip():
        try:
            import_query = Query(language, import_q_text)
            for _pattern_idx, captures in QueryCursor(import_query).matches(tree.root_node):
                for mod_node in captures.get('mod', []):
                    # Strip quotes/angle brackets so e.g. <stdio.h> -> stdio.h.
                    target = source[mod_node.start_byte:mod_node.end_byte].decode('utf-8', 'replace').strip('"\'<> ')
                    if target:
                        edges.append({'source': module_id, 'target': target, 'type': 'imports'})
        except Exception as e:
            errors.append(f"{file_path}: import query error: {e}")

    return nodes, edges, errors
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def collect_source_files(
    repo_path: Path,
    languages: dict[str, Any],
    extension_map: dict[str, str],
    known_unsupported_extensions: dict[str, str],
) -> tuple[list[tuple[Path, str]], dict[str, int], dict[str, int], dict[str, int]]:
    """Collect all source files of known languages in the repo, skipping excluded dirs.

    Returns:
        - [(file_path, lang_name)] sorted by path
        - supported_file_counts: {lang_name: file_count}
        - known_unsupported_file_counts: {lang_name: file_count}
        - configured_but_unavailable_file_counts: {lang_name: file_count}
          (extension is mapped, but no parser for the language could be loaded)
    """
    files: list[tuple[Path, str]] = []
    supported_file_counts: dict[str, int] = {}
    known_unsupported_file_counts: dict[str, int] = {}
    configured_but_unavailable_file_counts: dict[str, int] = {}

    for p in repo_path.rglob('*'):
        if not p.is_file():
            continue
        if _should_skip_path(repo_path, p):
            continue

        # Extension lookup is case-insensitive.
        suffix = p.suffix.lower()
        lang = extension_map.get(suffix)
        if lang:
            if lang in languages:
                files.append((p, lang))
                supported_file_counts[lang] = supported_file_counts.get(lang, 0) + 1
            else:
                # Mapped language, but its grammar failed to load.
                configured_but_unavailable_file_counts[lang] = (
                    configured_but_unavailable_file_counts.get(lang, 0) + 1
                )
            continue

        # Not mapped: count files of explicitly known-unsupported languages.
        unsupported_lang = known_unsupported_extensions.get(suffix)
        if unsupported_lang:
            known_unsupported_file_counts[unsupported_lang] = (
                known_unsupported_file_counts.get(unsupported_lang, 0) + 1
            )

    return (
        sorted(files, key=lambda x: x[0]),
        supported_file_counts,
        known_unsupported_file_counts,
        configured_but_unavailable_file_counts,
    )
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def apply_max_nodes(
    nodes: list[dict],
    edges: list[dict],
    max_nodes: int,
) -> tuple[list[dict], list[dict], bool, int]:
    """
    When the node count exceeds *max_nodes*, keep all Module/Class nodes and
    truncate Function nodes to fill the remaining budget (Module/Class may
    themselves exceed the budget; they are never dropped).

    Returns (filtered_nodes, filtered_edges, truncated, truncated_count).

    Fix: the previous edge filter kept any 'contains' edge whose *source*
    survived, leaving dangling edges that pointed at truncated Function
    nodes. 'contains' edges now require both endpoints to be kept; 'imports'
    edges only need a kept source, because their targets are external module
    names rather than node ids.
    """
    if len(nodes) <= max_nodes:
        return nodes, edges, False, 0

    priority_nodes = [n for n in nodes if n['type'] in ('Module', 'Class')]
    func_nodes = [n for n in nodes if n['type'] == 'Function']

    remaining_slots = max_nodes - len(priority_nodes)
    if remaining_slots < 0:
        # Modules/Classes alone blow the budget: keep them all, drop every Function.
        kept_nodes = priority_nodes
        truncated_count = len(func_nodes)
    else:
        kept_funcs = func_nodes[:remaining_slots]
        kept_nodes = priority_nodes + kept_funcs
        truncated_count = len(func_nodes) - len(kept_funcs)

    kept_ids = {n['id'] for n in kept_nodes}
    kept_edges = []
    for e in edges:
        if e['type'] == 'imports':
            if e['source'] in kept_ids:
                kept_edges.append(e)
        elif e['source'] in kept_ids and e['target'] in kept_ids:
            kept_edges.append(e)
    return kept_nodes, kept_edges, True, truncated_count
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def main() -> None:
    """CLI entry point: parse args, extract the repo's AST graph, print JSON to stdout."""
    parser = argparse.ArgumentParser(
        description='Extract AST structure from a multi-language repository'
    )
    parser.add_argument('repo_path', help='Target repository path')
    parser.add_argument('--max-nodes', type=int, default=500,
                        help='Max nodes in output (default: 500). Truncates Function nodes first.')
    parser.add_argument(
        '--add-extension',
        action='append',
        dest='add_extensions',
        metavar='EXT=LANG',
        help='Add extension mapping, e.g., .templ=templ. Can be used multiple times.',
    )
    parser.add_argument(
        '--add-query',
        action='append',
        dest='add_queries',
        nargs=3,
        metavar=('LANG', 'TYPE', 'QUERY'),
        help='Add/override a query for a language. TYPE is "struct" or "imports". Can be used multiple times.',
    )
    parser.add_argument(
        '--language-config',
        help='Optional JSON file that adds or overrides extension mappings and tree-sitter queries. Useful for complex configurations.',
    )
    parser.add_argument(
        '--file-tree-out',
        help='Optional output path for a filtered file tree (e.g. .nexus-map/raw/file_tree.txt). Uses the same exclude rules as AST collection.',
    )
    args = parser.parse_args()

    repo_path = Path(args.repo_path).resolve()
    if not repo_path.exists():
        sys.stderr.write(f"[ERROR] repo_path not found: {repo_path}\n")
        sys.exit(1)
    # Non-git directories are allowed; just warn.
    if not (repo_path / '.git').exists():
        sys.stderr.write(f"[WARNING] .git not found in {repo_path}, may not be a git repo\n")

    # Optional side output: a filtered file-tree listing.
    if args.file_tree_out:
        file_tree_path = Path(args.file_tree_out)
        if not file_tree_path.is_absolute():
            file_tree_path = repo_path / file_tree_path
        write_filtered_file_tree(repo_path, file_tree_path.resolve())

    # Apply CLI customization flags.
    cli_ext_override, cli_query_override, cli_warnings, cli_custom_query_languages = _apply_cli_customizations(
        args.add_extensions,
        args.add_queries,
    )

    # Load and merge configuration (built-in < CLI flags < --language-config).
    (
        extension_map,
        lang_queries,
        known_unsupported_extensions,
        config_warnings,
        loaded_config_paths,
        custom_query_languages,
    ) = _load_language_customizations(
        repo_path,
        args.language_config,
        cli_ext_override,
        cli_query_override,
        cli_warnings,
        cli_custom_query_languages,
    )

    languages = _load_languages(extension_map, lang_queries)
    (
        source_files,
        supported_file_counts,
        known_unsupported_file_counts,
        configured_but_unavailable_file_counts,
    ) = collect_source_files(
        repo_path,
        languages,
        extension_map,
        known_unsupported_extensions,
    )

    if not source_files:
        sys.stderr.write(f"[WARNING] No supported source files found in {repo_path}\n")

    all_nodes: list[dict] = []
    all_edges: list[dict] = []
    all_errors: list[str] = []
    detected_langs: set[str] = set()
    total_lines = 0
    warnings: list[str] = list(config_warnings)
    module_only_file_counts: dict[str, int] = {}
    # Languages with a non-empty 'struct' query get class/function coverage;
    # files in other languages yield only a Module node.
    languages_with_structural_queries = sorted(
        lang for lang, query_parts in lang_queries.items()
        if query_parts.get('struct', '').strip()
    )

    for file_path, lang_name in source_files:
        nodes, edges, errors = extract_file(
            repo_path,
            file_path,
            lang_name,
            languages[lang_name],
            lang_queries,
        )
        all_nodes.extend(nodes)
        all_edges.extend(edges)
        all_errors.extend(errors)
        if lang_name not in languages_with_structural_queries:
            module_only_file_counts[lang_name] = module_only_file_counts.get(lang_name, 0) + 1
        if nodes:
            detected_langs.add(lang_name)
            # nodes[0] is always the Module node; it carries the file's line count.
            total_lines += nodes[0].get('lines', 0)

    final_nodes, final_edges, truncated, truncated_count = apply_max_nodes(
        all_nodes, all_edges, args.max_nodes
    )

    if known_unsupported_file_counts:
        unsupported_summary = ', '.join(
            f"{lang} ({count} files)"
            for lang, count in sorted(known_unsupported_file_counts.items())
        )
        warnings.append(
            "known unsupported languages present; downstream outputs must mark inferred sections explicitly: "
            f"{unsupported_summary}"
        )

    if configured_but_unavailable_file_counts:
        unavailable_summary = ', '.join(
            f"{lang} ({count} files)"
            for lang, count in sorted(configured_but_unavailable_file_counts.items())
        )
        warnings.append(
            'some configured languages were detected in source files but no parser could be loaded: '
            f'{unavailable_summary}'
        )

    if module_only_file_counts:
        module_only_summary = ', '.join(
            f"{lang} ({count} files)"
            for lang, count in sorted(module_only_file_counts.items())
        )
        warnings.append(
            "some languages were parsed with module-only coverage because no structural query template is bundled: "
            f"{module_only_summary}"
        )

    if loaded_config_paths:
        config_summary = ', '.join(loaded_config_paths)
        warnings.append(f'custom language configuration loaded: {config_summary}')

    result = {
        'languages': sorted(detected_langs),
        'stats': {
            'total_files': len(source_files),
            'total_lines': total_lines,
            'parse_errors': len(all_errors),
            'truncated': truncated,
            'truncated_nodes': truncated_count,
            'supported_file_counts': supported_file_counts,
            'languages_with_structural_queries': languages_with_structural_queries,
            'languages_with_custom_queries': sorted(custom_query_languages.keys()),
            'module_only_file_counts': module_only_file_counts,
            'known_unsupported_file_counts': known_unsupported_file_counts,
            'configured_but_unavailable_file_counts': configured_but_unavailable_file_counts,
            'custom_language_config_paths': loaded_config_paths,
        },
        'nodes': final_nodes,
        'edges': final_edges,
    }

    if all_errors:
        # Cap the error list so the JSON stays reviewable.
        result['_errors'] = all_errors[:20]
    if warnings:
        result['warnings'] = warnings

    print(json.dumps(result, ensure_ascii=False, indent=2))
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
# Script entry point.
if __name__ == '__main__':
    main()
|
|
706
|
+
|