@haaaiawd/anws 2.2.6 → 2.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/cli.js +52 -22
- package/lib/diff.js +5 -2
- package/lib/init.js +217 -96
- package/lib/install-state.js +18 -3
- package/lib/manifest.js +510 -213
- package/lib/prompt.js +68 -0
- package/lib/resources/index.js +36 -2
- package/lib/update.js +12 -6
- package/package.json +48 -47
- package/templates/.agents/skills/anws-system/SKILL.md +108 -108
- package/templates/.agents/skills/code-reviewer/SKILL.md +170 -103
- package/templates/.agents/skills/concept-modeler/SKILL.md +230 -179
- package/templates/.agents/skills/craft-authoring/SKILL.md +112 -49
- package/templates/.agents/skills/craft-authoring/references/BUNDLE_POLICY.md +61 -0
- package/templates/.agents/skills/craft-authoring/references/PROMPT_QUALITY_RUBRIC.md +99 -0
- package/templates/.agents/skills/craft-authoring/references/SCORECARD_TEMPLATE.md +64 -0
- package/templates/.agents/skills/design-reviewer/SKILL.md +265 -190
- package/templates/.agents/skills/e2e-testing-guide/SKILL.md +246 -135
- package/templates/.agents/skills/nexus-mapper/SKILL.md +321 -321
- package/templates/.agents/skills/output-contract/SKILL.md +37 -0
- package/templates/.agents/skills/report-template/SKILL.md +92 -92
- package/templates/.agents/skills/sequential-thinking/SKILL.md +222 -225
- package/templates/.agents/skills/spec-writer/SKILL.md +75 -30
- package/templates/.agents/skills/system-architect/SKILL.md +538 -678
- package/templates/.agents/skills/system-designer/SKILL.md +601 -601
- package/templates/.agents/skills/task-planner/SKILL.md +1 -2
- package/templates/.agents/skills/task-reviewer/SKILL.md +428 -388
- package/templates/.agents/skills/tech-evaluator/SKILL.md +252 -144
- package/templates/.agents/workflows/blueprint.md +166 -43
- package/templates/.agents/workflows/challenge.md +331 -497
- package/templates/.agents/workflows/change.md +182 -339
- package/templates/.agents/workflows/craft.md +159 -236
- package/templates/.agents/workflows/design-system.md +202 -674
- package/templates/.agents/workflows/explore.md +187 -399
- package/templates/.agents/workflows/forge.md +650 -550
- package/templates/.agents/workflows/genesis.md +439 -351
- package/templates/.agents/workflows/probe.md +219 -241
- package/templates/.agents/workflows/quickstart.md +302 -123
- package/templates/.agents/workflows/upgrade.md +145 -182
- package/templates_en/.agents/skills/anws-system/SKILL.md +108 -0
- package/templates_en/.agents/skills/code-reviewer/SKILL.md +170 -0
- package/templates_en/.agents/skills/concept-modeler/SKILL.md +230 -0
- package/templates_en/.agents/skills/craft-authoring/SKILL.md +179 -0
- package/templates_en/.agents/skills/craft-authoring/references/BUNDLE_POLICY.md +60 -0
- package/templates_en/.agents/skills/craft-authoring/references/PROMPT_QUALITY_RUBRIC.md +92 -0
- package/templates_en/.agents/skills/craft-authoring/references/SCORECARD_TEMPLATE.md +52 -0
- package/templates_en/.agents/skills/design-reviewer/SKILL.md +265 -0
- package/templates_en/.agents/skills/e2e-testing-guide/SKILL.md +246 -0
- package/templates_en/.agents/skills/nexus-mapper/SKILL.md +306 -0
- package/templates_en/.agents/skills/nexus-mapper/references/language-customization.md +167 -0
- package/templates_en/.agents/skills/nexus-mapper/references/output-schema.md +311 -0
- package/templates_en/.agents/skills/nexus-mapper/references/probe-protocol.md +246 -0
- package/templates_en/.agents/skills/nexus-mapper/scripts/extract_ast.py +706 -0
- package/templates_en/.agents/skills/nexus-mapper/scripts/git_detective.py +194 -0
- package/templates_en/.agents/skills/nexus-mapper/scripts/languages.json +127 -0
- package/templates_en/.agents/skills/nexus-mapper/scripts/query_graph.py +556 -0
- package/templates_en/.agents/skills/nexus-mapper/scripts/requirements.txt +6 -0
- package/templates_en/.agents/skills/nexus-query/SKILL.md +114 -0
- package/templates_en/.agents/skills/nexus-query/scripts/extract_ast.py +706 -0
- package/templates_en/.agents/skills/nexus-query/scripts/git_detective.py +194 -0
- package/templates_en/.agents/skills/nexus-query/scripts/languages.json +127 -0
- package/templates_en/.agents/skills/nexus-query/scripts/query_graph.py +556 -0
- package/templates_en/.agents/skills/nexus-query/scripts/requirements.txt +6 -0
- package/templates_en/.agents/skills/output-contract/SKILL.md +37 -0
- package/templates_en/.agents/skills/report-template/SKILL.md +85 -0
- package/templates_en/.agents/skills/report-template/references/REPORT_TEMPLATE.md +100 -0
- package/templates_en/.agents/skills/runtime-inspector/SKILL.md +101 -0
- package/templates_en/.agents/skills/sequential-thinking/SKILL.md +214 -0
- package/templates_en/.agents/skills/spec-writer/SKILL.md +153 -0
- package/templates_en/.agents/skills/spec-writer/references/prd_template.md +177 -0
- package/templates_en/.agents/skills/system-architect/SKILL.md +538 -0
- package/templates_en/.agents/skills/system-architect/references/rfc_template.md +59 -0
- package/templates_en/.agents/skills/system-designer/SKILL.md +534 -0
- package/templates_en/.agents/skills/system-designer/references/system-design-detail-template.md +187 -0
- package/templates_en/.agents/skills/system-designer/references/system-design-template.md +605 -0
- package/templates_en/.agents/skills/task-planner/SKILL.md +251 -0
- package/templates_en/.agents/skills/task-planner/references/TASK_TEMPLATE_05A.md +109 -0
- package/templates_en/.agents/skills/task-planner/references/TASK_TEMPLATE_05B.md +176 -0
- package/templates_en/.agents/skills/task-reviewer/SKILL.md +428 -0
- package/templates_en/.agents/skills/tech-evaluator/SKILL.md +252 -0
- package/templates_en/.agents/skills/tech-evaluator/references/ADR_TEMPLATE.md +78 -0
- package/templates_en/.agents/workflows/blueprint.md +200 -0
- package/templates_en/.agents/workflows/challenge.md +331 -0
- package/templates_en/.agents/workflows/change.md +182 -0
- package/templates_en/.agents/workflows/craft.md +159 -0
- package/templates_en/.agents/workflows/design-system.md +202 -0
- package/templates_en/.agents/workflows/explore.md +187 -0
- package/templates_en/.agents/workflows/forge.md +651 -0
- package/templates_en/.agents/workflows/genesis.md +439 -0
- package/templates_en/.agents/workflows/probe.md +219 -0
- package/templates_en/.agents/workflows/quickstart.md +303 -0
- package/templates_en/.agents/workflows/upgrade.md +145 -0
- package/templates_en/AGENTS.md +149 -0
|
@@ -0,0 +1,706 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
extract_ast.py — Multi-language code repository AST structure extractor
|
|
4
|
+
|
|
5
|
+
Purpose: Extract module/class/function structure from code repository based on Tree-sitter, output JSON to stdout
|
|
6
|
+
Supports: Python, JavaScript, TypeScript, TSX, Java, Go, Rust, C#, C/C++, Kotlin, Ruby, Swift, PHP, Lua ...
|
|
7
|
+
Usage: python extract_ast.py <repo_path> [--max-nodes 500]
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import sys
|
|
11
|
+
import json
|
|
12
|
+
import argparse
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Any, Optional, cast
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Path components that cause a file or directory to be skipped when they
# appear anywhere in its path relative to the repository root.
EXCLUDE_DIRS = {
    # version control and editor metadata
    '.git',
    '.idea',
    '.vscode',
    '.vs',
    # Python environments and tool caches
    '__pycache__',
    '.venv',
    'venv',
    'site-packages',
    '.mypy_cache',
    '.pytest_cache',
    '.ruff_cache',
    '.tox',
    '.nox',
    '.eggs',
    # dependency and build output directories
    'node_modules',
    'vendor',
    'dist',
    'build',
    '_build',
    'out',
    'target',
    'cmake-build-debug',
    # other generated data
    '.nexus-map',
    '.godot',
}

# Regular files whose name ends with one of these suffixes are skipped as well.
EXCLUDE_FILE_SUFFIXES = ('.import', '.vulkan.cache')

# ── Built-in language config: languages.json sits next to this script ──────
_LANGUAGES_JSON = Path(__file__).parent / 'languages.json'
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _load_builtin_languages() -> tuple[dict[str, str], dict[str, dict[str, str]], dict[str, str]]:
    """Load the built-in language tables from ``languages.json``.

    Returns:
        A ``(extensions, queries, unsupported)`` tuple read from the
        ``extensions``, ``queries`` and ``unsupported_extensions`` keys of
        the JSON document. Every entry of ``queries`` is normalized so that
        it always carries both a ``struct`` and an ``imports`` key (empty
        string when the file does not provide one).

    The process exits with status 1, after writing an ``[ERROR]`` line to
    stderr, when the file cannot be read, is not valid UTF-8 / JSON, or does
    not contain a JSON object at its root.
    """
    try:
        data = json.loads(_LANGUAGES_JSON.read_text(encoding='utf-8'))
    except (OSError, ValueError) as exc:
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        sys.stderr.write(f"[ERROR] Failed to load {_LANGUAGES_JSON}: {exc}\n")
        sys.exit(1)

    if not isinstance(data, dict):
        # Without this guard the .get() calls below would fail with an
        # unhelpful AttributeError.
        sys.stderr.write(
            f"[ERROR] Failed to load {_LANGUAGES_JSON}: root value is not an object\n"
        )
        sys.exit(1)

    extensions: dict[str, str] = data.get('extensions', {})
    raw_queries: dict[str, dict[str, str]] = data.get('queries', {})
    unsupported: dict[str, str] = data.get('unsupported_extensions', {})

    # Normalize queries: ensure each language has struct and imports keys
    queries: dict[str, dict[str, str]] = {
        lang: {
            'struct': parts.get('struct', ''),
            'imports': parts.get('imports', ''),
        }
        for lang, parts in raw_queries.items()
    }

    return extensions, queries, unsupported
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Built-in tables, loaded once at import time from languages.json.
(
    BUILTIN_EXTENSION_MAP,
    BUILTIN_LANG_QUERIES,
    BUILTIN_KNOWN_UNSUPPORTED_EXTENSIONS,
) = _load_builtin_languages()
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _should_skip_path(repo_path: Path, path: Path) -> bool:
    """Return True when ``path`` is filtered out by the exclude rules.

    A path is skipped when any component of its path relative to
    ``repo_path`` is listed in ``EXCLUDE_DIRS``, or when it is a regular
    file whose name ends with one of ``EXCLUDE_FILE_SUFFIXES``.
    """
    for component in path.relative_to(repo_path).parts:
        if component in EXCLUDE_DIRS:
            return True

    if not path.is_file():
        return False
    file_name = path.name
    return any(file_name.endswith(suffix) for suffix in EXCLUDE_FILE_SUFFIXES)
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def write_filtered_file_tree(repo_path: Path, output_path: Path) -> None:
    """Write a sorted listing of the non-excluded entries below ``repo_path``.

    Each entry is written as a POSIX-style path relative to ``repo_path``,
    one per line, with a trailing ``/`` on directories. The parent directory
    of ``output_path`` is created when missing. An empty listing produces an
    empty file.
    """
    entries = [
        candidate.relative_to(repo_path).as_posix() + ('/' if candidate.is_dir() else '')
        for candidate in sorted(repo_path.rglob('*'))
        if not _should_skip_path(repo_path, candidate)
    ]

    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Terminating every entry with a newline gives the same text as joining
    # with newlines and appending a final one only when there are entries.
    listing = ''.join(f'{entry}\n' for entry in entries)
    output_path.write_text(listing, encoding='utf-8')
|
|
77
|
+
|
|
78
|
+
def _normalize_extension(ext: str) -> str:
    """Return ``ext`` trimmed, lower-cased and prefixed with a dot.

    Raises:
        ValueError: if ``ext`` is empty after trimming whitespace.
    """
    cleaned = ext.strip().lower()
    if cleaned == '':
        raise ValueError('extension must not be empty')
    return cleaned if cleaned.startswith('.') else '.' + cleaned
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _copy_lang_queries(source: dict[str, dict[str, str]]) -> dict[str, dict[str, str]]:
    """Return a copy of ``source`` holding only the ``struct``/``imports`` parts.

    Every language gets a fresh inner dict, so the copy can be modified
    without touching ``source``. A missing part becomes an empty string and
    any other key is dropped.
    """
    duplicate: dict[str, dict[str, str]] = {}
    for language_name, parts in source.items():
        duplicate[language_name] = {
            'struct': parts.get('struct', ''),
            'imports': parts.get('imports', ''),
        }
    return duplicate
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def _apply_cli_customizations(
|
|
98
|
+
cli_extensions: list[str] | None,
|
|
99
|
+
cli_queries: list[list[str]] | None,
|
|
100
|
+
) -> tuple[
|
|
101
|
+
dict[str, str],
|
|
102
|
+
dict[str, dict[str, str]],
|
|
103
|
+
list[str],
|
|
104
|
+
dict[str, str],
|
|
105
|
+
]:
|
|
106
|
+
"""
|
|
107
|
+
Apply language customizations from command line arguments (--add-extension and --add-query).
|
|
108
|
+
Returns (extension_override, query_override, warnings)
|
|
109
|
+
"""
|
|
110
|
+
extension_override: dict[str, str] = {}
|
|
111
|
+
query_override: dict[str, dict[str, str]] = {}
|
|
112
|
+
warnings: list[str] = []
|
|
113
|
+
custom_query_languages: dict[str, str] = {}
|
|
114
|
+
|
|
115
|
+
if cli_extensions:
|
|
116
|
+
for item in cli_extensions:
|
|
117
|
+
if '=' not in item:
|
|
118
|
+
warnings.append(f'ignored invalid extension mapping {item!r}, expected EXT=LANG')
|
|
119
|
+
continue
|
|
120
|
+
ext_part, lang_part = item.split('=', 1)
|
|
121
|
+
try:
|
|
122
|
+
ext = _normalize_extension(ext_part)
|
|
123
|
+
lang = lang_part.strip().lower()
|
|
124
|
+
if not lang:
|
|
125
|
+
warnings.append(f'ignored empty language name for extension {ext_part!r}')
|
|
126
|
+
continue
|
|
127
|
+
extension_override[ext] = lang
|
|
128
|
+
except ValueError as e:
|
|
129
|
+
warnings.append(f'ignored invalid extension {ext_part!r}: {e}')
|
|
130
|
+
continue
|
|
131
|
+
|
|
132
|
+
if cli_queries:
|
|
133
|
+
for query_item in cli_queries:
|
|
134
|
+
if len(query_item) != 3:
|
|
135
|
+
warnings.append(f'ignored malformed query: expected 3 parts, got {len(query_item)}')
|
|
136
|
+
continue
|
|
137
|
+
lang, query_type, query_str = query_item
|
|
138
|
+
lang = lang.strip().lower()
|
|
139
|
+
if not lang:
|
|
140
|
+
warnings.append('ignored empty language name in query')
|
|
141
|
+
continue
|
|
142
|
+
if query_type not in ('struct', 'imports'):
|
|
143
|
+
warnings.append(f'ignored unknown query type {query_type!r} for language {lang!r}')
|
|
144
|
+
continue
|
|
145
|
+
|
|
146
|
+
if lang not in query_override:
|
|
147
|
+
query_override[lang] = {'struct': '', 'imports': ''}
|
|
148
|
+
query_override[lang][query_type] = query_str
|
|
149
|
+
custom_query_languages[lang] = '<cli>'
|
|
150
|
+
|
|
151
|
+
return extension_override, query_override, warnings, custom_query_languages
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _load_language_customizations(
    repo_path: Path,
    explicit_config_path: Optional[str],
    cli_extension_override: dict[str, str],
    cli_query_override: dict[str, dict[str, str]],
    cli_warnings: list[str],
    cli_custom_query_languages: dict[str, str],
) -> tuple[
    dict[str, str],
    dict[str, dict[str, str]],
    dict[str, str],
    list[str],
    list[str],
    dict[str, str],
]:
    """Load and merge language customization configuration.

    Priority: CLI --language-config > CLI --add-* parameters > built-in config

    Args:
        repo_path: Base directory used to resolve a relative config path.
        explicit_config_path: Value of ``--language-config``, or None/empty.
        cli_extension_override: Extension → language overrides from the CLI.
        cli_query_override: Per-language query overrides from the CLI. Only
            non-empty parts replace a built-in query of the same language.
        cli_warnings: Warnings already collected while parsing the CLI.
        cli_custom_query_languages: Languages that received a CLI query.

    Returns:
        ``(extension_map, lang_queries, known_unsupported_extensions,
        warnings, loaded_config_paths, custom_query_languages)``.

    A config file that is missing, unreadable, not valid UTF-8 / JSON, or
    whose root is not an object is reported in ``warnings``; in that case the
    result contains the built-in tables merged with the CLI overrides only.
    """
    extension_map = dict(BUILTIN_EXTENSION_MAP)
    lang_queries = _copy_lang_queries(BUILTIN_LANG_QUERIES)
    known_unsupported_extensions = dict(BUILTIN_KNOWN_UNSUPPORTED_EXTENSIONS)
    warnings: list[str] = list(cli_warnings)
    loaded_config_paths: list[str] = []
    custom_query_languages: dict[str, str] = dict(cli_custom_query_languages)

    # All six containers are only ever mutated in place below, so this tuple
    # always reflects their current content and can be returned from any exit.
    result = (
        extension_map,
        lang_queries,
        known_unsupported_extensions,
        warnings,
        loaded_config_paths,
        custom_query_languages,
    )

    # First merge CLI parameter customizations
    extension_map.update(cli_extension_override)
    for lang, query_parts in cli_query_override.items():
        if lang in lang_queries:
            # Only overwrite provided parts
            if query_parts.get('struct'):
                lang_queries[lang]['struct'] = query_parts['struct']
            if query_parts.get('imports'):
                lang_queries[lang]['imports'] = query_parts['imports']
        else:
            # Store a copy so the result never aliases the caller's dict.
            lang_queries[lang] = {
                'struct': query_parts.get('struct', ''),
                'imports': query_parts.get('imports', ''),
            }

    # Then load --language-config file (if provided), highest priority
    if not explicit_config_path:
        return result

    config_path = Path(explicit_config_path)
    resolved_path = config_path if config_path.is_absolute() else (repo_path / config_path)

    try:
        config_data = json.loads(resolved_path.read_text(encoding='utf-8'))
    except FileNotFoundError:
        warnings.append(f'language config not found: {resolved_path}')
        return result
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # A file that is not valid UTF-8 is reported like malformed JSON
        # instead of crashing with an uncaught UnicodeDecodeError.
        warnings.append(f'language config parse error in {resolved_path}: {exc}')
        return result
    except OSError as exc:
        warnings.append(f'language config read error in {resolved_path}: {exc}')
        return result

    if not isinstance(config_data, dict):
        warnings.append(f'language config ignored because root value is not an object: {resolved_path}')
        return result

    loaded_config_paths.append(str(resolved_path))

    # Load extension mappings from --language-config
    extensions = config_data.get('extensions', {})
    if isinstance(extensions, dict):
        for raw_ext, raw_lang in extensions.items():
            if not (isinstance(raw_ext, str) and isinstance(raw_lang, str) and raw_lang.strip()):
                continue
            try:
                ext = _normalize_extension(raw_ext)
            except ValueError as exc:
                warnings.append(f'ignored invalid extension {raw_ext!r} in {resolved_path}: {exc}')
                continue
            extension_map[ext] = raw_lang.strip().lower()
            # An extension that is now mapped is no longer "known unsupported".
            known_unsupported_extensions.pop(ext, None)

    # Load queries from --language-config
    queries = config_data.get('queries', {})
    if isinstance(queries, dict):
        for raw_lang, raw_query_parts in queries.items():
            if not (isinstance(raw_lang, str) and raw_lang.strip() and isinstance(raw_query_parts, dict)):
                continue
            struct_query = raw_query_parts.get('struct', '')
            imports_query = raw_query_parts.get('imports', '')
            if isinstance(struct_query, str) and isinstance(imports_query, str):
                lang = raw_lang.strip().lower()
                # The file entry replaces both parts of the language's query.
                lang_queries[lang] = {
                    'struct': struct_query,
                    'imports': imports_query,
                }
                custom_query_languages[lang] = str(resolved_path)

    # Load unsupported extensions from --language-config
    unsupported_extensions = config_data.get('unsupported_extensions', {})
    if isinstance(unsupported_extensions, dict):
        for raw_ext, raw_lang in unsupported_extensions.items():
            if not (isinstance(raw_ext, str) and isinstance(raw_lang, str) and raw_lang.strip()):
                continue
            try:
                ext = _normalize_extension(raw_ext)
            except ValueError as exc:
                warnings.append(
                    f'ignored invalid unsupported extension {raw_ext!r} in {resolved_path}: {exc}'
                )
                continue
            known_unsupported_extensions[ext] = raw_lang.strip().lower()
            # Declaring an extension unsupported removes any mapping for it.
            extension_map.pop(ext, None)

    return result
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _load_languages(
    extension_map: dict[str, str],
    lang_queries: dict[str, dict[str, str]],
    requested: Optional[list[str]] = None,
) -> dict[str, Any]:
    """Build a ``{lang_name: Language}`` mapping of usable Tree-sitter languages.

    ``tree_sitter_language_pack`` is used when it can be imported. Otherwise
    the function falls back to ``tree_sitter_python`` plus ``tree_sitter``,
    which can only provide ``'python'``. When neither can be imported, or
    when no language at all could be loaded, an ``[ERROR]`` message is
    written to stderr and the process exits with status 1.

    The languages tried are ``requested`` when it is non-empty; otherwise
    every language named in ``extension_map`` values or ``lang_queries``
    keys, in sorted order. Names the lookup rejects with ``LookupError`` /
    ``KeyError`` are skipped.
    """
    try:
        from tree_sitter_language_pack import get_language as _pack_lookup
    except ImportError:
        # Python single-language package fallback only
        try:
            import tree_sitter_python
            from tree_sitter import Language
        except ImportError:
            sys.stderr.write(
                "[ERROR] Missing tree-sitter language support.\n"
                "Please run: pip install tree-sitter-language-pack\n"
            )
            sys.exit(1)

        def resolve(name: str) -> Any:
            if name != 'python':
                raise LookupError(name)
            return Language(tree_sitter_python.language())
    else:
        def resolve(name: str) -> Any:
            return _pack_lookup(cast(Any, name))

    candidates = requested or sorted({*extension_map.values(), *lang_queries})

    loaded: dict[str, Any] = {}
    for candidate in candidates:
        try:
            loaded[candidate] = resolve(candidate)
        except (LookupError, KeyError):
            # Language package not installed, skip gracefully
            continue

    if len(loaded) == 0:
        sys.stderr.write("[ERROR] No available language parsers, please install tree-sitter-language-pack\n")
        sys.exit(1)
    return loaded
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def _file_module_id(repo_path: Path, file_path: Path) -> str:
    """Derive a dot-separated module ID from a file path inside the repo.

    Example: src/nexus/api/routes.py → src.nexus.api.routes
             src/core/parser.hpp     → src.core.parser

    Only the last suffix of the file name is removed. A file whose stem is
    ``__init__`` is folded into its package path unless it sits directly in
    the repository root.
    """
    relative_parts = file_path.relative_to(repo_path).parts
    segments = [*relative_parts[:-1], Path(relative_parts[-1]).stem]

    # Python special handling: __init__ stands for the package itself
    if len(segments) > 1 and segments[-1] == '__init__':
        segments.pop()
    return '.'.join(segments)
|
|
322
|
+
|
|
323
|
+
|
|
324
|
+
|
|
325
|
+
|
|
326
|
+
def extract_file(
    repo_path: Path,
    file_path: Path,
    lang_name: str,
    language: Any,
    lang_queries: dict[str, dict[str, str]],
) -> tuple[list[dict], list[dict], list[str]]:
    """Parse one source file and return ``(nodes, edges, errors)``.

    A ``Module`` node is emitted for the file itself. The language's
    ``struct`` query then yields ``Class`` / ``Function`` nodes with
    ``contains`` edges, and its ``imports`` query yields ``imports`` edges
    from the module to each captured ``mod`` text. Failures never raise:
    read, parse and query errors are appended to ``errors`` and whatever was
    collected up to that point is returned.
    """
    from tree_sitter import Parser as TSParser, Query, QueryCursor

    nodes: list[dict] = []
    edges: list[dict] = []
    errors: list[str] = []
    outcome = (nodes, edges, errors)

    try:
        source = file_path.read_bytes()
    except OSError as read_exc:
        errors.append(f"{file_path}: read error: {read_exc}")
        return outcome

    try:
        tree = TSParser(language).parse(source)
    except Exception as parse_exc:
        errors.append(f"{file_path}: parse error: {parse_exc}")
        return outcome

    rel_path = str(file_path.relative_to(repo_path)).replace('\\', '/')
    module_id = _file_module_id(repo_path, file_path)

    # Module node (file level)
    nodes.append({
        'id': module_id,
        'type': 'Module',
        'label': module_id.split('.')[-1],
        'path': rel_path,
        'lines': source.count(b'\n') + 1,
        'lang': lang_name,
    })

    queries = lang_queries.get(lang_name, {})

    # ── Structure: Classes / Functions ─────────────────────────────────────
    struct_q_text = queries.get('struct', '')
    if struct_q_text.strip():
        try:
            struct_query = Query(language, struct_q_text)
            # (start_byte, end_byte, node_id) of every class seen so far
            class_ranges: list[tuple[int, int, str]] = []

            for _pattern_idx, captures in QueryCursor(struct_query).matches(tree.root_node):
                # Any capture name containing "class" marks a class match.
                is_class = any('class' in capture_name for capture_name in captures)
                prefix = 'class' if is_class else 'func'

                def_nodes = captures.get(f'{prefix}.def', [])
                name_nodes = captures.get(f'{prefix}.name', [])
                if not (def_nodes and name_nodes):
                    continue

                def_node, name_node = def_nodes[0], name_nodes[0]
                name = source[name_node.start_byte:name_node.end_byte].decode('utf-8', 'replace')

                if is_class:
                    node_type = 'Class'
                    parent_id = module_id
                else:
                    node_type = 'Function'
                    # First recorded class whose byte range encloses the
                    # definition; otherwise the function belongs to the module.
                    parent_id = next(
                        (
                            cls_id
                            for cls_start, cls_end, cls_id in class_ranges
                            if cls_start <= def_node.start_byte and def_node.end_byte <= cls_end
                        ),
                        module_id,
                    )

                node_id = f"{parent_id}.{name}"
                nodes.append({
                    'id': node_id,
                    'type': node_type,
                    'label': name,
                    'path': rel_path,
                    'parent': parent_id,
                    'start_line': def_node.start_point[0] + 1,
                    'end_line': def_node.end_point[0] + 1,
                })
                if is_class:
                    class_ranges.append((def_node.start_byte, def_node.end_byte, node_id))
                edges.append({'source': parent_id, 'target': node_id, 'type': 'contains'})

        except Exception as struct_exc:
            errors.append(f"{file_path}: struct query error: {struct_exc}")

    # ── Imports: imports edges ─────────────────────────────────────────────
    import_q_text = queries.get('imports', '')
    if import_q_text.strip():
        try:
            import_query = Query(language, import_q_text)
            for _pattern_idx, captures in QueryCursor(import_query).matches(tree.root_node):
                for mod_node in captures.get('mod', []):
                    raw_target = source[mod_node.start_byte:mod_node.end_byte].decode('utf-8', 'replace')
                    # Drop surrounding quotes, angle brackets and spaces.
                    target = raw_target.strip('"\'<> ')
                    if target:
                        edges.append({'source': module_id, 'target': target, 'type': 'imports'})
        except Exception as import_exc:
            errors.append(f"{file_path}: import query error: {import_exc}")

    return outcome
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def collect_source_files(
    repo_path: Path,
    languages: dict[str, Any],
    extension_map: dict[str, str],
    known_unsupported_extensions: dict[str, str],
) -> tuple[list[tuple[Path, str]], dict[str, int], dict[str, int], dict[str, int]]:
    """Find the parseable source files below ``repo_path`` and tally the rest.

    Files are classified by their lower-cased suffix; excluded paths are
    ignored entirely.

    Returns:
        - [(file_path, lang_name)] sorted by path, for files whose language
          is present in ``languages``
        - supported_file_counts: {lang_name: file_count}
        - known_unsupported_file_counts: {lang_name: file_count}
        - configured_but_unavailable_file_counts: {lang_name: file_count}
          (extension is mapped, but the language is not in ``languages``)
    """
    files: list[tuple[Path, str]] = []
    supported_file_counts: dict[str, int] = {}
    known_unsupported_file_counts: dict[str, int] = {}
    configured_but_unavailable_file_counts: dict[str, int] = {}

    def bump(counter: dict[str, int], key: str) -> None:
        counter[key] = counter.get(key, 0) + 1

    for candidate in repo_path.rglob('*'):
        if not candidate.is_file() or _should_skip_path(repo_path, candidate):
            continue

        ext = candidate.suffix.lower()
        lang = extension_map.get(ext)
        if not lang:
            unsupported_lang = known_unsupported_extensions.get(ext)
            if unsupported_lang:
                bump(known_unsupported_file_counts, unsupported_lang)
            continue

        if lang in languages:
            files.append((candidate, lang))
            bump(supported_file_counts, lang)
        else:
            bump(configured_but_unavailable_file_counts, lang)

    files.sort(key=lambda pair: pair[0])
    return (
        files,
        supported_file_counts,
        known_unsupported_file_counts,
        configured_but_unavailable_file_counts,
    )
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
|
|
493
|
+
def apply_max_nodes(
    nodes: list[dict],
    edges: list[dict],
    max_nodes: int,
) -> tuple[list[dict], list[dict], bool, int]:
    """Cap the graph at ``max_nodes`` nodes by dropping Function nodes.

    Module and Class nodes are always kept, even when they alone exceed
    ``max_nodes``; Function nodes fill the remaining slots in their original
    order.

    Returns:
        (filtered_nodes, filtered_edges, truncated, truncated_count) where
        ``truncated_count`` is the number of Function nodes dropped. When
        the node count is within the limit the inputs are returned
        unchanged with ``truncated`` False.

    ``imports`` edges are always kept (their targets are not node IDs). Any
    other edge is kept only when both endpoints survive, so the result never
    contains a ``contains`` edge pointing at a truncated node.
    """
    if len(nodes) <= max_nodes:
        return nodes, edges, False, 0

    priority_nodes = [n for n in nodes if n['type'] in ('Module', 'Class')]
    func_nodes = [n for n in nodes if n['type'] == 'Function']

    # Clamp at zero: priority nodes alone may already exceed the budget.
    remaining_slots = max(max_nodes - len(priority_nodes), 0)
    kept_funcs = func_nodes[:remaining_slots]
    kept_nodes = priority_nodes + kept_funcs
    truncated_count = len(func_nodes) - len(kept_funcs)

    kept_ids = {n['id'] for n in kept_nodes}
    kept_edges = [
        e for e in edges
        if e.get('type') == 'imports'
        or (e['source'] in kept_ids and e['target'] in kept_ids)
    ]
    return kept_nodes, kept_edges, True, truncated_count
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
def main() -> None:
    """Command-line entry point: scan a repository and print its AST summary as JSON.

    Parses the CLI arguments, optionally writes a filtered file tree, merges
    CLI-supplied and file-based language customizations, extracts nodes and
    edges from every collected source file, caps the node count through
    ``apply_max_nodes`` and prints the resulting document to stdout.
    Diagnostics are written to stderr; the process exits with status 1 when
    ``repo_path`` does not exist.
    """

    def summarize(counts: dict[str, int]) -> str:
        # Render per-language file counts as "lang (N files)", sorted by item.
        return ', '.join(
            f"{lang} ({count} files)"
            for lang, count in sorted(counts.items())
        )

    # ---- argument parsing -------------------------------------------------
    cli = argparse.ArgumentParser(
        description='Extract AST structure from a multi-language repository'
    )
    cli.add_argument('repo_path', help='Target repository path')
    cli.add_argument(
        '--max-nodes',
        type=int,
        default=500,
        help='Max nodes in output (default: 500). Truncates Function nodes first.',
    )
    cli.add_argument(
        '--add-extension',
        action='append',
        dest='add_extensions',
        metavar='EXT=LANG',
        help='Add extension mapping, e.g., .templ=templ. Can be used multiple times.',
    )
    cli.add_argument(
        '--add-query',
        action='append',
        dest='add_queries',
        nargs=3,
        metavar=('LANG', 'TYPE', 'QUERY'),
        help='Add/override a query for a language. TYPE is "struct" or "imports". Can be used multiple times.',
    )
    cli.add_argument(
        '--language-config',
        help='Optional JSON file that adds or overrides extension mappings and tree-sitter queries. Useful for complex configurations.',
    )
    cli.add_argument(
        '--file-tree-out',
        help='Optional output path for a filtered file tree (e.g. .nexus-map/raw/file_tree.txt). Uses the same exclude rules as AST collection.',
    )
    args = cli.parse_args()

    # ---- repository sanity checks ----------------------------------------
    repo_path = Path(args.repo_path).resolve()
    if not repo_path.exists():
        sys.stderr.write(f"[ERROR] repo_path not found: {repo_path}\n")
        sys.exit(1)
    git_dir = repo_path / '.git'
    if not git_dir.exists():
        # Only a hint: extraction still proceeds for non-git directories.
        sys.stderr.write(f"[WARNING] .git not found in {repo_path}, may not be a git repo\n")

    if args.file_tree_out:
        tree_target = Path(args.file_tree_out)
        # Relative output paths are anchored at the repository root.
        tree_target = tree_target if tree_target.is_absolute() else repo_path / tree_target
        write_filtered_file_tree(repo_path, tree_target.resolve())

    # ---- language configuration: CLI flags first, then merged config ------
    cli_overrides = _apply_cli_customizations(args.add_extensions, args.add_queries)
    cli_ext_override, cli_query_override, cli_warnings, cli_custom_query_languages = cli_overrides

    merged_config = _load_language_customizations(
        repo_path,
        args.language_config,
        cli_ext_override,
        cli_query_override,
        cli_warnings,
        cli_custom_query_languages,
    )
    (
        extension_map,
        lang_queries,
        known_unsupported_extensions,
        config_warnings,
        loaded_config_paths,
        custom_query_languages,
    ) = merged_config

    languages = _load_languages(extension_map, lang_queries)
    collected = collect_source_files(
        repo_path,
        languages,
        extension_map,
        known_unsupported_extensions,
    )
    (
        source_files,
        supported_file_counts,
        known_unsupported_file_counts,
        configured_but_unavailable_file_counts,
    ) = collected

    if not source_files:
        sys.stderr.write(f"[WARNING] No supported source files found in {repo_path}\n")

    # ---- per-file extraction ---------------------------------------------
    all_nodes: list[dict] = []
    all_edges: list[dict] = []
    all_errors: list[str] = []
    detected_langs: set[str] = set()
    total_lines = 0
    warnings: list[str] = list(config_warnings)
    module_only_file_counts: dict[str, int] = {}
    # Languages whose 'struct' query is non-blank after stripping whitespace.
    languages_with_structural_queries = sorted(
        lang
        for lang, query_parts in lang_queries.items()
        if query_parts.get('struct', '').strip()
    )

    for file_path, lang_name in source_files:
        file_nodes, file_edges, file_errors = extract_file(
            repo_path,
            file_path,
            lang_name,
            languages[lang_name],
            lang_queries,
        )
        all_nodes.extend(file_nodes)
        all_edges.extend(file_edges)
        all_errors.extend(file_errors)

        if lang_name not in languages_with_structural_queries:
            seen_so_far = module_only_file_counts.get(lang_name, 0)
            module_only_file_counts[lang_name] = seen_so_far + 1

        if file_nodes:
            detected_langs.add(lang_name)
            # NOTE(review): presumably the first node is the file's module node
            # carrying its line count; confirm against extract_file.
            total_lines += file_nodes[0].get('lines', 0)

    final_nodes, final_edges, truncated, truncated_count = apply_max_nodes(
        all_nodes, all_edges, args.max_nodes
    )

    # ---- coverage warnings (order is part of the output) ------------------
    if known_unsupported_file_counts:
        warnings.append(
            "known unsupported languages present; downstream outputs must mark inferred sections explicitly: "
            + summarize(known_unsupported_file_counts)
        )

    if configured_but_unavailable_file_counts:
        warnings.append(
            'some configured languages were detected in source files but no parser could be loaded: '
            + summarize(configured_but_unavailable_file_counts)
        )

    if module_only_file_counts:
        warnings.append(
            "some languages were parsed with module-only coverage because no structural query template is bundled: "
            + summarize(module_only_file_counts)
        )

    if loaded_config_paths:
        config_summary = ', '.join(loaded_config_paths)
        warnings.append(f'custom language configuration loaded: {config_summary}')

    # ---- assemble and emit the JSON document ------------------------------
    stats = {
        'total_files': len(source_files),
        'total_lines': total_lines,
        'parse_errors': len(all_errors),
        'truncated': truncated,
        'truncated_nodes': truncated_count,
        'supported_file_counts': supported_file_counts,
        'languages_with_structural_queries': languages_with_structural_queries,
        'languages_with_custom_queries': sorted(custom_query_languages.keys()),
        'module_only_file_counts': module_only_file_counts,
        'known_unsupported_file_counts': known_unsupported_file_counts,
        'configured_but_unavailable_file_counts': configured_but_unavailable_file_counts,
        'custom_language_config_paths': loaded_config_paths,
    }
    result = {
        'languages': sorted(detected_langs),
        'stats': stats,
        'nodes': final_nodes,
        'edges': final_edges,
    }

    if all_errors:
        # Only the first 20 error messages are included in the output.
        result['_errors'] = all_errors[:20]
    if warnings:
        result['warnings'] = warnings

    print(json.dumps(result, ensure_ascii=False, indent=2))
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
706
|
+
|