@haaaiawd/anws 2.3.0 → 2.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/README.md +1 -1
  2. package/bin/cli.js +52 -22
  3. package/lib/diff.js +5 -2
  4. package/lib/init.js +217 -96
  5. package/lib/install-state.js +18 -3
  6. package/lib/manifest.js +376 -79
  7. package/lib/prompt.js +68 -0
  8. package/lib/resources/index.js +36 -2
  9. package/lib/update.js +12 -6
  10. package/package.json +48 -47
  11. package/templates/.agents/skills/anws-system/SKILL.md +108 -108
  12. package/templates/.agents/skills/code-reviewer/SKILL.md +170 -115
  13. package/templates/.agents/skills/concept-modeler/SKILL.md +230 -179
  14. package/templates/.agents/skills/craft-authoring/SKILL.md +186 -183
  15. package/templates/.agents/skills/craft-authoring/references/BUNDLE_POLICY.md +61 -0
  16. package/templates/.agents/skills/design-reviewer/SKILL.md +265 -190
  17. package/templates/.agents/skills/e2e-testing-guide/SKILL.md +246 -135
  18. package/templates/.agents/skills/nexus-mapper/SKILL.md +321 -321
  19. package/templates/.agents/skills/output-contract/SKILL.md +37 -0
  20. package/templates/.agents/skills/report-template/SKILL.md +92 -92
  21. package/templates/.agents/skills/sequential-thinking/SKILL.md +222 -225
  22. package/templates/.agents/skills/spec-writer/SKILL.md +75 -30
  23. package/templates/.agents/skills/system-architect/SKILL.md +538 -678
  24. package/templates/.agents/skills/system-designer/SKILL.md +601 -601
  25. package/templates/.agents/skills/task-planner/SKILL.md +1 -2
  26. package/templates/.agents/skills/task-reviewer/SKILL.md +428 -388
  27. package/templates/.agents/skills/tech-evaluator/SKILL.md +252 -144
  28. package/templates/.agents/workflows/blueprint.md +157 -69
  29. package/templates/.agents/workflows/challenge.md +331 -497
  30. package/templates/.agents/workflows/change.md +182 -339
  31. package/templates/.agents/workflows/craft.md +159 -197
  32. package/templates/.agents/workflows/design-system.md +202 -674
  33. package/templates/.agents/workflows/explore.md +187 -399
  34. package/templates/.agents/workflows/forge.md +650 -609
  35. package/templates/.agents/workflows/genesis.md +439 -351
  36. package/templates/.agents/workflows/probe.md +219 -241
  37. package/templates/.agents/workflows/quickstart.md +302 -123
  38. package/templates/.agents/workflows/upgrade.md +145 -182
  39. package/templates_en/.agents/skills/anws-system/SKILL.md +108 -0
  40. package/templates_en/.agents/skills/code-reviewer/SKILL.md +170 -0
  41. package/templates_en/.agents/skills/concept-modeler/SKILL.md +230 -0
  42. package/templates_en/.agents/skills/craft-authoring/SKILL.md +179 -0
  43. package/templates_en/.agents/skills/craft-authoring/references/BUNDLE_POLICY.md +60 -0
  44. package/templates_en/.agents/skills/craft-authoring/references/PROMPT_QUALITY_RUBRIC.md +92 -0
  45. package/templates_en/.agents/skills/craft-authoring/references/SCORECARD_TEMPLATE.md +52 -0
  46. package/templates_en/.agents/skills/design-reviewer/SKILL.md +265 -0
  47. package/templates_en/.agents/skills/e2e-testing-guide/SKILL.md +246 -0
  48. package/templates_en/.agents/skills/nexus-mapper/SKILL.md +306 -0
  49. package/templates_en/.agents/skills/nexus-mapper/references/language-customization.md +167 -0
  50. package/templates_en/.agents/skills/nexus-mapper/references/output-schema.md +311 -0
  51. package/templates_en/.agents/skills/nexus-mapper/references/probe-protocol.md +246 -0
  52. package/templates_en/.agents/skills/nexus-mapper/scripts/extract_ast.py +706 -0
  53. package/templates_en/.agents/skills/nexus-mapper/scripts/git_detective.py +194 -0
  54. package/templates_en/.agents/skills/nexus-mapper/scripts/languages.json +127 -0
  55. package/templates_en/.agents/skills/nexus-mapper/scripts/query_graph.py +556 -0
  56. package/templates_en/.agents/skills/nexus-mapper/scripts/requirements.txt +6 -0
  57. package/templates_en/.agents/skills/nexus-query/SKILL.md +114 -0
  58. package/templates_en/.agents/skills/nexus-query/scripts/extract_ast.py +706 -0
  59. package/templates_en/.agents/skills/nexus-query/scripts/git_detective.py +194 -0
  60. package/templates_en/.agents/skills/nexus-query/scripts/languages.json +127 -0
  61. package/templates_en/.agents/skills/nexus-query/scripts/query_graph.py +556 -0
  62. package/templates_en/.agents/skills/nexus-query/scripts/requirements.txt +6 -0
  63. package/templates_en/.agents/skills/output-contract/SKILL.md +37 -0
  64. package/templates_en/.agents/skills/report-template/SKILL.md +85 -0
  65. package/templates_en/.agents/skills/report-template/references/REPORT_TEMPLATE.md +100 -0
  66. package/templates_en/.agents/skills/runtime-inspector/SKILL.md +101 -0
  67. package/templates_en/.agents/skills/sequential-thinking/SKILL.md +214 -0
  68. package/templates_en/.agents/skills/spec-writer/SKILL.md +153 -0
  69. package/templates_en/.agents/skills/spec-writer/references/prd_template.md +177 -0
  70. package/templates_en/.agents/skills/system-architect/SKILL.md +538 -0
  71. package/templates_en/.agents/skills/system-architect/references/rfc_template.md +59 -0
  72. package/templates_en/.agents/skills/system-designer/SKILL.md +534 -0
  73. package/templates_en/.agents/skills/system-designer/references/system-design-detail-template.md +187 -0
  74. package/templates_en/.agents/skills/system-designer/references/system-design-template.md +605 -0
  75. package/templates_en/.agents/skills/task-planner/SKILL.md +251 -0
  76. package/templates_en/.agents/skills/task-planner/references/TASK_TEMPLATE_05A.md +109 -0
  77. package/templates_en/.agents/skills/task-planner/references/TASK_TEMPLATE_05B.md +176 -0
  78. package/templates_en/.agents/skills/task-reviewer/SKILL.md +428 -0
  79. package/templates_en/.agents/skills/tech-evaluator/SKILL.md +252 -0
  80. package/templates_en/.agents/skills/tech-evaluator/references/ADR_TEMPLATE.md +78 -0
  81. package/templates_en/.agents/workflows/blueprint.md +200 -0
  82. package/templates_en/.agents/workflows/challenge.md +331 -0
  83. package/templates_en/.agents/workflows/change.md +182 -0
  84. package/templates_en/.agents/workflows/craft.md +159 -0
  85. package/templates_en/.agents/workflows/design-system.md +202 -0
  86. package/templates_en/.agents/workflows/explore.md +187 -0
  87. package/templates_en/.agents/workflows/forge.md +651 -0
  88. package/templates_en/.agents/workflows/genesis.md +439 -0
  89. package/templates_en/.agents/workflows/probe.md +219 -0
  90. package/templates_en/.agents/workflows/quickstart.md +303 -0
  91. package/templates_en/.agents/workflows/upgrade.md +145 -0
  92. package/templates_en/AGENTS.md +149 -0
@@ -0,0 +1,706 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ extract_ast.py — Multi-language repository AST structure extractor
4
+
5
+ Purpose: Extract module/class/function structure from a code repository using Tree-sitter and output JSON to stdout
6
+ Supports: Python, JavaScript, TypeScript, TSX, Java, Go, Rust, C#, C/C++, Kotlin, Ruby, Swift, PHP, Lua ...
7
+ Usage: python extract_ast.py <repo_path> [--max-nodes 500]
8
+ """
9
+
10
+ import sys
11
+ import json
12
+ import argparse
13
+ from pathlib import Path
14
+ from typing import Any, Optional, cast
15
+
16
+
17
# Path components that cause a path to be skipped by _should_skip_path
# (VCS metadata, caches, virtualenvs, build output, IDE folders, ...).
EXCLUDE_DIRS = {
    '.git',
    '__pycache__',
    '.venv',
    'venv',
    'node_modules',
    'dist',
    'build',
    '.mypy_cache',
    '.pytest_cache',
    'site-packages',
    '.nexus-map',
    '.tox',
    '.eggs',
    'target',
    'cmake-build-debug',
    '.vs',
    'out',
    '_build',
    'vendor',
    '.ruff_cache',
    '.godot',
    '.idea',
    '.vscode',
    '.nox',
}

# File-name suffixes that are skipped even outside the excluded directories.
EXCLUDE_FILE_SUFFIXES = ('.import', '.vulkan.cache')

# Built-in language config: languages.json lives next to this script.
_LANGUAGES_JSON = Path(__file__).with_name('languages.json')
27
+
28
+
29
def _load_builtin_languages() -> tuple[dict[str, str], dict[str, dict[str, str]], dict[str, str]]:
    """Load the bundled language configuration from languages.json.

    Reads the file referenced by ``_LANGUAGES_JSON`` and returns
    ``(extensions, queries, unsupported_extensions)``. Every entry of
    ``queries`` is normalized so that it has both a ``'struct'`` and an
    ``'imports'`` key (missing parts become ``''``).

    Any failure to read, decode, or parse the file, as well as a root value
    or section that is not a JSON object, is reported as a single ``[ERROR]``
    line on stderr followed by ``sys.exit(1)``.
    """
    section_names = ('extensions', 'queries', 'unsupported_extensions')
    try:
        data = json.loads(_LANGUAGES_JSON.read_text(encoding='utf-8'))
        if not isinstance(data, dict):
            raise ValueError('root value is not an object')
        sections = {name: data.get(name, {}) for name in section_names}
        for name, section in sections.items():
            if not isinstance(section, dict):
                raise ValueError(f'{name!r} is not an object')
        for lang, parts in sections['queries'].items():
            if not isinstance(parts, dict):
                raise ValueError(f'query entry for {lang!r} is not an object')
    except (OSError, ValueError) as exc:
        # OSError: missing / unreadable file or a directory in its place.
        # ValueError: json.JSONDecodeError, UnicodeDecodeError, and the
        # structural checks above.
        sys.stderr.write(f"[ERROR] Failed to load {_LANGUAGES_JSON}: {exc}\n")
        sys.exit(1)

    extensions: dict[str, str] = sections['extensions']
    raw_queries: dict[str, dict[str, str]] = sections['queries']
    unsupported: dict[str, str] = sections['unsupported_extensions']

    # Normalize queries: ensure each language has struct and imports keys
    queries: dict[str, dict[str, str]] = {
        lang: {
            'struct': parts.get('struct', ''),
            'imports': parts.get('imports', ''),
        }
        for lang, parts in raw_queries.items()
    }

    return extensions, queries, unsupported
50
+
51
+
52
# Loaded once at import time; _load_builtin_languages() terminates the
# process if languages.json cannot be loaded.
(
    BUILTIN_EXTENSION_MAP,
    BUILTIN_LANG_QUERIES,
    BUILTIN_KNOWN_UNSUPPORTED_EXTENSIONS,
) = _load_builtin_languages()
55
+
56
+
57
def _should_skip_path(repo_path: Path, path: Path) -> bool:
    """Return True when ``path`` must be ignored by the repository scan.

    A path is skipped when any component of its path relative to
    ``repo_path`` (including the final one) is listed in ``EXCLUDE_DIRS``,
    or when it is an existing file whose name ends with one of
    ``EXCLUDE_FILE_SUFFIXES``.
    """
    relative_parts = path.relative_to(repo_path).parts
    if not EXCLUDE_DIRS.isdisjoint(relative_parts):
        return True
    # str.endswith accepts the whole tuple of suffixes in one call.
    return path.is_file() and path.name.endswith(EXCLUDE_FILE_SUFFIXES)
64
+
65
+
66
def write_filtered_file_tree(repo_path: Path, output_path: Path) -> None:
    """Write a sorted listing of the non-excluded paths under ``repo_path``.

    Every path found by a recursive glob that is not rejected by
    ``_should_skip_path`` is written as a POSIX-style path relative to
    ``repo_path``, one per line, with a trailing ``/`` on directories.
    Missing parent directories of ``output_path`` are created. The file is
    UTF-8 encoded and ends with a newline unless the listing is empty.
    """
    entries = [
        entry.relative_to(repo_path).as_posix() + ('/' if entry.is_dir() else '')
        for entry in sorted(repo_path.rglob('*'))
        if not _should_skip_path(repo_path, entry)
    ]

    listing = '\n'.join(entries)
    if entries:
        listing += '\n'

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(listing, encoding='utf-8')
77
+
78
+ def _normalize_extension(ext: str) -> str:
79
+ normalized = ext.strip().lower()
80
+ if not normalized:
81
+ raise ValueError('extension must not be empty')
82
+ if not normalized.startswith('.'):
83
+ normalized = f'.{normalized}'
84
+ return normalized
85
+
86
+
87
+ def _copy_lang_queries(source: dict[str, dict[str, str]]) -> dict[str, dict[str, str]]:
88
+ return {
89
+ lang: {
90
+ 'struct': query_parts.get('struct', ''),
91
+ 'imports': query_parts.get('imports', ''),
92
+ }
93
+ for lang, query_parts in source.items()
94
+ }
95
+
96
+
97
+ def _apply_cli_customizations(
98
+ cli_extensions: list[str] | None,
99
+ cli_queries: list[list[str]] | None,
100
+ ) -> tuple[
101
+ dict[str, str],
102
+ dict[str, dict[str, str]],
103
+ list[str],
104
+ dict[str, str],
105
+ ]:
106
+ """
107
+ Apply language customization from CLI arguments (--add-extension and --add-query).
108
+ Returns (extension_override, query_override, warnings)
109
+ """
110
+ extension_override: dict[str, str] = {}
111
+ query_override: dict[str, dict[str, str]] = {}
112
+ warnings: list[str] = []
113
+ custom_query_languages: dict[str, str] = {}
114
+
115
+ if cli_extensions:
116
+ for item in cli_extensions:
117
+ if '=' not in item:
118
+ warnings.append(f'ignored invalid extension mapping {item!r}, expected EXT=LANG')
119
+ continue
120
+ ext_part, lang_part = item.split('=', 1)
121
+ try:
122
+ ext = _normalize_extension(ext_part)
123
+ lang = lang_part.strip().lower()
124
+ if not lang:
125
+ warnings.append(f'ignored empty language name for extension {ext_part!r}')
126
+ continue
127
+ extension_override[ext] = lang
128
+ except ValueError as e:
129
+ warnings.append(f'ignored invalid extension {ext_part!r}: {e}')
130
+ continue
131
+
132
+ if cli_queries:
133
+ for query_item in cli_queries:
134
+ if len(query_item) != 3:
135
+ warnings.append(f'ignored malformed query: expected 3 parts, got {len(query_item)}')
136
+ continue
137
+ lang, query_type, query_str = query_item
138
+ lang = lang.strip().lower()
139
+ if not lang:
140
+ warnings.append('ignored empty language name in query')
141
+ continue
142
+ if query_type not in ('struct', 'imports'):
143
+ warnings.append(f'ignored unknown query type {query_type!r} for language {lang!r}')
144
+ continue
145
+
146
+ if lang not in query_override:
147
+ query_override[lang] = {'struct': '', 'imports': ''}
148
+ query_override[lang][query_type] = query_str
149
+ custom_query_languages[lang] = '<cli>'
150
+
151
+ return extension_override, query_override, warnings, custom_query_languages
152
+
153
+
154
def _load_language_customizations(
    repo_path: Path,
    explicit_config_path: Optional[str],
    cli_extension_override: dict[str, str],
    cli_query_override: dict[str, dict[str, str]],
    cli_warnings: list[str],
    cli_custom_query_languages: dict[str, str],
) -> tuple[
    dict[str, str],
    dict[str, dict[str, str]],
    dict[str, str],
    list[str],
    list[str],
    dict[str, str],
]:
    """
    Load and merge language customization config.

    Priority: CLI --language-config > CLI --add-* args > built-in config

    Merge rules:
      * CLI extension overrides are applied on top of the built-in map.
      * A CLI query for a known language replaces only the non-empty parts;
        for a new language the (copied) CLI entry is used as-is.
      * A --language-config path that is not absolute is resolved against
        ``repo_path``. A ``queries`` entry in that file replaces BOTH parts
        for the language (a missing part becomes ``''``). An ``extensions``
        entry removes the extension from the known-unsupported table, and an
        ``unsupported_extensions`` entry removes it from the extension map.

    Problems with the config file (missing, unreadable, not UTF-8, invalid
    JSON, non-object root, invalid extension names) are never fatal; they are
    appended to the returned warnings and the file, or the offending entry,
    is ignored.

    Returns (extension_map, lang_queries, known_unsupported_extensions, warnings, loaded_config_paths, custom_query_languages)
    """
    extension_map = dict(BUILTIN_EXTENSION_MAP)
    lang_queries = _copy_lang_queries(BUILTIN_LANG_QUERIES)
    known_unsupported_extensions = dict(BUILTIN_KNOWN_UNSUPPORTED_EXTENSIONS)
    warnings: list[str] = list(cli_warnings)
    loaded_config_paths: list[str] = []
    custom_query_languages: dict[str, str] = dict(cli_custom_query_languages)

    # All six containers are only mutated in place below, so this tuple can be
    # returned from every exit point.
    merged = (
        extension_map,
        lang_queries,
        known_unsupported_extensions,
        warnings,
        loaded_config_paths,
        custom_query_languages,
    )

    # First merge customizations from CLI arguments
    extension_map.update(cli_extension_override)
    for lang, query_parts in cli_query_override.items():
        if lang in lang_queries:
            # Only override the provided parts
            for part in ('struct', 'imports'):
                if query_parts.get(part):
                    lang_queries[lang][part] = query_parts[part]
        else:
            # Copy so later edits to lang_queries never leak into the caller's dict.
            lang_queries[lang] = dict(query_parts)

    # Then load --language-config file (if provided), highest priority
    if not explicit_config_path:
        return merged

    config_path = Path(explicit_config_path)
    resolved_path = config_path if config_path.is_absolute() else (repo_path / config_path)

    try:
        config_data = json.loads(resolved_path.read_text(encoding='utf-8'))
    except FileNotFoundError:
        warnings.append(f'language config not found: {resolved_path}')
        return merged
    except json.JSONDecodeError as exc:
        warnings.append(f'language config parse error in {resolved_path}: {exc}')
        return merged
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; without it a non-UTF-8 config
        # file would abort the whole run with a traceback.
        warnings.append(f'language config read error in {resolved_path}: {exc}')
        return merged

    if not isinstance(config_data, dict):
        warnings.append(f'language config ignored because root value is not an object: {resolved_path}')
        return merged

    loaded_config_paths.append(str(resolved_path))

    def valid_pairs(section: Any, section_name: str) -> list[tuple[str, str]]:
        """Return normalized (extension, language) pairs from one config section."""
        pairs: list[tuple[str, str]] = []
        if not isinstance(section, dict):
            return pairs
        for raw_ext, raw_lang in section.items():
            if not (isinstance(raw_ext, str) and isinstance(raw_lang, str) and raw_lang.strip()):
                continue
            try:
                ext = _normalize_extension(raw_ext)
            except ValueError as exc:
                warnings.append(
                    f'ignored invalid extension {raw_ext!r} in {section_name!r} of {resolved_path}: {exc}'
                )
                continue
            pairs.append((ext, raw_lang.strip().lower()))
        return pairs

    # Load extension mappings from --language-config
    for ext, lang in valid_pairs(config_data.get('extensions', {}), 'extensions'):
        extension_map[ext] = lang
        known_unsupported_extensions.pop(ext, None)

    # Load queries from --language-config
    queries = config_data.get('queries', {})
    if isinstance(queries, dict):
        for raw_lang, raw_query_parts in queries.items():
            if not (isinstance(raw_lang, str) and raw_lang.strip() and isinstance(raw_query_parts, dict)):
                continue
            struct_query = raw_query_parts.get('struct', '')
            imports_query = raw_query_parts.get('imports', '')
            if isinstance(struct_query, str) and isinstance(imports_query, str):
                lang = raw_lang.strip().lower()
                lang_queries[lang] = {
                    'struct': struct_query,
                    'imports': imports_query,
                }
                custom_query_languages[lang] = str(resolved_path)

    # Load unsupported extensions from --language-config
    for ext, lang in valid_pairs(
        config_data.get('unsupported_extensions', {}), 'unsupported_extensions'
    ):
        known_unsupported_extensions[ext] = lang
        extension_map.pop(ext, None)

    return merged
260
+
261
+
262
+
263
+ def _load_languages(
264
+ extension_map: dict[str, str],
265
+ lang_queries: dict[str, dict[str, str]],
266
+ requested: Optional[list[str]] = None,
267
+ ) -> dict[str, Any]:
268
+ """
269
+ Load Tree-sitter language objects and return a {lang_name: Language} dictionary.
270
+ Prefer tree-sitter-language-pack (160+ languages); fall back to a single-language package when unavailable.
271
+ """
272
+ try:
273
+ from tree_sitter_language_pack import get_language as _get
274
+
275
+ def get_language(name: str) -> Any:
276
+ return _get(cast(Any, name))
277
+ except ImportError:
278
+ # Python-only single-language fallback
279
+ try:
280
+ import tree_sitter_python
281
+ from tree_sitter import Language
282
+
283
+ def get_language(name: str) -> Any:
284
+ if name == 'python':
285
+ return Language(tree_sitter_python.language())
286
+ raise LookupError(name)
287
+ except ImportError:
288
+ sys.stderr.write(
289
+ "[ERROR] Missing tree-sitter language support.\n"
290
+ "Please run: pip install tree-sitter-language-pack\n"
291
+ )
292
+ sys.exit(1)
293
+
294
+ targets = requested if requested else sorted(set(extension_map.values()) | set(lang_queries.keys()))
295
+ languages: dict[str, Any] = {}
296
+ for name in targets:
297
+ try:
298
+ languages[name] = get_language(name)
299
+ except (LookupError, KeyError):
300
+ # Language package not installed; skip gracefully
301
+ pass
302
+
303
+ if not languages:
304
+ sys.stderr.write("[ERROR] No available language parsers. Install tree-sitter-language-pack\n")
305
+ sys.exit(1)
306
+ return languages
307
+
308
+
309
+ def _file_module_id(repo_path: Path, file_path: Path) -> str:
310
+ """Convert file path to dot-separated module ID.
311
+ Example: src/nexus/api/routes.py -> src.nexus.api.routes
312
+ src/core/parser.hpp → src.core.parser
313
+ """
314
+ rel = file_path.relative_to(repo_path)
315
+ parts = list(rel.parts)
316
+ stem = Path(parts[-1]).stem # Remove extension
317
+ parts[-1] = stem
318
+ # Python special case: merge __init__ into package path
319
+ if stem == '__init__' and len(parts) > 1:
320
+ parts = parts[:-1]
321
+ return '.'.join(parts) if parts else stem
322
+
323
+
324
+
325
+
326
def extract_file(
    repo_path: Path,
    file_path: Path,
    lang_name: str,
    language: Any,
    lang_queries: dict[str, dict[str, str]],
) -> tuple[list[dict], list[dict], list[str]]:
    """Parse a single source file and return (nodes, edges, errors).

    A Module node is always emitted once the file has been read and parsed.
    The language's 'struct' query then adds Class/Function nodes with
    'contains' edges, and its 'imports' query adds 'imports' edges from the
    module to every non-empty ``mod`` capture. A function whose byte range
    lies inside an already recorded class range is parented to the first
    such class, otherwise to the module.

    Failures never raise: read, parse, and query errors are appended to the
    returned ``errors`` list, and whatever was collected so far is returned.
    """
    from tree_sitter import Parser as TSParser, Query, QueryCursor

    nodes: list[dict] = []
    edges: list[dict] = []
    errors: list[str] = []

    try:
        source = file_path.read_bytes()
    except OSError as e:
        errors.append(f"{file_path}: read error: {e}")
        return nodes, edges, errors

    try:
        tree = TSParser(language).parse(source)
    except Exception as e:
        errors.append(f"{file_path}: parse error: {e}")
        return nodes, edges, errors

    def text_of(ts_node: Any) -> str:
        """Decode the source bytes covered by a Tree-sitter node."""
        return source[ts_node.start_byte:ts_node.end_byte].decode('utf-8', 'replace')

    rel_path = str(file_path.relative_to(repo_path)).replace('\\', '/')
    module_id = _file_module_id(repo_path, file_path)

    # Module node (file-level)
    nodes.append({
        'id': module_id,
        'type': 'Module',
        'label': module_id.split('.')[-1],
        'path': rel_path,
        'lines': source.count(b'\n') + 1,
        'lang': lang_name,
    })

    queries = lang_queries.get(lang_name, {})

    # ── Structure: classes / functions ──────────────────────────────────────
    struct_q_text = queries.get('struct', '')
    if struct_q_text.strip():
        try:
            # (start_byte, end_byte, class_id) of every class seen so far.
            class_ranges: list[tuple[int, int, str]] = []
            struct_matches = QueryCursor(Query(language, struct_q_text)).matches(tree.root_node)

            for _pattern_idx, captures in struct_matches:
                is_class = any('class' in capture_name for capture_name in captures.keys())
                prefix = 'class' if is_class else 'func'
                def_nodes = captures.get(f'{prefix}.def', [])
                name_nodes = captures.get(f'{prefix}.name', [])
                if not (def_nodes and name_nodes):
                    continue

                def_node = def_nodes[0]
                name = text_of(name_nodes[0])

                if is_class:
                    node_type = 'Class'
                    parent_id = module_id
                else:
                    node_type = 'Function'
                    parent_id = next(
                        (
                            cls_id
                            for cls_start, cls_end, cls_id in class_ranges
                            if cls_start <= def_node.start_byte and def_node.end_byte <= cls_end
                        ),
                        module_id,
                    )

                node_id = f"{parent_id}.{name}"
                nodes.append({
                    'id': node_id,
                    'type': node_type,
                    'label': name,
                    'path': rel_path,
                    'parent': parent_id,
                    'start_line': def_node.start_point[0] + 1,
                    'end_line': def_node.end_point[0] + 1,
                })
                if is_class:
                    class_ranges.append((def_node.start_byte, def_node.end_byte, node_id))
                edges.append({'source': parent_id, 'target': node_id, 'type': 'contains'})

        except Exception as e:
            errors.append(f"{file_path}: struct query error: {e}")

    # ── Imports: imports edges ──────────────────────────────────────────────
    import_q_text = queries.get('imports', '')
    if import_q_text.strip():
        try:
            import_matches = QueryCursor(Query(language, import_q_text)).matches(tree.root_node)
            for _pattern_idx, captures in import_matches:
                for mod_node in captures.get('mod', []):
                    # Drop surrounding quotes / angle brackets / spaces.
                    target = text_of(mod_node).strip('"\'<> ')
                    if target:
                        edges.append({'source': module_id, 'target': target, 'type': 'imports'})
        except Exception as e:
            errors.append(f"{file_path}: import query error: {e}")

    return nodes, edges, errors
439
+
440
+
441
def collect_source_files(
    repo_path: Path,
    languages: dict[str, Any],
    extension_map: dict[str, str],
    known_unsupported_extensions: dict[str, str],
) -> tuple[list[tuple[Path, str]], dict[str, int], dict[str, int], dict[str, int]]:
    """Find the parseable source files under ``repo_path`` and tally the rest.

    Files rejected by ``_should_skip_path`` are ignored. The remaining files
    are classified by their lower-cased suffix:

    Returns:
        - [(file_path, lang_name)] sorted by path, for files whose language
          is mapped in ``extension_map`` and present in ``languages``
        - supported_file_counts: {lang_name: file_count} for those files
        - known_unsupported_file_counts: {lang_name: file_count} for suffixes
          found only in ``known_unsupported_extensions``
        - configured_but_unavailable_file_counts: {lang_name: file_count} for
          mapped languages that are missing from ``languages``
    """
    files: list[tuple[Path, str]] = []
    supported_file_counts: dict[str, int] = {}
    known_unsupported_file_counts: dict[str, int] = {}
    configured_but_unavailable_file_counts: dict[str, int] = {}

    def bump(counts: dict[str, int], key: str) -> None:
        counts[key] = counts.get(key, 0) + 1

    for candidate in repo_path.rglob('*'):
        if not candidate.is_file() or _should_skip_path(repo_path, candidate):
            continue

        suffix = candidate.suffix.lower()
        lang = extension_map.get(suffix)
        if not lang:
            unsupported_lang = known_unsupported_extensions.get(suffix)
            if unsupported_lang:
                bump(known_unsupported_file_counts, unsupported_lang)
            continue

        if lang in languages:
            files.append((candidate, lang))
            bump(supported_file_counts, lang)
        else:
            bump(configured_but_unavailable_file_counts, lang)

    files.sort(key=lambda pair: pair[0])
    return (
        files,
        supported_file_counts,
        known_unsupported_file_counts,
        configured_but_unavailable_file_counts,
    )
490
+
491
+
492
+
493
def apply_max_nodes(
    nodes: list[dict],
    edges: list[dict],
    max_nodes: int,
) -> tuple[list[dict], list[dict], bool, int]:
    """
    When node count exceeds max_nodes, keep Module/Class first and truncate Function nodes.

    Module and Class nodes are always kept, even if they alone exceed
    ``max_nodes``; Function nodes fill whatever slots remain, in input order.
    'imports' edges are always kept (their targets are import names, not
    graph nodes). Every other edge is kept only when BOTH endpoints survive,
    so the result never contains an edge pointing at a truncated node.

    When no truncation is needed the input lists are returned unchanged.

    Returns (filtered_nodes, filtered_edges, truncated, truncated_count)
    """
    if len(nodes) <= max_nodes:
        return nodes, edges, False, 0

    priority_nodes = [n for n in nodes if n['type'] in ('Module', 'Class')]
    func_nodes = [n for n in nodes if n['type'] == 'Function']

    # Clamp at zero: a negative slice bound would keep functions from the front.
    remaining_slots = max(max_nodes - len(priority_nodes), 0)
    kept_funcs = func_nodes[:remaining_slots]
    kept_nodes = priority_nodes + kept_funcs
    truncated_count = len(func_nodes) - len(kept_funcs)

    kept_ids = {n['id'] for n in kept_nodes}
    kept_edges = [
        e for e in edges
        if e['type'] == 'imports'
        or (e['source'] in kept_ids and e['target'] in kept_ids)
    ]
    return kept_nodes, kept_edges, True, truncated_count
523
+
524
+
525
def main() -> None:
    """Command-line entry point: scan a repository and print its graph as JSON.

    Steps: parse the arguments, validate ``repo_path`` (exit status 1 when it
    does not exist, a stderr warning when it has no ``.git``), optionally
    write the filtered file tree, merge the language configuration, load the
    parsers, extract nodes/edges from every collected file, apply the
    ``--max-nodes`` limit, and print the result to stdout. At most the first
    20 extraction errors are included under ``_errors``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Extract AST structure from a multi-language repository'
    )
    arg_parser.add_argument('repo_path', help='Target repository path')
    arg_parser.add_argument(
        '--max-nodes',
        type=int,
        default=500,
        help='Max nodes in output (default: 500). Truncates Function nodes first.',
    )
    arg_parser.add_argument(
        '--add-extension',
        action='append',
        dest='add_extensions',
        metavar='EXT=LANG',
        help='Add extension mapping, e.g., .templ=templ. Can be used multiple times.',
    )
    arg_parser.add_argument(
        '--add-query',
        action='append',
        dest='add_queries',
        nargs=3,
        metavar=('LANG', 'TYPE', 'QUERY'),
        help='Add/override a query for a language. TYPE is "struct" or "imports". Can be used multiple times.',
    )
    arg_parser.add_argument(
        '--language-config',
        help='Optional JSON file that adds or overrides extension mappings and tree-sitter queries. Useful for complex configurations.',
    )
    arg_parser.add_argument(
        '--file-tree-out',
        help='Optional output path for a filtered file tree (e.g. .nexus-map/raw/file_tree.txt). Uses the same exclude rules as AST collection.',
    )
    args = arg_parser.parse_args()

    repo_path = Path(args.repo_path).resolve()
    if not repo_path.exists():
        sys.stderr.write(f"[ERROR] repo_path not found: {repo_path}\n")
        sys.exit(1)
    if not (repo_path / '.git').exists():
        sys.stderr.write(f"[WARNING] .git not found in {repo_path}, may not be a git repo\n")

    if args.file_tree_out:
        # Joining onto repo_path leaves an absolute --file-tree-out untouched.
        write_filtered_file_tree(repo_path, (repo_path / args.file_tree_out).resolve())

    # Process CLI customization arguments, then merge them with the
    # built-in configuration and the optional --language-config file.
    cli_customizations = _apply_cli_customizations(args.add_extensions, args.add_queries)
    merged_config = _load_language_customizations(
        repo_path,
        args.language_config,
        *cli_customizations,
    )
    extension_map, lang_queries, known_unsupported_extensions = merged_config[:3]
    config_warnings, loaded_config_paths, custom_query_languages = merged_config[3:]

    languages = _load_languages(extension_map, lang_queries)
    collected = collect_source_files(
        repo_path,
        languages,
        extension_map,
        known_unsupported_extensions,
    )
    source_files, supported_file_counts = collected[:2]
    known_unsupported_file_counts, configured_but_unavailable_file_counts = collected[2:]

    if not source_files:
        sys.stderr.write(f"[WARNING] No supported source files found in {repo_path}\n")

    all_nodes: list[dict] = []
    all_edges: list[dict] = []
    all_errors: list[str] = []
    detected_langs: set[str] = set()
    total_lines = 0
    warnings: list[str] = list(config_warnings)
    module_only_file_counts: dict[str, int] = {}
    languages_with_structural_queries = sorted(
        lang
        for lang, query_parts in lang_queries.items()
        if query_parts.get('struct', '').strip()
    )
    structural_langs = set(languages_with_structural_queries)

    for file_path, lang_name in source_files:
        nodes, edges, errors = extract_file(
            repo_path, file_path, lang_name, languages[lang_name], lang_queries
        )
        all_nodes += nodes
        all_edges += edges
        all_errors += errors
        if lang_name not in structural_langs:
            module_only_file_counts[lang_name] = module_only_file_counts.get(lang_name, 0) + 1
        if nodes:
            detected_langs.add(lang_name)
            # The first node of a file is its Module node, which carries 'lines'.
            total_lines += nodes[0].get('lines', 0)

    final_nodes, final_edges, truncated, truncated_count = apply_max_nodes(
        all_nodes, all_edges, args.max_nodes
    )

    def summarize(counts: dict[str, int]) -> str:
        """Render {lang: count} as 'lang (N files), ...' sorted by language."""
        return ', '.join(
            f"{lang} ({count} files)" for lang, count in sorted(counts.items())
        )

    if known_unsupported_file_counts:
        warnings.append(
            "known unsupported languages present; downstream outputs must mark inferred sections explicitly: "
            + summarize(known_unsupported_file_counts)
        )

    if configured_but_unavailable_file_counts:
        warnings.append(
            'some configured languages were detected in source files but no parser could be loaded: '
            + summarize(configured_but_unavailable_file_counts)
        )

    if module_only_file_counts:
        warnings.append(
            "some languages were parsed with module-only coverage because no structural query template is bundled: "
            + summarize(module_only_file_counts)
        )

    if loaded_config_paths:
        warnings.append(
            f"custom language configuration loaded: {', '.join(loaded_config_paths)}"
        )

    stats = {
        'total_files': len(source_files),
        'total_lines': total_lines,
        'parse_errors': len(all_errors),
        'truncated': truncated,
        'truncated_nodes': truncated_count,
        'supported_file_counts': supported_file_counts,
        'languages_with_structural_queries': languages_with_structural_queries,
        'languages_with_custom_queries': sorted(custom_query_languages.keys()),
        'module_only_file_counts': module_only_file_counts,
        'known_unsupported_file_counts': known_unsupported_file_counts,
        'configured_but_unavailable_file_counts': configured_but_unavailable_file_counts,
        'custom_language_config_paths': loaded_config_paths,
    }
    result = {
        'languages': sorted(detected_langs),
        'stats': stats,
        'nodes': final_nodes,
        'edges': final_edges,
    }

    if all_errors:
        result['_errors'] = all_errors[:20]
    if warnings:
        result['warnings'] = warnings

    print(json.dumps(result, ensure_ascii=False, indent=2))
702
+
703
+
704
+ if __name__ == '__main__':
705
+ main()
706
+