ruth-code 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. frontend/dist/assets/geist-mono-cyrillic-400-normal-BPBWmzPh.woff +0 -0
  2. frontend/dist/assets/geist-mono-cyrillic-400-normal-Ce5q_31Z.woff2 +0 -0
  3. frontend/dist/assets/geist-mono-cyrillic-500-normal-CJBLNVQT.woff2 +0 -0
  4. frontend/dist/assets/geist-mono-cyrillic-500-normal-mNhfPmgl.woff +0 -0
  5. frontend/dist/assets/geist-mono-cyrillic-600-normal-CGND36d7.woff2 +0 -0
  6. frontend/dist/assets/geist-mono-cyrillic-600-normal-DrylrLu6.woff +0 -0
  7. frontend/dist/assets/geist-mono-cyrillic-700-normal-DH5Q319x.woff +0 -0
  8. frontend/dist/assets/geist-mono-cyrillic-700-normal-VCNRadI3.woff2 +0 -0
  9. frontend/dist/assets/geist-mono-latin-400-normal-CoULgQGM.woff +0 -0
  10. frontend/dist/assets/geist-mono-latin-400-normal-LC9RFr9I.woff2 +0 -0
  11. frontend/dist/assets/geist-mono-latin-500-normal-D3o2eNa9.woff2 +0 -0
  12. frontend/dist/assets/geist-mono-latin-500-normal-DOxI7kZ4.woff +0 -0
  13. frontend/dist/assets/geist-mono-latin-600-normal-DQQBcVN0.woff2 +0 -0
  14. frontend/dist/assets/geist-mono-latin-600-normal-DsVeri3b.woff +0 -0
  15. frontend/dist/assets/geist-mono-latin-700-normal-D6izGJRP.woff2 +0 -0
  16. frontend/dist/assets/geist-mono-latin-700-normal-QGw08Lff.woff +0 -0
  17. frontend/dist/assets/geist-mono-latin-ext-400-normal-Cgks_Qgx.woff2 +0 -0
  18. frontend/dist/assets/geist-mono-latin-ext-400-normal-CxNRRMGd.woff +0 -0
  19. frontend/dist/assets/geist-mono-latin-ext-500-normal-CQcGuCNt.woff2 +0 -0
  20. frontend/dist/assets/geist-mono-latin-ext-500-normal-diTenJ8L.woff +0 -0
  21. frontend/dist/assets/geist-mono-latin-ext-600-normal-CJwYYto2.woff2 +0 -0
  22. frontend/dist/assets/geist-mono-latin-ext-600-normal-EvIRCXgu.woff +0 -0
  23. frontend/dist/assets/geist-mono-latin-ext-700-normal-BX9f1BHp.woff +0 -0
  24. frontend/dist/assets/geist-mono-latin-ext-700-normal-YOllDaLV.woff2 +0 -0
  25. frontend/dist/assets/index-AEO_WTHY.js +59 -0
  26. frontend/dist/assets/index-JUssvikZ.css +1 -0
  27. frontend/dist/assets/inter-cyrillic-400-normal-HOLc17fK.woff +0 -0
  28. frontend/dist/assets/inter-cyrillic-400-normal-obahsSVq.woff2 +0 -0
  29. frontend/dist/assets/inter-cyrillic-500-normal-BasfLYem.woff2 +0 -0
  30. frontend/dist/assets/inter-cyrillic-500-normal-CxZf_p3X.woff +0 -0
  31. frontend/dist/assets/inter-cyrillic-600-normal-4D_pXhcN.woff +0 -0
  32. frontend/dist/assets/inter-cyrillic-600-normal-CWCymEST.woff2 +0 -0
  33. frontend/dist/assets/inter-cyrillic-700-normal-CjBOestx.woff2 +0 -0
  34. frontend/dist/assets/inter-cyrillic-700-normal-DrXBdSj3.woff +0 -0
  35. frontend/dist/assets/inter-cyrillic-ext-400-normal-BQZuk6qB.woff2 +0 -0
  36. frontend/dist/assets/inter-cyrillic-ext-400-normal-DQukG94-.woff +0 -0
  37. frontend/dist/assets/inter-cyrillic-ext-500-normal-B0yAr1jD.woff2 +0 -0
  38. frontend/dist/assets/inter-cyrillic-ext-500-normal-BmqWE9Dz.woff +0 -0
  39. frontend/dist/assets/inter-cyrillic-ext-600-normal-Bcila6Z-.woff +0 -0
  40. frontend/dist/assets/inter-cyrillic-ext-600-normal-Dfes3d0z.woff2 +0 -0
  41. frontend/dist/assets/inter-cyrillic-ext-700-normal-BjwYoWNd.woff2 +0 -0
  42. frontend/dist/assets/inter-cyrillic-ext-700-normal-LO58E6JB.woff +0 -0
  43. frontend/dist/assets/inter-greek-400-normal-B4URO6DV.woff2 +0 -0
  44. frontend/dist/assets/inter-greek-400-normal-q2sYcFCs.woff +0 -0
  45. frontend/dist/assets/inter-greek-500-normal-BIZE56-Y.woff2 +0 -0
  46. frontend/dist/assets/inter-greek-500-normal-Xzm54t5V.woff +0 -0
  47. frontend/dist/assets/inter-greek-600-normal-BZpKdvQh.woff +0 -0
  48. frontend/dist/assets/inter-greek-600-normal-plRanbMR.woff2 +0 -0
  49. frontend/dist/assets/inter-greek-700-normal-BUv2fZ6O.woff +0 -0
  50. frontend/dist/assets/inter-greek-700-normal-C3JjAnD8.woff2 +0 -0
  51. frontend/dist/assets/inter-greek-ext-400-normal-DGGRlc-M.woff2 +0 -0
  52. frontend/dist/assets/inter-greek-ext-400-normal-KugGGMne.woff +0 -0
  53. frontend/dist/assets/inter-greek-ext-500-normal-2j5mBUwD.woff +0 -0
  54. frontend/dist/assets/inter-greek-ext-500-normal-C4iEst2y.woff2 +0 -0
  55. frontend/dist/assets/inter-greek-ext-600-normal-B8X0CLgF.woff +0 -0
  56. frontend/dist/assets/inter-greek-ext-600-normal-DRtmH8MT.woff2 +0 -0
  57. frontend/dist/assets/inter-greek-ext-700-normal-BoQ6DsYi.woff +0 -0
  58. frontend/dist/assets/inter-greek-ext-700-normal-qfdV9bQt.woff2 +0 -0
  59. frontend/dist/assets/inter-latin-400-normal-C38fXH4l.woff2 +0 -0
  60. frontend/dist/assets/inter-latin-400-normal-CyCys3Eg.woff +0 -0
  61. frontend/dist/assets/inter-latin-500-normal-BL9OpVg8.woff +0 -0
  62. frontend/dist/assets/inter-latin-500-normal-Cerq10X2.woff2 +0 -0
  63. frontend/dist/assets/inter-latin-600-normal-CiBQ2DWP.woff +0 -0
  64. frontend/dist/assets/inter-latin-600-normal-LgqL8muc.woff2 +0 -0
  65. frontend/dist/assets/inter-latin-700-normal-BLAVimhd.woff +0 -0
  66. frontend/dist/assets/inter-latin-700-normal-Yt3aPRUw.woff2 +0 -0
  67. frontend/dist/assets/inter-latin-ext-400-normal-77YHD8bZ.woff +0 -0
  68. frontend/dist/assets/inter-latin-ext-400-normal-C1nco2VV.woff2 +0 -0
  69. frontend/dist/assets/inter-latin-ext-500-normal-BxGbmqWO.woff +0 -0
  70. frontend/dist/assets/inter-latin-ext-500-normal-CV4jyFjo.woff2 +0 -0
  71. frontend/dist/assets/inter-latin-ext-600-normal-CIVaiw4L.woff +0 -0
  72. frontend/dist/assets/inter-latin-ext-600-normal-D2bJ5OIk.woff2 +0 -0
  73. frontend/dist/assets/inter-latin-ext-700-normal-Ca8adRJv.woff2 +0 -0
  74. frontend/dist/assets/inter-latin-ext-700-normal-TidjK2hL.woff +0 -0
  75. frontend/dist/assets/inter-vietnamese-400-normal-Bbgyi5SW.woff +0 -0
  76. frontend/dist/assets/inter-vietnamese-400-normal-DMkecbls.woff2 +0 -0
  77. frontend/dist/assets/inter-vietnamese-500-normal-DOriooB6.woff2 +0 -0
  78. frontend/dist/assets/inter-vietnamese-500-normal-mJboJaSs.woff +0 -0
  79. frontend/dist/assets/inter-vietnamese-600-normal-BuLX-rYi.woff +0 -0
  80. frontend/dist/assets/inter-vietnamese-600-normal-Cc8MFFhd.woff2 +0 -0
  81. frontend/dist/assets/inter-vietnamese-700-normal-BZaoP0fm.woff +0 -0
  82. frontend/dist/assets/inter-vietnamese-700-normal-DlLaEgI2.woff2 +0 -0
  83. frontend/dist/favicon.svg +1 -0
  84. frontend/dist/icons.svg +24 -0
  85. frontend/dist/index.html +15 -0
  86. frontend/dist/logo.svg +1 -0
  87. ruth/__init__.py +3 -0
  88. ruth/annotations/__init__.py +1 -0
  89. ruth/annotations/complexity.py +128 -0
  90. ruth/annotations/coverage.py +106 -0
  91. ruth/cli.py +167 -0
  92. ruth/graph/__init__.py +1 -0
  93. ruth/graph/engine.py +383 -0
  94. ruth/parser/__init__.py +1 -0
  95. ruth/parser/discovery.py +226 -0
  96. ruth/parser/symbols.py +656 -0
  97. ruth/server.py +162 -0
  98. ruth_code-0.1.0.dist-info/METADATA +106 -0
  99. ruth_code-0.1.0.dist-info/RECORD +102 -0
  100. ruth_code-0.1.0.dist-info/WHEEL +4 -0
  101. ruth_code-0.1.0.dist-info/entry_points.txt +2 -0
  102. ruth_code-0.1.0.dist-info/licenses/LICENSE +21 -0
ruth/graph/engine.py ADDED
@@ -0,0 +1,383 @@
1
+ """Graph engine — builds React Flow-compatible nodes and edges from parsed symbols.
2
+
3
+ Takes the output of the parser layer (discovered files + extracted symbols)
4
+ and produces a graph structure matching the frontend TypeScript types exactly.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import hashlib
10
+ from pathlib import Path
11
+ from typing import Any
12
+
13
+ from ruth.parser.discovery import SourceFile, DiscoveryResult
14
+ from ruth.parser.symbols import FileSymbols, parse_file
15
+ from ruth.annotations.complexity import compute_complexity
16
+
17
+
18
+ def _node_id(kind: str, name: str, file_path: str) -> str:
19
+ """Generate a stable, unique node ID."""
20
+ raw = f"{kind}:{file_path}:{name}"
21
+ return hashlib.sha1(raw.encode()).hexdigest()[:12]
22
+
23
+
24
+ def _edge_id(source: str, target: str, kind: str) -> str:
25
+ """Generate a stable edge ID."""
26
+ raw = f"{kind}:{source}->{target}"
27
+ return hashlib.sha1(raw.encode()).hexdigest()[:12]
28
+
29
+
30
def _graph_node(
    node_id: str,
    kind: str,
    label: str,
    language: str,
    file_path: str,
    extra: dict[str, Any],
    complexity: Any = None,
) -> dict[str, Any]:
    """Assemble one node dict: the fields shared by every kind, then ``extra``."""
    data: dict[str, Any] = {
        "label": label,
        "kind": kind,
        "language": language,
        "filePath": file_path,
        "annotations": {
            "complexity": complexity,
            "vulnerabilities": [],
            "coverage": None,
        },
    }
    # Kind-specific fields go after the shared ones, preserving key order.
    data.update(extra)
    return {
        "id": node_id,
        "type": kind,
        "position": {"x": 0, "y": 0},
        "data": data,
    }


def build_graph(
    discovery: DiscoveryResult,
    project_root: Path,
    granularity: str = "module",  # module | class | function
) -> dict[str, Any]:
    """Build the full graph from discovery results.

    Args:
        discovery: Result from file discovery.
        project_root: Root path of the project; only its ``name`` is used,
            as the ``projectName`` of the payload.
        granularity: Level of detail for nodes. ``"module"`` emits module and
            directory nodes only, ``"class"`` adds class nodes, ``"function"``
            adds class nodes, function nodes and call edges.

    Returns:
        Dict with ``nodes``, ``edges``, ``projectName`` and ``languages``
        (sorted), intended to match the frontend FullGraphPayload type.
    """
    nodes: list[dict[str, Any]] = []
    edges: list[dict[str, Any]] = []
    # id -> first edge appended with that id; gives O(1) duplicate detection
    # instead of rescanning ``edges`` for every import/call.
    edges_by_id: dict[str, dict[str, Any]] = {}
    seen_nodes: dict[str, str] = {}  # key -> node_id
    module_symbols: dict[str, FileSymbols] = {}  # rel_path -> symbols

    def add_edge(edge: dict[str, Any]) -> None:
        edges.append(edge)
        edges_by_id.setdefault(edge["id"], edge)

    with_classes = granularity in ("class", "function")
    with_functions = granularity == "function"

    # Phase 1: parse every file and create its module node (plus class and
    # function nodes when the granularity asks for them).
    for source_file in discovery.files:
        rel_path = source_file.relative_path
        language = source_file.language
        symbols = parse_file(source_file.content, language)
        module_symbols[rel_path] = symbols

        module_id = _node_id("module", rel_path, rel_path)
        seen_nodes[rel_path] = module_id
        nodes.append(
            _graph_node(
                module_id,
                "module",
                Path(rel_path).stem,
                language,
                rel_path,
                {
                    "exportCount": symbols.export_count,
                    "importCount": len(symbols.imports),
                    "lineCount": source_file.line_count,
                },
                complexity=compute_complexity(source_file.content, language),
            )
        )

        if with_classes:
            for cls in symbols.classes:
                cls_id = _node_id("class", cls.name, rel_path)
                seen_nodes[f"{rel_path}::{cls.name}"] = cls_id
                nodes.append(
                    _graph_node(
                        cls_id,
                        "class",
                        cls.name,
                        language,
                        rel_path,
                        {
                            "methodCount": len(cls.methods),
                            "propertyCount": len(cls.properties),
                            "parentClass": cls.parent_class,
                        },
                    )
                )
                # Module -> class containment edge. NOTE: only the edge ID
                # uses the "contains" kind; type/edgeKind are emitted as
                # "import", exactly as before.
                add_edge({
                    "id": _edge_id(module_id, cls_id, "contains"),
                    "source": module_id,
                    "target": cls_id,
                    "type": "import",
                    "data": {"edgeKind": "import"},
                })

        if with_functions:
            for func in symbols.functions:
                func_id = _node_id("function", func.name, rel_path)
                seen_nodes[f"{rel_path}::{func.name}"] = func_id
                nodes.append(
                    _graph_node(
                        func_id,
                        "function",
                        func.name,
                        language,
                        rel_path,
                        {
                            "params": func.params,
                            "returnType": func.return_type,
                            "isAsync": func.is_async,
                            "isExported": func.is_exported,
                        },
                    )
                )

    # Phase 2: directory supernodes. Count the files directly inside each
    # directory in a single pass rather than rescanning per directory.
    files_per_dir: dict[str, int] = {}
    for source_file in discovery.files:
        parent = source_file.directory
        files_per_dir[parent] = files_per_dir.get(parent, 0) + 1

    for directory in sorted(discovery.directories):
        dir_id = _node_id("directory", directory, directory)
        nodes.append(
            _graph_node(
                dir_id,
                "directory",
                Path(directory).name,
                "unknown",
                directory,
                {
                    "childCount": files_per_dir.get(directory, 0),
                    "isExpanded": True,
                },
            )
        )
        seen_nodes[f"dir:{directory}"] = dir_id

    # Phase 3: import edges between modules. Repeated imports of the same
    # target accumulate into the edge's "weight" instead of adding edges.
    for source_file in discovery.files:
        symbols = module_symbols[source_file.relative_path]
        source_id = seen_nodes[source_file.relative_path]

        for imp in symbols.imports:
            target_id = _resolve_import(
                imp.module, source_file, discovery.files, seen_nodes
            )
            if not target_id or target_id == source_id:
                continue
            eid = _edge_id(source_id, target_id, "import")
            existing = edges_by_id.get(eid)
            if existing is not None:
                existing["data"]["weight"] = existing["data"].get("weight", 1) + 1
            else:
                add_edge({
                    "id": eid,
                    "source": source_id,
                    "target": target_id,
                    "type": "import",
                    "data": {"edgeKind": "import", "weight": 1},
                })

    # Phase 4: call edges, matched to known functions by bare name.
    if with_functions:
        # Name -> node ID; a later file's function of the same name replaces
        # an earlier one, so calls resolve to the last definition seen.
        all_functions: dict[str, str] = {}
        for source_file in discovery.files:
            syms = module_symbols[source_file.relative_path]
            for func in syms.functions:
                key = f"{source_file.relative_path}::{func.name}"
                if key in seen_nodes:
                    all_functions[func.name] = seen_nodes[key]

        for source_file in discovery.files:
            rel_path = source_file.relative_path
            syms = module_symbols[rel_path]
            for call in syms.calls:
                callee_name = call.callee.split(".")[-1]
                if callee_name not in all_functions:
                    continue

                # Heuristic for the caller: the last function in
                # ``syms.functions`` that starts at or before the call line.
                # There is no end-line check, so a module-level call placed
                # after a function is attributed to that function.
                source_func_id = None
                for func in syms.functions:
                    if func.line <= call.line:
                        source_func_id = seen_nodes.get(f"{rel_path}::{func.name}")
                if source_func_id is None:
                    source_func_id = seen_nodes.get(rel_path)

                target_func_id = all_functions[callee_name]
                if (
                    source_func_id
                    and target_func_id
                    and source_func_id != target_func_id
                ):
                    eid = _edge_id(source_func_id, target_func_id, "call")
                    if eid not in edges_by_id:
                        add_edge({
                            "id": eid,
                            "source": source_func_id,
                            "target": target_func_id,
                            "type": "call",
                            "data": {"edgeKind": "call"},
                        })

    # Phase 5: tag landmark nodes (entry points, hubs, ...) in place.
    _detect_landmarks(nodes, edges, module_symbols)

    return {
        "nodes": nodes,
        "edges": edges,
        "projectName": project_root.name,
        "languages": sorted(discovery.languages),
    }
239
+
240
+
241
+ # ── Entry-point filename patterns ────────────────────────────────────────
242
+ _ENTRYPOINT_STEMS = {
243
+ "main", "index", "app", "cli", "server", "manage", "wsgi", "asgi",
244
+ "__main__", "run", "start", "entry", "boot",
245
+ }
246
+
247
+ # Config-like filenames
248
+ _CONFIG_STEMS = {
249
+ "config", "settings", "configuration", "conf", "constants", "env",
250
+ "setup", "webpack.config", "vite.config", "tsconfig", "eslint.config",
251
+ "pyproject", "package",
252
+ }
253
+
254
+
255
+ def _detect_landmarks(
256
+ nodes: list[dict[str, Any]],
257
+ edges: list[dict[str, Any]],
258
+ module_symbols: dict[str, FileSymbols],
259
+ ) -> None:
260
+ """Detect and tag landmark nodes — the 'points of interest' of the codebase.
261
+
262
+ Roles:
263
+ entrypoint — main/index/cli files (like a Google Maps 'start here' pin)
264
+ orchestrator — files with the most outgoing imports (they wire everything together)
265
+ hub — files imported by the most other files (central dependencies)
266
+ config — configuration/settings files
267
+ island — files with zero connections (isolated, potential dead code)
268
+ """
269
+ module_nodes = [n for n in nodes if n["type"] == "module"]
270
+ if not module_nodes:
271
+ return
272
+
273
+ # Build adjacency counts
274
+ out_degree: dict[str, int] = {} # node_id -> number of outgoing edges
275
+ in_degree: dict[str, int] = {} # node_id -> number of incoming edges
276
+ for edge in edges:
277
+ if edge["data"]["edgeKind"] == "import":
278
+ out_degree[edge["source"]] = out_degree.get(edge["source"], 0) + 1
279
+ in_degree[edge["target"]] = in_degree.get(edge["target"], 0) + 1
280
+
281
+ # Thresholds for orchestrator/hub (top percentile)
282
+ all_out = sorted(out_degree.values()) if out_degree else [0]
283
+ all_in = sorted(in_degree.values()) if in_degree else [0]
284
+ orchestrator_threshold = all_out[int(len(all_out) * 0.85)] if len(all_out) > 2 else 3
285
+ hub_threshold = all_in[int(len(all_in) * 0.85)] if len(all_in) > 2 else 3
286
+ orchestrator_threshold = max(orchestrator_threshold, 3)
287
+ hub_threshold = max(hub_threshold, 3)
288
+
289
+ for node in module_nodes:
290
+ nid = node["id"]
291
+ file_path = node["data"]["filePath"]
292
+ stem = Path(file_path).stem.lower()
293
+ out = out_degree.get(nid, 0)
294
+ inp = in_degree.get(nid, 0)
295
+
296
+ roles: list[str] = []
297
+
298
+ # 1. Entry points — by filename convention
299
+ if stem in _ENTRYPOINT_STEMS:
300
+ roles.append("entrypoint")
301
+
302
+ # 2. Config files
303
+ if stem in _CONFIG_STEMS:
304
+ roles.append("config")
305
+
306
+ # 3. Orchestrators — files that import many others (wiring files)
307
+ if out >= orchestrator_threshold and out > inp:
308
+ roles.append("orchestrator")
309
+
310
+ # 4. Hubs — files imported by many others (core libraries)
311
+ if inp >= hub_threshold and inp >= out:
312
+ roles.append("hub")
313
+
314
+ # 5. Islands — zero connections
315
+ if out == 0 and inp == 0:
316
+ roles.append("island")
317
+
318
+ # Write into node data
319
+ node["data"]["roles"] = roles
320
+ node["data"]["inDegree"] = inp
321
+ node["data"]["outDegree"] = out
322
+
323
+ # Also tag directory nodes with empty roles
324
+ for node in nodes:
325
+ if node["type"] == "directory":
326
+ node["data"]["roles"] = []
327
+ node["data"]["inDegree"] = 0
328
+ node["data"]["outDegree"] = 0
329
+
330
+
331
+ def _resolve_import(
332
+ module_name: str,
333
+ source_file: SourceFile,
334
+ all_files: list[SourceFile],
335
+ seen_nodes: dict[str, str],
336
+ ) -> str | None:
337
+ """Try to resolve an import string to a known module node ID.
338
+
339
+ Handles relative imports and various module naming conventions.
340
+ """
341
+ # Normalize module name to possible filenames
342
+ candidates = []
343
+
344
+ # Direct match: "foo.bar" → "foo/bar.py", "foo/bar.ts", etc.
345
+ parts = module_name.replace(".", "/").replace("::", "/")
346
+ candidates.append(parts)
347
+
348
+ # Try with various extensions
349
+ for ext in (".py", ".ts", ".tsx", ".js", ".jsx", ".rs", ".go", ".java", ".rb"):
350
+ candidates.append(f"{parts}{ext}")
351
+ candidates.append(f"{parts}/index{ext}")
352
+ candidates.append(f"{parts}/mod{ext}")
353
+
354
+ # For relative imports, try relative to source file
355
+ source_dir = str(Path(source_file.relative_path).parent)
356
+ if source_dir != ".":
357
+ for ext in (".py", ".ts", ".tsx", ".js", ".jsx"):
358
+ candidates.append(f"{source_dir}/{parts}{ext}")
359
+ candidates.append(f"{source_dir}/{parts}/index{ext}")
360
+
361
+ # Try npm-style: @scope/package → node_modules/...
362
+ # For now just match by filename stem
363
+ module_stem = parts.split("/")[-1]
364
+
365
+ # Search seen_nodes
366
+ for candidate in candidates:
367
+ if candidate in seen_nodes:
368
+ return seen_nodes[candidate]
369
+
370
+ # Fuzzy: match by filename stem
371
+ for file_path, node_id in seen_nodes.items():
372
+ if file_path.startswith("dir:"):
373
+ continue
374
+ file_stem = Path(file_path).stem
375
+ if file_stem == module_stem:
376
+ return node_id
377
+ # Try "index" files in matching directory
378
+ if file_stem in ("index", "mod", "__init__"):
379
+ file_dir = Path(file_path).parent.name
380
+ if file_dir == module_stem:
381
+ return node_id
382
+
383
+ return None
@@ -0,0 +1 @@
1
+ """Ruth parser package — file discovery, language detection, AST extraction."""
@@ -0,0 +1,226 @@
1
+ """File discovery and language detection for Ruth.
2
+
3
+ Walks a project directory, respects gitignore, detects languages, and returns
4
+ a list of source files to parse.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import os
10
+ from pathlib import Path
11
+ from dataclasses import dataclass, field
12
+ from typing import Iterator
13
+
14
+ # ── Language detection by file extension ────────────────────────────────
15
+
16
# File extension (lower-case, with leading dot) -> language name.
EXTENSION_MAP: dict[str, str] = {
    # Python
    ".py": "python", ".pyi": "python",
    # TypeScript
    ".ts": "typescript", ".tsx": "typescript",
    # JavaScript
    ".js": "javascript", ".jsx": "javascript",
    ".mjs": "javascript", ".cjs": "javascript",
    # Rust / Go / Java / Ruby
    ".rs": "rust",
    ".go": "go",
    ".java": "java",
    ".rb": "ruby",
    # C
    ".c": "c", ".h": "c",
    # C++
    ".cpp": "cpp", ".cc": "cpp", ".cxx": "cpp", ".hpp": "cpp",
}
36
+
37
# Directory names that are never descended into.
SKIP_DIRS: set[str] = {
    # Version control
    ".git", ".svn", ".hg",
    # Dependency and tool caches
    "node_modules", "__pycache__", ".mypy_cache", ".pytest_cache",
    # Virtual environments and test runners
    ".tox", ".nox", ".venv", "venv", "env",
    # Build output
    "dist", "build", ".next", ".nuxt",
    "target",  # Rust
    "vendor",  # Go
    ".cargo",
    # Python packaging
    ".eggs", "*.egg-info",
    # Coverage reports
    "coverage", ".coverage",
    # Editor settings
    ".idea", ".vscode",
}

# File names that are never parsed (dependency lock files).
SKIP_FILES: set[str] = {
    "package-lock.json", "yarn.lock", "pnpm-lock.yaml",
    "Cargo.lock", "go.sum", "poetry.lock",
}

# Largest file size, in bytes, that is still parsed (500KB — skips minified
# bundles etc.).
MAX_FILE_SIZE = 500_000
59
+
60
+
61
@dataclass
class SourceFile:
    """One parseable source file found under the project root."""

    path: Path  # location of the file on disk
    relative_path: str  # path relative to the project root
    language: str  # language name detected for the file
    size: int  # file size in bytes
    line_count: int = 0  # number of lines in ``content``
    content: str = ""  # decoded file text

    @property
    def directory(self) -> str:
        """Relative path of the containing directory; ``""`` at the root."""
        containing = Path(self.relative_path).parent
        as_text = str(containing)
        if as_text == ".":
            return ""
        return as_text
76
+
77
+
78
@dataclass
class DiscoveryResult:
    """Aggregate outcome of scanning a project for source files."""

    # Every source file that was accepted.
    files: list[SourceFile] = field(default_factory=list)
    # Distinct language names across ``files``.
    languages: set[str] = field(default_factory=set)
    # Relative paths of directories that hold accepted files, plus ancestors.
    directories: set[str] = field(default_factory=set)
    # Sum of ``line_count`` over ``files``.
    total_lines: int = 0
    # Number of files that were passed over.
    skipped: int = 0
86
+
87
+
88
def detect_language(path: Path) -> str | None:
    """Look up the language for ``path`` by its lower-cased file extension.

    Returns the language name from ``EXTENSION_MAP``, or ``None`` when the
    extension is not listed there.
    """
    extension = path.suffix.lower()
    return EXTENSION_MAP.get(extension)
91
+
92
+
93
def should_skip_dir(name: str) -> bool:
    """Check if a directory should be skipped.

    A directory is skipped when its name is hidden (starts with ``"."``),
    is listed verbatim in ``SKIP_DIRS``, or matches one of the wildcard
    entries of ``SKIP_DIRS`` (e.g. ``"*.egg-info"``).

    Args:
        name: Bare directory name, not a path.
    """
    from fnmatch import fnmatchcase  # local import keeps this block self-contained

    if name in SKIP_DIRS or name.startswith("."):
        return True
    # Plain membership can never match a wildcard entry, so those are
    # compared as case-sensitive glob patterns.
    return any(
        fnmatchcase(name, pattern) for pattern in SKIP_DIRS if "*" in pattern
    )
96
+
97
+
98
def should_skip_file(name: str, size: int) -> bool:
    """Tell whether a file is excluded from parsing.

    A file is excluded when its name is listed in ``SKIP_FILES`` or its size
    in bytes is greater than ``MAX_FILE_SIZE``.
    """
    return name in SKIP_FILES or size > MAX_FILE_SIZE
105
+
106
+
107
+ def _parse_gitignore(project_root: Path) -> list[str]:
108
+ """Parse .gitignore patterns (simple implementation)."""
109
+ gitignore = project_root / ".gitignore"
110
+ if not gitignore.exists():
111
+ return []
112
+ patterns = []
113
+ for line in gitignore.read_text(errors="ignore").splitlines():
114
+ line = line.strip()
115
+ if line and not line.startswith("#"):
116
+ patterns.append(line)
117
+ return patterns
118
+
119
+
120
+ def _matches_gitignore(rel_path: str, patterns: list[str]) -> bool:
121
+ """Simple gitignore matching (directory and file patterns)."""
122
+ parts = rel_path.split(os.sep)
123
+ for pattern in patterns:
124
+ clean = pattern.rstrip("/")
125
+ # Directory match
126
+ if clean in parts:
127
+ return True
128
+ # Simple glob suffix match
129
+ if clean.startswith("*") and rel_path.endswith(clean[1:]):
130
+ return True
131
+ # Exact match
132
+ if rel_path == clean:
133
+ return True
134
+ return False
135
+
136
+
137
def discover_files(
    project_root: Path,
    max_files: int = 5000,
) -> DiscoveryResult:
    """Walk the project directory and discover parseable source files.

    Directories and files are visited in sorted order, so the result (and
    which files fall under the ``max_files`` limit) is the same on every run
    regardless of filesystem enumeration order.

    Args:
        project_root: Root directory to scan.
        max_files: Max files to process (safety limit for huge repos).

    Returns:
        DiscoveryResult with all discovered source files.
    """
    result = DiscoveryResult()
    project_root = project_root.resolve()
    gitignore_patterns = _parse_gitignore(project_root)

    for dirpath, dirnames, filenames in os.walk(project_root, topdown=True):
        # Prune skipped directories in place (os.walk honours this when
        # topdown=True) and sort what remains for a deterministic walk.
        dirnames[:] = sorted(d for d in dirnames if not should_skip_dir(d))

        rel_dir = os.path.relpath(dirpath, project_root)

        # A git-ignored directory is dropped together with its subtree.
        if rel_dir != "." and _matches_gitignore(rel_dir, gitignore_patterns):
            dirnames.clear()
            continue

        for filename in sorted(filenames):
            # Past the limit every remaining file is only counted as skipped.
            if len(result.files) >= max_files:
                result.skipped += 1
                continue

            filepath = Path(dirpath) / filename
            rel_path = os.path.relpath(filepath, project_root)

            if _matches_gitignore(rel_path, gitignore_patterns):
                result.skipped += 1
                continue

            # Files with an unknown extension are ignored without being
            # counted as skipped.
            language = detect_language(filepath)
            if language is None:
                continue

            try:
                size = filepath.stat().st_size
            except OSError:
                continue

            if should_skip_file(filename, size):
                result.skipped += 1
                continue

            try:
                content = filepath.read_text(encoding="utf-8", errors="ignore")
            except (OSError, UnicodeDecodeError):
                result.skipped += 1
                continue

            # A final line without a trailing newline still counts as a line.
            line_count = content.count("\n") + (
                1 if content and not content.endswith("\n") else 0
            )

            source_file = SourceFile(
                path=filepath,
                relative_path=rel_path,
                language=language,
                size=size,
                line_count=line_count,
                content=content,
            )

            result.files.append(source_file)
            result.languages.add(language)
            result.total_lines += line_count

            # Record the file's directory and every ancestor up to the root.
            directory = source_file.directory
            if directory:
                parts = Path(directory).parts
                for depth in range(1, len(parts) + 1):
                    result.directories.add(str(Path(*parts[:depth])))

    return result