codedocent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- codedocent/__init__.py +1 -0
- codedocent/__main__.py +4 -0
- codedocent/analyzer.py +620 -0
- codedocent/cli.py +132 -0
- codedocent/editor.py +85 -0
- codedocent/parser.py +369 -0
- codedocent/renderer.py +79 -0
- codedocent/scanner.py +135 -0
- codedocent/server.py +304 -0
- codedocent/templates/base.html +538 -0
- codedocent/templates/interactive.html +1032 -0
- codedocent-0.1.0.dist-info/METADATA +16 -0
- codedocent-0.1.0.dist-info/RECORD +17 -0
- codedocent-0.1.0.dist-info/WHEEL +5 -0
- codedocent-0.1.0.dist-info/entry_points.txt +2 -0
- codedocent-0.1.0.dist-info/licenses/LICENSE +21 -0
- codedocent-0.1.0.dist-info/top_level.txt +1 -0
codedocent/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""codedocent — code visualization for non-programmers."""
|
codedocent/__main__.py
ADDED
codedocent/analyzer.py
ADDED
|
@@ -0,0 +1,620 @@
|
|
|
1
|
+
"""AI-powered analysis: summaries, pseudocode, quality scoring, and caching."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import sys
|
|
10
|
+
import threading
|
|
11
|
+
import time
|
|
12
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
13
|
+
|
|
14
|
+
from codedocent.parser import CodeNode
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
import ollama
|
|
18
|
+
except ImportError:
|
|
19
|
+
ollama = None # type: ignore[assignment]
|
|
20
|
+
|
|
21
|
+
# Name of the JSON cache file written into the analyzed project root.
CACHE_FILENAME = ".codedocent_cache.json"
# Cap on how many source lines get embedded into an AI prompt.
MAX_SOURCE_LINES = 200
# Nodes shorter than this get a canned "Small ..." summary — no AI call.
MIN_LINES_FOR_AI = 3

# Quality scoring thresholds: (yellow_threshold, red_threshold)
# yellow = "complex", red = "warning"
LINE_THRESHOLDS: dict[str, tuple[int, int]] = {
    "function": (50, 100),
    "method": (50, 100),
    "file": (500, 1000),
    "class": (300, 600),
}
# Functions/methods with more parameters than this are flagged "complex".
PARAM_THRESHOLD = 5
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _count_nodes(node: CodeNode) -> int:
|
|
37
|
+
"""Recursive count of all nodes in tree."""
|
|
38
|
+
return 1 + sum(_count_nodes(c) for c in node.children)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _build_prompt(node: CodeNode, model: str = "") -> str:
    """Compose the explain-this-code prompt sent to the model for *node*."""
    language = node.language or "unknown"
    # Truncate very long sources so the prompt stays a manageable size.
    source_lines = node.source.splitlines()
    if len(source_lines) > MAX_SOURCE_LINES:
        source = "\n".join(source_lines[:MAX_SOURCE_LINES])
    else:
        source = node.source

    pieces = [
        "You are a code explainer for non-programmers. ",
        f"Given the following {language} code, provide:\n\n",
        "1. SUMMARY: A plain English explanation (1-3 sentences) that a ",
        "non-programmer can understand. Explain WHAT it does and WHY, ",
        "not HOW. Avoid jargon.\n\n",
        "2. PSEUDOCODE: A simplified pseudocode version using plain English ",
        "function/variable names. Keep it short.\n\n",
        "Respond in exactly this format:\n",
        "SUMMARY: <your summary>\n",
        "PSEUDOCODE:\n",
        "<your pseudocode>\n\n",
        "Here is the code:\n",
        f"```{language}\n",
        f"{source}\n",
        "```",
    ]
    prompt = "".join(pieces)

    # qwen3 models accept a suffix that disables chain-of-thought output.
    if "qwen3" in model.lower():
        prompt += "\n\n/no_think"

    return prompt
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _strip_think_tags(text: str) -> str:
|
|
76
|
+
"""Remove <think>...</think> blocks from model output.
|
|
77
|
+
|
|
78
|
+
Handles variants: <think>, <|think|>, and unclosed tags.
|
|
79
|
+
"""
|
|
80
|
+
# Remove well-formed pairs (including <|think|> variants)
|
|
81
|
+
text = re.sub(r"<\|?think\|?>.*?<\|?/think\|?>", "", text, flags=re.DOTALL)
|
|
82
|
+
# Remove unclosed tags (tag to end of string)
|
|
83
|
+
text = re.sub(r"<\|?think\|?>.*", "", text, flags=re.DOTALL)
|
|
84
|
+
return text.strip()
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _parse_ai_response(text: str) -> tuple[str, str]:
|
|
88
|
+
"""Parse SUMMARY and PSEUDOCODE from AI response text."""
|
|
89
|
+
summary = ""
|
|
90
|
+
pseudocode = ""
|
|
91
|
+
|
|
92
|
+
summary_match = re.search(
|
|
93
|
+
r"SUMMARY:\s*(.*?)(?=\nPSEUDOCODE:|$)", text, re.DOTALL
|
|
94
|
+
)
|
|
95
|
+
pseudocode_match = re.search(r"PSEUDOCODE:\s*(.*)", text, re.DOTALL)
|
|
96
|
+
|
|
97
|
+
if summary_match:
|
|
98
|
+
summary = summary_match.group(1).strip()
|
|
99
|
+
if pseudocode_match:
|
|
100
|
+
pseudocode = pseudocode_match.group(1).strip()
|
|
101
|
+
|
|
102
|
+
# Fallback: first line as summary if parsing failed
|
|
103
|
+
if not summary:
|
|
104
|
+
lines = text.strip().splitlines()
|
|
105
|
+
if lines:
|
|
106
|
+
summary = lines[0].strip()
|
|
107
|
+
|
|
108
|
+
return summary, pseudocode
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _summarize_with_ai(
    node: CodeNode, model: str
) -> tuple[str, str]:
    """Ask ollama for a (summary, pseudocode) pair describing *node*."""
    reply = ollama.chat(
        model=model,
        messages=[{"role": "user", "content": _build_prompt(node, model)}],
    )
    content = reply.message.content or ""  # pylint: disable=no-member
    content = _strip_think_tags(content)

    # Treat empty or near-empty replies as garbage.
    if not content or len(content) < 10:
        return ("Could not generate summary", "")

    summary, pseudocode = _parse_ai_response(content)
    # Last-resort guard against a useless summary.
    if not summary or len(summary) < 5:
        summary = "Could not generate summary"
    return summary, pseudocode
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def _count_parameters(node: CodeNode) -> int:
|
|
132
|
+
"""Count parameters of a function/method using tree-sitter."""
|
|
133
|
+
if not node.source or not node.language:
|
|
134
|
+
return 0
|
|
135
|
+
|
|
136
|
+
import tree_sitter_language_pack as tslp # pylint: disable=import-outside-toplevel # noqa: E501
|
|
137
|
+
|
|
138
|
+
try:
|
|
139
|
+
parser = tslp.get_parser(node.language) # type: ignore[arg-type]
|
|
140
|
+
except (KeyError, ValueError):
|
|
141
|
+
return 0
|
|
142
|
+
|
|
143
|
+
tree = parser.parse(node.source.encode())
|
|
144
|
+
root = tree.root_node
|
|
145
|
+
|
|
146
|
+
# Find the parameters / formal_parameters node
|
|
147
|
+
param_node = None
|
|
148
|
+
|
|
149
|
+
def _find_params(n):
|
|
150
|
+
nonlocal param_node
|
|
151
|
+
if param_node is not None:
|
|
152
|
+
return
|
|
153
|
+
if n.type in ("parameters", "formal_parameters"):
|
|
154
|
+
param_node = n
|
|
155
|
+
return
|
|
156
|
+
for child in n.children:
|
|
157
|
+
_find_params(child)
|
|
158
|
+
|
|
159
|
+
_find_params(root)
|
|
160
|
+
if param_node is None:
|
|
161
|
+
return 0
|
|
162
|
+
|
|
163
|
+
count = 0
|
|
164
|
+
for child in param_node.children:
|
|
165
|
+
# Skip punctuation like ( ) ,
|
|
166
|
+
if child.type in ("(", ")", ","):
|
|
167
|
+
continue
|
|
168
|
+
# For Python, skip self/cls
|
|
169
|
+
if node.language == "python":
|
|
170
|
+
text = child.text.decode() if child.text else ""
|
|
171
|
+
if text in ("self", "cls"):
|
|
172
|
+
continue
|
|
173
|
+
count += 1
|
|
174
|
+
|
|
175
|
+
return count
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _worst_quality(a: str, b: str) -> str:
|
|
179
|
+
"""Return the worse of two quality labels."""
|
|
180
|
+
order = {"clean": 0, "complex": 1, "warning": 2}
|
|
181
|
+
return a if order.get(a, 0) >= order.get(b, 0) else b
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def _score_quality(
    node: CodeNode,
) -> tuple[str | None, list[str] | None]:
    """Score code quality using radon and heuristics.

    Returns (quality, warnings) where quality is 'clean', 'complex',
    or 'warning', and warnings is a list of warning strings.
    For directories, returns (None, None).
    """
    if node.node_type == "directory":
        return None, None

    warnings: list[str] = []
    quality = "clean"

    # Cyclomatic complexity via radon (Python sources only).
    if node.language == "python" and node.source:
        try:
            from radon.complexity import cc_visit, cc_rank  # type: ignore[import-untyped] # pylint: disable=import-outside-toplevel # noqa: E501

            blocks = cc_visit(node.source)
            if blocks:
                worst = max(b.complexity for b in blocks)
                rank = cc_rank(worst)
                if rank in ("A", "B"):
                    pass  # clean
                elif rank == "C":
                    quality = _worst_quality(quality, "complex")
                    warnings.append(
                        f"Moderate complexity (grade {rank},"
                        f" score {worst})"
                    )
                else:
                    quality = _worst_quality(quality, "warning")
                    warnings.append(
                        f"High complexity (grade {rank},"
                        f" score {worst})"
                    )
        except (ImportError, AttributeError, SyntaxError):  # nosec B110
            # radon missing, or the snippet doesn't parse as a whole
            # module (cc_visit uses ast.parse) — skip complexity scoring
            # rather than crash the fast pass.
            pass

    # Line-count check (two-tier: yellow = complex, red = warning)
    thresholds = LINE_THRESHOLDS.get(node.node_type)
    if thresholds and node.line_count:
        yellow, red = thresholds
        if node.line_count > red:
            quality = _worst_quality(quality, "warning")
            warnings.append(
                f"This {node.node_type} is"
                f" {node.line_count} lines long"
            )
        elif node.line_count > yellow:
            quality = _worst_quality(quality, "complex")
            warnings.append(f"Long {node.node_type}: {node.line_count} lines")

    # Heuristic: many parameters
    if node.node_type in ("function", "method"):
        param_count = _count_parameters(node)
        if param_count > PARAM_THRESHOLD:
            quality = _worst_quality(quality, "complex")
            warnings.append("Many parameters: consider grouping")

    return quality, warnings if warnings else None
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def _summarize_directory(node: CodeNode) -> None:
|
|
250
|
+
"""Synthesize a directory summary from children. No AI needed."""
|
|
251
|
+
if node.node_type != "directory":
|
|
252
|
+
return
|
|
253
|
+
|
|
254
|
+
file_children = [c for c in node.children if c.node_type == "file"]
|
|
255
|
+
dir_children = [c for c in node.children if c.node_type == "directory"]
|
|
256
|
+
|
|
257
|
+
parts: list[str] = []
|
|
258
|
+
if file_children:
|
|
259
|
+
names = ", ".join(c.name for c in file_children)
|
|
260
|
+
parts.append(f"{len(file_children)} files: {names}")
|
|
261
|
+
if dir_children:
|
|
262
|
+
names = ", ".join(c.name for c in dir_children)
|
|
263
|
+
parts.append(f"{len(dir_children)} directories: {names}")
|
|
264
|
+
|
|
265
|
+
node.summary = (
|
|
266
|
+
f"Contains {'; '.join(parts)}" if parts else "Empty directory"
|
|
267
|
+
)
|
|
268
|
+
|
|
269
|
+
# Quality = worst child quality with descriptive rollup
|
|
270
|
+
quality_order = {"warning": 2, "complex": 1, "clean": 0}
|
|
271
|
+
worst = "clean"
|
|
272
|
+
rollup_warnings: list[str] = []
|
|
273
|
+
complex_count = 0
|
|
274
|
+
warning_count = 0
|
|
275
|
+
for child in node.children:
|
|
276
|
+
child_rank = quality_order.get(
|
|
277
|
+
child.quality or "clean", 0
|
|
278
|
+
)
|
|
279
|
+
worst_rank = quality_order.get(worst, 0)
|
|
280
|
+
if child.quality and child_rank > worst_rank:
|
|
281
|
+
worst = child.quality
|
|
282
|
+
if child.quality == "complex":
|
|
283
|
+
complex_count += 1
|
|
284
|
+
if child.quality == "warning":
|
|
285
|
+
warning_count += 1
|
|
286
|
+
|
|
287
|
+
if warning_count:
|
|
288
|
+
label = "child" if warning_count == 1 else "children"
|
|
289
|
+
rollup_warnings.append(f"Contains {warning_count} high-risk {label}")
|
|
290
|
+
if complex_count:
|
|
291
|
+
label = "child" if complex_count == 1 else "children"
|
|
292
|
+
rollup_warnings.append(f"{complex_count} complex {label} inside")
|
|
293
|
+
|
|
294
|
+
node.quality = worst
|
|
295
|
+
node.warnings = rollup_warnings if rollup_warnings else None
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _rollup_quality(node: CodeNode) -> None:
|
|
299
|
+
"""Roll up child quality into a file or class node."""
|
|
300
|
+
if not node.children:
|
|
301
|
+
return
|
|
302
|
+
quality_order = {"warning": 2, "complex": 1, "clean": 0}
|
|
303
|
+
own_quality = node.quality or "clean"
|
|
304
|
+
own_warnings = list(node.warnings) if node.warnings else []
|
|
305
|
+
complex_count = sum(1 for c in node.children if c.quality == "complex")
|
|
306
|
+
warning_count = sum(1 for c in node.children if c.quality == "warning")
|
|
307
|
+
worst_child = (
|
|
308
|
+
"warning" if warning_count
|
|
309
|
+
else ("complex" if complex_count else "clean")
|
|
310
|
+
)
|
|
311
|
+
if quality_order[worst_child] > quality_order.get(own_quality, 0):
|
|
312
|
+
node.quality = worst_child
|
|
313
|
+
if warning_count:
|
|
314
|
+
label = "function" if warning_count == 1 else "functions"
|
|
315
|
+
own_warnings.append(f"Contains {warning_count} high-risk {label}")
|
|
316
|
+
if complex_count:
|
|
317
|
+
label = "function" if complex_count == 1 else "functions"
|
|
318
|
+
own_warnings.append(f"{complex_count} complex {label} inside")
|
|
319
|
+
node.warnings = own_warnings if own_warnings else None
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
# ---------------------------------------------------------------------------
|
|
323
|
+
# Cache
|
|
324
|
+
# ---------------------------------------------------------------------------
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _cache_key(node: CodeNode) -> str:
|
|
328
|
+
"""Generate a cache key based on filepath, name, and source hash."""
|
|
329
|
+
source_hash = hashlib.md5(
|
|
330
|
+
node.source.encode(), usedforsecurity=False
|
|
331
|
+
).hexdigest()
|
|
332
|
+
return f"{node.filepath}::{node.name}::{source_hash}"
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def _load_cache(path: str) -> dict:
|
|
336
|
+
"""Load cache from JSON file."""
|
|
337
|
+
try:
|
|
338
|
+
with open(path, encoding="utf-8") as f:
|
|
339
|
+
data = json.load(f)
|
|
340
|
+
if isinstance(data, dict) and data.get("version") == 1:
|
|
341
|
+
return data
|
|
342
|
+
except (FileNotFoundError, json.JSONDecodeError, OSError):
|
|
343
|
+
pass
|
|
344
|
+
return {"version": 1, "model": "", "entries": {}}
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
def _save_cache(path: str, data: dict) -> None:
|
|
348
|
+
"""Save cache to JSON file."""
|
|
349
|
+
try:
|
|
350
|
+
with open(path, "w", encoding="utf-8") as f:
|
|
351
|
+
json.dump(data, f, indent=2)
|
|
352
|
+
except OSError as e:
|
|
353
|
+
print(f"Warning: could not save cache: {e}", file=sys.stderr)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
# ---------------------------------------------------------------------------
|
|
357
|
+
# Node ID assignment
|
|
358
|
+
# ---------------------------------------------------------------------------
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def assign_node_ids(root: CodeNode) -> dict[str, CodeNode]:
    """Give every node a unique 12-char hex node_id; return id -> node map.

    The id is the md5 of the '::'-joined path of (type, name) segments
    from the root, so it is deterministic across runs.
    """
    lookup: dict[str, CodeNode] = {}

    pending: list[tuple[CodeNode, list[str]]] = [(root, [root.name])]
    while pending:
        node, parts = pending.pop()
        digest = hashlib.md5(
            "::".join(parts).encode(), usedforsecurity=False
        ).hexdigest()
        node.node_id = digest[:12]
        lookup[node.node_id] = node
        for child in node.children:
            pending.append((child, parts + [child.node_type, child.name]))

    return lookup
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
# ---------------------------------------------------------------------------
|
|
384
|
+
# Single-node analysis (used by server)
|
|
385
|
+
# ---------------------------------------------------------------------------
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def analyze_single_node(node: CodeNode, model: str, cache_dir: str) -> None:
    """Run quality scoring + AI analysis on a single node.

    Reads/writes the cache. Applies the min-lines guard and garbage
    fallback. Directories get a locally synthesized summary and are
    never sent to the AI.
    """
    if ollama is None:
        node.summary = "AI unavailable (ollama not installed)"
        return

    # Quality scoring (fast, local)
    quality, warnings = _score_quality(node)
    node.quality = quality
    node.warnings = warnings

    # Directory nodes get synthesized summaries, not AI. Checked before
    # the min-lines guard so small directories still get a real summary
    # (consistent with the batch analyze() path, which always calls
    # _summarize_directory for directories).
    if node.node_type == "directory":
        _summarize_directory(node)
        return

    # Min-lines guard: trivial nodes are not worth an AI round-trip.
    # line_count may be unset; treat missing as 0.
    if (node.line_count or 0) < MIN_LINES_FOR_AI:
        node.summary = f"Small {node.node_type} ({node.line_count} lines)"
        return

    # Cache (invalidated wholesale when the model changes)
    cache_path = os.path.join(cache_dir, CACHE_FILENAME)
    cache = _load_cache(cache_path)

    if cache.get("model") != model:
        cache = {"version": 1, "model": model, "entries": {}}

    key = _cache_key(node)
    if key in cache["entries"]:
        entry = cache["entries"][key]
        node.summary = entry.get("summary")
        node.pseudocode = entry.get("pseudocode")
        return

    try:
        summary, pseudocode = _summarize_with_ai(node, model)
        node.summary = summary
        node.pseudocode = pseudocode
        cache["entries"][key] = {"summary": summary, "pseudocode": pseudocode}
        _save_cache(cache_path, cache)
    except (ConnectionError, RuntimeError, ValueError, OSError) as e:
        node.summary = f"Summary generation failed: {e}"
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
# ---------------------------------------------------------------------------
|
|
437
|
+
# Main entry points
|
|
438
|
+
# ---------------------------------------------------------------------------
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
def _collect_nodes(
|
|
442
|
+
node: CodeNode, depth: int = 0,
|
|
443
|
+
) -> list[tuple[CodeNode, int]]:
|
|
444
|
+
"""Collect all nodes with their depth for priority batching."""
|
|
445
|
+
result = [(node, depth)]
|
|
446
|
+
for child in node.children:
|
|
447
|
+
result.extend(_collect_nodes(child, depth + 1))
|
|
448
|
+
return result
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def analyze(  # pylint: disable=too-many-locals,too-many-statements
    root: CodeNode,
    model: str = "qwen3:14b",
    workers: int = 1,
) -> CodeNode:
    """Analyze the full tree with AI summaries and quality scoring.

    Uses priority batching:
    1. Quality-score all nodes (fast pass).
    2. AI-analyze files (shallowest first).
    3. AI-analyze classes/functions/methods (shallowest first).
    4. Synthesize directory summaries (deepest first / bottom-up).

    Mutates nodes in place and returns *root*. Exits the process (code 1)
    when ollama is missing or unreachable.
    """
    if ollama is None:
        print(
            "Error: ollama package not installed. "
            "Install with: pip install ollama\n"
            "Or use --no-ai to skip AI analysis.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Determine cache path
    cache_dir = root.filepath or "."
    cache_path = os.path.join(cache_dir, CACHE_FILENAME)
    cache = _load_cache(cache_path)

    # Invalidate cache if model changed
    if cache.get("model") != model:
        cache = {"version": 1, "model": model, "entries": {}}

    all_nodes = _collect_nodes(root)
    total = len(all_nodes)
    # Mutable cell so the nested closure can increment shared progress.
    counter = [0]
    # cache_lock guards the shared cache dict; progress_lock guards the
    # counter + stderr output when workers > 1.
    cache_lock = threading.Lock()
    progress_lock = threading.Lock()
    start_time = time.monotonic()

    def _progress(label: str) -> None:
        # One progress line per node, thread-safe.
        with progress_lock:
            counter[0] += 1
            print(f"[{counter[0]}/{total}] {label}...", file=sys.stderr)

    def _ai_analyze(node: CodeNode) -> None:
        """Run AI analysis on a single non-directory node."""
        label = node.name
        # NOTE(review): assumes node.line_count is always an int here —
        # analyze_single_node guards against None; confirm the parser
        # always sets it for file/class/function nodes.
        if node.line_count < MIN_LINES_FOR_AI:
            node.summary = f"Small {node.node_type} ({node.line_count} lines)"
            _progress(f"Skipping small {label}")
            return

        key = _cache_key(node)
        with cache_lock:
            if key in cache["entries"]:
                entry = cache["entries"][key]
                node.summary = entry.get("summary")
                node.pseudocode = entry.get("pseudocode")
                _progress(f"Cache hit: {label}")
                return

        _progress(f"Analyzing {label}")
        try:
            summary, pseudocode = _summarize_with_ai(node, model)
            with cache_lock:
                node.summary = summary
                node.pseudocode = pseudocode
                cache["entries"][key] = {
                    "summary": summary,
                    "pseudocode": pseudocode,
                }
        except Exception as e:  # pylint: disable=broad-exception-caught
            # Broad catch is deliberate: one bad node must not abort the
            # whole batch. ConnectionError is re-raised by the executor
            # loop below only in the threaded path.
            node.summary = "Summary generation failed"
            print(
                f"  AI error for {label}: {e}",
                file=sys.stderr,
            )

    try:
        # Phase 1: Quality-score all nodes
        for node, _depth in all_nodes:
            quality, warnings = _score_quality(node)
            node.quality = quality
            node.warnings = warnings

        # Phase 1b: Rollup quality to files and classes (deepest first)
        # Deepest-first so an inner class is rolled up before its file.
        rollup_nodes = [
            (n, d) for n, d in all_nodes
            if n.node_type in ("file", "class")
        ]
        rollup_nodes.sort(key=lambda x: x[1], reverse=True)
        for node, _depth in rollup_nodes:
            _rollup_quality(node)

        # Phase 2: AI-analyze files (shallowest first)
        files = [(n, d) for n, d in all_nodes if n.node_type == "file"]
        files.sort(key=lambda x: x[1])

        # Phase 3: AI-analyze classes/functions/methods (shallowest first)
        code_nodes = [(n, d) for n, d in all_nodes
                      if n.node_type in ("class", "function", "method")]
        code_nodes.sort(key=lambda x: x[1])

        # Combine phases 2 & 3 into a single list for submission
        ai_nodes = [n for n, _d in files] + [n for n, _d in code_nodes]

        if workers == 1:
            # Sequential path: _ai_analyze catches its own errors, but a
            # ConnectionError escapes its broad except only via the
            # threaded path; here it is swallowed per-node.
            for node in ai_nodes:
                _ai_analyze(node)
        else:
            with ThreadPoolExecutor(max_workers=workers) as executor:
                futures = {executor.submit(_ai_analyze, node): node
                           for node in ai_nodes}
                for future in as_completed(futures):
                    exc = future.exception()
                    # NOTE(review): _ai_analyze catches Exception, so a
                    # ConnectionError can only surface here if raised
                    # outside that try block — verify this path is live.
                    if isinstance(exc, ConnectionError):
                        raise exc

        # Phase 4: Synthesize directory summaries (deepest first)
        # Bottom-up so each directory sees already-scored children.
        dirs = [(n, d) for n, d in all_nodes if n.node_type == "directory"]
        dirs.sort(key=lambda x: x[1], reverse=True)
        for node, _depth in dirs:
            _summarize_directory(node)

    except ConnectionError as e:
        # NOTE(review): exiting here skips _save_cache, so summaries
        # completed this run before the failure are discarded.
        print(
            f"\nError: Could not connect to ollama: {e}\n"
            "Make sure ollama is running (ollama serve),"
            " or use --no-ai to skip AI analysis.",
            file=sys.stderr,
        )
        sys.exit(1)

    _save_cache(cache_path, cache)

    elapsed = time.monotonic() - start_time
    ai_count = len(files) + len(code_nodes)
    print(
        f"Analysis complete: {ai_count} nodes in {elapsed:.1f}s "
        f"({workers} workers, model: {model})",
        file=sys.stderr,
    )

    return root
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def analyze_no_ai(root: CodeNode) -> CodeNode:
    """Quality-score the whole tree without any ollama calls.

    Depth-first walk; after a node's children are processed, file and
    class nodes absorb child quality and directories get synthesized
    summaries (bottom-up, so children are already scored).
    """
    total = _count_nodes(root)
    progress = [0]

    def _visit(node: CodeNode) -> None:
        progress[0] += 1
        print(
            f"[{progress[0]}/{total}] Scoring {node.name}...",
            file=sys.stderr,
        )

        node.quality, node.warnings = _score_quality(node)

        for child in node.children:
            _visit(child)

        if node.node_type in ("file", "class"):
            _rollup_quality(node)

        if node.node_type == "directory":
            _summarize_directory(node)

    _visit(root)
    return root
|