codedocent 0.1.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codedocent-0.1.0 → codedocent-0.2.1}/PKG-INFO +1 -1
- {codedocent-0.1.0 → codedocent-0.2.1}/README.md +31 -11
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/analyzer.py +117 -293
- codedocent-0.2.1/codedocent/cli.py +244 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/editor.py +41 -37
- codedocent-0.2.1/codedocent/gui.py +139 -0
- codedocent-0.2.1/codedocent/ollama_utils.py +31 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/parser.py +99 -96
- codedocent-0.2.1/codedocent/quality.py +254 -0
- codedocent-0.2.1/codedocent/server.py +343 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/templates/base.html +18 -4
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/templates/interactive.html +19 -5
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent.egg-info/PKG-INFO +1 -1
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent.egg-info/SOURCES.txt +5 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent.egg-info/entry_points.txt +1 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/pyproject.toml +2 -1
- {codedocent-0.1.0 → codedocent-0.2.1}/tests/test_analyzer.py +49 -13
- codedocent-0.2.1/tests/test_cli.py +204 -0
- codedocent-0.2.1/tests/test_gui.py +57 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/tests/test_server.py +35 -0
- codedocent-0.1.0/codedocent/cli.py +0 -132
- codedocent-0.1.0/codedocent/server.py +0 -304
- {codedocent-0.1.0 → codedocent-0.2.1}/LICENSE +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/__init__.py +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/__main__.py +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/renderer.py +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent/scanner.py +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent.egg-info/dependency_links.txt +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent.egg-info/requires.txt +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/codedocent.egg-info/top_level.txt +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/setup.cfg +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/tests/test_editor.py +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/tests/test_parser.py +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/tests/test_renderer.py +0 -0
- {codedocent-0.1.0 → codedocent-0.2.1}/tests/test_scanner.py +0 -0
|
@@ -22,7 +22,7 @@ Blocks are **nested** — directories contain files, files contain classes, clas
|
|
|
22
22
|
|
|
23
23
|
### Code actions
|
|
24
24
|
|
|
25
|
-
Every
|
|
25
|
+
Every block gets a toolbar with one-click actions (no AI analysis needed):
|
|
26
26
|
|
|
27
27
|
| Button | What it does |
|
|
28
28
|
|--------|-------------|
|
|
@@ -56,6 +56,12 @@ pip install -e .
|
|
|
56
56
|
|
|
57
57
|
### Run
|
|
58
58
|
|
|
59
|
+
**Setup wizard** — just run `codedocent` with no arguments:
|
|
60
|
+
```bash
|
|
61
|
+
codedocent
|
|
62
|
+
```
|
|
63
|
+
The wizard walks you through picking a folder, detecting Ollama, choosing a model, and selecting a mode. No flags to memorize.
|
|
64
|
+
|
|
59
65
|
**Interactive mode** (recommended) — instant load, AI analyzes each block on click:
|
|
60
66
|
```bash
|
|
61
67
|
codedocent /path/to/any/codebase
|
|
@@ -73,10 +79,20 @@ codedocent /path/to/any/codebase --full
|
|
|
73
79
|
codedocent /path/to/any/codebase --text
|
|
74
80
|
```
|
|
75
81
|
|
|
82
|
+
**GUI launcher** — a graphical window with folder picker, model dropdown, and mode selector:
|
|
83
|
+
```bash
|
|
84
|
+
codedocent --gui
|
|
85
|
+
# or use the standalone entry point:
|
|
86
|
+
codedocent-gui
|
|
87
|
+
```
|
|
88
|
+
Requires tkinter (usually included with Python; on Ubuntu: `sudo apt install python3-tk`).
|
|
89
|
+
|
|
76
90
|
### Options
|
|
77
91
|
|
|
78
92
|
| Flag | Description |
|
|
79
93
|
|------|-------------|
|
|
94
|
+
| *(no args)* | Launch interactive setup wizard |
|
|
95
|
+
| `--gui` | Open GUI launcher (tkinter) |
|
|
80
96
|
| `--full` | Analyze everything upfront, output static HTML |
|
|
81
97
|
| `--text` | Print text tree to terminal (no browser) |
|
|
82
98
|
| `--no-ai` | Skip AI summaries, show structure only |
|
|
@@ -131,13 +147,15 @@ Warnings roll up through the tree: a file inherits the worst quality of its func
|
|
|
131
147
|
|
|
132
148
|
```
|
|
133
149
|
codedocent/
|
|
134
|
-
├── cli.py
|
|
135
|
-
├──
|
|
136
|
-
├──
|
|
137
|
-
├──
|
|
138
|
-
├──
|
|
139
|
-
├──
|
|
140
|
-
├──
|
|
150
|
+
├── cli.py Command-line interface, setup wizard, entry point
|
|
151
|
+
├── gui.py Tkinter GUI launcher
|
|
152
|
+
├── ollama_utils.py Shared Ollama detection and model listing
|
|
153
|
+
├── scanner.py File discovery with .gitignore support
|
|
154
|
+
├── parser.py AST parsing via tree-sitter
|
|
155
|
+
├── analyzer.py AI summaries, quality scoring, caching
|
|
156
|
+
├── editor.py Code replacement with backup safety
|
|
157
|
+
├── renderer.py HTML generation (static + interactive)
|
|
158
|
+
├── server.py Local server for interactive mode
|
|
141
159
|
└── templates/
|
|
142
160
|
└── interactive.html Single-page app UI
|
|
143
161
|
```
|
|
@@ -145,13 +163,15 @@ codedocent/
|
|
|
145
163
|
## Current status
|
|
146
164
|
|
|
147
165
|
- Scanner, parser, renderer, analyzer, editor, server, CLI — all built and tested
|
|
166
|
+
- Interactive setup wizard when run with no arguments
|
|
167
|
+
- GUI launcher via `--gui` flag or `codedocent-gui` entry point
|
|
148
168
|
- Interactive navigation with lazy AI analysis
|
|
169
|
+
- Code action buttons (Show Code, Export, Copy for AI, Replace) available immediately — no AI analysis required
|
|
149
170
|
- Static HTML full-analysis mode with parallel workers
|
|
150
|
-
- Code actions — Show Code, Export Code, Copy for AI, Replace Code
|
|
151
171
|
- Code replacement with `.bak` backup and cache invalidation
|
|
152
172
|
- Quality scoring with two-tier thresholds and warning rollup across the tree
|
|
153
|
-
- pip-installable package with `codedocent` CLI entry point
|
|
154
|
-
-
|
|
173
|
+
- pip-installable package with `codedocent` and `codedocent-gui` CLI entry points
|
|
174
|
+
- 93 tests passing
|
|
155
175
|
- Code quality: pylint 10/10, bandit/flake8/mypy all clean
|
|
156
176
|
|
|
157
177
|
## License
|
|
@@ -12,6 +12,11 @@ import time
|
|
|
12
12
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
13
13
|
|
|
14
14
|
from codedocent.parser import CodeNode
|
|
15
|
+
from codedocent.quality import (
|
|
16
|
+
_score_quality,
|
|
17
|
+
_rollup_quality,
|
|
18
|
+
_summarize_directory,
|
|
19
|
+
)
|
|
15
20
|
|
|
16
21
|
try:
|
|
17
22
|
import ollama
|
|
@@ -22,16 +27,6 @@ CACHE_FILENAME = ".codedocent_cache.json"
|
|
|
22
27
|
MAX_SOURCE_LINES = 200
|
|
23
28
|
MIN_LINES_FOR_AI = 3
|
|
24
29
|
|
|
25
|
-
# Quality scoring thresholds: (yellow_threshold, red_threshold)
|
|
26
|
-
# yellow = "complex", red = "warning"
|
|
27
|
-
LINE_THRESHOLDS: dict[str, tuple[int, int]] = {
|
|
28
|
-
"function": (50, 100),
|
|
29
|
-
"method": (50, 100),
|
|
30
|
-
"file": (500, 1000),
|
|
31
|
-
"class": (300, 600),
|
|
32
|
-
}
|
|
33
|
-
PARAM_THRESHOLD = 5
|
|
34
|
-
|
|
35
30
|
|
|
36
31
|
def _count_nodes(node: CodeNode) -> int:
|
|
37
32
|
"""Recursive count of all nodes in tree."""
|
|
@@ -128,197 +123,6 @@ def _summarize_with_ai(
|
|
|
128
123
|
return summary, pseudocode
|
|
129
124
|
|
|
130
125
|
|
|
131
|
-
def _count_parameters(node: CodeNode) -> int:
    """Count the parameters of a function/method node using tree-sitter.

    The node's source is re-parsed with the grammar for ``node.language``
    and the first ``parameters`` / ``formal_parameters`` node met in a
    pre-order walk of the tree is inspected.

    Returns 0 when the node has no source or no language, when no parser
    can be obtained for the language, or when no parameter list is found.
    For Python, ``self`` and ``cls`` are not counted.
    """
    if not node.source or not node.language:
        return 0

    import tree_sitter_language_pack as tslp  # pylint: disable=import-outside-toplevel  # noqa: E501

    try:
        parser = tslp.get_parser(node.language)  # type: ignore[arg-type]
    except (KeyError, ValueError):
        return 0

    tree = parser.parse(node.source.encode())

    # Iterative pre-order walk (children pushed in reverse so they pop in
    # source order); avoids hitting the recursion limit on deep trees.
    param_node = None
    stack = [tree.root_node]
    while stack:
        current = stack.pop()
        if current.type in ("parameters", "formal_parameters"):
            param_node = current
            break
        stack.extend(reversed(current.children))

    if param_node is None:
        return 0

    is_python = node.language == "python"

    # Children that are syntax rather than parameters: punctuation and
    # comments placed inside the parameter list.
    not_params = {"(", ")", ",", "comment", "line_comment", "block_comment"}
    if is_python:
        # Bare ``*`` and ``/`` markers separate parameter kinds; they are
        # not parameters themselves.
        not_params |= {"keyword_separator", "positional_separator"}

    count = 0
    for child in param_node.children:
        if child.type in not_params:
            continue
        if is_python:
            text = child.text.decode() if child.text else ""
            if text in ("self", "cls"):
                continue
        count += 1

    return count
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
def _worst_quality(a: str, b: str) -> str:
    """Return the worse of two quality labels.

    Severity increases ``clean`` < ``complex`` < ``warning``.  A label
    outside that set ranks the same as ``clean``.  On a tie, *a* is
    returned.
    """
    severity = {"clean": 0, "complex": 1, "warning": 2}
    if severity.get(a, 0) >= severity.get(b, 0):
        return a
    return b
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
def _score_quality(
    node: CodeNode,
) -> tuple[str | None, list[str] | None]:
    """Score code quality using radon and simple heuristics.

    Three checks contribute, and the worst result wins:

    * cyclomatic complexity via radon (Python source only),
    * line count against the two-tier ``LINE_THRESHOLDS`` entry for the
      node type,
    * parameter count against ``PARAM_THRESHOLD`` (functions/methods).

    Returns ``(quality, warnings)`` where quality is ``'clean'``,
    ``'complex'`` or ``'warning'`` and warnings is a list of messages, or
    ``None`` when there are none.  Directories return ``(None, None)``.
    """
    if node.node_type == "directory":
        return None, None

    warnings: list[str] = []
    quality = "clean"

    # Radon complexity for Python
    if node.language == "python" and node.source:
        try:
            from radon.complexity import cc_visit, cc_rank  # type: ignore[import-untyped]  # pylint: disable=import-outside-toplevel  # noqa: E501

            blocks = cc_visit(node.source)
            if blocks:
                worst = max(b.complexity for b in blocks)
                rank = cc_rank(worst)
                if rank == "C":
                    quality = _worst_quality(quality, "complex")
                    warnings.append(
                        f"Moderate complexity (grade {rank},"
                        f" score {worst})"
                    )
                elif rank not in ("A", "B"):
                    quality = _worst_quality(quality, "warning")
                    warnings.append(
                        f"High complexity (grade {rank},"
                        f" score {worst})"
                    )
        # Best effort: radon may be missing, and source it cannot parse
        # raises SyntaxError; either way fall through to the other checks.
        except (ImportError, AttributeError, SyntaxError):  # nosec B110
            pass

    # Line-count check (two-tier: yellow/red)
    thresholds = LINE_THRESHOLDS.get(node.node_type)
    if thresholds and node.line_count:
        yellow, red = thresholds
        if node.line_count > red:
            quality = _worst_quality(quality, "warning")
            warnings.append(
                f"This {node.node_type} is"
                f" {node.line_count} lines long"
            )
        elif node.line_count > yellow:
            quality = _worst_quality(quality, "complex")
            warnings.append(
                f"Long {node.node_type}: {node.line_count} lines"
            )

    # Heuristic: many parameters
    if node.node_type in ("function", "method"):
        if _count_parameters(node) > PARAM_THRESHOLD:
            quality = _worst_quality(quality, "complex")
            warnings.append("Many parameters: consider grouping")

    return quality, warnings if warnings else None
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
def _summarize_directory(node: CodeNode) -> None:
    """Synthesize a directory summary from its children. No AI needed.

    Does nothing unless ``node.node_type`` is ``"directory"``.  Otherwise
    sets, in place:

    * ``node.summary`` - a listing of child files and sub-directories, or
      ``"Empty directory"`` when there are neither;
    * ``node.quality`` - the worst quality found among the children
      (``"clean"`` when none is ``"complex"`` or ``"warning"``);
    * ``node.warnings`` - roll-up messages counting the ``"warning"`` and
      ``"complex"`` children, or ``None`` when there are none.
    """
    if node.node_type != "directory":
        return

    # Summary: files first, then sub-directories.
    parts: list[str] = []
    for kind, noun in (("file", "files"), ("directory", "directories")):
        names = [c.name for c in node.children if c.node_type == kind]
        if names:
            parts.append(f"{len(names)} {noun}: {', '.join(names)}")

    node.summary = (
        f"Contains {'; '.join(parts)}" if parts else "Empty directory"
    )

    # Quality = worst child quality with descriptive rollup
    warning_count = sum(1 for c in node.children if c.quality == "warning")
    complex_count = sum(1 for c in node.children if c.quality == "complex")

    rollup_warnings: list[str] = []
    if warning_count:
        label = "child" if warning_count == 1 else "children"
        rollup_warnings.append(f"Contains {warning_count} high-risk {label}")
    if complex_count:
        label = "child" if complex_count == 1 else "children"
        rollup_warnings.append(f"{complex_count} complex {label} inside")

    if warning_count:
        node.quality = "warning"
    elif complex_count:
        node.quality = "complex"
    else:
        node.quality = "clean"
    node.warnings = rollup_warnings if rollup_warnings else None
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
def _rollup_quality(node: CodeNode) -> None:
    """Roll up child quality into a file or class node.

    A node without children is left untouched.  Otherwise ``node.quality``
    is raised to the worst child quality when that is worse than the
    node's own, and one message per non-zero count of ``"warning"`` /
    ``"complex"`` children is appended after the node's own warnings.
    ``node.warnings`` ends up ``None`` when the combined list is empty.
    """
    if not node.children:
        return

    severity = {"warning": 2, "complex": 1, "clean": 0}
    warning_count = sum(1 for c in node.children if c.quality == "warning")
    complex_count = sum(1 for c in node.children if c.quality == "complex")

    if warning_count:
        worst_child = "warning"
    elif complex_count:
        worst_child = "complex"
    else:
        worst_child = "clean"

    own_quality = node.quality or "clean"
    if severity[worst_child] > severity.get(own_quality, 0):
        node.quality = worst_child

    # Copy so the caller's original warnings list is never mutated.
    combined = list(node.warnings) if node.warnings else []
    if warning_count:
        label = "function" if warning_count == 1 else "functions"
        combined.append(f"Contains {warning_count} high-risk {label}")
    if complex_count:
        label = "function" if complex_count == 1 else "functions"
        combined.append(f"{complex_count} complex {label} inside")
    node.warnings = combined if combined else None
|
|
320
|
-
|
|
321
|
-
|
|
322
126
|
# ---------------------------------------------------------------------------
|
|
323
127
|
# Cache
|
|
324
128
|
# ---------------------------------------------------------------------------
|
|
@@ -448,67 +252,84 @@ def _collect_nodes(
|
|
|
448
252
|
return result
|
|
449
253
|
|
|
450
254
|
|
|
451
|
-
def
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
Uses priority batching:
|
|
459
|
-
1. Quality-score all nodes (fast pass).
|
|
460
|
-
2. AI-analyze files (shallowest first).
|
|
461
|
-
3. AI-analyze classes/functions/methods (shallowest first).
|
|
462
|
-
4. Synthesize directory summaries (deepest first / bottom-up).
|
|
463
|
-
"""
|
|
464
|
-
if ollama is None:
|
|
465
|
-
print(
|
|
466
|
-
"Error: ollama package not installed. "
|
|
467
|
-
"Install with: pip install ollama\n"
|
|
468
|
-
"Or use --no-ai to skip AI analysis.",
|
|
469
|
-
file=sys.stderr,
|
|
470
|
-
)
|
|
471
|
-
sys.exit(1)
|
|
472
|
-
|
|
473
|
-
# Determine cache path
|
|
474
|
-
cache_dir = root.filepath or "."
|
|
475
|
-
cache_path = os.path.join(cache_dir, CACHE_FILENAME)
|
|
476
|
-
cache = _load_cache(cache_path)
|
|
255
|
+
def _score_all_nodes(all_nodes: list[tuple[CodeNode, int]]) -> None:
    """Phase 1: Quality-score all nodes.

    *all_nodes* holds ``(node, depth)`` pairs; the depth is ignored here.
    Each node's ``quality`` and ``warnings`` are overwritten with the
    result of ``_score_quality``.
    """
    for node, _depth in all_nodes:
        node.quality, node.warnings = _score_quality(node)
|
|
477
261
|
|
|
478
|
-
# Invalidate cache if model changed
|
|
479
|
-
if cache.get("model") != model:
|
|
480
|
-
cache = {"version": 1, "model": model, "entries": {}}
|
|
481
262
|
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
263
|
+
def _rollup_file_quality(all_nodes: list[tuple[CodeNode, int]]) -> None:
    """Phase 1b: Rollup quality to files and classes (deepest first).

    Only ``"file"`` and ``"class"`` nodes are visited.  They are handled
    in order of decreasing depth, so a node nested inside another is
    rolled up before the node that contains it.
    """
    deepest_first = sorted(
        (
            (node, depth) for node, depth in all_nodes
            if node.node_type in ("file", "class")
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for node, _depth in deepest_first:
        _rollup_quality(node)
|
|
272
|
+
|
|
273
|
+
|
|
274
|
+
def _select_ai_nodes(
    all_nodes: list[tuple[CodeNode, int]],
) -> list[CodeNode]:
    """Select and sort nodes for AI analysis (files then code).

    Returns every ``"file"`` node ordered shallowest first, followed by
    every ``"class"`` / ``"function"`` / ``"method"`` node ordered
    shallowest first.  Nodes at equal depth keep their order from
    *all_nodes*; other node types (e.g. directories) are left out.
    """

    def _by_depth(kinds: tuple[str, ...]) -> list[CodeNode]:
        """Return the nodes whose type is in *kinds*, shallowest first."""
        chosen = [
            (node, depth) for node, depth in all_nodes
            if node.node_type in kinds
        ]
        chosen.sort(key=lambda pair: pair[1])  # stable: ties keep order
        return [node for node, _ in chosen]

    return _by_depth(("file",)) + _by_depth(("class", "function", "method"))
|
|
288
|
+
|
|
289
|
+
|
|
290
|
+
def _dispatch_work(func, nodes: list[CodeNode], workers: int) -> None:
    """Run *func* on each node, serially or in parallel.

    With ``workers <= 1`` the nodes are processed in order on the calling
    thread and any exception raised by *func* propagates.  Otherwise the
    calls are submitted to a thread pool of *workers* threads; there only
    a ``ConnectionError`` raised by *func* is re-raised, other exceptions
    stay on their futures and are not reported.

    Raises:
        ConnectionError: if *func* raised one for any node.
    """
    # A pool cannot be created with fewer than one worker, so treat any
    # such value as a request for serial execution.
    if workers <= 1:
        for node in nodes:
            func(node)
        return

    with ThreadPoolExecutor(max_workers=workers) as pool:
        futures = [pool.submit(func, node) for node in nodes]
        for future in as_completed(futures):
            exc = future.exception()
            if isinstance(exc, ConnectionError):
                # Drop work that has not started yet: once the connection
                # is gone the remaining calls would only fail the same way.
                for pending in futures:
                    pending.cancel()
                raise exc
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
def _run_ai_batch(
|
|
305
|
+
all_nodes: list[tuple[CodeNode, int]],
|
|
306
|
+
model: str,
|
|
307
|
+
cache: dict,
|
|
308
|
+
workers: int,
|
|
309
|
+
) -> int:
|
|
310
|
+
"""Phases 2 & 3: AI-analyze files then code nodes."""
|
|
311
|
+
total, counter = len(all_nodes), [0]
|
|
312
|
+
cache_lock, progress_lock = threading.Lock(), threading.Lock()
|
|
488
313
|
|
|
489
314
|
def _progress(label: str) -> None:
|
|
490
315
|
with progress_lock:
|
|
491
316
|
counter[0] += 1
|
|
492
317
|
print(f"[{counter[0]}/{total}] {label}...", file=sys.stderr)
|
|
493
318
|
|
|
494
|
-
def
|
|
495
|
-
"""Run AI analysis on a single non-directory node."""
|
|
496
|
-
label = node.name
|
|
319
|
+
def _do_one(node: CodeNode) -> None:
|
|
497
320
|
if node.line_count < MIN_LINES_FOR_AI:
|
|
498
321
|
node.summary = f"Small {node.node_type} ({node.line_count} lines)"
|
|
499
|
-
_progress(f"Skipping small {
|
|
322
|
+
_progress(f"Skipping small {node.name}")
|
|
500
323
|
return
|
|
501
|
-
|
|
502
324
|
key = _cache_key(node)
|
|
503
325
|
with cache_lock:
|
|
504
326
|
if key in cache["entries"]:
|
|
505
327
|
entry = cache["entries"][key]
|
|
506
328
|
node.summary = entry.get("summary")
|
|
507
329
|
node.pseudocode = entry.get("pseudocode")
|
|
508
|
-
_progress(f"Cache hit: {
|
|
330
|
+
_progress(f"Cache hit: {node.name}")
|
|
509
331
|
return
|
|
510
|
-
|
|
511
|
-
_progress(f"Analyzing {label}")
|
|
332
|
+
_progress(f"Analyzing {node.name}")
|
|
512
333
|
try:
|
|
513
334
|
summary, pseudocode = _summarize_with_ai(node, model)
|
|
514
335
|
with cache_lock:
|
|
@@ -520,57 +341,60 @@ def analyze( # pylint: disable=too-many-locals,too-many-statements
|
|
|
520
341
|
}
|
|
521
342
|
except Exception as e: # pylint: disable=broad-exception-caught
|
|
522
343
|
node.summary = "Summary generation failed"
|
|
523
|
-
print(
|
|
524
|
-
f" AI error for {label}: {e}",
|
|
525
|
-
file=sys.stderr,
|
|
526
|
-
)
|
|
344
|
+
print(f" AI error for {node.name}: {e}", file=sys.stderr)
|
|
527
345
|
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
quality, warnings = _score_quality(node)
|
|
532
|
-
node.quality = quality
|
|
533
|
-
node.warnings = warnings
|
|
534
|
-
|
|
535
|
-
# Phase 1b: Rollup quality to files and classes (deepest first)
|
|
536
|
-
rollup_nodes = [
|
|
537
|
-
(n, d) for n, d in all_nodes
|
|
538
|
-
if n.node_type in ("file", "class")
|
|
539
|
-
]
|
|
540
|
-
rollup_nodes.sort(key=lambda x: x[1], reverse=True)
|
|
541
|
-
for node, _depth in rollup_nodes:
|
|
542
|
-
_rollup_quality(node)
|
|
346
|
+
ai_nodes = _select_ai_nodes(all_nodes)
|
|
347
|
+
_dispatch_work(_do_one, ai_nodes, workers)
|
|
348
|
+
return len(ai_nodes)
|
|
543
349
|
|
|
544
|
-
# Phase 2: AI-analyze files (shallowest first)
|
|
545
|
-
files = [(n, d) for n, d in all_nodes if n.node_type == "file"]
|
|
546
|
-
files.sort(key=lambda x: x[1])
|
|
547
|
-
|
|
548
|
-
# Phase 3: AI-analyze classes/functions/methods (shallowest first)
|
|
549
|
-
code_nodes = [(n, d) for n, d in all_nodes
|
|
550
|
-
if n.node_type in ("class", "function", "method")]
|
|
551
|
-
code_nodes.sort(key=lambda x: x[1])
|
|
552
|
-
|
|
553
|
-
# Combine phases 2 & 3 into a single list for submission
|
|
554
|
-
ai_nodes = [n for n, _d in files] + [n for n, _d in code_nodes]
|
|
555
|
-
|
|
556
|
-
if workers == 1:
|
|
557
|
-
for node in ai_nodes:
|
|
558
|
-
_ai_analyze(node)
|
|
559
|
-
else:
|
|
560
|
-
with ThreadPoolExecutor(max_workers=workers) as executor:
|
|
561
|
-
futures = {executor.submit(_ai_analyze, node): node
|
|
562
|
-
for node in ai_nodes}
|
|
563
|
-
for future in as_completed(futures):
|
|
564
|
-
exc = future.exception()
|
|
565
|
-
if isinstance(exc, ConnectionError):
|
|
566
|
-
raise exc
|
|
567
|
-
|
|
568
|
-
# Phase 4: Synthesize directory summaries (deepest first)
|
|
569
|
-
dirs = [(n, d) for n, d in all_nodes if n.node_type == "directory"]
|
|
570
|
-
dirs.sort(key=lambda x: x[1], reverse=True)
|
|
571
|
-
for node, _depth in dirs:
|
|
572
|
-
_summarize_directory(node)
|
|
573
350
|
|
|
351
|
+
def _summarize_directories(all_nodes: list[tuple[CodeNode, int]]) -> None:
    """Phase 4: Synthesize directory summaries (deepest first).

    Only ``"directory"`` nodes are visited, in order of decreasing depth,
    so a sub-directory is summarized before the directory containing it.
    """
    deepest_first = sorted(
        (
            (node, depth) for node, depth in all_nodes
            if node.node_type == "directory"
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )
    for node, _depth in deepest_first:
        _summarize_directory(node)
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
def _require_ollama() -> None:
    """Exit with an error if the ollama package is not installed.

    When the module-level ``ollama`` name is ``None``, prints install
    instructions to stderr and exits with status 1; otherwise returns.
    """
    if ollama is not None:
        return

    print(
        "Error: ollama package not installed. "
        "Install with: pip install ollama\n"
        "Or use --no-ai to skip AI analysis.",
        file=sys.stderr,
    )
    sys.exit(1)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def _init_cache(root: CodeNode, model: str) -> tuple[str, dict]:
    """Load (or reset) the analysis cache for *model*.

    The cache file is ``CACHE_FILENAME`` inside ``root.filepath`` (or the
    current directory when that is empty).  A fresh, empty cache is
    returned when the loaded one was written for a different model or has
    no usable ``entries`` mapping.

    Returns:
        ``(cache_path, cache)``.
    """
    cache_path = os.path.join(root.filepath or ".", CACHE_FILENAME)
    cache = _load_cache(cache_path)

    # Entries from another model are not reusable; a cache without an
    # "entries" dict would fail later when entries are looked up.
    model_changed = cache.get("model") != model
    if model_changed or not isinstance(cache.get("entries"), dict):
        cache = {"version": 1, "model": model, "entries": {}}
    return cache_path, cache
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def analyze(
|
|
382
|
+
root: CodeNode,
|
|
383
|
+
model: str = "qwen3:14b",
|
|
384
|
+
workers: int = 1,
|
|
385
|
+
) -> CodeNode:
|
|
386
|
+
"""Analyze the full tree with AI summaries and quality scoring."""
|
|
387
|
+
_require_ollama()
|
|
388
|
+
|
|
389
|
+
cache_path, cache = _init_cache(root, model)
|
|
390
|
+
all_nodes = _collect_nodes(root)
|
|
391
|
+
start_time = time.monotonic()
|
|
392
|
+
|
|
393
|
+
_score_all_nodes(all_nodes)
|
|
394
|
+
_rollup_file_quality(all_nodes)
|
|
395
|
+
|
|
396
|
+
try:
|
|
397
|
+
ai_count = _run_ai_batch(all_nodes, model, cache, workers)
|
|
574
398
|
except ConnectionError as e:
|
|
575
399
|
print(
|
|
576
400
|
f"\nError: Could not connect to ollama: {e}\n"
|
|
@@ -580,10 +404,10 @@ def analyze( # pylint: disable=too-many-locals,too-many-statements
|
|
|
580
404
|
)
|
|
581
405
|
sys.exit(1)
|
|
582
406
|
|
|
407
|
+
_summarize_directories(all_nodes)
|
|
583
408
|
_save_cache(cache_path, cache)
|
|
584
409
|
|
|
585
410
|
elapsed = time.monotonic() - start_time
|
|
586
|
-
ai_count = len(files) + len(code_nodes)
|
|
587
411
|
print(
|
|
588
412
|
f"Analysis complete: {ai_count} nodes in {elapsed:.1f}s "
|
|
589
413
|
f"({workers} workers, model: {model})",
|