ai-codeindex 0.7.0 (ai_codeindex-0.7.0-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ai_codeindex-0.7.0.dist-info/METADATA +966 -0
- ai_codeindex-0.7.0.dist-info/RECORD +41 -0
- ai_codeindex-0.7.0.dist-info/WHEEL +4 -0
- ai_codeindex-0.7.0.dist-info/entry_points.txt +2 -0
- ai_codeindex-0.7.0.dist-info/licenses/LICENSE +21 -0
- codeindex/README_AI.md +767 -0
- codeindex/__init__.py +11 -0
- codeindex/adaptive_config.py +83 -0
- codeindex/adaptive_selector.py +171 -0
- codeindex/ai_helper.py +48 -0
- codeindex/cli.py +40 -0
- codeindex/cli_common.py +10 -0
- codeindex/cli_config.py +97 -0
- codeindex/cli_docs.py +66 -0
- codeindex/cli_hooks.py +765 -0
- codeindex/cli_scan.py +562 -0
- codeindex/cli_symbols.py +295 -0
- codeindex/cli_tech_debt.py +238 -0
- codeindex/config.py +479 -0
- codeindex/directory_tree.py +229 -0
- codeindex/docstring_processor.py +342 -0
- codeindex/errors.py +62 -0
- codeindex/extractors/__init__.py +9 -0
- codeindex/extractors/thinkphp.py +132 -0
- codeindex/file_classifier.py +148 -0
- codeindex/framework_detect.py +323 -0
- codeindex/hierarchical.py +428 -0
- codeindex/incremental.py +278 -0
- codeindex/invoker.py +260 -0
- codeindex/parallel.py +155 -0
- codeindex/parser.py +740 -0
- codeindex/route_extractor.py +98 -0
- codeindex/route_registry.py +77 -0
- codeindex/scanner.py +167 -0
- codeindex/semantic_extractor.py +408 -0
- codeindex/smart_writer.py +737 -0
- codeindex/symbol_index.py +199 -0
- codeindex/symbol_scorer.py +283 -0
- codeindex/tech_debt.py +619 -0
- codeindex/tech_debt_formatters.py +234 -0
- codeindex/writer.py +164 -0
codeindex/invoker.py
ADDED
@@ -0,0 +1,260 @@

"""AI CLI invoker - calls external AI CLI tools."""

import shlex
import subprocess
from dataclasses import dataclass
from pathlib import Path

from rich.console import Console

console = Console()


def clean_ai_output(output: str) -> str:
    """
    Clean AI output to extract valid markdown content.

    Handles cases where AI includes explanations before/after markdown.
    """
    if not output or not output.strip():
        return ""

    lines = output.strip().split("\n")

    # Find the first markdown heading
    start_idx = 0
    for i, line in enumerate(lines):
        if line.strip().startswith("#"):
            start_idx = i
            break

    # Extract from first heading onwards
    cleaned = "\n".join(lines[start_idx:])

    # Remove any trailing non-markdown content (common AI commentary)
    # Look for patterns like "---" followed by explanations
    final_lines = []
    in_code_block = False
    for line in cleaned.split("\n"):
        if line.strip().startswith("```"):
            in_code_block = not in_code_block
        # Skip lines that look like AI commentary (not in code block)
        if not in_code_block and line.strip().startswith(("Note:", "I ", "This ")):
            if not any(
                line.strip().startswith(f"- {x}") for x in ["Note:", "I ", "This "]
            ):
                continue
        final_lines.append(line)

    return "\n".join(final_lines).strip()


def validate_markdown_output(output: str) -> bool:
    """Check if output looks like valid README markdown."""
    if not output or len(output) < 50:
        return False
    # Must start with a heading
    first_line = output.strip().split("\n")[0]
    if not first_line.startswith("#"):
        return False
    # Should have some structure
    if output.count("#") < 2:
        return False
    return True


@dataclass
class InvokeResult:
    """Result of invoking AI CLI."""

    success: bool
    output: str
    error: str = ""
    command: str = ""


def format_prompt(
    dir_path: Path,
    files_info: str,
    symbols_info: str,
    imports_info: str,
) -> str:
    """
    Format the prompt to send to AI CLI.

    Uses Markdown format for readability.
    """
    dir_name = dir_path.name
    prompt = f"""CRITICAL: Output ONLY valid markdown. No explanations.
Start with: # README_AI.md - {dir_name}

## Directory
{dir_path}

## Files
{files_info}

## Symbols (Classes, Functions)
{symbols_info}

## Imports/Dependencies
{imports_info}

## Task
Generate a README_AI.md for this module. Include:
1. Purpose - what this module does (1-2 sentences)
2. Architecture - key components and data flow
3. Key Components - classes/functions with roles
4. Consumes - dependencies on other modules
5. Provides - exports for other modules

Requirements:
- Start with: # README_AI.md - {dir_name}
- Use markdown tables for Consumes/Provides
- Focus on WHAT and WHY, not HOW
- Keep it concise (~50-100 lines)
- Output ONLY markdown, no commentary
"""
    return prompt


def invoke_ai_cli(
    command_template: str,
    prompt: str,
    timeout: int = 120,
    dry_run: bool = False,
) -> InvokeResult:
    """
    Invoke the AI CLI with the given prompt.

    Args:
        command_template: Command template with {prompt} placeholder
        prompt: The prompt to send
        timeout: Timeout in seconds
        dry_run: If True, just print the command without executing

    Returns:
        InvokeResult with output or error
    """
    # Escape the prompt for shell
    escaped_prompt = prompt.replace('"', '\\"').replace("$", "\\$").replace("`", "\\`")

    # Build the command
    command = command_template.replace("{prompt}", escaped_prompt)

    if dry_run:
        console.print("[dim]Would execute:[/dim]")
        console.print(f"[cyan]{command[:200]}...[/cyan]")
        return InvokeResult(
            success=True,
            output="[DRY RUN] No actual execution",
            command=command,
        )

    try:
        # Run the command
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        if result.returncode == 0:
            return InvokeResult(
                success=True,
                output=result.stdout,
                command=command,
            )
        else:
            return InvokeResult(
                success=False,
                output=result.stdout,
                error=result.stderr or f"Exit code: {result.returncode}",
                command=command,
            )

    except subprocess.TimeoutExpired:
        return InvokeResult(
            success=False,
            output="",
            error=f"Command timed out after {timeout} seconds",
            command=command,
        )
    except Exception as e:
        return InvokeResult(
            success=False,
            output="",
            error=str(e),
            command=command,
        )


def invoke_ai_cli_stdin(
    command: str,
    prompt: str,
    timeout: int = 120,
    dry_run: bool = False,
) -> InvokeResult:
    """
    Alternative: invoke AI CLI with prompt via stdin.

    Some CLI tools prefer stdin input for long prompts.

    Args:
        command: Command to run (without prompt)
        prompt: The prompt to send via stdin
        timeout: Timeout in seconds
        dry_run: If True, just print the command without executing

    Returns:
        InvokeResult with output or error
    """
    if dry_run:
        console.print("[dim]Would execute:[/dim]")
        console.print(f"[cyan]{command}[/cyan]")
        console.print(f"[dim]With stdin prompt ({len(prompt)} chars)[/dim]")
        return InvokeResult(
            success=True,
            output="[DRY RUN] No actual execution",
            command=command,
        )

    try:
        result = subprocess.run(
            shlex.split(command),
            input=prompt,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        if result.returncode == 0:
            return InvokeResult(
                success=True,
                output=result.stdout,
                command=command,
            )
        else:
            return InvokeResult(
                success=False,
                output=result.stdout,
                error=result.stderr or f"Exit code: {result.returncode}",
                command=command,
            )

    except subprocess.TimeoutExpired:
        return InvokeResult(
            success=False,
            output="",
            error=f"Command timed out after {timeout} seconds",
            command=command,
        )
    except Exception as e:
        return InvokeResult(
            success=False,
            output="",
            error=str(e),
            command=command,
        )
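For context, invoke_ai_cli expects a shell command template containing a {prompt} placeholder and returns an InvokeResult. A minimal usage sketch follows; the claude -p template and the directory metadata strings are illustrative assumptions, not values shipped with the package:

from pathlib import Path

from codeindex.invoker import (
    clean_ai_output,
    format_prompt,
    invoke_ai_cli,
    validate_markdown_output,
)

# Hypothetical inputs - in the real package these come from the scanner/parser modules.
prompt = format_prompt(
    dir_path=Path("src/codeindex"),
    files_info="- invoker.py (260 lines)",
    symbols_info="- invoke_ai_cli()\n- InvokeResult",
    imports_info="- subprocess, shlex, rich",
)

# The {prompt} placeholder is substituted into the template, then run via the shell.
# "claude -p" is an assumed example CLI; any tool accepting a prompt argument would fit.
result = invoke_ai_cli('claude -p "{prompt}"', prompt, timeout=60)
if result.success:
    markdown = clean_ai_output(result.output)
    if validate_markdown_output(markdown):
        Path("README_AI.md").write_text(markdown)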
codeindex/parallel.py
ADDED
@@ -0,0 +1,155 @@

"""Parallel processing utilities for codeindex."""

import concurrent.futures
from dataclasses import dataclass
from pathlib import Path
from typing import List

from rich.console import Console

from .config import Config
from .parser import ParseResult, parse_file
from .scanner import scan_directory

console = Console()


@dataclass
class BatchResult:
    """Result of processing a batch of files."""
    parse_results: List[ParseResult]
    success_count: int
    error_count: int


def parse_files_parallel(
    files: List[Path],
    config: Config,
    quiet: bool = False
) -> list[ParseResult]:
    """
    Parse files using multiple workers in parallel.

    Args:
        files: List of files to parse
        config: Configuration object
        quiet: Whether to suppress output

    Returns:
        List of parse results (same order as input)
    """
    if not files:
        return []

    if config.parallel_workers <= 1 or len(files) < config.batch_size:
        # Use sequential processing for small jobs or single worker
        if not quiet:
            console.print(f" [dim]→ Parsing {len(files)} files sequentially...[/dim]")
        return [parse_file(f) for f in files]

    # Process files in parallel
    if not quiet:
        console.print(
            f" [dim]→ Parsing {len(files)} files with {config.parallel_workers} workers...[/dim]"
        )

    parse_results = [None] * len(files)  # Pre-allocate to maintain order

    with concurrent.futures.ThreadPoolExecutor(max_workers=config.parallel_workers) as executor:
        # Submit all tasks
        future_to_index = {
            executor.submit(parse_file, file): i
            for i, file in enumerate(files)
        }

        # Process results as they complete
        completed = 0
        errors = 0

        for future in concurrent.futures.as_completed(future_to_index):
            index = future_to_index[future]
            try:
                result = future.result()
                parse_results[index] = result
                if result.error:
                    errors += 1
            except Exception as e:
                # Create error result
                error_result = ParseResult(
                    path=files[index],
                    error=f"Processing error: {str(e)}",
                    file_lines=0,
                )
                parse_results[index] = error_result
                errors += 1

            completed += 1
            if not quiet and completed % 10 == 0:
                console.print(f" [dim]→ Processed {completed}/{len(files)} files...[/dim]")

    if not quiet:
        success = len(files) - errors
        console.print(f" [dim]→ Parsed {success} files successfully, {errors} errors[/dim]")

    return parse_results


def scan_directories_parallel(
    directories: List[Path],
    config: Config,
    quiet: bool = False
) -> List[Path]:
    """
    Scan multiple directories in parallel for batch processing.

    Args:
        directories: List of directories to process
        config: Configuration object
        quiet: Whether to suppress output

    Returns:
        List of results for each directory
    """
    if not directories:
        return []

    if config.parallel_workers <= 1 or len(directories) == 1:
        # Sequential processing
        return [scan_directory(d, config, d.parent) for d in directories]

    if not quiet:
        console.print(
            f" [dim]→ Processing {len(directories)} directories in parallel...[/dim]"
        )

    # Use ThreadPoolExecutor for I/O bound directory scanning
    with concurrent.futures.ThreadPoolExecutor(max_workers=config.parallel_workers) as executor:
        futures = {
            executor.submit(scan_directory, d, config, d.parent): d
            for d in directories
        }

        results = []
        completed = 0

        for future in concurrent.futures.as_completed(futures):
            dir_path = futures[future]
            try:
                result = future.result()
                results.append(result)
                if not quiet and len(result.files) > 0:
                    console.print(
                        f" [dim]→ Found {len(result.files)} files in {dir_path.name}[/dim]"
                    )
            except Exception as e:
                if not quiet:
                    console.print(f" [yellow]⚠ Error scanning {dir_path.name}: {e}[/yellow]")

            completed += 1
            if not quiet and completed % 5 == 0:
                console.print(
                    f" [dim]→ Processed {completed}/{len(directories)} "
                    f"directories...[/dim]"
                )

    return results
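parse_files_parallel falls back to sequential parsing for small jobs and otherwise fans files out over a ThreadPoolExecutor, preserving input order. A minimal usage sketch; constructing Config directly with parallel_workers and batch_size keyword arguments is an assumption, since the real package most likely loads configuration from codeindex/config.py rather than literal values:

from pathlib import Path

from codeindex.config import Config
from codeindex.parallel import parse_files_parallel

# Hypothetical configuration - actual defaults and construction live in codeindex/config.py.
config = Config(parallel_workers=4, batch_size=10)

files = sorted(Path("src").rglob("*.py"))
results = parse_files_parallel(files, config, quiet=True)

# Results keep the input order; failed parses carry a non-empty .error field.
failed = [r for r in results if r.error]
print(f"parsed {len(results) - len(failed)} files ok, {len(failed)} failed")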