ai-codeindex 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeindex/invoker.py ADDED
@@ -0,0 +1,260 @@
1
+ """AI CLI invoker - calls external AI CLI tools."""
2
+
3
+ import shlex
4
+ import subprocess
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+
8
+ from rich.console import Console
9
+
10
+ console = Console()
11
+
12
+
13
def clean_ai_output(output: str) -> str:
    """
    Clean AI output to extract valid markdown content.

    Handles cases where the AI wraps the markdown in explanations:

    * everything before the first line that starts with ``#`` is dropped
      (if no such line exists, nothing is dropped from the front);
    * outside fenced code blocks, lines starting with ``Note:``, ``I `` or
      ``This `` are treated as AI commentary and removed.

    Args:
        output: Raw text produced by the AI CLI.

    Returns:
        The cleaned markdown, stripped of surrounding whitespace, or an
        empty string when ``output`` is empty or whitespace-only.
    """
    if not output or not output.strip():
        return ""

    lines = output.strip().split("\n")

    # Index of the first heading line; fall back to 0 so heading-less
    # output is still filtered rather than discarded.
    first_heading = next(
        (i for i, line in enumerate(lines) if line.strip().startswith("#")),
        0,
    )

    commentary_prefixes = ("Note:", "I ", "This ")
    kept = []
    in_code_block = False
    for line in lines[first_heading:]:
        stripped = line.strip()
        # A fence line toggles the state; the fence itself is always kept.
        if stripped.startswith("```"):
            in_code_block = not in_code_block
        # Bullets such as "- This ..." never match these prefixes, so list
        # items are preserved without any extra check.
        if not in_code_block and stripped.startswith(commentary_prefixes):
            continue
        kept.append(line)

    return "\n".join(kept).strip()
50
+
51
+
52
def validate_markdown_output(output: str) -> bool:
    """
    Check if output looks like valid README markdown.

    The output is accepted when all of the following hold:

    * it is at least 50 characters long;
    * its first line (after stripping surrounding whitespace) is a heading;
    * it contains at least two heading lines, i.e. some section structure.

    Heading lines are detected textually (a line whose first non-blank
    character is ``#``); fenced code blocks are not parsed.

    Args:
        output: Candidate markdown text.

    Returns:
        True if the text passes the checks above, False otherwise.
    """
    if not output or len(output) < 50:
        return False

    lines = output.strip().split("\n")

    # Must start with a heading
    if not lines[0].startswith("#"):
        return False

    # Count heading *lines*, not '#' characters: a lone "## Title" contains
    # two '#' characters but provides no section structure.
    heading_lines = sum(1 for line in lines if line.strip().startswith("#"))
    return heading_lines >= 2
64
+
65
+
66
@dataclass
class InvokeResult:
    """Outcome of a single AI CLI invocation (real or dry run)."""

    # Whether the invocation is considered successful.
    success: bool
    # Text captured from the command's standard output (may be empty).
    output: str
    # Failure description: stderr text, an exit-code message, or an
    # exception message. Empty when there is nothing to report.
    error: str = ""
    # The command string associated with this result.
    command: str = ""
74
+
75
+
76
def format_prompt(
    dir_path: Path,
    files_info: str,
    symbols_info: str,
    imports_info: str,
) -> str:
    """
    Build the markdown prompt that is sent to the AI CLI.

    The prompt embeds the directory path and the three pre-rendered info
    strings verbatim under their own headings, followed by the task
    description and output requirements.

    Args:
        dir_path: Directory being documented; its final path component is
            used in the expected README title.
        files_info: Text placed under the "Files" heading.
        symbols_info: Text placed under the "Symbols" heading.
        imports_info: Text placed under the "Imports/Dependencies" heading.

    Returns:
        The complete prompt, terminated by a newline.
    """
    title_line = f"# README_AI.md - {dir_path.name}"

    prompt_lines = [
        "CRITICAL: Output ONLY valid markdown. No explanations.",
        f"Start with: {title_line}",
        "",
        "## Directory",
        f"{dir_path}",
        "",
        "## Files",
        f"{files_info}",
        "",
        "## Symbols (Classes, Functions)",
        f"{symbols_info}",
        "",
        "## Imports/Dependencies",
        f"{imports_info}",
        "",
        "## Task",
        "Generate a README_AI.md for this module. Include:",
        "1. Purpose - what this module does (1-2 sentences)",
        "2. Architecture - key components and data flow",
        "3. Key Components - classes/functions with roles",
        "4. Consumes - dependencies on other modules",
        "5. Provides - exports for other modules",
        "",
        "Requirements:",
        f"- Start with: {title_line}",
        "- Use markdown tables for Consumes/Provides",
        "- Focus on WHAT and WHY, not HOW",
        "- Keep it concise (~50-100 lines)",
        "- Output ONLY markdown, no commentary",
        "",  # trailing empty entry yields the final newline
    ]
    return "\n".join(prompt_lines)
119
+
120
+
121
def invoke_ai_cli(
    command_template: str,
    prompt: str,
    timeout: int = 120,
    dry_run: bool = False,
) -> InvokeResult:
    """
    Invoke the AI CLI with the given prompt.

    The prompt is escaped for use inside a double-quoted shell word and
    substituted for the ``{prompt}`` placeholder, then the resulting command
    line is run through the shell.

    Security note: the escaping is only sufficient when the placeholder is
    wrapped in double quotes in the template (e.g. ``tool -p "{prompt}"``).
    With an unquoted or single-quoted placeholder the prompt is NOT safely
    escaped.

    Args:
        command_template: Command template with {prompt} placeholder
        prompt: The prompt to send
        timeout: Timeout in seconds
        dry_run: If True, just print the command without executing

    Returns:
        InvokeResult with output or error. Failures (non-zero exit status,
        timeout, or any exception while launching the command) are reported
        through the result rather than raised.
    """
    # Escape the characters that stay special inside shell double quotes.
    # The backslash MUST be handled first: otherwise a prompt containing \"
    # turns into \\" (an escaped backslash followed by a live closing quote),
    # which corrupts the prompt and allows shell injection.
    escaped_prompt = (
        prompt.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("$", "\\$")
        .replace("`", "\\`")
    )

    # Build the command
    command = command_template.replace("{prompt}", escaped_prompt)

    if dry_run:
        console.print("[dim]Would execute:[/dim]")
        console.print(f"[cyan]{command[:200]}...[/cyan]")
        return InvokeResult(
            success=True,
            output="[DRY RUN] No actual execution",
            command=command,
        )

    try:
        # shell=True is required because the template is a full shell
        # command line, not an argument vector.
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return InvokeResult(
            success=False,
            output="",
            error=f"Command timed out after {timeout} seconds",
            command=command,
        )
    except Exception as e:
        # Boundary handler: callers receive every failure as an InvokeResult.
        return InvokeResult(
            success=False,
            output="",
            error=str(e),
            command=command,
        )

    if result.returncode == 0:
        return InvokeResult(
            success=True,
            output=result.stdout,
            command=command,
        )

    return InvokeResult(
        success=False,
        output=result.stdout,
        # Fall back to the exit code when the tool wrote nothing to stderr.
        error=result.stderr or f"Exit code: {result.returncode}",
        command=command,
    )
192
+
193
+
194
def invoke_ai_cli_stdin(
    command: str,
    prompt: str,
    timeout: int = 120,
    dry_run: bool = False,
) -> InvokeResult:
    """
    Run an AI CLI command, feeding the prompt through standard input.

    The command string is split into an argument list with ``shlex.split``
    and executed without a shell, so the prompt never appears on the
    command line.

    Args:
        command: Command to run (without prompt)
        prompt: Text written to the process's stdin
        timeout: Maximum run time in seconds
        dry_run: When True, only print what would be run

    Returns:
        InvokeResult describing the outcome; failures (non-zero exit,
        timeout, or any exception while launching) are reported in the
        result rather than raised.
    """
    if dry_run:
        for message in (
            "[dim]Would execute:[/dim]",
            f"[cyan]{command}[/cyan]",
            f"[dim]With stdin prompt ({len(prompt)} chars)[/dim]",
        ):
            console.print(message)
        return InvokeResult(
            success=True,
            output="[DRY RUN] No actual execution",
            command=command,
        )

    try:
        # Splitting happens inside the try so malformed quoting in
        # `command` is reported as a failed result too.
        proc = subprocess.run(
            shlex.split(command),
            input=prompt,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        return InvokeResult(
            success=False,
            output="",
            error=f"Command timed out after {timeout} seconds",
            command=command,
        )
    except Exception as exc:
        return InvokeResult(
            success=False,
            output="",
            error=str(exc),
            command=command,
        )

    if proc.returncode != 0:
        return InvokeResult(
            success=False,
            output=proc.stdout,
            # Use the exit code as the message when stderr is empty.
            error=proc.stderr or f"Exit code: {proc.returncode}",
            command=command,
        )

    return InvokeResult(
        success=True,
        output=proc.stdout,
        command=command,
    )
codeindex/parallel.py ADDED
@@ -0,0 +1,155 @@
1
+ """Parallel processing utilities for codeindex."""
2
+
3
+ import concurrent.futures
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import List
7
+
8
+ from rich.console import Console
9
+
10
+ from .config import Config
11
+ from .parser import ParseResult, parse_file
12
+ from .scanner import scan_directory
13
+
14
+ console = Console()
15
+
16
+
17
@dataclass
class BatchResult:
    """Aggregated outcome of processing one batch of files."""

    # Parse results collected for the batch.
    parse_results: List[ParseResult]
    # Number of files counted as successfully processed.
    success_count: int
    # Number of files counted as failed.
    error_count: int
23
+
24
+
25
def parse_files_parallel(
    files: List[Path],
    config: Config,
    quiet: bool = False
) -> list[ParseResult]:
    """
    Parse a list of files, using a thread pool when the job is large enough.

    Parsing runs sequentially when ``config.parallel_workers`` is 1 or less,
    or when there are fewer files than ``config.batch_size``. Otherwise each
    file is submitted to a ``ThreadPoolExecutor``; an exception raised while
    processing a file is converted into a ``ParseResult`` carrying an error
    message instead of propagating.

    Args:
        files: Files to parse
        config: Configuration object (reads ``parallel_workers`` and
            ``batch_size``)
        quiet: Suppress progress output when True

    Returns:
        One parse result per input file, in the same order as ``files``.
    """
    if not files:
        return []

    run_sequentially = (
        config.parallel_workers <= 1 or len(files) < config.batch_size
    )
    if run_sequentially:
        if not quiet:
            console.print(f" [dim]→ Parsing {len(files)} files sequentially...[/dim]")
        return list(map(parse_file, files))

    if not quiet:
        console.print(
            f" [dim]→ Parsing {len(files)} files with {config.parallel_workers} workers...[/dim]"
        )

    # Slots are filled by input position because futures finish out of order.
    parse_results = [None] * len(files)
    errors = 0

    with concurrent.futures.ThreadPoolExecutor(max_workers=config.parallel_workers) as pool:
        position_of = {
            pool.submit(parse_file, path): position
            for position, path in enumerate(files)
        }

        finished = concurrent.futures.as_completed(position_of)
        for completed, future in enumerate(finished, start=1):
            position = position_of[future]
            try:
                outcome = future.result()
                parse_results[position] = outcome
                if outcome.error:
                    errors += 1
            except Exception as e:
                # Replace the slot with a synthetic error result.
                parse_results[position] = ParseResult(
                    path=files[position],
                    error=f"Processing error: {str(e)}",
                    file_lines=0,
                )
                errors += 1

            if not quiet and completed % 10 == 0:
                console.print(f" [dim]→ Processed {completed}/{len(files)} files...[/dim]")

    if not quiet:
        success = len(files) - errors
        console.print(f" [dim]→ Parsed {success} files successfully, {errors} errors[/dim]")

    return parse_results
95
+
96
+
97
def scan_directories_parallel(
    directories: List[Path],
    config: Config,
    quiet: bool = False
) -> list:
    """
    Scan multiple directories in parallel for batch processing.

    Each directory ``d`` is scanned with ``scan_directory(d, config,
    d.parent)``. Scanning runs sequentially when ``config.parallel_workers``
    is 1 or less or when only one directory is given; otherwise a thread
    pool is used.

    Args:
        directories: List of directories to process
        config: Configuration object
        quiet: Whether to suppress output

    Returns:
        The ``scan_directory`` results (objects exposing ``.files``), in the
        same order as ``directories``. In the parallel path, a directory
        whose scan raised is reported as a warning (unless ``quiet``) and
        omitted from the list.
    """
    if not directories:
        return []

    if config.parallel_workers <= 1 or len(directories) == 1:
        # Sequential processing
        return [scan_directory(d, config, d.parent) for d in directories]

    if not quiet:
        console.print(
            f" [dim]→ Processing {len(directories)} directories in parallel...[/dim]"
        )

    # Keyed by input position so the final list does not depend on which
    # worker happens to finish first.
    results_by_index = {}

    # Use ThreadPoolExecutor for I/O bound directory scanning
    with concurrent.futures.ThreadPoolExecutor(max_workers=config.parallel_workers) as executor:
        future_to_index = {
            executor.submit(scan_directory, d, config, d.parent): i
            for i, d in enumerate(directories)
        }

        finished = concurrent.futures.as_completed(future_to_index)
        for completed, future in enumerate(finished, start=1):
            index = future_to_index[future]
            dir_path = directories[index]
            try:
                result = future.result()
                results_by_index[index] = result
                if not quiet and len(result.files) > 0:
                    console.print(
                        f" [dim]→ Found {len(result.files)} files in {dir_path.name}[/dim]"
                    )
            except Exception as e:
                # Best effort: one failing directory must not abort the rest.
                if not quiet:
                    console.print(f" [yellow]⚠ Error scanning {dir_path.name}: {e}[/yellow]")

            if not quiet and completed % 5 == 0:
                console.print(
                    f" [dim]→ Processed {completed}/{len(directories)} "
                    f"directories...[/dim]"
                )

    return [results_by_index[i] for i in sorted(results_by_index)]