ai_codeindex-0.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,428 @@
+ """Bottom-up hierarchical processing for codeindex."""
+
+ from collections import defaultdict
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Dict, List, Set, Tuple
+
+ from rich.console import Console
+
+ from .config import Config
+ from .scanner import find_all_directories, scan_directory
+ from .smart_writer import SmartWriter, determine_level
+
+ console = Console()
+
+
+ @dataclass
+ class DirectoryInfo:
+     """Information about a directory in the hierarchy."""
+
+     path: Path
+     level: int  # 0 = deepest, numbers increase upward
+     children: Set[Path]  # Directories directly contained
+     parent: Path | None
+     has_files: bool
+     readmes_below: Set[Path]  # Subdirectories whose README_AI.md feeds this level
+     scan_result: object | None = None  # Filled in once the directory is scanned
+
+
+ def build_directory_hierarchy(
+     directories: List[Path],
+ ) -> Tuple[Dict[Path, DirectoryInfo], List[Path]]:
+     """
+     Build directory hierarchy from bottom up.
+
+     Returns:
+         - dict mapping paths to DirectoryInfo
+         - list of root directories (top level)
+     """
+     # Sort by depth (deepest first)
+     sorted_dirs = sorted(directories, key=lambda p: len(p.parts), reverse=True)
+
+     dir_info = {}
+     roots = []
+
+     # First pass: create all nodes
+     for dir_path in sorted_dirs:
+         info = DirectoryInfo(
+             path=dir_path,
+             level=0,  # Will be calculated
+             children=set(),
+             parent=None,
+             has_files=False,
+             readmes_below=set(),
+         )
+         dir_info[dir_path] = info
+
+     # Second pass: establish relationships
+     for dir_path, info in dir_info.items():
+         # Find parent relationship
+         parent_path = dir_path.parent
+         if parent_path in dir_info:
+             info.parent = parent_path
+             dir_info[parent_path].children.add(dir_path)
+         else:
+             roots.append(dir_path)
+
+     # Calculate levels bottom-up
+     def calculate_level(path: Path) -> int:
+         info = dir_info[path]
+         if not info.children:
+             info.level = 0
+             return 0
+
+         max_child_level = max(calculate_level(child) for child in info.children)
+         info.level = max_child_level + 1
+         return info.level
+
+     for root in roots:
+         calculate_level(root)
+
+     return dir_info, roots
+
+
+ def create_processing_batches(
+     dir_info: Dict[Path, DirectoryInfo], max_workers: int
+ ) -> List[List[Path]]:
+     """
+     Create batches for parallel processing.
+
+     All directories at the same level can be processed in parallel.
+     """
+     level_groups = defaultdict(list)
+
+     for path, info in dir_info.items():
+         if info.has_files:  # Only include directories that need processing
+             level_groups[info.level].append(path)
+
+     # Create batches from level groups (deeper levels first)
+     batches = []
+     for level in sorted(level_groups.keys()):
+         dirs_at_level = level_groups[level]
+
+         # Split into batches of max_workers
+         for i in range(0, len(dirs_at_level), max_workers):
+             batch = dirs_at_level[i:i + max_workers]
+             batches.append(batch)
+
+     return batches
+
+
+ def process_directory_batch(
+     batch: List[Path],
+     config: Config,
+     use_fallback: bool = False,
+     quiet: bool = False,
+     timeout: int = 120,
+     root_path: Path | None = None,
+ ) -> Dict[Path, bool]:
+     """
+     Process a batch of directories in parallel.
+
+     Returns dict mapping path to success boolean.
+     """
+     import concurrent.futures
+
+     results = {}
+
+     def process_single(path: Path) -> Tuple[Path, bool]:
+         try:
+             # Use smart processing with level detection
+             return path, process_normal(path, config, use_fallback, quiet, timeout, root_path)
+         except Exception as e:
+             if not quiet:
+                 console.print(f"[yellow]⚠ Skipping {path.name}: {e}[/yellow]")
+             return path, False
+
+     with concurrent.futures.ThreadPoolExecutor(max_workers=len(batch)) as executor:
+         futures = {executor.submit(process_single, path): path for path in batch}
+
+         for future in concurrent.futures.as_completed(futures):
+             path, success = future.result()
+             results[path] = success
+
+     return results
+
+
+ # Module-level directory info, populated by scan_directories_hierarchical before
+ # any batch runs (in a cleaner design this would be passed as a parameter)
+ dir_info = None
+
+
+ def process_normal(
+     path: Path,
+     config: Config,
+     use_fallback: bool,
+     quiet: bool,
+     timeout: int,
+     root_path: Path | None = None,
+ ) -> bool:
+     """Process a single directory with smart level detection."""
+     # Scan directory
+     if not quiet:
+         console.print(f" [dim]→ {path.name}: scanning...[/dim]")
+
+     scan_result = scan_directory(path, config)
+
+     # Parse files
+     from .parallel import parse_files_parallel
+     parse_results = []
+     if scan_result.files:
+         parse_results = parse_files_parallel(scan_result.files, config, quiet)
+
+     # Check if this directory has README_AI.md from children
+     # (guarded so the function also works if the module-level dir_info is unset)
+     child_dirs = []
+     if dir_info and path in dir_info and dir_info[path].readmes_below:
+         child_dirs = list(dir_info[path].readmes_below)
+
+     # Determine appropriate level
+     has_children = bool(child_dirs)
+     if root_path is None:
+         root_path = path
+     level = determine_level(path, root_path, has_children, config.indexing)
+
+     if not quiet:
+         console.print(
+             f" [dim]→ {path.name}: generating [{level}] README "
+             f"with {len(child_dirs)} subdirs...[/dim]"
+         )
+
+     # Use smart writer
+     writer = SmartWriter(config.indexing)
+     write_result = writer.write_readme(
+         dir_path=path,
+         parse_results=parse_results,
+         level=level,
+         child_dirs=child_dirs,
+         output_file=config.output_file,
+     )
+
+     if write_result.truncated and not quiet:
+         size_kb = write_result.size_bytes // 1024
+         console.print(
+             f" [yellow]⚠ {path.name}: README truncated to {size_kb}KB[/yellow]"
+         )
+
+     return write_result.success
+
+
+ def process_with_children(
+     path: Path, config: Config, use_fallback: bool, quiet: bool, timeout: int
+ ) -> bool:
+     """Process a directory that has children, aggregating their information."""
+     # This would be similar to process_normal but with child aggregation
+     return process_normal(path, config, use_fallback, quiet, timeout)
+
+
+ def scan_directories_hierarchical(
+     root: Path,
+     config: Config,
+     max_workers: int = 8,
+     use_fallback: bool = True,
+     quiet: bool = False,
+     timeout: int = 120,
+ ) -> bool:
+     """
+     Main function for hierarchical directory scanning.
+
+     Returns True if processing was successful overall.
+     """
+     global dir_info
+
+     # Step 1: Find all directories
+     directories = find_all_directories(root, config)
+
+     if not directories:
+         if not quiet:
+             console.print("[yellow]No directories to process[/yellow]")
+         return True
+
+     # Step 2: Build hierarchy
+     if not quiet:
+         console.print("[bold]🔍 Building directory hierarchy...[/bold]")
+
+     dir_info, roots = build_directory_hierarchy(directories)
+
+     # Step 3: Scan each directory once, marking those that have files
+     for dir_path in directories:
+         scan_result = scan_directory(dir_path, config)
+         if dir_path in dir_info:
+             dir_info[dir_path].has_files = bool(scan_result.files)
+             dir_info[dir_path].scan_result = scan_result
+
+         # Update parent-child relationship for README tracking
+         parent_path = dir_path.parent
+         if parent_path in dir_info:
+             dir_info[parent_path].readmes_below.add(dir_path)
+
+     # Step 4: Create processing batches
+     if not quiet:
+         console.print("[bold]📦 Creating processing batches...[/bold]")
+
+     batches = create_processing_batches(dir_info, max_workers)
+     total_dirs = sum(len(batch) for batch in batches)
+
+     if not quiet:
+         console.print(f"[green]✓ {total_dirs} directories in {len(batches)} levels/batches[/green]")
+
+     # Step 5: Process batches
+     global_processed = 0
+     for i, batch in enumerate(batches):
+         if not quiet:
+             level = dir_info[batch[0]].level if batch else 0
+             console.print(f"\n[bold]Level {level} - Batch {i+1}/{len(batches)}[/bold]")
+
+         results = process_directory_batch(
+             batch, config, use_fallback, quiet, timeout, root_path=root
+         )
+
+         for path, success in results.items():
+             if success:
+                 global_processed += 1
+             elif not quiet:
+                 console.print(f"[yellow]⚠ Skipped: {path.name}[/yellow]")
+
+     if not quiet:
+         console.print(f"\n[green]✓ Processed {global_processed}/{total_dirs} directories[/green]")
+
+     return global_processed > 0
+
+
+ def generate_enhanced_fallback_readme(
+     dir_path: Path,
+     parse_results: list,
+     child_readmes: List[Path],
+     output_file: str = "README_AI.md",
+ ):
+     """
+     Generate enhanced fallback README that includes child directory summaries.
+     """
+     from datetime import datetime
+
+     from .writer import format_imports_for_prompt, format_symbols_for_prompt
+
+     output_path = dir_path / output_file
+
+     # Basic directory info
+     lines = [
+         f"<!-- Generated by codeindex (hierarchical) at {datetime.now().isoformat()} -->",
+         "",
+         f"# {dir_path.name}",
+         "",
+     ]
+
+     # File statistics
+     files_count = len(parse_results)
+     symbols_count = sum(len(r.symbols) for r in parse_results)
+
+     lines.extend([
+         "## Overview",
+         f"- **Files**: {files_count}",
+         f"- **Symbols**: {symbols_count}",
+         f"- **Subdirectories**: {len(child_readmes)}",
+         "",
+     ])
+
+     # Child directories section
+     if child_readmes:
+         lines.extend([
+             "## Subdirectories",
+             "",
+         ])
+
+         for child_path in sorted(child_readmes):
+             child_name = child_path.name
+             child_readme = child_path / output_file
+
+             # Extract brief description from child README if it exists
+             description = "Module directory"
+             if child_readme.exists():
+                 try:
+                     content = child_readme.read_text()
+                     # Look for first non-heading line after the generated header
+                     for line in content.split('\n')[2:10]:
+                         line = line.strip()
+                         if line and not line.startswith('#'):
+                             description = line[:100]
+                             break
+                 except Exception:
+                     pass
+
+             lines.append(f"- **{child_name}** - {description}")
+
+         lines.append("")
+
+     # Local files and symbols
+     if parse_results:
+         lines.extend([
+             "## Files",
+             "",
+         ])
+
+         # Group by subdirectory
+         files_by_subdir = defaultdict(list)
+         for result in parse_results:
+             if not result.error:
+                 rel_path = result.path.relative_to(dir_path)
+                 if rel_path.parent != Path('.'):
+                     files_by_subdir[str(rel_path.parent)].append(result)
+                 else:
+                     files_by_subdir['.'].append(result)
+
+         for subdir in sorted(files_by_subdir.keys()):
+             if subdir == '.':
+                 # Files in root
+                 for result in files_by_subdir[subdir]:
+                     lines.append(f"- {result.path.name} ({len(result.symbols)} symbols)")
+             else:
+                 # Files in subdirectory
+                 lines.append(f"- **{subdir}/**")
+                 for result in files_by_subdir[subdir]:
+                     lines.append(f" - {result.path.name} ({len(result.symbols)} symbols)")
+
+         lines.extend([
+             "",
+             "## Symbols",
+             "",
+         ])
+
+         # Add symbols
+         lines.append(format_symbols_for_prompt(parse_results))
+
+         # Add dependencies if any
+         all_imports = []
+         for result in parse_results:
+             all_imports.extend(result.imports)
+
+         if all_imports:
+             lines.extend([
+                 "",
+                 "## Dependencies",
+                 "",
+             ])
+             lines.append(format_imports_for_prompt(parse_results))
+
+     # Write file
+     try:
+         with open(output_path, "w") as f:
+             f.write("\n".join(lines))
+
+         return type('WriteResult', (), {
+             'path': output_path,
+             'success': True,
+             'error': "",
+         })()
+     except Exception as e:
+         return type('WriteResult', (), {
+             'path': output_path,
+             'success': False,
+             'error': str(e),
+         })()
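
For orientation, here is a minimal usage sketch of the hierarchical module above. It is a sketch under stated assumptions: the import paths (codeindex.hierarchical, codeindex.config) and the bare Config() construction are inferred from the relative imports in the diff, not confirmed by it.

    # Hypothetical usage sketch -- module paths and Config() are assumptions.
    from pathlib import Path

    from codeindex.config import Config          # assumed import path
    from codeindex.hierarchical import (         # assumed module name
        build_directory_hierarchy,
        scan_directories_hierarchical,
    )

    root = Path("my_project")

    # Levels are computed bottom-up: leaves are level 0 and each parent is
    # max(child levels) + 1, so deeper directories are batched first.
    dirs = [root / "pkg", root / "pkg" / "sub", root / "pkg" / "sub" / "leaf"]
    dir_info, roots = build_directory_hierarchy(dirs)
    assert dir_info[root / "pkg" / "sub" / "leaf"].level == 0
    assert dir_info[root / "pkg"].level == 2
    assert roots == [root / "pkg"]

    # End-to-end entry point: builds the hierarchy, scans each directory,
    # then writes README_AI.md files level by level from the leaves upward.
    config = Config()  # assumed constructor; real loading may differ
    ok = scan_directories_hierarchical(root, config, max_workers=4)

The leaves-first ordering is what lets create_processing_batches run every directory of a level in parallel while guaranteeing that child README_AI.md files exist before their parents are generated.
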
@@ -0,0 +1,278 @@
+ """Incremental update logic for codeindex.
+
+ This module analyzes git changes and determines which directories
+ need README_AI.md updates based on configurable thresholds.
+ """
+
+ import subprocess
+ from dataclasses import dataclass, field
+ from enum import Enum
+ from pathlib import Path
+
+ from .config import Config
+
+
+ class UpdateLevel(Enum):
+     """Update decision levels."""
+
+     SKIP = "skip"  # Changes too small, skip update
+     CURRENT = "current"  # Update current directory only
+     AFFECTED = "affected"  # Update all affected directories
+     FULL = "full"  # Suggest full project update
+
+
+ @dataclass
+ class FileChange:
+     """Represents a changed file."""
+
+     path: Path
+     additions: int = 0
+     deletions: int = 0
+
+     @property
+     def total_lines(self) -> int:
+         return self.additions + self.deletions
+
+     @property
+     def directory(self) -> Path:
+         return self.path.parent
+
+
+ @dataclass
+ class ChangeAnalysis:
+     """Analysis result of git changes."""
+
+     files: list[FileChange] = field(default_factory=list)
+     total_additions: int = 0
+     total_deletions: int = 0
+     affected_dirs: set[Path] = field(default_factory=set)
+     level: UpdateLevel = UpdateLevel.SKIP
+     message: str = ""
+
+     @property
+     def total_lines(self) -> int:
+         return self.total_additions + self.total_deletions
+
+     def to_dict(self) -> dict:
+         """Convert to dictionary for CLI output."""
+         return {
+             "total_lines": self.total_lines,
+             "additions": self.total_additions,
+             "deletions": self.total_deletions,
+             "files_changed": len(self.files),
+             "affected_dirs": [str(d) for d in sorted(self.affected_dirs)],
+             "level": self.level.value,
+             "message": self.message,
+         }
+
+
+ def run_git_command(args: list[str], cwd: Path | None = None) -> str:
+     """Run a git command and return output (empty string on failure)."""
+     try:
+         result = subprocess.run(
+             ["git"] + args,
+             capture_output=True,
+             text=True,
+             cwd=cwd,
+             check=True,
+         )
+         return result.stdout.strip()
+     except subprocess.CalledProcessError:
+         return ""
+
+
+ def get_changed_files(
+     since: str = "HEAD~1",
+     until: str = "HEAD",
+     cwd: Path | None = None,
+ ) -> list[FileChange]:
+     """Get list of changed files with line counts.
+
+     Args:
+         since: Starting commit reference (default: HEAD~1)
+         until: Ending commit reference (default: HEAD)
+         cwd: Working directory
+
+     Returns:
+         List of FileChange objects
+     """
+     # Get numstat for line counts
+     output = run_git_command(
+         ["diff", "--numstat", since, until],
+         cwd=cwd,
+     )
+
+     if not output:
+         return []
+
+     changes = []
+     for line in output.split("\n"):
+         if not line.strip():
+             continue
+
+         parts = line.split("\t")
+         if len(parts) != 3:
+             continue
+
+         additions, deletions, filepath = parts
+
+         # Handle binary files (shown as -)
+         try:
+             add_count = int(additions) if additions != "-" else 0
+             del_count = int(deletions) if deletions != "-" else 0
+         except ValueError:
+             continue
+
+         changes.append(
+             FileChange(
+                 path=Path(filepath),
+                 additions=add_count,
+                 deletions=del_count,
+             )
+         )
+
+     return changes
+
+
+ def filter_code_files(
+     changes: list[FileChange],
+     languages: list[str],
+ ) -> list[FileChange]:
+     """Filter changes to only include code files.
+
+     Args:
+         changes: List of all file changes
+         languages: List of supported languages
+
+     Returns:
+         Filtered list of code file changes
+     """
+     extensions = {
+         "python": {".py"},
+         "javascript": {".js", ".jsx"},
+         "typescript": {".ts", ".tsx"},
+         "java": {".java"},
+         "go": {".go"},
+         "rust": {".rs"},
+     }
+
+     valid_extensions = set()
+     for lang in languages:
+         valid_extensions.update(extensions.get(lang, set()))
+
+     return [c for c in changes if c.path.suffix in valid_extensions]
+
+
+ def analyze_changes(
+     config: Config,
+     since: str = "HEAD~1",
+     until: str = "HEAD",
+     cwd: Path | None = None,
+ ) -> ChangeAnalysis:
+     """Analyze git changes and determine update strategy.
+
+     Args:
+         config: codeindex configuration
+         since: Starting commit reference
+         until: Ending commit reference
+         cwd: Working directory
+
+     Returns:
+         ChangeAnalysis with update recommendation
+     """
+     inc = config.incremental
+
+     # Get all changes
+     all_changes = get_changed_files(since, until, cwd)
+
+     # Filter to code files only
+     code_changes = filter_code_files(all_changes, config.languages)
+
+     if not code_changes:
+         return ChangeAnalysis(
+             level=UpdateLevel.SKIP,
+             message="No code files changed",
+         )
+
+     # Calculate totals
+     total_add = sum(c.additions for c in code_changes)
+     total_del = sum(c.deletions for c in code_changes)
+     total_lines = total_add + total_del
+
+     # Get affected directories
+     affected_dirs = {c.directory for c in code_changes}
+
+     # Determine update level based on thresholds
+     if total_lines < inc.skip_lines:
+         level = UpdateLevel.SKIP
+         message = f"Changes ({total_lines} lines) below skip threshold ({inc.skip_lines})"
+     elif total_lines < inc.current_only:
+         level = UpdateLevel.CURRENT
+         message = f"Small changes ({total_lines} lines), update current dirs only"
+     elif total_lines < inc.suggest_full:
+         level = UpdateLevel.AFFECTED
+         message = f"Medium changes ({total_lines} lines), update affected dirs"
+     else:
+         level = UpdateLevel.FULL
+         message = f"Large changes ({total_lines} lines), consider full update"
+
+     return ChangeAnalysis(
+         files=code_changes,
+         total_additions=total_add,
+         total_deletions=total_del,
+         affected_dirs=affected_dirs,
+         level=level,
+         message=message,
+     )
+
+
+ def get_dirs_to_update(
+     analysis: ChangeAnalysis,
+     config: Config,
+ ) -> list[Path]:
+     """Get list of directories that should be updated.
+
+     Args:
+         analysis: Change analysis result
+         config: codeindex configuration
+
+     Returns:
+         List of directory paths to update
+     """
+     if analysis.level == UpdateLevel.SKIP:
+         return []
+
+     # For CURRENT, AFFECTED, FULL - update affected dirs
+     dirs = list(analysis.affected_dirs)
+
+     # Filter to only include configured directories
+     include_patterns = config.include
+     filtered_dirs = []
+
+     for d in dirs:
+         d_str = str(d)
+         for pattern in include_patterns:
+             # Simple prefix matching (could be enhanced with glob)
+             pattern_clean = pattern.rstrip("/")
+             if d_str.startswith(pattern_clean) or d_str == pattern_clean:
+                 filtered_dirs.append(d)
+                 break
+
+     return sorted(filtered_dirs)
+
+
+ def should_update_project_index(analysis: ChangeAnalysis, config: Config) -> bool:
+     """Determine if PROJECT_INDEX.md should be updated.
+
+     Args:
+         analysis: Change analysis result
+         config: codeindex configuration
+
+     Returns:
+         True if PROJECT_INDEX.md should be updated
+     """
+     if not config.incremental.auto_project_index:
+         return False
+
+     # Update project index for large changes or multiple directories
+     return analysis.level == UpdateLevel.FULL or len(analysis.affected_dirs) > 2
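
To close, a minimal sketch of the incremental decision flow defined above. The threshold names (skip_lines, current_only, suggest_full) and the auto_project_index flag come straight from this module; the import paths and the Config() construction are again assumptions, not confirmed by the diff.

    # Hypothetical usage sketch -- import paths and Config() are assumptions.
    from codeindex.config import Config        # assumed import path
    from codeindex.incremental import (        # assumed module name
        UpdateLevel,
        analyze_changes,
        get_dirs_to_update,
        should_update_project_index,
    )

    config = Config()  # assumed; thresholds live under config.incremental

    # Classify the last commit's churn against the configured thresholds:
    # < skip_lines -> SKIP, < current_only -> CURRENT,
    # < suggest_full -> AFFECTED, otherwise FULL.
    analysis = analyze_changes(config, since="HEAD~1", until="HEAD")
    print(analysis.to_dict())

    if analysis.level is not UpdateLevel.SKIP:
        for d in get_dirs_to_update(analysis, config):
            print(f"would regenerate README_AI.md in {d}")

    if should_update_project_index(analysis, config):
        print("would also refresh PROJECT_INDEX.md")

Note that git numstat reports binary files with "-" counts; they contribute zero changed lines, so a commit touching only binaries resolves to SKIP.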