ai-codeindex 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,295 @@
1
+ """CLI commands for symbol indexing and dependency analysis.
2
+
3
+ This module provides commands for generating project-wide indices
4
+ and analyzing code dependencies and affected directories.
5
+ """
6
+
7
+ import json
8
+ from datetime import datetime
9
+ from pathlib import Path
10
+
11
+ import click
12
+ from rich.table import Table
13
+
14
+ from .cli_common import console
15
+ from .config import Config
16
+ from .incremental import (
17
+ UpdateLevel,
18
+ analyze_changes,
19
+ get_dirs_to_update,
20
+ should_update_project_index,
21
+ )
22
+ from .scanner import find_all_directories
23
+ from .semantic_extractor import DirectoryContext, SemanticExtractor
24
+ from .symbol_index import GlobalSymbolIndex
25
+
26
+
27
def extract_module_purpose(
    dir_path: Path,
    config: Config,
    output_file: str = "README_AI.md"
) -> str:
    """Extract a short module purpose/description for a directory.

    Resolution order:
      1. Semantic extraction via SemanticExtractor (if enabled in config).
      2. The first paragraph under a "## Purpose" (or "## 目的") heading in
         the directory's README file, truncated to ~80 characters.
      3. A generic "<name> module" fallback.

    Args:
        dir_path: Path to the directory
        config: Configuration object
        output_file: README filename to check

    Returns:
        Module purpose/description string (never empty).
    """
    # Check if semantic extraction is enabled
    if config.indexing.semantic.enabled:
        try:
            extractor = SemanticExtractor(
                use_ai=config.indexing.semantic.use_ai,
                ai_command=config.ai_command if config.indexing.semantic.use_ai else None,
            )

            # Collect directory entries in a single pass
            # (the original scanned the directory twice).
            files: list[str] = []
            subdirs: list[str] = []
            if dir_path.is_dir():
                for entry in dir_path.iterdir():
                    if entry.is_file():
                        files.append(entry.name)
                    elif entry.is_dir():
                        subdirs.append(entry.name)

            # Lightweight symbol guess: source filenames without extension.
            symbols = [
                f.rsplit('.', 1)[0]
                for f in files
                if f.endswith(('.py', '.php', '.java', '.ts', '.js'))
            ]

            context = DirectoryContext(
                path=str(dir_path),
                files=files,
                subdirs=subdirs,
                symbols=symbols,
                imports=[],
            )

            semantic = extractor.extract_directory_semantic(context)
            return semantic.description

        except Exception:
            # Best-effort: fall through to README extraction on any failure.
            pass

    # Try to extract from README_AI.md
    readme_path = dir_path / output_file
    if readme_path.exists():
        try:
            # FIX: explicit encoding — the platform default codec (e.g.
            # cp1252 on Windows) can fail on the "## 目的" heading.
            lines = readme_path.read_text(encoding="utf-8").split("\n")
            for i, line in enumerate(lines):
                if line.startswith("## Purpose") or line.startswith("## 目的"):
                    # First non-empty, non-heading line within the next few lines.
                    for j in range(i + 1, min(i + 5, len(lines))):
                        if lines[j].strip() and not lines[j].startswith("#"):
                            full_purpose = lines[j].strip()
                            if len(full_purpose) <= 80:
                                return full_purpose
                            # Smart truncate at a word boundary when one
                            # exists in the second half of the excerpt.
                            truncated = full_purpose[:80]
                            last_space = truncated.rfind(" ")
                            if last_space > 40:
                                return truncated[:last_space] + "..."
                            return truncated + "..."
                    break
        except Exception:
            # Unreadable README: ignore and use the generic fallback.
            pass

    # Fallback to generic description
    return f"{dir_path.name} module"
120
+
121
+
122
@click.command()
@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
@click.option("--output", "-o", default="PROJECT_INDEX.md", help="Output filename")
def index(root: Path, output: str):
    """Generate PROJECT_INDEX.md - a lightweight project overview."""
    root = root.resolve()
    config = Config.load()

    console.print(f"[bold]Generating project index:[/bold] {root}")

    # Find all indexed directories (those with README_AI.md)
    dirs = find_all_directories(root, config)
    indexed_dirs = [d for d in dirs if (d / config.output_file).exists()]

    if not indexed_dirs:
        console.print("[yellow]No indexed directories found.[/yellow]")
        console.print("Run 'codeindex scan' first to generate README_AI.md files.")
        return

    # Project metadata defaults; refined from pyproject.toml when available.
    project_name = root.name
    description = ""
    entry_points = []

    pyproject = root / "pyproject.toml"
    if pyproject.exists():
        try:
            import tomllib  # Python 3.11+
        except ImportError:
            import tomli as tomllib  # backport for older interpreters

        try:
            with open(pyproject, "rb") as f:
                data = tomllib.load(f)
        except Exception:
            # FIX: a malformed pyproject.toml previously crashed the whole
            # command; fall back to directory-name metadata instead.
            data = {}
        project = data.get("project", {})
        project_name = project.get("name", project_name)
        description = project.get("description", "")
        scripts = project.get("scripts", {})
        entry_points = [f"- `{k}`: `{v}`" for k, v in scripts.items()]

    # Build module table rows.
    modules = []
    for d in sorted(indexed_dirs):
        rel_path = d.relative_to(root)
        # Purpose via semantic extraction with README fallback.
        purpose = extract_module_purpose(d, config, config.output_file)
        modules.append(f"| `{rel_path}/` | {purpose} |")

    timestamp = datetime.now().strftime("%Y-%m-%d")
    # chr(10) == "\n": backslashes are not allowed inside f-string
    # expressions before Python 3.12.
    content = f"""# Project Index: {project_name}

> Generated: {timestamp}
> {description}

## Modules

| Path | Purpose |
|------|---------|
{chr(10).join(modules)}

## Entry Points

{chr(10).join(entry_points) if entry_points else "_No CLI entry points defined_"}

---
*Generated by codeindex. See each directory's README_AI.md for details.*
"""

    output_path = root / output
    # FIX: write with explicit UTF-8 so non-ASCII purposes do not fail on
    # platforms whose default encoding is not UTF-8.
    output_path.write_text(content, encoding="utf-8")
    console.print(f"[green]✓ Created:[/green] {output_path}")
    console.print(f"[dim]Indexed {len(indexed_dirs)} modules[/dim]")
197
+
198
+
199
@click.command()
@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
@click.option("--output", "-o", default="PROJECT_SYMBOLS.md", help="Output filename")
@click.option("--quiet", "-q", is_flag=True, help="Minimal output")
def symbols(root: Path, output: str, quiet: bool):
    """Generate PROJECT_SYMBOLS.md - a global symbol index for all classes."""
    root = root.resolve()
    config = Config.load()
    verbose = not quiet

    if verbose:
        console.print(f"[bold]Generating global symbol index:[/bold] {root}")
        console.print("[dim]→ Scanning all directories...[/dim]")

    # Walk every indexed directory and gather symbol statistics.
    index_builder = GlobalSymbolIndex(root, config)
    stats = index_builder.collect_symbols(quiet=quiet)
    sym_count = stats["symbols"]

    if verbose:
        console.print(f"[dim]→ Found {sym_count} symbols in {stats['files']} files[/dim]")

    # Nothing collected means the per-directory scan has not run yet.
    if sym_count == 0:
        console.print("[yellow]No symbols found. Run 'codeindex scan' first.[/yellow]")
        return

    if verbose:
        console.print("[dim]→ Generating index...[/dim]")

    output_path = index_builder.generate_index(output)

    console.print(f"[green]✓ Created:[/green] {output_path}")
    index_msg = f"Indexed {sym_count} symbols from {stats['directories']} directories"
    console.print(f"[dim]{index_msg}[/dim]")
230
+
231
+
232
@click.command()
@click.option("--since", default="HEAD~1", help="Starting commit reference")
@click.option("--until", default="HEAD", help="Ending commit reference")
@click.option("--json", "as_json", is_flag=True, help="Output as JSON")
def affected(since: str, until: str, as_json: bool):
    """Analyze git changes and show affected directories.

    Shows which directories need README_AI.md updates based on code changes.
    """
    config = Config.load()
    root = Path.cwd().resolve()

    if not as_json:
        console.print(f"[bold]Analyzing changes:[/bold] {since}..{until}")

    analysis = analyze_changes(config, since, until, root)

    # Machine-readable mode: emit JSON and nothing else.
    if as_json:
        print(json.dumps(analysis.to_dict(), indent=2))
        return

    # Nothing relevant changed — report and stop.
    if analysis.level == UpdateLevel.SKIP:
        console.print(f"[green]✓ {analysis.message}[/green]")
        return

    # Summary statistics table.
    table = Table(title="Change Analysis")
    table.add_column("Metric", style="bold")
    table.add_column("Value")
    for metric, value in (
        ("Files changed", str(len(analysis.files))),
        ("Lines added", f"+{analysis.total_additions}"),
        ("Lines deleted", f"-{analysis.total_deletions}"),
        ("Total changes", str(analysis.total_lines)),
        ("Update level", analysis.level.value.upper()),
    ):
        table.add_row(metric, value)
    console.print(table)

    # Per-directory status: ✓ README exists, ⚠ it does not.
    dirs_to_update = get_dirs_to_update(analysis, config)
    if dirs_to_update:
        console.print("\n[bold]Directories to update:[/bold]")
        for directory in dirs_to_update:
            rel = directory.relative_to(root) if directory.is_absolute() else directory
            if (root / rel / config.output_file).exists():
                status = "[green]✓[/green]"
            else:
                status = "[yellow]⚠[/yellow]"
            console.print(f" {status} {rel}/")

    # Recommendation from the analyzer.
    console.print(f"\n[dim]{analysis.message}[/dim]")

    if should_update_project_index(analysis, config):
        console.print("[yellow]→ Consider updating PROJECT_INDEX.md[/yellow]")

    # Suggested follow-up command.
    if dirs_to_update:
        console.print("\n[bold]Suggested command:[/bold]")
        if len(dirs_to_update) == 1:
            console.print(f" codeindex scan {dirs_to_update[0]}")
        else:
            console.print(" codeindex list-dirs | xargs -P 4 -I {} codeindex scan {}")
@@ -0,0 +1,238 @@
1
+ """CLI commands for technical debt analysis.
2
+
3
+ This module provides the tech-debt command for analyzing technical debt
4
+ in a directory, including file size issues, god classes, and symbol overload.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ import click
10
+
11
+ from .cli_common import console
12
+ from .config import Config
13
+ from .symbol_scorer import ScoringContext, SymbolImportanceScorer
14
+ from .tech_debt import TechDebtDetector, TechDebtReport, TechDebtReporter
15
+ from .tech_debt_formatters import ConsoleFormatter, JSONFormatter, MarkdownFormatter
16
+
17
+
18
+ def _find_source_files(
19
+ path: Path, recursive: bool, languages: list[str] | None = None
20
+ ) -> list[Path]:
21
+ """Find source files in the given directory based on language configuration.
22
+
23
+ Args:
24
+ path: Directory path to search
25
+ recursive: If True, search subdirectories recursively
26
+ languages: List of languages to include (optional, uses config if None)
27
+
28
+ Returns:
29
+ List of source file paths
30
+ """
31
+ # Load languages from config if not provided
32
+ if languages is None:
33
+ config = Config.load()
34
+ languages = config.languages
35
+
36
+ # Map languages to file extensions
37
+ extensions = {
38
+ 'python': '*.py',
39
+ 'php': '*.php',
40
+ 'javascript': '*.js',
41
+ 'typescript': '*.ts',
42
+ 'java': '*.java',
43
+ 'go': '*.go',
44
+ 'rust': '*.rs',
45
+ 'cpp': '*.cpp',
46
+ 'c': '*.c',
47
+ }
48
+
49
+ files = []
50
+ for lang in languages:
51
+ ext = extensions.get(lang)
52
+ if ext:
53
+ if recursive:
54
+ files.extend([f for f in path.rglob(ext) if f.is_file()])
55
+ else:
56
+ files.extend([f for f in path.glob(ext) if f.is_file()])
57
+
58
+ return files
59
+
60
+
61
def _analyze_files(
    files: list[Path],
    detector: TechDebtDetector,
    reporter: TechDebtReporter,
    show_progress: bool,
) -> None:
    """Analyze files and add results to reporter.

    Args:
        files: List of source files to analyze
        detector: Technical debt detector instance
        reporter: Reporter to collect results
        show_progress: Whether to show progress messages
    """
    from .parser import parse_file

    # Known extensions with a canonical language name; anything else falls
    # back to the bare extension (or 'unknown' when there is none).
    type_by_ext = {
        '.py': 'python',
        '.php': 'php',
        '.js': 'javascript',
        '.ts': 'typescript',
    }

    for file_path in files:
        try:
            parse_result = parse_file(file_path)

            # Unparseable files are skipped, optionally with a warning.
            if parse_result.error:
                if show_progress:
                    console.print(
                        f"[yellow]⚠ Skipping {file_path.name}: {parse_result.error}[/yellow]"
                    )
                continue

            ext = file_path.suffix.lower()
            file_type = type_by_ext.get(ext, ext[1:] if ext else 'unknown')

            # Importance scorer parameterized by this file's context.
            scorer = SymbolImportanceScorer(
                ScoringContext(
                    framework=None,
                    file_type=file_type,
                    total_symbols=len(parse_result.symbols),
                )
            )

            debt_analysis = detector.analyze_file(parse_result, scorer)

            # Symbol-overload findings are merged into the same analysis.
            symbol_issues, symbol_analysis = detector.analyze_symbol_overload(
                parse_result, scorer
            )
            debt_analysis.issues.extend(symbol_issues)

            reporter.add_file_result(
                file_path=file_path,
                debt_analysis=debt_analysis,
                symbol_analysis=symbol_analysis,
            )

        except Exception as e:
            # Per-file failures must not abort the whole scan.
            if show_progress:
                console.print(f"[red]✗ Error analyzing {file_path.name}: {e}[/red]")
            continue
132
+
133
+
134
def _format_and_output(
    report: TechDebtReport,
    format: str,
    output: Path | None,
    quiet: bool,
) -> None:
    """Format and output the technical debt report.

    Args:
        report: Technical debt report to format
        format: Output format (console, markdown, or json)
        output: Optional output file path
        quiet: Whether to suppress status messages
    """
    # Select formatter; any value other than console/markdown means json
    # (click's Choice option restricts the set upstream).
    if format == "console":
        formatter = ConsoleFormatter()
    elif format == "markdown":
        formatter = MarkdownFormatter()
    else:  # json
        formatter = JSONFormatter()

    formatted_output = formatter.format(report)

    # Write output
    if output:
        # FIX: explicit UTF-8 so reports containing non-ASCII symbol names
        # do not fail on platforms with a legacy default encoding.
        output.write_text(formatted_output, encoding="utf-8")
        if not quiet:
            console.print(f"[green]✓ Report written to {output}[/green]")
    else:
        # Print to stdout (kept clean for JSON piping)
        print(formatted_output)
166
+
167
+
168
@click.command()
@click.argument("path", type=click.Path(exists=True, file_okay=False, path_type=Path))
@click.option(
    "--format",
    type=click.Choice(["console", "markdown", "json"], case_sensitive=False),
    default="console",
    help="Output format",
)
@click.option(
    "--output",
    "-o",
    type=click.Path(path_type=Path),
    help="Write output to file instead of stdout",
)
@click.option(
    "--recursive",
    "-r",
    is_flag=True,
    help="Recursively scan subdirectories",
)
@click.option(
    "--quiet",
    "-q",
    is_flag=True,
    help="Minimal output",
)
def tech_debt(path: Path, format: str, output: Path | None, recursive: bool, quiet: bool):
    """Analyze technical debt in a directory.

    Scans source files for technical debt issues including:
    - Super large files (>5000 lines)
    - Large files (>2000 lines)
    - God Classes (>50 methods)
    - Massive symbol count (>100 symbols)
    - High noise ratio (>50% low-quality symbols)

    Results can be output in console, markdown, or JSON format.
    """
    try:
        config = Config.load()
        detector = TechDebtDetector(config)
        reporter = TechDebtReporter()

        files_to_analyze = _find_source_files(path, recursive)

        if files_to_analyze:
            # JSON on stdout must stay machine-parseable, so progress
            # chatter is suppressed in that mode (and under --quiet).
            json_to_stdout = format == "json" and output is None
            show_progress = not quiet and not json_to_stdout
            if show_progress:
                console.print(f"[dim]Analyzing {len(files_to_analyze)} source files...[/dim]")
            _analyze_files(files_to_analyze, detector, reporter, show_progress)

        # An empty directory still produces a (vacuous) report.
        report = reporter.generate_report()
        _format_and_output(report, format, output, quiet)

    except Exception as e:
        console.print(f"[red]✗ Error: {e}[/red]")
        raise click.Abort()