ai-codeindex 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeindex/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ """
2
+ codeindex - AI-native code indexing tool for large codebases
3
+
4
+ Usage:
5
+ codeindex scan <path> # Scan a directory and generate README_AI.md
6
+ codeindex init # Initialize .codeindex.yaml
7
+ codeindex status # Show indexing status
8
+ """
9
+
10
# Version string for this release of the package.
__version__ = "0.7.0"
# Limit `from codeindex import *` to the version string only.
__all__ = ["__version__"]
@@ -0,0 +1,83 @@
1
+ """Adaptive symbols configuration.
2
+
3
+ This module defines the configuration structure for adaptive symbol extraction,
4
+ which allows dynamically adjusting the number of symbols to extract based on
5
+ file size and other factors.
6
+ """
7
+
8
+ from dataclasses import dataclass, field
9
+
10
+
11
@dataclass
class AdaptiveSymbolsConfig:
    """Settings that drive adaptive symbol extraction.

    With adaptive extraction, the number of symbols listed for a file in
    README_AI.md scales with the file's size: large files get more room,
    small files stay short.

    Attributes:
        enabled: Turns adaptive extraction on. When False, the traditional
            max_per_file setting is used instead.
        thresholds: Upper line-count bound (exclusive) of each size category.
            Expected keys: tiny, small, medium, large, xlarge, huge.
        limits: Maximum number of symbols to show per size category.
            Expected keys: tiny, small, medium, large, xlarge, huge, mega.
        min_symbols: Floor for the number of symbols shown.
        max_symbols: Ceiling for the number of symbols shown.

    Size categories (checked in this order against the line count):
        - tiny:   fewer than thresholds["tiny"] lines
        - small:  fewer than thresholds["small"] lines
        - medium: fewer than thresholds["medium"] lines
        - large:  fewer than thresholds["large"] lines
        - xlarge: fewer than thresholds["xlarge"] lines
        - huge:   fewer than thresholds["huge"] lines
        - mega:   thresholds["huge"] lines or more

    Example:
        >>> config = AdaptiveSymbolsConfig(
        ...     enabled=True,
        ...     thresholds={"small": 200, "medium": 500, "large": 1000},
        ...     limits={"small": 15, "medium": 30, "large": 50},
        ... )
        >>> config.enabled
        True
        >>> config.limits["medium"]
        30
    """

    # Feature switch; off unless explicitly enabled.
    enabled: bool = False
    # Each instance gets its own dict (default_factory), so instances never
    # share mutable state through the defaults.
    thresholds: dict[str, int] = field(default_factory=dict)
    limits: dict[str, int] = field(default_factory=dict)
    # Global floor / ceiling for the number of symbols shown.
    min_symbols: int = 5
    max_symbols: int = 200
59
+
60
+
61
# Package-wide defaults (chosen to match the planning document).
DEFAULT_ADAPTIVE_CONFIG = AdaptiveSymbolsConfig(
    # Kept off by default so existing setups behave as before.
    enabled=False,
    # Exclusive upper bounds, in lines, for each size category.
    thresholds=dict(
        tiny=100,
        small=200,
        medium=500,
        large=1000,
        xlarge=2000,
        huge=5000,
    ),
    # Symbols shown per size category.
    limits=dict(
        tiny=10,
        small=15,
        medium=30,
        large=50,
        xlarge=80,
        huge=120,
        mega=150,  # files at or above the "huge" threshold (>= 5000 lines)
    ),
    min_symbols=5,
    max_symbols=200,
)
@@ -0,0 +1,171 @@
1
+ """Adaptive symbol selector for dynamic symbol limit calculation.
2
+
3
+ This module implements the core algorithm for adaptive symbol extraction,
4
+ which adjusts the number of symbols to display based on file size.
5
+ """
6
+
7
+ from codeindex.adaptive_config import DEFAULT_ADAPTIVE_CONFIG, AdaptiveSymbolsConfig
8
+
9
+
10
class AdaptiveSymbolSelector:
    """Pick how many symbols to show for a file, based on its size.

    Larger files are allowed more symbols so they are covered better, while
    small files stay concise. The selection runs in three steps:

    1. Map the line count to a size category
       (tiny/small/medium/large/xlarge/huge/mega).
    2. Look up the symbol limit configured for that category.
    3. Clamp the limit by the symbols actually available and by the
       configured min/max.

    Attributes:
        config: The effective AdaptiveSymbolsConfig (caller values layered
            over the defaults).

    Example:
        >>> selector = AdaptiveSymbolSelector()
        >>> selector.calculate_limit(8891, 57)  # mega limit is 150, only 57 exist
        57
        >>> selector.calculate_limit(500, 100)  # large category, limit 50
        50
    """

    def __init__(self, config: AdaptiveSymbolsConfig | None = None):
        """Build the effective configuration.

        Args:
            config: Caller-supplied settings, or None to use
                DEFAULT_ADAPTIVE_CONFIG as is.
        """
        defaults = DEFAULT_ADAPTIVE_CONFIG
        chosen = defaults if config is None else config

        # Always build fresh dicts: the shared defaults are never handed out
        # by reference, and keys missing from a custom config fall back to
        # their default values.
        self.config = AdaptiveSymbolsConfig(
            enabled=chosen.enabled,
            thresholds={**defaults.thresholds, **chosen.thresholds},
            limits={**defaults.limits, **chosen.limits},
            min_symbols=chosen.min_symbols,
            max_symbols=chosen.max_symbols,
        )

    def calculate_limit(self, file_lines: int, total_symbols: int) -> int:
        """Return the number of symbols to display for a file.

        Args:
            file_lines: Number of lines in the file.
            total_symbols: Number of symbols available in the file.

        Returns:
            The limit of the file's size category, capped at total_symbols
            and max_symbols, and raised to min_symbols when the file has at
            least that many symbols.

        Example:
            >>> selector = AdaptiveSymbolSelector()
            >>> selector.calculate_limit(100, 20)  # small file, 20 symbols
            15
            >>> selector.calculate_limit(10000, 200)  # mega file, 200 symbols
            150
        """
        size_category = self._determine_size_category(file_lines)
        category_limit = self.config.limits[size_category]
        return self._apply_constraints(category_limit, total_symbols)

    def _determine_size_category(self, lines: int) -> str:
        """Map a line count to its size category name.

        Categories are tried from smallest to largest; the first one whose
        threshold is strictly greater than ``lines`` wins:

        - tiny:   < thresholds["tiny"]    (default: < 100)
        - small:  < thresholds["small"]   (default: 100-199)
        - medium: < thresholds["medium"]  (default: 200-499)
        - large:  < thresholds["large"]   (default: 500-999)
        - xlarge: < thresholds["xlarge"]  (default: 1000-1999)
        - huge:   < thresholds["huge"]    (default: 2000-4999)
        - mega:   everything else         (default: >= 5000)

        Args:
            lines: Number of lines in the file.

        Returns:
            The size category name.

        Example:
            >>> selector = AdaptiveSymbolSelector()
            >>> selector._determine_size_category(50)
            'tiny'
            >>> selector._determine_size_category(150)
            'small'
            >>> selector._determine_size_category(8891)
            'mega'
        """
        bounds = self.config.thresholds
        for name in ("tiny", "small", "medium", "large", "xlarge", "huge"):
            if lines < bounds[name]:
                return name
        return "mega"

    def _apply_constraints(self, limit: int, total_symbols: int) -> int:
        """Clamp a category limit to what is available and allowed.

        Applied in order:

        1. Cap at total_symbols (cannot show more symbols than exist).
        2. Raise to min_symbols, but only when the file has at least
           min_symbols symbols to show.
        3. Cap at max_symbols.

        Args:
            limit: Limit taken from the size category.
            total_symbols: Number of symbols available in the file.

        Returns:
            The constrained limit.

        Example:
            >>> selector = AdaptiveSymbolSelector()
            >>> selector._apply_constraints(50, 30)  # only 30 available
            30
            >>> selector._apply_constraints(250, 300)  # max is 200
            200
            >>> selector._apply_constraints(3, 100)  # min is 5
            5
            >>> selector._apply_constraints(10, 1)  # only 1 available
            1
        """
        floor = self.config.min_symbols
        ceiling = self.config.max_symbols

        bounded = min(limit, total_symbols)
        # The floor is skipped when the file has fewer symbols than the floor.
        if total_symbols >= floor and bounded < floor:
            bounded = floor
        return min(bounded, ceiling)
codeindex/ai_helper.py ADDED
@@ -0,0 +1,48 @@
1
+ """AI enhancement helper functions (Epic 4 Story 4.1).
2
+
3
+ This module provides reusable functions for AI enhancement operations,
4
+ eliminating code duplication in scan and scan-all commands.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ from codeindex.parser import ParseResult
10
+
11
+
12
def aggregate_parse_results(
    parse_results: list[ParseResult],
    path: Path,
) -> ParseResult:
    """Fold several parse results into a single one.

    The symbols of every input are concatenated in input order and the line
    counts are summed. Handy when a whole directory of files should be
    analysed as one unit.

    Args:
        parse_results: Parse results to combine.
        path: Path to record on the combined result.

    Returns:
        A ParseResult holding all symbols and the total line count.

    Example:
        >>> pr1 = ParseResult(Path("a.py"), file_lines=100, symbols=[...])
        >>> pr2 = ParseResult(Path("b.py"), file_lines=200, symbols=[...])
        >>> aggregate_parse_results([pr1, pr2], Path("dir")).file_lines
        300
    """
    merged_symbols = [
        symbol for result in parse_results for symbol in result.symbols
    ]
    line_total = sum(result.file_lines for result in parse_results)
    return ParseResult(path=path, file_lines=line_total, symbols=merged_symbols)
47
+
48
+
codeindex/cli.py ADDED
@@ -0,0 +1,40 @@
1
+ """CLI entry point for codeindex.
2
+
3
+ This module serves as the main entry point for the codeindex CLI tool.
4
+ It imports and registers commands from specialized modules to keep the
5
+ codebase organized and maintainable.
6
+ """
7
+
8
+ import click
9
+
10
+ from .cli_config import init, list_dirs, status
11
+ from .cli_docs import docs
12
+ from .cli_hooks import hooks
13
+ from .cli_scan import scan, scan_all
14
+ from .cli_symbols import affected, index, symbols
15
+ from .cli_tech_debt import tech_debt
16
+
17
+
18
# The import package is "codeindex" but the distribution is published as
# "ai-codeindex". Without an explicit package_name, Click looks up the
# installed version under the import package's name and `--version` fails
# because no distribution with that name exists.
@click.group()
@click.version_option(package_name="ai-codeindex")
def main():
    """codeindex - AI-native code indexing tool for large codebases."""
23
+
24
+
25
# Attach the sub-commands imported from the specialized CLI modules to the
# root group.
for _command in (
    scan,
    scan_all,
    init,
    status,
    list_dirs,
    index,
    symbols,
    affected,
    tech_debt,
    hooks,
    docs,
):
    main.add_command(_command)


if __name__ == "__main__":
    main()
@@ -0,0 +1,10 @@
1
+ """Common utilities for CLI modules.
2
+
3
+ This module provides shared resources used across all CLI command modules,
4
+ such as the Rich console instance for formatted output.
5
+ """
6
+
7
+ from rich.console import Console
8
+
9
# Shared console instance for all CLI commands: the command modules import
# this one object rather than constructing their own Console.
console = Console()
@@ -0,0 +1,97 @@
1
+ """CLI commands for configuration and project status.
2
+
3
+ This module provides commands for initializing configuration files,
4
+ checking indexing status, and listing indexable directories.
5
+ """
6
+
7
+ from pathlib import Path
8
+
9
+ import click
10
+ from rich.table import Table
11
+
12
+ from .cli_common import console
13
+ from .config import DEFAULT_CONFIG_NAME, Config
14
+ from .scanner import find_all_directories
15
+
16
+
17
@click.command()
@click.option("--force", "-f", is_flag=True, help="Overwrite existing config")
def init(force: bool):
    """Initialize .codeindex.yaml configuration file."""
    target = Path.cwd() / DEFAULT_CONFIG_NAME

    # Leave an existing config alone unless the user asked to overwrite it.
    already_present = target.exists()
    if already_present and not force:
        console.print(f"[yellow]Config already exists:[/yellow] {target}")
        console.print("Use --force to overwrite")
        return

    console.print(f"[green]Created:[/green] {Config.create_default()}")

    # Short pointer to the settings most users will want to edit first.
    for hint in (
        "\nEdit this file to configure:",
        " - ai_command: Your AI CLI command",
        " - include/exclude: Directories to scan",
    ):
        console.print(hint)
34
+
35
@click.command()
@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
def status(root: Path):
    """Show indexing status for the project."""
    root = root.resolve()
    config = Config.load()

    console.print(f"[bold]Project:[/bold] {root}")
    console.print(f"[bold]Config:[/bold] {DEFAULT_CONFIG_NAME}")

    # Every directory that is expected to carry an index file.
    candidates = find_all_directories(root, config)
    if not candidates:
        console.print("[yellow]No indexable directories found[/yellow]")
        return

    # A directory counts as indexed once its output file exists.
    missing = [d for d in candidates if not (d / config.output_file).exists()]

    total = len(candidates)
    indexed_count = total - len(missing)
    # total is non-zero here: the empty case returned above.
    coverage = indexed_count / total * 100

    table = Table(title="Indexing Status")
    table.add_column("Status", style="bold")
    table.add_column("Count")
    table.add_column("Percentage")
    table.add_row("[green]Indexed[/green]", str(indexed_count), f"{coverage:.1f}%")
    table.add_row("[yellow]Not indexed[/yellow]", str(len(missing)), f"{100-coverage:.1f}%")
    table.add_row("Total", str(total), "100%")
    console.print(table)

    # The individual directories are listed only when there are at most ten
    # of them; longer lists are summarized by the table alone.
    if 0 < len(missing) <= 10:
        console.print("\n[dim]Not indexed:[/dim]")
        for directory in missing:
            rel = directory.relative_to(root)
            console.print(f" {rel}")
85
+
86
@click.command()
@click.option("--root", type=click.Path(exists=True, file_okay=False, path_type=Path), default=".")
def list_dirs(root: Path):
    """List all directories that would be indexed."""
    base = root.resolve()

    # One path per line, relative to the resolved root, written with the
    # built-in print rather than the shared Rich console.
    for directory in find_all_directories(base, Config.load()):
        print(directory.relative_to(base))
codeindex/cli_docs.py ADDED
@@ -0,0 +1,66 @@
1
+ """Documentation CLI commands for codeindex."""
2
+
3
+ from pathlib import Path
4
+
5
+ import click
6
+
7
+ from .cli_common import console
8
+
9
+
10
# Command group only: the body is intentionally empty, sub-commands are
# attached to it with @docs.command().
@click.group()
def docs():
    """Show codeindex documentation."""
14
+
15
+
16
@docs.command()
def show_ai_guide():
    """
    Show AI integration guide for Git Hooks.

    This command outputs the complete guide that AI Code tools can read
    to understand codeindex Git Hooks and update user project documentation.

    Usage:
        codeindex docs show-ai-guide
    """
    # The guide is looked up three directory levels above this module.
    # NOTE(review): that matches a src/-layout source checkout; for an
    # installed wheel this likely resolves outside the package, so confirm
    # how examples/ is shipped.
    package_dir = Path(__file__).parent.parent.parent
    guide_path = package_dir / "examples" / "ai-integration-guide.md"

    if not guide_path.exists():
        console.print(
            "[red]Error:[/red] AI integration guide not found.\n"
            f"Expected at: {guide_path}\n\n"
            "Please reinstall codeindex or check installation.",
            style="red",
        )
        raise click.Abort()

    # Decode explicitly instead of relying on the platform default encoding
    # (assumes the guide is stored as UTF-8; confirm if that ever changes).
    content = guide_path.read_text(encoding="utf-8")

    rule = "[bold cyan]═══════════════════════════════════════════════════[/bold cyan]"

    console.print("\n" + rule)
    console.print(
        "[bold cyan] AI Integration Guide: codeindex Git Hooks[/bold cyan]"
    )
    console.print(rule + "\n")

    # The guide is Markdown, not Rich markup. With markup enabled, bracketed
    # text such as "[options]" or "[x]" would be consumed as a style tag and
    # vanish from the output, and a stray "[/...]" would raise MarkupError.
    console.print(content, markup=False)

    console.print("\n" + rule)
    console.print(
        "[dim]Tip: Your AI Code can read this output to understand Git Hooks[/dim]"
    )
    console.print(
        "[dim]Run: codeindex docs show-ai-guide > guide.md (to save to file)[/dim]"
    )
    console.print(rule + "\n")