ai-codeindex 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
codeindex/config.py ADDED
@@ -0,0 +1,479 @@
1
+ """Configuration management for codeindex."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Optional
6
+
7
+ import yaml
8
+
9
+ from codeindex.adaptive_config import DEFAULT_ADAPTIVE_CONFIG, AdaptiveSymbolsConfig
10
+
11
# Default file names and CLI behaviour for codeindex.
DEFAULT_CONFIG_NAME = ".codeindex.yaml"
DEFAULT_OUTPUT_FILE = "README_AI.md"
# {prompt} is substituted with the generated prompt at invocation time.
DEFAULT_AI_COMMAND = 'claude -p "{prompt}" --allowedTools "Read"'
DEFAULT_INCLUDE = ["src/", "lib/", "tests/", "examples/"]
DEFAULT_EXCLUDE = [
    "**/__pycache__/**",
    "**/node_modules/**",
    "**/.git/**",
]
DEFAULT_LANGUAGES = ["python"]
DEFAULT_PARALLEL_WORKERS = 4
DEFAULT_BATCH_SIZE = 50

# Incremental update defaults (mirrors IncrementalConfig's field defaults).
DEFAULT_INCREMENTAL = {
    "enabled": True,
    "thresholds": {
        "skip_lines": 5,       # Changes < this: skip update
        "current_only": 50,    # Changes < this: update current dir only
        "suggest_full": 200,   # Changes > this: suggest full update
    },
    "auto_update": {
        "on_commit": True,        # Auto-update on git commit
        "project_index": False,   # Auto-update PROJECT_INDEX.md
    },
}

# Indexing strategy defaults. IndexingConfig.from_dict reads
# DEFAULT_INDEXING["grouping"]["patterns"] as the fallback grouping map.
DEFAULT_INDEXING = {
    "max_readme_size": 50 * 1024,  # 50KB
    "symbols": {
        "max_per_file": 15,
        "include_visibility": ["public", "protected"],
        "exclude_patterns": ["get*", "set*", "__*"],
    },
    "grouping": {
        "enabled": True,
        "by": "suffix",  # suffix | function | none
        "patterns": {
            # Class-name suffix -> user-facing group label. The labels are
            # runtime strings emitted into generated READMEs; do not rename.
            "Controller": "HTTP 请求处理",
            "Service": "业务逻辑",
            "Model": "数据模型",
            "Repository": "数据访问",
            "Command": "命令行",
            "Event": "事件处理",
            "Job": "后台任务",
            "Middleware": "中间件",
            "Exception": "异常处理",
            "Helper": "工具函数",
        },
    },
    "levels": {
        "root": "overview",       # root: overview and module list only
        "module": "navigation",   # module: navigation + key classes
        "leaf": "detailed",       # leaf: full symbol information
    },
}
68
+
69
# Template written verbatim by Config.create_default. It is a runtime string:
# the YAML content (including its non-English comments) must stay as-is.
# NOTE(review): the template diverges from the module defaults above -- it
# ships `languages: [php]` and `parallel_workers: 8`, while DEFAULT_LANGUAGES
# is ["python"] and DEFAULT_PARALLEL_WORKERS is 4. Confirm whether that
# divergence is intentional before aligning them.
DEFAULT_CONFIG_TEMPLATE = """\
# codeindex configuration
version: 1

# AI CLI command template
# {prompt} will be replaced with the actual prompt
# Examples:
#   claude -p "{prompt}" --allowedTools "Read"
#   opencode run "{prompt}"
#   gemini "{prompt}"
ai_command: 'claude -p "{prompt}" --allowedTools "Read"'

# Directories to scan (tests included for better AI understanding)
include:
  - src/
  - lib/
  - tests/
  - examples/

# Patterns to exclude
exclude:
  - "**/__pycache__/**"
  - "**/node_modules/**"
  - "**/.git/**"
  - "**/venv/**"
  - "**/.venv/**"

# Supported languages (currently PHP support added)
languages:
  - php

# Output file name
output_file: README_AI.md

# Parallel processing settings
parallel_workers: 8  # Number of parallel workers for parsing files
batch_size: 50  # Files per batch for AI processing

# Incremental update settings
incremental:
  enabled: true
  thresholds:
    skip_lines: 5  # Changes < this: skip update (trivial)
    current_only: 50  # Changes < this: update current dir only
    suggest_full: 200  # Changes > this: suggest full update
  auto_update:
    on_commit: true  # Auto-update on git commit
    project_index: false  # Auto-update PROJECT_INDEX.md

# Smart indexing settings (控制 README 生成策略)
indexing:
  max_readme_size: 51200  # 50KB, 超过则拆分
  symbols:
    max_per_file: 15  # 每文件最多列出的符号数
    include_visibility:  # 只包含这些可见性的符号
      - public
      - protected
    exclude_patterns:  # 排除匹配这些模式的符号
      - "get*"
      - "set*"
      - "__*"
  grouping:
    enabled: true
    by: suffix  # suffix | function | none
    patterns:
      Controller: "HTTP 请求处理"
      Service: "业务逻辑"
      Model: "数据模型"
  levels:
    root: overview  # 根目录:只有概述
    module: navigation  # 模块目录:导航 + 关键类
    leaf: detailed  # 叶子目录:完整信息

# Git Hooks configuration (Story 6)
hooks:
  post_commit:
    mode: auto  # auto | disabled | async | sync | prompt
    max_dirs_sync: 2  # Auto mode: ≤2 dirs = sync, >2 = async
    enabled: true  # Master switch
    log_file: ~/.codeindex/hooks/post-commit.log
"""
150
+
151
+
152
@dataclass
class SymbolsConfig:
    """Configuration for symbol extraction."""

    # Maximum number of symbols listed per file.
    max_per_file: int = 15
    # Only symbols with these visibilities are included.
    include_visibility: list[str] = field(default_factory=lambda: ["public", "protected"])
    # Name patterns for symbols to drop (presumably fnmatch-style globs;
    # matching happens in the extractor -- TODO confirm).
    exclude_patterns: list[str] = field(default_factory=lambda: ["get*", "set*", "__*"])
    # Fresh per-instance copy of the package-wide adaptive defaults; the
    # thresholds/limits dicts are .copy()'d so mutating one config instance
    # never leaks into DEFAULT_ADAPTIVE_CONFIG or other instances.
    adaptive_symbols: AdaptiveSymbolsConfig = field(default_factory=lambda: AdaptiveSymbolsConfig(
        enabled=DEFAULT_ADAPTIVE_CONFIG.enabled,
        thresholds=DEFAULT_ADAPTIVE_CONFIG.thresholds.copy(),
        limits=DEFAULT_ADAPTIVE_CONFIG.limits.copy(),
        min_symbols=DEFAULT_ADAPTIVE_CONFIG.min_symbols,
        max_symbols=DEFAULT_ADAPTIVE_CONFIG.max_symbols,
    ))
165
+
166
+
167
@dataclass
class GroupingConfig:
    """Configuration for file grouping.

    ``patterns`` maps a class-name suffix (or prefix/pattern, per ``by``)
    to a single human-readable group description, e.g.
    ``{"Controller": "HTTP 请求处理"}``.
    """

    # Whether grouping is applied at all.
    enabled: bool = True
    # Grouping strategy: suffix | prefix | pattern.
    by: str = "suffix"
    # Fix: annotation was dict[str, list[str]], but every producer
    # (DEFAULT_INDEXING["grouping"]["patterns"], IndexingConfig.from_dict)
    # stores a single description string per key -- the values are str.
    patterns: dict[str, str] = field(default_factory=dict)
173
+
174
+
175
@dataclass
class SemanticConfig:
    """Configuration for semantic extraction."""

    enabled: bool = True  # master switch for semantic extraction
    use_ai: bool = False  # AI mode (requires ai_command in Config)
    fallback_to_heuristic: bool = True  # fall back to heuristics when AI fails

    @classmethod
    def from_dict(cls, data: dict) -> "SemanticConfig":
        """Build from a raw config mapping; empty input yields all defaults."""
        if not data:
            return cls()
        defaults = {
            "enabled": True,
            "use_ai": False,
            "fallback_to_heuristic": True,
        }
        # Take each known key from the mapping, falling back per-key.
        return cls(**{key: data.get(key, fallback) for key, fallback in defaults.items()})
193
+
194
+
195
@dataclass
class IndexingConfig:
    """Configuration for smart indexing."""

    max_readme_size: int = 50 * 1024  # split READMEs larger than this (50KB)
    symbols: SymbolsConfig = field(default_factory=SymbolsConfig)
    grouping: GroupingConfig = field(default_factory=GroupingConfig)
    semantic: SemanticConfig = field(default_factory=SemanticConfig)
    root_level: str = "overview"  # overview | navigation | detailed
    module_level: str = "navigation"
    leaf_level: str = "detailed"

    @classmethod
    def from_dict(cls, data: dict) -> "IndexingConfig":
        """Build from a raw config mapping; absent sections keep defaults."""
        if not data:
            return cls()

        symbols_raw = data.get("symbols", {})

        # Merge user-supplied adaptive settings over the package defaults.
        # With no user settings this reduces to an exact copy of
        # DEFAULT_ADAPTIVE_CONFIG (dicts copied via the ** merge).
        adaptive_raw = symbols_raw.get("adaptive_symbols", {}) or {}
        adaptive = AdaptiveSymbolsConfig(
            enabled=adaptive_raw.get("enabled", DEFAULT_ADAPTIVE_CONFIG.enabled),
            thresholds={
                **DEFAULT_ADAPTIVE_CONFIG.thresholds,
                **adaptive_raw.get("thresholds", {}),
            },
            limits={
                **DEFAULT_ADAPTIVE_CONFIG.limits,
                **adaptive_raw.get("limits", {}),
            },
            min_symbols=adaptive_raw.get("min_symbols", DEFAULT_ADAPTIVE_CONFIG.min_symbols),
            max_symbols=adaptive_raw.get("max_symbols", DEFAULT_ADAPTIVE_CONFIG.max_symbols),
        )

        symbols_cfg = SymbolsConfig(
            max_per_file=symbols_raw.get("max_per_file", 15),
            include_visibility=symbols_raw.get("include_visibility", ["public", "protected"]),
            exclude_patterns=symbols_raw.get("exclude_patterns", ["get*", "set*", "__*"]),
            adaptive_symbols=adaptive,
        )

        grouping_raw = data.get("grouping", {})
        grouping_cfg = GroupingConfig(
            enabled=grouping_raw.get("enabled", True),
            by=grouping_raw.get("by", "suffix"),
            # Copy the default map so per-instance mutation never leaks back.
            patterns=grouping_raw.get("patterns", DEFAULT_INDEXING["grouping"]["patterns"].copy()),
        )

        levels_raw = data.get("levels", {})
        return cls(
            max_readme_size=data.get("max_readme_size", 50 * 1024),
            symbols=symbols_cfg,
            grouping=grouping_cfg,
            semantic=SemanticConfig.from_dict(data.get("semantic", {})),
            root_level=levels_raw.get("root", "overview"),
            module_level=levels_raw.get("module", "navigation"),
            leaf_level=levels_raw.get("leaf", "detailed"),
        )
269
+
270
+
271
@dataclass
class IncrementalConfig:
    """Configuration for incremental updates."""

    enabled: bool = True  # master switch for incremental mode
    skip_lines: int = 5  # changed lines below this: skip the update
    current_only: int = 50  # below this: update the current directory only
    suggest_full: int = 200  # above this: suggest a full re-index
    auto_on_commit: bool = True  # auto-update on git commit
    auto_project_index: bool = False  # auto-update PROJECT_INDEX.md

    @classmethod
    def from_dict(cls, data: dict) -> "IncrementalConfig":
        """Build from a raw config mapping; missing keys keep defaults.

        The flat fields here are populated from the nested ``thresholds``
        and ``auto_update`` sub-mappings of the YAML config.
        """
        if not data:
            return cls()
        thr = data.get("thresholds", {})
        auto = data.get("auto_update", {})
        kwargs = {
            "enabled": data.get("enabled", True),
            "skip_lines": thr.get("skip_lines", 5),
            "current_only": thr.get("current_only", 50),
            "suggest_full": thr.get("suggest_full", 200),
            "auto_on_commit": auto.get("on_commit", True),
            "auto_project_index": auto.get("project_index", False),
        }
        return cls(**kwargs)
297
+
298
+
299
@dataclass
class DocstringConfig:
    """Configuration for docstring extraction (Epic 9).

    Supports AI-powered docstring extraction and normalization.

    Modes:
    - off: No docstring processing (default, backward compatible)
    - hybrid: Simple extraction + selective AI (cost-effective, <$1 per 250 dirs)
    - all-ai: AI processes everything (highest quality, higher cost)
    """

    mode: str = "off"  # one of: off | hybrid | all-ai
    ai_command: str = ""  # AI CLI command; empty means inherit the global one
    cost_limit: float = 1.0  # maximum allowed spend in USD

    @classmethod
    def from_dict(cls, data: dict, global_ai_command: str = "") -> "DocstringConfig":
        """Build from a raw config mapping.

        Args:
            data: Docstrings config dict
            global_ai_command: Global AI command to inherit if not specified

        Returns:
            DocstringConfig instance

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        if not data:
            return cls(ai_command=global_ai_command)

        mode = data.get("mode", "off")
        # YAML 1.1 parses a bare `off` as boolean False; map it back.
        if mode is False:
            mode = "off"

        valid_modes = ("off", "hybrid", "all-ai")
        if mode not in valid_modes:
            raise ValueError(
                f"Invalid docstring mode: {mode}. Must be one of {valid_modes}"
            )

        # An explicit per-section ai_command wins; otherwise inherit global.
        command = data.get("ai_command", "") or global_ai_command
        return cls(mode=mode, ai_command=command, cost_limit=data.get("cost_limit", 1.0))
350
+
351
+
352
@dataclass
class PostCommitConfig:
    """Configuration for post-commit Git hook.

    Modes:
    - auto: Smart detection (≤2 dirs = sync, >2 = async) [default]
    - disabled: Completely disabled
    - async: Always run in background (non-blocking)
    - sync: Always run synchronously (blocking)
    - prompt: Only show reminder, don't auto-execute
    """

    mode: str = "auto"  # auto | disabled | async | sync | prompt
    enabled: bool = True  # master on/off switch
    max_dirs_sync: int = 2  # auto-mode threshold for sync vs async
    log_file: str = "~/.codeindex/hooks/post-commit.log"

    @classmethod
    def from_dict(cls, data: dict) -> "PostCommitConfig":
        """Build from a raw config mapping; empty input yields all defaults.

        Raises:
            ValueError: If ``mode`` is not one of the supported values.
        """
        if not data:
            return cls()

        mode = data.get("mode", "auto")
        valid_modes = ("auto", "disabled", "async", "sync", "prompt")
        if mode not in valid_modes:
            raise ValueError(
                f"Invalid post_commit mode: {mode}. Must be one of {valid_modes}"
            )

        kwargs = {
            "mode": mode,
            "enabled": data.get("enabled", True),
            "max_dirs_sync": data.get("max_dirs_sync", 2),
            "log_file": data.get("log_file", "~/.codeindex/hooks/post-commit.log"),
        }
        return cls(**kwargs)
388
+
389
+
390
@dataclass
class HooksConfig:
    """Configuration for Git hooks (Story 6)."""

    post_commit: PostCommitConfig = field(default_factory=PostCommitConfig)

    @classmethod
    def from_dict(cls, data: dict) -> "HooksConfig":
        """Build from a raw config mapping; empty input yields all defaults."""
        if not data:
            return cls()
        # Delegate the nested section to PostCommitConfig's own parser.
        section = data.get("post_commit", {})
        return cls(post_commit=PostCommitConfig.from_dict(section))
405
+
406
+
407
@dataclass
class Config:
    """Top-level configuration for codeindex, loaded from .codeindex.yaml."""

    version: int = 1
    ai_command: str = DEFAULT_AI_COMMAND
    include: list[str] = field(default_factory=lambda: DEFAULT_INCLUDE.copy())
    exclude: list[str] = field(default_factory=lambda: DEFAULT_EXCLUDE.copy())
    languages: list[str] = field(default_factory=lambda: DEFAULT_LANGUAGES.copy())
    output_file: str = DEFAULT_OUTPUT_FILE
    incremental: IncrementalConfig = field(default_factory=IncrementalConfig)
    indexing: IndexingConfig = field(default_factory=IndexingConfig)
    docstrings: DocstringConfig = field(default_factory=DocstringConfig)  # Epic 9
    hooks: HooksConfig = field(default_factory=HooksConfig)  # Story 6
    parallel_workers: int = DEFAULT_PARALLEL_WORKERS
    batch_size: int = DEFAULT_BATCH_SIZE

    @classmethod
    def load(cls, path: Optional[Path | str] = None) -> "Config":
        """Load config from a YAML file.

        Args:
            path: Config file path; defaults to ``./.codeindex.yaml``.

        Returns:
            Parsed Config, or a default Config if the file does not exist
            (or is empty).
        """
        if path is None:
            path = Path.cwd() / DEFAULT_CONFIG_NAME
        else:
            path = Path(path)

        if not path.exists():
            return cls()

        # Fix: read explicitly as UTF-8. The config may contain non-ASCII
        # text (e.g. the Chinese grouping labels from the default template),
        # which the platform-default encoding (cp1252 on Windows) would
        # mis-decode or reject.
        with open(path, encoding="utf-8") as f:
            data = yaml.safe_load(f) or {}

        # Resolve the global ai_command first: docstrings config inherits it.
        ai_command = data.get("ai_command", DEFAULT_AI_COMMAND)

        return cls(
            version=data.get("version", 1),
            ai_command=ai_command,
            include=data.get("include", DEFAULT_INCLUDE.copy()),
            exclude=data.get("exclude", DEFAULT_EXCLUDE.copy()),
            languages=data.get("languages", DEFAULT_LANGUAGES.copy()),
            output_file=data.get("output_file", DEFAULT_OUTPUT_FILE),
            incremental=IncrementalConfig.from_dict(data.get("incremental", {})),
            indexing=IndexingConfig.from_dict(data.get("indexing", {})),
            docstrings=DocstringConfig.from_dict(
                data.get("docstrings", {}), global_ai_command=ai_command
            ),
            hooks=HooksConfig.from_dict(data.get("hooks", {})),
            parallel_workers=data.get("parallel_workers", DEFAULT_PARALLEL_WORKERS),
            batch_size=data.get("batch_size", DEFAULT_BATCH_SIZE),
        )

    @classmethod
    def from_yaml(cls, path: Path) -> "Config":
        """Load config from YAML file (alias for load()).

        Args:
            path: Path to YAML config file

        Returns:
            Config instance
        """
        return cls.load(path)

    @staticmethod
    def create_default(path: Optional[Path] = None) -> Path:
        """Write the default config template and return its path.

        Args:
            path: Target file; defaults to ``./.codeindex.yaml``.

        Returns:
            The path the template was written to.
        """
        if path is None:
            path = Path.cwd() / DEFAULT_CONFIG_NAME

        # Fix: the template contains non-ASCII characters (Chinese grouping
        # labels and comments); writing with the platform-default encoding
        # raises UnicodeEncodeError on e.g. Windows. Always write UTF-8,
        # matching what load() reads.
        with open(path, "w", encoding="utf-8") as f:
            f.write(DEFAULT_CONFIG_TEMPLATE)

        return path
@@ -0,0 +1,229 @@
1
+ """Directory tree structure for hierarchical indexing."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import Literal
6
+
7
+ from .config import Config
8
+
9
+ LevelType = Literal["overview", "navigation", "detailed"]
10
+
11
+
12
+ @dataclass
13
+ class DirectoryNode:
14
+ """A node in the directory tree."""
15
+ path: Path
16
+ has_files: bool = False
17
+ children: set[Path] = field(default_factory=set)
18
+ parent: Path | None = None
19
+ depth: int = 0 # Depth from root (root = 0)
20
+
21
+ @property
22
+ def has_children(self) -> bool:
23
+ """Whether this directory has indexed child directories."""
24
+ return bool(self.children)
25
+
26
+ @property
27
+ def is_leaf(self) -> bool:
28
+ """Whether this is a leaf directory (no indexed children)."""
29
+ return not self.children
30
+
31
+
32
class DirectoryTree:
    """
    Pre-scanned directory tree for determining index levels.

    This enables two-pass indexing:
    1. First pass: Build tree structure
    2. Second pass: Generate READMEs with correct levels
    """

    def __init__(self, root: Path, config: Config):
        """Build the tree eagerly by scanning the filesystem under root.

        Args:
            root: Project root directory (resolved to an absolute path).
            config: Loaded codeindex configuration (include/exclude/languages).
        """
        self.root = root.resolve()
        self.config = config
        # Map of resolved directory path -> its DirectoryNode.
        self.nodes: dict[Path, DirectoryNode] = {}
        self._build_tree()

    def _build_tree(self):
        """Build the directory tree from root."""
        # Function-scope import, presumably to avoid a circular import with
        # .scanner -- TODO confirm.
        from .scanner import get_language_extensions, should_exclude

        # Get valid extensions for this config
        valid_extensions = get_language_extensions(self.config.languages)

        def has_indexable_files(dir_path: Path) -> bool:
            """Check if directory has any indexable files."""
            try:
                for item in dir_path.iterdir():
                    # Extension comparison is case-insensitive.
                    if item.is_file() and item.suffix.lower() in valid_extensions:
                        return True
            except PermissionError:
                # Unreadable directories are treated as having no files.
                pass
            return False

        def walk_directory(current: Path, depth: int = 0):
            """Recursively walk directory tree."""
            # Check exclusions (exclusion semantics live in .scanner's
            # should_exclude; config.exclude holds glob patterns).
            if should_exclude(current, self.config.exclude, self.root):
                return

            # For include paths, check if we're within them. A directory is
            # kept when it is inside an include path OR is an ancestor of
            # one (so the walk can descend down to the included subtree).
            if self.config.include:
                in_include = False
                for include_path in self.config.include:
                    include_full = (self.root / include_path).resolve()
                    try:
                        current.relative_to(include_full)
                        in_include = True
                        break
                    except ValueError:
                        # Check if current is parent of include
                        try:
                            include_full.relative_to(current)
                            in_include = True
                            break
                        except ValueError:
                            pass
                if not in_include and current != self.root:
                    return

            # Create node for this directory
            current = current.resolve()
            has_files = has_indexable_files(current)

            # Only add if has files or is root.
            # NOTE(review): the `depth == 0` test looks redundant -- at
            # depth 0, `current` is always the (resolved) root. Confirm
            # before simplifying.
            if has_files or current == self.root or depth == 0:
                self.nodes[current] = DirectoryNode(
                    path=current,
                    has_files=has_files,
                    depth=depth,
                )

            # Recurse into subdirectories; hidden dirs (".git", ".venv", ...)
            # are skipped, and sorting keeps the traversal deterministic.
            try:
                for item in sorted(current.iterdir()):
                    if item.is_dir() and not item.name.startswith('.'):
                        walk_directory(item, depth + 1)
            except PermissionError:
                pass

        # Walk from root
        walk_directory(self.root)

        # Build parent-child relationships (first pass over collected nodes).
        for dir_path, node in list(self.nodes.items()):
            parent_path = dir_path.parent.resolve()
            if parent_path in self.nodes and parent_path != dir_path:
                node.parent = parent_path
                self.nodes[parent_path].children.add(dir_path)

        # Add intermediate directories that have children but weren't added
        # (directories without files but with children need to be in tree)
        dirs_to_add = {}
        for dir_path, node in list(self.nodes.items()):
            current = dir_path.parent.resolve()
            # Climb toward the root, collecting ancestors missing from the
            # node map; stop at the root's parent or at a known node.
            while current != self.root.parent and current not in self.nodes:
                try:
                    depth = len(current.relative_to(self.root).parts)
                except ValueError:
                    # `current` escaped the root (e.g. via a symlink); stop.
                    break
                dirs_to_add[current] = DirectoryNode(
                    path=current,
                    has_files=False,
                    depth=depth,
                )
                current = current.parent.resolve()

        # Add intermediate directories and rebuild relationships
        self.nodes.update(dirs_to_add)

        # Rebuild all parent-child relationships from scratch: the newly
        # inserted intermediates invalidate the first-pass links.
        for dir_path, node in self.nodes.items():
            node.children.clear()

        for dir_path, node in self.nodes.items():
            parent_path = dir_path.parent.resolve()
            if parent_path in self.nodes and parent_path != dir_path:
                node.parent = parent_path
                self.nodes[parent_path].children.add(dir_path)

    def get_level(self, dir_path: Path) -> LevelType:
        """
        Determine the appropriate index level for a directory.

        Rules:
        - Root directory (depth=0) -> overview
        - Has indexed children -> navigation
        - Leaf directory (no children) -> detailed

        The returned value comes from the corresponding IndexingConfig
        level field, whose defaults are the LevelType literal names.
        """
        dir_path = dir_path.resolve()
        node = self.nodes.get(dir_path)

        if node is None:
            # Not in tree, default to detailed
            return self.config.indexing.leaf_level

        # Root directory
        if node.depth == 0 or dir_path == self.root:
            return self.config.indexing.root_level

        # Has children -> navigation
        if node.has_children:
            return self.config.indexing.module_level

        # Leaf directory
        return self.config.indexing.leaf_level

    def get_children(self, dir_path: Path) -> list[Path]:
        """Get indexed child directories for a path (sorted; [] if unknown)."""
        dir_path = dir_path.resolve()
        node = self.nodes.get(dir_path)
        if node is None:
            return []
        return sorted(node.children)

    def get_processing_order(self) -> list[Path]:
        """
        Get directories in bottom-up processing order.

        Returns directories sorted by depth (deepest first),
        so children are processed before parents.
        """
        # Path string is the secondary sort key, making the order
        # deterministic among directories at the same depth.
        return sorted(
            self.nodes.keys(),
            key=lambda p: (self.nodes[p].depth, str(p)),
            reverse=True
        )

    def get_stats(self) -> dict:
        """Get tree statistics (node counts by kind and the maximum depth)."""
        total = len(self.nodes)
        with_files = sum(1 for n in self.nodes.values() if n.has_files)
        with_children = sum(1 for n in self.nodes.values() if n.has_children)
        max_depth = max((n.depth for n in self.nodes.values()), default=0)

        return {
            "total_directories": total,
            "with_files": with_files,
            "with_children": with_children,
            "leaf_directories": total - with_children,
            "max_depth": max_depth,
        }

    def print_tree(self, max_depth: int = 3):
        """Print tree structure for debugging."""
        def _print_node(path: Path, indent: int = 0):
            node = self.nodes.get(path)
            if node is None or node.depth > max_depth:
                return

            level = self.get_level(path)
            prefix = "  " * indent
            marker = "📁" if node.has_children else "📄"
            # Renders literally as " (True)" when the dir holds files.
            files_marker = f" ({node.has_files})" if node.has_files else ""
            print(f"{prefix}{marker} {path.name} [{level}]{files_marker}")

            for child in sorted(node.children):
                _print_node(child, indent + 1)

        _print_node(self.root)