mcp-vector-search 0.9.3__py3-none-any.whl → 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mcp-vector-search might be problematic.

@@ -1,6 +1,7 @@
  """Database abstraction and ChromaDB implementation for MCP Vector Search."""

  import asyncio
+ import json
  import shutil
  from abc import ABC, abstractmethod
  from pathlib import Path
@@ -273,16 +274,16 @@ class ChromaVectorDatabase(VectorDatabase):
  "class_name": chunk.class_name or "",
  "docstring": chunk.docstring or "",
  "complexity_score": chunk.complexity_score,
- # Hierarchy fields
+ # Hierarchy fields (convert lists to JSON strings for ChromaDB)
  "chunk_id": chunk.chunk_id or "",
  "parent_chunk_id": chunk.parent_chunk_id or "",
- "child_chunk_ids": chunk.child_chunk_ids or [],
+ "child_chunk_ids": json.dumps(chunk.child_chunk_ids or []),
  "chunk_depth": chunk.chunk_depth,
- # Additional metadata
- "decorators": chunk.decorators or [],
- "parameters": chunk.parameters or [],
+ # Additional metadata (convert lists/dicts to JSON strings)
+ "decorators": json.dumps(chunk.decorators or []),
+ "parameters": json.dumps(chunk.parameters or []),
  "return_type": chunk.return_type or "",
- "type_annotations": chunk.type_annotations or {},
+ "type_annotations": json.dumps(chunk.type_annotations or {}),
  # Monorepo support
  "subproject_name": chunk.subproject_name or "",
  "subproject_path": chunk.subproject_path or "",
@@ -510,6 +511,23 @@ class ChromaVectorDatabase(VectorDatabase):
  metadata = results["metadatas"][i]
  content = results["documents"][i]

+ # Parse JSON strings back to lists/dicts
+ child_chunk_ids = metadata.get("child_chunk_ids", "[]")
+ if isinstance(child_chunk_ids, str):
+     child_chunk_ids = json.loads(child_chunk_ids)
+
+ decorators = metadata.get("decorators", "[]")
+ if isinstance(decorators, str):
+     decorators = json.loads(decorators)
+
+ parameters = metadata.get("parameters", "[]")
+ if isinstance(parameters, str):
+     parameters = json.loads(parameters)
+
+ type_annotations = metadata.get("type_annotations", "{}")
+ if isinstance(type_annotations, str):
+     type_annotations = json.loads(type_annotations)
+
  chunk = CodeChunk(
      content=content,
      file_path=Path(metadata["file_path"]),
@@ -524,12 +542,12 @@ class ChromaVectorDatabase(VectorDatabase):
      complexity_score=metadata.get("complexity_score", 0.0),
      chunk_id=metadata.get("chunk_id"),
      parent_chunk_id=metadata.get("parent_chunk_id"),
-     child_chunk_ids=metadata.get("child_chunk_ids", []),
+     child_chunk_ids=child_chunk_ids,
      chunk_depth=metadata.get("chunk_depth", 0),
-     decorators=metadata.get("decorators", []),
-     parameters=metadata.get("parameters", []),
+     decorators=decorators,
+     parameters=parameters,
      return_type=metadata.get("return_type"),
-     type_annotations=metadata.get("type_annotations", {}),
+     type_annotations=type_annotations,
      subproject_name=metadata.get("subproject_name"),
      subproject_path=metadata.get("subproject_path"),
  )
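
Context for the pair of changes above: ChromaDB accepts only scalar metadata values (str, int, float, bool), so list- and dict-valued chunk fields are JSON-encoded on write and decoded on read; the `isinstance` guard keeps older records, whose metadata may still hold native lists or dicts, loadable. A hedged sketch of the round-trip, using a hypothetical helper that condenses the four repeated parse blocks (neither the values nor the helper are from the package):

```python
import json
from typing import Any

def parse_json_field(metadata: dict[str, Any], key: str, default: str) -> Any:
    """Return a metadata field as a Python object, accepting either a
    JSON string (new format) or an already-decoded value (legacy rows)."""
    value = metadata.get(key, default)
    if isinstance(value, str):
        value = json.loads(value)
    return value

# Write side: lists/dicts become JSON strings, which ChromaDB accepts.
decorators = ["@staticmethod"]
type_annotations = {"x": "int"}
metadata = {
    "decorators": json.dumps(decorators or []),
    "type_annotations": json.dumps(type_annotations or {}),
}
assert all(isinstance(v, str) for v in metadata.values())

# Read side: JSON strings decode back; legacy values pass through unchanged.
assert parse_json_field(metadata, "decorators", "[]") == ["@staticmethod"]
assert parse_json_field({"decorators": ["@cached"]}, "decorators", "[]") == ["@cached"]
assert parse_json_field(metadata, "parameters", "[]") == []  # missing key -> default
```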
@@ -775,16 +793,16 @@ class PooledChromaVectorDatabase(VectorDatabase):
  "class_name": chunk.class_name or "",
  "docstring": chunk.docstring or "",
  "complexity_score": chunk.complexity_score,
- # Hierarchy fields
+ # Hierarchy fields (convert lists to JSON strings for ChromaDB)
  "chunk_id": chunk.chunk_id or "",
  "parent_chunk_id": chunk.parent_chunk_id or "",
- "child_chunk_ids": chunk.child_chunk_ids or [],
+ "child_chunk_ids": json.dumps(chunk.child_chunk_ids or []),
  "chunk_depth": chunk.chunk_depth,
- # Additional metadata
- "decorators": chunk.decorators or [],
- "parameters": chunk.parameters or [],
+ # Additional metadata (convert lists/dicts to JSON strings)
+ "decorators": json.dumps(chunk.decorators or []),
+ "parameters": json.dumps(chunk.parameters or []),
  "return_type": chunk.return_type or "",
- "type_annotations": chunk.type_annotations or {},
+ "type_annotations": json.dumps(chunk.type_annotations or {}),
  # Monorepo support
  "subproject_name": chunk.subproject_name or "",
  "subproject_path": chunk.subproject_path or "",
@@ -1023,6 +1041,23 @@ class PooledChromaVectorDatabase(VectorDatabase):
  metadata = results["metadatas"][i]
  content = results["documents"][i]

+ # Parse JSON strings back to lists/dicts
+ child_chunk_ids = metadata.get("child_chunk_ids", "[]")
+ if isinstance(child_chunk_ids, str):
+     child_chunk_ids = json.loads(child_chunk_ids)
+
+ decorators = metadata.get("decorators", "[]")
+ if isinstance(decorators, str):
+     decorators = json.loads(decorators)
+
+ parameters = metadata.get("parameters", "[]")
+ if isinstance(parameters, str):
+     parameters = json.loads(parameters)
+
+ type_annotations = metadata.get("type_annotations", "{}")
+ if isinstance(type_annotations, str):
+     type_annotations = json.loads(type_annotations)
+
  chunk = CodeChunk(
      content=content,
      file_path=Path(metadata["file_path"]),
@@ -1037,12 +1072,12 @@ class PooledChromaVectorDatabase(VectorDatabase):
      complexity_score=metadata.get("complexity_score", 0.0),
      chunk_id=metadata.get("chunk_id"),
      parent_chunk_id=metadata.get("parent_chunk_id"),
-     child_chunk_ids=metadata.get("child_chunk_ids", []),
+     child_chunk_ids=child_chunk_ids,
      chunk_depth=metadata.get("chunk_depth", 0),
-     decorators=metadata.get("decorators", []),
-     parameters=metadata.get("parameters", []),
+     decorators=decorators,
+     parameters=parameters,
      return_type=metadata.get("return_type"),
-     type_annotations=metadata.get("type_annotations", {}),
+     type_annotations=type_annotations,
      subproject_name=metadata.get("subproject_name"),
      subproject_path=metadata.get("subproject_path"),
  )
@@ -0,0 +1,303 @@
+ """Directory index for tracking project structure and file relationships."""
+
+ import json
+ from collections import defaultdict
+ from pathlib import Path
+ from typing import Any
+
+ from loguru import logger
+
+ from .models import Directory
+
+
+ class DirectoryIndex:
+     """Manages directory structure and file-directory relationships."""
+
+     def __init__(self, index_path: Path) -> None:
+         """Initialize directory index.
+
+         Args:
+             index_path: Path to directory index file (JSON)
+         """
+         self.index_path = index_path
+         self.directories: dict[str, Directory] = {}  # path -> Directory
+         self.file_to_directory: dict[str, str] = {}  # file_path -> directory_path
+         self.directory_files: dict[str, list[str]] = defaultdict(list)  # dir_path -> [file_paths]
+
+     def load(self) -> None:
+         """Load directory index from disk."""
+         if not self.index_path.exists():
+             logger.debug("No directory index found, starting fresh")
+             return
+
+         try:
+             with open(self.index_path, "r") as f:
+                 data = json.load(f)
+
+             # Load directories
+             for dir_data in data.get("directories", []):
+                 directory = Directory.from_dict(dir_data)
+                 self.directories[str(directory.path)] = directory
+
+             # Load file mappings
+             self.file_to_directory = data.get("file_to_directory", {})
+
+             # Rebuild directory_files from file_to_directory
+             self.directory_files = defaultdict(list)
+             for file_path, dir_path in self.file_to_directory.items():
+                 self.directory_files[dir_path].append(file_path)
+
+             logger.info(f"Loaded {len(self.directories)} directories from index")
+
+         except Exception as e:
+             logger.error(f"Failed to load directory index: {e}")
+             self.directories = {}
+             self.file_to_directory = {}
+             self.directory_files = defaultdict(list)
+
+     def save(self) -> None:
+         """Save directory index to disk."""
+         try:
+             # Ensure parent directory exists
+             self.index_path.parent.mkdir(parents=True, exist_ok=True)
+
+             data = {
+                 "directories": [d.to_dict() for d in self.directories.values()],
+                 "file_to_directory": self.file_to_directory,
+             }
+
+             with open(self.index_path, "w") as f:
+                 json.dump(data, f, indent=2)
+
+             logger.debug(f"Saved {len(self.directories)} directories to index")
+
+         except Exception as e:
+             logger.error(f"Failed to save directory index: {e}")
+             raise
+
+     def add_directory(self, directory: Directory) -> None:
+         """Add or update a directory in the index.
+
+         Args:
+             directory: Directory to add
+         """
+         dir_path = str(directory.path)
+         self.directories[dir_path] = directory
+
+     def add_file(self, file_path: Path, directory_path: Path) -> None:
+         """Associate a file with its directory.
+
+         Args:
+             file_path: Path to the file
+             directory_path: Path to the directory containing the file
+         """
+         file_path_str = str(file_path)
+         dir_path_str = str(directory_path)
+
+         self.file_to_directory[file_path_str] = dir_path_str
+         if file_path_str not in self.directory_files[dir_path_str]:
+             self.directory_files[dir_path_str].append(file_path_str)
+
+         # Update directory file count
+         if dir_path_str in self.directories:
+             self.directories[dir_path_str].file_count = len(
+                 self.directory_files[dir_path_str]
+             )
+
+     def get_directory(self, directory_path: Path) -> Directory | None:
+         """Get directory by path.
+
+         Args:
+             directory_path: Path to directory
+
+         Returns:
+             Directory object or None if not found
+         """
+         return self.directories.get(str(directory_path))
+
+     def get_files_in_directory(self, directory_path: Path) -> list[str]:
+         """Get all files in a directory.
+
+         Args:
+             directory_path: Path to directory
+
+         Returns:
+             List of file paths in the directory
+         """
+         return self.directory_files.get(str(directory_path), [])
+
+     def get_subdirectories(self, directory_path: Path) -> list[Directory]:
+         """Get all immediate subdirectories.
+
+         Args:
+             directory_path: Path to parent directory
+
+         Returns:
+             List of subdirectory objects
+         """
+         parent_path_str = str(directory_path)
+         subdirs = []
+
+         for dir_path_str, directory in self.directories.items():
+             if directory.parent_path and str(directory.parent_path) == parent_path_str:
+                 subdirs.append(directory)
+
+         return subdirs
+
+     def get_root_directories(self) -> list[Directory]:
+         """Get all root-level directories (no parent).
+
+         Returns:
+             List of root directory objects
+         """
+         return [d for d in self.directories.values() if d.parent_path is None]
+
+     def delete_directory(self, directory_path: Path) -> None:
+         """Remove directory and its file associations.
+
+         Args:
+             directory_path: Path to directory to remove
+         """
+         dir_path_str = str(directory_path)
+
+         # Remove directory
+         if dir_path_str in self.directories:
+             del self.directories[dir_path_str]
+
+         # Remove file associations
+         if dir_path_str in self.directory_files:
+             for file_path in self.directory_files[dir_path_str]:
+                 if file_path in self.file_to_directory:
+                     del self.file_to_directory[file_path]
+             del self.directory_files[dir_path_str]
+
+     def delete_file(self, file_path: Path) -> None:
+         """Remove file from directory associations.
+
+         Args:
+             file_path: Path to file to remove
+         """
+         file_path_str = str(file_path)
+
+         if file_path_str in self.file_to_directory:
+             dir_path = self.file_to_directory[file_path_str]
+             del self.file_to_directory[file_path_str]
+
+             # Remove from directory_files
+             if dir_path in self.directory_files:
+                 self.directory_files[dir_path] = [
+                     f for f in self.directory_files[dir_path] if f != file_path_str
+                 ]
+
+             # Update directory file count
+             if dir_path in self.directories:
+                 self.directories[dir_path].file_count = len(
+                     self.directory_files[dir_path]
+                 )
+
+     def rebuild_from_files(self, file_paths: list[Path], root_path: Path, chunk_stats: dict[str, dict] | None = None) -> None:
+         """Rebuild directory index from list of files with statistics from chunks.
+
+         Args:
+             file_paths: List of file paths to index
+             root_path: Project root path
+             chunk_stats: Optional dict mapping file_path -> {'chunks': count, 'language': str}
+         """
+         self.directories = {}
+         self.file_to_directory = {}
+         self.directory_files = defaultdict(list)
+
+         # Track all unique directories and their statistics
+         dir_set = set()
+         dir_chunks = defaultdict(int)  # directory -> total chunks
+         dir_languages = defaultdict(lambda: defaultdict(int))  # directory -> {language: count}
+         dir_modified = defaultdict(float)  # directory -> most recent modification time
+
+         for file_path in file_paths:
+             # Get relative path from root
+             try:
+                 rel_path = file_path.relative_to(root_path)
+                 parent_dir = rel_path.parent
+
+                 # Add all parent directories up to root
+                 current = parent_dir
+                 while current != Path("."):
+                     dir_set.add(current)
+
+                     # Accumulate statistics up the directory tree
+                     if chunk_stats and str(file_path) in chunk_stats:
+                         stats = chunk_stats[str(file_path)]
+                         dir_chunks[current] += stats.get('chunks', 0)
+                         if 'language' in stats:
+                             dir_languages[current][stats['language']] += stats.get('chunks', 0)
+                         # Track most recent modification time
+                         if 'modified' in stats:
+                             dir_modified[current] = max(dir_modified.get(current, 0), stats['modified'])
+
+                     current = current.parent
+
+                 # Associate file with its direct parent
+                 if parent_dir != Path("."):
+                     self.add_file(rel_path, parent_dir)
+
+             except ValueError:
+                 # File not relative to root, skip
+                 logger.warning(f"File {file_path} not under root {root_path}")
+                 continue
+
+         # Create Directory objects for all directories
+         for dir_path in sorted(dir_set):
+             # Determine parent
+             parent_path = dir_path.parent if dir_path.parent != Path(".") else None
+
+             # Check if it's a package
+             is_package = False
+             full_dir_path = root_path / dir_path
+             if (full_dir_path / "__init__.py").exists():
+                 is_package = True
+             elif (full_dir_path / "package.json").exists():
+                 is_package = True
+
+             directory = Directory(
+                 path=dir_path,
+                 name=dir_path.name,
+                 parent_path=parent_path,
+                 depth=len(dir_path.parts),
+                 is_package=is_package,
+                 total_chunks=dir_chunks.get(dir_path, 0),
+                 languages=dict(dir_languages.get(dir_path, {})),
+                 last_modified=dir_modified.get(dir_path),
+             )
+
+             self.add_directory(directory)
+
+         # Update subdirectory counts
+         for directory in self.directories.values():
+             subdirs = self.get_subdirectories(directory.path)
+             directory.subdirectory_count = len(subdirs)
+
+         logger.info(f"Rebuilt directory index with {len(self.directories)} directories, {sum(dir_chunks.values())} total chunks")
+
+     def get_stats(self) -> dict[str, Any]:
+         """Get directory index statistics.
+
+         Returns:
+             Dictionary with statistics
+         """
+         return {
+             "total_directories": len(self.directories),
+             "total_files": len(self.file_to_directory),
+             "root_directories": len(self.get_root_directories()),
+             "packages": sum(1 for d in self.directories.values() if d.is_package),
+         }
+
+     def reset(self) -> None:
+         """Clear all directory data."""
+         self.directories = {}
+         self.file_to_directory = {}
+         self.directory_files = defaultdict(list)
+
+         if self.index_path.exists():
+             self.index_path.unlink()
+
+         logger.info("Directory index reset")
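
A hedged usage sketch of the new DirectoryIndex (the project layout is invented, and on a machine where these paths do not exist the `__init__.py`/`package.json` checks simply return False):

```python
from pathlib import Path

root = Path("/repo")  # hypothetical project root
files = [
    root / "src" / "pkg" / "module.py",
    root / "src" / "pkg" / "util.py",
    root / "tests" / "test_module.py",
]

index = DirectoryIndex(root / ".mcp-vector-search" / "directory_index.json")
index.load()  # no-op if the JSON file does not exist yet
index.rebuild_from_files(files, root_path=root)

print(index.get_stats())
# expected: {'total_directories': 3, 'total_files': 3, 'root_directories': 2, 'packages': 0}
print(index.get_files_in_directory(Path("src/pkg")))
# expected: ['src/pkg/module.py', 'src/pkg/util.py']

index.save()  # persists directories and file mappings as JSON
```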
@@ -15,6 +15,7 @@ from ..parsers.registry import get_parser_registry
  from ..utils.gitignore import create_gitignore_parser
  from ..utils.monorepo import MonorepoDetector
  from .database import VectorDatabase
+ from .directory_index import DirectoryIndex
  from .exceptions import ParsingError
  from .models import CodeChunk

@@ -29,6 +30,7 @@ class SemanticIndexer:
      file_extensions: list[str],
      max_workers: int | None = None,
      batch_size: int = 10,
+     debug: bool = False,
  ) -> None:
      """Initialize semantic indexer.

@@ -38,12 +40,14 @@ class SemanticIndexer:
      file_extensions: File extensions to index
      max_workers: Maximum number of worker threads for parallel processing
      batch_size: Number of files to process in each batch
+     debug: Enable debug output for hierarchy building
  """
  self.database = database
  self.project_root = project_root
  self.file_extensions = {ext.lower() for ext in file_extensions}
  self.parser_registry = get_parser_registry()
  self._ignore_patterns = set(DEFAULT_IGNORE_PATTERNS)
+ self.debug = debug

  # Safely get event loop for max_workers
  try:
@@ -81,6 +85,13 @@ class SemanticIndexer:
      for sp in subprojects:
          logger.debug(f" - {sp.name} ({sp.relative_path})")

+     # Initialize directory index
+     self.directory_index = DirectoryIndex(
+         project_root / ".mcp-vector-search" / "directory_index.json"
+     )
+     # Load existing directory index
+     self.directory_index.load()
+
  async def index_project(
      self,
      force_reindex: bool = False,
@@ -156,6 +167,38 @@ class SemanticIndexer:

  self._save_index_metadata(metadata)

+ # Rebuild directory index from successfully indexed files
+ try:
+     logger.debug("Rebuilding directory index...")
+     # We don't have chunk counts here, but we have file modification times
+     # Build a simple stats dict with file mod times for recency tracking
+     chunk_stats = {}
+     for file_path in files_to_index:
+         try:
+             mtime = os.path.getmtime(file_path)
+             # For now, just track modification time
+             # Chunk counts will be aggregated from the database later if needed
+             chunk_stats[str(file_path)] = {
+                 'modified': mtime,
+                 'chunks': 1,  # Placeholder - real count from chunks
+             }
+         except OSError:
+             pass
+
+     self.directory_index.rebuild_from_files(
+         files_to_index, self.project_root, chunk_stats=chunk_stats
+     )
+     self.directory_index.save()
+     dir_stats = self.directory_index.get_stats()
+     logger.info(
+         f"Directory index updated: {dir_stats['total_directories']} directories, "
+         f"{dir_stats['total_files']} files"
+     )
+ except Exception as e:
+     logger.error(f"Failed to update directory index: {e}")
+     import traceback
+     logger.debug(traceback.format_exc())
+
  logger.info(
      f"Indexing complete: {indexed_count} files indexed, {failed_count} failed"
  )
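
For reference, the `chunk_stats` mapping consumed by `rebuild_from_files` is keyed by stringified file path. A hypothetical single entry matching the loop above; note the optional `'language'` key, which `rebuild_from_files` can aggregate but which this indexer pass does not yet supply:

```python
import os

file_path = "src/pkg/module.py"  # illustrative path
chunk_stats = {
    file_path: {
        "modified": os.path.getmtime(file_path) if os.path.exists(file_path) else 0.0,
        "chunks": 1,  # placeholder until real counts are aggregated from the database
        # "language": "python",  # optional; would feed the per-directory language tally
    }
}
```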
@@ -307,6 +350,10 @@ class SemanticIndexer:
  # Build hierarchical relationships between chunks
  chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)

+ # Debug: Check if hierarchy was built
+ methods_with_parents = sum(1 for c in chunks_with_hierarchy if c.chunk_type in ("method", "function") and c.parent_chunk_id)
+ logger.debug(f"After hierarchy build: {methods_with_parents}/{len([c for c in chunks_with_hierarchy if c.chunk_type in ('method', 'function')])} methods have parents")
+
  # Add chunks to database
  await self.database.add_chunks(chunks_with_hierarchy)

@@ -790,6 +837,15 @@ class SemanticIndexer:
  class_chunks = [c for c in chunks if c.chunk_type in ("class", "interface", "mixin")]
  function_chunks = [c for c in chunks if c.chunk_type in ("function", "method", "constructor")]

+ # DEBUG: Print what we have (if debug enabled)
+ if self.debug:
+     import sys
+     print(f"\n[DEBUG] Building hierarchy: {len(module_chunks)} modules, {len(class_chunks)} classes, {len(function_chunks)} functions", file=sys.stderr)
+     if class_chunks:
+         print(f"[DEBUG] Class names: {[c.class_name for c in class_chunks[:5]]}", file=sys.stderr)
+     if function_chunks:
+         print(f"[DEBUG] First 5 functions with class_name: {[(f.function_name, f.class_name) for f in function_chunks[:5]]}", file=sys.stderr)
+
  # Build relationships
  for func in function_chunks:
      if func.class_name:
@@ -803,6 +859,10 @@ class SemanticIndexer:
      func.chunk_depth = parent_class.chunk_depth + 1
      if func.chunk_id not in parent_class.child_chunk_ids:
          parent_class.child_chunk_ids.append(func.chunk_id)
+     if self.debug:
+         import sys
+         print(f"[DEBUG] ✓ Linked '{func.function_name}' to class '{parent_class.class_name}'", file=sys.stderr)
+     logger.debug(f"Linked method '{func.function_name}' (ID: {func.chunk_id[:8]}) to class '{parent_class.class_name}' (ID: {parent_class.chunk_id[:8]})")
  else:
      # Top-level function
      if not func.chunk_depth:
@@ -828,6 +888,13 @@ class SemanticIndexer:
      if not mod.chunk_depth:
          mod.chunk_depth = 0

+ # DEBUG: Print summary
+ if self.debug:
+     import sys
+     funcs_with_parents = sum(1 for f in function_chunks if f.parent_chunk_id)
+     classes_with_parents = sum(1 for c in class_chunks if c.parent_chunk_id)
+     print(f"[DEBUG] Hierarchy built: {funcs_with_parents}/{len(function_chunks)} functions linked, {classes_with_parents}/{len(class_chunks)} classes linked\n", file=sys.stderr)
+
  return chunks

  def _write_indexing_run_header(self) -> None:
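
Taken together, the new debug hooks are opt-in: they fire only when the indexer is constructed with the flag added above. A hypothetical wiring sketch (`db` is a placeholder for an initialized VectorDatabase):

```python
from pathlib import Path

db = ...  # placeholder: an initialized VectorDatabase instance
indexer = SemanticIndexer(
    database=db,
    project_root=Path("/repo"),
    file_extensions=[".py", ".ts"],
    debug=True,  # emits [DEBUG] hierarchy diagnostics on stderr during indexing
)
```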
@@ -206,6 +206,64 @@ class IndexStats(BaseModel):
  }


+ @dataclass
+ class Directory:
+     """Represents a directory in the project structure."""
+
+     path: Path  # Relative path from project root
+     name: str  # Directory name
+     parent_path: Path | None = None  # Parent directory path (None for root)
+     file_count: int = 0  # Number of files directly in this directory
+     subdirectory_count: int = 0  # Number of subdirectories
+     total_chunks: int = 0  # Total code chunks in this directory (recursive)
+     languages: dict[str, int] = None  # Language distribution in this directory
+     depth: int = 0  # Depth from project root (0 = root)
+     is_package: bool = False  # True if contains __init__.py or package.json
+     last_modified: float | None = None  # Most recent file modification time (unix timestamp)
+
+     def __post_init__(self) -> None:
+         """Initialize default values and generate directory ID."""
+         if self.languages is None:
+             self.languages = {}
+
+     @property
+     def id(self) -> str:
+         """Generate unique ID for this directory."""
+         import hashlib
+         return hashlib.sha256(str(self.path).encode()).hexdigest()[:16]
+
+     def to_dict(self) -> dict[str, Any]:
+         """Convert to dictionary for storage."""
+         return {
+             "path": str(self.path),
+             "name": self.name,
+             "parent_path": str(self.parent_path) if self.parent_path else None,
+             "file_count": self.file_count,
+             "subdirectory_count": self.subdirectory_count,
+             "total_chunks": self.total_chunks,
+             "languages": self.languages,
+             "depth": self.depth,
+             "is_package": self.is_package,
+             "last_modified": self.last_modified,
+         }
+
+     @classmethod
+     def from_dict(cls, data: dict[str, Any]) -> "Directory":
+         """Create from dictionary."""
+         return cls(
+             path=Path(data["path"]),
+             name=data["name"],
+             parent_path=Path(data["parent_path"]) if data.get("parent_path") else None,
+             file_count=data.get("file_count", 0),
+             subdirectory_count=data.get("subdirectory_count", 0),
+             total_chunks=data.get("total_chunks", 0),
+             languages=data.get("languages", {}),
+             depth=data.get("depth", 0),
+             is_package=data.get("is_package", False),
+             last_modified=data.get("last_modified"),
+         )
+
+
  class ProjectInfo(BaseModel):
      """Information about a project."""

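
A quick round-trip through the new serialization hooks (values illustrative):

```python
from pathlib import Path

d = Directory(
    path=Path("src/pkg"),
    name="pkg",
    parent_path=Path("src"),
    depth=2,
    is_package=True,
    languages={"python": 12},
)
payload = d.to_dict()  # JSON-safe: Path fields become strings
restored = Directory.from_dict(payload)

assert restored.path == Path("src/pkg")
assert restored.languages == {"python": 12}
assert restored.id == d.id  # sha256(str(path))[:16], stable across round-trips
```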
@@ -51,14 +51,24 @@ class GitignorePattern:
  Returns:
      True if the pattern matches
  """
- # Directory-only patterns only match directories
- if self.is_directory_only and not is_directory:
-     return False
-
  # Convert path separators for consistent matching
  path = path.replace("\\", "/")
  pattern = self.pattern.replace("\\", "/")

+ # For directory-only patterns, check if any parent directory matches
+ # This implements Git's behavior where "dir/" excludes both the directory
+ # AND all files within it recursively
+ if self.is_directory_only:
+     path_parts = path.split("/")
+     # Check each parent directory component
+     for i in range(1, len(path_parts) + 1):
+         parent = "/".join(path_parts[:i])
+         if fnmatch.fnmatch(parent, pattern):
+             return True
+     # If no parent matches and this is not a directory, don't exclude
+     if not is_directory:
+         return False
+
  # Try exact match first
  if fnmatch.fnmatch(path, pattern):
      return True
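
This fix matters for patterns such as `node_modules/`: previously a file path like `node_modules/pkg/index.js` was never excluded, because the directory-only short-circuit rejected anything that was not itself a directory. A standalone sketch of the new parent-walk, assuming the trailing slash has already been stripped and recorded as `is_directory_only` (it mirrors the diff's logic, but on a bare pattern string rather than the GitignorePattern class):

```python
import fnmatch

def directory_pattern_matches(path: str, pattern: str) -> bool:
    """Return True if a directory-only pattern (e.g. stored for "node_modules/")
    matches the path itself or any of its parent directory components."""
    parts = path.replace("\\", "/").split("/")
    return any(
        fnmatch.fnmatch("/".join(parts[:i]), pattern)
        for i in range(1, len(parts) + 1)
    )

assert directory_pattern_matches("node_modules/pkg/index.js", "node_modules")  # file inside the dir
assert directory_pattern_matches("node_modules", "node_modules")               # the dir itself
assert not directory_pattern_matches("src/app.py", "node_modules")
```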