mcp-vector-search 0.7.5-py3-none-any.whl → 0.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mcp-vector-search might be problematic.

@@ -1,5 +1,6 @@
 """Database abstraction and ChromaDB implementation for MCP Vector Search."""
 
+import asyncio
 import shutil
 from abc import ABC, abstractmethod
 from pathlib import Path
@@ -369,38 +370,67 @@ class ChromaVectorDatabase(VectorDatabase):
             raise DatabaseError(f"Failed to delete chunks: {e}") from e
 
     async def get_stats(self) -> IndexStats:
-        """Get database statistics."""
+        """Get database statistics with optimized chunked queries."""
         if not self._collection:
             raise DatabaseNotInitializedError("Database not initialized")
 
         try:
-            # Get total count
+            # Get total count (fast operation)
             count = self._collection.count()
 
-            # Get ALL metadata to analyze (not just a sample)
-            # Only fetch metadata, not embeddings, for performance
-            results = self._collection.get(include=["metadatas"])
+            if count == 0:
+                return IndexStats(
+                    total_files=0,
+                    total_chunks=0,
+                    languages={},
+                    file_types={},
+                    index_size_mb=0.0,
+                    last_updated="N/A",
+                    embedding_model="unknown",
+                )
 
-            # Count unique files from all chunks
-            files = {m.get("file_path", "") for m in results.get("metadatas", [])}
+            # Process in chunks to avoid loading everything at once
+            batch_size_limit = 1000
 
-            # Count languages and file types
-            language_counts = {}
-            file_type_counts = {}
+            files = set()
+            language_counts: dict[str, int] = {}
+            file_type_counts: dict[str, int] = {}
 
-            for metadata in results.get("metadatas", []):
-                # Count languages
-                lang = metadata.get("language", "unknown")
-                language_counts[lang] = language_counts.get(lang, 0) + 1
+            offset = 0
+            while offset < count:
+                # Fetch batch
+                batch_size = min(batch_size_limit, count - offset)
+                logger.debug(
+                    f"Processing database stats: batch {offset // batch_size_limit + 1}, "
+                    f"{offset}-{offset + batch_size} of {count} chunks"
+                )
 
-                # Count file types
-                file_path = metadata.get("file_path", "")
-                if file_path:
-                    ext = Path(file_path).suffix or "no_extension"
-                    file_type_counts[ext] = file_type_counts.get(ext, 0) + 1
+                results = self._collection.get(
+                    include=["metadatas"],
+                    limit=batch_size,
+                    offset=offset,
+                )
 
-            # Estimate index size (rough approximation)
-            index_size_mb = count * 0.001  # Rough estimate
+                # Process batch metadata
+                for metadata in results.get("metadatas", []):
+                    # Language stats
+                    lang = metadata.get("language", "unknown")
+                    language_counts[lang] = language_counts.get(lang, 0) + 1
+
+                    # File stats
+                    file_path = metadata.get("file_path", "")
+                    if file_path:
+                        files.add(file_path)
+                        ext = Path(file_path).suffix or "no_extension"
+                        file_type_counts[ext] = file_type_counts.get(ext, 0) + 1
+
+                offset += batch_size
+
+                # Yield to event loop periodically to prevent blocking
+                await asyncio.sleep(0)
+
+            # Estimate index size (rough approximation: ~1KB per chunk)
+            index_size_mb = count * 0.001
 
             return IndexStats(
                 total_files=len(files),
@@ -408,12 +438,13 @@ class ChromaVectorDatabase(VectorDatabase):
                 languages=language_counts,
                 file_types=file_type_counts,
                 index_size_mb=index_size_mb,
-                last_updated="unknown",  # TODO: Track this
-                embedding_model="unknown",  # TODO: Track this
+                last_updated="unknown",
+                embedding_model="unknown",
             )
 
         except Exception as e:
-            logger.error(f"Failed to get stats: {e}")
+            logger.error(f"Failed to get database statistics: {e}")
+            # Return empty stats instead of raising
            return IndexStats(
                total_files=0,
                total_chunks=0,
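
For orientation, the batching pattern the rewritten `get_stats()` relies on is plain ChromaDB pagination: `collection.get()` is called repeatedly with `limit`/`offset` and only `metadatas` included, so embeddings never leave disk. A minimal standalone sketch of that pattern follows; the storage path and collection name are illustrative assumptions, not values taken from this package.

# Sketch of the paginated metadata scan (path and collection name are assumed).
import chromadb

client = chromadb.PersistentClient(path=".mcp-vector-search/chroma")  # assumed location
collection = client.get_or_create_collection("code_chunks")  # assumed name

batch_size_limit = 1000
offset = 0
total = collection.count()
files: set[str] = set()

while offset < total:
    batch_size = min(batch_size_limit, total - offset)
    # Only metadata is requested; embeddings and documents are not loaded.
    results = collection.get(include=["metadatas"], limit=batch_size, offset=offset)
    for metadata in results.get("metadatas", []):
        file_path = metadata.get("file_path", "")
        if file_path:
            files.add(file_path)
    offset += batch_size

print(f"{total} chunks across {len(files)} files")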
@@ -768,56 +799,88 @@ class PooledChromaVectorDatabase(VectorDatabase):
             raise DatabaseError(f"Failed to delete chunks: {e}") from e
 
     async def get_stats(self) -> IndexStats:
-        """Get database statistics using pooled connection."""
+        """Get database statistics with connection pooling and chunked queries."""
         try:
             async with self._pool.get_connection() as conn:
-                # Get total count
+                # Get total count (fast operation)
                 count = conn.collection.count()
 
-                # Get all metadata to analyze
-                results = conn.collection.get(include=["metadatas"])
+                if count == 0:
+                    return IndexStats(
+                        total_files=0,
+                        total_chunks=0,
+                        languages={},
+                        file_types={},
+                        index_size_mb=0.0,
+                        last_updated="N/A",
+                        embedding_model="unknown",
+                    )
+
+                # Process in chunks to avoid loading everything at once
+                batch_size_limit = 1000
 
-                # Analyze languages and files
-                languages = set()
                 files = set()
+                language_counts: dict[str, int] = {}
+                file_type_counts: dict[str, int] = {}
+
+                offset = 0
+                while offset < count:
+                    # Fetch batch
+                    batch_size = min(batch_size_limit, count - offset)
+                    logger.debug(
+                        f"Processing database stats: batch {offset // batch_size_limit + 1}, "
+                        f"{offset}-{offset + batch_size} of {count} chunks"
+                    )
 
-                for metadata in results["metadatas"]:
-                    if "language" in metadata:
-                        languages.add(metadata["language"])
-                    if "file_path" in metadata:
-                        files.add(metadata["file_path"])
+                    results = conn.collection.get(
+                        include=["metadatas"],
+                        limit=batch_size,
+                        offset=offset,
+                    )
 
-                # Count languages and file types
-                language_counts = {}
-                file_type_counts = {}
+                    # Process batch metadata
+                    for metadata in results.get("metadatas", []):
+                        # Language stats
+                        lang = metadata.get("language", "unknown")
+                        language_counts[lang] = language_counts.get(lang, 0) + 1
 
-                for metadata in results["metadatas"]:
-                    # Count languages
-                    lang = metadata.get("language", "unknown")
-                    language_counts[lang] = language_counts.get(lang, 0) + 1
+                        # File stats
+                        file_path = metadata.get("file_path", "")
+                        if file_path:
+                            files.add(file_path)
+                            ext = Path(file_path).suffix or "no_extension"
+                            file_type_counts[ext] = file_type_counts.get(ext, 0) + 1
 
-                    # Count file types
-                    file_path = metadata.get("file_path", "")
-                    if file_path:
-                        ext = Path(file_path).suffix or "no_extension"
-                        file_type_counts[ext] = file_type_counts.get(ext, 0) + 1
+                    offset += batch_size
+
+                    # Yield to event loop periodically to prevent blocking
+                    await asyncio.sleep(0)
 
-                # Estimate index size (rough approximation)
-                index_size_mb = count * 0.001  # Rough estimate
+                # Estimate index size (rough approximation: ~1KB per chunk)
+                index_size_mb = count * 0.001
 
                 return IndexStats(
-                    total_chunks=count,
                     total_files=len(files),
+                    total_chunks=count,
                     languages=language_counts,
                     file_types=file_type_counts,
                     index_size_mb=index_size_mb,
-                    last_updated="unknown",  # ChromaDB doesn't track this
-                    embedding_model="unknown",  # TODO: Track this in metadata
+                    last_updated="unknown",
+                    embedding_model="unknown",
                 )
 
         except Exception as e:
-            logger.error(f"Failed to get database stats: {e}")
-            raise DatabaseError(f"Failed to get stats: {e}") from e
+            logger.error(f"Failed to get database statistics: {e}")
+            # Return empty stats instead of raising
+            return IndexStats(
+                total_files=0,
+                total_chunks=0,
+                languages={},
+                file_types={},
+                index_size_mb=0.0,
+                last_updated="error",
+                embedding_model="unknown",
+            )
 
     async def remove_file_chunks(self, file_path: str) -> int:
         """Remove all chunks for a specific file using pooled connection."""
@@ -57,6 +57,11 @@ class SemanticIndexer:
             project_root / ".mcp-vector-search" / "index_metadata.json"
         )
 
+        # Add cache for indexable files to avoid repeated filesystem scans
+        self._indexable_files_cache: list[Path] | None = None
+        self._cache_timestamp: float = 0
+        self._cache_ttl: float = 60.0  # 60 second TTL
+
         # Initialize gitignore parser
         try:
             self.gitignore_parser = create_gitignore_parser(project_root)
@@ -290,8 +295,11 @@ class SemanticIndexer:
                 logger.debug(f"No chunks extracted from {file_path}")
                 return True  # Not an error, just empty file
 
+            # Build hierarchical relationships between chunks
+            chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
+
             # Add chunks to database
-            await self.database.add_chunks(chunks)
+            await self.database.add_chunks(chunks_with_hierarchy)
 
             # Update metadata after successful indexing
             metadata = self._load_index_metadata()
@@ -334,38 +342,120 @@ class SemanticIndexer:
            return 0
 
     def _find_indexable_files(self) -> list[Path]:
-        """Find all files that should be indexed.
+        """Find all files that should be indexed with caching.
 
         Returns:
             List of file paths to index
         """
+        import time
+
+        # Check cache
+        current_time = time.time()
+        if (
+            self._indexable_files_cache is not None
+            and current_time - self._cache_timestamp < self._cache_ttl
+        ):
+            logger.debug(
+                f"Using cached indexable files ({len(self._indexable_files_cache)} files)"
+            )
+            return self._indexable_files_cache
+
+        # Rebuild cache using efficient directory filtering
+        logger.debug("Rebuilding indexable files cache...")
+        indexable_files = self._scan_files_sync()
+
+        self._indexable_files_cache = sorted(indexable_files)
+        self._cache_timestamp = current_time
+        logger.debug(f"Rebuilt indexable files cache ({len(indexable_files)} files)")
+
+        return self._indexable_files_cache
+
+    def _scan_files_sync(self) -> list[Path]:
+        """Synchronous file scanning (runs in thread pool).
+
+        Uses os.walk with directory filtering to avoid traversing ignored directories.
+
+        Returns:
+            List of indexable file paths
+        """
         indexable_files = []
 
-        for file_path in self.project_root.rglob("*"):
-            if self._should_index_file(file_path):
-                indexable_files.append(file_path)
+        # Use os.walk for efficient directory traversal with early filtering
+        for root, dirs, files in os.walk(self.project_root):
+            root_path = Path(root)
+
+            # Filter out ignored directories IN-PLACE to prevent os.walk from traversing them
+            # This is much more efficient than checking every file in ignored directories
+            # PERFORMANCE: Pass is_directory=True hint to skip filesystem stat() calls
+            dirs[:] = [d for d in dirs if not self._should_ignore_path(root_path / d, is_directory=True)]
+
+            # Check each file in the current directory
+            # PERFORMANCE: skip_file_check=True because os.walk guarantees these are files
+            for filename in files:
+                file_path = root_path / filename
+                if self._should_index_file(file_path, skip_file_check=True):
+                    indexable_files.append(file_path)
 
-        return sorted(indexable_files)
+        return indexable_files
+
+    async def _find_indexable_files_async(self) -> list[Path]:
+        """Find all files asynchronously without blocking event loop.
+
+        Returns:
+            List of file paths to index
+        """
+        import time
+        from concurrent.futures import ThreadPoolExecutor
+
+        # Check cache first
+        current_time = time.time()
+        if (
+            self._indexable_files_cache is not None
+            and current_time - self._cache_timestamp < self._cache_ttl
+        ):
+            logger.debug(
+                f"Using cached indexable files ({len(self._indexable_files_cache)} files)"
+            )
+            return self._indexable_files_cache
+
+        # Run filesystem scan in thread pool to avoid blocking
+        logger.debug("Scanning files in background thread...")
+        loop = asyncio.get_running_loop()
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            indexable_files = await loop.run_in_executor(
+                executor, self._scan_files_sync
+            )
 
-    def _should_index_file(self, file_path: Path) -> bool:
+        # Update cache
+        self._indexable_files_cache = sorted(indexable_files)
+        self._cache_timestamp = current_time
+        logger.debug(f"Found {len(indexable_files)} indexable files")
+
+        return self._indexable_files_cache
+
+    def _should_index_file(self, file_path: Path, skip_file_check: bool = False) -> bool:
         """Check if a file should be indexed.
 
         Args:
             file_path: Path to check
+            skip_file_check: Skip is_file() check if caller knows it's a file (optimization)
 
         Returns:
             True if file should be indexed
         """
-        # Must be a file
-        if not file_path.is_file():
+        # PERFORMANCE: Check file extension FIRST (cheapest operation, no I/O)
+        # This eliminates most files without any filesystem calls
+        if file_path.suffix.lower() not in self.file_extensions:
             return False
 
-        # Check file extension
-        if file_path.suffix.lower() not in self.file_extensions:
+        # PERFORMANCE: Only check is_file() if not coming from os.walk
+        # os.walk already guarantees files, so we skip this expensive check
+        if not skip_file_check and not file_path.is_file():
            return False
 
        # Check if path should be ignored
-        if self._should_ignore_path(file_path):
+        # PERFORMANCE: Pass is_directory=False to skip stat() call (we know it's a file)
+        if self._should_ignore_path(file_path, is_directory=False):
            return False
 
        # Check file size (skip very large files)
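
The scanning change above hinges on a standard os.walk idiom: pruning the `dirs` list in place so ignored directories are never descended into, and filtering by extension before any per-file stat() call. A self-contained sketch of that idiom, with hard-coded ignore and extension sets standing in for the indexer's gitignore-aware checks (both sets are assumptions for illustration):

# Sketch of os.walk with in-place directory pruning; ignore/extension sets are assumed.
import os
from pathlib import Path

IGNORED_DIRS = {".git", "node_modules", "__pycache__", ".mcp-vector-search"}  # assumed
EXTENSIONS = {".py", ".js", ".ts"}  # assumed

def scan(project_root: Path) -> list[Path]:
    indexable: list[Path] = []
    for root, dirs, files in os.walk(project_root):
        # Prune in place so os.walk never descends into ignored directories.
        dirs[:] = [d for d in dirs if d not in IGNORED_DIRS]
        for filename in files:
            path = Path(root) / filename
            # Cheapest check first: suffix filtering needs no extra filesystem I/O.
            if path.suffix.lower() in EXTENSIONS:
                indexable.append(path)
    return indexable

print(len(scan(Path("."))))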
@@ -379,18 +469,20 @@ class SemanticIndexer:
 
         return True
 
-    def _should_ignore_path(self, file_path: Path) -> bool:
+    def _should_ignore_path(self, file_path: Path, is_directory: bool | None = None) -> bool:
         """Check if a path should be ignored.
 
         Args:
             file_path: Path to check
+            is_directory: Optional hint if path is a directory (avoids filesystem check)
 
         Returns:
             True if path should be ignored
         """
         try:
             # First check gitignore rules if available
-            if self.gitignore_parser and self.gitignore_parser.is_ignored(file_path):
+            # PERFORMANCE: Pass is_directory hint to avoid redundant stat() calls
+            if self.gitignore_parser and self.gitignore_parser.is_ignored(file_path, is_directory=is_directory):
                 logger.debug(f"Path ignored by .gitignore: {file_path}")
                 return True
 
@@ -532,8 +624,8 @@ class SemanticIndexer:
             # Get database stats
             db_stats = await self.database.get_stats()
 
-            # Count indexable files
-            indexable_files = self._find_indexable_files()
+            # Count indexable files asynchronously without blocking
+            indexable_files = await self._find_indexable_files_async()
 
             return {
                 "total_indexable_files": len(indexable_files),
@@ -553,3 +645,157 @@ class SemanticIndexer:
                 "indexed_files": 0,
                 "total_chunks": 0,
             }
+
+    async def get_files_to_index(
+        self, force_reindex: bool = False
+    ) -> tuple[list[Path], list[Path]]:
+        """Get all indexable files and those that need indexing.
+
+        Args:
+            force_reindex: Whether to force reindex of all files
+
+        Returns:
+            Tuple of (all_indexable_files, files_to_index)
+        """
+        # Find all indexable files
+        all_files = await self._find_indexable_files_async()
+
+        if not all_files:
+            return [], []
+
+        # Load existing metadata for incremental indexing
+        metadata = self._load_index_metadata()
+
+        # Filter files that need indexing
+        if force_reindex:
+            files_to_index = all_files
+            logger.info(f"Force reindex: processing all {len(files_to_index)} files")
+        else:
+            files_to_index = [
+                f for f in all_files if self._needs_reindexing(f, metadata)
+            ]
+            logger.info(
+                f"Incremental index: {len(files_to_index)} of {len(all_files)} files need updating"
+            )
+
+        return all_files, files_to_index
+
+    async def index_files_with_progress(
+        self,
+        files_to_index: list[Path],
+        force_reindex: bool = False,
+    ):
+        """Index files and yield progress updates for each file.
+
+        Args:
+            files_to_index: List of file paths to index
+            force_reindex: Whether to force reindexing
+
+        Yields:
+            Tuple of (file_path, chunks_added, success) for each processed file
+        """
+        metadata = self._load_index_metadata()
+
+        # Process files in batches for better memory management
+        for i in range(0, len(files_to_index), self.batch_size):
+            batch = files_to_index[i : i + self.batch_size]
+
+            # Process each file in the batch
+            for file_path in batch:
+                chunks_added = 0
+                success = False
+
+                try:
+                    # Always remove existing chunks when reindexing
+                    await self.database.delete_by_file(file_path)
+
+                    # Parse file into chunks
+                    chunks = await self._parse_file(file_path)
+
+                    if chunks:
+                        # Build hierarchical relationships
+                        chunks_with_hierarchy = self._build_chunk_hierarchy(chunks)
+
+                        # Add chunks to database
+                        await self.database.add_chunks(chunks_with_hierarchy)
+                        chunks_added = len(chunks)
+                        logger.debug(f"Indexed {chunks_added} chunks from {file_path}")
+
+                    success = True
+
+                    # Update metadata after successful indexing
+                    metadata[str(file_path)] = os.path.getmtime(file_path)
+
+                except Exception as e:
+                    logger.error(f"Failed to index file {file_path}: {e}")
+                    success = False
+
+                # Yield progress update
+                yield (file_path, chunks_added, success)
+
+        # Save metadata at the end
+        self._save_index_metadata(metadata)
+
+    def _build_chunk_hierarchy(self, chunks: list[CodeChunk]) -> list[CodeChunk]:
+        """Build parent-child relationships between chunks.
+
+        Logic:
+        - Module chunks (chunk_type="module") have depth 0
+        - Class chunks have depth 1, parent is module
+        - Method chunks have depth 2, parent is class
+        - Function chunks outside classes have depth 1, parent is module
+        - Nested classes increment depth
+
+        Args:
+            chunks: List of code chunks to process
+
+        Returns:
+            List of chunks with hierarchy relationships established
+        """
+        if not chunks:
+            return chunks
+
+        # Group chunks by type and name
+        module_chunks = [c for c in chunks if c.chunk_type in ("module", "imports")]
+        class_chunks = [c for c in chunks if c.chunk_type in ("class", "interface", "mixin")]
+        function_chunks = [c for c in chunks if c.chunk_type in ("function", "method", "constructor")]
+
+        # Build relationships
+        for func in function_chunks:
+            if func.class_name:
+                # Find parent class
+                parent_class = next(
+                    (c for c in class_chunks if c.class_name == func.class_name),
+                    None
+                )
+                if parent_class:
+                    func.parent_chunk_id = parent_class.chunk_id
+                    func.chunk_depth = parent_class.chunk_depth + 1
+                    if func.chunk_id not in parent_class.child_chunk_ids:
+                        parent_class.child_chunk_ids.append(func.chunk_id)
+            else:
+                # Top-level function
+                if not func.chunk_depth:
+                    func.chunk_depth = 1
+                # Link to module if exists
+                if module_chunks and not func.parent_chunk_id:
+                    func.parent_chunk_id = module_chunks[0].chunk_id
+                    if func.chunk_id not in module_chunks[0].child_chunk_ids:
+                        module_chunks[0].child_chunk_ids.append(func.chunk_id)
+
+        for cls in class_chunks:
+            # Classes without parent are top-level (depth 1)
+            if not cls.chunk_depth:
+                cls.chunk_depth = 1
+            # Link to module if exists
+            if module_chunks and not cls.parent_chunk_id:
+                cls.parent_chunk_id = module_chunks[0].chunk_id
+                if cls.chunk_id not in module_chunks[0].child_chunk_ids:
+                    module_chunks[0].child_chunk_ids.append(cls.chunk_id)
+
+        # Module chunks stay at depth 0
+        for mod in module_chunks:
+            if not mod.chunk_depth:
+                mod.chunk_depth = 0
+
+        return chunks
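
`index_files_with_progress` is an async generator that yields a `(file_path, chunks_added, success)` tuple per file, so a caller would drive it with `async for`. A rough usage sketch under the assumption that `indexer` is an already-initialized `SemanticIndexer`; this is illustrative, not package documentation:

# Illustrative driver; `indexer` is assumed to be an initialized SemanticIndexer.
import asyncio

async def reindex(indexer) -> None:
    # get_files_to_index() returns (all_indexable_files, files_to_index).
    all_files, files_to_index = await indexer.get_files_to_index(force_reindex=False)
    indexed = failed = total_chunks = 0

    async for file_path, chunks_added, success in indexer.index_files_with_progress(files_to_index):
        if success:
            indexed += 1
            total_chunks += chunks_added
        else:
            failed += 1

    print(f"{indexed}/{len(files_to_index)} files indexed, {total_chunks} chunks, {failed} failures")

# asyncio.run(reindex(indexer))  # run inside whatever setup constructs the indexer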
@@ -21,12 +21,40 @@ class CodeChunk:
     class_name: str | None = None
     docstring: str | None = None
     imports: list[str] = None
+
+    # Enhancement 1: Complexity scoring
     complexity_score: float = 0.0
 
+    # Enhancement 3: Hierarchical relationships
+    chunk_id: str | None = None
+    parent_chunk_id: str | None = None
+    child_chunk_ids: list[str] = None
+    chunk_depth: int = 0
+
+    # Enhancement 4: Enhanced metadata
+    decorators: list[str] = None
+    parameters: list[dict] = None
+    return_type: str | None = None
+    type_annotations: dict[str, str] = None
+
     def __post_init__(self) -> None:
-        """Initialize default values."""
+        """Initialize default values and generate chunk ID."""
         if self.imports is None:
             self.imports = []
+        if self.child_chunk_ids is None:
+            self.child_chunk_ids = []
+        if self.decorators is None:
+            self.decorators = []
+        if self.parameters is None:
+            self.parameters = []
+        if self.type_annotations is None:
+            self.type_annotations = {}
+
+        # Generate chunk ID if not provided
+        if self.chunk_id is None:
+            import hashlib
+            id_string = f"{self.file_path}:{self.chunk_type}:{self.start_line}:{self.end_line}"
+            self.chunk_id = hashlib.sha256(id_string.encode()).hexdigest()[:16]
 
     @property
     def id(self) -> str:
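
The chunk ID introduced in `__post_init__` is a deterministic digest of the chunk's location, so re-parsing the same file span reproduces the same ID. A minimal sketch of that derivation, using made-up example values:

# Same recipe as CodeChunk.__post_init__: sha256 of "path:type:start:end", first 16 hex chars.
import hashlib

def make_chunk_id(file_path: str, chunk_type: str, start_line: int, end_line: int) -> str:
    id_string = f"{file_path}:{chunk_type}:{start_line}:{end_line}"
    return hashlib.sha256(id_string.encode()).hexdigest()[:16]

# Example with illustrative values; identical inputs always produce the same ID.
print(make_chunk_id("src/app.py", "function", 10, 42))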
@@ -52,6 +80,14 @@ class CodeChunk:
             "docstring": self.docstring,
             "imports": self.imports,
             "complexity_score": self.complexity_score,
+            "chunk_id": self.chunk_id,
+            "parent_chunk_id": self.parent_chunk_id,
+            "child_chunk_ids": self.child_chunk_ids,
+            "chunk_depth": self.chunk_depth,
+            "decorators": self.decorators,
+            "parameters": self.parameters,
+            "return_type": self.return_type,
+            "type_annotations": self.type_annotations,
         }
 
     @classmethod
@@ -69,6 +105,14 @@ class CodeChunk:
             docstring=data.get("docstring"),
             imports=data.get("imports", []),
             complexity_score=data.get("complexity_score", 0.0),
+            chunk_id=data.get("chunk_id"),
+            parent_chunk_id=data.get("parent_chunk_id"),
+            child_chunk_ids=data.get("child_chunk_ids", []),
+            chunk_depth=data.get("chunk_depth", 0),
+            decorators=data.get("decorators", []),
+            parameters=data.get("parameters", []),
+            return_type=data.get("return_type"),
+            type_annotations=data.get("type_annotations", {}),
         )