kodit 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (36)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +23 -4
  3. kodit/application/factories/code_indexing_factory.py +2 -24
  4. kodit/application/services/code_indexing_application_service.py +10 -2
  5. kodit/application/services/sync_scheduler.py +128 -0
  6. kodit/cli.py +103 -28
  7. kodit/config.py +15 -0
  8. kodit/domain/services/index_service.py +25 -66
  9. kodit/domain/value_objects.py +10 -22
  10. kodit/infrastructure/slicing/__init__.py +1 -0
  11. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  12. kodit/infrastructure/slicing/slicer.py +894 -0
  13. kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
  14. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +6 -4
  15. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  16. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  17. kodit/migrations/versions/85155663351e_initial.py +64 -48
  18. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  19. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/METADATA +10 -4
  20. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/RECORD +23 -32
  21. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  22. kodit/infrastructure/snippet_extraction/factories.py +0 -13
  23. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  24. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  25. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  26. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  27. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  28. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  29. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  30. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  31. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -44
  32. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  33. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  34. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/WHEEL +0 -0
  35. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/entry_points.txt +0 -0
  36. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.3.3'
21
- __version_tuple__ = version_tuple = (0, 3, 3)
20
+ __version__ = version = '0.3.5'
21
+ __version_tuple__ = version_tuple = (0, 3, 5)
kodit/app.py CHANGED
@@ -6,28 +6,47 @@ from contextlib import asynccontextmanager
6
6
  from asgi_correlation_id import CorrelationIdMiddleware
7
7
  from fastapi import FastAPI
8
8
 
9
+ from kodit.application.services.sync_scheduler import SyncSchedulerService
9
10
  from kodit.config import AppContext
10
11
  from kodit.infrastructure.indexing.auto_indexing_service import AutoIndexingService
11
12
  from kodit.mcp import mcp
12
13
  from kodit.middleware import ASGICancelledErrorMiddleware, logging_middleware
13
14
 
14
- # Global auto-indexing service
15
+ # Global services
15
16
  _auto_indexing_service: AutoIndexingService | None = None
17
+ _sync_scheduler_service: SyncSchedulerService | None = None
16
18
 
17
19
 
18
20
  @asynccontextmanager
19
21
  async def app_lifespan(_: FastAPI) -> AsyncIterator[None]:
20
- """Manage application lifespan for auto-indexing."""
21
- global _auto_indexing_service # noqa: PLW0603
22
- # Start auto-indexing service
22
+ """Manage application lifespan for auto-indexing and sync."""
23
+ global _auto_indexing_service, _sync_scheduler_service # noqa: PLW0603
24
+
23
25
  app_context = AppContext()
24
26
  db = await app_context.get_db()
27
+
28
+ # Start auto-indexing service
25
29
  _auto_indexing_service = AutoIndexingService(
26
30
  app_context=app_context,
27
31
  session_factory=db.session_factory,
28
32
  )
29
33
  await _auto_indexing_service.start_background_indexing()
34
+
35
+ # Start sync scheduler service
36
+ if app_context.sync.enabled:
37
+ _sync_scheduler_service = SyncSchedulerService(
38
+ app_context=app_context,
39
+ session_factory=db.session_factory,
40
+ )
41
+ _sync_scheduler_service.start_periodic_sync(
42
+ interval_seconds=app_context.sync.interval_seconds
43
+ )
44
+
30
45
  yield
46
+
47
+ # Stop services
48
+ if _sync_scheduler_service:
49
+ await _sync_scheduler_service.stop_periodic_sync()
31
50
  if _auto_indexing_service:
32
51
  await _auto_indexing_service.stop()
33
52
 
kodit/application/factories/code_indexing_factory.py CHANGED
@@ -13,7 +13,7 @@ from kodit.domain.services.index_query_service import IndexQueryService
13
13
  from kodit.domain.services.index_service import (
14
14
  IndexDomainService,
15
15
  )
16
- from kodit.domain.value_objects import LanguageMapping, SnippetExtractionStrategy
16
+ from kodit.domain.value_objects import LanguageMapping
17
17
  from kodit.infrastructure.bm25.bm25_factory import bm25_repository_factory
18
18
  from kodit.infrastructure.embedding.embedding_factory import (
19
19
  embedding_domain_service_factory,
@@ -31,15 +31,9 @@ from kodit.infrastructure.enrichment.null_enrichment_provider import (
31
31
  NullEnrichmentProvider,
32
32
  )
33
33
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
34
- from kodit.infrastructure.snippet_extraction.factories import (
35
- create_snippet_query_provider,
36
- )
37
- from kodit.infrastructure.snippet_extraction.language_detection_service import (
34
+ from kodit.infrastructure.slicing.language_detection_service import (
38
35
  FileSystemLanguageDetectionService,
39
36
  )
40
- from kodit.infrastructure.snippet_extraction.tree_sitter_snippet_extractor import (
41
- TreeSitterSnippetExtractor,
42
- )
43
37
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
44
38
  SqlAlchemyEmbeddingRepository,
45
39
  )
@@ -63,17 +57,9 @@ def create_code_indexing_application_service(
63
57
 
64
58
  # Create infrastructure services
65
59
  language_detector = FileSystemLanguageDetectionService(language_map)
66
- query_provider = create_snippet_query_provider()
67
60
 
68
- # Create snippet extractors
69
- method_extractor = TreeSitterSnippetExtractor(query_provider)
70
-
71
- snippet_extractors = {
72
- SnippetExtractionStrategy.METHOD_BASED: method_extractor,
73
- }
74
61
  index_domain_service = IndexDomainService(
75
62
  language_detector=language_detector,
76
- snippet_extractors=snippet_extractors,
77
63
  enrichment_service=enrichment_service,
78
64
  clone_dir=app_context.get_clone_dir(),
79
65
  )
@@ -136,17 +122,9 @@ def create_fast_test_code_indexing_application_service(
136
122
 
137
123
  # Create infrastructure services
138
124
  language_detector = FileSystemLanguageDetectionService(language_map)
139
- query_provider = create_snippet_query_provider()
140
-
141
- # Create snippet extractors
142
- method_extractor = TreeSitterSnippetExtractor(query_provider)
143
125
 
144
- snippet_extractors = {
145
- SnippetExtractionStrategy.METHOD_BASED: method_extractor,
146
- }
147
126
  index_domain_service = IndexDomainService(
148
127
  language_detector=language_detector,
149
- snippet_extractors=snippet_extractors,
150
128
  enrichment_service=enrichment_service,
151
129
  clone_dir=app_context.get_clone_dir(),
152
130
  )
kodit/application/services/code_indexing_application_service.py CHANGED
@@ -100,6 +100,11 @@ class CodeIndexingApplicationService:
100
100
  self.log.info("No new changes to index", index_id=index.id)
101
101
  return
102
102
 
103
+ # Delete the old snippets from the files that have changed
104
+ await self.index_repository.delete_snippets_by_file_ids(
105
+ [file.id for file in index.source.working_copy.changed_files() if file.id]
106
+ )
107
+
103
108
  # Extract and create snippets (domain service handles progress)
104
109
  self.log.info("Creating snippets for files", index_id=index.id)
105
110
  index = await self.index_domain_service.extract_snippets_from_index(
@@ -115,6 +120,9 @@ class CodeIndexingApplicationService:
115
120
  msg = f"Index {index.id} not found after snippet extraction"
116
121
  raise ValueError(msg)
117
122
  index = flushed_index
123
+ if len(index.snippets) == 0:
124
+ self.log.info("No snippets to index after extraction", index_id=index.id)
125
+ return
118
126
 
119
127
  # Create BM25 index
120
128
  self.log.info("Creating keyword index")
@@ -154,8 +162,8 @@ class CodeIndexingApplicationService:
154
162
  # Apply filters if provided
155
163
  filtered_snippet_ids: list[int] | None = None
156
164
  if request.filters:
157
- # Use domain service for filtering
158
- prefilter_request = replace(request, top_k=None)
165
+ # Use domain service for filtering (use large top_k for pre-filtering)
166
+ prefilter_request = replace(request, top_k=10000)
159
167
  snippet_results = await self.index_query_service.search_snippets(
160
168
  prefilter_request
161
169
  )
kodit/application/services/sync_scheduler.py ADDED
@@ -0,0 +1,128 @@
1
+ """Service for scheduling periodic sync operations."""
2
+
3
+ import asyncio
4
+ from collections.abc import Callable
5
+ from contextlib import suppress
6
+
7
+ import structlog
8
+ from sqlalchemy.ext.asyncio import AsyncSession
9
+
10
+ from kodit.application.factories.code_indexing_factory import (
11
+ create_code_indexing_application_service,
12
+ )
13
+ from kodit.config import AppContext
14
+ from kodit.domain.services.index_query_service import IndexQueryService
15
+ from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
16
+ from kodit.infrastructure.sqlalchemy.index_repository import SqlAlchemyIndexRepository
17
+
18
+
19
+ class SyncSchedulerService:
20
+ """Service for scheduling periodic sync operations."""
21
+
22
+ def __init__(
23
+ self,
24
+ app_context: AppContext,
25
+ session_factory: Callable[[], AsyncSession],
26
+ ) -> None:
27
+ """Initialize the sync scheduler service."""
28
+ self.app_context = app_context
29
+ self.session_factory = session_factory
30
+ self.log = structlog.get_logger(__name__)
31
+ self._sync_task: asyncio.Task | None = None
32
+ self._shutdown_event = asyncio.Event()
33
+
34
+ def start_periodic_sync(self, interval_seconds: float = 1800) -> None:
35
+ """Start periodic sync of all indexes."""
36
+ self.log.info("Starting periodic sync", interval_seconds=interval_seconds)
37
+
38
+ self._sync_task = asyncio.create_task(self._sync_loop(interval_seconds))
39
+
40
+ async def stop_periodic_sync(self) -> None:
41
+ """Stop the periodic sync task."""
42
+ self.log.info("Stopping periodic sync")
43
+ self._shutdown_event.set()
44
+
45
+ if self._sync_task and not self._sync_task.done():
46
+ self._sync_task.cancel()
47
+ with suppress(asyncio.CancelledError):
48
+ await self._sync_task
49
+
50
+ async def _sync_loop(self, interval_seconds: float) -> None:
51
+ """Run the sync loop at the specified interval."""
52
+ while not self._shutdown_event.is_set():
53
+ try:
54
+ await self._perform_sync()
55
+ except Exception as e:
56
+ self.log.exception("Sync operation failed", error=e)
57
+
58
+ # Wait for the interval or until shutdown
59
+ try:
60
+ await asyncio.wait_for(
61
+ self._shutdown_event.wait(), timeout=interval_seconds
62
+ )
63
+ # If we reach here, shutdown was requested
64
+ break
65
+ except TimeoutError:
66
+ # Continue to next sync cycle
67
+ continue
68
+
69
+ async def _perform_sync(self) -> None:
70
+ """Perform a sync operation on all indexes."""
71
+ self.log.info("Starting sync operation")
72
+
73
+ async with self.session_factory() as session:
74
+ # Create services
75
+ service = create_code_indexing_application_service(
76
+ app_context=self.app_context,
77
+ session=session,
78
+ )
79
+ index_query_service = IndexQueryService(
80
+ index_repository=SqlAlchemyIndexRepository(session=session),
81
+ fusion_service=ReciprocalRankFusionService(),
82
+ )
83
+
84
+ # Get all existing indexes
85
+ all_indexes = await index_query_service.list_indexes()
86
+
87
+ if not all_indexes:
88
+ self.log.info("No indexes found to sync")
89
+ return
90
+
91
+ self.log.info("Syncing indexes", count=len(all_indexes))
92
+
93
+ success_count = 0
94
+ failure_count = 0
95
+
96
+ # Sync each index
97
+ for index in all_indexes:
98
+ try:
99
+ self.log.info(
100
+ "Syncing index",
101
+ index_id=index.id,
102
+ source=str(index.source.working_copy.remote_uri),
103
+ )
104
+
105
+ await service.run_index(index, progress_callback=None)
106
+ success_count += 1
107
+
108
+ self.log.info(
109
+ "Index sync completed",
110
+ index_id=index.id,
111
+ source=str(index.source.working_copy.remote_uri),
112
+ )
113
+
114
+ except Exception as e:
115
+ failure_count += 1
116
+ self.log.exception(
117
+ "Index sync failed",
118
+ index_id=index.id,
119
+ source=str(index.source.working_copy.remote_uri),
120
+ error=e,
121
+ )
122
+
123
+ self.log.info(
124
+ "Sync operation completed",
125
+ total=len(all_indexes),
126
+ success=success_count,
127
+ failures=failure_count,
128
+ )
kodit/cli.py CHANGED
@@ -63,11 +63,105 @@ def cli(
63
63
  ctx.obj = config
64
64
 
65
65
 
66
+ async def _handle_auto_index(
67
+ app_context: AppContext,
68
+ sources: list[str], # noqa: ARG001
69
+ ) -> list[str]:
70
+ """Handle auto-index option and return sources to process."""
71
+ log = structlog.get_logger(__name__)
72
+ log.info("Auto-indexing configuration", config=app_context.auto_indexing)
73
+ if not app_context.auto_indexing or not app_context.auto_indexing.sources:
74
+ click.echo("No auto-index sources configured.")
75
+ return []
76
+ auto_sources = app_context.auto_indexing.sources
77
+ click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
78
+ return [source.uri for source in auto_sources]
79
+
80
+
81
+ async def _handle_sync(
82
+ service: Any,
83
+ index_query_service: IndexQueryService,
84
+ sources: list[str],
85
+ ) -> None:
86
+ """Handle sync operation."""
87
+ log = structlog.get_logger(__name__)
88
+ log_event("kodit.cli.index.sync")
89
+
90
+ # Get all existing indexes
91
+ all_indexes = await index_query_service.list_indexes()
92
+
93
+ if not all_indexes:
94
+ click.echo("No existing indexes found to sync.")
95
+ return
96
+
97
+ # Filter indexes if specific sources are provided
98
+ indexes_to_sync = all_indexes
99
+ if sources:
100
+ # Filter indexes that match the provided sources
101
+ source_uris = set(sources)
102
+ indexes_to_sync = [
103
+ index for index in all_indexes
104
+ if str(index.source.working_copy.remote_uri) in source_uris
105
+ ]
106
+
107
+ if not indexes_to_sync:
108
+ click.echo(
109
+ f"No indexes found for the specified sources: {', '.join(sources)}"
110
+ )
111
+ return
112
+
113
+ click.echo(f"Syncing {len(indexes_to_sync)} indexes...")
114
+
115
+ # Sync each index
116
+ for index in indexes_to_sync:
117
+ click.echo(f"Syncing: {index.source.working_copy.remote_uri}")
118
+
119
+ # Create progress callback for this sync operation
120
+ progress_callback = create_multi_stage_progress_callback()
121
+
122
+ try:
123
+ await service.run_index(index, progress_callback)
124
+ click.echo(f"✓ Sync completed: {index.source.working_copy.remote_uri}")
125
+ except Exception as e:
126
+ log.exception("Sync failed", index_id=index.id, error=e)
127
+ click.echo(
128
+ f"✗ Sync failed: {index.source.working_copy.remote_uri} - {e}"
129
+ )
130
+
131
+
132
+ async def _handle_list_indexes(index_query_service: IndexQueryService) -> None:
133
+ """Handle listing all indexes."""
134
+ log_event("kodit.cli.index.list")
135
+ # No source specified, list all indexes
136
+ indexes = await index_query_service.list_indexes()
137
+ headers: list[str | Cell] = [
138
+ "ID",
139
+ "Created At",
140
+ "Updated At",
141
+ "Source",
142
+ "Num Snippets",
143
+ ]
144
+ data = [
145
+ [
146
+ index.id,
147
+ index.created_at,
148
+ index.updated_at,
149
+ index.source.working_copy.remote_uri,
150
+ len(index.source.working_copy.files),
151
+ ]
152
+ for index in indexes
153
+ ]
154
+ click.echo(Table(headers=headers, data=data))
155
+
156
+
66
157
  @cli.command()
67
158
  @click.argument("sources", nargs=-1)
68
159
  @click.option(
69
160
  "--auto-index", is_flag=True, help="Index all configured auto-index sources"
70
161
  )
162
+ @click.option(
163
+ "--sync", is_flag=True, help="Sync existing indexes with their remotes"
164
+ )
71
165
  @with_app_context
72
166
  @with_session
73
167
  async def index(
@@ -76,8 +170,9 @@ async def index(
76
170
  sources: list[str],
77
171
  *, # Force keyword-only arguments
78
172
  auto_index: bool,
173
+ sync: bool,
79
174
  ) -> None:
80
- """List indexes, or index data sources."""
175
+ """List indexes, index data sources, or sync existing indexes."""
81
176
  log = structlog.get_logger(__name__)
82
177
  service = create_code_indexing_application_service(
83
178
  app_context=app_context,
@@ -89,36 +184,16 @@ async def index(
89
184
  )
90
185
 
91
186
  if auto_index:
92
- log.info("Auto-indexing configuration", config=app_context.auto_indexing)
93
- if not app_context.auto_indexing or not app_context.auto_indexing.sources:
94
- click.echo("No auto-index sources configured.")
187
+ sources = await _handle_auto_index(app_context, sources)
188
+ if not sources:
95
189
  return
96
- auto_sources = app_context.auto_indexing.sources
97
- click.echo(f"Auto-indexing {len(auto_sources)} configured sources...")
98
- sources = [source.uri for source in auto_sources]
190
+
191
+ if sync:
192
+ await _handle_sync(service, index_query_service, sources)
193
+ return
99
194
 
100
195
  if not sources:
101
- log_event("kodit.cli.index.list")
102
- # No source specified, list all indexes
103
- indexes = await index_query_service.list_indexes()
104
- headers: list[str | Cell] = [
105
- "ID",
106
- "Created At",
107
- "Updated At",
108
- "Source",
109
- "Num Snippets",
110
- ]
111
- data = [
112
- [
113
- index.id,
114
- index.created_at,
115
- index.updated_at,
116
- index.source.working_copy.remote_uri,
117
- len(index.source.working_copy.files),
118
- ]
119
- for index in indexes
120
- ]
121
- click.echo(Table(headers=headers, data=data))
196
+ await _handle_list_indexes(index_query_service)
122
197
  return
123
198
  # Handle source indexing
124
199
  for source in sources:
kodit/config.py CHANGED
@@ -81,6 +81,18 @@ class AutoIndexingConfig(BaseModel):
81
81
  return v
82
82
 
83
83
 
84
+ class PeriodicSyncConfig(BaseModel):
85
+ """Configuration for periodic/scheduled syncing."""
86
+
87
+ enabled: bool = Field(default=True, description="Enable periodic sync")
88
+ interval_seconds: float = Field(
89
+ default=1800, description="Interval between automatic syncs in seconds"
90
+ )
91
+ retry_attempts: int = Field(
92
+ default=3, description="Number of retry attempts for failed syncs"
93
+ )
94
+
95
+
84
96
  class CustomAutoIndexingEnvSource(EnvSettingsSource):
85
97
  """Custom environment source for parsing AutoIndexingConfig."""
86
98
 
@@ -173,6 +185,9 @@ class AppContext(BaseSettings):
173
185
  auto_indexing: AutoIndexingConfig | None = Field(
174
186
  default=AutoIndexingConfig(), description="Auto-indexing configuration"
175
187
  )
188
+ periodic_sync: PeriodicSyncConfig = Field(
189
+ default=PeriodicSyncConfig(), description="Periodic sync configuration"
190
+ )
176
191
  _db: Database | None = None
177
192
 
178
193
  def model_post_init(self, _: Any) -> None:
kodit/domain/services/index_service.py CHANGED
@@ -1,7 +1,6 @@
1
1
  """Pure domain service for Index aggregate operations."""
2
2
 
3
3
  from abc import ABC, abstractmethod
4
- from collections.abc import Mapping
5
4
  from pathlib import Path
6
5
 
7
6
  import structlog
@@ -13,14 +12,13 @@ from kodit.domain.services.enrichment_service import EnrichmentDomainService
13
12
  from kodit.domain.value_objects import (
14
13
  EnrichmentIndexRequest,
15
14
  EnrichmentRequest,
16
- SnippetExtractionRequest,
17
- SnippetExtractionResult,
18
- SnippetExtractionStrategy,
15
+ LanguageMapping,
19
16
  )
20
17
  from kodit.infrastructure.cloning.git.working_copy import GitWorkingCopyProvider
21
18
  from kodit.infrastructure.cloning.metadata import FileMetadataExtractor
22
19
  from kodit.infrastructure.git.git_utils import is_valid_clone_target
23
20
  from kodit.infrastructure.ignore.ignore_pattern_provider import GitIgnorePatternProvider
21
+ from kodit.infrastructure.slicing.slicer import Slicer
24
22
  from kodit.reporting import Reporter
25
23
  from kodit.utils.path_utils import path_from_uri
26
24
 
@@ -33,14 +31,6 @@ class LanguageDetectionService(ABC):
33
31
  """Detect the programming language of a file."""
34
32
 
35
33
 
36
- class SnippetExtractor(ABC):
37
- """Abstract interface for snippet extraction."""
38
-
39
- @abstractmethod
40
- async def extract(self, file_path: Path, language: str) -> list[str]:
41
- """Extract snippets from a file."""
42
-
43
-
44
34
  class IndexDomainService:
45
35
  """Pure domain service for Index aggregate operations.
46
36
 
@@ -54,14 +44,12 @@ class IndexDomainService:
54
44
  def __init__(
55
45
  self,
56
46
  language_detector: LanguageDetectionService,
57
- snippet_extractors: Mapping[SnippetExtractionStrategy, SnippetExtractor],
58
47
  enrichment_service: EnrichmentDomainService,
59
48
  clone_dir: Path,
60
49
  ) -> None:
61
50
  """Initialize the index domain service."""
62
51
  self._clone_dir = clone_dir
63
52
  self._language_detector = language_detector
64
- self._snippet_extractors = snippet_extractors
65
53
  self._enrichment_service = enrichment_service
66
54
  self.log = structlog.get_logger(__name__)
67
55
 
@@ -99,7 +87,6 @@ class IndexDomainService:
99
87
  async def extract_snippets_from_index(
100
88
  self,
101
89
  index: domain_entities.Index,
102
- strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED,
103
90
  progress_callback: ProgressCallback | None = None,
104
91
  ) -> domain_entities.Index:
105
92
  """Extract code snippets from files in the index."""
@@ -109,46 +96,40 @@ class IndexDomainService:
109
96
  "Extracting snippets",
110
97
  index_id=index.id,
111
98
  file_count=file_count,
112
- strategy=strategy.value,
113
99
  )
114
100
 
115
101
  # Only create snippets for files that have been added or modified
116
102
  files = index.source.working_copy.changed_files()
117
103
  index.delete_snippets_for_files(files)
118
104
 
119
- reporter = Reporter(self.log, progress_callback)
120
- await reporter.start(
121
- "extract_snippets", len(files), "Extracting code snippets..."
122
- )
123
-
124
- new_snippets = []
125
- for i, domain_file in enumerate(files, 1):
105
+ # Create a set of languages to extract snippets for
106
+ extensions = {file.extension() for file in files}
107
+ languages = []
108
+ for ext in extensions:
126
109
  try:
127
- # Extract snippets from file
128
- request = SnippetExtractionRequest(
129
- file_path=domain_file.as_path(), strategy=strategy
130
- )
131
- result = await self._extract_snippets(request)
132
- for snippet_text in result.snippets:
133
- snippet = domain_entities.Snippet(
134
- derives_from=[domain_file],
135
- )
136
- snippet.add_original_content(snippet_text, result.language)
137
- new_snippets.append(snippet)
138
-
139
- except (OSError, ValueError) as e:
140
- self.log.debug(
141
- "Skipping file for snippet extraction",
142
- file_uri=str(domain_file.uri),
143
- error=str(e),
144
- )
110
+ languages.append(LanguageMapping.get_language_for_extension(ext))
111
+ except ValueError as e:
112
+ self.log.info("Skipping", error=str(e))
145
113
  continue
146
114
 
115
+ reporter = Reporter(self.log, progress_callback)
116
+ await reporter.start(
117
+ "extract_snippets",
118
+ len(files) * len(languages),
119
+ "Extracting code snippets...",
120
+ )
121
+ # Calculate snippets for each language
122
+ slicer = Slicer()
123
+ for i, language in enumerate(languages):
147
124
  await reporter.step(
148
- "extract_snippets", i, len(files), f"Processed {domain_file.uri.path}"
125
+ "extract_snippets",
126
+ len(files) * (i + 1),
127
+ len(files) * len(languages),
128
+ "Extracting code snippets...",
149
129
  )
130
+ s = slicer.extract_snippets(files, language=language)
131
+ index.snippets.extend(s)
150
132
 
151
- index.snippets.extend(new_snippets)
152
133
  await reporter.done("extract_snippets")
153
134
  return index
154
135
 
@@ -187,28 +168,6 @@ class IndexDomainService:
187
168
  await reporter.done("enrichment")
188
169
  return list(snippet_map.values())
189
170
 
190
- async def _extract_snippets(
191
- self, request: SnippetExtractionRequest
192
- ) -> SnippetExtractionResult:
193
- # Domain logic: validate file exists
194
- if not request.file_path.exists():
195
- raise ValueError(f"File does not exist: {request.file_path}")
196
-
197
- # Domain logic: detect language
198
- language = await self._language_detector.detect_language(request.file_path)
199
-
200
- # Domain logic: choose strategy and extractor
201
- if request.strategy not in self._snippet_extractors:
202
- raise ValueError(f"Unsupported extraction strategy: {request.strategy}")
203
-
204
- extractor = self._snippet_extractors[request.strategy]
205
- snippets = await extractor.extract(request.file_path, language)
206
-
207
- # Domain logic: filter out empty snippets
208
- filtered_snippets = [snippet for snippet in snippets if snippet.strip()]
209
-
210
- return SnippetExtractionResult(snippets=filtered_snippets, language=language)
211
-
212
171
  def sanitize_uri(
213
172
  self, uri_or_path_like: str
214
173
  ) -> tuple[AnyUrl, domain_entities.SourceType]:
@@ -297,7 +256,7 @@ class IndexDomainService:
297
256
  await metadata_extractor.extract(file_path=file_path)
298
257
  )
299
258
  except (OSError, ValueError) as e:
300
- self.log.info("Skipping file", file=str(file_path), error=str(e))
259
+ self.log.debug("Skipping file", file=str(file_path), error=str(e))
301
260
  continue
302
261
 
303
262
  # Finally check if there are any modified files
kodit/domain/value_objects.py CHANGED
@@ -134,14 +134,6 @@ class SearchType(Enum):
134
134
  HYBRID = "hybrid"
135
135
 
136
136
 
137
- @dataclass
138
- class SnippetExtractionResult:
139
- """Domain model for snippet extraction result."""
140
-
141
- snippets: list[str]
142
- language: str
143
-
144
-
145
137
  @dataclass
146
138
  class Document:
147
139
  """Generic document model for indexing."""
@@ -640,20 +632,6 @@ class SnippetQuery(BaseModel):
640
632
  top_k: int = 10
641
633
 
642
634
 
643
- class SnippetExtractionStrategy(str, Enum):
644
- """Different strategies for extracting snippets from files."""
645
-
646
- METHOD_BASED = "method_based"
647
-
648
-
649
- @dataclass
650
- class SnippetExtractionRequest:
651
- """Domain model for snippet extraction request."""
652
-
653
- file_path: Path
654
- strategy: SnippetExtractionStrategy = SnippetExtractionStrategy.METHOD_BASED
655
-
656
-
657
635
  class FileProcessingStatus(IntEnum):
658
636
  """File processing status."""
659
637
 
@@ -661,3 +639,13 @@ class FileProcessingStatus(IntEnum):
661
639
  ADDED = 1
662
640
  MODIFIED = 2
663
641
  DELETED = 3
642
+
643
+
644
+ @dataclass
645
+ class FunctionDefinition:
646
+ """Cached function definition."""
647
+
648
+ name: str
649
+ qualified_name: str
650
+ start_byte: int
651
+ end_byte: int