kodit-0.2.8-py3-none-any.whl → kodit-0.2.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (34):
  1. kodit/_version.py +2 -2
  2. kodit/application/factories/__init__.py +1 -0
  3. kodit/application/factories/code_indexing_factory.py +119 -0
  4. kodit/application/services/{indexing_application_service.py → code_indexing_application_service.py} +159 -198
  5. kodit/cli.py +199 -62
  6. kodit/domain/entities.py +7 -5
  7. kodit/domain/repositories.py +33 -0
  8. kodit/domain/services/bm25_service.py +14 -17
  9. kodit/domain/services/embedding_service.py +10 -14
  10. kodit/domain/services/snippet_service.py +198 -0
  11. kodit/domain/value_objects.py +301 -21
  12. kodit/infrastructure/bm25/local_bm25_repository.py +20 -12
  13. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +31 -11
  14. kodit/infrastructure/cloning/metadata.py +1 -0
  15. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +14 -25
  16. kodit/infrastructure/embedding/local_vector_search_repository.py +26 -38
  17. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +50 -35
  18. kodit/infrastructure/enrichment/enrichment_factory.py +1 -1
  19. kodit/infrastructure/indexing/indexing_factory.py +8 -91
  20. kodit/infrastructure/indexing/snippet_domain_service_factory.py +37 -0
  21. kodit/infrastructure/snippet_extraction/languages/java.scm +12 -0
  22. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +3 -31
  23. kodit/infrastructure/sqlalchemy/embedding_repository.py +14 -3
  24. kodit/infrastructure/sqlalchemy/snippet_repository.py +174 -2
  25. kodit/mcp.py +61 -49
  26. {kodit-0.2.8.dist-info → kodit-0.2.9.dist-info}/METADATA +1 -1
  27. {kodit-0.2.8.dist-info → kodit-0.2.9.dist-info}/RECORD +30 -29
  28. kodit/application/commands/__init__.py +0 -1
  29. kodit/application/commands/snippet_commands.py +0 -22
  30. kodit/application/services/snippet_application_service.py +0 -149
  31. kodit/infrastructure/enrichment/legacy_enrichment_models.py +0 -42
  32. {kodit-0.2.8.dist-info → kodit-0.2.9.dist-info}/WHEEL +0 -0
  33. {kodit-0.2.8.dist-info → kodit-0.2.9.dist-info}/entry_points.txt +0 -0
  34. {kodit-0.2.8.dist-info → kodit-0.2.9.dist-info}/licenses/LICENSE +0 -0
@@ -8,10 +8,10 @@ from sqlalchemy.ext.asyncio import AsyncSession
8
8
 
9
9
  from kodit.domain.services.bm25_service import BM25Repository
10
10
  from kodit.domain.value_objects import (
11
- BM25DeleteRequest,
12
- BM25IndexRequest,
13
- BM25SearchRequest,
14
- BM25SearchResult,
11
+ DeleteRequest,
12
+ IndexRequest,
13
+ SearchRequest,
14
+ SearchResult,
15
15
  )
16
16
 
17
17
  TABLE_NAME = "vectorchord_bm25_documents"
@@ -80,6 +80,17 @@ SEARCH_QUERY = f"""
80
80
  ORDER BY bm25_score
81
81
  LIMIT :limit
82
82
  """ # noqa: S608
83
+ SEARCH_QUERY_WITH_FILTER = f"""
84
+ SELECT
85
+ snippet_id,
86
+ embedding <&>
87
+ to_bm25query('{INDEX_NAME}', tokenize(:query_text, '{TOKENIZER_NAME}'))
88
+ AS bm25_score
89
+ FROM {TABLE_NAME}
90
+ WHERE snippet_id = ANY(:snippet_ids)
91
+ ORDER BY bm25_score
92
+ LIMIT :limit
93
+ """ # noqa: S608
83
94
  DELETE_QUERY = f"""
84
95
  DELETE FROM {TABLE_NAME}
85
96
  WHERE snippet_id IN :snippet_ids
@@ -146,7 +157,7 @@ class VectorChordBM25Repository(BM25Repository):
146
157
  """Commit the session."""
147
158
  await self.__session.commit()
148
159
 
149
- async def index_documents(self, request: BM25IndexRequest) -> None:
160
+ async def index_documents(self, request: IndexRequest) -> None:
150
161
  """Index documents for BM25 search."""
151
162
  # Filter out any documents that don't have a snippet_id or text
152
163
  valid_documents = [
@@ -172,27 +183,36 @@ class VectorChordBM25Repository(BM25Repository):
172
183
  await self._execute(text(UPDATE_QUERY))
173
184
  await self._commit()
174
185
 
175
- async def search(self, request: BM25SearchRequest) -> list[BM25SearchResult]:
186
+ async def search(self, request: SearchRequest) -> list[SearchResult]:
176
187
  """Search documents using BM25."""
177
188
  if not request.query or request.query == "":
178
189
  return []
179
190
 
180
- sql = text(SEARCH_QUERY).bindparams(
181
- query_text=request.query, limit=request.top_k
182
- )
191
+ if request.snippet_ids is not None:
192
+ sql = text(SEARCH_QUERY_WITH_FILTER).bindparams(
193
+ query_text=request.query,
194
+ limit=request.top_k,
195
+ snippet_ids=request.snippet_ids,
196
+ )
197
+ else:
198
+ sql = text(SEARCH_QUERY).bindparams(
199
+ query_text=request.query,
200
+ limit=request.top_k,
201
+ )
202
+
183
203
  try:
184
204
  result = await self._execute(sql)
185
205
  rows = result.mappings().all()
186
206
 
187
207
  return [
188
- BM25SearchResult(snippet_id=row["snippet_id"], score=row["bm25_score"])
208
+ SearchResult(snippet_id=row["snippet_id"], score=row["bm25_score"])
189
209
  for row in rows
190
210
  ]
191
211
  except Exception as e:
192
212
  msg = f"Error during BM25 search: {e}"
193
213
  raise RuntimeError(msg) from e
194
214
 
195
- async def delete_documents(self, request: BM25DeleteRequest) -> None:
215
+ async def delete_documents(self, request: DeleteRequest) -> None:
196
216
  """Delete documents from the index."""
197
217
  await self._execute(
198
218
  text(DELETE_QUERY).bindparams(bindparam("snippet_ids", expanding=True)),
@@ -38,6 +38,7 @@ class BaseFileMetadataExtractor:
38
38
  uri=path.as_uri(),
39
39
  sha256=sha,
40
40
  size_bytes=len(content),
41
+ extension=path.suffix.removeprefix(".").lower(),
41
42
  )
42
43
 
43
44
  async def _get_timestamps(
@@ -26,40 +26,29 @@ class HashEmbeddingProvider(EmbeddingProvider):
26
26
  self.embedding_size = embedding_size
27
27
  self.log = structlog.get_logger(__name__)
28
28
 
29
- def embed(
29
+ async def embed(
30
30
  self, data: list[EmbeddingRequest]
31
31
  ) -> AsyncGenerator[list[EmbeddingResponse], None]:
32
32
  """Embed a list of strings using a simple hash-based approach."""
33
33
  if not data:
34
-
35
- async def empty_generator() -> AsyncGenerator[
36
- list[EmbeddingResponse], None
37
- ]:
38
- if False:
39
- yield []
40
-
41
- return empty_generator()
34
+ yield []
42
35
 
43
36
  # Process in batches
44
37
  batch_size = 10
45
-
46
- async def _embed_batches() -> AsyncGenerator[list[EmbeddingResponse], None]:
47
- for i in range(0, len(data), batch_size):
48
- batch = data[i : i + batch_size]
49
- responses = []
50
-
51
- for request in batch:
52
- # Generate a deterministic embedding based on the text
53
- embedding = self._generate_embedding(request.text)
54
- responses.append(
55
- EmbeddingResponse(
56
- snippet_id=request.snippet_id, embedding=embedding
57
- )
38
+ for i in range(0, len(data), batch_size):
39
+ batch = data[i : i + batch_size]
40
+ responses = []
41
+
42
+ for request in batch:
43
+ # Generate a deterministic embedding based on the text
44
+ embedding = self._generate_embedding(request.text)
45
+ responses.append(
46
+ EmbeddingResponse(
47
+ snippet_id=request.snippet_id, embedding=embedding
58
48
  )
49
+ )
59
50
 
60
- yield responses
61
-
62
- return _embed_batches()
51
+ yield responses
63
52
 
64
53
  def _generate_embedding(self, text: str) -> list[float]:
65
54
  """Generate a deterministic embedding for the given text."""
@@ -3,7 +3,6 @@
3
3
  from collections.abc import AsyncGenerator
4
4
 
5
5
  import structlog
6
- import tiktoken
7
6
 
8
7
  from kodit.domain.entities import Embedding, EmbeddingType
9
8
  from kodit.domain.services.embedding_service import (
@@ -12,10 +11,10 @@ from kodit.domain.services.embedding_service import (
12
11
  )
13
12
  from kodit.domain.value_objects import (
14
13
  EmbeddingRequest,
14
+ IndexRequest,
15
15
  IndexResult,
16
- VectorIndexRequest,
17
- VectorSearchQueryRequest,
18
- VectorSearchResult,
16
+ SearchRequest,
17
+ SearchResult,
19
18
  )
20
19
  from kodit.infrastructure.sqlalchemy.embedding_repository import (
21
20
  SqlAlchemyEmbeddingRepository,
@@ -27,35 +26,29 @@ class LocalVectorSearchRepository(VectorSearchRepository):
27
26
 
28
27
  def __init__(
29
28
  self,
30
- embedding_repository: SqlAlchemyEmbeddingRepository,
31
29
  embedding_provider: EmbeddingProvider,
32
- embedding_type: EmbeddingType = EmbeddingType.CODE,
30
+ embedding_repository: SqlAlchemyEmbeddingRepository,
31
+ embedding_type: EmbeddingType,
33
32
  ) -> None:
34
33
  """Initialize the local vector search repository.
35
34
 
36
35
  Args:
37
- embedding_repository: The SQLAlchemy embedding repository
38
36
  embedding_provider: The embedding provider for generating embeddings
37
+ embedding_repository: The embedding repository for persistence
39
38
  embedding_type: The type of embedding to use
40
39
 
41
40
  """
42
- self.log = structlog.get_logger(__name__)
43
- self.embedding_repository = embedding_repository
44
41
  self.embedding_provider = embedding_provider
45
- self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
42
+ self.embedding_repository = embedding_repository
46
43
  self.embedding_type = embedding_type
44
+ self.log = structlog.get_logger(__name__)
47
45
 
48
- def index_documents(
49
- self, request: VectorIndexRequest
46
+ async def index_documents(
47
+ self, request: IndexRequest
50
48
  ) -> AsyncGenerator[list[IndexResult], None]:
51
49
  """Index documents for vector search."""
52
- if not request.documents:
53
-
54
- async def empty_generator() -> AsyncGenerator[list[IndexResult], None]:
55
- if False:
56
- yield []
57
-
58
- return empty_generator()
50
+ if not request.documents or len(request.documents) == 0:
51
+ yield []
59
52
 
60
53
  # Convert to embedding requests
61
54
  requests = [
@@ -63,25 +56,20 @@ class LocalVectorSearchRepository(VectorSearchRepository):
63
56
  for doc in request.documents
64
57
  ]
65
58
 
66
- async def _index_batches() -> AsyncGenerator[list[IndexResult], None]:
67
- async for batch in self.embedding_provider.embed(requests):
68
- results = []
69
- for result in batch:
70
- await self.embedding_repository.create_embedding(
71
- Embedding(
72
- snippet_id=result.snippet_id,
73
- embedding=result.embedding,
74
- type=self.embedding_type,
75
- )
59
+ async for batch in self.embedding_provider.embed(requests):
60
+ results = []
61
+ for result in batch:
62
+ await self.embedding_repository.create_embedding(
63
+ Embedding(
64
+ snippet_id=result.snippet_id,
65
+ embedding=result.embedding,
66
+ type=self.embedding_type,
76
67
  )
77
- results.append(IndexResult(snippet_id=result.snippet_id))
78
- yield results
79
-
80
- return _index_batches()
68
+ )
69
+ results.append(IndexResult(snippet_id=result.snippet_id))
70
+ yield results
81
71
 
82
- async def search(
83
- self, request: VectorSearchQueryRequest
84
- ) -> list[VectorSearchResult]:
72
+ async def search(self, request: SearchRequest) -> list[SearchResult]:
85
73
  """Search documents using vector similarity."""
86
74
  # Build a single-item request and collect its embedding
87
75
  req = EmbeddingRequest(snippet_id=0, text=request.query)
@@ -95,10 +83,10 @@ class LocalVectorSearchRepository(VectorSearchRepository):
95
83
  return []
96
84
 
97
85
  results = await self.embedding_repository.list_semantic_results(
98
- self.embedding_type, embedding_vec, request.top_k
86
+ self.embedding_type, embedding_vec, request.top_k, request.snippet_ids
99
87
  )
100
88
  return [
101
- VectorSearchResult(snippet_id=snippet_id, score=score)
89
+ SearchResult(snippet_id=snippet_id, score=score)
102
90
  for snippet_id, score in results
103
91
  ]
104
92
 
@@ -14,10 +14,10 @@ from kodit.domain.services.embedding_service import (
14
14
  )
15
15
  from kodit.domain.value_objects import (
16
16
  EmbeddingRequest,
17
+ IndexRequest,
17
18
  IndexResult,
18
- VectorIndexRequest,
19
- VectorSearchQueryRequest,
20
- VectorSearchResult,
19
+ SearchRequest,
20
+ SearchResult,
21
21
  )
22
22
 
23
23
  # SQL Queries
@@ -59,6 +59,15 @@ ORDER BY score ASC
59
59
  LIMIT :top_k;
60
60
  """
61
61
 
62
+ # Filtered search query with snippet_ids
63
+ SEARCH_QUERY_WITH_FILTER = """
64
+ SELECT snippet_id, embedding <=> :query as score
65
+ FROM {TABLE_NAME}
66
+ WHERE snippet_id = ANY(:snippet_ids)
67
+ ORDER BY score ASC
68
+ LIMIT :top_k;
69
+ """
70
+
62
71
  CHECK_VCHORD_EMBEDDING_EXISTS = """
63
72
  SELECT EXISTS(SELECT 1 FROM {TABLE_NAME} WHERE snippet_id = :snippet_id)
64
73
  """
@@ -156,17 +165,12 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
156
165
  """Commit the session."""
157
166
  await self._session.commit()
158
167
 
159
- def index_documents(
160
- self, request: VectorIndexRequest
168
+ async def index_documents(
169
+ self, request: IndexRequest
161
170
  ) -> AsyncGenerator[list[IndexResult], None]:
162
171
  """Index documents for vector search."""
163
172
  if not request.documents:
164
-
165
- async def empty_generator() -> AsyncGenerator[list[IndexResult], None]:
166
- if False:
167
- yield []
168
-
169
- return empty_generator()
173
+ yield []
170
174
 
171
175
  # Convert to embedding requests
172
176
  requests = [
@@ -174,27 +178,25 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
174
178
  for doc in request.documents
175
179
  ]
176
180
 
177
- async def _index_batches() -> AsyncGenerator[list[IndexResult], None]:
178
- async for batch in self.embedding_provider.embed(requests):
179
- await self._execute(
180
- text(INSERT_QUERY.format(TABLE_NAME=self.table_name)),
181
- [
182
- {
183
- "snippet_id": result.snippet_id,
184
- "embedding": str(result.embedding),
185
- }
186
- for result in batch
187
- ],
188
- )
189
- await self._commit()
190
- yield [IndexResult(snippet_id=result.snippet_id) for result in batch]
191
-
192
- return _index_batches()
181
+ async for batch in self.embedding_provider.embed(requests):
182
+ await self._execute(
183
+ text(INSERT_QUERY.format(TABLE_NAME=self.table_name)),
184
+ [
185
+ {
186
+ "snippet_id": result.snippet_id,
187
+ "embedding": str(result.embedding),
188
+ }
189
+ for result in batch
190
+ ],
191
+ )
192
+ await self._commit()
193
+ yield [IndexResult(snippet_id=result.snippet_id) for result in batch]
193
194
 
194
- async def search(
195
- self, request: VectorSearchQueryRequest
196
- ) -> list[VectorSearchResult]:
195
+ async def search(self, request: SearchRequest) -> list[SearchResult]:
197
196
  """Search documents using vector similarity."""
197
+ if not request.query or not request.query.strip():
198
+ return []
199
+
198
200
  req = EmbeddingRequest(snippet_id=0, text=request.query)
199
201
  embedding_vec: list[float] | None = None
200
202
  async for batch in self.embedding_provider.embed([req]):
@@ -204,14 +206,27 @@ class VectorChordVectorSearchRepository(VectorSearchRepository):
204
206
 
205
207
  if not embedding_vec:
206
208
  return []
207
- result = await self._execute(
208
- text(SEARCH_QUERY.format(TABLE_NAME=self.table_name)),
209
- {"query": str(embedding_vec), "top_k": request.top_k},
210
- )
209
+
210
+ # Use filtered query if snippet_ids are provided
211
+ if request.snippet_ids is not None:
212
+ result = await self._execute(
213
+ text(SEARCH_QUERY_WITH_FILTER.format(TABLE_NAME=self.table_name)),
214
+ {
215
+ "query": str(embedding_vec),
216
+ "top_k": request.top_k,
217
+ "snippet_ids": request.snippet_ids,
218
+ },
219
+ )
220
+ else:
221
+ result = await self._execute(
222
+ text(SEARCH_QUERY.format(TABLE_NAME=self.table_name)),
223
+ {"query": str(embedding_vec), "top_k": request.top_k},
224
+ )
225
+
211
226
  rows = result.mappings().all()
212
227
 
213
228
  return [
214
- VectorSearchResult(snippet_id=row["snippet_id"], score=row["score"])
229
+ SearchResult(snippet_id=row["snippet_id"], score=row["score"])
215
230
  for row in rows
216
231
  ]
217
232
 
@@ -24,7 +24,7 @@ def _get_endpoint_configuration(app_context: AppContext) -> Endpoint | None:
24
24
  return app_context.enrichment_endpoint or app_context.default_endpoint or None
25
25
 
26
26
 
27
- def create_enrichment_domain_service(
27
+ def enrichment_domain_service_factory(
28
28
  app_context: AppContext,
29
29
  ) -> EnrichmentDomainService:
30
30
  """Create an enrichment domain service.
@@ -1,113 +1,30 @@
1
1
  """Factory for creating indexing services."""
2
2
 
3
- from typing import Any
4
-
5
3
  from sqlalchemy.ext.asyncio import AsyncSession
6
4
 
7
- from kodit.application.services.indexing_application_service import (
8
- IndexingApplicationService,
9
- )
10
- from kodit.application.services.snippet_application_service import (
11
- SnippetApplicationService,
12
- )
13
- from kodit.domain.services.bm25_service import BM25DomainService
14
5
  from kodit.domain.services.indexing_service import IndexingDomainService
15
- from kodit.domain.services.source_service import SourceService
16
- from kodit.infrastructure.bm25.bm25_factory import bm25_repository_factory
17
- from kodit.infrastructure.embedding.embedding_factory import (
18
- embedding_domain_service_factory,
19
- )
20
- from kodit.infrastructure.enrichment.enrichment_factory import (
21
- create_enrichment_domain_service,
22
- )
23
6
  from kodit.infrastructure.indexing.fusion_service import ReciprocalRankFusionService
24
7
  from kodit.infrastructure.indexing.index_repository import SQLAlchemyIndexRepository
25
- from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
26
- create_snippet_extraction_domain_service,
27
- )
28
- from kodit.infrastructure.sqlalchemy.file_repository import (
29
- SqlAlchemyFileRepository,
30
- )
31
- from kodit.infrastructure.sqlalchemy.snippet_repository import (
32
- SqlAlchemySnippetRepository,
33
- )
34
-
35
-
36
- def create_snippet_application_service(
37
- session: AsyncSession,
38
- ) -> SnippetApplicationService:
39
- """Create a snippet application service with all dependencies."""
40
- # Create domain service
41
- snippet_extraction_service = create_snippet_extraction_domain_service()
42
-
43
- # Create repositories
44
- snippet_repository = SqlAlchemySnippetRepository(session)
45
- file_repository = SqlAlchemyFileRepository(session)
46
8
 
47
- # Create application service
48
- from kodit.application.services.snippet_application_service import (
49
- SnippetApplicationService,
50
- )
51
-
52
- return SnippetApplicationService(
53
- snippet_extraction_service=snippet_extraction_service,
54
- snippet_repository=snippet_repository,
55
- file_repository=file_repository,
56
- session=session,
57
- )
58
9
 
59
-
60
- def create_indexing_domain_service(session: AsyncSession) -> IndexingDomainService:
61
- """Create an indexing domain service.
10
+ def indexing_domain_service_factory(session: AsyncSession) -> IndexingDomainService:
11
+ """Create an indexing domain service with all dependencies.
62
12
 
63
13
  Args:
64
- session: The database session.
14
+ session: SQLAlchemy session
65
15
 
66
16
  Returns:
67
- An indexing domain service instance.
17
+ Configured indexing domain service
68
18
 
69
19
  """
20
+ # Create repositories
70
21
  index_repository = SQLAlchemyIndexRepository(session)
22
+
23
+ # Create fusion service
71
24
  fusion_service = ReciprocalRankFusionService()
72
25
 
26
+ # Create domain service
73
27
  return IndexingDomainService(
74
28
  index_repository=index_repository,
75
29
  fusion_service=fusion_service,
76
30
  )
77
-
78
-
79
- def create_indexing_application_service(
80
- app_context: Any,
81
- session: AsyncSession,
82
- source_service: SourceService,
83
- snippet_application_service: SnippetApplicationService,
84
- ) -> IndexingApplicationService:
85
- """Create an indexing application service.
86
-
87
- Args:
88
- app_context: The application context.
89
- session: The database session.
90
- source_service: The source service.
91
- snippet_application_service: The snippet application service.
92
-
93
- Returns:
94
- An indexing application service instance.
95
-
96
- """
97
- # Create domain services
98
- indexing_domain_service = create_indexing_domain_service(session)
99
- bm25_service = BM25DomainService(bm25_repository_factory(app_context, session))
100
- code_search_service = embedding_domain_service_factory("code", app_context, session)
101
- text_search_service = embedding_domain_service_factory("text", app_context, session)
102
- enrichment_service = create_enrichment_domain_service(app_context)
103
-
104
- return IndexingApplicationService(
105
- indexing_domain_service=indexing_domain_service,
106
- source_service=source_service,
107
- bm25_service=bm25_service,
108
- code_search_service=code_search_service,
109
- text_search_service=text_search_service,
110
- enrichment_service=enrichment_service,
111
- snippet_application_service=snippet_application_service,
112
- session=session,
113
- )
@@ -0,0 +1,37 @@
1
+ """Factory for creating snippet domain service."""
2
+
3
+ from sqlalchemy.ext.asyncio import AsyncSession
4
+
5
+ from kodit.domain.services.snippet_service import SnippetDomainService
6
+ from kodit.infrastructure.snippet_extraction.snippet_extraction_factory import (
7
+ create_snippet_extraction_domain_service,
8
+ )
9
+ from kodit.infrastructure.sqlalchemy.file_repository import SqlAlchemyFileRepository
10
+ from kodit.infrastructure.sqlalchemy.snippet_repository import (
11
+ SqlAlchemySnippetRepository,
12
+ )
13
+
14
+
15
+ def snippet_domain_service_factory(session: AsyncSession) -> SnippetDomainService:
16
+ """Create a snippet domain service with all dependencies.
17
+
18
+ Args:
19
+ session: The database session
20
+
21
+ Returns:
22
+ Configured snippet domain service
23
+
24
+ """
25
+ # Create domain service for snippet extraction
26
+ snippet_extraction_service = create_snippet_extraction_domain_service()
27
+
28
+ # Create repositories
29
+ snippet_repository = SqlAlchemySnippetRepository(session)
30
+ file_repository = SqlAlchemyFileRepository(session)
31
+
32
+ # Create and return the domain service
33
+ return SnippetDomainService(
34
+ snippet_extraction_service=snippet_extraction_service,
35
+ snippet_repository=snippet_repository,
36
+ file_repository=file_repository,
37
+ )
@@ -0,0 +1,12 @@
1
+ (import_declaration
2
+ (scoped_identifier) @import.name
3
+ )
4
+
5
+ (method_declaration
6
+ name: (identifier) @function.name
7
+ body: (block) @function.body
8
+ ) @function.def
9
+
10
+ (class_declaration
11
+ name: (identifier) @class.name
12
+ ) @class.def
@@ -9,6 +9,7 @@ from kodit.domain.repositories import FileRepository, SnippetRepository
9
9
  from kodit.domain.services.snippet_extraction_service import (
10
10
  SnippetExtractionDomainService,
11
11
  )
12
+ from kodit.domain.value_objects import LanguageMapping
12
13
  from kodit.infrastructure.snippet_extraction.language_detection_service import (
13
14
  FileSystemLanguageDetectionService,
14
15
  )
@@ -31,37 +32,8 @@ def create_snippet_extraction_domain_service() -> SnippetExtractionDomainService
31
32
  Configured snippet extraction domain service
32
33
 
33
34
  """
34
- # Language mapping from the existing languages module
35
- language_map = {
36
- # JavaScript/TypeScript
37
- "js": "javascript",
38
- "jsx": "javascript",
39
- "ts": "typescript",
40
- "tsx": "typescript",
41
- # Python
42
- "py": "python",
43
- # Rust
44
- "rs": "rust",
45
- # Go
46
- "go": "go",
47
- # C/C++
48
- "cpp": "cpp",
49
- "hpp": "cpp",
50
- "c": "c",
51
- "h": "c",
52
- # C#
53
- "cs": "csharp",
54
- # Ruby
55
- "rb": "ruby",
56
- # Java
57
- "java": "java",
58
- # PHP
59
- "php": "php",
60
- # Swift
61
- "swift": "swift",
62
- # Kotlin
63
- "kt": "kotlin",
64
- }
35
+ # Use the unified language mapping from the domain layer
36
+ language_map = LanguageMapping.get_extension_to_language_map()
65
37
 
66
38
  # Create infrastructure services
67
39
  language_detector = FileSystemLanguageDetectionService(language_map)