kodit 0.3.8__py3-none-any.whl → 0.3.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/domain/services/index_service.py +19 -9
- kodit/infrastructure/slicing/slicer.py +46 -21
- {kodit-0.3.8.dist-info → kodit-0.3.10.dist-info}/METADATA +1 -1
- {kodit-0.3.8.dist-info → kodit-0.3.10.dist-info}/RECORD +8 -8
- {kodit-0.3.8.dist-info → kodit-0.3.10.dist-info}/WHEEL +0 -0
- {kodit-0.3.8.dist-info → kodit-0.3.10.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.8.dist-info → kodit-0.3.10.dist-info}/licenses/LICENSE +0 -0
kodit/_version.py
CHANGED
|
kodit/domain/services/index_service.py
CHANGED
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Pure domain service for Index aggregate operations."""
|
|
2
2
|
|
|
3
3
|
from abc import ABC, abstractmethod
|
|
4
|
+
from collections import defaultdict
|
|
4
5
|
from pathlib import Path
|
|
5
6
|
|
|
6
7
|
import structlog
|
|
@@ -104,30 +105,39 @@ class IndexDomainService:
|
|
|
104
105
|
|
|
105
106
|
# Create a set of languages to extract snippets for
|
|
106
107
|
extensions = {file.extension() for file in files}
|
|
107
|
-
|
|
108
|
+
lang_files_map: dict[str, list[domain_entities.File]] = defaultdict(list)
|
|
108
109
|
for ext in extensions:
|
|
109
110
|
try:
|
|
110
|
-
|
|
111
|
+
lang = LanguageMapping.get_language_for_extension(ext)
|
|
112
|
+
lang_files_map[lang].extend(
|
|
113
|
+
file for file in files if file.extension() == ext
|
|
114
|
+
)
|
|
111
115
|
except ValueError as e:
|
|
112
|
-
self.log.
|
|
116
|
+
self.log.debug("Skipping", error=str(e))
|
|
113
117
|
continue
|
|
114
118
|
|
|
119
|
+
self.log.info(
|
|
120
|
+
"Languages to process",
|
|
121
|
+
languages=lang_files_map.keys(),
|
|
122
|
+
)
|
|
123
|
+
|
|
115
124
|
reporter = Reporter(self.log, progress_callback)
|
|
116
125
|
await reporter.start(
|
|
117
126
|
"extract_snippets",
|
|
118
|
-
len(
|
|
127
|
+
len(lang_files_map.keys()),
|
|
119
128
|
"Extracting code snippets...",
|
|
120
129
|
)
|
|
130
|
+
|
|
121
131
|
# Calculate snippets for each language
|
|
122
132
|
slicer = Slicer()
|
|
123
|
-
for i,
|
|
133
|
+
for i, (lang, lang_files) in enumerate(lang_files_map.items()):
|
|
124
134
|
await reporter.step(
|
|
125
135
|
"extract_snippets",
|
|
126
|
-
|
|
127
|
-
len(
|
|
128
|
-
"Extracting code snippets...",
|
|
136
|
+
i,
|
|
137
|
+
len(lang_files_map.keys()),
|
|
138
|
+
f"Extracting code snippets for {lang}...",
|
|
129
139
|
)
|
|
130
|
-
s = slicer.extract_snippets(
|
|
140
|
+
s = slicer.extract_snippets(lang_files, language=lang)
|
|
131
141
|
index.snippets.extend(s)
|
|
132
142
|
|
|
133
143
|
await reporter.done("extract_snippets")
|
|
kodit/infrastructure/slicing/slicer.py
CHANGED
@@ -10,10 +10,12 @@ from dataclasses import dataclass, field
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from typing import Any, ClassVar
|
|
12
12
|
|
|
13
|
+
import structlog
|
|
13
14
|
from tree_sitter import Node, Parser, Tree
|
|
14
15
|
from tree_sitter_language_pack import get_language
|
|
15
16
|
|
|
16
17
|
from kodit.domain.entities import File, Snippet
|
|
18
|
+
from kodit.domain.value_objects import LanguageMapping
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
@dataclass
|
|
@@ -145,8 +147,9 @@ class Slicer:
|
|
|
145
147
|
|
|
146
148
|
def __init__(self) -> None:
|
|
147
149
|
"""Initialize an empty slicer."""
|
|
150
|
+
self.log = structlog.get_logger(__name__)
|
|
148
151
|
|
|
149
|
-
def extract_snippets(
|
|
152
|
+
def extract_snippets( # noqa: C901
|
|
150
153
|
self, files: list[File], language: str = "python"
|
|
151
154
|
) -> list[Snippet]:
|
|
152
155
|
"""Extract code snippets from a list of files.
|
|
@@ -170,6 +173,7 @@ class Slicer:
|
|
|
170
173
|
|
|
171
174
|
# Get language configuration
|
|
172
175
|
if language not in LanguageConfig.CONFIGS:
|
|
176
|
+
self.log.debug("Skipping", language=language)
|
|
173
177
|
return []
|
|
174
178
|
|
|
175
179
|
config = LanguageConfig.CONFIGS[language]
|
|
@@ -185,16 +189,20 @@ class Slicer:
|
|
|
185
189
|
# Create mapping from Paths to File objects and extract paths
|
|
186
190
|
path_to_file_map: dict[Path, File] = {}
|
|
187
191
|
file_paths: list[Path] = []
|
|
188
|
-
|
|
189
192
|
for file in files:
|
|
190
193
|
file_path = file.as_path()
|
|
191
|
-
|
|
192
|
-
|
|
194
|
+
|
|
195
|
+
# Validate file matches language
|
|
196
|
+
if not self._file_matches_language(file_path.suffix, language):
|
|
197
|
+
raise ValueError(f"File {file_path} does not match language {language}")
|
|
193
198
|
|
|
194
199
|
# Validate file exists
|
|
195
200
|
if not file_path.exists():
|
|
196
201
|
raise FileNotFoundError(f"File not found: {file_path}")
|
|
197
202
|
|
|
203
|
+
path_to_file_map[file_path] = file
|
|
204
|
+
file_paths.append(file_path)
|
|
205
|
+
|
|
198
206
|
# Initialize state
|
|
199
207
|
state = AnalyzerState(parser=parser)
|
|
200
208
|
state.files = file_paths
|
|
@@ -209,7 +217,7 @@ class Slicer:
|
|
|
209
217
|
state.asts[file_path] = tree
|
|
210
218
|
except OSError:
|
|
211
219
|
# Skip files that can't be parsed
|
|
212
|
-
|
|
220
|
+
continue
|
|
213
221
|
|
|
214
222
|
# Build indexes
|
|
215
223
|
self._build_definition_and_import_indexes(state, config, language)
|
|
@@ -233,6 +241,19 @@ class Slicer:
|
|
|
233
241
|
|
|
234
242
|
return snippets
|
|
235
243
|
|
|
244
|
+
def _file_matches_language(self, file_extension: str, language: str) -> bool:
|
|
245
|
+
"""Check if a file extension matches the current language."""
|
|
246
|
+
if language not in LanguageConfig.CONFIGS:
|
|
247
|
+
return False
|
|
248
|
+
|
|
249
|
+
try:
|
|
250
|
+
return (
|
|
251
|
+
language == LanguageMapping.get_language_for_extension(file_extension)
|
|
252
|
+
)
|
|
253
|
+
except ValueError:
|
|
254
|
+
# Extension not supported, so it doesn't match any language
|
|
255
|
+
return False
|
|
256
|
+
|
|
236
257
|
def _get_tree_sitter_language_name(self, language: str) -> str:
|
|
237
258
|
"""Map user language names to tree-sitter language names."""
|
|
238
259
|
mapping = {
|
|
@@ -247,9 +268,9 @@ class Slicer:
|
|
|
247
268
|
"typescript": "typescript",
|
|
248
269
|
"js": "javascript",
|
|
249
270
|
"ts": "typescript",
|
|
250
|
-
"csharp": "
|
|
251
|
-
"c#": "
|
|
252
|
-
"cs": "
|
|
271
|
+
"csharp": "csharp",
|
|
272
|
+
"c#": "csharp",
|
|
273
|
+
"cs": "csharp",
|
|
253
274
|
"html": "html",
|
|
254
275
|
"css": "css",
|
|
255
276
|
}
|
|
@@ -299,19 +320,23 @@ class Slicer:
|
|
|
299
320
|
|
|
300
321
|
def _walk_tree(self, node: Node) -> Generator[Node, None, None]:
|
|
301
322
|
"""Walk the AST tree, yielding all nodes."""
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
323
|
+
# Use a simple queue-based approach to avoid recursion issues
|
|
324
|
+
queue = [node]
|
|
325
|
+
visited: set[int] = set() # Track by node id (memory address)
|
|
326
|
+
|
|
327
|
+
while queue:
|
|
328
|
+
current = queue.pop(0)
|
|
329
|
+
|
|
330
|
+
# Use node id (memory address) as unique identifier to avoid infinite loops
|
|
331
|
+
node_id = id(current)
|
|
332
|
+
if node_id in visited:
|
|
333
|
+
continue
|
|
334
|
+
visited.add(node_id)
|
|
335
|
+
|
|
336
|
+
yield current
|
|
337
|
+
|
|
338
|
+
# Add children to queue
|
|
339
|
+
queue.extend(current.children)
|
|
315
340
|
|
|
316
341
|
def _is_function_definition(self, node: Node, config: dict[str, Any]) -> bool:
|
|
317
342
|
"""Check if node is a function definition."""
|
|
{kodit-0.3.8.dist-info → kodit-0.3.10.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
|
|
|
1
1
|
kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
|
|
2
2
|
kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
|
|
3
|
-
kodit/_version.py,sha256=
|
|
3
|
+
kodit/_version.py,sha256=8f9qESpn_-snEACtTM18TNc6AEJLWU6rpXlL21ijVSc,513
|
|
4
4
|
kodit/app.py,sha256=3_smkoioIQEYtRLIGHDtgGkmkP6Movd5CygQEMOStP8,3043
|
|
5
5
|
kodit/cli.py,sha256=ZOS_VzCHGjJRZzZpaVR00QXSPIwRXPYu-pTrbEtlyR0,19328
|
|
6
6
|
kodit/config.py,sha256=Il_eeyg7s83QF5lmiFB6qX6pmpiqCWncHtPgPcdA4xA,8063
|
|
@@ -26,7 +26,7 @@ kodit/domain/services/bm25_service.py,sha256=nsfTan3XtDwXuuAu1LUv-6Jukm6qFKVqqCV
|
|
|
26
26
|
kodit/domain/services/embedding_service.py,sha256=7drYRC2kjg0WJmo06a2E9N0vDnwInUlBB96twjz2BT8,4526
|
|
27
27
|
kodit/domain/services/enrichment_service.py,sha256=XsXg3nV-KN4rqtC7Zro_ZiZ6RSq-1eA1MG6IDzFGyBA,1316
|
|
28
28
|
kodit/domain/services/index_query_service.py,sha256=02UWfyB_HoHUskunGuHeq5XwQLSWxGSK4OhvxcqIfY0,2022
|
|
29
|
-
kodit/domain/services/index_service.py,sha256=
|
|
29
|
+
kodit/domain/services/index_service.py,sha256=r6skJzN0Hp_lJNaUjQSpHSRETCHNnfmJWH4X6A2-rFE,11159
|
|
30
30
|
kodit/infrastructure/__init__.py,sha256=HzEYIjoXnkz_i_MHO2e0sIVYweUcRnl2RpyBiTbMObU,28
|
|
31
31
|
kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gAxsp6JOuM,34
|
|
32
32
|
kodit/infrastructure/bm25/bm25_factory.py,sha256=I4eo7qRslnyXIRkBf-StZ5ga2Evrr5J5YFocTChFD3g,884
|
|
@@ -61,7 +61,7 @@ kodit/infrastructure/mappers/__init__.py,sha256=QPHOjNreXmBPPovZ6elnYFS0vD-IsmrG
|
|
|
61
61
|
kodit/infrastructure/mappers/index_mapper.py,sha256=ZSfu8kjTaa8_UY0nTqr4b02NS3VrjqZYkduCN71AL2g,12743
|
|
62
62
|
kodit/infrastructure/slicing/__init__.py,sha256=x7cjvHA9Ay2weUYE_dpdAaPaStp20M-4U2b5MLgT5KM,37
|
|
63
63
|
kodit/infrastructure/slicing/language_detection_service.py,sha256=JGJXrq9bLyfnisWJXeP7y1jbZMmKAISdPBlRBCosUcE,684
|
|
64
|
-
kodit/infrastructure/slicing/slicer.py,sha256=
|
|
64
|
+
kodit/infrastructure/slicing/slicer.py,sha256=GOqJykd00waOTO1WJHyE5KUgJ2RLx2rOQ7M7T_u5LLg,35600
|
|
65
65
|
kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNumQyyK5B8B97GnlA,33
|
|
66
66
|
kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=dC2Wzj_zQiWExwfScE1LAGiiyxPyg0YepwyLOgDwcs4,7905
|
|
67
67
|
kodit/infrastructure/sqlalchemy/entities.py,sha256=Dmh0z-dMI0wfMAPpf62kxU4md6NUH9P5Nx1QSTITOfg,5961
|
|
@@ -82,8 +82,8 @@ kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD
|
|
|
82
82
|
kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py,sha256=r7ukmJ_axXLAWewYx-F1fEmZ4JbtFd37i7cSb0tq3y0,1722
|
|
83
83
|
kodit/utils/__init__.py,sha256=DPEB1i8evnLF4Ns3huuAYg-0pKBFKUFuiDzOKG9r-sw,33
|
|
84
84
|
kodit/utils/path_utils.py,sha256=thK6YGGNvQThdBaCYCCeCvS1L8x-lwl3AoGht2jnjGw,1645
|
|
85
|
-
kodit-0.3.
|
|
86
|
-
kodit-0.3.
|
|
87
|
-
kodit-0.3.
|
|
88
|
-
kodit-0.3.
|
|
89
|
-
kodit-0.3.
|
|
85
|
+
kodit-0.3.10.dist-info/METADATA,sha256=SUpOyQI6dJQnd9Mza8Dml3A4zOeoM_XV6Q6cac2k3rw,6974
|
|
86
|
+
kodit-0.3.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
87
|
+
kodit-0.3.10.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
|
|
88
|
+
kodit-0.3.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
89
|
+
kodit-0.3.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|