kodit 0.3.9__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

kodit/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '0.3.9'
21
- __version_tuple__ = version_tuple = (0, 3, 9)
20
+ __version__ = version = '0.3.10'
21
+ __version_tuple__ = version_tuple = (0, 3, 10)
kodit/domain/services/index_service.py CHANGED
@@ -1,6 +1,7 @@
1
1
  """Pure domain service for Index aggregate operations."""
2
2
 
3
3
  from abc import ABC, abstractmethod
4
+ from collections import defaultdict
4
5
  from pathlib import Path
5
6
 
6
7
  import structlog
@@ -104,30 +105,39 @@ class IndexDomainService:
104
105
 
105
106
  # Create a set of languages to extract snippets for
106
107
  extensions = {file.extension() for file in files}
107
- languages = []
108
+ lang_files_map: dict[str, list[domain_entities.File]] = defaultdict(list)
108
109
  for ext in extensions:
109
110
  try:
110
- languages.append(LanguageMapping.get_language_for_extension(ext))
111
+ lang = LanguageMapping.get_language_for_extension(ext)
112
+ lang_files_map[lang].extend(
113
+ file for file in files if file.extension() == ext
114
+ )
111
115
  except ValueError as e:
112
- self.log.info("Skipping", error=str(e))
116
+ self.log.debug("Skipping", error=str(e))
113
117
  continue
114
118
 
119
+ self.log.info(
120
+ "Languages to process",
121
+ languages=lang_files_map.keys(),
122
+ )
123
+
115
124
  reporter = Reporter(self.log, progress_callback)
116
125
  await reporter.start(
117
126
  "extract_snippets",
118
- len(files) * len(languages),
127
+ len(lang_files_map.keys()),
119
128
  "Extracting code snippets...",
120
129
  )
130
+
121
131
  # Calculate snippets for each language
122
132
  slicer = Slicer()
123
- for i, language in enumerate(languages):
133
+ for i, (lang, lang_files) in enumerate(lang_files_map.items()):
124
134
  await reporter.step(
125
135
  "extract_snippets",
126
- len(files) * (i + 1),
127
- len(files) * len(languages),
128
- "Extracting code snippets...",
136
+ i,
137
+ len(lang_files_map.keys()),
138
+ f"Extracting code snippets for {lang}...",
129
139
  )
130
- s = slicer.extract_snippets(files, language=language)
140
+ s = slicer.extract_snippets(lang_files, language=lang)
131
141
  index.snippets.extend(s)
132
142
 
133
143
  await reporter.done("extract_snippets")
kodit/infrastructure/slicing/slicer.py CHANGED
@@ -10,10 +10,12 @@ from dataclasses import dataclass, field
10
10
  from pathlib import Path
11
11
  from typing import Any, ClassVar
12
12
 
13
+ import structlog
13
14
  from tree_sitter import Node, Parser, Tree
14
15
  from tree_sitter_language_pack import get_language
15
16
 
16
17
  from kodit.domain.entities import File, Snippet
18
+ from kodit.domain.value_objects import LanguageMapping
17
19
 
18
20
 
19
21
  @dataclass
@@ -145,8 +147,9 @@ class Slicer:
145
147
 
146
148
  def __init__(self) -> None:
147
149
  """Initialize an empty slicer."""
150
+ self.log = structlog.get_logger(__name__)
148
151
 
149
- def extract_snippets(
152
+ def extract_snippets( # noqa: C901
150
153
  self, files: list[File], language: str = "python"
151
154
  ) -> list[Snippet]:
152
155
  """Extract code snippets from a list of files.
@@ -170,6 +173,7 @@ class Slicer:
170
173
 
171
174
  # Get language configuration
172
175
  if language not in LanguageConfig.CONFIGS:
176
+ self.log.debug("Skipping", language=language)
173
177
  return []
174
178
 
175
179
  config = LanguageConfig.CONFIGS[language]
@@ -185,16 +189,20 @@ class Slicer:
185
189
  # Create mapping from Paths to File objects and extract paths
186
190
  path_to_file_map: dict[Path, File] = {}
187
191
  file_paths: list[Path] = []
188
-
189
192
  for file in files:
190
193
  file_path = file.as_path()
191
- path_to_file_map[file_path] = file
192
- file_paths.append(file_path)
194
+
195
+ # Validate file matches language
196
+ if not self._file_matches_language(file_path.suffix, language):
197
+ raise ValueError(f"File {file_path} does not match language {language}")
193
198
 
194
199
  # Validate file exists
195
200
  if not file_path.exists():
196
201
  raise FileNotFoundError(f"File not found: {file_path}")
197
202
 
203
+ path_to_file_map[file_path] = file
204
+ file_paths.append(file_path)
205
+
198
206
  # Initialize state
199
207
  state = AnalyzerState(parser=parser)
200
208
  state.files = file_paths
@@ -209,7 +217,7 @@ class Slicer:
209
217
  state.asts[file_path] = tree
210
218
  except OSError:
211
219
  # Skip files that can't be parsed
212
- pass
220
+ continue
213
221
 
214
222
  # Build indexes
215
223
  self._build_definition_and_import_indexes(state, config, language)
@@ -233,6 +241,19 @@ class Slicer:
233
241
 
234
242
  return snippets
235
243
 
244
+ def _file_matches_language(self, file_extension: str, language: str) -> bool:
245
+ """Check if a file extension matches the current language."""
246
+ if language not in LanguageConfig.CONFIGS:
247
+ return False
248
+
249
+ try:
250
+ return (
251
+ language == LanguageMapping.get_language_for_extension(file_extension)
252
+ )
253
+ except ValueError:
254
+ # Extension not supported, so it doesn't match any language
255
+ return False
256
+
236
257
  def _get_tree_sitter_language_name(self, language: str) -> str:
237
258
  """Map user language names to tree-sitter language names."""
238
259
  mapping = {
@@ -299,19 +320,23 @@ class Slicer:
299
320
 
300
321
  def _walk_tree(self, node: Node) -> Generator[Node, None, None]:
301
322
  """Walk the AST tree, yielding all nodes."""
302
- cursor = node.walk()
303
-
304
- def _walk_recursive() -> Generator[Node, None, None]:
305
- current_node = cursor.node
306
- if current_node is not None:
307
- yield current_node
308
- if cursor.goto_first_child():
309
- yield from _walk_recursive()
310
- while cursor.goto_next_sibling():
311
- yield from _walk_recursive()
312
- cursor.goto_parent()
313
-
314
- yield from _walk_recursive()
323
+ # Use a simple queue-based approach to avoid recursion issues
324
+ queue = [node]
325
+ visited: set[int] = set() # Track by node id (memory address)
326
+
327
+ while queue:
328
+ current = queue.pop(0)
329
+
330
+ # Use node id (memory address) as unique identifier to avoid infinite loops
331
+ node_id = id(current)
332
+ if node_id in visited:
333
+ continue
334
+ visited.add(node_id)
335
+
336
+ yield current
337
+
338
+ # Add children to queue
339
+ queue.extend(current.children)
315
340
 
316
341
  def _is_function_definition(self, node: Node, config: dict[str, Any]) -> bool:
317
342
  """Check if node is a function definition."""
kodit-0.3.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kodit
3
- Version: 0.3.9
3
+ Version: 0.3.10
4
4
  Summary: Code indexing for better AI code generation
5
5
  Project-URL: Homepage, https://docs.helixml.tech/kodit/
6
6
  Project-URL: Documentation, https://docs.helixml.tech/kodit/
kodit-0.3.10.dist-info/RECORD CHANGED
@@ -1,6 +1,6 @@
1
1
  kodit/.gitignore,sha256=ztkjgRwL9Uud1OEi36hGQeDGk3OLK1NfDEO8YqGYy8o,11
2
2
  kodit/__init__.py,sha256=aEKHYninUq1yh6jaNfvJBYg-6fenpN132nJt1UU6Jxs,59
3
- kodit/_version.py,sha256=2CFg1ZfWyy1OjlshL7TqqlWTw1jORYgWRqP2Fc0c4K0,511
3
+ kodit/_version.py,sha256=8f9qESpn_-snEACtTM18TNc6AEJLWU6rpXlL21ijVSc,513
4
4
  kodit/app.py,sha256=3_smkoioIQEYtRLIGHDtgGkmkP6Movd5CygQEMOStP8,3043
5
5
  kodit/cli.py,sha256=ZOS_VzCHGjJRZzZpaVR00QXSPIwRXPYu-pTrbEtlyR0,19328
6
6
  kodit/config.py,sha256=Il_eeyg7s83QF5lmiFB6qX6pmpiqCWncHtPgPcdA4xA,8063
@@ -26,7 +26,7 @@ kodit/domain/services/bm25_service.py,sha256=nsfTan3XtDwXuuAu1LUv-6Jukm6qFKVqqCV
26
26
  kodit/domain/services/embedding_service.py,sha256=7drYRC2kjg0WJmo06a2E9N0vDnwInUlBB96twjz2BT8,4526
27
27
  kodit/domain/services/enrichment_service.py,sha256=XsXg3nV-KN4rqtC7Zro_ZiZ6RSq-1eA1MG6IDzFGyBA,1316
28
28
  kodit/domain/services/index_query_service.py,sha256=02UWfyB_HoHUskunGuHeq5XwQLSWxGSK4OhvxcqIfY0,2022
29
- kodit/domain/services/index_service.py,sha256=ezVGbWdII25adri4_yyvsAF2eJOt4xmoHRDuS_-c6Ro,10810
29
+ kodit/domain/services/index_service.py,sha256=r6skJzN0Hp_lJNaUjQSpHSRETCHNnfmJWH4X6A2-rFE,11159
30
30
  kodit/infrastructure/__init__.py,sha256=HzEYIjoXnkz_i_MHO2e0sIVYweUcRnl2RpyBiTbMObU,28
31
31
  kodit/infrastructure/bm25/__init__.py,sha256=DmGbrEO34FOJy4e685BbyxLA7gPW1eqs2gAxsp6JOuM,34
32
32
  kodit/infrastructure/bm25/bm25_factory.py,sha256=I4eo7qRslnyXIRkBf-StZ5ga2Evrr5J5YFocTChFD3g,884
@@ -61,7 +61,7 @@ kodit/infrastructure/mappers/__init__.py,sha256=QPHOjNreXmBPPovZ6elnYFS0vD-IsmrG
61
61
  kodit/infrastructure/mappers/index_mapper.py,sha256=ZSfu8kjTaa8_UY0nTqr4b02NS3VrjqZYkduCN71AL2g,12743
62
62
  kodit/infrastructure/slicing/__init__.py,sha256=x7cjvHA9Ay2weUYE_dpdAaPaStp20M-4U2b5MLgT5KM,37
63
63
  kodit/infrastructure/slicing/language_detection_service.py,sha256=JGJXrq9bLyfnisWJXeP7y1jbZMmKAISdPBlRBCosUcE,684
64
- kodit/infrastructure/slicing/slicer.py,sha256=HGpxbMJWoDXhLLC7RJzWLAuE93zpoVW1MXzntJhueVo,34626
64
+ kodit/infrastructure/slicing/slicer.py,sha256=GOqJykd00waOTO1WJHyE5KUgJ2RLx2rOQ7M7T_u5LLg,35600
65
65
  kodit/infrastructure/sqlalchemy/__init__.py,sha256=UXPMSF_hgWaqr86cawRVqM8XdVNumQyyK5B8B97GnlA,33
66
66
  kodit/infrastructure/sqlalchemy/embedding_repository.py,sha256=dC2Wzj_zQiWExwfScE1LAGiiyxPyg0YepwyLOgDwcs4,7905
67
67
  kodit/infrastructure/sqlalchemy/entities.py,sha256=Dmh0z-dMI0wfMAPpf62kxU4md6NUH9P5Nx1QSTITOfg,5961
@@ -82,8 +82,8 @@ kodit/migrations/versions/__init__.py,sha256=9-lHzptItTzq_fomdIRBegQNm4Znx6pVjwD
82
82
  kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py,sha256=r7ukmJ_axXLAWewYx-F1fEmZ4JbtFd37i7cSb0tq3y0,1722
83
83
  kodit/utils/__init__.py,sha256=DPEB1i8evnLF4Ns3huuAYg-0pKBFKUFuiDzOKG9r-sw,33
84
84
  kodit/utils/path_utils.py,sha256=thK6YGGNvQThdBaCYCCeCvS1L8x-lwl3AoGht2jnjGw,1645
85
- kodit-0.3.9.dist-info/METADATA,sha256=3YOAFUcvQJJSNSOfZvhfp-VcAmQbwUjbZRwmz1qXekI,6973
86
- kodit-0.3.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
87
- kodit-0.3.9.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
88
- kodit-0.3.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
89
- kodit-0.3.9.dist-info/RECORD,,
85
+ kodit-0.3.10.dist-info/METADATA,sha256=SUpOyQI6dJQnd9Mza8Dml3A4zOeoM_XV6Q6cac2k3rw,6974
86
+ kodit-0.3.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
87
+ kodit-0.3.10.dist-info/entry_points.txt,sha256=hoTn-1aKyTItjnY91fnO-rV5uaWQLQ-Vi7V5et2IbHY,40
88
+ kodit-0.3.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
89
+ kodit-0.3.10.dist-info/RECORD,,
File without changes