kodit 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. (The original page linked to more details here; the link target was not preserved.)

Files changed (135)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +51 -23
  3. kodit/application/factories/reporting_factory.py +6 -2
  4. kodit/application/factories/server_factory.py +353 -0
  5. kodit/application/services/code_search_application_service.py +144 -0
  6. kodit/application/services/commit_indexing_application_service.py +700 -0
  7. kodit/application/services/indexing_worker_service.py +13 -44
  8. kodit/application/services/queue_service.py +24 -3
  9. kodit/application/services/reporting.py +0 -2
  10. kodit/application/services/sync_scheduler.py +15 -31
  11. kodit/cli.py +2 -753
  12. kodit/cli_utils.py +2 -9
  13. kodit/config.py +4 -97
  14. kodit/database.py +38 -1
  15. kodit/domain/enrichments/__init__.py +1 -0
  16. kodit/domain/enrichments/architecture/__init__.py +1 -0
  17. kodit/domain/enrichments/architecture/architecture.py +20 -0
  18. kodit/domain/enrichments/architecture/physical/__init__.py +1 -0
  19. kodit/domain/enrichments/architecture/physical/discovery_notes.py +14 -0
  20. kodit/domain/enrichments/architecture/physical/formatter.py +11 -0
  21. kodit/domain/enrichments/architecture/physical/physical.py +17 -0
  22. kodit/domain/enrichments/development/__init__.py +1 -0
  23. kodit/domain/enrichments/development/development.py +18 -0
  24. kodit/domain/enrichments/development/snippet/__init__.py +1 -0
  25. kodit/domain/enrichments/development/snippet/snippet.py +21 -0
  26. kodit/domain/enrichments/enricher.py +17 -0
  27. kodit/domain/enrichments/enrichment.py +39 -0
  28. kodit/domain/enrichments/request.py +12 -0
  29. kodit/domain/enrichments/response.py +11 -0
  30. kodit/domain/enrichments/usage/__init__.py +1 -0
  31. kodit/domain/enrichments/usage/api_docs.py +19 -0
  32. kodit/domain/enrichments/usage/usage.py +18 -0
  33. kodit/domain/{entities.py → entities/__init__.py} +50 -195
  34. kodit/domain/entities/git.py +190 -0
  35. kodit/domain/factories/__init__.py +1 -0
  36. kodit/domain/factories/git_repo_factory.py +76 -0
  37. kodit/domain/protocols.py +264 -64
  38. kodit/domain/services/bm25_service.py +5 -1
  39. kodit/domain/services/embedding_service.py +3 -0
  40. kodit/domain/services/enrichment_service.py +9 -30
  41. kodit/domain/services/git_repository_service.py +429 -0
  42. kodit/domain/services/git_service.py +300 -0
  43. kodit/domain/services/physical_architecture_service.py +182 -0
  44. kodit/domain/services/task_status_query_service.py +2 -2
  45. kodit/domain/value_objects.py +87 -135
  46. kodit/infrastructure/api/client/__init__.py +0 -2
  47. kodit/infrastructure/api/v1/__init__.py +0 -4
  48. kodit/infrastructure/api/v1/dependencies.py +92 -46
  49. kodit/infrastructure/api/v1/routers/__init__.py +0 -6
  50. kodit/infrastructure/api/v1/routers/commits.py +352 -0
  51. kodit/infrastructure/api/v1/routers/queue.py +2 -2
  52. kodit/infrastructure/api/v1/routers/repositories.py +282 -0
  53. kodit/infrastructure/api/v1/routers/search.py +31 -14
  54. kodit/infrastructure/api/v1/schemas/__init__.py +0 -24
  55. kodit/infrastructure/api/v1/schemas/commit.py +96 -0
  56. kodit/infrastructure/api/v1/schemas/context.py +2 -0
  57. kodit/infrastructure/api/v1/schemas/enrichment.py +29 -0
  58. kodit/infrastructure/api/v1/schemas/repository.py +128 -0
  59. kodit/infrastructure/api/v1/schemas/search.py +12 -9
  60. kodit/infrastructure/api/v1/schemas/snippet.py +58 -0
  61. kodit/infrastructure/api/v1/schemas/tag.py +31 -0
  62. kodit/infrastructure/api/v1/schemas/task_status.py +2 -0
  63. kodit/infrastructure/bm25/local_bm25_repository.py +16 -4
  64. kodit/infrastructure/bm25/vectorchord_bm25_repository.py +68 -52
  65. kodit/infrastructure/cloning/git/git_python_adaptor.py +534 -0
  66. kodit/infrastructure/cloning/git/working_copy.py +1 -1
  67. kodit/infrastructure/embedding/embedding_factory.py +3 -2
  68. kodit/infrastructure/embedding/local_vector_search_repository.py +1 -1
  69. kodit/infrastructure/embedding/vectorchord_vector_search_repository.py +111 -84
  70. kodit/infrastructure/enricher/__init__.py +1 -0
  71. kodit/infrastructure/enricher/enricher_factory.py +53 -0
  72. kodit/infrastructure/{enrichment/litellm_enrichment_provider.py → enricher/litellm_enricher.py} +36 -56
  73. kodit/infrastructure/{enrichment/local_enrichment_provider.py → enricher/local_enricher.py} +19 -24
  74. kodit/infrastructure/enricher/null_enricher.py +36 -0
  75. kodit/infrastructure/indexing/fusion_service.py +1 -1
  76. kodit/infrastructure/mappers/enrichment_mapper.py +83 -0
  77. kodit/infrastructure/mappers/git_mapper.py +193 -0
  78. kodit/infrastructure/mappers/snippet_mapper.py +104 -0
  79. kodit/infrastructure/mappers/task_mapper.py +5 -44
  80. kodit/infrastructure/physical_architecture/__init__.py +1 -0
  81. kodit/infrastructure/physical_architecture/detectors/__init__.py +1 -0
  82. kodit/infrastructure/physical_architecture/detectors/docker_compose_detector.py +336 -0
  83. kodit/infrastructure/physical_architecture/formatters/__init__.py +1 -0
  84. kodit/infrastructure/physical_architecture/formatters/narrative_formatter.py +149 -0
  85. kodit/infrastructure/reporting/log_progress.py +8 -5
  86. kodit/infrastructure/reporting/telemetry_progress.py +21 -0
  87. kodit/infrastructure/slicing/api_doc_extractor.py +836 -0
  88. kodit/infrastructure/slicing/ast_analyzer.py +1128 -0
  89. kodit/infrastructure/slicing/slicer.py +87 -421
  90. kodit/infrastructure/sqlalchemy/embedding_repository.py +43 -23
  91. kodit/infrastructure/sqlalchemy/enrichment_v2_repository.py +118 -0
  92. kodit/infrastructure/sqlalchemy/entities.py +402 -158
  93. kodit/infrastructure/sqlalchemy/git_branch_repository.py +274 -0
  94. kodit/infrastructure/sqlalchemy/git_commit_repository.py +346 -0
  95. kodit/infrastructure/sqlalchemy/git_repository.py +262 -0
  96. kodit/infrastructure/sqlalchemy/git_tag_repository.py +268 -0
  97. kodit/infrastructure/sqlalchemy/snippet_v2_repository.py +479 -0
  98. kodit/infrastructure/sqlalchemy/task_repository.py +29 -23
  99. kodit/infrastructure/sqlalchemy/task_status_repository.py +24 -12
  100. kodit/infrastructure/sqlalchemy/unit_of_work.py +10 -14
  101. kodit/mcp.py +12 -30
  102. kodit/migrations/env.py +1 -0
  103. kodit/migrations/versions/04b80f802e0c_foreign_key_review.py +100 -0
  104. kodit/migrations/versions/19f8c7faf8b9_add_generic_enrichment_type.py +260 -0
  105. kodit/migrations/versions/7f15f878c3a1_add_new_git_entities.py +690 -0
  106. kodit/migrations/versions/f9e5ef5e688f_add_git_commits_number.py +43 -0
  107. kodit/py.typed +0 -0
  108. kodit/utils/dump_config.py +361 -0
  109. kodit/utils/dump_openapi.py +6 -4
  110. kodit/utils/path_utils.py +29 -0
  111. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/METADATA +3 -3
  112. kodit-0.5.1.dist-info/RECORD +168 -0
  113. kodit/application/factories/code_indexing_factory.py +0 -195
  114. kodit/application/services/auto_indexing_service.py +0 -99
  115. kodit/application/services/code_indexing_application_service.py +0 -410
  116. kodit/domain/services/index_query_service.py +0 -70
  117. kodit/domain/services/index_service.py +0 -269
  118. kodit/infrastructure/api/client/index_client.py +0 -57
  119. kodit/infrastructure/api/v1/routers/indexes.py +0 -164
  120. kodit/infrastructure/api/v1/schemas/index.py +0 -101
  121. kodit/infrastructure/bm25/bm25_factory.py +0 -28
  122. kodit/infrastructure/cloning/__init__.py +0 -1
  123. kodit/infrastructure/cloning/metadata.py +0 -98
  124. kodit/infrastructure/enrichment/__init__.py +0 -1
  125. kodit/infrastructure/enrichment/enrichment_factory.py +0 -52
  126. kodit/infrastructure/enrichment/null_enrichment_provider.py +0 -19
  127. kodit/infrastructure/mappers/index_mapper.py +0 -345
  128. kodit/infrastructure/reporting/tdqm_progress.py +0 -38
  129. kodit/infrastructure/slicing/language_detection_service.py +0 -18
  130. kodit/infrastructure/sqlalchemy/index_repository.py +0 -646
  131. kodit-0.4.3.dist-info/RECORD +0 -125
  132. kodit/infrastructure/{enrichment → enricher}/utils.py +0 -0
  133. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/WHEEL +0 -0
  134. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/entry_points.txt +0 -0
  135. {kodit-0.4.3.dist-info → kodit-0.5.1.dist-info}/licenses/LICENSE +0 -0
kodit/infrastructure/slicing/slicer.py +87 -421
@@ -8,14 +8,19 @@ from collections import defaultdict
8
8
  from collections.abc import Generator
9
9
  from dataclasses import dataclass, field
10
10
  from pathlib import Path
11
- from typing import Any, ClassVar
11
+ from typing import Any
12
12
 
13
13
  import structlog
14
14
  from tree_sitter import Node, Parser, Tree
15
- from tree_sitter_language_pack import get_language
16
15
 
17
- from kodit.domain.entities import File, Snippet
16
+ from kodit.domain.entities.git import GitFile, SnippetV2
18
17
  from kodit.domain.value_objects import LanguageMapping
18
+ from kodit.infrastructure.slicing.ast_analyzer import (
19
+ ASTAnalyzer,
20
+ FunctionDefinition,
21
+ LanguageConfig,
22
+ ParsedFile,
23
+ )
19
24
 
20
25
 
21
26
  @dataclass
@@ -43,105 +48,6 @@ class AnalyzerState:
43
48
  )
44
49
 
45
50
 
46
- class LanguageConfig:
47
- """Language-specific configuration."""
48
-
49
- CONFIGS: ClassVar[dict[str, dict[str, Any]]] = {
50
- "python": {
51
- "function_nodes": ["function_definition"],
52
- "method_nodes": [],
53
- "call_node": "call",
54
- "import_nodes": ["import_statement", "import_from_statement"],
55
- "extension": ".py",
56
- "name_field": None, # Use identifier child
57
- },
58
- "java": {
59
- "function_nodes": ["method_declaration"],
60
- "method_nodes": [],
61
- "call_node": "method_invocation",
62
- "import_nodes": ["import_declaration"],
63
- "extension": ".java",
64
- "name_field": None,
65
- },
66
- "c": {
67
- "function_nodes": ["function_definition"],
68
- "method_nodes": [],
69
- "call_node": "call_expression",
70
- "import_nodes": ["preproc_include"],
71
- "extension": ".c",
72
- "name_field": "declarator",
73
- },
74
- "cpp": {
75
- "function_nodes": ["function_definition"],
76
- "method_nodes": [],
77
- "call_node": "call_expression",
78
- "import_nodes": ["preproc_include", "using_declaration"],
79
- "extension": ".cpp",
80
- "name_field": "declarator",
81
- },
82
- "rust": {
83
- "function_nodes": ["function_item"],
84
- "method_nodes": [],
85
- "call_node": "call_expression",
86
- "import_nodes": ["use_declaration", "extern_crate_declaration"],
87
- "extension": ".rs",
88
- "name_field": "name",
89
- },
90
- "go": {
91
- "function_nodes": ["function_declaration"],
92
- "method_nodes": ["method_declaration"],
93
- "call_node": "call_expression",
94
- "import_nodes": ["import_declaration"],
95
- "extension": ".go",
96
- "name_field": None,
97
- },
98
- "javascript": {
99
- "function_nodes": [
100
- "function_declaration",
101
- "function_expression",
102
- "arrow_function",
103
- ],
104
- "method_nodes": [],
105
- "call_node": "call_expression",
106
- "import_nodes": ["import_statement", "import_declaration"],
107
- "extension": ".js",
108
- "name_field": None,
109
- },
110
- "csharp": {
111
- "function_nodes": ["method_declaration"],
112
- "method_nodes": ["constructor_declaration"],
113
- "call_node": "invocation_expression",
114
- "import_nodes": ["using_directive"],
115
- "extension": ".cs",
116
- "name_field": None,
117
- },
118
- "html": {
119
- "function_nodes": ["script_element", "style_element"],
120
- "method_nodes": ["element"], # Elements with id/class attributes
121
- "call_node": "attribute",
122
- "import_nodes": ["script_element", "element"], # script and link elements
123
- "extension": ".html",
124
- "name_field": None,
125
- },
126
- "css": {
127
- "function_nodes": ["rule_set", "keyframes_statement"],
128
- "method_nodes": ["media_statement"],
129
- "call_node": "call_expression",
130
- "import_nodes": ["import_statement"],
131
- "extension": ".css",
132
- "name_field": None,
133
- },
134
- }
135
-
136
- # Aliases
137
- CONFIGS["c++"] = CONFIGS["cpp"]
138
- CONFIGS["typescript"] = CONFIGS["javascript"]
139
- CONFIGS["ts"] = CONFIGS["javascript"]
140
- CONFIGS["js"] = CONFIGS["javascript"]
141
- CONFIGS["c#"] = CONFIGS["csharp"]
142
- CONFIGS["cs"] = CONFIGS["csharp"]
143
-
144
-
145
51
  class Slicer:
146
52
  """Slicer that extracts code snippets from files."""
147
53
 
@@ -149,9 +55,9 @@ class Slicer:
149
55
  """Initialize an empty slicer."""
150
56
  self.log = structlog.get_logger(__name__)
151
57
 
152
- def extract_snippets( # noqa: C901
153
- self, files: list[File], language: str = "python"
154
- ) -> list[Snippet]:
58
+ def extract_snippets_from_git_files(
59
+ self, files: list[GitFile], language: str = "python"
60
+ ) -> list[SnippetV2]:
155
61
  """Extract code snippets from a list of files.
156
62
 
157
63
  Args:
@@ -171,26 +77,17 @@ class Slicer:
171
77
 
172
78
  language = language.lower()
173
79
 
174
- # Get language configuration
175
- if language not in LanguageConfig.CONFIGS:
176
- self.log.debug("Skipping", language=language)
80
+ # Initialize ASTAnalyzer
81
+ try:
82
+ analyzer = ASTAnalyzer(language)
83
+ except ValueError:
84
+ self.log.debug("Skipping unsupported language", language=language)
177
85
  return []
178
86
 
179
- config = LanguageConfig.CONFIGS[language]
180
-
181
- # Initialize tree-sitter
182
- tree_sitter_name = self._get_tree_sitter_language_name(language)
183
- try:
184
- ts_language = get_language(tree_sitter_name) # type: ignore[arg-type]
185
- parser = Parser(ts_language)
186
- except Exception as e:
187
- raise RuntimeError(f"Failed to load {language} parser: {e}") from e
188
-
189
- # Create mapping from Paths to File objects and extract paths
190
- path_to_file_map: dict[Path, File] = {}
191
- file_paths: list[Path] = []
87
+ # Validate files
88
+ path_to_file_map: dict[Path, GitFile] = {}
192
89
  for file in files:
193
- file_path = file.as_path()
90
+ file_path = Path(file.path)
194
91
 
195
92
  # Validate file matches language
196
93
  if not self._file_matches_language(file_path.suffix, language):
@@ -201,31 +98,27 @@ class Slicer:
201
98
  raise FileNotFoundError(f"File not found: {file_path}")
202
99
 
203
100
  path_to_file_map[file_path] = file
204
- file_paths.append(file_path)
205
101
 
206
- # Initialize state
207
- state = AnalyzerState(parser=parser)
208
- state.files = file_paths
209
- file_contents: dict[Path, str] = {}
102
+ # Parse files and extract definitions using ASTAnalyzer
103
+ parsed_files = analyzer.parse_files(files)
104
+ if not parsed_files:
105
+ return []
210
106
 
211
- # Parse all files
212
- for file_path in file_paths:
213
- try:
214
- with file_path.open("rb") as f:
215
- source_code = f.read()
216
- tree = state.parser.parse(source_code)
217
- state.asts[file_path] = tree
218
- except OSError:
219
- # Skip files that can't be parsed
220
- continue
107
+ functions, _, _ = analyzer.extract_definitions(
108
+ parsed_files, include_private=True
109
+ )
221
110
 
222
- # Build indexes
223
- self._build_definition_and_import_indexes(state, config, language)
111
+ # Build state from ASTAnalyzer results
112
+ state = self._build_state_from_ast_analyzer(parsed_files, functions)
113
+ config = LanguageConfig.CONFIGS[language]
114
+
115
+ # Build call graph and snippets (Slicer-specific logic)
224
116
  self._build_call_graph(state, config)
225
117
  self._build_reverse_call_graph(state)
226
118
 
227
119
  # Extract snippets for all functions
228
- snippets = []
120
+ file_contents: dict[Path, str] = {}
121
+ snippets: list[SnippetV2] = []
229
122
  for qualified_name in state.def_index:
230
123
  snippet_content = self._get_snippet(
231
124
  qualified_name,
@@ -234,7 +127,7 @@ class Slicer:
234
127
  {"max_depth": 2, "max_functions": 8},
235
128
  )
236
129
  if "not found" not in snippet_content:
237
- snippet = self._create_snippet_entity(
130
+ snippet = self._create_snippet_entity_from_git_files(
238
131
  qualified_name, snippet_content, language, state, path_to_file_map
239
132
  )
240
133
  snippets.append(snippet)
@@ -247,62 +140,42 @@ class Slicer:
247
140
  return False
248
141
 
249
142
  try:
250
- return (
251
- language == LanguageMapping.get_language_for_extension(file_extension)
143
+ return language == LanguageMapping.get_language_for_extension(
144
+ file_extension
252
145
  )
253
146
  except ValueError:
254
147
  # Extension not supported, so it doesn't match any language
255
148
  return False
256
149
 
257
- def _get_tree_sitter_language_name(self, language: str) -> str:
258
- """Map user language names to tree-sitter language names."""
259
- mapping = {
260
- "c++": "cpp",
261
- "c": "c",
262
- "cpp": "cpp",
263
- "java": "java",
264
- "rust": "rust",
265
- "python": "python",
266
- "go": "go",
267
- "javascript": "javascript",
268
- "typescript": "typescript",
269
- "js": "javascript",
270
- "ts": "typescript",
271
- "csharp": "csharp",
272
- "c#": "csharp",
273
- "cs": "csharp",
274
- "html": "html",
275
- "css": "css",
276
- }
277
- return mapping.get(language, language)
278
-
279
- def _build_definition_and_import_indexes(
280
- self, state: AnalyzerState, config: dict[str, Any], language: str
281
- ) -> None:
282
- """Build definition and import indexes."""
283
- for file_path, tree in state.asts.items():
284
- # Build definition index
285
- for node in self._walk_tree(tree.root_node):
286
- if self._is_function_definition(node, config):
287
- qualified_name = self._qualify_name(
288
- node, file_path, config, language
289
- )
290
- if qualified_name:
291
- span = (node.start_byte, node.end_byte)
292
- state.def_index[qualified_name] = FunctionInfo(
293
- file=file_path,
294
- node=node,
295
- span=span,
296
- qualified_name=qualified_name,
297
- )
298
-
299
- # Build import map
300
- file_imports = {}
301
- for node in self._walk_tree(tree.root_node):
302
- if self._is_import_statement(node, config):
303
- imports = self._extract_imports(node)
304
- file_imports.update(imports)
305
- state.imports[file_path] = file_imports
150
+ def _build_state_from_ast_analyzer(
151
+ self,
152
+ parsed_files: list["ParsedFile"],
153
+ functions: list["FunctionDefinition"],
154
+ ) -> AnalyzerState:
155
+ """Build AnalyzerState from ASTAnalyzer results."""
156
+ # Create a dummy parser (not used for new parsing)
157
+ from tree_sitter_language_pack import get_language
158
+
159
+ ts_language = get_language("python")
160
+ parser = Parser(ts_language)
161
+
162
+ state = AnalyzerState(parser=parser)
163
+
164
+ # Populate files and ASTs from ParsedFile objects
165
+ for parsed in parsed_files:
166
+ state.files.append(parsed.path)
167
+ state.asts[parsed.path] = parsed.tree
168
+
169
+ # Populate def_index from FunctionDefinition objects
170
+ for func_def in functions:
171
+ state.def_index[func_def.qualified_name] = FunctionInfo(
172
+ file=func_def.file,
173
+ node=func_def.node,
174
+ span=func_def.span,
175
+ qualified_name=func_def.qualified_name,
176
+ )
177
+
178
+ return state
306
179
 
307
180
  def _build_call_graph(self, state: AnalyzerState, config: dict[str, Any]) -> None:
308
181
  """Build call graph from function definitions."""
@@ -338,214 +211,6 @@ class Slicer:
338
211
  # Add children to queue
339
212
  queue.extend(current.children)
340
213
 
341
- def _is_function_definition(self, node: Node, config: dict[str, Any]) -> bool:
342
- """Check if node is a function definition."""
343
- return node.type in (config["function_nodes"] + config["method_nodes"])
344
-
345
- def _is_import_statement(self, node: Node, config: dict[str, Any]) -> bool:
346
- """Check if node is an import statement."""
347
- return node.type in config["import_nodes"]
348
-
349
- def _extract_function_name(
350
- self, node: Node, config: dict[str, Any], language: str
351
- ) -> str | None:
352
- """Extract function name from a function definition node."""
353
- if language == "html":
354
- return self._extract_html_element_name(node)
355
- if language == "css":
356
- return self._extract_css_rule_name(node)
357
- if language == "go" and node.type == "method_declaration":
358
- return self._extract_go_method_name(node)
359
- if language in ["c", "cpp"] and config["name_field"]:
360
- return self._extract_c_cpp_function_name(node, config)
361
- if language == "rust" and config["name_field"]:
362
- return self._extract_rust_function_name(node, config)
363
- return self._extract_default_function_name(node)
364
-
365
- def _extract_go_method_name(self, node: Node) -> str | None:
366
- """Extract method name from Go method declaration."""
367
- for child in node.children:
368
- if child.type == "field_identifier" and child.text is not None:
369
- return child.text.decode("utf-8")
370
- return None
371
-
372
- def _extract_c_cpp_function_name(
373
- self, node: Node, config: dict[str, Any]
374
- ) -> str | None:
375
- """Extract function name from C/C++ function definition."""
376
- declarator = node.child_by_field_name(config["name_field"])
377
- if not declarator:
378
- return None
379
-
380
- if declarator.type == "function_declarator":
381
- for child in declarator.children:
382
- if child.type == "identifier" and child.text is not None:
383
- return child.text.decode("utf-8")
384
- elif declarator.type == "identifier" and declarator.text is not None:
385
- return declarator.text.decode("utf-8")
386
- return None
387
-
388
- def _extract_rust_function_name(
389
- self, node: Node, config: dict[str, Any]
390
- ) -> str | None:
391
- """Extract function name from Rust function definition."""
392
- name_node = node.child_by_field_name(config["name_field"])
393
- if name_node and name_node.type == "identifier" and name_node.text is not None:
394
- return name_node.text.decode("utf-8")
395
- return None
396
-
397
- def _extract_html_element_name(self, node: Node) -> str | None:
398
- """Extract meaningful name from HTML element."""
399
- if node.type == "script_element":
400
- return "script"
401
- if node.type == "style_element":
402
- return "style"
403
- if node.type == "element":
404
- return self._extract_html_element_info(node)
405
- return None
406
-
407
- def _extract_html_element_info(self, node: Node) -> str | None:
408
- """Extract element info with ID or class."""
409
- for child in node.children:
410
- if child.type == "start_tag":
411
- tag_name = self._get_tag_name(child)
412
- element_id = self._get_element_id(child)
413
- class_name = self._get_element_class(child)
414
-
415
- if element_id:
416
- return f"{tag_name or 'element'}#{element_id}"
417
- if class_name:
418
- return f"{tag_name or 'element'}.{class_name}"
419
- if tag_name:
420
- return tag_name
421
- return None
422
-
423
- def _get_tag_name(self, start_tag: Node) -> str | None:
424
- """Get tag name from start_tag node."""
425
- for child in start_tag.children:
426
- if child.type == "tag_name" and child.text:
427
- try:
428
- return child.text.decode("utf-8")
429
- except UnicodeDecodeError:
430
- return None
431
- return None
432
-
433
- def _get_element_id(self, start_tag: Node) -> str | None:
434
- """Get element ID from start_tag node."""
435
- return self._get_attribute_value(start_tag, "id")
436
-
437
- def _get_element_class(self, start_tag: Node) -> str | None:
438
- """Get first class name from start_tag node."""
439
- class_value = self._get_attribute_value(start_tag, "class")
440
- return class_value.split()[0] if class_value else None
441
-
442
- def _get_attribute_value(self, start_tag: Node, attr_name: str) -> str | None:
443
- """Get attribute value from start_tag node."""
444
- for child in start_tag.children:
445
- if child.type == "attribute":
446
- name = self._get_attr_name(child)
447
- if name == attr_name:
448
- return self._get_attr_value(child)
449
- return None
450
-
451
- def _get_attr_name(self, attr_node: Node) -> str | None:
452
- """Get attribute name."""
453
- for child in attr_node.children:
454
- if child.type == "attribute_name" and child.text:
455
- try:
456
- return child.text.decode("utf-8")
457
- except UnicodeDecodeError:
458
- return None
459
- return None
460
-
461
- def _get_attr_value(self, attr_node: Node) -> str | None:
462
- """Get attribute value."""
463
- for child in attr_node.children:
464
- if child.type == "quoted_attribute_value":
465
- for val_child in child.children:
466
- if val_child.type == "attribute_value" and val_child.text:
467
- try:
468
- return val_child.text.decode("utf-8")
469
- except UnicodeDecodeError:
470
- return None
471
- return None
472
-
473
- def _extract_css_rule_name(self, node: Node) -> str | None:
474
- """Extract meaningful name from CSS rule."""
475
- if node.type == "rule_set":
476
- return self._extract_css_selector(node)
477
- if node.type == "keyframes_statement":
478
- return self._extract_keyframes_name(node)
479
- if node.type == "media_statement":
480
- return "@media"
481
- return None
482
-
483
- def _extract_css_selector(self, rule_node: Node) -> str | None:
484
- """Extract CSS selector from rule_set."""
485
- for child in rule_node.children:
486
- if child.type == "selectors":
487
- selector_parts = []
488
- for selector_child in child.children:
489
- part = self._get_selector_part(selector_child)
490
- if part:
491
- selector_parts.append(part)
492
- if selector_parts:
493
- return "".join(selector_parts[:2]) # First couple selectors
494
- return None
495
-
496
- def _get_selector_part(self, selector_node: Node) -> str | None:
497
- """Get a single selector part."""
498
- if selector_node.type == "class_selector":
499
- return self._extract_class_selector(selector_node)
500
- if selector_node.type == "id_selector":
501
- return self._extract_id_selector(selector_node)
502
- if selector_node.type == "type_selector" and selector_node.text:
503
- return selector_node.text.decode("utf-8")
504
- return None
505
-
506
- def _extract_class_selector(self, node: Node) -> str | None:
507
- """Extract class selector name."""
508
- for child in node.children:
509
- if child.type == "class_name":
510
- for name_child in child.children:
511
- if name_child.type == "identifier" and name_child.text:
512
- return f".{name_child.text.decode('utf-8')}"
513
- return None
514
-
515
- def _extract_id_selector(self, node: Node) -> str | None:
516
- """Extract ID selector name."""
517
- for child in node.children:
518
- if child.type == "id_name":
519
- for name_child in child.children:
520
- if name_child.type == "identifier" and name_child.text:
521
- return f"#{name_child.text.decode('utf-8')}"
522
- return None
523
-
524
- def _extract_keyframes_name(self, node: Node) -> str | None:
525
- """Extract keyframes animation name."""
526
- for child in node.children:
527
- if child.type == "keyframes_name" and child.text:
528
- return f"@keyframes-{child.text.decode('utf-8')}"
529
- return None
530
-
531
- def _extract_default_function_name(self, node: Node) -> str | None:
532
- """Extract function name using default identifier search."""
533
- for child in node.children:
534
- if child.type == "identifier" and child.text is not None:
535
- return child.text.decode("utf-8")
536
- return None
537
-
538
- def _qualify_name(
539
- self, node: Node, file_path: Path, config: dict[str, Any], language: str
540
- ) -> str | None:
541
- """Create qualified name for a function node."""
542
- function_name = self._extract_function_name(node, config, language)
543
- if not function_name:
544
- return None
545
-
546
- module_name = file_path.stem
547
- return f"{module_name}.{function_name}"
548
-
549
214
  def _get_file_content(self, file_path: Path, file_contents: dict[Path, str]) -> str:
550
215
  """Get cached file content."""
551
216
  if file_path not in file_contents:
@@ -614,7 +279,8 @@ class Slicer:
614
279
  if callers:
615
280
  snippet_lines.append("")
616
281
  snippet_lines.append("# === USAGE EXAMPLES ===")
617
- for caller in list(callers)[:2]: # Show up to 2 examples
282
+ # Show up to 2 examples, sorted for deterministic order
283
+ for caller in sorted(callers)[:2]:
618
284
  call_line = self._find_function_call_line(
619
285
  caller, function_name, state, file_contents
620
286
  )
@@ -625,37 +291,37 @@ class Slicer:
625
291
 
626
292
  return "\n".join(snippet_lines)
627
293
 
628
- def _create_snippet_entity(
294
+ def _create_snippet_entity_from_git_files(
629
295
  self,
630
296
  qualified_name: str,
631
297
  snippet_content: str,
632
298
  language: str,
633
299
  state: AnalyzerState,
634
- path_to_file_map: dict[Path, File],
635
- ) -> Snippet:
300
+ path_to_file_map: dict[Path, GitFile],
301
+ ) -> SnippetV2:
636
302
  """Create a Snippet domain entity from extracted content."""
637
303
  # Determine all files that this snippet derives from
638
- derives_from_files = self._find_source_files_for_snippet(
304
+ derives_from_files = self._find_source_files_for_snippet_from_git_files(
639
305
  qualified_name, snippet_content, state, path_to_file_map
640
306
  )
641
307
 
642
308
  # Create the snippet entity
643
- snippet = Snippet(derives_from=derives_from_files)
644
-
645
- # Add the original content
646
- snippet.add_original_content(snippet_content, language)
647
-
648
- return snippet
309
+ return SnippetV2(
310
+ derives_from=derives_from_files,
311
+ content=snippet_content,
312
+ extension=language,
313
+ sha=SnippetV2.compute_sha(snippet_content),
314
+ )
649
315
 
650
- def _find_source_files_for_snippet(
316
+ def _find_source_files_for_snippet_from_git_files(
651
317
  self,
652
318
  qualified_name: str,
653
319
  snippet_content: str,
654
320
  state: AnalyzerState,
655
- path_to_file_map: dict[Path, File],
656
- ) -> list[File]:
321
+ path_to_file_map: dict[Path, GitFile],
322
+ ) -> list[GitFile]:
657
323
  """Find all source files that a snippet derives from."""
658
- source_files: list[File] = []
324
+ source_files: list[GitFile] = []
659
325
  source_file_paths: set[Path] = set()
660
326
 
661
327
  # Add the primary function's file
@@ -835,7 +501,7 @@ class Slicer:
835
501
  # Add direct dependencies
836
502
  to_visit.extend(
837
503
  (callee, depth + 1)
838
- for callee in state.call_graph.get(current, set())
504
+ for callee in sorted(state.call_graph.get(current, set()))
839
505
  if callee not in visited and callee in state.def_index
840
506
  )
841
507
 
@@ -850,26 +516,26 @@ class Slicer:
850
516
  in_degree: dict[str, int] = defaultdict(int)
851
517
  graph: dict[str, set[str]] = defaultdict(set)
852
518
 
853
- for func in functions:
854
- for callee in state.call_graph.get(func, set()):
519
+ for func in sorted(functions):
520
+ for callee in sorted(state.call_graph.get(func, set())):
855
521
  if callee in functions:
856
522
  graph[func].add(callee)
857
523
  in_degree[callee] += 1
858
524
 
859
525
  # Find roots
860
- queue = [f for f in functions if in_degree[f] == 0]
526
+ queue = [f for f in sorted(functions) if in_degree[f] == 0]
861
527
  result = []
862
528
 
863
529
  while queue:
864
530
  current = queue.pop(0)
865
531
  result.append(current)
866
- for neighbor in graph[current]:
532
+ for neighbor in sorted(graph[current]):
867
533
  in_degree[neighbor] -= 1
868
534
  if in_degree[neighbor] == 0:
869
535
  queue.append(neighbor)
870
536
 
871
537
  # Add any remaining (cycles)
872
- for func in functions:
538
+ for func in sorted(functions):
873
539
  if func not in result:
874
540
  result.append(func)
875
541