kodit 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kodit might be problematic. Click here for more details.
- kodit/_version.py +2 -2
- kodit/application/factories/code_indexing_factory.py +2 -24
- kodit/application/services/code_indexing_application_service.py +10 -2
- kodit/domain/services/index_service.py +25 -66
- kodit/domain/value_objects.py +10 -22
- kodit/infrastructure/slicing/__init__.py +1 -0
- kodit/infrastructure/slicing/language_detection_service.py +18 -0
- kodit/infrastructure/slicing/slicer.py +894 -0
- kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
- kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +6 -4
- kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
- kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
- kodit/migrations/versions/85155663351e_initial.py +64 -48
- kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/METADATA +9 -4
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/RECORD +19 -29
- kodit/infrastructure/snippet_extraction/__init__.py +0 -1
- kodit/infrastructure/snippet_extraction/factories.py +0 -13
- kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
- kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
- kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
- kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
- kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
- kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
- kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
- kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -44
- kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
- kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/WHEEL +0 -0
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/entry_points.txt +0 -0
- {kodit-0.3.3.dist-info → kodit-0.3.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
; Function definitions: capture the name, the body block, and the whole
; definition node.
(function_definition
  name: (identifier) @function.name
  body: (block) @function.body
) @function.def

; Class definitions: capture the name and the whole definition node.
(class_definition
  name: (identifier) @class.name
) @class.def

; `import a.b` statements: capture each identifier of the dotted name.
(import_statement
  name: (dotted_name (identifier) @import.name))

; `from a.b import ...` statements: capture each identifier of the module name.
(import_from_statement
  module_name: (dotted_name (identifier) @import.from))

; Every identifier in the file.
(identifier) @ident

; Assignments whose left-hand side is a plain identifier.
(assignment
  left: (identifier) @assignment.lhs)

; Parameters that are plain identifiers.
(parameters
  (identifier) @param.name)
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
; Named imports (`import { a, b } from "..."`): capture each imported name.
(import_statement
  (import_clause
    (named_imports
      (import_specifier
        name: (identifier) @import.name
      )
    )
  )
)

; Arrow functions bound to a variable whose body is a statement block:
; capture the variable name and the body.
(variable_declarator
  name: (identifier) @function.name
  value: (arrow_function
    body: (statement_block) @function.body
  )
)

; Class declarations: capture the name and the whole declaration node.
(class_declaration
  name: (type_identifier) @class.name
) @class.def

; Methods: capture the property name and the body.
(method_definition
  name: (property_identifier) @function.name
  body: (statement_block) @function.body
)
|
|
@@ -1,67 +0,0 @@
|
|
|
1
|
-
"""Factory for creating snippet extraction services."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
|
|
5
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
6
|
-
|
|
7
|
-
from kodit.domain.enums import SnippetExtractionStrategy
|
|
8
|
-
from kodit.domain.repositories import FileRepository, SnippetRepository
|
|
9
|
-
from kodit.domain.services.snippet_extraction_service import (
|
|
10
|
-
SnippetExtractionDomainService,
|
|
11
|
-
)
|
|
12
|
-
from kodit.domain.value_objects import LanguageMapping
|
|
13
|
-
from kodit.infrastructure.snippet_extraction.language_detection_service import (
|
|
14
|
-
FileSystemLanguageDetectionService,
|
|
15
|
-
)
|
|
16
|
-
from kodit.infrastructure.snippet_extraction.snippet_query_provider import (
|
|
17
|
-
FileSystemSnippetQueryProvider,
|
|
18
|
-
)
|
|
19
|
-
from kodit.infrastructure.snippet_extraction.tree_sitter_snippet_extractor import (
|
|
20
|
-
TreeSitterSnippetExtractor,
|
|
21
|
-
)
|
|
22
|
-
from kodit.infrastructure.sqlalchemy.file_repository import SqlAlchemyFileRepository
|
|
23
|
-
from kodit.infrastructure.sqlalchemy.snippet_repository import (
|
|
24
|
-
SqlAlchemySnippetRepository,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
def create_snippet_extraction_domain_service() -> SnippetExtractionDomainService:
    """Build a snippet extraction domain service with its dependencies wired in.

    Language detection is driven by the extension-to-language map provided by
    ``LanguageMapping``. The method-based strategy is served by a tree-sitter
    extractor whose queries are loaded from the ``languages`` directory that
    sits next to this module.

    Returns:
        Configured snippet extraction domain service

    """
    # Infrastructure services, created in the same order as they are consumed.
    detector = FileSystemLanguageDetectionService(
        LanguageMapping.get_extension_to_language_map()
    )
    queries_dir = Path(__file__).parent / "languages"
    provider = FileSystemSnippetQueryProvider(queries_dir)

    # Only one extraction strategy is registered.
    extractors = {
        SnippetExtractionStrategy.METHOD_BASED: TreeSitterSnippetExtractor(provider),
    }

    return SnippetExtractionDomainService(detector, extractors)
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
def create_snippet_repositories(
    session: AsyncSession,
) -> tuple[SnippetRepository, FileRepository]:
    """Create the SQLAlchemy-backed snippet and file repositories.

    Args:
        session: SQLAlchemy session handed to both repositories

    Returns:
        Tuple of (snippet_repository, file_repository)

    """
    # Both repositories share the caller's session; the snippet repository is
    # constructed first, matching the order of the returned tuple.
    return SqlAlchemySnippetRepository(session), SqlAlchemyFileRepository(session)
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
"""Infrastructure implementation for loading snippet queries from files."""
|
|
2
|
-
|
|
3
|
-
from abc import ABC, abstractmethod
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
class SnippetQueryProvider(ABC):
    """Abstract interface for providing snippet queries."""

    @abstractmethod
    async def get_query(self, language: str) -> str:
        """Get the query for a specific language.

        Args:
            language: The programming language to get the query for

        Returns:
            The query string for the language

        """


class FileSystemSnippetQueryProvider(SnippetQueryProvider):
    """Infrastructure implementation for loading snippet queries from files.

    Queries are looked up as ``<language>.scm`` inside ``query_directory``.
    """

    def __init__(self, query_directory: Path) -> None:
        """Initialize the query provider.

        Args:
            query_directory: Directory containing query files

        """
        self.query_directory = query_directory

    async def get_query(self, language: str) -> str:
        """Load query from file system.

        Args:
            language: The programming language to get the query for

        Returns:
            The query string for the language

        Raises:
            FileNotFoundError: If the query file doesn't exist

        """
        query_path = self.query_directory / f"{language}.scm"
        try:
            # Read directly instead of checking exists() first: one less stat
            # and no window in which the file can disappear between the check
            # and the read. The encoding is pinned so the result does not
            # depend on the host locale.
            return query_path.read_text(encoding="utf-8")
        except FileNotFoundError as e:
            # Same exception type and message as before, with the OS error
            # chained for debugging.
            raise FileNotFoundError(f"Query file not found: {query_path}") from e
|
|
@@ -1,182 +0,0 @@
|
|
|
1
|
-
"""Infrastructure implementation using tree-sitter for method extraction."""
|
|
2
|
-
|
|
3
|
-
from pathlib import Path
|
|
4
|
-
from typing import cast
|
|
5
|
-
|
|
6
|
-
from tree_sitter import Node, Query
|
|
7
|
-
from tree_sitter_language_pack import SupportedLanguage, get_language, get_parser
|
|
8
|
-
|
|
9
|
-
from kodit.domain.services.index_service import SnippetExtractor
|
|
10
|
-
from kodit.infrastructure.snippet_extraction.snippet_query_provider import (
|
|
11
|
-
SnippetQueryProvider,
|
|
12
|
-
)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class TreeSitterSnippetExtractor(SnippetExtractor):
    """Infrastructure implementation using tree-sitter for method extraction.

    One snippet is produced per "leaf" function body (a ``function.body``
    capture that contains no other ``function.body`` capture). Each snippet
    keeps the rows of that body, the rows of every import capture, and the
    first row of every enclosing ``function.def`` / ``class.def`` capture.
    """

    def __init__(self, query_provider: SnippetQueryProvider) -> None:
        """Initialize the tree-sitter snippet extractor.

        Args:
            query_provider: Provider for snippet queries

        """
        self.query_provider = query_provider

    async def extract(self, file_path: Path, language: str) -> list[str]:
        """Extract snippets using tree-sitter parsing.

        Args:
            file_path: Path to the file to extract snippets from
            language: The programming language of the file

        Returns:
            List of extracted code snippets; the whole file as a single
            snippet when the query yields none

        Raises:
            ValueError: If the file cannot be read or language is not supported

        """
        try:
            # Get the query for the language
            query = await self.query_provider.get_query(language)
        except FileNotFoundError as e:
            raise ValueError(f"Unsupported language: {file_path}") from e

        # Get parser and language for tree-sitter
        try:
            tree_sitter_language = get_language(cast("SupportedLanguage", language))
            parser = get_parser(cast("SupportedLanguage", language))
        except Exception as e:
            raise ValueError(f"Unsupported language: {file_path}") from e

        # Create query object
        query_obj = Query(tree_sitter_language, query)

        # Read file content
        try:
            file_bytes = file_path.read_bytes()
        except Exception as e:
            raise ValueError(f"Failed to read file: {file_path}") from e

        # Parse and collect captures
        tree = parser.parse(file_bytes)
        captures_by_name = query_obj.captures(tree.root_node)

        # Decode once and reuse the text for both the row table and the
        # whole-file fallback.
        source_text = file_bytes.decode()
        lines = self._split_rows(source_text)

        snippets = self._extract_snippets_from_captures(captures_by_name, lines)

        # If there are no results, return the entire file
        return snippets or [source_text]

    @staticmethod
    def _split_rows(text: str) -> list[str]:
        """Split text into rows indexed the way tree-sitter points are.

        Tree-sitter advances the row only on ``\\n``. ``str.splitlines()`` also
        breaks on form feeds, vertical tabs, ``\\u2028`` and similar characters,
        which would shift every later row index, so the split is done on
        ``\\n`` alone. A trailing ``\\r`` (CRLF files) is removed from each row
        and the empty row after a final newline is dropped, matching what
        ``splitlines()`` returns for ordinary files.
        """
        rows = text.split("\n")
        if rows and rows[-1] == "":
            rows.pop()
        return [row[:-1] if row.endswith("\r") else row for row in rows]

    def _extract_snippets_from_captures(
        self, captures_by_name: dict[str, list[Node]], lines: list[str]
    ) -> list[str]:
        """Extract snippets from tree-sitter captures.

        Args:
            captures_by_name: Captures organized by name
            lines: Lines of the source file

        Returns:
            List of extracted code snippets

        """
        # Find all leaf functions
        leaf_functions = self._get_leaf_functions(captures_by_name)

        # Import rows are identical for every snippet, so compute them once.
        import_rows: set[int] = set()
        for import_node in self._get_imports(captures_by_name):
            import_rows.update(
                range(import_node.start_point[0], import_node.end_point[0] + 1)
            )

        results = []

        # For each leaf function, find all lines this function is dependent on
        for func_node in leaf_functions:
            rows_to_keep = set(import_rows)

            # The function body itself
            rows_to_keep.update(
                range(func_node.start_point[0], func_node.end_point[0] + 1)
            )

            # Only the first row of each enclosing definition is kept
            for ancestor in self._get_ancestors(captures_by_name, func_node):
                rows_to_keep.add(ancestor.start_point[0])

            results.append(
                "\n".join(
                    line for row, line in enumerate(lines) if row in rows_to_keep
                )
            )

        return results

    def _get_leaf_functions(
        self, captures_by_name: dict[str, list[Node]]
    ) -> list[Node]:
        """Return all leaf functions in the AST."""
        return [
            node
            for node in captures_by_name.get("function.body", [])
            if self._is_leaf_function(captures_by_name, node)
        ]

    def _is_leaf_function(
        self, captures_by_name: dict[str, list[Node]], node: Node
    ) -> bool:
        """Return True if no other captured function body lies inside ``node``."""
        for other in captures_by_name.get("function.body", []):
            if other == node:  # Skip self
                continue
            # if other is inside node, it's not a leaf function
            if other.start_byte >= node.start_byte and other.end_byte <= node.end_byte:
                return False
        return True

    def _get_imports(self, captures_by_name: dict[str, list[Node]]) -> list[Node]:
        """Return all ``import.name`` captures followed by ``import.from`` ones."""
        return captures_by_name.get("import.name", []) + captures_by_name.get(
            "import.from", []
        )

    def _classes_and_functions(
        self, captures_by_name: dict[str, list[Node]]
    ) -> list[int]:
        """Return the ids of all class and function definition captures."""
        return [
            node.id
            for node in {
                *captures_by_name.get("function.def", []),
                *captures_by_name.get("class.def", []),
            }
        ]

    def _get_ancestors(
        self, captures_by_name: dict[str, list[Node]], node: Node
    ) -> list[Node]:
        """Return enclosing class/function definitions of ``node``, innermost first."""
        # A set makes the per-parent membership test O(1) instead of a list scan.
        valid_ancestors = set(self._classes_and_functions(captures_by_name))
        ancestors = []
        parent = node.parent
        while parent:
            if parent.id in valid_ancestors:
                ancestors.append(parent)
            parent = parent.parent
        return ancestors
|
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
"""SQLAlchemy implementation of file repository."""
|
|
2
|
-
|
|
3
|
-
from collections.abc import Sequence
|
|
4
|
-
|
|
5
|
-
from sqlalchemy import select
|
|
6
|
-
from sqlalchemy.ext.asyncio import AsyncSession
|
|
7
|
-
|
|
8
|
-
from kodit.domain.repositories import FileRepository
|
|
9
|
-
from kodit.infrastructure.sqlalchemy.entities import File, Index
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class SqlAlchemyFileRepository(FileRepository):
    """File repository backed by a SQLAlchemy async session."""

    def __init__(self, session: AsyncSession) -> None:
        """Store the session used for every database operation.

        Args:
            session: The SQLAlchemy async session to use for database operations

        """
        self.session = session

    async def get(self, id: int) -> File | None:  # noqa: A002
        """Look up a file by primary key through ``session.get``."""
        return await self.session.get(File, id)

    async def save(self, entity: File) -> File:
        """Add the entity to the session and hand it back.

        Only ``session.add`` is called here; no flush or commit is issued.
        """
        self.session.add(entity)
        return entity

    async def delete(self, id: int) -> None:  # noqa: A002
        """Delete the file with the given ID; do nothing when it is not found."""
        existing = await self.get(id)
        if existing:
            await self.session.delete(existing)

    async def list(self) -> Sequence[File]:
        """Return every stored file."""
        rows = await self.session.scalars(select(File))
        return rows.all()

    async def get_files_for_index(self, index_id: int) -> Sequence[File]:
        """Get all files for an index.

        Args:
            index_id: The ID of the index to get files for

        Returns:
            A list of File instances; empty when the index does not exist

        """
        # The index row supplies the source_id that the files are keyed on.
        index_rows = await self.session.execute(
            select(Index).where(Index.id == index_id)
        )
        index = index_rows.scalar_one_or_none()
        if not index:
            return []

        # All files that belong to the index's source.
        file_rows = await self.session.execute(
            select(File).where(File.source_id == index.source_id)
        )
        return [*file_rows.scalars()]

    async def get_by_id(self, file_id: int) -> File | None:
        """Get a file by ID using an explicit SELECT.

        Args:
            file_id: The ID of the file to retrieve

        Returns:
            The File instance if found, None otherwise

        """
        rows = await self.session.execute(select(File).where(File.id == file_id))
        return rows.scalar_one_or_none()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|