kodit 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (36)
  1. kodit/_version.py +2 -2
  2. kodit/app.py +23 -4
  3. kodit/application/factories/code_indexing_factory.py +2 -24
  4. kodit/application/services/code_indexing_application_service.py +10 -2
  5. kodit/application/services/sync_scheduler.py +128 -0
  6. kodit/cli.py +103 -28
  7. kodit/config.py +15 -0
  8. kodit/domain/services/index_service.py +25 -66
  9. kodit/domain/value_objects.py +10 -22
  10. kodit/infrastructure/slicing/__init__.py +1 -0
  11. kodit/infrastructure/slicing/language_detection_service.py +18 -0
  12. kodit/infrastructure/slicing/slicer.py +894 -0
  13. kodit/infrastructure/sqlalchemy/index_repository.py +29 -0
  14. kodit/migrations/versions/4073b33f9436_add_file_processing_flag.py +6 -4
  15. kodit/migrations/versions/4552eb3f23ce_add_summary.py +4 -4
  16. kodit/migrations/versions/7c3bbc2ab32b_add_embeddings_table.py +24 -16
  17. kodit/migrations/versions/85155663351e_initial.py +64 -48
  18. kodit/migrations/versions/c3f5137d30f5_index_all_the_things.py +20 -14
  19. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/METADATA +10 -4
  20. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/RECORD +23 -32
  21. kodit/infrastructure/snippet_extraction/__init__.py +0 -1
  22. kodit/infrastructure/snippet_extraction/factories.py +0 -13
  23. kodit/infrastructure/snippet_extraction/language_detection_service.py +0 -39
  24. kodit/infrastructure/snippet_extraction/languages/csharp.scm +0 -12
  25. kodit/infrastructure/snippet_extraction/languages/go.scm +0 -26
  26. kodit/infrastructure/snippet_extraction/languages/java.scm +0 -12
  27. kodit/infrastructure/snippet_extraction/languages/javascript.scm +0 -24
  28. kodit/infrastructure/snippet_extraction/languages/python.scm +0 -22
  29. kodit/infrastructure/snippet_extraction/languages/typescript.scm +0 -25
  30. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +0 -67
  31. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +0 -44
  32. kodit/infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py +0 -182
  33. kodit/infrastructure/sqlalchemy/file_repository.py +0 -78
  34. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/WHEEL +0 -0
  35. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/entry_points.txt +0 -0
  36. {kodit-0.3.3.dist-info → kodit-0.3.5.dist-info}/licenses/LICENSE +0 -0
@@ -1,22 +0,0 @@
1
- (function_definition
2
- name: (identifier) @function.name
3
- body: (block) @function.body
4
- ) @function.def
5
-
6
- (class_definition
7
- name: (identifier) @class.name
8
- ) @class.def
9
-
10
- (import_statement
11
- name: (dotted_name (identifier) @import.name))
12
-
13
- (import_from_statement
14
- module_name: (dotted_name (identifier) @import.from))
15
-
16
- (identifier) @ident
17
-
18
- (assignment
19
- left: (identifier) @assignment.lhs)
20
-
21
- (parameters
22
- (identifier) @param.name)
@@ -1,25 +0,0 @@
1
- (import_statement
2
- (import_clause
3
- (named_imports
4
- (import_specifier
5
- name: (identifier) @import.name
6
- )
7
- )
8
- )
9
- )
10
-
11
- (variable_declarator
12
- name: (identifier) @function.name
13
- value: (arrow_function
14
- body: (statement_block) @function.body
15
- )
16
- )
17
-
18
- (class_declaration
19
- name: (type_identifier) @class.name
20
- ) @class.def
21
-
22
- (method_definition
23
- name: (property_identifier) @function.name
24
- body: (statement_block) @function.body
25
- )
@@ -1,67 +0,0 @@
1
- """Factory for creating snippet extraction services."""
2
-
3
- from pathlib import Path
4
-
5
- from sqlalchemy.ext.asyncio import AsyncSession
6
-
7
- from kodit.domain.enums import SnippetExtractionStrategy
8
- from kodit.domain.repositories import FileRepository, SnippetRepository
9
- from kodit.domain.services.snippet_extraction_service import (
10
- SnippetExtractionDomainService,
11
- )
12
- from kodit.domain.value_objects import LanguageMapping
13
- from kodit.infrastructure.snippet_extraction.language_detection_service import (
14
- FileSystemLanguageDetectionService,
15
- )
16
- from kodit.infrastructure.snippet_extraction.snippet_query_provider import (
17
- FileSystemSnippetQueryProvider,
18
- )
19
- from kodit.infrastructure.snippet_extraction.tree_sitter_snippet_extractor import (
20
- TreeSitterSnippetExtractor,
21
- )
22
- from kodit.infrastructure.sqlalchemy.file_repository import SqlAlchemyFileRepository
23
- from kodit.infrastructure.sqlalchemy.snippet_repository import (
24
- SqlAlchemySnippetRepository,
25
- )
26
-
27
-
28
- def create_snippet_extraction_domain_service() -> SnippetExtractionDomainService:
29
- """Create a snippet extraction domain service with all dependencies.
30
-
31
- Returns:
32
- Configured snippet extraction domain service
33
-
34
- """
35
- # Use the unified language mapping from the domain layer
36
- language_map = LanguageMapping.get_extension_to_language_map()
37
-
38
- # Create infrastructure services
39
- language_detector = FileSystemLanguageDetectionService(language_map)
40
- query_provider = FileSystemSnippetQueryProvider(Path(__file__).parent / "languages")
41
-
42
- # Create snippet extractors
43
- method_extractor = TreeSitterSnippetExtractor(query_provider)
44
-
45
- snippet_extractors = {
46
- SnippetExtractionStrategy.METHOD_BASED: method_extractor,
47
- }
48
-
49
- # Create domain service
50
- return SnippetExtractionDomainService(language_detector, snippet_extractors)
51
-
52
-
53
- def create_snippet_repositories(
54
- session: AsyncSession,
55
- ) -> tuple[SnippetRepository, FileRepository]:
56
- """Create snippet and file repositories.
57
-
58
- Args:
59
- session: SQLAlchemy session
60
-
61
- Returns:
62
- Tuple of (snippet_repository, file_repository)
63
-
64
- """
65
- snippet_repository = SqlAlchemySnippetRepository(session)
66
- file_repository = SqlAlchemyFileRepository(session)
67
- return snippet_repository, file_repository
@@ -1,44 +0,0 @@
1
- """Infrastructure implementation for loading snippet queries from files."""
2
-
3
- from abc import ABC, abstractmethod
4
- from pathlib import Path
5
-
6
-
7
- class SnippetQueryProvider(ABC):
8
- """Abstract interface for providing snippet queries."""
9
-
10
- @abstractmethod
11
- async def get_query(self, language: str) -> str:
12
- """Get the query for a specific language."""
13
-
14
-
15
- class FileSystemSnippetQueryProvider(SnippetQueryProvider):
16
- """Infrastructure implementation for loading snippet queries from files."""
17
-
18
- def __init__(self, query_directory: Path) -> None:
19
- """Initialize the query provider.
20
-
21
- Args:
22
- query_directory: Directory containing query files
23
-
24
- """
25
- self.query_directory = query_directory
26
-
27
- async def get_query(self, language: str) -> str:
28
- """Load query from file system.
29
-
30
- Args:
31
- language: The programming language to get the query for
32
-
33
- Returns:
34
- The query string for the language
35
-
36
- Raises:
37
- FileNotFoundError: If the query file doesn't exist
38
-
39
- """
40
- query_path = self.query_directory / f"{language}.scm"
41
- if not query_path.exists():
42
- raise FileNotFoundError(f"Query file not found: {query_path}")
43
-
44
- return query_path.read_text()
@@ -1,182 +0,0 @@
1
- """Infrastructure implementation using tree-sitter for method extraction."""
2
-
3
- from pathlib import Path
4
- from typing import cast
5
-
6
- from tree_sitter import Node, Query
7
- from tree_sitter_language_pack import SupportedLanguage, get_language, get_parser
8
-
9
- from kodit.domain.services.index_service import SnippetExtractor
10
- from kodit.infrastructure.snippet_extraction.snippet_query_provider import (
11
- SnippetQueryProvider,
12
- )
13
-
14
-
15
- class TreeSitterSnippetExtractor(SnippetExtractor):
16
- """Infrastructure implementation using tree-sitter for method extraction."""
17
-
18
- def __init__(self, query_provider: SnippetQueryProvider) -> None:
19
- """Initialize the tree-sitter snippet extractor.
20
-
21
- Args:
22
- query_provider: Provider for snippet queries
23
-
24
- """
25
- self.query_provider = query_provider
26
-
27
- async def extract(self, file_path: Path, language: str) -> list[str]:
28
- """Extract snippets using tree-sitter parsing.
29
-
30
- Args:
31
- file_path: Path to the file to extract snippets from
32
- language: The programming language of the file
33
-
34
- Returns:
35
- List of extracted code snippets
36
-
37
- Raises:
38
- ValueError: If the file cannot be read or language is not supported
39
-
40
- """
41
- try:
42
- # Get the query for the language
43
- query = await self.query_provider.get_query(language)
44
- except FileNotFoundError as e:
45
- raise ValueError(f"Unsupported language: {file_path}") from e
46
-
47
- # Get parser and language for tree-sitter
48
- try:
49
- tree_sitter_language = get_language(cast("SupportedLanguage", language))
50
- parser = get_parser(cast("SupportedLanguage", language))
51
- except Exception as e:
52
- raise ValueError(f"Unsupported language: {file_path}") from e
53
-
54
- # Create query object
55
- query_obj = Query(tree_sitter_language, query)
56
-
57
- # Read file content
58
- try:
59
- file_bytes = file_path.read_bytes()
60
- except Exception as e:
61
- raise ValueError(f"Failed to read file: {file_path}") from e
62
-
63
- # Parse and extract snippets
64
- tree = parser.parse(file_bytes)
65
- captures_by_name = query_obj.captures(tree.root_node)
66
- lines = file_bytes.decode().splitlines()
67
-
68
- # Extract snippets using the existing logic
69
- snippets = self._extract_snippets_from_captures(captures_by_name, lines)
70
-
71
- # If there are no results, return the entire file
72
- if not snippets:
73
- return [file_bytes.decode()]
74
-
75
- return snippets
76
-
77
- def _extract_snippets_from_captures(
78
- self, captures_by_name: dict[str, list[Node]], lines: list[str]
79
- ) -> list[str]:
80
- """Extract snippets from tree-sitter captures.
81
-
82
- Args:
83
- captures_by_name: Captures organized by name
84
- lines: Lines of the source file
85
-
86
- Returns:
87
- List of extracted code snippets
88
-
89
- """
90
- # Find all leaf functions
91
- leaf_functions = self._get_leaf_functions(captures_by_name)
92
-
93
- # Find all imports
94
- imports = self._get_imports(captures_by_name)
95
-
96
- results = []
97
-
98
- # For each leaf function, find all lines this function is dependent on
99
- for func_node in leaf_functions:
100
- all_lines_to_keep = set()
101
-
102
- ancestors = self._get_ancestors(captures_by_name, func_node)
103
-
104
- # Add self to keep
105
- all_lines_to_keep.update(
106
- range(func_node.start_point[0], func_node.end_point[0] + 1)
107
- )
108
-
109
- # Add imports to keep
110
- for import_node in imports:
111
- all_lines_to_keep.update(
112
- range(import_node.start_point[0], import_node.end_point[0] + 1)
113
- )
114
-
115
- # Add ancestors to keep
116
- for node in ancestors:
117
- # Get the first line of the node for now
118
- start = node.start_point[0]
119
- end = node.start_point[0]
120
- all_lines_to_keep.update(range(start, end + 1))
121
-
122
- pseudo_code = []
123
- for i, line in enumerate(lines):
124
- if i in all_lines_to_keep:
125
- pseudo_code.append(line)
126
-
127
- results.append("\n".join(pseudo_code))
128
-
129
- return results
130
-
131
- def _get_leaf_functions(
132
- self, captures_by_name: dict[str, list[Node]]
133
- ) -> list[Node]:
134
- """Return all leaf functions in the AST."""
135
- return [
136
- node
137
- for node in captures_by_name.get("function.body", [])
138
- if self._is_leaf_function(captures_by_name, node)
139
- ]
140
-
141
- def _is_leaf_function(
142
- self, captures_by_name: dict[str, list[Node]], node: Node
143
- ) -> bool:
144
- """Return True if the node is a leaf function."""
145
- for other in captures_by_name.get("function.body", []):
146
- if other == node: # Skip self
147
- continue
148
- # if other is inside node, it's not a leaf function
149
- if other.start_byte >= node.start_byte and other.end_byte <= node.end_byte:
150
- return False
151
- return True
152
-
153
- def _get_imports(self, captures_by_name: dict[str, list[Node]]) -> list[Node]:
154
- """Return all imports in the AST."""
155
- return captures_by_name.get("import.name", []) + captures_by_name.get(
156
- "import.from", []
157
- )
158
-
159
- def _classes_and_functions(
160
- self, captures_by_name: dict[str, list[Node]]
161
- ) -> list[int]:
162
- """Return all classes and functions in the AST."""
163
- return [
164
- node.id
165
- for node in {
166
- *captures_by_name.get("function.def", []),
167
- *captures_by_name.get("class.def", []),
168
- }
169
- ]
170
-
171
- def _get_ancestors(
172
- self, captures_by_name: dict[str, list[Node]], node: Node
173
- ) -> list[Node]:
174
- """Return all ancestors of the node."""
175
- valid_ancestors = self._classes_and_functions(captures_by_name)
176
- ancestors = []
177
- parent = node.parent
178
- while parent:
179
- if parent.id in valid_ancestors:
180
- ancestors.append(parent)
181
- parent = parent.parent
182
- return ancestors
@@ -1,78 +0,0 @@
1
- """SQLAlchemy implementation of file repository."""
2
-
3
- from collections.abc import Sequence
4
-
5
- from sqlalchemy import select
6
- from sqlalchemy.ext.asyncio import AsyncSession
7
-
8
- from kodit.domain.repositories import FileRepository
9
- from kodit.infrastructure.sqlalchemy.entities import File, Index
10
-
11
-
12
- class SqlAlchemyFileRepository(FileRepository):
13
- """SQLAlchemy implementation of file repository."""
14
-
15
- def __init__(self, session: AsyncSession) -> None:
16
- """Initialize the SQLAlchemy file repository.
17
-
18
- Args:
19
- session: The SQLAlchemy async session to use for database operations
20
-
21
- """
22
- self.session = session
23
-
24
- async def get(self, id: int) -> File | None: # noqa: A002
25
- """Get a file by ID."""
26
- return await self.session.get(File, id)
27
-
28
- async def save(self, entity: File) -> File:
29
- """Save entity."""
30
- self.session.add(entity)
31
- return entity
32
-
33
- async def delete(self, id: int) -> None: # noqa: A002
34
- """Delete entity by ID."""
35
- file = await self.get(id)
36
- if file:
37
- await self.session.delete(file)
38
-
39
- async def list(self) -> Sequence[File]:
40
- """List all entities."""
41
- return (await self.session.scalars(select(File))).all()
42
-
43
- async def get_files_for_index(self, index_id: int) -> Sequence[File]:
44
- """Get all files for an index.
45
-
46
- Args:
47
- index_id: The ID of the index to get files for
48
-
49
- Returns:
50
- A list of File instances
51
-
52
- """
53
- # Get the index first to find its source_id
54
- index_query = select(Index).where(Index.id == index_id)
55
- index_result = await self.session.execute(index_query)
56
- index = index_result.scalar_one_or_none()
57
-
58
- if not index:
59
- return []
60
-
61
- # Get all files for the source
62
- query = select(File).where(File.source_id == index.source_id)
63
- result = await self.session.execute(query)
64
- return list(result.scalars())
65
-
66
- async def get_by_id(self, file_id: int) -> File | None:
67
- """Get a file by ID.
68
-
69
- Args:
70
- file_id: The ID of the file to retrieve
71
-
72
- Returns:
73
- The File instance if found, None otherwise
74
-
75
- """
76
- query = select(File).where(File.id == file_id)
77
- result = await self.session.execute(query)
78
- return result.scalar_one_or_none()
File without changes