kodit 0.2.4__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kodit might be problematic. Click here for more details.

Files changed (117)
  1. kodit/_version.py +2 -2
  2. kodit/application/__init__.py +1 -0
  3. kodit/application/commands/__init__.py +1 -0
  4. kodit/application/commands/snippet_commands.py +22 -0
  5. kodit/application/services/__init__.py +1 -0
  6. kodit/application/services/indexing_application_service.py +363 -0
  7. kodit/application/services/snippet_application_service.py +143 -0
  8. kodit/cli.py +105 -82
  9. kodit/database.py +0 -22
  10. kodit/domain/__init__.py +1 -0
  11. kodit/{source/source_models.py → domain/entities.py} +88 -19
  12. kodit/domain/enums.py +9 -0
  13. kodit/domain/interfaces.py +27 -0
  14. kodit/domain/repositories.py +95 -0
  15. kodit/domain/services/__init__.py +1 -0
  16. kodit/domain/services/bm25_service.py +124 -0
  17. kodit/domain/services/embedding_service.py +155 -0
  18. kodit/domain/services/enrichment_service.py +48 -0
  19. kodit/domain/services/ignore_service.py +45 -0
  20. kodit/domain/services/indexing_service.py +203 -0
  21. kodit/domain/services/snippet_extraction_service.py +89 -0
  22. kodit/domain/services/source_service.py +83 -0
  23. kodit/domain/value_objects.py +215 -0
  24. kodit/infrastructure/__init__.py +1 -0
  25. kodit/infrastructure/bm25/__init__.py +1 -0
  26. kodit/infrastructure/bm25/bm25_factory.py +28 -0
  27. kodit/{bm25/local_bm25.py → infrastructure/bm25/local_bm25_repository.py} +33 -22
  28. kodit/{bm25/vectorchord_bm25.py → infrastructure/bm25/vectorchord_bm25_repository.py} +40 -35
  29. kodit/infrastructure/cloning/__init__.py +1 -0
  30. kodit/infrastructure/cloning/folder/__init__.py +1 -0
  31. kodit/infrastructure/cloning/folder/factory.py +119 -0
  32. kodit/infrastructure/cloning/folder/working_copy.py +38 -0
  33. kodit/infrastructure/cloning/git/__init__.py +1 -0
  34. kodit/infrastructure/cloning/git/factory.py +133 -0
  35. kodit/infrastructure/cloning/git/working_copy.py +32 -0
  36. kodit/infrastructure/cloning/metadata.py +127 -0
  37. kodit/infrastructure/embedding/__init__.py +1 -0
  38. kodit/infrastructure/embedding/embedding_factory.py +87 -0
  39. kodit/infrastructure/embedding/embedding_providers/__init__.py +1 -0
  40. kodit/infrastructure/embedding/embedding_providers/batching.py +93 -0
  41. kodit/infrastructure/embedding/embedding_providers/hash_embedding_provider.py +79 -0
  42. kodit/infrastructure/embedding/embedding_providers/local_embedding_provider.py +129 -0
  43. kodit/infrastructure/embedding/embedding_providers/openai_embedding_provider.py +113 -0
  44. kodit/infrastructure/embedding/local_vector_search_repository.py +114 -0
  45. kodit/{embedding/vectorchord_vector_search_service.py → infrastructure/embedding/vectorchord_vector_search_repository.py} +65 -46
  46. kodit/infrastructure/enrichment/__init__.py +1 -0
  47. kodit/{enrichment → infrastructure/enrichment}/enrichment_factory.py +28 -12
  48. kodit/infrastructure/enrichment/legacy_enrichment_models.py +42 -0
  49. kodit/{enrichment/enrichment_provider → infrastructure/enrichment}/local_enrichment_provider.py +38 -26
  50. kodit/infrastructure/enrichment/null_enrichment_provider.py +25 -0
  51. kodit/infrastructure/enrichment/openai_enrichment_provider.py +89 -0
  52. kodit/infrastructure/git/__init__.py +1 -0
  53. kodit/{source/git.py → infrastructure/git/git_utils.py} +10 -2
  54. kodit/infrastructure/ignore/__init__.py +1 -0
  55. kodit/{source/ignore.py → infrastructure/ignore/ignore_pattern_provider.py} +23 -6
  56. kodit/infrastructure/indexing/__init__.py +1 -0
  57. kodit/infrastructure/indexing/fusion_service.py +55 -0
  58. kodit/infrastructure/indexing/index_repository.py +296 -0
  59. kodit/infrastructure/indexing/indexing_factory.py +111 -0
  60. kodit/infrastructure/snippet_extraction/__init__.py +1 -0
  61. kodit/infrastructure/snippet_extraction/language_detection_service.py +39 -0
  62. kodit/infrastructure/snippet_extraction/snippet_extraction_factory.py +95 -0
  63. kodit/infrastructure/snippet_extraction/snippet_query_provider.py +45 -0
  64. kodit/{snippets/method_snippets.py → infrastructure/snippet_extraction/tree_sitter_snippet_extractor.py} +123 -61
  65. kodit/infrastructure/sqlalchemy/__init__.py +1 -0
  66. kodit/{embedding → infrastructure/sqlalchemy}/embedding_repository.py +40 -24
  67. kodit/infrastructure/sqlalchemy/file_repository.py +73 -0
  68. kodit/infrastructure/sqlalchemy/repository.py +121 -0
  69. kodit/infrastructure/sqlalchemy/snippet_repository.py +75 -0
  70. kodit/infrastructure/ui/__init__.py +1 -0
  71. kodit/infrastructure/ui/progress.py +127 -0
  72. kodit/{util → infrastructure/ui}/spinner.py +19 -4
  73. kodit/mcp.py +50 -28
  74. kodit/migrations/env.py +1 -4
  75. kodit/reporting.py +78 -0
  76. {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/METADATA +1 -1
  77. kodit-0.2.5.dist-info/RECORD +99 -0
  78. kodit/bm25/__init__.py +0 -1
  79. kodit/bm25/keyword_search_factory.py +0 -17
  80. kodit/bm25/keyword_search_service.py +0 -34
  81. kodit/embedding/__init__.py +0 -1
  82. kodit/embedding/embedding_factory.py +0 -69
  83. kodit/embedding/embedding_models.py +0 -28
  84. kodit/embedding/embedding_provider/__init__.py +0 -1
  85. kodit/embedding/embedding_provider/embedding_provider.py +0 -92
  86. kodit/embedding/embedding_provider/hash_embedding_provider.py +0 -86
  87. kodit/embedding/embedding_provider/local_embedding_provider.py +0 -96
  88. kodit/embedding/embedding_provider/openai_embedding_provider.py +0 -73
  89. kodit/embedding/local_vector_search_service.py +0 -87
  90. kodit/embedding/vector_search_service.py +0 -55
  91. kodit/enrichment/__init__.py +0 -1
  92. kodit/enrichment/enrichment_provider/__init__.py +0 -1
  93. kodit/enrichment/enrichment_provider/enrichment_provider.py +0 -36
  94. kodit/enrichment/enrichment_provider/openai_enrichment_provider.py +0 -79
  95. kodit/enrichment/enrichment_service.py +0 -45
  96. kodit/indexing/__init__.py +0 -1
  97. kodit/indexing/fusion.py +0 -67
  98. kodit/indexing/indexing_models.py +0 -43
  99. kodit/indexing/indexing_repository.py +0 -216
  100. kodit/indexing/indexing_service.py +0 -344
  101. kodit/snippets/__init__.py +0 -1
  102. kodit/snippets/languages/__init__.py +0 -53
  103. kodit/snippets/snippets.py +0 -50
  104. kodit/source/__init__.py +0 -1
  105. kodit/source/source_factories.py +0 -356
  106. kodit/source/source_repository.py +0 -169
  107. kodit/source/source_service.py +0 -150
  108. kodit/util/__init__.py +0 -1
  109. kodit-0.2.4.dist-info/RECORD +0 -71
  110. /kodit/{snippets → infrastructure/snippet_extraction}/languages/csharp.scm +0 -0
  111. /kodit/{snippets → infrastructure/snippet_extraction}/languages/go.scm +0 -0
  112. /kodit/{snippets → infrastructure/snippet_extraction}/languages/javascript.scm +0 -0
  113. /kodit/{snippets → infrastructure/snippet_extraction}/languages/python.scm +0 -0
  114. /kodit/{snippets → infrastructure/snippet_extraction}/languages/typescript.scm +0 -0
  115. {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/WHEEL +0 -0
  116. {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/entry_points.txt +0 -0
  117. {kodit-0.2.4.dist-info → kodit-0.2.5.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,42 @@
1
+ """Legacy enrichment models for backward compatibility."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from collections.abc import AsyncGenerator
5
+ from dataclasses import dataclass
6
+
7
+
8
@dataclass
class EnrichmentRequest:
    """Legacy request record: a snippet id paired with the text to enrich.

    Kept in the legacy models module for backward compatibility.
    """

    # Id carried through to the matching response.
    snippet_id: int
    # Text submitted for enrichment.
    text: str
14
+
15
+
16
@dataclass
class EnrichmentResponse:
    """Legacy response record: a snippet id paired with the enrichment text.

    Kept in the legacy models module for backward compatibility.
    """

    # Id of the request this response answers.
    snippet_id: int
    # Enrichment output; the null service in this module yields "".
    text: str
22
+
23
+
24
class EnrichmentService(ABC):
    """Abstract interface of the legacy enrichment service."""

    @abstractmethod
    def enrich(
        self, data: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Produce enrichment responses for ``data`` as an async generator.

        Args:
            data: The enrichment requests to process.

        Returns:
            An async generator of enrichment responses.

        """
32
+
33
+
34
class NullEnrichmentService(EnrichmentService):
    """No-op legacy enrichment service intended for tests."""

    async def enrich(
        self, data: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Yield one blank response per request, in input order.

        Args:
            data: The enrichment requests to answer.

        Yields:
            A response carrying the request's ``snippet_id`` and empty text.

        """
        for item in data:
            blank = EnrichmentResponse(snippet_id=item.snippet_id, text="")
            yield blank
@@ -1,4 +1,4 @@
1
- """Local embedding service."""
1
+ """Local enrichment provider implementation."""
2
2
 
3
3
  import os
4
4
  from collections.abc import AsyncGenerator
@@ -6,29 +6,33 @@ from collections.abc import AsyncGenerator
6
6
  import structlog
7
7
  import tiktoken
8
8
 
9
- from kodit.embedding.embedding_provider.embedding_provider import (
10
- EmbeddingRequest,
11
- )
12
- from kodit.enrichment.enrichment_provider.enrichment_provider import (
13
- ENRICHMENT_SYSTEM_PROMPT,
14
- EnrichmentProvider,
15
- EnrichmentRequest,
16
- EnrichmentResponse,
17
- )
9
+ from kodit.domain.services.enrichment_service import EnrichmentProvider
10
+ from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
11
+
12
+ ENRICHMENT_SYSTEM_PROMPT = """
13
+ You are a professional software developer. You will be given a snippet of code.
14
+ Please provide a concise explanation of the code.
15
+ """
18
16
 
19
17
  DEFAULT_ENRICHMENT_MODEL = "Qwen/Qwen3-0.6B"
20
18
  DEFAULT_CONTEXT_WINDOW_SIZE = 2048 # Small so it works even on low-powered devices
21
19
 
22
20
 
23
21
  class LocalEnrichmentProvider(EnrichmentProvider):
24
- """Local embedder."""
22
+ """Local enrichment provider implementation."""
25
23
 
26
24
  def __init__(
27
25
  self,
28
26
  model_name: str = DEFAULT_ENRICHMENT_MODEL,
29
27
  context_window: int = DEFAULT_CONTEXT_WINDOW_SIZE,
30
28
  ) -> None:
31
- """Initialize the local enrichment provider."""
29
+ """Initialize the local enrichment provider.
30
+
31
+ Args:
32
+ model_name: The model name to use for enrichment.
33
+ context_window: The context window size for the model.
34
+
35
+ """
32
36
  self.log = structlog.get_logger(__name__)
33
37
  self.model_name = model_name
34
38
  self.context_window = context_window
@@ -37,14 +41,22 @@ class LocalEnrichmentProvider(EnrichmentProvider):
37
41
  self.encoding = tiktoken.encoding_for_model("text-embedding-3-small")
38
42
 
39
43
  async def enrich(
40
- self, data: list[EnrichmentRequest]
44
+ self, requests: list[EnrichmentRequest]
41
45
  ) -> AsyncGenerator[EnrichmentResponse, None]:
42
- """Enrich a list of strings."""
46
+ """Enrich a list of requests using local model.
47
+
48
+ Args:
49
+ requests: List of enrichment requests.
50
+
51
+ Yields:
52
+ Enrichment responses as they are processed.
53
+
54
+ """
43
55
  # Remove empty snippets
44
- data = [snippet for snippet in data if snippet.text]
56
+ requests = [req for req in requests if req.text]
45
57
 
46
- if not data or len(data) == 0:
47
- self.log.warning("Data is empty, skipping enrichment")
58
+ if not requests:
59
+ self.log.warning("No valid requests for enrichment")
48
60
  return
49
61
 
50
62
  from transformers.models.auto.modeling_auto import (
@@ -66,25 +78,25 @@ class LocalEnrichmentProvider(EnrichmentProvider):
66
78
  )
67
79
 
68
80
  # Prepare prompts
69
- prompts: list[EmbeddingRequest] = [
70
- EmbeddingRequest(
71
- id=snippet.snippet_id,
72
- text=self.tokenizer.apply_chat_template(
81
+ prompts = [
82
+ {
83
+ "id": req.snippet_id,
84
+ "text": self.tokenizer.apply_chat_template(
73
85
  [
74
86
  {"role": "system", "content": ENRICHMENT_SYSTEM_PROMPT},
75
- {"role": "user", "content": snippet.text},
87
+ {"role": "user", "content": req.text},
76
88
  ],
77
89
  tokenize=False,
78
90
  add_generation_prompt=True,
79
91
  enable_thinking=False,
80
92
  ),
81
- )
82
- for snippet in data
93
+ }
94
+ for req in requests
83
95
  ]
84
96
 
85
97
  for prompt in prompts:
86
98
  model_inputs = self.tokenizer(
87
- prompt.text,
99
+ prompt["text"],
88
100
  return_tensors="pt",
89
101
  padding=True,
90
102
  truncation=True,
@@ -98,6 +110,6 @@ class LocalEnrichmentProvider(EnrichmentProvider):
98
110
  "\n"
99
111
  )
100
112
  yield EnrichmentResponse(
101
- snippet_id=prompt.id,
113
+ snippet_id=prompt["id"],
102
114
  text=content,
103
115
  )
@@ -0,0 +1,25 @@
1
+ """Null enrichment provider for testing."""
2
+
3
+ from collections.abc import AsyncGenerator
4
+
5
+ from kodit.domain.services.enrichment_service import EnrichmentProvider
6
+ from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
7
+
8
+
9
class NullEnrichmentProvider(EnrichmentProvider):
    """Enrichment provider that performs no enrichment at all."""

    async def enrich(
        self, requests: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Yield a blank response for every request, in input order.

        Args:
            requests: The enrichment requests to answer.

        Yields:
            A response carrying the request's ``snippet_id`` and empty text.

        """
        for item in requests:
            blank = EnrichmentResponse(snippet_id=item.snippet_id, text="")
            yield blank
@@ -0,0 +1,89 @@
1
+ """OpenAI enrichment provider implementation."""
2
+
3
+ import asyncio
4
+ from collections.abc import AsyncGenerator
5
+ from typing import Any
6
+
7
+ import structlog
8
+
9
+ from kodit.domain.services.enrichment_service import EnrichmentProvider
10
+ from kodit.domain.value_objects import EnrichmentRequest, EnrichmentResponse
11
+
12
+ ENRICHMENT_SYSTEM_PROMPT = """
13
+ You are a professional software developer. You will be given a snippet of code.
14
+ Please provide a concise explanation of the code.
15
+ """
16
+
17
+ # Default tuned to approximately fit within OpenAI's rate limit of 500 requests per minute (RPM)
18
+ OPENAI_NUM_PARALLEL_TASKS = 40
19
+
20
+
21
class OpenAIEnrichmentProvider(EnrichmentProvider):
    """Enrichment provider that asks a chat-completions client to explain code."""

    def __init__(self, openai_client: Any, model_name: str = "gpt-4o-mini") -> None:
        """Store the client and model used for enrichment.

        Args:
            openai_client: Client exposing ``chat.completions.create`` as an
                awaitable call.
            model_name: Model identifier passed on every completion request.

        """
        self.log = structlog.get_logger(__name__)
        self.openai_client = openai_client
        self.model_name = model_name

    async def enrich(
        self, requests: list[EnrichmentRequest]
    ) -> AsyncGenerator[EnrichmentResponse, None]:
        """Enrich each request concurrently through the chat-completions API.

        Requests with empty text, and requests whose API call raises, produce a
        response with empty text rather than an error.

        Args:
            requests: The enrichment requests to process.

        Yields:
            Enrichment responses in completion order, which may differ from
            the order of ``requests``.

        """
        if not requests:
            self.log.warning("No requests for enrichment")
            return

        # Caps the number of requests that are in flight at the same time.
        limiter = asyncio.Semaphore(OPENAI_NUM_PARALLEL_TASKS)

        async def _enrich_one(request: EnrichmentRequest) -> EnrichmentResponse:
            async with limiter:
                # Nothing to explain: answer with blank text, no API call.
                if not request.text:
                    return EnrichmentResponse(
                        snippet_id=request.snippet_id,
                        text="",
                    )
                try:
                    conversation = [
                        {
                            "role": "system",
                            "content": ENRICHMENT_SYSTEM_PROMPT,
                        },
                        {"role": "user", "content": request.text},
                    ]
                    completion = await self.openai_client.chat.completions.create(
                        model=self.model_name,
                        messages=conversation,
                    )
                    explanation = completion.choices[0].message.content or ""
                    return EnrichmentResponse(
                        snippet_id=request.snippet_id,
                        text=explanation,
                    )
                except Exception as e:
                    # Best effort: log the failure and fall back to blank text.
                    self.log.exception("Error enriching request", error=str(e))
                    return EnrichmentResponse(
                        snippet_id=request.snippet_id,
                        text="",
                    )

        # One coroutine per request; the semaphore throttles actual execution.
        pending = [_enrich_one(request) for request in requests]

        # Hand results back as soon as each one finishes.
        for finished in asyncio.as_completed(pending):
            yield await finished
@@ -0,0 +1 @@
1
+ """Git infrastructure module."""
@@ -1,4 +1,4 @@
1
- """Git utilities."""
1
+ """Git utilities for infrastructure operations."""
2
2
 
3
3
  import tempfile
4
4
 
@@ -6,7 +6,15 @@ import git
6
6
 
7
7
 
8
8
  def is_valid_clone_target(target: str) -> bool:
9
- """Return True if the target is clonable."""
9
+ """Return True if the target is clonable.
10
+
11
+ Args:
12
+ target: The git repository URL or path to validate.
13
+
14
+ Returns:
15
+ True if the target can be cloned, False otherwise.
16
+
17
+ """
10
18
  with tempfile.TemporaryDirectory() as temp_dir:
11
19
  try:
12
20
  git.Repo.clone_from(target, temp_dir)
@@ -0,0 +1 @@
1
+ """Ignore infrastructure module."""
@@ -1,18 +1,27 @@
1
- """Ignore patterns."""
1
+ """Infrastructure implementation of ignore pattern provider."""
2
2
 
3
3
  from pathlib import Path
4
4
 
5
5
  import git
6
6
  import pathspec
7
7
 
8
- from kodit.source.git import is_valid_clone_target
8
+ from kodit.domain.services.ignore_service import IgnorePatternProvider
9
+ from kodit.infrastructure.git.git_utils import is_valid_clone_target
9
10
 
10
11
 
11
- class IgnorePatterns:
12
- """Ignore patterns."""
12
+ class GitIgnorePatternProvider(IgnorePatternProvider):
13
+ """Ignore pattern provider for git repositories."""
13
14
 
14
15
  def __init__(self, base_dir: Path) -> None:
15
- """Initialize the ignore patterns."""
16
+ """Initialize the ignore pattern provider.
17
+
18
+ Args:
19
+ base_dir: The base directory to check for ignore patterns.
20
+
21
+ Raises:
22
+ ValueError: If the base directory is not a directory.
23
+
24
+ """
16
25
  if not base_dir.is_dir():
17
26
  msg = f"Base directory is not a directory: {base_dir}"
18
27
  raise ValueError(msg)
@@ -25,7 +34,15 @@ class IgnorePatterns:
25
34
  self.git_repo = git.Repo(base_dir)
26
35
 
27
36
  def should_ignore(self, path: Path) -> bool:
28
- """Check if a path should be ignored."""
37
+ """Check if a path should be ignored.
38
+
39
+ Args:
40
+ path: The path to check.
41
+
42
+ Returns:
43
+ True if the path should be ignored, False otherwise.
44
+
45
+ """
29
46
  if path.is_dir():
30
47
  return False
31
48
 
@@ -0,0 +1 @@
1
+ """Infrastructure indexing module."""
@@ -0,0 +1,55 @@
1
+ """Infrastructure implementation of the fusion service."""
2
+
3
+ from collections import defaultdict
4
+
5
+ from kodit.domain.services.indexing_service import FusionService
6
+ from kodit.domain.value_objects import FusionRequest, FusionResult
7
+
8
+
9
class ReciprocalRankFusionService(FusionService):
    """Fusion service that merges ranked result lists by reciprocal rank."""

    def reciprocal_rank_fusion(
        self, rankings: list[list[FusionRequest]], k: float = 60
    ) -> list[FusionResult]:
        """Fuse several ranked lists into one list ordered by fused score.

        Each entry adds ``1 / (k + position)`` to its id's fused score, where
        ``position`` is the entry's zero-based index within its ranker, so the
        top entry of a ranker contributes ``1 / k``.

        Args:
            rankings: One list per ranker, best result first. Each entry
                exposes ``id`` and ``score``.
            k: Smoothing constant added to the position. ``k + position``
                must not be zero.

        Returns:
            One result per distinct id, sorted by fused score in descending
            order. Ties keep first-seen order. ``original_scores`` holds the
            entry scores collected for that id across all rankers.

        """
        # Seed every id with 0.0 in first-seen order; that order breaks ties.
        fused = dict.fromkeys(
            (entry.id for ranker in rankings for entry in ranker), float(0)
        )

        # Accumulate the reciprocal-rank contribution of every entry.
        for ranker in rankings:
            for position, entry in enumerate(ranker):
                fused[entry.id] += 1.0 / (k + position)

        # Highest fused score first; sorted() is stable, like list.sort().
        ordered = sorted(fused.items(), key=lambda pair: pair[1], reverse=True)

        # Map each id to the scores its rankers originally assigned.
        scores_by_id = defaultdict(list)
        for ranker in rankings:
            for entry in ranker:
                scores_by_id[entry.id].append(entry.score)

        return [
            FusionResult(
                id=doc_id,
                score=fused_score,
                original_scores=scores_by_id[doc_id],
            )
            for doc_id, fused_score in ordered
        ]