crackerjack 0.38.15__py3-none-any.whl → 0.39.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of crackerjack might be problematic.
- crackerjack/__main__.py +134 -13
- crackerjack/agents/__init__.py +2 -0
- crackerjack/agents/base.py +1 -0
- crackerjack/agents/claude_code_bridge.py +319 -0
- crackerjack/agents/coordinator.py +6 -3
- crackerjack/agents/dry_agent.py +187 -3
- crackerjack/agents/enhanced_coordinator.py +279 -0
- crackerjack/agents/enhanced_proactive_agent.py +185 -0
- crackerjack/agents/performance_agent.py +324 -3
- crackerjack/agents/refactoring_agent.py +254 -5
- crackerjack/agents/semantic_agent.py +479 -0
- crackerjack/agents/semantic_helpers.py +356 -0
- crackerjack/cli/options.py +27 -0
- crackerjack/cli/semantic_handlers.py +290 -0
- crackerjack/core/async_workflow_orchestrator.py +9 -8
- crackerjack/core/enhanced_container.py +1 -1
- crackerjack/core/phase_coordinator.py +1 -1
- crackerjack/core/proactive_workflow.py +1 -1
- crackerjack/core/workflow_orchestrator.py +9 -6
- crackerjack/documentation/ai_templates.py +1 -1
- crackerjack/interactive.py +1 -1
- crackerjack/mcp/server_core.py +2 -0
- crackerjack/mcp/tools/__init__.py +2 -0
- crackerjack/mcp/tools/semantic_tools.py +584 -0
- crackerjack/models/semantic_models.py +271 -0
- crackerjack/plugins/loader.py +2 -2
- crackerjack/py313.py +4 -1
- crackerjack/services/embeddings.py +444 -0
- crackerjack/services/quality_intelligence.py +11 -1
- crackerjack/services/smart_scheduling.py +1 -1
- crackerjack/services/vector_store.py +681 -0
- crackerjack/slash_commands/run.md +84 -50
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/METADATA +7 -2
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/RECORD +37 -27
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/WHEEL +0 -0
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/entry_points.txt +0 -0
- {crackerjack-0.38.15.dist-info → crackerjack-0.39.0.dist-info}/licenses/LICENSE +0 -0
crackerjack/models/semantic_models.py ADDED
@@ -0,0 +1,271 @@
+"""Semantic search data models for crackerjack vector store functionality."""
+
+import typing as t
+from datetime import datetime
+from pathlib import Path
+
+from pydantic import BaseModel, Field
+
+
+class EmbeddingVector(BaseModel):
+    """Represents a single embedding vector with metadata."""
+
+    file_path: Path = Field(..., description="Path to the source file")
+    chunk_id: str = Field(..., description="Unique identifier for this chunk")
+    content: str = Field(..., description="The text content that was embedded")
+    embedding: list[float] = Field(
+        ..., description="The numerical vector representation"
+    )
+    created_at: datetime = Field(
+        default_factory=datetime.now, description="Creation timestamp"
+    )
+    file_hash: str = Field(
+        ..., description="Hash of the source file for change detection"
+    )
+    start_line: int = Field(..., description="Starting line number in the source file")
+    end_line: int = Field(..., description="Ending line number in the source file")
+    file_type: str = Field(..., description="File extension or type identifier")
+
+    class Config:
+        """Pydantic configuration."""
+
+        json_encoders = {
+            Path: str,
+            datetime: lambda v: v.isoformat(),
+        }
+
+
+class SearchResult(BaseModel):
+    """Represents a semantic search result with similarity score."""
+
+    file_path: Path = Field(..., description="Path to the matching file")
+    chunk_id: str = Field(..., description="Identifier of the matching chunk")
+    content: str = Field(..., description="The matching text content")
+    similarity_score: float = Field(
+        ..., ge=0.0, le=1.0, description="Similarity score (0-1)"
+    )
+    start_line: int = Field(..., description="Starting line number")
+    end_line: int = Field(..., description="Ending line number")
+    file_type: str = Field(..., description="File type identifier")
+    context_lines: list[str] = Field(
+        default_factory=list, description="Surrounding context lines"
+    )
+
+    class Config:
+        """Pydantic configuration."""
+
+        json_encoders = {
+            Path: str,
+        }
+
+
+class IndexStats(BaseModel):
+    """Statistics about the semantic index."""
+
+    total_files: int = Field(..., description="Total number of indexed files")
+    total_chunks: int = Field(..., description="Total number of text chunks")
+    index_size_mb: float = Field(..., description="Index size in megabytes")
+    last_updated: datetime = Field(..., description="Last index update timestamp")
+    file_types: dict[str, int] = Field(
+        default_factory=dict, description="Count by file type"
+    )
+    embedding_model: str = Field(..., description="Name of the embedding model used")
+    avg_chunk_size: float = Field(..., description="Average chunk size in characters")
+
+    class Config:
+        """Pydantic configuration."""
+
+        json_encoders = {
+            datetime: lambda v: v.isoformat(),
+        }
+
+
+class SearchQuery(BaseModel):
+    """Represents a semantic search query with parameters."""
+
+    query: str = Field(..., min_length=1, description="The search query text")
+    max_results: int = Field(
+        default=10, ge=1, le=100, description="Maximum number of results"
+    )
+    min_similarity: float = Field(
+        default=0.3, ge=0.0, le=1.0, description="Minimum similarity threshold"
+    )
+    file_types: list[str] = Field(
+        default_factory=list, description="Filter by file types"
+    )
+    include_context: bool = Field(
+        default=True, description="Include surrounding context lines"
+    )
+    context_lines: int = Field(
+        default=3, ge=0, le=10, description="Number of context lines"
+    )
+
+    class Config:
+        """Pydantic configuration."""
+
+        validate_assignment = True
+
+
+class IndexingProgress(BaseModel):
+    """Progress information for indexing operations."""
+
+    current_file: Path = Field(..., description="Currently processing file")
+    files_processed: int = Field(..., ge=0, description="Number of files processed")
+    total_files: int = Field(..., ge=0, description="Total files to process")
+    chunks_created: int = Field(..., ge=0, description="Number of chunks created")
+    elapsed_time: float = Field(..., ge=0.0, description="Elapsed time in seconds")
+    estimated_remaining: float | None = Field(
+        default=None, description="Estimated remaining time in seconds"
+    )
+
+    @property
+    def progress_percentage(self) -> float:
+        """Calculate progress as a percentage."""
+        if self.total_files == 0:
+            return 0.0
+        return min(100.0, (self.files_processed / self.total_files) * 100.0)
+
+    class Config:
+        """Pydantic configuration."""
+
+        json_encoders = {
+            Path: str,
+        }
+
+
+class SemanticConfig(BaseModel):
+    """Configuration for semantic search functionality."""
+
+    embedding_model: str = Field(
+        default="all-MiniLM-L6-v2", description="Sentence transformer model name"
+    )
+    chunk_size: int = Field(
+        default=500, ge=100, le=2000, description="Maximum characters per chunk"
+    )
+    chunk_overlap: int = Field(
+        default=50, ge=0, le=500, description="Overlap between chunks"
+    )
+    max_search_results: int = Field(
+        default=10, ge=1, le=100, description="Maximum number of search results"
+    )
+    similarity_threshold: float = Field(
+        default=0.7, ge=0.0, le=1.0, description="Minimum similarity threshold"
+    )
+    embedding_dimension: int = Field(
+        default=384, ge=128, le=1024, description="Embedding vector dimension"
+    )
+    max_file_size_mb: int = Field(
+        default=10, ge=1, le=100, description="Maximum file size to process"
+    )
+    excluded_patterns: list[str] = Field(
+        default_factory=lambda: [
+            "*.pyc",
+            "*.pyo",
+            "*.pyd",
+            "__pycache__/*",
+            ".git/*",
+            ".venv/*",
+            "*.log",
+            "*.tmp",
+        ],
+        description="File patterns to exclude from indexing",
+    )
+    included_extensions: list[str] = Field(
+        default_factory=lambda: [
+            ".py",
+            ".md",
+            ".txt",
+            ".yml",
+            ".yaml",
+            ".json",
+            ".toml",
+            ".ini",
+            ".cfg",
+            ".sh",
+            ".js",
+            ".ts",
+            ".html",
+            ".css",
+            ".sql",
+        ],
+        description="File extensions to include in indexing",
+    )
+    cache_embeddings: bool = Field(default=True, description="Cache embeddings to disk")
+    cache_ttl_hours: int = Field(
+        default=24, ge=1, le=168, description="Cache time-to-live in hours"
+    )
+
+    class Config:
+        """Pydantic configuration."""
+
+        validate_assignment = True
+
+
+class FileChangeEvent(BaseModel):
+    """Represents a file system change event for incremental indexing."""
+
+    file_path: Path = Field(..., description="Path to the changed file")
+    event_type: t.Literal["created", "modified", "deleted"] = Field(
+        ..., description="Type of change"
+    )
+    timestamp: datetime = Field(
+        default_factory=datetime.now, description="When the change occurred"
+    )
+    file_hash: str | None = Field(
+        default=None, description="New file hash if available"
+    )
+
+    class Config:
+        """Pydantic configuration."""
+
+        json_encoders = {
+            Path: str,
+            datetime: lambda v: v.isoformat(),
+        }
+
+
+class SemanticContext(BaseModel):
+    """Context information for AI agents using semantic search."""
+
+    query: str = Field(..., description="The query that generated this context")
+    related_files: list[SearchResult] = Field(
+        ..., description="Semantically related files"
+    )
+    patterns: list[str] = Field(
+        default_factory=list, description="Identified code patterns"
+    )
+    suggestions: list[str] = Field(
+        default_factory=list, description="AI-generated suggestions"
+    )
+    confidence: float = Field(
+        ..., ge=0.0, le=1.0, description="Confidence in the context relevance"
+    )
+
+    class Config:
+        """Pydantic configuration."""
+
+        json_encoders = {
+            Path: str,
+        }
+
+
+# Type aliases for better code readability
+EmbeddingMatrix = list[list[float]]
+SimilarityMatrix = list[list[float]]
+FilePathSet = set[Path]
+ChunkMapping = dict[str, EmbeddingVector]
+
+__all__ = [
+    "EmbeddingVector",
+    "SearchResult",
+    "IndexStats",
+    "SearchQuery",
+    "IndexingProgress",
+    "SemanticConfig",
+    "FileChangeEvent",
+    "SemanticContext",
+    "EmbeddingMatrix",
+    "SimilarityMatrix",
+    "FilePathSet",
+    "ChunkMapping",
+]
crackerjack/plugins/loader.py CHANGED
@@ -264,7 +264,7 @@ class PluginDiscovery:
         if not directory.exists() or not directory.is_dir():
             return []

-        plugin_files = []
+        plugin_files: list[Path] = []

         patterns = ["*.py", "*.json", "*.yaml", "*.yml"]

@@ -277,7 +277,7 @@ class PluginDiscovery:
         return [f for f in plugin_files if self._looks_like_plugin_file(f)]

     def discover_in_project(self, project_path: Path) -> list[Path]:
-        plugin_files = []
+        plugin_files: list[Path] = []

         plugin_dirs = [
             project_path / "plugins",
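The only substantive change in both hunks is the explicit list[Path] annotation on the empty list. Without it, a strict type checker has to guess the element type of plugin_files = [] and cannot verify the declared list[Path] return type. A standalone sketch of the pattern, for illustration only (this is not crackerjack's actual discovery logic):

from pathlib import Path

def collect_plugin_files(directory: Path) -> list[Path]:
    # Annotating the empty literal fixes the element type up front, so
    # strict mypy/pyright can check the return type without inferring it.
    plugin_files: list[Path] = []
    for pattern in ("*.py", "*.json", "*.yaml", "*.yml"):
        plugin_files.extend(directory.glob(pattern))
    return plugin_files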
crackerjack/py313.py CHANGED
@@ -114,7 +114,10 @@ def process_hook_results[T: HookResult, R](
     for result in results:
         # Type checker knows T is HookResult, so no need for isinstance check
         # But we keep it for runtime safety
-        if
+        if (
+            hasattr(result, "status")
+            and typing.cast(HookResult, result)["status"] == HookStatus.SUCCESS
+        ):
             processed_results.append(success_handler(result))
         else:
             processed_results.append(failure_handler(result))