crackerjack 0.38.15-py3-none-any.whl → 0.39.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of crackerjack might be problematic.

Files changed (37)
  1. crackerjack/__main__.py +134 -13
  2. crackerjack/agents/__init__.py +2 -0
  3. crackerjack/agents/base.py +1 -0
  4. crackerjack/agents/claude_code_bridge.py +319 -0
  5. crackerjack/agents/coordinator.py +6 -3
  6. crackerjack/agents/dry_agent.py +187 -3
  7. crackerjack/agents/enhanced_coordinator.py +279 -0
  8. crackerjack/agents/enhanced_proactive_agent.py +185 -0
  9. crackerjack/agents/performance_agent.py +324 -3
  10. crackerjack/agents/refactoring_agent.py +254 -5
  11. crackerjack/agents/semantic_agent.py +479 -0
  12. crackerjack/agents/semantic_helpers.py +356 -0
  13. crackerjack/cli/options.py +27 -0
  14. crackerjack/cli/semantic_handlers.py +290 -0
  15. crackerjack/core/async_workflow_orchestrator.py +9 -8
  16. crackerjack/core/enhanced_container.py +1 -1
  17. crackerjack/core/phase_coordinator.py +1 -1
  18. crackerjack/core/proactive_workflow.py +1 -1
  19. crackerjack/core/workflow_orchestrator.py +9 -6
  20. crackerjack/documentation/ai_templates.py +1 -1
  21. crackerjack/interactive.py +1 -1
  22. crackerjack/mcp/server_core.py +2 -0
  23. crackerjack/mcp/tools/__init__.py +2 -0
  24. crackerjack/mcp/tools/semantic_tools.py +584 -0
  25. crackerjack/models/semantic_models.py +271 -0
  26. crackerjack/plugins/loader.py +2 -2
  27. crackerjack/py313.py +4 -1
  28. crackerjack/services/embeddings.py +444 -0
  29. crackerjack/services/quality_intelligence.py +11 -1
  30. crackerjack/services/smart_scheduling.py +1 -1
  31. crackerjack/services/vector_store.py +681 -0
  32. crackerjack/slash_commands/run.md +84 -50
  33. {crackerjack-0.38.15.dist-info → crackerjack-0.39.1.dist-info}/METADATA +7 -2
  34. {crackerjack-0.38.15.dist-info → crackerjack-0.39.1.dist-info}/RECORD +37 -27
  35. {crackerjack-0.38.15.dist-info → crackerjack-0.39.1.dist-info}/WHEEL +0 -0
  36. {crackerjack-0.38.15.dist-info → crackerjack-0.39.1.dist-info}/entry_points.txt +0 -0
  37. {crackerjack-0.38.15.dist-info → crackerjack-0.39.1.dist-info}/licenses/LICENSE +0 -0
crackerjack/mcp/tools/semantic_tools.py (new file)
@@ -0,0 +1,584 @@
+"""Semantic search and vector store MCP tools for AI agent integration."""
+
+import json
+import typing as t
+from pathlib import Path
+
+from crackerjack.models.semantic_models import SearchQuery, SemanticConfig
+from crackerjack.services.embeddings import EmbeddingService
+from crackerjack.services.input_validator import get_input_validator
+from crackerjack.services.vector_store import VectorStore
+
+
+def _get_persistent_db_path() -> Path:
+    """Get the path to the persistent semantic search database."""
+    db_path = Path.cwd() / ".crackerjack" / "semantic_index.db"
+    db_path.parent.mkdir(exist_ok=True)
+    return db_path
+
+
+def register_semantic_tools(mcp_app: t.Any) -> None:
+    """Register all semantic search tools with the MCP server."""
+    _register_index_file_tool(mcp_app)
+    _register_search_semantic_tool(mcp_app)
+    _register_get_semantic_stats_tool(mcp_app)
+    _register_remove_file_from_index_tool(mcp_app)
+    _register_get_embeddings_tool(mcp_app)
+    _register_calculate_similarity_tool(mcp_app)
+
+
+def _register_index_file_tool(mcp_app: t.Any) -> None:
+    @mcp_app.tool()  # type: ignore[misc]
+    async def index_file_semantic(
+        file_path: str,
+        config_json: str = "",
+    ) -> str:
+        """Index a file for semantic search.
+
+        Args:
+            file_path: Path to the file to index
+            config_json: Optional JSON configuration for semantic search settings
+
+        Returns:
+            JSON string with indexing results
+        """
+        try:
+            validator = get_input_validator()
+
+            # Validate file path
+            path_result = validator.validate_file_path(file_path)
+            if not path_result.valid:
+                return json.dumps(
+                    {
+                        "success": False,
+                        "error": f"Invalid file path: {path_result.error_message}",
+                        "validation_type": path_result.validation_type,
+                    }
+                )
+
+            file_path_obj = Path(path_result.sanitized_value or file_path)
+
+            # Parse configuration
+            config = _parse_semantic_config(config_json)
+            if isinstance(config, str):  # Error occurred
+                return config
+
+            # Initialize vector store with persistent database
+            vector_store = VectorStore(config, db_path=_get_persistent_db_path())
+
+            # Index the file
+            embeddings = vector_store.index_file(file_path_obj)
+
+            return json.dumps(
+                {
+                    "success": True,
+                    "chunks_processed": len(embeddings),
+                    "file_path": str(file_path_obj),
+                    "embedding_dimension": config.embedding_dimension,
+                    "message": f"Successfully indexed {len(embeddings)} chunks from {file_path_obj.name}",
+                }
+            )
+
+        except Exception as e:
+            return json.dumps(
+                {
+                    "success": False,
+                    "error": f"Failed to index file: {e}",
+                    "file_path": file_path,
+                }
+            )
+
+
+def _register_search_semantic_tool(mcp_app: t.Any) -> None:
+    @mcp_app.tool()  # type: ignore[misc]
+    async def search_semantic(
+        query: str,
+        max_results: int = 10,
+        min_similarity: float = 0.7,
+        file_types: str = "",
+        config_json: str = "",
+    ) -> str:
+        """Perform semantic search across indexed files.
+
+        Args:
+            query: The search query text
+            max_results: Maximum number of results to return (1-100)
+            min_similarity: Minimum similarity threshold (0.0-1.0)
+            file_types: Comma-separated list of file types to filter by
+            config_json: Optional JSON configuration for semantic search settings
+
+        Returns:
+            JSON string with search results
+        """
+        try:
+            # Validate query
+            sanitized_query, query_error = _validate_search_query(query)
+            if query_error:
+                return _create_error_response(
+                    query_error,
+                    validation_type="query_validation",
+                )
+
+            # Validate parameters
+            param_error = _validate_search_parameters(max_results, min_similarity)
+            if param_error:
+                return _create_error_response(param_error)
+
+            # Parse file types and configuration
+            file_types_list = _parse_file_types(file_types)
+            config = _parse_semantic_config(config_json)
+            if isinstance(config, str):  # Error occurred
+                return config
+
+            # Create search query and execute search
+            search_query = SearchQuery(
+                query=sanitized_query,
+                max_results=max_results,
+                min_similarity=min_similarity,
+                file_types=file_types_list,
+            )
+
+            vector_store = VectorStore(config, db_path=_get_persistent_db_path())
+            results = vector_store.search(search_query)
+
+            # Format and return results
+            response_data = _format_search_results(
+                results, sanitized_query, max_results, min_similarity
+            )
+            return json.dumps(response_data)
+
+        except Exception as e:
+            return _create_error_response(
+                f"Failed to perform semantic search: {e}",
+                query=query,
+            )
+
+
+def _register_get_semantic_stats_tool(mcp_app: t.Any) -> None:
+    @mcp_app.tool()  # type: ignore[misc]
+    async def get_semantic_stats(config_json: str = "") -> str:
+        """Get statistics about the semantic search index.
+
+        Args:
+            config_json: Optional JSON configuration for semantic search settings
+
+        Returns:
+            JSON string with index statistics
+        """
+        try:
+            # Parse configuration
+            config = _parse_semantic_config(config_json)
+            if isinstance(config, str):  # Error occurred
+                return config
+
+            # Initialize vector store and get stats
+            vector_store = VectorStore(config, db_path=_get_persistent_db_path())
+            stats = vector_store.get_stats()
+
+            return json.dumps(
+                {
+                    "success": True,
+                    "total_files": stats.total_files,
+                    "total_chunks": stats.total_chunks,
+                    "index_size_mb": stats.index_size_mb,
+                    "average_chunks_per_file": round(
+                        stats.total_chunks / stats.total_files, 2
+                    )
+                    if stats.total_files > 0
+                    else 0.0,
+                    "embedding_model": config.embedding_model,
+                    "embedding_dimension": config.embedding_dimension,
+                    "last_updated": stats.last_updated.isoformat()
+                    if stats.last_updated
+                    else None,
+                }
+            )
+
+        except Exception as e:
+            return json.dumps(
+                {
+                    "success": False,
+                    "error": f"Failed to get semantic stats: {e}",
+                }
+            )
+
+
+def _register_remove_file_from_index_tool(mcp_app: t.Any) -> None:
+    @mcp_app.tool()  # type: ignore[misc]
+    async def remove_file_from_semantic_index(
+        file_path: str,
+        config_json: str = "",
+    ) -> str:
+        """Remove a file from the semantic search index.
+
+        Args:
+            file_path: Path to the file to remove
+            config_json: Optional JSON configuration for semantic search settings
+
+        Returns:
+            JSON string with removal results
+        """
+        try:
+            validator = get_input_validator()
+
+            # Validate file path
+            path_result = validator.validate_file_path(file_path)
+            if not path_result.valid:
+                return json.dumps(
+                    {
+                        "success": False,
+                        "error": f"Invalid file path: {path_result.error_message}",
+                        "validation_type": path_result.validation_type,
+                    }
+                )
+
+            file_path_obj = Path(path_result.sanitized_value or file_path)
+
+            # Parse configuration
+            config = _parse_semantic_config(config_json)
+            if isinstance(config, str):  # Error occurred
+                return config
+
+            # Initialize vector store and remove file
+            vector_store = VectorStore(config, db_path=_get_persistent_db_path())
+            success = vector_store.remove_file(file_path_obj)
+
+            return json.dumps(
+                {
+                    "success": success,
+                    "file_path": str(file_path_obj),
+                    "message": f"{'Successfully removed' if success else 'Failed to remove'} {file_path_obj.name} from index",
+                }
+            )
+
+        except Exception as e:
+            return json.dumps(
+                {
+                    "success": False,
+                    "error": f"Failed to remove file: {e}",
+                    "file_path": file_path,
+                }
+            )
+
+
+def _register_get_embeddings_tool(mcp_app: t.Any) -> None:
+    @mcp_app.tool()  # type: ignore[misc]
+    async def get_embeddings(
+        texts: str,
+        config_json: str = "",
+    ) -> str:
+        """Generate embeddings for given texts.
+
+        Args:
+            texts: JSON array of texts to generate embeddings for
+            config_json: Optional JSON configuration for semantic search settings
+
+        Returns:
+            JSON string with embeddings
+        """
+        try:
+            # Parse and validate input texts
+            texts_list, parse_error = _parse_texts_input(texts)
+            if parse_error:
+                return parse_error
+
+            # Parse configuration
+            config = _parse_semantic_config(config_json)
+            if isinstance(config, str):  # Error occurred
+                return config
+
+            # Generate embeddings
+            embeddings = _generate_embeddings_for_texts(texts_list, config)
+
+            return _format_embeddings_response(texts_list, embeddings)
+
+        except Exception as e:
+            return json.dumps(
+                {
+                    "success": False,
+                    "error": f"Failed to generate embeddings: {e}",
+                }
+            )
+
+
+def _register_calculate_similarity_tool(mcp_app: t.Any) -> None:
+    @mcp_app.tool()  # type: ignore[misc]
+    async def calculate_similarity_semantic(
+        embedding1: str,
+        embedding2: str,
+        config_json: str = "",
+    ) -> str:
+        """Calculate cosine similarity between two embeddings.
+
+        Args:
+            embedding1: JSON array representing first embedding vector
+            embedding2: JSON array representing second embedding vector
+            config_json: Optional JSON configuration for semantic search settings
+
+        Returns:
+            JSON string with similarity score
+        """
+        try:
+            # Parse embeddings
+            try:
+                emb1 = json.loads(embedding1)
+                emb2 = json.loads(embedding2)
+
+                if not (isinstance(emb1, list) and isinstance(emb2, list)):
+                    return json.dumps(
+                        {
+                            "success": False,
+                            "error": "Both embeddings must be JSON arrays",
+                        }
+                    )
+            except json.JSONDecodeError as e:
+                return json.dumps(
+                    {
+                        "success": False,
+                        "error": f"Invalid JSON for embeddings: {e}",
+                    }
+                )
+
+            # Parse configuration
+            config = _parse_semantic_config(config_json)
+            if isinstance(config, str):  # Error occurred
+                return config
+
+            # Calculate similarity
+            embedding_service = EmbeddingService(config)
+            similarity = embedding_service.calculate_similarity(emb1, emb2)
+
+            return json.dumps(
+                {
+                    "success": True,
+                    "similarity_score": round(similarity, 6),
+                    "embedding1_dimension": len(emb1),
+                    "embedding2_dimension": len(emb2),
+                }
+            )
+
+        except Exception as e:
+            return json.dumps(
+                {
+                    "success": False,
+                    "error": f"Failed to calculate similarity: {e}",
+                }
+            )
+
+
+def _create_error_response(error: str, **kwargs) -> str:
+    """Create standardized error response JSON.
+
+    Args:
+        error: Error message
+        **kwargs: Additional fields to include in response
+
+    Returns:
+        JSON string with error response
+    """
+    response = {
+        "success": False,
+        "error": error,
+    }
+    response.update(kwargs)
+    return json.dumps(response)
+
+
+def _validate_search_query(query: str) -> tuple[str, str | None]:
+    """Validate and sanitize search query.
+
+    Args:
+        query: Raw search query
+
+    Returns:
+        Tuple of (sanitized_query, error_message)
+    """
+    validator = get_input_validator()
+    query_result = validator.validate_command_args(query)
+
+    if not query_result.valid:
+        error_msg = f"Invalid query: {query_result.error_message}"
+        return "", error_msg
+
+    return query_result.sanitized_value or query, None
+
+
+def _validate_search_parameters(max_results: int, min_similarity: float) -> str | None:
+    """Validate search parameters.
+
+    Args:
+        max_results: Maximum results to return
+        min_similarity: Minimum similarity threshold
+
+    Returns:
+        Error message if invalid, None if valid
+    """
+    if not (1 <= max_results <= 100):
+        return "max_results must be between 1 and 100"
+
+    if not (0.0 <= min_similarity <= 1.0):
+        return "min_similarity must be between 0.0 and 1.0"
+
+    return None
+
+
+def _parse_file_types(file_types: str) -> list[str]:
+    """Parse comma-separated file types string.
+
+    Args:
+        file_types: Comma-separated file types
+
+    Returns:
+        List of file type strings
+    """
+    if not file_types.strip():
+        return []
+    return [ft.strip() for ft in file_types.split(",")]
+
+
+def _format_search_results(
+    results: list,
+    sanitized_query: str,
+    max_results: int,
+    min_similarity: float,
+) -> dict:
+    """Format search results for JSON response.
+
+    Args:
+        results: Search results from vector store
+        sanitized_query: Sanitized search query
+        max_results: Maximum results requested
+        min_similarity: Minimum similarity threshold
+
+    Returns:
+        Dictionary with formatted results
+    """
+    formatted_results = [
+        {
+            "file_path": str(result.file_path),
+            "content": result.content,
+            "similarity_score": round(result.similarity_score, 4),
+            "start_line": result.start_line,
+            "end_line": result.end_line,
+            "file_type": result.file_type,
+            "chunk_id": result.chunk_id,
+        }
+        for result in results
+    ]
+
+    return {
+        "success": True,
+        "query": sanitized_query,
+        "results_count": len(results),
+        "max_results": max_results,
+        "min_similarity": min_similarity,
+        "results": formatted_results,
+    }
+
+
+def _parse_texts_input(texts: str) -> tuple[list[str], str | None]:
+    """Parse and validate texts input.
+
+    Args:
+        texts: JSON string containing array of texts
+
+    Returns:
+        Tuple of (texts_list, error_message)
+    """
+    try:
+        texts_list = json.loads(texts)
+        if not isinstance(texts_list, list):
+            error = json.dumps(
+                {
+                    "success": False,
+                    "error": "texts must be a JSON array of strings",
+                }
+            )
+            return [], error
+        return texts_list, None
+    except json.JSONDecodeError as e:
+        error = json.dumps(
+            {
+                "success": False,
+                "error": f"Invalid JSON for texts: {e}",
+            }
+        )
+        return [], error
+
+
+def _generate_embeddings_for_texts(
+    texts_list: list[str], config: SemanticConfig
+) -> list:
+    """Generate embeddings for a list of texts.
+
+    Args:
+        texts_list: List of texts to generate embeddings for
+        config: Semantic search configuration
+
+    Returns:
+        List of embedding vectors
+    """
+    embedding_service = EmbeddingService(config)
+
+    if len(texts_list) == 1:
+        return [embedding_service.generate_embedding(texts_list[0])]
+    return embedding_service.generate_embeddings_batch(texts_list)
+
+
+def _format_embeddings_response(texts_list: list[str], embeddings: list) -> str:
+    """Format embeddings response as JSON.
+
+    Args:
+        texts_list: Original list of texts
+        embeddings: Generated embeddings
+
+    Returns:
+        JSON string with formatted response
+    """
+    return json.dumps(
+        {
+            "success": True,
+            "texts_count": len(texts_list),
+            "embedding_dimension": len(embeddings[0]) if embeddings else 0,
+            "embeddings": embeddings,
+        }
+    )
+
+
+def _parse_semantic_config(config_json: str) -> SemanticConfig | str:
+    """Parse semantic configuration from JSON string.
+
+    Args:
+        config_json: JSON configuration string
+
+    Returns:
+        SemanticConfig object or error string
+    """
+    if not config_json.strip():
+        # Use default configuration
+        return SemanticConfig(
+            embedding_model="sentence-transformers/all-MiniLM-L6-v2",
+            chunk_size=512,
+            chunk_overlap=50,
+            max_search_results=10,
+            similarity_threshold=0.7,
+            embedding_dimension=384,
+        )
+
+    try:
+        config_dict = json.loads(config_json)
+        return SemanticConfig(**config_dict)
+    except json.JSONDecodeError as e:
+        return json.dumps(
+            {
+                "success": False,
+                "error": f"Invalid JSON configuration: {e}",
+            }
+        )
+    except Exception as e:
+        return json.dumps(
+            {
+                "success": False,
+                "error": f"Invalid configuration: {e}",
+            }
+        )