crackerjack 0.37.9__py3-none-any.whl → 0.45.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (425) hide show
  1. crackerjack/README.md +19 -0
  2. crackerjack/__init__.py +30 -1
  3. crackerjack/__main__.py +342 -1263
  4. crackerjack/adapters/README.md +18 -0
  5. crackerjack/adapters/__init__.py +27 -5
  6. crackerjack/adapters/_output_paths.py +167 -0
  7. crackerjack/adapters/_qa_adapter_base.py +309 -0
  8. crackerjack/adapters/_tool_adapter_base.py +706 -0
  9. crackerjack/adapters/ai/README.md +65 -0
  10. crackerjack/adapters/ai/__init__.py +5 -0
  11. crackerjack/adapters/ai/claude.py +853 -0
  12. crackerjack/adapters/complexity/README.md +53 -0
  13. crackerjack/adapters/complexity/__init__.py +10 -0
  14. crackerjack/adapters/complexity/complexipy.py +641 -0
  15. crackerjack/adapters/dependency/__init__.py +22 -0
  16. crackerjack/adapters/dependency/pip_audit.py +418 -0
  17. crackerjack/adapters/format/README.md +72 -0
  18. crackerjack/adapters/format/__init__.py +11 -0
  19. crackerjack/adapters/format/mdformat.py +313 -0
  20. crackerjack/adapters/format/ruff.py +516 -0
  21. crackerjack/adapters/lint/README.md +47 -0
  22. crackerjack/adapters/lint/__init__.py +11 -0
  23. crackerjack/adapters/lint/codespell.py +273 -0
  24. crackerjack/adapters/lsp/README.md +49 -0
  25. crackerjack/adapters/lsp/__init__.py +27 -0
  26. crackerjack/adapters/{rust_tool_manager.py → lsp/_manager.py} +3 -3
  27. crackerjack/adapters/{skylos_adapter.py → lsp/skylos.py} +59 -7
  28. crackerjack/adapters/{zuban_adapter.py → lsp/zuban.py} +3 -6
  29. crackerjack/adapters/refactor/README.md +59 -0
  30. crackerjack/adapters/refactor/__init__.py +12 -0
  31. crackerjack/adapters/refactor/creosote.py +318 -0
  32. crackerjack/adapters/refactor/refurb.py +406 -0
  33. crackerjack/adapters/refactor/skylos.py +494 -0
  34. crackerjack/adapters/sast/README.md +132 -0
  35. crackerjack/adapters/sast/__init__.py +32 -0
  36. crackerjack/adapters/sast/_base.py +201 -0
  37. crackerjack/adapters/sast/bandit.py +423 -0
  38. crackerjack/adapters/sast/pyscn.py +405 -0
  39. crackerjack/adapters/sast/semgrep.py +241 -0
  40. crackerjack/adapters/security/README.md +111 -0
  41. crackerjack/adapters/security/__init__.py +17 -0
  42. crackerjack/adapters/security/gitleaks.py +339 -0
  43. crackerjack/adapters/type/README.md +52 -0
  44. crackerjack/adapters/type/__init__.py +12 -0
  45. crackerjack/adapters/type/pyrefly.py +402 -0
  46. crackerjack/adapters/type/ty.py +402 -0
  47. crackerjack/adapters/type/zuban.py +522 -0
  48. crackerjack/adapters/utility/README.md +51 -0
  49. crackerjack/adapters/utility/__init__.py +10 -0
  50. crackerjack/adapters/utility/checks.py +884 -0
  51. crackerjack/agents/README.md +264 -0
  52. crackerjack/agents/__init__.py +40 -12
  53. crackerjack/agents/base.py +1 -0
  54. crackerjack/agents/claude_code_bridge.py +641 -0
  55. crackerjack/agents/coordinator.py +49 -53
  56. crackerjack/agents/dry_agent.py +187 -3
  57. crackerjack/agents/enhanced_coordinator.py +279 -0
  58. crackerjack/agents/enhanced_proactive_agent.py +185 -0
  59. crackerjack/agents/error_middleware.py +53 -0
  60. crackerjack/agents/formatting_agent.py +6 -8
  61. crackerjack/agents/helpers/__init__.py +9 -0
  62. crackerjack/agents/helpers/performance/__init__.py +22 -0
  63. crackerjack/agents/helpers/performance/performance_ast_analyzer.py +357 -0
  64. crackerjack/agents/helpers/performance/performance_pattern_detector.py +909 -0
  65. crackerjack/agents/helpers/performance/performance_recommender.py +572 -0
  66. crackerjack/agents/helpers/refactoring/__init__.py +22 -0
  67. crackerjack/agents/helpers/refactoring/code_transformer.py +536 -0
  68. crackerjack/agents/helpers/refactoring/complexity_analyzer.py +344 -0
  69. crackerjack/agents/helpers/refactoring/dead_code_detector.py +437 -0
  70. crackerjack/agents/helpers/test_creation/__init__.py +19 -0
  71. crackerjack/agents/helpers/test_creation/test_ast_analyzer.py +216 -0
  72. crackerjack/agents/helpers/test_creation/test_coverage_analyzer.py +643 -0
  73. crackerjack/agents/helpers/test_creation/test_template_generator.py +1031 -0
  74. crackerjack/agents/performance_agent.py +121 -1152
  75. crackerjack/agents/refactoring_agent.py +156 -655
  76. crackerjack/agents/semantic_agent.py +479 -0
  77. crackerjack/agents/semantic_helpers.py +356 -0
  78. crackerjack/agents/test_creation_agent.py +19 -1605
  79. crackerjack/api.py +5 -7
  80. crackerjack/cli/README.md +394 -0
  81. crackerjack/cli/__init__.py +1 -1
  82. crackerjack/cli/cache_handlers.py +23 -18
  83. crackerjack/cli/cache_handlers_enhanced.py +1 -4
  84. crackerjack/cli/facade.py +70 -8
  85. crackerjack/cli/formatting.py +13 -0
  86. crackerjack/cli/handlers/__init__.py +85 -0
  87. crackerjack/cli/handlers/advanced.py +103 -0
  88. crackerjack/cli/handlers/ai_features.py +62 -0
  89. crackerjack/cli/handlers/analytics.py +479 -0
  90. crackerjack/cli/handlers/changelog.py +271 -0
  91. crackerjack/cli/handlers/config_handlers.py +16 -0
  92. crackerjack/cli/handlers/coverage.py +84 -0
  93. crackerjack/cli/handlers/documentation.py +280 -0
  94. crackerjack/cli/handlers/main_handlers.py +497 -0
  95. crackerjack/cli/handlers/monitoring.py +371 -0
  96. crackerjack/cli/handlers.py +249 -49
  97. crackerjack/cli/interactive.py +8 -5
  98. crackerjack/cli/options.py +203 -110
  99. crackerjack/cli/semantic_handlers.py +292 -0
  100. crackerjack/cli/version.py +19 -0
  101. crackerjack/code_cleaner.py +60 -24
  102. crackerjack/config/README.md +472 -0
  103. crackerjack/config/__init__.py +256 -0
  104. crackerjack/config/global_lock_config.py +191 -54
  105. crackerjack/config/hooks.py +188 -16
  106. crackerjack/config/loader.py +239 -0
  107. crackerjack/config/settings.py +141 -0
  108. crackerjack/config/tool_commands.py +331 -0
  109. crackerjack/core/README.md +393 -0
  110. crackerjack/core/async_workflow_orchestrator.py +79 -53
  111. crackerjack/core/autofix_coordinator.py +22 -9
  112. crackerjack/core/container.py +10 -9
  113. crackerjack/core/enhanced_container.py +9 -9
  114. crackerjack/core/performance.py +1 -1
  115. crackerjack/core/performance_monitor.py +5 -3
  116. crackerjack/core/phase_coordinator.py +1018 -634
  117. crackerjack/core/proactive_workflow.py +3 -3
  118. crackerjack/core/retry.py +275 -0
  119. crackerjack/core/service_watchdog.py +167 -23
  120. crackerjack/core/session_coordinator.py +187 -382
  121. crackerjack/core/timeout_manager.py +161 -44
  122. crackerjack/core/workflow/__init__.py +21 -0
  123. crackerjack/core/workflow/workflow_ai_coordinator.py +863 -0
  124. crackerjack/core/workflow/workflow_event_orchestrator.py +1107 -0
  125. crackerjack/core/workflow/workflow_issue_parser.py +714 -0
  126. crackerjack/core/workflow/workflow_phase_executor.py +1158 -0
  127. crackerjack/core/workflow/workflow_security_gates.py +400 -0
  128. crackerjack/core/workflow_orchestrator.py +1247 -953
  129. crackerjack/data/README.md +11 -0
  130. crackerjack/data/__init__.py +8 -0
  131. crackerjack/data/models.py +79 -0
  132. crackerjack/data/repository.py +210 -0
  133. crackerjack/decorators/README.md +180 -0
  134. crackerjack/decorators/__init__.py +35 -0
  135. crackerjack/decorators/error_handling.py +649 -0
  136. crackerjack/decorators/error_handling_decorators.py +334 -0
  137. crackerjack/decorators/helpers.py +58 -0
  138. crackerjack/decorators/patterns.py +281 -0
  139. crackerjack/decorators/utils.py +58 -0
  140. crackerjack/docs/README.md +11 -0
  141. crackerjack/docs/generated/api/CLI_REFERENCE.md +1 -1
  142. crackerjack/documentation/README.md +11 -0
  143. crackerjack/documentation/ai_templates.py +1 -1
  144. crackerjack/documentation/dual_output_generator.py +11 -9
  145. crackerjack/documentation/reference_generator.py +104 -59
  146. crackerjack/dynamic_config.py +52 -61
  147. crackerjack/errors.py +1 -1
  148. crackerjack/events/README.md +11 -0
  149. crackerjack/events/__init__.py +16 -0
  150. crackerjack/events/telemetry.py +175 -0
  151. crackerjack/events/workflow_bus.py +346 -0
  152. crackerjack/exceptions/README.md +301 -0
  153. crackerjack/exceptions/__init__.py +5 -0
  154. crackerjack/exceptions/config.py +4 -0
  155. crackerjack/exceptions/tool_execution_error.py +245 -0
  156. crackerjack/executors/README.md +591 -0
  157. crackerjack/executors/__init__.py +2 -0
  158. crackerjack/executors/async_hook_executor.py +539 -77
  159. crackerjack/executors/cached_hook_executor.py +3 -3
  160. crackerjack/executors/hook_executor.py +967 -102
  161. crackerjack/executors/hook_lock_manager.py +31 -22
  162. crackerjack/executors/individual_hook_executor.py +66 -32
  163. crackerjack/executors/lsp_aware_hook_executor.py +136 -57
  164. crackerjack/executors/progress_hook_executor.py +282 -0
  165. crackerjack/executors/tool_proxy.py +23 -7
  166. crackerjack/hooks/README.md +485 -0
  167. crackerjack/hooks/lsp_hook.py +8 -9
  168. crackerjack/intelligence/README.md +557 -0
  169. crackerjack/interactive.py +37 -10
  170. crackerjack/managers/README.md +369 -0
  171. crackerjack/managers/async_hook_manager.py +41 -57
  172. crackerjack/managers/hook_manager.py +449 -79
  173. crackerjack/managers/publish_manager.py +81 -36
  174. crackerjack/managers/test_command_builder.py +290 -12
  175. crackerjack/managers/test_executor.py +93 -8
  176. crackerjack/managers/test_manager.py +1082 -75
  177. crackerjack/managers/test_progress.py +118 -26
  178. crackerjack/mcp/README.md +374 -0
  179. crackerjack/mcp/cache.py +25 -2
  180. crackerjack/mcp/client_runner.py +35 -18
  181. crackerjack/mcp/context.py +9 -9
  182. crackerjack/mcp/dashboard.py +24 -8
  183. crackerjack/mcp/enhanced_progress_monitor.py +34 -23
  184. crackerjack/mcp/file_monitor.py +27 -6
  185. crackerjack/mcp/progress_components.py +45 -34
  186. crackerjack/mcp/progress_monitor.py +6 -9
  187. crackerjack/mcp/rate_limiter.py +11 -7
  188. crackerjack/mcp/server.py +2 -0
  189. crackerjack/mcp/server_core.py +187 -55
  190. crackerjack/mcp/service_watchdog.py +12 -9
  191. crackerjack/mcp/task_manager.py +2 -2
  192. crackerjack/mcp/tools/README.md +27 -0
  193. crackerjack/mcp/tools/__init__.py +2 -0
  194. crackerjack/mcp/tools/core_tools.py +75 -52
  195. crackerjack/mcp/tools/execution_tools.py +87 -31
  196. crackerjack/mcp/tools/intelligence_tools.py +2 -2
  197. crackerjack/mcp/tools/proactive_tools.py +1 -1
  198. crackerjack/mcp/tools/semantic_tools.py +584 -0
  199. crackerjack/mcp/tools/utility_tools.py +180 -132
  200. crackerjack/mcp/tools/workflow_executor.py +87 -46
  201. crackerjack/mcp/websocket/README.md +31 -0
  202. crackerjack/mcp/websocket/app.py +11 -1
  203. crackerjack/mcp/websocket/event_bridge.py +188 -0
  204. crackerjack/mcp/websocket/jobs.py +27 -4
  205. crackerjack/mcp/websocket/monitoring/__init__.py +25 -0
  206. crackerjack/mcp/websocket/monitoring/api/__init__.py +19 -0
  207. crackerjack/mcp/websocket/monitoring/api/dependencies.py +141 -0
  208. crackerjack/mcp/websocket/monitoring/api/heatmap.py +154 -0
  209. crackerjack/mcp/websocket/monitoring/api/intelligence.py +199 -0
  210. crackerjack/mcp/websocket/monitoring/api/metrics.py +203 -0
  211. crackerjack/mcp/websocket/monitoring/api/telemetry.py +101 -0
  212. crackerjack/mcp/websocket/monitoring/dashboard.py +18 -0
  213. crackerjack/mcp/websocket/monitoring/factory.py +109 -0
  214. crackerjack/mcp/websocket/monitoring/filters.py +10 -0
  215. crackerjack/mcp/websocket/monitoring/metrics.py +64 -0
  216. crackerjack/mcp/websocket/monitoring/models.py +90 -0
  217. crackerjack/mcp/websocket/monitoring/utils.py +171 -0
  218. crackerjack/mcp/websocket/monitoring/websocket_manager.py +78 -0
  219. crackerjack/mcp/websocket/monitoring/websockets/__init__.py +17 -0
  220. crackerjack/mcp/websocket/monitoring/websockets/dependencies.py +126 -0
  221. crackerjack/mcp/websocket/monitoring/websockets/heatmap.py +176 -0
  222. crackerjack/mcp/websocket/monitoring/websockets/intelligence.py +291 -0
  223. crackerjack/mcp/websocket/monitoring/websockets/metrics.py +291 -0
  224. crackerjack/mcp/websocket/monitoring_endpoints.py +16 -2930
  225. crackerjack/mcp/websocket/server.py +1 -3
  226. crackerjack/mcp/websocket/websocket_handler.py +107 -6
  227. crackerjack/models/README.md +308 -0
  228. crackerjack/models/__init__.py +10 -1
  229. crackerjack/models/config.py +639 -22
  230. crackerjack/models/config_adapter.py +6 -6
  231. crackerjack/models/protocols.py +1167 -23
  232. crackerjack/models/pydantic_models.py +320 -0
  233. crackerjack/models/qa_config.py +145 -0
  234. crackerjack/models/qa_results.py +134 -0
  235. crackerjack/models/results.py +35 -0
  236. crackerjack/models/semantic_models.py +258 -0
  237. crackerjack/models/task.py +19 -3
  238. crackerjack/models/test_models.py +60 -0
  239. crackerjack/monitoring/README.md +11 -0
  240. crackerjack/monitoring/ai_agent_watchdog.py +5 -4
  241. crackerjack/monitoring/metrics_collector.py +4 -3
  242. crackerjack/monitoring/regression_prevention.py +4 -3
  243. crackerjack/monitoring/websocket_server.py +4 -241
  244. crackerjack/orchestration/README.md +340 -0
  245. crackerjack/orchestration/__init__.py +43 -0
  246. crackerjack/orchestration/advanced_orchestrator.py +20 -67
  247. crackerjack/orchestration/cache/README.md +312 -0
  248. crackerjack/orchestration/cache/__init__.py +37 -0
  249. crackerjack/orchestration/cache/memory_cache.py +338 -0
  250. crackerjack/orchestration/cache/tool_proxy_cache.py +340 -0
  251. crackerjack/orchestration/config.py +297 -0
  252. crackerjack/orchestration/coverage_improvement.py +13 -6
  253. crackerjack/orchestration/execution_strategies.py +6 -6
  254. crackerjack/orchestration/hook_orchestrator.py +1398 -0
  255. crackerjack/orchestration/strategies/README.md +401 -0
  256. crackerjack/orchestration/strategies/__init__.py +39 -0
  257. crackerjack/orchestration/strategies/adaptive_strategy.py +630 -0
  258. crackerjack/orchestration/strategies/parallel_strategy.py +237 -0
  259. crackerjack/orchestration/strategies/sequential_strategy.py +299 -0
  260. crackerjack/orchestration/test_progress_streamer.py +1 -1
  261. crackerjack/plugins/README.md +11 -0
  262. crackerjack/plugins/hooks.py +3 -2
  263. crackerjack/plugins/loader.py +3 -3
  264. crackerjack/plugins/managers.py +1 -1
  265. crackerjack/py313.py +191 -0
  266. crackerjack/security/README.md +11 -0
  267. crackerjack/services/README.md +374 -0
  268. crackerjack/services/__init__.py +8 -21
  269. crackerjack/services/ai/README.md +295 -0
  270. crackerjack/services/ai/__init__.py +7 -0
  271. crackerjack/services/ai/advanced_optimizer.py +878 -0
  272. crackerjack/services/{contextual_ai_assistant.py → ai/contextual_ai_assistant.py} +5 -3
  273. crackerjack/services/ai/embeddings.py +444 -0
  274. crackerjack/services/ai/intelligent_commit.py +328 -0
  275. crackerjack/services/ai/predictive_analytics.py +510 -0
  276. crackerjack/services/api_extractor.py +5 -3
  277. crackerjack/services/bounded_status_operations.py +45 -5
  278. crackerjack/services/cache.py +249 -318
  279. crackerjack/services/changelog_automation.py +7 -3
  280. crackerjack/services/command_execution_service.py +305 -0
  281. crackerjack/services/config_integrity.py +83 -39
  282. crackerjack/services/config_merge.py +9 -6
  283. crackerjack/services/config_service.py +198 -0
  284. crackerjack/services/config_template.py +13 -26
  285. crackerjack/services/coverage_badge_service.py +6 -4
  286. crackerjack/services/coverage_ratchet.py +53 -27
  287. crackerjack/services/debug.py +18 -7
  288. crackerjack/services/dependency_analyzer.py +4 -4
  289. crackerjack/services/dependency_monitor.py +13 -13
  290. crackerjack/services/documentation_generator.py +4 -2
  291. crackerjack/services/documentation_service.py +62 -33
  292. crackerjack/services/enhanced_filesystem.py +81 -27
  293. crackerjack/services/enterprise_optimizer.py +1 -1
  294. crackerjack/services/error_pattern_analyzer.py +10 -10
  295. crackerjack/services/file_filter.py +221 -0
  296. crackerjack/services/file_hasher.py +5 -7
  297. crackerjack/services/file_io_service.py +361 -0
  298. crackerjack/services/file_modifier.py +615 -0
  299. crackerjack/services/filesystem.py +80 -109
  300. crackerjack/services/git.py +99 -5
  301. crackerjack/services/health_metrics.py +4 -6
  302. crackerjack/services/heatmap_generator.py +12 -3
  303. crackerjack/services/incremental_executor.py +380 -0
  304. crackerjack/services/initialization.py +101 -49
  305. crackerjack/services/log_manager.py +2 -2
  306. crackerjack/services/logging.py +120 -68
  307. crackerjack/services/lsp_client.py +12 -12
  308. crackerjack/services/memory_optimizer.py +27 -22
  309. crackerjack/services/monitoring/README.md +30 -0
  310. crackerjack/services/monitoring/__init__.py +9 -0
  311. crackerjack/services/monitoring/dependency_monitor.py +678 -0
  312. crackerjack/services/monitoring/error_pattern_analyzer.py +676 -0
  313. crackerjack/services/monitoring/health_metrics.py +716 -0
  314. crackerjack/services/monitoring/metrics.py +587 -0
  315. crackerjack/services/{performance_benchmarks.py → monitoring/performance_benchmarks.py} +100 -14
  316. crackerjack/services/{performance_cache.py → monitoring/performance_cache.py} +21 -15
  317. crackerjack/services/{performance_monitor.py → monitoring/performance_monitor.py} +10 -6
  318. crackerjack/services/parallel_executor.py +166 -55
  319. crackerjack/services/patterns/__init__.py +142 -0
  320. crackerjack/services/patterns/agents.py +107 -0
  321. crackerjack/services/patterns/code/__init__.py +15 -0
  322. crackerjack/services/patterns/code/detection.py +118 -0
  323. crackerjack/services/patterns/code/imports.py +107 -0
  324. crackerjack/services/patterns/code/paths.py +159 -0
  325. crackerjack/services/patterns/code/performance.py +119 -0
  326. crackerjack/services/patterns/code/replacement.py +36 -0
  327. crackerjack/services/patterns/core.py +212 -0
  328. crackerjack/services/patterns/documentation/__init__.py +14 -0
  329. crackerjack/services/patterns/documentation/badges_markdown.py +96 -0
  330. crackerjack/services/patterns/documentation/comments_blocks.py +83 -0
  331. crackerjack/services/patterns/documentation/docstrings.py +89 -0
  332. crackerjack/services/patterns/formatting.py +226 -0
  333. crackerjack/services/patterns/operations.py +339 -0
  334. crackerjack/services/patterns/security/__init__.py +23 -0
  335. crackerjack/services/patterns/security/code_injection.py +122 -0
  336. crackerjack/services/patterns/security/credentials.py +190 -0
  337. crackerjack/services/patterns/security/path_traversal.py +221 -0
  338. crackerjack/services/patterns/security/unsafe_operations.py +216 -0
  339. crackerjack/services/patterns/templates.py +62 -0
  340. crackerjack/services/patterns/testing/__init__.py +18 -0
  341. crackerjack/services/patterns/testing/error_patterns.py +107 -0
  342. crackerjack/services/patterns/testing/pytest_output.py +126 -0
  343. crackerjack/services/patterns/tool_output/__init__.py +16 -0
  344. crackerjack/services/patterns/tool_output/bandit.py +72 -0
  345. crackerjack/services/patterns/tool_output/other.py +97 -0
  346. crackerjack/services/patterns/tool_output/pyright.py +67 -0
  347. crackerjack/services/patterns/tool_output/ruff.py +44 -0
  348. crackerjack/services/patterns/url_sanitization.py +114 -0
  349. crackerjack/services/patterns/utilities.py +42 -0
  350. crackerjack/services/patterns/utils.py +339 -0
  351. crackerjack/services/patterns/validation.py +46 -0
  352. crackerjack/services/patterns/versioning.py +62 -0
  353. crackerjack/services/predictive_analytics.py +21 -8
  354. crackerjack/services/profiler.py +280 -0
  355. crackerjack/services/quality/README.md +415 -0
  356. crackerjack/services/quality/__init__.py +11 -0
  357. crackerjack/services/quality/anomaly_detector.py +392 -0
  358. crackerjack/services/quality/pattern_cache.py +333 -0
  359. crackerjack/services/quality/pattern_detector.py +479 -0
  360. crackerjack/services/quality/qa_orchestrator.py +491 -0
  361. crackerjack/services/{quality_baseline.py → quality/quality_baseline.py} +163 -2
  362. crackerjack/services/{quality_baseline_enhanced.py → quality/quality_baseline_enhanced.py} +4 -1
  363. crackerjack/services/{quality_intelligence.py → quality/quality_intelligence.py} +180 -16
  364. crackerjack/services/regex_patterns.py +58 -2987
  365. crackerjack/services/regex_utils.py +55 -29
  366. crackerjack/services/secure_status_formatter.py +42 -15
  367. crackerjack/services/secure_subprocess.py +35 -2
  368. crackerjack/services/security.py +16 -8
  369. crackerjack/services/server_manager.py +40 -51
  370. crackerjack/services/smart_scheduling.py +46 -6
  371. crackerjack/services/status_authentication.py +3 -3
  372. crackerjack/services/thread_safe_status_collector.py +1 -0
  373. crackerjack/services/tool_filter.py +368 -0
  374. crackerjack/services/tool_version_service.py +9 -5
  375. crackerjack/services/unified_config.py +43 -351
  376. crackerjack/services/vector_store.py +689 -0
  377. crackerjack/services/version_analyzer.py +6 -4
  378. crackerjack/services/version_checker.py +14 -8
  379. crackerjack/services/zuban_lsp_service.py +5 -4
  380. crackerjack/slash_commands/README.md +11 -0
  381. crackerjack/slash_commands/init.md +2 -12
  382. crackerjack/slash_commands/run.md +84 -50
  383. crackerjack/tools/README.md +11 -0
  384. crackerjack/tools/__init__.py +30 -0
  385. crackerjack/tools/_git_utils.py +105 -0
  386. crackerjack/tools/check_added_large_files.py +139 -0
  387. crackerjack/tools/check_ast.py +105 -0
  388. crackerjack/tools/check_json.py +103 -0
  389. crackerjack/tools/check_jsonschema.py +297 -0
  390. crackerjack/tools/check_toml.py +103 -0
  391. crackerjack/tools/check_yaml.py +110 -0
  392. crackerjack/tools/codespell_wrapper.py +72 -0
  393. crackerjack/tools/end_of_file_fixer.py +202 -0
  394. crackerjack/tools/format_json.py +128 -0
  395. crackerjack/tools/mdformat_wrapper.py +114 -0
  396. crackerjack/tools/trailing_whitespace.py +198 -0
  397. crackerjack/tools/validate_regex_patterns.py +7 -3
  398. crackerjack/ui/README.md +11 -0
  399. crackerjack/ui/dashboard_renderer.py +28 -0
  400. crackerjack/ui/templates/README.md +11 -0
  401. crackerjack/utils/console_utils.py +13 -0
  402. crackerjack/utils/dependency_guard.py +230 -0
  403. crackerjack/utils/retry_utils.py +275 -0
  404. crackerjack/workflows/README.md +590 -0
  405. crackerjack/workflows/__init__.py +46 -0
  406. crackerjack/workflows/actions.py +811 -0
  407. crackerjack/workflows/auto_fix.py +444 -0
  408. crackerjack/workflows/container_builder.py +499 -0
  409. crackerjack/workflows/definitions.py +443 -0
  410. crackerjack/workflows/engine.py +177 -0
  411. crackerjack/workflows/event_bridge.py +242 -0
  412. {crackerjack-0.37.9.dist-info → crackerjack-0.45.2.dist-info}/METADATA +678 -98
  413. crackerjack-0.45.2.dist-info/RECORD +478 -0
  414. {crackerjack-0.37.9.dist-info → crackerjack-0.45.2.dist-info}/WHEEL +1 -1
  415. crackerjack/managers/test_manager_backup.py +0 -1075
  416. crackerjack/mcp/tools/execution_tools_backup.py +0 -1011
  417. crackerjack/mixins/__init__.py +0 -3
  418. crackerjack/mixins/error_handling.py +0 -145
  419. crackerjack/services/config.py +0 -358
  420. crackerjack/ui/server_panels.py +0 -125
  421. crackerjack-0.37.9.dist-info/RECORD +0 -231
  422. /crackerjack/adapters/{rust_tool_adapter.py → lsp/_base.py} +0 -0
  423. /crackerjack/adapters/{lsp_client.py → lsp/_client.py} +0 -0
  424. {crackerjack-0.37.9.dist-info → crackerjack-0.45.2.dist-info}/entry_points.txt +0 -0
  425. {crackerjack-0.37.9.dist-info → crackerjack-0.45.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,689 @@
1
+ """Core vector store service for semantic search functionality."""
2
+
3
+ import json
4
+ import logging
5
+ import sqlite3
6
+ import tempfile
7
+ import typing as t
8
+ from contextlib import contextmanager
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+
12
+ from crackerjack.services.ai.embeddings import EmbeddingService
13
+
14
+ from ..models.semantic_models import (
15
+ EmbeddingVector,
16
+ IndexingProgress,
17
+ IndexStats,
18
+ SearchQuery,
19
+ SearchResult,
20
+ SemanticConfig,
21
+ )
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ class VectorStore:
27
+ """Core vector store for managing embeddings and semantic search."""
28
+
29
+ def __init__(
30
+ self,
31
+ config: SemanticConfig,
32
+ db_path: Path | None = None,
33
+ embedding_service: EmbeddingService | None = None,
34
+ ) -> None:
35
+ """Initialize the vector store.
36
+
37
+ Args:
38
+ config: Semantic search configuration
39
+ db_path: Optional path to SQLite database (uses temp file if None)
40
+ embedding_service: Optional embedding service (creates new if None)
41
+ """
42
+ self.config = config
43
+ self.embedding_service = embedding_service or EmbeddingService(config)
44
+
45
+ # Database setup
46
+ self._temp_db: tempfile._TemporaryFileWrapper[bytes] | None = None
47
+ if db_path is None:
48
+ # Create temporary database file
49
+ self._temp_db = tempfile.NamedTemporaryFile(
50
+ suffix=".db", delete=False, prefix="crackerjack_vectors_"
51
+ )
52
+ self.db_path = Path(self._temp_db.name)
53
+ else:
54
+ self.db_path = db_path
55
+
56
+ self._initialize_database()
57
+
58
+ def _initialize_database(self) -> None:
59
+ """Initialize SQLite database with required tables."""
60
+ with self._get_connection() as conn:
61
+ # Create embeddings table
62
+ conn.execute("""
63
+ CREATE TABLE IF NOT EXISTS embeddings (
64
+ chunk_id TEXT PRIMARY KEY,
65
+ file_path TEXT NOT NULL,
66
+ content TEXT NOT NULL,
67
+ embedding BLOB NOT NULL,
68
+ created_at TEXT NOT NULL,
69
+ file_hash TEXT NOT NULL,
70
+ start_line INTEGER NOT NULL,
71
+ end_line INTEGER NOT NULL,
72
+ file_type TEXT NOT NULL
73
+ )
74
+ """)
75
+
76
+ # Create indexes for performance
77
+ conn.execute("""
78
+ CREATE INDEX IF NOT EXISTS idx_file_path ON embeddings(file_path)
79
+ """)
80
+ conn.execute("""
81
+ CREATE INDEX IF NOT EXISTS idx_file_hash ON embeddings(file_hash)
82
+ """)
83
+ conn.execute("""
84
+ CREATE INDEX IF NOT EXISTS idx_file_type ON embeddings(file_type)
85
+ """)
86
+
87
+ # Create file tracking table
88
+ conn.execute("""
89
+ CREATE TABLE IF NOT EXISTS file_tracking (
90
+ file_path TEXT PRIMARY KEY,
91
+ file_hash TEXT NOT NULL,
92
+ last_indexed TEXT NOT NULL,
93
+ chunk_count INTEGER NOT NULL DEFAULT 0
94
+ )
95
+ """)
96
+
97
+ conn.commit()
98
+
99
+ @contextmanager
100
+ def _get_connection(self) -> t.Iterator[sqlite3.Connection]:
101
+ """Get a database connection with proper error handling."""
102
+ conn = None
103
+ try:
104
+ conn = sqlite3.connect(self.db_path)
105
+ conn.row_factory = sqlite3.Row
106
+ yield conn
107
+ except Exception as e:
108
+ if conn:
109
+ conn.rollback()
110
+ logger.error(f"Database error: {e}")
111
+ raise
112
+ finally:
113
+ if conn:
114
+ conn.close()
115
+
116
def index_file(
    self,
    file_path: Path,
    progress_callback: t.Callable[[IndexingProgress], None] | None = None,
) -> list[EmbeddingVector]:
    """Index one file and return the embeddings that represent it.

    A file judged up to date by ``_prepare_file_for_indexing`` is not
    re-embedded; its existing embeddings are returned instead. When
    chunking yields nothing, an empty list is returned and neither the
    embeddings nor the file tracking are written.

    Args:
        file_path: Path to file to index
        progress_callback: Optional callback for progress updates

    Returns:
        List of embedding vectors for the file

    Raises:
        OSError: If file cannot be read
        ValueError: If file is too large or has unsupported extension
    """
    file_hash = self._prepare_file_for_indexing(file_path)
    if file_hash is None:
        # Nothing changed since the last run.
        return self._get_existing_embeddings(file_path)

    logger.info(f"Indexing file: {file_path}")

    try:
        chunk_data = self._process_file_content(file_path, file_hash)
        if chunk_data["chunks"]:
            vectors = self._create_embedding_vectors(
                file_path, file_hash, chunk_data, progress_callback
            )

            # Persist first, then record the file as indexed.
            self._store_embeddings(vectors)
            self._update_file_tracking(file_path, file_hash, len(vectors))

            logger.info(
                f"Successfully indexed {len(vectors)} chunks from {file_path}"
            )
            return vectors

        logger.warning(f"No chunks generated for file: {file_path}")
        return []

    except Exception as exc:
        logger.error(f"Failed to index file {file_path}: {exc}")
        raise
165
+
166
+ def _prepare_file_for_indexing(self, file_path: Path) -> str | None:
167
+ """Prepare file for indexing and return hash if reindexing needed.
168
+
169
+ Args:
170
+ file_path: Path to prepare for indexing
171
+
172
+ Returns:
173
+ File hash if reindexing needed, None if file is up to date
174
+ """
175
+ self._validate_file_for_indexing(file_path)
176
+
177
+ current_hash = self.embedding_service.get_file_hash(file_path)
178
+ if not self._needs_reindexing(file_path, current_hash):
179
+ logger.debug(f"File {file_path} is up to date, skipping")
180
+ return None
181
+
182
+ return current_hash
183
+
184
+ def _process_file_content(
185
+ self, file_path: Path, current_hash: str
186
+ ) -> dict[str, t.Any]:
187
+ """Process file content into chunks and metadata.
188
+
189
+ Args:
190
+ file_path: Path to process
191
+ current_hash: File hash for chunk IDs
192
+
193
+ Returns:
194
+ Dictionary with chunks, texts, and metadata
195
+ """
196
+ content = file_path.read_text(encoding="utf-8")
197
+ chunks = self.embedding_service.chunk_text(content)
198
+
199
+ chunk_texts = []
200
+ chunk_metadata = []
201
+
202
+ for i, chunk_content in enumerate(chunks):
203
+ chunk_id = f"{file_path.stem}_{current_hash[:8]}_{i}"
204
+ start_line = i * (self.config.chunk_size // 50) + 1 # Rough estimate
205
+ end_line = start_line + (len(chunk_content.split("\n")) - 1)
206
+
207
+ chunk_texts.append(chunk_content)
208
+ chunk_metadata.append(
209
+ {
210
+ "chunk_id": chunk_id,
211
+ "start_line": start_line,
212
+ "end_line": end_line,
213
+ }
214
+ )
215
+
216
+ return {
217
+ "chunks": chunks,
218
+ "chunk_texts": chunk_texts,
219
+ "chunk_metadata": chunk_metadata,
220
+ }
221
+
222
def _create_embedding_vectors(
    self,
    file_path: Path,
    current_hash: str,
    chunk_data: dict[str, t.Any],
    progress_callback: t.Callable[[IndexingProgress], None] | None = None,
) -> list[EmbeddingVector]:
    """Build ``EmbeddingVector`` records from prepared chunk data.

    All chunk texts are embedded with a single batch call. A chunk whose
    embedding comes back empty (falsy) is dropped and triggers no progress
    notification.

    Args:
        file_path: File the chunks were taken from.
        current_hash: Hash of the file content, stored on every vector.
        chunk_data: Mapping providing ``"chunk_texts"`` and
            ``"chunk_metadata"``; the metadata entries supply ``chunk_id``,
            ``start_line`` and ``end_line``.
        progress_callback: Optional hook invoked with an ``IndexingProgress``
            snapshot before each vector is created.

    Returns:
        One ``EmbeddingVector`` per chunk with a non-empty embedding.
    """
    texts = chunk_data["chunk_texts"]
    metadata_items = chunk_data["chunk_metadata"]

    # A single batched request instead of one request per chunk.
    raw_vectors = self.embedding_service.generate_embeddings_batch(texts)

    collected = []
    for position, (raw_vector, meta) in enumerate(zip(raw_vectors, metadata_items)):
        if raw_vector:
            if progress_callback:
                # Only the current file and the chunk index are known at this
                # level; the remaining fields are fixed placeholders.
                progress_callback(
                    IndexingProgress(
                        current_file=file_path,
                        files_processed=0,
                        total_files=1,
                        chunks_created=position,
                        elapsed_time=0.0,
                    )
                )

            collected.append(
                EmbeddingVector(
                    file_path=file_path,
                    chunk_id=meta["chunk_id"],
                    content=texts[position],
                    embedding=raw_vector,
                    created_at=datetime.now(),
                    file_hash=current_hash,
                    start_line=meta["start_line"],
                    end_line=meta["end_line"],
                    file_type=file_path.suffix,
                )
            )

    return collected
280
+
281
def _validate_file_for_indexing(self, file_path: Path) -> None:
    """Raise if ``file_path`` must not be indexed.

    The checks run in a fixed order and the first failure raises:
    existence, regular-file check, size limit, extension allow-list,
    exclusion patterns.

    Args:
        file_path: Candidate file.

    Raises:
        OSError: The path does not exist.
        ValueError: The path is not a file, is larger than
            ``config.max_file_size_mb``, has a suffix missing from a
            non-empty ``config.included_extensions``, or matches an entry
            of ``config.excluded_patterns``.
    """
    if not file_path.exists():
        raise OSError(f"File does not exist: {file_path}")
    if not file_path.is_file():
        raise ValueError(f"Path is not a file: {file_path}")

    # Size limit is configured in megabytes.
    bytes_per_mb = 1024 * 1024
    file_size_mb = file_path.stat().st_size / bytes_per_mb
    if file_size_mb > self.config.max_file_size_mb:
        raise ValueError(
            f"File too large: {file_size_mb:.1f}MB > {self.config.max_file_size_mb}MB"
        )

    # An empty allow-list means "accept every extension".
    allowed_suffixes = self.config.included_extensions
    if allowed_suffixes and file_path.suffix not in allowed_suffixes:
        raise ValueError(f"File extension not included: {file_path.suffix}")

    # Exclusion patterns are matched against the full path string.
    path_text = str(file_path)
    for pattern in self.config.excluded_patterns:
        if not self._matches_pattern(path_text, pattern):
            continue
        raise ValueError(f"File matches exclusion pattern: {pattern}")
316
+
317
def _matches_pattern(self, file_path: str, pattern: str) -> bool:
    """Return whether ``file_path`` matches the shell-style ``pattern``.

    The comparison is delegated to :func:`fnmatch.fnmatch`.

    Args:
        file_path: Path string to test.
        pattern: Glob-style pattern (``*``, ``?``, ``[seq]``).

    Returns:
        True when the whole path string matches the pattern.
    """
    from fnmatch import fnmatch as glob_match

    return glob_match(file_path, pattern)
322
+
323
def _needs_reindexing(self, file_path: Path, current_hash: str) -> bool:
    """Decide whether a file has to be (re)indexed.

    Args:
        file_path: File to look up in the ``file_tracking`` table.
        current_hash: Hash of the file's current content.

    Returns:
        True when the file has no tracking row yet or its stored hash
        differs from ``current_hash``; False otherwise.
    """
    lookup_sql = "SELECT file_hash FROM file_tracking WHERE file_path = ?"

    with self._get_connection() as conn:
        tracked = conn.execute(lookup_sql, (str(file_path),)).fetchone()

        # A missing row means the file was never indexed.
        return tracked is None or tracked["file_hash"] != current_hash
344
+
345
def _get_existing_embeddings(self, file_path: Path) -> list[EmbeddingVector]:
    """Load the embeddings already stored for one file.

    Args:
        file_path: File whose rows are read from the ``embeddings`` table.

    Returns:
        The stored embeddings as ``EmbeddingVector`` objects, with the
        vector decoded from JSON and ``created_at`` parsed from ISO format.
        Empty when the file has no rows.
    """
    with self._get_connection() as conn:
        stored_rows = conn.execute(
            """SELECT chunk_id, file_path, content, embedding, created_at,
                          file_hash, start_line, end_line, file_type
                   FROM embeddings WHERE file_path = ?""",
            (str(file_path),),
        ).fetchall()

        return [
            EmbeddingVector(
                file_path=Path(stored["file_path"]),
                chunk_id=stored["chunk_id"],
                content=stored["content"],
                # Vectors are persisted as JSON text.
                embedding=json.loads(stored["embedding"]),
                created_at=datetime.fromisoformat(stored["created_at"]),
                file_hash=stored["file_hash"],
                start_line=stored["start_line"],
                end_line=stored["end_line"],
                file_type=stored["file_type"],
            )
            for stored in stored_rows
        ]
382
+
383
def _store_embeddings(self, embeddings: list[EmbeddingVector]) -> None:
    """Persist embeddings, replacing earlier rows of the same files.

    Every file referenced by ``embeddings`` first has its existing rows
    deleted; the new rows are then inserted and the change is committed.
    An empty list is a no-op.

    Args:
        embeddings: Embeddings to write to the ``embeddings`` table.
    """
    if not embeddings:
        return

    delete_sql = "DELETE FROM embeddings WHERE file_path = ?"

    with self._get_connection() as conn:
        # Drop stale rows once per distinct file before inserting.
        for stale_path in {str(item.file_path) for item in embeddings}:
            conn.execute(delete_sql, (stale_path,))

        for item in embeddings:
            row_values = (
                item.chunk_id,
                str(item.file_path),
                item.content,
                json.dumps(item.embedding),  # vector stored as JSON text
                item.created_at.isoformat(),
                item.file_hash,
                item.start_line,
                item.end_line,
                item.file_type,
            )
            conn.execute(
                """
                    INSERT INTO embeddings
                    (chunk_id, file_path, content, embedding, created_at,
                     file_hash, start_line, end_line, file_type)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
                    """,
                row_values,
            )

        conn.commit()
421
+
422
def _update_file_tracking(
    self, file_path: Path, file_hash: str, chunk_count: int
) -> None:
    """Record that a file was indexed.

    Inserts or replaces the file's row in ``file_tracking`` with the given
    hash and chunk count, stamps it with the current local time in ISO
    format, and commits.

    Args:
        file_path: File that was indexed.
        file_hash: Hash of the indexed content.
        chunk_count: Number of chunks produced for the file.
    """
    with self._get_connection() as conn:
        tracking_row = (
            str(file_path),
            file_hash,
            datetime.now().isoformat(),
            chunk_count,
        )
        conn.execute(
            """
                INSERT OR REPLACE INTO file_tracking
                (file_path, file_hash, last_indexed, chunk_count)
                VALUES (?, ?, ?, ?)
                """,
            tracking_row,
        )
        conn.commit()
442
+
443
def search(self, query: SearchQuery) -> list[SearchResult]:
    """Perform semantic search and return results.

    The query text is embedded, compared against every stored embedding
    (optionally restricted to ``query.file_types``), filtered by
    ``query.min_similarity``, ranked, and cut to ``query.max_results``.

    Context lines are loaded only for the hits that are actually returned.
    Loading them requires reading the source file from disk, so doing it
    before truncation would waste one file read per discarded hit.

    Args:
        query: Search query with parameters.

    Returns:
        Search results sorted by similarity score (descending); hits with
        equal scores keep their database order. Empty when the index holds
        no matching embeddings.
    """
    # Generate embedding for query
    query_embedding = self.embedding_service.generate_embedding(query.query)

    # Get all candidate embeddings from database
    embeddings_data: list[dict[str, t.Any]] = self._get_all_embeddings(
        query.file_types
    )
    if not embeddings_data:
        return []

    # Calculate similarities in one batch
    similarities = self.embedding_service.calculate_similarities_batch(
        query_embedding, [data["embedding"] for data in embeddings_data]
    )

    # Filter and rank first. ``list.sort`` is stable (also with
    # ``reverse=True``), so ties keep the order the rows were fetched in.
    matches = [
        (data, similarity)
        for data, similarity in zip(embeddings_data, similarities)
        if similarity >= query.min_similarity
    ]
    matches.sort(key=lambda match: match[1], reverse=True)

    results = []
    for data, similarity in matches[: query.max_results]:
        source_path = Path(data["file_path"])

        # Get context lines if requested
        context_lines = []
        if query.include_context:
            context_lines = self._get_context_lines(
                source_path,
                data["start_line"],
                data["end_line"],
                query.context_lines,
            )

        results.append(
            SearchResult(
                file_path=source_path,
                chunk_id=data["chunk_id"],
                content=data["content"],
                similarity_score=similarity,
                start_line=data["start_line"],
                end_line=data["end_line"],
                file_type=data["file_type"],
                context_lines=context_lines,
            )
        )

    return results
497
+
498
+ def _get_all_embeddings(
499
+ self, file_types: list[str] | None = None
500
+ ) -> list[dict[str, t.Any]]:
501
+ """Get all embeddings from database with optional file type filtering.
502
+
503
+ Args:
504
+ file_types: Optional list of file types to filter by
505
+
506
+ Returns:
507
+ List of embedding data dictionaries
508
+ """
509
+ embeddings_data = []
510
+
511
+ with self._get_connection() as conn:
512
+ if file_types:
513
+ # Build parameterized query safely with proper placeholders
514
+ placeholders = ",".join("?" * len(file_types))
515
+ # Use static query template with placeholders - safe from injection
516
+ query_template = (
517
+ "SELECT chunk_id, file_path, content, embedding, start_line, end_line, file_type "
518
+ "FROM embeddings WHERE file_type IN ({})"
519
+ )
520
+ query_sql = query_template.format(placeholders) # nosec B608
521
+ cursor = conn.execute(query_sql, file_types)
522
+ else:
523
+ cursor = conn.execute("""
524
+ SELECT chunk_id, file_path, content, embedding, start_line, end_line, file_type
525
+ FROM embeddings
526
+ """)
527
+
528
+ for row in cursor.fetchall():
529
+ data = {
530
+ "chunk_id": row["chunk_id"],
531
+ "file_path": row["file_path"],
532
+ "content": row["content"],
533
+ "embedding": json.loads(row["embedding"]),
534
+ "start_line": row["start_line"],
535
+ "end_line": row["end_line"],
536
+ "file_type": row["file_type"],
537
+ }
538
+ embeddings_data.append(data)
539
+
540
+ return embeddings_data
541
+
542
def _get_context_lines(
    self, file_path: Path, start_line: int, end_line: int, context_count: int
) -> list[str]:
    """Return a chunk's lines plus surrounding context from its source file.

    The file is read as UTF-8 and the window is clamped to the file's
    bounds. This is best-effort: a missing file yields an empty list, and
    any error while reading is logged as a warning and also yields an
    empty list.

    Args:
        file_path: Path to source file.
        start_line: Starting line of the chunk (treated as 1-based).
        end_line: Ending line of the chunk.
        context_count: Number of extra lines to include on each side.

    Returns:
        The selected lines without line terminators, or ``[]`` on failure.
    """
    try:
        if not file_path.exists():
            return []

        source_lines = file_path.read_text(encoding="utf-8").splitlines()

        # start_line is 1-based, hence the extra -1 for the slice start.
        first_index = max(0, start_line - context_count - 1)
        stop_index = min(len(source_lines), end_line + context_count)
        window = slice(first_index, stop_index)

        return source_lines[window]

    except Exception as e:
        logger.warning(f"Failed to get context lines for {file_path}: {e}")
        return []
571
+
572
def get_stats(self) -> IndexStats:
    """Collect summary statistics about the stored embeddings.

    Returns:
        An ``IndexStats`` with the chunk count, the number of distinct
        files, the per-file-type chunk counts, the newest ``created_at``
        value (the current time when the table is empty), the average
        content length, the configured embedding model and an estimated
        index size in megabytes.
    """
    with self._get_connection() as conn:
        # Overall counts
        chunk_row = conn.execute(
            "SELECT COUNT(*) as total_chunks FROM embeddings"
        ).fetchone()
        total_chunks = chunk_row["total_chunks"]

        file_row = conn.execute(
            "SELECT COUNT(DISTINCT file_path) as total_files FROM embeddings"
        ).fetchone()
        total_files = file_row["total_files"]

        # Chunk count per file type
        file_types = {}
        type_cursor = conn.execute("""
                SELECT file_type, COUNT(*) as count
                FROM embeddings
                GROUP BY file_type
            """)
        for type_row in type_cursor.fetchall():
            file_types[type_row["file_type"]] = type_row["count"]

        # Newest embedding timestamp; MAX() yields NULL on an empty table.
        newest_row = conn.execute(
            "SELECT MAX(created_at) as last_updated FROM embeddings"
        ).fetchone()
        last_updated_str = newest_row["last_updated"]
        if last_updated_str:
            last_updated = datetime.fromisoformat(last_updated_str)
        else:
            last_updated = datetime.now()

        # Average chunk length; AVG() yields NULL on an empty table.
        average_row = conn.execute(
            "SELECT AVG(LENGTH(content)) as avg_size FROM embeddings"
        ).fetchone()
        avg_chunk_size = average_row["avg_size"] or 0.0

        # Rough size estimate, assuming 384-dim embeddings; the factor 4 is
        # presumably the byte width of one float32 component.
        estimated_bytes = total_chunks * 384 * 4
        index_size_mb = estimated_bytes / (1024 * 1024)

        return IndexStats(
            total_files=total_files,
            total_chunks=total_chunks,
            index_size_mb=index_size_mb,
            last_updated=last_updated,
            file_types=file_types,
            embedding_model=self.config.embedding_model,
            avg_chunk_size=avg_chunk_size,
        )
627
+
628
def remove_file(self, file_path: Path) -> bool:
    """Delete one file's embeddings and tracking row from the index.

    Args:
        file_path: File to remove.

    Returns:
        True when the file had embeddings and they were deleted; False
        when it had none, in which case nothing is changed.
    """
    path_key = str(file_path)

    with self._get_connection() as conn:
        # Count first so the number of removed rows can be logged.
        count = conn.execute(
            "SELECT COUNT(*) as count FROM embeddings WHERE file_path = ?",
            (path_key,),
        ).fetchone()["count"]

        if count == 0:
            return False

        # Remove the embeddings, then the tracking entry.
        conn.execute("DELETE FROM embeddings WHERE file_path = ?", (path_key,))
        conn.execute("DELETE FROM file_tracking WHERE file_path = ?", (path_key,))
        conn.commit()

        logger.info(f"Removed {count} embeddings for file: {file_path}")
        return True
661
+
662
def clear_index(self) -> None:
    """Delete every row from ``embeddings`` and ``file_tracking``.

    Both deletions are committed together and an info message is logged.
    """
    wipe_statements = (
        "DELETE FROM embeddings",
        "DELETE FROM file_tracking",
    )

    with self._get_connection() as conn:
        for statement in wipe_statements:
            conn.execute(statement)
        conn.commit()
        logger.info("Cleared all embeddings from index")
669
+
670
def close(self) -> None:
    """Release the temporary database, if this store uses one.

    When ``_temp_db`` is set, it is closed, the file at ``db_path`` is
    deleted if it still exists, and a debug message is logged. When
    ``_temp_db`` is not set, nothing happens.
    """
    temp_handle = self._temp_db
    if not temp_handle:
        return

    temp_handle.close()

    # The file may already be gone once the handle is closed.
    if self.db_path.exists():
        self.db_path.unlink()

    logger.debug("Cleaned up temporary database")
677
+
678
def __enter__(self) -> "VectorStore":
    """Enter the ``with`` block.

    Returns:
        This store itself, so it can be bound with ``as``.
    """
    return self
681
+
682
+ def __exit__(
683
+ self,
684
+ exc_type: type[BaseException] | None,
685
+ exc_val: BaseException | None,
686
+ exc_tb: t.Any,
687
+ ) -> None:
688
+ """Context manager exit."""
689
+ self.close()