empathy-framework 4.6.6-py3-none-any.whl → 4.7.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (273)
  1. empathy_framework-4.7.1.dist-info/METADATA +690 -0
  2. empathy_framework-4.7.1.dist-info/RECORD +379 -0
  3. {empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/top_level.txt +1 -2
  4. empathy_healthcare_plugin/monitors/monitoring/__init__.py +9 -9
  5. empathy_llm_toolkit/agent_factory/__init__.py +6 -6
  6. empathy_llm_toolkit/agent_factory/adapters/wizard_adapter.py +7 -10
  7. empathy_llm_toolkit/agents_md/__init__.py +22 -0
  8. empathy_llm_toolkit/agents_md/loader.py +218 -0
  9. empathy_llm_toolkit/agents_md/parser.py +271 -0
  10. empathy_llm_toolkit/agents_md/registry.py +307 -0
  11. empathy_llm_toolkit/commands/__init__.py +51 -0
  12. empathy_llm_toolkit/commands/context.py +375 -0
  13. empathy_llm_toolkit/commands/loader.py +301 -0
  14. empathy_llm_toolkit/commands/models.py +231 -0
  15. empathy_llm_toolkit/commands/parser.py +371 -0
  16. empathy_llm_toolkit/commands/registry.py +429 -0
  17. empathy_llm_toolkit/config/__init__.py +8 -8
  18. empathy_llm_toolkit/config/unified.py +3 -7
  19. empathy_llm_toolkit/context/__init__.py +22 -0
  20. empathy_llm_toolkit/context/compaction.py +455 -0
  21. empathy_llm_toolkit/context/manager.py +434 -0
  22. empathy_llm_toolkit/hooks/__init__.py +24 -0
  23. empathy_llm_toolkit/hooks/config.py +306 -0
  24. empathy_llm_toolkit/hooks/executor.py +289 -0
  25. empathy_llm_toolkit/hooks/registry.py +302 -0
  26. empathy_llm_toolkit/hooks/scripts/__init__.py +39 -0
  27. empathy_llm_toolkit/hooks/scripts/evaluate_session.py +201 -0
  28. empathy_llm_toolkit/hooks/scripts/first_time_init.py +285 -0
  29. empathy_llm_toolkit/hooks/scripts/pre_compact.py +207 -0
  30. empathy_llm_toolkit/hooks/scripts/session_end.py +183 -0
  31. empathy_llm_toolkit/hooks/scripts/session_start.py +163 -0
  32. empathy_llm_toolkit/hooks/scripts/suggest_compact.py +225 -0
  33. empathy_llm_toolkit/learning/__init__.py +30 -0
  34. empathy_llm_toolkit/learning/evaluator.py +438 -0
  35. empathy_llm_toolkit/learning/extractor.py +514 -0
  36. empathy_llm_toolkit/learning/storage.py +560 -0
  37. empathy_llm_toolkit/providers.py +4 -11
  38. empathy_llm_toolkit/security/__init__.py +17 -17
  39. empathy_llm_toolkit/utils/tokens.py +2 -5
  40. empathy_os/__init__.py +202 -70
  41. empathy_os/cache_monitor.py +5 -3
  42. empathy_os/cli/__init__.py +11 -55
  43. empathy_os/cli/__main__.py +29 -15
  44. empathy_os/cli/commands/inspection.py +21 -12
  45. empathy_os/cli/commands/memory.py +4 -12
  46. empathy_os/cli/commands/profiling.py +198 -0
  47. empathy_os/cli/commands/utilities.py +27 -7
  48. empathy_os/cli.py +28 -57
  49. empathy_os/cli_unified.py +525 -1164
  50. empathy_os/cost_tracker.py +9 -3
  51. empathy_os/dashboard/server.py +200 -2
  52. empathy_os/hot_reload/__init__.py +7 -7
  53. empathy_os/hot_reload/config.py +6 -7
  54. empathy_os/hot_reload/integration.py +35 -35
  55. empathy_os/hot_reload/reloader.py +57 -57
  56. empathy_os/hot_reload/watcher.py +28 -28
  57. empathy_os/hot_reload/websocket.py +2 -2
  58. empathy_os/memory/__init__.py +11 -4
  59. empathy_os/memory/claude_memory.py +1 -1
  60. empathy_os/memory/cross_session.py +8 -12
  61. empathy_os/memory/edges.py +6 -6
  62. empathy_os/memory/file_session.py +770 -0
  63. empathy_os/memory/graph.py +30 -30
  64. empathy_os/memory/nodes.py +6 -6
  65. empathy_os/memory/short_term.py +15 -9
  66. empathy_os/memory/unified.py +606 -140
  67. empathy_os/meta_workflows/agent_creator.py +3 -9
  68. empathy_os/meta_workflows/cli_meta_workflows.py +113 -53
  69. empathy_os/meta_workflows/form_engine.py +6 -18
  70. empathy_os/meta_workflows/intent_detector.py +64 -24
  71. empathy_os/meta_workflows/models.py +3 -1
  72. empathy_os/meta_workflows/pattern_learner.py +13 -31
  73. empathy_os/meta_workflows/plan_generator.py +55 -47
  74. empathy_os/meta_workflows/session_context.py +2 -3
  75. empathy_os/meta_workflows/workflow.py +20 -51
  76. empathy_os/models/cli.py +2 -2
  77. empathy_os/models/tasks.py +1 -2
  78. empathy_os/models/telemetry.py +4 -1
  79. empathy_os/models/token_estimator.py +3 -1
  80. empathy_os/monitoring/alerts.py +938 -9
  81. empathy_os/monitoring/alerts_cli.py +346 -183
  82. empathy_os/orchestration/execution_strategies.py +12 -29
  83. empathy_os/orchestration/pattern_learner.py +20 -26
  84. empathy_os/orchestration/real_tools.py +6 -15
  85. empathy_os/platform_utils.py +2 -1
  86. empathy_os/plugins/__init__.py +2 -2
  87. empathy_os/plugins/base.py +64 -64
  88. empathy_os/plugins/registry.py +32 -32
  89. empathy_os/project_index/index.py +49 -15
  90. empathy_os/project_index/models.py +1 -2
  91. empathy_os/project_index/reports.py +1 -1
  92. empathy_os/project_index/scanner.py +1 -0
  93. empathy_os/redis_memory.py +10 -7
  94. empathy_os/resilience/__init__.py +1 -1
  95. empathy_os/resilience/health.py +10 -10
  96. empathy_os/routing/__init__.py +7 -7
  97. empathy_os/routing/chain_executor.py +37 -37
  98. empathy_os/routing/classifier.py +36 -36
  99. empathy_os/routing/smart_router.py +40 -40
  100. empathy_os/routing/{wizard_registry.py → workflow_registry.py} +47 -47
  101. empathy_os/scaffolding/__init__.py +8 -8
  102. empathy_os/scaffolding/__main__.py +1 -1
  103. empathy_os/scaffolding/cli.py +28 -28
  104. empathy_os/socratic/__init__.py +3 -19
  105. empathy_os/socratic/ab_testing.py +25 -36
  106. empathy_os/socratic/blueprint.py +38 -38
  107. empathy_os/socratic/cli.py +34 -20
  108. empathy_os/socratic/collaboration.py +30 -28
  109. empathy_os/socratic/domain_templates.py +9 -1
  110. empathy_os/socratic/embeddings.py +17 -13
  111. empathy_os/socratic/engine.py +135 -70
  112. empathy_os/socratic/explainer.py +70 -60
  113. empathy_os/socratic/feedback.py +24 -19
  114. empathy_os/socratic/forms.py +15 -10
  115. empathy_os/socratic/generator.py +51 -35
  116. empathy_os/socratic/llm_analyzer.py +25 -23
  117. empathy_os/socratic/mcp_server.py +99 -159
  118. empathy_os/socratic/session.py +19 -13
  119. empathy_os/socratic/storage.py +98 -67
  120. empathy_os/socratic/success.py +38 -27
  121. empathy_os/socratic/visual_editor.py +51 -39
  122. empathy_os/socratic/web_ui.py +99 -66
  123. empathy_os/telemetry/cli.py +3 -1
  124. empathy_os/telemetry/usage_tracker.py +1 -3
  125. empathy_os/test_generator/__init__.py +3 -3
  126. empathy_os/test_generator/cli.py +28 -28
  127. empathy_os/test_generator/generator.py +64 -66
  128. empathy_os/test_generator/risk_analyzer.py +11 -11
  129. empathy_os/vscode_bridge 2.py +173 -0
  130. empathy_os/vscode_bridge.py +173 -0
  131. empathy_os/workflows/__init__.py +212 -120
  132. empathy_os/workflows/batch_processing.py +8 -24
  133. empathy_os/workflows/bug_predict.py +1 -1
  134. empathy_os/workflows/code_review.py +20 -5
  135. empathy_os/workflows/code_review_pipeline.py +13 -8
  136. empathy_os/workflows/keyboard_shortcuts/workflow.py +6 -2
  137. empathy_os/workflows/manage_documentation.py +1 -0
  138. empathy_os/workflows/orchestrated_health_check.py +6 -11
  139. empathy_os/workflows/orchestrated_release_prep.py +3 -3
  140. empathy_os/workflows/pr_review.py +18 -10
  141. empathy_os/workflows/progressive/README 2.md +454 -0
  142. empathy_os/workflows/progressive/__init__ 2.py +92 -0
  143. empathy_os/workflows/progressive/__init__.py +2 -12
  144. empathy_os/workflows/progressive/cli 2.py +242 -0
  145. empathy_os/workflows/progressive/cli.py +14 -37
  146. empathy_os/workflows/progressive/core 2.py +488 -0
  147. empathy_os/workflows/progressive/core.py +12 -12
  148. empathy_os/workflows/progressive/orchestrator 2.py +701 -0
  149. empathy_os/workflows/progressive/orchestrator.py +166 -144
  150. empathy_os/workflows/progressive/reports 2.py +528 -0
  151. empathy_os/workflows/progressive/reports.py +22 -31
  152. empathy_os/workflows/progressive/telemetry 2.py +280 -0
  153. empathy_os/workflows/progressive/telemetry.py +8 -14
  154. empathy_os/workflows/progressive/test_gen 2.py +514 -0
  155. empathy_os/workflows/progressive/test_gen.py +29 -48
  156. empathy_os/workflows/progressive/workflow 2.py +628 -0
  157. empathy_os/workflows/progressive/workflow.py +31 -70
  158. empathy_os/workflows/release_prep.py +21 -6
  159. empathy_os/workflows/release_prep_crew.py +1 -0
  160. empathy_os/workflows/secure_release.py +13 -6
  161. empathy_os/workflows/security_audit.py +8 -3
  162. empathy_os/workflows/test_coverage_boost_crew.py +3 -2
  163. empathy_os/workflows/test_maintenance_crew.py +1 -0
  164. empathy_os/workflows/test_runner.py +16 -12
  165. empathy_software_plugin/SOFTWARE_PLUGIN_README.md +25 -703
  166. empathy_software_plugin/cli.py +0 -122
  167. patterns/README.md +119 -0
  168. patterns/__init__.py +95 -0
  169. patterns/behavior.py +298 -0
  170. patterns/code_review_memory.json +441 -0
  171. patterns/core.py +97 -0
  172. patterns/debugging.json +3763 -0
  173. patterns/empathy.py +268 -0
  174. patterns/health_check_memory.json +505 -0
  175. patterns/input.py +161 -0
  176. patterns/memory_graph.json +8 -0
  177. patterns/refactoring_memory.json +1113 -0
  178. patterns/registry.py +663 -0
  179. patterns/security_memory.json +8 -0
  180. patterns/structural.py +415 -0
  181. patterns/validation.py +194 -0
  182. coach_wizards/__init__.py +0 -45
  183. coach_wizards/accessibility_wizard.py +0 -91
  184. coach_wizards/api_wizard.py +0 -91
  185. coach_wizards/base_wizard.py +0 -209
  186. coach_wizards/cicd_wizard.py +0 -91
  187. coach_wizards/code_reviewer_README.md +0 -60
  188. coach_wizards/code_reviewer_wizard.py +0 -180
  189. coach_wizards/compliance_wizard.py +0 -91
  190. coach_wizards/database_wizard.py +0 -91
  191. coach_wizards/debugging_wizard.py +0 -91
  192. coach_wizards/documentation_wizard.py +0 -91
  193. coach_wizards/generate_wizards.py +0 -347
  194. coach_wizards/localization_wizard.py +0 -173
  195. coach_wizards/migration_wizard.py +0 -91
  196. coach_wizards/monitoring_wizard.py +0 -91
  197. coach_wizards/observability_wizard.py +0 -91
  198. coach_wizards/performance_wizard.py +0 -91
  199. coach_wizards/prompt_engineering_wizard.py +0 -661
  200. coach_wizards/refactoring_wizard.py +0 -91
  201. coach_wizards/scaling_wizard.py +0 -90
  202. coach_wizards/security_wizard.py +0 -92
  203. coach_wizards/testing_wizard.py +0 -91
  204. empathy_framework-4.6.6.dist-info/METADATA +0 -1597
  205. empathy_framework-4.6.6.dist-info/RECORD +0 -410
  206. empathy_llm_toolkit/wizards/__init__.py +0 -43
  207. empathy_llm_toolkit/wizards/base_wizard.py +0 -364
  208. empathy_llm_toolkit/wizards/customer_support_wizard.py +0 -190
  209. empathy_llm_toolkit/wizards/healthcare_wizard.py +0 -378
  210. empathy_llm_toolkit/wizards/patient_assessment_README.md +0 -64
  211. empathy_llm_toolkit/wizards/patient_assessment_wizard.py +0 -193
  212. empathy_llm_toolkit/wizards/technology_wizard.py +0 -209
  213. empathy_os/wizard_factory_cli.py +0 -170
  214. empathy_software_plugin/wizards/__init__.py +0 -42
  215. empathy_software_plugin/wizards/advanced_debugging_wizard.py +0 -395
  216. empathy_software_plugin/wizards/agent_orchestration_wizard.py +0 -511
  217. empathy_software_plugin/wizards/ai_collaboration_wizard.py +0 -503
  218. empathy_software_plugin/wizards/ai_context_wizard.py +0 -441
  219. empathy_software_plugin/wizards/ai_documentation_wizard.py +0 -503
  220. empathy_software_plugin/wizards/base_wizard.py +0 -288
  221. empathy_software_plugin/wizards/book_chapter_wizard.py +0 -519
  222. empathy_software_plugin/wizards/code_review_wizard.py +0 -604
  223. empathy_software_plugin/wizards/debugging/__init__.py +0 -50
  224. empathy_software_plugin/wizards/debugging/bug_risk_analyzer.py +0 -414
  225. empathy_software_plugin/wizards/debugging/config_loaders.py +0 -446
  226. empathy_software_plugin/wizards/debugging/fix_applier.py +0 -469
  227. empathy_software_plugin/wizards/debugging/language_patterns.py +0 -385
  228. empathy_software_plugin/wizards/debugging/linter_parsers.py +0 -470
  229. empathy_software_plugin/wizards/debugging/verification.py +0 -369
  230. empathy_software_plugin/wizards/enhanced_testing_wizard.py +0 -537
  231. empathy_software_plugin/wizards/memory_enhanced_debugging_wizard.py +0 -816
  232. empathy_software_plugin/wizards/multi_model_wizard.py +0 -501
  233. empathy_software_plugin/wizards/pattern_extraction_wizard.py +0 -422
  234. empathy_software_plugin/wizards/pattern_retriever_wizard.py +0 -400
  235. empathy_software_plugin/wizards/performance/__init__.py +0 -9
  236. empathy_software_plugin/wizards/performance/bottleneck_detector.py +0 -221
  237. empathy_software_plugin/wizards/performance/profiler_parsers.py +0 -278
  238. empathy_software_plugin/wizards/performance/trajectory_analyzer.py +0 -429
  239. empathy_software_plugin/wizards/performance_profiling_wizard.py +0 -305
  240. empathy_software_plugin/wizards/prompt_engineering_wizard.py +0 -425
  241. empathy_software_plugin/wizards/rag_pattern_wizard.py +0 -461
  242. empathy_software_plugin/wizards/security/__init__.py +0 -32
  243. empathy_software_plugin/wizards/security/exploit_analyzer.py +0 -290
  244. empathy_software_plugin/wizards/security/owasp_patterns.py +0 -241
  245. empathy_software_plugin/wizards/security/vulnerability_scanner.py +0 -604
  246. empathy_software_plugin/wizards/security_analysis_wizard.py +0 -322
  247. empathy_software_plugin/wizards/security_learning_wizard.py +0 -740
  248. empathy_software_plugin/wizards/tech_debt_wizard.py +0 -726
  249. empathy_software_plugin/wizards/testing/__init__.py +0 -27
  250. empathy_software_plugin/wizards/testing/coverage_analyzer.py +0 -459
  251. empathy_software_plugin/wizards/testing/quality_analyzer.py +0 -525
  252. empathy_software_plugin/wizards/testing/test_suggester.py +0 -533
  253. empathy_software_plugin/wizards/testing_wizard.py +0 -274
  254. wizards/__init__.py +0 -82
  255. wizards/admission_assessment_wizard.py +0 -644
  256. wizards/care_plan.py +0 -321
  257. wizards/clinical_assessment.py +0 -769
  258. wizards/discharge_planning.py +0 -77
  259. wizards/discharge_summary_wizard.py +0 -468
  260. wizards/dosage_calculation.py +0 -497
  261. wizards/incident_report_wizard.py +0 -454
  262. wizards/medication_reconciliation.py +0 -85
  263. wizards/nursing_assessment.py +0 -171
  264. wizards/patient_education.py +0 -654
  265. wizards/quality_improvement.py +0 -705
  266. wizards/sbar_report.py +0 -324
  267. wizards/sbar_wizard.py +0 -608
  268. wizards/shift_handoff_wizard.py +0 -535
  269. wizards/soap_note_wizard.py +0 -679
  270. wizards/treatment_plan.py +0 -15
  271. {empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/WHEEL +0 -0
  272. {empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/entry_points.txt +0 -0
  273. {empathy_framework-4.6.6.dist-info → empathy_framework-4.7.1.dist-info}/licenses/LICENSE +0 -0
@@ -1,461 +0,0 @@
- """RAG Pattern Wizard - Level 4 Anticipatory Empathy
-
- Alerts developers when RAG (Retrieval-Augmented Generation) implementation
- will encounter scalability or quality issues.
-
- In our experience, RAG seems simple at first (vector DB + similarity search).
- But we learned: embedding quality, chunk strategy, and retrieval relevance
- degrade as data grows. This wizard alerts before those issues surface.
-
- Copyright 2025 Smart AI Memory, LLC
- Licensed under Fair Source 0.9
- """
-
- import os
- import sys
- from typing import Any
-
- sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "src"))
-
- from empathy_os.plugins import BaseWizard
-
-
- class RAGPatternWizard(BaseWizard):
-     """Level 4 Anticipatory: Predicts RAG implementation issues.
-
-     What We Learned About RAG:
-     - Naive chunking (split by char count) fails when data grows
-     - Embedding quality matters more than vector DB choice
-     - Retrieval relevance degrades as corpus grows without tuning
-     - Hybrid search (vector + keyword) becomes essential at scale
-     """
-
-     def __init__(self):
-         super().__init__(
-             name="RAG Pattern Wizard",
-             domain="software",
-             empathy_level=4,
-             category="ai_development",
-         )
-
-     def get_required_context(self) -> list[str]:
-         """Required context for analysis"""
-         return [
-             "rag_implementation",  # RAG implementation files
-             "embedding_strategy",  # How embeddings are created
-             "chunk_strategy",  # How documents are chunked
-             "vector_db_config",  # Vector DB configuration
-             "corpus_size",  # Current corpus size
-         ]
-
-     async def analyze(self, context: dict[str, Any]) -> dict[str, Any]:
-         """Analyze RAG implementation and predict quality/scale issues.
-
-         In our experience: RAG breaks down in predictable ways as it scales.
-         Early detection prevents painful rewrites.
-         """
-         self.validate_context(context)
-
-         rag_impl = context.get("rag_implementation", [])
-         embedding_strat = context.get("embedding_strategy", {})
-         chunk_strat = context.get("chunk_strategy", {})
-         corpus_size = context.get("corpus_size", 0)
-
-         # Current issues
-         issues = await self._analyze_rag_implementation(
-             rag_impl,
-             embedding_strat,
-             chunk_strat,
-             corpus_size,
-         )
-
-         # Level 4: Predict future RAG issues
-         predictions = await self._predict_rag_degradation(
-             rag_impl,
-             embedding_strat,
-             chunk_strat,
-             corpus_size,
-             context,
-         )
-
-         recommendations = self._generate_recommendations(issues, predictions)
-         patterns = self._extract_patterns(issues, predictions)
-
-         return {
-             "issues": issues,
-             "predictions": predictions,
-             "recommendations": recommendations,
-             "patterns": patterns,
-             "confidence": 0.80,
-             "metadata": {
-                 "wizard": self.name,
-                 "empathy_level": self.empathy_level,
-                 "corpus_size": corpus_size,
-                 "chunking_strategy": chunk_strat.get("type", "unknown"),
-             },
-         }
-
-     async def _analyze_rag_implementation(
-         self,
-         rag_impl: list[str],
-         embedding_strat: dict,
-         chunk_strat: dict,
-         corpus_size: int,
-     ) -> list[dict[str, Any]]:
-         """Analyze current RAG implementation"""
-         issues = []
-
-         # Issue: Naive character-based chunking
-         if chunk_strat.get("type") == "character" or chunk_strat.get("type") == "fixed":
-             issues.append(
-                 {
-                     "severity": "warning",
-                     "type": "naive_chunking",
-                     "message": (
-                         "Using character-based chunking. In our experience, this breaks "
-                         "semantic coherence and reduces retrieval quality."
-                     ),
-                     "suggestion": (
-                         "Use semantic chunking: split by paragraphs, sentences, or "
-                         "semantic boundaries (e.g., LangChain SemanticChunker)"
-                     ),
-                 },
-             )
-
-         # Issue: No chunk overlap
-         if not chunk_strat.get("overlap", False):
-             issues.append(
-                 {
-                     "severity": "info",
-                     "type": "no_chunk_overlap",
-                     "message": (
-                         "No chunk overlap detected. Without overlap, relevant context "
-                         "split across chunk boundaries is lost."
-                     ),
-                     "suggestion": "Add 10-20% overlap between chunks",
-                 },
-             )
-
-         # Issue: No metadata enrichment
-         if not embedding_strat.get("metadata_enrichment", False):
-             issues.append(
-                 {
-                     "severity": "info",
-                     "type": "missing_metadata",
-                     "message": (
-                         "Chunks not enriched with metadata (source, date, category). "
-                         "In our experience, metadata filtering dramatically improves retrieval."
-                     ),
-                     "suggestion": "Add metadata to chunks for filtering and relevance",
-                 },
-             )
-
-         # Issue: Single embedding model
-         if not embedding_strat.get("multi_model", False) and corpus_size > 1000:
-             issues.append(
-                 {
-                     "severity": "info",
-                     "type": "single_embedding_model",
-                     "message": (
-                         "Using single embedding model for diverse content. "
-                         "Different content types (code, docs, data) benefit from "
-                         "specialized embeddings."
-                     ),
-                     "suggestion": "Consider domain-specific embeddings for different content types",
-                 },
-             )
-
-         # Issue: No reranking
-         if not self._has_reranking(rag_impl):
-             issues.append(
-                 {
-                     "severity": "warning",
-                     "type": "no_reranking",
-                     "message": (
-                         "No reranking layer detected. In our experience, initial vector "
-                         "similarity often returns suboptimal results. Reranking improves quality 30-50%."
-                     ),
-                     "suggestion": "Add reranker (e.g., Cohere rerank, cross-encoder model)",
-                 },
-             )
-
-         return issues
-
-     async def _predict_rag_degradation(
-         self,
-         rag_impl: list[str],
-         embedding_strat: dict,
-         chunk_strat: dict,
-         corpus_size: int,
-         full_context: dict[str, Any],
-     ) -> list[dict[str, Any]]:
-         """Level 4: Predict when RAG quality will degrade.
-
-         Based on our experience: RAG breaks at predictable thresholds.
-         """
-         predictions = []
-
-         # Pattern 1: Corpus growth will degrade retrieval
-         if corpus_size > 5000 and not self._has_hybrid_search(rag_impl):
-             predictions.append(
-                 {
-                     "type": "retrieval_degradation",
-                     "alert": (
-                         f"Corpus size: {corpus_size:,} documents. In our experience, "
-                         "pure vector search degrades above 10,000 documents without hybrid search. "
-                         "Alert: Implement hybrid search (vector + keyword) before quality drops."
-                     ),
-                     "probability": "high",
-                     "impact": "high",
-                     "prevention_steps": [
-                         "Implement hybrid search (combine vector similarity + BM25 keyword)",
-                         "Add query expansion (synonyms, related terms)",
-                         "Implement result fusion (merge vector and keyword results)",
-                         "Add relevance feedback loop (learn from user selections)",
-                     ],
-                     "reasoning": (
-                         "Vector search alone: high recall, low precision at scale. "
-                         "Keyword search: high precision, low recall. "
-                         "Hybrid: best of both. We've seen 40-60% quality improvement."
-                     ),
-                     "personal_experience": (
-                         "At 8,000 documents, our vector-only retrieval started returning "
-                         "too many 'similar but not relevant' results. Added BM25 hybrid search, "
-                         "quality jumped immediately."
-                     ),
-                 },
-             )
-
-         # Pattern 2: Embedding staleness
-         if corpus_size > 1000 and not embedding_strat.get("refresh_strategy"):
-             predictions.append(
-                 {
-                     "type": "embedding_staleness",
-                     "alert": (
-                         "No embedding refresh strategy detected. As documents change, "
-                         "embeddings become stale. In our experience, this causes gradual "
-                         "quality degradation that's hard to notice."
-                     ),
-                     "probability": "medium-high",
-                     "impact": "medium",
-                     "prevention_steps": [
-                         "Implement incremental embedding updates (only changed docs)",
-                         "Add embedding versioning (track which model version)",
-                         "Create embedding freshness metrics",
-                         "Schedule periodic re-embedding for entire corpus",
-                     ],
-                     "reasoning": (
-                         "Documents change, but embeddings don't auto-update. "
-                         "Stale embeddings = stale retrieval. Incremental updates solve this."
-                     ),
-                 },
-             )
-
-         # Pattern 3: No query understanding layer
-         if not self._has_query_understanding(rag_impl):
-             predictions.append(
-                 {
-                     "type": "poor_query_handling",
-                     "alert": (
-                         "No query understanding layer. Users ask questions in many ways. "
-                         "In our experience, naive query → embedding → search fails on "
-                         "complex queries. Alert: Add query processing before retrieval quality plateaus."
-                     ),
-                     "probability": "medium",
-                     "impact": "high",
-                     "prevention_steps": [
-                         "Add query decomposition (break complex queries into sub-queries)",
-                         "Implement query rewriting (rephrase for better retrieval)",
-                         "Add intent classification (route to different retrieval strategies)",
-                         "Create query expansion (add relevant terms)",
-                     ],
-                     "reasoning": (
-                         "User query: 'How do I prevent SQL injection in React apps?'. "
-                         "Needs decomposition: 1) SQL injection prevention, 2) React context. "
-                         "Naive embedding misses nuance."
-                     ),
-                     "personal_experience": (
-                         "We added simple query rewriting (expand acronyms, add synonyms). "
-                         "Retrieval quality improved 25% with minimal effort."
-                     ),
-                 },
-             )
-
-         # Pattern 4: Missing evaluation framework
-         if not self._has_evaluation(rag_impl):
-             predictions.append(
-                 {
-                     "type": "no_rag_evaluation",
-                     "alert": (
-                         "No RAG evaluation framework detected. In our experience, "
-                         "you can't improve what you don't measure. Alert: Build evaluation "
-                         "before you waste time on optimizations that don't help."
-                     ),
-                     "probability": "high",
-                     "impact": "high",
-                     "prevention_steps": [
-                         "Create ground truth Q&A pairs (what SHOULD be retrieved)",
-                         "Implement retrieval metrics (MRR, NDCG, precision@k)",
-                         "Add end-to-end evaluation (does RAG answer correctly?)",
-                         "Build A/B testing framework (compare strategies)",
-                         "Create evaluation dashboard (track quality over time)",
-                     ],
-                     "reasoning": (
-                         "Is chunking A better than chunking B? Is reranking worth it? "
-                         "Without eval, you're guessing. We spent weeks optimizing the wrong things."
-                     ),
-                     "personal_experience": (
-                         "We thought our RAG was great. Built evaluation, discovered 40% "
-                         "of queries retrieved irrelevant docs. Fixed in 2 days with data."
-                     ),
-                 },
-             )
-
-         # Pattern 5: Context window waste
-         if not self._has_context_optimization(rag_impl):
-             predictions.append(
-                 {
-                     "type": "inefficient_context_usage",
-                     "alert": (
-                         "No context optimization detected. In our experience, dumping "
-                         "all retrieved chunks into context wastes tokens and reduces quality. "
-                         "Alert: Optimize context usage before costs and quality both degrade."
-                     ),
-                     "probability": "medium",
-                     "impact": "medium",
-                     "prevention_steps": [
-                         "Implement relevance-based pruning (only use top-k)",
-                         "Add chunk summarization (compress verbose chunks)",
-                         "Create context deduplication (remove redundant info)",
-                         "Implement adaptive retrieval (fewer chunks for simple queries)",
-                         "Add context budget management",
-                     ],
-                     "reasoning": (
-                         "Retrieving 10 chunks doesn't mean use all 10. "
-                         "Top 3 might be enough. Extra context confuses AI and costs money."
-                     ),
-                 },
-             )
-
-         return predictions
-
-     def _generate_recommendations(self, issues: list[dict], predictions: list[dict]) -> list[str]:
-         """Generate actionable recommendations"""
-         recommendations = []
-
-         # Quick wins
-         if any(i["type"] == "no_reranking" for i in issues):
-             recommendations.append(
-                 "[QUICK WIN] Add reranking layer. In our experience, "
-                 "this is highest ROI improvement (30-50% quality boost, minimal effort).",
-             )
-
-         # High-impact predictions
-         for pred in predictions:
-             if pred.get("impact") == "high":
-                 recommendations.append(f"\n[ALERT] {pred['alert']}")
-                 if "personal_experience" in pred:
-                     recommendations.append(f"Experience: {pred['personal_experience']}")
-                 recommendations.append("Prevention steps:")
-                 for i, step in enumerate(pred["prevention_steps"][:3], 1):
-                     recommendations.append(f"  {i}. {step}")
-
-         return recommendations
-
-     def _extract_patterns(
-         self,
-         issues: list[dict],
-         predictions: list[dict],
-     ) -> list[dict[str, Any]]:
-         """Extract cross-domain patterns"""
-         return [
-             {
-                 "pattern_type": "retrieval_quality_degradation",
-                 "description": (
-                     "Single-strategy retrieval systems degrade as corpus grows. "
-                     "Hybrid approaches become essential at scale."
-                 ),
-                 "domain_agnostic": True,
-                 "applicable_to": [
-                     "RAG systems",
-                     "Search engines",
-                     "Recommendation systems",
-                     "Information retrieval (healthcare, legal, etc.)",
-                 ],
-                 "threshold": "5,000-10,000 items",
-                 "solution": "Hybrid retrieval (multiple signals combined)",
-             },
-         ]
-
-     # Helper methods
-
-     def _has_reranking(self, rag_impl: list[str]) -> bool:
-         """Check for reranking layer"""
-         for file_path in rag_impl:
-             try:
-                 with open(file_path) as f:
-                     content = f.read()
-                 if any(
-                     kw in content.lower() for kw in ["rerank", "cross-encoder", "cohere.rerank"]
-                 ):
-                     return True
-             except OSError:
-                 pass
-         return False
-
-     def _has_hybrid_search(self, rag_impl: list[str]) -> bool:
-         """Check for hybrid search implementation"""
-         for file_path in rag_impl:
-             try:
-                 with open(file_path) as f:
-                     content = f.read()
-                 if any(kw in content.lower() for kw in ["hybrid", "bm25", "keyword", "fusion"]):
-                     return True
-             except OSError:
-                 pass
-         return False
-
-     def _has_query_understanding(self, rag_impl: list[str]) -> bool:
-         """Check for query understanding/processing"""
-         for file_path in rag_impl:
-             try:
-                 with open(file_path) as f:
-                     content = f.read()
-                 if any(
-                     kw in content.lower()
-                     for kw in ["query_rewrite", "query_expansion", "decompose", "intent"]
-                 ):
-                     return True
-             except OSError:
-                 pass
-         return False
-
-     def _has_evaluation(self, rag_impl: list[str]) -> bool:
-         """Check for RAG evaluation framework"""
-         for file_path in rag_impl:
-             try:
-                 with open(file_path) as f:
-                     content = f.read()
-                 if any(
-                     kw in content.lower()
-                     for kw in ["evaluate", "metrics", "ground_truth", "precision", "recall"]
-                 ):
-                     return True
-             except OSError:
-                 pass
-         return False
-
-     def _has_context_optimization(self, rag_impl: list[str]) -> bool:
-         """Check for context optimization strategies"""
-         for file_path in rag_impl:
-             try:
-                 with open(file_path) as f:
-                     content = f.read()
-                 if any(
-                     kw in content.lower()
-                     for kw in ["prune", "summarize", "deduplicate", "context_budget"]
-                 ):
-                     return True
-             except OSError:
-                 pass
-         return False
@@ -1,32 +0,0 @@
- """Security Analysis Components
-
- Supporting modules for Security Analysis Wizard.
-
- Copyright 2025 Smart-AI-Memory
- Licensed under Fair Source License 0.9
- """
-
- from .exploit_analyzer import ExploitAnalyzer
- from .owasp_patterns import OWASPPatternDetector
- from .vulnerability_scanner import (
-     DependencyVulnerability,
-     Severity,
-     Vulnerability,
-     VulnerabilityScanner,
-     VulnerabilityScanReport,
-     VulnerabilityType,
- )
-
- __all__ = [
-     "DependencyVulnerability",
-     # Exploit Analysis
-     "ExploitAnalyzer",
-     # OWASP Patterns
-     "OWASPPatternDetector",
-     "Severity",
-     "Vulnerability",
-     "VulnerabilityScanReport",
-     # Vulnerability Scanning
-     "VulnerabilityScanner",
-     "VulnerabilityType",
- ]