kailash 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +33 -1
- kailash/access_control/__init__.py +129 -0
- kailash/access_control/managers.py +461 -0
- kailash/access_control/rule_evaluators.py +467 -0
- kailash/access_control_abac.py +825 -0
- kailash/config/__init__.py +27 -0
- kailash/config/database_config.py +359 -0
- kailash/database/__init__.py +28 -0
- kailash/database/execution_pipeline.py +499 -0
- kailash/middleware/__init__.py +306 -0
- kailash/middleware/auth/__init__.py +33 -0
- kailash/middleware/auth/access_control.py +436 -0
- kailash/middleware/auth/auth_manager.py +422 -0
- kailash/middleware/auth/jwt_auth.py +477 -0
- kailash/middleware/auth/kailash_jwt_auth.py +616 -0
- kailash/middleware/communication/__init__.py +37 -0
- kailash/middleware/communication/ai_chat.py +989 -0
- kailash/middleware/communication/api_gateway.py +802 -0
- kailash/middleware/communication/events.py +470 -0
- kailash/middleware/communication/realtime.py +710 -0
- kailash/middleware/core/__init__.py +21 -0
- kailash/middleware/core/agent_ui.py +890 -0
- kailash/middleware/core/schema.py +643 -0
- kailash/middleware/core/workflows.py +396 -0
- kailash/middleware/database/__init__.py +63 -0
- kailash/middleware/database/base.py +113 -0
- kailash/middleware/database/base_models.py +525 -0
- kailash/middleware/database/enums.py +106 -0
- kailash/middleware/database/migrations.py +12 -0
- kailash/{api/database.py → middleware/database/models.py} +183 -291
- kailash/middleware/database/repositories.py +685 -0
- kailash/middleware/database/session_manager.py +19 -0
- kailash/middleware/mcp/__init__.py +38 -0
- kailash/middleware/mcp/client_integration.py +585 -0
- kailash/middleware/mcp/enhanced_server.py +576 -0
- kailash/nodes/__init__.py +27 -3
- kailash/nodes/admin/__init__.py +42 -0
- kailash/nodes/admin/audit_log.py +794 -0
- kailash/nodes/admin/permission_check.py +864 -0
- kailash/nodes/admin/role_management.py +823 -0
- kailash/nodes/admin/security_event.py +1523 -0
- kailash/nodes/admin/user_management.py +944 -0
- kailash/nodes/ai/a2a.py +24 -7
- kailash/nodes/ai/ai_providers.py +248 -40
- kailash/nodes/ai/embedding_generator.py +11 -11
- kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
- kailash/nodes/ai/llm_agent.py +436 -5
- kailash/nodes/ai/self_organizing.py +85 -10
- kailash/nodes/ai/vision_utils.py +148 -0
- kailash/nodes/alerts/__init__.py +26 -0
- kailash/nodes/alerts/base.py +234 -0
- kailash/nodes/alerts/discord.py +499 -0
- kailash/nodes/api/auth.py +287 -6
- kailash/nodes/api/rest.py +151 -0
- kailash/nodes/auth/__init__.py +17 -0
- kailash/nodes/auth/directory_integration.py +1228 -0
- kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
- kailash/nodes/auth/mfa.py +2338 -0
- kailash/nodes/auth/risk_assessment.py +872 -0
- kailash/nodes/auth/session_management.py +1093 -0
- kailash/nodes/auth/sso.py +1040 -0
- kailash/nodes/base.py +344 -13
- kailash/nodes/base_cycle_aware.py +4 -2
- kailash/nodes/base_with_acl.py +1 -1
- kailash/nodes/code/python.py +283 -10
- kailash/nodes/compliance/__init__.py +9 -0
- kailash/nodes/compliance/data_retention.py +1888 -0
- kailash/nodes/compliance/gdpr.py +2004 -0
- kailash/nodes/data/__init__.py +22 -2
- kailash/nodes/data/async_connection.py +469 -0
- kailash/nodes/data/async_sql.py +757 -0
- kailash/nodes/data/async_vector.py +598 -0
- kailash/nodes/data/readers.py +767 -0
- kailash/nodes/data/retrieval.py +360 -1
- kailash/nodes/data/sharepoint_graph.py +397 -21
- kailash/nodes/data/sql.py +94 -5
- kailash/nodes/data/streaming.py +68 -8
- kailash/nodes/data/vector_db.py +54 -4
- kailash/nodes/enterprise/__init__.py +13 -0
- kailash/nodes/enterprise/batch_processor.py +741 -0
- kailash/nodes/enterprise/data_lineage.py +497 -0
- kailash/nodes/logic/convergence.py +31 -9
- kailash/nodes/logic/operations.py +14 -3
- kailash/nodes/mixins/__init__.py +8 -0
- kailash/nodes/mixins/event_emitter.py +201 -0
- kailash/nodes/mixins/mcp.py +9 -4
- kailash/nodes/mixins/security.py +165 -0
- kailash/nodes/monitoring/__init__.py +7 -0
- kailash/nodes/monitoring/performance_benchmark.py +2497 -0
- kailash/nodes/rag/__init__.py +284 -0
- kailash/nodes/rag/advanced.py +1615 -0
- kailash/nodes/rag/agentic.py +773 -0
- kailash/nodes/rag/conversational.py +999 -0
- kailash/nodes/rag/evaluation.py +875 -0
- kailash/nodes/rag/federated.py +1188 -0
- kailash/nodes/rag/graph.py +721 -0
- kailash/nodes/rag/multimodal.py +671 -0
- kailash/nodes/rag/optimized.py +933 -0
- kailash/nodes/rag/privacy.py +1059 -0
- kailash/nodes/rag/query_processing.py +1335 -0
- kailash/nodes/rag/realtime.py +764 -0
- kailash/nodes/rag/registry.py +547 -0
- kailash/nodes/rag/router.py +837 -0
- kailash/nodes/rag/similarity.py +1854 -0
- kailash/nodes/rag/strategies.py +566 -0
- kailash/nodes/rag/workflows.py +575 -0
- kailash/nodes/security/__init__.py +19 -0
- kailash/nodes/security/abac_evaluator.py +1411 -0
- kailash/nodes/security/audit_log.py +103 -0
- kailash/nodes/security/behavior_analysis.py +1893 -0
- kailash/nodes/security/credential_manager.py +401 -0
- kailash/nodes/security/rotating_credentials.py +760 -0
- kailash/nodes/security/security_event.py +133 -0
- kailash/nodes/security/threat_detection.py +1103 -0
- kailash/nodes/testing/__init__.py +9 -0
- kailash/nodes/testing/credential_testing.py +499 -0
- kailash/nodes/transform/__init__.py +10 -2
- kailash/nodes/transform/chunkers.py +592 -1
- kailash/nodes/transform/processors.py +484 -14
- kailash/nodes/validation.py +321 -0
- kailash/runtime/access_controlled.py +1 -1
- kailash/runtime/async_local.py +41 -7
- kailash/runtime/docker.py +1 -1
- kailash/runtime/local.py +474 -55
- kailash/runtime/parallel.py +1 -1
- kailash/runtime/parallel_cyclic.py +1 -1
- kailash/runtime/testing.py +210 -2
- kailash/security.py +1 -1
- kailash/utils/migrations/__init__.py +25 -0
- kailash/utils/migrations/generator.py +433 -0
- kailash/utils/migrations/models.py +231 -0
- kailash/utils/migrations/runner.py +489 -0
- kailash/utils/secure_logging.py +342 -0
- kailash/workflow/__init__.py +16 -0
- kailash/workflow/cyclic_runner.py +3 -4
- kailash/workflow/graph.py +70 -2
- kailash/workflow/resilience.py +249 -0
- kailash/workflow/templates.py +726 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/METADATA +256 -20
- kailash-0.4.1.dist-info/RECORD +227 -0
- kailash/api/__init__.py +0 -17
- kailash/api/__main__.py +0 -6
- kailash/api/studio_secure.py +0 -893
- kailash/mcp/__main__.py +0 -13
- kailash/mcp/server_new.py +0 -336
- kailash/mcp/servers/__init__.py +0 -12
- kailash-0.3.2.dist-info/RECORD +0 -136
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/WHEEL +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,547 @@
|
|
1
|
+
"""
|
2
|
+
RAG Workflow Registry
|
3
|
+
|
4
|
+
Central registry for discovering and accessing RAG workflows and strategies.
|
5
|
+
Provides a unified interface for users to find the right RAG approach.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from typing import Any, Dict, List, Optional, Type
|
10
|
+
|
11
|
+
from .router import (
|
12
|
+
RAGPerformanceMonitorNode,
|
13
|
+
RAGQualityAnalyzerNode,
|
14
|
+
RAGStrategyRouterNode,
|
15
|
+
)
|
16
|
+
from .strategies import (
|
17
|
+
HierarchicalRAGNode,
|
18
|
+
HybridRAGNode,
|
19
|
+
RAGConfig,
|
20
|
+
SemanticRAGNode,
|
21
|
+
StatisticalRAGNode,
|
22
|
+
create_hierarchical_rag_workflow,
|
23
|
+
create_hybrid_rag_workflow,
|
24
|
+
create_semantic_rag_workflow,
|
25
|
+
create_statistical_rag_workflow,
|
26
|
+
)
|
27
|
+
from .workflows import (
|
28
|
+
AdaptiveRAGWorkflowNode,
|
29
|
+
AdvancedRAGWorkflowNode,
|
30
|
+
RAGPipelineWorkflowNode,
|
31
|
+
SimpleRAGWorkflowNode,
|
32
|
+
)
|
33
|
+
|
34
|
+
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
35
|
+
|
36
|
+
|
37
|
+
class RAGWorkflowRegistry:
    """
    Central registry for RAG workflows and strategies.

    Provides discovery, recommendation, and instantiation of RAG components
    based on user requirements and use cases.

    Three tables are kept, each mapping a short name to a metadata dict that
    holds the component class under ``"class"``:

    * ``_strategies`` - retrieval strategies (plus a workflow ``"factory"``)
    * ``_workflows``  - ready-made workflow nodes
    * ``_utilities``  - supporting nodes (router, analyzers, monitors)

    Metadata handed back to callers is always a copy, so mutating a result
    never alters the registry itself.
    """

    def __init__(self):
        """Create the three component tables and populate them."""
        self._strategies = {}
        self._workflows = {}
        self._utilities = {}
        self._register_components()

    def _register_components(self):
        """Register all available RAG components"""

        # Core strategies
        self._strategies = {
            "semantic": {
                "class": SemanticRAGNode,
                "factory": create_semantic_rag_workflow,
                "description": "Semantic chunking with dense embeddings for conceptual queries",
                "use_cases": ["general Q&A", "narrative content", "conceptual queries"],
                "strengths": ["excellent semantic matching", "good for flowing text"],
                "performance": {
                    "speed": "fast",
                    "accuracy": "high",
                    "complexity": "low",
                },
            },
            "statistical": {
                "class": StatisticalRAGNode,
                "factory": create_statistical_rag_workflow,
                "description": "Statistical chunking with sparse retrieval for technical content",
                "use_cases": ["technical documentation", "code", "structured content"],
                "strengths": ["precise keyword matching", "handles technical terms"],
                "performance": {
                    "speed": "fast",
                    "accuracy": "high",
                    "complexity": "low",
                },
            },
            "hybrid": {
                "class": HybridRAGNode,
                "factory": create_hybrid_rag_workflow,
                "description": "Combines semantic + statistical for optimal coverage",
                "use_cases": ["mixed content", "general purpose", "maximum coverage"],
                "strengths": ["20-30% better performance", "comprehensive results"],
                "performance": {
                    "speed": "medium",
                    "accuracy": "very high",
                    "complexity": "medium",
                },
            },
            "hierarchical": {
                "class": HierarchicalRAGNode,
                "factory": create_hierarchical_rag_workflow,
                "description": "Multi-level processing preserving document structure",
                "use_cases": [
                    "long documents",
                    "structured content",
                    "complex queries",
                ],
                "strengths": ["maintains context", "handles complex documents"],
                "performance": {
                    "speed": "slow",
                    "accuracy": "very high",
                    "complexity": "high",
                },
            },
        }

        # Workflow components
        self._workflows = {
            "simple": {
                "class": SimpleRAGWorkflowNode,
                "description": "Basic RAG workflow for getting started",
                "complexity": "beginner",
                "features": ["semantic chunking", "dense retrieval", "single strategy"],
            },
            "advanced": {
                "class": AdvancedRAGWorkflowNode,
                "description": "Multi-strategy RAG with quality checks",
                "complexity": "intermediate",
                "features": [
                    "strategy selection",
                    "quality validation",
                    "performance monitoring",
                ],
            },
            "adaptive": {
                "class": AdaptiveRAGWorkflowNode,
                "description": "AI-driven strategy selection",
                "complexity": "advanced",
                "features": [
                    "LLM-powered routing",
                    "automatic optimization",
                    "context awareness",
                ],
            },
            "configurable": {
                "class": RAGPipelineWorkflowNode,
                "description": "Flexible pipeline for custom configurations",
                "complexity": "intermediate",
                "features": [
                    "runtime configuration",
                    "strategy switching",
                    "custom parameters",
                ],
            },
        }

        # Utility components
        self._utilities = {
            "router": {
                "class": RAGStrategyRouterNode,
                "description": "LLM-powered strategy selection",
                "use_case": "automatic strategy routing",
            },
            "quality_analyzer": {
                "class": RAGQualityAnalyzerNode,
                "description": "Analyzes RAG results quality",
                "use_case": "quality assessment and optimization",
            },
            "performance_monitor": {
                "class": RAGPerformanceMonitorNode,
                "description": "Monitors performance over time",
                "use_case": "performance tracking and insights",
            },
        }

    @staticmethod
    def _copy_info(info: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of a registry entry with nested lists/dicts copied too.

        Registry entries nest at most one level (lists of strings, a flat
        ``performance`` dict), so a one-level copy is enough to detach the
        result from the registry's own containers. Classes and factories are
        shared by reference.
        """
        detached = {}
        for key, value in info.items():
            if isinstance(value, dict):
                detached[key] = dict(value)
            elif isinstance(value, list):
                detached[key] = list(value)
            else:
                detached[key] = value
        return detached

    def list_strategies(self) -> Dict[str, Dict[str, Any]]:
        """List all available RAG strategies

        Returns:
            Mapping of strategy name to its description, use cases, strengths
            and performance profile. The component class and factory are not
            included. The returned containers are copies.
        """
        public_keys = ("description", "use_cases", "strengths", "performance")
        return {
            name: self._copy_info({key: info[key] for key in public_keys})
            for name, info in self._strategies.items()
        }

    def list_workflows(self) -> Dict[str, Dict[str, Any]]:
        """List all available RAG workflows

        Returns:
            Mapping of workflow name to its description, complexity level and
            feature list. The returned containers are copies.
        """
        public_keys = ("description", "complexity", "features")
        return {
            name: self._copy_info({key: info[key] for key in public_keys})
            for name, info in self._workflows.items()
        }

    def list_utilities(self) -> Dict[str, Dict[str, Any]]:
        """List all available RAG utilities

        Returns:
            Mapping of utility name to its description and intended use case.
        """
        return {
            name: {"description": info["description"], "use_case": info["use_case"]}
            for name, info in self._utilities.items()
        }

    def recommend_strategy(
        self,
        document_count: int = 0,
        avg_document_length: int = 0,
        is_technical: bool = False,
        has_structure: bool = False,
        query_type: str = "general",
        performance_priority: str = "accuracy",
    ) -> Dict[str, Any]:
        """
        Recommend optimal RAG strategy based on use case characteristics.

        Each matching rule contributes one scored candidate; the highest score
        wins and up to two runners-up are reported as alternatives.

        Args:
            document_count: Number of documents in collection
            avg_document_length: Average document length in characters
            is_technical: Whether content is technical/code-heavy
            has_structure: Whether documents have clear structure (headings, sections)
            query_type: Type of queries ("technical", "conceptual", "general")
            performance_priority: Priority ("speed", "accuracy", "coverage")

        Returns:
            Dict with ``recommended_strategy``, ``reasoning``, ``confidence``
            (the winning score), ``alternatives`` (strategy + reasoning) and
            ``strategy_details`` (a copy of the registry entry).
        """

        # Rule-based recommendation logic
        recommendations = []

        # Hierarchical for structured long documents
        if has_structure and avg_document_length > 2000:
            recommendations.append(
                {
                    "strategy": "hierarchical",
                    "score": 0.9,
                    "reasoning": "Long structured documents benefit from hierarchical processing",
                }
            )

        # Statistical for technical content
        if is_technical or query_type == "technical":
            recommendations.append(
                {
                    "strategy": "statistical",
                    "score": 0.85,
                    "reasoning": "Technical content requires precise keyword matching",
                }
            )

        # Hybrid for large collections or when accuracy is priority
        if document_count > 50 or performance_priority == "accuracy":
            recommendations.append(
                {
                    "strategy": "hybrid",
                    "score": 0.8,
                    "reasoning": "Large collections and accuracy priority benefit from hybrid approach",
                }
            )

        # Semantic for conceptual queries or general content
        if query_type == "conceptual" or (not is_technical and not has_structure):
            recommendations.append(
                {
                    "strategy": "semantic",
                    "score": 0.75,
                    "reasoning": "Conceptual queries and general content work well with semantic matching",
                }
            )

        # Speed priority adjustments: favour the fast strategies and
        # penalise the slow hierarchical one.
        if performance_priority == "speed":
            for rec in recommendations:
                if rec["strategy"] in ("semantic", "statistical"):
                    rec["score"] += 0.1
                elif rec["strategy"] == "hierarchical":
                    rec["score"] -= 0.2

        # Default fallback when no rule matched (not subject to the speed
        # adjustment above).
        if not recommendations:
            recommendations.append(
                {
                    "strategy": "semantic",
                    "score": 0.7,
                    "reasoning": "Default strategy for general use cases",
                }
            )

        # Highest score first; the sort is stable, so ties keep rule order.
        recommendations.sort(key=lambda rec: rec["score"], reverse=True)

        primary = recommendations[0]
        alternatives = recommendations[1:3]

        return {
            "recommended_strategy": primary["strategy"],
            "reasoning": primary["reasoning"],
            "confidence": primary["score"],
            "alternatives": [
                {"strategy": alt["strategy"], "reasoning": alt["reasoning"]}
                for alt in alternatives
            ],
            # Copy so callers cannot mutate the registry through the result.
            "strategy_details": self._copy_info(self._strategies[primary["strategy"]]),
        }

    def recommend_workflow(
        self,
        user_level: str = "beginner",
        use_case: str = "general",
        needs_customization: bool = False,
        needs_monitoring: bool = False,
    ) -> Dict[str, Any]:
        """
        Recommend optimal RAG workflow based on user requirements.

        Rules are checked in order and the first match wins: beginner or
        prototyping -> "simple"; customization -> "configurable"; research or
        advanced user -> "adaptive"; monitoring or production -> "advanced";
        otherwise "simple".

        Args:
            user_level: User experience level ("beginner", "intermediate", "advanced")
            use_case: Primary use case ("prototyping", "production", "research")
            needs_customization: Whether user needs runtime customization
            needs_monitoring: Whether user needs performance monitoring

        Returns:
            Dict with ``recommended_workflow``, ``reasoning``,
            ``workflow_details`` (a copy of the registry entry) and
            ``suggested_utilities`` (utility names).
        """

        # Workflow selection logic
        if user_level == "beginner" or use_case == "prototyping":
            workflow = "simple"
            reasoning = "Simple workflow is best for beginners and prototyping"
        elif needs_customization:
            workflow = "configurable"
            reasoning = "Configurable pipeline provides runtime flexibility"
        elif use_case == "research" or user_level == "advanced":
            workflow = "adaptive"
            reasoning = "Adaptive workflow provides AI-driven optimization for research"
        elif needs_monitoring or use_case == "production":
            workflow = "advanced"
            reasoning = "Advanced workflow includes monitoring for production use"
        else:
            workflow = "simple"
            reasoning = "Simple workflow covers most general use cases"

        return {
            "recommended_workflow": workflow,
            "reasoning": reasoning,
            # Copy so callers cannot mutate the registry through the result.
            "workflow_details": self._copy_info(self._workflows[workflow]),
            "suggested_utilities": self._get_suggested_utilities(
                workflow, needs_monitoring
            ),
        }

    def _get_suggested_utilities(
        self, workflow: str, needs_monitoring: bool
    ) -> List[str]:
        """Get suggested utility components for a workflow

        Args:
            workflow: Name of the recommended workflow
            needs_monitoring: Whether monitoring utilities were requested

        Returns:
            Utility names: "router" for the adaptive workflow, followed by the
            quality analyzer and performance monitor for advanced/adaptive
            workflows or whenever monitoring is requested.
        """
        utilities = []

        if workflow == "adaptive":
            utilities.append("router")

        if workflow in ("advanced", "adaptive") or needs_monitoring:
            utilities.extend(["quality_analyzer", "performance_monitor"])

        return utilities

    def create_strategy(
        self, strategy_name: str, config: Optional["RAGConfig"] = None, **kwargs
    ):
        """Create a strategy instance

        Args:
            strategy_name: Key of a registered strategy
            config: Optional configuration, forwarded as ``config=`` when given
            **kwargs: Extra keyword arguments forwarded to the strategy class

        Returns:
            A new instance of the registered strategy class

        Raises:
            ValueError: If ``strategy_name`` is not registered
        """
        if strategy_name not in self._strategies:
            raise ValueError(
                f"Unknown strategy: {strategy_name}. Available: {list(self._strategies.keys())}"
            )

        strategy_class = self._strategies[strategy_name]["class"]

        # Compare against None rather than truthiness so that a config object
        # which happens to evaluate falsy is still passed through.
        if config is not None:
            return strategy_class(config=config, **kwargs)
        return strategy_class(**kwargs)

    def create_workflow(
        self, workflow_name: str, config: Optional["RAGConfig"] = None, **kwargs
    ):
        """Create a workflow instance

        Args:
            workflow_name: Key of a registered workflow
            config: Optional configuration, forwarded as ``config=`` when given
            **kwargs: Extra keyword arguments forwarded to the workflow class

        Returns:
            A new instance of the registered workflow class

        Raises:
            ValueError: If ``workflow_name`` is not registered
        """
        if workflow_name not in self._workflows:
            raise ValueError(
                f"Unknown workflow: {workflow_name}. Available: {list(self._workflows.keys())}"
            )

        workflow_class = self._workflows[workflow_name]["class"]

        # Compare against None rather than truthiness so that a config object
        # which happens to evaluate falsy is still passed through.
        if config is not None:
            return workflow_class(config=config, **kwargs)
        return workflow_class(**kwargs)

    def create_utility(self, utility_name: str, **kwargs):
        """Create a utility instance

        Args:
            utility_name: Key of a registered utility
            **kwargs: Keyword arguments forwarded to the utility class

        Returns:
            A new instance of the registered utility class

        Raises:
            ValueError: If ``utility_name`` is not registered
        """
        if utility_name not in self._utilities:
            raise ValueError(
                f"Unknown utility: {utility_name}. Available: {list(self._utilities.keys())}"
            )

        utility_class = self._utilities[utility_name]["class"]
        return utility_class(**kwargs)

    def get_quick_start_guide(self) -> str:
        """Get quick start guide for RAG toolkit

        Returns:
            A Markdown document with usage examples and a component overview.
        """
        return """
# RAG Toolkit Quick Start Guide

## 1. Choose Your Approach

### For Beginners:
```python
from kailash.nodes.rag import RAGWorkflowRegistry

registry = RAGWorkflowRegistry()
simple_rag = registry.create_workflow("simple")
```

### For Custom Requirements:
```python
# Get recommendation
recommendation = registry.recommend_strategy(
    document_count=100,
    avg_document_length=1500,
    is_technical=True
)

# Create recommended strategy
strategy = registry.create_strategy(recommendation["recommended_strategy"])
```

### For Production Use:
```python
# Advanced workflow with monitoring
advanced_rag = registry.create_workflow("advanced")
quality_analyzer = registry.create_utility("quality_analyzer")
performance_monitor = registry.create_utility("performance_monitor")
```

## 2. Integration Patterns

### Direct Usage:
```python
# Use strategy directly
semantic_rag = registry.create_strategy("semantic")
result = semantic_rag.run(documents=docs, operation="index")
```

### In Workflows:
```python
from kailash.workflow.builder import WorkflowBuilder
from kailash.nodes.logic import SwitchNode

builder = WorkflowBuilder()

# Add RAG router
router = registry.create_utility("router")
builder.add_node(router, "rag_router")

# Add strategies
semantic_rag = registry.create_strategy("semantic")
hybrid_rag = registry.create_strategy("hybrid")

builder.add_node(semantic_rag, "semantic_strategy")
builder.add_node(hybrid_rag, "hybrid_strategy")

# Add routing logic
switch = SwitchNode(
    condition_field="strategy",
    routes={
        "semantic": "semantic_strategy",
        "hybrid": "hybrid_strategy"
    }
)
builder.add_node(switch, "strategy_switch")

# Connect pipeline
builder.connect("rag_router", "strategy_switch")
builder.connect("strategy_switch", "semantic_strategy", route="semantic")
builder.connect("strategy_switch", "hybrid_strategy", route="hybrid")
```

## 3. Available Components

### Strategies:
- **semantic**: Best for general content and conceptual queries
- **statistical**: Best for technical documentation and code
- **hybrid**: Best for mixed content (20-30% better performance)
- **hierarchical**: Best for long, structured documents

### Workflows:
- **simple**: Basic RAG for getting started
- **advanced**: Multi-strategy with quality checks
- **adaptive**: AI-driven strategy selection
- **configurable**: Flexible runtime configuration

### Utilities:
- **router**: LLM-powered strategy selection
- **quality_analyzer**: Results quality assessment
- **performance_monitor**: Performance tracking over time

## 4. Best Practices

1. **Start Simple**: Use SimpleRAGWorkflowNode for prototyping
2. **Measure Performance**: Always use quality analyzer in production
3. **Let AI Decide**: Use AdaptiveRAGWorkflowNode for optimal results
4. **Monitor Over Time**: Use performance monitor for continuous improvement
5. **Customize When Needed**: Use configurable pipeline for specific requirements

For detailed examples, see: sdk-users/workflows/by-pattern/rag/
"""

    def get_strategy_comparison(self) -> Dict[str, Any]:
        """Get detailed comparison of all strategies

        Returns:
            Dict with three sections: ``performance_matrix`` (integer ratings
            per strategy), ``use_case_fit`` and ``selection_guide`` (ordered
            lists of names per scenario). A fresh dict is built on every call.
        """
        return {
            "performance_matrix": {
                "semantic": {"speed": 9, "accuracy": 8, "complexity": 3},
                "statistical": {"speed": 9, "accuracy": 8, "complexity": 3},
                "hybrid": {"speed": 7, "accuracy": 9, "complexity": 6},
                "hierarchical": {"speed": 5, "accuracy": 9, "complexity": 8},
            },
            "use_case_fit": {
                "general_qa": ["semantic", "hybrid"],
                "technical_docs": ["statistical", "hybrid"],
                "long_documents": ["hierarchical", "hybrid"],
                # NOTE(review): "adaptive" is registered as a workflow, not a
                # strategy, so create_strategy("adaptive") would raise -
                # confirm this entry is intended.
                "mixed_content": ["hybrid", "adaptive"],
                "code_search": ["statistical", "hybrid"],
            },
            "selection_guide": {
                "prioritize_speed": ["semantic", "statistical"],
                "prioritize_accuracy": ["hybrid", "hierarchical"],
                "large_collections": ["hybrid", "hierarchical"],
                "technical_content": ["statistical", "hybrid"],
                "narrative_content": ["semantic", "hybrid"],
            },
        }
|
544
|
+
|
545
|
+
|
546
|
+
# Global registry instance
|
547
|
+
# Built once at import time; the constructor registers every component.
rag_registry = RAGWorkflowRegistry()
|