kailash 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +33 -1
- kailash/access_control/__init__.py +129 -0
- kailash/access_control/managers.py +461 -0
- kailash/access_control/rule_evaluators.py +467 -0
- kailash/access_control_abac.py +825 -0
- kailash/config/__init__.py +27 -0
- kailash/config/database_config.py +359 -0
- kailash/database/__init__.py +28 -0
- kailash/database/execution_pipeline.py +499 -0
- kailash/middleware/__init__.py +306 -0
- kailash/middleware/auth/__init__.py +33 -0
- kailash/middleware/auth/access_control.py +436 -0
- kailash/middleware/auth/auth_manager.py +422 -0
- kailash/middleware/auth/jwt_auth.py +477 -0
- kailash/middleware/auth/kailash_jwt_auth.py +616 -0
- kailash/middleware/communication/__init__.py +37 -0
- kailash/middleware/communication/ai_chat.py +989 -0
- kailash/middleware/communication/api_gateway.py +802 -0
- kailash/middleware/communication/events.py +470 -0
- kailash/middleware/communication/realtime.py +710 -0
- kailash/middleware/core/__init__.py +21 -0
- kailash/middleware/core/agent_ui.py +890 -0
- kailash/middleware/core/schema.py +643 -0
- kailash/middleware/core/workflows.py +396 -0
- kailash/middleware/database/__init__.py +63 -0
- kailash/middleware/database/base.py +113 -0
- kailash/middleware/database/base_models.py +525 -0
- kailash/middleware/database/enums.py +106 -0
- kailash/middleware/database/migrations.py +12 -0
- kailash/{api/database.py → middleware/database/models.py} +183 -291
- kailash/middleware/database/repositories.py +685 -0
- kailash/middleware/database/session_manager.py +19 -0
- kailash/middleware/mcp/__init__.py +38 -0
- kailash/middleware/mcp/client_integration.py +585 -0
- kailash/middleware/mcp/enhanced_server.py +576 -0
- kailash/nodes/__init__.py +25 -3
- kailash/nodes/admin/__init__.py +35 -0
- kailash/nodes/admin/audit_log.py +794 -0
- kailash/nodes/admin/permission_check.py +864 -0
- kailash/nodes/admin/role_management.py +823 -0
- kailash/nodes/admin/security_event.py +1519 -0
- kailash/nodes/admin/user_management.py +944 -0
- kailash/nodes/ai/a2a.py +24 -7
- kailash/nodes/ai/ai_providers.py +1 -0
- kailash/nodes/ai/embedding_generator.py +11 -11
- kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
- kailash/nodes/ai/llm_agent.py +407 -2
- kailash/nodes/ai/self_organizing.py +85 -10
- kailash/nodes/api/auth.py +287 -6
- kailash/nodes/api/rest.py +151 -0
- kailash/nodes/auth/__init__.py +17 -0
- kailash/nodes/auth/directory_integration.py +1228 -0
- kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
- kailash/nodes/auth/mfa.py +2338 -0
- kailash/nodes/auth/risk_assessment.py +872 -0
- kailash/nodes/auth/session_management.py +1093 -0
- kailash/nodes/auth/sso.py +1040 -0
- kailash/nodes/base.py +344 -13
- kailash/nodes/base_cycle_aware.py +4 -2
- kailash/nodes/base_with_acl.py +1 -1
- kailash/nodes/code/python.py +293 -12
- kailash/nodes/compliance/__init__.py +9 -0
- kailash/nodes/compliance/data_retention.py +1888 -0
- kailash/nodes/compliance/gdpr.py +2004 -0
- kailash/nodes/data/__init__.py +22 -2
- kailash/nodes/data/async_connection.py +469 -0
- kailash/nodes/data/async_sql.py +757 -0
- kailash/nodes/data/async_vector.py +598 -0
- kailash/nodes/data/readers.py +767 -0
- kailash/nodes/data/retrieval.py +360 -1
- kailash/nodes/data/sharepoint_graph.py +397 -21
- kailash/nodes/data/sql.py +94 -5
- kailash/nodes/data/streaming.py +68 -8
- kailash/nodes/data/vector_db.py +54 -4
- kailash/nodes/enterprise/__init__.py +13 -0
- kailash/nodes/enterprise/batch_processor.py +741 -0
- kailash/nodes/enterprise/data_lineage.py +497 -0
- kailash/nodes/logic/convergence.py +31 -9
- kailash/nodes/logic/operations.py +14 -3
- kailash/nodes/mixins/__init__.py +8 -0
- kailash/nodes/mixins/event_emitter.py +201 -0
- kailash/nodes/mixins/mcp.py +9 -4
- kailash/nodes/mixins/security.py +165 -0
- kailash/nodes/monitoring/__init__.py +7 -0
- kailash/nodes/monitoring/performance_benchmark.py +2497 -0
- kailash/nodes/rag/__init__.py +284 -0
- kailash/nodes/rag/advanced.py +1615 -0
- kailash/nodes/rag/agentic.py +773 -0
- kailash/nodes/rag/conversational.py +999 -0
- kailash/nodes/rag/evaluation.py +875 -0
- kailash/nodes/rag/federated.py +1188 -0
- kailash/nodes/rag/graph.py +721 -0
- kailash/nodes/rag/multimodal.py +671 -0
- kailash/nodes/rag/optimized.py +933 -0
- kailash/nodes/rag/privacy.py +1059 -0
- kailash/nodes/rag/query_processing.py +1335 -0
- kailash/nodes/rag/realtime.py +764 -0
- kailash/nodes/rag/registry.py +547 -0
- kailash/nodes/rag/router.py +837 -0
- kailash/nodes/rag/similarity.py +1854 -0
- kailash/nodes/rag/strategies.py +566 -0
- kailash/nodes/rag/workflows.py +575 -0
- kailash/nodes/security/__init__.py +19 -0
- kailash/nodes/security/abac_evaluator.py +1411 -0
- kailash/nodes/security/audit_log.py +91 -0
- kailash/nodes/security/behavior_analysis.py +1893 -0
- kailash/nodes/security/credential_manager.py +401 -0
- kailash/nodes/security/rotating_credentials.py +760 -0
- kailash/nodes/security/security_event.py +132 -0
- kailash/nodes/security/threat_detection.py +1103 -0
- kailash/nodes/testing/__init__.py +9 -0
- kailash/nodes/testing/credential_testing.py +499 -0
- kailash/nodes/transform/__init__.py +10 -2
- kailash/nodes/transform/chunkers.py +592 -1
- kailash/nodes/transform/processors.py +484 -14
- kailash/nodes/validation.py +321 -0
- kailash/runtime/access_controlled.py +1 -1
- kailash/runtime/async_local.py +41 -7
- kailash/runtime/docker.py +1 -1
- kailash/runtime/local.py +474 -55
- kailash/runtime/parallel.py +1 -1
- kailash/runtime/parallel_cyclic.py +1 -1
- kailash/runtime/testing.py +210 -2
- kailash/utils/migrations/__init__.py +25 -0
- kailash/utils/migrations/generator.py +433 -0
- kailash/utils/migrations/models.py +231 -0
- kailash/utils/migrations/runner.py +489 -0
- kailash/utils/secure_logging.py +342 -0
- kailash/workflow/__init__.py +16 -0
- kailash/workflow/cyclic_runner.py +3 -4
- kailash/workflow/graph.py +70 -2
- kailash/workflow/resilience.py +249 -0
- kailash/workflow/templates.py +726 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
- kailash-0.4.0.dist-info/RECORD +223 -0
- kailash/api/__init__.py +0 -17
- kailash/api/__main__.py +0 -6
- kailash/api/studio_secure.py +0 -893
- kailash/mcp/__main__.py +0 -13
- kailash/mcp/server_new.py +0 -336
- kailash/mcp/servers/__init__.py +0 -12
- kailash-0.3.1.dist-info/RECORD +0 -136
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,575 @@
|
|
1
|
+
"""
|
2
|
+
RAG Workflow Nodes
|
3
|
+
|
4
|
+
Pre-built WorkflowNode components that combine multiple RAG strategies
|
5
|
+
and operations into reusable workflow patterns.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import logging
|
9
|
+
from typing import Any, Dict, Optional
|
10
|
+
|
11
|
+
from ...workflow.builder import WorkflowBuilder
|
12
|
+
from ..base import Node, NodeParameter, register_node
|
13
|
+
from ..logic import SwitchNode
|
14
|
+
from ..logic.workflow import WorkflowNode
|
15
|
+
from .strategies import (
|
16
|
+
RAGConfig,
|
17
|
+
create_hierarchical_rag_workflow,
|
18
|
+
create_hybrid_rag_workflow,
|
19
|
+
create_semantic_rag_workflow,
|
20
|
+
create_statistical_rag_workflow,
|
21
|
+
)
|
22
|
+
|
23
|
+
logger = logging.getLogger(__name__)
|
24
|
+
|
25
|
+
|
26
|
+
@register_node()
class SimpleRAGWorkflowNode(WorkflowNode):
    """
    Simple RAG Workflow Node

    Basic chunk → embed → store → retrieve pipeline built on semantic
    chunking. A good starting point for RAG experiments or simple
    document Q&A; the actual pipeline construction is delegated to the
    semantic strategy factory.
    """

    def __init__(
        self, name: str = "simple_rag_workflow", config: Optional[RAGConfig] = None
    ):
        # Use the caller-supplied config when given, otherwise library defaults.
        self.rag_config = config or RAGConfig()

        # The semantic strategy factory returns a WorkflowNode; reuse its
        # inner workflow and hand it to the base-class initializer.
        semantic_node = create_semantic_rag_workflow(self.rag_config)
        super().__init__(
            workflow=semantic_node.workflow,
            name=name,
            description="Simple RAG workflow with semantic chunking and dense retrieval",
        )
51
|
+
@register_node()
class AdvancedRAGWorkflowNode(WorkflowNode):
    """
    Advanced RAG Workflow Node

    Multi-stage RAG pipeline: a quality analyzer inspects the incoming
    documents, a switch routes to the best-suited strategy pipeline
    (semantic / statistical / hybrid / hierarchical), and a validator
    scores the retrieval results before they are returned.
    """

    def __init__(
        self, name: str = "advanced_rag_workflow", config: Optional[RAGConfig] = None
    ):
        self.rag_config = config or RAGConfig()

        super().__init__(
            workflow=self._create_advanced_workflow(),
            name=name,
            description="Advanced RAG with quality checks and multi-stage processing",
        )

    def _create_advanced_workflow(self):
        """Assemble the analyzer → router → strategy pipelines → validator graph."""
        builder = WorkflowBuilder()

        # Step 1: inspect the documents and recommend a strategy.
        quality_analyzer_id = builder.add_node(
            "PythonCodeNode",
            node_id="quality_analyzer",
            config={
                "code": """
# Analyze document quality and determine best RAG strategy
def analyze_documents(documents):
    analysis = {
        "total_docs": len(documents),
        "avg_length": sum(len(doc.get("content", "")) for doc in documents) / len(documents) if documents else 0,
        "has_structure": any("section" in doc or "heading" in doc for doc in documents),
        "is_technical": any(keyword in doc.get("content", "").lower()
                            for doc in documents
                            for keyword in ["code", "function", "algorithm", "api", "class"]),
        "recommended_strategy": "semantic"  # Default
    }

    # Determine best strategy based on analysis
    if analysis["has_structure"] and analysis["avg_length"] > 2000:
        analysis["recommended_strategy"] = "hierarchical"
    elif analysis["is_technical"]:
        analysis["recommended_strategy"] = "statistical"
    elif analysis["total_docs"] > 100:
        analysis["recommended_strategy"] = "hybrid"

    return analysis

result = {"analysis": analyze_documents(documents), "documents": documents}
"""
            },
        )

        # Step 2: route on the analyzer's recommendation.
        router_id = builder.add_node(
            "SwitchNode",
            node_id="strategy_router",
            config={
                "condition_field": "analysis.recommended_strategy",
                "routes": {
                    "semantic": "semantic_rag_pipeline",
                    "statistical": "statistical_rag_pipeline",
                    "hybrid": "hybrid_rag_pipeline",
                    "hierarchical": "hierarchical_rag_pipeline",
                },
            },
        )

        # Step 3: one sub-workflow node per strategy. Dict insertion order
        # (semantic, statistical, hybrid, hierarchical) mirrors the original
        # explicit construction order.
        strategy_factories = {
            "semantic": create_semantic_rag_workflow,
            "statistical": create_statistical_rag_workflow,
            "hybrid": create_hybrid_rag_workflow,
            "hierarchical": create_hierarchical_rag_workflow,
        }
        pipeline_ids = {}
        for strategy, factory in strategy_factories.items():
            pipeline_ids[strategy] = builder.add_node(
                "WorkflowNode",
                node_id=f"{strategy}_rag_pipeline",
                config={"workflow": factory(self.rag_config).workflow},
            )

        # Step 4: score whichever pipeline ran and report pass/fail.
        validator_id = builder.add_node(
            "PythonCodeNode",
            node_id="quality_validator",
            config={
                "code": """
def validate_rag_results(results, analysis):
    validation = {
        "results_count": len(results.get("documents", [])),
        "avg_score": sum(results.get("scores", [])) / len(results.get("scores", [])) if results.get("scores") else 0,
        "quality_score": 0.0,
        "passed": False
    }

    # Calculate quality score
    if validation["results_count"] > 0:
        validation["quality_score"] = validation["avg_score"] * (validation["results_count"] / 5.0)
        validation["passed"] = validation["quality_score"] > 0.5

    return {
        "results": results,
        "validation": validation,
        "strategy_used": analysis.get("recommended_strategy"),
        "final_status": "passed" if validation["passed"] else "needs_improvement"
    }

result = validate_rag_results(rag_results, analysis)
"""
            },
        )

        # Wire the graph: analyzer feeds the router, the router fans out to
        # every strategy pipeline, and each pipeline feeds the validator.
        builder.add_connection(quality_analyzer_id, "result", router_id, "input")
        for strategy, pipeline_id in pipeline_ids.items():
            builder.add_connection(router_id, pipeline_id, route=strategy)
        for pipeline_id in pipeline_ids.values():
            builder.add_connection(pipeline_id, "output", validator_id, "rag_results")
        builder.add_connection(quality_analyzer_id, "result", validator_id, "analysis")

        return builder.build(name="advanced_rag_workflow")
206
|
+
@register_node()
class AdaptiveRAGWorkflowNode(WorkflowNode):
    """
    Adaptive RAG Workflow Node

    AI-driven strategy selection: a preprocessor computes cheap document
    statistics, an LLM agent turns those statistics into a strategy
    recommendation (JSON), a switch routes to the matching RAG pipeline,
    and an aggregator merges results with the LLM's reasoning metadata.
    """

    def __init__(
        self,
        name: str = "adaptive_rag_workflow",
        llm_model: str = "gpt-4",
        config: Optional[RAGConfig] = None,
    ):
        # Pipeline tuning knobs (chunking, embedding, retrieval depth).
        self.rag_config = config or RAGConfig()
        # Model used by the strategy-analyzer LLM node below.
        self.llm_model = llm_model

        # Build adaptive workflow
        workflow = self._create_adaptive_workflow()

        super().__init__(
            workflow=workflow,
            name=name,
            description="AI-driven adaptive RAG with intelligent strategy selection",
        )

    def _create_adaptive_workflow(self):
        """Create adaptive RAG workflow with LLM-driven strategy selection.

        Graph shape: preprocessor → LLM analyzer → switch → one of four
        strategy sub-workflows → aggregator (which also receives the LLM
        decision and the preprocessor output directly).
        """
        builder = WorkflowBuilder()

        # LLM Strategy Analyzer: prompted to emit ONLY a JSON object with
        # recommended_strategy / reasoning / confidence / fallback_strategy.
        llm_analyzer_id = builder.add_node(
            "LLMAgentNode",
            node_id="rag_strategy_analyzer",
            config={
                "model": self.llm_model,
                "provider": "openai",
                "system_prompt": """You are a RAG strategy expert. Analyze documents and queries to recommend the optimal RAG approach.

Available strategies:
- semantic: Best for narrative content, general Q&A, semantic similarity
- statistical: Best for technical docs, code, structured content, keyword matching
- hybrid: Best for mixed content, combines semantic + statistical (20-30% better performance)
- hierarchical: Best for long documents, structured content with sections/headings

Analyze the input and respond with ONLY a JSON object:
{
    "recommended_strategy": "semantic|statistical|hybrid|hierarchical",
    "reasoning": "Brief explanation of why this strategy is optimal",
    "confidence": 0.0-1.0,
    "fallback_strategy": "backup strategy if primary fails"
}""",
                # Placeholders ({document_count}, ...) are filled from the
                # preprocessor's output fields of the same names.
                "prompt_template": """Analyze these documents for optimal RAG strategy:

Document Analysis:
- Count: {document_count}
- Average length: {avg_length} characters
- Has structure (headings/sections): {has_structure}
- Technical content detected: {is_technical}
- Content types: {content_types}

Query (if provided): {query}

Recommend the optimal RAG strategy:""",
            },
        )

        # Document preprocessor: computes the statistics the prompt template
        # references. Runs inside a PythonCodeNode, so the code is a string.
        preprocessor_id = builder.add_node(
            "PythonCodeNode",
            node_id="document_preprocessor",
            config={
                "code": """
import re

def analyze_for_llm(documents, query=""):
    if not documents:
        return {
            "document_count": 0,
            "avg_length": 0,
            "has_structure": False,
            "is_technical": False,
            "content_types": [],
            "query": query
        }

    # Analyze documents
    total_length = sum(len(doc.get("content", "")) for doc in documents)
    avg_length = total_length / len(documents)

    # Check for structure
    has_structure = any(
        any(keyword in doc.get("content", "").lower()
            for keyword in ["# ", "## ", "### ", "heading", "section", "chapter"])
        for doc in documents
    )

    # Check for technical content
    technical_keywords = ["code", "function", "class", "algorithm", "api", "import", "def ", "return", "variable"]
    is_technical = any(
        any(keyword in doc.get("content", "").lower()
            for keyword in technical_keywords)
        for doc in documents
    )

    # Determine content types
    content_types = []
    if has_structure:
        content_types.append("structured")
    if is_technical:
        content_types.append("technical")
    if avg_length > 2000:
        content_types.append("long_form")
    if len(documents) > 50:
        content_types.append("large_collection")

    return {
        "document_count": len(documents),
        "avg_length": int(avg_length),
        "has_structure": has_structure,
        "is_technical": is_technical,
        "content_types": content_types,
        "query": query,
        "documents": documents
    }

result = analyze_for_llm(documents, query)
"""
            },
        )

        # Strategy executor: switches on the LLM's recommended_strategy field.
        executor_id = builder.add_node(
            "SwitchNode",
            node_id="strategy_executor",
            config={
                "condition_field": "recommended_strategy",
                "routes": {
                    "semantic": "semantic_pipeline",
                    "statistical": "statistical_pipeline",
                    "hybrid": "hybrid_pipeline",
                    "hierarchical": "hierarchical_pipeline",
                },
            },
        )

        # Add strategy pipelines (one sub-workflow per strategy factory).
        semantic_workflow = create_semantic_rag_workflow(self.rag_config)
        statistical_workflow = create_statistical_rag_workflow(self.rag_config)
        hybrid_workflow = create_hybrid_rag_workflow(self.rag_config)
        hierarchical_workflow = create_hierarchical_rag_workflow(self.rag_config)

        semantic_pipeline_id = builder.add_node(
            "WorkflowNode",
            node_id="semantic_pipeline",
            config={"workflow": semantic_workflow.workflow},
        )

        statistical_pipeline_id = builder.add_node(
            "WorkflowNode",
            node_id="statistical_pipeline",
            config={"workflow": statistical_workflow.workflow},
        )

        hybrid_pipeline_id = builder.add_node(
            "WorkflowNode",
            node_id="hybrid_pipeline",
            config={"workflow": hybrid_workflow.workflow},
        )

        hierarchical_pipeline_id = builder.add_node(
            "WorkflowNode",
            node_id="hierarchical_pipeline",
            config={"workflow": hierarchical_workflow.workflow},
        )

        # Results aggregator: merges RAG output with the LLM's decision and
        # the preprocessor's document statistics.
        # NOTE(review): the embedded code reports "llm_model_used": "gpt-4"
        # as a hard-coded literal even when self.llm_model differs — confirm
        # whether this metadata should reflect the configured model.
        aggregator_id = builder.add_node(
            "PythonCodeNode",
            node_id="results_aggregator",
            config={
                "code": """
def aggregate_adaptive_results(rag_results, llm_decision, preprocessed_data):
    return {
        "results": rag_results,
        "strategy_used": llm_decision.get("recommended_strategy"),
        "llm_reasoning": llm_decision.get("reasoning"),
        "confidence": llm_decision.get("confidence"),
        "document_analysis": {
            "count": preprocessed_data.get("document_count"),
            "avg_length": preprocessed_data.get("avg_length"),
            "content_types": preprocessed_data.get("content_types")
        },
        "adaptive_metadata": {
            "llm_model_used": "gpt-4",
            "strategy_selection_method": "llm_analysis",
            "fallback_available": llm_decision.get("fallback_strategy")
        }
    }

result = aggregate_adaptive_results(rag_results, llm_decision, preprocessed_data)
"""
            },
        )

        # Connect adaptive pipeline: preprocessor feeds both the LLM and the
        # executor; the LLM decision drives the switch.
        builder.add_connection(preprocessor_id, "result", llm_analyzer_id, "input")
        builder.add_connection(llm_analyzer_id, "result", executor_id, "input")
        builder.add_connection(
            preprocessor_id, "result", executor_id, "preprocessed_data"
        )

        # Connect executor to strategy pipelines (one route per strategy key).
        builder.add_connection(executor_id, semantic_pipeline_id, route="semantic")
        builder.add_connection(
            executor_id, statistical_pipeline_id, route="statistical"
        )
        builder.add_connection(executor_id, hybrid_pipeline_id, route="hybrid")
        builder.add_connection(
            executor_id, hierarchical_pipeline_id, route="hierarchical"
        )

        # Connect all pipelines to aggregator; only the routed pipeline runs,
        # so a single "rag_results" input is populated at execution time.
        builder.add_connection(
            semantic_pipeline_id, "output", aggregator_id, "rag_results"
        )
        builder.add_connection(
            statistical_pipeline_id, "output", aggregator_id, "rag_results"
        )
        builder.add_connection(
            hybrid_pipeline_id, "output", aggregator_id, "rag_results"
        )
        builder.add_connection(
            hierarchical_pipeline_id, "output", aggregator_id, "rag_results"
        )
        builder.add_connection(llm_analyzer_id, "result", aggregator_id, "llm_decision")
        builder.add_connection(
            preprocessor_id, "result", aggregator_id, "preprocessed_data"
        )

        return builder.build(name="adaptive_rag_workflow")
450
|
+
@register_node()
class RAGPipelineWorkflowNode(WorkflowNode):
    """
    Configurable RAG Pipeline Workflow Node

    Flexible RAG workflow that can be configured for different use cases
    without code changes. A config processor merges runtime kwargs with
    the defaults baked in at construction time, a switch dispatches to the
    chosen strategy, and a formatter wraps the results with the effective
    configuration.
    """

    def __init__(
        self,
        name: str = "rag_pipeline",
        default_strategy: str = "hybrid",
        config: Optional[RAGConfig] = None,
    ):
        # Defaults interpolated into the generated config-processor code below.
        self.rag_config = config or RAGConfig()
        # Strategy used when the caller does not override it at runtime.
        self.default_strategy = default_strategy

        # Build configurable workflow
        workflow = self._create_configurable_workflow()

        super().__init__(
            workflow=workflow,
            name=name,
            description=f"Configurable RAG pipeline with {default_strategy} as default strategy",
        )

    def _create_configurable_workflow(self):
        """Create configurable RAG workflow (config → dispatch → strategy → format)."""
        builder = WorkflowBuilder()

        # Configuration processor. The code string is an f-string: the
        # construction-time defaults (default_strategy, chunk sizes, model,
        # retrieval_k) are baked into the generated source; {{ }} escapes
        # produce literal braces in the emitted code.
        # NOTE(review): the generated call is process_config(documents,
        # **kwargs) — "query" is only forwarded if present in kwargs, unlike
        # the other workflow nodes which pass it explicitly; confirm intended.
        config_processor_id = builder.add_node(
            "PythonCodeNode",
            node_id="config_processor",
            config={
                "code": f"""
def process_config(documents, query="", strategy="{self.default_strategy}", **kwargs):
    # Merge user config with defaults
    processed_config = {{
        "strategy": strategy,
        "documents": documents,
        "query": query,
        "chunk_size": kwargs.get("chunk_size", {self.rag_config.chunk_size}),
        "chunk_overlap": kwargs.get("chunk_overlap", {self.rag_config.chunk_overlap}),
        "embedding_model": kwargs.get("embedding_model", "{self.rag_config.embedding_model}"),
        "retrieval_k": kwargs.get("retrieval_k", {self.rag_config.retrieval_k})
    }}

    return processed_config

result = process_config(documents, **kwargs)
"""
            },
        )

        # Strategy dispatcher: switches on the merged config's "strategy"
        # field; unknown values fall through to the hybrid pipeline.
        dispatcher_id = builder.add_node(
            "SwitchNode",
            node_id="strategy_dispatcher",
            config={
                "condition_field": "strategy",
                "routes": {
                    "semantic": "semantic_strategy",
                    "statistical": "statistical_strategy",
                    "hybrid": "hybrid_strategy",
                    "hierarchical": "hierarchical_strategy",
                },
                "default_route": "hybrid_strategy",
            },
        )

        # Add all strategy implementations (one factory-built sub-workflow each).
        strategies = {
            "semantic": create_semantic_rag_workflow(self.rag_config),
            "statistical": create_statistical_rag_workflow(self.rag_config),
            "hybrid": create_hybrid_rag_workflow(self.rag_config),
            "hierarchical": create_hierarchical_rag_workflow(self.rag_config),
        }

        strategy_ids = {}
        for strategy_name, workflow_node in strategies.items():
            strategy_id = builder.add_node(
                "WorkflowNode",
                node_id=f"{strategy_name}_strategy",
                config={"workflow": workflow_node.workflow},
            )
            strategy_ids[strategy_name] = strategy_id

        # Results formatter: wraps whichever strategy ran with its config.
        formatter_id = builder.add_node(
            "PythonCodeNode",
            node_id="results_formatter",
            config={
                "code": """
def format_pipeline_results(results, config):
    return {
        "results": results,
        "strategy_used": config.get("strategy"),
        "configuration": config,
        "pipeline_type": "configurable",
        "success": True if results else False
    }

result = format_pipeline_results(strategy_results, processed_config)
"""
            },
        )

        # Connect configurable pipeline
        builder.add_connection(config_processor_id, "result", dispatcher_id, "input")

        # Connect dispatcher to all strategies.
        # NOTE(review): route here is the target node id (e.g.
        # "semantic_strategy"), whereas AdvancedRAGWorkflowNode passes the
        # switch key ("semantic") — confirm which form the builder expects.
        for strategy_name, strategy_id in strategy_ids.items():
            builder.add_connection(
                dispatcher_id, strategy_id, route=f"{strategy_name}_strategy"
            )
            builder.add_connection(
                strategy_id, "output", formatter_id, "strategy_results"
            )

        builder.add_connection(
            config_processor_id, "result", formatter_id, "processed_config"
        )

        return builder.build(name="configurable_rag_pipeline")
@@ -0,0 +1,19 @@
|
|
1
|
+
"""Security-related nodes for the Kailash SDK."""

# Re-export the node classes so callers can import them directly from
# ``kailash.nodes.security`` rather than from each submodule.
from .abac_evaluator import ABACPermissionEvaluatorNode
from .audit_log import AuditLogNode
from .behavior_analysis import BehaviorAnalysisNode
from .credential_manager import CredentialManagerNode
from .rotating_credentials import RotatingCredentialNode
from .security_event import SecurityEventNode
from .threat_detection import ThreatDetectionNode

# Explicit public API of this subpackage (controls ``import *``).
__all__ = [
    "CredentialManagerNode",
    "RotatingCredentialNode",
    "AuditLogNode",
    "SecurityEventNode",
    "ThreatDetectionNode",
    "ABACPermissionEvaluatorNode",
    "BehaviorAnalysisNode",
]
|