kailash 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. kailash/__init__.py +33 -1
  2. kailash/access_control/__init__.py +129 -0
  3. kailash/access_control/managers.py +461 -0
  4. kailash/access_control/rule_evaluators.py +467 -0
  5. kailash/access_control_abac.py +825 -0
  6. kailash/config/__init__.py +27 -0
  7. kailash/config/database_config.py +359 -0
  8. kailash/database/__init__.py +28 -0
  9. kailash/database/execution_pipeline.py +499 -0
  10. kailash/middleware/__init__.py +306 -0
  11. kailash/middleware/auth/__init__.py +33 -0
  12. kailash/middleware/auth/access_control.py +436 -0
  13. kailash/middleware/auth/auth_manager.py +422 -0
  14. kailash/middleware/auth/jwt_auth.py +477 -0
  15. kailash/middleware/auth/kailash_jwt_auth.py +616 -0
  16. kailash/middleware/communication/__init__.py +37 -0
  17. kailash/middleware/communication/ai_chat.py +989 -0
  18. kailash/middleware/communication/api_gateway.py +802 -0
  19. kailash/middleware/communication/events.py +470 -0
  20. kailash/middleware/communication/realtime.py +710 -0
  21. kailash/middleware/core/__init__.py +21 -0
  22. kailash/middleware/core/agent_ui.py +890 -0
  23. kailash/middleware/core/schema.py +643 -0
  24. kailash/middleware/core/workflows.py +396 -0
  25. kailash/middleware/database/__init__.py +63 -0
  26. kailash/middleware/database/base.py +113 -0
  27. kailash/middleware/database/base_models.py +525 -0
  28. kailash/middleware/database/enums.py +106 -0
  29. kailash/middleware/database/migrations.py +12 -0
  30. kailash/{api/database.py → middleware/database/models.py} +183 -291
  31. kailash/middleware/database/repositories.py +685 -0
  32. kailash/middleware/database/session_manager.py +19 -0
  33. kailash/middleware/mcp/__init__.py +38 -0
  34. kailash/middleware/mcp/client_integration.py +585 -0
  35. kailash/middleware/mcp/enhanced_server.py +576 -0
  36. kailash/nodes/__init__.py +25 -3
  37. kailash/nodes/admin/__init__.py +35 -0
  38. kailash/nodes/admin/audit_log.py +794 -0
  39. kailash/nodes/admin/permission_check.py +864 -0
  40. kailash/nodes/admin/role_management.py +823 -0
  41. kailash/nodes/admin/security_event.py +1519 -0
  42. kailash/nodes/admin/user_management.py +944 -0
  43. kailash/nodes/ai/a2a.py +24 -7
  44. kailash/nodes/ai/ai_providers.py +1 -0
  45. kailash/nodes/ai/embedding_generator.py +11 -11
  46. kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
  47. kailash/nodes/ai/llm_agent.py +407 -2
  48. kailash/nodes/ai/self_organizing.py +85 -10
  49. kailash/nodes/api/auth.py +287 -6
  50. kailash/nodes/api/rest.py +151 -0
  51. kailash/nodes/auth/__init__.py +17 -0
  52. kailash/nodes/auth/directory_integration.py +1228 -0
  53. kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
  54. kailash/nodes/auth/mfa.py +2338 -0
  55. kailash/nodes/auth/risk_assessment.py +872 -0
  56. kailash/nodes/auth/session_management.py +1093 -0
  57. kailash/nodes/auth/sso.py +1040 -0
  58. kailash/nodes/base.py +344 -13
  59. kailash/nodes/base_cycle_aware.py +4 -2
  60. kailash/nodes/base_with_acl.py +1 -1
  61. kailash/nodes/code/python.py +293 -12
  62. kailash/nodes/compliance/__init__.py +9 -0
  63. kailash/nodes/compliance/data_retention.py +1888 -0
  64. kailash/nodes/compliance/gdpr.py +2004 -0
  65. kailash/nodes/data/__init__.py +22 -2
  66. kailash/nodes/data/async_connection.py +469 -0
  67. kailash/nodes/data/async_sql.py +757 -0
  68. kailash/nodes/data/async_vector.py +598 -0
  69. kailash/nodes/data/readers.py +767 -0
  70. kailash/nodes/data/retrieval.py +360 -1
  71. kailash/nodes/data/sharepoint_graph.py +397 -21
  72. kailash/nodes/data/sql.py +94 -5
  73. kailash/nodes/data/streaming.py +68 -8
  74. kailash/nodes/data/vector_db.py +54 -4
  75. kailash/nodes/enterprise/__init__.py +13 -0
  76. kailash/nodes/enterprise/batch_processor.py +741 -0
  77. kailash/nodes/enterprise/data_lineage.py +497 -0
  78. kailash/nodes/logic/convergence.py +31 -9
  79. kailash/nodes/logic/operations.py +14 -3
  80. kailash/nodes/mixins/__init__.py +8 -0
  81. kailash/nodes/mixins/event_emitter.py +201 -0
  82. kailash/nodes/mixins/mcp.py +9 -4
  83. kailash/nodes/mixins/security.py +165 -0
  84. kailash/nodes/monitoring/__init__.py +7 -0
  85. kailash/nodes/monitoring/performance_benchmark.py +2497 -0
  86. kailash/nodes/rag/__init__.py +284 -0
  87. kailash/nodes/rag/advanced.py +1615 -0
  88. kailash/nodes/rag/agentic.py +773 -0
  89. kailash/nodes/rag/conversational.py +999 -0
  90. kailash/nodes/rag/evaluation.py +875 -0
  91. kailash/nodes/rag/federated.py +1188 -0
  92. kailash/nodes/rag/graph.py +721 -0
  93. kailash/nodes/rag/multimodal.py +671 -0
  94. kailash/nodes/rag/optimized.py +933 -0
  95. kailash/nodes/rag/privacy.py +1059 -0
  96. kailash/nodes/rag/query_processing.py +1335 -0
  97. kailash/nodes/rag/realtime.py +764 -0
  98. kailash/nodes/rag/registry.py +547 -0
  99. kailash/nodes/rag/router.py +837 -0
  100. kailash/nodes/rag/similarity.py +1854 -0
  101. kailash/nodes/rag/strategies.py +566 -0
  102. kailash/nodes/rag/workflows.py +575 -0
  103. kailash/nodes/security/__init__.py +19 -0
  104. kailash/nodes/security/abac_evaluator.py +1411 -0
  105. kailash/nodes/security/audit_log.py +91 -0
  106. kailash/nodes/security/behavior_analysis.py +1893 -0
  107. kailash/nodes/security/credential_manager.py +401 -0
  108. kailash/nodes/security/rotating_credentials.py +760 -0
  109. kailash/nodes/security/security_event.py +132 -0
  110. kailash/nodes/security/threat_detection.py +1103 -0
  111. kailash/nodes/testing/__init__.py +9 -0
  112. kailash/nodes/testing/credential_testing.py +499 -0
  113. kailash/nodes/transform/__init__.py +10 -2
  114. kailash/nodes/transform/chunkers.py +592 -1
  115. kailash/nodes/transform/processors.py +484 -14
  116. kailash/nodes/validation.py +321 -0
  117. kailash/runtime/access_controlled.py +1 -1
  118. kailash/runtime/async_local.py +41 -7
  119. kailash/runtime/docker.py +1 -1
  120. kailash/runtime/local.py +474 -55
  121. kailash/runtime/parallel.py +1 -1
  122. kailash/runtime/parallel_cyclic.py +1 -1
  123. kailash/runtime/testing.py +210 -2
  124. kailash/utils/migrations/__init__.py +25 -0
  125. kailash/utils/migrations/generator.py +433 -0
  126. kailash/utils/migrations/models.py +231 -0
  127. kailash/utils/migrations/runner.py +489 -0
  128. kailash/utils/secure_logging.py +342 -0
  129. kailash/workflow/__init__.py +16 -0
  130. kailash/workflow/cyclic_runner.py +3 -4
  131. kailash/workflow/graph.py +70 -2
  132. kailash/workflow/resilience.py +249 -0
  133. kailash/workflow/templates.py +726 -0
  134. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
  135. kailash-0.4.0.dist-info/RECORD +223 -0
  136. kailash/api/__init__.py +0 -17
  137. kailash/api/__main__.py +0 -6
  138. kailash/api/studio_secure.py +0 -893
  139. kailash/mcp/__main__.py +0 -13
  140. kailash/mcp/server_new.py +0 -336
  141. kailash/mcp/servers/__init__.py +0 -12
  142. kailash-0.3.1.dist-info/RECORD +0 -136
  143. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
  144. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
  145. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
  146. {kailash-0.3.1.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,566 @@
1
+ """
2
+ RAG Strategy Workflow Nodes
3
+
4
+ RAG strategies implemented as WorkflowNodes that encapsulate complete
5
+ RAG pipelines using existing Kailash components. Each strategy creates
6
+ a workflow using WorkflowBuilder and delegates all execution to the SDK.
7
+ """
8
+
9
+ import logging
10
+ from dataclasses import dataclass, field
11
+ from typing import Any, Dict, List, Optional
12
+
13
+ from ...workflow.builder import WorkflowBuilder
14
+ from ..base import Node, NodeParameter, register_node
15
+ from ..logic.workflow import WorkflowNode
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ @dataclass
21
+ class RAGConfig:
22
+ """Configuration for RAG strategies"""
23
+
24
+ chunk_size: int = 1000
25
+ chunk_overlap: int = 200
26
+ embedding_model: str = "text-embedding-3-small"
27
+ embedding_provider: str = "openai"
28
+ vector_db_provider: str = "postgresql"
29
+ retrieval_k: int = 5
30
+ similarity_threshold: float = 0.7
31
+
32
+
33
+ def create_semantic_rag_workflow(config: RAGConfig) -> WorkflowNode:
34
+ """
35
+ Create semantic RAG workflow using existing Kailash nodes.
36
+
37
+ Pipeline: Documents → SemanticChunker → EmbeddingGenerator → VectorDatabase → HybridRetriever
38
+ """
39
+ builder = WorkflowBuilder()
40
+
41
+ # Add chunking node
42
+ chunker_id = builder.add_node(
43
+ "SemanticChunkerNode",
44
+ node_id="semantic_chunker",
45
+ config={
46
+ "chunk_size": config.chunk_size,
47
+ "chunk_overlap": config.chunk_overlap,
48
+ "similarity_threshold": config.similarity_threshold,
49
+ },
50
+ )
51
+
52
+ # Add embedding generation
53
+ embedder_id = builder.add_node(
54
+ "EmbeddingGeneratorNode",
55
+ node_id="embedder",
56
+ config={"model": config.embedding_model, "provider": config.embedding_provider},
57
+ )
58
+
59
+ # Add vector database storage
60
+ vectordb_id = builder.add_node(
61
+ "VectorDatabaseNode",
62
+ node_id="vector_db",
63
+ config={
64
+ "provider": config.vector_db_provider,
65
+ "collection_name": "semantic_rag",
66
+ },
67
+ )
68
+
69
+ # Add retrieval node
70
+ retriever_id = builder.add_node(
71
+ "HybridRetrieverNode",
72
+ node_id="retriever",
73
+ config={
74
+ "k": config.retrieval_k,
75
+ "similarity_threshold": config.similarity_threshold,
76
+ "method": "dense",
77
+ },
78
+ )
79
+
80
+ # Connect the pipeline
81
+ builder.add_connection(chunker_id, "chunks", embedder_id, "texts")
82
+ builder.add_connection(embedder_id, "embeddings", vectordb_id, "embeddings")
83
+ builder.add_connection(chunker_id, "chunks", vectordb_id, "documents")
84
+ builder.add_connection(
85
+ vectordb_id, "stored_documents", retriever_id, "document_store"
86
+ )
87
+
88
+ # Build workflow
89
+ workflow = builder.build(name="semantic_rag_workflow")
90
+
91
+ # Return as WorkflowNode
92
+ return WorkflowNode(
93
+ workflow=workflow,
94
+ name="semantic_rag_node",
95
+ description="Semantic RAG with dense embeddings and semantic chunking",
96
+ )
97
+
98
+
99
+ def create_statistical_rag_workflow(config: RAGConfig) -> WorkflowNode:
100
+ """
101
+ Create statistical RAG workflow using existing Kailash nodes.
102
+
103
+ Pipeline: Documents → StatisticalChunker → EmbeddingGenerator → VectorDatabase → HybridRetriever (sparse)
104
+ """
105
+ builder = WorkflowBuilder()
106
+
107
+ # Add statistical chunking
108
+ chunker_id = builder.add_node(
109
+ "StatisticalChunkerNode",
110
+ node_id="statistical_chunker",
111
+ config={"chunk_size": config.chunk_size, "overlap": config.chunk_overlap},
112
+ )
113
+
114
+ # Add embedding generation (for backup dense retrieval)
115
+ embedder_id = builder.add_node(
116
+ "EmbeddingGeneratorNode",
117
+ node_id="embedder",
118
+ config={"model": config.embedding_model, "provider": config.embedding_provider},
119
+ )
120
+
121
+ # Add keyword extraction for sparse retrieval
122
+ keyword_extractor_id = builder.add_node(
123
+ "PythonCodeNode",
124
+ node_id="keyword_extractor",
125
+ config={
126
+ "code": """
127
+ import re
128
+ def extract_keywords(text):
129
+ words = re.findall(r'\\b[a-zA-Z]{3,}\\b', text.lower())
130
+ stop_words = {'the', 'and', 'for', 'are', 'but', 'not', 'you', 'all', 'can', 'had', 'her', 'was', 'one', 'our', 'out', 'day', 'get', 'has', 'him', 'his', 'how', 'man', 'new', 'now', 'old', 'see', 'two', 'way', 'who', 'boy', 'did', 'its', 'let', 'put', 'say', 'she', 'too', 'use'}
131
+ keywords = [word for word in set(words) if word not in stop_words]
132
+ return keywords[:20]
133
+
134
+ result = {"keywords": [extract_keywords(chunk["content"]) for chunk in chunks]}
135
+ """
136
+ },
137
+ )
138
+
139
+ # Add vector database
140
+ vectordb_id = builder.add_node(
141
+ "VectorDatabaseNode",
142
+ node_id="vector_db",
143
+ config={
144
+ "provider": config.vector_db_provider,
145
+ "collection_name": "statistical_rag",
146
+ },
147
+ )
148
+
149
+ # Add sparse retrieval
150
+ retriever_id = builder.add_node(
151
+ "HybridRetrieverNode",
152
+ node_id="retriever",
153
+ config={"k": config.retrieval_k, "method": "sparse"},
154
+ )
155
+
156
+ # Connect pipeline
157
+ builder.add_connection(chunker_id, "chunks", keyword_extractor_id, "chunks")
158
+ builder.add_connection(chunker_id, "chunks", embedder_id, "texts")
159
+ builder.add_connection(keyword_extractor_id, "result", vectordb_id, "keywords")
160
+ builder.add_connection(embedder_id, "embeddings", vectordb_id, "embeddings")
161
+ builder.add_connection(chunker_id, "chunks", vectordb_id, "documents")
162
+ builder.add_connection(
163
+ vectordb_id, "stored_documents", retriever_id, "document_store"
164
+ )
165
+
166
+ workflow = builder.build(name="statistical_rag_workflow")
167
+
168
+ return WorkflowNode(
169
+ workflow=workflow,
170
+ name="statistical_rag_node",
171
+ description="Statistical RAG with sparse retrieval and keyword matching",
172
+ )
173
+
174
+
175
+ def create_hybrid_rag_workflow(
176
+ config: RAGConfig, fusion_method: str = "rrf"
177
+ ) -> WorkflowNode:
178
+ """
179
+ Create hybrid RAG workflow combining semantic and statistical approaches.
180
+
181
+ Pipeline: Documents → [SemanticRAG + StatisticalRAG] → ResultFuser → HybridRetriever
182
+ """
183
+ builder = WorkflowBuilder()
184
+
185
+ # Create both semantic and statistical sub-workflows
186
+ semantic_workflow = create_semantic_rag_workflow(config)
187
+ statistical_workflow = create_statistical_rag_workflow(config)
188
+
189
+ # Add sub-workflows as nodes
190
+ semantic_id = builder.add_node(
191
+ "WorkflowNode",
192
+ node_id="semantic_rag",
193
+ config={"workflow": semantic_workflow.workflow},
194
+ )
195
+
196
+ statistical_id = builder.add_node(
197
+ "WorkflowNode",
198
+ node_id="statistical_rag",
199
+ config={"workflow": statistical_workflow.workflow},
200
+ )
201
+
202
+ # Add result fusion node
203
+ fusion_id = builder.add_node(
204
+ "PythonCodeNode",
205
+ node_id="result_fusion",
206
+ config={
207
+ "code": f"""
208
+ def rrf_fusion(semantic_results, statistical_results, k=60):
209
+ '''Reciprocal Rank Fusion for combining results'''
210
+ doc_scores = {{}}
211
+
212
+ # Add semantic results
213
+ for i, doc in enumerate(semantic_results.get("results", [])):
214
+ doc_id = doc.get("id", f"semantic_{{i}}")
215
+ doc_scores[doc_id] = {{
216
+ "document": doc,
217
+ "score": 1 / (k + i + 1),
218
+ "sources": ["semantic"]
219
+ }}
220
+
221
+ # Add statistical results
222
+ for i, doc in enumerate(statistical_results.get("results", [])):
223
+ doc_id = doc.get("id", f"statistical_{{i}}")
224
+ if doc_id in doc_scores:
225
+ doc_scores[doc_id]["score"] += 1 / (k + i + 1)
226
+ doc_scores[doc_id]["sources"].append("statistical")
227
+ else:
228
+ doc_scores[doc_id] = {{
229
+ "document": doc,
230
+ "score": 1 / (k + i + 1),
231
+ "sources": ["statistical"]
232
+ }}
233
+
234
+ # Sort by fused score
235
+ sorted_results = sorted(doc_scores.items(), key=lambda x: x[1]["score"], reverse=True)
236
+
237
+ return {{
238
+ "documents": [item[1]["document"] for item in sorted_results[:5]],
239
+ "scores": [item[1]["score"] for item in sorted_results[:5]],
240
+ "fusion_method": "{fusion_method}"
241
+ }}
242
+
243
+ # Execute fusion
244
+ fusion_results = rrf_fusion(semantic_results, statistical_results)
245
+ result = {{"fused_results": fusion_results}}
246
+ """
247
+ },
248
+ )
249
+
250
+ # Connect workflows to fusion
251
+ builder.add_connection(semantic_id, "output", fusion_id, "semantic_results")
252
+ builder.add_connection(statistical_id, "output", fusion_id, "statistical_results")
253
+
254
+ workflow = builder.build(name="hybrid_rag_workflow")
255
+
256
+ return WorkflowNode(
257
+ workflow=workflow,
258
+ name="hybrid_rag_node",
259
+ description=f"Hybrid RAG with {fusion_method} fusion combining semantic and statistical approaches",
260
+ )
261
+
262
+
263
+ def create_hierarchical_rag_workflow(config: RAGConfig) -> WorkflowNode:
264
+ """
265
+ Create hierarchical RAG workflow for multi-level document processing.
266
+
267
+ Pipeline: Documents → HierarchicalChunker → Multi-level Embedding → Multi-collection Storage → Hierarchical Retrieval
268
+ """
269
+ builder = WorkflowBuilder()
270
+
271
+ # Add hierarchical chunking
272
+ chunker_id = builder.add_node(
273
+ "HierarchicalChunkerNode",
274
+ node_id="hierarchical_chunker",
275
+ config={"chunk_size": config.chunk_size, "overlap": config.chunk_overlap},
276
+ )
277
+
278
+ # Add embedding for each level
279
+ embedder_id = builder.add_node(
280
+ "EmbeddingGeneratorNode",
281
+ node_id="embedder",
282
+ config={"model": config.embedding_model, "provider": config.embedding_provider},
283
+ )
284
+
285
+ # Add level processor for organizing chunks by hierarchy
286
+ level_processor_id = builder.add_node(
287
+ "PythonCodeNode",
288
+ node_id="level_processor",
289
+ config={
290
+ "code": """
291
+ levels = ["document", "section", "paragraph"]
292
+ level_chunks = {}
293
+
294
+ for level in levels:
295
+ level_chunks[level] = [chunk for chunk in chunks if chunk.get("hierarchy_level") == level]
296
+
297
+ result = {"level_chunks": level_chunks, "levels": levels}
298
+ """
299
+ },
300
+ )
301
+
302
+ # Add vector databases for each level
303
+ doc_vectordb_id = builder.add_node(
304
+ "VectorDatabaseNode",
305
+ node_id="doc_vector_db",
306
+ config={
307
+ "provider": config.vector_db_provider,
308
+ "collection_name": "hierarchical_rag_document",
309
+ },
310
+ )
311
+
312
+ section_vectordb_id = builder.add_node(
313
+ "VectorDatabaseNode",
314
+ node_id="section_vector_db",
315
+ config={
316
+ "provider": config.vector_db_provider,
317
+ "collection_name": "hierarchical_rag_section",
318
+ },
319
+ )
320
+
321
+ para_vectordb_id = builder.add_node(
322
+ "VectorDatabaseNode",
323
+ node_id="para_vector_db",
324
+ config={
325
+ "provider": config.vector_db_provider,
326
+ "collection_name": "hierarchical_rag_paragraph",
327
+ },
328
+ )
329
+
330
+ # Add hierarchical retriever
331
+ retriever_id = builder.add_node(
332
+ "HybridRetrieverNode",
333
+ node_id="hierarchical_retriever",
334
+ config={"k": config.retrieval_k, "method": "hierarchical"},
335
+ )
336
+
337
+ # Connect pipeline
338
+ builder.add_connection(chunker_id, "chunks", level_processor_id, "chunks")
339
+ builder.add_connection(chunker_id, "chunks", embedder_id, "texts")
340
+ builder.add_connection(
341
+ level_processor_id, "result", doc_vectordb_id, "level_chunks"
342
+ )
343
+ builder.add_connection(
344
+ level_processor_id, "result", section_vectordb_id, "level_chunks"
345
+ )
346
+ builder.add_connection(
347
+ level_processor_id, "result", para_vectordb_id, "level_chunks"
348
+ )
349
+ builder.add_connection(embedder_id, "embeddings", doc_vectordb_id, "embeddings")
350
+ builder.add_connection(embedder_id, "embeddings", section_vectordb_id, "embeddings")
351
+ builder.add_connection(embedder_id, "embeddings", para_vectordb_id, "embeddings")
352
+
353
+ # Connect all vector DBs to retriever
354
+ builder.add_connection(
355
+ doc_vectordb_id, "stored_documents", retriever_id, "document_store"
356
+ )
357
+ builder.add_connection(
358
+ section_vectordb_id, "stored_documents", retriever_id, "section_store"
359
+ )
360
+ builder.add_connection(
361
+ para_vectordb_id, "stored_documents", retriever_id, "paragraph_store"
362
+ )
363
+
364
+ workflow = builder.build(name="hierarchical_rag_workflow")
365
+
366
+ return WorkflowNode(
367
+ workflow=workflow,
368
+ name="hierarchical_rag_node",
369
+ description="Hierarchical RAG with multi-level document processing and context aggregation",
370
+ )
371
+
372
+
373
+ @register_node()
374
+ class SemanticRAGNode(Node):
375
+ """
376
+ Semantic RAG Strategy Node
377
+
378
+ Wraps the semantic RAG workflow as a single node for easy integration.
379
+ Uses semantic chunking with dense embeddings for optimal semantic matching.
380
+ """
381
+
382
+ def __init__(self, name: str = "semantic_rag", config: Optional[RAGConfig] = None):
383
+ self.config = config or RAGConfig()
384
+ self.workflow_node = None
385
+ super().__init__(name)
386
+
387
+ def get_parameters(self) -> Dict[str, NodeParameter]:
388
+ return {
389
+ "documents": NodeParameter(
390
+ name="documents",
391
+ type=list,
392
+ required=True,
393
+ description="Documents to process for semantic RAG",
394
+ ),
395
+ "query": NodeParameter(
396
+ name="query",
397
+ type=str,
398
+ required=False,
399
+ description="Query for retrieval",
400
+ ),
401
+ "operation": NodeParameter(
402
+ name="operation",
403
+ type=str,
404
+ default="index",
405
+ description="Operation: 'index' or 'retrieve'",
406
+ ),
407
+ }
408
+
409
+ def run(self, **kwargs) -> Dict[str, Any]:
410
+ """Run semantic RAG using WorkflowNode"""
411
+ if not self.workflow_node:
412
+ self.workflow_node = create_semantic_rag_workflow(self.config)
413
+
414
+ # Delegate to WorkflowNode
415
+ return self.workflow_node.run(**kwargs)
416
+
417
+
418
+ @register_node()
419
+ class StatisticalRAGNode(Node):
420
+ """
421
+ Statistical RAG Strategy Node
422
+
423
+ Wraps the statistical RAG workflow for sparse keyword-based retrieval.
424
+ Uses statistical chunking with keyword extraction for technical content.
425
+ """
426
+
427
+ def __init__(
428
+ self, name: str = "statistical_rag", config: Optional[RAGConfig] = None
429
+ ):
430
+ self.config = config or RAGConfig()
431
+ self.workflow_node = None
432
+ super().__init__(name)
433
+
434
+ def get_parameters(self) -> Dict[str, NodeParameter]:
435
+ return {
436
+ "documents": NodeParameter(
437
+ name="documents",
438
+ type=list,
439
+ required=True,
440
+ description="Documents to process for statistical RAG",
441
+ ),
442
+ "query": NodeParameter(
443
+ name="query",
444
+ type=str,
445
+ required=False,
446
+ description="Query for retrieval",
447
+ ),
448
+ "operation": NodeParameter(
449
+ name="operation",
450
+ type=str,
451
+ default="index",
452
+ description="Operation: 'index' or 'retrieve'",
453
+ ),
454
+ }
455
+
456
+ def run(self, **kwargs) -> Dict[str, Any]:
457
+ """Run statistical RAG using WorkflowNode"""
458
+ if not self.workflow_node:
459
+ self.workflow_node = create_statistical_rag_workflow(self.config)
460
+
461
+ return self.workflow_node.run(**kwargs)
462
+
463
+
464
+ @register_node()
465
+ class HybridRAGNode(Node):
466
+ """
467
+ Hybrid RAG Strategy Node
468
+
469
+ Combines semantic and statistical approaches using result fusion.
470
+ Provides 20-30% better performance than individual methods.
471
+ """
472
+
473
+ def __init__(
474
+ self,
475
+ name: str = "hybrid_rag",
476
+ config: Optional[RAGConfig] = None,
477
+ fusion_method: str = "rrf",
478
+ ):
479
+ self.config = config or RAGConfig()
480
+ self.fusion_method = fusion_method
481
+ self.workflow_node = None
482
+ super().__init__(name)
483
+
484
+ def get_parameters(self) -> Dict[str, NodeParameter]:
485
+ return {
486
+ "documents": NodeParameter(
487
+ name="documents",
488
+ type=list,
489
+ required=True,
490
+ description="Documents to process for hybrid RAG",
491
+ ),
492
+ "query": NodeParameter(
493
+ name="query",
494
+ type=str,
495
+ required=False,
496
+ description="Query for retrieval",
497
+ ),
498
+ "operation": NodeParameter(
499
+ name="operation",
500
+ type=str,
501
+ default="index",
502
+ description="Operation: 'index' or 'retrieve'",
503
+ ),
504
+ "fusion_method": NodeParameter(
505
+ name="fusion_method",
506
+ type=str,
507
+ default="rrf",
508
+ description="Fusion method: 'rrf', 'linear', 'weighted'",
509
+ ),
510
+ }
511
+
512
+ def run(self, **kwargs) -> Dict[str, Any]:
513
+ """Run hybrid RAG using WorkflowNode"""
514
+ fusion_method = kwargs.get("fusion_method", self.fusion_method)
515
+
516
+ if not self.workflow_node or fusion_method != self.fusion_method:
517
+ self.fusion_method = fusion_method
518
+ self.workflow_node = create_hybrid_rag_workflow(self.config, fusion_method)
519
+
520
+ return self.workflow_node.run(**kwargs)
521
+
522
+
523
+ @register_node()
524
+ class HierarchicalRAGNode(Node):
525
+ """
526
+ Hierarchical RAG Strategy Node
527
+
528
+ Multi-level document processing that preserves document structure.
529
+ Processes documents at document, section, and paragraph levels.
530
+ """
531
+
532
+ def __init__(
533
+ self, name: str = "hierarchical_rag", config: Optional[RAGConfig] = None
534
+ ):
535
+ self.config = config or RAGConfig()
536
+ self.workflow_node = None
537
+ super().__init__(name)
538
+
539
+ def get_parameters(self) -> Dict[str, NodeParameter]:
540
+ return {
541
+ "documents": NodeParameter(
542
+ name="documents",
543
+ type=list,
544
+ required=True,
545
+ description="Documents to process hierarchically",
546
+ ),
547
+ "query": NodeParameter(
548
+ name="query",
549
+ type=str,
550
+ required=False,
551
+ description="Query for hierarchical retrieval",
552
+ ),
553
+ "operation": NodeParameter(
554
+ name="operation",
555
+ type=str,
556
+ default="index",
557
+ description="Operation: 'index' or 'retrieve'",
558
+ ),
559
+ }
560
+
561
+ def run(self, **kwargs) -> Dict[str, Any]:
562
+ """Run hierarchical RAG using WorkflowNode"""
563
+ if not self.workflow_node:
564
+ self.workflow_node = create_hierarchical_rag_workflow(self.config)
565
+
566
+ return self.workflow_node.run(**kwargs)