kailash 0.3.2__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. kailash/__init__.py +33 -1
  2. kailash/access_control/__init__.py +129 -0
  3. kailash/access_control/managers.py +461 -0
  4. kailash/access_control/rule_evaluators.py +467 -0
  5. kailash/access_control_abac.py +825 -0
  6. kailash/config/__init__.py +27 -0
  7. kailash/config/database_config.py +359 -0
  8. kailash/database/__init__.py +28 -0
  9. kailash/database/execution_pipeline.py +499 -0
  10. kailash/middleware/__init__.py +306 -0
  11. kailash/middleware/auth/__init__.py +33 -0
  12. kailash/middleware/auth/access_control.py +436 -0
  13. kailash/middleware/auth/auth_manager.py +422 -0
  14. kailash/middleware/auth/jwt_auth.py +477 -0
  15. kailash/middleware/auth/kailash_jwt_auth.py +616 -0
  16. kailash/middleware/communication/__init__.py +37 -0
  17. kailash/middleware/communication/ai_chat.py +989 -0
  18. kailash/middleware/communication/api_gateway.py +802 -0
  19. kailash/middleware/communication/events.py +470 -0
  20. kailash/middleware/communication/realtime.py +710 -0
  21. kailash/middleware/core/__init__.py +21 -0
  22. kailash/middleware/core/agent_ui.py +890 -0
  23. kailash/middleware/core/schema.py +643 -0
  24. kailash/middleware/core/workflows.py +396 -0
  25. kailash/middleware/database/__init__.py +63 -0
  26. kailash/middleware/database/base.py +113 -0
  27. kailash/middleware/database/base_models.py +525 -0
  28. kailash/middleware/database/enums.py +106 -0
  29. kailash/middleware/database/migrations.py +12 -0
  30. kailash/{api/database.py → middleware/database/models.py} +183 -291
  31. kailash/middleware/database/repositories.py +685 -0
  32. kailash/middleware/database/session_manager.py +19 -0
  33. kailash/middleware/mcp/__init__.py +38 -0
  34. kailash/middleware/mcp/client_integration.py +585 -0
  35. kailash/middleware/mcp/enhanced_server.py +576 -0
  36. kailash/nodes/__init__.py +27 -3
  37. kailash/nodes/admin/__init__.py +42 -0
  38. kailash/nodes/admin/audit_log.py +794 -0
  39. kailash/nodes/admin/permission_check.py +864 -0
  40. kailash/nodes/admin/role_management.py +823 -0
  41. kailash/nodes/admin/security_event.py +1523 -0
  42. kailash/nodes/admin/user_management.py +944 -0
  43. kailash/nodes/ai/a2a.py +24 -7
  44. kailash/nodes/ai/ai_providers.py +248 -40
  45. kailash/nodes/ai/embedding_generator.py +11 -11
  46. kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
  47. kailash/nodes/ai/llm_agent.py +436 -5
  48. kailash/nodes/ai/self_organizing.py +85 -10
  49. kailash/nodes/ai/vision_utils.py +148 -0
  50. kailash/nodes/alerts/__init__.py +26 -0
  51. kailash/nodes/alerts/base.py +234 -0
  52. kailash/nodes/alerts/discord.py +499 -0
  53. kailash/nodes/api/auth.py +287 -6
  54. kailash/nodes/api/rest.py +151 -0
  55. kailash/nodes/auth/__init__.py +17 -0
  56. kailash/nodes/auth/directory_integration.py +1228 -0
  57. kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
  58. kailash/nodes/auth/mfa.py +2338 -0
  59. kailash/nodes/auth/risk_assessment.py +872 -0
  60. kailash/nodes/auth/session_management.py +1093 -0
  61. kailash/nodes/auth/sso.py +1040 -0
  62. kailash/nodes/base.py +344 -13
  63. kailash/nodes/base_cycle_aware.py +4 -2
  64. kailash/nodes/base_with_acl.py +1 -1
  65. kailash/nodes/code/python.py +283 -10
  66. kailash/nodes/compliance/__init__.py +9 -0
  67. kailash/nodes/compliance/data_retention.py +1888 -0
  68. kailash/nodes/compliance/gdpr.py +2004 -0
  69. kailash/nodes/data/__init__.py +22 -2
  70. kailash/nodes/data/async_connection.py +469 -0
  71. kailash/nodes/data/async_sql.py +757 -0
  72. kailash/nodes/data/async_vector.py +598 -0
  73. kailash/nodes/data/readers.py +767 -0
  74. kailash/nodes/data/retrieval.py +360 -1
  75. kailash/nodes/data/sharepoint_graph.py +397 -21
  76. kailash/nodes/data/sql.py +94 -5
  77. kailash/nodes/data/streaming.py +68 -8
  78. kailash/nodes/data/vector_db.py +54 -4
  79. kailash/nodes/enterprise/__init__.py +13 -0
  80. kailash/nodes/enterprise/batch_processor.py +741 -0
  81. kailash/nodes/enterprise/data_lineage.py +497 -0
  82. kailash/nodes/logic/convergence.py +31 -9
  83. kailash/nodes/logic/operations.py +14 -3
  84. kailash/nodes/mixins/__init__.py +8 -0
  85. kailash/nodes/mixins/event_emitter.py +201 -0
  86. kailash/nodes/mixins/mcp.py +9 -4
  87. kailash/nodes/mixins/security.py +165 -0
  88. kailash/nodes/monitoring/__init__.py +7 -0
  89. kailash/nodes/monitoring/performance_benchmark.py +2497 -0
  90. kailash/nodes/rag/__init__.py +284 -0
  91. kailash/nodes/rag/advanced.py +1615 -0
  92. kailash/nodes/rag/agentic.py +773 -0
  93. kailash/nodes/rag/conversational.py +999 -0
  94. kailash/nodes/rag/evaluation.py +875 -0
  95. kailash/nodes/rag/federated.py +1188 -0
  96. kailash/nodes/rag/graph.py +721 -0
  97. kailash/nodes/rag/multimodal.py +671 -0
  98. kailash/nodes/rag/optimized.py +933 -0
  99. kailash/nodes/rag/privacy.py +1059 -0
  100. kailash/nodes/rag/query_processing.py +1335 -0
  101. kailash/nodes/rag/realtime.py +764 -0
  102. kailash/nodes/rag/registry.py +547 -0
  103. kailash/nodes/rag/router.py +837 -0
  104. kailash/nodes/rag/similarity.py +1854 -0
  105. kailash/nodes/rag/strategies.py +566 -0
  106. kailash/nodes/rag/workflows.py +575 -0
  107. kailash/nodes/security/__init__.py +19 -0
  108. kailash/nodes/security/abac_evaluator.py +1411 -0
  109. kailash/nodes/security/audit_log.py +103 -0
  110. kailash/nodes/security/behavior_analysis.py +1893 -0
  111. kailash/nodes/security/credential_manager.py +401 -0
  112. kailash/nodes/security/rotating_credentials.py +760 -0
  113. kailash/nodes/security/security_event.py +133 -0
  114. kailash/nodes/security/threat_detection.py +1103 -0
  115. kailash/nodes/testing/__init__.py +9 -0
  116. kailash/nodes/testing/credential_testing.py +499 -0
  117. kailash/nodes/transform/__init__.py +10 -2
  118. kailash/nodes/transform/chunkers.py +592 -1
  119. kailash/nodes/transform/processors.py +484 -14
  120. kailash/nodes/validation.py +321 -0
  121. kailash/runtime/access_controlled.py +1 -1
  122. kailash/runtime/async_local.py +41 -7
  123. kailash/runtime/docker.py +1 -1
  124. kailash/runtime/local.py +474 -55
  125. kailash/runtime/parallel.py +1 -1
  126. kailash/runtime/parallel_cyclic.py +1 -1
  127. kailash/runtime/testing.py +210 -2
  128. kailash/security.py +1 -1
  129. kailash/utils/migrations/__init__.py +25 -0
  130. kailash/utils/migrations/generator.py +433 -0
  131. kailash/utils/migrations/models.py +231 -0
  132. kailash/utils/migrations/runner.py +489 -0
  133. kailash/utils/secure_logging.py +342 -0
  134. kailash/workflow/__init__.py +16 -0
  135. kailash/workflow/cyclic_runner.py +3 -4
  136. kailash/workflow/graph.py +70 -2
  137. kailash/workflow/resilience.py +249 -0
  138. kailash/workflow/templates.py +726 -0
  139. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/METADATA +256 -20
  140. kailash-0.4.1.dist-info/RECORD +227 -0
  141. kailash/api/__init__.py +0 -17
  142. kailash/api/__main__.py +0 -6
  143. kailash/api/studio_secure.py +0 -893
  144. kailash/mcp/__main__.py +0 -13
  145. kailash/mcp/server_new.py +0 -336
  146. kailash/mcp/servers/__init__.py +0 -12
  147. kailash-0.3.2.dist-info/RECORD +0 -136
  148. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/WHEEL +0 -0
  149. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/entry_points.txt +0 -0
  150. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/licenses/LICENSE +0 -0
  151. {kailash-0.3.2.dist-info → kailash-0.4.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,989 @@
1
+ """
2
+ AI Chat Integration for Kailash Middleware
3
+
4
+ Provides AI-powered chat interface for natural language workflow generation,
5
+ assistance, and guidance using existing Kailash LLM capabilities.
6
+ """
7
+
8
+ import asyncio
9
+ import json
10
+ import logging
11
+ import re
12
+ import uuid
13
+ from datetime import datetime, timezone
14
+ from typing import Any, Dict, List, Optional, Tuple, Union
15
+
16
+ from ...nodes.ai import EmbeddingGeneratorNode, LLMAgentNode
17
+ from ...nodes.data import AsyncSQLDatabaseNode
18
+ from ...nodes.security import CredentialManagerNode
19
+ from ...nodes.transform import DataTransformer
20
+ from ...workflow.builder import WorkflowBuilder
21
+ from ..core.agent_ui import AgentUIMiddleware
22
+ from ..core.schema import DynamicSchemaRegistry
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
class ChatMessage:
    """A single message in a chat conversation.

    Wraps the message text with an id, a role, a UTC timestamp, and
    free-form metadata, and can serialize itself for transport/storage.
    """

    def __init__(
        self,
        content: str,
        role: str = "user",
        message_id: Optional[str] = None,
        timestamp: Optional[datetime] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """Create a message.

        Args:
            content: The message text.
            role: One of "user", "assistant", or "system".
            message_id: Explicit id; a random UUID4 string is generated
                when omitted.
            timestamp: Explicit timestamp; defaults to the current UTC time.
            metadata: Optional free-form metadata; defaults to an empty dict.
        """
        self.message_id = message_id or str(uuid.uuid4())
        self.content = content
        self.role = role  # user, assistant, system
        self.timestamp = timestamp or datetime.now(timezone.utc)
        self.metadata = metadata or {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert message to a JSON-serializable dictionary."""
        return {
            "message_id": self.message_id,
            "content": self.content,
            "role": self.role,
            # ISO-8601 so the value round-trips through JSON cleanly.
            "timestamp": self.timestamp.isoformat(),
            "metadata": self.metadata,
        }
53
+
54
+
55
class ChatSession:
    """Holds the message history and context for one workflow-assistance chat.

    Every session is seeded with a single system message containing the
    assistant's standing instructions; later user/assistant messages are
    appended via :meth:`add_message`.
    """

    def __init__(self, session_id: str, user_id: str = None):
        self.session_id = session_id
        self.user_id = user_id
        self.messages: List[ChatMessage] = []
        self.context: Dict[str, Any] = {}
        self.created_at = datetime.now(timezone.utc)
        self.last_activity = datetime.now(timezone.utc)

        # Seed the conversation with the assistant's standing instructions.
        self.messages.append(
            ChatMessage(
                content=self._get_system_prompt(),
                role="system",
                metadata={"type": "system_initialization"},
            )
        )

    def add_message(
        self, content: str, role: str = "user", metadata: Dict[str, Any] = None
    ) -> str:
        """Append a message to the conversation and return its id."""
        msg = ChatMessage(content, role, metadata=metadata)
        self.messages.append(msg)
        self.last_activity = datetime.now(timezone.utc)
        return msg.message_id

    def get_conversation_history(self, limit: int = None) -> List[Dict[str, Any]]:
        """Return the messages (only the trailing *limit* when given) as dicts."""
        recent = self.messages if not limit else self.messages[-limit:]
        return [m.to_dict() for m in recent]

    def update_context(self, key: str, value: Any):
        """Set one context entry and refresh the activity timestamp."""
        self.context[key] = value
        self.last_activity = datetime.now(timezone.utc)

    def _get_system_prompt(self) -> str:
        """System prompt framing the assistant's role and the node library."""
        return """You are an AI assistant specialized in helping users create and manage Kailash workflows.

Your capabilities include:
- Creating workflows from natural language descriptions
- Suggesting appropriate nodes for specific tasks
- Explaining workflow concepts and best practices
- Debugging workflow issues
- Optimizing workflow performance

You have access to a comprehensive node library including:
- AI nodes (LLM agents, embedding generators, A2A agents)
- Data nodes (CSV readers, SQL databases, directory readers)
- Transform nodes (filters, processors, chunkers)
- Logic nodes (switches, merges, conditionals)
- API nodes (HTTP requests, REST clients, GraphQL)

When creating workflows, always:
1. Ask clarifying questions if the requirements are unclear
2. Suggest the most appropriate nodes for the task
3. Explain the workflow structure and data flow
4. Provide configuration guidance
5. Mention any potential limitations or considerations

Be concise but thorough, and always prioritize creating working, efficient workflows."""
119
+
120
+
121
class WorkflowGenerator:
    """Generates workflow configurations from natural language descriptions.

    When an LLM provider is available, the description is turned into a
    JSON workflow configuration via a structured prompt; otherwise a
    minimal single-node fallback workflow is produced so callers always
    receive a usable ``(config, explanation)`` pair.
    """

    def __init__(self, schema_registry: "DynamicSchemaRegistry"):
        self.schema_registry = schema_registry
        self.llm_node = None  # populated by _initialize_llm when a provider exists
        self._initialize_llm()

    def _initialize_llm(self):
        """Initialize the LLM node used for generation (best effort)."""
        try:
            self.llm_node = LLMAgentNode(
                name="workflow_generator",
                provider="ollama",  # Default to Ollama
                model="llama3.2:3b",
                temperature=0.3,  # Lower temperature for more consistent results
            )
        except Exception as e:
            # Leave llm_node as None; generation degrades to the fallback path.
            logger.warning(f"Could not initialize LLM node: {e}")

    async def generate_workflow_from_description(
        self, description: str, context: Optional[Dict[str, Any]] = None
    ) -> Tuple[Dict[str, Any], str]:
        """
        Generate workflow configuration from natural language description.

        Args:
            description: Free-text requirement from the user.
            context: Optional extra context appended to the LLM prompt.

        Returns:
            Tuple of (workflow_config, explanation). Falls back to a
            minimal PythonCodeNode workflow when the LLM is unavailable
            or any step of generation fails.
        """
        if not self.llm_node:
            return self._fallback_workflow_generation(description)

        try:
            # Get available nodes for LLM context
            available_nodes = await self._get_available_nodes_summary()

            # Create the structured generation prompt
            prompt = self._create_workflow_generation_prompt(
                description, available_nodes, context
            )

            # Generate workflow using the LLM
            response = await self._call_llm(prompt)

            # Parse response to extract workflow config + explanation
            workflow_config, explanation = self._parse_workflow_response(response)

            # Validate and enhance the configuration
            workflow_config = await self._validate_and_enhance_config(workflow_config)

            return workflow_config, explanation

        except Exception as e:
            # Any failure (LLM call, parsing, validation) degrades to fallback.
            logger.error(f"Error generating workflow: {e}")
            return self._fallback_workflow_generation(description)

    async def suggest_nodes_for_task(
        self, task_description: str
    ) -> List[Dict[str, Any]]:
        """Suggest appropriate nodes for a specific task.

        Returns:
            Up to 10 suggestion dicts sorted by descending relevance;
            an empty list on error.
        """
        try:
            # Hardcoded common nodes for now - in production this would query node registry
            available_nodes = {
                "CSVReaderNode": {"description": "Read CSV files", "category": "data"},
                "JSONReaderNode": {
                    "description": "Read JSON files",
                    "category": "data",
                },
                "HTTPRequestNode": {
                    "description": "Make HTTP API requests",
                    "category": "api",
                },
                "LLMAgentNode": {"description": "Run LLM inference", "category": "ai"},
                "PythonCodeNode": {
                    "description": "Execute Python code",
                    "category": "code",
                },
                "DataTransformer": {
                    "description": "Transform data",
                    "category": "transform",
                },
                "SwitchNode": {
                    "description": "Conditional routing",
                    "category": "logic",
                },
                "AsyncSQLDatabaseNode": {
                    "description": "Database operations",
                    "category": "data",
                },
            }

            suggestions = []
            task_lower = task_description.lower()

            for node_name, node_info in available_nodes.items():
                description = node_info["description"].lower()

                # Score how well this node matches the task text
                relevance = self._calculate_relevance(
                    task_lower, description, node_name.lower()
                )

                if relevance > 0.3:  # Threshold for relevance
                    suggestions.append(
                        {
                            "node_type": node_name,
                            "description": node_info["description"],
                            "category": node_info["category"],
                            "relevance": relevance,
                            "schema": node_info,
                        }
                    )

            # Most relevant first
            suggestions.sort(key=lambda x: x["relevance"], reverse=True)
            return suggestions[:10]  # Return top 10 suggestions

        except Exception as e:
            logger.error(f"Error suggesting nodes: {e}")
            return []

    def _calculate_relevance(
        self, task: str, description: str, node_name: str
    ) -> float:
        """Calculate a relevance score in [0.0, 1.0] between task and node.

        Word overlap between the task and the node description/name counts
        0.3 per shared word; category keyword hits add 0.2 each; capped at 1.0.
        """
        relevance = 0.0

        # Direct keyword matches
        task_words = set(task.split())
        desc_words = set(description.split())
        name_words = set(node_name.split("_"))

        # Exact word matches get high scores
        common_words = task_words.intersection(desc_words.union(name_words))
        relevance += len(common_words) * 0.3

        # Category-based matching
        category_keywords = {
            "data": [
                "read", "load", "import", "data", "file", "csv", "json", "database",
            ],
            "ai": [
                "llm", "ai", "generate", "analyze", "understand", "chat", "language",
            ],
            "transform": [
                "process", "transform", "filter", "clean", "modify", "convert",
            ],
            "api": ["api", "http", "request", "fetch", "call", "rest", "graphql"],
            "logic": ["if", "then", "condition", "switch", "route", "decide", "logic"],
        }

        # NOTE(review): this bonus matches the category label against the node
        # *name* (not its declared category), so it only fires when the label
        # happens to appear in the name (e.g. "data" in "asyncsqldatabasenode").
        for category, keywords in category_keywords.items():
            if category in node_name.lower():
                matches = len(task_words.intersection(set(keywords)))
                relevance += matches * 0.2

        return min(relevance, 1.0)  # Cap at 1.0

    async def _get_available_nodes_summary(self) -> str:
        """Get a one-line-per-category summary of available nodes for LLM context."""
        # Hardcoded for now - in production would query node registry
        summary_parts = [
            "Data: CSVReaderNode, JSONReaderNode, AsyncSQLDatabaseNode",
            "AI: LLMAgentNode, EmbeddingGeneratorNode",
            "API: HTTPRequestNode, RESTClientNode",
            "Transform: DataTransformer, FilterNode",
            "Logic: SwitchNode, MergeNode",
            "Code: PythonCodeNode",
        ]
        return "\n".join(summary_parts)

    def _create_workflow_generation_prompt(
        self,
        description: str,
        available_nodes: str,
        context: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Create the structured prompt sent to the LLM for generation."""
        prompt = f"""Create a Kailash workflow configuration for the following requirement:

REQUIREMENT: {description}

AVAILABLE NODES:
{available_nodes}

Please respond with a JSON configuration that includes:
1. workflow metadata (name, description)
2. nodes array with type, id, and parameters
3. connections array linking nodes together
4. a brief explanation of the workflow

Format your response as:
```json
{{
"metadata": {{
"name": "workflow_name",
"description": "Brief description"
}},
"nodes": [
{{
"id": "node1",
"type": "NodeType",
"parameters": {{}}
}}
],
"connections": [
{{
"source": "node1",
"target": "node2",
"source_output": "output",
"target_input": "input"
}}
]
}}
```

EXPLANATION:
[Brief explanation of the workflow and its components]
"""

        if context:
            prompt += f"\n\nADDITIONAL CONTEXT:\n{json.dumps(context, indent=2)}"

        return prompt

    async def _call_llm(self, prompt: str) -> str:
        """Call the LLM with the given prompt and return the response text.

        Raises:
            Exception: when no LLM node is configured or the call fails.
        """
        if not self.llm_node:
            raise Exception("LLM node not available")

        try:
            # Run the synchronous node call off the event-loop thread.
            result = await asyncio.to_thread(
                self.llm_node.process, messages=[{"role": "user", "content": prompt}]
            )

            # Extract content from an OpenAI-style response shape.
            if isinstance(result, dict) and "choices" in result:
                return result["choices"][0]["message"]["content"]
            else:
                return str(result)

        except Exception as e:
            logger.error(f"LLM call failed: {e}")
            raise

    def _parse_workflow_response(self, response: str) -> Tuple[Dict[str, Any], str]:
        """Parse the LLM response into (workflow_config, explanation).

        Raises:
            ValueError: if no fenced JSON block is present or it fails to parse.
        """
        try:
            # Extract the fenced ```json ... ``` block from the response
            json_match = re.search(r"```json\s*(.*?)\s*```", response, re.DOTALL)
            if not json_match:
                raise ValueError("No JSON configuration found in response")

            json_str = json_match.group(1)
            workflow_config = json.loads(json_str)

            # Extract explanation (text after "EXPLANATION:" up to a blank line)
            explanation_match = re.search(
                r"EXPLANATION:\s*(.*?)(?:\n\n|$)", response, re.DOTALL
            )
            explanation = (
                explanation_match.group(1).strip()
                if explanation_match
                else "Workflow generated successfully"
            )

            return workflow_config, explanation

        except Exception as e:
            logger.error(f"Error parsing workflow response: {e}")
            raise ValueError(f"Failed to parse workflow configuration: {e}")

    async def _validate_and_enhance_config(
        self, config: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Validate and enhance a workflow configuration in place.

        Ensures the top-level keys exist, warns on unknown node types, and
        guarantees every node has a unique, non-empty id.
        """
        try:
            # Ensure required top-level fields exist
            config.setdefault("metadata", {})
            config.setdefault("nodes", [])
            config.setdefault("connections", [])

            # Validate node types (simplified for now)
            valid_node_types = {
                "CSVReaderNode",
                "JSONReaderNode",
                "HTTPRequestNode",
                "LLMAgentNode",
                "PythonCodeNode",
                "DataTransformer",
                "SwitchNode",
                "AsyncSQLDatabaseNode",
            }
            for node in config["nodes"]:
                if node.get("type") not in valid_node_types:
                    logger.warning(f"Unknown node type: {node.get('type')}")
                    # Could suggest alternative or use PythonCodeNode as fallback

            # Guarantee unique, non-empty node IDs. (Bug fix: previously only
            # *missing* ids were filled in, so duplicated explicit ids survived.)
            seen = set()
            for i, node in enumerate(config["nodes"]):
                node_id = node.get("id") or f"node_{i + 1}"
                if node_id in seen:
                    suffix = 2
                    while f"{node_id}_{suffix}" in seen:
                        suffix += 1
                    node_id = f"{node_id}_{suffix}"
                node["id"] = node_id
                seen.add(node_id)

            return config

        except Exception as e:
            # Best effort: return the config unmodified rather than failing.
            logger.error(f"Error validating workflow config: {e}")
            return config

    def _fallback_workflow_generation(
        self, description: str
    ) -> Tuple[Dict[str, Any], str]:
        """Fallback workflow generation used when the LLM is not available."""
        # Create a minimal workflow with a single PythonCodeNode
        workflow_config = {
            "metadata": {
                "name": "simple_workflow",
                "description": f"Generated workflow for: {description}",
            },
            "nodes": [
                {
                    "id": "input_node",
                    "type": "PythonCodeNode",
                    "parameters": {
                        "code": "# Process input data\nresult = {'message': 'Workflow created successfully'}\nreturn {'result': result}"
                    },
                }
            ],
            "connections": [],
        }

        explanation = (
            "Created a simple workflow with a Python code node. "
            "For more sophisticated workflows, please configure an LLM provider."
        )

        return workflow_config, explanation
485
+
486
+
487
+ class AIChatMiddleware:
488
+ """
489
+ AI Chat middleware for natural language workflow assistance.
490
+
491
+ Enhanced with SDK components:
492
+ - Vector database for semantic search of chat history
493
+ - Embedding generation for conversation similarity
494
+ - Audit logging for all chat interactions
495
+ - Data transformation for message formatting
496
+
497
+ Provides:
498
+ - Natural language workflow generation
499
+ - Interactive workflow assistance
500
+ - Node suggestions and recommendations
501
+ - Workflow optimization guidance
502
+ - Debug and troubleshooting help
503
+ - Semantic search of past conversations
504
+ """
505
+
506
    def __init__(
        self,
        agent_ui_middleware: AgentUIMiddleware,
        vector_db_url: Optional[str] = None,
        enable_semantic_search: bool = True,
    ):
        """Wire the chat middleware to the agent-UI middleware and SDK nodes.

        Args:
            agent_ui_middleware: Middleware used to reach frontend sessions.
            vector_db_url: Connection string for the chat-history vector
                store; semantic search is disabled when this is None.
            enable_semantic_search: Master switch for embedding-based search
                of past conversations.
        """
        self.agent_ui = agent_ui_middleware
        self.schema_registry = DynamicSchemaRegistry()
        self.workflow_generator = WorkflowGenerator(self.schema_registry)
        # Semantic search requires a vector store, so both conditions must hold.
        self.enable_semantic_search = (
            enable_semantic_search and vector_db_url is not None
        )

        # Chat sessions (kept in memory for quick access)
        self.chat_sessions: Dict[str, ChatSession] = {}

        # Initialize SDK nodes (embedder, vector store, credentials, transformer)
        self._initialize_sdk_nodes(vector_db_url)

        # Performance tracking counters
        self.conversations_started = 0
        self.workflows_generated = 0
        self.suggestions_provided = 0
        self.embeddings_generated = 0
530
+
531
    def _initialize_sdk_nodes(self, vector_db_url: Optional[str] = None):
        """Initialize SDK nodes for enhanced chat functionality.

        Args:
            vector_db_url: Connection string for the chat vector store; only
                consumed when semantic search is enabled.
        """

        # Embedding generator for semantic search.
        # NOTE(review): the diff rendering loses nesting — the vector store is
        # assumed to sit inside this branch (enable_semantic_search already
        # implies vector_db_url is not None); confirm against upstream.
        if self.enable_semantic_search:
            self.embedding_node = EmbeddingGeneratorNode(
                name="chat_embedder",
                provider="sentence-transformers",
                model="all-MiniLM-L6-v2",
            )

            # Vector database for chat history (using SQL database for now)
            self.vector_db = AsyncSQLDatabaseNode(
                name="chat_vector_store", connection_string=vector_db_url, pool_size=5
            )

        # Credential management for chat features
        self.credential_node = CredentialManagerNode(
            name="chat_credentials",
            credential_name="chat_config",
            credential_type="custom",
        )

        # Data transformer that validates and timestamps incoming messages
        self.message_transformer = DataTransformer(
            name="chat_message_transformer",
            transformations=[
                {"type": "validate", "schema": "chat_message"},
                {"type": "add_field", "field": "processed_at", "value": "now()"},
            ],
        )
562
+
563
+ async def start_chat_session(self, session_id: str, user_id: str = None) -> str:
564
+ """Start a new chat session."""
565
+ chat_session = ChatSession(session_id, user_id)
566
+ self.chat_sessions[session_id] = chat_session
567
+ self.conversations_started += 1
568
+
569
+ logger.info(f"Started chat session {session_id} for user {user_id}")
570
+ return session_id
571
+
572
    async def send_message(
        self, session_id: str, content: str, context: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Send a user message and get the AI response for a session.

        Routes the message by detected intent (workflow generation, node
        suggestion, concept explanation, debugging, or general help),
        records both sides of the exchange in the session, and — when
        semantic search is enabled — stores embeddings of both messages.

        Args:
            session_id: Id of a session created via start_chat_session.
            content: The user's message text.
            context: Optional key/value pairs merged into the session context.

        Returns:
            Dict with the response message, detected intent/confidence, any
            generated workflow config, node suggestions, the session id, and
            the number of similar past conversations found.

        Raises:
            ValueError: if the session id is unknown.
        """
        chat_session = self.chat_sessions.get(session_id)
        if not chat_session:
            raise ValueError(f"Chat session {session_id} not found")

        # Add user message to session
        user_message_id = chat_session.add_message(content, "user")

        # Store user message in vector database if enabled
        if self.enable_semantic_search:
            await self._store_message_with_embedding(
                session_id, user_message_id, content, "user", chat_session.user_id
            )

        # Log user message
        logger.info(
            f"Chat message received: session={session_id}, user={chat_session.user_id}, length={len(content)}"
        )

        # Update session context with any caller-provided values
        if context:
            for key, value in context.items():
                chat_session.update_context(key, value)

        # Find similar past conversations if semantic search is enabled
        similar_conversations = []
        if self.enable_semantic_search:
            similar_conversations = await self._find_similar_conversations(
                content, limit=3
            )

        # Determine intent and generate response
        intent, confidence = await self._analyze_intent(content)

        response_content = ""
        workflow_config = None
        suggestions = []

        # High-confidence generation request: build a workflow config
        if intent == "generate_workflow" and confidence > 0.7:
            # Generate workflow
            try:
                # Include similar conversations as additional LLM context
                enhanced_context = {
                    **chat_session.context,
                    "similar_conversations": similar_conversations,
                }

                workflow_config, explanation = (
                    await self.workflow_generator.generate_workflow_from_description(
                        content, enhanced_context
                    )
                )

                response_content = f"I've created a workflow for you. {explanation}"
                self.workflows_generated += 1

                # Log workflow generation
                logger.info(
                    f"Workflow generated: session={session_id}, name={workflow_config.get('metadata', {}).get('name', 'unnamed')}"
                )

            except Exception as e:
                # Generation failures become a polite follow-up question
                response_content = f"I had trouble generating the workflow: {str(e)}. Could you provide more details about what you want to accomplish?"

        elif intent == "suggest_nodes" and confidence > 0.6:
            # Suggest nodes for the described task
            try:
                suggestions = await self.workflow_generator.suggest_nodes_for_task(
                    content
                )

                if suggestions:
                    response_content = (
                        "Based on your request, I recommend these nodes:\n\n"
                    )
                    # Only the top five suggestions go into the reply text
                    for i, suggestion in enumerate(suggestions[:5], 1):
                        response_content += f"{i}. **{suggestion['node_type']}** - {suggestion['description']}\n"

                    self.suggestions_provided += 1

                    # Log suggestions
                    logger.info(
                        f"Nodes suggested: session={session_id}, count={len(suggestions)}, top={suggestions[0]['node_type'] if suggestions else None}"
                    )
                else:
                    response_content = "I couldn't find specific node recommendations. Could you describe your task in more detail?"

            except Exception as e:
                response_content = f"I had trouble finding node suggestions: {str(e)}"

        elif intent == "explain_concept":
            # Explain Kailash concepts
            response_content = await self._explain_concept(content)

        elif intent == "help_debug":
            # Help with debugging
            response_content = await self._help_debug(content, chat_session.context)

        else:
            # General assistance (also the low-confidence fallback)
            response_content = await self._provide_general_assistance(
                content, chat_session.context
            )

        # Add assistant response to session, tagged with routing metadata
        assistant_message_id = chat_session.add_message(
            response_content,
            "assistant",
            metadata={
                "intent": intent,
                "confidence": confidence,
                "has_workflow": workflow_config is not None,
                "suggestion_count": len(suggestions),
            },
        )

        # Store assistant response in vector database if enabled
        if self.enable_semantic_search:
            await self._store_message_with_embedding(
                session_id,
                assistant_message_id,
                response_content,
                "assistant",
                chat_session.user_id,
            )

        # Log assistant response
        logger.info(
            f"Chat response sent: session={session_id}, intent={intent}, confidence={confidence}, length={len(response_content)}"
        )

        return {
            "message": response_content,
            "intent": intent,
            "confidence": confidence,
            "workflow_config": workflow_config,
            "suggestions": suggestions,
            "session_id": session_id,
            "similar_conversations": len(similar_conversations),
        }
715
+
716
+ async def _analyze_intent(self, content: str) -> Tuple[str, float]:
717
+ """Analyze user message to determine intent."""
718
+ content_lower = content.lower()
719
+
720
+ # Workflow generation keywords
721
+ generate_keywords = [
722
+ "create",
723
+ "build",
724
+ "make",
725
+ "generate",
726
+ "workflow",
727
+ "pipeline",
728
+ "automate",
729
+ "process",
730
+ "chain",
731
+ "flow",
732
+ ]
733
+
734
+ # Node suggestion keywords
735
+ suggest_keywords = [
736
+ "recommend",
737
+ "suggest",
738
+ "what node",
739
+ "which node",
740
+ "best node",
741
+ "how to",
742
+ "node for",
743
+ ]
744
+
745
+ # Explanation keywords
746
+ explain_keywords = [
747
+ "what is",
748
+ "explain",
749
+ "how does",
750
+ "understand",
751
+ "concept",
752
+ "definition",
753
+ "meaning",
754
+ ]
755
+
756
+ # Debug keywords
757
+ debug_keywords = [
758
+ "error",
759
+ "problem",
760
+ "issue",
761
+ "debug",
762
+ "troubleshoot",
763
+ "fix",
764
+ "not working",
765
+ "failed",
766
+ ]
767
+
768
+ # Calculate scores
769
+ generate_score = sum(
770
+ 1 for keyword in generate_keywords if keyword in content_lower
771
+ )
772
+ suggest_score = sum(
773
+ 1 for keyword in suggest_keywords if keyword in content_lower
774
+ )
775
+ explain_score = sum(
776
+ 1 for keyword in explain_keywords if keyword in content_lower
777
+ )
778
+ debug_score = sum(1 for keyword in debug_keywords if keyword in content_lower)
779
+
780
+ # Determine intent
781
+ scores = {
782
+ "generate_workflow": generate_score / len(generate_keywords),
783
+ "suggest_nodes": suggest_score / len(suggest_keywords),
784
+ "explain_concept": explain_score / len(explain_keywords),
785
+ "help_debug": debug_score / len(debug_keywords),
786
+ }
787
+
788
+ intent = max(scores.items(), key=lambda x: x[1])
789
+ return intent[0], min(intent[1] * 2, 1.0) # Scale confidence
790
+
791
+ async def _explain_concept(self, content: str) -> str:
792
+ """Provide explanations for Kailash concepts."""
793
+ concepts = {
794
+ "workflow": "A workflow in Kailash is a directed graph of interconnected nodes that process data. Each node performs a specific task, and connections define how data flows between nodes.",
795
+ "node": "A node is a single processing unit in a workflow. Nodes can read data, transform it, call APIs, run AI models, or perform logic operations.",
796
+ "connection": "Connections link nodes together, defining how output from one node becomes input to another. You can map specific outputs to specific inputs.",
797
+ "session": "A session represents a frontend client's interaction with the Kailash middleware. Sessions can contain multiple workflows and executions.",
798
+ "execution": "An execution is a single run of a workflow with specific input parameters. You can track progress and get real-time updates.",
799
+ "schema": "Schemas define the structure and parameters of nodes, enabling dynamic UI generation and validation.",
800
+ }
801
+
802
+ content_lower = content.lower()
803
+ for concept, explanation in concepts.items():
804
+ if concept in content_lower:
805
+ return f"**{concept.title()}**: {explanation}"
806
+
807
+ return "I can explain concepts like workflows, nodes, connections, sessions, executions, and schemas. What would you like to know more about?"
808
+
809
+ async def _help_debug(self, content: str, context: Dict[str, Any]) -> str:
810
+ """Provide debugging assistance."""
811
+ common_issues = {
812
+ "connection": "Check that node IDs match exactly in your connections. Ensure source_output and target_input names are correct.",
813
+ "parameter": "Verify that all required parameters are provided and have the correct types. Check the node schema for requirements.",
814
+ "execution": "Look at the execution status and error messages. Common issues include missing inputs or incorrect parameter values.",
815
+ "timeout": "Some operations may take time. Check if your workflow is still running or if there are performance bottlenecks.",
816
+ }
817
+
818
+ content_lower = content.lower()
819
+ for issue_type, suggestion in common_issues.items():
820
+ if issue_type in content_lower:
821
+ return f"**{issue_type.title()} Issue**: {suggestion}"
822
+
823
+ return "I can help debug common issues with connections, parameters, executions, and timeouts. Can you describe the specific problem you're experiencing?"
824
+
825
+ async def _provide_general_assistance(
826
+ self, content: str, context: Dict[str, Any]
827
+ ) -> str:
828
+ """Provide general assistance and guidance."""
829
+ return """I'm here to help you with Kailash workflows! I can:
830
+
831
+ • **Create workflows** from natural language descriptions
832
+ • **Suggest nodes** for specific tasks
833
+ • **Explain concepts** and best practices
834
+ • **Debug issues** and troubleshoot problems
835
+ • **Optimize workflows** for better performance
836
+
837
+ What would you like to work on? Just describe what you want to accomplish and I'll help you build it!"""
838
+
839
+ async def _store_message_with_embedding(
840
+ self,
841
+ session_id: str,
842
+ message_id: str,
843
+ content: str,
844
+ role: str,
845
+ user_id: str = None,
846
+ ):
847
+ """Store chat message with embedding in vector database."""
848
+ try:
849
+ # Generate embedding
850
+ embedding_result = await self.embedding_node.process({"text": content})
851
+
852
+ # Store in database (simplified for now)
853
+ await self.vector_db.process(
854
+ {
855
+ "query": "INSERT INTO chat_messages (id, session_id, user_id, content, role, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
856
+ "parameters": [
857
+ message_id,
858
+ session_id,
859
+ user_id,
860
+ content,
861
+ role,
862
+ datetime.now(timezone.utc),
863
+ ],
864
+ }
865
+ )
866
+
867
+ self.embeddings_generated += 1
868
+
869
+ except Exception as e:
870
+ logger.error(f"Failed to store message with embedding: {e}")
871
+
872
+ async def _find_similar_conversations(
873
+ self, query: str, limit: int = 5
874
+ ) -> List[Dict[str, Any]]:
875
+ """Find similar past conversations using vector search."""
876
+ try:
877
+ # Generate query embedding
878
+ query_embedding = await self.embedding_node.process({"text": query})
879
+
880
+ # Search for similar messages (simplified for now)
881
+ search_result = await self.vector_db.process(
882
+ {
883
+ "query": "SELECT * FROM chat_messages WHERE role = 'user' ORDER BY timestamp DESC LIMIT ?",
884
+ "parameters": [limit * 2],
885
+ }
886
+ )
887
+
888
+ # Group by session and return unique conversations
889
+ seen_sessions = set()
890
+ similar_conversations = []
891
+
892
+ for row in search_result.get("rows", []):
893
+ session_id = row["session_id"]
894
+ if session_id not in seen_sessions:
895
+ seen_sessions.add(session_id)
896
+ similar_conversations.append(
897
+ {
898
+ "session_id": session_id,
899
+ "content": row["content"],
900
+ "similarity": 0.8, # Simplified similarity
901
+ "timestamp": row["timestamp"],
902
+ }
903
+ )
904
+
905
+ if len(similar_conversations) >= limit:
906
+ break
907
+
908
+ return similar_conversations
909
+
910
+ except Exception as e:
911
+ logger.error(f"Failed to find similar conversations: {e}")
912
+ return []
913
+
914
+ def get_chat_history(
915
+ self, session_id: str, limit: int = None
916
+ ) -> List[Dict[str, Any]]:
917
+ """Get chat history for a session."""
918
+ chat_session = self.chat_sessions.get(session_id)
919
+ if not chat_session:
920
+ return []
921
+
922
+ return chat_session.get_conversation_history(limit)
923
+
924
+ async def search_chat_history(
925
+ self, query: str, user_id: str = None, limit: int = 10
926
+ ) -> List[Dict[str, Any]]:
927
+ """Search chat history using semantic search."""
928
+ if not self.enable_semantic_search:
929
+ return []
930
+
931
+ try:
932
+ # Generate query embedding
933
+ query_embedding = await self.embedding_node.process({"text": query})
934
+
935
+ # Prepare filters
936
+ filters = {}
937
+ if user_id:
938
+ filters["user_id"] = user_id
939
+
940
+ # Search database (simplified for now)
941
+ query_parts = ["SELECT * FROM chat_messages WHERE 1=1"]
942
+ params = []
943
+
944
+ if user_id:
945
+ query_parts.append("AND user_id = ?")
946
+ params.append(user_id)
947
+
948
+ query_parts.append("ORDER BY timestamp DESC LIMIT ?")
949
+ params.append(limit)
950
+
951
+ search_result = await self.vector_db.process(
952
+ {"query": " ".join(query_parts), "parameters": params}
953
+ )
954
+
955
+ # Format results
956
+ results = []
957
+ for row in search_result.get("rows", []):
958
+ results.append(
959
+ {
960
+ "message_id": row["id"],
961
+ "session_id": row["session_id"],
962
+ "content": row["content"],
963
+ "role": row["role"],
964
+ "similarity": 0.8, # Simplified similarity
965
+ "timestamp": row["timestamp"],
966
+ }
967
+ )
968
+
969
+ return results
970
+
971
+ except Exception as e:
972
+ logger.error(f"Failed to search chat history: {e}")
973
+ return []
974
+
975
+ def get_stats(self) -> Dict[str, Any]:
976
+ """Get chat middleware statistics."""
977
+ stats = {
978
+ "conversations_started": self.conversations_started,
979
+ "workflows_generated": self.workflows_generated,
980
+ "suggestions_provided": self.suggestions_provided,
981
+ "active_chat_sessions": len(self.chat_sessions),
982
+ "embeddings_generated": self.embeddings_generated,
983
+ }
984
+
985
+ # Add vector database stats if available
986
+ if self.enable_semantic_search:
987
+ stats["semantic_search_enabled"] = True
988
+
989
+ return stats