kailash 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/__init__.py +33 -1
- kailash/access_control/__init__.py +129 -0
- kailash/access_control/managers.py +461 -0
- kailash/access_control/rule_evaluators.py +467 -0
- kailash/access_control_abac.py +825 -0
- kailash/config/__init__.py +27 -0
- kailash/config/database_config.py +359 -0
- kailash/database/__init__.py +28 -0
- kailash/database/execution_pipeline.py +499 -0
- kailash/middleware/__init__.py +306 -0
- kailash/middleware/auth/__init__.py +33 -0
- kailash/middleware/auth/access_control.py +436 -0
- kailash/middleware/auth/auth_manager.py +422 -0
- kailash/middleware/auth/jwt_auth.py +477 -0
- kailash/middleware/auth/kailash_jwt_auth.py +616 -0
- kailash/middleware/communication/__init__.py +37 -0
- kailash/middleware/communication/ai_chat.py +989 -0
- kailash/middleware/communication/api_gateway.py +802 -0
- kailash/middleware/communication/events.py +470 -0
- kailash/middleware/communication/realtime.py +710 -0
- kailash/middleware/core/__init__.py +21 -0
- kailash/middleware/core/agent_ui.py +890 -0
- kailash/middleware/core/schema.py +643 -0
- kailash/middleware/core/workflows.py +396 -0
- kailash/middleware/database/__init__.py +63 -0
- kailash/middleware/database/base.py +113 -0
- kailash/middleware/database/base_models.py +525 -0
- kailash/middleware/database/enums.py +106 -0
- kailash/middleware/database/migrations.py +12 -0
- kailash/{api/database.py → middleware/database/models.py} +183 -291
- kailash/middleware/database/repositories.py +685 -0
- kailash/middleware/database/session_manager.py +19 -0
- kailash/middleware/mcp/__init__.py +38 -0
- kailash/middleware/mcp/client_integration.py +585 -0
- kailash/middleware/mcp/enhanced_server.py +576 -0
- kailash/nodes/__init__.py +25 -3
- kailash/nodes/admin/__init__.py +35 -0
- kailash/nodes/admin/audit_log.py +794 -0
- kailash/nodes/admin/permission_check.py +864 -0
- kailash/nodes/admin/role_management.py +823 -0
- kailash/nodes/admin/security_event.py +1519 -0
- kailash/nodes/admin/user_management.py +944 -0
- kailash/nodes/ai/a2a.py +24 -7
- kailash/nodes/ai/ai_providers.py +1 -0
- kailash/nodes/ai/embedding_generator.py +11 -11
- kailash/nodes/ai/intelligent_agent_orchestrator.py +99 -11
- kailash/nodes/ai/llm_agent.py +407 -2
- kailash/nodes/ai/self_organizing.py +85 -10
- kailash/nodes/api/auth.py +287 -6
- kailash/nodes/api/rest.py +151 -0
- kailash/nodes/auth/__init__.py +17 -0
- kailash/nodes/auth/directory_integration.py +1228 -0
- kailash/nodes/auth/enterprise_auth_provider.py +1328 -0
- kailash/nodes/auth/mfa.py +2338 -0
- kailash/nodes/auth/risk_assessment.py +872 -0
- kailash/nodes/auth/session_management.py +1093 -0
- kailash/nodes/auth/sso.py +1040 -0
- kailash/nodes/base.py +344 -13
- kailash/nodes/base_cycle_aware.py +4 -2
- kailash/nodes/base_with_acl.py +1 -1
- kailash/nodes/code/python.py +283 -10
- kailash/nodes/compliance/__init__.py +9 -0
- kailash/nodes/compliance/data_retention.py +1888 -0
- kailash/nodes/compliance/gdpr.py +2004 -0
- kailash/nodes/data/__init__.py +22 -2
- kailash/nodes/data/async_connection.py +469 -0
- kailash/nodes/data/async_sql.py +757 -0
- kailash/nodes/data/async_vector.py +598 -0
- kailash/nodes/data/readers.py +767 -0
- kailash/nodes/data/retrieval.py +360 -1
- kailash/nodes/data/sharepoint_graph.py +397 -21
- kailash/nodes/data/sql.py +94 -5
- kailash/nodes/data/streaming.py +68 -8
- kailash/nodes/data/vector_db.py +54 -4
- kailash/nodes/enterprise/__init__.py +13 -0
- kailash/nodes/enterprise/batch_processor.py +741 -0
- kailash/nodes/enterprise/data_lineage.py +497 -0
- kailash/nodes/logic/convergence.py +31 -9
- kailash/nodes/logic/operations.py +14 -3
- kailash/nodes/mixins/__init__.py +8 -0
- kailash/nodes/mixins/event_emitter.py +201 -0
- kailash/nodes/mixins/mcp.py +9 -4
- kailash/nodes/mixins/security.py +165 -0
- kailash/nodes/monitoring/__init__.py +7 -0
- kailash/nodes/monitoring/performance_benchmark.py +2497 -0
- kailash/nodes/rag/__init__.py +284 -0
- kailash/nodes/rag/advanced.py +1615 -0
- kailash/nodes/rag/agentic.py +773 -0
- kailash/nodes/rag/conversational.py +999 -0
- kailash/nodes/rag/evaluation.py +875 -0
- kailash/nodes/rag/federated.py +1188 -0
- kailash/nodes/rag/graph.py +721 -0
- kailash/nodes/rag/multimodal.py +671 -0
- kailash/nodes/rag/optimized.py +933 -0
- kailash/nodes/rag/privacy.py +1059 -0
- kailash/nodes/rag/query_processing.py +1335 -0
- kailash/nodes/rag/realtime.py +764 -0
- kailash/nodes/rag/registry.py +547 -0
- kailash/nodes/rag/router.py +837 -0
- kailash/nodes/rag/similarity.py +1854 -0
- kailash/nodes/rag/strategies.py +566 -0
- kailash/nodes/rag/workflows.py +575 -0
- kailash/nodes/security/__init__.py +19 -0
- kailash/nodes/security/abac_evaluator.py +1411 -0
- kailash/nodes/security/audit_log.py +91 -0
- kailash/nodes/security/behavior_analysis.py +1893 -0
- kailash/nodes/security/credential_manager.py +401 -0
- kailash/nodes/security/rotating_credentials.py +760 -0
- kailash/nodes/security/security_event.py +132 -0
- kailash/nodes/security/threat_detection.py +1103 -0
- kailash/nodes/testing/__init__.py +9 -0
- kailash/nodes/testing/credential_testing.py +499 -0
- kailash/nodes/transform/__init__.py +10 -2
- kailash/nodes/transform/chunkers.py +592 -1
- kailash/nodes/transform/processors.py +484 -14
- kailash/nodes/validation.py +321 -0
- kailash/runtime/access_controlled.py +1 -1
- kailash/runtime/async_local.py +41 -7
- kailash/runtime/docker.py +1 -1
- kailash/runtime/local.py +474 -55
- kailash/runtime/parallel.py +1 -1
- kailash/runtime/parallel_cyclic.py +1 -1
- kailash/runtime/testing.py +210 -2
- kailash/utils/migrations/__init__.py +25 -0
- kailash/utils/migrations/generator.py +433 -0
- kailash/utils/migrations/models.py +231 -0
- kailash/utils/migrations/runner.py +489 -0
- kailash/utils/secure_logging.py +342 -0
- kailash/workflow/__init__.py +16 -0
- kailash/workflow/cyclic_runner.py +3 -4
- kailash/workflow/graph.py +70 -2
- kailash/workflow/resilience.py +249 -0
- kailash/workflow/templates.py +726 -0
- {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/METADATA +253 -20
- kailash-0.4.0.dist-info/RECORD +223 -0
- kailash/api/__init__.py +0 -17
- kailash/api/__main__.py +0 -6
- kailash/api/studio_secure.py +0 -893
- kailash/mcp/__main__.py +0 -13
- kailash/mcp/server_new.py +0 -336
- kailash/mcp/servers/__init__.py +0 -12
- kailash-0.3.2.dist-info/RECORD +0 -136
- {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/WHEEL +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/entry_points.txt +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.3.2.dist-info → kailash-0.4.0.dist-info}/top_level.txt +0 -0
kailash/middleware/communication/ai_chat.py (new file)

@@ -0,0 +1,989 @@

````python
"""
AI Chat Integration for Kailash Middleware

Provides AI-powered chat interface for natural language workflow generation,
assistance, and guidance using existing Kailash LLM capabilities.
"""

import asyncio
import json
import logging
import re
import uuid
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional, Tuple, Union

from ...nodes.ai import EmbeddingGeneratorNode, LLMAgentNode
from ...nodes.data import AsyncSQLDatabaseNode
from ...nodes.security import CredentialManagerNode
from ...nodes.transform import DataTransformer
from ...workflow.builder import WorkflowBuilder
from ..core.agent_ui import AgentUIMiddleware
from ..core.schema import DynamicSchemaRegistry

logger = logging.getLogger(__name__)


class ChatMessage:
    """Represents a chat message in the conversation."""

    def __init__(
        self,
        content: str,
        role: str = "user",
        message_id: str = None,
        timestamp: datetime = None,
        metadata: Dict[str, Any] = None,
    ):
        self.message_id = message_id or str(uuid.uuid4())
        self.content = content
        self.role = role  # user, assistant, system
        self.timestamp = timestamp or datetime.now(timezone.utc)
        self.metadata = metadata or {}

    def to_dict(self) -> Dict[str, Any]:
        """Convert message to dictionary."""
        return {
            "message_id": self.message_id,
            "content": self.content,
            "role": self.role,
            "timestamp": self.timestamp.isoformat(),
            "metadata": self.metadata,
        }


class ChatSession:
    """Manages a chat conversation for workflow assistance."""

    def __init__(self, session_id: str, user_id: str = None):
        self.session_id = session_id
        self.user_id = user_id
        self.messages: List[ChatMessage] = []
        self.context: Dict[str, Any] = {}
        self.created_at = datetime.now(timezone.utc)
        self.last_activity = datetime.now(timezone.utc)

        # Initialize with system message
        system_msg = ChatMessage(
            content=self._get_system_prompt(),
            role="system",
            metadata={"type": "system_initialization"},
        )
        self.messages.append(system_msg)

    def add_message(
        self, content: str, role: str = "user", metadata: Dict[str, Any] = None
    ) -> str:
        """Add a message to the conversation."""
        message = ChatMessage(content, role, metadata=metadata)
        self.messages.append(message)
        self.last_activity = datetime.now(timezone.utc)
        return message.message_id

    def get_conversation_history(self, limit: int = None) -> List[Dict[str, Any]]:
        """Get conversation history as list of dictionaries."""
        messages = self.messages[-limit:] if limit else self.messages
        return [msg.to_dict() for msg in messages]

    def update_context(self, key: str, value: Any):
        """Update conversation context."""
        self.context[key] = value
        self.last_activity = datetime.now(timezone.utc)

    def _get_system_prompt(self) -> str:
        """Get system prompt for workflow assistance."""
        return """You are an AI assistant specialized in helping users create and manage Kailash workflows.

Your capabilities include:
- Creating workflows from natural language descriptions
- Suggesting appropriate nodes for specific tasks
- Explaining workflow concepts and best practices
- Debugging workflow issues
- Optimizing workflow performance

You have access to a comprehensive node library including:
- AI nodes (LLM agents, embedding generators, A2A agents)
- Data nodes (CSV readers, SQL databases, directory readers)
- Transform nodes (filters, processors, chunkers)
- Logic nodes (switches, merges, conditionals)
- API nodes (HTTP requests, REST clients, GraphQL)

When creating workflows, always:
1. Ask clarifying questions if the requirements are unclear
2. Suggest the most appropriate nodes for the task
3. Explain the workflow structure and data flow
4. Provide configuration guidance
5. Mention any potential limitations or considerations

Be concise but thorough, and always prioritize creating working, efficient workflows."""


class WorkflowGenerator:
    """Generates workflows from natural language descriptions."""

    def __init__(self, schema_registry: DynamicSchemaRegistry):
        self.schema_registry = schema_registry
        self.llm_node = None
        self._initialize_llm()

    def _initialize_llm(self):
        """Initialize LLM node for workflow generation."""
        try:
            self.llm_node = LLMAgentNode(
                name="workflow_generator",
                provider="ollama",  # Default to Ollama
                model="llama3.2:3b",
                temperature=0.3,  # Lower temperature for more consistent results
            )
        except Exception as e:
            logger.warning(f"Could not initialize LLM node: {e}")

    async def generate_workflow_from_description(
        self, description: str, context: Dict[str, Any] = None
    ) -> Tuple[Dict[str, Any], str]:
        """
        Generate workflow configuration from natural language description.

        Returns:
            Tuple of (workflow_config, explanation)
        """
        if not self.llm_node:
            return self._fallback_workflow_generation(description)

        try:
            # Get available nodes for context
            available_nodes = await self._get_available_nodes_summary()

            # Create prompt for workflow generation
            prompt = self._create_workflow_generation_prompt(
                description, available_nodes, context
            )

            # Generate workflow using LLM
            response = await self._call_llm(prompt)

            # Parse response to extract workflow config
            workflow_config, explanation = self._parse_workflow_response(response)

            # Validate and enhance the configuration
            workflow_config = await self._validate_and_enhance_config(workflow_config)

            return workflow_config, explanation

        except Exception as e:
            logger.error(f"Error generating workflow: {e}")
            return self._fallback_workflow_generation(description)

    async def suggest_nodes_for_task(
        self, task_description: str
    ) -> List[Dict[str, Any]]:
        """Suggest appropriate nodes for a specific task."""
        try:
            # Hardcoded common nodes for now - in production this would query node registry
            available_nodes = {
                "CSVReaderNode": {"description": "Read CSV files", "category": "data"},
                "JSONReaderNode": {
                    "description": "Read JSON files",
                    "category": "data",
                },
                "HTTPRequestNode": {
                    "description": "Make HTTP API requests",
                    "category": "api",
                },
                "LLMAgentNode": {"description": "Run LLM inference", "category": "ai"},
                "PythonCodeNode": {
                    "description": "Execute Python code",
                    "category": "code",
                },
                "DataTransformer": {
                    "description": "Transform data",
                    "category": "transform",
                },
                "SwitchNode": {
                    "description": "Conditional routing",
                    "category": "logic",
                },
                "AsyncSQLDatabaseNode": {
                    "description": "Database operations",
                    "category": "data",
                },
            }

            suggestions = []
            task_lower = task_description.lower()

            for node_name, node_info in available_nodes.items():
                description = node_info["description"].lower()

                # Calculate relevance score
                relevance = self._calculate_relevance(
                    task_lower, description, node_name.lower()
                )

                if relevance > 0.3:  # Threshold for relevance
                    suggestions.append(
                        {
                            "node_type": node_name,
                            "description": node_info["description"],
                            "category": node_info["category"],
                            "relevance": relevance,
                            "schema": node_info,
                        }
                    )

            # Sort by relevance
            suggestions.sort(key=lambda x: x["relevance"], reverse=True)
            return suggestions[:10]  # Return top 10 suggestions

        except Exception as e:
            logger.error(f"Error suggesting nodes: {e}")
            return []

    def _calculate_relevance(
        self, task: str, description: str, node_name: str
    ) -> float:
        """Calculate relevance score between task and node."""
        relevance = 0.0

        # Direct keyword matches
        task_words = set(task.split())
        desc_words = set(description.split())
        name_words = set(node_name.split("_"))

        # Exact matches get high scores
        common_words = task_words.intersection(desc_words.union(name_words))
        relevance += len(common_words) * 0.3

        # Category-based matching
        category_keywords = {
            "data": [
                "read",
                "load",
                "import",
                "data",
                "file",
                "csv",
                "json",
                "database",
            ],
            "ai": [
                "llm",
                "ai",
                "generate",
                "analyze",
                "understand",
                "chat",
                "language",
            ],
            "transform": [
                "process",
                "transform",
                "filter",
                "clean",
                "modify",
                "convert",
            ],
            "api": ["api", "http", "request", "fetch", "call", "rest", "graphql"],
            "logic": ["if", "then", "condition", "switch", "route", "decide", "logic"],
        }

        for category, keywords in category_keywords.items():
            if category in node_name.lower():
                matches = len(task_words.intersection(set(keywords)))
                relevance += matches * 0.2

        return min(relevance, 1.0)  # Cap at 1.0

    async def _get_available_nodes_summary(self) -> str:
        """Get a summary of available nodes for LLM context."""
        try:
            # Hardcoded for now - in production would query node registry
            summary_parts = [
                "Data: CSVReaderNode, JSONReaderNode, AsyncSQLDatabaseNode",
                "AI: LLMAgentNode, EmbeddingGeneratorNode",
                "API: HTTPRequestNode, RESTClientNode",
                "Transform: DataTransformer, FilterNode",
                "Logic: SwitchNode, MergeNode",
                "Code: PythonCodeNode",
            ]

            return "\n".join(summary_parts)

        except Exception as e:
            logger.error(f"Error getting nodes summary: {e}")
            return "Node information not available"

    def _create_workflow_generation_prompt(
        self, description: str, available_nodes: str, context: Dict[str, Any] = None
    ) -> str:
        """Create prompt for workflow generation."""
        prompt = f"""Create a Kailash workflow configuration for the following requirement:

REQUIREMENT: {description}

AVAILABLE NODES:
{available_nodes}

Please respond with a JSON configuration that includes:
1. workflow metadata (name, description)
2. nodes array with type, id, and parameters
3. connections array linking nodes together
4. a brief explanation of the workflow

Format your response as:
```json
{{
    "metadata": {{
        "name": "workflow_name",
        "description": "Brief description"
    }},
    "nodes": [
        {{
            "id": "node1",
            "type": "NodeType",
            "parameters": {{}}
        }}
    ],
    "connections": [
        {{
            "source": "node1",
            "target": "node2",
            "source_output": "output",
            "target_input": "input"
        }}
    ]
}}
```

EXPLANATION:
[Brief explanation of the workflow and its components]
"""

        if context:
            prompt += f"\n\nADDITIONAL CONTEXT:\n{json.dumps(context, indent=2)}"

        return prompt

    async def _call_llm(self, prompt: str) -> str:
        """Call LLM with the given prompt."""
        if not self.llm_node:
            raise Exception("LLM node not available")

        try:
            result = await asyncio.to_thread(
                self.llm_node.process, messages=[{"role": "user", "content": prompt}]
            )

            # Extract content from response
            if isinstance(result, dict) and "choices" in result:
                return result["choices"][0]["message"]["content"]
            else:
                return str(result)

        except Exception as e:
            logger.error(f"LLM call failed: {e}")
            raise

    def _parse_workflow_response(self, response: str) -> Tuple[Dict[str, Any], str]:
        """Parse LLM response to extract workflow config and explanation."""
        try:
            # Extract JSON from response
            json_match = re.search(r"```json\s*(.*?)\s*```", response, re.DOTALL)
            if not json_match:
                raise ValueError("No JSON configuration found in response")

            json_str = json_match.group(1)
            workflow_config = json.loads(json_str)

            # Extract explanation
            explanation_match = re.search(
                r"EXPLANATION:\s*(.*?)(?:\n\n|$)", response, re.DOTALL
            )
            explanation = (
                explanation_match.group(1).strip()
                if explanation_match
                else "Workflow generated successfully"
            )

            return workflow_config, explanation

        except Exception as e:
            logger.error(f"Error parsing workflow response: {e}")
            raise ValueError(f"Failed to parse workflow configuration: {e}")

    async def _validate_and_enhance_config(
        self, config: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Validate and enhance workflow configuration."""
        try:
            # Ensure required fields exist
            if "metadata" not in config:
                config["metadata"] = {}
            if "nodes" not in config:
                config["nodes"] = []
            if "connections" not in config:
                config["connections"] = []

            # Validate node types (simplified for now)
            valid_node_types = {
                "CSVReaderNode",
                "JSONReaderNode",
                "HTTPRequestNode",
                "LLMAgentNode",
                "PythonCodeNode",
                "DataTransformer",
                "SwitchNode",
                "AsyncSQLDatabaseNode",
            }
            for node in config["nodes"]:
                if node.get("type") not in valid_node_types:
                    logger.warning(f"Unknown node type: {node.get('type')}")
                    # Could suggest alternative or use PythonCodeNode as fallback

            # Ensure unique node IDs
            node_ids = [node.get("id") for node in config["nodes"]]
            if len(set(node_ids)) != len(node_ids):
                # Make IDs unique
                for i, node in enumerate(config["nodes"]):
                    if not node.get("id"):
                        node["id"] = f"node_{i+1}"

            return config

        except Exception as e:
            logger.error(f"Error validating workflow config: {e}")
            return config

    def _fallback_workflow_generation(
        self, description: str
    ) -> Tuple[Dict[str, Any], str]:
        """Fallback workflow generation when LLM is not available."""
        # Create a simple workflow with PythonCodeNode
        workflow_config = {
            "metadata": {
                "name": "simple_workflow",
                "description": f"Generated workflow for: {description}",
            },
            "nodes": [
                {
                    "id": "input_node",
                    "type": "PythonCodeNode",
                    "parameters": {
                        "code": "# Process input data\nresult = {'message': 'Workflow created successfully'}\nreturn {'result': result}"
                    },
                }
            ],
            "connections": [],
        }

        explanation = (
            "Created a simple workflow with a Python code node. "
            "For more sophisticated workflows, please configure an LLM provider."
        )

        return workflow_config, explanation


class AIChatMiddleware:
    """
    AI Chat middleware for natural language workflow assistance.

    Enhanced with SDK components:
    - Vector database for semantic search of chat history
    - Embedding generation for conversation similarity
    - Audit logging for all chat interactions
    - Data transformation for message formatting

    Provides:
    - Natural language workflow generation
    - Interactive workflow assistance
    - Node suggestions and recommendations
    - Workflow optimization guidance
    - Debug and troubleshooting help
    - Semantic search of past conversations
    """

    def __init__(
        self,
        agent_ui_middleware: AgentUIMiddleware,
        vector_db_url: str = None,
        enable_semantic_search: bool = True,
    ):
        self.agent_ui = agent_ui_middleware
        self.schema_registry = DynamicSchemaRegistry()
        self.workflow_generator = WorkflowGenerator(self.schema_registry)
        self.enable_semantic_search = (
            enable_semantic_search and vector_db_url is not None
        )

        # Chat sessions (kept for quick access)
        self.chat_sessions: Dict[str, ChatSession] = {}

        # Initialize SDK nodes
        self._initialize_sdk_nodes(vector_db_url)

        # Performance tracking
        self.conversations_started = 0
        self.workflows_generated = 0
        self.suggestions_provided = 0
        self.embeddings_generated = 0

    def _initialize_sdk_nodes(self, vector_db_url: str = None):
        """Initialize SDK nodes for enhanced chat functionality."""

        # Embedding generator for semantic search
        if self.enable_semantic_search:
            self.embedding_node = EmbeddingGeneratorNode(
                name="chat_embedder",
                provider="sentence-transformers",
                model="all-MiniLM-L6-v2",
            )

            # Vector database for chat history (using SQL database for now)
            self.vector_db = AsyncSQLDatabaseNode(
                name="chat_vector_store", connection_string=vector_db_url, pool_size=5
            )

        # Credential management for chat features
        self.credential_node = CredentialManagerNode(
            name="chat_credentials",
            credential_name="chat_config",
            credential_type="custom",
        )

        # Data transformer for message formatting
        self.message_transformer = DataTransformer(
            name="chat_message_transformer",
            transformations=[
                {"type": "validate", "schema": "chat_message"},
                {"type": "add_field", "field": "processed_at", "value": "now()"},
            ],
        )

    async def start_chat_session(self, session_id: str, user_id: str = None) -> str:
        """Start a new chat session."""
        chat_session = ChatSession(session_id, user_id)
        self.chat_sessions[session_id] = chat_session
        self.conversations_started += 1

        logger.info(f"Started chat session {session_id} for user {user_id}")
        return session_id

    async def send_message(
        self, session_id: str, content: str, context: Dict[str, Any] = None
    ) -> Dict[str, Any]:
        """Send message and get AI response, storing in vector database."""
        chat_session = self.chat_sessions.get(session_id)
        if not chat_session:
            raise ValueError(f"Chat session {session_id} not found")

        # Add user message to session
        user_message_id = chat_session.add_message(content, "user")

        # Store user message in vector database if enabled
        if self.enable_semantic_search:
            await self._store_message_with_embedding(
                session_id, user_message_id, content, "user", chat_session.user_id
            )

        # Log user message
        logger.info(
            f"Chat message received: session={session_id}, user={chat_session.user_id}, length={len(content)}"
        )

        # Update context if provided
        if context:
            for key, value in context.items():
                chat_session.update_context(key, value)

        # Find similar past conversations if semantic search is enabled
        similar_conversations = []
        if self.enable_semantic_search:
            similar_conversations = await self._find_similar_conversations(
                content, limit=3
            )

        # Determine intent and generate response
        intent, confidence = await self._analyze_intent(content)

        response_content = ""
        workflow_config = None
        suggestions = []

        if intent == "generate_workflow" and confidence > 0.7:
            # Generate workflow
            try:
                # Include similar conversations as context
                enhanced_context = {
                    **chat_session.context,
                    "similar_conversations": similar_conversations,
                }

                workflow_config, explanation = (
                    await self.workflow_generator.generate_workflow_from_description(
                        content, enhanced_context
                    )
                )

                response_content = f"I've created a workflow for you. {explanation}"
                self.workflows_generated += 1

                # Log workflow generation
                logger.info(
                    f"Workflow generated: session={session_id}, name={workflow_config.get('metadata', {}).get('name', 'unnamed')}"
                )

            except Exception as e:
                response_content = f"I had trouble generating the workflow: {str(e)}. Could you provide more details about what you want to accomplish?"

        elif intent == "suggest_nodes" and confidence > 0.6:
            # Suggest nodes
            try:
                suggestions = await self.workflow_generator.suggest_nodes_for_task(
                    content
                )

                if suggestions:
                    response_content = (
                        "Based on your request, I recommend these nodes:\n\n"
                    )
                    for i, suggestion in enumerate(suggestions[:5], 1):
                        response_content += f"{i}. **{suggestion['node_type']}** - {suggestion['description']}\n"

                    self.suggestions_provided += 1

                    # Log suggestions
                    logger.info(
                        f"Nodes suggested: session={session_id}, count={len(suggestions)}, top={suggestions[0]['node_type'] if suggestions else None}"
                    )
                else:
                    response_content = "I couldn't find specific node recommendations. Could you describe your task in more detail?"

            except Exception as e:
                response_content = f"I had trouble finding node suggestions: {str(e)}"

        elif intent == "explain_concept":
            # Explain Kailash concepts
            response_content = await self._explain_concept(content)

        elif intent == "help_debug":
            # Help with debugging
            response_content = await self._help_debug(content, chat_session.context)

        else:
            # General assistance
            response_content = await self._provide_general_assistance(
                content, chat_session.context
            )

        # Add assistant response to session
        assistant_message_id = chat_session.add_message(
            response_content,
            "assistant",
            metadata={
                "intent": intent,
                "confidence": confidence,
                "has_workflow": workflow_config is not None,
                "suggestion_count": len(suggestions),
            },
        )

        # Store assistant response in vector database if enabled
        if self.enable_semantic_search:
            await self._store_message_with_embedding(
                session_id,
                assistant_message_id,
                response_content,
                "assistant",
                chat_session.user_id,
            )

        # Log assistant response
        logger.info(
            f"Chat response sent: session={session_id}, intent={intent}, confidence={confidence}, length={len(response_content)}"
        )

        return {
            "message": response_content,
            "intent": intent,
            "confidence": confidence,
            "workflow_config": workflow_config,
            "suggestions": suggestions,
            "session_id": session_id,
            "similar_conversations": len(similar_conversations),
        }

    async def _analyze_intent(self, content: str) -> Tuple[str, float]:
        """Analyze user message to determine intent."""
        content_lower = content.lower()

        # Workflow generation keywords
        generate_keywords = [
            "create",
            "build",
            "make",
            "generate",
            "workflow",
            "pipeline",
            "automate",
            "process",
            "chain",
            "flow",
        ]

        # Node suggestion keywords
        suggest_keywords = [
            "recommend",
            "suggest",
            "what node",
            "which node",
            "best node",
            "how to",
            "node for",
        ]

        # Explanation keywords
        explain_keywords = [
            "what is",
            "explain",
            "how does",
            "understand",
            "concept",
            "definition",
            "meaning",
        ]

        # Debug keywords
        debug_keywords = [
            "error",
            "problem",
            "issue",
            "debug",
            "troubleshoot",
            "fix",
            "not working",
            "failed",
        ]

        # Calculate scores
        generate_score = sum(
            1 for keyword in generate_keywords if keyword in content_lower
        )
        suggest_score = sum(
            1 for keyword in suggest_keywords if keyword in content_lower
        )
        explain_score = sum(
            1 for keyword in explain_keywords if keyword in content_lower
        )
        debug_score = sum(1 for keyword in debug_keywords if keyword in content_lower)

        # Determine intent
        scores = {
            "generate_workflow": generate_score / len(generate_keywords),
            "suggest_nodes": suggest_score / len(suggest_keywords),
            "explain_concept": explain_score / len(explain_keywords),
            "help_debug": debug_score / len(debug_keywords),
        }

        intent = max(scores.items(), key=lambda x: x[1])
        return intent[0], min(intent[1] * 2, 1.0)  # Scale confidence

    async def _explain_concept(self, content: str) -> str:
        """Provide explanations for Kailash concepts."""
        concepts = {
            "workflow": "A workflow in Kailash is a directed graph of interconnected nodes that process data. Each node performs a specific task, and connections define how data flows between nodes.",
            "node": "A node is a single processing unit in a workflow. Nodes can read data, transform it, call APIs, run AI models, or perform logic operations.",
            "connection": "Connections link nodes together, defining how output from one node becomes input to another. You can map specific outputs to specific inputs.",
            "session": "A session represents a frontend client's interaction with the Kailash middleware. Sessions can contain multiple workflows and executions.",
            "execution": "An execution is a single run of a workflow with specific input parameters. You can track progress and get real-time updates.",
            "schema": "Schemas define the structure and parameters of nodes, enabling dynamic UI generation and validation.",
        }

        content_lower = content.lower()
        for concept, explanation in concepts.items():
            if concept in content_lower:
                return f"**{concept.title()}**: {explanation}"

        return "I can explain concepts like workflows, nodes, connections, sessions, executions, and schemas. What would you like to know more about?"

    async def _help_debug(self, content: str, context: Dict[str, Any]) -> str:
        """Provide debugging assistance."""
        common_issues = {
            "connection": "Check that node IDs match exactly in your connections. Ensure source_output and target_input names are correct.",
            "parameter": "Verify that all required parameters are provided and have the correct types. Check the node schema for requirements.",
            "execution": "Look at the execution status and error messages. Common issues include missing inputs or incorrect parameter values.",
            "timeout": "Some operations may take time. Check if your workflow is still running or if there are performance bottlenecks.",
        }

        content_lower = content.lower()
        for issue_type, suggestion in common_issues.items():
            if issue_type in content_lower:
                return f"**{issue_type.title()} Issue**: {suggestion}"

        return "I can help debug common issues with connections, parameters, executions, and timeouts. Can you describe the specific problem you're experiencing?"

    async def _provide_general_assistance(
        self, content: str, context: Dict[str, Any]
    ) -> str:
        """Provide general assistance and guidance."""
        return """I'm here to help you with Kailash workflows! I can:

• **Create workflows** from natural language descriptions
• **Suggest nodes** for specific tasks
• **Explain concepts** and best practices
• **Debug issues** and troubleshoot problems
• **Optimize workflows** for better performance

What would you like to work on? Just describe what you want to accomplish and I'll help you build it!"""

    async def _store_message_with_embedding(
        self,
        session_id: str,
        message_id: str,
        content: str,
        role: str,
        user_id: str = None,
    ):
        """Store chat message with embedding in vector database."""
        try:
            # Generate embedding
            embedding_result = await self.embedding_node.process({"text": content})

            # Store in database (simplified for now)
            await self.vector_db.process(
                {
                    "query": "INSERT INTO chat_messages (id, session_id, user_id, content, role, timestamp) VALUES (?, ?, ?, ?, ?, ?)",
                    "parameters": [
                        message_id,
                        session_id,
                        user_id,
                        content,
                        role,
                        datetime.now(timezone.utc),
                    ],
                }
            )

            self.embeddings_generated += 1

        except Exception as e:
            logger.error(f"Failed to store message with embedding: {e}")

    async def _find_similar_conversations(
        self, query: str, limit: int = 5
    ) -> List[Dict[str, Any]]:
        """Find similar past conversations using vector search."""
        try:
            # Generate query embedding
            query_embedding = await self.embedding_node.process({"text": query})

            # Search for similar messages (simplified for now)
            search_result = await self.vector_db.process(
                {
                    "query": "SELECT * FROM chat_messages WHERE role = 'user' ORDER BY timestamp DESC LIMIT ?",
                    "parameters": [limit * 2],
                }
            )

            # Group by session and return unique conversations
            seen_sessions = set()
            similar_conversations = []

            for row in search_result.get("rows", []):
                session_id = row["session_id"]
                if session_id not in seen_sessions:
                    seen_sessions.add(session_id)
                    similar_conversations.append(
                        {
                            "session_id": session_id,
                            "content": row["content"],
                            "similarity": 0.8,  # Simplified similarity
                            "timestamp": row["timestamp"],
                        }
                    )

                    if len(similar_conversations) >= limit:
                        break

            return similar_conversations

        except Exception as e:
            logger.error(f"Failed to find similar conversations: {e}")
            return []

    def get_chat_history(
        self, session_id: str, limit: int = None
    ) -> List[Dict[str, Any]]:
        """Get chat history for a session."""
        chat_session = self.chat_sessions.get(session_id)
        if not chat_session:
            return []

        return chat_session.get_conversation_history(limit)

    async def search_chat_history(
        self, query: str, user_id: str = None, limit: int = 10
    ) -> List[Dict[str, Any]]:
        """Search chat history using semantic search."""
        if not self.enable_semantic_search:
            return []

        try:
            # Generate query embedding
            query_embedding = await self.embedding_node.process({"text": query})

            # Prepare filters
            filters = {}
            if user_id:
                filters["user_id"] = user_id

            # Search database (simplified for now)
            query_parts = ["SELECT * FROM chat_messages WHERE 1=1"]
            params = []

            if user_id:
                query_parts.append("AND user_id = ?")
                params.append(user_id)

            query_parts.append("ORDER BY timestamp DESC LIMIT ?")
            params.append(limit)

            search_result = await self.vector_db.process(
                {"query": " ".join(query_parts), "parameters": params}
            )

            # Format results
            results = []
            for row in search_result.get("rows", []):
                results.append(
                    {
                        "message_id": row["id"],
                        "session_id": row["session_id"],
                        "content": row["content"],
                        "role": row["role"],
                        "similarity": 0.8,  # Simplified similarity
                        "timestamp": row["timestamp"],
                    }
                )

            return results

        except Exception as e:
            logger.error(f"Failed to search chat history: {e}")
            return []

    def get_stats(self) -> Dict[str, Any]:
        """Get chat middleware statistics."""
        stats = {
            "conversations_started": self.conversations_started,
            "workflows_generated": self.workflows_generated,
            "suggestions_provided": self.suggestions_provided,
            "active_chat_sessions": len(self.chat_sessions),
            "embeddings_generated": self.embeddings_generated,
        }

        # Add vector database stats if available
        if self.enable_semantic_search:
            stats["semantic_search_enabled"] = True

        return stats
````
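
A minimal driver for the new middleware can be pieced together from the signatures visible in this diff. The sketch below is illustrative only: the no-argument `AgentUIMiddleware()` construction is an assumption (its real parameters live in `kailash/middleware/core/agent_ui.py`, also added in this release), and `vector_db_url` is left as `None` so the keyword-based paths run without a database — `__init__` ANDs `enable_semantic_search` with the presence of a URL, so semantic search simply stays off.

```python
# Sketch of driving AIChatMiddleware, assuming AgentUIMiddleware() takes no
# required arguments (not confirmed by this diff).
import asyncio

from kailash.middleware.communication.ai_chat import AIChatMiddleware
from kailash.middleware.core.agent_ui import AgentUIMiddleware


async def main():
    agent_ui = AgentUIMiddleware()  # assumed default construction
    # No vector_db_url: enable_semantic_search is forced off in __init__.
    chat = AIChatMiddleware(agent_ui, vector_db_url=None)

    session_id = await chat.start_chat_session("session-1", user_id="demo-user")
    reply = await chat.send_message(
        session_id,
        "Create a workflow that reads a CSV file and summarizes it with an LLM",
    )

    # send_message returns intent/confidence plus the generated config, if any.
    print(reply["intent"], reply["confidence"])
    if reply["workflow_config"]:
        print(reply["workflow_config"]["metadata"])
    print(chat.get_stats())


if __name__ == "__main__":
    asyncio.run(main())
```

With no Ollama provider reachable, `WorkflowGenerator` falls back to `_fallback_workflow_generation`, so the call above still returns a single-node `PythonCodeNode` workflow rather than failing.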
|