kailash 0.1.1__py3-none-any.whl → 0.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kailash/nodes/__init__.py +2 -1
- kailash/nodes/ai/__init__.py +26 -0
- kailash/nodes/ai/ai_providers.py +1272 -0
- kailash/nodes/ai/embedding_generator.py +853 -0
- kailash/nodes/ai/llm_agent.py +1166 -0
- kailash/nodes/api/auth.py +3 -3
- kailash/nodes/api/graphql.py +2 -2
- kailash/nodes/api/http.py +391 -44
- kailash/nodes/api/rate_limiting.py +2 -2
- kailash/nodes/api/rest.py +464 -56
- kailash/nodes/base.py +71 -12
- kailash/nodes/code/python.py +2 -1
- kailash/nodes/data/__init__.py +7 -0
- kailash/nodes/data/readers.py +28 -26
- kailash/nodes/data/retrieval.py +178 -0
- kailash/nodes/data/sharepoint_graph.py +7 -7
- kailash/nodes/data/sources.py +65 -0
- kailash/nodes/data/sql.py +4 -2
- kailash/nodes/data/writers.py +6 -3
- kailash/nodes/logic/operations.py +2 -1
- kailash/nodes/mcp/__init__.py +11 -0
- kailash/nodes/mcp/client.py +558 -0
- kailash/nodes/mcp/resource.py +682 -0
- kailash/nodes/mcp/server.py +571 -0
- kailash/nodes/transform/__init__.py +16 -1
- kailash/nodes/transform/chunkers.py +78 -0
- kailash/nodes/transform/formatters.py +96 -0
- kailash/runtime/docker.py +6 -6
- kailash/sdk_exceptions.py +24 -10
- kailash/tracking/metrics_collector.py +2 -1
- kailash/utils/templates.py +6 -6
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/METADATA +344 -46
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/RECORD +37 -26
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/WHEEL +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/entry_points.txt +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/licenses/LICENSE +0 -0
- {kailash-0.1.1.dist-info → kailash-0.1.2.dist-info}/top_level.txt +0 -0
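The headline change in 0.1.2 is the new AI node package: the largest additions are kailash/nodes/ai/ai_providers.py (+1272) and kailash/nodes/ai/llm_agent.py (+1166), and the full diff of llm_agent.py follows below. As a quick orientation, a minimal sketch — the import path is taken from the file list above and the class and method names from the diff below; how the node plugs into a Kailash workflow is not shown here:

    # Sketch only: import path from the file list above, API from the diff below.
    from kailash.nodes.ai.llm_agent import LLMAgent

    agent = LLMAgent()
    # get_parameters() (defined in the diff) describes every input the node accepts.
    print(sorted(agent.get_parameters().keys()))
    # ['conversation_id', 'generation_config', 'max_retries', 'mcp_context', ...]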
@@ -0,0 +1,1166 @@
"""Advanced LLM Agent node with LangChain integration and MCP support."""

import json
from typing import Any, Dict, List, Optional

from kailash.nodes.base import Node, NodeParameter, register_node


@register_node()
class LLMAgent(Node):
    """
    Advanced Large Language Model agent with LangChain integration and MCP
    support.

    Design Purpose and Philosophy:
    The LLMAgent node provides enterprise-grade AI agent capabilities with
    support for multiple LLM providers, conversation memory, tool calling, and
    MCP protocol integration.
    It's designed to replace simple PythonCodeNode workarounds with proper
    agent architecture.

    Upstream Dependencies:
    - LLM provider credentials (OpenAI, Anthropic, Azure)
    - Tool definitions and implementations for agent capabilities
    - Conversation history and context for memory management
    - MCP server configurations for context sharing
    - Prompt templates and system instructions

    Downstream Consumers:
    - Workflow orchestration nodes that need AI decision-making
    - Data processing pipelines requiring intelligent analysis
    - Multi-agent systems coordinating complex tasks
    - User interfaces presenting agent responses
    - Monitoring systems tracking agent performance

    Usage Patterns:
    1. Single-turn Q&A with context from MCP resources
    2. Multi-turn conversations with persistent memory
    3. Tool-calling agents that execute workflow operations
    4. Planning agents that decompose complex goals
    5. RAG agents combining retrieval with generation

    Implementation Details:
    - Supports OpenAI, Anthropic Claude, Azure OpenAI, and local models
    - Integrates with LangChain for advanced agent patterns
    - Implements conversation memory with configurable persistence
    - Provides tool calling with proper error handling and validation
    - Supports MCP protocol for seamless context sharing
    - Includes prompt optimization and template management

    Error Handling:
    - APIError: When LLM provider API calls fail
    - AuthenticationError: When API credentials are invalid
    - RateLimitError: When API rate limits are exceeded
    - ToolExecutionError: When agent tool calls fail
    - MemoryError: When conversation memory operations fail
    - MCPError: When MCP protocol operations fail

    Side Effects:
    - Makes API calls to external LLM providers
    - Stores conversation history in memory or persistent storage
    - Executes tools that may modify external systems
    - Connects to MCP servers for context retrieval
    - Logs agent interactions and performance metrics

    Examples:

    Basic Q&A agent with OpenAI::

        agent = LLMAgent()
        result = agent.run(
            provider="openai",
            model="gpt-4",
            messages=[
                {"role": "user", "content": "Analyze the customer data and provide insights"}
            ],
            system_prompt="You are a data analyst expert.",
            mcp_context=["data://customer_reports/*"]
        )

    Tool-calling agent::

        tool_agent = LLMAgent()
        result = tool_agent.run(
            provider="anthropic",
            model="claude-3-sonnet",
            messages=[{"role": "user", "content": "Create a report and email it"}],
            tools=[
                {
                    "name": "create_report",
                    "description": "Generate a data report",
                    "parameters": {"type": "object", "properties": {"format": {"type": "string"}}}
                },
                {
                    "name": "send_email",
                    "description": "Send email with attachment",
                    "parameters": {"type": "object", "properties": {"recipient": {"type": "string"}}}
                }
            ],
            conversation_id="report_session_123"
        )

    RAG agent with MCP integration::

        rag_agent = LLMAgent()
        result = rag_agent.run(
            provider="azure",
            model="gpt-4-turbo",
            messages=[{"role": "user", "content": "What are the compliance requirements?"}],
            rag_config={
                "enabled": True,
                "top_k": 5,
                "similarity_threshold": 0.8
            },
            mcp_servers=[
                {
                    "name": "compliance-server",
                    "transport": "stdio",
                    "command": "python",
                    "args": ["-m", "compliance_mcp"]
                }
            ]
        )
    """
    def get_parameters(self) -> Dict[str, NodeParameter]:
        return {
            "provider": NodeParameter(
                name="provider",
                type=str,
                required=False,
                default="mock",
                description="LLM provider: openai, anthropic, azure, local, or mock",
            ),
            "model": NodeParameter(
                name="model",
                type=str,
                required=False,
                default="gpt-4",
                description="Model name (e.g., gpt-4, claude-3-sonnet, gpt-4-turbo)",
            ),
            "messages": NodeParameter(
                name="messages",
                type=list,
                required=False,
                default=[],
                description="Conversation messages in OpenAI format",
            ),
            "system_prompt": NodeParameter(
                name="system_prompt",
                type=str,
                required=False,
                description="System prompt to guide agent behavior",
            ),
            "tools": NodeParameter(
                name="tools",
                type=list,
                required=False,
                default=[],
                description="Available tools for agent to call",
            ),
            "conversation_id": NodeParameter(
                name="conversation_id",
                type=str,
                required=False,
                description="Unique ID for conversation memory persistence",
            ),
            "memory_config": NodeParameter(
                name="memory_config",
                type=dict,
                required=False,
                default={},
                description="Memory configuration (type, max_tokens, persistence)",
            ),
            "mcp_servers": NodeParameter(
                name="mcp_servers",
                type=list,
                required=False,
                default=[],
                description="MCP server configurations for context retrieval",
            ),
            "mcp_context": NodeParameter(
                name="mcp_context",
                type=list,
                required=False,
                default=[],
                description="MCP resource URIs to include as context",
            ),
            "rag_config": NodeParameter(
                name="rag_config",
                type=dict,
                required=False,
                default={},
                description="RAG configuration (enabled, top_k, threshold, embeddings)",
            ),
            "generation_config": NodeParameter(
                name="generation_config",
                type=dict,
                required=False,
                default={},
                description="Generation parameters (temperature, max_tokens, top_p)",
            ),
            "streaming": NodeParameter(
                name="streaming",
                type=bool,
                required=False,
                default=False,
                description="Enable streaming responses",
            ),
            "timeout": NodeParameter(
                name="timeout",
                type=int,
                required=False,
                default=120,
                description="Request timeout in seconds",
            ),
            "max_retries": NodeParameter(
                name="max_retries",
                type=int,
                required=False,
                default=3,
                description="Maximum retry attempts for failed requests",
            ),
        }
    def run(self, **kwargs) -> Dict[str, Any]:
        """
        Execute the LLM agent with the specified configuration.

        This is the main entry point for using the LLMAgent. It handles context
        preparation, provider selection, response generation, and
        post-processing.

        Args:
            **kwargs: Configuration parameters including:
                provider (str): LLM provider name. Options: "openai", "anthropic", "ollama", "mock"
                model (str): Model identifier specific to the provider
                messages (List[Dict[str, str]]): Conversation messages in OpenAI format
                system_prompt (str, optional): System message to guide agent behavior
                tools (List[Dict], optional): Available tools for function calling
                conversation_id (str, optional): ID for conversation memory persistence
                memory_config (Dict, optional): Memory configuration options
                mcp_servers (List[Dict], optional): MCP server configurations
                mcp_context (List[str], optional): MCP resource URIs to include
                rag_config (Dict, optional): RAG configuration for retrieval
                generation_config (Dict, optional): LLM generation parameters
                streaming (bool, optional): Enable streaming responses
                timeout (int, optional): Request timeout in seconds
                max_retries (int, optional): Maximum retry attempts

        Returns:
            Dict[str, Any]: Response dictionary containing:
                success (bool): Whether the operation succeeded
                response (Dict): LLM response with content, role, tool_calls, etc.
                conversation_id (str): Conversation identifier
                usage (Dict): Token usage and cost metrics
                context (Dict): Information about context sources used
                metadata (Dict): Additional metadata about the request
                error (str, optional): Error message if success is False
                error_type (str, optional): Type of error that occurred
                recovery_suggestions (List[str], optional): Suggestions for fixing errors

        Examples:

            Basic usage with OpenAI::

                agent = LLMAgent()
                result = agent.run(
                    provider="openai",
                    model="gpt-4",
                    messages=[
                        {"role": "user", "content": "Explain quantum computing"}
                    ],
                    generation_config={
                        "temperature": 0.7,
                        "max_tokens": 500,
                        "top_p": 0.9,
                        "frequency_penalty": 0.0,
                        "presence_penalty": 0.0
                    }
                )
                print(result["response"]["content"])

            Using Ollama with custom model::

                result = agent.run(
                    provider="ollama",
                    model="llama3.1:8b-instruct-q8_0",
                    messages=[
                        {"role": "user", "content": "Write a Python function"}
                    ],
                    generation_config={
                        "temperature": 0.5,
                        "max_tokens": 1000,
                        "top_p": 0.95,
                        "seed": 42  # For reproducible outputs
                    }
                )

            With system prompt and conversation memory::

                result = agent.run(
                    provider="anthropic",
                    model="claude-3-sonnet-20240229",
                    system_prompt="You are a helpful coding assistant.",
                    messages=[
                        {"role": "user", "content": "Help me optimize this code"}
                    ],
                    conversation_id="coding-session-123",
                    memory_config={
                        "type": "buffer",  # or "summary", "buffer_window"
                        "max_tokens": 4000,
                        "persistence": "memory"  # or "disk", "database"
                    }
                )

            With tool calling::

                result = agent.run(
                    provider="openai",
                    model="gpt-4-turbo",
                    messages=[
                        {"role": "user", "content": "Get the weather in NYC"}
                    ],
                    tools=[
                        {
                            "type": "function",
                            "function": {
                                "name": "get_weather",
                                "description": "Get weather for a location",
                                "parameters": {
                                    "type": "object",
                                    "properties": {
                                        "location": {"type": "string"},
                                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                                    },
                                    "required": ["location"]
                                }
                            }
                        }
                    ],
                    generation_config={
                        "temperature": 0,  # Use 0 for tool calling
                        "tool_choice": "auto"  # or "none", {"type": "function", "function": {"name": "get_weather"}}
                    }
                )

            With RAG (Retrieval Augmented Generation)::

                result = agent.run(
                    provider="openai",
                    model="gpt-4",
                    messages=[
                        {"role": "user", "content": "What is our refund policy?"}
                    ],
                    rag_config={
                        "enabled": True,
                        "top_k": 5,  # Number of documents to retrieve
                        "similarity_threshold": 0.7,  # Minimum similarity score
                        "embeddings": {
                            "model": "text-embedding-ada-002",
                            "dimension": 1536
                        },
                        "reranking": {
                            "enabled": True,
                            "model": "cross-encoder/ms-marco-MiniLM-L-12-v2"
                        }
                    }
                )

            With MCP (Model Context Protocol) integration::

                result = agent.run(
                    provider="anthropic",
                    model="claude-3-opus-20240229",
                    messages=[
                        {"role": "user", "content": "Analyze the sales data"}
                    ],
                    mcp_servers=[
                        {
                            "name": "data-server",
                            "transport": "stdio",
                            "command": "python",
                            "args": ["-m", "mcp_data_server"],
                            "env": {"API_KEY": "secret"}
                        }
                    ],
                    mcp_context=[
                        "data://sales/2024/q4",
                        "data://customers/segments",
                        "resource://templates/analysis"
                    ]
                )

            Advanced configuration with all features::

                result = agent.run(
                    provider="openai",
                    model="gpt-4-turbo",
                    messages=[
                        {"role": "user", "content": "Complex analysis request"}
                    ],
                    system_prompt="You are an expert data analyst.",
                    conversation_id="analysis-session-456",
                    memory_config={
                        "type": "buffer_window",
                        "max_tokens": 3000,
                        "window_size": 10  # Keep last 10 exchanges
                    },
                    tools=[...],  # Tool definitions
                    rag_config={
                        "enabled": True,
                        "top_k": 3,
                        "similarity_threshold": 0.8
                    },
                    mcp_servers=[...],  # MCP server configs
                    mcp_context=["data://reports/*"],
                    generation_config={
                        "temperature": 0.7,
                        "max_tokens": 2000,
                        "top_p": 0.9,
                        "frequency_penalty": 0.1,
                        "presence_penalty": 0.1,
                        "stop": ["\\n\\n", "END"],  # Stop sequences
                        "logit_bias": {123: -100}  # Token biases
                    },
                    streaming=False,
                    timeout=120,
                    max_retries=3
                )

            Error handling::

                result = agent.run(
                    provider="openai",
                    model="gpt-4",
                    messages=[{"role": "user", "content": "Hello"}]
                )

                if result["success"]:
                    print(f"Response: {result['response']['content']}")
                    print(f"Tokens used: {result['usage']['total_tokens']}")
                    print(f"Estimated cost: ${result['usage']['estimated_cost_usd']}")
                else:
                    print(f"Error: {result['error']}")
                    print(f"Type: {result['error_type']}")
                    for suggestion in result['recovery_suggestions']:
                        print(f"- {suggestion}")
        """
        provider = kwargs["provider"]
        model = kwargs["model"]
        messages = kwargs["messages"]
        system_prompt = kwargs.get("system_prompt")
        tools = kwargs.get("tools", [])
        conversation_id = kwargs.get("conversation_id")
        memory_config = kwargs.get("memory_config", {})
        mcp_servers = kwargs.get("mcp_servers", [])
        mcp_context = kwargs.get("mcp_context", [])
        rag_config = kwargs.get("rag_config", {})
        generation_config = kwargs.get("generation_config", {})
        streaming = kwargs.get("streaming", False)
        timeout = kwargs.get("timeout", 120)
        max_retries = kwargs.get("max_retries", 3)

        try:
            # Import LangChain and related libraries (graceful fallback)
            langchain_available = self._check_langchain_availability()

            # Load conversation memory if configured
            conversation_memory = self._load_conversation_memory(
                conversation_id, memory_config
            )

            # Retrieve MCP context if configured
            mcp_context_data = self._retrieve_mcp_context(mcp_servers, mcp_context)

            # Perform RAG retrieval if configured
            rag_context = self._perform_rag_retrieval(
                messages, rag_config, mcp_context_data
            )

            # Prepare conversation with context
            enriched_messages = self._prepare_conversation(
                messages,
                system_prompt,
                conversation_memory,
                mcp_context_data,
                rag_context,
            )

            # Generate response using selected provider
            if provider == "mock":
                response = self._mock_llm_response(
                    enriched_messages, tools, generation_config
                )
            elif langchain_available and provider in ["langchain"]:
                response = self._langchain_llm_response(
                    provider,
                    model,
                    enriched_messages,
                    tools,
                    generation_config,
                    streaming,
                    timeout,
                    max_retries,
                )
            else:
                # Use the new provider architecture
                response = self._provider_llm_response(
                    provider, model, enriched_messages, tools, generation_config
                )

            # Update conversation memory
            if conversation_id:
                self._update_conversation_memory(
                    conversation_id, enriched_messages, response, memory_config
                )

            # Track usage and performance
            usage_metrics = self._calculate_usage_metrics(
                enriched_messages, response, model, provider
            )

            return {
                "success": True,
                "response": response,
                "conversation_id": conversation_id,
                "usage": usage_metrics,
                "context": {
                    "mcp_resources_used": len(mcp_context_data),
                    "rag_documents_retrieved": len(rag_context.get("documents", [])),
                    "tools_available": len(tools),
                    "memory_tokens": conversation_memory.get("token_count", 0),
                },
                "metadata": {
                    "provider": provider,
                    "model": model,
                    "langchain_used": langchain_available,
                    "streaming": streaming,
                    "generation_config": generation_config,
                },
            }

        except Exception as e:
            return {
                "success": False,
                "error": str(e),
                "error_type": type(e).__name__,
                "provider": provider,
                "model": model,
                "conversation_id": conversation_id,
                "recovery_suggestions": [
                    "Check API credentials and model availability",
                    "Verify MCP server connections",
                    "Reduce message length if hitting token limits",
                    "Check tool definitions for syntax errors",
                ],
            }
    def _check_langchain_availability(self) -> bool:
        """Check if LangChain and related libraries are available."""
        try:
            import importlib.util

            langchain_spec = importlib.util.find_spec("langchain")
            langchain_anthropic_spec = importlib.util.find_spec("langchain_anthropic")
            langchain_openai_spec = importlib.util.find_spec("langchain_openai")

            return (
                langchain_spec is not None
                and langchain_anthropic_spec is not None
                and langchain_openai_spec is not None
            )
        except ImportError:
            return False

    def _load_conversation_memory(
        self, conversation_id: Optional[str], memory_config: dict
    ) -> Dict[str, Any]:
        """
        Load conversation memory for persistent conversations.

        This method manages conversation history across multiple interactions,
        allowing the agent to maintain context over time.

        Args:
            conversation_id (Optional[str]): Unique identifier for the conversation.
                If None, no memory is loaded.
            memory_config (dict): Configuration for memory management with options:
                type (str): Memory type - "buffer", "summary", "buffer_window"
                    - "buffer": Keep full conversation history
                    - "summary": Summarize older messages
                    - "buffer_window": Keep only recent N exchanges
                max_tokens (int): Maximum tokens to store (default: 4000)
                persistence (str): Storage type - "memory", "disk", "database"
                window_size (int): For buffer_window, number of exchanges to keep
                summary_method (str): For summary type - "abstractive", "extractive"

        Returns:
            Dict[str, Any]: Memory data containing:
                conversation_id (str): The conversation identifier
                type (str): Memory type being used
                messages (List[Dict]): Previous conversation messages
                token_count (int): Estimated tokens in memory
                max_tokens (int): Maximum allowed tokens
                loaded_from (str): Source of the memory data

        Examples:
            Buffer memory (keep everything)::

                memory = self._load_conversation_memory(
                    "chat-123",
                    {"type": "buffer", "max_tokens": 4000}
                )

            Window memory (keep last 5 exchanges)::

                memory = self._load_conversation_memory(
                    "chat-456",
                    {
                        "type": "buffer_window",
                        "window_size": 5,
                        "max_tokens": 2000
                    }
                )

            Summary memory (summarize old content)::

                memory = self._load_conversation_memory(
                    "chat-789",
                    {
                        "type": "summary",
                        "max_tokens": 1000,
                        "summary_method": "abstractive"
                    }
                )
        """
        if not conversation_id:
            return {"messages": [], "token_count": 0}

        # Mock memory implementation (in real implementation, use persistent storage)
        memory_type = memory_config.get("type", "buffer")
        max_tokens = memory_config.get("max_tokens", 4000)

        # Simulate loading conversation history
        mock_history = [
            {
                "role": "user",
                "content": "Previous conversation context...",
                "timestamp": "2025-06-01T10:00:00Z",
            },
            {
                "role": "assistant",
                "content": "Previous response context...",
                "timestamp": "2025-06-01T10:00:30Z",
            },
        ]

        return {
            "conversation_id": conversation_id,
            "type": memory_type,
            "messages": mock_history,
            "token_count": 150,  # Mock token count
            "max_tokens": max_tokens,
            "loaded_from": "mock_storage",
        }

    def _retrieve_mcp_context(
        self, mcp_servers: List[dict], mcp_context: List[str]
    ) -> List[Dict[str, Any]]:
        """
        Retrieve context from Model Context Protocol (MCP) servers.

        MCP enables standardized context sharing between AI models and tools.
        This method connects to MCP servers and retrieves relevant context.

        Args:
            mcp_servers (List[dict]): MCP server configurations, each containing:
                name (str): Server identifier
                transport (str): Transport type - "stdio", "http", "sse"
                command (str): Command to launch stdio server
                args (List[str]): Command arguments
                env (Dict[str, str]): Environment variables
                url (str): For HTTP/SSE transports
                headers (Dict[str, str]): HTTP headers for auth
            mcp_context (List[str]): Resource URIs to retrieve:
                - "data://path/to/resource": Data resources
                - "file://path/to/file": File resources
                - "resource://type/name": Named resources
                - "prompt://template/name": Prompt templates

        Returns:
            List[Dict[str, Any]]: Retrieved context items, each containing:
                uri (str): Resource URI
                content (str): Resource content
                source (str): Server that provided the resource
                retrieved_at (str): ISO timestamp of retrieval
                relevance_score (float): Relevance score (0-1)
                metadata (Dict): Additional resource metadata

        Examples:
            Connect to stdio MCP server::

                context = self._retrieve_mcp_context(
                    mcp_servers=[{
                        "name": "data-server",
                        "transport": "stdio",
                        "command": "python",
                        "args": ["-m", "mcp_data_server"],
                        "env": {"API_KEY": "secret"}
                    }],
                    mcp_context=["data://sales/2024/q4"]
                )

            Connect to HTTP MCP server::

                context = self._retrieve_mcp_context(
                    mcp_servers=[{
                        "name": "api-server",
                        "transport": "http",
                        "url": "https://mcp.example.com",
                        "headers": {"Authorization": "Bearer token"}
                    }],
                    mcp_context=[
                        "resource://customers/segments",
                        "prompt://analysis/financial"
                    ]
                )
        """
        if not (mcp_servers or mcp_context):
            return []

        context_data = []

        # Mock MCP context retrieval
        for uri in mcp_context:
            context_data.append(
                {
                    "uri": uri,
                    "content": f"Mock context content for {uri}",
                    "source": "mcp_server",
                    "retrieved_at": "2025-06-01T12:00:00Z",
                    "relevance_score": 0.85,
                }
            )

        # Simulate server-based retrieval
        for server_config in mcp_servers:
            server_name = server_config.get("name", "unknown")
            context_data.append(
                {
                    "uri": f"mcp://{server_name}/auto-context",
                    "content": f"Auto-retrieved context from {server_name}",
                    "source": server_name,
                    "retrieved_at": "2025-06-01T12:00:00Z",
                    "relevance_score": 0.75,
                }
            )

        return context_data

    def _perform_rag_retrieval(
        self, messages: List[dict], rag_config: dict, mcp_context: List[dict]
    ) -> Dict[str, Any]:
        """
        Perform Retrieval Augmented Generation (RAG) to find relevant documents.

        This method searches through a knowledge base to find documents relevant
        to the user's query, which are then included as context for the LLM.

        Args:
            messages (List[dict]): Conversation messages to extract query from
            rag_config (dict): RAG configuration options:
                enabled (bool): Whether RAG is enabled
                top_k (int): Number of documents to retrieve (default: 5)
                similarity_threshold (float): Minimum similarity score (0-1)
                embeddings (dict): Embedding model configuration:
                    model (str): Embedding model name
                    dimension (int): Embedding dimension
                    provider (str): "openai", "huggingface", "sentence-transformers"
                reranking (dict): Reranking configuration:
                    enabled (bool): Whether to rerank results
                    model (str): Reranking model name
                    top_n (int): Number of results after reranking
                vector_store (dict): Vector database configuration:
                    type (str): "faiss", "pinecone", "weaviate", "chroma"
                    index_name (str): Name of the index
                    namespace (str): Namespace within index
                filters (dict): Metadata filters for search
                hybrid_search (dict): Hybrid search configuration:
                    enabled (bool): Combine vector and keyword search
                    alpha (float): Weight for vector search (0-1)
            mcp_context (List[dict]): MCP context to include in search

        Returns:
            Dict[str, Any]: RAG results containing:
                query (str): Extracted search query
                documents (List[Dict]): Retrieved documents with:
                    content (str): Document text
                    score (float): Relevance score
                    source (str): Document source
                    metadata (Dict): Document metadata
                scores (List[float]): Just the scores for quick access
                total_candidates (int): Total documents searched
                threshold (float): Similarity threshold used
                top_k (int): Number of results requested
                search_time_ms (float): Search duration

        Examples:
            Basic RAG retrieval::

                rag_result = self._perform_rag_retrieval(
                    messages=[{"role": "user", "content": "What is the refund policy?"}],
                    rag_config={
                        "enabled": True,
                        "top_k": 5,
                        "similarity_threshold": 0.7
                    },
                    mcp_context=[]
                )

            Advanced RAG with reranking::

                rag_result = self._perform_rag_retrieval(
                    messages=[{"role": "user", "content": "Technical specifications"}],
                    rag_config={
                        "enabled": True,
                        "top_k": 10,
                        "similarity_threshold": 0.6,
                        "embeddings": {
                            "model": "text-embedding-ada-002",
                            "dimension": 1536,
                            "provider": "openai"
                        },
                        "reranking": {
                            "enabled": True,
                            "model": "cross-encoder/ms-marco-MiniLM-L-12-v2",
                            "top_n": 3
                        },
                        "vector_store": {
                            "type": "pinecone",
                            "index_name": "products",
                            "namespace": "technical-docs"
                        }
                    },
                    mcp_context=[]
                )

            Hybrid search with filters::

                rag_result = self._perform_rag_retrieval(
                    messages=[{"role": "user", "content": "Python tutorials"}],
                    rag_config={
                        "enabled": True,
                        "top_k": 5,
                        "similarity_threshold": 0.7,
                        "filters": {
                            "category": "tutorial",
                            "language": "python",
                            "level": ["beginner", "intermediate"]
                        },
                        "hybrid_search": {
                            "enabled": True,
                            "alpha": 0.7  # 70% vector, 30% keyword
                        }
                    },
                    mcp_context=[]
                )
        """
        if not rag_config.get("enabled", False):
            return {"documents": [], "scores": []}

        # Extract query from the last user message
        query = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                query = msg.get("content", "")
                break

        if not query:
            return {"documents": [], "scores": []}

        top_k = rag_config.get("top_k", 5)
        threshold = rag_config.get("similarity_threshold", 0.7)

        # Mock RAG retrieval
        mock_documents = [
            {
                "content": f"Relevant document 1 for query: {query[:50]}...",
                "score": 0.92,
                "source": "knowledge_base",
                "metadata": {"doc_id": "kb_001", "section": "overview"},
            },
            {
                "content": f"Relevant document 2 for query: {query[:50]}...",
                "score": 0.87,
                "source": "documentation",
                "metadata": {"doc_id": "doc_023", "section": "procedures"},
            },
            {
                "content": f"Relevant document 3 for query: {query[:50]}...",
                "score": 0.81,
                "source": "mcp_resource",
                "metadata": {"uri": "data://reports/latest.json"},
            },
        ]

        # Filter by threshold and limit by top_k
        filtered_docs = [doc for doc in mock_documents if doc["score"] >= threshold][
            :top_k
        ]

        return {
            "query": query,
            "documents": filtered_docs,
            "scores": [doc["score"] for doc in filtered_docs],
            "total_candidates": len(mock_documents),
            "threshold": threshold,
            "top_k": top_k,
        }
    def _prepare_conversation(
        self,
        messages: List[dict],
        system_prompt: Optional[str],
        memory: dict,
        mcp_context: List[dict],
        rag_context: dict,
    ) -> List[dict]:
        """Prepare enriched conversation with all context."""
        enriched_messages = []

        # Add system prompt
        if system_prompt:
            enriched_messages.append({"role": "system", "content": system_prompt})

        # Add conversation memory
        if memory.get("messages"):
            enriched_messages.extend(memory["messages"])

        # Add MCP context as system messages
        if mcp_context:
            context_content = "=== MCP Context ===\n"
            for ctx in mcp_context:
                context_content += f"Resource: {ctx['uri']}\n{ctx['content']}\n\n"

            enriched_messages.append({"role": "system", "content": context_content})

        # Add RAG context
        if rag_context.get("documents"):
            rag_content = "=== Retrieved Documents ===\n"
            for doc in rag_context["documents"]:
                rag_content += (
                    f"Document (score: {doc['score']:.2f}): {doc['content']}\n\n"
                )

            enriched_messages.append({"role": "system", "content": rag_content})

        # Add current conversation messages
        enriched_messages.extend(messages)

        return enriched_messages

    def _mock_llm_response(
        self, messages: List[dict], tools: List[dict], generation_config: dict
    ) -> Dict[str, Any]:
        """Generate mock LLM response for testing."""
        last_user_message = ""
        for msg in reversed(messages):
            if msg.get("role") == "user":
                last_user_message = msg.get("content", "")
                break

        # Generate contextual mock response
        if "analyze" in last_user_message.lower():
            response_content = "Based on the provided data and context, I can see several key patterns: 1) Customer engagement has increased by 15% this quarter, 2) Product A shows the highest conversion rate, and 3) There are opportunities for improvement in the onboarding process."
        elif (
            "create" in last_user_message.lower()
            or "generate" in last_user_message.lower()
        ):
            response_content = "I'll help you create that. Based on the requirements and available tools, I recommend a structured approach with the following steps..."
        elif "?" in last_user_message:
            response_content = f"Regarding your question about '{last_user_message[:50]}...', here's what I found from the available context and resources..."
        else:
            response_content = f"I understand you want me to work with: '{last_user_message[:100]}...'. Based on the context provided, I can help you achieve this goal."

        # Simulate tool calls if tools are available
        tool_calls = []
        if tools and any(
            keyword in last_user_message.lower()
            for keyword in ["create", "send", "execute", "run"]
        ):
            for tool in tools[:2]:  # Limit to first 2 tools
                tool_calls.append(
                    {
                        "id": f"call_{hash(tool['name']) % 10000}",
                        "type": "function",
                        "function": {
                            "name": tool["name"],
                            "arguments": json.dumps({"mock": "arguments"}),
                        },
                    }
                )

        return {
            "id": f"msg_{hash(last_user_message) % 100000}",
            "content": response_content,
            "role": "assistant",
            "model": "mock-model",
            "created": 1701234567,
            "tool_calls": tool_calls,
            "finish_reason": "stop" if not tool_calls else "tool_calls",
            "usage": {
                "prompt_tokens": len(
                    " ".join(msg.get("content", "") for msg in messages)
                )
                // 4,
                "completion_tokens": len(response_content) // 4,
                "total_tokens": 0,  # Will be calculated
            },
        }

    def _langchain_llm_response(
        self,
        provider: str,
        model: str,
        messages: List[dict],
        tools: List[dict],
        generation_config: dict,
        streaming: bool,
        timeout: int,
        max_retries: int,
    ) -> Dict[str, Any]:
        """Generate LLM response using LangChain (mock implementation)."""
        # This would be the real LangChain integration
        return {
            "id": "langchain_response_123",
            "content": f"LangChain response using {provider} {model} with advanced agent capabilities",
            "role": "assistant",
            "model": model,
            "provider": provider,
            "langchain_used": True,
            "tool_calls": [],
            "finish_reason": "stop",
            "usage": {
                "prompt_tokens": 250,
                "completion_tokens": 75,
                "total_tokens": 325,
            },
        }

    def _provider_llm_response(
        self,
        provider: str,
        model: str,
        messages: List[dict],
        tools: List[dict],
        generation_config: dict,
    ) -> Dict[str, Any]:
        """Generate LLM response using provider architecture."""
        try:
            from .ai_providers import get_provider

            # Get the provider instance
            provider_instance = get_provider(provider)

            # Check if provider is available
            if not provider_instance.is_available():
                raise RuntimeError(
                    f"Provider {provider} is not available. Check dependencies and configuration."
                )

            # Call the provider
            response = provider_instance.chat(
                messages=messages,
                model=model,
                generation_config=generation_config,
                tools=tools,
            )

            # Ensure usage totals are calculated
            if "usage" in response:
                usage = response["usage"]
                if usage.get("total_tokens", 0) == 0:
                    usage["total_tokens"] = usage.get("prompt_tokens", 0) + usage.get(
                        "completion_tokens", 0
                    )

            return response

        except ImportError:
            # Fallback to the original fallback method
            return self._fallback_llm_response(
                provider, model, messages, tools, generation_config
            )
        except Exception as e:
            # Re-raise provider errors with context
            raise RuntimeError(f"Provider {provider} error: {str(e)}") from e

    def _fallback_llm_response(
        self,
        provider: str,
        model: str,
        messages: List[dict],
        tools: List[dict],
        generation_config: dict,
    ) -> Dict[str, Any]:
        """Generate LLM response using direct API calls (mock implementation)."""
        return {
            "id": "fallback_response_456",
            "content": f"Direct API response from {provider} {model}",
            "role": "assistant",
            "model": model,
            "provider": provider,
            "langchain_used": False,
            "tool_calls": [],
            "finish_reason": "stop",
            "usage": {
                "prompt_tokens": 200,
                "completion_tokens": 50,
                "total_tokens": 250,
            },
        }

    def _update_conversation_memory(
        self,
        conversation_id: str,
        messages: List[dict],
        response: dict,
        memory_config: dict,
    ) -> None:
        """Update conversation memory with new exchange."""
        # Mock memory update (in real implementation, persist to storage)
        pass

    def _calculate_usage_metrics(
        self, messages: List[dict], response: dict, model: str, provider: str
    ) -> Dict[str, Any]:
        """Calculate token usage and cost metrics."""
        usage = response.get("usage", {})
        prompt_tokens = usage.get("prompt_tokens", 0)
        completion_tokens = usage.get("completion_tokens", 0)
        total_tokens = prompt_tokens + completion_tokens

        # Mock cost calculation (real implementation would use current pricing)
        mock_costs = {
            "gpt-4": {"input": 0.03, "output": 0.06},
            "gpt-3.5-turbo": {"input": 0.001, "output": 0.002},
            "claude-3-sonnet": {"input": 0.003, "output": 0.015},
            "claude-3-haiku": {"input": 0.00025, "output": 0.00125},
        }

        cost_per_1k = mock_costs.get(model, {"input": 0.001, "output": 0.002})
        estimated_cost = (prompt_tokens / 1000) * cost_per_1k["input"] + (
            completion_tokens / 1000
        ) * cost_per_1k["output"]

        return {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "estimated_cost_usd": round(estimated_cost, 6),
            "model": model,
            "provider": provider,
            "efficiency_score": completion_tokens / max(total_tokens, 1),
        }
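Because the node defaults to the built-in "mock" provider, the new module can be smoke-tested without any API credentials. A minimal sketch, using only the run() contract and the mock/usage-metric behaviour shown in the diff above (real providers would be selected with provider="openai", "anthropic", etc., plus the corresponding credentials):

    from kailash.nodes.ai.llm_agent import LLMAgent

    agent = LLMAgent()
    result = agent.run(
        provider="mock",  # built-in provider; no API keys required
        model="gpt-4",    # only used here for the mock pricing lookup
        messages=[{"role": "user", "content": "Analyze the customer data"}],
    )
    if result["success"]:
        print(result["response"]["content"])          # canned "analyze" response
        print(result["usage"]["total_tokens"])        # from _calculate_usage_metrics()
        print(result["usage"]["estimated_cost_usd"])  # mock pricing table
    else:
        print(result["error"], result["recovery_suggestions"])

Every provider goes through the same run() path, so swapping provider/model in this call exercises the real _provider_llm_response() branch added in kailash/nodes/ai/ai_providers.py.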