letta-nightly 0.11.6.dev20250903104037__py3-none-any.whl → 0.11.7.dev20250904104046__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +10 -14
- letta/agents/base_agent.py +18 -0
- letta/agents/helpers.py +32 -7
- letta/agents/letta_agent.py +953 -762
- letta/agents/voice_agent.py +1 -1
- letta/client/streaming.py +0 -1
- letta/constants.py +11 -8
- letta/errors.py +9 -0
- letta/functions/function_sets/base.py +77 -69
- letta/functions/function_sets/builtin.py +41 -22
- letta/functions/function_sets/multi_agent.py +1 -2
- letta/functions/schema_generator.py +0 -1
- letta/helpers/converters.py +8 -3
- letta/helpers/datetime_helpers.py +5 -4
- letta/helpers/message_helper.py +1 -2
- letta/helpers/pinecone_utils.py +0 -1
- letta/helpers/tool_rule_solver.py +10 -0
- letta/helpers/tpuf_client.py +848 -0
- letta/interface.py +8 -8
- letta/interfaces/anthropic_streaming_interface.py +7 -0
- letta/interfaces/openai_streaming_interface.py +29 -6
- letta/llm_api/anthropic_client.py +188 -18
- letta/llm_api/azure_client.py +0 -1
- letta/llm_api/bedrock_client.py +1 -2
- letta/llm_api/deepseek_client.py +319 -5
- letta/llm_api/google_vertex_client.py +75 -17
- letta/llm_api/groq_client.py +0 -1
- letta/llm_api/helpers.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -50
- letta/llm_api/llm_client.py +6 -8
- letta/llm_api/mistral.py +1 -1
- letta/llm_api/openai.py +16 -13
- letta/llm_api/openai_client.py +31 -16
- letta/llm_api/together_client.py +0 -1
- letta/llm_api/xai_client.py +0 -1
- letta/local_llm/chat_completion_proxy.py +7 -6
- letta/local_llm/settings/settings.py +1 -1
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +8 -6
- letta/orm/archive.py +9 -1
- letta/orm/block.py +3 -4
- letta/orm/block_history.py +3 -1
- letta/orm/group.py +2 -3
- letta/orm/identity.py +1 -2
- letta/orm/job.py +1 -2
- letta/orm/llm_batch_items.py +1 -2
- letta/orm/message.py +8 -4
- letta/orm/mixins.py +18 -0
- letta/orm/organization.py +2 -0
- letta/orm/passage.py +8 -1
- letta/orm/passage_tag.py +55 -0
- letta/orm/sandbox_config.py +1 -3
- letta/orm/step.py +1 -2
- letta/orm/tool.py +1 -0
- letta/otel/resource.py +2 -2
- letta/plugins/plugins.py +1 -1
- letta/prompts/prompt_generator.py +10 -2
- letta/schemas/agent.py +11 -0
- letta/schemas/archive.py +4 -0
- letta/schemas/block.py +13 -0
- letta/schemas/embedding_config.py +0 -1
- letta/schemas/enums.py +24 -7
- letta/schemas/group.py +12 -0
- letta/schemas/letta_message.py +55 -1
- letta/schemas/letta_message_content.py +28 -0
- letta/schemas/letta_request.py +21 -4
- letta/schemas/letta_stop_reason.py +9 -1
- letta/schemas/llm_config.py +24 -8
- letta/schemas/mcp.py +0 -3
- letta/schemas/memory.py +14 -0
- letta/schemas/message.py +245 -141
- letta/schemas/openai/chat_completion_request.py +2 -1
- letta/schemas/passage.py +1 -0
- letta/schemas/providers/bedrock.py +1 -1
- letta/schemas/providers/openai.py +2 -2
- letta/schemas/tool.py +11 -5
- letta/schemas/tool_execution_result.py +0 -1
- letta/schemas/tool_rule.py +71 -0
- letta/serialize_schemas/marshmallow_agent.py +1 -2
- letta/server/rest_api/app.py +3 -3
- letta/server/rest_api/auth/index.py +0 -1
- letta/server/rest_api/interface.py +3 -11
- letta/server/rest_api/redis_stream_manager.py +3 -4
- letta/server/rest_api/routers/v1/agents.py +143 -84
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/rest_api/routers/v1/folders.py +1 -1
- letta/server/rest_api/routers/v1/groups.py +23 -22
- letta/server/rest_api/routers/v1/internal_templates.py +68 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
- letta/server/rest_api/routers/v1/sources.py +1 -1
- letta/server/rest_api/routers/v1/tools.py +167 -15
- letta/server/rest_api/streaming_response.py +4 -3
- letta/server/rest_api/utils.py +75 -18
- letta/server/server.py +24 -35
- letta/services/agent_manager.py +359 -45
- letta/services/agent_serialization_manager.py +23 -3
- letta/services/archive_manager.py +72 -3
- letta/services/block_manager.py +1 -2
- letta/services/context_window_calculator/token_counter.py +11 -6
- letta/services/file_manager.py +1 -3
- letta/services/files_agents_manager.py +2 -4
- letta/services/group_manager.py +73 -12
- letta/services/helpers/agent_manager_helper.py +5 -5
- letta/services/identity_manager.py +8 -3
- letta/services/job_manager.py +2 -14
- letta/services/llm_batch_manager.py +1 -3
- letta/services/mcp/base_client.py +1 -2
- letta/services/mcp_manager.py +5 -6
- letta/services/message_manager.py +536 -15
- letta/services/organization_manager.py +1 -2
- letta/services/passage_manager.py +287 -12
- letta/services/provider_manager.py +1 -3
- letta/services/sandbox_config_manager.py +12 -7
- letta/services/source_manager.py +1 -2
- letta/services/step_manager.py +0 -1
- letta/services/summarizer/summarizer.py +4 -2
- letta/services/telemetry_manager.py +1 -3
- letta/services/tool_executor/builtin_tool_executor.py +136 -316
- letta/services/tool_executor/core_tool_executor.py +231 -74
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/mcp_tool_executor.py +0 -1
- letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
- letta/services/tool_executor/sandbox_tool_executor.py +0 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -3
- letta/services/tool_manager.py +181 -64
- letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
- letta/services/user_manager.py +1 -2
- letta/settings.py +5 -3
- letta/streaming_interface.py +3 -3
- letta/system.py +1 -1
- letta/utils.py +0 -1
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/METADATA +11 -7
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/RECORD +137 -135
- letta/llm_api/deepseek.py +0 -303
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/licenses/LICENSE +0 -0
@@ -1,14 +1,7 @@
import asyncio
import json
-import os
-import time
from typing import Any, Dict, List, Literal, Optional

-from pydantic import BaseModel
-
-from letta.constants import WEB_SEARCH_MODEL_ENV_VAR_DEFAULT_VALUE, WEB_SEARCH_MODEL_ENV_VAR_NAME
-from letta.functions.prompts import FIRECRAWL_SEARCH_SYSTEM_PROMPT, get_firecrawl_search_user_prompt
-from letta.functions.types import SearchTask
from letta.log import get_logger
from letta.otel.tracing import trace_method
from letta.schemas.agent import AgentState
@@ -17,36 +10,11 @@ from letta.schemas.tool import Tool
from letta.schemas.tool_execution_result import ToolExecutionResult
from letta.schemas.user import User
from letta.services.tool_executor.tool_executor_base import ToolExecutor
-from letta.settings import
+from letta.settings import tool_settings

logger = get_logger(__name__)


-class Citation(BaseModel):
-    """A relevant text snippet identified by line numbers in a document."""
-
-    start_line: int  # Starting line number (1-indexed)
-    end_line: int  # Ending line number (1-indexed, inclusive)
-
-
-class CitationWithText(BaseModel):
-    """A citation with the actual extracted text."""
-
-    text: str  # The actual extracted text from the lines
-
-
-class DocumentAnalysis(BaseModel):
-    """Analysis of a document's relevance to a search question."""
-
-    citations: List[Citation]
-
-
-class DocumentAnalysisWithText(BaseModel):
-    """Analysis with extracted text from line citations."""
-
-    citations: List[CitationWithText]
-
-
class LettaBuiltinToolExecutor(ToolExecutor):
    """Executor for built in Letta tools."""

@@ -61,7 +29,7 @@ class LettaBuiltinToolExecutor(ToolExecutor):
        sandbox_config: Optional[SandboxConfig] = None,
        sandbox_env_vars: Optional[Dict[str, Any]] = None,
    ) -> ToolExecutionResult:
-        function_map = {"run_code": self.run_code, "web_search": self.web_search}
+        function_map = {"run_code": self.run_code, "web_search": self.web_search, "fetch_webpage": self.fetch_webpage}

        if function_name not in function_map:
            raise ValueError(f"Unknown function: {function_name}")
@@ -105,314 +73,166 @@ class LettaBuiltinToolExecutor(ToolExecutor):
        return out

    @trace_method
-    async def web_search(
+    async def web_search(
+        self,
+        agent_state: "AgentState",
+        query: str,
+        num_results: int = 10,
+        category: Optional[
+            Literal["company", "research paper", "news", "pdf", "github", "tweet", "personal site", "linkedin profile", "financial report"]
+        ] = None,
+        include_text: bool = False,
+        include_domains: Optional[List[str]] = None,
+        exclude_domains: Optional[List[str]] = None,
+        start_published_date: Optional[str] = None,
+        end_published_date: Optional[str] = None,
+        user_location: Optional[str] = None,
+    ) -> str:
        """
-        Search the web
-
-        Examples:
-        tasks -> [
-            SearchTask(
-                query="Tesla Q1 2025 earnings report PDF",
-                question="What was Tesla's net profit in Q1 2025?"
-            ),
-            SearchTask(
-                query="Letta API prebuilt tools core_memory_append",
-                question="What does the core_memory_append tool do in Letta?"
-            )
-        ]
+        Search the web using Exa's AI-powered search engine and retrieve relevant content.

        Args:
-
-
-
-
+            query: The search query to find relevant web content
+            num_results: Number of results to return (1-100)
+            category: Focus search on specific content types
+            include_text: Whether to retrieve full page content (default: False, only returns summary and highlights)
+            include_domains: List of domains to include in search results
+            exclude_domains: List of domains to exclude from search results
+            start_published_date: Only return content published after this date (ISO format)
+            end_published_date: Only return content published before this date (ISO format)
+            user_location: Two-letter country code for localized results

        Returns:
-
-            Each result includes ranked snippets with their source URLs and relevance scores,
-            corresponding to each search task.
+            JSON-encoded string containing search results
        """
-        # # TODO: Temporary, maybe deprecate this field?
-        # if return_raw:
-        #     logger.warning("WARNING! return_raw was set to True, we default to False always. Deprecate this field.")
-        #     return_raw = False
        try:
-            from
+            from exa_py import Exa
        except ImportError:
-            raise ImportError("
-
-        if not tasks:
-            return json.dumps({"error": "No search tasks provided."})
-
-        # Convert dict objects to SearchTask objects
-        search_tasks = []
-        for task in tasks:
-            if isinstance(task, dict):
-                search_tasks.append(SearchTask(**task))
-            else:
-                search_tasks.append(task)
-
-        logger.info(f"[DEBUG] Starting web search with {len(search_tasks)} tasks, limit={limit}, return_raw={return_raw}")
-
-        # Check if the API key exists on the agent state
-        agent_state_tool_env_vars = agent_state.get_agent_env_vars_as_dict()
-        firecrawl_api_key = agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") or tool_settings.firecrawl_api_key
-        if not firecrawl_api_key:
-            raise ValueError("FIRECRAWL_API_KEY is not set in environment or on agent_state tool exec environment variables.")
-
-        # Track which API key source was used
-        api_key_source = "agent_environment" if agent_state_tool_env_vars.get("FIRECRAWL_API_KEY") else "system_settings"
-
-        if limit <= 0:
-            raise ValueError("limit must be greater than 0")
-
-        # Initialize Firecrawl client
-        app = AsyncFirecrawlApp(api_key=firecrawl_api_key)
-
-        # Process all search tasks serially
-        search_results = []
-        for task in search_tasks:
-            try:
-                result = await self._process_single_search_task(app, task, limit, return_raw, api_key_source, agent_state)
-                search_results.append(result)
-            except Exception as e:
-                search_results.append(e)
-
-        # Build final response as a mapping of query -> result
-        final_results = {}
-        successful_tasks = 0
-        failed_tasks = 0
-
-        for i, result in enumerate(search_results):
-            query = search_tasks[i].query
-            if isinstance(result, Exception):
-                logger.error(f"Search task {i} failed: {result}")
-                failed_tasks += 1
-                final_results[query] = {"query": query, "question": search_tasks[i].question, "error": str(result)}
-            else:
-                successful_tasks += 1
-                final_results[query] = result
-
-        logger.info(f"[DEBUG] Web search completed: {successful_tasks} successful, {failed_tasks} failed")
-
-        # Build final response with api_key_source at top level
-        response = {"api_key_source": api_key_source, "results": final_results}
-
-        return json.dumps(response, indent=2, ensure_ascii=False)
-
-    @trace_method
-    async def _process_single_search_task(
-        self, app: "AsyncFirecrawlApp", task: SearchTask, limit: int, return_raw: bool, api_key_source: str, agent_state: "AgentState"
-    ) -> Dict[str, Any]:
-        """Process a single search task."""
-        from firecrawl import ScrapeOptions
-
-        logger.info(f"[DEBUG] Starting Firecrawl search for query: '{task.query}' with limit={limit}")
+            raise ImportError("exa-py is not installed in the tool execution environment")

-
-
-                formats=["markdown"], excludeTags=["#ad", "#footer"], onlyMainContent=True, parsePDF=True, removeBase64Images=True
-            )
-            search_result = await app.search(task.query, limit=limit, scrape_options=scrape_options)
-
-            logger.info(
-                f"[DEBUG] Firecrawl search completed for '{task.query}': {len(search_result.get('data', [])) if search_result else 0} results"
-            )
+        if not query.strip():
+            return json.dumps({"error": "Query cannot be empty", "query": query})

-
-            return {"query": task.query, "question": task.question, "error": "No search results found."}
-
-        # If raw results requested, return them directly
-        if return_raw:
-            return {"query": task.query, "question": task.question, "raw_results": search_result}
-
-        # Check if OpenAI API key is available for semantic parsing
-        if model_settings.openai_api_key:
-            try:
-                from openai import AsyncOpenAI
-
-                logger.info(f"[DEBUG] Starting OpenAI analysis for '{task.query}'")
-
-                # Initialize OpenAI client
-                client = AsyncOpenAI(
-                    api_key=model_settings.openai_api_key,
-                )
-
-                # Process each result with OpenAI concurrently
-                analysis_tasks = []
-                results_with_markdown = []
-                results_without_markdown = []
-
-                for result in search_result.get("data"):
-                    if result.get("markdown"):
-                        # Create async task for OpenAI analysis
-                        analysis_task = self._analyze_document_with_openai(
-                            client, result["markdown"], task.query, task.question, agent_state
-                        )
-                        analysis_tasks.append(analysis_task)
-                        results_with_markdown.append(result)
-                    else:
-                        results_without_markdown.append(result)
-
-                logger.info(f"[DEBUG] Starting parallel OpenAI analysis of {len(analysis_tasks)} documents for '{task.query}'")
-
-                # Fire off all OpenAI requests concurrently
-                analyses = await asyncio.gather(*analysis_tasks, return_exceptions=True)
-
-                logger.info(f"[DEBUG] Completed parallel OpenAI analysis of {len(analyses)} documents for '{task.query}'")
-
-                # Build processed results
-                processed_results = []
-
-                # Check if any analysis failed - if so, fall back to raw results
-                for result, analysis in zip(results_with_markdown, analyses):
-                    if isinstance(analysis, Exception) or analysis is None:
-                        logger.error(f"Analysis failed for {result.get('url')}, falling back to raw results")
-                        return {"query": task.query, "question": task.question, "raw_results": search_result}
-
-                # All analyses succeeded, build processed results
-                for result, analysis in zip(results_with_markdown, analyses):
-                    # Extract actual text from line number citations
-                    analysis_with_text = None
-                    if analysis and analysis.citations:
-                        analysis_with_text = self._extract_text_from_line_citations(analysis, result["markdown"])
-
-                    processed_results.append(
-                        {
-                            "url": result.get("url"),
-                            "title": result.get("title"),
-                            "description": result.get("description"),
-                            "analysis": analysis_with_text.model_dump() if analysis_with_text else None,
-                        }
-                    )
-
-                # Add results without markdown
-                for result in results_without_markdown:
-                    processed_results.append(
-                        {"url": result.get("url"), "title": result.get("title"), "description": result.get("description"), "analysis": None}
-                    )
-
-                # Build final response for this task
-                return self._build_final_response_dict(processed_results, task.query, task.question)
-            except Exception as e:
-                # Log error but continue with raw results
-                logger.error(f"Error with OpenAI processing for task '{task.query}': {e}")
-
-        # Return raw search results if OpenAI processing isn't available or fails
-        return {"query": task.query, "question": task.question, "raw_results": search_result}
-
-    @trace_method
-    async def _analyze_document_with_openai(
-        self, client, markdown_content: str, query: str, question: str, agent_state: "AgentState"
-    ) -> Optional[DocumentAnalysis]:
-        """Use OpenAI to analyze a document and extract relevant passages using line numbers."""
-        original_length = len(markdown_content)
-
-        # Create numbered markdown for the LLM to reference
-        numbered_lines = markdown_content.split("\n")
-        numbered_markdown = "\n".join([f"{i+1:4d}: {line}" for i, line in enumerate(numbered_lines)])
-
-        # Truncate if too long
-        max_content_length = 200000
-        truncated = False
-        if len(numbered_markdown) > max_content_length:
-            numbered_markdown = numbered_markdown[:max_content_length] + "..."
-            truncated = True
-
-        user_prompt = get_firecrawl_search_user_prompt(query, question, numbered_markdown)
-
-        logger.info(
-            f"[DEBUG] Starting OpenAI request with line numbers - Query: '{query}', Content: {original_length} chars (truncated: {truncated})"
-        )
-
-        # Time the OpenAI request
-        start_time = time.time()
-
-        # Check agent state env vars first, then fall back to os.getenv
+        # Get EXA API key from agent environment or tool settings
        agent_state_tool_env_vars = agent_state.get_agent_env_vars_as_dict()
-
-
-
-            model=model,
-            messages=[{"role": "system", "content": FIRECRAWL_SEARCH_SYSTEM_PROMPT}, {"role": "user", "content": user_prompt}],
-            response_format=DocumentAnalysis,
-            temperature=0.1,
-        )
+        exa_api_key = agent_state_tool_env_vars.get("EXA_API_KEY") or tool_settings.exa_api_key
+        if not exa_api_key:
+            raise ValueError("EXA_API_KEY is not set in environment or on agent_state tool execution environment variables.")

-
-        request_duration = end_time - start_time
+        logger.info(f"[DEBUG] Starting Exa web search for query: '{query}' with {num_results} results")

-        #
-
-
-
-
-
-        output_length = 0
-        if parsed_result and parsed_result.citations:
-            for citation in parsed_result.citations:
-                output_length += 20  # ~20 chars for line numbers only
+        # Build search parameters
+        search_params = {
+            "query": query,
+            "num_results": min(max(num_results, 1), 100),  # Clamp between 1-100
+            "type": "auto",  # Always use auto search type
+        }

-
-
-
+        # Add optional parameters if provided
+        if category:
+            search_params["category"] = category
+        if include_domains:
+            search_params["include_domains"] = include_domains
+        if exclude_domains:
+            search_params["exclude_domains"] = exclude_domains
+        if start_published_date:
+            search_params["start_published_date"] = start_published_date
+        if end_published_date:
+            search_params["end_published_date"] = end_published_date
+        if user_location:
+            search_params["user_location"] = user_location
+
+        # Configure contents retrieval
+        contents_params = {
+            "text": include_text,
+            "highlights": {"num_sentences": 2, "highlights_per_url": 3, "query": query},
+            "summary": {"query": f"Summarize the key information from this content related to: {query}"},
+        }

-
+        def _sync_exa_search():
+            """Synchronous Exa API call to run in thread pool."""
+            exa = Exa(api_key=exa_api_key)
+            return exa.search_and_contents(**search_params, **contents_params)

-
-
-
-
+        try:
+            # Perform search with content retrieval in thread pool to avoid blocking event loop
+            logger.info(f"[DEBUG] Making async Exa API call with params: {search_params}")
+            result = await asyncio.to_thread(_sync_exa_search)

-
-
-
-
-
+            # Format results
+            formatted_results = []
+            for res in result.results:
+                formatted_result = {
+                    "title": res.title,
+                    "url": res.url,
+                    "published_date": res.published_date,
+                    "author": res.author,
+                }

-        #
-
-
+                # Add content if requested
+                if include_text and hasattr(res, "text") and res.text:
+                    formatted_result["text"] = res.text

-
+                # Add highlights if available
+                if hasattr(res, "highlights") and res.highlights:
+                    formatted_result["highlights"] = res.highlights

-
-
-
-            citations_with_text.append(CitationWithText(text=""))
+                # Add summary if available
+                if hasattr(res, "summary") and res.summary:
+                    formatted_result["summary"] = res.summary

-
+                formatted_results.append(formatted_result)

-
-    def _build_final_response_dict(self, processed_results: List[Dict], query: str, question: str) -> Dict[str, Any]:
-        """Build the final response dictionary from all processed results."""
+            response = {"query": query, "results": formatted_results}

-
-
-        total_snippets = 0
+            logger.info(f"[DEBUG] Exa search completed successfully with {len(formatted_results)} results")
+            return json.dumps(response, indent=2, ensure_ascii=False)

-
-
+        except Exception as e:
+            logger.error(f"Exa search failed for query '{query}': {str(e)}")
+            return json.dumps({"query": query, "error": f"Search failed: {str(e)}"})

-
-
-
-                total_snippets += len(analysis["citations"])
-            else:
-                source["citations"] = []
+    async def fetch_webpage(self, agent_state: "AgentState", url: str) -> str:
+        """
+        Fetch a webpage and convert it to markdown/text format using trafilatura with readability fallback.

-
+        Args:
+            url: The URL of the webpage to fetch and convert

-
-
-
-
-            "total_sources": len(sources),
-            "total_citations": total_snippets,
-            "sources": sources,
-        }
+        Returns:
+            String containing the webpage content in markdown/text format
+        """
+        import asyncio

-
-
+        import html2text
+        import requests
+        from readability import Document
+        from trafilatura import extract, fetch_url

-
+        try:
+            # single thread pool call for the entire trafilatura pipeline
+            def trafilatura_pipeline():
+                downloaded = fetch_url(url)  # fetch_url doesn't accept timeout parameter
+                if downloaded:
+                    md = extract(downloaded, output_format="markdown")
+                    return md
+
+            md = await asyncio.to_thread(trafilatura_pipeline)
+            if md:
+                return md
+
+            # single thread pool call for the entire fallback pipeline
+            def readability_pipeline():
+                response = requests.get(url, timeout=30, headers={"User-Agent": "Mozilla/5.0 (compatible; LettaBot/1.0)"})
+                response.raise_for_status()
+
+                doc = Document(response.text)
+                clean_html = doc.summary(html_partial=True)
+                return html2text.html2text(clean_html)
+
+            return await asyncio.to_thread(readability_pipeline)
+
+        except requests.exceptions.RequestException as e:
+            raise Exception(f"Error fetching webpage: {str(e)}")
+        except Exception as e:
+            raise Exception(f"Unexpected error: {str(e)}")