alita-sdk 0.3.175__py3-none-any.whl → 0.3.177__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. alita_sdk/community/__init__.py +7 -17
  2. alita_sdk/tools/carrier/api_wrapper.py +6 -0
  3. alita_sdk/tools/carrier/backend_tests_tool.py +308 -7
  4. alita_sdk/tools/carrier/carrier_sdk.py +18 -0
  5. alita_sdk/tools/carrier/create_ui_test_tool.py +90 -109
  6. alita_sdk/tools/carrier/run_ui_test_tool.py +311 -184
  7. alita_sdk/tools/carrier/tools.py +2 -1
  8. alita_sdk/tools/confluence/api_wrapper.py +1 -0
  9. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/METADATA +2 -2
  10. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/RECORD +13 -44
  11. alita_sdk/community/browseruse/__init__.py +0 -73
  12. alita_sdk/community/browseruse/api_wrapper.py +0 -288
  13. alita_sdk/community/deep_researcher/__init__.py +0 -70
  14. alita_sdk/community/deep_researcher/agents/__init__.py +0 -1
  15. alita_sdk/community/deep_researcher/agents/baseclass.py +0 -182
  16. alita_sdk/community/deep_researcher/agents/knowledge_gap_agent.py +0 -74
  17. alita_sdk/community/deep_researcher/agents/long_writer_agent.py +0 -251
  18. alita_sdk/community/deep_researcher/agents/planner_agent.py +0 -124
  19. alita_sdk/community/deep_researcher/agents/proofreader_agent.py +0 -80
  20. alita_sdk/community/deep_researcher/agents/thinking_agent.py +0 -64
  21. alita_sdk/community/deep_researcher/agents/tool_agents/__init__.py +0 -20
  22. alita_sdk/community/deep_researcher/agents/tool_agents/crawl_agent.py +0 -87
  23. alita_sdk/community/deep_researcher/agents/tool_agents/search_agent.py +0 -96
  24. alita_sdk/community/deep_researcher/agents/tool_selector_agent.py +0 -83
  25. alita_sdk/community/deep_researcher/agents/utils/__init__.py +0 -0
  26. alita_sdk/community/deep_researcher/agents/utils/parse_output.py +0 -148
  27. alita_sdk/community/deep_researcher/agents/writer_agent.py +0 -63
  28. alita_sdk/community/deep_researcher/api_wrapper.py +0 -116
  29. alita_sdk/community/deep_researcher/deep_research.py +0 -185
  30. alita_sdk/community/deep_researcher/examples/deep_example.py +0 -30
  31. alita_sdk/community/deep_researcher/examples/iterative_example.py +0 -34
  32. alita_sdk/community/deep_researcher/examples/report_plan_example.py +0 -27
  33. alita_sdk/community/deep_researcher/iterative_research.py +0 -419
  34. alita_sdk/community/deep_researcher/llm_config.py +0 -87
  35. alita_sdk/community/deep_researcher/main.py +0 -67
  36. alita_sdk/community/deep_researcher/tools/__init__.py +0 -2
  37. alita_sdk/community/deep_researcher/tools/crawl_website.py +0 -109
  38. alita_sdk/community/deep_researcher/tools/web_search.py +0 -294
  39. alita_sdk/community/deep_researcher/utils/__init__.py +0 -0
  40. alita_sdk/community/deep_researcher/utils/md_to_pdf.py +0 -8
  41. alita_sdk/community/deep_researcher/utils/os.py +0 -21
  42. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/WHEEL +0 -0
  43. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/licenses/LICENSE +0 -0
  44. {alita_sdk-0.3.175.dist-info → alita_sdk-0.3.177.dist-info}/top_level.txt +0 -0
alita_sdk/community/deep_researcher/tools/web_search.py
@@ -1,294 +0,0 @@
- import json
- import os
- import ssl
- import aiohttp
- import asyncio
- from agents import function_tool
- from ..agents.baseclass import ResearchAgent, ResearchRunner
- from ..agents.utils.parse_output import create_type_parser
- from typing import List, Union, Optional
- from bs4 import BeautifulSoup
- from dotenv import load_dotenv
- from pydantic import BaseModel, Field
- from ..llm_config import LLMConfig, model_supports_structured_output
-
- load_dotenv()
- CONTENT_LENGTH_LIMIT = 10000 # Trim scraped content to this length to avoid large context / token limit issues
-
- # ------- DEFINE TYPES -------
-
- class ScrapeResult(BaseModel):
-     url: str = Field(description="The URL of the webpage")
-     text: str = Field(description="The full text content of the webpage")
-     title: str = Field(description="The title of the webpage")
-     description: str = Field(description="A short description of the webpage")
-
-
- class WebpageSnippet(BaseModel):
-     url: str = Field(description="The URL of the webpage")
-     title: str = Field(description="The title of the webpage")
-     description: Optional[str] = Field(description="A short description of the webpage")
-
- class SearchResults(BaseModel):
-     results_list: List[WebpageSnippet]
-
- # ------- DEFINE TOOL -------
-
- def create_web_search_tool(config: LLMConfig) -> function_tool:
-     filter_agent = init_filter_agent(config)
-     serper_client = SerperClient(filter_agent)
-
-     @function_tool
-     async def web_search(query: str) -> Union[List[ScrapeResult], str]:
-         """Perform a web search for a given query and get back the URLs along with their titles, descriptions and text contents.
-
-         Args:
-             query: The search query
-
-         Returns:
-             List of ScrapeResult objects which have the following fields:
-                 - url: The URL of the search result
-                 - title: The title of the search result
-                 - description: The description of the search result
-                 - text: The full text content of the search result
-         """
-         try:
-             search_results = await serper_client.search(query, filter_for_relevance=True, max_results=5)
-             results = await scrape_urls(search_results)
-             return results
-         except Exception as e:
-             # Return a user-friendly error message
-             return f"Sorry, I encountered an error while searching: {str(e)}"
-
-     return web_search
-
- # ------- DEFINE AGENT FOR FILTERING SEARCH RESULTS BY RELEVANCE -------
-
- FILTER_AGENT_INSTRUCTIONS = f"""
- You are a search result filter. Your task is to analyze a list of SERP search results and determine which ones are relevant
- to the original query based on the link, title and snippet. Return only the relevant results in the specified format.
-
- - Remove any results that refer to entities that have similar names to the queried entity, but are not the same.
- - E.g. if the query asks about a company "Amce Inc, acme.com", remove results with "acmesolutions.com" or "acme.net" in the link.
-
- Only output JSON. Follow the JSON schema below. Do not output anything else. I will be parsing this with Pydantic so output valid JSON only:
- {SearchResults.model_json_schema()}
- """
-
- def init_filter_agent(config: LLMConfig) -> ResearchAgent:
-     selected_model = config.reasoning_model
-
-     return ResearchAgent(
-         name="SearchFilterAgent",
-         instructions=FILTER_AGENT_INSTRUCTIONS,
-         model=selected_model,
-         output_type=SearchResults if model_supports_structured_output(selected_model) else None,
-         output_parser=create_type_parser(SearchResults) if not model_supports_structured_output(selected_model) else None
-     )
-
- # ------- DEFINE UNDERLYING TOOL LOGIC -------
-
- # Create a shared connector
- ssl_context = ssl.create_default_context()
- ssl_context.check_hostname = False
- ssl_context.verify_mode = ssl.CERT_NONE
- ssl_context.set_ciphers('DEFAULT:@SECLEVEL=1') # Add this line to allow older cipher suites
-
-
- class SerperClient:
-     """A client for the Serper API to perform Google searches."""
-
-     def __init__(self, filter_agent: ResearchAgent, api_key: str = None):
-         self.filter_agent = filter_agent
-         self.api_key = api_key or os.getenv("SERPER_API_KEY")
-         if not self.api_key:
-             raise ValueError("No API key provided. Set SERPER_API_KEY environment variable.")
-
-         self.url = "https://google.serper.dev/search"
-         self.headers = {
-             "X-API-KEY": self.api_key,
-             "Content-Type": "application/json"
-         }
-
-     async def search(self, query: str, filter_for_relevance: bool = True, max_results: int = 5) -> List[WebpageSnippet]:
-         """Perform a Google search using Serper API and fetch basic details for top results.
-
-         Args:
-             query: The search query
-             num_results: Maximum number of results to return (max 10)
-
-         Returns:
-             Dictionary with search results
-         """
-         connector = aiohttp.TCPConnector(ssl=ssl_context)
-         async with aiohttp.ClientSession(connector=connector) as session:
-             async with session.post(
-                 self.url,
-                 headers=self.headers,
-                 json={"q": query, "autocorrect": False}
-             ) as response:
-                 response.raise_for_status()
-                 results = await response.json()
-                 results_list = [
-                     WebpageSnippet(
-                         url=result.get('link', ''),
-                         title=result.get('title', ''),
-                         description=result.get('snippet', '')
-                     )
-                     for result in results.get('organic', [])
-                 ]
-
-                 if not results_list:
-                     return []
-
-                 if not filter_for_relevance:
-                     return results_list[:max_results]
-
-                 return await self._filter_results(results_list, query, max_results=max_results)
-
-     async def _filter_results(self, results: List[WebpageSnippet], query: str, max_results: int = 5) -> List[WebpageSnippet]:
-         serialized_results = [result.model_dump() if isinstance(result, WebpageSnippet) else result for result in results]
-
-         user_prompt = f"""
-         Original search query: {query}
-
-         Search results to analyze:
-         {json.dumps(serialized_results, indent=2)}
-
-         Return {max_results} search results or less.
-         """
-
-         try:
-             result = await ResearchRunner.run(self.filter_agent, user_prompt)
-             output = result.final_output_as(SearchResults)
-             return output.results_list
-         except Exception as e:
-             print("Error filtering results:", str(e))
-             return results[:max_results]
-
-
- async def scrape_urls(items: List[WebpageSnippet]) -> List[ScrapeResult]:
-     """Fetch text content from provided URLs.
-
-     Args:
-         items: List of SearchEngineResult items to extract content from
-
-     Returns:
-         List of ScrapeResult objects which have the following fields:
-             - url: The URL of the search result
-             - title: The title of the search result
-             - description: The description of the search result
-             - text: The full text content of the search result
-     """
-     connector = aiohttp.TCPConnector(ssl=ssl_context)
-     async with aiohttp.ClientSession(connector=connector) as session:
-         # Create list of tasks for concurrent execution
-         tasks = []
-         for item in items:
-             if item.url: # Skip empty URLs
-                 tasks.append(fetch_and_process_url(session, item))
-
-         # Execute all tasks concurrently and gather results
-         results = await asyncio.gather(*tasks, return_exceptions=True)
-
-         # Filter out errors and return successful results
-         return [r for r in results if isinstance(r, ScrapeResult)]
-
-
- async def fetch_and_process_url(session: aiohttp.ClientSession, item: WebpageSnippet) -> ScrapeResult:
-     """Helper function to fetch and process a single URL."""
-
-     if not is_valid_url(item.url):
-         return ScrapeResult(
-             url=item.url,
-             title=item.title,
-             description=item.description,
-             text=f"Error fetching content: URL contains restricted file extension"
-         )
-
-     try:
-         async with session.get(item.url, timeout=8) as response:
-             if response.status == 200:
-                 content = await response.text()
-                 # Run html_to_text in a thread pool to avoid blocking
-                 text_content = await asyncio.get_event_loop().run_in_executor(
-                     None, html_to_text, content
-                 )
-                 text_content = text_content[:CONTENT_LENGTH_LIMIT] # Trim content to avoid exceeding token limit
-                 return ScrapeResult(
-                     url=item.url,
-                     title=item.title,
-                     description=item.description,
-                     text=text_content
-                 )
-             else:
-                 # Instead of raising, return a WebSearchResult with an error message
-                 return ScrapeResult(
-                     url=item.url,
-                     title=item.title,
-                     description=item.description,
-                     text=f"Error fetching content: HTTP {response.status}"
-                 )
-     except Exception as e:
-         # Instead of raising, return a WebSearchResult with an error message
-         return ScrapeResult(
-             url=item.url,
-             title=item.title,
-             description=item.description,
-             text=f"Error fetching content: {str(e)}"
-         )
-
-
- def html_to_text(html_content: str) -> str:
-     """
-     Strips out all of the unnecessary elements from the HTML context to prepare it for text extraction / LLM processing.
-     """
-     # Parse the HTML using lxml for speed
-     soup = BeautifulSoup(html_content, 'lxml')
-
-     # Extract text from relevant tags
-     tags_to_extract = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'p', 'li', 'blockquote')
-
-     # Use a generator expression for efficiency
-     extracted_text = "\n".join(element.get_text(strip=True) for element in soup.find_all(tags_to_extract) if element.get_text(strip=True))
-
-     return extracted_text
-
-
- def is_valid_url(url: str) -> bool:
-     """Check that a URL does not contain restricted file extensions."""
-     if any(ext in url for ext in [
-         ".pdf",
-         ".doc",
-         ".xls",
-         ".ppt",
-         ".zip",
-         ".rar",
-         ".7z",
-         ".txt",
-         ".js",
-         ".xml",
-         ".css",
-         ".png",
-         ".jpg",
-         ".jpeg",
-         ".gif",
-         ".ico",
-         ".svg",
-         ".webp",
-         ".mp3",
-         ".mp4",
-         ".avi",
-         ".mov",
-         ".wmv",
-         ".flv",
-         ".wma",
-         ".wav",
-         ".m4a",
-         ".m4v",
-         ".m4b",
-         ".m4p",
-         ".m4u"
-     ]):
-         return False
-     return True
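For context on what this removal drops: the file above defined the Serper-backed web_search tool used by the deep_researcher agents. Below is a minimal, hypothetical sketch (not part of the diff or the package) of how its public helpers could be driven directly under 0.3.175; it assumes that older release is installed, that SERPER_API_KEY is set, and the search query is invented for the example.

    import asyncio

    from alita_sdk.community.deep_researcher.tools.web_search import SerperClient, scrape_urls

    async def main() -> None:
        # Relevance filtering is skipped below, so no filter agent is needed
        # (the filter agent is only consulted when filter_for_relevance=True).
        client = SerperClient(filter_agent=None)  # reads SERPER_API_KEY from the environment
        snippets = await client.search("site reliability engineering", filter_for_relevance=False, max_results=3)
        pages = await scrape_urls(snippets)  # concurrent fetch + HTML-to-text; errors come back as inline text
        for page in pages:
            print(page.url, "-", page.title, f"({len(page.text)} chars)")

    asyncio.run(main())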
alita_sdk/community/deep_researcher/utils/__init__.py
File without changes
alita_sdk/community/deep_researcher/utils/md_to_pdf.py
@@ -1,8 +0,0 @@
- import os
- from md2pdf import md2pdf
-
- curdir = os.path.dirname(os.path.abspath(__file__))
- css_path = os.path.join(curdir, "markdown.css")
-
- def md_to_pdf(md_text: str, pdf_file_path: str):
-     md2pdf(pdf_file_path, md_text, css_file_path=css_path)
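As context for this removal, a hypothetical call of the deleted helper (assuming the 0.3.175 release, the md2pdf dependency, and the bundled markdown.css are all still present; the file names are invented):

    from alita_sdk.community.deep_researcher.utils.md_to_pdf import md_to_pdf

    # Renders Markdown text to a PDF using the stylesheet shipped next to the module.
    md_to_pdf("# Research Report\n\nFindings go here.", "report.pdf")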
alita_sdk/community/deep_researcher/utils/os.py
@@ -1,21 +0,0 @@
- import os
- from typing import Optional
-
- def get_env_with_prefix(base_name: str, prefix: str = "DR_", default: Optional[str] = None) -> Optional[str]:
-     """
-     Retrieves an environment variable, checking for a prefixed version first.
-
-     Args:
-         base_name: The base name of the environment variable (e.g., "OPENAI_API_KEY").
-         prefix: The prefix to check for (e.g., "DR_"). Defaults to "DR_".
-         default: The default value to return if neither the prefixed nor the
-             base variable is found.
-
-     Returns:
-         The value of the environment variable, or the default value, or None.
-     """
-     prefixed_name = f"{prefix}{base_name}"
-     value = os.getenv(prefixed_name)
-     if value is not None:
-         return value
-     return os.getenv(base_name, default)
- return os.getenv(base_name, default)