alita-sdk 0.3.313__py3-none-any.whl → 0.3.315__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,8 +59,6 @@ def get_tools(tools_list: list, alita_client, llm, memory_store: BaseStore = Non
59
59
  llm=llm
60
60
  ))
61
61
  elif tool['type'] == 'memory':
62
- if memory_store is None:
63
- raise ToolException(f"Memory store is not provided for memory tool: {tool.get('name', tool.get('toolkit_name', 'unknown'))}")
64
62
  tools += MemoryToolkit.get_toolkit(
65
63
  namespace=tool['settings'].get('namespace', str(tool['id'])),
66
64
  pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
@@ -12,6 +12,8 @@ from ..utils import get_max_toolkit_length, clean_string, TOOLKIT_SPLITTER
12
12
  from ...configurations.browser import BrowserConfiguration
13
13
  from logging import getLogger
14
14
 
15
+ from ...configurations.pgvector import PgVectorConfiguration
16
+
15
17
  logger = getLogger(__name__)
16
18
 
17
19
  name = "browser"
@@ -21,6 +23,8 @@ def get_tools(tool):
21
23
  return BrowserToolkit().get_toolkit(
22
24
  selected_tools=tool['settings'].get('selected_tools', []),
23
25
  browser_configuration=tool['settings']['browser_configuration'],
26
+ pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
27
+ embedding_model=tool['settings'].get('embedding_model'),
24
28
  toolkit_name=tool.get('toolkit_name', '')
25
29
  ).get_tools()
26
30
 
@@ -51,8 +55,21 @@ class BrowserToolkit(BaseToolkit):
51
55
 
52
56
  return create_model(
53
57
  name,
54
- __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Browser", "icon_url": None, "categories": ["testing"], "extra_categories": ["web scraping", "search", "crawler"]}}),
55
- browser_configuration=(BrowserConfiguration, Field(description="Browser Configuration", json_schema_extra={'configuration_types': ['browser']})),
58
+ __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Browser", "icon_url": None,
59
+ "categories": ["testing"],
60
+ "extra_categories": [
61
+ "web scraping", "search", "crawler"
62
+ ]}}),
63
+ browser_configuration=(Optional[BrowserConfiguration],
64
+ Field(description="Browser Configuration (required for tools and `google`)",
65
+ default=None, json_schema_extra={'configuration_types': ['browser']})),
66
+ pgvector_configuration=(Optional[PgVectorConfiguration],
67
+ Field(description="PgVector configuration (required for tools `multi_url_crawler`)",
68
+ default=None, json_schema_extra={'configuration_types': ['pgvector']})),
69
+ embedding_model=(Optional[str],
70
+ Field(default=None,
71
+ description="Embedding configuration (required for tools `multi_url_crawler`)",
72
+ json_schema_extra={'configuration_model': 'embedding'})),
56
73
  selected_tools=(List[Literal[tuple(selected_tools)]],
57
74
  Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
58
75
  __validators__={
@@ -65,9 +82,15 @@ class BrowserToolkit(BaseToolkit):
65
82
  if selected_tools is None:
66
83
  selected_tools = []
67
84
 
68
- wrapper_payload = {
85
+ wrapper_payload_google = {
69
86
  **kwargs,
70
87
  **kwargs.get('browser_configuration', {}),
88
+ **kwargs.get('pgvector_configuration', {}),
89
+ }
90
+
91
+ wrapper_payload_rag_based = {
92
+ **kwargs,
93
+ **kwargs.get('pgvector_configuration', {}),
71
94
  }
72
95
 
73
96
  tools = []
@@ -85,7 +108,7 @@ class BrowserToolkit(BaseToolkit):
85
108
  if tool == 'single_url_crawler':
86
109
  tool_entry = SingleURLCrawler()
87
110
  elif tool == 'multi_url_crawler':
88
- tool_entry = MultiURLCrawler()
111
+ tool_entry = MultiURLCrawler(**wrapper_payload_rag_based)
89
112
  elif tool == 'get_html_content':
90
113
  tool_entry = GetHTMLContent()
91
114
  elif tool == 'get_pdf_content':
@@ -93,7 +116,7 @@ class BrowserToolkit(BaseToolkit):
93
116
  elif tool == 'google':
94
117
  try:
95
118
  google_api_wrapper = GoogleSearchAPIWrapper(
96
- **wrapper_payload
119
+ **wrapper_payload_google
97
120
  )
98
121
  tool_entry = GoogleSearchResults(api_wrapper=google_api_wrapper)
99
122
  # rename the tool to avoid conflicts
@@ -27,13 +27,16 @@ class MultiURLCrawler(BaseTool):
27
27
  max_response_size: int = 3000
28
28
  name: str = "multi_url_crawler"
29
29
  description: str = "Crawls multiple URLs and returns the content related to query"
30
+ embedding_model: str = None
31
+ connection_string: str = None
30
32
  args_schema: Type[BaseModel] = create_model("MultiURLCrawlerModel",
31
33
  query=(str, Field(description="Query text to search pages")),
32
34
  urls=(list[str], Field(description="list of URLs to search like ['url1', 'url2']")))
33
35
 
34
36
  def _run(self, query: str, urls: list[str], run_manager=None):
35
37
  urls = [url.strip() for url in urls]
36
- return webRag(urls, self.max_response_size, query)
38
+ return webRag(urls=urls, max_response_size=self.max_response_size, query=query,
39
+ connection_string=self.connection_string, embedding_model=self.embedding_model)
37
40
 
38
41
 
39
42
  class GetHTMLContent(BaseTool):
@@ -6,9 +6,9 @@ from langchain.text_splitter import CharacterTextSplitter
6
6
  import fitz
7
7
 
8
8
  try:
9
- from langchain_chroma import Chroma
9
+ from langchain_postgres import PGVector
10
10
  except ImportError:
11
- Chroma = None
11
+ PGVector = None
12
12
 
13
13
  from langchain_community.embeddings.sentence_transformer import (
14
14
  SentenceTransformerEmbeddings,
@@ -32,13 +32,22 @@ def get_page(urls, html_only=False):
32
32
  return docs_transformed
33
33
 
34
34
 
35
- def webRag(urls, max_response_size, query):
36
- if Chroma is None:
37
- return "Chroma is not initialized. Web rag is not available."
35
+ def webRag(urls, max_response_size, query, connection_string=None, embedding_model=None):
36
+ if PGVector is None:
37
+ return "PGVector is not initialized. Web rag is not available."
38
+
39
+ if not connection_string or not embedding_model:
40
+ return "Connection string or embedding model is missing. Web rag is not available."
38
41
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
39
42
  docs = text_splitter.split_documents(get_page(urls))
40
- embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
41
- db = Chroma.from_documents(docs, embedding_function)
43
+ embedding_function = SentenceTransformerEmbeddings(model_name=embedding_model)
44
+ db = PGVector.from_documents(
45
+ documents=docs,
46
+ embedding=embedding_function,
47
+ collection_name="web_rag",
48
+ pre_delete_collection=True,
49
+ connection=connection_string
50
+ )
42
51
  docs = db.search(query, "mmr", k=10)
43
52
  text = ""
44
53
  for doc in docs:
@@ -1,6 +1,6 @@
1
- from typing import Optional, List, Literal
1
+ from typing import List, Literal
2
2
 
3
- from langchain_core.tools import BaseToolkit, BaseTool
3
+ from langchain_core.tools import BaseToolkit, BaseTool, ToolException
4
4
 
5
5
  from alita_sdk.configurations.pgvector import PgVectorConfiguration
6
6
 
@@ -101,6 +101,8 @@ class MemoryToolkit(BaseToolkit):
101
101
  # The store is not provided, attempt to create it from configuration
102
102
  from ...runtime.langchain.store_manager import get_manager
103
103
  conn_str = (kwargs.get('pgvector_configuration') or {}).get('connection_string', '')
104
+ if not conn_str:
105
+ raise ToolException("Connection string is required to create PostgresStore for memory toolkit.")
104
106
  store = get_manager().get_store(conn_str)
105
107
 
106
108
  # Validate store type
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.313
3
+ Version: 0.3.315
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -101,7 +101,7 @@ alita_sdk/runtime/toolkits/configurations.py,sha256=kIDAlnryPQfbZyFxV-9SzN2-Vefz
101
101
  alita_sdk/runtime/toolkits/datasource.py,sha256=qk78OdPoReYPCWwahfkKLbKc4pfsu-061oXRryFLP6I,2498
102
102
  alita_sdk/runtime/toolkits/prompt.py,sha256=WIpTkkVYWqIqOWR_LlSWz3ug8uO9tm5jJ7aZYdiGRn0,1192
103
103
  alita_sdk/runtime/toolkits/subgraph.py,sha256=wwUK8JjPXkGzyVZ3tAukmvST6eGbqx_U11rpnmbrvtg,2105
104
- alita_sdk/runtime/toolkits/tools.py,sha256=oW8-qLPndPTjjTrJRzldLWH4E5259_HdD0LScaGVDik,7943
104
+ alita_sdk/runtime/toolkits/tools.py,sha256=Ea3LO6voPNysdzVB7jYMZIqSMtda7LCq_6fkVfb8C54,7764
105
105
  alita_sdk/runtime/toolkits/vectorstore.py,sha256=BGppQADa1ZiLO17fC0uCACTTEvPHlodEDYEzUcBRbAA,2901
106
106
  alita_sdk/runtime/tools/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
107
107
  alita_sdk/runtime/tools/agent.py,sha256=m98QxOHwnCRTT9j18Olbb5UPS8-ZGeQaGiUyZJSyFck,3162
@@ -162,11 +162,11 @@ alita_sdk/tools/bitbucket/__init__.py,sha256=2VAY45Jij5dHkz6UGTmsEmOcLeJMWmcX-Wr
162
162
  alita_sdk/tools/bitbucket/api_wrapper.py,sha256=xHrluV2aCckOK_lGd42fFz1c-pyuZAnC-A_v1SKiM5g,20006
163
163
  alita_sdk/tools/bitbucket/bitbucket_constants.py,sha256=UsbhQ1iEvrKoxceTFPWTYhaXS1zSxbmjs1TwY0-P4gw,462
164
164
  alita_sdk/tools/bitbucket/cloud_api_wrapper.py,sha256=QHdud-d3xcz3mOP3xb1Htk1sv9QFg7bTm1szdN_zohQ,15517
165
- alita_sdk/tools/browser/__init__.py,sha256=ypYaShyDzjKDgErANsaMohRv0C_fQaVANX0As1L3amQ,5316
166
- alita_sdk/tools/browser/crawler.py,sha256=jhE35dU94eQLURSM-D50tspOqEMsiGzMDbYNqNSR2mU,2279
165
+ alita_sdk/tools/browser/__init__.py,sha256=faLmuMt2CzCPMmxquGcdV-TGAbKxHi7sTQyuK0VKYNs,6760
166
+ alita_sdk/tools/browser/crawler.py,sha256=Ah0tyF7lKGJIlxMY4MXEQmuDehaB_I-FmECxG27DnPw,2476
167
167
  alita_sdk/tools/browser/duck_duck_go_search.py,sha256=iKws923v34o-ySXohJw-8xTDBWlj3fMsnzC_ZRuPugE,2002
168
168
  alita_sdk/tools/browser/google_search_rag.py,sha256=QVHFbVwymiJGuno_HLSJOK1c_MpgMdBSTYQKf6fLRk8,1838
169
- alita_sdk/tools/browser/utils.py,sha256=4k3YM_f1Kqlhjz9vt2pNsGkvCjhy-EmY3nvcwdFCsLA,2501
169
+ alita_sdk/tools/browser/utils.py,sha256=J4-ZSb5TeCJnYJTsPKUOyiOC_vfTye3QtZA-T_AYEoA,2853
170
170
  alita_sdk/tools/browser/wiki.py,sha256=Qh3HBFd4dkS2VavXbFJOm4b8SjVSIe5xSD7CY1vEkKE,1126
171
171
  alita_sdk/tools/carrier/__init__.py,sha256=Ove5wAXBxyLS5F5ZxgydV2xKZJIR3OoMB5fMkn8jNUc,4296
172
172
  alita_sdk/tools/carrier/api_wrapper.py,sha256=tP7oR_U0HX1rxqat0Jkz6oh3RB9BEr1ESKQ9J8OWDcE,9093
@@ -269,7 +269,7 @@ alita_sdk/tools/llm/llm_utils.py,sha256=6P2j-42JGbyqpO8lNRuEP8GEhja-LC9E-98jTelK
269
269
  alita_sdk/tools/localgit/__init__.py,sha256=NScO0Eu-wl-rc63jjD5Qv1RXXB1qukSIJXx-yS_JQLI,2529
270
270
  alita_sdk/tools/localgit/local_git.py,sha256=gsAftNcK7nMCd8VsIkwDLs2SoG0MgpYdkQG5tmoynkA,18074
271
271
  alita_sdk/tools/localgit/tool.py,sha256=It_B24rMvFPurB355Oy5IShg2BsZTASsEoSS8hu2SXw,998
272
- alita_sdk/tools/memory/__init__.py,sha256=D6xRY2EndTgr8mWvtQ4nmNa6kFDIgoTTJZ-PsVQMAmU,4642
272
+ alita_sdk/tools/memory/__init__.py,sha256=aOF0-PAAqBZS3rI2IOezyLhZpn-WpV--ABy4J_eIAKo,4789
273
273
  alita_sdk/tools/ocr/__init__.py,sha256=pvslKVXyJmK0q23FFDNieuc7RBIuzNXTjTNj-GqhGb0,3335
274
274
  alita_sdk/tools/ocr/api_wrapper.py,sha256=08UF8wj1sR8DcW0z16pw19bgLatLkBF8dySW-Ds8iRk,29649
275
275
  alita_sdk/tools/ocr/text_detection.py,sha256=1DBxt54r3_HdEi93QynSIVta3rH3UpIvy799TPtDTtk,23825
@@ -349,8 +349,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=A6CUEKjENt3mZlPU9lai88WV9esCD
349
349
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
350
350
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
351
351
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
352
- alita_sdk-0.3.313.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
353
- alita_sdk-0.3.313.dist-info/METADATA,sha256=bqPVkuBoXljOCcRuVk2v9Wo7TQchfLd5lKw-1dk3qkw,18897
354
- alita_sdk-0.3.313.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
355
- alita_sdk-0.3.313.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
356
- alita_sdk-0.3.313.dist-info/RECORD,,
352
+ alita_sdk-0.3.315.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
353
+ alita_sdk-0.3.315.dist-info/METADATA,sha256=iwfMJ3l6wJ6OAHVEJWZ1xIUsQgfKlcgUzxdqUvS4POA,18897
354
+ alita_sdk-0.3.315.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
355
+ alita_sdk-0.3.315.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
356
+ alita_sdk-0.3.315.dist-info/RECORD,,