alita-sdk 0.3.314__py3-none-any.whl → 0.3.316__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,6 +12,8 @@ from ..utils import get_max_toolkit_length, clean_string, TOOLKIT_SPLITTER
12
12
  from ...configurations.browser import BrowserConfiguration
13
13
  from logging import getLogger
14
14
 
15
+ from ...configurations.pgvector import PgVectorConfiguration
16
+
15
17
  logger = getLogger(__name__)
16
18
 
17
19
  name = "browser"
@@ -21,6 +23,8 @@ def get_tools(tool):
21
23
  return BrowserToolkit().get_toolkit(
22
24
  selected_tools=tool['settings'].get('selected_tools', []),
23
25
  browser_configuration=tool['settings']['browser_configuration'],
26
+ pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
27
+ embedding_model=tool['settings'].get('embedding_model'),
24
28
  toolkit_name=tool.get('toolkit_name', '')
25
29
  ).get_tools()
26
30
 
@@ -51,8 +55,17 @@ class BrowserToolkit(BaseToolkit):
51
55
 
52
56
  return create_model(
53
57
  name,
54
- __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Browser", "icon_url": None, "categories": ["testing"], "extra_categories": ["web scraping", "search", "crawler"]}}),
55
- browser_configuration=(BrowserConfiguration, Field(description="Browser Configuration", json_schema_extra={'configuration_types': ['browser']})),
58
+ __config__=ConfigDict(json_schema_extra={'metadata': {"label": "Browser", "icon_url": None,
59
+ "categories": ["testing"],
60
+ "extra_categories": [
61
+ "web scraping", "search", "crawler"
62
+ ]}}),
63
+ browser_configuration=(Optional[BrowserConfiguration],
64
+ Field(description="Browser Configuration (required for tools and `google`)",
65
+ default=None, json_schema_extra={'configuration_types': ['browser']})),
66
+ pgvector_configuration=(Optional[PgVectorConfiguration],
67
+ Field(description="PgVector configuration (required for tools `multi_url_crawler`)",
68
+ default=None, json_schema_extra={'configuration_types': ['pgvector']})),
56
69
  selected_tools=(List[Literal[tuple(selected_tools)]],
57
70
  Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
58
71
  __validators__={
@@ -65,9 +78,15 @@ class BrowserToolkit(BaseToolkit):
65
78
  if selected_tools is None:
66
79
  selected_tools = []
67
80
 
68
- wrapper_payload = {
81
+ wrapper_payload_google = {
69
82
  **kwargs,
70
83
  **kwargs.get('browser_configuration', {}),
84
+ **kwargs.get('pgvector_configuration', {}),
85
+ }
86
+
87
+ wrapper_payload_rag_based = {
88
+ **kwargs,
89
+ **kwargs.get('pgvector_configuration', {}),
71
90
  }
72
91
 
73
92
  tools = []
@@ -85,7 +104,7 @@ class BrowserToolkit(BaseToolkit):
85
104
  if tool == 'single_url_crawler':
86
105
  tool_entry = SingleURLCrawler()
87
106
  elif tool == 'multi_url_crawler':
88
- tool_entry = MultiURLCrawler()
107
+ tool_entry = MultiURLCrawler(**wrapper_payload_rag_based)
89
108
  elif tool == 'get_html_content':
90
109
  tool_entry = GetHTMLContent()
91
110
  elif tool == 'get_pdf_content':
@@ -93,7 +112,7 @@ class BrowserToolkit(BaseToolkit):
93
112
  elif tool == 'google':
94
113
  try:
95
114
  google_api_wrapper = GoogleSearchAPIWrapper(
96
- **wrapper_payload
115
+ **wrapper_payload_google
97
116
  )
98
117
  tool_entry = GoogleSearchResults(api_wrapper=google_api_wrapper)
99
118
  # rename the tool to avoid conflicts
@@ -27,13 +27,15 @@ class MultiURLCrawler(BaseTool):
27
27
  max_response_size: int = 3000
28
28
  name: str = "multi_url_crawler"
29
29
  description: str = "Crawls multiple URLs and returns the content related to query"
30
+ connection_string: str = None
30
31
  args_schema: Type[BaseModel] = create_model("MultiURLCrawlerModel",
31
32
  query=(str, Field(description="Query text to search pages")),
32
33
  urls=(list[str], Field(description="list of URLs to search like ['url1', 'url2']")))
33
34
 
34
35
  def _run(self, query: str, urls: list[str], run_manager=None):
35
36
  urls = [url.strip() for url in urls]
36
- return webRag(urls, self.max_response_size, query)
37
+ return webRag(urls=urls, max_response_size=self.max_response_size, query=query,
38
+ connection_string=self.connection_string)
37
39
 
38
40
 
39
41
  class GetHTMLContent(BaseTool):
@@ -6,9 +6,9 @@ from langchain.text_splitter import CharacterTextSplitter
6
6
  import fitz
7
7
 
8
8
  try:
9
- from langchain_chroma import Chroma
9
+ from langchain_postgres import PGVector
10
10
  except ImportError:
11
- Chroma = None
11
+ PGVector = None
12
12
 
13
13
  from langchain_community.embeddings.sentence_transformer import (
14
14
  SentenceTransformerEmbeddings,
@@ -32,13 +32,22 @@ def get_page(urls, html_only=False):
32
32
  return docs_transformed
33
33
 
34
34
 
35
- def webRag(urls, max_response_size, query):
36
- if Chroma is None:
37
- return "Chroma is not initialized. Web rag is not available."
35
+ def webRag(urls, max_response_size, query, connection_string=None):
36
+ if PGVector is None:
37
+ return "PGVector is not initialized. Web rag is not available."
38
+
39
+ if not connection_string:
40
+ return "Connection string or embedding model is missing. Web rag is not available."
38
41
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
39
42
  docs = text_splitter.split_documents(get_page(urls))
40
43
  embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
41
- db = Chroma.from_documents(docs, embedding_function)
44
+ db = PGVector.from_documents(
45
+ documents=docs,
46
+ embedding=embedding_function,
47
+ collection_name="web_rag",
48
+ pre_delete_collection=True,
49
+ connection=connection_string
50
+ )
42
51
  docs = db.search(query, "mmr", k=10)
43
52
  text = ""
44
53
  for doc in docs:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alita_sdk
3
- Version: 0.3.314
3
+ Version: 0.3.316
4
4
  Summary: SDK for building langchain agents using resources from Alita
5
5
  Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
6
6
  License-Expression: Apache-2.0
@@ -162,11 +162,11 @@ alita_sdk/tools/bitbucket/__init__.py,sha256=2VAY45Jij5dHkz6UGTmsEmOcLeJMWmcX-Wr
162
162
  alita_sdk/tools/bitbucket/api_wrapper.py,sha256=xHrluV2aCckOK_lGd42fFz1c-pyuZAnC-A_v1SKiM5g,20006
163
163
  alita_sdk/tools/bitbucket/bitbucket_constants.py,sha256=UsbhQ1iEvrKoxceTFPWTYhaXS1zSxbmjs1TwY0-P4gw,462
164
164
  alita_sdk/tools/bitbucket/cloud_api_wrapper.py,sha256=QHdud-d3xcz3mOP3xb1Htk1sv9QFg7bTm1szdN_zohQ,15517
165
- alita_sdk/tools/browser/__init__.py,sha256=ypYaShyDzjKDgErANsaMohRv0C_fQaVANX0As1L3amQ,5316
166
- alita_sdk/tools/browser/crawler.py,sha256=jhE35dU94eQLURSM-D50tspOqEMsiGzMDbYNqNSR2mU,2279
165
+ alita_sdk/tools/browser/__init__.py,sha256=9jPKsYlNACuH25JknczS6EQ_Jxkck42R2Bkl3huHxc0,6459
166
+ alita_sdk/tools/browser/crawler.py,sha256=tkB5UX9FmpJrhKPfaS-a2pL9loRf8zN-V5SfpQvX2NI,2406
167
167
  alita_sdk/tools/browser/duck_duck_go_search.py,sha256=iKws923v34o-ySXohJw-8xTDBWlj3fMsnzC_ZRuPugE,2002
168
168
  alita_sdk/tools/browser/google_search_rag.py,sha256=QVHFbVwymiJGuno_HLSJOK1c_MpgMdBSTYQKf6fLRk8,1838
169
- alita_sdk/tools/browser/utils.py,sha256=4k3YM_f1Kqlhjz9vt2pNsGkvCjhy-EmY3nvcwdFCsLA,2501
169
+ alita_sdk/tools/browser/utils.py,sha256=zFbpsTw593TRqxZ8bu5RQ7PHzZTfZjxvH5IGgRRjR2Q,2811
170
170
  alita_sdk/tools/browser/wiki.py,sha256=Qh3HBFd4dkS2VavXbFJOm4b8SjVSIe5xSD7CY1vEkKE,1126
171
171
  alita_sdk/tools/carrier/__init__.py,sha256=Ove5wAXBxyLS5F5ZxgydV2xKZJIR3OoMB5fMkn8jNUc,4296
172
172
  alita_sdk/tools/carrier/api_wrapper.py,sha256=tP7oR_U0HX1rxqat0Jkz6oh3RB9BEr1ESKQ9J8OWDcE,9093
@@ -349,8 +349,8 @@ alita_sdk/tools/zephyr_scale/api_wrapper.py,sha256=A6CUEKjENt3mZlPU9lai88WV9esCD
349
349
  alita_sdk/tools/zephyr_squad/__init__.py,sha256=0ne8XLJEQSLOWfzd2HdnqOYmQlUliKHbBED5kW_Vias,2895
350
350
  alita_sdk/tools/zephyr_squad/api_wrapper.py,sha256=kmw_xol8YIYFplBLWTqP_VKPRhL_1ItDD0_vXTe_UuI,14906
351
351
  alita_sdk/tools/zephyr_squad/zephyr_squad_cloud_client.py,sha256=R371waHsms4sllHCbijKYs90C-9Yu0sSR3N4SUfQOgU,5066
352
- alita_sdk-0.3.314.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
353
- alita_sdk-0.3.314.dist-info/METADATA,sha256=zmL9d8KWnZmuKvbhUXrbLn4HfljL2bvYyilxieU63Ys,18897
354
- alita_sdk-0.3.314.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
355
- alita_sdk-0.3.314.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
356
- alita_sdk-0.3.314.dist-info/RECORD,,
352
+ alita_sdk-0.3.316.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
353
+ alita_sdk-0.3.316.dist-info/METADATA,sha256=YGBCkcwWCg9D93hfXv1HxNW6BCtUUaxDhoJWeuk4fkI,18897
354
+ alita_sdk-0.3.316.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
355
+ alita_sdk-0.3.316.dist-info/top_level.txt,sha256=0vJYy5p_jK6AwVb1aqXr7Kgqgk3WDtQ6t5C-XI9zkmg,10
356
+ alita_sdk-0.3.316.dist-info/RECORD,,