alita-sdk 0.3.203__py3-none-any.whl → 0.3.205__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,6 +3,8 @@ import fnmatch
 import logging
 import traceback
 from typing import Any, Optional, List, Dict
+
+from langchain_core.documents import Document
 from langchain_core.tools import ToolException
 from pydantic import BaseModel, create_model, Field
 from .utils import TOOLKIT_SPLITTER
@@ -186,7 +188,41 @@ class BaseToolApiWrapper(BaseModel):
 
 class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     """Base class for tool API wrappers that support vector store functionality."""
-
+
+    doctype: str = "document"
+
+    def _base_loader(self, **kwargs) -> List[Document]:
+        """ Loads documents from a source, processes them,
+        and returns a list of Document objects with base metadata: id and created_on."""
+        pass
+
+    def _process_document(self, base_document: Document) -> Document:
+        """ Process an existing base document to extract relevant metadata for full document preparation.
+        Used for late processing of documents after we ensure that the document has to be indexed to avoid
+        time-consuming operations for documents which might be useless.
+
+        Args:
+            document (Document): The base document to process.
+
+        Returns:
+            Document: The processed document with metadata."""
+        pass
+
+    def _process_documents(self, documents: List[Document]) -> List[Document]:
+        """
+        Process a list of base documents to extract relevant metadata for full document preparation.
+        Used for late processing of documents after we ensure that the documents have to be indexed to avoid
+        time-consuming operations for documents which might be useless.
+        This function passed to index_documents method of vector store and called after _reduce_duplicates method.
+
+        Args:
+            documents (List[Document]): The base documents to process.
+
+        Returns:
+            List[Document]: The processed documents with metadata.
+        """
+        return [self._process_document(doc) for doc in documents]
+
     def _init_vector_store(self, collection_suffix: str = "", embeddings: Optional[Any] = None):
         """ Initializes the vector store wrapper with the provided parameters."""
         try:
@@ -225,7 +261,8 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
             embedding_model=self.embedding_model,
             embedding_model_params=self.embedding_model_params,
             vectorstore_params=vectorstore_params,
-            embeddings=embeddings
+            embeddings=embeddings,
+            process_document_func=self._process_documents,
         )
 
     def search_index(self,
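
The two hunks above introduce a pair of document-processing hooks and wire them into the vector store via `process_document_func`. As a minimal, hypothetical sketch of the intended contract (not code from the package): `_base_loader` returns cheap stubs carrying only the base metadata, and `_process_document` does the expensive enrichment only for documents that survive de-duplication. The `ExampleToolApiWrapper` class and its `_list_items`/`_load_item_body` helpers are assumptions for illustration.

from typing import Any, Dict, List
from langchain_core.documents import Document

# Hypothetical subclass; BaseVectorStoreToolApiWrapper comes from the module shown above.
class ExampleToolApiWrapper(BaseVectorStoreToolApiWrapper):
    doctype: str = "example"

    def _list_items(self, **kwargs) -> List[Dict[str, Any]]:
        # Hypothetical cheap listing call (ids and timestamps only).
        return [{"id": "1", "modified": "2024-01-01T00:00:00Z"}]

    def _load_item_body(self, item_id: str) -> str:
        # Hypothetical expensive content fetch.
        return f"full text of item {item_id}"

    def _base_loader(self, **kwargs) -> List[Document]:
        # Cheap pass: emit stubs with only the base metadata (id, created_on)
        # so duplicate detection can run before any heavy fetching.
        return [
            Document(page_content="", metadata={"id": item["id"], "created_on": item["modified"]})
            for item in self._list_items(**kwargs)
        ]

    def _process_document(self, base_document: Document) -> Document:
        # Late pass: invoked (via _process_documents) only for documents the
        # vector store decided to index after _reduce_duplicates.
        base_document.page_content = self._load_item_body(base_document.metadata["id"])
        return base_document
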
@@ -345,7 +382,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         Handles the retrieval of files from a specific path and branch.
         This method should be implemented in subclasses to provide the actual file retrieval logic.
         """
-        _files = self._get_files(path, branch)
+        _files = self._get_files(path=path, branch=branch)
         if isinstance(_files, str):
             try:
                 # Attempt to convert the string to a list using ast.literal_eval
@@ -447,4 +484,12 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         }
 
         # Return index_data tool first, then the search tools
-        return [index_tool] + base_tools
+        return [index_tool] + base_tools
+
+def extend_with_vector_tools(method):
+    def wrapper(self, *args, **kwargs):
+        tools = method(self, *args, **kwargs)
+        tools.extend(self._get_vector_search_tools())
+        return tools
+
+    return wrapper
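
The new `extend_with_vector_tools` decorator appends the wrapper's vector-search tools to whatever a tool-listing method returns. Below is a hedged illustration of how it could be applied; the `ExampleCodeToolApiWrapper` class, the `list_branches` tool, and its args schema are made up for the sketch, and the decorator's import path is not shown in this diff.

# Hypothetical illustration; BaseCodeToolApiWrapper and extend_with_vector_tools
# come from the base module shown in the hunks above.
from pydantic import BaseModel

class ListBranchesArgs(BaseModel):  # hypothetical args schema
    prefix: str = ""

class ExampleCodeToolApiWrapper(BaseCodeToolApiWrapper):
    def list_branches(self, prefix: str = "") -> list:
        # Hypothetical tool implementation.
        return []

    @extend_with_vector_tools
    def get_available_tools(self):
        # The wrapper produced by the decorator appends
        # self._get_vector_search_tools() to this list before returning it.
        return [
            {
                "name": "list_branches",
                "description": "List branches in the repository.",
                "args_schema": ListBranchesArgs,
                "ref": self.list_branches,
            }
        ]
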
@@ -0,0 +1,7 @@
+from .bigquery import BigQueryToolkit
+
+name = "google"
+
+def get_tools(tool_type, tool):
+    if tool_type == 'bigquery':
+        return BigQueryToolkit().get_toolkit().get_tools()
@@ -0,0 +1,154 @@
+from functools import lru_cache
+from typing import List, Optional, Type
+
+from langchain_core.tools import BaseTool, BaseToolkit
+from pydantic import BaseModel, Field, SecretStr, computed_field, field_validator
+
+from ...utils import TOOLKIT_SPLITTER, clean_string, get_max_toolkit_length
+from .api_wrapper import BigQueryApiWrapper
+from .tool import BigQueryAction
+
+name = "bigquery"
+
+
+@lru_cache(maxsize=1)
+def get_available_tools() -> dict[str, dict]:
+    api_wrapper = BigQueryApiWrapper.model_construct()
+    available_tools: dict = {
+        x["name"]: x["args_schema"].model_json_schema()
+        for x in api_wrapper.get_available_tools()
+    }
+    return available_tools
+
+
+toolkit_max_length = lru_cache(maxsize=1)(
+    lambda: get_max_toolkit_length(get_available_tools())
+)
+
+
+class BigQueryToolkitConfig(BaseModel):
+    class Config:
+        title = name
+        json_schema_extra = {
+            "metadata": {
+                "hidden": True,
+                "label": "Cloud GCP",
+                "icon_url": "google.svg",
+                "sections": {
+                    "auth": {
+                        "required": False,
+                        "subsections": [
+                            {"name": "API Key", "fields": ["api_key"]},
+                        ],
+                    }
+                },
+            }
+        }
+
+    api_key: Optional[SecretStr] = Field(
+        default=None,
+        description="GCP API key",
+        json_schema_extra={"secret": True, "configuration": True},
+    )
+    project: Optional[str] = Field(
+        default=None,
+        description="BigQuery project ID",
+        json_schema_extra={"configuration": True},
+    )
+    location: Optional[str] = Field(
+        default=None,
+        description="BigQuery location",
+        json_schema_extra={"configuration": True},
+    )
+    dataset: Optional[str] = Field(
+        default=None,
+        description="BigQuery dataset name",
+        json_schema_extra={"configuration": True},
+    )
+    table: Optional[str] = Field(
+        default=None,
+        description="BigQuery table name",
+        json_schema_extra={"configuration": True},
+    )
+    selected_tools: List[str] = Field(
+        default=[],
+        description="Selected tools",
+        json_schema_extra={"args_schemas": get_available_tools()},
+    )
+
+    @field_validator("selected_tools", mode="before", check_fields=False)
+    @classmethod
+    def selected_tools_validator(cls, value: List[str]) -> list[str]:
+        return [i for i in value if i in get_available_tools()]
+
+
+def _get_toolkit(tool) -> BaseToolkit:
+    return BigQueryToolkit().get_toolkit(
+        selected_tools=tool["settings"].get("selected_tools", []),
+        api_key=tool["settings"].get("api_key", ""),
+        toolkit_name=tool.get("toolkit_name"),
+    )
+
+
+def get_toolkit():
+    return BigQueryToolkit.toolkit_config_schema()
+
+
+def get_tools(tool):
+    return _get_toolkit(tool).get_tools()
+
+
+class BigQueryToolkit(BaseToolkit):
+    tools: List[BaseTool] = []
+    api_wrapper: Optional[BigQueryApiWrapper] = Field(
+        default_factory=BigQueryApiWrapper.model_construct
+    )
+    toolkit_name: Optional[str] = None
+
+    @computed_field
+    @property
+    def tool_prefix(self) -> str:
+        return (
+            clean_string(self.toolkit_name, toolkit_max_length()) + TOOLKIT_SPLITTER
+            if self.toolkit_name
+            else ""
+        )
+
+    @computed_field
+    @property
+    def available_tools(self) -> List[dict]:
+        return self.api_wrapper.get_available_tools()
+
+    @staticmethod
+    def toolkit_config_schema() -> Type[BaseModel]:
+        return BigQueryToolkitConfig
+
+    @classmethod
+    def get_toolkit(
+        cls,
+        selected_tools: list[str] | None = None,
+        toolkit_name: Optional[str] = None,
+        **kwargs,
+    ) -> "BigQueryToolkit":
+        bigquery_api_wrapper = BigQueryApiWrapper(**kwargs)
+        instance = cls(
+            tools=[], api_wrapper=bigquery_api_wrapper, toolkit_name=toolkit_name
+        )
+        if selected_tools:
+            selected_tools = set(selected_tools)
+            for t in instance.available_tools:
+                if t["name"] in selected_tools:
+                    instance.tools.append(
+                        BigQueryAction(
+                            api_wrapper=instance.api_wrapper,
+                            name=instance.tool_prefix + t["name"],
+                            # set unique description for declared tools to differentiate the same methods for different toolkits
+                            description=f"Project: {getattr(instance.api_wrapper, 'project', '')}\n"
+                            + t["description"],
+                            args_schema=t["args_schema"],
+                        )
+                    )
+        return instance
+
+    def get_tools(self):
+        return self.tools
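
For orientation, a hedged usage sketch of the new module's entry points; the toolkit name, the placeholder API key, and the empty tool selection are illustrative values, and the module's exact import path is not visible in this diff.

# Hypothetical configuration; keys mirror what _get_toolkit reads from tool["settings"].
# Constructing BigQueryApiWrapper with placeholder values may fail real credential validation.
tool = {
    "toolkit_name": "gcp_analytics",     # cleaned/truncated into the tool-name prefix
    "settings": {
        "api_key": "<gcp-api-key>",      # placeholder credential
        "selected_tools": [],            # empty -> no BigQueryAction instances are built
    },
}

config_schema = get_toolkit()   # BigQueryToolkitConfig, the pydantic settings schema
tools = get_tools(tool)         # _get_toolkit(tool).get_tools() -> selected BigQueryAction tools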