alita-sdk 0.3.229__py3-none-any.whl → 0.3.231__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/runtime/langchain/document_loaders/AlitaDocLoader.py +29 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +6 -0
- alita_sdk/runtime/langchain/document_loaders/utils.py +22 -1
- alita_sdk/runtime/tools/vectorstore.py +167 -46
- alita_sdk/tools/ado/__init__.py +2 -3
- alita_sdk/tools/ado/repos/__init__.py +12 -10
- alita_sdk/tools/ado/repos/repos_wrapper.py +1 -0
- alita_sdk/tools/ado/test_plan/__init__.py +17 -15
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +1 -0
- alita_sdk/tools/ado/wiki/__init__.py +18 -20
- alita_sdk/tools/ado/wiki/ado_wrapper.py +1 -0
- alita_sdk/tools/ado/work_item/__init__.py +18 -19
- alita_sdk/tools/ado/work_item/ado_wrapper.py +1 -0
- alita_sdk/tools/azure_ai/search/__init__.py +7 -4
- alita_sdk/tools/bitbucket/__init__.py +9 -4
- alita_sdk/tools/confluence/__init__.py +9 -5
- alita_sdk/tools/elitea_base.py +35 -38
- alita_sdk/tools/figma/__init__.py +8 -5
- alita_sdk/tools/figma/api_wrapper.py +37 -12
- alita_sdk/tools/github/__init__.py +9 -10
- alita_sdk/tools/gitlab/__init__.py +10 -5
- alita_sdk/tools/google/bigquery/__init__.py +7 -2
- alita_sdk/tools/jira/__init__.py +9 -5
- alita_sdk/tools/postman/__init__.py +7 -2
- alita_sdk/tools/qtest/__init__.py +7 -2
- alita_sdk/tools/servicenow/__init__.py +7 -3
- alita_sdk/tools/sharepoint/__init__.py +8 -5
- alita_sdk/tools/slack/__init__.py +7 -3
- alita_sdk/tools/testrail/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +16 -26
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -11
- {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/METADATA +2 -2
- {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/RECORD +36 -35
- {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,11 @@
|
|
1
1
|
from typing import List, Literal, Optional
|
2
2
|
from .ado_wrapper import AzureDevOpsApiWrapper # Import the API wrapper for Azure DevOps
|
3
3
|
from langchain_core.tools import BaseTool, BaseToolkit
|
4
|
-
from pydantic import create_model, BaseModel, Field
|
4
|
+
from pydantic import create_model, BaseModel, Field
|
5
5
|
|
6
6
|
import requests
|
7
|
+
from ....configurations.ado import AdoConfiguration
|
8
|
+
from ....configurations.pgvector import PgVectorConfiguration
|
7
9
|
from ...base.tool import BaseAction
|
8
10
|
from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
|
9
11
|
|
@@ -25,22 +27,9 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
|
|
25
27
|
'toolkit_name': True,
|
26
28
|
'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length})
|
27
29
|
),
|
28
|
-
|
29
|
-
description="ADO organization url", json_schema_extra={
|
30
|
-
'configuration': True,
|
31
|
-
})),
|
32
|
-
project=(str, Field(description="ADO project",
|
33
|
-
json_schema_extra={
|
34
|
-
'configuration': True
|
35
|
-
})),
|
36
|
-
token=(SecretStr,
|
37
|
-
Field(description="ADO token",
|
38
|
-
json_schema_extra={'secret': True, 'configuration': True }
|
39
|
-
)),
|
30
|
+
ado_configuration=(AdoConfiguration, Field(description="Ado configuration", json_schema_extra={'configuration_types': ['ado']})),
|
40
31
|
# indexer settings
|
41
|
-
|
42
|
-
default=None,
|
43
|
-
json_schema_extra={'secret': True})),
|
32
|
+
pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
|
44
33
|
# embedder settings
|
45
34
|
embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.",
|
46
35
|
default="HuggingFaceEmbeddings")),
|
@@ -53,7 +42,7 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
|
|
53
42
|
'json_schema_extra': {
|
54
43
|
'metadata': {
|
55
44
|
"label": "ADO wiki",
|
56
|
-
"icon_url":
|
45
|
+
"icon_url": "ado-wiki-icon.svg",
|
57
46
|
"categories": ["documentation"],
|
58
47
|
"extra_categories": ["knowledge base", "documentation management", "wiki"],
|
59
48
|
"sections": {
|
@@ -77,9 +66,12 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
|
|
77
66
|
|
78
67
|
@check_connection_response
|
79
68
|
def check_connection(self):
|
69
|
+
ado_config = self.ado_wiki_configuration.ado_configuration if self.ado_wiki_configuration else None
|
70
|
+
if not ado_config:
|
71
|
+
raise ValueError("ADO wiki configuration is required")
|
80
72
|
response = requests.get(
|
81
|
-
f'{
|
82
|
-
headers={'Authorization': f'Bearer {
|
73
|
+
f'{ado_config.organization_url}/{ado_config.project}/_apis/wiki/wikis?api-version=7.0',
|
74
|
+
headers={'Authorization': f'Bearer {ado_config.token}'},
|
83
75
|
timeout=5
|
84
76
|
)
|
85
77
|
return response
|
@@ -94,7 +86,13 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
|
|
94
86
|
environ['AZURE_DEVOPS_CACHE_DIR'] = '/tmp/.azure-devops'
|
95
87
|
if selected_tools is None:
|
96
88
|
selected_tools = []
|
97
|
-
|
89
|
+
wrapper_payload = {
|
90
|
+
**kwargs,
|
91
|
+
# TODO use ado_configuration fields in AzureDevOpsApiWrapper
|
92
|
+
**kwargs['ado_configuration'],
|
93
|
+
**(kwargs.get('pgvector_configuration') or {}),
|
94
|
+
}
|
95
|
+
azure_devops_api_wrapper = AzureDevOpsApiWrapper(**wrapper_payload)
|
98
96
|
available_tools = azure_devops_api_wrapper.get_available_tools()
|
99
97
|
tools = []
|
100
98
|
prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
@@ -1,9 +1,11 @@
|
|
1
1
|
from typing import List, Optional, Literal
|
2
2
|
from .ado_wrapper import AzureDevOpsApiWrapper # Import the API wrapper for Azure DevOps
|
3
3
|
from langchain_core.tools import BaseTool, BaseToolkit
|
4
|
-
from pydantic import create_model, BaseModel, Field
|
4
|
+
from pydantic import create_model, BaseModel, Field
|
5
5
|
|
6
6
|
import requests
|
7
|
+
from ....configurations.ado import AdoConfiguration
|
8
|
+
from ....configurations.pgvector import PgVectorConfiguration
|
7
9
|
from ...base.tool import BaseAction
|
8
10
|
from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
|
9
11
|
|
@@ -24,23 +26,11 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
|
|
24
26
|
'toolkit_name': True,
|
25
27
|
'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
|
26
28
|
),
|
27
|
-
|
28
|
-
description="ADO organization url",
|
29
|
-
json_schema_extra={
|
30
|
-
'configuration': True
|
31
|
-
})),
|
32
|
-
project=(str, Field(description="ADO project",
|
33
|
-
json_schema_extra={
|
34
|
-
'configuration': True
|
35
|
-
}
|
36
|
-
)),
|
37
|
-
token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True, 'configuration': True})),
|
29
|
+
ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado_work_item']})),
|
38
30
|
limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
|
39
31
|
selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
|
40
32
|
# indexer settings
|
41
|
-
|
42
|
-
default=None,
|
43
|
-
json_schema_extra={'secret': True})),
|
33
|
+
pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
|
44
34
|
# embedder settings
|
45
35
|
embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.",
|
46
36
|
default="HuggingFaceEmbeddings")),
|
@@ -51,7 +41,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
|
|
51
41
|
'json_schema_extra': {
|
52
42
|
'metadata': {
|
53
43
|
"label": "ADO boards",
|
54
|
-
"icon_url":
|
44
|
+
"icon_url": "ado-boards-icon.svg",
|
55
45
|
"categories": ["project management"],
|
56
46
|
"extra_categories": ["work item management", "issue tracking", "agile boards"],
|
57
47
|
"sections": {
|
@@ -75,9 +65,12 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
|
|
75
65
|
|
76
66
|
@check_connection_response
|
77
67
|
def check_connection(self):
|
68
|
+
ado_config = self.ado_work_item_configuration.ado_configuration if self.ado_work_item_configuration else None
|
69
|
+
if not ado_config:
|
70
|
+
raise ValueError("ADO work item configuration is required")
|
78
71
|
response = requests.get(
|
79
|
-
f'{
|
80
|
-
headers={'Authorization': f'Bearer {
|
72
|
+
f'{ado_config.organization_url}/{ado_config.project}/_apis/wit/workitemtypes?api-version=7.0',
|
73
|
+
headers={'Authorization': f'Bearer {ado_config.token}'},
|
81
74
|
timeout=5
|
82
75
|
)
|
83
76
|
return response
|
@@ -93,7 +86,13 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
|
|
93
86
|
if selected_tools is None:
|
94
87
|
selected_tools = []
|
95
88
|
|
96
|
-
|
89
|
+
wrapper_payload = {
|
90
|
+
**kwargs,
|
91
|
+
# TODO use ado_configuration fields in AzureDevOpsApiWrapper
|
92
|
+
**kwargs['ado_configuration'],
|
93
|
+
**(kwargs.get('pgvector_configuration') or {}),
|
94
|
+
}
|
95
|
+
azure_devops_api_wrapper = AzureDevOpsApiWrapper(**wrapper_payload)
|
97
96
|
available_tools = azure_devops_api_wrapper.get_available_tools()
|
98
97
|
tools = []
|
99
98
|
prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
@@ -15,10 +15,8 @@ name = "azure_search"
|
|
15
15
|
def get_tools(tool):
|
16
16
|
return AzureSearchToolkit().get_toolkit(
|
17
17
|
selected_tools=tool['settings'].get('selected_tools', []),
|
18
|
-
|
19
|
-
endpoint=tool['settings'].get('azure_search_configuration', {}).get('endpoint', None),
|
18
|
+
azure_search_configuration=tool['settings']['azure_search_configuration'],
|
20
19
|
index_name=tool['settings'].get('index_name', None),
|
21
|
-
api_base=tool['settings'].get('azure_search_configuration', {}).get('api_base', None),
|
22
20
|
api_version=tool['settings'].get('api_version', None),
|
23
21
|
openai_api_key=tool['settings'].get('access_token', None),
|
24
22
|
model_name=tool['settings'].get('model_name', None),
|
@@ -71,7 +69,12 @@ class AzureSearchToolkit(BaseToolkit):
|
|
71
69
|
def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
|
72
70
|
if selected_tools is None:
|
73
71
|
selected_tools = []
|
74
|
-
|
72
|
+
wrapper_payload = {
|
73
|
+
**kwargs,
|
74
|
+
# TODO use azure_search_configuration fields
|
75
|
+
**kwargs['azure_search_configuration'],
|
76
|
+
}
|
77
|
+
azure_search_api_wrapper = AzureSearchApiWrapper(**wrapper_payload)
|
75
78
|
available_tools = azure_search_api_wrapper.get_available_tools()
|
76
79
|
prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
77
80
|
tools = []
|
@@ -22,13 +22,12 @@ def get_tools(tool):
|
|
22
22
|
url=tool['settings']['url'],
|
23
23
|
project=tool['settings']['project'],
|
24
24
|
repository=tool['settings']['repository'],
|
25
|
-
|
26
|
-
password=tool['settings'].get('bitbucket_configuration', {}).get('password', ''),
|
25
|
+
bitbucket_configuration=tool['settings']['bitbucket_configuration'],
|
27
26
|
branch=tool['settings']['branch'],
|
28
27
|
cloud=tool['settings'].get('cloud'),
|
29
28
|
llm=tool['settings'].get('llm', None),
|
30
29
|
alita=tool['settings'].get('alita', None),
|
31
|
-
|
30
|
+
pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
|
32
31
|
collection_name=str(tool['toolkit_name']),
|
33
32
|
doctype='code',
|
34
33
|
embedding_model="HuggingFaceEmbeddings",
|
@@ -91,7 +90,13 @@ class AlitaBitbucketToolkit(BaseToolkit):
|
|
91
90
|
selected_tools = []
|
92
91
|
if kwargs["cloud"] is None:
|
93
92
|
kwargs["cloud"] = True if "bitbucket.org" in kwargs.get('url') else False
|
94
|
-
|
93
|
+
wrapper_payload = {
|
94
|
+
**kwargs,
|
95
|
+
# TODO use bitbucket_configuration fields
|
96
|
+
**kwargs['bitbucket_configuration'],
|
97
|
+
**(kwargs.get('pgvector_configuration') or {}),
|
98
|
+
}
|
99
|
+
bitbucket_api_wrapper = BitbucketAPIWrapper(**wrapper_payload)
|
95
100
|
available_tools: List[Dict] = __all__
|
96
101
|
prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
97
102
|
tools = []
|
@@ -17,9 +17,7 @@ def get_tools(tool):
|
|
17
17
|
base_url=tool['settings']['base_url'],
|
18
18
|
space=tool['settings'].get('space', None),
|
19
19
|
cloud=tool['settings'].get('cloud', True),
|
20
|
-
|
21
|
-
username=tool['settings'].get('confluence_configuration', {}).get('username', None),
|
22
|
-
token=tool['settings'].get('confluence_configuration', {}).get('token', None),
|
20
|
+
confluence_configuration=tool['settings']['confluence_configuration'],
|
23
21
|
limit=tool['settings'].get('limit', 5),
|
24
22
|
labels=parse_list(tool['settings'].get('labels', None)),
|
25
23
|
additional_fields=tool['settings'].get('additional_fields', []),
|
@@ -28,7 +26,7 @@ def get_tools(tool):
|
|
28
26
|
llm=tool['settings'].get('llm', None),
|
29
27
|
toolkit_name=tool.get('toolkit_name'),
|
30
28
|
# indexer settings
|
31
|
-
|
29
|
+
pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
|
32
30
|
collection_name=str(tool['toolkit_name']),
|
33
31
|
doctype='doc',
|
34
32
|
embedding_model="HuggingFaceEmbeddings",
|
@@ -120,7 +118,13 @@ class ConfluenceToolkit(BaseToolkit):
|
|
120
118
|
def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
|
121
119
|
if selected_tools is None:
|
122
120
|
selected_tools = []
|
123
|
-
|
121
|
+
wrapper_payload = {
|
122
|
+
**kwargs,
|
123
|
+
# TODO use confluence_configuration fields
|
124
|
+
**kwargs['confluence_configuration'],
|
125
|
+
**(kwargs.get('pgvector_configuration') or {}),
|
126
|
+
}
|
127
|
+
confluence_api_wrapper = ConfluenceAPIWrapper(**wrapper_payload)
|
124
128
|
prefix = clean_string(toolkit_name, ConfluenceToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
125
129
|
available_tools = confluence_api_wrapper.get_available_tools()
|
126
130
|
tools = []
|
alita_sdk/tools/elitea_base.py
CHANGED
@@ -30,13 +30,13 @@ LoaderSchema = create_model(
|
|
30
30
|
# Base Vector Store Schema Models
|
31
31
|
BaseIndexParams = create_model(
|
32
32
|
"BaseIndexParams",
|
33
|
-
collection_suffix=(
|
33
|
+
collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
|
34
34
|
vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
|
35
35
|
)
|
36
36
|
|
37
37
|
BaseCodeIndexParams = create_model(
|
38
38
|
"BaseCodeIndexParams",
|
39
|
-
collection_suffix=(
|
39
|
+
collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
|
40
40
|
vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
|
41
41
|
branch=(Optional[str], Field(description="Branch to index files from. Defaults to active branch if None.", default=None)),
|
42
42
|
whitelist=(Optional[List[str]], Field(description="File extensions or paths to include. Defaults to all files if None.", default=None)),
|
@@ -51,7 +51,9 @@ RemoveIndexParams = create_model(
|
|
51
51
|
BaseSearchParams = create_model(
|
52
52
|
"BaseSearchParams",
|
53
53
|
query=(str, Field(description="Query text to search in the index")),
|
54
|
-
collection_suffix=(Optional[str], Field(
|
54
|
+
collection_suffix=(Optional[str], Field(
|
55
|
+
description="Optional suffix for collection name (max 7 characters). Leave empty to search across all datasets",
|
56
|
+
default="", max_length=7)),
|
55
57
|
vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
|
56
58
|
filter=(Optional[dict | str], Field(
|
57
59
|
description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
|
@@ -219,6 +221,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
|
|
219
221
|
embedding_model: Optional[str] = "HuggingFaceEmbeddings"
|
220
222
|
embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
|
221
223
|
vectorstore_type: Optional[str] = "PGVector"
|
224
|
+
_vector_store: Optional[Any] = None
|
222
225
|
|
223
226
|
def __init__(self, **kwargs):
|
224
227
|
super().__init__(**kwargs)
|
@@ -297,9 +300,9 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
|
|
297
300
|
collection_suffix = kwargs.get("collection_suffix")
|
298
301
|
progress_step = kwargs.get("progress_step")
|
299
302
|
clean_index = kwargs.get("clean_index")
|
300
|
-
vs = self._init_vector_store(
|
303
|
+
vs = self._init_vector_store(embeddings=embedding)
|
301
304
|
#
|
302
|
-
return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
|
305
|
+
return vs.index_documents(docs, collection_suffix=collection_suffix, progress_step=progress_step, clean_index=clean_index)
|
303
306
|
|
304
307
|
def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
|
305
308
|
"""
|
@@ -333,42 +336,31 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
|
|
333
336
|
|
334
337
|
|
335
338
|
# TODO: init store once and re-use the instance
|
336
|
-
def _init_vector_store(self,
|
339
|
+
def _init_vector_store(self, embeddings: Optional[Any] = None):
|
337
340
|
"""Initializes the vector store wrapper with the provided parameters."""
|
338
341
|
try:
|
339
342
|
from alita_sdk.runtime.tools.vectorstore import VectorStoreWrapper
|
340
343
|
except ImportError:
|
341
344
|
from alita_sdk.runtime.tools.vectorstore import VectorStoreWrapper
|
342
345
|
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
return VectorStoreWrapper(
|
357
|
-
llm=self.llm,
|
358
|
-
vectorstore_type=self.vectorstore_type,
|
359
|
-
embedding_model=self.embedding_model,
|
360
|
-
embedding_model_params=self.embedding_model_params,
|
361
|
-
vectorstore_params=vectorstore_params,
|
362
|
-
embeddings=embeddings,
|
363
|
-
process_document_func=self._process_documents,
|
364
|
-
)
|
346
|
+
if not self._vector_store:
|
347
|
+
connection_string = self.connection_string.get_secret_value() if self.connection_string else None
|
348
|
+
vectorstore_params = self._adapter.get_vectorstore_params(self.collection_name, connection_string)
|
349
|
+
self._vector_store = VectorStoreWrapper(
|
350
|
+
llm=self.llm,
|
351
|
+
vectorstore_type=self.vectorstore_type,
|
352
|
+
embedding_model=self.embedding_model,
|
353
|
+
embedding_model_params=self.embedding_model_params,
|
354
|
+
vectorstore_params=vectorstore_params,
|
355
|
+
embeddings=embeddings,
|
356
|
+
process_document_func=self._process_documents,
|
357
|
+
)
|
358
|
+
return self._vector_store
|
365
359
|
|
366
360
|
def remove_index(self, collection_suffix: str = ""):
|
367
361
|
"""Cleans the indexed data in the collection."""
|
368
|
-
|
369
|
-
|
370
|
-
self._adapter.remove_collection(vectorstore_wrapper, collection_name)
|
371
|
-
return (f"Collection '{collection_name}' has been removed from the vector store.\n"
|
362
|
+
self._init_vector_store()._clean_collection(collection_suffix=collection_suffix)
|
363
|
+
return (f"Collection '{collection_suffix}' has been removed from the vector store.\n"
|
372
364
|
f"Available collections: {self.list_collections()}")
|
373
365
|
|
374
366
|
def list_collections(self):
|
@@ -386,7 +378,14 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
|
|
386
378
|
extended_search: Optional[List[str]] = None,
|
387
379
|
**kwargs):
|
388
380
|
""" Searches indexed documents in the vector store."""
|
389
|
-
vectorstore = self._init_vector_store(
|
381
|
+
vectorstore = self._init_vector_store()
|
382
|
+
# build filter on top of collection_suffix
|
383
|
+
filter = filter if isinstance(filter, dict) else json.loads(filter)
|
384
|
+
if collection_suffix:
|
385
|
+
filter.update({"collection": {
|
386
|
+
"$eq": collection_suffix.strip()
|
387
|
+
}})
|
388
|
+
|
390
389
|
found_docs = vectorstore.search_documents(
|
391
390
|
query,
|
392
391
|
doctype=self.doctype,
|
@@ -579,22 +578,20 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
|
|
579
578
|
return parse_code_files_for_db(file_content_generator())
|
580
579
|
|
581
580
|
def index_data(self,
|
581
|
+
collection_suffix: str,
|
582
582
|
branch: Optional[str] = None,
|
583
583
|
whitelist: Optional[List[str]] = None,
|
584
584
|
blacklist: Optional[List[str]] = None,
|
585
|
-
collection_suffix: str = "",
|
586
585
|
**kwargs) -> str:
|
587
586
|
"""Index repository files in the vector store using code parsing."""
|
588
587
|
|
589
|
-
|
590
|
-
|
591
588
|
documents = self.loader(
|
592
589
|
branch=branch,
|
593
590
|
whitelist=whitelist,
|
594
591
|
blacklist=blacklist
|
595
592
|
)
|
596
|
-
vectorstore = self._init_vector_store(
|
597
|
-
return vectorstore.index_documents(documents, clean_index=False, is_code=True)
|
593
|
+
vectorstore = self._init_vector_store()
|
594
|
+
return vectorstore.index_documents(documents, collection_suffix=collection_suffix, clean_index=False, is_code=True)
|
598
595
|
|
599
596
|
def _get_vector_search_tools(self):
|
600
597
|
"""
|
@@ -6,6 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field, create_model, SecretStr
|
|
6
6
|
from ..base.tool import BaseAction
|
7
7
|
from .api_wrapper import FigmaApiWrapper, GLOBAL_LIMIT
|
8
8
|
from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
|
9
|
+
from ...configurations.pgvector import PgVectorConfiguration
|
9
10
|
|
10
11
|
name = "figma"
|
11
12
|
|
@@ -21,7 +22,7 @@ def get_tools(tool):
|
|
21
22
|
toolkit_name=tool.get('toolkit_name'),
|
22
23
|
# indexer settings
|
23
24
|
llm=tool['settings'].get('llm', None),
|
24
|
-
|
25
|
+
pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
|
25
26
|
collection_name=str(tool['toolkit_name']),
|
26
27
|
doctype='doc',
|
27
28
|
embedding_model="HuggingFaceEmbeddings",
|
@@ -54,9 +55,7 @@ class FigmaToolkit(BaseToolkit):
|
|
54
55
|
Field(default=[], json_schema_extra={"args_schemas": selected_tools}),
|
55
56
|
),
|
56
57
|
# indexer settings
|
57
|
-
|
58
|
-
default=None,
|
59
|
-
json_schema_extra={'secret': True})),
|
58
|
+
pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
|
60
59
|
|
61
60
|
# embedder settings
|
62
61
|
embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.", default="HuggingFaceEmbeddings")),
|
@@ -93,7 +92,11 @@ class FigmaToolkit(BaseToolkit):
|
|
93
92
|
def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
|
94
93
|
if selected_tools is None:
|
95
94
|
selected_tools = []
|
96
|
-
|
95
|
+
wrapper_payload = {
|
96
|
+
**kwargs,
|
97
|
+
**(kwargs.get('pgvector_configuration') or {}),
|
98
|
+
}
|
99
|
+
figma_api_wrapper = FigmaApiWrapper(**wrapper_payload)
|
97
100
|
prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
98
101
|
available_tools = figma_api_wrapper.get_available_tools()
|
99
102
|
tools = []
|
@@ -4,7 +4,7 @@ import json
|
|
4
4
|
import logging
|
5
5
|
import re
|
6
6
|
from enum import Enum
|
7
|
-
from typing import Dict, Generator, Optional, Union
|
7
|
+
from typing import Dict, List, Generator, Optional, Union
|
8
8
|
|
9
9
|
import requests
|
10
10
|
from FigmaPy import FigmaPy
|
@@ -13,6 +13,7 @@ from langchain_core.tools import ToolException
|
|
13
13
|
from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
|
14
14
|
|
15
15
|
from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
|
16
|
+
from ..utils.content_parser import load_content_from_bytes
|
16
17
|
|
17
18
|
GLOBAL_LIMIT = 10000
|
18
19
|
|
@@ -235,15 +236,27 @@ class FigmaApiWrapper(BaseVectorStoreToolApiWrapper):
|
|
235
236
|
global_regexp: Optional[str] = Field(default=None)
|
236
237
|
_client: Optional[FigmaPy] = PrivateAttr()
|
237
238
|
|
238
|
-
def _base_loader(self, project_id: str, **kwargs) -> Generator[Document, None, None]:
|
239
|
-
files =
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
239
|
+
def _base_loader(self, project_id: Optional[str] = None, file_keys: Optional[List[str]] = None, **kwargs) -> Generator[Document, None, None]:
|
240
|
+
files = []
|
241
|
+
if project_id:
|
242
|
+
files = json.loads(self.get_project_files(project_id)).get('files', [])
|
243
|
+
for file in files:
|
244
|
+
yield Document(page_content=json.dumps(file), metadata={
|
245
|
+
'id': file.get('key', ''),
|
246
|
+
'file_key': file.get('key', ''),
|
247
|
+
'name': file.get('name', ''),
|
248
|
+
'updated_on': file.get('last_modified', '')
|
249
|
+
})
|
250
|
+
elif file_keys:
|
251
|
+
for file_key in file_keys:
|
252
|
+
file = self._client.get_file(file_key)
|
253
|
+
metadata = {
|
254
|
+
'id': file_key,
|
255
|
+
'file_key': file_key,
|
256
|
+
'name': file.name,
|
257
|
+
'updated_on': file.last_modified
|
258
|
+
}
|
259
|
+
yield Document(page_content=json.dumps(metadata), metadata=metadata)
|
247
260
|
|
248
261
|
def _process_document(self, document: Document) -> Generator[Document, None, None]:
|
249
262
|
file_key = document.metadata.get('id', '')
|
@@ -257,12 +270,19 @@ class FigmaApiWrapper(BaseVectorStoreToolApiWrapper):
|
|
257
270
|
|
258
271
|
# iterate over images values
|
259
272
|
for node_id, image_url in images.items():
|
273
|
+
if not image_url:
|
274
|
+
logging.warning(f"Image URL not found for node_id {node_id} in file {file_key}. Skipping.")
|
275
|
+
continue
|
260
276
|
response = requests.get(image_url)
|
261
277
|
if response.status_code == 200:
|
262
278
|
content_type = response.headers.get('Content-Type', '')
|
263
279
|
if 'text/html' not in content_type.lower():
|
280
|
+
extension = f".{content_type.split('/')[-1]}" if content_type.startswith('image') else '.txt'
|
281
|
+
page_content = load_content_from_bytes(
|
282
|
+
file_content=response.content,
|
283
|
+
extension=extension, llm = self.llm)
|
264
284
|
yield Document(
|
265
|
-
page_content=
|
285
|
+
page_content=page_content,
|
266
286
|
metadata={
|
267
287
|
'file_key': file_key,
|
268
288
|
'node_id': node_id,
|
@@ -273,7 +293,12 @@ class FigmaApiWrapper(BaseVectorStoreToolApiWrapper):
|
|
273
293
|
def _index_tool_params(self):
|
274
294
|
"""Return the parameters for indexing data."""
|
275
295
|
return {
|
276
|
-
"project_id": (str, Field(
|
296
|
+
"project_id": (Optional[str], Field(
|
297
|
+
description="ID of the project to list files from: i.e. '55391681'",
|
298
|
+
default=None)),
|
299
|
+
'file_keys': (Optional[List[str]], Field(
|
300
|
+
description="List of file keys to index: i.e. ['Fp24FuzPwH0L74ODSrCnQo', 'jmhAr6q78dJoMRqt48zisY']",
|
301
|
+
default=None))
|
277
302
|
}
|
278
303
|
|
279
304
|
def _send_request(
|
@@ -19,14 +19,10 @@ def _get_toolkit(tool) -> BaseToolkit:
|
|
19
19
|
github_repository=tool['settings']['repository'],
|
20
20
|
active_branch=tool['settings']['active_branch'],
|
21
21
|
github_base_branch=tool['settings']['base_branch'],
|
22
|
-
|
23
|
-
github_username=tool['settings'].get('github_configuration', {}).get('username', ''),
|
24
|
-
github_password=tool['settings'].get('github_configuration', {}).get('password', ''),
|
25
|
-
github_app_id=tool['settings'].get('github_configuration', {}).get('app_id', None),
|
26
|
-
github_app_private_key=tool['settings'].get('github_configuration', {}).get('app_private_key', None),
|
22
|
+
github_configuration=tool['settings']['github_configuration'],
|
27
23
|
llm=tool['settings'].get('llm', None),
|
28
24
|
alita=tool['settings'].get('alita', None),
|
29
|
-
|
25
|
+
pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
|
30
26
|
collection_name=str(tool['toolkit_name']),
|
31
27
|
doctype='code',
|
32
28
|
embedding_model="HuggingFaceEmbeddings",
|
@@ -71,9 +67,6 @@ class AlitaGitHubToolkit(BaseToolkit):
|
|
71
67
|
active_branch=(Optional[str], Field(description="Active branch", default="main")),
|
72
68
|
base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
|
73
69
|
# indexer settings
|
74
|
-
connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
|
75
|
-
default=None,
|
76
|
-
json_schema_extra={'secret': True})),
|
77
70
|
embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.", default="HuggingFaceEmbeddings")),
|
78
71
|
embedding_model_params=(dict, Field(description="Embedding model parameters: i.e. `{'model_name': 'sentence-transformers/all-MiniLM-L6-v2'}", default={"model_name": "sentence-transformers/all-MiniLM-L6-v2"})),
|
79
72
|
selected_tools=(List[Literal[tuple(selected_tools)]],
|
@@ -84,7 +77,13 @@ class AlitaGitHubToolkit(BaseToolkit):
|
|
84
77
|
def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
|
85
78
|
if selected_tools is None:
|
86
79
|
selected_tools = []
|
87
|
-
|
80
|
+
wrapper_payload = {
|
81
|
+
**kwargs,
|
82
|
+
# TODO use github_configuration fields
|
83
|
+
**kwargs['github_configuration'],
|
84
|
+
**(kwargs.get('pgvector_configuration') or {}),
|
85
|
+
}
|
86
|
+
github_api_wrapper = AlitaGitHubAPIWrapper(**wrapper_payload)
|
88
87
|
available_tools: List[Dict] = github_api_wrapper.get_available_tools()
|
89
88
|
tools = []
|
90
89
|
prefix = clean_string(toolkit_name, AlitaGitHubToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
@@ -20,11 +20,11 @@ def get_tools(tool):
|
|
20
20
|
url=tool['settings']['url'],
|
21
21
|
repository=tool['settings']['repository'],
|
22
22
|
branch=tool['settings']['branch'],
|
23
|
-
|
23
|
+
gitlab_configuration=tool['settings']['gitlab_configuration'],
|
24
24
|
|
25
25
|
llm=tool['settings'].get('llm', None),
|
26
26
|
alita=tool['settings'].get('alita', None),
|
27
|
-
|
27
|
+
pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
|
28
28
|
collection_name=str(tool['toolkit_name']),
|
29
29
|
doctype='code',
|
30
30
|
embedding_model="HuggingFaceEmbeddings",
|
@@ -49,8 +49,7 @@ class AlitaGitlabToolkit(BaseToolkit):
|
|
49
49
|
gitlab_configuration=(Optional[GitlabConfiguration], Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
|
50
50
|
branch=(str, Field(description="Main branch", default="main")),
|
51
51
|
# indexer settings
|
52
|
-
pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector
|
53
|
-
json_schema_extra={'configuration_types': ['pgvector']})),
|
52
|
+
pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
|
54
53
|
# embedder settings
|
55
54
|
embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.", default="HuggingFaceEmbeddings")),
|
56
55
|
embedding_model_params=(dict, Field(description="Embedding model parameters: i.e. `{'model_name': 'sentence-transformers/all-MiniLM-L6-v2'}", default={"model_name": "sentence-transformers/all-MiniLM-L6-v2"})),
|
@@ -80,7 +79,13 @@ class AlitaGitlabToolkit(BaseToolkit):
|
|
80
79
|
def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
|
81
80
|
if selected_tools is None:
|
82
81
|
selected_tools = []
|
83
|
-
|
82
|
+
wrapper_payload = {
|
83
|
+
**kwargs,
|
84
|
+
# TODO use gitlab_configuration fields
|
85
|
+
**kwargs['gitlab_configuration'],
|
86
|
+
**(kwargs.get('pgvector_configuration') or {}),
|
87
|
+
}
|
88
|
+
gitlab_api_wrapper = GitLabAPIWrapper(**wrapper_payload)
|
84
89
|
prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
|
85
90
|
available_tools: List[Dict] = gitlab_api_wrapper.get_available_tools()
|
86
91
|
tools = []
|