alita-sdk 0.3.229__py3-none-any.whl → 0.3.231__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. alita_sdk/runtime/langchain/document_loaders/AlitaDocLoader.py +29 -0
  2. alita_sdk/runtime/langchain/document_loaders/constants.py +6 -0
  3. alita_sdk/runtime/langchain/document_loaders/utils.py +22 -1
  4. alita_sdk/runtime/tools/vectorstore.py +167 -46
  5. alita_sdk/tools/ado/__init__.py +2 -3
  6. alita_sdk/tools/ado/repos/__init__.py +12 -10
  7. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -0
  8. alita_sdk/tools/ado/test_plan/__init__.py +17 -15
  9. alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +1 -0
  10. alita_sdk/tools/ado/wiki/__init__.py +18 -20
  11. alita_sdk/tools/ado/wiki/ado_wrapper.py +1 -0
  12. alita_sdk/tools/ado/work_item/__init__.py +18 -19
  13. alita_sdk/tools/ado/work_item/ado_wrapper.py +1 -0
  14. alita_sdk/tools/azure_ai/search/__init__.py +7 -4
  15. alita_sdk/tools/bitbucket/__init__.py +9 -4
  16. alita_sdk/tools/confluence/__init__.py +9 -5
  17. alita_sdk/tools/elitea_base.py +35 -38
  18. alita_sdk/tools/figma/__init__.py +8 -5
  19. alita_sdk/tools/figma/api_wrapper.py +37 -12
  20. alita_sdk/tools/github/__init__.py +9 -10
  21. alita_sdk/tools/gitlab/__init__.py +10 -5
  22. alita_sdk/tools/google/bigquery/__init__.py +7 -2
  23. alita_sdk/tools/jira/__init__.py +9 -5
  24. alita_sdk/tools/postman/__init__.py +7 -2
  25. alita_sdk/tools/qtest/__init__.py +7 -2
  26. alita_sdk/tools/servicenow/__init__.py +7 -3
  27. alita_sdk/tools/sharepoint/__init__.py +8 -5
  28. alita_sdk/tools/slack/__init__.py +7 -3
  29. alita_sdk/tools/testrail/__init__.py +9 -4
  30. alita_sdk/tools/utils/content_parser.py +16 -26
  31. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +18 -11
  32. {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/METADATA +2 -2
  33. {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/RECORD +36 -35
  34. {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/WHEEL +0 -0
  35. {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/licenses/LICENSE +0 -0
  36. {alita_sdk-0.3.229.dist-info → alita_sdk-0.3.231.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,11 @@
1
1
  from typing import List, Literal, Optional
2
2
  from .ado_wrapper import AzureDevOpsApiWrapper # Import the API wrapper for Azure DevOps
3
3
  from langchain_core.tools import BaseTool, BaseToolkit
4
- from pydantic import create_model, BaseModel, Field, SecretStr
4
+ from pydantic import create_model, BaseModel, Field
5
5
 
6
6
  import requests
7
+ from ....configurations.ado import AdoConfiguration
8
+ from ....configurations.pgvector import PgVectorConfiguration
7
9
  from ...base.tool import BaseAction
8
10
  from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
9
11
 
@@ -25,22 +27,9 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
25
27
  'toolkit_name': True,
26
28
  'max_toolkit_length': AzureDevOpsWikiToolkit.toolkit_max_length})
27
29
  ),
28
- organization_url=(str, Field(title="Organization URL",
29
- description="ADO organization url", json_schema_extra={
30
- 'configuration': True,
31
- })),
32
- project=(str, Field(description="ADO project",
33
- json_schema_extra={
34
- 'configuration': True
35
- })),
36
- token=(SecretStr,
37
- Field(description="ADO token",
38
- json_schema_extra={'secret': True, 'configuration': True }
39
- )),
30
+ ado_configuration=(AdoConfiguration, Field(description="Ado configuration", json_schema_extra={'configuration_types': ['ado']})),
40
31
  # indexer settings
41
- connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
42
- default=None,
43
- json_schema_extra={'secret': True})),
32
+ pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
44
33
  # embedder settings
45
34
  embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.",
46
35
  default="HuggingFaceEmbeddings")),
@@ -53,7 +42,7 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
53
42
  'json_schema_extra': {
54
43
  'metadata': {
55
44
  "label": "ADO wiki",
56
- "icon_url": None,
45
+ "icon_url": "ado-wiki-icon.svg",
57
46
  "categories": ["documentation"],
58
47
  "extra_categories": ["knowledge base", "documentation management", "wiki"],
59
48
  "sections": {
@@ -77,9 +66,12 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
77
66
 
78
67
  @check_connection_response
79
68
  def check_connection(self):
69
+ ado_config = self.ado_wiki_configuration.ado_configuration if self.ado_wiki_configuration else None
70
+ if not ado_config:
71
+ raise ValueError("ADO wiki configuration is required")
80
72
  response = requests.get(
81
- f'{self.organization_url}/{self.project}/_apis/wiki/wikis?api-version=7.0',
82
- headers={'Authorization': f'Bearer {self.token}'},
73
+ f'{ado_config.organization_url}/{ado_config.project}/_apis/wiki/wikis?api-version=7.0',
74
+ headers={'Authorization': f'Bearer {ado_config.token}'},
83
75
  timeout=5
84
76
  )
85
77
  return response
@@ -94,7 +86,13 @@ class AzureDevOpsWikiToolkit(BaseToolkit):
94
86
  environ['AZURE_DEVOPS_CACHE_DIR'] = '/tmp/.azure-devops'
95
87
  if selected_tools is None:
96
88
  selected_tools = []
97
- azure_devops_api_wrapper = AzureDevOpsApiWrapper(**kwargs)
89
+ wrapper_payload = {
90
+ **kwargs,
91
+ # TODO use ado_configuration fields in AzureDevOpsApiWrapper
92
+ **kwargs['ado_configuration'],
93
+ **(kwargs.get('pgvector_configuration') or {}),
94
+ }
95
+ azure_devops_api_wrapper = AzureDevOpsApiWrapper(**wrapper_payload)
98
96
  available_tools = azure_devops_api_wrapper.get_available_tools()
99
97
  tools = []
100
98
  prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
@@ -61,6 +61,7 @@ RenamePageInput = create_model(
61
61
 
62
62
 
63
63
  class AzureDevOpsApiWrapper(BaseVectorStoreToolApiWrapper):
64
+ # TODO use ado_configuration instead of organization_url, project and token
64
65
  organization_url: str
65
66
  project: str
66
67
  token: SecretStr
@@ -1,9 +1,11 @@
1
1
  from typing import List, Optional, Literal
2
2
  from .ado_wrapper import AzureDevOpsApiWrapper # Import the API wrapper for Azure DevOps
3
3
  from langchain_core.tools import BaseTool, BaseToolkit
4
- from pydantic import create_model, BaseModel, Field, SecretStr
4
+ from pydantic import create_model, BaseModel, Field
5
5
 
6
6
  import requests
7
+ from ....configurations.ado import AdoConfiguration
8
+ from ....configurations.pgvector import PgVectorConfiguration
7
9
  from ...base.tool import BaseAction
8
10
  from ...utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length, check_connection_response
9
11
 
@@ -24,23 +26,11 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
24
26
  'toolkit_name': True,
25
27
  'max_toolkit_length': AzureDevOpsWorkItemsToolkit.toolkit_max_length})
26
28
  ),
27
- organization_url=(str, Field(title="Organization URL",
28
- description="ADO organization url",
29
- json_schema_extra={
30
- 'configuration': True
31
- })),
32
- project=(str, Field(description="ADO project",
33
- json_schema_extra={
34
- 'configuration': True
35
- }
36
- )),
37
- token=(SecretStr, Field(description="ADO token", json_schema_extra={'secret': True, 'configuration': True})),
29
+ ado_configuration=(AdoConfiguration, Field(description="Ado Work Item configuration", json_schema_extra={'configuration_types': ['ado_work_item']})),
38
30
  limit=(Optional[int], Field(description="ADO plans limit used for limitation of the list with results", default=5)),
39
31
  selected_tools=(List[Literal[tuple(selected_tools)]], Field(default=[], json_schema_extra={'args_schemas': selected_tools})),
40
32
  # indexer settings
41
- connection_string = (Optional[SecretStr], Field(description="Connection string for vectorstore",
42
- default=None,
43
- json_schema_extra={'secret': True})),
33
+ pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
44
34
  # embedder settings
45
35
  embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.",
46
36
  default="HuggingFaceEmbeddings")),
@@ -51,7 +41,7 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
51
41
  'json_schema_extra': {
52
42
  'metadata': {
53
43
  "label": "ADO boards",
54
- "icon_url": None,
44
+ "icon_url": "ado-boards-icon.svg",
55
45
  "categories": ["project management"],
56
46
  "extra_categories": ["work item management", "issue tracking", "agile boards"],
57
47
  "sections": {
@@ -75,9 +65,12 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
75
65
 
76
66
  @check_connection_response
77
67
  def check_connection(self):
68
+ ado_config = self.ado_work_item_configuration.ado_configuration if self.ado_work_item_configuration else None
69
+ if not ado_config:
70
+ raise ValueError("ADO work item configuration is required")
78
71
  response = requests.get(
79
- f'{self.organization_url}/{self.project}/_apis/wit/workitemtypes?api-version=7.0',
80
- headers={'Authorization': f'Bearer {self.token}'},
72
+ f'{ado_config.organization_url}/{ado_config.project}/_apis/wit/workitemtypes?api-version=7.0',
73
+ headers={'Authorization': f'Bearer {ado_config.token}'},
81
74
  timeout=5
82
75
  )
83
76
  return response
@@ -93,7 +86,13 @@ class AzureDevOpsWorkItemsToolkit(BaseToolkit):
93
86
  if selected_tools is None:
94
87
  selected_tools = []
95
88
 
96
- azure_devops_api_wrapper = AzureDevOpsApiWrapper(**kwargs)
89
+ wrapper_payload = {
90
+ **kwargs,
91
+ # TODO use ado_configuration fields in AzureDevOpsApiWrapper
92
+ **kwargs['ado_configuration'],
93
+ **(kwargs.get('pgvector_configuration') or {}),
94
+ }
95
+ azure_devops_api_wrapper = AzureDevOpsApiWrapper(**wrapper_payload)
97
96
  available_tools = azure_devops_api_wrapper.get_available_tools()
98
97
  tools = []
99
98
  prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
@@ -95,6 +95,7 @@ ADOUnlinkWorkItemsFromWikiPage = create_model(
95
95
  )
96
96
 
97
97
  class AzureDevOpsApiWrapper(BaseVectorStoreToolApiWrapper):
98
+ # TODO use ado_configuration instead of organization_url, project and token
98
99
  organization_url: str
99
100
  project: str
100
101
  token: SecretStr
@@ -15,10 +15,8 @@ name = "azure_search"
15
15
  def get_tools(tool):
16
16
  return AzureSearchToolkit().get_toolkit(
17
17
  selected_tools=tool['settings'].get('selected_tools', []),
18
- api_key=tool['settings'].get('azure_search_configuration', {}).get('api_key', None),
19
- endpoint=tool['settings'].get('azure_search_configuration', {}).get('endpoint', None),
18
+ azure_search_configuration=tool['settings']['azure_search_configuration'],
20
19
  index_name=tool['settings'].get('index_name', None),
21
- api_base=tool['settings'].get('azure_search_configuration', {}).get('api_base', None),
22
20
  api_version=tool['settings'].get('api_version', None),
23
21
  openai_api_key=tool['settings'].get('access_token', None),
24
22
  model_name=tool['settings'].get('model_name', None),
@@ -71,7 +69,12 @@ class AzureSearchToolkit(BaseToolkit):
71
69
  def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
72
70
  if selected_tools is None:
73
71
  selected_tools = []
74
- azure_search_api_wrapper = AzureSearchApiWrapper(**kwargs)
72
+ wrapper_payload = {
73
+ **kwargs,
74
+ # TODO use azure_search_configuration fields
75
+ **kwargs['azure_search_configuration'],
76
+ }
77
+ azure_search_api_wrapper = AzureSearchApiWrapper(**wrapper_payload)
75
78
  available_tools = azure_search_api_wrapper.get_available_tools()
76
79
  prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
77
80
  tools = []
@@ -22,13 +22,12 @@ def get_tools(tool):
22
22
  url=tool['settings']['url'],
23
23
  project=tool['settings']['project'],
24
24
  repository=tool['settings']['repository'],
25
- username=tool['settings'].get('bitbucket_configuration', {}).get('username', ''),
26
- password=tool['settings'].get('bitbucket_configuration', {}).get('password', ''),
25
+ bitbucket_configuration=tool['settings']['bitbucket_configuration'],
27
26
  branch=tool['settings']['branch'],
28
27
  cloud=tool['settings'].get('cloud'),
29
28
  llm=tool['settings'].get('llm', None),
30
29
  alita=tool['settings'].get('alita', None),
31
- connection_string=tool['settings'].get('pgvector_configuration', {}).get('connection_string', None),
30
+ pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
32
31
  collection_name=str(tool['toolkit_name']),
33
32
  doctype='code',
34
33
  embedding_model="HuggingFaceEmbeddings",
@@ -91,7 +90,13 @@ class AlitaBitbucketToolkit(BaseToolkit):
91
90
  selected_tools = []
92
91
  if kwargs["cloud"] is None:
93
92
  kwargs["cloud"] = True if "bitbucket.org" in kwargs.get('url') else False
94
- bitbucket_api_wrapper = BitbucketAPIWrapper(**kwargs)
93
+ wrapper_payload = {
94
+ **kwargs,
95
+ # TODO use bitbucket_configuration fields
96
+ **kwargs['bitbucket_configuration'],
97
+ **(kwargs.get('pgvector_configuration') or {}),
98
+ }
99
+ bitbucket_api_wrapper = BitbucketAPIWrapper(**wrapper_payload)
95
100
  available_tools: List[Dict] = __all__
96
101
  prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
97
102
  tools = []
@@ -17,9 +17,7 @@ def get_tools(tool):
17
17
  base_url=tool['settings']['base_url'],
18
18
  space=tool['settings'].get('space', None),
19
19
  cloud=tool['settings'].get('cloud', True),
20
- api_key=tool['settings'].get('confluence_configuration', {}).get('api_key', None),
21
- username=tool['settings'].get('confluence_configuration', {}).get('username', None),
22
- token=tool['settings'].get('confluence_configuration', {}).get('token', None),
20
+ confluence_configuration=tool['settings']['confluence_configuration'],
23
21
  limit=tool['settings'].get('limit', 5),
24
22
  labels=parse_list(tool['settings'].get('labels', None)),
25
23
  additional_fields=tool['settings'].get('additional_fields', []),
@@ -28,7 +26,7 @@ def get_tools(tool):
28
26
  llm=tool['settings'].get('llm', None),
29
27
  toolkit_name=tool.get('toolkit_name'),
30
28
  # indexer settings
31
- connection_string = tool['settings'].get('pgvector_configuration', {}).get('connection_string', None),
29
+ pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
32
30
  collection_name=str(tool['toolkit_name']),
33
31
  doctype='doc',
34
32
  embedding_model="HuggingFaceEmbeddings",
@@ -120,7 +118,13 @@ class ConfluenceToolkit(BaseToolkit):
120
118
  def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
121
119
  if selected_tools is None:
122
120
  selected_tools = []
123
- confluence_api_wrapper = ConfluenceAPIWrapper(**kwargs)
121
+ wrapper_payload = {
122
+ **kwargs,
123
+ # TODO use confluence_configuration fields
124
+ **kwargs['confluence_configuration'],
125
+ **(kwargs.get('pgvector_configuration') or {}),
126
+ }
127
+ confluence_api_wrapper = ConfluenceAPIWrapper(**wrapper_payload)
124
128
  prefix = clean_string(toolkit_name, ConfluenceToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
125
129
  available_tools = confluence_api_wrapper.get_available_tools()
126
130
  tools = []
@@ -30,13 +30,13 @@ LoaderSchema = create_model(
30
30
  # Base Vector Store Schema Models
31
31
  BaseIndexParams = create_model(
32
32
  "BaseIndexParams",
33
- collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
33
+ collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
34
34
  vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
35
35
  )
36
36
 
37
37
  BaseCodeIndexParams = create_model(
38
38
  "BaseCodeIndexParams",
39
- collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
39
+ collection_suffix=(str, Field(description="Suffix for collection name (max 7 characters) used to separate datasets", min_length=1, max_length=7)),
40
40
  vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
41
41
  branch=(Optional[str], Field(description="Branch to index files from. Defaults to active branch if None.", default=None)),
42
42
  whitelist=(Optional[List[str]], Field(description="File extensions or paths to include. Defaults to all files if None.", default=None)),
@@ -51,7 +51,9 @@ RemoveIndexParams = create_model(
51
51
  BaseSearchParams = create_model(
52
52
  "BaseSearchParams",
53
53
  query=(str, Field(description="Query text to search in the index")),
54
- collection_suffix=(Optional[str], Field(description="Optional suffix for collection name (max 7 characters)", default="", max_length=7)),
54
+ collection_suffix=(Optional[str], Field(
55
+ description="Optional suffix for collection name (max 7 characters). Leave empty to search across all datasets",
56
+ default="", max_length=7)),
55
57
  vectorstore_type=(Optional[str], Field(description="Vectorstore type (Chroma, PGVector, Elastic, etc.)", default="PGVector")),
56
58
  filter=(Optional[dict | str], Field(
57
59
  description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -219,6 +221,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
219
221
  embedding_model: Optional[str] = "HuggingFaceEmbeddings"
220
222
  embedding_model_params: Optional[Dict[str, Any]] = {"model_name": "sentence-transformers/all-MiniLM-L6-v2"}
221
223
  vectorstore_type: Optional[str] = "PGVector"
224
+ _vector_store: Optional[Any] = None
222
225
 
223
226
  def __init__(self, **kwargs):
224
227
  super().__init__(**kwargs)
@@ -297,9 +300,9 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
297
300
  collection_suffix = kwargs.get("collection_suffix")
298
301
  progress_step = kwargs.get("progress_step")
299
302
  clean_index = kwargs.get("clean_index")
300
- vs = self._init_vector_store(collection_suffix, embeddings=embedding)
303
+ vs = self._init_vector_store(embeddings=embedding)
301
304
  #
302
- return vs.index_documents(docs, progress_step=progress_step, clean_index=clean_index)
305
+ return vs.index_documents(docs, collection_suffix=collection_suffix, progress_step=progress_step, clean_index=clean_index)
303
306
 
304
307
  def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
305
308
  """
@@ -333,42 +336,31 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
333
336
 
334
337
 
335
338
  # TODO: init store once and re-use the instance
336
- def _init_vector_store(self, collection_suffix: str = "", embeddings: Optional[Any] = None):
339
+ def _init_vector_store(self, embeddings: Optional[Any] = None):
337
340
  """Initializes the vector store wrapper with the provided parameters."""
338
341
  try:
339
342
  from alita_sdk.runtime.tools.vectorstore import VectorStoreWrapper
340
343
  except ImportError:
341
344
  from alita_sdk.runtime.tools.vectorstore import VectorStoreWrapper
342
345
 
343
- # Validate collection_suffix length
344
- if collection_suffix and len(collection_suffix.strip()) > 7:
345
- raise ToolException("collection_suffix must be 7 characters or less")
346
-
347
- # Create collection name with suffix if provided
348
- collection_name = str(self.collection_name)
349
- if collection_suffix and collection_suffix.strip():
350
- collection_name = f"{self.collection_name}_{collection_suffix.strip()}"
351
-
352
- # Get database-specific parameters using adapter
353
- connection_string = self.connection_string.get_secret_value() if self.connection_string else None
354
- vectorstore_params = self._adapter.get_vectorstore_params(collection_name, connection_string)
355
-
356
- return VectorStoreWrapper(
357
- llm=self.llm,
358
- vectorstore_type=self.vectorstore_type,
359
- embedding_model=self.embedding_model,
360
- embedding_model_params=self.embedding_model_params,
361
- vectorstore_params=vectorstore_params,
362
- embeddings=embeddings,
363
- process_document_func=self._process_documents,
364
- )
346
+ if not self._vector_store:
347
+ connection_string = self.connection_string.get_secret_value() if self.connection_string else None
348
+ vectorstore_params = self._adapter.get_vectorstore_params(self.collection_name, connection_string)
349
+ self._vector_store = VectorStoreWrapper(
350
+ llm=self.llm,
351
+ vectorstore_type=self.vectorstore_type,
352
+ embedding_model=self.embedding_model,
353
+ embedding_model_params=self.embedding_model_params,
354
+ vectorstore_params=vectorstore_params,
355
+ embeddings=embeddings,
356
+ process_document_func=self._process_documents,
357
+ )
358
+ return self._vector_store
365
359
 
366
360
  def remove_index(self, collection_suffix: str = ""):
367
361
  """Cleans the indexed data in the collection."""
368
- vectorstore_wrapper = self._init_vector_store(collection_suffix)
369
- collection_name = f"{self.collection_name}_{collection_suffix}" if collection_suffix else str(self.collection_name)
370
- self._adapter.remove_collection(vectorstore_wrapper, collection_name)
371
- return (f"Collection '{collection_name}' has been removed from the vector store.\n"
362
+ self._init_vector_store()._clean_collection(collection_suffix=collection_suffix)
363
+ return (f"Collection '{collection_suffix}' has been removed from the vector store.\n"
372
364
  f"Available collections: {self.list_collections()}")
373
365
 
374
366
  def list_collections(self):
@@ -386,7 +378,14 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
386
378
  extended_search: Optional[List[str]] = None,
387
379
  **kwargs):
388
380
  """ Searches indexed documents in the vector store."""
389
- vectorstore = self._init_vector_store(collection_suffix)
381
+ vectorstore = self._init_vector_store()
382
+ # build filter on top of collection_suffix
383
+ filter = filter if isinstance(filter, dict) else json.loads(filter)
384
+ if collection_suffix:
385
+ filter.update({"collection": {
386
+ "$eq": collection_suffix.strip()
387
+ }})
388
+
390
389
  found_docs = vectorstore.search_documents(
391
390
  query,
392
391
  doctype=self.doctype,
@@ -579,22 +578,20 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
579
578
  return parse_code_files_for_db(file_content_generator())
580
579
 
581
580
  def index_data(self,
581
+ collection_suffix: str,
582
582
  branch: Optional[str] = None,
583
583
  whitelist: Optional[List[str]] = None,
584
584
  blacklist: Optional[List[str]] = None,
585
- collection_suffix: str = "",
586
585
  **kwargs) -> str:
587
586
  """Index repository files in the vector store using code parsing."""
588
587
 
589
-
590
-
591
588
  documents = self.loader(
592
589
  branch=branch,
593
590
  whitelist=whitelist,
594
591
  blacklist=blacklist
595
592
  )
596
- vectorstore = self._init_vector_store(collection_suffix)
597
- return vectorstore.index_documents(documents, clean_index=False, is_code=True)
593
+ vectorstore = self._init_vector_store()
594
+ return vectorstore.index_documents(documents, collection_suffix=collection_suffix, clean_index=False, is_code=True)
598
595
 
599
596
  def _get_vector_search_tools(self):
600
597
  """
@@ -6,6 +6,7 @@ from pydantic import BaseModel, ConfigDict, Field, create_model, SecretStr
6
6
  from ..base.tool import BaseAction
7
7
  from .api_wrapper import FigmaApiWrapper, GLOBAL_LIMIT
8
8
  from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
9
+ from ...configurations.pgvector import PgVectorConfiguration
9
10
 
10
11
  name = "figma"
11
12
 
@@ -21,7 +22,7 @@ def get_tools(tool):
21
22
  toolkit_name=tool.get('toolkit_name'),
22
23
  # indexer settings
23
24
  llm=tool['settings'].get('llm', None),
24
- connection_string = tool['settings'].get('connection_string', None),
25
+ pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
25
26
  collection_name=str(tool['toolkit_name']),
26
27
  doctype='doc',
27
28
  embedding_model="HuggingFaceEmbeddings",
@@ -54,9 +55,7 @@ class FigmaToolkit(BaseToolkit):
54
55
  Field(default=[], json_schema_extra={"args_schemas": selected_tools}),
55
56
  ),
56
57
  # indexer settings
57
- connection_string = (Optional[SecretStr], Field(description="Connection string for vectorstore",
58
- default=None,
59
- json_schema_extra={'secret': True})),
58
+ pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
60
59
 
61
60
  # embedder settings
62
61
  embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.", default="HuggingFaceEmbeddings")),
@@ -93,7 +92,11 @@ class FigmaToolkit(BaseToolkit):
93
92
  def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
94
93
  if selected_tools is None:
95
94
  selected_tools = []
96
- figma_api_wrapper = FigmaApiWrapper(**kwargs)
95
+ wrapper_payload = {
96
+ **kwargs,
97
+ **(kwargs.get('pgvector_configuration') or {}),
98
+ }
99
+ figma_api_wrapper = FigmaApiWrapper(**wrapper_payload)
97
100
  prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
98
101
  available_tools = figma_api_wrapper.get_available_tools()
99
102
  tools = []
@@ -4,7 +4,7 @@ import json
4
4
  import logging
5
5
  import re
6
6
  from enum import Enum
7
- from typing import Dict, Generator, Optional, Union
7
+ from typing import Dict, List, Generator, Optional, Union
8
8
 
9
9
  import requests
10
10
  from FigmaPy import FigmaPy
@@ -13,6 +13,7 @@ from langchain_core.tools import ToolException
13
13
  from pydantic import Field, PrivateAttr, create_model, model_validator, SecretStr
14
14
 
15
15
  from ..elitea_base import BaseVectorStoreToolApiWrapper, extend_with_vector_tools
16
+ from ..utils.content_parser import load_content_from_bytes
16
17
 
17
18
  GLOBAL_LIMIT = 10000
18
19
 
@@ -235,15 +236,27 @@ class FigmaApiWrapper(BaseVectorStoreToolApiWrapper):
235
236
  global_regexp: Optional[str] = Field(default=None)
236
237
  _client: Optional[FigmaPy] = PrivateAttr()
237
238
 
238
- def _base_loader(self, project_id: str, **kwargs) -> Generator[Document, None, None]:
239
- files = json.loads(self.get_project_files(project_id)).get('files', [])
240
- for file in files:
241
- yield Document(page_content=json.dumps(file), metadata={
242
- 'id': file.get('key', ''),
243
- 'file_key': file.get('key', ''),
244
- 'name': file.get('name', ''),
245
- 'updated_on': file.get('last_modified', '')
246
- })
239
+ def _base_loader(self, project_id: Optional[str] = None, file_keys: Optional[List[str]] = None, **kwargs) -> Generator[Document, None, None]:
240
+ files = []
241
+ if project_id:
242
+ files = json.loads(self.get_project_files(project_id)).get('files', [])
243
+ for file in files:
244
+ yield Document(page_content=json.dumps(file), metadata={
245
+ 'id': file.get('key', ''),
246
+ 'file_key': file.get('key', ''),
247
+ 'name': file.get('name', ''),
248
+ 'updated_on': file.get('last_modified', '')
249
+ })
250
+ elif file_keys:
251
+ for file_key in file_keys:
252
+ file = self._client.get_file(file_key)
253
+ metadata = {
254
+ 'id': file_key,
255
+ 'file_key': file_key,
256
+ 'name': file.name,
257
+ 'updated_on': file.last_modified
258
+ }
259
+ yield Document(page_content=json.dumps(metadata), metadata=metadata)
247
260
 
248
261
  def _process_document(self, document: Document) -> Generator[Document, None, None]:
249
262
  file_key = document.metadata.get('id', '')
@@ -257,12 +270,19 @@ class FigmaApiWrapper(BaseVectorStoreToolApiWrapper):
257
270
 
258
271
  # iterate over images values
259
272
  for node_id, image_url in images.items():
273
+ if not image_url:
274
+ logging.warning(f"Image URL not found for node_id {node_id} in file {file_key}. Skipping.")
275
+ continue
260
276
  response = requests.get(image_url)
261
277
  if response.status_code == 200:
262
278
  content_type = response.headers.get('Content-Type', '')
263
279
  if 'text/html' not in content_type.lower():
280
+ extension = f".{content_type.split('/')[-1]}" if content_type.startswith('image') else '.txt'
281
+ page_content = load_content_from_bytes(
282
+ file_content=response.content,
283
+ extension=extension, llm = self.llm)
264
284
  yield Document(
265
- page_content=base64.b64encode(response.content).decode("utf-8"),
285
+ page_content=page_content,
266
286
  metadata={
267
287
  'file_key': file_key,
268
288
  'node_id': node_id,
@@ -273,7 +293,12 @@ class FigmaApiWrapper(BaseVectorStoreToolApiWrapper):
273
293
  def _index_tool_params(self):
274
294
  """Return the parameters for indexing data."""
275
295
  return {
276
- "project_id": (str, Field(description="ID of the project to list files from", examples=["55391681"]))
296
+ "project_id": (Optional[str], Field(
297
+ description="ID of the project to list files from: i.e. '55391681'",
298
+ default=None)),
299
+ 'file_keys': (Optional[List[str]], Field(
300
+ description="List of file keys to index: i.e. ['Fp24FuzPwH0L74ODSrCnQo', 'jmhAr6q78dJoMRqt48zisY']",
301
+ default=None))
277
302
  }
278
303
 
279
304
  def _send_request(
@@ -19,14 +19,10 @@ def _get_toolkit(tool) -> BaseToolkit:
19
19
  github_repository=tool['settings']['repository'],
20
20
  active_branch=tool['settings']['active_branch'],
21
21
  github_base_branch=tool['settings']['base_branch'],
22
- github_access_token=tool['settings'].get('github_configuration', {}).get('access_token', ''),
23
- github_username=tool['settings'].get('github_configuration', {}).get('username', ''),
24
- github_password=tool['settings'].get('github_configuration', {}).get('password', ''),
25
- github_app_id=tool['settings'].get('github_configuration', {}).get('app_id', None),
26
- github_app_private_key=tool['settings'].get('github_configuration', {}).get('app_private_key', None),
22
+ github_configuration=tool['settings']['github_configuration'],
27
23
  llm=tool['settings'].get('llm', None),
28
24
  alita=tool['settings'].get('alita', None),
29
- connection_string=tool['settings'].get('pgvector_configuration', {}).get('connection_string', None),
25
+ pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
30
26
  collection_name=str(tool['toolkit_name']),
31
27
  doctype='code',
32
28
  embedding_model="HuggingFaceEmbeddings",
@@ -71,9 +67,6 @@ class AlitaGitHubToolkit(BaseToolkit):
71
67
  active_branch=(Optional[str], Field(description="Active branch", default="main")),
72
68
  base_branch=(Optional[str], Field(description="Github Base branch", default="main")),
73
69
  # indexer settings
74
- connection_string=(Optional[SecretStr], Field(description="Connection string for vectorstore",
75
- default=None,
76
- json_schema_extra={'secret': True})),
77
70
  embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.", default="HuggingFaceEmbeddings")),
78
71
  embedding_model_params=(dict, Field(description="Embedding model parameters: i.e. `{'model_name': 'sentence-transformers/all-MiniLM-L6-v2'}", default={"model_name": "sentence-transformers/all-MiniLM-L6-v2"})),
79
72
  selected_tools=(List[Literal[tuple(selected_tools)]],
@@ -84,7 +77,13 @@ class AlitaGitHubToolkit(BaseToolkit):
84
77
  def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
85
78
  if selected_tools is None:
86
79
  selected_tools = []
87
- github_api_wrapper = AlitaGitHubAPIWrapper(**kwargs)
80
+ wrapper_payload = {
81
+ **kwargs,
82
+ # TODO use github_configuration fields
83
+ **kwargs['github_configuration'],
84
+ **(kwargs.get('pgvector_configuration') or {}),
85
+ }
86
+ github_api_wrapper = AlitaGitHubAPIWrapper(**wrapper_payload)
88
87
  available_tools: List[Dict] = github_api_wrapper.get_available_tools()
89
88
  tools = []
90
89
  prefix = clean_string(toolkit_name, AlitaGitHubToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
@@ -20,11 +20,11 @@ def get_tools(tool):
20
20
  url=tool['settings']['url'],
21
21
  repository=tool['settings']['repository'],
22
22
  branch=tool['settings']['branch'],
23
- private_token=tool['settings'].get('gitlab_configuration', {}).get('private_token', ''),
23
+ gitlab_configuration=tool['settings']['gitlab_configuration'],
24
24
 
25
25
  llm=tool['settings'].get('llm', None),
26
26
  alita=tool['settings'].get('alita', None),
27
- connection_string=tool['settings'].get('pgvector_configuration', {}).get('connection_string', None),
27
+ pgvector_configuration=tool['settings'].get('pgvector_configuration', {}),
28
28
  collection_name=str(tool['toolkit_name']),
29
29
  doctype='code',
30
30
  embedding_model="HuggingFaceEmbeddings",
@@ -49,8 +49,7 @@ class AlitaGitlabToolkit(BaseToolkit):
49
49
  gitlab_configuration=(Optional[GitlabConfiguration], Field(description="GitLab configuration", json_schema_extra={'configuration_types': ['gitlab']})),
50
50
  branch=(str, Field(description="Main branch", default="main")),
51
51
  # indexer settings
52
- pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector configuration", default=None,
53
- json_schema_extra={'configuration_types': ['pgvector']})),
52
+ pgvector_configuration=(Optional[PgVectorConfiguration], Field(description="PgVector Configuration", json_schema_extra={'configuration_types': ['pgvector']})),
54
53
  # embedder settings
55
54
  embedding_model=(str, Field(description="Embedding model: i.e. 'HuggingFaceEmbeddings', etc.", default="HuggingFaceEmbeddings")),
56
55
  embedding_model_params=(dict, Field(description="Embedding model parameters: i.e. `{'model_name': 'sentence-transformers/all-MiniLM-L6-v2'}", default={"model_name": "sentence-transformers/all-MiniLM-L6-v2"})),
@@ -80,7 +79,13 @@ class AlitaGitlabToolkit(BaseToolkit):
80
79
  def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
81
80
  if selected_tools is None:
82
81
  selected_tools = []
83
- gitlab_api_wrapper = GitLabAPIWrapper(**kwargs)
82
+ wrapper_payload = {
83
+ **kwargs,
84
+ # TODO use gitlab_configuration fields
85
+ **kwargs['gitlab_configuration'],
86
+ **(kwargs.get('pgvector_configuration') or {}),
87
+ }
88
+ gitlab_api_wrapper = GitLabAPIWrapper(**wrapper_payload)
84
89
  prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
85
90
  available_tools: List[Dict] = gitlab_api_wrapper.get_available_tools()
86
91
  tools = []