alita-sdk 0.3.528__py3-none-any.whl → 0.3.554__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of alita-sdk might be problematic.

Files changed (46)
  1. alita_sdk/community/__init__.py +8 -4
  2. alita_sdk/configurations/__init__.py +1 -0
  3. alita_sdk/configurations/openapi.py +111 -0
  4. alita_sdk/runtime/clients/client.py +185 -10
  5. alita_sdk/runtime/langchain/langraph_agent.py +2 -2
  6. alita_sdk/runtime/langchain/utils.py +46 -0
  7. alita_sdk/runtime/skills/__init__.py +91 -0
  8. alita_sdk/runtime/skills/callbacks.py +498 -0
  9. alita_sdk/runtime/skills/discovery.py +540 -0
  10. alita_sdk/runtime/skills/executor.py +610 -0
  11. alita_sdk/runtime/skills/input_builder.py +371 -0
  12. alita_sdk/runtime/skills/models.py +330 -0
  13. alita_sdk/runtime/skills/registry.py +355 -0
  14. alita_sdk/runtime/skills/skill_runner.py +330 -0
  15. alita_sdk/runtime/toolkits/__init__.py +2 -0
  16. alita_sdk/runtime/toolkits/skill_router.py +238 -0
  17. alita_sdk/runtime/toolkits/tools.py +76 -9
  18. alita_sdk/runtime/tools/__init__.py +3 -1
  19. alita_sdk/runtime/tools/artifact.py +70 -21
  20. alita_sdk/runtime/tools/image_generation.py +50 -44
  21. alita_sdk/runtime/tools/llm.py +363 -44
  22. alita_sdk/runtime/tools/loop.py +3 -1
  23. alita_sdk/runtime/tools/loop_output.py +3 -1
  24. alita_sdk/runtime/tools/skill_router.py +776 -0
  25. alita_sdk/runtime/tools/tool.py +3 -1
  26. alita_sdk/runtime/tools/vectorstore.py +7 -2
  27. alita_sdk/runtime/tools/vectorstore_base.py +7 -2
  28. alita_sdk/runtime/utils/AlitaCallback.py +2 -1
  29. alita_sdk/runtime/utils/utils.py +34 -0
  30. alita_sdk/tools/__init__.py +41 -1
  31. alita_sdk/tools/ado/work_item/ado_wrapper.py +33 -2
  32. alita_sdk/tools/base_indexer_toolkit.py +36 -24
  33. alita_sdk/tools/confluence/api_wrapper.py +5 -6
  34. alita_sdk/tools/confluence/loader.py +4 -2
  35. alita_sdk/tools/openapi/__init__.py +280 -120
  36. alita_sdk/tools/openapi/api_wrapper.py +883 -0
  37. alita_sdk/tools/openapi/tool.py +20 -0
  38. alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
  39. alita_sdk/tools/servicenow/__init__.py +9 -9
  40. alita_sdk/tools/servicenow/api_wrapper.py +1 -1
  41. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/METADATA +2 -2
  42. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/RECORD +46 -33
  43. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/WHEEL +0 -0
  44. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/entry_points.txt +0 -0
  45. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/licenses/LICENSE +0 -0
  46. {alita_sdk-0.3.528.dist-info → alita_sdk-0.3.554.dist-info}/top_level.txt +0 -0
@@ -86,7 +86,9 @@ Answer must be JSON only extractable by JSON.LOADS."""
         else:
             input_[-1].content += self.unstructured_output
         completion = self.client.invoke(input_, config=config)
-        result = _extract_json(completion.content.strip())
+        from ..langchain.utils import extract_text_from_completion
+        content_text = extract_text_from_completion(completion)
+        result = _extract_json(content_text.strip())
         logger.info(f"ToolNode tool params: {result}")
         try:
             # handler for application added as a tool
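The new helper extract_text_from_completion (added in alita_sdk/runtime/langchain/utils.py, per the file list above) replaces direct completion.content access throughout this release. A minimal sketch of what such a helper plausibly does, assuming LangChain-style messages whose .content may be either a plain string or a list of content blocks emitted by thinking models; the shipped implementation may differ:

def extract_text_from_completion(completion):
    """Sketch: normalize a completion whose .content may be a plain string
    or a list of content blocks (e.g. from thinking models)."""
    content = getattr(completion, 'content', completion)
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and block.get('type') == 'text':
                parts.append(block.get('text', ''))
        return ''.join(parts)
    return str(content)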
@@ -12,9 +12,11 @@ from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapte
 from logging import getLogger
 
 from ..utils.logging import dispatch_custom_event
+from ..langchain.utils import extract_text_from_completion
 
 logger = getLogger(__name__)
 
+
 class IndexDocumentsModel(BaseModel):
     documents: Any = Field(description="Generator of documents to index")
 
@@ -684,8 +686,10 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 ]
             )
         ])
+        # Extract text content safely (handles both string and list content from thinking models)
+        search_query = extract_text_from_completion(result)
         search_results = self.search_documents(
-            result.content, doctype, filter, cut_off, search_top,
+            search_query, doctype, filter, cut_off, search_top,
             full_text_search=full_text_search,
             reranking_config=reranking_config,
             extended_search=extended_search
@@ -714,7 +718,8 @@ class VectorStoreWrapper(BaseToolApiWrapper):
                 ]
             )
         ])
-        return result.content
+        # Extract text content safely (handles both string and list content from thinking models)
+        return extract_text_from_completion(result)
 
     def _log_data(self, message: str, tool_name: str = "index_data"):
         """Log data and dispatch custom event for indexing progress"""
@@ -13,9 +13,11 @@ from pydantic import BaseModel, model_validator, Field
 from alita_sdk.tools.elitea_base import BaseToolApiWrapper
 from alita_sdk.tools.vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ...runtime.utils.utils import IndexerKeywords
+from ...runtime.langchain.utils import extract_text_from_completion
 
 logger = getLogger(__name__)
 
+
 class IndexDocumentsModel(BaseModel):
     documents: Any = Field(description="Generator of documents to index")
 
@@ -625,8 +627,10 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 ]
             )
         ])
+        # Extract text content safely (handles both string and list content from thinking models)
+        search_query = extract_text_from_completion(result)
         search_results = self.search_documents(
-            result.content, doctype, filter, cut_off, search_top,
+            search_query, doctype, filter, cut_off, search_top,
             full_text_search=full_text_search,
             reranking_config=reranking_config,
             extended_search=extended_search
@@ -655,7 +659,8 @@ class VectorStoreWrapperBase(BaseToolApiWrapper):
                 ]
             )
         ])
-        return result.content
+        # Extract text content safely (handles both string and list content from thinking models)
+        return extract_text_from_completion(result)
 
     def get_available_tools(self):
         return [
@@ -313,7 +313,8 @@ class AlitaStreamlitCallback(BaseCallbackHandler):
         if self.debug:
             log.debug("on_llm_end(%s, %s)", response, kwargs)
         llm_run_id = str(run_id)
-        if self.callback_state.get(llm_run_id):
+        # Check if callback_state exists and is not None before accessing
+        if self.callback_state is not None and self.callback_state.get(llm_run_id):
             status_widget = self.callback_state[llm_run_id]
             self._safe_streamlit_call(
                 status_widget.update,
@@ -1,5 +1,8 @@
+import base64
+import logging
 import re
 from enum import Enum
+from typing import Any
 
 # DEPRECATED: Tool names no longer use prefixes
 # Kept for backward compatibility only
@@ -32,3 +35,34 @@ def clean_node_str(s: str) -> str:
     """Cleans a node string by removing all non-alphanumeric characters except underscores and spaces."""
     cleaned_string = re.sub(r'[^\w\s]', '', s)
     return cleaned_string
+
+
+def resolve_image_from_cache(client: Any, cached_image_id: str) -> bytes:
+    """
+    Resolve cached_image_id from client's image cache and return decoded binary data.
+
+    Args:
+        client: AlitaClient instance with _generated_images_cache attribute
+        cached_image_id: The cached image ID to resolve
+
+    Returns:
+        bytes: Decoded binary image data
+
+    Raises:
+        ValueError: If cached_image_id not found or decoding fails
+    """
+    cache = getattr(client, '_generated_images_cache', {})
+
+    if cached_image_id not in cache:
+        raise ValueError(f"Image reference '{cached_image_id}' not found. The image may have expired.")
+
+    cached_data = cache[cached_image_id]
+    base64_data = cached_data.get('base64_data', '')
+    logging.debug(f"Resolved cached_image_id '{cached_image_id}' from cache (length: {len(base64_data)} chars)")
+    # Decode base64 to binary data for image files
+    try:
+        binary_data = base64.b64decode(base64_data)
+        logging.debug(f"Decoded base64 to binary data ({len(binary_data)} bytes)")
+        return binary_data
+    except Exception as e:
+        raise ValueError(f"Failed to decode image data for '{cached_image_id}': {e}")
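A hypothetical caller for the new helper. The +3/+31 line counts suggest this hunk is alita_sdk/runtime/utils/utils.py (matching its +34 in the file list), so the import path below is an inference; the client stand-in and cache contents are purely illustrative:

from alita_sdk.runtime.utils.utils import resolve_image_from_cache

class FakeClient:
    # Stand-in for an AlitaClient whose image-generation tool populated the cache.
    _generated_images_cache = {
        'img-123': {'base64_data': 'iVBORw0KGgoAAAANSUhEUg=='},  # truncated PNG header, illustrative
    }

image_bytes = resolve_image_from_cache(FakeClient(), 'img-123')
with open('generated.png', 'wb') as f:
    f.write(image_bytes)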
@@ -49,6 +49,9 @@ def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_clas
     if hasattr(module, 'get_toolkit'):
         imported['get_toolkit'] = getattr(module, 'get_toolkit')
 
+    if hasattr(module, 'get_toolkit_available_tools'):
+        imported['get_toolkit_available_tools'] = getattr(module, 'get_toolkit_available_tools')
+
     if toolkit_class_name and hasattr(module, toolkit_class_name):
         imported['toolkit_class'] = getattr(module, toolkit_class_name)
         AVAILABLE_TOOLKITS[toolkit_class_name] = getattr(module, toolkit_class_name)
@@ -64,7 +67,7 @@ def _safe_import_tool(tool_name, module_path, get_tools_name=None, toolkit_clas
 
 # Safe imports for all tools
 _safe_import_tool('github', 'github', 'get_tools', 'AlitaGitHubToolkit')
-_safe_import_tool('openapi', 'openapi', 'get_tools')
+_safe_import_tool('openapi', 'openapi', 'get_tools', 'AlitaOpenAPIToolkit')
 _safe_import_tool('jira', 'jira', 'get_tools', 'JiraToolkit')
 _safe_import_tool('confluence', 'confluence', 'get_tools', 'ConfluenceToolkit')
 _safe_import_tool('service_now', 'servicenow', 'get_tools', 'ServiceNowToolkit')
@@ -240,6 +243,42 @@ def get_available_toolkit_models():
     """Return dict with available toolkit classes."""
     return deepcopy(AVAILABLE_TOOLS)
 
+
+def get_toolkit_available_tools(toolkit_type: str, settings: dict) -> dict:
+    """Return dynamic available tools + per-tool JSON schemas for a toolkit instance.
+
+    This is the single SDK entrypoint used by backend services (e.g. indexer_worker)
+    when the UI needs spec/instance-dependent tool enumeration. Toolkits that don't
+    support dynamic enumeration should return an empty payload.
+
+    Args:
+        toolkit_type: toolkit type string (e.g. 'openapi')
+        settings: persisted toolkit settings
+
+    Returns:
+        {
+            "tools": [{"name": str, "description": str}],
+            "args_schemas": {"tool_name": <json schema dict>}
+        }
+    """
+    toolkit_type = (toolkit_type or '').strip().lower()
+    if not isinstance(settings, dict):
+        settings = {}
+
+    tool_module = AVAILABLE_TOOLS.get(toolkit_type) or {}
+    enumerator = tool_module.get('get_toolkit_available_tools')
+    if not callable(enumerator):
+        return {"tools": [], "args_schemas": {}}
+
+    try:
+        result = enumerator(settings)
+        if not isinstance(result, dict):
+            return {"tools": [], "args_schemas": {}, "error": "Invalid response from toolkit enumerator"}
+        return result
+    except Exception as e:  # pylint: disable=W0718
+        logger.exception("Failed to compute available tools for toolkit_type=%s", toolkit_type)
+        return {"tools": [], "args_schemas": {}, "error": str(e)}
+
 def diagnose_imports():
     """Print diagnostic information about tool imports."""
     available_count = len(AVAILABLE_TOOLS)
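A usage sketch for the new entrypoint, exported from alita_sdk.tools (see the __all__ update in the next hunk). The settings payload is illustrative; real keys depend on the toolkit's configuration schema:

from alita_sdk.tools import get_toolkit_available_tools

# Hypothetical OpenAPI toolkit settings.
settings = {'spec': 'https://example.com/openapi.json'}

payload = get_toolkit_available_tools('openapi', settings)
for tool in payload.get('tools', []):
    print(tool['name'], '-', tool['description'])
if 'error' in payload:
    print('enumeration failed:', payload['error'])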
@@ -276,6 +315,7 @@ diagnose_imports():
 __all__ = [
     'get_tools',
     'get_toolkits',
+    'get_toolkit_available_tools',
     'get_available_tools',
     'get_failed_imports',
     'get_available_toolkits',
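The next hunk (ado_wrapper.py) normalizes the attachment_ids metadata field, which previously had to be a dict. A small illustration of the two shapes now accepted, with hypothetical values:

import json

# Both shapes now yield the same attachments:
as_dict = {'att-1': 'report.pdf'}
as_json = json.dumps({'att-1': 'report.pdf'})  # e.g. metadata round-tripped through storage

for raw in (as_dict, as_json):
    attachment_ids = json.loads(raw) if isinstance(raw, str) else raw
    assert attachment_ids == {'att-1': 'report.pdf'}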
@@ -576,9 +576,40 @@ class AzureDevOpsApiWrapper(NonCodeIndexerToolkit):
         return b"".join(content_generator)
 
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
-        for attachment_id, file_name in document.metadata.get('attachment_ids', {}).items():
+        raw_attachment_ids = document.metadata.get('attachment_ids', {})
+
+        # Normalize attachment_ids: accept dict or JSON string, raise otherwise
+        if isinstance(raw_attachment_ids, str):
+            try:
+                loaded = json.loads(raw_attachment_ids)
+            except json.JSONDecodeError:
+                raise TypeError(
+                    f"Expected dict or JSON string for 'attachment_ids', got non-JSON string for id="
+                    f"{document.metadata.get('id')}: {raw_attachment_ids!r}"
+                )
+            if not isinstance(loaded, dict):
+                raise TypeError(
+                    f"'attachment_ids' JSON did not decode to dict for id={document.metadata.get('id')}: {loaded!r}"
+                )
+            attachment_ids = loaded
+        elif isinstance(raw_attachment_ids, dict):
+            attachment_ids = raw_attachment_ids
+        else:
+            raise TypeError(
+                f"Expected 'attachment_ids' to be dict or JSON string, got {type(raw_attachment_ids)} "
+                f"for id={document.metadata.get('id')}: {raw_attachment_ids!r}"
+            )
+
+        for attachment_id, file_name in attachment_ids.items():
             content = self.get_attachment_content(attachment_id=attachment_id)
-            yield Document(page_content="", metadata={'id': attachment_id, IndexerKeywords.CONTENT_FILE_NAME.value: file_name, IndexerKeywords.CONTENT_IN_BYTES.value: content})
+            yield Document(
+                page_content="",
+                metadata={
+                    'id': attachment_id,
+                    IndexerKeywords.CONTENT_FILE_NAME.value: file_name,
+                    IndexerKeywords.CONTENT_IN_BYTES.value: content,
+                },
+            )
 
     def _index_tool_params(self):
         """Return the parameters for indexing data."""
@@ -29,12 +29,6 @@ class IndexTools(str, Enum):
     REMOVE_INDEX = "remove_index"
     LIST_COLLECTIONS = "list_collections"
 
-# Base Vector Store Schema Models
-BaseIndexParams = create_model(
-    "BaseIndexParams",
-    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
-)
-
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
     index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
@@ -101,16 +95,6 @@ BaseStepbackSearchParams = create_model(
     )),
 )
 
-BaseIndexDataParams = create_model(
-    "indexData",
-    __base__=BaseIndexParams,
-    clean_index=(Optional[bool], Field(default=False,
-        description="Optional flag to enforce clean existing index before indexing new data")),
-    progress_step=(Optional[int], Field(default=10, ge=0, le=100,
-        description="Optional step size for progress reporting during indexing")),
-    chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default=loaders_allowed_to_override)),
-)
-
 
 class BaseIndexerToolkit(VectorStoreWrapperBase):
     """Base class for tool API wrappers that support vector store functionality."""
@@ -236,7 +220,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
         self._log_tool_event(f"Dependent documents were processed. "
                              f"Applying chunking tool '{chunking_tool}' if specified and preparing documents for indexing...")
         documents = self._apply_loaders_chunkers(documents, chunking_tool, chunking_config)
-        self._clean_metadata(documents)
+        documents = self._clean_metadata(documents)
 
         logger.debug(f"Indexing base document #{base_doc_counter}: {base_doc} and all dependent documents: {documents}")
 
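The next hunk replaces the module-level BaseIndexParams/BaseIndexDataParams removed above with an IndexData schema assembled per toolkit. A self-contained sketch of the pydantic create_model pattern it relies on; the extra field here is illustrative, not from the diff:

from typing import Optional
from pydantic import Field, create_model

# Base fields shared by every IndexData schema, as (annotation, FieldInfo) tuples.
index_params = {
    "index_name": (str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
    "clean_index": (Optional[bool], Field(default=False, description="Clean existing index first")),
}
# Hypothetical toolkit-specific extras, as _index_tool_params() might return them.
extra_params = {
    "wiql": (Optional[str], Field(default=None, description="Illustrative ADO-style query field")),
}

IndexData = create_model("IndexData", **index_params, **extra_params)
print(sorted(IndexData.model_json_schema()["properties"]))  # ['clean_index', 'index_name', 'wiql']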
@@ -670,21 +654,49 @@
         """
         Returns the standardized vector search tools (search operations only).
         Index operations are toolkit-specific and should be added manually to each toolkit.
-
+
+        This method constructs the argument schemas for each tool, merging base parameters with any extra parameters
+        defined in the subclass. It also handles the special case for chunking tools and their configuration.
+
         Returns:
-            List of tool dictionaries with name, ref, description, and args_schema
+            list: List of tool dictionaries with name, ref, description, and args_schema.
         """
+        index_params = {
+            "index_name": (
+                str,
+                Field(description="Index name (max 7 characters)", min_length=1, max_length=7)
+            ),
+            "clean_index": (
+                Optional[bool],
+                Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")
+            ),
+            "progress_step": (
+                Optional[int],
+                Field(default=10, ge=0, le=100, description="Optional step size for progress reporting during indexing")
+            ),
+        }
+        chunking_config = (
+            Optional[dict],
+            Field(description="Chunking tool configuration", default=loaders_allowed_to_override)
+        )
+
+        index_extra_params = self._index_tool_params() or {}
+        chunking_tool = index_extra_params.pop("chunking_tool", None)
+        if chunking_tool:
+            index_params = {
+                **index_params,
+                "chunking_tool": chunking_tool,
+            }
+        index_params["chunking_config"] = chunking_config
+        index_args_schema = create_model("IndexData", **index_params, **index_extra_params)
+
         return [
             {
                 "name": IndexTools.INDEX_DATA.value,
                 "mode": IndexTools.INDEX_DATA.value,
                 "ref": self.index_data,
                 "description": "Loads data to index.",
-                "args_schema": create_model(
-                    "IndexData",
-                    __base__=BaseIndexDataParams,
-                    **self._index_tool_params() if self._index_tool_params() else {}
-                )
+                "args_schema": index_args_schema,
             },
             {
                 "name": IndexTools.SEARCH_INDEX.value,
@@ -962,6 +962,9 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
             last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
 
+            attachment_path = attachment['_links']['download'] if attachment.get(
+                '_links', {}).get('download') else ''
+            download_url = self.client.url.rstrip('/') + attachment_path
             metadata = {
                 'name': title,
                 'size': attachment.get('extensions', {}).get('fileSize', None),
@@ -971,14 +974,10 @@
                 'media_type': media_type,
                 'labels': [label['name'] for label in
                            attachment.get('metadata', {}).get('labels', {}).get('results', [])],
-                'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
-                    '_links', {}).get('download') else None
+                'download_url': download_url
             }
-
-            download_url = self.base_url.rstrip('/') + attachment['_links']['download']
-
             try:
-                resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
+                resp = self.client.request(method="GET", path=attachment_path, advanced_mode=True)
                 if resp.status_code == 200:
                     content = resp.content
                 else:
@@ -48,7 +48,8 @@ class AlitaConfluenceLoader(ConfluenceLoader):
                     del kwargs[key]
                 except:
                     pass
-        self.base_url = kwargs.get('url')
+        # utilize adjusted URL from Confluence instance for base_url
+        self.base_url = confluence_client.url
         self.space_key = kwargs.get('space_key')
         self.page_ids = kwargs.get('page_ids')
         self.label = kwargs.get('label')
@@ -108,7 +109,8 @@
         texts = []
         for attachment in attachments:
             media_type = attachment["metadata"]["mediaType"]
-            absolute_url = self.base_url + attachment["_links"]["download"]
+            # utilize adjusted URL from Confluence instance for attachment download URL
+            absolute_url = self.confluence.url + attachment["_links"]["download"]
             title = attachment["title"]
             try:
                 if media_type == "application/pdf":
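All three Confluence hunks make the same move: build attachment URLs from the URL the client itself reports (client.url / confluence.url) rather than the raw configured kwarg. A small illustration of why this matters, with hypothetical URLs; the "/wiki" context path is an assumption about how the client may adjust the base:

# Hypothetical values: the user configures the bare site URL, but the client
# normalizes it (e.g. appending a context path such as '/wiki').
configured_url = 'https://example.atlassian.net'
client_url = 'https://example.atlassian.net/wiki'  # adjusted by the client

attachment_path = '/download/attachments/123/report.pdf'
# Building from the client's own URL keeps the context path intact:
download_url = client_url.rstrip('/') + attachment_path
assert download_url == 'https://example.atlassian.net/wiki/download/attachments/123/report.pdf'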