alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3794 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +323 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +493 -105
- alita_sdk/runtime/langchain/utils.py +118 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +25 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +782 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1032 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +16 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +27 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +28 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +28 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +14 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +28 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +12 -7
- alita_sdk/tools/cloud/azure/__init__.py +12 -7
- alita_sdk/tools/cloud/gcp/__init__.py +12 -7
- alita_sdk/tools/cloud/k8s/__init__.py +12 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +21 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +22 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +60 -11
- alita_sdk/tools/figma/api_wrapper.py +1400 -167
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +18 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +19 -13
- alita_sdk/tools/gitlab/api_wrapper.py +256 -80
- alita_sdk/tools/gitlab_org/__init__.py +14 -10
- alita_sdk/tools/google/bigquery/__init__.py +14 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +21 -11
- alita_sdk/tools/jira/__init__.py +22 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1357 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/qtest/__init__.py +22 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +13 -10
- alita_sdk/tools/report_portal/__init__.py +23 -16
- alita_sdk/tools/salesforce/__init__.py +22 -16
- alita_sdk/tools/servicenow/__init__.py +21 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +17 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +13 -8
- alita_sdk/tools/sql/__init__.py +22 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +21 -13
- alita_sdk/tools/testrail/__init__.py +13 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +241 -55
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +18 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +12 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +16 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +13 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +12 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
- alita_sdk-0.3.584.dist-info/RECORD +452 -0
- alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
alita_sdk/runtime/tools/artifact.py (+367 -33)
@@ -1,23 +1,33 @@
+import base64
 import hashlib
+import io
 import json
 import logging
+import re
 from typing import Any, Optional, Generator, List
 
+from langchain_core.callbacks import dispatch_custom_event
 from langchain_core.documents import Document
 from langchain_core.tools import ToolException
+from openpyxl.workbook.workbook import Workbook
 from pydantic import create_model, Field, model_validator
 
-from
+from ...tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
+from ...tools.utils.available_tools_decorator import extend_with_parent_available_tools
+from ...tools.elitea_base import extend_with_file_operations, BaseCodeToolApiWrapper
+from ...runtime.utils.utils import IndexerKeywords, resolve_image_from_cache
 
-try:
-    from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
-except ImportError:
-    from alita_sdk.langchain.interfaces.llm_processor import get_embeddings
 
-class ArtifactWrapper(
+class ArtifactWrapper(NonCodeIndexerToolkit):
     bucket: str
     artifact: Optional[Any] = None
 
+    # Import file operation methods from BaseCodeToolApiWrapper
+    read_file_chunk = BaseCodeToolApiWrapper.read_file_chunk
+    read_multiple_files = BaseCodeToolApiWrapper.read_multiple_files
+    search_file = BaseCodeToolApiWrapper.search_file
+    edit_file = BaseCodeToolApiWrapper.edit_file
+
     @model_validator(mode='before')
     @classmethod
     def validate_toolkit(cls, values):
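The hunk above swaps the vector-store base class for NonCodeIndexerToolkit and pulls file-operation methods in by plain attribute assignment rather than inheritance. A minimal sketch of that pattern, with illustrative class names that are not from the SDK:

class FileOps:
    def read_chunk(self, path: str, offset: int = 0, limit: int = 100) -> str:
        # placeholder body; the real SDK methods operate on artifact buckets
        return f"read {path} [{offset}:{offset + limit}]"

class Wrapper:
    # Python functions are descriptors, so assigning FileOps.read_chunk here
    # makes it a bound method of Wrapper instances without inheriting FileOps
    read_chunk = FileOps.read_chunk

print(Wrapper().read_chunk("report.txt"))  # read report.txt [0:100]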
@@ -26,13 +36,152 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
         if not values.get('bucket'):
             raise ValueError("Bucket is required.")
         values["artifact"] = values['alita'].artifact(values['bucket'])
-        return values
+        return super().validate_toolkit(values)
 
     def list_files(self, bucket_name = None, return_as_string = True):
-
+        """List all files in the artifact bucket with API download links."""
+        result = self.artifact.list(bucket_name, return_as_string=False)
+
+        # Add API download link to each file
+        if isinstance(result, dict) and 'rows' in result:
+            bucket = bucket_name or self.bucket
+
+            # Get base_url and project_id from alita client
+            base_url = getattr(self.alita, 'base_url', '').rstrip('/')
+            project_id = getattr(self.alita, 'project_id', '')
+
+            for file_info in result['rows']:
+                if 'name' in file_info:
+                    # Generate API download link
+                    file_name = file_info['name']
+                    file_info['link'] = f"{base_url}/api/v2/artifacts/artifact/default/{project_id}/{bucket}/{file_name}"
+
+        return str(result) if return_as_string else result
 
     def create_file(self, filename: str, filedata: str, bucket_name = None):
-
+        # Sanitize filename to prevent regex errors during indexing
+        sanitized_filename, was_modified = self._sanitize_filename(filename)
+        if was_modified:
+            logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")
+
+        # Auto-detect and extract base64 from image_url structures (from image_generation tool)
+        # Returns tuple: (processed_data, is_from_image_generation)
+        filedata, is_from_image_generation = self._extract_base64_if_needed(filedata)
+
+        if sanitized_filename.endswith(".xlsx"):
+            data = json.loads(filedata)
+            filedata = self.create_xlsx_filedata(data)
+
+        result = self.artifact.create(sanitized_filename, filedata, bucket_name)
+
+        # Skip file_modified event for images from image_generation tool
+        # These are already tracked in the tool output and don't need duplicate events
+        if not is_from_image_generation:
+            # Dispatch custom event for file creation
+            dispatch_custom_event("file_modified", {
+                "message": f"File '{filename}' created successfully",
+                "filename": filename,
+                "tool_name": "createFile",
+                "toolkit": "artifact",
+                "operation_type": "create",
+                "meta": {
+                    "bucket": bucket_name or self.bucket
+                }
+            })
+
+        return result
+
+    @staticmethod
+    def _sanitize_filename(filename: str) -> tuple:
+        """Sanitize filename for safe storage and regex pattern matching."""
+        from pathlib import Path
+
+        if not filename or not filename.strip():
+            return "unnamed_file", True
+
+        original = filename
+        path_obj = Path(filename)
+        name = path_obj.stem
+        extension = path_obj.suffix
+
+        # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
+        sanitized_name = re.sub(r'[^\w\s-]', '', name, flags=re.UNICODE)
+        sanitized_name = re.sub(r'[-\s]+', '-', sanitized_name)
+        sanitized_name = sanitized_name.strip('-').strip()
+
+        if not sanitized_name:
+            sanitized_name = "file"
+
+        if extension:
+            extension = re.sub(r'[^\w.-]', '', extension, flags=re.UNICODE)
+
+        sanitized = sanitized_name + extension
+        return sanitized, (sanitized != original)
+
+    def _extract_base64_if_needed(self, filedata: str) -> tuple[str | bytes, bool]:
+        """
+        Resolve cached_image_id references from cache and decode to binary data.
+
+        Requires JSON format with cached_image_id field: {"cached_image_id": "img_xxx"}
+        LLM must extract specific cached_image_id from generate_image response.
+
+        Returns:
+            tuple: (processed_data, is_from_image_generation)
+            - processed_data: Original filedata or resolved binary image data
+            - is_from_image_generation: True if data came from image_generation cache
+        """
+        if not filedata or not isinstance(filedata, str):
+            return filedata, False
+
+        # Require JSON format - fail fast if not JSON
+        if '{' not in filedata:
+            return filedata, False
+
+        try:
+            data = json.loads(filedata)
+        except json.JSONDecodeError:
+            # Not valid JSON, return as-is (regular file content)
+            return filedata, False
+
+        if not isinstance(data, dict):
+            return filedata, False
+
+        # Only accept direct cached_image_id format: {"cached_image_id": "img_xxx"}
+        # LLM must parse generate_image response and extract specific cached_image_id
+        if 'cached_image_id' in data:
+            binary_data = resolve_image_from_cache(self.alita, data['cached_image_id'])
+            return binary_data, True  # Mark as from image_generation
+
+        # If JSON doesn't have cached_image_id, treat as regular file content
+        return filedata, False
+
+    def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
+        try:
+            workbook = Workbook()
+
+            first_sheet = True
+            for sheet_name, sheet_data in data.items():
+                if first_sheet:
+                    sheet = workbook.active
+                    sheet.title = sheet_name
+                    first_sheet = False
+                else:
+                    sheet = workbook.create_sheet(title=sheet_name)
+
+                for row in sheet_data:
+                    sheet.append(row)
+
+            file_buffer = io.BytesIO()
+            workbook.save(file_buffer)
+            file_buffer.seek(0)
+
+            return file_buffer.read()
+
+        except json.JSONDecodeError:
+            raise ValueError("Invalid JSON format for .xlsx file data.")
+        except Exception as e:
+            raise ValueError(f"Error processing .xlsx file data: {e}")
+
 
     def read_file(self,
                   filename: str,
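A standalone sketch of the filename whitelist added above, mirroring the two re.sub passes in _sanitize_filename so the behavior can be checked in isolation (sanitize is a local stand-in name, not SDK API):

import re
from pathlib import Path

def sanitize(filename: str) -> str:
    p = Path(filename)
    # first pass: drop everything outside the whitelist (word chars, spaces, hyphens)
    name = re.sub(r'[^\w\s-]', '', p.stem, flags=re.UNICODE)
    # second pass: collapse runs of spaces/hyphens into a single hyphen
    name = re.sub(r'[-\s]+', '-', name).strip('-').strip()
    ext = re.sub(r'[^\w.-]', '', p.suffix, flags=re.UNICODE)
    return (name or "file") + ext

print(sanitize("my report (final)!.txt"))  # my-report-final.txt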
@@ -48,31 +197,187 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
                   sheet_name=sheet_name,
                   excel_by_sheets=excel_by_sheets,
                   llm=self.llm)
+
+    def _read_file(
+        self,
+        file_path: str,
+        branch: str = None,
+        bucket_name: str = None,
+        **kwargs
+    ) -> str:
+        """
+        Read a file from artifact bucket with optional partial read support.
+
+        Parameters:
+            file_path: Name of the file in the bucket
+            branch: Not used for artifacts (kept for API consistency)
+            bucket_name: Name of the bucket (uses default if None)
+            **kwargs: Additional parameters (offset, limit, head, tail) - currently ignored,
+                      partial read handled client-side by base class methods
+
+        Returns:
+            File content as string
+        """
+        return self.read_file(filename=file_path, bucket_name=bucket_name)
+
+    def _write_file(
+        self,
+        file_path: str,
+        content: str,
+        branch: str = None,
+        commit_message: str = None,
+        bucket_name: str = None
+    ) -> str:
+        """
+        Write content to a file (create or overwrite).
+
+        Parameters:
+            file_path: Name of the file in the bucket
+            content: New file content
+            branch: Not used for artifacts (kept for API consistency)
+            commit_message: Not used for artifacts (kept for API consistency)
+            bucket_name: Name of the bucket (uses default if None)
+
+        Returns:
+            Success message
+        """
+        try:
+            # Sanitize filename
+            sanitized_filename, was_modified = self._sanitize_filename(file_path)
+            if was_modified:
+                logging.warning(f"Filename sanitized: '{file_path}' -> '{sanitized_filename}'")
+
+            # Check if file exists
+            try:
+                self.artifact.get(artifact_name=sanitized_filename, bucket_name=bucket_name, llm=self.llm)
+                # File exists, overwrite it
+                result = self.artifact.overwrite(sanitized_filename, content, bucket_name)
+
+                # Dispatch custom event
+                dispatch_custom_event("file_modified", {
+                    "message": f"File '{sanitized_filename}' updated successfully",
+                    "filename": sanitized_filename,
+                    "tool_name": "edit_file",
+                    "toolkit": "artifact",
+                    "operation_type": "modify",
+                    "meta": {
+                        "bucket": bucket_name or self.bucket
+                    }
+                })
+
+                return f"Updated file {sanitized_filename}"
+            except:
+                # File doesn't exist, create it
+                result = self.artifact.create(sanitized_filename, content, bucket_name)
+
+                # Dispatch custom event
+                dispatch_custom_event("file_modified", {
+                    "message": f"File '{sanitized_filename}' created successfully",
+                    "filename": sanitized_filename,
+                    "tool_name": "edit_file",
+                    "toolkit": "artifact",
+                    "operation_type": "create",
+                    "meta": {
+                        "bucket": bucket_name or self.bucket
+                    }
+                })
+
+                return f"Created file {sanitized_filename}"
+        except Exception as e:
+            raise ToolException(f"Unable to write file {file_path}: {str(e)}")
 
     def delete_file(self, filename: str, bucket_name = None):
         return self.artifact.delete(filename, bucket_name)
 
     def append_data(self, filename: str, filedata: str, bucket_name = None):
-
+        result = self.artifact.append(filename, filedata, bucket_name)
+
+        # Dispatch custom event for file append
+        dispatch_custom_event("file_modified", {
+            "message": f"Data appended to file '{filename}' successfully",
+            "filename": filename,
+            "tool_name": "appendData",
+            "toolkit": "artifact",
+            "operation_type": "modify",
+            "meta": {
+                "bucket": bucket_name or self.bucket
+            }
+        })
+
+        return result
 
     def overwrite_data(self, filename: str, filedata: str, bucket_name = None):
-
+        result = self.artifact.overwrite(filename, filedata, bucket_name)
+
+        # Dispatch custom event for file overwrite
+        dispatch_custom_event("file_modified", {
+            "message": f"File '{filename}' overwritten successfully",
+            "filename": filename,
+            "tool_name": "overwriteData",
+            "toolkit": "artifact",
+            "operation_type": "modify",
+            "meta": {
+                "bucket": bucket_name or self.bucket
+            }
+        })
+
+        return result
 
     def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
-
+        # Sanitize bucket name: replace underscores with hyphens and ensure lowercase
+        sanitized_name = bucket_name.replace('_', '-').lower()
+        if sanitized_name != bucket_name:
+            logging.warning(f"Bucket name '{bucket_name}' was sanitized to '{sanitized_name}' (underscores replaced with hyphens, converted to lowercase)")
+        return self.artifact.client.create_bucket(sanitized_name, expiration_measure, expiration_value)
+
+    def _index_tool_params(self):
+        return {
+            'include_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to include when processing: i.e. ['*.png', '*.jpg']. "
+                            "If empty, all files will be processed (except skip_extensions).",
+                default=[])),
+            'skip_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to skip when processing: i.e. ['*.png', '*.jpg']",
+                default=[])),
+        }
 
     def _base_loader(self, **kwargs) -> Generator[Document, None, None]:
+        self._log_tool_event(message=f"Loading the files from artifact's bucket. {kwargs=}", tool_name="loader")
         try:
-            all_files = self.list_files(self.bucket, False)
+            all_files = self.list_files(self.bucket, False)['rows']
         except Exception as e:
             raise ToolException(f"Unable to extract files: {e}")
 
-
+        include_extensions = kwargs.get('include_extensions', [])
+        skip_extensions = kwargs.get('skip_extensions', [])
+        self._log_tool_event(message=f"Files filtering started. Include extensions: {include_extensions}. "
+                                     f"Skip extensions: {skip_extensions}", tool_name="loader")
+        # show the progress of filtering
+        total_files = len(all_files) if isinstance(all_files, list) else 0
+        filtered_files_count = 0
+        for file in all_files:
+            filtered_files_count += 1
+            if filtered_files_count % 10 == 0 or filtered_files_count == total_files:
+                self._log_tool_event(message=f"Files filtering progress: {filtered_files_count}/{total_files}",
+                                     tool_name="loader")
+            file_name = file['name']
+
+            # Check if file should be skipped based on skip_extensions
+            if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
+                   for pattern in skip_extensions):
+                continue
+
+            # Check if file should be included based on include_extensions
+            # If include_extensions is empty, process all files (that weren't skipped)
+            if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
+                                               for pattern in include_extensions)):
+                continue
+
             metadata = {
                 ("updated_on" if k == "modified" else k): str(v)
                 for k, v in file.items()
             }
-            metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket,
+            metadata['id'] = self.get_hash_from_bucket_and_file_name(self.bucket, file_name)
             yield Document(page_content="", metadata=metadata)
 
     def get_hash_from_bucket_and_file_name(self, bucket, file_name):
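The extension filters in _base_loader above treat entries like '*.png' as globs by escaping the pattern and then turning the escaped '*' back into '.*'. A quick standalone check of that conversion (matches is a local helper name, not SDK API):

import re

def matches(pattern: str, file_name: str) -> bool:
    # re.escape('*.png') -> r'\*\.png'; restoring '\*' to '.*' yields r'.*\.png$'
    regex = re.escape(pattern).replace(r'\*', '.*') + '$'
    return re.match(regex, file_name, re.IGNORECASE) is not None

print(matches('*.png', 'chart.PNG'))  # True (case-insensitive)
print(matches('*.png', 'notes.md'))   # False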
@@ -81,27 +386,28 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
         hasher.update(file_name.encode('utf-8'))
         return hasher.hexdigest()
 
-    def
-
-
-
-
-
-
-
-            metadata['
-            yield
-
-
-
-    @extend_with_vector_tools
+    def _extend_data(self, documents: Generator[Document, None, None]):
+        for document in documents:
+            try:
+                page_content = self.artifact.get_content_bytes(artifact_name=document.metadata['name'])
+                document.metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = page_content
+                document.metadata[IndexerKeywords.CONTENT_FILE_NAME.value] = document.metadata['name']
+                yield document
+            except Exception as e:
+                logging.error(f"Failed while parsing the file '{document.metadata['name']}': {e}")
+                yield document
+
+    @extend_with_file_operations
     def get_available_tools(self):
+        """Get available tools. Returns all tools for schema; filtering happens at toolkit level."""
         bucket_name = (Optional[str], Field(description="Name of the bucket to work with."
                        "If bucket is not specified by user directly, the name should be taken from chat history."
                        "If bucket never mentioned in chat, the name will be taken from tool configuration."
                        " ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`",
                        default=None))
-
+
+        # Basic artifact tools (always available)
+        basic_tools = [
             {
                 "ref": self.list_files,
                 "name": "listFiles",
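The _extend_data generator above streams metadata-only Documents and attaches raw file bytes one at a time, yielding the document even when fetching content fails so a single bad file does not abort indexing. A minimal sketch of that fail-open pass-through pattern, using dict stand-ins instead of Document and a hypothetical fetch callable:

from typing import Callable, Generator, Iterable

def extend(docs: Iterable[dict], fetch: Callable[[str], bytes]) -> Generator[dict, None, None]:
    for doc in docs:
        try:
            doc["content_bytes"] = fetch(doc["name"])  # may raise for unreadable files
        except Exception as e:
            print(f"skipping content for {doc['name']}: {e}")
        yield doc  # yielded either way, with or without content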
@@ -115,7 +421,21 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
                 "args_schema": create_model(
                     "createFile",
                     filename=(str, Field(description="Filename")),
-                    filedata=(str, Field(description="Stringified content of the file
+                    filedata=(str, Field(description="""Stringified content of the file.
+
+                        Supports three input formats:
+
+                        1. CACHED IMAGE REFERENCE (for generated/cached images):
+                           Pass JSON with cached_image_id field: {"cached_image_id": "img_xxx"}
+                           The tool will automatically resolve and decode the image from cache.
+                           This is typically used when another tool returns an image reference.
+
+                        2. EXCEL FILES (.xlsx extension):
+                           Pass JSON with sheet structure: {"Sheet1": [["Name", "Age"], ["Alice", 25], ["Bob", 30]]}
+
+                        3. TEXT/OTHER FILES:
+                           Pass the plain text string directly.
+                        """)),
                     bucket_name=bucket_name
                 )
             },
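The three filedata formats described in the createFile schema above, shown as illustrative payloads (the wrapper call itself is elided; the cached id value is the placeholder from the docstring):

import json

text_payload = "plain file body"                                         # 3. TEXT/OTHER
xlsx_payload = json.dumps({"Sheet1": [["Name", "Age"], ["Alice", 25]]})  # 2. .xlsx sheets
image_payload = json.dumps({"cached_image_id": "img_xxx"})               # 1. cached image ref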
@@ -176,11 +496,25 @@ class ArtifactWrapper(BaseVectorStoreToolApiWrapper):
                 "description": "Creates new bucket specified by user.",
                 "args_schema": create_model(
                     "createNewBucket",
-                    bucket_name=(str, Field(
+                    bucket_name=(str, Field(
+                        description="Bucket name to create. Must start with lowercase letter and contain only lowercase letters, numbers, and hyphens. Underscores will be automatically converted to hyphens.",
+                        pattern=r'^[a-z][a-z0-9_-]*$'  # Allow underscores in input, will be sanitized
+                    )),
                     expiration_measure=(Optional[str], Field(description="Measure of expiration time for bucket configuration."
                                                              "Possible values: `days`, `weeks`, `months`, `years`.",
                                                              default="weeks")),
                     expiration_value=(Optional[int], Field(description="Expiration time values.", default=1))
                 )
             }
-        ]
+        ]
+
+        # Always include indexing tools in available tools list
+        # Filtering based on vector store config happens at toolkit level via decorator
+        try:
+            # Get indexing tools from parent class
+            indexing_tools = super(ArtifactWrapper, self).get_available_tools()
+            return indexing_tools + basic_tools
+        except Exception as e:
+            # If getting parent tools fails, log warning and return basic tools only
+            logging.warning(f"Failed to load indexing tools: {e}. Only basic artifact tools will be available.")
+            return basic_tools
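A quick check of the bucket-name rule enforced above: the schema pattern accepts underscores in input, and create_new_bucket then lowercases the name and converts underscores to hyphens (sanitize_bucket is a local stand-in for that logic):

import re

def sanitize_bucket(name: str) -> str:
    return name.replace('_', '-').lower()

print(bool(re.match(r'^[a-z][a-z0-9_-]*$', 'my_bucket')))  # True: '_' allowed in input
print(sanitize_bucket('my_bucket'))                        # my-bucket: stored form uses hyphens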
alita_sdk/runtime/tools/data_analysis.py (new file, +183 -0)
@@ -0,0 +1,183 @@
+"""
+Data Analysis internal tool for Alita SDK.
+
+This tool provides Pandas-based data analysis capabilities as an internal tool,
+accessible through the "Enable internal tools" dropdown menu.
+
+It uses the conversation attachment bucket for file storage, providing seamless
+integration with drag-and-drop file uploads in chat.
+"""
+import logging
+from typing import Any, List, Literal, Optional, Type
+
+from langchain_core.tools import BaseTool, BaseToolkit
+from pydantic import BaseModel, ConfigDict, create_model, Field
+
+logger = logging.getLogger(__name__)
+
+name = "data_analysis"
+
+
+def get_tools(tools_list: list, alita_client=None, llm=None, memory_store=None):
+    """
+    Get data analysis tools for the provided tool configurations.
+
+    Args:
+        tools_list: List of tool configurations
+        alita_client: Alita client instance (required for data analysis)
+        llm: LLM client instance (required for code generation)
+        memory_store: Optional memory store instance (unused)
+
+    Returns:
+        List of data analysis tools
+    """
+    all_tools = []
+
+    for tool in tools_list:
+        if (tool.get('type') == 'data_analysis' or
+                tool.get('toolkit_name') == 'data_analysis'):
+            try:
+                if not alita_client:
+                    logger.error("Alita client is required for data analysis tools")
+                    continue
+
+                settings = tool.get('settings', {})
+                bucket_name = settings.get('bucket_name')
+
+                if not bucket_name:
+                    logger.error("bucket_name is required for data analysis tools")
+                    continue
+
+                toolkit_instance = DataAnalysisToolkit.get_toolkit(
+                    alita_client=alita_client,
+                    llm=llm,
+                    bucket_name=bucket_name,
+                    toolkit_name=tool.get('toolkit_name', '')
+                )
+                all_tools.extend(toolkit_instance.get_tools())
+            except Exception as e:
+                logger.error(f"Error in data analysis toolkit get_tools: {e}")
+                logger.error(f"Tool config: {tool}")
+                raise
+
+    return all_tools
+
+
+class DataAnalysisToolkit(BaseToolkit):
+    """
+    Data Analysis toolkit providing Pandas-based data analysis capabilities.
+
+    This is an internal tool that uses the conversation attachment bucket
+    for file storage, enabling seamless integration with chat file uploads.
+    """
+    tools: List[BaseTool] = []
+
+    @staticmethod
+    def toolkit_config_schema() -> Type[BaseModel]:
+        """Get the configuration schema for the data analysis toolkit."""
+        # Import PandasWrapper to get available tools schema
+        from alita_sdk.tools.pandas.api_wrapper import PandasWrapper
+
+        selected_tools = {
+            x['name']: x['args_schema'].model_json_schema()
+            for x in PandasWrapper.model_construct().get_available_tools()
+        }
+
+        return create_model(
+            'data_analysis',
+            bucket_name=(
+                Optional[str],
+                Field(
+                    default=None,
+                    title="Bucket name",
+                    description="Bucket where files are stored (auto-injected from conversation)"
+                )
+            ),
+            selected_tools=(
+                List[Literal[tuple(selected_tools)]],
+                Field(
+                    default=[],
+                    json_schema_extra={'args_schemas': selected_tools}
+                )
+            ),
+            __config__=ConfigDict(json_schema_extra={
+                'metadata': {
+                    "label": "Data Analysis",
+                    "icon_url": "data-analysis.svg",
+                    "hidden": True,  # Hidden from regular toolkit menu
+                    "categories": ["internal_tool"],
+                    "extra_categories": ["data analysis", "pandas", "dataframes", "data science"],
+                }
+            })
+        )
+
+    @classmethod
+    def get_toolkit(
+        cls,
+        alita_client=None,
+        llm=None,
+        bucket_name: str = None,
+        toolkit_name: Optional[str] = None,
+        selected_tools: Optional[List[str]] = None,
+        **kwargs
+    ):
+        """
+        Get toolkit with data analysis tools.
+
+        Args:
+            alita_client: Alita client instance (required)
+            llm: LLM for code generation (optional, uses alita_client.llm if not provided)
+            bucket_name: Conversation attachment bucket (required)
+            toolkit_name: Optional name prefix for tools
+            selected_tools: Optional list of tool names to include (default: all)
+            **kwargs: Additional arguments
+
+        Returns:
+            DataAnalysisToolkit instance with configured tools
+
+        Raises:
+            ValueError: If alita_client or bucket_name is not provided
+        """
+        if not alita_client:
+            raise ValueError("Alita client is required for data analysis")
+
+        if not bucket_name:
+            raise ValueError("bucket_name is required for data analysis (should be conversation attachment bucket)")
+
+        # Import the PandasWrapper from existing toolkit
+        from alita_sdk.tools.pandas.api_wrapper import PandasWrapper
+        from alita_sdk.tools.base.tool import BaseAction
+
+        # Create wrapper with conversation bucket
+        wrapper = PandasWrapper(
+            alita=alita_client,
+            llm=llm,
+            bucket_name=bucket_name
+        )
+
+        # Get tools from wrapper
+        available_tools = wrapper.get_available_tools()
+        tools = []
+
+        for tool in available_tools:
+            # Filter by selected_tools if provided
+            if selected_tools and tool["name"] not in selected_tools:
+                continue
+
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
+
+            tools.append(BaseAction(
+                api_wrapper=wrapper,
+                name=tool["name"],
+                description=description,
+                args_schema=tool["args_schema"],
+                metadata={"toolkit_name": toolkit_name, "toolkit_type": name} if toolkit_name else {}
+            ))
+
+        return cls(tools=tools)
+
+    def get_tools(self):
+        return self.tools
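A hypothetical invocation of the get_tools loader above; the client and llm objects are stand-ins for initialized instances, and the bucket name is illustrative:

tools = get_tools(
    [{
        "type": "data_analysis",
        "toolkit_name": "data_analysis",
        "settings": {"bucket_name": "conversation-attachments"},  # illustrative bucket
    }],
    alita_client=my_alita_client,  # stand-in: an initialized Alita client
    llm=my_llm,                    # stand-in: an initialized LLM client
)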