PyPI - alita-sdk - Versions diffs - 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl - Mend

alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (206) hide show

alita_sdk/cli/__init__.py +10 -0
alita_sdk/cli/__main__.py +17 -0
alita_sdk/cli/agent/__init__.py +5 -0
alita_sdk/cli/agent/default.py +258 -0
alita_sdk/cli/agent_executor.py +155 -0
alita_sdk/cli/agent_loader.py +215 -0
alita_sdk/cli/agent_ui.py +228 -0
alita_sdk/cli/agents.py +3601 -0
alita_sdk/cli/callbacks.py +647 -0
alita_sdk/cli/cli.py +168 -0
alita_sdk/cli/config.py +306 -0
alita_sdk/cli/context/__init__.py +30 -0
alita_sdk/cli/context/cleanup.py +198 -0
alita_sdk/cli/context/manager.py +731 -0
alita_sdk/cli/context/message.py +285 -0
alita_sdk/cli/context/strategies.py +289 -0
alita_sdk/cli/context/token_estimation.py +127 -0
alita_sdk/cli/formatting.py +182 -0
alita_sdk/cli/input_handler.py +419 -0
alita_sdk/cli/inventory.py +1256 -0
alita_sdk/cli/mcp_loader.py +315 -0
alita_sdk/cli/toolkit.py +327 -0
alita_sdk/cli/toolkit_loader.py +85 -0
alita_sdk/cli/tools/__init__.py +43 -0
alita_sdk/cli/tools/approval.py +224 -0
alita_sdk/cli/tools/filesystem.py +1751 -0
alita_sdk/cli/tools/planning.py +389 -0
alita_sdk/cli/tools/terminal.py +414 -0
alita_sdk/community/__init__.py +64 -8
alita_sdk/community/inventory/__init__.py +224 -0
alita_sdk/community/inventory/config.py +257 -0
alita_sdk/community/inventory/enrichment.py +2137 -0
alita_sdk/community/inventory/extractors.py +1469 -0
alita_sdk/community/inventory/ingestion.py +3172 -0
alita_sdk/community/inventory/knowledge_graph.py +1457 -0
alita_sdk/community/inventory/parsers/__init__.py +218 -0
alita_sdk/community/inventory/parsers/base.py +295 -0
alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
alita_sdk/community/inventory/parsers/go_parser.py +851 -0
alita_sdk/community/inventory/parsers/html_parser.py +389 -0
alita_sdk/community/inventory/parsers/java_parser.py +593 -0
alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
alita_sdk/community/inventory/parsers/python_parser.py +604 -0
alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
alita_sdk/community/inventory/parsers/text_parser.py +322 -0
alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
alita_sdk/community/inventory/patterns/__init__.py +61 -0
alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
alita_sdk/community/inventory/patterns/loader.py +348 -0
alita_sdk/community/inventory/patterns/registry.py +198 -0
alita_sdk/community/inventory/presets.py +535 -0
alita_sdk/community/inventory/retrieval.py +1403 -0
alita_sdk/community/inventory/toolkit.py +173 -0
alita_sdk/community/inventory/visualize.py +1370 -0
alita_sdk/configurations/bitbucket.py +94 -2
alita_sdk/configurations/confluence.py +96 -1
alita_sdk/configurations/gitlab.py +79 -0
alita_sdk/configurations/jira.py +103 -0
alita_sdk/configurations/testrail.py +88 -0
alita_sdk/configurations/xray.py +93 -0
alita_sdk/configurations/zephyr_enterprise.py +93 -0
alita_sdk/configurations/zephyr_essential.py +75 -0
alita_sdk/runtime/clients/artifact.py +1 -1
alita_sdk/runtime/clients/client.py +214 -42
alita_sdk/runtime/clients/mcp_discovery.py +342 -0
alita_sdk/runtime/clients/mcp_manager.py +262 -0
alita_sdk/runtime/clients/sandbox_client.py +373 -0
alita_sdk/runtime/langchain/assistant.py +118 -30
alita_sdk/runtime/langchain/constants.py +8 -1
alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +41 -12
alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
alita_sdk/runtime/langchain/document_loaders/constants.py +116 -99
alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
alita_sdk/runtime/langchain/langraph_agent.py +307 -71
alita_sdk/runtime/langchain/utils.py +48 -8
alita_sdk/runtime/llms/preloaded.py +2 -6
alita_sdk/runtime/models/mcp_models.py +61 -0
alita_sdk/runtime/toolkits/__init__.py +26 -0
alita_sdk/runtime/toolkits/application.py +9 -2
alita_sdk/runtime/toolkits/artifact.py +18 -6
alita_sdk/runtime/toolkits/datasource.py +13 -6
alita_sdk/runtime/toolkits/mcp.py +780 -0
alita_sdk/runtime/toolkits/planning.py +178 -0
alita_sdk/runtime/toolkits/tools.py +205 -55
alita_sdk/runtime/toolkits/vectorstore.py +9 -4
alita_sdk/runtime/tools/__init__.py +11 -3
alita_sdk/runtime/tools/application.py +7 -0
alita_sdk/runtime/tools/artifact.py +225 -12
alita_sdk/runtime/tools/function.py +95 -5
alita_sdk/runtime/tools/graph.py +10 -4
alita_sdk/runtime/tools/image_generation.py +212 -0
alita_sdk/runtime/tools/llm.py +494 -102
alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
alita_sdk/runtime/tools/mcp_server_tool.py +4 -4
alita_sdk/runtime/tools/planning/__init__.py +36 -0
alita_sdk/runtime/tools/planning/models.py +246 -0
alita_sdk/runtime/tools/planning/wrapper.py +607 -0
alita_sdk/runtime/tools/router.py +2 -1
alita_sdk/runtime/tools/sandbox.py +180 -79
alita_sdk/runtime/tools/vectorstore.py +22 -21
alita_sdk/runtime/tools/vectorstore_base.py +125 -52
alita_sdk/runtime/utils/AlitaCallback.py +106 -20
alita_sdk/runtime/utils/mcp_client.py +465 -0
alita_sdk/runtime/utils/mcp_oauth.py +244 -0
alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
alita_sdk/runtime/utils/streamlit.py +40 -13
alita_sdk/runtime/utils/toolkit_utils.py +28 -9
alita_sdk/runtime/utils/utils.py +12 -0
alita_sdk/tools/__init__.py +77 -33
alita_sdk/tools/ado/repos/__init__.py +7 -6
alita_sdk/tools/ado/repos/repos_wrapper.py +11 -11
alita_sdk/tools/ado/test_plan/__init__.py +7 -7
alita_sdk/tools/ado/wiki/__init__.py +7 -11
alita_sdk/tools/ado/wiki/ado_wrapper.py +89 -15
alita_sdk/tools/ado/work_item/__init__.py +7 -11
alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
alita_sdk/tools/advanced_jira_mining/__init__.py +8 -7
alita_sdk/tools/aws/delta_lake/__init__.py +11 -9
alita_sdk/tools/azure_ai/search/__init__.py +7 -6
alita_sdk/tools/base_indexer_toolkit.py +345 -70
alita_sdk/tools/bitbucket/__init__.py +9 -8
alita_sdk/tools/bitbucket/api_wrapper.py +50 -6
alita_sdk/tools/browser/__init__.py +4 -4
alita_sdk/tools/carrier/__init__.py +4 -6
alita_sdk/tools/chunkers/__init__.py +3 -1
alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
alita_sdk/tools/chunkers/universal_chunker.py +270 -0
alita_sdk/tools/cloud/aws/__init__.py +7 -6
alita_sdk/tools/cloud/azure/__init__.py +7 -6
alita_sdk/tools/cloud/gcp/__init__.py +7 -6
alita_sdk/tools/cloud/k8s/__init__.py +7 -6
alita_sdk/tools/code/linter/__init__.py +7 -7
alita_sdk/tools/code/loaders/codesearcher.py +3 -2
alita_sdk/tools/code/sonar/__init__.py +8 -7
alita_sdk/tools/code_indexer_toolkit.py +199 -0
alita_sdk/tools/confluence/__init__.py +9 -8
alita_sdk/tools/confluence/api_wrapper.py +171 -75
alita_sdk/tools/confluence/loader.py +10 -0
alita_sdk/tools/custom_open_api/__init__.py +9 -4
alita_sdk/tools/elastic/__init__.py +8 -7
alita_sdk/tools/elitea_base.py +492 -52
alita_sdk/tools/figma/__init__.py +7 -7
alita_sdk/tools/figma/api_wrapper.py +2 -1
alita_sdk/tools/github/__init__.py +9 -9
alita_sdk/tools/github/api_wrapper.py +9 -26
alita_sdk/tools/github/github_client.py +62 -2
alita_sdk/tools/gitlab/__init__.py +8 -8
alita_sdk/tools/gitlab/api_wrapper.py +135 -33
alita_sdk/tools/gitlab_org/__init__.py +7 -8
alita_sdk/tools/google/bigquery/__init__.py +11 -12
alita_sdk/tools/google_places/__init__.py +8 -7
alita_sdk/tools/jira/__init__.py +9 -7
alita_sdk/tools/jira/api_wrapper.py +100 -52
alita_sdk/tools/keycloak/__init__.py +8 -7
alita_sdk/tools/localgit/local_git.py +56 -54
alita_sdk/tools/memory/__init__.py +1 -1
alita_sdk/tools/non_code_indexer_toolkit.py +3 -2
alita_sdk/tools/ocr/__init__.py +8 -7
alita_sdk/tools/openapi/__init__.py +10 -1
alita_sdk/tools/pandas/__init__.py +8 -7
alita_sdk/tools/postman/__init__.py +7 -8
alita_sdk/tools/postman/api_wrapper.py +19 -8
alita_sdk/tools/postman/postman_analysis.py +8 -1
alita_sdk/tools/pptx/__init__.py +8 -9
alita_sdk/tools/qtest/__init__.py +16 -11
alita_sdk/tools/qtest/api_wrapper.py +1784 -88
alita_sdk/tools/rally/__init__.py +7 -8
alita_sdk/tools/report_portal/__init__.py +9 -7
alita_sdk/tools/salesforce/__init__.py +7 -7
alita_sdk/tools/servicenow/__init__.py +10 -10
alita_sdk/tools/sharepoint/__init__.py +7 -6
alita_sdk/tools/sharepoint/api_wrapper.py +127 -36
alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
alita_sdk/tools/sharepoint/utils.py +8 -2
alita_sdk/tools/slack/__init__.py +7 -6
alita_sdk/tools/sql/__init__.py +8 -7
alita_sdk/tools/sql/api_wrapper.py +71 -23
alita_sdk/tools/testio/__init__.py +7 -6
alita_sdk/tools/testrail/__init__.py +8 -9
alita_sdk/tools/utils/__init__.py +26 -4
alita_sdk/tools/utils/content_parser.py +88 -60
alita_sdk/tools/utils/text_operations.py +254 -0
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +76 -26
alita_sdk/tools/xray/__init__.py +9 -7
alita_sdk/tools/zephyr/__init__.py +7 -6
alita_sdk/tools/zephyr_enterprise/__init__.py +8 -6
alita_sdk/tools/zephyr_essential/__init__.py +7 -6
alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
alita_sdk/tools/zephyr_scale/__init__.py +7 -6
alita_sdk/tools/zephyr_squad/__init__.py +7 -6
{alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +147 -2
{alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/RECORD +206 -130
alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
{alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
{alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
{alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0

alita_sdk/tools/confluence/api_wrapper.py CHANGED Viewed

@@ -7,12 +7,14 @@ from json import JSONDecodeError
 from typing import Optional, List, Any, Dict, Callable, Generator, Literal
 import requests
+from atlassian.errors import ApiError
 from langchain_community.document_loaders.confluence import ContentFormat
 from langchain_core.documents import Document
 from langchain_core.messages import HumanMessage
 from langchain_core.tools import ToolException
 from markdownify import markdownify
 from pydantic import Field, PrivateAttr, model_validator, create_model, SecretStr
+from requests import HTTPError
 from tenacity import retry, stop_after_attempt, wait_exponential, before_sleep_log
 from alita_sdk.tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
@@ -194,6 +196,7 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     keep_markdown_format: Optional[bool] = True
     ocr_languages: Optional[str] = None
     keep_newlines: Optional[bool] = True
+    _errors: Optional[list[str]] = None
     _image_cache: ImageDescriptionCache = PrivateAttr(default_factory=ImageDescriptionCache)
     @model_validator(mode='before')
@@ -477,28 +480,78 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         """Gets pages with specific label in the Confluence space."""
         start = 0
-        pages_info = []
-        for _ in range((self.max_pages + self.limit - 1) // self.limit):
-            pages = self.client.get_all_pages_by_label(label, start=start,
-                                                       limit=self.limit)  # , expand="body.view.value"
+        pages_info: List[Dict[str, Any]] = []
+        seen_ids: set[str] = set()
+        # Use a while-loop driven by unique pages collected and
+        # presence of additional results instead of a fixed number
+        # of iterations based purely on max_pages/limit.
+        while len(pages_info) < (self.max_pages or 0):
+            pages = self.client.get_all_pages_by_label(
+                label,
+                start=start,
+                limit=self.limit,
+            )  # , expand="body.view.value"
             if not pages:
                 break
-            pages_info += [{
-                'page_id': page.metadata['id'],
-                'page_title': page.metadata['title'],
-                'page_url': page.metadata['source'],
-                'content': page.page_content
-            } for page in self.get_pages_by_id([page["id"] for page in pages])]
+            # Collect only ids we haven't processed yet to avoid
+            # calling get_page_by_id multiple times for the same
+            # Confluence page.
+            new_ids: List[str] = []
+            for p in pages:
+                page_id = p["id"] if isinstance(p, dict) else getattr(p, "id", None)
+                if page_id is None:
+                    continue
+                if page_id in seen_ids:
+                    continue
+                seen_ids.add(page_id)
+                new_ids.append(page_id)
+            if new_ids:
+                for page in self.get_pages_by_id(new_ids):
+                    meta = getattr(page, "metadata", {}) or {}
+                    page_id = meta.get("id")
+                    page_title = meta.get("title")
+                    page_url = meta.get("source")
+                    content = getattr(page, "page_content", None)
+                    if page_id is None:
+                        continue
+                    pages_info.append(
+                        {
+                            "page_id": page_id,
+                            "page_title": page_title,
+                            "page_url": page_url,
+                            "content": content,
+                        }
+                    )
+                    # Respect max_pages on unique pages collected.
+                    if len(pages_info) >= (self.max_pages or 0):
+                        break
+            # Advance the offset by the requested page size.
             start += self.limit
-        return pages_info
+            # Defensive break: if the API returns fewer items than
+            # requested, there are likely no more pages to fetch.
+            if len(pages) < self.limit:
+                break
+        # Slice as an extra safety net in case of any race conditions
+        # around the max_pages guard in the loop above.
+        return pages_info[: (self.max_pages or len(pages_info))]
     def is_public_page(self, page: dict) -> bool:
         """Check if a page is publicly accessible."""
         restrictions = self.client.get_all_restrictions_for_content(page["id"])
         return (
-                page["status"] == "current"
+                (page["status"] == "current"
+                # allow user to see archived content if needed
+                 or page["status"] == "archived")
                 and not restrictions["read"]["restrictions"]["user"]["results"]
                 and not restrictions["read"]["restrictions"]["group"]["results"]
         )
@@ -518,18 +571,35 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 ),
                 before_sleep=before_sleep_log(logger, logging.WARNING),
             )(self.client.get_page_by_id)
-            page = get_page(
-                page_id=page_id, expand=f"{self.content_format.value},version"
-            )
-            if not self.include_restricted_content and not self.is_public_page(page):
-                continue
+            try:
+                page = get_page(
+                    page_id=page_id, expand=f"{self.content_format.value},version"
+                )
+            except (ApiError, HTTPError) as e:
+                logger.error(f"Error fetching page with ID {page_id}: {e}")
+                page_content_temp = f"Confluence API Error: cannot fetch the page with ID {page_id}: {e}"
+                # store errors
+                if self._errors is None:
+                    self._errors = []
+                self._errors.append(page_content_temp)
+                return Document(page_content=page_content_temp,
+                                metadata={})
+            # TODO: update on toolkit advanced settings level as a separate feature
+            # if not self.include_restricted_content and not self.is_public_page(page):
+            #     continue
             yield self.process_page(page, skip_images)
+    def _log_errors(self):
+        """ Log errors encountered during toolkit execution. """
+        if self._errors:
+            logger.info(f"Errors encountered during toolkit execution: {self._errors}")
     def read_page_by_id(self, page_id: str, skip_images: bool = False):
         """Reads a page by its id in the Confluence space. If id is not available, but there is a title - use get_page_id first."""
         result = list(self.get_pages_by_id([page_id], skip_images))
         if not result:
-            "Page not found"
+            return f"Pages not found. Errors: {self._errors}" if self._errors \
+                else "Pages not found or you do not have access to them."
         return result[0].page_content
         # return self._strip_base64_images(result[0].page_content) if skip_images else result[0].page_content
@@ -815,6 +885,10 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
         from .loader import AlitaConfluenceLoader
         from copy import copy
         content_format = kwargs.get('content_format', 'view').lower()
+        self._index_include_attachments = kwargs.get('include_attachments', False)
+        self._include_extensions = kwargs.get('include_extensions', [])
+        self._skip_extensions = kwargs.get('skip_extensions', [])
         base_params = {
             'url': self.base_url,
             'space_key': self.space,
@@ -847,65 +921,79 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
     def _process_document(self, document: Document) -> Generator[Document, None, None]:
         try:
-            page_id = document.metadata.get('id')
-            attachments = self.client.get_attachments_from_content(page_id)
-            if not attachments or not attachments.get('results'):
-                return f"No attachments found for page ID {page_id}."
-            # Get attachment history for created/updated info
-            history_map = {}
-            for attachment in attachments['results']:
-                try:
-                    hist = self.client.history(attachment['id'])
-                    history_map[attachment['id']] = hist
-                except Exception as e:
-                    logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
-                    history_map[attachment['id']] = None
-            import re
-            for attachment in attachments['results']:
-                title = attachment.get('title', '')
-                file_ext = title.lower().split('.')[-1] if '.' in title else ''
-                media_type = attachment.get('metadata', {}).get('mediaType', '')
-                # Core metadata extraction with history
-                hist = history_map.get(attachment['id']) or {}
-                created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
-                created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
-                last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+            if self._index_include_attachments:
+                page_id = document.metadata.get('id')
+                attachments = self.client.get_attachments_from_content(page_id)
+                if not attachments or not attachments.get('results'):
+                    return f"No attachments found for page ID {page_id}."
+                # Get attachment history for created/updated info
+                history_map = {}
+                for attachment in attachments['results']:
+                    try:
+                        hist = self.client.history(attachment['id'])
+                        history_map[attachment['id']] = hist
+                    except Exception as e:
+                        logger.warning(f"Failed to fetch history for attachment {attachment.get('title', '')}: {str(e)}")
+                        history_map[attachment['id']] = None
+                import re
+                for attachment in attachments['results']:
+                    title = attachment.get('title', '')
+                    file_ext = title.lower().split('.')[-1] if '.' in title else ''
+                    # Re-verify extension filters
+                    # Check if file should be skipped based on skip_extensions
+                    if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', title, re.IGNORECASE)
+                           for pattern in self._skip_extensions):
+                        continue
+                    # Check if file should be included based on include_extensions
+                    # If include_extensions is empty, process all files (that weren't skipped)
+                    if self._include_extensions and not (
+                    any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', title, re.IGNORECASE)
+                        for pattern in self._include_extensions)):
+                        continue
+                    media_type = attachment.get('metadata', {}).get('mediaType', '')
+                    # Core metadata extraction with history
+                    hist = history_map.get(attachment['id']) or {}
+                    created_by = hist.get('createdBy', {}).get('displayName', '') if hist else attachment.get('creator', {}).get('displayName', '')
+                    created_date = hist.get('createdDate', '') if hist else attachment.get('created', '')
+                    last_updated = hist.get('lastUpdated', {}).get('when', '') if hist else ''
+                    metadata = {
+                        'name': title,
+                        'size': attachment.get('extensions', {}).get('fileSize', None),
+                        'creator': created_by,
+                        'created': created_date,
+                        'updated': last_updated,
+                        'media_type': media_type,
+                        'labels': [label['name'] for label in
+                                   attachment.get('metadata', {}).get('labels', {}).get('results', [])],
+                        'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
+                            '_links', {}).get('download') else None
+                    }
-                metadata = {
-                    'name': title,
-                    'size': attachment.get('extensions', {}).get('fileSize', None),
-                    'creator': created_by,
-                    'created': created_date,
-                    'updated': last_updated,
-                    'media_type': media_type,
-                    'labels': [label['name'] for label in
-                               attachment.get('metadata', {}).get('labels', {}).get('results', [])],
-                    'download_url': self.base_url.rstrip('/') + attachment['_links']['download'] if attachment.get(
-                        '_links', {}).get('download') else None
-                }
+                    download_url = self.base_url.rstrip('/') + attachment['_links']['download']
-                download_url = self.base_url.rstrip('/') + attachment['_links']['download']
+                    try:
+                        resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
+                        if resp.status_code == 200:
+                            content = resp.content
+                        else:
+                            content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
+                    except Exception as e:
+                        content = f"[Error downloading content: {str(e)}]"
-                try:
-                    resp = self.client.request(method="GET", path=download_url[len(self.base_url):], advanced_mode=True)
-                    if resp.status_code == 200:
-                        content = resp.content
+                    if isinstance(content, str):
+                        yield Document(page_content=content, metadata=metadata)
                     else:
-                        content = f"[Failed to download {download_url}: HTTP status code {resp.status_code}]"
-                except Exception as e:
-                    content = f"[Error downloading content: {str(e)}]"
-                if isinstance(content, str):
-                    yield Document(page_content=content, metadata=metadata)
-                else:
-                    yield Document(page_content="", metadata={
-                        **metadata,
-                        IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
-                        IndexerKeywords.CONTENT_IN_BYTES.value: content
-                    })
+                        yield Document(page_content="", metadata={
+                            **metadata,
+                            IndexerKeywords.CONTENT_FILE_NAME.value: f".{file_ext}",
+                            IndexerKeywords.CONTENT_IN_BYTES.value: content
+                        })
         except Exception as e:
             yield from ()
@@ -1648,8 +1736,15 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
             "include_restricted_content": (Optional[bool], Field(description="Include restricted content.", default=False)),
             "include_archived_content": (Optional[bool], Field(description="Include archived content.", default=False)),
             "include_attachments": (Optional[bool], Field(description="Include attachments.", default=False)),
+            'include_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to include when processing attachments: i.e. ['*.png', '*.jpg']. "
+                            "If empty, all files will be processed (except skip_extensions).",
+                default=[])),
+            'skip_extensions': (Optional[List[str]], Field(
+                description="List of file extensions to skip when processing attachments: i.e. ['*.png', '*.jpg']",
+                default=[])),
             "include_comments": (Optional[bool], Field(description="Include comments.", default=False)),
-            "include_labels": (Optional[bool], Field(description="Include labels.", default=True)),
+            "include_labels": (Optional[bool], Field(description="Include labels.", default=False)),
             "ocr_languages": (Optional[str], Field(description="OCR languages for processing attachments.", default='eng')),
             "keep_markdown_format": (Optional[bool], Field(description="Keep the markdown format.", default=True)),
             "keep_newlines": (Optional[bool], Field(description="Keep newlines in the content.", default=True)),
@@ -1773,4 +1868,5 @@ class ConfluenceAPIWrapper(NonCodeIndexerToolkit):
                 "description": self.get_page_attachments.__doc__,
                 "args_schema": GetPageAttachmentsInput,
             }
-        ]
+        ]

alita_sdk/tools/confluence/loader.py CHANGED Viewed

@@ -3,6 +3,7 @@ from typing import Optional, List
 from logging import getLogger
 import requests
+from langchain_core.documents import Document
 logger = getLogger(__name__)
 from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
         else:
             return super().process_image(link, ocr_languages)
+    def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
+                     content_format: ContentFormat, ocr_languages: Optional[str] = None,
+                     keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
+        if not page.get("title"):
+            # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
+            page["title"] = "Untitled"
+        return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
+                                    ocr_languages, keep_markdown_format, keep_newlines)
     # TODO review usage
     # def process_svg(
     #         self,

alita_sdk/tools/custom_open_api/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ from pydantic import create_model, BaseModel, ConfigDict, Field
 from .api_wrapper import OpenApiWrapper
 from ..base.tool import BaseAction
-from ..utils import clean_string, TOOLKIT_SPLITTER
+from ..utils import clean_string
 name = "openapi"
@@ -43,14 +43,19 @@ class OpenApiToolkit(BaseToolkit):
         openapi_api_wrapper = OpenApiWrapper(**kwargs)
         available_tools = openapi_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name + TOOLKIT_SPLITTER) if toolkit_name else ''
+        # Use clean toolkit name for context (max 1000 chars in description)
+        toolkit_context = f" [Toolkit: {clean_string(toolkit_name)}]" if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            # Add toolkit context to description with character limit
+            description = tool["description"]
+            if toolkit_context and len(description + toolkit_context) <= 1000:
+                description = description + toolkit_context
             tools.append(BaseAction(
                 api_wrapper=openapi_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)

alita_sdk/tools/elastic/__init__.py CHANGED Viewed

@@ -5,7 +5,7 @@ from pydantic import BaseModel, ConfigDict, create_model, Field, SecretStr
 from .api_wrapper import ELITEAElasticApiWrapper
 from ..base.tool import BaseAction
-from ..utils import clean_string, TOOLKIT_SPLITTER, get_max_toolkit_length
+from ..utils import clean_string, get_max_toolkit_length
 name = "elastic"
@@ -19,15 +19,13 @@ def get_tools(tool):
 class ElasticToolkit(BaseToolkit):
     tools: list[BaseTool] = []
-    toolkit_max_length: int = 0
     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in ELITEAElasticApiWrapper.model_construct().get_available_tools()}
-        ElasticToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            url=(str, Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True, 'max_toolkit_length': ElasticToolkit.toolkit_max_length})),
+            url=(str, Field(default=None, title="Elasticsearch URL", description="Elasticsearch URL", json_schema_extra={'toolkit_name': True})),
             api_key=(
                 Optional[SecretStr],
                 Field(
@@ -48,14 +46,17 @@ class ElasticToolkit(BaseToolkit):
         elastic_api_wrapper = ELITEAElasticApiWrapper(**kwargs)
         available_tools = elastic_api_wrapper.get_available_tools()
         tools = []
-        prefix = clean_string(toolkit_name, ElasticToolkit.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         for tool in available_tools:
             if selected_tools and tool["name"] not in selected_tools:
                 continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=elastic_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"],
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)

alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl

alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl