alita-sdk 0.3.462__py3-none-any.whl → 0.3.627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +15 -3
- alita_sdk/cli/agent_loader.py +56 -8
- alita_sdk/cli/agent_ui.py +93 -31
- alita_sdk/cli/agents.py +2274 -230
- alita_sdk/cli/callbacks.py +96 -25
- alita_sdk/cli/cli.py +10 -1
- alita_sdk/cli/config.py +162 -9
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/testcases/__init__.py +94 -0
- alita_sdk/cli/testcases/data_generation.py +119 -0
- alita_sdk/cli/testcases/discovery.py +96 -0
- alita_sdk/cli/testcases/executor.py +84 -0
- alita_sdk/cli/testcases/logger.py +85 -0
- alita_sdk/cli/testcases/parser.py +172 -0
- alita_sdk/cli/testcases/prompts.py +91 -0
- alita_sdk/cli/testcases/reporting.py +125 -0
- alita_sdk/cli/testcases/setup.py +108 -0
- alita_sdk/cli/testcases/test_runner.py +282 -0
- alita_sdk/cli/testcases/utils.py +39 -0
- alita_sdk/cli/testcases/validation.py +90 -0
- alita_sdk/cli/testcases/workflow.py +196 -0
- alita_sdk/cli/toolkit.py +14 -17
- alita_sdk/cli/toolkit_loader.py +35 -5
- alita_sdk/cli/tools/__init__.py +36 -2
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +910 -64
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +1 -1
- alita_sdk/configurations/ado.py +141 -20
- alita_sdk/configurations/bitbucket.py +0 -3
- alita_sdk/configurations/confluence.py +76 -42
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +17 -5
- alita_sdk/configurations/openapi.py +329 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/runtime/clients/artifact.py +3 -3
- alita_sdk/runtime/clients/client.py +353 -48
- alita_sdk/runtime/clients/sandbox_client.py +0 -21
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +123 -26
- alita_sdk/runtime/langchain/constants.py +642 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +6 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/document_loaders/constants.py +12 -7
- alita_sdk/runtime/langchain/langraph_agent.py +279 -73
- alita_sdk/runtime/langchain/utils.py +82 -15
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +7 -0
- alita_sdk/runtime/toolkits/application.py +21 -9
- alita_sdk/runtime/toolkits/artifact.py +15 -5
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +139 -251
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +251 -6
- alita_sdk/runtime/toolkits/tools.py +238 -32
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +3 -1
- alita_sdk/runtime/tools/application.py +20 -6
- alita_sdk/runtime/tools/artifact.py +511 -28
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +43 -15
- alita_sdk/runtime/tools/image_generation.py +50 -44
- alita_sdk/runtime/tools/llm.py +852 -67
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_remote_tool.py +25 -10
- alita_sdk/runtime/tools/mcp_server_tool.py +7 -6
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +9 -6
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +7 -2
- alita_sdk/runtime/tools/vectorstore_base.py +51 -11
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +202 -5
- alita_sdk/runtime/utils/mcp_sse_client.py +36 -7
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/serialization.py +155 -0
- alita_sdk/runtime/utils/streamlit.py +6 -10
- alita_sdk/runtime/utils/toolkit_utils.py +16 -5
- alita_sdk/runtime/utils/utils.py +36 -0
- alita_sdk/tools/__init__.py +113 -29
- alita_sdk/tools/ado/repos/__init__.py +51 -33
- alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
- alita_sdk/tools/ado/test_plan/__init__.py +25 -9
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +25 -8
- alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
- alita_sdk/tools/ado/work_item/__init__.py +26 -9
- alita_sdk/tools/ado/work_item/ado_wrapper.py +56 -3
- alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +11 -8
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +170 -45
- alita_sdk/tools/bitbucket/__init__.py +17 -12
- alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
- alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
- alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +10 -7
- alita_sdk/tools/cloud/azure/__init__.py +10 -7
- alita_sdk/tools/cloud/gcp/__init__.py +10 -7
- alita_sdk/tools/cloud/k8s/__init__.py +10 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +10 -7
- alita_sdk/tools/code_indexer_toolkit.py +73 -23
- alita_sdk/tools/confluence/__init__.py +21 -15
- alita_sdk/tools/confluence/api_wrapper.py +78 -23
- alita_sdk/tools/confluence/loader.py +4 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +493 -30
- alita_sdk/tools/figma/__init__.py +58 -11
- alita_sdk/tools/figma/api_wrapper.py +1235 -143
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +13 -14
- alita_sdk/tools/github/github_client.py +224 -100
- alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
- alita_sdk/tools/github/schemas.py +14 -5
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/github/tool_prompts.py +9 -22
- alita_sdk/tools/gitlab/__init__.py +15 -11
- alita_sdk/tools/gitlab/api_wrapper.py +207 -41
- alita_sdk/tools/gitlab_org/__init__.py +10 -8
- alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
- alita_sdk/tools/google/bigquery/__init__.py +13 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +10 -8
- alita_sdk/tools/google_places/api_wrapper.py +1 -1
- alita_sdk/tools/jira/__init__.py +17 -11
- alita_sdk/tools/jira/api_wrapper.py +91 -40
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +11 -3
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +490 -114
- alita_sdk/tools/openapi/api_wrapper.py +1368 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/pptx/__init__.py +10 -9
- alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
- alita_sdk/tools/qtest/__init__.py +30 -10
- alita_sdk/tools/qtest/api_wrapper.py +430 -13
- alita_sdk/tools/rally/__init__.py +10 -8
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +12 -9
- alita_sdk/tools/salesforce/__init__.py +10 -9
- alita_sdk/tools/servicenow/__init__.py +17 -14
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +10 -8
- alita_sdk/tools/sharepoint/api_wrapper.py +4 -4
- alita_sdk/tools/slack/__init__.py +10 -8
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +11 -9
- alita_sdk/tools/testio/__init__.py +10 -8
- alita_sdk/tools/testrail/__init__.py +11 -8
- alita_sdk/tools/testrail/api_wrapper.py +1 -1
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +77 -3
- alita_sdk/tools/utils/text_operations.py +410 -0
- alita_sdk/tools/utils/tool_prompts.py +79 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
- alita_sdk/tools/xray/__init__.py +12 -9
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +9 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +11 -8
- alita_sdk/tools/zephyr_essential/__init__.py +10 -8
- alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
- alita_sdk/tools/zephyr_essential/client.py +2 -2
- alita_sdk/tools/zephyr_scale/__init__.py +11 -9
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +10 -8
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +147 -7
- alita_sdk-0.3.627.dist-info/RECORD +468 -0
- alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.462.dist-info/RECORD +0 -384
- alita_sdk-0.3.462.dist-info/entry_points.txt +0 -2
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.462.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
|
@@ -21,14 +21,16 @@ from openpyxl import load_workbook
|
|
|
21
21
|
from xlrd import open_workbook
|
|
22
22
|
from langchain_core.documents import Document
|
|
23
23
|
from .AlitaTableLoader import AlitaTableLoader
|
|
24
|
+
from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
|
|
24
25
|
|
|
25
26
|
cell_delimiter = " | "
|
|
26
27
|
|
|
27
28
|
class AlitaExcelLoader(AlitaTableLoader):
|
|
28
|
-
excel_by_sheets: bool = False
|
|
29
29
|
sheet_name: str = None
|
|
30
|
-
return_type: str = 'str'
|
|
31
30
|
file_name: str = None
|
|
31
|
+
max_tokens: int = LOADER_MAX_TOKENS_DEFAULT
|
|
32
|
+
add_header_to_chunks: bool = False
|
|
33
|
+
header_row_number: int = 1
|
|
32
34
|
|
|
33
35
|
def __init__(self, **kwargs):
|
|
34
36
|
if not kwargs.get('file_path'):
|
|
@@ -39,9 +41,22 @@ class AlitaExcelLoader(AlitaTableLoader):
|
|
|
39
41
|
else:
|
|
40
42
|
self.file_name = kwargs.get('file_path')
|
|
41
43
|
super().__init__(**kwargs)
|
|
42
|
-
self.excel_by_sheets = kwargs.get('excel_by_sheets')
|
|
43
|
-
self.return_type = kwargs.get('return_type')
|
|
44
44
|
self.sheet_name = kwargs.get('sheet_name')
|
|
45
|
+
# Set and validate chunking parameters only once
|
|
46
|
+
self.max_tokens = int(kwargs.get('max_tokens', LOADER_MAX_TOKENS_DEFAULT))
|
|
47
|
+
self.add_header_to_chunks = bool(kwargs.get('add_header_to_chunks', False))
|
|
48
|
+
header_row_number = kwargs.get('header_row_number', 1)
|
|
49
|
+
# Validate header_row_number
|
|
50
|
+
try:
|
|
51
|
+
header_row_number = int(header_row_number)
|
|
52
|
+
if header_row_number > 0:
|
|
53
|
+
self.header_row_number = header_row_number
|
|
54
|
+
else:
|
|
55
|
+
self.header_row_number = 1
|
|
56
|
+
self.add_header_to_chunks = False
|
|
57
|
+
except (ValueError, TypeError):
|
|
58
|
+
self.header_row_number = 1
|
|
59
|
+
self.add_header_to_chunks = False
|
|
45
60
|
|
|
46
61
|
def get_content(self):
|
|
47
62
|
try:
|
|
@@ -64,59 +79,32 @@ class AlitaExcelLoader(AlitaTableLoader):
|
|
|
64
79
|
Reads .xlsx files using openpyxl.
|
|
65
80
|
"""
|
|
66
81
|
workbook = load_workbook(self.file_path, data_only=True) # `data_only=True` ensures we get cell values, not formulas
|
|
67
|
-
|
|
82
|
+
sheets = workbook.sheetnames
|
|
68
83
|
if self.sheet_name:
|
|
69
|
-
|
|
70
|
-
if self.sheet_name in workbook.sheetnames:
|
|
84
|
+
if self.sheet_name in sheets:
|
|
71
85
|
sheet_content = self.parse_sheet(workbook[self.sheet_name])
|
|
72
|
-
return sheet_content
|
|
73
86
|
else:
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
# Parse each sheet individually and return as a dictionary
|
|
77
|
-
result = {}
|
|
78
|
-
for sheet_name in workbook.sheetnames:
|
|
79
|
-
sheet_content = self.parse_sheet(workbook[sheet_name])
|
|
80
|
-
result[sheet_name] = sheet_content
|
|
81
|
-
return result
|
|
87
|
+
sheet_content = [f"Sheet '{self.sheet_name}' does not exist in the workbook."]
|
|
88
|
+
return {self.sheet_name: sheet_content}
|
|
82
89
|
else:
|
|
83
|
-
#
|
|
84
|
-
|
|
85
|
-
for sheet_name in workbook.sheetnames:
|
|
86
|
-
sheet_content = self.parse_sheet(workbook[sheet_name])
|
|
87
|
-
result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
|
|
88
|
-
return "\n\n".join(result)
|
|
90
|
+
# Dictionary comprehension for all sheets
|
|
91
|
+
return {name: self.parse_sheet(workbook[name]) for name in sheets}
|
|
89
92
|
|
|
90
93
|
def _read_xls(self):
|
|
91
94
|
"""
|
|
92
95
|
Reads .xls files using xlrd.
|
|
93
96
|
"""
|
|
94
97
|
workbook = open_workbook(filename=self.file_name, file_contents=self.file_content)
|
|
95
|
-
|
|
98
|
+
sheets = workbook.sheet_names()
|
|
96
99
|
if self.sheet_name:
|
|
97
|
-
|
|
98
|
-
if self.sheet_name in workbook.sheet_names():
|
|
100
|
+
if self.sheet_name in sheets:
|
|
99
101
|
sheet = workbook.sheet_by_name(self.sheet_name)
|
|
100
|
-
|
|
101
|
-
return sheet_content
|
|
102
|
+
return {self.sheet_name: self.parse_sheet_xls(sheet)}
|
|
102
103
|
else:
|
|
103
|
-
|
|
104
|
-
elif self.excel_by_sheets:
|
|
105
|
-
# Parse each sheet individually and return as a dictionary
|
|
106
|
-
result = {}
|
|
107
|
-
for sheet_name in workbook.sheet_names():
|
|
108
|
-
sheet = workbook.sheet_by_name(sheet_name)
|
|
109
|
-
sheet_content = self.parse_sheet_xls(sheet)
|
|
110
|
-
result[sheet_name] = sheet_content
|
|
111
|
-
return result
|
|
104
|
+
return {self.sheet_name: [f"Sheet '{self.sheet_name}' does not exist in the workbook."]}
|
|
112
105
|
else:
|
|
113
|
-
#
|
|
114
|
-
|
|
115
|
-
for sheet_name in workbook.sheet_names():
|
|
116
|
-
sheet = workbook.sheet_by_name(sheet_name)
|
|
117
|
-
sheet_content = self.parse_sheet_xls(sheet)
|
|
118
|
-
result.append(f"====== Sheet name: {sheet_name} ======\n{sheet_content}")
|
|
119
|
-
return "\n\n".join(result)
|
|
106
|
+
# Dictionary comprehension for all sheets
|
|
107
|
+
return {name: self.parse_sheet_xls(workbook.sheet_by_name(name)) for name in sheets}
|
|
120
108
|
|
|
121
109
|
def parse_sheet(self, sheet):
|
|
122
110
|
"""
|
|
@@ -170,34 +158,89 @@ class AlitaExcelLoader(AlitaTableLoader):
|
|
|
170
158
|
# Format the sheet content based on the return type
|
|
171
159
|
return self._format_sheet_content(sheet_content)
|
|
172
160
|
|
|
173
|
-
def _format_sheet_content(self,
|
|
161
|
+
def _format_sheet_content(self, rows):
|
|
174
162
|
"""
|
|
175
|
-
|
|
163
|
+
Specification:
|
|
164
|
+
Formats a list of sheet rows into a list of string chunks according to the following rules:
|
|
165
|
+
1. If max_tokens < 1, returns a single chunk (list of one string) with all rows joined by a newline ('\n').
|
|
166
|
+
- If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended as the first line.
|
|
167
|
+
2. If max_tokens >= 1:
|
|
168
|
+
a. Each chunk is a string containing one or more rows, separated by newlines ('\n'), such that the total token count (as measured by tiktoken) does not exceed max_tokens.
|
|
169
|
+
b. If add_header_to_chunks is True and header_row_number is valid, the specified header row is prepended once at the top of each chunk (not before every row).
|
|
170
|
+
c. If a single row exceeds max_tokens, it is placed in its own chunk without splitting, with the header prepended if applicable.
|
|
171
|
+
3. Returns: List[str], where each string is a chunk ready for further processing.
|
|
176
172
|
"""
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
173
|
+
import tiktoken
|
|
174
|
+
encoding = tiktoken.get_encoding('cl100k_base')
|
|
175
|
+
|
|
176
|
+
# --- Inner functions ---
|
|
177
|
+
def count_tokens(text):
|
|
178
|
+
"""Count tokens in text using tiktoken encoding."""
|
|
179
|
+
return len(encoding.encode(text))
|
|
180
|
+
|
|
181
|
+
def finalize_chunk(chunk_rows):
|
|
182
|
+
"""Join rows for a chunk, prepending header if needed."""
|
|
183
|
+
if self.add_header_to_chunks and header:
|
|
184
|
+
return '\n'.join([header] + chunk_rows)
|
|
185
|
+
else:
|
|
186
|
+
return '\n'.join(chunk_rows)
|
|
187
|
+
# --- End inner functions ---
|
|
188
|
+
|
|
189
|
+
# If max_tokens < 1, return all rows as a single chunk
|
|
190
|
+
if self.max_tokens < 1:
|
|
191
|
+
return ['\n'.join(rows)]
|
|
192
|
+
|
|
193
|
+
# Extract header if needed
|
|
194
|
+
header = None
|
|
195
|
+
if self.add_header_to_chunks and rows:
|
|
196
|
+
header_idx = self.header_row_number - 1
|
|
197
|
+
header = rows.pop(header_idx)
|
|
198
|
+
|
|
199
|
+
chunks = [] # List to store final chunks
|
|
200
|
+
current_chunk = [] # Accumulate rows for the current chunk
|
|
201
|
+
current_tokens = 0 # Token count for the current chunk
|
|
202
|
+
|
|
203
|
+
for row in rows:
|
|
204
|
+
row_tokens = count_tokens(row)
|
|
205
|
+
# If row itself exceeds max_tokens, flush current chunk and add row as its own chunk (with header if needed)
|
|
206
|
+
if row_tokens > self.max_tokens:
|
|
207
|
+
if current_chunk:
|
|
208
|
+
chunks.append(finalize_chunk(current_chunk))
|
|
209
|
+
current_chunk = []
|
|
210
|
+
current_tokens = 0
|
|
211
|
+
# Add the large row as its own chunk, with header if needed
|
|
212
|
+
if self.add_header_to_chunks and header:
|
|
213
|
+
chunks.append(finalize_chunk([row]))
|
|
214
|
+
else:
|
|
215
|
+
chunks.append(row)
|
|
216
|
+
continue
|
|
217
|
+
# If adding row would exceed max_tokens, flush current chunk and start new
|
|
218
|
+
if current_tokens + row_tokens > self.max_tokens:
|
|
219
|
+
if current_chunk:
|
|
220
|
+
chunks.append(finalize_chunk(current_chunk))
|
|
221
|
+
current_chunk = [row]
|
|
222
|
+
current_tokens = row_tokens
|
|
223
|
+
else:
|
|
224
|
+
current_chunk.append(row)
|
|
225
|
+
current_tokens += row_tokens
|
|
226
|
+
# Add any remaining rows as the last chunk
|
|
227
|
+
if current_chunk:
|
|
228
|
+
chunks.append(finalize_chunk(current_chunk))
|
|
229
|
+
return chunks
|
|
188
230
|
|
|
189
231
|
def load(self) -> list:
|
|
190
232
|
docs = []
|
|
191
233
|
content_per_sheet = self.get_content()
|
|
192
|
-
|
|
234
|
+
# content_per_sheet is a dict of sheet_name: list of chunk strings
|
|
235
|
+
for sheet_name, content_chunks in content_per_sheet.items():
|
|
193
236
|
metadata = {
|
|
194
237
|
"source": f'{self.file_path}:{sheet_name}',
|
|
195
238
|
"sheet_name": sheet_name,
|
|
196
239
|
"file_type": "excel",
|
|
197
|
-
"excel_by_sheets": self.excel_by_sheets,
|
|
198
|
-
"return_type": self.return_type,
|
|
199
240
|
}
|
|
200
|
-
|
|
241
|
+
# Each chunk is a separate Document
|
|
242
|
+
for chunk in content_chunks:
|
|
243
|
+
docs.append(Document(page_content=chunk, metadata=metadata))
|
|
201
244
|
return docs
|
|
202
245
|
|
|
203
246
|
def read(self, lazy: bool = False):
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
from .AlitaJSONLoader import AlitaJSONLoader
|
|
2
|
+
import json
|
|
3
|
+
from io import StringIO
|
|
4
|
+
from typing import List, Iterator
|
|
5
|
+
|
|
6
|
+
from langchain_core.documents import Document
|
|
7
|
+
from langchain_core.tools import ToolException
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AlitaJSONLinesLoader(AlitaJSONLoader):
|
|
11
|
+
"""Load local JSONL files (one JSON object per line) using AlitaJSONLoader behavior.
|
|
12
|
+
|
|
13
|
+
Behavior:
|
|
14
|
+
- Supports both `file_path` and `file_content` (bytes or file-like object), same as AlitaJSONLoader.
|
|
15
|
+
- Treats each non-empty line as an independent JSON object.
|
|
16
|
+
- Aggregates all parsed JSON objects into a list and feeds them through the same
|
|
17
|
+
RecursiveJsonSplitter-based chunking used by AlitaJSONLoader.lazy_load.
|
|
18
|
+
- Returns a list of Documents with chunked JSON content.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
def __init__(self, **kwargs):
|
|
22
|
+
# Reuse AlitaJSONLoader initialization logic (file_path / file_content handling, encoding, etc.)
|
|
23
|
+
super().__init__(**kwargs)
|
|
24
|
+
|
|
25
|
+
def _iter_lines(self) -> Iterator[str]:
|
|
26
|
+
"""Yield lines from file_path or file_content, mirroring AlitaJSONLoader sources."""
|
|
27
|
+
# Prefer file_path if available
|
|
28
|
+
if hasattr(self, "file_path") and self.file_path:
|
|
29
|
+
with open(self.file_path, "r", encoding=self.encoding) as f:
|
|
30
|
+
for line in f:
|
|
31
|
+
yield line
|
|
32
|
+
# Fallback to file_content if available
|
|
33
|
+
elif hasattr(self, "file_content") and self.file_content:
|
|
34
|
+
# file_content may be bytes or a file-like object
|
|
35
|
+
if isinstance(self.file_content, (bytes, bytearray)):
|
|
36
|
+
text = self.file_content.decode(self.encoding)
|
|
37
|
+
for line in StringIO(text):
|
|
38
|
+
yield line
|
|
39
|
+
else:
|
|
40
|
+
# Assume it's a text file-like object positioned at the beginning
|
|
41
|
+
self.file_content.seek(0)
|
|
42
|
+
for line in self.file_content:
|
|
43
|
+
yield line
|
|
44
|
+
else:
|
|
45
|
+
raise ToolException("'file_path' or 'file_content' parameter should be provided.")
|
|
46
|
+
|
|
47
|
+
def load(self) -> List[Document]: # type: ignore[override]
|
|
48
|
+
"""Load JSONL content by delegating each non-empty line to AlitaJSONLoader.
|
|
49
|
+
|
|
50
|
+
For each non-empty line in the underlying source (file_path or file_content):
|
|
51
|
+
- Create a temporary AlitaJSONLoader instance with that line as file_content.
|
|
52
|
+
- Call lazy_load() on that instance to apply the same RecursiveJsonSplitter logic
|
|
53
|
+
as for a normal JSON file.
|
|
54
|
+
- Accumulate all Documents from all lines and return them as a single list.
|
|
55
|
+
"""
|
|
56
|
+
docs: List[Document] = []
|
|
57
|
+
|
|
58
|
+
for raw_line in self._iter_lines():
|
|
59
|
+
line = raw_line.strip()
|
|
60
|
+
if not line:
|
|
61
|
+
continue
|
|
62
|
+
try:
|
|
63
|
+
# Instantiate a per-line AlitaJSONLoader using the same configuration
|
|
64
|
+
line_loader = AlitaJSONLoader(
|
|
65
|
+
file_content=line,
|
|
66
|
+
file_name=getattr(self, "file_name", str(getattr(self, "file_path", "no_name"))),
|
|
67
|
+
encoding=self.encoding,
|
|
68
|
+
autodetect_encoding=self.autodetect_encoding,
|
|
69
|
+
max_tokens=self.max_tokens,
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
for doc in line_loader.lazy_load():
|
|
73
|
+
docs.append(doc)
|
|
74
|
+
except Exception as e:
|
|
75
|
+
raise ToolException(f"Error processing JSONL line: {line[:100]}... Error: {e}") from e
|
|
76
|
+
|
|
77
|
+
return docs
|
|
@@ -32,6 +32,8 @@ class AlitaJSONLoader(BaseLoader):
|
|
|
32
32
|
elif hasattr(self, 'file_content') and self.file_content:
|
|
33
33
|
if isinstance(self.file_content, bytes):
|
|
34
34
|
return json.loads(self.file_content.decode(self.encoding))
|
|
35
|
+
elif isinstance(self.file_content, str):
|
|
36
|
+
return json.loads(self.file_content)
|
|
35
37
|
else:
|
|
36
38
|
return json.load(self.file_content)
|
|
37
39
|
else:
|
|
@@ -45,7 +47,6 @@ class AlitaJSONLoader(BaseLoader):
|
|
|
45
47
|
try:
|
|
46
48
|
with open(self.file_path, encoding=encoding.encoding) as f:
|
|
47
49
|
return f.read()
|
|
48
|
-
break
|
|
49
50
|
except UnicodeDecodeError:
|
|
50
51
|
continue
|
|
51
52
|
elif hasattr(self, 'file_content') and self.file_content:
|
|
@@ -58,9 +59,11 @@ class AlitaJSONLoader(BaseLoader):
|
|
|
58
59
|
else:
|
|
59
60
|
raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
|
|
60
61
|
else:
|
|
61
|
-
raise RuntimeError(f"Error loading content with encoding {self.encoding}
|
|
62
|
+
raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
|
|
62
63
|
except Exception as e:
|
|
63
|
-
|
|
64
|
+
# Preserve original error details so callers (e.g., parse_file_content)
|
|
65
|
+
# can expose the real root cause instead of a generic message.
|
|
66
|
+
raise RuntimeError(f"Error loading content: {e}") from e
|
|
64
67
|
|
|
65
68
|
def lazy_load(self) -> Iterator[Document]:
|
|
66
69
|
"""Load from file path."""
|
|
@@ -6,6 +6,7 @@ from .utils import perform_llm_prediction_for_image_bytes, create_temp_file
|
|
|
6
6
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
|
7
7
|
from langchain_core.documents import Document
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
class AlitaPowerPointLoader:
|
|
10
11
|
|
|
11
12
|
def __init__(self, file_path=None, file_content=None, mode=None, **unstructured_kwargs):
|
|
@@ -43,10 +44,203 @@ class AlitaPowerPointLoader:
|
|
|
43
44
|
else:
|
|
44
45
|
raise ToolException(f"Unknown mode value: {self.mode}. Only 'single', 'paged' values allowed.")
|
|
45
46
|
|
|
47
|
+
def _extract_table_as_markdown(self, table) -> str:
|
|
48
|
+
"""Convert PPTX table to markdown format."""
|
|
49
|
+
if not table.rows:
|
|
50
|
+
return ""
|
|
51
|
+
|
|
52
|
+
rows = []
|
|
53
|
+
for row in table.rows:
|
|
54
|
+
cells = []
|
|
55
|
+
for cell in row.cells:
|
|
56
|
+
cell_text = cell.text.strip().replace("|", "\\|").replace("\n", " ")
|
|
57
|
+
cells.append(cell_text)
|
|
58
|
+
rows.append("| " + " | ".join(cells) + " |")
|
|
59
|
+
|
|
60
|
+
if len(rows) > 0:
|
|
61
|
+
# Add header separator after first row
|
|
62
|
+
num_cols = len(table.rows[0].cells)
|
|
63
|
+
header_sep = "| " + " | ".join(["---"] * num_cols) + " |"
|
|
64
|
+
rows.insert(1, header_sep)
|
|
65
|
+
|
|
66
|
+
return "\n**Table:**\n" + "\n".join(rows) + "\n"
|
|
67
|
+
|
|
68
|
+
def _extract_chart_info(self, chart) -> str:
|
|
69
|
+
"""Extract data and labels from PPTX chart."""
|
|
70
|
+
result = []
|
|
71
|
+
|
|
72
|
+
# Extract chart title
|
|
73
|
+
try:
|
|
74
|
+
if chart.has_title and chart.chart_title.has_text_frame:
|
|
75
|
+
title_text = chart.chart_title.text_frame.text.strip()
|
|
76
|
+
if title_text:
|
|
77
|
+
result.append(f"Chart Title: {title_text}")
|
|
78
|
+
except Exception:
|
|
79
|
+
pass
|
|
80
|
+
|
|
81
|
+
# Try to extract series data directly from chart.series (works for some chart types)
|
|
82
|
+
try:
|
|
83
|
+
if hasattr(chart, 'series') and chart.series:
|
|
84
|
+
for series in chart.series:
|
|
85
|
+
series_name = series.name if series.name else "Unnamed Series"
|
|
86
|
+
values = []
|
|
87
|
+
categories = []
|
|
88
|
+
|
|
89
|
+
# Try to get values
|
|
90
|
+
try:
|
|
91
|
+
if hasattr(series, 'values') and series.values:
|
|
92
|
+
values = list(series.values)
|
|
93
|
+
except Exception:
|
|
94
|
+
pass
|
|
95
|
+
|
|
96
|
+
# Try to get categories from series
|
|
97
|
+
try:
|
|
98
|
+
if hasattr(series, 'categories') and series.categories:
|
|
99
|
+
categories = list(series.categories)
|
|
100
|
+
except Exception:
|
|
101
|
+
pass
|
|
102
|
+
|
|
103
|
+
# Build output
|
|
104
|
+
if categories and values and len(categories) == len(values):
|
|
105
|
+
data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
|
|
106
|
+
result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
|
|
107
|
+
elif values:
|
|
108
|
+
result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
|
|
109
|
+
elif categories:
|
|
110
|
+
result.append(f"Series '{series_name}' categories: {', '.join(str(c) for c in categories)}")
|
|
111
|
+
except Exception:
|
|
112
|
+
pass
|
|
113
|
+
|
|
114
|
+
# Fallback: try plots API for bar/line charts
|
|
115
|
+
if not result or (len(result) == 1 and "Chart Title" in result[0]):
|
|
116
|
+
try:
|
|
117
|
+
if hasattr(chart, 'plots') and chart.plots and len(chart.plots) > 0:
|
|
118
|
+
plot = chart.plots[0]
|
|
119
|
+
categories = []
|
|
120
|
+
if hasattr(plot, 'categories') and plot.categories:
|
|
121
|
+
categories = list(plot.categories)
|
|
122
|
+
if categories:
|
|
123
|
+
result.append(f"Categories: {', '.join(str(c) for c in categories)}")
|
|
124
|
+
|
|
125
|
+
# Extract series data from plot
|
|
126
|
+
for series in plot.series:
|
|
127
|
+
series_name = series.name if series.name else "Unnamed Series"
|
|
128
|
+
values = list(series.values) if series.values else []
|
|
129
|
+
|
|
130
|
+
if categories and len(categories) == len(values):
|
|
131
|
+
data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
|
|
132
|
+
result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
|
|
133
|
+
elif values:
|
|
134
|
+
result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
|
|
135
|
+
except Exception:
|
|
136
|
+
pass
|
|
137
|
+
|
|
138
|
+
# Final fallback: parse XML directly for unsupported chart types (e.g., pie3DChart)
|
|
139
|
+
if not result or (len(result) == 1 and "Chart Title" in result[0]):
|
|
140
|
+
try:
|
|
141
|
+
result.extend(self._extract_chart_from_xml(chart))
|
|
142
|
+
except Exception:
|
|
143
|
+
pass
|
|
144
|
+
|
|
145
|
+
# If we still have no data, add a note
|
|
146
|
+
if not result:
|
|
147
|
+
result.append("(Chart detected - there is no parsed data from this type of chart)")
|
|
148
|
+
|
|
149
|
+
return "\n**Chart:**\n" + "\n".join(result) + "\n"
|
|
150
|
+
|
|
151
|
+
def _extract_chart_from_xml(self, chart) -> list:
|
|
152
|
+
"""Extract chart data by parsing the underlying XML directly."""
|
|
153
|
+
result = []
|
|
154
|
+
|
|
155
|
+
# Get the chart part XML
|
|
156
|
+
chart_part = chart.part
|
|
157
|
+
chart_element = chart_part.element
|
|
158
|
+
|
|
159
|
+
# Define namespaces used in chart XML
|
|
160
|
+
namespaces = {
|
|
161
|
+
'c': 'http://schemas.openxmlformats.org/drawingml/2006/chart',
|
|
162
|
+
'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
# Find all series (ser) elements
|
|
166
|
+
series_elements = chart_element.findall('.//c:ser', namespaces)
|
|
167
|
+
|
|
168
|
+
for ser in series_elements:
|
|
169
|
+
series_name = "Unnamed Series"
|
|
170
|
+
categories = []
|
|
171
|
+
values = []
|
|
172
|
+
|
|
173
|
+
# Extract series name from tx/v or tx/strRef
|
|
174
|
+
tx = ser.find('.//c:tx', namespaces)
|
|
175
|
+
if tx is not None:
|
|
176
|
+
v = tx.find('.//c:v', namespaces)
|
|
177
|
+
if v is not None and v.text:
|
|
178
|
+
series_name = v.text
|
|
179
|
+
|
|
180
|
+
# Extract category labels from c:cat
|
|
181
|
+
cat = ser.find('.//c:cat', namespaces)
|
|
182
|
+
if cat is not None:
|
|
183
|
+
# Try strRef first (string references)
|
|
184
|
+
str_cache = cat.find('.//c:strCache', namespaces)
|
|
185
|
+
if str_cache is not None:
|
|
186
|
+
for pt in str_cache.findall('.//c:pt', namespaces):
|
|
187
|
+
v = pt.find('c:v', namespaces)
|
|
188
|
+
if v is not None and v.text:
|
|
189
|
+
categories.append(v.text)
|
|
190
|
+
|
|
191
|
+
# Try numRef (numeric references used as categories)
|
|
192
|
+
if not categories:
|
|
193
|
+
num_cache = cat.find('.//c:numCache', namespaces)
|
|
194
|
+
if num_cache is not None:
|
|
195
|
+
for pt in num_cache.findall('.//c:pt', namespaces):
|
|
196
|
+
v = pt.find('c:v', namespaces)
|
|
197
|
+
if v is not None and v.text:
|
|
198
|
+
categories.append(v.text)
|
|
199
|
+
|
|
200
|
+
# Extract values from c:val
|
|
201
|
+
val = ser.find('.//c:val', namespaces)
|
|
202
|
+
if val is not None:
|
|
203
|
+
num_cache = val.find('.//c:numCache', namespaces)
|
|
204
|
+
if num_cache is not None:
|
|
205
|
+
for pt in num_cache.findall('.//c:pt', namespaces):
|
|
206
|
+
v = pt.find('c:v', namespaces)
|
|
207
|
+
if v is not None and v.text:
|
|
208
|
+
try:
|
|
209
|
+
values.append(float(v.text))
|
|
210
|
+
except ValueError:
|
|
211
|
+
values.append(v.text)
|
|
212
|
+
|
|
213
|
+
# Build output
|
|
214
|
+
if categories and values and len(categories) == len(values):
|
|
215
|
+
data_pairs = [f"{cat}: {val}" for cat, val in zip(categories, values)]
|
|
216
|
+
result.append(f"Series '{series_name}': {', '.join(data_pairs)}")
|
|
217
|
+
elif values:
|
|
218
|
+
result.append(f"Series '{series_name}': {', '.join(str(v) for v in values)}")
|
|
219
|
+
elif categories:
|
|
220
|
+
result.append(f"Series '{series_name}' categories: {', '.join(str(c) for c in categories)}")
|
|
221
|
+
|
|
222
|
+
return result
|
|
223
|
+
|
|
46
224
|
def read_pptx_slide(self, slide, index):
|
|
47
225
|
text_content = f'Slide: {index}\n'
|
|
48
226
|
for shape in slide.shapes:
|
|
49
|
-
|
|
227
|
+
# Handle tables
|
|
228
|
+
if shape.has_table:
|
|
229
|
+
text_content += self._extract_table_as_markdown(shape.table)
|
|
230
|
+
# Handle charts
|
|
231
|
+
elif shape.has_chart:
|
|
232
|
+
text_content += self._extract_chart_info(shape.chart)
|
|
233
|
+
# Handle images - check multiple ways images can be embedded
|
|
234
|
+
elif self.extract_images and self._is_image_shape(shape):
|
|
235
|
+
try:
|
|
236
|
+
image_blob = self._get_image_blob(shape)
|
|
237
|
+
if image_blob:
|
|
238
|
+
caption = perform_llm_prediction_for_image_bytes(image_blob, self.llm, self.prompt)
|
|
239
|
+
text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
|
|
240
|
+
except Exception:
|
|
241
|
+
pass
|
|
242
|
+
# Handle text frames with hyperlinks
|
|
243
|
+
elif hasattr(shape, "text_frame") and shape.text_frame is not None:
|
|
50
244
|
for paragraph in shape.text_frame.paragraphs:
|
|
51
245
|
for run in paragraph.runs:
|
|
52
246
|
if run.hyperlink and run.hyperlink.address:
|
|
@@ -56,14 +250,39 @@ class AlitaPowerPointLoader:
|
|
|
56
250
|
else:
|
|
57
251
|
text_content += run.text
|
|
58
252
|
text_content += "\n"
|
|
59
|
-
elif self.extract_images and shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
|
|
60
|
-
try:
|
|
61
|
-
caption = perform_llm_prediction_for_image_bytes(shape.image.blob, self.llm, self.prompt)
|
|
62
|
-
except:
|
|
63
|
-
caption = "unknown"
|
|
64
|
-
text_content += "\n**Image Transcript:**\n" + caption + "\n--------------------\n"
|
|
65
253
|
return text_content + "\n"
|
|
66
254
|
|
|
255
|
+
def _is_image_shape(self, shape) -> bool:
    """Check if shape contains an image using multiple detection methods.

    Returns True when any of the probes below indicates an embedded image;
    False otherwise. Probing is deliberately defensive: accessing
    ``shape.image`` on a shape without an image part may raise (python-pptx
    presumably raises ValueError rather than AttributeError here — verify
    against the library), and ``hasattr`` only suppresses AttributeError,
    so each probe is fully wrapped in its own try/except.
    """
    # Method 1: explicit picture shape type.
    if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
        return True
    # Method 2: shape exposes an image attribute with a non-empty blob.
    # The attribute access itself sits inside the try, because hasattr()
    # would let non-AttributeError exceptions from the property escape.
    try:
        if hasattr(shape, 'image') and shape.image is not None:
            if shape.image.blob:
                return True
    except Exception:
        pass
    # Method 3: placeholder shape that carries an image.
    try:
        if hasattr(shape, 'placeholder_format') and shape.placeholder_format is not None:
            if hasattr(shape, 'image') and shape.image is not None:
                return True
    except Exception:
        pass
    return False
|
|
275
|
+
|
|
276
|
+
def _get_image_blob(self, shape) -> bytes:
|
|
277
|
+
"""Extract image blob from shape using available methods."""
|
|
278
|
+
# Try direct image access
|
|
279
|
+
if hasattr(shape, 'image') and shape.image is not None:
|
|
280
|
+
try:
|
|
281
|
+
return shape.image.blob
|
|
282
|
+
except Exception:
|
|
283
|
+
pass
|
|
284
|
+
return None
|
|
285
|
+
|
|
67
286
|
def load(self):
|
|
68
287
|
content = self.get_content()
|
|
69
288
|
if isinstance(content, str):
|
|
@@ -58,9 +58,12 @@ class AlitaTextLoader(BaseLoader):
|
|
|
58
58
|
else:
|
|
59
59
|
raise ValueError("Neither file_path nor file_content is provided for encoding detection.")
|
|
60
60
|
else:
|
|
61
|
-
|
|
61
|
+
# Preserve original error details for callers
|
|
62
|
+
raise RuntimeError(f"Error loading content with encoding {self.encoding}: {e}") from e
|
|
62
63
|
except Exception as e:
|
|
63
|
-
|
|
64
|
+
# Preserve original error details so higher-level code (e.g., parse_file_content)
|
|
65
|
+
# can expose the real root cause instead of a generic message.
|
|
66
|
+
raise RuntimeError(f"Error loading content: {e}") from e
|
|
64
67
|
|
|
65
68
|
return text
|
|
66
69
|
|
|
@@ -21,12 +21,14 @@ from .AlitaDocxMammothLoader import AlitaDocxMammothLoader
|
|
|
21
21
|
from .AlitaExcelLoader import AlitaExcelLoader
|
|
22
22
|
from .AlitaImageLoader import AlitaImageLoader
|
|
23
23
|
from .AlitaJSONLoader import AlitaJSONLoader
|
|
24
|
+
from .AlitaJSONLinesLoader import AlitaJSONLinesLoader
|
|
24
25
|
from .AlitaPDFLoader import AlitaPDFLoader
|
|
25
26
|
from .AlitaPowerPointLoader import AlitaPowerPointLoader
|
|
26
27
|
from .AlitaTextLoader import AlitaTextLoader
|
|
27
28
|
from .AlitaMarkdownLoader import AlitaMarkdownLoader
|
|
28
29
|
from .AlitaPythonLoader import AlitaPythonLoader
|
|
29
30
|
from enum import Enum
|
|
31
|
+
from alita_sdk.runtime.langchain.constants import LOADER_MAX_TOKENS_DEFAULT
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
class LoaderProperties(Enum):
|
|
@@ -34,7 +36,7 @@ class LoaderProperties(Enum):
|
|
|
34
36
|
PROMPT_DEFAULT = 'use_default_prompt'
|
|
35
37
|
PROMPT = 'prompt'
|
|
36
38
|
|
|
37
|
-
DEFAULT_ALLOWED_BASE = {'max_tokens':
|
|
39
|
+
DEFAULT_ALLOWED_BASE = {'max_tokens': LOADER_MAX_TOKENS_DEFAULT}
|
|
38
40
|
|
|
39
41
|
DEFAULT_ALLOWED_WITH_LLM = {
|
|
40
42
|
**DEFAULT_ALLOWED_BASE,
|
|
@@ -43,6 +45,8 @@ DEFAULT_ALLOWED_WITH_LLM = {
|
|
|
43
45
|
LoaderProperties.PROMPT.value: "",
|
|
44
46
|
}
|
|
45
47
|
|
|
48
|
+
DEFAULT_ALLOWED_EXCEL = {**DEFAULT_ALLOWED_WITH_LLM, 'add_header_to_chunks': False, 'header_row_number': 1, 'max_tokens': -1, 'sheet_name': ''}
|
|
49
|
+
|
|
46
50
|
# Image file loaders mapping - directly supported by LLM with image_url
|
|
47
51
|
image_loaders_map = {
|
|
48
52
|
'.png': {
|
|
@@ -162,11 +166,12 @@ document_loaders_map = {
|
|
|
162
166
|
'spreadsheetml.sheet'),
|
|
163
167
|
'is_multimodal_processing': False,
|
|
164
168
|
'kwargs': {
|
|
165
|
-
'
|
|
166
|
-
'
|
|
167
|
-
'
|
|
169
|
+
'add_header_to_chunks': False,
|
|
170
|
+
'header_row_number': 1,
|
|
171
|
+
'max_tokens': -1,
|
|
172
|
+
'sheet_name': ''
|
|
168
173
|
},
|
|
169
|
-
'allowed_to_override':
|
|
174
|
+
'allowed_to_override': DEFAULT_ALLOWED_EXCEL
|
|
170
175
|
},
|
|
171
176
|
'.xls': {
|
|
172
177
|
'class': AlitaExcelLoader,
|
|
@@ -177,7 +182,7 @@ document_loaders_map = {
|
|
|
177
182
|
'raw_content': True,
|
|
178
183
|
'cleanse': False
|
|
179
184
|
},
|
|
180
|
-
'allowed_to_override':
|
|
185
|
+
'allowed_to_override': DEFAULT_ALLOWED_EXCEL
|
|
181
186
|
},
|
|
182
187
|
'.pdf': {
|
|
183
188
|
'class': AlitaPDFLoader,
|
|
@@ -204,7 +209,7 @@ document_loaders_map = {
|
|
|
204
209
|
'allowed_to_override': DEFAULT_ALLOWED_BASE
|
|
205
210
|
},
|
|
206
211
|
'.jsonl': {
|
|
207
|
-
'class':
|
|
212
|
+
'class': AlitaJSONLinesLoader,
|
|
208
213
|
'mime_type': 'application/jsonl',
|
|
209
214
|
'is_multimodal_processing': False,
|
|
210
215
|
'kwargs': {},
|