alita-sdk 0.3.390__py3-none-any.whl → 0.3.417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. alita_sdk/configurations/bitbucket.py +95 -0
  2. alita_sdk/configurations/confluence.py +96 -1
  3. alita_sdk/configurations/gitlab.py +79 -0
  4. alita_sdk/configurations/jira.py +103 -0
  5. alita_sdk/configurations/testrail.py +88 -0
  6. alita_sdk/configurations/xray.py +93 -0
  7. alita_sdk/configurations/zephyr_enterprise.py +93 -0
  8. alita_sdk/configurations/zephyr_essential.py +75 -0
  9. alita_sdk/runtime/clients/client.py +3 -2
  10. alita_sdk/runtime/langchain/assistant.py +29 -5
  11. alita_sdk/runtime/langchain/constants.py +2 -0
  12. alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
  13. alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
  14. alita_sdk/runtime/langchain/document_loaders/constants.py +8 -8
  15. alita_sdk/runtime/langchain/langraph_agent.py +46 -24
  16. alita_sdk/runtime/langchain/utils.py +11 -4
  17. alita_sdk/runtime/toolkits/application.py +8 -1
  18. alita_sdk/runtime/toolkits/tools.py +72 -62
  19. alita_sdk/runtime/tools/application.py +7 -0
  20. alita_sdk/runtime/tools/function.py +11 -4
  21. alita_sdk/runtime/tools/llm.py +142 -116
  22. alita_sdk/runtime/tools/sandbox.py +15 -31
  23. alita_sdk/tools/__init__.py +41 -31
  24. alita_sdk/tools/base_indexer_toolkit.py +27 -2
  25. alita_sdk/tools/code_indexer_toolkit.py +13 -3
  26. alita_sdk/tools/confluence/loader.py +10 -0
  27. alita_sdk/tools/gitlab/api_wrapper.py +8 -9
  28. alita_sdk/tools/jira/api_wrapper.py +1 -1
  29. alita_sdk/tools/qtest/api_wrapper.py +7 -10
  30. alita_sdk/tools/sharepoint/api_wrapper.py +81 -28
  31. alita_sdk/tools/sharepoint/authorization_helper.py +131 -1
  32. alita_sdk/tools/sharepoint/utils.py +8 -2
  33. alita_sdk/tools/utils/content_parser.py +27 -16
  34. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +10 -2
  35. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/METADATA +1 -1
  36. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/RECORD +39 -39
  37. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/WHEEL +0 -0
  38. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/licenses/LICENSE +0 -0
  39. {alita_sdk-0.3.390.dist-info → alita_sdk-0.3.417.dist-info}/top_level.txt +0 -0
@@ -7,6 +7,7 @@ from langchain_core.runnables import RunnableConfig
7
7
  from langchain_core.tools import BaseTool, ToolException
8
8
  from pydantic import Field
9
9
 
10
+ from ..langchain.constants import ELITEA_RS
10
11
  from ..langchain.utils import create_pydantic_model, propagate_the_input_mapping
11
12
 
12
13
  logger = logging.getLogger(__name__)
@@ -30,6 +31,7 @@ class LLMNode(BaseTool):
30
31
  structured_output: Optional[bool] = Field(default=False, description='Whether to use structured output')
31
32
  available_tools: Optional[List[BaseTool]] = Field(default=None, description='Available tools for binding')
32
33
  tool_names: Optional[List[str]] = Field(default=None, description='Specific tool names to filter')
34
+ steps_limit: Optional[int] = Field(default=25, description='Maximum steps for tool execution')
33
35
 
34
36
  def get_filtered_tools(self) -> List[BaseTool]:
35
37
  """
@@ -88,8 +90,7 @@ class LLMNode(BaseTool):
88
90
  raise ToolException(f"LLMNode requires 'system' and 'task' parameters in input mapping. "
89
91
  f"Actual params: {func_args}")
90
92
  # cast to str in case user passes variable different from str
91
- messages = [SystemMessage(content=str(func_args.get('system'))), HumanMessage(content=str(func_args.get('task')))]
92
- messages.extend(func_args.get('chat_history', []))
93
+ messages = [SystemMessage(content=str(func_args.get('system'))), *func_args.get('chat_history', []), HumanMessage(content=str(func_args.get('task')))]
93
94
  else:
94
95
  # Flow for chat-based LLM node w/o prompt/task from pipeline but with messages in state
95
96
  # verify messages structure
@@ -122,14 +123,25 @@ class LLMNode(BaseTool):
122
123
  }
123
124
  for key, value in (self.structured_output_dict or {}).items()
124
125
  }
126
+ # Add default output field for proper response to user
127
+ struct_params['elitea_response'] = {'description': 'final output to user', 'type': 'str'}
125
128
  struct_model = create_pydantic_model(f"LLMOutput", struct_params)
126
- llm = llm_client.with_structured_output(struct_model)
127
- completion = llm.invoke(messages, config=config)
128
- result = completion.model_dump()
129
+ completion = llm_client.invoke(messages, config=config)
130
+ if hasattr(completion, 'tool_calls') and completion.tool_calls:
131
+ new_messages, _ = self.__perform_tool_calling(completion, messages, llm_client, config)
132
+ llm = self.__get_struct_output_model(llm_client, struct_model)
133
+ completion = llm.invoke(new_messages, config=config)
134
+ result = completion.model_dump()
135
+ else:
136
+ llm = self.__get_struct_output_model(llm_client, struct_model)
137
+ completion = llm.invoke(messages, config=config)
138
+ result = completion.model_dump()
129
139
 
130
140
  # Ensure messages are properly formatted
131
141
  if result.get('messages') and isinstance(result['messages'], list):
132
142
  result['messages'] = [{'role': 'assistant', 'content': '\n'.join(result['messages'])}]
143
+ else:
144
+ result['messages'] = messages + [AIMessage(content=result.get(ELITEA_RS, ''))]
133
145
 
134
146
  return result
135
147
  else:
@@ -139,117 +151,15 @@ class LLMNode(BaseTool):
139
151
  # Handle both tool-calling and regular responses
140
152
  if hasattr(completion, 'tool_calls') and completion.tool_calls:
141
153
  # Handle iterative tool-calling and execution
142
- new_messages = messages + [completion]
143
- max_iterations = 15
144
- iteration = 0
145
-
146
- # Continue executing tools until no more tool calls or max iterations reached
147
- current_completion = completion
148
- while (hasattr(current_completion, 'tool_calls') and
149
- current_completion.tool_calls and
150
- iteration < max_iterations):
151
-
152
- iteration += 1
153
- logger.info(f"Tool execution iteration {iteration}/{max_iterations}")
154
-
155
- # Execute each tool call in the current completion
156
- tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls,
157
- '__iter__') else []
158
-
159
- for tool_call in tool_calls:
160
- tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call,
161
- 'name',
162
- '')
163
- tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call,
164
- 'args',
165
- {})
166
- tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(
167
- tool_call, 'id', '')
168
-
169
- # Find the tool in filtered tools
170
- filtered_tools = self.get_filtered_tools()
171
- tool_to_execute = None
172
- for tool in filtered_tools:
173
- if tool.name == tool_name:
174
- tool_to_execute = tool
175
- break
176
-
177
- if tool_to_execute:
178
- try:
179
- logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
180
- # Pass the underlying config to the tool execution invoke method
181
- # since it may be another agent, graph, etc. to see it properly in thinking steps
182
- tool_result = tool_to_execute.invoke(tool_args, config=config)
183
-
184
- # Create tool message with result - preserve structured content
185
- from langchain_core.messages import ToolMessage
186
-
187
- # Check if tool_result is structured content (list of dicts)
188
- # TODO: need solid check for being compatible with ToolMessage content format
189
- if isinstance(tool_result, list) and all(
190
- isinstance(item, dict) and 'type' in item for item in tool_result
191
- ):
192
- # Use structured content directly for multimodal support
193
- tool_message = ToolMessage(
194
- content=tool_result,
195
- tool_call_id=tool_call_id
196
- )
197
- else:
198
- # Fallback to string conversion for other tool results
199
- tool_message = ToolMessage(
200
- content=str(tool_result),
201
- tool_call_id=tool_call_id
202
- )
203
- new_messages.append(tool_message)
204
-
205
- except Exception as e:
206
- logger.error(f"Error executing tool '{tool_name}': {e}")
207
- # Create error tool message
208
- from langchain_core.messages import ToolMessage
209
- tool_message = ToolMessage(
210
- content=f"Error executing {tool_name}: {str(e)}",
211
- tool_call_id=tool_call_id
212
- )
213
- new_messages.append(tool_message)
214
- else:
215
- logger.warning(f"Tool '{tool_name}' not found in available tools")
216
- # Create error tool message for missing tool
217
- from langchain_core.messages import ToolMessage
218
- tool_message = ToolMessage(
219
- content=f"Tool '{tool_name}' not available",
220
- tool_call_id=tool_call_id
221
- )
222
- new_messages.append(tool_message)
223
-
224
- # Call LLM again with tool results to get next response
225
- try:
226
- current_completion = llm_client.invoke(new_messages, config=config)
227
- new_messages.append(current_completion)
228
-
229
- # Check if we still have tool calls
230
- if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
231
- logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
232
- else:
233
- logger.info("LLM completed without requesting more tools")
234
- break
235
-
236
- except Exception as e:
237
- logger.error(f"Error in LLM call during iteration {iteration}: {e}")
238
- # Add error message and break the loop
239
- error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
240
- new_messages.append(AIMessage(content=error_msg))
241
- break
242
-
243
- # Log completion status
244
- if iteration >= max_iterations:
245
- logger.warning(f"Reached maximum iterations ({max_iterations}) for tool execution")
246
- # Add a warning message to the chat
247
- warning_msg = f"Maximum tool execution iterations ({max_iterations}) reached. Stopping tool execution."
248
- new_messages.append(AIMessage(content=warning_msg))
249
- else:
250
- logger.info(f"Tool execution completed after {iteration} iterations")
154
+ new_messages, current_completion = self.__perform_tool_calling(completion, messages, llm_client, config)
251
155
 
252
- return {"messages": new_messages}
156
+ output_msgs = {"messages": new_messages}
157
+ if self.output_variables:
158
+ if self.output_variables[0] == 'messages':
159
+ return output_msgs
160
+ output_msgs[self.output_variables[0]] = current_completion.content if current_completion else None
161
+
162
+ return output_msgs
253
163
  else:
254
164
  # Regular text response
255
165
  content = completion.content.strip() if hasattr(completion, 'content') else str(completion)
@@ -275,4 +185,120 @@ class LLMNode(BaseTool):
275
185
 
276
186
  def _run(self, *args, **kwargs):
277
187
  # Legacy support for old interface
278
- return self.invoke(kwargs, **kwargs)
188
+ return self.invoke(kwargs, **kwargs)
189
+
190
+ def __perform_tool_calling(self, completion, messages, llm_client, config):
191
+ # Handle iterative tool-calling and execution
192
+ new_messages = messages + [completion]
193
+ iteration = 0
194
+
195
+ # Continue executing tools until no more tool calls or max iterations reached
196
+ current_completion = completion
197
+ while (hasattr(current_completion, 'tool_calls') and
198
+ current_completion.tool_calls and
199
+ iteration < self.steps_limit):
200
+
201
+ iteration += 1
202
+ logger.info(f"Tool execution iteration {iteration}/{self.steps_limit}")
203
+
204
+ # Execute each tool call in the current completion
205
+ tool_calls = current_completion.tool_calls if hasattr(current_completion.tool_calls,
206
+ '__iter__') else []
207
+
208
+ for tool_call in tool_calls:
209
+ tool_name = tool_call.get('name', '') if isinstance(tool_call, dict) else getattr(tool_call,
210
+ 'name',
211
+ '')
212
+ tool_args = tool_call.get('args', {}) if isinstance(tool_call, dict) else getattr(tool_call,
213
+ 'args',
214
+ {})
215
+ tool_call_id = tool_call.get('id', '') if isinstance(tool_call, dict) else getattr(
216
+ tool_call, 'id', '')
217
+
218
+ # Find the tool in filtered tools
219
+ filtered_tools = self.get_filtered_tools()
220
+ tool_to_execute = None
221
+ for tool in filtered_tools:
222
+ if tool.name == tool_name:
223
+ tool_to_execute = tool
224
+ break
225
+
226
+ if tool_to_execute:
227
+ try:
228
+ logger.info(f"Executing tool '{tool_name}' with args: {tool_args}")
229
+ # Pass the underlying config to the tool execution invoke method
230
+ # since it may be another agent, graph, etc. to see it properly in thinking steps
231
+ tool_result = tool_to_execute.invoke(tool_args, config=config)
232
+
233
+ # Create tool message with result - preserve structured content
234
+ from langchain_core.messages import ToolMessage
235
+
236
+ # Check if tool_result is structured content (list of dicts)
237
+ # TODO: need solid check for being compatible with ToolMessage content format
238
+ if isinstance(tool_result, list) and all(
239
+ isinstance(item, dict) and 'type' in item for item in tool_result
240
+ ):
241
+ # Use structured content directly for multimodal support
242
+ tool_message = ToolMessage(
243
+ content=tool_result,
244
+ tool_call_id=tool_call_id
245
+ )
246
+ else:
247
+ # Fallback to string conversion for other tool results
248
+ tool_message = ToolMessage(
249
+ content=str(tool_result),
250
+ tool_call_id=tool_call_id
251
+ )
252
+ new_messages.append(tool_message)
253
+
254
+ except Exception as e:
255
+ logger.error(f"Error executing tool '{tool_name}': {e}")
256
+ # Create error tool message
257
+ from langchain_core.messages import ToolMessage
258
+ tool_message = ToolMessage(
259
+ content=f"Error executing {tool_name}: {str(e)}",
260
+ tool_call_id=tool_call_id
261
+ )
262
+ new_messages.append(tool_message)
263
+ else:
264
+ logger.warning(f"Tool '{tool_name}' not found in available tools")
265
+ # Create error tool message for missing tool
266
+ from langchain_core.messages import ToolMessage
267
+ tool_message = ToolMessage(
268
+ content=f"Tool '{tool_name}' not available",
269
+ tool_call_id=tool_call_id
270
+ )
271
+ new_messages.append(tool_message)
272
+
273
+ # Call LLM again with tool results to get next response
274
+ try:
275
+ current_completion = llm_client.invoke(new_messages, config=config)
276
+ new_messages.append(current_completion)
277
+
278
+ # Check if we still have tool calls
279
+ if hasattr(current_completion, 'tool_calls') and current_completion.tool_calls:
280
+ logger.info(f"LLM requested {len(current_completion.tool_calls)} more tool calls")
281
+ else:
282
+ logger.info("LLM completed without requesting more tools")
283
+ break
284
+
285
+ except Exception as e:
286
+ logger.error(f"Error in LLM call during iteration {iteration}: {e}")
287
+ # Add error message and break the loop
288
+ error_msg = f"Error processing tool results in iteration {iteration}: {str(e)}"
289
+ new_messages.append(AIMessage(content=error_msg))
290
+ break
291
+
292
+ # Log completion status
293
+ if iteration >= self.steps_limit:
294
+ logger.warning(f"Reached maximum iterations ({self.steps_limit}) for tool execution")
295
+ # Add a warning message to the chat
296
+ warning_msg = f"Maximum tool execution iterations ({self.steps_limit}) reached. Stopping tool execution."
297
+ new_messages.append(AIMessage(content=warning_msg))
298
+ else:
299
+ logger.info(f"Tool execution completed after {iteration} iterations")
300
+
301
+ return new_messages, current_completion
302
+
303
+ def __get_struct_output_model(self, llm_client, pydantic_model):
304
+ return llm_client.with_structured_output(pydantic_model)
@@ -64,36 +64,10 @@ def _is_deno_available() -> bool:
64
64
 
65
65
 
66
66
  def _setup_pyodide_cache_env() -> None:
67
- """Setup Pyodide caching environment variables for performance optimization"""
67
+ """Setup Pyodide caching environment variables for performance optimization [NO-OP]"""
68
68
  try:
69
- # Check if cache environment file exists and source it
70
- cache_env_file = os.path.expanduser("~/.pyodide_cache_env")
71
- if os.path.exists(cache_env_file):
72
- with open(cache_env_file, 'r') as f:
73
- for line in f:
74
- line = line.strip()
75
- if line.startswith('export ') and '=' in line:
76
- # Parse export VAR=value format
77
- var_assignment = line[7:] # Remove 'export '
78
- if '=' in var_assignment:
79
- key, value = var_assignment.split('=', 1)
80
- # Remove quotes if present
81
- value = value.strip('"').strip("'")
82
- os.environ[key] = value
83
- logger.debug(f"Set Pyodide cache env: {key}={value}")
84
-
85
- # Set default caching environment variables if not already set
86
- cache_defaults = {
87
- 'PYODIDE_PACKAGES_PATH': os.path.expanduser('~/.cache/pyodide'),
88
- 'DENO_DIR': os.path.expanduser('~/.cache/deno'),
89
- 'PYODIDE_CACHE_DIR': os.path.expanduser('~/.cache/pyodide'),
90
- }
91
-
92
- for key, default_value in cache_defaults.items():
93
- if key not in os.environ:
94
- os.environ[key] = default_value
95
- logger.debug(f"Set default Pyodide env: {key}={default_value}")
96
-
69
+ for key in ["SANDBOX_BASE", "DENO_DIR"]:
70
+ logger.info("Sandbox env: %s -> %s", key, os.environ.get(key, "n/a"))
97
71
  except Exception as e:
98
72
  logger.warning(f"Could not setup Pyodide cache environment: {e}")
99
73
 
@@ -142,7 +116,7 @@ class PyodideSandboxTool(BaseTool):
142
116
  def _prepare_pyodide_input(self, code: str) -> str:
143
117
  """Prepare input for PyodideSandboxTool by injecting state and alita_client into the code block."""
144
118
  pyodide_predata = ""
145
-
119
+
146
120
  # Add alita_client if available
147
121
  if self.alita_client:
148
122
  try:
@@ -158,7 +132,7 @@ class PyodideSandboxTool(BaseTool):
158
132
  f"auth_token='{self.alita_client.auth_token}')\n")
159
133
  except FileNotFoundError:
160
134
  logger.error(f"sandbox_client.py not found. Ensure the file exists.")
161
-
135
+
162
136
  return f"#elitea simplified client\n{pyodide_predata}{code}"
163
137
 
164
138
  def _initialize_sandbox(self) -> None:
@@ -175,9 +149,19 @@ class PyodideSandboxTool(BaseTool):
175
149
 
176
150
  from langchain_sandbox import PyodideSandbox
177
151
 
152
+ # Air-gapped settings
153
+ sandbox_base = os.environ.get("SANDBOX_BASE", os.path.expanduser('~/.cache/pyodide'))
154
+ sandbox_tmp = os.path.join(sandbox_base, "tmp")
155
+ deno_cache = os.environ.get("DENO_DIR", os.path.expanduser('~/.cache/deno'))
156
+
178
157
  # Configure sandbox with performance optimizations
179
158
  self._sandbox = PyodideSandbox(
180
159
  stateful=self.stateful,
160
+ #
161
+ allow_env=["SANDBOX_BASE"],
162
+ allow_read=[sandbox_base, sandbox_tmp, deno_cache],
163
+ allow_write=[sandbox_tmp, deno_cache],
164
+ #
181
165
  allow_net=self.allow_net,
182
166
  # Use auto node_modules_dir for better caching
183
167
  node_modules_dir="auto"
@@ -90,64 +90,74 @@ available_count = len(AVAILABLE_TOOLS)
90
90
  total_attempted = len(AVAILABLE_TOOLS) + len(FAILED_IMPORTS)
91
91
  logger.info(f"Tool imports completed: {available_count}/{total_attempted} successful")
92
92
 
93
+
93
94
  def get_tools(tools_list, alita, llm, store: Optional[BaseStore] = None, *args, **kwargs):
94
95
  tools = []
96
+
95
97
  for tool in tools_list:
96
- # validate tool name syntax - it cannot be started with _
97
- for tool_name in tool.get('settings', {}).get('selected_tools', []):
98
- if isinstance(tool_name, str) and tool_name.startswith('_'):
99
- raise ValueError(f"Tool name '{tool_name}' from toolkit '{tool.get('type', '')}' cannot start with '_'")
100
- if not tool.get('settings'):
98
+ settings = tool.get('settings')
99
+
100
+ # Skip tools without settings early
101
+ if not settings:
101
102
  logger.warning(f"Tool '{tool.get('type', '')}' has no settings, skipping...")
102
103
  continue
103
- tool['settings']['alita'] = alita
104
- tool['settings']['llm'] = llm
105
- tool['settings']['store'] = store
104
+
105
+ # Validate tool names once
106
+ selected_tools = settings.get('selected_tools', [])
107
+ invalid_tools = [name for name in selected_tools if isinstance(name, str) and name.startswith('_')]
108
+ if invalid_tools:
109
+ raise ValueError(f"Tool names {invalid_tools} from toolkit '{tool.get('type', '')}' cannot start with '_'")
110
+
111
+ # Cache tool type and add common settings
106
112
  tool_type = tool['type']
113
+ settings['alita'] = alita
114
+ settings['llm'] = llm
115
+ settings['store'] = store
107
116
 
108
- # Handle special cases for ADO tools
117
+ # Set pgvector collection schema if present
118
+ if settings.get('pgvector_configuration'):
119
+ settings['pgvector_configuration']['collection_schema'] = str(tool['id'])
120
+
121
+ # Handle ADO special cases
109
122
  if tool_type in ['ado_boards', 'ado_wiki', 'ado_plans']:
110
123
  tools.extend(AVAILABLE_TOOLS['ado']['get_tools'](tool_type, tool))
124
+ continue
111
125
 
112
- # Check if tool is available and has get_tools function
113
- elif tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
126
+ # Handle ADO repos aliases
127
+ if tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
114
128
  try:
115
- get_tools_func = AVAILABLE_TOOLS[tool_type]['get_tools']
116
- tools.extend(get_tools_func(tool))
117
-
129
+ tools.extend(AVAILABLE_TOOLS['ado_repos']['get_tools'](tool))
118
130
  except Exception as e:
119
- logger.error(f"Error getting tools for {tool_type}: {e}")
120
- raise ToolException(f"Error getting tools for {tool_type}: {e}")
131
+ logger.error(f"Error getting ADO repos tools: {e}")
132
+ continue
121
133
 
122
- # Handle ADO repos special case (it might be requested as azure_devops_repos)
123
- elif tool_type in ['ado_repos', 'azure_devops_repos'] and 'ado_repos' in AVAILABLE_TOOLS:
134
+ # Handle standard tools
135
+ if tool_type in AVAILABLE_TOOLS and 'get_tools' in AVAILABLE_TOOLS[tool_type]:
124
136
  try:
125
- get_tools_func = AVAILABLE_TOOLS['ado_repos']['get_tools']
126
- tools.extend(get_tools_func(tool))
137
+ tools.extend(AVAILABLE_TOOLS[tool_type]['get_tools'](tool))
127
138
  except Exception as e:
128
- logger.error(f"Error getting ADO repos tools: {e}")
139
+ logger.error(f"Error getting tools for {tool_type}: {e}")
140
+ raise ToolException(f"Error getting tools for {tool_type}: {e}")
141
+ continue
129
142
 
130
143
  # Handle custom modules
131
- elif tool.get("settings", {}).get("module"):
144
+ if settings.get("module"):
132
145
  try:
133
- settings = tool.get("settings", {})
134
146
  mod = import_module(settings.pop("module"))
135
147
  tkitclass = getattr(mod, settings.pop("class"))
136
- #
137
- get_toolkit_params = tool["settings"].copy()
148
+ get_toolkit_params = settings.copy()
138
149
  get_toolkit_params["name"] = tool.get("name")
139
- #
140
150
  toolkit = tkitclass.get_toolkit(**get_toolkit_params)
141
151
  tools.extend(toolkit.get_tools())
142
152
  except Exception as e:
143
153
  logger.error(f"Error in getting custom toolkit: {e}")
154
+ continue
144
155
 
156
+ # Tool not available
157
+ if tool_type in FAILED_IMPORTS:
158
+ logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
145
159
  else:
146
- # Tool not available or not found
147
- if tool_type in FAILED_IMPORTS:
148
- logger.warning(f"Tool '{tool_type}' is not available: {FAILED_IMPORTS[tool_type]}")
149
- else:
150
- logger.warning(f"Unknown tool type: {tool_type}")
160
+ logger.warning(f"Unknown tool type: {tool_type}")
151
161
 
152
162
  return tools
153
163
 
@@ -110,7 +110,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
110
110
  def __init__(self, **kwargs):
111
111
  conn = kwargs.get('connection_string', None)
112
112
  connection_string = conn.get_secret_value() if isinstance(conn, SecretStr) else conn
113
- collection_name = kwargs.get('collection_name')
113
+ collection_name = kwargs.get('collection_schema')
114
114
 
115
115
  if 'vectorstore_type' not in kwargs:
116
116
  kwargs['vectorstore_type'] = 'PGVector'
@@ -160,6 +160,8 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
160
160
  if clean_index:
161
161
  self._clean_index(index_name)
162
162
  #
163
+ self.index_meta_init(index_name, kwargs)
164
+ #
163
165
  self._log_tool_event(f"Indexing data into collection with suffix '{index_name}'. It can take some time...")
164
166
  self._log_tool_event(f"Loading the documents to index...{kwargs}")
165
167
  documents = self._base_loader(**kwargs)
@@ -179,7 +181,7 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
179
181
  return {"status": "ok", "message": f"successfully indexed {results_count} documents" if results_count > 0
180
182
  else "no new documents to index"}
181
183
  except Exception as e:
182
- self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, results_count)
184
+ self.index_meta_update(index_name, IndexerKeywords.INDEX_META_FAILED.value, result["count"])
183
185
  raise e
184
186
 
185
187
 
@@ -454,6 +456,29 @@ class BaseIndexerToolkit(VectorStoreWrapperBase):
454
456
  reranking_config=reranking_config,
455
457
  extended_search=extended_search
456
458
  )
459
+
460
+ def index_meta_init(self, index_name: str, index_configuration: dict[str, Any]):
461
+ index_meta = super().get_index_meta(index_name)
462
+ if not index_meta:
463
+ self._log_tool_event(
464
+ f"There is no existing index_meta for collection '{index_name}'. Initializing it.",
465
+ tool_name="index_data"
466
+ )
467
+ from ..runtime.langchain.interfaces.llm_processor import add_documents
468
+ created_on = time.time()
469
+ metadata = {
470
+ "collection": index_name,
471
+ "type": IndexerKeywords.INDEX_META_TYPE.value,
472
+ "indexed": 0,
473
+ "state": IndexerKeywords.INDEX_META_IN_PROGRESS.value,
474
+ "index_configuration": index_configuration,
475
+ "created_on": created_on,
476
+ "updated_on": created_on,
477
+ "history": "[]",
478
+ "task_id": None,
479
+ }
480
+ index_meta_doc = Document(page_content=f"{IndexerKeywords.INDEX_META_TYPE.value}_{index_name}", metadata=metadata)
481
+ add_documents(vectorstore=self.vectorstore, documents=[index_meta_doc])
457
482
 
458
483
  def index_meta_update(self, index_name: str, state: str, result: int):
459
484
  index_meta_raw = super().get_index_meta(index_name)
@@ -1,5 +1,6 @@
1
1
  import ast
2
2
  import fnmatch
3
+ import json
3
4
  import logging
4
5
  from typing import Optional, List, Generator
5
6
 
@@ -21,7 +22,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
21
22
  return self.vector_adapter.get_code_indexed_data(self, index_name)
22
23
 
23
24
  def key_fn(self, document: Document):
24
- return document.metadata.get('id')
25
+ return document.metadata.get("filename")
25
26
 
26
27
  def compare_fn(self, document: Document, idx_data):
27
28
  return (document.metadata.get('commit_hash') and
@@ -46,7 +47,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
46
47
  )
47
48
 
48
49
  def _extend_data(self, documents: Generator[Document, None, None]):
49
- yield from parse_code_files_for_db(documents)
50
+ yield from documents
50
51
 
51
52
  def _index_tool_params(self):
52
53
  """Return the parameters for indexing data."""
@@ -117,6 +118,15 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
117
118
  if not file_content:
118
119
  # empty file, skip
119
120
  continue
121
+ #
122
+ # ensure file content is a string
123
+ if isinstance(file_content, bytes):
124
+ file_content = file_content.decode("utf-8", errors="ignore")
125
+ elif isinstance(file_content, dict) and file.endswith('.json'):
126
+ file_content = json.dumps(file_content)
127
+ elif not isinstance(file_content, str):
128
+ file_content = str(file_content)
129
+ #
120
130
  # hash the file content to ensure uniqueness
121
131
  import hashlib
122
132
  file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
@@ -127,7 +137,7 @@ class CodeIndexerToolkit(BaseIndexerToolkit):
127
137
  self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
128
138
  self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
129
139
 
130
- return file_content_generator()
140
+ return parse_code_files_for_db(file_content_generator())
131
141
 
132
142
  def __handle_get_files(self, path: str, branch: str):
133
143
  """
@@ -3,6 +3,7 @@ from typing import Optional, List
3
3
  from logging import getLogger
4
4
 
5
5
  import requests
6
+ from langchain_core.documents import Document
6
7
 
7
8
  logger = getLogger(__name__)
8
9
  from PIL import Image
@@ -193,6 +194,15 @@ class AlitaConfluenceLoader(ConfluenceLoader):
193
194
  else:
194
195
  return super().process_image(link, ocr_languages)
195
196
 
197
+ def process_page(self, page: dict, include_attachments: bool, include_comments: bool, include_labels: bool,
198
+ content_format: ContentFormat, ocr_languages: Optional[str] = None,
199
+ keep_markdown_format: Optional[bool] = False, keep_newlines: bool = False) -> Document:
200
+ if not page.get("title"):
201
+ # if 'include_restricted_content' set to True, draft pages are loaded and can have no title
202
+ page["title"] = "Untitled"
203
+ return super().process_page(page, include_attachments, include_comments, include_labels, content_format,
204
+ ocr_languages, keep_markdown_format, keep_newlines)
205
+
196
206
  # TODO review usage
197
207
  # def process_svg(
198
208
  # self,
@@ -115,9 +115,8 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
115
115
  """Remove trailing slash from URL if present."""
116
116
  return url.rstrip('/') if url else url
117
117
 
118
- @model_validator(mode='before')
119
- @classmethod
120
- def validate_toolkit(cls, values: Dict) -> Dict:
118
+ @model_validator(mode='after')
119
+ def validate_toolkit(self):
121
120
  try:
122
121
  import gitlab
123
122
  except ImportError:
@@ -125,17 +124,17 @@ class GitLabAPIWrapper(CodeIndexerToolkit):
125
124
  "python-gitlab is not installed. "
126
125
  "Please install it with `pip install python-gitlab`"
127
126
  )
128
- values['repository'] = cls._sanitize_url(values['repository'])
127
+ self.repository = self._sanitize_url(self.repository)
129
128
  g = gitlab.Gitlab(
130
- url=cls._sanitize_url(values['url']),
131
- private_token=values['private_token'],
129
+ url=self._sanitize_url(self.url),
130
+ private_token=self.private_token.get_secret_value(),
132
131
  keep_base_url=True,
133
132
  )
134
133
 
135
134
  g.auth()
136
- cls._git = g
137
- cls._active_branch = values.get('branch')
138
- return super().validate_toolkit(values)
135
+ self._git = g
136
+ self._active_branch = self.branch
137
+ return self
139
138
 
140
139
  @property
141
140
  def repo_instance(self):