alita-sdk 0.3.351__py3-none-any.whl → 0.3.499__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3601 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1256 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +64 -8
- alita_sdk/community/inventory/__init__.py +224 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +96 -1
- alita_sdk/configurations/gitlab.py +79 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +1 -1
- alita_sdk/runtime/clients/client.py +214 -42
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +373 -0
- alita_sdk/runtime/langchain/assistant.py +118 -30
- alita_sdk/runtime/langchain/constants.py +8 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +4 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +41 -12
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/constants.py +116 -99
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +2 -2
- alita_sdk/runtime/langchain/langraph_agent.py +307 -71
- alita_sdk/runtime/langchain/utils.py +48 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/toolkits/__init__.py +26 -0
- alita_sdk/runtime/toolkits/application.py +9 -2
- alita_sdk/runtime/toolkits/artifact.py +18 -6
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +780 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/tools.py +205 -55
- alita_sdk/runtime/toolkits/vectorstore.py +9 -4
- alita_sdk/runtime/tools/__init__.py +11 -3
- alita_sdk/runtime/tools/application.py +7 -0
- alita_sdk/runtime/tools/artifact.py +225 -12
- alita_sdk/runtime/tools/function.py +95 -5
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +212 -0
- alita_sdk/runtime/tools/llm.py +494 -102
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +4 -4
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +180 -79
- alita_sdk/runtime/tools/vectorstore.py +22 -21
- alita_sdk/runtime/tools/vectorstore_base.py +125 -52
- alita_sdk/runtime/utils/AlitaCallback.py +106 -20
- alita_sdk/runtime/utils/mcp_client.py +465 -0
- alita_sdk/runtime/utils/mcp_oauth.py +244 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +12 -0
- alita_sdk/tools/__init__.py +77 -33
- alita_sdk/tools/ado/repos/__init__.py +7 -6
- alita_sdk/tools/ado/repos/repos_wrapper.py +11 -11
- alita_sdk/tools/ado/test_plan/__init__.py +7 -7
- alita_sdk/tools/ado/wiki/__init__.py +7 -11
- alita_sdk/tools/ado/wiki/ado_wrapper.py +89 -15
- alita_sdk/tools/ado/work_item/__init__.py +7 -11
- alita_sdk/tools/ado/work_item/ado_wrapper.py +17 -8
- alita_sdk/tools/advanced_jira_mining/__init__.py +8 -7
- alita_sdk/tools/aws/delta_lake/__init__.py +11 -9
- alita_sdk/tools/azure_ai/search/__init__.py +7 -6
- alita_sdk/tools/base_indexer_toolkit.py +345 -70
- alita_sdk/tools/bitbucket/__init__.py +9 -8
- alita_sdk/tools/bitbucket/api_wrapper.py +50 -6
- alita_sdk/tools/browser/__init__.py +4 -4
- alita_sdk/tools/carrier/__init__.py +4 -6
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +7 -6
- alita_sdk/tools/cloud/azure/__init__.py +7 -6
- alita_sdk/tools/cloud/gcp/__init__.py +7 -6
- alita_sdk/tools/cloud/k8s/__init__.py +7 -6
- alita_sdk/tools/code/linter/__init__.py +7 -7
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +8 -7
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +9 -8
- alita_sdk/tools/confluence/api_wrapper.py +171 -75
- alita_sdk/tools/confluence/loader.py +10 -0
- alita_sdk/tools/custom_open_api/__init__.py +9 -4
- alita_sdk/tools/elastic/__init__.py +8 -7
- alita_sdk/tools/elitea_base.py +492 -52
- alita_sdk/tools/figma/__init__.py +7 -7
- alita_sdk/tools/figma/api_wrapper.py +2 -1
- alita_sdk/tools/github/__init__.py +9 -9
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +62 -2
- alita_sdk/tools/gitlab/__init__.py +8 -8
- alita_sdk/tools/gitlab/api_wrapper.py +135 -33
- alita_sdk/tools/gitlab_org/__init__.py +7 -8
- alita_sdk/tools/google/bigquery/__init__.py +11 -12
- alita_sdk/tools/google_places/__init__.py +8 -7
- alita_sdk/tools/jira/__init__.py +9 -7
- alita_sdk/tools/jira/api_wrapper.py +100 -52
- alita_sdk/tools/keycloak/__init__.py +8 -7
- alita_sdk/tools/localgit/local_git.py +56 -54
- alita_sdk/tools/memory/__init__.py +1 -1
- alita_sdk/tools/non_code_indexer_toolkit.py +3 -2
- alita_sdk/tools/ocr/__init__.py +8 -7
- alita_sdk/tools/openapi/__init__.py +10 -1
- alita_sdk/tools/pandas/__init__.py +8 -7
- alita_sdk/tools/postman/__init__.py +7 -8
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +8 -9
- alita_sdk/tools/qtest/__init__.py +16 -11
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +7 -8
- alita_sdk/tools/report_portal/__init__.py +9 -7
- alita_sdk/tools/salesforce/__init__.py +7 -7
- alita_sdk/tools/servicenow/__init__.py +10 -10
- alita_sdk/tools/sharepoint/__init__.py +7 -6
- alita_sdk/tools/sharepoint/api_wrapper.py +127 -36
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +7 -6
- alita_sdk/tools/sql/__init__.py +8 -7
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +7 -6
- alita_sdk/tools/testrail/__init__.py +8 -9
- alita_sdk/tools/utils/__init__.py +26 -4
- alita_sdk/tools/utils/content_parser.py +88 -60
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +76 -26
- alita_sdk/tools/xray/__init__.py +9 -7
- alita_sdk/tools/zephyr/__init__.py +7 -6
- alita_sdk/tools/zephyr_enterprise/__init__.py +8 -6
- alita_sdk/tools/zephyr_essential/__init__.py +7 -6
- alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
- alita_sdk/tools/zephyr_scale/__init__.py +7 -6
- alita_sdk/tools/zephyr_squad/__init__.py +7 -6
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +147 -2
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/RECORD +206 -130
- alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.351.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0
alita_sdk/tools/elitea_base.py
CHANGED
@@ -11,7 +11,6 @@ from pydantic import BaseModel, create_model, Field, SecretStr
 
 # from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
 from .chunkers import markdown_chunker
-from .utils import TOOLKIT_SPLITTER
 from .vector_adapters.VectorStoreAdapter import VectorStoreAdapterFactory
 from ..runtime.utils.utils import IndexerKeywords
 
@@ -33,12 +32,12 @@ LoaderSchema = create_model(
 # Base Vector Store Schema Models
 BaseIndexParams = create_model(
     "BaseIndexParams",
-    …
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
 )
 
 BaseCodeIndexParams = create_model(
     "BaseCodeIndexParams",
-    …
+    index_name=(str, Field(description="Index name (max 7 characters)", min_length=1, max_length=7)),
     clean_index=(Optional[bool], Field(default=False, description="Optional flag to enforce clean existing index before indexing new data")),
     progress_step=(Optional[int], Field(default=5, ge=0, le=100,
                                         description="Optional step size for progress reporting during indexing")),
@@ -50,14 +49,14 @@ BaseCodeIndexParams = create_model(
 
 RemoveIndexParams = create_model(
     "RemoveIndexParams",
-    …
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
 )
 
 BaseSearchParams = create_model(
     "BaseSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    …
-        description="Optional…
+    index_name=(Optional[str], Field(
+        description="Optional index name (max 7 characters). Leave empty to search across all datasets",
         default="", max_length=7)),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -87,7 +86,7 @@ BaseSearchParams = create_model(
 BaseStepbackSearchParams = create_model(
     "BaseStepbackSearchParams",
     query=(str, Field(description="Query text to search in the index")),
-    …
+    index_name=(Optional[str], Field(description="Optional index name (max 7 characters)", default="", max_length=7)),
     messages=(Optional[List], Field(description="Chat messages for stepback search context", default=[])),
     filter=(Optional[dict], Field(
         description="Filter to apply to the search results. Can be a dictionary or a JSON string.",
@@ -126,14 +125,91 @@ BaseIndexDataParams = create_model(
     chunking_config=(Optional[dict], Field(description="Chunking tool configuration", default_factory=dict)),
 )
 
+# File Operations Schema Models
+ReadFileInput = create_model(
+    "ReadFileInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number (1-indexed, inclusive). Read from this line onwards.", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset. If None, reads to end.", default=None, ge=1)),
+    head=(Optional[int], Field(description="Read only the first N lines. Alternative to offset/limit.", default=None, ge=1)),
+    tail=(Optional[int], Field(description="Read only the last N lines. Alternative to offset/limit.", default=None, ge=1)),
+)
 
-…
+ReadFileChunkInput = create_model(
+    "ReadFileChunkInput",
+    file_path=(str, Field(description="Path to the file to read")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    start_line=(int, Field(description="Starting line number (1-indexed, inclusive)", ge=1)),
+    end_line=(Optional[int], Field(description="Ending line number (1-indexed, inclusive). If None, reads to end.", default=None, ge=1)),
+)
+
+ReadMultipleFilesInput = create_model(
+    "ReadMultipleFilesInput",
+    file_paths=(List[str], Field(description="List of file paths to read", min_length=1)),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    offset=(Optional[int], Field(description="Starting line number for all files (1-indexed)", default=None, ge=1)),
+    limit=(Optional[int], Field(description="Number of lines to read from offset for all files", default=None, ge=1)),
+)
+
+EditFileInput = create_model(
+    "EditFileInput",
+    file_path=(str, Field(description="Path to the file to edit. Must be a text file (markdown, txt, csv, json, xml, html, yaml, etc.)")),
+    file_query=(str, Field(description="""Edit instructions with OLD/NEW markers. Format:
+OLD <<<<
+old content to replace
+>>>> OLD
+NEW <<<<
+new content
+>>>> NEW
+
+Multiple OLD/NEW pairs can be provided for multiple edits.""")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    commit_message=(Optional[str], Field(description="Commit message for the change (VCS toolkits only)", default=None)),
+)
 
+SearchFileInput = create_model(
+    "SearchFileInput",
+    file_path=(str, Field(description="Path to the file to search")),
+    pattern=(str, Field(description="Search pattern. Treated as regex by default unless is_regex=False.")),
+    branch=(Optional[str], Field(description="Branch name. If None, uses active branch.", default=None)),
+    is_regex=(bool, Field(description="Whether pattern is a regex. Default is True for flexible matching.", default=True)),
+    context_lines=(int, Field(description="Number of lines before/after match to include for context", default=2, ge=0)),
+)
+
+
+class BaseToolApiWrapper(BaseModel):
+
+    # Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
+    _runnable_config: Optional[Dict[str, Any]] = None
+    # toolkit id propagated from backend
+    toolkit_id: int = 0
     def get_available_tools(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def …
-    """…
+    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+        """
+        Set the RunnableConfig for dispatching custom events.
+
+        This is required when running outside of a LangChain agent context
+        (e.g., from CLI). Without a config containing a run_id,
+        dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+        without a parent run id".
+
+        Args:
+            config: A RunnableConfig dict with at least {'run_id': uuid}
+        """
+        self._runnable_config = config
+
+    def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+        """Log data and dispatch custom event for the tool.
+
+        Args:
+            message: The message to log
+            tool_name: Name of the tool (defaults to 'tool_progress')
+            config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                Required when running outside a LangChain agent context.
+        """
 
         try:
             from langchain_core.callbacks import dispatch_custom_event
@@ -142,6 +218,10 @@ class BaseToolApiWrapper(BaseModel):
                 tool_name = 'tool_progress'
 
             logger.info(message)
+
+            # Use provided config, fall back to instance config
+            effective_config = config or self._runnable_config
+
             dispatch_custom_event(
                 name="thinking_step",
                 data={
@@ -149,14 +229,14 @@ class BaseToolApiWrapper(BaseModel):
                     "tool_name": tool_name,
                     "toolkit": self.__class__.__name__,
                 },
+                config=effective_config,
             )
         except Exception as e:
             logger.warning(f"Failed to dispatch progress event: {str(e)}")
 
 
     def run(self, mode: str, *args: Any, **kwargs: Any):
-…
-        mode = mode.rsplit(TOOLKIT_SPLITTER, maxsplit=1)[1]
+        # Mode is now the clean tool name (no prefix to remove)
         for tool in self.get_available_tools():
             if tool["name"] == mode:
                 try:
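The `set_runnable_config` and `effective_config` changes above exist so that progress events still work when a wrapper runs outside a LangChain agent, e.g. from the new CLI. A minimal sketch of the intended call pattern, based on the docstring above; `MyToolWrapper` here is a hypothetical `BaseToolApiWrapper` subclass:

```python
import uuid

wrapper = MyToolWrapper()  # hypothetical subclass of BaseToolApiWrapper

# Per the docstring, the config needs at least a run_id; without one,
# dispatch_custom_event raises "Unable to dispatch an adhoc event
# without a parent run id".
wrapper.set_runnable_config({"run_id": uuid.uuid4()})

# Subsequent tool calls can now emit "thinking_step" progress events.
wrapper._log_tool_event("indexing started", tool_name="index_data")
```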
@@ -165,6 +245,11 @@ class BaseToolApiWrapper(BaseModel):
                     # execution = str(execution)
                     return execution
                 except Exception as e:
+                    # Re-raise McpAuthorizationRequired directly without wrapping
+                    from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
+                    if isinstance(e, McpAuthorizationRequired):
+                        raise
+
                     # Catch all tool execution exceptions and provide user-friendly error messages
                     error_type = type(e).__name__
                     error_message = str(e)
@@ -324,12 +409,12 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         #
         docs = base_chunker(file_content_generator=docs, config=base_chunking_config)
         #
-…
+        index_name = kwargs.get("index_name")
         progress_step = kwargs.get("progress_step")
         clean_index = kwargs.get("clean_index")
         vs = self._init_vector_store()
         #
-        return vs.index_documents(docs,…
+        return vs.index_documents(docs, index_name=index_name, progress_step=progress_step, clean_index=clean_index)
 
     def _process_documents(self, documents: List[Document]) -> Generator[Document, None, None]:
         """
@@ -399,10 +484,10 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         )
         return self._vector_store
 
-    def remove_index(self,…
+    def remove_index(self, index_name: str = ""):
         """Cleans the indexed data in the collection."""
-        self._init_vector_store()._clean_collection(…
-        return (f"Collection '{…
+        self._init_vector_store()._clean_collection(index_name=index_name)
+        return (f"Collection '{index_name}' has been removed from the vector store.\n"
                 f"Available collections: {self.list_collections()}")
 
     def list_collections(self):
@@ -410,19 +495,19 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
         vectorstore_wrapper = self._init_vector_store()
         return vectorstore_wrapper.list_collections()
 
-    def _build_collection_filter(self, filter: dict | str,…
+    def _build_collection_filter(self, filter: dict | str, index_name: str = "") -> dict:
         """Builds a filter for the collection based on the provided suffix."""
 
         filter = filter if isinstance(filter, dict) else json.loads(filter)
-        if…
+        if index_name:
             filter.update({"collection": {
-                "$eq":…
+                "$eq": index_name.strip()
             }})
         return filter
 
     def search_index(self,
                      query: str,
-…
+                     index_name: str = "",
                      filter: dict | str = {}, cut_off: float = 0.5,
                      search_top: int = 10, reranker: dict = {},
                      full_text_search: Optional[Dict[str, Any]] = None,
@@ -431,7 +516,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                      **kwargs):
         """ Searches indexed documents in the vector store."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,…
+        filter = self._build_collection_filter(filter, index_name)
         found_docs = vectorstore.search_documents(
             query,
             doctype=self.doctype,
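All of the search entry points now scope queries the same way: `_build_collection_filter` merges a `collection` equality clause into the caller's filter whenever `index_name` is non-empty. A worked sketch with illustrative values; `wrapper` stands for any configured `BaseVectorStoreToolApiWrapper` subclass:

```python
# A user-supplied filter plus an index name (max 7 characters per the schema)...
user_filter = {"author": {"$eq": "alice"}}
docs = wrapper.search_index(
    "where is token refresh handled?",
    index_name="docs",
    filter=user_filter,
    search_top=5,
)
# ...is executed with the merged filter:
#   {"author": {"$eq": "alice"}, "collection": {"$eq": "docs"}}
# Leaving index_name empty searches across all collections.
```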
@@ -448,7 +533,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_search_index(self,
                               query: str,
                               messages: List[Dict[str, Any]] = [],
-…
+                              index_name: str = "",
                               filter: dict | str = {}, cut_off: float = 0.5,
                               search_top: int = 10, reranker: dict = {},
                               full_text_search: Optional[Dict[str, Any]] = None,
@@ -457,7 +542,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                               **kwargs):
         """ Searches indexed documents in the vector store."""
 
-        filter = self._build_collection_filter(filter,…
+        filter = self._build_collection_filter(filter, index_name)
         vectorstore = self._init_vector_store()
         found_docs = vectorstore.stepback_search(
             query,
@@ -475,7 +560,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
     def stepback_summary_index(self,
                                query: str,
                                messages: List[Dict[str, Any]] = [],
-…
+                               index_name: str = "",
                                filter: dict | str = {}, cut_off: float = 0.5,
                                search_top: int = 10, reranker: dict = {},
                                full_text_search: Optional[Dict[str, Any]] = None,
@@ -484,7 +569,7 @@ class BaseVectorStoreToolApiWrapper(BaseToolApiWrapper):
                                **kwargs):
         """ Generates a summary of indexed documents using stepback technique."""
         vectorstore = self._init_vector_store()
-        filter = self._build_collection_filter(filter,…
+        filter = self._build_collection_filter(filter, index_name)
 
         found_docs = vectorstore.stepback_summary(
             query,
@@ -554,11 +639,281 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def _get_files(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def _read_file(…
+    def _read_file(
+        self,
+        file_path: str,
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None,
+        head: Optional[int] = None,
+        tail: Optional[int] = None,
+        **kwargs  # Allow subclasses to have additional parameters
+    ) -> str:
+        """
+        Read file content with optional partial read support.
+
+        Subclasses should implement this method. If they don't support partial reads,
+        they can accept **kwargs and ignore offset/limit/head/tail parameters - the base
+        class high-level methods will apply slicing client-side.
+
+        Args:
+            file_path: Path to the file
+            branch: Branch name (None for active branch)
+            offset: Starting line number (1-indexed)
+            limit: Number of lines to read from offset
+            head: Read only first N lines
+            tail: Read only last N lines
+            **kwargs: Additional toolkit-specific parameters (e.g., repo_name for GitHub)
+
+        Returns:
+            File content as string
+        """
         raise NotImplementedError("Subclasses should implement this method")
+
+    def _write_file(
+        self,
+        file_path: str,
+        content: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Write content to a file.
+
+        Subclasses should implement this method to enable edit_file functionality.
+        For VCS toolkits, this may involve creating or updating files with commits.
+
+        Args:
+            file_path: Path to the file
+            content: New file content
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message
+        """
+        raise NotImplementedError("Subclasses should implement _write_file to enable editing")
 
     def _file_commit_hash(self, file_path: str, branch: str):
         pass
+
+    def read_file_chunk(
+        self,
+        file_path: str,
+        start_line: int,
+        end_line: Optional[int] = None,
+        branch: str = None
+    ) -> str:
+        """
+        Read a specific range of lines from a file.
+
+        Args:
+            file_path: Path to the file
+            start_line: Starting line number (1-indexed, inclusive)
+            end_line: Ending line number (1-indexed, inclusive). If None, reads to end.
+            branch: Branch name (None for active branch)
+
+        Returns:
+            File content for the specified line range
+        """
+        from .utils.text_operations import apply_line_slice
+
+        # Calculate offset and limit from start_line and end_line
+        offset = start_line
+        limit = (end_line - start_line + 1) if end_line is not None else None
+
+        # Read the file with offset/limit
+        content = self._read_file(file_path, branch, offset=offset, limit=limit)
+
+        # Apply client-side slicing if toolkit doesn't support partial reads
+        # (toolkit's _read_file will return full content if it ignores offset/limit)
+        return apply_line_slice(content, offset=offset, limit=limit)
+
+    def read_multiple_files(
+        self,
+        file_paths: List[str],
+        branch: str = None,
+        offset: Optional[int] = None,
+        limit: Optional[int] = None
+    ) -> Dict[str, str]:
+        """
+        Read multiple files in batch.
+
+        Args:
+            file_paths: List of file paths to read
+            branch: Branch name (None for active branch)
+            offset: Starting line number for all files (1-indexed)
+            limit: Number of lines to read from offset for all files
+
+        Returns:
+            Dictionary mapping file paths to their content (or error messages)
+        """
+        results = {}
+
+        for file_path in file_paths:
+            try:
+                content = self._read_file(
+                    file_path,
+                    branch,
+                    offset=offset,
+                    limit=limit
+                )
+                results[file_path] = content
+            except Exception as e:
+                results[file_path] = f"Error reading file: {str(e)}"
+                logger.error(f"Failed to read {file_path}: {e}")
+
+        return results
+
+    def search_file(
+        self,
+        file_path: str,
+        pattern: str,
+        branch: str = None,
+        is_regex: bool = True,
+        context_lines: int = 2
+    ) -> str:
+        """
+        Search for pattern in file content with context.
+
+        Args:
+            file_path: Path to the file
+            pattern: Search pattern (regex if is_regex=True, else literal)
+            branch: Branch name (None for active branch)
+            is_regex: Whether pattern is regex (default True)
+            context_lines: Lines of context before/after matches (default 2)
+
+        Returns:
+            Formatted string with search results and context
+        """
+        from .utils.text_operations import search_in_content
+
+        # Read full file content
+        content = self._read_file(file_path, branch)
+
+        # Search for pattern
+        matches = search_in_content(content, pattern, is_regex, context_lines)
+
+        if not matches:
+            return f"No matches found for pattern '{pattern}' in {file_path}"
+
+        # Format results
+        result_lines = [f"Found {len(matches)} match(es) for pattern '{pattern}' in {file_path}:\n"]
+
+        for i, match in enumerate(matches, 1):
+            result_lines.append(f"\n--- Match {i} at line {match['line_number']} ---")
+
+            # Context before
+            if match['context_before']:
+                for line in match['context_before']:
+                    result_lines.append(f"  {line}")
+
+            # Matching line (highlighted)
+            result_lines.append(f"> {match['line_content']}")
+
+            # Context after
+            if match['context_after']:
+                for line in match['context_after']:
+                    result_lines.append(f"  {line}")
+
+        return "\n".join(result_lines)
+
+    def edit_file(
+        self,
+        file_path: str,
+        file_query: str,
+        branch: str = None,
+        commit_message: str = None
+    ) -> str:
+        """
+        Edit file using OLD/NEW markers for precise replacements.
+
+        Only works with text files (markdown, txt, csv, json, xml, html, yaml, code files).
+
+        Args:
+            file_path: Path to the file to edit
+            file_query: Edit instructions with OLD/NEW markers
+            branch: Branch name (None for active branch)
+            commit_message: Commit message (VCS toolkits only)
+
+        Returns:
+            Success message or error
+
+        Raises:
+            ToolException: If file is not text-editable or edit fails
+        """
+        from .utils.text_operations import parse_old_new_markers, is_text_editable
+        from langchain_core.callbacks import dispatch_custom_event
+
+        # Validate file is text-editable
+        if not is_text_editable(file_path):
+            raise ToolException(
+                f"Cannot edit binary/document file '{file_path}'. "
+                f"Supported text formats: markdown, txt, csv, json, xml, html, yaml, code files."
+            )
+
+        # Parse OLD/NEW markers
+        edits = parse_old_new_markers(file_query)
+        if not edits:
+            raise ToolException(
+                "No OLD/NEW marker pairs found in file_query. "
+                "Format: OLD <<<< old text >>>> OLD NEW <<<< new text >>>> NEW"
+            )
+
+        # Read current file content
+        try:
+            current_content = self._read_file(file_path, branch)
+        except Exception as e:
+            raise ToolException(f"Failed to read file {file_path}: {e}")
+
+        # Apply all edits
+        updated_content = current_content
+        for old_text, new_text in edits:
+            if not old_text.strip():
+                continue
+
+            if old_text not in updated_content:
+                logger.warning(
+                    f"Old content not found in {file_path}. "
+                    f"Looking for: {old_text[:100]}..."
+                )
+                continue
+
+            updated_content = updated_content.replace(old_text, new_text)
+
+        # Check if any changes were made
+        if current_content == updated_content:
+            return (
+                f"No changes made to {file_path}. "
+                "Old content was not found or is empty. "
+                "Use read_file or search_file to verify current content."
+            )
+
+        # Write updated content
+        try:
+            result = self._write_file(file_path, updated_content, branch, commit_message)
+        except NotImplementedError:
+            raise ToolException(
+                f"Editing not supported for this toolkit. "
+                f"The _write_file method is not implemented."
+            )
+        except Exception as e:
+            raise ToolException(f"Failed to write file {file_path}: {e}")
+
+        # Dispatch file modification event
+        try:
+            dispatch_custom_event("file_modified", {
+                "message": f"File '{file_path}' edited successfully",
+                "filename": file_path,
+                "tool_name": "edit_file",
+                "toolkit": self.__class__.__name__,
+                "operation_type": "modify",
+                "edits_applied": len(edits)
+            })
+        except Exception as e:
+            logger.warning(f"Failed to dispatch file_modified event: {e}")
+
+        return result
 
     def __handle_get_files(self, path: str, branch: str):
         """
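`edit_file` applies literal string replacements parsed out of the OLD/NEW marker blocks, then writes the result through `_write_file`. A sketch of a call using the marker format from `EditFileInput`; the file path and contents are illustrative:

```python
file_query = """OLD <<<<
timeout = 30
>>>> OLD
NEW <<<<
timeout = 60
>>>> NEW"""

# One OLD/NEW pair per replacement; multiple pairs may be supplied.
result = wrapper.edit_file(
    "config/settings.py",
    file_query,
    branch="main",
    commit_message="Increase default timeout",  # used by VCS toolkits only
)
```

Per the implementation above, unmatched OLD blocks are skipped with a warning rather than failing the whole edit, and a `file_modified` custom event is dispatched on success.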
@@ -589,27 +944,37 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def loader(self,
                branch: Optional[str] = None,
                whitelist: Optional[List[str]] = None,
-               blacklist: Optional[List[str]] = None…
+               blacklist: Optional[List[str]] = None,
+               chunked: bool = True) -> Generator[Document, None, None]:
         """
-        Generates…
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
 
         Parameters:
         - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
         - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
         - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
 
         Returns:
-        - generator: Yields…
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
 
         Example:
         # Use 'feature-branch', include '.py' files, exclude 'test_' files
-…
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
 
         Notes:
         - Whitelist and blacklist use Unix shell-style wildcards.
         - Files must match the whitelist and not the blacklist to be included.
+        - When chunked=True:
+          - .md files → markdown chunker (header-based splitting)
+          - .py/.js/.ts/etc → code parser (TreeSitter-based)
+          - .json files → JSON chunker
+          - other files → default text chunker
         """
-        from .…
+        from langchain_core.documents import Document
+        import hashlib
 
         _files = self.__handle_get_files("", self.__get_branch(branch))
         self._log_tool_event(message="Listing files in branch", tool_name="loader")
@@ -627,35 +992,55 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                     or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
             return False
 
-        def…
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
             self._log_tool_event(message="Reading the files", tool_name="loader")
-            # log the progress of file reading
             total_files = len(_files)
+            processed = 0
+
             for idx, file in enumerate(_files, 1):
                 if is_whitelisted(file) and not is_blacklisted(file):
-                    # read file ONLY if it matches whitelist and does not match blacklist
                     try:
                         file_content = self._read_file(file, self.__get_branch(branch))
                     except Exception as e:
                         logger.error(f"Failed to read file {file}: {e}")
-…
+                        continue
+
                     if not file_content:
-                        # empty file, skip
                         continue
-…
-…
+
+                    # Hash the file content for uniqueness tracking
                     file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-…
-…
-…
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'file_name': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
                 if idx % 10 == 0 or idx == total_files:
-                    self._log_tool_event(…
-…
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
 
-…
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
 
     def index_data(self,
-…
+                   index_name: str,
                    branch: Optional[str] = None,
                    whitelist: Optional[List[str]] = None,
                    blacklist: Optional[List[str]] = None,
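`loader` now yields LangChain `Document` objects and, by default, routes them through the file-type-aware universal chunker; `index_data` gained `index_name` as its first parameter. A sketch reusing the docstring's own example values (the wrapper instance and index name are illustrative):

```python
# Raw, unchunked Documents:
for doc in wrapper.loader(branch="feature-branch",
                          whitelist=["*.py"], blacklist=["*test_*"],
                          chunked=False):
    print(doc.metadata["file_path"], len(doc.page_content))

# Index the same selection under a (max 7 characters) index name;
# clean_index is read from **kwargs, per the hunk below.
wrapper.index_data("pycode", branch="feature-branch",
                   whitelist=["*.py"], blacklist=["*test_*"],
                   clean_index=True)
```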
@@ -669,7 +1054,7 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
         )
         vectorstore = self._init_vector_store()
         clean_index = kwargs.get('clean_index', False)
-        return vectorstore.index_documents(documents,…
+        return vectorstore.index_documents(documents, index_name=index_name,
                                            clean_index=clean_index, is_code=True,
                                            progress_step=kwargs.get('progress_step', 5))
 
@@ -708,20 +1093,75 @@ def extend_with_vector_tools(method):
     return wrapper
 
 
+def extend_with_file_operations(method):
+    """
+    Decorator to automatically add file operation tools to toolkits that implement
+    _read_file and _write_file methods.
+
+    Adds:
+    - read_file_chunk: Read specific line ranges
+    - read_multiple_files: Batch read files
+    - search_file: Search for patterns in files
+    - edit_file: Edit files using OLD/NEW markers
+    """
+    def wrapper(self, *args, **kwargs):
+        tools = method(self, *args, **kwargs)
+
+        # Only add file operations if toolkit inherits from BaseCodeToolApiWrapper
+        # and has implemented the required methods
+        if isinstance(self, BaseCodeToolApiWrapper):
+            # Import schemas from elitea_base
+            from . import elitea_base
+
+            file_operation_tools = [
+                {
+                    "name": "read_file_chunk",
+                    "mode": "read_file_chunk",
+                    "ref": self.read_file_chunk,
+                    "description": self.read_file_chunk.__doc__,
+                    "args_schema": elitea_base.ReadFileChunkInput
+                },
+                {
+                    "name": "read_multiple_files",
+                    "mode": "read_multiple_files",
+                    "ref": self.read_multiple_files,
+                    "description": self.read_multiple_files.__doc__,
+                    "args_schema": elitea_base.ReadMultipleFilesInput
+                },
+                {
+                    "name": "search_file",
+                    "mode": "search_file",
+                    "ref": self.search_file,
+                    "description": self.search_file.__doc__,
+                    "args_schema": elitea_base.SearchFileInput
+                },
+                {
+                    "name": "edit_file",
+                    "mode": "edit_file",
+                    "ref": self.edit_file,
+                    "description": self.edit_file.__doc__,
+                    "args_schema": elitea_base.EditFileInput
+                },
+            ]
+
+            tools.extend(file_operation_tools)
+
+        return tools
+
+    return wrapper
+
+
 def filter_missconfigured_index_tools(method):
     def wrapper(self, *args, **kwargs):
         toolkit = method(self, *args, **kwargs)
 
         # Validate index tools misconfiguration and exclude them if necessary
-        is_index_toolkit = any(tool.name.…
-                               if TOOLKIT_SPLITTER in tool.name else tool.name
-                               in INDEX_TOOL_NAMES for tool in toolkit.tools)
+        is_index_toolkit = any(tool.name in INDEX_TOOL_NAMES for tool in toolkit.tools)
         is_index_configuration_missing = not (kwargs.get('embedding_model')
                                               and kwargs.get('pgvector_configuration'))
 
         if is_index_toolkit and is_index_configuration_missing:
-            toolkit.tools = [tool for tool in toolkit.tools if…
-                             …1] if TOOLKIT_SPLITTER in tool.name else tool.name) not in INDEX_TOOL_NAMES]
+            toolkit.tools = [tool for tool in toolkit.tools if tool.name not in INDEX_TOOL_NAMES]
 
         return toolkit
 
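`extend_with_file_operations` mirrors the existing `extend_with_vector_tools` decorator: it wraps a toolkit's `get_available_tools` and appends the four file tools whenever the wrapper is a `BaseCodeToolApiWrapper`. A sketch of how a toolkit would opt in; `MyRepoWrapper` and its internals are hypothetical:

```python
class MyRepoWrapper(BaseCodeToolApiWrapper):
    def _read_file(self, file_path, branch=None, **kwargs) -> str:
        ...  # return the file's text from the backing repository

    def _write_file(self, file_path, content, branch=None, commit_message=None) -> str:
        ...  # persist the new content (e.g., as a commit)

    @extend_with_file_operations
    def get_available_tools(self):
        # Toolkit-specific tools go here; read_file_chunk,
        # read_multiple_files, search_file and edit_file are
        # appended automatically by the decorator.
        return []
```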