alita-sdk 0.3.465__py3-none-any.whl → 0.3.497__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of alita-sdk might be problematic.
Files changed (103)
  1. alita_sdk/cli/agent/__init__.py +5 -0
  2. alita_sdk/cli/agent/default.py +83 -1
  3. alita_sdk/cli/agent_loader.py +22 -4
  4. alita_sdk/cli/agent_ui.py +13 -3
  5. alita_sdk/cli/agents.py +1876 -186
  6. alita_sdk/cli/callbacks.py +96 -25
  7. alita_sdk/cli/cli.py +10 -1
  8. alita_sdk/cli/config.py +151 -9
  9. alita_sdk/cli/context/__init__.py +30 -0
  10. alita_sdk/cli/context/cleanup.py +198 -0
  11. alita_sdk/cli/context/manager.py +731 -0
  12. alita_sdk/cli/context/message.py +285 -0
  13. alita_sdk/cli/context/strategies.py +289 -0
  14. alita_sdk/cli/context/token_estimation.py +127 -0
  15. alita_sdk/cli/input_handler.py +167 -4
  16. alita_sdk/cli/inventory.py +1256 -0
  17. alita_sdk/cli/toolkit.py +14 -17
  18. alita_sdk/cli/toolkit_loader.py +35 -5
  19. alita_sdk/cli/tools/__init__.py +8 -1
  20. alita_sdk/cli/tools/filesystem.py +910 -64
  21. alita_sdk/cli/tools/planning.py +143 -157
  22. alita_sdk/cli/tools/terminal.py +154 -20
  23. alita_sdk/community/__init__.py +64 -8
  24. alita_sdk/community/inventory/__init__.py +224 -0
  25. alita_sdk/community/inventory/config.py +257 -0
  26. alita_sdk/community/inventory/enrichment.py +2137 -0
  27. alita_sdk/community/inventory/extractors.py +1469 -0
  28. alita_sdk/community/inventory/ingestion.py +3172 -0
  29. alita_sdk/community/inventory/knowledge_graph.py +1457 -0
  30. alita_sdk/community/inventory/parsers/__init__.py +218 -0
  31. alita_sdk/community/inventory/parsers/base.py +295 -0
  32. alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
  33. alita_sdk/community/inventory/parsers/go_parser.py +851 -0
  34. alita_sdk/community/inventory/parsers/html_parser.py +389 -0
  35. alita_sdk/community/inventory/parsers/java_parser.py +593 -0
  36. alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
  37. alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
  38. alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
  39. alita_sdk/community/inventory/parsers/python_parser.py +604 -0
  40. alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
  41. alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
  42. alita_sdk/community/inventory/parsers/text_parser.py +322 -0
  43. alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
  44. alita_sdk/community/inventory/patterns/__init__.py +61 -0
  45. alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
  46. alita_sdk/community/inventory/patterns/loader.py +348 -0
  47. alita_sdk/community/inventory/patterns/registry.py +198 -0
  48. alita_sdk/community/inventory/presets.py +535 -0
  49. alita_sdk/community/inventory/retrieval.py +1403 -0
  50. alita_sdk/community/inventory/toolkit.py +169 -0
  51. alita_sdk/community/inventory/visualize.py +1370 -0
  52. alita_sdk/configurations/bitbucket.py +0 -3
  53. alita_sdk/runtime/clients/client.py +108 -31
  54. alita_sdk/runtime/langchain/assistant.py +4 -2
  55. alita_sdk/runtime/langchain/constants.py +3 -1
  56. alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
  57. alita_sdk/runtime/langchain/document_loaders/constants.py +10 -6
  58. alita_sdk/runtime/langchain/langraph_agent.py +123 -31
  59. alita_sdk/runtime/llms/preloaded.py +2 -6
  60. alita_sdk/runtime/toolkits/__init__.py +2 -0
  61. alita_sdk/runtime/toolkits/application.py +1 -1
  62. alita_sdk/runtime/toolkits/mcp.py +107 -91
  63. alita_sdk/runtime/toolkits/planning.py +173 -0
  64. alita_sdk/runtime/toolkits/tools.py +59 -7
  65. alita_sdk/runtime/tools/artifact.py +46 -17
  66. alita_sdk/runtime/tools/function.py +2 -1
  67. alita_sdk/runtime/tools/llm.py +320 -32
  68. alita_sdk/runtime/tools/mcp_remote_tool.py +23 -7
  69. alita_sdk/runtime/tools/planning/__init__.py +36 -0
  70. alita_sdk/runtime/tools/planning/models.py +246 -0
  71. alita_sdk/runtime/tools/planning/wrapper.py +607 -0
  72. alita_sdk/runtime/tools/vectorstore_base.py +44 -9
  73. alita_sdk/runtime/utils/AlitaCallback.py +106 -20
  74. alita_sdk/runtime/utils/mcp_client.py +465 -0
  75. alita_sdk/runtime/utils/mcp_oauth.py +80 -0
  76. alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
  77. alita_sdk/runtime/utils/streamlit.py +6 -10
  78. alita_sdk/runtime/utils/toolkit_utils.py +14 -5
  79. alita_sdk/tools/__init__.py +54 -27
  80. alita_sdk/tools/ado/repos/repos_wrapper.py +1 -2
  81. alita_sdk/tools/base_indexer_toolkit.py +99 -20
  82. alita_sdk/tools/bitbucket/__init__.py +2 -2
  83. alita_sdk/tools/chunkers/__init__.py +3 -1
  84. alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
  85. alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
  86. alita_sdk/tools/chunkers/universal_chunker.py +270 -0
  87. alita_sdk/tools/code/loaders/codesearcher.py +3 -2
  88. alita_sdk/tools/code_indexer_toolkit.py +55 -22
  89. alita_sdk/tools/confluence/api_wrapper.py +63 -14
  90. alita_sdk/tools/elitea_base.py +86 -21
  91. alita_sdk/tools/jira/__init__.py +1 -1
  92. alita_sdk/tools/jira/api_wrapper.py +91 -40
  93. alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
  94. alita_sdk/tools/qtest/__init__.py +1 -1
  95. alita_sdk/tools/sharepoint/api_wrapper.py +2 -2
  96. alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +17 -13
  97. alita_sdk/tools/zephyr_essential/api_wrapper.py +12 -13
  98. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/METADATA +2 -1
  99. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/RECORD +103 -61
  100. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/WHEEL +0 -0
  101. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/entry_points.txt +0 -0
  102. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/licenses/LICENSE +0 -0
  103. {alita_sdk-0.3.465.dist-info → alita_sdk-0.3.497.dist-info}/top_level.txt +0 -0
@@ -128,12 +128,37 @@ BaseIndexDataParams = create_model(
 
 
 class BaseToolApiWrapper(BaseModel):
-
+
+    # Optional RunnableConfig for CLI/standalone usage (allows dispatch_custom_event to work)
+    _runnable_config: Optional[Dict[str, Any]] = None
+    # toolkit id propagated from backend
+    toolkit_id: int = 0
 
     def get_available_tools(self):
         raise NotImplementedError("Subclasses should implement this method")
 
-    def _log_tool_event(self, message: str, tool_name: str = None):
-        """Log data and dispatch custom event for the tool"""
+    def set_runnable_config(self, config: Optional[Dict[str, Any]]) -> None:
+        """
+        Set the RunnableConfig for dispatching custom events.
+
+        This is required when running outside of a LangChain agent context
+        (e.g., from CLI). Without a config containing a run_id,
+        dispatch_custom_event will fail with "Unable to dispatch an adhoc event
+        without a parent run id".
+
+        Args:
+            config: A RunnableConfig dict with at least {'run_id': uuid}
+        """
+        self._runnable_config = config
+
+    def _log_tool_event(self, message: str, tool_name: str = None, config: Optional[Dict[str, Any]] = None):
+        """Log data and dispatch custom event for the tool.
+
+        Args:
+            message: The message to log
+            tool_name: Name of the tool (defaults to 'tool_progress')
+            config: Optional RunnableConfig. If not provided, uses self._runnable_config.
+                Required when running outside a LangChain agent context.
+        """
 
         try:
             from langchain_core.callbacks import dispatch_custom_event
@@ -142,6 +167,10 @@ class BaseToolApiWrapper(BaseModel):
                 tool_name = 'tool_progress'
 
             logger.info(message)
+
+            # Use provided config, fall back to instance config
+            effective_config = config or self._runnable_config
+
             dispatch_custom_event(
                 name="thinking_step",
                 data={
@@ -149,6 +178,7 @@ class BaseToolApiWrapper(BaseModel):
                     "tool_name": tool_name,
                     "toolkit": self.__class__.__name__,
                 },
+                config=effective_config,
             )
         except Exception as e:
             logger.warning(f"Failed to dispatch progress event: {str(e)}")
@@ -165,6 +195,11 @@ class BaseToolApiWrapper(BaseModel):
             # execution = str(execution)
             return execution
         except Exception as e:
+            # Re-raise McpAuthorizationRequired directly without wrapping
+            from alita_sdk.runtime.utils.mcp_oauth import McpAuthorizationRequired
+            if isinstance(e, McpAuthorizationRequired):
+                raise
+
             # Catch all tool execution exceptions and provide user-friendly error messages
             error_type = type(e).__name__
             error_message = str(e)
@@ -589,27 +624,37 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
     def loader(self,
                branch: Optional[str] = None,
               whitelist: Optional[List[str]] = None,
-               blacklist: Optional[List[str]] = None) -> str:
+               blacklist: Optional[List[str]] = None,
+               chunked: bool = True) -> Generator[Document, None, None]:
         """
-        Generates file content from a branch, respecting whitelist and blacklist patterns.
+        Generates Documents from files in a branch, respecting whitelist and blacklist patterns.
 
         Parameters:
         - branch (Optional[str]): Branch for listing files. Defaults to the current branch if None.
         - whitelist (Optional[List[str]]): File extensions or paths to include. Defaults to all files if None.
         - blacklist (Optional[List[str]]): File extensions or paths to exclude. Defaults to no exclusions if None.
+        - chunked (bool): If True (default), applies universal chunker based on file type.
+          If False, returns raw Documents without chunking.
 
         Returns:
-        - generator: Yields content from files matching the whitelist but not the blacklist.
+        - generator: Yields Documents from files matching the whitelist but not the blacklist.
 
         Example:
         # Use 'feature-branch', include '.py' files, exclude 'test_' files
-        file_generator = loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*'])
+        for doc in loader(branch='feature-branch', whitelist=['*.py'], blacklist=['*test_*']):
+            print(doc.page_content)
 
         Notes:
         - Whitelist and blacklist use Unix shell-style wildcards.
         - Files must match the whitelist and not the blacklist to be included.
+        - When chunked=True:
+          - .md files → markdown chunker (header-based splitting)
+          - .py/.js/.ts/etc → code parser (TreeSitter-based)
+          - .json files → JSON chunker
+          - other files → default text chunker
         """
-        from .chunkers.code.codeparser import parse_code_files_for_db
+        from langchain_core.documents import Document
+        import hashlib
 
         _files = self.__handle_get_files("", self.__get_branch(branch))
         self._log_tool_event(message="Listing files in branch", tool_name="loader")
@@ -627,32 +672,52 @@ class BaseCodeToolApiWrapper(BaseVectorStoreToolApiWrapper):
                     or any(file_path.endswith(f'.{pattern}') for pattern in blacklist))
             return False
 
-        def file_content_generator():
+        def raw_document_generator() -> Generator[Document, None, None]:
+            """Yields raw Documents without chunking."""
             self._log_tool_event(message="Reading the files", tool_name="loader")
-            # log the progress of file reading
             total_files = len(_files)
+            processed = 0
+
             for idx, file in enumerate(_files, 1):
                 if is_whitelisted(file) and not is_blacklisted(file):
-                    # read file ONLY if it matches whitelist and does not match blacklist
                     try:
                         file_content = self._read_file(file, self.__get_branch(branch))
                     except Exception as e:
                         logger.error(f"Failed to read file {file}: {e}")
-                        file_content = ""
+                        continue
+
                     if not file_content:
-                        # empty file, skip
                         continue
-                    # hash the file content to ensure uniqueness
-                    import hashlib
+
+                    # Hash the file content for uniqueness tracking
                     file_hash = hashlib.sha256(file_content.encode("utf-8")).hexdigest()
-                    yield {"file_name": file,
-                           "file_content": file_content,
-                           "commit_hash": file_hash}
+                    processed += 1
+
+                    yield Document(
+                        page_content=file_content,
+                        metadata={
+                            'file_path': file,
+                            'file_name': file,
+                            'source': file,
+                            'commit_hash': file_hash,
+                        }
+                    )
+
                 if idx % 10 == 0 or idx == total_files:
-                    self._log_tool_event(message=f"{idx} out of {total_files} files have been read", tool_name="loader")
-            self._log_tool_event(message=f"{len(_files)} have been read", tool_name="loader")
+                    self._log_tool_event(
+                        message=f"{idx} out of {total_files} files checked, {processed} matched",
+                        tool_name="loader"
+                    )
+
+            self._log_tool_event(message=f"{processed} files loaded", tool_name="loader")
 
-        return parse_code_files_for_db(file_content_generator())
+        if not chunked:
+            # Return raw documents without chunking
+            return raw_document_generator()
+
+        # Apply universal chunker based on file type
+        from .chunkers.universal_chunker import universal_chunker
+        return universal_chunker(raw_document_generator())
 
     def index_data(self,
                    index_name: str,
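Taken together, loader now yields langchain Document objects with chunking on by default. A short usage sketch (the wrapper name is illustrative; any BaseCodeToolApiWrapper subclass applies):

    # Default: per-file-type chunking (markdown/code/JSON/text chunkers).
    for chunk in wrapper.loader(branch="main", whitelist=["*.py"], blacklist=["*test_*"]):
        print(chunk.metadata.get("file_path"), len(chunk.page_content))

    # chunked=False: one raw Document per matching file, carrying a sha256
    # of the content as metadata['commit_hash'] for change tracking.
    raw_docs = list(wrapper.loader(branch="main", chunked=False))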
@@ -68,7 +68,7 @@ class JiraToolkit(BaseToolkit):
         name,
         cloud=(bool, Field(description="Hosting Option", json_schema_extra={'configuration': True})),
         limit=(int, Field(description="Limit issues. Default is 5", gt=0, default=5)),
-        api_version=(Optional[str], Field(description="Rest API version: optional. Default is 2", default="2")),
+        api_version=(Literal['2', '3'], Field(description="Rest API version: optional. Default is 2", default="3")),
         labels=(Optional[str], Field(
             description="List of comma separated labels used for labeling of agent's created or updated entities",
             default=None,
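Narrowing api_version from Optional[str] to Literal['2', '3'] means pydantic now rejects any other value at configuration time (note the new default is "3" while the description still says 2). A minimal sketch of the same validation behavior, reduced to one field:

    from typing import Literal
    from pydantic import Field, ValidationError, create_model

    Cfg = create_model(
        "Cfg",
        api_version=(Literal['2', '3'], Field(default="3", description="Rest API version")),
    )

    print(Cfg().api_version)       # '3'
    print(Cfg(api_version='2'))    # accepted

    try:
        Cfg(api_version='latest')  # rejected: not one of the Literal values
    except ValidationError as e:
        print(e.errors()[0]['type'])  # 'literal_error' (pydantic v2)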
@@ -453,41 +453,63 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         return super().validate_toolkit(values)
 
     def _parse_issues(self, issues: Dict) -> List[dict]:
-        parsed = []
-        for issue in issues["issues"]:
-            if len(parsed) >= self.limit:
+        parsed: List[dict] = []
+        issues_list = issues.get("issues") if isinstance(issues, dict) else None
+        if not isinstance(issues_list, list):
+            return parsed
+
+        for issue in issues_list:
+            if self.limit and len(parsed) >= self.limit:
                 break
-            issue_fields = issue["fields"]
-            key = issue["key"]
-            id = issue["id"]
-            summary = issue_fields["summary"]
-            description = issue_fields["description"]
-            created = issue_fields["created"][0:10]
-            updated = issue_fields["updated"]
-            duedate = issue_fields["duedate"]
-            priority = issue_fields["priority"]["name"]
-            status = issue_fields["status"]["name"]
-            project_id = issue_fields["project"]["id"]
-            issue_url = f"{self._client.url}browse/{key}"
-            try:
-                assignee = issue_fields["assignee"]["displayName"]
-            except Exception:
-                assignee = "None"
+
+            issue_fields = issue.get("fields") or {}
+            key = issue.get("key", "")
+            issue_id = issue.get("id", "")
+
+            summary = issue_fields.get("summary") or ""
+            description = issue_fields.get("description") or ""
+            created_raw = issue_fields.get("created") or ""
+            created = created_raw[:10] if created_raw else ""
+            updated = issue_fields.get("updated") or ""
+            duedate = issue_fields.get("duedate")
+
+            priority_info = issue_fields.get("priority") or {}
+            priority = priority_info.get("name") or "None"
+
+            status_info = issue_fields.get("status") or {}
+            status = status_info.get("name") or "Unknown"
+
+            project_info = issue_fields.get("project") or {}
+            project_id = project_info.get("id") or ""
+
+            issue_url = f"{self._client.url}browse/{key}" if key else self._client.url
+
+            assignee_info = issue_fields.get("assignee") or {}
+            assignee = assignee_info.get("displayName") or "None"
+
             rel_issues = {}
-            for related_issue in issue_fields["issuelinks"]:
-                if "inwardIssue" in related_issue.keys():
-                    rel_type = related_issue["type"]["inward"]
-                    rel_key = related_issue["inwardIssue"]["key"]
+            for related_issue in issue_fields.get("issuelinks") or []:
+                rel_type = None
+                rel_key = None
+                if related_issue.get("inwardIssue"):
+                    rel_type = related_issue.get("type", {}).get("inward")
+                    rel_key = related_issue["inwardIssue"].get("key")
                     # rel_summary = related_issue["inwardIssue"]["fields"]["summary"]
-                if "outwardIssue" in related_issue.keys():
-                    rel_type = related_issue["type"]["outward"]
-                    rel_key = related_issue["outwardIssue"]["key"]
+                elif related_issue.get("outwardIssue"):
+                    rel_type = related_issue.get("type", {}).get("outward")
+                    rel_key = related_issue["outwardIssue"].get("key")
                     # rel_summary = related_issue["outwardIssue"]["fields"]["summary"]
-                rel_issues = {"type": rel_type, "key": rel_key, "url": f"{self._client.url}browse/{rel_key}"}
+
+                if rel_type and rel_key:
+                    rel_issues = {
+                        "type": rel_type,
+                        "key": rel_key,
+                        "url": f"{self._client.url}browse/{rel_key}",
+                    }
 
             parsed_issue = {
                 "key": key,
-                "id": id,
+                "id": issue_id,
                 "projectId": project_id,
                 "summary": summary,
                 "description": description,
@@ -500,10 +522,13 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
                 "url": issue_url,
                 "related_issues": rel_issues,
             }
-            for field in self.additional_fields:
-                field_value = issue_fields.get(field, None)
+
+            for field in (self.additional_fields or []):
+                field_value = issue_fields.get(field)
                 parsed_issue[field] = field_value
+
             parsed.append(parsed_issue)
+
         return parsed
 
     @staticmethod
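The rewrite consistently prefers issue_fields.get(key) or {} over issue_fields.get(key, {}) because Jira routinely returns fields that are present but explicitly null (an unassigned issue being the classic case), and a dict default on .get only applies when the key is missing. A small illustration:

    # Present-but-null field, as Jira returns for an unassigned issue:
    issue_fields = {"assignee": None, "priority": {"name": "High"}}

    # .get with a default does NOT guard against None values:
    # issue_fields.get("assignee", {})["displayName"]  -> TypeError

    # 'or {}' normalizes both missing and null to an empty dict:
    assignee = (issue_fields.get("assignee") or {}).get("displayName") or "None"
    print(assignee)  # 'None'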
@@ -749,13 +774,24 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
 
         attachment_data = []
         attachments = self._client.get_attachments_ids_from_issue(issue=jira_issue_key)
+        api_version = str(getattr(self._client, "api_version", "2"))
         for attachment in attachments:
             if attachment_pattern and not re.search(attachment_pattern, attachment['filename']):
                 logger.info(f"Skipping attachment {attachment['filename']} as it does not match pattern {attachment_pattern}")
                 continue
             logger.info(f"Processing attachment {attachment['filename']} with ID {attachment['attachment_id']}")
             try:
-                attachment_content = self._client.get_attachment_content(attachment['attachment_id'])
+                attachment_content = None
+
+                # Cloud (REST v3) attachments require signed URLs returned from metadata
+                if api_version in {"3", "latest"} or self.cloud:
+                    attachment_content = self._download_attachment_v3(
+                        attachment['attachment_id'],
+                        attachment['filename']
+                    )
+
+                if attachment_content is None:
+                    attachment_content = self._client.get_attachment_content(attachment['attachment_id'])
             except Exception as e:
                 logger.error(
                     f"Failed to download attachment {attachment['filename']} for issue {jira_issue_key}: {str(e)}")
@@ -797,15 +833,6 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
         logger.debug(response_string)
         return response_string
 
-    def _extract_attachment_content(self, attachment):
-        """Extract attachment's content if possible (used for api v.2)"""
-
-        try:
-            content = self._client.get(attachment['content'].replace(self.base_url, ''))
-        except Exception as e:
-            content = f"Unable to parse content of '{attachment['filename']}' due to: {str(e)}"
-        return f"filename: {attachment['filename']}\ncontent: {content}"
-
     # Helper functions for image processing
     @staticmethod
     def _collect_context_for_image(content: str, image_marker: str, context_radius: int = 500) -> str:
@@ -1038,6 +1065,30 @@ class JiraApiWrapper(NonCodeIndexerToolkit):
             logger.error(f"Error downloading attachment: {str(e)}")
             return None
 
+    def _download_attachment_v3(self, attachment_id: str, filename: str | None = None) -> Optional[bytes]:
+        """Download Jira attachment using metadata content URL (required for REST v3 / Cloud)."""
+        try:
+            metadata = self._client.get_attachment(attachment_id)
+        except Exception as e:
+            logger.error(f"Failed to retrieve metadata for attachment {attachment_id}: {str(e)}")
+            return None
+
+        download_url = metadata.get('content') or metadata.get('_links', {}).get('content')
+
+        if not download_url:
+            logger.warning(
+                f"Attachment {attachment_id} ({filename}) metadata does not include a content URL; falling back.")
+            return None
+
+        logger.info(f"Downloading attachment {attachment_id} via metadata content URL (v3).")
+        content = self._download_attachment(download_url)
+
+        if content is None:
+            logger.error(
+                f"Failed to download attachment {attachment_id} ({filename}) from v3 content URL: {download_url}")
+
+        return content
+
     def _extract_image_data(self, field_data):
         """
         Extracts image data from general JSON response.
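Note that the v3 path is strictly best-effort: any failure returns None and the caller falls back to the generic get_attachment_content call. Outside the SDK, the same two-step Cloud download can be reproduced directly against the REST v3 API: fetch the attachment metadata, then follow its content URL. A hedged sketch with placeholder site, credentials, and attachment id:

    import requests

    BASE_URL = "https://your-site.atlassian.net"  # placeholder
    AUTH = ("you@example.com", "<api-token>")     # placeholder

    # Step 1: v3 attachment metadata carries the downloadable 'content' URL.
    meta = requests.get(f"{BASE_URL}/rest/api/3/attachment/10001", auth=AUTH).json()
    download_url = meta.get("content") or meta.get("_links", {}).get("content")

    # Step 2: follow that URL for the raw bytes.
    if download_url:
        data = requests.get(download_url, auth=AUTH).content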
@@ -7,6 +7,7 @@ from alita_sdk.tools.base_indexer_toolkit import BaseIndexerToolkit
 
 class NonCodeIndexerToolkit(BaseIndexerToolkit):
     def _get_indexed_data(self, index_name: str):
+        self._ensure_vectorstore_initialized()
         if not self.vector_adapter:
             raise ToolException("Vector adapter is not initialized. "
                                 "Check your configuration: embedding_model and vectorstore_type.")
@@ -37,7 +37,7 @@ class QtestToolkit(BaseToolkit):
         name,
         qtest_configuration=(QtestConfiguration, Field(description="QTest API token", json_schema_extra={
            'configuration_types': ['qtest']})),
-        qtest_project_id=(int, Field(default=None, description="QTest project id")),
+        qtest_project_id=(int, Field(description="QTest project id")),
         no_of_tests_shown_in_dql_search=(Optional[int], Field(description="Max number of items returned by dql search",
                                                               default=10)),
 
@@ -271,13 +271,13 @@ class SharepointApiWrapper(NonCodeIndexerToolkit):
                 file_name = file.get('Name', '')
 
                 # Check if file should be skipped based on skip_extensions
-                if any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
+                if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                        for pattern in skip_extensions):
                     continue
 
                 # Check if file should be included based on include_extensions
                 # If include_extensions is empty, process all files (that weren't skipped)
-                if include_extensions and not (any(re.match(pattern.replace('*', '.*') + '$', file_name, re.IGNORECASE)
+                if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
                                                    for pattern in include_extensions)):
                     continue
 
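The re.escape change matters because the old conversion treated the whole glob as a regex: in a pattern like *.docx the unescaped dot matched any character, so names such as budgetxdocx passed the filter, and a stray ( or + in a pattern would make re.match raise. A quick before/after:

    import re

    # Old: glob '*' -> '.*', but '.' left as a regex wildcard
    old = re.compile('*.docx'.replace('*', '.*') + '$', re.IGNORECASE)
    # New: escape everything first, then restore '*' as '.*'
    new = re.compile(re.escape('*.docx').replace(r'\*', '.*') + '$', re.IGNORECASE)

    print(bool(old.match('budgetxdocx')))   # True  -- false positive
    print(bool(new.match('budgetxdocx')))   # False -- dot is now literal
    print(bool(new.match('report.docx')))   # True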
@@ -31,8 +31,8 @@ class VectorStoreAdapter(ABC):
         pass
 
     @abstractmethod
-    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. If including_index_meta is True, skip the index_meta records."""
         pass
 
     @abstractmethod
@@ -132,18 +132,22 @@ class PGVectorAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
             return []
 
-    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
-        # This logic deletes all data from the vectorstore collection without removal of collection.
-        # Collection itself remains available for future indexing.
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. If including_index_meta is True, skip the index_meta records."""
         from sqlalchemy.orm import Session
-        from sqlalchemy import func
-
+        from sqlalchemy import func, or_
         store = vectorstore_wrapper.vectorstore
         with Session(store.session_maker.bind) as session:
-            session.query(store.EmbeddingStore).filter(
-                func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
-            ).delete(synchronize_session=False)
+            if including_index_meta:
+                session.query(store.EmbeddingStore).filter(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
+                ).delete(synchronize_session=False)
+            else:
+                session.query(store.EmbeddingStore).filter(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name,
+                    or_(func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type').is_(None),
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value)
+                ).delete(synchronize_session=False)
             session.commit()
 
     def is_vectorstore_type(self, vectorstore) -> bool:
@@ -334,8 +338,8 @@ class ChromaAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
             return []
 
-    def clean_collection(self, vectorstore_wrapper, index_name: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. including_index_meta is ignored."""
         vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, index_name))
 
     def get_indexed_data(self, vectorstore_wrapper):
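Read against the PGVector branching above, the new flag works as follows: including_index_meta=True deletes every row whose cmetadata collection matches, index_meta bookkeeping rows included, while the default False keeps rows whose cmetadata type equals IndexerKeywords.INDEX_META_TYPE (the docstring's "skip" wording describes this False branch; Chroma ignores the flag entirely). A usage sketch with illustrative adapter/wrapper names:

    # Re-index from scratch but keep index_meta bookkeeping rows (default):
    adapter.clean_collection(wrapper, index_name="my_index")

    # Wipe the collection entirely, index_meta rows included:
    adapter.clean_collection(wrapper, index_name="my_index", including_index_meta=True)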
@@ -1,3 +1,4 @@
+import hashlib
 import json
 import logging
 from typing import Optional, Generator, Literal
@@ -284,22 +285,20 @@ class ZephyrEssentialApiWrapper(NonCodeIndexerToolkit):
                 if isinstance(v, (str, int, float, bool, list, dict))
             }
             metadata['type'] = "TEST_CASE"
-
-            yield Document(page_content="", metadata=metadata)
-
-    def _extend_data(self, documents: Generator[Document, None, None]) -> Generator[Document, None, None]:
-        for document in documents:
+            #
             try:
-                if 'type' in document.metadata and document.metadata['type'] == "TEST_CASE":
-                    additional_content = self._process_test_case(document.metadata['key'])
-                    for steps_type, content in additional_content.items():
-                        if content:
-                            page_content = json.dumps(content)
-                            document.metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = page_content.encode('utf-8')
-                            document.metadata["steps_type"] = steps_type
+                additional_content = self._process_test_case(metadata['key'])
+                for steps_type, content in additional_content.items():
+                    if content:
+                        page_content = json.dumps(content)
+                        content_hash = hashlib.sha256(page_content.encode('utf-8')).hexdigest()
+                        metadata[IndexerKeywords.UPDATED_ON.value] = content_hash
+                        metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = page_content.encode('utf-8')
+                        metadata["steps_type"] = steps_type
             except Exception as e:
                 logging.error(f"Failed to process document: {e}")
-            yield document
+            #
+            yield Document(page_content="", metadata=metadata)
 
     def _process_test_case(self, key) -> dict:
         steps = self.get_test_case_test_steps(key)
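Storing a sha256 of the serialized steps under IndexerKeywords.UPDATED_ON effectively turns the indexer's update check into content-based change detection: a test case reads as modified exactly when its steps serialize differently. A minimal sketch of that idea, with a hypothetical previously_indexed map from test-case key to last stored hash:

    import hashlib
    import json

    def needs_reindex(key: str, steps: dict, previously_indexed: dict) -> bool:
        # Same hashing scheme as the diff above: sha256 over the JSON dump.
        content_hash = hashlib.sha256(json.dumps(steps).encode('utf-8')).hexdigest()
        return previously_indexed.get(key) != content_hash

    print(needs_reindex("TC-1", {"steps": ["open app"]}, {}))  # True: never seen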
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alita_sdk
-Version: 0.3.465
+Version: 0.3.497
 Summary: SDK for building langchain agents using resources from Alita
 Author-email: Artem Rozumenko <artyom.rozumenko@gmail.com>, Mikalai Biazruchka <mikalai_biazruchka@epam.com>, Roman Mitusov <roman_mitusov@epam.com>, Ivan Krakhmaliuk <lifedj27@gmail.com>, Artem Dubrovskiy <ad13box@gmail.com>
 License-Expression: Apache-2.0
@@ -134,6 +134,7 @@ Provides-Extra: community
 Requires-Dist: retry-extended==0.2.3; extra == "community"
 Requires-Dist: pyobjtojson==0.3; extra == "community"
 Requires-Dist: elitea-analyse==0.1.2; extra == "community"
+Requires-Dist: networkx>=3.0; extra == "community"
 Provides-Extra: all
 Requires-Dist: alita-sdk[runtime]; extra == "all"
 Requires-Dist: alita-sdk[tools]; extra == "all"