PyPI - alita-sdk - Versions diffs - 0.3.263__py3-none-any.whl → 0.3.499__py3-none-any.whl - Mend

alita-sdk 0.3.263-py3-none-any.whl → 0.3.499-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (248)

alita_sdk/cli/__init__.py +10 -0
alita_sdk/cli/__main__.py +17 -0
alita_sdk/cli/agent/__init__.py +5 -0
alita_sdk/cli/agent/default.py +258 -0
alita_sdk/cli/agent_executor.py +155 -0
alita_sdk/cli/agent_loader.py +215 -0
alita_sdk/cli/agent_ui.py +228 -0
alita_sdk/cli/agents.py +3601 -0
alita_sdk/cli/callbacks.py +647 -0
alita_sdk/cli/cli.py +168 -0
alita_sdk/cli/config.py +306 -0
alita_sdk/cli/context/__init__.py +30 -0
alita_sdk/cli/context/cleanup.py +198 -0
alita_sdk/cli/context/manager.py +731 -0
alita_sdk/cli/context/message.py +285 -0
alita_sdk/cli/context/strategies.py +289 -0
alita_sdk/cli/context/token_estimation.py +127 -0
alita_sdk/cli/formatting.py +182 -0
alita_sdk/cli/input_handler.py +419 -0
alita_sdk/cli/inventory.py +1256 -0
alita_sdk/cli/mcp_loader.py +315 -0
alita_sdk/cli/toolkit.py +327 -0
alita_sdk/cli/toolkit_loader.py +85 -0
alita_sdk/cli/tools/__init__.py +43 -0
alita_sdk/cli/tools/approval.py +224 -0
alita_sdk/cli/tools/filesystem.py +1751 -0
alita_sdk/cli/tools/planning.py +389 -0
alita_sdk/cli/tools/terminal.py +414 -0
alita_sdk/community/__init__.py +64 -8
alita_sdk/community/inventory/__init__.py +224 -0
alita_sdk/community/inventory/config.py +257 -0
alita_sdk/community/inventory/enrichment.py +2137 -0
alita_sdk/community/inventory/extractors.py +1469 -0
alita_sdk/community/inventory/ingestion.py +3172 -0
alita_sdk/community/inventory/knowledge_graph.py +1457 -0
alita_sdk/community/inventory/parsers/__init__.py +218 -0
alita_sdk/community/inventory/parsers/base.py +295 -0
alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
alita_sdk/community/inventory/parsers/go_parser.py +851 -0
alita_sdk/community/inventory/parsers/html_parser.py +389 -0
alita_sdk/community/inventory/parsers/java_parser.py +593 -0
alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
alita_sdk/community/inventory/parsers/python_parser.py +604 -0
alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
alita_sdk/community/inventory/parsers/text_parser.py +322 -0
alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
alita_sdk/community/inventory/patterns/__init__.py +61 -0
alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
alita_sdk/community/inventory/patterns/loader.py +348 -0
alita_sdk/community/inventory/patterns/registry.py +198 -0
alita_sdk/community/inventory/presets.py +535 -0
alita_sdk/community/inventory/retrieval.py +1403 -0
alita_sdk/community/inventory/toolkit.py +173 -0
alita_sdk/community/inventory/visualize.py +1370 -0
alita_sdk/configurations/__init__.py +10 -0
alita_sdk/configurations/ado.py +4 -2
alita_sdk/configurations/azure_search.py +1 -1
alita_sdk/configurations/bigquery.py +1 -1
alita_sdk/configurations/bitbucket.py +94 -2
alita_sdk/configurations/browser.py +18 -0
alita_sdk/configurations/carrier.py +19 -0
alita_sdk/configurations/confluence.py +96 -1
alita_sdk/configurations/delta_lake.py +1 -1
alita_sdk/configurations/figma.py +0 -5
alita_sdk/configurations/github.py +65 -1
alita_sdk/configurations/gitlab.py +79 -0
alita_sdk/configurations/google_places.py +17 -0
alita_sdk/configurations/jira.py +103 -0
alita_sdk/configurations/postman.py +1 -1
alita_sdk/configurations/qtest.py +1 -3
alita_sdk/configurations/report_portal.py +19 -0
alita_sdk/configurations/salesforce.py +19 -0
alita_sdk/configurations/service_now.py +1 -12
alita_sdk/configurations/sharepoint.py +19 -0
alita_sdk/configurations/sonar.py +18 -0
alita_sdk/configurations/sql.py +20 -0
alita_sdk/configurations/testio.py +18 -0
alita_sdk/configurations/testrail.py +88 -0
alita_sdk/configurations/xray.py +94 -1
alita_sdk/configurations/zephyr_enterprise.py +94 -1
alita_sdk/configurations/zephyr_essential.py +95 -0
alita_sdk/runtime/clients/artifact.py +12 -2
alita_sdk/runtime/clients/client.py +235 -66
alita_sdk/runtime/clients/mcp_discovery.py +342 -0
alita_sdk/runtime/clients/mcp_manager.py +262 -0
alita_sdk/runtime/clients/sandbox_client.py +373 -0
alita_sdk/runtime/langchain/assistant.py +123 -17
alita_sdk/runtime/langchain/constants.py +8 -1
alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +8 -2
alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
alita_sdk/runtime/langchain/document_loaders/constants.py +187 -40
alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
alita_sdk/runtime/langchain/langraph_agent.py +406 -91
alita_sdk/runtime/langchain/utils.py +51 -8
alita_sdk/runtime/llms/preloaded.py +2 -6
alita_sdk/runtime/models/mcp_models.py +61 -0
alita_sdk/runtime/toolkits/__init__.py +26 -0
alita_sdk/runtime/toolkits/application.py +9 -2
alita_sdk/runtime/toolkits/artifact.py +19 -7
alita_sdk/runtime/toolkits/datasource.py +13 -6
alita_sdk/runtime/toolkits/mcp.py +780 -0
alita_sdk/runtime/toolkits/planning.py +178 -0
alita_sdk/runtime/toolkits/subgraph.py +11 -6
alita_sdk/runtime/toolkits/tools.py +214 -60
alita_sdk/runtime/toolkits/vectorstore.py +9 -4
alita_sdk/runtime/tools/__init__.py +22 -0
alita_sdk/runtime/tools/application.py +16 -4
alita_sdk/runtime/tools/artifact.py +312 -19
alita_sdk/runtime/tools/function.py +100 -4
alita_sdk/runtime/tools/graph.py +81 -0
alita_sdk/runtime/tools/image_generation.py +212 -0
alita_sdk/runtime/tools/llm.py +539 -180
alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
alita_sdk/runtime/tools/planning/__init__.py +36 -0
alita_sdk/runtime/tools/planning/models.py +246 -0
alita_sdk/runtime/tools/planning/wrapper.py +607 -0
alita_sdk/runtime/tools/router.py +2 -1
alita_sdk/runtime/tools/sandbox.py +375 -0
alita_sdk/runtime/tools/vectorstore.py +62 -63
alita_sdk/runtime/tools/vectorstore_base.py +156 -85
alita_sdk/runtime/utils/AlitaCallback.py +106 -20
alita_sdk/runtime/utils/mcp_client.py +465 -0
alita_sdk/runtime/utils/mcp_oauth.py +244 -0
alita_sdk/runtime/utils/mcp_sse_client.py +405 -0
alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
alita_sdk/runtime/utils/streamlit.py +41 -14
alita_sdk/runtime/utils/toolkit_utils.py +28 -9
alita_sdk/runtime/utils/utils.py +14 -0
alita_sdk/tools/__init__.py +78 -35
alita_sdk/tools/ado/__init__.py +0 -1
alita_sdk/tools/ado/repos/__init__.py +10 -6
alita_sdk/tools/ado/repos/repos_wrapper.py +12 -11
alita_sdk/tools/ado/test_plan/__init__.py +10 -7
alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -23
alita_sdk/tools/ado/wiki/__init__.py +10 -11
alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -28
alita_sdk/tools/ado/work_item/__init__.py +10 -11
alita_sdk/tools/ado/work_item/ado_wrapper.py +63 -10
alita_sdk/tools/advanced_jira_mining/__init__.py +10 -7
alita_sdk/tools/aws/delta_lake/__init__.py +13 -11
alita_sdk/tools/azure_ai/search/__init__.py +11 -7
alita_sdk/tools/base_indexer_toolkit.py +392 -86
alita_sdk/tools/bitbucket/__init__.py +18 -11
alita_sdk/tools/bitbucket/api_wrapper.py +52 -9
alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
alita_sdk/tools/browser/__init__.py +40 -16
alita_sdk/tools/browser/crawler.py +3 -1
alita_sdk/tools/browser/utils.py +15 -6
alita_sdk/tools/carrier/__init__.py +17 -17
alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
alita_sdk/tools/carrier/excel_reporter.py +8 -4
alita_sdk/tools/chunkers/__init__.py +3 -1
alita_sdk/tools/chunkers/code/codeparser.py +1 -1
alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
alita_sdk/tools/chunkers/universal_chunker.py +270 -0
alita_sdk/tools/cloud/aws/__init__.py +9 -6
alita_sdk/tools/cloud/azure/__init__.py +9 -6
alita_sdk/tools/cloud/gcp/__init__.py +9 -6
alita_sdk/tools/cloud/k8s/__init__.py +9 -6
alita_sdk/tools/code/linter/__init__.py +7 -7
alita_sdk/tools/code/loaders/codesearcher.py +3 -2
alita_sdk/tools/code/sonar/__init__.py +18 -12
alita_sdk/tools/code_indexer_toolkit.py +199 -0
alita_sdk/tools/confluence/__init__.py +14 -11
alita_sdk/tools/confluence/api_wrapper.py +198 -58
alita_sdk/tools/confluence/loader.py +10 -0
alita_sdk/tools/custom_open_api/__init__.py +9 -4
alita_sdk/tools/elastic/__init__.py +8 -7
alita_sdk/tools/elitea_base.py +543 -64
alita_sdk/tools/figma/__init__.py +10 -8
alita_sdk/tools/figma/api_wrapper.py +352 -153
alita_sdk/tools/github/__init__.py +13 -11
alita_sdk/tools/github/api_wrapper.py +9 -26
alita_sdk/tools/github/github_client.py +75 -12
alita_sdk/tools/github/schemas.py +2 -1
alita_sdk/tools/gitlab/__init__.py +11 -10
alita_sdk/tools/gitlab/api_wrapper.py +135 -45
alita_sdk/tools/gitlab_org/__init__.py +11 -9
alita_sdk/tools/google/bigquery/__init__.py +12 -13
alita_sdk/tools/google_places/__init__.py +18 -10
alita_sdk/tools/jira/__init__.py +14 -8
alita_sdk/tools/jira/api_wrapper.py +315 -168
alita_sdk/tools/keycloak/__init__.py +8 -7
alita_sdk/tools/localgit/local_git.py +56 -54
alita_sdk/tools/memory/__init__.py +27 -11
alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
alita_sdk/tools/ocr/__init__.py +8 -7
alita_sdk/tools/openapi/__init__.py +10 -1
alita_sdk/tools/pandas/__init__.py +8 -7
alita_sdk/tools/pandas/api_wrapper.py +7 -25
alita_sdk/tools/postman/__init__.py +8 -10
alita_sdk/tools/postman/api_wrapper.py +19 -8
alita_sdk/tools/postman/postman_analysis.py +8 -1
alita_sdk/tools/pptx/__init__.py +8 -9
alita_sdk/tools/qtest/__init__.py +19 -13
alita_sdk/tools/qtest/api_wrapper.py +1784 -88
alita_sdk/tools/rally/__init__.py +10 -9
alita_sdk/tools/report_portal/__init__.py +20 -15
alita_sdk/tools/salesforce/__init__.py +19 -15
alita_sdk/tools/servicenow/__init__.py +14 -11
alita_sdk/tools/sharepoint/__init__.py +14 -13
alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
alita_sdk/tools/sharepoint/utils.py +8 -2
alita_sdk/tools/slack/__init__.py +10 -7
alita_sdk/tools/sql/__init__.py +19 -18
alita_sdk/tools/sql/api_wrapper.py +71 -23
alita_sdk/tools/testio/__init__.py +18 -12
alita_sdk/tools/testrail/__init__.py +10 -10
alita_sdk/tools/testrail/api_wrapper.py +213 -45
alita_sdk/tools/utils/__init__.py +28 -4
alita_sdk/tools/utils/content_parser.py +181 -61
alita_sdk/tools/utils/text_operations.py +254 -0
alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
alita_sdk/tools/xray/__init__.py +12 -7
alita_sdk/tools/xray/api_wrapper.py +58 -113
alita_sdk/tools/zephyr/__init__.py +9 -6
alita_sdk/tools/zephyr_enterprise/__init__.py +13 -8
alita_sdk/tools/zephyr_enterprise/api_wrapper.py +17 -7
alita_sdk/tools/zephyr_essential/__init__.py +13 -9
alita_sdk/tools/zephyr_essential/api_wrapper.py +289 -47
alita_sdk/tools/zephyr_essential/client.py +6 -4
alita_sdk/tools/zephyr_scale/__init__.py +10 -7
alita_sdk/tools/zephyr_scale/api_wrapper.py +6 -2
alita_sdk/tools/zephyr_squad/__init__.py +9 -6
{alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/METADATA +180 -33
alita_sdk-0.3.499.dist-info/RECORD +433 -0
alita_sdk-0.3.499.dist-info/entry_points.txt +2 -0
alita_sdk-0.3.263.dist-info/RECORD +0 -342
{alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/WHEEL +0 -0
{alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/licenses/LICENSE +0 -0
{alita_sdk-0.3.263.dist-info → alita_sdk-0.3.499.dist-info}/top_level.txt +0 -0

alita_sdk/tools/vector_adapters/VectorStoreAdapter.py CHANGED Viewed

@@ -2,6 +2,8 @@ from abc import ABC, abstractmethod
 from typing import Any, Dict, Optional, List
 from logging import getLogger
+from ...runtime.utils.utils import IndexerKeywords
 logger = getLogger(__name__)
@@ -24,13 +26,13 @@ class VectorStoreAdapter(ABC):
         pass
     @abstractmethod
-    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+    def get_indexed_ids(self, vectorstore_wrapper, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from vectorstore"""
         pass
     @abstractmethod
-    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. If including_index_meta is True, the index_meta records are deleted as well; otherwise they are preserved."""
         pass
     @abstractmethod
@@ -39,7 +41,7 @@ class VectorStoreAdapter(ABC):
         pass
     @abstractmethod
-    def get_code_indexed_data(self, vectorstore_wrapper, collection_suffix) -> Dict[str, Dict[str, Any]]:
+    def get_code_indexed_data(self, vectorstore_wrapper, index_name) -> Dict[str, Dict[str, Any]]:
         """Get all indexed data from vectorstore for code content"""
         pass
@@ -48,15 +50,26 @@ class VectorStoreAdapter(ABC):
         """Add a new collection name to the metadata"""
         pass
+    @abstractmethod
+    def get_index_meta(self, vectorstore_wrapper, index_name: str) -> List[Dict[str, Any]]:
+        """Get all index_meta entries from the vector store."""
+        pass
 class PGVectorAdapter(VectorStoreAdapter):
     """Adapter for PGVector database operations."""
     def get_vectorstore_params(self, collection_name: str, connection_string: Optional[str] = None) -> Dict[str, Any]:
+        try:
+            from tools import this  # pylint: disable=E0401,C0415
+            worker_config = this.for_module("indexer_worker").descriptor.config
+        except:  # pylint: disable=W0702
+            worker_config = {}
+        #
         return {
             "use_jsonb": True,
             "collection_name": collection_name,
-            "create_extension": True,
+            "create_extension": worker_config.get("pgvector_create_extension", True),
             "alita_sdk_options": {
                 "target_schema": collection_name,
             },
@@ -93,20 +106,25 @@ class PGVectorAdapter(VectorStoreAdapter):
             session.commit()
             logger.info(f"Schema '{schema_name}' has been dropped.")
-    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+    def get_indexed_ids(self, vectorstore_wrapper, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from PGVector"""
         from sqlalchemy.orm import Session
-        from sqlalchemy import func
+        from sqlalchemy import func, or_
         store = vectorstore_wrapper.vectorstore
         try:
             with Session(store.session_maker.bind) as session:
                 # Start building the query
                 query = session.query(store.EmbeddingStore.id)
-                # Apply filter only if collection_suffix is provided
-                if collection_suffix:
+                # Apply filter only if index_name is provided
+                if index_name:
                     query = query.filter(
-                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name,
+                        or_(
+                            func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type').is_(None),
+                            func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata,
+                                                         'type') != IndexerKeywords.INDEX_META_TYPE.value
+                        )
                     )
                 ids = query.all()
                 return [str(id_tuple[0]) for id_tuple in ids]
@@ -114,25 +132,37 @@ class PGVectorAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from PGVector: {str(e)}")
             return []
-    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
-        # This logic deletes all data from the vectorstore collection without removal of collection.
-        # Collection itself remains available for future indexing.
-        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, collection_suffix))
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. If including_index_meta is True, the index_meta records are deleted as well; otherwise they are preserved."""
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func, or_
+        store = vectorstore_wrapper.vectorstore
+        with Session(store.session_maker.bind) as session:
+            if including_index_meta:
+                session.query(store.EmbeddingStore).filter(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
+                ).delete(synchronize_session=False)
+            else:
+                session.query(store.EmbeddingStore).filter(
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name,
+                    or_(func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type').is_(None),
+                        func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'type') != IndexerKeywords.INDEX_META_TYPE.value)
+                ).delete(synchronize_session=False)
+            session.commit()
     def is_vectorstore_type(self, vectorstore) -> bool:
         """Check if the vectorstore is a PGVector store."""
         return hasattr(vectorstore, 'session_maker') and hasattr(vectorstore, 'EmbeddingStore')
-    def get_indexed_data(self, vectorstore_wrapper, collection_suffix: str)-> Dict[str, Dict[str, Any]]:
-        """Get all indexed data from PGVector for non-code content per collection_suffix."""
+    def get_indexed_data(self, vectorstore_wrapper, index_name: str)-> Dict[str, Dict[str, Any]]:
+        """Get all indexed data from PGVector for non-code content per index_name."""
         from sqlalchemy.orm import Session
         from sqlalchemy import func
         from ...runtime.utils.utils import IndexerKeywords
         result = {}
         try:
-            vectorstore_wrapper._log_data("Retrieving already indexed data from PGVector vectorstore",
+            vectorstore_wrapper._log_tool_event("Retrieving already indexed data from PGVector vectorstore",
                            tool_name="get_indexed_data")
             store = vectorstore_wrapper.vectorstore
             with Session(store.session_maker.bind) as session:
@@ -141,7 +171,7 @@ class PGVectorAdapter(VectorStoreAdapter):
                     store.EmbeddingStore.document,
                     store.EmbeddingStore.cmetadata
                 ).filter(
-                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
                 ).all()
             # Process the retrieved data
@@ -174,14 +204,14 @@ class PGVectorAdapter(VectorStoreAdapter):
         return result
-    def get_code_indexed_data(self, vectorstore_wrapper, collection_suffix: str) -> Dict[str, Dict[str, Any]]:
+    def get_code_indexed_data(self, vectorstore_wrapper, index_name: str) -> Dict[str, Dict[str, Any]]:
         """Get all indexed code data from PGVector per collection suffix."""
         from sqlalchemy.orm import Session
         from sqlalchemy import func
         result = {}
         try:
-            vectorstore_wrapper._log_data("Retrieving already indexed code data from PGVector vectorstore",
+            vectorstore_wrapper._log_tool_event(message="Retrieving already indexed code data from PGVector vectorstore",
                            tool_name="index_code_data")
             store = vectorstore_wrapper.vectorstore
             with (Session(store.session_maker.bind) as session):
@@ -189,7 +219,7 @@ class PGVectorAdapter(VectorStoreAdapter):
                     store.EmbeddingStore.id,
                     store.EmbeddingStore.cmetadata
                 ).filter(
-                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == collection_suffix
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
                 ).all()
             for db_id, meta in docs:
@@ -259,6 +289,29 @@ class PGVectorAdapter(VectorStoreAdapter):
         except Exception as e:
             logger.error(f"Failed to update collection for entry ID {entry_id}: {str(e)}")
+    def get_index_meta(self, vectorstore_wrapper, index_name: str) -> List[Dict[str, Any]]:
+        from sqlalchemy.orm import Session
+        from sqlalchemy import func
+        store = vectorstore_wrapper.vectorstore
+        try:
+            with Session(store.session_maker.bind) as session:
+                meta = session.query(
+                    store.EmbeddingStore.id,
+                    store.EmbeddingStore.document,
+                    store.EmbeddingStore.cmetadata
+                ).filter(
+                    store.EmbeddingStore.cmetadata['type'].astext == IndexerKeywords.INDEX_META_TYPE.value,
+                    func.jsonb_extract_path_text(store.EmbeddingStore.cmetadata, 'collection') == index_name
+                ).all()
+                result = []
+                for id, document, cmetadata in meta:
+                    result.append({"id": id, "content": document, "metadata": cmetadata})
+                return result
+        except Exception as e:
+            logger.error(f"Failed to get index_meta from PGVector: {str(e)}")
+            raise e
 class ChromaAdapter(VectorStoreAdapter):
     """Adapter for Chroma database operations."""
@@ -276,7 +329,7 @@ class ChromaAdapter(VectorStoreAdapter):
     def remove_collection(self, vectorstore_wrapper, collection_name: str):
         vectorstore_wrapper.vectorstore.delete_collection()
-    def get_indexed_ids(self, vectorstore_wrapper, collection_suffix: Optional[str] = '') -> List[str]:
+    def get_indexed_ids(self, vectorstore_wrapper, index_name: Optional[str] = '') -> List[str]:
         """Get all indexed document IDs from Chroma"""
         try:
             data = vectorstore_wrapper.vectorstore.get(include=[])  # Only get IDs, no metadata
@@ -285,9 +338,9 @@ class ChromaAdapter(VectorStoreAdapter):
             logger.error(f"Failed to get indexed IDs from Chroma: {str(e)}")
             return []
-    def clean_collection(self, vectorstore_wrapper, collection_suffix: str = ''):
-        """Clean the vectorstore collection by deleting all indexed data."""
-        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, collection_suffix))
+    def clean_collection(self, vectorstore_wrapper, index_name: str = '', including_index_meta: bool = False):
+        """Clean the vectorstore collection by deleting all indexed data. including_index_meta is ignored."""
+        vectorstore_wrapper.vectorstore.delete(ids=self.get_indexed_ids(vectorstore_wrapper, index_name))
     def get_indexed_data(self, vectorstore_wrapper):
         """Get all indexed data from Chroma for non-code content"""
@@ -325,7 +378,7 @@ class ChromaAdapter(VectorStoreAdapter):
         return result
-    def get_code_indexed_data(self, vectorstore_wrapper, collection_suffix) -> Dict[str, Dict[str, Any]]:
+    def get_code_indexed_data(self, vectorstore_wrapper, index_name) -> Dict[str, Dict[str, Any]]:
         """Get all indexed code data from Chroma."""
         result = {}
         try:
@@ -355,6 +408,9 @@ class ChromaAdapter(VectorStoreAdapter):
         # This is a simplified implementation - in practice, you might need more complex logic
         logger.warning("add_to_collection for Chroma is not fully implemented yet")
+    def get_index_meta(self, vectorstore_wrapper, index_name: str) -> List[Dict[str, Any]]:
+        logger.warning("get_index_meta for Chroma is not implemented yet")
 class VectorStoreAdapterFactory:
     """Factory for creating vector store adapters."""

alita_sdk/tools/xray/__init__.py CHANGED Viewed

@@ -7,7 +7,8 @@ from pydantic import create_model, BaseModel, Field
 from .api_wrapper import XrayApiWrapper
 from ..base.tool import BaseAction
-from ..utils import clean_string, get_max_toolkit_length, TOOLKIT_SPLITTER
+from ..elitea_base import filter_missconfigured_index_tools
+from ..utils import clean_string, get_max_toolkit_length
 from ...configurations.pgvector import PgVectorConfiguration
 from ...configurations.xray import XrayConfiguration
@@ -21,6 +22,7 @@ def get_tools(tool):
         limit=tool['settings'].get('limit', 20),
         verify_ssl=tool['settings'].get('verify_ssl', True),
         toolkit_name=tool.get('toolkit_name'),
+        llm=tool['settings'].get('llm', None),
         alita=tool['settings'].get('alita', None),
         # indexer settings
@@ -32,12 +34,10 @@ def get_tools(tool):
 class XrayToolkit(BaseToolkit):
     tools: List[BaseTool] = []
-    toolkit_max_length: int = 0
     @staticmethod
     def toolkit_config_schema() -> BaseModel:
         selected_tools = {x['name']: x['args_schema'].schema() for x in XrayApiWrapper.model_construct().get_available_tools()}
-        XrayToolkit.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
             limit=(Optional[int], Field(description="Limit", default=100)),
@@ -54,7 +54,7 @@ class XrayToolkit(BaseToolkit):
                             {
                                 'metadata': {
                                     "label": "XRAY cloud", "icon_url": "xray.svg",
-                                "categories": ["test management"],
+                                    "categories": ["test management"],
                                     "extra_categories": ["test automation", "test case management", "test planning"]
                                 }
                             }
@@ -62,6 +62,7 @@ class XrayToolkit(BaseToolkit):
         )
     @classmethod
+    @filter_missconfigured_index_tools
     def get_toolkit(cls, selected_tools: list[str] | None = None, toolkit_name: Optional[str] = None, **kwargs):
         if selected_tools is None:
             selected_tools = []
@@ -72,17 +73,21 @@ class XrayToolkit(BaseToolkit):
             **(kwargs.get('pgvector_configuration') or {}),
         }
         xray_api_wrapper = XrayApiWrapper(**wrapper_payload)
-        prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
         available_tools = xray_api_wrapper.get_available_tools()
         tools = []
         for tool in available_tools:
             if selected_tools:
                 if tool["name"] not in selected_tools:
                     continue
+            description = tool["description"]
+            if toolkit_name:
+                description = f"Toolkit: {toolkit_name}\n{description}"
+            description = description + "\nXray instance: " + xray_api_wrapper.base_url
+            description = description[:1000]
             tools.append(BaseAction(
                 api_wrapper=xray_api_wrapper,
-                name=prefix + tool["name"],
-                description=tool["description"] + "\nXray instance: " + xray_api_wrapper.base_url,
+                name=tool["name"],
+                description=description,
                 args_schema=tool["args_schema"]
             ))
         return cls(tools=tools)

alita_sdk/tools/xray/api_wrapper.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import json
 import logging
 import hashlib
-from typing import Any, Dict, Generator, List, Optional
+from typing import Any, Dict, Generator, List, Optional, Literal
 import requests
 from langchain_core.documents import Document
@@ -9,12 +9,9 @@ from langchain_core.tools import ToolException
 from pydantic import PrivateAttr, SecretStr, create_model, model_validator, Field
 from python_graphql_client import GraphqlClient
-from ..elitea_base import (
-    BaseVectorStoreToolApiWrapper,
-    extend_with_vector_tools,
-)
+from ..non_code_indexer_toolkit import NonCodeIndexerToolkit
+from ..utils.available_tools_decorator import extend_with_parent_available_tools
 from ...runtime.utils.utils import IndexerKeywords
-from ..utils.content_parser import parse_file_content, load_content_from_bytes
 try:
     from alita_sdk.runtime.langchain.interfaces.llm_processor import get_embeddings
@@ -31,7 +28,7 @@ _get_tests_query = """query GetTests($jql: String!, $limit:Int!, $start: Int)
         limit
         results {
             issueId
-            jira(fields: ["key", "summary", "created", "updated", "assignee.displayName", "reporter.displayName"])
+            jira(fields: ["key", "summary", "description", "created", "updated", "assignee.displayName", "reporter.displayName"])
             projectId
             testType {
                 name
@@ -107,7 +104,7 @@ XrayCreateTest = create_model(
 XrayCreateTests = create_model(
     "XrayCreateTests",
-    graphql_mutations=(list[str], Field(description="list of GraphQL mutations:\n" + _graphql_mutation_description))
+    graphql_mutations=(List[str], Field(description="list of GraphQL mutations:\n" + _graphql_mutation_description))
 )
 def _parse_tests(test_results) -> List[Any]:
@@ -120,7 +117,7 @@ def _parse_tests(test_results) -> List[Any]:
     return test_results
-class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
+class XrayApiWrapper(NonCodeIndexerToolkit):
     _default_base_url: str = 'https://xray.cloud.getxray.app'
     base_url: str = ""
     client_id: str = None
@@ -147,7 +144,7 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
         client_id = values['client_id']
         client_secret = values['client_secret']
         # Authenticate to get the token
-        values['base_url'] = values.get('base_url', '') or cls._default_base_url
+        values['base_url'] = values.get('base_url', '') or cls._default_base_url.default
         auth_url = f"{values['base_url']}/api/v1/authenticate"
         auth_data = {
             "client_id": client_id,
@@ -168,7 +165,7 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                 return ToolException(f"Please, check you credentials ({values['client_id']} / {masked_secret}). Unable")
             else:
                 return ToolException(f"Authentication failed: {str(e)}")
-        return values
+        return super().validate_toolkit(values)
     def __init__(self, **data):
         super().__init__(**data)
@@ -333,6 +330,7 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
             for test in tests_data:
                 page_content = ""
+                content_structure = {}
                 test_type_name = test.get("testType", {}).get("name", "").lower()
                 attachment_ids = []
@@ -359,19 +357,16 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                     content_structure = {"steps": steps_content}
                     if attachment_ids:
                         content_structure["attachment_ids"] = sorted(attachment_ids)
-                    page_content = json.dumps(content_structure, indent=2)
                 elif test_type_name == "cucumber" and test.get("gherkin"):
                     content_structure = {"gherkin": test["gherkin"]}
                     if attachment_ids:
                         content_structure["attachment_ids"] = sorted(attachment_ids)
-                    page_content = json.dumps(content_structure, indent=2)
                 elif test.get("unstructured"):
                     content_structure = {"unstructured": test["unstructured"]}
                     if attachment_ids:
                         content_structure["attachment_ids"] = sorted(attachment_ids)
-                    page_content = json.dumps(content_structure, indent=2)
                 metadata = {"doctype": self.doctype}
@@ -382,7 +377,12 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                     if "created" in jira_data:
                         metadata["created_on"] = jira_data["created"]
+                    if jira_data.get("description"):
+                        content_structure["description"] = jira_data.get("description")
+                    page_content = json.dumps(content_structure if content_structure.items() else "", indent=2)
                     content_hash = hashlib.sha256(page_content.encode('utf-8')).hexdigest()[:16]
                     metadata["updated_on"] = content_hash
@@ -407,11 +407,13 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                         if "attachments" in step and step["attachments"]:
                             for attachment in step["attachments"]:
                                 if attachment and "id" in attachment and "filename" in attachment:
+                                    attachment['step_id'] = step['id']
                                     attachments_data.append(attachment)
                     if attachments_data:
                         metadata["_attachments_data"] = attachments_data
-                yield Document(page_content=page_content, metadata=metadata)
+                metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = page_content.encode('utf-8')
+                yield Document(page_content='', metadata=metadata)
         except Exception as e:
             logger.error(f"Error processing test data: {e}")
@@ -430,14 +432,7 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
             Generator[Document, None, None]: A generator yielding processed Document objects with metadata.
         """
         try:
-            if not getattr(self, '_include_attachments', False):
-                yield document
-                return
             attachments_data = document.metadata.get("_attachments_data", [])
-            if not attachments_data:
-                yield document
-                return
             issue_id = document.metadata.get("id")
@@ -458,44 +453,33 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                 ).append(attachment_id)
                 try:
-                    content = self._process_attachment(attachment)
-                    if not content or content.startswith("Attachment processing failed"):
-                        logger.warning(f"Skipping attachment {filename} due to processing failure")
-                        continue
+                    attachment_metadata = {
+                        'id': str(attachment_id),
+                        'issue_key': document.metadata.get('key', ''),
+                        'issueId': str(issue_id),
+                        'projectId': document.metadata.get('projectId', ''),
+                        'source': f"xray_test_{issue_id}",
+                        'filename': filename,
+                        'download_link': attachment.get('downloadLink', ''),
+                        'entity_type': 'test_case_attachment',
+                        'step_id': attachment.get('step_id', ''),
+                        'key': document.metadata.get('key', ''),
+                        IndexerKeywords.PARENT.value: document.metadata.get('id', str(issue_id)),
+                        'type': 'attachment',
+                        'doctype': self.doctype,
+                    }
+                    yield from self._process_attachment(attachment, attachment_metadata)
                 except Exception as e:
                     logger.error(f"Failed to process attachment {filename}: {str(e)}")
                     continue
-                attachment_metadata = {
-                    'id': str(attachment_id),
-                    'issue_key': document.metadata.get('key', ''),
-                    'issueId': str(issue_id),
-                    'projectId': document.metadata.get('projectId', ''),
-                    'source': f"xray_test_{issue_id}",
-                    'filename': filename,
-                    'download_link': attachment.get('downloadLink', ''),
-                    'entity_type': 'test_case_attachment',
-                    'key': document.metadata.get('key', ''),
-                    IndexerKeywords.PARENT.value: document.metadata.get('id', str(issue_id)),
-                    'type': 'attachment',
-                    'doctype': self.doctype,
-                }
-                yield Document(
-                    page_content=content,
-                    metadata=attachment_metadata
-                )
             if "_attachments_data" in document.metadata:
                 del document.metadata["_attachments_data"]
-            yield document
         except Exception as e:
             logger.error(f"Error processing document for attachments: {e}")
-            yield document
-    def _process_attachment(self, attachment: Dict[str, Any]) -> str:
+    def _process_attachment(self, attachment: Dict[str, Any], attachment_metadata) -> Generator[Document, None, None]:
         """
         Processes an attachment to extract its content.
@@ -508,38 +492,17 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
         try:
             download_link = attachment.get('downloadLink')
             filename = attachment.get('filename', '')
-            if not download_link:
-                return f"Attachment: {filename} (no download link available)"
             try:
                 auth_token = self._ensure_auth_token()
                 headers = {'Authorization': f'Bearer {auth_token}'}
                 response = requests.get(download_link, headers=headers, timeout=30)
                 response.raise_for_status()
-                ext = f".{filename.split('.')[-1].lower()}" if filename and '.' in filename else ""
-                if ext == '.pdf':
-                    content = parse_file_content(
-                        file_content=response.content,
-                        file_name=filename,
-                        llm=self.llm,
-                        is_capture_image=True
-                    )
-                else:
-                    content = load_content_from_bytes(
-                        response.content,
-                        ext,
-                        llm=self.llm
-                    )
-                if content:
-                    return f"filename: {filename}\ncontent: {content}"
-                else:
-                    logger.warning(f"No content extracted from attachment {filename}")
-                    return f"filename: {filename}\ncontent: [No extractable content]"
+                yield from self._load_attachment(content=response.content,
+                                                 file_name=filename,
+                                                 attachment_metadata=attachment_metadata)
             except requests.RequestException as req_e:
                 logger.error(f"Unable to download attachment {filename} with existing token: {req_e}")
@@ -560,23 +523,13 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                         fresh_headers = {'Authorization': f'Bearer {fresh_token}'}
                         response = requests.get(download_link, headers=fresh_headers, timeout=60)
                         response.raise_for_status()
-                        ext = f".{filename.split('.')[-1].lower()}" if filename and '.' in filename else ""
-                        content = parse_file_content(
-                            file_content=response.content,
-                            file_name=filename,
-                            llm=self.llm,
-                            is_capture_image=True
-                        ) if ext == '.pdf' else load_content_from_bytes(response.content, ext, llm=self.llm)
-                        if content:
-                            return f"filename: {filename}\ncontent: {content}"
-                        else:
-                            return f"filename: {filename}\ncontent: [Content extraction failed after re-auth]"
+                        yield from self._load_attachment(content=response.content,
+                                                         file_name=filename,
+                                                         attachment_metadata=attachment_metadata)
                     except Exception as reauth_e:
                         logger.error(f"Re-authentication and retry failed for {filename}: {reauth_e}")
-                        return f"Attachment: {filename} (download failed: {str(req_e)}, re-auth failed: {str(reauth_e)})"
                 else:
                     try:
                         auth_token = self._ensure_auth_token()
@@ -587,34 +540,29 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                         }
                         response = requests.get(download_link, headers=fallback_headers, timeout=60)
                         response.raise_for_status()
-                        ext = f".{filename.split('.')[-1].lower()}" if filename and '.' in filename else ""
-                        content = parse_file_content(
-                            file_content=response.content,
-                            file_name=filename,
-                            llm=self.llm,
-                            is_capture_image=True
-                        ) if ext == '.pdf' else load_content_from_bytes(response.content, ext, llm=self.llm)
-                        if content:
-                            return f"filename: {filename}\ncontent: {content}"
-                        else:
-                            return f"filename: {filename}\ncontent: [Content extraction failed after fallback]"
+                        yield from self._load_attachment(content=response.content,
+                                                         file_name=filename,
+                                                         attachment_metadata=attachment_metadata)
                     except Exception as fallback_e:
                         logger.error(f"Fallback download also failed for {filename}: {fallback_e}")
-                        return f"Attachment: {filename} (download failed: {str(req_e)}, fallback failed: {str(fallback_e)})"
             except Exception as parse_e:
                 logger.error(f"Unable to parse attachment {filename}: {parse_e}")
-                return f"Attachment: {filename} (parsing failed: {str(parse_e)})"
         except Exception as e:
             logger.error(f"Error processing attachment: {e}")
-            return f"Attachment processing failed: {str(e)}"
+    def _load_attachment(self, content, file_name, attachment_metadata) -> Generator[Document, None, None]:
+        attachment_metadata[IndexerKeywords.CONTENT_IN_BYTES.value] = content
+        attachment_metadata[IndexerKeywords.CONTENT_FILE_NAME.value] = file_name
+        yield Document(page_content='', metadata=attachment_metadata)
     def _index_tool_params(self, **kwargs) -> dict[str, tuple[type, Field]]:
         return {
+            'chunking_tool': (Literal['json', ''],
+                              Field(description="Name of chunking tool for base document", default='json')),
             'jql': (Optional[str], Field(description="""JQL query for searching test cases in Xray.
             Standard JQL query syntax for filtering Xray test cases. Examples:
@@ -684,9 +632,9 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
         except Exception as e:
             raise ToolException(f"Unable to execute GraphQL due to error: {str(e)}")
-    @extend_with_vector_tools
+    @extend_with_parent_available_tools
     def get_available_tools(self):
-        tools = [
+        return [
             {
                 "name": "get_tests",
                 "description": self.get_tests.__doc__,
@@ -711,7 +659,4 @@ class XrayApiWrapper(BaseVectorStoreToolApiWrapper):
                 "args_schema": XrayGrapql,
                 "ref": self.execute_graphql,
             }
-        ]
-        tools.extend(self._get_vector_search_tools())
-        return tools
+        ]

alita-sdk 0.3.263__py3-none-any.whl → 0.3.499__py3-none-any.whl

alita-sdk 0.3.263py3-none-any.whl → 0.3.499py3-none-any.whl