alita-sdk 0.3.257__py3-none-any.whl → 0.3.584__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of alita-sdk might be problematic.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +155 -0
- alita_sdk/cli/agent_loader.py +215 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3794 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +11 -0
- alita_sdk/configurations/ado.py +148 -2
- alita_sdk/configurations/azure_search.py +1 -1
- alita_sdk/configurations/bigquery.py +1 -1
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/browser.py +18 -0
- alita_sdk/configurations/carrier.py +19 -0
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/delta_lake.py +1 -1
- alita_sdk/configurations/figma.py +76 -5
- alita_sdk/configurations/github.py +65 -1
- alita_sdk/configurations/gitlab.py +81 -0
- alita_sdk/configurations/google_places.py +17 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +323 -0
- alita_sdk/configurations/postman.py +1 -1
- alita_sdk/configurations/qtest.py +72 -3
- alita_sdk/configurations/report_portal.py +115 -0
- alita_sdk/configurations/salesforce.py +19 -0
- alita_sdk/configurations/service_now.py +1 -12
- alita_sdk/configurations/sharepoint.py +167 -0
- alita_sdk/configurations/sonar.py +18 -0
- alita_sdk/configurations/sql.py +20 -0
- alita_sdk/configurations/testio.py +101 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +94 -1
- alita_sdk/configurations/zephyr_enterprise.py +94 -1
- alita_sdk/configurations/zephyr_essential.py +95 -0
- alita_sdk/runtime/clients/artifact.py +21 -4
- alita_sdk/runtime/clients/client.py +458 -67
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +352 -0
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +183 -43
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +209 -31
- alita_sdk/runtime/langchain/document_loaders/AlitaImageLoader.py +1 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaMarkdownLoader.py +66 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaPDFLoader.py +79 -10
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +52 -15
- alita_sdk/runtime/langchain/document_loaders/AlitaPythonLoader.py +9 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaTableLoader.py +1 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +15 -2
- alita_sdk/runtime/langchain/document_loaders/ImageParser.py +30 -0
- alita_sdk/runtime/langchain/document_loaders/constants.py +189 -41
- alita_sdk/runtime/langchain/interfaces/llm_processor.py +4 -2
- alita_sdk/runtime/langchain/langraph_agent.py +493 -105
- alita_sdk/runtime/langchain/utils.py +118 -8
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +28 -0
- alita_sdk/runtime/toolkits/application.py +14 -4
- alita_sdk/runtime/toolkits/artifact.py +25 -9
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +782 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +11 -6
- alita_sdk/runtime/toolkits/tools.py +314 -70
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +24 -0
- alita_sdk/runtime/tools/application.py +16 -4
- alita_sdk/runtime/tools/artifact.py +367 -33
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +100 -4
- alita_sdk/runtime/tools/graph.py +81 -0
- alita_sdk/runtime/tools/image_generation.py +218 -0
- alita_sdk/runtime/tools/llm.py +1032 -177
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +3 -1
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -1
- alita_sdk/runtime/tools/sandbox.py +375 -0
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +69 -65
- alita_sdk/runtime/tools/vectorstore_base.py +163 -90
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/streamlit.py +41 -14
- alita_sdk/runtime/utils/toolkit_utils.py +28 -9
- alita_sdk/runtime/utils/utils.py +48 -0
- alita_sdk/tools/__init__.py +135 -37
- alita_sdk/tools/ado/__init__.py +2 -2
- alita_sdk/tools/ado/repos/__init__.py +16 -19
- alita_sdk/tools/ado/repos/repos_wrapper.py +12 -20
- alita_sdk/tools/ado/test_plan/__init__.py +27 -8
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +56 -28
- alita_sdk/tools/ado/wiki/__init__.py +28 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +114 -40
- alita_sdk/tools/ado/work_item/__init__.py +28 -12
- alita_sdk/tools/ado/work_item/ado_wrapper.py +95 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +13 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +15 -11
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +14 -8
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +454 -110
- alita_sdk/tools/bitbucket/__init__.py +28 -19
- alita_sdk/tools/bitbucket/api_wrapper.py +285 -27
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +5 -5
- alita_sdk/tools/browser/__init__.py +41 -16
- alita_sdk/tools/browser/crawler.py +3 -1
- alita_sdk/tools/browser/utils.py +15 -6
- alita_sdk/tools/carrier/__init__.py +18 -17
- alita_sdk/tools/carrier/backend_reports_tool.py +8 -4
- alita_sdk/tools/carrier/excel_reporter.py +8 -4
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/codeparser.py +1 -1
- alita_sdk/tools/chunkers/sematic/json_chunker.py +2 -1
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +12 -7
- alita_sdk/tools/cloud/azure/__init__.py +12 -7
- alita_sdk/tools/cloud/gcp/__init__.py +12 -7
- alita_sdk/tools/cloud/k8s/__init__.py +12 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +21 -13
- alita_sdk/tools/code_indexer_toolkit.py +199 -0
- alita_sdk/tools/confluence/__init__.py +22 -14
- alita_sdk/tools/confluence/api_wrapper.py +197 -58
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +546 -64
- alita_sdk/tools/figma/__init__.py +60 -11
- alita_sdk/tools/figma/api_wrapper.py +1400 -167
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +18 -17
- alita_sdk/tools/github/api_wrapper.py +9 -26
- alita_sdk/tools/github/github_client.py +81 -12
- alita_sdk/tools/github/schemas.py +2 -1
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/gitlab/__init__.py +19 -13
- alita_sdk/tools/gitlab/api_wrapper.py +256 -80
- alita_sdk/tools/gitlab_org/__init__.py +14 -10
- alita_sdk/tools/google/bigquery/__init__.py +14 -13
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +21 -11
- alita_sdk/tools/jira/__init__.py +22 -11
- alita_sdk/tools/jira/api_wrapper.py +315 -168
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +38 -14
- alita_sdk/tools/non_code_indexer_toolkit.py +7 -2
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1357 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +40 -45
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +11 -11
- alita_sdk/tools/postman/api_wrapper.py +19 -8
- alita_sdk/tools/postman/postman_analysis.py +8 -1
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/qtest/__init__.py +22 -14
- alita_sdk/tools/qtest/api_wrapper.py +1784 -88
- alita_sdk/tools/rally/__init__.py +13 -10
- alita_sdk/tools/report_portal/__init__.py +23 -16
- alita_sdk/tools/salesforce/__init__.py +22 -16
- alita_sdk/tools/servicenow/__init__.py +21 -16
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +17 -14
- alita_sdk/tools/sharepoint/api_wrapper.py +179 -39
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +13 -8
- alita_sdk/tools/sql/__init__.py +22 -19
- alita_sdk/tools/sql/api_wrapper.py +71 -23
- alita_sdk/tools/testio/__init__.py +21 -13
- alita_sdk/tools/testrail/__init__.py +13 -11
- alita_sdk/tools/testrail/api_wrapper.py +214 -46
- alita_sdk/tools/utils/__init__.py +28 -4
- alita_sdk/tools/utils/content_parser.py +241 -55
- alita_sdk/tools/utils/text_operations.py +254 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +83 -27
- alita_sdk/tools/xray/__init__.py +18 -14
- alita_sdk/tools/xray/api_wrapper.py +58 -113
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +12 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +16 -9
- alita_sdk/tools/zephyr_enterprise/api_wrapper.py +30 -15
- alita_sdk/tools/zephyr_essential/__init__.py +16 -10
- alita_sdk/tools/zephyr_essential/api_wrapper.py +297 -54
- alita_sdk/tools/zephyr_essential/client.py +6 -4
- alita_sdk/tools/zephyr_scale/__init__.py +13 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +39 -31
- alita_sdk/tools/zephyr_squad/__init__.py +12 -7
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/METADATA +184 -37
- alita_sdk-0.3.584.dist-info/RECORD +452 -0
- alita_sdk-0.3.584.dist-info/entry_points.txt +2 -0
- alita_sdk/tools/bitbucket/tools.py +0 -304
- alita_sdk-0.3.257.dist-info/RECORD +0 -343
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.257.dist-info → alita_sdk-0.3.584.dist-info}/top_level.txt +0 -0
alita_sdk/tools/browser/utils.py CHANGED

```diff
@@ -6,9 +6,9 @@ from langchain.text_splitter import CharacterTextSplitter
 import fitz
 
 try:
-    from …
+    from langchain_postgres import PGVector
 except ImportError:
-    …
+    PGVector = None
 
 from langchain_community.embeddings.sentence_transformer import (
     SentenceTransformerEmbeddings,
@@ -32,13 +32,22 @@ def get_page(urls, html_only=False):
     return docs_transformed
 
 
-def webRag(urls, max_response_size, query):
-    if …
-        return "…
+def webRag(urls, max_response_size, query, connection_string=None):
+    if PGVector is None:
+        return "PGVector is not initialized. Web rag is not available."
+
+    if not connection_string:
+        return "Connection string or embedding model is missing. Web rag is not available."
     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
     docs = text_splitter.split_documents(get_page(urls))
     embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
-    db = …
+    db = PGVector.from_documents(
+        documents=docs,
+        embedding=embedding_function,
+        collection_name="web_rag",
+        pre_delete_collection=True,
+        connection=connection_string
+    )
     docs = db.search(query, "mmr", k=10)
     text = ""
     for doc in docs:
```
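The changed `webRag` now degrades gracefully twice: once when the optional `langchain_postgres` extra is absent, and once when no PGVector connection string is supplied. A minimal sketch of that guard pattern, with the indexing step elided (the name `web_rag_guarded` is illustrative, not part of the SDK):

```python
# Optional-dependency guard: import once, fall back to a sentinel, and let
# callers return a readable message instead of raising ImportError mid-run.
try:
    from langchain_postgres import PGVector  # optional extra
except ImportError:
    PGVector = None  # sentinel checked at call time


def web_rag_guarded(urls, query, connection_string=None):
    """Degrade gracefully when the vector-store extra is not usable."""
    if PGVector is None:
        return "PGVector is not initialized. Web rag is not available."
    if not connection_string:
        return "Connection string or embedding model is missing. Web rag is not available."
    # ...split pages, embed, PGVector.from_documents(...), then db.search(query, "mmr", k=10)
    return f"would index {len(urls)} url(s) into PGVector at {connection_string}"
```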
alita_sdk/tools/carrier/__init__.py CHANGED

```diff
@@ -1,12 +1,14 @@
 import logging
 from typing import Dict, List, Optional, Literal
 from langchain_core.tools import BaseToolkit, BaseTool
-from pydantic import create_model, BaseModel, ConfigDict, Field
+from pydantic import create_model, BaseModel, ConfigDict, Field
 from functools import lru_cache
 
 from .api_wrapper import CarrierAPIWrapper
 from .tools import __all__
-from ..…
+from ..elitea_base import filter_missconfigured_index_tools
+from ..utils import clean_string, get_max_toolkit_length
+from ...configurations.carrier import CarrierConfiguration
 
 logger = logging.getLogger(__name__)
 
@@ -15,7 +17,6 @@ name = 'carrier'
 
 class AlitaCarrierToolkit(BaseToolkit):
     tools: List[BaseTool] = []
-    toolkit_max_length: int = 100
 
     @classmethod
     @lru_cache(maxsize=32)
@@ -24,15 +25,10 @@ class AlitaCarrierToolkit(BaseToolkit):
         for t in __all__:
             default = t['tool'].__pydantic_fields__['args_schema'].default
             selected_tools[t['name']] = default.schema() if default else default
-        cls.toolkit_max_length = get_max_toolkit_length(selected_tools)
         return create_model(
             name,
-            url=(str, Field(description="Carrier Platform Base URL")),
-            organization=(str, Field(description="Carrier Organization Name", json_schema_extra={'toolkit_name': True,
-                                                                               'max_toolkit_length': cls.toolkit_max_length})),
-            private_token=(
-                SecretStr, Field(description="Carrier Platform Authentication Token", json_schema_extra={'secret': True})),
             project_id=(Optional[str], Field(None, description="Optional project ID for scoped operations")),
+            carrier_configuration=(CarrierConfiguration, Field(description="Carrier Configuration", json_schema_extra={'configuration_types': ['carrier']})),
             selected_tools=(
                 List[Literal[tuple(selected_tools)]],
                 Field(default=[], json_schema_extra={"args_schemas": selected_tools}),
@@ -49,6 +45,7 @@ class AlitaCarrierToolkit(BaseToolkit):
         )
 
     @classmethod
+    @filter_missconfigured_index_tools
     def get_toolkit(
             cls,
             selected_tools: Optional[List[str]] = None,
@@ -58,23 +55,29 @@ class AlitaCarrierToolkit(BaseToolkit):
         selected_tools = selected_tools or []
         logger.info(f"[AlitaCarrierToolkit] Initializing toolkit with selected tools: {selected_tools}")
 
+        wrapper_payload = {
+            **kwargs,
+            **kwargs.get('carrier_configuration', {}),
+        }
+
         try:
-            carrier_api_wrapper = CarrierAPIWrapper(**…
+            carrier_api_wrapper = CarrierAPIWrapper(**wrapper_payload)
             logger.info(
-                f"[AlitaCarrierToolkit] CarrierAPIWrapper initialized successfully with URL: {…
+                f"[AlitaCarrierToolkit] CarrierAPIWrapper initialized successfully with URL: {wrapper_payload.get('url')}")
         except Exception as e:
             logger.exception(f"[AlitaCarrierToolkit] Error initializing CarrierAPIWrapper: {e}")
             raise ValueError(f"CarrierAPIWrapper initialization error: {e}")
 
-        prefix = clean_string(toolkit_name, cls.toolkit_max_length) + TOOLKIT_SPLITTER if toolkit_name else ''
-
         tools = []
         for tool_def in __all__:
             if selected_tools and tool_def['name'] not in selected_tools:
                 continue
             try:
                 tool_instance = tool_def['tool'](api_wrapper=carrier_api_wrapper)
-                …
+                if toolkit_name:
+                    tool_instance.description = f"{tool_instance.description}\nToolkit: {toolkit_name}"
+                    tool_instance.description = tool_instance.description[:1000]
+                tool_instance.metadata = {"toolkit_name": toolkit_name, "toolkit_type": name}
                 tools.append(tool_instance)
                 logger.info(f"[AlitaCarrierToolkit] Successfully initialized tool '{tool_instance.name}'")
             except Exception as e:
@@ -92,9 +95,7 @@ class AlitaCarrierToolkit(BaseToolkit):
 def get_tools(tool_config: Dict) -> List[BaseTool]:
     return AlitaCarrierToolkit.get_toolkit(
         selected_tools=tool_config.get('selected_tools', []),
-        url=tool_config['settings']['url'],
         project_id=tool_config['settings'].get('project_id'),
-        …
-        private_token=tool_config['settings']['private_token'],
+        carrier_configuration=tool_config['settings']['carrier_configuration'],
         toolkit_name=tool_config.get('toolkit_name')
     ).get_tools()
```
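`get_toolkit()` now builds the wrapper payload by spreading the nested `carrier_configuration` over the raw kwargs, so configuration fields (`url`, `private_token`, and so on) win over any same-named loose keyword. A minimal sketch of that merge, with illustrative values:

```python
# Dict-spread merge: later entries override earlier ones, so the nested
# configuration's 'url' replaces the loose kwarg 'url'. Values are made up.
kwargs = {
    "project_id": "42",
    "url": "https://stale.example",          # loose kwarg, will be overridden
    "carrier_configuration": {
        "url": "https://carrier.example",    # configuration wins
        "organization": "my-org",
        "private_token": "***",
    },
}

wrapper_payload = {
    **kwargs,
    **kwargs.get("carrier_configuration", {}),
}

assert wrapper_payload["url"] == "https://carrier.example"
assert wrapper_payload["organization"] == "my-org"
assert wrapper_payload["project_id"] == "42"
```

Note that the merged payload still carries the nested `carrier_configuration` key itself; presumably `CarrierAPIWrapper` ignores fields it does not declare.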
alita_sdk/tools/carrier/backend_reports_tool.py CHANGED

```diff
@@ -154,6 +154,7 @@ class CreateExcelReportTool(BaseTool):
             "tp_threshold": (int, Field(default=None, description="Throughput threshold")),
             "rt_threshold": (int, Field(default=None, description="Response time threshold")),
             "er_threshold": (int, Field(default=None, description="Error rate threshold")),
+            "include_group_pauses": (bool, Field(default=False, description="Include group pauses in Gatling Excel report")),
         }
     )
@@ -200,6 +201,7 @@ class CreateExcelReportTool(BaseTool):
             "tp_threshold": 10,
             "rt_threshold": 500,
             "er_threshold": 5,
+            "include_group_pauses": False,
         }
 
     def _request_parameter_confirmation(self, default_parameters):
@@ -217,7 +219,8 @@ class CreateExcelReportTool(BaseTool):
         excel_report_file_name = f'/tmp/reports_test_results_{report["build_id"]}_excel_report.xlsx'
         bucket_name = report["name"].replace("_", "").replace(" ", "").lower()
 
-        result_stats_j = self._parse_report(test_log_file_path, lg_type, parameters["think_time"], …
+        result_stats_j = self._parse_report(test_log_file_path, lg_type, parameters["think_time"],
+                                            parameters["include_group_pauses"], is_absolute_file_path=True)
         calc_thr_j = self._calculate_thresholds(result_stats_j, parameters)
 
         return self._generate_and_upload_report(
@@ -233,21 +236,22 @@ class CreateExcelReportTool(BaseTool):
         excel_report_file_name = f'{file_path}_{current_date}.xlsx'
         bucket_name = bucket
 
-        result_stats_j = self._parse_report(file_path, lg_type, parameters["think_time"], …
+        result_stats_j = self._parse_report(file_path, lg_type, parameters["think_time"],
+                                            parameters["include_group_pauses"], is_absolute_file_path=True)
         calc_thr_j = self._calculate_thresholds(result_stats_j, parameters)
 
         return self._generate_and_upload_report(
             result_stats_j, carrier_report, calc_thr_j, parameters, excel_report_file_name, bucket_name, file_path
         )
 
-    def _parse_report(self, file_path, lg_type, think_time, is_absolute_file_path=False):
+    def _parse_report(self, file_path, lg_type, think_time, include_group_pauses, is_absolute_file_path=False):
         """Parse the report based on its type."""
         if lg_type == "gatling":
             if is_absolute_file_path:
                 report_file = file_path
             else:
                 report_file = get_latest_log_file(file_path, "simulation.log")
-            parser = GatlingReportParser(report_file, think_time)
+            parser = GatlingReportParser(report_file, include_group_pauses, think_time)
             result_stats_j = parser.parse()
             result_stats_j["requests"].update(result_stats_j["groups"])
         elif lg_type == "jmeter":
```
alita_sdk/tools/carrier/excel_reporter.py CHANGED

```diff
@@ -118,9 +118,10 @@ class JMeterReportParser(PerformanceReportParser):
 
 class GatlingReportParser(PerformanceReportParser):
 
-    def __init__(self, log_file: str, think_times="5,0-10,0"):
+    def __init__(self, log_file: str, include_group_pauses, think_times="5,0-10,0"):
         self.calculated_think_time = think_times
         self.log_file = log_file
+        self.include_group_pauses = include_group_pauses
 
     @staticmethod
     def convert_timestamp_to_datetime(timestamp: int) -> datetime:
@@ -210,7 +211,7 @@ class GatlingReportParser(PerformanceReportParser):
                     ramp_end = self.convert_timestamp_to_datetime(int(line.split('\t')[3]))
 
                 elif line.startswith('GROUP'):
-                    self.parse_group_line(groups, line)
+                    self.parse_group_line(groups, line, self.include_group_pauses)
         except FileNotFoundError as e:
             print(f"File not found: {e}")
             raise
@@ -242,11 +243,14 @@ class GatlingReportParser(PerformanceReportParser):
             requests[request_name].append((response_time, status))
 
     @staticmethod
-    def parse_group_line(groups, line):
+    def parse_group_line(groups, line, include_group_pauses):
         parts = line.split('\t')
         if len(parts) >= 6:
             group_name = parts[1]
-            …
+            if include_group_pauses:
+                response_time = int(parts[3]) - int(parts[2])
+            else:
+                response_time = int(parts[4])
             status = parts[5].strip()
             groups[group_name].append((response_time, status))
```
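The new `include_group_pauses` flag switches how a Gatling GROUP record is timed: end minus start timestamps (wall clock, pauses included) versus the cumulated response time field Gatling writes (pauses excluded). A minimal standalone sketch mirroring the new `parse_group_line`, with a made-up `simulation.log` line (assuming the tab-separated layout name, start, end, cumulated response time, status):

```python
from collections import defaultdict

# GROUP \t name \t start_ms \t end_ms \t cumulated_response_time_ms \t status
line = "GROUP\tcheckout\t1700000000000\t1700000007500\t5200\tOK"

def parse_group_line(groups, line, include_group_pauses):
    parts = line.split('\t')
    if len(parts) >= 6:
        group_name = parts[1]
        if include_group_pauses:
            # wall-clock duration: end timestamp minus start timestamp
            response_time = int(parts[3]) - int(parts[2])
        else:
            # cumulated response time as logged by Gatling (pauses excluded)
            response_time = int(parts[4])
        status = parts[5].strip()
        groups[group_name].append((response_time, status))

with_pauses, without_pauses = defaultdict(list), defaultdict(list)
parse_group_line(with_pauses, line, include_group_pauses=True)
parse_group_line(without_pauses, line, include_group_pauses=False)
print(with_pauses["checkout"])     # [(7500, 'OK')]
print(without_pauses["checkout"])  # [(5200, 'OK')]
```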
alita_sdk/tools/chunkers/__init__.py CHANGED

```diff
@@ -3,6 +3,7 @@ from .sematic.statistical_chunker import statistical_chunker
 from .sematic.markdown_chunker import markdown_chunker
 from .sematic.proposal_chunker import proposal_chunker
 from .sematic.json_chunker import json_chunker
+from .universal_chunker import universal_chunker, chunk_single_document, get_file_type
 from .models import StatisticalChunkerConfig, MarkdownChunkerConfig, ProposalChunkerConfig
 
 __all__ = {
@@ -10,7 +11,8 @@ __all__ = {
     'statistical': statistical_chunker,
     'markdown': markdown_chunker,
     'proposal': proposal_chunker,
-    'json': json_chunker
+    'json': json_chunker,
+    'universal': universal_chunker,
 }
 
 __confluence_chunkers__ = {
```
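The `__all__` dict doubles as a name-to-callable registry, so a chunker can be selected at runtime by key. A minimal sketch of that dispatch (assuming the package is importable; config keys follow the markdown chunker's docstring shown further below):

```python
from langchain_core.documents import Document
from alita_sdk.tools.chunkers import __all__ as chunkers  # the registry dict above

def docs():
    yield Document(page_content="# Title\n\nSome body text.", metadata={"source": "a.md"})

# Pick a chunker by name: 'markdown', 'json', 'statistical', 'proposal', 'universal'
chunker = chunkers["markdown"]
config = {"headers_to_split_on": [("#", "H1"), ("##", "H2")], "max_tokens": 512}
for chunk in chunker(docs(), config):
    print(chunk.metadata.get("chunk_id"), chunk.metadata.get("headers"))
```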
alita_sdk/tools/chunkers/code/codeparser.py CHANGED

```diff
@@ -79,7 +79,7 @@ def parse_code_files_for_db(file_content_generator: Generator[str, None, None],
         for splitted_document in splitted_documents:
             metadata = {
                 "filename": file_name,
-                "method_name": node.name,
+                "method_name": node.name if node.name else 'unknown',
                 "language": programming_language.value,
             }
             commit_hash = data.get("commit_hash")
```
alita_sdk/tools/chunkers/sematic/json_chunker.py CHANGED

```diff
@@ -9,7 +9,7 @@ def json_chunker(file_content_generator: Generator[Document, None, None], config
     for doc in file_content_generator:
         try:
             data_dict = json.loads(doc.page_content)
-            chunks = RecursiveJsonSplitter(max_chunk_size=max_tokens).split_json(json_data=data_dict)
+            chunks = RecursiveJsonSplitter(max_chunk_size=max_tokens).split_json(json_data=data_dict, convert_lists=True)
             if len(chunks) == 1:
                 yield doc
                 continue
@@ -17,6 +17,7 @@ def json_chunker(file_content_generator: Generator[Document, None, None], config
             for chunk in chunks:
                 metadata = doc.metadata.copy()
                 metadata['chunk_id'] = chunk_id
+                metadata['method_name'] = 'json'
                 chunk_id += 1
                 yield Document(page_content=json.dumps(chunk), metadata=metadata)
         except Exception as e:
```
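`convert_lists=True` makes `RecursiveJsonSplitter` rewrite lists as index-keyed dicts so it can descend into them, instead of carrying a long list as a single oversized value. A minimal sketch with made-up data:

```python
from langchain_text_splitters import RecursiveJsonSplitter

data = {"items": [{"id": i, "note": "x" * 40} for i in range(20)]}
splitter = RecursiveJsonSplitter(max_chunk_size=300)

plain = splitter.split_json(json_data=data)                          # list kept whole
converted = splitter.split_json(json_data=data, convert_lists=True)  # list split up
print(len(plain), len(converted))  # converted should yield more, smaller chunks
```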
alita_sdk/tools/chunkers/sematic/markdown_chunker.py CHANGED

```diff
@@ -1,4 +1,4 @@
-from typing import Generator
+from typing import Generator, List
 from langchain_core.documents import Document
 from langchain_text_splitters import MarkdownHeaderTextSplitter, ExperimentalMarkdownSyntaxTextSplitter
 from langchain.text_splitter import TokenTextSplitter
@@ -7,34 +7,60 @@ from copy import deepcopy as copy
 
 
 def markdown_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[Document, None, None]:
+    """
+    Chunks markdown documents by headers, with support for:
+    - Minimum chunk size to avoid tiny fragments
+    - Maximum token limit with overflow splitting
+    - Header metadata preservation
+
+    Config options:
+        strip_header (bool): Remove headers from content. Default: False
+        return_each_line (bool): Split on every line. Default: False
+        headers_to_split_on (list): Headers to split on, e.g. [('#', 'H1'), ('##', 'H2')]
+        max_tokens (int): Maximum tokens per chunk. Default: 512
+        token_overlap (int): Token overlap for large chunk splitting. Default: 10
+        min_chunk_chars (int): Minimum characters per chunk. Default: 100
+            Chunks smaller than this will be merged with the next chunk.
+    """
     strip_header = config.get("strip_header", False)
     return_each_line = config.get("return_each_line", False)
     headers_to_split_on = config.get("headers_to_split_on", [])
     max_tokens = config.get("max_tokens", 512)
     tokens_overlapping = config.get("token_overlap", 10)
+    min_chunk_chars = config.get("min_chunk_chars", 100)  # Minimum characters per chunk
+
     headers_to_split_on = [tuple(header) for header in headers_to_split_on]
+
     for doc in file_content_generator:
         doc_metadata = doc.metadata
         doc_content = doc.page_content
         chunk_id = 0
+
         markdown_splitter = MarkdownHeaderTextSplitter(
             headers_to_split_on=headers_to_split_on,
             strip_headers=strip_header,
             return_each_line=return_each_line
         )
         md_header_splits = markdown_splitter.split_text(doc_content)
-        …
+
+        # Merge small chunks with the next one
+        merged_chunks = _merge_small_chunks(md_header_splits, min_chunk_chars)
+
+        for chunk in merged_chunks:
             if tiktoken_length(chunk.page_content) > max_tokens:
-                …
-                …
-                …
-                …
+                # Split large chunks into smaller ones
+                for subchunk in TokenTextSplitter(
+                        encoding_name="cl100k_base",
+                        chunk_size=max_tokens,
+                        chunk_overlap=tokens_overlapping
+                ).split_text(chunk.page_content):
                     chunk_id += 1
                     headers_meta = list(chunk.metadata.values())
                     docmeta = copy(doc_metadata)
                     docmeta.update({"headers": "; ".join(headers_meta)})
                     docmeta['chunk_id'] = chunk_id
                     docmeta['chunk_type'] = "document"
+                    docmeta['method_name'] = 'markdown'
                     yield Document(
                         page_content=subchunk,
                         metadata=docmeta
@@ -46,12 +72,77 @@ def markdown_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[Document, None, None]:
             docmeta.update({"headers": "; ".join(headers_meta)})
             docmeta['chunk_id'] = chunk_id
             docmeta['chunk_type'] = "document"
+            docmeta['method_name'] = 'text'
             yield Document(
                 page_content=chunk.page_content,
                 metadata=docmeta
             )
 
 
+def _merge_small_chunks(chunks: List[Document], min_chars: int) -> List[Document]:
+    """
+    Merge chunks that are smaller than min_chars with the next chunk.
+
+    This prevents tiny fragments (like standalone headers or short notes)
+    from becoming separate chunks.
+
+    Args:
+        chunks: List of Document chunks from markdown splitter
+        min_chars: Minimum character count for a chunk
+
+    Returns:
+        List of merged Document chunks
+    """
+    if not chunks:
+        return chunks
+
+    merged = []
+    pending_content = ""
+    pending_metadata = {}
+
+    for i, chunk in enumerate(chunks):
+        content = chunk.page_content.strip()
+
+        if pending_content:
+            # Merge pending content with current chunk
+            combined_content = pending_content + "\n\n" + content
+            # Use the pending metadata (from the header) but can be extended
+            combined_metadata = {**pending_metadata}
+            # Add any new header info from current chunk
+            for key, value in chunk.metadata.items():
+                if key not in combined_metadata or not combined_metadata[key]:
+                    combined_metadata[key] = value
+
+            if len(combined_content) >= min_chars:
+                # Combined is big enough, emit it
+                merged.append(Document(
+                    page_content=combined_content,
+                    metadata=combined_metadata
+                ))
+                pending_content = ""
+                pending_metadata = {}
+            else:
+                # Still too small, keep accumulating
+                pending_content = combined_content
+                pending_metadata = combined_metadata
+        elif len(content) < min_chars:
+            # Current chunk is too small, start pending
+            pending_content = content
+            pending_metadata = dict(chunk.metadata)
+        else:
+            # Current chunk is big enough
+            merged.append(chunk)
+
+    # Don't forget any remaining pending content
+    if pending_content:
+        merged.append(Document(
+            page_content=pending_content,
+            metadata=pending_metadata
+        ))
+
+    return merged
+
+
 def markdown_by_headers_chunker(file_content_generator: Generator[Document, None, None], config: dict, *args, **kwargs) -> Generator[Document, None, None]:
     strip_header = config.get("strip_header", False)
     return_each_line = config.get("return_each_line", False)
```
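The merge pass can be exercised on its own: a fragment shorter than `min_chars` is held back and prepended to the following chunk instead of being emitted alone. A minimal sketch (`_merge_small_chunks` is a private helper, so this import is for illustration only):

```python
from langchain_core.documents import Document
from alita_sdk.tools.chunkers.sematic.markdown_chunker import _merge_small_chunks

chunks = [
    Document(page_content="## Notes", metadata={"H2": "Notes"}),  # tiny fragment
    Document(page_content="A paragraph long enough to stand on its own. " * 3,
             metadata={"H2": "Notes"}),
]

merged = _merge_small_chunks(chunks, min_chars=100)
print(len(merged))                        # 1: the bare header was folded into the body
print(repr(merged[0].page_content[:10]))  # '## Notes\n\n'
```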
alita_sdk/tools/chunkers/sematic/proposal_chunker.py CHANGED

```diff
@@ -6,7 +6,7 @@ from langchain_core.prompts import ChatPromptTemplate
 from langchain.text_splitter import TokenTextSplitter
 
 from typing import Optional, List
-from …
+from pydantic import BaseModel
 from ..utils import tiktoken_length
 
 logger = getLogger(__name__)
```