alita-sdk 0.3.379__py3-none-any.whl → 0.3.627__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alita_sdk/cli/__init__.py +10 -0
- alita_sdk/cli/__main__.py +17 -0
- alita_sdk/cli/agent/__init__.py +5 -0
- alita_sdk/cli/agent/default.py +258 -0
- alita_sdk/cli/agent_executor.py +156 -0
- alita_sdk/cli/agent_loader.py +245 -0
- alita_sdk/cli/agent_ui.py +228 -0
- alita_sdk/cli/agents.py +3113 -0
- alita_sdk/cli/callbacks.py +647 -0
- alita_sdk/cli/cli.py +168 -0
- alita_sdk/cli/config.py +306 -0
- alita_sdk/cli/context/__init__.py +30 -0
- alita_sdk/cli/context/cleanup.py +198 -0
- alita_sdk/cli/context/manager.py +731 -0
- alita_sdk/cli/context/message.py +285 -0
- alita_sdk/cli/context/strategies.py +289 -0
- alita_sdk/cli/context/token_estimation.py +127 -0
- alita_sdk/cli/formatting.py +182 -0
- alita_sdk/cli/input_handler.py +419 -0
- alita_sdk/cli/inventory.py +1073 -0
- alita_sdk/cli/mcp_loader.py +315 -0
- alita_sdk/cli/testcases/__init__.py +94 -0
- alita_sdk/cli/testcases/data_generation.py +119 -0
- alita_sdk/cli/testcases/discovery.py +96 -0
- alita_sdk/cli/testcases/executor.py +84 -0
- alita_sdk/cli/testcases/logger.py +85 -0
- alita_sdk/cli/testcases/parser.py +172 -0
- alita_sdk/cli/testcases/prompts.py +91 -0
- alita_sdk/cli/testcases/reporting.py +125 -0
- alita_sdk/cli/testcases/setup.py +108 -0
- alita_sdk/cli/testcases/test_runner.py +282 -0
- alita_sdk/cli/testcases/utils.py +39 -0
- alita_sdk/cli/testcases/validation.py +90 -0
- alita_sdk/cli/testcases/workflow.py +196 -0
- alita_sdk/cli/toolkit.py +327 -0
- alita_sdk/cli/toolkit_loader.py +85 -0
- alita_sdk/cli/tools/__init__.py +43 -0
- alita_sdk/cli/tools/approval.py +224 -0
- alita_sdk/cli/tools/filesystem.py +1751 -0
- alita_sdk/cli/tools/planning.py +389 -0
- alita_sdk/cli/tools/terminal.py +414 -0
- alita_sdk/community/__init__.py +72 -12
- alita_sdk/community/inventory/__init__.py +236 -0
- alita_sdk/community/inventory/config.py +257 -0
- alita_sdk/community/inventory/enrichment.py +2137 -0
- alita_sdk/community/inventory/extractors.py +1469 -0
- alita_sdk/community/inventory/ingestion.py +3172 -0
- alita_sdk/community/inventory/knowledge_graph.py +1457 -0
- alita_sdk/community/inventory/parsers/__init__.py +218 -0
- alita_sdk/community/inventory/parsers/base.py +295 -0
- alita_sdk/community/inventory/parsers/csharp_parser.py +907 -0
- alita_sdk/community/inventory/parsers/go_parser.py +851 -0
- alita_sdk/community/inventory/parsers/html_parser.py +389 -0
- alita_sdk/community/inventory/parsers/java_parser.py +593 -0
- alita_sdk/community/inventory/parsers/javascript_parser.py +629 -0
- alita_sdk/community/inventory/parsers/kotlin_parser.py +768 -0
- alita_sdk/community/inventory/parsers/markdown_parser.py +362 -0
- alita_sdk/community/inventory/parsers/python_parser.py +604 -0
- alita_sdk/community/inventory/parsers/rust_parser.py +858 -0
- alita_sdk/community/inventory/parsers/swift_parser.py +832 -0
- alita_sdk/community/inventory/parsers/text_parser.py +322 -0
- alita_sdk/community/inventory/parsers/yaml_parser.py +370 -0
- alita_sdk/community/inventory/patterns/__init__.py +61 -0
- alita_sdk/community/inventory/patterns/ast_adapter.py +380 -0
- alita_sdk/community/inventory/patterns/loader.py +348 -0
- alita_sdk/community/inventory/patterns/registry.py +198 -0
- alita_sdk/community/inventory/presets.py +535 -0
- alita_sdk/community/inventory/retrieval.py +1403 -0
- alita_sdk/community/inventory/toolkit.py +173 -0
- alita_sdk/community/inventory/toolkit_utils.py +176 -0
- alita_sdk/community/inventory/visualize.py +1370 -0
- alita_sdk/configurations/__init__.py +1 -1
- alita_sdk/configurations/ado.py +141 -20
- alita_sdk/configurations/bitbucket.py +94 -2
- alita_sdk/configurations/confluence.py +130 -1
- alita_sdk/configurations/figma.py +76 -0
- alita_sdk/configurations/gitlab.py +91 -0
- alita_sdk/configurations/jira.py +103 -0
- alita_sdk/configurations/openapi.py +329 -0
- alita_sdk/configurations/qtest.py +72 -1
- alita_sdk/configurations/report_portal.py +96 -0
- alita_sdk/configurations/sharepoint.py +148 -0
- alita_sdk/configurations/testio.py +83 -0
- alita_sdk/configurations/testrail.py +88 -0
- alita_sdk/configurations/xray.py +93 -0
- alita_sdk/configurations/zephyr_enterprise.py +93 -0
- alita_sdk/configurations/zephyr_essential.py +75 -0
- alita_sdk/runtime/clients/artifact.py +3 -3
- alita_sdk/runtime/clients/client.py +388 -46
- alita_sdk/runtime/clients/mcp_discovery.py +342 -0
- alita_sdk/runtime/clients/mcp_manager.py +262 -0
- alita_sdk/runtime/clients/sandbox_client.py +8 -21
- alita_sdk/runtime/langchain/_constants_bkup.py +1318 -0
- alita_sdk/runtime/langchain/assistant.py +157 -39
- alita_sdk/runtime/langchain/constants.py +647 -1
- alita_sdk/runtime/langchain/document_loaders/AlitaDocxMammothLoader.py +315 -3
- alita_sdk/runtime/langchain/document_loaders/AlitaExcelLoader.py +103 -60
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLinesLoader.py +77 -0
- alita_sdk/runtime/langchain/document_loaders/AlitaJSONLoader.py +10 -4
- alita_sdk/runtime/langchain/document_loaders/AlitaPowerPointLoader.py +226 -7
- alita_sdk/runtime/langchain/document_loaders/AlitaTextLoader.py +5 -2
- alita_sdk/runtime/langchain/document_loaders/constants.py +40 -19
- alita_sdk/runtime/langchain/langraph_agent.py +405 -84
- alita_sdk/runtime/langchain/utils.py +106 -7
- alita_sdk/runtime/llms/preloaded.py +2 -6
- alita_sdk/runtime/models/mcp_models.py +61 -0
- alita_sdk/runtime/skills/__init__.py +91 -0
- alita_sdk/runtime/skills/callbacks.py +498 -0
- alita_sdk/runtime/skills/discovery.py +540 -0
- alita_sdk/runtime/skills/executor.py +610 -0
- alita_sdk/runtime/skills/input_builder.py +371 -0
- alita_sdk/runtime/skills/models.py +330 -0
- alita_sdk/runtime/skills/registry.py +355 -0
- alita_sdk/runtime/skills/skill_runner.py +330 -0
- alita_sdk/runtime/toolkits/__init__.py +31 -0
- alita_sdk/runtime/toolkits/application.py +29 -10
- alita_sdk/runtime/toolkits/artifact.py +20 -11
- alita_sdk/runtime/toolkits/datasource.py +13 -6
- alita_sdk/runtime/toolkits/mcp.py +783 -0
- alita_sdk/runtime/toolkits/mcp_config.py +1048 -0
- alita_sdk/runtime/toolkits/planning.py +178 -0
- alita_sdk/runtime/toolkits/skill_router.py +238 -0
- alita_sdk/runtime/toolkits/subgraph.py +251 -6
- alita_sdk/runtime/toolkits/tools.py +356 -69
- alita_sdk/runtime/toolkits/vectorstore.py +11 -5
- alita_sdk/runtime/tools/__init__.py +10 -3
- alita_sdk/runtime/tools/application.py +27 -6
- alita_sdk/runtime/tools/artifact.py +511 -28
- alita_sdk/runtime/tools/data_analysis.py +183 -0
- alita_sdk/runtime/tools/function.py +67 -35
- alita_sdk/runtime/tools/graph.py +10 -4
- alita_sdk/runtime/tools/image_generation.py +148 -46
- alita_sdk/runtime/tools/llm.py +1003 -128
- alita_sdk/runtime/tools/loop.py +3 -1
- alita_sdk/runtime/tools/loop_output.py +3 -1
- alita_sdk/runtime/tools/mcp_inspect_tool.py +284 -0
- alita_sdk/runtime/tools/mcp_remote_tool.py +181 -0
- alita_sdk/runtime/tools/mcp_server_tool.py +8 -5
- alita_sdk/runtime/tools/planning/__init__.py +36 -0
- alita_sdk/runtime/tools/planning/models.py +246 -0
- alita_sdk/runtime/tools/planning/wrapper.py +607 -0
- alita_sdk/runtime/tools/router.py +2 -4
- alita_sdk/runtime/tools/sandbox.py +65 -48
- alita_sdk/runtime/tools/skill_router.py +776 -0
- alita_sdk/runtime/tools/tool.py +3 -1
- alita_sdk/runtime/tools/vectorstore.py +9 -3
- alita_sdk/runtime/tools/vectorstore_base.py +70 -14
- alita_sdk/runtime/utils/AlitaCallback.py +137 -21
- alita_sdk/runtime/utils/constants.py +5 -1
- alita_sdk/runtime/utils/mcp_client.py +492 -0
- alita_sdk/runtime/utils/mcp_oauth.py +361 -0
- alita_sdk/runtime/utils/mcp_sse_client.py +434 -0
- alita_sdk/runtime/utils/mcp_tools_discovery.py +124 -0
- alita_sdk/runtime/utils/serialization.py +155 -0
- alita_sdk/runtime/utils/streamlit.py +40 -13
- alita_sdk/runtime/utils/toolkit_utils.py +30 -9
- alita_sdk/runtime/utils/utils.py +36 -0
- alita_sdk/tools/__init__.py +134 -35
- alita_sdk/tools/ado/repos/__init__.py +51 -32
- alita_sdk/tools/ado/repos/repos_wrapper.py +148 -89
- alita_sdk/tools/ado/test_plan/__init__.py +25 -9
- alita_sdk/tools/ado/test_plan/test_plan_wrapper.py +23 -1
- alita_sdk/tools/ado/utils.py +1 -18
- alita_sdk/tools/ado/wiki/__init__.py +25 -12
- alita_sdk/tools/ado/wiki/ado_wrapper.py +291 -22
- alita_sdk/tools/ado/work_item/__init__.py +26 -13
- alita_sdk/tools/ado/work_item/ado_wrapper.py +73 -11
- alita_sdk/tools/advanced_jira_mining/__init__.py +11 -8
- alita_sdk/tools/aws/delta_lake/__init__.py +13 -9
- alita_sdk/tools/aws/delta_lake/tool.py +5 -1
- alita_sdk/tools/azure_ai/search/__init__.py +11 -8
- alita_sdk/tools/azure_ai/search/api_wrapper.py +1 -1
- alita_sdk/tools/base/tool.py +5 -1
- alita_sdk/tools/base_indexer_toolkit.py +271 -84
- alita_sdk/tools/bitbucket/__init__.py +17 -11
- alita_sdk/tools/bitbucket/api_wrapper.py +59 -11
- alita_sdk/tools/bitbucket/cloud_api_wrapper.py +49 -35
- alita_sdk/tools/browser/__init__.py +5 -4
- alita_sdk/tools/carrier/__init__.py +5 -6
- alita_sdk/tools/carrier/backend_reports_tool.py +6 -6
- alita_sdk/tools/carrier/run_ui_test_tool.py +6 -6
- alita_sdk/tools/carrier/ui_reports_tool.py +5 -5
- alita_sdk/tools/chunkers/__init__.py +3 -1
- alita_sdk/tools/chunkers/code/treesitter/treesitter.py +37 -13
- alita_sdk/tools/chunkers/sematic/json_chunker.py +1 -0
- alita_sdk/tools/chunkers/sematic/markdown_chunker.py +97 -6
- alita_sdk/tools/chunkers/sematic/proposal_chunker.py +1 -1
- alita_sdk/tools/chunkers/universal_chunker.py +270 -0
- alita_sdk/tools/cloud/aws/__init__.py +10 -7
- alita_sdk/tools/cloud/azure/__init__.py +10 -7
- alita_sdk/tools/cloud/gcp/__init__.py +10 -7
- alita_sdk/tools/cloud/k8s/__init__.py +10 -7
- alita_sdk/tools/code/linter/__init__.py +10 -8
- alita_sdk/tools/code/loaders/codesearcher.py +3 -2
- alita_sdk/tools/code/sonar/__init__.py +11 -8
- alita_sdk/tools/code_indexer_toolkit.py +82 -22
- alita_sdk/tools/confluence/__init__.py +22 -16
- alita_sdk/tools/confluence/api_wrapper.py +107 -30
- alita_sdk/tools/confluence/loader.py +14 -2
- alita_sdk/tools/custom_open_api/__init__.py +12 -5
- alita_sdk/tools/elastic/__init__.py +11 -8
- alita_sdk/tools/elitea_base.py +493 -30
- alita_sdk/tools/figma/__init__.py +58 -11
- alita_sdk/tools/figma/api_wrapper.py +1235 -143
- alita_sdk/tools/figma/figma_client.py +73 -0
- alita_sdk/tools/figma/toon_tools.py +2748 -0
- alita_sdk/tools/github/__init__.py +14 -15
- alita_sdk/tools/github/github_client.py +224 -100
- alita_sdk/tools/github/graphql_client_wrapper.py +119 -33
- alita_sdk/tools/github/schemas.py +14 -5
- alita_sdk/tools/github/tool.py +5 -1
- alita_sdk/tools/github/tool_prompts.py +9 -22
- alita_sdk/tools/gitlab/__init__.py +16 -11
- alita_sdk/tools/gitlab/api_wrapper.py +218 -48
- alita_sdk/tools/gitlab_org/__init__.py +10 -9
- alita_sdk/tools/gitlab_org/api_wrapper.py +63 -64
- alita_sdk/tools/google/bigquery/__init__.py +13 -12
- alita_sdk/tools/google/bigquery/tool.py +5 -1
- alita_sdk/tools/google_places/__init__.py +11 -8
- alita_sdk/tools/google_places/api_wrapper.py +1 -1
- alita_sdk/tools/jira/__init__.py +17 -10
- alita_sdk/tools/jira/api_wrapper.py +92 -41
- alita_sdk/tools/keycloak/__init__.py +11 -8
- alita_sdk/tools/localgit/__init__.py +9 -3
- alita_sdk/tools/localgit/local_git.py +62 -54
- alita_sdk/tools/localgit/tool.py +5 -1
- alita_sdk/tools/memory/__init__.py +12 -4
- alita_sdk/tools/non_code_indexer_toolkit.py +1 -0
- alita_sdk/tools/ocr/__init__.py +11 -8
- alita_sdk/tools/openapi/__init__.py +491 -106
- alita_sdk/tools/openapi/api_wrapper.py +1368 -0
- alita_sdk/tools/openapi/tool.py +20 -0
- alita_sdk/tools/pandas/__init__.py +20 -12
- alita_sdk/tools/pandas/api_wrapper.py +38 -25
- alita_sdk/tools/pandas/dataframe/generator/base.py +3 -1
- alita_sdk/tools/postman/__init__.py +10 -9
- alita_sdk/tools/pptx/__init__.py +11 -10
- alita_sdk/tools/pptx/pptx_wrapper.py +1 -1
- alita_sdk/tools/qtest/__init__.py +31 -11
- alita_sdk/tools/qtest/api_wrapper.py +2135 -86
- alita_sdk/tools/rally/__init__.py +10 -9
- alita_sdk/tools/rally/api_wrapper.py +1 -1
- alita_sdk/tools/report_portal/__init__.py +12 -8
- alita_sdk/tools/salesforce/__init__.py +10 -8
- alita_sdk/tools/servicenow/__init__.py +17 -15
- alita_sdk/tools/servicenow/api_wrapper.py +1 -1
- alita_sdk/tools/sharepoint/__init__.py +10 -7
- alita_sdk/tools/sharepoint/api_wrapper.py +129 -38
- alita_sdk/tools/sharepoint/authorization_helper.py +191 -1
- alita_sdk/tools/sharepoint/utils.py +8 -2
- alita_sdk/tools/slack/__init__.py +10 -7
- alita_sdk/tools/slack/api_wrapper.py +2 -2
- alita_sdk/tools/sql/__init__.py +12 -9
- alita_sdk/tools/testio/__init__.py +10 -7
- alita_sdk/tools/testrail/__init__.py +11 -10
- alita_sdk/tools/testrail/api_wrapper.py +1 -1
- alita_sdk/tools/utils/__init__.py +9 -4
- alita_sdk/tools/utils/content_parser.py +103 -18
- alita_sdk/tools/utils/text_operations.py +410 -0
- alita_sdk/tools/utils/tool_prompts.py +79 -0
- alita_sdk/tools/vector_adapters/VectorStoreAdapter.py +30 -13
- alita_sdk/tools/xray/__init__.py +13 -9
- alita_sdk/tools/yagmail/__init__.py +9 -3
- alita_sdk/tools/zephyr/__init__.py +10 -7
- alita_sdk/tools/zephyr_enterprise/__init__.py +11 -7
- alita_sdk/tools/zephyr_essential/__init__.py +10 -7
- alita_sdk/tools/zephyr_essential/api_wrapper.py +30 -13
- alita_sdk/tools/zephyr_essential/client.py +2 -2
- alita_sdk/tools/zephyr_scale/__init__.py +11 -8
- alita_sdk/tools/zephyr_scale/api_wrapper.py +2 -2
- alita_sdk/tools/zephyr_squad/__init__.py +10 -7
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/METADATA +154 -8
- alita_sdk-0.3.627.dist-info/RECORD +468 -0
- alita_sdk-0.3.627.dist-info/entry_points.txt +2 -0
- alita_sdk-0.3.379.dist-info/RECORD +0 -360
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/WHEEL +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/licenses/LICENSE +0 -0
- {alita_sdk-0.3.379.dist-info → alita_sdk-0.3.627.dist-info}/top_level.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import base64
|
|
1
2
|
import hashlib
|
|
2
3
|
import io
|
|
3
4
|
import json
|
|
@@ -13,13 +14,262 @@ from pydantic import create_model, Field, model_validator
|
|
|
13
14
|
|
|
14
15
|
from ...tools.non_code_indexer_toolkit import NonCodeIndexerToolkit
|
|
15
16
|
from ...tools.utils.available_tools_decorator import extend_with_parent_available_tools
|
|
16
|
-
from ...
|
|
17
|
+
from ...tools.elitea_base import extend_with_file_operations, BaseCodeToolApiWrapper
|
|
18
|
+
from ...runtime.utils.utils import IndexerKeywords, resolve_image_from_cache
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
20
22
|
bucket: str
|
|
21
23
|
artifact: Optional[Any] = None
|
|
22
|
-
|
|
24
|
+
|
|
25
|
+
# Override file operation methods to support bucket_name parameter
|
|
26
|
+
# (instead of importing from BaseCodeToolApiWrapper which uses 'branch')
|
|
27
|
+
|
|
28
|
+
def read_file_chunk(
    self,
    file_path: str,
    start_line: int,
    end_line: Optional[int] = None,
    bucket_name: str = None
) -> str:
    """
    Return an inclusive range of lines from a file stored in an artifact bucket.

    Args:
        file_path: Path to the file to read
        start_line: First line to return (1-indexed, inclusive)
        end_line: Last line to return (1-indexed, inclusive). None means "through the end".
        bucket_name: Bucket to read from. Passed through to ``_read_file`` unchanged.

    Returns:
        Whatever ``apply_line_slice`` produces for the requested range
    """
    from ...tools.utils.text_operations import apply_line_slice

    # Express the inclusive [start_line, end_line] range as an offset/limit pair.
    if end_line is None:
        line_count = None
    else:
        line_count = end_line - start_line + 1
    window = {"offset": start_line, "limit": line_count}

    # The same window is handed to the reader and to the slicer: the slice is
    # applied client-side in case the reader does not honour partial reads.
    raw_text = self._read_file(file_path, branch=None, bucket_name=bucket_name, **window)
    return apply_line_slice(raw_text, **window)
|
|
58
|
+
|
|
59
|
+
def read_multiple_files(
    self,
    file_paths: List[str],
    bucket_name: str = None,
    offset: Optional[int] = None,
    limit: Optional[int] = None
) -> dict:
    """
    Read several files from an artifact bucket in one call.

    A failure on one file does not abort the batch: that file's entry holds an
    error message string instead of content.

    Args:
        file_paths: Paths of the files to read
        bucket_name: Bucket to read from. Passed through to ``_read_file`` unchanged.
        offset: Starting line number applied to every file (1-indexed)
        limit: Number of lines to keep from ``offset`` for every file

    Returns:
        Dict mapping each requested path to its sliced content or error message
    """
    from ...tools.utils.text_operations import apply_line_slice

    def _load(single_path):
        # Best-effort per file: any exception becomes the value for that path.
        try:
            text = self._read_file(
                single_path, branch=None, bucket_name=bucket_name, offset=offset, limit=limit
            )
            return apply_line_slice(text, offset=offset, limit=limit)
        except Exception as exc:
            return f"Error reading file: {str(exc)}"

    return {single_path: _load(single_path) for single_path in file_paths}
|
|
88
|
+
|
|
89
|
+
def search_file(
    self,
    file_path: str,
    pattern: str,
    bucket_name: str = None,
    is_regex: bool = True,
    context_lines: int = 2
) -> str:
    """
    Look for a pattern inside a file from an artifact bucket and render the hits.

    Args:
        file_path: Path to the file to search
        pattern: Search pattern. Treated as regex by default unless is_regex=False.
        bucket_name: Bucket to read from. Passed through to ``_read_file`` unchanged.
        is_regex: Whether ``pattern`` is a regex (default True)
        context_lines: Number of lines before/after each match to include

    Returns:
        A human-readable report of the matches, or a "no matches" message
    """
    from ...tools.utils.text_operations import search_in_content

    text = self._read_file(file_path, branch=None, bucket_name=bucket_name)
    hits = search_in_content(text, pattern, is_regex=is_regex, context_lines=context_lines)

    if not hits:
        return f"No matches found for pattern '{pattern}' in {file_path}"

    def _as_context(lines):
        # Context lines are prefixed so they can be told apart from the "> " hit line.
        return "\n".join(f" {line}" for line in lines)

    report = [f"Found {len(hits)} match(es) in {file_path}:\n"]
    for hit in hits:
        report.append(f"\n--- Line {hit['line_number']} ---")
        leading = hit['context_before']
        if leading:
            report.append(_as_context(leading))
        report.append(f"> {hit['line_content']}")
        trailing = hit['context_after']
        if trailing:
            report.append(_as_context(trailing))

    return "\n".join(report)
|
|
129
|
+
|
|
130
|
+
def edit_file(
    self,
    file_path: str,
    file_query: str,
    bucket_name: str = None,
    commit_message: str = None
) -> str:
    """
    Apply OLD/NEW replacement pairs to a text file stored in an artifact bucket.

    Args:
        file_path: Path to the file to edit. Must be a text file.
        file_query: Edit instructions with OLD/NEW markers.
        bucket_name: Bucket holding the file. Passed through to the read/write helpers.
        commit_message: Forwarded to ``_write_file``; not used for artifacts
            (kept for API consistency)

    Returns:
        A success message with the number of edits applied, or a message
        saying that nothing matched (in which case nothing is written)

    Raises:
        ToolException: if the file type is not text-editable or ``file_query``
            contains no valid OLD/NEW pair
    """
    from ...tools.utils.text_operations import parse_old_new_markers, is_text_editable, try_apply_edit
    from langchain_core.tools import ToolException

    # Refuse non-text files before touching the bucket.
    if not is_text_editable(file_path):
        raise ToolException(f"File '{file_path}' is not a text-editable file type")

    current_text = self._read_file(file_path, branch=None, bucket_name=bucket_name)

    replacements = parse_old_new_markers(file_query)
    if not replacements:
        raise ToolException("No valid OLD/NEW marker pairs found in edit instructions")

    # Apply the pairs in order, each one on top of the result of the previous.
    edits_applied = 0
    for old_block, new_block in replacements:
        candidate, via_fallback = try_apply_edit(current_text, old_block, new_block, file_path)
        # An edit counts when it changed the text or was applied via the fallback path.
        if candidate != current_text or via_fallback:
            edits_applied += 1
            current_text = candidate

    if edits_applied == 0:
        return f"No edits were applied to {file_path}. The OLD blocks may not match the file content."

    self._write_file(
        file_path,
        current_text,
        branch=None,
        commit_message=commit_message,
        bucket_name=bucket_name,
    )

    return f"Successfully applied {edits_applied} edit(s) to {file_path}"
|
|
180
|
+
|
|
181
|
+
def _get_file_operation_schemas(self):
    """
    Build the input models for the file operation tools of this toolkit.

    The models expose ``bucket_name`` where the generic file tools use a branch.
    Per the original note, the @extend_with_file_operations decorator calls this
    method to obtain the toolkit-specific schemas.

    Returns:
        Dict mapping the tool names ``read_file_chunk``, ``read_multiple_files``,
        ``search_file`` and ``edit_file`` to their pydantic input models
    """
    bucket_help = "Bucket name. If not provided, uses toolkit-configured default bucket."

    def bucket_field():
        # A fresh (type, Field) pair per model; every model takes the same optional bucket.
        return (Optional[str], Field(description=bucket_help, default=None))

    edit_instructions_help = """Edit instructions with OLD/NEW markers. Format:
OLD <<<<
old content to replace
>>>> OLD
NEW <<<<
new content
>>>> NEW

Multiple OLD/NEW pairs can be provided for multiple edits."""

    read_chunk_model = create_model(
        "ArtifactReadFileChunkInput",
        file_path=(str, Field(description="Path to the file to read")),
        bucket_name=bucket_field(),
        start_line=(int, Field(description="Starting line number (1-indexed, inclusive)", ge=1)),
        end_line=(Optional[int], Field(
            description="Ending line number (1-indexed, inclusive). If None, reads to end.",
            default=None,
            ge=1
        )),
    )

    read_many_model = create_model(
        "ArtifactReadMultipleFilesInput",
        file_paths=(List[str], Field(description="List of file paths to read", min_length=1)),
        bucket_name=bucket_field(),
        offset=(Optional[int], Field(
            description="Starting line number for all files (1-indexed)",
            default=None,
            ge=1
        )),
        limit=(Optional[int], Field(
            description="Number of lines to read from offset for all files",
            default=None,
            ge=1
        )),
    )

    search_model = create_model(
        "ArtifactSearchFileInput",
        file_path=(str, Field(description="Path to the file to search")),
        pattern=(str, Field(description="Search pattern. Treated as regex by default unless is_regex=False.")),
        bucket_name=bucket_field(),
        is_regex=(bool, Field(
            description="Whether pattern is a regex. Default is True for flexible matching.",
            default=True
        )),
        context_lines=(int, Field(
            description="Number of lines before/after match to include for context",
            default=2,
            ge=0
        )),
    )

    edit_model = create_model(
        "ArtifactEditFileInput",
        file_path=(str, Field(
            description="Path to the file to edit. Must be a text file (markdown, txt, csv, json, xml, html, yaml, etc.)"
        )),
        file_query=(str, Field(description=edit_instructions_help)),
        bucket_name=bucket_field(),
        commit_message=(Optional[str], Field(
            description="Not used for artifacts (kept for API consistency)",
            default=None
        )),
    )

    return {
        "read_file_chunk": read_chunk_model,
        "read_multiple_files": read_many_model,
        "search_file": search_model,
        "edit_file": edit_model,
    }
|
|
272
|
+
|
|
23
273
|
@model_validator(mode='before')
|
|
24
274
|
@classmethod
|
|
25
275
|
def validate_toolkit(cls, values):
|
|
@@ -31,20 +281,46 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
31
281
|
return super().validate_toolkit(values)
|
|
32
282
|
|
|
33
283
|
def list_files(self, bucket_name = None, return_as_string = True):
    """
    List the files in an artifact bucket and attach an API download link to each.

    Args:
        bucket_name: Bucket to list. Passed to ``self.artifact.list`` as given;
            when falsy, ``self.bucket`` is used for building the links.
        return_as_string: When True (default) the result is returned as ``str(result)``,
            otherwise the raw listing object is returned.

    Returns:
        The listing from ``self.artifact.list``. When it is a dict with a ``rows``
        entry, every row that has a ``name`` gains a ``link`` key.
    """
    result = self.artifact.list(bucket_name, return_as_string=False)

    # Links are only added when the listing has the expected {'rows': [...]} shape.
    if isinstance(result, dict) and 'rows' in result:
        bucket = bucket_name or self.bucket

        # `or ''` covers a client whose base_url attribute exists but is None,
        # which would otherwise fail on .rstrip().
        base_url = (getattr(self.alita, 'base_url', '') or '').rstrip('/')
        project_id = getattr(self.alita, 'project_id', '')
        link_prefix = f"{base_url}/api/v2/artifacts/artifact/default/{project_id}/{bucket}"

        for file_info in result['rows']:
            if 'name' in file_info:
                file_info['link'] = f"{link_prefix}/{file_info['name']}"

    return str(result) if return_as_string else result
|
|
35
302
|
|
|
36
303
|
def create_file(self, filename: str, filedata: str, bucket_name = None):
|
|
37
|
-
|
|
304
|
+
# Sanitize filename to prevent regex errors during indexing
|
|
305
|
+
sanitized_filename, was_modified = self._sanitize_filename(filename)
|
|
306
|
+
if was_modified:
|
|
307
|
+
logging.warning(f"Filename sanitized: '{filename}' -> '{sanitized_filename}'")
|
|
308
|
+
|
|
309
|
+
# Auto-detect and extract base64 from image_url structures (from image_generation tool)
|
|
310
|
+
# Returns tuple: (processed_data, is_from_image_generation)
|
|
311
|
+
filedata, is_from_image_generation = self._extract_base64_if_needed(filedata)
|
|
312
|
+
|
|
313
|
+
if sanitized_filename.endswith(".xlsx"):
|
|
38
314
|
data = json.loads(filedata)
|
|
39
315
|
filedata = self.create_xlsx_filedata(data)
|
|
40
316
|
|
|
41
|
-
result = self.artifact.create(
|
|
317
|
+
result = self.artifact.create(sanitized_filename, filedata, bucket_name)
|
|
42
318
|
|
|
43
|
-
#
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
{
|
|
319
|
+
# Skip file_modified event for images from image_generation tool
|
|
320
|
+
# These are already tracked in the tool output and don't need duplicate events
|
|
321
|
+
if not is_from_image_generation:
|
|
322
|
+
# Dispatch custom event for file creation
|
|
323
|
+
dispatch_custom_event("file_modified", {
|
|
48
324
|
"message": f"File '{filename}' created successfully",
|
|
49
325
|
"filename": filename,
|
|
50
326
|
"tool_name": "createFile",
|
|
@@ -53,9 +329,73 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
53
329
|
"meta": {
|
|
54
330
|
"bucket": bucket_name or self.bucket
|
|
55
331
|
}
|
|
56
|
-
}
|
|
332
|
+
})
|
|
57
333
|
|
|
58
334
|
return result
|
|
335
|
+
|
|
336
|
+
@staticmethod
def _sanitize_filename(filename: str) -> tuple[str, bool]:
    """
    Sanitize a filename for safe storage and regex pattern matching.

    The name is split into stem and extension (last suffix only). The stem keeps
    word characters, whitespace and hyphens; runs of whitespace/hyphens collapse
    into a single hyphen and edge hyphens are trimmed. The extension keeps word
    characters, dots and hyphens. Any directory part of ``filename`` is dropped
    because only the stem and suffix are used.

    Args:
        filename: Requested file name.

    Returns:
        ``(sanitized, was_modified)`` where ``was_modified`` is True when the
        sanitized name differs from the input. Empty or whitespace-only input
        yields ``("unnamed_file", True)``.
    """
    from pathlib import Path

    if not filename or not filename.strip():
        return "unnamed_file", True

    path_obj = Path(filename)

    # Whitelist: alphanumeric, underscore, hyphen, space, Unicode letters/digits
    stem = re.sub(r'[^\w\s-]', '', path_obj.stem, flags=re.UNICODE)
    stem = re.sub(r'[-\s]+', '-', stem)
    stem = stem.strip('-').strip()
    if not stem:
        stem = "file"

    extension = re.sub(r'[^\w.-]', '', path_obj.suffix, flags=re.UNICODE)
    # An extension with no word character left (e.g. ".*" -> ".") carries no
    # information; drop it rather than emitting a name with a dangling dot.
    if not re.search(r'\w', extension):
        extension = ''

    sanitized = stem + extension
    return sanitized, (sanitized != filename)
|
|
362
|
+
|
|
363
|
+
def _extract_base64_if_needed(self, filedata: str) -> tuple[str | bytes, bool]:
|
|
364
|
+
"""
|
|
365
|
+
Resolve cached_image_id references from cache and decode to binary data.
|
|
366
|
+
|
|
367
|
+
Requires JSON format with cached_image_id field: {"cached_image_id": "img_xxx"}
|
|
368
|
+
LLM must extract specific cached_image_id from generate_image response.
|
|
369
|
+
|
|
370
|
+
Returns:
|
|
371
|
+
tuple: (processed_data, is_from_image_generation)
|
|
372
|
+
- processed_data: Original filedata or resolved binary image data
|
|
373
|
+
- is_from_image_generation: True if data came from image_generation cache
|
|
374
|
+
"""
|
|
375
|
+
if not filedata or not isinstance(filedata, str):
|
|
376
|
+
return filedata, False
|
|
377
|
+
|
|
378
|
+
# Require JSON format - fail fast if not JSON
|
|
379
|
+
if '{' not in filedata:
|
|
380
|
+
return filedata, False
|
|
381
|
+
|
|
382
|
+
try:
|
|
383
|
+
data = json.loads(filedata)
|
|
384
|
+
except json.JSONDecodeError:
|
|
385
|
+
# Not valid JSON, return as-is (regular file content)
|
|
386
|
+
return filedata, False
|
|
387
|
+
|
|
388
|
+
if not isinstance(data, dict):
|
|
389
|
+
return filedata, False
|
|
390
|
+
|
|
391
|
+
# Only accept direct cached_image_id format: {"cached_image_id": "img_xxx"}
|
|
392
|
+
# LLM must parse generate_image response and extract specific cached_image_id
|
|
393
|
+
if 'cached_image_id' in data:
|
|
394
|
+
binary_data = resolve_image_from_cache(self.alita, data['cached_image_id'])
|
|
395
|
+
return binary_data, True # Mark as from image_generation
|
|
396
|
+
|
|
397
|
+
# If JSON doesn't have cached_image_id, treat as regular file content
|
|
398
|
+
return filedata, False
|
|
59
399
|
|
|
60
400
|
def create_xlsx_filedata(self, data: dict[str, list[list]]) -> bytes:
|
|
61
401
|
try:
|
|
@@ -99,9 +439,112 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
99
439
|
sheet_name=sheet_name,
|
|
100
440
|
excel_by_sheets=excel_by_sheets,
|
|
101
441
|
llm=self.llm)
|
|
442
|
+
|
|
443
|
+
def _read_file(
|
|
444
|
+
self,
|
|
445
|
+
file_path: str,
|
|
446
|
+
branch: str = None,
|
|
447
|
+
bucket_name: str = None,
|
|
448
|
+
**kwargs
|
|
449
|
+
) -> str:
|
|
450
|
+
"""
|
|
451
|
+
Read a file from artifact bucket with optional partial read support.
|
|
452
|
+
|
|
453
|
+
Parameters:
|
|
454
|
+
file_path: Name of the file in the bucket
|
|
455
|
+
branch: Not used for artifacts (kept for API consistency)
|
|
456
|
+
bucket_name: Name of the bucket (uses default if None)
|
|
457
|
+
**kwargs: Additional parameters (offset, limit, head, tail) - currently ignored,
|
|
458
|
+
partial read handled client-side by base class methods
|
|
459
|
+
|
|
460
|
+
Returns:
|
|
461
|
+
File content as string
|
|
462
|
+
"""
|
|
463
|
+
return self.read_file(filename=file_path, bucket_name=bucket_name)
|
|
464
|
+
|
|
465
|
+
def _write_file(
    self,
    file_path: str,
    content: str,
    branch: str = None,
    commit_message: str = None,
    bucket_name: str = None
) -> str:
    """
    Write content to a file in the artifact bucket (create or overwrite).

    The file name is sanitized first. The method then tries to fetch the
    artifact and overwrite it; if either step raises, it falls back to
    creating the artifact. A single ``file_modified`` custom event is
    dispatched once the write has succeeded.

    Parameters:
        file_path: Name of the file in the bucket
        content: New file content
        branch: Not used for artifacts (kept for API consistency)
        commit_message: Not used for artifacts (kept for API consistency)
        bucket_name: Name of the bucket (the event reports ``self.bucket``
            when this is falsy)

    Returns:
        ``"Updated file <name>"`` or ``"Created file <name>"``

    Raises:
        ToolException: If sanitizing, writing, or dispatching the event fails.
    """
    try:
        sanitized_filename, was_modified = self._sanitize_filename(file_path)
        if was_modified:
            logging.warning(f"Filename sanitized: '{file_path}' -> '{sanitized_filename}'")

        try:
            # A failing lookup or overwrite is taken to mean "file doesn't exist".
            self.artifact.get(artifact_name=sanitized_filename, bucket_name=bucket_name, llm=self.llm)
            self.artifact.overwrite(sanitized_filename, content, bucket_name)
        except Exception:
            # `Exception`, not a bare except: KeyboardInterrupt/SystemExit must
            # propagate instead of being mistaken for a missing file.
            self.artifact.create(sanitized_filename, content, bucket_name)
            outcome, operation_type, summary = "created", "create", "Created"
        else:
            outcome, operation_type, summary = "updated", "modify", "Updated"

        # Dispatched outside the inner try so that a dispatch failure after a
        # successful overwrite cannot trigger a second write via `create`.
        dispatch_custom_event("file_modified", {
            "message": f"File '{sanitized_filename}' {outcome} successfully",
            "filename": sanitized_filename,
            "tool_name": "edit_file",
            "toolkit": "artifact",
            "operation_type": operation_type,
            "meta": {
                "bucket": bucket_name or self.bucket
            }
        })

        return f"{summary} file {sanitized_filename}"
    except Exception as e:
        raise ToolException(f"Unable to write file {file_path}: {str(e)}") from e
|
|
102
530
|
|
|
103
531
|
def delete_file(self, filename: str, bucket_name = None):
    """
    Delete a file from the bucket, failing loudly when it is not there.

    The bucket listing is consulted first because the storage-level delete
    is idempotent and would not report a missing file on its own.

    Parameters:
        filename: Name of the file to delete
        bucket_name: Bucket to operate on, passed through as given

    Returns:
        Confirmation message naming the deleted file

    Raises:
        ToolException: If the file is not in the listing, the listing cannot
            be obtained or read, or the delete call reports an error.
    """
    try:
        listing = self.list_files(bucket_name, return_as_string=False)
        known_names = [entry['name'] for entry in listing.get('rows', [])]
        is_missing = filename not in known_names
    except ToolException:
        # Already in the shape callers expect; pass it through unchanged.
        raise
    except Exception as e:
        raise ToolException(f'Error (deleteFile): Unable to verify file existence for \'{filename}\': {str(e)}')

    if is_missing:
        raise ToolException(f'Error (deleteFile): ENOENT: no such file or directory: \'{filename}\'')

    outcome = self.artifact.delete(filename, bucket_name)
    # The client signals failure through an 'error' entry in a dict result.
    if outcome and isinstance(outcome, dict) and outcome.get('error'):
        raise ToolException(f'Error (deleteFile): {outcome.get("error")} for file \'{filename}\'')
    return f'File "{filename}" deleted successfully.'
|
|
105
548
|
|
|
106
549
|
def append_data(self, filename: str, filedata: str, bucket_name = None):
|
|
107
550
|
result = self.artifact.append(filename, filedata, bucket_name)
|
|
@@ -138,7 +581,11 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
138
581
|
return result
|
|
139
582
|
|
|
140
583
|
def create_new_bucket(self, bucket_name: str, expiration_measure = "weeks", expiration_value = 1):
    """
    Create a bucket after normalizing its name.

    Underscores in the requested name become hyphens and the name is
    lowercased; a warning is logged whenever that changes the name.

    Parameters:
        bucket_name: Requested bucket name
        expiration_measure: Expiration unit forwarded to the client
        expiration_value: Expiration amount forwarded to the client

    Returns:
        Whatever the artifact client's ``create_bucket`` returns
    """
    underscore_to_hyphen = str.maketrans('_', '-')
    sanitized_name = bucket_name.translate(underscore_to_hyphen).lower()

    name_changed = sanitized_name != bucket_name
    if name_changed:
        logging.warning(f"Bucket name '{bucket_name}' was sanitized to '{sanitized_name}' (underscores replaced with hyphens, converted to lowercase)")

    client = self.artifact.client
    return client.create_bucket(sanitized_name, expiration_measure, expiration_value)
|
|
142
589
|
|
|
143
590
|
def _index_tool_params(self):
|
|
144
591
|
return {
|
|
@@ -160,6 +607,21 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
160
607
|
|
|
161
608
|
include_extensions = kwargs.get('include_extensions', [])
|
|
162
609
|
skip_extensions = kwargs.get('skip_extensions', [])
|
|
610
|
+
chunking_config = kwargs.get('chunking_config', {})
|
|
611
|
+
|
|
612
|
+
# Auto-include extensions from chunking_config if include_extensions is specified
|
|
613
|
+
# This allows chunking config to work without manually adding extensions to include_extensions
|
|
614
|
+
if chunking_config and include_extensions:
|
|
615
|
+
for ext_pattern in chunking_config.keys():
|
|
616
|
+
# Normalize extension pattern (both ".cbl" and "*.cbl" should work)
|
|
617
|
+
normalized = ext_pattern if ext_pattern.startswith('*') else f'*{ext_pattern}'
|
|
618
|
+
if normalized not in include_extensions:
|
|
619
|
+
include_extensions.append(normalized)
|
|
620
|
+
self._log_tool_event(
|
|
621
|
+
message=f"Auto-included extension '{normalized}' from chunking_config",
|
|
622
|
+
tool_name="loader"
|
|
623
|
+
)
|
|
624
|
+
|
|
163
625
|
self._log_tool_event(message=f"Files filtering started. Include extensions: {include_extensions}. "
|
|
164
626
|
f"Skip extensions: {skip_extensions}", tool_name="loader")
|
|
165
627
|
# show the progress of filtering
|
|
@@ -173,13 +635,13 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
173
635
|
file_name = file['name']
|
|
174
636
|
|
|
175
637
|
# Check if file should be skipped based on skip_extensions
|
|
176
|
-
if any(re.match(pattern.replace('
|
|
638
|
+
if any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
|
|
177
639
|
for pattern in skip_extensions):
|
|
178
640
|
continue
|
|
179
641
|
|
|
180
642
|
# Check if file should be included based on include_extensions
|
|
181
643
|
# If include_extensions is empty, process all files (that weren't skipped)
|
|
182
|
-
if include_extensions and not (any(re.match(pattern.replace('
|
|
644
|
+
if include_extensions and not (any(re.match(re.escape(pattern).replace(r'\*', '.*') + '$', file_name, re.IGNORECASE)
|
|
183
645
|
for pattern in include_extensions)):
|
|
184
646
|
continue
|
|
185
647
|
|
|
@@ -207,14 +669,17 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
207
669
|
logging.error(f"Failed while parsing the file '{document.metadata['name']}': {e}")
|
|
208
670
|
yield document
|
|
209
671
|
|
|
210
|
-
@
|
|
672
|
+
@extend_with_file_operations
|
|
211
673
|
def get_available_tools(self):
|
|
674
|
+
"""Get available tools. Returns all tools for schema; filtering happens at toolkit level."""
|
|
212
675
|
bucket_name = (Optional[str], Field(description="Name of the bucket to work with."
|
|
213
676
|
"If bucket is not specified by user directly, the name should be taken from chat history."
|
|
214
677
|
"If bucket never mentioned in chat, the name will be taken from tool configuration."
|
|
215
678
|
" ***IMPORTANT*** Underscore `_` is prohibited in bucket name and should be replaced by `-`",
|
|
216
679
|
default=None))
|
|
217
|
-
|
|
680
|
+
|
|
681
|
+
# Basic artifact tools (always available)
|
|
682
|
+
basic_tools = [
|
|
218
683
|
{
|
|
219
684
|
"ref": self.list_files,
|
|
220
685
|
"name": "listFiles",
|
|
@@ -229,15 +694,19 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
229
694
|
"createFile",
|
|
230
695
|
filename=(str, Field(description="Filename")),
|
|
231
696
|
filedata=(str, Field(description="""Stringified content of the file.
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
697
|
+
|
|
698
|
+
Supports three input formats:
|
|
699
|
+
|
|
700
|
+
1. CACHED IMAGE REFERENCE (for generated/cached images):
|
|
701
|
+
Pass JSON with cached_image_id field: {"cached_image_id": "img_xxx"}
|
|
702
|
+
The tool will automatically resolve and decode the image from cache.
|
|
703
|
+
This is typically used when another tool returns an image reference.
|
|
704
|
+
|
|
705
|
+
2. EXCEL FILES (.xlsx extension):
|
|
706
|
+
Pass JSON with sheet structure: {"Sheet1": [["Name", "Age"], ["Alice", 25], ["Bob", 30]]}
|
|
707
|
+
|
|
708
|
+
3. TEXT/OTHER FILES:
|
|
709
|
+
Pass the plain text string directly.
|
|
241
710
|
""")),
|
|
242
711
|
bucket_name=bucket_name
|
|
243
712
|
)
|
|
@@ -299,11 +768,25 @@ class ArtifactWrapper(NonCodeIndexerToolkit):
|
|
|
299
768
|
"description": "Creates new bucket specified by user.",
|
|
300
769
|
"args_schema": create_model(
|
|
301
770
|
"createNewBucket",
|
|
302
|
-
bucket_name=(str, Field(
|
|
771
|
+
bucket_name=(str, Field(
|
|
772
|
+
description="Bucket name to create. Must start with lowercase letter and contain only lowercase letters, numbers, and hyphens. Underscores will be automatically converted to hyphens.",
|
|
773
|
+
pattern=r'^[a-z][a-z0-9_-]*$' # Allow underscores in input, will be sanitized
|
|
774
|
+
)),
|
|
303
775
|
expiration_measure=(Optional[str], Field(description="Measure of expiration time for bucket configuration."
|
|
304
776
|
"Possible values: `days`, `weeks`, `months`, `years`.",
|
|
305
777
|
default="weeks")),
|
|
306
778
|
expiration_value=(Optional[int], Field(description="Expiration time values.", default=1))
|
|
307
779
|
)
|
|
308
780
|
}
|
|
309
|
-
]
|
|
781
|
+
]
|
|
782
|
+
|
|
783
|
+
# Always include indexing tools in available tools list
|
|
784
|
+
# Filtering based on vector store config happens at toolkit level via decorator
|
|
785
|
+
try:
|
|
786
|
+
# Get indexing tools from parent class
|
|
787
|
+
indexing_tools = super(ArtifactWrapper, self).get_available_tools()
|
|
788
|
+
return indexing_tools + basic_tools
|
|
789
|
+
except Exception as e:
|
|
790
|
+
# If getting parent tools fails, log warning and return basic tools only
|
|
791
|
+
logging.warning(f"Failed to load indexing tools: {e}. Only basic artifact tools will be available.")
|
|
792
|
+
return basic_tools
|