deepset-mcp 0.0.3rc1__py3-none-any.whl → 0.0.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. deepset_mcp/__init__.py +10 -0
  2. deepset_mcp/api/__init__.py +4 -0
  3. deepset_mcp/api/client.py +4 -0
  4. deepset_mcp/api/custom_components/__init__.py +4 -0
  5. deepset_mcp/api/custom_components/models.py +4 -0
  6. deepset_mcp/api/custom_components/protocols.py +4 -0
  7. deepset_mcp/api/custom_components/resource.py +4 -0
  8. deepset_mcp/api/exceptions.py +4 -0
  9. deepset_mcp/api/haystack_service/__init__.py +4 -0
  10. deepset_mcp/api/haystack_service/protocols.py +4 -0
  11. deepset_mcp/api/haystack_service/resource.py +4 -0
  12. deepset_mcp/api/indexes/__init__.py +4 -0
  13. deepset_mcp/api/indexes/models.py +4 -0
  14. deepset_mcp/api/indexes/protocols.py +4 -0
  15. deepset_mcp/api/indexes/resource.py +4 -0
  16. deepset_mcp/api/integrations/__init__.py +4 -0
  17. deepset_mcp/api/integrations/models.py +4 -0
  18. deepset_mcp/api/integrations/protocols.py +4 -0
  19. deepset_mcp/api/integrations/resource.py +4 -0
  20. deepset_mcp/api/pipeline/__init__.py +4 -0
  21. deepset_mcp/api/pipeline/log_level.py +4 -0
  22. deepset_mcp/api/pipeline/models.py +4 -0
  23. deepset_mcp/api/pipeline/protocols.py +8 -0
  24. deepset_mcp/api/pipeline/resource.py +4 -0
  25. deepset_mcp/api/pipeline_template/__init__.py +4 -0
  26. deepset_mcp/api/pipeline_template/models.py +4 -0
  27. deepset_mcp/api/pipeline_template/protocols.py +4 -0
  28. deepset_mcp/api/pipeline_template/resource.py +4 -0
  29. deepset_mcp/api/protocols.py +4 -0
  30. deepset_mcp/api/secrets/__init__.py +4 -0
  31. deepset_mcp/api/secrets/models.py +4 -0
  32. deepset_mcp/api/secrets/protocols.py +4 -0
  33. deepset_mcp/api/secrets/resource.py +4 -0
  34. deepset_mcp/api/shared_models.py +4 -0
  35. deepset_mcp/api/transport.py +4 -0
  36. deepset_mcp/api/user/__init__.py +4 -0
  37. deepset_mcp/api/user/protocols.py +4 -0
  38. deepset_mcp/api/user/resource.py +4 -0
  39. deepset_mcp/api/workspace/__init__.py +4 -0
  40. deepset_mcp/api/workspace/models.py +4 -0
  41. deepset_mcp/api/workspace/protocols.py +4 -0
  42. deepset_mcp/api/workspace/resource.py +4 -0
  43. deepset_mcp/config.py +8 -0
  44. deepset_mcp/initialize_embedding_model.py +4 -0
  45. deepset_mcp/main.py +8 -0
  46. deepset_mcp/store.py +4 -0
  47. deepset_mcp/tool_factory.py +11 -4
  48. deepset_mcp/tools/__init__.py +4 -0
  49. deepset_mcp/tools/custom_components.py +4 -0
  50. deepset_mcp/tools/doc_search.py +4 -0
  51. deepset_mcp/tools/haystack_service.py +4 -0
  52. deepset_mcp/tools/haystack_service_models.py +4 -0
  53. deepset_mcp/tools/indexes.py +4 -0
  54. deepset_mcp/tools/model_protocol.py +4 -0
  55. deepset_mcp/tools/pipeline.py +4 -0
  56. deepset_mcp/tools/pipeline_template.py +4 -0
  57. deepset_mcp/tools/secrets.py +4 -0
  58. deepset_mcp/tools/tokonomics/__init__.py +4 -0
  59. deepset_mcp/tools/tokonomics/decorators.py +4 -0
  60. deepset_mcp/tools/tokonomics/explorer.py +4 -0
  61. deepset_mcp/tools/tokonomics/object_store.py +4 -0
  62. deepset_mcp/tools/workspace.py +4 -0
  63. deepset_mcp-0.0.4rc1.dist-info/METADATA +761 -0
  64. deepset_mcp-0.0.4rc1.dist-info/RECORD +70 -0
  65. {deepset_mcp-0.0.3rc1.dist-info → deepset_mcp-0.0.4rc1.dist-info}/entry_points.txt +0 -1
  66. deepset_mcp-0.0.4rc1.dist-info/licenses/LICENSE +202 -0
  67. deepset_mcp/agents/__init__.py +0 -0
  68. deepset_mcp/agents/debugging/__init__.py +0 -0
  69. deepset_mcp/agents/debugging/debugging_agent.py +0 -37
  70. deepset_mcp/agents/debugging/system_prompt.md +0 -214
  71. deepset_mcp/agents/generalist/__init__.py +0 -0
  72. deepset_mcp/agents/generalist/generalist_agent.py +0 -38
  73. deepset_mcp/agents/generalist/system_prompt.md +0 -241
  74. deepset_mcp/benchmark/README.md +0 -425
  75. deepset_mcp/benchmark/__init__.py +0 -1
  76. deepset_mcp/benchmark/agent_configs/debugging_agent.yml +0 -10
  77. deepset_mcp/benchmark/agent_configs/generalist_agent.yml +0 -6
  78. deepset_mcp/benchmark/dp_validation_error_analysis/__init__.py +0 -0
  79. deepset_mcp/benchmark/dp_validation_error_analysis/eda.ipynb +0 -757
  80. deepset_mcp/benchmark/dp_validation_error_analysis/prepare_interaction_data.ipynb +0 -167
  81. deepset_mcp/benchmark/dp_validation_error_analysis/preprocessing_utils.py +0 -213
  82. deepset_mcp/benchmark/runner/__init__.py +0 -0
  83. deepset_mcp/benchmark/runner/agent_benchmark_runner.py +0 -561
  84. deepset_mcp/benchmark/runner/agent_loader.py +0 -110
  85. deepset_mcp/benchmark/runner/cli.py +0 -39
  86. deepset_mcp/benchmark/runner/cli_agent.py +0 -373
  87. deepset_mcp/benchmark/runner/cli_index.py +0 -71
  88. deepset_mcp/benchmark/runner/cli_pipeline.py +0 -73
  89. deepset_mcp/benchmark/runner/cli_tests.py +0 -226
  90. deepset_mcp/benchmark/runner/cli_utils.py +0 -61
  91. deepset_mcp/benchmark/runner/config.py +0 -73
  92. deepset_mcp/benchmark/runner/config_loader.py +0 -64
  93. deepset_mcp/benchmark/runner/interactive.py +0 -140
  94. deepset_mcp/benchmark/runner/models.py +0 -203
  95. deepset_mcp/benchmark/runner/repl.py +0 -67
  96. deepset_mcp/benchmark/runner/setup_actions.py +0 -238
  97. deepset_mcp/benchmark/runner/streaming.py +0 -360
  98. deepset_mcp/benchmark/runner/teardown_actions.py +0 -196
  99. deepset_mcp/benchmark/runner/tracing.py +0 -21
  100. deepset_mcp/benchmark/tasks/chat_rag_answers_wrong_format.yml +0 -16
  101. deepset_mcp/benchmark/tasks/documents_output_wrong.yml +0 -13
  102. deepset_mcp/benchmark/tasks/jinja_str_instead_of_complex_type.yml +0 -11
  103. deepset_mcp/benchmark/tasks/jinja_syntax_error.yml +0 -11
  104. deepset_mcp/benchmark/tasks/missing_output_mapping.yml +0 -14
  105. deepset_mcp/benchmark/tasks/no_query_input.yml +0 -13
  106. deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_str.yml +0 -141
  107. deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_syntax.yml +0 -141
  108. deepset_mcp/benchmark/tasks/pipelines/chat_rag_answers_wrong_format.yml +0 -181
  109. deepset_mcp/benchmark/tasks/pipelines/chat_rag_missing_output_mapping.yml +0 -189
  110. deepset_mcp/benchmark/tasks/pipelines/rag_documents_wrong_format.yml +0 -193
  111. deepset_mcp/benchmark/tasks/pipelines/rag_no_query_input.yml +0 -191
  112. deepset_mcp/benchmark/tasks/pipelines/standard_index.yml +0 -167
  113. deepset_mcp-0.0.3rc1.dist-info/METADATA +0 -289
  114. deepset_mcp-0.0.3rc1.dist-info/RECORD +0 -115
  115. {deepset_mcp-0.0.3rc1.dist-info → deepset_mcp-0.0.4rc1.dist-info}/WHEEL +0 -0
@@ -1,226 +0,0 @@
1
- import typer
2
-
3
- from deepset_mcp.benchmark.runner.config_loader import (
4
- find_all_test_case_paths,
5
- load_test_case_by_name,
6
- load_test_case_from_path,
7
- )
8
- from deepset_mcp.benchmark.runner.models import TestCaseConfig
9
- from deepset_mcp.benchmark.runner.setup_actions import setup_all, setup_test_case
10
- from deepset_mcp.benchmark.runner.teardown_actions import teardown_all, teardown_test_case
11
-
12
- tests_app = typer.Typer(help="Commands for setting up and tearing down test-cases.")
13
-
14
-
15
- @tests_app.command("list")
16
- def list_cases(
17
- test_dir: str | None = typer.Option(
18
- None,
19
- help="Directory where all test-case YAMLs live (`benchmark/tasks/*.yml`).",
20
- ),
21
- ) -> None:
22
- """List all available test cases stored under `test_dir`."""
23
- paths = find_all_test_case_paths(test_dir)
24
- if not paths:
25
- typer.secho(f"No test-case files found in {test_dir}", fg=typer.colors.RED)
26
- raise typer.Exit(code=1)
27
-
28
- for p in paths:
29
- typer.echo(f" • {p.stem}")
30
-
31
-
32
- @tests_app.command("setup")
33
- def create_case(
34
- test_name: str = typer.Argument(..., help="Test-case name (without .yml)."),
35
- workspace_name: str = typer.Option(
36
- "default", "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
37
- ),
38
- api_key: str | None = typer.Option(
39
- None,
40
- "--api-key",
41
- "-k",
42
- help="Explicit DP_API_KEY to use (overrides environment).",
43
- ),
44
- test_dir: str | None = typer.Option(
45
- None,
46
- help="Directory where test-case YAMLs are stored.",
47
- ),
48
- ) -> None:
49
- """Load a single test-case by name and create its pipeline + index (if any) on deepset."""
50
- try:
51
- test_cfg = load_test_case_by_name(name=test_name, task_dir=test_dir)
52
- except FileNotFoundError:
53
- typer.secho(f"Test-case '{test_name}' not found under {test_dir}.", fg=typer.colors.RED)
54
- raise typer.Exit(code=1)
55
- except Exception as e:
56
- typer.secho(f"Failed to load test-case '{test_name}': {e}", fg=typer.colors.RED)
57
- raise typer.Exit(code=1)
58
-
59
- typer.secho(f"→ Creating resources for '{test_name}' in '{workspace_name}'…", fg=typer.colors.GREEN)
60
- try:
61
- setup_test_case(test_cfg=test_cfg, workspace_name=workspace_name, api_key=api_key)
62
- except Exception as e:
63
- typer.secho(f"✘ Failed to set up '{test_name}': {e}", fg=typer.colors.RED)
64
- raise typer.Exit(code=1)
65
-
66
- typer.secho(f"✔ '{test_name}' ready.", fg=typer.colors.GREEN)
67
-
68
-
69
- @tests_app.command("setup-all")
70
- def create_all(
71
- workspace_name: str = typer.Option(
72
- "default", "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
73
- ),
74
- api_key: str | None = typer.Option(
75
- None,
76
- "--api-key",
77
- "-k",
78
- help="Explicit DP_API_KEY to use (overrides environment).",
79
- ),
80
- concurrency: int = typer.Option(
81
- 5,
82
- "--concurrency",
83
- "-c",
84
- help="Maximum number of test-cases to set up in parallel.",
85
- ),
86
- test_dir: str | None = typer.Option(
87
- None,
88
- help="Directory where test-case YAMLs are stored.",
89
- ),
90
- ) -> None:
91
- """Load every test-case under `task_dir` and create pipelines + indexes in `workspace_name` in parallel."""
92
- paths = find_all_test_case_paths(test_dir)
93
- if not paths:
94
- typer.secho(f"No test-case files found in {test_dir}", fg=typer.colors.RED)
95
- raise typer.Exit(code=1)
96
-
97
- # 1) Load all configs
98
- test_cfgs: list[TestCaseConfig] = []
99
- for p in paths:
100
- try:
101
- cfg = load_test_case_from_path(path=p)
102
- test_cfgs.append(cfg)
103
- except Exception as e:
104
- typer.secho(f"Skipping '{p.stem}' (load error: {e})", fg=typer.colors.YELLOW)
105
-
106
- if not test_cfgs:
107
- typer.secho("No valid test-case configs to create.", fg=typer.colors.RED)
108
- raise typer.Exit(code=1)
109
-
110
- typer.secho(
111
- f"→ Creating {len(test_cfgs)} test-cases in '{workspace_name}' (concurrency={concurrency})…",
112
- fg=typer.colors.GREEN,
113
- )
114
- try:
115
- setup_all(
116
- test_cfgs=test_cfgs,
117
- workspace_name=workspace_name,
118
- api_key=api_key,
119
- concurrency=concurrency,
120
- )
121
- except Exception as e:
122
- typer.secho(f"✘ Some test-cases failed during creation: {e}", fg=typer.colors.RED)
123
- raise typer.Exit(code=1)
124
-
125
- typer.secho("✔ All test-cases attempted.", fg=typer.colors.GREEN)
126
-
127
-
128
- @tests_app.command("teardown")
129
- def delete_case(
130
- test_name: str = typer.Argument(..., help="Test-case name (without .yml)."),
131
- workspace_name: str = typer.Option(
132
- "default", "--workspace", "-w", help="Workspace from which to delete pipelines and indexes."
133
- ),
134
- api_key: str | None = typer.Option(
135
- None,
136
- "--api-key",
137
- "-k",
138
- help="Explicit DP_API_KEY to use (overrides environment).",
139
- ),
140
- test_dir: str | None = typer.Option(
141
- None,
142
- help="Directory where test-case YAMLs are stored.",
143
- ),
144
- ) -> None:
145
- """Teardown a single test-case by name and delete its pipeline + index (if any) from deepset."""
146
- try:
147
- test_cfg = load_test_case_by_name(name=test_name, task_dir=test_dir)
148
- except FileNotFoundError:
149
- typer.secho(f"Test-case '{test_name}' not found under {test_dir}.", fg=typer.colors.RED)
150
- raise typer.Exit(code=1)
151
- except Exception as e:
152
- typer.secho(f"Failed to load test-case '{test_name}': {e}", fg=typer.colors.RED)
153
- raise typer.Exit(code=1)
154
-
155
- typer.secho(f"→ Deleting resources for '{test_name}' from '{workspace_name}'…", fg=typer.colors.GREEN)
156
- try:
157
- teardown_test_case(test_cfg=test_cfg, workspace_name=workspace_name, api_key=api_key)
158
- except Exception as e:
159
- typer.secho(f"✘ Failed to teardown '{test_name}': {e}", fg=typer.colors.RED)
160
- raise typer.Exit(code=1)
161
-
162
- typer.secho(f"✔ '{test_name}' resources deleted.", fg=typer.colors.GREEN)
163
-
164
-
165
- @tests_app.command("teardown-all")
166
- def delete_all(
167
- workspace_name: str = typer.Option(
168
- "default", "--workspace", "-w", help="Workspace from which to delete pipelines and indexes."
169
- ),
170
- api_key: str | None = typer.Option(
171
- None,
172
- "--api-key",
173
- "-k",
174
- help="Explicit DP_API_KEY to use (overrides environment).",
175
- ),
176
- concurrency: int = typer.Option(
177
- 5,
178
- "--concurrency",
179
- "-c",
180
- help="Maximum number of test-cases to teardown in parallel.",
181
- ),
182
- test_dir: str | None = typer.Option(
183
- None,
184
- help="Directory where test-case YAMLs are stored.",
185
- ),
186
- ) -> None:
187
- """Teardown every test-case under `task_dir` and delete pipelines and indexes from deepset."""
188
- paths = find_all_test_case_paths(test_dir)
189
- if not paths:
190
- typer.secho(f"No test-case files found in {test_dir}", fg=typer.colors.RED)
191
- raise typer.Exit(code=1)
192
-
193
- # 1) Load all configs
194
- test_cfgs: list[TestCaseConfig] = []
195
- for p in paths:
196
- try:
197
- cfg = load_test_case_from_path(path=p)
198
- test_cfgs.append(cfg)
199
- except Exception as e:
200
- typer.secho(f"Skipping '{p.stem}' (load error: {e})", fg=typer.colors.YELLOW)
201
-
202
- if not test_cfgs:
203
- typer.secho("No valid test-case configs to delete.", fg=typer.colors.RED)
204
- raise typer.Exit(code=1)
205
-
206
- typer.secho(
207
- f"→ Deleting {len(test_cfgs)} test-cases from '{workspace_name}' (concurrency={concurrency})…",
208
- fg=typer.colors.GREEN,
209
- )
210
- try:
211
- teardown_all(
212
- test_cfgs=test_cfgs,
213
- workspace_name=workspace_name,
214
- api_key=api_key,
215
- concurrency=concurrency,
216
- )
217
- except Exception as e:
218
- typer.secho(f"✘ Some test-cases failed during deletion: {e}", fg=typer.colors.RED)
219
- raise typer.Exit(code=1)
220
-
221
- typer.secho("✔ All test-cases teardown attempted.", fg=typer.colors.GREEN)
222
-
223
-
224
- def create_tests_app() -> typer.Typer:
225
- """Create the tests CLI app."""
226
- return tests_app
@@ -1,61 +0,0 @@
1
- import os
2
- from pathlib import Path
3
-
4
- import typer
5
- from pydantic import ValidationError
6
-
7
- from deepset_mcp.benchmark.runner.config import BenchmarkConfig
8
- from deepset_mcp.benchmark.runner.models import AgentConfig
9
-
10
-
11
- def override_deepset_env_vars(api_key: str | None, workspace: str | None) -> None:
12
- """Overrides deepset-specific environment variables."""
13
- if api_key is not None:
14
- os.environ["DEEPSET_API_KEY"] = api_key
15
-
16
- if workspace is not None:
17
- os.environ["DEEPSET_WORKSPACE"] = workspace
18
-
19
-
20
- def validate_and_setup_configs(
21
- agent_config: str, test_case_base_dir: str | None, output_dir: str | None
22
- ) -> tuple[AgentConfig, BenchmarkConfig]:
23
- """Validate and setup configurations."""
24
- # Validate agent config path
25
- agent_config_path = Path(agent_config)
26
- if not agent_config_path.exists():
27
- typer.secho(f"Agent config file not found: {agent_config}", fg=typer.colors.RED)
28
- raise typer.Exit(code=1)
29
-
30
- test_case_base_path = None
31
- if test_case_base_dir is not None:
32
- test_case_base_path = Path(test_case_base_dir)
33
- if not test_case_base_path.exists():
34
- typer.secho(f"Test case base directory not found: {test_case_base_dir}", fg=typer.colors.RED)
35
- raise typer.Exit(code=1)
36
-
37
- benchmark_kwargs = {}
38
- if test_case_base_path is not None:
39
- benchmark_kwargs["test_case_base_dir"] = test_case_base_path
40
-
41
- if output_dir is not None:
42
- benchmark_kwargs["output_dir"] = Path(output_dir)
43
-
44
- # Load and validate configurations
45
- try:
46
- benchmark_config = BenchmarkConfig(**benchmark_kwargs) # type: ignore
47
- except ValidationError as e:
48
- typer.secho("Configuration error:", fg=typer.colors.RED)
49
- for error in e.errors():
50
- field = " -> ".join(str(loc) for loc in error["loc"])
51
- typer.secho(f" {field}: {error['msg']}", fg=typer.colors.RED)
52
- typer.secho("\nPlease ensure all required environment variables are set", fg=typer.colors.YELLOW)
53
- raise typer.Exit(code=1)
54
-
55
- try:
56
- agent_cfg = AgentConfig.from_file(agent_config_path)
57
- except Exception as e:
58
- typer.secho(f"Invalid agent config: {e}", fg=typer.colors.RED)
59
- raise typer.Exit(code=1)
60
-
61
- return agent_cfg, benchmark_config
@@ -1,73 +0,0 @@
1
- import os
2
- from pathlib import Path
3
- from typing import Any
4
-
5
- from pydantic import Field, field_validator
6
- from pydantic_settings import BaseSettings, SettingsConfigDict
7
-
8
-
9
- class BenchmarkConfig(BaseSettings):
10
- """Core configuration for the benchmark runner."""
11
-
12
- model_config = SettingsConfigDict(
13
- env_file=".env",
14
- env_file_encoding="utf-8",
15
- case_sensitive=False,
16
- extra="ignore", # Ignore extra env vars
17
- )
18
-
19
- deepset_workspace: str = Field(default_factory=lambda: os.environ.get("DEEPSET_WORKSPACE", ""))
20
- deepset_api_key: str = Field(default_factory=lambda: os.environ.get("DEEPSET_API_KEY", ""))
21
-
22
- # Optional fields with defaults
23
- output_dir: Path = Field(default_factory=Path.cwd)
24
- test_case_base_dir: Path = Field(default_factory=lambda: Path(__file__).parent.parent / "tasks")
25
-
26
- # Store all other available env vars
27
- additional_env_vars: dict[str, str] = Field(default_factory=dict)
28
-
29
- @field_validator("deepset_workspace", "deepset_api_key")
30
- @classmethod
31
- def validate_not_empty(cls, v: str) -> str:
32
- """Validate that the env var is not empty."""
33
- if not v or not v.strip():
34
- raise ValueError("DEEPSET_WORKSPACE or DEEPSET_API_KEY is empty.")
35
- return v
36
-
37
- def model_post_init(self, __context: Any) -> None:
38
- """After initialization, collect all available env vars."""
39
- # Collect all env vars that might be useful (excluding system ones)
40
- ignore_prefixes = ("PATH", "HOME", "USER", "SHELL", "TERM", "PWD", "LC_")
41
-
42
- for key, value in os.environ.items():
43
- # Skip system variables and already captured ones
44
- if (
45
- not any(key.startswith(prefix) for prefix in ignore_prefixes)
46
- and key not in ("DEEPSET_WORKSPACE", "DEEPSET_API_KEY")
47
- and value
48
- ): # Only include non-empty values
49
- self.additional_env_vars[key] = value
50
-
51
- def check_required_env_vars(self, required_vars: list[str]) -> tuple[bool, list[str]]:
52
- """
53
- Check if all required environment variables are available.
54
-
55
- Returns:
56
- Tuple of (all_available, missing_vars)
57
- """
58
- available = self.get_all_env_vars()
59
- missing = [var for var in required_vars if var not in available]
60
- return len(missing) == 0, missing
61
-
62
- def get_env_var(self, key: str) -> str:
63
- """Get a specific environment variable."""
64
- all_vars = self.get_all_env_vars()
65
- return all_vars[key]
66
-
67
- def get_all_env_vars(self) -> dict[str, str]:
68
- """Get all available environment variables."""
69
- return {
70
- "DEEPSET_WORKSPACE": self.deepset_workspace,
71
- "DEEPSET_API_KEY": self.deepset_api_key,
72
- **self.additional_env_vars,
73
- }
@@ -1,64 +0,0 @@
1
- import glob
2
- from pathlib import Path
3
-
4
- from pydantic import ValidationError
5
-
6
- from deepset_mcp.benchmark.runner.models import TestCaseConfig
7
-
8
-
9
- def _default_task_dir() -> Path:
10
- """Return the path to the `benchmark/tasks` directory, resolved relative to this file."""
11
- return Path(__file__).parent.parent / "tasks"
12
-
13
-
14
- def find_all_test_case_paths(task_dir: str | Path | None = None) -> list[Path]:
15
- """
16
- Return a list of all `.yml` or `.yaml` files under `task_dir`.
17
-
18
- If `task_dir` is None, we resolve to `benchmark/tasks` (relative to this file).
19
- """
20
- if task_dir is None:
21
- base = _default_task_dir()
22
- else:
23
- base = Path(task_dir)
24
-
25
- pattern1 = base / "*.yml"
26
- pattern2 = base / "*.yaml"
27
- return [Path(p) for p in glob.glob(str(pattern1))] + [Path(p) for p in glob.glob(str(pattern2))]
28
-
29
-
30
- def load_test_case_from_path(path: Path) -> TestCaseConfig:
31
- """
32
- Read a single test-case YAML at `path` using TestCaseConfig.from_file().
33
-
34
- Raises RuntimeError if validation or loading fails.
35
- """
36
- try:
37
- return TestCaseConfig.from_file(path)
38
- except (ValidationError, FileNotFoundError) as e:
39
- raise RuntimeError(f"Failed to load {path}: {e}") from e
40
-
41
-
42
- def load_test_case_by_name(name: str, task_dir: str | Path | None = None) -> TestCaseConfig:
43
- """
44
- Given a test‐case “name” (without extension), locate the corresponding `.yml` or `.yaml`under `task_dir`.
45
-
46
- If `task_dir` is None, defaults to `benchmark/tasks` relative to this file.
47
- Returns a loaded TestCaseConfig or raises FileNotFoundError if not found.
48
- """
49
- if task_dir is None:
50
- base = _default_task_dir()
51
- else:
52
- base = Path(task_dir)
53
-
54
- candidates: list[Path] = []
55
- for ext in (".yml", ".yaml"):
56
- p = base / f"{name}{ext}"
57
- if p.exists():
58
- candidates.append(p)
59
-
60
- if not candidates:
61
- raise FileNotFoundError(f"No test-case named '{name}' under {base}")
62
-
63
- # If multiple matches exist, pick the first
64
- return load_test_case_from_path(candidates[0])
@@ -1,140 +0,0 @@
1
- from collections.abc import Callable
2
- from typing import Any
3
-
4
- from haystack.tools import Tool, Toolset
5
- from rich.console import Console
6
- from rich.panel import Panel
7
- from rich.prompt import Prompt
8
-
9
- TOOL_CONFIRMATION_CHAR = "y"
10
- TOOL_REJECTED_CHAR = "n"
11
- TOOL_AUTO_CONFIRM_CHAR = "a"
12
-
13
-
14
- class InteractiveToolsetWrapper:
15
- """Simple wrapper that adds interactive confirmation to any toolset."""
16
-
17
- def __init__(self, toolset: Toolset):
18
- """
19
- Initialize the wrapper.
20
-
21
- Args:
22
- toolset: The base toolset to wrap
23
- """
24
- self.base_toolset = toolset
25
- self.auto_confirm_tools: set[str] = set()
26
- self.console = Console()
27
-
28
- # Create wrapped toolset
29
- self._wrapped_toolset = self._create_wrapped_toolset()
30
-
31
- def _create_wrapped_toolset(self) -> Toolset:
32
- """Create a new toolset with wrapped tools."""
33
- wrapped_tools = []
34
-
35
- for tool in self.base_toolset.tools:
36
- wrapped_tool = Tool(
37
- name=tool.name,
38
- description=tool.description,
39
- parameters=tool.parameters,
40
- function=self._wrap_function(tool),
41
- )
42
- wrapped_tools.append(wrapped_tool)
43
-
44
- return Toolset(tools=wrapped_tools)
45
-
46
- def _wrap_function(self, tool: Tool) -> Callable[..., Any]:
47
- """Wrap a tool function with confirmation logic."""
48
- original_function = tool.function
49
- tool_name = tool.name
50
-
51
- def wrapped_function(**kwargs: Any) -> Any:
52
- # Check if auto-confirmed
53
- if tool_name in self.auto_confirm_tools:
54
- self.console.print(f"[green]✓ Auto-executing '{tool_name}'[/green]")
55
- return original_function(**kwargs)
56
-
57
- # Ask for confirmation
58
- action = self._ask_confirmation(tool_name, kwargs)
59
-
60
- if action == "reject":
61
- # Get feedback message
62
- feedback = Prompt.ask("Feedback message (optional)", default="")
63
- return {
64
- "status": "rejected",
65
- "tool": tool_name,
66
- "feedback": feedback or "Tool execution rejected by user",
67
- }
68
- elif action == "confirm_auto":
69
- # Add to auto-confirm and execute
70
- self.auto_confirm_tools.add(tool_name)
71
- self.console.print(f"[green]✓ '{tool_name}' added to auto-confirm list[/green]")
72
-
73
- # Execute tool (for both "confirm" and "confirm_auto")
74
- return original_function(**kwargs)
75
-
76
- return wrapped_function
77
-
78
- def _ask_confirmation(self, tool_name: str, params: dict[str, Any]) -> str:
79
- """Ask user for confirmation with Rich formatting."""
80
- # Build tool call display
81
- lines = [f"[bold yellow]Tool:[/bold yellow] {tool_name}"]
82
-
83
- if params:
84
- lines.append("\n[bold yellow]Arguments:[/bold yellow]")
85
- for key, value in params.items():
86
- lines.append(f"\n[cyan]{key}:[/cyan]")
87
- # Format the value with proper indentation
88
- value_str = str(value)
89
- if "\n" in value_str:
90
- # Multi-line value - indent each line
91
- for line in value_str.split("\n"):
92
- lines.append(f" {line}")
93
- else:
94
- lines.append(f" {value_str}")
95
-
96
- # Create panel with tool information
97
- panel = Panel("\n".join(lines), title="🔧 Tool Execution Request", border_style="blue")
98
- self.console.print(panel)
99
-
100
- # Show options
101
- self.console.print("\n[bold]Options:[/bold]")
102
- self.console.print(f" [green]{TOOL_CONFIRMATION_CHAR}[/green] - Confirm execution")
103
- self.console.print(f" [yellow]{TOOL_AUTO_CONFIRM_CHAR}[/yellow] - Confirm and auto-confirm this tool")
104
- self.console.print(f" [red]{TOOL_REJECTED_CHAR}[/red] - Reject execution")
105
-
106
- # Get user choice
107
- while True:
108
- choice = Prompt.ask(
109
- "\nYour choice", choices=[TOOL_CONFIRMATION_CHAR, TOOL_AUTO_CONFIRM_CHAR, TOOL_REJECTED_CHAR]
110
- )
111
-
112
- if choice == TOOL_CONFIRMATION_CHAR:
113
- return "confirm"
114
- elif choice == TOOL_AUTO_CONFIRM_CHAR:
115
- return "confirm_auto"
116
- elif choice == TOOL_REJECTED_CHAR:
117
- return "reject"
118
-
119
- @property
120
- def toolset(self) -> Toolset:
121
- """Get the wrapped toolset."""
122
- return self._wrapped_toolset
123
-
124
- def close(self) -> None:
125
- """Close the underlying toolset if it has a close method."""
126
- if hasattr(self.base_toolset, "close"):
127
- self.base_toolset.close()
128
-
129
-
130
- def wrap_toolset_interactive(toolset: Toolset) -> InteractiveToolsetWrapper:
131
- """
132
- Wrap any toolset with interactive confirmation.
133
-
134
- Args:
135
- toolset: The toolset to wrap
136
-
137
- Returns:
138
- InteractiveToolsetWrapper instance
139
- """
140
- return InteractiveToolsetWrapper(toolset)