deepset-mcp 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepset_mcp/__init__.py +0 -0
- deepset_mcp/agents/__init__.py +0 -0
- deepset_mcp/agents/debugging/__init__.py +0 -0
- deepset_mcp/agents/debugging/debugging_agent.py +37 -0
- deepset_mcp/agents/debugging/system_prompt.md +214 -0
- deepset_mcp/agents/generalist/__init__.py +0 -0
- deepset_mcp/agents/generalist/generalist_agent.py +38 -0
- deepset_mcp/agents/generalist/system_prompt.md +241 -0
- deepset_mcp/api/README.md +536 -0
- deepset_mcp/api/__init__.py +0 -0
- deepset_mcp/api/client.py +277 -0
- deepset_mcp/api/custom_components/__init__.py +0 -0
- deepset_mcp/api/custom_components/models.py +25 -0
- deepset_mcp/api/custom_components/protocols.py +17 -0
- deepset_mcp/api/custom_components/resource.py +56 -0
- deepset_mcp/api/exceptions.py +70 -0
- deepset_mcp/api/haystack_service/__init__.py +0 -0
- deepset_mcp/api/haystack_service/protocols.py +13 -0
- deepset_mcp/api/haystack_service/resource.py +55 -0
- deepset_mcp/api/indexes/__init__.py +0 -0
- deepset_mcp/api/indexes/models.py +63 -0
- deepset_mcp/api/indexes/protocols.py +53 -0
- deepset_mcp/api/indexes/resource.py +138 -0
- deepset_mcp/api/integrations/__init__.py +1 -0
- deepset_mcp/api/integrations/models.py +49 -0
- deepset_mcp/api/integrations/protocols.py +27 -0
- deepset_mcp/api/integrations/resource.py +57 -0
- deepset_mcp/api/pipeline/__init__.py +17 -0
- deepset_mcp/api/pipeline/log_level.py +9 -0
- deepset_mcp/api/pipeline/models.py +235 -0
- deepset_mcp/api/pipeline/protocols.py +83 -0
- deepset_mcp/api/pipeline/resource.py +378 -0
- deepset_mcp/api/pipeline_template/__init__.py +0 -0
- deepset_mcp/api/pipeline_template/models.py +56 -0
- deepset_mcp/api/pipeline_template/protocols.py +17 -0
- deepset_mcp/api/pipeline_template/resource.py +88 -0
- deepset_mcp/api/protocols.py +122 -0
- deepset_mcp/api/secrets/__init__.py +0 -0
- deepset_mcp/api/secrets/models.py +16 -0
- deepset_mcp/api/secrets/protocols.py +29 -0
- deepset_mcp/api/secrets/resource.py +112 -0
- deepset_mcp/api/shared_models.py +17 -0
- deepset_mcp/api/transport.py +336 -0
- deepset_mcp/api/user/__init__.py +0 -0
- deepset_mcp/api/user/protocols.py +11 -0
- deepset_mcp/api/user/resource.py +38 -0
- deepset_mcp/api/workspace/__init__.py +7 -0
- deepset_mcp/api/workspace/models.py +23 -0
- deepset_mcp/api/workspace/protocols.py +41 -0
- deepset_mcp/api/workspace/resource.py +94 -0
- deepset_mcp/benchmark/README.md +425 -0
- deepset_mcp/benchmark/__init__.py +1 -0
- deepset_mcp/benchmark/agent_configs/debugging_agent.yml +10 -0
- deepset_mcp/benchmark/agent_configs/generalist_agent.yml +6 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/__init__.py +0 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/eda.ipynb +757 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/prepare_interaction_data.ipynb +167 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/preprocessing_utils.py +213 -0
- deepset_mcp/benchmark/runner/__init__.py +0 -0
- deepset_mcp/benchmark/runner/agent_benchmark_runner.py +561 -0
- deepset_mcp/benchmark/runner/agent_loader.py +110 -0
- deepset_mcp/benchmark/runner/cli.py +39 -0
- deepset_mcp/benchmark/runner/cli_agent.py +373 -0
- deepset_mcp/benchmark/runner/cli_index.py +71 -0
- deepset_mcp/benchmark/runner/cli_pipeline.py +73 -0
- deepset_mcp/benchmark/runner/cli_tests.py +226 -0
- deepset_mcp/benchmark/runner/cli_utils.py +61 -0
- deepset_mcp/benchmark/runner/config.py +73 -0
- deepset_mcp/benchmark/runner/config_loader.py +64 -0
- deepset_mcp/benchmark/runner/interactive.py +140 -0
- deepset_mcp/benchmark/runner/models.py +203 -0
- deepset_mcp/benchmark/runner/repl.py +67 -0
- deepset_mcp/benchmark/runner/setup_actions.py +238 -0
- deepset_mcp/benchmark/runner/streaming.py +360 -0
- deepset_mcp/benchmark/runner/teardown_actions.py +196 -0
- deepset_mcp/benchmark/runner/tracing.py +21 -0
- deepset_mcp/benchmark/tasks/chat_rag_answers_wrong_format.yml +16 -0
- deepset_mcp/benchmark/tasks/documents_output_wrong.yml +13 -0
- deepset_mcp/benchmark/tasks/jinja_str_instead_of_complex_type.yml +11 -0
- deepset_mcp/benchmark/tasks/jinja_syntax_error.yml +11 -0
- deepset_mcp/benchmark/tasks/missing_output_mapping.yml +14 -0
- deepset_mcp/benchmark/tasks/no_query_input.yml +13 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_str.yml +141 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_syntax.yml +141 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_answers_wrong_format.yml +181 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_missing_output_mapping.yml +189 -0
- deepset_mcp/benchmark/tasks/pipelines/rag_documents_wrong_format.yml +193 -0
- deepset_mcp/benchmark/tasks/pipelines/rag_no_query_input.yml +191 -0
- deepset_mcp/benchmark/tasks/pipelines/standard_index.yml +167 -0
- deepset_mcp/initialize_embedding_model.py +12 -0
- deepset_mcp/main.py +133 -0
- deepset_mcp/prompts/deepset_copilot_prompt.md +271 -0
- deepset_mcp/prompts/deepset_debugging_agent.md +214 -0
- deepset_mcp/store.py +5 -0
- deepset_mcp/tool_factory.py +473 -0
- deepset_mcp/tools/__init__.py +0 -0
- deepset_mcp/tools/custom_components.py +52 -0
- deepset_mcp/tools/doc_search.py +83 -0
- deepset_mcp/tools/haystack_service.py +358 -0
- deepset_mcp/tools/haystack_service_models.py +97 -0
- deepset_mcp/tools/indexes.py +129 -0
- deepset_mcp/tools/model_protocol.py +16 -0
- deepset_mcp/tools/pipeline.py +335 -0
- deepset_mcp/tools/pipeline_template.py +116 -0
- deepset_mcp/tools/secrets.py +45 -0
- deepset_mcp/tools/tokonomics/__init__.py +73 -0
- deepset_mcp/tools/tokonomics/decorators.py +396 -0
- deepset_mcp/tools/tokonomics/explorer.py +347 -0
- deepset_mcp/tools/tokonomics/object_store.py +177 -0
- deepset_mcp/tools/workspace.py +61 -0
- deepset_mcp-0.0.2.dist-info/METADATA +288 -0
- deepset_mcp-0.0.2.dist-info/RECORD +114 -0
- deepset_mcp-0.0.2.dist-info/WHEEL +4 -0
- deepset_mcp-0.0.2.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
|
|
3
|
+
from deepset_mcp.benchmark.runner.config_loader import (
|
|
4
|
+
find_all_test_case_paths,
|
|
5
|
+
load_test_case_by_name,
|
|
6
|
+
load_test_case_from_path,
|
|
7
|
+
)
|
|
8
|
+
from deepset_mcp.benchmark.runner.models import TestCaseConfig
|
|
9
|
+
from deepset_mcp.benchmark.runner.setup_actions import setup_all, setup_test_case
|
|
10
|
+
from deepset_mcp.benchmark.runner.teardown_actions import teardown_all, teardown_test_case
|
|
11
|
+
|
|
12
|
+
# Typer application object; the test-case commands in this module are registered on it
# through the `@tests_app.command(...)` decorators.
tests_app = typer.Typer(help="Commands for setting up and tearing down test-cases.")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@tests_app.command("list")
def list_cases(
    test_dir: str | None = typer.Option(
        None,
        help="Directory where all test-case YAMLs live (`benchmark/tasks/*.yml`).",
    ),
) -> None:
    """List all available test cases stored under `test_dir`."""
    # The one-line docstring above is kept short on purpose: Typer shows it as the command help.
    paths = find_all_test_case_paths(test_dir)
    if not paths:
        # `test_dir` is None when the option was omitted; the lookup then uses its own
        # default directory, so do not print the literal "None" to the user.
        location = test_dir if test_dir is not None else "the default task directory"
        typer.secho(f"No test-case files found in {location}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # Print one bullet per test-case, using the file name without its extension.
    for p in paths:
        typer.echo(f" • {p.stem}")
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@tests_app.command("setup")
def create_case(
    test_name: str = typer.Argument(..., help="Test-case name (without .yml)."),
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    test_dir: str | None = typer.Option(
        None,
        help="Directory where test-case YAMLs are stored.",
    ),
) -> None:
    """Load a single test-case by name and create its pipeline + index (if any) on deepset."""
    # Step 1: resolve and parse the test-case file. Any failure ends the command with exit code 1.
    try:
        test_cfg = load_test_case_by_name(name=test_name, task_dir=test_dir)
    except FileNotFoundError:
        # `test_dir` is None when the option was omitted; avoid printing the literal "None".
        location = test_dir if test_dir is not None else "the default task directory"
        typer.secho(f"Test-case '{test_name}' not found under {location}.", fg=typer.colors.RED)
        # The message above already tells the whole story, so drop the exception context.
        raise typer.Exit(code=1) from None
    except Exception as e:
        typer.secho(f"Failed to load test-case '{test_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e

    # Step 2: create the remote resources described by the loaded config.
    typer.secho(f"→ Creating resources for '{test_name}' in '{workspace_name}'…", fg=typer.colors.GREEN)
    try:
        setup_test_case(test_cfg=test_cfg, workspace_name=workspace_name, api_key=api_key)
    except Exception as e:
        typer.secho(f"✘ Failed to set up '{test_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e

    typer.secho(f"✔ '{test_name}' ready.", fg=typer.colors.GREEN)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@tests_app.command("setup-all")
def create_all(
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace in which to create pipelines and indexes."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    concurrency: int = typer.Option(
        5,
        "--concurrency",
        "-c",
        help="Maximum number of test-cases to set up in parallel.",
    ),
    test_dir: str | None = typer.Option(
        None,
        help="Directory where test-case YAMLs are stored.",
    ),
) -> None:
    """Load every test-case under `test_dir` and create pipelines + indexes in `workspace_name` in parallel."""
    paths = find_all_test_case_paths(test_dir)
    if not paths:
        # `test_dir` is None when the option was omitted; avoid printing the literal "None".
        location = test_dir if test_dir is not None else "the default task directory"
        typer.secho(f"No test-case files found in {location}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # 1) Load all configs. A file that fails to load is reported and skipped so that the
    #    remaining test-cases can still be created.
    test_cfgs: list[TestCaseConfig] = []
    for p in paths:
        try:
            cfg = load_test_case_from_path(path=p)
            test_cfgs.append(cfg)
        except Exception as e:
            typer.secho(f"Skipping '{p.stem}' (load error: {e})", fg=typer.colors.YELLOW)

    if not test_cfgs:
        typer.secho("No valid test-case configs to create.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # 2) Hand the whole batch to `setup_all`, which receives the concurrency limit.
    typer.secho(
        f"→ Creating {len(test_cfgs)} test-cases in '{workspace_name}' (concurrency={concurrency})…",
        fg=typer.colors.GREEN,
    )
    try:
        setup_all(
            test_cfgs=test_cfgs,
            workspace_name=workspace_name,
            api_key=api_key,
            concurrency=concurrency,
        )
    except Exception as e:
        typer.secho(f"✘ Some test-cases failed during creation: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e

    typer.secho("✔ All test-cases attempted.", fg=typer.colors.GREEN)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
@tests_app.command("teardown")
def delete_case(
    test_name: str = typer.Argument(..., help="Test-case name (without .yml)."),
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace from which to delete pipelines and indexes."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    test_dir: str | None = typer.Option(
        None,
        help="Directory where test-case YAMLs are stored.",
    ),
) -> None:
    """Teardown a single test-case by name and delete its pipeline + index (if any) from deepset."""
    # Step 1: resolve and parse the test-case file. Any failure ends the command with exit code 1.
    try:
        test_cfg = load_test_case_by_name(name=test_name, task_dir=test_dir)
    except FileNotFoundError:
        # `test_dir` is None when the option was omitted; avoid printing the literal "None".
        location = test_dir if test_dir is not None else "the default task directory"
        typer.secho(f"Test-case '{test_name}' not found under {location}.", fg=typer.colors.RED)
        # The message above already tells the whole story, so drop the exception context.
        raise typer.Exit(code=1) from None
    except Exception as e:
        typer.secho(f"Failed to load test-case '{test_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e

    # Step 2: delete the remote resources described by the loaded config.
    typer.secho(f"→ Deleting resources for '{test_name}' from '{workspace_name}'…", fg=typer.colors.GREEN)
    try:
        teardown_test_case(test_cfg=test_cfg, workspace_name=workspace_name, api_key=api_key)
    except Exception as e:
        typer.secho(f"✘ Failed to teardown '{test_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e

    typer.secho(f"✔ '{test_name}' resources deleted.", fg=typer.colors.GREEN)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@tests_app.command("teardown-all")
def delete_all(
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace from which to delete pipelines and indexes."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    concurrency: int = typer.Option(
        5,
        "--concurrency",
        "-c",
        help="Maximum number of test-cases to teardown in parallel.",
    ),
    test_dir: str | None = typer.Option(
        None,
        help="Directory where test-case YAMLs are stored.",
    ),
) -> None:
    """Teardown every test-case under `test_dir` and delete pipelines and indexes from deepset."""
    paths = find_all_test_case_paths(test_dir)
    if not paths:
        # `test_dir` is None when the option was omitted; avoid printing the literal "None".
        location = test_dir if test_dir is not None else "the default task directory"
        typer.secho(f"No test-case files found in {location}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # 1) Load all configs. A file that fails to load is reported and skipped so that the
    #    remaining test-cases can still be torn down.
    test_cfgs: list[TestCaseConfig] = []
    for p in paths:
        try:
            cfg = load_test_case_from_path(path=p)
            test_cfgs.append(cfg)
        except Exception as e:
            typer.secho(f"Skipping '{p.stem}' (load error: {e})", fg=typer.colors.YELLOW)

    if not test_cfgs:
        typer.secho("No valid test-case configs to delete.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # 2) Hand the whole batch to `teardown_all`, which receives the concurrency limit.
    typer.secho(
        f"→ Deleting {len(test_cfgs)} test-cases from '{workspace_name}' (concurrency={concurrency})…",
        fg=typer.colors.GREEN,
    )
    try:
        teardown_all(
            test_cfgs=test_cfgs,
            workspace_name=workspace_name,
            api_key=api_key,
            concurrency=concurrency,
        )
    except Exception as e:
        typer.secho(f"✘ Some test-cases failed during deletion: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e

    typer.secho("✔ All test-cases teardown attempted.", fg=typer.colors.GREEN)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def create_tests_app() -> typer.Typer:
    """Return the module-level Typer app that the test-case commands are registered on."""
    app: typer.Typer = tests_app
    return app
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
import typer
|
|
5
|
+
from pydantic import ValidationError
|
|
6
|
+
|
|
7
|
+
from deepset_mcp.benchmark.runner.config import BenchmarkConfig
|
|
8
|
+
from deepset_mcp.benchmark.runner.models import AgentConfig
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def override_deepset_env_vars(api_key: str | None, workspace: str | None) -> None:
|
|
12
|
+
"""Overrides deepset-specific environment variables."""
|
|
13
|
+
if api_key is not None:
|
|
14
|
+
os.environ["DEEPSET_API_KEY"] = api_key
|
|
15
|
+
|
|
16
|
+
if workspace is not None:
|
|
17
|
+
os.environ["DEEPSET_WORKSPACE"] = workspace
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def validate_and_setup_configs(
    agent_config: str, test_case_base_dir: str | None, output_dir: str | None
) -> tuple[AgentConfig, BenchmarkConfig]:
    """Check the CLI-supplied paths and build the agent and benchmark configurations.

    Every failure is printed in colour and ends the command with exit code 1.

    Args:
        agent_config: Path to the agent configuration file; it must exist.
        test_case_base_dir: Optional directory override; when given, it must exist.
        output_dir: Optional output directory override; it is not checked for existence here.

    Returns:
        The loaded ``AgentConfig`` and the constructed ``BenchmarkConfig``, in that order.

    Raises:
        typer.Exit: With code 1 if a path is missing or either configuration is invalid.
    """
    agent_file = Path(agent_config)
    if not agent_file.exists():
        typer.secho(f"Agent config file not found: {agent_config}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    # Only explicitly supplied values are forwarded, so BenchmarkConfig keeps its own
    # defaults for everything else.
    overrides = {}
    if test_case_base_dir is not None:
        cases_root = Path(test_case_base_dir)
        if not cases_root.exists():
            typer.secho(f"Test case base directory not found: {test_case_base_dir}", fg=typer.colors.RED)
            raise typer.Exit(code=1)
        overrides["test_case_base_dir"] = cases_root
    if output_dir is not None:
        overrides["output_dir"] = Path(output_dir)

    try:
        benchmark_config = BenchmarkConfig(**overrides)  # type: ignore
    except ValidationError as exc:
        typer.secho("Configuration error:", fg=typer.colors.RED)
        # One line per validation problem, with the field location joined by arrows.
        for issue in exc.errors():
            where = " -> ".join(map(str, issue["loc"]))
            typer.secho(f" {where}: {issue['msg']}", fg=typer.colors.RED)
        typer.secho("\nPlease ensure all required environment variables are set", fg=typer.colors.YELLOW)
        raise typer.Exit(code=1)

    try:
        agent_cfg = AgentConfig.from_file(agent_file)
    except Exception as exc:
        typer.secho(f"Invalid agent config: {exc}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    return agent_cfg, benchmark_config
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from pydantic import Field, field_validator
|
|
6
|
+
from pydantic_settings import BaseSettings, SettingsConfigDict
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BenchmarkConfig(BaseSettings):
    """Settings for the benchmark runner: deepset credentials, directories and extra env vars."""

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",  # Ignore extra env vars
    )

    # Both default to the matching environment variable, or "" when it is unset; the
    # validator below then rejects empty values.
    deepset_workspace: str = Field(default_factory=lambda: os.environ.get("DEEPSET_WORKSPACE", ""))
    deepset_api_key: str = Field(default_factory=lambda: os.environ.get("DEEPSET_API_KEY", ""))

    # Optional fields with defaults
    output_dir: Path = Field(default_factory=Path.cwd)
    test_case_base_dir: Path = Field(default_factory=lambda: Path(__file__).parent.parent / "tasks")

    # Filled in by model_post_init with the remaining environment variables
    additional_env_vars: dict[str, str] = Field(default_factory=dict)

    @field_validator("deepset_workspace", "deepset_api_key")
    @classmethod
    def validate_not_empty(cls, v: str) -> str:
        """Return `v` unchanged, or raise ValueError if it is empty or whitespace only."""
        if v and v.strip():
            return v
        raise ValueError("DEEPSET_WORKSPACE or DEEPSET_API_KEY is empty.")

    def model_post_init(self, __context: Any) -> None:
        """Copy the remaining non-empty environment variables into `additional_env_vars`."""
        # Names starting with one of these prefixes are treated as system variables.
        ignore_prefixes = ("PATH", "HOME", "USER", "SHELL", "TERM", "PWD", "LC_")
        # These two are already held in dedicated fields.
        already_captured = ("DEEPSET_WORKSPACE", "DEEPSET_API_KEY")

        for name, val in os.environ.items():
            if not val:
                continue
            if name in already_captured:
                continue
            if name.startswith(ignore_prefixes):
                continue
            self.additional_env_vars[name] = val

    def check_required_env_vars(self, required_vars: list[str]) -> tuple[bool, list[str]]:
        """
        Report which of `required_vars` are absent from the known environment variables.

        Returns:
            Tuple of (all_available, missing_vars)
        """
        known = self.get_all_env_vars()
        absent = [name for name in required_vars if name not in known]
        return (not absent), absent

    def get_env_var(self, key: str) -> str:
        """Look up one variable by name; a missing name raises KeyError."""
        return self.get_all_env_vars()[key]

    def get_all_env_vars(self) -> dict[str, str]:
        """Return the two deepset variables merged with `additional_env_vars`."""
        merged = {
            "DEEPSET_WORKSPACE": self.deepset_workspace,
            "DEEPSET_API_KEY": self.deepset_api_key,
        }
        # Applied last, so an entry in additional_env_vars wins on a name clash.
        merged.update(self.additional_env_vars)
        return merged
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
import glob
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from pydantic import ValidationError
|
|
5
|
+
|
|
6
|
+
from deepset_mcp.benchmark.runner.models import TestCaseConfig
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _default_task_dir() -> Path:
    """Return the `tasks` directory that sits beside the directory containing this file."""
    parent_of_this_dir = Path(__file__).parent.parent
    return parent_of_this_dir.joinpath("tasks")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def find_all_test_case_paths(task_dir: str | Path | None = None) -> list[Path]:
|
|
15
|
+
"""
|
|
16
|
+
Return a list of all `.yml` or `.yaml` files under `task_dir`.
|
|
17
|
+
|
|
18
|
+
If `task_dir` is None, we resolve to `benchmark/tasks` (relative to this file).
|
|
19
|
+
"""
|
|
20
|
+
if task_dir is None:
|
|
21
|
+
base = _default_task_dir()
|
|
22
|
+
else:
|
|
23
|
+
base = Path(task_dir)
|
|
24
|
+
|
|
25
|
+
pattern1 = base / "*.yml"
|
|
26
|
+
pattern2 = base / "*.yaml"
|
|
27
|
+
return [Path(p) for p in glob.glob(str(pattern1))] + [Path(p) for p in glob.glob(str(pattern2))]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def load_test_case_from_path(path: Path) -> TestCaseConfig:
    """
    Load the test-case YAML at `path` through TestCaseConfig.from_file().

    A ValidationError or FileNotFoundError from the loader is re-raised as a RuntimeError
    that names the offending path; the original exception is kept as its cause.
    """
    try:
        loaded = TestCaseConfig.from_file(path)
    except (ValidationError, FileNotFoundError) as err:
        message = f"Failed to load {path}: {err}"
        raise RuntimeError(message) from err
    return loaded
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def load_test_case_by_name(name: str, task_dir: str | Path | None = None) -> TestCaseConfig:
    """
    Load the test-case whose file is `<name>.yml` or `<name>.yaml` inside `task_dir`.

    If `task_dir` is None, the default `benchmark/tasks` directory relative to this file is used.
    When both extensions exist, the `.yml` file is loaded.
    Raises FileNotFoundError if neither file exists.
    """
    base = Path(task_dir) if task_dir is not None else _default_task_dir()

    # Probe both supported extensions, `.yml` first, and keep the files that exist.
    possible_files = [base / f"{name}{ext}" for ext in (".yml", ".yaml")]
    matches: list[Path] = [candidate for candidate in possible_files if candidate.exists()]

    if not matches:
        raise FileNotFoundError(f"No test-case named '{name}' under {base}")

    first_match = matches[0]
    return load_test_case_from_path(first_match)
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
from collections.abc import Callable
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from haystack.tools import Tool, Toolset
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
from rich.panel import Panel
|
|
7
|
+
from rich.prompt import Prompt
|
|
8
|
+
|
|
9
|
+
# Answer that confirms a single tool execution.
TOOL_CONFIRMATION_CHAR = "y"
|
|
10
|
+
# Answer that rejects the tool execution.
TOOL_REJECTED_CHAR = "n"
|
|
11
|
+
# Answer that confirms the execution and adds the tool to the auto-confirm list.
TOOL_AUTO_CONFIRM_CHAR = "a"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class InteractiveToolsetWrapper:
    """Wrap a toolset so that each tool call asks for confirmation on the console first.

    For every call the user can confirm it, reject it (optionally with feedback), or
    confirm it and have all later calls to the same tool run without asking.
    """

    def __init__(self, toolset: Toolset):
        """
        Initialize the wrapper.

        Args:
            toolset: The base toolset to wrap
        """
        self.base_toolset = toolset
        # Names of tools the user chose to run without further prompts.
        self.auto_confirm_tools: set[str] = set()
        self.console = Console()

        # Create wrapped toolset
        self._wrapped_toolset = self._create_wrapped_toolset()

    def _create_wrapped_toolset(self) -> Toolset:
        """Create a new toolset whose tools mirror the base tools but ask before running."""
        wrapped_tools = [
            Tool(
                name=tool.name,
                description=tool.description,
                parameters=tool.parameters,
                function=self._wrap_function(tool),
            )
            for tool in self.base_toolset.tools
        ]
        return Toolset(tools=wrapped_tools)

    def _wrap_function(self, tool: Tool) -> Callable[..., Any]:
        """Wrap a tool function with confirmation logic.

        Args:
            tool: The tool whose function is wrapped.

        Returns:
            A keyword-only callable. It returns the original function's result, or a
            dict with `status`, `tool` and `feedback` keys when the user rejects the call.
        """
        from rich.markup import escape  # local import: only needed for console output

        original_function = tool.function
        tool_name = tool.name
        # The name is interpolated into markup strings below; escape it so that square
        # brackets in a tool name are printed literally instead of parsed as style tags.
        display_name = escape(tool_name)

        def wrapped_function(**kwargs: Any) -> Any:
            # Check if auto-confirmed
            if tool_name in self.auto_confirm_tools:
                self.console.print(f"[green]✓ Auto-executing '{display_name}'[/green]")
                return original_function(**kwargs)

            # Ask for confirmation
            action = self._ask_confirmation(tool_name, kwargs)

            if action == "reject":
                # The tool is not run; the caller receives the rejection and any feedback.
                feedback = Prompt.ask("Feedback message (optional)", default="")
                return {
                    "status": "rejected",
                    "tool": tool_name,
                    "feedback": feedback or "Tool execution rejected by user",
                }

            if action == "confirm_auto":
                # Add to auto-confirm and execute
                self.auto_confirm_tools.add(tool_name)
                self.console.print(f"[green]✓ '{display_name}' added to auto-confirm list[/green]")

            # Execute tool (for both "confirm" and "confirm_auto")
            return original_function(**kwargs)

        return wrapped_function

    def _ask_confirmation(self, tool_name: str, params: dict[str, Any]) -> str:
        """Show the pending tool call and ask the user what to do with it.

        Args:
            tool_name: Name of the tool about to be executed.
            params: Keyword arguments the tool would be called with.

        Returns:
            One of "confirm", "confirm_auto" or "reject".
        """
        from rich.markup import escape  # local import: only needed for console output

        # Build tool call display. Names and argument values come from outside this class
        # and may contain square brackets (lists, YAML, "[/x]"), so they are escaped to be
        # shown literally rather than parsed as Rich markup.
        lines = [f"[bold yellow]Tool:[/bold yellow] {escape(tool_name)}"]

        if params:
            lines.append("\n[bold yellow]Arguments:[/bold yellow]")
            for key, value in params.items():
                lines.append(f"\n[cyan]{escape(str(key))}:[/cyan]")
                # One output line per line of the value, each with the same indent; a
                # single-line value yields exactly one line.
                for line in str(value).split("\n"):
                    lines.append(f" {escape(line)}")

        # Create panel with tool information
        panel = Panel("\n".join(lines), title="🔧 Tool Execution Request", border_style="blue")
        self.console.print(panel)

        # Show options
        self.console.print("\n[bold]Options:[/bold]")
        self.console.print(f" [green]{TOOL_CONFIRMATION_CHAR}[/green] - Confirm execution")
        self.console.print(f" [yellow]{TOOL_AUTO_CONFIRM_CHAR}[/yellow] - Confirm and auto-confirm this tool")
        self.console.print(f" [red]{TOOL_REJECTED_CHAR}[/red] - Reject execution")

        actions = {
            TOOL_CONFIRMATION_CHAR: "confirm",
            TOOL_AUTO_CONFIRM_CHAR: "confirm_auto",
            TOOL_REJECTED_CHAR: "reject",
        }

        # Get user choice; keep asking until the answer maps to a known action.
        while True:
            choice = Prompt.ask(
                "\nYour choice", choices=[TOOL_CONFIRMATION_CHAR, TOOL_AUTO_CONFIRM_CHAR, TOOL_REJECTED_CHAR]
            )
            if choice in actions:
                return actions[choice]

    @property
    def toolset(self) -> Toolset:
        """Get the wrapped toolset."""
        return self._wrapped_toolset

    def close(self) -> None:
        """Close the underlying toolset if it has a close method."""
        if hasattr(self.base_toolset, "close"):
            self.base_toolset.close()
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def wrap_toolset_interactive(toolset: Toolset) -> InteractiveToolsetWrapper:
    """
    Build an InteractiveToolsetWrapper around `toolset`.

    Args:
        toolset: The toolset whose tools should ask for confirmation before running

    Returns:
        The new wrapper; its `toolset` property exposes the wrapped tools
    """
    wrapper = InteractiveToolsetWrapper(toolset)
    return wrapper
|