deepset-mcp 0.0.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepset_mcp/__init__.py +0 -0
- deepset_mcp/agents/__init__.py +0 -0
- deepset_mcp/agents/debugging/__init__.py +0 -0
- deepset_mcp/agents/debugging/debugging_agent.py +37 -0
- deepset_mcp/agents/debugging/system_prompt.md +214 -0
- deepset_mcp/agents/generalist/__init__.py +0 -0
- deepset_mcp/agents/generalist/generalist_agent.py +38 -0
- deepset_mcp/agents/generalist/system_prompt.md +241 -0
- deepset_mcp/api/README.md +536 -0
- deepset_mcp/api/__init__.py +0 -0
- deepset_mcp/api/client.py +277 -0
- deepset_mcp/api/custom_components/__init__.py +0 -0
- deepset_mcp/api/custom_components/models.py +25 -0
- deepset_mcp/api/custom_components/protocols.py +17 -0
- deepset_mcp/api/custom_components/resource.py +56 -0
- deepset_mcp/api/exceptions.py +70 -0
- deepset_mcp/api/haystack_service/__init__.py +0 -0
- deepset_mcp/api/haystack_service/protocols.py +13 -0
- deepset_mcp/api/haystack_service/resource.py +55 -0
- deepset_mcp/api/indexes/__init__.py +0 -0
- deepset_mcp/api/indexes/models.py +63 -0
- deepset_mcp/api/indexes/protocols.py +53 -0
- deepset_mcp/api/indexes/resource.py +138 -0
- deepset_mcp/api/integrations/__init__.py +1 -0
- deepset_mcp/api/integrations/models.py +49 -0
- deepset_mcp/api/integrations/protocols.py +27 -0
- deepset_mcp/api/integrations/resource.py +57 -0
- deepset_mcp/api/pipeline/__init__.py +17 -0
- deepset_mcp/api/pipeline/log_level.py +9 -0
- deepset_mcp/api/pipeline/models.py +235 -0
- deepset_mcp/api/pipeline/protocols.py +83 -0
- deepset_mcp/api/pipeline/resource.py +378 -0
- deepset_mcp/api/pipeline_template/__init__.py +0 -0
- deepset_mcp/api/pipeline_template/models.py +56 -0
- deepset_mcp/api/pipeline_template/protocols.py +17 -0
- deepset_mcp/api/pipeline_template/resource.py +88 -0
- deepset_mcp/api/protocols.py +122 -0
- deepset_mcp/api/secrets/__init__.py +0 -0
- deepset_mcp/api/secrets/models.py +16 -0
- deepset_mcp/api/secrets/protocols.py +29 -0
- deepset_mcp/api/secrets/resource.py +112 -0
- deepset_mcp/api/shared_models.py +17 -0
- deepset_mcp/api/transport.py +336 -0
- deepset_mcp/api/user/__init__.py +0 -0
- deepset_mcp/api/user/protocols.py +11 -0
- deepset_mcp/api/user/resource.py +38 -0
- deepset_mcp/api/workspace/__init__.py +7 -0
- deepset_mcp/api/workspace/models.py +23 -0
- deepset_mcp/api/workspace/protocols.py +41 -0
- deepset_mcp/api/workspace/resource.py +94 -0
- deepset_mcp/benchmark/README.md +425 -0
- deepset_mcp/benchmark/__init__.py +1 -0
- deepset_mcp/benchmark/agent_configs/debugging_agent.yml +10 -0
- deepset_mcp/benchmark/agent_configs/generalist_agent.yml +6 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/__init__.py +0 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/eda.ipynb +757 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/prepare_interaction_data.ipynb +167 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/preprocessing_utils.py +213 -0
- deepset_mcp/benchmark/runner/__init__.py +0 -0
- deepset_mcp/benchmark/runner/agent_benchmark_runner.py +561 -0
- deepset_mcp/benchmark/runner/agent_loader.py +110 -0
- deepset_mcp/benchmark/runner/cli.py +39 -0
- deepset_mcp/benchmark/runner/cli_agent.py +373 -0
- deepset_mcp/benchmark/runner/cli_index.py +71 -0
- deepset_mcp/benchmark/runner/cli_pipeline.py +73 -0
- deepset_mcp/benchmark/runner/cli_tests.py +226 -0
- deepset_mcp/benchmark/runner/cli_utils.py +61 -0
- deepset_mcp/benchmark/runner/config.py +73 -0
- deepset_mcp/benchmark/runner/config_loader.py +64 -0
- deepset_mcp/benchmark/runner/interactive.py +140 -0
- deepset_mcp/benchmark/runner/models.py +203 -0
- deepset_mcp/benchmark/runner/repl.py +67 -0
- deepset_mcp/benchmark/runner/setup_actions.py +238 -0
- deepset_mcp/benchmark/runner/streaming.py +360 -0
- deepset_mcp/benchmark/runner/teardown_actions.py +196 -0
- deepset_mcp/benchmark/runner/tracing.py +21 -0
- deepset_mcp/benchmark/tasks/chat_rag_answers_wrong_format.yml +16 -0
- deepset_mcp/benchmark/tasks/documents_output_wrong.yml +13 -0
- deepset_mcp/benchmark/tasks/jinja_str_instead_of_complex_type.yml +11 -0
- deepset_mcp/benchmark/tasks/jinja_syntax_error.yml +11 -0
- deepset_mcp/benchmark/tasks/missing_output_mapping.yml +14 -0
- deepset_mcp/benchmark/tasks/no_query_input.yml +13 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_str.yml +141 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_syntax.yml +141 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_answers_wrong_format.yml +181 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_missing_output_mapping.yml +189 -0
- deepset_mcp/benchmark/tasks/pipelines/rag_documents_wrong_format.yml +193 -0
- deepset_mcp/benchmark/tasks/pipelines/rag_no_query_input.yml +191 -0
- deepset_mcp/benchmark/tasks/pipelines/standard_index.yml +167 -0
- deepset_mcp/initialize_embedding_model.py +12 -0
- deepset_mcp/main.py +133 -0
- deepset_mcp/prompts/deepset_copilot_prompt.md +271 -0
- deepset_mcp/prompts/deepset_debugging_agent.md +214 -0
- deepset_mcp/store.py +5 -0
- deepset_mcp/tool_factory.py +473 -0
- deepset_mcp/tools/__init__.py +0 -0
- deepset_mcp/tools/custom_components.py +52 -0
- deepset_mcp/tools/doc_search.py +83 -0
- deepset_mcp/tools/haystack_service.py +358 -0
- deepset_mcp/tools/haystack_service_models.py +97 -0
- deepset_mcp/tools/indexes.py +129 -0
- deepset_mcp/tools/model_protocol.py +16 -0
- deepset_mcp/tools/pipeline.py +335 -0
- deepset_mcp/tools/pipeline_template.py +116 -0
- deepset_mcp/tools/secrets.py +45 -0
- deepset_mcp/tools/tokonomics/__init__.py +73 -0
- deepset_mcp/tools/tokonomics/decorators.py +396 -0
- deepset_mcp/tools/tokonomics/explorer.py +347 -0
- deepset_mcp/tools/tokonomics/object_store.py +177 -0
- deepset_mcp/tools/workspace.py +61 -0
- deepset_mcp-0.0.2rc1.dist-info/METADATA +292 -0
- deepset_mcp-0.0.2rc1.dist-info/RECORD +114 -0
- deepset_mcp-0.0.2rc1.dist-info/WHEEL +4 -0
- deepset_mcp-0.0.2rc1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
|
|
3
|
+
from deepset_mcp.benchmark.runner.cli_agent import create_agents_app
|
|
4
|
+
from deepset_mcp.benchmark.runner.cli_index import create_index_app
|
|
5
|
+
from deepset_mcp.benchmark.runner.cli_pipeline import create_pipeline_app
|
|
6
|
+
from deepset_mcp.benchmark.runner.cli_tests import create_tests_app
|
|
7
|
+
|
|
8
|
+
# Root CLI application. Each sub-app below is mounted as its own command group.
app = typer.Typer(
    name="deepset",
    help="Deepset Copilot CLI for managing pipelines and running benchmarks.",
    no_args_is_help=True,
)

app.add_typer(
    create_agents_app(),
    name="agent",
    help="Run agents against test cases.",
    no_args_is_help=True,
)

# `no_args_is_help` is set here as well so that a bare `deepset test` prints the
# group's help, consistent with the `agent`, `pipeline` and `index` groups.
app.add_typer(
    create_tests_app(),
    name="test",
    help="Setup test cases on deepset.",
    no_args_is_help=True,
)

app.add_typer(
    create_pipeline_app(),
    name="pipeline",
    help="Manage pipelines on deepset.",
    no_args_is_help=True,
)

app.add_typer(
    create_index_app(),
    name="index",
    help="Manage indexes on deepset.",
    no_args_is_help=True,
)


if __name__ == "__main__":
    app()
|
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
import typer
|
|
4
|
+
from dotenv import load_dotenv
|
|
5
|
+
from pydantic import ValidationError
|
|
6
|
+
|
|
7
|
+
from deepset_mcp.benchmark.runner.agent_benchmark_runner import run_agent_benchmark
|
|
8
|
+
from deepset_mcp.benchmark.runner.cli_utils import override_deepset_env_vars, validate_and_setup_configs
|
|
9
|
+
from deepset_mcp.benchmark.runner.config import BenchmarkConfig
|
|
10
|
+
from deepset_mcp.benchmark.runner.models import AgentConfig
|
|
11
|
+
from deepset_mcp.benchmark.runner.repl import run_repl_session
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def load_env_file(env_file: str | None) -> None:
|
|
15
|
+
"""Load environment variables from a file if specified."""
|
|
16
|
+
if env_file:
|
|
17
|
+
env_path = Path(env_file)
|
|
18
|
+
if not env_path.exists():
|
|
19
|
+
typer.secho(f"Environment file not found: {env_file}", fg=typer.colors.RED)
|
|
20
|
+
raise typer.Exit(code=1)
|
|
21
|
+
load_dotenv(env_path, override=True)
|
|
22
|
+
typer.secho(f"Loaded environment from: {env_file}", fg=typer.colors.BLUE)
|
|
23
|
+
else:
|
|
24
|
+
# Try to load default .env file
|
|
25
|
+
default_env_path = Path(__file__).parent / ".env"
|
|
26
|
+
if default_env_path.exists():
|
|
27
|
+
load_dotenv()
|
|
28
|
+
typer.secho("Loaded default .env file.", fg=typer.colors.BLUE)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Typer sub-application that the agent commands below register themselves on.
agent_app = typer.Typer(
    help="Commands for running agents against test cases.",
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# Runs one test case with streaming enabled, prints a short report and exits with
# code 1 when the test fails or when any error occurs while running/reporting.
# The docstring is kept short because Typer shows it as the command's help text.
@agent_app.command("run")
def run_agent_single(
    agent_config: str = typer.Argument(..., help="Path to agent configuration file (YAML)."),
    test_case: str = typer.Argument(..., help="Name of the test case to run."),
    workspace: str | None = typer.Option(None, "--workspace", "-w", help="Override Deepset workspace."),
    api_key: str | None = typer.Option(None, "--api-key", "-k", help="Override Deepset API key."),
    env_file: str | None = typer.Option(None, "--env-file", "-e", help="Path to environment file."),
    output_dir: str | None = typer.Option(None, "--output-dir", "-o", help="Directory to save results."),
    test_case_base_dir: str | None = typer.Option(None, "--test-base-dir", help="Base directory for test cases."),
) -> None:
    """Run an agent against a single test case."""
    load_env_file(env_file)
    override_deepset_env_vars(workspace=workspace, api_key=api_key)
    agent_cfg, benchmark_cfg = validate_and_setup_configs(
        agent_config=agent_config,
        test_case_base_dir=test_case_base_dir,
        output_dir=output_dir,
    )

    typer.secho(f"→ Running agent '{agent_cfg.display_name}' on test case '{test_case}'", fg=typer.colors.GREEN)

    try:
        results, _ = run_agent_benchmark(
            agent_config=agent_cfg, test_case_name=test_case, benchmark_config=benchmark_cfg, streaming=True
        )

        result = results[0]
        succeeded = result["status"] == "success"

        if succeeded:
            typer.secho("✔ Test completed successfully!", fg=typer.colors.GREEN)
            typer.secho(f" Results saved to: {result['output_dir']}", fg=typer.colors.BLUE)

            # Show basic stats
            if "processed_data" in result:
                stats = result["processed_data"]["messages"]["stats"]
                typer.secho(f" Tool calls: {stats['total_tool_calls']}", fg=typer.colors.BLUE)
                typer.secho(f" Prompt tokens: {stats['total_prompt_tokens']}", fg=typer.colors.BLUE)
                typer.secho(f" Completion tokens: {stats['total_completion_tokens']}", fg=typer.colors.BLUE)
                typer.secho(f" Model: {stats['model']}", fg=typer.colors.BLUE)

                # Show validation results
                validation = result["processed_data"]["validation"]
                typer.secho(f" Pre-validation: {validation['pre_validation'] or 'N/A'}", fg=typer.colors.BLUE)
                typer.secho(f" Post-validation: {validation['post_validation'] or 'N/A'}", fg=typer.colors.BLUE)
        else:
            typer.secho(f"✘ Test failed: {result['error']}", fg=typer.colors.RED)

        # Check cleanup status. This is reported before exiting so that a failed test
        # does not hide the fact that its resources could not be cleaned up.
        if result.get("cleanup_status") == "error":
            typer.secho(f"⚠ Cleanup failed: {result.get('cleanup_error')}", fg=typer.colors.YELLOW)

        if not succeeded:
            raise typer.Exit(code=1)

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the handler below
        # would catch the deliberate exit and print a misleading, empty error message.
        raise
    except Exception as e:
        typer.secho(f"✘ Error running benchmark: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _status_color(value: str, good: str, bad: str) -> str:
    """Return green when ``value`` equals ``good``, red when it equals ``bad``, white otherwise."""
    if value == good:
        return typer.colors.GREEN
    if value == bad:
        return typer.colors.RED
    return typer.colors.WHITE


# Runs every available test case (optionally concurrently), prints a summary, a
# per-test table, failure and cleanup details, and exits with code 1 when at least
# one test failed or an error occurred while running/reporting.
# The docstring is kept short because Typer shows it as the command's help text.
@agent_app.command("run-all")
def run_agent_all(
    agent_config: str = typer.Argument(..., help="Path to agent configuration file (YAML)."),
    workspace: str | None = typer.Option(None, "--workspace", "-w", help="Override Deepset workspace."),
    api_key: str | None = typer.Option(None, "--api-key", "-k", help="Override Deepset API key."),
    env_file: str | None = typer.Option(None, "--env-file", "-e", help="Path to environment file."),
    output_dir: str | None = typer.Option(None, "--output-dir", "-o", help="Directory to save results."),
    test_case_base_dir: str | None = typer.Option(None, "--test-base-dir", help="Base directory for test cases."),
    concurrency: int = typer.Option(1, "--concurrency", "-c", help="Number of concurrent test runs."),
) -> None:
    """Run an agent against all available test cases."""
    load_env_file(env_file)
    override_deepset_env_vars(workspace=workspace, api_key=api_key)
    agent_cfg, benchmark_cfg = validate_and_setup_configs(
        agent_config=agent_config,
        test_case_base_dir=test_case_base_dir,
        output_dir=output_dir,
    )

    typer.secho(
        f"→ Running agent '{agent_cfg.display_name}' on all test cases (concurrency={concurrency})",
        fg=typer.colors.GREEN,
    )

    try:
        results, summary = run_agent_benchmark(
            agent_config=agent_cfg,
            test_case_name=None,  # Run all
            benchmark_config=benchmark_cfg,
            concurrency=concurrency,
            streaming=True,
        )

        # Display summary statistics
        typer.secho("\n📊 BENCHMARK SUMMARY", fg=typer.colors.BRIGHT_BLUE, bold=True)
        typer.secho("=" * 50, fg=typer.colors.BLUE)

        typer.secho(f"Tests Completed: {summary['tests_completed']}", fg=typer.colors.GREEN)
        typer.secho(
            f"Tests Failed: {summary['tests_failed']}",
            fg=typer.colors.RED if summary["tests_failed"] > 0 else typer.colors.GREEN,
        )

        pass_rate = summary["pass_rate_percent"]
        if pass_rate > 80:
            pass_color = typer.colors.GREEN
        elif pass_rate > 50:
            pass_color = typer.colors.YELLOW
        else:
            pass_color = typer.colors.RED
        typer.secho(f"Pass Rate: {pass_rate:.1f}%", fg=pass_color)

        fail_rate = summary["fail_rate_percent"]
        if fail_rate > 20:
            fail_color = typer.colors.RED
        elif fail_rate > 0:
            fail_color = typer.colors.YELLOW
        else:
            fail_color = typer.colors.GREEN
        typer.secho(f"Fail Rate: {fail_rate:.1f}%", fg=fail_color)

        typer.secho("\nToken Usage:", fg=typer.colors.CYAN)
        typer.secho(f" Prompt Tokens: {summary['total_prompt_tokens']:,}", fg=typer.colors.CYAN)
        typer.secho(f" Completion Tokens: {summary['total_completion_tokens']:,}", fg=typer.colors.CYAN)
        typer.secho(
            f" Total Tokens: {summary['total_prompt_tokens'] + summary['total_completion_tokens']:,}",
            fg=typer.colors.CYAN,
        )
        typer.secho(f" Avg Tool Calls: {summary['avg_tool_calls']:.1f}", fg=typer.colors.CYAN)

        # Display detailed results table
        if results:
            typer.secho("\n📋 DETAILED RESULTS", fg=typer.colors.BRIGHT_BLUE, bold=True)
            typer.secho("=" * 120, fg=typer.colors.BLUE)

            # Table header
            header = (
                f"{'Test Case':<25} {'Status':<8} {'Pre':<5} {'Post':<5} {'Tools':<6} {'P.Tokens':<9} "
                f"{'C.Tokens':<9} {'Cleanup':<8}"
            )
            typer.secho(header, fg=typer.colors.BRIGHT_WHITE, bold=True)
            typer.secho("-" * 120, fg=typer.colors.BLUE)

            # Table rows
            for result in results:
                test_case = result["test_case"][:24]  # Truncate long names
                # A missing or None cleanup status is rendered as "N/A".
                cleanup_status = str(result.get("cleanup_status") or "N/A")
                cleanup_color = _status_color(cleanup_status, good="success", bad="error")

                typer.echo(f"{test_case:<25} ", nl=False)

                if result["status"] == "success":
                    processed_data = result["processed_data"]
                    stats = processed_data["messages"]["stats"]
                    validation = processed_data["validation"]

                    pre_val = validation["pre_validation"] or "N/A"
                    post_val = validation["post_validation"] or "N/A"
                    tool_calls = stats["total_tool_calls"]
                    prompt_tokens = stats["total_prompt_tokens"]
                    completion_tokens = stats["total_completion_tokens"]

                    # Status is padded to the same width as the header's "Status" column.
                    typer.secho(f"{'SUCCESS':<8} ", fg=typer.colors.GREEN, nl=False)
                    typer.secho(f"{pre_val:<5} ", fg=_status_color(pre_val, good="PASS", bad="FAIL"), nl=False)
                    typer.secho(f"{post_val:<5} ", fg=_status_color(post_val, good="PASS", bad="FAIL"), nl=False)
                    typer.echo(f"{tool_calls:<6} {prompt_tokens:<9} {completion_tokens:<9} ", nl=False)
                    typer.secho(f"{cleanup_status:<8}", fg=cleanup_color)
                else:
                    # Error case: keep the columns aligned with the header and append the
                    # (truncated) error text after the last column. `or` also covers an
                    # explicit None stored under "error", which cannot be sliced.
                    error_msg = str(result.get("error") or "Unknown error")[:30]

                    typer.secho(f"{'ERROR':<8} ", fg=typer.colors.RED, nl=False)
                    typer.echo(f"{'N/A':<5} {'N/A':<5} {'N/A':<6} {'N/A':<9} {'N/A':<9} ", nl=False)
                    typer.secho(f"{cleanup_status:<8}", fg=cleanup_color, nl=False)
                    typer.secho(f" {error_msg}", fg=typer.colors.RED)

        # Show output directory
        if results and results[0].get("output_dir"):
            example_output = results[0]["output_dir"]
            base_dir = str(Path(example_output).parent)
            typer.secho(f"\n💾 Results saved to: {base_dir}", fg=typer.colors.MAGENTA)

        # Show failed test details if any
        failed_results = [r for r in results if r["status"] == "error"]
        if failed_results:
            typer.secho("\n❌ FAILED TESTS DETAILS", fg=typer.colors.RED, bold=True)
            typer.secho("-" * 50, fg=typer.colors.RED)
            for failed in failed_results:
                typer.secho(f" • {failed['test_case']}: {failed.get('error', 'Unknown error')}", fg=typer.colors.RED)

        # Check for cleanup issues
        cleanup_issues = [r for r in results if r.get("cleanup_status") == "error"]
        if cleanup_issues:
            typer.secho("\n⚠️ CLEANUP ISSUES", fg=typer.colors.YELLOW, bold=True)
            typer.secho("-" * 50, fg=typer.colors.YELLOW)
            for issue in cleanup_issues:
                typer.secho(
                    f" • {issue['test_case']}: {issue.get('cleanup_error', 'Unknown cleanup error')}",
                    fg=typer.colors.YELLOW,
                )

        typer.secho("\n✅ Benchmark completed successfully!", fg=typer.colors.GREEN, bold=True)

        # Exit with error code if any tests failed
        if summary["tests_failed"] > 0:
            raise typer.Exit(code=1)

    except typer.Exit:
        # typer.Exit derives from RuntimeError; without this clause the handler below
        # would catch the deliberate exit and print a misleading, empty error message.
        raise
    except Exception as e:
        typer.secho(f"✘ Error running benchmarks: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1) from e
|
|
273
|
+
|
|
274
|
+
|
|
275
|
+
# Verifies that the base benchmark configuration loads, that the agent config can
# be read, and that every environment variable the agent declares as required is
# available. Exits with code 1 on any failed check so the command is usable in
# scripts. The docstring is kept short because Typer shows it as the help text.
@agent_app.command("check-env")
def check_environment(
    agent_config: str = typer.Argument(..., help="Path to agent configuration file."),
    env_file: str | None = typer.Option(None, "--env-file", "-e", help="Path to environment file."),
) -> None:
    """Check if environment variables are configured correctly for an agent to run."""
    load_env_file(env_file)

    # Try to load base config
    try:
        benchmark_config = BenchmarkConfig()
        typer.secho("✓ Base configuration loaded", fg=typer.colors.GREEN)
    except ValidationError as e:
        typer.secho("✗ Base configuration missing:", fg=typer.colors.RED)
        for error in e.errors():
            # The first element of "loc" is used as the name of the offending field.
            field = str(error["loc"][0]) if error["loc"] else "unknown"
            typer.secho(f" - {field.upper()}", fg=typer.colors.RED)
        raise typer.Exit(1) from e

    # Load agent config
    try:
        agent_cfg = AgentConfig.from_file(Path(agent_config))
    except Exception as e:
        typer.secho(f"✗ Failed to load agent config: {e}", fg=typer.colors.RED)
        raise typer.Exit(1) from e

    typer.secho(f"\nEnvironment check for: {agent_cfg.display_name}", fg=typer.colors.BLUE)
    typer.secho("=" * 50, fg=typer.colors.BLUE)

    # Show core configuration (the API key itself is never printed)
    typer.secho("\nCore configuration:", fg=typer.colors.YELLOW)
    typer.secho(f" ✓ DEEPSET_WORKSPACE = {benchmark_config.deepset_workspace}", fg=typer.colors.GREEN)
    typer.secho(f" ✓ DEEPSET_API_KEY = {'*' * 8}...", fg=typer.colors.GREEN)

    # Check the variables the agent declares as required
    typer.secho("\nAgent requirements:", fg=typer.colors.YELLOW)
    is_valid, missing = benchmark_config.check_required_env_vars(agent_cfg.required_env_vars)

    if not is_valid:
        typer.secho(f"\n✗ Missing required variables: {', '.join(missing)}", fg=typer.colors.RED)
        # A failed check must be visible to the caller, like the failures above.
        raise typer.Exit(1)

    typer.secho(" ✓ All required environment variables are set", fg=typer.colors.GREEN)
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
# Loads the given agent configuration file and prints what was found; exits with
# code 1 when the file does not exist or cannot be loaded.
@agent_app.command("validate-config")
def validate_agent_config(
    agent_config: str = typer.Argument(..., help="Path to agent configuration file to validate."),
) -> None:
    """Validate an agent configuration file."""
    config_path = Path(agent_config)
    if not config_path.exists():
        typer.secho(f"Agent config file not found: {agent_config}", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    blue = typer.colors.BLUE
    try:
        loaded = AgentConfig.from_file(config_path)
        typer.secho("✔ Agent config is valid", fg=typer.colors.GREEN)
        typer.secho(f" Display name: {loaded.display_name}", fg=blue)

        # Report how the agent is defined: by factory function or by JSON.
        if loaded.agent_factory_function:
            typer.secho(f" Type: Function-based ({loaded.agent_factory_function})", fg=blue)
        elif loaded.agent_json:
            typer.secho(f" Type: JSON-based ({loaded.agent_json})", fg=blue)

        if loaded.required_env_vars:
            typer.secho(f" Declared env vars: {', '.join(loaded.required_env_vars)}", fg=blue)

    except Exception as e:
        typer.secho(f"✘ Invalid agent config: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
# Prepares the environment and configs, then hands control to the REPL runner.
# Any exception raised by the session is reported and turned into exit code 1.
@agent_app.command("chat")
def chat_with_agent(
    agent_config: str = typer.Argument(
        default=str(Path(__file__).parent.parent / "agent_configs/debugging_agent.yml"),
        help="Path to agent configuration file (YAML).",
    ),
    workspace: str | None = typer.Option(None, "--workspace", "-w", help="Override Deepset workspace."),
    api_key: str | None = typer.Option(None, "--api-key", "-k", help="Override Deepset API key."),
    env_file: str | None = typer.Option(None, "--env-file", "-e", help="Path to environment file."),
) -> None:
    """Start an interactive REPL session with an agent."""
    load_env_file(env_file)
    override_deepset_env_vars(workspace=workspace, api_key=api_key)

    # No test-case directory or output directory is passed for an interactive session.
    configs = validate_and_setup_configs(
        agent_config=agent_config,
        test_case_base_dir=None,
        output_dir=None,
    )
    agent_cfg, benchmark_cfg = configs

    try:
        run_repl_session(agent_config=agent_cfg, benchmark_config=benchmark_cfg)
    except Exception as e:
        typer.secho(f"✘ Error during REPL session: {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def create_agents_app() -> typer.Typer:
    """Return the module-level Typer app that holds the agent commands."""
    return agent_app
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
|
|
3
|
+
from deepset_mcp.benchmark.runner.setup_actions import setup_index
|
|
4
|
+
from deepset_mcp.benchmark.runner.teardown_actions import teardown_index
|
|
5
|
+
|
|
6
|
+
# Typer sub-application that the index commands below register themselves on.
index_app = typer.Typer(
    help="Commands for creating and deleting indexes.",
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Creates an index from either a YAML file or a raw YAML string (exactly one must
# be supplied) by delegating to `setup_index`; exits with code 1 on bad input or
# when the setup call raises.
@index_app.command("create")
def create_index(
    yaml_path: str | None = typer.Option(None, "--path", "-p", help="Path to an index YAML file."),
    yaml_content: str | None = typer.Option(None, "--content", "-c", help="Raw YAML string for the index."),
    index_name: str = typer.Option(..., "--name", "-n", help="Name to assign to the new index."),
    workspace_name: str = typer.Option("default", "--workspace", "-w", help="Workspace in which to create the index."),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
    description: str | None = typer.Option(None, "--desc", help="Optional description for the index."),
) -> None:
    """Create a single index from a yaml configuration."""
    # Both given or both missing means the truthiness of the two options is equal.
    if bool(yaml_path) == bool(yaml_content):
        typer.secho("Error: exactly one of `--path` or `--content` must be provided.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    setup_kwargs = {
        "yaml_path": yaml_path,
        "yaml_content": yaml_content,
        "index_name": index_name,
        "workspace_name": workspace_name,
        "api_key": api_key,
        "description": description,
    }
    try:
        setup_index(**setup_kwargs)
        typer.secho(f"✔ Index '{index_name}' created in '{workspace_name}'.", fg=typer.colors.GREEN)
    except Exception as e:
        typer.secho(f"✘ Failed to create index '{index_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Deletes an index by delegating to `teardown_index`; exits with code 1 when the
# teardown call raises.
@index_app.command("delete")
def delete_index(
    index_name: str = typer.Option(..., "--name", "-n", help="Name of the index to delete."),
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace from which to delete the index."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
) -> None:
    """Delete a single index by name."""
    teardown_kwargs = {
        "index_name": index_name,
        "workspace_name": workspace_name,
        "api_key": api_key,
    }
    try:
        teardown_index(**teardown_kwargs)
        typer.secho(f"✔ Index '{index_name}' deleted from '{workspace_name}'.", fg=typer.colors.GREEN)
    except Exception as e:
        typer.secho(f"✘ Failed to delete index '{index_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def create_index_app() -> typer.Typer:
    """Return the module-level Typer app that holds the index commands."""
    return index_app
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import typer
|
|
2
|
+
|
|
3
|
+
from deepset_mcp.benchmark.runner.setup_actions import setup_pipeline
|
|
4
|
+
from deepset_mcp.benchmark.runner.teardown_actions import teardown_pipeline
|
|
5
|
+
|
|
6
|
+
# Typer sub-application that the pipeline commands below register themselves on.
pipeline_app = typer.Typer(
    help="Commands for creating and deleting pipelines.",
)
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# Creates a pipeline from either a YAML file or a raw YAML string (exactly one
# must be supplied) by delegating to `setup_pipeline`; exits with code 1 on bad
# input or when the setup call raises.
@pipeline_app.command("create")
def create_pipe(
    yaml_path: str | None = typer.Option(None, "--path", "-p", help="Path to a pipeline YAML file."),
    yaml_content: str | None = typer.Option(
        None, "--content", "-c", help="Raw YAML string for the pipeline (instead of a file)."
    ),
    pipeline_name: str = typer.Option(..., "--name", "-n", help="Name to assign to the new pipeline."),
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace in which to create the pipeline."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
) -> None:
    """Create a single pipeline from a yaml configuration."""
    # Both given or both missing means the truthiness of the two options is equal.
    if bool(yaml_path) == bool(yaml_content):
        typer.secho("Error: exactly one of `--path` or `--content` must be provided.", fg=typer.colors.RED)
        raise typer.Exit(code=1)

    setup_kwargs = {
        "yaml_path": yaml_path,
        "yaml_content": yaml_content,
        "pipeline_name": pipeline_name,
        "workspace_name": workspace_name,
        "api_key": api_key,
    }
    try:
        setup_pipeline(**setup_kwargs)
        typer.secho(f"✔ Pipeline '{pipeline_name}' created in '{workspace_name}'.", fg=typer.colors.GREEN)
    except Exception as e:
        typer.secho(f"✘ Failed to create pipeline '{pipeline_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Deletes a pipeline by delegating to `teardown_pipeline`; exits with code 1 when
# the teardown call raises.
@pipeline_app.command("delete")
def delete_pipe(
    pipeline_name: str = typer.Option(..., "--name", "-n", help="Name of the pipeline to delete."),
    workspace_name: str = typer.Option(
        "default", "--workspace", "-w", help="Workspace from which to delete the pipeline."
    ),
    api_key: str | None = typer.Option(
        None,
        "--api-key",
        "-k",
        help="Explicit DP_API_KEY to use (overrides environment).",
    ),
) -> None:
    """Delete a single pipeline from `workspace_name`."""
    teardown_kwargs = {
        "pipeline_name": pipeline_name,
        "workspace_name": workspace_name,
        "api_key": api_key,
    }
    try:
        teardown_pipeline(**teardown_kwargs)
        typer.secho(f"✔ Pipeline '{pipeline_name}' deleted from '{workspace_name}'.", fg=typer.colors.GREEN)
    except Exception as e:
        typer.secho(f"✘ Failed to delete pipeline '{pipeline_name}': {e}", fg=typer.colors.RED)
        raise typer.Exit(code=1)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def create_pipeline_app() -> typer.Typer:
    """Return the module-level Typer app that holds the pipeline commands."""
    return pipeline_app
|