deepset-mcp 0.0.2rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepset_mcp/__init__.py +0 -0
- deepset_mcp/agents/__init__.py +0 -0
- deepset_mcp/agents/debugging/__init__.py +0 -0
- deepset_mcp/agents/debugging/debugging_agent.py +37 -0
- deepset_mcp/agents/debugging/system_prompt.md +214 -0
- deepset_mcp/agents/generalist/__init__.py +0 -0
- deepset_mcp/agents/generalist/generalist_agent.py +38 -0
- deepset_mcp/agents/generalist/system_prompt.md +241 -0
- deepset_mcp/api/README.md +536 -0
- deepset_mcp/api/__init__.py +0 -0
- deepset_mcp/api/client.py +277 -0
- deepset_mcp/api/custom_components/__init__.py +0 -0
- deepset_mcp/api/custom_components/models.py +25 -0
- deepset_mcp/api/custom_components/protocols.py +17 -0
- deepset_mcp/api/custom_components/resource.py +56 -0
- deepset_mcp/api/exceptions.py +70 -0
- deepset_mcp/api/haystack_service/__init__.py +0 -0
- deepset_mcp/api/haystack_service/protocols.py +13 -0
- deepset_mcp/api/haystack_service/resource.py +55 -0
- deepset_mcp/api/indexes/__init__.py +0 -0
- deepset_mcp/api/indexes/models.py +63 -0
- deepset_mcp/api/indexes/protocols.py +53 -0
- deepset_mcp/api/indexes/resource.py +138 -0
- deepset_mcp/api/integrations/__init__.py +1 -0
- deepset_mcp/api/integrations/models.py +49 -0
- deepset_mcp/api/integrations/protocols.py +27 -0
- deepset_mcp/api/integrations/resource.py +57 -0
- deepset_mcp/api/pipeline/__init__.py +17 -0
- deepset_mcp/api/pipeline/log_level.py +9 -0
- deepset_mcp/api/pipeline/models.py +235 -0
- deepset_mcp/api/pipeline/protocols.py +83 -0
- deepset_mcp/api/pipeline/resource.py +378 -0
- deepset_mcp/api/pipeline_template/__init__.py +0 -0
- deepset_mcp/api/pipeline_template/models.py +56 -0
- deepset_mcp/api/pipeline_template/protocols.py +17 -0
- deepset_mcp/api/pipeline_template/resource.py +88 -0
- deepset_mcp/api/protocols.py +122 -0
- deepset_mcp/api/secrets/__init__.py +0 -0
- deepset_mcp/api/secrets/models.py +16 -0
- deepset_mcp/api/secrets/protocols.py +29 -0
- deepset_mcp/api/secrets/resource.py +112 -0
- deepset_mcp/api/shared_models.py +17 -0
- deepset_mcp/api/transport.py +336 -0
- deepset_mcp/api/user/__init__.py +0 -0
- deepset_mcp/api/user/protocols.py +11 -0
- deepset_mcp/api/user/resource.py +38 -0
- deepset_mcp/api/workspace/__init__.py +7 -0
- deepset_mcp/api/workspace/models.py +23 -0
- deepset_mcp/api/workspace/protocols.py +41 -0
- deepset_mcp/api/workspace/resource.py +94 -0
- deepset_mcp/benchmark/README.md +425 -0
- deepset_mcp/benchmark/__init__.py +1 -0
- deepset_mcp/benchmark/agent_configs/debugging_agent.yml +10 -0
- deepset_mcp/benchmark/agent_configs/generalist_agent.yml +6 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/__init__.py +0 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/eda.ipynb +757 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/prepare_interaction_data.ipynb +167 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/preprocessing_utils.py +213 -0
- deepset_mcp/benchmark/runner/__init__.py +0 -0
- deepset_mcp/benchmark/runner/agent_benchmark_runner.py +561 -0
- deepset_mcp/benchmark/runner/agent_loader.py +110 -0
- deepset_mcp/benchmark/runner/cli.py +39 -0
- deepset_mcp/benchmark/runner/cli_agent.py +373 -0
- deepset_mcp/benchmark/runner/cli_index.py +71 -0
- deepset_mcp/benchmark/runner/cli_pipeline.py +73 -0
- deepset_mcp/benchmark/runner/cli_tests.py +226 -0
- deepset_mcp/benchmark/runner/cli_utils.py +61 -0
- deepset_mcp/benchmark/runner/config.py +73 -0
- deepset_mcp/benchmark/runner/config_loader.py +64 -0
- deepset_mcp/benchmark/runner/interactive.py +140 -0
- deepset_mcp/benchmark/runner/models.py +203 -0
- deepset_mcp/benchmark/runner/repl.py +67 -0
- deepset_mcp/benchmark/runner/setup_actions.py +238 -0
- deepset_mcp/benchmark/runner/streaming.py +360 -0
- deepset_mcp/benchmark/runner/teardown_actions.py +196 -0
- deepset_mcp/benchmark/runner/tracing.py +21 -0
- deepset_mcp/benchmark/tasks/chat_rag_answers_wrong_format.yml +16 -0
- deepset_mcp/benchmark/tasks/documents_output_wrong.yml +13 -0
- deepset_mcp/benchmark/tasks/jinja_str_instead_of_complex_type.yml +11 -0
- deepset_mcp/benchmark/tasks/jinja_syntax_error.yml +11 -0
- deepset_mcp/benchmark/tasks/missing_output_mapping.yml +14 -0
- deepset_mcp/benchmark/tasks/no_query_input.yml +13 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_str.yml +141 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_syntax.yml +141 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_answers_wrong_format.yml +181 -0
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_missing_output_mapping.yml +189 -0
- deepset_mcp/benchmark/tasks/pipelines/rag_documents_wrong_format.yml +193 -0
- deepset_mcp/benchmark/tasks/pipelines/rag_no_query_input.yml +191 -0
- deepset_mcp/benchmark/tasks/pipelines/standard_index.yml +167 -0
- deepset_mcp/initialize_embedding_model.py +12 -0
- deepset_mcp/main.py +133 -0
- deepset_mcp/prompts/deepset_copilot_prompt.md +271 -0
- deepset_mcp/prompts/deepset_debugging_agent.md +214 -0
- deepset_mcp/store.py +5 -0
- deepset_mcp/tool_factory.py +473 -0
- deepset_mcp/tools/__init__.py +0 -0
- deepset_mcp/tools/custom_components.py +52 -0
- deepset_mcp/tools/doc_search.py +83 -0
- deepset_mcp/tools/haystack_service.py +358 -0
- deepset_mcp/tools/haystack_service_models.py +97 -0
- deepset_mcp/tools/indexes.py +129 -0
- deepset_mcp/tools/model_protocol.py +16 -0
- deepset_mcp/tools/pipeline.py +335 -0
- deepset_mcp/tools/pipeline_template.py +116 -0
- deepset_mcp/tools/secrets.py +45 -0
- deepset_mcp/tools/tokonomics/__init__.py +73 -0
- deepset_mcp/tools/tokonomics/decorators.py +396 -0
- deepset_mcp/tools/tokonomics/explorer.py +347 -0
- deepset_mcp/tools/tokonomics/object_store.py +177 -0
- deepset_mcp/tools/workspace.py +61 -0
- deepset_mcp-0.0.2rc1.dist-info/METADATA +292 -0
- deepset_mcp-0.0.2rc1.dist-info/RECORD +114 -0
- deepset_mcp-0.0.2rc1.dist-info/WHEEL +4 -0
- deepset_mcp-0.0.2rc1.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
"""Resource implementation for workspace API."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from deepset_mcp.api.shared_models import NoContentResponse
|
|
7
|
+
from deepset_mcp.api.transport import raise_for_status
|
|
8
|
+
from deepset_mcp.api.workspace.models import Workspace, WorkspaceList
|
|
9
|
+
from deepset_mcp.api.workspace.protocols import WorkspaceResourceProtocol
|
|
10
|
+
|
|
11
|
+
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from deepset_mcp.api.protocols import AsyncClientProtocol
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class WorkspaceResource(WorkspaceResourceProtocol):
    """Resource wrapper around the ``v1/workspaces`` endpoints of the deepset API.

    Every method issues a single request through the injected client and hands
    the response to ``raise_for_status`` before using its body.
    """

    def __init__(self, client: "AsyncClientProtocol") -> None:
        """Keep a reference to the client used for all workspace requests.

        :param client: The async client protocol instance.
        """
        self._client = client

    async def list(self) -> WorkspaceList:
        """Fetch all workspaces.

        :returns: A WorkspaceList with one Workspace per item of the response;
            the list is empty when the response JSON is not a list.
            ``has_more`` is always ``False`` and ``total`` is the item count.
        """
        response = await self._client.request(endpoint="v1/workspaces", method="GET")
        raise_for_status(response)

        payload = response.json
        # Guard clause: anything other than a JSON list (including None)
        # yields an empty result instead of a validation attempt.
        if not isinstance(payload, list):
            return WorkspaceList(data=[], has_more=False, total=0)

        found = [Workspace.model_validate(entry) for entry in payload]
        return WorkspaceList(data=found, has_more=False, total=len(found))

    async def get(self, workspace_name: str) -> Workspace:
        """Fetch a single workspace by its name.

        :param workspace_name: Name of the workspace to fetch.
        :returns: The Workspace validated from the response JSON.
        """
        response = await self._client.request(
            endpoint=f"v1/workspaces/{workspace_name}",
            method="GET",
        )
        raise_for_status(response)

        workspace = Workspace.model_validate(response.json)
        return workspace

    async def create(self, name: str) -> NoContentResponse:
        """Create a workspace with the given name.

        :param name: Name of the new workspace.
        :returns: NoContentResponse carrying a fixed success message.
        """
        response = await self._client.request(
            endpoint="v1/workspaces",
            method="POST",
            data={"name": name},
        )
        raise_for_status(response)

        return NoContentResponse(message="Workspace created successfully.")

    async def delete(self, workspace_name: str) -> NoContentResponse:
        """Delete the workspace with the given name.

        :param workspace_name: Name of the workspace to delete.
        :returns: NoContentResponse carrying a fixed success message.
        """
        response = await self._client.request(
            endpoint=f"v1/workspaces/{workspace_name}",
            method="DELETE",
        )
        raise_for_status(response)

        return NoContentResponse(message="Workspace deleted successfully.")
|
|
@@ -0,0 +1,425 @@
|
|
|
1
|
+
# Deepset MCP Benchmark System
|
|
2
|
+
|
|
3
|
+
A comprehensive benchmarking and testing framework for the Deepset Cloud Platform that enables automated testing of AI agents against predefined test cases.
|
|
4
|
+
|
|
5
|
+
## Getting Started
|
|
6
|
+
|
|
7
|
+
### Prerequisites
|
|
8
|
+
|
|
9
|
+
- Python 3.11+
|
|
10
|
+
- Access to Deepset Cloud Platform
|
|
11
|
+
- Required environment variables:
|
|
12
|
+
- `DEEPSET_API_KEY`: Your Deepset Cloud API key
|
|
13
|
+
- `DEEPSET_WORKSPACE`: Your workspace name
|
|
14
|
+
|
|
15
|
+
### Installation
|
|
16
|
+
|
|
17
|
+
Install the benchmark dependencies:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install -e .[benchmark]
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
For agent testing, also install:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install -e .[agents]
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
### Quick Start
|
|
30
|
+
|
|
31
|
+
1. **Check your environment**:
|
|
32
|
+
```bash
|
|
33
|
+
deepset agent check-env agent_configs/generalist_agent.yml
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
2. **List available test cases**:
|
|
37
|
+
```bash
|
|
38
|
+
deepset test list
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
3. **Run a single test**:
|
|
42
|
+
```bash
|
|
43
|
+
deepset agent run agent_configs/generalist_agent.yml chat_rag_answers_wrong_format
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Core Concepts
|
|
47
|
+
|
|
48
|
+
### Test Cases
|
|
49
|
+
|
|
50
|
+
Test cases define specific scenarios for testing agents. Each test case includes:
|
|
51
|
+
|
|
52
|
+
- **Pipeline configuration**: YAML files defining Haystack pipelines
|
|
53
|
+
- **Index configuration**: YAML files for document indexing
|
|
54
|
+
- **Test prompt**: The input message sent to the agent
|
|
55
|
+
- **Validation criteria**: Expected behavior and outputs
|
|
56
|
+
|
|
57
|
+
Test cases are stored as YAML files in the `tasks/` directory.
|
|
58
|
+
|
|
59
|
+
### Agent Configurations
|
|
60
|
+
|
|
61
|
+
Agent configurations define how to instantiate and run AI agents. They specify:
|
|
62
|
+
|
|
63
|
+
- **Agent factory function**: Python function that creates the agent
|
|
64
|
+
- **Environment variables**: Required API keys and settings
|
|
65
|
+
- **Display name**: Human-readable identifier
|
|
66
|
+
|
|
67
|
+
### Pipelines and Indexes
|
|
68
|
+
|
|
69
|
+
- **Pipelines**: Define the processing workflow for queries and documents
|
|
70
|
+
- **Indexes**: Configure document storage and retrieval systems
|
|
71
|
+
- Both are managed as YAML configurations on the Deepset platform
|
|
72
|
+
|
|
73
|
+
## Tutorials
|
|
74
|
+
|
|
75
|
+
### Running Your First Benchmark
|
|
76
|
+
|
|
77
|
+
1. **Prepare your environment**:
|
|
78
|
+
```bash
|
|
79
|
+
export DEEPSET_API_KEY="your_api_key"
|
|
80
|
+
export DEEPSET_WORKSPACE="your_workspace"
|
|
81
|
+
export ANTHROPIC_API_KEY="your_anthropic_key"
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
2. **Validate your agent configuration**:
|
|
85
|
+
```bash
|
|
86
|
+
deepset agent validate-config agent_configs/generalist_agent.yml
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
3. **Run a single test case**:
|
|
90
|
+
```bash
|
|
91
|
+
deepset agent run agent_configs/generalist_agent.yml chat_rag_answers_wrong_format
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
This will:
|
|
95
|
+
- Create necessary pipelines and indexes
|
|
96
|
+
- Run the agent against the test case
|
|
97
|
+
- Validate the results
|
|
98
|
+
- Clean up resources
|
|
99
|
+
- Save detailed results to disk
|
|
100
|
+
|
|
101
|
+
4. **View the results**:
|
|
102
|
+
Results are saved in the `agent_runs/` directory with:
|
|
103
|
+
- Full message transcripts (`messages.json`)
|
|
104
|
+
- Performance metrics (`test_results.csv`)
|
|
105
|
+
- Pipeline configurations (`post_run_pipeline.yml`)
|
|
106
|
+
|
|
107
|
+
### Running Multiple Test Cases
|
|
108
|
+
|
|
109
|
+
Run all available test cases:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
deepset agent run-all agent_configs/generalist_agent.yml
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
With parallel execution:
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
deepset agent run-all agent_configs/generalist_agent.yml --concurrency 3
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Creating Test Cases
|
|
122
|
+
|
|
123
|
+
1. **Create a test case YAML file** in `tasks/`:
|
|
124
|
+
|
|
125
|
+
```yaml
|
|
126
|
+
name: "my_test_case"
|
|
127
|
+
objective: "Test pipeline validation"
|
|
128
|
+
prompt: "Please check my pipeline configuration"
|
|
129
|
+
query_yaml: "pipelines/my_pipeline.yml"
|
|
130
|
+
query_name: "test-pipeline"
|
|
131
|
+
index_yaml: "pipelines/my_index.yml"
|
|
132
|
+
index_name: "test-index"
|
|
133
|
+
tags:
|
|
134
|
+
- "validation"
|
|
135
|
+
- "debugging"
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
2. **Create the referenced pipeline files** in `tasks/pipelines/`
|
|
139
|
+
|
|
140
|
+
3. **Test your new case**:
|
|
141
|
+
```bash
|
|
142
|
+
deepset agent run agent_configs/generalist_agent.yml my_test_case
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## How-To Guides
|
|
146
|
+
|
|
147
|
+
### Managing Test Resources
|
|
148
|
+
|
|
149
|
+
#### Setup Test Cases Manually
|
|
150
|
+
|
|
151
|
+
Create all test case resources on the platform:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
deepset test setup-all --workspace your-workspace --concurrency 5
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Set up a specific test case:
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
deepset test setup my_test_case --workspace your-workspace
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
#### Cleanup Test Resources
|
|
164
|
+
|
|
165
|
+
Remove all test case resources:
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
deepset test teardown-all --workspace your-workspace
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
Remove a specific test case:
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
deepset test teardown my_test_case --workspace your-workspace
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Managing Pipelines and Indexes
|
|
178
|
+
|
|
179
|
+
#### Create Individual Resources
|
|
180
|
+
|
|
181
|
+
Create a pipeline from YAML file:
|
|
182
|
+
|
|
183
|
+
```bash
|
|
184
|
+
deepset pipeline create --path pipeline.yml --name my-pipeline --workspace your-workspace
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
Create an index from YAML content:
|
|
188
|
+
|
|
189
|
+
```bash
|
|
190
|
+
deepset index create --content "$(cat index.yml)" --name my-index --workspace your-workspace
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
#### Delete Resources
|
|
194
|
+
|
|
195
|
+
Delete a pipeline:
|
|
196
|
+
|
|
197
|
+
```bash
|
|
198
|
+
deepset pipeline delete --name my-pipeline --workspace your-workspace
|
|
199
|
+
```
|
|
200
|
+
|
|
201
|
+
Delete an index:
|
|
202
|
+
|
|
203
|
+
```bash
|
|
204
|
+
deepset index delete --name my-index --workspace your-workspace
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Environment Configuration
|
|
208
|
+
|
|
209
|
+
#### Using Environment Files
|
|
210
|
+
|
|
211
|
+
Create a `.env` file:
|
|
212
|
+
|
|
213
|
+
```bash
|
|
214
|
+
DEEPSET_API_KEY=your_api_key
|
|
215
|
+
DEEPSET_WORKSPACE=your_workspace
|
|
216
|
+
ANTHROPIC_API_KEY=your_anthropic_key
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
Use it with any command:
|
|
220
|
+
|
|
221
|
+
```bash
|
|
222
|
+
deepset agent run --env-file .env agent_configs/generalist_agent.yml test_case
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
#### Override Settings
|
|
226
|
+
|
|
227
|
+
Override workspace and API key:
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
deepset agent run agent_configs/generalist_agent.yml test_case \
|
|
231
|
+
--workspace different-workspace \
|
|
232
|
+
--api-key different-key
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Custom Output Directories
|
|
236
|
+
|
|
237
|
+
Specify where to save results:
|
|
238
|
+
|
|
239
|
+
```bash
|
|
240
|
+
deepset agent run agent_configs/generalist_agent.yml test_case \
|
|
241
|
+
--output-dir ./my_results
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
Specify test case directory:
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
deepset agent run agent_configs/generalist_agent.yml test_case \
|
|
248
|
+
--test-base-dir ./my_test_cases
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### Debugging and Monitoring
|
|
252
|
+
|
|
253
|
+
#### Check Environment Variables
|
|
254
|
+
|
|
255
|
+
Verify all required environment variables are set:
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
deepset agent check-env agent_configs/generalist_agent.yml
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
#### Validate Configurations
|
|
262
|
+
|
|
263
|
+
Check agent configuration syntax:
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
deepset agent validate-config agent_configs/generalist_agent.yml
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
#### View Test Case Lists
|
|
270
|
+
|
|
271
|
+
List available test cases:
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
deepset test list --test-dir ./my_test_cases
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
## Command Reference
|
|
278
|
+
|
|
279
|
+
### Agent Commands
|
|
280
|
+
|
|
281
|
+
- `deepset agent run` - Run agent against single test case
|
|
282
|
+
- `deepset agent run-all` - Run agent against all test cases
|
|
283
|
+
- `deepset agent check-env` - Verify environment configuration
|
|
284
|
+
- `deepset agent validate-config` - Validate agent configuration
|
|
285
|
+
|
|
286
|
+
### Test Management Commands
|
|
287
|
+
|
|
288
|
+
- `deepset test list` - List available test cases
|
|
289
|
+
- `deepset test setup` - Setup single test case resources
|
|
290
|
+
- `deepset test setup-all` - Setup all test case resources
|
|
291
|
+
- `deepset test teardown` - Remove single test case resources
|
|
292
|
+
- `deepset test teardown-all` - Remove all test case resources
|
|
293
|
+
|
|
294
|
+
### Pipeline Management Commands
|
|
295
|
+
|
|
296
|
+
- `deepset pipeline create` - Create new pipeline
|
|
297
|
+
- `deepset pipeline delete` - Delete existing pipeline
|
|
298
|
+
|
|
299
|
+
### Index Management Commands
|
|
300
|
+
|
|
301
|
+
- `deepset index create` - Create new index
|
|
302
|
+
- `deepset index delete` - Delete existing index
|
|
303
|
+
|
|
304
|
+
## Configuration Files
|
|
305
|
+
|
|
306
|
+
### Agent Configuration Format
|
|
307
|
+
|
|
308
|
+
```yaml
|
|
309
|
+
agent_factory_function: "module.path.to.get_agent"
|
|
310
|
+
display_name: "My Agent"
|
|
311
|
+
required_env_vars:
|
|
312
|
+
- DEEPSET_API_KEY
|
|
313
|
+
- DEEPSET_WORKSPACE
|
|
314
|
+
- ANTHROPIC_API_KEY
|
|
315
|
+
```
|
|
316
|
+
|
|
317
|
+
### Test Case Configuration Format
|
|
318
|
+
|
|
319
|
+
```yaml
|
|
320
|
+
name: "test_case_name"
|
|
321
|
+
objective: "Description of what this test validates"
|
|
322
|
+
prompt: "The message sent to the agent"
|
|
323
|
+
query_yaml: "relative/path/to/pipeline.yml" # Optional
|
|
324
|
+
query_name: "pipeline-name" # Required if query_yaml present
|
|
325
|
+
index_yaml: "relative/path/to/index.yml" # Optional
|
|
326
|
+
index_name: "index-name" # Required if index_yaml present
|
|
327
|
+
expected_query: "path/to/expected.yml" # Optional validation reference
|
|
328
|
+
tags:
|
|
329
|
+
- "category"
|
|
330
|
+
- "type"
|
|
331
|
+
judge_prompt: "Optional prompt for LLM validation" # Optional
|
|
332
|
+
```
|
|
333
|
+
|
|
334
|
+
## Result Analysis
|
|
335
|
+
|
|
336
|
+
### Understanding Output Files
|
|
337
|
+
|
|
338
|
+
Each test run produces:
|
|
339
|
+
|
|
340
|
+
1. **`messages.json`**: Complete conversation transcript with the agent
|
|
341
|
+
2. **`test_results.csv`**: Performance metrics and validation results
|
|
342
|
+
3. **`post_run_pipeline.yml`**: Final pipeline configuration after agent modifications
|
|
343
|
+
|
|
344
|
+
### Performance Metrics
|
|
345
|
+
|
|
346
|
+
The system tracks:
|
|
347
|
+
|
|
348
|
+
- **Token usage**: Prompt and completion tokens consumed
|
|
349
|
+
- **Tool calls**: Number of API calls made by the agent
|
|
350
|
+
- **Validation status**: Pre- and post-run pipeline validation results
|
|
351
|
+
- **Model information**: Which AI model was used
|
|
352
|
+
|
|
353
|
+
### Aggregate Analysis
|
|
354
|
+
|
|
355
|
+
When running multiple test cases, the system provides:
|
|
356
|
+
|
|
357
|
+
- Success/failure counts
|
|
358
|
+
- Total resource consumption
|
|
359
|
+
- Per-test-case breakdowns
|
|
360
|
+
- Cleanup status reports
|
|
361
|
+
|
|
362
|
+
## Troubleshooting
|
|
363
|
+
|
|
364
|
+
### Common Issues
|
|
365
|
+
|
|
366
|
+
**Environment variable errors**:
|
|
367
|
+
- Ensure all required variables are set
|
|
368
|
+
- Use `deepset agent check-env` to verify configuration
|
|
369
|
+
|
|
370
|
+
**Test case not found**:
|
|
371
|
+
- Check test case directory path
|
|
372
|
+
- Verify YAML file exists and is properly named
|
|
373
|
+
- Use `deepset test list` to see available cases
|
|
374
|
+
|
|
375
|
+
**Validation failures**:
|
|
376
|
+
- Review pipeline YAML syntax
|
|
377
|
+
- Check component type names and parameters
|
|
378
|
+
- Use Deepset Cloud UI to validate manually
|
|
379
|
+
|
|
380
|
+
**Resource conflicts**:
|
|
381
|
+
- Ensure unique names for pipelines and indexes
|
|
382
|
+
- Clean up existing resources before running tests
|
|
383
|
+
- Use a different workspace for testing
|
|
384
|
+
|
|
385
|
+
**Permission errors**:
|
|
386
|
+
- Verify API key has sufficient permissions
|
|
387
|
+
- Check workspace access rights
|
|
388
|
+
- Confirm network connectivity to Deepset Cloud
|
|
389
|
+
|
|
390
|
+
### Getting Help
|
|
391
|
+
|
|
392
|
+
1. **Check logs**: Review detailed error messages in command output
|
|
393
|
+
2. **Validate configs**: Use validation commands before running tests
|
|
394
|
+
3. **Test incrementally**: Start with single test cases before batch runs
|
|
395
|
+
4. **Clean environment**: Remove conflicting resources and retry
|
|
396
|
+
|
|
397
|
+
## Best Practices
|
|
398
|
+
|
|
399
|
+
### Test Organization
|
|
400
|
+
|
|
401
|
+
- Use descriptive test case names with underscores
|
|
402
|
+
- Group related tests with consistent tag names
|
|
403
|
+
- Keep pipeline files organized in subdirectories
|
|
404
|
+
- Document test objectives clearly
|
|
405
|
+
|
|
406
|
+
### Resource Management
|
|
407
|
+
|
|
408
|
+
- Always clean up test resources after experiments
|
|
409
|
+
- Use unique names to avoid conflicts
|
|
410
|
+
- Prefer automated setup/teardown over manual management
|
|
411
|
+
- Monitor resource usage in your workspace
|
|
412
|
+
|
|
413
|
+
### Performance Optimization
|
|
414
|
+
|
|
415
|
+
- Use appropriate concurrency levels (start with 1-3)
|
|
416
|
+
- Set reasonable token limits for cost control
|
|
417
|
+
- Cache common pipeline configurations
|
|
418
|
+
- Run expensive tests separately from quick validation tests
|
|
419
|
+
|
|
420
|
+
### Environment Management
|
|
421
|
+
|
|
422
|
+
- Use environment files for consistent configuration
|
|
423
|
+
- Never commit API keys to version control
|
|
424
|
+
- Use different workspaces for development and testing
|
|
425
|
+
- Validate environment before important test runs
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
agent_factory_function: "deepset_mcp.agents.debugging.debugging_agent.get_agent"
|
|
2
|
+
display_name: "debugging-agent"
|
|
3
|
+
interactive: true
|
|
4
|
+
required_env_vars:
|
|
5
|
+
- DEEPSET_API_KEY
|
|
6
|
+
- DEEPSET_WORKSPACE
|
|
7
|
+
- ANTHROPIC_API_KEY
|
|
8
|
+
- DEEPSET_DOCS_WORKSPACE
|
|
9
|
+
- DEEPSET_DOCS_API_KEY
|
|
10
|
+
- DEEPSET_DOCS_PIPELINE_NAME
|
|
File without changes
|