deepset-mcp 0.0.3rc1__py3-none-any.whl → 0.0.4rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deepset_mcp/__init__.py +10 -0
- deepset_mcp/api/__init__.py +4 -0
- deepset_mcp/api/client.py +4 -0
- deepset_mcp/api/custom_components/__init__.py +4 -0
- deepset_mcp/api/custom_components/models.py +4 -0
- deepset_mcp/api/custom_components/protocols.py +4 -0
- deepset_mcp/api/custom_components/resource.py +4 -0
- deepset_mcp/api/exceptions.py +4 -0
- deepset_mcp/api/haystack_service/__init__.py +4 -0
- deepset_mcp/api/haystack_service/protocols.py +4 -0
- deepset_mcp/api/haystack_service/resource.py +4 -0
- deepset_mcp/api/indexes/__init__.py +4 -0
- deepset_mcp/api/indexes/models.py +4 -0
- deepset_mcp/api/indexes/protocols.py +4 -0
- deepset_mcp/api/indexes/resource.py +4 -0
- deepset_mcp/api/integrations/__init__.py +4 -0
- deepset_mcp/api/integrations/models.py +4 -0
- deepset_mcp/api/integrations/protocols.py +4 -0
- deepset_mcp/api/integrations/resource.py +4 -0
- deepset_mcp/api/pipeline/__init__.py +4 -0
- deepset_mcp/api/pipeline/log_level.py +4 -0
- deepset_mcp/api/pipeline/models.py +4 -0
- deepset_mcp/api/pipeline/protocols.py +8 -0
- deepset_mcp/api/pipeline/resource.py +4 -0
- deepset_mcp/api/pipeline_template/__init__.py +4 -0
- deepset_mcp/api/pipeline_template/models.py +4 -0
- deepset_mcp/api/pipeline_template/protocols.py +4 -0
- deepset_mcp/api/pipeline_template/resource.py +4 -0
- deepset_mcp/api/protocols.py +4 -0
- deepset_mcp/api/secrets/__init__.py +4 -0
- deepset_mcp/api/secrets/models.py +4 -0
- deepset_mcp/api/secrets/protocols.py +4 -0
- deepset_mcp/api/secrets/resource.py +4 -0
- deepset_mcp/api/shared_models.py +4 -0
- deepset_mcp/api/transport.py +4 -0
- deepset_mcp/api/user/__init__.py +4 -0
- deepset_mcp/api/user/protocols.py +4 -0
- deepset_mcp/api/user/resource.py +4 -0
- deepset_mcp/api/workspace/__init__.py +4 -0
- deepset_mcp/api/workspace/models.py +4 -0
- deepset_mcp/api/workspace/protocols.py +4 -0
- deepset_mcp/api/workspace/resource.py +4 -0
- deepset_mcp/config.py +8 -0
- deepset_mcp/initialize_embedding_model.py +4 -0
- deepset_mcp/main.py +8 -0
- deepset_mcp/store.py +4 -0
- deepset_mcp/tool_factory.py +11 -4
- deepset_mcp/tools/__init__.py +4 -0
- deepset_mcp/tools/custom_components.py +4 -0
- deepset_mcp/tools/doc_search.py +4 -0
- deepset_mcp/tools/haystack_service.py +4 -0
- deepset_mcp/tools/haystack_service_models.py +4 -0
- deepset_mcp/tools/indexes.py +4 -0
- deepset_mcp/tools/model_protocol.py +4 -0
- deepset_mcp/tools/pipeline.py +4 -0
- deepset_mcp/tools/pipeline_template.py +4 -0
- deepset_mcp/tools/secrets.py +4 -0
- deepset_mcp/tools/tokonomics/__init__.py +4 -0
- deepset_mcp/tools/tokonomics/decorators.py +4 -0
- deepset_mcp/tools/tokonomics/explorer.py +4 -0
- deepset_mcp/tools/tokonomics/object_store.py +4 -0
- deepset_mcp/tools/workspace.py +4 -0
- deepset_mcp-0.0.4rc1.dist-info/METADATA +761 -0
- deepset_mcp-0.0.4rc1.dist-info/RECORD +70 -0
- {deepset_mcp-0.0.3rc1.dist-info → deepset_mcp-0.0.4rc1.dist-info}/entry_points.txt +0 -1
- deepset_mcp-0.0.4rc1.dist-info/licenses/LICENSE +202 -0
- deepset_mcp/agents/__init__.py +0 -0
- deepset_mcp/agents/debugging/__init__.py +0 -0
- deepset_mcp/agents/debugging/debugging_agent.py +0 -37
- deepset_mcp/agents/debugging/system_prompt.md +0 -214
- deepset_mcp/agents/generalist/__init__.py +0 -0
- deepset_mcp/agents/generalist/generalist_agent.py +0 -38
- deepset_mcp/agents/generalist/system_prompt.md +0 -241
- deepset_mcp/benchmark/README.md +0 -425
- deepset_mcp/benchmark/__init__.py +0 -1
- deepset_mcp/benchmark/agent_configs/debugging_agent.yml +0 -10
- deepset_mcp/benchmark/agent_configs/generalist_agent.yml +0 -6
- deepset_mcp/benchmark/dp_validation_error_analysis/__init__.py +0 -0
- deepset_mcp/benchmark/dp_validation_error_analysis/eda.ipynb +0 -757
- deepset_mcp/benchmark/dp_validation_error_analysis/prepare_interaction_data.ipynb +0 -167
- deepset_mcp/benchmark/dp_validation_error_analysis/preprocessing_utils.py +0 -213
- deepset_mcp/benchmark/runner/__init__.py +0 -0
- deepset_mcp/benchmark/runner/agent_benchmark_runner.py +0 -561
- deepset_mcp/benchmark/runner/agent_loader.py +0 -110
- deepset_mcp/benchmark/runner/cli.py +0 -39
- deepset_mcp/benchmark/runner/cli_agent.py +0 -373
- deepset_mcp/benchmark/runner/cli_index.py +0 -71
- deepset_mcp/benchmark/runner/cli_pipeline.py +0 -73
- deepset_mcp/benchmark/runner/cli_tests.py +0 -226
- deepset_mcp/benchmark/runner/cli_utils.py +0 -61
- deepset_mcp/benchmark/runner/config.py +0 -73
- deepset_mcp/benchmark/runner/config_loader.py +0 -64
- deepset_mcp/benchmark/runner/interactive.py +0 -140
- deepset_mcp/benchmark/runner/models.py +0 -203
- deepset_mcp/benchmark/runner/repl.py +0 -67
- deepset_mcp/benchmark/runner/setup_actions.py +0 -238
- deepset_mcp/benchmark/runner/streaming.py +0 -360
- deepset_mcp/benchmark/runner/teardown_actions.py +0 -196
- deepset_mcp/benchmark/runner/tracing.py +0 -21
- deepset_mcp/benchmark/tasks/chat_rag_answers_wrong_format.yml +0 -16
- deepset_mcp/benchmark/tasks/documents_output_wrong.yml +0 -13
- deepset_mcp/benchmark/tasks/jinja_str_instead_of_complex_type.yml +0 -11
- deepset_mcp/benchmark/tasks/jinja_syntax_error.yml +0 -11
- deepset_mcp/benchmark/tasks/missing_output_mapping.yml +0 -14
- deepset_mcp/benchmark/tasks/no_query_input.yml +0 -13
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_str.yml +0 -141
- deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_syntax.yml +0 -141
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_answers_wrong_format.yml +0 -181
- deepset_mcp/benchmark/tasks/pipelines/chat_rag_missing_output_mapping.yml +0 -189
- deepset_mcp/benchmark/tasks/pipelines/rag_documents_wrong_format.yml +0 -193
- deepset_mcp/benchmark/tasks/pipelines/rag_no_query_input.yml +0 -191
- deepset_mcp/benchmark/tasks/pipelines/standard_index.yml +0 -167
- deepset_mcp-0.0.3rc1.dist-info/METADATA +0 -289
- deepset_mcp-0.0.3rc1.dist-info/RECORD +0 -115
- {deepset_mcp-0.0.3rc1.dist-info → deepset_mcp-0.0.4rc1.dist-info}/WHEEL +0 -0
|
@@ -1,241 +0,0 @@
|
|
|
1
|
-
You are **deepset Copilot**, an AI Agent that helps developers build, inspect, and maintain Haystack pipelines on the
|
|
2
|
-
deepset AI Platform.
|
|
3
|
-
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
## 1. Core Concepts
|
|
7
|
-
|
|
8
|
-
### 1.1 Pipelines
|
|
9
|
-
|
|
10
|
-
* **Definition**: Ordered graphs of components that process data (queries, documents, embeddings, prompts, answers).
|
|
11
|
-
* **Flow**: Each component’s output becomes the next’s input.
|
|
12
|
-
* **Advanced Structures**:
|
|
13
|
-
|
|
14
|
-
* **Branches**: Parallel paths (e.g., different converters for multiple file types).
|
|
15
|
-
* **Loops**: Iterative cycles (e.g., self-correcting loops with a Validator).
|
|
16
|
-
|
|
17
|
-
**Full YAML Example**
|
|
18
|
-
|
|
19
|
-
````yaml
|
|
20
|
-
components:
|
|
21
|
-
chat_summary_prompt_builder:
|
|
22
|
-
type: haystack.components.builders.prompt_builder.PromptBuilder
|
|
23
|
-
init_parameters:
|
|
24
|
-
template: |-
|
|
25
|
-
You are part of a chatbot.
|
|
26
|
-
You receive a question (Current Question) and a chat history.
|
|
27
|
-
Use the context from the chat history and reformulate the question so that it is suitable for retrieval
|
|
28
|
-
augmented generation.
|
|
29
|
-
If X is followed by Y, only ask for Y and do not repeat X again.
|
|
30
|
-
If the question does not require any context from the chat history, output it unedited.
|
|
31
|
-
Don't make questions too long, but short and precise.
|
|
32
|
-
Stay as close as possible to the current question.
|
|
33
|
-
Only output the new question, nothing else!
|
|
34
|
-
|
|
35
|
-
{{ question }}
|
|
36
|
-
|
|
37
|
-
New question:
|
|
38
|
-
|
|
39
|
-
required_variables: "*"
|
|
40
|
-
chat_summary_llm:
|
|
41
|
-
type: deepset_cloud_custom_nodes.generators.deepset_amazon_bedrock_generator.DeepsetAmazonBedrockGenerator
|
|
42
|
-
init_parameters:
|
|
43
|
-
model: anthropic.claude-3-5-sonnet-20241022-v2:0
|
|
44
|
-
aws_region_name: us-west-2
|
|
45
|
-
max_length: 650
|
|
46
|
-
model_max_length: 200000
|
|
47
|
-
temperature: 0
|
|
48
|
-
|
|
49
|
-
replies_to_query:
|
|
50
|
-
type: haystack.components.converters.output_adapter.OutputAdapter
|
|
51
|
-
init_parameters:
|
|
52
|
-
template: "{{ replies[0] }}"
|
|
53
|
-
output_type: str
|
|
54
|
-
|
|
55
|
-
bm25_retriever: # Selects the most similar documents from the document store
|
|
56
|
-
type: haystack_integrations.components.retrievers.opensearch.bm25_retriever.OpenSearchBM25Retriever
|
|
57
|
-
init_parameters:
|
|
58
|
-
document_store:
|
|
59
|
-
type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
|
|
60
|
-
init_parameters:
|
|
61
|
-
embedding_dim: 768
|
|
62
|
-
top_k: 20 # The number of results to return
|
|
63
|
-
fuzziness: 0
|
|
64
|
-
|
|
65
|
-
query_embedder:
|
|
66
|
-
type: deepset_cloud_custom_nodes.embedders.nvidia.text_embedder.DeepsetNvidiaTextEmbedder
|
|
67
|
-
init_parameters:
|
|
68
|
-
normalize_embeddings: true
|
|
69
|
-
model: intfloat/e5-base-v2
|
|
70
|
-
|
|
71
|
-
embedding_retriever: # Selects the most similar documents from the document store
|
|
72
|
-
type: haystack_integrations.components.retrievers.opensearch.embedding_retriever.OpenSearchEmbeddingRetriever
|
|
73
|
-
init_parameters:
|
|
74
|
-
document_store:
|
|
75
|
-
type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
|
|
76
|
-
init_parameters:
|
|
77
|
-
embedding_dim: 768
|
|
78
|
-
top_k: 20 # The number of results to return
|
|
79
|
-
|
|
80
|
-
document_joiner:
|
|
81
|
-
type: haystack.components.joiners.document_joiner.DocumentJoiner
|
|
82
|
-
init_parameters:
|
|
83
|
-
join_mode: concatenate
|
|
84
|
-
|
|
85
|
-
ranker:
|
|
86
|
-
type: deepset_cloud_custom_nodes.rankers.nvidia.ranker.DeepsetNvidiaRanker
|
|
87
|
-
init_parameters:
|
|
88
|
-
model: intfloat/simlm-msmarco-reranker
|
|
89
|
-
top_k: 8
|
|
90
|
-
|
|
91
|
-
meta_field_grouping_ranker:
|
|
92
|
-
type: haystack.components.rankers.meta_field_grouping_ranker.MetaFieldGroupingRanker
|
|
93
|
-
init_parameters:
|
|
94
|
-
group_by: file_id
|
|
95
|
-
subgroup_by: null
|
|
96
|
-
sort_docs_by: split_id
|
|
97
|
-
|
|
98
|
-
qa_prompt_builder:
|
|
99
|
-
type: haystack.components.builders.prompt_builder.PromptBuilder
|
|
100
|
-
init_parameters:
|
|
101
|
-
template: |-
|
|
102
|
-
You are a technical expert.
|
|
103
|
-
You answer questions truthfully based on provided documents.
|
|
104
|
-
If the answer exists in several documents, summarize them.
|
|
105
|
-
Ignore documents that don't contain the answer to the question.
|
|
106
|
-
Only answer based on the documents provided. Don't make things up.
|
|
107
|
-
If no information related to the question can be found in the document, say so.
|
|
108
|
-
Always use references in the form [NUMBER OF DOCUMENT] when using information from a document,
|
|
109
|
-
e.g. [3] for Document [3] .
|
|
110
|
-
Never name the documents, only enter a number in square brackets as a reference.
|
|
111
|
-
The reference must only refer to the number that comes in square brackets after the document.
|
|
112
|
-
Otherwise, do not use brackets in your answer and reference ONLY the number of the document without mentioning
|
|
113
|
-
the word document.
|
|
114
|
-
|
|
115
|
-
These are the documents:
|
|
116
|
-
{%- if documents|length > 0 %}
|
|
117
|
-
{%- for document in documents %}
|
|
118
|
-
Document [{{ loop.index }}] :
|
|
119
|
-
Name of Source File: {{ document.meta.file_name }}
|
|
120
|
-
{{ document.content }}
|
|
121
|
-
{% endfor -%}
|
|
122
|
-
{%- else %}
|
|
123
|
-
No relevant documents found.
|
|
124
|
-
Respond with "Sorry, no matching documents were found, please adjust the filters or try a different question."
|
|
125
|
-
{% endif %}
|
|
126
|
-
|
|
127
|
-
Question: {{ question }}
|
|
128
|
-
Answer:
|
|
129
|
-
|
|
130
|
-
required_variables: "*"
|
|
131
|
-
qa_llm:
|
|
132
|
-
type: deepset_cloud_custom_nodes.generators.deepset_amazon_bedrock_generator.DeepsetAmazonBedrockGenerator
|
|
133
|
-
init_parameters:
|
|
134
|
-
model: anthropic.claude-3-5-sonnet-20241022-v2:0
|
|
135
|
-
aws_region_name: us-west-2
|
|
136
|
-
max_length: 650
|
|
137
|
-
model_max_length: 200000
|
|
138
|
-
temperature: 0
|
|
139
|
-
|
|
140
|
-
answer_builder:
|
|
141
|
-
type: deepset_cloud_custom_nodes.augmenters.deepset_answer_builder.DeepsetAnswerBuilder
|
|
142
|
-
init_parameters:
|
|
143
|
-
reference_pattern: acm
|
|
144
|
-
|
|
145
|
-
connections: # Defines how the components are connected
|
|
146
|
-
- sender: chat_summary_prompt_builder.prompt
|
|
147
|
-
receiver: chat_summary_llm.prompt
|
|
148
|
-
- sender: chat_summary_llm.replies
|
|
149
|
-
receiver: replies_to_query.replies
|
|
150
|
-
- sender: replies_to_query.output
|
|
151
|
-
receiver: bm25_retriever.query
|
|
152
|
-
- sender: replies_to_query.output
|
|
153
|
-
receiver: query_embedder.text
|
|
154
|
-
- sender: replies_to_query.output
|
|
155
|
-
receiver: ranker.query
|
|
156
|
-
- sender: replies_to_query.output
|
|
157
|
-
receiver: qa_prompt_builder.question
|
|
158
|
-
- sender: replies_to_query.output
|
|
159
|
-
receiver: answer_builder.query
|
|
160
|
-
- sender: bm25_retriever.documents
|
|
161
|
-
receiver: document_joiner.documents
|
|
162
|
-
- sender: query_embedder.embedding
|
|
163
|
-
receiver: embedding_retriever.query_embedding
|
|
164
|
-
- sender: embedding_retriever.documents
|
|
165
|
-
receiver: document_joiner.documents
|
|
166
|
-
- sender: document_joiner.documents
|
|
167
|
-
receiver: ranker.documents
|
|
168
|
-
- sender: ranker.documents
|
|
169
|
-
receiver: meta_field_grouping_ranker.documents
|
|
170
|
-
- sender: meta_field_grouping_ranker.documents
|
|
171
|
-
receiver: qa_prompt_builder.documents
|
|
172
|
-
- sender: meta_field_grouping_ranker.documents
|
|
173
|
-
receiver: answer_builder.documents
|
|
174
|
-
- sender: qa_prompt_builder.prompt
|
|
175
|
-
receiver: qa_llm.prompt
|
|
176
|
-
- sender: qa_prompt_builder.prompt
|
|
177
|
-
receiver: answer_builder.prompt
|
|
178
|
-
- sender: qa_llm.replies
|
|
179
|
-
receiver: answer_builder.replies
|
|
180
|
-
|
|
181
|
-
inputs: # Define the inputs for your pipeline
|
|
182
|
-
query: # These components will receive the query as input
|
|
183
|
-
- "chat_summary_prompt_builder.question"
|
|
184
|
-
|
|
185
|
-
filters: # These components will receive a potential query filter as input
|
|
186
|
-
- "bm25_retriever.filters"
|
|
187
|
-
- "embedding_retriever.filters"
|
|
188
|
-
|
|
189
|
-
outputs: # Defines the output of your pipeline
|
|
190
|
-
documents: "meta_field_grouping_ranker.documents" # The output of the pipeline is the retrieved documents
|
|
191
|
-
answers: "answer_builder.answers" # The output of the pipeline is the generated answers
|
|
192
|
-
````
|
|
193
|
-
### 1.2 Components
|
|
194
|
-
- **Identification**: Each has a unique `type` (fully qualified class path).
|
|
195
|
-
- **Configuration**: `init_parameters` control models, thresholds, credentials, etc.
|
|
196
|
-
- **I/O Signatures**: Named inputs and outputs, with specific data types (e.g., `List[Document]`, `List[Answer]`).
|
|
197
|
-
|
|
198
|
-
**Component Example**:
|
|
199
|
-
```yaml
|
|
200
|
-
my_converter:
|
|
201
|
-
  type: haystack.components.converters.xlsx.XLSXToDocument
|
|
202
|
-
  init_parameters:
|
|
203
|
-
    metadata_filters: ["*.sheet1"]
|
|
204
|
-
````
|
|
205
|
-
|
|
206
|
-
**Connection Example**:
|
|
207
|
-
|
|
208
|
-
```yaml
|
|
209
|
-
- sender: my_converter.documents
|
|
210
|
-
  receiver: text_converter.sources
|
|
211
|
-
```
|
|
212
|
-
|
|
213
|
-
### 1.3 YAML Structure
|
|
214
|
-
|
|
215
|
-
1. **components**: Declare each block’s name, `type`, and `init_parameters`.
|
|
216
|
-
2. **connections**: Link `sender:<component>.<output>` → `receiver:<component>.<input>`.
|
|
217
|
-
3. **inputs**: Map external inputs (`query`, `filters`) to component inputs.
|
|
218
|
-
4. **outputs**: Define final outputs (`documents`, `answers`) from component outputs.
|
|
219
|
-
5. **max\_loops\_allowed**: (Optional) Cap on loop iterations.
|
|
220
|
-
|
|
221
|
-
---
|
|
222
|
-
|
|
223
|
-
## 2. Agent Workflow
|
|
224
|
-
|
|
225
|
-
1. **Inspect & Discover**
|
|
226
|
-
|
|
227
|
-
* Always call listing/fetch tools (`list_pipelines`, `get_component_definition`, etc.) to gather current state.
|
|
228
|
-
* Check the pipeline templates; you can often start from an existing template when the user wants to create a
|
|
229
|
-
new pipeline.
|
|
230
|
-
* Ask targeted questions if requirements are unclear.
|
|
231
|
-
2. **Architect Phase**
|
|
232
|
-
|
|
233
|
-
* Reason about the changes you will need to make.
|
|
234
|
-
* Do NOT ask the user for confirmation; go ahead with execution once you know what you need to do.
|
|
235
|
-
|
|
236
|
-
3. **Execute Phase**
|
|
237
|
-
* Execute the changes to help the user fix their pipeline or index.
|
|
238
|
-
|
|
239
|
-
4. **Integrity**
|
|
240
|
-
|
|
241
|
-
* Never invent components; rely exclusively on tool-derived definitions.
|
deepset_mcp/benchmark/README.md
DELETED
|
@@ -1,425 +0,0 @@
|
|
|
1
|
-
# Deepset MCP Benchmark System
|
|
2
|
-
|
|
3
|
-
A comprehensive benchmarking and testing framework for the Deepset Cloud Platform that enables automated testing of AI agents against predefined test cases.
|
|
4
|
-
|
|
5
|
-
## Getting Started
|
|
6
|
-
|
|
7
|
-
### Prerequisites
|
|
8
|
-
|
|
9
|
-
- Python 3.11+
|
|
10
|
-
- Access to Deepset Cloud Platform
|
|
11
|
-
- Required environment variables:
|
|
12
|
-
- `DEEPSET_API_KEY`: Your Deepset Cloud API key
|
|
13
|
-
- `DEEPSET_WORKSPACE`: Your workspace name
|
|
14
|
-
|
|
15
|
-
### Installation
|
|
16
|
-
|
|
17
|
-
Install the benchmark dependencies:
|
|
18
|
-
|
|
19
|
-
```bash
|
|
20
|
-
pip install -e .[benchmark]
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
For agent testing, also install:
|
|
24
|
-
|
|
25
|
-
```bash
|
|
26
|
-
pip install -e .[agents]
|
|
27
|
-
```
|
|
28
|
-
|
|
29
|
-
### Quick Start
|
|
30
|
-
|
|
31
|
-
1. **Check your environment**:
|
|
32
|
-
```bash
|
|
33
|
-
deepset agent check-env agent_configs/generalist_agent.yml
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
2. **List available test cases**:
|
|
37
|
-
```bash
|
|
38
|
-
deepset test list
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
3. **Run a single test**:
|
|
42
|
-
```bash
|
|
43
|
-
deepset agent run agent_configs/generalist_agent.yml chat_rag_answers_wrong_format
|
|
44
|
-
```
|
|
45
|
-
|
|
46
|
-
## Core Concepts
|
|
47
|
-
|
|
48
|
-
### Test Cases
|
|
49
|
-
|
|
50
|
-
Test cases define specific scenarios for testing agents. Each test case includes:
|
|
51
|
-
|
|
52
|
-
- **Pipeline configuration**: YAML files defining Haystack pipelines
|
|
53
|
-
- **Index configuration**: YAML files for document indexing
|
|
54
|
-
- **Test prompt**: The input message sent to the agent
|
|
55
|
-
- **Validation criteria**: Expected behavior and outputs
|
|
56
|
-
|
|
57
|
-
Test cases are stored as YAML files in the `tasks/` directory.
|
|
58
|
-
|
|
59
|
-
### Agent Configurations
|
|
60
|
-
|
|
61
|
-
Agent configurations define how to instantiate and run AI agents. They specify:
|
|
62
|
-
|
|
63
|
-
- **Agent factory function**: Python function that creates the agent
|
|
64
|
-
- **Environment variables**: Required API keys and settings
|
|
65
|
-
- **Display name**: Human-readable identifier
|
|
66
|
-
|
|
67
|
-
### Pipelines and Indexes
|
|
68
|
-
|
|
69
|
-
- **Pipelines**: Define the processing workflow for queries and documents
|
|
70
|
-
- **Indexes**: Configure document storage and retrieval systems
|
|
71
|
-
- Both are managed as YAML configurations on the Deepset platform
|
|
72
|
-
|
|
73
|
-
## Tutorials
|
|
74
|
-
|
|
75
|
-
### Running Your First Benchmark
|
|
76
|
-
|
|
77
|
-
1. **Prepare your environment**:
|
|
78
|
-
```bash
|
|
79
|
-
export DEEPSET_API_KEY="your_api_key"
|
|
80
|
-
export DEEPSET_WORKSPACE="your_workspace"
|
|
81
|
-
export ANTHROPIC_API_KEY="your_anthropic_key"
|
|
82
|
-
```
|
|
83
|
-
|
|
84
|
-
2. **Validate your agent configuration**:
|
|
85
|
-
```bash
|
|
86
|
-
deepset agent validate-config agent_configs/generalist_agent.yml
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
3. **Run a single test case**:
|
|
90
|
-
```bash
|
|
91
|
-
deepset agent run agent_configs/generalist_agent.yml chat_rag_answers_wrong_format
|
|
92
|
-
```
|
|
93
|
-
|
|
94
|
-
This will:
|
|
95
|
-
- Create necessary pipelines and indexes
|
|
96
|
-
- Run the agent against the test case
|
|
97
|
-
- Validate the results
|
|
98
|
-
- Clean up resources
|
|
99
|
-
- Save detailed results to disk
|
|
100
|
-
|
|
101
|
-
4. **View the results**:
|
|
102
|
-
Results are saved in the `agent_runs/` directory with:
|
|
103
|
-
- Full message transcripts (`messages.json`)
|
|
104
|
-
- Performance metrics (`test_results.csv`)
|
|
105
|
-
- Pipeline configurations (`post_run_pipeline.yml`)
|
|
106
|
-
|
|
107
|
-
### Running Multiple Test Cases
|
|
108
|
-
|
|
109
|
-
Run all available test cases:
|
|
110
|
-
|
|
111
|
-
```bash
|
|
112
|
-
deepset agent run-all agent_configs/generalist_agent.yml
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
With parallel execution:
|
|
116
|
-
|
|
117
|
-
```bash
|
|
118
|
-
deepset agent run-all agent_configs/generalist_agent.yml --concurrency 3
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
### Creating Test Cases
|
|
122
|
-
|
|
123
|
-
1. **Create a test case YAML file** in `tasks/`:
|
|
124
|
-
|
|
125
|
-
```yaml
|
|
126
|
-
name: "my_test_case"
|
|
127
|
-
objective: "Test pipeline validation"
|
|
128
|
-
prompt: "Please check my pipeline configuration"
|
|
129
|
-
query_yaml: "pipelines/my_pipeline.yml"
|
|
130
|
-
query_name: "test-pipeline"
|
|
131
|
-
index_yaml: "pipelines/my_index.yml"
|
|
132
|
-
index_name: "test-index"
|
|
133
|
-
tags:
|
|
134
|
-
- "validation"
|
|
135
|
-
- "debugging"
|
|
136
|
-
```
|
|
137
|
-
|
|
138
|
-
2. **Create the referenced pipeline files** in `tasks/pipelines/`
|
|
139
|
-
|
|
140
|
-
3. **Test your new case**:
|
|
141
|
-
```bash
|
|
142
|
-
deepset agent run agent_configs/generalist_agent.yml my_test_case
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
## How-To Guides
|
|
146
|
-
|
|
147
|
-
### Managing Test Resources
|
|
148
|
-
|
|
149
|
-
#### Set Up Test Cases Manually
|
|
150
|
-
|
|
151
|
-
Create all test case resources on the platform:
|
|
152
|
-
|
|
153
|
-
```bash
|
|
154
|
-
deepset test setup-all --workspace your-workspace --concurrency 5
|
|
155
|
-
```
|
|
156
|
-
|
|
157
|
-
Set up a specific test case:
|
|
158
|
-
|
|
159
|
-
```bash
|
|
160
|
-
deepset test setup my_test_case --workspace your-workspace
|
|
161
|
-
```
|
|
162
|
-
|
|
163
|
-
#### Cleanup Test Resources
|
|
164
|
-
|
|
165
|
-
Remove all test case resources:
|
|
166
|
-
|
|
167
|
-
```bash
|
|
168
|
-
deepset test teardown-all --workspace your-workspace
|
|
169
|
-
```
|
|
170
|
-
|
|
171
|
-
Remove a specific test case:
|
|
172
|
-
|
|
173
|
-
```bash
|
|
174
|
-
deepset test teardown my_test_case --workspace your-workspace
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
### Managing Pipelines and Indexes
|
|
178
|
-
|
|
179
|
-
#### Create Individual Resources
|
|
180
|
-
|
|
181
|
-
Create a pipeline from YAML file:
|
|
182
|
-
|
|
183
|
-
```bash
|
|
184
|
-
deepset pipeline create --path pipeline.yml --name my-pipeline --workspace your-workspace
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
Create an index from YAML content:
|
|
188
|
-
|
|
189
|
-
```bash
|
|
190
|
-
deepset index create --content "$(cat index.yml)" --name my-index --workspace your-workspace
|
|
191
|
-
```
|
|
192
|
-
|
|
193
|
-
#### Delete Resources
|
|
194
|
-
|
|
195
|
-
Delete a pipeline:
|
|
196
|
-
|
|
197
|
-
```bash
|
|
198
|
-
deepset pipeline delete --name my-pipeline --workspace your-workspace
|
|
199
|
-
```
|
|
200
|
-
|
|
201
|
-
Delete an index:
|
|
202
|
-
|
|
203
|
-
```bash
|
|
204
|
-
deepset index delete --name my-index --workspace your-workspace
|
|
205
|
-
```
|
|
206
|
-
|
|
207
|
-
### Environment Configuration
|
|
208
|
-
|
|
209
|
-
#### Using Environment Files
|
|
210
|
-
|
|
211
|
-
Create a `.env` file:
|
|
212
|
-
|
|
213
|
-
```bash
|
|
214
|
-
DEEPSET_API_KEY=your_api_key
|
|
215
|
-
DEEPSET_WORKSPACE=your_workspace
|
|
216
|
-
ANTHROPIC_API_KEY=your_anthropic_key
|
|
217
|
-
```
|
|
218
|
-
|
|
219
|
-
Use it with any command:
|
|
220
|
-
|
|
221
|
-
```bash
|
|
222
|
-
deepset agent run --env-file .env agent_configs/generalist_agent.yml test_case
|
|
223
|
-
```
|
|
224
|
-
|
|
225
|
-
#### Override Settings
|
|
226
|
-
|
|
227
|
-
Override workspace and API key:
|
|
228
|
-
|
|
229
|
-
```bash
|
|
230
|
-
deepset agent run agent_configs/generalist_agent.yml test_case \
|
|
231
|
-
--workspace different-workspace \
|
|
232
|
-
--api-key different-key
|
|
233
|
-
```
|
|
234
|
-
|
|
235
|
-
### Custom Output Directories
|
|
236
|
-
|
|
237
|
-
Specify where to save results:
|
|
238
|
-
|
|
239
|
-
```bash
|
|
240
|
-
deepset agent run agent_configs/generalist_agent.yml test_case \
|
|
241
|
-
--output-dir ./my_results
|
|
242
|
-
```
|
|
243
|
-
|
|
244
|
-
Specify test case directory:
|
|
245
|
-
|
|
246
|
-
```bash
|
|
247
|
-
deepset agent run agent_configs/generalist_agent.yml test_case \
|
|
248
|
-
--test-base-dir ./my_test_cases
|
|
249
|
-
```
|
|
250
|
-
|
|
251
|
-
### Debugging and Monitoring
|
|
252
|
-
|
|
253
|
-
#### Check Environment Variables
|
|
254
|
-
|
|
255
|
-
Verify all required environment variables are set:
|
|
256
|
-
|
|
257
|
-
```bash
|
|
258
|
-
deepset agent check-env agent_configs/generalist_agent.yml
|
|
259
|
-
```
|
|
260
|
-
|
|
261
|
-
#### Validate Configurations
|
|
262
|
-
|
|
263
|
-
Check agent configuration syntax:
|
|
264
|
-
|
|
265
|
-
```bash
|
|
266
|
-
deepset agent validate-config agent_configs/generalist_agent.yml
|
|
267
|
-
```
|
|
268
|
-
|
|
269
|
-
#### View Test Case Lists
|
|
270
|
-
|
|
271
|
-
List available test cases:
|
|
272
|
-
|
|
273
|
-
```bash
|
|
274
|
-
deepset test list --test-dir ./my_test_cases
|
|
275
|
-
```
|
|
276
|
-
|
|
277
|
-
## Command Reference
|
|
278
|
-
|
|
279
|
-
### Agent Commands
|
|
280
|
-
|
|
281
|
-
- `deepset agent run` - Run agent against single test case
|
|
282
|
-
- `deepset agent run-all` - Run agent against all test cases
|
|
283
|
-
- `deepset agent check-env` - Verify environment configuration
|
|
284
|
-
- `deepset agent validate-config` - Validate agent configuration
|
|
285
|
-
|
|
286
|
-
### Test Management Commands
|
|
287
|
-
|
|
288
|
-
- `deepset test list` - List available test cases
|
|
289
|
-
- `deepset test setup` - Set up resources for a single test case
|
|
290
|
-
- `deepset test setup-all` - Set up resources for all test cases
|
|
291
|
-
- `deepset test teardown` - Remove single test case resources
|
|
292
|
-
- `deepset test teardown-all` - Remove all test case resources
|
|
293
|
-
|
|
294
|
-
### Pipeline Management Commands
|
|
295
|
-
|
|
296
|
-
- `deepset pipeline create` - Create new pipeline
|
|
297
|
-
- `deepset pipeline delete` - Delete existing pipeline
|
|
298
|
-
|
|
299
|
-
### Index Management Commands
|
|
300
|
-
|
|
301
|
-
- `deepset index create` - Create new index
|
|
302
|
-
- `deepset index delete` - Delete existing index
|
|
303
|
-
|
|
304
|
-
## Configuration Files
|
|
305
|
-
|
|
306
|
-
### Agent Configuration Format
|
|
307
|
-
|
|
308
|
-
```yaml
|
|
309
|
-
agent_factory_function: "module.path.to.get_agent"
|
|
310
|
-
display_name: "My Agent"
|
|
311
|
-
required_env_vars:
|
|
312
|
-
- DEEPSET_API_KEY
|
|
313
|
-
- DEEPSET_WORKSPACE
|
|
314
|
-
- ANTHROPIC_API_KEY
|
|
315
|
-
```
|
|
316
|
-
|
|
317
|
-
### Test Case Configuration Format
|
|
318
|
-
|
|
319
|
-
```yaml
|
|
320
|
-
name: "test_case_name"
|
|
321
|
-
objective: "Description of what this test validates"
|
|
322
|
-
prompt: "The message sent to the agent"
|
|
323
|
-
query_yaml: "relative/path/to/pipeline.yml" # Optional
|
|
324
|
-
query_name: "pipeline-name" # Required if query_yaml present
|
|
325
|
-
index_yaml: "relative/path/to/index.yml" # Optional
|
|
326
|
-
index_name: "index-name" # Required if index_yaml present
|
|
327
|
-
expected_query: "path/to/expected.yml" # Optional validation reference
|
|
328
|
-
tags:
|
|
329
|
-
- "category"
|
|
330
|
-
- "type"
|
|
331
|
-
judge_prompt: "Optional prompt for LLM validation" # Optional
|
|
332
|
-
```
|
|
333
|
-
|
|
334
|
-
## Result Analysis
|
|
335
|
-
|
|
336
|
-
### Understanding Output Files
|
|
337
|
-
|
|
338
|
-
Each test run produces:
|
|
339
|
-
|
|
340
|
-
1. **`messages.json`**: Complete conversation transcript with the agent
|
|
341
|
-
2. **`test_results.csv`**: Performance metrics and validation results
|
|
342
|
-
3. **`post_run_pipeline.yml`**: Final pipeline configuration after agent modifications
|
|
343
|
-
|
|
344
|
-
### Performance Metrics
|
|
345
|
-
|
|
346
|
-
The system tracks:
|
|
347
|
-
|
|
348
|
-
- **Token usage**: Prompt and completion tokens consumed
|
|
349
|
-
- **Tool calls**: Number of API calls made by the agent
|
|
350
|
-
- **Validation status**: Pre- and post-run pipeline validation results
|
|
351
|
-
- **Model information**: Which AI model was used
|
|
352
|
-
|
|
353
|
-
### Aggregate Analysis
|
|
354
|
-
|
|
355
|
-
When running multiple test cases, the system provides:
|
|
356
|
-
|
|
357
|
-
- Success/failure counts
|
|
358
|
-
- Total resource consumption
|
|
359
|
-
- Per-test-case breakdowns
|
|
360
|
-
- Cleanup status reports
|
|
361
|
-
|
|
362
|
-
## Troubleshooting
|
|
363
|
-
|
|
364
|
-
### Common Issues
|
|
365
|
-
|
|
366
|
-
**Environment variable errors**:
|
|
367
|
-
- Ensure all required variables are set
|
|
368
|
-
- Use `deepset agent check-env` to verify configuration
|
|
369
|
-
|
|
370
|
-
**Test case not found**:
|
|
371
|
-
- Check test case directory path
|
|
372
|
-
- Verify YAML file exists and is properly named
|
|
373
|
-
- Use `deepset test list` to see available cases
|
|
374
|
-
|
|
375
|
-
**Validation failures**:
|
|
376
|
-
- Review pipeline YAML syntax
|
|
377
|
-
- Check component type names and parameters
|
|
378
|
-
- Use Deepset Cloud UI to validate manually
|
|
379
|
-
|
|
380
|
-
**Resource conflicts**:
|
|
381
|
-
- Ensure unique names for pipelines and indexes
|
|
382
|
-
- Clean up existing resources before running tests
|
|
383
|
-
- Use different workspace for testing
|
|
384
|
-
|
|
385
|
-
**Permission errors**:
|
|
386
|
-
- Verify API key has sufficient permissions
|
|
387
|
-
- Check workspace access rights
|
|
388
|
-
- Confirm network connectivity to Deepset Cloud
|
|
389
|
-
|
|
390
|
-
### Getting Help
|
|
391
|
-
|
|
392
|
-
1. **Check logs**: Review detailed error messages in command output
|
|
393
|
-
2. **Validate configs**: Use validation commands before running tests
|
|
394
|
-
3. **Test incrementally**: Start with single test cases before batch runs
|
|
395
|
-
4. **Clean environment**: Remove conflicting resources and retry
|
|
396
|
-
|
|
397
|
-
## Best Practices
|
|
398
|
-
|
|
399
|
-
### Test Organization
|
|
400
|
-
|
|
401
|
-
- Use descriptive test case names with underscores
|
|
402
|
-
- Group related tests with consistent tag names
|
|
403
|
-
- Keep pipeline files organized in subdirectories
|
|
404
|
-
- Document test objectives clearly
|
|
405
|
-
|
|
406
|
-
### Resource Management
|
|
407
|
-
|
|
408
|
-
- Always clean up test resources after experiments
|
|
409
|
-
- Use unique names to avoid conflicts
|
|
410
|
-
- Prefer automated setup/teardown over manual management
|
|
411
|
-
- Monitor resource usage in your workspace
|
|
412
|
-
|
|
413
|
-
### Performance Optimization
|
|
414
|
-
|
|
415
|
-
- Use appropriate concurrency levels (start with 1-3)
|
|
416
|
-
- Set reasonable token limits for cost control
|
|
417
|
-
- Cache common pipeline configurations
|
|
418
|
-
- Run expensive tests separately from quick validation tests
|
|
419
|
-
|
|
420
|
-
### Environment Management
|
|
421
|
-
|
|
422
|
-
- Use environment files for consistent configuration
|
|
423
|
-
- Never commit API keys to version control
|
|
424
|
-
- Use different workspaces for development and testing
|
|
425
|
-
- Validate environment before important test runs
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
agent_factory_function: "deepset_mcp.agents.debugging.debugging_agent.get_agent"
|
|
2
|
-
display_name: "debugging-agent"
|
|
3
|
-
interactive: true
|
|
4
|
-
required_env_vars:
|
|
5
|
-
- DEEPSET_API_KEY
|
|
6
|
-
- DEEPSET_WORKSPACE
|
|
7
|
-
- ANTHROPIC_API_KEY
|
|
8
|
-
- DEEPSET_DOCS_WORKSPACE
|
|
9
|
-
- DEEPSET_DOCS_API_KEY
|
|
10
|
-
- DEEPSET_DOCS_PIPELINE_NAME
|
|
File without changes
|