deepset-mcp 0.0.3__py3-none-any.whl → 0.0.4rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. deepset_mcp/__init__.py +10 -0
  2. deepset_mcp/api/__init__.py +4 -0
  3. deepset_mcp/api/client.py +4 -0
  4. deepset_mcp/api/custom_components/__init__.py +4 -0
  5. deepset_mcp/api/custom_components/models.py +4 -0
  6. deepset_mcp/api/custom_components/protocols.py +4 -0
  7. deepset_mcp/api/custom_components/resource.py +4 -0
  8. deepset_mcp/api/exceptions.py +4 -0
  9. deepset_mcp/api/haystack_service/__init__.py +4 -0
  10. deepset_mcp/api/haystack_service/protocols.py +4 -0
  11. deepset_mcp/api/haystack_service/resource.py +4 -0
  12. deepset_mcp/api/indexes/__init__.py +4 -0
  13. deepset_mcp/api/indexes/models.py +4 -0
  14. deepset_mcp/api/indexes/protocols.py +4 -0
  15. deepset_mcp/api/indexes/resource.py +4 -0
  16. deepset_mcp/api/integrations/__init__.py +4 -0
  17. deepset_mcp/api/integrations/models.py +4 -0
  18. deepset_mcp/api/integrations/protocols.py +4 -0
  19. deepset_mcp/api/integrations/resource.py +4 -0
  20. deepset_mcp/api/pipeline/__init__.py +4 -0
  21. deepset_mcp/api/pipeline/log_level.py +4 -0
  22. deepset_mcp/api/pipeline/models.py +4 -0
  23. deepset_mcp/api/pipeline/protocols.py +8 -0
  24. deepset_mcp/api/pipeline/resource.py +4 -0
  25. deepset_mcp/api/pipeline_template/__init__.py +4 -0
  26. deepset_mcp/api/pipeline_template/models.py +4 -0
  27. deepset_mcp/api/pipeline_template/protocols.py +4 -0
  28. deepset_mcp/api/pipeline_template/resource.py +4 -0
  29. deepset_mcp/api/protocols.py +4 -0
  30. deepset_mcp/api/secrets/__init__.py +4 -0
  31. deepset_mcp/api/secrets/models.py +4 -0
  32. deepset_mcp/api/secrets/protocols.py +4 -0
  33. deepset_mcp/api/secrets/resource.py +4 -0
  34. deepset_mcp/api/shared_models.py +4 -0
  35. deepset_mcp/api/transport.py +4 -0
  36. deepset_mcp/api/user/__init__.py +4 -0
  37. deepset_mcp/api/user/protocols.py +4 -0
  38. deepset_mcp/api/user/resource.py +4 -0
  39. deepset_mcp/api/workspace/__init__.py +4 -0
  40. deepset_mcp/api/workspace/models.py +4 -0
  41. deepset_mcp/api/workspace/protocols.py +4 -0
  42. deepset_mcp/api/workspace/resource.py +4 -0
  43. deepset_mcp/config.py +8 -0
  44. deepset_mcp/initialize_embedding_model.py +4 -0
  45. deepset_mcp/main.py +8 -0
  46. deepset_mcp/store.py +4 -0
  47. deepset_mcp/tool_factory.py +11 -4
  48. deepset_mcp/tools/__init__.py +4 -0
  49. deepset_mcp/tools/custom_components.py +4 -0
  50. deepset_mcp/tools/doc_search.py +4 -0
  51. deepset_mcp/tools/haystack_service.py +4 -0
  52. deepset_mcp/tools/haystack_service_models.py +4 -0
  53. deepset_mcp/tools/indexes.py +4 -0
  54. deepset_mcp/tools/model_protocol.py +4 -0
  55. deepset_mcp/tools/pipeline.py +4 -0
  56. deepset_mcp/tools/pipeline_template.py +4 -0
  57. deepset_mcp/tools/secrets.py +4 -0
  58. deepset_mcp/tools/tokonomics/__init__.py +4 -0
  59. deepset_mcp/tools/tokonomics/decorators.py +4 -0
  60. deepset_mcp/tools/tokonomics/explorer.py +4 -0
  61. deepset_mcp/tools/tokonomics/object_store.py +4 -0
  62. deepset_mcp/tools/workspace.py +4 -0
  63. deepset_mcp-0.0.4rc1.dist-info/METADATA +761 -0
  64. deepset_mcp-0.0.4rc1.dist-info/RECORD +70 -0
  65. {deepset_mcp-0.0.3.dist-info → deepset_mcp-0.0.4rc1.dist-info}/entry_points.txt +0 -1
  66. deepset_mcp-0.0.4rc1.dist-info/licenses/LICENSE +202 -0
  67. deepset_mcp/agents/__init__.py +0 -0
  68. deepset_mcp/agents/debugging/__init__.py +0 -0
  69. deepset_mcp/agents/debugging/debugging_agent.py +0 -37
  70. deepset_mcp/agents/debugging/system_prompt.md +0 -214
  71. deepset_mcp/agents/generalist/__init__.py +0 -0
  72. deepset_mcp/agents/generalist/generalist_agent.py +0 -38
  73. deepset_mcp/agents/generalist/system_prompt.md +0 -241
  74. deepset_mcp/benchmark/README.md +0 -425
  75. deepset_mcp/benchmark/__init__.py +0 -1
  76. deepset_mcp/benchmark/agent_configs/debugging_agent.yml +0 -10
  77. deepset_mcp/benchmark/agent_configs/generalist_agent.yml +0 -6
  78. deepset_mcp/benchmark/dp_validation_error_analysis/__init__.py +0 -0
  79. deepset_mcp/benchmark/dp_validation_error_analysis/eda.ipynb +0 -757
  80. deepset_mcp/benchmark/dp_validation_error_analysis/prepare_interaction_data.ipynb +0 -167
  81. deepset_mcp/benchmark/dp_validation_error_analysis/preprocessing_utils.py +0 -213
  82. deepset_mcp/benchmark/runner/__init__.py +0 -0
  83. deepset_mcp/benchmark/runner/agent_benchmark_runner.py +0 -561
  84. deepset_mcp/benchmark/runner/agent_loader.py +0 -110
  85. deepset_mcp/benchmark/runner/cli.py +0 -39
  86. deepset_mcp/benchmark/runner/cli_agent.py +0 -373
  87. deepset_mcp/benchmark/runner/cli_index.py +0 -71
  88. deepset_mcp/benchmark/runner/cli_pipeline.py +0 -73
  89. deepset_mcp/benchmark/runner/cli_tests.py +0 -226
  90. deepset_mcp/benchmark/runner/cli_utils.py +0 -61
  91. deepset_mcp/benchmark/runner/config.py +0 -73
  92. deepset_mcp/benchmark/runner/config_loader.py +0 -64
  93. deepset_mcp/benchmark/runner/interactive.py +0 -140
  94. deepset_mcp/benchmark/runner/models.py +0 -203
  95. deepset_mcp/benchmark/runner/repl.py +0 -67
  96. deepset_mcp/benchmark/runner/setup_actions.py +0 -238
  97. deepset_mcp/benchmark/runner/streaming.py +0 -360
  98. deepset_mcp/benchmark/runner/teardown_actions.py +0 -196
  99. deepset_mcp/benchmark/runner/tracing.py +0 -21
  100. deepset_mcp/benchmark/tasks/chat_rag_answers_wrong_format.yml +0 -16
  101. deepset_mcp/benchmark/tasks/documents_output_wrong.yml +0 -13
  102. deepset_mcp/benchmark/tasks/jinja_str_instead_of_complex_type.yml +0 -11
  103. deepset_mcp/benchmark/tasks/jinja_syntax_error.yml +0 -11
  104. deepset_mcp/benchmark/tasks/missing_output_mapping.yml +0 -14
  105. deepset_mcp/benchmark/tasks/no_query_input.yml +0 -13
  106. deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_str.yml +0 -141
  107. deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_syntax.yml +0 -141
  108. deepset_mcp/benchmark/tasks/pipelines/chat_rag_answers_wrong_format.yml +0 -181
  109. deepset_mcp/benchmark/tasks/pipelines/chat_rag_missing_output_mapping.yml +0 -189
  110. deepset_mcp/benchmark/tasks/pipelines/rag_documents_wrong_format.yml +0 -193
  111. deepset_mcp/benchmark/tasks/pipelines/rag_no_query_input.yml +0 -191
  112. deepset_mcp/benchmark/tasks/pipelines/standard_index.yml +0 -167
  113. deepset_mcp-0.0.3.dist-info/METADATA +0 -289
  114. deepset_mcp-0.0.3.dist-info/RECORD +0 -115
  115. {deepset_mcp-0.0.3.dist-info → deepset_mcp-0.0.4rc1.dist-info}/WHEEL +0 -0
@@ -1,241 +0,0 @@
1
- You are **deepset Copilot**, an AI Agent that helps developers build, inspect, and maintain Haystack pipelines on the
2
- deepset AI Platform.
3
-
4
- ---
5
-
6
- ## 1. Core Concepts
7
-
8
- ### 1.1 Pipelines
9
-
10
- * **Definition**: Ordered graphs of components that process data (queries, documents, embeddings, prompts, answers).
11
- * **Flow**: Each component’s output becomes the next’s input.
12
- * **Advanced Structures**:
13
-
14
- * **Branches**: Parallel paths (e.g., different converters for multiple file types).
15
- * **Loops**: Iterative cycles (e.g., self-correcting loops with a Validator).
16
-
17
- **Full YAML Example**
18
-
19
- ````yaml
20
- components:
21
- chat_summary_prompt_builder:
22
- type: haystack.components.builders.prompt_builder.PromptBuilder
23
- init_parameters:
24
- template: |-
25
- You are part of a chatbot.
26
- You receive a question (Current Question) and a chat history.
27
- Use the context from the chat history and reformulate the question so that it is suitable for retrieval
28
- augmented generation.
29
- If X is followed by Y, only ask for Y and do not repeat X again.
30
- If the question does not require any context from the chat history, output it unedited.
31
- Don't make questions too long, but short and precise.
32
- Stay as close as possible to the current question.
33
- Only output the new question, nothing else!
34
-
35
- {{ question }}
36
-
37
- New question:
38
-
39
- required_variables: "*"
40
- chat_summary_llm:
41
- type: deepset_cloud_custom_nodes.generators.deepset_amazon_bedrock_generator.DeepsetAmazonBedrockGenerator
42
- init_parameters:
43
- model: anthropic.claude-3-5-sonnet-20241022-v2:0
44
- aws_region_name: us-west-2
45
- max_length: 650
46
- model_max_length: 200000
47
- temperature: 0
48
-
49
- replies_to_query:
50
- type: haystack.components.converters.output_adapter.OutputAdapter
51
- init_parameters:
52
- template: "{{ replies[0] }}"
53
- output_type: str
54
-
55
- bm25_retriever: # Selects the most similar documents from the document store
56
- type: haystack_integrations.components.retrievers.opensearch.bm25_retriever.OpenSearchBM25Retriever
57
- init_parameters:
58
- document_store:
59
- type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
60
- init_parameters:
61
- embedding_dim: 768
62
- top_k: 20 # The number of results to return
63
- fuzziness: 0
64
-
65
- query_embedder:
66
- type: deepset_cloud_custom_nodes.embedders.nvidia.text_embedder.DeepsetNvidiaTextEmbedder
67
- init_parameters:
68
- normalize_embeddings: true
69
- model: intfloat/e5-base-v2
70
-
71
- embedding_retriever: # Selects the most similar documents from the document store
72
- type: haystack_integrations.components.retrievers.opensearch.embedding_retriever.OpenSearchEmbeddingRetriever
73
- init_parameters:
74
- document_store:
75
- type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
76
- init_parameters:
77
- embedding_dim: 768
78
- top_k: 20 # The number of results to return
79
-
80
- document_joiner:
81
- type: haystack.components.joiners.document_joiner.DocumentJoiner
82
- init_parameters:
83
- join_mode: concatenate
84
-
85
- ranker:
86
- type: deepset_cloud_custom_nodes.rankers.nvidia.ranker.DeepsetNvidiaRanker
87
- init_parameters:
88
- model: intfloat/simlm-msmarco-reranker
89
- top_k: 8
90
-
91
- meta_field_grouping_ranker:
92
- type: haystack.components.rankers.meta_field_grouping_ranker.MetaFieldGroupingRanker
93
- init_parameters:
94
- group_by: file_id
95
- subgroup_by: null
96
- sort_docs_by: split_id
97
-
98
- qa_prompt_builder:
99
- type: haystack.components.builders.prompt_builder.PromptBuilder
100
- init_parameters:
101
- template: |-
102
- You are a technical expert.
103
- You answer questions truthfully based on provided documents.
104
- If the answer exists in several documents, summarize them.
105
- Ignore documents that don't contain the answer to the question.
106
- Only answer based on the documents provided. Don't make things up.
107
- If no information related to the question can be found in the document, say so.
108
- Always use references in the form [NUMBER OF DOCUMENT] when using information from a document,
109
- e.g. [3] for Document [3] .
110
- Never name the documents, only enter a number in square brackets as a reference.
111
- The reference must only refer to the number that comes in square brackets after the document.
112
- Otherwise, do not use brackets in your answer and reference ONLY the number of the document without mentioning
113
- the word document.
114
-
115
- These are the documents:
116
- {%- if documents|length > 0 %}
117
- {%- for document in documents %}
118
- Document [{{ loop.index }}] :
119
- Name of Source File: {{ document.meta.file_name }}
120
- {{ document.content }}
121
- {% endfor -%}
122
- {%- else %}
123
- No relevant documents found.
124
- Respond with "Sorry, no matching documents were found, please adjust the filters or try a different question."
125
- {% endif %}
126
-
127
- Question: {{ question }}
128
- Answer:
129
-
130
- required_variables: "*"
131
- qa_llm:
132
- type: deepset_cloud_custom_nodes.generators.deepset_amazon_bedrock_generator.DeepsetAmazonBedrockGenerator
133
- init_parameters:
134
- model: anthropic.claude-3-5-sonnet-20241022-v2:0
135
- aws_region_name: us-west-2
136
- max_length: 650
137
- model_max_length: 200000
138
- temperature: 0
139
-
140
- answer_builder:
141
- type: deepset_cloud_custom_nodes.augmenters.deepset_answer_builder.DeepsetAnswerBuilder
142
- init_parameters:
143
- reference_pattern: acm
144
-
145
- connections: # Defines how the components are connected
146
- - sender: chat_summary_prompt_builder.prompt
147
- receiver: chat_summary_llm.prompt
148
- - sender: chat_summary_llm.replies
149
- receiver: replies_to_query.replies
150
- - sender: replies_to_query.output
151
- receiver: bm25_retriever.query
152
- - sender: replies_to_query.output
153
- receiver: query_embedder.text
154
- - sender: replies_to_query.output
155
- receiver: ranker.query
156
- - sender: replies_to_query.output
157
- receiver: qa_prompt_builder.question
158
- - sender: replies_to_query.output
159
- receiver: answer_builder.query
160
- - sender: bm25_retriever.documents
161
- receiver: document_joiner.documents
162
- - sender: query_embedder.embedding
163
- receiver: embedding_retriever.query_embedding
164
- - sender: embedding_retriever.documents
165
- receiver: document_joiner.documents
166
- - sender: document_joiner.documents
167
- receiver: ranker.documents
168
- - sender: ranker.documents
169
- receiver: meta_field_grouping_ranker.documents
170
- - sender: meta_field_grouping_ranker.documents
171
- receiver: qa_prompt_builder.documents
172
- - sender: meta_field_grouping_ranker.documents
173
- receiver: answer_builder.documents
174
- - sender: qa_prompt_builder.prompt
175
- receiver: qa_llm.prompt
176
- - sender: qa_prompt_builder.prompt
177
- receiver: answer_builder.prompt
178
- - sender: qa_llm.replies
179
- receiver: answer_builder.replies
180
-
181
- inputs: # Define the inputs for your pipeline
182
- query: # These components will receive the query as input
183
- - "chat_summary_prompt_builder.question"
184
-
185
- filters: # These components will receive a potential query filter as input
186
- - "bm25_retriever.filters"
187
- - "embedding_retriever.filters"
188
-
189
- outputs: # Defines the output of your pipeline
190
- documents: "meta_field_grouping_ranker.documents" # The output of the pipeline is the retrieved documents
191
- answers: "answer_builder.answers" # The output of the pipeline is the generated answers
192
- ````
193
- ### 1.2 Components
194
- - **Identification**: Each has a unique `type` (fully qualified class path).
195
- - **Configuration**: `init_parameters` control models, thresholds, credentials, etc.
196
- - **I/O Signatures**: Named inputs and outputs, with specific data types (e.g., `List[Document]`, `List[Answer]`).
197
-
198
- **Component Example**:
199
- ```yaml
200
- my_converter:
201
- type: haystack.components.converters.xlsx.XLSXToDocument
202
- init_parameters:
203
- metadata_filters: ["*.sheet1"]
204
- ````
205
-
206
- **Connection Example**:
207
-
208
- ```yaml
209
- - sender: my_converter.documents
210
- receiver: text_converter.sources
211
- ```
212
-
213
- ### 1.3 YAML Structure
214
-
215
- 1. **components**: Declare each block’s name, `type`, and `init_parameters`.
216
- 2. **connections**: Link `sender:<component>.<output>` → `receiver:<component>.<input>`.
217
- 3. **inputs**: Map external inputs (`query`, `filters`) to component inputs.
218
- 4. **outputs**: Define final outputs (`documents`, `answers`) from component outputs.
219
- 5. **max\_loops\_allowed**: (Optional) Cap on loop iterations.
220
-
221
- ---
222
-
223
- ## 2. Agent Workflow
224
-
225
- 1. **Inspect & Discover**
226
-
227
- * Always call listing/fetch tools (`list_pipelines`, `get_component_definition`, etc.) to gather current state.
228
- * Check the pipeline templates; you can often start from an existing template when the user wants to create a
229
- new pipeline.
230
- * Ask targeted questions if requirements are unclear.
231
- 2. **Architect Phase**
232
-
233
- * Reason about the changes you will need to make.
234
- * Do NOT ask the user for confirmation; go ahead with execution once you know what you need to do.
235
-
236
- 3. **Execute Phase**
237
- * Execute the changes to help the user fix their pipeline or index.
238
-
239
- 4. **Integrity**
240
-
241
- * Never invent components; rely exclusively on tool-derived definitions.
@@ -1,425 +0,0 @@
1
- # Deepset MCP Benchmark System
2
-
3
- A comprehensive benchmarking and testing framework for the Deepset Cloud Platform that enables automated testing of AI agents against predefined test cases.
4
-
5
- ## Getting Started
6
-
7
- ### Prerequisites
8
-
9
- - Python 3.11+
10
- - Access to Deepset Cloud Platform
11
- - Required environment variables:
12
- - `DEEPSET_API_KEY`: Your Deepset Cloud API key
13
- - `DEEPSET_WORKSPACE`: Your workspace name
14
-
15
- ### Installation
16
-
17
- Install the benchmark dependencies:
18
-
19
- ```bash
20
- pip install -e .[benchmark]
21
- ```
22
-
23
- For agent testing, also install:
24
-
25
- ```bash
26
- pip install -e .[agents]
27
- ```
28
-
29
- ### Quick Start
30
-
31
- 1. **Check your environment**:
32
- ```bash
33
- deepset agent check-env agent_configs/generalist_agent.yml
34
- ```
35
-
36
- 2. **List available test cases**:
37
- ```bash
38
- deepset test list
39
- ```
40
-
41
- 3. **Run a single test**:
42
- ```bash
43
- deepset agent run agent_configs/generalist_agent.yml chat_rag_answers_wrong_format
44
- ```
45
-
46
- ## Core Concepts
47
-
48
- ### Test Cases
49
-
50
- Test cases define specific scenarios for testing agents. Each test case includes:
51
-
52
- - **Pipeline configuration**: YAML files defining Haystack pipelines
53
- - **Index configuration**: YAML files for document indexing
54
- - **Test prompt**: The input message sent to the agent
55
- - **Validation criteria**: Expected behavior and outputs
56
-
57
- Test cases are stored as YAML files in the `tasks/` directory.
58
-
59
- ### Agent Configurations
60
-
61
- Agent configurations define how to instantiate and run AI agents. They specify:
62
-
63
- - **Agent factory function**: Python function that creates the agent
64
- - **Environment variables**: Required API keys and settings
65
- - **Display name**: Human-readable identifier
66
-
67
- ### Pipelines and Indexes
68
-
69
- - **Pipelines**: Define the processing workflow for queries and documents
70
- - **Indexes**: Configure document storage and retrieval systems
71
- - Both are managed as YAML configurations on the Deepset platform
72
-
73
- ## Tutorials
74
-
75
- ### Running Your First Benchmark
76
-
77
- 1. **Prepare your environment**:
78
- ```bash
79
- export DEEPSET_API_KEY="your_api_key"
80
- export DEEPSET_WORKSPACE="your_workspace"
81
- export ANTHROPIC_API_KEY="your_anthropic_key"
82
- ```
83
-
84
- 2. **Validate your agent configuration**:
85
- ```bash
86
- deepset agent validate-config agent_configs/generalist_agent.yml
87
- ```
88
-
89
- 3. **Run a single test case**:
90
- ```bash
91
- deepset agent run agent_configs/generalist_agent.yml chat_rag_answers_wrong_format
92
- ```
93
-
94
- This will:
95
- - Create necessary pipelines and indexes
96
- - Run the agent against the test case
97
- - Validate the results
98
- - Clean up resources
99
- - Save detailed results to disk
100
-
101
- 4. **View the results**:
102
- Results are saved in the `agent_runs/` directory with:
103
- - Full message transcripts (`messages.json`)
104
- - Performance metrics (`test_results.csv`)
105
- - Pipeline configurations (`post_run_pipeline.yml`)
106
-
107
- ### Running Multiple Test Cases
108
-
109
- Run all available test cases:
110
-
111
- ```bash
112
- deepset agent run-all agent_configs/generalist_agent.yml
113
- ```
114
-
115
- With parallel execution:
116
-
117
- ```bash
118
- deepset agent run-all agent_configs/generalist_agent.yml --concurrency 3
119
- ```
120
-
121
- ### Creating Test Cases
122
-
123
- 1. **Create a test case YAML file** in `tasks/`:
124
-
125
- ```yaml
126
- name: "my_test_case"
127
- objective: "Test pipeline validation"
128
- prompt: "Please check my pipeline configuration"
129
- query_yaml: "pipelines/my_pipeline.yml"
130
- query_name: "test-pipeline"
131
- index_yaml: "pipelines/my_index.yml"
132
- index_name: "test-index"
133
- tags:
134
- - "validation"
135
- - "debugging"
136
- ```
137
-
138
- 2. **Create the referenced pipeline files** in `tasks/pipelines/`
139
-
140
- 3. **Test your new case**:
141
- ```bash
142
- deepset agent run agent_configs/generalist_agent.yml my_test_case
143
- ```
144
-
145
- ## How-To Guides
146
-
147
- ### Managing Test Resources
148
-
149
- #### Setup Test Cases Manually
150
-
151
- Create all test case resources on the platform:
152
-
153
- ```bash
154
- deepset test setup-all --workspace your-workspace --concurrency 5
155
- ```
156
-
157
- Setup a specific test case:
158
-
159
- ```bash
160
- deepset test setup my_test_case --workspace your-workspace
161
- ```
162
-
163
- #### Cleanup Test Resources
164
-
165
- Remove all test case resources:
166
-
167
- ```bash
168
- deepset test teardown-all --workspace your-workspace
169
- ```
170
-
171
- Remove a specific test case:
172
-
173
- ```bash
174
- deepset test teardown my_test_case --workspace your-workspace
175
- ```
176
-
177
- ### Managing Pipelines and Indexes
178
-
179
- #### Create Individual Resources
180
-
181
- Create a pipeline from YAML file:
182
-
183
- ```bash
184
- deepset pipeline create --path pipeline.yml --name my-pipeline --workspace your-workspace
185
- ```
186
-
187
- Create an index from YAML content:
188
-
189
- ```bash
190
- deepset index create --content "$(cat index.yml)" --name my-index --workspace your-workspace
191
- ```
192
-
193
- #### Delete Resources
194
-
195
- Delete a pipeline:
196
-
197
- ```bash
198
- deepset pipeline delete --name my-pipeline --workspace your-workspace
199
- ```
200
-
201
- Delete an index:
202
-
203
- ```bash
204
- deepset index delete --name my-index --workspace your-workspace
205
- ```
206
-
207
- ### Environment Configuration
208
-
209
- #### Using Environment Files
210
-
211
- Create a `.env` file:
212
-
213
- ```bash
214
- DEEPSET_API_KEY=your_api_key
215
- DEEPSET_WORKSPACE=your_workspace
216
- ANTHROPIC_API_KEY=your_anthropic_key
217
- ```
218
-
219
- Use it with any command:
220
-
221
- ```bash
222
- deepset agent run --env-file .env agent_configs/generalist_agent.yml test_case
223
- ```
224
-
225
- #### Override Settings
226
-
227
- Override workspace and API key:
228
-
229
- ```bash
230
- deepset agent run agent_configs/generalist_agent.yml test_case \
231
- --workspace different-workspace \
232
- --api-key different-key
233
- ```
234
-
235
- ### Custom Output Directories
236
-
237
- Specify where to save results:
238
-
239
- ```bash
240
- deepset agent run agent_configs/generalist_agent.yml test_case \
241
- --output-dir ./my_results
242
- ```
243
-
244
- Specify test case directory:
245
-
246
- ```bash
247
- deepset agent run agent_configs/generalist_agent.yml test_case \
248
- --test-base-dir ./my_test_cases
249
- ```
250
-
251
- ### Debugging and Monitoring
252
-
253
- #### Check Environment Variables
254
-
255
- Verify all required environment variables are set:
256
-
257
- ```bash
258
- deepset agent check-env agent_configs/generalist_agent.yml
259
- ```
260
-
261
- #### Validate Configurations
262
-
263
- Check agent configuration syntax:
264
-
265
- ```bash
266
- deepset agent validate-config agent_configs/generalist_agent.yml
267
- ```
268
-
269
- #### View Test Case Lists
270
-
271
- List available test cases:
272
-
273
- ```bash
274
- deepset test list --test-dir ./my_test_cases
275
- ```
276
-
277
- ## Command Reference
278
-
279
- ### Agent Commands
280
-
281
- - `deepset agent run` - Run agent against single test case
282
- - `deepset agent run-all` - Run agent against all test cases
283
- - `deepset agent check-env` - Verify environment configuration
284
- - `deepset agent validate-config` - Validate agent configuration
285
-
286
- ### Test Management Commands
287
-
288
- - `deepset test list` - List available test cases
289
- - `deepset test setup` - Setup single test case resources
290
- - `deepset test setup-all` - Setup all test case resources
291
- - `deepset test teardown` - Remove single test case resources
292
- - `deepset test teardown-all` - Remove all test case resources
293
-
294
- ### Pipeline Management Commands
295
-
296
- - `deepset pipeline create` - Create new pipeline
297
- - `deepset pipeline delete` - Delete existing pipeline
298
-
299
- ### Index Management Commands
300
-
301
- - `deepset index create` - Create new index
302
- - `deepset index delete` - Delete existing index
303
-
304
- ## Configuration Files
305
-
306
- ### Agent Configuration Format
307
-
308
- ```yaml
309
- agent_factory_function: "module.path.to.get_agent"
310
- display_name: "My Agent"
311
- required_env_vars:
312
- - DEEPSET_API_KEY
313
- - DEEPSET_WORKSPACE
314
- - ANTHROPIC_API_KEY
315
- ```
316
-
317
- ### Test Case Configuration Format
318
-
319
- ```yaml
320
- name: "test_case_name"
321
- objective: "Description of what this test validates"
322
- prompt: "The message sent to the agent"
323
- query_yaml: "relative/path/to/pipeline.yml" # Optional
324
- query_name: "pipeline-name" # Required if query_yaml present
325
- index_yaml: "relative/path/to/index.yml" # Optional
326
- index_name: "index-name" # Required if index_yaml present
327
- expected_query: "path/to/expected.yml" # Optional validation reference
328
- tags:
329
- - "category"
330
- - "type"
331
- judge_prompt: "Optional prompt for LLM validation" # Optional
332
- ```
333
-
334
- ## Result Analysis
335
-
336
- ### Understanding Output Files
337
-
338
- Each test run produces:
339
-
340
- 1. **`messages.json`**: Complete conversation transcript with the agent
341
- 2. **`test_results.csv`**: Performance metrics and validation results
342
- 3. **`post_run_pipeline.yml`**: Final pipeline configuration after agent modifications
343
-
344
- ### Performance Metrics
345
-
346
- The system tracks:
347
-
348
- - **Token usage**: Prompt and completion tokens consumed
349
- - **Tool calls**: Number of API calls made by the agent
350
- - **Validation status**: Pre- and post-run pipeline validation results
351
- - **Model information**: Which AI model was used
352
-
353
- ### Aggregate Analysis
354
-
355
- When running multiple test cases, the system provides:
356
-
357
- - Success/failure counts
358
- - Total resource consumption
359
- - Per-test-case breakdowns
360
- - Cleanup status reports
361
-
362
- ## Troubleshooting
363
-
364
- ### Common Issues
365
-
366
- **Environment variable errors**:
367
- - Ensure all required variables are set
368
- - Use `deepset agent check-env` to verify configuration
369
-
370
- **Test case not found**:
371
- - Check test case directory path
372
- - Verify YAML file exists and is properly named
373
- - Use `deepset test list` to see available cases
374
-
375
- **Validation failures**:
376
- - Review pipeline YAML syntax
377
- - Check component type names and parameters
378
- - Use the Deepset Cloud UI to validate manually
379
-
380
- **Resource conflicts**:
381
- - Ensure unique names for pipelines and indexes
382
- - Clean up existing resources before running tests
383
- - Use a different workspace for testing
384
-
385
- **Permission errors**:
386
- - Verify API key has sufficient permissions
387
- - Check workspace access rights
388
- - Confirm network connectivity to Deepset Cloud
389
-
390
- ### Getting Help
391
-
392
- 1. **Check logs**: Review detailed error messages in command output
393
- 2. **Validate configs**: Use validation commands before running tests
394
- 3. **Test incrementally**: Start with single test cases before batch runs
395
- 4. **Clean environment**: Remove conflicting resources and retry
396
-
397
- ## Best Practices
398
-
399
- ### Test Organization
400
-
401
- - Use descriptive test case names with underscores
402
- - Group related tests with consistent tag names
403
- - Keep pipeline files organized in subdirectories
404
- - Document test objectives clearly
405
-
406
- ### Resource Management
407
-
408
- - Always clean up test resources after experiments
409
- - Use unique names to avoid conflicts
410
- - Prefer automated setup/teardown over manual management
411
- - Monitor resource usage in your workspace
412
-
413
- ### Performance Optimization
414
-
415
- - Use appropriate concurrency levels (start with 1-3)
416
- - Set reasonable token limits for cost control
417
- - Cache common pipeline configurations
418
- - Run expensive tests separately from quick validation tests
419
-
420
- ### Environment Management
421
-
422
- - Use environment files for consistent configuration
423
- - Never commit API keys to version control
424
- - Use different workspaces for development and testing
425
- - Validate environment before important test runs
@@ -1 +0,0 @@
1
-
@@ -1,10 +0,0 @@
1
- agent_factory_function: "deepset_mcp.agents.debugging.debugging_agent.get_agent"
2
- display_name: "debugging-agent"
3
- interactive: true
4
- required_env_vars:
5
- - DEEPSET_API_KEY
6
- - DEEPSET_WORKSPACE
7
- - ANTHROPIC_API_KEY
8
- - DEEPSET_DOCS_WORKSPACE
9
- - DEEPSET_DOCS_API_KEY
10
- - DEEPSET_DOCS_PIPELINE_NAME
@@ -1,6 +0,0 @@
1
- agent_factory_function: "deepset_mcp.agents.generalist.generalist_agent.get_agent"
2
- display_name: "generalist-agent"
3
- required_env_vars:
4
- - DEEPSET_API_KEY
5
- - DEEPSET_WORKSPACE
6
- - ANTHROPIC_API_KEY