deepset-mcp 0.0.2rc1__py3-none-any.whl

This diff shows the contents of publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only and reflects the changes between the released versions.
Files changed (114)
  1. deepset_mcp/__init__.py +0 -0
  2. deepset_mcp/agents/__init__.py +0 -0
  3. deepset_mcp/agents/debugging/__init__.py +0 -0
  4. deepset_mcp/agents/debugging/debugging_agent.py +37 -0
  5. deepset_mcp/agents/debugging/system_prompt.md +214 -0
  6. deepset_mcp/agents/generalist/__init__.py +0 -0
  7. deepset_mcp/agents/generalist/generalist_agent.py +38 -0
  8. deepset_mcp/agents/generalist/system_prompt.md +241 -0
  9. deepset_mcp/api/README.md +536 -0
  10. deepset_mcp/api/__init__.py +0 -0
  11. deepset_mcp/api/client.py +277 -0
  12. deepset_mcp/api/custom_components/__init__.py +0 -0
  13. deepset_mcp/api/custom_components/models.py +25 -0
  14. deepset_mcp/api/custom_components/protocols.py +17 -0
  15. deepset_mcp/api/custom_components/resource.py +56 -0
  16. deepset_mcp/api/exceptions.py +70 -0
  17. deepset_mcp/api/haystack_service/__init__.py +0 -0
  18. deepset_mcp/api/haystack_service/protocols.py +13 -0
  19. deepset_mcp/api/haystack_service/resource.py +55 -0
  20. deepset_mcp/api/indexes/__init__.py +0 -0
  21. deepset_mcp/api/indexes/models.py +63 -0
  22. deepset_mcp/api/indexes/protocols.py +53 -0
  23. deepset_mcp/api/indexes/resource.py +138 -0
  24. deepset_mcp/api/integrations/__init__.py +1 -0
  25. deepset_mcp/api/integrations/models.py +49 -0
  26. deepset_mcp/api/integrations/protocols.py +27 -0
  27. deepset_mcp/api/integrations/resource.py +57 -0
  28. deepset_mcp/api/pipeline/__init__.py +17 -0
  29. deepset_mcp/api/pipeline/log_level.py +9 -0
  30. deepset_mcp/api/pipeline/models.py +235 -0
  31. deepset_mcp/api/pipeline/protocols.py +83 -0
  32. deepset_mcp/api/pipeline/resource.py +378 -0
  33. deepset_mcp/api/pipeline_template/__init__.py +0 -0
  34. deepset_mcp/api/pipeline_template/models.py +56 -0
  35. deepset_mcp/api/pipeline_template/protocols.py +17 -0
  36. deepset_mcp/api/pipeline_template/resource.py +88 -0
  37. deepset_mcp/api/protocols.py +122 -0
  38. deepset_mcp/api/secrets/__init__.py +0 -0
  39. deepset_mcp/api/secrets/models.py +16 -0
  40. deepset_mcp/api/secrets/protocols.py +29 -0
  41. deepset_mcp/api/secrets/resource.py +112 -0
  42. deepset_mcp/api/shared_models.py +17 -0
  43. deepset_mcp/api/transport.py +336 -0
  44. deepset_mcp/api/user/__init__.py +0 -0
  45. deepset_mcp/api/user/protocols.py +11 -0
  46. deepset_mcp/api/user/resource.py +38 -0
  47. deepset_mcp/api/workspace/__init__.py +7 -0
  48. deepset_mcp/api/workspace/models.py +23 -0
  49. deepset_mcp/api/workspace/protocols.py +41 -0
  50. deepset_mcp/api/workspace/resource.py +94 -0
  51. deepset_mcp/benchmark/README.md +425 -0
  52. deepset_mcp/benchmark/__init__.py +1 -0
  53. deepset_mcp/benchmark/agent_configs/debugging_agent.yml +10 -0
  54. deepset_mcp/benchmark/agent_configs/generalist_agent.yml +6 -0
  55. deepset_mcp/benchmark/dp_validation_error_analysis/__init__.py +0 -0
  56. deepset_mcp/benchmark/dp_validation_error_analysis/eda.ipynb +757 -0
  57. deepset_mcp/benchmark/dp_validation_error_analysis/prepare_interaction_data.ipynb +167 -0
  58. deepset_mcp/benchmark/dp_validation_error_analysis/preprocessing_utils.py +213 -0
  59. deepset_mcp/benchmark/runner/__init__.py +0 -0
  60. deepset_mcp/benchmark/runner/agent_benchmark_runner.py +561 -0
  61. deepset_mcp/benchmark/runner/agent_loader.py +110 -0
  62. deepset_mcp/benchmark/runner/cli.py +39 -0
  63. deepset_mcp/benchmark/runner/cli_agent.py +373 -0
  64. deepset_mcp/benchmark/runner/cli_index.py +71 -0
  65. deepset_mcp/benchmark/runner/cli_pipeline.py +73 -0
  66. deepset_mcp/benchmark/runner/cli_tests.py +226 -0
  67. deepset_mcp/benchmark/runner/cli_utils.py +61 -0
  68. deepset_mcp/benchmark/runner/config.py +73 -0
  69. deepset_mcp/benchmark/runner/config_loader.py +64 -0
  70. deepset_mcp/benchmark/runner/interactive.py +140 -0
  71. deepset_mcp/benchmark/runner/models.py +203 -0
  72. deepset_mcp/benchmark/runner/repl.py +67 -0
  73. deepset_mcp/benchmark/runner/setup_actions.py +238 -0
  74. deepset_mcp/benchmark/runner/streaming.py +360 -0
  75. deepset_mcp/benchmark/runner/teardown_actions.py +196 -0
  76. deepset_mcp/benchmark/runner/tracing.py +21 -0
  77. deepset_mcp/benchmark/tasks/chat_rag_answers_wrong_format.yml +16 -0
  78. deepset_mcp/benchmark/tasks/documents_output_wrong.yml +13 -0
  79. deepset_mcp/benchmark/tasks/jinja_str_instead_of_complex_type.yml +11 -0
  80. deepset_mcp/benchmark/tasks/jinja_syntax_error.yml +11 -0
  81. deepset_mcp/benchmark/tasks/missing_output_mapping.yml +14 -0
  82. deepset_mcp/benchmark/tasks/no_query_input.yml +13 -0
  83. deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_str.yml +141 -0
  84. deepset_mcp/benchmark/tasks/pipelines/chat_agent_jinja_syntax.yml +141 -0
  85. deepset_mcp/benchmark/tasks/pipelines/chat_rag_answers_wrong_format.yml +181 -0
  86. deepset_mcp/benchmark/tasks/pipelines/chat_rag_missing_output_mapping.yml +189 -0
  87. deepset_mcp/benchmark/tasks/pipelines/rag_documents_wrong_format.yml +193 -0
  88. deepset_mcp/benchmark/tasks/pipelines/rag_no_query_input.yml +191 -0
  89. deepset_mcp/benchmark/tasks/pipelines/standard_index.yml +167 -0
  90. deepset_mcp/initialize_embedding_model.py +12 -0
  91. deepset_mcp/main.py +133 -0
  92. deepset_mcp/prompts/deepset_copilot_prompt.md +271 -0
  93. deepset_mcp/prompts/deepset_debugging_agent.md +214 -0
  94. deepset_mcp/store.py +5 -0
  95. deepset_mcp/tool_factory.py +473 -0
  96. deepset_mcp/tools/__init__.py +0 -0
  97. deepset_mcp/tools/custom_components.py +52 -0
  98. deepset_mcp/tools/doc_search.py +83 -0
  99. deepset_mcp/tools/haystack_service.py +358 -0
  100. deepset_mcp/tools/haystack_service_models.py +97 -0
  101. deepset_mcp/tools/indexes.py +129 -0
  102. deepset_mcp/tools/model_protocol.py +16 -0
  103. deepset_mcp/tools/pipeline.py +335 -0
  104. deepset_mcp/tools/pipeline_template.py +116 -0
  105. deepset_mcp/tools/secrets.py +45 -0
  106. deepset_mcp/tools/tokonomics/__init__.py +73 -0
  107. deepset_mcp/tools/tokonomics/decorators.py +396 -0
  108. deepset_mcp/tools/tokonomics/explorer.py +347 -0
  109. deepset_mcp/tools/tokonomics/object_store.py +177 -0
  110. deepset_mcp/tools/workspace.py +61 -0
  111. deepset_mcp-0.0.2rc1.dist-info/METADATA +292 -0
  112. deepset_mcp-0.0.2rc1.dist-info/RECORD +114 -0
  113. deepset_mcp-0.0.2rc1.dist-info/WHEEL +4 -0
  114. deepset_mcp-0.0.2rc1.dist-info/entry_points.txt +3 -0
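The dist-info files at the end of the listing (METADATA, RECORD, entry_points.txt) describe the wheel itself. A minimal sketch, assuming the wheel has been installed (for example with `pip install deepset-mcp==0.0.2rc1`), for inspecting that metadata from Python; the entry point names are not visible in this diff, so the script simply prints whatever the distribution declares:

```python
# Inspect the installed wheel's metadata, entry points, and packaged files.
# Assumes the package is installed; nothing here is specific to deepset-mcp's API.
from importlib.metadata import distribution

dist = distribution("deepset-mcp")
print(dist.metadata["Name"], dist.version)

# Console scripts and other groups declared in entry_points.txt
for ep in dist.entry_points:
    print(f"{ep.group}: {ep.name} -> {ep.value}")

# Files recorded in RECORD (first ten for brevity)
for path in (dist.files or [])[:10]:
    print(path)
```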
@@ -0,0 +1,141 @@
+ # If you need help with the YAML format, have a look at https://docs.cloud.deepset.ai/v2.0/docs/create-a-pipeline#create-a-pipeline-using-pipeline-editor.
+ # This section defines components that you want to use in your pipelines. Each component must have a name and a type. You can also set the component's parameters here.
+ # The name is up to you, you can give your component a friendly name. You then use components' names when specifying the connections in the pipeline.
+ # Type is the class path of the component. You can check the type on the component's documentation page.
+ components:
+   adapter:
+     init_parameters:
+       custom_filters: {}
+       output_type: List[str]
+       template: ' {{ [(messages|last).text] }} '
+       unsafe: false
+     type: haystack.components.converters.output_adapter.OutputAdapter
+   agent:
+     init_parameters:
+       chat_generator:
+         init_parameters:
+           api_base_url:
+           api_key:
+             env_vars:
+             - OPENAI_API_KEY
+             strict: false
+             type: env_var
+           generation_kwargs: {}
+           max_retries:
+           model: gpt-4o
+           organization:
+           streaming_callback:
+           timeout:
+           tools:
+           tools_strict: false
+         type: haystack.components.generators.chat.openai.OpenAIChatGenerator
+       exit_conditions:
+       - text
+       max_agent_steps: 100
+       raise_on_tool_invocation_failure: false
+       state_schema: {}
+       streaming_callback:
+       system_prompt: |-
+         You are a deep research assistant.
+         You create comprehensive research reports to answer the user's questions.
+         You use the 'search'-tool to answer any questions.
+         You perform multiple searches until you have the information you need to answer the question.
+         Make sure you research different aspects of the question.
+         Use markdown to format your response.
+         When you use information from the websearch results, cite your sources using markdown links.
+         It is important that you cite accurately.
+       tools:
+       - data:
+           component:
+             init_parameters:
+               input_mapping:
+                 query:
+                 - search.query
+               output_mapping:
+                 builder.prompt: result
+               pipeline:
+                 components:
+                   builder:
+                     init_parameters:
+                       required_variables: "*"
+                       template: |-
+                         {% for doc in docs %}
+                         {% if doc.content and doc.meta.url|length > 0 %}
+                         <search-result url="{{ doc.meta.url }}">
+                         {{ doc.content|truncate(25000) }}
+                         </search-result>
+                         {% endif %}
+                         {% endfor %}
+                       variables:
+                     type: haystack.components.builders.prompt_builder.PromptBuilder
+                   converter:
+                     init_parameters:
+                       extraction_kwargs: {}
+                       store_full_path: false
+                     type: haystack.components.converters.html.HTMLToDocument
+                   fetcher:
+                     init_parameters:
+                       raise_on_failure: false
+                       retry_attempts: 2
+                       timeout: 3
+                       user_agents:
+                       - haystack/LinkContentFetcher/2.11.1
+                     type: haystack.components.fetchers.link_content.LinkContentFetcher
+                   search:
+                     init_parameters:
+                       api_key:
+                         env_vars:
+                         - SERPERDEV_API_KEY
+                         strict: false
+                         type: env_var
+                       search_params: {}
+                       top_k: 10
+                     type: haystack.components.websearch.serper_dev.SerperDevWebSearch
+                 connection_type_validation: true
+                 connections:
+                 - receiver: fetcher.urls
+                   sender: search.links
+                 - receiver: converter.sources
+                   sender: fetcher.streams
+                 - receiver: builder.docs
+                   sender: converter.documents
+                 max_runs_per_component: 100
+                 metadata: {}
+             type: haystack.core.super_component.super_component.SuperComponent
+           description: Use this tool to search for information on the internet.
+           inputs_from_state:
+           name: search
+           parameters:
+         type: haystack.tools.component_tool.ComponentTool
+     type: haystack.components.agents.agent.Agent
+   answer_builder:
+     init_parameters:
+       pattern:
+       reference_pattern:
+     type: haystack.components.builders.answer_builder.AnswerBuilder
+   history_parser:
+     init_parameters: {}
+     type: dc_custom_component.components.parsers.chat_history_parser.DeepsetChatHistoryParser
+
+
+ connections:
+ - receiver: agent.messages
+   sender: history_parser.messages
+ - receiver: adapter.messages
+   sender: agent.messages
+ - receiver: answer_builder.replies
+   sender: adapter.output
+
+ inputs:
+   query:
+   - answer_builder.query
+   - history_parser.history_and_query
+
+ outputs:
+   answers: answer_builder.answers
+
+ pipeline_output_type: chat
+
+ max_runs_per_component: 100
+
+ metadata: {}
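The comments at the top of this fixture summarize the schema used throughout these pipeline files: a `components` mapping of named components, each with a `type` class path and `init_parameters`, wired together by a top-level `connections` list of `sender`/`receiver` socket pairs. A rough sketch of a structural sanity check along those lines, assuming PyYAML is available and the fixture has been saved locally under the hypothetical name `pipeline.yml` (this is not deepset's own validation):

```python
# Check that every connection endpoint in a pipeline YAML names a declared component.
# A rough illustration only, not an official deepset validator.
import yaml

with open("pipeline.yml") as fh:  # hypothetical local copy of the fixture
    pipeline = yaml.safe_load(fh)

component_names = set(pipeline.get("components", {}))

for connection in pipeline.get("connections", []):
    for endpoint in (connection["sender"], connection["receiver"]):
        component, _, socket = endpoint.partition(".")
        if component not in component_names:
            print(f"unknown component {component!r} in endpoint {endpoint!r}")

print(f"checked {len(pipeline.get('connections', []))} connections "
      f"against {len(component_names)} components")
```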
@@ -0,0 +1,141 @@
+ # If you need help with the YAML format, have a look at https://docs.cloud.deepset.ai/v2.0/docs/create-a-pipeline#create-a-pipeline-using-pipeline-editor.
+ # This section defines components that you want to use in your pipelines. Each component must have a name and a type. You can also set the component's parameters here.
+ # The name is up to you, you can give your component a friendly name. You then use components' names when specifying the connections in the pipeline.
+ # Type is the class path of the component. You can check the type on the component's documentation page.
+ components:
+   adapter:
+     init_parameters:
+       custom_filters: {}
+       output_type: List[str]
+       template: '{{ [messages|last.text] }}'
+       unsafe: false
+     type: haystack.components.converters.output_adapter.OutputAdapter
+   agent:
+     init_parameters:
+       chat_generator:
+         init_parameters:
+           api_base_url:
+           api_key:
+             env_vars:
+             - OPENAI_API_KEY
+             strict: false
+             type: env_var
+           generation_kwargs: {}
+           max_retries:
+           model: gpt-4o
+           organization:
+           streaming_callback:
+           timeout:
+           tools:
+           tools_strict: false
+         type: haystack.components.generators.chat.openai.OpenAIChatGenerator
+       exit_conditions:
+       - text
+       max_agent_steps: 100
+       raise_on_tool_invocation_failure: false
+       state_schema: {}
+       streaming_callback:
+       system_prompt: |-
+         You are a deep research assistant.
+         You create comprehensive research reports to answer the user's questions.
+         You use the 'search'-tool to answer any questions.
+         You perform multiple searches until you have the information you need to answer the question.
+         Make sure you research different aspects of the question.
+         Use markdown to format your response.
+         When you use information from the websearch results, cite your sources using markdown links.
+         It is important that you cite accurately.
+       tools:
+       - data:
+           component:
+             init_parameters:
+               input_mapping:
+                 query:
+                 - search.query
+               output_mapping:
+                 builder.prompt: result
+               pipeline:
+                 components:
+                   builder:
+                     init_parameters:
+                       required_variables: "*"
+                       template: |-
+                         {% for doc in docs %}
+                         {% if doc.content and doc.meta.url|length > 0 %}
+                         <search-result url="{{ doc.meta.url }}">
+                         {{ doc.content|truncate(25000) }}
+                         </search-result>
+                         {% endif %}
+                         {% endfor %}
+                       variables:
+                     type: haystack.components.builders.prompt_builder.PromptBuilder
+                   converter:
+                     init_parameters:
+                       extraction_kwargs: {}
+                       store_full_path: false
+                     type: haystack.components.converters.html.HTMLToDocument
+                   fetcher:
+                     init_parameters:
+                       raise_on_failure: false
+                       retry_attempts: 2
+                       timeout: 3
+                       user_agents:
+                       - haystack/LinkContentFetcher/2.11.1
+                     type: haystack.components.fetchers.link_content.LinkContentFetcher
+                   search:
+                     init_parameters:
+                       api_key:
+                         env_vars:
+                         - SERPERDEV_API_KEY
+                         strict: false
+                         type: env_var
+                       search_params: {}
+                       top_k: 10
+                     type: haystack.components.websearch.serper_dev.SerperDevWebSearch
+                 connection_type_validation: true
+                 connections:
+                 - receiver: fetcher.urls
+                   sender: search.links
+                 - receiver: converter.sources
+                   sender: fetcher.streams
+                 - receiver: builder.docs
+                   sender: converter.documents
+                 max_runs_per_component: 100
+                 metadata: {}
+             type: haystack.core.super_component.super_component.SuperComponent
+           description: Use this tool to search for information on the internet.
+           inputs_from_state:
+           name: search
+           parameters:
+         type: haystack.tools.component_tool.ComponentTool
+     type: haystack.components.agents.agent.Agent
+   answer_builder:
+     init_parameters:
+       pattern:
+       reference_pattern:
+     type: haystack.components.builders.answer_builder.AnswerBuilder
+   history_parser:
+     init_parameters: {}
+     type: dc_custom_component.components.parsers.chat_history_parser.DeepsetChatHistoryParser
+
+
+ connections:
+ - receiver: agent.messages
+   sender: history_parser.messages
+ - receiver: adapter.messages
+   sender: agent.messages
+ - receiver: answer_builder.replies
+   sender: adapter.output
+
+ inputs:
+   query:
+   - answer_builder.query
+   - history_parser.history_and_query
+
+ outputs:
+   answers: answer_builder.answers
+
+ pipeline_output_type: chat
+
+ max_runs_per_component: 100
+
+ metadata: {}
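The two 141-line fixtures above differ only in the adapter template: `' {{ [(messages|last).text] }} '` versus `'{{ [messages|last.text] }}'`, which appear to correspond to the `jinja_str_instead_of_complex_type` and `jinja_syntax_error` benchmark tasks in the file list. A small sketch with plain Jinja (Haystack's OutputAdapter is deliberately left out, so treat the rendering environment as an assumption) showing why only the parenthesized form reads the `.text` attribute of the last message, while the other is parsed as a single, non-existent dotted filter name:

```python
# Illustrate how Jinja parses the two adapter templates from the fixtures above.
from dataclasses import dataclass

from jinja2 import Environment, TemplateError


@dataclass
class Message:
    text: str


env = Environment()
messages = [Message("hello"), Message("how do I create a pipeline?")]

# Parenthesized form: apply the `last` filter, then read the `.text` attribute.
ok = env.from_string("{{ [(messages|last).text] }}").render(messages=messages)
print(ok)  # ['how do I create a pipeline?']

# Unparenthesized form: Jinja treats `last.text` as one dotted filter name,
# which does not exist, so compiling/rendering the template fails.
try:
    env.from_string("{{ [messages|last.text] }}").render(messages=messages)
except TemplateError as err:
    print("template error:", err)
```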
@@ -0,0 +1,181 @@
+ components:
+   chat_summary_prompt_builder:
+     type: haystack.components.builders.prompt_builder.PromptBuilder
+     init_parameters:
+       template: |-
+         You are part of a chatbot.
+         You receive a question (Current Question) and a chat history.
+         Use the context from the chat history and reformulate the question so that it is suitable for retrieval augmented generation.
+         If X is followed by Y, only ask for Y and do not repeat X again.
+         If the question does not require any context from the chat history, output it unedited.
+         Don't make questions too long, but short and precise.
+         Stay as close as possible to the current question.
+         Only output the new question, nothing else!
+
+         {{ question }}
+
+         New question:
+
+   chat_summary_llm:
+     type: haystack.components.generators.openai.OpenAIGenerator
+     init_parameters:
+       api_key: {"type": "env_var", "env_vars": ["OPENAI_API_KEY"], "strict": false}
+       model: "gpt-4o"
+       generation_kwargs:
+         max_tokens: 650
+         temperature: 0
+         seed: 0
+
+   replies_to_query:
+     type: haystack.components.converters.output_adapter.OutputAdapter
+     init_parameters:
+       template: "{{ replies[0] }}"
+       output_type: str
+
+   bm25_retriever: # Selects the most similar documents from the document store
+     type: haystack_integrations.components.retrievers.opensearch.bm25_retriever.OpenSearchBM25Retriever
+     init_parameters:
+       document_store:
+         type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
+         init_parameters:
+           hosts:
+           index: 'standard-index'
+           max_chunk_bytes: 104857600
+           embedding_dim: 768
+           return_embedding: false
+           method:
+           mappings:
+           settings:
+           create_index: true
+           http_auth:
+           use_ssl:
+           verify_certs:
+           timeout:
+       top_k: 20 # The number of results to return
+
+   query_embedder:
+     type: deepset_cloud_custom_nodes.embedders.nvidia.text_embedder.DeepsetNvidiaTextEmbedder
+     init_parameters:
+       normalize_embeddings: true
+       model: intfloat/e5-base-v2
+
+
+   embedding_retriever: # Selects the most similar documents from the document store
+     type: haystack_integrations.components.retrievers.opensearch.embedding_retriever.OpenSearchEmbeddingRetriever
+     init_parameters:
+       document_store:
+         type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
+         init_parameters:
+           hosts:
+           index: 'standard-index'
+           max_chunk_bytes: 104857600
+           embedding_dim: 768
+           return_embedding: false
+           method:
+           mappings:
+           settings:
+           create_index: true
+           http_auth:
+           use_ssl:
+           verify_certs:
+           timeout:
+       top_k: 20 # The number of results to return
+
+   document_joiner:
+     type: haystack.components.joiners.document_joiner.DocumentJoiner
+     init_parameters:
+       join_mode: concatenate
+
+   ranker:
+     type: deepset_cloud_custom_nodes.rankers.nvidia.ranker.DeepsetNvidiaRanker
+     init_parameters:
+       model: intfloat/simlm-msmarco-reranker
+       top_k: 8
+
+
+   qa_prompt_builder:
+     type: haystack.components.builders.prompt_builder.PromptBuilder
+     init_parameters:
+       template: |-
+         You are a technical expert.
+         You answer questions truthfully based on provided documents.
+         Ignore typing errors in the question.
+         For each document check whether it is related to the question.
+         Only use documents that are related to the question to answer it.
+         Ignore documents that are not related to the question.
+         If the answer exists in several documents, summarize them.
+         Only answer based on the documents provided. Don't make things up.
+         Just output the structured, informative and precise answer and nothing else.
+         If the documents can't answer the question, say so.
+         Always use references in the form [NUMBER OF DOCUMENT] when using information from a document, e.g. [3] for Document [3] .
+         Never name the documents, only enter a number in square brackets as a reference.
+         The reference must only refer to the number that comes in square brackets after the document.
+         Otherwise, do not use brackets in your answer and reference ONLY the number of the document without mentioning the word document.
+
+         These are the documents:
+         {%- if documents|length > 0 %}
+         {%- for document in documents %}
+         Document [{{ loop.index }}] :
+         Name of Source File: {{ document.meta.file_name }}
+         {{ document.content }}
+         {% endfor -%}
+         {%- else %}
+         No relevant documents found.
+         Respond with "Sorry, no matching documents were found, please adjust the filters or try a different question."
+         {% endif %}
+
+         Question: {{ question }}
+         Answer:
+
+   qa_llm:
+     type: haystack.components.generators.openai.OpenAIGenerator
+     init_parameters:
+       api_key: {"type": "env_var", "env_vars": ["OPENAI_API_KEY"], "strict": false}
+       model: "gpt-4o"
+       generation_kwargs:
+         max_tokens: 650
+         temperature: 0
+         seed: 0
+
+ connections: # Defines how the components are connected
+ - sender: chat_summary_prompt_builder.prompt
+   receiver: chat_summary_llm.prompt
+ - sender: chat_summary_llm.replies
+   receiver: replies_to_query.replies
+ - sender: replies_to_query.output
+   receiver: bm25_retriever.query
+ - sender: replies_to_query.output
+   receiver: query_embedder.text
+ - sender: replies_to_query.output
+   receiver: ranker.query
+ - sender: replies_to_query.output
+   receiver: qa_prompt_builder.question
+ - sender: bm25_retriever.documents
+   receiver: document_joiner.documents
+ - sender: query_embedder.embedding
+   receiver: embedding_retriever.query_embedding
+ - sender: embedding_retriever.documents
+   receiver: document_joiner.documents
+ - sender: document_joiner.documents
+   receiver: ranker.documents
+ - sender: ranker.documents
+   receiver: qa_prompt_builder.documents
+ - sender: qa_prompt_builder.prompt
+   receiver: qa_llm.prompt
+
+ inputs: # Define the inputs for your pipeline
+   query: # These components will receive the query as input
+   - "chat_summary_prompt_builder.question"
+
+   filters: # These components will receive a potential query filter as input
+   - "bm25_retriever.filters"
+   - "embedding_retriever.filters"
+
+ outputs:
+   documents: "ranker.documents"
+   answers: "qa_llm.replies"
+
+
+ max_runs_per_component: 100
+
+ metadata: {}
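The `connections` list in this 181-line hybrid RAG fixture fully determines the component graph: the rephrased query feeds both retrievers, their results are joined, ranked, and passed to the prompt builder and LLM. A sketch that recovers a component-level ordering from that list with the standard library's `graphlib`, assuming PyYAML and a local copy of the YAML under the hypothetical name `chat_rag.yml`; this is an illustration of the wiring, not how deepset or Haystack schedule execution:

```python
# Derive a component-level execution order from the `connections` list above.
from graphlib import TopologicalSorter

import yaml

with open("chat_rag.yml") as fh:  # hypothetical local copy of the fixture
    pipeline = yaml.safe_load(fh)

# Map each receiving component to the set of components it depends on.
dependencies: dict[str, set[str]] = {name: set() for name in pipeline["components"]}
for connection in pipeline["connections"]:
    sender = connection["sender"].split(".", 1)[0]
    receiver = connection["receiver"].split(".", 1)[0]
    dependencies[receiver].add(sender)

for component in TopologicalSorter(dependencies).static_order():
    print(component)
# Expected to start with chat_summary_prompt_builder and end with qa_llm.
```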
@@ -0,0 +1,189 @@
+ components:
+   chat_summary_prompt_builder:
+     type: haystack.components.builders.prompt_builder.PromptBuilder
+     init_parameters:
+       template: |-
+         You are part of a chatbot.
+         You receive a question (Current Question) and a chat history.
+         Use the context from the chat history and reformulate the question so that it is suitable for retrieval augmented generation.
+         If X is followed by Y, only ask for Y and do not repeat X again.
+         If the question does not require any context from the chat history, output it unedited.
+         Don't make questions too long, but short and precise.
+         Stay as close as possible to the current question.
+         Only output the new question, nothing else!
+
+         {{ question }}
+
+         New question:
+
+   chat_summary_llm:
+     type: haystack.components.generators.openai.OpenAIGenerator
+     init_parameters:
+       api_key: {"type": "env_var", "env_vars": ["OPENAI_API_KEY"], "strict": false}
+       model: "gpt-4o"
+       generation_kwargs:
+         max_tokens: 650
+         temperature: 0
+         seed: 0
+
+   replies_to_query:
+     type: haystack.components.converters.output_adapter.OutputAdapter
+     init_parameters:
+       template: "{{ replies[0] }}"
+       output_type: str
+
+   bm25_retriever: # Selects the most similar documents from the document store
+     type: haystack_integrations.components.retrievers.opensearch.bm25_retriever.OpenSearchBM25Retriever
+     init_parameters:
+       document_store:
+         type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
+         init_parameters:
+           hosts:
+           index: 'standard-index'
+           max_chunk_bytes: 104857600
+           embedding_dim: 768
+           return_embedding: false
+           method:
+           mappings:
+           settings:
+           create_index: true
+           http_auth:
+           use_ssl:
+           verify_certs:
+           timeout:
+       top_k: 20 # The number of results to return
+
+   query_embedder:
+     type: deepset_cloud_custom_nodes.embedders.nvidia.text_embedder.DeepsetNvidiaTextEmbedder
+     init_parameters:
+       normalize_embeddings: true
+       model: intfloat/e5-base-v2
+
+
+   embedding_retriever: # Selects the most similar documents from the document store
+     type: haystack_integrations.components.retrievers.opensearch.embedding_retriever.OpenSearchEmbeddingRetriever
+     init_parameters:
+       document_store:
+         type: haystack_integrations.document_stores.opensearch.document_store.OpenSearchDocumentStore
+         init_parameters:
+           hosts:
+           index: 'standard-index'
+           max_chunk_bytes: 104857600
+           embedding_dim: 768
+           return_embedding: false
+           method:
+           mappings:
+           settings:
+           create_index: true
+           http_auth:
+           use_ssl:
+           verify_certs:
+           timeout:
+       top_k: 20 # The number of results to return
+
+   document_joiner:
+     type: haystack.components.joiners.document_joiner.DocumentJoiner
+     init_parameters:
+       join_mode: concatenate
+
+   ranker:
+     type: deepset_cloud_custom_nodes.rankers.nvidia.ranker.DeepsetNvidiaRanker
+     init_parameters:
+       model: intfloat/simlm-msmarco-reranker
+       top_k: 8
+
+
+   qa_prompt_builder:
+     type: haystack.components.builders.prompt_builder.PromptBuilder
+     init_parameters:
+       template: |-
+         You are a technical expert.
+         You answer questions truthfully based on provided documents.
+         Ignore typing errors in the question.
+         For each document check whether it is related to the question.
+         Only use documents that are related to the question to answer it.
+         Ignore documents that are not related to the question.
+         If the answer exists in several documents, summarize them.
+         Only answer based on the documents provided. Don't make things up.
+         Just output the structured, informative and precise answer and nothing else.
+         If the documents can't answer the question, say so.
+         Always use references in the form [NUMBER OF DOCUMENT] when using information from a document, e.g. [3] for Document [3] .
+         Never name the documents, only enter a number in square brackets as a reference.
+         The reference must only refer to the number that comes in square brackets after the document.
+         Otherwise, do not use brackets in your answer and reference ONLY the number of the document without mentioning the word document.
+
+         These are the documents:
+         {%- if documents|length > 0 %}
+         {%- for document in documents %}
+         Document [{{ loop.index }}] :
+         Name of Source File: {{ document.meta.file_name }}
+         {{ document.content }}
+         {% endfor -%}
+         {%- else %}
+         No relevant documents found.
+         Respond with "Sorry, no matching documents were found, please adjust the filters or try a different question."
+         {% endif %}
+
+         Question: {{ question }}
+         Answer:
+
+   qa_llm:
+     type: haystack.components.generators.openai.OpenAIGenerator
+     init_parameters:
+       api_key: {"type": "env_var", "env_vars": ["OPENAI_API_KEY"], "strict": false}
+       model: "gpt-4o"
+       generation_kwargs:
+         max_tokens: 650
+         temperature: 0
+         seed: 0
+
+   answer_builder:
+     type: deepset_cloud_custom_nodes.augmenters.deepset_answer_builder.DeepsetAnswerBuilder
+     init_parameters:
+       reference_pattern: acm
+
+ connections: # Defines how the components are connected
+ - sender: chat_summary_prompt_builder.prompt
+   receiver: chat_summary_llm.prompt
+ - sender: chat_summary_llm.replies
+   receiver: replies_to_query.replies
+ - sender: replies_to_query.output
+   receiver: bm25_retriever.query
+ - sender: replies_to_query.output
+   receiver: query_embedder.text
+ - sender: replies_to_query.output
+   receiver: ranker.query
+ - sender: replies_to_query.output
+   receiver: qa_prompt_builder.question
+ - sender: replies_to_query.output
+   receiver: answer_builder.query
+ - sender: bm25_retriever.documents
+   receiver: document_joiner.documents
+ - sender: query_embedder.embedding
+   receiver: embedding_retriever.query_embedding
+ - sender: embedding_retriever.documents
+   receiver: document_joiner.documents
+ - sender: document_joiner.documents
+   receiver: ranker.documents
+ - sender: ranker.documents
+   receiver: qa_prompt_builder.documents
+ - sender: ranker.documents
+   receiver: answer_builder.documents
+ - sender: qa_prompt_builder.prompt
+   receiver: qa_llm.prompt
+ - sender: qa_prompt_builder.prompt
+   receiver: answer_builder.prompt
+ - sender: qa_llm.replies
+   receiver: answer_builder.replies
+
+ inputs: # Define the inputs for your pipeline
+   query: # These components will receive the query as input
+   - "chat_summary_prompt_builder.question"
+
+   filters: # These components will receive a potential query filter as input
+   - "bm25_retriever.filters"
+   - "embedding_retriever.filters"
+
+ max_runs_per_component: 100
+
+ metadata: {}
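This final 189-line fixture, which by its line count appears to be `chat_rag_missing_output_mapping.yml`, wires a `DeepsetAnswerBuilder` into the graph but declares no top-level `outputs:` section, so no socket such as `answer_builder.answers` is exposed as a pipeline output. A rough sketch of a check for exactly that kind of gap, assuming PyYAML and a local copy saved under the hypothetical name `fixture.yml` (again, not an official deepset validation step):

```python
# Flag a pipeline YAML whose top-level `outputs:` section is missing or points
# at undeclared components.
import yaml

with open("fixture.yml") as fh:  # hypothetical local copy of the fixture
    pipeline = yaml.safe_load(fh)

components = set(pipeline.get("components", {}))
outputs = pipeline.get("outputs") or {}

if not outputs:
    print("no `outputs:` section: the pipeline exposes nothing to callers")
else:
    for name, socket in outputs.items():
        component = str(socket).split(".", 1)[0]
        status = "ok" if component in components else "unknown component"
        print(f"output {name!r} -> {socket} ({status})")
```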