haystack-experimental 0.0.1__tar.gz → 0.0.2.dev0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. {haystack_experimental-0.0.1 → haystack_experimental-0.0.2.dev0}/LICENSE +1 -1
  2. haystack_experimental-0.0.2.dev0/PKG-INFO +145 -0
  3. haystack_experimental-0.0.2.dev0/README.md +117 -0
  4. haystack_experimental-0.0.2.dev0/haystack_experimental/__init__.py +3 -0
  5. haystack_experimental-0.0.2.dev0/haystack_experimental/components/__init__.py +7 -0
  6. haystack_experimental-0.0.2.dev0/haystack_experimental/components/tools/__init__.py +7 -0
  7. haystack_experimental-0.0.2.dev0/haystack_experimental/components/tools/openai/__init__.py +7 -0
  8. haystack_experimental-0.0.2.dev0/haystack_experimental/components/tools/openai/function_caller.py +101 -0
  9. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/__init__.py +7 -0
  10. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/__init__.py +7 -0
  11. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/evalution_harness.py +87 -0
  12. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/__init__.py +23 -0
  13. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/evaluation_pipeline.py +55 -0
  14. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/harness.py +422 -0
  15. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/parameters.py +153 -0
  16. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/util/__init__.py +3 -0
  17. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/util/helpers.py +98 -0
  18. haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/util/pipeline_pair.py +209 -0
  19. haystack_experimental-0.0.2.dev0/haystack_experimental/testing/__init__.py +3 -0
  20. haystack_experimental-0.0.2.dev0/haystack_experimental/testing/sample_components.py +40 -0
  21. {haystack_experimental-0.0.1 → haystack_experimental-0.0.2.dev0}/pyproject.toml +20 -26
  22. haystack_experimental-0.0.1/PKG-INFO +0 -29
  23. haystack_experimental-0.0.1/README.md +0 -1
  24. haystack_experimental-0.0.1/VERSION.txt +0 -1
  25. {haystack_experimental-0.0.1 → haystack_experimental-0.0.2.dev0}/.gitignore +0 -0
@@ -186,7 +186,7 @@
186
186
  same "printed page" as the copyright notice for easier
187
187
  identification within third-party archives.
188
188
 
189
- Copyright [yyyy] [name of copyright owner]
189
+ Copyright 2024-present deepset GmbH <info@deepset.ai>
190
190
 
191
191
  Licensed under the Apache License, Version 2.0 (the "License");
192
192
  you may not use this file except in compliance with the License.
@@ -0,0 +1,145 @@
1
+ Metadata-Version: 2.3
2
+ Name: haystack-experimental
3
+ Version: 0.0.2.dev0
4
+ Summary: Experimental components and features for the Haystack LLM framework.
5
+ Project-URL: CI: GitHub, https://github.com/deepset-ai/haystack-experimental/actions
6
+ Project-URL: GitHub: issues, https://github.com/deepset-ai/haystack-experimental/issues
7
+ Project-URL: GitHub: repo, https://github.com/deepset-ai/haystack-experimental
8
+ Project-URL: Homepage, https://github.com/deepset-ai/haystack-experimental
9
+ Author-email: "deepset.ai" <info@deepset.ai>
10
+ License: Apache-2.0
11
+ License-File: LICENSE
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: Freely Distributable
15
+ Classifier: License :: OSI Approved :: Apache Software License
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python
18
+ Classifier: Programming Language :: Python :: 3
19
+ Classifier: Programming Language :: Python :: 3.8
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Programming Language :: Python :: 3.10
22
+ Classifier: Programming Language :: Python :: 3.11
23
+ Classifier: Programming Language :: Python :: 3.12
24
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
25
+ Requires-Python: >=3.8
26
+ Requires-Dist: haystack-ai
27
+ Description-Content-Type: text/markdown
28
+
29
+ [![PyPI - Version](https://img.shields.io/pypi/v/haystack-experimental.svg)](https://pypi.org/project/haystack-experimental)
30
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/haystack-experimental.svg)](https://pypi.org/project/haystack-experimental)
31
+ [![Tests](https://github.com/deepset-ai/haystack-experimental/actions/workflows/tests.yml/badge.svg)](https://github.com/deepset-ai/haystack-experimental/actions/workflows/tests.yml)
32
+ [![Project release on PyPi](https://github.com/deepset-ai/haystack-experimental/actions/workflows/pypi_release.yml/badge.svg)](https://github.com/deepset-ai/haystack-experimental/actions/workflows/pypi_release.yml)
33
+ [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
34
+ [![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
35
+
36
+ # Haystack experimental package
37
+
38
+ The `haystack-experimental` package provides Haystack users with access to experimental features without immediately
39
+ committing to their official release. The main goal is to gather user feedback and iterate on new features quickly.
40
+
41
+ ## Installation
42
+
43
+ For simplicity, every release of `haystack-experimental` will ship all the available experiments at that time. To
44
+ install the latest experimental features, run:
45
+
46
+ ```sh
47
+ $ pip install -U haystack-experimental
48
+ ```
49
+
50
+ > [!IMPORTANT]
51
+ > The latest version of the experimental package is only tested against the latest version of Haystack. Compatibility
52
+ > with older versions of Haystack is not guaranteed.
53
+
54
+
55
+ ## Experiments lifecycle
56
+
57
+ Any experimental feature will be removed from `haystack-experimental` after a period of 3 months. After this time,
58
+ the experiment will be either:
59
+ - Merged into Haystack core and published in the next minor release
60
+ - Released as a Core Integration,
61
+ - Dropped.
62
+
63
+ ## Experiments catalog
64
+
65
+ The latest version of the package contains the following experiments:
66
+
67
+ | Name | Type | Experiment end date |
68
+ | ------------------------ | ----------------------- | ------------------- |
69
+ | [`EvaluationHarness`][1] | Evaluation orchestrator | August 2024 |
70
+ | [`OpenAIFunctionCaller`][2] | Function Calling Component | August 2024 |
71
+
72
+ [1]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/evaluation/harness
73
+ [2]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/components/tools/openai
74
+
75
+ ## Usage
76
+
77
+ Experimental new features can be imported like any other Haystack integration package:
78
+
79
+ ```python
80
+ from haystack.dataclasses import ChatMessage
81
+ from haystack_experimental.components.generators import FoobarGenerator
82
+
83
+ c = FoobarGenerator()
84
+ c.run([ChatMessage.from_user("What's an experiment? Be brief.")])
85
+ ```
86
+
87
+ Experiments can also override existing Haystack features. For example, users can opt into an experimental type of
88
+ `Pipeline` by just changing the usual import:
89
+
90
+ ```python
91
+ # from haystack import Pipeline
92
+ from haystack_experimental import Pipeline
93
+
94
+ pipe = Pipeline()
95
+ # ...
96
+ pipe.run(...)
97
+ ```
98
+
99
+ ## Documentation
100
+
101
+ Documentation for `haystack-experimental` can be found [here](https://docs.haystack.deepset.ai/reference/haystack-experimental-api).
102
+
103
+ ## Implementation
104
+
105
+ Experiments should replicate the namespace of the core package. For example, a new generator:
106
+
107
+ ```python
108
+ # in haystack_experimental/components/generators/foobar.py
109
+
110
+ from haystack import component
111
+
112
+
113
+ @component
114
+ class FoobarGenerator:
115
+ ...
116
+
117
+ ```
118
+
119
+ When the experiment overrides an existing feature, the new symbol should be created at the same path in the experimental
120
+ package. This new symbol will override the original in `haystack-ai`: for classes, with a subclass and for bare
121
+ functions, with a wrapper. For example:
122
+
123
+ ```python
124
+ # in haystack_experimental/core/pipeline/pipeline.py
125
+
126
+ from haystack.core.pipeline import Pipeline as HaystackPipeline
127
+
128
+
129
+ class Pipeline(HaystackPipeline):
130
+ # Any new experimental method that doesn't exist in the original class
131
+ def run_async(self, inputs) -> Dict[str, Dict[str, Any]]:
132
+ ...
133
+
134
+ # Existing methods with breaking changes to their signature, like adding a new mandatory param
135
+ def to_dict(self, new_param: str) -> Dict[str, Any]:
136
+ # do something with the new parameter
137
+ print(new_param)
138
+ # call the original method
139
+ return super().to_dict()
140
+
141
+ ```
142
+
143
+ ## Contributing
144
+
145
+ Direct contributions to `haystack-experimental` are not expected, but Haystack maintainers might ask contributors to move pull requests that target the [core repository](https://github.com/deepset-ai/haystack) to this repository.
@@ -0,0 +1,117 @@
1
+ [![PyPI - Version](https://img.shields.io/pypi/v/haystack-experimental.svg)](https://pypi.org/project/haystack-experimental)
2
+ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/haystack-experimental.svg)](https://pypi.org/project/haystack-experimental)
3
+ [![Tests](https://github.com/deepset-ai/haystack-experimental/actions/workflows/tests.yml/badge.svg)](https://github.com/deepset-ai/haystack-experimental/actions/workflows/tests.yml)
4
+ [![Project release on PyPi](https://github.com/deepset-ai/haystack-experimental/actions/workflows/pypi_release.yml/badge.svg)](https://github.com/deepset-ai/haystack-experimental/actions/workflows/pypi_release.yml)
5
+ [![Hatch project](https://img.shields.io/badge/%F0%9F%A5%9A-Hatch-4051b5.svg)](https://github.com/pypa/hatch)
6
+ [![Checked with mypy](https://www.mypy-lang.org/static/mypy_badge.svg)](https://mypy-lang.org/)
7
+
8
+ # Haystack experimental package
9
+
10
+ The `haystack-experimental` package provides Haystack users with access to experimental features without immediately
11
+ committing to their official release. The main goal is to gather user feedback and iterate on new features quickly.
12
+
13
+ ## Installation
14
+
15
+ For simplicity, every release of `haystack-experimental` will ship all the available experiments at that time. To
16
+ install the latest experimental features, run:
17
+
18
+ ```sh
19
+ $ pip install -U haystack-experimental
20
+ ```
21
+
22
+ > [!IMPORTANT]
23
+ > The latest version of the experimental package is only tested against the latest version of Haystack. Compatibility
24
+ > with older versions of Haystack is not guaranteed.
25
+
26
+
27
+ ## Experiments lifecycle
28
+
29
+ Any experimental feature will be removed from `haystack-experimental` after a period of 3 months. After this time,
30
+ the experiment will be either:
31
+ - Merged into Haystack core and published in the next minor release
32
+ - Released as a Core Integration,
33
+ - Dropped.
34
+
35
+ ## Experiments catalog
36
+
37
+ The latest version of the package contains the following experiments:
38
+
39
+ | Name | Type | Experiment end date |
40
+ | ------------------------ | ----------------------- | ------------------- |
41
+ | [`EvaluationHarness`][1] | Evaluation orchestrator | August 2024 |
42
+ | [`OpenAIFunctionCaller`][2] | Function Calling Component | August 2024 |
43
+
44
+ [1]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/evaluation/harness
45
+ [2]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/components/tools/openai
46
+
47
+ ## Usage
48
+
49
+ Experimental new features can be imported like any other Haystack integration package:
50
+
51
+ ```python
52
+ from haystack.dataclasses import ChatMessage
53
+ from haystack_experimental.components.generators import FoobarGenerator
54
+
55
+ c = FoobarGenerator()
56
+ c.run([ChatMessage.from_user("What's an experiment? Be brief.")])
57
+ ```
58
+
59
+ Experiments can also override existing Haystack features. For example, users can opt into an experimental type of
60
+ `Pipeline` by just changing the usual import:
61
+
62
+ ```python
63
+ # from haystack import Pipeline
64
+ from haystack_experimental import Pipeline
65
+
66
+ pipe = Pipeline()
67
+ # ...
68
+ pipe.run(...)
69
+ ```
70
+
71
+ ## Documentation
72
+
73
+ Documentation for `haystack-experimental` can be found [here](https://docs.haystack.deepset.ai/reference/haystack-experimental-api).
74
+
75
+ ## Implementation
76
+
77
+ Experiments should replicate the namespace of the core package. For example, a new generator:
78
+
79
+ ```python
80
+ # in haystack_experimental/components/generators/foobar.py
81
+
82
+ from haystack import component
83
+
84
+
85
+ @component
86
+ class FoobarGenerator:
87
+ ...
88
+
89
+ ```
90
+
91
+ When the experiment overrides an existing feature, the new symbol should be created at the same path in the experimental
92
+ package. This new symbol will override the original in `haystack-ai`: for classes, with a subclass and for bare
93
+ functions, with a wrapper. For example:
94
+
95
+ ```python
96
+ # in haystack_experimental/core/pipeline/pipeline.py
97
+
98
+ from haystack.core.pipeline import Pipeline as HaystackPipeline
99
+
100
+
101
+ class Pipeline(HaystackPipeline):
102
+ # Any new experimental method that doesn't exist in the original class
103
+ def run_async(self, inputs) -> Dict[str, Dict[str, Any]]:
104
+ ...
105
+
106
+ # Existing methods with breaking changes to their signature, like adding a new mandatory param
107
+ def to_dict(self, new_param: str) -> Dict[str, Any]:
108
+ # do something with the new parameter
109
+ print(new_param)
110
+ # call the original method
111
+ return super().to_dict()
112
+
113
+ ```
114
+
115
+ ## Contributing
116
+
117
+ Direct contributions to `haystack-experimental` are not expected, but Haystack maintainers might ask contributors to move pull requests that target the [core repository](https://github.com/deepset-ai/haystack) to this repository.
@@ -0,0 +1,3 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .tools import OpenAIFunctionCaller
6
+
7
+ __all__ = [ "OpenAIFunctionCaller"]
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .openai.function_caller import OpenAIFunctionCaller
6
+
7
+ __all__ = ["OpenAIFunctionCaller"]
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .function_caller import OpenAIFunctionCaller
6
+
7
+ __all__ = [ "OpenAIFunctionCaller"]
@@ -0,0 +1,101 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ import json
6
+ from typing import Any, Callable, Dict, List
7
+
8
+ from haystack import component, default_from_dict, default_to_dict
9
+ from haystack.dataclasses import ChatMessage
10
+ from haystack.utils import deserialize_callable, serialize_callable
11
+
12
+ _FUNCTION_NAME_FAILURE = (
13
+ "I'm sorry, I tried to run a function that did not exist. Would you like me to correct it and try again?"
14
+ )
15
+ _FUNCTION_RUN_FAILURE = "Seems there was an error while running the function: {error}"
16
+
17
+
18
+ @component
19
+ class OpenAIFunctionCaller:
20
+ """
21
+ OpenAIFunctionCaller processes a list of chat messages and calls Python functions when needed.
22
+
23
+ The OpenAIFunctionCaller expects a list of ChatMessages and if there is a tool call with a function name and
24
+ arguments, it runs the function and returns the result as a ChatMessage from role = 'function'
25
+ """
26
+
27
+ def __init__(self, available_functions: Dict[str, Callable]):
28
+ """
29
+ Initialize the OpenAIFunctionCaller component.
30
+
31
+ :param available_functions:
32
+ A dictionary of available functions. This dictionary expects key value pairs of function name,
33
+ and the function itself. For example, `{"weather_function": weather_function}`
34
+ """
35
+ self.available_functions = available_functions
36
+
37
+ def to_dict(self) -> Dict[str, Any]:
38
+ """
39
+ Serializes the component to a dictionary.
40
+
41
+ :returns:
42
+ Dictionary with serialized data.
43
+ """
44
+ available_function_paths = {}
45
+ for name, function in self.available_functions.items():
46
+ available_function_paths[name] = serialize_callable(function)
47
+ serialization_dict = default_to_dict(self, available_functions=available_function_paths)
48
+ return serialization_dict
49
+
50
+ @classmethod
51
+ def from_dict(cls, data: Dict[str, Any]) -> "OpenAIFunctionCaller":
52
+ """
53
+ Deserializes the component from a dictionary.
54
+
55
+ :param data:
56
+ The dictionary to deserialize from.
57
+ :returns:
58
+ The deserialized component.
59
+ """
60
+ available_function_paths = data.get("init_parameters", {}).get("available_functions")
61
+ available_functions = {}
62
+ for name, path in available_function_paths.items():
63
+ available_functions[name] = deserialize_callable(path)
64
+ data["init_parameters"]["available_functions"] = available_functions
65
+ return default_from_dict(cls, data)
66
+
67
+ @component.output_types(function_replies=List[ChatMessage], assistant_replies=List[ChatMessage])
68
+ def run(self, messages: List[ChatMessage]):
69
+ """
70
+ Evaluates `messages` and invokes available functions if the messages contain tool_calls.
71
+
72
+ :param messages: A list of messages generated from the `OpenAIChatGenerator`
73
+ :returns: This component returns a list of messages in one of two outputs
74
+ - `function_replies`: List of ChatMessages containing the result of a function invocation.
75
+ This message comes from role = 'function'. If the function name was hallucinated or wrong,
76
+ an assistant message explaining as such is returned
77
+ - `assistant_replies`: List of ChatMessages containing a regular assistant reply. In this case,
78
+ there were no tool_calls in the received messages
79
+ """
80
+ if messages[0].meta["finish_reason"] == "tool_calls":
81
+ function_calls = json.loads(messages[0].content)
82
+ for function_call in function_calls:
83
+ function_name = function_call["function"]["name"]
84
+ function_args = json.loads(function_call["function"]["arguments"])
85
+ if function_name in self.available_functions:
86
+ function_to_call = self.available_functions[function_name]
87
+ try:
88
+ function_response = function_to_call(**function_args)
89
+ messages.append(
90
+ ChatMessage.from_function(
91
+ content=json.dumps(function_response),
92
+ name=function_name,
93
+ )
94
+ )
95
+ # pylint: disable=broad-exception-caught
96
+ except Exception as e:
97
+ messages.append(ChatMessage.from_assistant(_FUNCTION_RUN_FAILURE.format(error=e)))
98
+ else:
99
+ messages.append(ChatMessage.from_assistant(_FUNCTION_NAME_FAILURE))
100
+ return {"function_replies": messages}
101
+ return {"assistant_replies": messages}
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .harness import EvaluationHarness, EvaluationRunOverrides
6
+
7
+ __all__ = ["EvaluationHarness", "EvaluationRunOverrides"]
@@ -0,0 +1,7 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .evalution_harness import EvaluationHarness, EvaluationRunOverrides
6
+
7
+ __all__ = ["EvaluationHarness", "EvaluationRunOverrides"]
@@ -0,0 +1,87 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from abc import ABC, abstractmethod
6
+ from dataclasses import dataclass
7
+ from typing import Any, Dict, Generic, Optional, Type, TypeVar
8
+
9
+ from haystack import Pipeline
10
+ from haystack.core.serialization import DeserializationCallbacks
11
+
12
+
13
+ @dataclass
14
+ class EvaluationRunOverrides:
15
+ """
16
+ Overrides for an evaluation run.
17
+
18
+ Used to override the init parameters of components in either
19
+ (or both) the evaluated and evaluation pipelines. Each key is
20
+ a component name and its value a dictionary with init parameters
21
+ to override.
22
+
23
+ :param evaluated_pipeline_overrides:
24
+ Overrides for the evaluated pipeline.
25
+ :param evaluation_pipeline_overrides:
26
+ Overrides for the evaluation pipeline.
27
+ """
28
+
29
+ evaluated_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None
30
+ evaluation_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None
31
+
32
+
33
+ EvalRunInputT = TypeVar("EvalRunInputT")
34
+ EvalRunOutputT = TypeVar("EvalRunOutputT")
35
+ EvalRunOverridesT = TypeVar("EvalRunOverridesT")
36
+
37
+
38
+ class EvaluationHarness(ABC, Generic[EvalRunInputT, EvalRunOverridesT, EvalRunOutputT]):
39
+ """
40
+ Executes a pipeline with a given set of parameters, inputs and evaluates its outputs with an evaluation pipeline.
41
+ """
42
+
43
+ @staticmethod
44
+ def _override_pipeline(pipeline: Pipeline, parameter_overrides: Optional[Dict[str, Any]]) -> Pipeline:
45
+ def component_pre_init_callback(name: str, cls: Type, init_params: Dict[str, Any]): # pylint: disable=unused-argument
46
+ assert parameter_overrides is not None
47
+ overrides = parameter_overrides.get(name)
48
+ if overrides:
49
+ init_params.update(overrides)
50
+
51
+ def validate_overrides():
52
+ if parameter_overrides is None:
53
+ return
54
+
55
+ pipeline_components = pipeline.inputs(include_components_with_connected_inputs=True).keys()
56
+ for component_name in parameter_overrides.keys():
57
+ if component_name not in pipeline_components:
58
+ raise ValueError(f"Cannot override non-existent component '{component_name}'")
59
+
60
+ callbacks = DeserializationCallbacks(component_pre_init_callback)
61
+ if parameter_overrides:
62
+ validate_overrides()
63
+ serialized_pipeline = pipeline.dumps()
64
+ pipeline = Pipeline.loads(serialized_pipeline, callbacks=callbacks)
65
+
66
+ return pipeline
67
+
68
+ @abstractmethod
69
+ def run(
70
+ self,
71
+ inputs: EvalRunInputT,
72
+ *,
73
+ overrides: Optional[EvalRunOverridesT] = None,
74
+ run_name: Optional[str] = None,
75
+ ) -> EvalRunOutputT:
76
+ """
77
+ Launch an evaluation run.
78
+
79
+ :param inputs:
80
+ Inputs to the evaluated and evaluation pipelines.
81
+ :param overrides:
82
+ Overrides for the harness.
83
+ :param run_name:
84
+ A name for the evaluation run.
85
+ :returns:
86
+ The output of the evaluation pipeline.
87
+ """
@@ -0,0 +1,23 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from .harness import RAGEvaluationHarness
6
+ from .parameters import (
7
+ RAGEvaluationInput,
8
+ RAGEvaluationMetric,
9
+ RAGEvaluationOutput,
10
+ RAGEvaluationOverrides,
11
+ RAGExpectedComponent,
12
+ RAGExpectedComponentMetadata,
13
+ )
14
+
15
+ __all__ = [
16
+ "RAGEvaluationHarness",
17
+ "RAGExpectedComponent",
18
+ "RAGExpectedComponentMetadata",
19
+ "RAGEvaluationMetric",
20
+ "RAGEvaluationOutput",
21
+ "RAGEvaluationOverrides",
22
+ "RAGEvaluationInput",
23
+ ]
@@ -0,0 +1,55 @@
1
+ # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
2
+ #
3
+ # SPDX-License-Identifier: Apache-2.0
4
+
5
+ from functools import partial
6
+ from typing import Set
7
+
8
+ from haystack import Pipeline
9
+ from haystack.components.evaluators import (
10
+ ContextRelevanceEvaluator,
11
+ DocumentMAPEvaluator,
12
+ DocumentMRREvaluator,
13
+ DocumentRecallEvaluator,
14
+ FaithfulnessEvaluator,
15
+ SASEvaluator,
16
+ )
17
+ from haystack.components.evaluators.document_recall import RecallMode
18
+
19
+ from .parameters import RAGEvaluationMetric
20
+
21
+
22
+ def default_rag_evaluation_pipeline(
23
+ metrics: Set[RAGEvaluationMetric],
24
+ ) -> Pipeline:
25
+ """
26
+ Builds the default evaluation pipeline for RAG.
27
+
28
+ :param metrics:
29
+ The set of metrics to include in the pipeline.
30
+ :returns:
31
+ The evaluation pipeline.
32
+ """
33
+ pipeline = Pipeline()
34
+
35
+ metric_ctors = {
36
+ RAGEvaluationMetric.DOCUMENT_MAP: DocumentMAPEvaluator,
37
+ RAGEvaluationMetric.DOCUMENT_MRR: DocumentMRREvaluator,
38
+ RAGEvaluationMetric.DOCUMENT_RECALL_SINGLE_HIT: partial(
39
+ DocumentRecallEvaluator, mode=RecallMode.SINGLE_HIT
40
+ ),
41
+ RAGEvaluationMetric.DOCUMENT_RECALL_MULTI_HIT: partial(
42
+ DocumentRecallEvaluator, mode=RecallMode.MULTI_HIT
43
+ ),
44
+ RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: partial(
45
+ SASEvaluator, model="sentence-transformers/all-MiniLM-L6-v2"
46
+ ),
47
+ RAGEvaluationMetric.ANSWER_FAITHFULNESS: partial(FaithfulnessEvaluator, raise_on_failure=False),
48
+ RAGEvaluationMetric.CONTEXT_RELEVANCE: partial(ContextRelevanceEvaluator, raise_on_failure=False),
49
+ }
50
+
51
+ for metric in metrics:
52
+ ctor = metric_ctors[metric]
53
+ pipeline.add_component(metric.value, ctor())
54
+
55
+ return pipeline