haystack-experimental 0.0.1__tar.gz → 0.0.2.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {haystack_experimental-0.0.1 → haystack_experimental-0.0.2.dev0}/LICENSE +1 -1
- haystack_experimental-0.0.2.dev0/PKG-INFO +145 -0
- haystack_experimental-0.0.2.dev0/README.md +117 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/__init__.py +3 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/components/__init__.py +7 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/components/tools/__init__.py +7 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/components/tools/openai/__init__.py +7 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/components/tools/openai/function_caller.py +101 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/__init__.py +7 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/__init__.py +7 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/evalution_harness.py +87 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/__init__.py +23 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/evaluation_pipeline.py +55 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/harness.py +422 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/parameters.py +153 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/util/__init__.py +3 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/util/helpers.py +98 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/util/pipeline_pair.py +209 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/testing/__init__.py +3 -0
- haystack_experimental-0.0.2.dev0/haystack_experimental/testing/sample_components.py +40 -0
- {haystack_experimental-0.0.1 → haystack_experimental-0.0.2.dev0}/pyproject.toml +20 -26
- haystack_experimental-0.0.1/PKG-INFO +0 -29
- haystack_experimental-0.0.1/README.md +0 -1
- haystack_experimental-0.0.1/VERSION.txt +0 -1
- {haystack_experimental-0.0.1 → haystack_experimental-0.0.2.dev0}/.gitignore +0 -0
|
@@ -186,7 +186,7 @@
|
|
|
186
186
|
same "printed page" as the copyright notice for easier
|
|
187
187
|
identification within third-party archives.
|
|
188
188
|
|
|
189
|
-
Copyright
|
|
189
|
+
Copyright 2024-present deepset GmbH <info@deepset.ai>
|
|
190
190
|
|
|
191
191
|
Licensed under the Apache License, Version 2.0 (the "License");
|
|
192
192
|
you may not use this file except in compliance with the License.
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: haystack-experimental
|
|
3
|
+
Version: 0.0.2.dev0
|
|
4
|
+
Summary: Experimental components and features for the Haystack LLM framework.
|
|
5
|
+
Project-URL: CI: GitHub, https://github.com/deepset-ai/haystack-experimental/actions
|
|
6
|
+
Project-URL: GitHub: issues, https://github.com/deepset-ai/haystack-experimental/issues
|
|
7
|
+
Project-URL: GitHub: repo, https://github.com/deepset-ai/haystack-experimental
|
|
8
|
+
Project-URL: Homepage, https://github.com/deepset-ai/haystack-experimental
|
|
9
|
+
Author-email: "deepset.ai" <info@deepset.ai>
|
|
10
|
+
License: Apache-2.0
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: Freely Distributable
|
|
15
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python
|
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
24
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
25
|
+
Requires-Python: >=3.8
|
|
26
|
+
Requires-Dist: haystack-ai
|
|
27
|
+
Description-Content-Type: text/markdown
|
|
28
|
+
|
|
29
|
+
[](https://pypi.org/project/haystack-experimental)
|
|
30
|
+
[](https://pypi.org/project/haystack-experimental)
|
|
31
|
+
[](https://github.com/deepset-ai/haystack-experimental/actions/workflows/tests.yml)
|
|
32
|
+
[](https://github.com/deepset-ai/haystack-experimental/actions/workflows/pypi_release.yml)
|
|
33
|
+
[](https://github.com/pypa/hatch)
|
|
34
|
+
[](https://mypy-lang.org/)
|
|
35
|
+
|
|
36
|
+
# Haystack experimental package
|
|
37
|
+
|
|
38
|
+
The `haystack-experimental` package provides Haystack users with access to experimental features without immediately
|
|
39
|
+
committing to their official release. The main goal is to gather user feedback and iterate on new features quickly.
|
|
40
|
+
|
|
41
|
+
## Installation
|
|
42
|
+
|
|
43
|
+
For simplicity, every release of `haystack-experimental` will ship all the available experiments at that time. To
|
|
44
|
+
install the latest experimental features, run:
|
|
45
|
+
|
|
46
|
+
```sh
|
|
47
|
+
$ pip install -U haystack-experimental
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
> [!IMPORTANT]
|
|
51
|
+
> The latest version of the experimental package is only tested against the latest version of Haystack. Compatibility
|
|
52
|
+
> with older versions of Haystack is not guaranteed.
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
## Experiments lifecycle
|
|
56
|
+
|
|
57
|
+
Any experimental feature will be removed from `haystack-experimental` after a period of 3 months. After this time,
|
|
58
|
+
the experiment will be either:
|
|
59
|
+
- Merged into Haystack core and published in the next minor release,
|
|
60
|
+
- Released as a Core Integration,
|
|
61
|
+
- Dropped.
|
|
62
|
+
|
|
63
|
+
## Experiments catalog
|
|
64
|
+
|
|
65
|
+
The latest version of the package contains the following experiments:
|
|
66
|
+
|
|
67
|
+
| Name | Type | Experiment end date |
|
|
68
|
+
| ------------------------ | ----------------------- | ------------------- |
|
|
69
|
+
| [`EvaluationHarness`][1] | Evaluation orchestrator | August 2024 |
|
|
70
|
+
| [`OpenAIFunctionCaller`][2] | Function Calling Component | August 2024 |
|
|
71
|
+
|
|
72
|
+
[1]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/evaluation/harness
|
|
73
|
+
[2]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/components/tools/openai
|
|
74
|
+
|
|
75
|
+
## Usage
|
|
76
|
+
|
|
77
|
+
Experimental new features can be imported like any other Haystack integration package:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from haystack.dataclasses import ChatMessage
|
|
81
|
+
from haystack_experimental.components.generators import FoobarGenerator
|
|
82
|
+
|
|
83
|
+
c = FoobarGenerator()
|
|
84
|
+
c.run([ChatMessage.from_user("What's an experiment? Be brief.")])
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Experiments can also override existing Haystack features. For example, users can opt into an experimental type of
|
|
88
|
+
`Pipeline` by just changing the usual import:
|
|
89
|
+
|
|
90
|
+
```python
|
|
91
|
+
# from haystack import Pipeline
|
|
92
|
+
from haystack_experimental import Pipeline
|
|
93
|
+
|
|
94
|
+
pipe = Pipeline()
|
|
95
|
+
# ...
|
|
96
|
+
pipe.run(...)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## Documentation
|
|
100
|
+
|
|
101
|
+
Documentation for `haystack-experimental` can be found [here](https://docs.haystack.deepset.ai/reference/haystack-experimental-api).
|
|
102
|
+
|
|
103
|
+
## Implementation
|
|
104
|
+
|
|
105
|
+
Experiments should replicate the namespace of the core package. For example, a new generator:
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
# in haystack_experimental/components/generators/foobar.py
|
|
109
|
+
|
|
110
|
+
from haystack import component
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
@component
|
|
114
|
+
class FoobarGenerator:
|
|
115
|
+
...
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
When the experiment overrides an existing feature, the new symbol should be created at the same path in the experimental
|
|
120
|
+
package. This new symbol will override the original in `haystack-ai`: for classes, with a subclass and for bare
|
|
121
|
+
functions, with a wrapper. For example:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
# in haystack_experimental/core/pipeline/pipeline.py
|
|
125
|
+
|
|
126
|
+
from haystack.core.pipeline import Pipeline as HaystackPipeline
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class Pipeline(HaystackPipeline):
|
|
130
|
+
# Any new experimental method that doesn't exist in the original class
|
|
131
|
+
def run_async(self, inputs) -> Dict[str, Dict[str, Any]]:
|
|
132
|
+
...
|
|
133
|
+
|
|
134
|
+
# Existing methods with breaking changes to their signature, like adding a new mandatory param
|
|
135
|
+
def to_dict(self, new_param: str) -> Dict[str, Any]:
|
|
136
|
+
# do something with the new parameter
|
|
137
|
+
print(new_param)
|
|
138
|
+
# call the original method
|
|
139
|
+
return super().to_dict()
|
|
140
|
+
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Contributing
|
|
144
|
+
|
|
145
|
+
Direct contributions to `haystack-experimental` are not expected, but Haystack maintainers might ask contributors to move pull requests that target the [core repository](https://github.com/deepset-ai/haystack) to this repository.
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
[](https://pypi.org/project/haystack-experimental)
|
|
2
|
+
[](https://pypi.org/project/haystack-experimental)
|
|
3
|
+
[](https://github.com/deepset-ai/haystack-experimental/actions/workflows/tests.yml)
|
|
4
|
+
[](https://github.com/deepset-ai/haystack-experimental/actions/workflows/pypi_release.yml)
|
|
5
|
+
[](https://github.com/pypa/hatch)
|
|
6
|
+
[](https://mypy-lang.org/)
|
|
7
|
+
|
|
8
|
+
# Haystack experimental package
|
|
9
|
+
|
|
10
|
+
The `haystack-experimental` package provides Haystack users with access to experimental features without immediately
|
|
11
|
+
committing to their official release. The main goal is to gather user feedback and iterate on new features quickly.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
For simplicity, every release of `haystack-experimental` will ship all the available experiments at that time. To
|
|
16
|
+
install the latest experimental features, run:
|
|
17
|
+
|
|
18
|
+
```sh
|
|
19
|
+
$ pip install -U haystack-experimental
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
> [!IMPORTANT]
|
|
23
|
+
> The latest version of the experimental package is only tested against the latest version of Haystack. Compatibility
|
|
24
|
+
> with older versions of Haystack is not guaranteed.
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
## Experiments lifecycle
|
|
28
|
+
|
|
29
|
+
Any experimental feature will be removed from `haystack-experimental` after a period of 3 months. After this time,
|
|
30
|
+
the experiment will be either:
|
|
31
|
+
- Merged into Haystack core and published in the next minor release,
|
|
32
|
+
- Released as a Core Integration,
|
|
33
|
+
- Dropped.
|
|
34
|
+
|
|
35
|
+
## Experiments catalog
|
|
36
|
+
|
|
37
|
+
The latest version of the package contains the following experiments:
|
|
38
|
+
|
|
39
|
+
| Name | Type | Experiment end date |
|
|
40
|
+
| ------------------------ | ----------------------- | ------------------- |
|
|
41
|
+
| [`EvaluationHarness`][1] | Evaluation orchestrator | August 2024 |
|
|
42
|
+
| [`OpenAIFunctionCaller`][2] | Function Calling Component | August 2024 |
|
|
43
|
+
|
|
44
|
+
[1]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/evaluation/harness
|
|
45
|
+
[2]: https://github.com/deepset-ai/haystack-experimental/tree/main/haystack_experimental/components/tools/openai
|
|
46
|
+
|
|
47
|
+
## Usage
|
|
48
|
+
|
|
49
|
+
Experimental new features can be imported like any other Haystack integration package:
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from haystack.dataclasses import ChatMessage
|
|
53
|
+
from haystack_experimental.components.generators import FoobarGenerator
|
|
54
|
+
|
|
55
|
+
c = FoobarGenerator()
|
|
56
|
+
c.run([ChatMessage.from_user("What's an experiment? Be brief.")])
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
Experiments can also override existing Haystack features. For example, users can opt into an experimental type of
|
|
60
|
+
`Pipeline` by just changing the usual import:
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
# from haystack import Pipeline
|
|
64
|
+
from haystack_experimental import Pipeline
|
|
65
|
+
|
|
66
|
+
pipe = Pipeline()
|
|
67
|
+
# ...
|
|
68
|
+
pipe.run(...)
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## Documentation
|
|
72
|
+
|
|
73
|
+
Documentation for `haystack-experimental` can be found [here](https://docs.haystack.deepset.ai/reference/haystack-experimental-api).
|
|
74
|
+
|
|
75
|
+
## Implementation
|
|
76
|
+
|
|
77
|
+
Experiments should replicate the namespace of the core package. For example, a new generator:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
# in haystack_experimental/components/generators/foobar.py
|
|
81
|
+
|
|
82
|
+
from haystack import component
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@component
|
|
86
|
+
class FoobarGenerator:
|
|
87
|
+
...
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
When the experiment overrides an existing feature, the new symbol should be created at the same path in the experimental
|
|
92
|
+
package. This new symbol will override the original in `haystack-ai`: for classes, with a subclass and for bare
|
|
93
|
+
functions, with a wrapper. For example:
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
# in haystack_experimental/core/pipeline/pipeline.py
|
|
97
|
+
|
|
98
|
+
from haystack.core.pipeline import Pipeline as HaystackPipeline
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class Pipeline(HaystackPipeline):
|
|
102
|
+
# Any new experimental method that doesn't exist in the original class
|
|
103
|
+
def run_async(self, inputs) -> Dict[str, Dict[str, Any]]:
|
|
104
|
+
...
|
|
105
|
+
|
|
106
|
+
# Existing methods with breaking changes to their signature, like adding a new mandatory param
|
|
107
|
+
def to_dict(self, new_param: str) -> Dict[str, Any]:
|
|
108
|
+
# do something with the new parameter
|
|
109
|
+
print(new_param)
|
|
110
|
+
# call the original method
|
|
111
|
+
return super().to_dict()
|
|
112
|
+
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Contributing
|
|
116
|
+
|
|
117
|
+
Direct contributions to `haystack-experimental` are not expected, but Haystack maintainers might ask contributors to move pull requests that target the [core repository](https://github.com/deepset-ai/haystack) to this repository.
|
haystack_experimental-0.0.2.dev0/haystack_experimental/components/tools/openai/function_caller.py
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
from typing import Any, Callable, Dict, List
|
|
7
|
+
|
|
8
|
+
from haystack import component, default_from_dict, default_to_dict
|
|
9
|
+
from haystack.dataclasses import ChatMessage
|
|
10
|
+
from haystack.utils import deserialize_callable, serialize_callable
|
|
11
|
+
|
|
12
|
+
# Assistant reply used when the model requests a function name that is not registered.
_FUNCTION_NAME_FAILURE = (
    "I'm sorry, I tried to run a function that did not exist. "
    "Would you like me to correct it and try again?"
)

# Assistant reply template used when a registered function raises; `{error}` receives the exception.
_FUNCTION_RUN_FAILURE = "Seems there was an error while running the function: {error}"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@component
class OpenAIFunctionCaller:
    """
    OpenAIFunctionCaller processes a list of chat messages and calls Python functions when needed.

    The OpenAIFunctionCaller expects a list of ChatMessages. If the first message reports a
    `finish_reason` of `"tool_calls"`, every requested function is looked up by name, invoked with the
    decoded arguments, and its result is returned as a ChatMessage from role = 'function'.
    """

    def __init__(self, available_functions: Dict[str, Callable]):
        """
        Initialize the OpenAIFunctionCaller component.

        :param available_functions:
            A dictionary of available functions. This dictionary expects key value pairs of function name,
            and the function itself. For example, `{"weather_function": weather_function}`
        """
        self.available_functions = available_functions

    def to_dict(self) -> Dict[str, Any]:
        """
        Serializes the component to a dictionary.

        :returns:
            Dictionary with serialized data.
        """
        # Callables are not serializable as-is, so each one is stored via `serialize_callable`.
        available_function_paths = {
            name: serialize_callable(function) for name, function in self.available_functions.items()
        }
        return default_to_dict(self, available_functions=available_function_paths)

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "OpenAIFunctionCaller":
        """
        Deserializes the component from a dictionary.

        :param data:
            The dictionary to deserialize from.
        :returns:
            The deserialized component.
        """
        init_parameters = data.get("init_parameters", {})
        available_function_paths = init_parameters.get("available_functions")
        # When the entry is missing, leave `data` untouched and let `default_from_dict` report the
        # problem instead of failing here with an opaque AttributeError on `None.items()`.
        if available_function_paths is not None:
            init_parameters["available_functions"] = {
                name: deserialize_callable(path) for name, path in available_function_paths.items()
            }
        return default_from_dict(cls, data)

    @component.output_types(function_replies=List[ChatMessage], assistant_replies=List[ChatMessage])
    def run(self, messages: List[ChatMessage]):
        """
        Evaluates `messages` and invokes available functions if the messages contain tool_calls.

        Only the first message is inspected for tool calls. Replies are appended to the `messages`
        list that was passed in, so the caller's list is modified in place.

        :param messages: A list of messages generated from the `OpenAIChatGenerator`
        :returns: This component returns a list of messages in one of two outputs
            - `function_replies`: List of ChatMessages containing the result of a function invocation.
                This message comes from role = 'function'. If the function name was hallucinated or wrong,
                an assistant message explaining as such is returned
            - `assistant_replies`: List of ChatMessages containing a regular assistant reply. In this case,
                there were no tool_calls in the received messages (this includes an empty `messages` list
                and a first message without a `finish_reason`)
        """
        # Guard against an empty list and against metadata without `finish_reason`; both used to raise.
        if not messages or messages[0].meta.get("finish_reason") != "tool_calls":
            return {"assistant_replies": messages}

        function_calls = json.loads(messages[0].content)
        for function_call in function_calls:
            function_name = function_call["function"]["name"]
            function_args = json.loads(function_call["function"]["arguments"])
            if function_name not in self.available_functions:
                messages.append(ChatMessage.from_assistant(_FUNCTION_NAME_FAILURE))
                continue

            function_to_call = self.available_functions[function_name]
            try:
                function_response = function_to_call(**function_args)
                messages.append(
                    ChatMessage.from_function(
                        content=json.dumps(function_response),
                        name=function_name,
                    )
                )
            # Any failure of the user-supplied function (or of encoding its result) is reported back
            # as an assistant message rather than aborting the run.
            # pylint: disable=broad-exception-caught
            except Exception as e:
                messages.append(ChatMessage.from_assistant(_FUNCTION_RUN_FAILURE.format(error=e)))
        return {"function_replies": messages}
|
haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/evalution_harness.py
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Dict, Generic, Optional, Type, TypeVar
|
|
8
|
+
|
|
9
|
+
from haystack import Pipeline
|
|
10
|
+
from haystack.core.serialization import DeserializationCallbacks
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass
class EvaluationRunOverrides:
    """
    Init-parameter overrides for a single evaluation run.

    Overrides can target the evaluated pipeline, the evaluation pipeline,
    or both. Each mapping is keyed by component name; the value is a
    dictionary of the init parameters to override for that component.

    :param evaluated_pipeline_overrides:
        Overrides for the evaluated pipeline.
    :param evaluation_pipeline_overrides:
        Overrides for the evaluation pipeline.
    """

    # Both fields default to None, i.e. no overrides for that pipeline.
    evaluated_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None
    evaluation_pipeline_overrides: Optional[Dict[str, Dict[str, Any]]] = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
# Type parameters of `EvaluationHarness`: the run input, the run output
# and the overrides accepted by a concrete harness implementation.
EvalRunInputT = TypeVar("EvalRunInputT")
EvalRunOutputT = TypeVar("EvalRunOutputT")
EvalRunOverridesT = TypeVar("EvalRunOverridesT")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class EvaluationHarness(ABC, Generic[EvalRunInputT, EvalRunOverridesT, EvalRunOutputT]):
    """
    Runs a pipeline with given parameters and inputs, then scores its outputs with an evaluation pipeline.
    """

    @staticmethod
    def _override_pipeline(pipeline: Pipeline, parameter_overrides: Optional[Dict[str, Any]]) -> Pipeline:
        # Nothing to override (None or empty mapping): hand back the very same pipeline object.
        if not parameter_overrides:
            return pipeline

        # Reject overrides that target components the pipeline does not report.
        known_components = pipeline.inputs(include_components_with_connected_inputs=True).keys()
        for component_name in parameter_overrides.keys():
            if component_name not in known_components:
                raise ValueError(f"Cannot override non-existent component '{component_name}'")

        def apply_overrides(name: str, cls: Type, init_params: Dict[str, Any]):  # pylint: disable=unused-argument
            # Invoked through the deserialization callbacks with each component's init parameters,
            # which are patched in place.
            assert parameter_overrides is not None
            component_overrides = parameter_overrides.get(name)
            if component_overrides:
                init_params.update(component_overrides)

        # Round-trip through serialization so the overrides are applied when components are rebuilt.
        hooks = DeserializationCallbacks(apply_overrides)
        return Pipeline.loads(pipeline.dumps(), callbacks=hooks)

    @abstractmethod
    def run(
        self,
        inputs: EvalRunInputT,
        *,
        overrides: Optional[EvalRunOverridesT] = None,
        run_name: Optional[str] = None,
    ) -> EvalRunOutputT:
        """
        Launch an evaluation run.

        :param inputs:
            Inputs to the evaluated and evaluation pipelines.
        :param overrides:
            Overrides for the harness.
        :param run_name:
            A name for the evaluation run.
        :returns:
            The output of the evaluation pipeline.
        """
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from .harness import RAGEvaluationHarness
|
|
6
|
+
from .parameters import (
|
|
7
|
+
RAGEvaluationInput,
|
|
8
|
+
RAGEvaluationMetric,
|
|
9
|
+
RAGEvaluationOutput,
|
|
10
|
+
RAGEvaluationOverrides,
|
|
11
|
+
RAGExpectedComponent,
|
|
12
|
+
RAGExpectedComponentMetadata,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Public API of the RAG evaluation harness package. The list must be named `__all__`
# (double underscores) for Python to honour it on `from ... import *`.
__all__ = [
    "RAGEvaluationHarness",
    "RAGExpectedComponent",
    "RAGExpectedComponentMetadata",
    "RAGEvaluationMetric",
    "RAGEvaluationOutput",
    "RAGEvaluationOverrides",
    "RAGEvaluationInput",
]
|
haystack_experimental-0.0.2.dev0/haystack_experimental/evaluation/harness/rag/evaluation_pipeline.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
|
|
2
|
+
#
|
|
3
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
|
|
5
|
+
from functools import partial
|
|
6
|
+
from typing import Set
|
|
7
|
+
|
|
8
|
+
from haystack import Pipeline
|
|
9
|
+
from haystack.components.evaluators import (
|
|
10
|
+
ContextRelevanceEvaluator,
|
|
11
|
+
DocumentMAPEvaluator,
|
|
12
|
+
DocumentMRREvaluator,
|
|
13
|
+
DocumentRecallEvaluator,
|
|
14
|
+
FaithfulnessEvaluator,
|
|
15
|
+
SASEvaluator,
|
|
16
|
+
)
|
|
17
|
+
from haystack.components.evaluators.document_recall import RecallMode
|
|
18
|
+
|
|
19
|
+
from .parameters import RAGEvaluationMetric
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def default_rag_evaluation_pipeline(
    metrics: Set[RAGEvaluationMetric],
) -> Pipeline:
    """
    Assemble the default RAG evaluation pipeline for the requested metrics.

    One evaluator component is added per metric, registered under the metric's
    `value`. No connections are created between the components.

    :param metrics:
        The set of metrics to include in the pipeline.
    :returns:
        The evaluation pipeline.
    """
    # Zero-argument factory for the evaluator component backing each supported metric.
    evaluator_factories = {
        RAGEvaluationMetric.DOCUMENT_MAP: DocumentMAPEvaluator,
        RAGEvaluationMetric.DOCUMENT_MRR: DocumentMRREvaluator,
        RAGEvaluationMetric.DOCUMENT_RECALL_SINGLE_HIT: partial(DocumentRecallEvaluator, mode=RecallMode.SINGLE_HIT),
        RAGEvaluationMetric.DOCUMENT_RECALL_MULTI_HIT: partial(DocumentRecallEvaluator, mode=RecallMode.MULTI_HIT),
        RAGEvaluationMetric.SEMANTIC_ANSWER_SIMILARITY: partial(
            SASEvaluator, model="sentence-transformers/all-MiniLM-L6-v2"
        ),
        RAGEvaluationMetric.ANSWER_FAITHFULNESS: partial(FaithfulnessEvaluator, raise_on_failure=False),
        RAGEvaluationMetric.CONTEXT_RELEVANCE: partial(ContextRelevanceEvaluator, raise_on_failure=False),
    }

    evaluation_pipeline = Pipeline()
    for requested_metric in metrics:
        make_evaluator = evaluator_factories[requested_metric]
        evaluation_pipeline.add_component(requested_metric.value, make_evaluator())
    return evaluation_pipeline
|