graphrag-eval 5.1.2__tar.gz → 5.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/PKG-INFO +2 -1
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/README.md +1 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/steps/evaluation.py +2 -2
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/steps/sparql.py +10 -7
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/pyproject.toml +1 -1
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/LICENSE +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/__init__.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/aggregation.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/answer_correctness.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/answer_relevance.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/evaluation.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/prompts/template.md +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/steps/__init__.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/steps/retrieval_answer.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/steps/retrieval_context_ids.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/steps/retrieval_context_texts.py +0 -0
- {graphrag_eval-5.1.2 → graphrag_eval-5.2.0}/graphrag_eval/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: graphrag-eval
|
|
3
|
-
Version: 5.1.2
|
|
3
|
+
Version: 5.2.0
|
|
4
4
|
Summary: For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Philip Ganchev
|
|
@@ -107,6 +107,7 @@ Each step includes:
|
|
|
107
107
|
- `output_media_type`: (optional, missing or one of `application/sparql-results+json`, `application/json`) Indicates how the output of a step must be processed
|
|
108
108
|
- `ordered`: (optional, defaults to `false`) For SPARQL query results, whether results order matters. `true` means that the actual result rows must be ordered as the reference result; `false` means that result rows are matched as a set.
|
|
109
109
|
- `required_columns`: (optional) - required only for SPARQL query results; list of binding names, which are required for SPARQL query results to match
|
|
110
|
+
- `ignore_duplicates`: (optional, defaults to `true`) For SPARQL query results, whether duplicate binding values in the expected or in the actual results should be ignored for the comparison.
|
|
110
111
|
|
|
111
112
|
#### Reference Data
|
|
112
113
|
|
|
@@ -87,6 +87,7 @@ Each step includes:
|
|
|
87
87
|
- `output_media_type`: (optional, missing or one of `application/sparql-results+json`, `application/json`) Indicates how the output of a step must be processed
|
|
88
88
|
- `ordered`: (optional, defaults to `false`) For SPARQL query results, whether results order matters. `true` means that the actual result rows must be ordered as the reference result; `false` means that result rows are matched as a set.
|
|
89
89
|
- `required_columns`: (optional) - required only for SPARQL query results; list of binding names, which are required for SPARQL query results to match
|
|
90
|
+
- `ignore_duplicates`: (optional, defaults to `true`) For SPARQL query results, whether duplicate binding values in the expected or in the actual results should be ignored for the comparison.
|
|
90
91
|
|
|
91
92
|
#### Reference Data
|
|
92
93
|
|
|
@@ -1,12 +1,11 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from collections import defaultdict
|
|
3
|
-
from typing import Any
|
|
4
3
|
from collections.abc import Sequence
|
|
4
|
+
from typing import Any
|
|
5
5
|
|
|
6
6
|
from .retrieval_context_ids import recall_at_k
|
|
7
7
|
from .sparql import compare_sparql_results
|
|
8
8
|
|
|
9
|
-
|
|
10
9
|
Match = tuple[int, int, int, float]
|
|
11
10
|
Step = dict[str, Any]
|
|
12
11
|
StepsGroup = Sequence[Step] # We will index into a group
|
|
@@ -23,6 +22,7 @@ def compare_steps_outputs(reference_step: Step, actual_step: Step) -> float:
|
|
|
23
22
|
json.loads(actual_output),
|
|
24
23
|
reference_step["required_columns"],
|
|
25
24
|
reference_step.get("ordered", False),
|
|
25
|
+
reference_step.get("ignore_duplicates", True),
|
|
26
26
|
)
|
|
27
27
|
if reference_step.get("output_media_type") == "application/json":
|
|
28
28
|
return float(json.loads(reference_output) == json.loads(actual_output))
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
from collections import Counter
|
|
2
|
-
from typing import Union
|
|
3
1
|
import itertools
|
|
4
2
|
import math
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from typing import Union
|
|
5
5
|
|
|
6
6
|
XSD_NUMERIC_TYPES = {
|
|
7
7
|
"http://www.w3.org/2001/XMLSchema#integer",
|
|
@@ -35,7 +35,7 @@ def truncate(number: float, decimals: int = 0) -> float:
|
|
|
35
35
|
elif decimals == 0:
|
|
36
36
|
return math.trunc(number)
|
|
37
37
|
|
|
38
|
-
factor = 10.0**decimals
|
|
38
|
+
factor = 10.0 ** decimals
|
|
39
39
|
return math.trunc(number * factor) / factor
|
|
40
40
|
|
|
41
41
|
|
|
@@ -137,8 +137,8 @@ def compare_values(
|
|
|
137
137
|
actual_vars: Union[list[str], tuple[str, ...]],
|
|
138
138
|
actual_var_to_values: dict[str, list],
|
|
139
139
|
results_are_ordered: bool,
|
|
140
|
+
ignore_duplicates: bool,
|
|
140
141
|
) -> bool:
|
|
141
|
-
|
|
142
142
|
if len(reference_vars) < len(actual_vars):
|
|
143
143
|
for combination in itertools.combinations(actual_vars, len(reference_vars)):
|
|
144
144
|
if compare_values(
|
|
@@ -147,6 +147,7 @@ def compare_values(
|
|
|
147
147
|
combination,
|
|
148
148
|
actual_var_to_values,
|
|
149
149
|
results_are_ordered,
|
|
150
|
+
ignore_duplicates,
|
|
150
151
|
):
|
|
151
152
|
return True
|
|
152
153
|
return False
|
|
@@ -154,9 +155,9 @@ def compare_values(
|
|
|
154
155
|
table = convert_table_dict2lines(reference_vars, reference_var_to_values)
|
|
155
156
|
for permutation in itertools.permutations(actual_vars):
|
|
156
157
|
actual_table = convert_table_dict2lines(permutation, actual_var_to_values)
|
|
157
|
-
if (results_are_ordered and table == actual_table) or
|
|
158
|
-
not results_are_ordered and
|
|
159
|
-
|
|
158
|
+
if (results_are_ordered and table == actual_table) or \
|
|
159
|
+
((not results_are_ordered) and ignore_duplicates and set(table) == set(actual_table)) or \
|
|
160
|
+
((not results_are_ordered) and (not ignore_duplicates) and Counter(table) == Counter(actual_table)):
|
|
160
161
|
return True
|
|
161
162
|
|
|
162
163
|
return False
|
|
@@ -167,6 +168,7 @@ def compare_sparql_results(
|
|
|
167
168
|
actual_sparql_result: dict,
|
|
168
169
|
required_vars: list[str],
|
|
169
170
|
results_are_ordered: bool = False,
|
|
171
|
+
ignore_duplicates: bool = True,
|
|
170
172
|
) -> float:
|
|
171
173
|
# DESCRIBE results
|
|
172
174
|
if isinstance(actual_sparql_result, str):
|
|
@@ -208,5 +210,6 @@ def compare_sparql_results(
|
|
|
208
210
|
actual_vars,
|
|
209
211
|
actual_var_to_values,
|
|
210
212
|
results_are_ordered,
|
|
213
|
+
ignore_duplicates,
|
|
211
214
|
)
|
|
212
215
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "graphrag-eval"
|
|
3
|
-
version = "5.1.2"
|
|
3
|
+
version = "5.2.0"
|
|
4
4
|
description = "For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai" },
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|