graphrag-eval 5.1.1__tar.gz → 5.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/PKG-INFO +1 -1
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/aggregation.py +17 -17
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/pyproject.toml +1 -1
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/LICENSE +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/README.md +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/__init__.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/answer_correctness.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/answer_relevance.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/evaluation.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/prompts/template.md +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/steps/__init__.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/steps/evaluation.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/steps/retrieval_answer.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/steps/retrieval_context_ids.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/steps/retrieval_context_texts.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/steps/sparql.py +0 -0
- {graphrag_eval-5.1.1 → graphrag_eval-5.1.2}/graphrag_eval/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: graphrag-eval
|
|
3
|
-
Version: 5.1.1
|
|
3
|
+
Version: 5.1.2
|
|
4
4
|
Summary: For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Philip Ganchev
|
|
@@ -4,7 +4,6 @@ from collections.abc import Sequence
|
|
|
4
4
|
from statistics import mean, median
|
|
5
5
|
from typing import Any, Collection, Iterable
|
|
6
6
|
|
|
7
|
-
|
|
8
7
|
METRICS = [
|
|
9
8
|
"answer_recall",
|
|
10
9
|
"answer_precision",
|
|
@@ -135,7 +134,7 @@ def compute_micro_stats(
|
|
|
135
134
|
number_of_samples_per_template_by_status,
|
|
136
135
|
stats_per_template,
|
|
137
136
|
step_metrics_per_template
|
|
138
|
-
):
|
|
137
|
+
) -> dict:
|
|
139
138
|
values = number_of_samples_per_template_by_status.values()
|
|
140
139
|
micro_summary = defaultdict(dict, {
|
|
141
140
|
"number_of_error_samples": sum(v["error"] for v in values),
|
|
@@ -158,7 +157,7 @@ def compute_micro_stats(
|
|
|
158
157
|
micro_step_metrics[metric].extend(values)
|
|
159
158
|
for metric, values in micro_step_metrics.items():
|
|
160
159
|
micro_summary[metric] = stats_for_series(values)
|
|
161
|
-
return micro_summary
|
|
160
|
+
return dict(micro_summary)
|
|
162
161
|
|
|
163
162
|
|
|
164
163
|
def compute_macro_stats(
|
|
@@ -182,7 +181,7 @@ def compute_macro_stats(
|
|
|
182
181
|
macro_step_metrics[metric].append(stats["mean"])
|
|
183
182
|
for metric, values in macro_step_metrics.items():
|
|
184
183
|
macro_summary[metric]["mean"] = mean(values or [0])
|
|
185
|
-
return macro_summary
|
|
184
|
+
return dict(macro_summary)
|
|
186
185
|
|
|
187
186
|
|
|
188
187
|
def compute_aggregates(samples: list[dict]) -> dict:
|
|
@@ -205,18 +204,19 @@ def compute_aggregates(samples: list[dict]) -> dict:
|
|
|
205
204
|
update_steps_summary(sample, steps_summary_per_template[template_id])
|
|
206
205
|
update_step_metrics(sample, step_metrics_per_template[template_id])
|
|
207
206
|
|
|
208
|
-
summary = {
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
207
|
+
summary = {
|
|
208
|
+
"per_template": compute_per_template_stats(
|
|
209
|
+
templates_ids,
|
|
210
|
+
number_of_samples_per_template_by_status,
|
|
211
|
+
stats_per_template,
|
|
212
|
+
steps_summary_per_template,
|
|
213
|
+
step_metrics_per_template,
|
|
214
|
+
),
|
|
215
|
+
"micro": compute_micro_stats(
|
|
216
|
+
number_of_samples_per_template_by_status,
|
|
217
|
+
stats_per_template,
|
|
218
|
+
step_metrics_per_template
|
|
219
|
+
)
|
|
220
|
+
}
|
|
221
221
|
summary["macro"] = compute_macro_stats(summary["per_template"])
|
|
222
222
|
return summary
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "graphrag-eval"
|
|
3
|
-
version = "5.1.1"
|
|
3
|
+
version = "5.1.2"
|
|
4
4
|
description = "For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps."
|
|
5
5
|
authors = [
|
|
6
6
|
{ name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai" },
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|