graphrag-eval 5.1.1__tar.gz → 5.1.2__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: graphrag-eval
3
- Version: 5.1.1
3
+ Version: 5.1.2
4
4
  Summary: For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps.
5
5
  License: Apache-2.0
6
6
  Author: Philip Ganchev
@@ -4,7 +4,6 @@ from collections.abc import Sequence
4
4
  from statistics import mean, median
5
5
  from typing import Any, Collection, Iterable
6
6
 
7
-
8
7
  METRICS = [
9
8
  "answer_recall",
10
9
  "answer_precision",
@@ -135,7 +134,7 @@ def compute_micro_stats(
135
134
  number_of_samples_per_template_by_status,
136
135
  stats_per_template,
137
136
  step_metrics_per_template
138
- ):
137
+ ) -> dict:
139
138
  values = number_of_samples_per_template_by_status.values()
140
139
  micro_summary = defaultdict(dict, {
141
140
  "number_of_error_samples": sum(v["error"] for v in values),
@@ -158,7 +157,7 @@ def compute_micro_stats(
158
157
  micro_step_metrics[metric].extend(values)
159
158
  for metric, values in micro_step_metrics.items():
160
159
  micro_summary[metric] = stats_for_series(values)
161
- return micro_summary
160
+ return dict(micro_summary)
162
161
 
163
162
 
164
163
  def compute_macro_stats(
@@ -182,7 +181,7 @@ def compute_macro_stats(
182
181
  macro_step_metrics[metric].append(stats["mean"])
183
182
  for metric, values in macro_step_metrics.items():
184
183
  macro_summary[metric]["mean"] = mean(values or [0])
185
- return macro_summary
184
+ return dict(macro_summary)
186
185
 
187
186
 
188
187
  def compute_aggregates(samples: list[dict]) -> dict:
@@ -205,18 +204,19 @@ def compute_aggregates(samples: list[dict]) -> dict:
205
204
  update_steps_summary(sample, steps_summary_per_template[template_id])
206
205
  update_step_metrics(sample, step_metrics_per_template[template_id])
207
206
 
208
- summary = {}
209
- summary["per_template"] = compute_per_template_stats(
210
- templates_ids,
211
- number_of_samples_per_template_by_status,
212
- stats_per_template,
213
- steps_summary_per_template,
214
- step_metrics_per_template,
215
- )
216
- summary["micro"] = compute_micro_stats(
217
- number_of_samples_per_template_by_status,
218
- stats_per_template,
219
- step_metrics_per_template
220
- )
207
+ summary = {
208
+ "per_template": compute_per_template_stats(
209
+ templates_ids,
210
+ number_of_samples_per_template_by_status,
211
+ stats_per_template,
212
+ steps_summary_per_template,
213
+ step_metrics_per_template,
214
+ ),
215
+ "micro": compute_micro_stats(
216
+ number_of_samples_per_template_by_status,
217
+ stats_per_template,
218
+ step_metrics_per_template
219
+ )
220
+ }
221
221
  summary["macro"] = compute_macro_stats(summary["per_template"])
222
222
  return summary
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "graphrag-eval"
3
- version = "5.1.1"
3
+ version = "5.1.2"
4
4
  description = "For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps."
5
5
  authors = [
6
6
  { name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai" },
File without changes
File without changes