graphrag-eval 5.0.1__tar.gz → 5.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/PKG-INFO +4 -4
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/README.md +1 -1
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/answer_correctness.py +1 -1
- graphrag_eval-5.0.2/graphrag_eval/prompts/template.md +26 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/pyproject.toml +4 -3
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/LICENSE +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/__init__.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/aggregation.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/answer_relevance.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/evaluation.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/steps/__init__.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/steps/retrieval_answer.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/steps/retrieval_context_ids.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/steps/retrieval_context_texts.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/steps/sparql.py +0 -0
- {graphrag_eval-5.0.1 → graphrag_eval-5.0.2}/graphrag_eval/util.py +0 -0
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: graphrag-eval
|
|
3
|
-
Version: 5.0.1
|
|
3
|
+
Version: 5.0.2
|
|
4
4
|
Summary: For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps.
|
|
5
5
|
License: Apache-2.0
|
|
6
|
-
Author:
|
|
7
|
-
Author-email:
|
|
6
|
+
Author: Philip Ganchev
|
|
7
|
+
Author-email: philip.ganchev@graphwise.ai
|
|
8
8
|
Requires-Python: >=3.12,<3.13
|
|
9
9
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
@@ -74,7 +74,7 @@ To evaluate answers and/or steps:
|
|
|
74
74
|
1. Format the answers and/or steps you want to evaluate: section [Evaluation Target Corpus](#Evaluation-Target-Corpus)
|
|
75
75
|
1. To evaluate answer relevance:
|
|
76
76
|
1. Include `actual_answer` in the target data to evaluate
|
|
77
|
-
1. Set environment
|
|
77
|
+
1. Set environment variable `OPENAI_API_KEY` appropriately
|
|
78
78
|
1. To evaluate answer correctness:
|
|
79
79
|
1. Include `reference_answer` in the reference corpus and `actual_answer` in the target data to evaluate
|
|
80
80
|
1. Set environment variable `OPENAI_API_KEY` appropriately
|
|
@@ -56,7 +56,7 @@ To evaluate answers and/or steps:
|
|
|
56
56
|
1. Format the answers and/or steps you want to evaluate: section [Evaluation Target Corpus](#Evaluation-Target-Corpus)
|
|
57
57
|
1. To evaluate answer relevance:
|
|
58
58
|
1. Include `actual_answer` in the target data to evaluate
|
|
59
|
-
1. Set environment
|
|
59
|
+
1. Set environment variable `OPENAI_API_KEY` appropriately
|
|
60
60
|
1. To evaluate answer correctness:
|
|
61
61
|
1. Include `reference_answer` in the reference corpus and `actual_answer` in the target data to evaluate
|
|
62
62
|
1. Set environment variable `OPENAI_API_KEY` appropriately
|
|
@@ -6,7 +6,7 @@ from tqdm import tqdm
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
IN_FILE_PATH = "../data/data-1.tsv"
|
|
9
|
-
PROMPT_FILE_PATH = "prompts/template.md"
|
|
9
|
+
PROMPT_FILE_PATH = Path(__file__).parent / "prompts" / "template.md"
|
|
10
10
|
OUT_FILE_PATH = "results/data-1.tsv"
|
|
11
11
|
OUT_FIELDS = ["#Reference", "#PTarget", "#Matching", "Reasoning", "Error"]
|
|
12
12
|
LLM_MODEL = "gpt-4o-mini"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
Below are a query, a reference response and a candidate response to it.
|
|
2
|
+
1. Extract all claims from each response
|
|
3
|
+
2. Find matching claims between responses. Matching claims have the same meaning and details such as subjects, names, locations, amounts, IDs, commands and paths.
|
|
4
|
+
3. Output the values listed below (all and only those).
|
|
5
|
+
|
|
6
|
+
# Query
|
|
7
|
+
{question}
|
|
8
|
+
|
|
9
|
+
# Reference response
|
|
10
|
+
{reference_answer}
|
|
11
|
+
|
|
12
|
+
# Candidate response
|
|
13
|
+
{candidate_answer}
|
|
14
|
+
|
|
15
|
+
# Output values
|
|
16
|
+
* v1: Count of reference response claims
|
|
17
|
+
* v2: Count of candidate response claims
|
|
18
|
+
* v3: Count of matching claims
|
|
19
|
+
* v4: Explanation of v1-v3 in English
|
|
20
|
+
|
|
21
|
+
# Value checks
|
|
22
|
+
* 1 <= v1, v2
|
|
23
|
+
* 0 <= v3 <= v1, v2
|
|
24
|
+
|
|
25
|
+
# Output format
|
|
26
|
+
<v1><tab><v2><tab><v3><tab><v4>
|
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "graphrag-eval"
|
|
3
|
-
version = "5.0.1"
|
|
3
|
+
version = "5.0.2"
|
|
4
4
|
description = "For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps."
|
|
5
5
|
authors = [
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
{ name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai" },
|
|
7
|
+
{ name = "Aleksis Datseris", email = "aleksis.datseris@graphwise.ai" },
|
|
8
|
+
{ name = "Neli Hateva", email = "neli.hateva@graphwise.ai" },
|
|
8
9
|
]
|
|
9
10
|
readme = "README.md"
|
|
10
11
|
license = "Apache-2.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|