graphrag-eval 5.0.1__tar.gz → 5.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,10 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: graphrag-eval
3
- Version: 5.0.1
3
+ Version: 5.0.2
4
4
  Summary: For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps.
5
5
  License: Apache-2.0
6
- Author: Neli Hateva
7
- Author-email: neli.hateva@graphwise.ai
6
+ Author: Philip Ganchev
7
+ Author-email: philip.ganchev@graphwise.ai
8
8
  Requires-Python: >=3.12,<3.13
9
9
  Classifier: License :: OSI Approved :: Apache Software License
10
10
  Classifier: Programming Language :: Python :: 3
@@ -74,7 +74,7 @@ To evaluate answers and/or steps:
74
74
  1. Format the answers and/or steps you want to evaluate: section [Evaluation Target Corpus](#Evaluation-Target-Corpus)
75
75
  1. To evaluate answer relevance:
76
76
  1. Include `actual_answer` in the target data to evaluate
77
- 1. Set environment vairabe `OPENAI_API_KEY` appropriately
77
+ 1. Set environment variable `OPENAI_API_KEY` appropriately
78
78
  1. To evaluate answer correctness:
79
79
  1. Include `reference_answer` in the reference corpus and `actual_answer` in the target data to evaluate
80
80
  1. Set environment variable `OPENAI_API_KEY` appropriately
@@ -56,7 +56,7 @@ To evaluate answers and/or steps:
56
56
  1. Format the answers and/or steps you want to evaluate: section [Evaluation Target Corpus](#Evaluation-Target-Corpus)
57
57
  1. To evaluate answer relevance:
58
58
  1. Include `actual_answer` in the target data to evaluate
59
- 1. Set environment vairabe `OPENAI_API_KEY` appropriately
59
+ 1. Set environment variable `OPENAI_API_KEY` appropriately
60
60
  1. To evaluate answer correctness:
61
61
  1. Include `reference_answer` in the reference corpus and `actual_answer` in the target data to evaluate
62
62
  1. Set environment variable `OPENAI_API_KEY` appropriately
@@ -6,7 +6,7 @@ from tqdm import tqdm
6
6
 
7
7
 
8
8
  IN_FILE_PATH = "../data/data-1.tsv"
9
- PROMPT_FILE_PATH = "prompts/template.md"
9
+ PROMPT_FILE_PATH = Path(__file__).parent / "prompts" / "template.md"
10
10
  OUT_FILE_PATH = "results/data-1.tsv"
11
11
  OUT_FIELDS = ["#Reference", "#PTarget", "#Matching", "Reasoning", "Error"]
12
12
  LLM_MODEL = "gpt-4o-mini"
@@ -0,0 +1,26 @@
1
+ Below are a query, a reference response and a candidate response to it.
2
+ 1. Extract all claims from each response
3
+ 2. Find matching claims between responses. Matching claims have the same meaning and details such as subjects, names, locations, amounts, IDs, commands and paths.
4
+ 3. Output the values listed below (all and only those).
5
+
6
+ # Query
7
+ {question}
8
+
9
+ # Reference response
10
+ {reference_answer}
11
+
12
+ # Candidate response
13
+ {candidate_answer}
14
+
15
+ # Output values
16
+ * v1: Count of reference response claims
17
+ * v2: Count of candidate response claims
18
+ * v3: Count of matching claims
19
+ * v4: Explanation of v1-v3 in English
20
+
21
+ # Value checks
22
+ * 1 <= v1, v2
23
+ * 0 <= v3 <= v1, v2
24
+
25
+ # Output format
26
+ <v1><tab><v2><tab><v3><tab><v4>
@@ -1,10 +1,11 @@
1
1
  [project]
2
2
  name = "graphrag-eval"
3
- version = "5.0.1"
3
+ version = "5.0.2"
4
4
  description = "For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps."
5
5
  authors = [
6
- {name = "Neli Hateva", email = "neli.hateva@graphwise.ai"},
7
- {name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai"}
6
+ { name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai" },
7
+ { name = "Aleksis Datseris", email = "aleksis.datseris@graphwise.ai" },
8
+ { name = "Neli Hateva", email = "neli.hateva@graphwise.ai" },
8
9
  ]
9
10
  readme = "README.md"
10
11
  license = "Apache-2.0"
File without changes