graphrag-eval 5.0.2__tar.gz → 5.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/PKG-INFO +20 -11
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/README.md +13 -6
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/answer_relevance.py +1 -1
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/steps/retrieval_answer.py +10 -7
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/steps/retrieval_context_texts.py +3 -3
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/steps/sparql.py +0 -1
- graphrag_eval-5.1.0/pyproject.toml +51 -0
- graphrag_eval-5.0.2/pyproject.toml +0 -47
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/LICENSE +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/__init__.py +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/aggregation.py +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/answer_correctness.py +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/evaluation.py +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/prompts/template.md +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/steps/__init__.py +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/steps/retrieval_context_ids.py +0 -0
- {graphrag_eval-5.0.2 → graphrag_eval-5.1.0}/graphrag_eval/util.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: graphrag-eval
|
|
3
|
-
Version: 5.0.2
|
|
3
|
+
Version: 5.1.0
|
|
4
4
|
Summary: For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps.
|
|
5
5
|
License: Apache-2.0
|
|
6
6
|
Author: Philip Ganchev
|
|
@@ -9,10 +9,12 @@ Requires-Python: >=3.12,<3.13
|
|
|
9
9
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
10
|
Classifier: Programming Language :: Python :: 3
|
|
11
11
|
Classifier: Programming Language :: Python :: 3.12
|
|
12
|
-
Provides-Extra: openai
|
|
13
|
-
Requires-Dist: langevals (==0.1.*) ; extra == "openai"
|
|
14
|
-
Requires-Dist: langevals-ragas (>=0.1.12,<0.2.0) ; extra == "openai"
|
|
15
|
-
Requires-Dist: openai (>=1.97.0,<2.0.0) ; extra == "openai"
|
|
12
|
+
Provides-Extra: ragas
|
|
13
|
+
Requires-Dist: langchain-openai (==0.3.7) ; extra == "ragas"
|
|
14
|
+
Requires-Dist: langchain_community (==0.3.18) ; extra == "ragas"
|
|
15
|
+
Requires-Dist: langevals[ragas] (==0.1.8) ; extra == "ragas"
|
|
16
|
+
Requires-Dist: litellm (==1.61.20) ; extra == "ragas"
|
|
17
|
+
Requires-Dist: ragas (==0.2.9) ; extra == "ragas"
|
|
16
18
|
Project-URL: Repository, https://github.com/Ontotext-AD/graphrag-eval
|
|
17
19
|
Description-Content-Type: text/markdown
|
|
18
20
|
|
|
@@ -43,12 +45,12 @@ graphrag-eval = "*"
|
|
|
43
45
|
To evaluate answer relevance and answer correctness:
|
|
44
46
|
|
|
45
47
|
```bash
|
|
46
|
-
pip install 'graphrag-eval[openai]'
|
|
48
|
+
pip install 'graphrag-eval[ragas]'
|
|
47
49
|
```
|
|
48
50
|
|
|
49
51
|
or add the following dependency in your `pyproject.toml` file:
|
|
50
52
|
```toml
|
|
51
|
-
graphrag-eval = {version = "*", extras = ["openai"]}
|
|
53
|
+
graphrag-eval = {version = "*", extras = ["ragas"]}
|
|
52
54
|
```
|
|
53
55
|
|
|
54
56
|
## Maintainers
|
|
@@ -61,7 +63,7 @@ For issues or feature requests, please open [a GitHub issue](https://github.com/
|
|
|
61
63
|
To evaluate only correctness of final answers (system responses), you can clone this repository and run the code on the command line:
|
|
62
64
|
|
|
63
65
|
1. Prepare an input TSV file with columns `Question`, `Reference answer` and `Actual answer`
|
|
64
|
-
1. Execute `poetry install --with openai`
|
|
66
|
+
1. Execute `poetry install --with ragas`
|
|
65
67
|
1. Execute `OPENAI_API_KEY=<your_api_key> poetry run answer-correctness -i <input_file.tsv> -o <output_file.tsv>`
|
|
66
68
|
|
|
67
69
|
We plan to improve CLI support in future releases.
|
|
@@ -445,7 +447,6 @@ The output is a list of statistics for each question from the reference Q&A data
|
|
|
445
447
|
retrieval_answer_recall_reason: The context contains all the transformers listed in the reference answer
|
|
446
448
|
retrieval_answer_recall_cost: 0.0007
|
|
447
449
|
retrieval_answer_precision: 1.0
|
|
448
|
-
retrieval_answer_precision_reason: The context contains only transformers listed in the reference answer
|
|
449
450
|
retrieval_answer_precision_cost: 0.0003
|
|
450
451
|
retrieval_answer_f1: 1.0
|
|
451
452
|
retrieval_answer_f1_cost: 0.001
|
|
@@ -570,7 +571,6 @@ All `actual_steps` with `name` "retrieval" contain:
|
|
|
570
571
|
- `retrieval_answer_recall_error`: (optional) error message if `retrieval_answer_recall` evaluation fails
|
|
571
572
|
- `retrieval_answer_recall_cost`: cost of evaluating `retrieval_answer_recall`, in US dollars
|
|
572
573
|
- `retrieval_answer_precision`: (optional) precision of the retrieved context with respect to the reference answer, if evaluation succeeds
|
|
573
|
-
- `retrieval_answer_precision_reason`: (optional) LLM reasoning in evaluating `retrieval_answer_precision`
|
|
574
574
|
- `retrieval_answer_precision_error`: (optional) error message if `retrieval_answer_precision` evaluation fails
|
|
575
575
|
- `retrieval_answer_precision_cost`: cost of evaluating `retrieval_answer_precision`, in US dollars
|
|
576
576
|
- `retrieval_answer_f1`: (optional) F1 score of the retrieved context with respect to the reference answer, if `retrieval_answer_recall` and `retrieval_answer_precision` succeed
|
|
@@ -605,6 +605,9 @@ Aggregates are:
|
|
|
605
605
|
- `once_per_sample`: how many times each step was executed, counted only once per question
|
|
606
606
|
- `empty_results`: how many times the step was executed and returned empty results
|
|
607
607
|
- `errors`: how many times the step was executed and resulted in error
|
|
608
|
+
- `retrieval_answer_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_recall` for all successful questions in this template
|
|
609
|
+
- `retrieval_answer_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_precision` for all successful questions in this template
|
|
610
|
+
- `retrieval_answer_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_f1` for all successful questions in this template
|
|
608
611
|
- `retrieval_context_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_recall` for all successful questions in this template
|
|
609
612
|
- `retrieval_context_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_precision` for all successful questions in this template
|
|
610
613
|
- `retrieval_context_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_f1` for all successful questions in this template
|
|
@@ -620,6 +623,9 @@ Aggregates are:
|
|
|
620
623
|
- `answer_f1`: `sum`, `mean`, `median`, `min` and `max` for `answer_f1` of all successful questions
|
|
621
624
|
- `answer_relevance`: `sum`, `mean`, `median`, `min` and `max` statistics for `answer_relevance` of all successful questions
|
|
622
625
|
- `answer_relevance_cost`: `sum`, `mean`, `median`, `min` and `max` statistics for `answer_relevance_cost` of all successful questions
|
|
626
|
+
- `retrieval_answer_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_recall` of all successful questions
|
|
627
|
+
- `retrieval_answer_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_precision` of all successful questions
|
|
628
|
+
- `retrieval_answer_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_f1` of all successful questions
|
|
623
629
|
- `retrieval_context_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_recall` of all successful questions
|
|
624
630
|
- `retrieval_context_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_precision` of all successful questions
|
|
625
631
|
- `retrieval_context_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_f1` of all successful questions
|
|
@@ -634,6 +640,9 @@ Aggregates are:
|
|
|
634
640
|
- `answer_f1`: `mean` for `answer_f1`
|
|
635
641
|
- `answer_relevance`: `mean` for `answer_relevance`
|
|
636
642
|
- `answer_relevance_cost`: `mean` for `answer_relevance_cost`
|
|
643
|
+
- `retrieval_answer_recall`: `mean` for `retrieval_answer_recall`
|
|
644
|
+
- `retrieval_answer_precision`: `mean` for `retrieval_answer_precision`
|
|
645
|
+
- `retrieval_answer_f1`: `mean` for `retrieval_answer_f1`
|
|
637
646
|
- `retrieval_context_recall`: `mean` for `retrieval_context_recall`
|
|
638
647
|
- `retrieval_context_precision`: `mean` for `retrieval_context_precision`
|
|
639
648
|
- `retrieval_context_f1`: `mean` for `retrieval_context_f1`
|
|
@@ -1031,7 +1040,7 @@ The following metrics are based on the content of retrieved documents.
|
|
|
1031
1040
|
|
|
1032
1041
|
#### Context Recall@k
|
|
1033
1042
|
|
|
1034
|
-
The fraction of relevant items among the top *k* recommendations. It answers the question: "Of all items the user cares about, how many did we
|
|
1043
|
+
The fraction of relevant items among the top *k* recommendations. It answers the question: "Of all items the user cares about, how many did we include in the first k spots?"
|
|
1035
1044
|
* **Formula**:
|
|
1036
1045
|
$`
|
|
1037
1046
|
\frac{\text{Number of relevant items in top k}}{\text{Number of relevant items}}
|
|
@@ -25,12 +25,12 @@ graphrag-eval = "*"
|
|
|
25
25
|
To evaluate answer relevance and answer correctness:
|
|
26
26
|
|
|
27
27
|
```bash
|
|
28
|
-
pip install 'graphrag-eval[openai]'
|
|
28
|
+
pip install 'graphrag-eval[ragas]'
|
|
29
29
|
```
|
|
30
30
|
|
|
31
31
|
or add the following dependency in your `pyproject.toml` file:
|
|
32
32
|
```toml
|
|
33
|
-
graphrag-eval = {version = "*", extras = ["openai"]}
|
|
33
|
+
graphrag-eval = {version = "*", extras = ["ragas"]}
|
|
34
34
|
```
|
|
35
35
|
|
|
36
36
|
## Maintainers
|
|
@@ -43,7 +43,7 @@ For issues or feature requests, please open [a GitHub issue](https://github.com/
|
|
|
43
43
|
To evaluate only correctness of final answers (system responses), you can clone this repository and run the code on the command line:
|
|
44
44
|
|
|
45
45
|
1. Prepare an input TSV file with columns `Question`, `Reference answer` and `Actual answer`
|
|
46
|
-
1. Execute `poetry install --with openai`
|
|
46
|
+
1. Execute `poetry install --with ragas`
|
|
47
47
|
1. Execute `OPENAI_API_KEY=<your_api_key> poetry run answer-correctness -i <input_file.tsv> -o <output_file.tsv>`
|
|
48
48
|
|
|
49
49
|
We plan to improve CLI support in future releases.
|
|
@@ -427,7 +427,6 @@ The output is a list of statistics for each question from the reference Q&A data
|
|
|
427
427
|
retrieval_answer_recall_reason: The context contains all the transformers listed in the reference answer
|
|
428
428
|
retrieval_answer_recall_cost: 0.0007
|
|
429
429
|
retrieval_answer_precision: 1.0
|
|
430
|
-
retrieval_answer_precision_reason: The context contains only transformers listed in the reference answer
|
|
431
430
|
retrieval_answer_precision_cost: 0.0003
|
|
432
431
|
retrieval_answer_f1: 1.0
|
|
433
432
|
retrieval_answer_f1_cost: 0.001
|
|
@@ -552,7 +551,6 @@ All `actual_steps` with `name` "retrieval" contain:
|
|
|
552
551
|
- `retrieval_answer_recall_error`: (optional) error message if `retrieval_answer_recall` evaluation fails
|
|
553
552
|
- `retrieval_answer_recall_cost`: cost of evaluating `retrieval_answer_recall`, in US dollars
|
|
554
553
|
- `retrieval_answer_precision`: (optional) precision of the retrieved context with respect to the reference answer, if evaluation succeeds
|
|
555
|
-
- `retrieval_answer_precision_reason`: (optional) LLM reasoning in evaluating `retrieval_answer_precision`
|
|
556
554
|
- `retrieval_answer_precision_error`: (optional) error message if `retrieval_answer_precision` evaluation fails
|
|
557
555
|
- `retrieval_answer_precision_cost`: cost of evaluating `retrieval_answer_precision`, in US dollars
|
|
558
556
|
- `retrieval_answer_f1`: (optional) F1 score of the retrieved context with respect to the reference answer, if `retrieval_answer_recall` and `retrieval_answer_precision` succeed
|
|
@@ -587,6 +585,9 @@ Aggregates are:
|
|
|
587
585
|
- `once_per_sample`: how many times each step was executed, counted only once per question
|
|
588
586
|
- `empty_results`: how many times the step was executed and returned empty results
|
|
589
587
|
- `errors`: how many times the step was executed and resulted in error
|
|
588
|
+
- `retrieval_answer_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_recall` for all successful questions in this template
|
|
589
|
+
- `retrieval_answer_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_precision` for all successful questions in this template
|
|
590
|
+
- `retrieval_answer_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_f1` for all successful questions in this template
|
|
590
591
|
- `retrieval_context_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_recall` for all successful questions in this template
|
|
591
592
|
- `retrieval_context_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_precision` for all successful questions in this template
|
|
592
593
|
- `retrieval_context_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_f1` for all successful questions in this template
|
|
@@ -602,6 +603,9 @@ Aggregates are:
|
|
|
602
603
|
- `answer_f1`: `sum`, `mean`, `median`, `min` and `max` for `answer_f1` of all successful questions
|
|
603
604
|
- `answer_relevance`: `sum`, `mean`, `median`, `min` and `max` statistics for `answer_relevance` of all successful questions
|
|
604
605
|
- `answer_relevance_cost`: `sum`, `mean`, `median`, `min` and `max` statistics for `answer_relevance_cost` of all successful questions
|
|
606
|
+
- `retrieval_answer_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_recall` of all successful questions
|
|
607
|
+
- `retrieval_answer_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_precision` of all successful questions
|
|
608
|
+
- `retrieval_answer_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_answer_f1` of all successful questions
|
|
605
609
|
- `retrieval_context_recall`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_recall` of all successful questions
|
|
606
610
|
- `retrieval_context_precision`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_precision` of all successful questions
|
|
607
611
|
- `retrieval_context_f1`: `sum`, `mean`, `median`, `min` and `max` statistics for `retrieval_context_f1` of all successful questions
|
|
@@ -616,6 +620,9 @@ Aggregates are:
|
|
|
616
620
|
- `answer_f1`: `mean` for `answer_f1`
|
|
617
621
|
- `answer_relevance`: `mean` for `answer_relevance`
|
|
618
622
|
- `answer_relevance_cost`: `mean` for `answer_relevance_cost`
|
|
623
|
+
- `retrieval_answer_recall`: `mean` for `retrieval_answer_recall`
|
|
624
|
+
- `retrieval_answer_precision`: `mean` for `retrieval_answer_precision`
|
|
625
|
+
- `retrieval_answer_f1`: `mean` for `retrieval_answer_f1`
|
|
619
626
|
- `retrieval_context_recall`: `mean` for `retrieval_context_recall`
|
|
620
627
|
- `retrieval_context_precision`: `mean` for `retrieval_context_precision`
|
|
621
628
|
- `retrieval_context_f1`: `mean` for `retrieval_context_f1`
|
|
@@ -1013,7 +1020,7 @@ The following metrics are based on the content of retrieved documents.
|
|
|
1013
1020
|
|
|
1014
1021
|
#### Context Recall@k
|
|
1015
1022
|
|
|
1016
|
-
The fraction of relevant items among the top *k* recommendations. It answers the question: "Of all items the user cares about, how many did we
|
|
1023
|
+
The fraction of relevant items among the top *k* recommendations. It answers the question: "Of all items the user cares about, how many did we include in the first k spots?"
|
|
1017
1024
|
* **Formula**:
|
|
1018
1025
|
$`
|
|
1019
1026
|
\frac{\text{Number of relevant items in top k}}{\text{Number of relevant items}}
|
|
@@ -7,7 +7,7 @@ from langevals_ragas.response_relevancy import (
|
|
|
7
7
|
def get_relevance_dict(
|
|
8
8
|
question_text: str,
|
|
9
9
|
actual_answer: str,
|
|
10
|
-
model_name
|
|
10
|
+
model_name: str = 'openai/gpt-4o-mini',
|
|
11
11
|
max_tokens: int = 65_536
|
|
12
12
|
) -> dict:
|
|
13
13
|
settings_dict = {
|
|
@@ -16,16 +16,19 @@ def _evaluate(
|
|
|
16
16
|
metric: str
|
|
17
17
|
) -> dict[str, float | str]:
|
|
18
18
|
try:
|
|
19
|
-
|
|
20
|
-
if
|
|
21
|
-
|
|
22
|
-
f"retrieval_answer_{metric}":
|
|
23
|
-
f"retrieval_answer_{metric}_cost": result.cost.amount,
|
|
24
|
-
f"retrieval_answer_{metric}_reason": result.details
|
|
19
|
+
le_result = evaluator.evaluate(entry)
|
|
20
|
+
if le_result.status == "processed":
|
|
21
|
+
result = {
|
|
22
|
+
f"retrieval_answer_{metric}": le_result.score,
|
|
25
23
|
}
|
|
24
|
+
if le_result.cost:
|
|
25
|
+
result[f"retrieval_answer_{metric}_cost"] = le_result.cost.amount
|
|
26
|
+
if le_result.details:
|
|
27
|
+
result[f"retrieval_answer_{metric}_reason"] = le_result.details
|
|
28
|
+
return result
|
|
26
29
|
else:
|
|
27
30
|
return {
|
|
28
|
-
f"retrieval_answer_{metric}_error":
|
|
31
|
+
f"retrieval_answer_{metric}_error": le_result.details
|
|
29
32
|
}
|
|
30
33
|
except Exception as e:
|
|
31
34
|
return {
|
|
@@ -12,14 +12,14 @@ from graphrag_eval.util import get_f1_dict
|
|
|
12
12
|
|
|
13
13
|
def _evaluate(
|
|
14
14
|
entry: RagasContextRecallEntry | RagasContextPrecisionEntry,
|
|
15
|
-
|
|
15
|
+
evaluator: RagasContextRecallEvaluator | RagasContextPrecisionEvaluator,
|
|
16
16
|
metric: str
|
|
17
17
|
) -> dict:
|
|
18
18
|
try:
|
|
19
|
-
result =
|
|
19
|
+
result = evaluator.evaluate(entry)
|
|
20
20
|
if result.status == "processed":
|
|
21
21
|
result_dict = {
|
|
22
|
-
f"retrieval_context_{metric}": result.score,
|
|
22
|
+
f"retrieval_context_{metric}": result.score,
|
|
23
23
|
}
|
|
24
24
|
if result.details:
|
|
25
25
|
result_dict[f"retrieval_context_{metric}_reason"] = result.details
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "graphrag-eval"
|
|
3
|
+
version = "5.1.0"
|
|
4
|
+
description = "For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps."
|
|
5
|
+
authors = [
|
|
6
|
+
{ name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai" },
|
|
7
|
+
{ name = "Aleksis Datseris", email = "aleksis.datseris@graphwise.ai" },
|
|
8
|
+
{ name = "Neli Hateva", email = "neli.hateva@graphwise.ai" },
|
|
9
|
+
]
|
|
10
|
+
readme = "README.md"
|
|
11
|
+
license = "Apache-2.0"
|
|
12
|
+
requires-python = ">=3.12,<3.13"
|
|
13
|
+
|
|
14
|
+
[project.urls]
|
|
15
|
+
repository = "https://github.com/Ontotext-AD/graphrag-eval"
|
|
16
|
+
|
|
17
|
+
[tool.poetry.dependencies]
|
|
18
|
+
langevals = { version = "0.1.8", optional = true, extras = ["ragas"] }
|
|
19
|
+
ragas = { version = "0.2.9", optional = true }
|
|
20
|
+
langchain-openai = { version = "0.3.7", optional = true }
|
|
21
|
+
langchain_community = { version = "0.3.18", optional = true }
|
|
22
|
+
litellm = { version = "1.61.20", optional = true }
|
|
23
|
+
|
|
24
|
+
[tool.poetry.extras]
|
|
25
|
+
ragas = ["langevals", "ragas", "langchain-openai", "langchain_community", "litellm"]
|
|
26
|
+
|
|
27
|
+
[tool.poetry.group.ragas.dependencies]
|
|
28
|
+
langevals = { version = "0.1.8", extras = ["ragas"] }
|
|
29
|
+
ragas = "0.2.9"
|
|
30
|
+
langchain-openai = "0.3.7"
|
|
31
|
+
langchain_community = "0.3.18"
|
|
32
|
+
litellm = "1.61.20"
|
|
33
|
+
|
|
34
|
+
[tool.poetry.group.ragas]
|
|
35
|
+
optional = true
|
|
36
|
+
|
|
37
|
+
[tool.poetry.group.test.dependencies]
|
|
38
|
+
pytest = "<9,>=8"
|
|
39
|
+
pytest-cov = "<8,>=7"
|
|
40
|
+
jsonlines = "4.0.0"
|
|
41
|
+
pyyaml = "6.0.3"
|
|
42
|
+
|
|
43
|
+
[tool.poetry.group.test]
|
|
44
|
+
optional = true
|
|
45
|
+
|
|
46
|
+
[project.scripts]
|
|
47
|
+
answer-correctness = "graphrag_eval.answer_correctness:main"
|
|
48
|
+
|
|
49
|
+
[build-system]
|
|
50
|
+
requires = ["poetry-core>=2.0.0"]
|
|
51
|
+
build-backend = "poetry.core.masonry.api"
|
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
[project]
|
|
2
|
-
name = "graphrag-eval"
|
|
3
|
-
version = "5.0.2"
|
|
4
|
-
description = "For assessing question answering systems' final answers and intermediate steps, against a given set of questions, reference answers and steps."
|
|
5
|
-
authors = [
|
|
6
|
-
{ name = "Philip Ganchev", email = "philip.ganchev@graphwise.ai" },
|
|
7
|
-
{ name = "Aleksis Datseris", email = "aleksis.datseris@graphwise.ai" },
|
|
8
|
-
{ name = "Neli Hateva", email = "neli.hateva@graphwise.ai" },
|
|
9
|
-
]
|
|
10
|
-
readme = "README.md"
|
|
11
|
-
license = "Apache-2.0"
|
|
12
|
-
requires-python = ">=3.12,<3.13"
|
|
13
|
-
|
|
14
|
-
[project.urls]
|
|
15
|
-
repository = "https://github.com/Ontotext-AD/graphrag-eval"
|
|
16
|
-
|
|
17
|
-
[tool.poetry.dependencies]
|
|
18
|
-
openai = { version = "^1.97.0", optional = true }
|
|
19
|
-
langevals = { version = "0.1.*", optional = true }
|
|
20
|
-
langevals-ragas = { version = "^0.1.12", optional = true }
|
|
21
|
-
|
|
22
|
-
[tool.poetry.extras]
|
|
23
|
-
openai = ["openai", "langevals", "langevals-ragas"]
|
|
24
|
-
|
|
25
|
-
[tool.poetry.group.openai.dependencies]
|
|
26
|
-
openai = "^1.97.0"
|
|
27
|
-
langevals = "0.1.*"
|
|
28
|
-
langevals-ragas = "^0.1.12"
|
|
29
|
-
|
|
30
|
-
[tool.poetry.group.openai]
|
|
31
|
-
optional = true
|
|
32
|
-
|
|
33
|
-
[tool.poetry.group.test.dependencies]
|
|
34
|
-
pytest = "<9,>=8"
|
|
35
|
-
pytest-cov = "<7,>=6"
|
|
36
|
-
jsonlines = "4.0.0"
|
|
37
|
-
pyyaml = "^6.0.2"
|
|
38
|
-
|
|
39
|
-
[tool.poetry.group.test]
|
|
40
|
-
optional = true
|
|
41
|
-
|
|
42
|
-
[project.scripts]
|
|
43
|
-
answer-correctness = "graphrag_eval.answer_correctness:main"
|
|
44
|
-
|
|
45
|
-
[build-system]
|
|
46
|
-
requires = ["poetry-core>=2.0.0"]
|
|
47
|
-
build-backend = "poetry.core.masonry.api"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|