crfm-helm 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/METADATA +11 -8
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/RECORD +67 -38
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/WHEEL +1 -1
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/entry_points.txt +2 -1
- helm/benchmark/__init__.py +13 -0
- helm/benchmark/adaptation/adapter_spec.py +3 -0
- helm/benchmark/adaptation/adapters/in_context_learning_adapter.py +20 -7
- helm/benchmark/augmentations/correct_to_misspelling.json +1 -0
- helm/benchmark/contamination/__init__.py +0 -0
- helm/benchmark/metrics/classification_metrics.py +70 -0
- helm/benchmark/metrics/machine_translation_metrics.py +36 -0
- helm/benchmark/metrics/summarization_metrics.py +7 -8
- helm/benchmark/metrics/test_classification_metrics.py +150 -0
- helm/benchmark/presentation/create_plots.py +617 -0
- helm/benchmark/presentation/run_display.py +7 -48
- helm/benchmark/presentation/summarize.py +4 -2
- helm/benchmark/presentation/test_create_plots.py +32 -0
- helm/benchmark/run.py +144 -48
- helm/benchmark/run_expander.py +164 -47
- helm/benchmark/run_specs.py +346 -39
- helm/benchmark/runner.py +34 -6
- helm/benchmark/scenarios/copyright_scenario.py +1 -1
- helm/benchmark/scenarios/covid_dialog_scenario.py +84 -0
- helm/benchmark/scenarios/imdb_listdir.json +50014 -0
- helm/benchmark/scenarios/lex_glue_scenario.py +253 -0
- helm/benchmark/scenarios/lextreme_scenario.py +458 -0
- helm/benchmark/scenarios/me_q_sum_scenario.py +86 -0
- helm/benchmark/scenarios/med_dialog_scenario.py +132 -0
- helm/benchmark/scenarios/med_mcqa_scenario.py +102 -0
- helm/benchmark/scenarios/med_paragraph_simplification_scenario.py +119 -0
- helm/benchmark/scenarios/med_qa_scenario.py +96 -0
- helm/benchmark/scenarios/opinions_qa_scenario.py +194 -0
- helm/benchmark/scenarios/scenario.py +5 -0
- helm/benchmark/scenarios/the_pile_scenario.py +1 -1
- helm/benchmark/scenarios/wmt_14_scenario.py +96 -0
- helm/benchmark/static/benchmarking.css +14 -0
- helm/benchmark/static/benchmarking.js +43 -0
- helm/benchmark/static/index.html +2 -0
- helm/benchmark/static/json-urls.js +4 -0
- helm/benchmark/static/plot-captions.js +16 -0
- helm/benchmark/static/schema.yaml +154 -1
- helm/benchmark/window_services/cohere_window_service.py +20 -0
- helm/benchmark/window_services/flan_t5_window_service.py +29 -0
- helm/benchmark/window_services/huggingface_window_service.py +39 -0
- helm/benchmark/window_services/santacoder_window_service.py +27 -0
- helm/benchmark/window_services/test_flan_t5_window_service.py +12 -0
- helm/benchmark/window_services/wider_ai21_window_service.py +13 -0
- helm/benchmark/window_services/window_service_factory.py +34 -7
- helm/common/codec.py +123 -0
- helm/common/general.py +12 -5
- helm/common/test_codec.py +144 -0
- helm/proxy/clients/aleph_alpha_client.py +47 -28
- helm/proxy/clients/auto_client.py +32 -24
- helm/proxy/clients/google_client.py +88 -0
- helm/proxy/clients/huggingface_client.py +32 -16
- helm/proxy/clients/huggingface_model_registry.py +111 -0
- helm/proxy/clients/huggingface_tokenizer.py +25 -7
- helm/proxy/clients/openai_client.py +60 -2
- helm/proxy/clients/test_huggingface_model_registry.py +57 -0
- helm/proxy/clients/test_huggingface_tokenizer.py +3 -0
- helm/proxy/clients/together_client.py +17 -2
- helm/proxy/clients/yalm_tokenizer/voc_100b.sp +0 -0
- helm/proxy/clients/yalm_tokenizer/yalm_tokenizer.py +8 -2
- helm/proxy/models.py +115 -7
- helm/proxy/test_models.py +1 -1
- helm/benchmark/presentation/present.py +0 -249
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/LICENSE +0 -0
- {crfm_helm-0.2.0.dist-info → crfm_helm-0.2.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import List
|
|
3
|
+
|
|
4
|
+
from helm.common.general import ensure_directory_exists, ensure_file_downloaded
|
|
5
|
+
from .scenario import Scenario, Instance, Reference, ALL_SPLITS, CORRECT_TAG, VALID_SPLIT, Input, Output
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class COVIDDialogScenario(Scenario):
    """
    From https://github.com/UCSD-AI4H/COVID-Dialogue, "COVID-Dialogue-Dataset-English is an English medical dialogue
    dataset about COVID-19 and other types of pneumonia. Patients who are concerned that they may be infected by
    COVID-19 or other pneumonia consult doctors and doctors provide advice. There are 603 consultations. Each
    consultation consists of ID, URL, Description of patient’s medical condition and Dialogue."

    The following is an example of a patient-doctor interaction from the dataset:

    patient: i have all the symptoms except fever, i went to medicross and dr said i can get tested if i want to i'm
    not sure if i should. she gave me antibiotics klacid xl 500mg, she said i can take it if i feel worse i'm worried
    it will make immune system bad?

    in brief: antibiotic i don't recommend antibiotics for a simple viral upper respiratory tract infection unless
    examination revealed signs of acute bronchitis or sinusitis. they are not effective for viral infections like
    covid 19 with no bacterial lung involvement either. if you've been exposed to someone with covid 19 or or if you
    or someone you were exposed to travelled to a region where it was endemic, get tested would you like to video
    or text chat with me?

    @article{ju2020CovidDialog,
      title={CovidDialog: Medical Dialogue Datasets about COVID-19},
      author={Ju, Zeqian and Chakravorty, Subrato and He, Xuehai and Chen, Shu and Yang, Xingyi and Xie, Pengtao},
      journal={ https://github.com/UCSD-AI4H/COVID-Dialogue},
      year={2020}
    }
    """

    # URL template for the per-split data files; `{file_name}` is filled in with
    # names such as "train.source" or "val.target" (see `get_instances`).
    SOURCE_URL_TEMPLATE: str = (
        "https://worksheets.codalab.org/rest/bundles/0x6f1ac4b2e47043fcbb873b2af1c7ee0c/contents/blob/{file_name}"
    )

    name = "covid_dialog"
    description = "Medical dialogue dataset of conversations between doctors and patients on their COVID-19 concerns"
    tags = ["dialogue", "biomedical"]

    def get_instances(self) -> List[Instance]:
        """
        Build `Instance`s using the questions asked by concerned patients and the responses by doctors.

        For every split in `ALL_SPLITS`, the "<split>.source" (questions) and "<split>.target"
        (responses) files are fetched into `<output_path>/data` and paired line by line.

        Returns:
            One `Instance` per (question, response) pair, tagged with its split. The response is the
            single reference and carries `CORRECT_TAG`.
        """
        data_path: str = os.path.join(self.output_path, "data")
        ensure_directory_exists(data_path)

        def download_and_read_lines(file_name: str) -> List[str]:
            """Fetch `file_name` into `data_path` and return its contents split into lines."""
            file_path: str = os.path.join(data_path, file_name)
            ensure_file_downloaded(
                source_url=COVIDDialogScenario.SOURCE_URL_TEMPLATE.format(file_name=file_name),
                target_path=file_path,
                unpack=False,
            )

            # Decode explicitly rather than relying on the platform's locale-dependent default,
            # so the dataset reads the same on every machine.
            # NOTE(review): assumes the hosted files are UTF-8 encoded -- confirm.
            with open(file_path, encoding="utf-8") as f:
                return f.read().splitlines()

        instances: List[Instance] = []
        for split in ALL_SPLITS:
            # The dataset files use "val" in their names for the validation split.
            dataset_split: str = "val" if split == VALID_SPLIT else split

            # The files with the questions end with ".source"
            questions: List[str] = download_and_read_lines(f"{dataset_split}.source")

            # The files with the responses end with ".target"
            responses: List[str] = download_and_read_lines(f"{dataset_split}.target")

            # `zip` pairs line i of the questions with line i of the responses and stops at the
            # shorter of the two lists.
            for question, response in zip(questions, responses):
                # Questions in the dataset are already prepended with "patient:".
                # Remove those and add it via `input_prefix` of `AdapterSpec`.
                # Note that `replace` strips every occurrence of "patient: ", not only a leading one.
                question = question.replace("patient: ", "")
                instances.append(
                    Instance(
                        input=Input(text=question),
                        references=[Reference(output=Output(text=response), tags=[CORRECT_TAG])],
                        split=split,
                    )
                )

        return instances
|