EuroEval: euroeval-15.16.0-py3-none-any.whl → euroeval-16.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- euroeval/__init__.py +3 -7
- euroeval/benchmark_config_factory.py +3 -7
- euroeval/benchmark_modules/base.py +35 -19
- euroeval/benchmark_modules/fresh.py +24 -19
- euroeval/benchmark_modules/hf.py +136 -154
- euroeval/benchmark_modules/litellm.py +190 -110
- euroeval/benchmark_modules/vllm.py +161 -114
- euroeval/benchmarker.py +49 -22
- euroeval/cli.py +3 -3
- euroeval/constants.py +13 -15
- euroeval/data_loading.py +33 -28
- euroeval/data_models.py +53 -7
- euroeval/dataset_configs/__init__.py +2 -0
- euroeval/dataset_configs/danish.py +38 -1
- euroeval/dataset_configs/dutch.py +38 -1
- euroeval/dataset_configs/english.py +38 -1
- euroeval/dataset_configs/estonian.py +95 -0
- euroeval/dataset_configs/faroese.py +38 -0
- euroeval/dataset_configs/finnish.py +39 -1
- euroeval/dataset_configs/french.py +38 -1
- euroeval/dataset_configs/german.py +38 -1
- euroeval/dataset_configs/icelandic.py +39 -1
- euroeval/dataset_configs/italian.py +38 -1
- euroeval/dataset_configs/latvian.py +81 -0
- euroeval/dataset_configs/norwegian.py +38 -1
- euroeval/dataset_configs/portuguese.py +38 -1
- euroeval/dataset_configs/spanish.py +38 -1
- euroeval/dataset_configs/swedish.py +38 -1
- euroeval/enums.py +0 -6
- euroeval/finetuning.py +6 -6
- euroeval/generation.py +25 -14
- euroeval/generation_utils.py +46 -14
- euroeval/languages.py +947 -187
- euroeval/metrics/__init__.py +6 -0
- euroeval/metrics/base.py +76 -0
- euroeval/metrics/huggingface.py +192 -0
- euroeval/metrics/llm_as_a_judge.py +257 -0
- euroeval/metrics/pipeline.py +234 -0
- euroeval/metrics/speed.py +51 -0
- euroeval/prompt_templates/linguistic_acceptability.py +40 -2
- euroeval/prompt_templates/multiple_choice.py +23 -2
- euroeval/prompt_templates/named_entity_recognition.py +65 -2
- euroeval/prompt_templates/reading_comprehension.py +42 -2
- euroeval/prompt_templates/sentiment_classification.py +46 -2
- euroeval/prompt_templates/summarization.py +24 -4
- euroeval/scores.py +7 -2
- euroeval/speed_benchmark.py +6 -6
- euroeval/task_group_utils/multiple_choice_classification.py +17 -6
- euroeval/task_group_utils/question_answering.py +35 -28
- euroeval/task_group_utils/sequence_classification.py +96 -23
- euroeval/task_group_utils/text_to_text.py +7 -3
- euroeval/task_group_utils/token_classification.py +47 -75
- euroeval/tasks.py +31 -6
- euroeval/tokenization_utils.py +295 -207
- euroeval/utils.py +118 -34
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/METADATA +11 -14
- euroeval-16.0.0.dist-info/RECORD +69 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/entry_points.txt +0 -1
- euroeval/human_evaluation.py +0 -738
- euroeval/metrics.py +0 -470
- euroeval-15.16.0.dist-info/RECORD +0 -63
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/WHEEL +0 -0
- {euroeval-15.16.0.dist-info → euroeval-16.0.0.dist-info}/licenses/LICENSE +0 -0
euroeval/task_group_utils/token_classification.py
CHANGED

@@ -1,15 +1,16 @@
 """Utility functions related to the token-classification task group."""
 
 import logging
-import re
 import typing as t
 from copy import deepcopy
 
-import demjson3
 import numpy as np
 
 from ..exceptions import InvalidBenchmark
-from ..utils import raise_if_model_output_contains_nan_values
+from ..utils import (
+    extract_json_dict_from_string,
+    raise_if_model_output_contains_nan_values,
+)
 
 if t.TYPE_CHECKING:
     from datasets.arrow_dataset import Dataset
@@ -17,7 +18,7 @@ if t.TYPE_CHECKING:
     from transformers.tokenization_utils_base import BatchEncoding
     from transformers.trainer_utils import EvalPrediction
 
-    from ..data_models import DatasetConfig, GenerativeModelOutput
+    from ..data_models import BenchmarkConfig, DatasetConfig, GenerativeModelOutput
     from ..types import Labels, Predictions
 
 
@@ -28,6 +29,7 @@ def compute_metrics(
     model_outputs_and_labels: "tuple[Predictions, Labels] | EvalPrediction",
     has_misc_tags: bool,
     dataset_config: "DatasetConfig",
+    benchmark_config: "BenchmarkConfig",
     dataset: "Dataset",
 ) -> dict[str, float]:
     """Compute the metrics needed for evaluation.
@@ -40,6 +42,8 @@
             Whether the dataset has MISC tags.
         dataset_config:
             The configuration of the dataset.
+        benchmark_config:
+            The configuration of the benchmark.
         dataset:
             The dataset used for evaluation. This is only used in case any additional
             metadata is used to compute the metrics.
@@ -142,7 +146,11 @@
         if metric.name == "micro_f1"
     )
     micro_f1_score = metric(
-        predictions=predictions,
+        predictions=predictions,
+        references=list(labels),
+        dataset=dataset,
+        dataset_config=dataset_config,
+        benchmark_config=benchmark_config,
     )
 
     # Compute the metrics without MISC tags
@@ -165,7 +173,11 @@
         if metric.name == "micro_f1_no_misc"
     )
     micro_f1_no_misc_score = metric(
-        predictions=predictions_no_misc,
+        predictions=predictions_no_misc,
+        references=labels_no_misc,
+        dataset=dataset,
+        dataset_config=dataset_config,
+        benchmark_config=benchmark_config,
    )
 
     # Raise error if the metrics are invalid
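The metric objects are now invoked with the dataset and both configuration objects in addition to the predictions and references. Below is a minimal sketch of a callable metric satisfying this keyword interface; the class name and fields are assumptions for illustration only, not the actual API of the new euroeval/metrics package.

import typing as t
from dataclasses import dataclass


@dataclass
class KeywordMetric:
    """Hypothetical metric following the call convention shown in the hunks above."""

    name: str
    compute_fn: t.Callable[[list, list], float]

    def __call__(
        self,
        predictions: list,
        references: list,
        dataset: t.Any = None,
        dataset_config: t.Any = None,
        benchmark_config: t.Any = None,
    ) -> float:
        # The extra arguments carry evaluation context (dataset metadata, benchmark
        # settings); a simple score such as micro-F1 can ignore them.
        return self.compute_fn(predictions, references)

A metric defined this way can be called exactly as in the hunks above, e.g. metric(predictions=..., references=..., dataset=..., dataset_config=..., benchmark_config=...).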
@@ -194,51 +206,11 @@ def extract_labels_from_generation(
     Returns:
         The predicted labels.
     """
-    raw_predictions = model_output.sequences
-
-    # Attempt to extract the JSON dictionary from the predictions
-    json_regex = r"\{[^{}]+?\}"
-    json_matches = [
-        re.search(pattern=json_regex, string=raw_prediction, flags=re.DOTALL)
-        or raw_prediction
-        for raw_prediction in raw_predictions
-    ]
-    raw_predictions = [
-        json_match.group() if isinstance(json_match, re.Match) else json_match
-        for json_match in json_matches
-    ]
-
     tokens = input_batch["tokens"]
     predicted_labels: list[list[str]] = [["o"] * len(token_ids) for token_ids in tokens]
-    for idx, raw_prediction in enumerate(raw_predictions):
-
-
-            if not isinstance(json_output, dict):
-                logger.debug(
-                    "The model output is not a JSON dictionary, so cannot parse "
-                    f"it. Skipping. Here is the output: {raw_prediction}"
-                )
-                continue
-            elif not all(isinstance(key, str) for key in json_output.keys()):
-                logger.debug(
-                    "The model output is not a JSON dictionary with string keys, "
-                    "so cannot parse it. Skipping. Here is the output: "
-                    f"{raw_prediction}"
-                )
-                continue
-            elif not all(isinstance(value, list) for value in json_output.values()):
-                logger.debug(
-                    "The model output is not a JSON dictionary with list values, "
-                    "so cannot parse it. Skipping. Here is the output: "
-                    f"{raw_prediction}"
-                )
-                continue
-            prediction_dict: dict[str, list[str]] = json_output
-        except demjson3.JSONDecodeError:
-            logger.debug(
-                "The model output is not valid JSON, so cannot parse it. Skipping. "
-                f"Here is the output: {raw_prediction!r}"
-            )
+    for idx, raw_prediction in enumerate(model_output.sequences):
+        prediction_dict = extract_json_dict_from_string(s=raw_prediction)
+        if prediction_dict is None:
             continue
 
         prompt_label_mapping = dataset_config.prompt_label_mapping
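The inline regex matching and demjson3 parsing removed above is consolidated into the new extract_json_dict_from_string helper imported from ..utils. The real implementation lives in euroeval/utils.py and is not shown in this diff; the following is only a rough sketch of the behaviour it replaces, reusing the validation steps of the removed code and standard-library JSON parsing instead of demjson3.

import json
import re


def extract_json_dict_from_string(s: str) -> dict | None:
    """Extract the first JSON object from a string, returning None if parsing fails."""
    match = re.search(pattern=r"\{[^{}]+?\}", string=s, flags=re.DOTALL)
    candidate = match.group() if match is not None else s
    try:
        output = json.loads(candidate)
    except json.JSONDecodeError:
        return None
    # Mirror the sanity checks of the removed inline code: the parsed value must be
    # a dictionary with string keys and list values.
    if not isinstance(output, dict):
        return None
    if not all(isinstance(key, str) for key in output):
        return None
    if not all(isinstance(value, list) for value in output.values()):
        return None
    return output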
@@ -272,15 +244,15 @@ def extract_labels_from_generation(
 
 
 def tokenize_and_align_labels(
-    examples: dict, tokenizer: "PreTrainedTokenizer", label2id: dict[str, int]
+    examples: dict, tokeniser: "PreTrainedTokenizer", label2id: dict[str, int]
 ) -> "BatchEncoding":
     """Tokenise all texts and align the labels with them.
 
     Args:
         examples:
             The examples to be tokenised.
-        tokenizer:
-            A pretrained tokenizer.
+        tokeniser:
+            A pretrained tokeniser.
         label2id:
             A dictionary that converts NER tags to IDs.
 
@@ -289,22 +261,22 @@ def tokenize_and_align_labels(
     """
     # Tokenize the texts. We use the `is_split_into_words` argument here because
    # the texts in our dataset are lists of words (with a label for each word)
-    tokenized_inputs = tokenizer(
+    tokenized_inputs = tokeniser(
         examples["tokens"], is_split_into_words=True, truncation=True, padding=True
     )
 
     # Extract a mapping between all the tokens and their corresponding word. If the
-    # tokenizer is of a "fast" variant then this can be accessed through the
+    # tokeniser is of a "fast" variant then this can be accessed through the
     # `word_ids` method. Otherwise, we have to extract it manually.
     all_labels: list[list[int]] = list()
     labels: list[str]
     word_ids: list[int | None]
     for i, labels in enumerate(examples["labels"]):
-        # Try to get the word IDs from the tokenizer
+        # Try to get the word IDs from the tokeniser
         try:
             word_ids = tokenized_inputs.word_ids(batch_index=i)
 
-        # If the tokenizer is not of a "fast" variant, we have to extract the word
+        # If the tokeniser is not of a "fast" variant, we have to extract the word
         # IDs manually
         except ValueError:
             # Get the list of words in the document
@@ -314,7 +286,7 @@ def tokenize_and_align_labels(
             tok_ids: list[int] = tokenized_inputs.input_ids[i]
 
             # Decode the token IDs
-            tokens = tokenizer.convert_ids_to_tokens(tok_ids)
+            tokens = tokeniser.convert_ids_to_tokens(tok_ids)
             assert isinstance(tokens, list)
 
             # Remove prefixes from the tokens
@@ -326,14 +298,14 @@ def tokenize_and_align_labels(
                 tokens[tok_idx] = tok[len(prefix) :]
 
             # Replace UNK tokens with the correct word
-            tokens = handle_unk_tokens(tokenizer=tokenizer, tokens=tokens, words=words)
+            tokens = handle_unk_tokens(tokeniser=tokeniser, tokens=tokens, words=words)
 
-            # Get list of special tokens. Some tokenizers do not record these
+            # Get list of special tokens. Some tokenisers do not record these
             # properly, which is why we convert the values to their indices and
             # then back to strings
             sp_toks = [
-                tokenizer.convert_ids_to_tokens(tokenizer.convert_tokens_to_ids(sp_tok))
-                for sp_tok in tokenizer.special_tokens_map.values()
+                tokeniser.convert_ids_to_tokens(tokeniser.convert_tokens_to_ids(sp_tok))
+                for sp_tok in tokeniser.special_tokens_map.values()
             ]
 
             # Replace special tokens with `None`
@@ -357,7 +329,7 @@ def tokenize_and_align_labels(
             if len(word_idxs) != len(token_idxs):
                 raise InvalidBenchmark(
                     "The tokens could not be aligned with the words during manual "
-                    "word-token alignment. It seems that the tokenizer is neither "
+                    "word-token alignment. It seems that the tokeniser is neither "
                     "of the fast variant nor of a SentencePiece/WordPiece variant."
                 )
 
@@ -387,9 +359,9 @@ def tokenize_and_align_labels(
                 label = labels[word_id]
                 try:
                     label_id = label2id[label.lower()]
-                except KeyError:
+                except KeyError as e:
                     msg = f"The label {label} was not found in the model's config."
-                    raise InvalidBenchmark(msg)
+                    raise InvalidBenchmark(msg) from e
                 label_ids.append(label_id)
 
             # For the other tokens in a word, we set the label to -100
@@ -404,13 +376,13 @@ def tokenize_and_align_labels(
 
 
 def handle_unk_tokens(
-    tokenizer: "PreTrainedTokenizer", tokens: list[str], words: list[str]
+    tokeniser: "PreTrainedTokenizer", tokens: list[str], words: list[str]
 ) -> list[str]:
     """Replace unknown tokens in the tokens with the corresponding word.
 
     Args:
-        tokenizer:
-            The tokenizer used to tokenize the words.
+        tokeniser:
+            The tokeniser used to tokenize the words.
         tokens:
             The list of tokens.
         words:
@@ -420,15 +392,15 @@ def handle_unk_tokens(
         The list of tokens with unknown tokens replaced by the corresponding word.
     """
     # Locate the token indices of the unknown tokens
-    token_unk_idxs = [i for i, tok in enumerate(tokens) if tok == tokenizer.unk_token]
+    token_unk_idxs = [i for i, tok in enumerate(tokens) if tok == tokeniser.unk_token]
 
     # Locate the word indices of the words which contain an unknown token
     word_unk_idxs = [
         i
         for i, word in enumerate(words)
-        if tokenizer.unk_token
-        in tokenizer.convert_ids_to_tokens(
-            tokenizer.encode(word, add_special_tokens=False)
+        if tokeniser.unk_token
+        in tokeniser.convert_ids_to_tokens(
+            tokeniser.encode(word, add_special_tokens=False)
         )
     ]
 
@@ -438,8 +410,8 @@ def handle_unk_tokens(
         word = words[word_idx]
 
         # Tokenize the word, which is now a list containing at least one UNK token
-        tokens_with_unk = tokenizer.convert_ids_to_tokens(
-            tokenizer.encode(word, add_special_tokens=False)
+        tokens_with_unk = tokeniser.convert_ids_to_tokens(
+            tokeniser.encode(word, add_special_tokens=False)
         )
 
         # Iterate over the tokens in the word
@@ -448,10 +420,10 @@ def handle_unk_tokens(
             # of the content of this token from the word. The result of the `word`
             # variable will be the content of the UNK token.
             # NOTE: This is a bit hacky and not bulletproof. For instance, if the
-            # word is "1925-1950" and the tokenizer splits it into ["[UNK]", "-",
+            # word is "1925-1950" and the tokeniser splits it into ["[UNK]", "-",
             # "19", "50"], then the result will be 2519 instead of 1925. This
             # happens almost never, however, so we can live with it.
-            if possible_unk_token != tokenizer.unk_token:
+            if possible_unk_token != tokeniser.unk_token:
                 word = word.replace(possible_unk_token, "", 1)
 
             # Replace the token with the word
euroeval/tasks.py
CHANGED

@@ -2,7 +2,7 @@
 
 from . import metrics as m
 from .data_models import Task
-from .enums import TaskGroup
+from .enums import GenerativeType, ModelType, TaskGroup
 from .prompt_templates import (
     LA_TEMPLATES,
     MULTIPLE_CHOICE_TEMPLATES,
@@ -28,8 +28,9 @@ LA = Task(
     template_dict=LA_TEMPLATES,
     metrics=[m.mcc_metric, m.macro_f1_metric],
     default_num_few_shot_examples=12,
-    default_max_generated_tokens=
+    default_max_generated_tokens=10,
     default_labels=["correct", "incorrect"],
+    uses_logprobs=True,
 )
 
 
@@ -51,6 +52,7 @@ NER = Task(
         "b-misc",
         "i-misc",
     ],
+    uses_structured_output=True,
 )
 
 
@@ -71,8 +73,9 @@ SENT = Task(
     template_dict=SENT_TEMPLATES,
     metrics=[m.mcc_metric, m.macro_f1_metric],
     default_num_few_shot_examples=12,
-    default_max_generated_tokens=
+    default_max_generated_tokens=10,
     default_labels=["positive", "neutral", "negative"],
+    uses_logprobs=True,
 )
 
 
@@ -84,6 +87,7 @@ SUMM = Task(
     default_num_few_shot_examples=1,
     default_max_generated_tokens=256,
     default_labels=[],
+    allowed_model_types=[ModelType.GENERATIVE],
 )
 
 
@@ -93,8 +97,9 @@ KNOW = Task(
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
     metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
-    default_max_generated_tokens=
+    default_max_generated_tokens=10,
     default_labels=["a", "b", "c", "d"],
+    uses_logprobs=True,
 )
 
 
@@ -104,8 +109,9 @@ MCRC = Task(
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
     metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
-    default_max_generated_tokens=
+    default_max_generated_tokens=10,
     default_labels=["a", "b", "c", "d"],
+    uses_logprobs=True,
 )
 
 
@@ -115,8 +121,27 @@ COMMON_SENSE = Task(
     template_dict=MULTIPLE_CHOICE_TEMPLATES,
     metrics=[m.mcc_metric, m.accuracy_metric],
     default_num_few_shot_examples=5,
-    default_max_generated_tokens=
+    default_max_generated_tokens=10,
     default_labels=["a", "b", "c", "d"],
+    uses_logprobs=True,
+)
+
+
+EUROPEAN_VALUES = Task(
+    name="european-values",
+    task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
+    template_dict=MULTIPLE_CHOICE_TEMPLATES,
+    metrics=[m.european_values_metric],
+    default_num_few_shot_examples=0,
+    default_max_generated_tokens=10,
+    default_labels=["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"],
+    allowed_model_types=[ModelType.GENERATIVE],
+    allowed_generative_types=[
+        GenerativeType.INSTRUCTION_TUNED,
+        GenerativeType.REASONING,
+    ],
+    requires_zero_shot=True,
+    uses_logprobs=True,
 )
 
 
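Taken together, the new Task fields seen in this file (uses_logprobs, uses_structured_output, allowed_model_types, allowed_generative_types, requires_zero_shot) let a task restrict which models may run it and how its outputs are scored. A hedged example of defining a custom task with these fields, assuming the package-level import paths mirror the relative imports shown above; the task name and label set are made up for illustration.

from euroeval import metrics as m
from euroeval.data_models import Task
from euroeval.enums import GenerativeType, ModelType, TaskGroup
from euroeval.prompt_templates import MULTIPLE_CHOICE_TEMPLATES

# Hypothetical zero-shot multiple-choice task, restricted to instruction-tuned or
# reasoning generative models and scored via log-probabilities over the labels.
MY_TASK = Task(
    name="my-multiple-choice-task",
    task_group=TaskGroup.MULTIPLE_CHOICE_CLASSIFICATION,
    template_dict=MULTIPLE_CHOICE_TEMPLATES,
    metrics=[m.mcc_metric, m.accuracy_metric],
    default_num_few_shot_examples=0,
    default_max_generated_tokens=10,
    default_labels=["a", "b", "c", "d"],
    allowed_model_types=[ModelType.GENERATIVE],
    allowed_generative_types=[GenerativeType.INSTRUCTION_TUNED, GenerativeType.REASONING],
    requires_zero_shot=True,
    uses_logprobs=True,
)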