mteb 2.1.9__py3-none-any.whl → 2.1.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +11 -4
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py +5 -4
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +9 -4
- {mteb-2.1.9.dist-info → mteb-2.1.10.dist-info}/METADATA +1 -1
- {mteb-2.1.9.dist-info → mteb-2.1.10.dist-info}/RECORD +9 -9
- {mteb-2.1.9.dist-info → mteb-2.1.10.dist-info}/WHEEL +0 -0
- {mteb-2.1.9.dist-info → mteb-2.1.10.dist-info}/entry_points.txt +0 -0
- {mteb-2.1.9.dist-info → mteb-2.1.10.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.1.9.dist-info → mteb-2.1.10.dist-info}/top_level.txt +0 -0
|
@@ -46,10 +46,17 @@ class GeorgianFAQRetrieval(AbsTaskRetrieval):
|
|
|
46
46
|
split=_EVAL_SPLIT,
|
|
47
47
|
revision=self.metadata.dataset["revision"],
|
|
48
48
|
)
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
52
|
-
|
|
49
|
+
|
|
50
|
+
question_ids = {}
|
|
51
|
+
answer_ids = {}
|
|
52
|
+
|
|
53
|
+
for row in data:
|
|
54
|
+
question = row["question"]
|
|
55
|
+
answer = row["answer"]
|
|
56
|
+
if question not in question_ids:
|
|
57
|
+
question_ids[question] = len(question_ids)
|
|
58
|
+
if answer not in answer_ids:
|
|
59
|
+
answer_ids[answer] = len(answer_ids)
|
|
53
60
|
|
|
54
61
|
for row in data:
|
|
55
62
|
question = row["question"]
|
|
@@ -230,10 +230,11 @@ class BelebeleRetrieval(AbsTaskRetrieval):
|
|
|
230
230
|
ds_corpus = self.dataset[lang_corpus]
|
|
231
231
|
ds_question = self.dataset[lang_question]
|
|
232
232
|
|
|
233
|
-
question_ids = {
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
233
|
+
question_ids = {}
|
|
234
|
+
for row in ds_question:
|
|
235
|
+
question = row["question"]
|
|
236
|
+
if question not in question_ids:
|
|
237
|
+
question_ids[question] = len(question_ids)
|
|
237
238
|
|
|
238
239
|
link_to_context_id = {}
|
|
239
240
|
context_idx = 0
|
|
@@ -32,10 +32,15 @@ def _load_publichealthqa_data(
|
|
|
32
32
|
split=split,
|
|
33
33
|
revision=revision,
|
|
34
34
|
)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
}
|
|
38
|
-
|
|
35
|
+
|
|
36
|
+
question_ids = {}
|
|
37
|
+
answer_ids = {}
|
|
38
|
+
|
|
39
|
+
for row in data:
|
|
40
|
+
if row["question"] is not None and row["question"] not in question_ids:
|
|
41
|
+
question_ids[row["question"]] = len(question_ids)
|
|
42
|
+
if row["answer"] is not None and row["answer"] not in answer_ids:
|
|
43
|
+
answer_ids[row["answer"]] = len(answer_ids)
|
|
39
44
|
|
|
40
45
|
for row in data:
|
|
41
46
|
if row["question"] is None or row["answer"] is None:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.1.9
|
|
3
|
+
Version: 2.1.10
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -2326,12 +2326,12 @@ mteb/tasks/retrieval/jpn/nlp_journal_abs_intro_retrieval.py,sha256=EEOQpTC6vEPUL
|
|
|
2326
2326
|
mteb/tasks/retrieval/jpn/nlp_journal_title_abs_retrieval.py,sha256=JOOW_5pRKHzVn8wTOY0fhxLJ6Ns7wlQHoGHGIYVovAQ,3056
|
|
2327
2327
|
mteb/tasks/retrieval/jpn/nlp_journal_title_intro_retrieval.py,sha256=aVFTFiANWrIz68FjHv9KBqlhpWlsmi9EAP052gECzaU,3078
|
|
2328
2328
|
mteb/tasks/retrieval/kat/__init__.py,sha256=H4phkKqg_yZzkK7T62aCMBzjbGZzLKJ-MngrQlPbW3A,93
|
|
2329
|
-
mteb/tasks/retrieval/kat/georgian_faq_retrieval.py,sha256=
|
|
2329
|
+
mteb/tasks/retrieval/kat/georgian_faq_retrieval.py,sha256=4zyodSYCtHtBW9WKIGxFZaTXDrtHuaf3uyfIsDRGBqM,2494
|
|
2330
2330
|
mteb/tasks/retrieval/kor/__init__.py,sha256=zNjAS2VRjeYX5u4vqev6dGOo_R3i9uSzxAsduZ0po4I,138
|
|
2331
2331
|
mteb/tasks/retrieval/kor/auto_rag_retrieval.py,sha256=tgffW8zMpDSv1FCOdS4_4SL5zKQj70JVSt_RKs3CgKY,1576
|
|
2332
2332
|
mteb/tasks/retrieval/kor/ko_strategy_qa.py,sha256=jk13ORetYtF0q36h8ljD6TeTHUwvK5F5ZbDoMCP3eWk,1156
|
|
2333
2333
|
mteb/tasks/retrieval/multilingual/__init__.py,sha256=mfVGkoB4DO5ktlg8ia-4nImFVmZcqXh1XkgCkIff0tY,6765
|
|
2334
|
-
mteb/tasks/retrieval/multilingual/belebele_retrieval.py,sha256=
|
|
2334
|
+
mteb/tasks/retrieval/multilingual/belebele_retrieval.py,sha256=gaVLEwuLEwMutMi9V-obpiYKbpllX2QNm2j3MVeebfE,7027
|
|
2335
2335
|
mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py,sha256=_6r34ZvRiLVENYcrd87NjilybGaetBwKFEbO29zYmBU,4676
|
|
2336
2336
|
mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py,sha256=Puy0PjpRr4M_Bbxdl7oWfa7pQGM04zaRaTNlnhyKejM,4677
|
|
2337
2337
|
mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py,sha256=dwzo2sqjamM_xkSiC-jbapyhDFezSJpM4S8KfBsuLPk,4562
|
|
@@ -2346,7 +2346,7 @@ mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py,sha256=3uGnj3O92_02zXZnPW
|
|
|
2346
2346
|
mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py,sha256=xF4GjBmJVgw6c8VGVh-5QLN_4i_NKeoAzqRWmA_pfnw,2440
|
|
2347
2347
|
mteb/tasks/retrieval/multilingual/neu_clir2022_retrieval.py,sha256=bkGMvMxG2toYL98kv85BvVpSZ-rVeWvB5FFIzXhdPO4,2749
|
|
2348
2348
|
mteb/tasks/retrieval/multilingual/neu_clir2023_retrieval.py,sha256=0cALhuU3ZU5c_y7tDIyiMc7Onv-qC7YwfnimZVb8-rg,2793
|
|
2349
|
-
mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py,sha256=
|
|
2349
|
+
mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py,sha256=LJGpx4RkSJPXldN0SlMA6PbG1x8R2l-Hupc9q1xfleg,3667
|
|
2350
2350
|
mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py,sha256=Mmcvrt_1cIxPfHZfUzSURPZyaaweGiB02im1ZszlS6M,6837
|
|
2351
2351
|
mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py,sha256=iFUQUlO_ogBdQBVYBQW3o-AJDQ792yg1pJtRxA5I3Qo,3796
|
|
2352
2352
|
mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py,sha256=UduWKefwP7bPYxiDlztPEvSWXmTdw0xElglMbPY6XhA,4449
|
|
@@ -2554,9 +2554,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
|
|
|
2554
2554
|
mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
|
|
2555
2555
|
mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
|
|
2556
2556
|
mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
|
|
2557
|
-
mteb-2.1.
|
|
2558
|
-
mteb-2.1.
|
|
2559
|
-
mteb-2.1.
|
|
2560
|
-
mteb-2.1.
|
|
2561
|
-
mteb-2.1.
|
|
2562
|
-
mteb-2.1.9.dist-info/RECORD,,
|
|
2557
|
+
mteb-2.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
2558
|
+
mteb-2.1.10.dist-info/METADATA,sha256=LClBepxtjXoGssnPn6QgdAukEqJerTX67OC7zoKhdiE,13574
|
|
2559
|
+
mteb-2.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
2560
|
+
mteb-2.1.10.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
|
|
2561
|
+
mteb-2.1.10.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
|
|
2562
|
+
mteb-2.1.10.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|