mteb 2.1.9__py3-none-any.whl → 2.1.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -46,10 +46,17 @@ class GeorgianFAQRetrieval(AbsTaskRetrieval):
46
46
  split=_EVAL_SPLIT,
47
47
  revision=self.metadata.dataset["revision"],
48
48
  )
49
- question_ids = {
50
- question: _id for _id, question in enumerate(set(data["question"]))
51
- }
52
- answer_ids = {answer: _id for _id, answer in enumerate(set(data["answer"]))}
49
+
50
+ question_ids = {}
51
+ answer_ids = {}
52
+
53
+ for row in data:
54
+ question = row["question"]
55
+ answer = row["answer"]
56
+ if question not in question_ids:
57
+ question_ids[question] = len(question_ids)
58
+ if answer not in answer_ids:
59
+ answer_ids[answer] = len(answer_ids)
53
60
 
54
61
  for row in data:
55
62
  question = row["question"]
@@ -230,10 +230,11 @@ class BelebeleRetrieval(AbsTaskRetrieval):
230
230
  ds_corpus = self.dataset[lang_corpus]
231
231
  ds_question = self.dataset[lang_question]
232
232
 
233
- question_ids = {
234
- question: _id
235
- for _id, question in enumerate(set(ds_question["question"]))
236
- }
233
+ question_ids = {}
234
+ for row in ds_question:
235
+ question = row["question"]
236
+ if question not in question_ids:
237
+ question_ids[question] = len(question_ids)
237
238
 
238
239
  link_to_context_id = {}
239
240
  context_idx = 0
@@ -32,10 +32,15 @@ def _load_publichealthqa_data(
32
32
  split=split,
33
33
  revision=revision,
34
34
  )
35
- question_ids = {
36
- question: _id for _id, question in enumerate(set(data["question"]))
37
- }
38
- answer_ids = {answer: _id for _id, answer in enumerate(set(data["answer"]))}
35
+
36
+ question_ids = {}
37
+ answer_ids = {}
38
+
39
+ for row in data:
40
+ if row["question"] is not None and row["question"] not in question_ids:
41
+ question_ids[row["question"]] = len(question_ids)
42
+ if row["answer"] is not None and row["answer"] not in answer_ids:
43
+ answer_ids[row["answer"]] = len(answer_ids)
39
44
 
40
45
  for row in data:
41
46
  if row["question"] is None or row["answer"] is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mteb
3
- Version: 2.1.9
3
+ Version: 2.1.10
4
4
  Summary: Massive Text Embedding Benchmark
5
5
  Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
6
6
  Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
@@ -2326,12 +2326,12 @@ mteb/tasks/retrieval/jpn/nlp_journal_abs_intro_retrieval.py,sha256=EEOQpTC6vEPUL
2326
2326
  mteb/tasks/retrieval/jpn/nlp_journal_title_abs_retrieval.py,sha256=JOOW_5pRKHzVn8wTOY0fhxLJ6Ns7wlQHoGHGIYVovAQ,3056
2327
2327
  mteb/tasks/retrieval/jpn/nlp_journal_title_intro_retrieval.py,sha256=aVFTFiANWrIz68FjHv9KBqlhpWlsmi9EAP052gECzaU,3078
2328
2328
  mteb/tasks/retrieval/kat/__init__.py,sha256=H4phkKqg_yZzkK7T62aCMBzjbGZzLKJ-MngrQlPbW3A,93
2329
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py,sha256=XKEmWaMuxJraXfMF4k7S8CFy3KdLUphad7z8Nz29jp4,2345
2329
+ mteb/tasks/retrieval/kat/georgian_faq_retrieval.py,sha256=4zyodSYCtHtBW9WKIGxFZaTXDrtHuaf3uyfIsDRGBqM,2494
2330
2330
  mteb/tasks/retrieval/kor/__init__.py,sha256=zNjAS2VRjeYX5u4vqev6dGOo_R3i9uSzxAsduZ0po4I,138
2331
2331
  mteb/tasks/retrieval/kor/auto_rag_retrieval.py,sha256=tgffW8zMpDSv1FCOdS4_4SL5zKQj70JVSt_RKs3CgKY,1576
2332
2332
  mteb/tasks/retrieval/kor/ko_strategy_qa.py,sha256=jk13ORetYtF0q36h8ljD6TeTHUwvK5F5ZbDoMCP3eWk,1156
2333
2333
  mteb/tasks/retrieval/multilingual/__init__.py,sha256=mfVGkoB4DO5ktlg8ia-4nImFVmZcqXh1XkgCkIff0tY,6765
2334
- mteb/tasks/retrieval/multilingual/belebele_retrieval.py,sha256=AEShsRxcoCNC2C02dj-go6eFr008NLZ2cnebAmdo4Sk,6956
2334
+ mteb/tasks/retrieval/multilingual/belebele_retrieval.py,sha256=gaVLEwuLEwMutMi9V-obpiYKbpllX2QNm2j3MVeebfE,7027
2335
2335
  mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py,sha256=_6r34ZvRiLVENYcrd87NjilybGaetBwKFEbO29zYmBU,4676
2336
2336
  mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py,sha256=Puy0PjpRr4M_Bbxdl7oWfa7pQGM04zaRaTNlnhyKejM,4677
2337
2337
  mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py,sha256=dwzo2sqjamM_xkSiC-jbapyhDFezSJpM4S8KfBsuLPk,4562
@@ -2346,7 +2346,7 @@ mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py,sha256=3uGnj3O92_02zXZnPW
2346
2346
  mteb/tasks/retrieval/multilingual/multi_long_doc_retrieval.py,sha256=xF4GjBmJVgw6c8VGVh-5QLN_4i_NKeoAzqRWmA_pfnw,2440
2347
2347
  mteb/tasks/retrieval/multilingual/neu_clir2022_retrieval.py,sha256=bkGMvMxG2toYL98kv85BvVpSZ-rVeWvB5FFIzXhdPO4,2749
2348
2348
  mteb/tasks/retrieval/multilingual/neu_clir2023_retrieval.py,sha256=0cALhuU3ZU5c_y7tDIyiMc7Onv-qC7YwfnimZVb8-rg,2793
2349
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py,sha256=CGOTbGuwYNrzbefB76QY88fnb8CdJkN9bPhAssNtLvA,3502
2349
+ mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py,sha256=LJGpx4RkSJPXldN0SlMA6PbG1x8R2l-Hupc9q1xfleg,3667
2350
2350
  mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py,sha256=Mmcvrt_1cIxPfHZfUzSURPZyaaweGiB02im1ZszlS6M,6837
2351
2351
  mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py,sha256=iFUQUlO_ogBdQBVYBQW3o-AJDQ792yg1pJtRxA5I3Qo,3796
2352
2352
  mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py,sha256=UduWKefwP7bPYxiDlztPEvSWXmTdw0xElglMbPY6XhA,4449
@@ -2554,9 +2554,9 @@ mteb/types/_metadata.py,sha256=NN-W0S6a5TDV7UkpRx1pyWtGF4TyyCyoPUfHOwdeci8,2290
2554
2554
  mteb/types/_result.py,sha256=CRAUc5IvqI3_9SyXDwv-PWLCXwXdZem9RePeYESRtuw,996
2555
2555
  mteb/types/_string_validators.py,sha256=PY-dYq4E8O50VS3bLYdldPWp400fl_WzUjfVSkNWe8U,523
2556
2556
  mteb/types/statistics.py,sha256=YwJsxTf1eaCI_RE-J37a-gK5wDeGAsmkeZKoZCFihSo,3755
2557
- mteb-2.1.9.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2558
- mteb-2.1.9.dist-info/METADATA,sha256=yUOXi6O_wkyskXKnHDcDB6SqLZg5Q5Nc_a_qK7Pngpc,13573
2559
- mteb-2.1.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2560
- mteb-2.1.9.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2561
- mteb-2.1.9.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2562
- mteb-2.1.9.dist-info/RECORD,,
2557
+ mteb-2.1.10.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
2558
+ mteb-2.1.10.dist-info/METADATA,sha256=LClBepxtjXoGssnPn6QgdAukEqJerTX67OC7zoKhdiE,13574
2559
+ mteb-2.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2560
+ mteb-2.1.10.dist-info/entry_points.txt,sha256=8IJoEJFKoDHmVnNev-qJ9pp4Ln7_1-ma9QsXnzVCzGU,39
2561
+ mteb-2.1.10.dist-info/top_level.txt,sha256=OLVIjcQAlWBz0bdmutKlWHLF42FF0hp4uVAg3ZyiG4U,5
2562
+ mteb-2.1.10.dist-info/RECORD,,
File without changes