mteb 2.7.20__py3-none-any.whl → 2.7.22__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/abstasks/regression.py +0 -1
- mteb/models/model_implementations/{nvidia_llama_nemoretriever_colemb.py → nvidia_nemotron_colembed_vl.py} +46 -20
- mteb/tasks/clustering/deu/ten_k_gnad_clustering_p2p.py +9 -7
- mteb/tasks/clustering/deu/ten_k_gnad_clustering_s2s.py +13 -11
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nob/vg_clustering.py +1 -1
- mteb/tasks/clustering/rom/romani_bible_clustering.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/multilingual/indic_xnli_pair_classification.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +2 -2
- mteb/tasks/retrieval/eng/cqa_dupstack_android_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_english_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_gaming_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_gis_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_mathematica_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_physics_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_programmers_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_stats_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_tex_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_unix_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_webmasters_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/cqa_dupstack_wordpress_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/msmarc_ov2_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/msmarco_retrieval.py +2 -2
- mteb/tasks/retrieval/eng/nf_corpus_retrieval.py +6 -6
- mteb/tasks/retrieval/eng/nq_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/quora_retrieval.py +1 -1
- mteb/tasks/retrieval/fas/beir_fa.py +43 -36
- mteb/tasks/retrieval/fas/fa_mteb_retrieval.py +12 -5
- mteb/tasks/retrieval/fra/alloprof_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/pol/fi_qapl_retrieval.py +1 -1
- mteb/tasks/retrieval/pol/nqpl_retrieval.py +5 -5
- mteb/tasks/sts/eng/biosses_sts.py +1 -1
- mteb/tasks/sts/eng/humests_benchmark.py +1 -1
- mteb/tasks/sts/eng/sts_benchmark_sts.py +1 -1
- mteb/tasks/sts/fin/fin_para_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/ron/ron_sts.py +1 -1
- {mteb-2.7.20.dist-info → mteb-2.7.22.dist-info}/METADATA +11 -1
- {mteb-2.7.20.dist-info → mteb-2.7.22.dist-info}/RECORD +49 -49
- {mteb-2.7.20.dist-info → mteb-2.7.22.dist-info}/WHEEL +0 -0
- {mteb-2.7.20.dist-info → mteb-2.7.22.dist-info}/entry_points.txt +0 -0
- {mteb-2.7.20.dist-info → mteb-2.7.22.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.7.20.dist-info → mteb-2.7.22.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
+
fa_mteb = """@article{zinvandi2025famteb,
|
|
5
|
+
author = {Zinvandi, Erfan and Alikhani, Morteza and Sarmadi, Mehran and Pourbahman, Zahra and Arvin, Sepehr and Kazemi, Reza and Amini, Arash},
|
|
6
|
+
journal = {arXiv preprint arXiv:2502.11571},
|
|
7
|
+
title = {Famteb: Massive text embedding benchmark in persian language},
|
|
8
|
+
year = {2025},
|
|
9
|
+
}"""
|
|
10
|
+
|
|
4
11
|
|
|
5
12
|
class ArguAnaFa(AbsTaskRetrieval):
|
|
6
13
|
ignore_identical_ids = True
|
|
@@ -25,7 +32,7 @@ class ArguAnaFa(AbsTaskRetrieval):
|
|
|
25
32
|
annotations_creators="derived",
|
|
26
33
|
dialect=[],
|
|
27
34
|
sample_creation="found",
|
|
28
|
-
bibtex_citation=
|
|
35
|
+
bibtex_citation=fa_mteb,
|
|
29
36
|
adapted_from=["ArguAna"],
|
|
30
37
|
superseded_by="ArguAna-Fa.v2",
|
|
31
38
|
)
|
|
@@ -54,7 +61,7 @@ class ArguAnaFaV2(AbsTaskRetrieval):
|
|
|
54
61
|
annotations_creators="derived",
|
|
55
62
|
dialect=[],
|
|
56
63
|
sample_creation="found",
|
|
57
|
-
bibtex_citation=
|
|
64
|
+
bibtex_citation=fa_mteb,
|
|
58
65
|
adapted_from=["ArguAna"],
|
|
59
66
|
)
|
|
60
67
|
|
|
@@ -81,7 +88,7 @@ class ClimateFEVERFa(AbsTaskRetrieval):
|
|
|
81
88
|
annotations_creators="derived",
|
|
82
89
|
dialect=[],
|
|
83
90
|
sample_creation="found",
|
|
84
|
-
bibtex_citation=
|
|
91
|
+
bibtex_citation=fa_mteb,
|
|
85
92
|
adapted_from=["ClimateFEVER"],
|
|
86
93
|
)
|
|
87
94
|
|
|
@@ -108,7 +115,7 @@ class CQADupstackAndroidRetrievalFa(AbsTaskRetrieval):
|
|
|
108
115
|
annotations_creators="derived",
|
|
109
116
|
dialect=[],
|
|
110
117
|
sample_creation="found",
|
|
111
|
-
bibtex_citation=
|
|
118
|
+
bibtex_citation=fa_mteb,
|
|
112
119
|
adapted_from=["CQADupstackAndroidRetrieval"],
|
|
113
120
|
)
|
|
114
121
|
|
|
@@ -135,7 +142,7 @@ class CQADupstackEnglishRetrievalFa(AbsTaskRetrieval):
|
|
|
135
142
|
annotations_creators="derived",
|
|
136
143
|
dialect=[],
|
|
137
144
|
sample_creation="found",
|
|
138
|
-
bibtex_citation=
|
|
145
|
+
bibtex_citation=fa_mteb,
|
|
139
146
|
adapted_from=["CQADupstackEnglishRetrieval"],
|
|
140
147
|
)
|
|
141
148
|
|
|
@@ -162,7 +169,7 @@ class CQADupstackGamingRetrievalFa(AbsTaskRetrieval):
|
|
|
162
169
|
annotations_creators="derived",
|
|
163
170
|
dialect=[],
|
|
164
171
|
sample_creation="found",
|
|
165
|
-
bibtex_citation=
|
|
172
|
+
bibtex_citation=fa_mteb,
|
|
166
173
|
adapted_from=["CQADupstackGamingRetrieval"],
|
|
167
174
|
)
|
|
168
175
|
|
|
@@ -189,7 +196,7 @@ class CQADupstackGisRetrievalFa(AbsTaskRetrieval):
|
|
|
189
196
|
annotations_creators="derived",
|
|
190
197
|
dialect=[],
|
|
191
198
|
sample_creation="found",
|
|
192
|
-
bibtex_citation=
|
|
199
|
+
bibtex_citation=fa_mteb,
|
|
193
200
|
adapted_from=["CQADupstackGisRetrieval"],
|
|
194
201
|
)
|
|
195
202
|
|
|
@@ -216,7 +223,7 @@ class CQADupstackMathematicaRetrievalFa(AbsTaskRetrieval):
|
|
|
216
223
|
annotations_creators="derived",
|
|
217
224
|
dialect=[],
|
|
218
225
|
sample_creation="found",
|
|
219
|
-
bibtex_citation=
|
|
226
|
+
bibtex_citation=fa_mteb,
|
|
220
227
|
adapted_from=["CQADupstackMathematicaRetrieval"],
|
|
221
228
|
)
|
|
222
229
|
|
|
@@ -243,7 +250,7 @@ class CQADupstackPhysicsRetrievalFa(AbsTaskRetrieval):
|
|
|
243
250
|
annotations_creators="derived",
|
|
244
251
|
dialect=[],
|
|
245
252
|
sample_creation="found",
|
|
246
|
-
bibtex_citation=
|
|
253
|
+
bibtex_citation=fa_mteb,
|
|
247
254
|
adapted_from=["CQADupstackPhysicsRetrieval"],
|
|
248
255
|
)
|
|
249
256
|
|
|
@@ -270,7 +277,7 @@ class CQADupstackProgrammersRetrievalFa(AbsTaskRetrieval):
|
|
|
270
277
|
annotations_creators="derived",
|
|
271
278
|
dialect=[],
|
|
272
279
|
sample_creation="found",
|
|
273
|
-
bibtex_citation=
|
|
280
|
+
bibtex_citation=fa_mteb,
|
|
274
281
|
adapted_from=["CQADupstackProgrammersRetrieval"],
|
|
275
282
|
)
|
|
276
283
|
|
|
@@ -297,7 +304,7 @@ class CQADupstackStatsRetrievalFa(AbsTaskRetrieval):
|
|
|
297
304
|
annotations_creators="derived",
|
|
298
305
|
dialect=[],
|
|
299
306
|
sample_creation="found",
|
|
300
|
-
bibtex_citation=
|
|
307
|
+
bibtex_citation=fa_mteb,
|
|
301
308
|
adapted_from=["CQADupstackStatsRetrieval"],
|
|
302
309
|
)
|
|
303
310
|
|
|
@@ -324,7 +331,7 @@ class CQADupstackTexRetrievalFa(AbsTaskRetrieval):
|
|
|
324
331
|
annotations_creators="derived",
|
|
325
332
|
dialect=[],
|
|
326
333
|
sample_creation="found",
|
|
327
|
-
bibtex_citation=
|
|
334
|
+
bibtex_citation=fa_mteb,
|
|
328
335
|
adapted_from=["CQADupstackTexRetrieval"],
|
|
329
336
|
)
|
|
330
337
|
|
|
@@ -351,7 +358,7 @@ class CQADupstackUnixRetrievalFa(AbsTaskRetrieval):
|
|
|
351
358
|
annotations_creators="derived",
|
|
352
359
|
dialect=[],
|
|
353
360
|
sample_creation="found",
|
|
354
|
-
bibtex_citation=
|
|
361
|
+
bibtex_citation=fa_mteb,
|
|
355
362
|
adapted_from=["CQADupstackUnixRetrieval"],
|
|
356
363
|
)
|
|
357
364
|
|
|
@@ -378,7 +385,7 @@ class CQADupstackWebmastersRetrievalFa(AbsTaskRetrieval):
|
|
|
378
385
|
annotations_creators="derived",
|
|
379
386
|
dialect=[],
|
|
380
387
|
sample_creation="found",
|
|
381
|
-
bibtex_citation=
|
|
388
|
+
bibtex_citation=fa_mteb,
|
|
382
389
|
adapted_from=["CQADupstackWebmastersRetrieval"],
|
|
383
390
|
)
|
|
384
391
|
|
|
@@ -405,7 +412,7 @@ class CQADupstackWordpressRetrievalFa(AbsTaskRetrieval):
|
|
|
405
412
|
annotations_creators="derived",
|
|
406
413
|
dialect=[],
|
|
407
414
|
sample_creation="found",
|
|
408
|
-
bibtex_citation=
|
|
415
|
+
bibtex_citation=fa_mteb,
|
|
409
416
|
adapted_from=["CQADupstackWordpressRetrieval"],
|
|
410
417
|
)
|
|
411
418
|
|
|
@@ -432,7 +439,7 @@ class DBPediaFa(AbsTaskRetrieval):
|
|
|
432
439
|
annotations_creators="derived",
|
|
433
440
|
dialect=[],
|
|
434
441
|
sample_creation="found",
|
|
435
|
-
bibtex_citation=
|
|
442
|
+
bibtex_citation=fa_mteb,
|
|
436
443
|
adapted_from=["DBPedia"],
|
|
437
444
|
)
|
|
438
445
|
|
|
@@ -460,7 +467,7 @@ class FiQA2018Fa(AbsTaskRetrieval):
|
|
|
460
467
|
annotations_creators="derived",
|
|
461
468
|
dialect=[],
|
|
462
469
|
sample_creation="found",
|
|
463
|
-
bibtex_citation=
|
|
470
|
+
bibtex_citation=fa_mteb,
|
|
464
471
|
adapted_from=["FiQA2018"],
|
|
465
472
|
superseded_by="FiQA2018-Fa.v2",
|
|
466
473
|
)
|
|
@@ -490,7 +497,7 @@ class FiQA2018FaV2(AbsTaskRetrieval):
|
|
|
490
497
|
annotations_creators="derived",
|
|
491
498
|
dialect=[],
|
|
492
499
|
sample_creation="found",
|
|
493
|
-
bibtex_citation=
|
|
500
|
+
bibtex_citation=fa_mteb,
|
|
494
501
|
adapted_from=["FiQA2018"],
|
|
495
502
|
)
|
|
496
503
|
|
|
@@ -517,7 +524,7 @@ class HotpotQAFa(AbsTaskRetrieval):
|
|
|
517
524
|
annotations_creators="derived",
|
|
518
525
|
dialect=[],
|
|
519
526
|
sample_creation="found",
|
|
520
|
-
bibtex_citation=
|
|
527
|
+
bibtex_citation=fa_mteb,
|
|
521
528
|
adapted_from=["HotpotQA"],
|
|
522
529
|
)
|
|
523
530
|
|
|
@@ -546,7 +553,7 @@ class MSMARCOFa(AbsTaskRetrieval):
|
|
|
546
553
|
annotations_creators="derived",
|
|
547
554
|
dialect=[],
|
|
548
555
|
sample_creation="found",
|
|
549
|
-
bibtex_citation=
|
|
556
|
+
bibtex_citation=fa_mteb,
|
|
550
557
|
adapted_from=["MSMARCO"],
|
|
551
558
|
)
|
|
552
559
|
|
|
@@ -573,7 +580,7 @@ class NFCorpusFa(AbsTaskRetrieval):
|
|
|
573
580
|
annotations_creators="derived",
|
|
574
581
|
dialect=[],
|
|
575
582
|
sample_creation="found",
|
|
576
|
-
bibtex_citation=
|
|
583
|
+
bibtex_citation=fa_mteb,
|
|
577
584
|
adapted_from=["NFCorpus"],
|
|
578
585
|
)
|
|
579
586
|
|
|
@@ -600,7 +607,7 @@ class NQFa(AbsTaskRetrieval):
|
|
|
600
607
|
annotations_creators="derived",
|
|
601
608
|
dialect=[],
|
|
602
609
|
sample_creation="found",
|
|
603
|
-
bibtex_citation=
|
|
610
|
+
bibtex_citation=fa_mteb,
|
|
604
611
|
adapted_from=["NQ"],
|
|
605
612
|
)
|
|
606
613
|
|
|
@@ -629,7 +636,7 @@ class QuoraRetrievalFa(AbsTaskRetrieval):
|
|
|
629
636
|
annotations_creators="derived",
|
|
630
637
|
dialect=[],
|
|
631
638
|
sample_creation="found",
|
|
632
|
-
bibtex_citation=
|
|
639
|
+
bibtex_citation=fa_mteb,
|
|
633
640
|
adapted_from=["QuoraRetrieval"],
|
|
634
641
|
superseded_by="QuoraRetrieval-Fa.v2",
|
|
635
642
|
)
|
|
@@ -659,7 +666,7 @@ class QuoraRetrievalFaV2(AbsTaskRetrieval):
|
|
|
659
666
|
annotations_creators="derived",
|
|
660
667
|
dialect=[],
|
|
661
668
|
sample_creation="found",
|
|
662
|
-
bibtex_citation=
|
|
669
|
+
bibtex_citation=fa_mteb,
|
|
663
670
|
adapted_from=["QuoraRetrieval"],
|
|
664
671
|
)
|
|
665
672
|
|
|
@@ -686,7 +693,7 @@ class SCIDOCSFa(AbsTaskRetrieval):
|
|
|
686
693
|
annotations_creators="derived",
|
|
687
694
|
dialect=[],
|
|
688
695
|
sample_creation="found",
|
|
689
|
-
bibtex_citation=
|
|
696
|
+
bibtex_citation=fa_mteb,
|
|
690
697
|
adapted_from=["SCIDOCS"],
|
|
691
698
|
superseded_by="SCIDOCS-Fa.v2",
|
|
692
699
|
)
|
|
@@ -714,7 +721,7 @@ class SCIDOCSFaV2(AbsTaskRetrieval):
|
|
|
714
721
|
annotations_creators="derived",
|
|
715
722
|
dialect=[],
|
|
716
723
|
sample_creation="found",
|
|
717
|
-
bibtex_citation=
|
|
724
|
+
bibtex_citation=fa_mteb,
|
|
718
725
|
adapted_from=["SCIDOCS"],
|
|
719
726
|
)
|
|
720
727
|
|
|
@@ -741,7 +748,7 @@ class SciFactFa(AbsTaskRetrieval):
|
|
|
741
748
|
annotations_creators="derived",
|
|
742
749
|
dialect=[],
|
|
743
750
|
sample_creation="found",
|
|
744
|
-
bibtex_citation=
|
|
751
|
+
bibtex_citation=fa_mteb,
|
|
745
752
|
adapted_from=["SciFact"],
|
|
746
753
|
superseded_by="SciFact-Fa.v2",
|
|
747
754
|
)
|
|
@@ -769,7 +776,7 @@ class SciFactFaV2(AbsTaskRetrieval):
|
|
|
769
776
|
annotations_creators="derived",
|
|
770
777
|
dialect=[],
|
|
771
778
|
sample_creation="found",
|
|
772
|
-
bibtex_citation=
|
|
779
|
+
bibtex_citation=fa_mteb,
|
|
773
780
|
adapted_from=["SciFact"],
|
|
774
781
|
)
|
|
775
782
|
|
|
@@ -796,7 +803,7 @@ class TRECCOVIDFa(AbsTaskRetrieval):
|
|
|
796
803
|
annotations_creators="derived",
|
|
797
804
|
dialect=[],
|
|
798
805
|
sample_creation="found",
|
|
799
|
-
bibtex_citation=
|
|
806
|
+
bibtex_citation=fa_mteb,
|
|
800
807
|
adapted_from=["TRECCOVID"],
|
|
801
808
|
superseded_by="TRECCOVID-Fa.v2",
|
|
802
809
|
)
|
|
@@ -824,7 +831,7 @@ class TRECCOVIDFaV2(AbsTaskRetrieval):
|
|
|
824
831
|
annotations_creators="derived",
|
|
825
832
|
dialect=[],
|
|
826
833
|
sample_creation="found",
|
|
827
|
-
bibtex_citation=
|
|
834
|
+
bibtex_citation=fa_mteb,
|
|
828
835
|
adapted_from=["TRECCOVID"],
|
|
829
836
|
)
|
|
830
837
|
|
|
@@ -851,7 +858,7 @@ class Touche2020Fa(AbsTaskRetrieval):
|
|
|
851
858
|
annotations_creators="derived",
|
|
852
859
|
dialect=[],
|
|
853
860
|
sample_creation="found",
|
|
854
|
-
bibtex_citation=
|
|
861
|
+
bibtex_citation=fa_mteb,
|
|
855
862
|
adapted_from=["Touche2020"],
|
|
856
863
|
superseded_by="Touche2020-Fa.v2",
|
|
857
864
|
)
|
|
@@ -879,7 +886,7 @@ class Touche2020FaV2(AbsTaskRetrieval):
|
|
|
879
886
|
annotations_creators="derived",
|
|
880
887
|
dialect=[],
|
|
881
888
|
sample_creation="found",
|
|
882
|
-
bibtex_citation=
|
|
889
|
+
bibtex_citation=fa_mteb,
|
|
883
890
|
adapted_from=["Touche2020"],
|
|
884
891
|
)
|
|
885
892
|
|
|
@@ -906,7 +913,7 @@ class HotpotQAFaHardNegatives(AbsTaskRetrieval):
|
|
|
906
913
|
annotations_creators="derived",
|
|
907
914
|
dialect=[],
|
|
908
915
|
sample_creation="found",
|
|
909
|
-
bibtex_citation=
|
|
916
|
+
bibtex_citation=fa_mteb,
|
|
910
917
|
adapted_from=["HotpotQA"],
|
|
911
918
|
)
|
|
912
919
|
|
|
@@ -935,7 +942,7 @@ class MSMARCOFaHardNegatives(AbsTaskRetrieval):
|
|
|
935
942
|
annotations_creators="derived",
|
|
936
943
|
dialect=[],
|
|
937
944
|
sample_creation="found",
|
|
938
|
-
bibtex_citation=
|
|
945
|
+
bibtex_citation=fa_mteb,
|
|
939
946
|
adapted_from=["MSMARCO"],
|
|
940
947
|
)
|
|
941
948
|
|
|
@@ -962,7 +969,7 @@ class NQFaHardNegatives(AbsTaskRetrieval):
|
|
|
962
969
|
annotations_creators="derived",
|
|
963
970
|
dialect=[],
|
|
964
971
|
sample_creation="found",
|
|
965
|
-
bibtex_citation=
|
|
972
|
+
bibtex_citation=fa_mteb,
|
|
966
973
|
adapted_from=["NQ"],
|
|
967
974
|
)
|
|
968
975
|
|
|
@@ -991,6 +998,6 @@ class FEVERFaHardNegatives(AbsTaskRetrieval):
|
|
|
991
998
|
annotations_creators="human-annotated",
|
|
992
999
|
dialect=[],
|
|
993
1000
|
sample_creation="found",
|
|
994
|
-
bibtex_citation=
|
|
1001
|
+
bibtex_citation=fa_mteb,
|
|
995
1002
|
adapted_from=["FEVER"],
|
|
996
1003
|
)
|
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
2
|
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
3
|
|
|
4
|
+
fa_mteb = """@article{zinvandi2025famteb,
|
|
5
|
+
author = {Zinvandi, Erfan and Alikhani, Morteza and Sarmadi, Mehran and Pourbahman, Zahra and Arvin, Sepehr and Kazemi, Reza and Amini, Arash},
|
|
6
|
+
journal = {arXiv preprint arXiv:2502.11571},
|
|
7
|
+
title = {Famteb: Massive text embedding benchmark in persian language},
|
|
8
|
+
year = {2025},
|
|
9
|
+
}"""
|
|
10
|
+
|
|
4
11
|
|
|
5
12
|
class SynPerQARetrieval(AbsTaskRetrieval):
|
|
6
13
|
ignore_identical_ids = True
|
|
@@ -25,7 +32,7 @@ class SynPerQARetrieval(AbsTaskRetrieval):
|
|
|
25
32
|
annotations_creators="LM-generated",
|
|
26
33
|
dialect=[],
|
|
27
34
|
sample_creation="LM-generated and verified",
|
|
28
|
-
bibtex_citation=
|
|
35
|
+
bibtex_citation=fa_mteb,
|
|
29
36
|
)
|
|
30
37
|
|
|
31
38
|
|
|
@@ -52,7 +59,7 @@ class SynPerChatbotTopicsRetrieval(AbsTaskRetrieval):
|
|
|
52
59
|
annotations_creators="LM-generated",
|
|
53
60
|
dialect=[],
|
|
54
61
|
sample_creation="LM-generated and verified",
|
|
55
|
-
bibtex_citation=
|
|
62
|
+
bibtex_citation=fa_mteb,
|
|
56
63
|
)
|
|
57
64
|
|
|
58
65
|
|
|
@@ -79,7 +86,7 @@ class SynPerChatbotRAGTopicsRetrieval(AbsTaskRetrieval):
|
|
|
79
86
|
annotations_creators="LM-generated",
|
|
80
87
|
dialect=[],
|
|
81
88
|
sample_creation="LM-generated and verified",
|
|
82
|
-
bibtex_citation=
|
|
89
|
+
bibtex_citation=fa_mteb,
|
|
83
90
|
)
|
|
84
91
|
|
|
85
92
|
|
|
@@ -106,7 +113,7 @@ class SynPerChatbotRAGFAQRetrieval(AbsTaskRetrieval):
|
|
|
106
113
|
annotations_creators="LM-generated",
|
|
107
114
|
dialect=[],
|
|
108
115
|
sample_creation="LM-generated and verified",
|
|
109
|
-
bibtex_citation=
|
|
116
|
+
bibtex_citation=fa_mteb,
|
|
110
117
|
)
|
|
111
118
|
|
|
112
119
|
|
|
@@ -133,5 +140,5 @@ class PersianWebDocumentRetrieval(AbsTaskRetrieval):
|
|
|
133
140
|
annotations_creators="derived",
|
|
134
141
|
dialect=[],
|
|
135
142
|
sample_creation="found",
|
|
136
|
-
bibtex_citation=
|
|
143
|
+
bibtex_citation=fa_mteb,
|
|
137
144
|
)
|
|
@@ -17,7 +17,7 @@ class AlloprofRetrieval(AbsTaskRetrieval):
|
|
|
17
17
|
eval_splits=["test"],
|
|
18
18
|
eval_langs=["fra-Latn"],
|
|
19
19
|
main_score="ndcg_at_10",
|
|
20
|
-
date=
|
|
20
|
+
date=("2023-02-01", "2023-02-28"), # publication year
|
|
21
21
|
domains=["Encyclopaedic", "Written"],
|
|
22
22
|
task_subtypes=[],
|
|
23
23
|
license="cc-by-nc-sa-4.0",
|
|
@@ -19,7 +19,7 @@ class SyntecRetrieval(AbsTaskRetrieval):
|
|
|
19
19
|
eval_splits=["test"],
|
|
20
20
|
eval_langs=["fra-Latn"],
|
|
21
21
|
main_score="ndcg_at_10",
|
|
22
|
-
date=
|
|
22
|
+
date=("2024-01-01", "2024-12-31"), # publication year
|
|
23
23
|
domains=["Legal", "Written"],
|
|
24
24
|
task_subtypes=[],
|
|
25
25
|
license="not specified",
|
|
@@ -19,7 +19,7 @@ class FiQAPLRetrieval(AbsTaskRetrieval):
|
|
|
19
19
|
eval_splits=["test"],
|
|
20
20
|
eval_langs=["pol-Latn"],
|
|
21
21
|
main_score="ndcg_at_10",
|
|
22
|
-
date=
|
|
22
|
+
date=("2018-01-01", "2018-12-31"), # publication year
|
|
23
23
|
domains=["Written", "Financial"],
|
|
24
24
|
task_subtypes=["Question answering"],
|
|
25
25
|
license="not specified",
|
|
@@ -53,11 +53,11 @@ class NQPLHardNegatives(AbsTaskRetrieval):
|
|
|
53
53
|
eval_splits=["test"],
|
|
54
54
|
eval_langs=["pol-Latn"],
|
|
55
55
|
main_score="ndcg_at_10",
|
|
56
|
-
date=
|
|
57
|
-
domains=
|
|
58
|
-
task_subtypes=
|
|
59
|
-
license=
|
|
60
|
-
annotations_creators=
|
|
56
|
+
date=("2019-01-01", "2019-12-31"), # original publication year
|
|
57
|
+
domains=["Written", "Encyclopaedic"],
|
|
58
|
+
task_subtypes=["Question answering"],
|
|
59
|
+
license="cc-by-nc-sa-3.0", # original license
|
|
60
|
+
annotations_creators="human-annotated", # original annotation method
|
|
61
61
|
dialect=[],
|
|
62
62
|
sample_creation="machine-translated",
|
|
63
63
|
bibtex_citation=r"""
|
|
@@ -17,7 +17,7 @@ class HUMESTSBenchmark(AbsTaskSTS):
|
|
|
17
17
|
eval_splits=["test"],
|
|
18
18
|
eval_langs=["eng-Latn"],
|
|
19
19
|
main_score="cosine_spearman",
|
|
20
|
-
date=
|
|
20
|
+
date=("2021-01-01", "2021-12-31"), # publication year
|
|
21
21
|
domains=["Blog", "News", "Written"],
|
|
22
22
|
task_subtypes=[],
|
|
23
23
|
license="not specified",
|
|
@@ -20,7 +20,7 @@ class STSBenchmarkSTS(AbsTaskSTS):
|
|
|
20
20
|
eval_splits=["test"],
|
|
21
21
|
eval_langs=["eng-Latn"],
|
|
22
22
|
main_score="cosine_spearman",
|
|
23
|
-
date=
|
|
23
|
+
date=("2021-01-01", "2021-12-31"), # publication year
|
|
24
24
|
domains=["Blog", "News", "Written"],
|
|
25
25
|
task_subtypes=[],
|
|
26
26
|
license="not specified",
|
|
@@ -21,7 +21,7 @@ class FinParaSTS(AbsTaskSTS):
|
|
|
21
21
|
main_score="cosine_spearman",
|
|
22
22
|
date=("2017-01-01", "2021-12-31"),
|
|
23
23
|
domains=["News", "Subtitles", "Written"],
|
|
24
|
-
task_subtypes=
|
|
24
|
+
task_subtypes=[],
|
|
25
25
|
license="cc-by-sa-4.0",
|
|
26
26
|
annotations_creators="expert-annotated",
|
|
27
27
|
dialect=[],
|
mteb/tasks/sts/kor/klue_sts.py
CHANGED
|
@@ -20,7 +20,7 @@ class KlueSTS(AbsTaskSTS):
|
|
|
20
20
|
main_score="cosine_spearman",
|
|
21
21
|
date=("2011-01-01", "2021-11-02"), # rough estimate,
|
|
22
22
|
domains=["Reviews", "News", "Spoken", "Written", "Spoken"],
|
|
23
|
-
task_subtypes=
|
|
23
|
+
task_subtypes=[],
|
|
24
24
|
license="cc-by-sa-4.0",
|
|
25
25
|
annotations_creators="human-annotated",
|
|
26
26
|
dialect=[],
|
mteb/tasks/sts/ron/ron_sts.py
CHANGED
|
@@ -19,7 +19,7 @@ class RonSTS(AbsTaskSTS):
|
|
|
19
19
|
main_score="cosine_spearman",
|
|
20
20
|
date=("2020-01-01", "2021-01-31"),
|
|
21
21
|
domains=["News", "Social", "Web", "Written"], # web for image captions
|
|
22
|
-
task_subtypes=
|
|
22
|
+
task_subtypes=[],
|
|
23
23
|
license="cc-by-4.0", # not specified
|
|
24
24
|
annotations_creators="human-annotated",
|
|
25
25
|
dialect=[],
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.7.
|
|
3
|
+
Version: 2.7.22
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -104,6 +104,16 @@ Requires-Dist: tencentcloud-sdk-python-common>=3.0.1454; extra == "youtu"
|
|
|
104
104
|
Requires-Dist: tencentcloud-sdk-python-lkeap>=3.0.1451; extra == "youtu"
|
|
105
105
|
Provides-Extra: llama-embed-nemotron
|
|
106
106
|
Requires-Dist: transformers==4.51.0; extra == "llama-embed-nemotron"
|
|
107
|
+
Provides-Extra: llama-nemotron-colembed-vl
|
|
108
|
+
Requires-Dist: transformers[torch]==4.49.0; extra == "llama-nemotron-colembed-vl"
|
|
109
|
+
Requires-Dist: torchvision>=0.22.0; extra == "llama-nemotron-colembed-vl"
|
|
110
|
+
Requires-Dist: flash-attn>=2.6.3; extra == "llama-nemotron-colembed-vl"
|
|
111
|
+
Requires-Dist: accelerate; extra == "llama-nemotron-colembed-vl"
|
|
112
|
+
Provides-Extra: nemotron-colembed-vl-v2
|
|
113
|
+
Requires-Dist: transformers[torch]==5.0.0rc0; extra == "nemotron-colembed-vl-v2"
|
|
114
|
+
Requires-Dist: torchvision>=0.22.0; extra == "nemotron-colembed-vl-v2"
|
|
115
|
+
Requires-Dist: flash-attn>=2.6.3; extra == "nemotron-colembed-vl-v2"
|
|
116
|
+
Requires-Dist: accelerate; extra == "nemotron-colembed-vl-v2"
|
|
107
117
|
Provides-Extra: faiss-cpu
|
|
108
118
|
Requires-Dist: faiss-cpu>=1.12.0; extra == "faiss-cpu"
|
|
109
119
|
Provides-Extra: eager-embed
|