mteb 2.5.2__py3-none-any.whl → 2.7.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/__init__.py +2 -0
- mteb/_create_dataloaders.py +78 -30
- mteb/_evaluators/any_sts_evaluator.py +13 -6
- mteb/_evaluators/clustering_evaluator.py +13 -5
- mteb/_evaluators/evaluator.py +12 -4
- mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +22 -11
- mteb/_evaluators/pair_classification_evaluator.py +17 -7
- mteb/_evaluators/retrieval_evaluator.py +23 -14
- mteb/_evaluators/retrieval_metrics.py +26 -19
- mteb/_evaluators/sklearn_evaluator.py +27 -17
- mteb/_evaluators/text/bitext_mining_evaluator.py +36 -20
- mteb/_evaluators/text/summarization_evaluator.py +31 -20
- mteb/_evaluators/zeroshot_classification_evaluator.py +16 -5
- mteb/_helpful_enum.py +5 -1
- mteb/abstasks/_data_filter/filters.py +9 -3
- mteb/abstasks/_data_filter/task_pipelines.py +10 -2
- mteb/abstasks/_statistics_calculation.py +21 -11
- mteb/abstasks/_stratification.py +18 -18
- mteb/abstasks/abstask.py +78 -44
- mteb/abstasks/aggregate_task_metadata.py +21 -18
- mteb/abstasks/aggregated_task.py +23 -35
- mteb/abstasks/classification.py +39 -18
- mteb/abstasks/clustering.py +37 -20
- mteb/abstasks/clustering_legacy.py +30 -16
- mteb/abstasks/image/image_text_pair_classification.py +26 -9
- mteb/abstasks/multilabel_classification.py +33 -21
- mteb/abstasks/pair_classification.py +44 -19
- mteb/abstasks/regression.py +18 -10
- mteb/abstasks/retrieval.py +82 -52
- mteb/abstasks/retrieval_dataset_loaders.py +50 -39
- mteb/abstasks/sts.py +34 -15
- mteb/abstasks/task_metadata.py +44 -37
- mteb/abstasks/text/bitext_mining.py +57 -35
- mteb/abstasks/text/reranking.py +10 -8
- mteb/abstasks/text/summarization.py +26 -10
- mteb/abstasks/zeroshot_classification.py +27 -9
- mteb/benchmarks/_create_table.py +13 -7
- mteb/benchmarks/benchmark.py +15 -3
- mteb/benchmarks/benchmarks/__init__.py +6 -0
- mteb/benchmarks/benchmarks/benchmarks.py +153 -13
- mteb/benchmarks/benchmarks/rteb_benchmarks.py +20 -9
- mteb/benchmarks/get_benchmark.py +14 -55
- mteb/cache.py +189 -31
- mteb/cli/_display_tasks.py +10 -4
- mteb/cli/build_cli.py +112 -13
- mteb/cli/generate_model_card.py +50 -23
- mteb/deprecated_evaluator.py +72 -54
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2CybersecurityRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EconomicRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2EnergyRetrieval.json +32 -0
- mteb/descriptive_stats/Image/DocumentUnderstanding/KoVidore2HrRetrieval.json +32 -0
- mteb/descriptive_stats/Retrieval/BrightAopsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightBiologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEarthScienceRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightEconomicsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightLeetcodeRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPonyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightPsychologyRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightRoboticsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightStackoverflowRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingLongRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightSustainableLivingRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQAQuestionsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/BrightTheoremQATheoremsRetrieval.json +35 -0
- mteb/descriptive_stats/Retrieval/ChemRxivRetrieval.json +30 -0
- mteb/descriptive_stats/Retrieval/EuroPIRQRetrieval.json +116 -0
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/evaluate.py +71 -47
- mteb/filter_tasks.py +36 -32
- mteb/get_tasks.py +37 -33
- mteb/languages/language_scripts.py +11 -4
- mteb/leaderboard/app.py +172 -37
- mteb/leaderboard/table.py +7 -2
- mteb/load_results.py +20 -14
- mteb/models/abs_encoder.py +30 -16
- mteb/models/cache_wrappers/cache_backend_protocol.py +7 -7
- mteb/models/cache_wrappers/cache_backends/_hash_utils.py +10 -5
- mteb/models/cache_wrappers/cache_backends/faiss_cache.py +13 -4
- mteb/models/cache_wrappers/cache_backends/numpy_cache.py +43 -25
- mteb/models/cache_wrappers/cache_wrapper.py +16 -11
- mteb/models/get_model_meta.py +53 -9
- mteb/models/instruct_wrapper.py +41 -13
- mteb/models/model_implementations/align_models.py +11 -5
- mteb/models/model_implementations/amazon_models.py +1 -0
- mteb/models/model_implementations/andersborges.py +6 -4
- mteb/models/model_implementations/ara_models.py +2 -1
- mteb/models/model_implementations/arctic_models.py +16 -8
- mteb/models/model_implementations/b1ade_models.py +2 -1
- mteb/models/model_implementations/bedrock_models.py +20 -6
- mteb/models/model_implementations/bge_models.py +85 -22
- mteb/models/model_implementations/bica_model.py +4 -3
- mteb/models/model_implementations/blip2_models.py +13 -6
- mteb/models/model_implementations/blip_models.py +33 -20
- mteb/models/model_implementations/bm25.py +27 -17
- mteb/models/model_implementations/bmretriever_models.py +16 -6
- mteb/models/model_implementations/cadet_models.py +2 -1
- mteb/models/model_implementations/cde_models.py +22 -9
- mteb/models/model_implementations/clip_models.py +18 -10
- mteb/models/model_implementations/clips_models.py +6 -3
- mteb/models/model_implementations/codefuse_models.py +10 -5
- mteb/models/model_implementations/codesage_models.py +6 -3
- mteb/models/model_implementations/cohere_models.py +19 -9
- mteb/models/model_implementations/cohere_v.py +16 -6
- mteb/models/model_implementations/colpali_models.py +10 -6
- mteb/models/model_implementations/colqwen_models.py +24 -38
- mteb/models/model_implementations/colsmol_models.py +5 -3
- mteb/models/model_implementations/conan_models.py +12 -5
- mteb/models/model_implementations/dino_models.py +70 -46
- mteb/models/model_implementations/e5_instruct.py +27 -4
- mteb/models/model_implementations/e5_models.py +18 -9
- mteb/models/model_implementations/e5_v.py +16 -10
- mteb/models/model_implementations/eagerworks_models.py +12 -5
- mteb/models/model_implementations/emillykkejensen_models.py +9 -6
- mteb/models/model_implementations/en_code_retriever.py +2 -1
- mteb/models/model_implementations/euler_models.py +3 -2
- mteb/models/model_implementations/evaclip_models.py +13 -4
- mteb/models/model_implementations/fa_models.py +18 -9
- mteb/models/model_implementations/facebookai.py +16 -2
- mteb/models/model_implementations/geogpt_models.py +2 -1
- mteb/models/model_implementations/gme_v_models.py +13 -8
- mteb/models/model_implementations/google_models.py +16 -5
- mteb/models/model_implementations/granite_vision_embedding_models.py +8 -6
- mteb/models/model_implementations/gritlm_models.py +5 -2
- mteb/models/model_implementations/gte_models.py +34 -13
- mteb/models/model_implementations/hinvec_models.py +7 -2
- mteb/models/model_implementations/human.py +1 -0
- mteb/models/model_implementations/ibm_granite_models.py +36 -6
- mteb/models/model_implementations/inf_models.py +4 -2
- mteb/models/model_implementations/jasper_models.py +16 -7
- mteb/models/model_implementations/jina_clip.py +58 -14
- mteb/models/model_implementations/jina_models.py +35 -16
- mteb/models/model_implementations/kalm_models.py +24 -12
- mteb/models/model_implementations/kblab.py +13 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +6 -4
- mteb/models/model_implementations/kfst.py +2 -1
- mteb/models/model_implementations/kowshik24_models.py +2 -1
- mteb/models/model_implementations/lens_models.py +2 -0
- mteb/models/model_implementations/lgai_embedding_models.py +2 -1
- mteb/models/model_implementations/linq_models.py +8 -2
- mteb/models/model_implementations/listconranker.py +11 -5
- mteb/models/model_implementations/llm2clip_models.py +18 -10
- mteb/models/model_implementations/llm2vec_models.py +28 -14
- mteb/models/model_implementations/mcinext_models.py +12 -3
- mteb/models/model_implementations/mdbr_models.py +19 -3
- mteb/models/model_implementations/misc_models.py +131 -68
- mteb/models/model_implementations/mixedbread_ai_models.py +335 -0
- mteb/models/model_implementations/mme5_models.py +3 -2
- mteb/models/model_implementations/moco_models.py +15 -8
- mteb/models/model_implementations/mod_models.py +3 -2
- mteb/models/model_implementations/model2vec_models.py +37 -18
- mteb/models/model_implementations/moka_models.py +4 -1
- mteb/models/model_implementations/nbailab.py +6 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +15 -7
- mteb/models/model_implementations/nomic_models.py +47 -19
- mteb/models/model_implementations/nomic_models_vision.py +6 -4
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +20 -8
- mteb/models/model_implementations/nvidia_models.py +165 -22
- mteb/models/model_implementations/octen_models.py +64 -3
- mteb/models/model_implementations/openai_models.py +14 -4
- mteb/models/model_implementations/openclip_models.py +30 -17
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +20 -9
- mteb/models/model_implementations/ops_moa_models.py +10 -3
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +2 -1
- mteb/models/model_implementations/pawan_models.py +2 -1
- mteb/models/model_implementations/piccolo_models.py +3 -1
- mteb/models/model_implementations/pixie_models.py +56 -0
- mteb/models/model_implementations/promptriever_models.py +20 -10
- mteb/models/model_implementations/pylate_models.py +41 -21
- mteb/models/model_implementations/qodo_models.py +4 -2
- mteb/models/model_implementations/qtack_models.py +2 -1
- mteb/models/model_implementations/qwen3_models.py +14 -4
- mteb/models/model_implementations/qzhou_models.py +4 -2
- mteb/models/model_implementations/random_baseline.py +7 -6
- mteb/models/model_implementations/rasgaard_models.py +3 -2
- mteb/models/model_implementations/reasonir_model.py +66 -1
- mteb/models/model_implementations/repllama_models.py +18 -9
- mteb/models/model_implementations/rerankers_custom.py +25 -10
- mteb/models/model_implementations/rerankers_monot5_based.py +41 -21
- mteb/models/model_implementations/richinfoai_models.py +2 -1
- mteb/models/model_implementations/ru_sentence_models.py +40 -20
- mteb/models/model_implementations/ruri_models.py +20 -10
- mteb/models/model_implementations/salesforce_models.py +13 -4
- mteb/models/model_implementations/samilpwc_models.py +2 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +4 -2
- mteb/models/model_implementations/searchmap_models.py +2 -1
- mteb/models/model_implementations/seed_1_6_embedding_models.py +5 -2
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +119 -148
- mteb/models/model_implementations/seed_models.py +2 -1
- mteb/models/model_implementations/sentence_transformers_models.py +142 -22
- mteb/models/model_implementations/shuu_model.py +2 -1
- mteb/models/model_implementations/siglip_models.py +39 -24
- mteb/models/model_implementations/slm_models.py +419 -0
- mteb/models/model_implementations/sonar_models.py +2 -1
- mteb/models/model_implementations/spartan8806_atles_champion.py +2 -1
- mteb/models/model_implementations/stella_models.py +23 -4
- mteb/models/model_implementations/tarka_models.py +4 -2
- mteb/models/model_implementations/text2vec_models.py +12 -3
- mteb/models/model_implementations/ua_sentence_models.py +2 -1
- mteb/models/model_implementations/uae_models.py +17 -5
- mteb/models/model_implementations/vdr_models.py +9 -2
- mteb/models/model_implementations/vi_vn_models.py +12 -6
- mteb/models/model_implementations/vista_models.py +11 -4
- mteb/models/model_implementations/vlm2vec_models.py +14 -7
- mteb/models/model_implementations/voyage_models.py +136 -4
- mteb/models/model_implementations/voyage_v.py +17 -10
- mteb/models/model_implementations/xyz_models.py +1 -0
- mteb/models/model_implementations/youtu_models.py +2 -1
- mteb/models/model_implementations/yuan_models.py +2 -1
- mteb/models/model_implementations/yuan_models_en.py +3 -2
- mteb/models/model_meta.py +127 -40
- mteb/models/models_protocols.py +43 -22
- mteb/models/search_encoder_index/search_backend_protocol.py +7 -3
- mteb/models/search_encoder_index/search_indexes/faiss_search_index.py +21 -10
- mteb/models/search_wrappers.py +63 -29
- mteb/models/sentence_transformer_wrapper.py +52 -26
- mteb/models/vllm_wrapper.py +329 -0
- mteb/py.typed +0 -0
- mteb/results/benchmark_results.py +48 -35
- mteb/results/model_result.py +68 -32
- mteb/results/task_result.py +110 -72
- mteb/similarity_functions.py +19 -9
- mteb/tasks/aggregated_tasks/eng/cqadupstack_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts17_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/eng/sts_benchmark_multilingual_visual_sts_eng.py +3 -3
- mteb/tasks/aggregated_tasks/fas/cqadupstack_retrieval_fa.py +3 -3
- mteb/tasks/aggregated_tasks/fas/syn_per_chatbot_conv_sa_classification.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts17_multilingual_vision_sts.py +3 -3
- mteb/tasks/aggregated_tasks/multilingual/sts_benchmark_multilingual_visual_sts.py +3 -3
- mteb/tasks/aggregated_tasks/nld/cqadupstack_nl_retrieval.py +3 -3
- mteb/tasks/aggregated_tasks/pol/cqadupstack_retrieval_pl.py +3 -3
- mteb/tasks/bitext_mining/eng/pub_chem_smiles_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/fas/fa_mteb_summary_retrieval.py +3 -3
- mteb/tasks/bitext_mining/multilingual/bucc_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/flores_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_conv_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/in22_gen_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/norwegian_courts_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/ntrex_bitext_mining.py +1 -1
- mteb/tasks/bitext_mining/multilingual/roma_tales_bitext_mining.py +2 -2
- mteb/tasks/bitext_mining/multilingual/web_faq_bitext_mining.py +2 -2
- mteb/tasks/classification/ara/online_store_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/restaurant_review_sentiment_classification.py +1 -1
- mteb/tasks/classification/ara/tweet_sarcasm_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_hate_speech_classification.py +1 -1
- mteb/tasks/classification/ben/bengali_sentiment_analysis.py +1 -1
- mteb/tasks/classification/bul/bulgarian_store_review_sentiment_classfication.py +1 -1
- mteb/tasks/classification/ces/csfdcz_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/dan/ddisco_cohesion_classification.py +1 -1
- mteb/tasks/classification/dan/dk_hate_classification.py +2 -2
- mteb/tasks/classification/deu/german_politicians_twitter_sentiment_classification.py +1 -1
- mteb/tasks/classification/ell/greek_legal_code_classification.py +1 -1
- mteb/tasks/classification/eng/dbpedia_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_chat_classification.py +2 -2
- mteb/tasks/classification/eng/toxic_conversations_classification.py +2 -2
- mteb/tasks/classification/eng/tweet_topic_single_classification.py +1 -1
- mteb/tasks/classification/eng/yahoo_answers_topics_classification.py +1 -1
- mteb/tasks/classification/eng/yelp_review_full_classification.py +2 -2
- mteb/tasks/classification/est/estonian_valence.py +2 -2
- mteb/tasks/classification/fas/fa_mteb_classification.py +6 -6
- mteb/tasks/classification/fas/persian_food_sentiment_classification.py +1 -1
- mteb/tasks/classification/fil/filipino_shopee_reviews_classification.py +1 -1
- mteb/tasks/classification/fin/fin_toxicity_classification.py +1 -1
- mteb/tasks/classification/fra/french_book_reviews.py +2 -2
- mteb/tasks/classification/fra/movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/guj/gujarati_news_classification.py +1 -1
- mteb/tasks/classification/hin/hindi_discourse_classification.py +1 -1
- mteb/tasks/classification/hin/sentiment_analysis_hindi.py +1 -1
- mteb/tasks/classification/ind/indonesian_id_clickbait_classification.py +2 -2
- mteb/tasks/classification/ind/indonesian_mongabay_conservation_classification.py +1 -1
- mteb/tasks/classification/ita/dado_eval_coarse_classification.py +1 -1
- mteb/tasks/classification/ita/ita_casehold_classification.py +1 -1
- mteb/tasks/classification/ita/sardi_stance_classification.py +1 -1
- mteb/tasks/classification/jav/javanese_imdb_classification.py +1 -1
- mteb/tasks/classification/jpn/wrime_classification.py +1 -1
- mteb/tasks/classification/kan/kannada_news_classification.py +2 -2
- mteb/tasks/classification/kor/klue_tc.py +2 -2
- mteb/tasks/classification/kor/kor_fin.py +1 -1
- mteb/tasks/classification/kor/kor_hate_classification.py +1 -1
- mteb/tasks/classification/kor/kor_sarcasm_classification.py +1 -1
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/classification/mal/malayalam_news_classification.py +1 -1
- mteb/tasks/classification/mar/marathi_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/afri_senti_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/catalonia_tweet_classification.py +1 -1
- mteb/tasks/classification/multilingual/cyrillic_turkic_lang_classification.py +1 -1
- mteb/tasks/classification/multilingual/indic_nlp_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/masakha_news_classification.py +1 -1
- mteb/tasks/classification/multilingual/multi_hate_classification.py +1 -1
- mteb/tasks/classification/multilingual/multilingual_sentiment_classification.py +1 -1
- mteb/tasks/classification/multilingual/scala_classification.py +2 -2
- mteb/tasks/classification/multilingual/sib200_classification.py +1 -1
- mteb/tasks/classification/multilingual/turkic_classification.py +1 -1
- mteb/tasks/classification/multilingual/tweet_sentiment_classification.py +1 -1
- mteb/tasks/classification/nep/nepali_news_classification.py +2 -2
- mteb/tasks/classification/nld/dutch_sarcastic_headlines_classification.py +1 -1
- mteb/tasks/classification/nld/vaccin_chat_nl_classification.py +1 -1
- mteb/tasks/classification/ory/odia_news_classification.py +2 -2
- mteb/tasks/classification/pan/punjabi_news_classification.py +1 -1
- mteb/tasks/classification/ron/moroco.py +1 -1
- mteb/tasks/classification/ron/romanian_reviews_sentiment.py +1 -1
- mteb/tasks/classification/ron/romanian_sentiment_classification.py +1 -1
- mteb/tasks/classification/rus/georeview_classification.py +1 -1
- mteb/tasks/classification/rus/headline_classification.py +2 -2
- mteb/tasks/classification/rus/inappropriateness_classification.py +2 -2
- mteb/tasks/classification/rus/ru_reviews_classification.py +2 -2
- mteb/tasks/classification/rus/ru_sci_bench_grnti_classification.py +1 -1
- mteb/tasks/classification/rus/ru_sci_bench_oecd_classification.py +1 -1
- mteb/tasks/classification/rus/ru_toixic_classification_okmlcup.py +1 -1
- mteb/tasks/classification/san/sanskrit_shlokas_classification.py +1 -1
- mteb/tasks/classification/sin/sinhala_news_classification.py +2 -2
- mteb/tasks/classification/sin/sinhala_news_source_classification.py +2 -2
- mteb/tasks/classification/slk/csfdsk_movie_review_sentiment_classification.py +2 -2
- mteb/tasks/classification/slv/frenk_sl_classification.py +1 -1
- mteb/tasks/classification/spa/spanish_news_classification.py +2 -2
- mteb/tasks/classification/ssw/siswati_news_classification.py +1 -1
- mteb/tasks/classification/tam/tamil_news_classification.py +2 -2
- mteb/tasks/classification/tel/telugu_andhra_jyoti_news_classification.py +2 -2
- mteb/tasks/classification/tha/wongnai_reviews_classification.py +1 -1
- mteb/tasks/classification/tur/turkish_movie_sentiment_classification.py +2 -2
- mteb/tasks/classification/ukr/ukr_formality_classification.py +2 -2
- mteb/tasks/classification/vie/toxic_conversations_vn_classification.py +1 -1
- mteb/tasks/classification/vie/vie_student_feedback_classification.py +1 -1
- mteb/tasks/classification/zho/yue_openrice_review_classification.py +2 -2
- mteb/tasks/classification/zul/isi_zulu_news_classification.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_p2p.py +1 -1
- mteb/tasks/clustering/deu/blurbs_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/arxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/arxiv_hierarchical_clustering.py +2 -2
- mteb/tasks/clustering/eng/big_patent_clustering.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/biorxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/medrxiv_clustering_s2s.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering.py +1 -1
- mteb/tasks/clustering/eng/reddit_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering.py +1 -1
- mteb/tasks/clustering/eng/stack_exchange_clustering_p2p.py +1 -1
- mteb/tasks/clustering/eng/twenty_newsgroups_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/fas/fa_mteb_clustering.py +4 -4
- mteb/tasks/clustering/fra/hal_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_p2p.py +2 -2
- mteb/tasks/clustering/multilingual/mlsum_clustering_s2s.py +2 -2
- mteb/tasks/clustering/multilingual/sib200_clustering_s2s.py +1 -1
- mteb/tasks/clustering/multilingual/wiki_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/dutch_news_articles_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/iconclass_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nld/open_tender_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_p2p.py +1 -1
- mteb/tasks/clustering/nld/vabb_clustering_s2s.py +1 -1
- mteb/tasks/clustering/nob/snl_clustering.py +8 -3
- mteb/tasks/clustering/nob/vg_clustering.py +8 -3
- mteb/tasks/clustering/pol/polish_clustering.py +3 -3
- mteb/tasks/clustering/rus/ru_sci_bench_grnti_clustering_p2p.py +1 -1
- mteb/tasks/clustering/rus/ru_sci_bench_oecd_clustering_p2p.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +6 -6
- mteb/tasks/image_text_pair_classification/eng/image_co_de.py +1 -1
- mteb/tasks/image_text_pair_classification/eng/sugar_crepe.py +2 -2
- mteb/tasks/instruction_reranking/multilingual/m_follow_ir.py +2 -2
- mteb/tasks/multichoice/eng/cv_bench.py +4 -4
- mteb/tasks/multilabel_classification/ita/emit_classification.py +1 -1
- mteb/tasks/multilabel_classification/mlt/maltese_news_classification.py +1 -1
- mteb/tasks/multilabel_classification/rus/ru_toixic_multilabelclassification_okmlcup.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_group_classification.py +1 -1
- mteb/tasks/multilabel_classification/swe/swedish_patent_cpc_subclass_classification.py +1 -1
- mteb/tasks/pair_classification/ara/ar_entail.py +1 -1
- mteb/tasks/pair_classification/dan/talemaader_pc.py +1 -1
- mteb/tasks/pair_classification/deu/false_friends_de_en_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_ai_sentence_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_smilespc.py +4 -3
- mteb/tasks/pair_classification/eng/pub_chem_synonym_pc.py +1 -1
- mteb/tasks/pair_classification/eng/pub_chem_wiki_paragraphs_pc.py +1 -1
- mteb/tasks/pair_classification/eng/sprint_duplicate_questions_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_sem_eval2015_pc.py +1 -1
- mteb/tasks/pair_classification/eng/twitter_url_corpus_pc.py +1 -1
- mteb/tasks/pair_classification/fas/fa_mteb_pair_classification.py +5 -5
- mteb/tasks/pair_classification/fas/fars_tail.py +2 -2
- mteb/tasks/pair_classification/hye/armenian_paraphrase_pc.py +1 -1
- mteb/tasks/pair_classification/ita/dis_co_tex_pair_classification.py +1 -1
- mteb/tasks/pair_classification/kor/klue_nli.py +1 -1
- mteb/tasks/pair_classification/multilingual/rte3.py +2 -2
- mteb/tasks/pair_classification/multilingual/xnli.py +1 -1
- mteb/tasks/pair_classification/pol/polish_pc.py +4 -4
- mteb/tasks/pair_classification/por/assin2_rte.py +1 -1
- mteb/tasks/pair_classification/por/sick_br_pc.py +1 -1
- mteb/tasks/pair_classification/rus/terra.py +2 -2
- mteb/tasks/pair_classification/vie/sprint_duplicate_questions_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_sem_eval2015_pcvn.py +1 -1
- mteb/tasks/pair_classification/vie/twitter_url_corpus_pcvn.py +1 -1
- mteb/tasks/pair_classification/zho/cmteb_pair_classification.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/ara/sadeem_question_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_edit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/code/code_rag.py +16 -16
- mteb/tasks/retrieval/code/code_search_net_cc_retrieval.py +1 -1
- mteb/tasks/retrieval/code/coir_code_search_net_retrieval.py +1 -1
- mteb/tasks/retrieval/code/ds1000_retrieval.py +1 -1
- mteb/tasks/retrieval/code/fresh_stack_retrieval.py +1 -1
- mteb/tasks/retrieval/code/human_eval_retrieval.py +1 -1
- mteb/tasks/retrieval/code/mbpp_retrieval.py +1 -1
- mteb/tasks/retrieval/code/wiki_sql_retrieval.py +1 -1
- mteb/tasks/retrieval/dan/dan_fever_retrieval.py +2 -2
- mteb/tasks/retrieval/dan/tv2_nordretrieval.py +3 -3
- mteb/tasks/retrieval/dan/twitter_hjerne_retrieval.py +3 -3
- mteb/tasks/retrieval/deu/german_gov_service_retrieval.py +1 -1
- mteb/tasks/retrieval/deu/german_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/ell/greek_civics_qa.py +1 -1
- mteb/tasks/retrieval/eng/__init__.py +44 -0
- mteb/tasks/retrieval/eng/bright_retrieval.py +10 -2
- mteb/tasks/retrieval/eng/bright_v1_1_retrieval.py +968 -0
- mteb/tasks/retrieval/eng/chat_doctor_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/chemrxiv.py +33 -0
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/fin_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/finance_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hateful_memes_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/hc3_finance_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_needle_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_passkey_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_summ_screen_fd_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lemb_wikim_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/lembqm_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/limit_retrieval.py +6 -1
- mteb/tasks/retrieval/eng/lit_search_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/memotion_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/ml_questions.py +1 -1
- mteb/tasks/retrieval/eng/nano_argu_ana_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_climate_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_db_pedia_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fever_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_fi_qa2018_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_hotpot_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_msmarco_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nf_corpus_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_nq_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_quora_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_sci_fact_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_scidocs_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/nano_touche2020_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/narrative_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/r2_med_retrieval.py +8 -8
- mteb/tasks/retrieval/eng/sci_mmir_i2t_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/sci_mmir_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/eng/vidore_bench_retrieval.py +10 -10
- mteb/tasks/retrieval/fra/f_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/fra/syntec_retrieval.py +1 -1
- mteb/tasks/retrieval/hun/hun_sum2.py +1 -1
- mteb/tasks/retrieval/kat/georgian_faq_retrieval.py +1 -1
- mteb/tasks/retrieval/kor/__init__.py +15 -1
- mteb/tasks/retrieval/kor/kovidore2_bench_retrieval.py +142 -0
- mteb/tasks/retrieval/multilingual/__init__.py +2 -0
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt19.py +1 -1
- mteb/tasks/retrieval/multilingual/cross_lingual_semantic_discrimination_wmt21.py +1 -1
- mteb/tasks/retrieval/multilingual/cur_ev1_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/euro_pirq_retrieval.py +43 -0
- mteb/tasks/retrieval/multilingual/jina_vdr_bench_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/miracl_vision_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/mr_tidy_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/public_health_qa_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/ru_sci_bench_retrieval.py +5 -5
- mteb/tasks/retrieval/multilingual/statcan_dialogue_dataset_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vdr_multilingual_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/vidore2_bench_retrieval.py +14 -4
- mteb/tasks/retrieval/multilingual/vidore3_bench_retrieval.py +90 -100
- mteb/tasks/retrieval/multilingual/wit_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_flickr30k_co_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/x_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/multilingual/xm3600_t2i_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_android_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_english_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gaming_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_gis_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_mathematica_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_physics_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_programmers_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_stats_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_tex_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_unix_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_webmasters_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nld/cqa_dupstack_wordpress_nl_retrieval.py +1 -1
- mteb/tasks/retrieval/nob/norquad.py +3 -3
- mteb/tasks/retrieval/nob/snl_retrieval.py +3 -3
- mteb/tasks/retrieval/slk/slovak_sum_retrieval.py +1 -1
- mteb/tasks/retrieval/tur/tur_hist_quad.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/vie_qu_ad_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- mteb/tasks/sts/fao/faroese_sts.py +1 -1
- mteb/tasks/sts/fra/sick_fr_sts.py +1 -1
- mteb/tasks/sts/kor/klue_sts.py +1 -1
- mteb/tasks/sts/por/sick_br_sts.py +1 -1
- mteb/tasks/sts/rus/ru_para_phraser_sts.py +1 -1
- mteb/tasks/zeroshot_classification/eng/sci_mmir.py +1 -1
- mteb/types/__init__.py +2 -0
- mteb/types/_encoder_io.py +13 -1
- mteb/types/_result.py +2 -1
- mteb/types/statistics.py +18 -5
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/METADATA +15 -4
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/RECORD +528 -486
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/WHEEL +1 -1
- mteb/models/model_implementations/mxbai_models.py +0 -111
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/entry_points.txt +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.5.2.dist-info → mteb-2.7.9.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
4
|
from collections import defaultdict
|
|
3
|
-
from
|
|
4
|
-
from typing import Any, ClassVar, TypedDict
|
|
5
|
+
from typing import TYPE_CHECKING, Any, ClassVar, TypedDict, cast
|
|
5
6
|
|
|
6
7
|
from datasets import Dataset, DatasetDict
|
|
7
8
|
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
|
|
@@ -9,9 +10,15 @@ from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_sc
|
|
|
9
10
|
from mteb._evaluators import BitextMiningEvaluator
|
|
10
11
|
from mteb.abstasks._statistics_calculation import calculate_text_statistics
|
|
11
12
|
from mteb.abstasks.abstask import AbsTask
|
|
12
|
-
from mteb.models import EncoderProtocol
|
|
13
|
-
from mteb.types import
|
|
14
|
-
|
|
13
|
+
from mteb.models import EncoderProtocol
|
|
14
|
+
from mteb.types.statistics import SplitDescriptiveStatistics
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
from mteb.models import MTEBModels
|
|
20
|
+
from mteb.types import EncodeKwargs, HFSubset, ScoresDict
|
|
21
|
+
from mteb.types.statistics import TextStatistics
|
|
15
22
|
|
|
16
23
|
logger = logging.getLogger(__name__)
|
|
17
24
|
|
|
@@ -73,13 +80,17 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
73
80
|
split: str = "test",
|
|
74
81
|
subsets_to_run: list[HFSubset] | None = None,
|
|
75
82
|
*,
|
|
76
|
-
encode_kwargs:
|
|
83
|
+
encode_kwargs: EncodeKwargs,
|
|
77
84
|
prediction_folder: Path | None = None,
|
|
85
|
+
num_proc: int = 1,
|
|
78
86
|
**kwargs: Any,
|
|
79
87
|
) -> dict[HFSubset, ScoresDict]:
|
|
80
88
|
"""Added load for "parallel" datasets"""
|
|
89
|
+
if not isinstance(model, EncoderProtocol):
|
|
90
|
+
raise TypeError("Expected model to be an instance of EncoderProtocol")
|
|
91
|
+
|
|
81
92
|
if not self.data_loaded:
|
|
82
|
-
self.load_data()
|
|
93
|
+
self.load_data(num_proc=num_proc)
|
|
83
94
|
|
|
84
95
|
hf_subsets = self.hf_subsets
|
|
85
96
|
|
|
@@ -87,16 +98,22 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
87
98
|
if subsets_to_run is not None:
|
|
88
99
|
hf_subsets = [s for s in hf_subsets if s in subsets_to_run]
|
|
89
100
|
|
|
90
|
-
|
|
101
|
+
encoder_model = cast("EncoderProtocol", model)
|
|
102
|
+
|
|
103
|
+
if self.dataset is None:
|
|
104
|
+
raise ValueError("Dataset is not loaded.")
|
|
105
|
+
|
|
106
|
+
scores: dict[str, BitextMiningMetrics] = {}
|
|
91
107
|
if self.parallel_subsets:
|
|
92
|
-
scores = self._evaluate_subset(
|
|
93
|
-
|
|
94
|
-
self.dataset[split],
|
|
108
|
+
scores = self._evaluate_subset( # type: ignore[assignment]
|
|
109
|
+
encoder_model,
|
|
110
|
+
self.dataset[split],
|
|
95
111
|
parallel=True,
|
|
96
112
|
hf_split=split,
|
|
97
113
|
hf_subset="parallel",
|
|
98
114
|
encode_kwargs=encode_kwargs,
|
|
99
115
|
prediction_folder=prediction_folder,
|
|
116
|
+
num_proc=num_proc,
|
|
100
117
|
**kwargs,
|
|
101
118
|
)
|
|
102
119
|
else:
|
|
@@ -109,42 +126,44 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
109
126
|
data_split = self.dataset[split]
|
|
110
127
|
else:
|
|
111
128
|
data_split = self.dataset[hf_subset][split]
|
|
112
|
-
scores[hf_subset] = self._evaluate_subset(
|
|
113
|
-
|
|
129
|
+
scores[hf_subset] = self._evaluate_subset( # type: ignore[assignment]
|
|
130
|
+
encoder_model,
|
|
114
131
|
data_split,
|
|
115
132
|
hf_split=split,
|
|
116
133
|
hf_subset=hf_subset,
|
|
117
134
|
encode_kwargs=encode_kwargs,
|
|
118
135
|
prediction_folder=prediction_folder,
|
|
136
|
+
num_proc=num_proc,
|
|
119
137
|
**kwargs,
|
|
120
138
|
)
|
|
121
139
|
|
|
122
|
-
return scores
|
|
140
|
+
return cast("dict[HFSubset, ScoresDict]", scores)
|
|
123
141
|
|
|
124
142
|
def _get_pairs(self, parallel: bool) -> list[tuple[str, str]]:
|
|
125
143
|
pairs = self._DEFAULT_PAIR
|
|
126
144
|
if parallel:
|
|
127
|
-
pairs = [langpair.split("-") for langpair in self.hf_subsets]
|
|
145
|
+
pairs = [langpair.split("-") for langpair in self.hf_subsets] # type: ignore[misc]
|
|
128
146
|
return pairs
|
|
129
147
|
|
|
130
|
-
def _evaluate_subset(
|
|
148
|
+
def _evaluate_subset( # type: ignore[override]
|
|
131
149
|
self,
|
|
132
150
|
model: EncoderProtocol,
|
|
133
151
|
data_split: Dataset,
|
|
134
152
|
*,
|
|
135
153
|
hf_split: str,
|
|
136
154
|
hf_subset: str,
|
|
137
|
-
|
|
138
|
-
encode_kwargs: dict[str, Any],
|
|
155
|
+
encode_kwargs: EncodeKwargs,
|
|
139
156
|
prediction_folder: Path | None = None,
|
|
157
|
+
parallel: bool = False,
|
|
158
|
+
num_proc: int = 1,
|
|
140
159
|
**kwargs,
|
|
141
|
-
) ->
|
|
160
|
+
) -> BitextMiningMetrics | dict[str, BitextMiningMetrics]:
|
|
142
161
|
pairs = self._get_pairs(parallel)
|
|
143
162
|
|
|
144
163
|
evaluator = BitextMiningEvaluator(
|
|
145
164
|
data_split,
|
|
146
165
|
task_metadata=self.metadata,
|
|
147
|
-
pair_columns=pairs,
|
|
166
|
+
pair_columns=pairs,
|
|
148
167
|
hf_split=hf_split,
|
|
149
168
|
hf_subset=hf_subset,
|
|
150
169
|
**kwargs,
|
|
@@ -156,7 +175,7 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
156
175
|
else data_split["gold"]
|
|
157
176
|
)
|
|
158
177
|
|
|
159
|
-
neighbours = evaluator(model, encode_kwargs=encode_kwargs)
|
|
178
|
+
neighbours = evaluator(model, encode_kwargs=encode_kwargs, num_proc=num_proc)
|
|
160
179
|
|
|
161
180
|
if prediction_folder:
|
|
162
181
|
self._save_task_predictions(
|
|
@@ -168,16 +187,16 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
168
187
|
)
|
|
169
188
|
|
|
170
189
|
if parallel:
|
|
171
|
-
|
|
190
|
+
parallel_metrics = {}
|
|
172
191
|
for keys, nearest_neighbors in neighbours.items():
|
|
173
|
-
|
|
192
|
+
parallel_metrics[keys] = self._compute_metrics(nearest_neighbors, gold)
|
|
174
193
|
|
|
175
|
-
for v in
|
|
194
|
+
for v in parallel_metrics.values():
|
|
176
195
|
self._add_main_score(v)
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
196
|
+
return parallel_metrics
|
|
197
|
+
def_pair_str = "-".join(self._DEFAULT_PAIR[0])
|
|
198
|
+
metrics = self._compute_metrics(neighbours[def_pair_str], gold)
|
|
199
|
+
self._add_main_score(metrics)
|
|
181
200
|
return metrics
|
|
182
201
|
|
|
183
202
|
def _compute_metrics(
|
|
@@ -249,9 +268,12 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
249
268
|
sentence2_statistics=text2_statistics,
|
|
250
269
|
)
|
|
251
270
|
|
|
252
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
271
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
272
|
+
if self.dataset is None:
|
|
273
|
+
raise ValueError("Dataset is not loaded.")
|
|
274
|
+
|
|
253
275
|
if self.metadata.is_multilingual:
|
|
254
|
-
dataset = defaultdict(dict)
|
|
276
|
+
dataset: dict[str, dict[str, list[str]]] = defaultdict(dict)
|
|
255
277
|
for config in self.metadata.eval_langs:
|
|
256
278
|
logger.info(f"Converting {config} of {self.metadata.name}")
|
|
257
279
|
|
|
@@ -266,10 +288,10 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
266
288
|
for split in self.dataset[config]:
|
|
267
289
|
dataset[split][lang_1] = self.dataset[config][split][sent_1]
|
|
268
290
|
dataset[split][lang_2] = self.dataset[config][split][sent_2]
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
291
|
+
dataset_dict = DatasetDict(
|
|
292
|
+
{split: Dataset.from_dict(dataset[split]) for split in dataset}
|
|
293
|
+
)
|
|
294
|
+
dataset_dict.push_to_hub(repo_name, num_proc=num_proc)
|
|
273
295
|
else:
|
|
274
296
|
sentences = {}
|
|
275
297
|
for split in self.dataset:
|
|
@@ -281,4 +303,4 @@ class AbsTaskBitextMining(AbsTask):
|
|
|
281
303
|
}
|
|
282
304
|
)
|
|
283
305
|
sentences = DatasetDict(sentences)
|
|
284
|
-
sentences.push_to_hub(repo_name)
|
|
306
|
+
sentences.push_to_hub(repo_name, num_proc=num_proc)
|
mteb/abstasks/text/reranking.py
CHANGED
|
@@ -16,7 +16,7 @@ else:
|
|
|
16
16
|
|
|
17
17
|
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
|
-
OLD_FORMAT_RERANKING_TASKS = []
|
|
19
|
+
OLD_FORMAT_RERANKING_TASKS: list[str] = []
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
@deprecated(
|
|
@@ -34,7 +34,7 @@ class AbsTaskReranking(AbsTaskRetrieval):
|
|
|
34
34
|
For dataformat and other information, see [AbsTaskRetrieval][mteb.abstasks.retrieval.AbsTaskRetrieval].
|
|
35
35
|
"""
|
|
36
36
|
|
|
37
|
-
def load_data(self) -> None:
|
|
37
|
+
def load_data(self, num_proc: int = 1, **kwargs) -> None:
|
|
38
38
|
"""Load the dataset."""
|
|
39
39
|
if self.data_loaded:
|
|
40
40
|
return
|
|
@@ -43,7 +43,7 @@ class AbsTaskReranking(AbsTaskRetrieval):
|
|
|
43
43
|
self.transform_old_dataset_format()
|
|
44
44
|
else:
|
|
45
45
|
# use AbsTaskRetrieval default to load the data
|
|
46
|
-
return super().load_data()
|
|
46
|
+
return super().load_data(num_proc=num_proc)
|
|
47
47
|
|
|
48
48
|
def _process_example(self, example: dict, split: str, query_idx: int) -> dict:
|
|
49
49
|
"""Process a single example from the dataset.
|
|
@@ -100,12 +100,14 @@ class AbsTaskReranking(AbsTaskRetrieval):
|
|
|
100
100
|
if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS:
|
|
101
101
|
return
|
|
102
102
|
|
|
103
|
-
|
|
103
|
+
logger.info(
|
|
104
104
|
f"Transforming old format to standard format for {self.metadata.name}"
|
|
105
105
|
)
|
|
106
106
|
|
|
107
107
|
given_dataset = copy(given_dataset)
|
|
108
|
-
self.dataset = defaultdict(
|
|
108
|
+
self.dataset: dict[str, dict[str, RetrievalSplitData]] = defaultdict(
|
|
109
|
+
lambda: defaultdict(dict) # type: ignore[arg-type]
|
|
110
|
+
)
|
|
109
111
|
|
|
110
112
|
hf_subsets = self.hf_subsets
|
|
111
113
|
|
|
@@ -115,19 +117,19 @@ class AbsTaskReranking(AbsTaskRetrieval):
|
|
|
115
117
|
if hf_subset in cur_dataset:
|
|
116
118
|
cur_dataset = cur_dataset[hf_subset]
|
|
117
119
|
elif "name" in self.metadata.dataset:
|
|
118
|
-
cur_dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
120
|
+
cur_dataset = datasets.load_dataset(**self.metadata.dataset)
|
|
119
121
|
assert hf_subset == "default", (
|
|
120
122
|
f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata."
|
|
121
123
|
)
|
|
122
124
|
else:
|
|
123
125
|
cur_dataset = datasets.load_dataset(
|
|
124
126
|
**self.metadata.dataset, name=hf_subset
|
|
125
|
-
)
|
|
127
|
+
)
|
|
126
128
|
|
|
127
129
|
for split in cur_dataset:
|
|
128
130
|
corpus = []
|
|
129
131
|
queries = []
|
|
130
|
-
relevant_docs = defaultdict(dict)
|
|
132
|
+
relevant_docs: dict[str, dict[str, int]] = defaultdict(dict)
|
|
131
133
|
top_ranked = defaultdict(list)
|
|
132
134
|
|
|
133
135
|
# Create an enumerated dataset to pass indices
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from
|
|
3
|
-
from typing import Any
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
4
5
|
|
|
5
6
|
import numpy as np
|
|
6
|
-
from datasets import Dataset
|
|
7
7
|
|
|
8
8
|
from mteb._evaluators import SummarizationEvaluator
|
|
9
|
-
from mteb._evaluators.text.summarization_evaluator import SummarizationMetrics
|
|
10
9
|
from mteb.abstasks._statistics_calculation import (
|
|
11
10
|
calculate_score_statistics,
|
|
12
11
|
calculate_text_statistics,
|
|
@@ -14,11 +13,22 @@ from mteb.abstasks._statistics_calculation import (
|
|
|
14
13
|
from mteb.abstasks.abstask import AbsTask
|
|
15
14
|
from mteb.models import EncoderProtocol
|
|
16
15
|
from mteb.types.statistics import (
|
|
17
|
-
ScoreStatistics,
|
|
18
16
|
SplitDescriptiveStatistics,
|
|
19
|
-
TextStatistics,
|
|
20
17
|
)
|
|
21
18
|
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
|
|
22
|
+
from datasets import Dataset
|
|
23
|
+
|
|
24
|
+
from mteb._evaluators.text.summarization_evaluator import SummarizationMetrics
|
|
25
|
+
from mteb.models import MTEBModels
|
|
26
|
+
from mteb.types import EncodeKwargs
|
|
27
|
+
from mteb.types.statistics import (
|
|
28
|
+
ScoreStatistics,
|
|
29
|
+
TextStatistics,
|
|
30
|
+
)
|
|
31
|
+
|
|
22
32
|
logger = logging.getLogger(__name__)
|
|
23
33
|
|
|
24
34
|
|
|
@@ -77,17 +87,23 @@ class AbsTaskSummarization(AbsTask):
|
|
|
77
87
|
|
|
78
88
|
def _evaluate_subset(
|
|
79
89
|
self,
|
|
80
|
-
model:
|
|
90
|
+
model: MTEBModels,
|
|
81
91
|
data_split: Dataset,
|
|
82
92
|
*,
|
|
83
93
|
hf_split: str,
|
|
84
94
|
hf_subset: str,
|
|
85
|
-
encode_kwargs:
|
|
95
|
+
encode_kwargs: EncodeKwargs,
|
|
86
96
|
prediction_folder: Path | None = None,
|
|
97
|
+
num_proc: int = 1,
|
|
87
98
|
**kwargs,
|
|
88
99
|
) -> SummarizationMetrics:
|
|
100
|
+
if not isinstance(model, EncoderProtocol):
|
|
101
|
+
raise TypeError("Expected model to be an instance of EncoderProtocol")
|
|
102
|
+
|
|
89
103
|
normalized_scores = [
|
|
90
|
-
(
|
|
104
|
+
(
|
|
105
|
+
(np.array(x) - self.min_score) / (self.max_score - self.min_score)
|
|
106
|
+
).tolist()
|
|
91
107
|
for x in data_split[self.relevancy_column_name]
|
|
92
108
|
]
|
|
93
109
|
evaluator = self.evaluator(
|
|
@@ -100,7 +116,7 @@ class AbsTaskSummarization(AbsTask):
|
|
|
100
116
|
hf_subset=hf_subset,
|
|
101
117
|
**kwargs,
|
|
102
118
|
)
|
|
103
|
-
scores = evaluator(model, encode_kwargs=encode_kwargs)
|
|
119
|
+
scores = evaluator(model, encode_kwargs=encode_kwargs, num_proc=num_proc)
|
|
104
120
|
if prediction_folder:
|
|
105
121
|
self._save_task_predictions(
|
|
106
122
|
scores,
|
|
@@ -1,6 +1,7 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import logging
|
|
2
|
-
from
|
|
3
|
-
from typing import Any, TypedDict
|
|
4
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
4
5
|
|
|
5
6
|
import torch
|
|
6
7
|
from datasets import Dataset
|
|
@@ -9,10 +10,7 @@ from sklearn import metrics
|
|
|
9
10
|
from mteb._evaluators import ZeroShotClassificationEvaluator
|
|
10
11
|
from mteb.models import EncoderProtocol
|
|
11
12
|
from mteb.types.statistics import (
|
|
12
|
-
ImageStatistics,
|
|
13
|
-
LabelStatistics,
|
|
14
13
|
SplitDescriptiveStatistics,
|
|
15
|
-
TextStatistics,
|
|
16
14
|
)
|
|
17
15
|
|
|
18
16
|
from ._statistics_calculation import (
|
|
@@ -22,6 +20,17 @@ from ._statistics_calculation import (
|
|
|
22
20
|
)
|
|
23
21
|
from .abstask import AbsTask
|
|
24
22
|
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
from mteb.models import MTEBModels
|
|
27
|
+
from mteb.types import EncodeKwargs
|
|
28
|
+
from mteb.types.statistics import (
|
|
29
|
+
ImageStatistics,
|
|
30
|
+
LabelStatistics,
|
|
31
|
+
TextStatistics,
|
|
32
|
+
)
|
|
33
|
+
|
|
25
34
|
logger = logging.getLogger(__name__)
|
|
26
35
|
|
|
27
36
|
|
|
@@ -111,15 +120,19 @@ class AbsTaskZeroShotClassification(AbsTask):
|
|
|
111
120
|
|
|
112
121
|
def _evaluate_subset(
|
|
113
122
|
self,
|
|
114
|
-
model:
|
|
123
|
+
model: MTEBModels,
|
|
115
124
|
data_split: Dataset,
|
|
116
125
|
*,
|
|
117
126
|
hf_split: str,
|
|
118
127
|
hf_subset: str,
|
|
119
|
-
encode_kwargs:
|
|
128
|
+
encode_kwargs: EncodeKwargs,
|
|
120
129
|
prediction_folder: Path | None = None,
|
|
130
|
+
num_proc: int = 1,
|
|
121
131
|
**kwargs,
|
|
122
132
|
) -> ZeroShotClassificationMetrics:
|
|
133
|
+
if not isinstance(model, EncoderProtocol):
|
|
134
|
+
raise TypeError("Expected model to be an instance of EncoderProtocol")
|
|
135
|
+
|
|
123
136
|
candidate_labels = self.get_candidate_labels()
|
|
124
137
|
data_split = data_split.select_columns(
|
|
125
138
|
[self.input_column_name, self.label_column_name]
|
|
@@ -133,7 +146,11 @@ class AbsTaskZeroShotClassification(AbsTask):
|
|
|
133
146
|
hf_subset=hf_subset,
|
|
134
147
|
**kwargs,
|
|
135
148
|
)
|
|
136
|
-
probs = evaluator(
|
|
149
|
+
probs = evaluator(
|
|
150
|
+
model,
|
|
151
|
+
encode_kwargs=encode_kwargs,
|
|
152
|
+
num_proc=num_proc,
|
|
153
|
+
)
|
|
137
154
|
|
|
138
155
|
if prediction_folder:
|
|
139
156
|
self._save_task_predictions(
|
|
@@ -158,13 +175,14 @@ class AbsTaskZeroShotClassification(AbsTask):
|
|
|
158
175
|
accuracy=metrics.accuracy_score(labels, predictions),
|
|
159
176
|
)
|
|
160
177
|
|
|
161
|
-
def _push_dataset_to_hub(self, repo_name: str) -> None:
|
|
178
|
+
def _push_dataset_to_hub(self, repo_name: str, num_proc: int = 1) -> None:
|
|
162
179
|
self._upload_dataset_to_hub(
|
|
163
180
|
repo_name,
|
|
164
181
|
[
|
|
165
182
|
self.input_column_name,
|
|
166
183
|
self.label_column_name,
|
|
167
184
|
],
|
|
185
|
+
num_proc=num_proc,
|
|
168
186
|
)
|
|
169
187
|
labels_dataset = Dataset.from_dict({"labels": self.get_candidate_labels()})
|
|
170
188
|
labels_dataset.push_to_hub(repo_name, config_name="labels")
|
mteb/benchmarks/_create_table.py
CHANGED
|
@@ -1,13 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
1
3
|
import re
|
|
2
4
|
from collections import defaultdict
|
|
3
|
-
from typing import Literal
|
|
5
|
+
from typing import TYPE_CHECKING, Literal
|
|
4
6
|
|
|
5
7
|
import numpy as np
|
|
6
8
|
import pandas as pd
|
|
7
9
|
|
|
8
10
|
import mteb
|
|
9
11
|
from mteb.get_tasks import get_task, get_tasks
|
|
10
|
-
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from mteb.results.benchmark_results import BenchmarkResults
|
|
11
15
|
|
|
12
16
|
|
|
13
17
|
def _borda_count(scores: pd.Series) -> pd.Series:
|
|
@@ -115,7 +119,6 @@ def _create_summary_table_from_benchmark_results(
|
|
|
115
119
|
|
|
116
120
|
# Build joint table
|
|
117
121
|
joint_table = mean_per_type.copy()
|
|
118
|
-
joint_table = joint_table.drop(models_to_remove, axis=0)
|
|
119
122
|
joint_table.insert(0, "mean", overall_mean)
|
|
120
123
|
joint_table.insert(1, "mean_by_task_type", typed_mean)
|
|
121
124
|
joint_table["borda_rank"] = _get_borda_rank(per_task)
|
|
@@ -303,6 +306,7 @@ def _create_per_language_table_from_benchmark_results(
|
|
|
303
306
|
|
|
304
307
|
def _create_summary_table_mean_public_private(
|
|
305
308
|
benchmark_results: BenchmarkResults,
|
|
309
|
+
exclude_private_from_borda: bool = False,
|
|
306
310
|
) -> pd.DataFrame:
|
|
307
311
|
"""Create summary table from BenchmarkResults.
|
|
308
312
|
|
|
@@ -311,6 +315,7 @@ def _create_summary_table_mean_public_private(
|
|
|
311
315
|
|
|
312
316
|
Args:
|
|
313
317
|
benchmark_results: BenchmarkResults object containing model results
|
|
318
|
+
exclude_private_from_borda: If True, calculate Borda rank using only public tasks
|
|
314
319
|
|
|
315
320
|
Returns:
|
|
316
321
|
DataFrame with model summaries, ready for styling in the leaderboard
|
|
@@ -353,10 +358,13 @@ def _create_summary_table_mean_public_private(
|
|
|
353
358
|
|
|
354
359
|
# Build joint table
|
|
355
360
|
joint_table = mean_per_type.copy()
|
|
356
|
-
joint_table = joint_table.drop(models_to_remove, axis=0)
|
|
357
361
|
joint_table.insert(0, "mean(public)", public_mean)
|
|
358
362
|
joint_table.insert(1, "mean(private)", private_mean)
|
|
359
|
-
|
|
363
|
+
if exclude_private_from_borda:
|
|
364
|
+
borda_per_task = per_task[public_task_name]
|
|
365
|
+
else:
|
|
366
|
+
borda_per_task = per_task
|
|
367
|
+
joint_table["borda_rank"] = _get_borda_rank(borda_per_task)
|
|
360
368
|
joint_table = joint_table.sort_values("borda_rank", ascending=True)
|
|
361
369
|
joint_table = joint_table.reset_index()
|
|
362
370
|
|
|
@@ -476,7 +484,6 @@ def _create_summary_table_mean_subset(
|
|
|
476
484
|
|
|
477
485
|
# Build joint table
|
|
478
486
|
joint_table = mean_per_type.copy()
|
|
479
|
-
joint_table = joint_table.drop(models_to_remove, axis=0)
|
|
480
487
|
joint_table.insert(0, "mean(subset)", overall_subset_mean)
|
|
481
488
|
joint_table["borda_rank"] = _get_borda_rank(per_subset)
|
|
482
489
|
joint_table = joint_table.sort_values("mean(subset)", ascending=False)
|
|
@@ -595,7 +602,6 @@ def _create_summary_table_mean_task_type(
|
|
|
595
602
|
|
|
596
603
|
# Build joint table
|
|
597
604
|
joint_table = mean_per_type.copy()
|
|
598
|
-
joint_table = joint_table.drop(models_to_remove, axis=0)
|
|
599
605
|
joint_table.insert(0, "mean_by_task_type", typed_mean)
|
|
600
606
|
joint_table = joint_table.sort_values("mean_by_task_type", ascending=False)
|
|
601
607
|
joint_table["borda_rank"] = _get_borda_rank(per_task)
|
mteb/benchmarks/benchmark.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from collections.abc import
|
|
3
|
+
from collections.abc import Iterator, Sequence
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from typing import TYPE_CHECKING, Literal
|
|
6
6
|
|
|
@@ -19,6 +19,7 @@ class Benchmark:
|
|
|
19
19
|
|
|
20
20
|
Args:
|
|
21
21
|
name: The name of the benchmark
|
|
22
|
+
aliases: Alternative names for the benchmark
|
|
22
23
|
tasks: The tasks within the benchmark.
|
|
23
24
|
description: A description of the benchmark, should include its intended goal and potentially a description of its construction
|
|
24
25
|
reference: A link reference, to a source containing additional information typically to a paper, leaderboard or github.
|
|
@@ -38,6 +39,7 @@ class Benchmark:
|
|
|
38
39
|
|
|
39
40
|
name: str
|
|
40
41
|
tasks: Sequence[AbsTask]
|
|
42
|
+
aliases: Sequence[str] = field(default_factory=tuple)
|
|
41
43
|
description: str | None = None
|
|
42
44
|
reference: StrURL | None = None
|
|
43
45
|
citation: str | None = None
|
|
@@ -47,7 +49,7 @@ class Benchmark:
|
|
|
47
49
|
display_name: str | None = None
|
|
48
50
|
language_view: list[str] | Literal["all"] = field(default_factory=list)
|
|
49
51
|
|
|
50
|
-
def __iter__(self) ->
|
|
52
|
+
def __iter__(self) -> Iterator[AbsTask]:
|
|
51
53
|
return iter(self.tasks)
|
|
52
54
|
|
|
53
55
|
def __len__(self) -> int:
|
|
@@ -121,9 +123,19 @@ class RtebBenchmark(Benchmark):
|
|
|
121
123
|
_create_summary_table_mean_public_private,
|
|
122
124
|
)
|
|
123
125
|
|
|
124
|
-
joint_table = _create_summary_table_mean_public_private(
|
|
126
|
+
joint_table = _create_summary_table_mean_public_private(
|
|
127
|
+
benchmark_results, exclude_private_from_borda=True
|
|
128
|
+
)
|
|
129
|
+
# issue 3902: temporary remove the private column from RTEB summary table
|
|
130
|
+
if "Mean (Private)" in joint_table.columns:
|
|
131
|
+
joint_table = joint_table.drop(columns=["Mean (Private)"])
|
|
125
132
|
# For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
|
|
133
|
+
# but due to 3902, if Private column existed, Mean (Task) was the mean of Public and Private so instead we drop Mean (Task) and rename Mean (Public) to Mean (Task)
|
|
126
134
|
joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
|
|
135
|
+
if "Mean (Task)" in joint_table.columns:
|
|
136
|
+
joint_table = joint_table.drop(columns=["Mean (Task)"])
|
|
137
|
+
joint_table = joint_table.rename(columns={"Mean (Public)": "Mean (Task)"})
|
|
138
|
+
|
|
127
139
|
return joint_table
|
|
128
140
|
|
|
129
141
|
|
|
@@ -3,9 +3,11 @@ from mteb.benchmarks.benchmarks.benchmarks import (
|
|
|
3
3
|
BEIR_NL,
|
|
4
4
|
BRIGHT,
|
|
5
5
|
BRIGHT_LONG,
|
|
6
|
+
BRIGHT_V1_1,
|
|
6
7
|
BUILT_MTEB,
|
|
7
8
|
C_MTEB,
|
|
8
9
|
CHEMTEB,
|
|
10
|
+
CHEMTEB_V1_1,
|
|
9
11
|
CODE_RAG,
|
|
10
12
|
ENCODECHKA,
|
|
11
13
|
FA_MTEB,
|
|
@@ -14,6 +16,7 @@ from mteb.benchmarks.benchmarks.benchmarks import (
|
|
|
14
16
|
JINA_VDR,
|
|
15
17
|
JMTEB_LITE_V1,
|
|
16
18
|
JMTEB_V2,
|
|
19
|
+
KOVIDORE_V2,
|
|
17
20
|
LONG_EMBED,
|
|
18
21
|
MIEB_ENG,
|
|
19
22
|
MIEB_IMG,
|
|
@@ -67,8 +70,10 @@ __all__ = [
|
|
|
67
70
|
"BEIR_NL",
|
|
68
71
|
"BRIGHT",
|
|
69
72
|
"BRIGHT_LONG",
|
|
73
|
+
"BRIGHT_V1_1",
|
|
70
74
|
"BUILT_MTEB",
|
|
71
75
|
"CHEMTEB",
|
|
76
|
+
"CHEMTEB_V1_1",
|
|
72
77
|
"CODE_RAG",
|
|
73
78
|
"C_MTEB",
|
|
74
79
|
"ENCODECHKA",
|
|
@@ -79,6 +84,7 @@ __all__ = [
|
|
|
79
84
|
"JINA_VDR",
|
|
80
85
|
"JMTEB_LITE_V1",
|
|
81
86
|
"JMTEB_V2",
|
|
87
|
+
"KOVIDORE_V2",
|
|
82
88
|
"LONG_EMBED",
|
|
83
89
|
"MIEB_ENG",
|
|
84
90
|
"MIEB_IMG",
|