mteb 2.6.4__py3-none-any.whl → 2.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/abstasks/classification.py +2 -3
- mteb/abstasks/multilabel_classification.py +3 -3
- mteb/abstasks/regression.py +1 -1
- mteb/abstasks/retrieval.py +1 -1
- mteb/abstasks/task_metadata.py +9 -14
- mteb/descriptive_stats/Retrieval/NanoClimateFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoDBPedia-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoFEVER-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoHotpotQA-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoMSMARCO-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/NanoNQ-VN.json +30 -0
- mteb/descriptive_stats/Retrieval/TVPLRetrieval.json +30 -0
- mteb/models/model_implementations/align_models.py +1 -1
- mteb/models/model_implementations/andersborges.py +2 -2
- mteb/models/model_implementations/ara_models.py +1 -1
- mteb/models/model_implementations/arctic_models.py +8 -8
- mteb/models/model_implementations/b1ade_models.py +1 -1
- mteb/models/model_implementations/bge_models.py +45 -21
- mteb/models/model_implementations/bica_model.py +3 -3
- mteb/models/model_implementations/blip2_models.py +2 -2
- mteb/models/model_implementations/blip_models.py +8 -8
- mteb/models/model_implementations/bmretriever_models.py +4 -4
- mteb/models/model_implementations/cadet_models.py +1 -1
- mteb/models/model_implementations/cde_models.py +2 -2
- mteb/models/model_implementations/clip_models.py +3 -3
- mteb/models/model_implementations/clips_models.py +3 -3
- mteb/models/model_implementations/codefuse_models.py +5 -5
- mteb/models/model_implementations/codesage_models.py +3 -3
- mteb/models/model_implementations/cohere_models.py +4 -4
- mteb/models/model_implementations/colpali_models.py +3 -3
- mteb/models/model_implementations/colqwen_models.py +8 -8
- mteb/models/model_implementations/colsmol_models.py +2 -2
- mteb/models/model_implementations/conan_models.py +1 -1
- mteb/models/model_implementations/dino_models.py +19 -19
- mteb/models/model_implementations/e5_instruct.py +23 -4
- mteb/models/model_implementations/e5_models.py +9 -9
- mteb/models/model_implementations/e5_v.py +1 -1
- mteb/models/model_implementations/eagerworks_models.py +1 -1
- mteb/models/model_implementations/emillykkejensen_models.py +3 -3
- mteb/models/model_implementations/en_code_retriever.py +1 -1
- mteb/models/model_implementations/euler_models.py +2 -2
- mteb/models/model_implementations/fa_models.py +9 -9
- mteb/models/model_implementations/facebookai.py +14 -2
- mteb/models/model_implementations/geogpt_models.py +1 -1
- mteb/models/model_implementations/gme_v_models.py +2 -2
- mteb/models/model_implementations/google_models.py +1 -1
- mteb/models/model_implementations/granite_vision_embedding_models.py +1 -1
- mteb/models/model_implementations/gritlm_models.py +2 -2
- mteb/models/model_implementations/gte_models.py +25 -13
- mteb/models/model_implementations/hinvec_models.py +1 -1
- mteb/models/model_implementations/ibm_granite_models.py +30 -6
- mteb/models/model_implementations/inf_models.py +2 -2
- mteb/models/model_implementations/jasper_models.py +2 -2
- mteb/models/model_implementations/jina_clip.py +1 -1
- mteb/models/model_implementations/jina_models.py +11 -5
- mteb/models/model_implementations/kblab.py +12 -6
- mteb/models/model_implementations/kennethenevoldsen_models.py +2 -2
- mteb/models/model_implementations/kfst.py +1 -1
- mteb/models/model_implementations/kowshik24_models.py +1 -1
- mteb/models/model_implementations/lgai_embedding_models.py +1 -1
- mteb/models/model_implementations/linq_models.py +1 -1
- mteb/models/model_implementations/listconranker.py +1 -1
- mteb/models/model_implementations/llm2clip_models.py +3 -3
- mteb/models/model_implementations/llm2vec_models.py +8 -8
- mteb/models/model_implementations/mdbr_models.py +14 -2
- mteb/models/model_implementations/misc_models.py +68 -68
- mteb/models/model_implementations/mme5_models.py +1 -1
- mteb/models/model_implementations/moco_models.py +2 -2
- mteb/models/model_implementations/mod_models.py +1 -1
- mteb/models/model_implementations/model2vec_models.py +13 -13
- mteb/models/model_implementations/moka_models.py +1 -1
- mteb/models/model_implementations/mxbai_models.py +16 -3
- mteb/models/model_implementations/nbailab.py +3 -3
- mteb/models/model_implementations/no_instruct_sentence_models.py +1 -1
- mteb/models/model_implementations/nomic_models.py +18 -6
- mteb/models/model_implementations/nomic_models_vision.py +1 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +2 -2
- mteb/models/model_implementations/nvidia_models.py +3 -3
- mteb/models/model_implementations/octen_models.py +3 -3
- mteb/models/model_implementations/openclip_models.py +6 -6
- mteb/models/model_implementations/opensearch_neural_sparse_models.py +5 -5
- mteb/models/model_implementations/ops_moa_models.py +1 -1
- mteb/models/model_implementations/ordalietech_solon_embeddings_mini_beta_1_1.py +1 -1
- mteb/models/model_implementations/pawan_models.py +1 -1
- mteb/models/model_implementations/piccolo_models.py +1 -1
- mteb/models/model_implementations/promptriever_models.py +4 -4
- mteb/models/model_implementations/pylate_models.py +5 -5
- mteb/models/model_implementations/qodo_models.py +2 -2
- mteb/models/model_implementations/qtack_models.py +1 -1
- mteb/models/model_implementations/qwen3_models.py +3 -3
- mteb/models/model_implementations/qzhou_models.py +2 -2
- mteb/models/model_implementations/rasgaard_models.py +1 -1
- mteb/models/model_implementations/reasonir_model.py +1 -1
- mteb/models/model_implementations/repllama_models.py +1 -1
- mteb/models/model_implementations/rerankers_custom.py +9 -3
- mteb/models/model_implementations/rerankers_monot5_based.py +14 -14
- mteb/models/model_implementations/richinfoai_models.py +1 -1
- mteb/models/model_implementations/ru_sentence_models.py +20 -20
- mteb/models/model_implementations/ruri_models.py +10 -10
- mteb/models/model_implementations/salesforce_models.py +3 -3
- mteb/models/model_implementations/samilpwc_models.py +1 -1
- mteb/models/model_implementations/sarashina_embedding_models.py +2 -2
- mteb/models/model_implementations/searchmap_models.py +1 -1
- mteb/models/model_implementations/sentence_transformers_models.py +58 -22
- mteb/models/model_implementations/shuu_model.py +1 -1
- mteb/models/model_implementations/siglip_models.py +10 -10
- mteb/models/model_implementations/slm_models.py +416 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +1 -1
- mteb/models/model_implementations/stella_models.py +17 -4
- mteb/models/model_implementations/tarka_models.py +2 -2
- mteb/models/model_implementations/text2vec_models.py +9 -3
- mteb/models/model_implementations/ua_sentence_models.py +1 -1
- mteb/models/model_implementations/uae_models.py +7 -1
- mteb/models/model_implementations/vdr_models.py +1 -1
- mteb/models/model_implementations/vi_vn_models.py +6 -6
- mteb/models/model_implementations/vlm2vec_models.py +2 -2
- mteb/models/model_implementations/youtu_models.py +1 -1
- mteb/models/model_implementations/yuan_models.py +1 -1
- mteb/models/model_implementations/yuan_models_en.py +1 -1
- mteb/models/model_meta.py +46 -17
- mteb/results/benchmark_results.py +2 -2
- mteb/tasks/classification/kur/kurdish_sentiment_classification.py +2 -2
- mteb/tasks/clustering/eng/hume_wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/eng/wiki_cities_clustering.py +1 -1
- mteb/tasks/clustering/zho/cmteb_clustering.py +2 -2
- mteb/tasks/reranking/multilingual/wikipedia_reranking_multilingual.py +1 -1
- mteb/tasks/retrieval/eng/cub200_i2i_retrieval.py +1 -1
- mteb/tasks/retrieval/vie/__init__.py +14 -6
- mteb/tasks/retrieval/vie/climate_fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/db_pedia_vn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/fevervn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/hotpot_qavn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/msmarcovn_retrieval.py +48 -0
- mteb/tasks/retrieval/vie/nqvn_retrieval.py +39 -0
- mteb/tasks/retrieval/vie/tvpl_retrieval.py +42 -0
- mteb/tasks/retrieval/vie/zac_legal_text_retrieval.py +15 -1
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/METADATA +3 -3
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/RECORD +142 -133
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/WHEEL +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/entry_points.txt +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.6.4.dist-info → mteb-2.6.6.dist-info}/top_level.txt +0 -0
mteb/models/model_meta.py
CHANGED
|
@@ -17,6 +17,7 @@ from huggingface_hub import (
|
|
|
17
17
|
get_safetensors_metadata,
|
|
18
18
|
hf_hub_download,
|
|
19
19
|
list_repo_commits,
|
|
20
|
+
model_info,
|
|
20
21
|
repo_exists,
|
|
21
22
|
)
|
|
22
23
|
from huggingface_hub.errors import (
|
|
@@ -56,6 +57,10 @@ FRAMEWORKS = Literal[
|
|
|
56
57
|
"PyLate",
|
|
57
58
|
"ColBERT",
|
|
58
59
|
"ColPali",
|
|
60
|
+
"GGUF",
|
|
61
|
+
"safetensors",
|
|
62
|
+
"ONNX",
|
|
63
|
+
"Transformers",
|
|
59
64
|
]
|
|
60
65
|
|
|
61
66
|
MODEL_TYPES = Literal["dense", "cross-encoder", "late-interaction"]
|
|
@@ -82,9 +87,6 @@ def _get_loader_name(
|
|
|
82
87
|
return loader.__name__
|
|
83
88
|
|
|
84
89
|
|
|
85
|
-
_SENTENCE_TRANSFORMER_LIB_NAME: FRAMEWORKS = "Sentence Transformers"
|
|
86
|
-
|
|
87
|
-
|
|
88
90
|
class ModelMeta(BaseModel):
|
|
89
91
|
"""The model metadata object.
|
|
90
92
|
|
|
@@ -319,14 +321,10 @@ class ModelMeta(BaseModel):
|
|
|
319
321
|
model_config = None
|
|
320
322
|
logger.warning(f"Can't get configuration for {model_name}. Error: {e}")
|
|
321
323
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
)
|
|
325
|
-
|
|
326
|
-
else:
|
|
327
|
-
msg = "Model library not recognized, defaulting to Sentence Transformers loader."
|
|
328
|
-
logger.warning(msg)
|
|
329
|
-
warnings.warn(msg)
|
|
324
|
+
hf_frameworks = (
|
|
325
|
+
cls._get_frameworks_from_hf_tags(model_name) if model_name else []
|
|
326
|
+
)
|
|
327
|
+
frameworks.extend(hf_frameworks)
|
|
330
328
|
|
|
331
329
|
if revision is None:
|
|
332
330
|
revisions = _get_repo_commits(model_name, "model")
|
|
@@ -386,8 +384,6 @@ class ModelMeta(BaseModel):
|
|
|
386
384
|
else model.model_card_data.base_model
|
|
387
385
|
)
|
|
388
386
|
meta = cls._from_hub(name, revision, compute_metadata)
|
|
389
|
-
if _SENTENCE_TRANSFORMER_LIB_NAME not in meta.framework:
|
|
390
|
-
meta.framework.append("Sentence Transformers")
|
|
391
387
|
meta.revision = model.model_card_data.base_model_revision or meta.revision
|
|
392
388
|
meta.max_tokens = model.max_seq_length
|
|
393
389
|
meta.embed_dim = model.get_sentence_embedding_dimension()
|
|
@@ -413,8 +409,6 @@ class ModelMeta(BaseModel):
|
|
|
413
409
|
The generated ModelMeta.
|
|
414
410
|
"""
|
|
415
411
|
meta = cls._from_hub(model, revision, compute_metadata)
|
|
416
|
-
if _SENTENCE_TRANSFORMER_LIB_NAME not in meta.framework:
|
|
417
|
-
meta.framework.append("Sentence Transformers")
|
|
418
412
|
meta.modalities = ["text"]
|
|
419
413
|
|
|
420
414
|
if model and compute_metadata and _repo_exists(model):
|
|
@@ -461,8 +455,6 @@ class ModelMeta(BaseModel):
|
|
|
461
455
|
from mteb.models import CrossEncoderWrapper
|
|
462
456
|
|
|
463
457
|
meta = cls._from_hub(model.model.name_or_path, revision, compute_metadata)
|
|
464
|
-
if _SENTENCE_TRANSFORMER_LIB_NAME not in meta.framework:
|
|
465
|
-
meta.framework.append("Sentence Transformers")
|
|
466
458
|
meta.revision = model.config._commit_hash or meta.revision
|
|
467
459
|
meta.loader = CrossEncoderWrapper
|
|
468
460
|
meta.embed_dim = None
|
|
@@ -644,6 +636,43 @@ class ModelMeta(BaseModel):
|
|
|
644
636
|
return release_date
|
|
645
637
|
return None
|
|
646
638
|
|
|
639
|
+
@staticmethod
|
|
640
|
+
def _get_frameworks_from_hf_tags(model_name: str) -> list[FRAMEWORKS]:
|
|
641
|
+
"""Extract frameworks supported by the model from HuggingFace model tags.
|
|
642
|
+
|
|
643
|
+
Args:
|
|
644
|
+
model_name: HuggingFace model name
|
|
645
|
+
|
|
646
|
+
Returns:
|
|
647
|
+
List of framework names found in tags. Defaults to empty list if no frameworks found.
|
|
648
|
+
"""
|
|
649
|
+
try:
|
|
650
|
+
info = model_info(model_name)
|
|
651
|
+
if not info.tags:
|
|
652
|
+
return []
|
|
653
|
+
except Exception as e:
|
|
654
|
+
logger.warning(
|
|
655
|
+
f"Failed to fetch frameworks from HuggingFace tags for {model_name}: {e}"
|
|
656
|
+
)
|
|
657
|
+
return []
|
|
658
|
+
|
|
659
|
+
# Mapping from HuggingFace tags to MTEB framework names
|
|
660
|
+
tag_to_framework: dict[str, FRAMEWORKS] = {
|
|
661
|
+
"sentence-transformers": "Sentence Transformers",
|
|
662
|
+
"transformers": "Transformers",
|
|
663
|
+
"onnx": "ONNX",
|
|
664
|
+
"safetensors": "safetensors",
|
|
665
|
+
"gguf": "GGUF",
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
frameworks: list[FRAMEWORKS] = []
|
|
669
|
+
|
|
670
|
+
for framework_tag in tag_to_framework.keys():
|
|
671
|
+
if framework_tag in info.tags:
|
|
672
|
+
frameworks.append(tag_to_framework[framework_tag])
|
|
673
|
+
|
|
674
|
+
return frameworks
|
|
675
|
+
|
|
647
676
|
def to_python(self) -> str:
|
|
648
677
|
"""Returns a string representation of the model."""
|
|
649
678
|
return _pydantic_instance_to_code(self)
|
|
@@ -432,11 +432,11 @@ class BenchmarkResults(BaseModel):
|
|
|
432
432
|
out_file.write(self.model_dump_json(indent=2))
|
|
433
433
|
|
|
434
434
|
@classmethod
|
|
435
|
-
def from_validated(cls, **data) -> BenchmarkResults:
|
|
435
|
+
def from_validated(cls, **data: Any) -> BenchmarkResults:
|
|
436
436
|
"""Create BenchmarkResults from validated data.
|
|
437
437
|
|
|
438
438
|
Args:
|
|
439
|
-
data:
|
|
439
|
+
**data: Arbitrary keyword arguments containing the data.
|
|
440
440
|
|
|
441
441
|
Returns:
|
|
442
442
|
An instance of BenchmarkResults.
|
|
@@ -25,7 +25,7 @@ class KurdishSentimentClassification(AbsTaskClassification):
|
|
|
25
25
|
dialect=["Sorani"],
|
|
26
26
|
sample_creation="found",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@article{
|
|
28
|
+
@article{badawi2024kurdisent,
|
|
29
29
|
author = {Badawi, Soran and Kazemi, Arefeh and Rezaie, Vali},
|
|
30
30
|
doi = {10.1007/s10579-023-09716-6},
|
|
31
31
|
journal = {Language Resources and Evaluation},
|
|
@@ -62,7 +62,7 @@ class KurdishSentimentClassificationV2(AbsTaskClassification):
|
|
|
62
62
|
dialect=["Sorani"],
|
|
63
63
|
sample_creation="found",
|
|
64
64
|
bibtex_citation=r"""
|
|
65
|
-
@article{
|
|
65
|
+
@article{badawi2024kurdisent,
|
|
66
66
|
author = {Badawi, Soran and Kazemi, Arefeh and Rezaie, Vali},
|
|
67
67
|
doi = {10.1007/s10579-023-09716-6},
|
|
68
68
|
journal = {Language Resources and Evaluation},
|
|
@@ -25,7 +25,7 @@ class HUMEWikiCitiesClustering(AbsTaskClusteringLegacy):
|
|
|
25
25
|
dialect=[],
|
|
26
26
|
sample_creation="found",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@online{
|
|
28
|
+
@online{wikidump2024,
|
|
29
29
|
author = {Wikimedia Foundation},
|
|
30
30
|
title = {Wikimedia Downloads},
|
|
31
31
|
url = {https://dumps.wikimedia.org},
|
|
@@ -25,7 +25,7 @@ class WikiCitiesClustering(AbsTaskClusteringLegacy):
|
|
|
25
25
|
dialect=[],
|
|
26
26
|
sample_creation="found",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@online{
|
|
28
|
+
@online{wikidump2024,
|
|
29
29
|
author = {Wikimedia Foundation},
|
|
30
30
|
title = {Wikimedia Downloads},
|
|
31
31
|
url = {https://dumps.wikimedia.org},
|
|
@@ -226,7 +226,7 @@ class ThuNewsClusteringFastS2S(AbsTaskClustering):
|
|
|
226
226
|
dialect=[],
|
|
227
227
|
sample_creation="found",
|
|
228
228
|
bibtex_citation=r"""
|
|
229
|
-
@software{
|
|
229
|
+
@software{sun2016thuctc,
|
|
230
230
|
author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. and Liu, Z.},
|
|
231
231
|
note = {THU Chinese Text Classification Toolkit},
|
|
232
232
|
publisher = {THU Natural Language Processing Lab},
|
|
@@ -285,7 +285,7 @@ class ThuNewsClusteringFastP2P(AbsTaskClustering):
|
|
|
285
285
|
dialect=[],
|
|
286
286
|
sample_creation="found",
|
|
287
287
|
bibtex_citation=r"""
|
|
288
|
-
@software{
|
|
288
|
+
@software{sun2016thuctc,
|
|
289
289
|
author = {Sun, M. and Li, J. and Guo, Z. and Yu, Z. and Zheng, Y. and Si, X. and Liu, Z.},
|
|
290
290
|
note = {THU Chinese Text Classification Toolkit},
|
|
291
291
|
publisher = {THU Natural Language Processing Lab},
|
|
@@ -44,7 +44,7 @@ class WikipediaRerankingMultilingual(AbsTaskRetrieval):
|
|
|
44
44
|
dialect=[],
|
|
45
45
|
sample_creation="LM-generated and verified",
|
|
46
46
|
bibtex_citation=r"""
|
|
47
|
-
@online{
|
|
47
|
+
@online{wikidump2024,
|
|
48
48
|
author = {Wikimedia Foundation},
|
|
49
49
|
title = {Wikimedia Downloads},
|
|
50
50
|
url = {https://dumps.wikimedia.org},
|
|
@@ -25,7 +25,7 @@ class CUB200I2I(AbsTaskRetrieval):
|
|
|
25
25
|
modalities=["image"],
|
|
26
26
|
sample_creation="created",
|
|
27
27
|
bibtex_citation=r"""
|
|
28
|
-
@article{
|
|
28
|
+
@article{welinder2010caltech,
|
|
29
29
|
author = {Welinder, Peter and Branson, Steve and Mita, Takeshi and Wah, Catherine and Schroff, Florian and Belongie, Serge and Perona, Pietro},
|
|
30
30
|
month = {09},
|
|
31
31
|
pages = {},
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from .argu_ana_vn_retrieval import ArguAnaVN
|
|
2
|
-
from .climate_fevervn_retrieval import ClimateFEVERVN
|
|
2
|
+
from .climate_fevervn_retrieval import ClimateFEVERVN, NanoClimateFEVERVN
|
|
3
3
|
from .cqa_dupstack_android_vn_retrieval import CQADupstackAndroidVN
|
|
4
4
|
from .cqa_dupstack_gis_vn_retrieval import CQADupstackGisVN
|
|
5
5
|
from .cqa_dupstack_mathematica_vn_retrieval import CQADupstackMathematicaVN
|
|
@@ -10,19 +10,20 @@ from .cqa_dupstack_tex_vn_retrieval import CQADupstackTexVN
|
|
|
10
10
|
from .cqa_dupstack_unix_vn_retrieval import CQADupstackUnixVN
|
|
11
11
|
from .cqa_dupstack_webmasters_vn_retrieval import CQADupstackWebmastersVN
|
|
12
12
|
from .cqa_dupstack_wordpress_vn_retrieval import CQADupstackWordpressVN
|
|
13
|
-
from .db_pedia_vn_retrieval import DBPediaVN
|
|
14
|
-
from .fevervn_retrieval import FEVERVN
|
|
13
|
+
from .db_pedia_vn_retrieval import DBPediaVN, NanoDBPediaVN
|
|
14
|
+
from .fevervn_retrieval import FEVERVN, NanoFEVERVN
|
|
15
15
|
from .fi_qa2018_vn_retrieval import FiQA2018VN
|
|
16
16
|
from .green_node_table_markdown_retrieval import GreenNodeTableMarkdownRetrieval
|
|
17
|
-
from .hotpot_qavn_retrieval import HotpotQAVN
|
|
18
|
-
from .msmarcovn_retrieval import MSMARCOVN
|
|
17
|
+
from .hotpot_qavn_retrieval import HotpotQAVN, NanoHotpotQAVN
|
|
18
|
+
from .msmarcovn_retrieval import MSMARCOVN, NanoMSMARCOVN
|
|
19
19
|
from .nf_corpus_vn_retrieval import NFCorpusVN
|
|
20
|
-
from .nqvn_retrieval import NQVN
|
|
20
|
+
from .nqvn_retrieval import NQVN, NanoNQVN
|
|
21
21
|
from .quora_vn_retrieval import QuoraVN
|
|
22
22
|
from .sci_fact_vn_retrieval import SciFactVN
|
|
23
23
|
from .scidocsvn_retrieval import SCIDOCSVN
|
|
24
24
|
from .touche2020_vn_retrieval import Touche2020VN
|
|
25
25
|
from .treccovidvn_retrieval import TRECCOVIDVN
|
|
26
|
+
from .tvpl_retrieval import TVPLRetrieval
|
|
26
27
|
from .vie_qu_ad_retrieval import VieQuADRetrieval
|
|
27
28
|
from .zac_legal_text_retrieval import ZacLegalTextRetrieval
|
|
28
29
|
|
|
@@ -49,8 +50,15 @@ __all__ = [
|
|
|
49
50
|
"GreenNodeTableMarkdownRetrieval",
|
|
50
51
|
"HotpotQAVN",
|
|
51
52
|
"NFCorpusVN",
|
|
53
|
+
"NanoClimateFEVERVN",
|
|
54
|
+
"NanoDBPediaVN",
|
|
55
|
+
"NanoFEVERVN",
|
|
56
|
+
"NanoHotpotQAVN",
|
|
57
|
+
"NanoMSMARCOVN",
|
|
58
|
+
"NanoNQVN",
|
|
52
59
|
"QuoraVN",
|
|
53
60
|
"SciFactVN",
|
|
61
|
+
"TVPLRetrieval",
|
|
54
62
|
"Touche2020VN",
|
|
55
63
|
"VieQuADRetrieval",
|
|
56
64
|
"ZacLegalTextRetrieval",
|
|
@@ -36,3 +36,42 @@ class ClimateFEVERVN(AbsTaskRetrieval):
|
|
|
36
36
|
""",
|
|
37
37
|
adapted_from=["ClimateFEVER"],
|
|
38
38
|
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NanoClimateFEVERVN(AbsTaskRetrieval):
|
|
42
|
+
metadata = TaskMetadata(
|
|
43
|
+
name="NanoClimateFEVER-VN",
|
|
44
|
+
description="NanoClimateFEVERVN is a small version of A translated dataset from CLIMATE-FEVER is a dataset adopting the FEVER methodology that consists of 1,535 real-world claims regarding climate-change. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
45
|
+
reference="https://www.sustainablefinance.uzh.ch/en/research/climate-fever.html",
|
|
46
|
+
dataset={
|
|
47
|
+
"path": "GreenNode/nano-climate-fever-vn",
|
|
48
|
+
"revision": "1852e852f07403d4529a8520d52b91ff6d57869b",
|
|
49
|
+
},
|
|
50
|
+
type="Retrieval",
|
|
51
|
+
category="t2t",
|
|
52
|
+
eval_splits=["test"],
|
|
53
|
+
eval_langs=["vie-Latn"],
|
|
54
|
+
main_score="ndcg_at_10",
|
|
55
|
+
date=("2025-07-29", "2025-07-30"),
|
|
56
|
+
license="cc-by-sa-4.0",
|
|
57
|
+
annotations_creators="derived",
|
|
58
|
+
dialect=[],
|
|
59
|
+
sample_creation="machine-translated and LM verified",
|
|
60
|
+
domains=["Encyclopaedic", "Written"],
|
|
61
|
+
task_subtypes=["Claim verification"],
|
|
62
|
+
bibtex_citation=r"""
|
|
63
|
+
@misc{pham2025vnmtebvietnamesemassivetext,
|
|
64
|
+
archiveprefix = {arXiv},
|
|
65
|
+
author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
|
|
66
|
+
eprint = {2507.21500},
|
|
67
|
+
primaryclass = {cs.CL},
|
|
68
|
+
title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
|
|
69
|
+
url = {https://arxiv.org/abs/2507.21500},
|
|
70
|
+
year = {2025},
|
|
71
|
+
}
|
|
72
|
+
""",
|
|
73
|
+
prompt={
|
|
74
|
+
"query": "Given a claim about climate change, retrieve documents that support or refute the claim"
|
|
75
|
+
},
|
|
76
|
+
adapted_from=["ClimateFEVER-VN"],
|
|
77
|
+
)
|
|
@@ -36,3 +36,42 @@ class DBPediaVN(AbsTaskRetrieval):
|
|
|
36
36
|
""",
|
|
37
37
|
adapted_from=["DBPedia"],
|
|
38
38
|
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NanoDBPediaVN(AbsTaskRetrieval):
|
|
42
|
+
metadata = TaskMetadata(
|
|
43
|
+
name="NanoDBPedia-VN",
|
|
44
|
+
description="NanoDBPediaVN is a small version of A translated dataset from DBpedia-Entity is a standard test collection for entity search over the DBpedia knowledge base The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
45
|
+
reference="https://github.com/iai-group/DBpedia-Entity/",
|
|
46
|
+
dataset={
|
|
47
|
+
"path": "GreenNode/nano-dbpedia-vn",
|
|
48
|
+
"revision": "bbc3259bc63bf1e250d7034024092cc3230d5850",
|
|
49
|
+
},
|
|
50
|
+
type="Retrieval",
|
|
51
|
+
category="t2t",
|
|
52
|
+
eval_splits=["test"],
|
|
53
|
+
eval_langs=["vie-Latn"],
|
|
54
|
+
main_score="ndcg_at_10",
|
|
55
|
+
date=("2025-07-29", "2025-07-30"),
|
|
56
|
+
license="cc-by-sa-4.0",
|
|
57
|
+
annotations_creators="derived",
|
|
58
|
+
dialect=[],
|
|
59
|
+
sample_creation="machine-translated and LM verified",
|
|
60
|
+
domains=["Written", "Encyclopaedic"],
|
|
61
|
+
task_subtypes=[],
|
|
62
|
+
bibtex_citation=r"""
|
|
63
|
+
@misc{pham2025vnmtebvietnamesemassivetext,
|
|
64
|
+
archiveprefix = {arXiv},
|
|
65
|
+
author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
|
|
66
|
+
eprint = {2507.21500},
|
|
67
|
+
primaryclass = {cs.CL},
|
|
68
|
+
title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
|
|
69
|
+
url = {https://arxiv.org/abs/2507.21500},
|
|
70
|
+
year = {2025},
|
|
71
|
+
}
|
|
72
|
+
""",
|
|
73
|
+
prompt={
|
|
74
|
+
"query": "Given a query, retrieve relevant entity descriptions from DBPedia"
|
|
75
|
+
},
|
|
76
|
+
adapted_from=["DBPedia-VN"],
|
|
77
|
+
)
|
|
@@ -36,3 +36,42 @@ class FEVERVN(AbsTaskRetrieval):
|
|
|
36
36
|
""",
|
|
37
37
|
adapted_from=["FEVER"],
|
|
38
38
|
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NanoFEVERVN(AbsTaskRetrieval):
|
|
42
|
+
metadata = TaskMetadata(
|
|
43
|
+
name="NanoFEVER-VN",
|
|
44
|
+
dataset={
|
|
45
|
+
"path": "GreenNode/nano-fever-vn",
|
|
46
|
+
"revision": "457ca6b058ed19b28f2359e2d816d7527af6bef8",
|
|
47
|
+
},
|
|
48
|
+
description="NanoFEVERVN is a small version of A translated dataset from FEVER (Fact Extraction and VERification) consists of 185,445 claims generated by altering sentences extracted from Wikipedia and subsequently verified without knowledge of the sentence they were derived from. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
49
|
+
reference="https://fever.ai/",
|
|
50
|
+
type="Retrieval",
|
|
51
|
+
category="t2t",
|
|
52
|
+
eval_splits=["test"],
|
|
53
|
+
eval_langs=["vie-Latn"],
|
|
54
|
+
main_score="ndcg_at_10",
|
|
55
|
+
date=("2025-07-29", "2025-07-30"),
|
|
56
|
+
license="cc-by-sa-4.0",
|
|
57
|
+
annotations_creators="derived",
|
|
58
|
+
dialect=[],
|
|
59
|
+
sample_creation="machine-translated and LM verified",
|
|
60
|
+
domains=["Encyclopaedic", "Written"],
|
|
61
|
+
task_subtypes=["Claim verification"],
|
|
62
|
+
bibtex_citation=r"""
|
|
63
|
+
@misc{pham2025vnmtebvietnamesemassivetext,
|
|
64
|
+
archiveprefix = {arXiv},
|
|
65
|
+
author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
|
|
66
|
+
eprint = {2507.21500},
|
|
67
|
+
primaryclass = {cs.CL},
|
|
68
|
+
title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
|
|
69
|
+
url = {https://arxiv.org/abs/2507.21500},
|
|
70
|
+
year = {2025},
|
|
71
|
+
}
|
|
72
|
+
""",
|
|
73
|
+
prompt={
|
|
74
|
+
"query": "Given a claim, retrieve documents that support or refute the claim"
|
|
75
|
+
},
|
|
76
|
+
adapted_from=["FEVER-VN"],
|
|
77
|
+
)
|
|
@@ -36,3 +36,42 @@ class HotpotQAVN(AbsTaskRetrieval):
|
|
|
36
36
|
""",
|
|
37
37
|
adapted_from=["HotpotQA"],
|
|
38
38
|
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NanoHotpotQAVN(AbsTaskRetrieval):
|
|
42
|
+
metadata = TaskMetadata(
|
|
43
|
+
name="NanoHotpotQA-VN",
|
|
44
|
+
dataset={
|
|
45
|
+
"path": "GreenNode/nano-hotpotqa-vn",
|
|
46
|
+
"revision": "f4de19a2fae1a582de114e5bcd178bb262183113",
|
|
47
|
+
},
|
|
48
|
+
description="NanoHotpotQAVN is a small version of A translated dataset from HotpotQA is a question answering dataset featuring natural, multi-hop questions, with strong supervision for supporting facts to enable more explainable question answering systems. The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
49
|
+
reference="https://hotpotqa.github.io/",
|
|
50
|
+
type="Retrieval",
|
|
51
|
+
category="t2t",
|
|
52
|
+
eval_splits=["test"],
|
|
53
|
+
eval_langs=["vie-Latn"],
|
|
54
|
+
main_score="ndcg_at_10",
|
|
55
|
+
date=("2025-07-29", "2025-07-30"),
|
|
56
|
+
license="cc-by-sa-4.0",
|
|
57
|
+
annotations_creators="derived",
|
|
58
|
+
dialect=[],
|
|
59
|
+
sample_creation="machine-translated and LM verified",
|
|
60
|
+
domains=["Web", "Written"],
|
|
61
|
+
task_subtypes=["Question answering"],
|
|
62
|
+
bibtex_citation=r"""
|
|
63
|
+
@misc{pham2025vnmtebvietnamesemassivetext,
|
|
64
|
+
archiveprefix = {arXiv},
|
|
65
|
+
author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
|
|
66
|
+
eprint = {2507.21500},
|
|
67
|
+
primaryclass = {cs.CL},
|
|
68
|
+
title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
|
|
69
|
+
url = {https://arxiv.org/abs/2507.21500},
|
|
70
|
+
year = {2025},
|
|
71
|
+
}
|
|
72
|
+
""",
|
|
73
|
+
prompt={
|
|
74
|
+
"query": "Given a multi-hop question, retrieve documents that can help answer the question"
|
|
75
|
+
},
|
|
76
|
+
adapted_from=["HotpotQA-VN"],
|
|
77
|
+
)
|
|
@@ -47,3 +47,51 @@ class MSMARCOVN(AbsTaskRetrieval):
|
|
|
47
47
|
""",
|
|
48
48
|
adapted_from=["MSMARCO"],
|
|
49
49
|
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class NanoMSMARCOVN(AbsTaskRetrieval):
|
|
53
|
+
metadata = TaskMetadata(
|
|
54
|
+
name="NanoMSMARCO-VN",
|
|
55
|
+
dataset={
|
|
56
|
+
"path": "GreenNode/nano-msmarco-vn",
|
|
57
|
+
"revision": "f149369c82ec228b05b0f6677699ab4bfbab73f6",
|
|
58
|
+
},
|
|
59
|
+
description="NanoMSMARCOVN is a small version of A translated dataset from MS MARCO is a collection of datasets focused on deep learning in search The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
|
|
60
|
+
reference="https://microsoft.github.io/msmarco/",
|
|
61
|
+
type="Retrieval",
|
|
62
|
+
category="t2t",
|
|
63
|
+
eval_splits=["dev"],
|
|
64
|
+
eval_langs=["vie-Latn"],
|
|
65
|
+
main_score="ndcg_at_10",
|
|
66
|
+
date=("2025-07-29", "2025-07-30"),
|
|
67
|
+
license="cc-by-sa-4.0",
|
|
68
|
+
annotations_creators="derived",
|
|
69
|
+
dialect=[],
|
|
70
|
+
sample_creation="machine-translated and LM verified",
|
|
71
|
+
domains=[
|
|
72
|
+
"Encyclopaedic",
|
|
73
|
+
"Academic",
|
|
74
|
+
"Blog",
|
|
75
|
+
"News",
|
|
76
|
+
"Medical",
|
|
77
|
+
"Government",
|
|
78
|
+
"Reviews",
|
|
79
|
+
"Non-fiction",
|
|
80
|
+
"Social",
|
|
81
|
+
"Web",
|
|
82
|
+
],
|
|
83
|
+
task_subtypes=["Question answering"],
|
|
84
|
+
bibtex_citation=r"""
|
|
85
|
+
@misc{pham2025vnmtebvietnamesemassivetext,
|
|
86
|
+
archiveprefix = {arXiv},
|
|
87
|
+
author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
|
|
88
|
+
eprint = {2507.21500},
|
|
89
|
+
primaryclass = {cs.CL},
|
|
90
|
+
title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
|
|
91
|
+
url = {https://arxiv.org/abs/2507.21500},
|
|
92
|
+
year = {2025},
|
|
93
|
+
}
|
|
94
|
+
""",
|
|
95
|
+
prompt={"query": "Given a query, retrieve relevant documents from MS MARCO-VN"},
|
|
96
|
+
adapted_from=["MSMARCO-VN"],
|
|
97
|
+
)
|
|
@@ -36,3 +36,42 @@ class NQVN(AbsTaskRetrieval):
|
|
|
36
36
|
""",
|
|
37
37
|
adapted_from=["NQ"],
|
|
38
38
|
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class NanoNQVN(AbsTaskRetrieval):
    """Nano (subsampled) Vietnamese translation of the Natural Questions retrieval task."""

    metadata = TaskMetadata(
        name="NanoNQ-VN",
        dataset={
            "path": "GreenNode/nano-nq-vn",
            "revision": "1ad4d6556fe0e5314994839089ce070fb0db8b19",
        },
        # Fix: the description previously said the source was "NFCorpus: A
        # Full-Text Learning to Rank Dataset for Medical Information Retrieval"
        # (copy-paste error). This task derives from NQ (Natural Questions),
        # as the reference URL, the Encyclopaedic domain, the Wikipedia prompt
        # and adapted_from=["NQ-VN"] all show.
        description="NanoNQVN is a small version of A translated dataset from Natural Questions: A Benchmark for Question Answering Research The process of creating the VN-MTEB (Vietnamese Massive Text Embedding Benchmark) from English samples involves a new automated system: - The system uses large language models (LLMs), specifically Coherence's Aya model, for translation. - Applies advanced embedding models to filter the translations. - Use LLM-as-a-judge to scoring the quality of the samples base on multiple criteria.",
        reference="https://ai.google.com/research/NaturalQuestions/",
        type="Retrieval",
        category="t2t",
        eval_splits=["test"],
        eval_langs=["vie-Latn"],
        main_score="ndcg_at_10",
        date=("2025-07-29", "2025-07-30"),
        license="cc-by-sa-4.0",
        annotations_creators="derived",
        dialect=[],
        sample_creation="machine-translated and LM verified",
        domains=["Written", "Encyclopaedic"],
        task_subtypes=["Question answering"],
        bibtex_citation=r"""
@misc{pham2025vnmtebvietnamesemassivetext,
  archiveprefix = {arXiv},
  author = {Loc Pham and Tung Luu and Thu Vo and Minh Nguyen and Viet Hoang},
  eprint = {2507.21500},
  primaryclass = {cs.CL},
  title = {VN-MTEB: Vietnamese Massive Text Embedding Benchmark},
  url = {https://arxiv.org/abs/2507.21500},
  year = {2025},
}
""",
        prompt={
            "query": "Given a question, retrieve Wikipedia passages that answer the question"
        },
        adapted_from=["NQ-VN"],
    )
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from mteb.abstasks.retrieval import AbsTaskRetrieval
|
|
2
|
+
from mteb.abstasks.task_metadata import TaskMetadata
|
|
3
|
+
|
|
4
|
+
# Number of evaluation samples (not referenced anywhere in this file's visible
# code — presumably consumed by a subsampling step elsewhere; TODO confirm).
TEST_SAMPLES = 2048
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class TVPLRetrieval(AbsTaskRetrieval):
    """Retrieval over Vietnamese legal documents from Thu vien phap luat (TVPL)."""

    metadata = TaskMetadata(
        name="TVPLRetrieval",
        # Dataset location and pinned revision on the Hugging Face Hub.
        dataset={
            "path": "GreenNode/TVPL-Retrieval-VN",
            "revision": "6661dba4dfedff606537732d9f35f2c3738b081a",
        },
        description="A Vietnamese dataset for evaluating legal text retrieval. From Thu vien phap luat (TVPL) dataset: Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models.",
        reference="https://aclanthology.org/2020.coling-main.233.pdf",
        # Task typing and evaluation setup.
        type="Retrieval",
        category="t2t",
        modalities=["text"],
        eval_splits=["test"],
        eval_langs=["vie-Latn"],
        main_score="ndcg_at_10",
        date=("2025-07-29", "2025-07-30"),
        domains=["Legal"],
        task_subtypes=["Question answering"],
        # Provenance and licensing.
        license="cc-by-sa-4.0",
        annotations_creators="human-annotated",
        sample_creation="found",
        dialect=[],
        bibtex_citation=r"""
@article{10.1145/3732938,
  address = {New York, NY, USA},
  author = {Le, Huong and Luu, Ngoc and Nguyen, Thanh and Dao, Tuan and Dinh, Sang},
  doi = {10.1145/3732938},
  issn = {2375-4699},
  journal = {ACM Trans. Asian Low-Resour. Lang. Inf. Process.},
  publisher = {Association for Computing Machinery},
  title = {Optimizing Answer Generator in Vietnamese Legal Question Answering Systems Using Language Models},
  url = {https://doi.org/10.1145/3732938},
  year = {2025},
}
""",
    )
|
|
@@ -24,5 +24,19 @@ class ZacLegalTextRetrieval(AbsTaskRetrieval):
|
|
|
24
24
|
annotations_creators="human-annotated",
|
|
25
25
|
dialect=[],
|
|
26
26
|
sample_creation="found",
|
|
27
|
-
bibtex_citation=""
|
|
27
|
+
bibtex_citation=r"""
|
|
28
|
+
@inproceedings{10.1007/978-981-95-1746-6_17,
|
|
29
|
+
address = {Singapore},
|
|
30
|
+
author = {Pham, Bao Loc
|
|
31
|
+
and Hoang, Quoc Viet
|
|
32
|
+
and Luu, Quy Tung
|
|
33
|
+
and Vo, Trong Thu},
|
|
34
|
+
booktitle = {Proceedings of the Fifth International Conference on Intelligent Systems and Networks},
|
|
35
|
+
isbn = {978-981-95-1746-6},
|
|
36
|
+
pages = {153--163},
|
|
37
|
+
publisher = {Springer Nature Singapore},
|
|
38
|
+
title = {GN-TRVN: A Benchmark for Vietnamese Table Markdown Retrieval Task},
|
|
39
|
+
year = {2026},
|
|
40
|
+
}
|
|
41
|
+
""",
|
|
28
42
|
)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: mteb
|
|
3
|
-
Version: 2.6.4
|
|
3
|
+
Version: 2.6.6
|
|
4
4
|
Summary: Massive Text Embedding Benchmark
|
|
5
5
|
Author-email: MTEB Contributors <niklas@huggingface.co>, Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Nouamane Tazi <nouamane@huggingface.co>, Nils Reimers <info@nils-reimers.de>
|
|
6
6
|
Maintainer-email: Kenneth Enevoldsen <kenneth.enevoldsen@cas.au.dk>, Roman Solomatin <risolomatin@gmail.com>, Isaac Chung <chungisaac1217@gmail.com>
|
|
@@ -32,8 +32,6 @@ Requires-Dist: rich>=0.0.0
|
|
|
32
32
|
Requires-Dist: pytrec-eval-terrier>=0.5.6
|
|
33
33
|
Requires-Dist: pydantic>=2.0.0
|
|
34
34
|
Requires-Dist: polars>=0.20.22
|
|
35
|
-
Requires-Dist: torch<2.9.0; python_full_version < "3.14"
|
|
36
|
-
Requires-Dist: torch>=2.9.0; python_full_version >= "3.14"
|
|
37
35
|
Provides-Extra: image
|
|
38
36
|
Requires-Dist: torchvision>0.2.1; extra == "image"
|
|
39
37
|
Requires-Dist: transformers[torch-vision,vision]; extra == "image"
|
|
@@ -97,6 +95,8 @@ Requires-Dist: colpali_engine>=0.3.12; python_full_version < "3.14" and extra ==
|
|
|
97
95
|
Provides-Extra: colqwen3
|
|
98
96
|
Requires-Dist: transformers>=4.57; extra == "colqwen3"
|
|
99
97
|
Requires-Dist: torchvision>=0.22.1; extra == "colqwen3"
|
|
98
|
+
Provides-Extra: sauerkrautlm-colpali
|
|
99
|
+
Requires-Dist: sauerkrautlm-colpali>=0.1.0; python_full_version < "3.14" and extra == "sauerkrautlm-colpali"
|
|
100
100
|
Provides-Extra: xet
|
|
101
101
|
Requires-Dist: huggingface_hub>=0.32.0; extra == "xet"
|
|
102
102
|
Provides-Extra: youtu
|