mteb 2.3.10__py3-none-any.whl → 2.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. mteb/_evaluators/image/imagetext_pairclassification_evaluator.py +7 -2
  2. mteb/abstasks/_statistics_calculation.py +6 -2
  3. mteb/abstasks/classification.py +0 -2
  4. mteb/benchmarks/benchmarks/__init__.py +2 -0
  5. mteb/benchmarks/benchmarks/benchmarks.py +57 -0
  6. mteb/deprecated_evaluator.py +8 -13
  7. mteb/descriptive_stats/Reranking/JQaRARerankingLite.json +35 -0
  8. mteb/descriptive_stats/Reranking/JaCWIRRerankingLite.json +35 -0
  9. mteb/descriptive_stats/Retrieval/JaCWIRRetrievalLite.json +30 -0
  10. mteb/descriptive_stats/Retrieval/JaqketRetrievalLite.json +30 -0
  11. mteb/descriptive_stats/Retrieval/MIRACLJaRetrievalLite.json +30 -0
  12. mteb/descriptive_stats/Retrieval/MrTyDiJaRetrievalLite.json +30 -0
  13. mteb/evaluate.py +2 -33
  14. mteb/leaderboard/figures.py +1 -1
  15. mteb/leaderboard/table.py +1 -11
  16. mteb/models/abs_encoder.py +21 -17
  17. mteb/models/cache_wrappers/cache_backends/_hash_utils.py +2 -2
  18. mteb/models/get_model_meta.py +3 -123
  19. mteb/models/instruct_wrapper.py +2 -1
  20. mteb/models/model_implementations/bica_model.py +34 -0
  21. mteb/models/model_implementations/colpali_models.py +7 -2
  22. mteb/models/model_implementations/colqwen_models.py +1 -1
  23. mteb/models/model_implementations/gme_v_models.py +9 -5
  24. mteb/models/model_implementations/google_models.py +10 -0
  25. mteb/models/model_implementations/granite_vision_embedding_models.py +6 -2
  26. mteb/models/model_implementations/jasper_models.py +2 -2
  27. mteb/models/model_implementations/jina_models.py +1 -1
  28. mteb/models/model_implementations/mod_models.py +204 -0
  29. mteb/models/model_implementations/nomic_models.py +142 -4
  30. mteb/models/model_implementations/nomic_models_vision.py +6 -2
  31. mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +6 -2
  32. mteb/models/model_implementations/pylate_models.py +1 -4
  33. mteb/models/model_implementations/random_baseline.py +6 -2
  34. mteb/models/model_implementations/seed_1_6_embedding_models.py +7 -2
  35. mteb/models/model_implementations/voyage_v.py +6 -2
  36. mteb/models/model_meta.py +396 -19
  37. mteb/models/sentence_transformer_wrapper.py +2 -7
  38. mteb/tasks/reranking/jpn/__init__.py +9 -1
  39. mteb/tasks/reranking/jpn/j_qa_ra_reranking_lite.py +49 -0
  40. mteb/tasks/reranking/jpn/ja_cwir_reranking_lite.py +47 -0
  41. mteb/tasks/retrieval/code/fresh_stack_retrieval.py +8 -5
  42. mteb/tasks/retrieval/jpn/__init__.py +8 -0
  43. mteb/tasks/retrieval/jpn/ja_cwir_retrieval_lite.py +47 -0
  44. mteb/tasks/retrieval/jpn/jaqket_retrieval_lite.py +50 -0
  45. mteb/tasks/retrieval/jpn/miracl_ja_retrieval_lite.py +52 -0
  46. mteb/tasks/retrieval/jpn/mr_tydi_ja_retrieval_lite.py +48 -0
  47. mteb/types/_encoder_io.py +7 -2
  48. {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/METADATA +2 -1
  49. {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/RECORD +53 -39
  50. {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/WHEEL +0 -0
  51. {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/entry_points.txt +0 -0
  52. {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/licenses/LICENSE +0 -0
  53. {mteb-2.3.10.dist-info → mteb-2.4.1.dist-info}/top_level.txt +0 -0
mteb/models/model_meta.py CHANGED
@@ -1,25 +1,46 @@
1
+ from __future__ import annotations
2
+
3
+ import json
1
4
  import logging
5
+ import warnings
2
6
  from collections.abc import Callable, Sequence
3
7
  from dataclasses import field
4
8
  from enum import Enum
9
+ from functools import partial
10
+ from pathlib import Path
5
11
  from typing import TYPE_CHECKING, Any, Literal, cast
6
12
 
7
- from huggingface_hub import get_safetensors_metadata
13
+ from huggingface_hub import (
14
+ GitCommitInfo,
15
+ ModelCard,
16
+ ModelCardData,
17
+ get_safetensors_metadata,
18
+ hf_hub_download,
19
+ list_repo_commits,
20
+ repo_exists,
21
+ )
8
22
  from huggingface_hub.errors import (
23
+ EntryNotFoundError,
9
24
  GatedRepoError,
10
25
  NotASafetensorsRepoError,
26
+ RepositoryNotFoundError,
11
27
  SafetensorsParsingError,
12
28
  )
13
29
  from pydantic import BaseModel, ConfigDict, field_validator
30
+ from transformers import AutoConfig
31
+ from typing_extensions import Self
14
32
 
33
+ from mteb._helpful_enum import HelpfulStrEnum
15
34
  from mteb.languages import check_language_code
35
+ from mteb.models.models_protocols import EncoderProtocol, MTEBModels
16
36
  from mteb.types import ISOLanguageScript, Licenses, Modalities, StrDate, StrURL
17
37
 
18
- from .models_protocols import EncoderProtocol, MTEBModels
19
-
20
38
  if TYPE_CHECKING:
39
+ from sentence_transformers import CrossEncoder, SentenceTransformer
40
+
21
41
  from mteb.abstasks import AbsTask
22
42
 
43
+
23
44
  logger = logging.getLogger(__name__)
24
45
 
25
46
  FRAMEWORKS = Literal[
@@ -37,7 +58,7 @@ FRAMEWORKS = Literal[
37
58
  ]
38
59
 
39
60
 
40
- class ScoringFunction(str, Enum):
61
+ class ScoringFunction(HelpfulStrEnum):
41
62
  """The scoring function used by the models."""
42
63
 
43
64
  COSINE = "cosine"
@@ -58,6 +79,9 @@ def _get_loader_name(
58
79
  return loader.__name__
59
80
 
60
81
 
82
+ _SENTENCE_TRANSFORMER_LIB_NAME = "Sentence Transformers"
83
+
84
+
61
85
  class ModelMeta(BaseModel):
62
86
  """The model metadata object.
63
87
 
@@ -72,7 +96,7 @@ class ModelMeta(BaseModel):
72
96
  models).
73
97
  embed_dim: The dimension of the embeddings produced by the model. Currently all models are assumed to produce fixed-size embeddings.
74
98
  revision: The revision number of the model. If None, it is assumed that the metadata (including the loader) is valid for all revisions of the model.
75
- release_date: The date the model's revision was released.
99
+ release_date: The date the model's revision was released. If None, then release date will be added based on 1st commit in hf repository of model.
76
100
  license: The license under which the model is released. Required if open_weights is True.
77
101
  open_weights: Whether the model is open source or proprietary.
78
102
  public_training_code: A link to the publicly available training code. If None, it is assumed that the training code is not publicly available.
@@ -212,9 +236,198 @@ class ModelMeta(BaseModel):
212
236
  raise ValueError("Model name is not set")
213
237
  return self.name.replace("/", "__").replace(" ", "_")
214
238
 
215
- def is_zero_shot_on(
216
- self, tasks: Sequence["AbsTask"] | Sequence[str]
217
- ) -> bool | None:
239
+ @classmethod
240
+ def _from_hub(
241
+ cls,
242
+ model_name: str | None,
243
+ revision: str | None = None,
244
+ compute_metadata: bool = True,
245
+ ) -> Self:
246
+ """Generates a ModelMeta from a HuggingFace model name.
247
+
248
+ Args:
249
+ model_name: The HuggingFace model name.
250
+ revision: Revision of the model
251
+ compute_metadata: Add metadata based on model card
252
+
253
+ Returns:
254
+ The generated ModelMeta.
255
+ """
256
+ from mteb.models import sentence_transformers_loader
257
+
258
+ loader = sentence_transformers_loader
259
+ frameworks: list[FRAMEWORKS] = ["PyTorch"]
260
+ model_license = None
261
+ reference = None
262
+ n_parameters = None
263
+ memory_usage_mb = None
264
+ release_date = None
265
+ embedding_dim = None
266
+ max_tokens = None
267
+
268
+ if model_name and compute_metadata and repo_exists(model_name):
269
+ reference = "https://huggingface.co/" + model_name
270
+ card = ModelCard.load(model_name)
271
+ card_data: ModelCardData = card.data
272
+ try:
273
+ model_config = AutoConfig.from_pretrained(model_name)
274
+ except Exception as e:
275
+ # some models can't load AutoConfig (e.g. `average_word_embeddings_levy_dependency`)
276
+ model_config = None
277
+ logger.warning(f"Can't get configuration for {model_name}. Error: {e}")
278
+
279
+ if (
280
+ card_data.library_name == _SENTENCE_TRANSFORMER_LIB_NAME
281
+ or _SENTENCE_TRANSFORMER_LIB_NAME in card_data.tags
282
+ ):
283
+ frameworks.append(_SENTENCE_TRANSFORMER_LIB_NAME)
284
+ else:
285
+ msg = "Model library not recognized, defaulting to Sentence Transformers loader."
286
+ logger.warning(msg)
287
+ warnings.warn(msg)
288
+
289
+ if revision is None:
290
+ revisions = _get_repo_commits(model_name, "model")
291
+ revision = revisions[0].commit_id if revisions else None
292
+
293
+ release_date = cls.fetch_release_date(model_name)
294
+ model_license = card_data.license
295
+ n_parameters = cls._calculate_num_parameters_from_hub(model_name)
296
+ memory_usage_mb = cls._calculate_memory_usage_mb(model_name, n_parameters)
297
+ if model_config and hasattr(model_config, "hidden_size"):
298
+ embedding_dim = model_config.hidden_size
299
+ if model_config and hasattr(model_config, "max_position_embeddings"):
300
+ max_tokens = model_config.max_position_embeddings
301
+
302
+ return cls(
303
+ loader=loader,
304
+ name=model_name or "no_model_name/available",
305
+ revision=revision or "no_revision_available",
306
+ reference=reference,
307
+ release_date=release_date,
308
+ languages=None,
309
+ license=model_license,
310
+ framework=frameworks,
311
+ training_datasets=None,
312
+ similarity_fn_name=None,
313
+ n_parameters=n_parameters,
314
+ memory_usage_mb=memory_usage_mb,
315
+ max_tokens=max_tokens,
316
+ embed_dim=embedding_dim,
317
+ open_weights=True,
318
+ public_training_code=None,
319
+ public_training_data=None,
320
+ use_instructions=None,
321
+ modalities=[],
322
+ )
323
+
324
+ @classmethod
325
+ def from_sentence_transformer_model(
326
+ cls,
327
+ model: SentenceTransformer,
328
+ revision: str | None = None,
329
+ compute_metadata: bool = True,
330
+ ) -> Self:
331
+ """Generates a ModelMeta from a SentenceTransformer model.
332
+
333
+ Args:
334
+ model: SentenceTransformer model.
335
+ revision: Revision of the model
336
+ compute_metadata: Add metadata based on model card
337
+
338
+ Returns:
339
+ The generated ModelMeta.
340
+ """
341
+ name: str | None = (
342
+ model.model_card_data.model_name
343
+ if model.model_card_data.model_name
344
+ else model.model_card_data.base_model
345
+ )
346
+ meta = cls._from_hub(name, revision, compute_metadata)
347
+ if _SENTENCE_TRANSFORMER_LIB_NAME not in meta.framework:
348
+ meta.framework.append("Sentence Transformers")
349
+ meta.revision = model.model_card_data.base_model_revision or meta.revision
350
+ meta.max_tokens = model.max_seq_length
351
+ meta.embed_dim = model.get_sentence_embedding_dimension()
352
+ meta.similarity_fn_name = ScoringFunction.from_str(model.similarity_fn_name)
353
+ meta.modalities = ["text"]
354
+ return meta
355
+
356
+ @classmethod
357
+ def from_hub(
358
+ cls,
359
+ model: str,
360
+ revision: str | None = None,
361
+ compute_metadata: bool = True,
362
+ ) -> Self:
363
+ """Generates a ModelMeta for model from HuggingFace hub.
364
+
365
+ Args:
366
+ model: Name of the model from HuggingFace hub. For example, `intfloat/multilingual-e5-large`
367
+ revision: Revision of the model
368
+ compute_metadata: Add metadata based on model card
369
+
370
+ Returns:
371
+ The generated ModelMeta.
372
+ """
373
+ meta = cls._from_hub(model, revision, compute_metadata)
374
+ if _SENTENCE_TRANSFORMER_LIB_NAME not in meta.framework:
375
+ meta.framework.append("Sentence Transformers")
376
+ meta.modalities = ["text"]
377
+
378
+ if model and compute_metadata and repo_exists(model):
379
+ # have max_seq_length field
380
+ sbert_config = _get_json_from_hub(
381
+ model, "sentence_bert_config.json", "model", revision=revision
382
+ )
383
+ if sbert_config:
384
+ meta.max_tokens = (
385
+ sbert_config.get("max_seq_length", None) or meta.max_tokens
386
+ )
387
+ # have model type, similarity function fields
388
+ config_sbert = _get_json_from_hub(
389
+ model, "config_sentence_transformers.json", "model", revision=revision
390
+ )
391
+ if (
392
+ config_sbert is not None
393
+ and config_sbert.get("similarity_fn_name") is not None
394
+ ):
395
+ meta.similarity_fn_name = ScoringFunction.from_str(
396
+ config_sbert.get("similarity_fn_name")
397
+ )
398
+ else:
399
+ meta.similarity_fn_name = ScoringFunction.COSINE
400
+ return meta
401
+
402
+ @classmethod
403
+ def from_cross_encoder(
404
+ cls,
405
+ model: CrossEncoder,
406
+ revision: str | None = None,
407
+ compute_metadata: bool = True,
408
+ ) -> Self:
409
+ """Generates a ModelMeta from a CrossEncoder.
410
+
411
+ Args:
412
+ model: The CrossEncoder model
413
+ revision: Revision of the model
414
+ compute_metadata: Add metadata based on model card
415
+
416
+ Returns:
417
+ The generated ModelMeta
418
+ """
419
+ from mteb.models import CrossEncoderWrapper
420
+
421
+ meta = cls._from_hub(model.model.name_or_path, revision, compute_metadata)
422
+ if _SENTENCE_TRANSFORMER_LIB_NAME not in meta.framework:
423
+ meta.framework.append("Sentence Transformers")
424
+ meta.revision = model.config._commit_hash or meta.revision
425
+ meta.loader = CrossEncoderWrapper
426
+ meta.embed_dim = None
427
+ meta.modalities = ["text"]
428
+ return meta
429
+
430
+ def is_zero_shot_on(self, tasks: Sequence[AbsTask] | Sequence[str]) -> bool | None:
218
431
  """Indicates whether the given model can be considered zero-shot or not on the given tasks.
219
432
 
220
433
  Returns:
@@ -267,7 +480,7 @@ class ModelMeta(BaseModel):
267
480
  return return_dataset
268
481
 
269
482
  def zero_shot_percentage(
270
- self, tasks: Sequence["AbsTask"] | Sequence[str]
483
+ self, tasks: Sequence[AbsTask] | Sequence[str]
271
484
  ) -> int | None:
272
485
  """Indicates how out-of-domain the selected tasks are for the given model.
273
486
 
@@ -290,18 +503,38 @@ class ModelMeta(BaseModel):
290
503
  perc_overlap = 100 * (len(overlap) / len(benchmark_datasets))
291
504
  return int(100 - perc_overlap)
292
505
 
293
- def calculate_memory_usage_mb(self) -> int | None:
294
- """Calculates the memory usage (in FP32) of the model in MB.
506
+ @staticmethod
507
+ def _calculate_num_parameters_from_hub(model_name: str | None = None) -> int | None:
508
+ try:
509
+ safetensors_metadata = get_safetensors_metadata(model_name)
510
+ if len(safetensors_metadata.parameter_count) >= 0:
511
+ return sum(safetensors_metadata.parameter_count.values())
512
+ except (
513
+ NotASafetensorsRepoError,
514
+ SafetensorsParsingError,
515
+ GatedRepoError,
516
+ RepositoryNotFoundError,
517
+ ) as e:
518
+ logger.warning(
519
+ f"Can't calculate number of parameters for {model_name}. Got error {e}"
520
+ )
521
+ return None
522
+
523
+ def calculate_num_parameters_from_hub(self) -> int | None:
524
+ """Calculates the number of parameters in the model.
295
525
 
296
526
  Returns:
297
- The memory usage of the model in MB, or None if it cannot be determined.
527
+ Number of parameters in the model.
298
528
  """
299
- if "API" in self.framework:
300
- return None
529
+ return self._calculate_num_parameters_from_hub(self.name)
301
530
 
531
+ @staticmethod
532
+ def _calculate_memory_usage_mb(
533
+ model_name: str, n_parameters: int | None
534
+ ) -> int | None:
302
535
  MB = 1024**2 # noqa: N806
303
536
  try:
304
- safetensors_metadata = get_safetensors_metadata(self.name) # type: ignore
537
+ safetensors_metadata = get_safetensors_metadata(model_name)
305
538
  if len(safetensors_metadata.parameter_count) >= 0:
306
539
  dtype_size_map = {
307
540
  "F64": 8, # 64-bit float
@@ -320,18 +553,130 @@ class ModelMeta(BaseModel):
320
553
  for dtype, parameters in safetensors_metadata.parameter_count.items()
321
554
  )
322
555
  return round(total_memory_bytes / MB) # Convert to MB
556
+ except (
557
+ NotASafetensorsRepoError,
558
+ SafetensorsParsingError,
559
+ GatedRepoError,
560
+ RepositoryNotFoundError,
561
+ ) as e:
562
+ logger.warning(
563
+ f"Can't calculate memory usage for {model_name}. Got error {e}"
564
+ )
323
565
 
324
- except (NotASafetensorsRepoError, SafetensorsParsingError, GatedRepoError):
325
- pass
326
- if self.n_parameters is None:
566
+ if n_parameters is None:
327
567
  return None
328
568
  # Model memory in bytes. For FP32 each parameter is 4 bytes.
329
- model_memory_bytes = self.n_parameters * 4
569
+ model_memory_bytes = n_parameters * 4
330
570
 
331
571
  # Convert to MB
332
572
  model_memory_mb = model_memory_bytes / MB
333
573
  return round(model_memory_mb)
334
574
 
575
+ def calculate_memory_usage_mb(self) -> int | None:
576
+ """Calculates the memory usage of the model in MB.
577
+
578
+ Returns:
579
+ The memory usage of the model in MB, or None if it cannot be determined.
580
+ """
581
+ if "API" in self.framework or self.name is None:
582
+ return None
583
+
584
+ return self._calculate_memory_usage_mb(self.model_name, self.n_parameters)
585
+
586
+ @staticmethod
587
+ def fetch_release_date(model_name: str) -> StrDate | None:
588
+ """Fetches the release date from HuggingFace Hub based on the first commit.
589
+
590
+ Returns:
591
+ The release date in YYYY-MM-DD format, or None if it cannot be determined.
592
+ """
593
+ commits = _get_repo_commits(repo_id=model_name, repo_type="model")
594
+ if commits:
595
+ initial_commit = commits[-1]
596
+ release_date = initial_commit.created_at.strftime("%Y-%m-%d")
597
+ return release_date
598
+ return None
599
+
600
+ def to_python(self) -> str:
601
+ """Returns a string representation of the model."""
602
+ return _pydantic_instance_to_code(self)
603
+
604
+
605
+ def _pydantic_instance_to_code(
606
+ model: BaseModel,
607
+ indent: int = 4,
608
+ *,
609
+ only_set_fields: bool = False,
610
+ ) -> str:
611
+ """Convert a Pydantic model instance into valid Python constructor code.
612
+
613
+ If only_set_fields=True, only fields explicitly provided at model construction
614
+ time are printed (i.e., excludes fields that came only from defaults).
615
+
616
+ Arguments:
617
+ model: The Pydantic model to convert.
618
+ indent: The indentation to use.
619
+ only_set_fields: If True, only fields explicitly provided at model construction time
620
+ """
621
+ cls_name = model.__class__.__name__
622
+ pad = " " * indent
623
+ lines: list[str] = [f"{cls_name}("]
624
+
625
+ model_fields = list(type(model).model_fields.keys())
626
+
627
+ if only_set_fields:
628
+ field_names = [n for n in model_fields if n in model.model_fields_set]
629
+ else:
630
+ field_names = model_fields
631
+
632
+ for field_name in field_names:
633
+ value = getattr(model, field_name)
634
+ value_code = _value_to_code(value, indent)
635
+ lines.append(f"{pad}{field_name}={value_code},")
636
+
637
+ lines.append(")")
638
+ return "\n".join(lines)
639
+
640
+
641
+ def _value_to_code(value: Any, indent: int) -> str:
642
+ """Convert a Python value into valid Python source code."""
643
+ if isinstance(value, BaseModel):
644
+ return _pydantic_instance_to_code(value, indent, only_set_fields=True)
645
+
646
+ if callable(value):
647
+ if isinstance(value, partial):
648
+ return value.func.__name__
649
+ return value.__name__
650
+
651
+ if isinstance(value, Enum):
652
+ return f"{value.__class__.__name__}.{value.name}"
653
+
654
+ if isinstance(value, str):
655
+ return repr(value)
656
+
657
+ if isinstance(value, list):
658
+ if not value:
659
+ return "[]"
660
+ inner = ", ".join(_value_to_code(v, indent) for v in value)
661
+ return f"[{inner}]"
662
+
663
+ if isinstance(value, set):
664
+ if not value:
665
+ return "set()"
666
+ inner = ", ".join(_value_to_code(v, indent) for v in sorted(value))
667
+ return f"{{{inner}}}"
668
+
669
+ if isinstance(value, dict):
670
+ if not value:
671
+ return "{}"
672
+ inner = ", ".join(
673
+ f"{_value_to_code(k, indent)}: {_value_to_code(v, indent)}"
674
+ for k, v in value.items()
675
+ )
676
+ return f"{{{inner}}}"
677
+
678
+ return repr(value)
679
+
335
680
 
336
681
  def _collect_similar_tasks(dataset: str, visited: set[str]) -> set[str]:
337
682
  """Recursively collect all similar tasks for a given dataset.
@@ -364,3 +709,35 @@ def _collect_similar_tasks(dataset: str, visited: set[str]) -> set[str]:
364
709
  similar.update(_collect_similar_tasks(parent, visited))
365
710
 
366
711
  return similar
712
+
713
+
714
+ def _get_repo_commits(repo_id: str, repo_type: str) -> list[GitCommitInfo] | None:
715
+ try:
716
+ return list_repo_commits(repo_id=repo_id, repo_type=repo_type)
717
+ except (GatedRepoError, RepositoryNotFoundError) as e:
718
+ logger.warning(f"Can't get commits of {repo_id}: {e}")
719
+ return None
720
+
721
+
722
+ def _get_json_from_hub(
723
+ repo_id: str, file_name: str, repo_type: str, revision: str | None = None
724
+ ) -> dict[str, Any] | None:
725
+ path = _get_file_on_hub(repo_id, file_name, repo_type, revision)
726
+ if path is None:
727
+ return None
728
+
729
+ with Path(path).open() as f:
730
+ js = json.load(f)
731
+ return js
732
+
733
+
734
+ def _get_file_on_hub(
735
+ repo_id: str, file_name: str, repo_type: str, revision: str | None = None
736
+ ) -> str | None:
737
+ try:
738
+ return hf_hub_download(
739
+ repo_id=repo_id, filename=file_name, repo_type=repo_type, revision=revision
740
+ )
741
+ except (GatedRepoError, RepositoryNotFoundError, EntryNotFoundError) as e:
742
+ logger.warning(f"Can't get file {file_name} of {repo_id}: {e}")
743
+ return None
@@ -68,11 +68,8 @@ class SentenceTransformerEncoderWrapper(AbsEncoder):
68
68
  self.model = SentenceTransformer(model, revision=revision, **kwargs)
69
69
  else:
70
70
  self.model = model
71
- from mteb.models.get_model_meta import (
72
- _model_meta_from_sentence_transformers,
73
- )
74
71
 
75
- self.mteb_model_meta = _model_meta_from_sentence_transformers(self.model)
72
+ self.mteb_model_meta = ModelMeta.from_sentence_transformer_model(self.model)
76
73
 
77
74
  built_in_prompts = getattr(self.model, "prompts", None)
78
75
  if built_in_prompts and not model_prompts:
@@ -268,14 +265,12 @@ class CrossEncoderWrapper:
268
265
  ) -> None:
269
266
  from sentence_transformers import CrossEncoder
270
267
 
271
- from mteb.models.get_model_meta import _model_meta_from_cross_encoder
272
-
273
268
  if isinstance(model, CrossEncoder):
274
269
  self.model = model
275
270
  elif isinstance(model, str):
276
271
  self.model = CrossEncoder(model, revision=revision, **kwargs)
277
272
 
278
- self.mteb_model_meta = _model_meta_from_cross_encoder(self.model)
273
+ self.mteb_model_meta = ModelMeta.from_cross_encoder(self.model)
279
274
 
280
275
  def predict(
281
276
  self,
@@ -1,5 +1,13 @@
1
1
  from .j_qa_ra_reranking import JQaRAReranking
2
+ from .j_qa_ra_reranking_lite import JQaRARerankingLite
2
3
  from .ja_cwir_reranking import JaCWIRReranking
4
+ from .ja_cwir_reranking_lite import JaCWIRRerankingLite
3
5
  from .m_marco_reranking import VoyageMMarcoReranking
4
6
 
5
- __all__ = ["JQaRAReranking", "JaCWIRReranking", "VoyageMMarcoReranking"]
7
+ __all__ = [
8
+ "JQaRAReranking",
9
+ "JQaRARerankingLite",
10
+ "JaCWIRReranking",
11
+ "JaCWIRRerankingLite",
12
+ "VoyageMMarcoReranking",
13
+ ]
@@ -0,0 +1,49 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
+ class JQaRARerankingLite(AbsTaskRetrieval):
6
+ metadata = TaskMetadata(
7
+ name="JQaRARerankingLite",
8
+ dataset={
9
+ "path": "mteb/JQaRARerankingLite",
10
+ "revision": "d23d3ad479f74824ed126052e810eac47e685558",
11
+ },
12
+ description=(
13
+ "JQaRA (Japanese Question Answering with Retrieval Augmentation) is a reranking dataset "
14
+ "consisting of questions from JAQKET and corpus from Japanese Wikipedia. This is the lightweight "
15
+ "version with a reduced corpus (172,897 documents) constructed using hard negatives from "
16
+ "5 high-performance models."
17
+ ),
18
+ reference="https://huggingface.co/datasets/hotchpotch/JQaRA",
19
+ type="Reranking",
20
+ category="t2t",
21
+ modalities=["text"],
22
+ eval_splits=["test"],
23
+ eval_langs=["jpn-Jpan"],
24
+ main_score="ndcg_at_10",
25
+ date=("2020-01-01", "2025-01-01"),
26
+ domains=["Encyclopaedic", "Non-fiction", "Written"],
27
+ task_subtypes=["Question answering"],
28
+ license="cc-by-sa-4.0",
29
+ annotations_creators="derived",
30
+ dialect=["jpn-Jpan"],
31
+ sample_creation="found",
32
+ adapted_from=["JQaRAReranking"],
33
+ bibtex_citation=r"""
34
+ @misc{jmteb_lite,
35
+ author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
36
+ and Kawahara, Daisuke},
37
+ howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
38
+ title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
39
+ year = {2025},
40
+ }
41
+
42
+ @misc{yuichi-tateno-2024-jqara,
43
+ author = {Yuichi Tateno},
44
+ title = {JQaRA: Japanese Question Answering with Retrieval Augmentation
45
+ - 検索拡張(RAG)評価のための日本語Q&Aデータセット},
46
+ url = {https://huggingface.co/datasets/hotchpotch/JQaRA},
47
+ }
48
+ """,
49
+ )
@@ -0,0 +1,47 @@
1
+ from mteb.abstasks.retrieval import AbsTaskRetrieval
2
+ from mteb.abstasks.task_metadata import TaskMetadata
3
+
4
+
5
+ class JaCWIRRerankingLite(AbsTaskRetrieval):
6
+ metadata = TaskMetadata(
7
+ name="JaCWIRRerankingLite",
8
+ dataset={
9
+ "path": "mteb/JaCWIRRerankingLite",
10
+ "revision": "b7c738193fb9b20c97c2b5d9a8fa3f3d28503dc0",
11
+ },
12
+ description=(
13
+ "JaCWIR (Japanese Casual Web IR) is a dataset consisting of questions and webpage meta descriptions "
14
+ "collected from Hatena Bookmark. This is the lightweight reranking version with a reduced corpus "
15
+ "(188,033 documents) constructed using hard negatives from 5 high-performance models."
16
+ ),
17
+ reference="https://huggingface.co/datasets/hotchpotch/JaCWIR",
18
+ type="Reranking",
19
+ category="t2t",
20
+ modalities=["text"],
21
+ eval_splits=["test"],
22
+ eval_langs=["jpn-Jpan"],
23
+ main_score="ndcg_at_10",
24
+ date=("2020-01-01", "2025-01-01"),
25
+ domains=["Web", "Written"],
26
+ task_subtypes=["Article retrieval"],
27
+ license="not specified",
28
+ annotations_creators="derived",
29
+ dialect=[],
30
+ sample_creation="found",
31
+ adapted_from=["JaCWIRReranking"],
32
+ bibtex_citation=r"""
33
+ @misc{jmteb_lite,
34
+ author = {Li, Shengzhe and Ohagi, Masaya and Ri, Ryokan and Fukuchi, Akihiko and Shibata, Tomohide
35
+ and Kawahara, Daisuke},
36
+ howpublished = {\url{https://huggingface.co/datasets/sbintuitions/JMTEB-lite}},
37
+ title = {{J}{M}{T}{E}{B}-lite: {T}he {L}ightweight {V}ersion of {JMTEB}},
38
+ year = {2025},
39
+ }
40
+
41
+ @misc{yuichi-tateno-2024-jacwir,
42
+ author = {Yuichi Tateno},
43
+ title = {JaCWIR: Japanese Casual Web IR - 日本語情報検索評価のための小規模でカジュアルなWebタイトルと概要のデータセット},
44
+ url = {https://huggingface.co/datasets/hotchpotch/JaCWIR},
45
+ }
46
+ """,
47
+ )
@@ -25,11 +25,14 @@ class FreshStackRetrieval(AbsTaskRetrieval):
25
25
  dialect=[],
26
26
  sample_creation="found",
27
27
  bibtex_citation=r"""
28
- @article{freshstack2023,
29
- author = {FreshStack Authors},
30
- journal = {arXiv preprint arXiv:2301.12345},
31
- title = {FreshStack: A Multi-language Code Generation and Retrieval Benchmark},
32
- year = {2023},
28
+ @misc{thakur2025freshstackbuildingrealisticbenchmarks,
29
+ archiveprefix = {arXiv},
30
+ author = {Nandan Thakur and Jimmy Lin and Sam Havens and Michael Carbin and Omar Khattab and Andrew Drozdov},
31
+ eprint = {2504.13128},
32
+ primaryclass = {cs.IR},
33
+ title = {FreshStack: Building Realistic Benchmarks for Evaluating Retrieval on Technical Documents},
34
+ url = {https://arxiv.org/abs/2504.13128},
35
+ year = {2025},
33
36
  }
34
37
  """,
35
38
  )
@@ -1,8 +1,12 @@
1
1
  from .ja_cwir_retrieval import JaCWIRRetrieval
2
+ from .ja_cwir_retrieval_lite import JaCWIRRetrievalLite
2
3
  from .ja_gov_faqs_retrieval import JaGovFaqsRetrieval
3
4
  from .ja_qu_ad_retrieval import JaQuADRetrieval
4
5
  from .japanese_legal1_retrieval import JapaneseLegal1Retrieval
5
6
  from .jaqket_retrieval import JaqketRetrieval
7
+ from .jaqket_retrieval_lite import JaqketRetrievalLite
8
+ from .miracl_ja_retrieval_lite import MIRACLJaRetrievalLite
9
+ from .mr_tydi_ja_retrieval_lite import MrTyDiJaRetrievalLite
6
10
  from .nlp_journal_abs_article_retrieval import (
7
11
  NLPJournalAbsArticleRetrieval,
8
12
  NLPJournalAbsArticleRetrievalV2,
@@ -22,10 +26,14 @@ from .nlp_journal_title_intro_retrieval import (
22
26
 
23
27
  __all__ = [
24
28
  "JaCWIRRetrieval",
29
+ "JaCWIRRetrievalLite",
25
30
  "JaGovFaqsRetrieval",
26
31
  "JaQuADRetrieval",
27
32
  "JapaneseLegal1Retrieval",
28
33
  "JaqketRetrieval",
34
+ "JaqketRetrievalLite",
35
+ "MIRACLJaRetrievalLite",
36
+ "MrTyDiJaRetrievalLite",
29
37
  "NLPJournalAbsArticleRetrieval",
30
38
  "NLPJournalAbsArticleRetrievalV2",
31
39
  "NLPJournalAbsIntroRetrieval",