mteb-2.4.2-py3-none-any.whl → mteb-2.5.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/benchmarks/benchmark.py +31 -13
- mteb/benchmarks/benchmarks/benchmarks.py +2 -2
- mteb/cache.py +36 -7
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/models/model_implementations/codefuse_models.py +144 -0
- mteb/models/model_implementations/mod_models.py +3 -1
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +5 -3
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
- mteb/results/benchmark_results.py +22 -4
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/retrieval/kor/__init__.py +2 -1
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- {mteb-2.4.2.dist-info → mteb-2.5.0.dist-info}/METADATA +1 -1
- {mteb-2.4.2.dist-info → mteb-2.5.0.dist-info}/RECORD +20 -15
- {mteb-2.4.2.dist-info → mteb-2.5.0.dist-info}/WHEEL +0 -0
- {mteb-2.4.2.dist-info → mteb-2.5.0.dist-info}/entry_points.txt +0 -0
- {mteb-2.4.2.dist-info → mteb-2.5.0.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.4.2.dist-info → mteb-2.5.0.dist-info}/top_level.txt +0 -0
mteb/benchmarks/benchmark.py
CHANGED
@@ -1,22 +1,16 @@
+from __future__ import annotations
+
 from collections.abc import Iterable, Sequence
 from dataclasses import dataclass, field
 from typing import TYPE_CHECKING, Literal
 
 import pandas as pd
 
-from mteb.
-    _create_per_language_table_from_benchmark_results,
-    _create_per_task_table_from_benchmark_results,
-    _create_summary_table_from_benchmark_results,
-    _create_summary_table_mean_public_private,
-    _create_summary_table_mean_subset,
-    _create_summary_table_mean_task_type,
-)
-from mteb.results import BenchmarkResults
+from mteb.abstasks.abstask import AbsTask
 from mteb.types import StrURL
 
 if TYPE_CHECKING:
-    from mteb.
+    from mteb.results import BenchmarkResults
 
 
 @dataclass
@@ -43,7 +37,7 @@ class Benchmark:
     """
 
     name: str
-    tasks: Sequence[
+    tasks: Sequence[AbsTask]
     description: str | None = None
     reference: StrURL | None = None
     citation: str | None = None
@@ -53,13 +47,13 @@ class Benchmark:
     display_name: str | None = None
     language_view: list[str] | Literal["all"] = field(default_factory=list)
 
-    def __iter__(self) -> Iterable[
+    def __iter__(self) -> Iterable[AbsTask]:
         return iter(self.tasks)
 
     def __len__(self) -> int:
         return len(self.tasks)
 
-    def __getitem__(self, index: int) ->
+    def __getitem__(self, index: int) -> AbsTask:
         return self.tasks[index]
 
     def _create_summary_table(
@@ -70,6 +64,10 @@ class Benchmark:
         Returns:
             A pandas DataFrame representing the summary results.
         """
+        from mteb.benchmarks._create_table import (
+            _create_summary_table_from_benchmark_results,
+        )
+
         return _create_summary_table_from_benchmark_results(benchmark_results)
 
     def _create_per_task_table(
@@ -80,6 +78,10 @@
         Returns:
            A pandas DataFrame representing the per-task results.
         """
+        from mteb.benchmarks._create_table import (
+            _create_per_task_table_from_benchmark_results,
+        )
+
         return _create_per_task_table_from_benchmark_results(benchmark_results)
 
     def _create_per_language_table(
@@ -90,6 +92,10 @@
         Returns:
            A pandas DataFrame representing the per-language results.
         """
+        from mteb.benchmarks._create_table import (
+            _create_per_language_table_from_benchmark_results,
+        )
+
         if self.language_view == "all" or len(self.language_view) > 0:
             return _create_per_language_table_from_benchmark_results(
                 benchmark_results, self.language_view
@@ -111,6 +117,10 @@ class RtebBenchmark(Benchmark):
     def _create_summary_table(
         self, benchmark_results: BenchmarkResults
     ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import (
+            _create_summary_table_mean_public_private,
+        )
+
         joint_table = _create_summary_table_mean_public_private(benchmark_results)
         # For RTEB: all tasks are Retrieval type, so Retrieval column = Mean (Task)
         joint_table = joint_table.rename(columns={"Retrieval": "Mean (Task)"})
@@ -123,6 +133,8 @@ class HUMEBenchmark(Benchmark):
     def _create_summary_table(
         self, benchmark_results: BenchmarkResults
     ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import _create_summary_table_mean_subset
+
         return _create_summary_table_mean_subset(benchmark_results)
 
 
@@ -132,6 +144,8 @@ class MIEBBenchmark(Benchmark):
     def _create_summary_table(
         self, benchmark_results: BenchmarkResults
     ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import _create_summary_table_mean_task_type
+
         return _create_summary_table_mean_task_type(benchmark_results)
 
 
@@ -141,6 +155,10 @@ class VidoreBenchmark(Benchmark):
     def _create_summary_table(
         self, benchmark_results: BenchmarkResults
     ) -> pd.DataFrame:
+        from mteb.benchmarks._create_table import (
+            _create_summary_table_mean_public_private,
+        )
+
         joint_table = _create_summary_table_mean_public_private(benchmark_results)
         # For ViDoRe (V1, V2, V3): all tasks are Document Understanding type, so Document Understanding column = Mean (Task)
         joint_table = joint_table.rename(
@@ -435,7 +435,7 @@ MTEB_RETRIEVAL_MEDICAL = Benchmark(
         ],
     ),
     description="A curated set of MTEB tasks designed to evaluate systems in the context of medical information retrieval.",
-    reference=
+    reference=None,
     citation=None,
 )
 
@@ -2589,7 +2589,7 @@ HUME = HUMEBenchmark(
         ],
     ),
     description="The HUME benchmark is designed to evaluate the performance of text embedding models and humans on a comparable set of tasks. This captures areas where models perform better than human annotators and the reverse. In the paper, we go further into the analysis and what conclusions can be drawn.",
-    reference=
+    reference=None,
     citation=None,
     contacts=["AdnanElAssadi56", "KennethEnevoldsen", "isaac-chung", "Samoed"],
 )
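The benchmark.py change swaps the module-level imports of the _create_table helpers for function-local imports inside each _create_summary_table / _create_per_*_table method, imports AbsTask at runtime, and keeps BenchmarkResults under TYPE_CHECKING. Function-local imports like this are typically used to defer a heavy import or to break an import cycle between modules that only need each other at call time. A minimal sketch of the pattern, with hypothetical module names rather than mteb's real layout:

from __future__ import annotations

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Only evaluated by static type checkers, so it cannot create a
    # circular import at runtime.
    from results_module import Results  # hypothetical module


class Report:
    def build_table(self, results: Results):
        # Deferred (function-local) import: resolved on the first call,
        # after both modules have finished loading, so a cycle between
        # this module and table_helpers never bites at import time.
        from table_helpers import make_table  # hypothetical module

        return make_table(results)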
mteb/cache.py
CHANGED
@@ -8,7 +8,9 @@ from collections.abc import Sequence
 from pathlib import Path
 from typing import cast
 
+import mteb
 from mteb.abstasks import AbsTask
+from mteb.benchmarks.benchmark import Benchmark
 from mteb.models import ModelMeta
 from mteb.results import BenchmarkResults, ModelResult, TaskResult
 from mteb.types import ModelName, Revision
@@ -195,12 +197,14 @@ class ResultCache:
         self,
         remote: str = "https://github.com/embeddings-benchmark/results",
         download_latest: bool = True,
+        revision: str | None = None,
     ) -> Path:
         """Downloads the latest version of the results repository from GitHub to a local cache directory. Required git to be installed.
 
         Args:
             remote: The URL of the results repository on GitHub.
             download_latest: If True it will download the latest version of the repository, otherwise it will only update the existing repository.
+            revision: If specified, it will checkout the given revision after cloning or pulling the repository.
 
         Returns:
             The path to the local cache directory.
@@ -228,14 +232,27 @@ class ResultCache:
             )
             raise ValueError(msg)
 
-            if download_latest:
+            if revision or download_latest:
                 logger.info(
-                    f"remote repository already exists in {results_directory},
+                    f"remote repository already exists in {results_directory}, fetching updates"
+                )
+                subprocess.run(
+                    ["git", "fetch", "--all", "--tags"],
+                    cwd=results_directory,
+                    check=True,
                 )
-                subprocess.run(["git", "pull"], cwd=results_directory)
             else:
                 logger.debug(
-                    f"Results repository already exists in {results_directory}, skipping update,
+                    f"Results repository already exists in {results_directory}, skipping update, "
+                    f"set download_latest=True to update it"
+                )
+
+            if revision:
+                logger.info(f"Checking out revision '{revision}'")
+                subprocess.run(
+                    ["git", "checkout", revision],
+                    cwd=results_directory,
+                    check=True,
                 )
             return results_directory
 
@@ -243,8 +260,15 @@ class ResultCache:
             f"No results repository found in {results_directory}, cloning it from {remote}"
         )
 
+        clone_cmd = ["git", "clone", "--depth", "1"]
+
+        if revision:
+            logger.info(f"Cloning repository at revision '{revision}'")
+            clone_cmd.append(f"--revision={revision}")
+        clone_cmd.extend([remote, "remote"])
+
         subprocess.run(
-
+            clone_cmd,
             cwd=self.cache_path,
             check=True,
         )
@@ -443,7 +467,7 @@ class ResultCache:
     def load_results(
         self,
         models: Sequence[str] | Sequence[ModelMeta] | None = None,
-        tasks: Sequence[str] | Sequence[AbsTask] | None = None,
+        tasks: Sequence[str] | Sequence[AbsTask] | Benchmark | str | None = None,
         require_model_meta: bool = True,
         include_remote: bool = True,
         validate_and_filter: bool = False,
@@ -453,7 +477,8 @@
 
         Args:
             models: A list of model names to load the results for. If None it will load the results for all models.
-            tasks: A list of task names to load the results for. If
+            tasks: A list of task names to load the results for. If str is passed, then benchmark will be loaded.
+                If None it will load the results for all tasks.
             require_model_meta: If True it will ignore results that do not have a model_meta.json file. If false it attempt to
                 extract the model name and revision from the path.
             include_remote: If True, it will include results from the remote repository.
@@ -475,6 +500,9 @@
         ...     require_model_meta=True,
         ... )
         """
+        if isinstance(tasks, str):
+            tasks = mteb.get_benchmark(tasks)
+
         paths = self.get_cache_paths(
             models=models,
             tasks=tasks,
@@ -524,6 +552,7 @@
 
         benchmark_results = BenchmarkResults(
             model_results=models_results,
+            benchmark=tasks if isinstance(tasks, Benchmark) else None,
         )
 
         return benchmark_results
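Two additions to cache.py change behaviour: download_from_remote() gains a revision argument (git fetch --all --tags followed by git checkout of the given ref), and load_results() now also accepts a benchmark name as a plain string, resolving it through mteb.get_benchmark() and attaching the resulting Benchmark to the returned BenchmarkResults. A hedged usage sketch based on the signatures above, assuming ResultCache's default constructor; the git ref and benchmark name are illustrative:

from mteb.cache import ResultCache

cache = ResultCache()

# New in 2.5.0: pin the downloaded results repository to a specific git ref
# (tag, branch, or commit) instead of whatever the default branch points to.
cache.download_from_remote(
    remote="https://github.com/embeddings-benchmark/results",
    download_latest=True,
    revision="main",  # illustrative ref; any ref that exists in the repo
)

# Also new: a benchmark name as a plain string. load_results() resolves it
# via mteb.get_benchmark() and keeps the Benchmark on the returned object.
results = cache.load_results(
    tasks="MTEB(Multilingual, v1)",  # illustrative benchmark name
    require_model_meta=True,
)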
mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json
ADDED
@@ -0,0 +1,54 @@
+{
+  "test": {
+    "num_samples": 193,
+    "number_texts_intersect_with_train": 0,
+    "text_statistics": {
+      "total_text_length": 1543015,
+      "min_text_length": 492,
+      "average_text_length": 7994.896373056995,
+      "max_text_length": 49510,
+      "unique_texts": 193
+    },
+    "image_statistics": null,
+    "label_statistics": {
+      "min_labels_per_text": 1,
+      "average_label_per_text": 1.0,
+      "max_labels_per_text": 1,
+      "unique_labels": 2,
+      "labels": {
+        "1": {
+          "count": 177
+        },
+        "0": {
+          "count": 16
+        }
+      }
+    }
+  },
+  "train": {
+    "num_samples": 870,
+    "number_texts_intersect_with_train": null,
+    "text_statistics": {
+      "total_text_length": 6968132,
+      "min_text_length": 259,
+      "average_text_length": 8009.347126436782,
+      "max_text_length": 74490,
+      "unique_texts": 870
+    },
+    "image_statistics": null,
+    "label_statistics": {
+      "min_labels_per_text": 1,
+      "average_label_per_text": 1.0,
+      "max_labels_per_text": 1,
+      "unique_labels": 2,
+      "labels": {
+        "1": {
+          "count": 755
+        },
+        "0": {
+          "count": 115
+        }
+      }
+    }
+  }
+}
mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "test": {
+    "num_samples": 6734,
+    "number_of_characters": 718835,
+    "documents_text_statistics": {
+      "total_text_length": 523388,
+      "min_text_length": 352,
+      "average_text_length": 545.1958333333333,
+      "max_text_length": 2952,
+      "unique_texts": 960
+    },
+    "documents_image_statistics": null,
+    "queries_text_statistics": {
+      "total_text_length": 195447,
+      "min_text_length": 5,
+      "average_text_length": 33.84949774852788,
+      "max_text_length": 110,
+      "unique_texts": 5764
+    },
+    "queries_image_statistics": null,
+    "relevant_docs_statistics": {
+      "num_relevant_docs": 5774,
+      "min_relevant_docs_per_query": 1,
+      "average_relevant_docs_per_query": 1.0,
+      "max_relevant_docs_per_query": 1,
+      "unique_relevant_docs": 960
+    },
+    "top_ranked_statistics": null
+  }
+}
mteb/models/model_implementations/codefuse_models.py
CHANGED
@@ -1,5 +1,6 @@
 from mteb.models import ModelMeta
 from mteb.models.instruct_wrapper import InstructSentenceTransformerModel
+from mteb.models.model_meta import ScoringFunction
 from mteb.types import PromptType
 
 F2LLM_CITATION = """@article{2025F2LLM,
@@ -74,6 +75,22 @@ training_datasets = {
     "TwentyNewsgroupsClustering",
 }
 
+c2llm_training_datasets = {
+    "CodeSearchNet",
+    "CodeSearchNetRetrieval",
+    "CodeSearchNetCCRetrieval",
+    "CodeEditSearchRetrieval",
+    "CodeFeedbackMT",
+    "CodeFeedbackST",
+    "CodeTransOceanContest",
+    "CodeTransOceanDL",
+    "COIRCodeSearchNetRetrieval",
+    "CosQA",
+    "StackOverflowQA",
+    "SyntheticText2SQL",
+    "AdvTrain",
+}
+
 prompts_dict = {
     "AmazonCounterfactualClassification": "Classify a given Amazon customer review text as either counterfactual or not counterfactual.",
     "Banking77Classification": "Given an online banking query, find the corresponding intents.",
@@ -119,6 +136,77 @@ prompts_dict = {
 }
 
 
+c2llm_prompts_dict = {
+    "CodeEditSearchRetrieval": {
+        "query": "Retrieve the diff code that relevant the following query:\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeSearchNetRetrieval": {
+        "query": "Retrieve the code that solves the following query:\n",
+        "document": "Retrieved Answer:",
+    },
+    "AppsRetrieval": {
+        "query": "Given a problem description from a programming contest, retrieve code examples that can assist in solving it.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeFeedbackMT": {
+        "query": "Given a multi-turn conversation history that includes both text and code, retrieve relevant multi-modal answers composed of text and code that address the ongoing discussion.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeFeedbackST": {
+        "query": "Given a single-turn question composed of text and code, retrieve suitable answers that also mix text and code to provide helpful feedback.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeSearchNetCCRetrieval": {
+        "query": "Given an initial code segment, retrieve the subsequent segment that continues the code.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeTransOceanContest": {
+        "query": "Given a Python code snippet, retrieve its semantically equivalent version written in C++.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CodeTransOceanDL": {
+        "query": "Given a Python code snippet, retrieve its semantically equivalent version written in C++.\n",
+        "document": "Retrieved Answer:",
+    },
+    "COIRCodeSearchNetRetrieval": {
+        "query": "Given a code snippet, retrieve its corresponding document string that summarizes its functionality.\n",
+        "document": "Retrieved Answer:",
+    },
+    "CosQA": {
+        "query": "Given a query from a web search, retrieve code that is helpful in addressing the query.\n",
+        "document": "Retrieved Answer:",
+    },
+    "StackOverflowQA": {
+        "query": "Given a question combining text and code, retrieve relevant answers that also contain both text and code snippets and can address the question.\n",
+        "document": "Retrieved Answer:",
+    },
+    "SyntheticText2SQL": {
+        "query": "Given a natural language question, retrieve SQL queries that serve as appropriate responses.\n",
+        "document": "Retrieved Answer:",
+    },
+}
+
+c2llm_languages = [
+    "eng-Latn",
+    "zho-Hans",
+    "python-Code",
+    "javascript-Code",
+    "go-Code",
+    "ruby-Code",
+    "java-Code",
+    "php-Code",
+]
+
+c2llm_loader_kwargs = dict(
+    trust_remote_code=True,
+    prompts_dict=c2llm_prompts_dict,
+    apply_instruction_to_passages=True,
+    max_seq_length=2048,
+    padding_side="left",
+)
+
+
 def instruction_template(
     instruction: str, prompt_type: PromptType | None = None
 ) -> str:
@@ -218,3 +306,59 @@ F2LLM_4B = ModelMeta(
     training_datasets=training_datasets,
     citation=F2LLM_CITATION,
 )
+
+C2LLM_0B5 = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=c2llm_loader_kwargs,
+    name="codefuse-ai/C2LLM-0.5B",
+    revision="f08c18be03de42c6e388948a1804d4b271a953a2",
+    release_date="2025-12-22",
+    languages=c2llm_languages,
+    n_parameters=497252096,
+    memory_usage_mb=948.0,
+    max_tokens=32768,
+    embed_dim=896,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/codefuse-ai/C2LLM-0.5B",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=True,
+    training_datasets=c2llm_training_datasets,
+    adapted_from=None,
+    superseded_by=None,
+    modalities=["text"],
+    is_cross_encoder=None,
+    citation=None,
+    contacts=None,
+)
+
+C2LLM_7B = ModelMeta(
+    loader=InstructSentenceTransformerModel,
+    loader_kwargs=c2llm_loader_kwargs,
+    name="codefuse-ai/C2LLM-7B",
+    revision="c1dc16d6d64eb962c783bfb36a6d9c2f24a86dca",
+    release_date="2025-12-22",
+    languages=c2llm_languages,
+    n_parameters=7667028992,
+    memory_usage_mb=14624.0,
+    max_tokens=32768,
+    embed_dim=3584,
+    license="apache-2.0",
+    open_weights=True,
+    public_training_code=None,
+    public_training_data=None,
+    framework=["PyTorch", "Sentence Transformers"],
+    reference="https://huggingface.co/codefuse-ai/C2LLM-7B",
+    similarity_fn_name=ScoringFunction.COSINE,
+    use_instructions=True,
+    training_datasets=c2llm_training_datasets,
+    adapted_from=None,
+    superseded_by=None,
+    modalities=["text"],
+    is_cross_encoder=None,
+    citation=None,
+    contacts=None,
+)
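codefuse_models.py registers two new code-embedding checkpoints, codefuse-ai/C2LLM-0.5B and codefuse-ai/C2LLM-7B, both wired through InstructSentenceTransformerModel with per-task query/document prompts. A hedged sketch of picking one up by name: get_model() resolves registered ModelMeta entries, while the evaluate() call is an assumption about the 2.x evaluation entry point rather than something shown in this diff:

import mteb

# Resolve the newly registered ModelMeta by its Hugging Face name.
model = mteb.get_model("codefuse-ai/C2LLM-0.5B")  # or "codefuse-ai/C2LLM-7B"

# CosQA is one of the tasks the model ships a dedicated prompt for
# (see c2llm_prompts_dict above).
tasks = mteb.get_tasks(tasks=["CosQA"])
results = mteb.evaluate(model, tasks)  # assumption: mteb 2.x evaluate() API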
mteb/models/model_implementations/mod_models.py
CHANGED
@@ -137,7 +137,7 @@ _PREDEFINED_PROMPTS = {
     # SQL domain
     "WikiSQLRetrieval": "Given a natural language query, retrieve relevant SQL examples",
     # Multilingual
-    "MIRACLRetrievalHardNegatives": "Given a
+    "MIRACLRetrievalHardNegatives": "Given a question, retrieve Wikipedia passages that answer the question",
     # ========== Private/Closed Datasets ==========
     # Code domain (Private)
     "Code1Retrieval": "Given a code problem description, retrieve relevant code examples",
@@ -166,6 +166,8 @@ MoD_Embedding = ModelMeta(
         instruction_template=instruction_template,
         apply_instruction_to_passages=False,
         prompts_dict=_PREDEFINED_PROMPTS,
+        max_seq_length=18480,
+        model_kwargs={"torch_dtype": "bfloat16"},
     ),
     name="bflhc/MoD-Embedding",
     languages=multilingual_langs,
mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py
CHANGED
@@ -65,14 +65,16 @@ class LlamaNemoretrieverColembed(AbsEncoder):
         iterator = DataLoader(images, batch_size=batch_size)
 
         for batch in iterator:
-            for
+            for image in batch["image"]:
                 pil_img = (
-
+                    image
+                    if isinstance(image, Image.Image)
+                    else F.to_pil_image(image.to("cpu"))
                 )
                 all_images.append(pil_img)
 
         batch_size = 1
-        return self.model.
+        return self.model.forward_images(all_images, batch_size=batch_size)
 
     def calculate_probs(self, text_embeddings, image_embeddings):
         scores = self.similarity(text_embeddings, image_embeddings)