mteb 2.4.1__py3-none-any.whl → 2.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mteb/benchmarks/benchmark.py +31 -13
- mteb/benchmarks/benchmarks/benchmarks.py +2 -2
- mteb/cache.py +36 -7
- mteb/descriptive_stats/Classification/TurkishConstitutionalCourtViolation.json +54 -0
- mteb/descriptive_stats/Retrieval/SQuADKorV1Retrieval.json +30 -0
- mteb/models/model_implementations/andersborges.py +12 -0
- mteb/models/model_implementations/bge_models.py +43 -0
- mteb/models/model_implementations/codefuse_models.py +144 -0
- mteb/models/model_implementations/dino_models.py +152 -0
- mteb/models/model_implementations/emillykkejensen_models.py +18 -0
- mteb/models/model_implementations/euler_models.py +6 -0
- mteb/models/model_implementations/fa_models.py +50 -0
- mteb/models/model_implementations/facebookai.py +44 -0
- mteb/models/model_implementations/gte_models.py +69 -0
- mteb/models/model_implementations/kalm_models.py +38 -0
- mteb/models/model_implementations/kblab.py +6 -0
- mteb/models/model_implementations/kowshik24_models.py +9 -0
- mteb/models/model_implementations/misc_models.py +293 -0
- mteb/models/model_implementations/mod_models.py +10 -23
- mteb/models/model_implementations/mxbai_models.py +6 -0
- mteb/models/model_implementations/nomic_models.py +8 -0
- mteb/models/model_implementations/nvidia_llama_nemoretriever_colemb.py +5 -3
- mteb/models/model_implementations/pylate_models.py +33 -0
- mteb/models/model_implementations/ru_sentence_models.py +22 -0
- mteb/models/model_implementations/seed_1_6_embedding_models_1215.py +658 -0
- mteb/models/model_implementations/sentence_transformers_models.py +39 -0
- mteb/models/model_implementations/spartan8806_atles_champion.py +7 -0
- mteb/models/model_implementations/ua_sentence_models.py +9 -0
- mteb/models/model_implementations/vi_vn_models.py +33 -0
- mteb/results/benchmark_results.py +22 -4
- mteb/tasks/classification/tur/__init__.py +4 -0
- mteb/tasks/classification/tur/turkish_constitutional_court.py +41 -0
- mteb/tasks/retrieval/kor/__init__.py +2 -1
- mteb/tasks/retrieval/kor/squad_kor_v1_retrieval.py +47 -0
- {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/METADATA +1 -1
- {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/RECORD +40 -35
- {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/WHEEL +0 -0
- {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/entry_points.txt +0 -0
- {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/licenses/LICENSE +0 -0
- {mteb-2.4.1.dist-info → mteb-2.5.0.dist-info}/top_level.txt +0 -0
|
@@ -123,6 +123,14 @@ dinov2_small = ModelMeta(
|
|
|
123
123
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
124
124
|
use_instructions=False,
|
|
125
125
|
training_datasets=dinov2_training_datasets,
|
|
126
|
+
citation="""@misc{oquab2023dinov2,
|
|
127
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
128
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
129
|
+
year={2023},
|
|
130
|
+
eprint={2304.07193},
|
|
131
|
+
archivePrefix={arXiv},
|
|
132
|
+
primaryClass={cs.CV}
|
|
133
|
+
}""",
|
|
126
134
|
)
|
|
127
135
|
|
|
128
136
|
dinov2_base = ModelMeta(
|
|
@@ -145,6 +153,14 @@ dinov2_base = ModelMeta(
|
|
|
145
153
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
146
154
|
use_instructions=False,
|
|
147
155
|
training_datasets=dinov2_training_datasets,
|
|
156
|
+
citation="""@misc{oquab2023dinov2,
|
|
157
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
158
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
159
|
+
year={2023},
|
|
160
|
+
eprint={2304.07193},
|
|
161
|
+
archivePrefix={arXiv},
|
|
162
|
+
primaryClass={cs.CV}
|
|
163
|
+
}""",
|
|
148
164
|
)
|
|
149
165
|
|
|
150
166
|
dinov2_large = ModelMeta(
|
|
@@ -167,6 +183,14 @@ dinov2_large = ModelMeta(
|
|
|
167
183
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
168
184
|
use_instructions=False,
|
|
169
185
|
training_datasets=dinov2_training_datasets,
|
|
186
|
+
citation="""@misc{oquab2023dinov2,
|
|
187
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
188
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
189
|
+
year={2023},
|
|
190
|
+
eprint={2304.07193},
|
|
191
|
+
archivePrefix={arXiv},
|
|
192
|
+
primaryClass={cs.CV}
|
|
193
|
+
}""",
|
|
170
194
|
)
|
|
171
195
|
|
|
172
196
|
dinov2_giant = ModelMeta(
|
|
@@ -189,6 +213,14 @@ dinov2_giant = ModelMeta(
|
|
|
189
213
|
similarity_fn_name=ScoringFunction.COSINE,
|
|
190
214
|
use_instructions=False,
|
|
191
215
|
training_datasets=dinov2_training_datasets,
|
|
216
|
+
citation="""@misc{oquab2023dinov2,
|
|
217
|
+
title={DINOv2: Learning Robust Visual Features without Supervision},
|
|
218
|
+
author={Maxime Oquab and Timothée Darcet and Théo Moutakanni and Huy Vo and Marc Szafraniec and Vasil Khalidov and Pierre Fernandez and Daniel Haziza and Francisco Massa and Alaaeldin El-Nouby and Mahmoud Assran and Nicolas Ballas and Wojciech Galuba and Russell Howes and Po-Yao Huang and Shang-Wen Li and Ishan Misra and Michael Rabbat and Vasu Sharma and Gabriel Synnaeve and Hu Xu and Hervé Jegou and Julien Mairal and Patrick Labatut and Armand Joulin and Piotr Bojanowski},
|
|
219
|
+
year={2023},
|
|
220
|
+
eprint={2304.07193},
|
|
221
|
+
archivePrefix={arXiv},
|
|
222
|
+
primaryClass={cs.CV}
|
|
223
|
+
}""",
|
|
192
224
|
)
|
|
193
225
|
|
|
194
226
|
webssl_dino_training_datasets = set(
|
|
@@ -215,6 +247,14 @@ webssl_dino300m_full2b = ModelMeta(
|
|
|
215
247
|
similarity_fn_name=None,
|
|
216
248
|
use_instructions=False,
|
|
217
249
|
training_datasets=webssl_dino_training_datasets,
|
|
250
|
+
citation="""@article{fan2025scaling,
|
|
251
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
252
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
253
|
+
year={2025},
|
|
254
|
+
eprint={2504.01017},
|
|
255
|
+
archivePrefix={arXiv},
|
|
256
|
+
primaryClass={cs.CV}
|
|
257
|
+
}""",
|
|
218
258
|
)
|
|
219
259
|
|
|
220
260
|
webssl_dino1b_full2b = ModelMeta(
|
|
@@ -237,6 +277,14 @@ webssl_dino1b_full2b = ModelMeta(
|
|
|
237
277
|
similarity_fn_name=None,
|
|
238
278
|
use_instructions=False,
|
|
239
279
|
training_datasets=webssl_dino_training_datasets,
|
|
280
|
+
citation="""@article{fan2025scaling,
|
|
281
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
282
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
283
|
+
year={2025},
|
|
284
|
+
eprint={2504.01017},
|
|
285
|
+
archivePrefix={arXiv},
|
|
286
|
+
primaryClass={cs.CV}
|
|
287
|
+
}""",
|
|
240
288
|
)
|
|
241
289
|
|
|
242
290
|
webssl_dino2b_full2b = ModelMeta(
|
|
@@ -259,6 +307,14 @@ webssl_dino2b_full2b = ModelMeta(
|
|
|
259
307
|
similarity_fn_name=None,
|
|
260
308
|
use_instructions=False,
|
|
261
309
|
training_datasets=webssl_dino_training_datasets,
|
|
310
|
+
citation="""@article{fan2025scaling,
|
|
311
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
312
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
313
|
+
year={2025},
|
|
314
|
+
eprint={2504.01017},
|
|
315
|
+
archivePrefix={arXiv},
|
|
316
|
+
primaryClass={cs.CV}
|
|
317
|
+
}""",
|
|
262
318
|
)
|
|
263
319
|
|
|
264
320
|
webssl_dino3b_full2b = ModelMeta(
|
|
@@ -281,6 +337,14 @@ webssl_dino3b_full2b = ModelMeta(
|
|
|
281
337
|
similarity_fn_name=None,
|
|
282
338
|
use_instructions=False,
|
|
283
339
|
training_datasets=webssl_dino_training_datasets,
|
|
340
|
+
citation="""@article{fan2025scaling,
|
|
341
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
342
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
343
|
+
year={2025},
|
|
344
|
+
eprint={2504.01017},
|
|
345
|
+
archivePrefix={arXiv},
|
|
346
|
+
primaryClass={cs.CV}
|
|
347
|
+
}""",
|
|
284
348
|
)
|
|
285
349
|
|
|
286
350
|
webssl_dino5b_full2b = ModelMeta(
|
|
@@ -303,6 +367,14 @@ webssl_dino5b_full2b = ModelMeta(
|
|
|
303
367
|
similarity_fn_name=None,
|
|
304
368
|
use_instructions=False,
|
|
305
369
|
training_datasets=webssl_dino_training_datasets,
|
|
370
|
+
citation="""@article{fan2025scaling,
|
|
371
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
372
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
373
|
+
year={2025},
|
|
374
|
+
eprint={2504.01017},
|
|
375
|
+
archivePrefix={arXiv},
|
|
376
|
+
primaryClass={cs.CV}
|
|
377
|
+
}""",
|
|
306
378
|
)
|
|
307
379
|
|
|
308
380
|
webssl_dino7b_full8b_224 = ModelMeta(
|
|
@@ -325,6 +397,14 @@ webssl_dino7b_full8b_224 = ModelMeta(
|
|
|
325
397
|
similarity_fn_name=None,
|
|
326
398
|
use_instructions=False,
|
|
327
399
|
training_datasets=webssl_dino_training_datasets,
|
|
400
|
+
citation="""@article{fan2025scaling,
|
|
401
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
402
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
403
|
+
year={2025},
|
|
404
|
+
eprint={2504.01017},
|
|
405
|
+
archivePrefix={arXiv},
|
|
406
|
+
primaryClass={cs.CV}
|
|
407
|
+
}""",
|
|
328
408
|
)
|
|
329
409
|
|
|
330
410
|
webssl_dino7b_full8b_378 = ModelMeta(
|
|
@@ -347,6 +427,14 @@ webssl_dino7b_full8b_378 = ModelMeta(
|
|
|
347
427
|
similarity_fn_name=None,
|
|
348
428
|
use_instructions=False,
|
|
349
429
|
training_datasets=webssl_dino_training_datasets,
|
|
430
|
+
citation="""@article{fan2025scaling,
|
|
431
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
432
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
433
|
+
year={2025},
|
|
434
|
+
eprint={2504.01017},
|
|
435
|
+
archivePrefix={arXiv},
|
|
436
|
+
primaryClass={cs.CV}
|
|
437
|
+
}""",
|
|
350
438
|
)
|
|
351
439
|
|
|
352
440
|
webssl_dino7b_full8b_518 = ModelMeta(
|
|
@@ -369,6 +457,14 @@ webssl_dino7b_full8b_518 = ModelMeta(
|
|
|
369
457
|
similarity_fn_name=None,
|
|
370
458
|
use_instructions=False,
|
|
371
459
|
training_datasets=webssl_dino_training_datasets,
|
|
460
|
+
citation="""@article{fan2025scaling,
|
|
461
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
462
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
463
|
+
year={2025},
|
|
464
|
+
eprint={2504.01017},
|
|
465
|
+
archivePrefix={arXiv},
|
|
466
|
+
primaryClass={cs.CV}
|
|
467
|
+
}""",
|
|
372
468
|
)
|
|
373
469
|
|
|
374
470
|
|
|
@@ -392,6 +488,14 @@ webssl_dino2b_light2b = ModelMeta(
|
|
|
392
488
|
similarity_fn_name=None,
|
|
393
489
|
use_instructions=False,
|
|
394
490
|
training_datasets=webssl_dino_training_datasets,
|
|
491
|
+
citation="""@article{fan2025scaling,
|
|
492
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
493
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
494
|
+
year={2025},
|
|
495
|
+
eprint={2504.01017},
|
|
496
|
+
archivePrefix={arXiv},
|
|
497
|
+
primaryClass={cs.CV}
|
|
498
|
+
}""",
|
|
395
499
|
)
|
|
396
500
|
|
|
397
501
|
webssl_dino2b_heavy2b = ModelMeta(
|
|
@@ -414,6 +518,14 @@ webssl_dino2b_heavy2b = ModelMeta(
|
|
|
414
518
|
similarity_fn_name=None,
|
|
415
519
|
use_instructions=False,
|
|
416
520
|
training_datasets=webssl_dino_training_datasets,
|
|
521
|
+
citation="""@article{fan2025scaling,
|
|
522
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
523
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
524
|
+
year={2025},
|
|
525
|
+
eprint={2504.01017},
|
|
526
|
+
archivePrefix={arXiv},
|
|
527
|
+
primaryClass={cs.CV}
|
|
528
|
+
}""",
|
|
417
529
|
)
|
|
418
530
|
|
|
419
531
|
webssl_dino3b_light2b = ModelMeta(
|
|
@@ -436,6 +548,14 @@ webssl_dino3b_light2b = ModelMeta(
|
|
|
436
548
|
similarity_fn_name=None,
|
|
437
549
|
use_instructions=False,
|
|
438
550
|
training_datasets=webssl_dino_training_datasets,
|
|
551
|
+
citation="""@article{fan2025scaling,
|
|
552
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
553
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
554
|
+
year={2025},
|
|
555
|
+
eprint={2504.01017},
|
|
556
|
+
archivePrefix={arXiv},
|
|
557
|
+
primaryClass={cs.CV}
|
|
558
|
+
}""",
|
|
439
559
|
)
|
|
440
560
|
|
|
441
561
|
webssl_dino3b_heavy2b = ModelMeta(
|
|
@@ -458,6 +578,14 @@ webssl_dino3b_heavy2b = ModelMeta(
|
|
|
458
578
|
similarity_fn_name=None,
|
|
459
579
|
use_instructions=False,
|
|
460
580
|
training_datasets=webssl_dino_training_datasets,
|
|
581
|
+
citation="""@article{fan2025scaling,
|
|
582
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
583
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
584
|
+
year={2025},
|
|
585
|
+
eprint={2504.01017},
|
|
586
|
+
archivePrefix={arXiv},
|
|
587
|
+
primaryClass={cs.CV}
|
|
588
|
+
}""",
|
|
461
589
|
)
|
|
462
590
|
|
|
463
591
|
webssl_mae300m_full2b = ModelMeta(
|
|
@@ -480,6 +608,14 @@ webssl_mae300m_full2b = ModelMeta(
|
|
|
480
608
|
similarity_fn_name=None,
|
|
481
609
|
use_instructions=False,
|
|
482
610
|
training_datasets=webssl_dino_training_datasets,
|
|
611
|
+
citation="""@article{fan2025scaling,
|
|
612
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
613
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
614
|
+
year={2025},
|
|
615
|
+
eprint={2504.01017},
|
|
616
|
+
archivePrefix={arXiv},
|
|
617
|
+
primaryClass={cs.CV}
|
|
618
|
+
}""",
|
|
483
619
|
)
|
|
484
620
|
|
|
485
621
|
webssl_mae700m_full2b = ModelMeta(
|
|
@@ -502,6 +638,14 @@ webssl_mae700m_full2b = ModelMeta(
|
|
|
502
638
|
similarity_fn_name=None,
|
|
503
639
|
use_instructions=False,
|
|
504
640
|
training_datasets=webssl_dino_training_datasets,
|
|
641
|
+
citation="""@article{fan2025scaling,
|
|
642
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
643
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
644
|
+
year={2025},
|
|
645
|
+
eprint={2504.01017},
|
|
646
|
+
archivePrefix={arXiv},
|
|
647
|
+
primaryClass={cs.CV}
|
|
648
|
+
}""",
|
|
505
649
|
)
|
|
506
650
|
|
|
507
651
|
webssl_mae1b_full2b = ModelMeta(
|
|
@@ -524,4 +668,12 @@ webssl_mae1b_full2b = ModelMeta(
|
|
|
524
668
|
similarity_fn_name=None,
|
|
525
669
|
use_instructions=False,
|
|
526
670
|
training_datasets=webssl_dino_training_datasets,
|
|
671
|
+
citation="""@article{fan2025scaling,
|
|
672
|
+
title={Scaling Language-Free Visual Representation Learning},
|
|
673
|
+
author={David Fan and Shengbang Tong and Jiachen Zhu and Koustuv Sinha and Zhuang Liu and Xinlei Chen and Michael Rabbat and Nicolas Ballas and Yann LeCun and Amir Bar and Saining Xie},
|
|
674
|
+
year={2025},
|
|
675
|
+
eprint={2504.01017},
|
|
676
|
+
archivePrefix={arXiv},
|
|
677
|
+
primaryClass={cs.CV}
|
|
678
|
+
}""",
|
|
527
679
|
)
|
|
@@ -21,6 +21,15 @@ embedding_gemma_300m_scandi = ModelMeta(
|
|
|
21
21
|
similarity_fn_name="cosine", # type: ignore[arg-type]
|
|
22
22
|
adapted_from="google/embeddinggemma-300m",
|
|
23
23
|
memory_usage_mb=578,
|
|
24
|
+
citation="""@inproceedings{reimers-2019-sentence-bert,
|
|
25
|
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
|
26
|
+
author = "Reimers, Nils and Gurevych, Iryna",
|
|
27
|
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
|
28
|
+
month = "11",
|
|
29
|
+
year = "2019",
|
|
30
|
+
publisher = "Association for Computational Linguistics",
|
|
31
|
+
url = "https://arxiv.org/abs/1908.10084",
|
|
32
|
+
}""",
|
|
24
33
|
)
|
|
25
34
|
|
|
26
35
|
|
|
@@ -67,4 +76,13 @@ mmbert_scandi = ModelMeta(
|
|
|
67
76
|
training_datasets=set(),
|
|
68
77
|
similarity_fn_name="cosine", # type: ignore[arg-type]
|
|
69
78
|
adapted_from="jonasaise/scandmmBERT-base-scandinavian",
|
|
79
|
+
citation="""@inproceedings{reimers-2019-sentence-bert,
|
|
80
|
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
|
81
|
+
author = "Reimers, Nils and Gurevych, Iryna",
|
|
82
|
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
|
83
|
+
month = "11",
|
|
84
|
+
year = "2019",
|
|
85
|
+
publisher = "Association for Computational Linguistics",
|
|
86
|
+
url = "https://arxiv.org/abs/1908.10084",
|
|
87
|
+
}""",
|
|
70
88
|
)
|
|
@@ -22,4 +22,10 @@ Euler_Legal_Embedding_V1 = ModelMeta(
|
|
|
22
22
|
training_datasets=set(), # final-data-new-anonymized-grok4-filtered
|
|
23
23
|
adapted_from="Qwen/Qwen3-Embedding-8B",
|
|
24
24
|
superseded_by=None,
|
|
25
|
+
citation="""@misc{euler2025legal,
|
|
26
|
+
title={Euler-Legal-Embedding: Advanced Legal Representation Learning},
|
|
27
|
+
author={LawRank Team},
|
|
28
|
+
year={2025},
|
|
29
|
+
publisher={Hugging Face}
|
|
30
|
+
}""",
|
|
25
31
|
)
|
|
@@ -156,6 +156,15 @@ tooka_sbert = ModelMeta(
|
|
|
156
156
|
public_training_code=None,
|
|
157
157
|
public_training_data=None,
|
|
158
158
|
training_datasets=None,
|
|
159
|
+
citation="""@inproceedings{reimers-2019-sentence-bert,
|
|
160
|
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
|
161
|
+
author = "Reimers, Nils and Gurevych, Iryna",
|
|
162
|
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
|
163
|
+
month = "11",
|
|
164
|
+
year = "2019",
|
|
165
|
+
publisher = "Association for Computational Linguistics",
|
|
166
|
+
url = "https://arxiv.org/abs/1908.10084",
|
|
167
|
+
}""",
|
|
159
168
|
)
|
|
160
169
|
|
|
161
170
|
fa_bert = ModelMeta(
|
|
@@ -180,6 +189,29 @@ fa_bert = ModelMeta(
|
|
|
180
189
|
# It's just a base model
|
|
181
190
|
# https://huggingface.co/datasets/sbunlp/hmblogs-v3
|
|
182
191
|
),
|
|
192
|
+
citation="""@inproceedings{masumi-etal-2025-fabert,
|
|
193
|
+
title = "{F}a{BERT}: Pre-training {BERT} on {P}ersian Blogs",
|
|
194
|
+
author = "Masumi, Mostafa and
|
|
195
|
+
Majd, Seyed Soroush and
|
|
196
|
+
Shamsfard, Mehrnoush and
|
|
197
|
+
Beigy, Hamid",
|
|
198
|
+
editor = "Bak, JinYeong and
|
|
199
|
+
Goot, Rob van der and
|
|
200
|
+
Jang, Hyeju and
|
|
201
|
+
Buaphet, Weerayut and
|
|
202
|
+
Ramponi, Alan and
|
|
203
|
+
Xu, Wei and
|
|
204
|
+
Ritter, Alan",
|
|
205
|
+
booktitle = "Proceedings of the Tenth Workshop on Noisy and User-generated Text",
|
|
206
|
+
month = may,
|
|
207
|
+
year = "2025",
|
|
208
|
+
address = "Albuquerque, New Mexico, USA",
|
|
209
|
+
publisher = "Association for Computational Linguistics",
|
|
210
|
+
url = "https://aclanthology.org/2025.wnut-1.10/",
|
|
211
|
+
doi = "10.18653/v1/2025.wnut-1.10",
|
|
212
|
+
pages = "85--96",
|
|
213
|
+
ISBN = "979-8-89176-232-9",
|
|
214
|
+
}""",
|
|
183
215
|
)
|
|
184
216
|
|
|
185
217
|
tooka_sbert_v2_small = ModelMeta(
|
|
@@ -201,6 +233,15 @@ tooka_sbert_v2_small = ModelMeta(
|
|
|
201
233
|
public_training_code=None,
|
|
202
234
|
public_training_data=None,
|
|
203
235
|
training_datasets=None,
|
|
236
|
+
citation="""@inproceedings{reimers-2019-sentence-bert,
|
|
237
|
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
|
238
|
+
author = "Reimers, Nils and Gurevych, Iryna",
|
|
239
|
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
|
240
|
+
month = "11",
|
|
241
|
+
year = "2019",
|
|
242
|
+
publisher = "Association for Computational Linguistics",
|
|
243
|
+
url = "https://arxiv.org/abs/1908.10084",
|
|
244
|
+
}""",
|
|
204
245
|
)
|
|
205
246
|
|
|
206
247
|
tooka_sbert_v2_large = ModelMeta(
|
|
@@ -222,4 +263,13 @@ tooka_sbert_v2_large = ModelMeta(
|
|
|
222
263
|
public_training_code=None,
|
|
223
264
|
public_training_data=None,
|
|
224
265
|
training_datasets=None,
|
|
266
|
+
citation="""@inproceedings{reimers-2019-sentence-bert,
|
|
267
|
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
|
268
|
+
author = "Reimers, Nils and Gurevych, Iryna",
|
|
269
|
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
|
270
|
+
month = "11",
|
|
271
|
+
year = "2019",
|
|
272
|
+
publisher = "Association for Computational Linguistics",
|
|
273
|
+
url = "https://arxiv.org/abs/1908.10084",
|
|
274
|
+
}""",
|
|
225
275
|
)
|
|
@@ -123,6 +123,28 @@ xlmr_base = ModelMeta(
|
|
|
123
123
|
public_training_code=None,
|
|
124
124
|
public_training_data=None,
|
|
125
125
|
training_datasets=set(),
|
|
126
|
+
citation="""@article{DBLP:journals/corr/abs-1911-02116,
|
|
127
|
+
author = {Alexis Conneau and
|
|
128
|
+
Kartikay Khandelwal and
|
|
129
|
+
Naman Goyal and
|
|
130
|
+
Vishrav Chaudhary and
|
|
131
|
+
Guillaume Wenzek and
|
|
132
|
+
Francisco Guzm{\'{a}}n and
|
|
133
|
+
Edouard Grave and
|
|
134
|
+
Myle Ott and
|
|
135
|
+
Luke Zettlemoyer and
|
|
136
|
+
Veselin Stoyanov},
|
|
137
|
+
title = {Unsupervised Cross-lingual Representation Learning at Scale},
|
|
138
|
+
journal = {CoRR},
|
|
139
|
+
volume = {abs/1911.02116},
|
|
140
|
+
year = {2019},
|
|
141
|
+
url = {http://arxiv.org/abs/1911.02116},
|
|
142
|
+
eprinttype = {arXiv},
|
|
143
|
+
eprint = {1911.02116},
|
|
144
|
+
timestamp = {Mon, 11 Nov 2019 18:38:09 +0100},
|
|
145
|
+
biburl = {https://dblp.org/rec/journals/corr/abs-1911-02116.bib},
|
|
146
|
+
bibsource = {dblp computer science bibliography, https://dblp.org}
|
|
147
|
+
}""",
|
|
126
148
|
)
|
|
127
149
|
|
|
128
150
|
xlmr_large = ModelMeta(
|
|
@@ -144,4 +166,26 @@ xlmr_large = ModelMeta(
|
|
|
144
166
|
public_training_code=None,
|
|
145
167
|
public_training_data=None,
|
|
146
168
|
training_datasets=set(),
|
|
169
|
+
citation="""@article{DBLP:journals/corr/abs-1911-02116,
|
|
170
|
+
author = {Alexis Conneau and
|
|
171
|
+
Kartikay Khandelwal and
|
|
172
|
+
Naman Goyal and
|
|
173
|
+
Vishrav Chaudhary and
|
|
174
|
+
Guillaume Wenzek and
|
|
175
|
+
Francisco Guzm{\'{a}}n and
|
|
176
|
+
Edouard Grave and
|
|
177
|
+
Myle Ott and
|
|
178
|
+
Luke Zettlemoyer and
|
|
179
|
+
Veselin Stoyanov},
|
|
180
|
+
title = {Unsupervised Cross-lingual Representation Learning at Scale},
|
|
181
|
+
journal = {CoRR},
|
|
182
|
+
volume = {abs/1911.02116},
|
|
183
|
+
year = {2019},
|
|
184
|
+
url = {http://arxiv.org/abs/1911.02116},
|
|
185
|
+
eprinttype = {arXiv},
|
|
186
|
+
eprint = {1911.02116},
|
|
187
|
+
timestamp = {Mon, 11 Nov 2019 18:38:09 +0100},
|
|
188
|
+
biburl = {https://dblp.org/rec/journals/corr/abs-1911-02116.bib},
|
|
189
|
+
bibsource = {dblp computer science bibliography, https://dblp.org}
|
|
190
|
+
}""",
|
|
147
191
|
)
|
|
@@ -89,6 +89,12 @@ gte_qwen1_5_7b_instruct = ModelMeta(
|
|
|
89
89
|
public_training_code=None,
|
|
90
90
|
public_training_data=None,
|
|
91
91
|
training_datasets=None,
|
|
92
|
+
citation="""@article{li2023towards,
|
|
93
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
94
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
95
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
96
|
+
year={2023}
|
|
97
|
+
}""",
|
|
92
98
|
)
|
|
93
99
|
|
|
94
100
|
gte_qwen2_1_5b_instruct = ModelMeta(
|
|
@@ -119,6 +125,12 @@ gte_qwen2_1_5b_instruct = ModelMeta(
|
|
|
119
125
|
public_training_code=None,
|
|
120
126
|
public_training_data=None,
|
|
121
127
|
training_datasets=None,
|
|
128
|
+
citation="""@article{li2023towards,
|
|
129
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
130
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
131
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
132
|
+
year={2023}
|
|
133
|
+
}""",
|
|
122
134
|
)
|
|
123
135
|
|
|
124
136
|
gte_small_zh = ModelMeta(
|
|
@@ -140,6 +152,12 @@ gte_small_zh = ModelMeta(
|
|
|
140
152
|
public_training_code=None,
|
|
141
153
|
public_training_data=None,
|
|
142
154
|
training_datasets=None, # Not disclosed
|
|
155
|
+
citation="""@article{li2023towards,
|
|
156
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
157
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
158
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
159
|
+
year={2023}
|
|
160
|
+
}""",
|
|
143
161
|
)
|
|
144
162
|
|
|
145
163
|
gte_base_zh = ModelMeta(
|
|
@@ -161,6 +179,12 @@ gte_base_zh = ModelMeta(
|
|
|
161
179
|
public_training_code=None,
|
|
162
180
|
public_training_data=None,
|
|
163
181
|
training_datasets=None, # Not disclosed
|
|
182
|
+
citation="""@article{li2023towards,
|
|
183
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
184
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
185
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
186
|
+
year={2023}
|
|
187
|
+
}""",
|
|
164
188
|
)
|
|
165
189
|
|
|
166
190
|
gte_large_zh = ModelMeta(
|
|
@@ -182,6 +206,12 @@ gte_large_zh = ModelMeta(
|
|
|
182
206
|
public_training_code=None,
|
|
183
207
|
public_training_data=None,
|
|
184
208
|
training_datasets=None, # Not disclosed
|
|
209
|
+
citation="""@article{li2023towards,
|
|
210
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
211
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
212
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
213
|
+
year={2023}
|
|
214
|
+
}""",
|
|
185
215
|
)
|
|
186
216
|
|
|
187
217
|
gte_multilingual_langs = [
|
|
@@ -304,6 +334,13 @@ gte_multilingual_base = ModelMeta(
|
|
|
304
334
|
public_training_code=None,
|
|
305
335
|
public_training_data=None, # couldn't find
|
|
306
336
|
training_datasets=gte_multi_training_data,
|
|
337
|
+
citation="""@inproceedings{zhang2024mgte,
|
|
338
|
+
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
|
|
339
|
+
author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
|
|
340
|
+
booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
|
|
341
|
+
pages={1393--1412},
|
|
342
|
+
year={2024}
|
|
343
|
+
}""",
|
|
307
344
|
)
|
|
308
345
|
|
|
309
346
|
gte_modernbert_base = ModelMeta(
|
|
@@ -325,6 +362,20 @@ gte_modernbert_base = ModelMeta(
|
|
|
325
362
|
public_training_code=None, # couldn't find
|
|
326
363
|
public_training_data=None,
|
|
327
364
|
training_datasets=gte_multi_training_data, # English part of gte_multi_training_data,
|
|
365
|
+
citation="""@inproceedings{zhang2024mgte,
|
|
366
|
+
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
|
|
367
|
+
author={Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Wen and Dai, Ziqi and Tang, Jialong and Lin, Huan and Yang, Baosong and Xie, Pengjun and Huang, Fei and others},
|
|
368
|
+
booktitle={Proceedings of the 2024 Conference on Empirical Methods in Natural Language Processing: Industry Track},
|
|
369
|
+
pages={1393--1412},
|
|
370
|
+
year={2024}
|
|
371
|
+
}
|
|
372
|
+
|
|
373
|
+
@article{li2023towards,
|
|
374
|
+
title={Towards general text embeddings with multi-stage contrastive learning},
|
|
375
|
+
author={Li, Zehan and Zhang, Xin and Zhang, Yanzhao and Long, Dingkun and Xie, Pengjun and Zhang, Meishan},
|
|
376
|
+
journal={arXiv preprint arXiv:2308.03281},
|
|
377
|
+
year={2023}
|
|
378
|
+
}""",
|
|
328
379
|
)
|
|
329
380
|
|
|
330
381
|
|
|
@@ -349,4 +400,22 @@ gte_base_en_v15 = ModelMeta(
|
|
|
349
400
|
public_training_code=None,
|
|
350
401
|
public_training_data=None,
|
|
351
402
|
training_datasets=None,
|
|
403
|
+
citation="""@misc{zhang2024mgte,
|
|
404
|
+
title={mGTE: Generalized Long-Context Text Representation and Reranking Models for Multilingual Text Retrieval},
|
|
405
|
+
author={Xin Zhang and Yanzhao Zhang and Dingkun Long and Wen Xie and Ziqi Dai and Jialong Tang and Huan Lin and Baosong Yang and Pengjun Xie and Fei Huang and Meishan Zhang and Wenjie Li and Min Zhang},
|
|
406
|
+
year={2024},
|
|
407
|
+
eprint={2407.19669},
|
|
408
|
+
archivePrefix={arXiv},
|
|
409
|
+
primaryClass={cs.CL},
|
|
410
|
+
url={https://arxiv.org/abs/2407.19669},
|
|
411
|
+
}
|
|
412
|
+
@misc{li2023gte,
|
|
413
|
+
title={Towards General Text Embeddings with Multi-stage Contrastive Learning},
|
|
414
|
+
author={Zehan Li and Xin Zhang and Yanzhao Zhang and Dingkun Long and Pengjun Xie and Meishan Zhang},
|
|
415
|
+
year={2023},
|
|
416
|
+
eprint={2308.03281},
|
|
417
|
+
archivePrefix={arXiv},
|
|
418
|
+
primaryClass={cs.CL},
|
|
419
|
+
url={https://arxiv.org/abs/2308.03281},
|
|
420
|
+
}""",
|
|
352
421
|
)
|
|
@@ -901,6 +901,25 @@ KaLM_Embedding_KaLM_embedding_multilingual_mini_instruct_v2_5 = ModelMeta(
|
|
|
901
901
|
training_datasets=kalm_v2_training_data,
|
|
902
902
|
adapted_from="HIT-TMG/KaLM-embedding-multilingual-mini-instruct-v2",
|
|
903
903
|
superseded_by=None,
|
|
904
|
+
citation="""@misc{zhao2025kalmembeddingv2,
|
|
905
|
+
title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
|
|
906
|
+
author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
|
|
907
|
+
year={2025},
|
|
908
|
+
eprint={2506.20923},
|
|
909
|
+
archivePrefix={arXiv},
|
|
910
|
+
primaryClass={cs.CL},
|
|
911
|
+
url={https://arxiv.org/abs/2506.20923},
|
|
912
|
+
}
|
|
913
|
+
|
|
914
|
+
@misc{hu2025kalmembedding,
|
|
915
|
+
title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
|
|
916
|
+
author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
|
|
917
|
+
year={2025},
|
|
918
|
+
eprint={2501.01028},
|
|
919
|
+
archivePrefix={arXiv},
|
|
920
|
+
primaryClass={cs.CL},
|
|
921
|
+
url={https://arxiv.org/abs/2501.01028},
|
|
922
|
+
}""",
|
|
904
923
|
)
|
|
905
924
|
|
|
906
925
|
KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
|
|
@@ -928,4 +947,23 @@ KaLM_Embedding_gemma_3_12b_2511 = ModelMeta(
|
|
|
928
947
|
public_training_code="https://github.com/HITsz-TMG/KaLM-Embedding",
|
|
929
948
|
public_training_data=None,
|
|
930
949
|
training_datasets=KaLM_Embedding_gemma_3_12b_training_data,
|
|
950
|
+
citation="""@misc{zhao2025kalmembeddingv2,
|
|
951
|
+
title={KaLM-Embedding-V2: Superior Training Techniques and Data Inspire A Versatile Embedding Model},
|
|
952
|
+
author={Xinping Zhao and Xinshuo Hu and Zifei Shan and Shouzheng Huang and Yao Zhou and Xin Zhang and Zetian Sun and Zhenyu Liu and Dongfang Li and Xinyuan Wei and Youcheng Pan and Yang Xiang and Meishan Zhang and Haofen Wang and Jun Yu and Baotian Hu and Min Zhang},
|
|
953
|
+
year={2025},
|
|
954
|
+
eprint={2506.20923},
|
|
955
|
+
archivePrefix={arXiv},
|
|
956
|
+
primaryClass={cs.CL},
|
|
957
|
+
url={https://arxiv.org/abs/2506.20923},
|
|
958
|
+
}
|
|
959
|
+
|
|
960
|
+
@misc{hu2025kalmembedding,
|
|
961
|
+
title={KaLM-Embedding: Superior Training Data Brings A Stronger Embedding Model},
|
|
962
|
+
author={Xinshuo Hu and Zifei Shan and Xinping Zhao and Zetian Sun and Zhenyu Liu and Dongfang Li and Shaolin Ye and Xinyuan Wei and Qian Chen and Baotian Hu and Haofen Wang and Jun Yu and Min Zhang},
|
|
963
|
+
year={2025},
|
|
964
|
+
eprint={2501.01028},
|
|
965
|
+
archivePrefix={arXiv},
|
|
966
|
+
primaryClass={cs.CL},
|
|
967
|
+
url={https://arxiv.org/abs/2501.01028},
|
|
968
|
+
}""",
|
|
931
969
|
)
|
|
@@ -21,4 +21,10 @@ sbert_swedish = ModelMeta(
|
|
|
21
21
|
public_training_data=None,
|
|
22
22
|
training_datasets=None,
|
|
23
23
|
adapted_from="sentence-transformers/all-mpnet-base-v2",
|
|
24
|
+
citation="""@misc{rekathati2021introducing,
|
|
25
|
+
author = {Rekathati, Faton},
|
|
26
|
+
title = {The KBLab Blog: Introducing a Swedish Sentence Transformer},
|
|
27
|
+
url = {https://kb-labb.github.io/posts/2021-08-23-a-swedish-sentence-transformer/},
|
|
28
|
+
year = {2021}
|
|
29
|
+
}""",
|
|
24
30
|
)
|
|
@@ -19,4 +19,13 @@ kowshik24_bangla_embedding_model = ModelMeta(
|
|
|
19
19
|
public_training_code="https://github.com/kowshik24/Bangla-Embedding",
|
|
20
20
|
public_training_data="https://huggingface.co/datasets/sartajekram/BanglaRQA",
|
|
21
21
|
training_datasets=set(),
|
|
22
|
+
citation="""@inproceedings{reimers-2019-sentence-bert,
|
|
23
|
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
|
24
|
+
author = "Reimers, Nils and Gurevych, Iryna",
|
|
25
|
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
|
26
|
+
month = "11",
|
|
27
|
+
year = "2019",
|
|
28
|
+
publisher = "Association for Computational Linguistics",
|
|
29
|
+
url = "https://arxiv.org/abs/1908.10084",
|
|
30
|
+
}""",
|
|
22
31
|
)
|