EuroEval 15.12.0__py3-none-any.whl → 16.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. euroeval/__init__.py +32 -14
  2. euroeval/benchmark_config_factory.py +92 -180
  3. euroeval/benchmark_modules/base.py +49 -39
  4. euroeval/benchmark_modules/fresh.py +35 -21
  5. euroeval/benchmark_modules/hf.py +280 -244
  6. euroeval/benchmark_modules/litellm.py +752 -312
  7. euroeval/benchmark_modules/vllm.py +570 -268
  8. euroeval/benchmarker.py +651 -528
  9. euroeval/caching_utils.py +79 -0
  10. euroeval/callbacks.py +5 -7
  11. euroeval/cli.py +49 -38
  12. euroeval/constants.py +44 -25
  13. euroeval/data_loading.py +111 -55
  14. euroeval/data_models.py +490 -323
  15. euroeval/dataset_configs/__init__.py +26 -4
  16. euroeval/dataset_configs/bosnian.py +39 -0
  17. euroeval/dataset_configs/bulgarian.py +56 -0
  18. euroeval/dataset_configs/croatian.py +56 -0
  19. euroeval/dataset_configs/czech.py +75 -0
  20. euroeval/dataset_configs/danish.py +78 -50
  21. euroeval/dataset_configs/dutch.py +74 -44
  22. euroeval/dataset_configs/english.py +71 -36
  23. euroeval/dataset_configs/estonian.py +111 -0
  24. euroeval/dataset_configs/faroese.py +25 -18
  25. euroeval/dataset_configs/finnish.py +63 -26
  26. euroeval/dataset_configs/french.py +65 -32
  27. euroeval/dataset_configs/german.py +77 -36
  28. euroeval/dataset_configs/greek.py +64 -0
  29. euroeval/dataset_configs/icelandic.py +68 -57
  30. euroeval/dataset_configs/italian.py +68 -36
  31. euroeval/dataset_configs/latvian.py +87 -0
  32. euroeval/dataset_configs/lithuanian.py +64 -0
  33. euroeval/dataset_configs/norwegian.py +98 -72
  34. euroeval/dataset_configs/polish.py +96 -0
  35. euroeval/dataset_configs/portuguese.py +63 -40
  36. euroeval/dataset_configs/serbian.py +64 -0
  37. euroeval/dataset_configs/slovak.py +55 -0
  38. euroeval/dataset_configs/slovene.py +56 -0
  39. euroeval/dataset_configs/spanish.py +68 -34
  40. euroeval/dataset_configs/swedish.py +82 -41
  41. euroeval/dataset_configs/ukrainian.py +64 -0
  42. euroeval/enums.py +12 -6
  43. euroeval/exceptions.py +21 -1
  44. euroeval/finetuning.py +34 -26
  45. euroeval/generation.py +76 -41
  46. euroeval/generation_utils.py +169 -34
  47. euroeval/languages.py +1020 -188
  48. euroeval/logging_utils.py +268 -0
  49. euroeval/metrics/__init__.py +6 -0
  50. euroeval/metrics/base.py +85 -0
  51. euroeval/metrics/huggingface.py +216 -0
  52. euroeval/metrics/llm_as_a_judge.py +260 -0
  53. euroeval/metrics/pipeline.py +289 -0
  54. euroeval/metrics/speed.py +48 -0
  55. euroeval/model_cache.py +40 -21
  56. euroeval/model_config.py +4 -5
  57. euroeval/model_loading.py +3 -0
  58. euroeval/prompt_templates/__init__.py +2 -0
  59. euroeval/prompt_templates/classification.py +206 -0
  60. euroeval/prompt_templates/linguistic_acceptability.py +157 -22
  61. euroeval/prompt_templates/multiple_choice.py +159 -17
  62. euroeval/prompt_templates/named_entity_recognition.py +318 -21
  63. euroeval/prompt_templates/reading_comprehension.py +207 -16
  64. euroeval/prompt_templates/sentiment_classification.py +205 -22
  65. euroeval/prompt_templates/summarization.py +122 -22
  66. euroeval/prompt_templates/token_classification.py +279 -0
  67. euroeval/scores.py +20 -9
  68. euroeval/speed_benchmark.py +11 -12
  69. euroeval/task_group_utils/multiple_choice_classification.py +21 -12
  70. euroeval/task_group_utils/question_answering.py +101 -73
  71. euroeval/task_group_utils/sequence_classification.py +144 -61
  72. euroeval/task_group_utils/text_to_text.py +33 -12
  73. euroeval/task_group_utils/token_classification.py +86 -89
  74. euroeval/tasks.py +75 -16
  75. euroeval/tokenisation_utils.py +603 -0
  76. euroeval/types.py +17 -11
  77. euroeval/utils.py +332 -137
  78. euroeval-16.7.1.dist-info/METADATA +623 -0
  79. euroeval-16.7.1.dist-info/RECORD +84 -0
  80. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/entry_points.txt +0 -1
  81. euroeval/human_evaluation.py +0 -737
  82. euroeval/metrics.py +0 -452
  83. euroeval/tokenization_utils.py +0 -498
  84. euroeval-15.12.0.dist-info/METADATA +0 -285
  85. euroeval-15.12.0.dist-info/RECORD +0 -63
  86. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/WHEEL +0 -0
  87. {euroeval-15.12.0.dist-info → euroeval-16.7.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,84 @@
1
+ euroeval/__init__.py,sha256=MuVVOnGU3IJIROBmNkrFSCfnLaeqAFaI-gBaf78_Zr0,4118
2
+ euroeval/benchmark_config_factory.py,sha256=rjQrah8TdnSSSl2NY7AzfVld4DOnGyvv0Z-eTPolM4w,8450
3
+ euroeval/benchmarker.py,sha256=PCOiF2XgSk1QGeszmE0bg2ZJn_LxjFeVjDIk6fJen4U,53145
4
+ euroeval/caching_utils.py,sha256=lLUbkpDdJZy4xodIpwIz5d-WNKGuszbr_d9dyiJ5kZc,2591
5
+ euroeval/callbacks.py,sha256=l8f6Zr8EoHfVFsI1ZnMUK0Y8uZB00Nvaz_I6XDn6avE,2515
6
+ euroeval/cli.py,sha256=thDPROA6DcugKBkFGFnkJxynH0-3wq2QGfvnaSbow_0,9051
7
+ euroeval/constants.py,sha256=1Ew9yBPNu2blYb3v4HD5V_RGZV_MJ9PXNiakDrwMiGs,3509
8
+ euroeval/data_loading.py,sha256=2CYnvjMVvJOUaDhwKueh3gYIdIwyAsCBonD9uciv_RU,6896
9
+ euroeval/data_models.py,sha256=qagh1QV9Cff-8UqZ4XkZ8Mjy3PZkNddt4a9_tmMkIao,30195
10
+ euroeval/enums.py,sha256=SeFek-Lre2Q5sxbP5svqjDZFZR2vlJhg9dkRH4JvU1g,3436
11
+ euroeval/exceptions.py,sha256=4-N2OIo5PJ2aciLjagNAVhdHPxpq2QxywbBqJ8lkKj0,5780
12
+ euroeval/finetuning.py,sha256=tQZuMwwu-pnKgxEpJd424so5lK7p3Qd9aF3kKbBDYUI,11826
13
+ euroeval/generation.py,sha256=ymacCUOT94ul1YeclyE6UMBQNegRucbXn_DChhH5NKA,13334
14
+ euroeval/generation_utils.py,sha256=uJ7Nvto39GKxr6V-K4h3czthjUjYEV6OCCTmoFSaKek,18714
15
+ euroeval/languages.py,sha256=gUSosFbvf1eEQHjVsKhXdJ4jiGXC-9lMkOL8AsBG33Q,37295
16
+ euroeval/logging_utils.py,sha256=eyUwDVghAGkam1YzR1KBrC_cjpBHomGWLwRqqw4Auik,9325
17
+ euroeval/model_cache.py,sha256=oIlCEMdHxns_8W6urf89X1odrqTX9axFCCZ2NCIXhV8,9617
18
+ euroeval/model_config.py,sha256=fxHfgpw-9vj3hwke28DguVGvG9TU06nkTXT0V6KAMpQ,2761
19
+ euroeval/model_loading.py,sha256=Ru44ONwCMEquM48T-pDWCnZq2V7mFKhu7L80OXtX4co,2340
20
+ euroeval/scores.py,sha256=9a1XtppFbp8GJFc9JdThGxqBY0YUE7-92oyrlxScjNk,3281
21
+ euroeval/speed_benchmark.py,sha256=VUOvauc9tuAegThNT2g1a-Z1l7DEmKq57dHI4t16o5A,4068
22
+ euroeval/tasks.py,sha256=XaEI1IKpHU66DII-6D_8FishBur8kZ7Hx4aojqlmf48,5642
23
+ euroeval/tokenisation_utils.py,sha256=aLXbqvkIFpClXassuY5VrjMh-r5XlVEXjjWtdtuA0dU,21152
24
+ euroeval/types.py,sha256=dJFhHEPpTaDWWNUx1bayG7w9dVOspBdM1JT1Pr9EzhI,2951
25
+ euroeval/utils.py,sha256=WSP8Dl0zr8-2xjtWopK47Orz-V-PQw3ef-ToBKx_V4M,18034
26
+ euroeval/benchmark_modules/__init__.py,sha256=TNO-sNDwlXE-LMFXfwwqjQqUy55gywSmwRBcoPUFuaU,236
27
+ euroeval/benchmark_modules/base.py,sha256=Wy2-JNWuARDcfcy6_fj88uxQYwjsYK1ypxiAv4XUBhw,11397
28
+ euroeval/benchmark_modules/fresh.py,sha256=h4TPJlJK6xxxyhAXURr0T9gk4Khm3WyujnKBDFc3sCE,10806
29
+ euroeval/benchmark_modules/hf.py,sha256=4dFXgLuSi48NPftd8mLd5a4tcF8r28w9jnntKKXHBGw,45935
30
+ euroeval/benchmark_modules/litellm.py,sha256=ngXn3OqF8i5KngG81E7cgS8mJztWuerdb3UDi_y7KO4,70561
31
+ euroeval/benchmark_modules/vllm.py,sha256=1di2p304M2QTAQY4iZajonMVmJC6Aa1IXZYguR6T8Ms,51759
32
+ euroeval/dataset_configs/__init__.py,sha256=Z0f1J3M-eTEY5rACegGpdb5QtDJZaOAR3DXj4-rRIZc,2682
33
+ euroeval/dataset_configs/bosnian.py,sha256=golIWqwW1pFwSkuBM1v0yhHDblB2FoJgK24aO7kKm7M,877
34
+ euroeval/dataset_configs/bulgarian.py,sha256=OVoDPTRdU-lVq-xUka7-Ct20h2jbs8HV43KBxRQenIE,1284
35
+ euroeval/dataset_configs/croatian.py,sha256=U5oBTjttpWTWonTEzZAf-G3nvQICRQmw6Kla-HWn_5k,1260
36
+ euroeval/dataset_configs/czech.py,sha256=ghv2yNw839G-utll8PQRSjyKYbM5gfoQhFKy664GTCI,1562
37
+ euroeval/dataset_configs/danish.py,sha256=LEKs04vK2KnV0CYheT7FeS-g3iHBvf2bQxyl0D_LbTg,3293
38
+ euroeval/dataset_configs/dutch.py,sha256=HB1O7IxQUyOxLg7g0tqcCci1MHaKtZJiFlRJZo2jPr4,3107
39
+ euroeval/dataset_configs/english.py,sha256=nc9nGwxf1tHVMUhQeND61yJbpTO4rJaAusPZlstqtq0,2817
40
+ euroeval/dataset_configs/estonian.py,sha256=bWiKA_dJ7WUE8Z_1YZnSewhi4ZdCQBGJZ7pQxkCwMcU,2757
41
+ euroeval/dataset_configs/faroese.py,sha256=13qYwXonDPWG9Av5MY_NBNTRDglPVKz5_mbz7ZCJ_mo,1247
42
+ euroeval/dataset_configs/finnish.py,sha256=eyAMoQE43R-jKD3IZS0iqD2SZOPxWLqjmXzmm00tqPs,2444
43
+ euroeval/dataset_configs/french.py,sha256=z6cGY0J7TgXjqmkOLOxQE6ADO1EEPoMF1sdk2n2USe0,2611
44
+ euroeval/dataset_configs/german.py,sha256=CXW8_6CMMtrqrq85tDB6gY_fja_FATOqN5glNJI6efU,2858
45
+ euroeval/dataset_configs/greek.py,sha256=BLdhfBIG7ABzoZWvtI_VlInpb0SYLK36nhHN33LPVo0,1475
46
+ euroeval/dataset_configs/icelandic.py,sha256=G2Ibe6oF1NknkQmHqLpoHlysW_8f-0G53DJAGAlLkzQ,3552
47
+ euroeval/dataset_configs/italian.py,sha256=qhjAQChnQanzs7EyN1DSAJ4OOU41HAlWqWntQOtbWCw,2761
48
+ euroeval/dataset_configs/latvian.py,sha256=wbwIDieq5Lplng5Jzx9LEqq4d8b5LnNOyCUmT64b4bA,1928
49
+ euroeval/dataset_configs/lithuanian.py,sha256=NaU5uTeHFaXgWHHXsxOzG5LO7KuTT5yXHqdjhIJIVKo,1498
50
+ euroeval/dataset_configs/norwegian.py,sha256=skKKs4V4-zbd-1lpVUaxKXAjTMpBM6SAU5HZ8kcQ2mI,5454
51
+ euroeval/dataset_configs/polish.py,sha256=nN_NT8cUK2iv1L_zO_aCYOk2R7ACSDZgvI7e0hIaFAM,2074
52
+ euroeval/dataset_configs/portuguese.py,sha256=m9lEeVtI_yNvIdTIEOn3HFK_ilY2tn3-acC981hjZFM,2401
53
+ euroeval/dataset_configs/serbian.py,sha256=yE_aoQRU8qiJqSD-iEt6o_M4rSkGahLX8qz2RRYDvbo,1411
54
+ euroeval/dataset_configs/slovak.py,sha256=7JQj2GlRj1giACKqVZ6QJxiFBzP9byoowm0DjPaZ5u8,1252
55
+ euroeval/dataset_configs/slovene.py,sha256=r6BbFRvkFYf_4lvQaltaJ1VTVGETZ0xspsu9M_QSLcg,1271
56
+ euroeval/dataset_configs/spanish.py,sha256=FhHSsFemau8mMkqF5VCSViRBZEcnSGcDeUUO9318XDM,2745
57
+ euroeval/dataset_configs/swedish.py,sha256=QAN-cy818OojTQZdDRCPbMB_HZO_xZGoTb3CR7j9JmU,3071
58
+ euroeval/dataset_configs/ukrainian.py,sha256=spbCmCOU27jOfz6FZxqCIfVmDN5l8H-7VCl-k-8eAIo,1527
59
+ euroeval/metrics/__init__.py,sha256=qkELjrnBkuO9WzeQJZQRyXpZg_WclUByHswAc6Il7Ns,199
60
+ euroeval/metrics/base.py,sha256=dUBby-ZzettMjdcjek6rw0JTZMuScX4cQ2Rd6untKHY,2525
61
+ euroeval/metrics/huggingface.py,sha256=w0iTFIavi4Q4IGJCSFpcCX1ce28e8D6S1WjllNggi18,6735
62
+ euroeval/metrics/llm_as_a_judge.py,sha256=R9igrg8P0OgL4Z3Hb3DZHolQd9XZC0_wi2D8T1aXHfA,9785
63
+ euroeval/metrics/pipeline.py,sha256=xGCA7N1F4cLKOIeXP9SGAZvrWToREwAVb_gR5iBMQIU,10825
64
+ euroeval/metrics/speed.py,sha256=G5hEQcrtqxF070ZZwLDh61iZnq2CSW2o6ZM7zR4lOTY,1298
65
+ euroeval/prompt_templates/__init__.py,sha256=HN6Qspqm10ik6RKoPBJsvM-Nng9sywQojZbtbCqj4Z8,475
66
+ euroeval/prompt_templates/classification.py,sha256=QuZh6hTMaqMYTsoruAhwjVP9381zzlQmDIwSeyGnav0,10121
67
+ euroeval/prompt_templates/linguistic_acceptability.py,sha256=BJ4rb-K2y4EHPTlmx3jjOOFf_OaKdFQJjGTJKkKXfMs,13732
68
+ euroeval/prompt_templates/multiple_choice.py,sha256=6JckRm1nPsjFbA8nANP6Hc0m-N_txHJ8A9YHXrLJ_ZM,11422
69
+ euroeval/prompt_templates/named_entity_recognition.py,sha256=Zkwc1_dFNFYpH08n5YaOFqKXCvgkE4tVRvbNXfbVz4w,26775
70
+ euroeval/prompt_templates/reading_comprehension.py,sha256=cWB38RbkrQmgfiqdS1Kq_PbpAKWLFSxsmd3XirNu_Ek,14858
71
+ euroeval/prompt_templates/sentiment_classification.py,sha256=jfu_VjBBFzz7oFJTlx8uZqF744hZvZQqweQWGU8EeyM,15558
72
+ euroeval/prompt_templates/summarization.py,sha256=6GQpKGKrTk_-UL9ViMY0ux0Ae5mziYlwHpfihoaJRuk,9285
73
+ euroeval/prompt_templates/token_classification.py,sha256=8Uw34mN2xQ_5es-nz7vCK-GgDg_oE-zsAzPJPzAxFrQ,15531
74
+ euroeval/task_group_utils/__init__.py,sha256=CorGVkixkoEDOQuDsrOGlTmF1zmM0wnGHs8psWTfD28,72
75
+ euroeval/task_group_utils/multiple_choice_classification.py,sha256=m0jRF8ifm4iJsBwvLsPN7kIS_5HpnOy6XMAOaX09AcY,7085
76
+ euroeval/task_group_utils/question_answering.py,sha256=iGlt-KMYOPv75TbFxc9kaFPpedFo3w9bImc_-o0WrYI,28241
77
+ euroeval/task_group_utils/sequence_classification.py,sha256=vEV7b7DhiLaj2qz67u3k_rGdteC1yknC2RC0lnLzBKY,16520
78
+ euroeval/task_group_utils/text_to_text.py,sha256=Y9f27-I_ie0zRBmOwleObZ5u8B0lzvYmbJ0zH9DqI1U,5415
79
+ euroeval/task_group_utils/token_classification.py,sha256=_u8Ks4FK1oKB8Ifyu_I2NMW-z1GyJ7Mml5Z_edEaV_Q,17312
80
+ euroeval-16.7.1.dist-info/METADATA,sha256=fpnMejf4iK5v09kmw0pxZfrerOF6L_X80qIbriMjQZk,22816
81
+ euroeval-16.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
82
+ euroeval-16.7.1.dist-info/entry_points.txt,sha256=-mtBu-10bFWeZ2bS32gVK6-s-LNCQLxvnNUPBLd5ud4,87
83
+ euroeval-16.7.1.dist-info/licenses/LICENSE,sha256=guvz_zBHgkQSY_QiUU0Bkc1k-L_PFZuLjIPfuKne2OY,1080
84
+ euroeval-16.7.1.dist-info/RECORD,,
@@ -1,4 +1,3 @@
1
1
  [console_scripts]
2
2
  euroeval = euroeval.cli:benchmark
3
- human_evaluate = euroeval.human_evaluation:main
4
3
  scandeval = euroeval.cli:benchmark