tryaii-dre 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +190 -0
- package/README.md +234 -0
- package/dist/banner.d.ts +24 -0
- package/dist/banner.d.ts.map +1 -0
- package/dist/banner.js +125 -0
- package/dist/banner.js.map +1 -0
- package/dist/benchmarks/index.d.ts +4 -0
- package/dist/benchmarks/index.d.ts.map +1 -0
- package/dist/benchmarks/index.js +3 -0
- package/dist/benchmarks/index.js.map +1 -0
- package/dist/benchmarks/registry.d.ts +69 -0
- package/dist/benchmarks/registry.d.ts.map +1 -0
- package/dist/benchmarks/registry.js +128 -0
- package/dist/benchmarks/registry.js.map +1 -0
- package/dist/benchmarks/standard.d.ts +6 -0
- package/dist/benchmarks/standard.d.ts.map +1 -0
- package/dist/benchmarks/standard.js +115 -0
- package/dist/benchmarks/standard.js.map +1 -0
- package/dist/budget.d.ts +65 -0
- package/dist/budget.d.ts.map +1 -0
- package/dist/budget.js +344 -0
- package/dist/budget.js.map +1 -0
- package/dist/cache/index.d.ts +27 -0
- package/dist/cache/index.d.ts.map +1 -0
- package/dist/cache/index.js +63 -0
- package/dist/cache/index.js.map +1 -0
- package/dist/centroids/data/centroids_all-MiniLM-L6-v2.json +1 -0
- package/dist/centroids/data/trainingQueries.json +246 -0
- package/dist/centroids/generator.d.ts +63 -0
- package/dist/centroids/generator.d.ts.map +1 -0
- package/dist/centroids/generator.js +120 -0
- package/dist/centroids/generator.js.map +1 -0
- package/dist/centroids/index.d.ts +3 -0
- package/dist/centroids/index.d.ts.map +1 -0
- package/dist/centroids/index.js +3 -0
- package/dist/centroids/index.js.map +1 -0
- package/dist/centroids/loader.d.ts +87 -0
- package/dist/centroids/loader.d.ts.map +1 -0
- package/dist/centroids/loader.js +236 -0
- package/dist/centroids/loader.js.map +1 -0
- package/dist/classifiers/base.d.ts +56 -0
- package/dist/classifiers/base.d.ts.map +1 -0
- package/dist/classifiers/base.js +42 -0
- package/dist/classifiers/base.js.map +1 -0
- package/dist/classifiers/embedding.d.ts +68 -0
- package/dist/classifiers/embedding.d.ts.map +1 -0
- package/dist/classifiers/embedding.js +0 -0
- package/dist/classifiers/embedding.js.map +1 -0
- package/dist/classifiers/hybrid.d.ts +31 -0
- package/dist/classifiers/hybrid.d.ts.map +1 -0
- package/dist/classifiers/hybrid.js +61 -0
- package/dist/classifiers/hybrid.js.map +1 -0
- package/dist/classifiers/index.d.ts +4 -0
- package/dist/classifiers/index.d.ts.map +1 -0
- package/dist/classifiers/index.js +3 -0
- package/dist/classifiers/index.js.map +1 -0
- package/dist/classifiers/keyword.d.ts +29 -0
- package/dist/classifiers/keyword.d.ts.map +1 -0
- package/dist/classifiers/keyword.js +264 -0
- package/dist/classifiers/keyword.js.map +1 -0
- package/dist/cli.d.ts +15 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +597 -0
- package/dist/cli.js.map +1 -0
- package/dist/client-types.d.ts +101 -0
- package/dist/client-types.d.ts.map +1 -0
- package/dist/client-types.js +5 -0
- package/dist/client-types.js.map +1 -0
- package/dist/client.d.ts +50 -0
- package/dist/client.d.ts.map +1 -0
- package/dist/client.js +279 -0
- package/dist/client.js.map +1 -0
- package/dist/config.d.ts +45 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +37 -0
- package/dist/config.js.map +1 -0
- package/dist/dashboard/index.d.ts +48 -0
- package/dist/dashboard/index.d.ts.map +1 -0
- package/dist/dashboard/index.js +166 -0
- package/dist/dashboard/index.js.map +1 -0
- package/dist/embeddings/base.d.ts +66 -0
- package/dist/embeddings/base.d.ts.map +1 -0
- package/dist/embeddings/base.js +77 -0
- package/dist/embeddings/base.js.map +1 -0
- package/dist/embeddings/index.d.ts +3 -0
- package/dist/embeddings/index.d.ts.map +1 -0
- package/dist/embeddings/index.js +3 -0
- package/dist/embeddings/index.js.map +1 -0
- package/dist/embeddings/local.d.ts +42 -0
- package/dist/embeddings/local.d.ts.map +1 -0
- package/dist/embeddings/local.js +89 -0
- package/dist/embeddings/local.js.map +1 -0
- package/dist/index.d.ts +44 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +45 -0
- package/dist/index.js.map +1 -0
- package/dist/integrations/index.d.ts +3 -0
- package/dist/integrations/index.d.ts.map +1 -0
- package/dist/integrations/index.js +2 -0
- package/dist/integrations/index.js.map +1 -0
- package/dist/integrations/openrouter.d.ts +84 -0
- package/dist/integrations/openrouter.d.ts.map +1 -0
- package/dist/integrations/openrouter.js +253 -0
- package/dist/integrations/openrouter.js.map +1 -0
- package/dist/registry/index.d.ts +2 -0
- package/dist/registry/index.d.ts.map +1 -0
- package/dist/registry/index.js +2 -0
- package/dist/registry/index.js.map +1 -0
- package/dist/registry/models.d.ts +76 -0
- package/dist/registry/models.d.ts.map +1 -0
- package/dist/registry/models.js +170 -0
- package/dist/registry/models.js.map +1 -0
- package/dist/registry/presets/defaultModels.json +435 -0
- package/dist/router.d.ts +178 -0
- package/dist/router.d.ts.map +1 -0
- package/dist/router.js +259 -0
- package/dist/router.js.map +1 -0
- package/dist/scoring/benchmarks.d.ts +35 -0
- package/dist/scoring/benchmarks.d.ts.map +1 -0
- package/dist/scoring/benchmarks.js +68 -0
- package/dist/scoring/benchmarks.js.map +1 -0
- package/dist/scoring/engine.d.ts +43 -0
- package/dist/scoring/engine.d.ts.map +1 -0
- package/dist/scoring/engine.js +267 -0
- package/dist/scoring/engine.js.map +1 -0
- package/dist/scoring/index.d.ts +6 -0
- package/dist/scoring/index.d.ts.map +1 -0
- package/dist/scoring/index.js +4 -0
- package/dist/scoring/index.js.map +1 -0
- package/dist/scoring/priorities.d.ts +41 -0
- package/dist/scoring/priorities.d.ts.map +1 -0
- package/dist/scoring/priorities.js +49 -0
- package/dist/scoring/priorities.js.map +1 -0
- package/dist/types.d.ts +47 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +5 -0
- package/dist/types.js.map +1 -0
- package/dist/utils/cosine.d.ts +10 -0
- package/dist/utils/cosine.d.ts.map +1 -0
- package/dist/utils/cosine.js +18 -0
- package/dist/utils/cosine.js.map +1 -0
- package/dist/utils/math.d.ts +18 -0
- package/dist/utils/math.d.ts.map +1 -0
- package/dist/utils/math.js +54 -0
- package/dist/utils/math.js.map +1 -0
- package/package.json +65 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"metadata": {"model": "all-MiniLM-L6-v2", "dimension": 384, "benchmark_count": 12}, "centroids": {"MMLU": [-0.05367362126708031, 0.026150399819016457, -0.05861012265086174, -0.014820273034274578, 0.017190851271152496, 0.020788956433534622, -0.024329857900738716, 0.014296358451247215, 0.03533664718270302, 0.07343277335166931, 0.059876542538404465, 0.013473394326865673, -0.038966309279203415, -0.026116665452718735, -0.07637842744588852, -0.06876766681671143, -0.08447631448507309, 0.042526908218860626, -0.07045765966176987, -0.056116245687007904, 0.07412169873714447, -0.055854007601737976, -0.03331250697374344, 0.021483829244971275, -0.06758526712656021, 0.06414071470499039, -0.0214671790599823, 0.006115513853728771, 0.0033424249850213528, -0.06210387125611305, 0.024853600189089775, 0.0027999484445899725, 0.06446433812379837, -0.0012383521534502506, -0.04515577107667923, 0.02654869481921196, 0.12387106567621231, -0.04131907597184181, 0.04362541064620018, 0.0013834253186360002, -0.009113635867834091, -0.09962554275989532, -0.02173146978020668, 0.0958893746137619, 0.07942866533994675, 0.08988712728023529, 0.024153685197234154, 0.03508064150810242, -0.017309658229351044, -0.024625375866889954, -0.04497520253062248, -0.011574825271964073, -0.11601899564266205, 0.02801547572016716, 0.028390496969223022, 0.021174747496843338, 0.0017330491682514548, -0.035933565348386765, 0.05441407114267349, -0.05088803544640541, -0.015805667266249657, -0.036515332758426666, -0.09575771540403366, 0.08630650490522385, 0.13612420856952667, -0.06575749069452286, -0.015333774499595165, -0.0078002894297242165, -0.05726649612188339, 0.03243987634778023, -0.026892997324466705, -0.02399192750453949, -0.026396378874778748, -0.031411997973918915, -0.020094331353902817, -0.11505287885665894, 0.02249300666153431, 0.03143283352255821, -0.020598307251930237, 0.023921532556414604, -0.04905872046947479, 7.860602636355907e-05, -0.022970236837863922, 0.06449279934167862, -0.015264652669429779, 
0.012212676927447319, 0.09087461978197098, -0.032478395849466324, -0.004920009523630142, 0.009558145888149738, -0.032915692776441574, -0.036143090575933456, -0.03498091921210289, 0.0158444344997406, -0.03318042308092117, 0.12480539828538895, 0.05783587321639061, -0.06742433458566666, 0.07426374405622482, 0.06841560453176498, 0.04700923338532448, -0.015604201704263687, 0.02933814749121666, -0.005897121969610453, -0.023584866896271706, -0.038001999258995056, -0.0050875162705779076, 0.01113677117973566, -0.004024357069283724, 0.0007783627952449024, -0.10313904285430908, -0.019012510776519775, -0.009807170368731022, -0.01099780760705471, 0.01540222205221653, -0.023834850639104843, 0.06359695643186569, -0.01345778163522482, 0.0418335422873497, 0.002952938200905919, 0.03211431950330734, -0.03502321615815163, -0.05766221508383751, 0.00798036903142929, -0.07399272173643112, -0.002345777116715908, -0.05328219383955002, -1.360720375227614e-32, -0.0475195050239563, -0.11981506645679474, 0.016386499628424644, 0.08085373044013977, -0.020681576803326607, -0.008916337043046951, -0.046518757939338684, -0.057825591415166855, 0.010915348306298256, 0.04445340484380722, 0.012537883594632149, -0.028888529166579247, 0.006644320208579302, 0.03996722027659416, 0.03671154007315636, -0.02420753426849842, -0.0885525569319725, 0.06615191698074341, 0.041164297610521317, 0.05067019537091255, -0.014364528469741344, 0.09446720033884048, -0.003977617248892784, -0.029732871800661087, 0.03726334124803543, 0.029050590470433235, -0.00378073425963521, -0.002716043731197715, -0.006918830331414938, -0.010957583785057068, 0.07886774837970734, 0.050489671528339386, -0.10410267114639282, 0.013430630788207054, 0.02683006040751934, 0.04681635648012161, 0.023015372455120087, -0.059888169169425964, 0.022174537181854248, -0.03613853082060814, -0.024400124326348305, -0.07889192551374435, -0.04954472556710243, 0.007773600518703461, 0.12018732726573944, 0.024342667311429977, -0.014454728923738003, 
-0.054426345974206924, -0.014204074628651142, 0.007515252567827702, 0.01360570453107357, -0.01255232933908701, 0.008981039747595787, -0.0811348631978035, 0.011407707817852497, 0.04102020338177681, -0.04540340229868889, -0.009942303411662579, -0.11779524385929108, 0.03544321283698082, 0.033434126526117325, 0.02422492392361164, 0.012617519125342369, 0.10039253532886505, 0.00103412673342973, 0.05482399836182594, -0.08144979923963547, 0.020490998402237892, 0.061521369963884354, 0.024190464988350868, -0.09170325845479965, -0.005703915376216173, 0.044532790780067444, 0.012782624922692776, 0.006143343169242144, 0.09078515321016312, -0.025717658922076225, -0.037186164408922195, -0.09285197407007217, 0.019641024991869926, -0.12760023772716522, -0.035685211420059204, -0.017609277740120888, 0.009453247301280499, -0.09868696331977844, 0.008578530512750149, -0.0023328671231865883, -0.032842982560396194, 0.06021503359079361, -0.07371803373098373, -0.0835186019539833, -0.042049068957567215, 0.03307868912816048, -0.08817242830991745, 0.00279439939185977, 4.7037535017848456e-33, 0.002556039486080408, 0.018073275685310364, -0.08044954389333725, 0.026411134749650955, 0.012397635728120804, 0.019367212429642677, -0.05597756803035736, -0.0203244686126709, -0.003176606958732009, 0.04554042965173721, -0.05835871025919914, -0.013527529314160347, 0.0005316601600497961, 0.014065053313970566, -0.054341770708560944, -0.01635296829044819, 0.11637933552265167, 0.018491391092538834, 0.017828788608312607, -0.03529826179146767, -0.06459314376115799, 0.17108434438705444, -0.022755013778805733, -0.05297097936272621, -0.015442760661244392, 0.025668585672974586, -0.08883558213710785, -0.021293578669428825, -0.009932425804436207, -0.003063903423026204, 0.0027808693703264, -0.03815840557217598, -0.009708231315016747, 0.05598767474293709, -0.06859655678272247, 0.08417532593011856, 0.0371701754629612, 0.06017276644706726, 0.0266426932066679, -0.013144873082637787, -0.022448379546403885, 
-0.06421301513910294, 0.1105206310749054, 0.026665199548006058, 0.03068215399980545, 0.006952310912311077, 0.06705669313669205, 0.09440849721431732, -0.04490802809596062, 0.04841778799891472, 0.013790996745228767, 0.023238683119416237, -0.008288404904305935, -0.04387987405061722, 0.021660543978214264, 0.01118121575564146, 0.00882716290652752, -0.057662233710289, 0.0219416543841362, 0.05036153644323349, 0.017067598178982735, -0.012214133515954018, -0.02049071714282036, 0.1022162139415741, -0.009281357750296593, -0.02180420607328415, -0.07921051234006882, -0.006091389339417219, 0.07221227884292603, -0.04092726856470108, 0.07412861287593842, 0.03804779425263405, -0.03702510520815849, 0.028546156361699104, 0.0006776235532015562, 7.558892684755847e-05, 0.00965460017323494, 0.01357846986502409, -0.04242049530148506, -0.023151446133852005, -0.053019024431705475, 0.0014086932642385364, 0.014036367647349834, -0.036356523633003235, -0.11712946742773056, 0.015738260000944138, 0.010073665529489517, -0.05933179333806038, 0.05068971589207649, -0.07162106037139893, -0.06035483255982399, -0.018765104934573174, -0.0025403625331819057, 0.011518384329974651, 0.02233734354376793, -3.488426969511238e-08, 0.054806992411613464, 0.015444888733327389, 0.05628575384616852, -0.02838241308927536, -0.03702684864401817, -0.029895029962062836, 0.06452128291130066, 0.044251371175050735, -0.031892675906419754, 0.026411816477775574, 0.007209956180304289, 0.10047875344753265, -0.011305497959256172, 0.030274003744125366, 0.04893624782562256, 0.04058190435171127, -0.002692554146051407, 0.008919394575059414, -0.05303090065717697, -0.00026728195371106267, 0.04678770899772644, -0.0353214293718338, -0.03620763123035431, -0.007556405384093523, 0.05233248695731163, 0.031693942844867706, 0.06058506667613983, 0.09668692201375961, 0.022934559732675552, 0.03645893186330795, 0.08529611676931381, 0.04148424044251442, -0.0404164120554924, -0.0944555252790451, -0.14977988600730896, 0.008563889190554619, 
-0.04172780364751816, -0.039665061980485916, 0.03455176576972008, -0.12531712651252747, 0.034568943083286285, 0.11574037373065948, -0.04727392643690109, 0.03949027881026268, 0.06773266941308975, 0.05697356164455414, -0.04808994010090828, -0.02899949625134468, 0.004355419427156448, 0.003124465001747012, -0.08946738392114639, 0.08216718584299088, 0.055619578808546066, -0.010585000738501549, -0.03027443215250969, 0.012613252736628056, 0.023045148700475693, -0.02460431307554245, -0.009382342919707298, -0.010508565232157707, 0.04993109032511711, 0.09804975241422653, 0.127427339553833, -0.04295671731233597], "HellaSwag": [-0.006660730578005314, 0.05686415731906891, 0.03334454074501991, 0.12125086784362793, 0.09903594851493835, -0.03275306895375252, 0.021570494398474693, -0.02824178710579872, 0.06271709501743317, -0.06280022114515305, 0.061169784516096115, -0.00180989655200392, -0.08342017233371735, 0.028808461502194405, -0.012005646713078022, -0.05005541816353798, 0.0006751120672561228, 0.017083562910556793, -0.057556428015232086, -0.00958162546157837, -0.002258694963529706, -0.037667643278837204, -0.023305300623178482, 0.032602690160274506, -0.015162426978349686, 0.11954329162836075, 0.030087199062108994, -0.031067103147506714, -0.05279359221458435, -0.009320761077105999, -0.056854285299777985, -0.040065694600343704, -0.019668065011501312, -0.008809474296867847, -0.06633482128381729, 0.0052791801281273365, 0.09492640197277069, -0.01396996807307005, 0.01851971633732319, -0.029040036723017693, 0.037280697375535965, -0.09141215682029724, -0.01640472561120987, 0.00704208854585886, 0.033555783331394196, 0.031762391328811646, 0.020633677020668983, -0.021578369662165642, 0.06357701122760773, 0.008779525756835938, 0.020734107121825218, -0.04194493964314461, -0.09454039484262466, 0.01345693040639162, -0.0057854242622852325, -0.0025178934447467327, -0.008914315141737461, 0.007360783405601978, 0.0664484053850174, 0.08069790154695511, -0.03901321813464165, -0.007630715146660805, 
-0.03797211870551109, 0.07682858407497406, 0.07743572443723679, -0.02675054781138897, -0.041355062276124954, 0.014098272658884525, 0.0807526707649231, 0.04469263553619385, -0.01826322078704834, 0.03495785966515541, -0.0036627331282943487, -0.04686540365219116, -0.058871179819107056, -0.07759420573711395, 0.04846547916531563, -0.16244229674339294, -0.0032853083685040474, 0.07793307304382324, -0.06228557601571083, -0.009902679361402988, -0.006271271966397762, 0.03794245049357414, -0.008210329338908195, 0.06196369603276253, 0.07978448271751404, 0.04253426939249039, -0.022035550326108932, 0.007347245700657368, -0.11693999916315079, -0.050391703844070435, 0.004121636506170034, 0.03738481551408768, -0.0634039044380188, 0.0822673812508583, -0.05666182190179825, -0.049348361790180206, -0.05150791257619858, 0.04127955809235573, 0.003631300525739789, 0.09457182139158249, -0.006067022681236267, -0.003215268487110734, 0.0428408719599247, 0.009089238941669464, -0.04503001272678375, 0.024726178497076035, 0.0035237055271863937, 0.024837084114551544, -0.01806206814944744, -0.0091510359197855, 0.04974909499287605, 0.05888715013861656, -0.07723377645015717, 0.043244924396276474, 0.032667286694049835, -0.012587268836796284, -0.09002158045768738, -0.0019699714612215757, -0.025060079991817474, 0.043803997337818146, 0.07516266405582428, 0.017516229301691055, -0.016294395551085472, 0.02184830792248249, -0.035853177309036255, -7.702985828263649e-33, 0.01316049788147211, -0.07811683416366577, 0.010309985838830471, 0.09429612755775452, 0.0712648555636406, -0.04482543468475342, -0.0510425865650177, 0.021986234933137894, 0.12544000148773193, 0.0632813572883606, 0.014252329245209694, -0.06294140219688416, -0.05642826855182648, -0.00862796325236559, -0.021559052169322968, -0.0010696941753849387, -0.050086718052625656, 0.09203329682350159, -0.020325446501374245, 0.00117536261677742, -0.01494777761399746, -0.07489822059869766, -0.02297188714146614, 0.09383600950241089, -0.016297558322548866, 
0.03404628857970238, -0.03153209760785103, -0.053155072033405304, 0.043415721505880356, -0.024284815415740013, 0.018013790249824524, -0.010814560577273369, -0.029969653114676476, 0.05162125825881958, -0.02815886028110981, 0.08116905391216278, 0.002658928046002984, -0.01595219597220421, -0.03596779331564903, -0.059335529804229736, -0.07409872859716415, 0.021514153108000755, 0.002056243596598506, -0.012915191240608692, -0.022471418604254723, -0.07807386666536331, 0.030226048082113266, 0.027279077097773552, -0.04335141181945801, -0.026256125420331955, -0.002970700152218342, 0.028017055243253708, 0.027455974370241165, 0.0315331369638443, -0.06758023053407669, 0.017884863540530205, 0.08982514590024948, -0.11886433511972427, -0.012412742711603642, 0.002678625052794814, 0.035386402159929276, 0.04090924561023712, -0.02528678998351097, 0.023159988224506378, 0.007054855581372976, -0.043882858008146286, 0.02214701659977436, -0.04871002584695816, 0.02140107750892639, -0.08096781373023987, 0.012938465923070908, -0.014930683188140392, 0.01746566779911518, 0.028366439044475555, -0.02824495919048786, 0.02490057423710823, -0.04192250221967697, -0.07898099720478058, -0.042522333562374115, -0.044565100222826004, 0.16429944336414337, -0.13179397583007812, 0.00408551748842001, 0.016813931986689568, 0.03868217393755913, 0.09587941318750381, -0.07038259506225586, -0.038456790149211884, -0.06325428187847137, 0.0033697872422635555, -0.04738292843103409, -0.026518728584051132, 0.09627432376146317, -0.04734373092651367, 0.006470763590186834, 3.341121184995619e-33, 0.06790421903133392, -0.007084761280566454, -0.07161573320627213, 0.023442542180418968, 0.03316111117601395, -0.040636174380779266, -0.040959350764751434, -0.004387677181512117, -0.010227028280496597, 0.014659667387604713, -0.04070368781685829, -0.011966616846621037, 0.054533086717128754, 0.04109245166182518, 0.015351268462836742, 0.029873840510845184, 0.0919252410531044, 0.062239695340394974, -0.002479395130649209, 
0.11316391080617905, -0.045896850526332855, -0.04206268861889839, 0.018216973170638084, 0.05566893890500069, -0.07507743686437607, 0.06338035315275192, 0.09920256584882736, -0.030310869216918945, -0.19897696375846863, -0.05793241783976555, 0.07108847051858902, -0.0368587002158165, -0.04166405275464058, -0.0037649061996489763, -0.011414491571485996, 0.10253505408763885, -0.0490923710167408, -0.029452962800860405, -0.03592151775956154, -0.0006701401434838772, 0.08446954935789108, 0.023004094138741493, 0.08561688661575317, 0.08908785134553909, -0.021579943597316742, 0.022215671837329865, -0.0060942829586565495, -0.03479042649269104, -0.02589450217783451, 0.05325796455144882, -0.003253191476687789, -0.012603266164660454, -0.006551320664584637, 0.031205609440803528, -0.022015191614627838, 0.01568254828453064, 0.029504787176847458, -0.046809934079647064, -0.020700251683592796, -0.01220524962991476, -0.011913057416677475, 0.020795384421944618, 0.03798164799809456, 0.0054393052123487, -0.060642559081315994, -0.034654807299375534, -0.09743833541870117, 0.04513334855437279, 0.006745648104697466, -0.009420613758265972, 0.1758183091878891, 0.03249015659093857, -0.0013361562741920352, 0.01817828230559826, -0.019379688426852226, -0.008218149654567242, 0.019812103360891342, 0.007601149380207062, -0.02760213054716587, -0.060190554708242416, -0.13174109160900116, -0.03718377649784088, -0.012397672049701214, 0.029140660539269447, -0.022885318845510483, -0.08147352188825607, 0.008746816776692867, -0.04268798232078552, 0.00536744948476553, -0.03842158615589142, -0.004545808304101229, 0.08661314845085144, 0.03427569940686226, 0.015527475625276566, 0.01100945845246315, -3.229821388117671e-08, -0.04418092221021652, 0.010255680419504642, 0.02567916549742222, -0.01406131125986576, 0.04719236120581627, -0.0419125072658062, 0.09164838492870331, 0.05662953481078148, -0.02451086789369583, -0.09155374765396118, -0.06018587201833725, 0.012361432425677776, 0.07580865919589996, 
0.06421971321105957, -0.003972805570811033, 0.06625811755657196, 0.03427901118993759, -0.07430532574653625, -0.054147303104400635, 0.03826405480504036, -0.04928931966423988, -0.003559321165084839, 0.008239504881203175, 0.057934172451496124, 0.03711056709289551, 0.002759921597316861, 0.038908667862415314, 0.09134850651025772, 0.038702987134456635, 0.0967426747083664, 0.03446762636303902, -0.0039067077450454235, -0.07585572451353073, 0.03422998636960983, -0.034106846898794174, -0.008633705787360668, 0.03495276719331741, -0.047946155071258545, -0.005845851264894009, -0.02575399912893772, -0.07493282109498978, 0.03188076615333557, -0.03208998218178749, 0.03888154774904251, -0.0043100835755467415, -0.02946358174085617, -0.051936183124780655, 0.012079093605279922, -0.04552169516682625, 0.08434499800205231, 0.008902499452233315, -0.03928104788064957, 0.008869322016835213, 0.05731678009033203, 0.10480815917253494, -0.042418692260980606, 0.023773957043886185, 0.023703984916210175, -0.02355080470442772, 0.016708923503756523, 0.005330592859536409, -0.04720161482691765, -0.07361264526844025, -0.03582843765616417], "HumanEval": [-0.007729496341198683, 0.08998209983110428, -0.04775545373558998, -0.05410083383321762, -0.12296741455793381, -0.06578502058982849, 0.06186448410153389, 0.03811260312795639, -0.1053650826215744, 0.020319269970059395, -0.00653486605733633, 0.0003576515300665051, 0.01973467506468296, -0.01100825984030962, -0.06362111866474152, 0.047715406864881516, -0.1290530413389206, 0.1021936908364296, 0.056040260940790176, -0.1395556777715683, 0.03642288222908974, 0.054257411509752274, -0.0793548971414566, 0.04118383303284645, -0.010664582252502441, -0.0005107306642457843, -0.034836143255233765, 0.021529600024223328, -0.02178582362830639, -0.022522203624248505, 0.0504525862634182, -0.007846588268876076, 0.09746797382831573, 0.019878344610333443, 0.006273728795349598, 0.04395798593759537, -0.0513124093413353, -0.017354287207126617, 0.002070463728159666, 
0.027345793321728706, -0.05329691991209984, -0.08053044974803925, 0.02448301762342453, -0.03293590620160103, -0.007393393199890852, 0.0387447364628315, 0.00874392781406641, 0.11941704899072647, 0.026801778003573418, -0.008243524469435215, 0.008153355680406094, 0.03891175240278244, -0.10078367590904236, -0.07063949108123779, 0.04100658372044563, -0.008524280041456223, 0.019262349233031273, -0.11597604304552078, -0.037379056215286255, -0.03695357218384743, -0.0101371044293046, 0.0006233407184481621, 0.02065122500061989, -0.035695336759090424, 0.07148923724889755, -0.015699589625000954, 0.021794497966766357, 0.010479139164090157, 0.04436833783984184, 0.07193031907081604, -0.03482380881905556, 0.055956702679395676, -0.014347619377076626, 0.054140228778123856, -0.01125784870237112, 0.02341117523610592, -0.004329038318246603, -0.050800636410713196, -0.05406854674220085, 0.004020418040454388, -0.05113352835178375, -0.02111617475748062, 0.07505093514919281, 0.03820885345339775, 0.023867139592766762, -0.017430145293474197, -0.030760403722524643, 0.08012025058269501, 0.026225775480270386, -0.0006696267519146204, -0.0328700989484787, 0.028764044865965843, 0.03600384294986725, -0.07576756179332733, -0.04504099115729332, 0.05397498607635498, 0.05544697865843773, -0.07460180670022964, -0.022140540182590485, 0.11598112434148788, -0.09814740717411041, 0.04896226152777672, -0.0005355972098186612, -0.1249806135892868, -0.032106660306453705, 0.07409489154815674, -0.009393886663019657, -0.02130652219057083, -0.007222390733659267, -0.0967973917722702, -0.005955843720585108, 0.01928670145571232, 0.013448694720864296, 0.008533846586942673, 0.04560273885726929, -0.05658974125981331, 0.027624335139989853, 0.010890614241361618, 0.0241763386875391, 0.021337660029530525, 0.09562446922063828, -0.0019482234492897987, -0.02062121033668518, 0.06717024743556976, -0.05420893803238869, -0.0452033206820488, -0.04120071604847908, -6.285306580393353e-33, -0.037157896906137466, 0.02352285012602806, 
0.09714772552251816, -0.018617454916238785, -0.049327000975608826, 0.006349178962409496, -0.012734032236039639, -0.016918066889047623, -0.12709902226924896, 0.0655074343085289, -0.012754876166582108, 0.007537232246249914, 0.05274837464094162, 0.0049898759461939335, 0.02366279810667038, -0.014261906035244465, 0.07636595517396927, 0.010622438974678516, -0.02588445320725441, -0.09290402382612228, -0.024909960106015205, 0.010695483535528183, 0.015213361009955406, -0.06264086067676544, 0.030019966885447502, -0.004018775187432766, -0.003909466788172722, -0.08430969715118408, 0.07208764553070068, 0.008843592368066311, -0.02892947942018509, -0.00885207112878561, -0.031321607530117035, -0.04152520373463631, 0.07334920018911362, 0.010040421970188618, -0.014202799648046494, -0.01171022281050682, 0.02698930725455284, -0.01641668751835823, 0.020968515425920486, -0.02108272723853588, 0.020617567002773285, 0.042720548808574677, 0.062278155237436295, -0.03197715803980827, 0.005120414774864912, 0.05980057269334793, 0.047620195895433426, 0.04209491237998009, -0.0018965547205880284, -0.04110369458794594, -0.03288106992840767, 0.08287674188613892, -0.013839582912623882, 0.010116307996213436, 0.032967835664749146, 0.05902283638715744, 0.02179817110300064, 0.05763262137770653, 0.05491102114319801, 0.03299953415989876, 0.0006009613280184567, 0.02367084100842476, -0.08636894077062607, -0.05624132603406906, 0.03536467254161835, -0.004673602990806103, 0.03882945701479912, 0.08221349865198135, 0.016767723485827446, 0.030235419049859047, -0.020324861630797386, -0.0027942510787397623, -0.06949194520711899, -0.03390704095363617, -0.01712452992796898, -0.09921405464410782, -0.0731089785695076, -0.10556907951831818, 0.06874190270900726, 0.020714933052659035, 0.010616801679134369, 0.011837707832455635, 0.044817715883255005, -0.06149494647979736, -0.02014506608247757, -0.004846733994781971, -0.04615921899676323, -0.019329866394400597, -0.14011506736278534, -0.03983568027615547, 0.04893610253930092, 
-0.089154914021492, 0.023490464314818382, 1.497825554735313e-33, 0.030992045998573303, 0.039087023586034775, 0.08265721052885056, 0.013482446782290936, -0.04753795266151428, -0.10833771526813507, 0.011571165174245834, -0.04154051095247269, -0.02824830636382103, 0.08700937032699585, -0.025361526757478714, -0.027638981118798256, 0.0943107157945633, -0.04672678932547569, 0.09177259355783463, 0.05079028755426407, -0.0031113990116864443, 0.028381170704960823, 0.01908993534743786, 0.03809844329953194, -0.017256807535886765, 0.09618771821260452, 0.00865720771253109, 0.0458628311753273, -0.049474671483039856, 0.01856064237654209, 0.027953749522566795, -0.02682146057486534, 0.0561695471405983, 0.010132930241525173, -0.07146652042865753, 0.02728383056819439, 0.036398131400346756, -0.020614780485630035, -0.021203655749559402, -0.046354182064533234, 0.05382845178246498, -0.02013034000992775, -0.007540319114923477, -0.03285427391529083, 0.04958192631602287, 0.07343121618032455, 0.07037033885717392, -0.0008542010327801108, 0.053131334483623505, -0.0028940446209162474, -0.028178174048662186, 0.07873832434415817, 0.0074536362662911415, -0.01670694537460804, -0.03193724900484085, 0.009000126272439957, -0.08454228192567825, 0.0028048763051629066, -0.00022303515288513154, 0.011102340184152126, -0.05294208601117134, 0.029229631647467613, 0.05466700345277786, 0.01831141486763954, -0.05252069607377052, -0.08309996873140335, 0.10183496028184891, -0.012970758602023125, 0.04283531755208969, -0.08580417186021805, -0.0861060842871666, -0.04785570129752159, -0.0815579816699028, 0.05261586233973503, 0.08851093798875809, 0.07126002013683319, -0.0076805842109024525, -0.008404435589909554, -0.09369491040706635, -0.02181495539844036, -0.026451019570231438, 0.01061464473605156, 0.0020171264186501503, 0.10930249840021133, -0.056753940880298615, 0.021254269406199455, 0.005148415919393301, 0.0163993202149868, -0.08761656284332275, -0.07148130238056183, 0.10463918745517731, 0.061964940279722214, 
-0.0036926413886249065, -0.027073202654719353, -0.0037121016066521406, 0.011870997957885265, 0.07678266614675522, -0.04055652394890785, 0.011467053554952145, -3.254865177382271e-08, -0.02433355711400509, -0.12190411239862442, -0.02336541749536991, 0.022777996957302094, 0.02651223912835121, 0.029295574873685837, -0.017648732289671898, -0.05685610696673393, -0.017145706340670586, -0.16941937804222107, 0.058190349489450455, -0.006166383624076843, 0.010976002551615238, 0.019920511171221733, 0.0598427839577198, 0.07213930785655975, 0.04823511838912964, -0.041125744581222534, -0.05381886288523674, 0.0672491267323494, -0.009617670439183712, 0.055354323238134384, -0.010876727290451527, 0.04961283132433891, -0.08191806823015213, -0.015107084065675735, -0.024667901918292046, -0.011608319357037544, 0.06135302782058716, 0.015525002963840961, 0.06489820033311844, 0.050018683075904846, 0.00016940082423388958, -0.012098181061446667, -0.06117818504571915, 0.013310526497662067, -0.023381438106298447, 0.06735971570014954, -0.011238877661526203, -0.03965618833899498, 0.005312108900398016, 0.03158596158027649, -0.08481810241937637, 0.004098784178495407, 0.049703411757946014, -0.10397516191005707, 0.030704326927661896, -0.03747853636741638, -0.007524378132075071, -0.05807962641119957, -0.05038910731673241, -0.0015451572835445404, 0.023387858644127846, -0.08369480818510056, -0.016904795542359352, -0.00894987303763628, -0.01821691356599331, 0.00601901626214385, -0.009509739466011524, 0.052596256136894226, 0.021974904462695122, 0.012852486222982407, 0.10714749991893768, -0.015678387135267258], "SWE-bench": [-0.06119183450937271, 0.06605487316846848, -0.034891918301582336, -0.040058352053165436, -0.05863441899418831, -0.033880867063999176, 0.014311187900602818, 0.026339633390307426, -0.08415940403938293, 0.05889486148953438, 0.022523140534758568, -0.0829627588391304, -0.012209421955049038, -0.01527806743979454, -0.03247154504060745, 0.04519903287291527, -0.0247608944773674, 
-0.03015226684510708, 0.02103612944483757, 0.008022860623896122, -0.02077859826385975, 0.04297482222318649, -0.06308887898921967, 0.08322104066610336, -0.010901461355388165, -0.045527227222919464, -0.005413430742919445, -0.0020253409165889025, -0.04910057410597801, -0.053554993122816086, 0.07436717301607132, 0.014892754144966602, -0.07474058866500854, 0.021431250497698784, 0.0471564382314682, 0.09704649448394775, 0.06341274827718735, -0.08702816069126129, 0.05899639055132866, -0.053701870143413544, 0.008184229955077171, 0.0038567576557397842, 0.025811249390244484, 0.012933570891618729, -0.007352546788752079, 0.004784147720783949, -0.026065891608595848, -0.012459931895136833, 0.02178092487156391, -0.04557172954082489, -0.031109564006328583, -0.013950083404779434, -0.0479893684387207, -0.09858345240354538, -0.005368389189243317, -0.019369354471564293, -0.05348728224635124, 0.009912332519888878, 0.0024333884939551353, 0.033193401992321014, 0.07924076169729233, -0.020136525854468346, 0.03253675624728203, 0.008160748519003391, 0.015941962599754333, -0.05075196549296379, 0.042966920882463455, -0.034565962851047516, 0.03655977547168732, 0.01462488528341055, -0.037906464189291, 0.05044543370604515, -0.1415078043937683, 0.0029449593275785446, -0.002341989893466234, 0.0439455583691597, -0.07865757495164871, -0.032564908266067505, 0.016876213252544403, -0.08747760206460953, 0.001703260699287057, -0.05765511468052864, -0.03523021563887596, 0.11110348999500275, 0.043821148574352264, 0.03759227320551872, 0.029198547825217247, 0.00571060786023736, 0.038752857595682144, -0.019768642261624336, -0.00987811479717493, -0.03352499008178711, 0.035799428820610046, -0.011997140944004059, 0.06323594599962234, 0.06612718105316162, -0.0787743553519249, -0.05979183688759804, -0.026572659611701965, 0.0939713791012764, -0.013891401700675488, 0.04216416925191879, 0.0016791722737252712, -0.07326937466859818, -0.054010067135095596, -0.014644364826381207, 0.059015437960624695, 0.07427514344453812, 
-0.03859100118279457, -0.008343294262886047, 0.0075095645152032375, 0.03634988144040108, -0.06594176590442657, -0.016442706808447838, 0.006870273966342211, 0.07928472757339478, 0.05035524070262909, -0.020974846556782722, -0.004583925008773804, 0.02555171027779579, 0.07446995377540588, 0.03815583139657974, -0.07409222424030304, -0.01410727296024561, 0.012320039793848991, -0.06759822368621826, 0.08750032633543015, -5.6110815916700754e-33, -0.024766942486166954, 0.038041286170482635, 0.04802849143743515, 0.050908613950014114, 0.07321181893348694, 0.021845770999789238, 0.04324893280863762, -0.04720652103424072, -0.12700782716274261, -0.03297591954469681, -0.00665879575535655, 0.012039636261761189, -0.005353152751922607, -0.03796185180544853, -0.0165646281093359, -0.09220793098211288, 0.0687076598405838, 0.068301260471344, 0.03683500736951828, 0.014691759832203388, 0.03767955303192139, -0.055461883544921875, -0.02953152172267437, 0.005203336011618376, 0.06319398432970047, -0.00471660029143095, -0.008429653011262417, 0.05537952482700348, 0.02448245696723461, -0.015508349984884262, 0.04985160008072853, 0.030670996755361557, -0.033088911324739456, 0.08296696841716766, 0.05535468831658363, 0.012455868534743786, 0.02465364895761013, 0.0017073244089260697, -0.09674452990293503, -0.042551394551992416, -0.015872284770011902, 0.04798600822687149, -0.10358131676912308, 0.0038036892656236887, 0.06137034296989441, -0.027582114562392235, 0.0302576944231987, -0.0444006584584713, 0.09680386632680893, 0.05415166914463043, -0.0522543340921402, 0.03870248794555664, -0.006954648997634649, 0.02630600333213806, 0.023135695606470108, -0.06401136517524719, 0.04106828570365906, 0.013822563923895359, -0.008224312216043472, 0.01799803599715233, 0.03275371715426445, -0.034765008836984634, -0.04332365095615387, -0.06528795510530472, 0.06222572922706604, -0.03595559298992157, -0.043839991092681885, -0.026738544926047325, 0.08209039270877838, -0.047683149576187134, -0.011953371576964855, 
-0.03324858471751213, 0.0828319862484932, 0.06636785715818405, -0.01810270920395851, -0.023873744532465935, -0.04754997044801712, 0.04885207861661911, -0.043161604553461075, -0.10117213428020477, 0.03226548433303833, -0.07397697865962982, -0.042321350425481796, 0.029080074280500412, 0.008827867917716503, -0.007387903984636068, -0.03285110741853714, 0.021342763677239418, -0.013165776617825031, 0.15233869850635529, -0.07203944772481918, 0.0721520259976387, 0.032355859875679016, 0.05258305370807648, -0.04228246212005615, 2.3903161049305328e-34, -0.08774587512016296, -0.07437098771333694, 0.03406284749507904, -0.018837274983525276, -0.011106537654995918, -0.028735605999827385, 0.008366708643734455, 0.021147673949599266, -0.024372028186917305, 0.0882376953959465, 0.0662117749452591, -0.04236219823360443, 0.044543568044900894, 0.015839291736483574, -0.008910486474633217, 0.040187768638134, 0.037630919367074966, -0.02402631938457489, 0.058740049600601196, 0.03842049464583397, 0.03282532840967178, 0.20390839874744415, 0.04116528481245041, 0.056583553552627563, -0.08945135027170181, 0.11175478994846344, -0.04867979511618614, -0.018658816814422607, 0.009211991913616657, -0.022831009700894356, 0.012714985758066177, -0.04370484501123428, -0.04693177342414856, 0.016809256747364998, 0.034915950149297714, -0.0899026095867157, 0.08895063400268555, 0.11167878657579422, -0.015811150893568993, -0.04521499201655388, 0.08388065546751022, -0.02652902342379093, -0.04287227615714073, -0.023317724466323853, 0.0674193724989891, -0.08518823981285095, 0.06603744626045227, -0.02453923411667347, -0.04634174704551697, 0.014505206607282162, -0.07124800980091095, -0.10035532712936401, -0.0015488936332985759, -0.035674139857292175, 0.008537080138921738, 0.040477361530065536, 0.08683846145868301, -0.05434277653694153, 0.023881228640675545, -0.022026540711522102, 0.015168225392699242, 0.02931133285164833, -0.04202805459499359, -0.0032961268443614244, 0.08857748657464981, -0.09219621121883392, 
-0.029591919854283333, 0.018535837531089783, -0.026737216860055923, 0.05807489529252052, -0.03553122654557228, 0.009946994483470917, 0.008970987051725388, -0.03622156381607056, 0.07484171539545059, -0.10095307976007462, -0.05799892544746399, -0.07806400209665298, 0.03647001087665558, 0.09852278977632523, -0.07197602838277817, 0.006203506141901016, -0.00517955981194973, 0.05036972835659981, -0.04823427274823189, -0.044722605496644974, 0.04874073341488838, 0.029096653684973717, -0.0031534924637526274, -0.017704851925373077, -0.07501582056283951, -0.006743432022631168, 0.008896894752979279, -0.05372167006134987, -0.017771178856492043, -4.383223028980865e-08, -0.020787907764315605, -0.06438622623682022, -0.09642699360847473, 0.06486774235963821, 0.05912063270807266, -0.032245438545942307, -0.048604320734739304, 0.04657365381717682, 0.012112243101000786, -0.0048780180513858795, -0.022931022569537163, -0.0023079284001141787, 0.014689850620925426, 0.06688582897186279, -0.006319235544651747, -0.08024619519710541, -0.059286318719387054, 0.04406061768531799, -0.03954297676682472, -0.10232942551374435, -0.04522315040230751, 0.012174480594694614, 0.09900424629449844, -0.060468703508377075, -0.042725998908281326, 0.040531475096940994, 0.09879589080810547, 0.1104784682393074, -0.027258997783064842, -0.01607237569987774, -0.060325853526592255, 0.04720179736614227, -0.05609147250652313, -0.025106776505708694, -0.10986736416816711, -0.03498772531747818, 0.0292802806943655, 0.0499170683324337, 0.02810540236532688, -0.013222316280007362, -0.014087858609855175, 0.06748630106449127, -0.003972173668444157, 0.06221802160143852, 0.06843477487564087, -0.023309219628572464, 0.005388234276324511, 0.01729436218738556, 0.02802581898868084, -0.048297446221113205, -0.05063625052571297, -0.001663834322243929, -0.0011127396719530225, 0.07335573434829712, 0.01095141563564539, 0.01663501188158989, -0.01757734827697277, -0.010294045321643353, 0.030884936451911926, 0.0667371153831482, 
0.02240404300391674, -0.06302506476640701, -0.0009959758026525378, -0.002241928828880191], "TruthfulQA": [0.04595303162932396, -0.007818949408829212, 0.0028358539566397667, 0.10353591293096542, 0.016881484538316727, 0.004694577772170305, 0.11250774562358856, 0.022069569677114487, -0.005827769637107849, 0.010848847217857838, -0.017709068953990936, 0.04536321014165878, -0.012845244258642197, 0.06828587502241135, 0.026259783655405045, 0.015324447304010391, 0.04745206609368324, 0.04595789313316345, 0.003626553574576974, 0.021296082064509392, 0.024961762130260468, -0.015784921124577522, 0.04469209536910057, -0.049163755029439926, -0.019661342725157738, 0.009112468920648098, -0.004895385354757309, -0.06990984827280045, -0.023333223536610603, -0.047667618840932846, -0.06903540343046188, -0.014411130920052528, -0.008375375531613827, -0.04527277871966362, -0.024244019761681557, 0.010234560817480087, 0.09646964073181152, 0.04833448678255081, -0.02012959122657776, 0.02116052806377411, 0.044538937509059906, -0.03217862546443939, 0.050717681646347046, -0.046757809817790985, 0.012145974673330784, 0.019579341635107994, -0.0023856721818447113, -0.05024371296167374, -0.003468360984697938, -0.00166211964096874, -0.06825528293848038, -0.007189035415649414, 0.06345100700855255, 0.04555412381887436, 0.09180114418268204, 0.021409273147583008, -0.03613034635782242, -0.05047006532549858, 0.05502437427639961, 0.05357181653380394, -0.03077639825642109, 0.031255874782800674, -0.00937849935144186, 0.03653280436992645, 0.10093755275011063, 0.03113482892513275, -0.04874497652053833, -0.1001376286149025, 0.04300176352262497, -0.008486994542181492, 0.008121349848806858, 0.03695607930421829, 0.049227021634578705, -0.02494056150317192, 0.0007059687632136047, 0.05368923395872116, 0.03441232070326805, -0.048054251819849014, 0.008855104446411133, 0.02896660938858986, -0.012917308136820793, 0.013472914695739746, 0.03426998853683472, 0.007635526359081268, 0.04498866945505142, 0.013500955887138844, 
0.06415722519159317, 0.024748267605900764, -0.14771027863025665, 0.016747765243053436, 0.02336234413087368, -0.06021253019571304, -0.06584413349628448, 0.004226080607622862, -0.05146322399377823, -0.02405586466193199, -0.03797442838549614, -0.060695480555295944, -0.13126930594444275, -0.036636993288993835, -0.01536597590893507, 0.012198284268379211, 0.0007094581960700452, 0.014836336486041546, 0.03800220787525177, -0.01042496133595705, -0.0298679918050766, 0.008216582238674164, 0.01311268750578165, 0.013484237715601921, -0.01410865131765604, 0.029418019577860832, 0.030194949358701706, 0.05786088854074478, -0.00626171613112092, -0.009166073985397816, 0.022155476734042168, -0.03238324820995331, -0.04667636752128601, -0.004478459246456623, 0.03386862948536873, -0.03543040156364441, 0.07777094095945358, -0.005649417173117399, 0.013088461942970753, -0.13545240461826324, -0.07510844618082047, -1.0349124263668407e-32, 0.0021666146349161863, -0.0245139729231596, 0.008691691793501377, -0.01701204851269722, 0.05531471222639084, -0.02537880279123783, -0.0070512378588318825, 0.023267900571227074, 0.07498754560947418, 0.0068680583499372005, -0.025460636243224144, -0.014655359089374542, 0.032430555671453476, 0.019053954631090164, 0.045873675495386124, 0.05976225063204765, -0.09260793030261993, 0.03348873183131218, -0.03567028045654297, -0.031436942517757416, -0.06515685468912125, 0.0148897934705019, 0.03268112614750862, 0.005601484328508377, -0.05269273743033409, 0.015876203775405884, -0.06632473319768906, 0.013550857082009315, 0.009540437720716, 0.005474245175719261, -0.00943682249635458, -0.0019658419769257307, -0.06423735618591309, 0.04026945307850838, 0.05064650624990463, 0.10270518809556961, 0.06786856800317764, 0.027416739612817764, -0.05449691414833069, 0.03492584079504013, -0.010764370672404766, -0.012476654723286629, -0.010483653284609318, 0.011025937274098396, 0.01962398551404476, -0.023049497976899147, -0.03653493896126747, -0.04371108487248421, -0.129096120595932, 
-0.0760459452867508, 0.04281482845544815, 0.00616855314001441, -0.00782084558159113, 0.00703478092327714, 0.046938635408878326, 0.07607845216989517, 0.03608620539307594, -0.00823106151074171, 0.060192324221134186, 0.0529341846704483, -0.07527817785739899, 0.007337657734751701, -0.02654106169939041, 0.032527949661016464, 0.021931782364845276, 0.03784264996647835, 0.028249477967619896, 0.004266418516635895, -0.05945809558033943, -0.015181639231741428, 0.058869265019893646, -0.0447932630777359, -0.03167553246021271, -0.06771070510149002, -0.07500401139259338, -0.019828088581562042, 0.04851267486810684, 0.04668055474758148, -0.017727121710777283, -0.0027068881317973137, 0.06613554060459137, -0.08411713689565659, 0.08345018327236176, 0.04284666106104851, -0.06395470350980759, 0.06454746425151825, 0.00492965430021286, -0.06527666747570038, 0.0037528120446950197, -0.030066506937146187, -0.032530527561903, -0.06217357888817787, 0.025854216888546944, -0.10519680380821228, -0.20971043407917023, 6.546262652856055e-33, -0.07578551769256592, -0.09410455822944641, 0.006450847256928682, 0.044800397008657455, -0.028009315952658653, -0.017460277304053307, 0.05646020174026489, 0.08455181866884232, -0.06572341918945312, 0.02040819264948368, 0.09042315185070038, 0.049204692244529724, -0.040583621710538864, 0.021404681727290154, 0.12513810396194458, -0.030298009514808655, 0.08898431062698364, 0.14604593813419342, -0.01954967901110649, -0.09216956794261932, -0.0747537836432457, 0.019399123266339302, -0.017560945823788643, 0.0143629414960742, -0.020456556230783463, 0.12364283204078674, -0.027232995256781578, -0.007459125015884638, -0.13004936277866364, -0.03081965260207653, 0.07326164841651917, 0.06848343461751938, -0.08661976456642151, -0.02819490060210228, 0.009075775742530823, 0.03094024956226349, -0.00472122710198164, -0.02094523049890995, -0.0444832406938076, 0.04522579908370972, 0.0021463038865476847, 0.05206427350640297, 0.04256472736597061, 0.09388504922389984, 
0.003503494430333376, 0.032180141657590866, 0.06312992423772812, 0.026632841676473618, -0.03713983669877052, 0.07555390149354935, 0.05537785589694977, 0.05070807412266731, -0.04202551767230034, -0.08527097851037979, -0.02239031344652176, -0.0019355574622750282, 0.0033234781585633755, -0.03873439505696297, 0.05543498322367668, -0.015423072502017021, 0.010399929247796535, 0.03429875895380974, -0.07416035979986191, -0.08247976750135422, 0.05464976653456688, 0.035890597850084305, -0.07142456620931625, 0.056452203541994095, 0.07587701827287674, -0.04016384854912758, 0.1772535890340805, -0.03320757672190666, -0.04681450501084328, 0.022008009254932404, -0.07200919091701508, 0.05054139718413353, 0.04839306324720383, 0.037107594311237335, -0.012698899023234844, 0.03440632298588753, -0.12421925365924835, -0.008788908831775188, 0.030788732692599297, -0.039552632719278336, -0.04966430366039276, -0.07756359130144119, -0.02733250893652439, -0.04788300767540932, -0.03019515983760357, 0.03588175028562546, -0.06271176785230637, -0.016979297623038292, -0.042202189564704895, -0.05227828770875931, 0.043559927493333817, -3.2743269429147404e-08, 0.03707258030772209, 0.0868029072880745, 0.08948438614606857, 0.047268643975257874, -0.04765463247895241, 0.01872778683900833, 0.051115959882736206, 0.05227755010128021, -0.07297828793525696, 0.030228707939386368, 0.05057508870959282, 0.04184780642390251, 0.028412552550435066, 0.04238585755228996, 0.025023018941283226, 0.004025445319712162, 0.09544625878334045, -0.021926777437329292, -0.06228072568774223, -0.019197218120098114, 0.025221826508641243, 0.05164221301674843, 0.019921904429793358, 0.033889517188072205, 0.01902015134692192, -0.040750619024038315, 0.027071300894021988, 0.03877970576286316, 0.022584283724427223, -0.01776486076414585, 0.015221049077808857, 0.0019033831777051091, -0.019876711070537567, 0.0007525543333031237, -0.007623054087162018, -0.09117905050516129, 0.036908119916915894, -0.014095978811383247, -0.02213343046605587, 
-0.037967123091220856, -0.054811060428619385, -0.016818126663565636, 0.023570574820041656, 0.1005927175283432, -0.11033426970243454, -0.06008648872375488, 0.004768830724060535, -0.04731794819235802, 0.016709674149751663, -0.020512912422418594, -0.07674796134233475, 0.0669848844408989, 0.029046006500720978, -0.030288293957710266, -0.07024654000997543, -0.034110117703676224, 0.009409228339791298, -0.03217627853155136, -0.067441925406456, -0.08657589554786682, 0.09513776004314423, -0.009204956702888012, 0.03354552760720253, 0.01251243706792593], "ARC": [-0.021968677639961243, -0.02693609520792961, 0.06413711607456207, 0.07788605988025665, 0.021299144253134727, -0.006079409271478653, 0.012116299010813236, -0.033755142241716385, 0.10511984676122665, 0.048871345818042755, 0.013141502626240253, -0.09040311723947525, -0.05906812846660614, -0.04487622156739235, -0.02449665404856205, -0.034112319350242615, -0.06660426408052444, 0.02414899878203869, -0.009243074804544449, -0.02452966943383217, 0.02235555648803711, -0.015289642848074436, -0.053359415382146835, -0.011017316952347755, -0.06395924836397171, 0.05360179394483566, -0.04317734017968178, -0.0073012011125683784, 0.010491390712559223, -0.07065685838460922, -0.015925174579024315, 0.04762650281190872, -0.007778290193527937, 0.012114124372601509, -0.08411930501461029, 0.04996567964553833, 0.02247791364789009, -0.0023172106593847275, -0.028950592502951622, -0.003618988674134016, -0.06179051846265793, -0.09313230961561203, 0.029985368251800537, 0.03187797963619232, 0.06104598566889763, 0.022394666448235512, 0.08523263037204742, 0.01605568826198578, 0.036609772592782974, -0.00816687848418951, -0.059355318546295166, -0.04182426631450653, -0.10908915102481842, 0.043462879955768585, -0.04283547401428223, 0.054530054330825806, 0.012767121195793152, 0.03478445112705231, 0.07417500764131546, -0.04707954078912735, 0.04015183448791504, 0.021366935223340988, -0.10124818235635757, 0.07032708823680878, 0.13368038833141327, 
-0.03802692890167236, -0.009041017852723598, -0.02504105493426323, 0.02914362959563732, -0.07111874967813492, 0.09340380877256393, 0.06831856817007065, -0.04983550310134888, -0.04486720263957977, 0.011933594942092896, -0.05153864622116089, -0.012184828519821167, 0.007929923012852669, 0.019802160561084747, 0.025624515488743782, -0.1020577996969223, 0.026248648762702942, -0.05466719716787338, 0.03516915813088417, 0.04763607308268547, 0.0330812931060791, 0.08136530965566635, -0.04463087022304535, -0.12761642038822174, 0.03605715557932854, 0.018351582810282707, -0.008519106544554234, -0.12498946487903595, -0.015060864388942719, -0.07597808539867401, 0.05513586103916168, 0.0492049902677536, -0.032636214047670364, 0.07988985627889633, 0.026291267946362495, 0.030939657241106033, 0.04234062135219574, -0.0027418388053774834, 0.10414979606866837, 0.01467320416122675, -0.0726470798254013, 0.04392528161406517, 0.04116857051849365, 0.005408392287790775, 0.057213373482227325, -0.02874026447534561, -0.039018139243125916, -0.0007540472433902323, 0.05065145716071129, -0.0024420819245278835, -0.05084623023867607, -0.059540197253227234, 0.03504997119307518, -0.0833115205168724, 0.011385097168385983, 0.037042707204818726, 0.02998659946024418, -0.03614901006221771, 0.03248317539691925, 0.00040190902655012906, -0.004679516889154911, -0.07064148038625717, -1.1625415785705767e-32, 0.05124379321932793, -0.08703991025686264, 0.04614727199077606, 0.026532789692282677, 0.06944990903139114, -0.07665862143039703, -0.06095371022820473, -0.02051924727857113, 0.00991523265838623, 0.09434852749109268, -0.045620713382959366, 0.03679198399186134, 0.03264910355210304, 0.02902287244796753, 0.027937451377511024, -0.05267855152487755, -0.1373176872730255, 0.02688557095825672, -0.05743847042322159, 0.0021749339066445827, -0.06644778698682785, 0.06836291402578354, 0.006042334251105785, -0.06362897157669067, -0.020613204687833786, 0.013872409239411354, -0.018619872629642487, 0.006939527578651905, 
-0.04372589290142059, 0.005246744956821203, 0.08529800921678543, -0.026286950334906578, -0.03861995041370392, 0.05389248952269554, 0.05975552275776863, 0.04013651981949806, 0.024893881753087044, -0.04567320644855499, 0.025460639968514442, -0.036035116761922836, 0.0037165351677685976, -0.08460020273923874, -0.038602374494075775, 0.01817196235060692, 0.11659708619117737, -0.033803317695856094, 0.03812786191701889, -0.003207068657502532, -0.03995618224143982, 0.021882658824324608, 0.04570374637842178, -0.03254964202642441, -0.020175646990537643, 0.04620221257209778, 0.05948600172996521, 0.04938996210694313, 0.01476927101612091, -0.04883434623479843, -0.1005559042096138, 0.03240847587585449, 0.016419939696788788, 0.051486119627952576, 0.03132321685552597, 0.03502565249800682, 0.038609642535448074, 0.016761548817157745, 0.020839696750044823, -0.001280197175219655, 0.008727168664336205, -0.04594658687710762, -0.04094752296805382, -0.005598265212029219, -0.007075964007526636, 0.030197374522686005, -0.07476121187210083, 0.018653474748134613, -0.06036181002855301, -0.029368579387664795, 0.008628757670521736, 0.049971383064985275, -0.08885551244020462, -0.04356350749731064, 0.005720042157918215, -0.05460247024893761, -0.0916704535484314, -0.01519815530627966, 0.02805326133966446, -0.0548865869641304, 0.06105140224099159, -0.05367593839764595, -0.03328355401754379, -0.04190479591488838, 0.06750214099884033, -0.12439300864934921, -0.010398305021226406, 4.036970494328227e-33, -0.05796453729271889, -0.06884165853261948, -0.04184059798717499, 0.012631706893444061, -0.02412705309689045, 0.007809229660779238, -0.05442344769835472, 0.04639289900660515, 0.004298578482121229, 0.04471438750624657, -0.008876172825694084, 0.010156962089240551, 0.027504034340381622, -0.032379791140556335, 0.0004417065065354109, -0.03128834068775177, 0.030908135697245598, 0.09359222650527954, 0.06292646378278732, -0.03482755273580551, -0.044323038309812546, 0.1342277228832245, 0.00676347641274333, 
0.009553862735629082, -0.04078486189246178, 0.03291148692369461, -0.03489098697900772, 0.006999606732279062, -0.038123417645692825, 0.009708809666335583, -0.04598184674978256, 0.021851850673556328, -0.07560209184885025, 0.005339677911251783, -0.04139863699674606, 0.04888981953263283, 0.020727436989545822, 0.018636856228113174, -0.020685140043497086, 0.004298807121813297, -0.0291716530919075, -0.02782103233039379, 0.17333003878593445, 0.023895354941487312, 0.03228601813316345, 0.0076743001118302345, 0.03524993732571602, 0.13173730671405792, -0.0008634194964542985, -0.011116552166640759, 0.053491685539484024, 0.004526741337031126, 0.05318710580468178, -0.01925576850771904, 0.06613451987504959, 0.015429900959134102, 0.02169746160507202, -0.002412831876426935, 0.020112507045269012, 0.026923147961497307, 0.04876108840107918, -0.04721130430698395, 0.04862119257450104, 0.01216871477663517, -0.007081415969878435, 0.016667088493704796, -0.029458070173859596, 0.041060641407966614, 0.04461681842803955, -0.013076329603791237, 0.09479865431785583, 0.03247249498963356, 0.019200820475816727, -0.11223308742046356, -0.03071310557425022, -0.02864798530936241, -0.008698762394487858, -0.024371780455112457, -0.04155799373984337, 0.021032918244600296, -0.12267698347568512, 0.05803162604570389, -0.009084227494895458, -0.049627602100372314, -0.09825063496828079, -0.08932630717754364, -0.03654390573501587, -0.1010570302605629, -0.010656667873263359, 0.05165766924619675, -0.02814752236008644, -0.002411233726888895, -0.0017019592924043536, -0.028889505192637444, -0.05963229760527611, -3.1582253257056436e-08, 0.05123499780893326, 0.06069882959127426, 0.07991400361061096, -0.06103583052754402, 0.03086182102560997, 0.06573731452226639, 0.10785094648599625, 0.04165523499250412, -0.007670554332435131, -0.0843496099114418, -0.015710053965449333, 0.035289447754621506, 0.05196172744035721, 0.03602985665202141, 0.13619503378868103, 0.03286603465676308, -0.05051164701581001, -0.01376847829669714, 
-0.06849290430545807, -0.04867766052484512, 0.01403781771659851, -0.037660013884305954, 0.016611389815807343, 0.0628763884305954, 0.06896436214447021, 0.011716059409081936, 0.053567420691251755, 0.09966044127941132, 0.0007186679285950959, 0.06844452768564224, 0.028878385201096535, -0.014848100021481514, -0.005703559145331383, -0.06929294019937515, -0.044265542179346085, -0.02217901311814785, -0.04003399983048439, -0.025274455547332764, 0.00499209389090538, -0.12422394007444382, -0.01700678840279579, 0.10006009042263031, -0.056963544338941574, 0.009277536533772945, 0.004444205202162266, 0.03715236857533455, 0.0163711067289114, -0.0030346722342073917, -0.02049935981631279, 0.04360697790980339, -0.04936237260699272, 0.016983412206172943, 0.06815265864133835, -0.07977098226547241, -0.04333933815360069, 0.03772713616490364, -0.010994861833751202, -0.054565515369176865, 0.0057744518853724, 0.04530562832951546, -0.006043548695743084, -0.0021870797500014305, 0.1059885248541832, -0.001558526768349111], "GSM8K": [0.06393769383430481, 0.09055498242378235, -0.06877076625823975, 0.06864549219608307, -0.05705302208662033, -0.04169819876551628, -0.022402115166187286, 0.0664176195859909, -0.034481994807720184, 0.01581716351211071, 0.03109746426343918, -0.06285522878170013, -0.04242316260933876, 0.015027842484414577, -0.03730902448296547, 0.000141530079417862, -0.03833000734448433, 0.011080393567681313, -0.11589910835027695, -0.0523734912276268, 0.07349243015050888, -0.09517297148704529, 0.011362873017787933, 0.0425444096326828, 0.1137787327170372, 0.046024490147829056, -0.02795039303600788, -0.054848335683345795, 0.004094450268894434, -0.04054725170135498, -0.07406529039144516, 0.03853246569633484, 0.08319615572690964, -0.05173729360103607, -0.05440559983253479, -0.07298894226551056, -0.04310118779540062, -0.0005017796065658331, 0.03422948345541954, 0.0415908619761467, -0.04712287336587906, 0.011104068718850613, -0.01416752953082323, 0.03721209242939949, -0.01666567102074623, 
-0.0021298849023878574, -0.033612728118896484, 0.10166511684656143, 0.13949091732501984, 0.04367721453309059, -0.013041099533438683, 0.014928043819963932, -0.16997310519218445, 0.02165859006345272, 0.06588131934404373, 0.01934334635734558, -0.048266276717185974, 0.01610022969543934, 0.01530102826654911, 0.061628907918930054, -0.024447161704301834, -0.04738014191389084, -0.030050188302993774, 0.003742696950212121, 0.05636317655444145, -0.06038173288106918, -0.133500337600708, -0.028500782325863838, -0.10849406570196152, -0.0008918385137803853, -0.008240283466875553, 0.03759137913584709, 0.032488562166690826, -0.1264391541481018, 0.04439857602119446, -0.08444145321846008, 0.07947538793087006, -0.01153213158249855, 0.01626015082001686, -0.03978529945015907, -0.03525808826088905, -0.024747241288423538, 0.029295653104782104, 0.011145643889904022, 0.02335321344435215, -0.06079138442873955, 0.1009516716003418, 0.10362518578767776, 0.04128598794341087, -0.02626904472708702, -0.01825137436389923, 0.01359148696064949, -0.04334142431616783, -0.05160965770483017, -0.044065579771995544, 0.0709192156791687, -0.010108746588230133, -0.02840016409754753, -0.0209187138825655, 0.03192798048257828, 0.03427471965551376, 0.021744389086961746, 0.008733867667615414, 0.008706952445209026, -0.0013454495929181576, 0.024087944999337196, 0.03312618285417557, 0.09685897082090378, 0.016612550243735313, 0.03193739429116249, -0.03648374602198601, 0.006573391146957874, 0.04073048755526543, 0.03623161092400551, -0.013952057808637619, -0.010471987538039684, 0.0435715913772583, -0.03730468451976776, 0.050233155488967896, 0.05304466933012009, 0.03983413428068161, 0.05702580511569977, 0.055782388895750046, 0.016884570941329002, -0.15457241237163544, -0.0004447024839464575, 0.02409696951508522, -5.8472718348708744e-33, -0.14142264425754547, -0.05862070247530937, 4.335188350523822e-05, 0.02766920067369938, 0.022778358310461044, -0.05748291313648224, -0.0011892783222720027, 0.006306751631200314, 
0.08334537595510483, 0.06540489941835403, -0.03977659344673157, -0.09084327518939972, 0.01604308746755123, 0.03115558996796608, 0.07254426926374435, -0.009886978194117546, 0.01182613056153059, 0.08193575590848923, -0.020336324349045753, -0.053042154759168625, -0.06374366581439972, -0.010494661517441273, -0.030292591080069542, -0.001951579237356782, -0.001096652471460402, -0.03218564763665199, 0.008513731881976128, -0.0006066610803827643, 0.044424012303352356, -0.0020777536556124687, 0.06488054245710373, 0.00821142178028822, -0.033869024366140366, 0.00863321777433157, -0.030293595045804977, 0.022938355803489685, 0.08462189137935638, 0.013627022504806519, 0.09017051756381989, -0.026543930172920227, -0.049427229911088943, 0.014559966512024403, 0.05001932755112648, -0.02205626294016838, 0.0019562565721571445, 0.021280499175190926, 0.05076764523983002, -0.028280258178710938, 0.022976955398917198, 0.08883360028266907, -0.03817425295710564, 0.025494730100035667, 0.004663982894271612, 0.027003856375813484, 0.02929564379155636, -0.004988853819668293, 0.01596125029027462, 0.0408782884478569, 0.024023696780204773, 0.08208804577589035, 0.06090554594993591, 0.06462189555168152, 0.06749579310417175, 0.019005903974175453, 0.005818013101816177, 0.0814988911151886, -0.07564028352499008, -0.02311553619801998, -0.03095286898314953, 0.05343911796808243, -0.012836144305765629, 0.008777830749750137, 0.10977385938167572, -0.013795829378068447, -0.0016697457758709788, -0.06280364841222763, 0.0006350107723847032, 0.0032851791474968195, -0.012642954476177692, 0.01124717015773058, -0.02657092921435833, 0.030544567853212357, -0.04748079180717468, -0.0011200765147805214, -0.011617899872362614, -0.014163468964397907, 0.01162376906722784, 0.027966801077127457, 0.03830334544181824, -0.06610356271266937, -0.04346125200390816, -0.02475842647254467, 0.030470648780465126, -0.01417912170290947, -0.03288529813289642, 2.372361944007409e-33, 0.0004899500054307282, 0.06071244180202484, 
-0.03130139037966728, 0.05712927132844925, 0.001052950625307858, 0.005762574262917042, 0.033732328563928604, -0.006009124219417572, 0.019770745187997818, 0.06902481615543365, -0.11455879360437393, 0.00677339406684041, 0.03205239772796631, 0.02645326405763626, 0.00365975359454751, 0.012494058348238468, 0.008335267193615437, -0.016587290912866592, -0.010453511029481888, -0.05198293551802635, -0.08607491105794907, 0.039976418018341064, -0.033811647444963455, 0.06879550963640213, 0.01627129130065441, 0.04417620971798897, -0.0048365178517997265, -0.09735200554132462, -0.08972793817520142, -0.02152653969824314, -0.036625225096940994, -0.1412820667028427, 0.010210206732153893, 0.043577130883932114, -0.06537520885467529, -0.013407445512712002, 0.011683888733386993, 0.0054498459212481976, -0.0025260900147259235, 0.0014036479406058788, 0.08386943489313126, -0.0785839632153511, 0.08553341776132584, 0.05678541585803032, -0.02119452692568302, -0.01454823836684227, 0.06998895853757858, -0.061249326914548874, 0.011727754957973957, 0.06985249370336533, 0.017725350335240364, -0.06229940056800842, -0.009366413578391075, -0.030988473445177078, -0.0377766378223896, 0.04775216057896614, 0.025181163102388382, 0.00044092064490541816, 0.029355546459555626, 0.010808234103024006, 0.08753938227891922, 0.02998591773211956, 0.017961930483579636, 0.02613370306789875, -0.00922053586691618, 0.024264086037874222, -0.01905849575996399, 0.021825114265084267, -0.001660310197621584, -0.044056035578250885, 0.0424710176885128, 0.13944153487682343, 0.020851625129580498, -0.030806558206677437, -0.04184114187955856, 0.0251894760876894, 0.055350035429000854, 0.04378032684326172, 0.03746442496776581, -0.055483974516391754, -0.1425924450159073, -0.050259023904800415, 0.01866164803504944, -0.06630729138851166, -0.11256444454193115, -0.12560419738292694, 0.08023329824209213, 0.012196636758744717, -0.016525505110621452, 0.02264219895005226, -0.01246469933539629, 0.05820988491177559, 0.05722659453749657, 
-0.07193642854690552, -0.013560720719397068, -3.549621752085841e-08, -0.008885417133569717, -0.004499420057982206, -0.044551484286785126, 0.018827049061655998, 0.04548877105116844, 0.011447812430560589, 0.053258124738931656, 0.05304902791976929, -0.05675233155488968, -0.016891883686184883, 0.08091422915458679, 0.013521725311875343, -0.06980061531066895, 0.046640150249004364, -0.10010962188243866, -0.11753836274147034, -0.0070762778632342815, 0.03203057870268822, -0.019177181646227837, -0.0696806088089943, 0.041082702577114105, 0.06361342966556549, 0.02432563155889511, 0.01823115162551403, 0.010291294194757938, 0.06001715362071991, 0.013864966109395027, 0.11026639491319656, 0.0576791949570179, -0.02966025099158287, 0.043947767466306686, 0.033701010048389435, -0.07367414236068726, -0.017524635419249535, -0.029519718140363693, -0.04656577855348587, -0.06675957888364792, 0.00010363326146034524, 0.015643395483493805, -0.03255753219127655, -0.04643598571419716, -0.041399553418159485, -0.03749420493841171, -0.037087541073560715, 0.0369933620095253, -0.023407388478517532, -0.06958507001399994, -0.049368686974048615, -0.011586122214794159, -0.08631265163421631, 0.0072387042455375195, 0.03492959216237068, 0.06837303936481476, 0.04551567882299423, 0.07457905262708664, -0.07951323688030243, -0.004209914710372686, -0.027126802131533623, -0.06619495153427124, 0.04542570933699608, 0.00767293618991971, -0.04850354045629501, 0.003407863900065422, -0.043047063052654266], "DROP": [0.03845338150858879, 0.03668418526649475, -0.032273538410663605, 0.04459439218044281, 0.018628185614943504, 0.014907819218933582, -0.03195982053875923, 0.08883179724216461, -0.020170465111732483, 0.05864161252975464, 0.07437226921319962, -0.010447057895362377, 0.009488413110375404, -0.036865029484033585, -0.06852898746728897, 0.02262173220515251, -0.07482310384511948, -0.10532985627651215, -0.052293892949819565, -0.035912804305553436, 0.04709809646010399, -0.024928105995059013, 0.04357992485165596, 
0.0046842945739626884, 0.1524154096841812, 0.005702797323465347, -0.07077854126691818, 0.005292222369462252, 0.03143150731921196, 0.009986291639506817, -0.0481688417494297, 0.069392628967762, 0.12427210807800293, 0.01860477589070797, -0.057140424847602844, -0.06097548454999924, 0.050527267158031464, 0.0009869032073765993, 0.04464103654026985, 0.037725724279880524, -0.012969003058969975, -0.03642277792096138, 0.01582922413945198, 0.03976678475737572, 0.0012140370672568679, 0.0614616833627224, -0.0960858017206192, 0.07802855223417282, -0.06686776876449585, 0.11424680799245834, -0.04426819086074829, 0.08131474256515503, -0.06498643010854721, 0.02695639617741108, 0.04324992001056671, 0.01904410496354103, -0.08209200948476791, -0.08588847517967224, 0.005691120401024818, 0.004039862658828497, -0.05907483771443367, -0.0157155804336071, -0.008493885397911072, 0.028206054121255875, 0.11219791322946548, -0.05624513328075409, -0.015199501067399979, -0.07753869891166687, -0.11351816356182098, 0.04484257847070694, -0.0013816289138048887, -0.00851280614733696, -0.01922653242945671, -0.10866422951221466, 0.005202247761189938, -0.09759237617254257, 0.012816076166927814, 0.0969536304473877, 0.04175759479403496, -0.12422601878643036, 0.041225068271160126, 0.022901346907019615, -0.03812679275870323, 0.005124392453581095, -0.02040974050760269, -0.007340360898524523, 0.06512094289064407, -0.04562687501311302, 0.02626042254269123, 0.04993562772870064, 0.03077653795480728, -0.012820583768188953, 0.02050948143005371, 0.03684164211153984, -0.08420615643262863, 0.08397072553634644, -0.08763860166072845, -0.01723727397620678, 0.08445802330970764, 0.05009870231151581, 0.014689006842672825, 0.027510546147823334, -5.4544576414627954e-05, -0.05383983999490738, -0.09542401880025864, -0.07062764465808868, 0.001994939288124442, 0.04459116607904434, -0.0220055989921093, 0.016857866197824478, 0.006331583019345999, -0.02618063986301422, -0.05228542163968086, 0.010840930044651031, -0.03467441350221634, 
-0.02133602276444435, 0.03873284161090851, 0.049822043627500534, 0.10117220878601074, 0.038929663598537445, 0.1063499003648758, 0.06736621260643005, -0.030897099524736404, 0.04459620639681816, -0.10496464371681213, 0.010389323346316814, -0.03489137440919876, -8.116213234879962e-33, -0.01535838469862938, -0.1063145250082016, 0.00872046872973442, 0.006290356162935495, -0.03598931059241295, -0.00757549237459898, -0.050403326749801636, 0.020797478035092354, -0.03054608590900898, -0.0020715678110718727, -0.09012001007795334, 0.00045609797234646976, 0.011563085950911045, 0.006178795360028744, 0.08772750943899155, -0.053849004209041595, -0.0717349499464035, 0.12251431494951248, 0.019168078899383545, -0.03647736832499504, 0.0366673618555069, 0.0325787253677845, 0.03993462026119232, -0.00395414000377059, 0.05856524407863617, -0.011546862311661243, -0.03536965325474739, -0.0035472551826387644, -0.029805559664964676, 0.0022898598108440638, 0.08934468775987625, -0.0027486190665513277, -0.03961048275232315, -0.08756600320339203, 0.006039758212864399, -0.0018886849284172058, 0.08092953264713287, -0.011431429535150528, 0.038732994347810745, 0.0355616956949234, -0.025902217254042625, 0.013560754247009754, 0.023988083004951477, -0.05250507593154907, -0.05504970625042915, 0.017202315852046013, 0.0209837444126606, 0.046550821512937546, 0.037567317485809326, 0.0838848203420639, -0.09406248480081558, -0.021211033686995506, -0.020299313589930534, -0.03431350737810135, 0.05027034133672714, 0.013652597554028034, -0.03558606281876564, 0.03494531288743019, -0.02999945729970932, 0.05859357491135597, 0.03953798860311508, 0.040375709533691406, 0.057038843631744385, 0.014045603573322296, -0.021691283211112022, 0.10088684409856796, 0.013301068916916847, -0.0037768553011119366, 0.036159176379442215, 0.05002720654010773, 0.0031133866868913174, -0.0014012227766215801, 0.03843768686056137, 0.0405895859003067, -0.008921301923692226, 0.01729697547852993, 0.025269165635108948, 0.0020915321074426174, 
-0.0007113198516890407, 0.024504859000444412, -0.092278391122818, -0.009605404920876026, 0.010932782664895058, -0.0605299286544323, -0.04263855889439583, 0.018687952309846878, 0.05091661214828491, 0.03229551762342453, 0.03438952937722206, -0.04961405321955681, -0.007345914840698242, -0.05512984097003937, -0.007686844561249018, 0.01025301218032837, -0.037843260914087296, 3.100472144785357e-33, -0.0648883730173111, 0.1482839733362198, -0.015297554433345795, -0.010789039544761181, 0.015772605314850807, -0.02544592134654522, 0.01722775772213936, 0.02825234644114971, 0.05196632072329521, 0.07118566334247589, 0.012464774772524834, 0.021001318469643593, 0.024491382762789726, 0.03061964176595211, -0.008106378838419914, -0.03356975317001343, 0.06495971232652664, -0.04762051999568939, -0.05200732871890068, -0.0026432888116687536, -0.010014498606324196, 0.05773578956723213, -0.005223301704972982, 0.027757197618484497, -0.019446808844804764, 0.034873683005571365, -0.024426333606243134, -0.03890443220734596, -0.023315973579883575, -0.05351071432232857, 0.019114969298243523, -0.06847521662712097, -0.0756118893623352, 0.04818982258439064, -0.06006796658039093, -0.04197872430086136, 0.027552135288715363, -0.0421425886452198, 0.002229571109637618, -0.022737553343176842, 0.09631526470184326, -0.013656158931553364, -0.029185006394982338, 0.026866143569350243, 0.06514555960893631, 0.06977935880422592, 0.04828556999564171, -0.0022239782847464085, 0.09352919459342957, -0.066444031894207, -0.00570015050470829, 0.0176222063601017, -0.024739684537053108, 0.043931666761636734, -0.049766335636377335, 0.046444036066532135, 0.03288319334387779, -0.022074894979596138, -0.018699724227190018, -0.043354835361242294, -0.0012313609477132559, 0.033797893673181534, -0.056131478399038315, 0.048278506845235825, -0.015908608213067055, -0.012303494848310947, -0.01805683970451355, -0.06593640893697739, 0.026861945167183876, -0.010416035540401936, 0.03053564950823784, 0.022019021213054657, 
0.007909242995083332, -0.0604003481566906, -0.06750568747520447, 0.06654082238674164, 0.004510354716330767, 0.0005659495946019888, 0.012515823356807232, -0.03668742999434471, -0.06877406686544418, 0.018516533076763153, 0.011009127832949162, -0.012899325229227543, -0.08200673758983612, -0.06803133338689804, 0.06084331125020981, -0.09122542291879654, -0.04592178016901016, 0.06694435328245163, -0.10664758086204529, -0.039093177765607834, -0.015747519209980965, -0.054381199181079865, 0.002627352252602577, -3.745605781091399e-08, 0.02867485210299492, 0.019674481824040413, -0.12537935376167297, 0.037418432533741, 0.09624514728784561, 0.02060689777135849, 0.028398960828781128, 0.07889525592327118, -0.007799162995070219, 0.060477424412965775, 0.03209726884961128, 0.02097010798752308, -0.0146811343729496, -0.00011736701708287, -0.056318800896406174, 0.02395777776837349, -0.07863274961709976, -0.012328281067311764, -0.003244036575779319, -0.11510991305112839, 0.0909862369298935, 0.033680446445941925, 0.025835232809185982, -0.046490591019392014, -0.042372941970825195, 0.03917979821562767, -0.010279797948896885, 0.1362956166267395, -0.046771060675382614, -0.05665455013513565, 0.039575811475515366, -0.01558412704616785, -0.054188914597034454, -0.06846673786640167, -0.026116544380784035, -0.014951745979487896, -0.06767627596855164, 0.030948558822274208, 0.0758361667394638, -0.015848739072680473, -0.026103468611836433, -0.02194797433912754, -0.019497981294989586, 0.0721297413110733, 0.08371538668870926, -0.042375802993774414, -0.1326076239347458, -0.015695111826062202, -0.0019865690264850855, -0.13087813556194305, 0.04559803381562233, 0.015594896860420704, 0.011655057780444622, -0.003875371301546693, 0.05225497856736183, -0.06006195768713951, -0.033988408744335175, -0.02032775618135929, -0.10462876409292221, -0.007485727779567242, 0.06551671773195267, -0.026729527860879898, 0.0200628861784935, -0.010867913253605366], "SuperGLUE": [0.006394115276634693, 0.1676100641489029, 
0.023686304688453674, 0.052346136420965195, 0.03986488655209541, 0.050301387906074524, 0.18151147663593292, -0.03751938045024872, -0.013681826181709766, -0.0028574883472174406, 0.08701174706220627, -0.01465009804815054, 0.0031514870934188366, -0.08802294731140137, 0.03631003573536873, 0.05893469974398613, -0.011548790149390697, -0.07874038815498352, -0.08250565826892853, -0.0011252476833760738, 0.009174791164696217, 0.009514398872852325, -0.012079555541276932, 0.045870091766119, -0.01638440228998661, 0.10528556257486343, 0.0019891259726136923, 0.029644154012203217, 0.028996076434850693, 0.010477459989488125, -0.06596358865499496, 0.06462153047323227, 0.03013780526816845, 0.09581555426120758, 0.02424643561244011, 0.045187972486019135, 0.02759856916964054, 0.031833287328481674, 0.03287754952907562, -0.002324852393940091, -0.038987163454294205, -0.13435597717761993, -0.007364257704466581, 0.014215882867574692, 0.001258537289686501, 0.03766094520688057, -0.07501719146966934, 0.03142036870121956, -0.044789426028728485, -0.04002394154667854, -0.1269616186618805, -0.03606035187840462, -0.08742686361074448, -0.02666487917304039, -0.011439693160355091, 0.05950457230210304, -0.03745192661881447, 0.005539150908589363, 0.022383615374565125, -0.03517616540193558, 0.025119764730334282, -0.01045867707580328, -0.05153430625796318, 0.083077073097229, 0.1485929936170578, -0.08760213106870651, 0.0007517542107962072, -0.08857700973749161, -0.13024012744426727, 0.0854424312710762, 0.01765870302915573, 0.0035807318054139614, -0.005276407115161419, 0.0012520172167569399, -0.05443704500794411, -0.10004732757806778, 0.012986017391085625, -0.020393231883645058, 0.042476437985897064, -0.054211847484111786, 0.011791928671300411, -0.05188495293259621, -0.05314023420214653, 0.06491050124168396, -0.002938485238701105, -0.017633939161896706, 0.005171159747987986, -0.12271085381507874, -0.04898891597986221, 0.042801953852176666, -0.010388022288680077, -0.06955860555171967, 0.056860361248254776, 
0.05626869201660156, 0.04067407175898552, 0.09267990291118622, -0.07887916266918182, -0.08207032084465027, -0.03085058555006981, 0.13422632217407227, 0.03234370797872543, 0.1021752581000328, -0.030172200873494148, -0.03320128098130226, -0.06364146620035172, -0.0733167976140976, -0.052338406443595886, -0.03910626098513603, -0.007287258747965097, 0.027239665389060974, 0.005227185320109129, -0.05799281597137451, -0.0243577491492033, -0.02405093051493168, 0.004915344063192606, 0.004881813656538725, 0.011855678632855415, 0.03526344895362854, -0.01747889630496502, 0.0035864359233528376, 0.016387324780225754, 0.013294552452862263, -0.03697776794433594, 0.04471644014120102, -0.06572478264570236, -0.11300662159919739, -0.0074445935897529125, -6.746530156037709e-33, 0.024807389825582504, -0.016411369666457176, 0.028560491278767586, 0.03174708038568497, -0.015008105896413326, -0.013242238201200962, -0.08390337228775024, -0.04448921978473663, -0.02641408145427704, -0.016108853742480278, -0.10391545295715332, -0.04418439418077469, 0.037381161004304886, 0.04677446186542511, 0.0014623659662902355, 0.008046932518482208, -0.09295228123664856, 0.05633172765374184, 0.027292808517813683, 0.016427544876933098, 0.026126151904463768, 0.09327233582735062, -0.031196892261505127, -0.06163042038679123, -0.0067360661923885345, -0.1063697561621666, 0.025767846032977104, -0.0051616160199046135, -0.0676838830113411, 0.048410382121801376, -0.0007717777043581009, 0.005027716979384422, 0.00399020453915, -0.04706190153956413, 0.07747756689786911, 0.03055189549922943, 0.004669735208153725, -0.015441499650478363, -0.019315673038363457, -0.029162688180804253, -0.08360452950000763, 0.009718834422528744, 0.009789129719138145, -0.014851577579975128, 0.04794030636548996, 0.09225267171859741, 0.01787395030260086, 0.002877636346966028, -0.044312454760074615, 0.04169031232595444, -0.0020390707068145275, 0.02682013437151909, 0.07102862745523453, 0.014248593710362911, 0.08092747628688812, 0.02001592144370079, 
-0.01868179440498352, 0.07732352614402771, 0.01114710420370102, -0.027614928781986237, 0.083121657371521, 0.02145005203783512, -0.03795866295695305, 0.033370811492204666, 0.02357896789908409, 0.03357786685228348, -0.02175328880548477, -0.055194735527038574, 0.0677114799618721, -0.002779087983071804, -0.055534277111291885, 0.02281012199819088, -0.025348804891109467, 0.04415252432227135, -0.05907060205936432, -0.014850876294076443, -0.03502684086561203, 0.027575448155403137, 0.015244575217366219, -0.05552748963236809, -0.00654834508895874, -0.05731688067317009, -0.005800750106573105, 0.014748395420610905, -0.06280691176652908, 0.05301731452345848, 0.028753746300935745, -0.11172100901603699, 0.036945994943380356, 0.0831553190946579, -0.05625279247760773, 0.04274359717965126, -0.003279132070019841, -0.013383752666413784, -0.01083352416753769, 4.47439297259782e-34, -0.06297345459461212, -0.022371657192707062, -0.07489008456468582, 0.029475027695298195, -0.023574382066726685, -0.0010025971569120884, -0.06057900935411453, -0.05357592552900314, 0.06053672358393669, 0.08861484378576279, -0.017548318952322006, -0.04490780085325241, 0.06345294415950775, -0.0063406373374164104, 0.01641412079334259, -0.010368742980062962, 0.1006162017583847, -0.05524873360991478, -0.04236997663974762, 0.052721887826919556, -0.008201376534998417, 0.04513987898826599, -0.03986120596528053, 0.024191251024603844, -0.01126017328351736, 0.07162454724311829, 0.04868631809949875, -0.019993338733911514, -0.0825873613357544, -0.04939131438732147, 0.04888712614774704, -0.05290945991873741, -0.08162477612495422, 0.055402740836143494, 0.0017361206701025367, 0.02636686898767948, 0.017868390306830406, -0.017942678183317184, -0.06719642877578735, -0.011989532969892025, 0.023382585495710373, 0.012026788666844368, -0.06109335273504257, -0.008835475891828537, -0.01837519370019436, -0.02618439495563507, 0.041396614164114, -0.004981356207281351, -0.01322844810783863, -0.01610252633690834, -0.04362263157963753, 
0.005335599184036255, -0.019686372950673103, 0.03325289860367775, -0.03900771588087082, 0.0033178613521158695, 0.010072868317365646, -0.07125118374824524, 0.009231257252395153, 0.0159489493817091, -0.030147075653076172, 0.06142553687095642, -0.061807096004486084, 0.043836645781993866, 0.06249067932367325, -0.039042167365550995, -0.016715919598937035, 0.017265086993575096, 0.05428938940167427, -0.031916480511426926, 0.03668498992919922, -0.0355079285800457, -0.13077792525291443, -0.07276677340269089, -0.039418499916791916, 0.03748374059796333, 0.00742219528183341, -0.021102385595440865, -0.06602547317743301, -0.03267497569322586, 0.06567574292421341, -0.04462326318025589, 0.004099673591554165, 0.008763464167714119, -0.05369852855801582, -0.03519720584154129, -0.008485794998705387, -0.04211550951004028, 0.0017542206915095448, 0.05864638090133667, 0.0068895150907337666, -0.005690131336450577, -0.02855556644499302, -0.038755614310503006, -0.011233058758080006, -3.9025817244464633e-08, 0.0045797512866556644, -0.06358771026134491, -0.08307347446680069, 0.013127846643328667, 0.01717056706547737, -0.009305699728429317, 0.07053057849407196, -0.040650125592947006, -0.03220619261264801, 0.035805415362119675, 0.06527828425168991, 0.05462028458714485, -0.0658210963010788, 0.041902609169483185, 0.0019682953134179115, 0.09143263101577759, -0.04861669987440109, -0.049732379615306854, -0.03375174477696419, -0.0018848188919946551, 0.11904072016477585, 0.011714381165802479, -0.006137323100119829, -0.007953846827149391, -0.03286729380488396, 0.05635799840092659, 0.03439164161682129, 0.03952396661043167, -0.0420970544219017, 0.05292711406946182, 0.03773455694317818, 0.1424417495727539, -0.09540672600269318, -0.015657711774110794, -0.02564072795212269, -0.007650672923773527, 0.03756808489561081, 0.012481356039643288, 0.07654173672199249, -0.06984085589647293, -0.04665357992053032, 0.04125792533159256, -0.05368513986468315, 0.06756744533777237, 0.03644755855202675, -0.07129476964473724, 
-0.0074648200534284115, 0.0007881624042056501, -0.04588886722922325, 0.009610846638679504, -0.01660342514514923, -0.005963779985904694, 0.07447835057973862, 0.04997112601995468, 0.004244676325470209, -0.015548757277429104, 0.004408142529428005, 0.03705377131700516, -0.07725869119167328, 0.040605634450912476, 0.10808727145195007, 0.05481385439634323, 0.06914106011390686, -0.01495656929910183], "Chatbot Arena (LMSys)": [-0.04524091258645058, 0.01624363102018833, 0.059466250240802765, 0.06042369082570076, -0.03543027862906456, 0.008066413924098015, 0.103546641767025, -0.07581307739019394, -0.01681128889322281, -0.011249381117522717, -0.03246571496129036, -0.07490532845258713, -0.0428062342107296, 0.039452359080314636, -0.001536125666461885, 0.007299091201275587, 0.0691756010055542, -0.006092806346714497, 0.05644795298576355, -0.07135533541440964, -0.06695497035980225, -0.056571103632450104, -0.017187833786010742, 0.02466917224228382, 0.0034132623113691807, 0.03302016854286194, 0.012916065752506256, 0.037398528307676315, -0.11833145469427109, -0.029619697481393814, 0.002532626735046506, 0.046517372131347656, -0.007403850555419922, -0.022225167602300644, -0.09226575493812561, 0.057643551379442215, 0.09663105756044388, -0.053497470915317535, 0.00422553950920701, 0.05528798699378967, -0.024962157011032104, -0.030714312568306923, 0.06508833169937134, 0.02313425950706005, 0.049451589584350586, 0.003196664620190859, -0.013684963807463646, 0.07945140451192856, 0.08025600761175156, 0.08220034092664719, -0.01249604020267725, -0.045568082481622696, -0.0918959304690361, -0.02541690692305565, 0.033876433968544006, 0.0248097013682127, -0.10659127682447433, -0.019718721508979797, 0.07750727981328964, -0.04081347957253456, 0.007355025503784418, -0.031407784670591354, -0.05430888384580612, 0.015774235129356384, 0.003621283220127225, -0.11108563095331192, -0.0475754551589489, 0.1161457896232605, -0.008890911936759949, 0.003678568871691823, -0.11174961179494858, 0.021842559799551964, 
-0.011412559077143669, 0.07078102976083755, -0.017656538635492325, -0.01282457821071148, -0.02172771468758583, -0.04189245402812958, -0.05433144047856331, 0.030765550211071968, -0.08824434131383896, -0.08002721518278122, -0.05537889152765274, 0.0044894348829984665, -0.006227774079889059, 0.020629124715924263, 0.043362367898225784, 0.04888372868299484, -0.06685283780097961, -0.022740831598639488, -0.041573382914066315, -0.01636137068271637, -0.015394962392747402, 0.004791414365172386, -0.03635696694254875, 0.10879845917224884, -0.02234470099210739, -0.09065259248018265, -0.0887402817606926, 0.1341986358165741, 0.06228090077638626, 0.024216141551733017, 0.050692178308963776, -0.01407332718372345, 0.04579489305615425, -0.019553570076823235, -0.0027659821789711714, -0.009252992458641529, 0.055290162563323975, -0.009897940792143345, -0.02647787146270275, 0.030062368139624596, -0.07040752470493317, -0.012046843767166138, 0.03947604075074196, -0.030522005632519722, 0.07059209793806076, 0.0381440706551075, 0.00992793869227171, -0.003957514651119709, 0.0066087679006159306, 0.006602960173040628, -0.06323933601379395, 0.030380697920918465, -0.06027844920754433, 0.014713305048644543, 0.04742155969142914, -9.599637881791748e-33, 0.08042950183153152, -0.014025388285517693, 0.02876107580959797, 0.16970813274383545, 0.12635540962219238, -0.00684951338917017, 0.0032751273829489946, -0.074480801820755, -0.07552414387464523, -1.027661346597597e-05, 0.0016478735487908125, -0.0028883193153887987, -0.03199588134884834, 0.01590608060359955, 0.07500637322664261, -0.013183239847421646, -0.07216064631938934, 0.058620814234018326, 0.054212771356105804, -0.028958521783351898, -0.0712481364607811, 0.003683911170810461, -0.019575171172618866, 0.014730914495885372, 0.009863552637398243, -0.018586693331599236, 0.01670772023499012, -0.09982025623321533, 0.08725999295711517, -0.01973598077893257, -0.0052347443997859955, 0.06882520765066147, 0.04107134789228439, -0.01838970184326172, 
0.024706024676561356, -0.014056365005671978, -0.041918110102415085, -0.047844406217336655, 0.03733760118484497, 0.03673054277896881, -0.042280398309230804, -0.042988840490579605, -0.08408737927675247, -0.04752633348107338, 0.06651831418275833, 0.007835780270397663, 0.08359207212924957, -0.0028113240841776133, -0.017917213961482048, -0.014972608536481857, -0.05552186816930771, -0.0637163445353508, 0.021568048745393753, -0.05634294077754021, -0.06833668053150177, 0.01103187445551157, 0.043678805232048035, 0.027473879978060722, 0.008849126286804676, -0.05732753872871399, 0.06684603542089462, 0.06676499545574188, 0.0016545921098440886, -0.03799678012728691, -0.06579456478357315, -0.01666308008134365, 0.0031747380271553993, 0.0020958581008017063, 0.041248518973588943, -0.05174247920513153, 0.015537133440375328, 0.06902437657117844, 0.07692450284957886, 0.0226358100771904, -0.03250008448958397, 0.08079114556312561, 0.031147358939051628, -0.07298757135868073, -0.039242930710315704, 0.012478111311793327, 0.014179917052388191, -0.04438799247145653, -0.027135752141475677, -0.022228632122278214, -0.06324092298746109, 0.0202819611877203, -0.010600890964269638, -0.06969580799341202, -0.05841264873743057, 0.010331941768527031, -0.15825189650058746, -0.04251275584101677, 0.0891045480966568, -0.09374450892210007, -0.051110535860061646, 4.203257761905471e-33, 0.06756029278039932, -0.042509887367486954, -0.037447087466716766, 0.033021144568920135, 0.10394084453582764, -0.05199844017624855, -0.008900323882699013, 0.032125845551490784, 0.05084558576345444, 0.005528968758881092, -0.060247793793678284, -0.0018705339170992374, 0.06716392189264297, -0.01681431755423546, 0.07861843705177307, -0.017121590673923492, 0.11437394469976425, 0.055656805634498596, -0.006568391807377338, 0.035797059535980225, -0.028555171564221382, 0.13699068129062653, -0.05565359815955162, 0.024904295802116394, 0.0004958159988746047, 0.051059748977422714, 0.029309118166565895, -0.030698014423251152, 
-0.08567917346954346, 0.008206859230995178, -0.04161180928349495, -0.04997836425900459, 0.06189791113138199, 0.01685783639550209, -0.0002587056951597333, 0.12412211298942566, 0.01680629886686802, -0.06126958131790161, -0.04074065387248993, 0.06680801510810852, 0.05844710022211075, -0.038406964391469955, 0.07647005468606949, 0.042745646089315414, 0.023671703413128853, -0.013368161395192146, -0.04893137514591217, 0.038353607058525085, -0.0010795864509418607, 0.04722916707396507, -0.010509654879570007, -4.767763311974704e-05, -0.10902056843042374, 0.009146181866526604, 0.03160794824361801, -0.08489453792572021, 0.007168085779994726, -0.06563008576631546, 0.018822815269231796, -0.027072669938206673, -0.10134570300579071, -0.00879206694662571, -0.04976016655564308, 0.11529852449893951, -0.026334252208471298, -0.0734703317284584, -0.09610344469547272, -0.034996598958969116, -0.018207047134637833, 0.03153041750192642, -0.0006147834938019514, 0.04364996775984764, 0.029253030195832253, -0.01836930587887764, 0.01691192388534546, -0.028354061767458916, 0.05202742666006088, -0.008018816821277142, -0.07007075101137161, 0.013377837836742401, -0.046522051095962524, 0.016842812299728394, -0.06166968122124672, 0.0670471116900444, -0.021107401698827744, 0.020341766998171806, 0.021386558189988136, 0.0533255897462368, -0.03118007443845272, 0.0068951500579714775, 0.018397610634565353, 0.013968106359243393, 0.03448551148176193, 0.03649934381246567, 0.006981539074331522, -4.0269547696425434e-08, 0.05603325366973877, -0.014512247405946255, -0.025011364370584488, 0.030393533408641815, -0.013384828343987465, -0.06322582811117172, -0.023652508854866028, -0.07755161821842194, 0.039363838732242584, -0.037592992186546326, 0.07739490270614624, -0.013794994913041592, 0.017869869247078896, 0.043069496750831604, 0.032124679535627365, 0.0024114707484841347, 0.01825626753270626, 0.06379050761461258, -0.062139276415109634, 0.014432521536946297, 0.007638326846063137, 0.037623122334480286, 
0.020567674189805984, -0.007491652388125658, 0.023897593840956688, 0.025074653327465057, 0.05211677774786949, 0.09983988851308823, 0.08208052068948746, 0.09637519717216492, -0.05702947825193405, 0.05381558835506439, -0.05491350218653679, -0.03727266192436218, -0.05264434963464737, -0.04265590384602547, 0.02354089543223381, -0.03815285488963127, -0.003713797777891159, -0.0394790917634964, -0.017153654247522354, 0.0443301647901535, 0.00038641312858089805, 0.007676693610846996, -0.041947025805711746, -0.01012892834842205, 0.018266096711158752, -0.08356381952762604, -0.053033411502838135, 0.047788310796022415, -0.048801105469465256, 0.004364416468888521, 0.08851715922355652, 0.005074564833194017, 0.05858348682522774, 0.0805530697107315, -0.03343803063035011, 0.031022384762763977, 0.01672685705125332, 0.031922537833452225, 0.007433056831359863, 0.06354140490293503, -0.0364929661154747, -0.02123940922319889], "MT-Bench": [0.0024729992728680372, 0.09147808700799942, -0.03367866948246956, 0.028849812224507332, -0.023116160184144974, 0.02896643616259098, 0.05933096632361412, 0.03325282037258148, -0.0012153488351032138, 0.0534246452152729, -0.012764671817421913, 0.004518038593232632, -0.049570482224226, 0.028544249013066292, 0.013794716447591782, 0.014211351051926613, 0.04871357977390289, 0.0071291192434728146, -0.057991985231637955, -0.022431837394833565, 0.05971114709973335, 0.0061036208644509315, 0.023913269862532616, 0.0520104244351387, -0.00873678270727396, 0.029596637934446335, 0.022988196462392807, 0.04936610162258148, -0.044607169926166534, -0.0028434828855097294, 0.025453971698880196, 0.08945004642009735, 0.07965860515832901, 0.048378415405750275, -0.07033409923315048, 0.06783803552389145, -0.05105111002922058, 0.08535934239625931, -0.025971224531531334, -0.013820583932101727, -0.05233068764209747, -0.08381042629480362, -0.03136688843369484, 0.044000424444675446, 0.01726137101650238, -0.04585203155875206, -0.03628181666135788, -0.004692916292697191, 
0.05604097247123718, 0.017948957160115242, -0.09288281947374344, -0.05889511480927467, -0.14394713938236237, -0.04604484140872955, 0.01267040241509676, 0.04545409977436066, -0.012819488532841206, -0.0035633216612040997, -0.045121416449546814, -0.11690367758274078, 0.014603802934288979, -0.027549076825380325, -4.230166086927056e-05, 0.035174570977687836, 0.1598692089319229, -0.0025840597227215767, -0.07553839683532715, 0.06850632280111313, -0.023983780294656754, 0.09554526209831238, -0.04406686872243881, 0.008274085819721222, -0.052059393376111984, 0.052311379462480545, -0.0545249879360199, -0.057760078459978104, -0.026637300848960876, 0.010433304123580456, 0.010494214482605457, 0.01759808138012886, -0.058094847947359085, 0.04259685426950455, -0.04933328554034233, 0.10127599537372589, 0.029608912765979767, 0.010150211863219738, 0.043316833674907684, -0.027328340336680412, -0.031073248013854027, -0.04373868927359581, -0.03716982901096344, -0.050308529287576675, 0.07066163420677185, -0.025002846494317055, -0.024187033995985985, 0.09681316465139389, -0.05370241403579712, -0.10898476094007492, -0.0179672259837389, 0.050920747220516205, 0.049606990069150925, 0.035892073065042496, -0.014399420469999313, -0.09356683492660522, -0.0030195999424904585, -0.0539623461663723, 0.013166673481464386, 0.004632105585187674, 0.034462831914424896, 7.0729132858105e-05, -0.017653610557317734, 0.034262169152498245, -0.053095493465662, 0.01683998852968216, 0.06468141078948975, 0.03374335169792175, 0.030535731464624405, 0.03352460637688637, 0.036050938069820404, 0.05008493736386299, 0.03653593361377716, -0.009402597323060036, -0.05211474373936653, 0.04158646985888481, -0.04294551908969879, -0.04539985582232475, -0.0027241730131208897, -8.205133505047914e-33, -0.02188391238451004, 0.023897897452116013, 0.06916642934083939, 0.14574357867240906, 0.08526038378477097, 0.014289350248873234, -0.026497570797801018, -0.009781092405319214, -0.0018871499923989177, 0.009114563465118408, 
0.015119170770049095, 0.009340235032141209, 0.08720546215772629, 0.06613187491893768, -0.02348487637937069, -0.06726682186126709, -0.12327177077531815, 0.09340518712997437, 0.05408218130469322, 0.023287370800971985, -0.02657381072640419, -0.018326984718441963, 0.01612413488328457, -0.04288325086236, 0.0390569232404232, -0.021436521783471107, 0.04491810128092766, -0.05468010902404785, 0.04146122187376022, -0.023457789793610573, 0.030020155012607574, 0.05239984020590782, -0.09976161271333694, -0.045245423913002014, -0.02021569013595581, 0.006674894131720066, 0.04925079271197319, -0.06963300704956055, 0.05573146790266037, -0.04110489413142204, -0.08489730954170227, 0.0026837901677936316, -0.04261462017893791, -0.06133507937192917, 0.07705937325954437, 0.08445950597524643, 0.03674496337771416, -0.04038381204009056, -0.044751573354005814, -0.03420671448111534, 0.009129081852734089, -0.0312797948718071, 0.010647539980709553, -0.07482592016458511, -0.0003903559409081936, -0.0353616401553154, 0.004252887796610594, -0.07132609188556671, 0.020221877843141556, 0.05428396910429001, 0.05450284853577614, 0.0804319754242897, -0.015995291993021965, 0.05568625405430794, -0.016333656385540962, 0.016558552160859108, -0.017461484298110008, -0.06334537267684937, 0.07938569039106369, -0.05370258539915085, -0.03144460916519165, 0.01685039885342121, -0.006375557277351618, 0.06050918996334076, -0.06375591456890106, -0.028317896649241447, -0.0823068842291832, -0.025329090654850006, -0.032988421618938446, -0.009645242244005203, 0.0314570814371109, -0.03572547435760498, -0.014469360001385212, 0.017391005530953407, -0.04575847461819649, -0.012457175180315971, 0.02057890221476555, -0.056502893567085266, -0.06813813745975494, 0.010813687928020954, -0.10685217380523682, 0.0010553994216024876, 0.031384989619255066, 0.026596391573548317, 0.06976111233234406, 3.3094225104577577e-33, -0.02929212898015976, -0.026002496480941772, -0.051659829914569855, 0.0032994928769767284, 0.08598725497722626, 
-0.0366995707154274, -0.027419432997703552, -0.032751284539699554, 0.03671475499868393, 0.07015998661518097, -0.08271195739507675, -0.0656711533665657, 0.002475067274644971, -0.031018706038594246, -0.03045736253261566, 0.0036993895191699266, 0.04709699749946594, -0.02025013417005539, -0.0502631813287735, 0.057191118597984314, -0.028021465986967087, 0.08348104357719421, -0.06115744262933731, 0.013531209900975227, 0.054431673139333725, 0.10861000418663025, 0.03592352569103241, -0.002327116671949625, -0.01309240236878395, -0.06921005994081497, -0.06977920979261398, -0.08255748450756073, 0.017416756600141525, -0.01133121456950903, -0.03907560184597969, 0.05833440274000168, 0.01378621906042099, -0.0439896322786808, 0.01563706435263157, 0.0019214680651202798, 0.12159479409456253, -0.044658858329057693, 0.06820189207792282, -0.017812496051192284, 0.02131638489663601, -0.08335927873849869, -0.00020041978859808296, 0.023577068001031876, -0.03738649562001228, 0.044181790202856064, 0.01782662607729435, -0.018802860751748085, -0.05538540333509445, -0.05279496684670448, -0.065977543592453, -0.04434730485081673, 0.08539656549692154, -0.13955576717853546, 0.027094846591353416, -0.03625662252306938, -0.03944483771920204, -0.017553122714161873, 0.06376371532678604, 0.056310392916202545, -0.02129550836980343, -0.07471692562103271, -0.020505746826529503, 0.00496184267103672, 0.004281883593648672, -0.02515227161347866, 0.0008266369695775211, -0.021540610119700432, -0.009945416823029518, -0.02691502682864666, 0.10166212916374207, -0.04959047958254814, 0.03148674592375755, -0.0171754639595747, -0.05153894051909447, -0.018190128728747368, -0.0489405132830143, -0.03559718653559685, 0.06041615083813667, -0.011404857039451599, -0.02632606215775013, 0.0034645008854568005, -0.017436521127820015, 0.030767260119318962, -0.015191375277936459, 0.01103383768349886, -0.15353453159332275, -0.002061324892565608, 0.035837747156620026, 0.00877713318914175, -0.018730519339442253, -4.559772293077913e-08, 
-0.004442144650965929, -0.09745366126298904, 0.09604010730981827, 0.07856910675764084, 0.013092409819364548, -0.022314533591270447, 0.009619809687137604, -0.045603979378938675, -0.012552374042570591, -0.03682587295770645, -0.02701253443956375, 0.04647044092416763, -0.0018744140397757292, 0.12091763317584991, -0.015201068483293056, 0.028250010684132576, 0.04799247533082962, 0.002509051002562046, -0.05395332723855972, 0.05388130247592926, 0.05224158614873886, 0.08623495697975159, -0.041408471763134, 0.002219281392171979, 0.03937074542045593, 0.03953052684664726, -0.05436452850699425, 0.03414032235741615, -0.009448365308344364, 0.038613878190517426, 7.467735122190788e-05, 0.05934527888894081, -0.027667606249451637, 0.08927491307258606, -0.08483140170574188, -0.10256436467170715, 0.037431035190820694, -0.002452761633321643, 0.040887825191020966, -0.02644786424934864, -0.04641783609986305, 0.0774187222123146, -0.025929514318704605, 0.06476455181837082, 0.06254249811172485, -0.0886717289686203, -0.040956269949674606, -0.10341804474592209, -0.045660048723220825, 0.014908432960510254, 0.027587832883000374, -0.018972031772136688, 0.125148743391037, -0.029208417981863022, 0.026568647474050522, 0.058116521686315536, -0.0076254140585660934, 0.00728992884978652, -0.046493049710989, 0.0989493876695633, 0.04984310269355774, 0.0782654732465744, 0.021562442183494568, 0.05175015330314636], "LiveBench": [-0.05036706477403641, 0.14305782318115234, -0.02102639153599739, 0.030267544090747833, 0.020860331133008003, -0.028894880786538124, 0.10832729190587997, -0.012754515744745731, -0.043341100215911865, 0.003227795474231243, 0.050943516194820404, -0.07316869497299194, 0.04110459238290787, -0.02036685310304165, -0.060111455619335175, 0.07361415773630142, -0.07143305242061615, -0.010572475381195545, -0.05059446394443512, -0.05749987065792084, -0.016560448333621025, -0.013768518343567848, -0.08592073619365692, 0.06996702402830124, -0.021955404430627823, 0.09108427911996841, 
0.06076040863990784, 0.03997163474559784, 0.06922199577093124, -0.05418990179896355, 0.07526978105306625, 0.07700889557600021, -0.004976638592779636, 0.039924949407577515, 0.08055941015481949, 0.021947313100099564, -0.042468637228012085, 0.04849608242511749, 0.0552390031516552, 0.03573485463857651, -0.043720584362745285, -0.14794526994228363, 0.02226310409605503, 0.010569899342954159, -0.003541318466886878, 0.07210403680801392, -0.08214014768600464, 0.10142392665147781, -0.022118909284472466, -0.03808223456144333, -0.11584673076868057, -0.007967445068061352, -0.11621860414743423, -0.09642692655324936, 0.0649937093257904, -0.02014141157269478, -0.027845952659845352, -0.03783262148499489, -0.03551063314080238, -0.03752908483147621, 0.03541341423988342, 0.011993564665317535, -0.02904159016907215, 0.005785851739346981, 0.130461648106575, 0.03289312496781349, -0.047551095485687256, -0.015225179493427277, -0.04496366158127785, 0.10220620781183243, 0.0010954837780445814, 0.08155689388513565, -0.007392974570393562, 0.057969190180301666, -0.018239328637719154, -0.050914399325847626, -0.03191135823726654, 0.0016898777103051543, 0.03849780932068825, -0.06897490471601486, 0.020134901627898216, -0.07579390704631805, -0.048094626516103745, 0.060359880328178406, 0.01692746952176094, -0.019997822120785713, -0.00639368174597621, 0.0022144021932035685, 0.03206263855099678, 0.02036898024380207, -0.015309963375329971, -0.0215963963419199, 0.08431055396795273, 0.029940297827124596, 0.04573020711541176, 0.1328009068965912, -0.04782193899154663, -0.05994301661849022, 0.028233353048563004, 0.14921967685222626, -0.03028871677815914, 0.1155061200261116, -0.020547352731227875, -0.10414215177297592, -0.026729639619588852, -0.02238440327346325, 0.0021099590230733156, -0.04563324525952339, 0.038516514003276825, -0.014266036450862885, 0.02299952320754528, -0.008177586831152439, -0.009058292955160141, 0.019985657185316086, 0.0019581944216042757, -0.06056353449821472, 0.04842250794172287, 
0.05365552380681038, -0.048124901950359344, -0.01134526077657938, 0.07916519790887833, 0.009593180380761623, -0.06604058295488358, 0.0620102696120739, -0.05177363008260727, -0.06872325390577316, -0.03559357300400734, -5.4033103937683436e-33, -0.03979165479540825, -0.04321686178445816, 0.02359049767255783, 0.040465760976076126, 0.0720464289188385, -0.007489574141800404, -0.08301807194948196, -0.035544298589229584, -0.01082555577158928, 0.020345240831375122, -0.06901594251394272, -0.005994511768221855, 0.00045611601672135293, 0.021225078031420708, -0.03533124178647995, -0.011786086484789848, -0.025028835982084274, 0.0758550688624382, -0.002502510091289878, -0.0593627393245697, 0.020578041672706604, -0.007818370126187801, -0.034665826708078384, -0.011141757480800152, -0.017309851944446564, -0.01132732443511486, 0.026593897491693497, -0.04335026070475578, 0.03250395134091377, -0.0037345224991440773, 0.01972811296582222, -0.008570129983127117, -0.03472752124071121, 0.0046041239984333515, 0.04694681242108345, 0.0075459787622094154, 0.08384672552347183, -0.07540111988782883, 0.04089418053627014, 0.027111897245049477, -0.0658508762717247, -0.025217797607183456, 0.03405451774597168, -0.018103333190083504, 0.035406265407800674, 0.02649124152958393, -0.015190749429166317, 0.010169211775064468, -0.044558148831129074, 0.05349145829677582, -0.03179280832409859, 0.028462693095207214, 0.03945823758840561, -0.007611187640577555, 0.0002789986610878259, -0.028205813840031624, -0.06014556437730789, 0.024982741102576256, 0.039695341140031815, 0.06847558170557022, 0.0715877041220665, -0.002617875812575221, -0.05652502179145813, 0.05348381772637367, -0.0017243892652913928, 0.031850747764110565, -0.034020282328128815, -0.10985696315765381, 0.05827191844582558, 0.017537947744131088, -0.0382041335105896, -0.022519679740071297, -0.004037641920149326, 0.023751569911837578, -0.018156863749027252, -0.08748814463615417, -0.004813737701624632, 0.027811173349618912, -0.01621364615857601, 
-0.12922373414039612, -0.010334307327866554, -0.005886574741452932, 0.03673575818538666, -0.03190334513783455, -0.03383127599954605, 0.0562533438205719, -0.03726357966661453, -0.12995348870754242, -0.07955523580312729, 0.0570317804813385, -0.05245503783226013, -0.035431452095508575, -0.010801260359585285, -0.023330137133598328, 0.09840554744005203, 5.871393621798221e-34, -0.08506574481725693, 0.051647745072841644, -0.02002214640378952, -0.059780288487672806, -0.0015699623618274927, -0.0483725480735302, -0.027792202308773994, -0.08238013833761215, -0.01619708724319935, 0.05102568492293358, 0.004941737744957209, -0.05044064298272133, 0.060651347041130066, -0.0020028441213071346, 0.03383677825331688, 0.03155755251646042, 0.07130606472492218, -0.02376369573175907, -0.029989108443260193, 0.08872251957654953, 0.025840584188699722, 0.10470090061426163, -0.04624193534255028, 0.020658347755670547, -0.042999982833862305, 0.12263260781764984, 0.04806629940867424, -0.015610169619321823, 0.03396843373775482, 0.012753189541399479, -0.05741911381483078, -0.05747521296143532, 0.03279418870806694, 0.017760775983333588, -0.023557595908641815, 0.01658509112894535, 0.07032573968172073, -0.01138700358569622, -0.05926479771733284, -0.0330488421022892, 0.07109849154949188, 0.014106370508670807, -0.03170432150363922, -0.00727811036631465, 0.02281416393816471, -0.028253769502043724, 0.03586652874946594, 0.0749363973736763, 0.038908760994672775, -0.06082085147500038, -0.014452478848397732, -0.04113398864865303, -0.046144984662532806, 0.002672146074473858, -0.030741659924387932, 0.034152496606111526, 0.030793724581599236, -0.09272900223731995, -0.003357047913596034, 0.02537538856267929, -0.04462259262800217, -0.01441911980509758, -0.007631849031895399, 0.059305690228939056, 0.036875490099191666, -0.06651677191257477, -0.048474330455064774, 0.018605047836899757, -0.015949470922350883, 0.020714668557047844, 0.09278690814971924, 0.0104439752176404, -0.10982581973075867, -0.10936448723077774, 
0.007860380224883556, 0.034738022834062576, -0.08182089775800705, 0.03860517963767052, -0.033432912081480026, 0.0587422139942646, -0.03375405818223953, -0.03073749877512455, 0.057732682675123215, 0.044672299176454544, -0.0438065230846405, -0.012879770249128342, 0.06696906685829163, 0.0018883842276409268, -0.006123237311840057, 0.03642501309514046, -0.029925907030701637, 0.06937430053949356, 0.007900314405560493, 0.011982563883066177, 0.02936621569097042, -3.5385365748652475e-08, -0.02051096223294735, -0.10887555032968521, -0.10237932950258255, -0.00883820652961731, 0.06532963365316391, -0.006915329024195671, -0.02551122196018696, -0.06725596636533737, -0.04711713641881943, -0.039950888603925705, 0.02073243260383606, 0.027918940410017967, -0.008905382826924324, 0.09451273828744888, -0.04367988929152489, 0.06830670684576035, -0.046452656388282776, -0.048660941421985626, -0.055894315242767334, 0.019366540014743805, 0.07126699388027191, 0.0375596359372139, 0.01442382950335741, 0.004460650961846113, -0.022234167903661728, 0.03989063575863838, -0.016815444454550743, 0.024933531880378723, 0.024599730968475342, 0.07620808482170105, 0.002701415680348873, 0.09343259036540985, -0.025328299030661583, -0.027571460232138634, -0.01647302694618702, -0.029945172369480133, -0.0018352654296904802, 0.04778631031513214, 0.0036028404720127583, -0.07250328361988068, -0.05023106187582016, -0.02958161011338234, -0.10195602476596832, 0.05111188068985939, 0.0674281194806099, -0.07014461606740952, -0.024069253355264664, -0.01483207568526268, -0.00852623675018549, 0.02493170276284218, -0.05748046934604645, -0.008685818873345852, 0.0668492466211319, 0.011369078420102596, 0.0643131285905838, -0.044460732489824295, -0.02211742103099823, 0.06616468727588654, -0.04474253952503204, 0.06759046763181686, 0.06399411708116531, 0.06331079453229904, 0.06118198111653328, -0.0013510453281924129]}}
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "0.1.0",
|
|
3
|
+
"description": "Training queries for generating benchmark centroids. Each benchmark has representative prompts that capture what that benchmark tests.",
|
|
4
|
+
"benchmarks": {
|
|
5
|
+
"MMLU": {
|
|
6
|
+
"description": "Academic knowledge across 57 subjects",
|
|
7
|
+
"queries": [
|
|
8
|
+
"What is the capital of France?",
|
|
9
|
+
"Explain the difference between mitosis and meiosis",
|
|
10
|
+
"What are the main causes of World War I?",
|
|
11
|
+
"Describe the process of photosynthesis",
|
|
12
|
+
"What is the significance of the Magna Carta?",
|
|
13
|
+
"Explain Newton's three laws of motion",
|
|
14
|
+
"What are the key principles of microeconomics?",
|
|
15
|
+
"Describe the structure of DNA",
|
|
16
|
+
"What is the theory of relativity?",
|
|
17
|
+
"Explain the concept of supply and demand",
|
|
18
|
+
"What are the branches of the US government?",
|
|
19
|
+
"Describe the water cycle in detail",
|
|
20
|
+
"What is the difference between classical and operant conditioning?",
|
|
21
|
+
"Explain how vaccines work",
|
|
22
|
+
"What are the key events of the French Revolution?"
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"HellaSwag": {
|
|
26
|
+
"description": "Commonsense reasoning about everyday situations",
|
|
27
|
+
"queries": [
|
|
28
|
+
"What would happen if you left ice cream in the sun?",
|
|
29
|
+
"What's the most likely next step after boiling water for pasta?",
|
|
30
|
+
"If someone is crying, what might have happened?",
|
|
31
|
+
"What do people usually do after waking up in the morning?",
|
|
32
|
+
"What would happen if you forgot to water a plant for weeks?",
|
|
33
|
+
"After putting on shoes, what would you typically do next?",
|
|
34
|
+
"What's the natural consequence of staying up too late?",
|
|
35
|
+
"If a ball is thrown up, what happens next?",
|
|
36
|
+
"What do people usually do when they arrive at a restaurant?",
|
|
37
|
+
"What happens when you mix oil and water?",
|
|
38
|
+
"What would someone do if they heard a fire alarm?",
|
|
39
|
+
"What's the most likely reason someone would carry an umbrella?",
|
|
40
|
+
"If the lights suddenly go out, what might have happened?",
|
|
41
|
+
"What do you do when you arrive home with groceries?",
|
|
42
|
+
"What would happen if you tried to drive with no gas?"
|
|
43
|
+
]
|
|
44
|
+
},
|
|
45
|
+
"HumanEval": {
|
|
46
|
+
"description": "Code generation and programming tasks",
|
|
47
|
+
"queries": [
|
|
48
|
+
"Write a function that reverses a string",
|
|
49
|
+
"Implement a binary search algorithm",
|
|
50
|
+
"Create a function to check if a number is prime",
|
|
51
|
+
"Write code to merge two sorted arrays",
|
|
52
|
+
"Implement a stack using a linked list",
|
|
53
|
+
"Write a function to find the factorial of a number",
|
|
54
|
+
"Create a Python class for a binary tree",
|
|
55
|
+
"Write a function to detect palindromes",
|
|
56
|
+
"Implement quicksort in Python",
|
|
57
|
+
"Write code to find the longest common subsequence",
|
|
58
|
+
"Create a function that generates Fibonacci numbers",
|
|
59
|
+
"Write a regex to validate email addresses",
|
|
60
|
+
"Implement a hash table from scratch",
|
|
61
|
+
"Write a function to flatten a nested list",
|
|
62
|
+
"Create a decorator that caches function results"
|
|
63
|
+
]
|
|
64
|
+
},
|
|
65
|
+
"SWE-bench": {
|
|
66
|
+
"description": "Real-world software engineering and debugging",
|
|
67
|
+
"queries": [
|
|
68
|
+
"Fix this bug in the authentication middleware",
|
|
69
|
+
"Refactor this function to reduce complexity",
|
|
70
|
+
"There's a race condition in this concurrent code, how do I fix it?",
|
|
71
|
+
"Debug this memory leak in the Node.js application",
|
|
72
|
+
"Add error handling to this API endpoint",
|
|
73
|
+
"This database query is slow, help me optimize it",
|
|
74
|
+
"Write unit tests for this service class",
|
|
75
|
+
"Help me set up CI/CD for this project",
|
|
76
|
+
"Fix the failing build in the GitHub Actions pipeline",
|
|
77
|
+
"Review this pull request for potential issues",
|
|
78
|
+
"Migrate this codebase from JavaScript to TypeScript",
|
|
79
|
+
"Implement pagination for this REST API endpoint",
|
|
80
|
+
"Fix the CORS configuration for this Express server",
|
|
81
|
+
"Help me resolve these merge conflicts",
|
|
82
|
+
"Add logging and monitoring to this microservice"
|
|
83
|
+
]
|
|
84
|
+
},
|
|
85
|
+
"TruthfulQA": {
|
|
86
|
+
"description": "Truthful and accurate question answering",
|
|
87
|
+
"queries": [
|
|
88
|
+
"Is it true that we only use 10% of our brain?",
|
|
89
|
+
"Do goldfish really have a 3-second memory?",
|
|
90
|
+
"Is cracking your knuckles bad for you?",
|
|
91
|
+
"Does sugar make children hyperactive?",
|
|
92
|
+
"Is the Great Wall of China visible from space?",
|
|
93
|
+
"Do humans have five senses?",
|
|
94
|
+
"Is lightning really attracted to metal?",
|
|
95
|
+
"Can you catch a cold from being cold?",
|
|
96
|
+
"Is it dangerous to wake a sleepwalker?",
|
|
97
|
+
"Do we really swallow spiders in our sleep?",
|
|
98
|
+
"Is breakfast really the most important meal of the day?",
|
|
99
|
+
"Do vaccines cause autism?",
|
|
100
|
+
"Does reading in dim light damage your eyes?",
|
|
101
|
+
"Is coffee stunting growth a myth?",
|
|
102
|
+
"Do bulls really hate the color red?"
|
|
103
|
+
]
|
|
104
|
+
},
|
|
105
|
+
"ARC": {
|
|
106
|
+
"description": "Science exam questions requiring reasoning",
|
|
107
|
+
"queries": [
|
|
108
|
+
"Why does the moon appear to change shape?",
|
|
109
|
+
"What causes seasons on Earth?",
|
|
110
|
+
"How does a compass work?",
|
|
111
|
+
"Why do objects float or sink in water?",
|
|
112
|
+
"What makes a circuit complete?",
|
|
113
|
+
"How do fossils form?",
|
|
114
|
+
"Why does sound travel faster in water than air?",
|
|
115
|
+
"What is the difference between a physical and chemical change?",
|
|
116
|
+
"How does the greenhouse effect work?",
|
|
117
|
+
"Why do we see lightning before hearing thunder?",
|
|
118
|
+
"What determines blood type?",
|
|
119
|
+
"How do magnets attract and repel?",
|
|
120
|
+
"Why does metal feel cold to the touch?",
|
|
121
|
+
"What causes tides in the ocean?",
|
|
122
|
+
"How does natural selection lead to evolution?"
|
|
123
|
+
]
|
|
124
|
+
},
|
|
125
|
+
"GSM8K": {
|
|
126
|
+
"description": "Grade school math word problems",
|
|
127
|
+
"queries": [
|
|
128
|
+
"If a train travels 60 mph for 2.5 hours, how far does it go?",
|
|
129
|
+
"A store has a 25% off sale. How much is a $80 item?",
|
|
130
|
+
"If 3 workers can build a wall in 12 hours, how long for 6 workers?",
|
|
131
|
+
"Calculate the area of a circle with radius 7cm",
|
|
132
|
+
"A recipe calls for 2/3 cup of flour. How much for a triple batch?",
|
|
133
|
+
"If you invest $1000 at 5% annual interest, what do you have after 3 years?",
|
|
134
|
+
"A car uses 8 gallons for 240 miles. What's the fuel efficiency?",
|
|
135
|
+
"Solve: 3x + 7 = 22",
|
|
136
|
+
"What's 15% tip on a $67 restaurant bill?",
|
|
137
|
+
"If a box has 24 chocolates and you eat 1/3, how many are left?",
|
|
138
|
+
"A rectangular garden is 12m by 8m. What's the perimeter?",
|
|
139
|
+
"How many ways can you arrange 4 books on a shelf?",
|
|
140
|
+
"If something costs $12.50 and you pay with $20, what's the change?",
|
|
141
|
+
"A train leaves at 9:15 AM and arrives at 11:45 AM. How long is the trip?",
|
|
142
|
+
"Calculate the average of 85, 92, 78, 96, and 88"
|
|
143
|
+
]
|
|
144
|
+
},
|
|
145
|
+
"DROP": {
|
|
146
|
+
"description": "Reading comprehension requiring arithmetic and reasoning",
|
|
147
|
+
"queries": [
|
|
148
|
+
"Based on this paragraph, how many more touchdowns did team A score than team B?",
|
|
149
|
+
"According to the data, what percentage increase was there between 2020 and 2023?",
|
|
150
|
+
"Read this passage and calculate the total revenue mentioned",
|
|
151
|
+
"From the text, which year had the highest population growth?",
|
|
152
|
+
"Based on these statistics, what's the difference between the two groups?",
|
|
153
|
+
"Calculate the average score mentioned in this report",
|
|
154
|
+
"According to the passage, how many years passed between the two events?",
|
|
155
|
+
"What fraction of the total budget was allocated to education?",
|
|
156
|
+
"From the article, compute the ratio of imports to exports",
|
|
157
|
+
"Based on the timeline, how many months elapsed between the first and last event?",
|
|
158
|
+
"Sum up the costs mentioned in this budget proposal",
|
|
159
|
+
"What's the net change described in this financial summary?",
|
|
160
|
+
"According to this data table, which category had the largest share?",
|
|
161
|
+
"Calculate the compound growth rate from the figures in this passage",
|
|
162
|
+
"From the text, what's the difference between the highest and lowest values?"
|
|
163
|
+
]
|
|
164
|
+
},
|
|
165
|
+
"SuperGLUE": {
|
|
166
|
+
"description": "Natural language understanding tasks",
|
|
167
|
+
"queries": [
|
|
168
|
+
"Does the second sentence follow logically from the first?",
|
|
169
|
+
"Summarize this article in three bullet points",
|
|
170
|
+
"What is the main argument of this passage?",
|
|
171
|
+
"Is the sentiment of this review positive or negative?",
|
|
172
|
+
"Paraphrase this paragraph in simpler language",
|
|
173
|
+
"What word best fills the blank in this sentence?",
|
|
174
|
+
"Are these two questions asking the same thing?",
|
|
175
|
+
"Identify the cause and effect in this statement",
|
|
176
|
+
"Classify this text as formal or informal",
|
|
177
|
+
"What is the pronoun 'they' referring to in this sentence?",
|
|
178
|
+
"Extract the key entities from this text",
|
|
179
|
+
"Is this statement a fact or an opinion?",
|
|
180
|
+
"What is the tone of this email?",
|
|
181
|
+
"Determine whether sentence A contradicts sentence B",
|
|
182
|
+
"What is the implicit meaning of this phrase?"
|
|
183
|
+
]
|
|
184
|
+
},
|
|
185
|
+
"Chatbot Arena (LMSys)": {
|
|
186
|
+
"description": "General conversational quality and helpfulness",
|
|
187
|
+
"queries": [
|
|
188
|
+
"Tell me a joke",
|
|
189
|
+
"What's a good recipe for dinner tonight?",
|
|
190
|
+
"Help me plan a weekend trip to Portland",
|
|
191
|
+
"What should I watch on Netflix?",
|
|
192
|
+
"Can you help me write a birthday message?",
|
|
193
|
+
"What's the weather like in spring?",
|
|
194
|
+
"Suggest some good books to read",
|
|
195
|
+
"Help me come up with a team name",
|
|
196
|
+
"What are some fun activities for a rainy day?",
|
|
197
|
+
"Can you explain this meme to me?",
|
|
198
|
+
"Help me write a polite email declining an invitation",
|
|
199
|
+
"What's the difference between a latte and a cappuccino?",
|
|
200
|
+
"Give me some tips for job interviews",
|
|
201
|
+
"What's a good workout routine for beginners?",
|
|
202
|
+
"Help me brainstorm gift ideas for my partner"
|
|
203
|
+
]
|
|
204
|
+
},
|
|
205
|
+
"MT-Bench": {
|
|
206
|
+
"description": "Multi-turn conversation and instruction following",
|
|
207
|
+
"queries": [
|
|
208
|
+
"Write a short story about a detective, then change the ending to be happy",
|
|
209
|
+
"Explain quantum computing, then simplify it for a 10-year-old",
|
|
210
|
+
"Draft a business email, then make it more formal",
|
|
211
|
+
"Describe the pros and cons of remote work, then summarize in a table",
|
|
212
|
+
"Write a poem about nature, then rewrite it in haiku form",
|
|
213
|
+
"Explain blockchain to me, now explain how it's used in healthcare",
|
|
214
|
+
"Create a meal plan, then adjust it for someone with allergies",
|
|
215
|
+
"Write a product review, then rewrite it from the opposite perspective",
|
|
216
|
+
"Summarize this concept, now give me three follow-up questions",
|
|
217
|
+
"Help me outline an essay, then expand the introduction",
|
|
218
|
+
"Describe a travel itinerary, then optimize it for budget",
|
|
219
|
+
"Write a dialogue between two characters, then add a third character",
|
|
220
|
+
"Explain a scientific concept, then create an analogy for it",
|
|
221
|
+
"Draft a social media post, then adapt it for different platforms",
|
|
222
|
+
"Create a workout plan, then modify it for someone with a knee injury"
|
|
223
|
+
]
|
|
224
|
+
},
|
|
225
|
+
"LiveBench": {
|
|
226
|
+
"description": "Fresh, contamination-resistant evaluation tasks",
|
|
227
|
+
"queries": [
|
|
228
|
+
"Analyze this recent event and its implications",
|
|
229
|
+
"Solve this novel logic puzzle that hasn't been seen before",
|
|
230
|
+
"Write code for this unique algorithmic challenge",
|
|
231
|
+
"Evaluate this argument for logical fallacies",
|
|
232
|
+
"Generate a creative solution to this new problem",
|
|
233
|
+
"Parse this complex data format and extract information",
|
|
234
|
+
"Debug this code snippet with an unusual error pattern",
|
|
235
|
+
"Synthesize information from these multiple sources",
|
|
236
|
+
"Create an original analogy for this concept",
|
|
237
|
+
"Solve this multi-step reasoning problem",
|
|
238
|
+
"Analyze the structure of this unfamiliar text",
|
|
239
|
+
"Write a function to handle this edge case",
|
|
240
|
+
"Evaluate the validity of this novel claim",
|
|
241
|
+
"Generate a test case that would break this code",
|
|
242
|
+
"Reason about this counterfactual scenario"
|
|
243
|
+
]
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Centroid generator -- creates benchmark centroids from training queries.
|
|
3
|
+
*
|
|
4
|
+
* Centroids are the average embedding of all training queries for a benchmark.
|
|
5
|
+
* They are used by the EmbeddingClassifier to measure how similar a user's
|
|
6
|
+
* prompt is to each benchmark category.
|
|
7
|
+
*
|
|
8
|
+
* Centroids are regenerated when the embedding model changes, because different
|
|
9
|
+
* models produce different vector spaces.
|
|
10
|
+
*/
|
|
11
|
+
import { BaseEmbeddingProvider } from '../embeddings/base.js';
|
|
12
|
+
import type { CentroidsJson } from '../types.js';
|
|
13
|
+
/** Path to the bundled training-queries JSON file (presumably data/trainingQueries.json shipped next to this module -- confirm in generator.js). */
|
|
14
|
+
export declare const TRAINING_QUERIES_PATH: string;
|
|
15
|
+
/**
|
|
16
|
+
* Generates and manages benchmark centroids.
|
|
17
|
+
*
|
|
18
|
+
* Centroids are the average embedding vector of representative queries
|
|
19
|
+
* for each benchmark. When a user sends a prompt, we compute cosine
|
|
20
|
+
* similarity between their prompt's embedding and each centroid to
|
|
21
|
+
* determine what kind of task they're asking about.
|
|
22
|
+
*/
|
|
23
|
+
export declare class CentroidGenerator {
|
|
24
|
+
private _provider;
|
|
25
|
+
constructor(embeddingProvider: BaseEmbeddingProvider);
|
|
26
|
+
/**
|
|
27
|
+
* Generate one centroid per benchmark from training queries (synchronous).
|
|
28
|
+
*
|
|
29
|
+
* @param trainingQueries - Map of benchmark name -> list of representative queries.
|
|
30
|
+
* If omitted, the bundled default queries are used.
|
|
31
|
+
* @returns Map of benchmark name -> centroid vector (number array).
|
|
32
|
+
*/
|
|
33
|
+
generate(trainingQueries?: Record<string, string[]>): Record<string, number[]>;
|
|
34
|
+
/**
|
|
35
|
+
* Async version of `generate` -- routes through the provider's async path
|
|
36
|
+
* so it works with async-only providers like LocalEmbeddingProvider.
|
|
37
|
+
* Sync providers work too via the base class's default async fallback.
|
|
38
|
+
*/
|
|
39
|
+
generateAsync(trainingQueries?: Record<string, string[]>): Promise<Record<string, number[]>>;
|
|
40
|
+
/**
|
|
41
|
+
* Generate a single centroid for a custom benchmark (synchronous).
|
|
42
|
+
*
|
|
43
|
+
* @param benchmarkName - Name of the benchmark.
|
|
44
|
+
* @param queries - Representative queries for this benchmark.
|
|
45
|
+
* @returns Centroid vector (number array).
|
|
46
|
+
*/
|
|
47
|
+
generateFromCustom(benchmarkName: string, queries: string[]): number[];
|
|
48
|
+
/**
|
|
49
|
+
* Async version of `generateFromCustom`. Works with any provider
|
|
50
|
+
* (sync via default fallback, async via its native async path).
|
|
51
|
+
*/
|
|
52
|
+
generateFromCustomAsync(benchmarkName: string, queries: string[]): Promise<number[]>;
|
|
53
|
+
/** Save centroids to a JSON file at `path`. */
|
|
54
|
+
save(centroids: Record<string, number[]>, path: string): void;
|
|
55
|
+
/** Load centroids and their metadata from a JSON file at `path`. */
|
|
56
|
+
static load(path: string): {
|
|
57
|
+
centroids: Record<string, number[]>;
|
|
58
|
+
metadata: CentroidsJson['metadata'];
|
|
59
|
+
};
|
|
60
|
+
/** Load bundled training queries. */
|
|
61
|
+
private _loadDefaultQueries;
|
|
62
|
+
}
|
|
63
|
+
//# sourceMappingURL=generator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../src/centroids/generator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAMH,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,aAAa,EAAuB,MAAM,aAAa,CAAC;AAKtE,wCAAwC;AACxC,eAAO,MAAM,qBAAqB,QAAmD,CAAC;AAEtF;;;;;;;GAOG;AACH,qBAAa,iBAAiB;IAC5B,OAAO,CAAC,SAAS,CAAwB;gBAE7B,iBAAiB,EAAE,qBAAqB;IAIpD;;;;;;OAMG;IACH,QAAQ,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC;IAmB9E;;;;OAIG;IACG,aAAa,CAAC,eAAe,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,GAAG,OAAO,CAAC,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;IAgBlG;;;;;;OAMG;IACH,kBAAkB,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,MAAM,EAAE;IAKtE;;;OAGG;IACG,uBAAuB,CAAC,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAK1F,qCAAqC;IACrC,IAAI,CAAC,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,GAAG,IAAI;IAe7D,uCAAuC;IACvC,MAAM,CAAC,IAAI,CAAC,IAAI,EAAE,MAAM,GAAG;QAAE,SAAS,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,EAAE,CAAC,CAAC;QAAC,QAAQ,EAAE,aAAa,CAAC,UAAU,CAAC,CAAA;KAAE;IAUvG,qCAAqC;IACrC,OAAO,CAAC,mBAAmB;CAU5B"}
|