kiln-ai 0.11.1__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (80) hide show
  1. kiln_ai/adapters/__init__.py +4 -0
  2. kiln_ai/adapters/adapter_registry.py +163 -39
  3. kiln_ai/adapters/data_gen/data_gen_task.py +18 -0
  4. kiln_ai/adapters/eval/__init__.py +28 -0
  5. kiln_ai/adapters/eval/base_eval.py +164 -0
  6. kiln_ai/adapters/eval/eval_runner.py +270 -0
  7. kiln_ai/adapters/eval/g_eval.py +368 -0
  8. kiln_ai/adapters/eval/registry.py +16 -0
  9. kiln_ai/adapters/eval/test_base_eval.py +325 -0
  10. kiln_ai/adapters/eval/test_eval_runner.py +641 -0
  11. kiln_ai/adapters/eval/test_g_eval.py +498 -0
  12. kiln_ai/adapters/eval/test_g_eval_data.py +4 -0
  13. kiln_ai/adapters/fine_tune/base_finetune.py +16 -2
  14. kiln_ai/adapters/fine_tune/finetune_registry.py +2 -0
  15. kiln_ai/adapters/fine_tune/test_dataset_formatter.py +4 -1
  16. kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +1 -1
  17. kiln_ai/adapters/fine_tune/test_openai_finetune.py +1 -1
  18. kiln_ai/adapters/fine_tune/test_together_finetune.py +531 -0
  19. kiln_ai/adapters/fine_tune/together_finetune.py +325 -0
  20. kiln_ai/adapters/ml_model_list.py +758 -163
  21. kiln_ai/adapters/model_adapters/__init__.py +2 -4
  22. kiln_ai/adapters/model_adapters/base_adapter.py +61 -43
  23. kiln_ai/adapters/model_adapters/litellm_adapter.py +391 -0
  24. kiln_ai/adapters/model_adapters/litellm_config.py +13 -0
  25. kiln_ai/adapters/model_adapters/test_base_adapter.py +22 -13
  26. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +407 -0
  27. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +41 -19
  28. kiln_ai/adapters/model_adapters/test_structured_output.py +59 -35
  29. kiln_ai/adapters/ollama_tools.py +3 -3
  30. kiln_ai/adapters/parsers/r1_parser.py +19 -14
  31. kiln_ai/adapters/parsers/test_r1_parser.py +17 -5
  32. kiln_ai/adapters/prompt_builders.py +80 -42
  33. kiln_ai/adapters/provider_tools.py +50 -58
  34. kiln_ai/adapters/repair/repair_task.py +9 -21
  35. kiln_ai/adapters/repair/test_repair_task.py +6 -6
  36. kiln_ai/adapters/run_output.py +3 -0
  37. kiln_ai/adapters/test_adapter_registry.py +26 -29
  38. kiln_ai/adapters/test_generate_docs.py +4 -4
  39. kiln_ai/adapters/test_ollama_tools.py +0 -1
  40. kiln_ai/adapters/test_prompt_adaptors.py +47 -33
  41. kiln_ai/adapters/test_prompt_builders.py +91 -31
  42. kiln_ai/adapters/test_provider_tools.py +26 -81
  43. kiln_ai/datamodel/__init__.py +50 -952
  44. kiln_ai/datamodel/basemodel.py +2 -0
  45. kiln_ai/datamodel/datamodel_enums.py +60 -0
  46. kiln_ai/datamodel/dataset_filters.py +114 -0
  47. kiln_ai/datamodel/dataset_split.py +170 -0
  48. kiln_ai/datamodel/eval.py +298 -0
  49. kiln_ai/datamodel/finetune.py +105 -0
  50. kiln_ai/datamodel/json_schema.py +7 -1
  51. kiln_ai/datamodel/project.py +23 -0
  52. kiln_ai/datamodel/prompt.py +37 -0
  53. kiln_ai/datamodel/prompt_id.py +83 -0
  54. kiln_ai/datamodel/strict_mode.py +24 -0
  55. kiln_ai/datamodel/task.py +181 -0
  56. kiln_ai/datamodel/task_output.py +328 -0
  57. kiln_ai/datamodel/task_run.py +164 -0
  58. kiln_ai/datamodel/test_basemodel.py +19 -11
  59. kiln_ai/datamodel/test_dataset_filters.py +71 -0
  60. kiln_ai/datamodel/test_dataset_split.py +32 -8
  61. kiln_ai/datamodel/test_datasource.py +22 -2
  62. kiln_ai/datamodel/test_eval_model.py +635 -0
  63. kiln_ai/datamodel/test_example_models.py +9 -13
  64. kiln_ai/datamodel/test_json_schema.py +23 -0
  65. kiln_ai/datamodel/test_models.py +2 -2
  66. kiln_ai/datamodel/test_prompt_id.py +129 -0
  67. kiln_ai/datamodel/test_task.py +159 -0
  68. kiln_ai/utils/config.py +43 -1
  69. kiln_ai/utils/dataset_import.py +232 -0
  70. kiln_ai/utils/test_dataset_import.py +596 -0
  71. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/METADATA +86 -6
  72. kiln_ai-0.13.0.dist-info/RECORD +103 -0
  73. kiln_ai/adapters/model_adapters/langchain_adapters.py +0 -302
  74. kiln_ai/adapters/model_adapters/openai_compatible_config.py +0 -11
  75. kiln_ai/adapters/model_adapters/openai_model_adapter.py +0 -246
  76. kiln_ai/adapters/model_adapters/test_langchain_adapter.py +0 -350
  77. kiln_ai/adapters/model_adapters/test_openai_model_adapter.py +0 -225
  78. kiln_ai-0.11.1.dist-info/RECORD +0 -76
  79. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/WHEEL +0 -0
  80. {kiln_ai-0.11.1.dist-info → kiln_ai-0.13.0.dist-info}/licenses/LICENSE.txt +0 -0
@@ -0,0 +1,498 @@
1
+ import math
2
+ import pickle
3
+
4
+ import pytest
5
+
6
+ from kiln_ai.adapters.eval.g_eval import TOKEN_TO_SCORE_MAP, GEval, GEvalTask
7
+ from kiln_ai.adapters.eval.test_g_eval_data import serialized_run_output
8
+ from kiln_ai.adapters.ml_model_list import built_in_models
9
+ from kiln_ai.adapters.model_adapters.base_adapter import RunOutput
10
+ from kiln_ai.adapters.test_prompt_adaptors import get_all_models_and_providers
11
+ from kiln_ai.datamodel import (
12
+ DataSource,
13
+ DataSourceType,
14
+ Project,
15
+ Task,
16
+ TaskOutput,
17
+ TaskOutputRatingType,
18
+ TaskRequirement,
19
+ TaskRun,
20
+ )
21
+ from kiln_ai.datamodel.eval import Eval, EvalConfig, EvalConfigType, EvalOutputScore
22
+ from kiln_ai.datamodel.task import RunConfig
23
+
24
+
25
+ @pytest.fixture
26
+ def test_task(tmp_path):
27
+ project = Project(name="Test Project", path=tmp_path / "project.kiln")
28
+ project.save_to_file()
29
+
30
+ task = Task(
31
+ name="Joke Generator",
32
+ instruction="Generate a joke, given a topic",
33
+ parent=project,
34
+ requirements=[
35
+ TaskRequirement(
36
+ name="Topic alignment",
37
+ instruction="Rate how aligned the joke is to the provided topic",
38
+ type=TaskOutputRatingType.five_star,
39
+ ),
40
+ TaskRequirement(
41
+ name="Appropriateness",
42
+ instruction="Check if the content is appropriate for all audiences",
43
+ type=TaskOutputRatingType.pass_fail,
44
+ ),
45
+ ],
46
+ )
47
+ task.save_to_file()
48
+ return task
49
+
50
+
51
+ @pytest.fixture
52
+ def test_eval_config(test_task):
53
+ eval = Eval(
54
+ name="Joke Quality Eval",
55
+ parent=test_task,
56
+ eval_set_filter_id="tag::tag1",
57
+ eval_configs_filter_id="tag::tag2",
58
+ output_scores=[
59
+ EvalOutputScore(
60
+ name="appropriateness",
61
+ type=TaskOutputRatingType.pass_fail,
62
+ ),
63
+ EvalOutputScore(
64
+ name="topic_alignment",
65
+ type=TaskOutputRatingType.five_star,
66
+ ),
67
+ EvalOutputScore(
68
+ name="overall_rating",
69
+ type=TaskOutputRatingType.five_star,
70
+ ),
71
+ ],
72
+ )
73
+ eval.save_to_file()
74
+
75
+ config = EvalConfig(
76
+ name="Llama 8b Joke Generator Eval",
77
+ parent=eval,
78
+ config_type=EvalConfigType.g_eval,
79
+ model_name="gpt_4o_mini",
80
+ model_provider="openai",
81
+ properties={
82
+ "eval_steps": [
83
+ "Is the joke funny?",
84
+ "Is the content appropriate for all audiences?",
85
+ "Is the joke culturally sensitive?",
86
+ "Is the joke politically correct?",
87
+ "Is the joke aligned with the provided topic?",
88
+ ]
89
+ },
90
+ )
91
+ config.save_to_file()
92
+ return config
93
+
94
+
95
+ @pytest.fixture
96
+ def test_run_config(test_task):
97
+ return RunConfig(
98
+ model_name="llama_3_1_8b",
99
+ model_provider_name="groq",
100
+ task=test_task,
101
+ prompt_id="simple_prompt_builder",
102
+ )
103
+
104
+
105
+ @pytest.fixture
106
+ def test_task_run(test_task):
107
+ task_run = TaskRun(
108
+ parent=test_task,
109
+ input="Tell me a chicken joke",
110
+ input_source=DataSource(
111
+ type=DataSourceType.human, properties={"created_by": "test_user"}
112
+ ),
113
+ output=TaskOutput(
114
+ output="Why did the chicken cross the road? To get to the other side!",
115
+ source=DataSource(
116
+ type=DataSourceType.synthetic,
117
+ properties={
118
+ "model_name": "llama_3_1_8b",
119
+ "model_provider": "groq",
120
+ "adapter_name": "langchain",
121
+ },
122
+ ),
123
+ ),
124
+ )
125
+ task_run.save_to_file()
126
+ return task_run
127
+
128
+
129
+ async def run_g_eval_test(
130
+ test_task,
131
+ test_eval_config,
132
+ test_task_run,
133
+ config_type,
134
+ test_run_config,
135
+ model_name: str | None = None,
136
+ provider_name: str | None = None,
137
+ ):
138
+ # Create G-Eval instance
139
+ test_eval_config.config_type = config_type
140
+ if model_name is not None and provider_name is not None:
141
+ test_eval_config.model_name = model_name
142
+ test_eval_config.model_provider = provider_name
143
+ g_eval = GEval(test_eval_config, test_run_config)
144
+
145
+ # Run the evaluation
146
+ eval_result, intermediate_outputs = await g_eval.run_eval(test_task_run)
147
+
148
+ # Should have 1 intermediate output (thinking or chain of thought)
149
+ assert len(intermediate_outputs) == 1
150
+
151
+ assert "topic_alignment" in eval_result
152
+ topic_alignment = eval_result["topic_alignment"]
153
+ assert isinstance(topic_alignment, float)
154
+ assert 1 <= topic_alignment <= 5
155
+
156
+ assert "appropriateness" in eval_result
157
+ appropriateness = eval_result["appropriateness"]
158
+ assert isinstance(appropriateness, float)
159
+ assert appropriateness >= 0.0 and appropriateness <= 1.0
160
+
161
+ assert "overall_rating" in eval_result
162
+ overall = eval_result["overall_rating"]
163
+ assert isinstance(overall, float)
164
+ assert 1.0 <= overall <= 5.0
165
+
166
+
167
+ @pytest.mark.parametrize(
168
+ "config_type", [EvalConfigType.g_eval, EvalConfigType.llm_as_judge]
169
+ )
170
+ @pytest.mark.paid
171
+ async def test_run_g_eval_paid(
172
+ test_task, test_eval_config, test_task_run, config_type, test_run_config
173
+ ):
174
+ await run_g_eval_test(
175
+ test_task, test_eval_config, test_task_run, config_type, test_run_config
176
+ )
177
+
178
+
179
+ @pytest.mark.parametrize(
180
+ "config_type", [EvalConfigType.g_eval, EvalConfigType.llm_as_judge]
181
+ )
182
+ @pytest.mark.paid
183
+ async def test_run_g_eval_e2e(
184
+ test_task, test_eval_config, test_task_run, config_type, test_run_config
185
+ ):
186
+ # Create G-Eval instance
187
+ test_eval_config.config_type = config_type
188
+ g_eval = GEval(test_eval_config, test_run_config)
189
+
190
+ # Run the evaluation
191
+ task_run, scores, intermediate_outputs = await g_eval.run_task_and_eval("chickens")
192
+
193
+ # Verify the evaluation results
194
+ assert isinstance(scores, dict)
195
+
196
+ # Should have 1 intermediate output (thinking or chain of thought)
197
+ assert len(intermediate_outputs) == 1
198
+
199
+ assert "topic_alignment" in scores
200
+ topic_alignment = scores["topic_alignment"]
201
+ assert isinstance(topic_alignment, float)
202
+ assert 1 <= topic_alignment <= 5
203
+
204
+ assert "appropriateness" in scores
205
+ appropriateness = scores["appropriateness"]
206
+ assert isinstance(appropriateness, float)
207
+ assert appropriateness >= 0.0 and appropriateness <= 1.0
208
+
209
+ assert "overall_rating" in scores
210
+ overall = scores["overall_rating"]
211
+ assert isinstance(overall, float)
212
+ assert 1.0 <= overall <= 5.0
213
+
214
+
215
+ async def test_g_eval_logprobs(
216
+ test_task, test_eval_config, test_task_run, test_run_config
217
+ ):
218
+ # Create G-Eval instance
219
+ run_output = pickle.loads(serialized_run_output)
220
+ assert isinstance(run_output, RunOutput)
221
+ assert run_output.output_logprobs is not None
222
+ g_eval = GEval(test_eval_config, test_run_config)
223
+ result = g_eval.build_g_eval_score(run_output)
224
+
225
+ assert "overall_rating" in result
226
+ overall = result["overall_rating"]
227
+ assert isinstance(overall, float)
228
+ assert overall >= 1.0 and overall <= 5.0
229
+ # Confirm weighted value, and confirm the approx isn't why it's passing
230
+ assert pytest.approx(overall) == 3.99752802363598
231
+ assert pytest.approx(overall) != 4.0
232
+
233
+ # Check topic_alignment
234
+ assert "topic_alignment" in result
235
+ topic_alignment = result["topic_alignment"]
236
+ assert isinstance(topic_alignment, float)
237
+ assert topic_alignment >= 1.0 and topic_alignment <= 5.0
238
+ # Confirm weighted value, and confirm the approx isn't why it's passing
239
+ assert pytest.approx(topic_alignment) == 4.999983298485167
240
+ assert pytest.approx(topic_alignment) != 5.0
241
+
242
+ # Check appropriateness
243
+ assert "appropriateness" in result
244
+ appropriateness = result["appropriateness"]
245
+ assert isinstance(appropriateness, float)
246
+ assert appropriateness >= 0.0 and appropriateness <= 1.0
247
+ # Fail chance so low, we need to specify the precision
248
+ assert pytest.approx(appropriateness, 1e-12) == 0.9999999999572222
249
+ assert pytest.approx(appropriateness, 1e-12) != 1.0
250
+
251
+
252
+ async def test_llm_as_judge(
253
+ test_task, test_eval_config, test_task_run, test_run_config
254
+ ):
255
+ # Create G-Eval instance, set to LLM as Judge
256
+ run_output = pickle.loads(serialized_run_output)
257
+ test_eval_config.config_type = EvalConfigType.llm_as_judge
258
+ g_eval = GEval(test_eval_config, test_run_config)
259
+
260
+ assert isinstance(run_output, RunOutput)
261
+ assert run_output.output_logprobs is not None
262
+ result = g_eval.build_llm_as_judge_score(run_output)
263
+
264
+ # unlike g_eval, llm_as_judge returns the main token converted to our float scores
265
+ assert result["overall_rating"] == 4.0
266
+ assert result["topic_alignment"] == 5.0
267
+ assert result["appropriateness"] == 1.0
268
+
269
+
270
+ def test_token_case():
271
+ # we assume the token is lower case in the logprobs token fuzzy matching code. This will catch if we ever add a token that's not.
272
+ for token in TOKEN_TO_SCORE_MAP.keys():
273
+ assert token.lower() == token
274
+
275
+
276
+ def test_metric_offsets_and_search_ranges(
277
+ test_eval_config, test_run_config, test_task_run
278
+ ):
279
+ g_eval = GEval(test_eval_config, test_run_config)
280
+ raw_output = (
281
+ '{"topic_alignment": 4, "appropriateness": "pass", "overall_rating": 5}'
282
+ )
283
+ metrics = ["topic_alignment", "appropriateness", "overall_rating"]
284
+
285
+ offsets = g_eval.metric_offsets(raw_output, metrics)
286
+
287
+ assert len(offsets) == 3
288
+ assert offsets["topic_alignment"] == 1 # Position after opening {
289
+ assert offsets["appropriateness"] == 23 # Position after "appropriateness":
290
+ assert offsets["overall_rating"] == 50 # Position after "overall_rating":
291
+
292
+ # Test search ranges
293
+
294
+ # Test first metric
295
+ start, end = g_eval.token_search_range(raw_output, "topic_alignment", offsets)
296
+ assert start == 16 # Position after "topic_alignment"
297
+ assert end == 23 # Position after "appropriateness"
298
+
299
+ # Test middle metric
300
+ start, end = g_eval.token_search_range(raw_output, "appropriateness", offsets)
301
+ assert start == 38 # Position after "appropriateness"
302
+ assert end == 50 # Position after "overall_rating"
303
+
304
+ # Test last metric
305
+ start, end = g_eval.token_search_range(raw_output, "overall_rating", offsets)
306
+ assert start == 64 # Position after "overall_rating"
307
+ assert end == len(raw_output) # end of string
308
+
309
+
310
+ def test_metric_offsets_invalid(test_eval_config, test_run_config):
311
+ g_eval = GEval(test_eval_config, test_run_config)
312
+ raw_output = '{"topic_alignment": 4, "topic_alignment": 5}'
313
+ metrics = ["topic_alignment"]
314
+
315
+ with pytest.raises(ValueError, match="should appear exactly once"):
316
+ g_eval.metric_offsets(raw_output, metrics)
317
+
318
+ raw_output = '{"something_else": 4}'
319
+ with pytest.raises(ValueError, match="should appear exactly once"):
320
+ g_eval.metric_offsets(raw_output, metrics)
321
+
322
+
323
+ @pytest.mark.parametrize(
324
+ "token_string,expected_score",
325
+ [
326
+ # Direct matches
327
+ ("1", 1.0),
328
+ ("5", 5.0),
329
+ ("pass", 1.0),
330
+ ("fail", 0.0),
331
+ ("critical", -1.0),
332
+ # Variations with quotes and spacing
333
+ ('"1"', 1.0),
334
+ (" pass ", 1.0),
335
+ ("PASS", 1.0),
336
+ ('"FAIL"', 0.0),
337
+ ('"pAss"', 1.0),
338
+ ("1.0", 1.0),
339
+ ("2.0", 2.0),
340
+ ("3.0", 3.0),
341
+ ("4.0", 4.0),
342
+ ("5.0", 5.0),
343
+ ("5.0000", 5.0),
344
+ # Invalid tokens
345
+ ("invalid", None),
346
+ ("6", None),
347
+ ("0", None),
348
+ ("", None),
349
+ ("4.9999999", None),
350
+ ],
351
+ )
352
+ def test_score_from_token_string(
353
+ test_eval_config, token_string, expected_score, test_run_config
354
+ ):
355
+ g_eval = GEval(test_eval_config, test_run_config)
356
+ assert g_eval.score_from_token_string(token_string) == expected_score
357
+
358
+
359
+ def test_raw_output_from_logprobs(test_eval_config, test_run_config):
360
+ g_eval = GEval(test_eval_config, test_run_config)
361
+
362
+ # Create a minimal RunOutput with some logprobs
363
+ class MockLogprob:
364
+ def __init__(self, token):
365
+ self.token = token
366
+
367
+ class MockLogprobs:
368
+ def __init__(self):
369
+ self.content = [
370
+ MockLogprob('{"'),
371
+ MockLogprob("score"),
372
+ MockLogprob('": '),
373
+ MockLogprob("5"),
374
+ MockLogprob("}"),
375
+ ]
376
+
377
+ run_output = RunOutput(
378
+ output={"score": 5},
379
+ output_logprobs=MockLogprobs(),
380
+ intermediate_outputs={},
381
+ )
382
+
383
+ raw = g_eval.raw_output_from_logprobs(run_output)
384
+ assert raw == '{"score": 5}'
385
+
386
+
387
+ def test_rating_token_to_score(test_eval_config, test_run_config):
388
+ g_eval = GEval(test_eval_config, test_run_config)
389
+
390
+ class MockTopLogprob:
391
+ def __init__(self, token, logprob):
392
+ self.token = token
393
+ self.logprob = logprob
394
+
395
+ class MockTokenLogprob:
396
+ def __init__(self, token, top_logprobs):
397
+ self.token = token
398
+ self.top_logprobs = [MockTopLogprob(t, lp) for t, lp in top_logprobs]
399
+
400
+ # Test single token case
401
+ token_logprob = MockTokenLogprob("5", [("5", 0.0)]) # log(1) = 0
402
+ score = g_eval.rating_token_to_score(token_logprob)
403
+ assert score == 5.0
404
+
405
+ # Test weighted average case
406
+ token_logprob = MockTokenLogprob(
407
+ "4",
408
+ [
409
+ ("4", math.log(0.6)), # 60% probability
410
+ ("5", math.log(0.4)), # 40% probability
411
+ ],
412
+ )
413
+ score = g_eval.rating_token_to_score(token_logprob)
414
+ assert pytest.approx(score) == 4.4 # (4 * 0.6 + 5 * 0.4)
415
+
416
+ # Test invalid token
417
+ token_logprob = MockTokenLogprob(":", [(":", 0.0)])
418
+ assert g_eval.rating_token_to_score(token_logprob) is None
419
+
420
+ # Test no valid scoring tokens
421
+ token_logprob = MockTokenLogprob("5", [])
422
+ with pytest.raises(RuntimeError, match="No valid scoring tokens found"):
423
+ g_eval.rating_token_to_score(token_logprob)
424
+
425
+
426
+ def test_g_eval_system_instruction():
427
+ eval = Eval(
428
+ name="Test Eval",
429
+ eval_set_filter_id="tag::tag1",
430
+ eval_configs_filter_id="tag::tag2",
431
+ output_scores=[
432
+ EvalOutputScore(name="overall_rating", type=TaskOutputRatingType.five_star),
433
+ ],
434
+ )
435
+ eval_config = EvalConfig(
436
+ parent=eval,
437
+ name="Test Eval",
438
+ model_name="gpt_4o_mini",
439
+ model_provider="openai",
440
+ config_type=EvalConfigType.g_eval,
441
+ properties={
442
+ "task_description": "Test task description",
443
+ "eval_steps": ["Step 1", "Step 2"],
444
+ },
445
+ )
446
+ g_eval_task = GEvalTask(eval_config)
447
+ assert g_eval_task.instruction == (
448
+ "Your job to evaluate a model's performance on a task. Blocks will be marked with <eval_data> tags.\n\n"
449
+ "The task the model was given is as follows:\n<eval_data>\n"
450
+ "Test task description\n"
451
+ "</eval_data>\n"
452
+ )
453
+
454
+ # Test without task description
455
+ eval_config.properties = {"eval_steps": ["Step 1", "Step 2"]}
456
+ g_eval_task = GEvalTask(eval_config)
457
+ assert (
458
+ g_eval_task.instruction
459
+ == "Your job to evaluate a model's performance on a task. Blocks will be marked with <eval_data> tags.\n"
460
+ )
461
+
462
+
463
+ def check_supports_logprobs(model_name: str, provider_name: str):
464
+ for model in built_in_models:
465
+ if model.name != model_name:
466
+ continue
467
+ for provider in model.providers:
468
+ if provider.name != provider_name:
469
+ continue
470
+ if not provider.supports_logprobs:
471
+ pytest.skip(
472
+ f"Skipping {model.name} {provider.name} because it does not support logprobs"
473
+ )
474
+ return
475
+ raise RuntimeError(f"No model {model_name} {provider_name} found")
476
+
477
+
478
+ @pytest.mark.paid
479
+ @pytest.mark.ollama
480
+ @pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
481
+ async def test_all_built_in_models_logprobs_geval(
482
+ model_name,
483
+ provider_name,
484
+ test_task,
485
+ test_eval_config,
486
+ test_task_run,
487
+ test_run_config,
488
+ ):
489
+ check_supports_logprobs(model_name, provider_name)
490
+ await run_g_eval_test(
491
+ test_task,
492
+ test_eval_config,
493
+ test_task_run,
494
+ EvalConfigType.g_eval,
495
+ test_run_config,
496
+ model_name,
497
+ provider_name.value,
498
+ )
@@ -0,0 +1,4 @@
1
+ # Saved a real RunOutput, with real logprobs via:
2
+ # po = pickle.dumps(result)
3
+ # print(f"\n\nPickled result: \n{po}\n\n")
4
+ serialized_run_output = b"\x80\x04\x95\xe8:\x00\x00\x00\x00\x00\x00\x8c\x1bkiln_ai.adapters.run_output\x94\x8c\tRunOutput\x94\x93\x94)\x81\x94}\x94(\x8c\x06output\x94}\x94(\x8c\x0ftopic_alignment\x94K\x05\x8c\x0fappropriateness\x94\x8c\x04pass\x94\x8c\x0eoverall_rating\x94K\x04u\x8c\x14intermediate_outputs\x94}\x94\x8c\x10chain_of_thought\x94X\x08\x06\x00\x001) **Is the joke funny?**\n The joke \"Why did the chicken cross the road? To get to the other side!\" is a classic joke that many consider to be humorous due to its simplicity and unexpected nature. However, as it's a very well-known punchline, some may find it less amusing for being overly familiar. Overall, it can elicit a chuckle, but it may not be considered original or particularly funny by everyone.\n\n2) **Is the content appropriate for all audiences?**\n Yes, the joke is appropriate for all audiences. It does not contain any offensive language or themes, making it suitable for children and adults alike.\n\n3) **Is the joke culturally sensitive?**\n Yes, the joke is culturally sensitive. It does not touch on any potentially sensitive topics or stereotypes. It\xe2\x80\x99s a universal humor that transcends cultural boundaries.\n\n4) **Is the joke politically correct?**\n Yes, the joke is politically correct. It does not make any political statements or discriminatory remarks. It simply presents a light-hearted situation involving a chicken, which is neutral and inoffensive.\n\n5) **Is the joke aligned with the provided topic?**\n Yes, the joke is aligned with the provided topic of a \"chicken joke.\" It directly references a chicken and is structured as a joke, fulfilling the prompt's requirements.\n\nIn summary, while the joke may lack originality, it is appropriate, sensitive, politically correct, and aligns well with the topic. The humor level can vary depending on personal taste, but overall, it meets the evaluation criteria.\x94s\x8c\x0foutput_logprobs\x94\x8c!openai.types.chat.chat_completion\x94\x8c\x0eChoiceLogprobs\x94\x93\x94)\x81\x94}\x94(\x8c\x08__dict__\x94}\x94(\x8c\x07content\x94]\x94(\x8c/openai.types.chat.chat_completion_token_logprob\x94\x8c\x1aChatCompletionTokenLogprob\x94\x93\x94)\x81\x94}\x94(h\x15}\x94(\x8c\x05token\x94\x8c\x02{\"\x94\x8c\x05bytes\x94]\x94(K{K\"e\x8c\x07logprob\x94G\xbf5\xfe.\xba\x97\xb1\xde\x8c\x0ctop_logprobs\x94]\x94(h\x19\x8c\nTopLogprob\x94\x93\x94)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02{\"\x94h!]\x94(K{K\"eh#G\xbf5\xfe.\xba\x97\xb1\xdeu\x8c\x12__pydantic_extra__\x94}\x94\x8c\x17__pydantic_fields_set__\x94\x8f\x94(h\x1fh#h!\x90\x8c\x14__pydantic_private__\x94Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02{\n\x94h!]\x94(K{K\neh#G\xc0 \x00,\nJ\x05\xdeuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01{\x94h!]\x94K{ah#G\xc0/\x80,\nJ\x05\xdeuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03{\r\n\x94h!]\x94(K{K\rK\neh#G\xc01@\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03{\n\n\x94h!]\x94(K{K\nK\neh#G\xc03\xc0\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 {\"\x94h!]\x94(K K{K\"eh#G\xc05\x00\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 {\n\x94h!]\x94(K K{K\neh#G\xc06\xe0\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01\n\x94h!]\x94K\nah#G\xc07\xe0\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02{}\x94h!]\x94(K{K}eh#G\xc08 \x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05topic\x94h!]\x94(KtKoKpKiKceh#G\xbfS\x8a+<\x99\xb9Oh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05topic\x94h!]\x94(KtKoKpKiKceh#G\xbfS\x8a+<\x99\xb9Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xc0\x1b\x818\xa2\x07\xfd%uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04type\x94h!]\x94(KtKyKpKeeh#G\xc0!\x80\x9c^o\xf7\xe0uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03top\x94h!]\x94(KtKoKpeh#G\xc0-\x00\x9c^o\xf7\xe0uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05theme\x94h!]\x94(KtKhKeKmKeeh#G\xc0.\x00\x9c^o\xf7\xe0uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05total\x94h!]\x94(KtKoKtKaKleh#G\xc00\x00N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06 topic\x94h!]\x94(K KtKoKpKiKceh#G\xc00@N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05Topic\x94h!]\x94(KTKoKpKiKceh#G\xc00\xa0N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0bappropriate\x94h!]\x94(KaKpKpKrKoKpKrKiKaKtKeeh#G\xc00\xa0N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05title\x94h!]\x94(KtKiKtKlKeeh#G\xc00\xc0N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n_alignment\x94h!]\x94(K_KaKlKiKgKnKmKeKnKteh#G\xbe\xc1\x9f\x96D1\x8b\xf2h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n_alignment\x94h!]\x94(K_KaKlKiKgKnKmKeKnKteh#G\xbe\xc1\x9f\x96D1\x8b\xf2uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n alignment\x94h!]\x94(K KaKlKiKgKnKmKeKnKteh#G\xc0+\x00\x00C\x1b\xde\x83uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06_align\x94h!]\x94(K_KaKlKiKgKneh#G\xc0.@\x00C\x1b\xde\x83uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n_ALIGNMENT\x94h!]\x94(K_KAKLKIKGKNKMKEKNKTeh#G\xc0.\x80\x00C\x1b\xde\x83uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\tAlignment\x94h!]\x94(KAKlKiKgKnKmKeKnKteh#G\xc00\xc0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0b_assignment\x94h!]\x94(K_KaKsKsKiKgKnKmKeKnKteh#G\xc01@\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n Alignment\x94h!]\x94(K KAKlKiKgKnKmKeKnKteh#G\xc01@\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03_al\x94h!]\x94(K_KaKleh#G\xc01\xa0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0b_similarity\x94h!]\x94(K_KsKiKmKiKlKaKrKiKtKyeh#G\xc01\xe0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07_rating\x94h!]\x94(K_KrKaKtKiKnKgeh#G\xc02 \x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\xe2\x80\x9d:\x94h!]\x94(K\xe2K\x80K\x9dK:eh#G\xc02@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\\\":\x94h!]\x94(K\\K\"K:eh#G\xc03\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02':\x94h!]\x94(K'K:eh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\xc04\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02`:\x94h!]\x94(K`K:eh#G\xc05\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\xe2\x80\x9d\xef\xbc\x9a\x94h!]\x94(K\xe2K\x80K\x9dK\xefK\xbcK\x9aeh#G\xc06`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\xc2\xbb:\x94h!]\x94(K\xc2K\xbbK:eh#G\xc07 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03+\":\x94h!]\x94(K+K\"K:eh#G\xc07@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":[\x94h!]\x94(K\"K:K[eh#G\xc07\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x015\x94h!]\x94K5ah#G\xbe\xf1\x93\xc3:x\xd77h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fjY\x01\x00\x00h!]\x94K5ah#G\xbe\xf1\x93\xc3:x\xd77uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x014\x94h!]\x94K4ah#G\xc0&\x00\x02:l\xe3Xuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01 \x94h!]\x94K ah#G\xc01\xc0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x013\x94h!]\x94K3ah#G\xc07\xc0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02 \x94h!]\x94(K K eh#G\xc08\xa0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01-\x94h!]\x94K-ah#G\xc0; \x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01f\x94h!]\x94Kfah#G\xc0;0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01\t\x94h!]\x94K\tah#G\xc0;0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 \x94h!]\x94(K K K eh#G\xc0;@\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01\"\x94h!]\x94K\"ah#G\xc0;p\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01,\x94h!]\x94K,ah#G\xc05\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 ,\"\x94h!]\x94(K K,K\"eh#G\xc06`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\"\\\x94h!]\x94(K,K\"K\\eh#G\xc07`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\"%\x94h!]\x94(K,K\"K%eh#G\xc07\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\",\x94h!]\x94(K,K\"K,eh#G\xc0:\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\n\x94h!]\x94(K,K\neh#G\xc0:\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\r\n\x94h!]\x94(K,K\rK\neh#G\xc0< \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\x8f\x01\x00\x00h!]\x94K\tah#G\xc0=p\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01.\x94h!]\x94K.ah#G\xc0>@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07appropr\x94h!]\x94(KaKpKpKrKoKpKreh#G\xbf\x1d\x1c\xa4[(\x97\x91h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07appropr\x94h!]\x94(KaKpKpKrKoKpKreh#G\xbf\x1d\x1c\xa4[(\x97\x91uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05appro\x94h!]\x94(KaKpKpKrKoeh#G\xc0\"\x80\x0e\x8c\x8a\xbd^uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0bappropriate\x94h!]\x94(KaKpKpKrKoKpKrKiKaKtKeeh#G\xc0&\x80\x0e\x8c\x8a\xbd^uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\t appropri\x94h!]\x94(K KaKpKpKrKoKpKrKieh#G\xc0*\x80\x0e\x8c\x8a\xbd^uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02in\x94h!]\x94(KiKneh#G\xc00\xe0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05Appro\x94h!]\x94(KAKpKpKrKoeh#G\xc02\x80\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06 Appro\x94h!]\x94(K KAKpKpKrKoeh#G\xc02\xa0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xc02\xe0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04apro\x94h!]\x94(KaKpKrKoeh#G\xc03\xe0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\rapproximately\x94h!]\x94(KaKpKpKrKoKxKiKmKaKtKeKlKyeh#G\xc04@\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01i\x94h!]\x94Kiah#G\xbe\xaa~\xe0\xee\xab\x86\xb2h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fjA\x02\x00\x00h!]\x94Kiah#G\xbe\xaa~\xe0\xee\xab\x86\xb2uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06iation\x94h!]\x94(KiKaKtKiKoKneh#G\xc0.\xc0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03iat\x94h!]\x94(KiKaKteh#G\xc0.\xc0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07ateness\x94h!]\x94(KaKtKeKnKeKsKseh#G\xc00 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04iten\x94h!]\x94(KiKtKeKneh#G\xc00`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04iann\x94h!]\x94(KiKaKnKneh#G\xc01\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\t appropri\x94h!]\x94(K KaKpKpKrKoKpKrKieh#G\xc01\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02ri\x94h!]\x94(KrKieh#G\xc01\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06iately\x94h!]\x94(KiKaKtKeKlKyeh#G\xc01\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05laten\x94h!]\x94(KlKaKtKeKneh#G\xc01\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07ateness\x94h!]\x94(KaKtKeKnKeKsKseh#G\xbe\x89\xfcz\xe12u\x9dh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07ateness\x94h!]\x94(KaKtKeKnKeKsKseh#G\xbe\x89\xfcz\xe12u\x9duh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04aten\x94h!]\x94(KaKtKeKneh#G\xc0/@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05ensen\x94h!]\x94(KeKnKsKeKneh#G\xc05@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04ated\x94h!]\x94(KaKtKeKdeh#G\xc06 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06teness\x94h!]\x94(KtKeKnKeKsKseh#G\xc06@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04ates\x94h!]\x94(KaKtKeKseh#G\xc06`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05eness\x94h!]\x94(KeKnKeKsKseh#G\xc06\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04onen\x94h!]\x94(KoKnKeKneh#G\xc06\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04uten\x94h!]\x94(KuKtKeKneh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06enness\x94h!]\x94(KeKnKnKeKsKseh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":\"'\x94h!]\x94(K\"K:K\"K'eh#G\xc02\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04 \":\"\x94h!]\x94(K K\"K:K\"eh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\":\"\",\"\x94h!]\x94(K\"K:K\"K\"K,K\"eh#G\xc04\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":[\"\x94h!]\x94(K\"K:K[K\"eh#G\xc05\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07<|end|>\x94h!Nh#G\xc05\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":\"+\x94h!]\x94(K\"K:K\"K+eh#G\xc05\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":{\"\x94h!]\x94(K\"K:K{K\"eh#G\xc06@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03':'\x94h!]\x94(K'K:K'eh#G\xc06\xf0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\xc07\xf0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04pass\x94h!]\x94(KpKaKsKseh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04pass\x94h!]\x94(KpKaKsKseh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05 pass\x94h!]\x94(K KpKaKsKseh#G\xc03 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04fail\x94h!]\x94(KfKaKiKleh#G\xc07\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03pas\x94h!]\x94(KpKaKseh#G\xc08\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05.pass\x94h!]\x94(K.KpKaKsKseh#G\xc08\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04Pass\x94h!]\x94(KPKaKsKseh#G\xc09\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04PASS\x94h!]\x94(KPKAKSKSeh#G\xc09 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06passed\x94h!]\x94(KpKaKsKsKeKdeh#G\xc09\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05-pass\x94h!]\x94(K-KpKaKsKseh#G\xc09\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06passes\x94h!]\x94(KpKaKsKsKeKseh#G\xc0: \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\",\"\x94h!]\x94(K\"K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\",\"\x94h!]\x94(K\"K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04 \",\"\x94h!]\x94(K K\"K,K\"eh#G\xc02\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\xc04\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04.\",\"\x94h!]\x94(K.K\"K,K\"eh#G\xc04@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07<|end|>\x94h!Nh#G\xc05\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03','\x94h!]\x94(K'K,K'eh#G\xc06 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\",\"#\x94h!]\x94(K\"K,K\"K#eh#G\xc07 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\",\"+\x94h!]\x94(K\"K,K\"K+eh#G\xc07\xf0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05\\\",\\\"\x94h!]\x94(K\\K\"K,K\\K\"eh#G\xc08@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\",\"\\\x94h!]\x94(K\"K,K\"K\\eh#G\xc08\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xbe\x89\xfcz\xe12u\x9dh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xbe\x89\xfcz\xe12u\x9duh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07Overall\x94h!]\x94(KOKvKeKrKaKlKleh#G\xc00\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x08 overall\x94h!]\x94(K KoKvKeKrKaKlKleh#G\xc02@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01c\x94h!]\x94Kcah#G\xc06\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x08overview\x94h!]\x94(KoKvKeKrKvKiKeKweh#G\xc08\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05total\x94h!]\x94(KtKoKtKaKleh#G\xc08@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04over\x94h!]\x94(KoKvKeKreh#G\xc08\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x08 Overall\x94h!]\x94(K KOKvKeKrKaKlKleh#G\xc09 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\xe6\x95\xb4\xe4\xbd\x93\x94h!]\x94(K\xe6K\x95K\xb4K\xe4K\xbdK\x93eh#G\xc09`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05polit\x94h!]\x94(KpKoKlKiKteh#G\xc0:\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07_rating\x94h!]\x94(K_KrKaKtKiKnKgeh#G\xbe\x94\xfe$\xc4\xceLIh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07_rating\x94h!]\x94(K_KrKaKtKiKnKgeh#G\xbe\x94\xfe$\xc4\xceLIuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07 rating\x94h!]\x94(K KrKaKtKiKnKgeh#G\xc0/@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06rating\x94h!]\x94(KrKaKtKiKnKgeh#G\xc01\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07 Rating\x94h!]\x94(K KRKaKtKiKnKgeh#G\xc01\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06Rating\x94h!]\x94(KRKaKtKiKnKgeh#G\xc01\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07-rating\x94h!]\x94(K-KrKaKtKiKnKgeh#G\xc01\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07.rating\x94h!]\x94(K.KrKaKtKiKnKgeh#G\xc02\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05_rate\x94h!]\x94(K_KrKaKtKeeh#G\xc03\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\t_rotation\x94h!]\x94(K_KrKoKtKaKtKiKoKneh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02_r\x94h!]\x94(K_Kreh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\xe2\x80\x9d:\x94h!]\x94(K\xe2K\x80K\x9dK:eh#G\xc04\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\\\":\x94h!]\x94(K\\K\"K:eh#G\xc04\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02':\x94h!]\x94(K'K:eh#G\xc05@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\xc06\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07<|end|>\x94h!Nh#G\xc06\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\xe2\x80\x9d\xef\xbc\x9a\x94h!]\x94(K\xe2K\x80K\x9dK\xefK\xbcK\x9aeh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02`:\x94h!]\x94(K`K:eh#G\xc07\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":[\x94h!]\x94(K\"K:K[eh#G\xc08\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 \":\x94h!]\x94(K K\"K:eh#G\xc08 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1fje\x01\x00\x00h!]\x94K4ah#G\xbfdI\x15\x1e\x7f\x84\xe1h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fje\x01\x00\x00h!]\x94K4ah#G\xbfdI\x15\x1e\x7f\x84\xe1uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fjs\x01\x00\x00h!]\x94K3ah#G\xc0\x18\x02\x89\x11\x8c\x19~uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fjY\x01\x00\x00h!]\x94K5ah#G\xc0,\x81D\xaaS\xfc\x01uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fjl\x01\x00\x00h!]\x94K ah#G\xc05\x10\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x012\x94h!]\x94K2ah#G\xc070\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\x81\x01\x00\x00h!]\x94K-ah#G\xc08\xd0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\n\n\x94h!]\x94(K\nK\neh#G\xc09\x80\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fh_h!]\x94K\nah#G\xc09\xc0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02 \x94h!]\x94(K K eh#G\xc09\xf0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\x88\x01\x00\x00h!]\x94Kfah#G\xc0:0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01}\x94h!]\x94K}ah#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fj\xf3\x04\x00\x00h!]\x94K}ah#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02 }\x94h!]\x94(K K}eh#G\xc01\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\xc05`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02}\n\x94h!]\x94(K}K\neh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03}\n\n\x94h!]\x94(K}K\nK\neh#G\xc08\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\xea\x01\x00\x00h!]\x94K.ah#G\xc0:\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03}\r\n\x94h!]\x94(K}K\rK\neh#G\xc0; \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05}\r\n\r\n\x94h!]\x94(K}K\rK\nK\rK\neh#G\xc0=\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04}\n\n\n\x94h!]\x94(K}K\nK\nK\neh#G\xc0=\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07}\n\n\n\n\n\n\x94h!]\x94(K}K\nK\nK\nK\nK\nK\neh#G\xc0>\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nube\x8c\x07refusal\x94Nuh-}\x94h/\x8f\x94(h\x17j<\x05\x00\x00\x90h1Nubub."
@@ -4,7 +4,12 @@ from typing import Literal
4
4
  from pydantic import BaseModel
5
5
 
6
6
  from kiln_ai.adapters.ml_model_list import built_in_models
7
- from kiln_ai.datamodel import DatasetSplit, FinetuneDataStrategy, FineTuneStatusType
7
+ from kiln_ai.datamodel import (
8
+ DatasetSplit,
9
+ FinetuneDataStrategy,
10
+ FineTuneStatusType,
11
+ Task,
12
+ )
8
13
  from kiln_ai.datamodel import Finetune as FinetuneModel
9
14
  from kiln_ai.utils.name_generator import generate_memorable_name
10
15
 
@@ -101,7 +106,7 @@ class BaseFinetuneAdapter(ABC):
101
106
  train_split_name=train_split_name,
102
107
  validation_split_name=validation_split_name,
103
108
  parameters=parameters,
104
- system_message=system_message,
109
+ system_message=cls.augment_system_message(system_message, parent_task),
105
110
  thinking_instructions=thinking_instructions,
106
111
  parent=parent_task,
107
112
  data_strategy=data_strategy,
@@ -114,6 +119,15 @@ class BaseFinetuneAdapter(ABC):
114
119
 
115
120
  return adapter, datamodel
116
121
 
122
+ @classmethod
123
+ def augment_system_message(cls, system_message: str, task: Task) -> str:
124
+ """
125
+ Augment the system message with additional instructions, such as JSON instructions.
126
+ """
127
+
128
+ # Base implementation does nothing, can be overridden by subclasses
129
+ return system_message
130
+
117
131
  @abstractmethod
118
132
  async def _start(self, dataset: DatasetSplit) -> None:
119
133
  """
@@ -3,9 +3,11 @@ from typing import Type
3
3
  from kiln_ai.adapters.fine_tune.base_finetune import BaseFinetuneAdapter
4
4
  from kiln_ai.adapters.fine_tune.fireworks_finetune import FireworksFinetune
5
5
  from kiln_ai.adapters.fine_tune.openai_finetune import OpenAIFinetune
6
+ from kiln_ai.adapters.fine_tune.together_finetune import TogetherFinetune
6
7
  from kiln_ai.adapters.ml_model_list import ModelProviderName
7
8
 
8
9
  finetune_registry: dict[ModelProviderName, Type[BaseFinetuneAdapter]] = {
9
10
  ModelProviderName.openai: OpenAIFinetune,
10
11
  ModelProviderName.fireworks_ai: FireworksFinetune,
12
+ ModelProviderName.together_ai: TogetherFinetune,
11
13
  }
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import logging
2
3
  import tempfile
3
4
  from pathlib import Path
4
5
  from unittest.mock import Mock
@@ -27,6 +28,8 @@ from kiln_ai.datamodel import (
27
28
  TaskRun,
28
29
  )
29
30
 
31
+ logger = logging.getLogger(__name__)
32
+
30
33
 
31
34
  @pytest.fixture
32
35
  def mock_task():
@@ -474,7 +477,7 @@ def test_generate_vertex_template_thinking():
474
477
 
475
478
  result = generate_vertex_gemini_1_5(training_data)
476
479
 
477
- print(result)
480
+ logger.info(result)
478
481
 
479
482
  assert result == {
480
483
  "systemInstruction": {
@@ -16,9 +16,9 @@ from kiln_ai.datamodel import (
16
16
  FinetuneDataStrategy,
17
17
  StructuredOutputMode,
18
18
  Task,
19
- Train80Test20SplitDefinition,
20
19
  )
21
20
  from kiln_ai.datamodel import Finetune as FinetuneModel
21
+ from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
22
22
  from kiln_ai.utils.config import Config
23
23
 
24
24
 
@@ -15,9 +15,9 @@ from kiln_ai.datamodel import (
15
15
  FinetuneDataStrategy,
16
16
  StructuredOutputMode,
17
17
  Task,
18
- Train80Test20SplitDefinition,
19
18
  )
20
19
  from kiln_ai.datamodel import Finetune as FinetuneModel
20
+ from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
21
21
  from kiln_ai.utils.config import Config
22
22
 
23
23