kiln-ai 0.17.0__py3-none-any.whl → 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of kiln-ai might be problematic. Click here for more details.
- kiln_ai/adapters/adapter_registry.py +28 -0
- kiln_ai/adapters/chat/chat_formatter.py +0 -1
- kiln_ai/adapters/data_gen/data_gen_prompts.py +121 -36
- kiln_ai/adapters/data_gen/data_gen_task.py +51 -38
- kiln_ai/adapters/data_gen/test_data_gen_task.py +318 -37
- kiln_ai/adapters/eval/base_eval.py +6 -7
- kiln_ai/adapters/eval/eval_runner.py +5 -1
- kiln_ai/adapters/eval/g_eval.py +17 -12
- kiln_ai/adapters/eval/test_base_eval.py +8 -2
- kiln_ai/adapters/eval/test_eval_runner.py +6 -12
- kiln_ai/adapters/eval/test_g_eval.py +115 -5
- kiln_ai/adapters/eval/test_g_eval_data.py +1 -1
- kiln_ai/adapters/fine_tune/base_finetune.py +2 -6
- kiln_ai/adapters/fine_tune/dataset_formatter.py +1 -5
- kiln_ai/adapters/fine_tune/fireworks_finetune.py +32 -20
- kiln_ai/adapters/fine_tune/test_dataset_formatter.py +1 -1
- kiln_ai/adapters/fine_tune/test_fireworks_tinetune.py +30 -21
- kiln_ai/adapters/fine_tune/test_vertex_finetune.py +2 -7
- kiln_ai/adapters/fine_tune/together_finetune.py +1 -1
- kiln_ai/adapters/ml_model_list.py +926 -125
- kiln_ai/adapters/model_adapters/base_adapter.py +11 -7
- kiln_ai/adapters/model_adapters/litellm_adapter.py +23 -1
- kiln_ai/adapters/model_adapters/test_base_adapter.py +1 -2
- kiln_ai/adapters/model_adapters/test_litellm_adapter.py +70 -3
- kiln_ai/adapters/model_adapters/test_structured_output.py +13 -13
- kiln_ai/adapters/parsers/parser_registry.py +0 -2
- kiln_ai/adapters/parsers/r1_parser.py +0 -1
- kiln_ai/adapters/parsers/test_r1_parser.py +1 -1
- kiln_ai/adapters/provider_tools.py +20 -19
- kiln_ai/adapters/remote_config.py +113 -0
- kiln_ai/adapters/repair/repair_task.py +2 -7
- kiln_ai/adapters/test_adapter_registry.py +30 -2
- kiln_ai/adapters/test_ml_model_list.py +30 -0
- kiln_ai/adapters/test_prompt_adaptors.py +0 -4
- kiln_ai/adapters/test_provider_tools.py +18 -12
- kiln_ai/adapters/test_remote_config.py +456 -0
- kiln_ai/datamodel/basemodel.py +54 -28
- kiln_ai/datamodel/datamodel_enums.py +2 -0
- kiln_ai/datamodel/dataset_split.py +5 -3
- kiln_ai/datamodel/eval.py +35 -3
- kiln_ai/datamodel/finetune.py +2 -3
- kiln_ai/datamodel/project.py +3 -3
- kiln_ai/datamodel/prompt.py +2 -2
- kiln_ai/datamodel/prompt_id.py +4 -4
- kiln_ai/datamodel/task.py +6 -6
- kiln_ai/datamodel/task_output.py +1 -3
- kiln_ai/datamodel/task_run.py +0 -2
- kiln_ai/datamodel/test_basemodel.py +210 -18
- kiln_ai/datamodel/test_eval_model.py +152 -10
- kiln_ai/datamodel/test_model_perf.py +1 -1
- kiln_ai/datamodel/test_prompt_id.py +5 -1
- kiln_ai/datamodel/test_task.py +5 -0
- kiln_ai/utils/config.py +10 -0
- kiln_ai/utils/logging.py +4 -3
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/METADATA +33 -3
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/RECORD +58 -56
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/WHEEL +0 -0
- {kiln_ai-0.17.0.dist-info → kiln_ai-0.19.0.dist-info}/licenses/LICENSE.txt +0 -0
|
@@ -274,6 +274,36 @@ def test_token_case():
|
|
|
274
274
|
assert token.lower() == token
|
|
275
275
|
|
|
276
276
|
|
|
277
|
+
def test_generate_run_description(test_eval_config, test_run_config, test_task_run):
|
|
278
|
+
"""Test that generate_run_description correctly uses task_run.output.output (the string) rather than task_run.output (the object)."""
|
|
279
|
+
# Create G-Eval instance
|
|
280
|
+
g_eval = GEval(test_eval_config, test_run_config)
|
|
281
|
+
|
|
282
|
+
# Call generate_run_description
|
|
283
|
+
description = g_eval.generate_run_description(
|
|
284
|
+
test_task_run.input, test_task_run.output.output
|
|
285
|
+
)
|
|
286
|
+
|
|
287
|
+
# Verify that the actual string output is in the description
|
|
288
|
+
expected_output = "Why did the chicken cross the road? To get to the other side!"
|
|
289
|
+
assert expected_output in description
|
|
290
|
+
|
|
291
|
+
# Verify that the input is also in the description
|
|
292
|
+
assert "Tell me a chicken joke" in description
|
|
293
|
+
|
|
294
|
+
# Verify the description has the expected structure
|
|
295
|
+
assert "<eval_data>" in description
|
|
296
|
+
assert description.count("<eval_data>") == 2 # 2 opening tags
|
|
297
|
+
assert description.count("</eval_data>") == 2 # 2 closing tags
|
|
298
|
+
assert "The model was given the following input for the task:" in description
|
|
299
|
+
assert "The model produced the following output for the task:" in description
|
|
300
|
+
|
|
301
|
+
# Verify that we're getting the actual string value, not a Python object representation
|
|
302
|
+
# The string should not contain 'TaskOutput' or other object indicators
|
|
303
|
+
assert "TaskOutput" not in description
|
|
304
|
+
assert "output=" not in description # Would appear if object __repr__ was used
|
|
305
|
+
|
|
306
|
+
|
|
277
307
|
def test_metric_offsets_and_search_ranges(
|
|
278
308
|
test_eval_config, test_run_config, test_task_run
|
|
279
309
|
):
|
|
@@ -401,7 +431,7 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
|
|
|
401
431
|
|
|
402
432
|
# Test single token case
|
|
403
433
|
token_logprob = MockTokenLogprob("5", [("5", 0.0)], logprob=1e-8) # log(1) = 0
|
|
404
|
-
score = g_eval.rating_token_to_score(token_logprob)
|
|
434
|
+
score = g_eval.rating_token_to_score(token_logprob) # type: ignore
|
|
405
435
|
assert score == 5.0
|
|
406
436
|
|
|
407
437
|
# Test weighted average case
|
|
@@ -413,20 +443,62 @@ def test_rating_token_to_score(test_eval_config, test_run_config):
|
|
|
413
443
|
],
|
|
414
444
|
logprob=math.log(0.6),
|
|
415
445
|
)
|
|
416
|
-
score = g_eval.rating_token_to_score(token_logprob)
|
|
446
|
+
score = g_eval.rating_token_to_score(token_logprob) # type: ignore
|
|
417
447
|
assert pytest.approx(score) == 4.4 # (4 * 0.6 + 5 * 0.4)
|
|
418
448
|
|
|
419
449
|
# Test invalid token
|
|
420
450
|
token_logprob = MockTokenLogprob(":", [(":", 0.0)], logprob=1e-8)
|
|
421
|
-
assert g_eval.rating_token_to_score(token_logprob) is None
|
|
451
|
+
assert g_eval.rating_token_to_score(token_logprob) is None # type: ignore
|
|
422
452
|
|
|
423
453
|
# Test missing from top logprobs
|
|
424
454
|
token_logprob = MockTokenLogprob("5", [], logprob=1e-8)
|
|
425
|
-
assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
|
|
455
|
+
assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0 # type: ignore
|
|
426
456
|
|
|
427
457
|
# Test missing from top logprobs, with special case logprob
|
|
428
458
|
token_logprob = MockTokenLogprob("5", [], logprob=-9999)
|
|
429
|
-
assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0
|
|
459
|
+
assert pytest.approx(g_eval.rating_token_to_score(token_logprob)) == 5.0 # type: ignore
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
def test_rating_token_to_score_zero_score_bug_fix(test_eval_config, test_run_config):
|
|
463
|
+
"""Test that rating_token_to_score correctly handles 0.0 scores (like 'fail') and doesn't return None.
|
|
464
|
+
|
|
465
|
+
This test verifies the fix for the bug where 'if not primary_token_score:' would incorrectly
|
|
466
|
+
treat 0.0 as falsy and return None, when it should only return None for actual None values.
|
|
467
|
+
"""
|
|
468
|
+
g_eval = GEval(test_eval_config, test_run_config)
|
|
469
|
+
|
|
470
|
+
class MockTopLogprob:
|
|
471
|
+
def __init__(self, token, logprob):
|
|
472
|
+
self.token = token
|
|
473
|
+
self.logprob = logprob
|
|
474
|
+
|
|
475
|
+
class MockTokenLogprob:
|
|
476
|
+
def __init__(self, token, top_logprobs, logprob):
|
|
477
|
+
self.token = token
|
|
478
|
+
self.top_logprobs = [MockTopLogprob(t, lp) for t, lp in top_logprobs]
|
|
479
|
+
self.logprob = logprob
|
|
480
|
+
|
|
481
|
+
# Test that "fail" token (which maps to 0.0) is handled correctly
|
|
482
|
+
token_logprob = MockTokenLogprob("fail", [("fail", 0.0)], logprob=1e-8)
|
|
483
|
+
score = g_eval.rating_token_to_score(token_logprob) # type: ignore
|
|
484
|
+
assert score == 0.0, f"Expected 0.0 for 'fail' token, got {score}"
|
|
485
|
+
|
|
486
|
+
# Test that "0" token (which maps to None) still returns None
|
|
487
|
+
token_logprob = MockTokenLogprob("0", [("0", 0.0)], logprob=1e-8)
|
|
488
|
+
score = g_eval.rating_token_to_score(token_logprob) # type: ignore
|
|
489
|
+
assert score is None, f"Expected None for '0' token, got {score}"
|
|
490
|
+
|
|
491
|
+
# Test weighted average case with fail token
|
|
492
|
+
token_logprob = MockTokenLogprob(
|
|
493
|
+
"fail",
|
|
494
|
+
[
|
|
495
|
+
("fail", math.log(0.7)), # 70% probability for fail (0.0)
|
|
496
|
+
("pass", math.log(0.3)), # 30% probability for pass (1.0)
|
|
497
|
+
],
|
|
498
|
+
logprob=math.log(0.7),
|
|
499
|
+
)
|
|
500
|
+
score = g_eval.rating_token_to_score(token_logprob) # type: ignore
|
|
501
|
+
assert pytest.approx(score) == 0.3 # (0.0 * 0.7 + 1.0 * 0.3)
|
|
430
502
|
|
|
431
503
|
|
|
432
504
|
def test_g_eval_system_instruction():
|
|
@@ -502,3 +574,41 @@ async def test_all_built_in_models_logprobs_geval(
|
|
|
502
574
|
model_name,
|
|
503
575
|
provider_name.value,
|
|
504
576
|
)
|
|
577
|
+
|
|
578
|
+
|
|
579
|
+
def check_supports_llm_as_judge(model_name: str, provider_name: str):
|
|
580
|
+
for model in built_in_models:
|
|
581
|
+
if model.name != model_name:
|
|
582
|
+
continue
|
|
583
|
+
for provider in model.providers:
|
|
584
|
+
if provider.name != provider_name:
|
|
585
|
+
continue
|
|
586
|
+
if not provider.supports_structured_output:
|
|
587
|
+
pytest.skip(
|
|
588
|
+
f"Skipping {model.name} {provider.name} because it does not support llm_as_judge (structured_output_mode)"
|
|
589
|
+
)
|
|
590
|
+
return
|
|
591
|
+
raise RuntimeError(f"No model {model_name} {provider_name} found")
|
|
592
|
+
|
|
593
|
+
|
|
594
|
+
@pytest.mark.paid
|
|
595
|
+
@pytest.mark.ollama
|
|
596
|
+
@pytest.mark.parametrize("model_name,provider_name", get_all_models_and_providers())
|
|
597
|
+
async def test_all_built_in_models_llm_as_judge(
|
|
598
|
+
model_name,
|
|
599
|
+
provider_name,
|
|
600
|
+
test_task,
|
|
601
|
+
test_eval_config,
|
|
602
|
+
test_task_run,
|
|
603
|
+
test_run_config,
|
|
604
|
+
):
|
|
605
|
+
check_supports_llm_as_judge(model_name, provider_name)
|
|
606
|
+
await run_g_eval_test(
|
|
607
|
+
test_task,
|
|
608
|
+
test_eval_config,
|
|
609
|
+
test_task_run,
|
|
610
|
+
EvalConfigType.llm_as_judge,
|
|
611
|
+
test_run_config,
|
|
612
|
+
model_name,
|
|
613
|
+
provider_name.value,
|
|
614
|
+
)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
# Saved a real RunOutput, with real logprobs via:
|
|
2
2
|
# po = pickle.dumps(result)
|
|
3
|
-
#
|
|
3
|
+
# Example inspection of the pickled result can be done with a print statement.
|
|
4
4
|
serialized_run_output = b"\x80\x04\x95\xe8:\x00\x00\x00\x00\x00\x00\x8c\x1bkiln_ai.adapters.run_output\x94\x8c\tRunOutput\x94\x93\x94)\x81\x94}\x94(\x8c\x06output\x94}\x94(\x8c\x0ftopic_alignment\x94K\x05\x8c\x0fappropriateness\x94\x8c\x04pass\x94\x8c\x0eoverall_rating\x94K\x04u\x8c\x14intermediate_outputs\x94}\x94\x8c\x10chain_of_thought\x94X\x08\x06\x00\x001) **Is the joke funny?**\n The joke \"Why did the chicken cross the road? To get to the other side!\" is a classic joke that many consider to be humorous due to its simplicity and unexpected nature. However, as it's a very well-known punchline, some may find it less amusing for being overly familiar. Overall, it can elicit a chuckle, but it may not be considered original or particularly funny by everyone.\n\n2) **Is the content appropriate for all audiences?**\n Yes, the joke is appropriate for all audiences. It does not contain any offensive language or themes, making it suitable for children and adults alike.\n\n3) **Is the joke culturally sensitive?**\n Yes, the joke is culturally sensitive. It does not touch on any potentially sensitive topics or stereotypes. It\xe2\x80\x99s a universal humor that transcends cultural boundaries.\n\n4) **Is the joke politically correct?**\n Yes, the joke is politically correct. It does not make any political statements or discriminatory remarks. It simply presents a light-hearted situation involving a chicken, which is neutral and inoffensive.\n\n5) **Is the joke aligned with the provided topic?**\n Yes, the joke is aligned with the provided topic of a \"chicken joke.\" It directly references a chicken and is structured as a joke, fulfilling the prompt's requirements.\n\nIn summary, while the joke may lack originality, it is appropriate, sensitive, politically correct, and aligns well with the topic. The humor level can vary depending on personal taste, but overall, it meets the evaluation criteria.\x94s\x8c\x0foutput_logprobs\x94\x8c!openai.types.chat.chat_completion\x94\x8c\x0eChoiceLogprobs\x94\x93\x94)\x81\x94}\x94(\x8c\x08__dict__\x94}\x94(\x8c\x07content\x94]\x94(\x8c/openai.types.chat.chat_completion_token_logprob\x94\x8c\x1aChatCompletionTokenLogprob\x94\x93\x94)\x81\x94}\x94(h\x15}\x94(\x8c\x05token\x94\x8c\x02{\"\x94\x8c\x05bytes\x94]\x94(K{K\"e\x8c\x07logprob\x94G\xbf5\xfe.\xba\x97\xb1\xde\x8c\x0ctop_logprobs\x94]\x94(h\x19\x8c\nTopLogprob\x94\x93\x94)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02{\"\x94h!]\x94(K{K\"eh#G\xbf5\xfe.\xba\x97\xb1\xdeu\x8c\x12__pydantic_extra__\x94}\x94\x8c\x17__pydantic_fields_set__\x94\x8f\x94(h\x1fh#h!\x90\x8c\x14__pydantic_private__\x94Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02{\n\x94h!]\x94(K{K\neh#G\xc0 \x00,\nJ\x05\xdeuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01{\x94h!]\x94K{ah#G\xc0/\x80,\nJ\x05\xdeuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03{\r\n\x94h!]\x94(K{K\rK\neh#G\xc01@\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03{\n\n\x94h!]\x94(K{K\nK\neh#G\xc03\xc0\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 {\"\x94h!]\x94(K K{K\"eh#G\xc05\x00\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 {\n\x94h!]\x94(K K{K\neh#G\xc06\xe0\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01\n\x94h!]\x94K\nah#G\xc07\xe0\x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02{}\x94h!]\x94(K{K}eh#G\xc08 \x16\x05%\x02\xefuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05topic\x94h!]\x94(KtKoKpKiKceh#G\xbfS\x8a+<\x99\xb9Oh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05topic\x94h!]\x94(KtKoKpKiKceh#G\xbfS\x8a+<\x99\xb9Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xc0\x1b\x818\xa2\x07\xfd%uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04type\x94h!]\x94(KtKyKpKeeh#G\xc0!\x80\x9c^o\xf7\xe0uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03top\x94h!]\x94(KtKoKpeh#G\xc0-\x00\x9c^o\xf7\xe0uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05theme\x94h!]\x94(KtKhKeKmKeeh#G\xc0.\x00\x9c^o\xf7\xe0uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05total\x94h!]\x94(KtKoKtKaKleh#G\xc00\x00N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06 topic\x94h!]\x94(K KtKoKpKiKceh#G\xc00@N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05Topic\x94h!]\x94(KTKoKpKiKceh#G\xc00\xa0N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0bappropriate\x94h!]\x94(KaKpKpKrKoKpKrKiKaKtKeeh#G\xc00\xa0N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05title\x94h!]\x94(KtKiKtKlKeeh#G\xc00\xc0N\x1eq\x04Ouh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n_alignment\x94h!]\x94(K_KaKlKiKgKnKmKeKnKteh#G\xbe\xc1\x9f\x96D1\x8b\xf2h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n_alignment\x94h!]\x94(K_KaKlKiKgKnKmKeKnKteh#G\xbe\xc1\x9f\x96D1\x8b\xf2uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n alignment\x94h!]\x94(K KaKlKiKgKnKmKeKnKteh#G\xc0+\x00\x00C\x1b\xde\x83uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06_align\x94h!]\x94(K_KaKlKiKgKneh#G\xc0.@\x00C\x1b\xde\x83uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n_ALIGNMENT\x94h!]\x94(K_KAKLKIKGKNKMKEKNKTeh#G\xc0.\x80\x00C\x1b\xde\x83uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\tAlignment\x94h!]\x94(KAKlKiKgKnKmKeKnKteh#G\xc00\xc0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0b_assignment\x94h!]\x94(K_KaKsKsKiKgKnKmKeKnKteh#G\xc01@\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\n Alignment\x94h!]\x94(K KAKlKiKgKnKmKeKnKteh#G\xc01@\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03_al\x94h!]\x94(K_KaKleh#G\xc01\xa0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0b_similarity\x94h!]\x94(K_KsKiKmKiKlKaKrKiKtKyeh#G\xc01\xe0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07_rating\x94h!]\x94(K_KrKaKtKiKnKgeh#G\xc02 \x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\xe2\x80\x9d:\x94h!]\x94(K\xe2K\x80K\x9dK:eh#G\xc02@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\\\":\x94h!]\x94(K\\K\"K:eh#G\xc03\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02':\x94h!]\x94(K'K:eh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\xc04\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02`:\x94h!]\x94(K`K:eh#G\xc05\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\xe2\x80\x9d\xef\xbc\x9a\x94h!]\x94(K\xe2K\x80K\x9dK\xefK\xbcK\x9aeh#G\xc06`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\xc2\xbb:\x94h!]\x94(K\xc2K\xbbK:eh#G\xc07 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03+\":\x94h!]\x94(K+K\"K:eh#G\xc07@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":[\x94h!]\x94(K\"K:K[eh#G\xc07\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x015\x94h!]\x94K5ah#G\xbe\xf1\x93\xc3:x\xd77h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fjY\x01\x00\x00h!]\x94K5ah#G\xbe\xf1\x93\xc3:x\xd77uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x014\x94h!]\x94K4ah#G\xc0&\x00\x02:l\xe3Xuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01 \x94h!]\x94K ah#G\xc01\xc0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x013\x94h!]\x94K3ah#G\xc07\xc0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02 \x94h!]\x94(K K eh#G\xc08\xa0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01-\x94h!]\x94K-ah#G\xc0; \x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01f\x94h!]\x94Kfah#G\xc0;0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01\t\x94h!]\x94K\tah#G\xc0;0\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 \x94h!]\x94(K K K eh#G\xc0;@\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01\"\x94h!]\x94K\"ah#G\xc0;p\x01\x1d6q\xacuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01,\x94h!]\x94K,ah#G\xc05\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 ,\"\x94h!]\x94(K K,K\"eh#G\xc06`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\"\\\x94h!]\x94(K,K\"K\\eh#G\xc07`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\"%\x94h!]\x94(K,K\"K%eh#G\xc07\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\",\x94h!]\x94(K,K\"K,eh#G\xc0:\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\n\x94h!]\x94(K,K\neh#G\xc0:\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03,\r\n\x94h!]\x94(K,K\rK\neh#G\xc0< \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\x8f\x01\x00\x00h!]\x94K\tah#G\xc0=p\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01.\x94h!]\x94K.ah#G\xc0>@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07appropr\x94h!]\x94(KaKpKpKrKoKpKreh#G\xbf\x1d\x1c\xa4[(\x97\x91h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07appropr\x94h!]\x94(KaKpKpKrKoKpKreh#G\xbf\x1d\x1c\xa4[(\x97\x91uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05appro\x94h!]\x94(KaKpKpKrKoeh#G\xc0\"\x80\x0e\x8c\x8a\xbd^uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x0bappropriate\x94h!]\x94(KaKpKpKrKoKpKrKiKaKtKeeh#G\xc0&\x80\x0e\x8c\x8a\xbd^uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\t appropri\x94h!]\x94(K KaKpKpKrKoKpKrKieh#G\xc0*\x80\x0e\x8c\x8a\xbd^uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02in\x94h!]\x94(KiKneh#G\xc00\xe0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05Appro\x94h!]\x94(KAKpKpKrKoeh#G\xc02\x80\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06 Appro\x94h!]\x94(K KAKpKpKrKoeh#G\xc02\xa0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xc02\xe0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04apro\x94h!]\x94(KaKpKrKoeh#G\xc03\xe0\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\rapproximately\x94h!]\x94(KaKpKpKrKoKxKiKmKaKtKeKlKyeh#G\xc04@\x075~g\x0euh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01i\x94h!]\x94Kiah#G\xbe\xaa~\xe0\xee\xab\x86\xb2h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fjA\x02\x00\x00h!]\x94Kiah#G\xbe\xaa~\xe0\xee\xab\x86\xb2uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06iation\x94h!]\x94(KiKaKtKiKoKneh#G\xc0.\xc0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03iat\x94h!]\x94(KiKaKteh#G\xc0.\xc0\x00!\x8d\xefAuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07ateness\x94h!]\x94(KaKtKeKnKeKsKseh#G\xc00 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04iten\x94h!]\x94(KiKtKeKneh#G\xc00`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04iann\x94h!]\x94(KiKaKnKneh#G\xc01\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\t appropri\x94h!]\x94(K KaKpKpKrKoKpKrKieh#G\xc01\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02ri\x94h!]\x94(KrKieh#G\xc01\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06iately\x94h!]\x94(KiKaKtKeKlKyeh#G\xc01\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05laten\x94h!]\x94(KlKaKtKeKneh#G\xc01\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07ateness\x94h!]\x94(KaKtKeKnKeKsKseh#G\xbe\x89\xfcz\xe12u\x9dh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07ateness\x94h!]\x94(KaKtKeKnKeKsKseh#G\xbe\x89\xfcz\xe12u\x9duh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04aten\x94h!]\x94(KaKtKeKneh#G\xc0/@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05ensen\x94h!]\x94(KeKnKsKeKneh#G\xc05@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04ated\x94h!]\x94(KaKtKeKdeh#G\xc06 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06teness\x94h!]\x94(KtKeKnKeKsKseh#G\xc06@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04ates\x94h!]\x94(KaKtKeKseh#G\xc06`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05eness\x94h!]\x94(KeKnKeKsKseh#G\xc06\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04onen\x94h!]\x94(KoKnKeKneh#G\xc06\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04uten\x94h!]\x94(KuKtKeKneh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06enness\x94h!]\x94(KeKnKnKeKsKseh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":\"'\x94h!]\x94(K\"K:K\"K'eh#G\xc02\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04 \":\"\x94h!]\x94(K K\"K:K\"eh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\":\"\",\"\x94h!]\x94(K\"K:K\"K\"K,K\"eh#G\xc04\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":[\"\x94h!]\x94(K\"K:K[K\"eh#G\xc05\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07<|end|>\x94h!Nh#G\xc05\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":\"+\x94h!]\x94(K\"K:K\"K+eh#G\xc05\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\":{\"\x94h!]\x94(K\"K:K{K\"eh#G\xc06@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03':'\x94h!]\x94(K'K:K'eh#G\xc06\xf0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\xc07\xf0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04pass\x94h!]\x94(KpKaKsKseh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04pass\x94h!]\x94(KpKaKsKseh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05 pass\x94h!]\x94(K KpKaKsKseh#G\xc03 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04fail\x94h!]\x94(KfKaKiKleh#G\xc07\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03pas\x94h!]\x94(KpKaKseh#G\xc08\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05.pass\x94h!]\x94(K.KpKaKsKseh#G\xc08\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04Pass\x94h!]\x94(KPKaKsKseh#G\xc09\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04PASS\x94h!]\x94(KPKAKSKSeh#G\xc09 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06passed\x94h!]\x94(KpKaKsKsKeKdeh#G\xc09\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05-pass\x94h!]\x94(K-KpKaKsKseh#G\xc09\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06passes\x94h!]\x94(KpKaKsKsKeKseh#G\xc0: \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\",\"\x94h!]\x94(K\"K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\",\"\x94h!]\x94(K\"K,K\"eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04 \",\"\x94h!]\x94(K K\"K,K\"eh#G\xc02\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\xc04\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04.\",\"\x94h!]\x94(K.K\"K,K\"eh#G\xc04@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07<|end|>\x94h!Nh#G\xc05\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03','\x94h!]\x94(K'K,K'eh#G\xc06 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\",\"#\x94h!]\x94(K\"K,K\"K#eh#G\xc07 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\",\"+\x94h!]\x94(K\"K,K\"K+eh#G\xc07\xf0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05\\\",\\\"\x94h!]\x94(K\\K\"K,K\\K\"eh#G\xc08@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\",\"\\\x94h!]\x94(K\"K,K\"K\\eh#G\xc08\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xbe\x89\xfcz\xe12u\x9dh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07overall\x94h!]\x94(KoKvKeKrKaKlKleh#G\xbe\x89\xfcz\xe12u\x9duh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07Overall\x94h!]\x94(KOKvKeKrKaKlKleh#G\xc00\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x08 overall\x94h!]\x94(K KoKvKeKrKaKlKleh#G\xc02@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01c\x94h!]\x94Kcah#G\xc06\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x08overview\x94h!]\x94(KoKvKeKrKvKiKeKweh#G\xc08\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05total\x94h!]\x94(KtKoKtKaKleh#G\xc08@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04over\x94h!]\x94(KoKvKeKreh#G\xc08\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x08 Overall\x94h!]\x94(K KOKvKeKrKaKlKleh#G\xc09 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\xe6\x95\xb4\xe4\xbd\x93\x94h!]\x94(K\xe6K\x95K\xb4K\xe4K\xbdK\x93eh#G\xc09`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05polit\x94h!]\x94(KpKoKlKiKteh#G\xc0:\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07_rating\x94h!]\x94(K_KrKaKtKiKnKgeh#G\xbe\x94\xfe$\xc4\xceLIh$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07_rating\x94h!]\x94(K_KrKaKtKiKnKgeh#G\xbe\x94\xfe$\xc4\xceLIuh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07 rating\x94h!]\x94(K KrKaKtKiKnKgeh#G\xc0/@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06rating\x94h!]\x94(KrKaKtKiKnKgeh#G\xc01\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07 Rating\x94h!]\x94(K KRKaKtKiKnKgeh#G\xc01\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06Rating\x94h!]\x94(KRKaKtKiKnKgeh#G\xc01\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07-rating\x94h!]\x94(K-KrKaKtKiKnKgeh#G\xc01\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07.rating\x94h!]\x94(K.KrKaKtKiKnKgeh#G\xc02\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05_rate\x94h!]\x94(K_KrKaKtKeeh#G\xc03\x80\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\t_rotation\x94h!]\x94(K_KrKoKtKaKtKiKoKneh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02_r\x94h!]\x94(K_Kreh#G\xc04 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\":\x94h!]\x94(K\"K:eh#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04\xe2\x80\x9d:\x94h!]\x94(K\xe2K\x80K\x9dK:eh#G\xc04\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\\\":\x94h!]\x94(K\\K\"K:eh#G\xc04\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02':\x94h!]\x94(K'K:eh#G\xc05@\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":\"\x94h!]\x94(K\"K:K\"eh#G\xc06\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07<|end|>\x94h!Nh#G\xc06\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x06\xe2\x80\x9d\xef\xbc\x9a\x94h!]\x94(K\xe2K\x80K\x9dK\xefK\xbcK\x9aeh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02`:\x94h!]\x94(K`K:eh#G\xc07\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03\":[\x94h!]\x94(K\"K:K[eh#G\xc08\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03 \":\x94h!]\x94(K K\"K:eh#G\xc08 \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1fje\x01\x00\x00h!]\x94K4ah#G\xbfdI\x15\x1e\x7f\x84\xe1h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fje\x01\x00\x00h!]\x94K4ah#G\xbfdI\x15\x1e\x7f\x84\xe1uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fjs\x01\x00\x00h!]\x94K3ah#G\xc0\x18\x02\x89\x11\x8c\x19~uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fjY\x01\x00\x00h!]\x94K5ah#G\xc0,\x81D\xaaS\xfc\x01uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fjl\x01\x00\x00h!]\x94K ah#G\xc05\x10\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x012\x94h!]\x94K2ah#G\xc070\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\x81\x01\x00\x00h!]\x94K-ah#G\xc08\xd0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02\n\n\x94h!]\x94(K\nK\neh#G\xc09\x80\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fh_h!]\x94K\nah#G\xc09\xc0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02 \x94h!]\x94(K K eh#G\xc09\xf0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\x88\x01\x00\x00h!]\x94Kfah#G\xc0:0\xa2Dc\x06`uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nubh\x1b)\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x01}\x94h!]\x94K}ah#G\x00\x00\x00\x00\x00\x00\x00\x00h$]\x94(h')\x81\x94}\x94(h\x15}\x94(h\x1fj\xf3\x04\x00\x00h!]\x94K}ah#G\x00\x00\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02 }\x94h!]\x94(K K}eh#G\xc01\xe0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02,\"\x94h!]\x94(K,K\"eh#G\xc05`\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x02}\n\x94h!]\x94(K}K\neh#G\xc07\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03}\n\n\x94h!]\x94(K}K\nK\neh#G\xc08\xc0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1fj\xea\x01\x00\x00h!]\x94K.ah#G\xc0:\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x03}\r\n\x94h!]\x94(K}K\rK\neh#G\xc0; \x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x05}\r\n\r\n\x94h!]\x94(K}K\rK\nK\rK\neh#G\xc0=\x90\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x04}\n\n\n\x94h!]\x94(K}K\nK\nK\neh#G\xc0=\xa0\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubh')\x81\x94}\x94(h\x15}\x94(h\x1f\x8c\x07}\n\n\n\n\n\n\x94h!]\x94(K}K\nK\nK\nK\nK\nK\neh#G\xc0>\x00\x00\x00\x00\x00\x00uh-}\x94h/\x8f\x94(h\x1fh#h!\x90h1Nubeuh-}\x94h/\x8f\x94(h\x1fh#h!h$\x90h1Nube\x8c\x07refusal\x94Nuh-}\x94h/\x8f\x94(h\x17j<\x05\x00\x00\x90h1Nubub."
|
|
@@ -3,12 +3,7 @@ from typing import Literal
|
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
|
-
from kiln_ai.
|
|
7
|
-
from kiln_ai.datamodel import (
|
|
8
|
-
DatasetSplit,
|
|
9
|
-
FineTuneStatusType,
|
|
10
|
-
Task,
|
|
11
|
-
)
|
|
6
|
+
from kiln_ai.datamodel import DatasetSplit, FineTuneStatusType, Task
|
|
12
7
|
from kiln_ai.datamodel import Finetune as FinetuneModel
|
|
13
8
|
from kiln_ai.datamodel.datamodel_enums import ChatStrategy
|
|
14
9
|
from kiln_ai.utils.name_generator import generate_memorable_name
|
|
@@ -21,6 +16,7 @@ class FineTuneStatus(BaseModel):
|
|
|
21
16
|
|
|
22
17
|
status: FineTuneStatusType
|
|
23
18
|
message: str | None = None
|
|
19
|
+
error_details: str | None = None
|
|
24
20
|
|
|
25
21
|
|
|
26
22
|
class FineTuneParameter(BaseModel):
|
|
@@ -1,15 +1,11 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import tempfile
|
|
3
|
-
from dataclasses import dataclass
|
|
4
3
|
from enum import Enum
|
|
5
4
|
from pathlib import Path
|
|
6
5
|
from typing import Any, Dict, Protocol
|
|
7
6
|
from uuid import uuid4
|
|
8
7
|
|
|
9
|
-
from kiln_ai.adapters.chat.chat_formatter import
|
|
10
|
-
ChatMessage,
|
|
11
|
-
get_chat_formatter,
|
|
12
|
-
)
|
|
8
|
+
from kiln_ai.adapters.chat.chat_formatter import ChatMessage, get_chat_formatter
|
|
13
9
|
from kiln_ai.datamodel import DatasetSplit, TaskRun
|
|
14
10
|
from kiln_ai.datamodel.datamodel_enums import THINKING_DATA_STRATEGIES, ChatStrategy
|
|
15
11
|
from kiln_ai.utils.exhaustive_error import raise_exhaustive_enum_error
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
from dataclasses import dataclass
|
|
2
3
|
from typing import List, Tuple
|
|
3
4
|
from uuid import uuid4
|
|
4
5
|
|
|
@@ -23,6 +24,12 @@ serverless_models = [
|
|
|
23
24
|
]
|
|
24
25
|
|
|
25
26
|
|
|
27
|
+
@dataclass
|
|
28
|
+
class DeployStatus:
|
|
29
|
+
success: bool
|
|
30
|
+
error_details: str | None = None
|
|
31
|
+
|
|
32
|
+
|
|
26
33
|
class FireworksFinetune(BaseFinetuneAdapter):
|
|
27
34
|
"""
|
|
28
35
|
A fine-tuning adapter for Fireworks.
|
|
@@ -39,8 +46,9 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
39
46
|
# Deploy every time we check status. This can help resolve issues, Fireworks will undeploy unused models after a time.
|
|
40
47
|
if status.status == FineTuneStatusType.completed:
|
|
41
48
|
deployed = await self._deploy()
|
|
42
|
-
if not deployed:
|
|
49
|
+
if not deployed.success:
|
|
43
50
|
status.message = "Fine-tuning job completed but failed to deploy model."
|
|
51
|
+
status.error_details = deployed.error_details
|
|
44
52
|
|
|
45
53
|
return status
|
|
46
54
|
|
|
@@ -292,7 +300,7 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
292
300
|
}
|
|
293
301
|
return {k: v for k, v in payload.items() if v is not None}
|
|
294
302
|
|
|
295
|
-
async def _deploy(self) ->
|
|
303
|
+
async def _deploy(self) -> DeployStatus:
|
|
296
304
|
if self.datamodel.base_model_id in serverless_models:
|
|
297
305
|
return await self._deploy_serverless()
|
|
298
306
|
else:
|
|
@@ -321,7 +329,7 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
321
329
|
return None
|
|
322
330
|
return model_id
|
|
323
331
|
|
|
324
|
-
async def _deploy_serverless(self) ->
|
|
332
|
+
async def _deploy_serverless(self) -> DeployStatus:
|
|
325
333
|
# Now we "deploy" the model using PEFT serverless.
|
|
326
334
|
# A bit complicated: most fireworks deploys are server based.
|
|
327
335
|
# However, a Lora can be serverless (PEFT).
|
|
@@ -334,10 +342,11 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
334
342
|
url = f"https://api.fireworks.ai/v1/accounts/{account_id}/deployedModels"
|
|
335
343
|
model_id = await self.model_id_checking_status()
|
|
336
344
|
if not model_id:
|
|
337
|
-
|
|
345
|
+
error_details = (
|
|
338
346
|
"Model ID not found - can't deploy model to Fireworks serverless"
|
|
339
347
|
)
|
|
340
|
-
|
|
348
|
+
logger.error(error_details)
|
|
349
|
+
return DeployStatus(success=False, error_details=error_details)
|
|
341
350
|
|
|
342
351
|
payload = {
|
|
343
352
|
"displayName": self.deployment_display_name(),
|
|
@@ -357,14 +366,13 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
357
366
|
self.datamodel.fine_tune_model_id = model_id
|
|
358
367
|
if self.datamodel.path:
|
|
359
368
|
self.datamodel.save_to_file()
|
|
360
|
-
return True
|
|
369
|
+
return DeployStatus(success=True)
|
|
361
370
|
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
)
|
|
365
|
-
return False
|
|
371
|
+
error_msg = f"Failed to deploy model to Fireworks serverless: [{response.status_code}] {response.text}"
|
|
372
|
+
logger.error(error_msg)
|
|
373
|
+
return DeployStatus(success=False, error_details=error_msg)
|
|
366
374
|
|
|
367
|
-
async def _check_or_deploy_server(self) ->
|
|
375
|
+
async def _check_or_deploy_server(self) -> DeployStatus:
|
|
368
376
|
"""
|
|
369
377
|
Check if the model is already deployed. If not, deploy it to a dedicated server.
|
|
370
378
|
"""
|
|
@@ -380,19 +388,22 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
380
388
|
"READY",
|
|
381
389
|
"CREATING",
|
|
382
390
|
]:
|
|
383
|
-
return True
|
|
391
|
+
return DeployStatus(success=True)
|
|
384
392
|
|
|
385
393
|
# If the model is not deployed, deploy it
|
|
386
394
|
return await self._deploy_server()
|
|
387
395
|
|
|
388
|
-
async def _deploy_server(self) ->
|
|
396
|
+
async def _deploy_server(self) -> DeployStatus:
|
|
389
397
|
# For models that are not serverless, we just need to deploy the model to a server.
|
|
390
398
|
# We use a scale-to-zero on-demand deployment. If you stop using it, it
|
|
391
399
|
# will scale to zero and charges will stop.
|
|
392
400
|
model_id = await self.model_id_checking_status()
|
|
393
401
|
if not model_id:
|
|
394
|
-
|
|
395
|
-
|
|
402
|
+
error_details = (
|
|
403
|
+
"Model ID not found - can't deploy model to Fireworks server"
|
|
404
|
+
)
|
|
405
|
+
logger.error(error_details)
|
|
406
|
+
return DeployStatus(success=False, error_details=error_details)
|
|
396
407
|
|
|
397
408
|
api_key, account_id = self.api_key_and_account_id()
|
|
398
409
|
url = f"https://api.fireworks.ai/v1/accounts/{account_id}/deployments"
|
|
@@ -408,6 +419,8 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
408
419
|
# Scale to zero after 5 minutes of inactivity - this is the minimum allowed
|
|
409
420
|
"scaleToZeroWindow": "300s",
|
|
410
421
|
},
|
|
422
|
+
# H100s are much more reliable than default A100
|
|
423
|
+
"acceleratorType": "NVIDIA_H100_80GB",
|
|
411
424
|
"baseModel": model_id,
|
|
412
425
|
}
|
|
413
426
|
headers = {
|
|
@@ -424,12 +437,11 @@ class FireworksFinetune(BaseFinetuneAdapter):
|
|
|
424
437
|
self.datamodel.fine_tune_model_id = basemodel
|
|
425
438
|
if self.datamodel.path:
|
|
426
439
|
self.datamodel.save_to_file()
|
|
427
|
-
return True
|
|
440
|
+
return DeployStatus(success=True)
|
|
428
441
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
)
|
|
432
|
-
return False
|
|
442
|
+
error_msg = f"Failed to deploy model to Fireworks server: [{response.status_code}] {response.text}"
|
|
443
|
+
logger.error(error_msg)
|
|
444
|
+
return DeployStatus(success=False, error_details=error_msg)
|
|
433
445
|
|
|
434
446
|
async def _fetch_all_deployments(self) -> List[dict]:
|
|
435
447
|
"""
|
|
@@ -857,7 +857,7 @@ def test_serialize_r1_style_message_missing_thinking(thinking, final_output):
|
|
|
857
857
|
|
|
858
858
|
def test_vertex_gemini_role_map_coverage():
|
|
859
859
|
"""Test that VERTEX_GEMINI_ROLE_MAP covers all possible ChatMessage.role values"""
|
|
860
|
-
from typing import
|
|
860
|
+
from typing import get_type_hints
|
|
861
861
|
|
|
862
862
|
# Get the Literal type from ChatMessage.role
|
|
863
863
|
role_type = get_type_hints(ChatMessage)["role"]
|
|
@@ -10,7 +10,10 @@ from kiln_ai.adapters.fine_tune.base_finetune import (
|
|
|
10
10
|
FineTuneStatusType,
|
|
11
11
|
)
|
|
12
12
|
from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter
|
|
13
|
-
from kiln_ai.adapters.fine_tune.fireworks_finetune import
|
|
13
|
+
from kiln_ai.adapters.fine_tune.fireworks_finetune import (
|
|
14
|
+
DeployStatus,
|
|
15
|
+
FireworksFinetune,
|
|
16
|
+
)
|
|
14
17
|
from kiln_ai.datamodel import (
|
|
15
18
|
DatasetSplit,
|
|
16
19
|
StructuredOutputMode,
|
|
@@ -175,7 +178,9 @@ async def test_status_job_states(
|
|
|
175
178
|
|
|
176
179
|
with (
|
|
177
180
|
patch("httpx.AsyncClient") as mock_client_class,
|
|
178
|
-
patch.object(
|
|
181
|
+
patch.object(
|
|
182
|
+
fireworks_finetune, "_deploy", return_value=DeployStatus(success=True)
|
|
183
|
+
),
|
|
179
184
|
):
|
|
180
185
|
mock_client_class.return_value.__aenter__.return_value = mock_client
|
|
181
186
|
status = await fireworks_finetune.status()
|
|
@@ -468,7 +473,7 @@ async def test_deploy_serverless_success(fireworks_finetune, mock_api_key):
|
|
|
468
473
|
mock_client_class.return_value.__aenter__.return_value = mock_client
|
|
469
474
|
|
|
470
475
|
result = await fireworks_finetune._deploy_serverless()
|
|
471
|
-
assert result is True
|
|
476
|
+
assert result.success is True
|
|
472
477
|
assert fireworks_finetune.datamodel.fine_tune_model_id == "ftm-123"
|
|
473
478
|
|
|
474
479
|
|
|
@@ -495,7 +500,7 @@ async def test_deploy_serverless_already_deployed(fireworks_finetune, mock_api_k
|
|
|
495
500
|
mock_client_class.return_value.__aenter__.return_value = mock_client
|
|
496
501
|
|
|
497
502
|
result = await fireworks_finetune._deploy_serverless()
|
|
498
|
-
assert result is True
|
|
503
|
+
assert result.success is True
|
|
499
504
|
assert fireworks_finetune.datamodel.fine_tune_model_id == "ftm-123"
|
|
500
505
|
|
|
501
506
|
|
|
@@ -511,7 +516,7 @@ async def test_deploy_serverless_failure(fireworks_finetune, mock_api_key):
|
|
|
511
516
|
mock_client_class.return_value.__aenter__.return_value = mock_client
|
|
512
517
|
|
|
513
518
|
result = await fireworks_finetune._deploy_serverless()
|
|
514
|
-
assert result is False
|
|
519
|
+
assert result.success is False
|
|
515
520
|
|
|
516
521
|
|
|
517
522
|
async def test_deploy_serverless_missing_credentials(fireworks_finetune):
|
|
@@ -531,7 +536,7 @@ async def test_deploy_server_missing_credentials(fireworks_finetune):
|
|
|
531
536
|
mock_config.return_value.fireworks_account_id = None
|
|
532
537
|
|
|
533
538
|
response = await fireworks_finetune._check_or_deploy_server()
|
|
534
|
-
assert response is False
|
|
539
|
+
assert response.success is False
|
|
535
540
|
|
|
536
541
|
|
|
537
542
|
async def test_deploy_missing_model_id(fireworks_finetune, mock_api_key):
|
|
@@ -546,7 +551,7 @@ async def test_deploy_missing_model_id(fireworks_finetune, mock_api_key):
|
|
|
546
551
|
patch.object(fireworks_finetune, "_status", return_value=status_response),
|
|
547
552
|
):
|
|
548
553
|
response = await fireworks_finetune._deploy()
|
|
549
|
-
assert response is False
|
|
554
|
+
assert response.success is False
|
|
550
555
|
|
|
551
556
|
|
|
552
557
|
async def test_status_with_deploy(fireworks_finetune, mock_api_key):
|
|
@@ -561,7 +566,9 @@ async def test_status_with_deploy(fireworks_finetune, mock_api_key):
|
|
|
561
566
|
patch.object(
|
|
562
567
|
fireworks_finetune, "_status", return_value=status_response
|
|
563
568
|
) as mock_status,
|
|
564
|
-
patch.object(
|
|
569
|
+
patch.object(
|
|
570
|
+
fireworks_finetune, "_deploy", return_value=DeployStatus(success=False)
|
|
571
|
+
) as mock_deploy,
|
|
565
572
|
):
|
|
566
573
|
status = await fireworks_finetune.status()
|
|
567
574
|
|
|
@@ -810,11 +817,6 @@ async def test_deploy_server_success(fireworks_finetune, mock_api_key):
|
|
|
810
817
|
success_response.status_code = 200
|
|
811
818
|
success_response.json.return_value = {"baseModel": "model-123"}
|
|
812
819
|
|
|
813
|
-
status_response = (
|
|
814
|
-
FineTuneStatus(status=FineTuneStatusType.completed, message=""),
|
|
815
|
-
"model-123",
|
|
816
|
-
)
|
|
817
|
-
|
|
818
820
|
with (
|
|
819
821
|
patch("httpx.AsyncClient") as mock_client_class,
|
|
820
822
|
patch.object(
|
|
@@ -828,7 +830,7 @@ async def test_deploy_server_success(fireworks_finetune, mock_api_key):
|
|
|
828
830
|
result = await fireworks_finetune._deploy_server()
|
|
829
831
|
|
|
830
832
|
# Verify result
|
|
831
|
-
assert result is True
|
|
833
|
+
assert result.success is True
|
|
832
834
|
|
|
833
835
|
# Verify fine_tune_model_id was updated
|
|
834
836
|
assert fireworks_finetune.datamodel.fine_tune_model_id == "model-123"
|
|
@@ -868,7 +870,11 @@ async def test_deploy_server_failure(fireworks_finetune, mock_api_key):
|
|
|
868
870
|
result = await fireworks_finetune._deploy_server()
|
|
869
871
|
|
|
870
872
|
# Verify result
|
|
871
|
-
assert result is False
|
|
873
|
+
assert result.success is False
|
|
874
|
+
assert (
|
|
875
|
+
"Failed to deploy model to Fireworks server: [500] Internal Server Error"
|
|
876
|
+
in result.error_details
|
|
877
|
+
)
|
|
872
878
|
|
|
873
879
|
# Verify API was called
|
|
874
880
|
mock_client.post.assert_called_once()
|
|
@@ -895,7 +901,8 @@ async def test_deploy_server_non_200_but_valid_response(
|
|
|
895
901
|
result = await fireworks_finetune._deploy_server()
|
|
896
902
|
|
|
897
903
|
# Verify result - should fail because baseModel is missing
|
|
898
|
-
assert result is False
|
|
904
|
+
assert result.success is False
|
|
905
|
+
assert "Failed to deploy model to Fireworks server:" in result.error_details
|
|
899
906
|
|
|
900
907
|
|
|
901
908
|
async def test_deploy_server_missing_model_id(fireworks_finetune, mock_api_key):
|
|
@@ -906,7 +913,7 @@ async def test_deploy_server_missing_model_id(fireworks_finetune, mock_api_key):
|
|
|
906
913
|
result = await fireworks_finetune._deploy_server()
|
|
907
914
|
|
|
908
915
|
# Verify result - should fail because model ID is missing
|
|
909
|
-
assert result is False
|
|
916
|
+
assert result.success is False
|
|
910
917
|
|
|
911
918
|
|
|
912
919
|
@pytest.mark.parametrize(
|
|
@@ -937,10 +944,10 @@ async def test_check_or_deploy_server_already_deployed(
|
|
|
937
944
|
) as mock_fetch,
|
|
938
945
|
patch.object(fireworks_finetune, "_deploy_server") as mock_deploy,
|
|
939
946
|
):
|
|
940
|
-
mock_deploy.return_value = True
|
|
947
|
+
mock_deploy.return_value = DeployStatus(success=True)
|
|
941
948
|
result = await fireworks_finetune._check_or_deploy_server()
|
|
942
949
|
# Even true if the model is in a non-ready state, as we'll call deploy (checked below)
|
|
943
|
-
assert result is True
|
|
950
|
+
assert result.success is True
|
|
944
951
|
|
|
945
952
|
if expected_already_deployed:
|
|
946
953
|
assert mock_deploy.call_count == 0
|
|
@@ -968,13 +975,15 @@ async def test_check_or_deploy_server_not_deployed(fireworks_finetune, mock_api_
|
|
|
968
975
|
fireworks_finetune, "_fetch_all_deployments", return_value=mock_deployments
|
|
969
976
|
) as mock_fetch,
|
|
970
977
|
patch.object(
|
|
971
|
-
fireworks_finetune,
|
|
978
|
+
fireworks_finetune,
|
|
979
|
+
"_deploy_server",
|
|
980
|
+
return_value=DeployStatus(success=True),
|
|
972
981
|
) as mock_deploy,
|
|
973
982
|
):
|
|
974
983
|
result = await fireworks_finetune._check_or_deploy_server()
|
|
975
984
|
|
|
976
985
|
# Verify method returned True (from _deploy_server)
|
|
977
|
-
assert result is True
|
|
986
|
+
assert result.success is True
|
|
978
987
|
|
|
979
988
|
# Verify _fetch_all_deployments was called
|
|
980
989
|
mock_fetch.assert_called_once()
|
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import time
|
|
2
1
|
from pathlib import Path
|
|
3
|
-
from unittest.mock import
|
|
2
|
+
from unittest.mock import MagicMock, patch
|
|
4
3
|
|
|
5
4
|
import pytest
|
|
6
5
|
from google.cloud import storage
|
|
@@ -10,11 +9,7 @@ from vertexai.tuning import sft
|
|
|
10
9
|
from kiln_ai.adapters.fine_tune.base_finetune import FineTuneStatusType
|
|
11
10
|
from kiln_ai.adapters.fine_tune.dataset_formatter import DatasetFormat, DatasetFormatter
|
|
12
11
|
from kiln_ai.adapters.fine_tune.vertex_finetune import VertexFinetune
|
|
13
|
-
from kiln_ai.datamodel import
|
|
14
|
-
DatasetSplit,
|
|
15
|
-
StructuredOutputMode,
|
|
16
|
-
Task,
|
|
17
|
-
)
|
|
12
|
+
from kiln_ai.datamodel import DatasetSplit, StructuredOutputMode, Task
|
|
18
13
|
from kiln_ai.datamodel import Finetune as FinetuneModel
|
|
19
14
|
from kiln_ai.datamodel.datamodel_enums import ChatStrategy
|
|
20
15
|
from kiln_ai.datamodel.dataset_split import Train80Test20SplitDefinition
|