kiln-ai 0.21.0__py3-none-any.whl → 0.22.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of kiln-ai might be problematic. Click here for more details.

Files changed (53)
  1. kiln_ai/adapters/extractors/litellm_extractor.py +52 -32
  2. kiln_ai/adapters/extractors/test_litellm_extractor.py +169 -71
  3. kiln_ai/adapters/ml_embedding_model_list.py +330 -28
  4. kiln_ai/adapters/ml_model_list.py +503 -23
  5. kiln_ai/adapters/model_adapters/litellm_adapter.py +39 -8
  6. kiln_ai/adapters/model_adapters/test_litellm_adapter.py +78 -0
  7. kiln_ai/adapters/model_adapters/test_litellm_adapter_tools.py +119 -5
  8. kiln_ai/adapters/model_adapters/test_saving_adapter_results.py +9 -3
  9. kiln_ai/adapters/model_adapters/test_structured_output.py +6 -9
  10. kiln_ai/adapters/test_ml_embedding_model_list.py +89 -279
  11. kiln_ai/adapters/test_ml_model_list.py +0 -10
  12. kiln_ai/adapters/vector_store/lancedb_adapter.py +24 -70
  13. kiln_ai/adapters/vector_store/lancedb_helpers.py +101 -0
  14. kiln_ai/adapters/vector_store/test_lancedb_adapter.py +9 -16
  15. kiln_ai/adapters/vector_store/test_lancedb_helpers.py +142 -0
  16. kiln_ai/adapters/vector_store_loaders/__init__.py +0 -0
  17. kiln_ai/adapters/vector_store_loaders/test_lancedb_loader.py +282 -0
  18. kiln_ai/adapters/vector_store_loaders/test_vector_store_loader.py +544 -0
  19. kiln_ai/adapters/vector_store_loaders/vector_store_loader.py +91 -0
  20. kiln_ai/datamodel/basemodel.py +31 -3
  21. kiln_ai/datamodel/external_tool_server.py +206 -54
  22. kiln_ai/datamodel/extraction.py +14 -0
  23. kiln_ai/datamodel/task.py +5 -0
  24. kiln_ai/datamodel/task_output.py +41 -11
  25. kiln_ai/datamodel/test_attachment.py +3 -3
  26. kiln_ai/datamodel/test_basemodel.py +269 -13
  27. kiln_ai/datamodel/test_datasource.py +50 -0
  28. kiln_ai/datamodel/test_external_tool_server.py +534 -152
  29. kiln_ai/datamodel/test_extraction_model.py +31 -0
  30. kiln_ai/datamodel/test_task.py +35 -1
  31. kiln_ai/datamodel/test_tool_id.py +106 -1
  32. kiln_ai/datamodel/tool_id.py +49 -0
  33. kiln_ai/tools/base_tool.py +30 -6
  34. kiln_ai/tools/built_in_tools/math_tools.py +12 -4
  35. kiln_ai/tools/kiln_task_tool.py +162 -0
  36. kiln_ai/tools/mcp_server_tool.py +7 -5
  37. kiln_ai/tools/mcp_session_manager.py +50 -24
  38. kiln_ai/tools/rag_tools.py +17 -6
  39. kiln_ai/tools/test_kiln_task_tool.py +527 -0
  40. kiln_ai/tools/test_mcp_server_tool.py +4 -15
  41. kiln_ai/tools/test_mcp_session_manager.py +186 -226
  42. kiln_ai/tools/test_rag_tools.py +86 -5
  43. kiln_ai/tools/test_tool_registry.py +199 -5
  44. kiln_ai/tools/tool_registry.py +49 -17
  45. kiln_ai/utils/filesystem.py +4 -4
  46. kiln_ai/utils/open_ai_types.py +19 -2
  47. kiln_ai/utils/pdf_utils.py +21 -0
  48. kiln_ai/utils/test_open_ai_types.py +88 -12
  49. kiln_ai/utils/test_pdf_utils.py +14 -1
  50. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/METADATA +79 -1
  51. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/RECORD +53 -45
  52. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/WHEEL +0 -0
  53. {kiln_ai-0.21.0.dist-info → kiln_ai-0.22.1.dist-info}/licenses/LICENSE.txt +0 -0
@@ -1,5 +1,7 @@
1
1
  import datetime
2
2
  import json
3
+ import logging
4
+ import time
3
5
  import uuid
4
6
  from pathlib import Path
5
7
  from typing import Optional
@@ -342,12 +344,12 @@ def test_delete_no_path():
342
344
  ("Hello 👍", "Hello 👍"),
343
345
  # Invalid characters are replaced
344
346
  ("Hello@World!", "Hello@World!"),
345
- ("File.name.txt", "File_name_txt"),
346
- ("Special%%%Chars", "Special_Chars"),
347
- ("Special#$%Chars", "Special#$_Chars"),
347
+ ("File.name.txt", "File name txt"),
348
+ ("Special%%%Chars", "Special Chars"),
349
+ ("Special#$%Chars", "Special#$ Chars"),
348
350
  # Consecutive invalid characters are replaced
349
- ("Special%%%Chars", "Special_Chars"),
350
- ("path/to/file", "path_to_file"),
351
+ ("Special%%%Chars", "Special Chars"),
352
+ ("path/to/file", "path to file"),
351
353
  # Leading/trailing special characters are removed
352
354
  ("__test__", "test"),
353
355
  ("...test...", "test"),
@@ -360,14 +362,14 @@ def test_delete_no_path():
360
362
  ("你好_世界", "你好_世界"),
361
363
  ("你好_世界_你好", "你好_世界_你好"),
362
364
  # Newlines, tabs, and other control characters are replaced
363
- ("Hello\nworld", "Hello_world"),
364
- ("Hello\tworld", "Hello_world"),
365
- ("Hello\rworld", "Hello_world"),
366
- ("Hello\fworld", "Hello_world"),
367
- ("Hello\bworld", "Hello_world"),
368
- ("Hello\vworld", "Hello_world"),
369
- ("Hello\0world", "Hello_world"),
370
- ("Hello\x00world", "Hello_world"),
365
+ ("Hello\nworld", "Hello world"),
366
+ ("Hello\tworld", "Hello world"),
367
+ ("Hello\rworld", "Hello world"),
368
+ ("Hello\fworld", "Hello world"),
369
+ ("Hello\bworld", "Hello world"),
370
+ ("Hello\vworld", "Hello world"),
371
+ ("Hello\0world", "Hello world"),
372
+ ("Hello\x00world", "Hello world"),
371
373
  ],
372
374
  )
373
375
  def test_string_to_valid_name(tmp_path, name, expected):
@@ -491,6 +493,7 @@ def test_from_id_and_parent_path(test_base_parented_file, tmp_model_cache):
491
493
  child3.save_to_file()
492
494
 
493
495
  # Test finding existing child by ID
496
+ assert child2.id is not None # Type safety
494
497
  found_child = DefaultParentedModel.from_id_and_parent_path(
495
498
  child2.id, test_base_parented_file
496
499
  )
@@ -513,6 +516,7 @@ def test_from_id_and_parent_path_with_cache(test_base_parented_file, tmp_model_c
513
516
  child.save_to_file()
514
517
 
515
518
  # First load to populate cache
519
+ assert child.id is not None # Type safety
516
520
  _ = DefaultParentedModel.from_id_and_parent_path(child.id, test_base_parented_file)
517
521
 
518
522
  # Mock cache to verify it's used
@@ -534,6 +538,258 @@ def test_from_id_and_parent_path_without_parent():
534
538
  assert not_found is None
535
539
 
536
540
 
541
+ def test_from_ids_and_parent_path_basic(test_base_parented_file, tmp_model_cache):
542
+ """Test basic functionality of from_ids_and_parent_path method"""
543
+ # Set up parent and children models
544
+ parent = BaseParentExample.load_from_file(test_base_parented_file)
545
+
546
+ child1 = DefaultParentedModel(parent=parent, name="Child1")
547
+ child2 = DefaultParentedModel(parent=parent, name="Child2")
548
+ child3 = DefaultParentedModel(parent=parent, name="Child3")
549
+
550
+ # Save all children
551
+ child1.save_to_file()
552
+ child2.save_to_file()
553
+ child3.save_to_file()
554
+
555
+ # Test finding multiple children by IDs
556
+ assert child1.id is not None and child2.id is not None # Type safety
557
+ target_ids = {child1.id, child3.id}
558
+ found_children = DefaultParentedModel.from_ids_and_parent_path(
559
+ target_ids, test_base_parented_file
560
+ )
561
+
562
+ # Verify correct children were found
563
+ assert len(found_children) == 2
564
+ assert child1.id in found_children
565
+ assert child3.id in found_children
566
+ assert child2.id not in found_children
567
+
568
+ # Verify the returned models have correct data
569
+ assert found_children[child1.id].name == "Child1"
570
+ assert found_children[child3.id].name == "Child3"
571
+
572
+ # Verify they are not the same instances (deep copies)
573
+ assert found_children[child1.id] is not child1
574
+ assert found_children[child3.id] is not child3
575
+
576
+
577
+ def test_from_ids_and_parent_path_empty_list(test_base_parented_file):
578
+ """Test from_ids_and_parent_path with empty ID list"""
579
+ found_children = DefaultParentedModel.from_ids_and_parent_path(
580
+ set(), test_base_parented_file
581
+ )
582
+ assert found_children == {}
583
+
584
+
585
+ def test_from_ids_and_parent_path_none_parent():
586
+ """Test from_ids_and_parent_path with None parent_path"""
587
+ found_children = DefaultParentedModel.from_ids_and_parent_path({"any-id"}, None)
588
+ assert found_children == {}
589
+
590
+
591
+ def test_from_ids_and_parent_path_no_matches(test_base_parented_file, tmp_model_cache):
592
+ """Test from_ids_and_parent_path when no IDs match existing children"""
593
+ # Set up parent and children models
594
+ parent = BaseParentExample.load_from_file(test_base_parented_file)
595
+
596
+ child1 = DefaultParentedModel(parent=parent, name="Child1")
597
+ child1.save_to_file()
598
+
599
+ # Test with non-existent IDs
600
+ found_children = DefaultParentedModel.from_ids_and_parent_path(
601
+ {"nonexistent1", "nonexistent2"}, test_base_parented_file
602
+ )
603
+ assert found_children == {}
604
+
605
+
606
+ def test_from_ids_and_parent_path_partial_matches(
607
+ test_base_parented_file, tmp_model_cache
608
+ ):
609
+ """Test from_ids_and_parent_path when only some IDs match"""
610
+ # Set up parent and children models
611
+ parent = BaseParentExample.load_from_file(test_base_parented_file)
612
+
613
+ child1 = DefaultParentedModel(parent=parent, name="Child1")
614
+ child2 = DefaultParentedModel(parent=parent, name="Child2")
615
+
616
+ # Save children
617
+ child1.save_to_file()
618
+ child2.save_to_file()
619
+
620
+ # Test with mix of existing and non-existent IDs
621
+ assert child1.id is not None and child2.id is not None # Type safety
622
+ target_ids = {child1.id, "nonexistent", child2.id, "another_nonexistent"}
623
+ found_children = DefaultParentedModel.from_ids_and_parent_path(
624
+ target_ids, test_base_parented_file
625
+ )
626
+
627
+ # Should only find the existing children
628
+ assert len(found_children) == 2
629
+ assert child1.id in found_children
630
+ assert child2.id in found_children
631
+ assert "nonexistent" not in found_children
632
+ assert "another_nonexistent" not in found_children
633
+
634
+
635
+ def test_from_ids_and_parent_path_with_cache_fallback(
636
+ test_base_parented_file, tmp_model_cache
637
+ ):
638
+ """Test from_ids_and_parent_path when cache returns None and needs to load file"""
639
+ # Set up parent and child
640
+ parent = BaseParentExample.load_from_file(test_base_parented_file)
641
+ child = DefaultParentedModel(parent=parent, name="Child")
642
+ child.save_to_file()
643
+
644
+ # Mock cache to return None for get_model_id, forcing file load
645
+ tmp_model_cache.get_model_id = MagicMock(return_value=None)
646
+
647
+ # Test should still work by loading the file
648
+ assert child.id is not None # Type safety
649
+ found_children = DefaultParentedModel.from_ids_and_parent_path(
650
+ {child.id}, test_base_parented_file
651
+ )
652
+
653
+ assert len(found_children) == 1
654
+ assert child.id in found_children
655
+ assert found_children[child.id].name == "Child"
656
+
657
+ # Verify cache was checked
658
+ tmp_model_cache.get_model_id.assert_called()
659
+
660
+
661
+ def test_from_ids_and_parent_path_equivalent_to_individual_lookups(
662
+ test_base_parented_file, tmp_model_cache
663
+ ):
664
+ """Test that from_ids_and_parent_path returns the same results as individual lookups"""
665
+ # Set up parent and multiple children
666
+ parent = BaseParentExample.load_from_file(test_base_parented_file)
667
+
668
+ children = []
669
+ for i in range(10):
670
+ child = DefaultParentedModel(parent=parent, name=f"Child{i}")
671
+ child.save_to_file()
672
+ children.append(child)
673
+
674
+ # Select 5 children to lookup
675
+ target_ids = {
676
+ child.id for child in children[::2] if child.id is not None
677
+ } # Every other child
678
+ assert len(target_ids) == 5 # Ensure we have 5 children to test
679
+
680
+ # Test bulk method
681
+ bulk_results = DefaultParentedModel.from_ids_and_parent_path(
682
+ target_ids, test_base_parented_file
683
+ )
684
+
685
+ # Test individual method
686
+ individual_results = {}
687
+ for target_id in target_ids:
688
+ result = DefaultParentedModel.from_id_and_parent_path(
689
+ target_id, test_base_parented_file
690
+ )
691
+ if result:
692
+ individual_results[target_id] = result
693
+
694
+ # Results should be equivalent
695
+ assert len(bulk_results) == len(individual_results) == 5
696
+
697
+ for target_id in target_ids:
698
+ assert target_id in bulk_results
699
+ assert target_id in individual_results
700
+
701
+ # Compare the key attributes
702
+ bulk_child = bulk_results[target_id]
703
+ individual_child = individual_results[target_id]
704
+
705
+ assert bulk_child.id == individual_child.id
706
+ assert bulk_child.name == individual_child.name
707
+ assert bulk_child.model_type == individual_child.model_type
708
+
709
+
710
+ # Not actually paid, but we want the "must be run manually" feature of the paid marker as this is very slow
711
+ @pytest.mark.paid
712
+ @pytest.mark.parametrize("num_children", [100, 1000, 2500, 5000])
713
+ def test_from_ids_and_parent_path_benchmark(
714
+ test_base_parented_file, tmp_model_cache, num_children
715
+ ):
716
+ """Benchmark test for from_ids_and_parent_path method performance at scale"""
717
+ # Set up parent and many children
718
+ parent = BaseParentExample.load_from_file(test_base_parented_file)
719
+
720
+ children = []
721
+ for i in range(num_children):
722
+ child = DefaultParentedModel(parent=parent, name=f"Child{i:05d}")
723
+ child.save_to_file()
724
+ children.append(child)
725
+
726
+ # look up all children
727
+ lookup_count = num_children
728
+ target_ids = {child.id for child in children[:lookup_count] if child.id is not None}
729
+ assert len(target_ids) == lookup_count
730
+
731
+ # Benchmark the bulk method using manual timing
732
+ def bulk_lookup():
733
+ return DefaultParentedModel.from_ids_and_parent_path(
734
+ target_ids, test_base_parented_file
735
+ )
736
+
737
+ # Run bulk method once and time it
738
+ start_time = time.perf_counter()
739
+ bulk_result = bulk_lookup()
740
+ end_time = time.perf_counter()
741
+ bulk_time = end_time - start_time
742
+
743
+ # Verify we got the expected results
744
+ assert len(bulk_result) == lookup_count
745
+
746
+ # Calculate bulk method stats
747
+ bulk_ops_per_second = lookup_count / bulk_time
748
+
749
+ # Benchmark the individual lookup method using manual timing
750
+ def individual_lookups():
751
+ results = {}
752
+ for target_id in target_ids:
753
+ result = DefaultParentedModel.from_id_and_parent_path(
754
+ target_id, test_base_parented_file
755
+ )
756
+ if result:
757
+ results[target_id] = result
758
+ return results
759
+
760
+ # Run individual lookup method
761
+ start_time = time.perf_counter()
762
+ individual_result = individual_lookups()
763
+ end_time = time.perf_counter()
764
+ individual_time = end_time - start_time
765
+
766
+ assert len(individual_result) == lookup_count
767
+ individual_ops_per_second = lookup_count / individual_time
768
+
769
+ # Calculate performance comparison
770
+ speedup = individual_time / bulk_time
771
+ time_savings_pct = (individual_time - bulk_time) / individual_time * 100
772
+
773
+ # Use logging to display results (will show with -s flag or --log-cli-level=INFO)
774
+ logger = logging.getLogger(__name__)
775
+ logger.info(
776
+ f"Benchmark results for {num_children} children, {lookup_count} lookups:"
777
+ )
778
+ logger.info(f" Bulk method: {bulk_time:.4f}s ({bulk_ops_per_second:.2f} ops/sec)")
779
+ logger.info(
780
+ f" Individual method: {individual_time:.4f}s ({individual_ops_per_second:.2f} ops/sec)"
781
+ )
782
+ logger.info(
783
+ f" Speedup: {speedup:.2f}x faster, {time_savings_pct:.1f}% time savings"
784
+ )
785
+
786
+ assert bulk_time > 0, "Bulk method should complete successfully"
787
+ assert individual_time > 0, "Individual method should complete successfully"
788
+ assert speedup >= 1.0, (
789
+ f"Expected bulk method to be faster, but got {speedup:.2f}x speedup"
790
+ )
791
+
792
+
537
793
  class MockAdapter(BaseAdapter):
538
794
  """Implementation of BaseAdapter for testing"""
539
795
 
@@ -38,6 +38,15 @@ def test_valid_file_import_data_source():
38
38
  assert data_source.properties["file_name"] == "test.txt"
39
39
 
40
40
 
41
+ def test_empty_valid_tool_call_data_source():
42
+ data_source = DataSource(
43
+ type=DataSourceType.tool_call,
44
+ properties={},
45
+ )
46
+ assert data_source.type == DataSourceType.tool_call
47
+ assert data_source.properties == {}
48
+
49
+
41
50
  def test_missing_required_property():
42
51
  with pytest.raises(ValidationError, match="'created_by' is required for"):
43
52
  DataSource(type=DataSourceType.human)
@@ -79,6 +88,35 @@ def test_not_allowed_property():
79
88
  )
80
89
 
81
90
 
91
+ def test_not_allowed_property_tool_call():
92
+ with pytest.raises(
93
+ ValidationError,
94
+ match="'created_by' is not allowed for",
95
+ ):
96
+ DataSource(
97
+ type=DataSourceType.tool_call,
98
+ properties={
99
+ "model_name": "GPT-4",
100
+ "model_provider": "OpenAI",
101
+ "adapter_name": "langchain",
102
+ "created_by": "John Doe",
103
+ },
104
+ )
105
+
106
+
107
+ def test_not_allowed_file_name_tool_call():
108
+ with pytest.raises(
109
+ ValidationError,
110
+ match="'file_name' is not allowed for",
111
+ ):
112
+ DataSource(
113
+ type=DataSourceType.tool_call,
114
+ properties={
115
+ "file_name": "test.txt",
116
+ },
117
+ )
118
+
119
+
82
120
  def test_extra_properties():
83
121
  data_source = DataSource(
84
122
  type=DataSourceType.synthetic,
@@ -94,6 +132,18 @@ def test_extra_properties():
94
132
  assert data_source.properties["max_tokens"] == 100
95
133
 
96
134
 
135
+ def test_extra_properties_tool_call():
136
+ data_source = DataSource(
137
+ type=DataSourceType.tool_call,
138
+ properties={
139
+ "temperature": 0.7,
140
+ "max_tokens": 100,
141
+ },
142
+ )
143
+ assert data_source.properties["temperature"] == 0.7
144
+ assert data_source.properties["max_tokens"] == 100
145
+
146
+
97
147
  def test_prompt_type_optional_for_synthetic():
98
148
  data_source = DataSource(
99
149
  type=DataSourceType.synthetic,