kiln-ai 0.7.1__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of kiln-ai has been flagged as potentially problematic by the registry's analysis.

@@ -9,6 +9,7 @@ def adapter_for_task(
     model_name: str | None = None,
     provider: str | None = None,
     prompt_builder: BasePromptBuilder | None = None,
+    tags: list[str] | None = None,
 ) -> BaseAdapter:
     # We use langchain for everything right now, but can add any others here
     return LangchainAdapter(
@@ -16,4 +17,5 @@ def adapter_for_task(
         model_name=model_name,
         provider=provider,
         prompt_builder=prompt_builder,
+        tags=tags,
     )
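
The new tags parameter lets callers stamp a default set of tags onto every run the adapter produces. A minimal usage sketch, assuming an existing Task object (the model name, provider, and tag values below are illustrative, not taken from this release):

    from kiln_ai.adapters.adapter_registry import adapter_for_task

    adapter = adapter_for_task(
        task,  # a kiln_ai.datamodel.Task
        model_name="llama_3_3_70b",
        provider="groq",
        tags=["eval_batch_1", "synthetic"],
    )
    # Task runs saved by this adapter will carry tags=["eval_batch_1", "synthetic"]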
@@ -45,12 +45,16 @@ class BaseAdapter(metaclass=ABCMeta):
     """
 
     def __init__(
-        self, kiln_task: Task, prompt_builder: BasePromptBuilder | None = None
+        self,
+        kiln_task: Task,
+        prompt_builder: BasePromptBuilder | None = None,
+        tags: list[str] | None = None,
     ):
         self.prompt_builder = prompt_builder or SimplePromptBuilder(kiln_task)
         self.kiln_task = kiln_task
         self.output_schema = self.kiln_task.output_json_schema
         self.input_schema = self.kiln_task.input_json_schema
+        self.default_tags = tags
 
     async def invoke_returning_raw(
         self,
@@ -148,6 +152,7 @@ class BaseAdapter(metaclass=ABCMeta):
                 ),
             ),
             intermediate_outputs=run_output.intermediate_outputs,
+            tags=self.default_tags or [],
         )
 
         exclude_fields = {
@@ -39,8 +39,9 @@ class LangchainAdapter(BaseAdapter):
         model_name: str | None = None,
         provider: str | None = None,
         prompt_builder: BasePromptBuilder | None = None,
+        tags: list[str] | None = None,
     ):
-        super().__init__(kiln_task, prompt_builder=prompt_builder)
+        super().__init__(kiln_task, prompt_builder=prompt_builder, tags=tags)
         if custom_model is not None:
             self._model = custom_model
 
@@ -198,6 +199,9 @@ async def langchain_model_from_provider(
     if provider.name == ModelProviderName.openai:
         api_key = Config.shared().open_ai_api_key
         return ChatOpenAI(**provider.provider_options, openai_api_key=api_key)  # type: ignore[arg-type]
+    elif provider.name == ModelProviderName.openai_compatible:
+        # See provider_tools.py for how base_url, key and other parameters are set
+        return ChatOpenAI(**provider.provider_options)  # type: ignore[arg-type]
     elif provider.name == ModelProviderName.groq:
         api_key = Config.shared().groq_api_key
         if api_key is None:
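
For the openai_compatible branch, every ChatOpenAI argument arrives pre-packed in provider.provider_options (model, api_key, openai_api_base), so the equivalent direct construction would look roughly like the sketch below; the endpoint and key are placeholders, not values from this package:

    from langchain_openai import ChatOpenAI

    model = ChatOpenAI(
        model="some-model",
        api_key="optional-key",
        openai_api_base="https://llm.example.com/v1",
    )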
@@ -23,6 +23,7 @@ class ModelProviderName(str, Enum):
     fireworks_ai = "fireworks_ai"
     kiln_fine_tune = "kiln_fine_tune"
     kiln_custom_registry = "kiln_custom_registry"
+    openai_compatible = "openai_compatible"
 
 
 class ModelFamily(str, Enum):
@@ -522,6 +523,12 @@ built_in_models: List[KilnModel] = [
                 }
             },
         ),
+        KilnModelProvider(
+            name=ModelProviderName.groq,
+            supports_structured_output=True,
+            supports_data_gen=True,
+            provider_options={"model": "llama-3.3-70b-versatile"},
+        ),
         KilnModelProvider(
             name=ModelProviderName.ollama,
             provider_options={"model": "llama3.3"},
@@ -530,6 +537,8 @@ built_in_models: List[KilnModel] = [
             name=ModelProviderName.fireworks_ai,
             # Finetuning not live yet
             # provider_finetune_id="accounts/fireworks/models/llama-v3p3-70b-instruct",
+            supports_structured_output=True,
+            supports_data_gen=True,
             provider_options={
                 "model": "accounts/fireworks/models/llama-v3p3-70b-instruct"
             },
@@ -108,6 +108,9 @@ async def kiln_model_provider_from(
     if provider_name == ModelProviderName.kiln_fine_tune:
         return finetune_provider_model(name)
 
+    if provider_name == ModelProviderName.openai_compatible:
+        return openai_compatible_provider_model(name)
+
     built_in_model = await builtin_model_from(name, provider_name)
     if built_in_model:
         return built_in_model
@@ -136,6 +139,45 @@ async def kiln_model_provider_from(
 finetune_cache: dict[str, KilnModelProvider] = {}
 
 
+def openai_compatible_provider_model(
+    model_id: str,
+) -> KilnModelProvider:
+    try:
+        openai_provider_name, model_id = model_id.split("::")
+    except Exception:
+        raise ValueError(f"Invalid openai compatible model ID: {model_id}")
+
+    openai_compatible_providers = Config.shared().openai_compatible_providers or []
+    provider = next(
+        filter(
+            lambda p: p.get("name") == openai_provider_name, openai_compatible_providers
+        ),
+        None,
+    )
+    if provider is None:
+        raise ValueError(f"OpenAI compatible provider {openai_provider_name} not found")
+
+    # API key optional some providers don't use it
+    api_key = provider.get("api_key")
+    base_url = provider.get("base_url")
+    if base_url is None:
+        raise ValueError(
+            f"OpenAI compatible provider {openai_provider_name} has no base URL"
+        )
+
+    return KilnModelProvider(
+        name=ModelProviderName.openai_compatible,
+        provider_options={
+            "model": model_id,
+            "api_key": api_key,
+            "openai_api_base": base_url,
+        },
+        supports_structured_output=False,
+        supports_data_gen=False,
+        untested_model=True,
+    )
+
+
 def finetune_provider_model(
     model_id: str,
 ) -> KilnModelProvider:
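
Model IDs for this provider take the form "provider_name::model_id" and are resolved against the openai_compatible_providers list in the Kiln config. A hedged end-to-end sketch using only APIs visible in this diff (the gateway name, URL, and key are placeholders):

    from kiln_ai.utils.config import Config
    from kiln_ai.adapters.provider_tools import openai_compatible_provider_model

    Config.shared().save_setting(
        "openai_compatible_providers",
        [{"name": "my_gateway", "base_url": "https://llm.example.com/v1", "api_key": "secret"}],
    )
    provider = openai_compatible_provider_model("my_gateway::my-model")
    # provider.provider_options == {"model": "my-model", "api_key": "secret",
    #                               "openai_api_base": "https://llm.example.com/v1"}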
@@ -228,6 +270,8 @@ def provider_name_from_id(id: str) -> str:
             return "Fireworks AI"
         case ModelProviderName.kiln_custom_registry:
             return "Custom Models"
+        case ModelProviderName.openai_compatible:
+            return "OpenAI Compatible"
         case _:
             # triggers pyright warning if I miss a case
             raise_exhaustive_error(enum_id)
@@ -266,6 +310,10 @@ def provider_options_for_custom_model(
             raise ValueError(
                 "Fine tuned models should populate provider options via another path"
             )
+        case ModelProviderName.openai_compatible:
+            raise ValueError(
+                "OpenAI compatible models should populate provider options via another path"
+            )
         case _:
             # triggers pyright warning if I miss a case
             raise_exhaustive_error(enum_id)
@@ -15,6 +15,7 @@ from kiln_ai.adapters.provider_tools import (
     finetune_provider_model,
     get_model_and_provider,
     kiln_model_provider_from,
+    openai_compatible_provider_model,
     provider_enabled,
     provider_name_from_id,
     provider_options_for_custom_model,
@@ -64,6 +65,25 @@ def mock_finetune():
         yield mock
 
 
+@pytest.fixture
+def mock_shared_config():
+    with patch("kiln_ai.adapters.provider_tools.Config.shared") as mock:
+        config = Mock()
+        config.openai_compatible_providers = [
+            {
+                "name": "test_provider",
+                "base_url": "https://api.test.com",
+                "api_key": "test-key",
+            },
+            {
+                "name": "no_key_provider",
+                "base_url": "https://api.nokey.com",
+            },
+        ]
+        mock.return_value = config
+        yield mock
+
+
 def test_check_provider_warnings_no_warning(mock_config):
     mock_config.return_value = "some_value"
 
@@ -529,3 +549,78 @@ def test_finetune_provider_model_fireworks_provider(
     assert provider.adapter_options == {
         "langchain": {"with_structured_output_options": {"method": "json_mode"}}
     }
+
+
+def test_openai_compatible_provider_model_success(mock_shared_config):
+    """Test successful creation of an OpenAI compatible provider"""
+    model_id = "test_provider::gpt-4"
+
+    provider = openai_compatible_provider_model(model_id)
+
+    assert provider.name == ModelProviderName.openai_compatible
+    assert provider.provider_options == {
+        "model": "gpt-4",
+        "api_key": "test-key",
+        "openai_api_base": "https://api.test.com",
+    }
+    assert provider.supports_structured_output is False
+    assert provider.supports_data_gen is False
+    assert provider.untested_model is True
+
+
+def test_openai_compatible_provider_model_no_api_key(mock_shared_config):
+    """Test provider creation without API key (should work as some providers don't require it)"""
+    model_id = "no_key_provider::gpt-4"
+
+    provider = openai_compatible_provider_model(model_id)
+
+    assert provider.name == ModelProviderName.openai_compatible
+    assert provider.provider_options == {
+        "model": "gpt-4",
+        "api_key": None,
+        "openai_api_base": "https://api.nokey.com",
+    }
+
+
+def test_openai_compatible_provider_model_invalid_id():
+    """Test handling of invalid model ID format"""
+    with pytest.raises(ValueError) as exc_info:
+        openai_compatible_provider_model("invalid-id-format")
+    assert (
+        str(exc_info.value) == "Invalid openai compatible model ID: invalid-id-format"
+    )
+
+
+def test_openai_compatible_provider_model_no_providers(mock_shared_config):
+    """Test handling when no providers are configured"""
+    mock_shared_config.return_value.openai_compatible_providers = None
+
+    with pytest.raises(ValueError) as exc_info:
+        openai_compatible_provider_model("test_provider::gpt-4")
+    assert str(exc_info.value) == "OpenAI compatible provider test_provider not found"
+
+
+def test_openai_compatible_provider_model_provider_not_found(mock_shared_config):
+    """Test handling of non-existent provider"""
+    with pytest.raises(ValueError) as exc_info:
+        openai_compatible_provider_model("unknown_provider::gpt-4")
+    assert (
+        str(exc_info.value) == "OpenAI compatible provider unknown_provider not found"
+    )
+
+
+def test_openai_compatible_provider_model_no_base_url(mock_shared_config):
+    """Test handling of provider without base URL"""
+    mock_shared_config.return_value.openai_compatible_providers = [
+        {
+            "name": "test_provider",
+            "api_key": "test-key",
+        }
+    ]
+
+    with pytest.raises(ValueError) as exc_info:
+        openai_compatible_provider_model("test_provider::gpt-4")
+    assert (
+        str(exc_info.value)
+        == "OpenAI compatible provider test_provider has no base URL"
+    )
@@ -49,9 +49,15 @@ __all__ = [
     "DataSource",
     "DataSourceType",
     "DataSourceProperty",
+    "Finetune",
+    "FineTuneStatusType",
     "TaskOutputRatingType",
     "TaskRequirement",
     "TaskDeterminism",
+    "DatasetSplitDefinition",
+    "DatasetSplit",
+    "RequirementRating",
+    "TaskRequirement",
     "strict_mode",
     "set_strict_mode",
 ]
@@ -85,30 +91,71 @@ class TaskOutputRatingType(str, Enum):
     """Defines the types of rating systems available for task outputs."""
 
     five_star = "five_star"
+    pass_fail = "pass_fail"
+    pass_fail_critical = "pass_fail_critical"
     custom = "custom"
 
 
+class RequirementRating(BaseModel):
+    """Rating for a specific requirement within a task output."""
+
+    value: float = Field(
+        description="The rating value. Interpretation depends on rating type"
+    )
+    type: TaskOutputRatingType = Field(description="The type of rating")
+
+
 class TaskOutputRating(KilnBaseModel):
     """
     A rating for a task output, including an overall rating and ratings for each requirement.
 
-    Only supports five star ratings for now, but extensible for custom values.
+    Supports:
+    - five_star: 1-5 star ratings
+    - pass_fail: boolean pass/fail (1.0 = pass, 0.0 = fail)
+    - pass_fail_critical: tri-state (1.0 = pass, 0.0 = fail, -1.0 = critical fail)
     """
 
     type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
     value: float | None = Field(
-        description="The overall rating value (typically 1-5 stars).",
+        description="The rating value. Interpretation depends on rating type:\n- five_star: 1-5 stars\n- pass_fail: 1.0 (pass) or 0.0 (fail)\n- pass_fail_critical: 1.0 (pass), 0.0 (fail), or -1.0 (critical fail)",
         default=None,
     )
-    requirement_ratings: Dict[ID_TYPE, float] = Field(
+    requirement_ratings: Dict[ID_TYPE, RequirementRating] = Field(
         default={},
-        description="The ratings of the requirements of the task. The keys are the ids of the requirements. The values are the ratings (typically 1-5 stars).",
+        description="The ratings of the requirements of the task.",
     )
 
+    # Previously we stored rating values as a dict of floats, but now we store them as RequirementRating objects.
+    @model_validator(mode="before")
+    def upgrade_old_format(cls, data: dict) -> dict:
+        if not isinstance(data, dict):
+            return data
+
+        # Check if we have the old format (dict of floats)
+        req_ratings = data.get("requirement_ratings", {})
+        if req_ratings and all(
+            isinstance(v, (int, float)) for v in req_ratings.values()
+        ):
+            # Convert each float to a RequirementRating object
+            # all ratings are five star at the point we used this format
+            data["requirement_ratings"] = {
+                k: {"value": v, "type": TaskOutputRatingType.five_star}
+                for k, v in req_ratings.items()
+            }
+
+        return data
+
     # Used to select high quality outputs for example selection (MultiShotPromptBuilder, etc)
     def is_high_quality(self) -> bool:
+        if self.value is None:
+            return False
+
         if self.type == TaskOutputRatingType.five_star:
-            return self.value is not None and self.value >= 4
+            return self.value >= 4
+        elif self.type == TaskOutputRatingType.pass_fail:
+            return self.value == 1.0
+        elif self.type == TaskOutputRatingType.pass_fail_critical:
+            return self.value == 1.0
         return False
 
     @model_validator(mode="after")
@@ -116,24 +163,61 @@ class TaskOutputRating(KilnBaseModel):
         if self.type not in TaskOutputRatingType:
             raise ValueError(f"Invalid rating type: {self.type}")
 
-        if self.type == TaskOutputRatingType.five_star:
-            if self.value is not None:
-                self._validate_five_star(self.value, "overall rating")
-            for req_id, req_rating in self.requirement_ratings.items():
-                self._validate_five_star(req_rating, f"requirement rating for {req_id}")
+        # Overall rating is optional
+        if self.value is not None:
+            self._validate_rating(self.type, self.value, "overall rating")
+
+        for req_id, req_rating in self.requirement_ratings.items():
+            self._validate_rating(
+                req_rating.type,
+                req_rating.value,
+                f"requirement rating for req ID: {req_id}",
+            )
 
         return self
 
-    def _validate_five_star(self, rating: float, rating_name: str) -> None:
-        if not isinstance(rating, float) or not rating.is_integer():
+    def _validate_rating(
+        self, type: TaskOutputRatingType, rating: float | None, rating_name: str
+    ) -> None:
+        if type == TaskOutputRatingType.five_star:
+            self._validate_five_star(rating, rating_name)
+        elif type == TaskOutputRatingType.pass_fail:
+            self._validate_pass_fail(rating, rating_name)
+        elif type == TaskOutputRatingType.pass_fail_critical:
+            self._validate_pass_fail_critical(rating, rating_name)
+
+    def _validate_five_star(self, rating: float | None, rating_name: str) -> None:
+        if rating is None or not isinstance(rating, float) or not rating.is_integer():
             raise ValueError(
-                f"{rating_name.capitalize()} of type five_star must be an integer value (1.0, 2.0, 3.0, 4.0, or 5.0)"
+                f"{rating_name.capitalize()} of type five_star must be an integer value (1-5)"
             )
         if rating < 1 or rating > 5:
             raise ValueError(
                 f"{rating_name.capitalize()} of type five_star must be between 1 and 5 stars"
             )
 
+    def _validate_pass_fail(self, rating: float | None, rating_name: str) -> None:
+        if rating is None or not isinstance(rating, float) or not rating.is_integer():
+            raise ValueError(
+                f"{rating_name.capitalize()} of type pass_fail must be an integer value (0 or 1)"
+            )
+        if rating not in [0, 1]:
+            raise ValueError(
+                f"{rating_name.capitalize()} of type pass_fail must be 0 (fail) or 1 (pass)"
+            )
+
+    def _validate_pass_fail_critical(
+        self, rating: float | None, rating_name: str
+    ) -> None:
+        if rating is None or not isinstance(rating, float) or not rating.is_integer():
+            raise ValueError(
+                f"{rating_name.capitalize()} of type pass_fail_critical must be an integer value (-1, 0, or 1)"
+            )
+        if rating not in [-1, 0, 1]:
+            raise ValueError(
+                f"{rating_name.capitalize()} of type pass_fail_critical must be -1 (critical fail), 0 (fail), or 1 (pass)"
+            )
+
 
 class TaskOutput(KilnBaseModel):
     """
@@ -381,6 +465,10 @@ class TaskRun(KilnParentedModel):
         default=None,
         description="Intermediate outputs from the task run. Keys are the names of the intermediate output steps (cot=chain of thought, etc), values are the output data.",
     )
+    tags: List[str] = Field(
+        default=[],
+        description="Tags for the task run. Tags are used to categorize task runs for filtering and reporting.",
+    )
 
     def parent_task(self) -> Task | None:
         if not isinstance(self.parent, Task):
@@ -442,6 +530,16 @@ class TaskRun(KilnParentedModel):
             raise ValueError("input_source is required when strict mode is enabled")
         return self
 
+    @model_validator(mode="after")
+    def validate_tags(self) -> Self:
+        for tag in self.tags:
+            if not tag:
+                raise ValueError("Tags cannot be empty strings")
+            if " " in tag:
+                raise ValueError("Tags cannot contain spaces. Try underscores.")
+
+        return self
+
 
 # Define the type alias for clarity
 DatasetFilter = Callable[[TaskRun], bool]
@@ -602,7 +700,7 @@ class TaskRequirement(BaseModel):
     Defines a specific requirement that should be met by task outputs.
 
     Includes an identifier, name, description, instruction for meeting the requirement,
-    and priority level.
+    priority level, and rating type (five_star, pass_fail, pass_fail_critical, custom).
     """
 
     id: ID_TYPE = ID_FIELD
@@ -610,6 +708,7 @@ class TaskRequirement(BaseModel):
     description: str | None = Field(default=None)
     instruction: str = Field(min_length=1)
     priority: Priority = Field(default=Priority.p2)
+    type: TaskOutputRatingType = Field(default=TaskOutputRatingType.five_star)
 
 
 class TaskDeterminism(str, Enum):
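
With the new type field, each requirement can declare which rating scale should be used when reviewing outputs. A hedged sketch (the name and instruction text are invented):

    from kiln_ai.datamodel import TaskOutputRatingType, TaskRequirement

    requirement = TaskRequirement(
        name="No PII",
        instruction="The output must not include personal data.",
        type=TaskOutputRatingType.pass_fail_critical,
    )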
@@ -142,14 +142,8 @@ class KilnBaseModel(BaseModel):
         # modified time of file for cache invalidation. From file descriptor so it's atomic w read.
         mtime_ns = os.fstat(file.fileno()).st_mtime_ns
         file_data = file.read()
-        # TODO P2 perf: parsing the JSON twice here.
-        # Once for model_type, once for model. Can't call model_validate with parsed json because enum types break; they get strings instead of enums.
         parsed_json = json.loads(file_data)
-        m = cls.model_validate_json(
-            file_data,
-            strict=True,
-            context={"loading_from_file": True},
-        )
+        m = cls.model_validate(parsed_json, context={"loading_from_file": True})
         if not isinstance(m, cls):
             raise ValueError(f"Loaded model is not of type {cls.__name__}")
         m._loaded_from_file = True
@@ -471,7 +465,7 @@ class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
         validation_errors = []
 
         try:
-            instance = cls.model_validate(data, strict=True)
+            instance = cls.model_validate(data)
             if path is not None:
                 instance.path = path
             if parent is not None and isinstance(instance, KilnParentedModel):
@@ -499,7 +493,7 @@ class KilnParentModel(KilnBaseModel, metaclass=ABCMeta):
             parent_type._validate_nested(**kwargs)
         elif issubclass(parent_type, KilnParentedModel):
             # Root node
-            subinstance = parent_type.model_validate(value, strict=True)
+            subinstance = parent_type.model_validate(value)
             if instance is not None:
                 subinstance.parent = instance
             if save:
@@ -84,7 +84,7 @@ def task_run():
                 type=DataSourceType.human,
                 properties={"created_by": "test-user"},
             ),
-            rating=TaskOutputRating(rating=5, type=TaskOutputRatingType.five_star),
+            rating=TaskOutputRating(value=5, type=TaskOutputRatingType.five_star),
         ),
     )
 
@@ -439,3 +439,52 @@ def test_task_output_source_validation(tmp_path):
     assert os.path.exists(task_missing_output_source)
     task_run = TaskRun.load_from_file(task_missing_output_source)
     assert task_run.output.source is None
+
+
+def test_task_run_tags_validation():
+    # Setup basic output for TaskRun creation
+    output = TaskOutput(
+        output="test output",
+        source=DataSource(
+            type=DataSourceType.synthetic,
+            properties={
+                "model_name": "test-model",
+                "model_provider": "test-provider",
+                "adapter_name": "test-adapter",
+            },
+        ),
+    )
+
+    # Test 1: Valid tags should work
+    task_run = TaskRun(
+        input="test input",
+        output=output,
+        tags=["test_tag", "another_tag", "tag123"],
+    )
+    assert task_run.tags == ["test_tag", "another_tag", "tag123"]
+
+    # Test 2: Empty list of tags should work
+    task_run = TaskRun(
+        input="test input",
+        output=output,
+        tags=[],
+    )
+    assert task_run.tags == []
+
+    # Test 3: Empty string tag should fail
+    with pytest.raises(ValueError) as exc_info:
+        TaskRun(
+            input="test input",
+            output=output,
+            tags=["valid_tag", ""],
+        )
+    assert "Tags cannot be empty strings" in str(exc_info.value)
+
+    # Test 4: Tag with spaces should fail
+    with pytest.raises(ValueError) as exc_info:
+        TaskRun(
+            input="test input",
+            output=output,
+            tags=["valid_tag", "invalid tag"],
+        )
+    assert "Tags cannot contain spaces. Try underscores." in str(exc_info.value)
@@ -1,14 +1,34 @@
+import json
+
 import pytest
 from pydantic import ValidationError
 
-from kiln_ai.datamodel import TaskOutputRating, TaskOutputRatingType
+from kiln_ai.datamodel import RequirementRating, TaskOutputRating, TaskOutputRatingType
 
 
 def test_valid_task_output_rating():
     rating = TaskOutputRating(value=4.0, requirement_ratings={"req1": 5.0, "req2": 3.0})
     assert rating.type == TaskOutputRatingType.five_star
     assert rating.value == 4.0
-    assert rating.requirement_ratings == {"req1": 5.0, "req2": 3.0}
+    dumped = json.loads(rating.model_dump_json())
+    assert dumped["requirement_ratings"] == {
+        "req1": {"type": TaskOutputRatingType.five_star, "value": 5.0},
+        "req2": {"type": TaskOutputRatingType.five_star, "value": 3.0},
+    }
+
+    # new format
+    rating = TaskOutputRating(
+        value=4.0,
+        requirement_ratings={
+            "req1": {"type": TaskOutputRatingType.five_star, "value": 5.0},
+            "req2": {"type": TaskOutputRatingType.five_star, "value": 3.0},
+        },
+    )
+    dumped = json.loads(rating.model_dump_json())
+    assert dumped["requirement_ratings"] == {
+        "req1": {"type": TaskOutputRatingType.five_star, "value": 5.0},
+        "req2": {"type": TaskOutputRatingType.five_star, "value": 3.0},
+    }
 
 
 def test_invalid_rating_type():
@@ -40,34 +60,92 @@ def test_rating_below_range():
         TaskOutputRating(value=0.0)
 
 
-def test_valid_requirement_ratings():
-    rating = TaskOutputRating(
-        value=4.0, requirement_ratings={"req1": 5.0, "req2": 3.0, "req3": 1.0}
+def test_valid_requirement_ratings_old_format():
+    rating = TaskOutputRating.model_validate(
+        {"value": 4.0, "requirement_ratings": {"req1": 5.0, "req2": 3.0, "req3": 1.0}}
+    )
+    dumped = json.loads(rating.model_dump_json())
+    assert dumped["requirement_ratings"] == {
+        "req1": {"type": TaskOutputRatingType.five_star, "value": 5.0},
+        "req2": {"type": TaskOutputRatingType.five_star, "value": 3.0},
+        "req3": {"type": TaskOutputRatingType.five_star, "value": 1.0},
+    }
+
+
+def test_valid_requirement_ratings_new_format():
+    rating = TaskOutputRating.model_validate(
+        {
+            "value": 4.0,
+            "requirement_ratings": {
+                "req1": {"type": TaskOutputRatingType.five_star, "value": 5.0},
+                "req2": {"type": TaskOutputRatingType.five_star, "value": 3.0},
+                "req3": {"type": TaskOutputRatingType.five_star, "value": 1.0},
+            },
+        }
     )
-    assert rating.requirement_ratings == {"req1": 5.0, "req2": 3.0, "req3": 1.0}
+    dumped = json.loads(rating.model_dump_json())
+    assert dumped["requirement_ratings"] == {
+        "req1": {"type": TaskOutputRatingType.five_star, "value": 5.0},
+        "req2": {"type": TaskOutputRatingType.five_star, "value": 3.0},
+        "req3": {"type": TaskOutputRatingType.five_star, "value": 1.0},
+    }
 
 
 def test_invalid_requirement_rating_value():
     with pytest.raises(
         ValidationError,
-        match="Requirement rating for req1 of type five_star must be an integer value",
+        match="Requirement rating for req id: req1 of type five_star must be an integer value",
     ):
         TaskOutputRating(value=4.0, requirement_ratings={"req1": 3.5})
 
+    # new format
+    with pytest.raises(
+        ValidationError,
+        match="Requirement rating for req id: req1 of type five_star must be an integer value",
+    ):
+        TaskOutputRating(
+            value=4.0,
+            requirement_ratings={
+                "req1": {"type": TaskOutputRatingType.five_star, "value": 3.5}
+            },
+        )
+
 
 def test_requirement_rating_out_of_range():
     with pytest.raises(
         ValidationError,
-        match="Requirement rating for req1 of type five_star must be between 1 and 5 stars",
+        match="Requirement rating for req id: req1 of type five_star must be between 1 and 5 stars",
     ):
         TaskOutputRating(value=4.0, requirement_ratings={"req1": 6.0})
 
+    # new format
+    with pytest.raises(
+        ValidationError,
+        match="Requirement rating for req id: req1 of type five_star must be between 1 and 5 stars",
+    ):
+        TaskOutputRating(
+            value=4.0,
+            requirement_ratings={
+                "req1": {"type": TaskOutputRatingType.five_star, "value": 6.0}
+            },
+        )
+
 
 def test_empty_requirement_ratings():
     rating = TaskOutputRating(value=4.0)
     assert rating.requirement_ratings == {}
 
 
+def test_empty_requirement_ratings_integer():
+    rating = TaskOutputRating(
+        value=4,
+        requirement_ratings={
+            "req1": RequirementRating(type=TaskOutputRatingType.five_star, value=5),
+        },
+    )
+    assert rating.requirement_ratings["req1"].value == 5.0
+
+
 def test_invalid_id_type():
     with pytest.raises(ValidationError):
         TaskOutputRating(
@@ -77,13 +155,302 @@ def test_invalid_id_type():
             },
         )
 
+    # new format
+    with pytest.raises(ValidationError):
+        TaskOutputRating(
+            value=4.0,
+            requirement_ratings={
+                123: {"type": TaskOutputRatingType.five_star, "value": 4.0}
+            },
+        )
+
 
 def test_valid_custom_rating():
     rating = TaskOutputRating(
         type=TaskOutputRatingType.custom,
         value=31.459,
-        requirement_ratings={"req1": 42.0, "req2": 3.14},
+        requirement_ratings={
+            "req1": {"type": TaskOutputRatingType.custom, "value": 42.0},
+            "req2": {"type": TaskOutputRatingType.custom, "value": 3.14},
+        },
     )
     assert rating.type == TaskOutputRatingType.custom
     assert rating.value == 31.459
-    assert rating.requirement_ratings == {"req1": 42.0, "req2": 3.14}
+    dumped = json.loads(rating.model_dump_json())
+    assert dumped["requirement_ratings"] == {
+        "req1": {"type": TaskOutputRatingType.custom, "value": 42.0},
+        "req2": {"type": TaskOutputRatingType.custom, "value": 3.14},
+    }
+
+
+# We upgraded the format of requirement_ratings to be a dict of RequirementRating objects from a dict of floats
+def test_task_output_rating_format_upgrade():
+    # Test old format (dict of floats)
+    old_format = {
+        "type": "five_star",
+        "value": 4.0,
+        "requirement_ratings": {"req1": 5.0, "req2": 3.0},
+    }
+
+    rating = TaskOutputRating.model_validate(old_format)
+
+    # Verify the upgrade worked
+    assert isinstance(rating.requirement_ratings["req1"], RequirementRating)
+    assert rating.requirement_ratings["req1"].value == 5.0
+    assert rating.requirement_ratings["req1"].type == TaskOutputRatingType.five_star
+    assert rating.requirement_ratings["req2"].value == 3.0
+    assert rating.requirement_ratings["req2"].type == TaskOutputRatingType.five_star
+
+    # Verify the json dump is new format
+    json_dump = json.loads(rating.model_dump_json())
+    assert json_dump["requirement_ratings"]["req1"]["type"] == "five_star"
+    assert json_dump["requirement_ratings"]["req1"]["value"] == 5.0
+    assert json_dump["requirement_ratings"]["req2"]["type"] == "five_star"
+    assert json_dump["requirement_ratings"]["req2"]["value"] == 3.0
+
+    # Test new format (dict of RequirementRating)
+    new_format = {
+        "type": "five_star",
+        "value": 4.0,
+        "requirement_ratings": {
+            "req1": {"value": 5.0, "type": "five_star"},
+            "req2": {"value": 3.0, "type": "five_star"},
+        },
+    }
+
+    rating = TaskOutputRating.model_validate(new_format)
+
+    # Verify new format works as expected
+    assert isinstance(rating.requirement_ratings["req1"], RequirementRating)
+    assert rating.requirement_ratings["req1"].value == 5.0
+    assert rating.requirement_ratings["req1"].type == TaskOutputRatingType.five_star
+
+    # Verify the json dump is new format
+    json_dump = json.loads(rating.model_dump_json())
+    assert json_dump["requirement_ratings"]["req1"]["type"] == "five_star"
+    assert json_dump["requirement_ratings"]["req1"]["value"] == 5.0
+    assert json_dump["requirement_ratings"]["req2"]["type"] == "five_star"
+    assert json_dump["requirement_ratings"]["req2"]["value"] == 3.0
+
+    # Test mixed format (should fail)
+    mixed_format = {
+        "type": "five_star",
+        "value": 4.0,
+        "requirement_ratings": {
+            "req1": 5.0,
+            "req2": {"value": 3.0, "type": "five_star"},
+        },
+    }
+
+    with pytest.raises(ValidationError):
+        TaskOutputRating.model_validate(mixed_format)
+
+    # Test empty requirement_ratings
+    empty_format = {"type": "five_star", "value": 4.0, "requirement_ratings": {}}
+
+    rating = TaskOutputRating.model_validate(empty_format)
+    assert rating.requirement_ratings == {}
+
+
+def test_valid_pass_fail_rating():
+    rating = TaskOutputRating(
+        type=TaskOutputRatingType.pass_fail,
+        value=1.0,
+        requirement_ratings={
+            "req1": {"type": TaskOutputRatingType.pass_fail, "value": 1.0},
+            "req2": {"type": TaskOutputRatingType.pass_fail, "value": 0.0},
+        },
+    )
+    assert rating.type == TaskOutputRatingType.pass_fail
+    assert rating.value == 1.0
+    dumped = json.loads(rating.model_dump_json())
+    assert dumped["requirement_ratings"] == {
+        "req1": {"type": TaskOutputRatingType.pass_fail, "value": 1.0},
+        "req2": {"type": TaskOutputRatingType.pass_fail, "value": 0.0},
+    }
+
+
+def test_invalid_pass_fail_rating_value():
+    with pytest.raises(
+        ValidationError,
+        match="Overall rating of type pass_fail must be an integer value",
+    ):
+        TaskOutputRating(type=TaskOutputRatingType.pass_fail, value=0.5)
+
+    with pytest.raises(
+        ValidationError,
+        match="Requirement rating for req id: req1 of type pass_fail must be an integer value",
+    ):
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail,
+            value=1.0,
+            requirement_ratings={
+                "req1": {"type": TaskOutputRatingType.pass_fail, "value": 0.5}
+            },
+        )
+
+
+def test_pass_fail_rating_out_of_range():
+    with pytest.raises(
+        ValidationError,
+        match="Overall rating of type pass_fail must be 0 \\(fail\\) or 1 \\(pass\\)",
+    ):
+        TaskOutputRating(type=TaskOutputRatingType.pass_fail, value=2.0)
+
+    with pytest.raises(
+        ValidationError,
+        match="Requirement rating for req id: req1 of type pass_fail must be 0 \\(fail\\) or 1 \\(pass\\)",
+    ):
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail,
+            value=1.0,
+            requirement_ratings={
+                "req1": {"type": TaskOutputRatingType.pass_fail, "value": 2.0}
+            },
+        )
+
+
+def test_valid_pass_fail_critical_rating():
+    rating = TaskOutputRating(
+        type=TaskOutputRatingType.pass_fail_critical,
+        value=1.0,
+        requirement_ratings={
+            "req1": {"type": TaskOutputRatingType.pass_fail_critical, "value": 1.0},
+            "req2": {"type": TaskOutputRatingType.pass_fail_critical, "value": 0.0},
+            "req3": {"type": TaskOutputRatingType.pass_fail_critical, "value": -1.0},
+        },
+    )
+    assert rating.type == TaskOutputRatingType.pass_fail_critical
+    assert rating.value == 1.0
+    dumped = json.loads(rating.model_dump_json())
+    assert dumped["requirement_ratings"] == {
+        "req1": {"type": TaskOutputRatingType.pass_fail_critical, "value": 1.0},
+        "req2": {"type": TaskOutputRatingType.pass_fail_critical, "value": 0.0},
+        "req3": {"type": TaskOutputRatingType.pass_fail_critical, "value": -1.0},
+    }
+
+
+def test_invalid_pass_fail_critical_rating_value():
+    with pytest.raises(
+        ValidationError,
+        match="Overall rating of type pass_fail_critical must be an integer value",
+    ):
+        TaskOutputRating(type=TaskOutputRatingType.pass_fail_critical, value=0.5)
+
+    with pytest.raises(
+        ValidationError,
+        match="Requirement rating for req id: req1 of type pass_fail_critical must be an integer value",
+    ):
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail_critical,
+            value=1.0,
+            requirement_ratings={
+                "req1": {"type": TaskOutputRatingType.pass_fail_critical, "value": 0.5}
+            },
+        )
+
+
+def test_pass_fail_critical_rating_out_of_range():
+    with pytest.raises(
+        ValidationError,
+        match="Overall rating of type pass_fail_critical must be -1 \\(critical fail\\), 0 \\(fail\\), or 1 \\(pass\\)",
+    ):
+        TaskOutputRating(type=TaskOutputRatingType.pass_fail_critical, value=2.0)
+
+    with pytest.raises(
+        ValidationError,
+        match="Requirement rating for req id: req1 of type pass_fail_critical must be -1 \\(critical fail\\), 0 \\(fail\\), or 1 \\(pass\\)",
+    ):
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail_critical,
+            value=1.0,
+            requirement_ratings={
+                "req1": {"type": TaskOutputRatingType.pass_fail_critical, "value": 2.0}
+            },
+        )
+
+
+def test_is_high_quality():
+    # Test five_star ratings
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.five_star, value=5.0
+        ).is_high_quality()
+        is True
+    )
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.five_star, value=4.0
+        ).is_high_quality()
+        is True
+    )
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.five_star, value=3.0
+        ).is_high_quality()
+        is False
+    )
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.five_star, value=2.0
+        ).is_high_quality()
+        is False
+    )
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.five_star, value=1.0
+        ).is_high_quality()
+        is False
+    )
+
+    # Test pass_fail ratings
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail, value=1.0
+        ).is_high_quality()
+        is True
+    )
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail, value=0.0
+        ).is_high_quality()
+        is False
+    )
+
+    # Test pass_fail_critical ratings
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail_critical, value=1.0
+        ).is_high_quality()
+        is True
+    )
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail_critical, value=0.0
+        ).is_high_quality()
+        is False
+    )
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.pass_fail_critical, value=-1.0
+        ).is_high_quality()
+        is False
+    )
+
+    # Test custom ratings (should always return False)
+    assert (
+        TaskOutputRating(
+            type=TaskOutputRatingType.custom, value=100.0
+        ).is_high_quality()
+        is False
+    )
+    assert (
+        TaskOutputRating(type=TaskOutputRatingType.custom, value=0.0).is_high_quality()
+        is False
+    )
+
+    # Test None value
+    assert (
+        TaskOutputRating(type=TaskOutputRatingType.custom, value=None).is_high_quality()
+        is False
+    )
kiln_ai/utils/config.py CHANGED
@@ -2,7 +2,7 @@ import getpass
 import os
 import threading
 from pathlib import Path
-from typing import Any, Callable, Dict, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 import yaml
 
@@ -15,12 +15,14 @@ class ConfigProperty:
         env_var: Optional[str] = None,
         default_lambda: Optional[Callable[[], Any]] = None,
         sensitive: bool = False,
+        sensitive_keys: Optional[List[str]] = None,
     ):
         self.type = type_
         self.default = default
         self.env_var = env_var
         self.default_lambda = default_lambda
         self.sensitive = sensitive
+        self.sensitive_keys = sensitive_keys
 
 
 class Config:
@@ -84,6 +86,11 @@ class Config:
                 list,
                 default_lambda=lambda: [],
             ),
+            "openai_compatible_providers": ConfigProperty(
+                list,
+                default_lambda=lambda: [],
+                sensitive_keys=["api_key"],
+            ),
         }
         self._settings = self.load_settings()
 
@@ -150,14 +157,26 @@ class Config:
         return settings
 
     def settings(self, hide_sensitive=False) -> Dict[str, Any]:
-        if hide_sensitive:
-            return {
-                k: "[hidden]"
-                if k in self._properties and self._properties[k].sensitive
-                else v
-                for k, v in self._settings.items()
-            }
-        return self._settings
+        if not hide_sensitive:
+            return self._settings
+
+        settings = {
+            k: "[hidden]"
+            if k in self._properties and self._properties[k].sensitive
+            else v
+            for k, v in self._settings.items()
+        }
+        # Hide sensitive keys in lists. Could generalize this if we every have more types, but right not it's only needed for root elements of lists
+        for key, value in settings.items():
+            if key in self._properties and self._properties[key].sensitive_keys:
+                sensitive_keys = self._properties[key].sensitive_keys or []
+                for sensitive_key in sensitive_keys:
+                    if isinstance(value, list):
+                        for item in value:
+                            if sensitive_key in item:
+                                item[sensitive_key] = "[hidden]"
+
+        return settings
 
     def save_setting(self, name: str, value: Any):
         self.update_settings({name: value})
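
The sensitive_keys option complements the existing sensitive flag: instead of hiding a whole setting, it masks named keys inside each element of a list-valued setting when settings(hide_sensitive=True) is called. A short sketch of the observable behaviour (the provider entry is a placeholder):

    from kiln_ai.utils.config import Config

    config = Config.shared()
    config.save_setting(
        "openai_compatible_providers",
        [{"name": "gateway", "base_url": "https://llm.example.com/v1", "api_key": "secret"}],
    )
    shown = config.settings(hide_sensitive=True)
    assert shown["openai_compatible_providers"][0]["api_key"] == "[hidden]"
    assert shown["openai_compatible_providers"][0]["base_url"] == "https://llm.example.com/v1"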
@@ -27,6 +27,7 @@ def config_with_yaml(mock_yaml_file):
             ),
             "int_property": ConfigProperty(int, default=0),
             "empty_property": ConfigProperty(str),
+            "list_of_objects": ConfigProperty(list, default=[]),
         }
     )
 
@@ -251,3 +252,50 @@ def test_stale_values_bug(config_with_yaml):
     # Simulate updating the settings file with set_settings
     config_with_yaml.update_settings({"example_property": "third_value"})
     assert config_with_yaml.example_property == "third_value"
+
+
+async def test_openai_compatible_providers():
+    config = Config.shared()
+    assert config.openai_compatible_providers == []
+
+    new_settings = [
+        {
+            "name": "provider1",
+            "url": "https://provider1.com",
+            "api_key": "password1",
+        },
+        {
+            "name": "provider2",
+            "url": "https://provider2.com",
+        },
+    ]
+    config.save_setting("openai_compatible_providers", new_settings)
+    assert config.openai_compatible_providers == new_settings
+
+    # Test that sensitive keys are hidden
+    settings = config.settings(hide_sensitive=True)
+    assert settings["openai_compatible_providers"] == [
+        {"name": "provider1", "url": "https://provider1.com", "api_key": "[hidden]"},
+        {"name": "provider2", "url": "https://provider2.com"},
+    ]
+
+
+def test_yaml_persistence_structured_data(config_with_yaml, mock_yaml_file):
+    # Set a value
+    new_settings = [
+        {
+            "name": "provider1",
+            "url": "https://provider1.com",
+            "api_key": "password1",
+        },
+        {
+            "name": "provider2",
+            "url": "https://provider2.com",
+        },
+    ]
+    config_with_yaml.list_of_objects = new_settings
+
+    # Check that the value was saved to the YAML file
+    with open(mock_yaml_file, "r") as f:
+        saved_settings = yaml.safe_load(f)
+    assert saved_settings["list_of_objects"] == new_settings
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kiln-ai
-Version: 0.7.1
+Version: 0.8.1
 Summary: Kiln AI
 Project-URL: Homepage, https://getkiln.ai
 Project-URL: Repository, https://github.com/Kiln-AI/kiln
@@ -1,17 +1,17 @@
 kiln_ai/__init__.py,sha256=Sc4z8LRVFMwJUoc_DPVUriSXTZ6PO9MaJ80PhRbKyB8,34
 kiln_ai/adapters/__init__.py,sha256=8-YlnTh3gsaPeEArFVLIqGE7-tbssI42fub4OQBp_DA,970
-kiln_ai/adapters/adapter_registry.py,sha256=EnB0rUIZ0KbBd2nxkNjwUqOpldwqPDyJ9LzIQoDl2GU,634
-kiln_ai/adapters/base_adapter.py,sha256=E_RfXxzEhW-i066xOhZdPuTM7OPKQv70hDpfMsxfYEs,6145
-kiln_ai/adapters/langchain_adapters.py,sha256=NeTZ8WbQTnVu8rtFX6AwkdjFj2ihyhe_vxNxM-_v2yE,10584
-kiln_ai/adapters/ml_model_list.py,sha256=jEayUwDMYjMJTEL5fJ4jKacWR0OKkQ64q4bL10fnMsE,25062
+kiln_ai/adapters/adapter_registry.py,sha256=zO-0_CWF3ZGA-1420_0Uwq976o3-7WXxEY_aTeu0PzQ,688
+kiln_ai/adapters/base_adapter.py,sha256=POSdMrZFqd0IJnLpVoyc1w9CGhdNtePZyQPgdBBRUpQ,6276
+kiln_ai/adapters/langchain_adapters.py,sha256=S9VZ9JLBDEue-vh00iNv4wM1rdBQRNnF0ubeOFLAdZc,10861
+kiln_ai/adapters/ml_model_list.py,sha256=Fl8PUlecibRjcWkKFwfge4cFz7jusVMeK35ewaWw8ac,25446
 kiln_ai/adapters/ollama_tools.py,sha256=0Of6ySbJ2d4j--9laOL6QKgRUQSrqX8dJUIrz20n59s,3561
 kiln_ai/adapters/prompt_builders.py,sha256=Mdu-f1mC9hWIDwoF7Qwd9F99GDx6oNGvtEZN-SrOsNM,10325
-kiln_ai/adapters/provider_tools.py,sha256=Y694_oDm5wKs-WGtnI5FAh1H_wgs0EXUqEcr5rVA6SY,10768
+kiln_ai/adapters/provider_tools.py,sha256=m7X93DFbnYnw5H2HDumFJKpTKmeau-GZLv-SUmssJZ0,12381
 kiln_ai/adapters/test_langchain_adapter.py,sha256=QiVdCUJJ_uEzD0uA0jYMC3ZO4NTGJLm9iWTwvQfdFxI,12037
 kiln_ai/adapters/test_ollama_tools.py,sha256=2KwYVaj3ySV3ld-z51TCGbJEMdb3MZj2eoEicIWz3Q4,2552
 kiln_ai/adapters/test_prompt_adaptors.py,sha256=Mc0oSYgDLxfP2u3GVR_iDWaYctTQ8Ug1u6UGvWA90lM,7494
 kiln_ai/adapters/test_prompt_builders.py,sha256=sU0bSBZa9Y4Q-mmkDf3HbQ0MNSWk5o9bC9sNgtnBokk,14598
-kiln_ai/adapters/test_provider_tools.py,sha256=b9CDC-Cul2WHoVmON1IOp6BI5jiJNqJCC0rnr1Fm8P8,17468
+kiln_ai/adapters/test_provider_tools.py,sha256=S1PSXd5MJnPvBe7Hq4FijptB0lbmym2E6iztncAvuUg,20752
 kiln_ai/adapters/test_saving_adapter_results.py,sha256=SYYh2xY1zmeKhFHfWAuEY4pEiLd8SitSV5ewGOTmaOI,6447
 kiln_ai/adapters/test_structured_output.py,sha256=9Mgng-HOXiZ_WcJG5cpMWhtsdJt8Rn-7qIouBWvWVoU,9324
 kiln_ai/adapters/data_gen/__init__.py,sha256=QTZWaf7kq5BorhPvexJfwDEKmjRmIbhwW9ei8LW2SIs,276
@@ -31,28 +31,28 @@ kiln_ai/adapters/fine_tune/test_openai_finetune.py,sha256=EF-f0JbVaPiVXF0eBYbwTK
 kiln_ai/adapters/repair/__init__.py,sha256=dOO9MEpEhjiwzDVFg3MNfA2bKMPlax9iekDatpTkX8E,217
 kiln_ai/adapters/repair/repair_task.py,sha256=L7WTFEpfaGpWXHPQf7BTNL0wiDPbeBIVqn7qNV_SeZc,3354
 kiln_ai/adapters/repair/test_repair_task.py,sha256=JBcyqyQYWniiUo4FSle9kUEsnbTsl5JN1LTRN1SRnrE,7940
-kiln_ai/datamodel/__init__.py,sha256=qavy8MSzO2n9O5KMrMHXkmj2nG949_vK4Q_Y4oSiuqw,24390
-kiln_ai/datamodel/basemodel.py,sha256=H2e_wvhoqqSJLz96xj9uVG-nXp5bgiuPwxJXRI4qZuU,21301
+kiln_ai/datamodel/__init__.py,sha256=zhiyzeEgaE3IYunmv6KEjX43Biby6uxBjBMMEMAjE1g,28337
+kiln_ai/datamodel/basemodel.py,sha256=zWyoYgsA2tmP55jl9H18xQ0yl9vM98aTOFJTUnW5ulU,20984
 kiln_ai/datamodel/json_schema.py,sha256=l4BIq1ItLHgcSHqsqDOchegLLHY48U4yR0SP2aMb4i0,2449
 kiln_ai/datamodel/model_cache.py,sha256=d8VjPp0p5BhrGSkx9soKyxO6VWW-bcesNSJI21ySvmA,4369
 kiln_ai/datamodel/registry.py,sha256=XwGFXJFKZtOpR1Z9ven6SftggfADdZRm8TFxCEVtfUQ,957
 kiln_ai/datamodel/test_basemodel.py,sha256=r40jWaW1073ZdIhHe-GHFE8jJDD9ocauItInOsK8pWU,15234
-kiln_ai/datamodel/test_dataset_split.py,sha256=aBjHVyTdt4mWXEKBkvvchEEZSj8jUwhXRZ37LbBxTi4,7265
+kiln_ai/datamodel/test_dataset_split.py,sha256=Ug-vbga-opGN_LF51Mszx5NN4wXbx3MIP1LiNzIn5Nw,7264
 kiln_ai/datamodel/test_datasource.py,sha256=GAiZz31qezVVPwFqnt8wHMu15WvtlV89jw8C1Ue6YNI,3165
 kiln_ai/datamodel/test_example_models.py,sha256=9Jhc0bvbM4hCjJGiQNgWH5rwyIsGuneAD8h4o1P3zAY,20356
 kiln_ai/datamodel/test_json_schema.py,sha256=vdLnTQxxrcmuSrf6iOmkrmpfh7JnxqIw4B4dbDAAcZ4,3199
 kiln_ai/datamodel/test_model_cache.py,sha256=9HvK2etVZJyepdlRz5ja7u1CnyzhsV4_BupJF77yBxE,7285
-kiln_ai/datamodel/test_models.py,sha256=-ygQe8XeiZcZJxaffgK5KudRzHXs52ZDEDUSoz8B7eI,13665
+kiln_ai/datamodel/test_models.py,sha256=t2Uthl559QioTyFAbQUk4BD3PqAywl3u1RSh4tHiMP0,15071
 kiln_ai/datamodel/test_nested_save.py,sha256=xciCddqvPyKyoyjC5Lx_3Kh1t4LJv1xYRAPazR3SRcs,5588
-kiln_ai/datamodel/test_output_rating.py,sha256=iw7fVUAPORA-0-VFiikZV3NDycGFaFMHSX1a38t_aQA,2647
+kiln_ai/datamodel/test_output_rating.py,sha256=zvPIp2shAgCs2RQBgwYoL09fRA3krHvgAqUa91RlWR0,15125
 kiln_ai/datamodel/test_registry.py,sha256=PhS4anLi5Bf_023obuTlO5DALhtPB8WIc_bX12Yg6Po,2705
 kiln_ai/utils/__init__.py,sha256=PTD0MwBCKAMIOGsTAwsFaJOusTJJoRFTfOGqRvCaU-E,142
-kiln_ai/utils/config.py,sha256=BZpARYTcK0vNGo_h-0Fjp9aP-1xZYAGEuYS0HdBWWHA,5946
+kiln_ai/utils/config.py,sha256=u289b2AHuQoPup_vILTSpgsO29fxJyU8zy8BwADAtvs,6859
 kiln_ai/utils/formatting.py,sha256=VtB9oag0lOGv17dwT7OPX_3HzBfaU9GsLH-iLete0yM,97
 kiln_ai/utils/name_generator.py,sha256=v26TgpCwQbhQFcZvzgjZvURinjrOyyFhxpsI6NQrHKc,1914
-kiln_ai/utils/test_config.py,sha256=pTYItz5WD15rTRdxKE7vszXF_mb-dik2qrFWzkVemEY,7671
+kiln_ai/utils/test_config.py,sha256=Jw3nMFeIgZUsZDRJJY2HpB-2EkR2NoZ-rDe_o9oA7ws,9174
 kiln_ai/utils/test_name_geneator.py,sha256=9-hSTBshyakqlPbFnNcggwLrL7lcPTitauBYHg9jFWI,1513
-kiln_ai-0.7.1.dist-info/METADATA,sha256=Hl--1C_wZrj_ui9WByLBgjrSzielnezjX8CR7JCJboQ,9050
-kiln_ai-0.7.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-kiln_ai-0.7.1.dist-info/licenses/LICENSE.txt,sha256=_NA5pnTYgRRr4qH6lE3X-TuZJ8iRcMUi5ASoGr-lEx8,1209
-kiln_ai-0.7.1.dist-info/RECORD,,
+kiln_ai-0.8.1.dist-info/METADATA,sha256=hcnd6e-iYHbE9GsI0W2deM8O6UgZOXl70VI1j8kIJrg,9050
+kiln_ai-0.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+kiln_ai-0.8.1.dist-info/licenses/LICENSE.txt,sha256=_NA5pnTYgRRr4qH6lE3X-TuZJ8iRcMUi5ASoGr-lEx8,1209
+kiln_ai-0.8.1.dist-info/RECORD,,