seekrai 0.5.26__py3-none-any.whl → 0.5.28__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- seekrai/types/finetune.py +206 -3
- {seekrai-0.5.26.dist-info → seekrai-0.5.28.dist-info}/METADATA +1 -1
- {seekrai-0.5.26.dist-info → seekrai-0.5.28.dist-info}/RECORD +6 -6
- {seekrai-0.5.26.dist-info → seekrai-0.5.28.dist-info}/WHEEL +1 -1
- {seekrai-0.5.26.dist-info → seekrai-0.5.28.dist-info}/entry_points.txt +0 -0
- {seekrai-0.5.26.dist-info → seekrai-0.5.28.dist-info}/licenses/LICENSE +0 -0
seekrai/types/finetune.py
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import warnings
|
|
3
4
|
from datetime import datetime
|
|
4
5
|
from enum import Enum
|
|
5
|
-
from typing import Any, Dict, List, Literal, Optional
|
|
6
|
+
from typing import Any, Callable, Dict, List, Literal, Optional
|
|
6
7
|
|
|
7
|
-
from pydantic import Field
|
|
8
|
+
from pydantic import Field, field_validator, model_serializer, model_validator
|
|
8
9
|
|
|
9
10
|
from seekrai.types.abstract import BaseModel
|
|
10
11
|
from seekrai.types.common import (
|
|
11
12
|
ObjectType,
|
|
12
13
|
)
|
|
14
|
+
from seekrai.utils._log import log_info
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
class FinetuneJobStatus(str, Enum):
|
|
@@ -77,8 +79,28 @@ class FinetuneEventType(str, Enum):
|
|
|
77
79
|
|
|
78
80
|
class FineTuneType(str, Enum):
    """Kind of fine-tuning job to run.

    ``GRPO`` is a deprecated alias kept for backwards compatibility;
    new code should use ``REINFORCEMENT`` instead.
    """

    STANDARD = "STANDARD"
    # Deprecated alias for REINFORCEMENT (see TrainingConfig's validator,
    # which emits a DeprecationWarning when this value is used).
    GRPO = "GRPO"  # deprecated
    PREFERENCE = "PREFERENCE"
    REINFORCEMENT = "REINFORCEMENT"
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class GraderType(str, Enum):
    """Grading strategy used by a reward ``Grader``.

    ``STRING_CHECK`` and ``TEXT_SIMILARITY`` additionally require an
    operation (see ``Grader.validate_operation``); the other two must
    not carry one.
    """

    FORMAT_CHECK = "format_check"
    MATH_ACCURACY = "math_accuracy"
    STRING_CHECK = "string_check"
    TEXT_SIMILARITY = "text_similarity"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class StringOperation(str, Enum):
    """Comparison operation for a ``string_check`` grader."""

    EQUALS = "equals"
    NOT_EQUALS = "not_equals"
    CONTAINS = "contains"
    CASE_INSENSITIVE_CONTAINS = "case_insensitive_contains"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class TextSimilarityOperation(str, Enum):
    """Similarity metric for a ``text_similarity`` grader."""

    BLEU = "bleu"
    ROUGE = "rouge"
|
|
82
104
|
|
|
83
105
|
|
|
84
106
|
class FinetuneEvent(BaseModel):
|
|
@@ -93,6 +115,17 @@ class FinetuneEvent(BaseModel):
|
|
|
93
115
|
# metrics that we expose
|
|
94
116
|
loss: float | None = None
|
|
95
117
|
epoch: float | None = None
|
|
118
|
+
reward: float | None = None
|
|
119
|
+
|
|
120
|
+
@model_serializer(mode="wrap")
def serialize_model(
    self, handler: Callable[[Any], dict[str, Any]]
) -> dict[str, Any]:
    """Serialize the event, omitting ``reward`` when it was never set.

    Delegates to pydantic's default serializer via *handler*, then
    strips a ``None`` ``reward`` key so non-RL events do not carry it.
    """
    serialized = handler(self)
    if serialized.get("reward") is None:
        del serialized["reward"]
    return serialized
|
|
96
129
|
|
|
97
130
|
|
|
98
131
|
class LoRAConfig(BaseModel):
|
|
@@ -111,6 +144,126 @@ class LoRAConfig(BaseModel):
|
|
|
111
144
|
extras: Dict[str, Any] = Field(default_factory=dict)
|
|
112
145
|
|
|
113
146
|
|
|
147
|
+
class Grader(BaseModel):
    """A single reward grader used by reinforcement fine-tuning.

    Attributes:
        type: Grading strategy to apply.
        weight: Relative reward weight in (0, 1]. ``None`` means
            "unspecified" — ``RewardComponents`` will assign/normalize it.
        operation: Sub-operation. Required for ``string_check`` and
            ``text_similarity`` graders, forbidden for ``format_check``
            and ``math_accuracy``.
    """

    type: GraderType
    weight: float | None = Field(default=None, gt=0.0, le=1.0)
    operation: StringOperation | TextSimilarityOperation | None = Field(default=None)

    @model_validator(mode="before")
    @classmethod
    def validate_operation(cls, data: Any) -> Any:
        """Cross-validate ``type`` and ``operation`` before field parsing.

        Coerces string operations (case-insensitively) to the enum that
        matches the grader type, and rejects operations on grader types
        that do not take one.

        Raises:
            ValueError: If a required operation is missing, invalid, or
                supplied to a grader type that forbids it.
        """
        if not isinstance(data, dict):
            # Non-dict payloads (e.g. already-built models) pass through.
            return data

        grader_type = data.get("type")
        operation_value = data.get("operation")

        if grader_type == GraderType.STRING_CHECK:
            if not operation_value:
                raise ValueError(
                    "string_check grader is missing required StringOperation"
                )
            if isinstance(operation_value, str):
                try:
                    # Convert to enum to validate it's a valid value
                    data["operation"] = StringOperation(operation_value.lower())
                except ValueError:
                    # `from None`: the enum's own ValueError adds nothing
                    # beyond this message, so suppress the chained traceback.
                    raise ValueError(
                        f"Invalid operation for string_check grader: "
                        f"expected StringOperation, but got type '{type(operation_value).__name__}' with value '{operation_value}'"
                    ) from None
        elif grader_type == GraderType.TEXT_SIMILARITY:
            if not operation_value:
                raise ValueError(
                    "text_similarity grader is missing required TextSimilarityOperation"
                )
            if isinstance(operation_value, str):
                try:
                    data["operation"] = TextSimilarityOperation(operation_value.lower())
                except ValueError:
                    raise ValueError(
                        f"Invalid operation for text_similarity grader: "
                        f"expected TextSimilarityOperation, got type '{type(operation_value).__name__}' with value '{operation_value}'"
                    ) from None

        elif grader_type in (GraderType.FORMAT_CHECK, GraderType.MATH_ACCURACY):
            if operation_value:
                raise ValueError(f"{grader_type} grader cannot have an operation")
            # Normalize "falsy but present" (e.g. empty string) to None.
            data["operation"] = None

        return data
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
class RewardComponents(BaseModel):
    """Weighted set of graders composing the RL reward signal.

    ``format_reward_weight`` plus the grader weights always end up
    summing to 1.0: either the caller supplies a complete, consistent
    set of weights, or unspecified grader weights are filled in and
    scaled to cover ``1 - format_reward_weight``.
    """

    format_reward_weight: float = Field(default=0.1, gt=0.0, le=1.0)
    graders: list[Grader] = Field(min_length=1)

    @model_validator(mode="after")
    def validate_weights(self) -> "RewardComponents":
        """Reconcile grader weights with ``format_reward_weight``.

        Raises:
            ValueError: If only a subset of graders carry weights, or if
                a fully specified weight set does not sum to 1.0.
        """
        format_weight_given = "format_reward_weight" in self.model_fields_set
        weight_flags = [g.weight is not None for g in self.graders]

        # Partially weighted grader lists are ambiguous — reject them.
        if any(weight_flags) and not all(weight_flags):
            raise ValueError(
                "Only some graders have weights specified. Either all graders must have weights specified, or none of them."
            )

        if all(weight_flags):
            if format_weight_given:
                # Everything explicit: demand an exact budget of 1.0.
                self._validate_weights_sum_to_one()
            else:
                # Grader weights explicit, format weight defaulted:
                # rescale graders around the default format weight.
                self._normalize_grader_weights()
        else:
            # No grader weights at all: start equal, then rescale.
            self._initialize_grader_weights()
            self._normalize_grader_weights()

        return self

    def _validate_weights_sum_to_one(self) -> None:
        """Validate that format_reward_weight and grader weights sum to 1.0"""
        total = self.format_reward_weight + sum(  # type: ignore[operator]
            g.weight  # type: ignore[misc]
            for g in self.graders
        )

        if abs(total - 1.0) > 1e-10:
            raise ValueError(
                f"When all weights are explicitly provided, they must sum to 1.0. "
                f"Got format_reward_weight={self.format_reward_weight}, "
                f"graders={self.graders}"
            )

    def _normalize_grader_weights(self) -> None:
        """Normalize only grader weights to fill (1 - format_reward_weight)"""
        current_total = sum(g.weight for g in self.graders)  # type: ignore[misc]
        target_total = 1.0 - self.format_reward_weight

        # Already consistent (within float tolerance): nothing to do.
        if abs(current_total - target_total) <= 1e-10:
            return

        scale = target_total / current_total
        for g in self.graders:
            previous = g.weight
            g.weight *= scale  # type: ignore[operator]
            log_info(
                f"{g.type}'s weight scaled from {previous} to {g.weight:.2f}"
            )

    def _initialize_grader_weights(self) -> None:
        """Initialize all grader weights when none are provided"""
        for g in self.graders:
            g.weight = 1.0
|
|
265
|
+
|
|
266
|
+
|
|
114
267
|
class TrainingConfig(BaseModel):
|
|
115
268
|
# training file ID
|
|
116
269
|
training_files: List[str]
|
|
@@ -136,6 +289,45 @@ class TrainingConfig(BaseModel):
|
|
|
136
289
|
fine_tune_type: FineTuneType = FineTuneType.STANDARD
|
|
137
290
|
# LoRA config
|
|
138
291
|
lora_config: Optional[LoRAConfig] = None
|
|
292
|
+
# reward_components are REINFORCEMENT-specific
|
|
293
|
+
reward_components: Optional[RewardComponents] = None
|
|
294
|
+
|
|
295
|
+
@model_validator(mode="after")
def validate_reward_components(self) -> "TrainingConfig":
    """Ensure ``reward_components`` is consistent with ``fine_tune_type``.

    RL job types (REINFORCEMENT and the deprecated GRPO alias) get a
    default reward configuration when none is supplied; non-RL job
    types must not carry one.

    Raises:
        ValueError: If reward components are provided for STANDARD or
            PREFERENCE fine-tuning.
    """
    rl_types = (FineTuneType.REINFORCEMENT, FineTuneType.GRPO)

    # TODO: make reward_components required for REINFORCEMENT instead of
    # defaulting here; currently defaulted for backwards-compatibility.
    if self.fine_tune_type in rl_types and not self.reward_components:
        self.reward_components = RewardComponents(
            format_reward_weight=0.1,
            graders=[Grader(type=GraderType.MATH_ACCURACY, weight=0.9)],
        )

    if self.reward_components:
        if self.fine_tune_type == FineTuneType.STANDARD:
            raise ValueError(
                "Reward components are incompatible with standard fine-tuning"
            )
        if self.fine_tune_type == FineTuneType.PREFERENCE:
            raise ValueError(
                "Reward components are incompatible with preference fine-tuning"
            )

    return self
|
|
321
|
+
|
|
322
|
+
@field_validator("fine_tune_type")
def validate_fine_tune_type(cls, v: Any) -> Any:
    """Pass the value through, warning if the deprecated GRPO alias is used."""
    if v != FineTuneType.GRPO:
        return v
    warnings.warn(
        "FineTuneType.GRPO is deprecated and will be removed in a future version. Use FineTuneType.REINFORCEMENT",
        DeprecationWarning,
        stacklevel=2,
    )
    return v
|
|
139
331
|
|
|
140
332
|
|
|
141
333
|
class AcceleratorType(str, Enum):
|
|
@@ -172,6 +364,7 @@ class FinetuneResponse(BaseModel):
|
|
|
172
364
|
id: str | None = None
|
|
173
365
|
# fine-tune type
|
|
174
366
|
fine_tune_type: FineTuneType = FineTuneType.STANDARD
|
|
367
|
+
reward_components: Optional[RewardComponents] = None
|
|
175
368
|
# training file id
|
|
176
369
|
training_files: List[str] | None = None
|
|
177
370
|
# validation file id
|
|
@@ -228,6 +421,16 @@ class FinetuneResponse(BaseModel):
|
|
|
228
421
|
# training_file_num_lines: int | None = Field(None, alias="TrainingFileNumLines")
|
|
229
422
|
# training_file_size: int | None = Field(None, alias="TrainingFileSize")
|
|
230
423
|
|
|
424
|
+
@model_serializer(mode="wrap")
|
|
425
|
+
def serialize_model(
|
|
426
|
+
self, handler: Callable[[Any], dict[str, Any]]
|
|
427
|
+
) -> dict[str, Any]:
|
|
428
|
+
# Remove 'reward_components' if it's None
|
|
429
|
+
dump_dict = handler(self)
|
|
430
|
+
if dump_dict.get("reward_components") is None:
|
|
431
|
+
del dump_dict["reward_components"]
|
|
432
|
+
return dump_dict
|
|
433
|
+
|
|
231
434
|
|
|
232
435
|
class FinetuneList(BaseModel):
|
|
233
436
|
# object type
|
|
@@ -59,7 +59,7 @@ seekrai/types/enums.py,sha256=sQ1CW-ctbhpV2jM1cEAEy7ZUdzZa0IC85YvycjvudHE,633
|
|
|
59
59
|
seekrai/types/error.py,sha256=uTKISs9aRC4_6zwirtNkanxepN8KY-SqCq0kNbfZylQ,370
|
|
60
60
|
seekrai/types/explainability.py,sha256=Ih-8hCm5r22EMMtr83cDy8vePo7_Ik7UdUcXhsj5Zm0,835
|
|
61
61
|
seekrai/types/files.py,sha256=kOy4s8D4tlsenyWmiiEyAS0jDAdxMScBu5j1GwQCf3E,2808
|
|
62
|
-
seekrai/types/finetune.py,sha256
|
|
62
|
+
seekrai/types/finetune.py,sha256=-dRSjRqJVu2-dEfykOJYTuuzt6Ok1nx91gJzQ_WAqEU,15341
|
|
63
63
|
seekrai/types/images.py,sha256=Fusj8OhVYFsT8kz636lRGGivLbPXo_ZNgakKwmzJi3U,914
|
|
64
64
|
seekrai/types/ingestion.py,sha256=uUdKOR4xqSfAXWQOR1UOltSlOnuyAwKVA1Q2a6Yslk8,919
|
|
65
65
|
seekrai/types/models.py,sha256=9Z0nvLdlAfpF8mNRW5-IqBdDHoE-3qQ5przmIDJgwLo,1345
|
|
@@ -72,8 +72,8 @@ seekrai/utils/api_helpers.py,sha256=0Y8BblNIr9h_R12zdmhkxgTlxgoRkbq84QNi4nNWGu8,
|
|
|
72
72
|
seekrai/utils/files.py,sha256=7ixn_hgV-6pEhYqLyOp-EN0o8c1CzUwJzX9n3PQ5oqo,7164
|
|
73
73
|
seekrai/utils/tools.py,sha256=jgJTL-dOIouDbEJLdQpQfpXhqaz_poQYS52adyUtBjo,1781
|
|
74
74
|
seekrai/version.py,sha256=q6iGQVFor8zXiPP5F-3vy9TndOxKv5JXbaNJ2kdOQws,125
|
|
75
|
-
seekrai-0.5.
|
|
76
|
-
seekrai-0.5.
|
|
77
|
-
seekrai-0.5.
|
|
78
|
-
seekrai-0.5.
|
|
79
|
-
seekrai-0.5.
|
|
75
|
+
seekrai-0.5.28.dist-info/METADATA,sha256=LQLnopWdiNd6l3OJP92tv-DzgJWOa_jw_0lbXooI2JE,4788
|
|
76
|
+
seekrai-0.5.28.dist-info/WHEEL,sha256=kJCRJT_g0adfAJzTx2GUMmS80rTJIVHRCfG0DQgLq3o,88
|
|
77
|
+
seekrai-0.5.28.dist-info/entry_points.txt,sha256=N49yOEGi1sK7Xr13F_rkkcOxQ88suyiMoOmRhUHTZ_U,48
|
|
78
|
+
seekrai-0.5.28.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
|
79
|
+
seekrai-0.5.28.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|