PyPI - freesolo - Versions diffs - 0.2.45__tar.gz → 0.2.46__tar.gz - Mend

freesolo 0.2.45__tar.gz → 0.2.46__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (129) hide show

{freesolo-0.2.45 → freesolo-0.2.46}/.github/workflows/publish-packages.yml RENAMED Viewed

@@ -264,7 +264,12 @@ jobs:
             echo "::error::NPM_TOKEN is not configured; refusing to skip publish."
             exit 1
           fi
-          bun publish --access public
+          # bun publish does not pick up NODE_AUTH_TOKEN or ~/.npmrc auth, so
+          # publish the bun-built package with npm and a project npmrc.
+          umask 077
+          printf '//registry.npmjs.org/:_authToken=%s\n' "$NODE_AUTH_TOKEN" > .npmrc
+          npm publish --access public
+          rm -f .npmrc
       - name: No npm package changes
         if: github.event_name == 'push' && steps.changes.outputs.npm_changed == 'false'

{freesolo-0.2.45 → freesolo-0.2.46}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: freesolo
-Version: 0.2.45
+Version: 0.2.46
 Summary: Tracing, evaluation, and training utilities for LLM applications.
 Requires-Python: >=3.10
 Requires-Dist: typing-extensions>=4.8.0

{freesolo-0.2.45 → freesolo-0.2.46}/pypi/freesolo/training/__init__.py RENAMED Viewed

@@ -4,6 +4,7 @@ from types import ModuleType
 from .grpo.config import GrpoConfig
 from .types import (
     DEFAULT_TRAINING_LORA_RANK,
+    SUPPORTED_TRAINING_MODELS,
     SUPPORTED_TRAINING_RENDERERS,
     TRAINING_BASE_MODEL,
     TRAINING_RENDERER_NAME,
@@ -12,6 +13,7 @@ from .types import (
     TrainSftOptions,
     resolve_sft_config,
     resolve_tinker_base_url,
+    resolve_training_model,
     resolve_training_renderer,
     tinker_checkpoint_run_config,
     tinker_run_config,
@@ -48,6 +50,7 @@ __all__ = [
     "REWARD_METADATA_MEAN_TEMPLATE",
     "REWARD_METADATA_RATE_TEMPLATE",
     "SFT_WANDB_SERIES",
+    "SUPPORTED_TRAINING_MODELS",
     "SUPPORTED_TRAINING_RENDERERS",
     "TRAINING_BASE_MODEL",
     "TRAINING_RENDERER_NAME",
@@ -57,6 +60,7 @@ __all__ = [
     "TrainSftOptions",
     "resolve_sft_config",
     "resolve_tinker_base_url",
+    "resolve_training_model",
     "resolve_training_renderer",
     "tinker_checkpoint_run_config",
     "tinker_run_config",

{freesolo-0.2.45 → freesolo-0.2.46}/pypi/freesolo/training/grpo/README.md RENAMED Viewed

@@ -95,8 +95,22 @@ class RepoEnvironment(EnvironmentSingleTurn):
   or sampling helpers directly from generated repos.
 - Do not block GRPO on SFT. Pass `sft_state_path` or `sft_log_dir` only for a
   deliberate warm-start comparison.
-- If all rewards in a group are identical, GRPO skips that group. Design rewards
-  with enough diversity to create trainable groups.
+- Advantages use group reward-decoupled normalization (arXiv:2601.05242):
+  each reward component is z-normalized within the rollout group
+  independently and the weighted normalized advantages are summed, then the
+  whole batch is normalized once more. Components come from
+  `RewardResult.metrics` entries (one named component per contract reward
+  function). To weight components unequally set `RewardMetric.weight`
+  (default 1.0) — it multiplies the component's normalized advantage;
+  pre-scaling raw scores does nothing because z-normalization cancels
+  scale. Only keys reported by every result in the group are compared, and
+  when those carry no signal (mixed metric coverage, or shared components
+  that tie) the combined scores are z-normalized instead. `advantage_clip`
+  clamps the batch-normalized values.
+- If every reward component in a group is constant, the group carries no
+  training signal and GRPO skips it. Design rewards with enough diversity to
+  create trainable groups; component-level resolution means groups whose
+  combined totals tie can still train when individual components differ.
 - Log reward diagnostics such as nonzero rate, unique reward count, uniform
   groups, trainable groups, invalid output rate, and representative completions.
 - Keep public eval semantics stricter and stable; training reward shaping may be

{freesolo-0.2.45 → freesolo-0.2.46}/pypi/freesolo/training/grpo/datums.py RENAMED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
 import asyncio
+import math
 from dataclasses import dataclass, field
 from typing import Any
@@ -69,6 +70,7 @@ async def build_grpo_batch_datums(
         return token_trace_groups, episodes, reward_results
     result = GrpoBatchResult()
+    pending_groups: list[tuple[list[list[TokenTrace]], list[float]]] = []
     scored_examples = await asyncio.gather(
         *[sample_and_score(example) for example in batch]
     )
@@ -78,32 +80,29 @@ async def build_grpo_batch_datums(
                 "Environment score_episodes() must return one RewardResult per sampled episode"
             )
         result.reward_results.extend(reward_results)
-        rewards: list[float] = []
-        rescored_traces: list[tuple[list[TokenTrace], float]] = []
-        for token_traces, reward_result in zip(
-            token_trace_groups,
-            reward_results,
-            strict=True,
-        ):
-            reward = float(reward_result.score)
-            rewards.append(reward)
-            result.rewards.append(reward)
-            rescored_traces.append((token_traces, reward))
+        result.rewards.extend(
+            float(reward_result.score) for reward_result in reward_results
+        )
         result.response_count += len(episodes)
         result.group_count += 1
-        if not rewards or len(set(rewards)) == 1:
+        group_advantages = decoupled_group_advantages(reward_results)
+        if not any(value != 0.0 for value in group_advantages):
             result.uniform_group_count += 1
             continue
         result.trainable_group_count += 1
-        advantages = centered_advantages(rewards, advantage_clip=advantage_clip)
-        for (token_traces, _reward), advantage in zip(
-            rescored_traces,
-            advantages,
-            strict=True,
-        ):
+        pending_groups.append((token_trace_groups, group_advantages))
+    normalized = batch_normalized_advantages(
+        [value for _, advantages in pending_groups for value in advantages],
+        advantage_clip=advantage_clip,
+    )
+    cursor = 0
+    for token_trace_groups, _ in pending_groups:
+        for token_traces in token_trace_groups:
+            advantage = normalized[cursor]
+            cursor += 1
             for token_trace in token_traces:
                 token_advantages = [
                     advantage * mask_value for mask_value in token_trace.advantage_mask
@@ -278,12 +277,110 @@ def _resolve_max_episode_turns(value: int) -> int:
     return value
-def centered_advantages(rewards: list[float], *, advantage_clip: float) -> list[float]:
-    reward_mean = sum(rewards) / len(rewards)
-    centered = [reward - reward_mean for reward in rewards]
+_ADVANTAGE_EPSILON = 1e-6
+def _metric_weight(metric: Any) -> float:
+    weight = getattr(metric, "weight", None)
+    if (
+        isinstance(weight, (int, float))
+        and not isinstance(weight, bool)
+        and math.isfinite(weight)
+        and weight > 0
+    ):
+        return float(weight)
+    return 1.0
+def _component_scores(reward_result: Any) -> dict[str, tuple[float, float]]:
+    """Map component name -> (score, weight) for one RewardResult."""
+    components: dict[str, tuple[float, float]] = {}
+    for metric in getattr(reward_result, "metrics", ()) or ():
+        name = getattr(metric, "name", None)
+        score = getattr(metric, "score", None)
+        if (
+            isinstance(name, str)
+            and name
+            and isinstance(score, (int, float))
+            and not isinstance(score, bool)
+        ):
+            components[name] = (float(score), _metric_weight(metric))
+    if components:
+        return components
+    return {"score": (float(reward_result.score), 1.0)}
+def decoupled_group_advantages(reward_results: list[Any]) -> list[float]:
+    """Group advantages with reward-decoupled normalization (arXiv:2601.05242).
+    Joint normalization of a summed multi-reward score collapses distinct
+    reward combinations into identical advantages. Instead, each reward
+    component is z-normalized within the group independently and the
+    weighted normalized advantages are summed, preserving per-reward
+    resolution and making components comparable regardless of their raw
+    scales. Components come from RewardResult.metrics (one per contract
+    reward function); RewardMetric.weight multiplies the component's
+    normalized advantage (default 1.0 — raw score scale carries no weight,
+    z-normalization cancels it). Only keys reported by every result in the
+    group are compared: filling a missing component with an invented value
+    lets components cancel (a metric-less error-path result's fallback
+    score against zero-filled metric rows can zero out a group that has
+    real reward differences). When the shared components carry no variance,
+    the combined scores are z-normalized instead so reward differences
+    still train.
+    """
+    component_maps = [
+        _component_scores(reward_result) for reward_result in reward_results
+    ]
+    shared_keys = sorted(
+        set.intersection(*(set(mapping) for mapping in component_maps))
+        if component_maps
+        else set()
+    )
+    advantages = [0.0] * len(reward_results)
+    for key in shared_keys:
+        values = [mapping[key][0] for mapping in component_maps]
+        # The weight is a property of the reward function, not the rollout;
+        # the first rollout's entry is canonical when they disagree.
+        weight = component_maps[0][key][1]
+        mean = sum(values) / len(values)
+        std = math.sqrt(sum((value - mean) ** 2 for value in values) / len(values))
+        if std == 0.0:
+            continue
+        for index, value in enumerate(values):
+            advantages[index] += weight * (value - mean) / std
+    if not any(value != 0.0 for value in advantages):
+        scores = [float(reward_result.score) for reward_result in reward_results]
+        mean = sum(scores) / len(scores) if scores else 0.0
+        std = (
+            math.sqrt(sum((value - mean) ** 2 for value in scores) / len(scores))
+            if scores
+            else 0.0
+        )
+        if std > 0.0:
+            advantages = [(value - mean) / std for value in scores]
+    return advantages
+def batch_normalized_advantages(
+    advantages: list[float],
+    *,
+    advantage_clip: float,
+) -> list[float]:
+    """Batch-wise normalization from the same paper.
+    Stabilizes advantage magnitude regardless of how many reward components
+    contributed; group advantages are zero-mean by construction, so this is
+    primarily a rescale. advantage_clip clamps the normalized values.
+    """
+    if not advantages:
+        return []
+    mean = sum(advantages) / len(advantages)
+    std = math.sqrt(sum((value - mean) ** 2 for value in advantages) / len(advantages))
+    normalized = [(value - mean) / (std + _ADVANTAGE_EPSILON) for value in advantages]
     if advantage_clip <= 0:
-        return centered
-    return [max(-advantage_clip, min(advantage_clip, value)) for value in centered]
+        return normalized
+    return [max(-advantage_clip, min(advantage_clip, value)) for value in normalized]
 def build_importance_sampling_trace_datum(

{freesolo-0.2.45 → freesolo-0.2.46}/pypi/freesolo/training/train_grpo.py RENAMED Viewed

@@ -32,22 +32,26 @@ from freesolo.training.grpo.sampling import (
 from freesolo.training.storage import attach_stored_training_run
 from freesolo.training.types import (
     DEFAULT_TRAINING_LORA_RANK,
-    TRAINING_BASE_MODEL,
     TrainGrpoOptions,
     resolve_tinker_base_url,
+    resolve_training_model,
     tinker_checkpoint_run_config,
     tinker_run_config,
 )
 from freesolo.utils.checkpoints import (
     CheckpointUtils,
+    ensure_log_dir_base_model,
     get_last_tinker_checkpoint,
+    has_training_state,
     next_training_position,
+    read_log_dir_base_model,
     resolve_checkpoint_sampler_path,
     resolve_checkpoint_state_path,
     resolve_sft_sampler_path,
     resolve_sft_state_path,
     resolve_training_position,
     save_tinker_checkpoint,
+    sft_state_path_in_log_dir,
     write_training_progress,
 )
 from freesolo.utils.core import load_dotenv_if_available, required_path
@@ -70,6 +74,7 @@ def _parse_args() -> argparse.Namespace:
     parser.add_argument("--sft-state-path")
     parser.add_argument("--reward-command")
     parser.add_argument("--base-url")
+    parser.add_argument("--base-model")
     return parser.parse_args()
@@ -84,7 +89,9 @@ async def train_grpo_async(
     sft_state_path: str | None = None,
     reward_command: str | None = None,
     base_url: str | None = None,
+    base_model: str | None = None,
 ) -> int:
+    resolved_base_model = resolve_training_model(base_model)
     try:
         import numpy
         import tinker
@@ -112,12 +119,37 @@ async def train_grpo_async(
     if not examples:
         raise RuntimeError(f"No GRPO records found in {dataset_path}")
-    tokenizer = get_tokenizer(TRAINING_BASE_MODEL)
+    tokenizer = get_tokenizer(resolved_base_model)
     renderer = renderers.get_renderer(grpo_config.renderer_name, tokenizer)
     resolved_tinker_base_url = resolve_tinker_base_url(base_url)
     service_client = tinker.ServiceClient(base_url=resolved_tinker_base_url or None)
     log_dir = Path(log_dir)
     log_dir.mkdir(parents=True, exist_ok=True)
+    if sft_log_dir is not None:
+        sft_base_model = read_log_dir_base_model(sft_log_dir)
+        if sft_base_model is None and has_training_state(sft_log_dir):
+            raise RuntimeError(
+                f"sft_log_dir {sft_log_dir} holds training state but no "
+                "base_model.json marker, so its model cannot be verified. "
+                "Train SFT for this model first."
+            )
+        if sft_base_model is not None and sft_base_model != resolved_base_model:
+            raise RuntimeError(
+                f"sft_log_dir {sft_log_dir} holds checkpoints for base model "
+                f"{sft_base_model!r}; GRPO cannot initialize "
+                f"{resolved_base_model!r} from them. Train SFT for this model "
+                "first or point sft_log_dir at a matching run."
+            )
+    if sft_state_path and not sft_state_path_in_log_dir(sft_state_path, sft_log_dir):
+        print(
+            f"[freesolo] GRPO initializing from explicit sft_state_path with "
+            f"base_model {resolved_base_model!r}; the path is not recorded in "
+            "the validated sft_log_dir, so its remote tinker:// lineage "
+            "cannot be verified locally - make sure that state came from "
+            "the same base model.",
+            file=sys.stderr,
+        )
+    ensure_log_dir_base_model(log_dir, resolved_base_model)
     run_name = f"freesolo-grpo-{log_dir.name}"
     run_config = {
         "phase": "grpo",
@@ -128,7 +160,7 @@ async def train_grpo_async(
         "sft_log_dir": str(sft_log_dir) if sft_log_dir is not None else None,
         "sft_checkpoint_name": sft_checkpoint_name,
         "sft_state_path": sft_state_path,
-        "base_model": TRAINING_BASE_MODEL,
+        "base_model": resolved_base_model,
         "renderer": grpo_config.renderer_name,
         "lora_rank": DEFAULT_TRAINING_LORA_RANK,
         "batch_size": grpo_config.batch_size,
@@ -188,7 +220,7 @@ async def train_grpo_async(
         else:
             training_client = await asyncio.to_thread(
                 service_client.create_lora_training_client,
-                base_model=TRAINING_BASE_MODEL,
+                base_model=resolved_base_model,
                 rank=DEFAULT_TRAINING_LORA_RANK,
             )
         kl_reference_sampling_client = None
@@ -207,7 +239,7 @@ async def train_grpo_async(
             else:
                 kl_reference_sampling_client = await asyncio.to_thread(
                     service_client.create_sampling_client,
-                    base_model=TRAINING_BASE_MODEL,
+                    base_model=resolved_base_model,
                 )
         # renderer stop sequences may be token ids or strings; the grpo config
@@ -377,6 +409,7 @@ async def train_grpo_async(
                 training_client = await _warm_restart_training_client(
                     service_client=service_client,
                     state_path=recovery_state_path,
+                    base_model=resolved_base_model,
                 )
                 global_step, epoch, batch_index = recovery_position
                 sampler_state = SamplerState(
@@ -497,11 +530,6 @@ async def train_grpo_async(
 def train_grpo(**kwargs: Unpack[TrainGrpoOptions]) -> int:
     load_dotenv_if_available()
-    if "base_model" in kwargs:
-        raise TypeError(
-            "train_grpo() does not accept base_model; Freesolo training is pinned "
-            f"to {TRAINING_BASE_MODEL}"
-        )
     return asyncio.run(train_grpo_async(**kwargs))
@@ -590,6 +618,7 @@ async def _warm_restart_training_client(
     *,
     service_client: Any,
     state_path: str | None,
+    base_model: str,
 ) -> Any:
     """Open a fresh Tinker training session from the last durable state.
@@ -606,7 +635,7 @@ async def _warm_restart_training_client(
         )
     return await asyncio.to_thread(
         service_client.create_lora_training_client,
-        base_model=TRAINING_BASE_MODEL,
+        base_model=base_model,
         rank=DEFAULT_TRAINING_LORA_RANK,
     )
@@ -745,6 +774,7 @@ async def main_async() -> int:
         sft_state_path=args.sft_state_path,
         reward_command=args.reward_command,
         base_url=args.base_url,
+        base_model=args.base_model,
     )

{freesolo-0.2.45 → freesolo-0.2.46}/pypi/freesolo/training/train_sft.py RENAMED Viewed

@@ -16,16 +16,17 @@ from freesolo.datasets import load_dataset
 from freesolo.environments.base import load_environment
 from freesolo.training.storage import attach_stored_training_run
 from freesolo.training.types import (
-    TRAINING_BASE_MODEL,
     SftConfig,
     TrainSftOptions,
     resolve_sft_config,
     resolve_tinker_base_url,
+    resolve_training_model,
     tinker_checkpoint_run_config,
     tinker_run_config,
 )
 from freesolo.utils.checkpoints import (
     checkpoint_step,
+    ensure_log_dir_base_model,
     get_last_tinker_checkpoint,
     resolve_checkpoint_sampler_path,
     resolve_checkpoint_state_path,
@@ -42,6 +43,7 @@ def _parse_args() -> argparse.Namespace:
     parser.add_argument("--environment")
     parser.add_argument("--log-dir", default="./logs/sft")
     parser.add_argument("--base-url")
+    parser.add_argument("--base-model")
     parser.add_argument("--max-length", type=int, required=True)
     return parser.parse_args()
@@ -51,17 +53,13 @@ def train_sft(**kwargs: Unpack[TrainSftOptions]) -> int:
         raise TypeError(
             "train_sft() missing required keyword-only argument: 'dataset_path'"
         )
-    if "base_model" in kwargs:
-        raise TypeError(
-            "train_sft() does not accept base_model; Freesolo training is pinned "
-            f"to {TRAINING_BASE_MODEL}"
-        )
     return _train_sft(
         contract_path=kwargs.get("contract_path", "TRAINING_CONTRACT.md"),
         dataset_path=kwargs["dataset_path"],
         environment=kwargs.get("environment"),
         log_dir=kwargs.get("log_dir", "./logs/sft"),
         base_url=kwargs.get("base_url"),
+        base_model=kwargs.get("base_model"),
         sft_config=kwargs.get("sft_config"),
     )
@@ -73,9 +71,11 @@ def _train_sft(
     environment: str | None = None,
     log_dir: str | Path = "./logs/sft",
     base_url: str | None = None,
+    base_model: str | None = None,
     sft_config: SftConfig | None = None,
 ) -> int:
     load_dotenv_if_available()
+    resolved_base_model = resolve_training_model(base_model)
     try:
         import tinker
         from tinker_cookbook import checkpoint_utils, renderers
@@ -114,12 +114,13 @@ def _train_sft(
             f"No assistant turns found in SFT records from {dataset_path}"
         )
-    tokenizer = get_tokenizer(TRAINING_BASE_MODEL)
+    tokenizer = get_tokenizer(resolved_base_model)
     renderer = renderers.get_renderer(resolved_sft_config.renderer_name, tokenizer)
     resolved_tinker_base_url = resolve_tinker_base_url(base_url)
     service_client = tinker.ServiceClient(base_url=resolved_tinker_base_url or None)
     log_dir = Path(log_dir)
     log_dir.mkdir(parents=True, exist_ok=True)
+    ensure_log_dir_base_model(log_dir, resolved_base_model)
     run_name = f"freesolo-sft-{log_dir.name}"
     run_config = {
         "phase": "sft",
@@ -127,7 +128,7 @@ def _train_sft(
         "dataset_path": str(dataset_path),
         "environment": environment,
         "log_dir": str(log_dir),
-        "base_model": TRAINING_BASE_MODEL,
+        "base_model": resolved_base_model,
         "renderer": resolved_sft_config.renderer_name,
         "batch_size": batch_size,
         "learning_rate": learning_rate,
@@ -177,7 +178,7 @@ def _train_sft(
             )
         else:
             training_client = service_client.create_lora_training_client(
-                base_model=TRAINING_BASE_MODEL,
+                base_model=resolved_base_model,
                 rank=lora_rank,
             )
             start_step = 0
@@ -317,6 +318,7 @@ def main() -> int:
         environment=args.environment,
         log_dir=args.log_dir,
         base_url=args.base_url,
+        base_model=args.base_model,
         sft_config=SftConfig(max_length=args.max_length),
     )

{freesolo-0.2.45 → freesolo-0.2.46}/pypi/freesolo/training/types.py RENAMED Viewed

@@ -6,12 +6,23 @@ from typing import Any, TypedDict
 from typing_extensions import Required
 TRAINING_BASE_MODEL = "Qwen/Qwen3.6-35B-A3B"
+SUPPORTED_TRAINING_MODELS = ("Qwen/Qwen3.6-35B-A3B", "Qwen/Qwen3.5-4B")
 TRAINING_RENDERER_NAME = "qwen3_5_disable_thinking"
 SUPPORTED_TRAINING_RENDERERS = ("qwen3_5", "qwen3_5_disable_thinking")
 DEFAULT_TRAINING_LORA_RANK = 64
 TINKER_BASE_URL_ENV = "TINKER_BASE_URL"
+def resolve_training_model(base_model: str | None) -> str:
+    if base_model is None:
+        return TRAINING_BASE_MODEL
+    resolved = str(base_model).strip()
+    if resolved not in SUPPORTED_TRAINING_MODELS:
+        supported = ", ".join(SUPPORTED_TRAINING_MODELS)
+        raise ValueError(f"base_model must be one of: {supported}; got {base_model!r}")
+    return resolved
 def resolve_training_renderer(renderer_name: str | None) -> str:
     if renderer_name is None:
         return TRAINING_RENDERER_NAME
@@ -124,6 +135,7 @@ class TrainSftOptions(TypedDict, total=False):
     environment: str | None
     log_dir: str | Path
     base_url: str | None
+    base_model: str | None
     sft_config: SftConfig | None
@@ -137,10 +149,12 @@ class TrainGrpoOptions(TypedDict, total=False):
     sft_state_path: str | None
     reward_command: str | None
     base_url: str | None
+    base_model: str | None
 __all__ = [
     "DEFAULT_TRAINING_LORA_RANK",
+    "SUPPORTED_TRAINING_MODELS",
     "SUPPORTED_TRAINING_RENDERERS",
     "TRAINING_BASE_MODEL",
     "TRAINING_RENDERER_NAME",
@@ -150,6 +164,7 @@ __all__ = [
     "TrainSftOptions",
     "resolve_sft_config",
     "resolve_tinker_base_url",
+    "resolve_training_model",
     "resolve_training_renderer",
     "tinker_checkpoint_run_config",
     "tinker_run_config",

{freesolo-0.2.45 → freesolo-0.2.46}/pypi/freesolo/utils/checkpoints.py RENAMED Viewed

@@ -201,6 +201,20 @@ def resolve_checkpoint_state_path(record: object | None) -> str | None:
     return str(state_path) if state_path else None
+def sft_state_path_in_log_dir(
+    state_path: str,
+    sft_log_dir: str | Path | None,
+) -> bool:
+    """Whether an explicit SFT state path is recorded in the (already
+    marker-validated) SFT log dir, tying its lineage to that dir's model."""
+    if sft_log_dir is None:
+        return False
+    return any(
+        resolve_checkpoint_state_path(record) == state_path
+        for record in read_checkpoint_records(sft_log_dir)
+    )
 def resolve_checkpoint_sampler_path(record: object | None) -> str | None:
     sampler_path = checkpoint_value(record, "sampler_path")
     return str(sampler_path) if sampler_path else None
@@ -282,3 +296,56 @@ def save_tinker_checkpoint(
         loop_state=loop_state,
         kind=kind,
     )
+_BASE_MODEL_MARKER = "base_model.json"
+def read_log_dir_base_model(log_dir: str | Path) -> str | None:
+    marker = Path(log_dir) / _BASE_MODEL_MARKER
+    if not marker.is_file():
+        return None
+    try:
+        recorded = json.loads(marker.read_text(encoding="utf-8")).get("base_model")
+    except (OSError, ValueError):
+        return None
+    return recorded if isinstance(recorded, str) and recorded else None
+def has_training_state(log_dir: str | Path) -> bool:
+    base = Path(log_dir)
+    return any(
+        (base / name).exists()
+        for name in (
+            "checkpoints.jsonl",
+            TRAINING_PROGRESS_FILENAME,
+            "kl_reference.json",
+        )
+    )
+def ensure_log_dir_base_model(log_dir: str | Path, base_model: str) -> None:
+    """Pin a log dir to one base model.
+    Checkpoint resume loads optimizer/weight state blindly from the log dir, so
+    continuing a dir produced by a different base model would silently mix
+    weights and tokenizer. A dir that already holds training state without a
+    marker cannot be attributed to any model, so it is refused outright.
+    """
+    recorded = read_log_dir_base_model(log_dir)
+    if recorded is None and has_training_state(log_dir):
+        raise RuntimeError(
+            f"log_dir {log_dir} holds training state but no base_model.json "
+            "marker, so its model cannot be verified. Use a fresh log_dir."
+        )
+    if recorded is not None and recorded != base_model:
+        raise RuntimeError(
+            f"log_dir {log_dir} holds training state for base model "
+            f"{recorded!r}; refusing to continue it with {base_model!r}. "
+            "Use a fresh log_dir to train a different model."
+        )
+    if recorded is None:
+        marker = Path(log_dir) / _BASE_MODEL_MARKER
+        marker.write_text(
+            json.dumps({"base_model": base_model}) + "\n", encoding="utf-8"
+        )

{freesolo-0.2.45 → freesolo-0.2.46}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "freesolo"
-version = "0.2.45"
+version = "0.2.46"
 description = "Tracing, evaluation, and training utilities for LLM applications."
 readme = "README.md"
 requires-python = ">=3.10"

freesolo 0.2.45__tar.gz → 0.2.46__tar.gz

freesolo 0.2.45__tar.gz → 0.2.46__tar.gz