freesolo 0.2.43__tar.gz → 0.2.45__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (130)
  1. {freesolo-0.2.43 → freesolo-0.2.45}/PKG-INFO +1 -1
  2. {freesolo-0.2.43 → freesolo-0.2.45}/npm/package.json +1 -1
  3. {freesolo-0.2.43 → freesolo-0.2.45}/package.json +1 -1
  4. freesolo-0.2.45/pypi/freesolo/training/storage.py +78 -0
  5. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/train_grpo.py +2 -4
  6. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/train_sft.py +2 -4
  7. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/storage.py +0 -28
  8. {freesolo-0.2.43 → freesolo-0.2.45}/pyproject.toml +1 -1
  9. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_storage_sync.py +40 -87
  10. {freesolo-0.2.43 → freesolo-0.2.45}/uv.lock +1 -1
  11. freesolo-0.2.43/pypi/freesolo/training/storage.py +0 -68
  12. {freesolo-0.2.43 → freesolo-0.2.45}/.github/workflows/publish-packages.yml +0 -0
  13. {freesolo-0.2.43 → freesolo-0.2.45}/.github/workflows/python-checks.yml +0 -0
  14. {freesolo-0.2.43 → freesolo-0.2.45}/.github/workflows/sync-package-function-usage.yml +0 -0
  15. {freesolo-0.2.43 → freesolo-0.2.45}/.github/workflows/version-consistency.yml +0 -0
  16. {freesolo-0.2.43 → freesolo-0.2.45}/.gitignore +0 -0
  17. {freesolo-0.2.43 → freesolo-0.2.45}/AGENTS.md +0 -0
  18. {freesolo-0.2.43 → freesolo-0.2.45}/README.md +0 -0
  19. {freesolo-0.2.43 → freesolo-0.2.45}/examples/PROMPT.md +0 -0
  20. {freesolo-0.2.43 → freesolo-0.2.45}/examples/README.md +0 -0
  21. {freesolo-0.2.43 → freesolo-0.2.45}/examples/TRAINING_CONTRACT.md +0 -0
  22. {freesolo-0.2.43 → freesolo-0.2.45}/examples/data/support_eval.jsonl +0 -0
  23. {freesolo-0.2.43 → freesolo-0.2.45}/examples/data/support_train.jsonl +0 -0
  24. {freesolo-0.2.43 → freesolo-0.2.45}/examples/environment.py +0 -0
  25. {freesolo-0.2.43 → freesolo-0.2.45}/examples/evaluation_custom_scorer.py +0 -0
  26. {freesolo-0.2.43 → freesolo-0.2.45}/examples/evaluation_from_files.py +0 -0
  27. {freesolo-0.2.43 → freesolo-0.2.45}/examples/gepa_prompt_example.py +0 -0
  28. {freesolo-0.2.43 → freesolo-0.2.45}/examples/support_dataset.py +0 -0
  29. {freesolo-0.2.43 → freesolo-0.2.45}/examples/tracing_manual_span.py +0 -0
  30. {freesolo-0.2.43 → freesolo-0.2.45}/examples/tracing_multistep_agent.py +0 -0
  31. {freesolo-0.2.43 → freesolo-0.2.45}/examples/training_sft_grpo.py +0 -0
  32. {freesolo-0.2.43 → freesolo-0.2.45}/npm/README.md +0 -0
  33. {freesolo-0.2.43 → freesolo-0.2.45}/npm/bun.lock +0 -0
  34. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/core.d.ts +0 -0
  35. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/core.d.ts.map +0 -0
  36. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/core.js +0 -0
  37. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/evaluation.d.ts +0 -0
  38. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/evaluation.d.ts.map +0 -0
  39. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/evaluation.js +0 -0
  40. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/index.d.ts +0 -0
  41. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/index.d.ts.map +0 -0
  42. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/index.js +0 -0
  43. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/tracing.d.ts +0 -0
  44. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/tracing.d.ts.map +0 -0
  45. {freesolo-0.2.43 → freesolo-0.2.45}/npm/dist/tracing.js +0 -0
  46. {freesolo-0.2.43 → freesolo-0.2.45}/npm/src/core.ts +0 -0
  47. {freesolo-0.2.43 → freesolo-0.2.45}/npm/src/evaluation.ts +0 -0
  48. {freesolo-0.2.43 → freesolo-0.2.45}/npm/src/index.ts +0 -0
  49. {freesolo-0.2.43 → freesolo-0.2.45}/npm/src/tracing.ts +0 -0
  50. {freesolo-0.2.43 → freesolo-0.2.45}/npm/tests/evaluation.test.ts +0 -0
  51. {freesolo-0.2.43 → freesolo-0.2.45}/npm/tests/tracing.test.ts +0 -0
  52. {freesolo-0.2.43 → freesolo-0.2.45}/npm/tsconfig.json +0 -0
  53. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/.gitignore +0 -0
  54. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/README.md +0 -0
  55. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/__init__.py +0 -0
  56. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/contracts/README.md +0 -0
  57. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/contracts/__init__.py +0 -0
  58. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/contracts/markdown.py +0 -0
  59. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/contracts/types.py +0 -0
  60. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/datasets/README.md +0 -0
  61. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/datasets/__init__.py +0 -0
  62. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/datasets/core.py +0 -0
  63. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/datasets/records.py +0 -0
  64. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/datasets/types.py +0 -0
  65. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/environments/README.md +0 -0
  66. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/environments/__init__.py +0 -0
  67. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/environments/base.py +0 -0
  68. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/environments/evaluation.py +0 -0
  69. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/environments/types.py +0 -0
  70. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/README.md +0 -0
  71. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/__init__.py +0 -0
  72. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/client.py +0 -0
  73. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/judges/__init__.py +0 -0
  74. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/judges/base.py +0 -0
  75. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/responses.py +0 -0
  76. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/results.py +0 -0
  77. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/evaluation/types.py +0 -0
  78. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/gepa/README.md +0 -0
  79. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/gepa/__init__.py +0 -0
  80. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/gepa/adapter.py +0 -0
  81. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/gepa/reflection.py +0 -0
  82. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/gepa/setup.py +0 -0
  83. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/gepa/types.py +0 -0
  84. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/py.typed +0 -0
  85. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/tracing/README.md +0 -0
  86. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/tracing/__init__.py +0 -0
  87. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/tracing/otel.py +0 -0
  88. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/tracing/sanitize.py +0 -0
  89. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/README.md +0 -0
  90. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/__init__.py +0 -0
  91. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/grpo/README.md +0 -0
  92. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/grpo/__init__.py +0 -0
  93. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/grpo/config.py +0 -0
  94. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/grpo/datums.py +0 -0
  95. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/grpo/rewards.py +0 -0
  96. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/grpo/sampling.py +0 -0
  97. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/types.py +0 -0
  98. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/training/wandb_series.py +0 -0
  99. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/README.md +0 -0
  100. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/__init__.py +0 -0
  101. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/checkpoints.py +0 -0
  102. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/core.py +0 -0
  103. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/hosting.py +0 -0
  104. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/judge.py +0 -0
  105. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/openai.py +0 -0
  106. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/oracle.py +0 -0
  107. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/upload.py +0 -0
  108. {freesolo-0.2.43 → freesolo-0.2.45}/pypi/freesolo/utils/wandb.py +0 -0
  109. {freesolo-0.2.43 → freesolo-0.2.45}/ruff.toml +0 -0
  110. {freesolo-0.2.43 → freesolo-0.2.45}/tests/end_to_end_testing/test_environment_evaluation_flow.py +0 -0
  111. {freesolo-0.2.43 → freesolo-0.2.45}/tests/end_to_end_testing/test_examples.py +0 -0
  112. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_contracts_and_judges.py +0 -0
  113. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_core_utils.py +0 -0
  114. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_datasets.py +0 -0
  115. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_environment_evaluation_edges.py +0 -0
  116. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_evaluation_client.py +0 -0
  117. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_gepa_adapter.py +0 -0
  118. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_grpo_datums_and_sampling.py +0 -0
  119. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_hosting_and_deployment_clients.py +0 -0
  120. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_openai_and_oracle_tokens.py +0 -0
  121. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_package_metadata.py +0 -0
  122. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_records_rewards_and_config.py +0 -0
  123. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_tracing_opentelemetry.py +0 -0
  124. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_train_sft.py +0 -0
  125. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_training_efficiency_fixes.py +0 -0
  126. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_upload.py +0 -0
  127. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_utils_checkpoints.py +0 -0
  128. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_wandb_series.py +0 -0
  129. {freesolo-0.2.43 → freesolo-0.2.45}/tests/functionality/test_wandb_utils.py +0 -0
  130. {freesolo-0.2.43 → freesolo-0.2.45}/tests/security/test_sanitize_and_contract_security.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: freesolo
3
- Version: 0.2.43
3
+ Version: 0.2.45
4
4
  Summary: Tracing, evaluation, and training utilities for LLM applications.
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: typing-extensions>=4.8.0
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesolo/sdk",
3
- "version": "0.2.43",
3
+ "version": "0.2.45",
4
4
  "description": "Tracing and evaluation utilities for TypeScript LLM applications.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesolo/sdk",
3
- "version": "0.2.43",
3
+ "version": "0.2.45",
4
4
  "description": "Tracing and evaluation utilities for LLM applications.",
5
5
  "type": "module",
6
6
  "main": "./npm/dist/index.js",
@@ -0,0 +1,78 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from contextlib import suppress
5
+ from dataclasses import dataclass
6
+ from typing import Any
7
+
8
+ from freesolo.utils.storage import FreesoloStorageClient
9
+
10
+ TRAINING_RUN_ID_ENV = "FREESOLO_TRAINING_RUN_ID"
11
+
12
+
13
+ @dataclass(frozen=True)
14
+ class StoredTrainingRun:
15
+ """Handle to the platform run a training phase reports into.
16
+
17
+ When no platform run is attached (``client is None``), every method
18
+ no-ops so training works identically without storage sync.
19
+ """
20
+
21
+ client: FreesoloStorageClient | None
22
+ run_id: str | None
23
+
24
+ def link_wandb(self, wandb_url: str | None) -> None:
25
+ if self.client is None or self.run_id is None:
26
+ return
27
+ self.client.link_wandb(self.run_id, wandb_url)
28
+
29
+ def complete(self, config: dict[str, Any] | None = None) -> None:
30
+ """Record the phase's final config on the shared run.
31
+
32
+ Terminal status is platform-owned: the backend marks the run
33
+ completed/failed when the training job ends, so an SFT phase
34
+ finishing must not flip the shared run to completed while GRPO
35
+ is still ahead.
36
+ """
37
+ if self.client is None or self.run_id is None:
38
+ return
39
+ if config is not None:
40
+ self.client.update_run(self.run_id, config=config)
41
+
42
+ def fail(self, error: BaseException) -> None:
43
+ """Record the phase failure's error text on the shared run.
44
+
45
+ Status stays platform-owned (the job may retry the phase and
46
+ re-attach); the backend marks the run failed if the job fails.
47
+ """
48
+ if self.client is None or self.run_id is None:
49
+ return
50
+ with suppress(Exception):
51
+ # Preserve the original training exception for the caller.
52
+ self.client.update_run(self.run_id, error=str(error))
53
+
54
+
55
+ def attach_stored_training_run(
56
+ *,
57
+ phase: str,
58
+ config: dict[str, Any] | None = None,
59
+ ) -> StoredTrainingRun:
60
+ """Attach to the platform run named by ``FREESOLO_TRAINING_RUN_ID``.
61
+
62
+ The platform owns run creation: it creates exactly one run per training
63
+ job, and every SFT/GRPO phase of that job attaches to it, so the run
64
+ accumulates each phase's status, config, and W&B links. Without the env
65
+ id (optimizer pilots, ad-hoc local runs) storage sync is disabled and
66
+ the returned handle no-ops.
67
+ """
68
+ run_id = os.getenv(TRAINING_RUN_ID_ENV, "").strip()
69
+ if not run_id:
70
+ return StoredTrainingRun(client=None, run_id=None)
71
+
72
+ client = FreesoloStorageClient()
73
+ client.update_run(
74
+ run_id,
75
+ status="running",
76
+ config={**(config or {}), "phase": phase},
77
+ )
78
+ return StoredTrainingRun(client=client, run_id=run_id)
@@ -29,7 +29,7 @@ from freesolo.training.grpo.sampling import (
29
29
  ensure_sampling_client,
30
30
  session_id_from_tinker_path,
31
31
  )
32
- from freesolo.training.storage import start_stored_training_run
32
+ from freesolo.training.storage import attach_stored_training_run
33
33
  from freesolo.training.types import (
34
34
  DEFAULT_TRAINING_LORA_RANK,
35
35
  TRAINING_BASE_MODEL,
@@ -149,10 +149,8 @@ async def train_grpo_async(
149
149
  "record_count": len(examples),
150
150
  **tinker_run_config(resolved_tinker_base_url),
151
151
  }
152
- stored_run = start_stored_training_run(
152
+ stored_run = attach_stored_training_run(
153
153
  phase="grpo",
154
- name=run_name,
155
- description="Freesolo GRPO training run",
156
154
  config=run_config,
157
155
  )
158
156
  wandb_run = None
@@ -14,7 +14,7 @@ if __package__ in {None, ""}:
14
14
  from freesolo.contracts import ChatMessage, load_contract_text
15
15
  from freesolo.datasets import load_dataset
16
16
  from freesolo.environments.base import load_environment
17
- from freesolo.training.storage import start_stored_training_run
17
+ from freesolo.training.storage import attach_stored_training_run
18
18
  from freesolo.training.types import (
19
19
  TRAINING_BASE_MODEL,
20
20
  SftConfig,
@@ -144,10 +144,8 @@ def _train_sft(
144
144
  "training_example_count": len(training_conversations),
145
145
  **tinker_run_config(resolved_tinker_base_url),
146
146
  }
147
- stored_run = start_stored_training_run(
147
+ stored_run = attach_stored_training_run(
148
148
  phase="sft",
149
- name=run_name,
150
- description="Freesolo SFT training run",
151
149
  config=run_config,
152
150
  )
153
151
  wandb_run = None
@@ -106,34 +106,6 @@ class FreesoloStorageClient:
106
106
  )
107
107
  _json_response(response, AUTH_VERIFY_PATH)
108
108
 
109
- def create_run(
110
- self,
111
- *,
112
- name: str,
113
- description: str | None = None,
114
- version: str | None = None,
115
- evaluator_ids: list[str] | tuple[str, ...] = (),
116
- model_id: str | None = None,
117
- config: dict[str, Any] | None = None,
118
- ) -> str:
119
- payload: JsonObject = {"name": require_non_empty(name, "name")}
120
- if description:
121
- payload["description"] = description
122
- if version:
123
- payload["version"] = version
124
- if model_id:
125
- payload["model_id"] = model_id
126
- if evaluator_ids:
127
- payload["evaluator_ids"] = [str(value) for value in evaluator_ids]
128
- run_config = _training_run_config(config)
129
- if run_config:
130
- payload["config"] = run_config
131
- data = self._post_json(
132
- RUNS_PATH,
133
- payload,
134
- )
135
- return _response_id(data, "run_id")
136
-
137
109
  def update_run(
138
110
  self,
139
111
  run_id: str,
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "freesolo"
7
- version = "0.2.43"
7
+ version = "0.2.45"
8
8
  description = "Tracing, evaluation, and training utilities for LLM applications."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -4,7 +4,7 @@ from typing import Any, ClassVar
4
4
 
5
5
  import httpx
6
6
  import pytest
7
- from freesolo.training.storage import start_stored_training_run
7
+ from freesolo.training.storage import attach_stored_training_run
8
8
  from freesolo.utils.core import _auth_headers
9
9
  from freesolo.utils.storage import FreesoloStorageClient
10
10
 
@@ -139,7 +139,7 @@ def test_storage_client_sets_auth_headers(
139
139
  FreesoloStorageClient(
140
140
  api_key="fs-test",
141
141
  base_url="https://api.test",
142
- ).create_run(name="train")
142
+ ).update_run("run-1", status="running")
143
143
 
144
144
  assert _FakeHttpClient.calls[0]["headers"] == _auth_headers("fs-test")
145
145
 
@@ -156,7 +156,7 @@ def test_storage_client_sets_internal_training_scope_headers(
156
156
  user_id="user-1",
157
157
  org_id="org-A",
158
158
  base_url="https://api.test",
159
- ).create_run(name="train")
159
+ ).update_run("run-1", status="running")
160
160
 
161
161
  assert _FakeHttpClient.calls[0]["headers"] == {
162
162
  "Authorization": "Bearer internal-key",
@@ -216,28 +216,21 @@ def test_verify_api_key_rejects_unauthorized_response(
216
216
  ).verify_api_key()
217
217
 
218
218
 
219
- def test_run_create_update_and_wandb_use_freesolo_api(
219
+ def test_run_update_and_wandb_use_freesolo_api(
220
220
  monkeypatch: pytest.MonkeyPatch,
221
221
  ) -> None:
222
222
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
223
223
  client = FreesoloStorageClient(api_key="fs-test", base_url="https://api.test")
224
224
 
225
- run_id = client.create_run(name="train")
226
- client.update_run(run_id, status="running", config={"phase": "sft"})
227
- client.link_wandb(run_id, "https://wandb.ai/o/p/runs/r")
225
+ client.update_run("run-1", status="running", config={"phase": "sft"})
226
+ client.link_wandb("run-1", "https://wandb.ai/o/p/runs/r")
228
227
 
229
- assert run_id == "run-1"
230
228
  assert _FakeHttpClient.calls[0] == {
231
- "url": "https://api.test/api/runs",
232
- "headers": _auth_headers("fs-test"),
233
- "json": {"name": "train"},
234
- }
235
- assert _FakeHttpClient.calls[1] == {
236
229
  "url": "https://api.test/api/runs/run-1",
237
230
  "headers": _auth_headers("fs-test"),
238
231
  "json": {"status": "running", "config": {"phase": "sft"}},
239
232
  }
240
- assert _FakeHttpClient.calls[2] == {
233
+ assert _FakeHttpClient.calls[1] == {
241
234
  "url": "https://api.test/api/runs/run-1/wandb",
242
235
  "headers": _auth_headers("fs-test"),
243
236
  "json": {"wandb_url": "https://wandb.ai/o/p/runs/r"},
@@ -252,11 +245,10 @@ def test_run_config_includes_agent_job_context(
252
245
  monkeypatch.setenv("FREESOLO_TRAINING_TASK_ID", "task-1")
253
246
  client = FreesoloStorageClient(api_key="fs-test", base_url="https://api.test")
254
247
 
255
- run_id = client.create_run(name="train", config={"phase": "sft"})
256
- client.update_run(run_id, config={"phase": "grpo"})
248
+ client.update_run("run-1", config={"phase": "sft"})
249
+ client.update_run("run-1", config={"phase": "grpo"})
257
250
 
258
251
  assert _FakeHttpClient.calls[0]["json"] == {
259
- "name": "train",
260
252
  "config": {
261
253
  "phase": "sft",
262
254
  "trainingAgentJobId": "job-1",
@@ -306,28 +298,28 @@ def test_storage_client_surfaces_http_error_body(
306
298
  monkeypatch: pytest.MonkeyPatch,
307
299
  ) -> None:
308
300
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
309
- _FakeHttpClient.status_by_suffix = {"/api/runs": 500}
310
- _FakeHttpClient.text_by_suffix = {"/api/runs": "db unavailable"}
301
+ _FakeHttpClient.status_by_suffix = {"/api/runs/run-1": 500}
302
+ _FakeHttpClient.text_by_suffix = {"/api/runs/run-1": "db unavailable"}
311
303
 
312
304
  with pytest.raises(httpx.HTTPStatusError, match="db unavailable"):
313
305
  FreesoloStorageClient(
314
306
  api_key="fs-test",
315
307
  base_url="https://api.test",
316
- ).create_run(name="train")
308
+ ).update_run("run-1", status="running")
317
309
 
318
310
 
319
311
  def test_storage_client_truncates_long_http_error_body(
320
312
  monkeypatch: pytest.MonkeyPatch,
321
313
  ) -> None:
322
314
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
323
- _FakeHttpClient.status_by_suffix = {"/api/runs": 500}
324
- _FakeHttpClient.text_by_suffix = {"/api/runs": "x" * 2_500}
315
+ _FakeHttpClient.status_by_suffix = {"/api/runs/run-1": 500}
316
+ _FakeHttpClient.text_by_suffix = {"/api/runs/run-1": "x" * 2_500}
325
317
 
326
318
  with pytest.raises(httpx.HTTPStatusError) as exc_info:
327
319
  FreesoloStorageClient(
328
320
  api_key="fs-test",
329
321
  base_url="https://api.test",
330
- ).create_run(name="train")
322
+ ).update_run("run-1", status="running")
331
323
 
332
324
  message = str(exc_info.value)
333
325
  assert message.count("x") == 2_000
@@ -338,69 +330,37 @@ def test_storage_client_rejects_missing_response_id(
338
330
  monkeypatch: pytest.MonkeyPatch,
339
331
  ) -> None:
340
332
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
341
- _FakeHttpClient.response_json_by_suffix = {"/api/runs": {"ok": True}}
333
+ _FakeHttpClient.response_json_by_suffix = {"/api/codex-logs": {"ok": True}}
342
334
 
343
- with pytest.raises(ValueError, match="missing run_id"):
335
+ with pytest.raises(ValueError, match="missing codex_log_id"):
344
336
  FreesoloStorageClient(
345
337
  api_key="fs-test",
346
338
  base_url="https://api.test",
347
- ).create_run(name="train")
339
+ ).append_codex_log("Draft agent started")
348
340
 
349
341
 
350
- def test_training_storage_creates_run(
342
+ def test_training_storage_is_disabled_without_platform_run(
351
343
  monkeypatch: pytest.MonkeyPatch,
352
344
  ) -> None:
345
+ """The platform owns run creation: without FREESOLO_TRAINING_RUN_ID the
346
+ handle no-ops instead of creating a run (or requiring credentials)."""
353
347
  monkeypatch.delenv("FREESOLO_TRAINING_RUN_ID", raising=False)
354
348
 
355
- class FakeStorageClient:
356
- def __init__(self) -> None:
357
- self.calls: list[tuple[str, dict[str, Any]]] = []
358
- created_clients.append(self)
359
-
360
- def create_run(self, **kwargs: Any) -> str:
361
- self.calls.append(("create_run", kwargs))
362
- return "run-1"
363
-
364
- def update_run(self, run_id: str, **kwargs: Any) -> None:
365
- self.calls.append(("update_run", {"run_id": run_id, **kwargs}))
349
+ def _fail_client(*_args: Any, **_kwargs: Any) -> None:
350
+ raise AssertionError("storage client must not be constructed when disabled")
366
351
 
367
- created_clients: list[FakeStorageClient] = []
368
- monkeypatch.setattr(
369
- "freesolo.training.storage.FreesoloStorageClient", FakeStorageClient
370
- )
352
+ monkeypatch.setattr("freesolo.training.storage.FreesoloStorageClient", _fail_client)
371
353
 
372
- stored = start_stored_training_run(
354
+ stored = attach_stored_training_run(
373
355
  phase="grpo",
374
- name="full train",
375
- description="long training",
376
356
  config={"learning_rate": 1e-5},
377
357
  )
378
358
 
379
- assert stored.run_id == "run-1"
380
- assert created_clients[0].calls == [
381
- (
382
- "create_run",
383
- {
384
- "name": "full train",
385
- "description": "long training",
386
- "config": {
387
- "learning_rate": 1e-5,
388
- "phase": "grpo",
389
- },
390
- },
391
- ),
392
- (
393
- "update_run",
394
- {
395
- "run_id": "run-1",
396
- "status": "running",
397
- "config": {
398
- "learning_rate": 1e-5,
399
- "phase": "grpo",
400
- },
401
- },
402
- ),
403
- ]
359
+ assert stored.run_id is None
360
+ assert stored.client is None
361
+ stored.link_wandb("https://wandb.ai/acme/proj/runs/run-1")
362
+ stored.complete(config={"final": True})
363
+ stored.fail(RuntimeError("boom"))
404
364
 
405
365
 
406
366
  def test_training_storage_reuses_worker_run_id(
@@ -411,10 +371,6 @@ def test_training_storage_reuses_worker_run_id(
411
371
  self.calls: list[tuple[str, dict[str, Any]]] = []
412
372
  created_clients.append(self)
413
373
 
414
- def create_run(self, **kwargs: Any) -> str:
415
- self.calls.append(("create_run", kwargs))
416
- return "new-run"
417
-
418
374
  def update_run(self, run_id: str, **kwargs: Any) -> None:
419
375
  self.calls.append(("update_run", {"run_id": run_id, **kwargs}))
420
376
 
@@ -424,9 +380,8 @@ def test_training_storage_reuses_worker_run_id(
424
380
  "freesolo.training.storage.FreesoloStorageClient", FakeStorageClient
425
381
  )
426
382
 
427
- stored = start_stored_training_run(
383
+ stored = attach_stored_training_run(
428
384
  phase="sft",
429
- name="full train",
430
385
  config={"learning_rate": 1e-5},
431
386
  )
432
387
 
@@ -441,21 +396,17 @@ def test_training_storage_reuses_worker_run_id(
441
396
  "config": {
442
397
  "learning_rate": 1e-5,
443
398
  "phase": "sft",
444
- "platformRunId": "run-from-worker",
445
399
  },
446
400
  },
447
401
  ),
448
- (
449
- "update_run",
450
- {
451
- "run_id": "run-from-worker",
452
- "status": "completed",
453
- },
454
- ),
455
402
  ]
456
403
 
457
404
 
458
- def test_stored_training_run_complete_and_fail_update_status() -> None:
405
+ def test_stored_training_run_never_sets_terminal_status() -> None:
406
+ """Phases stream config/W&B/errors; the platform terminalizes the run
407
+ when the job ends, so a finished SFT phase cannot mark the shared run
408
+ completed while GRPO is still ahead."""
409
+
459
410
  class FakeClient:
460
411
  def __init__(self) -> None:
461
412
  self.calls: list[tuple[str, dict[str, Any]]] = []
@@ -473,10 +424,12 @@ def test_stored_training_run_complete_and_fail_update_status() -> None:
473
424
 
474
425
  stored.link_wandb("https://wandb.ai/o/p/runs/r")
475
426
  stored.complete()
427
+ stored.complete(config={"final_checkpoint": "tinker://final"})
476
428
  stored.fail(RuntimeError("boom"))
477
429
 
478
430
  assert client.calls == [
479
431
  ("run-1", {"wandb_url": "https://wandb.ai/o/p/runs/r"}),
480
- ("run-1", {"status": "completed"}),
481
- ("run-1", {"status": "failed", "error": "boom"}),
432
+ ("run-1", {"config": {"final_checkpoint": "tinker://final"}}),
433
+ ("run-1", {"error": "boom"}),
482
434
  ]
435
+ assert not any("status" in call[1] for call in client.calls)
@@ -572,7 +572,7 @@ wheels = [
572
572
 
573
573
  [[package]]
574
574
  name = "freesolo"
575
- version = "0.2.43"
575
+ version = "0.2.45"
576
576
  source = { editable = "." }
577
577
  dependencies = [
578
578
  { name = "typing-extensions" },
@@ -1,68 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
- from contextlib import suppress
5
- from dataclasses import dataclass
6
- from typing import Any
7
-
8
- from freesolo.utils.storage import FreesoloStorageClient
9
-
10
- TRAINING_RUN_ID_ENV = "FREESOLO_TRAINING_RUN_ID"
11
-
12
-
13
- @dataclass(frozen=True)
14
- class StoredTrainingRun:
15
- client: FreesoloStorageClient
16
- run_id: str
17
-
18
- def link_wandb(self, wandb_url: str | None) -> None:
19
- self.client.link_wandb(self.run_id, wandb_url)
20
-
21
- def complete(self, config: dict[str, Any] | None = None) -> None:
22
- if config is None:
23
- self.client.update_run(self.run_id, status="completed")
24
- else:
25
- self.client.update_run(self.run_id, status="completed", config=config)
26
-
27
- def fail(self, error: BaseException) -> None:
28
- with suppress(Exception):
29
- # Preserve the original training exception for the caller.
30
- self.client.update_run(self.run_id, status="failed", error=str(error))
31
-
32
-
33
- def start_stored_training_run(
34
- *,
35
- phase: str,
36
- name: str,
37
- description: str | None = None,
38
- config: dict[str, Any] | None = None,
39
- ) -> StoredTrainingRun:
40
- client = FreesoloStorageClient()
41
- initial_config = {
42
- **(config or {}),
43
- "phase": phase,
44
- }
45
- existing_run_id = os.getenv(TRAINING_RUN_ID_ENV, "").strip()
46
- if existing_run_id:
47
- initial_config.setdefault("platformRunId", existing_run_id)
48
- client.update_run(
49
- existing_run_id,
50
- status="running",
51
- config=initial_config,
52
- )
53
- return StoredTrainingRun(
54
- client=client,
55
- run_id=existing_run_id,
56
- )
57
-
58
- run_id = client.create_run(
59
- name=name,
60
- description=description,
61
- config=initial_config,
62
- )
63
- client.update_run(
64
- run_id,
65
- status="running",
66
- config=initial_config,
67
- )
68
- return StoredTrainingRun(client=client, run_id=run_id)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes