freesolo 0.2.44__tar.gz → 0.2.45__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (129)
  1. {freesolo-0.2.44 → freesolo-0.2.45}/PKG-INFO +1 -1
  2. {freesolo-0.2.44 → freesolo-0.2.45}/npm/package.json +1 -1
  3. {freesolo-0.2.44 → freesolo-0.2.45}/package.json +1 -1
  4. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/storage.py +18 -19
  5. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/train_grpo.py +2 -4
  6. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/train_sft.py +2 -4
  7. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/storage.py +0 -28
  8. {freesolo-0.2.44 → freesolo-0.2.45}/pyproject.toml +1 -1
  9. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_storage_sync.py +29 -47
  10. {freesolo-0.2.44 → freesolo-0.2.45}/uv.lock +1 -1
  11. {freesolo-0.2.44 → freesolo-0.2.45}/.github/workflows/publish-packages.yml +0 -0
  12. {freesolo-0.2.44 → freesolo-0.2.45}/.github/workflows/python-checks.yml +0 -0
  13. {freesolo-0.2.44 → freesolo-0.2.45}/.github/workflows/sync-package-function-usage.yml +0 -0
  14. {freesolo-0.2.44 → freesolo-0.2.45}/.github/workflows/version-consistency.yml +0 -0
  15. {freesolo-0.2.44 → freesolo-0.2.45}/.gitignore +0 -0
  16. {freesolo-0.2.44 → freesolo-0.2.45}/AGENTS.md +0 -0
  17. {freesolo-0.2.44 → freesolo-0.2.45}/README.md +0 -0
  18. {freesolo-0.2.44 → freesolo-0.2.45}/examples/PROMPT.md +0 -0
  19. {freesolo-0.2.44 → freesolo-0.2.45}/examples/README.md +0 -0
  20. {freesolo-0.2.44 → freesolo-0.2.45}/examples/TRAINING_CONTRACT.md +0 -0
  21. {freesolo-0.2.44 → freesolo-0.2.45}/examples/data/support_eval.jsonl +0 -0
  22. {freesolo-0.2.44 → freesolo-0.2.45}/examples/data/support_train.jsonl +0 -0
  23. {freesolo-0.2.44 → freesolo-0.2.45}/examples/environment.py +0 -0
  24. {freesolo-0.2.44 → freesolo-0.2.45}/examples/evaluation_custom_scorer.py +0 -0
  25. {freesolo-0.2.44 → freesolo-0.2.45}/examples/evaluation_from_files.py +0 -0
  26. {freesolo-0.2.44 → freesolo-0.2.45}/examples/gepa_prompt_example.py +0 -0
  27. {freesolo-0.2.44 → freesolo-0.2.45}/examples/support_dataset.py +0 -0
  28. {freesolo-0.2.44 → freesolo-0.2.45}/examples/tracing_manual_span.py +0 -0
  29. {freesolo-0.2.44 → freesolo-0.2.45}/examples/tracing_multistep_agent.py +0 -0
  30. {freesolo-0.2.44 → freesolo-0.2.45}/examples/training_sft_grpo.py +0 -0
  31. {freesolo-0.2.44 → freesolo-0.2.45}/npm/README.md +0 -0
  32. {freesolo-0.2.44 → freesolo-0.2.45}/npm/bun.lock +0 -0
  33. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/core.d.ts +0 -0
  34. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/core.d.ts.map +0 -0
  35. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/core.js +0 -0
  36. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/evaluation.d.ts +0 -0
  37. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/evaluation.d.ts.map +0 -0
  38. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/evaluation.js +0 -0
  39. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/index.d.ts +0 -0
  40. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/index.d.ts.map +0 -0
  41. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/index.js +0 -0
  42. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/tracing.d.ts +0 -0
  43. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/tracing.d.ts.map +0 -0
  44. {freesolo-0.2.44 → freesolo-0.2.45}/npm/dist/tracing.js +0 -0
  45. {freesolo-0.2.44 → freesolo-0.2.45}/npm/src/core.ts +0 -0
  46. {freesolo-0.2.44 → freesolo-0.2.45}/npm/src/evaluation.ts +0 -0
  47. {freesolo-0.2.44 → freesolo-0.2.45}/npm/src/index.ts +0 -0
  48. {freesolo-0.2.44 → freesolo-0.2.45}/npm/src/tracing.ts +0 -0
  49. {freesolo-0.2.44 → freesolo-0.2.45}/npm/tests/evaluation.test.ts +0 -0
  50. {freesolo-0.2.44 → freesolo-0.2.45}/npm/tests/tracing.test.ts +0 -0
  51. {freesolo-0.2.44 → freesolo-0.2.45}/npm/tsconfig.json +0 -0
  52. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/.gitignore +0 -0
  53. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/README.md +0 -0
  54. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/__init__.py +0 -0
  55. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/contracts/README.md +0 -0
  56. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/contracts/__init__.py +0 -0
  57. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/contracts/markdown.py +0 -0
  58. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/contracts/types.py +0 -0
  59. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/datasets/README.md +0 -0
  60. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/datasets/__init__.py +0 -0
  61. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/datasets/core.py +0 -0
  62. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/datasets/records.py +0 -0
  63. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/datasets/types.py +0 -0
  64. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/environments/README.md +0 -0
  65. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/environments/__init__.py +0 -0
  66. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/environments/base.py +0 -0
  67. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/environments/evaluation.py +0 -0
  68. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/environments/types.py +0 -0
  69. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/README.md +0 -0
  70. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/__init__.py +0 -0
  71. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/client.py +0 -0
  72. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/judges/__init__.py +0 -0
  73. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/judges/base.py +0 -0
  74. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/responses.py +0 -0
  75. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/results.py +0 -0
  76. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/evaluation/types.py +0 -0
  77. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/gepa/README.md +0 -0
  78. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/gepa/__init__.py +0 -0
  79. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/gepa/adapter.py +0 -0
  80. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/gepa/reflection.py +0 -0
  81. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/gepa/setup.py +0 -0
  82. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/gepa/types.py +0 -0
  83. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/py.typed +0 -0
  84. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/tracing/README.md +0 -0
  85. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/tracing/__init__.py +0 -0
  86. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/tracing/otel.py +0 -0
  87. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/tracing/sanitize.py +0 -0
  88. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/README.md +0 -0
  89. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/__init__.py +0 -0
  90. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/grpo/README.md +0 -0
  91. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/grpo/__init__.py +0 -0
  92. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/grpo/config.py +0 -0
  93. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/grpo/datums.py +0 -0
  94. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/grpo/rewards.py +0 -0
  95. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/grpo/sampling.py +0 -0
  96. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/types.py +0 -0
  97. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/training/wandb_series.py +0 -0
  98. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/README.md +0 -0
  99. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/__init__.py +0 -0
  100. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/checkpoints.py +0 -0
  101. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/core.py +0 -0
  102. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/hosting.py +0 -0
  103. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/judge.py +0 -0
  104. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/openai.py +0 -0
  105. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/oracle.py +0 -0
  106. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/upload.py +0 -0
  107. {freesolo-0.2.44 → freesolo-0.2.45}/pypi/freesolo/utils/wandb.py +0 -0
  108. {freesolo-0.2.44 → freesolo-0.2.45}/ruff.toml +0 -0
  109. {freesolo-0.2.44 → freesolo-0.2.45}/tests/end_to_end_testing/test_environment_evaluation_flow.py +0 -0
  110. {freesolo-0.2.44 → freesolo-0.2.45}/tests/end_to_end_testing/test_examples.py +0 -0
  111. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_contracts_and_judges.py +0 -0
  112. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_core_utils.py +0 -0
  113. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_datasets.py +0 -0
  114. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_environment_evaluation_edges.py +0 -0
  115. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_evaluation_client.py +0 -0
  116. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_gepa_adapter.py +0 -0
  117. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_grpo_datums_and_sampling.py +0 -0
  118. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_hosting_and_deployment_clients.py +0 -0
  119. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_openai_and_oracle_tokens.py +0 -0
  120. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_package_metadata.py +0 -0
  121. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_records_rewards_and_config.py +0 -0
  122. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_tracing_opentelemetry.py +0 -0
  123. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_train_sft.py +0 -0
  124. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_training_efficiency_fixes.py +0 -0
  125. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_upload.py +0 -0
  126. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_utils_checkpoints.py +0 -0
  127. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_wandb_series.py +0 -0
  128. {freesolo-0.2.44 → freesolo-0.2.45}/tests/functionality/test_wandb_utils.py +0 -0
  129. {freesolo-0.2.44 → freesolo-0.2.45}/tests/security/test_sanitize_and_contract_security.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: freesolo
3
- Version: 0.2.44
3
+ Version: 0.2.45
4
4
  Summary: Tracing, evaluation, and training utilities for LLM applications.
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: typing-extensions>=4.8.0
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesolo/sdk",
3
- "version": "0.2.44",
3
+ "version": "0.2.45",
4
4
  "description": "Tracing and evaluation utilities for TypeScript LLM applications.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesolo/sdk",
3
- "version": "0.2.44",
3
+ "version": "0.2.45",
4
4
  "description": "Tracing and evaluation utilities for LLM applications.",
5
5
  "type": "module",
6
6
  "main": "./npm/dist/index.js",
@@ -27,26 +27,34 @@ class StoredTrainingRun:
27
27
  self.client.link_wandb(self.run_id, wandb_url)
28
28
 
29
29
  def complete(self, config: dict[str, Any] | None = None) -> None:
30
+ """Record the phase's final config on the shared run.
31
+
32
+ Terminal status is platform-owned: the backend marks the run
33
+ completed/failed when the training job ends, so an SFT phase
34
+ finishing must not flip the shared run to completed while GRPO
35
+ is still ahead.
36
+ """
30
37
  if self.client is None or self.run_id is None:
31
38
  return
32
- if config is None:
33
- self.client.update_run(self.run_id, status="completed")
34
- else:
35
- self.client.update_run(self.run_id, status="completed", config=config)
39
+ if config is not None:
40
+ self.client.update_run(self.run_id, config=config)
36
41
 
37
42
  def fail(self, error: BaseException) -> None:
43
+ """Record the phase failure's error text on the shared run.
44
+
45
+ Status stays platform-owned (the job may retry the phase and
46
+ re-attach); the backend marks the run failed if the job fails.
47
+ """
38
48
  if self.client is None or self.run_id is None:
39
49
  return
40
50
  with suppress(Exception):
41
51
  # Preserve the original training exception for the caller.
42
- self.client.update_run(self.run_id, status="failed", error=str(error))
52
+ self.client.update_run(self.run_id, error=str(error))
43
53
 
44
54
 
45
- def start_stored_training_run(
55
+ def attach_stored_training_run(
46
56
  *,
47
57
  phase: str,
48
- name: str,
49
- description: str | None = None,
50
58
  config: dict[str, Any] | None = None,
51
59
  ) -> StoredTrainingRun:
52
60
  """Attach to the platform run named by ``FREESOLO_TRAINING_RUN_ID``.
@@ -55,25 +63,16 @@ def start_stored_training_run(
55
63
  job, and every SFT/GRPO phase of that job attaches to it, so the run
56
64
  accumulates each phase's status, config, and W&B links. Without the env
57
65
  id (optimizer pilots, ad-hoc local runs) storage sync is disabled and
58
- the returned handle no-ops; ``name`` and ``description`` only label the
59
- current phase in the attached run's config.
66
+ the returned handle no-ops.
60
67
  """
61
68
  run_id = os.getenv(TRAINING_RUN_ID_ENV, "").strip()
62
69
  if not run_id:
63
70
  return StoredTrainingRun(client=None, run_id=None)
64
71
 
65
72
  client = FreesoloStorageClient()
66
- phase_config = {
67
- **(config or {}),
68
- "phase": phase,
69
- "phaseRunName": name,
70
- "platformRunId": run_id,
71
- }
72
- if description:
73
- phase_config.setdefault("phaseDescription", description)
74
73
  client.update_run(
75
74
  run_id,
76
75
  status="running",
77
- config=phase_config,
76
+ config={**(config or {}), "phase": phase},
78
77
  )
79
78
  return StoredTrainingRun(client=client, run_id=run_id)
@@ -29,7 +29,7 @@ from freesolo.training.grpo.sampling import (
29
29
  ensure_sampling_client,
30
30
  session_id_from_tinker_path,
31
31
  )
32
- from freesolo.training.storage import start_stored_training_run
32
+ from freesolo.training.storage import attach_stored_training_run
33
33
  from freesolo.training.types import (
34
34
  DEFAULT_TRAINING_LORA_RANK,
35
35
  TRAINING_BASE_MODEL,
@@ -149,10 +149,8 @@ async def train_grpo_async(
149
149
  "record_count": len(examples),
150
150
  **tinker_run_config(resolved_tinker_base_url),
151
151
  }
152
- stored_run = start_stored_training_run(
152
+ stored_run = attach_stored_training_run(
153
153
  phase="grpo",
154
- name=run_name,
155
- description="Freesolo GRPO training run",
156
154
  config=run_config,
157
155
  )
158
156
  wandb_run = None
@@ -14,7 +14,7 @@ if __package__ in {None, ""}:
14
14
  from freesolo.contracts import ChatMessage, load_contract_text
15
15
  from freesolo.datasets import load_dataset
16
16
  from freesolo.environments.base import load_environment
17
- from freesolo.training.storage import start_stored_training_run
17
+ from freesolo.training.storage import attach_stored_training_run
18
18
  from freesolo.training.types import (
19
19
  TRAINING_BASE_MODEL,
20
20
  SftConfig,
@@ -144,10 +144,8 @@ def _train_sft(
144
144
  "training_example_count": len(training_conversations),
145
145
  **tinker_run_config(resolved_tinker_base_url),
146
146
  }
147
- stored_run = start_stored_training_run(
147
+ stored_run = attach_stored_training_run(
148
148
  phase="sft",
149
- name=run_name,
150
- description="Freesolo SFT training run",
151
149
  config=run_config,
152
150
  )
153
151
  wandb_run = None
@@ -106,34 +106,6 @@ class FreesoloStorageClient:
106
106
  )
107
107
  _json_response(response, AUTH_VERIFY_PATH)
108
108
 
109
- def create_run(
110
- self,
111
- *,
112
- name: str,
113
- description: str | None = None,
114
- version: str | None = None,
115
- evaluator_ids: list[str] | tuple[str, ...] = (),
116
- model_id: str | None = None,
117
- config: dict[str, Any] | None = None,
118
- ) -> str:
119
- payload: JsonObject = {"name": require_non_empty(name, "name")}
120
- if description:
121
- payload["description"] = description
122
- if version:
123
- payload["version"] = version
124
- if model_id:
125
- payload["model_id"] = model_id
126
- if evaluator_ids:
127
- payload["evaluator_ids"] = [str(value) for value in evaluator_ids]
128
- run_config = _training_run_config(config)
129
- if run_config:
130
- payload["config"] = run_config
131
- data = self._post_json(
132
- RUNS_PATH,
133
- payload,
134
- )
135
- return _response_id(data, "run_id")
136
-
137
109
  def update_run(
138
110
  self,
139
111
  run_id: str,
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "freesolo"
7
- version = "0.2.44"
7
+ version = "0.2.45"
8
8
  description = "Tracing, evaluation, and training utilities for LLM applications."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -4,7 +4,7 @@ from typing import Any, ClassVar
4
4
 
5
5
  import httpx
6
6
  import pytest
7
- from freesolo.training.storage import start_stored_training_run
7
+ from freesolo.training.storage import attach_stored_training_run
8
8
  from freesolo.utils.core import _auth_headers
9
9
  from freesolo.utils.storage import FreesoloStorageClient
10
10
 
@@ -139,7 +139,7 @@ def test_storage_client_sets_auth_headers(
139
139
  FreesoloStorageClient(
140
140
  api_key="fs-test",
141
141
  base_url="https://api.test",
142
- ).create_run(name="train")
142
+ ).update_run("run-1", status="running")
143
143
 
144
144
  assert _FakeHttpClient.calls[0]["headers"] == _auth_headers("fs-test")
145
145
 
@@ -156,7 +156,7 @@ def test_storage_client_sets_internal_training_scope_headers(
156
156
  user_id="user-1",
157
157
  org_id="org-A",
158
158
  base_url="https://api.test",
159
- ).create_run(name="train")
159
+ ).update_run("run-1", status="running")
160
160
 
161
161
  assert _FakeHttpClient.calls[0]["headers"] == {
162
162
  "Authorization": "Bearer internal-key",
@@ -216,28 +216,21 @@ def test_verify_api_key_rejects_unauthorized_response(
216
216
  ).verify_api_key()
217
217
 
218
218
 
219
- def test_run_create_update_and_wandb_use_freesolo_api(
219
+ def test_run_update_and_wandb_use_freesolo_api(
220
220
  monkeypatch: pytest.MonkeyPatch,
221
221
  ) -> None:
222
222
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
223
223
  client = FreesoloStorageClient(api_key="fs-test", base_url="https://api.test")
224
224
 
225
- run_id = client.create_run(name="train")
226
- client.update_run(run_id, status="running", config={"phase": "sft"})
227
- client.link_wandb(run_id, "https://wandb.ai/o/p/runs/r")
225
+ client.update_run("run-1", status="running", config={"phase": "sft"})
226
+ client.link_wandb("run-1", "https://wandb.ai/o/p/runs/r")
228
227
 
229
- assert run_id == "run-1"
230
228
  assert _FakeHttpClient.calls[0] == {
231
- "url": "https://api.test/api/runs",
232
- "headers": _auth_headers("fs-test"),
233
- "json": {"name": "train"},
234
- }
235
- assert _FakeHttpClient.calls[1] == {
236
229
  "url": "https://api.test/api/runs/run-1",
237
230
  "headers": _auth_headers("fs-test"),
238
231
  "json": {"status": "running", "config": {"phase": "sft"}},
239
232
  }
240
- assert _FakeHttpClient.calls[2] == {
233
+ assert _FakeHttpClient.calls[1] == {
241
234
  "url": "https://api.test/api/runs/run-1/wandb",
242
235
  "headers": _auth_headers("fs-test"),
243
236
  "json": {"wandb_url": "https://wandb.ai/o/p/runs/r"},
@@ -252,11 +245,10 @@ def test_run_config_includes_agent_job_context(
252
245
  monkeypatch.setenv("FREESOLO_TRAINING_TASK_ID", "task-1")
253
246
  client = FreesoloStorageClient(api_key="fs-test", base_url="https://api.test")
254
247
 
255
- run_id = client.create_run(name="train", config={"phase": "sft"})
256
- client.update_run(run_id, config={"phase": "grpo"})
248
+ client.update_run("run-1", config={"phase": "sft"})
249
+ client.update_run("run-1", config={"phase": "grpo"})
257
250
 
258
251
  assert _FakeHttpClient.calls[0]["json"] == {
259
- "name": "train",
260
252
  "config": {
261
253
  "phase": "sft",
262
254
  "trainingAgentJobId": "job-1",
@@ -306,28 +298,28 @@ def test_storage_client_surfaces_http_error_body(
306
298
  monkeypatch: pytest.MonkeyPatch,
307
299
  ) -> None:
308
300
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
309
- _FakeHttpClient.status_by_suffix = {"/api/runs": 500}
310
- _FakeHttpClient.text_by_suffix = {"/api/runs": "db unavailable"}
301
+ _FakeHttpClient.status_by_suffix = {"/api/runs/run-1": 500}
302
+ _FakeHttpClient.text_by_suffix = {"/api/runs/run-1": "db unavailable"}
311
303
 
312
304
  with pytest.raises(httpx.HTTPStatusError, match="db unavailable"):
313
305
  FreesoloStorageClient(
314
306
  api_key="fs-test",
315
307
  base_url="https://api.test",
316
- ).create_run(name="train")
308
+ ).update_run("run-1", status="running")
317
309
 
318
310
 
319
311
  def test_storage_client_truncates_long_http_error_body(
320
312
  monkeypatch: pytest.MonkeyPatch,
321
313
  ) -> None:
322
314
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
323
- _FakeHttpClient.status_by_suffix = {"/api/runs": 500}
324
- _FakeHttpClient.text_by_suffix = {"/api/runs": "x" * 2_500}
315
+ _FakeHttpClient.status_by_suffix = {"/api/runs/run-1": 500}
316
+ _FakeHttpClient.text_by_suffix = {"/api/runs/run-1": "x" * 2_500}
325
317
 
326
318
  with pytest.raises(httpx.HTTPStatusError) as exc_info:
327
319
  FreesoloStorageClient(
328
320
  api_key="fs-test",
329
321
  base_url="https://api.test",
330
- ).create_run(name="train")
322
+ ).update_run("run-1", status="running")
331
323
 
332
324
  message = str(exc_info.value)
333
325
  assert message.count("x") == 2_000
@@ -338,13 +330,13 @@ def test_storage_client_rejects_missing_response_id(
338
330
  monkeypatch: pytest.MonkeyPatch,
339
331
  ) -> None:
340
332
  monkeypatch.setattr("freesolo.utils.storage.httpx.Client", _FakeHttpClient)
341
- _FakeHttpClient.response_json_by_suffix = {"/api/runs": {"ok": True}}
333
+ _FakeHttpClient.response_json_by_suffix = {"/api/codex-logs": {"ok": True}}
342
334
 
343
- with pytest.raises(ValueError, match="missing run_id"):
335
+ with pytest.raises(ValueError, match="missing codex_log_id"):
344
336
  FreesoloStorageClient(
345
337
  api_key="fs-test",
346
338
  base_url="https://api.test",
347
- ).create_run(name="train")
339
+ ).append_codex_log("Draft agent started")
348
340
 
349
341
 
350
342
  def test_training_storage_is_disabled_without_platform_run(
@@ -359,10 +351,8 @@ def test_training_storage_is_disabled_without_platform_run(
359
351
 
360
352
  monkeypatch.setattr("freesolo.training.storage.FreesoloStorageClient", _fail_client)
361
353
 
362
- stored = start_stored_training_run(
354
+ stored = attach_stored_training_run(
363
355
  phase="grpo",
364
- name="full train",
365
- description="long training",
366
356
  config={"learning_rate": 1e-5},
367
357
  )
368
358
 
@@ -381,10 +371,6 @@ def test_training_storage_reuses_worker_run_id(
381
371
  self.calls: list[tuple[str, dict[str, Any]]] = []
382
372
  created_clients.append(self)
383
373
 
384
- def create_run(self, **kwargs: Any) -> str:
385
- self.calls.append(("create_run", kwargs))
386
- return "new-run"
387
-
388
374
  def update_run(self, run_id: str, **kwargs: Any) -> None:
389
375
  self.calls.append(("update_run", {"run_id": run_id, **kwargs}))
390
376
 
@@ -394,9 +380,8 @@ def test_training_storage_reuses_worker_run_id(
394
380
  "freesolo.training.storage.FreesoloStorageClient", FakeStorageClient
395
381
  )
396
382
 
397
- stored = start_stored_training_run(
383
+ stored = attach_stored_training_run(
398
384
  phase="sft",
399
- name="full train",
400
385
  config={"learning_rate": 1e-5},
401
386
  )
402
387
 
@@ -411,22 +396,17 @@ def test_training_storage_reuses_worker_run_id(
411
396
  "config": {
412
397
  "learning_rate": 1e-5,
413
398
  "phase": "sft",
414
- "phaseRunName": "full train",
415
- "platformRunId": "run-from-worker",
416
399
  },
417
400
  },
418
401
  ),
419
- (
420
- "update_run",
421
- {
422
- "run_id": "run-from-worker",
423
- "status": "completed",
424
- },
425
- ),
426
402
  ]
427
403
 
428
404
 
429
- def test_stored_training_run_complete_and_fail_update_status() -> None:
405
+ def test_stored_training_run_never_sets_terminal_status() -> None:
406
+ """Phases stream config/W&B/errors; the platform terminalizes the run
407
+ when the job ends, so a finished SFT phase cannot mark the shared run
408
+ completed while GRPO is still ahead."""
409
+
430
410
  class FakeClient:
431
411
  def __init__(self) -> None:
432
412
  self.calls: list[tuple[str, dict[str, Any]]] = []
@@ -444,10 +424,12 @@ def test_stored_training_run_complete_and_fail_update_status() -> None:
444
424
 
445
425
  stored.link_wandb("https://wandb.ai/o/p/runs/r")
446
426
  stored.complete()
427
+ stored.complete(config={"final_checkpoint": "tinker://final"})
447
428
  stored.fail(RuntimeError("boom"))
448
429
 
449
430
  assert client.calls == [
450
431
  ("run-1", {"wandb_url": "https://wandb.ai/o/p/runs/r"}),
451
- ("run-1", {"status": "completed"}),
452
- ("run-1", {"status": "failed", "error": "boom"}),
432
+ ("run-1", {"config": {"final_checkpoint": "tinker://final"}}),
433
+ ("run-1", {"error": "boom"}),
453
434
  ]
435
+ assert not any("status" in call[1] for call in client.calls)
@@ -572,7 +572,7 @@ wheels = [
572
572
 
573
573
  [[package]]
574
574
  name = "freesolo"
575
- version = "0.2.44"
575
+ version = "0.2.45"
576
576
  source = { editable = "." }
577
577
  dependencies = [
578
578
  { name = "typing-extensions" },
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes