freesolo 0.2.45__tar.gz → 0.2.47__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/publish-packages.yml +6 -1
  2. {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/python-checks.yml +4 -4
  3. {freesolo-0.2.45 → freesolo-0.2.47}/AGENTS.md +12 -0
  4. freesolo-0.2.47/PKG-INFO +56 -0
  5. freesolo-0.2.47/README.md +46 -0
  6. {freesolo-0.2.45 → freesolo-0.2.47}/npm/README.md +1 -1
  7. {freesolo-0.2.45 → freesolo-0.2.47}/npm/package.json +1 -1
  8. {freesolo-0.2.45 → freesolo-0.2.47}/package.json +1 -1
  9. freesolo-0.2.47/pypi/freesolo/README.md +47 -0
  10. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/README.md +3 -3
  11. freesolo-0.2.47/pypi/freesolo/datasets/_compat.py +53 -0
  12. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/core.py +1 -3
  13. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/records.py +1 -2
  14. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/types.py +1 -1
  15. freesolo-0.2.47/pypi/freesolo/environments/README.md +46 -0
  16. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/environments/__init__.py +1 -3
  17. freesolo-0.2.45/pypi/freesolo/contracts/markdown.py → freesolo-0.2.47/pypi/freesolo/environments/_compat.py +43 -6
  18. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/environments/base.py +10 -14
  19. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/environments/types.py +18 -4
  20. freesolo-0.2.47/pyproject.toml +65 -0
  21. freesolo-0.2.47/tests/functionality/test_core_utils.py +68 -0
  22. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_datasets.py +0 -8
  23. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_hosting_and_deployment_clients.py +0 -45
  24. freesolo-0.2.47/tests/functionality/test_package_metadata.py +28 -0
  25. {freesolo-0.2.45 → freesolo-0.2.47}/uv.lock +1567 -1456
  26. freesolo-0.2.45/PKG-INFO +0 -380
  27. freesolo-0.2.45/README.md +0 -327
  28. freesolo-0.2.45/examples/PROMPT.md +0 -10
  29. freesolo-0.2.45/examples/README.md +0 -103
  30. freesolo-0.2.45/examples/TRAINING_CONTRACT.md +0 -10
  31. freesolo-0.2.45/examples/data/support_eval.jsonl +0 -3
  32. freesolo-0.2.45/examples/data/support_train.jsonl +0 -3
  33. freesolo-0.2.45/examples/environment.py +0 -110
  34. freesolo-0.2.45/examples/evaluation_custom_scorer.py +0 -105
  35. freesolo-0.2.45/examples/evaluation_from_files.py +0 -47
  36. freesolo-0.2.45/examples/gepa_prompt_example.py +0 -76
  37. freesolo-0.2.45/examples/support_dataset.py +0 -9
  38. freesolo-0.2.45/examples/tracing_manual_span.py +0 -36
  39. freesolo-0.2.45/examples/tracing_multistep_agent.py +0 -63
  40. freesolo-0.2.45/examples/training_sft_grpo.py +0 -82
  41. freesolo-0.2.45/pypi/freesolo/README.md +0 -60
  42. freesolo-0.2.45/pypi/freesolo/contracts/README.md +0 -64
  43. freesolo-0.2.45/pypi/freesolo/contracts/__init__.py +0 -31
  44. freesolo-0.2.45/pypi/freesolo/contracts/types.py +0 -54
  45. freesolo-0.2.45/pypi/freesolo/environments/README.md +0 -96
  46. freesolo-0.2.45/pypi/freesolo/environments/evaluation.py +0 -414
  47. freesolo-0.2.45/pypi/freesolo/evaluation/README.md +0 -77
  48. freesolo-0.2.45/pypi/freesolo/evaluation/__init__.py +0 -11
  49. freesolo-0.2.45/pypi/freesolo/evaluation/client.py +0 -506
  50. freesolo-0.2.45/pypi/freesolo/evaluation/judges/__init__.py +0 -5
  51. freesolo-0.2.45/pypi/freesolo/evaluation/judges/base.py +0 -24
  52. freesolo-0.2.45/pypi/freesolo/evaluation/responses.py +0 -57
  53. freesolo-0.2.45/pypi/freesolo/evaluation/results.py +0 -94
  54. freesolo-0.2.45/pypi/freesolo/evaluation/types.py +0 -16
  55. freesolo-0.2.45/pypi/freesolo/gepa/README.md +0 -40
  56. freesolo-0.2.45/pypi/freesolo/gepa/__init__.py +0 -17
  57. freesolo-0.2.45/pypi/freesolo/gepa/adapter.py +0 -305
  58. freesolo-0.2.45/pypi/freesolo/gepa/reflection.py +0 -88
  59. freesolo-0.2.45/pypi/freesolo/gepa/setup.py +0 -219
  60. freesolo-0.2.45/pypi/freesolo/gepa/types.py +0 -123
  61. freesolo-0.2.45/pypi/freesolo/tracing/README.md +0 -110
  62. freesolo-0.2.45/pypi/freesolo/tracing/__init__.py +0 -31
  63. freesolo-0.2.45/pypi/freesolo/tracing/otel.py +0 -655
  64. freesolo-0.2.45/pypi/freesolo/tracing/sanitize.py +0 -220
  65. freesolo-0.2.45/pypi/freesolo/training/README.md +0 -118
  66. freesolo-0.2.45/pypi/freesolo/training/__init__.py +0 -65
  67. freesolo-0.2.45/pypi/freesolo/training/grpo/README.md +0 -103
  68. freesolo-0.2.45/pypi/freesolo/training/grpo/__init__.py +0 -5
  69. freesolo-0.2.45/pypi/freesolo/training/grpo/config.py +0 -91
  70. freesolo-0.2.45/pypi/freesolo/training/grpo/datums.py +0 -323
  71. freesolo-0.2.45/pypi/freesolo/training/grpo/rewards.py +0 -78
  72. freesolo-0.2.45/pypi/freesolo/training/grpo/sampling.py +0 -134
  73. freesolo-0.2.45/pypi/freesolo/training/storage.py +0 -78
  74. freesolo-0.2.45/pypi/freesolo/training/train_grpo.py +0 -762
  75. freesolo-0.2.45/pypi/freesolo/training/train_sft.py +0 -352
  76. freesolo-0.2.45/pypi/freesolo/training/types.py +0 -156
  77. freesolo-0.2.45/pypi/freesolo/training/wandb_series.py +0 -70
  78. freesolo-0.2.45/pypi/freesolo/utils/README.md +0 -53
  79. freesolo-0.2.45/pypi/freesolo/utils/__init__.py +0 -0
  80. freesolo-0.2.45/pypi/freesolo/utils/checkpoints.py +0 -284
  81. freesolo-0.2.45/pypi/freesolo/utils/core.py +0 -289
  82. freesolo-0.2.45/pypi/freesolo/utils/hosting.py +0 -160
  83. freesolo-0.2.45/pypi/freesolo/utils/judge.py +0 -207
  84. freesolo-0.2.45/pypi/freesolo/utils/openai.py +0 -272
  85. freesolo-0.2.45/pypi/freesolo/utils/oracle.py +0 -576
  86. freesolo-0.2.45/pypi/freesolo/utils/storage.py +0 -228
  87. freesolo-0.2.45/pypi/freesolo/utils/upload.py +0 -142
  88. freesolo-0.2.45/pypi/freesolo/utils/wandb.py +0 -308
  89. freesolo-0.2.45/pyproject.toml +0 -76
  90. freesolo-0.2.45/tests/end_to_end_testing/test_examples.py +0 -180
  91. freesolo-0.2.45/tests/functionality/test_core_utils.py +0 -131
  92. freesolo-0.2.45/tests/functionality/test_gepa_adapter.py +0 -269
  93. freesolo-0.2.45/tests/functionality/test_grpo_datums_and_sampling.py +0 -482
  94. freesolo-0.2.45/tests/functionality/test_package_metadata.py +0 -49
  95. freesolo-0.2.45/tests/functionality/test_records_rewards_and_config.py +0 -493
  96. freesolo-0.2.45/tests/functionality/test_storage_sync.py +0 -435
  97. freesolo-0.2.45/tests/functionality/test_train_sft.py +0 -55
  98. freesolo-0.2.45/tests/functionality/test_training_efficiency_fixes.py +0 -473
  99. freesolo-0.2.45/tests/functionality/test_upload.py +0 -97
  100. freesolo-0.2.45/tests/functionality/test_utils_checkpoints.py +0 -120
  101. freesolo-0.2.45/tests/functionality/test_wandb_series.py +0 -132
  102. {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/sync-package-function-usage.yml +0 -0
  103. {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/version-consistency.yml +0 -0
  104. {freesolo-0.2.45 → freesolo-0.2.47}/.gitignore +0 -0
  105. {freesolo-0.2.45 → freesolo-0.2.47}/npm/bun.lock +0 -0
  106. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/core.d.ts +0 -0
  107. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/core.d.ts.map +0 -0
  108. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/core.js +0 -0
  109. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/evaluation.d.ts +0 -0
  110. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/evaluation.d.ts.map +0 -0
  111. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/evaluation.js +0 -0
  112. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/index.d.ts +0 -0
  113. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/index.d.ts.map +0 -0
  114. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/index.js +0 -0
  115. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/tracing.d.ts +0 -0
  116. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/tracing.d.ts.map +0 -0
  117. {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/tracing.js +0 -0
  118. {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/core.ts +0 -0
  119. {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/evaluation.ts +0 -0
  120. {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/index.ts +0 -0
  121. {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/tracing.ts +0 -0
  122. {freesolo-0.2.45 → freesolo-0.2.47}/npm/tests/evaluation.test.ts +0 -0
  123. {freesolo-0.2.45 → freesolo-0.2.47}/npm/tests/tracing.test.ts +0 -0
  124. {freesolo-0.2.45 → freesolo-0.2.47}/npm/tsconfig.json +0 -0
  125. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/.gitignore +0 -0
  126. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/__init__.py +0 -0
  127. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/__init__.py +0 -0
  128. {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/py.typed +0 -0
  129. {freesolo-0.2.45 → freesolo-0.2.47}/ruff.toml +0 -0
  130. {freesolo-0.2.45 → freesolo-0.2.47}/tests/end_to_end_testing/test_environment_evaluation_flow.py +0 -0
  131. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_contracts_and_judges.py +0 -0
  132. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_environment_evaluation_edges.py +0 -0
  133. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_evaluation_client.py +0 -0
  134. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_openai_and_oracle_tokens.py +0 -0
  135. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_tracing_opentelemetry.py +0 -0
  136. {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_wandb_utils.py +0 -0
  137. {freesolo-0.2.45 → freesolo-0.2.47}/tests/security/test_sanitize_and_contract_security.py +0 -0
@@ -264,7 +264,12 @@ jobs:
264
264
  echo "::error::NPM_TOKEN is not configured; refusing to skip publish."
265
265
  exit 1
266
266
  fi
267
- bun publish --access public
267
+ # bun publish does not pick up NODE_AUTH_TOKEN or ~/.npmrc auth, so
268
+ # publish the bun-built package with npm and a project npmrc.
269
+ umask 077
270
+ printf '//registry.npmjs.org/:_authToken=%s\n' "$NODE_AUTH_TOKEN" > .npmrc
271
+ npm publish --access public
272
+ rm -f .npmrc
268
273
 
269
274
  - name: No npm package changes
270
275
  if: github.event_name == 'push' && steps.changes.outputs.npm_changed == 'false'
@@ -26,16 +26,16 @@ jobs:
26
26
  run: python3 -m pip install --upgrade uv
27
27
 
28
28
  - name: Install dependencies
29
- run: uv sync --locked --extra dev
29
+ run: uv sync --locked --group dev
30
30
 
31
31
  - name: Python compile check
32
32
  run: python3 -m py_compile $(find pypi tests -name '*.py' -print)
33
33
 
34
34
  - name: Ruff check
35
- run: uv run --extra dev python -m ruff check .
35
+ run: uv run python -m ruff check .
36
36
 
37
37
  - name: Ruff format check
38
- run: uv run --extra dev python -m ruff format --check .
38
+ run: uv run python -m ruff format --check .
39
39
 
40
40
  - name: Tests
41
- run: uv run --extra dev python -m pytest tests
41
+ run: uv run python -m pytest tests
@@ -21,3 +21,15 @@ This is a Python SDK (`freesolo`) for tracing, evaluating, and training LLM appl
21
21
  - When running examples outside of tests, set `PYTHONPATH="$PWD/pypi"` so the local source is used.
22
22
  - The `--local` flag on examples runs scorers locally without requiring `FREESOLO_API_KEY`.
23
23
  - Dev dependencies (`pytest`, `ruff`) are in the `[dependency-groups]` `dev` group; use `uv sync --group dev` to install them.
24
+
25
+ ### Deployment / auto-pull
26
+
27
+ - The freesolo agent-worker on the deploy VM does **not** install this SDK from
28
+ PyPI — it bind-mounts a host checkout of this repo and imports from source
29
+ (`PYTHONPATH=/freesolo-sdk/pypi`; see `freesolo`'s `docker-compose.yml`).
30
+ - A push to `main` is auto-pulled: the deploy host polls `origin/main` every
31
+ ~2 min, fast-forwards `~/freesolo-sdk`, and restarts the agent-worker so it
32
+ re-imports the new code (`freesolo/scripts/sync-sdk.sh`, driven by
33
+ `freesolo-sdk-sync.timer`). So merging to `main` here updates the running
34
+ worker without a manual deploy — but it can interrupt an in-progress job
35
+ (requeue-stale re-enqueues it). Keep `main` deployable.
@@ -0,0 +1,56 @@
1
+ Metadata-Version: 2.4
2
+ Name: freesolo
3
+ Version: 0.2.47
4
+ Summary: Environment and dataset helpers for Freesolo-generated repos.
5
+ Requires-Python: >=3.10
6
+ Requires-Dist: typing-extensions>=4.8.0
7
+ Provides-Extra: bson
8
+ Requires-Dist: pymongo>=4.0.0; extra == 'bson'
9
+ Description-Content-Type: text/markdown
10
+
11
+ # freesolo
12
+
13
+ `freesolo` is the published Python SDK surface for generated repos: environments and
14
+ datasets used for evaluation and task definition.
15
+
16
+ It is intentionally narrow:
17
+
18
+ - `freesolo.environments`
19
+ - `freesolo.datasets`
20
+
21
+ Everything else (evaluation, tracing, and internal helpers) is kept in the
22
+ repository for internal workflows but is not part of the public SDK contract.
23
+
24
+ ## Install
25
+
26
+ ```bash
27
+ pip install freesolo
28
+ ```
29
+
30
+ From source:
31
+
32
+ ```bash
33
+ cd freesolo-sdk
34
+ export PYTHONPATH="$PWD/pypi"
35
+ ```
36
+
37
+ ## Example
38
+
39
+ ```python
40
+ from freesolo.datasets import load_dataset
41
+ from freesolo.environments import load_environment
42
+
43
+ dataset = load_dataset("support.jsonl")
44
+ environment = load_environment("freesolo/environment.py:load_environment")
45
+
46
+ print(len(dataset.records))
47
+ print(type(environment).__name__)
48
+ ```
49
+
50
+ ## API Guidance
51
+
52
+ Use `freesolo.datasets` for task examples and `freesolo.environments` for environment
53
+ loading/scoring interfaces.
54
+
55
+ - No command-line help surface is published as part of the SDK contract.
56
+ - Hidden modules remain available in source history for internal tooling only.
@@ -0,0 +1,46 @@
1
+ # freesolo
2
+
3
+ `freesolo` is the published Python SDK surface for generated repos: environments and
4
+ datasets used for evaluation and task definition.
5
+
6
+ It is intentionally narrow:
7
+
8
+ - `freesolo.environments`
9
+ - `freesolo.datasets`
10
+
11
+ Everything else (evaluation, tracing, and internal helpers) is kept in the
12
+ repository for internal workflows but is not part of the public SDK contract.
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pip install freesolo
18
+ ```
19
+
20
+ From source:
21
+
22
+ ```bash
23
+ cd freesolo-sdk
24
+ export PYTHONPATH="$PWD/pypi"
25
+ ```
26
+
27
+ ## Example
28
+
29
+ ```python
30
+ from freesolo.datasets import load_dataset
31
+ from freesolo.environments import load_environment
32
+
33
+ dataset = load_dataset("support.jsonl")
34
+ environment = load_environment("freesolo/environment.py:load_environment")
35
+
36
+ print(len(dataset.records))
37
+ print(type(environment).__name__)
38
+ ```
39
+
40
+ ## API Guidance
41
+
42
+ Use `freesolo.datasets` for task examples and `freesolo.environments` for environment
43
+ loading/scoring interfaces.
44
+
45
+ - No command-line help surface is published as part of the SDK contract.
46
+ - Hidden modules remain available in source history for internal tooling only.
@@ -8,7 +8,7 @@ This npm package intentionally contains only:
8
8
  - tracing helpers for exporting OpenTelemetry spans to Freesolo
9
9
  - evaluation primitives and `EvaluationClient`
10
10
 
11
- It does not include Freesolo training, datasets, GEPA, Tinker, or generated
11
+ It does not include Freesolo training, datasets, GEPA, AutoSLM, or generated
12
12
  Python training-repo helpers.
13
13
 
14
14
  ## Tracing
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesolo/sdk",
3
- "version": "0.2.45",
3
+ "version": "0.2.46",
4
4
  "description": "Tracing and evaluation utilities for TypeScript LLM applications.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@freesolo/sdk",
3
- "version": "0.2.45",
3
+ "version": "0.2.47",
4
4
  "description": "Tracing and evaluation utilities for LLM applications.",
5
5
  "type": "module",
6
6
  "main": "./npm/dist/index.js",
@@ -0,0 +1,47 @@
1
+ # Freesolo SDK Package Map
2
+
3
+ This package exposes the public surface for generated repos:
4
+
5
+ - `freesolo.environments` for environment loading, scoring helpers, and episode metadata.
6
+ - `freesolo.datasets` for task-record loading and prompt construction.
7
+
8
+ Everything else in `pypi/freesolo/*` (evaluation, tracing, and utilities) is
9
+ not part of the published SDK public contract.
10
+
11
+ ## Public Imports
12
+
13
+ Use these package areas:
14
+
15
+ - `freesolo.environments`: environment interface and environment utilities.
16
+ - `freesolo.datasets`: dataset utilities and task examples.
17
+
18
+ ## Install
19
+
20
+ ```bash
21
+ pip install freesolo
22
+ ```
23
+
24
+ ## From a repo checkout
25
+
26
+ ```bash
27
+ cd freesolo-sdk
28
+ export PYTHONPATH="$PWD/pypi"
29
+ ```
30
+
31
+ ```python
32
+ from freesolo.datasets import load_dataset
33
+ from freesolo.environments import load_environment
34
+
35
+ dataset = load_dataset("support.jsonl")
36
+ environment = load_environment("freesolo/environment.py:load_environment")
37
+
38
+ print(len(dataset.records))
39
+ print(type(environment).__name__)
40
+ ```
41
+
42
+ ## Public API
43
+
44
+ The root module exports nothing directly. Import from:
45
+
46
+ - `freesolo.environments`
47
+ - `freesolo.datasets`
@@ -1,7 +1,7 @@
1
1
  # Datasets
2
2
 
3
- Dataset helpers turn raw records into `TaskExample` objects and SFT
4
- conversations.
3
+ Dataset helpers turn raw records into `TaskExample` objects and generated
4
+ conversation prompts.
5
5
 
6
6
  ## Public Imports
7
7
 
@@ -28,7 +28,7 @@ Each record must include one task field:
28
28
  - `query`
29
29
  - `input`
30
30
 
31
- Labeled/SFT records should also include one target field:
31
+ Labeled records should also include one target field:
32
32
 
33
33
  - `ground_truth`
34
34
  - `expected_output`
@@ -0,0 +1,53 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections.abc import Mapping
5
+ from typing import TYPE_CHECKING, TypedDict
6
+
7
+ if TYPE_CHECKING:
8
+ JsonValue = (
9
+ str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"]
10
+ )
11
+ JsonObject = dict[str, JsonValue]
12
+ MetadataDict = dict[str, JsonValue]
13
+ else:
14
+ JsonValue = object
15
+ JsonObject = dict[str, object]
16
+ MetadataDict = dict[str, object]
17
+
18
+
19
+ class ChatMessage(TypedDict):
20
+ role: str
21
+ content: str
22
+
23
+
24
+ def json_safe_value(value: object) -> JsonValue:
25
+ if value is None or isinstance(value, (str, int, float, bool)):
26
+ return value
27
+ if isinstance(value, Mapping):
28
+ return {str(key): json_safe_value(item) for key, item in value.items()}
29
+ if isinstance(value, (list, tuple, set)):
30
+ return [json_safe_value(item) for item in value]
31
+ try:
32
+ json.dumps(value)
33
+ except (TypeError, ValueError):
34
+ return str(value)
35
+ return value
36
+
37
+
38
+ def serialize_value(value: object, *, pretty: bool = True) -> str:
39
+ if isinstance(value, str):
40
+ return value.strip()
41
+ payload = json_safe_value(value)
42
+ if pretty:
43
+ return json.dumps(payload, indent=2, sort_keys=True, ensure_ascii=True)
44
+ return json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
45
+
46
+
47
+ __all__ = [
48
+ "ChatMessage",
49
+ "JsonObject",
50
+ "JsonValue",
51
+ "MetadataDict",
52
+ "serialize_value",
53
+ ]
@@ -3,9 +3,7 @@ from __future__ import annotations
3
3
  from dataclasses import dataclass
4
4
  from typing import Protocol
5
5
 
6
- from freesolo.contracts.markdown import ChatMessage
7
- from freesolo.utils.core import serialize_value
8
-
6
+ from ._compat import ChatMessage, serialize_value
9
7
  from .records import load_task_examples
10
8
  from .types import DatasetSource, TaskExample
11
9
 
@@ -5,8 +5,7 @@ import json
5
5
  from pathlib import Path
6
6
  from typing import Any, TypedDict
7
7
 
8
- from freesolo.utils.core import serialize_value
9
-
8
+ from ._compat import serialize_value
10
9
  from .types import DatasetRecord, DatasetSource, TaskExample
11
10
 
12
11
  try:
@@ -5,7 +5,7 @@ from dataclasses import dataclass, field
5
5
  from pathlib import Path
6
6
  from typing import TypeAlias
7
7
 
8
- from freesolo.utils.core import JsonObject, MetadataDict
8
+ from ._compat import JsonObject, MetadataDict
9
9
 
10
10
  DatasetRecord = JsonObject
11
11
  DatasetMetadata = MetadataDict
@@ -0,0 +1,46 @@
1
+ # Freesolo Environments
2
+
3
+ Generated repos should use one canonical environment module:
4
+
5
+ ```text
6
+ freesolo/environment.py
7
+ ```
8
+
9
+ That file must expose:
10
+
11
+ ```python
12
+ def load_environment(
13
+ *,
14
+ contract_path: str | None = None,
15
+ dataset_path: str | None = None,
16
+ reward_command: str | None = None,
17
+ mode: str = "eval",
18
+ **_: object,
19
+ ) -> Environment:
20
+ return RepoEnvironment(
21
+ contract_path=contract_path,
22
+ dataset_path=dataset_path,
23
+ reward_command=reward_command,
24
+ mode=mode,
25
+ )
26
+ ```
27
+
28
+ For authoring, prefer one of the explicit branch base classes:
29
+
30
+ - `EnvironmentSingleTurn` for one prompt → one assistant response tasks.
31
+ - `EnvironmentMultiTurn` for bounded transcript-based tasks.
32
+
33
+ Implement one concrete environment and keep environment loading in `load_environment()`.
34
+
35
+ ## Required environment API
36
+
37
+ - `load_environment()` must return an instance of an `EnvironmentSingleTurn` or `EnvironmentMultiTurn` subclass.
38
+ - `score_response(example, response_text)` must return `RewardResult`.
39
+ - Optionally override `build_prompt_messages`, `extract_response_text`,
40
+ `normalize_response_text`, and episode handlers.
41
+
42
+ ## Optional helper
43
+
44
+ `Environment.get_grpo_config()` exists for legacy compatibility. In published
45
+ SDK builds, this helper may return a lightweight object when non-public training
46
+ helpers are unavailable.
@@ -4,11 +4,9 @@ from .base import (
4
4
  EnvironmentSingleTurn,
5
5
  load_environment,
6
6
  )
7
- from .evaluation import (
8
- EnvironmentGeneration,
9
- )
10
7
  from .types import (
11
8
  EnvironmentEpisode,
9
+ EnvironmentGeneration,
12
10
  EnvironmentStepResult,
13
11
  EnvironmentTurn,
14
12
  RewardMetric,
@@ -3,9 +3,24 @@ from __future__ import annotations
3
3
  import json
4
4
  import re
5
5
  from pathlib import Path
6
- from typing import cast
6
+ from typing import TYPE_CHECKING, TypedDict, cast
7
+
8
+ if TYPE_CHECKING:
9
+ from typing import TypeAlias
10
+
11
+ JsonValue: TypeAlias = (
12
+ str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"]
13
+ )
14
+ MetadataDict = dict[str, JsonValue]
15
+ else:
16
+ JsonValue = object
17
+ MetadataDict = dict[str, object]
18
+
19
+
20
+ class ChatMessage(TypedDict):
21
+ role: str
22
+ content: str
7
23
 
8
- from .types import ChatMessage, ContractMessageSpec, ContractSpec
9
24
 
10
25
  _FREESOLO_CONTRACT_BLOCK = re.compile(
11
26
  r"```(?:json\s+)?freesolo-contract\s*(.*?)```",
@@ -13,6 +28,21 @@ _FREESOLO_CONTRACT_BLOCK = re.compile(
13
28
  )
14
29
 
15
30
 
31
+ class ContractMessageSpec(TypedDict, total=False):
32
+ role: str
33
+ content: str
34
+
35
+
36
+ class PromptConfig(TypedDict, total=False):
37
+ system: str
38
+ user: str
39
+ messages: list[ContractMessageSpec]
40
+
41
+
42
+ class ContractSpec(TypedDict, total=False):
43
+ prompt: PromptConfig
44
+
45
+
16
46
  def load_contract_text(path: str | Path) -> str:
17
47
  return Path(path).read_text(encoding="utf-8").strip()
18
48
 
@@ -34,10 +64,6 @@ def extract_contract_spec(contract_text: str) -> ContractSpec | None:
34
64
  return cast(ContractSpec, parsed)
35
65
 
36
66
 
37
- def load_contract_spec(path: str | Path) -> ContractSpec | None:
38
- return extract_contract_spec(load_contract_text(path))
39
-
40
-
41
67
  def build_oracle_messages(
42
68
  task_text: str,
43
69
  contract_text: str,
@@ -105,3 +131,14 @@ def _render_prompt_content(content: str, task_text: str) -> str:
105
131
  for placeholder in ("{input}", "{task}", "{query}"):
106
132
  rendered = rendered.replace(placeholder, task_text)
107
133
  return rendered
134
+
135
+
136
+ __all__ = [
137
+ "ChatMessage",
138
+ "ContractSpec",
139
+ "JsonValue",
140
+ "MetadataDict",
141
+ "build_oracle_messages",
142
+ "extract_contract_spec",
143
+ "load_contract_text",
144
+ ]
@@ -13,13 +13,8 @@ from types import ModuleType
13
13
  from typing import TypeVar
14
14
 
15
15
  from freesolo.datasets import TaskExample
16
- from freesolo.training import GrpoConfig
17
16
 
18
- from ..contracts.markdown import (
19
- ChatMessage,
20
- ContractSpec,
21
- build_oracle_messages,
22
- )
17
+ from ._compat import ChatMessage, ContractSpec, build_oracle_messages
23
18
  from .types import (
24
19
  EnvironmentEpisode,
25
20
  EnvironmentStepResult,
@@ -34,7 +29,7 @@ _ScoreItem = TypeVar("_ScoreItem")
34
29
 
35
30
 
36
31
  class Environment(ABC):
37
- """Task behavior adapter used by GRPO/RL, GEPA, and oracle generation.
32
+ """Task behavior adapter used by generated environments and oracle flows.
38
33
 
39
34
  Single-turn and multi-turn execution are peer branches: the default episode
40
35
  hooks implement the direct prompt/response branch, and interactive
@@ -107,8 +102,8 @@ class Environment(ABC):
107
102
  contract_spec=contract_spec,
108
103
  )
109
104
 
110
- def get_grpo_config(self) -> GrpoConfig:
111
- return GrpoConfig()
105
+ def get_grpo_config(self) -> object:
106
+ return object()
112
107
 
113
108
  def extract_response_text(self, parsed_message: object) -> str:
114
109
  if isinstance(parsed_message, dict):
@@ -281,13 +276,14 @@ def _validate_environment(
281
276
  "freesolo/environment.py with load_environment(...) returning a "
282
277
  "subclass of EnvironmentSingleTurn or EnvironmentMultiTurn."
283
278
  )
284
- grpo_config = environment.get_grpo_config()
285
- if not isinstance(grpo_config, GrpoConfig):
279
+ try:
280
+ environment.get_grpo_config()
281
+ except Exception as exc:
286
282
  source = f" from {reference!r}" if reference else ""
287
283
  raise TypeError(
288
- f"Environment{source} get_grpo_config() must return "
289
- "freesolo.training.GrpoConfig"
290
- )
284
+ f"Environment{source} get_grpo_config() must be callable and return"
285
+ " a configuration object"
286
+ ) from exc
291
287
  return environment
292
288
 
293
289
 
@@ -1,10 +1,9 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass, field
4
- from typing import Literal
4
+ from typing import Literal, TypeAlias
5
5
 
6
- from ..contracts.markdown import ChatMessage
7
- from ..utils.core import JsonValue, MetadataDict
6
+ from ._compat import ChatMessage, JsonValue, MetadataDict
8
7
 
9
8
  EnvironmentMetadata = MetadataDict
10
9
  RewardReturnType = Literal["binary", "numeric"]
@@ -34,6 +33,19 @@ class EnvironmentTurn:
34
33
  return payload
35
34
 
36
35
 
36
+ @dataclass(slots=True)
37
+ class EnvironmentGeneration:
38
+ """Model output returned by an environment callback used during local eval."""
39
+
40
+ response_text: str
41
+ latency_ms: int | None = None
42
+ total_tokens: int | None = None
43
+ metadata: EnvironmentMetadata = field(default_factory=dict)
44
+
45
+
46
+ GenerationValue: TypeAlias = str | EnvironmentGeneration
47
+
48
+
37
49
  @dataclass(frozen=True)
38
50
  class EnvironmentStepResult:
39
51
  """Environment response after one assistant action in an episode."""
@@ -46,7 +58,7 @@ class EnvironmentStepResult:
46
58
 
47
59
  @dataclass(frozen=True)
48
60
  class EnvironmentEpisode:
49
- """Completed trajectory used by evals, GEPA, tracing, and GRPO.
61
+ """Completed trajectory used by evals and environment-driven workflows.
50
62
 
51
63
  A single-turn episode usually has one assistant response. A multi-turn
52
64
  episode includes the full observable transcript as ``turns``.
@@ -130,8 +142,10 @@ class RewardResult:
130
142
 
131
143
  __all__ = [
132
144
  "EnvironmentEpisode",
145
+ "EnvironmentGeneration",
133
146
  "EnvironmentStepResult",
134
147
  "EnvironmentTurn",
148
+ "GenerationValue",
135
149
  "RewardMetric",
136
150
  "RewardResult",
137
151
  ]
@@ -0,0 +1,65 @@
1
+ [build-system]
2
+ requires = ["hatchling>=1.25.0"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "freesolo"
7
+ version = "0.2.47"
8
+ description = "Environment and dataset helpers for Freesolo-generated repos."
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ dependencies = [
12
+ "typing-extensions>=4.8.0",
13
+ ]
14
+
15
+ [project.optional-dependencies]
16
+ bson = [
17
+ "pymongo>=4.0.0",
18
+ ]
19
+
20
+ [dependency-groups]
21
+ dev = [
22
+ "gepa>=0.1.1; python_version >= '3.10'",
23
+ "httpx>=0.27.0",
24
+ "jsonschema>=4.0.0",
25
+ "numpy>=1.26.0; python_version >= '3.11'",
26
+ "opentelemetry-api>=1.28.0",
27
+ "opentelemetry-exporter-otlp-proto-http>=1.28.0",
28
+ "opentelemetry-sdk>=1.28.0",
29
+ "mypy>=1.13.0",
30
+ "pymongo>=4.0.0",
31
+ "pytest>=8.0.0",
32
+ "python-dotenv>=1.0.0",
33
+ "ruff>=0.11.0",
34
+ "openai>=1.0.0",
35
+ "verifiers>=0.1.14",
36
+ "wandb>=0.17.0; python_version >= '3.10'",
37
+ ]
38
+
39
+ [tool.hatch.build.targets.wheel]
40
+ packages = ["pypi/freesolo"]
41
+ exclude = [
42
+ "pypi/freesolo/evaluation/**",
43
+ "pypi/freesolo/contracts/**",
44
+ "pypi/freesolo/gepa/**",
45
+ "pypi/freesolo/environments/evaluation.py",
46
+ "pypi/freesolo/tracing/**",
47
+ "pypi/freesolo/training/**",
48
+ "pypi/freesolo/utils/**",
49
+ ]
50
+
51
+ [tool.hatch.build.targets.sdist]
52
+ exclude = [
53
+ "pypi/freesolo/evaluation/**",
54
+ "pypi/freesolo/contracts/**",
55
+ "pypi/freesolo/gepa/**",
56
+ "pypi/freesolo/environments/evaluation.py",
57
+ "pypi/freesolo/tracing/**",
58
+ "pypi/freesolo/training/**",
59
+ "pypi/freesolo/utils/**",
60
+ ]
61
+
62
+ [tool.mypy]
63
+ python_version = "3.10"
64
+ strict = false
65
+ warn_unused_configs = true