freesolo 0.2.45__tar.gz → 0.2.47__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/publish-packages.yml +6 -1
- {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/python-checks.yml +4 -4
- {freesolo-0.2.45 → freesolo-0.2.47}/AGENTS.md +12 -0
- freesolo-0.2.47/PKG-INFO +56 -0
- freesolo-0.2.47/README.md +46 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/README.md +1 -1
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/package.json +1 -1
- {freesolo-0.2.45 → freesolo-0.2.47}/package.json +1 -1
- freesolo-0.2.47/pypi/freesolo/README.md +47 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/README.md +3 -3
- freesolo-0.2.47/pypi/freesolo/datasets/_compat.py +53 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/core.py +1 -3
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/records.py +1 -2
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/types.py +1 -1
- freesolo-0.2.47/pypi/freesolo/environments/README.md +46 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/environments/__init__.py +1 -3
- freesolo-0.2.45/pypi/freesolo/contracts/markdown.py → freesolo-0.2.47/pypi/freesolo/environments/_compat.py +43 -6
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/environments/base.py +10 -14
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/environments/types.py +18 -4
- freesolo-0.2.47/pyproject.toml +65 -0
- freesolo-0.2.47/tests/functionality/test_core_utils.py +68 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_datasets.py +0 -8
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_hosting_and_deployment_clients.py +0 -45
- freesolo-0.2.47/tests/functionality/test_package_metadata.py +28 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/uv.lock +1567 -1456
- freesolo-0.2.45/PKG-INFO +0 -380
- freesolo-0.2.45/README.md +0 -327
- freesolo-0.2.45/examples/PROMPT.md +0 -10
- freesolo-0.2.45/examples/README.md +0 -103
- freesolo-0.2.45/examples/TRAINING_CONTRACT.md +0 -10
- freesolo-0.2.45/examples/data/support_eval.jsonl +0 -3
- freesolo-0.2.45/examples/data/support_train.jsonl +0 -3
- freesolo-0.2.45/examples/environment.py +0 -110
- freesolo-0.2.45/examples/evaluation_custom_scorer.py +0 -105
- freesolo-0.2.45/examples/evaluation_from_files.py +0 -47
- freesolo-0.2.45/examples/gepa_prompt_example.py +0 -76
- freesolo-0.2.45/examples/support_dataset.py +0 -9
- freesolo-0.2.45/examples/tracing_manual_span.py +0 -36
- freesolo-0.2.45/examples/tracing_multistep_agent.py +0 -63
- freesolo-0.2.45/examples/training_sft_grpo.py +0 -82
- freesolo-0.2.45/pypi/freesolo/README.md +0 -60
- freesolo-0.2.45/pypi/freesolo/contracts/README.md +0 -64
- freesolo-0.2.45/pypi/freesolo/contracts/__init__.py +0 -31
- freesolo-0.2.45/pypi/freesolo/contracts/types.py +0 -54
- freesolo-0.2.45/pypi/freesolo/environments/README.md +0 -96
- freesolo-0.2.45/pypi/freesolo/environments/evaluation.py +0 -414
- freesolo-0.2.45/pypi/freesolo/evaluation/README.md +0 -77
- freesolo-0.2.45/pypi/freesolo/evaluation/__init__.py +0 -11
- freesolo-0.2.45/pypi/freesolo/evaluation/client.py +0 -506
- freesolo-0.2.45/pypi/freesolo/evaluation/judges/__init__.py +0 -5
- freesolo-0.2.45/pypi/freesolo/evaluation/judges/base.py +0 -24
- freesolo-0.2.45/pypi/freesolo/evaluation/responses.py +0 -57
- freesolo-0.2.45/pypi/freesolo/evaluation/results.py +0 -94
- freesolo-0.2.45/pypi/freesolo/evaluation/types.py +0 -16
- freesolo-0.2.45/pypi/freesolo/gepa/README.md +0 -40
- freesolo-0.2.45/pypi/freesolo/gepa/__init__.py +0 -17
- freesolo-0.2.45/pypi/freesolo/gepa/adapter.py +0 -305
- freesolo-0.2.45/pypi/freesolo/gepa/reflection.py +0 -88
- freesolo-0.2.45/pypi/freesolo/gepa/setup.py +0 -219
- freesolo-0.2.45/pypi/freesolo/gepa/types.py +0 -123
- freesolo-0.2.45/pypi/freesolo/tracing/README.md +0 -110
- freesolo-0.2.45/pypi/freesolo/tracing/__init__.py +0 -31
- freesolo-0.2.45/pypi/freesolo/tracing/otel.py +0 -655
- freesolo-0.2.45/pypi/freesolo/tracing/sanitize.py +0 -220
- freesolo-0.2.45/pypi/freesolo/training/README.md +0 -118
- freesolo-0.2.45/pypi/freesolo/training/__init__.py +0 -65
- freesolo-0.2.45/pypi/freesolo/training/grpo/README.md +0 -103
- freesolo-0.2.45/pypi/freesolo/training/grpo/__init__.py +0 -5
- freesolo-0.2.45/pypi/freesolo/training/grpo/config.py +0 -91
- freesolo-0.2.45/pypi/freesolo/training/grpo/datums.py +0 -323
- freesolo-0.2.45/pypi/freesolo/training/grpo/rewards.py +0 -78
- freesolo-0.2.45/pypi/freesolo/training/grpo/sampling.py +0 -134
- freesolo-0.2.45/pypi/freesolo/training/storage.py +0 -78
- freesolo-0.2.45/pypi/freesolo/training/train_grpo.py +0 -762
- freesolo-0.2.45/pypi/freesolo/training/train_sft.py +0 -352
- freesolo-0.2.45/pypi/freesolo/training/types.py +0 -156
- freesolo-0.2.45/pypi/freesolo/training/wandb_series.py +0 -70
- freesolo-0.2.45/pypi/freesolo/utils/README.md +0 -53
- freesolo-0.2.45/pypi/freesolo/utils/__init__.py +0 -0
- freesolo-0.2.45/pypi/freesolo/utils/checkpoints.py +0 -284
- freesolo-0.2.45/pypi/freesolo/utils/core.py +0 -289
- freesolo-0.2.45/pypi/freesolo/utils/hosting.py +0 -160
- freesolo-0.2.45/pypi/freesolo/utils/judge.py +0 -207
- freesolo-0.2.45/pypi/freesolo/utils/openai.py +0 -272
- freesolo-0.2.45/pypi/freesolo/utils/oracle.py +0 -576
- freesolo-0.2.45/pypi/freesolo/utils/storage.py +0 -228
- freesolo-0.2.45/pypi/freesolo/utils/upload.py +0 -142
- freesolo-0.2.45/pypi/freesolo/utils/wandb.py +0 -308
- freesolo-0.2.45/pyproject.toml +0 -76
- freesolo-0.2.45/tests/end_to_end_testing/test_examples.py +0 -180
- freesolo-0.2.45/tests/functionality/test_core_utils.py +0 -131
- freesolo-0.2.45/tests/functionality/test_gepa_adapter.py +0 -269
- freesolo-0.2.45/tests/functionality/test_grpo_datums_and_sampling.py +0 -482
- freesolo-0.2.45/tests/functionality/test_package_metadata.py +0 -49
- freesolo-0.2.45/tests/functionality/test_records_rewards_and_config.py +0 -493
- freesolo-0.2.45/tests/functionality/test_storage_sync.py +0 -435
- freesolo-0.2.45/tests/functionality/test_train_sft.py +0 -55
- freesolo-0.2.45/tests/functionality/test_training_efficiency_fixes.py +0 -473
- freesolo-0.2.45/tests/functionality/test_upload.py +0 -97
- freesolo-0.2.45/tests/functionality/test_utils_checkpoints.py +0 -120
- freesolo-0.2.45/tests/functionality/test_wandb_series.py +0 -132
- {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/sync-package-function-usage.yml +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/.github/workflows/version-consistency.yml +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/.gitignore +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/bun.lock +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/core.d.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/core.d.ts.map +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/core.js +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/evaluation.d.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/evaluation.d.ts.map +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/evaluation.js +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/index.d.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/index.d.ts.map +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/index.js +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/tracing.d.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/tracing.d.ts.map +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/dist/tracing.js +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/core.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/evaluation.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/index.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/src/tracing.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/tests/evaluation.test.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/tests/tracing.test.ts +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/npm/tsconfig.json +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/.gitignore +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/__init__.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/datasets/__init__.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/pypi/freesolo/py.typed +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/ruff.toml +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/end_to_end_testing/test_environment_evaluation_flow.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_contracts_and_judges.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_environment_evaluation_edges.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_evaluation_client.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_openai_and_oracle_tokens.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_tracing_opentelemetry.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/functionality/test_wandb_utils.py +0 -0
- {freesolo-0.2.45 → freesolo-0.2.47}/tests/security/test_sanitize_and_contract_security.py +0 -0
|
@@ -264,7 +264,12 @@ jobs:
|
|
|
264
264
|
echo "::error::NPM_TOKEN is not configured; refusing to skip publish."
|
|
265
265
|
exit 1
|
|
266
266
|
fi
|
|
267
|
-
bun publish
|
|
267
|
+
# bun publish does not pick up NODE_AUTH_TOKEN or ~/.npmrc auth, so
|
|
268
|
+
# publish the bun-built package with npm and a project npmrc.
|
|
269
|
+
umask 077
|
|
270
|
+
printf '//registry.npmjs.org/:_authToken=%s\n' "$NODE_AUTH_TOKEN" > .npmrc
|
|
271
|
+
npm publish --access public
|
|
272
|
+
rm -f .npmrc
|
|
268
273
|
|
|
269
274
|
- name: No npm package changes
|
|
270
275
|
if: github.event_name == 'push' && steps.changes.outputs.npm_changed == 'false'
|
|
@@ -26,16 +26,16 @@ jobs:
|
|
|
26
26
|
run: python3 -m pip install --upgrade uv
|
|
27
27
|
|
|
28
28
|
- name: Install dependencies
|
|
29
|
-
run: uv sync --locked --
|
|
29
|
+
run: uv sync --locked --group dev
|
|
30
30
|
|
|
31
31
|
- name: Python compile check
|
|
32
32
|
run: python3 -m py_compile $(find pypi tests -name '*.py' -print)
|
|
33
33
|
|
|
34
34
|
- name: Ruff check
|
|
35
|
-
run: uv run
|
|
35
|
+
run: uv run python -m ruff check .
|
|
36
36
|
|
|
37
37
|
- name: Ruff format check
|
|
38
|
-
run: uv run
|
|
38
|
+
run: uv run python -m ruff format --check .
|
|
39
39
|
|
|
40
40
|
- name: Tests
|
|
41
|
-
run: uv run
|
|
41
|
+
run: uv run python -m pytest tests
|
|
@@ -21,3 +21,15 @@ This is a Python SDK (`freesolo`) for tracing, evaluating, and training LLM appl
|
|
|
21
21
|
- When running examples outside of tests, set `PYTHONPATH="$PWD/pypi"` so the local source is used.
|
|
22
22
|
- The `--local` flag on examples runs scorers locally without requiring `FREESOLO_API_KEY`.
|
|
23
23
|
- Dev dependencies (`pytest`, `ruff`) are in the `[dependency-groups]` `dev` group of `pyproject.toml`; use `uv sync --group dev` to install them.
|
|
24
|
+
|
|
25
|
+
### Deployment / auto-pull
|
|
26
|
+
|
|
27
|
+
- The freesolo agent-worker on the deploy VM does **not** install this SDK from
|
|
28
|
+
PyPI — it bind-mounts a host checkout of this repo and imports from source
|
|
29
|
+
(`PYTHONPATH=/freesolo-sdk/pypi`; see `freesolo`'s `docker-compose.yml`).
|
|
30
|
+
- A push to `main` is auto-pulled: the deploy host polls `origin/main` every
|
|
31
|
+
~2 min, fast-forwards `~/freesolo-sdk`, and restarts the agent-worker so it
|
|
32
|
+
re-imports the new code (`freesolo/scripts/sync-sdk.sh`, driven by
|
|
33
|
+
`freesolo-sdk-sync.timer`). So merging to `main` here updates the running
|
|
34
|
+
worker without a manual deploy — but it can interrupt an in-progress job
|
|
35
|
+
(requeue-stale re-enqueues it). Keep `main` deployable.
|
freesolo-0.2.47/PKG-INFO
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: freesolo
|
|
3
|
+
Version: 0.2.47
|
|
4
|
+
Summary: Environment and dataset helpers for Freesolo-generated repos.
|
|
5
|
+
Requires-Python: >=3.10
|
|
6
|
+
Requires-Dist: typing-extensions>=4.8.0
|
|
7
|
+
Provides-Extra: bson
|
|
8
|
+
Requires-Dist: pymongo>=4.0.0; extra == 'bson'
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
|
|
11
|
+
# freesolo
|
|
12
|
+
|
|
13
|
+
`freesolo` is the published Python SDK surface for generated repos: environments and
|
|
14
|
+
datasets used for evaluation and task definition.
|
|
15
|
+
|
|
16
|
+
It is intentionally narrow:
|
|
17
|
+
|
|
18
|
+
- `freesolo.environments`
|
|
19
|
+
- `freesolo.datasets`
|
|
20
|
+
|
|
21
|
+
Everything else (evaluation, tracing, and internal helpers) is kept in the
|
|
22
|
+
repository for internal workflows but is not part of the public SDK contract.
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install freesolo
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
From source:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
cd freesolo-sdk
|
|
34
|
+
export PYTHONPATH="$PWD/pypi"
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Example
|
|
38
|
+
|
|
39
|
+
```python
|
|
40
|
+
from freesolo.datasets import load_dataset
|
|
41
|
+
from freesolo.environments import load_environment
|
|
42
|
+
|
|
43
|
+
dataset = load_dataset("support.jsonl")
|
|
44
|
+
environment = load_environment("freesolo/environment.py:load_environment")
|
|
45
|
+
|
|
46
|
+
print(len(dataset.records))
|
|
47
|
+
print(type(environment).__name__)
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## API Guidance
|
|
51
|
+
|
|
52
|
+
Use `freesolo.datasets` for task examples and `freesolo.environments` for environment
|
|
53
|
+
loading/scoring interfaces.
|
|
54
|
+
|
|
55
|
+
- No command-line help surface is published as part of the SDK contract.
|
|
56
|
+
- Hidden modules remain available in source history for internal tooling only.
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# freesolo
|
|
2
|
+
|
|
3
|
+
`freesolo` is the published Python SDK surface for generated repos: environments and
|
|
4
|
+
datasets used for evaluation and task definition.
|
|
5
|
+
|
|
6
|
+
It is intentionally narrow:
|
|
7
|
+
|
|
8
|
+
- `freesolo.environments`
|
|
9
|
+
- `freesolo.datasets`
|
|
10
|
+
|
|
11
|
+
Everything else (evaluation, tracing, and internal helpers) is kept in the
|
|
12
|
+
repository for internal workflows but is not part of the public SDK contract.
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install freesolo
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
From source:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
cd freesolo-sdk
|
|
24
|
+
export PYTHONPATH="$PWD/pypi"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Example
|
|
28
|
+
|
|
29
|
+
```python
|
|
30
|
+
from freesolo.datasets import load_dataset
|
|
31
|
+
from freesolo.environments import load_environment
|
|
32
|
+
|
|
33
|
+
dataset = load_dataset("support.jsonl")
|
|
34
|
+
environment = load_environment("freesolo/environment.py:load_environment")
|
|
35
|
+
|
|
36
|
+
print(len(dataset.records))
|
|
37
|
+
print(type(environment).__name__)
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
## API Guidance
|
|
41
|
+
|
|
42
|
+
Use `freesolo.datasets` for task examples and `freesolo.environments` for environment
|
|
43
|
+
loading/scoring interfaces.
|
|
44
|
+
|
|
45
|
+
- No command-line help surface is published as part of the SDK contract.
|
|
46
|
+
- Hidden modules remain available in source history for internal tooling only.
|
|
@@ -8,7 +8,7 @@ This npm package intentionally contains only:
|
|
|
8
8
|
- tracing helpers for exporting OpenTelemetry spans to Freesolo
|
|
9
9
|
- evaluation primitives and `EvaluationClient`
|
|
10
10
|
|
|
11
|
-
It does not include Freesolo training, datasets, GEPA,
|
|
11
|
+
It does not include Freesolo training, datasets, GEPA, AutoSLM, or generated
|
|
12
12
|
Python training-repo helpers.
|
|
13
13
|
|
|
14
14
|
## Tracing
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# Freesolo SDK Package Map
|
|
2
|
+
|
|
3
|
+
This package exposes the public surface for generated repos:
|
|
4
|
+
|
|
5
|
+
- `freesolo.environments` for environment loading, scoring helpers, and episode metadata.
|
|
6
|
+
- `freesolo.datasets` for task-record loading and prompt construction.
|
|
7
|
+
|
|
8
|
+
Everything else in `pypi/freesolo/*` (evaluation, tracing, and utilities) is
|
|
9
|
+
not part of the published SDK public contract.
|
|
10
|
+
|
|
11
|
+
## Public Imports
|
|
12
|
+
|
|
13
|
+
Use these package areas:
|
|
14
|
+
|
|
15
|
+
- `freesolo.environments`: environment interface and environment utilities.
|
|
16
|
+
- `freesolo.datasets`: dataset utilities and task examples.
|
|
17
|
+
|
|
18
|
+
## Install
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
pip install freesolo
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## From a repo checkout
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
cd freesolo-sdk
|
|
28
|
+
export PYTHONPATH="$PWD/pypi"
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
```python
|
|
32
|
+
from freesolo.datasets import load_dataset
|
|
33
|
+
from freesolo.environments import load_environment
|
|
34
|
+
|
|
35
|
+
dataset = load_dataset("support.jsonl")
|
|
36
|
+
environment = load_environment("freesolo/environment.py:load_environment")
|
|
37
|
+
|
|
38
|
+
print(len(dataset.records))
|
|
39
|
+
print(type(environment).__name__)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Public API
|
|
43
|
+
|
|
44
|
+
The root module exports nothing directly. Import from:
|
|
45
|
+
|
|
46
|
+
- `freesolo.environments`
|
|
47
|
+
- `freesolo.datasets`
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# Datasets
|
|
2
2
|
|
|
3
|
-
Dataset helpers turn raw records into `TaskExample` objects and
|
|
4
|
-
|
|
3
|
+
Dataset helpers turn raw records into `TaskExample` objects and generated
|
|
4
|
+
conversation prompts.
|
|
5
5
|
|
|
6
6
|
## Public Imports
|
|
7
7
|
|
|
@@ -28,7 +28,7 @@ Each record must include one task field:
|
|
|
28
28
|
- `query`
|
|
29
29
|
- `input`
|
|
30
30
|
|
|
31
|
-
Labeled
|
|
31
|
+
Labeled records should also include one target field:
|
|
32
32
|
|
|
33
33
|
- `ground_truth`
|
|
34
34
|
- `expected_output`
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from typing import TYPE_CHECKING, TypedDict
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
JsonValue = (
|
|
9
|
+
str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"]
|
|
10
|
+
)
|
|
11
|
+
JsonObject = dict[str, JsonValue]
|
|
12
|
+
MetadataDict = dict[str, JsonValue]
|
|
13
|
+
else:
|
|
14
|
+
JsonValue = object
|
|
15
|
+
JsonObject = dict[str, object]
|
|
16
|
+
MetadataDict = dict[str, object]
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ChatMessage(TypedDict):
|
|
20
|
+
role: str
|
|
21
|
+
content: str
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def json_safe_value(value: object) -> JsonValue:
|
|
25
|
+
if value is None or isinstance(value, (str, int, float, bool)):
|
|
26
|
+
return value
|
|
27
|
+
if isinstance(value, Mapping):
|
|
28
|
+
return {str(key): json_safe_value(item) for key, item in value.items()}
|
|
29
|
+
if isinstance(value, (list, tuple, set)):
|
|
30
|
+
return [json_safe_value(item) for item in value]
|
|
31
|
+
try:
|
|
32
|
+
json.dumps(value)
|
|
33
|
+
except (TypeError, ValueError):
|
|
34
|
+
return str(value)
|
|
35
|
+
return value
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def serialize_value(value: object, *, pretty: bool = True) -> str:
|
|
39
|
+
if isinstance(value, str):
|
|
40
|
+
return value.strip()
|
|
41
|
+
payload = json_safe_value(value)
|
|
42
|
+
if pretty:
|
|
43
|
+
return json.dumps(payload, indent=2, sort_keys=True, ensure_ascii=True)
|
|
44
|
+
return json.dumps(payload, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
__all__ = [
|
|
48
|
+
"ChatMessage",
|
|
49
|
+
"JsonObject",
|
|
50
|
+
"JsonValue",
|
|
51
|
+
"MetadataDict",
|
|
52
|
+
"serialize_value",
|
|
53
|
+
]
|
|
@@ -3,9 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
from dataclasses import dataclass
|
|
4
4
|
from typing import Protocol
|
|
5
5
|
|
|
6
|
-
from
|
|
7
|
-
from freesolo.utils.core import serialize_value
|
|
8
|
-
|
|
6
|
+
from ._compat import ChatMessage, serialize_value
|
|
9
7
|
from .records import load_task_examples
|
|
10
8
|
from .types import DatasetSource, TaskExample
|
|
11
9
|
|
|
@@ -5,7 +5,7 @@ from dataclasses import dataclass, field
|
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from typing import TypeAlias
|
|
7
7
|
|
|
8
|
-
from
|
|
8
|
+
from ._compat import JsonObject, MetadataDict
|
|
9
9
|
|
|
10
10
|
DatasetRecord = JsonObject
|
|
11
11
|
DatasetMetadata = MetadataDict
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# Freesolo Environments
|
|
2
|
+
|
|
3
|
+
Generated repos should use one canonical environment module:
|
|
4
|
+
|
|
5
|
+
```text
|
|
6
|
+
freesolo/environment.py
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
That file must expose:
|
|
10
|
+
|
|
11
|
+
```python
|
|
12
|
+
def load_environment(
|
|
13
|
+
*,
|
|
14
|
+
contract_path: str | None = None,
|
|
15
|
+
dataset_path: str | None = None,
|
|
16
|
+
reward_command: str | None = None,
|
|
17
|
+
mode: str = "eval",
|
|
18
|
+
**_: object,
|
|
19
|
+
) -> Environment:
|
|
20
|
+
return RepoEnvironment(
|
|
21
|
+
contract_path=contract_path,
|
|
22
|
+
dataset_path=dataset_path,
|
|
23
|
+
reward_command=reward_command,
|
|
24
|
+
mode=mode,
|
|
25
|
+
)
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
For authoring, subclass one of the explicit branch base classes:
|
|
29
|
+
|
|
30
|
+
- `EnvironmentSingleTurn` for tasks with one prompt and one assistant response.
|
|
31
|
+
- `EnvironmentMultiTurn` for bounded transcript-based tasks.
|
|
32
|
+
|
|
33
|
+
Implement one concrete environment and keep environment loading in `load_environment()`.
|
|
34
|
+
|
|
35
|
+
## Required environment API
|
|
36
|
+
|
|
37
|
+
- `load_environment()` must return an instance of a subclass of `EnvironmentSingleTurn` or `EnvironmentMultiTurn`.
|
|
38
|
+
- `score_response(example, response_text)` must return `RewardResult`.
|
|
39
|
+
- Optionally override `build_prompt_messages`, `extract_response_text`,
|
|
40
|
+
`normalize_response_text`, and episode handlers.
|
|
41
|
+
|
|
42
|
+
## Optional helper
|
|
43
|
+
|
|
44
|
+
`Environment.get_grpo_config()` exists for legacy compatibility. In published
|
|
45
|
+
SDK builds, this helper may return a lightweight object when non-public training
|
|
46
|
+
helpers are unavailable.
|
|
@@ -4,11 +4,9 @@ from .base import (
|
|
|
4
4
|
EnvironmentSingleTurn,
|
|
5
5
|
load_environment,
|
|
6
6
|
)
|
|
7
|
-
from .evaluation import (
|
|
8
|
-
EnvironmentGeneration,
|
|
9
|
-
)
|
|
10
7
|
from .types import (
|
|
11
8
|
EnvironmentEpisode,
|
|
9
|
+
EnvironmentGeneration,
|
|
12
10
|
EnvironmentStepResult,
|
|
13
11
|
EnvironmentTurn,
|
|
14
12
|
RewardMetric,
|
|
@@ -3,9 +3,24 @@ from __future__ import annotations
|
|
|
3
3
|
import json
|
|
4
4
|
import re
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import cast
|
|
6
|
+
from typing import TYPE_CHECKING, TypedDict, cast
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from typing import TypeAlias
|
|
10
|
+
|
|
11
|
+
JsonValue: TypeAlias = (
|
|
12
|
+
str | int | float | bool | None | list["JsonValue"] | dict[str, "JsonValue"]
|
|
13
|
+
)
|
|
14
|
+
MetadataDict = dict[str, JsonValue]
|
|
15
|
+
else:
|
|
16
|
+
JsonValue = object
|
|
17
|
+
MetadataDict = dict[str, object]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class ChatMessage(TypedDict):
|
|
21
|
+
role: str
|
|
22
|
+
content: str
|
|
7
23
|
|
|
8
|
-
from .types import ChatMessage, ContractMessageSpec, ContractSpec
|
|
9
24
|
|
|
10
25
|
_FREESOLO_CONTRACT_BLOCK = re.compile(
|
|
11
26
|
r"```(?:json\s+)?freesolo-contract\s*(.*?)```",
|
|
@@ -13,6 +28,21 @@ _FREESOLO_CONTRACT_BLOCK = re.compile(
|
|
|
13
28
|
)
|
|
14
29
|
|
|
15
30
|
|
|
31
|
+
class ContractMessageSpec(TypedDict, total=False):
|
|
32
|
+
role: str
|
|
33
|
+
content: str
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class PromptConfig(TypedDict, total=False):
|
|
37
|
+
system: str
|
|
38
|
+
user: str
|
|
39
|
+
messages: list[ContractMessageSpec]
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class ContractSpec(TypedDict, total=False):
|
|
43
|
+
prompt: PromptConfig
|
|
44
|
+
|
|
45
|
+
|
|
16
46
|
def load_contract_text(path: str | Path) -> str:
|
|
17
47
|
return Path(path).read_text(encoding="utf-8").strip()
|
|
18
48
|
|
|
@@ -34,10 +64,6 @@ def extract_contract_spec(contract_text: str) -> ContractSpec | None:
|
|
|
34
64
|
return cast(ContractSpec, parsed)
|
|
35
65
|
|
|
36
66
|
|
|
37
|
-
def load_contract_spec(path: str | Path) -> ContractSpec | None:
|
|
38
|
-
return extract_contract_spec(load_contract_text(path))
|
|
39
|
-
|
|
40
|
-
|
|
41
67
|
def build_oracle_messages(
|
|
42
68
|
task_text: str,
|
|
43
69
|
contract_text: str,
|
|
@@ -105,3 +131,14 @@ def _render_prompt_content(content: str, task_text: str) -> str:
|
|
|
105
131
|
for placeholder in ("{input}", "{task}", "{query}"):
|
|
106
132
|
rendered = rendered.replace(placeholder, task_text)
|
|
107
133
|
return rendered
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
__all__ = [
|
|
137
|
+
"ChatMessage",
|
|
138
|
+
"ContractSpec",
|
|
139
|
+
"JsonValue",
|
|
140
|
+
"MetadataDict",
|
|
141
|
+
"build_oracle_messages",
|
|
142
|
+
"extract_contract_spec",
|
|
143
|
+
"load_contract_text",
|
|
144
|
+
]
|
|
@@ -13,13 +13,8 @@ from types import ModuleType
|
|
|
13
13
|
from typing import TypeVar
|
|
14
14
|
|
|
15
15
|
from freesolo.datasets import TaskExample
|
|
16
|
-
from freesolo.training import GrpoConfig
|
|
17
16
|
|
|
18
|
-
from
|
|
19
|
-
ChatMessage,
|
|
20
|
-
ContractSpec,
|
|
21
|
-
build_oracle_messages,
|
|
22
|
-
)
|
|
17
|
+
from ._compat import ChatMessage, ContractSpec, build_oracle_messages
|
|
23
18
|
from .types import (
|
|
24
19
|
EnvironmentEpisode,
|
|
25
20
|
EnvironmentStepResult,
|
|
@@ -34,7 +29,7 @@ _ScoreItem = TypeVar("_ScoreItem")
|
|
|
34
29
|
|
|
35
30
|
|
|
36
31
|
class Environment(ABC):
|
|
37
|
-
"""Task behavior adapter used by
|
|
32
|
+
"""Task behavior adapter used by generated environments and oracle flows.
|
|
38
33
|
|
|
39
34
|
Single-turn and multi-turn execution are peer branches: the default episode
|
|
40
35
|
hooks implement the direct prompt/response branch, and interactive
|
|
@@ -107,8 +102,8 @@ class Environment(ABC):
|
|
|
107
102
|
contract_spec=contract_spec,
|
|
108
103
|
)
|
|
109
104
|
|
|
110
|
-
def get_grpo_config(self) ->
|
|
111
|
-
return
|
|
105
|
+
def get_grpo_config(self) -> object:
|
|
106
|
+
return object()
|
|
112
107
|
|
|
113
108
|
def extract_response_text(self, parsed_message: object) -> str:
|
|
114
109
|
if isinstance(parsed_message, dict):
|
|
@@ -281,13 +276,14 @@ def _validate_environment(
|
|
|
281
276
|
"freesolo/environment.py with load_environment(...) returning a "
|
|
282
277
|
"subclass of EnvironmentSingleTurn or EnvironmentMultiTurn."
|
|
283
278
|
)
|
|
284
|
-
|
|
285
|
-
|
|
279
|
+
try:
|
|
280
|
+
environment.get_grpo_config()
|
|
281
|
+
except Exception as exc:
|
|
286
282
|
source = f" from {reference!r}" if reference else ""
|
|
287
283
|
raise TypeError(
|
|
288
|
-
f"Environment{source} get_grpo_config() must return
|
|
289
|
-
"
|
|
290
|
-
)
|
|
284
|
+
f"Environment{source} get_grpo_config() must be callable and return"
|
|
285
|
+
" a configuration object"
|
|
286
|
+
) from exc
|
|
291
287
|
return environment
|
|
292
288
|
|
|
293
289
|
|
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
3
|
from dataclasses import dataclass, field
|
|
4
|
-
from typing import Literal
|
|
4
|
+
from typing import Literal, TypeAlias
|
|
5
5
|
|
|
6
|
-
from
|
|
7
|
-
from ..utils.core import JsonValue, MetadataDict
|
|
6
|
+
from ._compat import ChatMessage, JsonValue, MetadataDict
|
|
8
7
|
|
|
9
8
|
EnvironmentMetadata = MetadataDict
|
|
10
9
|
RewardReturnType = Literal["binary", "numeric"]
|
|
@@ -34,6 +33,19 @@ class EnvironmentTurn:
|
|
|
34
33
|
return payload
|
|
35
34
|
|
|
36
35
|
|
|
36
|
+
@dataclass(slots=True)
|
|
37
|
+
class EnvironmentGeneration:
|
|
38
|
+
"""Model output returned by an environment callback used during local eval."""
|
|
39
|
+
|
|
40
|
+
response_text: str
|
|
41
|
+
latency_ms: int | None = None
|
|
42
|
+
total_tokens: int | None = None
|
|
43
|
+
metadata: EnvironmentMetadata = field(default_factory=dict)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
GenerationValue: TypeAlias = str | EnvironmentGeneration
|
|
47
|
+
|
|
48
|
+
|
|
37
49
|
@dataclass(frozen=True)
|
|
38
50
|
class EnvironmentStepResult:
|
|
39
51
|
"""Environment response after one assistant action in an episode."""
|
|
@@ -46,7 +58,7 @@ class EnvironmentStepResult:
|
|
|
46
58
|
|
|
47
59
|
@dataclass(frozen=True)
|
|
48
60
|
class EnvironmentEpisode:
|
|
49
|
-
"""Completed trajectory used by evals
|
|
61
|
+
"""Completed trajectory used by evals and environment-driven workflows.
|
|
50
62
|
|
|
51
63
|
A single-turn episode usually has one assistant response. A multi-turn
|
|
52
64
|
episode includes the full observable transcript as ``turns``.
|
|
@@ -130,8 +142,10 @@ class RewardResult:
|
|
|
130
142
|
|
|
131
143
|
__all__ = [
|
|
132
144
|
"EnvironmentEpisode",
|
|
145
|
+
"EnvironmentGeneration",
|
|
133
146
|
"EnvironmentStepResult",
|
|
134
147
|
"EnvironmentTurn",
|
|
148
|
+
"GenerationValue",
|
|
135
149
|
"RewardMetric",
|
|
136
150
|
"RewardResult",
|
|
137
151
|
]
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling>=1.25.0"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "freesolo"
|
|
7
|
+
version = "0.2.47"
|
|
8
|
+
description = "Environment and dataset helpers for Freesolo-generated repos."
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
dependencies = [
|
|
12
|
+
"typing-extensions>=4.8.0",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
[project.optional-dependencies]
|
|
16
|
+
bson = [
|
|
17
|
+
"pymongo>=4.0.0",
|
|
18
|
+
]
|
|
19
|
+
|
|
20
|
+
[dependency-groups]
|
|
21
|
+
dev = [
|
|
22
|
+
"gepa>=0.1.1; python_version >= '3.10'",
|
|
23
|
+
"httpx>=0.27.0",
|
|
24
|
+
"jsonschema>=4.0.0",
|
|
25
|
+
"numpy>=1.26.0; python_version >= '3.11'",
|
|
26
|
+
"opentelemetry-api>=1.28.0",
|
|
27
|
+
"opentelemetry-exporter-otlp-proto-http>=1.28.0",
|
|
28
|
+
"opentelemetry-sdk>=1.28.0",
|
|
29
|
+
"mypy>=1.13.0",
|
|
30
|
+
"pymongo>=4.0.0",
|
|
31
|
+
"pytest>=8.0.0",
|
|
32
|
+
"python-dotenv>=1.0.0",
|
|
33
|
+
"ruff>=0.11.0",
|
|
34
|
+
"openai>=1.0.0",
|
|
35
|
+
"verifiers>=0.1.14",
|
|
36
|
+
"wandb>=0.17.0; python_version >= '3.10'",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[tool.hatch.build.targets.wheel]
|
|
40
|
+
packages = ["pypi/freesolo"]
|
|
41
|
+
exclude = [
|
|
42
|
+
"pypi/freesolo/evaluation/**",
|
|
43
|
+
"pypi/freesolo/contracts/**",
|
|
44
|
+
"pypi/freesolo/gepa/**",
|
|
45
|
+
"pypi/freesolo/environments/evaluation.py",
|
|
46
|
+
"pypi/freesolo/tracing/**",
|
|
47
|
+
"pypi/freesolo/training/**",
|
|
48
|
+
"pypi/freesolo/utils/**",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[tool.hatch.build.targets.sdist]
|
|
52
|
+
exclude = [
|
|
53
|
+
"pypi/freesolo/evaluation/**",
|
|
54
|
+
"pypi/freesolo/contracts/**",
|
|
55
|
+
"pypi/freesolo/gepa/**",
|
|
56
|
+
"pypi/freesolo/environments/evaluation.py",
|
|
57
|
+
"pypi/freesolo/tracing/**",
|
|
58
|
+
"pypi/freesolo/training/**",
|
|
59
|
+
"pypi/freesolo/utils/**",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
[tool.mypy]
|
|
63
|
+
python_version = "3.10"
|
|
64
|
+
strict = false
|
|
65
|
+
warn_unused_configs = true
|