freesolo 0.2.3__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- freesolo-0.2.4/.github/workflows/publish-packages.yml +96 -0
- freesolo-0.2.4/.github/workflows/python-checks.yml +41 -0
- freesolo-0.2.4/.github/workflows/sync-package-function-usage.yml +38 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/PKG-INFO +89 -141
- {freesolo-0.2.3 → freesolo-0.2.4}/README.md +77 -135
- freesolo-0.2.4/examples/PROMPT.md +10 -0
- freesolo-0.2.4/examples/README.md +96 -0
- freesolo-0.2.4/examples/TRAINING_CONTRACT.md +10 -0
- freesolo-0.2.4/examples/data/support_eval.jsonl +3 -0
- freesolo-0.2.4/examples/data/support_train.jsonl +3 -0
- freesolo-0.2.4/examples/environment.py +110 -0
- freesolo-0.2.4/examples/evaluation_custom_scorer.py +105 -0
- freesolo-0.2.4/examples/evaluation_from_files.py +47 -0
- freesolo-0.2.4/examples/gepa_prompt_example.py +76 -0
- freesolo-0.2.4/examples/support_dataset.py +20 -0
- freesolo-0.2.4/examples/tracing_manual_span.py +47 -0
- freesolo-0.2.4/examples/training_sft_grpo.py +75 -0
- freesolo-0.2.4/function_usage_registry.json +12 -0
- freesolo-0.2.4/pypi/freesolo/__init__.py +1 -0
- freesolo-0.2.4/pypi/freesolo/_usage.py +39 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/contracts/markdown.py +1 -5
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/contracts/types.py +0 -1
- freesolo-0.2.4/pypi/freesolo/datasets/__init__.py +13 -0
- freesolo-0.2.4/pypi/freesolo/datasets/core.py +74 -0
- freesolo-0.2.4/pypi/freesolo/datasets/records.py +139 -0
- freesolo-0.2.4/pypi/freesolo/datasets/types.py +26 -0
- freesolo-0.2.4/pypi/freesolo/environments/__init__.py +17 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/environments/base.py +5 -78
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/environments/evaluation.py +94 -57
- freesolo-0.2.4/pypi/freesolo/environments/types.py +79 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/__init__.py +0 -15
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/client.py +20 -16
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/judges/__init__.py +0 -8
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/judges/base.py +1 -48
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/judges/groundedness.py +1 -1
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/judges/instruction_following.py +1 -1
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/judges/pairwise_preference.py +1 -1
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/judges/reference_correctness.py +1 -1
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/judges/rubric.py +1 -1
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/results.py +1 -1
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/types.py +1 -1
- freesolo-0.2.4/pypi/freesolo/gepa/__init__.py +17 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/gepa/adapter.py +32 -29
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/gepa/reflection.py +20 -5
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/gepa/setup.py +23 -31
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/gepa/types.py +8 -15
- freesolo-0.2.4/pypi/freesolo/tracing/__init__.py +13 -0
- freesolo-0.2.4/pypi/freesolo/tracing/otel.py +250 -0
- freesolo-0.2.4/pypi/freesolo/training/__init__.py +10 -0
- freesolo-0.2.4/pypi/freesolo/training/grpo/__init__.py +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/grpo/config.py +0 -6
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/grpo/datums.py +0 -9
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/grpo/rewards.py +0 -6
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/grpo/sampling.py +0 -8
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/storage.py +1 -4
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/train_grpo.py +7 -11
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/train_sft.py +33 -13
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/training/types.py +12 -0
- freesolo-0.2.4/pypi/freesolo/utils/__init__.py +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/utils/checkpoints.py +0 -23
- freesolo-0.2.3/pypi/freesolo/util.py → freesolo-0.2.4/pypi/freesolo/utils/core.py +15 -222
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/utils/openrouter.py +0 -37
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/utils/oracle.py +3 -4
- {freesolo-0.2.3/pypi/freesolo → freesolo-0.2.4/pypi/freesolo/utils}/storage.py +78 -37
- freesolo-0.2.4/pypi/freesolo/utils/upload.py +60 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/utils/wandb.py +0 -41
- {freesolo-0.2.3 → freesolo-0.2.4}/pyproject.toml +12 -7
- {freesolo-0.2.3 → freesolo-0.2.4}/ruff.toml +3 -0
- freesolo-0.2.4/scripts/sync_package_function_usage.py +81 -0
- freesolo-0.2.4/tests/end_to_end_testing/test_environment_evaluation_flow.py +140 -0
- freesolo-0.2.4/tests/end_to_end_testing/test_examples.py +141 -0
- freesolo-0.2.4/tests/functionality/test_datasets.py +113 -0
- {freesolo-0.2.3/tests → freesolo-0.2.4/tests/functionality}/test_evaluation_client.py +5 -4
- {freesolo-0.2.3/tests → freesolo-0.2.4/tests/functionality}/test_gepa_adapter.py +54 -16
- freesolo-0.2.4/tests/functionality/test_records_rewards_and_config.py +126 -0
- {freesolo-0.2.3/tests → freesolo-0.2.4/tests/functionality}/test_storage_sync.py +143 -20
- freesolo-0.2.4/tests/functionality/test_tracing_opentelemetry.py +128 -0
- freesolo-0.2.4/tests/functionality/test_upload.py +97 -0
- freesolo-0.2.4/tests/functionality/test_usage_registry.py +63 -0
- freesolo-0.2.4/tests/security/test_sanitize_and_contract_security.py +60 -0
- freesolo-0.2.4/uv.lock +3328 -0
- freesolo-0.2.3/pypi/examples/.env.example +0 -10
- freesolo-0.2.3/pypi/examples/__init__.py +0 -1
- freesolo-0.2.3/pypi/examples/anthropic/__init__.py +0 -1
- freesolo-0.2.3/pypi/examples/anthropic/chat.py +0 -56
- freesolo-0.2.3/pypi/examples/anthropic/vision.py +0 -82
- freesolo-0.2.3/pypi/examples/evals/__init__.py +0 -1
- freesolo-0.2.3/pypi/examples/evals/exact_match.py +0 -105
- freesolo-0.2.3/pypi/examples/evals/llm_judge.py +0 -149
- freesolo-0.2.3/pypi/examples/gemini/__init__.py +0 -1
- freesolo-0.2.3/pypi/examples/gemini/chat.py +0 -53
- freesolo-0.2.3/pypi/examples/gemini/vision.py +0 -79
- freesolo-0.2.3/pypi/examples/openai/__init__.py +0 -1
- freesolo-0.2.3/pypi/examples/openai/chat.py +0 -56
- freesolo-0.2.3/pypi/examples/openai/vision.py +0 -64
- freesolo-0.2.3/pypi/examples/openrouter/__init__.py +0 -1
- freesolo-0.2.3/pypi/examples/openrouter/chat.py +0 -60
- freesolo-0.2.3/pypi/examples/utils.py +0 -231
- freesolo-0.2.3/pypi/freesolo/__init__.py +0 -61
- freesolo-0.2.3/pypi/freesolo/environments/__init__.py +0 -32
- freesolo-0.2.3/pypi/freesolo/environments/types.py +0 -157
- freesolo-0.2.3/pypi/freesolo/gepa/__init__.py +0 -47
- freesolo-0.2.3/pypi/freesolo/tracing/__init__.py +0 -31
- freesolo-0.2.3/pypi/freesolo/tracing/client.py +0 -548
- freesolo-0.2.3/pypi/freesolo/tracing/decorators.py +0 -66
- freesolo-0.2.3/pypi/freesolo/tracing/providers/__init__.py +0 -14
- freesolo-0.2.3/pypi/freesolo/tracing/providers/anthropic.py +0 -111
- freesolo-0.2.3/pypi/freesolo/tracing/providers/config.py +0 -101
- freesolo-0.2.3/pypi/freesolo/tracing/providers/gemini.py +0 -205
- freesolo-0.2.3/pypi/freesolo/tracing/providers/openai.py +0 -208
- freesolo-0.2.3/pypi/freesolo/tracing/providers/utils.py +0 -276
- freesolo-0.2.3/pypi/freesolo/tracing/types.py +0 -79
- freesolo-0.2.3/pypi/freesolo/training/__init__.py +0 -14
- freesolo-0.2.3/pypi/freesolo/training/grpo/__init__.py +0 -20
- freesolo-0.2.3/pypi/freesolo/utils/__init__.py +0 -119
- freesolo-0.2.3/pypi/freesolo/utils/deployment.py +0 -70
- freesolo-0.2.3/uv.lock +0 -1204
- {freesolo-0.2.3 → freesolo-0.2.4}/.env.example +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/.gitignore +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/.gitignore +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/contracts/__init__.py +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/evaluation/responses.py +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/py.typed +0 -0
- {freesolo-0.2.3 → freesolo-0.2.4}/pypi/freesolo/tracing/sanitize.py +0 -0
- {freesolo-0.2.3/tests → freesolo-0.2.4/tests/functionality}/test_utils_checkpoints.py +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
name: Publish packages
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
paths:
|
|
8
|
+
- "pyproject.toml"
|
|
9
|
+
- "uv.lock"
|
|
10
|
+
- "pypi/**"
|
|
11
|
+
- "examples/**"
|
|
12
|
+
- ".github/workflows/publish-packages.yml"
|
|
13
|
+
workflow_dispatch:
|
|
14
|
+
|
|
15
|
+
concurrency:
|
|
16
|
+
group: publish-packages-${{ github.ref }}
|
|
17
|
+
cancel-in-progress: false
|
|
18
|
+
|
|
19
|
+
jobs:
|
|
20
|
+
publish-pypi:
|
|
21
|
+
name: Publish PyPI package
|
|
22
|
+
runs-on: ubuntu-latest
|
|
23
|
+
permissions:
|
|
24
|
+
contents: read
|
|
25
|
+
env:
|
|
26
|
+
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
|
|
27
|
+
|
|
28
|
+
steps:
|
|
29
|
+
- uses: actions/checkout@v6
|
|
30
|
+
|
|
31
|
+
- uses: actions/setup-python@v6
|
|
32
|
+
with:
|
|
33
|
+
python-version: "3.12"
|
|
34
|
+
|
|
35
|
+
- name: Read package metadata
|
|
36
|
+
id: metadata
|
|
37
|
+
run: |
|
|
38
|
+
python - <<'PY' >> "$GITHUB_OUTPUT"
|
|
39
|
+
import tomllib
|
|
40
|
+
|
|
41
|
+
with open("pyproject.toml", "rb") as f:
|
|
42
|
+
project = tomllib.load(f)["project"]
|
|
43
|
+
|
|
44
|
+
print(f"name={project['name']}")
|
|
45
|
+
print(f"version={project['version']}")
|
|
46
|
+
PY
|
|
47
|
+
|
|
48
|
+
- name: Check PyPI for existing version
|
|
49
|
+
id: pypi
|
|
50
|
+
env:
|
|
51
|
+
PACKAGE_NAME: ${{ steps.metadata.outputs.name }}
|
|
52
|
+
PACKAGE_VERSION: ${{ steps.metadata.outputs.version }}
|
|
53
|
+
run: |
|
|
54
|
+
python - <<'PY' >> "$GITHUB_OUTPUT"
|
|
55
|
+
import os
|
|
56
|
+
import urllib.error
|
|
57
|
+
import urllib.request
|
|
58
|
+
|
|
59
|
+
name = os.environ["PACKAGE_NAME"]
|
|
60
|
+
version = os.environ["PACKAGE_VERSION"]
|
|
61
|
+
url = f"https://pypi.org/pypi/{name}/{version}/json"
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
with urllib.request.urlopen(url, timeout=30) as response:
|
|
65
|
+
exists = response.status == 200
|
|
66
|
+
except urllib.error.HTTPError as error:
|
|
67
|
+
if error.code != 404:
|
|
68
|
+
raise
|
|
69
|
+
exists = False
|
|
70
|
+
|
|
71
|
+
print(f"exists={'true' if exists else 'false'}")
|
|
72
|
+
PY
|
|
73
|
+
|
|
74
|
+
- name: Skip existing PyPI version
|
|
75
|
+
if: steps.pypi.outputs.exists == 'true'
|
|
76
|
+
run: echo "${{ steps.metadata.outputs.name }} ${{ steps.metadata.outputs.version }} is already on PyPI."
|
|
77
|
+
|
|
78
|
+
- name: Install uv
|
|
79
|
+
if: steps.pypi.outputs.exists != 'true'
|
|
80
|
+
run: python -m pip install --upgrade uv
|
|
81
|
+
|
|
82
|
+
- name: Build distributions
|
|
83
|
+
if: steps.pypi.outputs.exists != 'true'
|
|
84
|
+
run: |
|
|
85
|
+
rm -rf dist
|
|
86
|
+
uv build
|
|
87
|
+
|
|
88
|
+
- name: Publish to PyPI
|
|
89
|
+
if: steps.pypi.outputs.exists != 'true' && env.UV_PUBLISH_TOKEN != ''
|
|
90
|
+
run: uv publish
|
|
91
|
+
|
|
92
|
+
- name: Skip publish without PyPI token
|
|
93
|
+
if: steps.pypi.outputs.exists != 'true' && env.UV_PUBLISH_TOKEN == ''
|
|
94
|
+
run: |
|
|
95
|
+
echo "PYPI_API_TOKEN is not configured; built distributions but skipped upload."
|
|
96
|
+
echo "Add a PYPI_API_TOKEN repository secret to publish this package."
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
name: Python checks
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
push:
|
|
6
|
+
branches:
|
|
7
|
+
- main
|
|
8
|
+
workflow_dispatch:
|
|
9
|
+
|
|
10
|
+
permissions:
|
|
11
|
+
contents: read
|
|
12
|
+
|
|
13
|
+
jobs:
|
|
14
|
+
checks:
|
|
15
|
+
name: Ruff and tests
|
|
16
|
+
runs-on: ubuntu-latest
|
|
17
|
+
|
|
18
|
+
steps:
|
|
19
|
+
- uses: actions/checkout@v6
|
|
20
|
+
|
|
21
|
+
- uses: actions/setup-python@v6
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
|
|
25
|
+
- name: Install uv
|
|
26
|
+
run: python3 -m pip install --upgrade uv
|
|
27
|
+
|
|
28
|
+
- name: Install dependencies
|
|
29
|
+
run: uv sync --locked --extra dev
|
|
30
|
+
|
|
31
|
+
- name: Python compile check
|
|
32
|
+
run: python3 -m py_compile $(find pypi tests -name '*.py' -print)
|
|
33
|
+
|
|
34
|
+
- name: Ruff check
|
|
35
|
+
run: uv run --extra dev python -m ruff check .
|
|
36
|
+
|
|
37
|
+
- name: Ruff format check
|
|
38
|
+
run: uv run --extra dev python -m ruff format --check .
|
|
39
|
+
|
|
40
|
+
- name: Tests
|
|
41
|
+
run: uv run --extra dev python -m pytest tests
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
name: Sync package function usage
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches:
|
|
6
|
+
- main
|
|
7
|
+
paths:
|
|
8
|
+
- "function_usage_registry.json"
|
|
9
|
+
- "scripts/sync_package_function_usage.py"
|
|
10
|
+
- ".github/workflows/sync-package-function-usage.yml"
|
|
11
|
+
workflow_dispatch:
|
|
12
|
+
|
|
13
|
+
permissions:
|
|
14
|
+
contents: read
|
|
15
|
+
|
|
16
|
+
jobs:
|
|
17
|
+
sync:
|
|
18
|
+
name: Sync usage registry
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
if: ${{ github.ref == 'refs/heads/main' }}
|
|
21
|
+
env:
|
|
22
|
+
SUPABASE_URL: ${{ secrets.SUPABASE_URL }}
|
|
23
|
+
SUPABASE_SERVICE_ROLE_KEY: ${{ secrets.SUPABASE_SERVICE_ROLE_KEY }}
|
|
24
|
+
|
|
25
|
+
steps:
|
|
26
|
+
- uses: actions/checkout@v6
|
|
27
|
+
|
|
28
|
+
- uses: actions/setup-python@v6
|
|
29
|
+
with:
|
|
30
|
+
python-version: "3.12"
|
|
31
|
+
|
|
32
|
+
- name: Sync package function rows
|
|
33
|
+
if: env.SUPABASE_URL != '' && env.SUPABASE_SERVICE_ROLE_KEY != ''
|
|
34
|
+
run: python scripts/sync_package_function_usage.py --remove-stale
|
|
35
|
+
|
|
36
|
+
- name: Skip without Supabase secrets
|
|
37
|
+
if: env.SUPABASE_URL == '' || env.SUPABASE_SERVICE_ROLE_KEY == ''
|
|
38
|
+
run: echo "SUPABASE_URL and SUPABASE_SERVICE_ROLE_KEY secrets are required to sync usage."
|
|
@@ -1,19 +1,25 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: freesolo
|
|
3
|
-
Version: 0.2.3
|
|
3
|
+
Version: 0.2.4
|
|
4
4
|
Summary: Tracing, evaluation, and training utilities for LLM applications.
|
|
5
|
-
Requires-Python: >=3.
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: gepa>=0.1.1
|
|
6
7
|
Requires-Dist: httpx>=0.27.0
|
|
8
|
+
Requires-Dist: jsonschema>=4.0.0
|
|
9
|
+
Requires-Dist: numpy>=1.26.0
|
|
10
|
+
Requires-Dist: opentelemetry-api>=1.28.0
|
|
11
|
+
Requires-Dist: opentelemetry-exporter-otlp-proto-http>=1.28.0
|
|
12
|
+
Requires-Dist: opentelemetry-sdk>=1.28.0
|
|
13
|
+
Requires-Dist: pymongo>=4.0.0
|
|
14
|
+
Requires-Dist: python-dotenv>=1.0.0
|
|
15
|
+
Requires-Dist: tinker-cookbook>=0.3.0
|
|
16
|
+
Requires-Dist: tinker>=0.19.0
|
|
7
17
|
Requires-Dist: wandb>=0.17.0
|
|
8
18
|
Provides-Extra: dev
|
|
9
19
|
Requires-Dist: pytest>=8.0.0; extra == 'dev'
|
|
10
20
|
Requires-Dist: ruff>=0.11.0; extra == 'dev'
|
|
11
21
|
Provides-Extra: examples
|
|
12
|
-
Requires-Dist: anthropic>=0.40.0; extra == 'examples'
|
|
13
|
-
Requires-Dist: google-genai>=1.0.0; extra == 'examples'
|
|
14
22
|
Requires-Dist: openai>=1.0.0; extra == 'examples'
|
|
15
|
-
Provides-Extra: gepa
|
|
16
|
-
Requires-Dist: gepa>=0.1.1; extra == 'gepa'
|
|
17
23
|
Description-Content-Type: text/markdown
|
|
18
24
|
|
|
19
25
|
# freesolo
|
|
@@ -24,36 +30,15 @@ It is built for the lowest-friction integration possible:
|
|
|
24
30
|
|
|
25
31
|
1. Install the package
|
|
26
32
|
2. Set `FREESOLO_API_KEY`
|
|
27
|
-
3.
|
|
33
|
+
3. Configure the tracer
|
|
28
34
|
4. Run traces and evaluations from the package APIs
|
|
29
35
|
|
|
30
|
-
## Current provider support
|
|
31
|
-
|
|
32
|
-
`freesolo` currently supports automatic client instrumentation for:
|
|
33
|
-
|
|
34
|
-
- OpenAI
|
|
35
|
-
- Anthropic
|
|
36
|
-
- Gemini
|
|
37
|
-
- OpenAI-compatible clients via `wrap(...)` / `wrap_provider(...)`
|
|
38
|
-
|
|
39
36
|
## Install
|
|
40
37
|
|
|
41
|
-
Install the package
|
|
42
|
-
|
|
43
|
-
```bash
|
|
44
|
-
pip install freesolo openai
|
|
45
|
-
```
|
|
46
|
-
|
|
47
|
-
or
|
|
48
|
-
|
|
49
|
-
```bash
|
|
50
|
-
pip install freesolo anthropic
|
|
51
|
-
```
|
|
52
|
-
|
|
53
|
-
or
|
|
38
|
+
Install the package:
|
|
54
39
|
|
|
55
40
|
```bash
|
|
56
|
-
pip install freesolo
|
|
41
|
+
pip install freesolo
|
|
57
42
|
```
|
|
58
43
|
|
|
59
44
|
## Environment
|
|
@@ -68,107 +53,77 @@ export FREESOLO_API_KEY=fslo_...
|
|
|
68
53
|
## Quickstart
|
|
69
54
|
|
|
70
55
|
```python
|
|
71
|
-
from
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
model
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
)
|
|
86
|
-
|
|
87
|
-
print(result.output_text or "")
|
|
56
|
+
from freesolo.tracing import configure_tracer, get_tracer
|
|
57
|
+
|
|
58
|
+
configure_tracer(service_name="my-llm-app")
|
|
59
|
+
tracer = get_tracer()
|
|
60
|
+
|
|
61
|
+
with tracer.start_as_current_span(
|
|
62
|
+
"model.call",
|
|
63
|
+
attributes={
|
|
64
|
+
"gen_ai.system": "openai",
|
|
65
|
+
"gen_ai.request.model": "gpt-5.5",
|
|
66
|
+
"freesolo.input": {"prompt": "How do I reset my password?"},
|
|
67
|
+
},
|
|
68
|
+
) as span:
|
|
69
|
+
result = "Reset it from account settings."
|
|
70
|
+
span.set_attribute("freesolo.output", result)
|
|
88
71
|
```
|
|
89
72
|
|
|
90
|
-
##
|
|
73
|
+
## Runnable Examples
|
|
91
74
|
|
|
92
|
-
|
|
93
|
-
from openai import OpenAI
|
|
94
|
-
from freesolo import wrap
|
|
75
|
+
Copy-pasteable examples live in [`examples/`](examples/):
|
|
95
76
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
77
|
+
- `tracing_manual_span.py`: configure OpenTelemetry and send one application span.
|
|
78
|
+
- `evaluation_custom_scorer.py`: run custom binary and numeric eval scorers.
|
|
79
|
+
- `evaluation_from_files.py`: run evals from a concrete dataset and environment.
|
|
80
|
+
- `environment.py`: example environment used by evals, training, and GEPA.
|
|
81
|
+
- `support_dataset.py`: example dataset paths and loaders used by evals, SFT, GRPO, and GEPA.
|
|
82
|
+
- `gepa_prompt_example.py`: run the Freesolo GEPA adapter over the example dataset.
|
|
83
|
+
- `training_sft_grpo.py`: start SFT or GRPO training runs from package APIs.
|
|
102
84
|
|
|
103
|
-
|
|
104
|
-
model="openai/gpt-4.1-mini",
|
|
105
|
-
messages=[
|
|
106
|
-
{"role": "system", "content": "Reply in plain text."},
|
|
107
|
-
{"role": "user", "content": "Write a one-sentence launch blurb."},
|
|
108
|
-
],
|
|
109
|
-
max_tokens=120,
|
|
110
|
-
)
|
|
111
|
-
|
|
112
|
-
print(response.choices[0].message.content or "")
|
|
113
|
-
```
|
|
114
|
-
|
|
115
|
-
## Gemini Quickstart
|
|
116
|
-
|
|
117
|
-
```python
|
|
118
|
-
from google import genai
|
|
119
|
-
from freesolo import instrument_gemini
|
|
85
|
+
From a repo checkout:
|
|
120
86
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
contents="Write a one-sentence release note for traced Gemini support.",
|
|
126
|
-
)
|
|
127
|
-
|
|
128
|
-
print(response.text or "")
|
|
87
|
+
```bash
|
|
88
|
+
cd freesolo-sdk
|
|
89
|
+
export PYTHONPATH="$PWD/pypi"
|
|
90
|
+
uv run python examples/evaluation_custom_scorer.py --local
|
|
129
91
|
```
|
|
130
92
|
|
|
131
|
-
##
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
with
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
max_tokens=64,
|
|
152
|
-
messages=[{"role": "user", "content": "Say goodbye"}],
|
|
153
|
-
)
|
|
154
|
-
```
|
|
93
|
+
## Public API
|
|
94
|
+
|
|
95
|
+
The root `freesolo` module intentionally exports no functions. Import from the
|
|
96
|
+
subpackages below; lower-level modules may be importable, but they are
|
|
97
|
+
implementation helpers unless they appear here or in an example.
|
|
98
|
+
|
|
99
|
+
| Import | Use case |
|
|
100
|
+
| --- | --- |
|
|
101
|
+
| `freesolo.tracing.configure_tracer`, `get_tracer`, `force_flush`, `shutdown` | Send OpenTelemetry traces from an application to Freesolo. |
|
|
102
|
+
| `freesolo.evaluation.EvaluationClient` | Run custom-scorer evals or environment evals and upload results to Freesolo. |
|
|
103
|
+
| `freesolo.evaluation.run_local_evaluation` | Run custom scorers locally without uploading results. |
|
|
104
|
+
| `freesolo.evaluation.CustomScorer`, `BinaryResponse`, `NumericResponse` | Define local scorer logic for eval rows. |
|
|
105
|
+
| `freesolo.evaluation.HostedJudgeClient` and hosted scorer classes | Use hosted LLM-as-judge scorers with OpenRouter-compatible credentials. |
|
|
106
|
+
| `freesolo.datasets.TaskExample`, `Dataset`, `load_dataset` | Load task examples and construct labeled conversations for evals or training. |
|
|
107
|
+
| `freesolo.environments.Environment`, `RewardResult`, `RewardMetric`, `GrpoConfig`, `EnvironmentGeneration` | Define task behavior once for evals, GEPA, SFT, and GRPO. |
|
|
108
|
+
| `freesolo.training.SftConfig`, `TrainGrpoOptions`, `train_sft`, `train_grpo` | Start SFT or GRPO training from package APIs. |
|
|
109
|
+
| `freesolo.gepa.GEPASetup`, `GEPAConfig`, `DefaultReflectionAgent`, `attach_gepa`, `optimize_gepa` | Optimize prompts through the GEPA adapter using the same environment and dataset abstractions. |
|
|
110
|
+
| `freesolo.contracts.load_contract_text`, `extract_contract_spec`, `load_contract_spec`, `build_oracle_messages` | Read contract markdown and build oracle prompt messages. |
|
|
111
|
+
| `freesolo.utils.oracle.generate_ground_truth_records` | Generate ground-truth JSONL records from source examples using a contract, environment, and oracle model. |
|
|
112
|
+
| `freesolo.utils.upload.upload_tinker_checkpoint_to_huggingface` | Upload a Tinker checkpoint to a private Hugging Face model repo. |
|
|
155
113
|
|
|
156
114
|
## What Gets Stored
|
|
157
115
|
|
|
158
|
-
-
|
|
159
|
-
-
|
|
160
|
-
-
|
|
161
|
-
-
|
|
162
|
-
-
|
|
163
|
-
- Image inputs with inline previews for the trace UI
|
|
116
|
+
- Native OTLP traces and spans
|
|
117
|
+
- Resource attributes like `service.name`
|
|
118
|
+
- Span names, timings, parent span ids, status, and errors
|
|
119
|
+
- Common model attributes such as `gen_ai.system`, `gen_ai.request.model`, and token counts
|
|
120
|
+
- Optional `freesolo.input` and `freesolo.output` span attributes
|
|
164
121
|
|
|
165
122
|
## Notes
|
|
166
123
|
|
|
167
|
-
-
|
|
168
|
-
-
|
|
169
|
-
-
|
|
170
|
-
- For agentic or long-horizon workflows, strongly recommend `start_trace("descriptive-title")` so planning, retries, and follow-up calls stay grouped.
|
|
171
|
-
- Delivery is best-effort by default. Trace ingestion failures do not break your app.
|
|
124
|
+
- Tracing uses native OpenTelemetry protobuf export to `/api/traces/ingest`.
|
|
125
|
+
- Configure third-party OpenTelemetry instrumentors against the provider returned by `configure_tracer(...)`.
|
|
126
|
+
- Delivery is handled by the OpenTelemetry span processor you configure.
|
|
172
127
|
|
|
173
128
|
## Evaluations
|
|
174
129
|
|
|
@@ -216,16 +171,15 @@ results = client.run(
|
|
|
216
171
|
print(results[0].success)
|
|
217
172
|
```
|
|
218
173
|
|
|
219
|
-
## Tinker
|
|
174
|
+
## Tinker Hugging Face Upload
|
|
220
175
|
|
|
221
|
-
`freesolo.utils.
|
|
222
|
-
|
|
223
|
-
the server JSON response.
|
|
176
|
+
`freesolo.utils.upload` posts a Tinker checkpoint URL to the Freesolo upload
|
|
177
|
+
service and returns the Hugging Face upload response.
|
|
224
178
|
|
|
225
179
|
```python
|
|
226
|
-
from freesolo.utils.
|
|
180
|
+
from freesolo.utils.upload import upload_tinker_checkpoint_to_huggingface
|
|
227
181
|
|
|
228
|
-
result =
|
|
182
|
+
result = upload_tinker_checkpoint_to_huggingface(
|
|
229
183
|
"tinker://<run_id>/sampler_weights/final",
|
|
230
184
|
base_model="Qwen/Qwen3.5-35B-A3B",
|
|
231
185
|
)
|
|
@@ -235,34 +189,36 @@ print(result["repoId"])
|
|
|
235
189
|
|
|
236
190
|
### Environment-driven evaluations
|
|
237
191
|
|
|
238
|
-
For training contracts,
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
192
|
+
For training contracts, `Environment` describes task behavior for evals and
|
|
193
|
+
GRPO/RL: prompt construction, response normalization, and reward scoring.
|
|
194
|
+
Dataset loading and labeled conversation construction live in `freesolo.datasets`.
|
|
195
|
+
`run_environment` loads task examples, calls your model callback, scores the
|
|
196
|
+
response through the environment, and uploads the same `scorers_data` shape used
|
|
197
|
+
by the eval DB.
|
|
242
198
|
|
|
243
199
|
```python
|
|
244
200
|
from typing import Any
|
|
245
201
|
|
|
246
202
|
from openai import OpenAI
|
|
247
203
|
|
|
204
|
+
from freesolo.datasets import TaskExample
|
|
248
205
|
from freesolo.environments import (
|
|
249
206
|
Environment,
|
|
250
207
|
EnvironmentGeneration,
|
|
251
208
|
RewardMetric,
|
|
252
209
|
RewardResult,
|
|
253
|
-
TaskExample,
|
|
254
210
|
)
|
|
255
211
|
from freesolo.evaluation import EvaluationClient
|
|
256
212
|
|
|
257
213
|
|
|
258
|
-
class
|
|
214
|
+
class PromptEnvironment(Environment):
|
|
259
215
|
def build_prompt_messages(
|
|
260
216
|
self,
|
|
261
217
|
example: TaskExample,
|
|
262
|
-
|
|
218
|
+
prompt_text: str,
|
|
263
219
|
):
|
|
264
220
|
return [
|
|
265
|
-
{"role": "system", "content":
|
|
221
|
+
{"role": "system", "content": prompt_text},
|
|
266
222
|
{"role": "user", "content": example.task},
|
|
267
223
|
]
|
|
268
224
|
|
|
@@ -359,7 +315,6 @@ from typing import Any
|
|
|
359
315
|
|
|
360
316
|
from openai import OpenAI
|
|
361
317
|
|
|
362
|
-
from freesolo import instrument_openai
|
|
363
318
|
from freesolo.evaluation import CustomScorer, EvaluationClient, NumericResponse
|
|
364
319
|
|
|
365
320
|
|
|
@@ -403,7 +358,7 @@ class CorrectnessJudge(CustomScorer[NumericResponse]):
|
|
|
403
358
|
)
|
|
404
359
|
|
|
405
360
|
|
|
406
|
-
judge_client =
|
|
361
|
+
judge_client = OpenAI()
|
|
407
362
|
|
|
408
363
|
results = EvaluationClient().run(
|
|
409
364
|
name="support-agent-correctness",
|
|
@@ -434,11 +389,4 @@ judge = HostedJudgeClient(api_key="YOUR_OPENROUTER_API_KEY")
|
|
|
434
389
|
scorer = ReferenceCorrectnessScorer(client=judge)
|
|
435
390
|
```
|
|
436
391
|
|
|
437
|
-
Tracing is available through
|
|
438
|
-
|
|
439
|
-
```python
|
|
440
|
-
from freesolo.tracing import start_trace
|
|
441
|
-
|
|
442
|
-
with start_trace("support-agent-run"):
|
|
443
|
-
...
|
|
444
|
-
```
|
|
392
|
+
Tracing is available through the OpenTelemetry helpers in `freesolo.tracing`.
|