korrel 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177)
  1. korrel-0.1.0/.github/workflows/ci.yml +66 -0
  2. korrel-0.1.0/.gitignore +31 -0
  3. korrel-0.1.0/LICENSE +21 -0
  4. korrel-0.1.0/PKG-INFO +482 -0
  5. korrel-0.1.0/README.md +429 -0
  6. korrel-0.1.0/benchmarks/.gitignore +9 -0
  7. korrel-0.1.0/benchmarks/.hypothesis/.gitignore +9 -0
  8. korrel-0.1.0/benchmarks/.hypothesis/constants/00867ec756a8f917 +4 -0
  9. korrel-0.1.0/benchmarks/.hypothesis/constants/08babbfb7935a99a +4 -0
  10. korrel-0.1.0/benchmarks/.hypothesis/constants/0e8b13e362b1092c +4 -0
  11. korrel-0.1.0/benchmarks/.hypothesis/constants/15a0f5f59b6ec40b +4 -0
  12. korrel-0.1.0/benchmarks/.hypothesis/constants/233fc342f924ebed +4 -0
  13. korrel-0.1.0/benchmarks/.hypothesis/constants/244c9e4889358727 +4 -0
  14. korrel-0.1.0/benchmarks/.hypothesis/constants/32423b0cfa9c24a9 +4 -0
  15. korrel-0.1.0/benchmarks/.hypothesis/constants/446ac70b992165e6 +4 -0
  16. korrel-0.1.0/benchmarks/.hypothesis/constants/5a6e8d82ba8b4fa5 +4 -0
  17. korrel-0.1.0/benchmarks/.hypothesis/constants/69973fe7fc698225 +4 -0
  18. korrel-0.1.0/benchmarks/.hypothesis/constants/851cb8cad9040f61 +4 -0
  19. korrel-0.1.0/benchmarks/.hypothesis/constants/86b5a785fb80333d +4 -0
  20. korrel-0.1.0/benchmarks/.hypothesis/constants/9e83e537991c5991 +4 -0
  21. korrel-0.1.0/benchmarks/.hypothesis/constants/e524dd7b50a13049 +4 -0
  22. korrel-0.1.0/benchmarks/.hypothesis/constants/f1a15e17e31ea614 +4 -0
  23. korrel-0.1.0/benchmarks/.hypothesis/constants/f7a2bcd4488983c8 +4 -0
  24. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmp1ckzqlsz +0 -0
  25. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmp7ks99093 +0 -0
  26. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmpbkult9uw +0 -0
  27. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmpi2jixm9h +0 -0
  28. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmpj10sjq7j +0 -0
  29. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmpkx_k0dcj +0 -0
  30. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmpt6c1xhxu +0 -0
  31. korrel-0.1.0/benchmarks/.hypothesis/tmp/tmpu2oays1z +0 -0
  32. korrel-0.1.0/benchmarks/.hypothesis/unicode_data/15.1.0/charmap.json.gz +0 -0
  33. korrel-0.1.0/benchmarks/.hypothesis/unicode_data/15.1.0/codec-utf-8.json.gz +0 -0
  34. korrel-0.1.0/benchmarks/.python-version +1 -0
  35. korrel-0.1.0/benchmarks/Makefile +61 -0
  36. korrel-0.1.0/benchmarks/README.md +417 -0
  37. korrel-0.1.0/benchmarks/fidelity/_convert.py +240 -0
  38. korrel-0.1.0/benchmarks/fidelity/_gen_pass_fixture.py +185 -0
  39. korrel-0.1.0/benchmarks/fidelity/_tau2_data.py +140 -0
  40. korrel-0.1.0/benchmarks/fidelity/fixtures/synthetic_task0.run.json +214 -0
  41. korrel-0.1.0/benchmarks/fidelity/fixtures/synthetic_task0_pass.run.json +302 -0
  42. korrel-0.1.0/benchmarks/fidelity/run_fidelity.py +919 -0
  43. korrel-0.1.0/benchmarks/fidelity/scenario_retail.py +209 -0
  44. korrel-0.1.0/benchmarks/pyproject.toml +52 -0
  45. korrel-0.1.0/benchmarks/regenerate/compute_labels.py +167 -0
  46. korrel-0.1.0/benchmarks/regenerate/gen_transcripts.py +238 -0
  47. korrel-0.1.0/benchmarks/results/.gitkeep +0 -0
  48. korrel-0.1.0/benchmarks/results/results.json +1544 -0
  49. korrel-0.1.0/benchmarks/results/selftest.json +58 -0
  50. korrel-0.1.0/benchmarks/results/summary.md +12 -0
  51. korrel-0.1.0/benchmarks/tests/conftest.py +23 -0
  52. korrel-0.1.0/benchmarks/tests/fixtures/__init__.py +1 -0
  53. korrel-0.1.0/benchmarks/tests/fixtures/tau2_builders.py +103 -0
  54. korrel-0.1.0/benchmarks/tests/test_convert.py +583 -0
  55. korrel-0.1.0/benchmarks/tests/test_labels_shape.py +252 -0
  56. korrel-0.1.0/benchmarks/tests/test_no_tau2_in_korrel_core.py +128 -0
  57. korrel-0.1.0/benchmarks/tests/test_rubric_fidelity.py +290 -0
  58. korrel-0.1.0/benchmarks/tests/test_tau2_data.py +169 -0
  59. korrel-0.1.0/benchmarks/transcripts/LICENSE +29 -0
  60. korrel-0.1.0/benchmarks/transcripts/labels.jsonl +80 -0
  61. korrel-0.1.0/benchmarks/transcripts/retail/.gitkeep +0 -0
  62. korrel-0.1.0/benchmarks/transcripts/retail/0__haiku.run.json +1547 -0
  63. korrel-0.1.0/benchmarks/transcripts/retail/0__sonnet.run.json +1590 -0
  64. korrel-0.1.0/benchmarks/transcripts/retail/10__haiku.run.json +1713 -0
  65. korrel-0.1.0/benchmarks/transcripts/retail/10__sonnet.run.json +1420 -0
  66. korrel-0.1.0/benchmarks/transcripts/retail/11__haiku.run.json +1927 -0
  67. korrel-0.1.0/benchmarks/transcripts/retail/11__sonnet.run.json +1710 -0
  68. korrel-0.1.0/benchmarks/transcripts/retail/12__haiku.run.json +1420 -0
  69. korrel-0.1.0/benchmarks/transcripts/retail/12__sonnet.run.json +1315 -0
  70. korrel-0.1.0/benchmarks/transcripts/retail/13__haiku.run.json +1691 -0
  71. korrel-0.1.0/benchmarks/transcripts/retail/13__sonnet.run.json +1691 -0
  72. korrel-0.1.0/benchmarks/transcripts/retail/14__haiku.run.json +1480 -0
  73. korrel-0.1.0/benchmarks/transcripts/retail/14__sonnet.run.json +1408 -0
  74. korrel-0.1.0/benchmarks/transcripts/retail/15__haiku.run.json +1602 -0
  75. korrel-0.1.0/benchmarks/transcripts/retail/15__sonnet.run.json +1750 -0
  76. korrel-0.1.0/benchmarks/transcripts/retail/16__haiku.run.json +1840 -0
  77. korrel-0.1.0/benchmarks/transcripts/retail/16__sonnet.run.json +1587 -0
  78. korrel-0.1.0/benchmarks/transcripts/retail/17__haiku.run.json +1317 -0
  79. korrel-0.1.0/benchmarks/transcripts/retail/17__sonnet.run.json +1169 -0
  80. korrel-0.1.0/benchmarks/transcripts/retail/18__haiku.run.json +1839 -0
  81. korrel-0.1.0/benchmarks/transcripts/retail/18__sonnet.run.json +1586 -0
  82. korrel-0.1.0/benchmarks/transcripts/retail/19__haiku.run.json +1619 -0
  83. korrel-0.1.0/benchmarks/transcripts/retail/19__sonnet.run.json +1753 -0
  84. korrel-0.1.0/benchmarks/transcripts/retail/1__haiku.run.json +1684 -0
  85. korrel-0.1.0/benchmarks/transcripts/retail/1__sonnet.run.json +1734 -0
  86. korrel-0.1.0/benchmarks/transcripts/retail/20__haiku.run.json +2999 -0
  87. korrel-0.1.0/benchmarks/transcripts/retail/20__sonnet.run.json +1894 -0
  88. korrel-0.1.0/benchmarks/transcripts/retail/21__haiku.run.json +2496 -0
  89. korrel-0.1.0/benchmarks/transcripts/retail/21__sonnet.run.json +2339 -0
  90. korrel-0.1.0/benchmarks/transcripts/retail/22__haiku.run.json +2216 -0
  91. korrel-0.1.0/benchmarks/transcripts/retail/22__sonnet.run.json +1801 -0
  92. korrel-0.1.0/benchmarks/transcripts/retail/23__haiku.run.json +1850 -0
  93. korrel-0.1.0/benchmarks/transcripts/retail/23__sonnet.run.json +2894 -0
  94. korrel-0.1.0/benchmarks/transcripts/retail/24__haiku.run.json +1754 -0
  95. korrel-0.1.0/benchmarks/transcripts/retail/24__sonnet.run.json +1458 -0
  96. korrel-0.1.0/benchmarks/transcripts/retail/25__haiku.run.json +1349 -0
  97. korrel-0.1.0/benchmarks/transcripts/retail/25__sonnet.run.json +1497 -0
  98. korrel-0.1.0/benchmarks/transcripts/retail/26__haiku.run.json +1537 -0
  99. korrel-0.1.0/benchmarks/transcripts/retail/26__sonnet.run.json +1537 -0
  100. korrel-0.1.0/benchmarks/transcripts/retail/27__haiku.run.json +1867 -0
  101. korrel-0.1.0/benchmarks/transcripts/retail/27__sonnet.run.json +1616 -0
  102. korrel-0.1.0/benchmarks/transcripts/retail/28__haiku.run.json +2346 -0
  103. korrel-0.1.0/benchmarks/transcripts/retail/28__sonnet.run.json +2554 -0
  104. korrel-0.1.0/benchmarks/transcripts/retail/29__haiku.run.json +1673 -0
  105. korrel-0.1.0/benchmarks/transcripts/retail/29__sonnet.run.json +1926 -0
  106. korrel-0.1.0/benchmarks/transcripts/retail/2__haiku.run.json +1453 -0
  107. korrel-0.1.0/benchmarks/transcripts/retail/2__sonnet.run.json +1601 -0
  108. korrel-0.1.0/benchmarks/transcripts/retail/30__haiku.run.json +3412 -0
  109. korrel-0.1.0/benchmarks/transcripts/retail/30__sonnet.run.json +3455 -0
  110. korrel-0.1.0/benchmarks/transcripts/retail/31__haiku.run.json +2919 -0
  111. korrel-0.1.0/benchmarks/transcripts/retail/31__sonnet.run.json +2623 -0
  112. korrel-0.1.0/benchmarks/transcripts/retail/32__haiku.run.json +2449 -0
  113. korrel-0.1.0/benchmarks/transcripts/retail/32__sonnet.run.json +3337 -0
  114. korrel-0.1.0/benchmarks/transcripts/retail/33__haiku.run.json +1332 -0
  115. korrel-0.1.0/benchmarks/transcripts/retail/33__sonnet.run.json +1480 -0
  116. korrel-0.1.0/benchmarks/transcripts/retail/34__haiku.run.json +1740 -0
  117. korrel-0.1.0/benchmarks/transcripts/retail/34__sonnet.run.json +1733 -0
  118. korrel-0.1.0/benchmarks/transcripts/retail/35__haiku.run.json +2155 -0
  119. korrel-0.1.0/benchmarks/transcripts/retail/35__sonnet.run.json +2155 -0
  120. korrel-0.1.0/benchmarks/transcripts/retail/36__haiku.run.json +2486 -0
  121. korrel-0.1.0/benchmarks/transcripts/retail/36__sonnet.run.json +2134 -0
  122. korrel-0.1.0/benchmarks/transcripts/retail/37__haiku.run.json +2334 -0
  123. korrel-0.1.0/benchmarks/transcripts/retail/37__sonnet.run.json +1885 -0
  124. korrel-0.1.0/benchmarks/transcripts/retail/38__haiku.run.json +1908 -0
  125. korrel-0.1.0/benchmarks/transcripts/retail/38__sonnet.run.json +1835 -0
  126. korrel-0.1.0/benchmarks/transcripts/retail/39__haiku.run.json +1621 -0
  127. korrel-0.1.0/benchmarks/transcripts/retail/39__sonnet.run.json +1728 -0
  128. korrel-0.1.0/benchmarks/transcripts/retail/3__haiku.run.json +1694 -0
  129. korrel-0.1.0/benchmarks/transcripts/retail/3__sonnet.run.json +1694 -0
  130. korrel-0.1.0/benchmarks/transcripts/retail/4__haiku.run.json +1900 -0
  131. korrel-0.1.0/benchmarks/transcripts/retail/4__sonnet.run.json +1828 -0
  132. korrel-0.1.0/benchmarks/transcripts/retail/5__haiku.run.json +1743 -0
  133. korrel-0.1.0/benchmarks/transcripts/retail/5__sonnet.run.json +2039 -0
  134. korrel-0.1.0/benchmarks/transcripts/retail/6__haiku.run.json +1912 -0
  135. korrel-0.1.0/benchmarks/transcripts/retail/6__sonnet.run.json +1912 -0
  136. korrel-0.1.0/benchmarks/transcripts/retail/7__haiku.run.json +1764 -0
  137. korrel-0.1.0/benchmarks/transcripts/retail/7__sonnet.run.json +1764 -0
  138. korrel-0.1.0/benchmarks/transcripts/retail/8__haiku.run.json +2060 -0
  139. korrel-0.1.0/benchmarks/transcripts/retail/8__sonnet.run.json +2060 -0
  140. korrel-0.1.0/benchmarks/transcripts/retail/9__haiku.run.json +1616 -0
  141. korrel-0.1.0/benchmarks/transcripts/retail/9__sonnet.run.json +1912 -0
  142. korrel-0.1.0/benchmarks/uv.lock +3453 -0
  143. korrel-0.1.0/docs/features/v0.1-cli-telemetry.md +194 -0
  144. korrel-0.1.0/docs/spec/korrel-to-openenv.md +353 -0
  145. korrel-0.1.0/docs/spec/korrel-to-verifiers.md +340 -0
  146. korrel-0.1.0/examples/support_refund.py +76 -0
  147. korrel-0.1.0/pyproject.toml +63 -0
  148. korrel-0.1.0/src/korrel/__init__.py +42 -0
  149. korrel-0.1.0/src/korrel/adapter.py +38 -0
  150. korrel-0.1.0/src/korrel/cli.py +375 -0
  151. korrel-0.1.0/src/korrel/exporters/__init__.py +14 -0
  152. korrel-0.1.0/src/korrel/exporters/_shared.py +139 -0
  153. korrel-0.1.0/src/korrel/exporters/openenv.py +1079 -0
  154. korrel-0.1.0/src/korrel/exporters/verifiers.py +613 -0
  155. korrel-0.1.0/src/korrel/persona.py +80 -0
  156. korrel-0.1.0/src/korrel/providers.py +254 -0
  157. korrel-0.1.0/src/korrel/pytest_plugin.py +185 -0
  158. korrel-0.1.0/src/korrel/rubric.py +168 -0
  159. korrel-0.1.0/src/korrel/runtime.py +214 -0
  160. korrel-0.1.0/src/korrel/scenario.py +36 -0
  161. korrel-0.1.0/src/korrel/telemetry.py +347 -0
  162. korrel-0.1.0/src/korrel/tools.py +31 -0
  163. korrel-0.1.0/src/korrel/types.py +63 -0
  164. korrel-0.1.0/tests/conftest.py +43 -0
  165. korrel-0.1.0/tests/test_cli.py +539 -0
  166. korrel-0.1.0/tests/test_openenv_exporter.py +755 -0
  167. korrel-0.1.0/tests/test_openenv_exporter_fake_ns.py +1339 -0
  168. korrel-0.1.0/tests/test_openenv_exporter_real.py +610 -0
  169. korrel-0.1.0/tests/test_providers.py +95 -0
  170. korrel-0.1.0/tests/test_pytest_plugin.py +288 -0
  171. korrel-0.1.0/tests/test_rubric.py +62 -0
  172. korrel-0.1.0/tests/test_runtime_smoke.py +141 -0
  173. korrel-0.1.0/tests/test_telemetry.py +712 -0
  174. korrel-0.1.0/tests/test_tool_loop_cap.py +230 -0
  175. korrel-0.1.0/tests/test_verifiers_exporter.py +1235 -0
  176. korrel-0.1.0/tests/test_verifiers_exporter_real.py +793 -0
  177. korrel-0.1.0/uv.lock +4729 -0
@@ -0,0 +1,66 @@
1
+ name: CI
2
+
3
+ on:
4
+ pull_request:
5
+ push:
6
+ branches:
7
+ - main
8
+
9
+ jobs:
10
+ test:
11
+ name: pytest (Python ${{ matrix.python-version }})
12
+ runs-on: ubuntu-latest
13
+ strategy:
14
+ fail-fast: false
15
+ matrix:
16
+ python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
17
+
18
+ steps:
19
+ - uses: actions/checkout@v6.0.3
20
+
21
+ - name: Set up uv
22
+ uses: astral-sh/setup-uv@v8.2.0
23
+ with:
24
+ python-version: ${{ matrix.python-version }}
25
+
26
+ - name: Install dependencies
27
+ run: uv sync
28
+
29
+ - name: Run tests
30
+ run: uv run pytest
31
+
32
+ verifiers-export:
33
+ name: verifiers-export (Python 3.13)
34
+ runs-on: ubuntu-latest
35
+
36
+ steps:
37
+ - uses: actions/checkout@v6.0.3
38
+
39
+ - name: Set up uv
40
+ uses: astral-sh/setup-uv@v8.2.0
41
+ with:
42
+ python-version: "3.13"
43
+
44
+ - name: Install dependencies with verifiers extra
45
+ run: uv sync --extra verifiers
46
+
47
+ - name: Run tests
48
+ run: uv run pytest
49
+
50
+ openenv-export:
51
+ name: openenv-export (Python 3.13)
52
+ runs-on: ubuntu-latest
53
+
54
+ steps:
55
+ - uses: actions/checkout@v6.0.3
56
+
57
+ - name: Set up uv
58
+ uses: astral-sh/setup-uv@v8.2.0
59
+ with:
60
+ python-version: "3.13"
61
+
62
+ - name: Install dependencies with openenv extra
63
+ run: uv sync --extra openenv
64
+
65
+ - name: Run tests
66
+ run: uv run pytest
@@ -0,0 +1,31 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.egg-info/
5
+ .eggs/
6
+ build/
7
+ dist/
8
+
9
+ # Virtual environments
10
+ .venv/
11
+ venv/
12
+
13
+ # uv
14
+ .uv/
15
+
16
+ # Test and coverage
17
+ .pytest_cache/
18
+ .coverage
19
+ htmlcov/
20
+
21
+ # Editors / OS
22
+ .idea/
23
+ .vscode/
24
+ .DS_Store
25
+ Thumbs.db
26
+
27
+ # Local environment and secrets
28
+ .env
29
+
30
+ # korrel run and pytest-plugin transcript output
31
+ .korrel/
korrel-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 korrel-dev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
korrel-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,482 @@
1
+ Metadata-Version: 2.4
2
+ Name: korrel
3
+ Version: 0.1.0
4
+ Summary: OSS Python SDK for agent simulation: define a multi-turn agent test once, run it as a pytest CI gate, export it as a verifiers/OpenEnv RL environment.
5
+ Project-URL: Homepage, https://korrel.dev
6
+ Project-URL: Repository, https://github.com/korrel-dev/korrel
7
+ Project-URL: Documentation, https://github.com/korrel-dev/korrel#readme
8
+ Project-URL: Issues, https://github.com/korrel-dev/korrel/issues
9
+ Author: korrel-dev
10
+ License: MIT License
11
+
12
+ Copyright (c) 2026 korrel-dev
13
+
14
+ Permission is hereby granted, free of charge, to any person obtaining a copy
15
+ of this software and associated documentation files (the "Software"), to deal
16
+ in the Software without restriction, including without limitation the rights
17
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
18
+ copies of the Software, and to permit persons to whom the Software is
19
+ furnished to do so, subject to the following conditions:
20
+
21
+ The above copyright notice and this permission notice shall be included in all
22
+ copies or substantial portions of the Software.
23
+
24
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
25
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
26
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
27
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
28
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
29
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30
+ SOFTWARE.
31
+ License-File: LICENSE
32
+ Keywords: agent,evaluation,llm,openenv,pytest,reinforcement-learning,simulation,verifiers
33
+ Classifier: Development Status :: 4 - Beta
34
+ Classifier: Intended Audience :: Developers
35
+ Classifier: License :: OSI Approved :: MIT License
36
+ Classifier: Programming Language :: Python :: 3.10
37
+ Classifier: Programming Language :: Python :: 3.11
38
+ Classifier: Programming Language :: Python :: 3.12
39
+ Classifier: Programming Language :: Python :: 3.13
40
+ Classifier: Programming Language :: Python :: 3.14
41
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
42
+ Classifier: Topic :: Software Development :: Testing
43
+ Requires-Python: >=3.10
44
+ Requires-Dist: anthropic
45
+ Requires-Dist: pydantic<3,>=2
46
+ Provides-Extra: openai
47
+ Requires-Dist: openai; extra == 'openai'
48
+ Provides-Extra: openenv
49
+ Requires-Dist: openenv-core>=0.3.0; extra == 'openenv'
50
+ Provides-Extra: verifiers
51
+ Requires-Dist: verifiers>=0.1.14; extra == 'verifiers'
52
+ Description-Content-Type: text/markdown
53
+
54
+ # korrel
55
+
56
+ OSS Python SDK for agent simulation. Define a multi-turn agent test once, run it as a pytest CI gate, export it as a verifiers/OpenEnv RL environment.
57
+
58
+ ## Install
59
+
60
+ ```
61
+ pip install korrel
62
+ ```
63
+
64
+ Inside a uv project, add it as a dependency instead:
65
+
66
+ ```
67
+ uv add korrel
68
+ ```
69
+
70
+ Bring your own provider keys. Korrel reads keys from the environment at call time and stores none. The default provider is Claude via the `anthropic` SDK; set `ANTHROPIC_API_KEY`. OpenAI support is an optional extra (`korrel[openai]`).
71
+
72
+ ## Quickstart
73
+
74
+ The following is a complete path from installation to a passing scenario run.
75
+
76
+ **1. Write a scenario module** (`support_refund.py`):
77
+
78
+ ```python
79
+ from korrel import MockTool, Persona, Rubric, Scenario, adapter_from_provider
80
+ from korrel.providers import AnthropicProvider
81
+ from korrel.types import Message
82
+
83
+ def lookup_order(arguments, state):
84
+ state["called"] = True
85
+ orders = {"A1001": {"order_id": "A1001", "amount": 49.99, "refundable": True}}
86
+ return orders.get(arguments.get("order_id", ""), {"error": "not found"})
87
+
88
+ orders = MockTool(
89
+ name="lookup_order",
90
+ schema={"type": "function", "function": {
91
+ "name": "lookup_order",
92
+ "description": "Look up an order by its id.",
93
+ "parameters": {"type": "object",
94
+ "properties": {"order_id": {"type": "string"}},
95
+ "required": ["order_id"]},
96
+ }},
97
+ respond=lookup_order,
98
+ )
99
+
100
+ def confirmed(completion, info, **kwargs):
101
+ amount = f"{info['amount']:.2f}"
102
+ return 1.0 if any(
103
+ m.role == "assistant" and m.content
104
+ and "refund" in m.content.lower() and amount in m.content
105
+ for m in completion
106
+ ) else 0.0
107
+
108
+ scenario = Scenario(
109
+ id="support_refund",
110
+ system="You are a support agent. Resolve refunds using lookup_order.",
111
+ persona=Persona(goal="Get a refund for order A1001.", behavior="Polite but firm."),
112
+ opening_message="My order A1001 arrived broken and I want a refund.",
113
+ tools=[orders],
114
+ max_turns=3,
115
+ seed=7,
116
+ info={"order_id": "A1001", "amount": 49.99},
117
+ rubric=Rubric(funcs=[confirmed], pass_threshold=0.5),
118
+ )
119
+
120
+ # AnthropicProvider reads ANTHROPIC_API_KEY at call time, not at import time.
121
+ adapter = adapter_from_provider(AnthropicProvider())
122
+ ```
123
+
124
+ **2. Run it:**
125
+
126
+ ```
127
+ korrel run support_refund.py
128
+ ```
129
+
130
+ **Output on pass:**
131
+
132
+ ```
133
+ scenario : support_refund
134
+ score : 1.0000
135
+ status : pass
136
+ model calls : 6
137
+ transcript : .korrel/support_refund.transcript.json
138
+ ```
139
+
140
+ **Output on failure** (exit code 1):
141
+
142
+ ```
143
+ scenario : support_refund
144
+ score : 0.0000
145
+ status : fail
146
+ model calls : 6
147
+ failed : confirmed
148
+ clusters : confirmed(zero)
149
+ transcript : .korrel/support_refund.transcript.json
150
+ ```
151
+
152
+ The CLI exits zero on pass and non-zero on failure. The full conversation transcript is written to `.korrel/<scenario-id>.transcript.json`.
153
+
154
+ **CLI flags:**
155
+
156
+ ```
157
+ korrel run SCENARIO_PY [--out DIR] [--seed N]
158
+ [--scenario-attr NAME] [--adapter-attr NAME]
159
+ ```
160
+
161
+ `--out` overrides the transcript directory (default `.korrel/`). `--seed` overrides the scenario seed. `--scenario-attr` and `--adapter-attr` override the module attribute names (defaults: `scenario`, `adapter`).
162
+
163
+ ## Cost
164
+
165
+ Korrel is bring-your-own-keys. Every model call a run makes is billed to your own provider account. Korrel bills nothing and stores no key; the key is read from the environment at call time. The cost of a run is the number of model calls it makes, so the figures below are stated in model calls, not dollars (the price per call depends on your provider and tier).
166
+
167
+ A run of one scenario makes these calls:
168
+
169
+ - One call to the agent under test at the start of each turn.
170
+ - One additional agent call for each tool-use round in a turn (running a mock tool is local Python, not a model call).
171
+ - One call to the user-simulator (the `Persona`) per turn that continues. The opening message is a fixed string and makes no call, and the final turn makes no user-simulator call.
172
+ - One call for an LLM judge, if the rubric has one, made once per run at scoring time. A plain reward function is local Python and makes no call.
173
+
174
+ As a rule of thumb, a scenario of `T` turns with no tool rounds and no judge is about `2T - 1` calls: `T` agent calls plus `T - 1` user-simulator calls. Each tool round adds one agent call; a judge adds one call.
175
+
176
+ `korrel run` prints the measured number of calls the simulation loop made for the run, on the `model calls` line of the summary. That count covers the agent and user-simulator calls the loop issues. It does not yet include the judge's scoring-time call, which happens inside the rubric after the loop ends.
177
+
178
+ ## The pytest CI gate
179
+
180
+ Name scenario files `*_scenario.py` and place them in your test tree. The korrel pytest plugin (registered automatically via the `pytest11` entry-point, no conftest needed) discovers and runs them:
181
+
182
+ ```
183
+ uv run pytest
184
+ ```
185
+
186
+ A passing scenario produces a green dot. A failing scenario produces a normal pytest failure block showing score, threshold, failed rubric function names, and the transcript path.
187
+
188
+ **Discovery options:**
189
+
190
+ - `--korrel-glob GLOB`: override which file name pattern the plugin picks up (command-line flag)
191
+ - `korrel_glob = *_scenario.py`: the corresponding `pytest.ini` / `pyproject.toml` option
192
+
193
+ Each scenario file in the CI gate must expose both a module-level `scenario` and a module-level `adapter`. If `adapter` is absent, the item is reported as an error immediately.
194
+
195
+ ## The four objects
196
+
197
+ - `Scenario`: a code-first test definition. Holds the system prompt, a `Persona`, the opening message, mock tools, `max_turns`, `max_tool_rounds`, a `seed`, ground-truth `info`, and a `Rubric`.
198
+ - `Persona`: the LLM-driven user-simulator. Given the conversation so far, it produces the next user message. Defaults to Claude.
199
+ - `MockTool`: a programmable tool. Holds a chat-completions tool schema and a `respond` callable that takes parsed arguments and a mutable per-run state and returns a result.
200
+ - `Rubric`: reward functions plus an optional hardened LLM judge. Reward signatures mirror verifiers: `(completion, info, **kwargs) -> float`. The judge treats the transcript as data, never as instructions.
201
+
202
+ ## The module convention
203
+
204
+ A scenario module exposes two module-level names:
205
+
206
+ - `scenario`: a `Scenario` instance.
207
+ - `adapter`: any callable `(messages: list[Message], tools: list[ToolSchema]) -> Message`. Both `korrel run` and the pytest plugin read these names (overridable via `--scenario-attr` / `--adapter-attr`).
208
+
209
+ The built-in helper `adapter_from_provider(provider)` wraps any `Provider` (such as `AnthropicProvider`) as an adapter. Because `AnthropicProvider` reads the API key from the environment only at call time, constructing `adapter_from_provider(AnthropicProvider())` at module level is import-safe: no key is required to import the module.
210
+
211
+ ## Running the Python API directly
212
+
213
+ ```python
214
+ from korrel import run_scenario
215
+ result = run_scenario(scenario, adapter)
216
+ print(result.score, result.passed, result.failed_functions)
217
+ ```
218
+
219
+ `examples/support_refund.py` holds a runnable scenario definition with a real `AnthropicProvider` adapter.
220
+
221
+ ## Export to verifiers
222
+
223
+ A Korrel scenario can be translated into a [`verifiers`](https://github.com/willccbb/verifiers) RL training environment. The full mapping is specified in [`docs/spec/korrel-to-verifiers.md`](docs/spec/korrel-to-verifiers.md).
224
+
225
+ ### Install
226
+
227
+ `verifiers` is an optional extra. `import korrel` works without it.
228
+
229
+ ```
230
+ pip install 'korrel[verifiers]'
231
+ # or
232
+ uv add 'korrel[verifiers]'
233
+ ```
234
+
235
+ The extra is declared as `verifiers>=0.1.14`. `verifiers` 0.1.14 requires Python `<3.14`, so the extra installs only on Python 3.10-3.13.
236
+
237
+ ### Python API
238
+
239
+ ```python
240
+ from korrel.exporters.verifiers import to_verifiers_env
241
+
242
+ env = to_verifiers_env(scenario)
243
+ ```
244
+
245
+ `to_verifiers_env` returns a constructed verifiers environment: a `MultiTurnEnv` subclass for most scenarios (persona-driven or tool-bearing), or a `SingleTurnEnv` for a single-exchange scenario with no tools and no persona follow-up. For the support-refund scenario from the quickstart, the call returns a `MultiTurnEnv` subclass because the scenario has both a persona and a mock tool.
246
+
247
+ ### CLI export
248
+
249
+ ```
250
+ korrel export support_refund.py --to verifiers --out ./support_refund_env
251
+ ```
252
+
253
+ This writes a pip-installable package that `verifiers` discovers via `load_environment`:
254
+
255
+ ```
256
+ support_refund_env/
257
+ pyproject.toml
258
+ support_refund.py # environment module exposing load_environment()
259
+ _scenario.py # copy of the original scenario source
260
+ ```
261
+
262
+ Install the package in a Python 3.10-3.13 environment and load it:
263
+
264
+ ```python
265
+ import verifiers as vf
266
+
267
+ env = vf.load_environment("support_refund")
268
+ ```
269
+
270
+ When `--out` is omitted, the package is written to `.korrel/export/<scenario-id>/`.
271
+
272
+ ### Concept mapping
273
+
274
+ | Korrel concept | verifiers target |
275
+ |---|---|
276
+ | `Scenario` (persona or tools or `max_turns > 1`) | `MultiTurnEnv` subclass |
277
+ | `Scenario` (single exchange, no tools) | `SingleTurnEnv` |
278
+ | `Persona` | user-turn generation inside `env_response` |
279
+ | `MockTool` | tool-execution branch of `env_response` |
280
+ | Rubric reward functions | `verifiers.Rubric(funcs=..., weights=...)` |
281
+ | `Scenario.system` + `opening_message` + `info` | dataset row (`prompt`, `info`) |
282
+
283
+ The spec holds the full field-level detail.
284
+
285
+ ### What survives the translation
286
+
287
+ The reward-function signature `(completion, info, **kwargs) -> float` is unchanged. The canonical transcript types in `korrel.types` are unchanged. The only adaptation at the boundary is converting the `completion` value from the verifiers message shape to the korrel canonical shape before calling each reward function.
288
+
289
+ Lossy edges (summarized; see the spec's [Lossy edges](docs/spec/korrel-to-verifiers.md#lossy-edges) section for the complete list):
290
+
291
+ - The persona generates user turns with a live model call inside `env_response`. BYO key, non-deterministic. Offline tests substitute a fake persona.
292
+ - The judge reward function likewise makes a live model call during scoring.
293
+ - Korrel aggregates reward functions by mean; verifiers uses a weighted sum. The exporter sets weights to `1/n` to reproduce the mean.
294
+ - Korrel `max_turns` counts user-to-assistant exchanges; verifiers `max_turns` counts individual model-response steps. The exporter derives the verifiers step budget from `scenario.max_turns` and `scenario.max_tool_rounds`.
295
+
296
+ ## Export to OpenEnv
297
+
298
+ A Korrel scenario can be translated into an [OpenEnv](https://github.com/huggingface/openenv) environment server. The full mapping is specified in [`docs/spec/korrel-to-openenv.md`](docs/spec/korrel-to-openenv.md).
299
+
300
+ ### Install
301
+
302
+ `openenv-core` is an optional extra. `import korrel` works without it.
303
+
304
+ ```
305
+ pip install 'korrel[openenv]'
306
+ # or
307
+ uv add 'korrel[openenv]'
308
+ ```
309
+
310
+ `openenv-core>=0.3.0` declares `Requires-Python: >=3.10` with no upper bound, so it installs on Python 3.10 through 3.14, including the repo's default Python 3.14. This contrasts with the verifiers extra, which is capped at Python `<3.14`.
311
+
312
+ ### Python API
313
+
314
+ ```python
315
+ from korrel.exporters.openenv import build_environment_class
316
+
317
+ EnvironmentCls = build_environment_class(scenario, observation_cls, action_cls)
318
+ ```
319
+
320
+ `build_environment_class` returns a concrete `openenv.core.env_server.interfaces.Environment` subclass. The caller supplies the author-defined `Action` and `Observation` subclasses (generated by the CLI export; see below). The `persona` keyword argument accepts an override for offline testing without live model calls.
321
+
322
+ The primary deployment path is the CLI export: the generated package is the artifact that gets deployed to a Hugging Face Space.
323
+
324
+ ### CLI export
325
+
326
+ ```
327
+ korrel export support_refund.py --to openenv --out ./support_refund_env
328
+ ```
329
+
330
+ This writes a pip-installable OpenEnv environment package:
331
+
332
+ ```
333
+ support_refund_env/
334
+ __init__.py
335
+ client.py
336
+ models.py
337
+ openenv.yaml
338
+ pyproject.toml
339
+ README.md
340
+ _scenario.py # copy of the original scenario source
341
+ server/
342
+ __init__.py
343
+ support_refund_environment.py
344
+ app.py
345
+ Dockerfile
346
+ requirements.txt
347
+ ```
348
+
349
+ When `--out` is omitted, the package is written to `.korrel/export/<scenario-id>/`.
350
+
351
+ Deploy the generated package to a Hugging Face Space:
352
+
353
+ ```
354
+ openenv push --secret ANTHROPIC_API_KEY=<your-key>
355
+ ```
356
+
357
+ The persona and judge make live model calls inside the container. The API key is supplied at runtime via `openenv push --secret` and is never written into any emitted file.
358
+
359
+ ### Concept mapping
360
+
361
+ | Korrel concept | OpenEnv target |
362
+ |---|---|
363
+ | `Scenario` | `Environment` subclass |
364
+ | `scenario.system` + `opening_message` | `reset()` seed observation |
365
+ | agent turn | author-defined `Action` subclass |
366
+ | environment reply | author-defined `Observation` subclass |
367
+ | `MockTool` execution | tool branch of `step()` |
368
+ | `Persona.next_message` | persona branch of `step()` |
369
+ | `Rubric` aggregate | terminal `observation.reward` (`done=True`) |
370
+
371
+ The spec holds the full field-level detail.
372
+
373
+ ### What survives the translation
374
+
375
+ The reward-function signature `(completion, info, **kwargs) -> float` is unchanged. The canonical transcript types in `korrel.types` are unchanged. The OpenEnv `Rubric` class is not used because its action/observation signature is incompatible; Korrel computes reward with its own `Rubric.score`.
376
+
377
+ Lossy edges (summarized; see the spec's [Lossy edges](docs/spec/korrel-to-openenv.md#lossy-edges) section for the complete list):
378
+
379
+ - Reward is terminal, not dense. Every intermediate step carries `reward=None`; the final step (`done=True`) carries the rubric aggregate.
380
+ - The persona and judge run server-side inside the container; they make live model calls, which are non-deterministic and require a key at runtime.
381
+ - Content-shape narrowing: `Observation.messages` carries dicts; the full canonical `Message` type is used internally and serialized at the boundary.
382
+ - Tool-call arguments stay a JSON string (`ToolFunction.arguments`), matching the chat-completions wire format.
383
+
384
+ ## Determinism
385
+
386
+ Every run takes a seed and records the model and request parameters. The seed pins scenario setup and any sampling Korrel controls. LLM calls are not bit-reproducible; provider nondeterminism is outside the seed's control.
387
+
388
+ ## Telemetry
389
+
390
+ Korrel includes opt-in telemetry. On opt-in, `korrel run` sends a single content-scrubbed `run` event to Korrel's collector. No scenario content, persona text, transcripts, prompts, tool schemas, file paths, model names, or keys are ever collected. The event carries only aggregate counters and version metadata.
391
+
392
+ **What the `run` event sends** (every field, nothing more):
393
+
394
+ | Field | Description |
395
+ |---|---|
396
+ | `event` | Always `"run"` |
397
+ | `schema_version` | Event schema version (currently `"1"`) |
398
+ | `korrel_version` | Installed korrel version string |
399
+ | `python_version` | CPython version string |
400
+ | `scenario_count` | Number of scenarios in the run |
401
+ | `total_turns` | Total turns across all scenarios |
402
+ | `pass_count` | Number of passing scenarios |
403
+ | `fail_count` | Number of failing scenarios |
404
+ | `duration_s` | Wall-clock duration in seconds |
405
+ | `install_id` | Anonymous, randomly generated UUID (created once, stored locally) |
406
+
407
+ No key, scenario id, path, persona, transcript, prompt, tool schema, or model name is present in the event.
408
+
409
+ **Opt-outs (any one disables telemetry):**
410
+
411
+ - Set `KORREL_TELEMETRY=0` (also accepts `false`, `no`, `off`) in the environment.
412
+ - Set `DO_NOT_TRACK=1` in the environment.
413
+ - Telemetry is automatically off in CI (detected via `CI`, `GITHUB_ACTIONS`, `TRAVIS`, `CIRCLECI`, `GITLAB_CI`, `JENKINS_URL`, `BUILDKITE`, `TF_BUILD`, `TEAMCITY_VERSION`, `BITBUCKET_BUILD_NUMBER`).
414
+ - On the first interactive run outside CI, Korrel prompts once for consent and persists the answer. Declining disables telemetry permanently for that install. Non-interactive sessions default to off with no prompt.
415
+
416
+ **Where the event goes.** Opted-in events are sent to Korrel's public collector. Set `KORREL_TELEMETRY_ENDPOINT` to redirect them to a self-hosted collector instead. Set `KORREL_TELEMETRY_DEBUG=1` to write the event JSON to stderr for inspection instead of sending it. Sending is best-effort with a 3-second timeout; a failure never affects the run.
417
+
418
+ Consent and the anonymous install id are stored in `%APPDATA%\korrel\config.json` (Windows) or `$XDG_CONFIG_HOME/korrel/config.json` / `~/.config/korrel/config.json` (Linux/macOS). No key, scenario content, or identifying information is ever written there.
419
+
420
+ ## Data model and chat-completions compatibility
421
+
422
+ The canonical transcript types in `korrel.types` are provider-neutral and wire-compatible with the OpenAI chat-completions message schema. They are the v0.2 verifiers/OpenEnv export target. Their attribute names and wire shapes are a contract.
423
+
424
+ ### Types
425
+
426
+ **`Message`** - one message in a conversation:
427
+
428
+ ```python
429
+ Message(
430
+ role="assistant", # "system" | "user" | "assistant" | "tool"
431
+ content="Your refund...", # text body; None when only tool_calls is set
432
+ tool_calls=[...], # present on assistant messages that call tools
433
+ tool_call_id="call_1", # links a role="tool" message to the call it answers
434
+ name=None, # optional speaker name
435
+ )
436
+ ```
437
+
438
+ **`ToolCall`** - a single tool invocation inside an assistant message:
439
+
440
+ ```python
441
+ ToolCall(
442
+ id="call_1",
443
+ type="function", # always "function"
444
+ function=ToolFunction(
445
+ name="lookup_order",
446
+ arguments='{"order_id": "A1001"}', # JSON-encoded string, not a dict
447
+ ),
448
+ )
449
+ ```
450
+
451
+ `arguments` is a JSON-encoded string, matching the chat-completions wire format (OpenAI API reference, `tool_calls[].function.arguments`). Parse with `json.loads()` to recover the call arguments.
452
+
453
+ **`ToolSchema`** - a tool definition passed to an adapter, in chat-completions tool-schema shape:
454
+
455
+ ```python
456
+ {"type": "function", "function": {
457
+ "name": "lookup_order",
458
+ "description": "Look up an order by its id.",
459
+ "parameters": {"type": "object",
460
+ "properties": {"order_id": {"type": "string"}},
461
+ "required": ["order_id"]},
462
+ }}
463
+ ```
464
+
465
+ ### Chat-completions mapping table
466
+
467
+ | Canonical field | Chat-completions wire field | Notes |
468
+ |---|---|---|
469
+ | `Message.role` | `role` | `"system"`, `"user"`, `"assistant"`, `"tool"` |
470
+ | `Message.content` | `content` | `None` when only tool calls are present |
471
+ | `Message.tool_calls` | `tool_calls` | array of `ToolCall` objects |
472
+ | `Message.tool_call_id` | `tool_call_id` | on `role="tool"` messages |
473
+ | `ToolCall.id` | `tool_calls[].id` | |
474
+ | `ToolCall.type` | `tool_calls[].type` | always `"function"` |
475
+ | `ToolCall.function.name` | `tool_calls[].function.name` | |
476
+ | `ToolCall.function.arguments` | `tool_calls[].function.arguments` | JSON string, not a dict |
477
+
478
+ The Anthropic provider (`AnthropicProvider` in `korrel.providers`) translates between `tool_use`/`tool_result` blocks and these canonical types. Nothing in `korrel.types` depends on the `openai` package.
479
+
480
+ ## License
481
+
482
+ MIT.