local-bench-ai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (334)
  1. local_bench_ai-0.1.0/PKG-INFO +17 -0
  2. local_bench_ai-0.1.0/pyproject.toml +50 -0
  3. local_bench_ai-0.1.0/setup.cfg +4 -0
  4. local_bench_ai-0.1.0/src/local_bench_ai.egg-info/PKG-INFO +17 -0
  5. local_bench_ai-0.1.0/src/local_bench_ai.egg-info/SOURCES.txt +332 -0
  6. local_bench_ai-0.1.0/src/local_bench_ai.egg-info/dependency_links.txt +1 -0
  7. local_bench_ai-0.1.0/src/local_bench_ai.egg-info/entry_points.txt +3 -0
  8. local_bench_ai-0.1.0/src/local_bench_ai.egg-info/requires.txt +15 -0
  9. local_bench_ai-0.1.0/src/local_bench_ai.egg-info/top_level.txt +1 -0
  10. local_bench_ai-0.1.0/src/localbench/__init__.py +1 -0
  11. local_bench_ai-0.1.0/src/localbench/__main__.py +11 -0
  12. local_bench_ai-0.1.0/src/localbench/_requests.py +232 -0
  13. local_bench_ai-0.1.0/src/localbench/_response.py +70 -0
  14. local_bench_ai-0.1.0/src/localbench/_scoring.py +406 -0
  15. local_bench_ai-0.1.0/src/localbench/_suite.py +262 -0
  16. local_bench_ai-0.1.0/src/localbench/_types.py +84 -0
  17. local_bench_ai-0.1.0/src/localbench/budget_forcing.py +303 -0
  18. local_bench_ai-0.1.0/src/localbench/campaign.py +126 -0
  19. local_bench_ai-0.1.0/src/localbench/campaign_checkpoints.py +481 -0
  20. local_bench_ai-0.1.0/src/localbench/campaign_records.py +236 -0
  21. local_bench_ai-0.1.0/src/localbench/cli.py +1928 -0
  22. local_bench_ai-0.1.0/src/localbench/coding_exec/__init__.py +54 -0
  23. local_bench_ai-0.1.0/src/localbench/coding_exec/extract.py +25 -0
  24. local_bench_ai-0.1.0/src/localbench/coding_exec/orchestrate.py +262 -0
  25. local_bench_ai-0.1.0/src/localbench/coding_exec/program.py +36 -0
  26. local_bench_ai-0.1.0/src/localbench/coding_exec/runner.py +65 -0
  27. local_bench_ai-0.1.0/src/localbench/coding_exec/sandbox.py +303 -0
  28. local_bench_ai-0.1.0/src/localbench/coding_exec/score.py +43 -0
  29. local_bench_ai-0.1.0/src/localbench/data/board_sources.json +68 -0
  30. local_bench_ai-0.1.0/src/localbench/data/licenses/MIT.txt +19 -0
  31. local_bench_ai-0.1.0/src/localbench/data/licenses/ODC-BY-1.0.txt +217 -0
  32. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/ATTRIBUTION.md +21 -0
  33. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/CHANGES.md +9 -0
  34. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/BFCL-Apache-2.0 +157 -0
  35. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/IFBench-ODC-BY-1.0 +223 -0
  36. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/IFEval-Apache-2.0 +157 -0
  37. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/MMLU-Pro-MIT +19 -0
  38. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/NOTICE +14 -0
  39. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/SCORECARD.json +221 -0
  40. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/SHA256SUMS +14 -0
  41. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/SOURCE_REVISIONS.md +16 -0
  42. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/ifbench.jsonl +294 -0
  43. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/itemsets.lock.json +26 -0
  44. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/mmlu_pro.jsonl +400 -0
  45. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/suite.json +101 -0
  46. local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/tc_json_v1.jsonl +330 -0
  47. local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/ifbench.jsonl +1 -0
  48. local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/itemsets.lock.json +12 -0
  49. local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/mmlu_pro.jsonl +1 -0
  50. local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/suite.json +51 -0
  51. local_bench_ai-0.1.0/src/localbench/exit_codes.py +11 -0
  52. local_bench_ai-0.1.0/src/localbench/kld/__init__.py +32 -0
  53. local_bench_ai-0.1.0/src/localbench/kld/churn.py +72 -0
  54. local_bench_ai-0.1.0/src/localbench/kld/parse.py +110 -0
  55. local_bench_ai-0.1.0/src/localbench/kld/run.py +128 -0
  56. local_bench_ai-0.1.0/src/localbench/lane_conformance.py +196 -0
  57. local_bench_ai-0.1.0/src/localbench/manifest.py +376 -0
  58. local_bench_ai-0.1.0/src/localbench/monitor_cli.py +226 -0
  59. local_bench_ai-0.1.0/src/localbench/monitor_records.py +211 -0
  60. local_bench_ai-0.1.0/src/localbench/monitoring.py +202 -0
  61. local_bench_ai-0.1.0/src/localbench/orchestrate.py +1637 -0
  62. local_bench_ai-0.1.0/src/localbench/persistence.py +73 -0
  63. local_bench_ai-0.1.0/src/localbench/probe/__init__.py +15 -0
  64. local_bench_ai-0.1.0/src/localbench/probe/__main__.py +184 -0
  65. local_bench_ai-0.1.0/src/localbench/probe/_point_biserial.py +74 -0
  66. local_bench_ai-0.1.0/src/localbench/probe/discrimination.py +439 -0
  67. local_bench_ai-0.1.0/src/localbench/probe/gates.py +184 -0
  68. local_bench_ai-0.1.0/src/localbench/prompt_rendering.py +186 -0
  69. local_bench_ai-0.1.0/src/localbench/providers/__init__.py +56 -0
  70. local_bench_ai-0.1.0/src/localbench/providers/_anthropic.py +259 -0
  71. local_bench_ai-0.1.0/src/localbench/providers/_base.py +63 -0
  72. local_bench_ai-0.1.0/src/localbench/providers/_openai.py +175 -0
  73. local_bench_ai-0.1.0/src/localbench/reasoning_leaks.py +72 -0
  74. local_bench_ai-0.1.0/src/localbench/reasoning_registry.py +156 -0
  75. local_bench_ai-0.1.0/src/localbench/release_test.py +154 -0
  76. local_bench_ai-0.1.0/src/localbench/run_plan.py +18 -0
  77. local_bench_ai-0.1.0/src/localbench/run_schema.py +22 -0
  78. local_bench_ai-0.1.0/src/localbench/runner.py +207 -0
  79. local_bench_ai-0.1.0/src/localbench/scorers/__init__.py +1 -0
  80. local_bench_ai-0.1.0/src/localbench/scorers/_reasoning.py +44 -0
  81. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/NOTICE +12 -0
  82. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/__init__.py +6 -0
  83. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_checker.py +180 -0
  84. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_checker_values.py +158 -0
  85. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_parser.py +145 -0
  86. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_prompt.py +62 -0
  87. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_types.py +20 -0
  88. local_bench_ai-0.1.0/src/localbench/scorers/bfcl/scorer.py +35 -0
  89. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/NOTICE +12 -0
  90. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/__init__.py +7 -0
  91. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_backend.py +133 -0
  92. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_executor.py +231 -0
  93. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_parser.py +161 -0
  94. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_prompt.py +53 -0
  95. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_sandbox.py +86 -0
  96. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_types.py +55 -0
  97. local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/scorer.py +66 -0
  98. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/NOTICE +32 -0
  99. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/__init__.py +20 -0
  100. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_count.py +111 -0
  101. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_custom.py +182 -0
  102. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_format.py +193 -0
  103. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_ratio.py +73 -0
  104. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_repeat.py +54 -0
  105. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_sentence.py +102 -0
  106. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_words.py +138 -0
  107. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_types.py +29 -0
  108. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_util.py +102 -0
  109. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/instructions.py +88 -0
  110. local_bench_ai-0.1.0/src/localbench/scorers/ifbench/scorer.py +80 -0
  111. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/__init__.py +22 -0
  112. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_format.py +96 -0
  113. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_keywords.py +73 -0
  114. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_length.py +102 -0
  115. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_misc.py +86 -0
  116. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_shared.py +80 -0
  117. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_types.py +34 -0
  118. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_util.py +89 -0
  119. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/instructions.py +55 -0
  120. local_bench_ai-0.1.0/src/localbench/scorers/ifeval/scorer.py +74 -0
  121. local_bench_ai-0.1.0/src/localbench/scorers/lcb.py +179 -0
  122. local_bench_ai-0.1.0/src/localbench/scorers/math_numeric.py +122 -0
  123. local_bench_ai-0.1.0/src/localbench/scorers/math_symbolic.py +302 -0
  124. local_bench_ai-0.1.0/src/localbench/scorers/mcq.py +132 -0
  125. local_bench_ai-0.1.0/src/localbench/scorers/ruler.py +222 -0
  126. local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/__init__.py +5 -0
  127. local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/_parser.py +359 -0
  128. local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/_types.py +79 -0
  129. local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/scorer.py +199 -0
  130. local_bench_ai-0.1.0/src/localbench/scoring/__init__.py +65 -0
  131. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/__init__.py +10 -0
  132. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/adapter.py +113 -0
  133. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/benchmark.py +276 -0
  134. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/block_introspect.py +81 -0
  135. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/block_parser.py +130 -0
  136. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/chat_client.py +269 -0
  137. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/config.py +24 -0
  138. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/env_host.py +391 -0
  139. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/funnel.py +704 -0
  140. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/hashing.py +82 -0
  141. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/loop_config.py +71 -0
  142. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/loop_types.py +178 -0
  143. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/model_client.py +88 -0
  144. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/observations.py +47 -0
  145. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/parser.py +96 -0
  146. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/prompt.py +142 -0
  147. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/protocol.py +54 -0
  148. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/protocol_c_loop.py +553 -0
  149. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/runner.py +207 -0
  150. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/runner_bootstrap.py +357 -0
  151. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/sandbox.py +701 -0
  152. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/sandbox_protocol.py +76 -0
  153. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/score.py +150 -0
  154. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/scripted_agent.py +251 -0
  155. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/stub_appworld.py +267 -0
  156. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/task_pool.py +98 -0
  157. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/types.py +73 -0
  158. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/wsl_bridge.py +451 -0
  159. local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/wsl_worker.py +367 -0
  160. local_bench_ai-0.1.0/src/localbench/scoring/axes.py +209 -0
  161. local_bench_ai-0.1.0/src/localbench/scoring/axis_status.py +234 -0
  162. local_bench_ai-0.1.0/src/localbench/scoring/benchmark_registry.py +106 -0
  163. local_bench_ai-0.1.0/src/localbench/scoring/board.py +185 -0
  164. local_bench_ai-0.1.0/src/localbench/scoring/board_manifest.py +44 -0
  165. local_bench_ai-0.1.0/src/localbench/scoring/board_scoring.py +502 -0
  166. local_bench_ai-0.1.0/src/localbench/scoring/board_sources.py +78 -0
  167. local_bench_ai-0.1.0/src/localbench/scoring/board_support.py +149 -0
  168. local_bench_ai-0.1.0/src/localbench/scoring/board_systems.py +65 -0
  169. local_bench_ai-0.1.0/src/localbench/scoring/board_types.py +86 -0
  170. local_bench_ai-0.1.0/src/localbench/scoring/bootstrap.py +201 -0
  171. local_bench_ai-0.1.0/src/localbench/scoring/metadata.py +187 -0
  172. local_bench_ai-0.1.0/src/localbench/scoring/paired_delta.py +320 -0
  173. local_bench_ai-0.1.0/src/localbench/scoring/public_rescore.py +20 -0
  174. local_bench_ai-0.1.0/src/localbench/scoring/scorecard.py +121 -0
  175. local_bench_ai-0.1.0/src/localbench/scoring/signed_score.py +49 -0
  176. local_bench_ai-0.1.0/src/localbench/scoring/subgroups.py +137 -0
  177. local_bench_ai-0.1.0/src/localbench/scoring/tc_json_conformance.py +46 -0
  178. local_bench_ai-0.1.0/src/localbench/scoring/web.py +74 -0
  179. local_bench_ai-0.1.0/src/localbench/serving/__init__.py +1 -0
  180. local_bench_ai-0.1.0/src/localbench/serving/assembly.py +328 -0
  181. local_bench_ai-0.1.0/src/localbench/serving/bench.py +137 -0
  182. local_bench_ai-0.1.0/src/localbench/serving/fingerprint.py +72 -0
  183. local_bench_ai-0.1.0/src/localbench/serving/job_object.py +96 -0
  184. local_bench_ai-0.1.0/src/localbench/serving/llama_cpp.py +213 -0
  185. local_bench_ai-0.1.0/src/localbench/serving/model_artifact.py +247 -0
  186. local_bench_ai-0.1.0/src/localbench/serving/options.py +36 -0
  187. local_bench_ai-0.1.0/src/localbench/serving/process.py +98 -0
  188. local_bench_ai-0.1.0/src/localbench/serving/provenance.py +250 -0
  189. local_bench_ai-0.1.0/src/localbench/serving/readiness.py +180 -0
  190. local_bench_ai-0.1.0/src/localbench/serving/runner.py +258 -0
  191. local_bench_ai-0.1.0/src/localbench/serving/teardown.py +107 -0
  192. local_bench_ai-0.1.0/src/localbench/submissions/__init__.py +6 -0
  193. local_bench_ai-0.1.0/src/localbench/submissions/archive.py +88 -0
  194. local_bench_ai-0.1.0/src/localbench/submissions/attestation.py +92 -0
  195. local_bench_ai-0.1.0/src/localbench/submissions/bundle.py +298 -0
  196. local_bench_ai-0.1.0/src/localbench/submissions/bundle_input.py +32 -0
  197. local_bench_ai-0.1.0/src/localbench/submissions/canon.py +61 -0
  198. local_bench_ai-0.1.0/src/localbench/submissions/client.py +326 -0
  199. local_bench_ai-0.1.0/src/localbench/submissions/contracts.py +30 -0
  200. local_bench_ai-0.1.0/src/localbench/submissions/crypto.py +190 -0
  201. local_bench_ai-0.1.0/src/localbench/submissions/dedup.py +17 -0
  202. local_bench_ai-0.1.0/src/localbench/submissions/divergence.py +92 -0
  203. local_bench_ai-0.1.0/src/localbench/submissions/foundation.py +419 -0
  204. local_bench_ai-0.1.0/src/localbench/submissions/foundation_scores.py +128 -0
  205. local_bench_ai-0.1.0/src/localbench/submissions/keys.py +27 -0
  206. local_bench_ai-0.1.0/src/localbench/submissions/origin.py +18 -0
  207. local_bench_ai-0.1.0/src/localbench/submissions/ports.py +46 -0
  208. local_bench_ai-0.1.0/src/localbench/submissions/projection.py +401 -0
  209. local_bench_ai-0.1.0/src/localbench/submissions/provenance.py +105 -0
  210. local_bench_ai-0.1.0/src/localbench/submissions/rescore.py +156 -0
  211. local_bench_ai-0.1.0/src/localbench/submissions/schemas/__init__.py +1 -0
  212. local_bench_ai-0.1.0/src/localbench/submissions/schemas/accepted_result_projection_v1.schema.json +45 -0
  213. local_bench_ai-0.1.0/src/localbench/submissions/schemas/result_bundle_v1.schema.json +59 -0
  214. local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_envelope_v1.schema.json +35 -0
  215. local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_item_v1.schema.json +18 -0
  216. local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_manifest_v1.schema.json +33 -0
  217. local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_verification_v1.schema.json +59 -0
  218. local_bench_ai-0.1.0/src/localbench/submissions/schemas/suite_release_manifest_v1.schema.json +40 -0
  219. local_bench_ai-0.1.0/src/localbench/submissions/status_update.py +56 -0
  220. local_bench_ai-0.1.0/src/localbench/submissions/submit_run.py +247 -0
  221. local_bench_ai-0.1.0/src/localbench/submissions/submit_run_inputs.py +236 -0
  222. local_bench_ai-0.1.0/src/localbench/submissions/submit_run_output.py +39 -0
  223. local_bench_ai-0.1.0/src/localbench/submissions/trust.py +11 -0
  224. local_bench_ai-0.1.0/src/localbench/submissions/validate.py +185 -0
  225. local_bench_ai-0.1.0/src/localbench/submissions/verify.py +85 -0
  226. local_bench_ai-0.1.0/src/localbench/suite_bundle.py +261 -0
  227. local_bench_ai-0.1.0/src/localbench/suite_errors.py +7 -0
  228. local_bench_ai-0.1.0/src/localbench/suite_release.py +175 -0
  229. local_bench_ai-0.1.0/src/localbench/suite_resolver.py +469 -0
  230. local_bench_ai-0.1.0/src/localbench/suite_verify.py +190 -0
  231. local_bench_ai-0.1.0/src/localbench/supervisor.py +138 -0
  232. local_bench_ai-0.1.0/src/localbench/tc_json_v1_runner.py +153 -0
  233. local_bench_ai-0.1.0/tests/test_agentic_appworld_adapter.py +109 -0
  234. local_bench_ai-0.1.0/tests/test_agentic_failure_policies.py +80 -0
  235. local_bench_ai-0.1.0/tests/test_agentic_hash_stability.py +56 -0
  236. local_bench_ai-0.1.0/tests/test_agentic_parser.py +136 -0
  237. local_bench_ai-0.1.0/tests/test_agentic_protocol_schema.py +55 -0
  238. local_bench_ai-0.1.0/tests/test_agentic_score_asr.py +140 -0
  239. local_bench_ai-0.1.0/tests/test_agentic_scripted_runner.py +49 -0
  240. local_bench_ai-0.1.0/tests/test_agentic_task_pool.py +92 -0
  241. local_bench_ai-0.1.0/tests/test_agentic_wsl_bridge.py +351 -0
  242. local_bench_ai-0.1.0/tests/test_agentic_wsl_bridge_acceptance.py +138 -0
  243. local_bench_ai-0.1.0/tests/test_appworld_c_funnel_units.py +651 -0
  244. local_bench_ai-0.1.0/tests/test_appworld_protocol_c_acceptance.py +84 -0
  245. local_bench_ai-0.1.0/tests/test_appworld_protocol_c_gauntlet.py +376 -0
  246. local_bench_ai-0.1.0/tests/test_appworld_protocol_c_units.py +1009 -0
  247. local_bench_ai-0.1.0/tests/test_appworld_sandbox_acceptance.py +109 -0
  248. local_bench_ai-0.1.0/tests/test_appworld_sandbox_units.py +179 -0
  249. local_bench_ai-0.1.0/tests/test_axes_registry.py +148 -0
  250. local_bench_ai-0.1.0/tests/test_axis_measurement_status.py +131 -0
  251. local_bench_ai-0.1.0/tests/test_bfcl.py +282 -0
  252. local_bench_ai-0.1.0/tests/test_bfcl_multi_turn.py +198 -0
  253. local_bench_ai-0.1.0/tests/test_board.py +604 -0
  254. local_bench_ai-0.1.0/tests/test_board_cli.py +32 -0
  255. local_bench_ai-0.1.0/tests/test_board_manifest.py +82 -0
  256. local_bench_ai-0.1.0/tests/test_board_provenance.py +55 -0
  257. local_bench_ai-0.1.0/tests/test_budget_forcing.py +453 -0
  258. local_bench_ai-0.1.0/tests/test_build_v1_bfcl_multi_turn.py +94 -0
  259. local_bench_ai-0.1.0/tests/test_build_v1_mmlu_pro.py +128 -0
  260. local_bench_ai-0.1.0/tests/test_campaign.py +484 -0
  261. local_bench_ai-0.1.0/tests/test_campaign_contracts.py +110 -0
  262. local_bench_ai-0.1.0/tests/test_cheat_proxy.py +98 -0
  263. local_bench_ai-0.1.0/tests/test_cli_axis_measurement_status.py +143 -0
  264. local_bench_ai-0.1.0/tests/test_cli_bench_exit_codes.py +153 -0
  265. local_bench_ai-0.1.0/tests/test_coding_exec_harness.py +95 -0
  266. local_bench_ai-0.1.0/tests/test_coding_exec_orchestrate.py +175 -0
  267. local_bench_ai-0.1.0/tests/test_coding_exec_sandbox.py +180 -0
  268. local_bench_ai-0.1.0/tests/test_distribution_cli.py +336 -0
  269. local_bench_ai-0.1.0/tests/test_gemma_reasoning_mode.py +132 -0
  270. local_bench_ai-0.1.0/tests/test_genmath_gen.py +143 -0
  271. local_bench_ai-0.1.0/tests/test_genmath_private.py +171 -0
  272. local_bench_ai-0.1.0/tests/test_ifbench.py +312 -0
  273. local_bench_ai-0.1.0/tests/test_ifeval.py +185 -0
  274. local_bench_ai-0.1.0/tests/test_kld.py +157 -0
  275. local_bench_ai-0.1.0/tests/test_lane_conformance.py +218 -0
  276. local_bench_ai-0.1.0/tests/test_lane_enforcement.py +215 -0
  277. local_bench_ai-0.1.0/tests/test_lcb.py +101 -0
  278. local_bench_ai-0.1.0/tests/test_math_genmath_parity.py +75 -0
  279. local_bench_ai-0.1.0/tests/test_math_numeric.py +102 -0
  280. local_bench_ai-0.1.0/tests/test_math_symbolic.py +144 -0
  281. local_bench_ai-0.1.0/tests/test_math_symbolic_robustness.py +45 -0
  282. local_bench_ai-0.1.0/tests/test_mcq.py +225 -0
  283. local_bench_ai-0.1.0/tests/test_monitoring.py +228 -0
  284. local_bench_ai-0.1.0/tests/test_online_distribution.py +131 -0
  285. local_bench_ai-0.1.0/tests/test_orchestrate.py +755 -0
  286. local_bench_ai-0.1.0/tests/test_orchestrate_agentic.py +328 -0
  287. local_bench_ai-0.1.0/tests/test_probe_discrimination.py +405 -0
  288. local_bench_ai-0.1.0/tests/test_probe_gates.py +128 -0
  289. local_bench_ai-0.1.0/tests/test_provider_orchestrate.py +112 -0
  290. local_bench_ai-0.1.0/tests/test_provider_profiles.py +539 -0
  291. local_bench_ai-0.1.0/tests/test_reasoning_registry.py +66 -0
  292. local_bench_ai-0.1.0/tests/test_release_test.py +44 -0
  293. local_bench_ai-0.1.0/tests/test_response.py +48 -0
  294. local_bench_ai-0.1.0/tests/test_response_wrapper_scoring.py +53 -0
  295. local_bench_ai-0.1.0/tests/test_run_plan.py +261 -0
  296. local_bench_ai-0.1.0/tests/test_run_record_distribution_schema.py +268 -0
  297. local_bench_ai-0.1.0/tests/test_runner.py +363 -0
  298. local_bench_ai-0.1.0/tests/test_runner_reasoning.py +53 -0
  299. local_bench_ai-0.1.0/tests/test_scorecard.py +83 -0
  300. local_bench_ai-0.1.0/tests/test_scoring_aggregate.py +92 -0
  301. local_bench_ai-0.1.0/tests/test_scoring_reasoning_strip.py +317 -0
  302. local_bench_ai-0.1.0/tests/test_scoring_v1.py +461 -0
  303. local_bench_ai-0.1.0/tests/test_serving_bench.py +879 -0
  304. local_bench_ai-0.1.0/tests/test_serving_provenance.py +422 -0
  305. local_bench_ai-0.1.0/tests/test_serving_teardown.py +133 -0
  306. local_bench_ai-0.1.0/tests/test_site_parity.py +357 -0
  307. local_bench_ai-0.1.0/tests/test_submission_slice_d1_migration.py +120 -0
  308. local_bench_ai-0.1.0/tests/test_suite_bundle.py +66 -0
  309. local_bench_ai-0.1.0/tests/test_suite_release_manifest.py +166 -0
  310. local_bench_ai-0.1.0/tests/test_suite_resolver.py +320 -0
  311. local_bench_ai-0.1.0/tests/test_supervisor.py +80 -0
  312. local_bench_ai-0.1.0/tests/test_tc_json_conformance_gate.py +151 -0
  313. local_bench_ai-0.1.0/tests/test_tc_json_v1_items.py +58 -0
  314. local_bench_ai-0.1.0/tests/test_tc_json_v1_runner.py +82 -0
  315. local_bench_ai-0.1.0/tests/test_tc_json_v1_scorer.py +312 -0
  316. local_bench_ai-0.1.0/tests/test_v1_bfcl_axis.py +73 -0
  317. local_bench_ai-0.1.0/tests/test_v1_bfcl_items.py +181 -0
  318. local_bench_ai-0.1.0/tests/test_v1_bfcl_multi_turn_axis.py +79 -0
  319. local_bench_ai-0.1.0/tests/test_v1_bfcl_multi_turn_items.py +162 -0
  320. local_bench_ai-0.1.0/tests/test_v1_bigcodebench_items.py +45 -0
  321. local_bench_ai-0.1.0/tests/test_v1_ifbench_axis.py +56 -0
  322. local_bench_ai-0.1.0/tests/test_v1_ifbench_items.py +118 -0
  323. local_bench_ai-0.1.0/tests/test_v1_lcb_axis.py +115 -0
  324. local_bench_ai-0.1.0/tests/test_v1_lcb_items.py +195 -0
  325. local_bench_ai-0.1.0/tests/test_v1_math_axis.py +80 -0
  326. local_bench_ai-0.1.0/tests/test_v1_math_items.py +78 -0
  327. local_bench_ai-0.1.0/tests/test_v1_mmlu_pro_axis.py +88 -0
  328. local_bench_ai-0.1.0/tests/test_v1_mmlu_pro_items.py +147 -0
  329. local_bench_ai-0.1.0/tests/test_v1_ruler_axis.py +309 -0
  330. local_bench_ai-0.1.0/tests/test_verdict_integrity_direct_finalize.py +311 -0
  331. local_bench_ai-0.1.0/tests/test_wave3_attestation_run_id.py +58 -0
  332. local_bench_ai-0.1.0/tests/test_wave3_cli_ux.py +222 -0
  333. local_bench_ai-0.1.0/tests/test_web_build_data.py +817 -0
  334. local_bench_ai-0.1.0/tests/test_web_scorecard.py +57 -0
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: local-bench-ai
3
+ Version: 0.1.0
4
+ Summary: Thin benchmark runner for OpenAI-compatible chat completion endpoints.
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: anyio>=4
7
+ Requires-Dist: httpx>=0.27
8
+ Requires-Dist: langdetect>=1.0.9
9
+ Requires-Dist: math-verify>=0.9.0
10
+ Provides-Extra: build
11
+ Requires-Dist: datasets>=2.20; extra == "build"
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=8; extra == "dev"
14
+ Requires-Dist: nltk>=3.9; extra == "dev"
15
+ Requires-Dist: jinja2>=3.1; extra == "dev"
16
+ Provides-Extra: hf
17
+ Requires-Dist: transformers>=4.51; extra == "hf"
@@ -0,0 +1,50 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "local-bench-ai"
7
+ version = "0.1.0"
8
+ description = "Thin benchmark runner for OpenAI-compatible chat completion endpoints."
9
+ requires-python = ">=3.11"
10
+ dependencies = [
11
+ "anyio>=4",
12
+ "httpx>=0.27",
13
+ "langdetect>=1.0.9",
14
+ "math-verify>=0.9.0",
15
+ ]
16
+
17
+ [project.scripts]
18
+ localbench = "localbench.cli:main"
19
+ localbench-monitor = "localbench.monitor_cli:main"
20
+
21
+ [project.optional-dependencies]
22
+ build = ["datasets>=2.20"]
23
+ dev = ["pytest>=8", "nltk>=3.9", "jinja2>=3.1"]
24
+ hf = ["transformers>=4.51"]
25
+
26
+ [tool.setuptools.package-dir]
27
+ "" = "src"
28
+
29
+ [tool.setuptools.packages.find]
30
+ where = ["src"]
31
+
32
+ [tool.setuptools.package-data]
33
+ localbench = [
34
+ "data/board_sources.json",
35
+ "data/licenses/*",
36
+ "data/suites/tiny-smoke-v1/*",
37
+ "data/suites/core-text-v1/*",
38
+ "data/suites/core-text-v1/LICENSES/*",
39
+ "scorers/ifbench/NOTICE",
40
+ "scorers/bfcl/NOTICE",
41
+ "scorers/bfcl_multi_turn/NOTICE",
42
+ "submissions/schemas/*.json",
43
+ ]
44
+
45
+ [tool.pytest.ini_options]
46
+ testpaths = ["tests"]
47
+ addopts = ["-ra", "--strict-config", "--strict-markers"]
48
+ markers = [
49
+ "wsl: requires WSL2 AppWorld harness and bubblewrap",
50
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,17 @@
1
+ Metadata-Version: 2.4
2
+ Name: local-bench-ai
3
+ Version: 0.1.0
4
+ Summary: Thin benchmark runner for OpenAI-compatible chat completion endpoints.
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: anyio>=4
7
+ Requires-Dist: httpx>=0.27
8
+ Requires-Dist: langdetect>=1.0.9
9
+ Requires-Dist: math-verify>=0.9.0
10
+ Provides-Extra: build
11
+ Requires-Dist: datasets>=2.20; extra == "build"
12
+ Provides-Extra: dev
13
+ Requires-Dist: pytest>=8; extra == "dev"
14
+ Requires-Dist: nltk>=3.9; extra == "dev"
15
+ Requires-Dist: jinja2>=3.1; extra == "dev"
16
+ Provides-Extra: hf
17
+ Requires-Dist: transformers>=4.51; extra == "hf"
@@ -0,0 +1,332 @@
1
+ pyproject.toml
2
+ src/local_bench_ai.egg-info/PKG-INFO
3
+ src/local_bench_ai.egg-info/SOURCES.txt
4
+ src/local_bench_ai.egg-info/dependency_links.txt
5
+ src/local_bench_ai.egg-info/entry_points.txt
6
+ src/local_bench_ai.egg-info/requires.txt
7
+ src/local_bench_ai.egg-info/top_level.txt
8
+ src/localbench/__init__.py
9
+ src/localbench/__main__.py
10
+ src/localbench/_requests.py
11
+ src/localbench/_response.py
12
+ src/localbench/_scoring.py
13
+ src/localbench/_suite.py
14
+ src/localbench/_types.py
15
+ src/localbench/budget_forcing.py
16
+ src/localbench/campaign.py
17
+ src/localbench/campaign_checkpoints.py
18
+ src/localbench/campaign_records.py
19
+ src/localbench/cli.py
20
+ src/localbench/exit_codes.py
21
+ src/localbench/lane_conformance.py
22
+ src/localbench/manifest.py
23
+ src/localbench/monitor_cli.py
24
+ src/localbench/monitor_records.py
25
+ src/localbench/monitoring.py
26
+ src/localbench/orchestrate.py
27
+ src/localbench/persistence.py
28
+ src/localbench/prompt_rendering.py
29
+ src/localbench/reasoning_leaks.py
30
+ src/localbench/reasoning_registry.py
31
+ src/localbench/release_test.py
32
+ src/localbench/run_plan.py
33
+ src/localbench/run_schema.py
34
+ src/localbench/runner.py
35
+ src/localbench/suite_bundle.py
36
+ src/localbench/suite_errors.py
37
+ src/localbench/suite_release.py
38
+ src/localbench/suite_resolver.py
39
+ src/localbench/suite_verify.py
40
+ src/localbench/supervisor.py
41
+ src/localbench/tc_json_v1_runner.py
42
+ src/localbench/coding_exec/__init__.py
43
+ src/localbench/coding_exec/extract.py
44
+ src/localbench/coding_exec/orchestrate.py
45
+ src/localbench/coding_exec/program.py
46
+ src/localbench/coding_exec/runner.py
47
+ src/localbench/coding_exec/sandbox.py
48
+ src/localbench/coding_exec/score.py
49
+ src/localbench/data/board_sources.json
50
+ src/localbench/data/licenses/MIT.txt
51
+ src/localbench/data/licenses/ODC-BY-1.0.txt
52
+ src/localbench/data/suites/core-text-v1/ATTRIBUTION.md
53
+ src/localbench/data/suites/core-text-v1/CHANGES.md
54
+ src/localbench/data/suites/core-text-v1/NOTICE
55
+ src/localbench/data/suites/core-text-v1/SCORECARD.json
56
+ src/localbench/data/suites/core-text-v1/SHA256SUMS
57
+ src/localbench/data/suites/core-text-v1/SOURCE_REVISIONS.md
58
+ src/localbench/data/suites/core-text-v1/ifbench.jsonl
59
+ src/localbench/data/suites/core-text-v1/itemsets.lock.json
60
+ src/localbench/data/suites/core-text-v1/mmlu_pro.jsonl
61
+ src/localbench/data/suites/core-text-v1/suite.json
62
+ src/localbench/data/suites/core-text-v1/tc_json_v1.jsonl
63
+ src/localbench/data/suites/core-text-v1/LICENSES/BFCL-Apache-2.0
64
+ src/localbench/data/suites/core-text-v1/LICENSES/IFBench-ODC-BY-1.0
65
+ src/localbench/data/suites/core-text-v1/LICENSES/IFEval-Apache-2.0
66
+ src/localbench/data/suites/core-text-v1/LICENSES/MMLU-Pro-MIT
67
+ src/localbench/data/suites/tiny-smoke-v1/ifbench.jsonl
68
+ src/localbench/data/suites/tiny-smoke-v1/itemsets.lock.json
69
+ src/localbench/data/suites/tiny-smoke-v1/mmlu_pro.jsonl
70
+ src/localbench/data/suites/tiny-smoke-v1/suite.json
71
+ src/localbench/kld/__init__.py
72
+ src/localbench/kld/churn.py
73
+ src/localbench/kld/parse.py
74
+ src/localbench/kld/run.py
75
+ src/localbench/probe/__init__.py
76
+ src/localbench/probe/__main__.py
77
+ src/localbench/probe/_point_biserial.py
78
+ src/localbench/probe/discrimination.py
79
+ src/localbench/probe/gates.py
80
+ src/localbench/providers/__init__.py
81
+ src/localbench/providers/_anthropic.py
82
+ src/localbench/providers/_base.py
83
+ src/localbench/providers/_openai.py
84
+ src/localbench/scorers/__init__.py
85
+ src/localbench/scorers/_reasoning.py
86
+ src/localbench/scorers/lcb.py
87
+ src/localbench/scorers/math_numeric.py
88
+ src/localbench/scorers/math_symbolic.py
89
+ src/localbench/scorers/mcq.py
90
+ src/localbench/scorers/ruler.py
91
+ src/localbench/scorers/bfcl/NOTICE
92
+ src/localbench/scorers/bfcl/__init__.py
93
+ src/localbench/scorers/bfcl/_checker.py
94
+ src/localbench/scorers/bfcl/_checker_values.py
95
+ src/localbench/scorers/bfcl/_parser.py
96
+ src/localbench/scorers/bfcl/_prompt.py
97
+ src/localbench/scorers/bfcl/_types.py
98
+ src/localbench/scorers/bfcl/scorer.py
99
+ src/localbench/scorers/bfcl_multi_turn/NOTICE
100
+ src/localbench/scorers/bfcl_multi_turn/__init__.py
101
+ src/localbench/scorers/bfcl_multi_turn/_backend.py
102
+ src/localbench/scorers/bfcl_multi_turn/_executor.py
103
+ src/localbench/scorers/bfcl_multi_turn/_parser.py
104
+ src/localbench/scorers/bfcl_multi_turn/_prompt.py
105
+ src/localbench/scorers/bfcl_multi_turn/_sandbox.py
106
+ src/localbench/scorers/bfcl_multi_turn/_types.py
107
+ src/localbench/scorers/bfcl_multi_turn/scorer.py
108
+ src/localbench/scorers/ifbench/NOTICE
109
+ src/localbench/scorers/ifbench/__init__.py
110
+ src/localbench/scorers/ifbench/_checks_count.py
111
+ src/localbench/scorers/ifbench/_checks_custom.py
112
+ src/localbench/scorers/ifbench/_checks_format.py
113
+ src/localbench/scorers/ifbench/_checks_ratio.py
114
+ src/localbench/scorers/ifbench/_checks_repeat.py
115
+ src/localbench/scorers/ifbench/_checks_sentence.py
116
+ src/localbench/scorers/ifbench/_checks_words.py
117
+ src/localbench/scorers/ifbench/_types.py
118
+ src/localbench/scorers/ifbench/_util.py
119
+ src/localbench/scorers/ifbench/instructions.py
120
+ src/localbench/scorers/ifbench/scorer.py
121
+ src/localbench/scorers/ifeval/__init__.py
122
+ src/localbench/scorers/ifeval/_checks_format.py
123
+ src/localbench/scorers/ifeval/_checks_keywords.py
124
+ src/localbench/scorers/ifeval/_checks_length.py
125
+ src/localbench/scorers/ifeval/_checks_misc.py
126
+ src/localbench/scorers/ifeval/_shared.py
127
+ src/localbench/scorers/ifeval/_types.py
128
+ src/localbench/scorers/ifeval/_util.py
129
+ src/localbench/scorers/ifeval/instructions.py
130
+ src/localbench/scorers/ifeval/scorer.py
131
+ src/localbench/scorers/tc_json_v1/__init__.py
132
+ src/localbench/scorers/tc_json_v1/_parser.py
133
+ src/localbench/scorers/tc_json_v1/_types.py
134
+ src/localbench/scorers/tc_json_v1/scorer.py
135
+ src/localbench/scoring/__init__.py
136
+ src/localbench/scoring/axes.py
137
+ src/localbench/scoring/axis_status.py
138
+ src/localbench/scoring/benchmark_registry.py
139
+ src/localbench/scoring/board.py
140
+ src/localbench/scoring/board_manifest.py
141
+ src/localbench/scoring/board_scoring.py
142
+ src/localbench/scoring/board_sources.py
143
+ src/localbench/scoring/board_support.py
144
+ src/localbench/scoring/board_systems.py
145
+ src/localbench/scoring/board_types.py
146
+ src/localbench/scoring/bootstrap.py
147
+ src/localbench/scoring/metadata.py
148
+ src/localbench/scoring/paired_delta.py
149
+ src/localbench/scoring/public_rescore.py
150
+ src/localbench/scoring/scorecard.py
151
+ src/localbench/scoring/signed_score.py
152
+ src/localbench/scoring/subgroups.py
153
+ src/localbench/scoring/tc_json_conformance.py
154
+ src/localbench/scoring/web.py
155
+ src/localbench/scoring/agentic_exec/__init__.py
156
+ src/localbench/scoring/agentic_exec/adapter.py
157
+ src/localbench/scoring/agentic_exec/benchmark.py
158
+ src/localbench/scoring/agentic_exec/block_introspect.py
159
+ src/localbench/scoring/agentic_exec/block_parser.py
160
+ src/localbench/scoring/agentic_exec/chat_client.py
161
+ src/localbench/scoring/agentic_exec/config.py
162
+ src/localbench/scoring/agentic_exec/env_host.py
163
+ src/localbench/scoring/agentic_exec/funnel.py
164
+ src/localbench/scoring/agentic_exec/hashing.py
165
+ src/localbench/scoring/agentic_exec/loop_config.py
166
+ src/localbench/scoring/agentic_exec/loop_types.py
167
+ src/localbench/scoring/agentic_exec/model_client.py
168
+ src/localbench/scoring/agentic_exec/observations.py
169
+ src/localbench/scoring/agentic_exec/parser.py
170
+ src/localbench/scoring/agentic_exec/prompt.py
171
+ src/localbench/scoring/agentic_exec/protocol.py
172
+ src/localbench/scoring/agentic_exec/protocol_c_loop.py
173
+ src/localbench/scoring/agentic_exec/runner.py
174
+ src/localbench/scoring/agentic_exec/runner_bootstrap.py
175
+ src/localbench/scoring/agentic_exec/sandbox.py
176
+ src/localbench/scoring/agentic_exec/sandbox_protocol.py
177
+ src/localbench/scoring/agentic_exec/score.py
178
+ src/localbench/scoring/agentic_exec/scripted_agent.py
179
+ src/localbench/scoring/agentic_exec/stub_appworld.py
180
+ src/localbench/scoring/agentic_exec/task_pool.py
181
+ src/localbench/scoring/agentic_exec/types.py
182
+ src/localbench/scoring/agentic_exec/wsl_bridge.py
183
+ src/localbench/scoring/agentic_exec/wsl_worker.py
184
+ src/localbench/serving/__init__.py
185
+ src/localbench/serving/assembly.py
186
+ src/localbench/serving/bench.py
187
+ src/localbench/serving/fingerprint.py
188
+ src/localbench/serving/job_object.py
189
+ src/localbench/serving/llama_cpp.py
190
+ src/localbench/serving/model_artifact.py
191
+ src/localbench/serving/options.py
192
+ src/localbench/serving/process.py
193
+ src/localbench/serving/provenance.py
194
+ src/localbench/serving/readiness.py
195
+ src/localbench/serving/runner.py
196
+ src/localbench/serving/teardown.py
197
+ src/localbench/submissions/__init__.py
198
+ src/localbench/submissions/archive.py
199
+ src/localbench/submissions/attestation.py
200
+ src/localbench/submissions/bundle.py
201
+ src/localbench/submissions/bundle_input.py
202
+ src/localbench/submissions/canon.py
203
+ src/localbench/submissions/client.py
204
+ src/localbench/submissions/contracts.py
205
+ src/localbench/submissions/crypto.py
206
+ src/localbench/submissions/dedup.py
207
+ src/localbench/submissions/divergence.py
208
+ src/localbench/submissions/foundation.py
209
+ src/localbench/submissions/foundation_scores.py
210
+ src/localbench/submissions/keys.py
211
+ src/localbench/submissions/origin.py
212
+ src/localbench/submissions/ports.py
213
+ src/localbench/submissions/projection.py
214
+ src/localbench/submissions/provenance.py
215
+ src/localbench/submissions/rescore.py
216
+ src/localbench/submissions/status_update.py
217
+ src/localbench/submissions/submit_run.py
218
+ src/localbench/submissions/submit_run_inputs.py
219
+ src/localbench/submissions/submit_run_output.py
220
+ src/localbench/submissions/trust.py
221
+ src/localbench/submissions/validate.py
222
+ src/localbench/submissions/verify.py
223
+ src/localbench/submissions/schemas/__init__.py
224
+ src/localbench/submissions/schemas/accepted_result_projection_v1.schema.json
225
+ src/localbench/submissions/schemas/result_bundle_v1.schema.json
226
+ src/localbench/submissions/schemas/submission_envelope_v1.schema.json
227
+ src/localbench/submissions/schemas/submission_item_v1.schema.json
228
+ src/localbench/submissions/schemas/submission_manifest_v1.schema.json
229
+ src/localbench/submissions/schemas/submission_verification_v1.schema.json
230
+ src/localbench/submissions/schemas/suite_release_manifest_v1.schema.json
231
+ tests/test_agentic_appworld_adapter.py
232
+ tests/test_agentic_failure_policies.py
233
+ tests/test_agentic_hash_stability.py
234
+ tests/test_agentic_parser.py
235
+ tests/test_agentic_protocol_schema.py
236
+ tests/test_agentic_score_asr.py
237
+ tests/test_agentic_scripted_runner.py
238
+ tests/test_agentic_task_pool.py
239
+ tests/test_agentic_wsl_bridge.py
240
+ tests/test_agentic_wsl_bridge_acceptance.py
241
+ tests/test_appworld_c_funnel_units.py
242
+ tests/test_appworld_protocol_c_acceptance.py
243
+ tests/test_appworld_protocol_c_gauntlet.py
244
+ tests/test_appworld_protocol_c_units.py
245
+ tests/test_appworld_sandbox_acceptance.py
246
+ tests/test_appworld_sandbox_units.py
247
+ tests/test_axes_registry.py
248
+ tests/test_axis_measurement_status.py
249
+ tests/test_bfcl.py
250
+ tests/test_bfcl_multi_turn.py
251
+ tests/test_board.py
252
+ tests/test_board_cli.py
253
+ tests/test_board_manifest.py
254
+ tests/test_board_provenance.py
255
+ tests/test_budget_forcing.py
256
+ tests/test_build_v1_bfcl_multi_turn.py
257
+ tests/test_build_v1_mmlu_pro.py
258
+ tests/test_campaign.py
259
+ tests/test_campaign_contracts.py
260
+ tests/test_cheat_proxy.py
261
+ tests/test_cli_axis_measurement_status.py
262
+ tests/test_cli_bench_exit_codes.py
263
+ tests/test_coding_exec_harness.py
264
+ tests/test_coding_exec_orchestrate.py
265
+ tests/test_coding_exec_sandbox.py
266
+ tests/test_distribution_cli.py
267
+ tests/test_gemma_reasoning_mode.py
268
+ tests/test_genmath_gen.py
269
+ tests/test_genmath_private.py
270
+ tests/test_ifbench.py
271
+ tests/test_ifeval.py
272
+ tests/test_kld.py
273
+ tests/test_lane_conformance.py
274
+ tests/test_lane_enforcement.py
275
+ tests/test_lcb.py
276
+ tests/test_math_genmath_parity.py
277
+ tests/test_math_numeric.py
278
+ tests/test_math_symbolic.py
279
+ tests/test_math_symbolic_robustness.py
280
+ tests/test_mcq.py
281
+ tests/test_monitoring.py
282
+ tests/test_online_distribution.py
283
+ tests/test_orchestrate.py
284
+ tests/test_orchestrate_agentic.py
285
+ tests/test_probe_discrimination.py
286
+ tests/test_probe_gates.py
287
+ tests/test_provider_orchestrate.py
288
+ tests/test_provider_profiles.py
289
+ tests/test_reasoning_registry.py
290
+ tests/test_release_test.py
291
+ tests/test_response.py
292
+ tests/test_response_wrapper_scoring.py
293
+ tests/test_run_plan.py
294
+ tests/test_run_record_distribution_schema.py
295
+ tests/test_runner.py
296
+ tests/test_runner_reasoning.py
297
+ tests/test_scorecard.py
298
+ tests/test_scoring_aggregate.py
299
+ tests/test_scoring_reasoning_strip.py
300
+ tests/test_scoring_v1.py
301
+ tests/test_serving_bench.py
302
+ tests/test_serving_provenance.py
303
+ tests/test_serving_teardown.py
304
+ tests/test_site_parity.py
305
+ tests/test_submission_slice_d1_migration.py
306
+ tests/test_suite_bundle.py
307
+ tests/test_suite_release_manifest.py
308
+ tests/test_suite_resolver.py
309
+ tests/test_supervisor.py
310
+ tests/test_tc_json_conformance_gate.py
311
+ tests/test_tc_json_v1_items.py
312
+ tests/test_tc_json_v1_runner.py
313
+ tests/test_tc_json_v1_scorer.py
314
+ tests/test_v1_bfcl_axis.py
315
+ tests/test_v1_bfcl_items.py
316
+ tests/test_v1_bfcl_multi_turn_axis.py
317
+ tests/test_v1_bfcl_multi_turn_items.py
318
+ tests/test_v1_bigcodebench_items.py
319
+ tests/test_v1_ifbench_axis.py
320
+ tests/test_v1_ifbench_items.py
321
+ tests/test_v1_lcb_axis.py
322
+ tests/test_v1_lcb_items.py
323
+ tests/test_v1_math_axis.py
324
+ tests/test_v1_math_items.py
325
+ tests/test_v1_mmlu_pro_axis.py
326
+ tests/test_v1_mmlu_pro_items.py
327
+ tests/test_v1_ruler_axis.py
328
+ tests/test_verdict_integrity_direct_finalize.py
329
+ tests/test_wave3_attestation_run_id.py
330
+ tests/test_wave3_cli_ux.py
331
+ tests/test_web_build_data.py
332
+ tests/test_web_scorecard.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ localbench = localbench.cli:main
3
+ localbench-monitor = localbench.monitor_cli:main
@@ -0,0 +1,15 @@
1
+ anyio>=4
2
+ httpx>=0.27
3
+ langdetect>=1.0.9
4
+ math-verify>=0.9.0
5
+
6
+ [build]
7
+ datasets>=2.20
8
+
9
+ [dev]
10
+ pytest>=8
11
+ nltk>=3.9
12
+ jinja2>=3.1
13
+
14
+ [hf]
15
+ transformers>=4.51
@@ -0,0 +1 @@
1
+ """Local benchmark runner package."""
@@ -0,0 +1,11 @@
1
+ """Module entry point for python -m localbench."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+ from localbench.cli import main
8
+
9
+
10
+ if __name__ == "__main__":
11
+ sys.exit(main())