local-bench-ai 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- local_bench_ai-0.1.0/PKG-INFO +17 -0
- local_bench_ai-0.1.0/pyproject.toml +50 -0
- local_bench_ai-0.1.0/setup.cfg +4 -0
- local_bench_ai-0.1.0/src/local_bench_ai.egg-info/PKG-INFO +17 -0
- local_bench_ai-0.1.0/src/local_bench_ai.egg-info/SOURCES.txt +332 -0
- local_bench_ai-0.1.0/src/local_bench_ai.egg-info/dependency_links.txt +1 -0
- local_bench_ai-0.1.0/src/local_bench_ai.egg-info/entry_points.txt +3 -0
- local_bench_ai-0.1.0/src/local_bench_ai.egg-info/requires.txt +15 -0
- local_bench_ai-0.1.0/src/local_bench_ai.egg-info/top_level.txt +1 -0
- local_bench_ai-0.1.0/src/localbench/__init__.py +1 -0
- local_bench_ai-0.1.0/src/localbench/__main__.py +11 -0
- local_bench_ai-0.1.0/src/localbench/_requests.py +232 -0
- local_bench_ai-0.1.0/src/localbench/_response.py +70 -0
- local_bench_ai-0.1.0/src/localbench/_scoring.py +406 -0
- local_bench_ai-0.1.0/src/localbench/_suite.py +262 -0
- local_bench_ai-0.1.0/src/localbench/_types.py +84 -0
- local_bench_ai-0.1.0/src/localbench/budget_forcing.py +303 -0
- local_bench_ai-0.1.0/src/localbench/campaign.py +126 -0
- local_bench_ai-0.1.0/src/localbench/campaign_checkpoints.py +481 -0
- local_bench_ai-0.1.0/src/localbench/campaign_records.py +236 -0
- local_bench_ai-0.1.0/src/localbench/cli.py +1928 -0
- local_bench_ai-0.1.0/src/localbench/coding_exec/__init__.py +54 -0
- local_bench_ai-0.1.0/src/localbench/coding_exec/extract.py +25 -0
- local_bench_ai-0.1.0/src/localbench/coding_exec/orchestrate.py +262 -0
- local_bench_ai-0.1.0/src/localbench/coding_exec/program.py +36 -0
- local_bench_ai-0.1.0/src/localbench/coding_exec/runner.py +65 -0
- local_bench_ai-0.1.0/src/localbench/coding_exec/sandbox.py +303 -0
- local_bench_ai-0.1.0/src/localbench/coding_exec/score.py +43 -0
- local_bench_ai-0.1.0/src/localbench/data/board_sources.json +68 -0
- local_bench_ai-0.1.0/src/localbench/data/licenses/MIT.txt +19 -0
- local_bench_ai-0.1.0/src/localbench/data/licenses/ODC-BY-1.0.txt +217 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/ATTRIBUTION.md +21 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/CHANGES.md +9 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/BFCL-Apache-2.0 +157 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/IFBench-ODC-BY-1.0 +223 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/IFEval-Apache-2.0 +157 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/LICENSES/MMLU-Pro-MIT +19 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/NOTICE +14 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/SCORECARD.json +221 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/SHA256SUMS +14 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/SOURCE_REVISIONS.md +16 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/ifbench.jsonl +294 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/itemsets.lock.json +26 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/mmlu_pro.jsonl +400 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/suite.json +101 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/core-text-v1/tc_json_v1.jsonl +330 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/ifbench.jsonl +1 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/itemsets.lock.json +12 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/mmlu_pro.jsonl +1 -0
- local_bench_ai-0.1.0/src/localbench/data/suites/tiny-smoke-v1/suite.json +51 -0
- local_bench_ai-0.1.0/src/localbench/exit_codes.py +11 -0
- local_bench_ai-0.1.0/src/localbench/kld/__init__.py +32 -0
- local_bench_ai-0.1.0/src/localbench/kld/churn.py +72 -0
- local_bench_ai-0.1.0/src/localbench/kld/parse.py +110 -0
- local_bench_ai-0.1.0/src/localbench/kld/run.py +128 -0
- local_bench_ai-0.1.0/src/localbench/lane_conformance.py +196 -0
- local_bench_ai-0.1.0/src/localbench/manifest.py +376 -0
- local_bench_ai-0.1.0/src/localbench/monitor_cli.py +226 -0
- local_bench_ai-0.1.0/src/localbench/monitor_records.py +211 -0
- local_bench_ai-0.1.0/src/localbench/monitoring.py +202 -0
- local_bench_ai-0.1.0/src/localbench/orchestrate.py +1637 -0
- local_bench_ai-0.1.0/src/localbench/persistence.py +73 -0
- local_bench_ai-0.1.0/src/localbench/probe/__init__.py +15 -0
- local_bench_ai-0.1.0/src/localbench/probe/__main__.py +184 -0
- local_bench_ai-0.1.0/src/localbench/probe/_point_biserial.py +74 -0
- local_bench_ai-0.1.0/src/localbench/probe/discrimination.py +439 -0
- local_bench_ai-0.1.0/src/localbench/probe/gates.py +184 -0
- local_bench_ai-0.1.0/src/localbench/prompt_rendering.py +186 -0
- local_bench_ai-0.1.0/src/localbench/providers/__init__.py +56 -0
- local_bench_ai-0.1.0/src/localbench/providers/_anthropic.py +259 -0
- local_bench_ai-0.1.0/src/localbench/providers/_base.py +63 -0
- local_bench_ai-0.1.0/src/localbench/providers/_openai.py +175 -0
- local_bench_ai-0.1.0/src/localbench/reasoning_leaks.py +72 -0
- local_bench_ai-0.1.0/src/localbench/reasoning_registry.py +156 -0
- local_bench_ai-0.1.0/src/localbench/release_test.py +154 -0
- local_bench_ai-0.1.0/src/localbench/run_plan.py +18 -0
- local_bench_ai-0.1.0/src/localbench/run_schema.py +22 -0
- local_bench_ai-0.1.0/src/localbench/runner.py +207 -0
- local_bench_ai-0.1.0/src/localbench/scorers/__init__.py +1 -0
- local_bench_ai-0.1.0/src/localbench/scorers/_reasoning.py +44 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/NOTICE +12 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/__init__.py +6 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_checker.py +180 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_checker_values.py +158 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_parser.py +145 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_prompt.py +62 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/_types.py +20 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl/scorer.py +35 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/NOTICE +12 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/__init__.py +7 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_backend.py +133 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_executor.py +231 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_parser.py +161 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_prompt.py +53 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_sandbox.py +86 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/_types.py +55 -0
- local_bench_ai-0.1.0/src/localbench/scorers/bfcl_multi_turn/scorer.py +66 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/NOTICE +32 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/__init__.py +20 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_count.py +111 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_custom.py +182 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_format.py +193 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_ratio.py +73 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_repeat.py +54 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_sentence.py +102 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_checks_words.py +138 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_types.py +29 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/_util.py +102 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/instructions.py +88 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifbench/scorer.py +80 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/__init__.py +22 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_format.py +96 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_keywords.py +73 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_length.py +102 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_checks_misc.py +86 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_shared.py +80 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_types.py +34 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/_util.py +89 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/instructions.py +55 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ifeval/scorer.py +74 -0
- local_bench_ai-0.1.0/src/localbench/scorers/lcb.py +179 -0
- local_bench_ai-0.1.0/src/localbench/scorers/math_numeric.py +122 -0
- local_bench_ai-0.1.0/src/localbench/scorers/math_symbolic.py +302 -0
- local_bench_ai-0.1.0/src/localbench/scorers/mcq.py +132 -0
- local_bench_ai-0.1.0/src/localbench/scorers/ruler.py +222 -0
- local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/__init__.py +5 -0
- local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/_parser.py +359 -0
- local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/_types.py +79 -0
- local_bench_ai-0.1.0/src/localbench/scorers/tc_json_v1/scorer.py +199 -0
- local_bench_ai-0.1.0/src/localbench/scoring/__init__.py +65 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/__init__.py +10 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/adapter.py +113 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/benchmark.py +276 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/block_introspect.py +81 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/block_parser.py +130 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/chat_client.py +269 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/config.py +24 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/env_host.py +391 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/funnel.py +704 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/hashing.py +82 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/loop_config.py +71 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/loop_types.py +178 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/model_client.py +88 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/observations.py +47 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/parser.py +96 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/prompt.py +142 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/protocol.py +54 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/protocol_c_loop.py +553 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/runner.py +207 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/runner_bootstrap.py +357 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/sandbox.py +701 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/sandbox_protocol.py +76 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/score.py +150 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/scripted_agent.py +251 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/stub_appworld.py +267 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/task_pool.py +98 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/types.py +73 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/wsl_bridge.py +451 -0
- local_bench_ai-0.1.0/src/localbench/scoring/agentic_exec/wsl_worker.py +367 -0
- local_bench_ai-0.1.0/src/localbench/scoring/axes.py +209 -0
- local_bench_ai-0.1.0/src/localbench/scoring/axis_status.py +234 -0
- local_bench_ai-0.1.0/src/localbench/scoring/benchmark_registry.py +106 -0
- local_bench_ai-0.1.0/src/localbench/scoring/board.py +185 -0
- local_bench_ai-0.1.0/src/localbench/scoring/board_manifest.py +44 -0
- local_bench_ai-0.1.0/src/localbench/scoring/board_scoring.py +502 -0
- local_bench_ai-0.1.0/src/localbench/scoring/board_sources.py +78 -0
- local_bench_ai-0.1.0/src/localbench/scoring/board_support.py +149 -0
- local_bench_ai-0.1.0/src/localbench/scoring/board_systems.py +65 -0
- local_bench_ai-0.1.0/src/localbench/scoring/board_types.py +86 -0
- local_bench_ai-0.1.0/src/localbench/scoring/bootstrap.py +201 -0
- local_bench_ai-0.1.0/src/localbench/scoring/metadata.py +187 -0
- local_bench_ai-0.1.0/src/localbench/scoring/paired_delta.py +320 -0
- local_bench_ai-0.1.0/src/localbench/scoring/public_rescore.py +20 -0
- local_bench_ai-0.1.0/src/localbench/scoring/scorecard.py +121 -0
- local_bench_ai-0.1.0/src/localbench/scoring/signed_score.py +49 -0
- local_bench_ai-0.1.0/src/localbench/scoring/subgroups.py +137 -0
- local_bench_ai-0.1.0/src/localbench/scoring/tc_json_conformance.py +46 -0
- local_bench_ai-0.1.0/src/localbench/scoring/web.py +74 -0
- local_bench_ai-0.1.0/src/localbench/serving/__init__.py +1 -0
- local_bench_ai-0.1.0/src/localbench/serving/assembly.py +328 -0
- local_bench_ai-0.1.0/src/localbench/serving/bench.py +137 -0
- local_bench_ai-0.1.0/src/localbench/serving/fingerprint.py +72 -0
- local_bench_ai-0.1.0/src/localbench/serving/job_object.py +96 -0
- local_bench_ai-0.1.0/src/localbench/serving/llama_cpp.py +213 -0
- local_bench_ai-0.1.0/src/localbench/serving/model_artifact.py +247 -0
- local_bench_ai-0.1.0/src/localbench/serving/options.py +36 -0
- local_bench_ai-0.1.0/src/localbench/serving/process.py +98 -0
- local_bench_ai-0.1.0/src/localbench/serving/provenance.py +250 -0
- local_bench_ai-0.1.0/src/localbench/serving/readiness.py +180 -0
- local_bench_ai-0.1.0/src/localbench/serving/runner.py +258 -0
- local_bench_ai-0.1.0/src/localbench/serving/teardown.py +107 -0
- local_bench_ai-0.1.0/src/localbench/submissions/__init__.py +6 -0
- local_bench_ai-0.1.0/src/localbench/submissions/archive.py +88 -0
- local_bench_ai-0.1.0/src/localbench/submissions/attestation.py +92 -0
- local_bench_ai-0.1.0/src/localbench/submissions/bundle.py +298 -0
- local_bench_ai-0.1.0/src/localbench/submissions/bundle_input.py +32 -0
- local_bench_ai-0.1.0/src/localbench/submissions/canon.py +61 -0
- local_bench_ai-0.1.0/src/localbench/submissions/client.py +326 -0
- local_bench_ai-0.1.0/src/localbench/submissions/contracts.py +30 -0
- local_bench_ai-0.1.0/src/localbench/submissions/crypto.py +190 -0
- local_bench_ai-0.1.0/src/localbench/submissions/dedup.py +17 -0
- local_bench_ai-0.1.0/src/localbench/submissions/divergence.py +92 -0
- local_bench_ai-0.1.0/src/localbench/submissions/foundation.py +419 -0
- local_bench_ai-0.1.0/src/localbench/submissions/foundation_scores.py +128 -0
- local_bench_ai-0.1.0/src/localbench/submissions/keys.py +27 -0
- local_bench_ai-0.1.0/src/localbench/submissions/origin.py +18 -0
- local_bench_ai-0.1.0/src/localbench/submissions/ports.py +46 -0
- local_bench_ai-0.1.0/src/localbench/submissions/projection.py +401 -0
- local_bench_ai-0.1.0/src/localbench/submissions/provenance.py +105 -0
- local_bench_ai-0.1.0/src/localbench/submissions/rescore.py +156 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/__init__.py +1 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/accepted_result_projection_v1.schema.json +45 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/result_bundle_v1.schema.json +59 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_envelope_v1.schema.json +35 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_item_v1.schema.json +18 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_manifest_v1.schema.json +33 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/submission_verification_v1.schema.json +59 -0
- local_bench_ai-0.1.0/src/localbench/submissions/schemas/suite_release_manifest_v1.schema.json +40 -0
- local_bench_ai-0.1.0/src/localbench/submissions/status_update.py +56 -0
- local_bench_ai-0.1.0/src/localbench/submissions/submit_run.py +247 -0
- local_bench_ai-0.1.0/src/localbench/submissions/submit_run_inputs.py +236 -0
- local_bench_ai-0.1.0/src/localbench/submissions/submit_run_output.py +39 -0
- local_bench_ai-0.1.0/src/localbench/submissions/trust.py +11 -0
- local_bench_ai-0.1.0/src/localbench/submissions/validate.py +185 -0
- local_bench_ai-0.1.0/src/localbench/submissions/verify.py +85 -0
- local_bench_ai-0.1.0/src/localbench/suite_bundle.py +261 -0
- local_bench_ai-0.1.0/src/localbench/suite_errors.py +7 -0
- local_bench_ai-0.1.0/src/localbench/suite_release.py +175 -0
- local_bench_ai-0.1.0/src/localbench/suite_resolver.py +469 -0
- local_bench_ai-0.1.0/src/localbench/suite_verify.py +190 -0
- local_bench_ai-0.1.0/src/localbench/supervisor.py +138 -0
- local_bench_ai-0.1.0/src/localbench/tc_json_v1_runner.py +153 -0
- local_bench_ai-0.1.0/tests/test_agentic_appworld_adapter.py +109 -0
- local_bench_ai-0.1.0/tests/test_agentic_failure_policies.py +80 -0
- local_bench_ai-0.1.0/tests/test_agentic_hash_stability.py +56 -0
- local_bench_ai-0.1.0/tests/test_agentic_parser.py +136 -0
- local_bench_ai-0.1.0/tests/test_agentic_protocol_schema.py +55 -0
- local_bench_ai-0.1.0/tests/test_agentic_score_asr.py +140 -0
- local_bench_ai-0.1.0/tests/test_agentic_scripted_runner.py +49 -0
- local_bench_ai-0.1.0/tests/test_agentic_task_pool.py +92 -0
- local_bench_ai-0.1.0/tests/test_agentic_wsl_bridge.py +351 -0
- local_bench_ai-0.1.0/tests/test_agentic_wsl_bridge_acceptance.py +138 -0
- local_bench_ai-0.1.0/tests/test_appworld_c_funnel_units.py +651 -0
- local_bench_ai-0.1.0/tests/test_appworld_protocol_c_acceptance.py +84 -0
- local_bench_ai-0.1.0/tests/test_appworld_protocol_c_gauntlet.py +376 -0
- local_bench_ai-0.1.0/tests/test_appworld_protocol_c_units.py +1009 -0
- local_bench_ai-0.1.0/tests/test_appworld_sandbox_acceptance.py +109 -0
- local_bench_ai-0.1.0/tests/test_appworld_sandbox_units.py +179 -0
- local_bench_ai-0.1.0/tests/test_axes_registry.py +148 -0
- local_bench_ai-0.1.0/tests/test_axis_measurement_status.py +131 -0
- local_bench_ai-0.1.0/tests/test_bfcl.py +282 -0
- local_bench_ai-0.1.0/tests/test_bfcl_multi_turn.py +198 -0
- local_bench_ai-0.1.0/tests/test_board.py +604 -0
- local_bench_ai-0.1.0/tests/test_board_cli.py +32 -0
- local_bench_ai-0.1.0/tests/test_board_manifest.py +82 -0
- local_bench_ai-0.1.0/tests/test_board_provenance.py +55 -0
- local_bench_ai-0.1.0/tests/test_budget_forcing.py +453 -0
- local_bench_ai-0.1.0/tests/test_build_v1_bfcl_multi_turn.py +94 -0
- local_bench_ai-0.1.0/tests/test_build_v1_mmlu_pro.py +128 -0
- local_bench_ai-0.1.0/tests/test_campaign.py +484 -0
- local_bench_ai-0.1.0/tests/test_campaign_contracts.py +110 -0
- local_bench_ai-0.1.0/tests/test_cheat_proxy.py +98 -0
- local_bench_ai-0.1.0/tests/test_cli_axis_measurement_status.py +143 -0
- local_bench_ai-0.1.0/tests/test_cli_bench_exit_codes.py +153 -0
- local_bench_ai-0.1.0/tests/test_coding_exec_harness.py +95 -0
- local_bench_ai-0.1.0/tests/test_coding_exec_orchestrate.py +175 -0
- local_bench_ai-0.1.0/tests/test_coding_exec_sandbox.py +180 -0
- local_bench_ai-0.1.0/tests/test_distribution_cli.py +336 -0
- local_bench_ai-0.1.0/tests/test_gemma_reasoning_mode.py +132 -0
- local_bench_ai-0.1.0/tests/test_genmath_gen.py +143 -0
- local_bench_ai-0.1.0/tests/test_genmath_private.py +171 -0
- local_bench_ai-0.1.0/tests/test_ifbench.py +312 -0
- local_bench_ai-0.1.0/tests/test_ifeval.py +185 -0
- local_bench_ai-0.1.0/tests/test_kld.py +157 -0
- local_bench_ai-0.1.0/tests/test_lane_conformance.py +218 -0
- local_bench_ai-0.1.0/tests/test_lane_enforcement.py +215 -0
- local_bench_ai-0.1.0/tests/test_lcb.py +101 -0
- local_bench_ai-0.1.0/tests/test_math_genmath_parity.py +75 -0
- local_bench_ai-0.1.0/tests/test_math_numeric.py +102 -0
- local_bench_ai-0.1.0/tests/test_math_symbolic.py +144 -0
- local_bench_ai-0.1.0/tests/test_math_symbolic_robustness.py +45 -0
- local_bench_ai-0.1.0/tests/test_mcq.py +225 -0
- local_bench_ai-0.1.0/tests/test_monitoring.py +228 -0
- local_bench_ai-0.1.0/tests/test_online_distribution.py +131 -0
- local_bench_ai-0.1.0/tests/test_orchestrate.py +755 -0
- local_bench_ai-0.1.0/tests/test_orchestrate_agentic.py +328 -0
- local_bench_ai-0.1.0/tests/test_probe_discrimination.py +405 -0
- local_bench_ai-0.1.0/tests/test_probe_gates.py +128 -0
- local_bench_ai-0.1.0/tests/test_provider_orchestrate.py +112 -0
- local_bench_ai-0.1.0/tests/test_provider_profiles.py +539 -0
- local_bench_ai-0.1.0/tests/test_reasoning_registry.py +66 -0
- local_bench_ai-0.1.0/tests/test_release_test.py +44 -0
- local_bench_ai-0.1.0/tests/test_response.py +48 -0
- local_bench_ai-0.1.0/tests/test_response_wrapper_scoring.py +53 -0
- local_bench_ai-0.1.0/tests/test_run_plan.py +261 -0
- local_bench_ai-0.1.0/tests/test_run_record_distribution_schema.py +268 -0
- local_bench_ai-0.1.0/tests/test_runner.py +363 -0
- local_bench_ai-0.1.0/tests/test_runner_reasoning.py +53 -0
- local_bench_ai-0.1.0/tests/test_scorecard.py +83 -0
- local_bench_ai-0.1.0/tests/test_scoring_aggregate.py +92 -0
- local_bench_ai-0.1.0/tests/test_scoring_reasoning_strip.py +317 -0
- local_bench_ai-0.1.0/tests/test_scoring_v1.py +461 -0
- local_bench_ai-0.1.0/tests/test_serving_bench.py +879 -0
- local_bench_ai-0.1.0/tests/test_serving_provenance.py +422 -0
- local_bench_ai-0.1.0/tests/test_serving_teardown.py +133 -0
- local_bench_ai-0.1.0/tests/test_site_parity.py +357 -0
- local_bench_ai-0.1.0/tests/test_submission_slice_d1_migration.py +120 -0
- local_bench_ai-0.1.0/tests/test_suite_bundle.py +66 -0
- local_bench_ai-0.1.0/tests/test_suite_release_manifest.py +166 -0
- local_bench_ai-0.1.0/tests/test_suite_resolver.py +320 -0
- local_bench_ai-0.1.0/tests/test_supervisor.py +80 -0
- local_bench_ai-0.1.0/tests/test_tc_json_conformance_gate.py +151 -0
- local_bench_ai-0.1.0/tests/test_tc_json_v1_items.py +58 -0
- local_bench_ai-0.1.0/tests/test_tc_json_v1_runner.py +82 -0
- local_bench_ai-0.1.0/tests/test_tc_json_v1_scorer.py +312 -0
- local_bench_ai-0.1.0/tests/test_v1_bfcl_axis.py +73 -0
- local_bench_ai-0.1.0/tests/test_v1_bfcl_items.py +181 -0
- local_bench_ai-0.1.0/tests/test_v1_bfcl_multi_turn_axis.py +79 -0
- local_bench_ai-0.1.0/tests/test_v1_bfcl_multi_turn_items.py +162 -0
- local_bench_ai-0.1.0/tests/test_v1_bigcodebench_items.py +45 -0
- local_bench_ai-0.1.0/tests/test_v1_ifbench_axis.py +56 -0
- local_bench_ai-0.1.0/tests/test_v1_ifbench_items.py +118 -0
- local_bench_ai-0.1.0/tests/test_v1_lcb_axis.py +115 -0
- local_bench_ai-0.1.0/tests/test_v1_lcb_items.py +195 -0
- local_bench_ai-0.1.0/tests/test_v1_math_axis.py +80 -0
- local_bench_ai-0.1.0/tests/test_v1_math_items.py +78 -0
- local_bench_ai-0.1.0/tests/test_v1_mmlu_pro_axis.py +88 -0
- local_bench_ai-0.1.0/tests/test_v1_mmlu_pro_items.py +147 -0
- local_bench_ai-0.1.0/tests/test_v1_ruler_axis.py +309 -0
- local_bench_ai-0.1.0/tests/test_verdict_integrity_direct_finalize.py +311 -0
- local_bench_ai-0.1.0/tests/test_wave3_attestation_run_id.py +58 -0
- local_bench_ai-0.1.0/tests/test_wave3_cli_ux.py +222 -0
- local_bench_ai-0.1.0/tests/test_web_build_data.py +817 -0
- local_bench_ai-0.1.0/tests/test_web_scorecard.py +57 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: local-bench-ai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Thin benchmark runner for OpenAI-compatible chat completion endpoints.
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: anyio>=4
|
|
7
|
+
Requires-Dist: httpx>=0.27
|
|
8
|
+
Requires-Dist: langdetect>=1.0.9
|
|
9
|
+
Requires-Dist: math-verify>=0.9.0
|
|
10
|
+
Provides-Extra: build
|
|
11
|
+
Requires-Dist: datasets>=2.20; extra == "build"
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
14
|
+
Requires-Dist: nltk>=3.9; extra == "dev"
|
|
15
|
+
Requires-Dist: jinja2>=3.1; extra == "dev"
|
|
16
|
+
Provides-Extra: hf
|
|
17
|
+
Requires-Dist: transformers>=4.51; extra == "hf"
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "local-bench-ai"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Thin benchmark runner for OpenAI-compatible chat completion endpoints."
|
|
9
|
+
requires-python = ">=3.11"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"anyio>=4",
|
|
12
|
+
"httpx>=0.27",
|
|
13
|
+
"langdetect>=1.0.9",
|
|
14
|
+
"math-verify>=0.9.0",
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
[project.scripts]
|
|
18
|
+
localbench = "localbench.cli:main"
|
|
19
|
+
localbench-monitor = "localbench.monitor_cli:main"
|
|
20
|
+
|
|
21
|
+
[project.optional-dependencies]
|
|
22
|
+
build = ["datasets>=2.20"]
|
|
23
|
+
dev = ["pytest>=8", "nltk>=3.9", "jinja2>=3.1"]
|
|
24
|
+
hf = ["transformers>=4.51"]
|
|
25
|
+
|
|
26
|
+
[tool.setuptools.package-dir]
|
|
27
|
+
"" = "src"
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.packages.find]
|
|
30
|
+
where = ["src"]
|
|
31
|
+
|
|
32
|
+
[tool.setuptools.package-data]
|
|
33
|
+
localbench = [
|
|
34
|
+
"data/board_sources.json",
|
|
35
|
+
"data/licenses/*",
|
|
36
|
+
"data/suites/tiny-smoke-v1/*",
|
|
37
|
+
"data/suites/core-text-v1/*",
|
|
38
|
+
"data/suites/core-text-v1/LICENSES/*",
|
|
39
|
+
"scorers/ifbench/NOTICE",
|
|
40
|
+
"scorers/bfcl/NOTICE",
|
|
41
|
+
"scorers/bfcl_multi_turn/NOTICE",
|
|
42
|
+
"submissions/schemas/*.json",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[tool.pytest.ini_options]
|
|
46
|
+
testpaths = ["tests"]
|
|
47
|
+
addopts = ["-ra", "--strict-config", "--strict-markers"]
|
|
48
|
+
markers = [
|
|
49
|
+
"wsl: requires WSL2 AppWorld harness and bubblewrap",
|
|
50
|
+
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: local-bench-ai
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Thin benchmark runner for OpenAI-compatible chat completion endpoints.
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: anyio>=4
|
|
7
|
+
Requires-Dist: httpx>=0.27
|
|
8
|
+
Requires-Dist: langdetect>=1.0.9
|
|
9
|
+
Requires-Dist: math-verify>=0.9.0
|
|
10
|
+
Provides-Extra: build
|
|
11
|
+
Requires-Dist: datasets>=2.20; extra == "build"
|
|
12
|
+
Provides-Extra: dev
|
|
13
|
+
Requires-Dist: pytest>=8; extra == "dev"
|
|
14
|
+
Requires-Dist: nltk>=3.9; extra == "dev"
|
|
15
|
+
Requires-Dist: jinja2>=3.1; extra == "dev"
|
|
16
|
+
Provides-Extra: hf
|
|
17
|
+
Requires-Dist: transformers>=4.51; extra == "hf"
|
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
pyproject.toml
|
|
2
|
+
src/local_bench_ai.egg-info/PKG-INFO
|
|
3
|
+
src/local_bench_ai.egg-info/SOURCES.txt
|
|
4
|
+
src/local_bench_ai.egg-info/dependency_links.txt
|
|
5
|
+
src/local_bench_ai.egg-info/entry_points.txt
|
|
6
|
+
src/local_bench_ai.egg-info/requires.txt
|
|
7
|
+
src/local_bench_ai.egg-info/top_level.txt
|
|
8
|
+
src/localbench/__init__.py
|
|
9
|
+
src/localbench/__main__.py
|
|
10
|
+
src/localbench/_requests.py
|
|
11
|
+
src/localbench/_response.py
|
|
12
|
+
src/localbench/_scoring.py
|
|
13
|
+
src/localbench/_suite.py
|
|
14
|
+
src/localbench/_types.py
|
|
15
|
+
src/localbench/budget_forcing.py
|
|
16
|
+
src/localbench/campaign.py
|
|
17
|
+
src/localbench/campaign_checkpoints.py
|
|
18
|
+
src/localbench/campaign_records.py
|
|
19
|
+
src/localbench/cli.py
|
|
20
|
+
src/localbench/exit_codes.py
|
|
21
|
+
src/localbench/lane_conformance.py
|
|
22
|
+
src/localbench/manifest.py
|
|
23
|
+
src/localbench/monitor_cli.py
|
|
24
|
+
src/localbench/monitor_records.py
|
|
25
|
+
src/localbench/monitoring.py
|
|
26
|
+
src/localbench/orchestrate.py
|
|
27
|
+
src/localbench/persistence.py
|
|
28
|
+
src/localbench/prompt_rendering.py
|
|
29
|
+
src/localbench/reasoning_leaks.py
|
|
30
|
+
src/localbench/reasoning_registry.py
|
|
31
|
+
src/localbench/release_test.py
|
|
32
|
+
src/localbench/run_plan.py
|
|
33
|
+
src/localbench/run_schema.py
|
|
34
|
+
src/localbench/runner.py
|
|
35
|
+
src/localbench/suite_bundle.py
|
|
36
|
+
src/localbench/suite_errors.py
|
|
37
|
+
src/localbench/suite_release.py
|
|
38
|
+
src/localbench/suite_resolver.py
|
|
39
|
+
src/localbench/suite_verify.py
|
|
40
|
+
src/localbench/supervisor.py
|
|
41
|
+
src/localbench/tc_json_v1_runner.py
|
|
42
|
+
src/localbench/coding_exec/__init__.py
|
|
43
|
+
src/localbench/coding_exec/extract.py
|
|
44
|
+
src/localbench/coding_exec/orchestrate.py
|
|
45
|
+
src/localbench/coding_exec/program.py
|
|
46
|
+
src/localbench/coding_exec/runner.py
|
|
47
|
+
src/localbench/coding_exec/sandbox.py
|
|
48
|
+
src/localbench/coding_exec/score.py
|
|
49
|
+
src/localbench/data/board_sources.json
|
|
50
|
+
src/localbench/data/licenses/MIT.txt
|
|
51
|
+
src/localbench/data/licenses/ODC-BY-1.0.txt
|
|
52
|
+
src/localbench/data/suites/core-text-v1/ATTRIBUTION.md
|
|
53
|
+
src/localbench/data/suites/core-text-v1/CHANGES.md
|
|
54
|
+
src/localbench/data/suites/core-text-v1/NOTICE
|
|
55
|
+
src/localbench/data/suites/core-text-v1/SCORECARD.json
|
|
56
|
+
src/localbench/data/suites/core-text-v1/SHA256SUMS
|
|
57
|
+
src/localbench/data/suites/core-text-v1/SOURCE_REVISIONS.md
|
|
58
|
+
src/localbench/data/suites/core-text-v1/ifbench.jsonl
|
|
59
|
+
src/localbench/data/suites/core-text-v1/itemsets.lock.json
|
|
60
|
+
src/localbench/data/suites/core-text-v1/mmlu_pro.jsonl
|
|
61
|
+
src/localbench/data/suites/core-text-v1/suite.json
|
|
62
|
+
src/localbench/data/suites/core-text-v1/tc_json_v1.jsonl
|
|
63
|
+
src/localbench/data/suites/core-text-v1/LICENSES/BFCL-Apache-2.0
|
|
64
|
+
src/localbench/data/suites/core-text-v1/LICENSES/IFBench-ODC-BY-1.0
|
|
65
|
+
src/localbench/data/suites/core-text-v1/LICENSES/IFEval-Apache-2.0
|
|
66
|
+
src/localbench/data/suites/core-text-v1/LICENSES/MMLU-Pro-MIT
|
|
67
|
+
src/localbench/data/suites/tiny-smoke-v1/ifbench.jsonl
|
|
68
|
+
src/localbench/data/suites/tiny-smoke-v1/itemsets.lock.json
|
|
69
|
+
src/localbench/data/suites/tiny-smoke-v1/mmlu_pro.jsonl
|
|
70
|
+
src/localbench/data/suites/tiny-smoke-v1/suite.json
|
|
71
|
+
src/localbench/kld/__init__.py
|
|
72
|
+
src/localbench/kld/churn.py
|
|
73
|
+
src/localbench/kld/parse.py
|
|
74
|
+
src/localbench/kld/run.py
|
|
75
|
+
src/localbench/probe/__init__.py
|
|
76
|
+
src/localbench/probe/__main__.py
|
|
77
|
+
src/localbench/probe/_point_biserial.py
|
|
78
|
+
src/localbench/probe/discrimination.py
|
|
79
|
+
src/localbench/probe/gates.py
|
|
80
|
+
src/localbench/providers/__init__.py
|
|
81
|
+
src/localbench/providers/_anthropic.py
|
|
82
|
+
src/localbench/providers/_base.py
|
|
83
|
+
src/localbench/providers/_openai.py
|
|
84
|
+
src/localbench/scorers/__init__.py
|
|
85
|
+
src/localbench/scorers/_reasoning.py
|
|
86
|
+
src/localbench/scorers/lcb.py
|
|
87
|
+
src/localbench/scorers/math_numeric.py
|
|
88
|
+
src/localbench/scorers/math_symbolic.py
|
|
89
|
+
src/localbench/scorers/mcq.py
|
|
90
|
+
src/localbench/scorers/ruler.py
|
|
91
|
+
src/localbench/scorers/bfcl/NOTICE
|
|
92
|
+
src/localbench/scorers/bfcl/__init__.py
|
|
93
|
+
src/localbench/scorers/bfcl/_checker.py
|
|
94
|
+
src/localbench/scorers/bfcl/_checker_values.py
|
|
95
|
+
src/localbench/scorers/bfcl/_parser.py
|
|
96
|
+
src/localbench/scorers/bfcl/_prompt.py
|
|
97
|
+
src/localbench/scorers/bfcl/_types.py
|
|
98
|
+
src/localbench/scorers/bfcl/scorer.py
|
|
99
|
+
src/localbench/scorers/bfcl_multi_turn/NOTICE
|
|
100
|
+
src/localbench/scorers/bfcl_multi_turn/__init__.py
|
|
101
|
+
src/localbench/scorers/bfcl_multi_turn/_backend.py
|
|
102
|
+
src/localbench/scorers/bfcl_multi_turn/_executor.py
|
|
103
|
+
src/localbench/scorers/bfcl_multi_turn/_parser.py
|
|
104
|
+
src/localbench/scorers/bfcl_multi_turn/_prompt.py
|
|
105
|
+
src/localbench/scorers/bfcl_multi_turn/_sandbox.py
|
|
106
|
+
src/localbench/scorers/bfcl_multi_turn/_types.py
|
|
107
|
+
src/localbench/scorers/bfcl_multi_turn/scorer.py
|
|
108
|
+
src/localbench/scorers/ifbench/NOTICE
|
|
109
|
+
src/localbench/scorers/ifbench/__init__.py
|
|
110
|
+
src/localbench/scorers/ifbench/_checks_count.py
|
|
111
|
+
src/localbench/scorers/ifbench/_checks_custom.py
|
|
112
|
+
src/localbench/scorers/ifbench/_checks_format.py
|
|
113
|
+
src/localbench/scorers/ifbench/_checks_ratio.py
|
|
114
|
+
src/localbench/scorers/ifbench/_checks_repeat.py
|
|
115
|
+
src/localbench/scorers/ifbench/_checks_sentence.py
|
|
116
|
+
src/localbench/scorers/ifbench/_checks_words.py
|
|
117
|
+
src/localbench/scorers/ifbench/_types.py
|
|
118
|
+
src/localbench/scorers/ifbench/_util.py
|
|
119
|
+
src/localbench/scorers/ifbench/instructions.py
|
|
120
|
+
src/localbench/scorers/ifbench/scorer.py
|
|
121
|
+
src/localbench/scorers/ifeval/__init__.py
|
|
122
|
+
src/localbench/scorers/ifeval/_checks_format.py
|
|
123
|
+
src/localbench/scorers/ifeval/_checks_keywords.py
|
|
124
|
+
src/localbench/scorers/ifeval/_checks_length.py
|
|
125
|
+
src/localbench/scorers/ifeval/_checks_misc.py
|
|
126
|
+
src/localbench/scorers/ifeval/_shared.py
|
|
127
|
+
src/localbench/scorers/ifeval/_types.py
|
|
128
|
+
src/localbench/scorers/ifeval/_util.py
|
|
129
|
+
src/localbench/scorers/ifeval/instructions.py
|
|
130
|
+
src/localbench/scorers/ifeval/scorer.py
|
|
131
|
+
src/localbench/scorers/tc_json_v1/__init__.py
|
|
132
|
+
src/localbench/scorers/tc_json_v1/_parser.py
|
|
133
|
+
src/localbench/scorers/tc_json_v1/_types.py
|
|
134
|
+
src/localbench/scorers/tc_json_v1/scorer.py
|
|
135
|
+
src/localbench/scoring/__init__.py
|
|
136
|
+
src/localbench/scoring/axes.py
|
|
137
|
+
src/localbench/scoring/axis_status.py
|
|
138
|
+
src/localbench/scoring/benchmark_registry.py
|
|
139
|
+
src/localbench/scoring/board.py
|
|
140
|
+
src/localbench/scoring/board_manifest.py
|
|
141
|
+
src/localbench/scoring/board_scoring.py
|
|
142
|
+
src/localbench/scoring/board_sources.py
|
|
143
|
+
src/localbench/scoring/board_support.py
|
|
144
|
+
src/localbench/scoring/board_systems.py
|
|
145
|
+
src/localbench/scoring/board_types.py
|
|
146
|
+
src/localbench/scoring/bootstrap.py
|
|
147
|
+
src/localbench/scoring/metadata.py
|
|
148
|
+
src/localbench/scoring/paired_delta.py
|
|
149
|
+
src/localbench/scoring/public_rescore.py
|
|
150
|
+
src/localbench/scoring/scorecard.py
|
|
151
|
+
src/localbench/scoring/signed_score.py
|
|
152
|
+
src/localbench/scoring/subgroups.py
|
|
153
|
+
src/localbench/scoring/tc_json_conformance.py
|
|
154
|
+
src/localbench/scoring/web.py
|
|
155
|
+
src/localbench/scoring/agentic_exec/__init__.py
|
|
156
|
+
src/localbench/scoring/agentic_exec/adapter.py
|
|
157
|
+
src/localbench/scoring/agentic_exec/benchmark.py
|
|
158
|
+
src/localbench/scoring/agentic_exec/block_introspect.py
|
|
159
|
+
src/localbench/scoring/agentic_exec/block_parser.py
|
|
160
|
+
src/localbench/scoring/agentic_exec/chat_client.py
|
|
161
|
+
src/localbench/scoring/agentic_exec/config.py
|
|
162
|
+
src/localbench/scoring/agentic_exec/env_host.py
|
|
163
|
+
src/localbench/scoring/agentic_exec/funnel.py
|
|
164
|
+
src/localbench/scoring/agentic_exec/hashing.py
|
|
165
|
+
src/localbench/scoring/agentic_exec/loop_config.py
|
|
166
|
+
src/localbench/scoring/agentic_exec/loop_types.py
|
|
167
|
+
src/localbench/scoring/agentic_exec/model_client.py
|
|
168
|
+
src/localbench/scoring/agentic_exec/observations.py
|
|
169
|
+
src/localbench/scoring/agentic_exec/parser.py
|
|
170
|
+
src/localbench/scoring/agentic_exec/prompt.py
|
|
171
|
+
src/localbench/scoring/agentic_exec/protocol.py
|
|
172
|
+
src/localbench/scoring/agentic_exec/protocol_c_loop.py
|
|
173
|
+
src/localbench/scoring/agentic_exec/runner.py
|
|
174
|
+
src/localbench/scoring/agentic_exec/runner_bootstrap.py
|
|
175
|
+
src/localbench/scoring/agentic_exec/sandbox.py
|
|
176
|
+
src/localbench/scoring/agentic_exec/sandbox_protocol.py
|
|
177
|
+
src/localbench/scoring/agentic_exec/score.py
|
|
178
|
+
src/localbench/scoring/agentic_exec/scripted_agent.py
|
|
179
|
+
src/localbench/scoring/agentic_exec/stub_appworld.py
|
|
180
|
+
src/localbench/scoring/agentic_exec/task_pool.py
|
|
181
|
+
src/localbench/scoring/agentic_exec/types.py
|
|
182
|
+
src/localbench/scoring/agentic_exec/wsl_bridge.py
|
|
183
|
+
src/localbench/scoring/agentic_exec/wsl_worker.py
|
|
184
|
+
src/localbench/serving/__init__.py
|
|
185
|
+
src/localbench/serving/assembly.py
|
|
186
|
+
src/localbench/serving/bench.py
|
|
187
|
+
src/localbench/serving/fingerprint.py
|
|
188
|
+
src/localbench/serving/job_object.py
|
|
189
|
+
src/localbench/serving/llama_cpp.py
|
|
190
|
+
src/localbench/serving/model_artifact.py
|
|
191
|
+
src/localbench/serving/options.py
|
|
192
|
+
src/localbench/serving/process.py
|
|
193
|
+
src/localbench/serving/provenance.py
|
|
194
|
+
src/localbench/serving/readiness.py
|
|
195
|
+
src/localbench/serving/runner.py
|
|
196
|
+
src/localbench/serving/teardown.py
|
|
197
|
+
src/localbench/submissions/__init__.py
|
|
198
|
+
src/localbench/submissions/archive.py
|
|
199
|
+
src/localbench/submissions/attestation.py
|
|
200
|
+
src/localbench/submissions/bundle.py
|
|
201
|
+
src/localbench/submissions/bundle_input.py
|
|
202
|
+
src/localbench/submissions/canon.py
|
|
203
|
+
src/localbench/submissions/client.py
|
|
204
|
+
src/localbench/submissions/contracts.py
|
|
205
|
+
src/localbench/submissions/crypto.py
|
|
206
|
+
src/localbench/submissions/dedup.py
|
|
207
|
+
src/localbench/submissions/divergence.py
|
|
208
|
+
src/localbench/submissions/foundation.py
|
|
209
|
+
src/localbench/submissions/foundation_scores.py
|
|
210
|
+
src/localbench/submissions/keys.py
|
|
211
|
+
src/localbench/submissions/origin.py
|
|
212
|
+
src/localbench/submissions/ports.py
|
|
213
|
+
src/localbench/submissions/projection.py
|
|
214
|
+
src/localbench/submissions/provenance.py
|
|
215
|
+
src/localbench/submissions/rescore.py
|
|
216
|
+
src/localbench/submissions/status_update.py
|
|
217
|
+
src/localbench/submissions/submit_run.py
|
|
218
|
+
src/localbench/submissions/submit_run_inputs.py
|
|
219
|
+
src/localbench/submissions/submit_run_output.py
|
|
220
|
+
src/localbench/submissions/trust.py
|
|
221
|
+
src/localbench/submissions/validate.py
|
|
222
|
+
src/localbench/submissions/verify.py
|
|
223
|
+
src/localbench/submissions/schemas/__init__.py
|
|
224
|
+
src/localbench/submissions/schemas/accepted_result_projection_v1.schema.json
|
|
225
|
+
src/localbench/submissions/schemas/result_bundle_v1.schema.json
|
|
226
|
+
src/localbench/submissions/schemas/submission_envelope_v1.schema.json
|
|
227
|
+
src/localbench/submissions/schemas/submission_item_v1.schema.json
|
|
228
|
+
src/localbench/submissions/schemas/submission_manifest_v1.schema.json
|
|
229
|
+
src/localbench/submissions/schemas/submission_verification_v1.schema.json
|
|
230
|
+
src/localbench/submissions/schemas/suite_release_manifest_v1.schema.json
|
|
231
|
+
tests/test_agentic_appworld_adapter.py
|
|
232
|
+
tests/test_agentic_failure_policies.py
|
|
233
|
+
tests/test_agentic_hash_stability.py
|
|
234
|
+
tests/test_agentic_parser.py
|
|
235
|
+
tests/test_agentic_protocol_schema.py
|
|
236
|
+
tests/test_agentic_score_asr.py
|
|
237
|
+
tests/test_agentic_scripted_runner.py
|
|
238
|
+
tests/test_agentic_task_pool.py
|
|
239
|
+
tests/test_agentic_wsl_bridge.py
|
|
240
|
+
tests/test_agentic_wsl_bridge_acceptance.py
|
|
241
|
+
tests/test_appworld_c_funnel_units.py
|
|
242
|
+
tests/test_appworld_protocol_c_acceptance.py
|
|
243
|
+
tests/test_appworld_protocol_c_gauntlet.py
|
|
244
|
+
tests/test_appworld_protocol_c_units.py
|
|
245
|
+
tests/test_appworld_sandbox_acceptance.py
|
|
246
|
+
tests/test_appworld_sandbox_units.py
|
|
247
|
+
tests/test_axes_registry.py
|
|
248
|
+
tests/test_axis_measurement_status.py
|
|
249
|
+
tests/test_bfcl.py
|
|
250
|
+
tests/test_bfcl_multi_turn.py
|
|
251
|
+
tests/test_board.py
|
|
252
|
+
tests/test_board_cli.py
|
|
253
|
+
tests/test_board_manifest.py
|
|
254
|
+
tests/test_board_provenance.py
|
|
255
|
+
tests/test_budget_forcing.py
|
|
256
|
+
tests/test_build_v1_bfcl_multi_turn.py
|
|
257
|
+
tests/test_build_v1_mmlu_pro.py
|
|
258
|
+
tests/test_campaign.py
|
|
259
|
+
tests/test_campaign_contracts.py
|
|
260
|
+
tests/test_cheat_proxy.py
|
|
261
|
+
tests/test_cli_axis_measurement_status.py
|
|
262
|
+
tests/test_cli_bench_exit_codes.py
|
|
263
|
+
tests/test_coding_exec_harness.py
|
|
264
|
+
tests/test_coding_exec_orchestrate.py
|
|
265
|
+
tests/test_coding_exec_sandbox.py
|
|
266
|
+
tests/test_distribution_cli.py
|
|
267
|
+
tests/test_gemma_reasoning_mode.py
|
|
268
|
+
tests/test_genmath_gen.py
|
|
269
|
+
tests/test_genmath_private.py
|
|
270
|
+
tests/test_ifbench.py
|
|
271
|
+
tests/test_ifeval.py
|
|
272
|
+
tests/test_kld.py
|
|
273
|
+
tests/test_lane_conformance.py
|
|
274
|
+
tests/test_lane_enforcement.py
|
|
275
|
+
tests/test_lcb.py
|
|
276
|
+
tests/test_math_genmath_parity.py
|
|
277
|
+
tests/test_math_numeric.py
|
|
278
|
+
tests/test_math_symbolic.py
|
|
279
|
+
tests/test_math_symbolic_robustness.py
|
|
280
|
+
tests/test_mcq.py
|
|
281
|
+
tests/test_monitoring.py
|
|
282
|
+
tests/test_online_distribution.py
|
|
283
|
+
tests/test_orchestrate.py
|
|
284
|
+
tests/test_orchestrate_agentic.py
|
|
285
|
+
tests/test_probe_discrimination.py
|
|
286
|
+
tests/test_probe_gates.py
|
|
287
|
+
tests/test_provider_orchestrate.py
|
|
288
|
+
tests/test_provider_profiles.py
|
|
289
|
+
tests/test_reasoning_registry.py
|
|
290
|
+
tests/test_release_test.py
|
|
291
|
+
tests/test_response.py
|
|
292
|
+
tests/test_response_wrapper_scoring.py
|
|
293
|
+
tests/test_run_plan.py
|
|
294
|
+
tests/test_run_record_distribution_schema.py
|
|
295
|
+
tests/test_runner.py
|
|
296
|
+
tests/test_runner_reasoning.py
|
|
297
|
+
tests/test_scorecard.py
|
|
298
|
+
tests/test_scoring_aggregate.py
|
|
299
|
+
tests/test_scoring_reasoning_strip.py
|
|
300
|
+
tests/test_scoring_v1.py
|
|
301
|
+
tests/test_serving_bench.py
|
|
302
|
+
tests/test_serving_provenance.py
|
|
303
|
+
tests/test_serving_teardown.py
|
|
304
|
+
tests/test_site_parity.py
|
|
305
|
+
tests/test_submission_slice_d1_migration.py
|
|
306
|
+
tests/test_suite_bundle.py
|
|
307
|
+
tests/test_suite_release_manifest.py
|
|
308
|
+
tests/test_suite_resolver.py
|
|
309
|
+
tests/test_supervisor.py
|
|
310
|
+
tests/test_tc_json_conformance_gate.py
|
|
311
|
+
tests/test_tc_json_v1_items.py
|
|
312
|
+
tests/test_tc_json_v1_runner.py
|
|
313
|
+
tests/test_tc_json_v1_scorer.py
|
|
314
|
+
tests/test_v1_bfcl_axis.py
|
|
315
|
+
tests/test_v1_bfcl_items.py
|
|
316
|
+
tests/test_v1_bfcl_multi_turn_axis.py
|
|
317
|
+
tests/test_v1_bfcl_multi_turn_items.py
|
|
318
|
+
tests/test_v1_bigcodebench_items.py
|
|
319
|
+
tests/test_v1_ifbench_axis.py
|
|
320
|
+
tests/test_v1_ifbench_items.py
|
|
321
|
+
tests/test_v1_lcb_axis.py
|
|
322
|
+
tests/test_v1_lcb_items.py
|
|
323
|
+
tests/test_v1_math_axis.py
|
|
324
|
+
tests/test_v1_math_items.py
|
|
325
|
+
tests/test_v1_mmlu_pro_axis.py
|
|
326
|
+
tests/test_v1_mmlu_pro_items.py
|
|
327
|
+
tests/test_v1_ruler_axis.py
|
|
328
|
+
tests/test_verdict_integrity_direct_finalize.py
|
|
329
|
+
tests/test_wave3_attestation_run_id.py
|
|
330
|
+
tests/test_wave3_cli_ux.py
|
|
331
|
+
tests/test_web_build_data.py
|
|
332
|
+
tests/test_web_scorecard.py
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
localbench
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Local benchmark runner package."""
|