infereval 0.5.10__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- infereval-0.5.10/.gitignore +85 -0
- infereval-0.5.10/CHANGELOG.md +1208 -0
- infereval-0.5.10/LICENSE +21 -0
- infereval-0.5.10/PKG-INFO +233 -0
- infereval-0.5.10/README.md +161 -0
- infereval-0.5.10/docs/README.md +40 -0
- infereval-0.5.10/examples/pulmonary_edema/README.md +64 -0
- infereval-0.5.10/examples/pulmonary_edema/benchmark.json +1204 -0
- infereval-0.5.10/examples/stop_sign/README.md +24 -0
- infereval-0.5.10/examples/stop_sign/benchmark.json +149 -0
- infereval-0.5.10/pyproject.toml +128 -0
- infereval-0.5.10/src/infereval/__init__.py +21 -0
- infereval-0.5.10/src/infereval/benchmark.py +679 -0
- infereval-0.5.10/src/infereval/cli/__init__.py +1 -0
- infereval-0.5.10/src/infereval/cli/describe_cmd.py +732 -0
- infereval-0.5.10/src/infereval/cli/evaluate_cmd.py +344 -0
- infereval-0.5.10/src/infereval/cli/main.py +48 -0
- infereval-0.5.10/src/infereval/cli/metrics_cmd.py +254 -0
- infereval-0.5.10/src/infereval/cli/model_cmd.py +155 -0
- infereval-0.5.10/src/infereval/cli/report_cmd.py +172 -0
- infereval-0.5.10/src/infereval/cli/structure_cmd.py +158 -0
- infereval-0.5.10/src/infereval/cli/sweep_cmd.py +217 -0
- infereval-0.5.10/src/infereval/cli/validate_cmd.py +85 -0
- infereval-0.5.10/src/infereval/context.py +176 -0
- infereval-0.5.10/src/infereval/endorsement.py +356 -0
- infereval-0.5.10/src/infereval/evaluation.py +451 -0
- infereval-0.5.10/src/infereval/frame.py +103 -0
- infereval-0.5.10/src/infereval/logging_setup.py +213 -0
- infereval-0.5.10/src/infereval/metrics.py +559 -0
- infereval-0.5.10/src/infereval/modeling.py +338 -0
- infereval-0.5.10/src/infereval/prompts.py +152 -0
- infereval-0.5.10/src/infereval/providers/__init__.py +83 -0
- infereval-0.5.10/src/infereval/providers/anthropic.py +216 -0
- infereval-0.5.10/src/infereval/providers/base.py +273 -0
- infereval-0.5.10/src/infereval/providers/mock.py +270 -0
- infereval-0.5.10/src/infereval/providers/openai.py +233 -0
- infereval-0.5.10/src/infereval/providers/openrouter.py +70 -0
- infereval-0.5.10/src/infereval/py.typed +0 -0
- infereval-0.5.10/src/infereval/report.py +782 -0
- infereval-0.5.10/src/infereval/schemas/__init__.py +103 -0
- infereval-0.5.10/src/infereval/schemas/benchmark.schema.json +641 -0
- infereval-0.5.10/src/infereval/schemas/evaluation.schema.json +536 -0
- infereval-0.5.10/src/infereval/structure.py +417 -0
- infereval-0.5.10/src/infereval/sweep.py +259 -0
- infereval-0.5.10/src/infereval/types.py +117 -0
- infereval-0.5.10/tests/__init__.py +0 -0
- infereval-0.5.10/tests/conftest.py +166 -0
- infereval-0.5.10/tests/fixtures/__init__.py +0 -0
- infereval-0.5.10/tests/fixtures/build_stop_sign_replay.py +133 -0
- infereval-0.5.10/tests/fixtures/stop_sign_replay.jsonl +20 -0
- infereval-0.5.10/tests/integration/__init__.py +0 -0
- infereval-0.5.10/tests/integration/test_providers_live.py +73 -0
- infereval-0.5.10/tests/unit/__init__.py +0 -0
- infereval-0.5.10/tests/unit/test_analyst_rationales_propagation.py +242 -0
- infereval-0.5.10/tests/unit/test_benchmark_io.py +997 -0
- infereval-0.5.10/tests/unit/test_cli_describe.py +669 -0
- infereval-0.5.10/tests/unit/test_cli_evaluate.py +468 -0
- infereval-0.5.10/tests/unit/test_cli_metrics.py +237 -0
- infereval-0.5.10/tests/unit/test_cli_validate.py +126 -0
- infereval-0.5.10/tests/unit/test_context.py +135 -0
- infereval-0.5.10/tests/unit/test_endorsement.py +471 -0
- infereval-0.5.10/tests/unit/test_evaluate.py +327 -0
- infereval-0.5.10/tests/unit/test_evaluation_io.py +218 -0
- infereval-0.5.10/tests/unit/test_frame.py +122 -0
- infereval-0.5.10/tests/unit/test_logging_e2e.py +205 -0
- infereval-0.5.10/tests/unit/test_logging_setup.py +231 -0
- infereval-0.5.10/tests/unit/test_majority_vote.py +124 -0
- infereval-0.5.10/tests/unit/test_metrics_basic.py +152 -0
- infereval-0.5.10/tests/unit/test_metrics_cohen.py +144 -0
- infereval-0.5.10/tests/unit/test_metrics_fleiss.py +282 -0
- infereval-0.5.10/tests/unit/test_metrics_report.py +140 -0
- infereval-0.5.10/tests/unit/test_metrics_stop_sign.py +162 -0
- infereval-0.5.10/tests/unit/test_modeling.py +218 -0
- infereval-0.5.10/tests/unit/test_prompts.py +181 -0
- infereval-0.5.10/tests/unit/test_provider_anthropic.py +289 -0
- infereval-0.5.10/tests/unit/test_provider_base.py +174 -0
- infereval-0.5.10/tests/unit/test_provider_mock.py +63 -0
- infereval-0.5.10/tests/unit/test_provider_openai.py +279 -0
- infereval-0.5.10/tests/unit/test_provider_openrouter.py +83 -0
- infereval-0.5.10/tests/unit/test_provider_registry.py +32 -0
- infereval-0.5.10/tests/unit/test_provider_replay.py +197 -0
- infereval-0.5.10/tests/unit/test_replay_e2e.py +243 -0
- infereval-0.5.10/tests/unit/test_report.py +748 -0
- infereval-0.5.10/tests/unit/test_schemas.py +100 -0
- infereval-0.5.10/tests/unit/test_smoke.py +35 -0
- infereval-0.5.10/tests/unit/test_structure.py +408 -0
- infereval-0.5.10/tests/unit/test_sweep.py +198 -0
- infereval-0.5.10/tests/unit/test_types.py +103 -0
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
.Python
|
|
7
|
+
build/
|
|
8
|
+
develop-eggs/
|
|
9
|
+
dist/
|
|
10
|
+
downloads/
|
|
11
|
+
eggs/
|
|
12
|
+
.eggs/
|
|
13
|
+
lib/
|
|
14
|
+
lib64/
|
|
15
|
+
parts/
|
|
16
|
+
sdist/
|
|
17
|
+
var/
|
|
18
|
+
wheels/
|
|
19
|
+
share/python-wheels/
|
|
20
|
+
*.egg-info/
|
|
21
|
+
.installed.cfg
|
|
22
|
+
*.egg
|
|
23
|
+
MANIFEST
|
|
24
|
+
|
|
25
|
+
# Virtual environments
|
|
26
|
+
.venv/
|
|
27
|
+
venv/
|
|
28
|
+
env/
|
|
29
|
+
ENV/
|
|
30
|
+
|
|
31
|
+
# Testing / coverage
|
|
32
|
+
.pytest_cache/
|
|
33
|
+
.coverage
|
|
34
|
+
.coverage.*
|
|
35
|
+
htmlcov/
|
|
36
|
+
.tox/
|
|
37
|
+
.nox/
|
|
38
|
+
coverage.xml
|
|
39
|
+
*.cover
|
|
40
|
+
.hypothesis/
|
|
41
|
+
|
|
42
|
+
# Type checkers
|
|
43
|
+
.mypy_cache/
|
|
44
|
+
.dmypy.json
|
|
45
|
+
.pyre/
|
|
46
|
+
.pytype/
|
|
47
|
+
.ruff_cache/
|
|
48
|
+
|
|
49
|
+
# Editors / OS
|
|
50
|
+
.idea/
|
|
51
|
+
.vscode/
|
|
52
|
+
*.swp
|
|
53
|
+
*.swo
|
|
54
|
+
.DS_Store
|
|
55
|
+
|
|
56
|
+
# Logs (ignore stray log output; research artifacts under examples/* are kept)
|
|
57
|
+
logs/
|
|
58
|
+
*.log
|
|
59
|
+
|
|
60
|
+
# Per-experiment outputs (regenerable; the scripts are tracked, not their output).
|
|
61
|
+
# Committed result sets live in experiments/results/, which is tracked.
|
|
62
|
+
experiments/out/
|
|
63
|
+
|
|
64
|
+
# LaTeX build artifacts
|
|
65
|
+
*.aux
|
|
66
|
+
*.bbl
|
|
67
|
+
*.bcf
|
|
68
|
+
*.blg
|
|
69
|
+
*.fdb_latexmk
|
|
70
|
+
*.fls
|
|
71
|
+
*.lof
|
|
72
|
+
*.log
|
|
73
|
+
*.lot
|
|
74
|
+
*.out
|
|
75
|
+
*.run.xml
|
|
76
|
+
*.synctex.gz
|
|
77
|
+
*.toc
|
|
78
|
+
revised.pdf
|
|
79
|
+
paper.pdf
|
|
80
|
+
|
|
81
|
+
# Local env
|
|
82
|
+
.env
|
|
83
|
+
.env.local
|
|
84
|
+
site/
|
|
85
|
+
.cache/
|