infereval 0.5.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. infereval-0.5.10/.gitignore +85 -0
  2. infereval-0.5.10/CHANGELOG.md +1208 -0
  3. infereval-0.5.10/LICENSE +21 -0
  4. infereval-0.5.10/PKG-INFO +233 -0
  5. infereval-0.5.10/README.md +161 -0
  6. infereval-0.5.10/docs/README.md +40 -0
  7. infereval-0.5.10/examples/pulmonary_edema/README.md +64 -0
  8. infereval-0.5.10/examples/pulmonary_edema/benchmark.json +1204 -0
  9. infereval-0.5.10/examples/stop_sign/README.md +24 -0
  10. infereval-0.5.10/examples/stop_sign/benchmark.json +149 -0
  11. infereval-0.5.10/pyproject.toml +128 -0
  12. infereval-0.5.10/src/infereval/__init__.py +21 -0
  13. infereval-0.5.10/src/infereval/benchmark.py +679 -0
  14. infereval-0.5.10/src/infereval/cli/__init__.py +1 -0
  15. infereval-0.5.10/src/infereval/cli/describe_cmd.py +732 -0
  16. infereval-0.5.10/src/infereval/cli/evaluate_cmd.py +344 -0
  17. infereval-0.5.10/src/infereval/cli/main.py +48 -0
  18. infereval-0.5.10/src/infereval/cli/metrics_cmd.py +254 -0
  19. infereval-0.5.10/src/infereval/cli/model_cmd.py +155 -0
  20. infereval-0.5.10/src/infereval/cli/report_cmd.py +172 -0
  21. infereval-0.5.10/src/infereval/cli/structure_cmd.py +158 -0
  22. infereval-0.5.10/src/infereval/cli/sweep_cmd.py +217 -0
  23. infereval-0.5.10/src/infereval/cli/validate_cmd.py +85 -0
  24. infereval-0.5.10/src/infereval/context.py +176 -0
  25. infereval-0.5.10/src/infereval/endorsement.py +356 -0
  26. infereval-0.5.10/src/infereval/evaluation.py +451 -0
  27. infereval-0.5.10/src/infereval/frame.py +103 -0
  28. infereval-0.5.10/src/infereval/logging_setup.py +213 -0
  29. infereval-0.5.10/src/infereval/metrics.py +559 -0
  30. infereval-0.5.10/src/infereval/modeling.py +338 -0
  31. infereval-0.5.10/src/infereval/prompts.py +152 -0
  32. infereval-0.5.10/src/infereval/providers/__init__.py +83 -0
  33. infereval-0.5.10/src/infereval/providers/anthropic.py +216 -0
  34. infereval-0.5.10/src/infereval/providers/base.py +273 -0
  35. infereval-0.5.10/src/infereval/providers/mock.py +270 -0
  36. infereval-0.5.10/src/infereval/providers/openai.py +233 -0
  37. infereval-0.5.10/src/infereval/providers/openrouter.py +70 -0
  38. infereval-0.5.10/src/infereval/py.typed +0 -0
  39. infereval-0.5.10/src/infereval/report.py +782 -0
  40. infereval-0.5.10/src/infereval/schemas/__init__.py +103 -0
  41. infereval-0.5.10/src/infereval/schemas/benchmark.schema.json +641 -0
  42. infereval-0.5.10/src/infereval/schemas/evaluation.schema.json +536 -0
  43. infereval-0.5.10/src/infereval/structure.py +417 -0
  44. infereval-0.5.10/src/infereval/sweep.py +259 -0
  45. infereval-0.5.10/src/infereval/types.py +117 -0
  46. infereval-0.5.10/tests/__init__.py +0 -0
  47. infereval-0.5.10/tests/conftest.py +166 -0
  48. infereval-0.5.10/tests/fixtures/__init__.py +0 -0
  49. infereval-0.5.10/tests/fixtures/build_stop_sign_replay.py +133 -0
  50. infereval-0.5.10/tests/fixtures/stop_sign_replay.jsonl +20 -0
  51. infereval-0.5.10/tests/integration/__init__.py +0 -0
  52. infereval-0.5.10/tests/integration/test_providers_live.py +73 -0
  53. infereval-0.5.10/tests/unit/__init__.py +0 -0
  54. infereval-0.5.10/tests/unit/test_analyst_rationales_propagation.py +242 -0
  55. infereval-0.5.10/tests/unit/test_benchmark_io.py +997 -0
  56. infereval-0.5.10/tests/unit/test_cli_describe.py +669 -0
  57. infereval-0.5.10/tests/unit/test_cli_evaluate.py +468 -0
  58. infereval-0.5.10/tests/unit/test_cli_metrics.py +237 -0
  59. infereval-0.5.10/tests/unit/test_cli_validate.py +126 -0
  60. infereval-0.5.10/tests/unit/test_context.py +135 -0
  61. infereval-0.5.10/tests/unit/test_endorsement.py +471 -0
  62. infereval-0.5.10/tests/unit/test_evaluate.py +327 -0
  63. infereval-0.5.10/tests/unit/test_evaluation_io.py +218 -0
  64. infereval-0.5.10/tests/unit/test_frame.py +122 -0
  65. infereval-0.5.10/tests/unit/test_logging_e2e.py +205 -0
  66. infereval-0.5.10/tests/unit/test_logging_setup.py +231 -0
  67. infereval-0.5.10/tests/unit/test_majority_vote.py +124 -0
  68. infereval-0.5.10/tests/unit/test_metrics_basic.py +152 -0
  69. infereval-0.5.10/tests/unit/test_metrics_cohen.py +144 -0
  70. infereval-0.5.10/tests/unit/test_metrics_fleiss.py +282 -0
  71. infereval-0.5.10/tests/unit/test_metrics_report.py +140 -0
  72. infereval-0.5.10/tests/unit/test_metrics_stop_sign.py +162 -0
  73. infereval-0.5.10/tests/unit/test_modeling.py +218 -0
  74. infereval-0.5.10/tests/unit/test_prompts.py +181 -0
  75. infereval-0.5.10/tests/unit/test_provider_anthropic.py +289 -0
  76. infereval-0.5.10/tests/unit/test_provider_base.py +174 -0
  77. infereval-0.5.10/tests/unit/test_provider_mock.py +63 -0
  78. infereval-0.5.10/tests/unit/test_provider_openai.py +279 -0
  79. infereval-0.5.10/tests/unit/test_provider_openrouter.py +83 -0
  80. infereval-0.5.10/tests/unit/test_provider_registry.py +32 -0
  81. infereval-0.5.10/tests/unit/test_provider_replay.py +197 -0
  82. infereval-0.5.10/tests/unit/test_replay_e2e.py +243 -0
  83. infereval-0.5.10/tests/unit/test_report.py +748 -0
  84. infereval-0.5.10/tests/unit/test_schemas.py +100 -0
  85. infereval-0.5.10/tests/unit/test_smoke.py +35 -0
  86. infereval-0.5.10/tests/unit/test_structure.py +408 -0
  87. infereval-0.5.10/tests/unit/test_sweep.py +198 -0
  88. infereval-0.5.10/tests/unit/test_types.py +103 -0
@@ -0,0 +1,85 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ share/python-wheels/
20
+ *.egg-info/
21
+ .installed.cfg
22
+ *.egg
23
+ MANIFEST
24
+
25
+ # Virtual environments
26
+ .venv/
27
+ venv/
28
+ env/
29
+ ENV/
30
+
31
+ # Testing / coverage
32
+ .pytest_cache/
33
+ .coverage
34
+ .coverage.*
35
+ htmlcov/
36
+ .tox/
37
+ .nox/
38
+ coverage.xml
39
+ *.cover
40
+ .hypothesis/
41
+
42
+ # Type checkers
43
+ .mypy_cache/
44
+ .dmypy.json
45
+ .pyre/
46
+ .pytype/
47
+ .ruff_cache/
48
+
49
+ # Editors / OS
50
+ .idea/
51
+ .vscode/
52
+ *.swp
53
+ *.swo
54
+ .DS_Store
55
+
56
+ # Logs (research artifacts are exempt: keep examples/*, but ignore stray logs)
57
+ logs/
58
+ *.log
59
+
60
+ # Per-experiment outputs (regenerable; the scripts are tracked, not their output).
61
+ # Committed result sets live in experiments/results/, which is tracked.
62
+ experiments/out/
63
+
64
+ # LaTeX build artifacts
65
+ *.aux
66
+ *.bbl
67
+ *.bcf
68
+ *.blg
69
+ *.fdb_latexmk
70
+ *.fls
71
+ *.lof
72
+ *.log
73
+ *.lot
74
+ *.out
75
+ *.run.xml
76
+ *.synctex.gz
77
+ *.toc
78
+ revised.pdf
79
+ paper.pdf
80
+
81
+ # Local env
82
+ .env
83
+ .env.local
84
+ site/
85
+ .cache/