PyPI - benchmark-reliability - Versions diffs - 0.1.2__tar.gz → 0.1.3__tar.gz - Mend

benchmark-reliability 0.1.2.tar.gz → 0.1.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

{benchmark_reliability-0.1.2/src/benchmark_reliability.egg-info → benchmark_reliability-0.1.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: benchmark-reliability
-Version: 0.1.2
+Version: 0.1.3
 Summary: Benchmark Reliability Framework (BRF) - dataset-level reliability auditing for predictive benchmarks
 Author-email: zhanglizhuo <zhanglizhuo@gmail.com>
 License: MIT
@@ -38,7 +38,7 @@ from brf.phase import plot_phase_diagram
 from brf.report import export_json
 analyzer = BRFAnalyzer(n_splits=30, n_permutations=200).fit(X, y, groups=groups)
-print(analyzer.brf_vector)   # (B, I, N, M) → (S, E) → class
+print(analyzer.brf_vector)   # (B, I, N, M) -> (S, E) -> class
 # Visualization
 plot_phase_diagram(
@@ -55,20 +55,20 @@ export_json(analyzer.brf_vector, "results.json")
 ```
 brf/
-├── __init__.py
-├── analyzer.py          ← BRFAnalyzer main class
-├── metrics/
-│   ├── baseline_gap.py  ← B
-│   ├── instability.py   ← I
-│   ├── null_test.py     ← N (permutation test)
-│   └── metadata.py      ← M
-├── phase/
-│   ├── embedding.py     ← S = N - I, E = B + M
-│   ├── classifier.py    ← Reliable / Fragile / Void
-│   └── visualization.py ← phase diagram, clustering plot
-├── report/
-│   ├── json_export.py
-│   └── latex_export.py
+|-- __init__.py
+|-- analyzer.py          <- BRFAnalyzer main class
+|-- metrics/
+|   |-- baseline_gap.py  <- B
+|   |-- instability.py   <- I
+|   |-- null_test.py     <- N (permutation test)
+|   |-- metadata.py      <- M
+|-- phase/
+|   |-- embedding.py     <- S = N - I, E = B + M
+|   |-- classifier.py    <- Reliable / Fragile / Void
+|   |-- visualization.py <- phase diagram, clustering plot
+|-- report/
+|   |-- json_export.py
+|   |-- latex_export.py
 ```
 ## Steps
@@ -86,9 +86,9 @@ brf/
 ### Phase 3: Documentation + distribution (1-2 weeks)
 - [x] Write README with quick-start tutorial and API docs
-- [ ] Publish to TestPyPI → PyPI
+- [ ] Publish to TestPyPI -> PyPI
 - [ ] Set up ReadTheDocs for auto-generated documentation
-- [ ] Add GitHub Actions CI (test on Python 3.9–3.12)
+- [ ] Add GitHub Actions CI (test on Python 3.9-3.12)
 ### Phase 4: HuggingFace Hub integration (optional, 1 week)
 - [ ] Add HF dataset loading wrapper
@@ -104,7 +104,7 @@ brf/
 ## Relationship to Sister Repos
 - `BehaviorAudit/`: source of the audit logic; this package refactors and generalizes it
-- `LLMScoringAudit/`: first applied use case (MM-TBA × multiple LLMs)
+- `LLMScoringAudit/`: first applied use case (MM-TBA x multiple LLMs)
 - `BenchmarkPhase/`: large-scale application (30 datasets BRF leaderboard)
 - `llm-annotation/`: cited for complementary MLLM pseudo-label reliability findings
@@ -115,7 +115,7 @@ brf/
 ## Timeline
-- Phase 1–2: 3 weeks
+- Phase 1-2: 3 weeks
 - Phase 3: 2 weeks
 - Phase 4: optional
 - JOSS submission: after Phase 3

{benchmark_reliability-0.1.2 → benchmark_reliability-0.1.3}/README.md RENAMED Viewed

@@ -14,7 +14,7 @@ from brf.phase import plot_phase_diagram
 from brf.report import export_json
 analyzer = BRFAnalyzer(n_splits=30, n_permutations=200).fit(X, y, groups=groups)
-print(analyzer.brf_vector)   # (B, I, N, M) → (S, E) → class
+print(analyzer.brf_vector)   # (B, I, N, M) -> (S, E) -> class
 # Visualization
 plot_phase_diagram(
@@ -31,20 +31,20 @@ export_json(analyzer.brf_vector, "results.json")
 ```
 brf/
-├── __init__.py
-├── analyzer.py          ← BRFAnalyzer main class
-├── metrics/
-│   ├── baseline_gap.py  ← B
-│   ├── instability.py   ← I
-│   ├── null_test.py     ← N (permutation test)
-│   └── metadata.py      ← M
-├── phase/
-│   ├── embedding.py     ← S = N - I, E = B + M
-│   ├── classifier.py    ← Reliable / Fragile / Void
-│   └── visualization.py ← phase diagram, clustering plot
-├── report/
-│   ├── json_export.py
-│   └── latex_export.py
+|-- __init__.py
+|-- analyzer.py          <- BRFAnalyzer main class
+|-- metrics/
+|   |-- baseline_gap.py  <- B
+|   |-- instability.py   <- I
+|   |-- null_test.py     <- N (permutation test)
+|   |-- metadata.py      <- M
+|-- phase/
+|   |-- embedding.py     <- S = N - I, E = B + M
+|   |-- classifier.py    <- Reliable / Fragile / Void
+|   |-- visualization.py <- phase diagram, clustering plot
+|-- report/
+|   |-- json_export.py
+|   |-- latex_export.py
 ```
 ## Steps
@@ -62,9 +62,9 @@ brf/
 ### Phase 3: Documentation + distribution (1-2 weeks)
 - [x] Write README with quick-start tutorial and API docs
-- [ ] Publish to TestPyPI → PyPI
+- [ ] Publish to TestPyPI -> PyPI
 - [ ] Set up ReadTheDocs for auto-generated documentation
-- [ ] Add GitHub Actions CI (test on Python 3.9–3.12)
+- [ ] Add GitHub Actions CI (test on Python 3.9-3.12)
 ### Phase 4: HuggingFace Hub integration (optional, 1 week)
 - [ ] Add HF dataset loading wrapper
@@ -80,7 +80,7 @@ brf/
 ## Relationship to Sister Repos
 - `BehaviorAudit/`: source of the audit logic; this package refactors and generalizes it
-- `LLMScoringAudit/`: first applied use case (MM-TBA × multiple LLMs)
+- `LLMScoringAudit/`: first applied use case (MM-TBA x multiple LLMs)
 - `BenchmarkPhase/`: large-scale application (30 datasets BRF leaderboard)
 - `llm-annotation/`: cited for complementary MLLM pseudo-label reliability findings
@@ -91,7 +91,7 @@ brf/
 ## Timeline
-- Phase 1–2: 3 weeks
+- Phase 1-2: 3 weeks
 - Phase 3: 2 weeks
 - Phase 4: optional
 - JOSS submission: after Phase 3

{benchmark_reliability-0.1.2 → benchmark_reliability-0.1.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "benchmark-reliability"
-version = "0.1.2"
+version = "0.1.3"
 description = "Benchmark Reliability Framework (BRF) - dataset-level reliability auditing for predictive benchmarks"
 readme = "README.md"
 license = { text = "MIT" }

{benchmark_reliability-0.1.2 → benchmark_reliability-0.1.3}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name="benchmark-reliability",
-    version="0.1.2",
+    version="0.1.3",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
 )

{benchmark_reliability-0.1.2 → benchmark_reliability-0.1.3/src/benchmark_reliability.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: benchmark-reliability
-Version: 0.1.2
+Version: 0.1.3
 Summary: Benchmark Reliability Framework (BRF) - dataset-level reliability auditing for predictive benchmarks
 Author-email: zhanglizhuo <zhanglizhuo@gmail.com>
 License: MIT
@@ -38,7 +38,7 @@ from brf.phase import plot_phase_diagram
 from brf.report import export_json
 analyzer = BRFAnalyzer(n_splits=30, n_permutations=200).fit(X, y, groups=groups)
-print(analyzer.brf_vector)   # (B, I, N, M) → (S, E) → class
+print(analyzer.brf_vector)   # (B, I, N, M) -> (S, E) -> class
 # Visualization
 plot_phase_diagram(
@@ -55,20 +55,20 @@ export_json(analyzer.brf_vector, "results.json")
 ```
 brf/
-├── __init__.py
-├── analyzer.py          ← BRFAnalyzer main class
-├── metrics/
-│   ├── baseline_gap.py  ← B
-│   ├── instability.py   ← I
-│   ├── null_test.py     ← N (permutation test)
-│   └── metadata.py      ← M
-├── phase/
-│   ├── embedding.py     ← S = N - I, E = B + M
-│   ├── classifier.py    ← Reliable / Fragile / Void
-│   └── visualization.py ← phase diagram, clustering plot
-├── report/
-│   ├── json_export.py
-│   └── latex_export.py
+|-- __init__.py
+|-- analyzer.py          <- BRFAnalyzer main class
+|-- metrics/
+|   |-- baseline_gap.py  <- B
+|   |-- instability.py   <- I
+|   |-- null_test.py     <- N (permutation test)
+|   |-- metadata.py      <- M
+|-- phase/
+|   |-- embedding.py     <- S = N - I, E = B + M
+|   |-- classifier.py    <- Reliable / Fragile / Void
+|   |-- visualization.py <- phase diagram, clustering plot
+|-- report/
+|   |-- json_export.py
+|   |-- latex_export.py
 ```
 ## Steps
@@ -86,9 +86,9 @@ brf/
 ### Phase 3: Documentation + distribution (1-2 weeks)
 - [x] Write README with quick-start tutorial and API docs
-- [ ] Publish to TestPyPI → PyPI
+- [ ] Publish to TestPyPI -> PyPI
 - [ ] Set up ReadTheDocs for auto-generated documentation
-- [ ] Add GitHub Actions CI (test on Python 3.9–3.12)
+- [ ] Add GitHub Actions CI (test on Python 3.9-3.12)
 ### Phase 4: HuggingFace Hub integration (optional, 1 week)
 - [ ] Add HF dataset loading wrapper
@@ -104,7 +104,7 @@ brf/
 ## Relationship to Sister Repos
 - `BehaviorAudit/`: source of the audit logic; this package refactors and generalizes it
-- `LLMScoringAudit/`: first applied use case (MM-TBA × multiple LLMs)
+- `LLMScoringAudit/`: first applied use case (MM-TBA x multiple LLMs)
 - `BenchmarkPhase/`: large-scale application (30 datasets BRF leaderboard)
 - `llm-annotation/`: cited for complementary MLLM pseudo-label reliability findings
@@ -115,7 +115,7 @@ brf/
 ## Timeline
-- Phase 1–2: 3 weeks
+- Phase 1-2: 3 weeks
 - Phase 3: 2 weeks
 - Phase 4: optional
 - JOSS submission: after Phase 3