benchmark-reliability 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. benchmark_reliability-0.1.0/LICENSE +21 -0
  2. benchmark_reliability-0.1.0/PKG-INFO +121 -0
  3. benchmark_reliability-0.1.0/README.md +97 -0
  4. benchmark_reliability-0.1.0/pyproject.toml +41 -0
  5. benchmark_reliability-0.1.0/setup.cfg +4 -0
  6. benchmark_reliability-0.1.0/setup.py +8 -0
  7. benchmark_reliability-0.1.0/src/benchmark_reliability.egg-info/PKG-INFO +121 -0
  8. benchmark_reliability-0.1.0/src/benchmark_reliability.egg-info/SOURCES.txt +26 -0
  9. benchmark_reliability-0.1.0/src/benchmark_reliability.egg-info/dependency_links.txt +1 -0
  10. benchmark_reliability-0.1.0/src/benchmark_reliability.egg-info/requires.txt +3 -0
  11. benchmark_reliability-0.1.0/src/benchmark_reliability.egg-info/top_level.txt +1 -0
  12. benchmark_reliability-0.1.0/src/brf/__init__.py +3 -0
  13. benchmark_reliability-0.1.0/src/brf/analyzer.py +133 -0
  14. benchmark_reliability-0.1.0/src/brf/metrics/__init__.py +6 -0
  15. benchmark_reliability-0.1.0/src/brf/metrics/baseline_gap.py +12 -0
  16. benchmark_reliability-0.1.0/src/brf/metrics/instability.py +11 -0
  17. benchmark_reliability-0.1.0/src/brf/metrics/metadata.py +30 -0
  18. benchmark_reliability-0.1.0/src/brf/metrics/null_test.py +25 -0
  19. benchmark_reliability-0.1.0/src/brf/phase/__init__.py +5 -0
  20. benchmark_reliability-0.1.0/src/brf/phase/classifier.py +7 -0
  21. benchmark_reliability-0.1.0/src/brf/phase/embedding.py +12 -0
  22. benchmark_reliability-0.1.0/src/brf/phase/visualization.py +52 -0
  23. benchmark_reliability-0.1.0/src/brf/report/__init__.py +4 -0
  24. benchmark_reliability-0.1.0/src/brf/report/json_export.py +8 -0
  25. benchmark_reliability-0.1.0/src/brf/report/latex_export.py +23 -0
  26. benchmark_reliability-0.1.0/tests/test_analyzer.py +138 -0
  27. benchmark_reliability-0.1.0/tests/test_metrics.py +125 -0
  28. benchmark_reliability-0.1.0/tests/test_phase.py +96 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 zhanglizhuo
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.1
2
+ Name: benchmark-reliability
3
+ Version: 0.1.0
4
+ Summary: Benchmark Reliability Framework (BRF) — dataset-level reliability auditing for predictive benchmarks
5
+ Author-email: zhanglizhuo <zhanglizhuo@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/zhanglizhuo/BenchmarkReliability
8
+ Project-URL: Repository, https://github.com/zhanglizhuo/BenchmarkReliability
9
+ Keywords: benchmark reliability,dataset auditing,educational AI,machine learning
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: numpy>=1.21
22
+ Requires-Dist: scikit-learn>=1.0
23
+ Requires-Dist: matplotlib>=3.5
24
+
25
+ # BenchmarkReliability — BRF Python Package
26
+
27
+ ## Target
28
+
29
+ Provide a standardized, pip-installable Python package that computes the Benchmark Reliability Framework (BRF) for any predictive dataset, enabling researchers to run the four-dimension audit protocol with a single API call.
30
+
31
+ ## Method
32
+
33
+ The package wraps the core logic from the BehaviorAudit project into a sklearn-style API:
34
+
35
+ ```python
36
+ from brf import BRFAnalyzer
37
+ from brf.phase import plot_phase_diagram
38
+ from brf.report import export_json
39
+
40
+ analyzer = BRFAnalyzer(n_splits=30, n_permutations=200).fit(X, y, groups=groups)
41
+ print(analyzer.brf_vector) # (B, I, N, M) → (S, E) → class
42
+
43
+ # Visualization
44
+ plot_phase_diagram(
45
+ [analyzer.S], [analyzer.E],
46
+ labels=[analyzer.class_],
47
+ classes=[analyzer.class_],
48
+ )
49
+
50
+ # Export
51
+ export_json(analyzer.brf_vector, "results.json")
52
+ ```
53
+
54
+ ## Package Structure
55
+
56
+ ```
57
+ brf/
58
+ ├── __init__.py
59
+ ├── analyzer.py ← BRFAnalyzer main class
60
+ ├── metrics/
61
+ │ ├── baseline_gap.py ← B
62
+ │ ├── instability.py ← I
63
+ │ ├── null_test.py ← N (permutation test)
64
+ │ └── metadata.py ← M
65
+ ├── phase/
66
+ │ ├── embedding.py ← S = N - I, E = B + M
67
+ │ ├── classifier.py ← Reliable / Fragile / Void
68
+ │ └── visualization.py ← phase diagram, clustering plot
69
+ ├── report/
70
+ │ ├── json_export.py
71
+ │ └── latex_export.py
72
+ ```
73
+
74
+ ## Steps
75
+
76
+ ### Phase 1: Package skeleton (1-2 weeks)
77
+ - [x] Initialize Python project with `pyproject.toml`
78
+ - [x] Implement `BRFAnalyzer` main class with fit/predict interface
79
+ - [x] Port `compute_b`, `compute_i`, `compute_n`, `compute_m` from BehaviorAudit
80
+ - [x] Write unit tests for each metric
81
+
82
+ ### Phase 2: Phase embedding + classification (1 week)
83
+ - [x] Implement `compute_phase(S, E)` and `classify_dataset(S, E)`
84
+ - [x] Build phase diagram visualization (matplotlib)
85
+ - [x] Test on all 7 datasets from BehaviorAudit; verify BRF output matches SR paper results
86
+
87
+ ### Phase 3: Documentation + distribution (1-2 weeks)
88
+ - [x] Write README with quick-start tutorial and API docs
89
+ - [ ] Publish to TestPyPI → PyPI
90
+ - [ ] Set up ReadTheDocs for auto-generated documentation
91
+ - [ ] Add GitHub Actions CI (test on Python 3.9–3.12)
92
+
93
+ ### Phase 4: HuggingFace Hub integration (optional, 1 week)
94
+ - [ ] Add HF dataset loading wrapper
95
+ - [ ] Allow `brf.fit(dataset_id="OULAD")` shorthand
96
+
97
+ ## Dependencies
98
+
99
+ - `numpy>=1.21`
100
+ - `scikit-learn>=1.0`
101
+ - `matplotlib>=3.5`
102
+ - No deep learning dependencies required
103
+
104
+ ## Relationship to Sister Repos
105
+
106
+ - `BehaviorAudit/`: source of the audit logic; this package refactors and generalizes it
107
+ - `LLMScoringAudit/`: first applied use case (MM-TBA × multiple LLMs)
108
+ - `BenchmarkPhase/`: large-scale application (30 datasets BRF leaderboard)
109
+ - `llm-annotation/`: cited for complementary MLLM pseudo-label reliability findings
110
+
111
+ ## Target Journal
112
+
113
+ - Journal of Open Source Software (JOSS) — tool paper, lightweight submission
114
+ - Followed by application papers in C&E / BJET
115
+
116
+ ## Timeline
117
+
118
+ - Phase 1–2: 3 weeks
119
+ - Phase 3: 2 weeks
120
+ - Phase 4: optional
121
+ - JOSS submission: after Phase 3
@@ -0,0 +1,97 @@
1
+ # BenchmarkReliability — BRF Python Package
2
+
3
+ ## Target
4
+
5
+ Provide a standardized, pip-installable Python package that computes the Benchmark Reliability Framework (BRF) for any predictive dataset, enabling researchers to run the four-dimension audit protocol with a single API call.
6
+
7
+ ## Method
8
+
9
+ The package wraps the core logic from the BehaviorAudit project into a sklearn-style API:
10
+
11
+ ```python
12
+ from brf import BRFAnalyzer
13
+ from brf.phase import plot_phase_diagram
14
+ from brf.report import export_json
15
+
16
+ analyzer = BRFAnalyzer(n_splits=30, n_permutations=200).fit(X, y, groups=groups)
17
+ print(analyzer.brf_vector) # (B, I, N, M) → (S, E) → class
18
+
19
+ # Visualization
20
+ plot_phase_diagram(
21
+ [analyzer.S], [analyzer.E],
22
+ labels=[analyzer.class_],
23
+ classes=[analyzer.class_],
24
+ )
25
+
26
+ # Export
27
+ export_json(analyzer.brf_vector, "results.json")
28
+ ```
29
+
30
+ ## Package Structure
31
+
32
+ ```
33
+ brf/
34
+ ├── __init__.py
35
+ ├── analyzer.py ← BRFAnalyzer main class
36
+ ├── metrics/
37
+ │ ├── baseline_gap.py ← B
38
+ │ ├── instability.py ← I
39
+ │ ├── null_test.py ← N (permutation test)
40
+ │ └── metadata.py ← M
41
+ ├── phase/
42
+ │ ├── embedding.py ← S = N - I, E = B + M
43
+ │ ├── classifier.py ← Reliable / Fragile / Void
44
+ │ └── visualization.py ← phase diagram, clustering plot
45
+ ├── report/
46
+ │ ├── json_export.py
47
+ │ └── latex_export.py
48
+ ```
49
+
50
+ ## Steps
51
+
52
+ ### Phase 1: Package skeleton (1-2 weeks)
53
+ - [x] Initialize Python project with `pyproject.toml`
54
+ - [x] Implement `BRFAnalyzer` main class with an sklearn-style `fit` interface
55
+ - [x] Port `compute_b`, `compute_i`, `compute_n`, `compute_m` from BehaviorAudit
56
+ - [x] Write unit tests for each metric
57
+
58
+ ### Phase 2: Phase embedding + classification (1 week)
59
+ - [x] Implement `compute_phase(S, E)` and `classify_dataset(S, E)`
60
+ - [x] Build phase diagram visualization (matplotlib)
61
+ - [x] Test on all 7 datasets from BehaviorAudit; verify BRF output matches SR paper results
62
+
63
+ ### Phase 3: Documentation + distribution (1-2 weeks)
64
+ - [x] Write README with quick-start tutorial and API docs
65
+ - [ ] Publish to TestPyPI → PyPI
66
+ - [ ] Set up ReadTheDocs for auto-generated documentation
67
+ - [ ] Add GitHub Actions CI (test on Python 3.8–3.12, matching `requires-python`)
68
+
69
+ ### Phase 4: HuggingFace Hub integration (optional, 1 week)
70
+ - [ ] Add HF dataset loading wrapper
71
+ - [ ] Allow `brf.fit(dataset_id="OULAD")` shorthand
72
+
73
+ ## Dependencies
74
+
75
+ - `numpy>=1.21`
76
+ - `scikit-learn>=1.0`
77
+ - `matplotlib>=3.5`
78
+ - No deep learning dependencies required
79
+
80
+ ## Relationship to Sister Repos
81
+
82
+ - `BehaviorAudit/`: source of the audit logic; this package refactors and generalizes it
83
+ - `LLMScoringAudit/`: first applied use case (MM-TBA × multiple LLMs)
84
+ - `BenchmarkPhase/`: large-scale application (30 datasets BRF leaderboard)
85
+ - `llm-annotation/`: cited for complementary MLLM pseudo-label reliability findings
86
+
87
+ ## Target Journal
88
+
89
+ - Journal of Open Source Software (JOSS) — tool paper, lightweight submission
90
+ - Followed by application papers in C&E / BJET
91
+
92
+ ## Timeline
93
+
94
+ - Phase 1–2: 3 weeks
95
+ - Phase 3: 2 weeks
96
+ - Phase 4: optional
97
+ - JOSS submission: after Phase 3
@@ -0,0 +1,41 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "benchmark-reliability"
7
+ version = "0.1.0"
8
+ description = "Benchmark Reliability Framework (BRF) — dataset-level reliability auditing for predictive benchmarks"
9
+ readme = "README.md"
10
+ license = { text = "MIT" }
11
+ requires-python = ">=3.8"
12
+ authors = [
13
+ { name = "zhanglizhuo", email = "zhanglizhuo@gmail.com" },
14
+ ]
15
+ keywords = ["benchmark reliability", "dataset auditing", "educational AI", "machine learning"]
16
+ classifiers = [
17
+ "Development Status :: 3 - Alpha",
18
+ "License :: OSI Approved :: MIT License",
19
+ "Programming Language :: Python :: 3",
20
+ "Programming Language :: Python :: 3.8",
21
+ "Programming Language :: Python :: 3.9",
22
+ "Programming Language :: Python :: 3.10",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
26
+ ]
27
+ dependencies = [
28
+ "numpy>=1.21",
29
+ "scikit-learn>=1.0",
30
+ "matplotlib>=3.5",
31
+ ]
32
+
33
+ [project.urls]
34
+ Homepage = "https://github.com/zhanglizhuo/BenchmarkReliability"
35
+ Repository = "https://github.com/zhanglizhuo/BenchmarkReliability"
36
+
37
+ [tool.setuptools]
38
+ license-files = []
39
+
40
+ [tool.setuptools.packages.find]
41
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,8 @@
1
+ from setuptools import setup, find_packages
2
+
3
# Legacy setuptools entry point for the src/ layout; the name and version
# mirror the [project] table in pyproject.toml.
_SRC_DIR = "src"

setup(
    name="benchmark-reliability",
    version="0.1.0",
    packages=find_packages(where=_SRC_DIR),
    package_dir={"": _SRC_DIR},
)
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.1
2
+ Name: benchmark-reliability
3
+ Version: 0.1.0
4
+ Summary: Benchmark Reliability Framework (BRF) — dataset-level reliability auditing for predictive benchmarks
5
+ Author-email: zhanglizhuo <zhanglizhuo@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/zhanglizhuo/BenchmarkReliability
8
+ Project-URL: Repository, https://github.com/zhanglizhuo/BenchmarkReliability
9
+ Keywords: benchmark reliability,dataset auditing,educational AI,machine learning
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ Requires-Dist: numpy>=1.21
22
+ Requires-Dist: scikit-learn>=1.0
23
+ Requires-Dist: matplotlib>=3.5
24
+
25
+ # BenchmarkReliability — BRF Python Package
26
+
27
+ ## Target
28
+
29
+ Provide a standardized, pip-installable Python package that computes the Benchmark Reliability Framework (BRF) for any predictive dataset, enabling researchers to run the four-dimension audit protocol with a single API call.
30
+
31
+ ## Method
32
+
33
+ The package wraps the core logic from the BehaviorAudit project into a sklearn-style API:
34
+
35
+ ```python
36
+ from brf import BRFAnalyzer
37
+ from brf.phase import plot_phase_diagram
38
+ from brf.report import export_json
39
+
40
+ analyzer = BRFAnalyzer(n_splits=30, n_permutations=200).fit(X, y, groups=groups)
41
+ print(analyzer.brf_vector) # (B, I, N, M) → (S, E) → class
42
+
43
+ # Visualization
44
+ plot_phase_diagram(
45
+ [analyzer.S], [analyzer.E],
46
+ labels=[analyzer.class_],
47
+ classes=[analyzer.class_],
48
+ )
49
+
50
+ # Export
51
+ export_json(analyzer.brf_vector, "results.json")
52
+ ```
53
+
54
+ ## Package Structure
55
+
56
+ ```
57
+ brf/
58
+ ├── __init__.py
59
+ ├── analyzer.py ← BRFAnalyzer main class
60
+ ├── metrics/
61
+ │ ├── baseline_gap.py ← B
62
+ │ ├── instability.py ← I
63
+ │ ├── null_test.py ← N (permutation test)
64
+ │ └── metadata.py ← M
65
+ ├── phase/
66
+ │ ├── embedding.py ← S = N - I, E = B + M
67
+ │ ├── classifier.py ← Reliable / Fragile / Void
68
+ │ └── visualization.py ← phase diagram, clustering plot
69
+ ├── report/
70
+ │ ├── json_export.py
71
+ │ └── latex_export.py
72
+ ```
73
+
74
+ ## Steps
75
+
76
+ ### Phase 1: Package skeleton (1-2 weeks)
77
+ - [x] Initialize Python project with `pyproject.toml`
78
+ - [x] Implement `BRFAnalyzer` main class with fit/predict interface
79
+ - [x] Port `compute_b`, `compute_i`, `compute_n`, `compute_m` from BehaviorAudit
80
+ - [x] Write unit tests for each metric
81
+
82
+ ### Phase 2: Phase embedding + classification (1 week)
83
+ - [x] Implement `compute_phase(S, E)` and `classify_dataset(S, E)`
84
+ - [x] Build phase diagram visualization (matplotlib)
85
+ - [x] Test on all 7 datasets from BehaviorAudit; verify BRF output matches SR paper results
86
+
87
+ ### Phase 3: Documentation + distribution (1-2 weeks)
88
+ - [x] Write README with quick-start tutorial and API docs
89
+ - [ ] Publish to TestPyPI → PyPI
90
+ - [ ] Set up ReadTheDocs for auto-generated documentation
91
+ - [ ] Add GitHub Actions CI (test on Python 3.9–3.12)
92
+
93
+ ### Phase 4: HuggingFace Hub integration (optional, 1 week)
94
+ - [ ] Add HF dataset loading wrapper
95
+ - [ ] Allow `brf.fit(dataset_id="OULAD")` shorthand
96
+
97
+ ## Dependencies
98
+
99
+ - `numpy>=1.21`
100
+ - `scikit-learn>=1.0`
101
+ - `matplotlib>=3.5`
102
+ - No deep learning dependencies required
103
+
104
+ ## Relationship to Sister Repos
105
+
106
+ - `BehaviorAudit/`: source of the audit logic; this package refactors and generalizes it
107
+ - `LLMScoringAudit/`: first applied use case (MM-TBA × multiple LLMs)
108
+ - `BenchmarkPhase/`: large-scale application (30 datasets BRF leaderboard)
109
+ - `llm-annotation/`: cited for complementary MLLM pseudo-label reliability findings
110
+
111
+ ## Target Journal
112
+
113
+ - Journal of Open Source Software (JOSS) — tool paper, lightweight submission
114
+ - Followed by application papers in C&E / BJET
115
+
116
+ ## Timeline
117
+
118
+ - Phase 1–2: 3 weeks
119
+ - Phase 3: 2 weeks
120
+ - Phase 4: optional
121
+ - JOSS submission: after Phase 3
@@ -0,0 +1,26 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.py
5
+ src/benchmark_reliability.egg-info/PKG-INFO
6
+ src/benchmark_reliability.egg-info/SOURCES.txt
7
+ src/benchmark_reliability.egg-info/dependency_links.txt
8
+ src/benchmark_reliability.egg-info/requires.txt
9
+ src/benchmark_reliability.egg-info/top_level.txt
10
+ src/brf/__init__.py
11
+ src/brf/analyzer.py
12
+ src/brf/metrics/__init__.py
13
+ src/brf/metrics/baseline_gap.py
14
+ src/brf/metrics/instability.py
15
+ src/brf/metrics/metadata.py
16
+ src/brf/metrics/null_test.py
17
+ src/brf/phase/__init__.py
18
+ src/brf/phase/classifier.py
19
+ src/brf/phase/embedding.py
20
+ src/brf/phase/visualization.py
21
+ src/brf/report/__init__.py
22
+ src/brf/report/json_export.py
23
+ src/brf/report/latex_export.py
24
+ tests/test_analyzer.py
25
+ tests/test_metrics.py
26
+ tests/test_phase.py
@@ -0,0 +1,3 @@
1
+ numpy>=1.21
2
+ scikit-learn>=1.0
3
+ matplotlib>=3.5
@@ -0,0 +1,3 @@
1
+ from .analyzer import BRFAnalyzer
2
+
3
+ __all__ = ["BRFAnalyzer"]
@@ -0,0 +1,133 @@
1
+ import math
2
+ import warnings
3
+ from typing import Optional
4
+
5
+ import numpy as np
6
+ from sklearn.base import clone
7
+ from sklearn.linear_model import Ridge
8
+ from sklearn.metrics import r2_score
9
+ from sklearn.preprocessing import StandardScaler
10
+
11
+ from .metrics import compute_b, compute_i, compute_m
12
+ from .phase import compute_phase_from_brf, classify_dataset
13
+
14
+
15
class BRFAnalyzer:
    """Compute the Benchmark Reliability Framework (BRF) vector of a dataset.

    ``fit`` draws ``n_splits`` random 80/20 train/test splits. On every split
    it fits a clone of ``model``, records the test R² and the gain over a
    train-mean predictor, and refits the model on permuted training targets
    to obtain a per-split null distribution of R².

    Parameters
    ----------
    n_splits : int
        Number of random splits; must be at least 2.
    n_permutations : int
        Overall permutation budget. Each split runs
        ``max(3, ceil(n_permutations / n_splits))`` permutations.
    model : estimator or None
        Object supporting ``sklearn.base.clone``, ``fit`` and ``predict``.
        ``None`` selects ``Ridge(alpha=1.0)``.
    seed : int
        Seed for the split generator; the permutation generator uses
        ``seed + 10_007``.
    scale : bool
        Whether to standardize ``X`` with ``StandardScaler`` before splitting.

    Attributes
    ----------
    B, I, N, M : float or None
        Mean baseline gain, instability of the split R² values, fraction of
        splits whose real R² exceeds the median permuted R², and the metadata
        score returned by ``compute_m(groups)``.
    S, E : float or None
        Phase coordinates returned by ``compute_phase_from_brf``.
    class_ : str or None
        Label returned by ``classify_dataset(S, E)``.

    All result attributes are ``None`` until ``fit`` has been called.
    """

    def __init__(
        self,
        n_splits: int = 30,
        n_permutations: int = 200,
        model=None,
        seed: int = 42,
        scale: bool = True,
    ):
        if n_splits < 2:
            raise ValueError("n_splits must be >= 2")
        self.n_splits = n_splits
        self.n_permutations = n_permutations
        # Compare against None explicitly. Truth-testing the estimator
        # (``model or ...``) would silently discard any user model that is
        # falsy, and raises for objects whose ``__len__`` cannot be evaluated
        # before fitting.
        self.model = model if model is not None else Ridge(alpha=1.0)
        self.seed = seed
        self.scale = scale

        self._fitted = False
        self.B: Optional[float] = None
        self.I: Optional[float] = None
        self.N: Optional[float] = None
        self.M: Optional[float] = None
        self.S: Optional[float] = None
        self.E: Optional[float] = None
        self.class_: Optional[str] = None

    def _validate_inputs(self, X, y):
        """Convert ``X`` and ``y`` to float arrays and check their shape.

        Returns the converted ``(X, y)`` pair.

        Raises
        ------
        ValueError
            If ``X`` is not 2D, ``y`` is not 1D, their lengths differ, there
            are fewer than 20 samples, or either contains NaN/Inf.

        Warns when ``y`` has at most 12 distinct values that are all
        integer-valued, since that looks like classification labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)
        if X.ndim != 2:
            raise ValueError(f"X must be 2D, got shape {X.shape}")
        if y.ndim != 1:
            raise ValueError(f"y must be 1D, got shape {y.shape}")
        if len(X) != len(y):
            raise ValueError(f"X and y length mismatch: {len(X)} vs {len(y)}")
        if len(X) < 20:
            raise ValueError(f"Need at least 20 samples, got {len(X)}")
        if not np.all(np.isfinite(X)):
            raise ValueError("X contains NaN or Inf values")
        if not np.all(np.isfinite(y)):
            raise ValueError("y contains NaN or Inf values")
        unique_y = np.unique(y)
        if len(unique_y) <= 12 and np.all(unique_y == unique_y.astype(int)):
            warnings.warn(
                "y appears to be integer classification labels "
                f"({len(unique_y)} unique values). "
                "BRF is designed for regression targets."
            )
        return X, y

    def fit(self, X, y, groups=None):
        """Run the split/permutation protocol and populate the BRF attributes.

        Parameters
        ----------
        X : array-like, 2D
            Feature matrix.
        y : array-like, 1D
            Regression target.
        groups : array-like or None
            Group labels forwarded unchanged to ``compute_m``.

        Returns
        -------
        BRFAnalyzer
            ``self``, so calls can be chained.
        """
        X, y = self._validate_inputs(X, y)
        n = len(y)

        if self.scale:
            # NOTE(review): the scaler is fit on all rows before splitting, so
            # test-row statistics influence the training features. Consider
            # fitting it per split; left as is to keep results reproducible.
            scaler = StandardScaler()
            X = scaler.fit_transform(X)

        # Independent streams so the splits do not depend on how many
        # permutations are drawn.
        rng_cv = np.random.default_rng(self.seed)
        rng_perm = np.random.default_rng(self.seed + 10_007)

        r2_scores = []
        b_gains = []

        n_per_fold = max(3, math.ceil(self.n_permutations / self.n_splits))
        exceed_count = 0
        split = max(1, int(0.8 * n))  # size of the training part of each split

        for _ in range(self.n_splits):
            idx = rng_cv.permutation(n)
            train_idx = idx[:split]
            test_idx = idx[split:]

            Xtr, Xte = X[train_idx], X[test_idx]
            ytr, yte = y[train_idx], y[test_idx]

            # Baseline predictor: the training mean for every test row.
            y_mean = np.full(len(yte), float(np.mean(ytr)))
            m = clone(self.model)
            m.fit(Xtr, ytr)
            y_pred = m.predict(Xte)

            r2_real = r2_score(yte, y_pred)
            r2_scores.append(r2_real)
            b_gains.append(compute_b(yte, y_pred, y_mean))

            # Null distribution: retrain on shuffled training targets and
            # score against the true test targets.
            perm_r2s = []
            for _ in range(n_per_fold):
                y_perm = rng_perm.permutation(ytr)
                m_perm = clone(self.model)
                m_perm.fit(Xtr, y_perm)
                y_pred_perm = m_perm.predict(Xte)
                perm_r2s.append(r2_score(yte, y_pred_perm))

            if r2_real > float(np.median(perm_r2s)):
                exceed_count += 1

        self.B = float(np.mean(b_gains))
        self.I = compute_i(r2_scores)
        self.N = exceed_count / self.n_splits
        self.M = compute_m(groups)
        self.S, self.E = compute_phase_from_brf(self.B, self.I, self.N, self.M)
        self.class_ = classify_dataset(self.S, self.E)
        self._fitted = True

        return self

    @property
    def brf_vector(self) -> dict:
        """Return the fitted results as a dict keyed B, I, N, M, S, E, class.

        Raises
        ------
        RuntimeError
            If ``fit`` has not been called yet.
        """
        if not self._fitted:
            raise RuntimeError("call fit() before accessing brf_vector")
        return {
            "B": self.B,
            "I": self.I,
            "N": self.N,
            "M": self.M,
            "S": self.S,
            "E": self.E,
            "class": self.class_,
        }
@@ -0,0 +1,6 @@
1
+ from .baseline_gap import compute_b
2
+ from .instability import compute_i
3
+ from .null_test import compute_n
4
+ from .metadata import compute_m
5
+
6
+ __all__ = ["compute_b", "compute_i", "compute_n", "compute_m"]
@@ -0,0 +1,12 @@
1
+ import numpy as np
2
+ from sklearn.metrics import r2_score
3
+
4
+
5
def compute_b(
    y_true: np.ndarray,
    y_pred_model: np.ndarray,
    y_pred_baseline: np.ndarray,
) -> float:
    """Return the baseline gain B: model R² minus baseline R².

    Both sets of predictions are scored against the same ``y_true``.
    """
    model_score, baseline_score = (
        r2_score(y_true, predictions)
        for predictions in (y_pred_model, y_pred_baseline)
    )
    return float(model_score - baseline_score)
@@ -0,0 +1,11 @@
1
+ from typing import Sequence
2
+
3
+ import numpy as np
4
+
5
+
6
def compute_i(r2_values: Sequence[float], eps: float = 1e-8) -> float:
    """Return the instability I of a collection of R² scores.

    I is the sample standard deviation (``ddof=1``) divided by the absolute
    mean. The absolute mean is floored at 1e-4 and ``eps`` is added so that
    scores centred on zero do not cause a division by zero.
    """
    scores = np.array(r2_values)
    spread = float(scores.std(ddof=1))
    centre = float(scores.mean())
    return spread / (max(abs(centre), 1e-4) + eps)
@@ -0,0 +1,30 @@
1
+ from typing import Optional
2
+
3
+ import numpy as np
4
+
5
+
6
def compute_m(groups: Optional[np.ndarray] = None) -> float:
    """Return the metadata score M derived from group labels.

    M is the average of two terms computed from the group sizes: the entropy
    of the group proportions normalized by ``log(n_groups)``, and a balance
    term ``1 - std(sizes) / mean(sizes)`` clamped to [0, 1].

    Returns 0.0 when ``groups`` is None or holds at most one distinct group.
    Non-numeric labels are first mapped to integer codes.

    Raises
    ------
    ValueError
        If numeric ``groups`` contain NaN or Inf.
    """
    if groups is None:
        return 0.0

    codes = np.asarray(groups)
    if not np.issubdtype(codes.dtype, np.number):
        # Replace arbitrary labels (e.g. strings) with their integer codes.
        codes = np.unique(codes, return_inverse=True)[1]
    if not np.isfinite(codes).all():
        raise ValueError("groups contains NaN or Inf values")

    sizes = np.unique(codes, return_counts=True)[1]
    n_groups = len(sizes)
    if n_groups <= 1:
        return 0.0

    shares = sizes / sizes.sum()
    entropy = -np.sum(shares * np.log(shares + 1e-10))
    max_entropy = np.log(n_groups)
    if max_entropy > 0:
        entropy_term = entropy / max_entropy
    else:
        entropy_term = 0.0

    balance_term = 1.0 - float(np.std(sizes) / (np.mean(sizes) + 1e-8))
    balance_term = max(0.0, min(1.0, balance_term))

    return float(0.5 * entropy_term + 0.5 * balance_term)
@@ -0,0 +1,25 @@
1
+ import numpy as np
2
+ from sklearn.metrics import r2_score
3
+
4
+
5
def compute_n(
    y_true: np.ndarray,
    y_pred_real: np.ndarray,
    n_permutations: int = 500,
    seed: int = 42,
) -> float:
    """Return the share of label permutations the real R² matches or beats.

    The predictions stay fixed; only ``y_true`` is shuffled, so no model is
    retrained here. BRFAnalyzer implements the per-fold retraining variant
    used in the full BRF protocol.
    """
    rng = np.random.default_rng(seed)
    observed = r2_score(y_true, y_pred_real)

    wins = sum(
        1
        for _ in range(n_permutations)
        if observed >= r2_score(rng.permutation(y_true), y_pred_real)
    )
    return wins / n_permutations
@@ -0,0 +1,5 @@
1
+ from .embedding import compute_phase_from_brf
2
+ from .classifier import classify_dataset
3
+ from .visualization import plot_phase_diagram
4
+
5
+ __all__ = ["compute_phase_from_brf", "classify_dataset", "plot_phase_diagram"]
@@ -0,0 +1,7 @@
1
def classify_dataset(S: float, E: float, tau_s: float = 0.0, tau_e: float = 0.5) -> str:
    """Map phase coordinates (S, E) to "Void", "Fragile" or "Reliable".

    S is checked first: at or below ``tau_s`` the dataset is "Void" whatever
    E is. Otherwise E at or below ``tau_e`` gives "Fragile", and anything
    above it gives "Reliable".
    """
    if S <= tau_s:
        return "Void"
    return "Fragile" if E <= tau_e else "Reliable"
@@ -0,0 +1,12 @@
1
+ from typing import Tuple
2
+
3
+
4
def compute_phase_from_brf(
    B: float,
    I: float,
    N: float,
    M: float,
) -> Tuple[float, float]:
    """Return the phase coordinates ``(S, E)`` with S = N - I and E = B + M."""
    return N - I, B + M
@@ -0,0 +1,52 @@
1
+ from typing import List, Optional
2
+
3
+ import matplotlib.pyplot as plt
4
+ import numpy as np
5
+
6
+
7
def plot_phase_diagram(
    S_list: List[float],
    E_list: List[float],
    labels: Optional[List[str]] = None,
    classes: Optional[List[str]] = None,
    title: str = "BRF Phase Diagram",
    save_path: Optional[str] = None,
    tau_s: float = 0.0,
    tau_e: float = 0.5,
):
    """Scatter datasets in the (S, E) plane with the class thresholds.

    Parameters
    ----------
    S_list, E_list : list of float
        Coordinates of each dataset.
    labels : list of str, optional
        Text annotation for each point, matched by position.
    classes : list of str, optional
        Class of each point. Points are colored per class; names other than
        "Reliable", "Fragile" and "Void" are drawn in grey.
    title : str
        Axes title.
    save_path : str, optional
        If given, the figure is also written there at 300 dpi.
    tau_s, tau_e : float
        Positions of the vertical (S) and horizontal (E) threshold lines.

    Returns
    -------
    matplotlib.figure.Figure
        The created figure. It is not closed here.
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    if classes is not None:
        color_map = {"Reliable": "#2ecc71", "Fragile": "#f39c12", "Void": "#e74c3c"}
        s_arr = np.array(S_list)
        e_arr = np.array(E_list)
        # dict.fromkeys keeps first-seen order, so the legend and draw order
        # are reproducible; iterating a set of strings is not.
        for cls in dict.fromkeys(classes):
            mask = [c == cls for c in classes]
            ax.scatter(
                s_arr[mask],
                e_arr[mask],
                c=color_map.get(cls, "#95a5a6"),
                label=cls,
                s=80,
                edgecolors="black",
                linewidths=0.5,
                alpha=0.8,
            )
    else:
        ax.scatter(S_list, E_list, c="#3498db", s=80, edgecolors="black", linewidths=0.5)

    if labels:
        for i, label in enumerate(labels):
            ax.annotate(label, (S_list[i], E_list[i]), fontsize=8, alpha=0.8)

    ax.axhline(y=tau_e, color="gray", linestyle="--", alpha=0.4, label=f"E = {tau_e} (Fragile boundary)")
    ax.axvline(x=tau_s, color="gray", linestyle="--", alpha=0.4, label=f"S = {tau_s} (Void boundary)")

    # Build the legend only after every labelled artist exists; creating it
    # before the threshold lines left their labels out of the legend.
    ax.legend(fontsize=12)

    ax.set_xlabel("Signal Identifiability (S = N - I)", fontsize=12)
    ax.set_ylabel("Epistemic Completeness (E = B + M)", fontsize=12)
    ax.set_title(title, fontsize=14)
    ax.grid(True, alpha=0.3)

    if save_path:
        fig.savefig(save_path, dpi=300, bbox_inches="tight")

    return fig
@@ -0,0 +1,4 @@
1
+ from .json_export import export_json
2
+ from .latex_export import export_latex
3
+
4
+ __all__ = ["export_json", "export_latex"]
@@ -0,0 +1,8 @@
1
+ import json
2
+
3
+
4
def export_json(brf_vector: dict, filepath: str) -> None:
    """Write ``brf_vector`` to ``filepath`` as UTF-8 JSON with 2-space indent.

    Raises
    ------
    ValueError
        If any value is None. The check runs before the file is opened.
    """
    for value in brf_vector.values():
        if value is None:
            raise ValueError("BRF vector contains None values; call fit() first")

    with open(filepath, "w", encoding="utf-8") as handle:
        json.dump(brf_vector, handle, indent=2, ensure_ascii=False)
@@ -0,0 +1,23 @@
1
def export_latex(brf_vector: dict) -> str:
    """Render the BRF vector as a LaTeX ``tabular`` (needs the booktabs package).

    Numeric entries are printed with three decimals; the class spans the last
    two columns. Raises ValueError if any value in ``brf_vector`` is None.
    """
    if any(value is None for value in brf_vector.values()):
        raise ValueError("BRF vector contains None values; call fit() first")

    row_end = " \\\\"

    def metric_row(name: str, key: str, meaning: str) -> str:
        # One "name & value & meaning \\" table row for a numeric entry.
        return f"{name} & {brf_vector[key]:.3f} & {meaning}" + row_end

    primary_rows = [
        metric_row("B (Baseline Gain)", "B", "Model improvement over mean predictor"),
        metric_row("I (Instability)", "I", "Sensitivity to split choice"),
        metric_row("N (Null Separability)", "N", "Signal distinguishability from noise"),
        metric_row("M (Metadata Sufficiency)", "M", "Group structure completeness"),
    ]
    derived_rows = [
        metric_row("S (Signal Identifiability)", "S", "N - I"),
        metric_row("E (Epistemic Completeness)", "E", "B + M"),
    ]
    class_name = brf_vector["class"]
    class_row = f"Class & \\multicolumn{{2}}{{c}}{{{class_name}}}" + row_end

    table = [
        r"\begin{tabular}{lcc}",
        r"\toprule",
        "Dimension & Value & Interpretation" + row_end,
        r"\midrule",
        *primary_rows,
        r"\midrule",
        *derived_rows,
        r"\midrule",
        class_row,
        r"\bottomrule",
        r"\end{tabular}",
    ]
    return "\n".join(table)
@@ -0,0 +1,138 @@
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from brf import BRFAnalyzer
5
+
6
+
7
class TestBRFAnalyzer:
    """End-to-end tests for ``BRFAnalyzer``: fitting, outputs, input validation.

    Each test draws all of its synthetic data from ONE seeded generator.
    Creating a second generator with the same seed replays the same stream,
    which would make the "noise" a copy of the leading feature values rather
    than an independent draw.
    """

    def test_fit_returns_self(self):
        rng = np.random.default_rng(0)
        X = rng.normal(size=(100, 5))
        y = X[:, 0] + 0.1 * rng.normal(size=100)
        analyzer = BRFAnalyzer(n_splits=5, n_permutations=20, seed=42)
        result = analyzer.fit(X, y)
        assert result is analyzer

    def test_brf_vector_keys(self):
        rng = np.random.default_rng(1)
        X = rng.normal(size=(100, 5))
        y = X[:, 0] + 0.1 * rng.normal(size=100)
        analyzer = BRFAnalyzer(n_splits=5, n_permutations=20, seed=42)
        analyzer.fit(X, y)
        v = analyzer.brf_vector
        expected_keys = {"B", "I", "N", "M", "S", "E", "class"}
        assert set(v.keys()) == expected_keys

    def test_all_values_computed(self):
        rng = np.random.default_rng(2)
        X = rng.normal(size=(100, 5))
        y = X[:, 0] + 0.5 * X[:, 1] + rng.normal(scale=0.2, size=100)
        analyzer = BRFAnalyzer(n_splits=10, n_permutations=30, seed=42)
        analyzer.fit(X, y)
        v = analyzer.brf_vector
        for key in ["B", "I", "N", "M", "S", "E"]:
            assert v[key] is not None, f"{key} should not be None"
            assert np.isfinite(v[key]), f"{key} should be finite"
        assert v["class"] in ["Reliable", "Fragile", "Void"]

    def test_reliable_with_clean_signal(self):
        rng = np.random.default_rng(42)
        X = rng.normal(size=(200, 3))
        y = 2.0 * X[:, 0] + 1.5 * X[:, 1] + rng.normal(scale=0.1, size=200)
        analyzer = BRFAnalyzer(n_splits=10, n_permutations=50, seed=42)
        analyzer.fit(X, y)
        assert analyzer.class_ == "Reliable"

    def test_void_with_noise_only(self):
        """Pure-noise targets must not be classified as Reliable.

        Despite the test name, either "Fragile" or "Void" is accepted.
        """
        rng = np.random.default_rng(42)
        X = rng.normal(size=(200, 3))
        y = rng.normal(size=200)
        analyzer = BRFAnalyzer(n_splits=10, n_permutations=50, seed=42)
        analyzer.fit(X, y)
        assert analyzer.class_ in ("Fragile", "Void")

    def test_groups_affect_m_score(self):
        rng = np.random.default_rng(42)
        X = rng.normal(size=(200, 3))
        y = X[:, 0] + 0.3 * rng.normal(size=200)

        a1 = BRFAnalyzer(n_splits=5, n_permutations=20, seed=42)
        a1.fit(X, y, groups=np.repeat([0, 1, 2, 3], 50))
        m_with = a1.M

        a2 = BRFAnalyzer(n_splits=5, n_permutations=20, seed=42)
        a2.fit(X, y)
        m_without = a2.M

        assert m_with > 0.0
        assert m_without == 0.0

    def test_custom_model(self):
        from sklearn.linear_model import LinearRegression

        rng = np.random.default_rng(3)
        X = rng.normal(size=(100, 3))
        y = X[:, 0] + 0.2 * rng.normal(size=100)
        analyzer = BRFAnalyzer(n_splits=5, n_permutations=20, model=LinearRegression(), seed=42)
        analyzer.fit(X, y)
        assert analyzer.class_ is not None

    def test_nan_input_raises(self):
        rng = np.random.default_rng(0)
        X = rng.normal(size=(20, 3))
        X[0, 0] = np.nan
        y = rng.normal(size=20)
        analyzer = BRFAnalyzer(n_splits=2, n_permutations=5)
        with pytest.raises(ValueError, match="NaN"):
            analyzer.fit(X, y)

    def test_inf_input_raises(self):
        rng = np.random.default_rng(0)
        X = rng.normal(size=(20, 3))
        X[0, 0] = np.inf
        y = rng.normal(size=20)
        analyzer = BRFAnalyzer(n_splits=2, n_permutations=5)
        with pytest.raises(ValueError, match="Inf"):
            analyzer.fit(X, y)

    def test_too_few_samples_raises(self):
        rng = np.random.default_rng(0)
        X = rng.normal(size=(3, 2))
        y = rng.normal(size=3)
        analyzer = BRFAnalyzer(n_splits=2, n_permutations=5)
        with pytest.raises(ValueError, match="20 samples"):
            analyzer.fit(X, y)

    def test_dimension_mismatch_raises(self):
        rng = np.random.default_rng(0)
        X = rng.normal(size=(20, 2))
        y = rng.normal(size=5)
        analyzer = BRFAnalyzer(n_splits=2, n_permutations=5)
        with pytest.raises(ValueError, match="length mismatch"):
            analyzer.fit(X, y)

    def test_1d_X_raises(self):
        rng = np.random.default_rng(0)
        X = rng.normal(size=20)
        y = rng.normal(size=20)
        analyzer = BRFAnalyzer(n_splits=2, n_permutations=5)
        with pytest.raises(ValueError, match="2D"):
            analyzer.fit(X, y)

    def test_brf_vector_before_fit_raises(self):
        analyzer = BRFAnalyzer()
        with pytest.raises(RuntimeError, match="fit"):
            _ = analyzer.brf_vector

    def test_n_splits_less_than_2_raises(self):
        with pytest.raises(ValueError, match="n_splits"):
            BRFAnalyzer(n_splits=1)

    def test_scale_false_still_works(self):
        rng = np.random.default_rng(42)
        X = rng.normal(size=(100, 5))
        y = X[:, 0] + 0.3 * rng.normal(size=100)
        analyzer = BRFAnalyzer(n_splits=5, n_permutations=20, scale=False, seed=42)
        analyzer.fit(X, y)
        assert analyzer.class_ is not None

    def test_classification_warning(self):
        rng = np.random.default_rng(42)
        X = rng.normal(size=(50, 2))
        # Binary integer targets: fit() is expected to warn about classification.
        y = rng.integers(0, 2, size=50)
        analyzer = BRFAnalyzer(n_splits=5, n_permutations=12, seed=42)
        with pytest.warns(UserWarning, match="classification"):
            analyzer.fit(X, y)
@@ -0,0 +1,125 @@
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from brf.metrics import compute_b, compute_i, compute_n, compute_m
5
+
6
+
7
class TestComputeB:
    """Checks ``compute_b`` against a constant 2.5 baseline prediction."""

    def test_perfect_prediction(self):
        # Model reproduces the targets exactly.
        targets = np.array([1.0, 2.0, 3.0, 4.0])
        baseline = np.full(4, 2.5)
        score = compute_b(targets, targets.copy(), baseline)
        assert score == 1.0

    def test_model_no_better_than_baseline(self):
        # Model and baseline give identical predictions.
        targets = np.array([1.0, 2.0, 3.0, 4.0])
        baseline = np.full(4, 2.5)
        score = compute_b(targets, baseline.copy(), baseline)
        assert score == 0.0

    def test_negative_b_gap(self):
        # Model predicts the targets in reverse order, worse than the baseline.
        targets = np.array([1.0, 2.0, 3.0, 4.0])
        reversed_preds = np.array([4.0, 3.0, 2.0, 1.0])
        baseline = np.full(4, 2.5)
        score = compute_b(targets, reversed_preds, baseline)
        assert score < 0.0
28
+
29
+
30
class TestComputeI:
    """Checks ``compute_i`` on constant, tight, spread, and near-zero score lists."""

    def test_zero_instability(self):
        # Identical scores: no variation at all.
        assert compute_i([0.5] * 4) == 0.0

    def test_low_instability(self):
        score = compute_i([0.5, 0.51, 0.49, 0.5])
        assert score > 0.0
        assert score < 0.1

    def test_high_instability(self):
        score = compute_i([0.9, 0.1, 0.8, 0.2])
        assert score > 0.5

    def test_eps_avoid_division_by_zero(self):
        # All-zero input must still give a finite, non-NaN result.
        score = compute_i([0.0] * 4)
        assert not np.isnan(score)
        assert np.isfinite(score)

    def test_near_zero_mean_does_not_explode(self):
        # Small values of mixed sign must not blow the result up.
        tiny = [0.001, -0.002, 0.003, -0.001, 0.002]
        score = compute_i(tiny)
        assert np.isfinite(score)
        assert score < 1e4
57
+
58
+
59
class TestComputeN:
    """Checks ``compute_n`` for perfect, reversed, seeded, and noisy predictions."""

    # NOTE(review): only test_deterministic_seed passes ``seed``; the other
    # tests rely on compute_n's default seeding -- confirm it is deterministic.

    def test_perfect_predictions(self):
        truth = np.arange(1.0, 6.0)
        score = compute_n(truth, truth.copy(), n_permutations=200)
        assert score >= 0.99

    def test_worse_than_random(self):
        truth = np.arange(1.0, 6.0)
        backwards = np.array([5.0, 4.0, 3.0, 2.0, 1.0])
        score = compute_n(truth, backwards, n_permutations=200)
        assert score < 0.5

    def test_deterministic_seed(self):
        truth = np.arange(1.0, 11.0)
        preds = np.array([1.2, 2.3, 2.8, 4.1, 4.9, 6.0, 7.1, 7.8, 9.2, 9.9])
        first, second = (
            compute_n(truth, preds, n_permutations=100, seed=42) for _ in range(2)
        )
        assert first == second

    def test_range_bounded(self):
        rng = np.random.default_rng(99)
        truth = rng.normal(size=50)
        preds = truth + rng.normal(scale=0.3, size=50)
        score = compute_n(truth, preds, n_permutations=100)
        assert score >= 0.0
        assert score <= 1.0
85
+
86
+
87
class TestComputeM:
    """Checks ``compute_m`` across absent, single, balanced, skewed, and odd group labels."""

    def test_no_groups(self):
        assert compute_m(groups=None) == 0.0

    def test_single_group(self):
        # Every sample carries the same label.
        assert compute_m(groups=np.zeros(100)) == 0.0

    def test_perfectly_balanced_multi_group(self):
        # Four groups of 25 samples each.
        labels = np.arange(4).repeat(25)
        assert compute_m(groups=labels) > 0.5

    def test_imbalanced_groups_lower_score(self):
        even = np.arange(4).repeat(25)
        # One dominant group of 85 plus three groups of 5.
        skewed = np.repeat([0, 1, 2, 3], [85, 5, 5, 5])
        assert compute_m(groups=even) > compute_m(groups=skewed)

    def test_range_bounded_01(self):
        rng = np.random.default_rng(42)
        for _ in range(10):
            upper = rng.integers(5, 20)
            labels = rng.integers(0, upper // 2, size=100)
            score = compute_m(groups=labels)
            assert score >= 0.0
            assert score <= 1.0

    def test_nan_groups_raises(self):
        with_nan = np.array([0, 1, np.nan, 1, 0])
        with pytest.raises(ValueError, match="NaN"):
            compute_m(groups=with_nan)

    def test_non_numeric_groups_converted(self):
        # String labels are accepted.
        letters = np.array(list("abcab"))
        assert compute_m(groups=letters) > 0.0
@@ -0,0 +1,96 @@
1
+ import numpy as np
2
+ import pytest
3
+
4
+ from brf.phase import compute_phase_from_brf, classify_dataset, plot_phase_diagram
5
+ from brf.report import export_json, export_latex
6
+
7
+
8
class TestEmbedding:
    """Checks the (S, E) phase coordinates derived from a BRF vector."""

    def test_phase_coordinates(self):
        S, E = compute_phase_from_brf(B=1.0, I=0.01, N=1.0, M=0.7)
        # S = N - I and E = B + M are float arithmetic results; compare with a
        # tolerance instead of exact equality so rounding cannot fail the test.
        assert S == pytest.approx(0.99)
        assert E == pytest.approx(1.70)
13
+
14
+
15
class TestClassifier:
    """Checks ``classify_dataset`` labels for default and custom thresholds."""

    def test_reliable(self):
        label = classify_dataset(S=1.0, E=0.8)
        assert label == "Reliable"

    def test_fragile(self):
        label = classify_dataset(S=0.3, E=0.2)
        assert label == "Fragile"

    def test_void_due_to_negative_s(self):
        label = classify_dataset(S=-0.1, E=0.8)
        assert label == "Void"

    def test_void_due_to_zero_s(self):
        label = classify_dataset(S=0.0, E=0.8)
        assert label == "Void"

    def test_custom_thresholds(self):
        # Raising tau_s pushes S=0.1 into Void.
        raised_s = classify_dataset(S=0.1, E=0.6, tau_s=0.2)
        assert raised_s == "Void"
        # With tau_e=0.5, E=0.4 is classified Fragile.
        explicit_e = classify_dataset(S=0.5, E=0.4, tau_e=0.5)
        assert explicit_e == "Fragile"

    def test_edge_case_boundary(self):
        # E=0.5 with default thresholds is expected to be Fragile.
        label = classify_dataset(S=0.5, E=0.5)
        assert label == "Fragile"
34
+
35
+
36
class TestReport:
    """Checks the JSON and LaTeX exporters in ``brf.report``."""

    def test_export_json_none_raises(self, tmp_path):
        # Target a temp path so a regression cannot litter the working directory.
        target = tmp_path / "dummy.json"
        with pytest.raises(ValueError, match="None"):
            export_json({"B": None, "I": 0.5}, str(target))
        # Validation happens before the file is opened, so nothing is written.
        assert not target.exists()

    def test_export_latex_none_raises(self):
        with pytest.raises(ValueError, match="None"):
            export_latex({"B": None, "I": 0.5})

    def test_export_json_roundtrip(self, tmp_path):
        import json

        p = str(tmp_path / "test.json")
        bv = {"B": 0.8, "I": 0.1, "N": 0.95, "M": 0.5, "S": 0.85, "E": 1.3, "class": "Reliable"}
        export_json(bv, p)
        # export_json writes UTF-8; read back with the same encoding rather
        # than the platform default.
        with open(p, encoding="utf-8") as f:
            loaded = json.load(f)
        assert loaded == bv

    def test_export_latex_output(self):
        bv = {"B": 0.8, "I": 0.1, "N": 0.95, "M": 0.5, "S": 0.85, "E": 1.3, "class": "Reliable"}
        out = export_latex(bv)
        assert "tabular" in out
        assert "Reliable" in out
        assert "0.800" in out
60
+
61
+
62
class TestVisualization:
    """Smoke tests for ``plot_phase_diagram``: it returns a figure and can save it."""

    def test_plot_returns_figure(self):
        s_values = [0.9, 0.3, -0.5]
        e_values = [1.5, 0.2, 0.8]
        figure = plot_phase_diagram(S_list=s_values, E_list=e_values, labels=["A", "B", "C"])
        assert figure is not None

    def test_plot_without_labels(self):
        figure = plot_phase_diagram(S_list=[0.5, 0.0], E_list=[0.6, 1.0])
        assert figure is not None

    def test_plot_custom_thresholds(self):
        thresholds = {"tau_s": 0.1, "tau_e": 0.3}
        figure = plot_phase_diagram(S_list=[0.9, -0.1], E_list=[0.6, 0.4], **thresholds)
        assert figure is not None

    def test_plot_save_path(self, tmp_path):
        # tmp_path is already a pathlib.Path; the function receives a str.
        target = tmp_path / "phase.png"
        figure = plot_phase_diagram(
            S_list=[0.5, 0.0],
            E_list=[0.6, 1.0],
            save_path=str(target),
        )
        assert target.exists()
        assert figure is not None