finval 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
finval/__init__.py ADDED
@@ -0,0 +1,29 @@
1
+ """finval — rigorous validation for synthetic financial time series.
2
+
3
+ Quick start:
4
+
5
+ import finval
6
+
7
+ # synthetic and real are (n_samples, n_features) arrays of returns
8
+ report = finval.validate(synthetic, real)
9
+ print(report.summary())
10
+ report.to_dict()
11
+
12
+ For path-level validation with drawdowns and calibration:
13
+
14
+ # paths are (n_paths, horizon, n_features) arrays
15
+ report = finval.validate_paths(synthetic_paths, real_returns)
16
+ """
17
+
18
+ from finval.core.result import MetricResult, ValidationReport
19
+ from finval.validate import validate, validate_paths
20
+
21
+ __version__ = "0.1.0"
22
+
23
+ __all__ = [
24
+ "MetricResult",
25
+ "ValidationReport",
26
+ "validate",
27
+ "validate_paths",
28
+ "__version__",
29
+ ]
@@ -0,0 +1,32 @@
1
+ """Baseline generators for benchmarking synthetic financial data.
2
+
3
+ A baseline is a simple generator that produces synthetic data from real
4
+ data. Baselines are not meant to be good models — they're meant to be
5
+ reference points so users can answer "is my fancy model actually better
6
+ than X?" where X is a trivially simple generator.
7
+
8
+ Three baselines ship with finval:
9
+
10
+ - `gaussian`: Multivariate Gaussian with the empirical mean and covariance
11
+ of the real data. No temporal structure, no tails, no vol clustering.
12
+ This is the minimum bar any generative model should clear.
13
+
14
+ - `historical_bootstrap`: Random sampling (with replacement) from real
15
+ returns. Reproduces marginals and joint distribution exactly in
16
+ expectation, but destroys all temporal structure (no ACF, no leverage).
17
+
18
+ - `block_bootstrap`: Moving-block bootstrap preserves local dependence.
19
+ This is the strongest simple baseline — it reproduces short-range
20
+ temporal structure and most stylized facts at the cost of exact
21
+ marginal duplication. A generative model that beats block bootstrap
22
+ on all metrics is genuinely doing something new.
23
+ """
24
+
25
+ from finval.baselines.gaussian import gaussian_baseline
26
+ from finval.baselines.historical import block_bootstrap, historical_bootstrap
27
+
28
+ __all__ = [
29
+ "gaussian_baseline",
30
+ "historical_bootstrap",
31
+ "block_bootstrap",
32
+ ]
@@ -0,0 +1,64 @@
1
+ """Multivariate Gaussian baseline.
2
+
3
+ Fits a multivariate normal to the real returns and samples from it.
4
+ This is the naive "Gaussian i.i.d." benchmark. A model that can't beat
5
+ this on temporal metrics is not capturing any time structure.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import numpy as np
11
+
12
+
13
def gaussian_baseline(
    real: np.ndarray,
    n_samples: int | None = None,
    n_paths: int | None = None,
    path_length: int | None = None,
    seed: int = 42,
) -> np.ndarray:
    """Generate samples from a multivariate Gaussian fit to real returns.

    Two output modes:

    1. Flat mode (n_samples given): returns shape (n_samples, n_features).

    2. Path mode (n_paths and path_length given): returns shape
       (n_paths, path_length, n_features). Each row within a path is an
       independent draw — there is no temporal structure.

    If n_samples is given it takes precedence over the path arguments.

    Args:
        real: (n_obs, n_features) real return series used to fit mean+cov.
            A single-feature input of shape (n_obs, 1) is supported.
        n_samples: Number of flat samples to return.
        n_paths: Number of paths to return (requires path_length).
        path_length: Length of each path.
        seed: RNG seed.

    Returns:
        numpy array of shape (n_samples, n_features) or
        (n_paths, path_length, n_features).

    Raises:
        ValueError: If real is not 2D, if fewer than 2 NaN-free rows are
            available, or if no output mode is specified.
    """
    real = np.asarray(real)
    if real.ndim != 2:
        raise ValueError(f"real must be 2D, got shape {real.shape}")

    # The mean uses every available observation per column; the covariance
    # needs complete rows, so rows containing any NaN are dropped for it.
    mean = np.nanmean(real, axis=0)
    clean = real[~np.isnan(real).any(axis=1)]
    if len(clean) < 2:
        raise ValueError("need at least 2 clean rows to fit covariance")

    # np.cov returns a 0-d array when there is a single feature; promote it
    # to (1, 1) so the regularization and sampling below work for any width.
    cov = np.atleast_2d(np.cov(clean, rowvar=False))
    # Small diagonal jitter to keep the matrix positive-definite.
    cov = cov + 1e-10 * np.eye(cov.shape[0])

    rng = np.random.default_rng(seed)

    if n_samples is not None:
        return rng.multivariate_normal(mean, cov, size=n_samples)

    if n_paths is not None and path_length is not None:
        return rng.multivariate_normal(mean, cov, size=(n_paths, path_length))

    raise ValueError("must specify either n_samples or (n_paths and path_length)")
@@ -0,0 +1,80 @@
1
+ """Historical resampling baselines.
2
+
3
+ - `historical_bootstrap` samples rows i.i.d. from real returns. Preserves
4
+ the empirical marginal and joint distribution perfectly but destroys
5
+ all temporal structure.
6
+
7
+ - `block_bootstrap` samples contiguous blocks of rows. Preserves short-
8
+ range temporal dependence (within a block) at the cost of some joint
9
+ distribution fidelity at block boundaries. This is the strongest simple
10
+ baseline for temporal stylized facts.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import numpy as np
16
+
17
+
18
def historical_bootstrap(
    real: np.ndarray,
    n_samples: int | None = None,
    n_paths: int | None = None,
    path_length: int | None = None,
    seed: int = 42,
) -> np.ndarray:
    """I.i.d. bootstrap: sample rows of real with replacement.

    Rows are drawn independently and uniformly, so the output carries no
    temporal structure.

    Args:
        real: (n_obs, n_features) array to resample from.
        n_samples: If given, return a flat (n_samples, n_features) array.
            Takes precedence over the path arguments.
        n_paths: Number of paths (requires path_length).
        path_length: Length of each path.
        seed: RNG seed.

    Returns:
        Array of shape (n_samples, n_features) or
        (n_paths, path_length, n_features).

    Raises:
        ValueError: If real is not 2D, has no rows, or no output mode is
            specified.
    """
    real = np.asarray(real)
    if real.ndim != 2:
        raise ValueError(f"real must be 2D, got shape {real.shape}")

    n = len(real)
    if n == 0:
        # Without this check the index draw below fails with numpy's
        # unhelpful "low >= high" message.
        raise ValueError("real must contain at least one row to resample from")

    rng = np.random.default_rng(seed)

    if n_samples is not None:
        return real[rng.integers(0, n, size=n_samples)]

    if n_paths is not None and path_length is not None:
        # Indexing with a 2D index array yields (n_paths, path_length, n_features).
        return real[rng.integers(0, n, size=(n_paths, path_length))]

    raise ValueError("must specify either n_samples or (n_paths and path_length)")
46
+
47
+
48
def block_bootstrap(
    real: np.ndarray,
    n_paths: int,
    path_length: int,
    block_size: int = 20,
    seed: int = 42,
) -> np.ndarray:
    """Moving-block bootstrap: sample contiguous blocks of rows.

    Each path is built by concatenating randomly positioned blocks of
    block_size consecutive rows of real and truncating to path_length, so
    row order is preserved inside a block. Only path output is supported.

    Args:
        real: (n_obs, n_features) array to resample from.
        n_paths: Number of paths to generate.
        path_length: Number of rows in each path.
        block_size: Number of consecutive rows per block (>= 1).
        seed: RNG seed.

    Returns:
        Array of shape (n_paths, path_length, n_features) with real's dtype.

    Raises:
        ValueError: If real is not 2D, block_size < 1, n_paths or
            path_length is negative, or real has fewer rows than block_size.
    """
    real = np.asarray(real)
    if real.ndim != 2:
        raise ValueError(f"real must be 2D, got shape {real.shape}")
    if block_size < 1:
        # Guards the ceil-division below against ZeroDivisionError.
        raise ValueError(f"block_size must be >= 1, got {block_size}")
    if n_paths < 0 or path_length < 0:
        raise ValueError(
            f"n_paths and path_length must be non-negative, got {n_paths} and {path_length}"
        )
    if len(real) < block_size:
        raise ValueError(f"real has {len(real)} rows, need >= block_size={block_size}")

    n = len(real)
    paths = np.empty((n_paths, path_length, real.shape[1]), dtype=real.dtype)
    if path_length == 0:
        # Nothing to fill; concatenating zero blocks would raise.
        return paths

    rng = np.random.default_rng(seed)
    n_blocks = (path_length + block_size - 1) // block_size  # ceil division
    offsets = np.arange(block_size)

    for p in range(n_paths):
        # One draw of block start positions per path; every start leaves
        # room for a full block.
        starts = rng.integers(0, n - block_size + 1, size=n_blocks)
        # Expand each start into its run of consecutive row indices, then
        # trim the last block so the path has exactly path_length rows.
        row_idx = (starts[:, None] + offsets).ravel()[:path_length]
        paths[p] = real[row_idx]
    return paths
@@ -0,0 +1,11 @@
1
+ """Core types and utilities for finval."""
2
+
3
+ from finval.core.result import MetricResult, ValidationReport
4
+ from finval.core.thresholds import DEFAULT_THRESHOLDS, quality_from_value
5
+
6
+ __all__ = [
7
+ "MetricResult",
8
+ "ValidationReport",
9
+ "DEFAULT_THRESHOLDS",
10
+ "quality_from_value",
11
+ ]
@@ -0,0 +1,116 @@
1
+ """Bootstrap confidence intervals for metrics.
2
+
3
+ Block bootstrap is appropriate for time series (preserves local dependence).
4
+ Naive i.i.d. bootstrap is appropriate for flattened-sample metrics like
5
+ marginal KS or Pearson correlation.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from collections.abc import Callable
11
+
12
+ import numpy as np
13
+
14
+
15
def iid_bootstrap_ci(
    metric_fn: Callable[[np.ndarray, np.ndarray], float],
    synthetic: np.ndarray,
    real: np.ndarray,
    n_bootstrap: int = 200,
    confidence: float = 0.95,
    seed: int = 42,
) -> tuple[float, float]:
    """Compute a percentile bootstrap confidence interval under i.i.d. resampling.

    On every replicate the rows of synthetic and real are independently
    resampled with replacement (each to its own original length) and
    metric_fn is evaluated on the pair. Replicates where metric_fn raises
    or returns a non-finite value are skipped.

    Args:
        metric_fn: Callable taking (synthetic, real) arrays and returning
            a scalar metric value.
        synthetic: (n_samples, n_features) synthetic data; any array-like
            is accepted and converted with np.asarray.
        real: (n_samples, n_features) real data; any array-like is accepted.
        n_bootstrap: Number of bootstrap replicates.
        confidence: Desired confidence level, e.g. 0.95 for a 95% CI.
        seed: RNG seed for reproducibility.

    Returns:
        (ci_low, ci_high) for the metric, or (nan, nan) if no replicate
        produced a finite value.

    Raises:
        ValueError: If synthetic or real has no rows.
    """
    # Convert up front: indexing a plain list with an index array raises
    # inside the try below, which would silently turn every replicate into
    # a skip and the result into (nan, nan).
    synthetic = np.asarray(synthetic)
    real = np.asarray(real)
    n_syn = len(synthetic)
    n_real = len(real)
    if n_syn == 0 or n_real == 0:
        raise ValueError("synthetic and real must each contain at least one row")

    rng = np.random.default_rng(seed)

    values: list[float] = []
    for _ in range(n_bootstrap):
        idx_syn = rng.integers(0, n_syn, size=n_syn)
        idx_real = rng.integers(0, n_real, size=n_real)
        try:
            v = metric_fn(synthetic[idx_syn], real[idx_real])
            if np.isfinite(v):
                values.append(float(v))
        except Exception:
            # Deliberate best-effort: a metric that fails on one resample
            # must not abort the whole interval estimate.
            continue

    if not values:
        return (float("nan"), float("nan"))

    alpha = (1 - confidence) / 2
    lo = float(np.percentile(values, alpha * 100))
    hi = float(np.percentile(values, (1 - alpha) * 100))
    return (lo, hi)
62
+
63
+
64
def block_bootstrap_ci(
    metric_fn: Callable[[np.ndarray, np.ndarray], float],
    synthetic: np.ndarray,
    real: np.ndarray,
    block_size: int = 20,
    n_bootstrap: int = 200,
    confidence: float = 0.95,
    seed: int = 42,
) -> tuple[float, float]:
    """Compute a percentile bootstrap CI using moving-block resampling.

    On every replicate both series are rebuilt from randomly positioned
    contiguous blocks of block_size rows (truncated to the original
    length), so row order inside a block is kept. Replicates where
    metric_fn raises or returns a non-finite value are skipped.

    Args:
        metric_fn: Callable taking (synthetic, real) arrays and returning
            a scalar metric value.
        synthetic: (n_timesteps, n_features) synthetic time series.
        real: (n_timesteps, n_features) real time series.
        block_size: Length of each resampling block (>= 1). A series
            shorter than block_size is resampled as one whole block.
        n_bootstrap: Number of bootstrap replicates.
        confidence: Desired confidence level.
        seed: RNG seed.

    Returns:
        (ci_low, ci_high), or (nan, nan) if no replicate produced a
        finite value.

    Raises:
        ValueError: If block_size < 1.
    """
    if block_size < 1:
        # Otherwise the failure happens inside the best-effort try below
        # and a caller mistake is reported as "no CI available".
        raise ValueError(f"block_size must be >= 1, got {block_size}")

    synthetic = np.asarray(synthetic)
    real = np.asarray(real)
    rng = np.random.default_rng(seed)

    def resample(x: np.ndarray) -> np.ndarray:
        """Rebuild x from random contiguous blocks, keeping its length."""
        n = len(x)
        n_blocks = (n + block_size - 1) // block_size  # ceil division
        # max(..., 1) keeps the draw valid when x is shorter than a block.
        starts = rng.integers(0, max(n - block_size + 1, 1), size=n_blocks)
        out = np.concatenate([x[s : s + block_size] for s in starts], axis=0)
        return out[:n]

    values: list[float] = []
    for _ in range(n_bootstrap):
        try:
            v = metric_fn(resample(synthetic), resample(real))
            if np.isfinite(v):
                values.append(float(v))
        except Exception:
            # Deliberate best-effort: skip replicates the metric cannot handle.
            continue

    if not values:
        return (float("nan"), float("nan"))

    alpha = (1 - confidence) / 2
    lo = float(np.percentile(values, alpha * 100))
    hi = float(np.percentile(values, (1 - alpha) * 100))
    return (lo, hi)
finval/core/result.py ADDED
@@ -0,0 +1,201 @@
1
+ """Result types for validation metrics.
2
+
3
+ A `MetricResult` captures the output of a single metric: value, quality
4
+ grade, thresholds used, and optional bootstrap confidence interval and
5
+ per-feature/per-pair breakdown.
6
+
7
+ A `ValidationReport` aggregates multiple MetricResults into a single object
8
+ with an overall quality grade and weighted pass rate.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from dataclasses import dataclass, field
14
+ from typing import Any
15
+
16
+ import numpy as np
17
+
18
# Quality grades from best to worst, and the numeric score each grade
# contributes to weighted aggregation.
QUALITY_LEVELS = ("excellent", "good", "acceptable", "poor")
QUALITY_SCORES = {"excellent": 1.0, "good": 0.75, "acceptable": 0.5, "poor": 0.0}


@dataclass
class MetricResult:
    """Result of a single validation metric.

    Attributes:
        name: Metric identifier, e.g. "marginal_ks".
        value: Scalar metric value (lower is always better by convention).
            Numpy scalars are coerced to a builtin float; NaN and +/-Inf
            are replaced by float("inf") and force a failing result.
        quality: One of "excellent", "good", "acceptable", "poor".
        passed: True if quality is not "poor" (supplied by the caller;
            coerced to a builtin bool).
        thresholds: The threshold dict used to assign quality.
        category: Metric category, e.g. "distribution", "dependence".
        interpretation: Human-readable one-line summary.
        ci_low: Lower bound of bootstrap confidence interval (optional).
        ci_high: Upper bound of bootstrap confidence interval (optional).
        per_feature: Per-feature breakdown (optional).
        per_pair: Per-pair breakdown for dependence metrics (optional).
        metadata: Additional metric-specific information.

    Raises:
        ValueError: If quality is not one of QUALITY_LEVELS.
    """

    name: str
    value: float
    quality: str
    passed: bool
    thresholds: dict[str, float] = field(default_factory=dict)
    category: str = "uncategorized"
    interpretation: str = ""
    ci_low: float | None = None
    ci_high: float | None = None
    per_feature: dict[str, Any] = field(default_factory=dict)
    per_pair: dict[str, Any] = field(default_factory=dict)
    metadata: dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        # Metrics usually hand over numpy scalars (np.float32, np.bool_),
        # which json.dumps rejects; normalize them to builtins here so
        # to_dict() output is serializable.
        if self.value is not None:
            self.value = float(self.value)
            if not np.isfinite(self.value):
                # A non-finite value means the metric is unusable: pin it
                # to +inf (worst under "lower is better") and fail it.
                self.value = float("inf")
                self.quality = "poor"
                self.passed = False
        self.passed = bool(self.passed)
        if self.ci_low is not None:
            self.ci_low = float(self.ci_low)
        if self.ci_high is not None:
            self.ci_high = float(self.ci_high)
        if self.quality not in QUALITY_LEVELS:
            raise ValueError(f"quality must be one of {QUALITY_LEVELS}, got {self.quality!r}")

    @property
    def score(self) -> float:
        """Numeric score in [0, 1] for weighted aggregation."""
        return QUALITY_SCORES[self.quality]

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict.

        Scalar fields are builtin types. The optional breakdown dicts are
        shallow-copied and only included when non-empty; their contents are
        passed through unchanged. A failed metric keeps value=inf, which
        json.dumps renders as the non-standard token ``Infinity``.
        """
        out: dict[str, Any] = {
            "name": self.name,
            "value": self.value,
            "quality": self.quality,
            "passed": self.passed,
            "score": self.score,
            # Copy so callers mutating the output cannot alter this result.
            "thresholds": dict(self.thresholds),
            "category": self.category,
            "interpretation": self.interpretation,
        }
        if self.ci_low is not None:
            out["ci_low"] = self.ci_low
            out["ci_high"] = self.ci_high
        if self.per_feature:
            out["per_feature"] = dict(self.per_feature)
        if self.per_pair:
            out["per_pair"] = dict(self.per_pair)
        if self.metadata:
            out["metadata"] = dict(self.metadata)
        return out
90
+
91
+
92
def create_error_metric(
    name: str,
    error: str,
    category: str = "uncategorized",
) -> MetricResult:
    """Return a failing MetricResult that records a computation failure.

    The result has an infinite value, "poor" quality and passed=False. The
    error text appears in the interpretation (prefixed with "Failed: ")
    and under the "error" key of the metadata.
    """
    failure_fields = {
        "name": name,
        "value": float("inf"),
        "quality": "poor",
        "passed": False,
        "category": category,
        "interpretation": f"Failed: {error}",
        "metadata": {"error": error},
    }
    return MetricResult(**failure_fields)
107
+
108
+
109
@dataclass
class ValidationReport:
    """Aggregated report of multiple validation metrics.

    A ValidationReport groups metrics by category, computes a weighted
    overall score, and assigns an overall quality grade.

    Attributes:
        metrics: Dict mapping metric name to MetricResult.
        weights: Dict mapping metric name to weight. The overall score is
            normalized by the sum of ALL weights, so a weighted metric that
            is absent from `metrics` contributes 0 and lowers the score.
        category_weights: Dict mapping category to category weight (stored
            and serialized; not used in the score computed here).
        overall_score: Weighted average of metric scores in [0, 1].
        overall_quality: One of "excellent" (>=0.85), "good" (>=0.65),
            "acceptable" (>=0.45), "poor" otherwise.
    """

    metrics: dict[str, MetricResult] = field(default_factory=dict)
    weights: dict[str, float] = field(default_factory=dict)
    category_weights: dict[str, float] = field(default_factory=dict)

    @property
    def overall_score(self) -> float:
        """Weighted average of metric scores.

        Returns 0.0 when there are no metrics, no weights, or the weights
        sum to zero. Metrics are matched to weights by their `name`
        attribute; a metric without a weight contributes nothing.
        """
        if not self.metrics or not self.weights:
            return 0.0
        total_weight = sum(self.weights.values())
        if total_weight == 0:
            return 0.0
        weighted = sum(
            m.score * self.weights.get(m.name, 0.0) for m in self.metrics.values()
        )
        return weighted / total_weight

    @property
    def overall_quality(self) -> str:
        """Quality grade based on overall_score."""
        s = self.overall_score
        if s >= 0.85:
            return "excellent"
        if s >= 0.65:
            return "good"
        if s >= 0.45:
            return "acceptable"
        return "poor"

    def _passed_count(self) -> int:
        """Number of metrics whose `passed` flag is set."""
        return sum(1 for m in self.metrics.values() if m.passed)

    @property
    def pass_rate(self) -> float:
        """Fraction of metrics flagged as passed (0.0 for an empty report)."""
        if not self.metrics:
            return 0.0
        return self._passed_count() / len(self.metrics)

    def by_category(self) -> dict[str, list[MetricResult]]:
        """Group metrics by category, in first-seen category order."""
        out: dict[str, list[MetricResult]] = {}
        for m in self.metrics.values():
            out.setdefault(m.category, []).append(m)
        return out

    def summary(self) -> str:
        """Human-readable summary: header, then one line per metric by category."""
        n_total = len(self.metrics)
        # Count directly: reconstructing the count as int(pass_rate * n)
        # truncates under float rounding (e.g. 1/49 * 49 < 1 -> "0/49").
        n_passed = self._passed_count()
        lines = [
            f"finval ValidationReport — {self.overall_quality.upper()} ({self.overall_score:.0%})",
            f" metrics: {n_total}, passed: {n_passed}/{n_total}",
            "",
        ]
        for category, mlist in self.by_category().items():
            lines.append(f" [{category}]")
            for m in sorted(mlist, key=lambda x: x.name):
                status = "PASS" if m.passed else "FAIL"
                ci = ""
                # Both bounds are needed to format the interval; a half-set
                # CI is omitted rather than crashing the summary.
                if m.ci_low is not None and m.ci_high is not None:
                    ci = f" ({m.ci_low:.3f}–{m.ci_high:.3f})"
                lines.append(
                    f" {status} {m.name:28s} value={m.value:7.4f}{ci} {m.quality}"
                )
            lines.append("")
        return "\n".join(lines)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the report and every metric (via its to_dict) to a plain dict."""
        return {
            "overall_score": self.overall_score,
            "overall_quality": self.overall_quality,
            "pass_rate": self.pass_rate,
            "metrics": {name: m.to_dict() for name, m in self.metrics.items()},
            "weights": dict(self.weights),
            "category_weights": dict(self.category_weights),
        }