qflex 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qflex/README.md +66 -0
- qflex/__init__.py +26 -0
- qflex/basis.py +263 -0
- qflex/constraints.py +555 -0
- qflex/core.py +430 -0
- qflex/mono_verification.py +141 -0
- qflex/transforms.py +298 -0
- qflex/utils.py +344 -0
- qflex-1.0.0.dist-info/METADATA +300 -0
- qflex-1.0.0.dist-info/RECORD +12 -0
- qflex-1.0.0.dist-info/WHEEL +4 -0
- qflex-1.0.0.dist-info/licenses/LICENSE +21 -0
qflex/README.md
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
# QFlex Distributions
|
|
2
|
+
|
|
3
|
+
A modular Python implementation of QFlex quantile-parameterized distributions with flexible basis functions.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
QFlex is a distribution family that uses custom basis functions to fit quantile data. It supports unbounded, semibounded, and bounded distributions through log and logit transforms, with optional constraints to ensure valid probability densities.
|
|
8
|
+
|
|
9
|
+
## Module Structure
|
|
10
|
+
|
|
11
|
+
| File | Purpose |
|
|
12
|
+
|------|---------|
|
|
13
|
+
| `basis.py` | Basis function definitions and evaluation (constant, tail, center families) |
|
|
14
|
+
| `constraints.py` | Constraint solvers for coefficient estimation (Propositions 3–5) |
|
|
15
|
+
| `core.py` | Main `QFlex` class for unbounded distributions |
|
|
16
|
+
| `transforms.py` | `LogQFlex` (semibounded) and `LogitQFlex` (bounded) variants |
|
|
17
|
+
| `utils.py` | Gamma calculation, PDF/CDF computation, moments, and W1 distance |
|
|
18
|
+
| `mono_verification.py` | Proposition 4 verification and monotonicity checks |
|
|
19
|
+
| `__init__.py` | Public API exports |
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from qflex import QFlex, LogQFlex, LogitQFlex, ConstraintType
|
|
25
|
+
|
|
26
|
+
# Fit an unbounded distribution
|
|
27
|
+
qflex = QFlex(x_data, y_data, terms=5)
|
|
28
|
+
|
|
29
|
+
# With non-negativity constraints on coefficients
|
|
30
|
+
qflex = QFlex(x_data, y_data, terms=5, constraint_type=ConstraintType.A)
|
|
31
|
+
|
|
32
|
+
# Semibounded distribution (e.g., income, time-to-event)
|
|
33
|
+
log_qflex = LogQFlex(x_data, y_data, lower_bound=0, terms=5)
|
|
34
|
+
|
|
35
|
+
# Bounded distribution (e.g., proportions, percentages)
|
|
36
|
+
logit_qflex = LogitQFlex(x_data, y_data, lower_bound=0, upper_bound=1, terms=5)
|
|
37
|
+
|
|
38
|
+
# Verify Proposition 4 conditions
|
|
39
|
+
result = qflex.check_proposition4()
|
|
40
|
+
print(f"Satisfied: {result['satisfied']}, Margin: {result['margin']:.4f}")
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Constraint Types
|
|
44
|
+
|
|
45
|
+
| Type | Description |
|
|
46
|
+
|------|-------------|
|
|
47
|
+
| `NONE` | Unconstrained least squares |
|
|
48
|
+
| `A` | All coefficients non-negative (k ≥ 2) |
|
|
49
|
+
| `TL` | Leading tail coefficients non-negative |
|
|
50
|
+
| `TA` | All tail coefficients non-negative |
|
|
51
|
+
| `TC` | Proposition 5: tail-center margin constraint |
|
|
52
|
+
| `TC_MAG` | Proposition 4: m_tail > M_center on grid |
|
|
53
|
+
|
|
54
|
+
## Basis Functions
|
|
55
|
+
|
|
56
|
+
QFlex uses three basis function families:
|
|
57
|
+
|
|
58
|
+
- **Right tail (f1)**: `[-ln(1-p)]^i` for orders 1, 2, 3, ...
|
|
59
|
+
- **Left tail (f2)**: `(-1)^(i+1) × [ln(p)]^i` for orders 1, 2, 3, ...
|
|
60
|
+
- **Center (f3)**: `(p - γ)^(2i-1)` for odd powers 1, 3, 5, ...
|
|
61
|
+
|
|
62
|
+
The gamma (γ) parameter controls the center of the distribution and is estimated from the data using P10, P50, and P90 quantiles.
|
|
63
|
+
|
|
64
|
+
## References
|
|
65
|
+
|
|
66
|
+
See the QFlex paper for theoretical background on the basis functions, propositions, and constraint formulations.
|
qflex/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
QFlex Distributions
|
|
3
|
+
|
|
4
|
+
A flexible quantile-parameterized distribution family with support for
|
|
5
|
+
unbounded, semibounded, and bounded domains, plus optional constraints
|
|
6
|
+
to ensure valid probability densities.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .core import QFlex
|
|
10
|
+
from .transforms import LogQFlex, LogitQFlex
|
|
11
|
+
from .constraints import ConstraintType, QFlexError
|
|
12
|
+
from .mono_verification import check_proposition4, check_delta_p_monotonicity
|
|
13
|
+
from .utils import compute_w1
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
'QFlex',
|
|
17
|
+
'LogQFlex',
|
|
18
|
+
'LogitQFlex',
|
|
19
|
+
'QFlexError',
|
|
20
|
+
'ConstraintType',
|
|
21
|
+
'check_proposition4',
|
|
22
|
+
'check_delta_p_monotonicity',
|
|
23
|
+
'compute_w1',
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
__version__ = '1.0.0'
|
qflex/basis.py
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
"""
|
|
2
|
+
QFlex Basis Functions
|
|
3
|
+
|
|
4
|
+
Defines the constant term and the three basis function families used in QFlex:
|
|
5
|
+
- Constant term (a_1)
|
|
6
|
+
- Right tail: [-ln(1-p)]^i for i = 1, 2, 3, ...
|
|
7
|
+
- Left tail: (-1)^(i+1) × [ln(p)]^i for i = 1, 2, 3, ...
|
|
8
|
+
- Center: (p - γ)^(2i-1) for i = 1, 2, 3, ... (odd powers)
|
|
9
|
+
|
|
10
|
+
Derivatives are computed analytically for efficiency in Proposition 4/5 verification.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
from typing import List, Tuple
|
|
15
|
+
from enum import Enum
|
|
16
|
+
|
|
17
|
+
# Small epsilon to avoid log(0) and log(1) at boundaries
|
|
18
|
+
PROB_EPS = 1e-12
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class BasisType(Enum):
    """Tag naming which family a single expansion term belongs to."""

    # Intercept term; carries no order of its own.
    CONSTANT = "constant"
    # Right-tail family built from -ln(1 - p).
    F1_TAIL_RIGHT = "f1"
    # Left-tail family built from ln(p).
    F2_TAIL_LEFT = "f2"
    # Center family built from odd powers of (p - gamma).
    F3_CENTER = "f3"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def get_term_structure(terms: int) -> List[Tuple[BasisType, int]]:
    """
    List the (basis_type, order) pair behind each term of the expansion.

    The first term is always the constant (order 0). The remaining terms
    cycle through (f1, f2, f3), and the order goes up by one after each
    complete cycle:
        terms=4: constant, f1^1, f2^1, f3^1
        terms=7: constant, f1^1, f2^1, f3^1, f1^2, f2^2, f3^2

    Parameters
    ----------
    terms : int
        Total number of terms in the expansion.

    Returns
    -------
    structure : list of (BasisType, int)
        Ordered list of basis types and their orders.

    Raises
    ------
    ValueError
        If ``terms`` is smaller than 1.
    """
    if terms < 1:
        raise ValueError("terms must be >= 1")

    cycle = (
        BasisType.F1_TAIL_RIGHT,
        BasisType.F2_TAIL_LEFT,
        BasisType.F3_CENTER,
    )
    structure = [(BasisType.CONSTANT, 0)]
    order = 1

    while len(structure) < terms:
        for family in cycle:
            # The final cycle may be cut short once enough terms exist.
            if len(structure) >= terms:
                break
            structure.append((family, order))
        order += 1

    return structure
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def evaluate_basis(y: np.ndarray, basis_type: BasisType, order: int, gamma: float) -> np.ndarray:
    """
    Compute one basis function at the supplied cumulative probabilities.

    Inputs are first clipped to [PROB_EPS, 1 - PROB_EPS] so that the
    logarithms below never see exactly 0 or 1.

    Parameters
    ----------
    y : array
        Cumulative probabilities in (0, 1).
    basis_type : BasisType
        Family of the basis function.
    order : int
        Order of the basis function (1, 2, 3, ...).
    gamma : float
        Center parameter; only the f3 family reads it.

    Returns
    -------
    values : array
        Basis function value at each (clipped) y.

    Raises
    ------
    ValueError
        If ``basis_type`` is not one of the known families.
    """
    probs = np.clip(np.asarray(y, dtype=float), PROB_EPS, 1 - PROB_EPS)

    if basis_type == BasisType.CONSTANT:
        return np.ones_like(probs)

    if basis_type == BasisType.F1_TAIL_RIGHT:
        # [-ln(1 - p)]^order
        right_tail = -np.log(1 - probs)
        return right_tail ** order

    if basis_type == BasisType.F2_TAIL_LEFT:
        # (-1)^(order + 1) * [ln(p)]^order
        alternating_sign = (-1) ** (order + 1)
        return alternating_sign * (np.log(probs) ** order)

    if basis_type == BasisType.F3_CENTER:
        # (p - gamma)^(2 * order - 1): odd powers only
        odd_power = 2 * order - 1
        return (probs - gamma) ** odd_power

    raise ValueError(f"Unknown basis type: {basis_type}")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def evaluate_basis_derivative(y: np.ndarray, basis_type: BasisType, order: int, gamma: float) -> np.ndarray:
    """
    Compute d/dy of a basis function (used for q(y) = dQ/dy).

    Derivatives are evaluated in closed form rather than by finite
    differences. Inputs are clipped to [PROB_EPS, 1 - PROB_EPS] first, and
    the 1/(1 - y) and 1/y factors are additionally floored at 1e-10 so the
    result stays finite at the extreme ends of the range.

    Parameters
    ----------
    y : array
        Cumulative probabilities in (0, 1).
    basis_type : BasisType
        Which basis family.
    order : int
        Order of the basis function.
    gamma : float
        Center parameter for f3 basis (ignored for other types).

    Returns
    -------
    derivatives : array
        Derivative of basis function at each y.

    Raises
    ------
    ValueError
        If ``basis_type`` is not one of the known families.
    """
    y = np.clip(np.asarray(y, dtype=float), PROB_EPS, 1 - PROB_EPS)

    if basis_type == BasisType.CONSTANT:
        return np.zeros_like(y)

    elif basis_type == BasisType.F1_TAIL_RIGHT:
        # d/dy [-ln(1-y)]^order = order × [-ln(1-y)]^(order-1) / (1-y)
        denom = np.clip(1 - y, 1e-10, None)
        if order == 1:
            return np.ones_like(y) / denom
        # -ln(1-y) is non-negative on (0, 1); the clip only guards rounding.
        u = np.clip(-np.log(1 - y), 0, None)
        return order * (u ** (order - 1)) / denom

    elif basis_type == BasisType.F2_TAIL_LEFT:
        # d/dy [(-1)^(order+1) × ln(y)^order]
        #   = order × (-1)^(order+1) × ln(y)^(order-1) / y
        sign = (-1) ** (order + 1)
        y_clipped = np.clip(y, 1e-10, None)
        if order == 1:
            return sign / y_clipped
        # ln(y) is non-positive on (0, 1) whatever the order, so it is
        # always clipped from above at 0. Clipping from below at 0 for odd
        # orders would zero ln(y) and make the derivative vanish.
        v = np.clip(np.log(y), None, 0)
        return order * sign * (v ** (order - 1)) / y_clipped

    elif basis_type == BasisType.F3_CENTER:
        # d/dy (y-γ)^(2*order-1) = (2*order-1) × (y-γ)^(2*order-2)
        return (2 * order - 1) * ((y - gamma) ** (2 * order - 2))

    else:
        raise ValueError(f"Unknown basis type: {basis_type}")
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def build_design_matrix(y_data: np.ndarray, terms: int, gamma: float) -> np.ndarray:
    """
    Assemble the matrix whose product with the coefficients gives quantiles.

    Each column holds one basis function, in the order returned by
    ``get_term_structure(terms)``, evaluated at every probability.

    Parameters
    ----------
    y_data : array of shape (m,)
        Cumulative probabilities; clipped to [PROB_EPS, 1 - PROB_EPS].
    terms : int
        Number of basis terms (columns).
    gamma : float
        Center parameter.

    Returns
    -------
    Y : array of shape (m, terms)
        Design matrix with Y[i, j] = basis_j(y_data[i]).
    """
    probs = np.clip(np.asarray(y_data, dtype=float), PROB_EPS, 1 - PROB_EPS)
    term_specs = get_term_structure(terms)
    design = np.zeros((len(probs), terms))

    for column, spec in enumerate(term_specs):
        family, power = spec
        design[:, column] = evaluate_basis(probs, family, power, gamma)

    return design
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def evaluate_quantile(y: np.ndarray, coefficients: np.ndarray, terms: int, gamma: float) -> np.ndarray:
    """
    Compute Q(y) as the coefficient-weighted sum of the basis functions.

    Terms are paired with coefficients by position. When fewer
    coefficients than terms are supplied, the terms without a coefficient
    contribute nothing.

    Parameters
    ----------
    y : array
        Cumulative probabilities; clipped to [PROB_EPS, 1 - PROB_EPS].
    coefficients : array of shape (terms,)
        Fitted coefficients.
    terms : int
        Number of terms.
    gamma : float
        Center parameter.

    Returns
    -------
    quantiles : array
        Q(y) values.
    """
    probs = np.clip(np.asarray(y, dtype=float), PROB_EPS, 1 - PROB_EPS)
    term_specs = get_term_structure(terms)
    total = np.zeros_like(probs)

    # Only terms that have a matching coefficient take part in the sum.
    n_used = min(len(term_specs), len(coefficients))
    for k in range(n_used):
        family, power = term_specs[k]
        total += coefficients[k] * evaluate_basis(probs, family, power, gamma)

    return total
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def evaluate_quantile_derivative(y: np.ndarray, coefficients: np.ndarray, terms: int, gamma: float) -> np.ndarray:
    """
    Compute q(y) = dQ/dy as the coefficient-weighted sum of basis derivatives.

    Terms are paired with coefficients by position. When fewer
    coefficients than terms are supplied, the terms without a coefficient
    contribute nothing.

    Parameters
    ----------
    y : array
        Cumulative probabilities; clipped to [PROB_EPS, 1 - PROB_EPS].
    coefficients : array of shape (terms,)
        Fitted coefficients.
    terms : int
        Number of terms.
    gamma : float
        Center parameter.

    Returns
    -------
    derivatives : array
        q(y) = dQ/dy values.
    """
    probs = np.clip(np.asarray(y, dtype=float), PROB_EPS, 1 - PROB_EPS)
    term_specs = get_term_structure(terms)
    total = np.zeros_like(probs)

    # Only terms that have a matching coefficient take part in the sum.
    n_used = min(len(term_specs), len(coefficients))
    for k in range(n_used):
        family, power = term_specs[k]
        total += coefficients[k] * evaluate_basis_derivative(probs, family, power, gamma)

    return total
|