qflex 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- qflex-1.0.0/.gitignore +31 -0
- qflex-1.0.0/LICENSE +21 -0
- qflex-1.0.0/PKG-INFO +300 -0
- qflex-1.0.0/README.md +250 -0
- qflex-1.0.0/pyproject.toml +49 -0
- qflex-1.0.0/qflex/README.md +66 -0
- qflex-1.0.0/qflex/__init__.py +26 -0
- qflex-1.0.0/qflex/basis.py +263 -0
- qflex-1.0.0/qflex/constraints.py +555 -0
- qflex-1.0.0/qflex/core.py +430 -0
- qflex-1.0.0/qflex/mono_verification.py +141 -0
- qflex-1.0.0/qflex/transforms.py +298 -0
- qflex-1.0.0/qflex/utils.py +344 -0
- qflex-1.0.0/tests/__init__.py +0 -0
- qflex-1.0.0/tests/test_qflex.py +277 -0
qflex-1.0.0/.gitignore
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*.pyo
|
|
5
|
+
*.pyd
|
|
6
|
+
.Python
|
|
7
|
+
|
|
8
|
+
# Distribution / packaging
|
|
9
|
+
dist/
|
|
10
|
+
build/
|
|
11
|
+
*.egg-info/
|
|
12
|
+
*.egg
|
|
13
|
+
MANIFEST
|
|
14
|
+
|
|
15
|
+
# Virtual environments
|
|
16
|
+
.venv/
|
|
17
|
+
venv/
|
|
18
|
+
env/
|
|
19
|
+
|
|
20
|
+
# Testing
|
|
21
|
+
.pytest_cache/
|
|
22
|
+
.coverage
|
|
23
|
+
htmlcov/
|
|
24
|
+
|
|
25
|
+
# OS
|
|
26
|
+
.DS_Store
|
|
27
|
+
Thumbs.db
|
|
28
|
+
|
|
29
|
+
# IDEs
|
|
30
|
+
.vscode/
|
|
31
|
+
.idea/
|
qflex-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Rohit Khanna
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
qflex-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: qflex
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: QFlex quantile-parameterized distributions with flexible basis functions
|
|
5
|
+
Project-URL: Repository, https://github.com/rohitkhanna/qflex
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Rohit Khanna
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
License-File: LICENSE
|
|
28
|
+
Keywords: distribution,metalog,probability,quantile,statistics
|
|
29
|
+
Classifier: Development Status :: 4 - Beta
|
|
30
|
+
Classifier: Intended Audience :: Science/Research
|
|
31
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
32
|
+
Classifier: Programming Language :: Python :: 3
|
|
33
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
34
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
35
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
37
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
38
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
39
|
+
Requires-Python: >=3.9
|
|
40
|
+
Requires-Dist: numpy>=1.24
|
|
41
|
+
Requires-Dist: scipy>=1.10
|
|
42
|
+
Provides-Extra: dev
|
|
43
|
+
Requires-Dist: build>=1.0; extra == 'dev'
|
|
44
|
+
Requires-Dist: pytest-cov>=4.0; extra == 'dev'
|
|
45
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
46
|
+
Requires-Dist: twine>=4.0; extra == 'dev'
|
|
47
|
+
Provides-Extra: linear
|
|
48
|
+
Requires-Dist: cvxpy>=1.3; extra == 'linear'
|
|
49
|
+
Description-Content-Type: text/markdown
|
|
50
|
+
|
|
51
|
+
# QFlex
|
|
52
|
+
|
|
53
|
+
[![PyPI version](https://badge.fury.io/py/qflex.svg)](https://badge.fury.io/py/qflex)
|
|
54
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/qflex.svg)](https://pypi.org/project/qflex/)
|
|
55
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
56
|
+
|
|
57
|
+
A Python library implementing **QFlex quantile-parameterized distributions** — a flexible family of probability distributions fit directly from quantile data, with support for unbounded, semibounded, and bounded domains.
|
|
58
|
+
|
|
59
|
+
---
|
|
60
|
+
|
|
61
|
+
## Installation
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install qflex
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
To enable the linear Proposition 5 solver (requires CVXPY):
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
pip install "qflex[linear]"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Background and Theory
|
|
76
|
+
|
|
77
|
+
### What is QFlex?
|
|
78
|
+
|
|
79
|
+
QFlex is a family of quantile-parameterized distributions (QPDs) that represents a probability distribution through its **quantile function** Q(p) rather than through a PDF or CDF. This makes it especially suited for:
|
|
80
|
+
|
|
81
|
+
- **Expert elicitation** — fit a distribution directly from quantile assessments (e.g. P10, P50, P90)
|
|
82
|
+
- **Bayesian updating** — update quantile-based priors without needing closed-form conjugates
|
|
83
|
+
- **Flexible tail modelling** — the basis structure independently controls left-tail, right-tail, and centre behaviour
|
|
84
|
+
|
|
85
|
+
### The Quantile Function
|
|
86
|
+
|
|
87
|
+
The QFlex quantile function is a linear combination of three families of basis functions:
|
|
88
|
+
|
|
89
|
+
```
|
|
90
|
+
Q(p) = Σ_{j=1}^{m} [ a_j · R_j(p) + b_j · L_j(p) + c_j · C_j(p) ]
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
where the three basis families are:
|
|
94
|
+
|
|
95
|
+
| Family | Formula | Role |
|
|
96
|
+
|---|---|---|
|
|
97
|
+
| **Right tail** R_j(p) | `[-ln(1-p)]^j` | Controls right (upper) tail behaviour |
|
|
98
|
+
| **Left tail** L_j(p) | `(-1)^(j+1) · [ln(p)]^j` | Controls left (lower) tail behaviour |
|
|
99
|
+
| **Center** C_j(p) | `(p - γ)^(2j-1)` | Controls centre/body behaviour |
|
|
100
|
+
|
|
101
|
+
The total number of coefficients is determined by `terms`, which sets the depth of each family.
|
|
102
|
+
|
|
103
|
+
### The Gamma (γ) Parameter
|
|
104
|
+
|
|
105
|
+
γ is an internal location parameter that shifts the centre basis to match the skewness of the data. It is estimated automatically from the empirical P10, P50, and P90 of the input:
|
|
106
|
+
|
|
107
|
+
```
|
|
108
|
+
γ = (P50 - P10) / (P90 - P10)
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
A symmetric distribution gives γ ≈ 0.5. Right-skewed data gives γ < 0.5 and left-skewed gives γ > 0.5.
|
|
112
|
+
|
|
113
|
+
### PDF and Feasibility
|
|
114
|
+
|
|
115
|
+
The PDF is obtained from the quantile function via:
|
|
116
|
+
|
|
117
|
+
```
|
|
118
|
+
f(Q(p)) = 1 / q(p) where q(p) = dQ/dp
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
A valid distribution requires q(p) > 0 for all p ∈ (0,1), i.e. Q(p) must be **strictly increasing**. This is not guaranteed by unconstrained least-squares fitting, which motivates the constraint system.
|
|
122
|
+
|
|
123
|
+
### Constraints (Propositions 3–5)
|
|
124
|
+
|
|
125
|
+
The paper establishes sufficient conditions for feasibility:
|
|
126
|
+
|
|
127
|
+
- **Proposition 3 (A / A+)**: Restricting all non-intercept coefficients to be non-negative guarantees a valid PDF.
|
|
128
|
+
- **Proposition 4 (TC_MAG)**: A grid-based condition requiring the minimum derivative contribution from the tail bases to exceed the maximum contribution from the centre basis: `m_tail > M_center`.
|
|
129
|
+
- **Proposition 5 (TC)**: A sharper analytical condition on the ratio of tail-to-centre basis magnitudes, enforced via SLSQP (nonlinear) or a linear auxiliary-variable reformulation.
|
|
130
|
+
|
|
131
|
+
The constraints form a hierarchy from most restrictive (A) to least restrictive (TC), with TC closest to the true feasibility boundary.
|
|
132
|
+
|
|
133
|
+
### Bounded and Semibounded Variants
|
|
134
|
+
|
|
135
|
+
For data with a finite lower bound (for example, quantities that cannot be negative) or data that must lie within a finite range, QFlex is applied to a **transformed** space:
|
|
136
|
+
|
|
137
|
+
| Variant | Transform | Use case |
|
|
138
|
+
|---|---|---|
|
|
139
|
+
| `LogQFlex` | `z = ln(x - L)` | x ∈ (L, +∞) — income, prices, durations |
|
|
140
|
+
| `LogitQFlex` | `z = ln((x-L)/(U-x))` | x ∈ (L, U) — proportions, scores, rates |
|
|
141
|
+
|
|
142
|
+
Fitting happens on z; all outputs are mapped back to the original x scale.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## Quick Start
|
|
147
|
+
|
|
148
|
+
### From quantile pairs (expert elicitation)
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
import numpy as np
|
|
152
|
+
from qflex import QFlex, LogQFlex, LogitQFlex, ConstraintType
|
|
153
|
+
|
|
154
|
+
# Quantile assessments from an expert or empirical summary
|
|
155
|
+
y_data = [0.10, 0.25, 0.50, 0.75, 0.90] # cumulative probabilities
|
|
156
|
+
x_data = [12.0, 18.0, 25.0, 34.0, 45.0] # corresponding quantile values
|
|
157
|
+
|
|
158
|
+
qf = QFlex(x_data, y_data, terms=5)
|
|
159
|
+
|
|
160
|
+
print(qf.quantile([0.1, 0.5, 0.9])) # → [12.0, 25.0, 45.0]
|
|
161
|
+
print(qf.pdf([0.1, 0.5, 0.9])) # density at those quantile points
|
|
162
|
+
print(qf.cdf([20.0, 25.0, 30.0])) # CDF at x values
|
|
163
|
+
|
|
164
|
+
samples = qf.sample(size=1000)
|
|
165
|
+
|
|
166
|
+
m = qf.moments(order=4)
|
|
167
|
+
print(m['mean'], m['std'], m['skewness'], m['kurtosis'])
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
### From raw data (Weibull plotting positions)
|
|
171
|
+
|
|
172
|
+
```python
|
|
173
|
+
import numpy as np
|
|
174
|
+
from qflex import QFlex, LogQFlex, LogitQFlex
|
|
175
|
+
|
|
176
|
+
data = np.random.lognormal(mean=3, sigma=0.5, size=200)
|
|
177
|
+
|
|
178
|
+
# Sorts data, assigns y_i = i/(n+1), then fits
|
|
179
|
+
qf = QFlex.fit_from_data(data, terms=5)
|
|
180
|
+
log_qf = LogQFlex.fit_from_data(data, lower_bound=0, terms=5)
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Summarise and plot
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
qf.summary() # prints formatted table of moments, percentiles, coefficients
|
|
187
|
+
|
|
188
|
+
fig, axes = qf.plot() # PDF (left) + quantile function (right)
|
|
189
|
+
fig.savefig('fit.png')
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
---
|
|
193
|
+
|
|
194
|
+
## Distribution Variants
|
|
195
|
+
|
|
196
|
+
### Unbounded: `QFlex`
|
|
197
|
+
|
|
198
|
+
For data with no natural bounds (e.g. log-returns, temperature anomalies).
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
qf = QFlex(x_data, y_data, terms=5)
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Semibounded: `LogQFlex`
|
|
205
|
+
|
|
206
|
+
For data with a lower bound (e.g. income, time-to-event, asset prices).
|
|
207
|
+
Internally fits QFlex to `ln(x - lower_bound)`.
|
|
208
|
+
|
|
209
|
+
```python
|
|
210
|
+
qf = LogQFlex(x_data, y_data, lower_bound=0, terms=5)
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
### Bounded: `LogitQFlex`
|
|
214
|
+
|
|
215
|
+
For data bounded on both sides (e.g. proportions, percentages, test scores).
|
|
216
|
+
Internally fits QFlex to `logit((x - L) / (U - L))`.
|
|
217
|
+
|
|
218
|
+
```python
|
|
219
|
+
qf = LogitQFlex(x_data, y_data, lower_bound=0, upper_bound=1, terms=5)
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
---
|
|
223
|
+
|
|
224
|
+
## Constraint Types
|
|
225
|
+
|
|
226
|
+
| Constraint | Description | Restrictiveness |
|
|
227
|
+
|---|---|---|
|
|
228
|
+
| `ConstraintType.NONE` | Unconstrained least squares (default) | — |
|
|
229
|
+
| `ConstraintType.A` | All non-intercept coefficients (index k ≥ 2) ≥ 0 (Prop 3) | Most restrictive |
|
|
230
|
+
| `ConstraintType.TL` | Leading tail coefficients ≥ 0 | High |
|
|
231
|
+
| `ConstraintType.TA` | All tail coefficients ≥ 0 | Medium |
|
|
232
|
+
| `ConstraintType.TC` | Prop 5 tail-centre margin > 0 via SLSQP | Least restrictive |
|
|
233
|
+
| `ConstraintType.TC_MAG` | Prop 4 grid-based m_tail > M_center | Low |
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
qf = QFlex(x_data, y_data, terms=5, constraint_type=ConstraintType.TC_MAG)
|
|
237
|
+
|
|
238
|
+
# TC with linear reformulation (requires cvxpy)
|
|
239
|
+
qf = QFlex(x_data, y_data, terms=5,
|
|
240
|
+
constraint_type=ConstraintType.TC, tc_method='linear')
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
---
|
|
244
|
+
|
|
245
|
+
## API Reference
|
|
246
|
+
|
|
247
|
+
All three classes (`QFlex`, `LogQFlex`, `LogitQFlex`) share the following interface:
|
|
248
|
+
|
|
249
|
+
### Constructor
|
|
250
|
+
|
|
251
|
+
| Class | Signature |
|
|
252
|
+
|---|---|
|
|
253
|
+
| `QFlex` | `QFlex(x_data, y_data, terms=5, constraint_type=..., tc_method='nonlinear')` |
|
|
254
|
+
| `LogQFlex` | `LogQFlex(x_data, y_data, lower_bound, terms=5, ...)` |
|
|
255
|
+
| `LogitQFlex` | `LogitQFlex(x_data, y_data, lower_bound, upper_bound, terms=5, ...)` |
|
|
256
|
+
|
|
257
|
+
### Instance Methods
|
|
258
|
+
|
|
259
|
+
| Method | Input | Output | Notes |
|
|
260
|
+
|---|---|---|---|
|
|
261
|
+
| `quantile(y)` | `y ∈ (0,1)` | x values | Core quantile function Q(p) |
|
|
262
|
+
| `pdf(y, method='numerical')` | `y ∈ (0,1)` | density values | Use `method='analytical'` for closed-form (QFlex only) |
|
|
263
|
+
| `cdf(x)` | x values | `p ∈ (0,1)` | Inverts Q(p) numerically |
|
|
264
|
+
| `sample(size=1)` | int | `np.ndarray` | Inverse transform sampling |
|
|
265
|
+
| `moments(order=4)` | int | `dict` | Keys: `mean`, `variance`, `std`, `skewness`, `kurtosis`, `raw_k`, `central_k` |
|
|
266
|
+
| `summary()` | — | printed table | Terms, γ, feasibility, moments, P10/P50/P90, coefficients |
|
|
267
|
+
| `plot(p_grid, show_data, ax)` | optional | `(fig, axes)` | PDF panel + quantile function panel |
|
|
268
|
+
| `check_proposition4()` | — | `dict` | Keys: `satisfied`, `m_tail`, `M_center`, `margin`, `q_flex_min`, `q_flex_positive` |
|
|
269
|
+
|
|
270
|
+
### Class Method
|
|
271
|
+
|
|
272
|
+
| Method | Description |
|
|
273
|
+
|---|---|
|
|
274
|
+
| `fit_from_data(data, terms=5, constraint_type=..., **kwargs)` | Fit from raw observations using Weibull plotting positions `y_i = i/(n+1)` |
|
|
275
|
+
|
|
276
|
+
### Utility
|
|
277
|
+
|
|
278
|
+
```python
|
|
279
|
+
from qflex.utils import compute_w1
|
|
280
|
+
|
|
281
|
+
w1, w1_norm = compute_w1(qf.quantile, x_data, y_data)
|
|
282
|
+
# w1 → Wasserstein-1 distance between fitted and target quantile functions
|
|
283
|
+
# w1_norm → W1 normalised by (P90 - P10) of the data
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
---
|
|
287
|
+
|
|
288
|
+
## Reference
|
|
289
|
+
|
|
290
|
+
> ⚠️ **TODO — fill in before publishing:**
|
|
291
|
+
>
|
|
292
|
+
> [Author(s)]. "[Paper Title]." *Journal/Conference*, vol. X, no. Y, Year, pp. Z–Z. DOI: [doi link]
|
|
293
|
+
|
|
294
|
+
This implementation follows the basis functions, gamma estimation, and Propositions 3–5 as described in the paper above.
|
|
295
|
+
|
|
296
|
+
---
|
|
297
|
+
|
|
298
|
+
## License
|
|
299
|
+
|
|
300
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
qflex-1.0.0/README.md
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# QFlex
|
|
2
|
+
|
|
3
|
+
[![PyPI version](https://badge.fury.io/py/qflex.svg)](https://badge.fury.io/py/qflex)
|
|
4
|
+
[![Python versions](https://img.shields.io/pypi/pyversions/qflex.svg)](https://pypi.org/project/qflex/)
|
|
5
|
+
[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
|
|
6
|
+
|
|
7
|
+
A Python library implementing **QFlex quantile-parameterized distributions** — a flexible family of probability distributions fit directly from quantile data, with support for unbounded, semibounded, and bounded domains.
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install qflex
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
To enable the linear Proposition 5 solver (requires CVXPY):
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install "qflex[linear]"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
## Background and Theory
|
|
26
|
+
|
|
27
|
+
### What is QFlex?
|
|
28
|
+
|
|
29
|
+
QFlex is a family of quantile-parameterized distributions (QPDs) that represents a probability distribution through its **quantile function** Q(p) rather than through a PDF or CDF. This makes it especially suited for:
|
|
30
|
+
|
|
31
|
+
- **Expert elicitation** — fit a distribution directly from quantile assessments (e.g. P10, P50, P90)
|
|
32
|
+
- **Bayesian updating** — update quantile-based priors without needing closed-form conjugates
|
|
33
|
+
- **Flexible tail modelling** — the basis structure independently controls left-tail, right-tail, and centre behaviour
|
|
34
|
+
|
|
35
|
+
### The Quantile Function
|
|
36
|
+
|
|
37
|
+
The QFlex quantile function is a linear combination of three families of basis functions:
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
Q(p) = Σ_{j=1}^{m} [ a_j · R_j(p) + b_j · L_j(p) + c_j · C_j(p) ]
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
where the three basis families are:
|
|
44
|
+
|
|
45
|
+
| Family | Formula | Role |
|
|
46
|
+
|---|---|---|
|
|
47
|
+
| **Right tail** R_j(p) | `[-ln(1-p)]^j` | Controls right (upper) tail behaviour |
|
|
48
|
+
| **Left tail** L_j(p) | `(-1)^(j+1) · [ln(p)]^j` | Controls left (lower) tail behaviour |
|
|
49
|
+
| **Center** C_j(p) | `(p - γ)^(2j-1)` | Controls centre/body behaviour |
|
|
50
|
+
|
|
51
|
+
The total number of coefficients is determined by `terms`, which sets the depth of each family.
|
|
52
|
+
|
|
53
|
+
### The Gamma (γ) Parameter
|
|
54
|
+
|
|
55
|
+
γ is an internal location parameter that shifts the centre basis to match the skewness of the data. It is estimated automatically from the empirical P10, P50, and P90 of the input:
|
|
56
|
+
|
|
57
|
+
```
|
|
58
|
+
γ = (P50 - P10) / (P90 - P10)
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
A symmetric distribution gives γ ≈ 0.5. Right-skewed data gives γ < 0.5 and left-skewed gives γ > 0.5.
|
|
62
|
+
|
|
63
|
+
### PDF and Feasibility
|
|
64
|
+
|
|
65
|
+
The PDF is obtained from the quantile function via:
|
|
66
|
+
|
|
67
|
+
```
|
|
68
|
+
f(Q(p)) = 1 / q(p) where q(p) = dQ/dp
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
A valid distribution requires q(p) > 0 for all p ∈ (0,1), i.e. Q(p) must be **strictly increasing**. This is not guaranteed by unconstrained least-squares fitting, which motivates the constraint system.
|
|
72
|
+
|
|
73
|
+
### Constraints (Propositions 3–5)
|
|
74
|
+
|
|
75
|
+
The paper establishes sufficient conditions for feasibility:
|
|
76
|
+
|
|
77
|
+
- **Proposition 3 (A / A+)**: Restricting all non-intercept coefficients to be non-negative guarantees a valid PDF.
|
|
78
|
+
- **Proposition 4 (TC_MAG)**: A grid-based condition requiring the minimum derivative contribution from the tail bases to exceed the maximum contribution from the centre basis: `m_tail > M_center`.
|
|
79
|
+
- **Proposition 5 (TC)**: A sharper analytical condition on the ratio of tail-to-centre basis magnitudes, enforced via SLSQP (nonlinear) or a linear auxiliary-variable reformulation.
|
|
80
|
+
|
|
81
|
+
The constraints form a hierarchy from most restrictive (A) to least restrictive (TC), with TC closest to the true feasibility boundary.
|
|
82
|
+
|
|
83
|
+
### Bounded and Semibounded Variants
|
|
84
|
+
|
|
85
|
+
For data with a finite lower bound (for example, quantities that cannot be negative) or data that must lie within a finite range, QFlex is applied to a **transformed** space:
|
|
86
|
+
|
|
87
|
+
| Variant | Transform | Use case |
|
|
88
|
+
|---|---|---|
|
|
89
|
+
| `LogQFlex` | `z = ln(x - L)` | x ∈ (L, +∞) — income, prices, durations |
|
|
90
|
+
| `LogitQFlex` | `z = ln((x-L)/(U-x))` | x ∈ (L, U) — proportions, scores, rates |
|
|
91
|
+
|
|
92
|
+
Fitting happens on z; all outputs are mapped back to the original x scale.
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Quick Start
|
|
97
|
+
|
|
98
|
+
### From quantile pairs (expert elicitation)
|
|
99
|
+
|
|
100
|
+
```python
|
|
101
|
+
import numpy as np
|
|
102
|
+
from qflex import QFlex, LogQFlex, LogitQFlex, ConstraintType
|
|
103
|
+
|
|
104
|
+
# Quantile assessments from an expert or empirical summary
|
|
105
|
+
y_data = [0.10, 0.25, 0.50, 0.75, 0.90] # cumulative probabilities
|
|
106
|
+
x_data = [12.0, 18.0, 25.0, 34.0, 45.0] # corresponding quantile values
|
|
107
|
+
|
|
108
|
+
qf = QFlex(x_data, y_data, terms=5)
|
|
109
|
+
|
|
110
|
+
print(qf.quantile([0.1, 0.5, 0.9])) # → [12.0, 25.0, 45.0]
|
|
111
|
+
print(qf.pdf([0.1, 0.5, 0.9])) # density at those quantile points
|
|
112
|
+
print(qf.cdf([20.0, 25.0, 30.0])) # CDF at x values
|
|
113
|
+
|
|
114
|
+
samples = qf.sample(size=1000)
|
|
115
|
+
|
|
116
|
+
m = qf.moments(order=4)
|
|
117
|
+
print(m['mean'], m['std'], m['skewness'], m['kurtosis'])
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### From raw data (Weibull plotting positions)
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
import numpy as np
|
|
124
|
+
from qflex import QFlex, LogQFlex, LogitQFlex
|
|
125
|
+
|
|
126
|
+
data = np.random.lognormal(mean=3, sigma=0.5, size=200)
|
|
127
|
+
|
|
128
|
+
# Sorts data, assigns y_i = i/(n+1), then fits
|
|
129
|
+
qf = QFlex.fit_from_data(data, terms=5)
|
|
130
|
+
log_qf = LogQFlex.fit_from_data(data, lower_bound=0, terms=5)
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Summarise and plot
|
|
134
|
+
|
|
135
|
+
```python
|
|
136
|
+
qf.summary() # prints formatted table of moments, percentiles, coefficients
|
|
137
|
+
|
|
138
|
+
fig, axes = qf.plot() # PDF (left) + quantile function (right)
|
|
139
|
+
fig.savefig('fit.png')
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
---
|
|
143
|
+
|
|
144
|
+
## Distribution Variants
|
|
145
|
+
|
|
146
|
+
### Unbounded: `QFlex`
|
|
147
|
+
|
|
148
|
+
For data with no natural bounds (e.g. log-returns, temperature anomalies).
|
|
149
|
+
|
|
150
|
+
```python
|
|
151
|
+
qf = QFlex(x_data, y_data, terms=5)
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Semibounded: `LogQFlex`
|
|
155
|
+
|
|
156
|
+
For data with a lower bound (e.g. income, time-to-event, asset prices).
|
|
157
|
+
Internally fits QFlex to `ln(x - lower_bound)`.
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
qf = LogQFlex(x_data, y_data, lower_bound=0, terms=5)
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Bounded: `LogitQFlex`
|
|
164
|
+
|
|
165
|
+
For data bounded on both sides (e.g. proportions, percentages, test scores).
|
|
166
|
+
Internally fits QFlex to `logit((x - L) / (U - L))`.
|
|
167
|
+
|
|
168
|
+
```python
|
|
169
|
+
qf = LogitQFlex(x_data, y_data, lower_bound=0, upper_bound=1, terms=5)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Constraint Types
|
|
175
|
+
|
|
176
|
+
| Constraint | Description | Restrictiveness |
|
|
177
|
+
|---|---|---|
|
|
178
|
+
| `ConstraintType.NONE` | Unconstrained least squares (default) | — |
|
|
179
|
+
| `ConstraintType.A` | All non-intercept coefficients (index k ≥ 2) ≥ 0 (Prop 3) | Most restrictive |
|
|
180
|
+
| `ConstraintType.TL` | Leading tail coefficients ≥ 0 | High |
|
|
181
|
+
| `ConstraintType.TA` | All tail coefficients ≥ 0 | Medium |
|
|
182
|
+
| `ConstraintType.TC` | Prop 5 tail-centre margin > 0 via SLSQP | Least restrictive |
|
|
183
|
+
| `ConstraintType.TC_MAG` | Prop 4 grid-based m_tail > M_center | Low |
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
qf = QFlex(x_data, y_data, terms=5, constraint_type=ConstraintType.TC_MAG)
|
|
187
|
+
|
|
188
|
+
# TC with linear reformulation (requires cvxpy)
|
|
189
|
+
qf = QFlex(x_data, y_data, terms=5,
|
|
190
|
+
constraint_type=ConstraintType.TC, tc_method='linear')
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
---
|
|
194
|
+
|
|
195
|
+
## API Reference
|
|
196
|
+
|
|
197
|
+
All three classes (`QFlex`, `LogQFlex`, `LogitQFlex`) share the following interface:
|
|
198
|
+
|
|
199
|
+
### Constructor
|
|
200
|
+
|
|
201
|
+
| Class | Signature |
|
|
202
|
+
|---|---|
|
|
203
|
+
| `QFlex` | `QFlex(x_data, y_data, terms=5, constraint_type=..., tc_method='nonlinear')` |
|
|
204
|
+
| `LogQFlex` | `LogQFlex(x_data, y_data, lower_bound, terms=5, ...)` |
|
|
205
|
+
| `LogitQFlex` | `LogitQFlex(x_data, y_data, lower_bound, upper_bound, terms=5, ...)` |
|
|
206
|
+
|
|
207
|
+
### Instance Methods
|
|
208
|
+
|
|
209
|
+
| Method | Input | Output | Notes |
|
|
210
|
+
|---|---|---|---|
|
|
211
|
+
| `quantile(y)` | `y ∈ (0,1)` | x values | Core quantile function Q(p) |
|
|
212
|
+
| `pdf(y, method='numerical')` | `y ∈ (0,1)` | density values | Use `method='analytical'` for closed-form (QFlex only) |
|
|
213
|
+
| `cdf(x)` | x values | `p ∈ (0,1)` | Inverts Q(p) numerically |
|
|
214
|
+
| `sample(size=1)` | int | `np.ndarray` | Inverse transform sampling |
|
|
215
|
+
| `moments(order=4)` | int | `dict` | Keys: `mean`, `variance`, `std`, `skewness`, `kurtosis`, `raw_k`, `central_k` |
|
|
216
|
+
| `summary()` | — | printed table | Terms, γ, feasibility, moments, P10/P50/P90, coefficients |
|
|
217
|
+
| `plot(p_grid, show_data, ax)` | optional | `(fig, axes)` | PDF panel + quantile function panel |
|
|
218
|
+
| `check_proposition4()` | — | `dict` | Keys: `satisfied`, `m_tail`, `M_center`, `margin`, `q_flex_min`, `q_flex_positive` |
|
|
219
|
+
|
|
220
|
+
### Class Method
|
|
221
|
+
|
|
222
|
+
| Method | Description |
|
|
223
|
+
|---|---|
|
|
224
|
+
| `fit_from_data(data, terms=5, constraint_type=..., **kwargs)` | Fit from raw observations using Weibull plotting positions `y_i = i/(n+1)` |
|
|
225
|
+
|
|
226
|
+
### Utility
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from qflex.utils import compute_w1
|
|
230
|
+
|
|
231
|
+
w1, w1_norm = compute_w1(qf.quantile, x_data, y_data)
|
|
232
|
+
# w1 → Wasserstein-1 distance between fitted and target quantile functions
|
|
233
|
+
# w1_norm → W1 normalised by (P90 - P10) of the data
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
---
|
|
237
|
+
|
|
238
|
+
## Reference
|
|
239
|
+
|
|
240
|
+
> ⚠️ **TODO — fill in before publishing:**
|
|
241
|
+
>
|
|
242
|
+
> [Author(s)]. "[Paper Title]." *Journal/Conference*, vol. X, no. Y, Year, pp. Z–Z. DOI: [doi link]
|
|
243
|
+
|
|
244
|
+
This implementation follows the basis functions, gamma estimation, and Propositions 3–5 as described in the paper above.
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
MIT License. See [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "qflex"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "QFlex quantile-parameterized distributions with flexible basis functions"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.9"
|
|
12
|
+
keywords = ["quantile", "distribution", "statistics", "probability", "metalog"]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 4 - Beta",
|
|
15
|
+
"Intended Audience :: Science/Research",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.9",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
23
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"numpy>=1.24",
|
|
27
|
+
"scipy>=1.10",
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
linear = ["cvxpy>=1.3"]
|
|
32
|
+
dev = [
|
|
33
|
+
"pytest>=7.0",
|
|
34
|
+
"pytest-cov>=4.0",
|
|
35
|
+
"build>=1.0",
|
|
36
|
+
"twine>=4.0",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.urls]
|
|
40
|
+
Repository = "https://github.com/rohitkhanna/qflex"
|
|
41
|
+
|
|
42
|
+
[tool.hatch.build.targets.wheel]
|
|
43
|
+
packages = ["qflex"]
|
|
44
|
+
|
|
45
|
+
[tool.hatch.build.targets.sdist]
|
|
46
|
+
exclude = [
|
|
47
|
+
"/.venv",
|
|
48
|
+
"/tests/__pycache__",
|
|
49
|
+
]
|