pytmpinvi 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pytmpinvi-1.0.0/LICENSE +1 -0
- pytmpinvi-1.0.0/PKG-INFO +271 -0
- pytmpinvi-1.0.0/README.md +248 -0
- pytmpinvi-1.0.0/pyproject.toml +42 -0
- pytmpinvi-1.0.0/pytmpinvi.egg-info/PKG-INFO +271 -0
- pytmpinvi-1.0.0/pytmpinvi.egg-info/SOURCES.txt +10 -0
- pytmpinvi-1.0.0/pytmpinvi.egg-info/dependency_links.txt +1 -0
- pytmpinvi-1.0.0/pytmpinvi.egg-info/requires.txt +2 -0
- pytmpinvi-1.0.0/pytmpinvi.egg-info/top_level.txt +1 -0
- pytmpinvi-1.0.0/setup.cfg +4 -0
- pytmpinvi-1.0.0/tmpinvi/__init__.py +8 -0
- pytmpinvi-1.0.0/tmpinvi/tmpinvi.py +212 -0
pytmpinvi-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
MIT License
|
pytmpinvi-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pytmpinvi
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Interactive Tabular Matrix Problems via Pseudoinverse Estimation
|
|
5
|
+
Author-email: The Economist <29724411+econcz@users.noreply.github.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/econcz/pytmpinvi
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/econcz/pytmpinvi/issues
|
|
9
|
+
Keywords: tabular-matrix-problems,convex-optimization,least-squares,generalized-inverse,regularization
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: numpy>=1.24
|
|
21
|
+
Requires-Dist: pytmpinv>=4.0.0
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# Interactive Tabular Matrix Problems via Pseudoinverse Estimation
|
|
25
|
+
|
|
26
|
+
**Interactive Tabular Matrix Problems via Pseudoinverse Estimation (TMPinvi)** provides an interactive wrapper for the `tmpinv()` function from the *pytmpinv* package, with options extending its functionality to pre- and post-estimation processing and streamlined incorporation of prior cell information. The Tabular Matrix Problems via Pseudoinverse Estimation (TMPinv) is a two-stage estimation method that reformulates structured table-based systems — such as allocation problems, transaction matrices, and input–output tables — as structured least-squares problems. Based on the Convex Least Squares Programming (CLSP) framework, TMPinv solves systems with row and column constraints, block structure, and optionally reduced dimensionality by (1) constructing a canonical constraint form and applying a pseudoinverse-based projection, followed by (2) a convex-programming refinement stage to improve fit, coherence, and regularization (e.g., via Lasso, Ridge, or Elastic Net). All calculations are performed in numpy.float64 precision.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install pytmpinvi
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Example
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import numpy as np
|
|
38
|
+
import pandas as pd
|
|
39
|
+
import statsmodels.formula.api as smf
|
|
40
|
+
from scipy.stats import norm
|
|
41
|
+
from tmpinvi import tmpinvi
|
|
42
|
+
|
|
43
|
+
# Reproducibility
|
|
44
|
+
rng = np.random.default_rng(123456789)
|
|
45
|
+
|
|
46
|
+
iso2 = ["CN", "DE", "JP", "NL", "US"]
|
|
47
|
+
T = 10
|
|
48
|
+
year = np.arange(
|
|
49
|
+
pd.Timestamp.today().year - T + 1,
|
|
50
|
+
pd.Timestamp.today().year + 1
|
|
51
|
+
)
|
|
52
|
+
m = len(iso2)
|
|
53
|
+
|
|
54
|
+
# Construct panel-like data frame
|
|
55
|
+
df = pd.MultiIndex.from_product(
|
|
56
|
+
[year, iso2],
|
|
57
|
+
names=["year", "iso2"]
|
|
58
|
+
).to_frame(index=False)
|
|
59
|
+
|
|
60
|
+
df = df.sort_values(["year", "iso2"]).reset_index(drop=True)
|
|
61
|
+
|
|
62
|
+
ex_cols = [f"EX_{c}" for c in iso2]
|
|
63
|
+
|
|
64
|
+
for nm in ex_cols:
|
|
65
|
+
df[nm] = np.nan
|
|
66
|
+
|
|
67
|
+
df["EX"] = np.nan
|
|
68
|
+
df["IM"] = np.nan
|
|
69
|
+
|
|
70
|
+
X_true = {}
|
|
71
|
+
|
|
72
|
+
# Generate true transaction matrices and incomplete observations
|
|
73
|
+
for t, y in enumerate(year, start=1):
|
|
74
|
+
scale = 1000.0 * (1.05 ** (t - 1))
|
|
75
|
+
|
|
76
|
+
X = rng.uniform(0.0, scale, size=(m, m))
|
|
77
|
+
np.fill_diagonal(X, 0.0)
|
|
78
|
+
|
|
79
|
+
X_true[str(y)] = X.copy()
|
|
80
|
+
|
|
81
|
+
rows = df["year"].eq(y)
|
|
82
|
+
|
|
83
|
+
df.loc[rows, "EX"] = X.sum(axis=1)
|
|
84
|
+
df.loc[rows, "IM"] = X.sum(axis=0)
|
|
85
|
+
|
|
86
|
+
miss = rng.uniform(size=(m, m)) > 0.5
|
|
87
|
+
|
|
88
|
+
X_obs = X.copy()
|
|
89
|
+
X_obs[miss] = np.nan
|
|
90
|
+
|
|
91
|
+
df.loc[rows, ex_cols] = X_obs
|
|
92
|
+
|
|
93
|
+
# Construct upper bounds using linear models
|
|
94
|
+
cv = norm.ppf(0.975)
|
|
95
|
+
|
|
96
|
+
for nm in ex_cols:
|
|
97
|
+
fit = smf.ols(f"{nm} ~ year * C(iso2)", data=df).fit()
|
|
98
|
+
|
|
99
|
+
pr = fit.get_prediction(df)
|
|
100
|
+
sf = pr.summary_frame(alpha=0.05)
|
|
101
|
+
|
|
102
|
+
ub = sf["mean"].to_numpy() + cv * sf["mean_se"].to_numpy()
|
|
103
|
+
ub[ub < 0.0] = np.nan
|
|
104
|
+
|
|
105
|
+
df[f"_{nm}_lb"] = 0.0
|
|
106
|
+
df[f"_{nm}_ub"] = ub
|
|
107
|
+
|
|
108
|
+
def make_bounds(lb, ub):
|
|
109
|
+
return [(a, b) for a, b in zip(lb, ub)]
|
|
110
|
+
|
|
111
|
+
df_out = df.copy()
|
|
112
|
+
|
|
113
|
+
lb_cols = [f"_EX_{c}_lb" for c in iso2]
|
|
114
|
+
ub_cols = [f"_EX_{c}_ub" for c in iso2]
|
|
115
|
+
|
|
116
|
+
# Iterative completion/refinement
|
|
117
|
+
for step in range(1, 3):
|
|
118
|
+
for y in year:
|
|
119
|
+
idx = df_out["year"].eq(y)
|
|
120
|
+
d = df_out.loc[idx].copy()
|
|
121
|
+
|
|
122
|
+
ival = d[ex_cols].to_numpy(dtype=np.float64)
|
|
123
|
+
|
|
124
|
+
lb = d[lb_cols].to_numpy(dtype=np.float64).ravel(order="C")
|
|
125
|
+
ub = d[ub_cols].to_numpy(dtype=np.float64).ravel(order="C")
|
|
126
|
+
|
|
127
|
+
fit = tmpinvi(
|
|
128
|
+
ival=ival,
|
|
129
|
+
ibounds=make_bounds(lb, ub),
|
|
130
|
+
b_row=d["EX"].to_numpy(dtype=np.float64),
|
|
131
|
+
b_col=d["IM"].to_numpy(dtype=np.float64),
|
|
132
|
+
alpha=1.0,
|
|
133
|
+
update=True,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
df_out.loc[idx, ex_cols] = fit.data
|
|
137
|
+
|
|
138
|
+
# Drop temporary bound columns
|
|
139
|
+
drop_cols = df_out.filter(regex=r"^_EX_.*_(lb|ub)$").columns
|
|
140
|
+
df_out = df_out.drop(columns=drop_cols)
|
|
141
|
+
|
|
142
|
+
df_out
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## User Reference
|
|
146
|
+
|
|
147
|
+
For comprehensive information on the estimator's capabilities, advanced configuration options, and implementation details, please refer to the [pytmpinv module](https://pypi.org/project/pytmpinv/ "Tabular Matrix Problems via Pseudoinverse Estimation"), on which TMPinvi is based.
|
|
148
|
+
|
|
149
|
+
To ensure cross-platform reproducibility, all CLSP implementations use a modified condition number function based on singular values, with a relative cutoff equal to `cond_tolerance * the largest singular value`.
|
|
150
|
+
|
|
151
|
+
**TMPinvi Parameters:**
|
|
152
|
+
|
|
153
|
+
`ival` : *array_like* or *None*, default = *None*<br>
|
|
154
|
+
Prior information on known cell values. If supplied and not entirely missing, `ival` is flattened and used to construct `b_val` and the corresponding identity-subset model matrix `M` internally. Missing entries (*np.nan*) are ignored. If all entries of `ival` are *np.nan*, no prior information is used and `b_val` and `M` are not passed to `tmpinv()`. When `ival` is provided, it overrides any `b_val` or `M` arguments supplied through keyword arguments.
|
|
155
|
+
|
|
156
|
+
`ibounds` : *tuple*, *list*, or *None*, default = *None*<br>
|
|
157
|
+
Dynamic cell-value bounds passed to `tmpinv(bounds=...)`. The object supplied to `ibounds` may be created or modified programmatically (for example within `preestimation()`). If a single pair such as *(low, high)* is provided, it is applied uniformly to all cells. Alternatively, a list of pairs may be supplied to specify cell-specific bounds with others set to None or *np.nan*. When `ibounds` is not *None*, it overrides any `bounds` argument supplied through keyword arguments.
|
|
158
|
+
|
|
159
|
+
`preestimation` : *callable* or *None*, default = *None*<br>
|
|
160
|
+
A function executed prior to model estimation. If supplied, it is called as `preestimation(ival)` and may perform arbitrary preparatory steps, such as constructing dynamic bounds or modifying objects in the calling environment. The return value is ignored.
|
|
161
|
+
|
|
162
|
+
`postestimation` : *callable* or *None*, default = *None*<br>
|
|
163
|
+
A function executed after model estimation. For a full model, it is called as `postestimation(model)`. For reduced (block-wise) models, it is called as `postestimation(model_i, i)` for each block index `i`. The return value is ignored.
|
|
164
|
+
|
|
165
|
+
`update` : *bool*, default = *False*<br>
|
|
166
|
+
If *True* and `ival` is supplied, missing entries (*np.nan*) in `ival` are replaced by the corresponding fitted values from `result.x`. The updated matrix is returned in the `result.data` component. If *False*, the data component contains the fitted solution matrix `result.x`.
|
|
167
|
+
|
|
168
|
+
**TMPinv Parameters:**
|
|
169
|
+
|
|
170
|
+
`S` : *array_like* of shape *(m + p, m + p)*, optional<br>
|
|
171
|
+
A diagonal sign slack (surplus) matrix with entries in *{0, ±1}*.<br>
|
|
172
|
+
- *0* enforces equality (== `b_row` or `b_col`),<br>
|
|
173
|
+
- *1* enforces a less-than-or-equal (≤) condition,<br>
|
|
174
|
+
- *-1* enforces a greater-than-or-equal (≥) condition.
|
|
175
|
+
|
|
176
|
+
The first `m` diagonal entries correspond to row constraints, and the remaining `p` to column constraints. Please note that, in the reduced model, `S` is ignored: slack behavior is derived implicitly from block-wise marginal totals.
|
|
177
|
+
|
|
178
|
+
`M` : *array_like* of shape *(k, m * p)*, optional<br>
|
|
179
|
+
A model matrix with entries in *{0, 1}*. Each row defines a linear restriction on the flattened solution matrix. The corresponding right-hand side values must be provided in `b_val`. This block is used to encode known cell values. Please note that, in the reduced model, `M` must be a unique row subset of an identity matrix (i.e., diagonal-only). Arbitrary or non-diagonal model matrices cannot be mapped to reduced blocks, making the model infeasible.
|
|
180
|
+
|
|
181
|
+
`b_row` : *array_like* of shape *(m,)*<br>
|
|
182
|
+
Right-hand side vector of row totals. Please note that both `b_row` and `b_col` must be provided.
|
|
183
|
+
|
|
184
|
+
`b_col` : *array_like* of shape *(p,)*<br>
|
|
185
|
+
Right-hand side vector of column totals. Please note that both `b_row` and `b_col` must be provided.
|
|
186
|
+
|
|
187
|
+
`b_val` : *array_like* of shape *(k,)*<br>
|
|
188
|
+
Right-hand side vector of known cell values.
|
|
189
|
+
|
|
190
|
+
`i` : *int*, default = *1*<br>
|
|
191
|
+
Number of row groups.
|
|
192
|
+
|
|
193
|
+
`j` : *int*, default = *1*<br>
|
|
194
|
+
Number of column groups.
|
|
195
|
+
|
|
196
|
+
`zero_diagonal` : *bool*, default = *False*<br>
|
|
197
|
+
If *True*, enforces the zero diagonal.
|
|
198
|
+
|
|
199
|
+
`reduced` : *tuple* of *(int, int)*, optional<br>
|
|
200
|
+
Dimensions of the reduced problem. If specified, the problem is estimated as a set of reduced problems constructed from contiguous submatrices of the original table. For example, `reduced` = *(6, 6)* implies *5×5* data blocks with *1* slack row and *1* slack column each (edge blocks may be smaller).
|
|
201
|
+
|
|
202
|
+
`symmetric` : *bool*, default = *False*<br>
|
|
203
|
+
If *True*, enforces symmetry of the estimated solution matrix as `x = 0.5 * (x + x.T)`.<br>
|
|
204
|
+
Applies to TMPinviResult.x only. For TMPinviResult.model symmetry, add explicit symmetry constraints to M in a full-model solve instead of using this flag.
|
|
205
|
+
|
|
206
|
+
`bounds` : *sequence* of *(low, high)*, optional<br>
|
|
207
|
+
Bounds on cell values. If a single tuple *(low, high)* is given, it is applied to all `m` * `p` cells. Example: *(0, None)*.
|
|
208
|
+
|
|
209
|
+
`replace_value` : *float* or *None*, default = *np.nan*<br>
|
|
210
|
+
Final replacement value for any cell in the solution matrix that violates the specified bounds by more than the given tolerance.
|
|
211
|
+
|
|
212
|
+
`tolerance` : *float*, default = *square root of machine epsilon*<br>
|
|
213
|
+
Convergence tolerance for bounds.
|
|
214
|
+
|
|
215
|
+
`iteration_limit` : *int*, default = *50*<br>
|
|
216
|
+
Maximum number of iterations allowed in the refinement loop.
|
|
217
|
+
|
|
218
|
+
**CLSP Parameters:**
|
|
219
|
+
|
|
220
|
+
`r` : *int*, default = *1*<br>
|
|
221
|
+
Number of refinement iterations for the pseudoinverse-based estimator.
|
|
222
|
+
|
|
223
|
+
`Z` : *np.ndarray* or *None*<br>
|
|
224
|
+
A symmetric idempotent matrix (projector) defining the subspace for Bott–Duffin pseudoinversion. If *None*, the identity matrix is used, reducing the Bott–Duffin inverse to the Moore–Penrose case.
|
|
225
|
+
|
|
226
|
+
`final` : *bool*, default = *True*<br>
|
|
227
|
+
If *True*, a convex programming problem is solved to refine `zhat`. The resulting solution `z` minimizes a weighted L1/L2 norm around `zhat` subject to `Az = b`.
|
|
228
|
+
|
|
229
|
+
`alpha` : *float*, *list[float]* or *None*, default = *None*<br>
|
|
230
|
+
Regularization parameter (weight) in the final convex program:<br>
|
|
231
|
+
- `α = 0`: Lasso (L1 norm)<br>
|
|
232
|
+
- `α = 1`: Tikhonov Regularization/Ridge (L2 norm)<br>
|
|
233
|
+
- `0 < α < 1`: Elastic Net<br>
|
|
234
|
+
If a scalar float is provided, that value is used after clipping to [0, 1].<br>
|
|
235
|
+
If a list/iterable of floats is provided, each candidate is evaluated via a full solve, and the α with the smallest NRMSE is selected.<br>
|
|
236
|
+
If *None*, α is chosen based on an error rule: `α = min(1.0, NRMSE_{α = 0} / (NRMSE_{α = 0} + NRMSE_{α = 1} + tolerance))`
|
|
237
|
+
|
|
238
|
+
`cond_tolerance` : *float* or *None*, default = *None*<br>
|
|
239
|
+
Singular-value cutoff for the custom condition number function.<br>
|
|
240
|
+
If *None*, the implementation uses an internal relative cutoff of `1e-14`.
|
|
241
|
+
|
|
242
|
+
`*args`, `**kwargs` : optional<br>
|
|
243
|
+
CVXPY arguments passed to the CVXPY solver.
|
|
244
|
+
|
|
245
|
+
**Returns:**
|
|
246
|
+
*TMPinviResult*
|
|
247
|
+
|
|
248
|
+
`TMPinviResult.full` : *bool*<br>
|
|
249
|
+
Indicates if this result comes from the full (non-reduced) model.
|
|
250
|
+
|
|
251
|
+
`TMPinviResult.model` : *CLSP* or *list* of *CLSP*<br>
|
|
252
|
+
A single CLSP object in the full model, or a list of CLSP objects for each reduced block in the reduced model.
|
|
253
|
+
|
|
254
|
+
`TMPinviResult.x` : *np.ndarray*<br>
|
|
255
|
+
Final estimated solution matrix of shape *(m, p)*.
|
|
256
|
+
|
|
257
|
+
`TMPinviResult.data` : *np.ndarray*<br>
|
|
258
|
+
Processed output matrix of shape *(m, p)*. If `update=True` and `ival` is supplied in `tmpinvi()`, `data` contains `ival` with missing entries replaced by fitted values from `x`. Otherwise, data contains the fitted solution matrix `x`.
|
|
259
|
+
|
|
260
|
+
`TMPinviResult.summarize(i, display)`<br>
|
|
261
|
+
An alias of TMPinviResult.summary().
|
|
262
|
+
|
|
263
|
+
`TMPinviResult.summary(i, display)`<br>
|
|
264
|
+
Return or print a summary of the underlying CLSP result. The argument `i` (*int*, default = *None*) is the index of a reduced-block model in `TMPinviResult.model`.
|
|
265
|
+
|
|
266
|
+
## Bibliography
|
|
267
|
+
Bolotov, I. (2025). CLSP: Linear Algebra Foundations of a Modular Two-Step Convex Optimization-Based Estimator for Ill-Posed Problems. *Mathematics*, *13*(21), 3476. [https://doi.org/10.3390/math13213476](https://doi.org/10.3390/math13213476)
|
|
268
|
+
|
|
269
|
+
## License
|
|
270
|
+
|
|
271
|
+
MIT License — see the [LICENSE](LICENSE) file.
|
|
@@ -0,0 +1,248 @@
|
|
|
1
|
+
# Interactive Tabular Matrix Problems via Pseudoinverse Estimation
|
|
2
|
+
|
|
3
|
+
**Interactive Tabular Matrix Problems via Pseudoinverse Estimation (TMPinvi)** provides an interactive wrapper for the `tmpinv()` function from the *pytmpinv* package, with options extending its functionality to pre- and post-estimation processing and streamlined incorporation of prior cell information. The Tabular Matrix Problems via Pseudoinverse Estimation (TMPinv) is a two-stage estimation method that reformulates structured table-based systems — such as allocation problems, transaction matrices, and input–output tables — as structured least-squares problems. Based on the Convex Least Squares Programming (CLSP) framework, TMPinv solves systems with row and column constraints, block structure, and optionally reduced dimensionality by (1) constructing a canonical constraint form and applying a pseudoinverse-based projection, followed by (2) a convex-programming refinement stage to improve fit, coherence, and regularization (e.g., via Lasso, Ridge, or Elastic Net). All calculations are performed in numpy.float64 precision.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install pytmpinvi
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Example
|
|
12
|
+
|
|
13
|
+
```python
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import statsmodels.formula.api as smf
|
|
17
|
+
from scipy.stats import norm
|
|
18
|
+
from tmpinvi import tmpinvi
|
|
19
|
+
|
|
20
|
+
# Reproducibility
|
|
21
|
+
rng = np.random.default_rng(123456789)
|
|
22
|
+
|
|
23
|
+
iso2 = ["CN", "DE", "JP", "NL", "US"]
|
|
24
|
+
T = 10
|
|
25
|
+
year = np.arange(
|
|
26
|
+
pd.Timestamp.today().year - T + 1,
|
|
27
|
+
pd.Timestamp.today().year + 1
|
|
28
|
+
)
|
|
29
|
+
m = len(iso2)
|
|
30
|
+
|
|
31
|
+
# Construct panel-like data frame
|
|
32
|
+
df = pd.MultiIndex.from_product(
|
|
33
|
+
[year, iso2],
|
|
34
|
+
names=["year", "iso2"]
|
|
35
|
+
).to_frame(index=False)
|
|
36
|
+
|
|
37
|
+
df = df.sort_values(["year", "iso2"]).reset_index(drop=True)
|
|
38
|
+
|
|
39
|
+
ex_cols = [f"EX_{c}" for c in iso2]
|
|
40
|
+
|
|
41
|
+
for nm in ex_cols:
|
|
42
|
+
df[nm] = np.nan
|
|
43
|
+
|
|
44
|
+
df["EX"] = np.nan
|
|
45
|
+
df["IM"] = np.nan
|
|
46
|
+
|
|
47
|
+
X_true = {}
|
|
48
|
+
|
|
49
|
+
# Generate true transaction matrices and incomplete observations
|
|
50
|
+
for t, y in enumerate(year, start=1):
|
|
51
|
+
scale = 1000.0 * (1.05 ** (t - 1))
|
|
52
|
+
|
|
53
|
+
X = rng.uniform(0.0, scale, size=(m, m))
|
|
54
|
+
np.fill_diagonal(X, 0.0)
|
|
55
|
+
|
|
56
|
+
X_true[str(y)] = X.copy()
|
|
57
|
+
|
|
58
|
+
rows = df["year"].eq(y)
|
|
59
|
+
|
|
60
|
+
df.loc[rows, "EX"] = X.sum(axis=1)
|
|
61
|
+
df.loc[rows, "IM"] = X.sum(axis=0)
|
|
62
|
+
|
|
63
|
+
miss = rng.uniform(size=(m, m)) > 0.5
|
|
64
|
+
|
|
65
|
+
X_obs = X.copy()
|
|
66
|
+
X_obs[miss] = np.nan
|
|
67
|
+
|
|
68
|
+
df.loc[rows, ex_cols] = X_obs
|
|
69
|
+
|
|
70
|
+
# Construct upper bounds using linear models
|
|
71
|
+
cv = norm.ppf(0.975)
|
|
72
|
+
|
|
73
|
+
for nm in ex_cols:
|
|
74
|
+
fit = smf.ols(f"{nm} ~ year * C(iso2)", data=df).fit()
|
|
75
|
+
|
|
76
|
+
pr = fit.get_prediction(df)
|
|
77
|
+
sf = pr.summary_frame(alpha=0.05)
|
|
78
|
+
|
|
79
|
+
ub = sf["mean"].to_numpy() + cv * sf["mean_se"].to_numpy()
|
|
80
|
+
ub[ub < 0.0] = np.nan
|
|
81
|
+
|
|
82
|
+
df[f"_{nm}_lb"] = 0.0
|
|
83
|
+
df[f"_{nm}_ub"] = ub
|
|
84
|
+
|
|
85
|
+
def make_bounds(lb, ub):
|
|
86
|
+
return [(a, b) for a, b in zip(lb, ub)]
|
|
87
|
+
|
|
88
|
+
df_out = df.copy()
|
|
89
|
+
|
|
90
|
+
lb_cols = [f"_EX_{c}_lb" for c in iso2]
|
|
91
|
+
ub_cols = [f"_EX_{c}_ub" for c in iso2]
|
|
92
|
+
|
|
93
|
+
# Iterative completion/refinement
|
|
94
|
+
for step in range(1, 3):
|
|
95
|
+
for y in year:
|
|
96
|
+
idx = df_out["year"].eq(y)
|
|
97
|
+
d = df_out.loc[idx].copy()
|
|
98
|
+
|
|
99
|
+
ival = d[ex_cols].to_numpy(dtype=np.float64)
|
|
100
|
+
|
|
101
|
+
lb = d[lb_cols].to_numpy(dtype=np.float64).ravel(order="C")
|
|
102
|
+
ub = d[ub_cols].to_numpy(dtype=np.float64).ravel(order="C")
|
|
103
|
+
|
|
104
|
+
fit = tmpinvi(
|
|
105
|
+
ival=ival,
|
|
106
|
+
ibounds=make_bounds(lb, ub),
|
|
107
|
+
b_row=d["EX"].to_numpy(dtype=np.float64),
|
|
108
|
+
b_col=d["IM"].to_numpy(dtype=np.float64),
|
|
109
|
+
alpha=1.0,
|
|
110
|
+
update=True,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
df_out.loc[idx, ex_cols] = fit.data
|
|
114
|
+
|
|
115
|
+
# Drop temporary bound columns
|
|
116
|
+
drop_cols = df_out.filter(regex=r"^_EX_.*_(lb|ub)$").columns
|
|
117
|
+
df_out = df_out.drop(columns=drop_cols)
|
|
118
|
+
|
|
119
|
+
df_out
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## User Reference
|
|
123
|
+
|
|
124
|
+
For comprehensive information on the estimator's capabilities, advanced configuration options, and implementation details, please refer to the [pytmpinv module](https://pypi.org/project/pytmpinv/ "Tabular Matrix Problems via Pseudoinverse Estimation"), on which TMPinvi is based.
|
|
125
|
+
|
|
126
|
+
To ensure cross-platform reproducibility, all CLSP implementations use a modified condition number function based on singular values, with a relative cutoff equal to `cond_tolerance * the largest singular value`.
|
|
127
|
+
|
|
128
|
+
**TMPinvi Parameters:**
|
|
129
|
+
|
|
130
|
+
`ival` : *array_like* or *None*, default = *None*<br>
|
|
131
|
+
Prior information on known cell values. If supplied and not entirely missing, `ival` is flattened and used to construct `b_val` and the corresponding identity-subset model matrix `M` internally. Missing entries (*np.nan*) are ignored. If all entries of `ival` are *np.nan*, no prior information is used and `b_val` and `M` are not passed to `tmpinv()`. When `ival` is provided, it overrides any `b_val` or `M` arguments supplied through keyword arguments.
|
|
132
|
+
|
|
133
|
+
`ibounds` : *tuple*, *list*, or *None*, default = *None*<br>
|
|
134
|
+
Dynamic cell-value bounds passed to `tmpinv(bounds=...)`. The object supplied to `ibounds` may be created or modified programmatically (for example within `preestimation()`). If a single pair such as *(low, high)* is provided, it is applied uniformly to all cells. Alternatively, a list of pairs may be supplied to specify cell-specific bounds with others set to None or *np.nan*. When `ibounds` is not *None*, it overrides any `bounds` argument supplied through keyword arguments.
|
|
135
|
+
|
|
136
|
+
`preestimation` : *callable* or *None*, default = *None*<br>
|
|
137
|
+
A function executed prior to model estimation. If supplied, it is called as `preestimation(ival)` and may perform arbitrary preparatory steps, such as constructing dynamic bounds or modifying objects in the calling environment. The return value is ignored.
|
|
138
|
+
|
|
139
|
+
`postestimation` : *callable* or *None*, default = *None*<br>
|
|
140
|
+
A function executed after model estimation. For a full model, it is called as `postestimation(model)`. For reduced (block-wise) models, it is called as `postestimation(model_i, i)` for each block index `i`. The return value is ignored.
|
|
141
|
+
|
|
142
|
+
`update` : *bool*, default = *False*<br>
|
|
143
|
+
If *True* and `ival` is supplied, missing entries (*np.nan*) in `ival` are replaced by the corresponding fitted values from `result.x`. The updated matrix is returned in the `result.data` component. If *False*, the data component contains the fitted solution matrix `result.x`.
|
|
144
|
+
|
|
145
|
+
**TMPinv Parameters:**
|
|
146
|
+
|
|
147
|
+
`S` : *array_like* of shape *(m + p, m + p)*, optional<br>
|
|
148
|
+
A diagonal sign slack (surplus) matrix with entries in *{0, ±1}*.<br>
|
|
149
|
+
- *0* enforces equality (== `b_row` or `b_col`),<br>
|
|
150
|
+
- *1* enforces a less-than-or-equal (≤) condition,<br>
|
|
151
|
+
- *-1* enforces a greater-than-or-equal (≥) condition.
|
|
152
|
+
|
|
153
|
+
The first `m` diagonal entries correspond to row constraints, and the remaining `p` to column constraints. Please note that, in the reduced model, `S` is ignored: slack behavior is derived implicitly from block-wise marginal totals.
|
|
154
|
+
|
|
155
|
+
`M` : *array_like* of shape *(k, m * p)*, optional<br>
|
|
156
|
+
A model matrix with entries in *{0, 1}*. Each row defines a linear restriction on the flattened solution matrix. The corresponding right-hand side values must be provided in `b_val`. This block is used to encode known cell values. Please note that, in the reduced model, `M` must be a unique row subset of an identity matrix (i.e., diagonal-only). Arbitrary or non-diagonal model matrices cannot be mapped to reduced blocks, making the model infeasible.
|
|
157
|
+
|
|
158
|
+
`b_row` : *array_like* of shape *(m,)*<br>
|
|
159
|
+
Right-hand side vector of row totals. Please note that both `b_row` and `b_col` must be provided.
|
|
160
|
+
|
|
161
|
+
`b_col` : *array_like* of shape *(p,)*<br>
|
|
162
|
+
Right-hand side vector of column totals. Please note that both `b_row` and `b_col` must be provided.
|
|
163
|
+
|
|
164
|
+
`b_val` : *array_like* of shape *(k,)*<br>
|
|
165
|
+
Right-hand side vector of known cell values.
|
|
166
|
+
|
|
167
|
+
`i` : *int*, default = *1*<br>
|
|
168
|
+
Number of row groups.
|
|
169
|
+
|
|
170
|
+
`j` : *int*, default = *1*<br>
|
|
171
|
+
Number of column groups.
|
|
172
|
+
|
|
173
|
+
`zero_diagonal` : *bool*, default = *False*<br>
|
|
174
|
+
If *True*, enforces the zero diagonal.
|
|
175
|
+
|
|
176
|
+
`reduced` : *tuple* of *(int, int)*, optional<br>
|
|
177
|
+
Dimensions of the reduced problem. If specified, the problem is estimated as a set of reduced problems constructed from contiguous submatrices of the original table. For example, `reduced` = *(6, 6)* implies *5×5* data blocks with *1* slack row and *1* slack column each (edge blocks may be smaller).
|
|
178
|
+
|
|
179
|
+
`symmetric` : *bool*, default = *False*<br>
|
|
180
|
+
If *True*, enforces symmetry of the estimated solution matrix as `x = 0.5 * (x + x.T)`.<br>
|
|
181
|
+
Applies to TMPinviResult.x only. For TMPinviResult.model symmetry, add explicit symmetry constraints to M in a full-model solve instead of using this flag.
|
|
182
|
+
|
|
183
|
+
`bounds` : *sequence* of *(low, high)*, optional<br>
|
|
184
|
+
Bounds on cell values. If a single tuple *(low, high)* is given, it is applied to all `m` * `p` cells. Example: *(0, None)*.
|
|
185
|
+
|
|
186
|
+
`replace_value` : *float* or *None*, default = *np.nan*<br>
|
|
187
|
+
Final replacement value for any cell in the solution matrix that violates the specified bounds by more than the given tolerance.
|
|
188
|
+
|
|
189
|
+
`tolerance` : *float*, default = *square root of machine epsilon*<br>
|
|
190
|
+
Convergence tolerance for bounds.
|
|
191
|
+
|
|
192
|
+
`iteration_limit` : *int*, default = *50*<br>
|
|
193
|
+
Maximum number of iterations allowed in the refinement loop.
|
|
194
|
+
|
|
195
|
+
**CLSP Parameters:**
|
|
196
|
+
|
|
197
|
+
`r` : *int*, default = *1*<br>
|
|
198
|
+
Number of refinement iterations for the pseudoinverse-based estimator.
|
|
199
|
+
|
|
200
|
+
`Z` : *np.ndarray* or *None*<br>
|
|
201
|
+
A symmetric idempotent matrix (projector) defining the subspace for Bott–Duffin pseudoinversion. If *None*, the identity matrix is used, reducing the Bott–Duffin inverse to the Moore–Penrose case.
|
|
202
|
+
|
|
203
|
+
`final` : *bool*, default = *True*<br>
|
|
204
|
+
If *True*, a convex programming problem is solved to refine `zhat`. The resulting solution `z` minimizes a weighted L1/L2 norm around `zhat` subject to `Az = b`.
|
|
205
|
+
|
|
206
|
+
`alpha` : *float*, *list[float]* or *None*, default = *None*<br>
|
|
207
|
+
Regularization parameter (weight) in the final convex program:<br>
|
|
208
|
+
- `α = 0`: Lasso (L1 norm)<br>
|
|
209
|
+
- `α = 1`: Tikhonov Regularization/Ridge (L2 norm)<br>
|
|
210
|
+
- `0 < α < 1`: Elastic Net<br>
|
|
211
|
+
If a scalar float is provided, that value is used after clipping to [0, 1].<br>
|
|
212
|
+
If a list/iterable of floats is provided, each candidate is evaluated via a full solve, and the α with the smallest NRMSE is selected.<br>
|
|
213
|
+
If *None*, α is chosen based on an error rule: `α = min(1.0, NRMSE_{α = 0} / (NRMSE_{α = 0} + NRMSE_{α = 1} + tolerance))`
|
|
214
|
+
|
|
215
|
+
`cond_tolerance` : *float* or *None*, default = *None*<br>
|
|
216
|
+
Singular-value cutoff for the custom condition number function.<br>
|
|
217
|
+
If *None*, the implementation uses an internal relative cutoff of `1e-14`.
|
|
218
|
+
|
|
219
|
+
`*args`, `**kwargs` : optional<br>
|
|
220
|
+
CVXPY arguments passed to the CVXPY solver.
|
|
221
|
+
|
|
222
|
+
**Returns:**
|
|
223
|
+
*TMPinviResult*
|
|
224
|
+
|
|
225
|
+
`TMPinviResult.full` : *bool*<br>
|
|
226
|
+
Indicates if this result comes from the full (non-reduced) model.
|
|
227
|
+
|
|
228
|
+
`TMPinviResult.model` : *CLSP* or *list* of *CLSP*<br>
|
|
229
|
+
A single CLSP object in the full model, or a list of CLSP objects for each reduced block in the reduced model.
|
|
230
|
+
|
|
231
|
+
`TMPinviResult.x` : *np.ndarray*<br>
|
|
232
|
+
Final estimated solution matrix of shape *(m, p)*.
|
|
233
|
+
|
|
234
|
+
`TMPinviResult.data` : *np.ndarray*<br>
|
|
235
|
+
Processed output matrix of shape *(m, p)*. If `update=True` and `ival` is supplied in `tmpinvi()`, `data` contains `ival` with missing entries replaced by fitted values from `x`. Otherwise, data contains the fitted solution matrix `x`.
|
|
236
|
+
|
|
237
|
+
`TMPinviResult.summarize(i, display)`<br>
|
|
238
|
+
An alias of TMPinviResult.summary().
|
|
239
|
+
|
|
240
|
+
`TMPinviResult.summary(i, display)`<br>
|
|
241
|
+
Return or print a summary of the underlying CLSP result. The argument `i` (*int*, default = *None*) is the index of a reduced-block model in `TMPinviResult.model`.
|
|
242
|
+
|
|
243
|
+
## Bibliography
|
|
244
|
+
Bolotov, I. (2025). CLSP: Linear Algebra Foundations of a Modular Two-Step Convex Optimization-Based Estimator for Ill-Posed Problems. *Mathematics*, *13*(21), 3476. [https://doi.org/10.3390/math13213476](https://doi.org/10.3390/math13213476)
|
|
245
|
+
|
|
246
|
+
## License
|
|
247
|
+
|
|
248
|
+
MIT License — see the [LICENSE](LICENSE) file.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "pytmpinvi"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "Interactive Tabular Matrix Problems via Pseudoinverse Estimation"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
license-files = ["LICENSE"]
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "The Economist", email = "29724411+econcz@users.noreply.github.com" }
|
|
11
|
+
]
|
|
12
|
+
keywords = [
|
|
13
|
+
"tabular-matrix-problems",
|
|
14
|
+
"convex-optimization",
|
|
15
|
+
"least-squares",
|
|
16
|
+
"generalized-inverse",
|
|
17
|
+
"regularization"
|
|
18
|
+
]
|
|
19
|
+
classifiers = [
|
|
20
|
+
"Programming Language :: Python :: 3",
|
|
21
|
+
"Programming Language :: Python :: 3.10",
|
|
22
|
+
"Programming Language :: Python :: 3.11",
|
|
23
|
+
"Programming Language :: Python :: 3.12",
|
|
24
|
+
"Operating System :: OS Independent",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
26
|
+
"Topic :: Scientific/Engineering :: Information Analysis"
|
|
27
|
+
]
|
|
28
|
+
dependencies = [
|
|
29
|
+
"numpy>=1.24",
|
|
30
|
+
"pytmpinv>=4.0.0"
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[project.urls]
|
|
34
|
+
Homepage = "https://github.com/econcz/pytmpinvi"
|
|
35
|
+
"Bug Tracker" = "https://github.com/econcz/pytmpinvi/issues"
|
|
36
|
+
|
|
37
|
+
[build-system]
|
|
38
|
+
requires = ["setuptools>=77", "wheel"]
|
|
39
|
+
build-backend = "setuptools.build_meta"
|
|
40
|
+
|
|
41
|
+
[tool.setuptools]
|
|
42
|
+
packages = ["tmpinvi"]
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pytmpinvi
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Interactive Tabular Matrix Problems via Pseudoinverse Estimation
|
|
5
|
+
Author-email: The Economist <29724411+econcz@users.noreply.github.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/econcz/pytmpinvi
|
|
8
|
+
Project-URL: Bug Tracker, https://github.com/econcz/pytmpinvi/issues
|
|
9
|
+
Keywords: tabular-matrix-problems,convex-optimization,least-squares,generalized-inverse,regularization
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
License-File: LICENSE
|
|
20
|
+
Requires-Dist: numpy>=1.24
|
|
21
|
+
Requires-Dist: pytmpinv>=4.0.0
|
|
22
|
+
Dynamic: license-file
|
|
23
|
+
|
|
24
|
+
# Interactive Tabular Matrix Problems via Pseudoinverse Estimation
|
|
25
|
+
|
|
26
|
+
**Interactive Tabular Matrix Problems via Pseudoinverse Estimation (TMPinvi)** provides an interactive wrapper for the `tmpinv()` function from the *pytmpinv* package, with options extending its functionality to pre- and post-estimation processing and streamlined incorporation of prior cell information. The Tabular Matrix Problems via Pseudoinverse Estimation (TMPinv) is a two-stage estimation method that reformulates structured table-based systems — such as allocation problems, transaction matrices, and input–output tables — as structured least-squares problems. Based on the Convex Least Squares Programming (CLSP) framework, TMPinv solves systems with row and column constraints, block structure, and optionally reduced dimensionality by (1) constructing a canonical constraint form and applying a pseudoinverse-based projection, followed by (2) a convex-programming refinement stage to improve fit, coherence, and regularization (e.g., via Lasso, Ridge, or Elastic Net). All calculations are performed in numpy.float64 precision.
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
pip install pytmpinvi
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Example
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import numpy as np
|
|
38
|
+
import pandas as pd
|
|
39
|
+
import statsmodels.formula.api as smf
|
|
40
|
+
from scipy.stats import norm
|
|
41
|
+
from tmpinvi import tmpinvi
|
|
42
|
+
|
|
43
|
+
# Reproducibility
|
|
44
|
+
rng = np.random.default_rng(123456789)
|
|
45
|
+
|
|
46
|
+
iso2 = ["CN", "DE", "JP", "NL", "US"]
|
|
47
|
+
T = 10
|
|
48
|
+
year = np.arange(
|
|
49
|
+
pd.Timestamp.today().year - T + 1,
|
|
50
|
+
pd.Timestamp.today().year + 1
|
|
51
|
+
)
|
|
52
|
+
m = len(iso2)
|
|
53
|
+
|
|
54
|
+
# Construct panel-like data frame
|
|
55
|
+
df = pd.MultiIndex.from_product(
|
|
56
|
+
[year, iso2],
|
|
57
|
+
names=["year", "iso2"]
|
|
58
|
+
).to_frame(index=False)
|
|
59
|
+
|
|
60
|
+
df = df.sort_values(["year", "iso2"]).reset_index(drop=True)
|
|
61
|
+
|
|
62
|
+
ex_cols = [f"EX_{c}" for c in iso2]
|
|
63
|
+
|
|
64
|
+
for nm in ex_cols:
|
|
65
|
+
df[nm] = np.nan
|
|
66
|
+
|
|
67
|
+
df["EX"] = np.nan
|
|
68
|
+
df["IM"] = np.nan
|
|
69
|
+
|
|
70
|
+
X_true = {}
|
|
71
|
+
|
|
72
|
+
# Generate true transaction matrices and incomplete observations
|
|
73
|
+
for t, y in enumerate(year, start=1):
|
|
74
|
+
scale = 1000.0 * (1.05 ** (t - 1))
|
|
75
|
+
|
|
76
|
+
X = rng.uniform(0.0, scale, size=(m, m))
|
|
77
|
+
np.fill_diagonal(X, 0.0)
|
|
78
|
+
|
|
79
|
+
X_true[str(y)] = X.copy()
|
|
80
|
+
|
|
81
|
+
rows = df["year"].eq(y)
|
|
82
|
+
|
|
83
|
+
df.loc[rows, "EX"] = X.sum(axis=1)
|
|
84
|
+
df.loc[rows, "IM"] = X.sum(axis=0)
|
|
85
|
+
|
|
86
|
+
miss = rng.uniform(size=(m, m)) > 0.5
|
|
87
|
+
|
|
88
|
+
X_obs = X.copy()
|
|
89
|
+
X_obs[miss] = np.nan
|
|
90
|
+
|
|
91
|
+
df.loc[rows, ex_cols] = X_obs
|
|
92
|
+
|
|
93
|
+
# Construct upper bounds using linear models
|
|
94
|
+
cv = norm.ppf(0.975)
|
|
95
|
+
|
|
96
|
+
for nm in ex_cols:
|
|
97
|
+
fit = smf.ols(f"{nm} ~ year * C(iso2)", data=df).fit()
|
|
98
|
+
|
|
99
|
+
pr = fit.get_prediction(df)
|
|
100
|
+
sf = pr.summary_frame(alpha=0.05)
|
|
101
|
+
|
|
102
|
+
ub = sf["mean"].to_numpy() + cv * sf["mean_se"].to_numpy()
|
|
103
|
+
ub[ub < 0.0] = np.nan
|
|
104
|
+
|
|
105
|
+
df[f"_{nm}_lb"] = 0.0
|
|
106
|
+
df[f"_{nm}_ub"] = ub
|
|
107
|
+
|
|
108
|
+
def make_bounds(lb, ub):
|
|
109
|
+
return [(a, b) for a, b in zip(lb, ub)]
|
|
110
|
+
|
|
111
|
+
df_out = df.copy()
|
|
112
|
+
|
|
113
|
+
lb_cols = [f"_EX_{c}_lb" for c in iso2]
|
|
114
|
+
ub_cols = [f"_EX_{c}_ub" for c in iso2]
|
|
115
|
+
|
|
116
|
+
# Iterative completion/refinement
|
|
117
|
+
for step in range(1, 3):
|
|
118
|
+
for y in year:
|
|
119
|
+
idx = df_out["year"].eq(y)
|
|
120
|
+
d = df_out.loc[idx].copy()
|
|
121
|
+
|
|
122
|
+
ival = d[ex_cols].to_numpy(dtype=np.float64)
|
|
123
|
+
|
|
124
|
+
lb = d[lb_cols].to_numpy(dtype=np.float64).ravel(order="C")
|
|
125
|
+
ub = d[ub_cols].to_numpy(dtype=np.float64).ravel(order="C")
|
|
126
|
+
|
|
127
|
+
fit = tmpinvi(
|
|
128
|
+
ival=ival,
|
|
129
|
+
ibounds=make_bounds(lb, ub),
|
|
130
|
+
b_row=d["EX"].to_numpy(dtype=np.float64),
|
|
131
|
+
b_col=d["IM"].to_numpy(dtype=np.float64),
|
|
132
|
+
alpha=1.0,
|
|
133
|
+
update=True,
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
df_out.loc[idx, ex_cols] = fit.data
|
|
137
|
+
|
|
138
|
+
# Drop temporary bound columns
|
|
139
|
+
drop_cols = df_out.filter(regex=r"^_EX_.*_(lb|ub)$").columns
|
|
140
|
+
df_out = df_out.drop(columns=drop_cols)
|
|
141
|
+
|
|
142
|
+
df_out
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## User Reference
|
|
146
|
+
|
|
147
|
+
For comprehensive information on the estimator's capabilities, advanced configuration options, and implementation details, please refer to the [pytmpinv module](https://pypi.org/project/pytmpinv/ "Tabular Matrix Problems via Pseudoinverse Estimation"), on which TMPinvi is based.
|
|
148
|
+
|
|
149
|
+
To ensure cross-platform reproducibility, all CLSP implementations use a modified condition number function based on singular values, with a relative cutoff equal to `cond_tolerance * the largest singular value`.
|
|
150
|
+
|
|
151
|
+
**TMPinvi Parameters:**
|
|
152
|
+
|
|
153
|
+
`ival` : *array_like* or *None*, default = *None*<br>
|
|
154
|
+
Prior information on known cell values. If supplied and not entirely missing, `ival` is flattened and used to construct `b_val` and the corresponding identity-subset model matrix `M` internally. Missing entries (*np.nan*) are ignored. If all entries of `ival` are *np.nan*, no prior information is used and `b_val` and `M` are not passed to `tmpinv()`. When `ival` is provided, it overrides any `b_val` or `M` arguments supplied through keyword arguments.
|
|
155
|
+
|
|
156
|
+
`ibounds` : *tuple*, *list*, or *None*, default = *None*<br>
|
|
157
|
+
Dynamic cell-value bounds passed to `tmpinv(bounds=...)`. The object supplied to `ibounds` may be created or modified programmatically (for example within `preestimation()`). If a single pair such as *(low, high)* is provided, it is applied uniformly to all cells. Alternatively, a list of pairs may be supplied to specify cell-specific bounds with others set to None or *np.nan*. When `ibounds` is not *None*, it overrides any `bounds` argument supplied through keyword arguments.
|
|
158
|
+
|
|
159
|
+
`preestimation` : *callable* or *None*, default = *None*<br>
|
|
160
|
+
A function executed prior to model estimation. If supplied, it is called as `preestimation(ival)` and may perform arbitrary preparatory steps, such as constructing dynamic bounds or modifying objects in the calling environment. The return value is ignored.
|
|
161
|
+
|
|
162
|
+
`postestimation` : *callable* or *None*, default = *None*<br>
|
|
163
|
+
A function executed after model estimation. For a full model, it is called as `postestimation(model)`. For reduced (block-wise) models, it is called as `postestimation(model_i, i)` for each block index `i`. The return value is ignored.
|
|
164
|
+
|
|
165
|
+
`update` : *bool*, default = *False*<br>
|
|
166
|
+
If *True* and `ival` is supplied, missing entries (*np.nan*) in `ival` are replaced by the corresponding fitted values from `result.x`. The updated matrix is returned in the `result.data` component. If *False*, the data component contains the fitted solution matrix `result.x`.
|
|
167
|
+
|
|
168
|
+
**TMPinv Parameters:**
|
|
169
|
+
|
|
170
|
+
`S` : *array_like* of shape *(m + p, m + p)*, optional<br>
|
|
171
|
+
A diagonal sign slack (surplus) matrix with entries in *{0, ±1}*.<br>
|
|
172
|
+
- *0* enforces equality (== `b_row` or `b_col`),<br>
|
|
173
|
+
- *1* enforces a less-than-or-equal (≤) condition,<br>
|
|
174
|
+
- *–1* enforces a greater-than-or-equal (≥) condition.
|
|
175
|
+
|
|
176
|
+
The first `m` diagonal entries correspond to row constraints, and the remaining `p` to column constraints. Please note that, in the reduced model, `S` is ignored: slack behavior is derived implicitly from block-wise marginal totals.
|
|
177
|
+
|
|
178
|
+
`M` : *array_like* of shape *(k, m * p)*, optional<br>
|
|
179
|
+
A model matrix with entries in *{0, 1}*. Each row defines a linear restriction on the flattened solution matrix. The corresponding right-hand side values must be provided in `b_val`. This block is used to encode known cell values. Please note that, in the reduced model, `M` must be a unique row subset of an identity matrix (i.e., diagonal-only). Arbitrary or non-diagonal model matrices cannot be mapped to reduced blocks, making the model infeasible.
|
|
180
|
+
|
|
181
|
+
`b_row` : *array_like* of shape *(m,)*<br>
|
|
182
|
+
Right-hand side vector of row totals. Please note that both `b_row` and `b_col` must be provided.
|
|
183
|
+
|
|
184
|
+
`b_col` : *array_like* of shape *(p,)*<br>
|
|
185
|
+
Right-hand side vector of column totals. Please note that both `b_row` and `b_col` must be provided.
|
|
186
|
+
|
|
187
|
+
`b_val` : *array_like* of shape *(k,)*<br>
|
|
188
|
+
Right-hand side vector of known cell values.
|
|
189
|
+
|
|
190
|
+
`i` : *int*, default = *1*<br>
|
|
191
|
+
Number of row groups.
|
|
192
|
+
|
|
193
|
+
`j` : *int*, default = *1*<br>
|
|
194
|
+
Number of column groups.
|
|
195
|
+
|
|
196
|
+
`zero_diagonal` : *bool*, default = *False*<br>
|
|
197
|
+
If *True*, enforces the zero diagonal.
|
|
198
|
+
|
|
199
|
+
`reduced` : *tuple* of *(int, int)*, optional<br>
|
|
200
|
+
Dimensions of the reduced problem. If specified, the problem is estimated as a set of reduced problems constructed from contiguous submatrices of the original table. For example, `reduced` = *(6, 6)* implies *5×5* data blocks with *1* slack row and *1* slack column each (edge blocks may be smaller).
|
|
201
|
+
|
|
202
|
+
`symmetric` : *bool*, default = *False*<br>
|
|
203
|
+
If True, enforces symmetry of the estimated solution matrix as: x = 0.5 * (x + x.T)
|
|
204
|
+
Applies to TMPinviResult.x only. For TMPinviResult.model symmetry, add explicit symmetry constraints to M in a full-model solve instead of using this flag.
|
|
205
|
+
|
|
206
|
+
`bounds` : *sequence* of *(low, high)*, optional<br>
|
|
207
|
+
Bounds on cell values. If a single tuple *(low, high)* is given, it is applied to all `m` * `p` cells. Example: *(0, None)*.
|
|
208
|
+
|
|
209
|
+
`replace_value` : *float* or *None*, default = *np.nan*<br>
|
|
210
|
+
Final replacement value for any cell in the solution matrix that violates the specified bounds by more than the given tolerance.
|
|
211
|
+
|
|
212
|
+
`tolerance` : *float*, default = *square root of machine epsilon*<br>
|
|
213
|
+
Convergence tolerance for bounds.
|
|
214
|
+
|
|
215
|
+
`iteration_limit` : *int*, default = *50*<br>
|
|
216
|
+
Maximum number of iterations allowed in the refinement loop.
|
|
217
|
+
|
|
218
|
+
**CLSP Parameters:**
|
|
219
|
+
|
|
220
|
+
`r` : *int*, default = *1*<br>
|
|
221
|
+
Number of refinement iterations for the pseudoinverse-based estimator.
|
|
222
|
+
|
|
223
|
+
`Z` : *np.ndarray* or *None*<br>
|
|
224
|
+
A symmetric idempotent matrix (projector) defining the subspace for Bott–Duffin pseudoinversion. If *None*, the identity matrix is used, reducing the Bott–Duffin inverse to the Moore–Penrose case.
|
|
225
|
+
|
|
226
|
+
`final` : *bool*, default = *True*<br>
|
|
227
|
+
If *True*, a convex programming problem is solved to refine `zhat`. The resulting solution `z` minimizes a weighted L1/L2 norm around `zhat` subject to `Az = b`.
|
|
228
|
+
|
|
229
|
+
`alpha` : *float*, *list[float]* or *None*, default = *None*<br>
|
|
230
|
+
Regularization parameter (weight) in the final convex program:<br>
|
|
231
|
+
- `α = 0`: Lasso (L1 norm)<br>
|
|
232
|
+
- `α = 1`: Tikhonov Regularization/Ridge (L2 norm)<br>
|
|
233
|
+
- `0 < α < 1`: Elastic Net<br>
|
|
234
|
+
If a scalar float is provided, that value is used after clipping to [0, 1].<br>
|
|
235
|
+
If a list/iterable of floats is provided, each candidate is evaluated via a full solve, and the α with the smallest NRMSE is selected.<br>
|
|
236
|
+
If *None*, α is chosen by an error-based rule: α = min(1.0, NRMSE_{α = 0} / (NRMSE_{α = 0} + NRMSE_{α = 1} + tolerance))
|
|
237
|
+
|
|
238
|
+
`cond_tolerance` : *float* or *None*, default = *None*<br>
|
|
239
|
+
Singular-value cutoff for the custom condition number function.<br>
|
|
240
|
+
If *None*, the implementation uses an internal relative cutoff of `1e-14`.
|
|
241
|
+
|
|
242
|
+
`*args`, `**kwargs` : optional<br>
|
|
243
|
+
CVXPY arguments passed to the CVXPY solver.
|
|
244
|
+
|
|
245
|
+
**Returns:**
|
|
246
|
+
*TMPinviResult*
|
|
247
|
+
|
|
248
|
+
`TMPinviResult.full` : *bool*<br>
|
|
249
|
+
Indicates if this result comes from the full (non-reduced) model.
|
|
250
|
+
|
|
251
|
+
`TMPinviResult.model` : *CLSP* or *list* of *CLSP*<br>
|
|
252
|
+
A single CLSP object in the full model, or a list of CLSP objects for each reduced block in the reduced model.
|
|
253
|
+
|
|
254
|
+
`TMPinviResult.x` : *np.ndarray*<br>
|
|
255
|
+
Final estimated solution matrix of shape *(m, p)*.
|
|
256
|
+
|
|
257
|
+
`TMPinviResult.data` : *np.ndarray*<br>
|
|
258
|
+
Processed output matrix of shape *(m, p)*. If `update=True` and `ival` is supplied in `tmpinvi()`, `data` contains `ival` with missing entries replaced by fitted values from `x`. Otherwise, data contains the fitted solution matrix `x`.
|
|
259
|
+
|
|
260
|
+
`TMPinviResult.summarize(i, display)`<br>
|
|
261
|
+
An alias of TMPinviResult.summary().
|
|
262
|
+
|
|
263
|
+
`TMPinviResult.summary(i, display)`<br>
|
|
264
|
+
Return or print a summary of the underlying CLSP result. `i` (*int*, default = *None*) is the index of a reduced-block model in `TMPinviResult.model`; it is required for reduced models and ignored for the full model.
|
|
265
|
+
|
|
266
|
+
## Bibliography
|
|
267
|
+
Bolotov, I. (2025). CLSP: Linear Algebra Foundations of a Modular Two-Step Convex Optimization-Based Estimator for Ill-Posed Problems. *Mathematics*, *13*(21), 3476. [https://doi.org/10.3390/math13213476](https://doi.org/10.3390/math13213476)
|
|
268
|
+
|
|
269
|
+
## License
|
|
270
|
+
|
|
271
|
+
MIT License — see the [LICENSE](LICENSE) file.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
tmpinvi
|
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from typing import Any, Callable
|
|
3
|
+
|
|
4
|
+
from tmpinv import tmpinv
|
|
5
|
+
|
|
6
|
+
class TMPinviInputError(Exception):
|
|
7
|
+
"""
|
|
8
|
+
Exception class for TMPinvi-related input errors.
|
|
9
|
+
|
|
10
|
+
Represents internal failures in Interactive Tabular Matrix Problems via
|
|
11
|
+
Pseudoinverse Estimation routines due to malformed or missing input.
|
|
12
|
+
Supports structured messaging and optional diagnostic augmentation.
|
|
13
|
+
|
|
14
|
+
Parameters
|
|
15
|
+
----------
|
|
16
|
+
message : str, optional
|
|
17
|
+
Description of the error. Defaults to a generic TMPinvi message.
|
|
18
|
+
|
|
19
|
+
code : int or str, optional
|
|
20
|
+
Optional error code or identifier for downstream handling.
|
|
21
|
+
|
|
22
|
+
Attributes
|
|
23
|
+
----------
|
|
24
|
+
message : str
|
|
25
|
+
Human-readable error message.
|
|
26
|
+
|
|
27
|
+
code : int or str
|
|
28
|
+
Optional error code for custom handling or debugging.
|
|
29
|
+
|
|
30
|
+
Usage
|
|
31
|
+
-----
|
|
32
|
+
raise TMPinviInputError("ival must be a 2D array", code=201)
|
|
33
|
+
"""
|
|
34
|
+
def __init__(self, message: str = "An error occurred in TMPinvi",
|
|
35
|
+
code: int | str | None = None):
|
|
36
|
+
self.message = message
|
|
37
|
+
self.code = code
|
|
38
|
+
full_message = f"{message} (Code: {code})" if code is not None \
|
|
39
|
+
else message
|
|
40
|
+
super().__init__(full_message)
|
|
41
|
+
|
|
42
|
+
def __str__(self) -> str:
|
|
43
|
+
return self.message if self.code is None \
|
|
44
|
+
else f"{self.message} [Code: {self.code}]"
|
|
45
|
+
|
|
46
|
+
def as_dict(self) -> dict:
|
|
47
|
+
"""
|
|
48
|
+
Return the error as a dictionary for structured logging or JSON output.
|
|
49
|
+
"""
|
|
50
|
+
return {"error": self.message, "code": self.code}
|
|
51
|
+
|
|
52
|
+
class TMPinviResult:
|
|
53
|
+
"""
|
|
54
|
+
Result container for TMPinvi estimation.
|
|
55
|
+
|
|
56
|
+
Attributes
|
|
57
|
+
----------
|
|
58
|
+
full : bool
|
|
59
|
+
Indicates if this result comes from the full (non-reduced) model.
|
|
60
|
+
|
|
61
|
+
model : CLSP or list of CLSP
|
|
62
|
+
A single CLSP object in the full model, or a list of CLSP objects
|
|
63
|
+
for each reduced block in the reduced model.
|
|
64
|
+
|
|
65
|
+
x : np.ndarray
|
|
66
|
+
Final estimated solution matrix of shape (m, p).
|
|
67
|
+
|
|
68
|
+
data : np.ndarray
|
|
69
|
+
Processed output matrix of shape (m, p). If `update=True` and `ival` is
|
|
70
|
+
supplied in `tmpinvi()`, `data` contains `ival` with missing entries
|
|
71
|
+
replaced by fitted values from `x`. Otherwise, data contains the fitted
|
|
72
|
+
solution matrix `x`.
|
|
73
|
+
"""
|
|
74
|
+
def __init__(self, result, data: np.ndarray):
|
|
75
|
+
self.full = result.full
|
|
76
|
+
self.model = result.model
|
|
77
|
+
self.x = np.asarray(result.x, dtype=np.float64)
|
|
78
|
+
self.data = data
|
|
79
|
+
|
|
80
|
+
def summarize(self, i: int | None = None, display : bool = False):
|
|
81
|
+
return self.summary(i=i, display=display)
|
|
82
|
+
|
|
83
|
+
def summary(self, i: int | None = None, display: bool = False):
|
|
84
|
+
"""
|
|
85
|
+
Return or print a summary for the TMPinvi estimator.
|
|
86
|
+
|
|
87
|
+
Parameters
|
|
88
|
+
----------
|
|
89
|
+
display: bool, default = False
|
|
90
|
+
If True, prints the summary instead of returning a dictionary.
|
|
91
|
+
"""
|
|
92
|
+
if not isinstance(self.model, list):
|
|
93
|
+
return self.model.summarize(display=display)
|
|
94
|
+
if i is None:
|
|
95
|
+
raise TMPinviInputError("Reduced model: please supply the block " +
|
|
96
|
+
"index using i=#.")
|
|
97
|
+
idx = int(i)
|
|
98
|
+
if idx < 0 or idx > len(self.model) - 1:
|
|
99
|
+
raise TMPinviInputError(f"i must be in 0..{len(self.model)}-1 " +
|
|
100
|
+
f"for reduced model.")
|
|
101
|
+
return self.model[idx].summarize(display=display)
|
|
102
|
+
|
|
103
|
+
def tmpinvi(
    ival: Any = None,
    ibounds: Any = None,
    preestimation: Callable[[np.ndarray | None], Any] | None = None,
    postestimation: Callable[..., Any] | None = None,
    update: bool = False,
    *args, **kwargs
) -> TMPinviResult:
    """
    Solve an interactive tabular matrix estimation problem via Convex Least
    Squares Programming (CLSP) with bound-constrained iterative refinement.

    Parameters
    ----------
    ival : array_like or None, default = None
        Prior information on known cell values. If supplied and not entirely
        missing, `ival` is flattened (row-major) and used to construct `b_val`
        and the corresponding identity-subset model matrix `M` internally.
        Missing entries (np.nan) are ignored. If all entries of `ival` are
        np.nan, no prior information is used and `b_val` and `M` are not
        passed to `tmpinv()`. When `ival` is provided, it overrides any
        `b_val` or `M` arguments supplied through keyword arguments.
    ibounds : tuple, list, or None, default = None
        Dynamic cell-value bounds passed to `tmpinv(bounds=...)`. The object
        supplied to `ibounds` may be created or modified programmatically
        (for example within `preestimation()`). If a single pair such as (low,
        high) is provided, it is applied uniformly to all cells. Alternatively,
        a list of pairs may be supplied to specify cell-specific bounds with
        others set to None or np.nan. When `ibounds` is not None, it overrides
        any `bounds` argument supplied through keyword arguments.
    preestimation : callable or None, default = None
        A function executed prior to model estimation. If supplied, it is
        called as `preestimation(ival)` (with `ival` already converted to a
        float64 array, or None) and may perform arbitrary preparatory
        steps, such as constructing dynamic bounds or modifying objects in
        the calling environment. The return value is ignored.
    postestimation : callable or None, default = None
        A function executed after model estimation. For a full model, it is
        called as `postestimation(model)`. For reduced (block-wise) models,
        it is called as `postestimation(model_i, i)` for each block index
        `i`. The return value is ignored.
    update : bool, default = False
        If True and `ival` is supplied, missing entries (np.nan) in `ival`
        are replaced by the corresponding fitted values from `result.x`. The
        updated matrix is returned in the `result.data` component. If False,
        the data component contains the fitted solution matrix `result.x`.
    *args, **kwargs : additional arguments
        Passed directly to tmpinv().

    Returns
    -------
    TMPinviResult
        An object containing the fitted CLSP model(s), the solution matrix `x`,
        and the processed matrix `data`. If `update=True` and `ival` is
        supplied, `data` contains `ival` with missing entries replaced by
        fitted values from `x`. Otherwise, data contains the fitted solution
        matrix `x`. In both cases `data` is an independent array, so
        modifying it does not alter `x`.

    Raises
    ------
    TMPinviInputError
        If `ival` is not two-dimensional, if `preestimation` or
        `postestimation` is supplied but not callable, or if `update=True`
        and the shapes of `ival` and the fitted solution differ.
    """
    # adjust and preprocess options
    if ival is not None:
        ival = np.asarray(ival, dtype=np.float64)
        if ival.ndim != 2:
            raise TMPinviInputError("ival must be a 2D array.")
        # ival takes precedence over user-supplied prior information,
        # even when it turns out to be entirely missing
        kwargs.pop("b_val", None)
        kwargs.pop("M", None)
    if preestimation is not None and not callable(preestimation):
        raise TMPinviInputError("preestimation must be callable.")
    if postestimation is not None and not callable(postestimation):
        raise TMPinviInputError("postestimation must be callable.")

    # run preestimation function
    if preestimation is not None:
        preestimation(ival)

    # construct prior-information block (after preestimation, so in-place
    # changes made to ival by the callback are taken into account)
    if ival is not None:
        flat = ival.ravel(order="C")
        known = np.flatnonzero(~np.isnan(flat))
        if known.size:
            kwargs["b_val"] = flat[known].reshape(-1, 1)
            # rows of the identity matrix for the known cells, built directly
            # at shape (k, m * p) instead of slicing a full (m * p, m * p)
            # identity
            selector = np.zeros((known.size, flat.size), dtype=np.float64)
            selector[np.arange(known.size), known] = 1.0
            kwargs["M"] = selector

    # perform estimation
    if ibounds is not None:
        kwargs["bounds"] = ibounds           # overrides any bounds= keyword
    result = tmpinv(*args, **kwargs)

    # run postestimation function
    if postestimation is not None:
        if result.full:
            postestimation(result.model)
        else:
            for i, model_i in enumerate(result.model):
                postestimation(model_i, i)

    # generate, update, or replace data from result.x
    x = np.asarray(result.x, dtype=np.float64)
    if update and ival is not None:
        if ival.shape != x.shape:
            raise TMPinviInputError(
                "Dimensions of ival and result.x do not match."
            )
        # keep known cells, fill the missing ones from the fitted solution
        data = np.where(np.isnan(ival), x, ival)
    else:
        # copy so that data never aliases the solution matrix
        data = x.copy()

    return TMPinviResult(result, data)
|