rd2d 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rd2d/__init__.py +17 -0
- rd2d/_utils.py +272 -0
- rd2d/distance.py +888 -0
- rd2d/location.py +1186 -0
- rd2d/results.py +268 -0
- rd2d-0.1.0.dist-info/METADATA +127 -0
- rd2d-0.1.0.dist-info/RECORD +10 -0
- rd2d-0.1.0.dist-info/WHEEL +5 -0
- rd2d-0.1.0.dist-info/licenses/LICENSE.md +12 -0
- rd2d-0.1.0.dist-info/top_level.txt +1 -0
rd2d/__init__.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Local polynomial methods for boundary discontinuity designs."""
|
|
2
|
+
|
|
3
|
+
from .distance import rdbw2d_distance, rdbw2d_dist, rd2d_distance, rd2d_dist
|
|
4
|
+
from .location import rdbw2d, rd2d
|
|
5
|
+
from .results import RD2DResult, SummaryResult, summary
|
|
6
|
+
|
|
7
|
+
# Public API of the package: every name listed here is imported above from
# the ``distance``, ``location`` and ``results`` submodules, and this list
# controls what ``from rd2d import *`` exposes.
__all__ = [
    "RD2DResult",
    "SummaryResult",
    "rdbw2d",
    "rd2d",
    "rdbw2d_dist",
    "rdbw2d_distance",
    "rd2d_dist",
    "rd2d_distance",
    "summary",
]
|
rd2d/_utils.py
ADDED
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from scipy import stats
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def as_1d(x, name: str) -> np.ndarray:
    """Coerce *x* to a non-empty one-dimensional float array.

    Inputs that are not already one-dimensional (scalars, matrices, ...) are
    flattened with ``np.ravel``.

    Args:
        x: Array-like input convertible to ``float``.
        name: Label used in the error message.

    Returns:
        The flattened float array.

    Raises:
        ValueError: If the resulting array has no elements.
    """
    flat = np.asarray(x, dtype=float)
    flat = flat if flat.ndim == 1 else np.ravel(flat)
    if not flat.size:
        raise ValueError(f"{name} must not be empty.")
    return flat
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def as_2d(x, name: str, ncol: int | None = None) -> np.ndarray:
    """Coerce *x* to a two-dimensional float array.

    A one-dimensional input becomes a single-column matrix.

    Args:
        x: Array-like input convertible to ``float``.
        name: Label used in error messages.
        ncol: Required number of columns, or ``None`` to skip the check.

    Returns:
        The two-dimensional float array.

    Raises:
        ValueError: If the input is neither 1-D nor 2-D, or if the column
            count differs from ``ncol``.
    """
    matrix = np.asarray(x, dtype=float)
    if matrix.ndim == 1:
        # Treat a plain vector as one column.
        matrix = matrix[:, np.newaxis]
    if matrix.ndim != 2:
        raise ValueError(f"{name} must be a two-dimensional array.")
    wrong_width = ncol is not None and matrix.shape[1] != ncol
    if wrong_width:
        raise ValueError(f"{name} must have exactly {ncol} columns.")
    return matrix
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def check_lengths(y: np.ndarray, *arrays: np.ndarray) -> None:
    """Verify that every array has the same length as *y*.

    Args:
        y: Reference array; its ``len`` is the expected length.
        *arrays: Arrays whose ``len`` (number of rows for matrices) must
            match.

    Raises:
        ValueError: On the first array whose length differs.
    """
    expected = len(y)
    if any(len(other) != expected for other in arrays):
        raise ValueError("Input vectors and rows of matrix inputs must have the same length.")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def complete_cases(*arrays: np.ndarray) -> np.ndarray:
    """Return a boolean mask of rows that are complete in every input.

    Numeric inputs are complete where finite. Inputs that cannot be cast to
    ``float`` fall back to "not missing" according to ``pandas.isna``. For
    inputs that are not one-dimensional, a row is complete only when all of
    its entries along axis 1 are.

    Args:
        *arrays: One or more arrays sharing the same leading length.

    Returns:
        Boolean array with one entry per row of the first input.
    """
    keep = np.ones(len(arrays[0]), dtype=bool)
    for candidate in arrays:
        data = np.asarray(candidate)
        try:
            ok = np.isfinite(data.astype(float))
        except (TypeError, ValueError):
            # Non-numeric content: only missing values count as incomplete.
            ok = ~pd.isna(data)
        row_ok = ok if data.ndim == 1 else np.all(ok, axis=1)
        keep &= row_ok
    return keep
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def normalize_binary(x, name: str) -> np.ndarray:
    """Convert a logical or 0/1 indicator to a boolean array.

    Only finite entries are validated; non-finite entries are ignored by the
    check and are converted by the final boolean cast like any other value.

    Args:
        x: Boolean array-like, or array-like castable to ``float``.
        name: Label used in the error message.

    Returns:
        Boolean array with the same shape as the input.

    Raises:
        ValueError: If a finite entry is neither 0 nor 1.
    """
    raw = np.asarray(x)
    if raw.dtype == bool:
        return raw.astype(bool)
    numeric = raw.astype(float)
    observed = set(np.unique(numeric[np.isfinite(numeric)]).tolist())
    if not observed <= {0.0, 1.0}:
        raise ValueError(f"{name} must be logical or contain only 0 and 1.")
    return numeric.astype(bool)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def validate_order(value, name: str) -> int:
    """Validate that *value* is a nonnegative integer and return it as ``int``.

    The value is first converted with ``float``; it is accepted when finite,
    nonnegative, and within ``sqrt(machine epsilon)`` of an integer.

    Args:
        value: Candidate order (anything ``float`` accepts).
        name: Label used in error messages.

    Returns:
        The value rounded to the nearest integer.

    Raises:
        ValueError: If *value* is ``None`` or not a nonnegative integer.
    """
    if value is None:
        raise ValueError(f"{name} must not be None.")
    number = float(value)
    tolerance = np.sqrt(np.finfo(float).eps)
    # The finiteness test comes first so that round() never sees NaN/inf.
    is_valid = (
        np.isfinite(number)
        and number >= 0
        and abs(number - round(number)) <= tolerance
    )
    if not is_valid:
        raise ValueError(f"{name} must be a nonnegative integer.")
    return int(round(number))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def validate_deriv(deriv: Iterable[float], p: int) -> tuple[int, int]:
    """Validate a pair of derivative orders against the polynomial order.

    Args:
        deriv: Iterable of exactly two finite, nonnegative, integer-valued
            numbers (integrality is checked up to ``sqrt(machine epsilon)``).
        p: Polynomial order; the two derivative orders may sum to at most
            ``p``.

    Returns:
        The derivative orders as a tuple of two ``int`` values.

    Raises:
        ValueError: If ``deriv`` is malformed or its total order exceeds
            ``p``.
    """
    shape_message = "deriv must be a nonnegative integer vector of length 2."
    values = np.asarray(tuple(deriv), dtype=float)
    malformed = (
        values.shape != (2,)
        or not np.all(np.isfinite(values))
        or np.any(values < 0)
    )
    if malformed:
        raise ValueError(shape_message)
    rounded = np.round(values)
    tolerance = np.sqrt(np.finfo(float).eps)
    if np.any(np.abs(values - rounded) > tolerance):
        raise ValueError(shape_message)
    orders = tuple(int(v) for v in rounded)
    if sum(orders) > p:
        raise ValueError("sum(deriv) must be less than or equal to p.")
    return orders
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def kernel_weights(u: np.ndarray, kernel: str) -> np.ndarray:
    """Evaluate a kernel function at the scaled distances *u*.

    Args:
        u: Array of scaled distances.
        kernel: Case-insensitive kernel name: ``"tri"``/``"triangular"``,
            ``"epa"``/``"epanechnikov"``, ``"uni"``/``"uniform"`` or
            ``"gau"``/``"gaussian"``.

    Returns:
        Kernel values with the same shape as *u*. The three compactly
        supported kernels are zero for ``|u| > 1``; the Gaussian kernel is the
        standard normal density.

    Raises:
        ValueError: If *kernel* is not one of the recognised names.
    """
    kernel = kernel.lower()
    if kernel in {"tri", "triangular"}:
        return np.maximum(1.0 - np.abs(u), 0.0) * (np.abs(u) <= 1.0)
    if kernel in {"epa", "epanechnikov"}:
        # Clamp instead of multiplying by the support mask: for infinite (or
        # overflowing) u the product ``-inf * 0`` would be NaN, and outside
        # the support it would be -0.0. The clamp yields an exact 0.0 there
        # and is unchanged inside the support, where 1 - u**2 >= 0.
        return 0.75 * np.maximum(1.0 - u**2, 0.0)
    if kernel in {"uni", "uniform"}:
        return 0.5 * (np.abs(u) <= 1.0)
    if kernel in {"gau", "gaussian"}:
        return stats.norm.pdf(u)
    raise ValueError("kernel must be one of tri, epa, uni, or gau.")
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def multi_indices_2d(p: int) -> list[tuple[int, int]]:
    """List the exponent pairs of all bivariate monomials up to degree *p*.

    Pairs are ``(x power, y power)``, ordered by total degree and, within a
    degree, by increasing y power: ``(0, 0), (1, 0), (0, 1), (2, 0), ...``.

    Args:
        p: Maximum total degree; a negative value gives an empty list.

    Returns:
        The exponent pairs in the order described above.
    """
    return [
        (degree - y_power, y_power)
        for degree in range(p + 1)
        for y_power in range(degree + 1)
    ]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def basis_2d(centered: np.ndarray, p: int) -> np.ndarray:
    """Build the bivariate polynomial design matrix up to degree *p*.

    Args:
        centered: Two-dimensional array whose first two columns hold the
            coordinates (presumably already centred at the evaluation point;
            confirm against callers).
        p: Maximum total degree of the monomials.

    Returns:
        Matrix with one column per exponent pair from ``multi_indices_2d(p)``,
        in that order; each column is ``x1**a * x2**b``.
    """
    first, second = centered[:, 0], centered[:, 1]
    columns = []
    for pow_first, pow_second in multi_indices_2d(p):
        columns.append((first**pow_first) * (second**pow_second))
    return np.column_stack(columns)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def basis_1d(distance: np.ndarray, p: int) -> np.ndarray:
    """Build the univariate polynomial design matrix up to degree *p*.

    Args:
        distance: One-dimensional array of values.
        p: Maximum power.

    Returns:
        Matrix whose column ``j`` is ``distance**j`` for ``j = 0, ..., p``.
    """
    columns = []
    for power in range(p + 1):
        columns.append(distance**power)
    return np.column_stack(columns)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def target_2d(p: int, deriv: tuple[int, int], tangvec: np.ndarray | None, row: int) -> np.ndarray:
    """Build the coefficient-selection vector for a bivariate local fit.

    The vector has one entry per exponent pair of ``multi_indices_2d(p)``.

    Args:
        p: Polynomial order of the fit.
        deriv: Pair of derivative orders ``(x order, y order)``. Any
            two-element sequence is accepted and compared as a tuple.
        tangvec: Optional two-column array of direction vectors. When given,
            ``deriv`` is ignored and the target combines the two first-order
            coefficients with the components of ``tangvec[row]``.
        row: Row of ``tangvec`` to use.

    Returns:
        The target vector. Without ``tangvec`` it is zero except for
        ``deriv[0]! * deriv[1]!`` at the position of ``deriv``; if ``deriv``
        is not among the exponent pairs the vector is all zeros.

    Raises:
        ValueError: If ``tangvec`` is given and ``p < 1``.
    """
    # Build the exponent list once and reuse it for every lookup below.
    idx = multi_indices_2d(p)
    target = np.zeros(len(idx))
    if tangvec is not None:
        if p < 1:
            raise ValueError("tangvec requires p >= 1.")
        target[idx.index((1, 0))] = tangvec[row, 0]
        target[idx.index((0, 1))] = tangvec[row, 1]
        return target
    # A list never compares equal to a tuple and an ndarray makes the
    # membership test ambiguous, so normalise before looking the pair up.
    deriv = tuple(deriv)
    if deriv in idx:
        target[idx.index(deriv)] = float(math.factorial(deriv[0]) * math.factorial(deriv[1]))
    return target
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def target_1d(p: int, deriv: int = 0) -> np.ndarray:
    """Build the coefficient-selection vector for a univariate local fit.

    Args:
        p: Polynomial order of the fit.
        deriv: Derivative order to extract.

    Returns:
        Vector of length ``p + 1`` that is zero except for ``deriv!`` at
        position ``deriv``.

    Raises:
        ValueError: If ``deriv`` exceeds ``p``.
    """
    if deriv > p:
        raise ValueError("deriv must be less than or equal to p.")
    selector = np.zeros(p + 1)
    # The factorial turns the polynomial coefficient into the derivative.
    selector[deriv] = float(math.factorial(deriv))
    return selector
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def weighted_pinv_design(design: np.ndarray, weights: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Restrict a design to positively weighted rows and invert its Gram matrix.

    Args:
        design: Design matrix with one row per observation.
        weights: Observation weights, one per row of *design*.

    Returns:
        A tuple ``(keep, X, inv_gram)``: the boolean mask of rows with
        strictly positive weight, the retained design rows, and the
        pseudo-inverse of the weighted Gram matrix ``X' W X``.

    Raises:
        ValueError: If no row has a strictly positive weight.
    """
    keep = weights > 0
    kept_design = design[keep, :]
    kept_weights = weights[keep]
    if len(kept_design) == 0:
        raise ValueError("No observations inside the bandwidth.")
    weighted_design = kept_design * kept_weights[:, None]
    gram = kept_design.T @ weighted_design
    # Pseudo-inverse rather than inverse, with a fixed singular-value cutoff.
    inv_gram = np.linalg.pinv(gram, rcond=1e-12)
    return keep, kept_design, inv_gram
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass
class LocalFit:
    """Result of one weighted local fit, as produced by ``local_fit_targets``."""

    # Value of the requested linear functional of the fitted coefficients
    # (``target @ beta``), one entry per outcome column.
    estimate: np.ndarray
    # Standard errors: square roots of the diagonal of the estimated
    # covariance, floored at zero before the square root.
    se: np.ndarray
    # Influence contributions: one row per observation when no clusters are
    # used, otherwise one row per distinct cluster label.
    influence: np.ndarray
    # Number of observations with strictly positive weight.
    n_eff: int
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def local_fit_targets(
    design_full: np.ndarray,
    weights_full: np.ndarray,
    outcomes_full: np.ndarray,
    target: np.ndarray,
    *,
    vce: str = "hc1",
    cluster: np.ndarray | None = None,
) -> LocalFit:
    """Fit a weighted least-squares regression and evaluate a linear functional.

    Only rows with strictly positive weight enter the fit. Coefficients are
    obtained from the pseudo-inverse of the weighted Gram matrix, the
    estimate is ``target @ beta``, and standard errors come from the outer
    product of per-observation (or per-cluster) influence contributions.

    Args:
        design_full: Design matrix, one row per observation.
        weights_full: Observation weights; rows with weight ``<= 0`` are
            dropped.
        outcomes_full: Outcome vector or matrix; a vector is treated as a
            single column.
        target: Coefficient weights defining the functional (presumably one
            entry per design column; confirm against callers).
        vce: Case-insensitive variance option. ``"hc1"`` applies a
            degrees-of-freedom scale, ``"hc2"`` and ``"hc3"`` apply leverage
            adjustments, and any other value applies no adjustment.
        cluster: Optional cluster labels aligned with the rows of
            ``design_full``. When given, influence contributions are summed
            within clusters before the covariance is formed.

    Returns:
        A ``LocalFit``. Its ``influence`` has one row per observation when
        ``cluster`` is ``None`` (zero for dropped rows); otherwise it has one
        row per distinct cluster label, ordered by first appearance in
        ``cluster``, with zeros for clusters that have no retained rows.

    Raises:
        ValueError: If no observation has a strictly positive weight.
    """
    outcomes_full = np.asarray(outcomes_full, dtype=float)
    if outcomes_full.ndim == 1:
        outcomes_full = outcomes_full.reshape(-1, 1)
    n_outcomes = outcomes_full.shape[1]

    keep, X, inv_gram = weighted_pinv_design(design_full, weights_full)
    w = weights_full[keep]
    Y = outcomes_full[keep, :]
    beta = inv_gram @ (X.T @ (w[:, None] * Y))
    resid = Y - X @ beta
    n_eff, k = X.shape

    vce = vce.lower()
    adj = np.ones(n_eff)
    if vce in {"hc2", "hc3"}:
        # Leverage is only needed for the leverage-based corrections.
        leverage = np.sum((X @ inv_gram) * X, axis=1) * w
        if vce == "hc2":
            adj = 1.0 / np.maximum(1.0 - leverage, 1e-8)
        else:
            adj = 1.0 / np.maximum(1.0 - leverage, 1e-8) ** 2

    # Influence contribution for the requested linear functional.
    row = target @ inv_gram
    score_base = (X * w[:, None]) @ row
    infl_kept = score_base[:, None] * resid * np.sqrt(adj)[:, None]

    scale = 1.0
    if vce == "hc1" and n_eff > k:
        scale = n_eff / (n_eff - k)

    estimate = target @ beta

    if cluster is None:
        cov = scale * (infl_kept.T @ infl_kept)
        influence = np.zeros((design_full.shape[0], n_outcomes))
        influence[keep, :] = np.sqrt(scale) * infl_kept
    else:
        cluster_all = np.asarray(cluster)
        cluster_kept = cluster_all[keep]
        kept_groups = pd.unique(cluster_kept)
        summed = np.zeros((len(kept_groups), n_outcomes))
        for i, group in enumerate(kept_groups):
            summed[i, :] = np.sum(infl_kept[cluster_kept == group, :], axis=0)
        # With a single retained cluster the cluster-specific scale is
        # undefined, so the observation-level hc1 scale set above is kept.
        if vce == "hc1" and len(kept_groups) > 1 and n_eff > k:
            scale = (len(kept_groups) / (len(kept_groups) - 1.0)) * ((n_eff - 1.0) / (n_eff - k))
        cov = scale * (summed.T @ summed)

        # For cross-evaluation covariance, store the cluster-level sums in a
        # cluster-indexed array: row i belongs to the i-th distinct label of
        # the full cluster vector, so fits at different evaluation points
        # line up even when they retain different observations.
        # NOTE(review): unlike the unclustered branch, these sums are not
        # multiplied by sqrt(scale), so the returned influence does not
        # reproduce `se` when hc1 is active. Confirm whether callers rescale.
        all_groups = pd.unique(cluster_all)
        group_to_row = {g: i for i, g in enumerate(all_groups)}
        influence = np.zeros((len(all_groups), n_outcomes))
        for j, g in enumerate(kept_groups):
            influence[group_to_row[g], :] = summed[j, :]

    # Guard against tiny negative diagonals before taking the square root.
    se = np.sqrt(np.maximum(np.diag(cov), 0.0))

    return LocalFit(estimate=np.asarray(estimate), se=se, influence=influence, n_eff=int(n_eff))
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def ci_columns(est: np.ndarray, se: np.ndarray, level: float, side: str) -> tuple[np.ndarray, np.ndarray]:
    """Compute normal-approximation confidence bounds.

    Args:
        est: Point estimates.
        se: Standard errors matching *est*.
        level: Confidence level in percent (e.g. ``95``).
        side: ``"two"`` for a symmetric interval, ``"left"`` for an interval
            unbounded below, ``"right"`` for one unbounded above.

    Returns:
        A tuple ``(lower, upper)`` of bound arrays; the open end of a
        one-sided interval is filled with ``-inf`` or ``+inf``.

    Raises:
        ValueError: If *side* is not one of the three accepted values.
    """
    if side == "two":
        z_two = stats.norm.ppf((level + 100.0) / 200.0)
        half_width = z_two * se
        return est - half_width, est + half_width

    z_one = stats.norm.ppf(level / 100.0)
    if side == "left":
        lower = np.full(len(est), -np.inf)
        return lower, est + z_one * se
    if side == "right":
        upper = np.full(len(est), np.inf)
        return est - z_one * se, upper
    raise ValueError("side must be two, left, or right.")
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def p_values(tvalues: np.ndarray) -> np.ndarray:
    """Return two-sided p-values under the standard normal distribution.

    Args:
        tvalues: Test statistics.

    Returns:
        Twice the upper-tail normal probability of ``|tvalues|``.
    """
    upper_tail = stats.norm.sf(np.abs(tvalues))
    return 2.0 * upper_tail
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def bandwidth_floor(values: np.ndarray, bwcheck: int | None) -> float:
    """Return the ``bwcheck``-th smallest finite value as a bandwidth floor.

    Args:
        values: Array-like of candidate values (anything convertible to a
            float array; ``None`` entries become NaN and are ignored).
        bwcheck: Rank of the order statistic to return, clamped to
            ``[1, number of finite values]``. ``None`` disables the floor.

    Returns:
        ``nan`` when there are no finite values, ``0.0`` when ``bwcheck`` is
        ``None``, otherwise the requested order statistic.
    """
    # Convert first, then test finiteness on the converted array: calling
    # np.isfinite on raw list/object input raises TypeError.
    numeric = np.asarray(values, dtype=float)
    ordered = np.sort(numeric[np.isfinite(numeric)])
    if ordered.size == 0:
        return np.nan
    if bwcheck is None:
        return 0.0
    k = min(max(int(bwcheck), 1), ordered.size)
    return float(ordered[k - 1])
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def cer_factor(n: int, p: int) -> float:
    """Return the sample-size factor ``n ** (1/(2p + 4) - 1/(p + 4))``.

    Args:
        n: Sample size; values below 1 are treated as 1.
        p: Polynomial order.

    Returns:
        The multiplicative factor (presumably used to shrink a bandwidth
        toward a coverage-error-optimal rate; confirm against callers).
    """
    sample_size = max(int(n), 1)
    exponent = 1.0 / (2.0 * p + 4.0) - 1.0 / (p + 4.0)
    return sample_size**exponent
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def ensure_dataframe(x: np.ndarray, columns: list[str]) -> pd.DataFrame:
    """Wrap *x* in a ``pandas.DataFrame`` with the given column labels.

    Args:
        x: Data passed to the ``DataFrame`` constructor.
        columns: Column labels for the frame.

    Returns:
        The constructed data frame.
    """
    frame = pd.DataFrame(data=x, columns=columns)
    return frame
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def covariance_from_influence(influence: np.ndarray) -> np.ndarray:
    """Return the matrix of inner products between the rows of *influence*.

    Args:
        influence: Array-like convertible to ``float`` (presumably one row
            per estimate and one column per observation or cluster; confirm
            against callers).

    Returns:
        ``influence @ influence.T``.
    """
    contributions = np.asarray(influence, dtype=float)
    return np.matmul(contributions, contributions.T)
|