rd2d 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
rd2d/__init__.py ADDED
@@ -0,0 +1,17 @@
1
+ """Local polynomial methods for boundary discontinuity designs."""
2
+
3
+ from .distance import rdbw2d_distance, rdbw2d_dist, rd2d_distance, rd2d_dist
4
+ from .location import rdbw2d, rd2d
5
+ from .results import RD2DResult, SummaryResult, summary
6
+
7
# Public API of the package, grouped by the module each name is imported from.
__all__ = [
    # .results
    "RD2DResult",
    "SummaryResult",
    "summary",
    # .location
    "rd2d",
    "rdbw2d",
    # .distance
    "rd2d_dist",
    "rd2d_distance",
    "rdbw2d_dist",
    "rdbw2d_distance",
]
rd2d/_utils.py ADDED
@@ -0,0 +1,272 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ from dataclasses import dataclass
5
+ from typing import Iterable
6
+
7
+ import numpy as np
8
+ import pandas as pd
9
+ from scipy import stats
10
+
11
+
12
def as_1d(x, name: str) -> np.ndarray:
    """Coerce ``x`` to a flat float array.

    Inputs that are not already one-dimensional (scalars, matrices) are
    flattened with ``np.ravel``.

    Raises:
        ValueError: if the resulting array has no elements; ``name`` is used
            in the message.
    """
    flat = np.asarray(x, dtype=float)
    flat = flat if flat.ndim == 1 else np.ravel(flat)
    if not flat.size:
        raise ValueError(f"{name} must not be empty.")
    return flat
19
+
20
+
21
def as_2d(x, name: str, ncol: int | None = None) -> np.ndarray:
    """Coerce ``x`` to a two-dimensional float array.

    A one-dimensional input becomes a single column. When ``ncol`` is given,
    the number of columns must match it exactly.

    Raises:
        ValueError: if the array is not two-dimensional after coercion, or if
            its column count differs from ``ncol``.
    """
    mat = np.asarray(x, dtype=float)
    if mat.ndim == 1:
        # Treat a vector as an (n, 1) column matrix.
        mat = mat[:, None]
    if mat.ndim != 2:
        raise ValueError(f"{name} must be a two-dimensional array.")
    wrong_width = ncol is not None and mat.shape[1] != ncol
    if wrong_width:
        raise ValueError(f"{name} must have exactly {ncol} columns.")
    return mat
30
+
31
+
32
def check_lengths(y: np.ndarray, *arrays: np.ndarray) -> None:
    """Verify that every array in ``arrays`` has the same ``len`` as ``y``.

    Raises:
        ValueError: on the first array whose length differs from ``len(y)``.
    """
    expected = len(y)
    if any(len(other) != expected for other in arrays):
        raise ValueError("Input vectors and rows of matrix inputs must have the same length.")
37
+
38
+
39
def complete_cases(*arrays: np.ndarray) -> np.ndarray:
    """Return a boolean mask of rows that are complete in every input.

    The mask length is taken from the first array. Inputs that can be cast to
    float are checked with ``np.isfinite`` (so NaN and +/-inf count as
    missing); inputs that cannot be cast fall back to ``pd.isna``. For inputs
    that are not one-dimensional, a row is complete only if all of its
    entries are.
    """
    keep = np.ones(len(arrays[0]), dtype=bool)
    for item in arrays:
        data = np.asarray(item)
        try:
            ok = np.isfinite(data.astype(float))
        except (TypeError, ValueError):
            # Non-numeric data: only explicit missing values are dropped.
            ok = ~pd.isna(data)
        row_ok = ok if data.ndim == 1 else np.all(ok, axis=1)
        keep &= row_ok
    return keep
52
+
53
+
54
def normalize_binary(x, name: str) -> np.ndarray:
    """Convert a logical or 0/1 indicator to a boolean array.

    Boolean input is returned as a boolean copy. Otherwise the finite values
    must all be 0 or 1. Non-finite entries are ignored by this check and are
    then cast with ``astype(bool)`` like every other entry.

    Raises:
        ValueError: if a finite value other than 0 or 1 is present.
    """
    raw = np.asarray(x)
    if raw.dtype == bool:
        return raw.astype(bool)
    numeric = raw.astype(float)
    observed = set(np.unique(numeric[np.isfinite(numeric)]).tolist())
    if not observed <= {0.0, 1.0}:
        raise ValueError(f"{name} must be logical or contain only 0 and 1.")
    return numeric.astype(bool)
62
+
63
+
64
def validate_order(value, name: str) -> int:
    """Validate that ``value`` is a nonnegative whole number and return it as ``int``.

    ``value`` is first converted with ``float``; it is accepted when it is
    finite, not negative, and within ``sqrt(machine epsilon)`` of an integer.

    Raises:
        ValueError: if ``value`` is ``None`` or fails any of the checks above.
    """
    if value is None:
        raise ValueError(f"{name} must not be None.")
    number = float(value)
    tolerance = np.sqrt(np.finfo(float).eps)
    # Short-circuiting keeps round() away from NaN/inf, which it cannot handle.
    acceptable = (
        np.isfinite(number)
        and number >= 0
        and abs(number - round(number)) <= tolerance
    )
    if not acceptable:
        raise ValueError(f"{name} must be a nonnegative integer.")
    return int(round(number))
71
+
72
+
73
def validate_deriv(deriv: Iterable[float], p: int) -> tuple[int, int]:
    """Validate a pair of derivative orders against polynomial order ``p``.

    ``deriv`` must hold exactly two finite, nonnegative values, each within
    ``sqrt(machine epsilon)`` of an integer, and their sum must not exceed
    ``p``.

    Returns:
        The two orders as a tuple of ``int``.

    Raises:
        ValueError: if any of the conditions above is violated.
    """
    shape_message = "deriv must be a nonnegative integer vector of length 2."
    values = np.asarray(tuple(deriv), dtype=float)
    if values.shape != (2,) or not np.all(np.isfinite(values)) or np.any(values < 0):
        raise ValueError(shape_message)
    rounded = np.round(values)
    tolerance = np.sqrt(np.finfo(float).eps)
    if np.any(np.abs(values - rounded) > tolerance):
        raise ValueError(shape_message)
    orders = tuple(int(v) for v in rounded)
    if sum(orders) > p:
        raise ValueError("sum(deriv) must be less than or equal to p.")
    return orders
83
+
84
+
85
def kernel_weights(u: np.ndarray, kernel: str) -> np.ndarray:
    """Evaluate a kernel function at the scaled distances ``u``.

    Args:
        u: scaled distances (any array shape).
        kernel: case-insensitive kernel name; one of ``tri``/``triangular``,
            ``epa``/``epanechnikov``, ``uni``/``uniform``, ``gau``/``gaussian``.

    Returns:
        Kernel weights with the same shape as ``u``. The compactly supported
        kernels are zero for ``|u| > 1``, including infinite ``u``.

    Raises:
        ValueError: if ``kernel`` is not a recognised name.
    """
    kernel = kernel.lower()
    if kernel in {"tri", "triangular"}:
        # Clipping at zero already enforces the |u| <= 1 support.
        return np.maximum(1.0 - np.abs(u), 0.0)
    if kernel in {"epa", "epanechnikov"}:
        # Clip instead of multiplying by an indicator: (1 - u**2) * False is
        # -inf * 0 = NaN when u is infinite, whereas the weight should be 0.
        return 0.75 * np.maximum(1.0 - u**2, 0.0)
    if kernel in {"uni", "uniform"}:
        return 0.5 * (np.abs(u) <= 1.0)
    if kernel in {"gau", "gaussian"}:
        return stats.norm.pdf(u)
    raise ValueError("kernel must be one of tri, epa, uni, or gau.")
96
+
97
+
98
def multi_indices_2d(p: int) -> list[tuple[int, int]]:
    """List the exponent pairs ``(xpow, ypow)`` with total degree at most ``p``.

    Pairs are ordered by total degree; within a degree the second exponent
    increases from 0, e.g. ``p=2`` gives
    ``(0,0), (1,0), (0,1), (2,0), (1,1), (0,2)``.
    """
    return [
        (degree - ypow, ypow)
        for degree in range(p + 1)
        for ypow in range(degree + 1)
    ]
105
+
106
+
107
def basis_2d(centered: np.ndarray, p: int) -> np.ndarray:
    """Build the bivariate monomial design matrix up to total degree ``p``.

    Column ``j`` is ``x1**a * x2**b`` for the ``j``-th pair ``(a, b)`` returned
    by ``multi_indices_2d(p)``, where ``x1`` and ``x2`` are the first two
    columns of ``centered``.
    """
    exponents = multi_indices_2d(p)
    first, second = centered[:, 0], centered[:, 1]
    columns = []
    for pow_first, pow_second in exponents:
        columns.append((first**pow_first) * (second**pow_second))
    return np.column_stack(columns)
112
+
113
+
114
def basis_1d(distance: np.ndarray, p: int) -> np.ndarray:
    """Build the univariate polynomial design matrix ``[1, d, ..., d**p]``."""
    columns = []
    for power in range(p + 1):
        columns.append(distance**power)
    return np.column_stack(columns)
116
+
117
+
118
def target_2d(p: int, deriv: tuple[int, int], tangvec: np.ndarray | None, row: int) -> np.ndarray:
    """Build the selector vector that picks a target from the 2-D coefficients.

    The vector is laid out in the order of ``multi_indices_2d(p)``.

    Args:
        p: polynomial order.
        deriv: pair of derivative orders; used only when ``tangvec`` is
            ``None``. Any two-element sequence is accepted.
        tangvec: optional array indexed as ``tangvec[row, 0]`` and
            ``tangvec[row, 1]``; when given, these two values are placed on the
            ``(1, 0)`` and ``(0, 1)`` coefficients and ``deriv`` is ignored.
        row: row of ``tangvec`` to use.

    Returns:
        A float vector with one entry per multi-index. Without ``tangvec`` it
        holds ``deriv[0]! * deriv[1]!`` at the position of ``deriv`` and zeros
        elsewhere; if ``deriv`` is not among the multi-indices the vector is
        all zeros (no error is raised).

    Raises:
        ValueError: if ``tangvec`` is given and ``p < 1``.
    """
    # Compute the index list once instead of on every lookup.
    idx = multi_indices_2d(p)
    target = np.zeros(len(idx))
    if tangvec is not None:
        if p < 1:
            raise ValueError("tangvec requires p >= 1.")
        target[idx.index((1, 0))] = tangvec[row, 0]
        target[idx.index((0, 1))] = tangvec[row, 1]
        return target
    # A list or array never compares equal to a tuple, so normalise first;
    # otherwise a valid deriv passed as a list silently yields all zeros.
    key = tuple(deriv)
    if key in idx:
        pos = idx.index(key)
        # Take the factorials of the canonical int exponents so float-valued
        # orders such as (1.0, 0.0) do not reach math.factorial.
        xpow, ypow = idx[pos]
        target[pos] = float(math.factorial(xpow) * math.factorial(ypow))
    return target
131
+
132
+
133
def target_1d(p: int, deriv: int = 0) -> np.ndarray:
    """Build the selector vector for the ``deriv``-th derivative in a 1-D fit.

    Returns:
        A length ``p + 1`` float vector that is zero except for ``deriv!`` at
        position ``deriv``.

    Raises:
        ValueError: if ``deriv`` exceeds ``p``.
    """
    if deriv > p:
        raise ValueError("deriv must be less than or equal to p.")
    selector = np.zeros(p + 1)
    weight = float(math.factorial(deriv))
    selector[deriv] = weight
    return selector
139
+
140
+
141
def weighted_pinv_design(design: np.ndarray, weights: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Restrict a design to positively weighted rows and invert its Gram matrix.

    Returns:
        A tuple ``(keep, X, inv_gram)`` where ``keep`` is the boolean mask
        ``weights > 0``, ``X`` is ``design`` restricted to those rows, and
        ``inv_gram`` is the pseudo-inverse of ``X.T @ diag(w) @ X`` computed
        with ``rcond=1e-12``.

    Raises:
        ValueError: if no row has a positive weight.
    """
    keep = weights > 0
    kept_rows = design[keep, :]
    kept_weights = weights[keep]
    if kept_rows.shape[0] == 0:
        raise ValueError("No observations inside the bandwidth.")
    weighted_rows = kept_rows * kept_weights[:, None]
    gram = kept_rows.T @ weighted_rows
    return keep, kept_rows, np.linalg.pinv(gram, rcond=1e-12)
151
+
152
+
153
@dataclass
class LocalFit:
    """Result bundle returned by ``local_fit_targets``."""

    # Value of the requested linear functional, one entry per outcome column.
    estimate: np.ndarray
    # Standard errors matching ``estimate``.
    se: np.ndarray
    # Influence contributions: one row per observation, or one row per
    # cluster when the fit was clustered.
    influence: np.ndarray
    # Number of rows that carried a positive weight in the fit.
    n_eff: int
159
+
160
+
161
def local_fit_targets(
    design_full: np.ndarray,
    weights_full: np.ndarray,
    outcomes_full: np.ndarray,
    target: np.ndarray,
    *,
    vce: str = "hc1",
    cluster: np.ndarray | None = None,
) -> LocalFit:
    """Fit a weighted least-squares model and evaluate one linear functional.

    Only rows with a positive weight enter the fit. The functional is
    ``target @ beta`` for each outcome column, with a heteroskedasticity- or
    cluster-robust standard error.

    Args:
        design_full: design matrix with one row per observation.
        weights_full: weights, one per row of ``design_full``.
        outcomes_full: outcome vector or matrix (one column per outcome).
        target: selector vector applied to the coefficient vector.
        vce: case-insensitive variance type. ``hc1`` applies a
            degrees-of-freedom scale, ``hc2``/``hc3`` apply leverage
            adjustments; any other value applies no adjustment.
        cluster: optional cluster labels, one per row of ``design_full``.

    Returns:
        A ``LocalFit``. Without clustering, ``influence`` has one row per
        observation (zero outside the kept rows). With clustering it has one
        row per distinct label in ``cluster``, in order of first appearance,
        holding that cluster's summed contribution. In both cases
        ``influence.T @ influence`` reproduces the covariance behind ``se``.

    Raises:
        ValueError: propagated from ``weighted_pinv_design`` when no row has a
            positive weight.
    """
    outcomes_full = np.asarray(outcomes_full, dtype=float)
    if outcomes_full.ndim == 1:
        outcomes_full = outcomes_full.reshape(-1, 1)
    n_outcomes = outcomes_full.shape[1]

    keep, X, inv_gram = weighted_pinv_design(design_full, weights_full)
    w = weights_full[keep]
    Y = outcomes_full[keep, :]
    beta = inv_gram @ (X.T @ (w[:, None] * Y))
    resid = Y - X @ beta
    n_eff = X.shape[0]
    k = X.shape[1]

    vce = vce.lower()
    adj = np.ones(n_eff)
    if vce in {"hc2", "hc3"}:
        # Leverage is only needed for the HC2/HC3 residual adjustments.
        leverage = np.sum((X @ inv_gram) * X, axis=1) * w
        shrink = np.maximum(1.0 - leverage, 1e-8)
        adj = 1.0 / shrink if vce == "hc2" else 1.0 / shrink**2

    # Influence contribution for the requested linear functional.
    row = target @ inv_gram
    score_base = (X * w[:, None]) @ row
    infl_kept = score_base[:, None] * resid * np.sqrt(adj)[:, None]

    scale = 1.0
    if vce == "hc1" and n_eff > k:
        scale = n_eff / (n_eff - k)

    if cluster is None:
        cov = scale * (infl_kept.T @ infl_kept)
        influence = np.zeros((design_full.shape[0], n_outcomes))
        influence[keep, :] = np.sqrt(scale) * infl_kept
    else:
        cluster_all = np.asarray(cluster)
        cluster_kept = cluster_all[keep]
        kept_groups = pd.unique(cluster_kept)
        summed = np.zeros((len(kept_groups), n_outcomes))
        for i, group in enumerate(kept_groups):
            summed[i, :] = np.sum(infl_kept[cluster_kept == group, :], axis=0)
        if vce == "hc1" and len(kept_groups) > 1 and n_eff > k:
            scale = (len(kept_groups) / (len(kept_groups) - 1.0)) * ((n_eff - 1.0) / (n_eff - k))
        cov = scale * (summed.T @ summed)
        # Scale the cluster sums exactly as the unclustered rows are scaled;
        # previously the factor was dropped here, so the returned influence
        # did not reproduce ``cov`` under hc1.
        infl_source = np.sqrt(scale) * summed

        # For cross-evaluation covariance, store cluster-level sums in rows
        # indexed by the first occurrence of each cluster over ALL
        # observations, so fits with different kept rows share one layout.
        all_groups = pd.unique(cluster_all)
        group_to_row = {g: i for i, g in enumerate(all_groups)}
        influence = np.zeros((len(all_groups), n_outcomes))
        for j, g in enumerate(kept_groups):
            influence[group_to_row[g], :] = infl_source[j, :]

    estimate = target @ beta
    se = np.sqrt(np.maximum(np.diag(cov), 0.0))

    return LocalFit(estimate=np.asarray(estimate), se=se, influence=influence, n_eff=int(n_eff))
233
+
234
+
235
def ci_columns(est: np.ndarray, se: np.ndarray, level: float, side: str) -> tuple[np.ndarray, np.ndarray]:
    """Compute normal-approximation confidence bounds.

    Args:
        est: point estimates.
        se: standard errors matching ``est``.
        level: confidence level in percent (e.g. ``95``).
        side: ``"two"`` for a two-sided interval, ``"left"`` for an upper
            bound only (lower is ``-inf``), ``"right"`` for a lower bound only
            (upper is ``+inf``).

    Returns:
        ``(lower, upper)`` arrays.

    Raises:
        ValueError: if ``side`` is not one of the three accepted values.
    """
    if side == "two":
        two_sided = stats.norm.ppf((level + 100.0) / 200.0)
        return est - two_sided * se, est + two_sided * se

    one_sided = stats.norm.ppf(level / 100.0)
    if side == "left":
        lower = np.full(len(est), -np.inf)
        return lower, est + one_sided * se
    if side == "right":
        lower = est - one_sided * se
        return lower, np.full(len(est), np.inf)
    raise ValueError("side must be two, left, or right.")
245
+
246
+
247
def p_values(tvalues: np.ndarray) -> np.ndarray:
    """Return two-sided p-values under a standard normal reference."""
    upper_tail = stats.norm.sf(np.abs(tvalues))
    return 2.0 * upper_tail
249
+
250
+
251
def bandwidth_floor(values: np.ndarray, bwcheck: int | None) -> float:
    """Return the ``bwcheck``-th smallest finite entry of ``values``.

    Args:
        values: array-like of numbers; non-finite entries (NaN, +/-inf, and
            ``None`` once converted to float) are ignored.
        bwcheck: rank of the order statistic to return, clamped to
            ``[1, number of finite values]``. ``None`` disables the floor.

    Returns:
        ``nan`` if there are no finite values, ``0.0`` if ``bwcheck`` is
        ``None``, otherwise the requested order statistic as a ``float``.
    """
    # Convert once and test finiteness on the float array: np.isfinite on the
    # raw input raises TypeError for object data such as a list with None.
    numeric = np.asarray(values, dtype=float)
    finite_sorted = np.sort(numeric[np.isfinite(numeric)])
    if finite_sorted.size == 0:
        return np.nan
    if bwcheck is None:
        return 0.0
    k = min(max(int(bwcheck), 1), finite_sorted.size)
    return float(finite_sorted[k - 1])
259
+
260
+
261
def cer_factor(n: int, p: int) -> float:
    """Return ``n ** (1 / (2p + 4) - 1 / (p + 4))`` with ``n`` floored at 1.

    NOTE(review): presumably the bandwidth rescaling factor for
    coverage-error-rate inference - confirm against the callers.
    """
    sample_size = max(int(n), 1)
    exponent = 1.0 / (2.0 * p + 4.0) - 1.0 / (p + 4.0)
    return sample_size**exponent
264
+
265
+
266
def ensure_dataframe(x: np.ndarray, columns: list[str]) -> pd.DataFrame:
    """Wrap ``x`` in a ``DataFrame`` with the given column labels."""
    frame = pd.DataFrame(x, columns=columns)
    return frame
268
+
269
+
270
def covariance_from_influence(influence: np.ndarray) -> np.ndarray:
    """Return the cross-product ``influence @ influence.T`` as a float array."""
    rows = np.asarray(influence, dtype=float)
    transposed = rows.T
    return rows @ transposed