rd2d 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rd2d-0.1.0/LICENSE.md +12 -0
- rd2d-0.1.0/PKG-INFO +127 -0
- rd2d-0.1.0/README.md +96 -0
- rd2d-0.1.0/pyproject.toml +51 -0
- rd2d-0.1.0/rd2d/__init__.py +17 -0
- rd2d-0.1.0/rd2d/_utils.py +272 -0
- rd2d-0.1.0/rd2d/distance.py +888 -0
- rd2d-0.1.0/rd2d/location.py +1186 -0
- rd2d-0.1.0/rd2d/results.py +268 -0
- rd2d-0.1.0/rd2d.egg-info/PKG-INFO +127 -0
- rd2d-0.1.0/rd2d.egg-info/SOURCES.txt +14 -0
- rd2d-0.1.0/rd2d.egg-info/dependency_links.txt +1 -0
- rd2d-0.1.0/rd2d.egg-info/requires.txt +12 -0
- rd2d-0.1.0/rd2d.egg-info/top_level.txt +1 -0
- rd2d-0.1.0/setup.cfg +4 -0
- rd2d-0.1.0/tests/test_basic.py +134 -0
rd2d-0.1.0/LICENSE.md
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
RD2D License
|
|
2
|
+
|
|
3
|
+
RD2D is free software: you can redistribute it and/or modify it
|
|
4
|
+
under the terms of the GNU General Public License version 3 as published
|
|
5
|
+
by the Free Software Foundation.
|
|
6
|
+
|
|
7
|
+
RD2D is distributed in the hope that it will be useful, but without
|
|
8
|
+
any warranty; without even the implied warranty of merchantability or
|
|
9
|
+
fitness for a particular purpose.
|
|
10
|
+
|
|
11
|
+
The full GPL-3.0 license text is available at:
|
|
12
|
+
https://www.gnu.org/licenses/gpl-3.0.en.html
|
rd2d-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: rd2d
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Local polynomial methods for boundary discontinuity designs
|
|
5
|
+
Author: Rocio Titiunik, Ruiqi Rae Yu
|
|
6
|
+
Author-email: "Matias D. Cattaneo" <matias.d.cattaneo@gmail.com>
|
|
7
|
+
License-Expression: GPL-3.0-only
|
|
8
|
+
Project-URL: Homepage, https://rdpackages.github.io/
|
|
9
|
+
Project-URL: Repository, https://github.com/rdpackages/rd2d
|
|
10
|
+
Project-URL: Issues, https://github.com/rdpackages/rd2d/issues
|
|
11
|
+
Keywords: boundary discontinuity,regression discontinuity,causal inference,local polynomial,bandwidth selection
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Mathematics
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
License-File: LICENSE.md
|
|
21
|
+
Requires-Dist: numpy
|
|
22
|
+
Requires-Dist: pandas
|
|
23
|
+
Requires-Dist: scipy
|
|
24
|
+
Provides-Extra: plots
|
|
25
|
+
Requires-Dist: matplotlib; extra == "plots"
|
|
26
|
+
Provides-Extra: replication
|
|
27
|
+
Requires-Dist: rdrobust; extra == "replication"
|
|
28
|
+
Provides-Extra: test
|
|
29
|
+
Requires-Dist: pytest; extra == "test"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# rd2d for Python
|
|
33
|
+
|
|
34
|
+
`rd2d` provides local polynomial estimation, robust bias-corrected inference,
|
|
35
|
+
and bandwidth helpers for boundary discontinuity designs with bivariate running
|
|
36
|
+
variables. The package includes location-based and distance-based methods,
|
|
37
|
+
sharp and fuzzy designs, pointwise confidence intervals, covariance-backed
|
|
38
|
+
summary inference, uniform confidence bands, and aggregate boundary effect
|
|
39
|
+
summaries.
|
|
40
|
+
|
|
41
|
+
```python
|
|
42
|
+
from rd2d import rd2d, rdbw2d, rd2d_dist, rdbw2d_dist, summary
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Main Functions
|
|
46
|
+
|
|
47
|
+
- `rd2d()`: location-based estimation and inference.
|
|
48
|
+
- `rdbw2d()`: location-based bandwidth selection.
|
|
49
|
+
- `rd2d_dist()` and `rd2d_distance()`: distance-based estimation and inference.
|
|
50
|
+
- `rdbw2d_dist()` and `rdbw2d_distance()`: distance-based bandwidth selection.
|
|
51
|
+
- `summary()`: summary tables with optional uniform bands, WBATE, and LBATE.
|
|
52
|
+
|
|
53
|
+
The sibling scripts `../rd2d_illustration.py` and `../rd2d_plot.py` provide a
|
|
54
|
+
self-contained simulation, estimation, and plotting workflow. Generated files
|
|
55
|
+
are written under `../output/`.
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```sh
|
|
60
|
+
python -m pip install rd2d
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
For local development:
|
|
64
|
+
|
|
65
|
+
```sh
|
|
66
|
+
python -m pip install -e .
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
Optional plotting and testing dependencies can be installed with:
|
|
70
|
+
|
|
71
|
+
```sh
|
|
72
|
+
python -m pip install -e ".[plots,test]"
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Basic Usage
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import numpy as np
|
|
79
|
+
from rd2d import rd2d
|
|
80
|
+
|
|
81
|
+
rng = np.random.default_rng(123)
|
|
82
|
+
n = 800
|
|
83
|
+
x1 = rng.normal(size=n)
|
|
84
|
+
x2 = rng.normal(size=n)
|
|
85
|
+
assignment = (x1 >= 0).astype(float)
|
|
86
|
+
y = 3 + 2 * x1 + 1.5 * x2 + assignment + rng.normal(size=n)
|
|
87
|
+
x = np.column_stack([x1, x2])
|
|
88
|
+
b = np.array([[0.0, 0.0], [0.0, 1.0]])
|
|
89
|
+
|
|
90
|
+
fit = rd2d(y, x, assignment, b, h=0.9, params_cov="main")
|
|
91
|
+
fit.main
|
|
92
|
+
fit.summary(cbands="main").tables["main"]
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
For distance-based designs, pass one signed-distance column per evaluation
|
|
96
|
+
point. Nonnegative distances identify observations on the treated side.
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from rd2d import rd2d_dist
|
|
100
|
+
|
|
101
|
+
distance = x1.reshape(-1, 1)
|
|
102
|
+
fit_dist = rd2d_dist(y, distance, h=0.5, b=np.array([[0.0, 0.0]]))
|
|
103
|
+
fit_dist.main
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Development
|
|
107
|
+
|
|
108
|
+
From this directory:
|
|
109
|
+
|
|
110
|
+
```sh
|
|
111
|
+
python -m pytest
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## Publishing From GitHub
|
|
115
|
+
|
|
116
|
+
The workflow `.github/workflows/python-publish.yml` builds, checks, tests, and
|
|
117
|
+
publishes the Python package to PyPI with trusted publishing. Configure:
|
|
118
|
+
|
|
119
|
+
- PyPI project: `rd2d`.
|
|
120
|
+
- Trusted publisher owner: `rdpackages`.
|
|
121
|
+
- Trusted publisher repository: `rd2d`.
|
|
122
|
+
- Trusted publisher workflow: `python-publish.yml`.
|
|
123
|
+
- Trusted publisher environment: `pypi`.
|
|
124
|
+
|
|
125
|
+
In GitHub, create an environment named `pypi`. Add required reviewers there if
|
|
126
|
+
you want each PyPI upload to require manual approval. Publish by creating a
|
|
127
|
+
GitHub Release or by running the `Publish Python package` workflow manually.
|
rd2d-0.1.0/README.md
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# rd2d for Python
|
|
2
|
+
|
|
3
|
+
`rd2d` provides local polynomial estimation, robust bias-corrected inference,
|
|
4
|
+
and bandwidth helpers for boundary discontinuity designs with bivariate running
|
|
5
|
+
variables. The package includes location-based and distance-based methods,
|
|
6
|
+
sharp and fuzzy designs, pointwise confidence intervals, covariance-backed
|
|
7
|
+
summary inference, uniform confidence bands, and aggregate boundary effect
|
|
8
|
+
summaries.
|
|
9
|
+
|
|
10
|
+
```python
|
|
11
|
+
from rd2d import rd2d, rdbw2d, rd2d_dist, rdbw2d_dist, summary
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
## Main Functions
|
|
15
|
+
|
|
16
|
+
- `rd2d()`: location-based estimation and inference.
|
|
17
|
+
- `rdbw2d()`: location-based bandwidth selection.
|
|
18
|
+
- `rd2d_dist()` and `rd2d_distance()`: distance-based estimation and inference.
|
|
19
|
+
- `rdbw2d_dist()` and `rdbw2d_distance()`: distance-based bandwidth selection.
|
|
20
|
+
- `summary()`: summary tables with optional uniform bands, WBATE, and LBATE.
|
|
21
|
+
|
|
22
|
+
The sibling scripts `../rd2d_illustration.py` and `../rd2d_plot.py` provide a
|
|
23
|
+
self-contained simulation, estimation, and plotting workflow. Generated files
|
|
24
|
+
are written under `../output/`.
|
|
25
|
+
|
|
26
|
+
## Installation
|
|
27
|
+
|
|
28
|
+
```sh
|
|
29
|
+
python -m pip install rd2d
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
For local development:
|
|
33
|
+
|
|
34
|
+
```sh
|
|
35
|
+
python -m pip install -e .
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Optional plotting and testing dependencies can be installed with:
|
|
39
|
+
|
|
40
|
+
```sh
|
|
41
|
+
python -m pip install -e ".[plots,test]"
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Basic Usage
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
import numpy as np
|
|
48
|
+
from rd2d import rd2d
|
|
49
|
+
|
|
50
|
+
rng = np.random.default_rng(123)
|
|
51
|
+
n = 800
|
|
52
|
+
x1 = rng.normal(size=n)
|
|
53
|
+
x2 = rng.normal(size=n)
|
|
54
|
+
assignment = (x1 >= 0).astype(float)
|
|
55
|
+
y = 3 + 2 * x1 + 1.5 * x2 + assignment + rng.normal(size=n)
|
|
56
|
+
x = np.column_stack([x1, x2])
|
|
57
|
+
b = np.array([[0.0, 0.0], [0.0, 1.0]])
|
|
58
|
+
|
|
59
|
+
fit = rd2d(y, x, assignment, b, h=0.9, params_cov="main")
|
|
60
|
+
fit.main
|
|
61
|
+
fit.summary(cbands="main").tables["main"]
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
For distance-based designs, pass one signed-distance column per evaluation
|
|
65
|
+
point. Nonnegative distances identify observations on the treated side.
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from rd2d import rd2d_dist
|
|
69
|
+
|
|
70
|
+
distance = x1.reshape(-1, 1)
|
|
71
|
+
fit_dist = rd2d_dist(y, distance, h=0.5, b=np.array([[0.0, 0.0]]))
|
|
72
|
+
fit_dist.main
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## Development
|
|
76
|
+
|
|
77
|
+
From this directory:
|
|
78
|
+
|
|
79
|
+
```sh
|
|
80
|
+
python -m pytest
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
## Publishing From GitHub
|
|
84
|
+
|
|
85
|
+
The workflow `.github/workflows/python-publish.yml` builds, checks, tests, and
|
|
86
|
+
publishes the Python package to PyPI with trusted publishing. Configure:
|
|
87
|
+
|
|
88
|
+
- PyPI project: `rd2d`.
|
|
89
|
+
- Trusted publisher owner: `rdpackages`.
|
|
90
|
+
- Trusted publisher repository: `rd2d`.
|
|
91
|
+
- Trusted publisher workflow: `python-publish.yml`.
|
|
92
|
+
- Trusted publisher environment: `pypi`.
|
|
93
|
+
|
|
94
|
+
In GitHub, create an environment named `pypi`. Add required reviewers there if
|
|
95
|
+
you want each PyPI upload to require manual approval. Publish by creating a
|
|
96
|
+
GitHub Release or by running the `Publish Python package` workflow manually.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "rd2d"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "Local polynomial methods for boundary discontinuity designs"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = "GPL-3.0-only"
|
|
12
|
+
license-files = ["LICENSE.md"]
|
|
13
|
+
authors = [
|
|
14
|
+
{ name = "Matias D. Cattaneo", email = "matias.d.cattaneo@gmail.com" },
|
|
15
|
+
{ name = "Rocio Titiunik" },
|
|
16
|
+
{ name = "Ruiqi Rae Yu" }
|
|
17
|
+
]
|
|
18
|
+
keywords = [
|
|
19
|
+
"boundary discontinuity",
|
|
20
|
+
"regression discontinuity",
|
|
21
|
+
"causal inference",
|
|
22
|
+
"local polynomial",
|
|
23
|
+
"bandwidth selection"
|
|
24
|
+
]
|
|
25
|
+
classifiers = [
|
|
26
|
+
"Development Status :: 3 - Alpha",
|
|
27
|
+
"Intended Audience :: Science/Research",
|
|
28
|
+
"Programming Language :: Python :: 3",
|
|
29
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
30
|
+
"Topic :: Scientific/Engineering",
|
|
31
|
+
"Topic :: Scientific/Engineering :: Mathematics",
|
|
32
|
+
]
|
|
33
|
+
dependencies = [
|
|
34
|
+
"numpy",
|
|
35
|
+
"pandas",
|
|
36
|
+
"scipy"
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
plots = ["matplotlib"]
|
|
41
|
+
replication = ["rdrobust"]
|
|
42
|
+
test = ["pytest"]
|
|
43
|
+
|
|
44
|
+
[project.urls]
|
|
45
|
+
Homepage = "https://rdpackages.github.io/"
|
|
46
|
+
Repository = "https://github.com/rdpackages/rd2d"
|
|
47
|
+
Issues = "https://github.com/rdpackages/rd2d/issues"
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
where = ["."]
|
|
51
|
+
include = ["rd2d*"]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Local polynomial methods for boundary discontinuity designs."""
|
|
2
|
+
|
|
3
|
+
from .distance import rdbw2d_distance, rdbw2d_dist, rd2d_distance, rd2d_dist
|
|
4
|
+
from .location import rdbw2d, rd2d
|
|
5
|
+
from .results import RD2DResult, SummaryResult, summary
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"RD2DResult",
|
|
9
|
+
"SummaryResult",
|
|
10
|
+
"rdbw2d",
|
|
11
|
+
"rd2d",
|
|
12
|
+
"rdbw2d_dist",
|
|
13
|
+
"rdbw2d_distance",
|
|
14
|
+
"rd2d_dist",
|
|
15
|
+
"rd2d_distance",
|
|
16
|
+
"summary",
|
|
17
|
+
]
|
|
@@ -0,0 +1,272 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import math
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from typing import Iterable
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
import pandas as pd
|
|
9
|
+
from scipy import stats
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def as_1d(x, name: str) -> np.ndarray:
    """Coerce *x* to a non-empty one-dimensional float array.

    Inputs of any other dimensionality are flattened.

    Args:
        x: Array-like input convertible to ``float``.
        name: Label used in the error message.

    Returns:
        A 1-D ``float`` array.

    Raises:
        ValueError: If the converted array has no elements.
    """
    flat = np.asarray(x, dtype=float)
    flat = flat if flat.ndim == 1 else flat.ravel()
    if not flat.size:
        raise ValueError(f"{name} must not be empty.")
    return flat
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def as_2d(x, name: str, ncol: int | None = None) -> np.ndarray:
|
|
22
|
+
arr = np.asarray(x, dtype=float)
|
|
23
|
+
if arr.ndim == 1:
|
|
24
|
+
arr = arr.reshape(-1, 1)
|
|
25
|
+
if arr.ndim != 2:
|
|
26
|
+
raise ValueError(f"{name} must be a two-dimensional array.")
|
|
27
|
+
if ncol is not None and arr.shape[1] != ncol:
|
|
28
|
+
raise ValueError(f"{name} must have exactly {ncol} columns.")
|
|
29
|
+
return arr
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def check_lengths(y: np.ndarray, *arrays: np.ndarray) -> None:
    """Verify that every array has the same ``len`` as *y*.

    Args:
        y: Reference input whose length defines the expected size.
        *arrays: Further inputs compared against ``len(y)``.

    Raises:
        ValueError: If any input has a different length.
    """
    expected = len(y)
    if any(len(other) != expected for other in arrays):
        raise ValueError("Input vectors and rows of matrix inputs must have the same length.")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def complete_cases(*arrays: np.ndarray) -> np.ndarray:
    """Return a boolean mask of rows that are usable in every input.

    Numeric inputs are usable where finite. Inputs that cannot be converted
    to ``float`` are usable where ``pandas.isna`` reports no missing value.
    For inputs that are not one-dimensional, all entries along axis 1 must
    be usable.

    Args:
        *arrays: Inputs sharing the length of the first one.

    Returns:
        Boolean array with one entry per row of the first input.
    """
    usable = np.ones(len(arrays[0]), dtype=bool)
    for candidate in arrays:
        data = np.asarray(candidate)
        try:
            ok = np.isfinite(data.astype(float))
        except (TypeError, ValueError):
            # Non-numeric content (e.g. string labels): only missing values
            # disqualify a row.
            ok = ~pd.isna(data)
        row_ok = ok if data.ndim == 1 else np.all(ok, axis=1)
        usable &= row_ok
    return usable
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def normalize_binary(x, name: str) -> np.ndarray:
    """Convert a logical or 0/1 indicator to a boolean array.

    Non-finite entries are ignored when validating the set of values.

    Args:
        x: Array-like of booleans or of values convertible to ``float``.
        name: Label used in the error message.

    Returns:
        Boolean array with the same shape as the input.

    Raises:
        ValueError: If any finite entry is neither 0 nor 1.
    """
    raw = np.asarray(x)
    if raw.dtype == bool:
        return raw.astype(bool)
    numeric = raw.astype(float)
    observed = numeric[np.isfinite(numeric)]
    if not np.all(np.isin(observed, (0.0, 1.0))):
        raise ValueError(f"{name} must be logical or contain only 0 and 1.")
    return numeric.astype(bool)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def validate_order(value, name: str) -> int:
    """Validate a nonnegative integer-valued setting and return it as ``int``.

    Values within ``sqrt(machine epsilon)`` of an integer are accepted and
    rounded.

    Args:
        value: Candidate value convertible to ``float``.
        name: Label used in error messages.

    Returns:
        The value as a Python ``int``.

    Raises:
        ValueError: If the value is ``None``, non-finite, negative, or not
            integer-valued.
    """
    if value is None:
        raise ValueError(f"{name} must not be None.")
    number = float(value)
    tolerance = np.sqrt(np.finfo(float).eps)
    # ``and`` short-circuits, so ``round`` is never applied to NaN or inf.
    acceptable = (
        np.isfinite(number)
        and not number < 0
        and not abs(number - round(number)) > tolerance
    )
    if not acceptable:
        raise ValueError(f"{name} must be a nonnegative integer.")
    return int(round(number))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def validate_deriv(deriv: Iterable[float], p: int) -> tuple[int, int]:
    """Validate a pair of partial-derivative orders against the order *p*.

    Args:
        deriv: Iterable of two nonnegative, integer-valued numbers.
        p: Upper bound on the sum of the two orders.

    Returns:
        The two orders as a tuple of Python ``int``.

    Raises:
        ValueError: If ``deriv`` is not a finite, nonnegative, integer-valued
            vector of length 2, or if its entries sum to more than ``p``.
    """
    shape_message = "deriv must be a nonnegative integer vector of length 2."
    orders = np.asarray(tuple(deriv), dtype=float)
    if orders.shape != (2,):
        raise ValueError(shape_message)

    nearest = np.round(orders)
    tolerance = np.sqrt(np.finfo(float).eps)
    if (
        not np.all(np.isfinite(orders))
        or (orders < 0).any()
        or (np.abs(orders - nearest) > tolerance).any()
    ):
        raise ValueError(shape_message)

    first, second = (int(v) for v in nearest)
    if first + second > p:
        raise ValueError("sum(deriv) must be less than or equal to p.")
    return first, second
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def kernel_weights(u: np.ndarray, kernel: str) -> np.ndarray:
    """Evaluate a kernel function at the scaled points *u*.

    The triangular, Epanechnikov and uniform kernels are zero outside
    ``|u| <= 1``; the Gaussian kernel is the standard normal density.

    Args:
        u: Points at which to evaluate the kernel.
        kernel: Case-insensitive kernel name: ``tri``/``triangular``,
            ``epa``/``epanechnikov``, ``uni``/``uniform`` or
            ``gau``/``gaussian``.

    Returns:
        Kernel values with the same shape as ``u``.

    Raises:
        ValueError: If the kernel name is not recognised.
    """
    aliases = {
        "tri": "tri",
        "triangular": "tri",
        "epa": "epa",
        "epanechnikov": "epa",
        "uni": "uni",
        "uniform": "uni",
        "gau": "gau",
        "gaussian": "gau",
    }
    family = aliases.get(kernel.lower())
    if family is None:
        raise ValueError("kernel must be one of tri, epa, uni, or gau.")
    if family == "gau":
        return stats.norm.pdf(u)

    in_support = np.abs(u) <= 1.0
    if family == "uni":
        return 0.5 * in_support
    if family == "epa":
        return 0.75 * (1.0 - u**2) * in_support
    return np.maximum(1.0 - np.abs(u), 0.0) * in_support
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def multi_indices_2d(p: int) -> list[tuple[int, int]]:
    """List the bivariate monomial exponents of total degree at most *p*.

    Pairs are ordered by increasing total degree and, within a degree, by
    increasing power of the second variable.

    Args:
        p: Maximum total degree.

    Returns:
        List of ``(power_of_first, power_of_second)`` tuples.
    """
    return [
        (total - second, second)
        for total in range(p + 1)
        for second in range(total + 1)
    ]
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def basis_2d(centered: np.ndarray, p: int) -> np.ndarray:
    """Build the bivariate polynomial design matrix of total degree *p*.

    Args:
        centered: Array whose columns 0 and 1 hold the two coordinates.
        p: Maximum total degree of the monomials.

    Returns:
        Matrix with one column per exponent pair from
        ``multi_indices_2d(p)``, in that order.
    """
    exponents = multi_indices_2d(p)
    first, second = centered[:, 0], centered[:, 1]
    columns = [(first**a) * (second**b) for a, b in exponents]
    return np.column_stack(columns)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def basis_1d(distance: np.ndarray, p: int) -> np.ndarray:
    """Build the univariate polynomial design matrix of degree *p*.

    Args:
        distance: One-dimensional array of values.
        p: Maximum power.

    Returns:
        Matrix whose column ``j`` holds ``distance ** j`` for ``j = 0..p``.
    """
    powers = tuple(distance**exponent for exponent in range(p + 1))
    return np.column_stack(powers)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def target_2d(p: int, deriv: tuple[int, int], tangvec: np.ndarray | None, row: int) -> np.ndarray:
    """Build the selector vector for a bivariate polynomial fit.

    With ``tangvec`` given, the vector places ``tangvec[row, 0]`` and
    ``tangvec[row, 1]`` on the positions of the two first-order monomials.
    Otherwise it places ``deriv[0]! * deriv[1]!`` on the position of the
    monomial with exponents ``deriv``. If ``deriv`` is not among the
    exponents for degree ``p``, the vector is all zeros.

    Args:
        p: Polynomial degree of the fit.
        deriv: Pair of partial-derivative orders; ignored when ``tangvec``
            is given.
        tangvec: Optional array indexed as ``[row, 0]`` and ``[row, 1]``.
        row: Row of ``tangvec`` to use.

    Returns:
        Vector with one entry per exponent pair from ``multi_indices_2d(p)``.

    Raises:
        ValueError: If ``tangvec`` is given and ``p < 1``.
    """
    exponents = multi_indices_2d(p)
    selector = np.zeros(len(exponents))

    if tangvec is None:
        if deriv in exponents:
            weight = math.factorial(deriv[0]) * math.factorial(deriv[1])
            selector[exponents.index(deriv)] = float(weight)
        return selector

    if p < 1:
        raise ValueError("tangvec requires p >= 1.")
    selector[exponents.index((1, 0))] = tangvec[row, 0]
    selector[exponents.index((0, 1))] = tangvec[row, 1]
    return selector
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def target_1d(p: int, deriv: int = 0) -> np.ndarray:
    """Build the selector vector for a univariate polynomial fit.

    Args:
        p: Polynomial degree of the fit.
        deriv: Derivative order to select.

    Returns:
        Vector of length ``p + 1`` that is zero except for ``deriv!`` at
        position ``deriv``.

    Raises:
        ValueError: If ``deriv`` exceeds ``p``.
    """
    if deriv > p:
        raise ValueError("deriv must be less than or equal to p.")
    selector = np.zeros(p + 1)
    weight = math.factorial(deriv)
    selector[deriv] = float(weight)
    return selector
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def weighted_pinv_design(design: np.ndarray, weights: np.ndarray) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """Restrict a design to positive-weight rows and invert its weighted Gram matrix.

    Args:
        design: Design matrix with one row per observation.
        weights: Weight per observation; rows with weight ``<= 0`` are dropped.

    Returns:
        A tuple ``(keep, X, inv_gram)``: the boolean mask of retained rows,
        the retained design rows, and the pseudo-inverse of ``X' W X``.

    Raises:
        ValueError: If no row has a positive weight.
    """
    active = weights > 0
    rows = design[active, :]
    if not rows.shape[0]:
        raise ValueError("No observations inside the bandwidth.")
    weighted_rows = rows * weights[active][:, None]
    # Pseudo-inverse keeps the computation defined for a rank-deficient Gram.
    gram_inverse = np.linalg.pinv(rows.T @ weighted_rows, rcond=1e-12)
    return active, rows, gram_inverse
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
@dataclass
class LocalFit:
    """Container for the output of one weighted local polynomial fit."""

    # Value of the requested linear functional, one entry per outcome column.
    estimate: np.ndarray
    # Standard error of each entry of ``estimate``.
    se: np.ndarray
    # Influence contributions behind ``se``, one column per outcome.
    influence: np.ndarray
    # Number of observations that received a positive weight.
    n_eff: int
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def local_fit_targets(
    design_full: np.ndarray,
    weights_full: np.ndarray,
    outcomes_full: np.ndarray,
    target: np.ndarray,
    *,
    vce: str = "hc1",
    cluster: np.ndarray | None = None,
) -> LocalFit:
    """Fit weighted least squares and return a linear functional with its variance.

    Rows with a positive weight are used to fit every outcome column on the
    design. The functional ``target @ beta`` is returned together with a
    heteroskedasticity- or cluster-robust standard error and the influence
    contributions behind it.

    The returned influence always carries the same small-sample factor as
    the reported variance, so ``influence.T @ influence`` reproduces the
    covariance behind ``se`` both with and without clustering.

    Args:
        design_full: Design matrix with one row per observation.
        weights_full: Weight per observation; rows with weight ``<= 0`` are
            excluded from the fit.
        outcomes_full: Outcome vector, or matrix with one column per outcome.
        target: Coefficient-space vector defining the linear functional.
        vce: Case-insensitive variance option. ``"hc1"`` applies a
            degrees-of-freedom factor, ``"hc2"`` and ``"hc3"`` apply
            leverage adjustments; any other value applies no adjustment.
        cluster: Optional cluster label per observation.

    Returns:
        A ``LocalFit``. Without clustering ``influence`` has one row per
        observation (zero for excluded rows). With clustering it has one row
        per distinct label in ``cluster``, in order of first appearance
        (zero for clusters without a positive-weight observation).

    Raises:
        ValueError: If no observation has a positive weight.
    """
    outcomes_full = np.asarray(outcomes_full, dtype=float)
    if outcomes_full.ndim == 1:
        outcomes_full = outcomes_full.reshape(-1, 1)
    n_outcomes = outcomes_full.shape[1]

    keep, X, inv_gram = weighted_pinv_design(design_full, weights_full)
    w = weights_full[keep]
    Y = outcomes_full[keep, :]
    beta = inv_gram @ (X.T @ (w[:, None] * Y))
    resid = Y - X @ beta
    n_eff, k = X.shape

    vce = vce.lower()
    adj = np.ones(n_eff)
    if vce in {"hc2", "hc3"}:
        # Leverage is only needed for the HC2/HC3 residual adjustments; the
        # floor guards against division by (near) zero.
        leverage = np.sum((X @ inv_gram) * X, axis=1) * w
        shrink = np.maximum(1.0 - leverage, 1e-8)
        adj = 1.0 / shrink if vce == "hc2" else 1.0 / shrink**2

    # Influence contribution for the requested linear functional.
    row = target @ inv_gram
    score_base = (X * w[:, None]) @ row
    infl_kept = score_base[:, None] * resid * np.sqrt(adj)[:, None]

    scale = 1.0
    if vce == "hc1" and n_eff > k:
        scale = n_eff / (n_eff - k)

    if cluster is None:
        cov = scale * (infl_kept.T @ infl_kept)
        influence = np.zeros((design_full.shape[0], n_outcomes))
        influence[keep, :] = np.sqrt(scale) * infl_kept
    else:
        cluster_all = np.asarray(cluster)
        cluster_kept = cluster_all[keep]
        kept_groups = pd.unique(cluster_kept)

        # Sum the contributions within each cluster that enters the fit.
        summed = np.zeros((len(kept_groups), n_outcomes))
        for i, group in enumerate(kept_groups):
            summed[i, :] = np.sum(infl_kept[cluster_kept == group, :], axis=0)

        n_groups = len(kept_groups)
        if vce == "hc1" and n_groups > 1 and n_eff > k:
            scale = (n_groups / (n_groups - 1.0)) * ((n_eff - 1.0) / (n_eff - k))
        cov = scale * (summed.T @ summed)

        # Store the cluster sums in rows indexed by the cluster's position
        # among all labels, so fits using different subsets of observations
        # stay aligned. The sums are scaled by sqrt(scale) so that they
        # reproduce ``cov``, matching the non-clustered branch.
        all_groups = pd.unique(cluster_all)
        group_to_row = {g: i for i, g in enumerate(all_groups)}
        scaled_sums = np.sqrt(scale) * summed
        influence = np.zeros((len(all_groups), n_outcomes))
        for j, g in enumerate(kept_groups):
            influence[group_to_row[g], :] = scaled_sums[j, :]

    estimate = target @ beta
    se = np.sqrt(np.maximum(np.diag(cov), 0.0))

    return LocalFit(estimate=np.asarray(estimate), se=se, influence=influence, n_eff=int(n_eff))
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def ci_columns(est: np.ndarray, se: np.ndarray, level: float, side: str) -> tuple[np.ndarray, np.ndarray]:
    """Compute normal-approximation confidence limits.

    Args:
        est: Point estimates.
        se: Standard errors matching ``est``.
        level: Confidence level in percent (e.g. ``95``).
        side: ``"two"`` for a two-sided interval, ``"left"`` for an interval
            unbounded below, ``"right"`` for an interval unbounded above.

    Returns:
        A tuple ``(lower, upper)`` of limits.

    Raises:
        ValueError: If ``side`` is not one of the three options.
    """
    two_sided = side == "two"
    coverage = (level + 100.0) / 200.0 if two_sided else level / 100.0
    crit = stats.norm.ppf(coverage)

    if two_sided:
        return est - crit * se, est + crit * se
    if side == "left":
        return np.full(len(est), -np.inf), est + crit * se
    if side == "right":
        return est - crit * se, np.full(len(est), np.inf)
    raise ValueError("side must be two, left, or right.")
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
def p_values(tvalues: np.ndarray) -> np.ndarray:
    """Return two-sided p-values from the standard normal distribution.

    Args:
        tvalues: Test statistics.

    Returns:
        Twice the upper-tail normal probability of ``|tvalues|``.
    """
    upper_tail = stats.norm.sf(np.abs(tvalues))
    return 2.0 * upper_tail
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def bandwidth_floor(values: np.ndarray, bwcheck: int | None) -> float:
|
|
252
|
+
values = np.sort(np.asarray(values, dtype=float)[np.isfinite(values)])
|
|
253
|
+
if values.size == 0:
|
|
254
|
+
return np.nan
|
|
255
|
+
if bwcheck is None:
|
|
256
|
+
return 0.0
|
|
257
|
+
k = min(max(int(bwcheck), 1), values.size)
|
|
258
|
+
return float(values[k - 1])
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def cer_factor(n: int, p: int) -> float:
    """Return ``n ** (1 / (2p + 4) - 1 / (p + 4))`` with ``n`` floored at 1.

    Presumably the factor that rescales a bandwidth for coverage-error-rate
    optimality; confirm against the callers.

    Args:
        n: Sample size; values below 1 are treated as 1.
        p: Polynomial order.

    Returns:
        The rescaling factor.
    """
    size = max(int(n), 1)
    exponent = 1.0 / (2.0 * p + 4.0) - 1.0 / (p + 4.0)
    return size**exponent
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def ensure_dataframe(x: np.ndarray, columns: list[str]) -> pd.DataFrame:
    """Wrap *x* in a ``pandas.DataFrame`` with the given column labels.

    Args:
        x: Data passed to the ``DataFrame`` constructor.
        columns: Column labels.

    Returns:
        The constructed data frame.
    """
    frame = pd.DataFrame(data=x, columns=columns)
    return frame
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def covariance_from_influence(influence: np.ndarray) -> np.ndarray:
    """Return the Gram matrix of the rows of an influence array.

    Args:
        influence: Array-like convertible to ``float``; each row holds the
            contributions for one quantity.

    Returns:
        ``influence @ influence.T``, with one row and one column per input row.
    """
    contributions = np.asarray(influence, dtype=float)
    return np.matmul(contributions, contributions.T)
|