expdpy 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expdpy/__init__.py +147 -0
- expdpy/_assets/favicon.png +0 -0
- expdpy/_assets/favicon.svg +12 -0
- expdpy/_assets/logo-navbar.svg +14 -0
- expdpy/_assets/logo.png +0 -0
- expdpy/_assets/logo.svg +14 -0
- expdpy/_corr.py +80 -0
- expdpy/_estimation/__init__.py +38 -0
- expdpy/_estimation/_capture.py +26 -0
- expdpy/_estimation/_fit.py +56 -0
- expdpy/_estimation/_formula.py +50 -0
- expdpy/_estimation/_results.py +30 -0
- expdpy/_estimation/_spec.py +112 -0
- expdpy/_estimation/_tidy.py +35 -0
- expdpy/_estimation/_vcov.py +52 -0
- expdpy/_theme.py +201 -0
- expdpy/_types.py +505 -0
- expdpy/_validation.py +43 -0
- expdpy/app/__init__.py +766 -0
- expdpy/app/_components.py +282 -0
- expdpy/app/_config_io.py +63 -0
- expdpy/app/_export_nb.py +234 -0
- expdpy/app/_sample.py +124 -0
- expdpy/app/_state.py +102 -0
- expdpy/app/_udv.py +179 -0
- expdpy/app/_upload.py +43 -0
- expdpy/app/_varcat.py +100 -0
- expdpy/by_group.py +313 -0
- expdpy/coefplot.py +236 -0
- expdpy/correlation.py +152 -0
- expdpy/data/__init__.py +103 -0
- expdpy/data/expdpy_config_kuznets.json +54 -0
- expdpy/data/gapminder.parquet +0 -0
- expdpy/data/gapminder_data_def.parquet +0 -0
- expdpy/data/kuznets.parquet +0 -0
- expdpy/data/kuznets_data_def.parquet +0 -0
- expdpy/data/staggered_did.parquet +0 -0
- expdpy/data/staggered_did_data_def.parquet +0 -0
- expdpy/did.py +405 -0
- expdpy/distributions.py +130 -0
- expdpy/estimation.py +282 -0
- expdpy/fwl.py +284 -0
- expdpy/inference.py +92 -0
- expdpy/missing.py +117 -0
- expdpy/outliers.py +180 -0
- expdpy/panel_models.py +244 -0
- expdpy/pedagogy/__init__.py +43 -0
- expdpy/pedagogy/_format.py +88 -0
- expdpy/pedagogy/_interpret.py +355 -0
- expdpy/pedagogy/_mixin.py +44 -0
- expdpy/pedagogy/_registry.py +121 -0
- expdpy/pedagogy/_text/__init__.py +11 -0
- expdpy/pedagogy/_text/causal.py +65 -0
- expdpy/pedagogy/_text/correlation.py +77 -0
- expdpy/pedagogy/_text/outliers.py +53 -0
- expdpy/pedagogy/_text/regression.py +257 -0
- expdpy/pedagogy/_text/tables.py +51 -0
- expdpy/postestimation.py +202 -0
- expdpy/py.typed +0 -0
- expdpy/regression.py +201 -0
- expdpy/sandbox.py +307 -0
- expdpy/scatter.py +207 -0
- expdpy/streamlit_app/__init__.py +106 -0
- expdpy/streamlit_app/_context.py +99 -0
- expdpy/streamlit_app/_entry.py +57 -0
- expdpy/streamlit_app/_handoff.py +149 -0
- expdpy/streamlit_app/_launcher.py +103 -0
- expdpy/streamlit_app/_pages.py +424 -0
- expdpy/streamlit_app/_pipeline.py +99 -0
- expdpy/streamlit_app/_render.py +221 -0
- expdpy/streamlit_app/_run.py +9 -0
- expdpy/streamlit_app/_sidebar.py +258 -0
- expdpy/streamlit_app/_widgets.py +95 -0
- expdpy/tables.py +348 -0
- expdpy/trends.py +263 -0
- expdpy-0.2.0.dist-info/METADATA +203 -0
- expdpy-0.2.0.dist-info/RECORD +80 -0
- expdpy-0.2.0.dist-info/WHEEL +4 -0
- expdpy-0.2.0.dist-info/entry_points.txt +2 -0
- expdpy-0.2.0.dist-info/licenses/LICENSE +25 -0
expdpy/__init__.py
ADDED
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""expdpy — Explore your panel data interactively.
|
|
2
|
+
|
|
3
|
+
A Python port of the ExPanDaR R package (Joachim Gassen, TRR 266). Provides a set
|
|
4
|
+
of analytical functions for exploratory analysis of panel and cross-sectional data
|
|
5
|
+
(descriptive tables, correlations, time trends, scatter plots, regression tables)
|
|
6
|
+
returning interactive Plotly figures and Great Tables / pyfixest output, plus the
|
|
7
|
+
``ExPdPy`` interactive app (Shiny for Python).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from expdpy._types import (
|
|
13
|
+
BarChartResult,
|
|
14
|
+
ByGroupBarGraphResult,
|
|
15
|
+
ByGroupTrendGraphResult,
|
|
16
|
+
CoefficientPlotResult,
|
|
17
|
+
CorrelationGraphResult,
|
|
18
|
+
CorrelationTableResult,
|
|
19
|
+
DescriptiveTableResult,
|
|
20
|
+
EstimationResult,
|
|
21
|
+
EventStudyResult,
|
|
22
|
+
ExtObsTableResult,
|
|
23
|
+
FixefPlotResult,
|
|
24
|
+
FWLPlotResult,
|
|
25
|
+
HausmanTestResult,
|
|
26
|
+
HistogramResult,
|
|
27
|
+
JointTestResult,
|
|
28
|
+
PanelViewResult,
|
|
29
|
+
PredictionResult,
|
|
30
|
+
QuantileTrendGraphResult,
|
|
31
|
+
RegressionTableResult,
|
|
32
|
+
RobustInferenceResult,
|
|
33
|
+
SandboxResult,
|
|
34
|
+
TrendGraphResult,
|
|
35
|
+
)
|
|
36
|
+
from expdpy.by_group import (
|
|
37
|
+
prepare_by_group_bar_graph,
|
|
38
|
+
prepare_by_group_trend_graph,
|
|
39
|
+
prepare_by_group_violin_graph,
|
|
40
|
+
)
|
|
41
|
+
from expdpy.coefplot import prepare_coefficient_plot
|
|
42
|
+
from expdpy.correlation import prepare_correlation_graph
|
|
43
|
+
from expdpy.did import prepare_event_study, prepare_panel_view
|
|
44
|
+
from expdpy.distributions import prepare_bar_chart, prepare_histogram
|
|
45
|
+
from expdpy.estimation import prepare_estimation
|
|
46
|
+
from expdpy.fwl import prepare_fwl_plot
|
|
47
|
+
from expdpy.inference import prepare_robust_inference
|
|
48
|
+
from expdpy.missing import prepare_missing_values_graph
|
|
49
|
+
from expdpy.outliers import treat_outliers
|
|
50
|
+
from expdpy.panel_models import prepare_hausman_test, prepare_panel_table
|
|
51
|
+
from expdpy.pedagogy import Explainer, explain, list_topics
|
|
52
|
+
from expdpy.postestimation import (
|
|
53
|
+
prepare_fixef_plot,
|
|
54
|
+
prepare_joint_test,
|
|
55
|
+
prepare_predictions,
|
|
56
|
+
)
|
|
57
|
+
from expdpy.regression import prepare_regression_table
|
|
58
|
+
from expdpy.sandbox import (
|
|
59
|
+
sandbox_clustering_se,
|
|
60
|
+
sandbox_omitted_variable_bias,
|
|
61
|
+
sandbox_pooled_vs_fixed_effects,
|
|
62
|
+
)
|
|
63
|
+
from expdpy.scatter import prepare_scatter_plot
|
|
64
|
+
from expdpy.tables import (
|
|
65
|
+
prepare_correlation_table,
|
|
66
|
+
prepare_descriptive_table,
|
|
67
|
+
prepare_ext_obs_table,
|
|
68
|
+
)
|
|
69
|
+
from expdpy.trends import prepare_quantile_trend_graph, prepare_trend_graph
|
|
70
|
+
|
|
71
|
+
__version__ = "0.2.0"
|
|
72
|
+
|
|
73
|
+
__all__ = [
|
|
74
|
+
# outliers
|
|
75
|
+
"treat_outliers",
|
|
76
|
+
# tables
|
|
77
|
+
"prepare_descriptive_table",
|
|
78
|
+
"prepare_correlation_table",
|
|
79
|
+
"prepare_ext_obs_table",
|
|
80
|
+
# correlation graph
|
|
81
|
+
"prepare_correlation_graph",
|
|
82
|
+
# trends
|
|
83
|
+
"prepare_trend_graph",
|
|
84
|
+
"prepare_quantile_trend_graph",
|
|
85
|
+
# by group
|
|
86
|
+
"prepare_by_group_bar_graph",
|
|
87
|
+
"prepare_by_group_trend_graph",
|
|
88
|
+
"prepare_by_group_violin_graph",
|
|
89
|
+
# distributions
|
|
90
|
+
"prepare_histogram",
|
|
91
|
+
"prepare_bar_chart",
|
|
92
|
+
# missing
|
|
93
|
+
"prepare_missing_values_graph",
|
|
94
|
+
# scatter
|
|
95
|
+
"prepare_scatter_plot",
|
|
96
|
+
# regression
|
|
97
|
+
"prepare_regression_table",
|
|
98
|
+
# estimation (IV / Poisson / GLM / model comparison)
|
|
99
|
+
"prepare_estimation",
|
|
100
|
+
# post-estimation
|
|
101
|
+
"prepare_fixef_plot",
|
|
102
|
+
"prepare_predictions",
|
|
103
|
+
"prepare_joint_test",
|
|
104
|
+
# robust inference
|
|
105
|
+
"prepare_robust_inference",
|
|
106
|
+
# fwl plot
|
|
107
|
+
"prepare_fwl_plot",
|
|
108
|
+
# coefficient plot
|
|
109
|
+
"prepare_coefficient_plot",
|
|
110
|
+
# event study / staggered DiD
|
|
111
|
+
"prepare_event_study",
|
|
112
|
+
"prepare_panel_view",
|
|
113
|
+
# concept sandboxes
|
|
114
|
+
"sandbox_omitted_variable_bias",
|
|
115
|
+
"sandbox_pooled_vs_fixed_effects",
|
|
116
|
+
"sandbox_clustering_se",
|
|
117
|
+
# panel models (linearmodels)
|
|
118
|
+
"prepare_panel_table",
|
|
119
|
+
"prepare_hausman_test",
|
|
120
|
+
# pedagogy
|
|
121
|
+
"explain",
|
|
122
|
+
"list_topics",
|
|
123
|
+
"Explainer",
|
|
124
|
+
# result types
|
|
125
|
+
"DescriptiveTableResult",
|
|
126
|
+
"CorrelationTableResult",
|
|
127
|
+
"CorrelationGraphResult",
|
|
128
|
+
"ExtObsTableResult",
|
|
129
|
+
"TrendGraphResult",
|
|
130
|
+
"QuantileTrendGraphResult",
|
|
131
|
+
"ByGroupBarGraphResult",
|
|
132
|
+
"ByGroupTrendGraphResult",
|
|
133
|
+
"HistogramResult",
|
|
134
|
+
"BarChartResult",
|
|
135
|
+
"RegressionTableResult",
|
|
136
|
+
"FWLPlotResult",
|
|
137
|
+
"CoefficientPlotResult",
|
|
138
|
+
"EstimationResult",
|
|
139
|
+
"FixefPlotResult",
|
|
140
|
+
"PredictionResult",
|
|
141
|
+
"JointTestResult",
|
|
142
|
+
"RobustInferenceResult",
|
|
143
|
+
"EventStudyResult",
|
|
144
|
+
"PanelViewResult",
|
|
145
|
+
"SandboxResult",
|
|
146
|
+
"HausmanTestResult",
|
|
147
|
+
]
|
|
Binary file
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64" role="img" aria-label="expdpy">
|
|
2
|
+
<title>expdpy</title>
|
|
3
|
+
<!-- Solid blue tile so the mark reads at favicon sizes -->
|
|
4
|
+
<rect x="0" y="0" width="64" height="64" rx="14" fill="#1f77b4"/>
|
|
5
|
+
<!-- Same N-shaped Kuznets curve, white, fitted to the tile -->
|
|
6
|
+
<path d="M12 49 C17 36 22 16 27 13 C32 10 38 42 42 44 C46 46 51 21 56 12"
|
|
7
|
+
fill="none" stroke="#ffffff" stroke-width="5"
|
|
8
|
+
stroke-linecap="round" stroke-linejoin="round"/>
|
|
9
|
+
<circle cx="27" cy="13" r="3" fill="#ffffff"/>
|
|
10
|
+
<circle cx="42" cy="44" r="3" fill="#ffffff"/>
|
|
11
|
+
<circle cx="56" cy="12" r="3" fill="#ffffff"/>
|
|
12
|
+
</svg>
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64" role="img" aria-label="expdpy logo">
|
|
2
|
+
<title>expdpy</title>
|
|
3
|
+
<!-- Faint axis (baseline + left axis) — translucent white for chart context on a blue navbar -->
|
|
4
|
+
<path d="M12 8 V54 H56" fill="none" stroke="#ffffff" stroke-opacity="0.5" stroke-width="2.5"
|
|
5
|
+
stroke-linecap="round" stroke-linejoin="round"/>
|
|
6
|
+
<!-- N-shaped Kuznets curve: rise, fall, rise -->
|
|
7
|
+
<path d="M14 48 C19 36 23 18 27 15 C32 12 37 41 41 43 C45 45 50 22 54 13"
|
|
8
|
+
fill="none" stroke="#ffffff" stroke-width="4"
|
|
9
|
+
stroke-linecap="round" stroke-linejoin="round"/>
|
|
10
|
+
<!-- Data points on the turning peaks/trough -->
|
|
11
|
+
<circle cx="27" cy="15" r="2.6" fill="#ffffff"/>
|
|
12
|
+
<circle cx="41" cy="43" r="2.6" fill="#ffffff"/>
|
|
13
|
+
<circle cx="54" cy="13" r="2.6" fill="#ffffff"/>
|
|
14
|
+
</svg>
|
expdpy/_assets/logo.png
ADDED
|
Binary file
|
expdpy/_assets/logo.svg
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64" role="img" aria-label="expdpy logo">
|
|
2
|
+
<title>expdpy</title>
|
|
3
|
+
<!-- Faint axis (baseline + left axis) for chart context -->
|
|
4
|
+
<path d="M12 8 V54 H56" fill="none" stroke="#cfe0f1" stroke-width="2.5"
|
|
5
|
+
stroke-linecap="round" stroke-linejoin="round"/>
|
|
6
|
+
<!-- N-shaped Kuznets curve: rise, fall, rise -->
|
|
7
|
+
<path d="M14 48 C19 36 23 18 27 15 C32 12 37 41 41 43 C45 45 50 22 54 13"
|
|
8
|
+
fill="none" stroke="#1f77b4" stroke-width="4"
|
|
9
|
+
stroke-linecap="round" stroke-linejoin="round"/>
|
|
10
|
+
<!-- Data points on the turning peaks/trough -->
|
|
11
|
+
<circle cx="27" cy="15" r="2.6" fill="#1f77b4"/>
|
|
12
|
+
<circle cx="41" cy="43" r="2.6" fill="#1f77b4"/>
|
|
13
|
+
<circle cx="54" cy="13" r="2.6" fill="#1f77b4"/>
|
|
14
|
+
</svg>
|
expdpy/_corr.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
"""Pairwise correlation engine shared by the correlation table and graph.
|
|
2
|
+
|
|
3
|
+
Faithful port of ExPanDaR's internal ``cor_mat()`` helper: for every pair of columns it
|
|
4
|
+
computes the correlation, two-sided p-value and the number of *pairwise* complete
|
|
5
|
+
observations (rows finite in both columns), placing Pearson or Spearman results depending
|
|
6
|
+
on ``method``. p-values use the asymptotic approximation (R's ``cor.test(..., exact = FALSE)``).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import warnings
|
|
12
|
+
from dataclasses import dataclass
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from scipy import stats
|
|
17
|
+
|
|
18
|
+
__all__ = ["CorMat", "cor_mat"]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(frozen=True)
class CorMat:
    """Immutable bundle of the three square matrices returned by :func:`cor_mat`.

    Each attribute is a data frame; ``cor_mat`` builds all three with the input
    column names as both row and column labels.
    """

    # Correlation coefficients.
    r: pd.DataFrame
    # p-values, cell for cell alongside ``r``.
    p: pd.DataFrame
    # Observation counts behind each cell.
    n: pd.DataFrame
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def cor_mat(df: pd.DataFrame, method: str) -> CorMat:
    """Build pairwise correlation, p-value and sample-size matrices for ``df``.

    Each pair of distinct columns is evaluated only on the rows where *both*
    columns hold finite values, so different cells may rest on different numbers
    of observations.

    Parameters
    ----------
    df
        Data frame whose columns can all be converted to ``float``.
    method
        ``"pearson"`` or ``"spearman"``.

    Returns
    -------
    CorMat
        ``r`` holds the coefficients (diagonal fixed at 1.0), ``p`` the p-values
        (diagonal fixed at 0.0) and ``n`` the pairwise finite-row counts as
        ``Int64`` (diagonal = finite count of the column itself). Pairs sharing
        two or fewer finite rows keep ``NaN`` in ``r`` and ``p``.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"pearson"`` nor ``"spearman"``.
    """
    if method == "pearson":
        corr_fn = stats.pearsonr
    elif method == "spearman":
        corr_fn = stats.spearmanr
    else:
        raise ValueError("method must be 'pearson' or 'spearman'")

    labels = list(df.columns)
    values = df.to_numpy(dtype=float)
    usable = np.isfinite(values)
    width = values.shape[1]

    shape = (width, width)
    coefs = np.full(shape, np.nan)
    pvals = np.full(shape, np.nan)
    counts = np.full(shape, np.nan)

    np.fill_diagonal(coefs, 1.0)
    np.fill_diagonal(pvals, 0.0)
    # A column paired with itself: the count is just its number of finite entries.
    counts[np.diag_indices(width)] = usable.sum(axis=0)

    # Visit every unordered pair once and mirror the result across the diagonal.
    for left, right in zip(*np.triu_indices(width, k=1)):
        both = usable[:, left] & usable[:, right]
        shared = int(both.sum())
        counts[left, right] = counts[right, left] = shared
        if shared <= 2:
            # Too few shared rows: leave the coefficient and p-value as NaN.
            continue
        with warnings.catch_warnings():
            # Silence any warning emitted by the SciPy call for this pair.
            warnings.simplefilter("ignore")
            coef, pval = corr_fn(values[both, left], values[both, right])
        coefs[left, right] = coefs[right, left] = float(coef)
        pvals[left, right] = pvals[right, left] = float(pval)

    r_frame = pd.DataFrame(coefs, index=labels, columns=labels)
    p_frame = pd.DataFrame(pvals, index=labels, columns=labels)
    n_frame = pd.DataFrame(counts, index=labels, columns=labels).astype("Int64")
    return CorMat(r=r_frame, p=p_frame, n=n_frame)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""Shared estimation engine for expdpy's regression-style functions.
|
|
2
|
+
|
|
3
|
+
This private package holds the building blocks that every estimator plugs into:
|
|
4
|
+
|
|
5
|
+
* :mod:`._spec` — the normalized :class:`ModelSpec` / :class:`VCovSpec` dataclasses,
|
|
6
|
+
* :mod:`._formula` — a pure pyfixest-formula builder,
|
|
7
|
+
* :mod:`._vcov` — a pure ``(vcov, vcov_kwargs)`` builder,
|
|
8
|
+
* :mod:`._fit` — dispatch to ``feols`` / ``fepois`` / ``feglm`` (+ the SSC default),
|
|
9
|
+
* :mod:`._tidy` — the tidy-coefficient-frame helper,
|
|
10
|
+
* :mod:`._capture` — a stdout-capture context manager.
|
|
11
|
+
|
|
12
|
+
``expdpy.regression`` is a thin adapter over this engine; keeping the engine separate
|
|
13
|
+
lets future estimators (IV, Poisson, GLM, model comparison) reuse one tested core.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from expdpy._estimation._capture import capture_stdout
|
|
19
|
+
from expdpy._estimation._fit import SSC, fit_model
|
|
20
|
+
from expdpy._estimation._formula import build_formula
|
|
21
|
+
from expdpy._estimation._results import coerce_models, first_model
|
|
22
|
+
from expdpy._estimation._spec import ModelSpec, VCovSpec, as_list
|
|
23
|
+
from expdpy._estimation._tidy import tidy_model
|
|
24
|
+
from expdpy._estimation._vcov import build_vcov
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"SSC",
|
|
28
|
+
"ModelSpec",
|
|
29
|
+
"VCovSpec",
|
|
30
|
+
"as_list",
|
|
31
|
+
"build_formula",
|
|
32
|
+
"build_vcov",
|
|
33
|
+
"capture_stdout",
|
|
34
|
+
"coerce_models",
|
|
35
|
+
"first_model",
|
|
36
|
+
"fit_model",
|
|
37
|
+
"tidy_model",
|
|
38
|
+
]
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""A small stdout-capture context manager (several pyfixest helpers print to stdout)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import io
|
|
7
|
+
from collections.abc import Iterator
|
|
8
|
+
|
|
9
|
+
__all__ = ["capture_stdout"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@contextlib.contextmanager
def capture_stdout() -> Iterator[io.StringIO]:
    """Collect everything written to ``sys.stdout`` inside the ``with`` block.

    Intended for pyfixest helpers that print their output and return ``None``
    (notably ``etable(type="md")``), so the printed text can be handed back as a
    string instead.

    Yields
    ------
    io.StringIO
        The in-memory buffer receiving the output; call ``.getvalue()`` on it
        once the block has finished.
    """
    # ``redirect_stdout`` returns its target on entry, so the new buffer can be
    # created, installed and bound in a single step.
    with contextlib.redirect_stdout(io.StringIO()) as sink:
        yield sink
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
"""Fit dispatcher: build the formula + vcov and call the right pyfixest entrypoint."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
import pyfixest as pf
|
|
9
|
+
|
|
10
|
+
from expdpy._estimation._formula import build_formula
|
|
11
|
+
from expdpy._estimation._spec import ModelSpec
|
|
12
|
+
from expdpy._estimation._vcov import build_vcov
|
|
13
|
+
|
|
14
|
+
__all__ = ["SSC", "fit_model"]
|
|
15
|
+
|
|
16
|
+
# Stata 'reghdfe'-consistent small-sample correction (matches lfe::felm cmethod='reghdfe').
|
|
17
|
+
SSC = pf.ssc(k_adj=True, G_adj=True)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def fit_model(data: pd.DataFrame, spec: ModelSpec, *, ssc: Any = SSC) -> Any:
    """Estimate the model described by ``spec`` on ``data`` with pyfixest.

    The formula and variance-covariance arguments are derived from ``spec`` and
    the call is routed by ``spec.model``: ``"ols"``/``"iv"`` go to ``feols``,
    ``"poisson"`` to ``fepois`` and ``"logit"``/``"probit"`` to ``feglm`` (with
    the model kind passed as ``family``). No cleaning happens here: column
    selection, NA handling and casting fixed effects to ``category`` are left to
    the caller.

    NOTE(review): ``spec.offset`` is not read by this function — confirm it is
    handled elsewhere or is intentionally unsupported.

    Parameters
    ----------
    data
        The (already cleaned) estimation frame.
    spec
        The normalized model specification.
    ssc
        The small-sample-correction object (defaults to the module-level
        :data:`SSC`).

    Returns
    -------
    Any
        Whatever the selected pyfixest entry point returns: a fitted ``Feols`` /
        ``Fepois`` / ``Feglm`` model, or a ``FixestMulti`` when ``spec`` requests
        stepwise or multiple outcomes.

    Raises
    ------
    ValueError
        If ``spec.model`` is not one of the supported kinds; errors raised while
        building the formula or the vcov arguments propagate unchanged.
    """
    formula = build_formula(spec)
    vcov, vcov_kwargs = build_vcov(spec.vcov)

    # Optional arguments are forwarded only when set, so pyfixest's own defaults
    # apply otherwise.
    options: dict[str, Any] = {"vcov": vcov, "ssc": ssc}
    if vcov_kwargs is not None:
        options["vcov_kwargs"] = vcov_kwargs
    if spec.weights:
        options["weights"] = spec.weights

    kind = spec.model
    if kind == "poisson":
        return pf.fepois(formula, data=data, **options)
    if kind in ("logit", "probit"):
        return pf.feglm(formula, data=data, family=kind, **options)
    if kind in ("ols", "iv"):
        return pf.feols(formula, data=data, **options)
    raise ValueError(f"unknown model kind: {spec.model!r}")  # pragma: no cover
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Pure builder of the pyfixest formula string from a :class:`ModelSpec`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from expdpy._estimation._spec import ModelSpec
|
|
6
|
+
|
|
7
|
+
__all__ = ["build_formula"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_formula(spec: ModelSpec) -> str:
    """Assemble the pyfixest formula string described by ``spec``.

    The formula is built from up to three ``" | "``-separated parts:

    1. ``"<dv> ~ <rhs>"`` — outcomes joined with ``" + "``; the right-hand side
       is the regressors joined with ``" + "``, a stepwise call such as
       ``"csw(x1, x2, x3)"`` when ``spec.stepwise`` is set, or ``"1"`` when there
       are no regressors.
    2. the fixed effects joined with ``" + "`` (only when ``spec.feffects`` is
       non-empty);
    3. ``"<endog> ~ <instruments>"`` (only when ``spec.model == "iv"``), placed
       after the fixed-effect part.

    Parameters
    ----------
    spec
        The normalized model specification.

    Returns
    -------
    str
        A formula string for ``pyfixest.feols`` / ``fepois`` / ``feglm``.

    Raises
    ------
    ValueError
        If ``spec.model`` is ``"iv"`` but ``endog`` or ``instruments`` is empty.

    Examples
    --------
    >>> from expdpy._estimation import ModelSpec, build_formula
    >>> build_formula(ModelSpec(dv=("y",), idvs=("x1", "x2"), feffects=("firm",)))
    'y ~ x1 + x2 | firm'
    """
    outcome = " + ".join(spec.dv)

    if not spec.idvs:
        # No regressors: fall back to an intercept-only right-hand side.
        regressors = "1"
    elif spec.stepwise:
        regressors = f"{spec.stepwise}({', '.join(spec.idvs)})"
    else:
        regressors = " + ".join(spec.idvs)

    parts = [f"{outcome} ~ {regressors}"]

    if spec.feffects:
        parts.append(" + ".join(spec.feffects))

    if spec.model == "iv":
        if not (spec.endog and spec.instruments):
            raise ValueError(
                "instrumental-variables models require both 'endog' and 'instruments'"
            )
        first_stage = f"{' + '.join(spec.endog)} ~ {' + '.join(spec.instruments)}"
        parts.append(first_stage)

    return " | ".join(parts)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
"""Helpers for accepting a fitted model, a list of them, or an expdpy result object."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
__all__ = ["coerce_models", "first_model"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def coerce_models(obj: Any) -> list[Any]:
    """Normalize ``obj`` into a non-empty list of fitted models.

    Three input shapes are recognized, checked in this order:

    * anything with a ``models`` attribute (e.g. ``RegressionTableResult`` /
      ``EstimationResult``) — its ``models`` are copied into a new list;
    * a ``list`` or ``tuple`` — copied into a new list;
    * any other object — treated as a single model and wrapped in a list.

    Raises
    ------
    ValueError
        If the resulting list is empty.
    """
    if hasattr(obj, "models"):
        candidates = obj.models
    elif isinstance(obj, (list, tuple)):
        candidates = obj
    else:
        candidates = (obj,)

    models = [*candidates]
    if models:
        return models
    raise ValueError("no models found")
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def first_model(obj: Any) -> Any:
    """Return the leading model of whatever :func:`coerce_models` extracts from ``obj``.

    ``obj`` may be a single model, a list/tuple of models, or an object with a
    ``models`` attribute; an empty collection raises ``ValueError`` from
    :func:`coerce_models`.
    """
    head, *_ = coerce_models(obj)
    return head
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Normalized model + variance-covariance specifications for the estimation engine.
|
|
2
|
+
|
|
3
|
+
These small, frozen, hashable dataclasses sit between the friendly public function
|
|
4
|
+
signatures and pyfixest. Keeping the spec normalized in one place means the formula
|
|
5
|
+
builder, the vcov builder and the fit dispatcher never have to re-parse user input.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Any, Literal
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"ModelKind",
|
|
15
|
+
"ModelSpec",
|
|
16
|
+
"Stepwise",
|
|
17
|
+
"VCovKind",
|
|
18
|
+
"VCovSpec",
|
|
19
|
+
"as_list",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
VCovKind = Literal["iid", "hetero", "HC1", "HC2", "HC3", "CRV1", "CRV3", "NW", "DK"]
|
|
23
|
+
ModelKind = Literal["ols", "iv", "poisson", "logit", "probit"]
|
|
24
|
+
Stepwise = Literal["sw", "sw0", "csw", "csw0"]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def as_list(value: Any) -> list[str]:
    """Turn ``None`` / ``""`` / a single name / a sequence of names into a list.

    Parameters
    ----------
    value
        ``None``, an empty string, one variable name, or an iterable of names.

    Returns
    -------
    list of str
        ``[]`` for ``None`` and ``""``; a one-element list for any other string;
        otherwise the truthy items of ``value`` in their original order.
    """
    if value is None:
        return []
    if isinstance(value, str):
        # A string is one name, never an iterable of characters.
        return [] if value == "" else [value]
    return [item for item in value if item]
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
@dataclass(frozen=True)
class VCovSpec:
    """Frozen description of how standard errors should be computed.

    Parameters
    ----------
    kind
        Which estimator to use. One of ``"iid"``; ``"hetero"`` (alias of
        ``"HC1"``); ``"HC1"``/``"HC2"``/``"HC3"`` (HC2/HC3 are unavailable with
        fixed effects); ``"CRV1"``/``"CRV3"`` (cluster-robust); or
        ``"NW"``/``"DK"`` (Newey-West / Driscoll-Kraay).
    cluster
        Name(s) of the cluster variable(s); required for ``"CRV1"``/``"CRV3"``.
    time_id
        Name of the time identifier; required for ``"NW"``/``"DK"``.
    panel_id
        Name of the panel (unit) identifier; required for ``"NW"``/``"DK"``.
    lag
        Lag truncation for ``"NW"``/``"DK"``; with ``None`` pyfixest picks a
        default.
    """

    # Defaults describe plain iid standard errors with no extra identifiers.
    kind: VCovKind = "iid"
    cluster: tuple[str, ...] = ()
    time_id: str | None = None
    panel_id: str | None = None
    lag: int | None = None
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass(frozen=True)
class ModelSpec:
    """Frozen description of one model (or one stepwise / multi-outcome family).

    Parameters
    ----------
    dv
        Name(s) of the dependent variable; several names yield a multi-outcome
        formula.
    idvs
        Names of the independent (exogenous) regressors.
    feffects
        Names of the fixed-effect variables absorbed by pyfixest.
    endog
        Endogenous regressors (instrumental-variables models only).
    instruments
        Excluded instruments (instrumental-variables models only).
    model
        Estimator family: ``"ols"``, ``"iv"``, ``"poisson"``, ``"logit"`` or
        ``"probit"``.
    stepwise
        Optional stepwise wrapper (``"sw"``, ``"sw0"``, ``"csw"`` or ``"csw0"``)
        applied to ``idvs`` so a sequence of nested models is estimated in one
        call.
    vcov
        The variance-covariance specification.
    weights
        Optional name of the weights column.
    offset
        Optional name of the offset column (Poisson models).
    """

    # Only the outcome(s) and regressors are mandatory; everything else defaults
    # to a plain OLS model with iid standard errors.
    dv: tuple[str, ...]
    idvs: tuple[str, ...]
    feffects: tuple[str, ...] = ()
    endog: tuple[str, ...] = ()
    instruments: tuple[str, ...] = ()
    model: ModelKind = "ols"
    stepwise: Stepwise | None = None
    vcov: VCovSpec = VCovSpec()
    weights: str | None = None
    offset: str | None = None
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""Tidy-coefficient-frame helper shared by the regression-style functions."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
__all__ = ["tidy_model"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def tidy_model(model: Any, model_id: int, byvalue: str | None = None) -> pd.DataFrame:
    """Flatten one fitted model's coefficient table into a labelled frame.

    Parameters
    ----------
    model
        A fitted model exposing a ``.tidy()`` method that returns a data frame
        indexed by coefficient.
    model_id
        Identifier written to the leading ``model`` column (1-based by
        convention, used to order models in a table).
    byvalue
        Optional subgroup label; when given it is stored in a trailing
        ``byvalue`` column (the ``byvar`` path).

    Returns
    -------
    pandas.DataFrame
        The ``tidy()`` frame with its index moved into a ``term`` column,
        preceded by ``model`` and, if requested, followed by ``byvalue``.
    """
    coefs = model.tidy().reset_index()
    # After ``reset_index`` the former index is the first column, whatever its name.
    index_label = coefs.columns[0]
    tidy = coefs.rename(columns={index_label: "term"})
    tidy.insert(loc=0, column="model", value=model_id)
    if byvalue is None:
        return tidy
    tidy["byvalue"] = byvalue
    return tidy
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""Pure builder of pyfixest's ``(vcov, vcov_kwargs)`` pair from a :class:`VCovSpec`."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from expdpy._estimation._spec import VCovSpec
|
|
8
|
+
|
|
9
|
+
__all__ = ["build_vcov"]
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def build_vcov(spec: VCovSpec) -> tuple[Any, dict[str, Any] | None]:
    """Convert a :class:`VCovSpec` into the ``vcov`` / ``vcov_kwargs`` pair for pyfixest.

    Parameters
    ----------
    spec
        The normalized variance-covariance specification.

    Returns
    -------
    tuple
        ``(vcov, vcov_kwargs)``:

        * ``"CRV1"``/``"CRV3"`` — ``vcov`` is ``{kind: "a + b"}`` built from the
          cluster names, ``vcov_kwargs`` is ``None``;
        * ``"NW"``/``"DK"`` — ``vcov`` is the kind string and ``vcov_kwargs``
          carries ``time_id`` and ``panel_id`` (plus ``lag`` when it is set);
        * any other kind — the kind string is passed through with ``None``.

    Raises
    ------
    ValueError
        If a cluster-robust kind has no cluster variable, or ``"NW"``/``"DK"``
        lacks ``time_id`` or ``panel_id``.

    Examples
    --------
    >>> from expdpy._estimation import VCovSpec, build_vcov
    >>> build_vcov(VCovSpec(kind="CRV1", cluster=("firm", "year")))
    ({'CRV1': 'firm + year'}, None)
    >>> build_vcov(VCovSpec(kind="iid"))
    ('iid', None)
    """
    kind = spec.kind

    if kind in ("CRV1", "CRV3"):
        if spec.cluster:
            return {kind: " + ".join(spec.cluster)}, None
        raise ValueError(
            f"{kind} standard errors require at least one cluster variable"
        )

    if kind not in ("NW", "DK"):
        # Every remaining kind is a plain string with no extra arguments.
        return kind, None

    if spec.time_id is None or spec.panel_id is None:
        raise ValueError(
            f"{kind} standard errors require both 'time_id' and 'panel_id'"
        )
    extras: dict[str, Any] = {"time_id": spec.time_id, "panel_id": spec.panel_id}
    if spec.lag is not None:
        extras["lag"] = spec.lag
    return kind, extras
|