expdpy 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expdpy/__init__.py +147 -0
- expdpy/_assets/favicon.png +0 -0
- expdpy/_assets/favicon.svg +12 -0
- expdpy/_assets/logo-navbar.svg +14 -0
- expdpy/_assets/logo.png +0 -0
- expdpy/_assets/logo.svg +14 -0
- expdpy/_corr.py +80 -0
- expdpy/_estimation/__init__.py +38 -0
- expdpy/_estimation/_capture.py +26 -0
- expdpy/_estimation/_fit.py +56 -0
- expdpy/_estimation/_formula.py +50 -0
- expdpy/_estimation/_results.py +30 -0
- expdpy/_estimation/_spec.py +112 -0
- expdpy/_estimation/_tidy.py +35 -0
- expdpy/_estimation/_vcov.py +52 -0
- expdpy/_theme.py +201 -0
- expdpy/_types.py +505 -0
- expdpy/_validation.py +43 -0
- expdpy/app/__init__.py +766 -0
- expdpy/app/_components.py +282 -0
- expdpy/app/_config_io.py +63 -0
- expdpy/app/_export_nb.py +234 -0
- expdpy/app/_sample.py +124 -0
- expdpy/app/_state.py +102 -0
- expdpy/app/_udv.py +179 -0
- expdpy/app/_upload.py +43 -0
- expdpy/app/_varcat.py +100 -0
- expdpy/by_group.py +313 -0
- expdpy/coefplot.py +236 -0
- expdpy/correlation.py +152 -0
- expdpy/data/__init__.py +103 -0
- expdpy/data/expdpy_config_kuznets.json +54 -0
- expdpy/data/gapminder.parquet +0 -0
- expdpy/data/gapminder_data_def.parquet +0 -0
- expdpy/data/kuznets.parquet +0 -0
- expdpy/data/kuznets_data_def.parquet +0 -0
- expdpy/data/staggered_did.parquet +0 -0
- expdpy/data/staggered_did_data_def.parquet +0 -0
- expdpy/did.py +405 -0
- expdpy/distributions.py +130 -0
- expdpy/estimation.py +282 -0
- expdpy/fwl.py +284 -0
- expdpy/inference.py +92 -0
- expdpy/missing.py +117 -0
- expdpy/outliers.py +180 -0
- expdpy/panel_models.py +244 -0
- expdpy/pedagogy/__init__.py +43 -0
- expdpy/pedagogy/_format.py +88 -0
- expdpy/pedagogy/_interpret.py +355 -0
- expdpy/pedagogy/_mixin.py +44 -0
- expdpy/pedagogy/_registry.py +121 -0
- expdpy/pedagogy/_text/__init__.py +11 -0
- expdpy/pedagogy/_text/causal.py +65 -0
- expdpy/pedagogy/_text/correlation.py +77 -0
- expdpy/pedagogy/_text/outliers.py +53 -0
- expdpy/pedagogy/_text/regression.py +257 -0
- expdpy/pedagogy/_text/tables.py +51 -0
- expdpy/postestimation.py +202 -0
- expdpy/py.typed +0 -0
- expdpy/regression.py +201 -0
- expdpy/sandbox.py +307 -0
- expdpy/scatter.py +207 -0
- expdpy/streamlit_app/__init__.py +106 -0
- expdpy/streamlit_app/_context.py +99 -0
- expdpy/streamlit_app/_entry.py +57 -0
- expdpy/streamlit_app/_handoff.py +149 -0
- expdpy/streamlit_app/_launcher.py +103 -0
- expdpy/streamlit_app/_pages.py +424 -0
- expdpy/streamlit_app/_pipeline.py +99 -0
- expdpy/streamlit_app/_render.py +221 -0
- expdpy/streamlit_app/_run.py +9 -0
- expdpy/streamlit_app/_sidebar.py +258 -0
- expdpy/streamlit_app/_widgets.py +95 -0
- expdpy/tables.py +348 -0
- expdpy/trends.py +263 -0
- expdpy-0.2.0.dist-info/METADATA +203 -0
- expdpy-0.2.0.dist-info/RECORD +80 -0
- expdpy-0.2.0.dist-info/WHEEL +4 -0
- expdpy-0.2.0.dist-info/entry_points.txt +2 -0
- expdpy-0.2.0.dist-info/licenses/LICENSE +25 -0
expdpy/_theme.py
ADDED
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""Shared Plotly styling for expdpy figures.
|
|
2
|
+
|
|
3
|
+
This module centralizes the visual identity of every expdpy figure so the look is
|
|
4
|
+
consistent across notebooks, scripts, static exports and the Streamlit app:
|
|
5
|
+
|
|
6
|
+
* a **Tableau 10** qualitative palette for grouped series (:data:`COLOR_SEQUENCE`),
|
|
7
|
+
* cohesive Tableau-style continuous scales (:data:`DIVERGING_SCALE`,
|
|
8
|
+
:data:`SEQUENTIAL_SCALE`),
|
|
9
|
+
* a presentation-friendly font stack and sizes (Arial/Helvetica, larger labels),
|
|
10
|
+
* a registered Plotly template (``"expdpy"``) layered on ``plotly_white`` and set as the
|
|
11
|
+
default, so figures are styled even when a caller forgets :func:`apply_default_layout`,
|
|
12
|
+
* a high-resolution export config for crisp slide-ready PNGs (:data:`PLOTLY_CONFIG`).
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import plotly.graph_objects as go
|
|
18
|
+
import plotly.io as pio
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"COLOR_SEQUENCE",
|
|
22
|
+
"DIVERGING_SCALE",
|
|
23
|
+
"FONT_FAMILY",
|
|
24
|
+
"FONT_SIZE_AXIS_TITLE",
|
|
25
|
+
"FONT_SIZE_BASE",
|
|
26
|
+
"FONT_SIZE_LEGEND",
|
|
27
|
+
"FONT_SIZE_TICK",
|
|
28
|
+
"FONT_SIZE_TITLE",
|
|
29
|
+
"PLOTLY_CONFIG",
|
|
30
|
+
"SEQUENTIAL_SCALE",
|
|
31
|
+
"TEMPLATE_NAME",
|
|
32
|
+
"TEMPLATE_NAME_DARK",
|
|
33
|
+
"apply_default_layout",
|
|
34
|
+
"color_for",
|
|
35
|
+
"diverging_color",
|
|
36
|
+
]
|
|
37
|
+
|
|
38
|
+
# --- Qualitative palette -------------------------------------------------------------
|
|
39
|
+
# The classic Tableau 10 palette: distinct, muted, and well-suited to projection on
|
|
40
|
+
# presentation slides. Used for grouped series via :func:`color_for`.
|
|
41
|
+
COLOR_SEQUENCE: list[str] = [
|
|
42
|
+
"#4E79A7", # blue
|
|
43
|
+
"#F28E2B", # orange
|
|
44
|
+
"#59A14F", # green
|
|
45
|
+
"#E15759", # red
|
|
46
|
+
"#76B7B2", # teal
|
|
47
|
+
"#EDC948", # yellow
|
|
48
|
+
"#B07AA1", # purple
|
|
49
|
+
"#FF9DA7", # pink
|
|
50
|
+
"#9C755F", # brown
|
|
51
|
+
"#BAB0AC", # gray
|
|
52
|
+
]
|
|
53
|
+
|
|
54
|
+
# --- Continuous color scales ---------------------------------------------------------
|
|
55
|
+
# A Tableau-flavoured diverging scale (red <-> light neutral <-> blue), anchored at a
|
|
56
|
+
# near-white midpoint. Drives the correlation heatmap and the ellipse fill (see
|
|
57
|
+
# :func:`diverging_color`) so both styles look the same.
|
|
58
|
+
DIVERGING_SCALE: list[list[float | str]] = [
|
|
59
|
+
[0.0, "#E15759"], # strong negative -> Tableau red
|
|
60
|
+
[0.25, "#F1A7A9"],
|
|
61
|
+
[0.5, "#F5F5F5"], # zero -> near-white
|
|
62
|
+
[0.75, "#9FB8D4"],
|
|
63
|
+
[1.0, "#4E79A7"], # strong positive -> Tableau blue
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
# A Tableau-flavoured sequential blue ramp (light -> Tableau blue) for magnitude-only
|
|
67
|
+
# encodings such as the missing-values heatmap and continuous scatter color.
|
|
68
|
+
SEQUENTIAL_SCALE: list[list[float | str]] = [
|
|
69
|
+
[0.0, "#F7FBFF"],
|
|
70
|
+
[0.25, "#C6DBEF"],
|
|
71
|
+
[0.5, "#90B5D6"],
|
|
72
|
+
[0.75, "#5C8FBC"],
|
|
73
|
+
[1.0, "#2E5C8A"],
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
# --- Fonts -------------------------------------------------------------------------
|
|
77
|
+
# Arial/Helvetica renders identically across machines and static exports. Sizes follow
|
|
78
|
+
# a "presentation" tier so axis labels remain legible when projected on slides.
|
|
79
|
+
FONT_FAMILY: str = "Arial, Helvetica, sans-serif"
|
|
80
|
+
FONT_SIZE_BASE: int = 16
|
|
81
|
+
FONT_SIZE_TICK: int = 15
|
|
82
|
+
FONT_SIZE_AXIS_TITLE: int = 18
|
|
83
|
+
FONT_SIZE_TITLE: int = 22
|
|
84
|
+
FONT_SIZE_LEGEND: int = 15
|
|
85
|
+
|
|
86
|
+
TEMPLATE_NAME: str = "expdpy"
|
|
87
|
+
TEMPLATE_NAME_DARK: str = "expdpy_dark"
|
|
88
|
+
|
|
89
|
+
# Modebar / export config: emit crisp 2x PNGs suitable for slides.
|
|
90
|
+
PLOTLY_CONFIG: dict[str, object] = {
|
|
91
|
+
"displaylogo": False,
|
|
92
|
+
"toImageButtonOptions": {
|
|
93
|
+
"format": "png",
|
|
94
|
+
"filename": "expdpy_figure",
|
|
95
|
+
"scale": 2,
|
|
96
|
+
},
|
|
97
|
+
}
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _build_template(*, dark: bool = False) -> go.layout.Template:
    """Build the ``expdpy`` Plotly template in its light or dark variant.

    Parameters
    ----------
    dark
        When ``True``, use the light-on-dark colors for the font, grid, zero line
        and legend background; otherwise use the dark-on-light ones.

    Returns
    -------
    plotly.graph_objects.layout.Template
        A template whose layout carries the fonts, colorway, continuous scales,
        margins, legend, hover label and axis styling.
    """
    if dark:
        font_color = "#e6e6e6"
        grid = "rgba(255,255,255,0.12)"
        zeroline = "rgba(255,255,255,0.25)"
        legend_bg = "rgba(0,0,0,0.35)"
    else:
        font_color = "#2a2a2a"
        grid = "rgba(0,0,0,0.08)"
        zeroline = "rgba(0,0,0,0.15)"
        legend_bg = "rgba(255,255,255,0.6)"

    def axis_style() -> dict[str, object]:
        # Build a fresh mapping per axis so x and y never share one dict object.
        return {
            "title": {"font": {"family": FONT_FAMILY, "size": FONT_SIZE_AXIS_TITLE}},
            "tickfont": {"family": FONT_FAMILY, "size": FONT_SIZE_TICK},
            "automargin": True,
            "gridcolor": grid,
            "zerolinecolor": zeroline,
        }

    legend_font = {"family": FONT_FAMILY, "size": FONT_SIZE_LEGEND}
    layout = go.Layout(
        font={"family": FONT_FAMILY, "size": FONT_SIZE_BASE, "color": font_color},
        title={"font": {"family": FONT_FAMILY, "size": FONT_SIZE_TITLE}, "x": 0.02},
        colorway=COLOR_SEQUENCE,
        colorscale={"sequential": SEQUENTIAL_SCALE, "diverging": DIVERGING_SCALE},
        margin={"l": 70, "r": 30, "t": 60, "b": 60},
        legend={
            "bgcolor": legend_bg,
            "font": dict(legend_font),
            "title": {"font": dict(legend_font)},
        },
        hoverlabel={"font": {"family": FONT_FAMILY, "size": FONT_SIZE_TICK}},
        xaxis=axis_style(),
        yaxis=axis_style(),
    )
    template = go.layout.Template()
    template.layout = layout
    return template
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
# Register the light + dark templates and make ``plotly_white + expdpy`` the process-wide
|
|
133
|
+
# default so even figures that bypass ``apply_default_layout`` pick up the expdpy look.
|
|
134
|
+
pio.templates[TEMPLATE_NAME] = _build_template()
|
|
135
|
+
pio.templates[TEMPLATE_NAME_DARK] = _build_template(dark=True)
|
|
136
|
+
pio.templates.default = f"plotly_white+{TEMPLATE_NAME}"
|
|
137
|
+
|
|
138
|
+
# The combined template strings applied to every figure for belt-and-suspenders styling.
|
|
139
|
+
_COMBINED_TEMPLATE = f"plotly_white+{TEMPLATE_NAME}"
|
|
140
|
+
_COMBINED_TEMPLATE_DARK = f"plotly_dark+{TEMPLATE_NAME_DARK}"
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def apply_default_layout(
    fig: go.Figure, *, dark: bool = False, **layout_kwargs: object
) -> go.Figure:
    """Style ``fig`` with the expdpy template, then apply any layout overrides.

    The template is set explicitly on the figure rather than relying on the
    process-wide Plotly default, so the result does not depend on global state.

    Parameters
    ----------
    fig
        The figure to style. It is updated through
        :meth:`plotly.graph_objects.Figure.update_layout` and then returned.
    dark
        Select the dark combined template instead of the light one.
    **layout_kwargs
        Extra keyword arguments passed to
        :meth:`plotly.graph_objects.Figure.update_layout` after the template has
        been applied; skipped entirely when none are given.

    Returns
    -------
    plotly.graph_objects.Figure
        The same ``fig`` object that was passed in.
    """
    if dark:
        template = _COMBINED_TEMPLATE_DARK
    else:
        template = _COMBINED_TEMPLATE
    fig.update_layout(template=template)

    # Caller overrides go in a second call so they are applied after the template.
    if not layout_kwargs:
        return fig
    fig.update_layout(**layout_kwargs)
    return fig
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def color_for(index: int) -> str:
    """Return the :data:`COLOR_SEQUENCE` entry for a 0-based series ``index``.

    The index is reduced modulo the palette length, so indices past the end of
    the palette cycle back to its start.
    """
    palette_size = len(COLOR_SEQUENCE)
    wrapped = index % palette_size
    return COLOR_SEQUENCE[wrapped]
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def diverging_color(value: float) -> str:
    """Return the ``rgb(...)`` color on :data:`DIVERGING_SCALE` for ``value``.

    ``value`` is clamped to ``[-1, 1]`` and mapped linearly onto the scale's
    ``[0, 1]`` positions. The color is then interpolated channel by channel
    between the two anchor stops that bracket that position, so ``-1`` yields the
    first stop, ``0`` the midpoint stop and ``+1`` the last stop. It is used for the
    correlation ellipse fills so they match the heatmap's diverging scale.
    """
    clamped = max(-1.0, min(1.0, value))
    position = (clamped + 1.0) / 2.0  # [-1, 1] -> [0, 1]

    # Walk consecutive stop pairs and interpolate inside the first segment whose
    # upper bound reaches ``position``.
    for lower, upper in zip(DIVERGING_SCALE, DIVERGING_SCALE[1:]):
        lo_pos, lo_hex = float(lower[0]), str(lower[1])
        hi_pos, hi_hex = float(upper[0]), str(upper[1])
        if position <= hi_pos:
            # A zero-width segment would divide by zero; use its lower color.
            frac = 0.0 if hi_pos == lo_pos else (position - lo_pos) / (hi_pos - lo_pos)
            start = _hex_to_rgb(lo_hex)
            end = _hex_to_rgb(hi_hex)
            r, g, b = (round(a + (z - a) * frac) for a, z in zip(start, end))
            return f"rgb({r},{g},{b})"

    # Only reached when no segment's upper bound reaches ``position``.
    return f"rgb{_hex_to_rgb(str(DIVERGING_SCALE[-1][1]))}"
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _hex_to_rgb(value: str) -> tuple[int, int, int]:
    """Convert a ``#rrggbb`` or shorthand ``#rgb`` hex string to ``(r, g, b)``.

    Parameters
    ----------
    value
        Hex color. Leading ``#`` characters are optional and are stripped.
        Three-digit shorthand is expanded by doubling each digit
        (``"#1a2"`` is read as ``"#11aa22"``). For inputs with six or more digits
        only the first six are used.

    Returns
    -------
    tuple[int, int, int]
        The red, green and blue channels as integers parsed from base 16.

    Raises
    ------
    ValueError
        If fewer than six digits are given (other than the three-digit
        shorthand), or if a channel is not valid hexadecimal.
    """
    h = value.lstrip("#")
    if len(h) == 3:
        # CSS shorthand: every digit stands for a doubled pair.
        h = "".join(ch * 2 for ch in h)
    elif len(h) < 6:
        # Slicing a short string would silently yield a truncated blue channel
        # (for 5 digits) or an opaque int('') error; reject it explicitly.
        raise ValueError(f"expected a '#rrggbb' or '#rgb' hex color, got {value!r}")
    return int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)
|
expdpy/_types.py
ADDED
|
@@ -0,0 +1,505 @@
|
|
|
1
|
+
"""Frozen result dataclasses returned by the ``prepare_*`` functions.
|
|
2
|
+
|
|
3
|
+
The R package returns ``list(df = ..., plot = ...)`` or ``list(df = ..., kable_ret = ...)``
|
|
4
|
+
objects. In Python we use small, typed, immutable dataclasses that expose the underlying
|
|
5
|
+
``pandas.DataFrame`` alongside the rendered object (a Plotly ``Figure``, a Great Tables
|
|
6
|
+
``GT``, or a pyfixest ``etable`` result).
|
|
7
|
+
|
|
8
|
+
Many result types also mix in :class:`expdpy.pedagogy.Interpretable`, which adds a small
|
|
9
|
+
broom-style surface: ``interpret()`` (plain-language reading of the result), ``explain()``
|
|
10
|
+
(the concept explainer for the method) and, where meaningful, ``tidy()`` / ``glance()``.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import TYPE_CHECKING, Any
|
|
17
|
+
|
|
18
|
+
from expdpy.pedagogy import Interpretable
|
|
19
|
+
from expdpy.pedagogy import explain as _explain
|
|
20
|
+
from expdpy.pedagogy._interpret import (
|
|
21
|
+
interpret_correlation,
|
|
22
|
+
interpret_descriptive,
|
|
23
|
+
interpret_estimation,
|
|
24
|
+
interpret_event_study,
|
|
25
|
+
interpret_fwl,
|
|
26
|
+
interpret_regression,
|
|
27
|
+
interpret_sandbox,
|
|
28
|
+
interpret_trend,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
if TYPE_CHECKING:
|
|
32
|
+
import pandas as pd
|
|
33
|
+
import plotly.graph_objects as go
|
|
34
|
+
from great_tables import GT
|
|
35
|
+
|
|
36
|
+
from expdpy.pedagogy import Explainer
|
|
37
|
+
|
|
38
|
+
__all__ = [
|
|
39
|
+
"BarChartResult",
|
|
40
|
+
"ByGroupBarGraphResult",
|
|
41
|
+
"ByGroupTrendGraphResult",
|
|
42
|
+
"CoefficientPlotResult",
|
|
43
|
+
"CorrelationGraphResult",
|
|
44
|
+
"CorrelationTableResult",
|
|
45
|
+
"DescriptiveTableResult",
|
|
46
|
+
"EstimationResult",
|
|
47
|
+
"EventStudyResult",
|
|
48
|
+
"ExtObsTableResult",
|
|
49
|
+
"FWLPlotResult",
|
|
50
|
+
"FixefPlotResult",
|
|
51
|
+
"HausmanTestResult",
|
|
52
|
+
"HistogramResult",
|
|
53
|
+
"JointTestResult",
|
|
54
|
+
"PanelViewResult",
|
|
55
|
+
"PredictionResult",
|
|
56
|
+
"QuantileTrendGraphResult",
|
|
57
|
+
"RegressionTableResult",
|
|
58
|
+
"RobustInferenceResult",
|
|
59
|
+
"SandboxResult",
|
|
60
|
+
"TrendGraphResult",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass(frozen=True)
class DescriptiveTableResult(Interpretable):
    """Immutable result returned by :func:`expdpy.prepare_descriptive_table`.

    ``df`` is the summary frame (its index holds the variables, see :meth:`tidy`)
    and ``gt`` is the rendered Great Tables object.
    """

    df: pd.DataFrame
    gt: GT

    def interpret(self, *, lang: str = "en") -> str:
        """Describe central tendency, spread and skew per variable in plain language."""
        return interpret_descriptive(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the concept explainer registered as ``"descriptive_stats"``."""
        return _explain("descriptive_stats", lang=lang)

    def tidy(self) -> pd.DataFrame:
        """Return ``df`` with its index named ``variable`` and moved into a column."""
        named = self.df.rename_axis("variable")
        return named.reset_index()
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(frozen=True)
class CorrelationTableResult(Interpretable):
    """Immutable result returned by :func:`expdpy.prepare_correlation_table`.

    ``df_corr`` stores Pearson correlations above the diagonal and Spearman
    correlations below it, ``df_prob`` the corresponding p-values and ``df_n`` the
    pairwise observation counts; ``gt`` is the rendered Great Tables object.
    """

    df_corr: pd.DataFrame
    df_prob: pd.DataFrame
    df_n: pd.DataFrame
    gt: GT

    def interpret(self, *, lang: str = "en") -> str:
        """Describe the strongest pair and any Pearson-vs-Spearman divergence."""
        return interpret_correlation(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the concept explainer registered as ``"pearson"``."""
        return _explain("pearson", lang=lang)

    def tidy(self) -> pd.DataFrame:
        """Reshape ``df_corr`` into a long ``(var1, var2, correlation)`` frame."""
        stacked = self.df_corr.stack()
        long = stacked.rename("correlation").reset_index()
        # reset_index yields generic level names; replace them with stable ones.
        long.columns = ["var1", "var2", "correlation"]
        return long
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@dataclass(frozen=True)
class CorrelationGraphResult:
    """Immutable result returned by :func:`expdpy.prepare_correlation_graph`.

    Carries three data frames (``df_corr``, ``df_prob`` and ``df_n``) together with
    the Plotly figure ``fig``.
    """

    df_corr: pd.DataFrame
    df_prob: pd.DataFrame
    df_n: pd.DataFrame
    fig: go.Figure
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
@dataclass(frozen=True)
class ExtObsTableResult:
    """Immutable result returned by :func:`expdpy.prepare_ext_obs_table`.

    ``df`` is the underlying data frame and ``gt`` the rendered Great Tables object.
    """

    df: pd.DataFrame
    gt: GT
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
@dataclass(frozen=True)
class TrendGraphResult(Interpretable):
    """Immutable result returned by :func:`expdpy.prepare_trend_graph`.

    ``df`` is the underlying data frame and ``fig`` the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure

    def interpret(self, *, lang: str = "en") -> str:
        """Describe the direction of change of each series in plain language."""
        return interpret_trend(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the concept explainer registered as ``"time_trends"``."""
        return _explain("time_trends", lang=lang)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass(frozen=True)
class QuantileTrendGraphResult:
    """Immutable result returned by :func:`expdpy.prepare_quantile_trend_graph`.

    ``df`` is the underlying data frame and ``fig`` the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass(frozen=True)
class ByGroupBarGraphResult:
    """Immutable result returned by :func:`expdpy.prepare_by_group_bar_graph`.

    ``df`` is the underlying data frame and ``fig`` the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
@dataclass(frozen=True)
class ByGroupTrendGraphResult:
    """Immutable result returned by :func:`expdpy.prepare_by_group_trend_graph`.

    ``df`` is the underlying data frame and ``fig`` the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@dataclass(frozen=True)
class HistogramResult:
    """Immutable result returned by :func:`expdpy.prepare_histogram`.

    ``df`` is the underlying data frame and ``fig`` the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@dataclass(frozen=True)
class BarChartResult:
    """Immutable result returned by :func:`expdpy.prepare_bar_chart`.

    ``df`` is the underlying data frame and ``fig`` the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@dataclass(frozen=True)
class CoefficientPlotResult:
    """Immutable result returned by :func:`expdpy.prepare_coefficient_plot`.

    ``df`` is a tidy long frame whose columns are ``model``, ``term``,
    ``estimate``, ``se``, ``ci_lower`` and ``ci_upper``. ``fig`` is the Plotly
    coefficient plot.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
@dataclass(frozen=True)
class RegressionTableResult(Interpretable):
    """Immutable result returned by :func:`expdpy.prepare_regression_table`.

    ``models`` lists the fitted pyfixest models. ``etable`` is the rendered
    regression table (a Great Tables object or a string, depending on ``format``).
    ``df`` is the tidy coefficient frame (term, model, estimate, se, p-value).
    """

    models: list[Any]
    etable: Any
    df: pd.DataFrame

    def interpret(self, *, lang: str = "en") -> str:
        """Describe sign, magnitude and significance of each coefficient."""
        return interpret_regression(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the explainer matching the first model's design.

        Fixed effects take precedence over clustering; plain OLS is the fallback.
        """
        first = self.models[0]
        if getattr(first, "_has_fixef", False):
            return _explain("fixed_effects", lang=lang)
        if getattr(first, "_is_clustered", False):
            return _explain("clustered_se", lang=lang)
        return _explain("ols", lang=lang)

    def tidy(self) -> pd.DataFrame:
        """Return the tidy coefficient frame ``df`` (broom-style ``tidy``)."""
        return self.df

    def glance(self) -> pd.DataFrame:
        """Return one row per model with N, R² and within-R² (broom-style ``glance``).

        Attributes missing on a model fall back to ``None`` (``depvar``), ``0``
        (``N``), ``nan`` (``r2`` / ``r2_within``) or ``False`` (``has_fe``).
        """
        import math

        # pandas is only imported for type checking at module level, so it is
        # imported here where it is needed at runtime.
        import pandas as pd

        records = []
        for number, fitted in enumerate(self.models, start=1):
            records.append(
                {
                    "model": number,
                    "depvar": getattr(fitted, "_depvar", None),
                    "N": int(getattr(fitted, "_N", 0)),
                    "r2": float(getattr(fitted, "_r2", math.nan)),
                    "r2_within": float(getattr(fitted, "_r2_within", math.nan)),
                    "has_fe": bool(getattr(fitted, "_has_fixef", False)),
                }
            )
        return pd.DataFrame(records)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
@dataclass(frozen=True)
class FWLPlotResult(Interpretable):
    """Immutable result returned by :func:`expdpy.prepare_fwl_plot`.

    ``df`` is the residual frame sorted by ``x_resid``, with columns ``x_resid``,
    ``y_resid``, ``fit``, ``lwr`` and ``upr`` (the OLS fit of ``y_resid`` on
    ``x_resid`` and its 95% pointwise confidence band). ``fig`` is the Plotly figure.

    ``slope`` equals the full-model coefficient on the focal variable
    (Frisch-Waugh-Lovell theorem). ``se`` is that coefficient's standard error
    from the full model, clustered when clusters are given to match
    :func:`prepare_regression_table`. The plotted band is the simpler
    residual-OLS confidence interval, so the uncertainty it implies can differ
    from ``se``.

    ``intercept`` is the residual-OLS intercept (≈ 0 when residualized), ``n_obs``
    the complete-case sample size, and ``r2_within`` the full model's within-R²
    (``nan`` when there are no fixed effects).
    """

    df: pd.DataFrame
    fig: go.Figure
    slope: float
    se: float
    intercept: float
    n_obs: int
    r2_within: float

    def interpret(self, *, lang: str = "en") -> str:
        """Describe the partial slope and the FWL identity in plain language."""
        return interpret_fwl(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the concept explainer registered as ``"fwl"``."""
        return _explain("fwl", lang=lang)

    def glance(self) -> pd.DataFrame:
        """Return the scalar fields as a one-row frame (broom-style ``glance``)."""
        # pandas is only imported for type checking at module level, so it is
        # imported here where it is needed at runtime.
        import pandas as pd

        row = {
            "slope": self.slope,
            "se": self.se,
            "intercept": self.intercept,
            "n_obs": self.n_obs,
            "r2_within": self.r2_within,
        }
        return pd.DataFrame([row])
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
@dataclass(frozen=True)
class EstimationResult(Interpretable):
    """Immutable result returned by :func:`expdpy.prepare_estimation`.

    ``models`` holds the fitted pyfixest model(s), ``etable`` the rendered table
    and ``df`` the tidy coefficient frame. ``model_kind`` names the estimator
    (``"ols"``/``"iv"``/``"poisson"``/``"logit"``/``"probit"``), ``fit_stats`` is a
    one-row-per-model summary, and ``notes`` collects advisory messages raised
    during estimation (empty by default).
    """

    models: list[Any]
    etable: Any
    df: pd.DataFrame
    model_kind: str
    fit_stats: pd.DataFrame
    notes: tuple[str, ...] = ()

    def interpret(self, *, lang: str = "en") -> str:
        """Describe the coefficients in plain language, aware of ``model_kind``."""
        return interpret_estimation(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the explainer matching the estimator and, for OLS, the design.

        IV and the GLM kinds map straight to their own topics. Any other kind is
        classified from the first model: fixed effects, then clustering, then OLS.
        """
        if self.model_kind == "iv":
            return _explain("iv", lang=lang)
        if self.model_kind in ("poisson", "logit", "probit"):
            return _explain("glm", lang=lang)

        first = self.models[0]
        if getattr(first, "_has_fixef", False):
            return _explain("fixed_effects", lang=lang)
        if getattr(first, "_is_clustered", False):
            return _explain("clustered_se", lang=lang)
        return _explain("ols", lang=lang)

    def tidy(self) -> pd.DataFrame:
        """Return the tidy coefficient frame ``df`` (broom-style ``tidy``)."""
        return self.df

    def glance(self) -> pd.DataFrame:
        """Return the per-model ``fit_stats`` frame (broom-style ``glance``)."""
        return self.fit_stats
|
|
343
|
+
|
|
344
|
+
|
|
345
|
+
@dataclass(frozen=True)
class FixefPlotResult:
    """Immutable result returned by :func:`expdpy.prepare_fixef_plot`.

    ``df`` has the columns ``fixef`` (the fixed-effect dimension), ``level`` and
    ``value`` (the estimated group intercept). ``fig`` is the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
355
|
+
|
|
356
|
+
|
|
357
|
+
@dataclass(frozen=True)
class PredictionResult:
    """Immutable result returned by :func:`expdpy.prepare_predictions`.

    ``df`` contains the fitted ``predicted`` values. When predicting on the
    estimation sample (no ``newdata``) it also has ``actual`` and ``residual``
    columns.
    """

    df: pd.DataFrame
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
@dataclass(frozen=True)
class JointTestResult:
    """Immutable result of :func:`expdpy.prepare_joint_test` (a Wald joint test).

    Holds the test ``statistic``, its ``p_value``, the tested ``hypotheses`` and the
    name of the reference ``distribution`` used in the summary line.
    """

    statistic: float
    p_value: float
    hypotheses: tuple[str, ...]
    distribution: str

    def summary(self) -> str:
        """Return a one-line verdict, using 0.05 as the significance threshold."""
        terms = ", ".join(self.hypotheses)
        if self.p_value < 0.05:
            verdict = "jointly statistically significant"
        else:
            verdict = "not jointly statistically significant at the 5% level"
        headline = f"Joint {self.distribution}-test that [{terms}] are all zero: "
        numbers = f"statistic = {self.statistic:.4g}, p = {self.p_value:.4g} — {verdict}."
        return headline + numbers
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
@dataclass(frozen=True)
class EventStudyResult(Interpretable):
    """Immutable result returned by :func:`expdpy.prepare_event_study`.

    ``df`` is the tidy event-time path with columns ``event_time``, ``estimate``,
    ``se``, ``ci_lower``, ``ci_upper`` and ``cohort``; ``cohort`` is filled only
    for the Sun-Abraham ``"saturated"`` estimator. ``fig`` is the Plotly
    event-study plot, ``model`` the fitted pyfixest object and ``estimator`` the
    chosen method.
    """

    df: pd.DataFrame
    fig: go.Figure
    model: Any
    estimator: str

    def interpret(self, *, lang: str = "en") -> str:
        """Give the pre-trend diagnostic and dynamic-effect summary in plain language."""
        return interpret_event_study(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the concept explainer registered as ``"event_study"``."""
        return _explain("event_study", lang=lang)

    def tidy(self) -> pd.DataFrame:
        """Return the tidy event-time path ``df`` (broom-style ``tidy``)."""
        return self.df
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
@dataclass(frozen=True)
class PanelViewResult:
    """Immutable result returned by :func:`expdpy.prepare_panel_view`.

    ``df`` is the treatment quilt (units by periods, 0/1), or the tidy outcome
    frame when an ``outcome`` is given. ``fig`` is the Plotly figure.
    """

    df: pd.DataFrame
    fig: go.Figure
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
@dataclass(frozen=True)
class HausmanTestResult(Interpretable):
    """Immutable result of :func:`expdpy.prepare_hausman_test` (fixed vs random effects).

    ``statistic`` is the Hausman chi-squared statistic, ``df_test`` its degrees of
    freedom and ``p_value`` the p-value. ``fe_coefs`` and ``re_coefs`` are the
    compared coefficients.
    """

    statistic: float
    df_test: int
    p_value: float
    fe_coefs: pd.DataFrame
    re_coefs: pd.DataFrame

    def interpret(self, *, lang: str = "en") -> str:
        """Return a plain-language verdict on the fixed-vs-random-effects choice.

        The null is rejected when ``p_value`` is below 0.05. ``lang`` is accepted
        for signature parity with the other result types but is not used here:
        the sentence is always built from the English text below.
        """
        rejected = self.p_value < 0.05
        verdict = (
            (
                "**reject** the null — the random-effects assumption is violated, so prefer "
                "**fixed effects**"
            )
            if rejected
            else (
                "**fail to reject** the null — **random effects** is admissible (and more "
                "efficient than fixed effects)"
            )
        )
        test_line = (
            f"Hausman test (χ²({self.df_test}) = {self.statistic:.3f}, "
            f"p = {self.p_value:.4g}): {verdict}."
        )
        caveat = (
            " Note that failing to reject reflects a lack "
            "of evidence against random effects, not proof that it is correct."
        )
        return test_line + caveat

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the concept explainer registered as ``"hausman"``."""
        return _explain("hausman", lang=lang)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
@dataclass(frozen=True)
class SandboxResult(Interpretable):
    """Immutable result of an ``expdpy.sandbox_*`` teaching demonstration.

    ``df`` is the comparison table and ``fig`` the headline figure. ``summary``
    holds the scalar facts the demonstration turns on, and ``topic`` names the
    concept it illustrates (it is also the key passed to the explainer lookup).
    """

    df: pd.DataFrame
    fig: go.Figure
    summary: dict[str, float]
    topic: str

    def interpret(self, *, lang: str = "en") -> str:
        """Give the plain-language takeaway of the demonstration."""
        return interpret_sandbox(self, lang=lang)

    def explain(self, *, lang: str = "en") -> Explainer:
        """Return the concept explainer registered under ``self.topic``."""
        return _explain(self.topic, lang=lang)
|
|
488
|
+
|
|
489
|
+
|
|
490
|
+
@dataclass(frozen=True)
class RobustInferenceResult:
    """Immutable result returned by :func:`expdpy.prepare_robust_inference`.

    ``method`` is ``"ritest"`` (randomization inference) or ``"wildboot"`` (wild
    cluster bootstrap). ``estimate`` and ``p_value`` refer to the tested ``param``,
    ``conf_int`` is the ``(lower, upper)`` interval and ``raw`` the underlying
    pyfixest result series. ``reps`` is stored as an integer; its meaning is not
    documented in this module (presumably the number of resampling repetitions —
    confirm against ``prepare_robust_inference``).
    """

    method: str
    param: str
    estimate: float
    p_value: float
    conf_int: tuple[float, float]
    reps: int
    raw: Any
|