expdpy 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- expdpy/__init__.py +147 -0
- expdpy/_assets/favicon.png +0 -0
- expdpy/_assets/favicon.svg +12 -0
- expdpy/_assets/logo-navbar.svg +14 -0
- expdpy/_assets/logo.png +0 -0
- expdpy/_assets/logo.svg +14 -0
- expdpy/_corr.py +80 -0
- expdpy/_estimation/__init__.py +38 -0
- expdpy/_estimation/_capture.py +26 -0
- expdpy/_estimation/_fit.py +56 -0
- expdpy/_estimation/_formula.py +50 -0
- expdpy/_estimation/_results.py +30 -0
- expdpy/_estimation/_spec.py +112 -0
- expdpy/_estimation/_tidy.py +35 -0
- expdpy/_estimation/_vcov.py +52 -0
- expdpy/_theme.py +201 -0
- expdpy/_types.py +505 -0
- expdpy/_validation.py +43 -0
- expdpy/app/__init__.py +766 -0
- expdpy/app/_components.py +282 -0
- expdpy/app/_config_io.py +63 -0
- expdpy/app/_export_nb.py +234 -0
- expdpy/app/_sample.py +124 -0
- expdpy/app/_state.py +102 -0
- expdpy/app/_udv.py +179 -0
- expdpy/app/_upload.py +43 -0
- expdpy/app/_varcat.py +100 -0
- expdpy/by_group.py +313 -0
- expdpy/coefplot.py +236 -0
- expdpy/correlation.py +152 -0
- expdpy/data/__init__.py +103 -0
- expdpy/data/expdpy_config_kuznets.json +54 -0
- expdpy/data/gapminder.parquet +0 -0
- expdpy/data/gapminder_data_def.parquet +0 -0
- expdpy/data/kuznets.parquet +0 -0
- expdpy/data/kuznets_data_def.parquet +0 -0
- expdpy/data/staggered_did.parquet +0 -0
- expdpy/data/staggered_did_data_def.parquet +0 -0
- expdpy/did.py +405 -0
- expdpy/distributions.py +130 -0
- expdpy/estimation.py +282 -0
- expdpy/fwl.py +284 -0
- expdpy/inference.py +92 -0
- expdpy/missing.py +117 -0
- expdpy/outliers.py +180 -0
- expdpy/panel_models.py +244 -0
- expdpy/pedagogy/__init__.py +43 -0
- expdpy/pedagogy/_format.py +88 -0
- expdpy/pedagogy/_interpret.py +355 -0
- expdpy/pedagogy/_mixin.py +44 -0
- expdpy/pedagogy/_registry.py +121 -0
- expdpy/pedagogy/_text/__init__.py +11 -0
- expdpy/pedagogy/_text/causal.py +65 -0
- expdpy/pedagogy/_text/correlation.py +77 -0
- expdpy/pedagogy/_text/outliers.py +53 -0
- expdpy/pedagogy/_text/regression.py +257 -0
- expdpy/pedagogy/_text/tables.py +51 -0
- expdpy/postestimation.py +202 -0
- expdpy/py.typed +0 -0
- expdpy/regression.py +201 -0
- expdpy/sandbox.py +307 -0
- expdpy/scatter.py +207 -0
- expdpy/streamlit_app/__init__.py +106 -0
- expdpy/streamlit_app/_context.py +99 -0
- expdpy/streamlit_app/_entry.py +57 -0
- expdpy/streamlit_app/_handoff.py +149 -0
- expdpy/streamlit_app/_launcher.py +103 -0
- expdpy/streamlit_app/_pages.py +424 -0
- expdpy/streamlit_app/_pipeline.py +99 -0
- expdpy/streamlit_app/_render.py +221 -0
- expdpy/streamlit_app/_run.py +9 -0
- expdpy/streamlit_app/_sidebar.py +258 -0
- expdpy/streamlit_app/_widgets.py +95 -0
- expdpy/tables.py +348 -0
- expdpy/trends.py +263 -0
- expdpy-0.2.0.dist-info/METADATA +203 -0
- expdpy-0.2.0.dist-info/RECORD +80 -0
- expdpy-0.2.0.dist-info/WHEEL +4 -0
- expdpy-0.2.0.dist-info/entry_points.txt +2 -0
- expdpy-0.2.0.dist-info/licenses/LICENSE +25 -0
expdpy/app/__init__.py
ADDED
|
@@ -0,0 +1,766 @@
|
|
|
1
|
+
"""The ExPdPy interactive app (Shiny for Python).
|
|
2
|
+
|
|
3
|
+
``ExPdPy`` builds a config-driven, no-code exploration UI on top of the library's
|
|
4
|
+
``prepare_*`` functions: a reactive sample pipeline (subset / outlier treatment) feeds an
|
|
5
|
+
ordered set of analysis components (descriptive table, histogram, correlations, trends,
|
|
6
|
+
scatter, regression, ...), each rendered with Plotly or Great Tables. It also supports
|
|
7
|
+
in-app data upload, save/load of the analysis configuration, and export of a reproducible
|
|
8
|
+
notebook.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import Mapping, Sequence
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
from expdpy.app import _components as comp
|
|
19
|
+
from expdpy.app._components import COMPONENT_KIND, COMPONENT_ORDER, TS_COMPONENTS
|
|
20
|
+
from expdpy.app._config_io import dump_config, load_config
|
|
21
|
+
from expdpy.app._export_nb import build_export_zip
|
|
22
|
+
from expdpy.app._sample import apply_user_vars, build_analysis_sample
|
|
23
|
+
from expdpy.app._state import parse_config
|
|
24
|
+
from expdpy.app._varcat import create_var_categories
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from shiny import App
|
|
28
|
+
|
|
29
|
+
__all__ = ["ExPdPy"]
|
|
30
|
+
|
|
31
|
+
# Options of the "Outlier treatment" select input in the sidebar: each key is the
# option value (a string code), each value is the label shown for that option.
# The code "1" ("None") is the fallback used when no treatment has been chosen.
_OUTLIER_CHOICES = {
    "1": "None",
    "2": "Winsorize 1%",
    "3": "Winsorize 5%",
    "4": "Truncate 1%",
    "5": "Truncate 5%",
}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _favicon_data_uri() -> str | None:
|
|
41
|
+
"""Base64 ``data:`` URI for the packaged favicon SVG, or ``None`` if unavailable.
|
|
42
|
+
|
|
43
|
+
Embedding the icon inline keeps the app self-contained — no static asset route needed.
|
|
44
|
+
"""
|
|
45
|
+
try:
|
|
46
|
+
from base64 import b64encode
|
|
47
|
+
from importlib.resources import files
|
|
48
|
+
|
|
49
|
+
raw = files("expdpy").joinpath("_assets/favicon.svg").read_bytes()
|
|
50
|
+
return "data:image/svg+xml;base64," + b64encode(raw).decode("ascii")
|
|
51
|
+
except Exception:
|
|
52
|
+
return None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _normalize_samples(
|
|
56
|
+
df: Any, df_name: str | Sequence[str] | None
|
|
57
|
+
) -> dict[str, pd.DataFrame]:
|
|
58
|
+
if df is None:
|
|
59
|
+
return {}
|
|
60
|
+
if isinstance(df, pd.DataFrame):
|
|
61
|
+
name = df_name if isinstance(df_name, str) else "Sample"
|
|
62
|
+
return {name: df}
|
|
63
|
+
if isinstance(df, Mapping):
|
|
64
|
+
return dict(df)
|
|
65
|
+
return {f"Sample {i + 1}": d for i, d in enumerate(df)}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _resolve_ids(
|
|
69
|
+
df_def: pd.DataFrame | None, cs_id: Sequence[str] | str | None, ts_id: str | None
|
|
70
|
+
) -> tuple[list[str], str | None]:
|
|
71
|
+
if df_def is not None:
|
|
72
|
+
cs = list(df_def.loc[df_def["type"] == "cs_id", "var_name"])
|
|
73
|
+
ts_rows = list(df_def.loc[df_def["type"] == "ts_id", "var_name"])
|
|
74
|
+
return cs, (ts_rows[0] if ts_rows else None)
|
|
75
|
+
if isinstance(cs_id, str):
|
|
76
|
+
cs_id = [cs_id]
|
|
77
|
+
return (list(cs_id) if cs_id else []), ts_id
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _active_components(components: Any, ts_id: str | None) -> list[str]:
    """Return the names of the components the app should render, in order.

    Parameters
    ----------
    components
        A ``{name: bool}`` mapping (truthy entries are kept, in
        ``COMPONENT_ORDER`` order), a list/tuple of names (kept in the given
        order, unknown names dropped), or anything else to select every
        component in ``COMPONENT_ORDER``.
    ts_id
        Time-series identifier; when falsy, components listed in
        ``TS_COMPONENTS`` are removed.

    Returns
    -------
    list[str]
        Selected names that also have an entry in ``COMPONENT_KIND``.
    """
    if isinstance(components, Mapping):
        chosen = [name for name in COMPONENT_ORDER if components.get(name)]
    elif isinstance(components, (list, tuple)):
        chosen = [name for name in components if name in COMPONENT_ORDER]
    else:
        chosen = list(COMPONENT_ORDER)

    has_time_dimension = bool(ts_id)
    return [
        name
        for name in chosen
        if name in COMPONENT_KIND
        and (has_time_dimension or name not in TS_COMPONENTS)
    ]
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _g(inp: Any, key: str, default: Any = None) -> Any:
    """Safely read the current value of input ``key`` from ``inp``.

    ``inp[key]`` is looked up and called; if either step raises (for example
    because the input does not exist yet), or if the value is ``None`` or the
    empty string, ``default`` is returned instead.
    """
    try:
        value = inp[key]()
    except Exception:
        return default
    if value in (None, ""):
        return default
    return value
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def ExPdPy(
    df: Any = None,
    cs_id: Sequence[str] | str | None = None,
    ts_id: str | None = None,
    df_def: pd.DataFrame | None = None,
    var_def: pd.DataFrame | None = None,
    config_list: dict | None = None,
    *,
    title: str = "ExPdPy - Explore your data!",
    df_name: str | Sequence[str] | None = None,
    components: Any = None,
    factor_cutoff: int = 10,
    export_nb_option: bool = True,
    save_settings_option: bool = True,
    store_encrypted: bool = False,
    key_phrase: str = "What a wonderful key",
    run: bool = True,
    **run_kwargs: Any,
) -> App:
    """Launch (or build) the interactive ExPdPy app.

    Parameters
    ----------
    df
        A :class:`pandas.DataFrame`, a mapping of name->DataFrame, a list of DataFrames, or
        ``None`` to start with an upload dialog.
    cs_id, ts_id
        Cross-sectional / time-series identifier column name(s). Overridden by ``df_def``.
    df_def
        Optional variable-definition frame (columns ``var_name``, ``var_def``, ``type``)
        used to identify the panel dimensions.
    var_def
        Optional analysis-sample variable definitions (advanced mode). Each ``var_def`` is a
        safe expression evaluated to build the analysis sample.
    config_list
        Optional startup configuration (see :func:`expdpy.data.get_config`).
    title
        App title.
    df_name
        Display name(s) for the provided sample(s).
    components
        Ordered list (or ``{name: bool}`` mapping) selecting which components to show.
    factor_cutoff
        Numeric columns with at most this many unique values are treated as factors.
    export_nb_option, save_settings_option
        Enable the notebook-export and config save/load controls.
    store_encrypted, key_phrase
        Encrypt saved configurations with a Fernet key derived from ``key_phrase``.
    run
        If ``True`` (default), start the app server; otherwise return the :class:`shiny.App`.
    **run_kwargs
        Forwarded to :meth:`shiny.App.run` (e.g. ``port``, ``host``, ``launch_browser``).

    Returns
    -------
    shiny.App
        The constructed app (also returned when ``run=True`` after the server stops).

    Notes
    -----
    A data or configuration file that cannot be read is reported to the user as
    an in-app error notification; the previously loaded data / configuration
    stays in effect.
    """
    from shiny import App, reactive, render, req, ui
    from shinywidgets import render_plotly

    samples = _normalize_samples(df, df_name)
    cs_list, ts = _resolve_ids(df_def, cs_id, ts_id)
    active = _active_components(components, ts)
    base_cfg = parse_config(config_list)

    # ------------------------------------------------------------------ UI ---
    sidebar_items: list[Any] = [ui.h4("ExPdPy")]
    if len(samples) > 1:
        # A sample picker is only needed when several samples were supplied.
        sidebar_items.append(
            ui.input_select(
                "sample", "Sample", choices=list(samples), selected=next(iter(samples))
            )
        )
    sidebar_items.append(
        ui.input_file(
            "upload",
            "Upload data (CSV/Excel/parquet)",
            accept=[".csv", ".xlsx", ".xls", ".parquet"],
        )
    )
    sidebar_items.append(ui.output_ui("sidebar_controls"))
    if save_settings_option:
        sidebar_items += [
            ui.hr(),
            ui.download_button("download_config", "Save config"),
            ui.input_file("upload_config", "Load config", accept=[".json", ".cfg"]),
        ]
    if export_nb_option:
        sidebar_items += [
            ui.hr(),
            ui.download_button("download_nb", "Export notebook + data"),
        ]

    favicon = _favicon_data_uri()
    head = (
        [ui.head_content(ui.tags.link(rel="icon", type="image/svg+xml", href=favicon))]
        if favicon
        else []
    )
    app_ui = ui.page_sidebar(
        ui.sidebar(*sidebar_items, width=320),
        *head,
        ui.output_ui("main_ui"),
        title=title,
    )

    # -------------------------------------------------------------- server ---
    def server(inp: Any, output: Any, session: Any) -> None:
        # Per-session state: the active configuration and the uploaded frame
        # (``None`` until the user uploads a data file).
        cfg_state = reactive.value(dict(base_cfg))
        uploaded = reactive.value(None)

        interp_title = "📝 Plain-language interpretation"
        explain_title = "❓ What is this? (method explainer)"
        hausman_title = "🔬 Hausman test (fixed vs random effects)"

        # --- small shared helpers -------------------------------------------
        def _multi(key: str) -> list[Any]:
            """Read a multi-select input as a list (empty when unset)."""
            raw = _g(inp, key, []) or []
            return list(raw) if isinstance(raw, (list, tuple)) else [raw]

        def _details(summary: str, markdown_text: str) -> Any:
            """Collapsible ``<details>`` block with a summary and markdown body."""
            return ui.tags.details(
                ui.tags.summary(summary),
                ui.markdown(markdown_text),
            )

        def _reg_fe_and_cluster() -> tuple[list[Any], list[str]]:
            """Fixed-effect selections and cluster variables of the regression card."""
            fe1 = _g(inp, "reg_fe1")
            fe2 = _g(inp, "reg_fe2")
            return [fe1, fe2], _cluster_vars(_g(inp, "cluster", 1), fe1, fe2)

        def _event_study_args() -> tuple[Any, ...]:
            """Arguments shared by the event-study figure and its notes."""
            unit = cs_list[0] if cs_list else None
            return (
                analysis_sample(),
                _g(inp, "es_outcome"),
                unit,
                ts,
                _g(inp, "es_cohort"),
                _g(inp, "es_estimator", "did2s"),
            )

        @reactive.calc
        def base_df() -> pd.DataFrame | None:
            # An uploaded file takes precedence over the samples passed to ExPdPy.
            up = uploaded()
            if up is not None:
                return up
            if not samples:
                return None
            if len(samples) > 1:
                return samples.get(
                    _g(inp, "sample", next(iter(samples))), next(iter(samples.values()))
                )
            return next(iter(samples.values()))

        @reactive.calc
        def analysis_sample() -> pd.DataFrame | None:
            data = base_df()
            if data is None:
                return None
            if var_def is not None:
                data = apply_user_vars(data, var_def, cs_list, ts)
            # Overlay the live sidebar selections on a copy of the stored config.
            cfg = dict(cfg_state())
            cfg["subset_factor"] = _g(inp, "subset_factor", "Full Sample")
            cfg["subset_value"] = _g(inp, "subset_value", "All")
            cfg["outlier_treatment"] = _g(inp, "outlier_treatment", "1")
            return build_analysis_sample(data, cs_list, ts, cfg)

        @reactive.calc
        def var_cats():
            sample = analysis_sample()
            if sample is None:
                return create_var_categories(pd.DataFrame())
            return create_var_categories(
                sample, cs_list, ts, factor_cutoff=factor_cutoff
            )

        # --- in-app upload ---------------------------------------------------
        @reactive.effect
        @reactive.event(lambda: inp.upload())
        def _on_upload():
            from expdpy.app._upload import read_uploaded

            files = inp.upload()
            if not files:
                return
            f = files[0]
            try:
                frame = read_uploaded(f["datapath"], f["name"])
            except Exception as exc:
                # Report an unreadable file to the user instead of letting the
                # exception escape the effect as an unhandled error.
                ui.notification_show(
                    f"Could not read '{f['name']}': {exc}",
                    type="error",
                    duration=None,
                )
                return
            uploaded.set(frame)

        # --- config load -----------------------------------------------------
        @reactive.effect
        @reactive.event(lambda: inp.upload_config())
        def _on_config():
            files = inp.upload_config()
            if not files:
                return
            try:
                with open(files[0]["datapath"], "rb") as fh:
                    raw = fh.read()
                cfg = load_config(raw, key_phrase if store_encrypted else None)
                parsed = parse_config(cfg)
            except Exception as exc:
                # A damaged file or a wrong key must not take the app down;
                # keep the current configuration and tell the user.
                ui.notification_show(
                    f"Could not load the configuration: {exc}",
                    type="error",
                    duration=None,
                )
                return
            cfg_state.set(parsed)

        # --- sidebar controls (subset + outlier) -----------------------------
        @render.ui
        def sidebar_controls():
            vc = var_cats()
            cfg = cfg_state()
            factors = ["Full Sample", *vc.grouping]
            controls = [
                ui.input_select(
                    "subset_factor",
                    "Subset by",
                    choices=factors,
                    selected=cfg.get("subset_factor", "Full Sample"),
                ),
                ui.output_ui("subset_value_ui"),
                ui.input_select(
                    "outlier_treatment",
                    "Outlier treatment",
                    choices=_OUTLIER_CHOICES,
                    selected=str(cfg.get("outlier_treatment", "1")),
                ),
            ]
            return ui.TagList(*controls)

        @render.ui
        def subset_value_ui():
            # NOTE(review): the levels are taken from analysis_sample(), which
            # itself reads ``subset_value`` -- confirm the choices are not
            # narrowed once a value has been selected.
            sample = analysis_sample()
            sf = _g(inp, "subset_factor", "Full Sample")
            if (
                sample is None
                or sf in (None, "Full Sample")
                or sf not in sample.columns
            ):
                return ui.TagList()
            levels = [
                "All",
                *[str(v) for v in sorted(sample[sf].dropna().unique(), key=str)],
            ]
            return ui.input_select(
                "subset_value", "Value", choices=levels, selected="All"
            )

        @render.ui
        def fwl_focal_ui():
            # Focal-variable choices track the live regression regressors (reg_x); the
            # other regressors become the FWL controls. Resets if the focal is removed.
            xs = [x for x in _multi("reg_x") if x not in (None, "", "None")]
            if not xs:
                return ui.help_text(
                    "Select one or more independent variables in the regression card above."
                )
            sel = cfg_state().get("fwl_focal")
            if sel not in xs:
                sel = xs[0]
            return ui.input_select(
                "fwl_focal", "Focal variable", choices=xs, selected=sel
            )

        # --- main component area --------------------------------------------
        @render.ui
        def main_ui():
            if base_df() is None:
                return ui.div(
                    ui.h3("Welcome to ExPdPy"),
                    ui.p(
                        "Upload a data file (sidebar) with at least two numeric variables to begin."
                    ),
                )
            vc = var_cats()
            cfg = cfg_state()
            cards = [_component_card(name, vc, cfg, ts) for name in active]
            return ui.TagList(*[c for c in cards if c is not None])

        # --- component renderers (defined for every component; render only
        #     when their output placeholder is present in the DOM) -----------
        @render.ui
        def t_descriptive_table():
            return ui.HTML(comp.descriptive(analysis_sample()) or "")

        @render.ui
        def t_ext_obs():
            return ui.HTML(
                comp.ext_obs(analysis_sample(), _g(inp, "ext_obs_var")) or ""
            )

        @render.ui
        def t_regression():
            fes, clusters = _reg_fe_and_cluster()
            args = (
                analysis_sample(),
                _g(inp, "reg_y"),
                _multi("reg_x"),
                fes,
                clusters,
            )
            html = comp.regression(*args)
            if not html:
                return ui.HTML("")
            blocks = [ui.HTML(html)]
            notes = comp.regression_notes(*args)
            if notes:
                interp_md, explain_md = notes
                blocks.append(_details(interp_title, interp_md))
                blocks.append(_details(explain_title, explain_md))
            return ui.TagList(*blocks)

        @render_plotly
        def w_corrplot():
            fig = comp.corrplot(analysis_sample())
            req(fig is not None)
            return fig

        @render_plotly
        def w_histogram():
            fig = comp.histogram(
                analysis_sample(),
                _g(inp, "hist_var"),
                int(_g(inp, "hist_nr_of_breaks", 20)),
            )
            req(fig is not None)
            return fig

        @render_plotly
        def w_bar_chart():
            fig = comp.bar_chart(analysis_sample(), _g(inp, "bar_chart_var1"))
            req(fig is not None)
            return fig

        @render_plotly
        def w_missing_values():
            fig = comp.missing(analysis_sample(), ts)
            req(fig is not None)
            return fig

        @render_plotly
        def w_scatter_plot():
            fig = comp.scatter(
                analysis_sample(),
                _g(inp, "scatter_x"),
                _g(inp, "scatter_y"),
                _g(inp, "scatter_color"),
                _g(inp, "scatter_size"),
                bool(_g(inp, "scatter_loess", True)),
            )
            req(fig is not None)
            return fig

        @render_plotly
        def w_fwl_plot():
            fes, clusters = _reg_fe_and_cluster()
            fig = comp.fwl_plot(
                analysis_sample(),
                _g(inp, "reg_y"),
                _multi("reg_x"),
                _g(inp, "fwl_focal"),
                fes,
                clusters,
            )
            req(fig is not None)
            return fig

        @render_plotly
        def w_event_study():
            fig = comp.event_study(*_event_study_args())
            req(fig is not None)
            return fig

        @render.ui
        def t_event_study_notes():
            notes = comp.event_study_notes(*_event_study_args())
            if not notes:
                return ui.HTML("")
            interp_md, explain_md = notes
            return ui.TagList(
                _details(interp_title, interp_md),
                _details(explain_title, explain_md),
            )

        @render.ui
        def t_panel_models():
            entity = cs_list[0] if cs_list else None
            args = (analysis_sample(), _g(inp, "pm_dv"), _multi("pm_idvs"), entity, ts)
            html = comp.panel_models(*args)
            if not html:
                return ui.HTML("")
            blocks = [ui.HTML(html)]
            notes = comp.panel_models_notes(*args)
            if notes:
                panel_md, hausman_md = notes
                blocks.append(_details(interp_title, panel_md))
                blocks.append(_details(hausman_title, hausman_md))
            return ui.TagList(*blocks)

        @render_plotly
        def w_trend_graph():
            variables = [_g(inp, f"trend_graph_var{i}") for i in (1, 2, 3)]
            fig = comp.trend(analysis_sample(), ts, variables)
            req(fig is not None)
            return fig

        @render_plotly
        def w_quantile_trend_graph():
            fig = comp.quantile_trend(
                analysis_sample(), ts, _g(inp, "quantile_trend_graph_var")
            )
            req(fig is not None)
            return fig

        @render_plotly
        def w_by_group_bar_graph():
            fig = comp.by_group_bar(
                analysis_sample(), _g(inp, "bgbg_byvar"), _g(inp, "bgbg_var")
            )
            req(fig is not None)
            return fig

        @render_plotly
        def w_by_group_violin_graph():
            fig = comp.by_group_violin(
                analysis_sample(), _g(inp, "bgvg_byvar"), _g(inp, "bgvg_var")
            )
            req(fig is not None)
            return fig

        @render_plotly
        def w_by_group_trend_graph():
            fig = comp.by_group_trend(
                analysis_sample(), ts, _g(inp, "bgtg_byvar"), _g(inp, "bgtg_var")
            )
            req(fig is not None)
            return fig

        # --- downloads -------------------------------------------------------
        def _current_config() -> dict:
            # Stored config overlaid with every input that currently has a value.
            cfg = dict(cfg_state())
            for key in _CONFIG_INPUT_KEYS:
                val = _g(inp, key, None)
                if val is not None:
                    # Tuples (multi-selects) are stored as lists.
                    cfg[key] = list(val) if isinstance(val, tuple) else val
            return cfg

        @render.download(filename="expdpy_config.json")
        def download_config():
            yield dump_config(
                _current_config(), key_phrase if store_encrypted else None
            )

        @render.download(filename="ExPdPy_analysis.zip")
        def download_nb():
            sample = analysis_sample()
            if sample is not None:
                yield build_export_zip(_current_config(), active, sample, ts)

    app = App(app_ui, server)
    if run:
        app.run(**run_kwargs)
    return app
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
# Input ids that participate in saved configurations: when a configuration is
# saved or exported, the current value of each of these inputs (if it has one)
# is written into the configuration dict under the same key.
_CONFIG_INPUT_KEYS = [
    "subset_factor",
    "subset_value",
    "outlier_treatment",
    "hist_var",
    "hist_nr_of_breaks",
    "ext_obs_var",
    "bar_chart_var1",
    "scatter_x",
    "scatter_y",
    "scatter_color",
    "scatter_size",
    "scatter_loess",
    "trend_graph_var1",
    "trend_graph_var2",
    "trend_graph_var3",
    "quantile_trend_graph_var",
    "bgbg_byvar",
    "bgbg_var",
    "bgvg_byvar",
    "bgvg_var",
    "bgtg_byvar",
    "bgtg_var",
    "reg_y",
    "reg_x",
    "reg_fe1",
    "reg_fe2",
    "cluster",
    "fwl_focal",
    "es_outcome",
    "es_cohort",
    "es_estimator",
    "pm_dv",
    "pm_idvs",
]
|
|
617
|
+
|
|
618
|
+
|
|
619
|
+
def _cluster_vars(choice: Any, fe1: str | None, fe2: str | None) -> list[str]:
|
|
620
|
+
"""Translate the cluster radio (1-4) into a list of cluster variables."""
|
|
621
|
+
fes = [f for f in (fe1, fe2) if f and f != "None"]
|
|
622
|
+
try:
|
|
623
|
+
choice = int(choice)
|
|
624
|
+
except (TypeError, ValueError):
|
|
625
|
+
choice = 1
|
|
626
|
+
if choice <= 1:
|
|
627
|
+
return []
|
|
628
|
+
return fes[: choice - 1]
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _sel(
    id_: str, label: str, choices: list[str], cfg: dict, *, none: bool = False
) -> Any:
    """Build a select input whose initial selection comes from ``cfg``.

    Parameters
    ----------
    id_
        Input id; also the key looked up in ``cfg`` for the initial selection.
    label
        Label shown for the input.
    choices
        Available options.
    cfg
        Configuration dict holding previously chosen values.
    none
        When true, a leading ``"None"`` option is added.

    Returns
    -------
    Any
        The ``ui.input_select`` tag. If there are no options at all, the single
        option ``"None"`` is offered; if the configured value is not among the
        options, the first option is selected.
    """
    from shiny import ui

    options = ["None", *choices] if none else choices
    if not options:
        options = ["None"]

    configured = cfg.get(id_)
    initial = configured if configured in options else options[0]
    return ui.input_select(id_, label, choices=options, selected=initial)
|
|
641
|
+
|
|
642
|
+
|
|
643
|
+
def _component_card(name: str, vc, cfg: dict, ts: str | None) -> Any:
    """Build the card (selectors plus output placeholder) for one component.

    Parameters
    ----------
    name
        Component name; must be a key of ``COMPONENT_KIND``.
    vc
        Variable categories object; its ``numeric_logical``, ``grouping`` and
        ``fe_choices`` attributes supply the selector options (each falls back
        to ``["None"]`` when empty).
    cfg
        Configuration dict providing the initial value of every selector.
    ts
        Time-series identifier (not used when building the card).

    Returns
    -------
    Any
        A ``ui.card`` whose header is the title-cased component name, followed
        by the component's selectors and its output placeholder
        (``w_<name>`` for plotly components, ``t_<name>`` otherwise).
    """
    from shiny import ui
    from shinywidgets import output_widget

    def pick(id_: str, label: str, options: list[str], *, none: bool = False) -> Any:
        # Select input pre-filled from ``cfg``.
        return _sel(id_, label, options, cfg, none=none)

    def multi_pick(id_: str, label: str) -> Any:
        # Multi-select over the numeric variables, keeping only configured
        # values that are still valid options.
        configured = cfg.get(id_) or []
        return ui.input_selectize(
            id_,
            label,
            choices=numeric,
            multiple=True,
            selected=[c for c in configured if c in numeric],
        )

    numeric = vc.numeric_logical or ["None"]
    factors = vc.grouping or ["None"]
    fe_choices = vc.fe_choices or ["None"]
    if COMPONENT_KIND[name] == "plotly":
        out = output_widget(f"w_{name}")
    else:
        out = ui.output_ui(f"t_{name}")

    # The three by-group components share one layout and differ only in ids.
    by_group_ids = {
        "by_group_bar_graph": ("bgbg_byvar", "bgbg_var"),
        "by_group_violin_graph": ("bgvg_byvar", "bgvg_var"),
        "by_group_trend_graph": ("bgtg_byvar", "bgtg_var"),
    }

    controls: list[Any] = []
    if name == "histogram":
        bins = ui.input_slider(
            "hist_nr_of_breaks",
            "Bins",
            5,
            100,
            int(cfg.get("hist_nr_of_breaks", 20)),
        )
        controls = [pick("hist_var", "Variable", numeric), bins]
    elif name == "ext_obs":
        controls = [pick("ext_obs_var", "Variable", numeric)]
    elif name == "bar_chart":
        controls = [pick("bar_chart_var1", "Variable", factors)]
    elif name == "scatter_plot":
        controls = [
            pick("scatter_x", "X", numeric),
            pick("scatter_y", "Y", numeric),
            pick("scatter_color", "Color", factors + numeric, none=True),
            pick("scatter_size", "Size", numeric, none=True),
            ui.input_checkbox(
                "scatter_loess", "LOESS", bool(cfg.get("scatter_loess", True))
            ),
        ]
    elif name == "trend_graph":
        # Only the first trend variable is mandatory; the others may be "None".
        controls = []
        for i in (1, 2, 3):
            controls.append(
                pick(f"trend_graph_var{i}", f"Variable {i}", numeric, none=(i > 1))
            )
    elif name == "quantile_trend_graph":
        controls = [pick("quantile_trend_graph_var", "Variable", numeric)]
    elif name in by_group_ids:
        byvar_id, var_id = by_group_ids[name]
        controls = [
            pick(byvar_id, "Group by", factors),
            pick(var_id, "Variable", numeric),
        ]
    elif name == "regression":
        cluster_select = ui.input_select(
            "cluster",
            "Cluster SE",
            choices={"1": "None", "2": "FE 1", "3": "FE 1 + FE 2"},
            selected=str(cfg.get("cluster", 1)),
        )
        controls = [
            pick("reg_y", "Dependent", numeric),
            multi_pick("reg_x", "Independent"),
            pick("reg_fe1", "Fixed effect 1", fe_choices, none=True),
            pick("reg_fe2", "Fixed effect 2", fe_choices, none=True),
            cluster_select,
        ]
    elif name == "fwl_plot":
        controls = [
            ui.markdown(
                "**Frisch-Waugh-Lovell plot.** Residualizes the dependent variable and "
                "the focal regressor on the *other* regressors **and** the fixed effects "
                "chosen in the regression above, then plots the two residuals. The fitted "
                "slope equals the focal coefficient in that regression."
            ),
            ui.output_ui("fwl_focal_ui"),
        ]
    elif name == "event_study":
        estimator_select = ui.input_select(
            "es_estimator",
            "Estimator",
            choices={
                "did2s": "Gardner (did2s)",
                "twfe": "Two-way FE",
                "saturated": "Sun-Abraham",
                "lpdid": "Local projections",
            },
            selected=str(cfg.get("es_estimator", "did2s")),
        )
        # The event-study card carries an extra notes block below the figure,
        # so it is assembled and returned here.
        return ui.card(
            ui.card_header("Event Study"),
            pick("es_outcome", "Outcome", numeric),
            pick("es_cohort", "Cohort (first-treated period)", factors),
            estimator_select,
            out,
            ui.output_ui("t_event_study_notes"),
        )
    elif name == "panel_models":
        controls = [
            pick("pm_dv", "Dependent", numeric),
            multi_pick("pm_idvs", "Independent"),
        ]

    # Components without a branch above (descriptive_table, corrplot,
    # missing_values) have no selectors and get a card with just the output.
    header = ui.card_header(name.replace("_", " ").title())
    return ui.card(header, *controls, out)
|