expdpy 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. expdpy/__init__.py +147 -0
  2. expdpy/_assets/favicon.png +0 -0
  3. expdpy/_assets/favicon.svg +12 -0
  4. expdpy/_assets/logo-navbar.svg +14 -0
  5. expdpy/_assets/logo.png +0 -0
  6. expdpy/_assets/logo.svg +14 -0
  7. expdpy/_corr.py +80 -0
  8. expdpy/_estimation/__init__.py +38 -0
  9. expdpy/_estimation/_capture.py +26 -0
  10. expdpy/_estimation/_fit.py +56 -0
  11. expdpy/_estimation/_formula.py +50 -0
  12. expdpy/_estimation/_results.py +30 -0
  13. expdpy/_estimation/_spec.py +112 -0
  14. expdpy/_estimation/_tidy.py +35 -0
  15. expdpy/_estimation/_vcov.py +52 -0
  16. expdpy/_theme.py +201 -0
  17. expdpy/_types.py +505 -0
  18. expdpy/_validation.py +43 -0
  19. expdpy/app/__init__.py +766 -0
  20. expdpy/app/_components.py +282 -0
  21. expdpy/app/_config_io.py +63 -0
  22. expdpy/app/_export_nb.py +234 -0
  23. expdpy/app/_sample.py +124 -0
  24. expdpy/app/_state.py +102 -0
  25. expdpy/app/_udv.py +179 -0
  26. expdpy/app/_upload.py +43 -0
  27. expdpy/app/_varcat.py +100 -0
  28. expdpy/by_group.py +313 -0
  29. expdpy/coefplot.py +236 -0
  30. expdpy/correlation.py +152 -0
  31. expdpy/data/__init__.py +103 -0
  32. expdpy/data/expdpy_config_kuznets.json +54 -0
  33. expdpy/data/gapminder.parquet +0 -0
  34. expdpy/data/gapminder_data_def.parquet +0 -0
  35. expdpy/data/kuznets.parquet +0 -0
  36. expdpy/data/kuznets_data_def.parquet +0 -0
  37. expdpy/data/staggered_did.parquet +0 -0
  38. expdpy/data/staggered_did_data_def.parquet +0 -0
  39. expdpy/did.py +405 -0
  40. expdpy/distributions.py +130 -0
  41. expdpy/estimation.py +282 -0
  42. expdpy/fwl.py +284 -0
  43. expdpy/inference.py +92 -0
  44. expdpy/missing.py +117 -0
  45. expdpy/outliers.py +180 -0
  46. expdpy/panel_models.py +244 -0
  47. expdpy/pedagogy/__init__.py +43 -0
  48. expdpy/pedagogy/_format.py +88 -0
  49. expdpy/pedagogy/_interpret.py +355 -0
  50. expdpy/pedagogy/_mixin.py +44 -0
  51. expdpy/pedagogy/_registry.py +121 -0
  52. expdpy/pedagogy/_text/__init__.py +11 -0
  53. expdpy/pedagogy/_text/causal.py +65 -0
  54. expdpy/pedagogy/_text/correlation.py +77 -0
  55. expdpy/pedagogy/_text/outliers.py +53 -0
  56. expdpy/pedagogy/_text/regression.py +257 -0
  57. expdpy/pedagogy/_text/tables.py +51 -0
  58. expdpy/postestimation.py +202 -0
  59. expdpy/py.typed +0 -0
  60. expdpy/regression.py +201 -0
  61. expdpy/sandbox.py +307 -0
  62. expdpy/scatter.py +207 -0
  63. expdpy/streamlit_app/__init__.py +106 -0
  64. expdpy/streamlit_app/_context.py +99 -0
  65. expdpy/streamlit_app/_entry.py +57 -0
  66. expdpy/streamlit_app/_handoff.py +149 -0
  67. expdpy/streamlit_app/_launcher.py +103 -0
  68. expdpy/streamlit_app/_pages.py +424 -0
  69. expdpy/streamlit_app/_pipeline.py +99 -0
  70. expdpy/streamlit_app/_render.py +221 -0
  71. expdpy/streamlit_app/_run.py +9 -0
  72. expdpy/streamlit_app/_sidebar.py +258 -0
  73. expdpy/streamlit_app/_widgets.py +95 -0
  74. expdpy/tables.py +348 -0
  75. expdpy/trends.py +263 -0
  76. expdpy-0.2.0.dist-info/METADATA +203 -0
  77. expdpy-0.2.0.dist-info/RECORD +80 -0
  78. expdpy-0.2.0.dist-info/WHEEL +4 -0
  79. expdpy-0.2.0.dist-info/entry_points.txt +2 -0
  80. expdpy-0.2.0.dist-info/licenses/LICENSE +25 -0
expdpy/__init__.py ADDED
@@ -0,0 +1,147 @@
1
+ """expdpy — Explore your panel data interactively.
2
+
3
+ A Python port of the ExPanDaR R package (Joachim Gassen, TRR 266). Provides a set
4
+ of analytical functions for exploratory analysis of panel and cross-sectional data
5
+ (descriptive tables, correlations, time trends, scatter plots, regression tables)
6
+ returning interactive Plotly figures and Great Tables / pyfixest output, plus the
7
+ ``ExPdPy`` interactive app (Shiny for Python).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from expdpy._types import (
13
+ BarChartResult,
14
+ ByGroupBarGraphResult,
15
+ ByGroupTrendGraphResult,
16
+ CoefficientPlotResult,
17
+ CorrelationGraphResult,
18
+ CorrelationTableResult,
19
+ DescriptiveTableResult,
20
+ EstimationResult,
21
+ EventStudyResult,
22
+ ExtObsTableResult,
23
+ FixefPlotResult,
24
+ FWLPlotResult,
25
+ HausmanTestResult,
26
+ HistogramResult,
27
+ JointTestResult,
28
+ PanelViewResult,
29
+ PredictionResult,
30
+ QuantileTrendGraphResult,
31
+ RegressionTableResult,
32
+ RobustInferenceResult,
33
+ SandboxResult,
34
+ TrendGraphResult,
35
+ )
36
+ from expdpy.by_group import (
37
+ prepare_by_group_bar_graph,
38
+ prepare_by_group_trend_graph,
39
+ prepare_by_group_violin_graph,
40
+ )
41
+ from expdpy.coefplot import prepare_coefficient_plot
42
+ from expdpy.correlation import prepare_correlation_graph
43
+ from expdpy.did import prepare_event_study, prepare_panel_view
44
+ from expdpy.distributions import prepare_bar_chart, prepare_histogram
45
+ from expdpy.estimation import prepare_estimation
46
+ from expdpy.fwl import prepare_fwl_plot
47
+ from expdpy.inference import prepare_robust_inference
48
+ from expdpy.missing import prepare_missing_values_graph
49
+ from expdpy.outliers import treat_outliers
50
+ from expdpy.panel_models import prepare_hausman_test, prepare_panel_table
51
+ from expdpy.pedagogy import Explainer, explain, list_topics
52
+ from expdpy.postestimation import (
53
+ prepare_fixef_plot,
54
+ prepare_joint_test,
55
+ prepare_predictions,
56
+ )
57
+ from expdpy.regression import prepare_regression_table
58
+ from expdpy.sandbox import (
59
+ sandbox_clustering_se,
60
+ sandbox_omitted_variable_bias,
61
+ sandbox_pooled_vs_fixed_effects,
62
+ )
63
+ from expdpy.scatter import prepare_scatter_plot
64
+ from expdpy.tables import (
65
+ prepare_correlation_table,
66
+ prepare_descriptive_table,
67
+ prepare_ext_obs_table,
68
+ )
69
+ from expdpy.trends import prepare_quantile_trend_graph, prepare_trend_graph
70
+
71
+ __version__ = "0.2.0"
72
+
73
+ __all__ = [
74
+ # outliers
75
+ "treat_outliers",
76
+ # tables
77
+ "prepare_descriptive_table",
78
+ "prepare_correlation_table",
79
+ "prepare_ext_obs_table",
80
+ # correlation graph
81
+ "prepare_correlation_graph",
82
+ # trends
83
+ "prepare_trend_graph",
84
+ "prepare_quantile_trend_graph",
85
+ # by group
86
+ "prepare_by_group_bar_graph",
87
+ "prepare_by_group_trend_graph",
88
+ "prepare_by_group_violin_graph",
89
+ # distributions
90
+ "prepare_histogram",
91
+ "prepare_bar_chart",
92
+ # missing
93
+ "prepare_missing_values_graph",
94
+ # scatter
95
+ "prepare_scatter_plot",
96
+ # regression
97
+ "prepare_regression_table",
98
+ # estimation (IV / Poisson / GLM / model comparison)
99
+ "prepare_estimation",
100
+ # post-estimation
101
+ "prepare_fixef_plot",
102
+ "prepare_predictions",
103
+ "prepare_joint_test",
104
+ # robust inference
105
+ "prepare_robust_inference",
106
+ # fwl plot
107
+ "prepare_fwl_plot",
108
+ # coefficient plot
109
+ "prepare_coefficient_plot",
110
+ # event study / staggered DiD
111
+ "prepare_event_study",
112
+ "prepare_panel_view",
113
+ # concept sandboxes
114
+ "sandbox_omitted_variable_bias",
115
+ "sandbox_pooled_vs_fixed_effects",
116
+ "sandbox_clustering_se",
117
+ # panel models (linearmodels)
118
+ "prepare_panel_table",
119
+ "prepare_hausman_test",
120
+ # pedagogy
121
+ "explain",
122
+ "list_topics",
123
+ "Explainer",
124
+ # result types
125
+ "DescriptiveTableResult",
126
+ "CorrelationTableResult",
127
+ "CorrelationGraphResult",
128
+ "ExtObsTableResult",
129
+ "TrendGraphResult",
130
+ "QuantileTrendGraphResult",
131
+ "ByGroupBarGraphResult",
132
+ "ByGroupTrendGraphResult",
133
+ "HistogramResult",
134
+ "BarChartResult",
135
+ "RegressionTableResult",
136
+ "FWLPlotResult",
137
+ "CoefficientPlotResult",
138
+ "EstimationResult",
139
+ "FixefPlotResult",
140
+ "PredictionResult",
141
+ "JointTestResult",
142
+ "RobustInferenceResult",
143
+ "EventStudyResult",
144
+ "PanelViewResult",
145
+ "SandboxResult",
146
+ "HausmanTestResult",
147
+ ]
Binary file
@@ -0,0 +1,12 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64" role="img" aria-label="expdpy">
2
+ <title>expdpy</title>
3
+ <!-- Solid blue tile so the mark reads at favicon sizes -->
4
+ <rect x="0" y="0" width="64" height="64" rx="14" fill="#1f77b4"/>
5
+ <!-- Same N-shaped Kuznets curve, white, fitted to the tile -->
6
+ <path d="M12 49 C17 36 22 16 27 13 C32 10 38 42 42 44 C46 46 51 21 56 12"
7
+ fill="none" stroke="#ffffff" stroke-width="5"
8
+ stroke-linecap="round" stroke-linejoin="round"/>
9
+ <circle cx="27" cy="13" r="3" fill="#ffffff"/>
10
+ <circle cx="42" cy="44" r="3" fill="#ffffff"/>
11
+ <circle cx="56" cy="12" r="3" fill="#ffffff"/>
12
+ </svg>
@@ -0,0 +1,14 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64" role="img" aria-label="expdpy logo">
2
+ <title>expdpy</title>
3
+ <!-- Faint axis (baseline + left axis) — translucent white for chart context on a blue navbar -->
4
+ <path d="M12 8 V54 H56" fill="none" stroke="#ffffff" stroke-opacity="0.5" stroke-width="2.5"
5
+ stroke-linecap="round" stroke-linejoin="round"/>
6
+ <!-- N-shaped Kuznets curve: rise, fall, rise -->
7
+ <path d="M14 48 C19 36 23 18 27 15 C32 12 37 41 41 43 C45 45 50 22 54 13"
8
+ fill="none" stroke="#ffffff" stroke-width="4"
9
+ stroke-linecap="round" stroke-linejoin="round"/>
10
+ <!-- Data points on the turning peaks/trough -->
11
+ <circle cx="27" cy="15" r="2.6" fill="#ffffff"/>
12
+ <circle cx="41" cy="43" r="2.6" fill="#ffffff"/>
13
+ <circle cx="54" cy="13" r="2.6" fill="#ffffff"/>
14
+ </svg>
Binary file
@@ -0,0 +1,14 @@
1
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64" role="img" aria-label="expdpy logo">
2
+ <title>expdpy</title>
3
+ <!-- Faint axis (baseline + left axis) for chart context -->
4
+ <path d="M12 8 V54 H56" fill="none" stroke="#cfe0f1" stroke-width="2.5"
5
+ stroke-linecap="round" stroke-linejoin="round"/>
6
+ <!-- N-shaped Kuznets curve: rise, fall, rise -->
7
+ <path d="M14 48 C19 36 23 18 27 15 C32 12 37 41 41 43 C45 45 50 22 54 13"
8
+ fill="none" stroke="#1f77b4" stroke-width="4"
9
+ stroke-linecap="round" stroke-linejoin="round"/>
10
+ <!-- Data points on the turning peaks/trough -->
11
+ <circle cx="27" cy="15" r="2.6" fill="#1f77b4"/>
12
+ <circle cx="41" cy="43" r="2.6" fill="#1f77b4"/>
13
+ <circle cx="54" cy="13" r="2.6" fill="#1f77b4"/>
14
+ </svg>
expdpy/_corr.py ADDED
@@ -0,0 +1,80 @@
1
+ """Pairwise correlation engine shared by the correlation table and graph.
2
+
3
+ Faithful port of ExPanDaR's internal ``cor_mat()`` helper: for every pair of columns it
4
+ computes the correlation, two-sided p-value and the number of *pairwise* complete
5
+ observations (rows finite in both columns), placing Pearson or Spearman results depending
6
+ on ``method``. p-values use the asymptotic approximation (R's ``cor.test(..., exact = FALSE)``).
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import warnings
12
+ from dataclasses import dataclass
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ from scipy import stats
17
+
18
+ __all__ = ["CorMat", "cor_mat"]
19
+
20
+
21
@dataclass(frozen=True)
class CorMat:
    """Square correlation, p-value and observation-count matrices.

    The three frames are built with identical row and column labels (the column
    names of the input frame), so they can be indexed interchangeably.
    """

    # Pairwise correlation coefficients.
    r: pd.DataFrame
    # p-values belonging to the coefficients in ``r``.
    p: pd.DataFrame
    # Number of rows that are finite in both columns of each pair.
    n: pd.DataFrame


def cor_mat(df: pd.DataFrame, method: str) -> CorMat:
    """Compute a pairwise correlation matrix.

    Parameters
    ----------
    df
        Data frame of numeric/logical columns. Missing values, including ``pd.NA``
        in nullable extension columns, are treated as NaN and excluded pairwise.
    method
        ``"pearson"`` or ``"spearman"``.

    Returns
    -------
    CorMat
        ``r`` (coefficients, diagonal 1.0), ``p`` (p-values, diagonal 0.0) and ``n``
        (pairwise observation counts, diagonal = finite count per column).
        Off-diagonal entries of ``r`` and ``p`` stay NaN for pairs with fewer than
        three jointly finite rows.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"pearson"`` nor ``"spearman"``.
    """
    if method not in ("pearson", "spearman"):
        raise ValueError("method must be 'pearson' or 'spearman'")

    cols = list(df.columns)
    # ``na_value`` makes the float conversion explicit for nullable dtypes: without it
    # some pandas versions refuse to convert ``pd.NA`` to a float array.
    mat = df.to_numpy(dtype=float, na_value=np.nan)
    n_cols = mat.shape[1]
    finite = np.isfinite(mat)

    r = np.full((n_cols, n_cols), np.nan)
    p = np.full((n_cols, n_cols), np.nan)
    np.fill_diagonal(r, 1.0)
    np.fill_diagonal(p, 0.0)

    # Every cell of the count matrix is assigned below, so an integer array suffices.
    n = np.zeros((n_cols, n_cols), dtype=np.int64)
    n[np.diag_indices(n_cols)] = finite.sum(axis=0)

    corr_fn = stats.pearsonr if method == "pearson" else stats.spearmanr
    # scipy warns on e.g. constant input; those pairs simply end up as NaN.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        for i in range(n_cols - 1):
            for j in range(i + 1, n_cols):
                mask = finite[:, i] & finite[:, j]
                count = int(mask.sum())
                n[i, j] = n[j, i] = count
                if count > 2:
                    coef, pval = corr_fn(mat[mask, i], mat[mask, j])
                    r[i, j] = r[j, i] = float(coef)
                    p[i, j] = p[j, i] = float(pval)

    return CorMat(
        r=pd.DataFrame(r, index=cols, columns=cols),
        p=pd.DataFrame(p, index=cols, columns=cols),
        n=pd.DataFrame(n, index=cols, columns=cols).astype("Int64"),
    )
@@ -0,0 +1,38 @@
1
+ """Shared estimation engine for expdpy's regression-style functions.
2
+
3
+ This private package holds the building blocks that every estimator plugs into:
4
+
5
+ * :mod:`._spec` — the normalized :class:`ModelSpec` / :class:`VCovSpec` dataclasses,
6
+ * :mod:`._formula` — a pure pyfixest-formula builder,
7
+ * :mod:`._vcov` — a pure ``(vcov, vcov_kwargs)`` builder,
8
+ * :mod:`._fit` — dispatch to ``feols`` / ``fepois`` / ``feglm`` (+ the SSC default),
9
+ * :mod:`._tidy` — the tidy-coefficient-frame helper,
10
+ * :mod:`._capture` — a stdout-capture context manager.
11
+
12
+ ``expdpy.regression`` is a thin adapter over this engine; keeping the engine separate
13
+ lets future estimators (IV, Poisson, GLM, model comparison) reuse one tested core.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from expdpy._estimation._capture import capture_stdout
19
+ from expdpy._estimation._fit import SSC, fit_model
20
+ from expdpy._estimation._formula import build_formula
21
+ from expdpy._estimation._results import coerce_models, first_model
22
+ from expdpy._estimation._spec import ModelSpec, VCovSpec, as_list
23
+ from expdpy._estimation._tidy import tidy_model
24
+ from expdpy._estimation._vcov import build_vcov
25
+
26
+ __all__ = [
27
+ "SSC",
28
+ "ModelSpec",
29
+ "VCovSpec",
30
+ "as_list",
31
+ "build_formula",
32
+ "build_vcov",
33
+ "capture_stdout",
34
+ "coerce_models",
35
+ "first_model",
36
+ "fit_model",
37
+ "tidy_model",
38
+ ]
@@ -0,0 +1,26 @@
1
+ """A small stdout-capture context manager (several pyfixest helpers print to stdout)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import io
7
+ from collections.abc import Iterator
8
+
9
+ __all__ = ["capture_stdout"]
10
+
11
+
12
@contextlib.contextmanager
def capture_stdout() -> Iterator[io.StringIO]:
    """Collect everything written to ``sys.stdout`` inside the ``with`` block.

    Useful for helpers that print their output instead of returning it (the module
    docstring names pyfixest's ``etable(type="md")`` as the motivating case).

    Yields
    ------
    io.StringIO
        The capturing buffer; call ``.getvalue()`` on it once the block has finished.
    """
    # ``redirect_stdout`` hands back its target, so the buffer can be bound directly.
    with contextlib.redirect_stdout(io.StringIO()) as buffer:
        yield buffer
@@ -0,0 +1,56 @@
1
+ """Fit dispatcher: build the formula + vcov and call the right pyfixest entrypoint."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import pandas as pd
8
+ import pyfixest as pf
9
+
10
+ from expdpy._estimation._formula import build_formula
11
+ from expdpy._estimation._spec import ModelSpec
12
+ from expdpy._estimation._vcov import build_vcov
13
+
14
+ __all__ = ["SSC", "fit_model"]
15
+
16
+ # Stata 'reghdfe'-consistent small-sample correction (matches lfe::felm cmethod='reghdfe').
17
+ SSC = pf.ssc(k_adj=True, G_adj=True)
18
+
19
+
20
def fit_model(data: pd.DataFrame, spec: ModelSpec, *, ssc: Any = SSC) -> Any:
    """Estimate ``spec`` on ``data`` with the matching pyfixest entry point.

    ``"ols"`` and ``"iv"`` go to ``feols``, ``"poisson"`` to ``fepois`` and
    ``"logit"``/``"probit"`` to ``feglm`` (with ``family`` set to the model kind).
    No cleaning happens here: ``data`` is passed through unchanged, so column
    selection, NA handling and any dtype casting are left to the caller.

    Parameters
    ----------
    data
        The (already cleaned) estimation frame.
    spec
        The normalized model specification.
    ssc
        The small-sample-correction object (defaults to the module-level :data:`SSC`).

    Returns
    -------
    Any
        Whatever the selected pyfixest function returns (a single fitted model, or a
        multi-model container when the formula is stepwise / multi-outcome).

    Raises
    ------
    ValueError
        If ``spec.model`` is not one of the supported kinds, or if building the
        formula / vcov arguments fails.
    """
    formula = build_formula(spec)
    vcov, vcov_kwargs = build_vcov(spec.vcov)

    # Optional arguments are only forwarded when they were actually requested.
    fit_kwargs: dict[str, Any] = {"vcov": vcov, "ssc": ssc}
    if vcov_kwargs is not None:
        fit_kwargs["vcov_kwargs"] = vcov_kwargs
    if spec.weights:
        fit_kwargs["weights"] = spec.weights
    # NOTE(review): ``spec.offset`` is not forwarded to any estimator here — confirm
    # whether callers handle it or whether it should be passed for Poisson models.

    kind = spec.model
    if kind == "poisson":
        return pf.fepois(formula, data=data, **fit_kwargs)
    if kind in ("logit", "probit"):
        return pf.feglm(formula, data=data, family=kind, **fit_kwargs)
    if kind in ("ols", "iv"):
        return pf.feols(formula, data=data, **fit_kwargs)
    raise ValueError(f"unknown model kind: {kind!r}")  # pragma: no cover
@@ -0,0 +1,50 @@
1
+ """Pure builder of the pyfixest formula string from a :class:`ModelSpec`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from expdpy._estimation._spec import ModelSpec
6
+
7
+ __all__ = ["build_formula"]
8
+
9
+
10
def build_formula(spec: ModelSpec) -> str:
    """Assemble the pyfixest formula string described by ``spec``.

    The formula consists of up to three ``|``-separated parts:

    1. ``"<dv> ~ <rhs>"`` — outcomes joined with ``+``; the right-hand side is the
       regressors joined with ``+``, wrapped as ``"<stepwise>(x1, x2, ...)"`` when a
       stepwise wrapper is set, or ``"1"`` when there are no regressors;
    2. the fixed effects joined with ``+`` (only when present);
    3. ``"<endog> ~ <instruments>"`` (only for ``model == "iv"``).

    Parameters
    ----------
    spec
        The normalized model specification.

    Returns
    -------
    str
        The formula string.

    Raises
    ------
    ValueError
        If ``spec.model`` is ``"iv"`` but ``endog`` or ``instruments`` is empty.

    Examples
    --------
    >>> from expdpy._estimation import ModelSpec, build_formula
    >>> build_formula(ModelSpec(dv=("y",), idvs=("x1", "x2"), feffects=("firm",)))
    'y ~ x1 + x2 | firm'
    """
    regressors = spec.idvs
    if not regressors:
        # Intercept-only right-hand side; a stepwise wrapper has nothing to wrap.
        rhs = "1"
    elif spec.stepwise:
        rhs = f"{spec.stepwise}({', '.join(regressors)})"
    else:
        rhs = " + ".join(regressors)

    parts = [f"{' + '.join(spec.dv)} ~ {rhs}"]
    if spec.feffects:
        parts.append(" + ".join(spec.feffects))
    if spec.model == "iv":
        if not (spec.endog and spec.instruments):
            raise ValueError(
                "instrumental-variables models require both 'endog' and 'instruments'"
            )
        # The IV part always comes last, after the (optional) fixed-effect part.
        parts.append(f"{' + '.join(spec.endog)} ~ {' + '.join(spec.instruments)}")
    return " | ".join(parts)
@@ -0,0 +1,30 @@
1
+ """Helpers for accepting a fitted model, a list of them, or an expdpy result object."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ __all__ = ["coerce_models", "first_model"]
8
+
9
+
10
def coerce_models(obj: Any) -> list[Any]:
    """Normalize ``obj`` into a non-empty list of fitted models.

    Three input shapes are understood, checked in this order:

    * any object with a ``.models`` attribute — its contents are copied into a list;
    * a ``list`` or ``tuple`` — copied into a list;
    * anything else — treated as a single model and wrapped in a list.

    Raises
    ------
    ValueError
        If the resulting list is empty.
    """
    if hasattr(obj, "models"):
        models = list(obj.models)
    else:
        models = list(obj) if isinstance(obj, (list, tuple)) else [obj]

    if models:
        return models
    raise ValueError("no models found")
26
+
27
+
28
def first_model(obj: Any) -> Any:
    """Return the leading fitted model of ``obj``.

    ``obj`` is normalized with :func:`coerce_models` first, so a single model, a
    list/tuple of models or an object carrying ``.models`` are all accepted.
    """
    models = coerce_models(obj)
    return models[0]
@@ -0,0 +1,112 @@
1
+ """Normalized model + variance-covariance specifications for the estimation engine.
2
+
3
+ These small, frozen, hashable dataclasses sit between the friendly public function
4
+ signatures and pyfixest. Keeping the spec normalized in one place means the formula
5
+ builder, the vcov builder and the fit dispatcher never have to re-parse user input.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass
11
+ from typing import Any, Literal
12
+
13
+ __all__ = [
14
+ "ModelKind",
15
+ "ModelSpec",
16
+ "Stepwise",
17
+ "VCovKind",
18
+ "VCovSpec",
19
+ "as_list",
20
+ ]
21
+
22
+ VCovKind = Literal["iid", "hetero", "HC1", "HC2", "HC3", "CRV1", "CRV3", "NW", "DK"]
23
+ ModelKind = Literal["ols", "iv", "poisson", "logit", "probit"]
24
+ Stepwise = Literal["sw", "sw0", "csw", "csw0"]
25
+
26
+
27
def as_list(value: Any) -> list[str]:
    """Turn ``None`` / ``""`` / a single name / a sequence of names into a flat list.

    Parameters
    ----------
    value
        ``None``, an empty string, a single variable name, or an iterable of names.

    Returns
    -------
    list of str
        ``[]`` for ``None`` and ``""``; a one-element list for any other string;
        otherwise the truthy items of ``value`` in their original order.
    """
    if isinstance(value, str):
        # A string is one name, never a sequence of characters.
        return [value] if value else []
    if value is None:
        return []
    return list(filter(None, value))
45
+
46
+
47
@dataclass(frozen=True)
class VCovSpec:
    """Immutable description of how standard errors should be computed.

    Parameters
    ----------
    kind
        Name of the variance-covariance estimator. One of ``"iid"``, ``"hetero"``,
        ``"HC1"``, ``"HC2"``, ``"HC3"``, ``"CRV1"``, ``"CRV3"`` (cluster-robust),
        ``"NW"`` (Newey-West) or ``"DK"`` (Driscoll-Kraay).
    cluster
        Cluster variable name(s); needed by ``"CRV1"``/``"CRV3"``.
    time_id
        Time identifier; needed by ``"NW"``/``"DK"``.
    panel_id
        Panel (unit) identifier; needed by ``"NW"``/``"DK"``.
    lag
        Lag truncation for ``"NW"``/``"DK"``; ``None`` leaves the choice to the
        estimator.
    """

    # Field order is part of the positional constructor signature — do not reorder.
    kind: VCovKind = "iid"
    cluster: tuple[str, ...] = ()
    time_id: str | None = None
    panel_id: str | None = None
    lag: int | None = None
72
+
73
+
74
@dataclass(frozen=True)
class ModelSpec:
    """Immutable description of one model (or a stepwise / multi-outcome family).

    Parameters
    ----------
    dv
        Dependent-variable name(s); several names yield a multi-outcome formula.
    idvs
        Independent (exogenous) regressor names.
    feffects
        Names of the fixed-effect variables.
    endog
        Endogenous regressors (only used by instrumental-variables models).
    instruments
        Excluded instruments (only used by instrumental-variables models).
    model
        Estimator family: ``"ols"``, ``"iv"``, ``"poisson"``, ``"logit"`` or
        ``"probit"``.
    stepwise
        Optional stepwise wrapper (``"sw"``, ``"sw0"``, ``"csw"`` or ``"csw0"``) put
        around ``idvs`` so a sequence of models is estimated in one call.
    vcov
        The variance-covariance specification.
    weights
        Optional name of the weights column.
    offset
        Optional name of the offset column (Poisson models).
    """

    # Field order is part of the positional constructor signature — do not reorder.
    dv: tuple[str, ...]
    idvs: tuple[str, ...]
    feffects: tuple[str, ...] = ()
    endog: tuple[str, ...] = ()
    instruments: tuple[str, ...] = ()
    model: ModelKind = "ols"
    stepwise: Stepwise | None = None
    # A frozen instance is safe to share as a class-level default.
    vcov: VCovSpec = VCovSpec()
    weights: str | None = None
    offset: str | None = None
@@ -0,0 +1,35 @@
1
+ """Tidy-coefficient-frame helper shared by the regression-style functions."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import pandas as pd
8
+
9
+ __all__ = ["tidy_model"]
10
+
11
+
12
def tidy_model(model: Any, model_id: int, byvalue: str | None = None) -> pd.DataFrame:
    """Build the tidy coefficient frame of a single fitted model.

    Parameters
    ----------
    model
        A fitted model exposing ``.tidy()`` that returns a data frame indexed by
        coefficient.
    model_id
        Identifier written into the leading ``model`` column.
    byvalue
        Optional subgroup label; when given it is appended as a ``byvalue`` column.

    Returns
    -------
    pandas.DataFrame
        ``model.tidy()`` with its index moved into the frame and the first resulting
        column renamed to ``term``, preceded by a ``model`` column and, if requested,
        followed by a ``byvalue`` column.
    """
    frame = model.tidy().reset_index()
    # After ``reset_index`` the former (coefficient) index is the first column.
    index_col = frame.columns[0]
    frame = frame.rename(columns={index_col: "term"})
    frame.insert(0, "model", model_id)
    if byvalue is None:
        return frame
    frame["byvalue"] = byvalue
    return frame
@@ -0,0 +1,52 @@
1
+ """Pure builder of pyfixest's ``(vcov, vcov_kwargs)`` pair from a :class:`VCovSpec`."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from expdpy._estimation._spec import VCovSpec
8
+
9
+ __all__ = ["build_vcov"]
10
+
11
+
12
def build_vcov(spec: VCovSpec) -> tuple[Any, dict[str, Any] | None]:
    """Convert a :class:`VCovSpec` into a ``(vcov, vcov_kwargs)`` argument pair.

    Parameters
    ----------
    spec
        The normalized variance-covariance specification.

    Returns
    -------
    tuple
        ``(vcov, vcov_kwargs)``:

        * ``"CRV1"``/``"CRV3"`` → ``({kind: "a + b"}, None)`` with the cluster names
          joined by ``" + "``;
        * ``"NW"``/``"DK"`` → ``(kind, {"time_id": ..., "panel_id": ...})``, plus a
          ``"lag"`` entry when ``spec.lag`` is not ``None``;
        * every other kind → ``(kind, None)``.

    Raises
    ------
    ValueError
        If a cluster-robust kind has no cluster variable, or a serial-correlation
        kind lacks ``time_id`` or ``panel_id``.

    Examples
    --------
    >>> from expdpy._estimation import VCovSpec, build_vcov
    >>> build_vcov(VCovSpec(kind="CRV1", cluster=("firm", "year")))
    ({'CRV1': 'firm + year'}, None)
    >>> build_vcov(VCovSpec(kind="iid"))
    ('iid', None)
    """
    kind = spec.kind

    if kind in ("NW", "DK"):
        if spec.time_id is None or spec.panel_id is None:
            raise ValueError(
                f"{kind} standard errors require both 'time_id' and 'panel_id'"
            )
        extra: dict[str, Any] = {"time_id": spec.time_id, "panel_id": spec.panel_id}
        # Omit the key entirely when no lag was requested.
        if spec.lag is not None:
            extra["lag"] = spec.lag
        return kind, extra

    if kind in ("CRV1", "CRV3"):
        clusters = spec.cluster
        if not clusters:
            raise ValueError(
                f"{kind} standard errors require at least one cluster variable"
            )
        return {kind: " + ".join(clusters)}, None

    # Plain estimators ("iid", "hetero", "HC1", ...) are passed through by name.
    return kind, None