panelxai 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
panelxai-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Dr Merwan Roudane
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,147 @@
1
+ Metadata-Version: 2.4
2
+ Name: panelxai
3
+ Version: 0.1.0
4
+ Summary: Explainable AI for panel time-series econometrics: structured (variable x lag x unit x time) SHAP, factor / cross-sectional-dependence-aware attribution, regime-aware explanation drift, constrained counterfactuals, bootstrap uncertainty, and hybrid econometric-core + ML-residual models.
5
+ Author-email: Dr Merwan Roudane <merwanroudane920@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/merwanroudane/panelxai
8
+ Project-URL: Repository, https://github.com/merwanroudane/panelxai
9
+ Project-URL: Issues, https://github.com/merwanroudane/panelxai/issues
10
+ Keywords: explainable ai,xai,shap,panel data,time series,dynamic panel,econometrics,cross-sectional dependence,common factors,fixed effects,counterfactual,regime,interpretable machine learning,hybrid models
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.9
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: numpy>=1.22
20
+ Requires-Dist: pandas>=1.5
21
+ Requires-Dist: scipy>=1.9
22
+ Requires-Dist: scikit-learn>=1.1
23
+ Requires-Dist: statsmodels>=0.13
24
+ Requires-Dist: matplotlib>=3.5
25
+ Requires-Dist: shap>=0.41
26
+ Requires-Dist: xgboost>=1.6
27
+ Requires-Dist: linearmodels>=4.27
28
+ Requires-Dist: tabulate>=0.9
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest>=7.0; extra == "dev"
31
+ Dynamic: license-file
32
+
33
+ # panelxai
34
+
35
+ **Explainable AI for panel time-series econometrics.**
36
+
37
+ Most XAI work either explains *time series* (TimeSHAP, WindowSHAP, TFT) **or**
38
+ applies SHAP to *panel-shaped* data — but rarely respects the econometric
39
+ structure of a **dynamic panel**: distributed lags, cross-sectional dependence,
40
+ common factors, unit heterogeneity, regime change, fixed effects.
41
+
42
+ `panelxai` closes that gap. A raw SHAP value is indexed only by
43
+ *(observation, feature)*. Because the design builder encodes
44
+ **variable**, **lag**, and **kind** (own regressor vs cross-sectional-average
45
+ factor proxy) into every feature name, `panelxai` re-indexes attributions onto
46
+ the structure that matters:
47
+
48
+ ```
49
+ variable × lag × unit × time + own-vs-factor (CSD) decomposition
50
+ ```
51
+
52
+ Author: **Dr Merwan Roudane** · MIT License · `pip install -e .`
53
+
54
+ ---
55
+
56
+ ## What it provides
57
+
58
+ | Capability | Function / class | Idea |
59
+ |---|---|---|
60
+ | Panel TS simulator | `simulate_panel_ts` | DGP with true lag drivers, common factors (CSD), regime switch, non-linearity |
61
+ | Lag + CCE design | `build_design` | within-FE design with own lags and cross-sectional averages |
62
+ | Models | `GBPanel`, `HybridPanel` | XGBoost on the design; or **linear econometric core + ML residual** |
63
+ | Structured SHAP | `StructuredExplainer` | importance by variable, by lag, variable×lag matrix, per-unit |
64
+ | Factor-aware XAI | `.own_vs_factor()` | share of explanation from own dynamics vs common factors / CSD |
65
+ | Regime-aware XAI | `regime_importance`, `regime_effect_sign`, `explanation_drift` | importance & **effect-sign flips** across regimes; rolling drift |
66
+ | Counterfactual XAI | `counterfactual` | minimal, box-constrained change to move a prediction |
67
+ | Uncertainty-aware XAI | `bootstrap_importance` | unit cluster-bootstrap CIs + stability flag for importances |
68
+ | Plots / tables / report | `plot_*`, `tables`, `Report` | publication-ready outputs |
69
+
70
+ ## Quick start
71
+
72
+ ```python
73
+ import panelxai as px
74
+
75
+ df = px.simulate_panel_ts(n_units=30, n_periods=40, n_features=4,
76
+ n_lags=2, n_factors=1, seed=7)
77
+
78
+ model = px.GBPanel(lags=2, csa=True).fit(df) # explains the full model
79
+ ex = px.StructuredExplainer(model)
80
+
81
+ ex.variable_importance() # which variable matters
82
+ ex.lag_importance() # at which lag (temporal profile)
83
+ ex.variable_lag_matrix("own") # variable × lag heatmap data
84
+ ex.own_vs_factor() # own dynamics vs common-factor / CSD share
85
+ ex.unit_importance() # cross-sectional heterogeneity of explanations
86
+
87
+ px.plot_variable_lag_heatmap(ex, save="varlag.png")
88
+ ```
89
+
90
+ On the built-in DGP (x1 drives `y` at lag 0, x2 at lag 1, x3 at lag 2) the
91
+ structured SHAP **recovers exactly that** — each variable's mass concentrates at
92
+ its true lag.
93
+
94
+ ## Hybrid econometric core + ML
95
+
96
+ ```python
97
+ m = px.HybridPanel(lags=2).fit(df) # y = linear dynamic-panel core + ML residual
98
+ print(px.Report(m).text()) # SHAP here explains only the NON-LINEAR part
99
+ ```
100
+
101
+ ## Regime-aware (sign-flip detection)
102
+
103
+ ```python
104
+ df = px.simulate_panel_ts(regime_break=0.5, seed=11) # x1 effect flips at midpoint
105
+ ex = px.StructuredExplainer(px.GBPanel(lags=2).fit(df))
106
+ px.regime_effect_sign(ex, n_regimes=2) # x1: +corr -> -corr => sign_flip = True
107
+ ```
108
+
109
+ ## Uncertainty
110
+
111
+ ```python
112
+ boot = px.bootstrap_importance(lambda: px.GBPanel(lags=2),
113
+ df, n_boot=30, level=0.90)
114
+ boot # mean, lo, hi, stable (CI excludes zero)
115
+ ```
116
+
117
+ ## Examples
118
+
119
+ Runnable scripts in [`examples/`](examples):
120
+
121
+ 1. `01_structured_shap.py` — variable × lag × own/factor decomposition
122
+ 2. `02_regime_drift.py` — regime sign-flip & explanation drift
123
+ 3. `03_hybrid_counterfactual.py` — hybrid model + constrained counterfactual
124
+ 4. `04_uncertainty.py` — bootstrap importance CIs
125
+
126
+ ## Design notes
127
+
128
+ - **Fixed effects** are absorbed by the `fe="within"` transform, so SHAP
129
+ explains *within-unit* deviations (the dynamic signal), not level differences.
130
+ - **Cross-sectional dependence** is handled CCE-style: period-`t`
131
+ cross-sectional averages enter the design as proxies for unobserved common
132
+ factors; their SHAP share is reported separately from own dynamics.
133
+ - **The hybrid model** keeps a transparent linear econometric core and lets the
134
+ ML learner — and SHAP — speak only to the residual non-linearity, the honest
135
+ target of post-hoc XAI in an econometric setting.
136
+
137
+ ## Status & scope
138
+
139
+ v0.1.0 implements the structured / temporal, factor-aware, regime-aware,
140
+ counterfactual, uncertainty, and hybrid families on a tree-ensemble backbone.
141
+ Planned extensions: causal/interventional SHAP, deep-sequence models
142
+ (LSTM/TFT) with Integrated Gradients, and graph XAI for network panels.
143
+
144
+ ## Requirements
145
+
146
+ Python ≥ 3.9; numpy, pandas, scipy, scikit-learn, statsmodels, matplotlib,
147
+ shap, xgboost, linearmodels, tabulate.
@@ -0,0 +1,115 @@
1
+ # panelxai
2
+
3
+ **Explainable AI for panel time-series econometrics.**
4
+
5
+ Most XAI work either explains *time series* (TimeSHAP, WindowSHAP, TFT) **or**
6
+ applies SHAP to *panel-shaped* data — but rarely respects the econometric
7
+ structure of a **dynamic panel**: distributed lags, cross-sectional dependence,
8
+ common factors, unit heterogeneity, regime change, fixed effects.
9
+
10
+ `panelxai` closes that gap. A raw SHAP value is indexed only by
11
+ *(observation, feature)*. Because the design builder encodes
12
+ **variable**, **lag**, and **kind** (own regressor vs cross-sectional-average
13
+ factor proxy) into every feature name, `panelxai` re-indexes attributions onto
14
+ the structure that matters:
15
+
16
+ ```
17
+ variable × lag × unit × time + own-vs-factor (CSD) decomposition
18
+ ```
19
+
20
+ Author: **Dr Merwan Roudane** · MIT License · `pip install -e .`
21
+
22
+ ---
23
+
24
+ ## What it provides
25
+
26
+ | Capability | Function / class | Idea |
27
+ |---|---|---|
28
+ | Panel TS simulator | `simulate_panel_ts` | DGP with true lag drivers, common factors (CSD), regime switch, non-linearity |
29
+ | Lag + CCE design | `build_design` | within-FE design with own lags and cross-sectional averages |
30
+ | Models | `GBPanel`, `HybridPanel` | XGBoost on the design; or **linear econometric core + ML residual** |
31
+ | Structured SHAP | `StructuredExplainer` | importance by variable, by lag, variable×lag matrix, per-unit |
32
+ | Factor-aware XAI | `.own_vs_factor()` | share of explanation from own dynamics vs common factors / CSD |
33
+ | Regime-aware XAI | `regime_importance`, `regime_effect_sign`, `explanation_drift` | importance & **effect-sign flips** across regimes; rolling drift |
34
+ | Counterfactual XAI | `counterfactual` | minimal, box-constrained change to move a prediction |
35
+ | Uncertainty-aware XAI | `bootstrap_importance` | unit cluster-bootstrap CIs + stability flag for importances |
36
+ | Plots / tables / report | `plot_*`, `tables`, `Report` | publication-ready outputs |
37
+
38
+ ## Quick start
39
+
40
+ ```python
41
+ import panelxai as px
42
+
43
+ df = px.simulate_panel_ts(n_units=30, n_periods=40, n_features=4,
44
+ n_lags=2, n_factors=1, seed=7)
45
+
46
+ model = px.GBPanel(lags=2, csa=True).fit(df) # explains the full model
47
+ ex = px.StructuredExplainer(model)
48
+
49
+ ex.variable_importance() # which variable matters
50
+ ex.lag_importance() # at which lag (temporal profile)
51
+ ex.variable_lag_matrix("own") # variable × lag heatmap data
52
+ ex.own_vs_factor() # own dynamics vs common-factor / CSD share
53
+ ex.unit_importance() # cross-sectional heterogeneity of explanations
54
+
55
+ px.plot_variable_lag_heatmap(ex, save="varlag.png")
56
+ ```
57
+
58
+ On the built-in DGP (x1 drives `y` at lag 0, x2 at lag 1, x3 at lag 2) the
59
+ structured SHAP **recovers exactly that** — each variable's mass concentrates at
60
+ its true lag.
61
+
62
+ ## Hybrid econometric core + ML
63
+
64
+ ```python
65
+ m = px.HybridPanel(lags=2).fit(df) # y = linear dynamic-panel core + ML residual
66
+ print(px.Report(m).text()) # SHAP here explains only the NON-LINEAR part
67
+ ```
68
+
69
+ ## Regime-aware (sign-flip detection)
70
+
71
+ ```python
72
+ df = px.simulate_panel_ts(regime_break=0.5, seed=11) # x1 effect flips at midpoint
73
+ ex = px.StructuredExplainer(px.GBPanel(lags=2).fit(df))
74
+ px.regime_effect_sign(ex, n_regimes=2) # x1: +corr -> -corr => sign_flip = True
75
+ ```
76
+
77
+ ## Uncertainty
78
+
79
+ ```python
80
+ boot = px.bootstrap_importance(lambda: px.GBPanel(lags=2),
81
+ df, n_boot=30, level=0.90)
82
+ boot # mean, lo, hi, stable (CI excludes zero)
83
+ ```
84
+
85
+ ## Examples
86
+
87
+ Runnable scripts in [`examples/`](examples):
88
+
89
+ 1. `01_structured_shap.py` — variable × lag × own/factor decomposition
90
+ 2. `02_regime_drift.py` — regime sign-flip & explanation drift
91
+ 3. `03_hybrid_counterfactual.py` — hybrid model + constrained counterfactual
92
+ 4. `04_uncertainty.py` — bootstrap importance CIs
93
+
94
+ ## Design notes
95
+
96
+ - **Fixed effects** are absorbed by the `fe="within"` transform, so SHAP
97
+ explains *within-unit* deviations (the dynamic signal), not level differences.
98
+ - **Cross-sectional dependence** is handled CCE-style: period-`t`
99
+ cross-sectional averages enter the design as proxies for unobserved common
100
+ factors; their SHAP share is reported separately from own dynamics.
101
+ - **The hybrid model** keeps a transparent linear econometric core and lets the
102
+ ML learner — and SHAP — speak only to the residual non-linearity, the honest
103
+ target of post-hoc XAI in an econometric setting.
104
+
105
+ ## Status & scope
106
+
107
+ v0.1.0 implements the structured / temporal, factor-aware, regime-aware,
108
+ counterfactual, uncertainty, and hybrid families on a tree-ensemble backbone.
109
+ Planned extensions: causal/interventional SHAP, deep-sequence models
110
+ (LSTM/TFT) with Integrated Gradients, and graph XAI for network panels.
111
+
112
+ ## Requirements
113
+
114
+ Python ≥ 3.9; numpy, pandas, scipy, scikit-learn, statsmodels, matplotlib,
115
+ shap, xgboost, linearmodels, tabulate.
@@ -0,0 +1,50 @@
1
+ """panelxai - Explainable AI for panel time-series econometrics.
2
+
3
+ Structured (variable x lag x unit x time) SHAP, factor / cross-sectional-
4
+ dependence-aware attribution, regime-aware explanation drift, constrained
5
+ counterfactuals, bootstrap uncertainty, and hybrid econometric-core + ML
6
+ models.
7
+
8
+ Author : Dr Merwan Roudane <merwanroudane920@gmail.com>
9
+ GitHub : https://github.com/merwanroudane/panelxai
10
+ """
11
+ from .data import (
12
+ simulate_panel_ts, build_design, build_lags,
13
+ add_cross_sectional_averages, panel_describe,
14
+ make_feature_name, parse_feature_name, DesignSpec,
15
+ )
16
+ from .models import PanelModel, GBPanel, HybridPanel
17
+ from .explain import (
18
+ StructuredExplainer, regime_importance, regime_effect_sign,
19
+ explanation_drift, counterfactual, bootstrap_importance,
20
+ )
21
+ from .plots import (
22
+ plot_variable_lag_heatmap, plot_lag_profile, plot_own_vs_factor,
23
+ plot_unit_heterogeneity, plot_regime_drift, plot_importance_ci,
24
+ plot_counterfactual,
25
+ )
26
+ from . import tables
27
+ from .reports import Report
28
+
29
+ __author__ = "Dr Merwan Roudane"
30
+ __email__ = "merwanroudane920@gmail.com"
31
+ __url__ = "https://github.com/merwanroudane/panelxai"
32
+ __version__ = "0.1.0"
33
+
34
+ __all__ = [
35
+ # data
36
+ "simulate_panel_ts", "build_design", "build_lags",
37
+ "add_cross_sectional_averages", "panel_describe",
38
+ "make_feature_name", "parse_feature_name", "DesignSpec",
39
+ # models
40
+ "PanelModel", "GBPanel", "HybridPanel",
41
+ # explain
42
+ "StructuredExplainer", "regime_importance", "regime_effect_sign",
43
+ "explanation_drift", "counterfactual", "bootstrap_importance",
44
+ # plots
45
+ "plot_variable_lag_heatmap", "plot_lag_profile", "plot_own_vs_factor",
46
+ "plot_unit_heterogeneity", "plot_regime_drift", "plot_importance_ci",
47
+ "plot_counterfactual",
48
+ # misc
49
+ "tables", "Report",
50
+ ]
@@ -0,0 +1,262 @@
1
+ """Panel time-series data: simulation and the lag / cross-sectional-average
2
+ design builder.
3
+
4
+ The design builder is the backbone of the whole library: it engineers a
5
+ numeric feature matrix whose *column names* encode the structure
6
+
7
+ {variable}__L{lag}__{kind} kind in {own, csa}
8
+
9
+ so that downstream explainers can recover, for every attribution, which
10
+ *variable*, which *lag*, and whether it is an own regressor or a
11
+ cross-sectional / common-factor proxy (a CSA in the sense of Pesaran's CCE).
12
+
13
+ Author : Dr Merwan Roudane <merwanroudane920@gmail.com>
14
+ GitHub : https://github.com/merwanroudane/panelxai
15
+ """
16
+ from __future__ import annotations
17
+
18
+ from dataclasses import dataclass
19
+ import numpy as np
20
+ import pandas as pd
21
+
22
SEP = "__"


# --------------------------------------------------------------------------- #
# Column-name protocol
# --------------------------------------------------------------------------- #
def make_feature_name(var: str, lag: int, kind: str) -> str:
    """Encode ``var``, ``lag`` and ``kind`` as ``{var}__L{lag}__{kind}``."""
    return f"{var}{SEP}L{lag}{SEP}{kind}"


def parse_feature_name(name: str) -> dict:
    """Inverse of :func:`make_feature_name`.

    Returns a dict with keys ``var``, ``lag`` (int) and ``kind``.

    The name is split from the *right*, so a variable whose own name
    contains the separator (e.g. ``gdp__pc__L1__own``) still round-trips.
    Any name that does not match the protocol - wrong number of parts, a
    middle part that does not start with ``L``, or a lag that is not an
    integer - is treated as a raw passthrough column: ``var`` is the full
    name, ``lag`` is 0 and ``kind`` is ``"own"``.
    """
    parts = name.rsplit(SEP, 2)
    if len(parts) == 3 and parts[1].startswith("L"):
        try:
            lag = int(parts[1][1:])
        except ValueError:
            # e.g. "foo__Label__bar": looks structured but the lag is not a
            # number, so fall through to the passthrough result below.
            pass
        else:
            return {"var": parts[0], "lag": lag, "kind": parts[2]}
    # Not a structured column (e.g. a raw passthrough); treat as own/lag0.
    return {"var": name, "lag": 0, "kind": "own"}
39
+
40
+
41
@dataclass
class DesignSpec:
    """Record of how a design matrix was built.

    Holds the ordered feature names, the column names used for the target,
    unit and time identifiers, the lag settings, the fixed-effects mode, and
    the unit / time label of every design row.
    """
    features: list           # feature column names, in design-matrix order
    y: str                   # name of the target column
    unit: str                # name of the unit-identifier column
    time: str                # name of the time-identifier column
    lags: int                # number of own lags used
    csa_lags: int            # number of cross-sectional-average lags used
    fe: str                  # 'within' | 'none'
    unit_index: np.ndarray   # unit label of each design row
    time_index: np.ndarray   # time label of each design row

    def meta_frame(self) -> pd.DataFrame:
        """Return one row per feature: its name plus the fields parsed from it.

        The ``feature`` column comes first, followed by whatever keys
        ``parse_feature_name`` returns for each name.
        """
        parsed = list(map(parse_feature_name, self.features))
        frame = pd.DataFrame(parsed)
        frame.insert(0, "feature", self.features)
        return frame
59
+
60
+
61
+ # --------------------------------------------------------------------------- #
62
+ # Simulation
63
+ # --------------------------------------------------------------------------- #
64
def simulate_panel_ts(
    n_units: int = 30,
    n_periods: int = 40,
    n_features: int = 4,
    n_lags: int = 2,
    n_factors: int = 1,
    csd_strength: float = 0.8,
    regime_break: float | None = 0.5,
    nonlinear: bool = True,
    seed: int | None = None,
) -> pd.DataFrame:
    """Simulate a dynamic panel with the features the library is built to explain.

    The data-generating process embeds, on purpose:

    * **Distributed lags** - only a *subset* of (variable, lag) pairs truly
      drive ``y`` (so importance recovery is testable): x1 at lag 0, x2 at
      lag 1 and x3 at lag 2, each only when enough regressors and lags exist.
    * **Cross-sectional dependence** via ``n_factors`` common factors
      ``F_t`` that enter both the regressors and ``y`` with unit-specific
      loadings.
    * **A regime switch** at ``regime_break`` (fraction of the sample) where
      the contemporaneous x1 coefficient changes sign - this is what
      regime-aware explainers should detect as *explanation drift*.
      ``None`` disables the switch.
    * Optional **non-linearity** (``0.5 * x1 * tanh(x2)``, needs at least
      two regressors) so that linear models are mis-specified.

    Parameters
    ----------
    n_units, n_periods : panel dimensions N and T.
    n_features : number of regressors ``x1..xp``.
    n_lags : highest lag that may carry a true coefficient.
    n_factors : number of common factors.
    csd_strength : mean of a loadings draw; see the NOTE in the body - the
        draw is currently not used in the generated data.
    regime_break : fraction of ``n_periods`` at which the x1 sign flips.
    nonlinear : add the interaction term when True.
    seed : seed passed to ``numpy.random.default_rng``.

    Returns a long (balanced) panel with columns ``unit, time, y, x1..xp``.
    """
    rng = np.random.default_rng(seed)
    N, T, p = n_units, n_periods, n_features

    # Common factors (shared across units) -> cross-sectional dependence.
    F = rng.normal(size=(T, n_factors))
    # NOTE(review): this draw is never read afterwards, so ``csd_strength``
    # does not influence the output. The call is kept so that the random
    # stream (and therefore every seeded result) stays unchanged.
    _unused_loadings = rng.normal(loc=csd_strength, scale=0.3, size=(N, n_factors))

    # Regressors: persistent (AR) + a factor component.
    x = np.zeros((N, T, p))
    x_load = rng.normal(loc=0.5, scale=0.4, size=(N, p, n_factors))
    for j in range(p):
        ar = rng.uniform(0.3, 0.7)
        for t in range(T):
            prev = x[:, t - 1, j] if t > 0 else 0.0
            common = x_load[:, j, :] @ F[t]
            x[:, t, j] = ar * prev + common + rng.normal(scale=1.0, size=N)

    # True distributed-lag structure: pick a few (var, lag) drivers.  Every
    # write is guarded so that small ``n_features`` / ``n_lags`` values do
    # not index outside ``beta``.
    beta = np.zeros((p, n_lags + 1))
    if p >= 1 and n_lags >= 0:
        beta[0, 0] = 1.2            # x1 contemporaneous
    if p >= 2 and n_lags >= 1:
        beta[1, 1] = -0.9           # x2 at lag 1
    if p >= 3 and n_lags >= 2:
        beta[2, 2] = 0.7            # x3 at lag 2

    alpha = rng.normal(scale=1.0, size=N)                        # unit fixed effects
    gamma = rng.normal(loc=0.6, scale=0.2, size=(N, n_factors))  # y factor loadings

    brk = int(regime_break * T) if regime_break is not None else None

    # Non-zero drivers in (variable, lag) order; the order fixes the order
    # of the floating-point additions below.
    drivers = [
        (j, l, beta[j, l])
        for j in range(p)
        for l in range(n_lags + 1)
        if beta[j, l] != 0.0
    ]

    y = np.zeros((N, T))
    for t in range(T):
        mu = alpha + gamma @ F[t]
        for j, l, coef in drivers:
            if t - l < 0:
                continue  # lagged value does not exist yet
            # Regime switch flips the sign of the x1 effect after the break.
            if brk is not None and j == 0 and l == 0 and t >= brk:
                coef = -coef
            mu = mu + coef * x[:, t - l, j]
        if nonlinear and p >= 2:
            mu = mu + 0.5 * x[:, t, 0] * np.tanh(x[:, t, 1])
        y[:, t] = mu + rng.normal(scale=0.7, size=N)

    # Assemble long frame (unit-major: all periods of unit 1, then unit 2, ...).
    units = np.repeat(np.arange(1, N + 1), T)
    times = np.tile(np.arange(1, T + 1), N)
    data = {"unit": units, "time": times, "y": y.reshape(-1)}
    for j in range(p):
        data[f"x{j + 1}"] = x[:, :, j].reshape(-1)
    return pd.DataFrame(data)
144
+
145
+
146
+ # --------------------------------------------------------------------------- #
147
+ # Design construction
148
+ # --------------------------------------------------------------------------- #
149
def build_lags(
    df: pd.DataFrame,
    cols: list,
    lags: int,
    unit: str = "unit",
    time: str = "time",
) -> pd.DataFrame:
    """Return a copy of ``df`` sorted by (unit, time) with own-lag columns added.

    For every column in ``cols`` and every ``k`` in ``0..lags`` a column
    ``{col}__L{k}__own`` is appended, holding that column shifted ``k`` rows
    down *within* each unit. The leading ``k`` rows of each unit are NaN.
    """
    ordered = df.sort_values([unit, time]).copy()
    by_unit = ordered.groupby(unit, sort=False)
    for col in cols:
        for k in range(lags + 1):
            lag_name = make_feature_name(col, k, "own")
            ordered[lag_name] = by_unit[col].shift(k)
    return ordered
163
+
164
+
165
def add_cross_sectional_averages(
    df: pd.DataFrame,
    cols: list,
    csa_lags: int = 0,
    unit: str = "unit",
    time: str = "time",
) -> pd.DataFrame:
    """Add ``{col}__L{l}__csa``: the period-t mean across units (a CCE-style
    proxy for the unobserved common factors), then lagged within unit.

    Returns a copy of ``df`` (row order unchanged) with one new column per
    ``(col, l)`` pair, ``l`` in ``0..csa_lags``. Lagging shifts rows within
    each unit after ordering by ``(unit, time)``.
    """
    out = df.copy()
    means = out.groupby(time, sort=True)[cols].transform("mean")

    # Stage every per-period mean under a temporary name so the frame needs
    # to be sorted only once, not once per column.
    base_names = {c: f"__csa_base_{c}" for c in cols}
    for c, base in base_names.items():
        out[base] = means[c]

    by_unit = out.sort_values([unit, time]).groupby(unit, sort=False)
    for c in cols:
        staged = by_unit[base_names[c]]
        for l in range(csa_lags + 1):
            # Assignment aligns on the index, restoring the original row order.
            out[make_feature_name(c, l, "csa")] = staged.shift(l)

    return out.drop(columns=list(base_names.values()))
185
+
186
+
187
def build_design(
    df: pd.DataFrame,
    y: str = "y",
    X: list | None = None,
    unit: str = "unit",
    time: str = "time",
    lags: int = 2,
    csa: bool = True,
    csa_lags: int = 0,
    csa_vars: list | None = None,
    fe: str = "within",
):
    """Turn a long panel into ``(X_design, y_vec, DesignSpec)``.

    Parameters
    ----------
    df : long panel holding ``y``, ``unit``, ``time`` and the regressors.
    y : name of the target column.
    X : regressor columns; defaults to every column other than ``y``,
        ``unit`` and ``time``.
    unit, time : names of the identifier columns.
    lags : number of own lags (0..lags) for each regressor in ``X``.
    csa : if True, append cross-sectional averages (CCE proxies for the
        common factors driving cross-sectional dependence).
    csa_lags : own-lags of the CSAs.
    csa_vars : columns to average; defaults to ``X``.
    fe : ``'within'`` demeans ``y`` and every feature within unit
        (absorbing fixed effects, the standard FE transform);
        ``'none'`` leaves levels untouched.

    Returns
    -------
    ``(X_des, y_vec, spec)`` - the feature frame, the target series and a
    :class:`DesignSpec`, all restricted to rows with no missing feature or
    target value and re-indexed from 0.

    Raises
    ------
    ValueError : if ``fe`` is neither ``'within'`` nor ``'none'``.
    """
    # Validate up front so a typo fails before any lagging work is done.
    if fe not in ("within", "none"):
        raise ValueError("fe must be 'within' or 'none'")

    if X is None:
        X = [c for c in df.columns if c not in (y, unit, time)]
    csa_vars = csa_vars if csa_vars is not None else list(X)

    work = build_lags(df, X, lags, unit, time)
    if csa:
        work = add_cross_sectional_averages(work, csa_vars, csa_lags, unit, time)

    feats = [make_feature_name(v, l, "own") for v in X for l in range(lags + 1)]
    if csa:
        feats += [make_feature_name(v, l, "csa")
                  for v in csa_vars for l in range(csa_lags + 1)]

    # Lagging leaves NaNs at the start of each unit; drop incomplete rows.
    work = work.dropna(subset=feats + [y]).reset_index(drop=True)

    y_vec = work[y].copy()
    X_des = work[feats].copy()
    u_idx = work[unit].to_numpy()
    t_idx = work[time].to_numpy()

    if fe == "within":
        # Demean target and features within unit (absorb fixed effects).
        y_vec = y_vec - y_vec.groupby(work[unit]).transform("mean")
        X_des = X_des - X_des.groupby(work[unit].values).transform("mean")

    spec = DesignSpec(
        features=feats, y=y, unit=unit, time=time, lags=lags,
        # -1 marks "no cross-sectional averages in this design".
        csa_lags=csa_lags if csa else -1, fe=fe,
        unit_index=u_idx, time_index=t_idx,
    )
    return X_des.reset_index(drop=True), y_vec.reset_index(drop=True), spec
246
+
247
+
248
def panel_describe(
    df: pd.DataFrame, unit: str = "unit", time: str = "time"
) -> pd.DataFrame:
    """Summarise the shape of a long panel in a one-column frame.

    The result is indexed by statistic name with a single ``value`` column:
    number of units, smallest and largest row count per unit, whether all
    units have the same row count, total rows, and the number of columns
    other than two (``df.shape[1] - 2``).
    """
    unit_total = df[unit].nunique()
    obs_per_unit = df.groupby(unit)[time].size()

    summary = {}
    summary["n_units"] = unit_total
    summary["n_periods_min"] = int(obs_per_unit.min())
    summary["n_periods_max"] = int(obs_per_unit.max())
    summary["balanced"] = bool(obs_per_unit.nunique() == 1)
    summary["n_obs"] = len(df)
    summary["n_vars"] = df.shape[1] - 2

    return pd.DataFrame.from_dict(summary, orient="index", columns=["value"])
@@ -0,0 +1,21 @@
1
+ """Explainability core for panel time-series models.
2
+
3
+ The unifying idea: a raw SHAP value is indexed only by (observation, feature).
4
+ Because :mod:`panelxai.data` encodes ``variable``, ``lag`` and ``kind``
5
+ (own vs cross-sectional-average) in every feature name, we can *re-index*
6
+ attributions onto the structure that matters in panel econometrics:
7
+
8
+ variable x lag x unit x time + own-vs-factor decomposition.
9
+ """
10
+ from .structured import StructuredExplainer
11
+ from .regime import regime_importance, regime_effect_sign, explanation_drift
12
+ from .counterfactual import counterfactual
13
+ from .uncertainty import bootstrap_importance
14
+
15
# Public names of the subpackage; one entry per name imported above.
__all__ = [
    "StructuredExplainer",
    "regime_importance",
    "regime_effect_sign",
    "explanation_drift",
    "counterfactual",
    "bootstrap_importance",
]