econcomplex 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- econcomplex/__init__.py +220 -0
- econcomplex/complexity/__init__.py +23 -0
- econcomplex/complexity/eci_pci.py +131 -0
- econcomplex/complexity/eigenvector.py +115 -0
- econcomplex/complexity/fitness.py +130 -0
- econcomplex/complexity/reflections.py +173 -0
- econcomplex/complexity/subnational.py +82 -0
- econcomplex/core/__init__.py +23 -0
- econcomplex/core/diversity.py +125 -0
- econcomplex/core/preprocess.py +83 -0
- econcomplex/core/rca.py +161 -0
- econcomplex/core/utils.py +137 -0
- econcomplex/dynamics/__init__.py +10 -0
- econcomplex/dynamics/entry_exit.py +248 -0
- econcomplex/dynamics/growth.py +146 -0
- econcomplex/inequality/__init__.py +11 -0
- econcomplex/inequality/concentration.py +148 -0
- econcomplex/inequality/gini.py +164 -0
- econcomplex/optimization/__init__.py +46 -0
- econcomplex/optimization/diffusion.py +379 -0
- econcomplex/optimization/growth_target.py +170 -0
- econcomplex/optimization/portfolio.py +178 -0
- econcomplex/optimization/steppingstone.py +267 -0
- econcomplex/outlook/__init__.py +6 -0
- econcomplex/outlook/coi_cog.py +168 -0
- econcomplex/patents/__init__.py +7 -0
- econcomplex/patents/recombination.py +135 -0
- econcomplex/pipeline.py +255 -0
- econcomplex/productivity/__init__.py +8 -0
- econcomplex/productivity/prody.py +218 -0
- econcomplex/relatedness/__init__.py +25 -0
- econcomplex/relatedness/cooccurrence.py +173 -0
- econcomplex/relatedness/cross_space.py +142 -0
- econcomplex/relatedness/density.py +232 -0
- econcomplex/relatedness/proximity.py +214 -0
- econcomplex/specialization/__init__.py +17 -0
- econcomplex/specialization/location_quotient.py +163 -0
- econcomplex/specialization/similarity.py +68 -0
- econcomplex-1.0.0.dist-info/METADATA +223 -0
- econcomplex-1.0.0.dist-info/RECORD +43 -0
- econcomplex-1.0.0.dist-info/WHEEL +5 -0
- econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
- econcomplex-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Growth targeting for ECI Optimization.
|
|
3
|
+
|
|
4
|
+
Calibrates the growth regression (eq. 3 of Stojkoski & Hidalgo 2026) and
|
|
5
|
+
inverts it to find the ECI compatible with a target growth rate, which can
|
|
6
|
+
then be fed to `eci_optimization` as `target_eci`.
|
|
7
|
+
|
|
8
|
+
References
|
|
9
|
+
----------
|
|
10
|
+
Stojkoski & Hidalgo (2026) "Optimizing economic complexity",
|
|
11
|
+
Research Policy 55, 105454.
|
|
12
|
+
Hausmann et al. (2014) "The Atlas of Economic Complexity".
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
from typing import Dict, Union
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def calibrate_growth_model(
    df: pd.DataFrame,
    loc: str,
    time: str,
    gdppc: str,
    eci: str,
    horizon: int = 10,
) -> Dict:
    """
    Calibrate the panel growth regression (eq. 3 of Stojkoski & Hidalgo
    2026) by OLS:

        annualized log growth of GDPpc over `horizon`
            = a1*ECI + a2*z + a3*(ECI x z) + period fixed effects + u

    where z is the log of initial GDP per capita, z-score normalized
    across locations within each initial period (Solow convergence term).

    An initial period is left out of the fit when fewer than five
    locations are usable (positive GDP per capita at both ends of the
    horizon and a non-missing initial ECI) or when log GDP per capita has
    zero dispersion across the usable locations.

    Parameters
    ----------
    df : pd.DataFrame
        Long format with one row per (location, period).
    loc, time, gdppc, eci : str
        Column names for location, (numeric) period, GDP per capita, and
        ECI.
    horizon : int
        Growth horizon in period units (default 10). Must be non-zero.

    Returns
    -------
    dict with keys 'a1_eci', 'a2_z_gdppc', 'a3_interaction',
    'period_effects' (dict period -> fixed effect), 'z_stats'
    (dict period -> (mean, std) of log GDPpc), 'horizon', 'n_obs'.

    Raises
    ------
    ValueError
        If `horizon` is 0, if no period t has t+horizon in the panel, or
        if no initial period contributes observations.
    """
    # A zero horizon would pass the membership test below (t + 0 is always
    # in the panel) and then divide the log difference by zero.
    if horizon == 0:
        raise ValueError(
            "horizon must be non-zero: growth is annualized by dividing "
            "the log difference by the horizon."
        )

    periods = sorted(df[time].unique())
    available = set(periods)  # O(1) membership instead of scanning the list
    initial = [t for t in periods if (t + horizon) in available]
    if not initial:
        raise ValueError(
            f"No initial period t has t+{horizon} in the panel "
            f"(periods available: {periods})."
        )

    obs_y, obs_eci, obs_z, obs_t = [], [], [], []
    z_stats = {}

    for t in initial:
        d0 = df[df[time] == t].set_index(loc)
        d1 = df[df[time] == t + horizon].set_index(loc)
        # Only locations observed at both ends of the horizon can be used.
        common = d0.index.intersection(d1.index)
        d0, d1 = d0.loc[common], d1.loc[common]
        ok = (d0[gdppc] > 0) & (d1[gdppc] > 0) & d0[eci].notna()
        n_ok = int(ok.sum())
        if n_ok < 5:
            continue
        lg = np.log(d0.loc[ok, gdppc].astype(float))
        mu, sd = lg.mean(), lg.std()
        if sd == 0:
            # z-score undefined when every location has the same income.
            continue
        z_stats[t] = (float(mu), float(sd))
        z = (lg - mu) / sd
        log_end = np.log(d1.loc[ok, gdppc].astype(float)).values
        growth = (log_end - lg.values) / horizon
        obs_y.append(growth)
        obs_eci.append(d0.loc[ok, eci].astype(float).values)
        obs_z.append(z.values)
        obs_t.append(np.full(n_ok, t))

    if not obs_y:
        raise ValueError("Not enough observations to fit the growth model.")

    y = np.concatenate(obs_y)
    e = np.concatenate(obs_eci)
    z = np.concatenate(obs_z)
    t_arr = np.concatenate(obs_t)
    used_periods = sorted(set(t_arr))

    # Design: [ECI, z, ECI*z, one dummy per initial period (no intercept)]
    dummies = np.column_stack([(t_arr == t).astype(float) for t in used_periods])
    X = np.column_stack([e, z, e * z, dummies])
    beta, *_ = np.linalg.lstsq(X, y, rcond=None)

    return {
        "a1_eci": float(beta[0]),
        "a2_z_gdppc": float(beta[1]),
        "a3_interaction": float(beta[2]),
        "period_effects": {t: float(b) for t, b in zip(used_periods, beta[3:])},
        "z_stats": z_stats,
        "horizon": horizon,
        "n_obs": int(len(y)),
    }
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _z_and_gamma(model: Dict, gdppc_now, period=None):
    """
    Standardize current GDP per capita and pick the matching period effect.

    The reference period is `period` when given, otherwise the largest key
    of ``model["z_stats"]``. Returns ``(z, gamma)`` where z is the log of
    `gdppc_now` standardized with that period's (mean, std) and gamma is
    that period's entry in ``model["period_effects"]``.
    """
    z_stats = model["z_stats"]
    if period is None:
        key = max(z_stats)
    else:
        key = period
    mean_log, std_log = z_stats[key]
    log_income = np.log(np.asarray(gdppc_now, dtype=float))
    standardized = (log_income - mean_log) / std_log
    return standardized, model["period_effects"][key]
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def expected_growth(
    model: Dict,
    eci: Union[float, pd.Series],
    gdppc_now: Union[float, pd.Series],
    period=None,
) -> Union[float, pd.Series]:
    """
    Evaluate the calibrated growth regression at a given ECI and current
    GDP per capita.

    The period fixed effect and the z-score statistics come from `period`
    when it is given, otherwise from the most recent calibrated period.
    A Series `eci` yields a Series on the same index named
    'expected_growth'; a scalar result is returned as a plain float.
    """
    z, gamma = _z_and_gamma(model, gdppc_now, period)

    series_input = isinstance(eci, pd.Series)
    if series_input:
        e = eci.values
    else:
        e = np.asarray(eci, dtype=float)

    direct = model["a1_eci"] * e
    convergence = model["a2_z_gdppc"] * z
    interaction = model["a3_interaction"] * e * z
    g = direct + convergence + interaction + gamma

    if series_input:
        return pd.Series(g, index=eci.index, name="expected_growth")
    if np.ndim(g) == 0:
        return float(g)
    return g
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def eci_target_for_growth(
    model: Dict,
    growth_target: float,
    gdppc_now: Union[float, pd.Series],
    period=None,
) -> Union[float, pd.Series]:
    """
    Solve the growth regression for the ECI that delivers a target
    annualized log growth rate (e.g. 0.035 for ~3.5 % per year):

        ECI* = (growth - a2*z - gamma) / (a1 + a3*z)

    Where the denominator is within 1e-12 of zero the target is undefined;
    a RuntimeWarning is issued and NaN is returned for those inputs.

    The result can be passed to `eci_optimization` as `target_eci`.
    """
    import warnings

    z, gamma = _z_and_gamma(model, gdppc_now, period)

    # Marginal effect of ECI on growth at each income level.
    slope = np.asarray(
        model["a1_eci"] + model["a3_interaction"] * z, dtype=float
    )
    degenerate = np.abs(slope) < 1e-12
    if np.any(degenerate):
        warnings.warn(
            "eci_target_for_growth: the marginal effect of ECI "
            "(a1 + a3*z) is ~0 for some inputs; the target is undefined "
            "there and returned as NaN.",
            RuntimeWarning,
            stacklevel=2,
        )
    slope = np.where(degenerate, np.nan, slope)

    numerator = growth_target - model["a2_z_gdppc"] * z - gamma
    target = numerator / slope

    if isinstance(gdppc_now, pd.Series):
        return pd.Series(target, index=gdppc_now.index, name="eci_target")
    if np.ndim(target) == 0:
        return float(target)
    return target
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ECI Optimization: 0-1 portfolio selection.
|
|
3
|
+
|
|
4
|
+
Selects the minimal-effort set of new specializations that raises a
|
|
5
|
+
location's projection ECI (mean PCI of its portfolio) to a target.
|
|
6
|
+
|
|
7
|
+
References
|
|
8
|
+
----------
|
|
9
|
+
Stojkoski & Hidalgo (2026) "Optimizing economic complexity",
|
|
10
|
+
Research Policy 55, 105454.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import warnings
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
from typing import Dict, Optional, Union
|
|
18
|
+
|
|
19
|
+
from .steppingstone import effort_matrix, forecast_specialization
|
|
20
|
+
|
|
21
|
+
try:
|
|
22
|
+
from scipy.optimize import milp, LinearConstraint, Bounds
|
|
23
|
+
_HAS_MILP = True
|
|
24
|
+
except ImportError: # SciPy < 1.9
|
|
25
|
+
_HAS_MILP = False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def _select_portfolio(costs: np.ndarray, surplus: np.ndarray, deficit: float,
                      solver: str) -> Optional[np.ndarray]:
    """
    Pick the cheapest subset of candidates whose surplus covers `deficit`.

    Solves: minimize sum(costs[x]) s.t. sum(surplus[x]) >= deficit with x
    binary. Uses `scipy.optimize.milp` when `solver` is "milp" and it is
    available; otherwise a greedy pass ordered by cost per unit of surplus.
    Returns a boolean mask over the candidates, or None if infeasible.
    """
    # Even selecting every candidate cannot close the gap.
    if surplus.sum() < deficit:
        return None

    if solver == "milp" and _HAS_MILP:
        coverage = LinearConstraint(surplus[None, :], lb=deficit)
        result = milp(
            c=costs,
            constraints=coverage,
            integrality=np.ones_like(costs),
            bounds=Bounds(0, 1),
        )
        solved = result.status == 0 and result.x is not None
        if not solved:
            return None
        return result.x > 0.5

    # Greedy fallback: cheapest cost per unit of ECI surplus first
    ranking = np.argsort(costs / surplus)
    picked = np.zeros(len(costs), dtype=bool)
    total = 0.0
    for idx in ranking:
        if total >= deficit:
            break
        picked[idx] = True
        total += surplus[idx]

    if total >= deficit:
        return picked
    return None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def eci_optimization(
    mat: pd.DataFrame,
    model: Dict,
    delta_eci: float = 0.1,
    target_eci: Optional[Union[float, Dict, pd.Series]] = None,
    locations: Optional[list] = None,
    solver: str = "milp",
) -> pd.DataFrame:
    """
    ECI Optimization (Stojkoski & Hidalgo 2026): identify, per location,
    the minimal-effort portfolio of new specializations that raises the
    projected ECI to a target.

    The pipeline: (i) project the no-policy specialization matrix and PCI
    at t+horizon (`forecast_specialization`); (ii) compute the effort
    W_cp required for each candidate entry (`effort_matrix`); (iii) solve
    the 0-1 program

        min  sum_p W_cp x_cp
        s.t. mean PCI of (projected portfolio + selected) >= target ECI

    linearized as sum_p (PCI_p - target) x_cp >= deficit and solved
    exactly with `scipy.optimize.milp` (greedy fallback on SciPy < 1.9).

    Candidate activities are those outside the projected portfolio, with a
    defined PCI strictly above the target and a finite effort; cells whose
    effort is NaN or infinite are never offered to the solver. Negative
    efforts are treated as zero cost.

    Parameters
    ----------
    mat : pd.DataFrame (R x C)
        Value matrix at the initial period.
    model : dict
        Output of `calibrate_steppingstone`.
    delta_eci : float
        Target increase over each location's projected ECI (default 0.1,
        in PCI standard-deviation units). Ignored when `target_eci` given.
    target_eci : float, dict, or pd.Series, optional
        Absolute ECI target (single value or per location).
    locations : list, optional
        Subset of locations to optimize (default: all rows of `mat`).
    solver : str
        'milp' (exact, default) or 'greedy'.

    Returns
    -------
    pd.DataFrame with one row per suggested activity:
        [location, activity, effort, pci_projected, eci_projected,
         eci_target, eci_achieved]
    Locations whose projected ECI already meets the target contribute no
    rows; infeasible locations are skipped with a warning.
    """
    forecast = forecast_specialization(mat, model)
    W = effort_matrix(mat, model)
    pci = forecast["pci"]
    mcp_hat = forecast["mcp"]
    eci_proj = forecast["eci"]

    locs = locations if locations is not None else list(mat.index)
    valid_p = pci.notna()

    # Per-location targets are looked up once per location; build the
    # lookup Series a single time instead of inside the loop.
    per_location = None
    if target_eci is not None and not np.isscalar(target_eci):
        per_location = pd.Series(target_eci)

    rows = []

    for c in locs:
        base_eci = eci_proj.get(c, np.nan)
        if np.isnan(base_eci):
            warnings.warn(f"Location {c!r}: empty projected portfolio; skipped.")
            continue

        if target_eci is None:
            target = base_eci + delta_eci
        elif per_location is None:
            target = float(target_eci)
        else:
            target = float(per_location.get(c, np.nan))
            if np.isnan(target):
                warnings.warn(f"Location {c!r}: no target provided; skipped.")
                continue

        held = mcp_hat.loc[c]
        effort_c = W.loc[c].values
        m_row = held.values.astype(bool) & valid_p.values
        # Shortfall of the projected portfolio against the linearized
        # constraint sum_p (PCI_p - target) >= 0.
        deficit = -float(((pci[valid_p] - target) * held[valid_p]).sum())
        if deficit <= 0:
            continue  # target already met by the projected portfolio

        # Candidates: not in projected portfolio, PCI above target
        # (others cannot help the constraint), finite effort. np.isfinite
        # rejects NaN and +/-inf directly; routing the values through
        # np.nan_to_num first would turn +inf into the largest finite float
        # and let infinite-effort cells through.
        cand = (~m_row) & valid_p.values & (pci.values > target) \
            & np.isfinite(np.asarray(effort_c, dtype=float))
        if not cand.any():
            warnings.warn(f"Location {c!r}: no feasible candidates; skipped.")
            continue

        costs = np.clip(effort_c[cand], 0.0, None)
        surplus = (pci.values - target)[cand]
        chosen = _select_portfolio(costs, surplus, deficit, solver)
        if chosen is None:
            warnings.warn(
                f"Location {c!r}: target ECI {target:.3f} infeasible with "
                "the available candidates; skipped."
            )
            continue

        acts = mat.columns[cand][chosen]
        n_new = chosen.sum()
        # Mean PCI over the projected portfolio plus the selected entries.
        achieved = (
            (pci[valid_p] * held[valid_p]).sum()
            + pci[acts].sum()
        ) / (held[valid_p].sum() + n_new)

        for a in acts:
            rows.append({
                "location": c,
                "activity": a,
                "effort": float(W.loc[c, a]),
                "pci_projected": float(pci[a]),
                "eci_projected": float(base_eci),
                "eci_target": target,
                "eci_achieved": float(achieved),
            })

    return pd.DataFrame(
        rows, columns=["location", "activity", "effort", "pci_projected",
                       "eci_projected", "eci_target", "eci_achieved"],
    )
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Stepping-stone forward model for ECI Optimization.
|
|
3
|
+
|
|
4
|
+
Calibrates the forecast model of specialization (eq. 1 of the paper),
|
|
5
|
+
computes the closed-form effort matrix W_cp (eq. 2), and projects the
|
|
6
|
+
future specialization matrix (no-policy baseline, W = 0).
|
|
7
|
+
|
|
8
|
+
References
|
|
9
|
+
----------
|
|
10
|
+
Stojkoski & Hidalgo (2026) "Optimizing economic complexity",
|
|
11
|
+
Research Policy 55, 105454.
|
|
12
|
+
Pinheiro et al. (2021) for relative relatedness.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import numpy as np
|
|
16
|
+
import pandas as pd
|
|
17
|
+
from typing import Dict, List
|
|
18
|
+
|
|
19
|
+
from ..core.utils import pivot_to_matrix, safe_divide
|
|
20
|
+
from ..core.rca import rca as compute_rca
|
|
21
|
+
from ..relatedness.density import relatedness_density
|
|
22
|
+
|
|
23
|
+
# Coefficient labels, in the column order of the design matrix built in
# `calibrate_steppingstone` (steppingstone r, initial r, relatedness,
# relative relatedness, intercept).
COEF_NAMES = [
    "b1_r_stepping",
    "b2_r_initial",
    "b3_relatedness",
    "b4_relative_relatedness",
    "b0_intercept",
]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _features(mat: pd.DataFrame, threshold: float, proximity_method: str):
    """
    Build the regressors of the stepping-stone model for `mat`.

    Returns a tuple ``(R, r, omega, rel)``: RCA, log(1 + RCA), relatedness
    density rescaled to 0-1, and relative relatedness.
    """
    rca_values = compute_rca(mat.values)
    log_rca = np.log1p(rca_values)

    density = relatedness_density(
        mat, threshold=threshold, proximity_method=proximity_method
    )
    omega = density.values / 100.0  # 0-1 scale, as in the paper

    # Relative relatedness (Pinheiro et al. 2021, eq. 7): z-transform
    # against the statistics of the location's option set (RCA < threshold).
    # Specialized cells are standardized with the same row statistics so
    # the exit model shares the scale of the entry model.
    in_option_set = rca_values < threshold
    option_count = in_option_set.sum(axis=1, keepdims=True)
    option_total = np.where(in_option_set, omega, 0.0).sum(axis=1, keepdims=True)
    mu = safe_divide(option_total, option_count)
    squared_dev = np.where(in_option_set, (omega - mu) ** 2, 0.0)
    var = safe_divide(squared_dev.sum(axis=1, keepdims=True), option_count)
    rel = safe_divide(omega - mu, np.sqrt(var))

    return rca_values, log_rca, omega, rel
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def calibrate_steppingstone(
    df: pd.DataFrame,
    loc: str,
    act: str,
    val: str,
    time: str,
    horizon: int = 10,
    steppingstone: int = 5,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Dict:
    """
    Fit the stepping-stone forecast model (eq. 1 of Stojkoski & Hidalgo
    2026) on a long-format panel.

    For each initial period t that has both t+steppingstone and t+horizon
    in the panel, the following is estimated by OLS:

        r_cp(t+horizon) = b1*r_cp(t+steppingstone) + b2*r_cp(t)
                          + b3*relatedness_cp(t)
                          + b4*relative_relatedness_cp(t) + b0

    with r = log(1 + RCA). The entry model is fitted on cells with
    RCA(t) < threshold and the exit model on the remaining cells; a
    period contributes to a model only when it has more cells than
    coefficients. The reported coefficients are the mean over the
    periods that contributed.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format panel.
    loc, act, val, time : str
        Column names for location, activity, value, and (numeric) period.
    horizon : int
        Forecast horizon Delta-t in period units (default 10).
    steppingstone : int
        Steppingstone tau < horizon (default 5).
    threshold : float
        RCA binarization threshold for the relatedness features.
    proximity_method : str
        Proximity normalization ('max', 'sqrt', 'min').

    Returns
    -------
    dict with keys:
        'entry', 'exit' : coefficient dicts (b1, b2, b3, b4, b0)
        'horizon', 'steppingstone', 'threshold', 'proximity_method',
        'initial_periods', 'n_obs_entry', 'n_obs_exit'

    Raises
    ------
    ValueError
        If steppingstone >= horizon, if no initial period qualifies, or
        if either model could not be fitted for any period.
    """
    if steppingstone >= horizon:
        raise ValueError("steppingstone must be smaller than horizon.")

    periods = sorted(df[time].unique())
    initial = []
    for t in periods:
        if (t + steppingstone) in periods and (t + horizon) in periods:
            initial.append(t)
    if not initial:
        raise ValueError(
            f"No initial period t has both t+{steppingstone} and "
            f"t+{horizon} in the panel (periods available: {periods})."
        )

    coefs_entry: List[np.ndarray] = []
    coefs_exit: List[np.ndarray] = []
    n_entry = 0
    n_exit = 0

    for t in initial:
        t_mid = t + steppingstone
        t_end = t + horizon

        mats = {}
        for y in (t, t_mid, t_end):
            mats[y] = pivot_to_matrix(df[df[time] == y], loc, act, val)

        # Put the three snapshots on a common (location x activity) grid,
        # filling cells absent from a snapshot with zero.
        rows = mats[t].index
        cols = mats[t].columns
        for snapshot in mats.values():
            rows = rows.union(snapshot.index)
            cols = cols.union(snapshot.columns)
        for y in mats:
            mats[y] = mats[y].reindex(index=rows, columns=cols, fill_value=0.0)

        R_t, r_t, omega, rel = _features(mats[t], threshold, proximity_method)
        r_tau = np.log1p(compute_rca(mats[t_mid].values))
        r_T = np.log1p(compute_rca(mats[t_end].values))

        # Column order matches COEF_NAMES.
        design = np.column_stack([
            r_tau.ravel(),
            r_t.ravel(),
            omega.ravel(),
            rel.ravel(),
            np.ones(r_t.size),
        ])
        response = r_T.ravel()
        is_entry = (R_t < threshold).ravel()
        n_params = design.shape[1]

        for sample, bucket in ((is_entry, coefs_entry), (~is_entry, coefs_exit)):
            if sample.sum() <= n_params:
                continue  # too few cells to fit this period's model
            solution = np.linalg.lstsq(design[sample], response[sample], rcond=None)
            bucket.append(solution[0])

        n_entry += int(is_entry.sum())
        n_exit += int((~is_entry).sum())

    if not coefs_entry or not coefs_exit:
        raise ValueError("Not enough observations to fit entry/exit models.")

    entry = dict(zip(COEF_NAMES, np.mean(coefs_entry, axis=0)))
    exit_ = dict(zip(COEF_NAMES, np.mean(coefs_exit, axis=0)))
    return {
        "entry": entry,
        "exit": exit_,
        "horizon": horizon,
        "steppingstone": steppingstone,
        "threshold": threshold,
        "proximity_method": proximity_method,
        "initial_periods": initial,
        "n_obs_entry": n_entry,
        "n_obs_exit": n_exit,
    }
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def effort_matrix(
    mat: pd.DataFrame,
    model: Dict,
) -> pd.DataFrame:
    """
    Effort W_cp: the extra RCA an economy has to reach by the
    steppingstone period so that the calibrated entry model predicts
    RCA = 1 at the horizon (eq. 2 of Stojkoski & Hidalgo 2026, solved in
    closed form).

    Imposing r_cp(t+horizon) = log(2) in the stepping-stone equation:

        W_cp = exp[(log 2 - b0 - b2*r - b3*omega - b4*rel) / b1] - 1 - RCA_cp

    Only candidate cells (RCA < threshold) carry a value; cells that are
    already specialized are NaN. W <= 0 means the model already predicts
    entry without any boost. Overflow in the exponential is not reported
    as a floating-point warning.

    Parameters
    ----------
    mat : pd.DataFrame (R x C)
        Value matrix at the initial period.
    model : dict
        Output of `calibrate_steppingstone`.

    Returns
    -------
    R x C DataFrame of efforts.

    Raises
    ------
    ValueError
        If the entry model's steppingstone coefficient is within 1e-12 of
        zero, which makes the closed form undefined.
    """
    entry = model["entry"]
    slope = entry["b1_r_stepping"]
    if abs(slope) < 1e-12:
        raise ValueError(
            "The steppingstone coefficient (b1) of the entry model is ~0; "
            "the effort W_cp is undefined. Recalibrate the model (more "
            "periods or a different steppingstone/horizon)."
        )

    threshold = model.get("threshold", 1.0)
    proximity_method = model.get("proximity_method", "max")
    R, r, omega, rel = _features(mat, threshold, proximity_method)

    numerator = (
        np.log(2.0)
        - entry["b0_intercept"]
        - entry["b2_r_initial"] * r
        - entry["b3_relatedness"] * omega
        - entry["b4_relative_relatedness"] * rel
    )
    with np.errstate(over="ignore"):
        required = np.exp(numerator / entry["b1_r_stepping"]) - 1.0 - R

    # Blank out cells that are already specialized.
    efforts = np.where(R < threshold, required, np.nan)
    return pd.DataFrame(efforts, index=mat.index, columns=mat.columns)
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def forecast_specialization(
    mat: pd.DataFrame,
    model: Dict,
) -> Dict:
    """
    No-policy baseline forecast (W = 0): project RCA at t+horizon with the
    calibrated stepping-stone model, using entry coefficients for cells
    with RCA < threshold and exit coefficients otherwise.

    In this baseline the steppingstone regressor is set equal to the
    initial log(1 + RCA), so b1 and b2 act on the same variable. Predicted
    log values are floored at zero before being mapped back to RCA.

    Parameters
    ----------
    mat : pd.DataFrame (R x C)
        Value matrix at the initial period.
    model : dict
        Output of `calibrate_steppingstone`.

    Returns
    -------
    dict with:
        'rca' : projected RCA matrix (DataFrame)
        'mcp' : projected binary specialization matrix (DataFrame)
        'pci' : projected PCI (Series, z-scored; NaN for activities trimmed
                from the projected matrix)
        'eci' : projection ECI per location = mean projected PCI over the
                projected portfolio (Series)
    """
    from ..complexity.eci_pci import eci_pci

    threshold = model.get("threshold", 1.0)
    R, r, omega, rel = _features(mat, threshold,
                                 model.get("proximity_method", "max"))

    # Start from NaN rather than uninitialized memory: a cell whose RCA is
    # NaN satisfies neither mask below and would otherwise keep whatever
    # bytes np.empty_like happened to return.
    r_hat = np.full_like(r, np.nan)
    for key, mask in (("entry", R < threshold), ("exit", R >= threshold)):
        b = model[key]
        pred = (b["b0_intercept"]
                + (b["b1_r_stepping"] + b["b2_r_initial"]) * r
                + b["b3_relatedness"] * omega
                + b["b4_relative_relatedness"] * rel)
        r_hat[mask] = pred[mask]

    # Invert r = log(1 + RCA); negative predictions are floored at RCA = 0.
    R_hat = np.expm1(np.clip(r_hat, 0.0, None))
    rca_hat = pd.DataFrame(R_hat, index=mat.index, columns=mat.columns)
    mcp_hat = (rca_hat >= threshold).astype(float)

    _, pci = eci_pci(rca_hat, use_rca=False, threshold=threshold)
    pci = pci.rename("pci_projected")

    # Mean PCI over each location's projected portfolio, ignoring
    # activities without a PCI.
    weights = mcp_hat.mul(pci.notna().astype(float), axis=1)
    portfolio = weights.values * np.nan_to_num(pci.values)[None, :]
    counts = weights.sum(axis=1).values
    eci_proj = pd.Series(
        safe_divide(portfolio.sum(axis=1), counts),
        index=mat.index, name="eci_projection",
    )
    # An empty projected portfolio has no defined ECI.
    eci_proj[counts == 0] = np.nan

    return {"rca": rca_hat, "mcp": mcp_hat, "pci": pci, "eci": eci_proj}
|