econcomplex 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. econcomplex/__init__.py +220 -0
  2. econcomplex/complexity/__init__.py +23 -0
  3. econcomplex/complexity/eci_pci.py +131 -0
  4. econcomplex/complexity/eigenvector.py +115 -0
  5. econcomplex/complexity/fitness.py +130 -0
  6. econcomplex/complexity/reflections.py +173 -0
  7. econcomplex/complexity/subnational.py +82 -0
  8. econcomplex/core/__init__.py +23 -0
  9. econcomplex/core/diversity.py +125 -0
  10. econcomplex/core/preprocess.py +83 -0
  11. econcomplex/core/rca.py +161 -0
  12. econcomplex/core/utils.py +137 -0
  13. econcomplex/dynamics/__init__.py +10 -0
  14. econcomplex/dynamics/entry_exit.py +248 -0
  15. econcomplex/dynamics/growth.py +146 -0
  16. econcomplex/inequality/__init__.py +11 -0
  17. econcomplex/inequality/concentration.py +148 -0
  18. econcomplex/inequality/gini.py +164 -0
  19. econcomplex/optimization/__init__.py +46 -0
  20. econcomplex/optimization/diffusion.py +379 -0
  21. econcomplex/optimization/growth_target.py +170 -0
  22. econcomplex/optimization/portfolio.py +178 -0
  23. econcomplex/optimization/steppingstone.py +267 -0
  24. econcomplex/outlook/__init__.py +6 -0
  25. econcomplex/outlook/coi_cog.py +168 -0
  26. econcomplex/patents/__init__.py +7 -0
  27. econcomplex/patents/recombination.py +135 -0
  28. econcomplex/pipeline.py +255 -0
  29. econcomplex/productivity/__init__.py +8 -0
  30. econcomplex/productivity/prody.py +218 -0
  31. econcomplex/relatedness/__init__.py +25 -0
  32. econcomplex/relatedness/cooccurrence.py +173 -0
  33. econcomplex/relatedness/cross_space.py +142 -0
  34. econcomplex/relatedness/density.py +232 -0
  35. econcomplex/relatedness/proximity.py +214 -0
  36. econcomplex/specialization/__init__.py +17 -0
  37. econcomplex/specialization/location_quotient.py +163 -0
  38. econcomplex/specialization/similarity.py +68 -0
  39. econcomplex-1.0.0.dist-info/METADATA +223 -0
  40. econcomplex-1.0.0.dist-info/RECORD +43 -0
  41. econcomplex-1.0.0.dist-info/WHEEL +5 -0
  42. econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
  43. econcomplex-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,170 @@
1
+ """
2
+ Growth targeting for ECI Optimization.
3
+
4
+ Calibrates the growth regression (eq. 3 of Stojkoski & Hidalgo 2026) and
5
+ inverts it to find the ECI compatible with a target growth rate, which can
6
+ then be fed to `eci_optimization` as `target_eci`.
7
+
8
+ References
9
+ ----------
10
+ Stojkoski & Hidalgo (2026) "Optimizing economic complexity",
11
+ Research Policy 55, 105454.
12
+ Hausmann et al. (2014) "The Atlas of Economic Complexity".
13
+ """
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ from typing import Dict, Union
18
+
19
+
20
def calibrate_growth_model(
    df: pd.DataFrame,
    loc: str,
    time: str,
    gdppc: str,
    eci: str,
    horizon: int = 10,
) -> Dict:
    """
    Fit the panel growth regression (eq. 3 of Stojkoski & Hidalgo 2026)
    by ordinary least squares:

        annualized log growth of GDPpc over `horizon`
            = a1*ECI + a2*z + a3*(ECI x z) + period fixed effects + u

    `z` is the log of initial GDP per capita, standardized across the
    locations retained for each initial period (sample standard deviation,
    i.e. the pandas default ``ddof=1``).

    Parameters
    ----------
    df : pd.DataFrame
        Long format with one row per (location, period).
    loc, time, gdppc, eci : str
        Column names for location, (numeric) period, GDP per capita, and
        ECI.
    horizon : int
        Growth horizon in period units (default 10).

    Returns
    -------
    dict with keys 'a1_eci', 'a2_z_gdppc', 'a3_interaction',
    'period_effects' (dict period -> fixed effect), 'z_stats'
    (dict period -> (mean, std) of log GDPpc), 'horizon', 'n_obs'.

    Raises
    ------
    ValueError
        If no period t has t+horizon in the panel, or if no initial period
        yields a usable cross-section.
    """
    periods = sorted(df[time].unique())
    initial = [start for start in periods if (start + horizon) in periods]
    if not initial:
        raise ValueError(
            f"No initial period t has t+{horizon} in the panel "
            f"(periods available: {periods})."
        )

    z_stats = {}
    # One tuple per retained initial period: (growth, ECI, z, period label).
    samples = []

    for start in initial:
        base = df[df[time] == start].set_index(loc)
        later = df[df[time] == start + horizon].set_index(loc)
        shared = base.index.intersection(later.index)
        base = base.loc[shared]
        later = later.loc[shared]

        # Logs need strictly positive GDPpc at both ends; ECI must be known.
        usable = (base[gdppc] > 0) & (later[gdppc] > 0) & base[eci].notna()
        n_usable = usable.sum()
        if n_usable < 5:
            continue  # cross-section too small to standardize sensibly

        log_start = np.log(base.loc[usable, gdppc].astype(float))
        center = log_start.mean()
        spread = log_start.std()
        if spread == 0:
            continue  # z-score undefined when all incomes coincide

        z_stats[start] = (float(center), float(spread))
        log_end = np.log(later.loc[usable, gdppc].astype(float)).values
        samples.append((
            (log_end - log_start.values) / horizon,
            base.loc[usable, eci].astype(float).values,
            ((log_start - center) / spread).values,
            np.full(n_usable, start),
        ))

    if not samples:
        raise ValueError("Not enough observations to fit the growth model.")

    y, e, z, t_arr = (np.concatenate(parts) for parts in zip(*samples))
    used_periods = sorted(set(t_arr))

    # Design: [ECI, z, ECI*z, one dummy per initial period (no intercept)]
    period_dummies = [(t_arr == p).astype(float) for p in used_periods]
    X = np.column_stack([e, z, e * z] + period_dummies)
    beta, *_ = np.linalg.lstsq(X, y, rcond=None)

    return {
        "a1_eci": float(beta[0]),
        "a2_z_gdppc": float(beta[1]),
        "a3_interaction": float(beta[2]),
        "period_effects": dict(zip(used_periods, map(float, beta[3:]))),
        "z_stats": z_stats,
        "horizon": horizon,
        "n_obs": int(y.size),
    }
108
+
109
+
110
+ def _z_and_gamma(model: Dict, gdppc_now, period=None):
111
+ """z-score of log GDPpc and period effect for prediction."""
112
+ ref = period if period is not None else max(model["z_stats"])
113
+ mu, sd = model["z_stats"][ref]
114
+ z = (np.log(np.asarray(gdppc_now, dtype=float)) - mu) / sd
115
+ gamma = model["period_effects"][ref]
116
+ return z, gamma
117
+
118
+
119
def expected_growth(
    model: Dict,
    eci: Union[float, pd.Series],
    gdppc_now: Union[float, pd.Series],
    period=None,
) -> Union[float, pd.Series]:
    """
    Annualized log growth rate implied by the calibrated model for a given
    ECI and current GDP per capita:

        g = a1*ECI + a2*z + a3*(ECI x z) + gamma

    The most recent calibrated period supplies the z-score statistics and
    the fixed effect unless `period` is given.

    Parameters
    ----------
    model : dict
        Output of `calibrate_growth_model`.
    eci : float or pd.Series
        ECI value(s).
    gdppc_now : float or pd.Series
        Current GDP per capita value(s).
    period : optional
        Calibrated period whose statistics and fixed effect are used.

    Returns
    -------
    float for scalar inputs. A pd.Series named 'expected_growth' when
    either input is a Series, indexed like `eci` if it is a Series and
    like `gdppc_now` otherwise. Other array-likes yield an ndarray.

    Notes
    -----
    When both inputs are Series they are paired positionally, not by
    label; align them beforehand if their indexes differ.
    """
    z, gamma = _z_and_gamma(model, gdppc_now, period)
    # Coerce to float in every case so integer/object Series behave the
    # same as plain floats.
    e = np.asarray(eci, dtype=float)
    g = (
        model["a1_eci"] * e
        + model["a2_z_gdppc"] * z
        + model["a3_interaction"] * e * z
        + gamma
    )
    if isinstance(eci, pd.Series):
        return pd.Series(g, index=eci.index, name="expected_growth")
    if isinstance(gdppc_now, pd.Series):
        # Keep the location labels when only GDPpc carries an index,
        # mirroring what eci_target_for_growth returns.
        return pd.Series(g, index=gdppc_now.index, name="expected_growth")
    return float(g) if np.ndim(g) == 0 else g
137
+
138
+
139
def eci_target_for_growth(
    model: Dict,
    growth_target: float,
    gdppc_now: Union[float, pd.Series],
    period=None,
) -> Union[float, pd.Series]:
    """
    Solve the growth regression for the ECI that delivers a target
    annualized log growth rate (e.g. 0.035 for ~3.5 % per year):

        ECI* = (growth - a2*z - gamma) / (a1 + a3*z)

    Where the marginal effect of ECI, ``a1 + a3*z``, is smaller than 1e-12
    in absolute value the target is undefined: a RuntimeWarning is issued
    and NaN is returned for those inputs.

    The result can be passed to `eci_optimization` as `target_eci`.

    Returns
    -------
    float for a scalar `gdppc_now`; a pd.Series named 'eci_target' (indexed
    like `gdppc_now`) when it is a Series; otherwise an ndarray.
    """
    import warnings

    z, gamma = _z_and_gamma(model, gdppc_now, period)
    slope = np.asarray(
        model["a1_eci"] + model["a3_interaction"] * z, dtype=float
    )
    degenerate = np.abs(slope) < 1e-12
    if degenerate.any():
        warnings.warn(
            "eci_target_for_growth: the marginal effect of ECI "
            "(a1 + a3*z) is ~0 for some inputs; the target is undefined "
            "there and returned as NaN.",
            RuntimeWarning,
            stacklevel=2,
        )
    # Blank out the degenerate slopes so the division yields NaN, not inf.
    slope = np.where(degenerate, np.nan, slope)
    numerator = growth_target - model["a2_z_gdppc"] * z - gamma
    target = numerator / slope

    if isinstance(gdppc_now, pd.Series):
        return pd.Series(target, index=gdppc_now.index, name="eci_target")
    if np.ndim(target) == 0:
        return float(target)
    return target
@@ -0,0 +1,178 @@
1
+ """
2
+ ECI Optimization: 0-1 portfolio selection.
3
+
4
+ Selects the minimal-effort set of new specializations that raises a
5
+ location's projection ECI (mean PCI of its portfolio) to a target.
6
+
7
+ References
8
+ ----------
9
+ Stojkoski & Hidalgo (2026) "Optimizing economic complexity",
10
+ Research Policy 55, 105454.
11
+ """
12
+
13
+ import warnings
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ from typing import Dict, Optional, Union
18
+
19
+ from .steppingstone import effort_matrix, forecast_specialization
20
+
21
+ try:
22
+ from scipy.optimize import milp, LinearConstraint, Bounds
23
+ _HAS_MILP = True
24
+ except ImportError: # SciPy < 1.9
25
+ _HAS_MILP = False
26
+
27
+
28
+ def _select_portfolio(costs: np.ndarray, surplus: np.ndarray, deficit: float,
29
+ solver: str) -> Optional[np.ndarray]:
30
+ """
31
+ Minimize sum(costs[x]) s.t. sum(surplus[x]) >= deficit, x binary.
32
+ Returns a boolean mask over the candidates, or None if infeasible.
33
+ """
34
+ if surplus.sum() < deficit:
35
+ return None
36
+
37
+ if solver == "milp" and _HAS_MILP:
38
+ res = milp(
39
+ c=costs,
40
+ constraints=LinearConstraint(surplus[None, :], lb=deficit),
41
+ integrality=np.ones_like(costs),
42
+ bounds=Bounds(0, 1),
43
+ )
44
+ if res.status == 0 and res.x is not None:
45
+ return res.x > 0.5
46
+ return None
47
+
48
+ # Greedy fallback: cheapest cost per unit of ECI surplus first
49
+ order = np.argsort(costs / surplus)
50
+ chosen = np.zeros(len(costs), dtype=bool)
51
+ covered = 0.0
52
+ for i in order:
53
+ if covered >= deficit:
54
+ break
55
+ chosen[i] = True
56
+ covered += surplus[i]
57
+ return chosen if covered >= deficit else None
58
+
59
+
60
def eci_optimization(
    mat: pd.DataFrame,
    model: Dict,
    delta_eci: float = 0.1,
    target_eci: Optional[Union[float, Dict, pd.Series]] = None,
    locations: Optional[list] = None,
    solver: str = "milp",
) -> pd.DataFrame:
    """
    ECI Optimization (Stojkoski & Hidalgo 2026): identify, per location,
    the minimal-effort portfolio of new specializations that raises the
    projected ECI to a target.

    The pipeline: (i) project the no-policy specialization matrix and PCI
    at t+horizon (`forecast_specialization`); (ii) compute the effort
    W_cp required for each candidate entry (`effort_matrix`); (iii) solve
    the 0-1 program

        min  sum_p W_cp x_cp
        s.t. mean PCI of (projected portfolio + selected) >= target ECI

    linearized as sum_p (PCI_p - target) x_cp >= deficit and solved
    exactly with `scipy.optimize.milp` (greedy fallback on SciPy < 1.9).

    Parameters
    ----------
    mat : pd.DataFrame (R x C)
        Value matrix at the initial period.
    model : dict
        Output of `calibrate_steppingstone`.
    delta_eci : float
        Target increase over each location's projected ECI (default 0.1,
        in PCI standard-deviation units). Ignored when `target_eci` given.
    target_eci : float, dict, or pd.Series, optional
        Absolute ECI target (single value or per location).
    locations : list, optional
        Subset of locations to optimize (default: all rows of `mat`).
    solver : str
        'milp' (exact, default) or 'greedy'.

    Returns
    -------
    pd.DataFrame with one row per suggested activity:
        [location, activity, effort, pci_projected, eci_projected,
         eci_target, eci_achieved]
    Locations whose projected ECI already meets the target contribute no
    rows; infeasible locations are skipped with a warning.

    Raises
    ------
    ValueError
        If `solver` is neither 'milp' nor 'greedy'. Previously any other
        string silently ran the greedy heuristic.
    """
    if solver not in ("milp", "greedy"):
        raise ValueError(
            f"Unknown solver {solver!r}; expected 'milp' or 'greedy'."
        )

    forecast = forecast_specialization(mat, model)
    W = effort_matrix(mat, model)
    pci = forecast["pci"]
    mcp_hat = forecast["mcp"]
    eci_proj = forecast["eci"]

    locs = locations if locations is not None else list(mat.index)
    valid_p = pci.notna()
    valid = valid_p.values
    pci_values = pci.values

    # Build the per-location target lookup once instead of on every
    # iteration; it stays None for the delta and scalar-target modes.
    per_location_target = None
    if target_eci is not None and not np.isscalar(target_eci):
        per_location_target = pd.Series(target_eci)

    rows = []

    for c in locs:
        base_eci = eci_proj.get(c, np.nan)
        if np.isnan(base_eci):
            warnings.warn(
                f"Location {c!r}: empty projected portfolio; skipped.",
                stacklevel=2,
            )
            continue

        if target_eci is None:
            target = base_eci + delta_eci
        elif per_location_target is None:
            target = float(target_eci)
        else:
            target = float(per_location_target.get(c, np.nan))
            if np.isnan(target):
                warnings.warn(
                    f"Location {c!r}: no target provided; skipped.",
                    stacklevel=2,
                )
                continue

        portfolio_row = mcp_hat.loc[c]
        effort_row = W.loc[c].values

        m_row = portfolio_row.values.astype(bool) & valid
        # Shortfall of the projected portfolio against the target, in the
        # linearized constraint's units (sum of PCI - target).
        deficit = -float(
            ((pci[valid_p] - target) * portfolio_row[valid_p]).sum()
        )
        if deficit <= 0:
            continue  # target already met by the projected portfolio

        # Candidates: not in projected portfolio, PCI above target
        # (others cannot help the constraint), finite effort.
        cand = (
            (~m_row)
            & valid
            & (pci_values > target)
            & np.isfinite(np.nan_to_num(effort_row, nan=np.inf))
        )
        if not cand.any():
            warnings.warn(
                f"Location {c!r}: no feasible candidates; skipped.",
                stacklevel=2,
            )
            continue

        # Negative effort means entry is already predicted, so it costs
        # nothing in the objective.
        costs = np.clip(effort_row[cand], 0.0, None)
        surplus = (pci_values - target)[cand]
        chosen = _select_portfolio(costs, surplus, deficit, solver)
        if chosen is None:
            warnings.warn(
                f"Location {c!r}: target ECI {target:.3f} infeasible with "
                "the available candidates; skipped.",
                stacklevel=2,
            )
            continue

        acts = mat.columns[cand][chosen]
        n_new = chosen.sum()
        achieved = (
            (pci[valid_p] * portfolio_row[valid_p]).sum()
            + pci[acts].sum()
        ) / (portfolio_row[valid_p].sum() + n_new)

        for a in acts:
            rows.append({
                "location": c,
                "activity": a,
                "effort": float(W.loc[c, a]),
                "pci_projected": float(pci[a]),
                "eci_projected": float(base_eci),
                "eci_target": target,
                "eci_achieved": float(achieved),
            })

    return pd.DataFrame(
        rows, columns=["location", "activity", "effort", "pci_projected",
                       "eci_projected", "eci_target", "eci_achieved"],
    )
@@ -0,0 +1,267 @@
1
+ """
2
+ Stepping-stone forward model for ECI Optimization.
3
+
4
+ Calibrates the forecast model of specialization (eq. 1 of the paper),
5
+ computes the closed-form effort matrix W_cp (eq. 2), and projects the
6
+ future specialization matrix (no-policy baseline, W = 0).
7
+
8
+ References
9
+ ----------
10
+ Stojkoski & Hidalgo (2026) "Optimizing economic complexity",
11
+ Research Policy 55, 105454.
12
+ Pinheiro et al. (2021) for relative relatedness.
13
+ """
14
+
15
+ import numpy as np
16
+ import pandas as pd
17
+ from typing import Dict, List
18
+
19
+ from ..core.utils import pivot_to_matrix, safe_divide
20
+ from ..core.rca import rca as compute_rca
21
+ from ..relatedness.density import relatedness_density
22
+
23
+ COEF_NAMES = ["b1_r_stepping", "b2_r_initial", "b3_relatedness",
24
+ "b4_relative_relatedness", "b0_intercept"]
25
+
26
+
27
def _features(mat: pd.DataFrame, threshold: float, proximity_method: str):
    """
    Build the regressors of the stepping-stone model for `mat`.

    Returns a 4-tuple ``(R, r, omega, rel)``: RCA, log(1 + RCA), the
    relatedness density divided by 100, and relative relatedness.
    """
    rca_values = compute_rca(mat.values)
    log_rca = np.log1p(rca_values)

    density = relatedness_density(
        mat, threshold=threshold, proximity_method=proximity_method
    )
    # 0-1 scale, as in the paper (presumably the density helper reports
    # percentages -- confirm against relatedness_density).
    omega = density.values / 100.0

    # Relative relatedness (Pinheiro et al. 2021, eq. 7): z-transform
    # against the statistics of the location's option set (RCA < threshold).
    # Specialized cells get the same standardization so the exit model
    # shares the scale of the entry model.
    in_option_set = rca_values < threshold
    option_count = in_option_set.sum(axis=1, keepdims=True)

    def _option_set_mean(values):
        """Row-wise mean of `values` restricted to the option set."""
        masked_total = np.where(in_option_set, values, 0.0).sum(
            axis=1, keepdims=True
        )
        return safe_divide(masked_total, option_count)

    mu = _option_set_mean(omega)
    var = _option_set_mean((omega - mu) ** 2)
    rel = safe_divide(omega - mu, np.sqrt(var))
    return rca_values, log_rca, omega, rel
46
+
47
+
48
def calibrate_steppingstone(
    df: pd.DataFrame,
    loc: str,
    act: str,
    val: str,
    time: str,
    horizon: int = 10,
    steppingstone: int = 5,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Dict:
    """
    Calibrate the stepping-stone forecast model (eq. 1 of Stojkoski &
    Hidalgo 2026) on a long-format panel.

    For every initial period t with t+steppingstone and t+horizon also
    present in the panel, fits by OLS:

        r_cp(t+horizon) = b1*r_cp(t+steppingstone) + b2*r_cp(t)
                          + b3*relatedness_cp(t) + b4*relative_relatedness_cp(t)
                          + b0

    where r = log(1 + RCA). Entry models use only cells with
    RCA(t) < threshold; exit models use only cells with RCA(t) >= threshold.
    Coefficients are averaged across all initial periods whose regression
    could be fitted.

    Parameters
    ----------
    df : pd.DataFrame
        Long-format panel.
    loc, act, val, time : str
        Column names for location, activity, value, and (numeric) period.
    horizon : int
        Forecast horizon Delta-t in period units (default 10).
    steppingstone : int
        Steppingstone tau < horizon (default 5).
    threshold : float
        RCA threshold separating entry cells from exit cells; also passed
        to the relatedness features.
    proximity_method : str
        Proximity normalization ('max', 'sqrt', 'min').

    Returns
    -------
    dict with keys:
        'entry', 'exit' : coefficient dicts keyed by `COEF_NAMES`
        'horizon', 'steppingstone', 'threshold', 'proximity_method',
        'initial_periods',
        'n_obs_entry', 'n_obs_exit' : number of cells that actually entered
            a fitted regression

    Raises
    ------
    ValueError
        If `steppingstone >= horizon`, if no initial period qualifies, or
        if either the entry or the exit model could not be fitted for any
        period.
    """
    if steppingstone >= horizon:
        raise ValueError("steppingstone must be smaller than horizon.")

    periods = sorted(df[time].unique())
    available = set(periods)  # O(1) membership for the two look-aheads
    initial = [t for t in periods
               if (t + steppingstone) in available
               and (t + horizon) in available]
    if not initial:
        raise ValueError(
            f"No initial period t has both t+{steppingstone} and "
            f"t+{horizon} in the panel (periods available: {periods})."
        )

    fits: Dict[str, List[np.ndarray]] = {"entry": [], "exit": []}
    n_used = {"entry": 0, "exit": 0}

    for t in initial:
        years = (t, t + steppingstone, t + horizon)
        mats = {
            y: pivot_to_matrix(df[df[time] == y], loc, act, val)
            for y in years
        }
        # Put the three snapshots on a common (location x activity) grid so
        # the flattened cells line up across periods.
        rows = mats[t].index
        cols = mats[t].columns
        for m in mats.values():
            rows = rows.union(m.index)
            cols = cols.union(m.columns)
        mats = {
            y: m.reindex(index=rows, columns=cols, fill_value=0.0)
            for y, m in mats.items()
        }

        R_t, r_t, omega, rel = _features(mats[t], threshold, proximity_method)
        r_tau = np.log1p(compute_rca(mats[t + steppingstone].values))
        r_T = np.log1p(compute_rca(mats[t + horizon].values))

        X = np.column_stack([
            r_tau.ravel(), r_t.ravel(), omega.ravel(), rel.ravel(),
            np.ones(r_t.size),
        ])
        y_vec = r_T.ravel()
        # A single NaN/inf row would poison the whole least-squares solve,
        # so such cells are dropped before fitting.
        finite = np.isfinite(X).all(axis=1) & np.isfinite(y_vec)
        entry_mask = (R_t < threshold).ravel()

        for key, mask in (("entry", entry_mask), ("exit", ~entry_mask)):
            use = mask & finite
            n_rows = int(use.sum())
            if n_rows > X.shape[1]:
                beta, *_ = np.linalg.lstsq(X[use], y_vec[use], rcond=None)
                fits[key].append(beta)
                # Count only observations that contributed to a fit.
                n_used[key] += n_rows

    if not fits["entry"] or not fits["exit"]:
        raise ValueError("Not enough observations to fit entry/exit models.")

    entry = dict(zip(COEF_NAMES, np.mean(fits["entry"], axis=0)))
    exit_ = dict(zip(COEF_NAMES, np.mean(fits["exit"], axis=0)))
    return {
        "entry": entry,
        "exit": exit_,
        "horizon": horizon,
        "steppingstone": steppingstone,
        "threshold": threshold,
        "proximity_method": proximity_method,
        "initial_periods": initial,
        "n_obs_entry": n_used["entry"],
        "n_obs_exit": n_used["exit"],
    }
158
+
159
+
160
def effort_matrix(
    mat: pd.DataFrame,
    model: Dict,
) -> pd.DataFrame:
    """
    Effort W_cp: the added RCA an economy must reach by the steppingstone
    period for the calibrated model to predict entry at the horizon
    (eq. 2 of Stojkoski & Hidalgo 2026, solved in closed form).

    Entry means reaching the model's RCA threshold (1.0 by default), so
    the stepping-stone equation is solved for
    r_cp(t+horizon) = log(1 + threshold):

        W_cp = exp[(log(1 + threshold) - b0 - b2*r - b3*omega - b4*rel) / b1]
               - 1 - RCA_cp

    With the default threshold of 1 this is the paper's log(2) target.
    Using the model's threshold keeps the effort consistent with the
    candidate set below and with the `>= threshold` binarization applied
    in `forecast_specialization`.

    Values are returned only for candidate cells (RCA < threshold);
    currently specialized cells are NaN. W <= 0 means the model already
    predicts entry without any boost. Cells whose exponent overflows come
    back as +inf.

    Parameters
    ----------
    mat : pd.DataFrame (R x C)
        Value matrix at the initial period.
    model : dict
        Output of `calibrate_steppingstone`.

    Returns
    -------
    R x C DataFrame of efforts.

    Raises
    ------
    ValueError
        If the entry model's steppingstone coefficient b1 is ~0.
    """
    b = model["entry"]
    if abs(b["b1_r_stepping"]) < 1e-12:
        raise ValueError(
            "The steppingstone coefficient (b1) of the entry model is ~0; "
            "the effort W_cp is undefined. Recalibrate the model (more "
            "periods or a different steppingstone/horizon)."
        )
    threshold = model.get("threshold", 1.0)
    R, r, omega, rel = _features(mat, threshold,
                                 model.get("proximity_method", "max"))

    # r = log(1 + RCA), so entry at RCA == threshold is log(1 + threshold).
    entry_level = np.log(1.0 + threshold)
    numerator = (entry_level - b["b0_intercept"]
                 - b["b2_r_initial"] * r
                 - b["b3_relatedness"] * omega
                 - b["b4_relative_relatedness"] * rel)
    # Overflow is expected for hopeless cells; let it become +inf quietly.
    with np.errstate(over="ignore"):
        W = np.exp(numerator / b["b1_r_stepping"]) - 1.0 - R

    W = np.where(R < threshold, W, np.nan)
    return pd.DataFrame(W, index=mat.index, columns=mat.columns)
208
+
209
+
210
def forecast_specialization(
    mat: pd.DataFrame,
    model: Dict,
) -> Dict:
    """
    No-policy baseline forecast (W = 0): project RCA at t+horizon with the
    calibrated stepping-stone model, using entry coefficients for cells
    with RCA < threshold and exit coefficients otherwise.

    With no boost the steppingstone RCA equals the initial RCA, so the b1
    and b2 terms collapse into (b1 + b2) * r.

    Parameters
    ----------
    mat : pd.DataFrame (R x C)
        Value matrix at the initial period.
    model : dict
        Output of `calibrate_steppingstone`.

    Returns
    -------
    dict with:
        'rca' : projected RCA matrix (DataFrame)
        'mcp' : projected binary specialization matrix (DataFrame)
        'pci' : projected PCI (Series aligned to `mat.columns`; NaN for
                activities without a PCI value)
        'eci' : projection ECI per location = mean projected PCI over the
                projected portfolio (Series; NaN for an empty portfolio)
    """
    from ..complexity.eci_pci import eci_pci

    threshold = model.get("threshold", 1.0)
    R, r, omega, rel = _features(mat, threshold,
                                 model.get("proximity_method", "max"))

    # Start from NaN rather than uninitialized memory: a cell whose RCA is
    # NaN matches neither mask below and must not carry garbage forward.
    r_hat = np.full(np.shape(r), np.nan, dtype=float)
    for key, mask in (("entry", R < threshold), ("exit", R >= threshold)):
        b = model[key]
        pred = (b["b0_intercept"]
                + (b["b1_r_stepping"] + b["b2_r_initial"]) * r
                + b["b3_relatedness"] * omega
                + b["b4_relative_relatedness"] * rel)
        r_hat[mask] = pred[mask]

    # log(1 + RCA) cannot be negative, so clip before inverting.
    R_hat = np.expm1(np.clip(r_hat, 0.0, None))
    rca_hat = pd.DataFrame(R_hat, index=mat.index, columns=mat.columns)
    mcp_hat = (rca_hat >= threshold).astype(float)

    _, pci = eci_pci(rca_hat, use_rca=False, threshold=threshold)
    # Align PCI to the activity axis explicitly: the positional products
    # below (and callers that index `pci.values` with column masks) need
    # one entry per column of `mat`, with NaN where no PCI is available.
    pci = pci.reindex(mat.columns).rename("pci_projected")

    weights = mcp_hat.mul(pci.notna().astype(float), axis=1)
    portfolio = weights.values * np.nan_to_num(pci.values)[None, :]
    counts = weights.sum(axis=1).values
    eci_proj = pd.Series(
        safe_divide(portfolio.sum(axis=1), counts),
        index=mat.index, name="eci_projection",
    )
    eci_proj[counts == 0] = np.nan

    return {"rca": rca_hat, "mcp": mcp_hat, "pci": pci, "eci": eci_proj}
@@ -0,0 +1,6 @@
1
+ from .coi_cog import complexity_outlook_index, complexity_outlook_gain
2
+
3
+ __all__ = [
4
+ "complexity_outlook_index",
5
+ "complexity_outlook_gain",
6
+ ]