econcomplex 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. econcomplex/__init__.py +220 -0
  2. econcomplex/complexity/__init__.py +23 -0
  3. econcomplex/complexity/eci_pci.py +131 -0
  4. econcomplex/complexity/eigenvector.py +115 -0
  5. econcomplex/complexity/fitness.py +130 -0
  6. econcomplex/complexity/reflections.py +173 -0
  7. econcomplex/complexity/subnational.py +82 -0
  8. econcomplex/core/__init__.py +23 -0
  9. econcomplex/core/diversity.py +125 -0
  10. econcomplex/core/preprocess.py +83 -0
  11. econcomplex/core/rca.py +161 -0
  12. econcomplex/core/utils.py +137 -0
  13. econcomplex/dynamics/__init__.py +10 -0
  14. econcomplex/dynamics/entry_exit.py +248 -0
  15. econcomplex/dynamics/growth.py +146 -0
  16. econcomplex/inequality/__init__.py +11 -0
  17. econcomplex/inequality/concentration.py +148 -0
  18. econcomplex/inequality/gini.py +164 -0
  19. econcomplex/optimization/__init__.py +46 -0
  20. econcomplex/optimization/diffusion.py +379 -0
  21. econcomplex/optimization/growth_target.py +170 -0
  22. econcomplex/optimization/portfolio.py +178 -0
  23. econcomplex/optimization/steppingstone.py +267 -0
  24. econcomplex/outlook/__init__.py +6 -0
  25. econcomplex/outlook/coi_cog.py +168 -0
  26. econcomplex/patents/__init__.py +7 -0
  27. econcomplex/patents/recombination.py +135 -0
  28. econcomplex/pipeline.py +255 -0
  29. econcomplex/productivity/__init__.py +8 -0
  30. econcomplex/productivity/prody.py +218 -0
  31. econcomplex/relatedness/__init__.py +25 -0
  32. econcomplex/relatedness/cooccurrence.py +173 -0
  33. econcomplex/relatedness/cross_space.py +142 -0
  34. econcomplex/relatedness/density.py +232 -0
  35. econcomplex/relatedness/proximity.py +214 -0
  36. econcomplex/specialization/__init__.py +17 -0
  37. econcomplex/specialization/location_quotient.py +163 -0
  38. econcomplex/specialization/similarity.py +68 -0
  39. econcomplex-1.0.0.dist-info/METADATA +223 -0
  40. econcomplex-1.0.0.dist-info/RECORD +43 -0
  41. econcomplex-1.0.0.dist-info/WHEEL +5 -0
  42. econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
  43. econcomplex-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,232 @@
1
+ """
2
+ Relatedness density, distance, and related indicators.
3
+
4
+ References
5
+ ----------
6
+ Hidalgo et al. (2007); Hausmann & Klinger (2007);
7
+ Balland et al. (2019).
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ from typing import Optional, Union
13
+
14
+ from ..core.utils import validate_matrix, safe_divide, binarize
15
+ from ..core.rca import rca as compute_rca
16
+
17
+
18
def _get_mcp_and_phi(
    mat,
    phi,
    use_rca: bool,
    threshold: float,
    proximity_method: str,
):
    """
    Build the binary presence matrix and the activity proximity matrix.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix; passed through `validate_matrix`.
    phi : array-like (C x C) or None
        Pre-computed proximity. When None it is derived from the binary
        matrix via `proximity(..., compute="product")`.
    use_rca : bool
        Binarize the RCA of `mat` instead of `mat` itself.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Normalization forwarded to `proximity` when `phi` is None.

    Returns
    -------
    tuple
        ``(m, phi_arr, is_df)``: the binarized matrix, the proximity values
        (DataFrames are unwrapped with ``.values``) and a flag telling
        whether `mat` was a DataFrame.
    """
    input_is_frame = isinstance(mat, pd.DataFrame)

    values = validate_matrix(mat)
    source = compute_rca(values) if use_rca else values
    presence = binarize(source, threshold)

    if phi is not None:
        if isinstance(phi, pd.DataFrame):
            proximity_values = phi.values
        else:
            proximity_values = np.array(phi, dtype=float)
        return presence, proximity_values, input_is_frame

    # Imported lazily: only needed when the proximity has to be computed here.
    from .proximity import proximity as _prox

    # `presence` is already binarized, so RCA is skipped and the matrix is
    # re-binarized at 0.5.
    computed = _prox(
        presence, use_rca=False, threshold=0.5,
        method=proximity_method, compute="product",
    )["product"]
    if isinstance(computed, pd.DataFrame):
        proximity_values = computed.values
    else:
        proximity_values = computed
    return presence, proximity_values, input_is_frame
50
+
51
+
52
def relatedness_density(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relatedness density for every (region, activity) pair.

        density_{rc} = sum_{c'} M_{rc'} Phi_{c'c} / sum_{c'} Phi_{c'c} * 100

    i.e. the proximity-weighted share of the activities related to c that
    region r already holds, expressed as a percentage. The denominator is
    the column sum of Phi (identical to the row sum when Phi is symmetric).

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    phi : array-like (C x C), optional
        Pre-computed product proximity matrix. Computed internally if None.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Normalization method for proximity ('max', 'sqrt', 'min').

    Returns
    -------
    R x C relatedness density matrix; a DataFrame labelled like `mat` when
    `mat` is a DataFrame, otherwise an ndarray.
    """
    presence, phi_arr, _ = _get_mcp_and_phi(
        mat, phi, use_rca, threshold, proximity_method
    )

    related_total = phi_arr.sum(axis=0, keepdims=True)  # 1 x C
    related_held = presence @ phi_arr                   # R x C
    scores = safe_divide(related_held, related_total) * 100

    if not isinstance(mat, pd.DataFrame):
        return scores
    return pd.DataFrame(scores, index=mat.index, columns=mat.columns)
97
+
98
+
99
def distance(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Distance (1 - density/100).

    Weighted fraction of related activities that region r does NOT have.
    All arguments are forwarded unchanged to `relatedness_density`.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    phi : array-like (C x C), optional
        Pre-computed product proximity matrix. Computed internally if None.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Normalization method for proximity ('max', 'sqrt', 'min').

    Returns
    -------
    R x C distance matrix, of the same container type that
    `relatedness_density` returns for `mat`.
    """
    dens = relatedness_density(
        mat,
        phi=phi,
        use_rca=use_rca,
        threshold=threshold,
        proximity_method=proximity_method,
    )
    # The arithmetic is identical for DataFrames and ndarrays, so no
    # type dispatch is needed.
    return 1 - dens / 100
120
+
121
+
122
def relatedness_density_internal(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relatedness density restricted to activities the region already holds.

    The density is computed as in `relatedness_density`; every cell whose
    binarized value equals 0 is then replaced by NaN, so only cells with
    M_{rc} = 1 keep their value.

    Returns
    -------
    R x C matrix (DataFrame if `mat` is a DataFrame, else ndarray).
    """
    presence, phi_arr, _ = _get_mcp_and_phi(
        mat, phi, use_rca, threshold, proximity_method
    )

    related_total = phi_arr.sum(axis=0, keepdims=True)
    related_held = presence @ phi_arr
    scores = safe_divide(related_held, related_total) * 100

    # Blank out the activities the region does not hold.
    held_only = scores.copy()
    held_only[presence == 0] = np.nan

    if not isinstance(mat, pd.DataFrame):
        return held_only
    return pd.DataFrame(held_only, index=mat.index, columns=mat.columns)
151
+
152
+
153
def relatedness_density_external(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relatedness density restricted to activities the region does not hold.

    The density is computed as in `relatedness_density`; every cell whose
    binarized value equals 1 is then replaced by NaN, so only cells with
    M_{rc} = 0 keep their value.

    Returns
    -------
    R x C matrix (DataFrame if `mat` is a DataFrame, else ndarray).
    """
    presence, phi_arr, _ = _get_mcp_and_phi(
        mat, phi, use_rca, threshold, proximity_method
    )

    related_total = phi_arr.sum(axis=0, keepdims=True)
    related_held = presence @ phi_arr
    scores = safe_divide(related_held, related_total) * 100

    # Blank out the activities the region already holds.
    options_only = scores.copy()
    options_only[presence == 1] = np.nan

    if not isinstance(mat, pd.DataFrame):
        return options_only
    return pd.DataFrame(options_only, index=mat.index, columns=mat.columns)
182
+
183
+
184
def relative_relatedness(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relative relatedness (Pinheiro et al. 2022, eq. 7): z-transform of the
    relatedness density against the statistics of the region's option set
    (activities it does NOT currently hold, M_{rc} = 0).

        relative_density_{rc} = (density_{rc} - mean_non_held_r) / std_non_held_r

    for cells where M_{rc} = 0; NaN otherwise. A region's row is left
    entirely NaN when its option set is empty or when the densities of its
    option set have zero standard deviation.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    phi : array-like (C x C), optional
        Pre-computed product proximity matrix. Computed internally if None.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Normalization method for proximity ('max', 'sqrt', 'min').

    References
    ----------
    Pinheiro, Hartmann, Boschma & Hidalgo (2022) "The time and frequency
    of unrelated diversification", Research Policy 51, 104323.

    Returns
    -------
    R x C standardized density matrix.
    """
    is_df = isinstance(mat, pd.DataFrame)
    row_index = mat.index if is_df else None
    col_index = mat.columns if is_df else None

    m, phi_arr, _ = _get_mcp_and_phi(mat, phi, use_rca, threshold, proximity_method)

    col_sums_phi = phi_arr.sum(axis=0, keepdims=True)
    numerator = m @ phi_arr
    density = safe_divide(numerator, col_sums_phi) * 100

    result = np.full_like(density, np.nan)
    for i in range(m.shape[0]):
        non_held = m[i] == 0
        if not non_held.any():
            # Region holds every activity: there is no option set to
            # standardize against (and std() of an empty slice would warn).
            continue
        vals = density[i, non_held]
        std = vals.std()
        if std > 0:
            result[i, non_held] = (vals - vals.mean()) / std

    if is_df:
        return pd.DataFrame(result, index=row_index, columns=col_index)
    return result
229
+
230
+
231
# Short alias matching the documented API: `density` is bound to the same
# function object as `relatedness_density`.
density = relatedness_density
@@ -0,0 +1,214 @@
1
+ """
2
+ Product Space proximity matrices.
3
+
4
+ References
5
+ ----------
6
+ Hidalgo et al. (2007) "The Product Space Conditions the Development of Nations".
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from typing import Literal, Union
12
+
13
+ from ..core.utils import validate_matrix, safe_divide, binarize
14
+ from ..core.rca import rca as compute_rca
15
+
16
+
17
def proximity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
    method: Literal["max", "sqrt", "min"] = "max",
    compute: Literal["product", "location", "both"] = "both",
    continuous: bool = False,
    continuous_method: Literal["correlation", "cosine"] = "correlation",
) -> dict:
    """
    Compute product and/or location proximity matrices.

    Product proximity (phi_pp'):
        numerator = M^T * M  (co-export / co-presence count)
        "max"  norm = max(U_p, U_p')
        "sqrt" norm = sqrt(U_p * U_p')   (geometric mean, cosine-like)
        "min"  norm = min(U_p, U_p')     (conditional probability)

    Location proximity (phi_rr'):
        Same structure but on M * M^T, normalized by diversity.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    method : str
        Normalization method: 'max', 'sqrt', or 'min'.
    compute : str
        Which side to compute: 'product', 'location', or 'both'.
    continuous : bool
        If True, skip binarization and compute the proximity on the
        continuous RCA values (see `continuous_proximity`); `method`
        and `threshold` are ignored.
    continuous_method : str
        Similarity used when `continuous=True`: 'correlation'
        (Pearson, rescaled to [0, 1]) or 'cosine'.

    Returns
    -------
    dict with keys 'product' and/or 'location' as DataFrames (when `mat`
    is a DataFrame) or ndarrays. Diagonals are set to zero.

    Raises
    ------
    ValueError
        If `compute` is not one of the accepted values, or if `method` is
        not one of the accepted values in binary (non-continuous) mode.
    """
    # Fail loudly on a bad `compute` instead of silently returning {}.
    if compute not in ("product", "location", "both"):
        raise ValueError("compute must be 'product', 'location', or 'both'.")
    # `method` is only used (and therefore only validated) in binary mode.
    if not continuous and method not in ("max", "sqrt", "min"):
        raise ValueError("method must be 'max', 'sqrt', or 'min'.")

    is_df = isinstance(mat, pd.DataFrame)
    row_index = mat.index if is_df else None
    col_index = mat.columns if is_df else None
    want_product = compute in ("product", "both")
    want_location = compute in ("location", "both")

    def _wrap(values: np.ndarray, labels):
        """Label a square matrix like the input when it was a DataFrame."""
        if is_df:
            return pd.DataFrame(values, index=labels, columns=labels)
        return values

    arr = validate_matrix(mat)
    results = {}

    if continuous:
        rca_arr = compute_rca(arr) if use_rca else arr
        if want_product:
            phi_p = continuous_proximity(rca_arr, method=continuous_method)
            results["product"] = _wrap(phi_p, col_index)
        if want_location:
            # Transposing makes the regions the "columns" being compared.
            phi_l = continuous_proximity(rca_arr.T, method=continuous_method)
            results["location"] = _wrap(phi_l, row_index)
        return results

    m = binarize(compute_rca(arr) if use_rca else arr, threshold)

    def _normalize(cooc: np.ndarray, counts: np.ndarray) -> np.ndarray:
        """Normalize co-occurrence matrix by row/col counts."""
        if method == "max":
            denom = np.maximum(counts[:, None], counts[None, :])
        elif method == "sqrt":
            denom = np.sqrt(counts[:, None] * counts[None, :])
        else:  # "min" (validated above)
            denom = np.minimum(counts[:, None], counts[None, :])
        return safe_divide(cooc, denom)

    if want_product:
        ubiq = m.sum(axis=0)       # C
        cooc_p = m.T @ m           # C x C
        phi_p = _normalize(cooc_p, ubiq)
        np.fill_diagonal(phi_p, 0.0)
        results["product"] = _wrap(phi_p, col_index)

    if want_location:
        div = m.sum(axis=1)        # R
        cooc_l = m @ m.T           # R x R
        phi_l = _normalize(cooc_l, div)
        np.fill_diagonal(phi_l, 0.0)
        results["location"] = _wrap(phi_l, row_index)

    return results
125
+
126
+
127
def continuous_proximity(
    rca_mat: Union[np.ndarray, pd.DataFrame],
    method: Literal["correlation", "cosine"] = "correlation",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Continuous product proximity from a (continuous) RCA matrix.

    method='correlation' (default):
        phi_{pp'} = (1 + corr(RCA_p, RCA_{p'})) / 2
        (Pearson correlation rescaled from [-1, 1] to [0, 1])
    method='cosine':
        phi_{pp'} = (RCA_p . RCA_{p'}) / (||RCA_p|| * ||RCA_{p'}||)

    Parameters
    ----------
    rca_mat : array-like (R x C)
        Pre-computed (continuous) RCA matrix.
    method : str
        'correlation' or 'cosine'.

    Returns
    -------
    C x C proximity matrix with zero diagonal (DataFrame labelled by the
    columns of `rca_mat` when it is a DataFrame, else ndarray).

    Raises
    ------
    ValueError
        If `method` is neither 'correlation' nor 'cosine'.
    """
    is_df = isinstance(rca_mat, pd.DataFrame)
    col_index = rca_mat.columns if is_df else None

    arr = validate_matrix(rca_mat)

    if method == "correlation":
        # np.corrcoef collapses to a scalar when there is a single column;
        # atleast_2d keeps the result C x C so the diagonal can be zeroed.
        # NOTE(review): zero-variance columns presumably still yield NaN
        # correlations here — confirm whether that is acceptable.
        corr = np.atleast_2d(np.corrcoef(arr.T))  # C x C
        phi = (1 + corr) / 2.0
    elif method == "cosine":
        norms = np.linalg.norm(arr, axis=0)       # C
        phi = safe_divide(arr.T @ arr, norms[:, None] * norms[None, :])
    else:
        raise ValueError("method must be 'correlation' or 'cosine'.")
    np.fill_diagonal(phi, 0.0)

    if is_df:
        return pd.DataFrame(phi, index=col_index, columns=col_index)
    return phi
169
+
170
+
171
def _continuous_on_values(mat, use_rca, method):
    """
    Run `continuous_proximity` on a value matrix.

    The matrix is validated, optionally converted to RCA, and handed to
    `continuous_proximity` with the requested `method`. When `mat` is a
    DataFrame the result is labelled with its columns on both axes.
    """
    values = validate_matrix(mat)
    if use_rca:
        values = compute_rca(values)
    similarity = continuous_proximity(values, method=method)

    if not isinstance(mat, pd.DataFrame):
        return similarity
    labels = mat.columns
    return pd.DataFrame(similarity, index=labels, columns=labels)
181
+
182
+
183
def cosine_proximity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Cosine-similarity proximity between activities.

    Convenience wrapper equivalent to
    `continuous_proximity(rca(mat), method='cosine')`; RCA is skipped when
    `use_rca` is False.

    Returns a C x C proximity matrix with zero diagonal.
    """
    return _continuous_on_values(mat, use_rca, method="cosine")
194
+
195
+
196
def correlation_proximity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Correlation-based proximity between activities.

    Convenience wrapper equivalent to
    `continuous_proximity(rca(mat), method='correlation')`: Pearson
    correlation rescaled to [0, 1]; RCA is skipped when `use_rca` is False.

    Returns a C x C proximity matrix with zero diagonal.
    """
    return _continuous_on_values(mat, use_rca, method="correlation")
207
+
208
+
209
# Documented-API alias: relatedness(mat, phi) == relatedness_density
# The import sits at the bottom of the module (hence the E402 suppression),
# after every function above has been defined.
from .density import relatedness_density as _relatedness_density  # noqa: E402

# `relatedness` is bound to the same function object as `relatedness_density`.
relatedness = _relatedness_density
213
+
214
+
@@ -0,0 +1,17 @@
1
# Re-export the location-quotient based indices and the export similarity
# index so they can be imported directly from this package.
from .location_quotient import (
    location_quotient,
    location_quotient_avg,
    hachman_index,
    specialization_coefficient,
    krugman_index,
)
from .similarity import export_similarity

# Names exposed by `from <package> import *`.
# NOTE(review): the `spec_coefficient` alias defined in `location_quotient`
# is not re-exported here — confirm whether that is intentional.
__all__ = [
    "location_quotient",
    "location_quotient_avg",
    "hachman_index",
    "specialization_coefficient",
    "krugman_index",
    "export_similarity",
]
@@ -0,0 +1,163 @@
1
+ """
2
+ Location quotient (LQ) variants and specialization indices.
3
+
4
+ References
5
+ ----------
6
+ Hoover & Giarratani (1985); Hachman (various); Balland & Rigby (2017).
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from typing import Union
12
+
13
+ from ..core.utils import validate_matrix, safe_divide
14
+ from ..core.rca import rca as compute_rca
15
+
16
+
17
def location_quotient(
    mat: Union[np.ndarray, pd.DataFrame],
    binary: bool = False,
    threshold: float = 1.0,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Location Quotient, i.e. the RCA / Balassa Index under another name.

        LQ_{rc} = (x_{rc}/X_r) / (X_c/X_total)

    Thin wrapper: `mat`, `binary` and `threshold` are forwarded unchanged
    to `core.rca.rca`, whose result is returned as is.
    """
    quotient = compute_rca(mat, binary=binary, threshold=threshold)
    return quotient
30
+
31
+
32
def location_quotient_avg(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Share-weighted average location quotient per region.

        avg_LQ_r = sum_c (LQ_{rc} * s_{rc})
        where s_{rc} = x_{rc} / X_r  (share of activity c in region r)

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.

    Returns
    -------
    pd.Series named "lq_avg" and indexed by region when `mat` is a
    DataFrame; otherwise a 1-D ndarray of length R.
    """
    values = validate_matrix(mat)

    quotients = compute_rca(values)
    if isinstance(quotients, pd.DataFrame):
        quotients = quotients.values

    # s_{rc}: each row divided by its own total.
    region_totals = values.sum(axis=1, keepdims=True)
    shares = safe_divide(values, region_totals)

    weighted = (quotients * shares).sum(axis=1)

    if not isinstance(mat, pd.DataFrame):
        return weighted
    return pd.Series(weighted, index=mat.index, name="lq_avg")
63
+
64
+
65
def hachman_index(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Hachman Index: reciprocal of the share-weighted average LQ.

        H_r = 1 / avg_LQ_r

    The reciprocal is clipped to [0, 1]; a value of 1 is described as the
    regional economy mirroring the national structure.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.

    Returns
    -------
    pd.Series named "hachman" and indexed by region when `mat` is a
    DataFrame; otherwise a 1-D ndarray.
    """
    mean_lq = location_quotient_avg(mat)
    mean_lq_values = mean_lq.values if isinstance(mean_lq, pd.Series) else mean_lq

    reciprocal = safe_divide(1.0, mean_lq_values)
    clipped = np.clip(reciprocal, 0, 1)

    if not isinstance(mat, pd.DataFrame):
        return clipped
    return pd.Series(clipped, index=mat.index, name="hachman")
94
+
95
+
96
def specialization_coefficient(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Hoover Coefficient of Specialization.

        spec_r = (1/2) * sum_c |s_{rc} - s_c|

    where s_{rc} is the share of activity c in region r and s_c is the
    share of activity c in the overall total. This is half of what
    `krugman_index` returns.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.

    Returns
    -------
    pd.Series named "spec_coeff" and indexed by region when `mat` is a
    DataFrame; otherwise a 1-D ndarray.
    """
    values = validate_matrix(mat)

    # Regional structure: each row divided by its own total.
    regional_shares = safe_divide(values, values.sum(axis=1, keepdims=True))
    # National structure: column totals over the grand total.
    # NOTE(review): this division is not guarded, unlike the row shares —
    # confirm the intended result for an all-zero matrix.
    national_shares = values.sum(axis=0, keepdims=True) / values.sum()

    half_abs_gap = 0.5 * np.abs(regional_shares - national_shares).sum(axis=1)

    if not isinstance(mat, pd.DataFrame):
        return half_abs_gap
    return pd.Series(half_abs_gap, index=mat.index, name="spec_coeff")
131
+
132
+
133
def krugman_index(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Krugman Specialization Index.

        K_r = sum_c |s_{rc} - s_c|

    where s_{rc} is the share of activity c in region r and s_c is the
    national share of activity c. Computed as twice the result of
    `specialization_coefficient`.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.

    Returns
    -------
    pd.Series named "krugman_index" and indexed by region when `mat` is a
    DataFrame; otherwise a 1-D ndarray.
    """
    half = specialization_coefficient(mat)
    if isinstance(half, pd.Series):
        half = half.values
    doubled = 2 * half

    if not isinstance(mat, pd.DataFrame):
        return doubled
    return pd.Series(doubled, index=mat.index, name="krugman_index")
160
+
161
+
162
# Short alias matching the documented API: `spec_coefficient` is bound to
# the same function object as `specialization_coefficient`.
spec_coefficient = specialization_coefficient
@@ -0,0 +1,68 @@
1
+ """
2
+ Export / portfolio similarity between locations.
3
+
4
+ References
5
+ ----------
6
+ Bahar et al. (2014) "Neighbors and the Evolution of the Comparative Advantage
7
+ of Nations".
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ from typing import Union
13
+
14
+ from ..core.utils import validate_matrix
15
+ from ..core.rca import rca as compute_rca
16
+
17
+
18
def export_similarity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    epsilon: float = 0.1,
    log: bool = True,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Export Similarity Index (Bahar et al. 2014).

    Pearson correlation of (log-)RCA vectors between location pairs.

        SCC_{rr'} = corr(log(RCA_r + epsilon), log(RCA_{r'} + epsilon))

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        Compute RCA internally (True) or treat mat as RCA (False).
    epsilon : float
        Small constant added before log to avoid log(0).
    log : bool
        If True, apply log transform before correlation.

    Returns
    -------
    R x R similarity matrix with the diagonal set to 1 (DataFrame labelled
    by region when `mat` is a DataFrame, else ndarray). A single-region
    input yields a 1 x 1 matrix.
    """
    is_df = isinstance(mat, pd.DataFrame)
    row_index = mat.index if is_df else None

    arr = validate_matrix(mat)

    if use_rca:
        rca_arr = compute_rca(arr)
        if isinstance(rca_arr, pd.DataFrame):
            rca_arr = rca_arr.values
    else:
        rca_arr = arr

    transformed = np.log(rca_arr + epsilon) if log else rca_arr

    # np.corrcoef collapses to a scalar when there is a single region;
    # atleast_2d keeps the result R x R so the diagonal can be set.
    result = np.atleast_2d(np.corrcoef(transformed))  # R x R
    np.fill_diagonal(result, 1.0)

    if is_df:
        return pd.DataFrame(result, index=row_index, columns=row_index)
    return result