econcomplex 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- econcomplex/__init__.py +220 -0
- econcomplex/complexity/__init__.py +23 -0
- econcomplex/complexity/eci_pci.py +131 -0
- econcomplex/complexity/eigenvector.py +115 -0
- econcomplex/complexity/fitness.py +130 -0
- econcomplex/complexity/reflections.py +173 -0
- econcomplex/complexity/subnational.py +82 -0
- econcomplex/core/__init__.py +23 -0
- econcomplex/core/diversity.py +125 -0
- econcomplex/core/preprocess.py +83 -0
- econcomplex/core/rca.py +161 -0
- econcomplex/core/utils.py +137 -0
- econcomplex/dynamics/__init__.py +10 -0
- econcomplex/dynamics/entry_exit.py +248 -0
- econcomplex/dynamics/growth.py +146 -0
- econcomplex/inequality/__init__.py +11 -0
- econcomplex/inequality/concentration.py +148 -0
- econcomplex/inequality/gini.py +164 -0
- econcomplex/optimization/__init__.py +46 -0
- econcomplex/optimization/diffusion.py +379 -0
- econcomplex/optimization/growth_target.py +170 -0
- econcomplex/optimization/portfolio.py +178 -0
- econcomplex/optimization/steppingstone.py +267 -0
- econcomplex/outlook/__init__.py +6 -0
- econcomplex/outlook/coi_cog.py +168 -0
- econcomplex/patents/__init__.py +7 -0
- econcomplex/patents/recombination.py +135 -0
- econcomplex/pipeline.py +255 -0
- econcomplex/productivity/__init__.py +8 -0
- econcomplex/productivity/prody.py +218 -0
- econcomplex/relatedness/__init__.py +25 -0
- econcomplex/relatedness/cooccurrence.py +173 -0
- econcomplex/relatedness/cross_space.py +142 -0
- econcomplex/relatedness/density.py +232 -0
- econcomplex/relatedness/proximity.py +214 -0
- econcomplex/specialization/__init__.py +17 -0
- econcomplex/specialization/location_quotient.py +163 -0
- econcomplex/specialization/similarity.py +68 -0
- econcomplex-1.0.0.dist-info/METADATA +223 -0
- econcomplex-1.0.0.dist-info/RECORD +43 -0
- econcomplex-1.0.0.dist-info/WHEEL +5 -0
- econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
- econcomplex-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Relatedness density, distance, and related indicators.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Hidalgo et al. (2007); Hausmann & Klinger (2007);
|
|
7
|
+
Balland et al. (2019).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from typing import Optional, Union
|
|
13
|
+
|
|
14
|
+
from ..core.utils import validate_matrix, safe_divide, binarize
|
|
15
|
+
from ..core.rca import rca as compute_rca
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _get_mcp_and_phi(
    mat,
    phi,
    use_rca: bool,
    threshold: float,
    proximity_method: str,
):
    """Build the binary presence matrix and the product proximity matrix.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix; passed through ``validate_matrix``.
    phi : array-like (C x C) or None
        Pre-computed proximity. When None it is computed from the binarized
        matrix via ``proximity(..., compute="product")``.
    use_rca : bool
        Binarize the RCA of ``mat`` instead of ``mat`` itself.
    threshold : float
        Threshold handed to ``binarize``.
    proximity_method : str
        Forwarded as ``method`` to ``proximity`` when ``phi`` is None.

    Returns
    -------
    tuple
        ``(m, phi_arr, is_df)``: the binarized matrix, the proximity values
        (DataFrames are unwrapped to their ``.values``), and whether ``mat``
        was a DataFrame.
    """
    values = validate_matrix(mat)
    basis = compute_rca(values) if use_rca else values
    presence = binarize(basis, threshold)
    frame_input = isinstance(mat, pd.DataFrame)

    if phi is not None:
        # Caller supplied the proximity: only normalise its container type.
        if isinstance(phi, pd.DataFrame):
            return presence, phi.values, frame_input
        return presence, np.array(phi, dtype=float), frame_input

    # Imported inside the function: proximity.py imports this module at its
    # bottom (for the `relatedness` alias), so a top-level import here would
    # be circular.
    from .proximity import proximity as _prox

    # The matrix is already binarized, so RCA is switched off; the 0.5
    # threshold presumably leaves the 0/1 entries unchanged when `proximity`
    # binarizes again.
    product_phi = _prox(
        presence,
        use_rca=False,
        threshold=0.5,
        method=proximity_method,
        compute="product",
    )["product"]
    if isinstance(product_phi, pd.DataFrame):
        product_phi = product_phi.values

    return presence, product_phi, frame_input
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def relatedness_density(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relatedness density for every (region, activity) pair.

        density_{rc} = (M @ Phi)_{rc} / colSums(Phi)_c * 100

    i.e. the proximity-weighted share of activities related to c that
    region r already has, expressed as a percentage. (For a symmetric Phi
    the column sums equal the row sums.)

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    phi : array-like (C x C), optional
        Pre-computed product proximity matrix. Computed internally if None.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Normalization method for proximity ('max', 'sqrt', 'min'); only
        used when `phi` is None.

    Returns
    -------
    R x C relatedness density matrix (percentages). A DataFrame labelled
    like `mat` when `mat` is a DataFrame, otherwise an ndarray.
    """
    presence, phi_values, _ = _get_mcp_and_phi(
        mat, phi, use_rca, threshold, proximity_method
    )

    # Total proximity attached to each activity (1 x C) ...
    phi_totals = phi_values.sum(axis=0, keepdims=True)
    # ... and the part of it contributed by activities the region holds (R x C).
    held_proximity = presence @ phi_values
    pct = safe_divide(held_proximity, phi_totals) * 100

    if not isinstance(mat, pd.DataFrame):
        return pct
    return pd.DataFrame(pct, index=mat.index, columns=mat.columns)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def distance(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Distance (1 - density/100).

    Weighted fraction of related activities that region r does NOT have.

    Parameters
    ----------
    mat, phi, use_rca, threshold, proximity_method
        Forwarded unchanged to `relatedness_density`.

    Returns
    -------
    R x C distance matrix, of the same container type that
    `relatedness_density` returns for `mat` (DataFrame or ndarray).
    """
    dens = relatedness_density(
        mat,
        phi=phi,
        use_rca=use_rca,
        threshold=threshold,
        proximity_method=proximity_method,
    )
    # The arithmetic is element-wise for ndarrays and DataFrames alike, so a
    # single expression covers both return types.
    return 1 - dens / 100
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def relatedness_density_internal(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relatedness density restricted to activities the region ALREADY has.

    The density is computed exactly as in `relatedness_density`; cells
    where the binarized matrix is 0 (activity not held) are then replaced
    by NaN.

    Returns
    -------
    R x C matrix (DataFrame labelled like `mat` when `mat` is a DataFrame).
    """
    presence, phi_values, _ = _get_mcp_and_phi(
        mat, phi, use_rca, threshold, proximity_method
    )

    phi_totals = phi_values.sum(axis=0, keepdims=True)
    pct = safe_divide(presence @ phi_values, phi_totals) * 100

    # Work on a copy so the full density array itself is left untouched.
    held_only = pct.copy()
    held_only[presence == 0] = np.nan

    if not isinstance(mat, pd.DataFrame):
        return held_only
    return pd.DataFrame(held_only, index=mat.index, columns=mat.columns)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def relatedness_density_external(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relatedness density restricted to activities the region does NOT yet have.

    The density is computed exactly as in `relatedness_density`; cells
    where the binarized matrix is 1 (activity already held) are then
    replaced by NaN.

    Returns
    -------
    R x C matrix (DataFrame labelled like `mat` when `mat` is a DataFrame).
    """
    presence, phi_values, _ = _get_mcp_and_phi(
        mat, phi, use_rca, threshold, proximity_method
    )

    phi_totals = phi_values.sum(axis=0, keepdims=True)
    pct = safe_divide(presence @ phi_values, phi_totals) * 100

    # Work on a copy so the full density array itself is left untouched.
    missing_only = pct.copy()
    missing_only[presence == 1] = np.nan

    if not isinstance(mat, pd.DataFrame):
        return missing_only
    return pd.DataFrame(missing_only, index=mat.index, columns=mat.columns)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def relative_relatedness(
    mat: Union[np.ndarray, pd.DataFrame],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Relative relatedness (Pinheiro et al. 2022, eq. 7): z-transform of the
    relatedness density against the statistics of the region's option set
    (activities it does NOT currently hold, M_{rc} = 0).

        relative_density_{rc} = (density_{rc} - mean_non_held_r) / std_non_held_r

    for cells where M_{rc} = 0; NaN otherwise. The standard deviation is
    the population one (NumPy's default ``ddof=0``). A region's whole row
    stays NaN when its option set is empty or its densities have zero
    (or undefined) spread.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    phi : array-like (C x C), optional
        Pre-computed product proximity matrix. Computed internally if None.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Normalization method for proximity; only used when `phi` is None.

    References
    ----------
    Pinheiro, Hartmann, Boschma & Hidalgo (2022) "The time and frequency
    of unrelated diversification", Research Policy 51, 104323.

    Returns
    -------
    R x C standardized density matrix (DataFrame labelled like `mat` when
    `mat` is a DataFrame).
    """
    m, phi_arr, _ = _get_mcp_and_phi(mat, phi, use_rca, threshold, proximity_method)

    col_sums_phi = phi_arr.sum(axis=0, keepdims=True)
    density = safe_divide(m @ phi_arr, col_sums_phi) * 100

    result = np.full_like(density, np.nan)
    for i in range(m.shape[0]):
        non_held = m[i] == 0
        if not non_held.any():
            # Region already holds every activity: nothing to standardise.
            # Skipping here avoids NumPy's empty-slice RuntimeWarnings.
            continue
        vals = density[i, non_held]
        std = vals.std()
        # A zero or NaN spread fails this test and leaves the row as NaN.
        if std > 0:
            result[i, non_held] = (vals - vals.mean()) / std

    if isinstance(mat, pd.DataFrame):
        return pd.DataFrame(result, index=mat.index, columns=mat.columns)
    return result
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
# Short alias matching the documented API: `density` is bound to the very
# same function object as `relatedness_density`.
density = relatedness_density
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Product Space proximity matrices.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Hidalgo et al. (2007) "The Product Space Conditions the Development of Nations".
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from typing import Literal, Union
|
|
12
|
+
|
|
13
|
+
from ..core.utils import validate_matrix, safe_divide, binarize
|
|
14
|
+
from ..core.rca import rca as compute_rca
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def proximity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
    method: Literal["max", "sqrt", "min"] = "max",
    compute: Literal["product", "location", "both"] = "both",
    continuous: bool = False,
    continuous_method: Literal["correlation", "cosine"] = "correlation",
) -> dict:
    """
    Compute product and/or location proximity matrices.

    Product proximity (phi_pp'):
        co-occurrence M^T @ M divided by a norm of the two ubiquities
        (column sums of M):
          "max"  -> max(U_p, U_p')
          "sqrt" -> sqrt(U_p * U_p')
          "min"  -> min(U_p, U_p')

    Location proximity (phi_rr'):
        same construction on M @ M^T, normalized with the diversities
        (row sums of M).

    Both matrices get a zero diagonal.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        Compute RCA before binarizing (or before the continuous similarity).
    threshold : float
        Binarization threshold.
    method : str
        Normalization method: 'max', 'sqrt', or 'min'.
    compute : str
        Which side to compute: 'product', 'location', or 'both'. Any other
        value yields an empty dict.
    continuous : bool
        If True, skip binarization and delegate to `continuous_proximity`;
        `method` and `threshold` are ignored.
    continuous_method : str
        Similarity used when `continuous=True`: 'correlation' or 'cosine'.

    Returns
    -------
    dict with keys 'product' and/or 'location'; values are DataFrames when
    `mat` is a DataFrame, ndarrays otherwise.

    Raises
    ------
    ValueError
        If a binary proximity is requested with an unknown `method`.
    """
    frame_input = isinstance(mat, pd.DataFrame)
    region_labels = mat.index if frame_input else None
    activity_labels = mat.columns if frame_input else None

    def _labelled(values, labels):
        """Wrap a square result in a DataFrame when the input was one."""
        if frame_input:
            return pd.DataFrame(values, index=labels, columns=labels)
        return values

    values = validate_matrix(mat)
    out = {}

    if continuous:
        basis = compute_rca(values) if use_rca else values
        if compute in ("product", "both"):
            out["product"] = _labelled(
                continuous_proximity(basis, method=continuous_method),
                activity_labels,
            )
        if compute in ("location", "both"):
            # Transposing makes regions play the role of "columns".
            out["location"] = _labelled(
                continuous_proximity(basis.T, method=continuous_method),
                region_labels,
            )
        return out

    presence = binarize(compute_rca(values) if use_rca else values, threshold)

    def _scaled(cooc: np.ndarray, counts: np.ndarray) -> np.ndarray:
        """Divide a co-occurrence matrix by the pairwise norm of `counts`."""
        if method not in ("max", "sqrt", "min"):
            raise ValueError("method must be 'max', 'sqrt', or 'min'.")
        down, across = counts[:, None], counts[None, :]
        if method == "max":
            scale = np.maximum(down, across)
        elif method == "min":
            scale = np.minimum(down, across)
        else:
            scale = np.sqrt(down * across)
        return safe_divide(cooc, scale)

    if compute in ("product", "both"):
        ubiquity = presence.sum(axis=0)  # C
        co_presence = presence.T @ presence  # C x C
        product_phi = _scaled(co_presence, ubiquity)
        np.fill_diagonal(product_phi, 0.0)
        out["product"] = _labelled(product_phi, activity_labels)

    if compute in ("location", "both"):
        diversity = presence.sum(axis=1)  # R
        co_location = presence @ presence.T  # R x R
        location_phi = _scaled(co_location, diversity)
        np.fill_diagonal(location_phi, 0.0)
        out["location"] = _labelled(location_phi, region_labels)

    return out
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def continuous_proximity(
    rca_mat: Union[np.ndarray, pd.DataFrame],
    method: Literal["correlation", "cosine"] = "correlation",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Continuous product proximity from a (continuous) RCA matrix.

    method='correlation' (default):
        phi_{pp'} = (1 + corr(RCA_p, RCA_{p'})) / 2
        (Pearson correlation rescaled from [-1, 1] to [0, 1])
    method='cosine':
        phi_{pp'} = (RCA_p . RCA_{p'}) / (||RCA_p|| * ||RCA_{p'}||)

    Parameters
    ----------
    rca_mat : array-like (R x C)
        Pre-computed (continuous) RCA matrix.
    method : str
        'correlation' or 'cosine'.

    Returns
    -------
    C x C proximity matrix with zero diagonal (DataFrame labelled by the
    columns of `rca_mat` when it is a DataFrame).

    Raises
    ------
    ValueError
        If `method` is neither 'correlation' nor 'cosine'.
    """
    is_df = isinstance(rca_mat, pd.DataFrame)
    col_index = rca_mat.columns if is_df else None

    arr = validate_matrix(rca_mat)

    if method == "correlation":
        # np.corrcoef collapses to a 0-d value when there is a single
        # column; restore the C x C shape so the diagonal can be zeroed,
        # as the cosine branch already allows.
        corr = np.atleast_2d(np.corrcoef(arr.T))  # C x C
        phi = (1 + corr) / 2.0
    elif method == "cosine":
        norms = np.linalg.norm(arr, axis=0)  # C
        phi = safe_divide(arr.T @ arr, norms[:, None] * norms[None, :])
    else:
        raise ValueError("method must be 'correlation' or 'cosine'.")
    np.fill_diagonal(phi, 0.0)

    if is_df:
        return pd.DataFrame(phi, index=col_index, columns=col_index)
    return phi
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def _continuous_on_values(mat, use_rca, method):
    """Validate `mat`, optionally take its RCA, and run `continuous_proximity`.

    Returns a DataFrame labelled by `mat.columns` when `mat` is a DataFrame,
    otherwise whatever `continuous_proximity` returns.
    """
    values = validate_matrix(mat)
    if use_rca:
        values = compute_rca(values)
    phi = continuous_proximity(values, method=method)

    if not isinstance(mat, pd.DataFrame):
        return phi
    labels = mat.columns
    return pd.DataFrame(phi, index=labels, columns=labels)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def cosine_proximity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Cosine proximity between activities.

    Convenience wrapper equivalent to
    `continuous_proximity(rca(mat), method='cosine')`; with `use_rca=False`
    the similarity is taken on `mat` directly.

    Returns a C x C proximity matrix with zero diagonal.
    """
    phi = _continuous_on_values(mat, use_rca=use_rca, method="cosine")
    return phi
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def correlation_proximity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Correlation proximity between activities.

    Convenience wrapper equivalent to
    `continuous_proximity(rca(mat), method='correlation')`, i.e. the Pearson
    correlation rescaled to [0, 1]; with `use_rca=False` the similarity is
    taken on `mat` directly.

    Returns a C x C proximity matrix with zero diagonal.
    """
    phi = _continuous_on_values(mat, use_rca=use_rca, method="correlation")
    return phi
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
# Documented-API alias: relatedness(mat, phi) == relatedness_density
# NOTE(review): the import sits at the bottom of the module (hence the E402
# suppression), presumably to keep the density <-> proximity import cycle
# harmless -- density.py imports this module lazily inside a function.
from .density import relatedness_density as _relatedness_density  # noqa: E402

relatedness = _relatedness_density
|
|
213
|
+
|
|
214
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from .location_quotient import (
|
|
2
|
+
location_quotient,
|
|
3
|
+
location_quotient_avg,
|
|
4
|
+
hachman_index,
|
|
5
|
+
specialization_coefficient,
|
|
6
|
+
krugman_index,
|
|
7
|
+
)
|
|
8
|
+
from .similarity import export_similarity
|
|
9
|
+
|
|
10
|
+
# Public names re-exported by the `specialization` subpackage.
# NOTE(review): the `spec_coefficient` alias defined in location_quotient.py
# is not re-exported here -- confirm whether that is intentional.
__all__ = [
    "location_quotient",
    "location_quotient_avg",
    "hachman_index",
    "specialization_coefficient",
    "krugman_index",
    "export_similarity",
]
|
|
@@ -0,0 +1,163 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Location quotient (LQ) variants and specialization indices.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Hoover & Giarratani (1985); Hachman (various); Balland & Rigby (2017).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
from typing import Union
|
|
12
|
+
|
|
13
|
+
from ..core.utils import validate_matrix, safe_divide
|
|
14
|
+
from ..core.rca import rca as compute_rca
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def location_quotient(
    mat: Union[np.ndarray, pd.DataFrame],
    binary: bool = False,
    threshold: float = 1.0,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Location Quotient (identical to RCA / Balassa Index).

        LQ_{rc} = (x_{rc}/X_r) / (X_c/X_total)

    Thin alias of `core.rca.rca`: `mat`, `binary` and `threshold` are
    forwarded unchanged and its result is returned as-is.
    """
    quotient = compute_rca(mat, binary=binary, threshold=threshold)
    return quotient
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def location_quotient_avg(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Share-weighted average LQ per region.

        avg_LQ_r = sum_c (LQ_{rc} * s_{rc})
        where s_{rc} = x_{rc} / X_r   (share of activity c in region r)

    A value > 1 indicates the region is more specialized than the
    national average.

    Returns
    -------
    pd.Series indexed by region (name "lq_avg") when `mat` is a DataFrame,
    otherwise a 1-D ndarray.
    """
    values = validate_matrix(mat)

    quotients = compute_rca(values)
    if isinstance(quotients, pd.DataFrame):
        quotients = quotients.values

    region_totals = values.sum(axis=1, keepdims=True)
    region_shares = safe_divide(values, region_totals)  # s_{rc}

    weighted = quotients * region_shares
    averages = weighted.sum(axis=1)

    if not isinstance(mat, pd.DataFrame):
        return averages
    return pd.Series(averages, index=mat.index, name="lq_avg")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def hachman_index(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Hachman Index (structural similarity to the national economy).

        H_r = 1 / avg_LQ_r

    with avg_LQ_r taken from `location_quotient_avg`. The result is clipped
    to [0, 1]; a value of 1 means the regional economy mirrors the national
    structure.

    Returns
    -------
    pd.Series indexed by region (name "hachman") when `mat` is a DataFrame,
    otherwise an ndarray.
    """
    avg_lq = location_quotient_avg(mat)
    raw = avg_lq.values if isinstance(avg_lq, pd.Series) else avg_lq

    inverse = safe_divide(1.0, raw)
    bounded = np.clip(inverse, 0, 1)

    if not isinstance(mat, pd.DataFrame):
        return bounded
    return pd.Series(bounded, index=mat.index, name="hachman")
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def specialization_coefficient(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Hoover Coefficient of Specialization.

        spec_r = (1/2) * sum_c |s_{rc} - s_c|

    where s_{rc} is the share of activity c within region r and s_c is the
    share of activity c in the overall total. This is half the Krugman
    Index; 0 means the region mirrors the national structure.

    Returns
    -------
    pd.Series indexed by region (name "spec_coeff") when `mat` is a
    DataFrame, otherwise a 1-D ndarray.
    """
    values = validate_matrix(mat)

    grand_total = values.sum()
    region_totals = values.sum(axis=1, keepdims=True)
    activity_totals = values.sum(axis=0, keepdims=True)

    regional_shares = safe_divide(values, region_totals)
    # NOTE(review): plain division here, unlike the safe_divide used for the
    # regional shares -- an all-zero matrix would divide by zero; confirm
    # that validate_matrix rules that input out.
    national_shares = activity_totals / grand_total

    gaps = np.abs(regional_shares - national_shares)
    coefficient = 0.5 * gaps.sum(axis=1)

    if not isinstance(mat, pd.DataFrame):
        return coefficient
    return pd.Series(coefficient, index=mat.index, name="spec_coeff")
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def krugman_index(
    mat: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Krugman Specialization Index.

        K_r = sum_c |s_{rc} - s_c|

    where s_{rc} is the share of activity c within region r and s_c is the
    share of activity c in the overall total. Computed as twice
    `specialization_coefficient`, so it ranges over [0, 2].

    Returns
    -------
    pd.Series indexed by region (name "krugman_index") when `mat` is a
    DataFrame, otherwise an ndarray.
    """
    half_index = specialization_coefficient(mat)
    if isinstance(half_index, pd.Series):
        half_index = half_index.values
    doubled = 2 * half_index

    if not isinstance(mat, pd.DataFrame):
        return doubled
    return pd.Series(doubled, index=mat.index, name="krugman_index")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
# Short alias matching the documented API: `spec_coefficient` is bound to
# the very same function object as `specialization_coefficient`.
spec_coefficient = specialization_coefficient
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Export / portfolio similarity between locations.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Bahar et al. (2014) "Neighbors and the Evolution of the Comparative Advantage
|
|
7
|
+
of Nations".
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from typing import Union
|
|
13
|
+
|
|
14
|
+
from ..core.utils import validate_matrix
|
|
15
|
+
from ..core.rca import rca as compute_rca
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def export_similarity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    epsilon: float = 0.1,
    log: bool = True,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Export Similarity Index (Bahar et al. 2014).

    Pearson correlation of (log-)RCA vectors between location pairs.

        SCC_{rr'} = corr(log(RCA_r + epsilon), log(RCA_{r'} + epsilon))

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        Compute RCA internally (True) or treat mat as RCA (False).
    epsilon : float
        Small constant added before log to avoid log(0).
    log : bool
        If True, apply log transform before correlation.

    Returns
    -------
    R x R similarity matrix with the diagonal set to 1 (DataFrame labelled
    by the index of `mat` when it is a DataFrame).
    """
    is_df = isinstance(mat, pd.DataFrame)
    row_index = mat.index if is_df else None

    arr = validate_matrix(mat)

    if use_rca:
        rca_arr = compute_rca(arr)
        if isinstance(rca_arr, pd.DataFrame):
            rca_arr = rca_arr.values
    else:
        rca_arr = arr

    transformed = np.log(rca_arr + epsilon) if log else rca_arr

    # np.corrcoef collapses to a 0-d value when there is a single location;
    # restore the R x R shape so the diagonal assignment below works
    # instead of raising.
    result = np.atleast_2d(np.corrcoef(transformed))  # R x R
    # Self-similarity is 1 by definition, even where the correlation is NaN
    # (e.g. a location whose transformed vector is constant).
    np.fill_diagonal(result, 1.0)

    if is_df:
        return pd.DataFrame(result, index=row_index, columns=row_index)
    return result
|