econcomplex 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- econcomplex/__init__.py +220 -0
- econcomplex/complexity/__init__.py +23 -0
- econcomplex/complexity/eci_pci.py +131 -0
- econcomplex/complexity/eigenvector.py +115 -0
- econcomplex/complexity/fitness.py +130 -0
- econcomplex/complexity/reflections.py +173 -0
- econcomplex/complexity/subnational.py +82 -0
- econcomplex/core/__init__.py +23 -0
- econcomplex/core/diversity.py +125 -0
- econcomplex/core/preprocess.py +83 -0
- econcomplex/core/rca.py +161 -0
- econcomplex/core/utils.py +137 -0
- econcomplex/dynamics/__init__.py +10 -0
- econcomplex/dynamics/entry_exit.py +248 -0
- econcomplex/dynamics/growth.py +146 -0
- econcomplex/inequality/__init__.py +11 -0
- econcomplex/inequality/concentration.py +148 -0
- econcomplex/inequality/gini.py +164 -0
- econcomplex/optimization/__init__.py +46 -0
- econcomplex/optimization/diffusion.py +379 -0
- econcomplex/optimization/growth_target.py +170 -0
- econcomplex/optimization/portfolio.py +178 -0
- econcomplex/optimization/steppingstone.py +267 -0
- econcomplex/outlook/__init__.py +6 -0
- econcomplex/outlook/coi_cog.py +168 -0
- econcomplex/patents/__init__.py +7 -0
- econcomplex/patents/recombination.py +135 -0
- econcomplex/pipeline.py +255 -0
- econcomplex/productivity/__init__.py +8 -0
- econcomplex/productivity/prody.py +218 -0
- econcomplex/relatedness/__init__.py +25 -0
- econcomplex/relatedness/cooccurrence.py +173 -0
- econcomplex/relatedness/cross_space.py +142 -0
- econcomplex/relatedness/density.py +232 -0
- econcomplex/relatedness/proximity.py +214 -0
- econcomplex/specialization/__init__.py +17 -0
- econcomplex/specialization/location_quotient.py +163 -0
- econcomplex/specialization/similarity.py +68 -0
- econcomplex-1.0.0.dist-info/METADATA +223 -0
- econcomplex-1.0.0.dist-info/RECORD +43 -0
- econcomplex-1.0.0.dist-info/WHEEL +5 -0
- econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
- econcomplex-1.0.0.dist-info/top_level.txt +1 -0
econcomplex/__init__.py
ADDED
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
"""
|
|
2
|
+
econcomplex — Economic Complexity and Geographic Indicators Library
|
|
3
|
+
===================================================================
|
|
4
|
+
|
|
5
|
+
A consolidated Python library for computing Economic Complexity indicators
|
|
6
|
+
and Geographic/Regional Science metrics, combining the best implementations
|
|
7
|
+
from EconGeo (R), economiccomplexity (R), py-ecomplexity, and
|
|
8
|
+
py-economic-complexity.
|
|
9
|
+
|
|
10
|
+
Quick start
|
|
11
|
+
-----------
|
|
12
|
+
>>> import econcomplex as ec
|
|
13
|
+
>>> import pandas as pd
|
|
14
|
+
>>>
|
|
15
|
+
>>> # Long-format data: location × activity × value
|
|
16
|
+
>>> df = pd.read_csv("my_data.csv")
|
|
17
|
+
>>>
|
|
18
|
+
>>> # Full pipeline (single period)
|
|
19
|
+
>>> result = ec.compute_complexity(
|
|
20
|
+
... df,
|
|
21
|
+
... cols={"loc": "region", "act": "sector", "val": "employment"},
|
|
22
|
+
... method="eigenvector",
|
|
23
|
+
... )
|
|
24
|
+
>>>
|
|
25
|
+
>>> # Individual indicators
|
|
26
|
+
>>> mat = ec.pivot_to_matrix(df, "region", "sector", "employment")
|
|
27
|
+
>>> rca_mat = ec.rca(mat)
|
|
28
|
+
>>> eci, pci = ec.eci_pci(mat)
|
|
29
|
+
>>> phi = ec.proximity(mat)["product"]
|
|
30
|
+
>>> density = ec.relatedness_density(mat, phi=phi)
|
|
31
|
+
|
|
32
|
+
Submodules
|
|
33
|
+
----------
|
|
34
|
+
core : RCA, RPOP, Mcp, diversity, ubiquity, trim_core, utilities
|
|
35
|
+
complexity : eci_pci() — single entry point (eigenvector, reflections,
|
|
36
|
+
fitness) — plus the method-specific implementations and
|
|
37
|
+
subnational ECI
|
|
38
|
+
relatedness : proximity (discrete/continuous/cosine/correlation),
|
|
39
|
+
relatedness density, distance, co-occurrence,
|
|
40
|
+
z-score novelty, cross-space proximity/relatedness
|
|
41
|
+
specialization: location quotient, Hachman, Krugman, specialization coeff,
|
|
42
|
+
export similarity
|
|
43
|
+
inequality : Gini, locational Gini, Hoover-Gini, Hoover index,
|
|
44
|
+
Herfindahl-Hirschman, Shannon entropy
|
|
45
|
+
productivity : PRODY, EXPY, Product Gini Index, PEII
|
|
46
|
+
patents : ease of recombination, modular complexity
|
|
47
|
+
dynamics : growth rates, entry/exit tracking (matrix and panel APIs)
|
|
48
|
+
outlook : COI, COG (Complexity Outlook)
|
|
49
|
+
optimization : ECI Optimization (Stojkoski & Hidalgo 2026), growth
|
|
50
|
+
targeting, strategic diffusion (Alshamsi et al. 2018)
|
|
51
|
+
pipeline : high-level compute_complexity() function
|
|
52
|
+
|
|
53
|
+
Note: short names like density, hhi, coi, cog, pgi are aliases bound to
|
|
54
|
+
the same objects as their canonical functions (see the API map in the
|
|
55
|
+
documentation).
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
__version__ = "1.0.0"
|
|
59
|
+
__author__ = "Elton Freitas and contributors"
|
|
60
|
+
|
|
61
|
+
# ── Core ──────────────────────────────────────────────────────────────────────
|
|
62
|
+
from .core.rca import rca, rpop, mcp
|
|
63
|
+
from .core.diversity import diversity, ubiquity, normalized_ubiquity
|
|
64
|
+
from .core.utils import (
|
|
65
|
+
pivot_to_matrix,
|
|
66
|
+
melt_matrix,
|
|
67
|
+
binarize,
|
|
68
|
+
normalize_zscore,
|
|
69
|
+
normalize_01,
|
|
70
|
+
make_sample_data,
|
|
71
|
+
)
|
|
72
|
+
from .core.preprocess import trim_core
|
|
73
|
+
|
|
74
|
+
# ── Complexity ─────────────────────────────────────────────────────────────────
|
|
75
|
+
from .complexity.eci_pci import eci_pci
|
|
76
|
+
from .complexity.eigenvector import eci_pci_eigenvector
|
|
77
|
+
from .complexity.reflections import method_of_reflections, mor_regions, mor_activities
|
|
78
|
+
from .complexity.fitness import fitness_complexity
|
|
79
|
+
from .complexity.subnational import subnational_eci
|
|
80
|
+
|
|
81
|
+
# ── Relatedness ────────────────────────────────────────────────────────────────
|
|
82
|
+
from .relatedness.proximity import (
|
|
83
|
+
proximity,
|
|
84
|
+
continuous_proximity,
|
|
85
|
+
cosine_proximity,
|
|
86
|
+
correlation_proximity,
|
|
87
|
+
relatedness,
|
|
88
|
+
)
|
|
89
|
+
from .relatedness.density import (
|
|
90
|
+
relatedness_density,
|
|
91
|
+
density,
|
|
92
|
+
distance,
|
|
93
|
+
relatedness_density_internal,
|
|
94
|
+
relatedness_density_external,
|
|
95
|
+
relative_relatedness,
|
|
96
|
+
)
|
|
97
|
+
from .relatedness.cooccurrence import co_occurrence, relatedness_index, z_score_novelty
|
|
98
|
+
from .relatedness.cross_space import cross_proximity, cross_relatedness, cross_space_proximity
|
|
99
|
+
|
|
100
|
+
# ── Specialization ─────────────────────────────────────────────────────────────
|
|
101
|
+
from .specialization.location_quotient import (
|
|
102
|
+
location_quotient,
|
|
103
|
+
location_quotient_avg,
|
|
104
|
+
hachman_index,
|
|
105
|
+
specialization_coefficient,
|
|
106
|
+
spec_coefficient,
|
|
107
|
+
krugman_index,
|
|
108
|
+
)
|
|
109
|
+
from .specialization.similarity import export_similarity
|
|
110
|
+
|
|
111
|
+
# ── Inequality ─────────────────────────────────────────────────────────────────
|
|
112
|
+
from .inequality.gini import gini, locational_gini, hoover_gini
|
|
113
|
+
from .inequality.concentration import herfindahl, hhi, shannon_entropy, hoover_index
|
|
114
|
+
|
|
115
|
+
# ── Productivity ───────────────────────────────────────────────────────────────
|
|
116
|
+
from .productivity.prody import (
|
|
117
|
+
prody,
|
|
118
|
+
expy,
|
|
119
|
+
product_gini_index,
|
|
120
|
+
pgi,
|
|
121
|
+
product_emissions_index,
|
|
122
|
+
peii,
|
|
123
|
+
)
|
|
124
|
+
|
|
125
|
+
# ── Patents ────────────────────────────────────────────────────────────────────
|
|
126
|
+
from .patents.recombination import (
|
|
127
|
+
ease_of_recombination,
|
|
128
|
+
modular_complexity,
|
|
129
|
+
modular_complexity_avg,
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# ── Dynamics ───────────────────────────────────────────────────────────────────
|
|
133
|
+
from .dynamics.growth import growth_rate, growth_matrix, growth_rates
|
|
134
|
+
from .dynamics.entry_exit import (
|
|
135
|
+
entry,
|
|
136
|
+
exit,
|
|
137
|
+
entry_exit_summary,
|
|
138
|
+
entry_tracking,
|
|
139
|
+
exit_tracking,
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
# ── Outlook ────────────────────────────────────────────────────────────────────
|
|
143
|
+
from .outlook.coi_cog import complexity_outlook_index, complexity_outlook_gain, coi, cog
|
|
144
|
+
|
|
145
|
+
# ── ECI Optimization (Stojkoski & Hidalgo 2026) ───────────────────────────────
|
|
146
|
+
from .optimization import (
|
|
147
|
+
calibrate_steppingstone,
|
|
148
|
+
effort_matrix,
|
|
149
|
+
forecast_specialization,
|
|
150
|
+
eci_optimization,
|
|
151
|
+
calibrate_growth_model,
|
|
152
|
+
expected_growth,
|
|
153
|
+
eci_target_for_growth,
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
# ── Strategic diffusion (Alshamsi, Pinheiro & Hidalgo 2018) ───────────────────
|
|
157
|
+
from .optimization import (
|
|
158
|
+
proximity_network,
|
|
159
|
+
activation_probabilities,
|
|
160
|
+
calibrate_contagion,
|
|
161
|
+
diversification_strategy,
|
|
162
|
+
expected_diversification_time,
|
|
163
|
+
compare_strategies,
|
|
164
|
+
optimize_sequence,
|
|
165
|
+
)
|
|
166
|
+
|
|
167
|
+
# ── High-level pipeline ────────────────────────────────────────────────────────
|
|
168
|
+
from .pipeline import compute_complexity
|
|
169
|
+
|
|
170
|
+
__all__ = [
|
|
171
|
+
# core
|
|
172
|
+
"rca", "rpop", "mcp",
|
|
173
|
+
"diversity", "ubiquity", "normalized_ubiquity",
|
|
174
|
+
"pivot_to_matrix", "melt_matrix", "binarize",
|
|
175
|
+
"normalize_zscore", "normalize_01",
|
|
176
|
+
"make_sample_data", "trim_core",
|
|
177
|
+
# complexity
|
|
178
|
+
"eci_pci", "eci_pci_eigenvector",
|
|
179
|
+
"method_of_reflections", "mor_regions", "mor_activities",
|
|
180
|
+
"fitness_complexity",
|
|
181
|
+
"subnational_eci",
|
|
182
|
+
# relatedness
|
|
183
|
+
"proximity", "continuous_proximity",
|
|
184
|
+
"cosine_proximity", "correlation_proximity",
|
|
185
|
+
"relatedness_density", "density", "relatedness", "distance",
|
|
186
|
+
"relatedness_density_internal", "relatedness_density_external",
|
|
187
|
+
"relative_relatedness",
|
|
188
|
+
"co_occurrence", "relatedness_index", "z_score_novelty",
|
|
189
|
+
"cross_proximity", "cross_relatedness", "cross_space_proximity",
|
|
190
|
+
# specialization
|
|
191
|
+
"location_quotient", "location_quotient_avg",
|
|
192
|
+
"hachman_index", "specialization_coefficient", "spec_coefficient",
|
|
193
|
+
"krugman_index",
|
|
194
|
+
"export_similarity",
|
|
195
|
+
# inequality
|
|
196
|
+
"gini", "locational_gini", "hoover_gini",
|
|
197
|
+
"herfindahl", "hhi", "shannon_entropy", "hoover_index",
|
|
198
|
+
# productivity
|
|
199
|
+
"prody", "expy",
|
|
200
|
+
"product_gini_index", "pgi",
|
|
201
|
+
"product_emissions_index", "peii",
|
|
202
|
+
# patents
|
|
203
|
+
"ease_of_recombination", "modular_complexity", "modular_complexity_avg",
|
|
204
|
+
# dynamics
|
|
205
|
+
"growth_rate", "growth_matrix", "growth_rates",
|
|
206
|
+
"entry", "exit", "entry_exit_summary",
|
|
207
|
+
"entry_tracking", "exit_tracking",
|
|
208
|
+
# outlook
|
|
209
|
+
"complexity_outlook_index", "complexity_outlook_gain", "coi", "cog",
|
|
210
|
+
# optimization
|
|
211
|
+
"calibrate_steppingstone", "effort_matrix", "forecast_specialization",
|
|
212
|
+
"eci_optimization",
|
|
213
|
+
"calibrate_growth_model", "expected_growth", "eci_target_for_growth",
|
|
214
|
+
# strategic diffusion
|
|
215
|
+
"proximity_network", "activation_probabilities", "calibrate_contagion",
|
|
216
|
+
"diversification_strategy", "expected_diversification_time",
|
|
217
|
+
"compare_strategies", "optimize_sequence",
|
|
218
|
+
# pipeline
|
|
219
|
+
"compute_complexity",
|
|
220
|
+
]
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Economic complexity indicators.
|
|
3
|
+
|
|
4
|
+
`eci_pci(mat, method=...)` is the single entry point (eigenvector,
|
|
5
|
+
reflections, or fitness). The method-specific implementations remain
|
|
6
|
+
public for advanced use.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from .eci_pci import eci_pci
|
|
10
|
+
from .eigenvector import eci_pci_eigenvector
|
|
11
|
+
from .reflections import method_of_reflections, mor_regions, mor_activities
|
|
12
|
+
from .fitness import fitness_complexity
|
|
13
|
+
from .subnational import subnational_eci
|
|
14
|
+
|
|
15
|
+
__all__ = [
|
|
16
|
+
"eci_pci",
|
|
17
|
+
"eci_pci_eigenvector",
|
|
18
|
+
"method_of_reflections",
|
|
19
|
+
"mor_regions",
|
|
20
|
+
"mor_activities",
|
|
21
|
+
"fitness_complexity",
|
|
22
|
+
"subnational_eci",
|
|
23
|
+
]
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ECI / PCI — single entry point for all complexity methods.
|
|
3
|
+
|
|
4
|
+
`eci_pci(mat, method=...)` is the recommended way to compute economic
|
|
5
|
+
complexity with this library. It dispatches between the three methods
|
|
6
|
+
(mirroring `complexity_measures()` of the R `economiccomplexity` package),
|
|
7
|
+
pre-trims degenerate units, and returns results aligned with the input.
|
|
8
|
+
|
|
9
|
+
The underlying implementations remain available for advanced use:
|
|
10
|
+
`eci_pci_eigenvector` (module `eigenvector`), `method_of_reflections`
|
|
11
|
+
(module `reflections`), and `fitness_complexity` (module `fitness`).
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import pandas as pd
|
|
16
|
+
from typing import Literal, Optional, Tuple, Union
|
|
17
|
+
|
|
18
|
+
from .eigenvector import eci_pci_eigenvector
|
|
19
|
+
from .reflections import method_of_reflections
|
|
20
|
+
from .fitness import fitness_complexity
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def eci_pci(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
    method: Literal["eigenvector", "reflections", "fitness"] = "eigenvector",
    iterations: Optional[int] = None,
    extremality: float = 1.0,
    tol: float = 1e-10,
    log_fitness: bool = False,
    trim: bool = True,
    dmin: int = 1,
    umin: int = 1,
) -> Tuple[Union[pd.Series, np.ndarray], Union[pd.Series, np.ndarray]]:
    """
    Economic Complexity Index (ECI) and Product Complexity Index (PCI).

    Single entry point for the three complexity methods (mirrors the
    `complexity_measures()` interface of the R `economiccomplexity`
    package):

    - 'eigenvector' (default): delegates to `eci_pci_eigenvector`
      (second eigenvector of the Markov-style co-occurrence matrices,
      z-score normalized and sign-corrected).
    - 'reflections': delegates to `method_of_reflections`.
    - 'fitness': delegates to `fitness_complexity` (non-linear
      Fitness-Complexity algorithm; returns raw fitness/complexity
      scores, not z-scores).

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    method : str
        'eigenvector', 'reflections', or 'fitness'. Validated before any
        other work is done.
    iterations : int, optional
        Iterations for 'reflections' and 'fitness'; ``None`` means 20 for
        both. For 'fitness' it is a cap: the loop stops at convergence
        and a RuntimeWarning is issued only if the final relative change
        is still above 1e-3. Ignored by 'eigenvector'.
    extremality : float
        Non-linearity parameter alpha for 'fitness' (default 1.0).
    tol : float
        Convergence tolerance forwarded to 'reflections' and 'fitness'.
    log_fitness : bool
        For 'fitness': return the natural log of fitness/complexity.
        Ignored by the other methods.
    trim : bool
        If True (default), pre-trim the matrix with `trim_core` so that
        degenerate units are excluded from the calculation. Trimmed
        units are returned as NaN, preserving the original index/shape.
    dmin, umin : int
        Diversity/ubiquity thresholds passed to `trim_core` (default 1).

    Returns
    -------
    (eci, pci) as pd.Series or ndarrays, aligned with the input matrix
    (NaN for units removed by trimming).

    Raises
    ------
    ValueError
        If `method` is not one of the three supported names, or if the
        trimmed matrix has fewer than two rows or two columns.
    """
    # Fail fast: an unknown method must not pay for the trimming pass, nor be
    # masked by the unrelated "not enough connected units" error below.
    if method not in ("eigenvector", "reflections", "fitness"):
        raise ValueError(
            "method must be 'eigenvector', 'reflections', or 'fitness'."
        )

    if trim:
        # Kept as a function-scope import, exactly as in the original code.
        from ..core.preprocess import trim_core

        is_df_in = isinstance(mat, pd.DataFrame)
        # Wrap plain arrays so that trimmed rows/columns can be re-aligned
        # by label afterwards.
        df = mat if is_df_in else pd.DataFrame(np.asarray(mat, dtype=float))
        trimmed = trim_core(df, dmin=dmin, umin=umin,
                            use_rca=use_rca, threshold=threshold)
        if trimmed.shape[0] < 2 or trimmed.shape[1] < 2:
            raise ValueError(
                f"After trimming to the ({dmin}, {umin})-core the matrix has "
                f"shape {trimmed.shape}; not enough connected units to "
                "compute complexity."
            )
        if trimmed.shape != df.shape:
            # Something was removed: compute on the trimmed matrix only
            # (trim=False prevents a second trimming pass) and pad the
            # removed units back in as NaN.
            res_r, res_c = eci_pci(
                trimmed, use_rca=use_rca, threshold=threshold, method=method,
                iterations=iterations, extremality=extremality, tol=tol,
                log_fitness=log_fitness, trim=False,
            )
            res_r = res_r.reindex(df.index)
            res_c = res_c.reindex(df.columns)
            if not is_df_in:
                return res_r.values, res_c.values
            return res_r, res_c

    if method == "eigenvector":
        return eci_pci_eigenvector(mat, use_rca=use_rca, threshold=threshold)

    # Both iterative methods share the same default iteration count.
    n_iter = iterations if iterations is not None else 20
    if method == "reflections":
        return method_of_reflections(
            mat, use_rca=use_rca, threshold=threshold,
            iterations=n_iter,
            tol=tol,
        )
    # Only 'fitness' remains after the up-front validation.
    return fitness_complexity(
        mat, use_rca=use_rca, threshold=threshold,
        iterations=n_iter,
        extremality=extremality, tol=tol, log_fitness=log_fitness,
    )
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Eigenvector implementation of Economic Complexity (ECI / PCI).
|
|
3
|
+
|
|
4
|
+
This module holds the eigenvector method only. The recommended entry
|
|
5
|
+
point for users is `eci_pci()` (module `complexity.eci_pci`), which
|
|
6
|
+
dispatches between the eigenvector, reflections, and fitness methods.
|
|
7
|
+
|
|
8
|
+
References
|
|
9
|
+
----------
|
|
10
|
+
Hidalgo & Hausmann (2009); Balland & Rigby (2017).
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import numpy as np
|
|
14
|
+
import pandas as pd
|
|
15
|
+
from typing import Tuple, Union
|
|
16
|
+
|
|
17
|
+
from ..core.utils import validate_matrix, safe_divide, normalize_zscore, binarize
|
|
18
|
+
from ..core.rca import rca as compute_rca
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _second_eigenvector(mat: np.ndarray) -> np.ndarray:
|
|
22
|
+
"""Return the eigenvector corresponding to the second largest eigenvalue.
|
|
23
|
+
|
|
24
|
+
The Markov-style co-occurrence matrix (Mcc / Mpp) is in general NOT
|
|
25
|
+
symmetric, because each row is normalised by its own diversity/ubiquity.
|
|
26
|
+
We therefore use the general (non-symmetric) eigensolver ``np.linalg.eig``
|
|
27
|
+
and select the eigenvector associated with the second-largest eigenvalue
|
|
28
|
+
by real part. The largest eigenvalue corresponds to the trivial constant
|
|
29
|
+
vector; the second is the Hidalgo-Hausmann (2009) complexity index.
|
|
30
|
+
|
|
31
|
+
Using ``np.linalg.eigh`` here would be incorrect: it assumes a symmetric
|
|
32
|
+
matrix and reads only one triangle, yielding the eigenvector of an
|
|
33
|
+
arbitrarily symmetrised matrix rather than the true second eigenvector.
|
|
34
|
+
"""
|
|
35
|
+
eigenvalues, eigenvectors = np.linalg.eig(mat)
|
|
36
|
+
order = np.argsort(eigenvalues.real)
|
|
37
|
+
return np.real(eigenvectors[:, order[-2]])
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def eci_pci_eigenvector(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
) -> Tuple[Union[pd.Series, np.ndarray], Union[pd.Series, np.ndarray]]:
    """
    ECI and PCI via the eigenvector method.

    This is the method-specific implementation; `eci_pci(mat,
    method="eigenvector")` is the recommended entry point because it also
    trims degenerate units before delegating here.

    With M the binarized matrix, D_r its row sums (diversity) and U_c its
    column sums (ubiquity), two Markov-style matrices are built:

        Mcc_{rr'} = sum_c (M_{rc}/D_r) * (M_{r'c}/U_c)
        Mpp_{pp'} = sum_r (M_{rp}/U_p) * (M_{rp'}/D_r)

    ECI is the second eigenvector of Mcc, flipped if it correlates
    negatively with diversity. PCI is the second eigenvector of Mpp,
    flipped if it correlates positively with ubiquity. Both are then passed
    through `normalize_zscore`.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.

    Returns
    -------
    (eci, pci) as pd.Series (named "eci" / "pci", labelled with the input's
    index / columns) when `mat` is a DataFrame, otherwise as returned by
    `normalize_zscore`.
    """
    # Capture the labels before the input is converted to an array.
    labels = (mat.index, mat.columns) if isinstance(mat, pd.DataFrame) else None

    values = validate_matrix(mat)
    m = binarize(compute_rca(values) if use_rca else values, threshold)

    diversity = m.sum(axis=1)  # one entry per row (R)
    ubiquity = m.sum(axis=0)   # one entry per column (C)

    by_diversity = safe_divide(m, diversity[:, None])  # M / D_r  (R x C)
    by_ubiquity = safe_divide(m, ubiquity[None, :])    # M / U_c  (R x C)

    # Mcc is R x R, Mpp is C x C.
    eci_raw = _second_eigenvector(by_diversity @ by_ubiquity.T)
    pci_raw = _second_eigenvector(by_ubiquity.T @ by_diversity)

    # An eigenvector's sign is arbitrary; orient ECI along diversity. The
    # variance guards short-circuit before corrcoef is evaluated.
    eci_varies = np.std(eci_raw) > 0 and np.std(diversity) > 0
    if eci_varies and np.corrcoef(eci_raw, diversity)[0, 1] < 0:
        eci_raw = -eci_raw

    # ... and orient PCI against ubiquity.
    pci_varies = np.std(pci_raw) > 0 and np.std(ubiquity) > 0
    if pci_varies and np.corrcoef(pci_raw, ubiquity)[0, 1] > 0:
        pci_raw = -pci_raw

    eci = normalize_zscore(eci_raw)
    pci = normalize_zscore(pci_raw)

    if labels is None:
        return eci, pci

    row_index, col_index = labels
    return (
        pd.Series(eci, index=row_index, name="eci"),
        pd.Series(pci, index=col_index, name="pci"),
    )
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Fitness-Complexity method (non-linear iterative).
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Tacchella et al. (2012) "A New Metrics for Countries' Fitness and Products' Complexity".
|
|
7
|
+
Cristelli et al. (2013).
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import warnings
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
from typing import Tuple, Union
|
|
15
|
+
|
|
16
|
+
from ..core.utils import validate_matrix, safe_divide, binarize
|
|
17
|
+
from ..core.rca import rca as compute_rca
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def fitness_complexity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
    iterations: int = 20,
    extremality: float = 1.0,
    tol: float = 1e-10,
    log_fitness: bool = False,
) -> Tuple[Union[pd.Series, np.ndarray], Union[pd.Series, np.ndarray]]:
    """
    Fitness-Complexity algorithm.

    Starting from all-ones vectors, each iteration performs, on the
    binarized matrix M:

        F_r^{(n)} = sum_c M_{rc} * Q_c^{(n-1)}            (then / mean)
        Q_c^{(n)} = 1 / ( sum_r M_{rc} * (1/F_r^{(n)})^alpha )^{1/alpha}
                                                          (then / mean)

    where alpha = `extremality`. Note that, as implemented, the complexity
    update uses the fitness vector that was just computed and normalised in
    the same iteration, not the one from the previous iteration. Each
    mean-normalisation is skipped when the mean is not positive.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    iterations : int
        Maximum number of iterations (default 20). The loop stops earlier
        as soon as both relative changes fall below `tol`. If it does not,
        a RuntimeWarning is issued only when the final relative change of
        either vector still exceeds 1e-3.
    extremality : float
        Non-linearity parameter alpha (default 1.0).
    tol : float
        Convergence tolerance on the relative change (largest absolute
        change divided by the largest absolute previous value).
    log_fitness : bool
        If True, return the natural log of fitness and complexity;
        non-positive entries are returned as NaN.

    Returns
    -------
    (fitness, complexity) as pd.Series (named "fitness" / "complexity",
    labelled with the input's index / columns) when `mat` is a DataFrame,
    otherwise as ndarrays.
        fitness    = one score per row of `mat`.
        complexity = one score per column of `mat`.
    """
    # Capture the labels before the input is converted to an array.
    labels = (mat.index, mat.columns) if isinstance(mat, pd.DataFrame) else None

    values = validate_matrix(mat)
    m = binarize(compute_rca(values) if use_rca else values, threshold)

    n_rows, n_cols = m.shape
    fit = np.ones(n_rows)
    cpx = np.ones(n_cols)

    # Start at infinity so that a zero-iteration run is reported as
    # non-converged by the check after the loop.
    change_f = change_q = np.inf
    converged = False
    for _ in range(iterations):
        # Fitness step, rescaled to unit mean.
        next_fit = m @ cpx
        fit_mean = next_fit.mean()
        if fit_mean > 0:
            next_fit /= fit_mean

        # Complexity step, driven by the freshly normalised fitness.
        inverse_fit = safe_divide(1.0, next_fit ** extremality)
        denominator = (m.T @ inverse_fit) ** (1.0 / extremality)
        next_cpx = safe_divide(1.0, denominator)
        cpx_mean = next_cpx.mean()
        if cpx_mean > 0:
            next_cpx /= cpx_mean

        # Relative change with respect to the previous iterate; the tiny
        # constant keeps the ratio finite when the previous iterate is zero.
        change_f = np.abs(next_fit - fit).max() / (np.abs(fit).max() + 1e-15)
        change_q = np.abs(next_cpx - cpx).max() / (np.abs(cpx).max() + 1e-15)

        fit, cpx = next_fit, next_cpx

        if change_f < tol and change_q < tol:
            converged = True
            break

    still_moving = change_f > 1e-3 or change_q > 1e-3
    if still_moving and not converged:
        warnings.warn(
            f"fitness_complexity did not converge within {iterations} "
            f"iterations (final relative change {max(change_f, change_q):.1e}); "
            "results may be unstable. Increase `iterations`.",
            RuntimeWarning,
            stacklevel=2,
        )

    if log_fitness:
        # np.log is evaluated on every entry before np.where selects, so
        # silence the warnings raised for the non-positive ones.
        with np.errstate(divide="ignore", invalid="ignore"):
            fit = np.where(fit > 0, np.log(fit), np.nan)
            cpx = np.where(cpx > 0, np.log(cpx), np.nan)

    if labels is None:
        return fit, cpx

    row_index, col_index = labels
    return (
        pd.Series(fit, index=row_index, name="fitness"),
        pd.Series(cpx, index=col_index, name="complexity"),
    )
|