econcomplex 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. econcomplex/__init__.py +220 -0
  2. econcomplex/complexity/__init__.py +23 -0
  3. econcomplex/complexity/eci_pci.py +131 -0
  4. econcomplex/complexity/eigenvector.py +115 -0
  5. econcomplex/complexity/fitness.py +130 -0
  6. econcomplex/complexity/reflections.py +173 -0
  7. econcomplex/complexity/subnational.py +82 -0
  8. econcomplex/core/__init__.py +23 -0
  9. econcomplex/core/diversity.py +125 -0
  10. econcomplex/core/preprocess.py +83 -0
  11. econcomplex/core/rca.py +161 -0
  12. econcomplex/core/utils.py +137 -0
  13. econcomplex/dynamics/__init__.py +10 -0
  14. econcomplex/dynamics/entry_exit.py +248 -0
  15. econcomplex/dynamics/growth.py +146 -0
  16. econcomplex/inequality/__init__.py +11 -0
  17. econcomplex/inequality/concentration.py +148 -0
  18. econcomplex/inequality/gini.py +164 -0
  19. econcomplex/optimization/__init__.py +46 -0
  20. econcomplex/optimization/diffusion.py +379 -0
  21. econcomplex/optimization/growth_target.py +170 -0
  22. econcomplex/optimization/portfolio.py +178 -0
  23. econcomplex/optimization/steppingstone.py +267 -0
  24. econcomplex/outlook/__init__.py +6 -0
  25. econcomplex/outlook/coi_cog.py +168 -0
  26. econcomplex/patents/__init__.py +7 -0
  27. econcomplex/patents/recombination.py +135 -0
  28. econcomplex/pipeline.py +255 -0
  29. econcomplex/productivity/__init__.py +8 -0
  30. econcomplex/productivity/prody.py +218 -0
  31. econcomplex/relatedness/__init__.py +25 -0
  32. econcomplex/relatedness/cooccurrence.py +173 -0
  33. econcomplex/relatedness/cross_space.py +142 -0
  34. econcomplex/relatedness/density.py +232 -0
  35. econcomplex/relatedness/proximity.py +214 -0
  36. econcomplex/specialization/__init__.py +17 -0
  37. econcomplex/specialization/location_quotient.py +163 -0
  38. econcomplex/specialization/similarity.py +68 -0
  39. econcomplex-1.0.0.dist-info/METADATA +223 -0
  40. econcomplex-1.0.0.dist-info/RECORD +43 -0
  41. econcomplex-1.0.0.dist-info/WHEEL +5 -0
  42. econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
  43. econcomplex-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,220 @@
1
+ """
2
+ econcomplex — Economic Complexity and Geographic Indicators Library
3
+ ===================================================================
4
+
5
+ A consolidated Python library for computing Economic Complexity indicators
6
+ and Geographic/Regional Science metrics, combining the best implementations
7
+ from EconGeo (R), economiccomplexity (R), py-ecomplexity, and
8
+ py-economic-complexity.
9
+
10
+ Quick start
11
+ -----------
12
+ >>> import econcomplex as ec
13
+ >>> import pandas as pd
14
+ >>>
15
+ >>> # Long-format data: location × activity × value
16
+ >>> df = pd.read_csv("my_data.csv")
17
+ >>>
18
+ >>> # Full pipeline (single period)
19
+ >>> result = ec.compute_complexity(
20
+ ... df,
21
+ ... cols={"loc": "region", "act": "sector", "val": "employment"},
22
+ ... method="eigenvector",
23
+ ... )
24
+ >>>
25
+ >>> # Individual indicators
26
+ >>> mat = ec.pivot_to_matrix(df, "region", "sector", "employment")
27
+ >>> rca_mat = ec.rca(mat)
28
+ >>> eci, pci = ec.eci_pci(mat)
29
+ >>> phi = ec.proximity(mat)["product"]
30
+ >>> density = ec.relatedness_density(mat, phi=phi)
31
+
32
+ Submodules
33
+ ----------
34
+ core : RCA, RPOP, Mcp, diversity, ubiquity, trim_core, utilities
35
+ complexity : eci_pci() — single entry point (eigenvector, reflections,
36
+ fitness) — plus the method-specific implementations and
37
+ subnational ECI
38
+ relatedness : proximity (discrete/continuous/cosine/correlation),
39
+ relatedness density, distance, co-occurrence,
40
+ z-score novelty, cross-space proximity/relatedness
41
+ specialization: location quotient, Hachman, Krugman, specialization coeff,
42
+ export similarity
43
+ inequality : Gini, locational Gini, Hoover-Gini, Hoover index,
44
+ Herfindahl-Hirschman, Shannon entropy
45
+ productivity : PRODY, EXPY, Product Gini Index, PEII
46
+ patents : ease of recombination, modular complexity
47
+ dynamics : growth rates, entry/exit tracking (matrix and panel APIs)
48
+ outlook : COI, COG (Complexity Outlook)
49
+ optimization : ECI Optimization (Stojkoski & Hidalgo 2026), growth
50
+ targeting, strategic diffusion (Alshamsi et al. 2018)
51
+ pipeline : high-level compute_complexity() function
52
+
53
+ Note: short names like density, hhi, coi, cog, pgi are aliases bound to
54
+ the same objects as their canonical functions (see the API map in the
55
+ documentation).
56
+ """
57
+
58
+ __version__ = "1.0.0"
59
+ __author__ = "Elton Freitas and contributors"
60
+
61
+ # ── Core ──────────────────────────────────────────────────────────────────────
62
+ from .core.rca import rca, rpop, mcp
63
+ from .core.diversity import diversity, ubiquity, normalized_ubiquity
64
+ from .core.utils import (
65
+ pivot_to_matrix,
66
+ melt_matrix,
67
+ binarize,
68
+ normalize_zscore,
69
+ normalize_01,
70
+ make_sample_data,
71
+ )
72
+ from .core.preprocess import trim_core
73
+
74
+ # ── Complexity ─────────────────────────────────────────────────────────────────
75
+ from .complexity.eci_pci import eci_pci
76
+ from .complexity.eigenvector import eci_pci_eigenvector
77
+ from .complexity.reflections import method_of_reflections, mor_regions, mor_activities
78
+ from .complexity.fitness import fitness_complexity
79
+ from .complexity.subnational import subnational_eci
80
+
81
+ # ── Relatedness ────────────────────────────────────────────────────────────────
82
+ from .relatedness.proximity import (
83
+ proximity,
84
+ continuous_proximity,
85
+ cosine_proximity,
86
+ correlation_proximity,
87
+ relatedness,
88
+ )
89
+ from .relatedness.density import (
90
+ relatedness_density,
91
+ density,
92
+ distance,
93
+ relatedness_density_internal,
94
+ relatedness_density_external,
95
+ relative_relatedness,
96
+ )
97
+ from .relatedness.cooccurrence import co_occurrence, relatedness_index, z_score_novelty
98
+ from .relatedness.cross_space import cross_proximity, cross_relatedness, cross_space_proximity
99
+
100
+ # ── Specialization ─────────────────────────────────────────────────────────────
101
+ from .specialization.location_quotient import (
102
+ location_quotient,
103
+ location_quotient_avg,
104
+ hachman_index,
105
+ specialization_coefficient,
106
+ spec_coefficient,
107
+ krugman_index,
108
+ )
109
+ from .specialization.similarity import export_similarity
110
+
111
+ # ── Inequality ─────────────────────────────────────────────────────────────────
112
+ from .inequality.gini import gini, locational_gini, hoover_gini
113
+ from .inequality.concentration import herfindahl, hhi, shannon_entropy, hoover_index
114
+
115
+ # ── Productivity ───────────────────────────────────────────────────────────────
116
+ from .productivity.prody import (
117
+ prody,
118
+ expy,
119
+ product_gini_index,
120
+ pgi,
121
+ product_emissions_index,
122
+ peii,
123
+ )
124
+
125
+ # ── Patents ────────────────────────────────────────────────────────────────────
126
+ from .patents.recombination import (
127
+ ease_of_recombination,
128
+ modular_complexity,
129
+ modular_complexity_avg,
130
+ )
131
+
132
+ # ── Dynamics ───────────────────────────────────────────────────────────────────
133
+ from .dynamics.growth import growth_rate, growth_matrix, growth_rates
134
+ from .dynamics.entry_exit import (
135
+ entry,
136
+ exit,
137
+ entry_exit_summary,
138
+ entry_tracking,
139
+ exit_tracking,
140
+ )
141
+
142
+ # ── Outlook ────────────────────────────────────────────────────────────────────
143
+ from .outlook.coi_cog import complexity_outlook_index, complexity_outlook_gain, coi, cog
144
+
145
+ # ── ECI Optimization (Stojkoski & Hidalgo 2026) ───────────────────────────────
146
+ from .optimization import (
147
+ calibrate_steppingstone,
148
+ effort_matrix,
149
+ forecast_specialization,
150
+ eci_optimization,
151
+ calibrate_growth_model,
152
+ expected_growth,
153
+ eci_target_for_growth,
154
+ )
155
+
156
+ # ── Strategic diffusion (Alshamsi, Pinheiro & Hidalgo 2018) ───────────────────
157
+ from .optimization import (
158
+ proximity_network,
159
+ activation_probabilities,
160
+ calibrate_contagion,
161
+ diversification_strategy,
162
+ expected_diversification_time,
163
+ compare_strategies,
164
+ optimize_sequence,
165
+ )
166
+
167
+ # ── High-level pipeline ────────────────────────────────────────────────────────
168
+ from .pipeline import compute_complexity
169
+
170
+ __all__ = [
171
+ # core
172
+ "rca", "rpop", "mcp",
173
+ "diversity", "ubiquity", "normalized_ubiquity",
174
+ "pivot_to_matrix", "melt_matrix", "binarize",
175
+ "normalize_zscore", "normalize_01",
176
+ "make_sample_data", "trim_core",
177
+ # complexity
178
+ "eci_pci", "eci_pci_eigenvector",
179
+ "method_of_reflections", "mor_regions", "mor_activities",
180
+ "fitness_complexity",
181
+ "subnational_eci",
182
+ # relatedness
183
+ "proximity", "continuous_proximity",
184
+ "cosine_proximity", "correlation_proximity",
185
+ "relatedness_density", "density", "relatedness", "distance",
186
+ "relatedness_density_internal", "relatedness_density_external",
187
+ "relative_relatedness",
188
+ "co_occurrence", "relatedness_index", "z_score_novelty",
189
+ "cross_proximity", "cross_relatedness", "cross_space_proximity",
190
+ # specialization
191
+ "location_quotient", "location_quotient_avg",
192
+ "hachman_index", "specialization_coefficient", "spec_coefficient",
193
+ "krugman_index",
194
+ "export_similarity",
195
+ # inequality
196
+ "gini", "locational_gini", "hoover_gini",
197
+ "herfindahl", "hhi", "shannon_entropy", "hoover_index",
198
+ # productivity
199
+ "prody", "expy",
200
+ "product_gini_index", "pgi",
201
+ "product_emissions_index", "peii",
202
+ # patents
203
+ "ease_of_recombination", "modular_complexity", "modular_complexity_avg",
204
+ # dynamics
205
+ "growth_rate", "growth_matrix", "growth_rates",
206
+ "entry", "exit", "entry_exit_summary",
207
+ "entry_tracking", "exit_tracking",
208
+ # outlook
209
+ "complexity_outlook_index", "complexity_outlook_gain", "coi", "cog",
210
+ # optimization
211
+ "calibrate_steppingstone", "effort_matrix", "forecast_specialization",
212
+ "eci_optimization",
213
+ "calibrate_growth_model", "expected_growth", "eci_target_for_growth",
214
+ # strategic diffusion
215
+ "proximity_network", "activation_probabilities", "calibrate_contagion",
216
+ "diversification_strategy", "expected_diversification_time",
217
+ "compare_strategies", "optimize_sequence",
218
+ # pipeline
219
+ "compute_complexity",
220
+ ]
@@ -0,0 +1,23 @@
1
+ """
2
+ Economic complexity indicators.
3
+
4
+ `eci_pci(mat, method=...)` is the single entry point (eigenvector,
5
+ reflections, or fitness). The method-specific implementations remain
6
+ public for advanced use.
7
+ """
8
+
9
+ from .eci_pci import eci_pci
10
+ from .eigenvector import eci_pci_eigenvector
11
+ from .reflections import method_of_reflections, mor_regions, mor_activities
12
+ from .fitness import fitness_complexity
13
+ from .subnational import subnational_eci
14
+
15
+ __all__ = [
16
+ "eci_pci",
17
+ "eci_pci_eigenvector",
18
+ "method_of_reflections",
19
+ "mor_regions",
20
+ "mor_activities",
21
+ "fitness_complexity",
22
+ "subnational_eci",
23
+ ]
@@ -0,0 +1,131 @@
1
+ """
2
+ ECI / PCI — single entry point for all complexity methods.
3
+
4
+ `eci_pci(mat, method=...)` is the recommended way to compute economic
5
+ complexity with this library. It dispatches between the three methods
6
+ (mirroring `complexity_measures()` of the R `economiccomplexity` package),
7
+ pre-trims degenerate units, and returns results aligned with the input.
8
+
9
+ The underlying implementations remain available for advanced use:
10
+ `eci_pci_eigenvector` (module `eigenvector`), `method_of_reflections`
11
+ (module `reflections`), and `fitness_complexity` (module `fitness`).
12
+ """
13
+
14
+ import numpy as np
15
+ import pandas as pd
16
+ from typing import Literal, Optional, Tuple, Union
17
+
18
+ from .eigenvector import eci_pci_eigenvector
19
+ from .reflections import method_of_reflections
20
+ from .fitness import fitness_complexity
21
+
22
+
23
def eci_pci(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
    method: Literal["eigenvector", "reflections", "fitness"] = "eigenvector",
    iterations: Optional[int] = None,
    extremality: float = 1.0,
    tol: float = 1e-10,
    log_fitness: bool = False,
    trim: bool = True,
    dmin: int = 1,
    umin: int = 1,
) -> Tuple[Union[pd.Series, np.ndarray], Union[pd.Series, np.ndarray]]:
    """
    Economic Complexity Index (ECI) and Product Complexity Index (PCI).

    Single entry point that dispatches to one of three implementations:

    - 'eigenvector' (default): delegates to `eci_pci_eigenvector`.
    - 'reflections': delegates to `method_of_reflections`.
    - 'fitness': delegates to `fitness_complexity`.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix (rows = locations, columns = activities).
    use_rca : bool
        Forwarded to the selected method and to `trim_core`.
    threshold : float
        Binarization threshold, forwarded to the selected method and to
        `trim_core`.
    method : str
        'eigenvector', 'reflections', or 'fitness'. Validated before any
        other work is done.
    iterations : int, optional
        Iteration count/cap for 'reflections' and 'fitness'; 20 is used
        when None. Not passed to 'eigenvector'.
    extremality : float
        Forwarded to 'fitness' only.
    tol : float
        Convergence tolerance, forwarded to 'reflections' and 'fitness'.
    log_fitness : bool
        Forwarded to 'fitness' only.
    trim : bool
        If True (default), the matrix is first reduced with
        `trim_core(dmin, umin)`. When that removes rows or columns, the
        method runs on the reduced matrix and the results are re-aligned
        to the original rows/columns, with NaN for the removed units.
    dmin, umin : int
        Thresholds passed to `trim_core` (default 1).

    Returns
    -------
    (eci, pci)
        Whatever the selected method returns for the input type. After an
        effective trim: pd.Series for DataFrame input, ndarrays otherwise,
        aligned with the input matrix.

    Raises
    ------
    ValueError
        If `method` is not one of the supported names, or if the trimmed
        matrix has fewer than two rows or two columns.
    """
    # Fail fast: a misspelled method must not cost a trimming pass, nor be
    # masked by a trimming error raised for an unrelated reason.
    if method not in ("eigenvector", "reflections", "fitness"):
        raise ValueError(
            "method must be 'eigenvector', 'reflections', or 'fitness'."
        )

    work = mat          # matrix actually handed to the selected method
    realign = None      # (row labels, column labels, input_was_dataframe)

    if trim:
        from ..core.preprocess import trim_core

        is_df_in = isinstance(mat, pd.DataFrame)
        df = mat if is_df_in else pd.DataFrame(np.asarray(mat, dtype=float))
        trimmed = trim_core(df, dmin=dmin, umin=umin,
                            use_rca=use_rca, threshold=threshold)
        if trimmed.shape[0] < 2 or trimmed.shape[1] < 2:
            raise ValueError(
                f"After trimming to the ({dmin}, {umin})-core the matrix has "
                f"shape {trimmed.shape}; not enough connected units to "
                "compute complexity."
            )
        # Only switch to the reduced matrix when something was removed, so
        # untouched inputs keep their original container type end to end.
        if trimmed.shape != df.shape:
            work = trimmed
            realign = (df.index, df.columns, is_df_in)

    max_iter = 20 if iterations is None else iterations
    if method == "eigenvector":
        result = eci_pci_eigenvector(work, use_rca=use_rca, threshold=threshold)
    elif method == "reflections":
        result = method_of_reflections(
            work, use_rca=use_rca, threshold=threshold,
            iterations=max_iter, tol=tol,
        )
    else:  # "fitness" (guaranteed by the validation above)
        result = fitness_complexity(
            work, use_rca=use_rca, threshold=threshold,
            iterations=max_iter,
            extremality=extremality, tol=tol, log_fitness=log_fitness,
        )

    if realign is None:
        return result

    # Re-insert the trimmed-away units as NaN, in the original order.
    rows, cols, was_df = realign
    res_r, res_c = result
    res_r = res_r.reindex(rows)
    res_c = res_c.reindex(cols)
    if not was_df:
        return res_r.values, res_c.values
    return res_r, res_c
@@ -0,0 +1,115 @@
1
+ """
2
+ Eigenvector implementation of Economic Complexity (ECI / PCI).
3
+
4
+ This module holds the eigenvector method only. The recommended entry
5
+ point for users is `eci_pci()` (module `complexity.eci_pci`), which
6
+ dispatches between the eigenvector, reflections, and fitness methods.
7
+
8
+ References
9
+ ----------
10
+ Hidalgo & Hausmann (2009); Balland & Rigby (2017).
11
+ """
12
+
13
+ import numpy as np
14
+ import pandas as pd
15
+ from typing import Tuple, Union
16
+
17
+ from ..core.utils import validate_matrix, safe_divide, normalize_zscore, binarize
18
+ from ..core.rca import rca as compute_rca
19
+
20
+
21
+ def _second_eigenvector(mat: np.ndarray) -> np.ndarray:
22
+ """Return the eigenvector corresponding to the second largest eigenvalue.
23
+
24
+ The Markov-style co-occurrence matrix (Mcc / Mpp) is in general NOT
25
+ symmetric, because each row is normalised by its own diversity/ubiquity.
26
+ We therefore use the general (non-symmetric) eigensolver ``np.linalg.eig``
27
+ and select the eigenvector associated with the second-largest eigenvalue
28
+ by real part. The largest eigenvalue corresponds to the trivial constant
29
+ vector; the second is the Hidalgo-Hausmann (2009) complexity index.
30
+
31
+ Using ``np.linalg.eigh`` here would be incorrect: it assumes a symmetric
32
+ matrix and reads only one triangle, yielding the eigenvector of an
33
+ arbitrarily symmetrised matrix rather than the true second eigenvector.
34
+ """
35
+ eigenvalues, eigenvectors = np.linalg.eig(mat)
36
+ order = np.argsort(eigenvalues.real)
37
+ return np.real(eigenvectors[:, order[-2]])
38
+
39
+
40
def eci_pci_eigenvector(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
) -> Tuple[Union[pd.Series, np.ndarray], Union[pd.Series, np.ndarray]]:
    """
    Eigenvector-based ECI and PCI (low-level implementation).

    `eci_pci(mat, method="eigenvector")` is the recommended entry point;
    it additionally trims degenerate units before calling this function.

    With M the binarized matrix, D_r its row sums (diversity) and U_c its
    column sums (ubiquity), two Markov-style matrices are built:

        Mcc_{rr'} = sum_c (M_{rc}/D_r) * (M_{r'c}/U_c)
        Mpp_{pp'} = sum_r (M_{rp}/U_p) * (M_{rp'}/D_r)

    ECI is the second eigenvector of Mcc, flipped if needed so that it
    correlates positively with diversity. PCI is the second eigenvector
    of Mpp, flipped if needed so that it correlates negatively with
    ubiquity. Both are passed through `normalize_zscore`.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        If True, apply RCA before binarizing.
    threshold : float
        Threshold handed to `binarize`.

    Returns
    -------
    (eci, pci) : pd.Series (named "eci" / "pci", labelled with the input
    index / columns) for DataFrame input, plain ndarrays otherwise.
    """
    is_df = isinstance(mat, pd.DataFrame)
    row_labels, col_labels = (mat.index, mat.columns) if is_df else (None, None)

    values = validate_matrix(mat)
    binary = binarize(compute_rca(values) if use_rca else values, threshold)

    diversity_vec = binary.sum(axis=1)   # per-row count, length R
    ubiquity_vec = binary.sum(axis=0)    # per-column count, length C

    # M / D_r and M / U_c, both R x C
    by_diversity = safe_divide(binary, diversity_vec[:, np.newaxis])
    by_ubiquity = safe_divide(binary, ubiquity_vec[np.newaxis, :])

    location_markov = by_diversity @ by_ubiquity.T   # Mcc, R x R
    activity_markov = by_ubiquity.T @ by_diversity   # Mpp, C x C

    eci_vec = _second_eigenvector(location_markov)
    pci_vec = _second_eigenvector(activity_markov)

    # Eigenvectors are defined up to sign; pin the sign against
    # diversity / ubiquity. The std checks come first so that corrcoef is
    # never evaluated on a constant vector.
    eci_varies = np.std(eci_vec) > 0 and np.std(diversity_vec) > 0
    if eci_varies and np.corrcoef(eci_vec, diversity_vec)[0, 1] < 0:
        eci_vec = -eci_vec

    pci_varies = np.std(pci_vec) > 0 and np.std(ubiquity_vec) > 0
    if pci_varies and np.corrcoef(pci_vec, ubiquity_vec)[0, 1] > 0:
        pci_vec = -pci_vec

    eci = normalize_zscore(eci_vec)
    pci = normalize_zscore(pci_vec)

    if not is_df:
        return eci, pci
    return (
        pd.Series(eci, index=row_labels, name="eci"),
        pd.Series(pci, index=col_labels, name="pci"),
    )
@@ -0,0 +1,130 @@
1
+ """
2
+ Fitness-Complexity method (non-linear iterative).
3
+
4
+ References
5
+ ----------
6
+ Tacchella et al. (2012) "A New Metrics for Countries' Fitness and Products' Complexity".
7
+ Cristelli et al. (2013).
8
+ """
9
+
10
+ import warnings
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+ from typing import Tuple, Union
15
+
16
+ from ..core.utils import validate_matrix, safe_divide, binarize
17
+ from ..core.rca import rca as compute_rca
18
+
19
+
20
def fitness_complexity(
    mat: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
    iterations: int = 20,
    extremality: float = 1.0,
    tol: float = 1e-10,
    log_fitness: bool = False,
) -> Tuple[Union[pd.Series, np.ndarray], Union[pd.Series, np.ndarray]]:
    """
    Fitness-Complexity algorithm.

    Starting from all-ones vectors, each iteration performs (every vector
    is rescaled to mean 1 right after it is updated, when its mean is
    positive):

        F_r^{(n)} = sum_c M_{rc} * Q_c^{(n-1)}
        Q_c^{(n)} = 1 / ( sum_r M_{rc} * (1/F_r^{(n)})^alpha )^{1/alpha}

    where alpha = `extremality`. Note that the Q update uses the fitness
    just computed in the same iteration (sequential update).

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    use_rca : bool
        If True, apply RCA before binarizing.
    threshold : float
        Threshold handed to `binarize`.
    iterations : int
        Maximum number of iterations (default 20). The loop stops earlier
        once both relative changes fall below `tol`.
    extremality : float
        Non-linearity parameter alpha (default 1.0). Must be non-zero.
    tol : float
        Convergence tolerance on the relative change (max absolute change
        divided by the max absolute previous value).
    log_fitness : bool
        If True, return the natural log of fitness and complexity;
        non-positive entries become NaN.

    Returns
    -------
    (fitness, complexity) : pd.Series (named "fitness" / "complexity") for
    DataFrame input, plain ndarrays otherwise.

    Raises
    ------
    ValueError
        If `extremality` is zero (the exponent 1/alpha is undefined).

    Warns
    -----
    RuntimeWarning
        If the loop ends without meeting `tol` and the last relative
        change of either vector still exceeds 1e-3.
    """
    # Reject alpha == 0 up front; otherwise the loop dies with a bare
    # ZeroDivisionError (or yields inf exponents for NumPy scalars).
    if extremality == 0:
        raise ValueError("extremality must be non-zero.")

    is_df = isinstance(mat, pd.DataFrame)
    row_index = mat.index if is_df else None
    col_index = mat.columns if is_df else None

    arr = validate_matrix(mat)

    if use_rca:
        m = binarize(compute_rca(arr), threshold)
    else:
        m = binarize(arr, threshold)

    n_r, n_c = m.shape
    fitness = np.ones(n_r)
    complexity = np.ones(n_c)

    inv_alpha = 1.0 / extremality  # loop-invariant exponent

    converged = False
    delta_f = delta_q = np.inf
    for _ in range(iterations):
        fitness_new = m @ complexity
        # Rescale to mean 1; skipped when the mean is not positive.
        mean_f = fitness_new.mean()
        if mean_f > 0:
            fitness_new /= mean_f

        # Q update with extremality.
        # NOTE(review): this uses the freshly updated fitness; the update
        # rule is often written with the previous iterate F^{(n-1)} —
        # confirm the sequential form is intended.
        inv_f = safe_divide(1.0, fitness_new ** extremality)
        q_denom = (m.T @ inv_f) ** inv_alpha
        complexity_new = safe_divide(1.0, q_denom)
        mean_q = complexity_new.mean()
        if mean_q > 0:
            complexity_new /= mean_q

        # Relative change; the 1e-15 keeps the ratio finite when the
        # previous vector is all zeros.
        delta_f = np.max(np.abs(fitness_new - fitness)) / (np.max(np.abs(fitness)) + 1e-15)
        delta_q = np.max(np.abs(complexity_new - complexity)) / (np.max(np.abs(complexity)) + 1e-15)

        fitness = fitness_new
        complexity = complexity_new

        if delta_f < tol and delta_q < tol:
            converged = True
            break

    # Missing `tol` alone is not reported; only a still-large final change.
    if not converged and (delta_f > 1e-3 or delta_q > 1e-3):
        warnings.warn(
            f"fitness_complexity did not converge within {iterations} "
            f"iterations (final relative change {max(delta_f, delta_q):.1e}); "
            "results may be unstable. Increase `iterations`.",
            RuntimeWarning,
            stacklevel=2,
        )

    if log_fitness:
        # np.log is evaluated on every entry before np.where selects, so
        # silence the warnings it emits for non-positive values.
        with np.errstate(divide="ignore", invalid="ignore"):
            fitness = np.where(fitness > 0, np.log(fitness), np.nan)
            complexity = np.where(complexity > 0, np.log(complexity), np.nan)

    if is_df:
        return (
            pd.Series(fitness, index=row_index, name="fitness"),
            pd.Series(complexity, index=col_index, name="complexity"),
        )
    return fitness, complexity