econcomplex 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. econcomplex/__init__.py +220 -0
  2. econcomplex/complexity/__init__.py +23 -0
  3. econcomplex/complexity/eci_pci.py +131 -0
  4. econcomplex/complexity/eigenvector.py +115 -0
  5. econcomplex/complexity/fitness.py +130 -0
  6. econcomplex/complexity/reflections.py +173 -0
  7. econcomplex/complexity/subnational.py +82 -0
  8. econcomplex/core/__init__.py +23 -0
  9. econcomplex/core/diversity.py +125 -0
  10. econcomplex/core/preprocess.py +83 -0
  11. econcomplex/core/rca.py +161 -0
  12. econcomplex/core/utils.py +137 -0
  13. econcomplex/dynamics/__init__.py +10 -0
  14. econcomplex/dynamics/entry_exit.py +248 -0
  15. econcomplex/dynamics/growth.py +146 -0
  16. econcomplex/inequality/__init__.py +11 -0
  17. econcomplex/inequality/concentration.py +148 -0
  18. econcomplex/inequality/gini.py +164 -0
  19. econcomplex/optimization/__init__.py +46 -0
  20. econcomplex/optimization/diffusion.py +379 -0
  21. econcomplex/optimization/growth_target.py +170 -0
  22. econcomplex/optimization/portfolio.py +178 -0
  23. econcomplex/optimization/steppingstone.py +267 -0
  24. econcomplex/outlook/__init__.py +6 -0
  25. econcomplex/outlook/coi_cog.py +168 -0
  26. econcomplex/patents/__init__.py +7 -0
  27. econcomplex/patents/recombination.py +135 -0
  28. econcomplex/pipeline.py +255 -0
  29. econcomplex/productivity/__init__.py +8 -0
  30. econcomplex/productivity/prody.py +218 -0
  31. econcomplex/relatedness/__init__.py +25 -0
  32. econcomplex/relatedness/cooccurrence.py +173 -0
  33. econcomplex/relatedness/cross_space.py +142 -0
  34. econcomplex/relatedness/density.py +232 -0
  35. econcomplex/relatedness/proximity.py +214 -0
  36. econcomplex/specialization/__init__.py +17 -0
  37. econcomplex/specialization/location_quotient.py +163 -0
  38. econcomplex/specialization/similarity.py +68 -0
  39. econcomplex-1.0.0.dist-info/METADATA +223 -0
  40. econcomplex-1.0.0.dist-info/RECORD +43 -0
  41. econcomplex-1.0.0.dist-info/WHEEL +5 -0
  42. econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
  43. econcomplex-1.0.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,218 @@
1
+ """
2
+ Productivity levels of products and regions.
3
+
4
+ References
5
+ ----------
6
+ Hausmann, Hwang & Rodrik (2007) "What you export matters".
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from typing import Union
12
+
13
+ from ..core.utils import validate_matrix, safe_divide
14
+ from ..core.rca import rca as compute_rca
15
+
16
+
17
def prody(
    mat: Union[np.ndarray, pd.DataFrame],
    gdp: Union[np.ndarray, pd.Series],
    use_rca: bool = True,
) -> Union[pd.Series, np.ndarray]:
    """
    PRODY: Income / productivity level of each activity.

        PRODY_c = sum_r (LQ_{rc} * GDP_r) / sum_r LQ_{rc}

    Weighted average of regional GDP, weighted by each region's
    Revealed Comparative Advantage (location quotient) in activity c.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    gdp : array-like (length R)
        GDP (or income) per region. Values are matched to the rows of
        ``mat`` by position (any pandas index on ``gdp`` is ignored).
        A single-column or single-row 2-D input is flattened.
    use_rca : bool
        Compute RCA internally (True) or use mat as LQ (False).

    Returns
    -------
    pd.Series indexed by activity (named "prody") when ``mat`` is a
    DataFrame; otherwise a np.ndarray with one entry per column.

    Raises
    ------
    ValueError
        If ``gdp`` is not a vector with one entry per row of ``mat``.
    """
    is_df = isinstance(mat, pd.DataFrame)
    col_index = mat.columns if is_df else None

    arr = validate_matrix(mat)
    gdp_arr = np.array(gdp, dtype=float)

    # A column/row vector (e.g. a one-column DataFrame) would pass a plain
    # len() check and then broadcast against the R x C weights into an
    # R x R x C array, silently returning a wrong-shaped result.
    if gdp_arr.ndim == 2 and 1 in gdp_arr.shape:
        gdp_arr = gdp_arr.reshape(-1)

    if gdp_arr.ndim != 1 or gdp_arr.shape[0] != arr.shape[0]:
        raise ValueError("Length of gdp must equal number of rows in mat.")

    if use_rca:
        lq = compute_rca(arr)
        if isinstance(lq, pd.DataFrame):
            lq = lq.values
    else:
        lq = arr

    # Column-wise weighted mean of GDP with LQ as the weights.
    numerator = (lq * gdp_arr[:, None]).sum(axis=0)  # C
    denominator = lq.sum(axis=0)  # C
    result = safe_divide(numerator, denominator)

    if is_df:
        return pd.Series(result, index=col_index, name="prody")
    return result
66
+
67
+
68
def expy(
    mat: Union[np.ndarray, pd.DataFrame],
    gdp: Union[np.ndarray, pd.Series],
    use_rca: bool = True,
) -> Union[pd.Series, np.ndarray]:
    """
    EXPY: income level associated with a region's activity portfolio.

        EXPY_r = sum_c (s_{rc} * PRODY_c),   with s_{rc} = x_{rc} / X_r

    Each region's score is the average PRODY of its activities, weighted
    by the share of every activity in the region's row total.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    gdp : array-like (length R)
        GDP (or income) per region.
    use_rca : bool
        Forwarded to `prody`: whether RCA is computed for the PRODY step.

    Returns
    -------
    pd.Series indexed by region (named "expy") when ``mat`` is a
    DataFrame; otherwise a np.ndarray with one entry per row.
    """
    region_labels = mat.index if isinstance(mat, pd.DataFrame) else None

    values = validate_matrix(mat)
    income = np.array(gdp, dtype=float)

    # PRODY per activity; unwrap to a plain array if a Series comes back.
    activity_income = prody(values, income, use_rca=use_rca)
    if isinstance(activity_income, pd.Series):
        activity_income = activity_income.values

    # Share of each activity in its region's row total.
    region_totals = values.sum(axis=1, keepdims=True)
    portfolio_shares = safe_divide(values, region_totals)

    scores = (portfolio_shares * activity_income[None, :]).sum(axis=1)

    if region_labels is None:
        return scores
    return pd.Series(scores, index=region_labels, name="expy")
110
+
111
+
112
def product_gini_index(
    mat: Union[np.ndarray, pd.DataFrame],
    gini_vec: Union[np.ndarray, pd.Series],
    threshold: float = 1.0,
) -> Union[pd.Series, np.ndarray]:
    """
    Product Gini Index (PGI): inequality embedded in a product.

        PGI_c = sum_r (M_{rc} * s_{rc} * Gini_r) / sum_r (M_{rc} * s_{rc})

    Weighted average of regional Gini coefficients. Region r contributes
    to activity c with weight s_{rc} (the activity's share in the region's
    row total), and only where its RCA reaches ``threshold`` (M_{rc} = 1).

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    gini_vec : array-like (length R)
        Gini coefficient per region, matched to the rows of ``mat`` by
        position. A single-column or single-row 2-D input is flattened.
    threshold : float
        RCA binarization threshold (RCA >= threshold counts as present).

    Returns
    -------
    pd.Series indexed by activity (named "pgi") when ``mat`` is a
    DataFrame; otherwise a np.ndarray with one entry per column.

    Raises
    ------
    ValueError
        If ``gini_vec`` is not a vector with one entry per row of ``mat``.
    """
    is_df = isinstance(mat, pd.DataFrame)
    col_index = mat.columns if is_df else None

    arr = validate_matrix(mat)
    gini_arr = np.array(gini_vec, dtype=float)

    # A column/row vector would pass a plain len() check and then
    # broadcast against the R x C weights into a wrong-shaped result.
    if gini_arr.ndim == 2 and 1 in gini_arr.shape:
        gini_arr = gini_arr.reshape(-1)

    if gini_arr.ndim != 1 or gini_arr.shape[0] != arr.shape[0]:
        raise ValueError("Length of gini_vec must equal number of rows in mat.")

    rca_mat = compute_rca(arr)
    if isinstance(rca_mat, pd.DataFrame):
        rca_mat = rca_mat.values

    # M: 1.0 where the region is specialised in the activity, else 0.0.
    m = (rca_mat >= threshold).astype(float)
    row_sums = arr.sum(axis=1, keepdims=True)
    shares = safe_divide(arr, row_sums)

    weights = m * shares  # R x C
    numerator = (weights * gini_arr[:, None]).sum(axis=0)
    denominator = weights.sum(axis=0)
    result = safe_divide(numerator, denominator)

    if is_df:
        return pd.Series(result, index=col_index, name="pgi")
    return result
163
+
164
+
165
def product_emissions_index(
    mat: Union[np.ndarray, pd.DataFrame],
    emissions: Union[np.ndarray, pd.Series],
    threshold: float = 1.0,
) -> Union[pd.Series, np.ndarray]:
    """
    Product Emissions Intensity Index (PEII).

    Same formula as PGI but substitutes emissions intensity for Gini:

        PEII_c = sum_r (M_{rc} * s_{rc} * emissions_r) / sum_r (M_{rc} * s_{rc})

    where s_{rc} is the share of activity c in region r's row total and
    M_{rc} = 1 where the region's RCA reaches ``threshold``.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    emissions : array-like (length R)
        Emissions intensity per region, matched to the rows of ``mat`` by
        position. A single-column or single-row 2-D input is flattened.
    threshold : float
        RCA binarization threshold (RCA >= threshold counts as present).

    Returns
    -------
    pd.Series indexed by activity (named "peii") when ``mat`` is a
    DataFrame; otherwise a np.ndarray with one entry per column.

    Raises
    ------
    ValueError
        If ``emissions`` is not a vector with one entry per row of ``mat``.
    """
    is_df = isinstance(mat, pd.DataFrame)
    col_index = mat.columns if is_df else None

    arr = validate_matrix(mat)
    em_arr = np.array(emissions, dtype=float)

    # A column/row vector would pass a plain len() check and then
    # broadcast against the R x C weights into a wrong-shaped result.
    if em_arr.ndim == 2 and 1 in em_arr.shape:
        em_arr = em_arr.reshape(-1)

    if em_arr.ndim != 1 or em_arr.shape[0] != arr.shape[0]:
        raise ValueError("Length of emissions must equal number of rows in mat.")

    rca_mat = compute_rca(arr)
    if isinstance(rca_mat, pd.DataFrame):
        rca_mat = rca_mat.values

    # M: 1.0 where the region is specialised in the activity, else 0.0.
    m = (rca_mat >= threshold).astype(float)
    row_sums = arr.sum(axis=1, keepdims=True)
    shares = safe_divide(arr, row_sums)

    weights = m * shares  # R x C
    numerator = (weights * em_arr[:, None]).sum(axis=0)
    denominator = weights.sum(axis=0)
    result = safe_divide(numerator, denominator)

    if is_df:
        return pd.Series(result, index=col_index, name="peii")
    return result
214
+
215
+
216
+ # Short aliases matching the documented API
217
+ pgi = product_gini_index
218
+ peii = product_emissions_index
@@ -0,0 +1,25 @@
1
+ from .proximity import proximity, continuous_proximity
2
+ from .density import (
3
+ relatedness_density,
4
+ distance,
5
+ relatedness_density_internal,
6
+ relatedness_density_external,
7
+ relative_relatedness,
8
+ )
9
+ from .cooccurrence import co_occurrence, relatedness_index, z_score_novelty
10
+ from .cross_space import cross_proximity, cross_relatedness
11
+
12
+ __all__ = [
13
+ "proximity",
14
+ "continuous_proximity",
15
+ "relatedness_density",
16
+ "distance",
17
+ "relatedness_density_internal",
18
+ "relatedness_density_external",
19
+ "relative_relatedness",
20
+ "co_occurrence",
21
+ "relatedness_index",
22
+ "z_score_novelty",
23
+ "cross_proximity",
24
+ "cross_relatedness",
25
+ ]
@@ -0,0 +1,173 @@
1
+ """
2
+ Co-occurrence matrices and statistical novelty (z-score).
3
+
4
+ References
5
+ ----------
6
+ Steijn (2017) probability index; Fleming & Sorenson (2001) for patents.
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from typing import Literal, Union
12
+
13
+ from ..core.utils import validate_matrix, safe_divide
14
+
15
+
16
def co_occurrence(
    mat: Union[np.ndarray, pd.DataFrame],
    diagonal: bool = False,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Activity-by-activity co-occurrence matrix.

    For a regions/events × activities matrix M the result is

        Cooc_{cc'} = sum_r M_{rc} * M_{rc'}      (i.e. M^T @ M)

    which, for a binary M, counts the rows in which both activities
    appear together.

    Parameters
    ----------
    mat : array-like (R x C)
        Binary (or value) incidence matrix.
    diagonal : bool
        If False (default), the diagonal is set to 0; if True it is kept.

    Returns
    -------
    C x C co-occurrence matrix: a pd.DataFrame labelled by the input's
    columns when ``mat`` is a DataFrame, otherwise a np.ndarray.
    """
    labels = mat.columns if isinstance(mat, pd.DataFrame) else None

    incidence = validate_matrix(mat)
    counts = incidence.T @ incidence

    if not diagonal:
        # Self-pairs are dropped unless the caller asks to keep them.
        np.fill_diagonal(counts, 0.0)

    if labels is None:
        return counts
    return pd.DataFrame(counts, index=labels, columns=labels)
53
+
54
+
55
def relatedness_index(
    mat: Union[np.ndarray, pd.DataFrame],
    method: Literal["probability", "association", "cosine", "jaccard"] = "cosine",
    diagonal: bool = False,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Normalized pairwise relatedness between activities.

    Starting from the co-occurrence matrix C = M^T @ M, with S_i the
    i-th diagonal entry of C and T the number of rows of ``mat``:

    "probability" (Steijn 2017):
        SM_{ij} = C_{ij} / (((S_i/T)(S_j/(T-S_i)) + (S_j/T)(S_i/(T-S_j))) * T/2)

    "association" (association strength):
        SA_{ij} = (C_{ij}/T) / ((S_i/T)(S_j/T))

    "cosine":
        SC_{ij} = C_{ij} / sqrt(S_i * S_j)

    "jaccard":
        SJ_{ij} = C_{ij} / (S_i + S_j - C_{ij})

    Parameters
    ----------
    mat : array-like (R x C)
        Binary incidence matrix.
    method : str
        Normalization method, one of the four names above.
    diagonal : bool
        If False (default), the diagonal of the result is set to 0.

    Returns
    -------
    C x C normalized relatedness matrix: a pd.DataFrame labelled by the
    input's columns when ``mat`` is a DataFrame, otherwise a np.ndarray.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    labels = mat.columns if isinstance(mat, pd.DataFrame) else None

    incidence = validate_matrix(mat)
    joint = incidence.T @ incidence  # C x C co-occurrence
    total = incidence.shape[0]  # T: number of regions/events
    occ = joint.diagonal().copy()  # S_i: diagonal of the co-occurrence matrix

    if method == "cosine":
        scaled = safe_divide(joint, np.sqrt(occ[:, None] * occ[None, :]))

    elif method == "jaccard":
        union = occ[:, None] + occ[None, :] - joint
        scaled = safe_divide(joint, union)

    elif method == "association":
        freq = occ / total
        scaled = safe_divide(joint / total, freq[:, None] * freq[None, :])

    elif method == "probability":
        occ_f = occ.astype(float)
        # Two symmetric halves of the expected co-occurrence, one per
        # ordering of the pair (i, j).
        half_i = safe_divide(
            occ_f[:, None] * occ_f[None, :], total * (total - occ_f[:, None])
        )
        half_j = safe_divide(
            occ_f[None, :] * occ_f[:, None], total * (total - occ_f[None, :])
        )
        expected = (half_i + half_j) * total / 2
        scaled = safe_divide(joint, expected)

    else:
        raise ValueError("method must be 'probability', 'association', 'cosine', or 'jaccard'.")

    if not diagonal:
        np.fill_diagonal(scaled, 0.0)

    if labels is None:
        return scaled
    return pd.DataFrame(scaled, index=labels, columns=labels)
127
+
128
+
129
def z_score_novelty(
    incidence: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Z-score of technological novelty (atypicality of co-occurrence).

    Compares the observed co-occurrence of every pair of technologies
    with the count expected by chance:

        z_{ij} = (C_{ij} - mu_{ij}) / sigma_{ij}
    where
        mu_{ij}    = n_i * n_j / P
        sigma_{ij} = sqrt(mu_{ij} * (1 - n_i/P) * (P - n_j)/(P-1))

    with n_i the column sum of technology i and P the number of rows.

    Parameters
    ----------
    incidence : array-like (P x T)
        Patent × technology (or event × activity) incidence matrix.

    Returns
    -------
    T x T z-score matrix with a zero diagonal: a pd.DataFrame labelled by
    the input's columns when ``incidence`` is a DataFrame, otherwise a
    np.ndarray.
    """
    labels = incidence.columns if isinstance(incidence, pd.DataFrame) else None

    events = validate_matrix(incidence)
    n_events = events.shape[0]  # P

    freq = events.sum(axis=0)  # n_i, shape (T,)
    observed = events.T @ events  # T x T

    expected = (freq[:, None] * freq[None, :]) / n_events

    finite_population = safe_divide(n_events - freq[None, :], n_events - 1)
    variance = expected * (1 - freq[:, None] / n_events) * finite_population
    # Clip at zero so rounding noise cannot produce a negative variance.
    spread = np.sqrt(np.maximum(variance, 0))

    scores = safe_divide(observed - expected, spread)
    np.fill_diagonal(scores, 0.0)

    if labels is None:
        return scores
    return pd.DataFrame(scores, index=labels, columns=labels)
@@ -0,0 +1,142 @@
1
+ """
2
+ Cross-space proximity and relatedness between two different activity spaces.
3
+
4
+ References
5
+ ----------
6
+ Catalan et al. (2020) "Cross-space proximity and relatedness".
7
+ """
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+ from typing import Union
12
+
13
+ from ..core.utils import validate_matrix, safe_divide, binarize
14
+ from ..core.rca import rca as compute_rca
15
+
16
+
17
def cross_proximity(
    mat_a: Union[np.ndarray, pd.DataFrame],
    mat_b: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Proximity between the activities of space A and those of space B.

        X_phi_{ij} = min(C_{ij}/U_j^B, C_{ij}/U_i^A)
        where C_{ij} = (M_A^T * M_B)_{ij} = co-presence count

    U_i^A and U_j^B are the column sums of the binarized matrices.

    Parameters
    ----------
    mat_a : array-like (R x A)
        Region × activity-A matrix.
    mat_b : array-like (R x B)
        Region × activity-B matrix.
    use_rca : bool
        Compute RCA before binarizing, for both matrices.
    threshold : float
        Binarization threshold passed to `binarize`.

    Returns
    -------
    A x B cross-proximity matrix: a pd.DataFrame (rows labelled by the
    columns of ``mat_a``, columns by those of ``mat_b``) only when both
    inputs are DataFrames, otherwise a np.ndarray.

    Raises
    ------
    ValueError
        If the two matrices do not have the same number of rows.
    """
    labels_a = mat_a.columns if isinstance(mat_a, pd.DataFrame) else None
    labels_b = mat_b.columns if isinstance(mat_b, pd.DataFrame) else None

    arr_a = validate_matrix(mat_a)
    arr_b = validate_matrix(mat_b)

    if arr_a.shape[0] != arr_b.shape[0]:
        raise ValueError("mat_a and mat_b must have the same number of rows (locations).")

    # Presence matrices: binarize either the RCA or the raw values.
    presence_a = binarize(compute_rca(arr_a) if use_rca else arr_a, threshold)
    presence_b = binarize(compute_rca(arr_b) if use_rca else arr_b, threshold)

    co_presence = presence_a.T @ presence_b  # (A x R) @ (R x B) = A x B

    ubiquity_a = presence_a.sum(axis=0)  # A
    ubiquity_b = presence_b.sum(axis=0)  # B

    # min(C/U_b, C/U_a) is the same as C / max(U_a, U_b).
    larger_ubiquity = np.maximum(ubiquity_a[:, None], ubiquity_b[None, :])
    scaled = safe_divide(co_presence, larger_ubiquity)

    if labels_a is not None and labels_b is not None:
        return pd.DataFrame(scaled, index=labels_a, columns=labels_b)
    return scaled
75
+
76
+
77
def cross_relatedness(
    mat_a: Union[np.ndarray, pd.DataFrame],
    x_phi: Union[np.ndarray, pd.DataFrame],
    use_rca: bool = True,
    threshold: float = 1.0,
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Cross-space relatedness density.

        x_density_{rb} = (M_A * X_phi)_{rb} / sum_a X_phi_{ab}

    For every region r and every activity b of space B: the proximity to
    b summed over the space-A activities the region holds, divided by the
    proximity to b summed over all space-A activities.

    Parameters
    ----------
    mat_a : array-like (R x A)
        Region × activity-A value matrix.
    x_phi : array-like (A x B)
        Cross-proximity matrix from `cross_proximity`.
    use_rca : bool
        Compute RCA before binarizing mat_a.
    threshold : float
        Binarization threshold passed to `binarize`.

    Returns
    -------
    R x B cross-relatedness matrix (columns = activities of space B): a
    pd.DataFrame indexed like ``mat_a`` when ``mat_a`` is a DataFrame
    (column labels taken from ``x_phi`` if it is a DataFrame), otherwise
    a np.ndarray.

    Raises
    ------
    ValueError
        If ``x_phi`` does not have one row per column of ``mat_a``.
    """
    region_labels = mat_a.index if isinstance(mat_a, pd.DataFrame) else None
    phi_is_df = isinstance(x_phi, pd.DataFrame)
    b_labels = x_phi.columns if phi_is_df else None

    arr_a = validate_matrix(mat_a)
    if phi_is_df:
        phi_arr = x_phi.values
    else:
        phi_arr = np.array(x_phi, dtype=float)

    if arr_a.shape[1] != phi_arr.shape[0]:
        raise ValueError(
            "x_phi must have one row per activity of mat_a "
            f"(mat_a has {arr_a.shape[1]} activities, x_phi has "
            f"{phi_arr.shape[0]} rows)."
        )

    # Region portfolio in space A: binarize either the RCA or raw values.
    portfolio = binarize(compute_rca(arr_a) if use_rca else arr_a, threshold)

    held_proximity = portfolio @ phi_arr  # R x B
    total_proximity = phi_arr.sum(axis=0, keepdims=True)  # 1 x B column sums

    density = safe_divide(held_proximity, total_proximity)

    if region_labels is None:
        return density
    return pd.DataFrame(density, index=region_labels, columns=b_labels)
139
+
140
+
141
+ # Short alias matching the documented API
142
+ cross_space_proximity = cross_proximity