econcomplex 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- econcomplex/__init__.py +220 -0
- econcomplex/complexity/__init__.py +23 -0
- econcomplex/complexity/eci_pci.py +131 -0
- econcomplex/complexity/eigenvector.py +115 -0
- econcomplex/complexity/fitness.py +130 -0
- econcomplex/complexity/reflections.py +173 -0
- econcomplex/complexity/subnational.py +82 -0
- econcomplex/core/__init__.py +23 -0
- econcomplex/core/diversity.py +125 -0
- econcomplex/core/preprocess.py +83 -0
- econcomplex/core/rca.py +161 -0
- econcomplex/core/utils.py +137 -0
- econcomplex/dynamics/__init__.py +10 -0
- econcomplex/dynamics/entry_exit.py +248 -0
- econcomplex/dynamics/growth.py +146 -0
- econcomplex/inequality/__init__.py +11 -0
- econcomplex/inequality/concentration.py +148 -0
- econcomplex/inequality/gini.py +164 -0
- econcomplex/optimization/__init__.py +46 -0
- econcomplex/optimization/diffusion.py +379 -0
- econcomplex/optimization/growth_target.py +170 -0
- econcomplex/optimization/portfolio.py +178 -0
- econcomplex/optimization/steppingstone.py +267 -0
- econcomplex/outlook/__init__.py +6 -0
- econcomplex/outlook/coi_cog.py +168 -0
- econcomplex/patents/__init__.py +7 -0
- econcomplex/patents/recombination.py +135 -0
- econcomplex/pipeline.py +255 -0
- econcomplex/productivity/__init__.py +8 -0
- econcomplex/productivity/prody.py +218 -0
- econcomplex/relatedness/__init__.py +25 -0
- econcomplex/relatedness/cooccurrence.py +173 -0
- econcomplex/relatedness/cross_space.py +142 -0
- econcomplex/relatedness/density.py +232 -0
- econcomplex/relatedness/proximity.py +214 -0
- econcomplex/specialization/__init__.py +17 -0
- econcomplex/specialization/location_quotient.py +163 -0
- econcomplex/specialization/similarity.py +68 -0
- econcomplex-1.0.0.dist-info/METADATA +223 -0
- econcomplex-1.0.0.dist-info/RECORD +43 -0
- econcomplex-1.0.0.dist-info/WHEEL +5 -0
- econcomplex-1.0.0.dist-info/licenses/LICENSE +22 -0
- econcomplex-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Complexity Outlook Index (COI) and Complexity Outlook Gain (COG).
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Hausmann & Hidalgo (2011) "The Atlas of Economic Complexity".
|
|
7
|
+
Hidalgo et al. (2007) "The Product Space Conditions the Development of Nations".
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from typing import Optional, Union
|
|
13
|
+
|
|
14
|
+
from ..core.utils import validate_matrix, safe_divide, binarize
|
|
15
|
+
from ..core.rca import rca as compute_rca
|
|
16
|
+
from ..relatedness.density import relatedness_density
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def complexity_outlook_index(
    mat: Union[np.ndarray, pd.DataFrame],
    pci: Union[np.ndarray, pd.Series],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.Series, np.ndarray]:
    """
    Complexity Outlook Index (COI).

        COI_r = sum_c (density_{rc} / 100 * (1 - M_{rc}) * PCI_c)

    Measures a region's potential to diversify into complex activities
    it does not yet have, weighted by how related those activities are
    to its current portfolio.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    pci : array-like (length C)
        Product / activity complexity index. NaN entries (activities whose
        complexity is undefined) contribute zero to the sum instead of
        turning every region's COI into NaN.
    phi : array-like (C x C), optional
        Pre-computed proximity matrix. Forwarded to ``relatedness_density``,
        which computes it if None.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Proximity normalization ('max', 'sqrt', 'min').

    Returns
    -------
    pd.Series named ``"coi"`` indexed like ``mat`` when ``mat`` is a
    DataFrame; otherwise a 1-D ``np.ndarray`` of length R.
    """
    is_df = isinstance(mat, pd.DataFrame)
    row_index = mat.index if is_df else None

    arr = validate_matrix(mat)
    pci_arr = np.array(pci, dtype=float)
    # 0 * NaN is NaN, so a single undefined PCI would otherwise poison the
    # sum of every region, even regions that already have that activity.
    pci_weights = np.where(np.isnan(pci_arr), 0.0, pci_arr)

    if use_rca:
        m = binarize(compute_rca(arr), threshold)
    else:
        m = binarize(arr, threshold)

    # ``m`` is already binary, so density is asked not to re-apply RCA and
    # to split presence/absence at 0.5.
    dens = relatedness_density(m, phi=phi, use_rca=False, threshold=0.5,
                               proximity_method=proximity_method)
    if isinstance(dens, pd.DataFrame):
        dens = dens.values

    # COI: sum over non-present activities; density is divided by 100
    # before weighting.
    coi_values = ((dens / 100) * (1 - m) * pci_weights[None, :]).sum(axis=1)

    if is_df:
        return pd.Series(coi_values, index=row_index, name="coi")
    return coi_values
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def complexity_outlook_gain(
    mat: Union[np.ndarray, pd.DataFrame],
    pci: Union[np.ndarray, pd.Series],
    phi: Optional[Union[np.ndarray, pd.DataFrame]] = None,
    use_rca: bool = True,
    threshold: float = 1.0,
    proximity_method: str = "max",
) -> Union[pd.DataFrame, np.ndarray]:
    """
    Complexity Outlook Gain (COG).

        COG_{rc} = (1 - M_{rc}) * sum_{c'} (
            (1 - M_{rc'}) * phi_{cc'} * PCI_{c'} / sum_{c''} phi_{cc''}
        )

    Gain in COI if region r were to develop activity c.

    Parameters
    ----------
    mat : array-like (R x C)
        Value matrix.
    pci : array-like (length C)
        Activity complexity index. NaN entries contribute zero to the
        inner sum instead of turning the whole matrix into NaN.
    phi : array-like (C x C), optional
        Pre-computed proximity matrix. When None it is computed from the
        binarized matrix with ``proximity_method``.
    use_rca : bool
        Compute RCA before binarizing.
    threshold : float
        Binarization threshold.
    proximity_method : str
        Proximity normalization method (only used when ``phi`` is None).

    Returns
    -------
    R x C COG matrix: a ``pd.DataFrame`` labelled like ``mat`` when ``mat``
    is a DataFrame, otherwise a float ``np.ndarray``. Entries for activities
    the region already has are zero.
    """
    is_df = isinstance(mat, pd.DataFrame)
    row_index = mat.index if is_df else None
    col_index = mat.columns if is_df else None

    arr = validate_matrix(mat)
    pci_arr = np.array(pci, dtype=float)
    # 0 * NaN is NaN: one undefined PCI would otherwise propagate through the
    # matrix product below into every cell.
    pci_weights = np.where(np.isnan(pci_arr), 0.0, pci_arr)

    if use_rca:
        m = binarize(compute_rca(arr), threshold)
    else:
        m = binarize(arr, threshold)

    if phi is None:
        from ..relatedness.proximity import proximity as _prox
        phi_raw = _prox(m, use_rca=False, threshold=0.5,
                        method=proximity_method, compute="product")["product"]
        phi_arr = phi_raw.values if isinstance(phi_raw, pd.DataFrame) else phi_raw
    else:
        phi_arr = phi.values if isinstance(phi, pd.DataFrame) else np.array(phi, dtype=float)

    # phi_norm_{cc'} = phi_{cc'} / sum_{c''} phi_{cc''}
    phi_row_sums = phi_arr.sum(axis=1, keepdims=True)
    phi_norm = safe_divide(phi_arr, phi_row_sums)  # C x C

    # Entry [c, c'] = phi_norm[c, c'] * PCI[c']
    phi_pci = phi_norm * pci_weights[None, :]

    # Work in float explicitly so the result is never truncated to the
    # (possibly integer or boolean) dtype of the binarized matrix.
    not_present = 1.0 - np.asarray(m, dtype=float)  # R x C

    # inner[r, c] = sum_{c'} (1 - M[r, c']) * phi_pci[c, c'], for all regions
    # at once; the leading factor masks the result to non-present activities.
    cog_values = not_present * (not_present @ phi_pci.T)

    if is_df:
        return pd.DataFrame(cog_values, index=row_index, columns=col_index)
    return cog_values
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
# Short aliases matching the documented API: `coi` and `cog` are bound to the
# same function objects as `complexity_outlook_index` and
# `complexity_outlook_gain`, so either spelling can be called.
|
|
167
|
+
coi = complexity_outlook_index
|
|
168
|
+
cog = complexity_outlook_gain
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Patent-based recombination complexity.
|
|
3
|
+
|
|
4
|
+
References
|
|
5
|
+
----------
|
|
6
|
+
Fleming & Sorenson (2001) "Technology as a Complex Adaptive System:
|
|
7
|
+
Evidence from Patent Data".
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import numpy as np
|
|
11
|
+
import pandas as pd
|
|
12
|
+
from typing import Union
|
|
13
|
+
|
|
14
|
+
from ..core.utils import validate_matrix, safe_divide
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def ease_of_recombination(
    incidence: Union[np.ndarray, pd.DataFrame],
) -> Union[pd.Series, np.ndarray]:
    """
    Ease of Recombination (EOR) of every technology/class.

        EOR_t = (patents containing t that also contain another technology)
                / (patents containing t)

    Values close to 1 indicate a technology that is usually found
    together with other technologies.

    Parameters
    ----------
    incidence : array-like (P x T)
        Patent × technology binary incidence matrix.

    Returns
    -------
    pd.Series named ``"ease_of_recombination"`` indexed by the columns of
    ``incidence`` when it is a DataFrame; otherwise a length-T array.
    """
    tech_labels = incidence.columns if isinstance(incidence, pd.DataFrame) else None
    matrix = validate_matrix(incidence)

    # 1.0 for patents listing more than one technology, 0.0 otherwise (P).
    techs_per_patent = matrix.sum(axis=1)
    is_combined = (techs_per_patent > 1).astype(float)

    # Per technology: count of multi-technology patents it appears in (T).
    combined_per_tech = matrix.T @ is_combined

    # Per technology: count of all patents it appears in (T).
    patents_per_tech = matrix.sum(axis=0)

    eor_values = safe_divide(combined_per_tech, patents_per_tech)

    if tech_labels is None:
        return eor_values
    return pd.Series(eor_values, index=tech_labels, name="ease_of_recombination")
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def modular_complexity(
    incidence: Union[np.ndarray, pd.DataFrame],
    eor: Union[np.ndarray, pd.Series, None] = None,
) -> Union[pd.Series, np.ndarray]:
    """
    Modular Complexity (MC) of every patent.

        MC_p = (number of technologies in patent p)
               / (sum of EOR over the technologies in patent p)

    A larger MC means the patent's technologies are harder to recombine,
    i.e. the patent is more complex.

    Parameters
    ----------
    incidence : array-like (P x T)
        Patent × technology binary incidence matrix.
    eor : array-like (length T), optional
        Pre-computed EOR vector, used positionally. When None it is
        obtained from ``ease_of_recombination`` on the same matrix.

    Returns
    -------
    pd.Series named ``"modular_complexity"`` indexed by the rows of
    ``incidence`` when it is a DataFrame; otherwise a length-P array.
    """
    patent_labels = incidence.index if isinstance(incidence, pd.DataFrame) else None
    matrix = validate_matrix(incidence)

    if eor is not None:
        eor_vec = np.array(eor, dtype=float)
    else:
        eor_vec = ease_of_recombination(matrix)
        if isinstance(eor_vec, pd.Series):
            eor_vec = eor_vec.values

    # Technologies listed on each patent (P).
    tech_counts = matrix.sum(axis=1)

    # EOR summed over the technologies present on each patent (P).
    eor_totals = matrix @ eor_vec

    mc_values = safe_divide(tech_counts, eor_totals)

    if patent_labels is None:
        return mc_values
    return pd.Series(mc_values, index=patent_labels, name="modular_complexity")
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def modular_complexity_avg(
    incidence: Union[np.ndarray, pd.DataFrame],
    eor: Union[np.ndarray, pd.Series, None] = None,
) -> Union[pd.Series, np.ndarray]:
    """
    Modular Complexity averaged per technology (column).

        avg_MC_t = mean(MC_p for all patents p containing technology t)

    Parameters
    ----------
    incidence : array-like (P x T)
        Patent × technology incidence matrix.
    eor : array-like (length T), optional
        Pre-computed EOR vector, forwarded to ``modular_complexity``.

    Returns
    -------
    pd.Series named ``"modular_complexity_avg"`` indexed by the columns of
    ``incidence`` when it is a DataFrame; otherwise a length-T array.
    """
    tech_labels = incidence.columns if isinstance(incidence, pd.DataFrame) else None
    matrix = validate_matrix(incidence)

    per_patent = modular_complexity(matrix, eor=eor)
    mc_vec = per_patent.values if isinstance(per_patent, pd.Series) else per_patent

    # Mean = (sum of MC over patents containing t) / (patents containing t).
    patents_per_tech = matrix.sum(axis=0)
    mc_total_per_tech = matrix.T @ mc_vec
    averaged = safe_divide(mc_total_per_tech, patents_per_tech)

    if tech_labels is None:
        return averaged
    return pd.Series(averaged, index=tech_labels, name="modular_complexity_avg")
|
econcomplex/pipeline.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""
|
|
2
|
+
High-level pipeline: compute all complexity indicators in one call.
|
|
3
|
+
|
|
4
|
+
Mirrors the API of py-ecomplexity but with richer output and support
|
|
5
|
+
for all indicator methods.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
from typing import Dict, Literal, Optional
|
|
11
|
+
|
|
12
|
+
import numpy as np
|
|
13
|
+
import pandas as pd
|
|
14
|
+
|
|
15
|
+
from .core.rca import rca, mcp
|
|
16
|
+
from .complexity.eci_pci import eci_pci
|
|
17
|
+
from .relatedness.proximity import proximity, continuous_proximity
|
|
18
|
+
from .relatedness.density import relatedness_density
|
|
19
|
+
from .outlook.coi_cog import complexity_outlook_index, complexity_outlook_gain
|
|
20
|
+
from .core.utils import pivot_to_matrix
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def compute_complexity(
    data: pd.DataFrame,
    cols: Dict[str, str],
    *,
    method: Literal["eigenvector", "reflections", "fitness"] = "eigenvector",
    presence_test: Literal["rca", "rpop", "both", "manual"] = "rca",
    threshold: float = 1.0,
    iterations: int = 20,
    proximity_type: Literal["discrete", "continuous"] = "discrete",
    proximity_method: Literal["max", "sqrt", "min"] = "max",
    pop: Optional[pd.DataFrame] = None,
    compute_coi_cog: bool = True,
    time_col: Optional[str] = None,
) -> pd.DataFrame:
    """
    Compute a full suite of economic complexity indicators.

    Parameters
    ----------
    data : pd.DataFrame
        Long-format DataFrame.
    cols : dict
        Mapping with keys:
          'loc'  → column name for locations/regions ('location' also accepted)
          'act'  → column name for activities/products ('activity' or
                   'product' also accepted)
          'val'  → column name for values ('value' also accepted)
        Optionally:
          'time' → column name for time periods (enables panel mode)
    method : str
        ECI/PCI method: 'eigenvector', 'reflections', or 'fitness'.
    presence_test : str
        How to binarize: 'rca', 'rpop', 'both', 'manual'.
    threshold : float
        Binarization threshold (default 1.0).
    iterations : int
        Iterations for reflections/fitness methods.
    proximity_type : str
        'discrete' (co-occurrence) or 'continuous' (correlation).
    proximity_method : str
        Proximity normalization: 'max', 'sqrt', or 'min'.
    pop : pd.DataFrame, optional
        Population data with columns ['loc', 'time'?, 'pop'].
        Required when presence_test is 'rpop' or 'both'. In panel mode, if
        it contains the time column it is filtered to each period and the
        time column is removed before it is handed on, so the time values
        cannot be mistaken for the population column.
    compute_coi_cog : bool
        Whether to compute COI and COG (slower).
    time_col : str, optional
        Override for time column name (or use cols['time']).

    Returns
    -------
    Copy of the input rows with new columns:
        diversity, eci, coi, ubiquity, pci, rca, mcp, density, distance, [cog]
    ``coi`` is NaN and ``cog`` is absent when ``compute_coi_cog`` is False.
    For every ``method`` (including 'fitness') the location/activity scores
    are reported in the ``eci`` / ``pci`` columns.
    In panel mode the per-period results are concatenated in ascending
    period order with a fresh integer index.

    Raises
    ------
    ValueError
        If ``cols`` does not provide the location, activity and value
        column names.
    """
    loc_col = cols.get("loc") or cols.get("location")
    act_col = cols.get("act") or cols.get("activity") or cols.get("product")
    val_col = cols.get("val") or cols.get("value")
    t_col = time_col or cols.get("time")

    if not all([loc_col, act_col, val_col]):
        raise ValueError("cols must contain 'loc', 'act', and 'val' keys.")

    # Options shared by the cross-section and every panel period.
    options = dict(
        method=method,
        presence_test=presence_test,
        threshold=threshold,
        iterations=iterations,
        proximity_type=proximity_type,
        proximity_method=proximity_method,
        compute_coi_cog=compute_coi_cog,
    )

    # Cross-section: no time column configured, or it is absent from data.
    if not (t_col and t_col in data.columns):
        return _compute_single_period(
            data, loc_col, act_col, val_col, pop=pop, **options
        )

    pop_has_time = pop is not None and t_col in pop.columns
    results = []
    for period in sorted(data[t_col].unique()):
        period_data = data[data[t_col] == period].copy()
        if pop is None:
            pop_period = None
        elif pop_has_time:
            # Drop the time column after filtering: the per-period worker
            # picks the population column by position among the
            # non-location columns, and would otherwise read the period
            # values as population.
            pop_period = pop[pop[t_col] == period].drop(columns=[t_col])
        else:
            pop_period = pop
        results.append(
            _compute_single_period(
                period_data, loc_col, act_col, val_col,
                pop=pop_period, **options
            )
        )
    return pd.concat(results, ignore_index=True)
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _compute_single_period(
    data: pd.DataFrame,
    loc_col: str,
    act_col: str,
    val_col: str,
    *,
    method: str,
    presence_test: str,
    threshold: float,
    iterations: int,
    proximity_type: str,
    proximity_method: str,
    pop: Optional[pd.DataFrame],
    compute_coi_cog: bool,
) -> pd.DataFrame:
    """
    Compute indicators for a single time period.

    Parameters
    ----------
    data : pd.DataFrame
        Long-format rows of one period.
    loc_col, act_col, val_col : str
        Names of the location, activity and value columns in ``data``.
    method : str
        ECI/PCI method forwarded to ``eci_pci``.
    presence_test : str
        Presence test forwarded to ``mcp``.
    threshold : float
        Used as both the RCA and the RPOP threshold of ``mcp`` and as the
        threshold of ``eci_pci``.
    iterations : int
        Forwarded to ``eci_pci`` unless ``method`` is 'eigenvector'.
    proximity_type : str
        'continuous' uses ``continuous_proximity`` on the RCA matrix; any
        other value uses the discrete ``proximity`` on the binary matrix.
    proximity_method : str
        Normalization for the discrete proximity.
    pop : pd.DataFrame or None
        Population per location. Only read when ``presence_test`` is 'rpop'
        or 'both'. The population column is the one named ``"pop"`` if
        present, otherwise the first column that is not ``loc_col``.
    compute_coi_cog : bool
        Whether to compute COI and COG.

    Returns
    -------
    Copy of ``data`` left-merged with the columns diversity, eci, coi,
    ubiquity, pci, rca, mcp, density, distance and, when computed, cog.
    ``coi`` is NaN when ``compute_coi_cog`` is False.
    """

    # --- Build pivot matrix ---
    mat = pivot_to_matrix(data, index=loc_col, columns=act_col, values=val_col)

    # --- RCA ---
    rca_mat = rca(mat)

    # --- Population for RPOP ---
    pop_vec = None
    if presence_test in ("rpop", "both") and pop is not None:
        candidates = [c for c in pop.columns if c != loc_col]
        # Prefer the documented "pop" column, so an extra column (e.g. a
        # time column placed before it) is not read as the population.
        # Falls back to the first non-location column as before.
        pop_col = "pop" if "pop" in candidates else candidates[0]
        # Align to the row order of the pivot matrix; locations missing
        # from ``pop`` become NaN.
        pop_vec = pop.set_index(loc_col).reindex(mat.index)[pop_col].values

    # --- Mcp binary matrix ---
    mcp_mat = mcp(
        mat,
        presence_test=presence_test,
        rca_threshold=threshold,
        rpop_threshold=threshold,
        pop=pop_vec,
    )

    # --- Diversity & Ubiquity ---
    div = mcp_mat.sum(axis=1)
    ubi = mcp_mat.sum(axis=0)

    # --- ECI / PCI ---
    # Routed through the eci_pci dispatcher: validates the method and
    # pre-trims degenerate (zero diversity/ubiquity) units, returned as NaN.
    # NOTE(review): eci_pci receives the raw matrix with use_rca=True, so it
    # does not see the presence_test-specific ``mcp_mat`` - confirm that this
    # is intended for 'rpop' / 'both' / 'manual'.
    eci_s, pci_s = eci_pci(
        mat, use_rca=True, threshold=threshold, method=method,
        iterations=iterations if method != "eigenvector" else None,
    )
    if method == "fitness":
        # Report fitness-method scores under the common eci / pci names.
        eci_s.name = "eci"
        pci_s.name = "pci"

    # --- Proximity ---
    if proximity_type == "continuous":
        phi = continuous_proximity(rca_mat)
    else:
        phi_dict = proximity(mcp_mat, use_rca=False, threshold=0.5,
                             method=proximity_method, compute="product")
        phi = phi_dict["product"]

    # --- Density & Distance ---
    # mcp_mat is already binary, hence use_rca=False and the 0.5 cut-off.
    dens_mat = relatedness_density(mcp_mat, phi=phi, use_rca=False, threshold=0.5)
    dist_mat = 1 - dens_mat / 100

    # --- COI / COG (optional) ---
    coi_s = None
    cog_mat = None
    if compute_coi_cog:
        pci_arr = pci_s.values if isinstance(pci_s, pd.Series) else pci_s
        coi_s = complexity_outlook_index(
            mcp_mat, pci_arr, phi=phi, use_rca=False, threshold=0.5
        )
        cog_mat = complexity_outlook_gain(
            mcp_mat, pci_arr, phi=phi, use_rca=False, threshold=0.5
        )

    # --- Merge everything back into long format ---
    result = data.copy()

    # Add location-level indicators
    loc_indicators = pd.DataFrame({
        "diversity": div,
        "eci": eci_s,
        "coi": coi_s if coi_s is not None else np.nan,
    })
    result = result.merge(
        loc_indicators.reset_index().rename(columns={"index": loc_col}),
        on=loc_col,
        how="left",
    )

    # Add activity-level indicators
    act_indicators = pd.DataFrame({
        "ubiquity": ubi,
        "pci": pci_s,
    })
    result = result.merge(
        act_indicators.reset_index().rename(columns={"index": act_col}),
        on=act_col,
        how="left",
    )

    # Add cell-level indicators: each wide (location x activity) frame is
    # melted to long form and left-merged on the (location, activity) pair.
    cell_frames = {
        "rca": rca_mat,
        "mcp": mcp_mat,
        "density": dens_mat,
        "distance": dist_mat,
    }
    if cog_mat is not None:
        cell_frames["cog"] = cog_mat

    for value_name, wide in cell_frames.items():
        long_df = wide.reset_index().melt(
            id_vars=wide.index.name, var_name=act_col, value_name=value_name
        )
        result = result.merge(long_df, on=[loc_col, act_col], how="left")

    return result
|