mrio-toolbox 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mrio-toolbox might be problematic. Click here for more details.
- mrio_toolbox/__init__.py +18 -2
- mrio_toolbox/_parts/_Axe.py +95 -37
- mrio_toolbox/_parts/_Part.py +264 -70
- mrio_toolbox/_parts/__init__.py +4 -0
- mrio_toolbox/_parts/part_operations.py +24 -17
- mrio_toolbox/extractors/__init__.py +20 -0
- mrio_toolbox/extractors/downloaders.py +36 -0
- mrio_toolbox/extractors/emerging/__init__.py +3 -0
- mrio_toolbox/extractors/emerging/emerging_extractor.py +117 -0
- mrio_toolbox/extractors/eora/__init__.py +3 -0
- mrio_toolbox/extractors/eora/eora_extractor.py +132 -0
- mrio_toolbox/extractors/exiobase/__init__.py +3 -0
- mrio_toolbox/extractors/exiobase/exiobase_extractor.py +270 -0
- mrio_toolbox/extractors/extractors.py +79 -0
- mrio_toolbox/extractors/figaro/__init__.py +3 -0
- mrio_toolbox/extractors/figaro/figaro_downloader.py +280 -0
- mrio_toolbox/extractors/figaro/figaro_extractor.py +187 -0
- mrio_toolbox/extractors/gloria/__init__.py +3 -0
- mrio_toolbox/extractors/gloria/gloria_extractor.py +202 -0
- mrio_toolbox/extractors/gtap11/__init__.py +7 -0
- mrio_toolbox/extractors/gtap11/extraction/__init__.py +3 -0
- mrio_toolbox/extractors/gtap11/extraction/extractor.py +129 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/__init__.py +6 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/_header_sets.py +279 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file.py +262 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file_io.py +974 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/header_array.py +300 -0
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/sl4.py +229 -0
- mrio_toolbox/extractors/gtap11/gtap_mrio/__init__.py +6 -0
- mrio_toolbox/extractors/gtap11/gtap_mrio/mrio_builder.py +158 -0
- mrio_toolbox/extractors/icio/__init__.py +3 -0
- mrio_toolbox/extractors/icio/icio_extractor.py +121 -0
- mrio_toolbox/extractors/wiod/__init__.py +3 -0
- mrio_toolbox/extractors/wiod/wiod_extractor.py +143 -0
- mrio_toolbox/mrio.py +254 -94
- mrio_toolbox/msm/__init__.py +6 -0
- mrio_toolbox/msm/multi_scale_mapping.py +863 -0
- mrio_toolbox/utils/__init__.py +3 -0
- mrio_toolbox/utils/converters/__init__.py +3 -0
- mrio_toolbox/utils/converters/pandas.py +8 -6
- mrio_toolbox/utils/converters/xarray.py +2 -13
- mrio_toolbox/utils/formatting/__init__.py +0 -0
- mrio_toolbox/utils/formatting/formatter.py +528 -0
- mrio_toolbox/utils/loaders/__init__.py +4 -0
- mrio_toolbox/utils/loaders/_loader.py +60 -4
- mrio_toolbox/utils/loaders/_loader_factory.py +22 -1
- mrio_toolbox/utils/loaders/_nc_loader.py +37 -1
- mrio_toolbox/utils/loaders/_pandas_loader.py +29 -3
- mrio_toolbox/utils/loaders/_parameter_loader.py +61 -16
- mrio_toolbox/utils/savers/__init__.py +3 -0
- mrio_toolbox/utils/savers/_path_checker.py +25 -7
- mrio_toolbox/utils/savers/_to_folder.py +6 -1
- mrio_toolbox/utils/savers/_to_nc.py +26 -18
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/METADATA +10 -6
- mrio_toolbox-1.1.1.dist-info/RECORD +59 -0
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/WHEEL +1 -1
- mrio_toolbox-1.0.0.dist-info/RECORD +0 -26
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info/licenses}/LICENSE +0 -0
- {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Extractor for Emerging MRIO
|
|
3
|
+
|
|
4
|
+
This extractor loads tables and labels from the Emerging MRIO .mat files files,
|
|
5
|
+
builds an mrio object from the data and saves it as NetCDF for further use with
|
|
6
|
+
the mrio_toolbox library.
|
|
7
|
+
|
|
8
|
+
Supports Emerging v.1
|
|
9
|
+
https://zenodo.org/records/10956623
|
|
10
|
+
|
|
11
|
+
Created on 18.03.2025
|
|
12
|
+
@author: wirth
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import logging
|
|
17
|
+
import numpy as np
|
|
18
|
+
import h5py
|
|
19
|
+
|
|
20
|
+
from mrio_toolbox import MRIO
|
|
21
|
+
from mrio_toolbox.utils.savers._to_nc import save_to_nc
|
|
22
|
+
|
|
23
|
+
log = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def extract_emerging(year, source, precision=32):
    """
    Extract EMERGING MRIO data.

    Loads tables and labels from the EMERGING MRIO .mat file
    (MATLAB v7.3 / HDF5 format, read via h5py) and builds an MRIO
    object for further use with the mrio_toolbox library.

    Supports Emerging v.1 (https://zenodo.org/records/10956623).

    Parameters
    ----------
    year : str
        Data year to load.
    source : path-like
        Path to the folder where the raw data is stored.
    precision : int
        Precision of the data in bits (32 or 64). Default is 32.

    Returns
    -------
    MRIO
        MRIO object with parts T, Y and VA.

    Raises
    ------
    FileNotFoundError
        If the expected .mat file is not found in ``source``.
    ValueError
        If ``precision`` is neither 32 nor 64.
    """
    log.info(f"Opening EMERGING data for year {year}...")
    file_path = os.path.join(source, f"global_mrio_{year}.mat")

    # Check source path
    if not os.path.isfile(file_path):
        log.error(f"{os.path.abspath(file_path)} does not exist.")
        raise FileNotFoundError(f"{os.path.abspath(file_path)} does not exist.")

    # Resolve the dtype up front: the previous code left `dt` unbound for
    # any precision other than 32/64 and crashed later with a NameError.
    if precision == 32:
        dt = np.float32
    elif precision == 64:
        dt = np.float64
    else:
        raise ValueError(
            f"Unsupported precision: {precision} (expected 32 or 64)")
    log.info(f"Data precision is {precision} bits")

    # Context manager guarantees the HDF5 handle is released even on error
    # (the previous code never closed the file).
    with h5py.File(file_path, "r") as f:
        log.info("Extracting labels...")

        def _ref_to_str(ref_key):
            # EMERGING stores strings as references into '#refs#' pointing
            # at arrays of ASCII codes; resolve and rebuild the string.
            return ''.join(chr(c[0]) for c in f['#refs#'][ref_key][:])

        # country_list holds NumPy scalar references; .item() unwraps them.
        countries = [_ref_to_str(ref.item()) for ref in f['country_list']]
        sectors = [_ref_to_str(ref_key) for ref_key in f["sector_list"][0]]
        y_labs = [_ref_to_str(ref_key) for ref_key in f['final_list'][0]]
        va_labs = ["Value added"]

        labels = {
            "countries": countries,
            "sectors": sectors,
            "y_labs": y_labs,
            "va_labs": va_labs
        }

        log.info("Extracting data, this can take a while...")
        tables = {}
        tables["T"] = np.array(f["z"], dtype=dt)
        tables["Y"] = np.array(f["f"], dtype=dt).transpose()  # y is provided transposed
        tables["VA"] = np.array(f["va"], dtype=dt)  # No vay part provided

    # Assemble mrio object
    log.info("Building MRIO object...")
    m = MRIO()
    m.add_dimensions(labels)
    log.info("Building MRIO objects from parts containing labels and tables...")
    m.parts["T"] = m.new_part(
        name="T",
        data=tables["T"],
        dimensions=[["countries", "sectors"], ["countries", "sectors"]])
    log.info("T part added")
    m.parts["Y"] = m.new_part(
        name="Y",
        data=tables["Y"],
        dimensions=[["countries", "sectors"], ["countries", "y_labs"]])
    log.info("Y part added")
    m.parts["VA"] = m.new_part(
        name="VA",
        data=tables["VA"],
        dimensions=["va_labs", ["countries", "sectors"]])
    log.info("VA part added")
    log.info("MRIO object built")

    m.name = f"emerging_{year}_{precision}bits_resolution"
    return m
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Extractor for Eora26 data.
|
|
3
|
+
|
|
4
|
+
This extractor loads Eora26 raw data files and converts them to NetCDF
|
|
5
|
+
files.
|
|
6
|
+
|
|
7
|
+
Supports Eora26 v199.82
|
|
8
|
+
https://worldmrio.com/eora26/
|
|
9
|
+
|
|
10
|
+
Created on Fr Nov 29, 2024
|
|
11
|
+
@author: wirth, based on code of beaufils
|
|
12
|
+
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import os
|
|
16
|
+
import logging
|
|
17
|
+
import numpy as np
|
|
18
|
+
from mrio_toolbox import MRIO
|
|
19
|
+
from mrio_toolbox.utils.savers._to_nc import save_to_nc
|
|
20
|
+
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
def extract_eora26(
        year,
        source,
        parts='all'):
    """
    Extract EORA26 data.

    Loads EORA26 tables and labels and builds an MRIO object for further
    use with the mrio_toolbox library.

    Parameters
    ----------
    year : str
        Data year to load.
    source : path-like
        Path to folder where raw data is stored.
    parts : str
        Data blocks to load:
        'all' : T, FD, VA, Q, QY (default)
        any other value : T, FD only

    Returns
    -------
    MRIO
        MRIO object holding the requested parts
        (named t, y, va, q, qy).

    Raises
    ------
    NotADirectoryError
        If ``source`` does not exist.
    """
    # Check source path
    if not os.path.exists(source):
        log.error(f"{os.path.abspath(source)} does not exist.")
        raise NotADirectoryError(f"{os.path.abspath(source)} does not exist.")

    # EORA26 comes with 189 countries and 26 sectors
    c, s = 189, 26

    # Usually, we want to extract all tables
    if parts == "all":
        parts = ["T", "FD", "VA", "Q", "QY"]
    else:
        parts = ["T", "FD"]

    tables = dict()
    for part in parts:
        tables[part] = np.loadtxt(
            os.path.join(source, f'Eora26_{year}_bp_{part}.txt'),
            delimiter='\t')
        # Exclude the statistical-discrepancies (RoW) row and column so
        # the data aligns with the number of countries and sectors.
        if part == "T":
            tables[part] = tables[part][:c * s, :c * s]
        elif part == "FD":
            tables[part] = tables[part][:c * s, :c * 6]
        elif part == "QY":
            tables[part] = tables[part][:, :c * 6]
        else:  # Q, VA
            tables[part] = tables[part][:, :c * s]

    # Next, we load the labels
    labels = {}
    # Split country and sector labels for multi-indexing
    labs = np.loadtxt(os.path.join(source, "labels_T.txt"),
                      dtype=str, delimiter='\t')
    sectors = labs[:s, 3].tolist()
    # One country label every s rows (labels_T.txt repeats the country
    # name for each of its sectors).
    countries = [labs[i * s, 1][:] for i in range(c)]

    # Omit countries and sectors from y_labs, they are already included
    # in sectors and countries labels.
    y_labs = np.loadtxt(os.path.join(source, "labels_FD.txt"),
                        dtype=str, delimiter='\t')
    y_labs = y_labs[:6, 3].tolist()

    # q and va labels need to be reformatted into a single list
    q_labs = np.loadtxt(os.path.join(source, "labels_Q.txt"),
                        dtype=str, delimiter="\t")
    q_labs = [" - ".join(sub_array[:-1]) for sub_array in q_labs]

    va_labs = np.loadtxt(os.path.join(source, "labels_VA.txt"),
                         dtype=str, delimiter="\t")
    va_labs = [" - ".join(sub_array[:-1]) for sub_array in va_labs]

    labels["countries"] = countries
    labels["sectors"] = sectors
    labels["y_labs"] = y_labs
    labels["q_labs"] = q_labs
    labels["va_labs"] = va_labs

    # Build an MRIO object from labels and tables.
    # Only build the parts that were actually loaded: the previous code
    # unconditionally accessed tables["VA"], tables["Q"] and tables["QY"]
    # and raised a KeyError whenever parts != 'all'.
    part_specs = {
        "T": ("t", [["countries", "sectors"], ["countries", "sectors"]]),
        "FD": ("y", [["countries", "sectors"], ["countries", "y_labs"]]),
        "VA": ("va", ["va_labs", ["countries", "sectors"]]),
        "Q": ("q", ["q_labs", ["countries", "sectors"]]),
        "QY": ("qy", ["q_labs", ["countries", "y_labs"]]),
    }
    m = MRIO()
    m.add_dimensions(labels)
    for part in parts:
        name, dimensions = part_specs[part]
        m.parts[name] = m.new_part(name=name,
                                   data=tables[part],
                                   dimensions=dimensions)

    m.name = f"eora26_{year}"
    return m
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Created on Wed Mar 29 10:43:49 2023
|
|
3
|
+
|
|
4
|
+
Load and convert Exiobase Industries MRIO files.
|
|
5
|
+
|
|
6
|
+
Supports Exiobase 3.9.5 in csv https://zenodo.org/records/14869924
|
|
7
|
+
|
|
8
|
+
This is the supporting information excel sheet:
|
|
9
|
+
https://onlinelibrary.wiley.com/action/downloadSupplement?doi=10.1111%2Fjiec.12715&file=jiec12715-sup-0009-SuppMat-9.xlsx
|
|
10
|
+
|
|
11
|
+
@author: wirth
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import os
|
|
15
|
+
import pandas as pd
|
|
16
|
+
import logging
|
|
17
|
+
from mrio_toolbox import MRIO
|
|
18
|
+
from mrio_toolbox.utils.savers._to_nc import save_to_nc
|
|
19
|
+
|
|
20
|
+
s,c = 163,49
|
|
21
|
+
log = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
def extract_exiobase3(
        year,
        source,
        mode="ixi",
        satellites='basic'):
    """
    Load and preformat an EXIOBASE 3 table.

    Supports Exiobase 3.9.5 in csv (https://zenodo.org/records/14869924).

    Parameters
    ----------
    year : str
        Data year to load.
    source : path-like
        Path to folder where raw data is stored; the table itself is
        expected in the ``IOT_{year}_[{mode}]`` subfolder.
    mode : str
        Table variant to load (e.g. "ixi"). Default is "ixi".
    satellites : str
        Satellite accounts to load:
        basic : air emissions
        all : air_emissions, employment, energy, land, material,
              nutrients, water

    Returns
    -------
    MRIO
        MRIO object holding the core tables (t, y, va, vay) and the
        requested satellite accounts (q_*/qy_* parts).

    Raises
    ------
    NotADirectoryError
        If the expected data folder is not found.
    """
    source = os.path.join(source, f"IOT_{year}_[{mode}]")
    # Check source path
    if not os.path.exists(source):
        log.error(f"{os.path.abspath(source)} does not exist.")
        raise NotADirectoryError(f"{os.path.abspath(source)} does not exist.")

    # EXIOBASE 3 comes with:
    # - 43 countries + 5 ROW regions
    # - 163 industries
    # - 9 final demand categories
    # - 9 value added categories including taxes
    # - various satellite accounts

    extra_satellites = ["employment", "energy", "land",
                        "material", "nutrients", "water"]

    parts = ["t", "y", "va", "vay", "q_air", "qy_air"]
    if satellites == 'all':
        for sat in extra_satellites:
            parts.extend([f"q_{sat}", f"qy_{sat}"])

    def _label_key(sat):
        # Historical naming kept for backward compatibility: the air
        # account labels live under 'q_labs_air_emissions' and the
        # nutrients labels under the singular 'q_labs_nutrient'.
        if sat == "air":
            return "q_labs_air_emissions"
        if sat == "nutrients":
            return "q_labs_nutrient"
        return f"q_labs_{sat}"

    def _unit_labels(folder):
        # Satellite unit.txt files list one stressor per row; join the
        # first two columns into a single "name - unit" label.
        units = pd.read_csv(os.path.join(source, folder, "unit.txt"),
                            dtype="str", delimiter="\t")
        return units.apply(
            lambda row: f"{row.iloc[0]} - {row.iloc[1]}", axis=1).tolist()

    # Load labels
    log.info("Loading labels...")
    countries = pd.read_csv(os.path.join(source, "unit.txt"), delimiter="\t")
    countries = countries["region"].tolist()
    seen = set()  # Remove duplicates while preserving order
    countries = [x for x in countries if not (x in seen or seen.add(x))]
    sectors = pd.read_csv(os.path.join(source, "unit.txt"), delimiter="\t")
    sectors = sectors[sectors["region"] == "AT"]["sector"].tolist()
    y_labs = pd.read_csv(os.path.join(source, "Y.txt"), header=1,
                         dtype="str", delimiter="\t")
    # NOTE(review): this slice keeps 7 demand columns although the header
    # comment above mentions 9 final demand categories — confirm against
    # the raw Y.txt layout.
    y_labs = y_labs.columns[2:9].tolist()
    va_labs = pd.read_csv(os.path.join(source, "factor_inputs", "unit.txt"),
                          dtype="str", delimiter="\t")
    va_labs = va_labs.iloc[:, 0].tolist()

    labels = {
        "countries": countries,
        "sectors": sectors,
        "y_labs": y_labs,
        "va_labs": va_labs,
        "q_labs_air_emissions": _unit_labels("air_emissions"),
    }
    if satellites == 'all':
        for sat in extra_satellites:
            labels[_label_key(sat)] = _unit_labels(sat)
    log.info("Labels loaded")

    def _table_location(part):
        # Map a part name to its file location (relative path components)
        # and the number of leading label columns to strip.
        if part == "t":
            return ("Z.txt",), 2
        if part == "y":
            return ("Y.txt",), 2
        if part in ("va", "vay"):
            folder = "factor_inputs"
        else:
            sat = part.split("_", 1)[1]
            folder = "air_emissions" if sat == "air" else sat
        fname = "F_Y.txt" if part == "vay" or part.startswith("qy_") else "F.txt"
        return (folder, fname), 1

    # Load tables
    tables = {}
    log.info("Loading IO tables, this can take a while...")
    for part in parts:
        relpath, skip = _table_location(part)
        raw = pd.read_csv(os.path.join(source, *relpath),
                          delimiter="\t", dtype="str", header=None)
        # The first 3 rows and the leading column(s) hold labels, not data.
        tables[part] = raw.iloc[3:, skip:].to_numpy().astype(float)
        log.info(f"Loaded {part}")

    log.info("Tables loaded")
    m = MRIO()
    m.add_dimensions(labels)

    def _dimensions(part):
        # Row/column dimension spec for each part.
        if part == "t":
            return [["countries", "sectors"], ["countries", "sectors"]]
        if part == "y":
            return [["countries", "sectors"], ["countries", "y_labs"]]
        rows = "va_labs" if part in ("va", "vay") \
            else _label_key(part.split("_", 1)[1])
        if part == "vay" or part.startswith("qy_"):
            return [rows, ["countries", "y_labs"]]
        return [rows, ["countries", "sectors"]]

    log.info("Building MRIO objects from parts containing labels and tables...")
    for part in parts:
        m.parts[part] = m.new_part(name=part,
                                   data=tables[part],
                                   dimensions=_dimensions(part))
        log.info(f"{part} part added")

    m.name = f"exiobase3_{year}_{mode}_{satellites}_satellites"
    return m
|
|
270
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module for extracting and converting data from various sources.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import logging as log
|
|
6
|
+
import os
|
|
7
|
+
log = log.getLogger(__name__)
|
|
8
|
+
|
|
9
|
+
def extract_MRIO(table, year, source, destination,
                 preprocessing=False,
                 saving_kwargs=None,
                 extraction_kwargs=None):
    """
    Extract MRIO data and optionally save it to a NetCDF file.

    Specific extractors are called based on the table name; they are
    imported lazily so only the requested extractor's dependencies load.
    Refer to the individual extractor functions for more details.

    Parameters
    ----------
    table : str
        Name of the MRIO table to extract. Currently supported:

        - 'eora26': Extracts Eora26 data.
        - 'gloria': Extracts GLORIA data.
        - 'wiod16': Extracts WIOD data.
        - 'icio': Extracts ICIO data.
        - 'exiobase3': Extracts EXIOBASE3 data.
        - 'figaro': Extracts FIGARO data.
        - 'emerging': Extracts EMERGING data.
        - 'gtap11': Extracts GTAP 11 data.

    year : str
        Year of the data to extract.
    source : path-like
        Path to the source directory containing the raw data files.
    destination : path-like or None
        Path to the destination directory where the NetCDF file will be
        saved. If falsy, the MRIO object is returned without saving.
    preprocessing : dict
        Parameters for preprocessing the table.
        If left empty, no preprocessing is done.
    saving_kwargs : dict, optional
        Additional keyword arguments for saving the MRIO data.
    extraction_kwargs : dict, optional
        Additional keyword arguments specific to the extractor function.

    Returns
    -------
    MRIO
        The extracted (and possibly preprocessed) MRIO object.

    Raises
    ------
    ValueError
        If ``table`` is not one of the supported names.
    """
    # getLogger is idempotent: this is the same logger instance as the
    # module-level one, bound locally so the function is self-contained.
    log = logging.getLogger(__name__)
    # Avoid the mutable-default-argument pitfall: normalize to fresh dicts.
    saving_kwargs = dict(saving_kwargs) if saving_kwargs else {}
    extraction_kwargs = dict(extraction_kwargs) if extraction_kwargs else {}

    log.info(f"Extracting MRIO data for table '{table}' for year {year} "
             f"from {source} to {destination}")

    # Dispatch table: table name -> (module path, extractor function name).
    extractors = {
        'eora26': ('mrio_toolbox.extractors.eora.eora_extractor',
                   'extract_eora26'),
        'gloria': ('mrio_toolbox.extractors.gloria.gloria_extractor',
                   'extract_gloria'),
        'wiod16': ('mrio_toolbox.extractors.wiod.wiod_extractor',
                   'extract_wiod'),
        'icio': ('mrio_toolbox.extractors.icio.icio_extractor',
                 'extract_icio'),
        'exiobase3': ('mrio_toolbox.extractors.exiobase.exiobase_extractor',
                      'extract_exiobase3'),
        'figaro': ('mrio_toolbox.extractors.figaro.figaro_extractor',
                   'extract_figaro'),
        'emerging': ('mrio_toolbox.extractors.emerging.emerging_extractor',
                     'extract_emerging'),
        'gtap11': ('mrio_toolbox.extractors.gtap11',
                   'extract_gtap11'),
    }
    try:
        module_name, func_name = extractors[table]
    except KeyError:
        raise ValueError(f"Unsupported MRIO table: {table}") from None

    # Lazy import keeps unrelated extractor dependencies out of the way.
    import importlib
    extractor = getattr(importlib.import_module(module_name), func_name)

    if table == 'gtap11':
        # The GTAP 11 extractor additionally needs the destination folder.
        mrio = extractor(year, source, destination, **extraction_kwargs)
    else:
        mrio = extractor(year, source, **extraction_kwargs)

    if preprocessing:
        mrio.preprocess(**preprocessing)

    # Without a destination there is nowhere to save: return the object.
    # (The previous code crashed in os.path.join when saving_kwargs were
    # given but destination was None.)
    if not destination:
        return mrio
    filepath = os.path.join(destination, f"{table}_year{year}.nc")
    mrio.save(file=filepath, **saving_kwargs)
    return mrio
|