mrio-toolbox 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mrio-toolbox might be problematic. Click here for more details.

Files changed (59) hide show
  1. mrio_toolbox/__init__.py +18 -2
  2. mrio_toolbox/_parts/_Axe.py +95 -37
  3. mrio_toolbox/_parts/_Part.py +264 -70
  4. mrio_toolbox/_parts/__init__.py +4 -0
  5. mrio_toolbox/_parts/part_operations.py +24 -17
  6. mrio_toolbox/extractors/__init__.py +20 -0
  7. mrio_toolbox/extractors/downloaders.py +36 -0
  8. mrio_toolbox/extractors/emerging/__init__.py +3 -0
  9. mrio_toolbox/extractors/emerging/emerging_extractor.py +117 -0
  10. mrio_toolbox/extractors/eora/__init__.py +3 -0
  11. mrio_toolbox/extractors/eora/eora_extractor.py +132 -0
  12. mrio_toolbox/extractors/exiobase/__init__.py +3 -0
  13. mrio_toolbox/extractors/exiobase/exiobase_extractor.py +270 -0
  14. mrio_toolbox/extractors/extractors.py +79 -0
  15. mrio_toolbox/extractors/figaro/__init__.py +3 -0
  16. mrio_toolbox/extractors/figaro/figaro_downloader.py +280 -0
  17. mrio_toolbox/extractors/figaro/figaro_extractor.py +187 -0
  18. mrio_toolbox/extractors/gloria/__init__.py +3 -0
  19. mrio_toolbox/extractors/gloria/gloria_extractor.py +202 -0
  20. mrio_toolbox/extractors/gtap11/__init__.py +7 -0
  21. mrio_toolbox/extractors/gtap11/extraction/__init__.py +3 -0
  22. mrio_toolbox/extractors/gtap11/extraction/extractor.py +129 -0
  23. mrio_toolbox/extractors/gtap11/extraction/harpy_files/__init__.py +6 -0
  24. mrio_toolbox/extractors/gtap11/extraction/harpy_files/_header_sets.py +279 -0
  25. mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file.py +262 -0
  26. mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file_io.py +974 -0
  27. mrio_toolbox/extractors/gtap11/extraction/harpy_files/header_array.py +300 -0
  28. mrio_toolbox/extractors/gtap11/extraction/harpy_files/sl4.py +229 -0
  29. mrio_toolbox/extractors/gtap11/gtap_mrio/__init__.py +6 -0
  30. mrio_toolbox/extractors/gtap11/gtap_mrio/mrio_builder.py +158 -0
  31. mrio_toolbox/extractors/icio/__init__.py +3 -0
  32. mrio_toolbox/extractors/icio/icio_extractor.py +121 -0
  33. mrio_toolbox/extractors/wiod/__init__.py +3 -0
  34. mrio_toolbox/extractors/wiod/wiod_extractor.py +143 -0
  35. mrio_toolbox/mrio.py +254 -94
  36. mrio_toolbox/msm/__init__.py +6 -0
  37. mrio_toolbox/msm/multi_scale_mapping.py +863 -0
  38. mrio_toolbox/utils/__init__.py +3 -0
  39. mrio_toolbox/utils/converters/__init__.py +3 -0
  40. mrio_toolbox/utils/converters/pandas.py +8 -6
  41. mrio_toolbox/utils/converters/xarray.py +2 -13
  42. mrio_toolbox/utils/formatting/__init__.py +0 -0
  43. mrio_toolbox/utils/formatting/formatter.py +528 -0
  44. mrio_toolbox/utils/loaders/__init__.py +4 -0
  45. mrio_toolbox/utils/loaders/_loader.py +60 -4
  46. mrio_toolbox/utils/loaders/_loader_factory.py +22 -1
  47. mrio_toolbox/utils/loaders/_nc_loader.py +37 -1
  48. mrio_toolbox/utils/loaders/_pandas_loader.py +29 -3
  49. mrio_toolbox/utils/loaders/_parameter_loader.py +61 -16
  50. mrio_toolbox/utils/savers/__init__.py +3 -0
  51. mrio_toolbox/utils/savers/_path_checker.py +25 -7
  52. mrio_toolbox/utils/savers/_to_folder.py +6 -1
  53. mrio_toolbox/utils/savers/_to_nc.py +26 -18
  54. {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/METADATA +10 -6
  55. mrio_toolbox-1.1.1.dist-info/RECORD +59 -0
  56. {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/WHEEL +1 -1
  57. mrio_toolbox-1.0.0.dist-info/RECORD +0 -26
  58. {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info/licenses}/LICENSE +0 -0
  59. {mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,280 @@
1
+ """
2
+ Download Figaro 25ed from the CIRCABC website.
3
+
4
+ @author: wirth
5
+ """
6
+
7
+ from selenium import webdriver
8
+ from selenium.webdriver.common.by import By
9
+ from selenium.webdriver.firefox.options import Options as FirefoxOptions
10
+ from selenium.webdriver.chrome.options import Options as ChromeOptions
11
+ from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, WebDriverException
12
+ import time
13
+ import os
14
+ import logging
15
+
16
+ log = logging.getLogger(__name__)
17
+
18
+
19
def wait_for_download(file_path, timeout=60):
    """
    Wait until the given file has been fully downloaded.

    The download is considered complete once ``file_path`` exists, its size
    did not change over one second, and no browser temp file
    (``.crdownload`` for Chrome, ``.part`` for Firefox) is left in its folder.

    Parameters
    ----------
    file_path : str
        The path to the file that is being downloaded.
    timeout : int
        Maximum time to wait for the download to complete, in seconds.

    Returns
    -------
    bool
        True as soon as the download is considered complete.

    Raises
    ------
    TimeoutError
        If the download is not complete within `timeout` seconds.

    Notes
    -----
    A stale temp file from an earlier, aborted download in the same folder
    keeps this function waiting until the timeout; remove such files first.
    """
    # A bare file name has an empty dirname, which os.listdir() rejects.
    folder = os.path.dirname(file_path) or "."
    temp_suffixes = (".crdownload", ".part")
    # monotonic clock: the deadline is not affected by system clock changes
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        try:
            if os.path.exists(file_path):
                size_old = os.path.getsize(file_path)
                time.sleep(1)
                size_new = os.path.getsize(file_path)
                in_progress = any(
                    name.endswith(temp_suffixes) for name in os.listdir(folder)
                )
                if size_new == size_old and not in_progress:
                    time.sleep(1)  # wait a bit more to ensure download is completed
                    return True
                # still growing or temp file present: check again right away
                continue
        except OSError:
            # The file or folder changed between two checks (e.g. the browser
            # is renaming its temp file); simply poll again.
            pass

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining > 0:
            time.sleep(min(0.5, remaining))

    raise TimeoutError(f"Download not completed within {timeout} seconds: {file_path}")
51
+
52
def get_driver(destination, headless=True, prefer="chrome"):
    """
    Try to get a Selenium driver, falling back to the other supported browser.

    The preferred browser is tried first. If it cannot be started, the other
    one (Firefox or Chrome) is tried. Each browser is tried at most once.

    Parameters
    ----------
    destination : str
        Download folder for browser.
    headless : bool
        Run browser in headless mode.
    prefer : str
        Preferred browser: "firefox" or "chrome". Any other value is ignored
        and Firefox, then Chrome, are tried.

    Returns
    -------
    WebDriver
        The started Selenium driver, configured to download into `destination`.

    Raises
    ------
    RuntimeError
        If neither browser driver could be started.
    """

    def make_firefox():
        options = FirefoxOptions()
        if headless:
            options.add_argument("--headless")
        options.add_argument("--window-size=1920,1080")
        # folderList=2 tells Firefox to use the custom download directory below
        options.set_preference("browser.download.folderList", 2)
        options.set_preference("browser.download.dir", destination)
        return webdriver.Firefox(options=options)

    def make_chrome():
        options = ChromeOptions()
        if headless:
            options.add_argument("--headless=new")
        options.add_argument("--window-size=1920,1080")
        prefs = {
            "download.default_directory": destination,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        options.add_experimental_option("prefs", prefs)
        return webdriver.Chrome(options=options)

    factories = {"firefox": make_firefox, "chrome": make_chrome}
    # dict.fromkeys removes the duplicate of the preferred browser while
    # keeping the order, so a failing browser is not started twice.
    candidates = [
        name for name in dict.fromkeys([prefer, "firefox", "chrome"])
        if name in factories
    ]

    tried = []
    last_error = None
    for choice in candidates:
        log.info(f"Trying {choice.capitalize()} driver...")
        try:
            return factories[choice]()
        except WebDriverException as e:
            log.warning(f"{choice.capitalize()} driver failed: {e}")
            tried.append(choice)
            last_error = e
    raise RuntimeError(
        f"Could not start any browser driver (tried {tried}). Please install Firefox or Chrome."
    ) from last_error
99
+
100
+
101
def safe_click(driver, by, value, description="element"):
    """
    Safely clicks an element on the page, handling exceptions and logging errors.

    Parameters:
    -----------
    driver: WebDriver
        The Selenium WebDriver instance.
    by: By
        The method to locate the element (e.g., By.XPATH, By.CSS_SELECTOR).
    value: str
        The value to locate the element.
    description: str
        A description of the element for logging purposes.

    Raises:
    -------
    RuntimeError
        If the element cannot be found or clicked. The original Selenium
        exception is attached as ``__cause__``.

    Notes:
    ------
    If you want to debug the click, you can set the headless mode to False in the download_figaro function.
    This will open the browser window and allow you to see what is happening.
    In the browser window, you can right-click on the element and select "Inspect" to see the HTML structure.
    """
    # Shared tail of all error messages; every sentence ends with a space so
    # that the concatenated message stays readable.
    aborted = ("The download was aborted. Likely the page structure of the "
               "CIRCABC website has changed. ")
    hint = ("If you are a developer, try to debug without headless mode. "
            "If you are a user, you may want to download the figaro tables manually.")

    try:
        elem = driver.find_element(by, value)
        # Click with JavaScript to avoid issues with overlays or pop-ups
        driver.execute_script("arguments[0].click();", elem)
    except NoSuchElementException as err:
        log.error(f"{description} not found: {value}")
        raise RuntimeError(f"{description} not found: {value}. {aborted}{hint}") from err
    except ElementClickInterceptedException as err:
        log.error(f"{description} was obstructed: {value}")
        raise RuntimeError(f"{description} was obstructed: {value}. {aborted}{hint}") from err
    except Exception as err:
        log.error(f"Error clicking {description}: {err}.")
        raise RuntimeError(f"Error clicking {description}: {err}. {aborted}{hint}") from err
    log.info(f"Clicked {description}")
138
+
139
+
140
+
141
def _download_figaro_csv(driver, url, year, category_text, category_desc, table_desc, file_path):
    """Navigate from the library root to one yearly CSV matrix file and download it."""
    # Always start from the library root so the link lookup is independent
    # of whatever page a previous download left open.
    driver.get(url)

    # Find and click the table category (IO format, supply or use tables)
    safe_click(driver, By.XPATH, f"//a[contains(text(), '{category_text}')]", category_desc)

    # Find and click the CSV matrix format
    safe_click(driver, By.XPATH, "//a[contains(text(), 'CSV matrix format')]", "CSV matrix format link")

    # Click for the second page if year > 2019
    if year > 2019:
        time.sleep(0.5)  # we need an explicit wait here, because the element is found before it is clickable
        safe_click(driver, By.CLASS_NAME, "next-page", "Next page button")

    # Find and click the desired year
    safe_click(driver, By.XPATH, f"//a[contains(text(), '_{year}.csv')]", f"Year '{year}' link")

    # Find and click the download button
    safe_click(driver, By.CSS_SELECTOR, ".download", f"Download button for {table_desc}")

    # Wait for the download to complete
    wait_for_download(file_path)


def download_figaro(year, destination, format = 'industry by industry', sut = False, headless = True):
    """
    Downloads the specified format of the EU input-output matrix from Figaro.

    Files that are already present in `destination` are skipped. A browser is
    only started when at least one of the requested files is missing, and it
    is always closed again, even if a download step fails.

    Parameters:
    -----------
    year: int
        The year of the data to download.
    destination: str
        A path to the folder where the downloaded file will be saved.
    format: str, optional
        Either 'industry by industry' or 'product by product'.
    sut: Boolean, optional
        If True, also downloads the supply and use tables, otherwise only the input-output matrix.
    headless: Boolean, optional
        If True, runs the browser in headless mode (no GUI). Default is True.

    Raises:
    -------
    ValueError
        If `year` is not an int between 2010 and 2023 or `format` is unknown.
    FileNotFoundError
        If `destination` does not exist.
    RuntimeError
        If no browser could be started or an element of the website could not be clicked.
    TimeoutError
        If a download does not complete in time.
    """

    # Check if year is valid
    if not isinstance(year, int) or year < 2010 or year > 2023:
        raise ValueError("As of August 2025, the Figaro database contains IO tables for the years 2010 to 2023. Please provide a valid year within this range. "
                         "If you are sure that the year 2024 is already available, please update this check accordingly.")

    # Check if destination exists
    if not os.path.exists(destination):
        raise FileNotFoundError(f"The destination folder '{destination}' does not exist. Please create it before downloading.")

    if format == 'industry by industry':
        format_abbr = "ind-by-ind"
    elif format == 'product by product':
        format_abbr = "prod-by-prod"
    else:
        raise ValueError("The 'format' parameter must be either 'industry by industry' or 'product by product'.")

    paths = {
        "io_path" : os.path.join(destination, f"matrix_eu-ic-io_{format_abbr}_25ed_{year}.csv"),
        "sup_path" : os.path.join(destination, f"matrix_eu-ic-supply_25ed_{year}.csv"),
        "use_path" : os.path.join(destination, f"matrix_eu-ic-use_25ed_{year}.csv"),
        "excel_path" : os.path.join(destination, "Description_FIGARO_Tables(25ed).xlsx")
    }
    url = "https://circabc.europa.eu/ui/group/cec66924-a924-4f91-a0ef-600a0531e3ba/library/0d8bab1e-d159-40b9-9aff-ef8e6d58e24e?p=1&n=10&sort=name_ASC"

    # Same acceptance rule as before (only values equal to True enable the
    # supply/use download), so e.g. the string "none" does not switch it on.
    want_sut = sut == True  # noqa: E712

    # Only the files that were actually requested decide whether a browser is needed.
    requested = ["excel_path", "io_path"]
    if want_sut:
        requested += ["sup_path", "use_path"]
    missing = {key for key in requested if not os.path.exists(paths[key])}

    if not missing:
        log.info(f"The files for format '{format_abbr}' and year '{year}' are already in the folder '{destination}', skipping download")
        return

    driver = get_driver(destination, headless=headless, prefer="chrome")
    try:
        driver.implicitly_wait(5)

        if "excel_path" in missing:
            log.info("Downloading the description of the Figaro tables")
            driver.get(url)
            # Find and click the Excel file
            safe_click(driver, By.XPATH, "//a[contains(text(), 'Description_FIGARO_Tables(25ed).xlsx')]", "Excel file link")
            # Find and click the download button
            safe_click(driver, By.CSS_SELECTOR, ".download", "Download button for Excel file")
            # Wait for the download to complete
            wait_for_download(paths["excel_path"])
        else:
            log.info(f"The description of the Figaro tables is already in the folder '{destination}', skipping download")

        if "io_path" in missing:
            log.info(f"Downloading IO table for format '{format_abbr}' and year '{year}'")
            _download_figaro_csv(driver, url, year, format, f"format '{format}' link",
                                 "IO table", paths["io_path"])
        else:
            log.info(f"The IO tables for format '{format}' and year '{year}' are already in the folder '{destination}', skipping download")

        if want_sut:
            if "sup_path" in missing:
                log.info(f"Downloading supply table for year '{year}'")
                _download_figaro_csv(driver, url, year, "Supply tables", "Supply tables link",
                                     "supply table", paths["sup_path"])
            else:
                log.info(f"The supply tables for year '{year}' are already in the folder '{destination}', skipping download")

            if "use_path" in missing:
                log.info(f"Downloading use table for year '{year}'")
                _download_figaro_csv(driver, url, year, "Use tables", "Use tables link",
                                     "use table", paths["use_path"])
            else:
                log.info(f"The use tables for year '{year}' are already in the folder '{destination}', skipping download")
    finally:
        # Close the browser even when a click or download failed.
        driver.quit()
@@ -0,0 +1,187 @@
1
+ """
2
+ Load and convert Figaro MRIO files.
3
+
4
+ Supports Figaro inter industry IO, supply and use tables in csv matrix format
5
+ https://ec.europa.eu/eurostat/web/esa-supply-use-input-tables/database#Input-output%20tables%20industry%20by%20industry
6
+
7
+ The extractor loads the IO table and if available the supply and use tables.
8
+
9
+ @author: wirth
10
+ """
11
+
12
+ import os
13
+ import logging
14
+ import pandas as pd
15
+
16
+ from mrio_toolbox import MRIO
17
+ from mrio_toolbox.utils.savers._to_nc import save_to_nc
18
+
19
+ log = logging.getLogger(__name__)
20
+
21
def extract_figaro(year, source, format = 'industry by industry', sut = "none", edition=25):
    """
    Extract FIGARO data.

    Loads FIGARO tables and labels and returns them as an MRIO object for
    further use with the mrio_toolbox library. Currently the extractor does
    not support emission satellite accounts (I couldn't find them on the
    figaro website).

    Put all tables as well as the 'Description_FIGARO_Tables({edition}ed).xlsx' file
    in the same source folder.

    Parameters
    ----------
    year : int or str
        Data year to load.
    source : path-like
        Path to folder where raw data is stored
    format : str, optional
        Either 'industry by industry' or 'product by product'.
    sut : str, optional
        Supply and use tables to load, by default "none".
        Available options are "none", "supply", "use" or "both".
        A requested table whose file is missing in `source` is skipped.
    edition : int, optional
        Edition of the FIGARO tables, by default 25. The alternative is 24.

    Returns
    -------
    MRIO
        MRIO object holding the parts "t", "y", "va" and "vay", plus "sup"
        and/or "use_t", "use_y", "use_va", "use_vay" when the supply/use
        tables were loaded.

    Raises
    ------
    ValueError
        If `format` or `edition` is not supported.
    """

    if format == 'industry by industry':
        format_abbr = "ind-by-ind"
    elif format == 'product by product':
        format_abbr = "prod-by-prod"
    else:
        raise ValueError("The 'format' parameter must be either 'industry by industry' or 'product by product'.")

    # Header row of the label sheet in the description workbook, per edition.
    label_header_rows = {24: 5, 25: 3}
    if edition not in label_header_rows:
        raise ValueError(f"Edition {edition} not yet supported. Please use edition 24 or 25.")

    log.info(f"Extracting FIGARO IO table for year {year}, load IO table...")
    raw = pd.read_csv(os.path.join(source, f"matrix_eu-ic-io_{format_abbr}_{edition}ed_{year}.csv"), dtype = str)
    log.info("Loaded IO table")

    # None means "not requested or file not available"
    raw_supply = None
    raw_use = None

    if sut in ["supply", "both"]:
        log.info(f"Check if supply table is available for year {year}...")
        supply_path = os.path.join(source, f"matrix_eu-ic-supply_{edition}ed_{year}.csv")
        if os.path.isfile(supply_path):
            log.info("Supply table found, loading...")
            raw_supply = pd.read_csv(supply_path, dtype = str)
            log.info("Loaded supply table")

    if sut in ["use", "both"]:
        log.info(f"Check if use table is available for year {year}...")
        use_path = os.path.join(source, f"matrix_eu-ic-use_{edition}ed_{year}.csv")
        if os.path.isfile(use_path):
            log.info("Use table found, loading...")
            raw_use = pd.read_csv(use_path, dtype = str)
            log.info("Loaded use table")

    log.info("Extracting labels...")
    df = pd.read_excel(
        os.path.join(source, f"Description_FIGARO_Tables({edition}ed).xlsx"),
        header=label_header_rows[edition],
        sheet_name = "Prod, Ind & Accounting items",
    ).dropna(axis=1, how='all')

    # Countries are not in the correct order in the excel sheet, so get countries from raw data
    column_labs = raw.columns[1:]
    countries = column_labs.str.split("_").str[0]
    countries = list(dict.fromkeys(countries))  # unique, in order of first appearance

    # Get other labels from excel sheet
    sectors = df["Label.1"].tolist()
    cpa_labs = df["Label"].tolist()
    va_labs = df["Label.2"].dropna().tolist()
    y_labs = df["Label.3"].dropna().tolist()

    labels = {
        "countries": countries,
        "sectors": sectors,
        "y_labs": y_labs,
        "va_labs": va_labs
    }
    c, s, n_y, n_va = len(countries), len(sectors), len(y_labs), len(va_labs)
    cpa = len(cpa_labs)
    if raw_supply is not None or raw_use is not None:
        # the product (CPA) dimension is only needed for supply/use tables
        labels["cpa_labs"] = cpa_labs
    log.info("Labels extracted")

    log.info("Extracting parts from raw data...")
    raw = raw.iloc[:, 1:].astype(float).to_numpy()

    # NOTE(review): the value-added row slices use an upper bound of c*n_va
    # rows while the parts are declared with a plain "va_labs" row dimension;
    # this presumably relies on numpy clipping the slice at the end of the
    # array - confirm against the raw file layout.
    tables = {}
    tables["t"] = raw[:c*s, :c*s]
    tables["y"] = raw[:c*s, c*s:(c*s+c*n_y)]
    tables["va"] = raw[c*s:(c*s+c*n_va), :c*s]
    tables["vay"] = raw[c*s:(c*s+c*n_va), c*s:(c*s+c*n_y)]
    log.info("Extracted parts from raw data")

    # Treat supply table if available
    if raw_supply is not None:
        log.info("Extracting supply table...")
        raw_supply = raw_supply.iloc[:, 1:].astype(float).to_numpy()
        tables["sup"] = raw_supply[:c*cpa, :c*s]
        log.info("Extracted supply table")
    else:
        log.info("No supply table found, skipping...")

    # Treat use table if available
    if raw_use is not None:
        log.info("Extracting use table...")
        raw_use = raw_use.iloc[:, 1:].astype(float).to_numpy()
        tables["use_t"] = raw_use[:c*cpa, :c*s]
        tables["use_y"] = raw_use[:c*cpa, c*s:c*s + c*n_y]
        tables["use_va"] = raw_use[c*cpa:c*cpa+c*n_va, :c*s]
        tables["use_vay"] = raw_use[c*cpa:(c*cpa+c*n_va), c*s:(c*s+c*n_y)]
        log.info("Extracted use table")
    else:
        log.info("No use table found, skipping...")

    # Row/column dimensions of every part that may be present in `tables`
    part_dimensions = {
        "t": [["countries", "sectors"], ["countries", "sectors"]],
        "y": [["countries", "sectors"], ["countries", "y_labs"]],
        "va": ["va_labs", ["countries", "sectors"]],
        "vay": ["va_labs", ["countries", "y_labs"]],
        "sup": [["countries", "cpa_labs"], ["countries", "sectors"]],
        "use_t": [["countries", "cpa_labs"], ["countries", "sectors"]],
        "use_y": [["countries", "cpa_labs"], ["countries", "y_labs"]],
        "use_va": ["va_labs", ["countries", "sectors"]],
        "use_vay": ["va_labs", ["countries", "y_labs"]],
    }

    # Assemble mrio object
    log.info("Building MRIO object...")
    m = MRIO()
    m.add_dimensions(labels)
    log.info("Building MRIO objects from parts containing labels and tables...")
    # `tables` keeps insertion order, so parts are added in the order t, y,
    # va, vay, sup, use_*.
    for part_name, data in tables.items():
        m.parts[part_name] = m.new_part(name=part_name,
                                        data=data,
                                        dimensions=part_dimensions[part_name])
        log.info(f"{part_name} part added")
    log.info("MRIO object built")

    # Add metadata
    log.info("Adding metadata to MRIO object...")
    m.metadata["table"] = "figaro"
    m.metadata["edition"] = edition
    m.metadata["year"] = year
    m.metadata["format"] = format
    m.metadata["sut"] = sut
    m.name = f"figaro_{year}_{format}"
    return m
@@ -0,0 +1,3 @@
1
+ """
2
+ This module contains the gloria extractor
3
+ """
@@ -0,0 +1,202 @@
1
+ """
2
+ Extractor for GLORIA data.
3
+
4
+ This extractor loads GLORIA raw data files and converts them to NetCDF
5
+ files.
6
+
7
+ Supports GLORIA version 059
8
+ https://ielab.info/labs/ielab-gloria
9
+
10
+ Created on Fr Dez 20, 2024
11
+ @author: wirth, based on code of beaufils
12
+
13
+ """
14
+
15
+ import os
16
+ import logging
17
+ import numpy as np
18
+ import pandas as pd
19
+ from mrio_toolbox import MRIO
20
+ from mrio_toolbox.utils.savers._to_nc import save_to_nc
21
+
22
+ log = logging.getLogger(__name__)
23
+
24
def extract_gloria(
        year,
        source,
        markup = 1,
        parts = "all",
        precision=32):
    """
    Extract GLORIA data.

    Loads GLORIA tables and labels and returns them as an MRIO object for
    further use with the mrio_toolbox library. Currently, this extractor
    supports loading T, Y, VA, Q, and QY tables.

    Put all tables (including emission satellite accounts) as well as the
    'GLORIA_ReadMe_059a.xlsx' file in the same folder
    '<source>/GLORIA_MRIOs_59_<year>'.

    Parameters
    ----------
    year : str
        Data year to load.
    source : path-like
        Path to folder where raw data is stored
    markup : int
        Version of prices to load. Available versions:
            1 : basic prices
            2 : trade margins
            3 : transport margins
            4 : taxes on products
            5 : subsidies on products
    parts : str or list of str
        Data blocks to load:
            basic : T, Y, VA
            all : T, Y, VA, Q, QY
        A list of raw table codes ("T", "Y", "V", "TQ", "YQ") is accepted
        as well.
    precision : int
        Floating point precision in bits, 32 or 64. Default is 32.
        This introduces some rounding error for large numbers.

    Returns
    -------
    MRIO
        MRIO object with the loaded tables stored under the part keys
        "T", "Y", "VA", "Q" and "QY".

    Raises
    ------
    ValueError
        If `precision` or `parts` is not supported.
    NotADirectoryError
        If the GLORIA folder for `year` does not exist in `source`.
    """

    # Validate the arguments before touching the file system
    dtypes = {32: np.float32, 64: np.float64}
    if precision not in dtypes:
        raise ValueError(f"Precision must be 32 or 64 bits, got {precision!r}.")
    dt = dtypes[precision]

    # Date prefix of the raw file name for each table code
    file_dates = {"T": "20240111", "Y": "20240111", "V": "20240419",
                  "TQ": "20240417", "YQ": "20240417"}
    part_sets = {"all": ["T", "Y", "V", "TQ", "YQ"],
                 "basic": ["T", "Y", "V"]}
    if isinstance(parts, str):
        if parts not in part_sets:
            raise ValueError(f"The 'parts' parameter must be 'all' or 'basic', got {parts!r}.")
        requested = part_sets[parts]
    else:
        requested = list(parts)
        unknown = [part for part in requested if part not in file_dates]
        if unknown:
            raise ValueError(f"Unknown GLORIA table(s) {unknown}; available tables are {list(file_dates)}.")

    #Check source path
    source = os.path.join(source, f"GLORIA_MRIOs_59_{year}")
    if not os.path.exists(source):
        log.error(f"{os.path.abspath(source)} does not exist.")
        raise NotADirectoryError(f"{os.path.abspath(source)} does not exist.")

    # Gloria comes with 164 regions (160 countries + rest of americas,
    # rest of europe, rest of africa, rest of asia-pacific) and 120 sectors.

    log.info(f"Data precision is {precision} bits")

    log.info("Loading Gloria labels...")
    # Read all label sheets in one pass over the workbook
    sheets = pd.read_excel(
        io = os.path.join(source, "GLORIA_ReadMe_059a.xlsx"),
        sheet_name = ["Regions", "Sectors", "Value added and final demand", "Satellites"])
    va_and_y_labs = sheets["Value added and final demand"]
    satellites = sheets["Satellites"]
    q_labs = (satellites["Sat_head_indicator"] + " - "
              + satellites["Sat_indicator"] + " - "
              + satellites["Sat_unit"]).tolist()

    labels = {
        "countries": sheets["Regions"]["Region_acronyms"].tolist(),
        "sectors": sheets["Sectors"]["Sector_names"].tolist(),
        "y_labs": va_and_y_labs["Final_demand_names"].tolist(),
        "q_labs": q_labs,
        "va_labs": va_and_y_labs["Value_added_names"].tolist(),
    }
    log.info("Loaded Gloria labels")

    log.info("Loading Gloria tables, this can take a while...")
    tables = {}
    for part in requested:
        # The value added file name always carries Markup001, whatever markup
        # is requested (presumably it only exists for basic prices - confirm).
        part_markup = 1 if part == "V" else markup
        path = os.path.join(source, f'{file_dates[part]}_120secMother_AllCountries_002_{part}-Results_{year}_059_Markup00{part_markup}(full).csv')
        log.info(f"Loading {part} table...")
        tables[part] = load_and_transform_to_IO_structure(path, part, dt)
        log.info(f"Loaded {part} table")

    # build an MRIO object from labels and tables
    m = MRIO()
    m.add_dimensions(labels)

    # (raw table code, key in m.parts, part name, dimensions)
    part_specs = [
        ("T", "T", "t", [["countries", "sectors"], ["countries", "sectors"]]),
        ("Y", "Y", "y", [["countries", "sectors"], ["countries", "y_labs"]]),
        ("V", "VA", "va", ["va_labs", ["countries", "sectors"]]),
        ("TQ", "Q", "q", ["q_labs", ["countries", "sectors"]]),
        ("YQ", "QY", "qy", ["q_labs", ["countries", "y_labs"]]),
    ]
    for table_code, part_key, part_name, dimensions in part_specs:
        # Decide on what was actually loaded: `parts` may be a keyword or a
        # list, so comparing it to "all" here would skip Q and QY.
        if table_code not in tables:
            continue
        m.parts[part_key] = m.new_part(name=part_name,
                                       data=tables[table_code],
                                       dimensions=dimensions)
        log.info(f"Added {part_key} table")

    m.name = f"gloria_{year}_markup00{markup}"
    return m
159
+
160
def load_and_transform_to_IO_structure(path, part, dt, n_sectors=120, va_block_height=6):
    """
    Load one raw GLORIA csv file and reduce it to its input-output block.

    The raw files alternate between blocks of `n_sectors` rows/columns that
    are dropped and blocks that are kept: rows start with a dropped block,
    columns start with a kept block. Which axes are filtered depends on the
    table:

        T  : rows and columns filtered
        Y  : rows filtered
        V  : columns filtered, then the per-country diagonal blocks are
             stacked side by side
        TQ : columns filtered
        YQ : returned as loaded

    Parameters
    ----------
    path : path-like
        Path to the raw csv file (comma separated, no header).
    part : str
        Table code: "T", "Y", "V", "TQ" or "YQ".
    dt : numpy dtype
        Data type used to load the table.
    n_sectors : int, optional
        Number of sectors per country, i.e. the width of one block.
        Default is 120 (GLORIA).
    va_block_height : int, optional
        Number of value added rows per country in the V table. Default is 6.

    Returns
    -------
    numpy.ndarray
        The reduced 2D table.

    Raises
    ------
    ValueError
        If `part` is not one of the supported table codes.
    """
    row_filtered = ("T", "Y")
    column_filtered = ("T", "V", "TQ")
    if part not in ("T", "Y", "V", "TQ", "YQ"):
        raise ValueError(f"Unknown GLORIA table '{part}'. Available tables are 'T', 'Y', 'V', 'TQ' and 'YQ'.")

    # ndmin=2 keeps a single-row file two-dimensional
    table = np.loadtxt(path, dtype=dt, delimiter=',', ndmin=2)

    if part in row_filtered:
        # Starts with one block off, then one block on
        keep_rows = (np.arange(table.shape[0]) // n_sectors) % 2 == 1
        table = table[keep_rows]
    if part in column_filtered:
        # Starts with one block on, then one block off
        keep_columns = (np.arange(table.shape[1]) // n_sectors) % 2 == 0
        table = table[:, keep_columns]

    if part == "V":
        # Stack the entries to transform the pseudo-diagonalized shape
        # (984x19680 for GLORIA) into a 6x19680 shape: block i holds the
        # value added rows of country i and only its own sector columns.
        n_blocks = table.shape[0] // va_block_height
        blocks = [
            table[i*va_block_height:(i+1)*va_block_height,
                  i*n_sectors:(i+1)*n_sectors]
            for i in range(n_blocks)
        ]
        table = np.hstack(blocks)

    return table
202
+
@@ -0,0 +1,7 @@
1
+ """
2
+ This module contains the extractor for raw GTAP 11 data and the IO builder to transform it into an MRIO object.
3
+ """
4
+ from .extraction.extractor import extract_gtap11
5
+ from .gtap_mrio import build_io
6
+
7
+ __all__ = ["extract_gtap11","build_io"]