mrio-toolbox 1.1.1__py3-none-any.whl → 1.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mrio-toolbox might be problematic. Click here for more details.
- {mrio_toolbox-1.1.1.dist-info → mrio_toolbox-1.1.3.dist-info}/METADATA +2 -2
- mrio_toolbox-1.1.3.dist-info/RECORD +5 -0
- mrio_toolbox-1.1.3.dist-info/top_level.txt +1 -0
- mrio_toolbox/__init__.py +0 -21
- mrio_toolbox/_parts/_Axe.py +0 -539
- mrio_toolbox/_parts/_Part.py +0 -1698
- mrio_toolbox/_parts/__init__.py +0 -7
- mrio_toolbox/_parts/part_operations.py +0 -57
- mrio_toolbox/extractors/__init__.py +0 -20
- mrio_toolbox/extractors/downloaders.py +0 -36
- mrio_toolbox/extractors/emerging/__init__.py +0 -3
- mrio_toolbox/extractors/emerging/emerging_extractor.py +0 -117
- mrio_toolbox/extractors/eora/__init__.py +0 -3
- mrio_toolbox/extractors/eora/eora_extractor.py +0 -132
- mrio_toolbox/extractors/exiobase/__init__.py +0 -3
- mrio_toolbox/extractors/exiobase/exiobase_extractor.py +0 -270
- mrio_toolbox/extractors/extractors.py +0 -79
- mrio_toolbox/extractors/figaro/__init__.py +0 -3
- mrio_toolbox/extractors/figaro/figaro_downloader.py +0 -280
- mrio_toolbox/extractors/figaro/figaro_extractor.py +0 -187
- mrio_toolbox/extractors/gloria/__init__.py +0 -3
- mrio_toolbox/extractors/gloria/gloria_extractor.py +0 -202
- mrio_toolbox/extractors/gtap11/__init__.py +0 -7
- mrio_toolbox/extractors/gtap11/extraction/__init__.py +0 -3
- mrio_toolbox/extractors/gtap11/extraction/extractor.py +0 -129
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/__init__.py +0 -6
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/_header_sets.py +0 -279
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file.py +0 -262
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file_io.py +0 -974
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/header_array.py +0 -300
- mrio_toolbox/extractors/gtap11/extraction/harpy_files/sl4.py +0 -229
- mrio_toolbox/extractors/gtap11/gtap_mrio/__init__.py +0 -6
- mrio_toolbox/extractors/gtap11/gtap_mrio/mrio_builder.py +0 -158
- mrio_toolbox/extractors/icio/__init__.py +0 -3
- mrio_toolbox/extractors/icio/icio_extractor.py +0 -121
- mrio_toolbox/extractors/wiod/__init__.py +0 -3
- mrio_toolbox/extractors/wiod/wiod_extractor.py +0 -143
- mrio_toolbox/mrio.py +0 -899
- mrio_toolbox/msm/__init__.py +0 -6
- mrio_toolbox/msm/multi_scale_mapping.py +0 -863
- mrio_toolbox/utils/__init__.py +0 -3
- mrio_toolbox/utils/converters/__init__.py +0 -5
- mrio_toolbox/utils/converters/pandas.py +0 -247
- mrio_toolbox/utils/converters/xarray.py +0 -130
- mrio_toolbox/utils/formatting/__init__.py +0 -0
- mrio_toolbox/utils/formatting/formatter.py +0 -528
- mrio_toolbox/utils/loaders/__init__.py +0 -7
- mrio_toolbox/utils/loaders/_loader.py +0 -312
- mrio_toolbox/utils/loaders/_loader_factory.py +0 -96
- mrio_toolbox/utils/loaders/_nc_loader.py +0 -184
- mrio_toolbox/utils/loaders/_np_loader.py +0 -112
- mrio_toolbox/utils/loaders/_pandas_loader.py +0 -128
- mrio_toolbox/utils/loaders/_parameter_loader.py +0 -386
- mrio_toolbox/utils/savers/__init__.py +0 -11
- mrio_toolbox/utils/savers/_path_checker.py +0 -37
- mrio_toolbox/utils/savers/_to_folder.py +0 -165
- mrio_toolbox/utils/savers/_to_nc.py +0 -60
- mrio_toolbox-1.1.1.dist-info/RECORD +0 -59
- mrio_toolbox-1.1.1.dist-info/top_level.txt +0 -1
- {mrio_toolbox-1.1.1.dist-info → mrio_toolbox-1.1.3.dist-info}/WHEEL +0 -0
- {mrio_toolbox-1.1.1.dist-info → mrio_toolbox-1.1.3.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Module for extracting and converting data from various sources.
|
|
3
|
-
"""
|
|
4
|
-
|
|
5
|
-
import logging as log
|
|
6
|
-
import os
|
|
7
|
-
log = log.getLogger(__name__)
|
|
8
|
-
|
|
9
|
-
def extract_MRIO(table,year,source,destination,
                 preprocessing = False,
                 saving_kwargs = None,
                 extraction_kwargs = None):
    """
    Extract MRIO data and save it to a NetCDF file.

    Specific extractors are called based on the table name.
    Refer to the individual extractor functions for more details.

    Parameters
    ----------
    table : str
        Name of the MRIO table to extract. Currently supported:

        - 'eora26': Extracts Eora26 data.
        - 'gloria': Extracts GLORIA data.
        - 'wiod16' (or its alias 'wiod'): Extracts WIOD data.
        - 'icio': Extracts ICIO data.
        - 'exiobase3': Extracts EXIOBASE3 data.
        - 'figaro': Extracts FIGARO data.
        - 'emerging': Extracts EMERGING data.
        - 'gtap11': Extracts GTAP 11 data.

    year : str
        Year of the data to extract.
    source : path-like
        Path to the source directory containing the raw data files.
    destination : path-like
        Path to the destination directory where the NetCDF file will be saved.
    preprocessing : dict or False
        Keyword arguments forwarded to ``mrio.preprocess``.
        If left empty (or False), no preprocessing is done.
    saving_kwargs : dict, optional
        Additional keyword arguments for saving the MRIO data.
    extraction_kwargs : dict, optional
        Additional keyword arguments specific to the extractor function.

    Returns
    -------
    The extracted MRIO object when neither ``destination`` nor
    ``saving_kwargs`` is provided; otherwise the table is written to
    ``<destination>/<table>_year<year>.nc`` and None is returned.

    Raises
    ------
    ValueError
        If ``table`` is not one of the supported table names.
    """
    # Build fresh dicts on every call: a ``dict()`` default is evaluated once
    # and would be shared (and possibly mutated) across calls.
    saving_kwargs = {} if saving_kwargs is None else saving_kwargs
    extraction_kwargs = {} if extraction_kwargs is None else extraction_kwargs

    log.info(f"Extracting MRIO data for table '{table}' for year {year} from {source} to {destination}")

    # Extractors are imported lazily so that the optional dependencies of one
    # extractor are only needed when that extractor is actually requested.
    if table == 'eora26':
        from mrio_toolbox.extractors.eora.eora_extractor import extract_eora26
        mrio = extract_eora26(year, source, **extraction_kwargs)
    elif table == 'gloria':
        from mrio_toolbox.extractors.gloria.gloria_extractor import extract_gloria
        mrio = extract_gloria(year, source, **extraction_kwargs)
    elif table in ('wiod16', 'wiod'):
        # 'wiod' is accepted as well because it is the name this function has
        # always advertised in its documentation.
        from mrio_toolbox.extractors.wiod.wiod_extractor import extract_wiod
        mrio = extract_wiod(year, source, **extraction_kwargs)
    elif table == 'icio':
        from mrio_toolbox.extractors.icio.icio_extractor import extract_icio
        mrio = extract_icio(year, source, **extraction_kwargs)
    elif table == 'exiobase3':
        from mrio_toolbox.extractors.exiobase.exiobase_extractor import extract_exiobase3
        mrio = extract_exiobase3(year, source, **extraction_kwargs)
    elif table == 'figaro':
        from mrio_toolbox.extractors.figaro.figaro_extractor import extract_figaro
        mrio = extract_figaro(year, source, **extraction_kwargs)
    elif table == 'emerging':
        from mrio_toolbox.extractors.emerging.emerging_extractor import extract_emerging
        mrio = extract_emerging(year, source, **extraction_kwargs)
    elif table == 'gtap11':
        # The GTAP extractor is the only one that also receives the destination.
        from mrio_toolbox.extractors.gtap11 import extract_gtap11
        mrio = extract_gtap11(year, source, destination, **extraction_kwargs)
    else:
        raise ValueError(f"Unsupported MRIO table: {table}")

    if preprocessing:
        mrio.preprocess(**preprocessing)

    # Nothing to save to: hand the in-memory object back to the caller.
    if not saving_kwargs and not destination:
        return mrio

    filepath = os.path.join(destination, f"{table}_year{year}.nc")
    mrio.save(file = filepath, **saving_kwargs)
|
|
@@ -1,280 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Download Figaro 25ed from the CIRCABC website.
|
|
3
|
-
|
|
4
|
-
@author: wirth
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
from selenium import webdriver
|
|
8
|
-
from selenium.webdriver.common.by import By
|
|
9
|
-
from selenium.webdriver.firefox.options import Options as FirefoxOptions
|
|
10
|
-
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
|
11
|
-
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, WebDriverException
|
|
12
|
-
import time
|
|
13
|
-
import os
|
|
14
|
-
import logging
|
|
15
|
-
|
|
16
|
-
log = logging.getLogger(__name__)
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def wait_for_download(file_path, timeout=60, poll_interval=0.5):
    """
    Wait until the given file has been fully downloaded.

    Parameters
    ----------
    file_path : str
        The path to the file that is being downloaded.
    timeout : int
        Maximum time to wait for the download to complete, in seconds.
    poll_interval : float
        Pause between two checks of the download folder, in seconds.

    Returns
    -------
    bool
        True as soon as the download is considered complete.

    Raises
    ------
    TimeoutError
        If the file is not complete within ``timeout`` seconds.
    """
    # dirname() is '' for a bare file name and os.listdir('') raises,
    # so fall back to the current directory in that case.
    folder = os.path.dirname(file_path) or os.curdir
    # A monotonic clock keeps the deadline immune to system clock changes.
    deadline = time.monotonic() + timeout

    while time.monotonic() < deadline:
        # Firefox: file exists, size stops changing
        if os.path.exists(file_path):
            size_before = os.path.getsize(file_path)
            time.sleep(1)
            size_after = os.path.getsize(file_path)
            if size_after == size_before:
                time.sleep(1)  # wait a bit more to ensure download is completed
                return True

        # Chrome: the download is finished once no .crdownload temp file is left
        download_pending = any(name.endswith(".crdownload") for name in os.listdir(folder))
        if not download_pending and os.path.exists(file_path):
            time.sleep(1)  # wait a bit more to ensure download is completed
            return True

        time.sleep(poll_interval)
    raise TimeoutError(f"Download not completed within {timeout} seconds: {file_path}")
|
|
51
|
-
|
|
52
|
-
def get_driver(destination, headless=True, prefer="chrome"):
    """
    Try to get a Selenium driver.

    The preferred browser is tried first; if its driver cannot be started,
    the other supported browser is tried as a fallback.

    Parameters
    ----------
    destination : str
        Download folder for browser.
    headless : bool
        Run browser in headless mode.
    prefer : str
        Preferred browser: "firefox" or "chrome".

    Returns
    -------
    WebDriver
        The first driver that could be started.

    Raises
    ------
    RuntimeError
        If no browser driver could be started.
    """

    def make_firefox():
        options = FirefoxOptions()
        if headless:
            options.add_argument("--headless")
        options.add_argument("--window-size=1920,1080")
        # folderList=2 makes Firefox use the custom download directory below
        options.set_preference("browser.download.folderList", 2)
        options.set_preference("browser.download.dir", destination)
        return webdriver.Firefox(options=options)

    def make_chrome():
        options = ChromeOptions()
        if headless:
            options.add_argument("--headless=new")
        options.add_argument("--window-size=1920,1080")
        prefs = {"download.default_directory": destination,
                 "download.prompt_for_download": False,
                 "download.directory_upgrade": True,
                 "safebrowsing.enabled": True,
                 "profile.default_content_setting_values.automatic_downloads": 1}
        options.add_experimental_option("prefs", prefs)
        return webdriver.Chrome(options=options)

    factories = {"firefox": make_firefox, "chrome": make_chrome}

    # Preferred browser first, then the remaining one(s); every browser is
    # attempted at most once so a failing driver is not started twice.
    if prefer in factories:
        candidates = [prefer]
    else:
        log.warning(f"Unknown preferred browser '{prefer}', falling back to the supported browsers.")
        candidates = []
    candidates += [name for name in ("firefox", "chrome") if name not in candidates]

    tried = []
    for choice in candidates:
        try:
            log.info(f"Trying {choice.capitalize()} driver...")
            return factories[choice]()
        except WebDriverException as e:
            log.warning(f"{choice.capitalize()} driver failed: {e}")
            tried.append(choice)
    raise RuntimeError(f"Could not start any browser driver (tried {tried}). Please install Firefox or Chrome.")
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
def safe_click(driver, by, value, description="element"):
    """
    Safely clicks an element on the page, handling exceptions and logging errors.

    Parameters:
    -----------
    driver: WebDriver
        The Selenium WebDriver instance.
    by: By
        The method to locate the element (e.g., By.XPATH, By.CSS_SELECTOR).
    value: str
        The value to locate the element.
    description: str
        A description of the element for logging purposes.

    Raises:
    -------
    RuntimeError
        If the element cannot be found or clicked. The original exception is
        attached as the cause.

    Notes:
    ------
    If you want to debug the click, you can set the headless mode to False in the download_figaro function.
    This will open the browser window and allow you to see what is happening.
    In the browser window, you can right-click on the element and select "Inspect" to see the HTML structure.
    """
    # Shared tail of every error message raised below.
    hint = ("The download was aborted. Likely the page structure of the CIRCABC website has changed. "
            "If you are a developer, try to debug without headless mode. "
            "If you are a user, you may want to download the figaro tables manually.")
    try:
        elem = driver.find_element(by, value)
        # Click with JavaScript to avoid issues with overlays or pop-ups
        driver.execute_script("arguments[0].click();", elem)
        log.info(f"Clicked {description}")
    except NoSuchElementException as exc:
        log.error(f"{description} not found: {value}")
        raise RuntimeError(f"{description} not found: {value}. {hint}") from exc
    except ElementClickInterceptedException as exc:
        log.error(f"{description} was obstructed: {value}")
        raise RuntimeError(f"{description} was obstructed: {value}. {hint}") from exc
    except Exception as exc:
        log.error(f"Error clicking {description}: {exc}.")
        raise RuntimeError(hint) from exc
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def download_figaro(year, destination, format = 'industry by industry', sut = False, headless = True):
    """
    Downloads the specified format of the EU input-output matrix from Figaro.

    Files that are already present in the destination folder are skipped.
    A browser is only started when at least one requested file is missing.

    Parameters:
    -----------
    year: int
        The year of the data to download.
    destination: str
        A path to the folder where the downloaded file will be saved.
    format: str, optional
        Either 'industry by industry' or 'product by product'.
    sut: Boolean, optional
        If True, also downloads the supply and use tables, otherwise only the input-output matrix.
    headless: Boolean, optional
        If True, runs the browser in headless mode (no GUI). Default is True.

    Raises:
    -------
    ValueError
        If the year is not an int between 2010 and 2023, or the format is unknown.
    FileNotFoundError
        If the destination folder does not exist.
    """

    # Check if year is valid
    if not isinstance(year, int) or year < 2010 or year > 2023:
        raise ValueError("As of August 2025, the Figaro database contains IO tables for the years 2010 to 2023. Please provide a valid year within this range. "
                         "If you are sure that the year 2024 is already available, please update this check accordingly.")

    # Check if destination exists
    if not os.path.exists(destination):
        raise FileNotFoundError(f"The destination folder '{destination}' does not exist. Please create it before downloading.")

    if format == 'industry by industry':
        format_abbr = "ind-by-ind"
    elif format == 'product by product':
        format_abbr = "prod-by-prod"
    else:
        raise ValueError("The 'format' parameter must be either 'industry by industry' or 'product by product'.")

    paths = {
        "io_path" : os.path.join(destination, f"matrix_eu-ic-io_{format_abbr}_25ed_{year}.csv"),
        "sup_path" : os.path.join(destination, f"matrix_eu-ic-supply_25ed_{year}.csv"),
        "use_path" : os.path.join(destination, f"matrix_eu-ic-use_25ed_{year}.csv"),
        "excel_path" : os.path.join(destination, "Description_FIGARO_Tables(25ed).xlsx")
    }
    url = "https://circabc.europa.eu/ui/group/cec66924-a924-4f91-a0ef-600a0531e3ba/library/0d8bab1e-d159-40b9-9aff-ef8e6d58e24e?p=1&n=10&sort=name_ASC"

    # Kept as an equality test so that only True (not any truthy value such
    # as the string "none") enables the supply/use download.
    with_sut = sut == True

    # Only the files that were actually requested decide whether a browser is
    # needed; supply/use tables are irrelevant unless sut is True.
    required = ["excel_path", "io_path"]
    if with_sut:
        required += ["sup_path", "use_path"]

    if all(os.path.exists(paths[key]) for key in required):
        log.info(f"The files for format '{format_abbr}' and year '{year}' are already in the folder '{destination}', skipping download")
        return

    driver = get_driver(destination, headless=headless, prefer="chrome")
    try:
        driver.get(url)
        driver.implicitly_wait(5)

        if not os.path.exists(paths["excel_path"]):
            log.info("Downloading the description of the Figaro tables")
            # Find and click the Excel file
            safe_click(driver, By.XPATH, "//a[contains(text(), 'Description_FIGARO_Tables(25ed).xlsx')]", "Excel file link")
            # Find and click the download button
            safe_click(driver, By.CSS_SELECTOR, ".download", "Download button for Excel file")
            # Wait for the download to complete
            wait_for_download(paths["excel_path"])
            # Go back to the main page
            driver.get(url)
        else:
            log.info(f"The description of the Figaro tables is already in the folder '{destination}', skipping download")

        if not os.path.exists(paths["io_path"]):
            log.info(f"Downloading IO table for format '{format_abbr}' and year '{year}'")
            # The category link is the desired format (ixi or pxp)
            _download_figaro_table(driver, year,
                                   f"//a[contains(text(), '{format}')]", f"format '{format}' link",
                                   paths["io_path"], "IO table")
            driver.get(url)
        else:
            log.info(f"The IO tables for format '{format}' and year '{year}' are already in the folder '{destination}', skipping download")

        if with_sut:
            if not os.path.exists(paths["sup_path"]):
                log.info(f"Downloading supply table for year '{year}'")
                _download_figaro_table(driver, year,
                                       "//a[contains(text(), 'Supply tables')]", "Supply tables link",
                                       paths["sup_path"], "supply table")
                driver.get(url)
            else:
                log.info(f"The supply tables for year '{year}' are already in the folder '{destination}', skipping download")

            if not os.path.exists(paths["use_path"]):
                log.info(f"Downloading use table for year '{year}'")
                _download_figaro_table(driver, year,
                                       "//a[contains(text(), 'Use tables')]", "Use tables link",
                                       paths["use_path"], "use table")
            else:
                log.info(f"The use tables for year '{year}' are already in the folder '{destination}', skipping download")
    finally:
        # Always close the browser, also when a click or a download failed.
        driver.quit()


def _download_figaro_table(driver, year, category_xpath, category_description, file_path, table_name):
    """Navigate from the library main page to one yearly CSV table and download it to file_path."""
    # Open the table category (format, supply or use)
    safe_click(driver, By.XPATH, category_xpath, category_description)

    # Find and click the CSV matrix format
    safe_click(driver, By.XPATH, "//a[contains(text(), 'CSV matrix format')]", "CSV matrix format link")

    # Click for the second page if year > 2019
    if year > 2019:
        time.sleep(0.5)  # we need an explicit wait here, because the element is found before it is clickable
        safe_click(driver, By.CLASS_NAME, "next-page", "Next page button")

    # Find and click the desired year
    safe_click(driver, By.XPATH, f"//a[contains(text(), '_{year}.csv')]", f"Year '{year}' link")

    # Find and click the download button
    safe_click(driver, By.CSS_SELECTOR, ".download", f"Download button for {table_name}")

    # Wait for the download to complete
    wait_for_download(file_path)
|
|
@@ -1,187 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Load and convert Figaro MRIO files.
|
|
3
|
-
|
|
4
|
-
Supports Figaro inter industry IO, supply and use tables in csv matrix format
|
|
5
|
-
https://ec.europa.eu/eurostat/web/esa-supply-use-input-tables/database#Input-output%20tables%20industry%20by%20industry
|
|
6
|
-
|
|
7
|
-
The extractor loads the IO table and if available the supply and use tables.
|
|
8
|
-
|
|
9
|
-
@author: wirth
|
|
10
|
-
"""
|
|
11
|
-
|
|
12
|
-
import os
|
|
13
|
-
import logging
|
|
14
|
-
import pandas as pd
|
|
15
|
-
|
|
16
|
-
from mrio_toolbox import MRIO
|
|
17
|
-
from mrio_toolbox.utils.savers._to_nc import save_to_nc
|
|
18
|
-
|
|
19
|
-
log = logging.getLogger(__name__)
|
|
20
|
-
|
|
21
|
-
def extract_figaro(year, source, format = 'industry by industry', sut = "none", edition=25):
    """
    Extract FIGARO data.

    Loads FIGARO tables and labels and builds an MRIO object for further use
    with the mrio_toolbox library. Currently the extractor does not support
    emission satellite accounts (I couldn't find them on the figaro website).

    Put all tables as well as the 'Description_FIGARO_Tables({edition}ed).xlsx' file
    in the same source folder.

    Parameters
    ----------
    year : str
        Data year to load.
    source : path-like
        Path to folder where raw data is stored
    format : str, optional
        Either 'industry by industry' or 'product by product'.
    sut : str, optional
        Supply and use tables to load, by default "none".
        Available options are "none", "supply", "use" or "both".
        A requested table that is not present in ``source`` is skipped.
    edition : int, optional
        Edition of the FIGARO tables, by default 25. The alternative is 24.

    Returns
    -------
    MRIO
        MRIO object holding the parts "t", "y", "va" and "vay", plus "sup"
        and/or the "use_*" parts when the corresponding tables were loaded.

    Raises
    ------
    ValueError
        If ``format`` or ``edition`` is not supported.
    """

    if format == 'industry by industry':
        format_abbr = "ind-by-ind"
    elif format == 'product by product':
        format_abbr = "prod-by-prod"
    else:
        raise ValueError("The 'format' parameter must be either 'industry by industry' or 'product by product'.")

    # Header row of the label sheet for each supported edition. Validated
    # before any file is read so an unsupported edition fails immediately.
    label_header_rows = {24: 5, 25: 3}
    if edition not in label_header_rows:
        raise ValueError(f"Edition {edition} not yet supported. Please use edition 24 or 25.")

    if sut not in ("none", "supply", "use", "both"):
        log.warning(f"Unknown 'sut' option '{sut}', no supply or use table will be loaded.")

    log.info(f"Extracting FIGARO IO table for year {year}, load IO table...")
    raw = pd.read_csv(os.path.join(source, f"matrix_eu-ic-io_{format_abbr}_{edition}ed_{year}.csv"), dtype = str)
    log.info("Loaded IO table")

    # None means "not requested or not found".
    raw_supply = None
    raw_use = None

    if sut in ["supply", "both"]:
        log.info(f"Check if supply table is available for year {year}...")
        supply_file = os.path.join(source, f"matrix_eu-ic-supply_{edition}ed_{year}.csv")
        if os.path.isfile(supply_file):
            log.info("Supply table found, loading...")
            raw_supply = pd.read_csv(supply_file, dtype = str)
            log.info("Loaded supply table")

    if sut in ["use", "both"]:
        log.info(f"Check if use table is available for year {year}...")
        use_file = os.path.join(source, f"matrix_eu-ic-use_{edition}ed_{year}.csv")
        if os.path.isfile(use_file):
            log.info("Use table found, loading...")
            raw_use = pd.read_csv(use_file, dtype = str)
            log.info("Loaded use table")

    log.info("Extracting labels...")
    df = pd.read_excel(os.path.join(source, f"Description_FIGARO_Tables({edition}ed).xlsx"),
                       header=label_header_rows[edition],
                       sheet_name = "Prod, Ind & Accounting items").dropna(axis=1, how='all')

    # Countries are not in the correct order in the excel sheet, so get countries from raw data
    column_labs = raw.columns[1:]
    countries = column_labs.str.split("_").str[0]
    countries = list(dict.fromkeys(countries))  # unique, order of first appearance

    # Get other labels from excel sheet
    sectors = df["Label.1"].tolist()
    cpa_labs = df["Label"].tolist()
    va_labs = df["Label.2"].dropna().tolist()
    y_labs = df["Label.3"].dropna().tolist()

    labels = {
        "countries": countries,
        "sectors": sectors,
        "y_labs": y_labs,
        "va_labs": va_labs
    }
    c, s, y, va = len(countries), len(sectors), len(y_labs), len(va_labs)
    cpa = len(cpa_labs)
    # The product dimension is only registered when a supply or use table needs it.
    if raw_supply is not None or raw_use is not None:
        labels["cpa_labs"] = cpa_labs
    log.info("Labels extracted")

    log.info("Extracting parts from raw data...")
    raw = raw.iloc[:, 1:].astype(float).to_numpy()

    # NOTE(review): the value-added row slices below end at c*va rows while
    # the parts are declared with a plain "va_labs" row dimension; this
    # presumably relies on numpy truncating slices at the array end - confirm.
    tables = {}
    tables["t"] = raw[:c*s, :c*s]
    tables["y"] = raw[:c*s, c*s:(c*s+c*y)]
    tables["va"] = raw[c*s:(c*s+c*va), :c*s]
    tables["vay"] = raw[c*s:(c*s+c*va), c*s:(c*s+c*y)]
    log.info("Extracted parts from raw data")

    # Treat supply table if available
    if raw_supply is not None:
        log.info("Extracting supply table...")
        raw_supply = raw_supply.iloc[:, 1:].astype(float).to_numpy()
        tables["sup"] = raw_supply[:c*cpa, :c*s]
        log.info("Extracted supply table")
    else:
        log.info("No supply table found, skipping...")

    # Treat use table if available
    if raw_use is not None:
        log.info("Extracting use table...")
        raw_use = raw_use.iloc[:, 1:].astype(float).to_numpy()
        tables["use_t"] = raw_use[:c*cpa, :c*s]
        tables["use_y"] = raw_use[:c*cpa, c*s:c*s + c*y]
        tables["use_va"] = raw_use[c*cpa:c*cpa+c*va, :c*s]
        tables["use_vay"] = raw_use[c*cpa:(c*cpa+c*va), c*s:(c*s+c*y)]
        log.info("Extracted use table")
    else:
        log.info("No use table found, skipping...")

    # Assemble mrio object
    log.info("Building MRIO object...")
    m = MRIO()
    m.add_dimensions(labels)
    log.info("Building MRIO objects from parts containing labels and tables...")

    # Row and column dimensions of every part, in the order they are added.
    part_dimensions = {
        "t": [["countries", "sectors"], ["countries", "sectors"]],
        "y": [["countries", "sectors"], ["countries", "y_labs"]],
        "va": ["va_labs", ["countries", "sectors"]],
        "vay": ["va_labs", ["countries", "y_labs"]],
        "sup": [["countries", "cpa_labs"], ["countries", "sectors"]],
        "use_t": [["countries", "cpa_labs"], ["countries", "sectors"]],
        "use_y": [["countries", "cpa_labs"], ["countries", "y_labs"]],
        "use_va": ["va_labs", ["countries", "sectors"]],
        "use_vay": ["va_labs", ["countries", "y_labs"]],
    }
    for part_name, dimensions in part_dimensions.items():
        if part_name not in tables:
            continue  # optional supply/use part that was not loaded
        m.parts[part_name] = m.new_part(name=part_name,
                                        data=tables[part_name],
                                        dimensions=dimensions)
        log.info(f"{part_name} part added")
    log.info("MRIO object built")

    # Add metadata
    log.info("Adding metadata to MRIO object...")
    m.metadata["table"] = "figaro"
    m.metadata["edition"] = edition
    m.metadata["year"] = year
    m.metadata["format"] = format
    m.metadata["sut"] = sut
    m.name = f"figaro_{year}_{format}"
    return m
|