PyPI - mrio-toolbox - Versions diffs - 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl - Mend

mrio-toolbox 1.0.0py3-none-any.whl → 1.1.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mrio-toolbox might be problematic. Click here for more details.

Files changed (59) hide show

mrio_toolbox/__init__.py +18 -2
mrio_toolbox/_parts/_Axe.py +95 -37
mrio_toolbox/_parts/_Part.py +264 -70
mrio_toolbox/_parts/__init__.py +4 -0
mrio_toolbox/_parts/part_operations.py +24 -17
mrio_toolbox/extractors/__init__.py +20 -0
mrio_toolbox/extractors/downloaders.py +36 -0
mrio_toolbox/extractors/emerging/__init__.py +3 -0
mrio_toolbox/extractors/emerging/emerging_extractor.py +117 -0
mrio_toolbox/extractors/eora/__init__.py +3 -0
mrio_toolbox/extractors/eora/eora_extractor.py +132 -0
mrio_toolbox/extractors/exiobase/__init__.py +3 -0
mrio_toolbox/extractors/exiobase/exiobase_extractor.py +270 -0
mrio_toolbox/extractors/extractors.py +79 -0
mrio_toolbox/extractors/figaro/__init__.py +3 -0
mrio_toolbox/extractors/figaro/figaro_downloader.py +280 -0
mrio_toolbox/extractors/figaro/figaro_extractor.py +187 -0
mrio_toolbox/extractors/gloria/__init__.py +3 -0
mrio_toolbox/extractors/gloria/gloria_extractor.py +202 -0
mrio_toolbox/extractors/gtap11/__init__.py +7 -0
mrio_toolbox/extractors/gtap11/extraction/__init__.py +3 -0
mrio_toolbox/extractors/gtap11/extraction/extractor.py +129 -0
mrio_toolbox/extractors/gtap11/extraction/harpy_files/__init__.py +6 -0
mrio_toolbox/extractors/gtap11/extraction/harpy_files/_header_sets.py +279 -0
mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file.py +262 -0
mrio_toolbox/extractors/gtap11/extraction/harpy_files/har_file_io.py +974 -0
mrio_toolbox/extractors/gtap11/extraction/harpy_files/header_array.py +300 -0
mrio_toolbox/extractors/gtap11/extraction/harpy_files/sl4.py +229 -0
mrio_toolbox/extractors/gtap11/gtap_mrio/__init__.py +6 -0
mrio_toolbox/extractors/gtap11/gtap_mrio/mrio_builder.py +158 -0
mrio_toolbox/extractors/icio/__init__.py +3 -0
mrio_toolbox/extractors/icio/icio_extractor.py +121 -0
mrio_toolbox/extractors/wiod/__init__.py +3 -0
mrio_toolbox/extractors/wiod/wiod_extractor.py +143 -0
mrio_toolbox/mrio.py +254 -94
mrio_toolbox/msm/__init__.py +6 -0
mrio_toolbox/msm/multi_scale_mapping.py +863 -0
mrio_toolbox/utils/__init__.py +3 -0
mrio_toolbox/utils/converters/__init__.py +3 -0
mrio_toolbox/utils/converters/pandas.py +8 -6
mrio_toolbox/utils/converters/xarray.py +2 -13
mrio_toolbox/utils/formatting/__init__.py +0 -0
mrio_toolbox/utils/formatting/formatter.py +528 -0
mrio_toolbox/utils/loaders/__init__.py +4 -0
mrio_toolbox/utils/loaders/_loader.py +60 -4
mrio_toolbox/utils/loaders/_loader_factory.py +22 -1
mrio_toolbox/utils/loaders/_nc_loader.py +37 -1
mrio_toolbox/utils/loaders/_pandas_loader.py +29 -3
mrio_toolbox/utils/loaders/_parameter_loader.py +61 -16
mrio_toolbox/utils/savers/__init__.py +3 -0
mrio_toolbox/utils/savers/_path_checker.py +25 -7
mrio_toolbox/utils/savers/_to_folder.py +6 -1
mrio_toolbox/utils/savers/_to_nc.py +26 -18
{mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/METADATA +10 -6
mrio_toolbox-1.1.1.dist-info/RECORD +59 -0
{mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/WHEEL +1 -1
mrio_toolbox-1.0.0.dist-info/RECORD +0 -26
{mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info/licenses}/LICENSE +0 -0
{mrio_toolbox-1.0.0.dist-info → mrio_toolbox-1.1.1.dist-info}/top_level.txt +0 -0

mrio_toolbox/extractors/figaro/figaro_downloader.py ADDED Viewed

@@ -0,0 +1,280 @@
+"""
+Download Figaro 25ed from the CIRCABC website.
+@author: wirth
+"""
+from selenium import webdriver
+from selenium.webdriver.common.by import By
+from selenium.webdriver.firefox.options import Options as FirefoxOptions
+from selenium.webdriver.chrome.options import Options as ChromeOptions
+from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException, WebDriverException
+import time
+import os
+import logging
+log = logging.getLogger(__name__)
+def wait_for_download(file_path, timeout=60):
+    """
+    Wait until the given file has been fully downloaded.
+    The download counts as finished when the file exists and either its size
+    did not change for one second, or the folder holds no Chrome
+    ``.crdownload`` temporary file.
+    Parameters
+    ----------
+    file_path : str
+        The path to the file that is being downloaded.
+    timeout : int
+        Maximum time to wait for the download to complete, in seconds.
+    Returns
+    -------
+    bool
+        True as soon as the download is considered complete.
+    Raises
+    ------
+    TimeoutError
+        If the download is not completed within ``timeout`` seconds.
+    """
+    # A bare file name has an empty dirname, which os.listdir() rejects;
+    # fall back to the current working directory in that case.
+    folder = os.path.dirname(file_path) or "."
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        # Firefox: file exists, size stops changing
+        if os.path.exists(file_path):
+            size_before = os.path.getsize(file_path)
+            time.sleep(1)
+            if os.path.getsize(file_path) == size_before:
+                time.sleep(1)  # wait a bit more to ensure download is completed
+                return True
+        # Chrome: check for any .crdownload temp file
+        chrome_busy = any(name.endswith(".crdownload") for name in os.listdir(folder))
+        if not chrome_busy and os.path.exists(file_path):
+            time.sleep(1)  # wait a bit more to ensure download is completed
+            return True
+        time.sleep(0.5)
+    raise TimeoutError(f"Download not completed within {timeout} seconds: {file_path}")
+def get_driver(destination, headless=True, prefer="chrome"):
+    """
+    Start a Selenium driver, falling back to the other browser if needed.
+    The preferred browser is tried first; if its driver cannot be started,
+    the remaining supported browser is tried. Each browser is tried once.
+    Parameters
+    ----------
+    destination : str
+        Download folder for browser.
+    headless : bool
+        Run browser in headless mode.
+    prefer : str
+        Preferred browser: "firefox" or "chrome". Any other value is ignored
+        with a warning and the default order (Firefox, then Chrome) is used.
+    Returns
+    -------
+    WebDriver
+        The started Firefox or Chrome driver, configured to download into
+        ``destination``.
+    Raises
+    ------
+    RuntimeError
+        If neither browser driver could be started.
+    """
+    def make_firefox():
+        options = FirefoxOptions()
+        if headless:
+            options.add_argument("--headless")
+        options.add_argument("--window-size=1920,1080")
+        # folderList = 2 tells Firefox to use the custom download directory below
+        options.set_preference("browser.download.folderList", 2)
+        options.set_preference("browser.download.dir", destination)
+        return webdriver.Firefox(options=options)
+    def make_chrome():
+        options = ChromeOptions()
+        if headless:
+            options.add_argument("--headless=new")
+        options.add_argument("--window-size=1920,1080")
+        prefs = {"download.default_directory": destination,
+                 "download.prompt_for_download": False,
+                 "download.directory_upgrade": True,
+                 "safebrowsing.enabled": True,
+                 "profile.default_content_setting_values.automatic_downloads": 1}
+        options.add_experimental_option("prefs", prefs)
+        return webdriver.Chrome(options=options)
+    factories = {"firefox": make_firefox, "chrome": make_chrome}
+    if prefer in factories:
+        order = [prefer]
+    else:
+        log.warning(f"Unknown browser preference '{prefer}', using the default order.")
+        order = []
+    # Append the remaining browser(s) so that no browser is attempted twice
+    order += [name for name in ("firefox", "chrome") if name not in order]
+    tried = []
+    for choice in order:
+        try:
+            log.info(f"Trying {choice.capitalize()} driver...")
+            return factories[choice]()
+        except WebDriverException as e:
+            log.warning(f"{choice.capitalize()} driver failed: {e}")
+            tried.append(choice)
+    raise RuntimeError(f"Could not start any browser driver (tried {tried}). Please install Firefox or Chrome.")
+def safe_click(driver, by, value, description="element"):
+    """
+    Safely clicks an element on the page, handling exceptions and logging errors.
+    Parameters:
+    -----------
+    driver: WebDriver
+        The Selenium WebDriver instance.
+    by: By
+        The method to locate the element (e.g., By.XPATH, By.CSS_SELECTOR).
+    value: str
+        The value to locate the element.
+    description: str
+        A description of the element for logging purposes.
+    Raises:
+    -------
+    RuntimeError
+        If the element cannot be found, is obstructed, or the click fails for
+        any other reason. The original exception is attached as the cause.
+    Notes:
+    ------
+    If you want to debug the click, you can set the headless mode to False in the download_figaro function.
+    This will open the browser window and allow you to see what is happening.
+    In the browser window, you can right-click on the element and select "Inspect" to see the HTML structure.
+    """
+    # Shared tail of every error message; starts with a space so that it can
+    # be appended directly after the sentence describing the failure.
+    hint = (" The download was aborted. Likely the page structure of the CIRCABC website has changed. "
+            "If you are a developer, try to debug without headless mode. If you are a user, you may want to download the figaro tables manually.")
+    try:
+        elem = driver.find_element(by, value)
+        driver.execute_script("arguments[0].click();", elem)  # Click with JavaScript to avoid issues with overlays or pop-ups
+        log.info(f"Clicked {description}")
+    except NoSuchElementException as e:
+        log.error(f"{description} not found: {value}")
+        raise RuntimeError(f"{description} not found: {value}." + hint) from e
+    except ElementClickInterceptedException as e:
+        log.error(f"{description} was obstructed: {value}")
+        raise RuntimeError(f"{description} was obstructed: {value}." + hint) from e
+    except Exception as e:
+        log.error(f"Error clicking {description}: {e}.")
+        raise RuntimeError(f"Error clicking {description}: {e}." + hint) from e
+def download_figaro(year, destination, format = 'industry by industry', sut = False, headless = True):
+    """
+    Downloads the specified format of the EU input-output matrix from Figaro.
+    Files that are already present in the destination folder are skipped. The
+    browser is only started when at least one requested file is missing, and
+    it is always closed again, even if the download fails.
+    Parameters:
+    -----------
+    year: int
+        The year of the data to download.
+    destination: str
+        A path to the folder where the downloaded file will be saved.
+    format: str, optional
+        Either 'industry by industry' or 'product by product'.
+    sut: Boolean, optional
+        If True, also downloads the supply and use tables, otherwise only the input-output matrix.
+    headless: Boolean, optional
+        If True, runs the browser in headless mode (no GUI). Default is True.
+    Raises:
+    -------
+    ValueError
+        If the year is outside 2010-2023 or the format is not supported.
+    FileNotFoundError
+        If the destination folder does not exist.
+    """
+    # Check if year is valid
+    if not isinstance(year, int) or year < 2010 or year > 2023:
+        raise ValueError("As of August 2025, the Figaro database contains IO tables for the years 2010 to 2023. Please provide a valid year within this range. "
+                         "If you are sure that the year 2024 is already available, please update this check accordingly.")
+    # Check if destination exists
+    if not os.path.exists(destination):
+        raise FileNotFoundError(f"The destination folder '{destination}' does not exist. Please create it before downloading.")
+    if format == 'industry by industry':
+        format_abbr = "ind-by-ind"
+    elif format == 'product by product':
+        format_abbr = "prod-by-prod"
+    else:
+        raise ValueError("The 'format' parameter must be either 'industry by industry' or 'product by product'.")
+    paths = {
+        "io_path" : os.path.join(destination, f"matrix_eu-ic-io_{format_abbr}_25ed_{year}.csv"),
+        "sup_path" : os.path.join(destination, f"matrix_eu-ic-supply_25ed_{year}.csv"),
+        "use_path" : os.path.join(destination, f"matrix_eu-ic-use_25ed_{year}.csv"),
+        "excel_path" : os.path.join(destination, "Description_FIGARO_Tables(25ed).xlsx")
+    }
+    url = "https://circabc.europa.eu/ui/group/cec66924-a924-4f91-a0ef-600a0531e3ba/library/0d8bab1e-d159-40b9-9aff-ef8e6d58e24e?p=1&n=10&sort=name_ASC"
+    # Compared with True on purpose: a non-empty string such as "none" must
+    # not be mistaken for a request to download the supply and use tables.
+    download_sut = sut == True
+    # Only the files that were actually requested decide whether a browser is needed
+    required = ["excel_path", "io_path"]
+    if download_sut:
+        required += ["sup_path", "use_path"]
+    if all(os.path.exists(paths[key]) for key in required):
+        log.info(f"The files for format '{format_abbr}' and year '{year}' are already in the folder '{destination}', skipping download")
+        return
+    driver = get_driver(destination, headless=headless, prefer="chrome")
+    try:
+        driver.get(url)
+        driver.implicitly_wait(5)
+        if not os.path.exists(paths["excel_path"]):
+            log.info("Downloading the description of the Figaro tables")
+            # Find and click the Excel file
+            safe_click(driver, By.XPATH, "//a[contains(text(), 'Description_FIGARO_Tables(25ed).xlsx')]", "Excel file link")
+            # Find and click the download button
+            safe_click(driver, By.CSS_SELECTOR, ".download", "Download button for Excel file")
+            # Wait for the download to complete
+            wait_for_download(paths["excel_path"])
+            # Go back to the main page
+            driver.get(url)
+        else:
+            log.info(f"The description of the Figaro tables is already in the folder '{destination}', skipping download")
+        if not os.path.exists(paths["io_path"]):
+            log.info(f"Downloading IO table for format '{format_abbr}' and year '{year}'")
+            # The section link is the desired format (ixi or pxp)
+            _download_figaro_csv(driver, url, year,
+                                 f"//a[contains(text(), '{format}')]", f"format '{format}' link",
+                                 "Download button for IO table", paths["io_path"])
+        else:
+            log.info(f"The IO tables for format '{format}' and year '{year}' are already in the folder '{destination}', skipping download")
+        if download_sut:
+            if not os.path.exists(paths["sup_path"]):
+                log.info(f"Downloading supply table for year '{year}'")
+                _download_figaro_csv(driver, url, year,
+                                     "//a[contains(text(), 'Supply tables')]", "Supply tables link",
+                                     "Download button for supply table", paths["sup_path"])
+            else:
+                log.info(f"The supply tables for year '{year}' are already in the folder '{destination}', skipping download")
+            if not os.path.exists(paths["use_path"]):
+                log.info(f"Downloading use table for year '{year}'")
+                _download_figaro_csv(driver, url, year,
+                                     "//a[contains(text(), 'Use tables')]", "Use tables link",
+                                     "Download button for use table", paths["use_path"])
+            else:
+                log.info(f"The use tables for year '{year}' are already in the folder '{destination}', skipping download")
+    finally:
+        # Always close the browser, even if a click or a download failed
+        driver.quit()
+def _download_figaro_csv(driver, url, year, section_xpath, section_description, button_description, target_path):
+    """
+    Navigate from the library start page to one yearly csv matrix file,
+    download it to ``target_path`` and return to the start page ``url``.
+    """
+    # Find and click the table section (IO format, supply tables or use tables)
+    safe_click(driver, By.XPATH, section_xpath, section_description)
+    # Find and click the CSV matrix format
+    safe_click(driver, By.XPATH, "//a[contains(text(), 'CSV matrix format')]", "CSV matrix format link")
+    # Click for the second page if year > 2019
+    if year > 2019:
+        time.sleep(0.5) # we need an explicit wait here, because the element is found before it is clickable
+        safe_click(driver, By.CLASS_NAME, "next-page", "Next page button")
+    # Find and click the desired year
+    safe_click(driver, By.XPATH, f"//a[contains(text(), '_{year}.csv')]", f"Year '{year}' link")
+    # Find and click the download button
+    safe_click(driver, By.CSS_SELECTOR, ".download", button_description)
+    # Wait for the download to complete
+    wait_for_download(target_path)
+    # Go back to the main page
+    driver.get(url)

mrio_toolbox/extractors/figaro/figaro_extractor.py ADDED Viewed

@@ -0,0 +1,187 @@
+"""
+Load and convert Figaro MRIO files.
+Supports Figaro inter industry IO, supply and use tables in csv matrix format
+https://ec.europa.eu/eurostat/web/esa-supply-use-input-tables/database#Input-output%20tables%20industry%20by%20industry
+The extractor loads the IO table and if available the supply and use tables.
+@author: wirth
+"""
+import os
+import logging
+import pandas as pd
+from mrio_toolbox import MRIO
+from mrio_toolbox.utils.savers._to_nc import save_to_nc
+log = logging.getLogger(__name__)
+def extract_figaro(year, source, format = 'industry by industry', sut = "none", edition=25):
+    """
+    Extract FIGARO data.
+    Loads FIGARO tables and labels and assembles them into an MRIO object for
+    further use with the mrio_toolbox library. Currently the extractor does not
+    support emission satellite accounts (I couldn't find them on the figaro website).
+    Put all tables as well as the 'Description_FIGARO_Tables({edition}ed).xlsx' file
+    in the same source folder.
+    Parameters
+    ----------
+    year : str
+        Data year to load.
+    source : path-like
+        Path to folder where raw data is stored
+    format : str, optional
+       Either 'industry by industry' or 'product by product'.
+    sut : str, optional
+        Supply and use tables to load, by default "none".
+        Available options are "none", "supply", "use" or "both".
+        A requested table that is not found in the source folder is skipped.
+    edition : int, optional
+        Edition of the FIGARO tables, by default 25. The alternative is 24.
+    Returns
+    -------
+    MRIO
+        MRIO object holding the parts "t", "y", "va" and "vay", plus "sup"
+        and the "use_*" parts when the corresponding tables were loaded.
+    Raises
+    ------
+    ValueError
+        If 'format' or 'edition' is not supported.
+    """
+    if format == 'industry by industry':
+        format_abbr = "ind-by-ind"
+    elif format == 'product by product':
+        format_abbr = "prod-by-prod"
+    else:
+        raise ValueError("The 'format' parameter must be either 'industry by industry' or 'product by product'.")
+    # Header row of the Excel description sheet, per supported edition.
+    # Checked before any file is read so that an unsupported edition fails early.
+    header_rows = {24: 5, 25: 3}
+    if edition not in header_rows:
+        raise ValueError(f"Edition {edition} not yet supported. Please use edition 24 or 25.")
+    log.info(f"Extracting FIGARO IO table for year {year}, load IO table...")
+    raw = pd.read_csv(os.path.join(source, f"matrix_eu-ic-io_{format_abbr}_{edition}ed_{year}.csv"), dtype = str)
+    log.info("Loaded IO table")
+    # None means "not requested or not found"
+    raw_supply = None
+    raw_use = None
+    if sut in ["supply", "both"]:
+        log.info(f"Check if supply table is available for year {year}...")
+        supply_path = os.path.join(source, f"matrix_eu-ic-supply_{edition}ed_{year}.csv")
+        if os.path.isfile(supply_path):
+            log.info("Supply table found, loading...")
+            raw_supply = pd.read_csv(supply_path, dtype = str)
+            log.info("Loaded supply table")
+    if sut in ["use", "both"]:
+        log.info(f"Check if use table is available for year {year}...")
+        use_path = os.path.join(source, f"matrix_eu-ic-use_{edition}ed_{year}.csv")
+        if os.path.isfile(use_path):
+            log.info("Use table found, loading...")
+            raw_use = pd.read_csv(use_path, dtype = str)
+            log.info("Loaded use table")
+    log.info("Extracting labels...")
+    df = pd.read_excel(os.path.join(source, f"Description_FIGARO_Tables({edition}ed).xlsx"),
+                       header=header_rows[edition],
+                       sheet_name = "Prod, Ind & Accounting items").dropna(axis=1, how='all')
+    # Countries are not in the correct order in the excel sheet, so get countries from raw data
+    column_labs = raw.columns[1:]
+    countries = column_labs.str.split("_").str[0]
+    countries = list(dict.fromkeys(countries))  # drop duplicates, keep first-seen order
+    # Get other labels from excel sheet
+    sectors = df["Label.1"].tolist()
+    cpa_labs = df["Label"].tolist()
+    va_labs = df["Label.2"].dropna().tolist()
+    y_labs = df["Label.3"].dropna().tolist()
+    labels = {
+        "countries": countries,
+        "sectors": sectors,
+        "y_labs": y_labs,
+        "va_labs": va_labs
+    }
+    c, s, y, va  = len(countries), len(sectors), len(y_labs), len(va_labs)
+    cpa = len(cpa_labs)
+    # The product labels are only registered when a supply or use table was loaded
+    if raw_supply is not None or raw_use is not None:
+        labels["cpa_labs"] = cpa_labs
+    log.info("Labels extracted")
+    log.info("Extracting parts from raw data...")
+    raw = raw.iloc[:, 1:].astype(float).to_numpy()
+    tables = {}
+    tables["t"] = raw[:c*s, :c*s]
+    tables["y"] = raw[:c*s, c*s:(c*s+c*y)]
+    tables["va"] = raw[c*s:(c*s+c*va), :c*s]
+    tables["vay"] = raw[c*s:(c*s+c*va), c*s:(c*s+c*y)]
+    log.info("Extracted parts from raw data")
+    # Treat supply table if available
+    if raw_supply is not None:
+        log.info("Extracting supply table...")
+        raw_supply = raw_supply.iloc[:, 1:].astype(float).to_numpy()
+        tables["sup"] = raw_supply[:c*cpa, :c*s]
+        log.info("Extracted supply table")
+    else:
+        log.info("No supply table found, skipping...")
+    # Treat use table if available
+    if raw_use is not None:
+        log.info("Extracting use table...")
+        raw_use = raw_use.iloc[:, 1:].astype(float).to_numpy()
+        tables["use_t"] = raw_use[:c*cpa, :c*s]
+        tables["use_y"] = raw_use[:c*cpa, c*s:c*s + c*y]
+        tables["use_va"] = raw_use[c*cpa:c*cpa+c*va, :c*s]
+        tables["use_vay"] = raw_use[c*cpa:(c*cpa+c*va), c*s:(c*s+c*y)]
+        log.info("Extracted use table")
+    else:
+        log.info("No use table found, skipping...")
+    # Assemble mrio object
+    log.info("Building MRIO object...")
+    m = MRIO()
+    m.add_dimensions(labels)
+    log.info("Building MRIO objects from parts containing labels and tables...")
+    # Row and column dimensions of every part that may have been extracted
+    part_dimensions = {
+        "t": [["countries", "sectors"], ["countries", "sectors"]],
+        "y": [["countries", "sectors"], ["countries", "y_labs"]],
+        "va": ["va_labs", ["countries", "sectors"]],
+        "vay": ["va_labs", ["countries", "y_labs"]],
+        "sup": [["countries", "cpa_labs"], ["countries", "sectors"]],
+        "use_t": [["countries", "cpa_labs"], ["countries", "sectors"]],
+        "use_y": [["countries", "cpa_labs"], ["countries", "y_labs"]],
+        "use_va": ["va_labs", ["countries", "sectors"]],
+        "use_vay": ["va_labs", ["countries", "y_labs"]],
+    }
+    for part_name, dimensions in part_dimensions.items():
+        if part_name not in tables:
+            continue
+        m.parts[part_name] = m.new_part(name=part_name,
+            data= tables[part_name],
+            dimensions = dimensions)
+        log.info(f"{part_name} part added")
+    log.info("MRIO object built")
+    # Add metadata
+    log.info("Adding metadata to MRIO object...")
+    m.metadata["table"] = "figaro"
+    m.metadata["edition"] = edition
+    m.metadata["year"] = year
+    m.metadata["format"] = format
+    m.metadata["sut"] = sut
+    m.name = f"figaro_{year}_{format}"
+    return m

mrio_toolbox/extractors/gloria/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""
+This module contains the gloria extractor
+"""

mrio_toolbox/extractors/gloria/gloria_extractor.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""
+Extractor for GLORIA data.
+This extractor loads GLORIA raw data files and converts them to NetCDF
+files.
+Supports GLORIA version 059
+https://ielab.info/labs/ielab-gloria
+Created on Fri Dec 20, 2024
+@author: wirth, based on code of beaufils
+"""
+import os
+import logging
+import numpy as np
+import pandas as pd
+from mrio_toolbox import MRIO
+from mrio_toolbox.utils.savers._to_nc import save_to_nc
+log = logging.getLogger(__name__)
+def extract_gloria(
+    year,
+    source,
+    markup = 1,
+    parts = "all",
+    precision=32):
+    """
+    Extract GLORIA data.
+    Loads GLORIA tables and labels and assembles them into an MRIO object for
+    further use with the mrio_toolbox library. Currently, this extractor
+    supports loading T, Y, VA, Q, and QY tables.
+    Put all tables (including emission satellite accounts) as well as the
+    'GLORIA_ReadMe_059a.xlsx' file in the 'GLORIA_MRIOs_59_{year}' subfolder
+    of the source folder.
+    Parameters
+    ----------
+    year : str
+        Data year to load.
+    source : path-like
+        Path to folder where raw data is stored
+    markup : int
+        Version of prices to load. Available versions:
+            1 : basic prices
+            2 : trade margins
+            3 : transport margins
+            4 : taxes on products
+            5 : subsidies on products
+        The value added table is always read from the Markup001 file.
+    parts : str
+        Data blocks to load:
+            basic : T, Y, VA
+            all : T, Y, VA, Q, QY
+    precision : int
+        Floating point precision in bits, either 32 or 64. Default is 32.
+        This introduces some rounding error for large numbers.
+    Returns
+    -------
+    MRIO
+        MRIO object holding the parts "T", "Y" and "VA", plus "Q" and "QY"
+        when the satellite tables were loaded.
+    Raises
+    ------
+    NotADirectoryError
+        If the year subfolder does not exist in the source folder.
+    ValueError
+        If the precision or one of the requested parts is not supported.
+    """
+    #Check source path
+    source = os.path.join(source, f"GLORIA_MRIOs_59_{year}")
+    if not os.path.exists(source):
+        log.error(f"{os.path.abspath(source)} does not exist.")
+        raise NotADirectoryError(f"{os.path.abspath(source)} does not exist.")
+    # Gloria comes with 164 regions (160 countries + rest of americas,
+    # rest of europe, rest of africa, rest of asia-pacific) and 120 sectors.
+    if parts == "all":
+        parts = ["T","Y","V","TQ","YQ"]
+    elif parts == "basic":
+        parts = ["T","Y", "V"]
+    dtypes = {32: np.float32, 64: np.float64}
+    if precision not in dtypes:
+        raise ValueError(f"Precision {precision} not supported. Please use 32 or 64.")
+    dt = dtypes[precision]
+    log.info(f"Data precision is {precision} bits")
+    log.info("Loading Gloria labels...")
+    # Read all label sheets with a single pass over the workbook
+    readme = pd.read_excel(
+        io = os.path.join(source, "GLORIA_ReadMe_059a.xlsx"),
+        sheet_name = ["Regions", "Sectors", "Value added and final demand", "Satellites"])
+    va_and_y_labs = readme["Value added and final demand"]
+    satellites = readme["Satellites"]
+    q_labs = (satellites["Sat_head_indicator"] + " - " + satellites["Sat_indicator"]
+              + " - " + satellites["Sat_unit"]).tolist()
+    labels = {}
+    labels["countries"] = readme["Regions"]["Region_acronyms"].tolist()
+    labels["sectors"] = readme["Sectors"]["Sector_names"].tolist()
+    labels["y_labs"] = va_and_y_labs["Final_demand_names"].tolist()
+    labels["q_labs"] = q_labs
+    labels["va_labs"] = va_and_y_labs["Value_added_names"].tolist()
+    log.info("Loaded Gloria labels")
+    log.info("Loading Gloria tables, this can take a while...")
+    tables = {}
+    for part in parts:
+        if part == "T" or part == "Y":
+            path = os.path.join(source, f'20240111_120secMother_AllCountries_002_{part}-Results_{year}_059_Markup00{markup}(full).csv')
+        elif part == "V":
+            path = os.path.join(source, f'20240419_120secMother_AllCountries_002_{part}-Results_{year}_059_Markup001(full).csv')
+        elif part == "TQ" or part == "YQ":
+            path = os.path.join(source, f'20240417_120secMother_AllCountries_002_{part}-Results_{year}_059_Markup00{markup}(full).csv')
+        else:
+            raise ValueError(f"Unknown part '{part}'. Available parts are T, Y, V, TQ and YQ.")
+        log.info(f"Loading {part} table...")
+        tables[part] = load_and_transform_to_IO_structure(path, part, dt)
+        log.info(f"Loaded {part} table")
+    # build an MRIO object from labels and tables
+    m = MRIO()
+    m.add_dimensions(labels)
+    m.parts["T"] = m.new_part(name="t",
+        data= tables["T"],
+        dimensions = [["countries","sectors"],["countries", "sectors"]])
+    log.info("Added T table")
+    m.parts["Y"] = m.new_part(name="y",
+        data= tables["Y"],
+        dimensions = [["countries","sectors"],["countries", "y_labs"]])
+    log.info("Added Y table")
+    m.parts["VA"] = m.new_part(name="va",
+        data= tables["V"],
+        dimensions = ["va_labs",["countries","sectors"]])
+    log.info("Added VA table")
+    # 'parts' was turned into a list above, so check which satellite tables
+    # were actually loaded instead of comparing 'parts' with "all".
+    if "TQ" in tables:
+        m.parts["Q"] = m.new_part(name="q",
+            data= tables["TQ"],
+            dimensions = ["q_labs",["countries","sectors"]])
+        log.info("Added Q table")
+    if "YQ" in tables:
+        m.parts["QY"] = m.new_part(name="qy",
+            data= tables["YQ"],
+            dimensions = ["q_labs",["countries","y_labs"]])
+        log.info("Added QY table")
+    m.name = f"gloria_{year}_markup00{markup}"
+    return m
+def load_and_transform_to_IO_structure(path, part, dt):
+    """
+    Load a raw GLORIA csv table and reduce it to the blocks used as IO table.
+    Rows and/or columns of the raw file are filtered in alternating groups of
+    120 (the number of sectors), depending on the table type. For the value
+    added table the diagonal blocks are additionally stacked side by side.
+    Parameters
+    ----------
+    path : path-like
+        Path to the comma separated raw table.
+    part : str
+        Table type: "T", "Y", "V", "TQ" or "YQ".
+    dt : numpy dtype
+        Floating point type used to load the table.
+    Returns
+    -------
+    numpy.ndarray
+        The filtered (and, for "V", restacked) table.
+    Raises
+    ------
+    ValueError
+        If 'part' is not one of the supported table types.
+    """
+    s = 120 # number of sectors
+    # For each table type: (filter the rows, filter the columns)
+    filters = {
+        "T": (True, True),
+        "Y": (True, False),
+        "V": (False, True),
+        "TQ": (False, True),
+        "YQ": (False, False),
+    }
+    # Validate before loading, because reading the raw file is expensive
+    if part not in filters:
+        raise ValueError(f"Unknown part '{part}'. Available parts are T, Y, V, TQ and YQ.")
+    filter_rows, filter_columns = filters[part]
+    table = np.loadtxt(path, dtype=dt, delimiter=',')
+    rows = np.arange(table.shape[0])
+    columns = np.arange(table.shape[1])
+    if filter_rows:
+        selected_rows = (rows // s) % 2 == 1 # Starts with 120 off, then 120 on
+    else:
+        selected_rows = rows
+    if filter_columns:
+        selected_columns = (columns // s) % 2 == 0 # starts with 120 on, then 120 off
+    else:
+        selected_columns = columns
+    table = table[selected_rows][:, selected_columns]
+    if part == "V":
+        # Stack the entries to transform the pseudo-diagonalized 984x19680 shape into a 6x19680 shape
+        block_height = 6
+        block_width = s
+        blocks = [
+            table[i*block_height:(i+1)*block_height,
+                  i*block_width:(i+1)*block_width]
+            for i in range(table.shape[0] // block_height)
+        ]
+        table = np.hstack(blocks)
+    return table

mrio_toolbox/extractors/gtap11/__init__.py ADDED Viewed

@@ -0,0 +1,7 @@
+"""
+This module contains the extractor for raw GTAP 11 data and the IO builder to transform it into an MRIO object.
+"""
+from .extraction.extractor import extract_gtap11
+from .gtap_mrio import build_io
+__all__ = ["extract_gtap11","build_io"]

mrio-toolbox 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

Potentially problematic release.

mrio-toolbox 1.0.0py3-none-any.whl → 1.1.1py3-none-any.whl