mrio-toolbox 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

@@ -0,0 +1,2 @@
+ from . import xarray
+ from . import pandas
@@ -0,0 +1,245 @@
+ """
+ Routines for converting between Pandas DataFrames and Parts objects.
+ """
+ 
+ import pandas as pd
+ import numpy as np
+ 
+ def to_pandas(part):
+     """Return the current Part object as a Pandas DataFrame
+ 
+     Only applicable to Parts objects with 1 or 2 dimensions.
+     """
+     if part.ndim > 2:
+         raise ValueError(
+             f"Cannot convert a Part with {part.ndim} dimensions to a DataFrame."
+         )
+     elif part.ndim == 2:
+         return pd.DataFrame(part.data,
+                             index=part.axes[0].label(True),
+                             columns=part.axes[1].label(True))
+     else:
+         return pd.DataFrame(part.data,
+                             index=part.axes[0].label(True))
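+ 
+ #Usage sketch: a DataFrame round trip, assuming `part` is a labelled
+ #1- or 2-D Part instance:
+ #
+ #    df = to_pandas(part)        #axis labels become the index/columns
+ #    part_data = make_part(df)   #data dict to rebuild a Part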
+ 
+ def make_part(df,name="from_df",
+               label_detection=False,
+               **kwargs):
+     """Load a Part object from a Pandas DataFrame
+ 
+     Parameters
+     ----------
+     df : DataFrame
+         DataFrame to load
+     name : str, optional
+         Name of the created Part, by default "from_df"
+     label_detection : bool, optional
+         Automatically detect labels, by default False.
+         If True, the DataFrame is scanned to detect labels
+         (defined as non-numeric data).
+ 
+     Returns
+     -------
+     dict
+         Data required to create the Part object
+     """
+     part_data = dict()
+     if label_detection:
+         df = autodecode_labels(df)
+     part_data["data"] = df.to_numpy()
+     ndim = df.ndim
+ 
+     labels = [convert_labels(df.index)]
+     if ndim == 2:
+         labels.append(convert_labels(df.columns))
+     labels = disambiguate_labels(labels)
+     part_data["labels"] = labels
+     part_data["groupings"] = kwargs.pop("groupings",dict())
+     part_data["metadata"] = kwargs.pop("metadata",dict())
+     part_data["name"] = name
+     for key in kwargs:
+         part_data["metadata"][key] = kwargs[key]
+     return part_data
+ 
+ def autodecode_labels(df):
+     """Automatically detect the labels from a DataFrame
+ 
+     This is done by identifying the indices and columns
+     with non-numeric values.
+     """
+     def test_selection(df,row,col):
+         """Test whether the block below and right of (row,col) is numeric"""
+         try:
+             for value in df.iloc[row:,col:].to_numpy().ravel():
+                 pd.to_numeric(value)
+             return True
+         except (ValueError,TypeError):
+             return False
+ 
+     def try_reduce(df,row,col):
+         """Try shrinking the rectangle by moving its corner down or right"""
+         if test_selection(df,row+1,col):
+             return row+1,col
+         elif test_selection(df,row,col+1):
+             return row,col+1
+         else:
+             return row+1,col+1
+ 
+     def try_expand(df,row,col):
+         """Try expanding the rectangle to the left or up"""
+         if row > 0 and test_selection(df,row-1,col):
+             return row-1,col
+         elif col > 0 and test_selection(df,row,col-1):
+             return row,col-1
+         else:
+             return row,col
+ 
+     def find_rectangle(df):
+         """Find the largest rectangle with only numeric data"""
+         row = 0
+         col = 0
+         while not test_selection(df,row,col):
+             row,col = try_reduce(df,row,col)
+         #After the first while loop, we found only numeric data
+         #We now expand to the top and the left
+         #to make sure we didn't crop numerical data
+         expanded = try_expand(df,row,col)
+         while expanded != (row,col):
+             row,col = expanded
+             expanded = try_expand(df,row,col)
+         return row,col
+ 
+     #First, we find the largest rectangle with only numeric data
+     row,col = find_rectangle(df)
+ 
+     #And we remove potential NaN axes and ensure types are ok
+     data = pd.DataFrame(
+         data=df.iloc[row:,col:],
+         dtype=np.float64)
+ 
+     #We count all-NaN axes as they offset label names
+     row_offset = data.isna().all(axis=1).sum()
+     col_offset = data.isna().all(axis=0).sum()
+ 
+     data = data.dropna(axis=0,how="all")
+     data = data.dropna(axis=1,how="all")
+ 
+     #Then, we build the labels
+     #Column labels come from the header rows above the data block
+     if row > 0:
+         if col > 0:
+             col_names = df.iloc[:row,col-1+col_offset].to_list()
+         else:
+             col_names = [None]*row
+         if row > 1:
+             labels = []
+             sel = df.iloc[:row,col:].transpose()
+             for column in sel.columns:
+                 labels.append(sel[column].dropna().unique())
+             columns = pd.MultiIndex.from_product(
+                 labels,
+                 names=col_names)
+         else:
+             columns = pd.Index(
+                 df.iloc[:row,col:].values.flatten(),
+                 name=col_names[0]
+             )
+     else:
+         columns = None
+     #Row labels come from the columns left of the data block
+     if col > 0:
+         if row > 0:
+             index_names = df.iloc[row-1+row_offset,:col].to_list()
+         else:
+             index_names = [None]*col
+         if col > 1:
+             labels = []
+             sel = df.iloc[row+row_offset:,:col]
+             for column in sel.columns:
+                 labels.append(
+                     list(sel[column].dropna().unique())
+                 )
+             index = pd.MultiIndex.from_product(
+                 labels,
+                 names=index_names)
+         else:
+             index = pd.Index(
+                 list(df.iloc[row:,:col].values.flatten()),
+                 name=index_names[0]
+             )
+     else:
+         index = None
+ 
+     #We build the formatted DataFrame
+     output = pd.DataFrame(
+         data=data.values,
+         columns=columns,
+         index=index
+     )
+ 
+     return output
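+ 
+ #Sketch of the kind of raw sheet autodecode_labels targets, with one
+ #header row and one label column (hypothetical values):
+ #
+ #    raw = pd.DataFrame([
+ #        [None, "FRA", "DEU"],
+ #        ["crop", 1.0, 2.0],
+ #        ["fuel", 3.0, 4.0],
+ #    ])
+ #    autodecode_labels(raw)   #numeric block found at (row=1, col=1)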
+ 
+ def convert_labels(index):
+     """Convert a Pandas Index to a list of {name: values} label dictionaries
+ 
+     Parameters
+     ----------
+     index : Index
+         Pandas Index to convert
+     """
+     output = []
+     if isinstance(index,pd.MultiIndex):
+         for i in range(index.nlevels):
+             name = index.names[i]
+             if name is None:
+                 name = f"level_{i}"
+             output.append(
+                 {name : list(index.levels[i].values)}
+             )
+         return output
+     if index.name is None:
+         return [{0:list(index.array)}]
+     return [{index.name:list(index.array)}]
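+ 
+ #Sketch with a hypothetical two-level index:
+ #
+ #    idx = pd.MultiIndex.from_product(
+ #        [["FRA", "DEU"], ["crop", "fuel"]],
+ #        names=["region", "sector"])
+ #    convert_labels(idx)
+ #    #[{'region': ['FRA', 'DEU']}, {'sector': ['crop', 'fuel']}]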
+ 
+ def disambiguate_labels(labels):
+     """Disambiguate the labels
+ 
+     This allows solving label ambiguity if a name was incorrectly loaded:
+     a dimension whose values already appeared under another name
+     is mapped back to the name of the first occurrence.
+ 
+     Parameters
+     ----------
+     labels : list of list of dict of str:list of str
+         Labels to disambiguate, one list of {name: values} dicts per axis
+     """
+     ordered = []
+     cleared = dict()
+     values = []
+     for label in labels:
+         ordered.append([])
+         for level in range(len(label)):
+             name = list(label[level].keys())[0]
+             value = list(label[level].values())[0]
+             if name not in cleared.keys():
+                 if value in values:
+                     #We have a duplicate
+                     #We use the first occurrence as reference
+                     ref_name = list(cleared.keys())[
+                         list(cleared.values()).index(value)
+                     ]
+                     ordered[-1].append(
+                         {ref_name:value}
+                     )
+                     cleared[name] = value
+                     continue
+                 ordered[-1].append(label[level])
+                 cleared[name] = value
+                 values.append(value)
+             else:
+                 #The name was already cleared: keep the label as is
+                 ordered[-1].append(label[level])
+ 
+     return ordered
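+ 
+ #Sketch: the second axis was loaded under a default name but carries the
+ #same values as "sector", so the first name is reused (hypothetical data):
+ #
+ #    disambiguate_labels([
+ #        [{"sector": ["crop", "fuel"]}],
+ #        [{"level_0": ["crop", "fuel"]}],
+ #    ])
+ #    #[[{'sector': ['crop', 'fuel']}], [{'sector': ['crop', 'fuel']}]]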
@@ -0,0 +1,141 @@
+ """
+ Routines for converting between xarray DataArrays and Parts objects.
+ """
+ 
+ import pandas as pd
+ import xarray as xr
+ import numpy as np
+ 
+ def to_DataArray(part):
+     """
+     Convert a Part object to an xarray DataArray
+ 
+     Labels are directly passed to the DataArray as coords.
+ 
+     Returns
+     -------
+     xr.DataArray
+         Corresponding DataArray
+     """
+     developed = part.develop()
+     old_dims = part.get_dimensions()
+     new_dims = developed.get_dimensions()
+     if old_dims != new_dims:
+         #We encode the original dimensions in the metadata
+         #because netCDF files do not support multi-level attributes
+         original_dims = [
+             dim for axe in old_dims for dim in axe+["_sep_"]
+         ]
+         #The final slice removes the trailing separator
+         part.metadata["_original_dimensions"] = original_dims[:-1]
+     coords = list()
+     for axe in developed.axes:
+         coords.append(
+             axe.label(True)
+         )
+     return xr.DataArray(
+         data=developed.data,
+         name=part.name,
+         attrs=part.metadata,
+         coords=coords
+     )
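+ 
+ #Usage sketch, assuming `part` is a labelled Part instance:
+ #
+ #    da = to_DataArray(part)
+ #    da.to_netcdf("part.nc")   #labels and metadata travel as coords/attrs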
+ 
+ def to_DataSet(mrio):
+     """
+     Convert an MRIO object to an xarray Dataset
+ 
+     Each Part becomes a data variable of the Dataset.
+     """
+     ds = xr.Dataset(
+         attrs=mrio.metadata,
+         coords=mrio.labels
+     )
+     for part in mrio.parts:
+         ds[part] = mrio.parts[part].to_xarray()
+     return ds
+ 
+ def make_part(data,**kwargs):
+     """
+     Load a Part object from an xarray DataArray
+ 
+     Parameters
+     ----------
+     data : DataArray or Dataset
+         Data to load
+     name : str, optional
+         Name of the data variable to load, by default None.
+         This can be left empty if there's a single variable in the Dataset.
+ 
+     Returns
+     -------
+     dict
+         Data required to create the Part object
+     """
+     part_data = dict()
+ 
+     if isinstance(data,xr.Dataset):
+         #Extract the data from the Dataset
+         list_vars = list(data.data_vars)
+         if len(list_vars) > 1:
+             #In ambiguous cases, the name must be provided
+             name = kwargs.get("name",None)
+         else:
+             name = list_vars[0]
+         data = data[name]
+     elif isinstance(data,xr.DataArray):
+         name = data.name
+ 
+     part_data["data"] = data.to_numpy()
+ 
+     #Format the labels
+     labels = []
+     for key in data.dims:
+         label = dict()
+         index = data.indexes[key]
+         if isinstance(index,pd.MultiIndex):
+             for i in range(index.nlevels):
+                 level_name = index.names[i]
+                 if level_name is None:
+                     level_name = i
+                 label[str(level_name)] = index.get_level_values(i).tolist()
+         else:
+             label[index.name] = index.values.tolist()
+         labels.append(label)
+     part_data["name"] = name
+     part_data["labels"] = labels
+     part_data["metadata"] = kwargs.get("metadata",dict())
+     for attr in data.attrs:
+         #Add metadata
+         part_data["metadata"][attr] = data.attrs[attr]
+     part_data["groupings"] = kwargs.get("groupings",dict())
+     return part_data
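+ 
+ #Usage sketch with a hypothetical DataArray:
+ #
+ #    da = xr.DataArray(
+ #        np.eye(2),
+ #        coords={"region": ["FRA", "DEU"], "sector": ["crop", "fuel"]},
+ #        dims=("region", "sector"),
+ #        name="A")
+ #    make_part(da)["labels"]
+ #    #[{'region': ['FRA', 'DEU']}, {'sector': ['crop', 'fuel']}]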
+ 
+ def make_mrio(data,**kwargs):
+     """
+     Load an MRIO object from an xarray Dataset
+ 
+     Parameters
+     ----------
+     data : Dataset
+         Dataset to load
+ 
+     Returns
+     -------
+     dict
+         Data required to create the MRIO object
+     list
+         Names of the parts to load
+     """
+     #Extract the data from the xarray
+     list_vars = list(data.data_vars)
+     to_load = kwargs.get("parts",list_vars)
+ 
+     mrio_data = dict()
+ 
+     labels = dict()
+     for coord in data.coords:
+         #Uncompress MultiIndex data if needed
+         if "compress" in data[coord].attrs:
+             import cf_xarray as cfxr
+             data = cfxr.decode_compress_to_multi_index(data,coord)
+         labels[coord] = data[coord].values.tolist()
+     mrio_data["labels"] = labels
+     mrio_data["groupings"] = kwargs.get("groupings",dict())
+     mrio_data["groupings"].update(data.attrs.get("groupings",dict()))
+     mrio_data["metadata"] = data.attrs
+     mrio_data["metadata"].update(kwargs.get("metadata",dict()))
+     mrio_data["parts"] = dict()
+     return {"data":mrio_data},to_load
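+ 
+ #Usage sketch, assuming `mrio` is an MRIO instance:
+ #
+ #    ds = to_DataSet(mrio)               #one data variable per Part
+ #    mrio_data, to_load = make_mrio(ds)  #labels/metadata and parts to load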
@@ -0,0 +1,3 @@
+ from mrio_toolbox.utils.loaders._loader_factory import make_loader
+ 
+ __all__ = ["make_loader"]
@@ -0,0 +1,256 @@
+ """
+ Central loading module for the mrio_toolbox package.
+ 
+ This module contains the central loading function for the mrio_toolbox package.
+ Depending on the loading mode, it calls the appropriate loader.
+ """
+ 
+ import os
+ import logging
+ import yaml
+ 
+ log = logging.getLogger(__name__)
+ 
+ class Loader:
+     """
+     Parent class for the loaders
+     """
+     def __init__(self):
+         """
+         Loaders are created with format-specific parameters.
+ 
+         They hold metadata and methods to load MRIO data.
+ 
+         A loader is created using the base class if no specific loader
+         is required, i.e., if the data is loaded directly from a dict,
+         pandas or xarray object. In that case, the loader fails when
+         used, triggering the creation of a specific loader.
+         """
+         self.load_mrio()
+ 
+     def extract_basic_info(self,**kwargs):
+         """
+         Extract basic information from the loader.
+ 
+         The function extracts the path, labels and groupings from the loader.
+         """
+         self.loader_kwargs = kwargs.pop("loader_kwargs",dict())
+         self.file = kwargs.pop("file",None)
+         self.groupings = kwargs.pop("groupings",dict())
+         self.labels = kwargs.pop("labels",dict())
+         #Remaining kwargs are metadata
+         self.metadata = kwargs
+         if isinstance(self.groupings,str):
+             self.groupings = self.load_groupings(self.groupings)
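+ 
+     #Usage sketch: recognised keywords become attributes, anything else
+     #is kept as metadata (hypothetical values):
+     #
+     #    loader.extract_basic_info(file="mrio.nc", year=2016)
+     #    loader.metadata   #-> {'year': 2016}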
47
+
48
+ def update_settings(self,**settings):
49
+ """
50
+ Update the loader settings with new parameters
51
+ """
52
+ self.loader_kwargs.update(
53
+ settings.pop("loader_kwargs",dict())
54
+ )
55
+ self.groupings.update(
56
+ settings.pop("groupings",dict())
57
+ )
58
+ self.labels.update(
59
+ settings.pop("labels",dict())
60
+ )
61
+ self.metadata.update(
62
+ settings.pop("metadata",dict())
63
+ )
64
+ self.metadata.update(settings)
+ 
+ 
+     def load_mrio(self):
+         """
+         Create an MRIO container based on the new parameters
+ 
+         The base implementation only resets the metadata, labels and
+         groupings attributes; subclasses override it to load actual data.
+         """
+         self.metadata = dict()
+         self.labels = dict()
+         self.groupings = dict()
+ 
+     def load_part(self,**kwargs):
+         """
+         Load an MRIO Part based on new or existing parameters
+ 
+         Returns
+         -------
+         dict
+             Dictionary containing the Part data
+         """
+         raise FileNotFoundError(
+             "No proper loader was initialised.\n"
+             "The loader needs to be reloaded with new instructions."
+         )
+ 
+     def set_groupings(self,groupings):
+         """
+         Update the groupings attribute of the loader
+ 
+         Parameters
+         ----------
+         groupings : dict of dict of str
+             Aggregation on labels
+         """
+         self.groupings = groupings
+ 
+     def update_attributes(self,**kwargs):
+         """
+         Update the current attributes of the loader.
+ 
+         The function updates the groupings, paths, labels and metadata attributes.
+         """
+         if "groupings" in kwargs:
+             log.debug("Update groupings")
+             self.groupings = kwargs.pop("groupings",self.groupings)
+ 
+         self.extract_path(update=True,**kwargs)
+ 
+         if "labels" in kwargs:
+             log.debug("Update labels")
+             self.format_labels(kwargs.pop("labels"))
+ 
+         for kwarg in kwargs:
+             log.debug(f"Override parameter {kwarg} with explicit value {kwargs[kwarg]}")
+             self.metadata[kwarg] = kwargs[kwarg]
+ 
+     def load_groupings(self,
+                        file,
+                        dimension=None,
+                        path=None):
+         """Load groupings from a file
+ 
+         Parameters
+         ----------
+         file : str or dict
+             Name of the file to load, or an explicit groupings dict
+         dimension : str, optional
+             Name of the dimension to load groupings for.
+             By default (None), the file is interpreted as a preset
+             of groupings on different dimensions.
+         path : path-like, optional
+             Path where the file is stored.
+             By default, the groupings are read from the
+             parameters/groupings directory in the working dir.
+         """
+         def _check_groupings(groupings,dimension):
+             """Check whether the groupings are consistent with the labels"""
+             #Iterate over a copy of the keys so groups can be dropped safely
+             for key in list(groupings.keys()):
+                 kept = []
+                 for item in groupings[key]:
+                     if item not in self.labels[dimension]:
+                         log.warning(
+                             f"Item {item} not found in {dimension} labels"
+                         )
+                     else:
+                         kept.append(item)
+                 groupings[key] = kept
+                 if len(groupings[key])==0:
+                     log.warning(f"Group {key} is empty")
+                     groupings.pop(key)
+             return groupings
+ 
+         def load_grouping(file,level,path):
+             """Load a single grouping file"""
+             path = os.path.join(path,level)
+             with open(os.path.join(path,file+'.txt')) as f:
+                 group = f.read().splitlines()
+             return {file:group}
+ 
+         if path is None:
+             path = os.path.join("parameters","groupings")
+ 
+         #If no dimension is specified, interpret as a preset
+         output = dict()
+         if isinstance(file,str):
+             log.info("Load groupings set from "+os.path.join(path,file))
+             with open(os.path.join(path,file)) as f:
+                 groupings = yaml.safe_load(f)
+         elif isinstance(file,dict):
+             groupings = file
+             output = self.groupings
+ 
+         if dimension is None:
+             dimensions = list(groupings.keys())
+             output = dict()
+             for level in dimensions:
+                 if isinstance(groupings[level],dict):
+                     #Case the preset explicitly defines a grouping
+                     groupings[level] = _check_groupings(
+                         groupings[level],level
+                     )
+                     output[level] = groupings[level]
+                     continue
+                 if isinstance(groupings[level],str):
+                     groupings[level] = [groupings[level]]
+                 if isinstance(groupings[level],list):
+                     #Otherwise, interpret as a list of groupings
+                     output[level] = dict()
+                     covered = []
+                     for item in groupings[level]:
+                         #Load all groupings
+                         groups = load_grouping(
+                             item,level,path
+                         )
+                         duplicate = [
+                             member for member in groups[item]
+                             if member in covered
+                         ]
+                         if duplicate:
+                             log.warning(
+                                 "The following items are covered in "
+                                 "multiple groupings: "+", ".join(duplicate)
+                             )
+                         covered += groups[item]
+                         output[level].update(groups)
+         else:
+             #Interpret the file as groupings for the given dimension
+             output[dimension] = _check_groupings(groupings,dimension)
+         return output
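+ 
+     #Sketch of a hypothetical preset parameters/groupings/preset.yaml:
+     #
+     #    regions:
+     #        EU: [FRA, DEU]
+     #    sectors: primary
+     #
+     #"regions" is an explicit grouping; "primary" points to a grouping file
+     #parameters/groupings/sectors/primary.txt with one item per line.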
+ 
+     def set_labels(self,labels):
+         """
+         Update the labels attribute of the loader
+ 
+         Parameters
+         ----------
+         labels : dict of str:list of str
+             Labels of the axes
+         """
+         self.labels = labels
+ 
+     def check_instructions(self,**kwargs):
+         """
+         Interpret the file argument for loading a part.
+ 
+         This method solves the ambiguity between data files and optional
+         .yaml instructions.
+         If the file argument refers to an instruction file, it is compared
+         to the current instructions.
+         If the data file or instruction file differs from the one currently
+         loaded, an exception is raised to force a reload.
+ 
+         Parameters
+         ----------
+         kwargs : additional arguments
+             May include `instructions`, the path of a yaml instruction file
+ 
+         Raises
+         ------
+         FileNotFoundError
+             If the loader needs to be reloaded with new instructions.
+         """
+         #The 'instructions' attribute is used to check whether the loader
+         #needs to be reloaded. It holds the reference to the potential
+         #yaml file used to load the data.
+         new_instructions = kwargs.get("instructions",None)
+         ref_instructions = self.metadata.get("instructions",None)
+         if new_instructions is not None and ref_instructions != new_instructions:
+             #If the instructions differ from the current ones,
+             #trigger a reload of the loader
+             log.error("The loader needs to be reloaded with new instructions.")
+             raise FileNotFoundError(
+                 "The loader needs to be reloaded with new instructions."
+             )
+ raise FileNotFoundError("The loader needs to be reloaded with new instructions.")