SankeyExcelParser-1.0.0b0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. SankeyExcelParser/__init__.py +0 -0
  2. SankeyExcelParser/io_excel.py +1867 -0
  3. SankeyExcelParser/io_excel_constants.py +811 -0
  4. SankeyExcelParser/sankey.py +3138 -0
  5. SankeyExcelParser/sankey_utils/__init__.py +0 -0
  6. SankeyExcelParser/sankey_utils/data.py +1118 -0
  7. SankeyExcelParser/sankey_utils/excel_source.py +31 -0
  8. SankeyExcelParser/sankey_utils/flux.py +344 -0
  9. SankeyExcelParser/sankey_utils/functions.py +278 -0
  10. SankeyExcelParser/sankey_utils/node.py +340 -0
  11. SankeyExcelParser/sankey_utils/protos/__init__.py +0 -0
  12. SankeyExcelParser/sankey_utils/protos/flux.py +84 -0
  13. SankeyExcelParser/sankey_utils/protos/node.py +386 -0
  14. SankeyExcelParser/sankey_utils/protos/sankey_object.py +135 -0
  15. SankeyExcelParser/sankey_utils/protos/tag_group.py +95 -0
  16. SankeyExcelParser/sankey_utils/sankey_object.py +165 -0
  17. SankeyExcelParser/sankey_utils/table_object.py +37 -0
  18. SankeyExcelParser/sankey_utils/tag.py +95 -0
  19. SankeyExcelParser/sankey_utils/tag_group.py +206 -0
  20. SankeyExcelParser/su_trace.py +239 -0
  21. SankeyExcelParser/tests/integration/__init__.py +0 -0
  22. SankeyExcelParser/tests/integration/test_base.py +356 -0
  23. SankeyExcelParser/tests/integration/test_run_check_input.py +100 -0
  24. SankeyExcelParser/tests/integration/test_run_conversions.py +96 -0
  25. SankeyExcelParser/tests/integration/test_run_load_input.py +94 -0
  26. SankeyExcelParser/tests/unit/__init__.py +0 -0
  27. SankeyExcelParser-1.0.0b0.data/scripts/run_parse_and_write_excel.py +155 -0
  28. SankeyExcelParser-1.0.0b0.data/scripts/run_parse_excel.py +115 -0
  29. SankeyExcelParser-1.0.0b0.dist-info/METADATA +113 -0
  30. SankeyExcelParser-1.0.0b0.dist-info/RECORD +32 -0
  31. SankeyExcelParser-1.0.0b0.dist-info/WHEEL +5 -0
  32. SankeyExcelParser-1.0.0b0.dist-info/top_level.txt +1 -0
SankeyExcelParser/sankey_utils/excel_source.py
@@ -0,0 +1,31 @@
+ """
+ Author : Vincent LE DOZE
+ Date : 31/05/23
+
+ This file contains the description of the ExcelSource class
+
+ """
+
+ # External libs ----------------------------------------------------------------
+ import pandas as pd
+
+
+ # CLASS ----------------------------------------------------------------------------
+ class ExcelSource(object):
+     user_sheet_name: str
+     sheet_type: str
+     _table: pd.DataFrame
+
+     def __init__(
+         self,
+         user_sheet_name: str,
+         sheet_type: str,
+     ):
+         self.user_sheet_name = user_sheet_name
+         self.sheet_type = sheet_type
+
+     def save_origin_table(
+         self,
+         table: pd.DataFrame
+     ):
+         self._table = table
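A minimal usage sketch for the class above (the sheet name, sheet type and table contents are illustrative assumptions, not values from the package):

# Hypothetical usage sketch for ExcelSource -- names and values are illustrative only.
import pandas as pd

from SankeyExcelParser.sankey_utils.excel_source import ExcelSource

# Wrap a sheet read elsewhere with pandas into an ExcelSource object.
source = ExcelSource(user_sheet_name='my flux sheet', sheet_type='flux')
raw_table = pd.DataFrame({'orig': ['A'], 'dest': ['B'], 'value': [1.0]})
source.save_origin_table(raw_table)  # stores the original table on the object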
SankeyExcelParser/sankey_utils/flux.py
@@ -0,0 +1,344 @@
+ """
+ Author : Vincent LE DOZE
+ Date : 31/05/23
+
+ This file contains the description of the Flux class
+
+ """
+
+ # Local modules -----------------------------------------------------
+ from SankeyExcelParser.sankey_utils.protos.flux import _ProtoFlux
+ from SankeyExcelParser.sankey_utils.data import Data
+ from SankeyExcelParser.sankey_utils.data import DataConstraint
+ from SankeyExcelParser.sankey_utils.data import DataMinMax
+ from SankeyExcelParser.sankey_utils.data import MCData
+ from SankeyExcelParser.sankey_utils.node import Node
+
+
+ # CLASS ----------------------------------------------------------------------------
+ class Flux(_ProtoFlux):
+     """
+     Define a flux.
+     Inherits from `_ProtoFlux`.
+
+     Parameters
+     ----------
+     :param orig: Flux starting node
+     :type orig: Node
+
+     :param dest: Flux ending node
+     :type dest: Node
+
+     :param datas: All datas for the flux
+     :type datas: list [Data, ...]
+
+     :param results: All results for the flux
+     :type results: list [Data, ...]
+     """
+
+     def __init__(
+         self,
+         orig: Node,
+         dest: Node,
+         **kwargs
+     ):
+         # Init super constructor
+         _ProtoFlux.__init__(self, orig, dest)
+         self._orig.add_output_flux(self)
+         self._dest.add_input_flux(self)
+         # Datas
+         self._datatags_combinations = []
+         self._datas = []
+         self._results = []
+         self._monte_carlo = None
+         # Init constraints values for all datas
+         self._min_max = DataMinMax(self)
+         self._max = None
+         self._constraints = {}
+         # Update values
+         self.update(**kwargs)
+
+     def update(
+         self,
+         **kwargs
+     ):
+         for key, value in kwargs.items():
+             setattr(self, key, value)
+
+     @property
+     def datas(self):
+         return self._datas
+
+     def add_data(self, _):
+         if type(_) is float:
+             data = Data(value=_)
+             self._datas.append(data)
+             data.flux = self
+             return
+         if type(_) is Data:
+             self._datas.append(_)
+             _.flux = self
+             return
+
+     def has_data(self):
+         return len(self._datas) > 0
+
+     def instanciate_all_datas(
+         self,
+         data_taggs={}
+     ):
+         """
+         Create datas according to the data tags present in a table line.
+         A data can only have one tag for every given data tag group.
+         If multiple tags are given, then we must create one data for each of them and link them
+         to the reference flux.
+
+         Parameters
+         ----------
+         :param data_taggs: Data tag groups, indexed by group name
+         :type data_taggs: dict as {key=TagGroup.name: value=TagGroup, ...} (default={})
+
+         Returns
+         -------
+         :return: Nothing; the created datas are appended to the flux
+         :rtype: None
+         """
+         # If no data tag -> Create only one data
+         if len(data_taggs) == 0:
+             # Add only one data
+             self.add_data(Data())
+             # Add empty datatags combinations
+             self._datatags_combinations.append([])
+             return
+         # Otherwise create datas recursively
+         self._recursive_instanciate_all_datas(
+             [list(_.tags.values()) for _ in data_taggs.values()])
+
+     def _recursive_instanciate_all_datas(
+         self,
+         data_tags_per_tagg,
+         data_tags_per_datas=[[]]
+     ):
+         """
+         Create datas recursively according to the data tags present in a table line.
+         A data can only have one tag for every given data tag group.
+         If multiple tags are given, then we must create one data for each of them and link them
+         to the reference flux.
+
+         Parameters
+         ----------
+         :param data_tags_per_tagg: List of data tags regrouped per data tag group.
+         :type data_tags_per_tagg: list as [list as [Tag, ...], ...]
+
+         :param data_tags_per_datas: Must not be touched; used for recursion.
+         :type data_tags_per_datas: list (default=[[]])
+
+         Returns
+         -------
+         :return: Nothing; the created datas are appended to the flux
+         :rtype: None
+         """
+         # Check if we arrived at the end of the recursion on data tags
+         if len(data_tags_per_tagg) == 0:
+             # Create a unique data for each combination of data tags
+             for data_tags in data_tags_per_datas:
+                 # Create Data
+                 data = Data()
+                 # Add tags
+                 for data_tag in data_tags:
+                     data.add_tag(data_tag)
+                 # Add data to flux
+                 self.add_data(data)
+                 # Save data_tags_per_datas as keys
+                 self._datatags_combinations.append(data_tags.copy())
+         # Otherwise we continue to recurse
+         else:
+             # Get the data tags of a single data tag group
+             data_tags = data_tags_per_tagg.pop()
+             # Create the list of unique data tags for each data
+             all_data_tags_per_datas = []
+             for data_tag in data_tags:
+                 # data_tags_per_datas = [[tag1_1, tag2_1], [tag1_1, tag2_2], [tag1_2, tag2_1], ...]
+                 # with data tag group 1 = (tag1_1, tag1_2)
+                 #      data tag group 2 = (tag2_1, tag2_2)
+                 # So, if we have data tag group 3 = (tag3_1, tag3_2, ...)
+                 # we must copy each list from data_tags_per_datas and append tag3_1,
+                 # then make other copies to append tag3_2, etc.
+                 new_data_tags_per_datas = []
+                 for data_tags_per_data in data_tags_per_datas:
+                     new_data_tags_per_data = data_tags_per_data.copy()
+                     new_data_tags_per_data.append(data_tag)
+                     new_data_tags_per_datas.append(new_data_tags_per_data)
+                 all_data_tags_per_datas += new_data_tags_per_datas
+             # We recurse to deal with the remaining data tag groups
+             self._recursive_instanciate_all_datas(
+                 data_tags_per_tagg,
+                 data_tags_per_datas=all_data_tags_per_datas)
+
+     def get_corresponding_datas_from_tags(
+         self,
+         datatags_to_match,
+         fluxtags_to_match=[]
+     ):
+         """
+         Get the list of datas that correspond to the input lists of tags.
+
+         Parameters
+         ----------
+         :param datatags_to_match: List of data tags to check
+         :type datatags_to_match: list[Tag, ...]
+
+         Returns
+         -------
+         :return: List of corresponding datas
+         :rtype: list[Data, ...]
+         """
+         # Init list of matched datas
+         matched_datas = []
+         # Match // datatags
+         for datatags, data in zip(self._datatags_combinations, self._datas):
+             ok_match_datatags = False
+             # If all current datatags are contained in the datatags to match
+             if set(datatags_to_match).issuperset(set(datatags)):
+                 ok_match_datatags = True
+             # If all datatags to match are contained in the current datatags
+             if set(datatags).issuperset(set(datatags_to_match)):
+                 ok_match_datatags = True
+             # Check flux tags also
+             if ok_match_datatags:
+                 # If flux tags are related to the current data
+                 if set(data.tags).issuperset(set(fluxtags_to_match)):
+                     matched_datas.append(data)
+         # Output
+         return matched_datas
+
+     @property
+     def results(self):
+         return self._results
+
+     def add_result(self, _):
+         if type(_) is float:
+             result = Data(value=_)
+             self._results.append(result)
+             result.flux = self
+             return
+         if type(_) is Data:
+             self._results.append(_)
+             _.flux = self
+             return
+
+     def has_result(self):
+         return len(self._results) > 0
+
+     def reset_results(self):
+         # Remove alterego links with datas
+         for result in self._results:
+             result.alterego = None
+         # Empty list
+         self._results = []
+
+     def get_corresponding_results_from_tags(self, tags):
+         """
+         Get the list of results that correspond to the input list of tags.
+
+         Parameters
+         ----------
+         :param tags: Tags to match
+         :type tags: list[Tag, ...]
+
+         Returns
+         -------
+         :return: List of corresponding results
+         :rtype: list[Data, ...]
+         """
+         results = set(self._results)
+         for tag in tags:
+             results &= set(tag.references)
+         return list(results)
+
+     @property
+     def monte_carlo(self):
+         return self._monte_carlo
+
+     def add_monte_carlo(
+         self,
+         starting_mean_value,
+         starting_sigma,
+         result_mean_value,
+         result_sigma,
+         result_min,
+         result_max
+     ):
+         self._monte_carlo = MCData(flux=self)
+         self._monte_carlo.starting_data = Data(
+             value=starting_mean_value,
+             sigma=starting_sigma)
+         self._monte_carlo.result_data = Data(
+             value=result_mean_value,
+             sigma=result_sigma)
+         self._monte_carlo.min = result_min
+         self._monte_carlo.max = result_max
+
+     @property
+     def min_max(self):
+         return self._min_max
+
+     @property
+     def min(self):
+         return self._min_max.min_val
+
+     @min.setter
+     def min(self, _):
+         self._min_max.min = _
+
+     @property
+     def max(self):
+         return self._min_max.max_val
+
+     @max.setter
+     def max(self, _):
+         self._min_max.max = _
+
+     @property
+     def constraints(self):
+         return self._constraints
+
+     def add_constraint(self, id_constraint, **kwargs):
+         # Create a new piece of constraint
+         constraint = DataConstraint(id_constraint, self, **kwargs)
+         # Update constraint for given id
+         if id_constraint in self._constraints.keys():
+             self._constraints[id_constraint].append(constraint)
+         else:
+             self._constraints[id_constraint] = [constraint]
+         # Return constraint
+         return constraint
+
+     def get_as_dict(self):
+         # Init output
+         output = {}
+         # Get values
+         output['orig'] = self.orig.name
+         output['dest'] = self.dest.name
+         output['datas'] = []
+         for data in self.datas:
+             output['datas'].append(data.get_as_dict())
+         return output
+
+     def __repr__(self):
+         if self.has_data():
+             if len(self.datas) > 1:
+                 return '{0} --- [{2}, ...] ---> {1}'.format(
+                     self._orig.name,
+                     self._dest.name,
+                     self.datas[0])
+             else:
+                 return '{0} --- {2} ---> {1}'.format(
+                     self._orig.name,
+                     self._dest.name,
+                     self.datas[0])
+         else:
+             return '{0} --- {2} ---> {1}'.format(
+                 self._orig.name,
+                 self._dest.name,
+                 'No data')
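The recursive expansion in `Flux._recursive_instanciate_all_datas` above amounts to picking one tag from each data tag group and creating one Data per combination. A minimal, standalone sketch of that expansion (plain strings stand in for Tag objects; the ordering of combinations may differ from the package's recursion):

# Standalone illustration of the tag-combination expansion -- not code from the package.
from itertools import product

# One list of tags per data tag group (strings stand in for Tag objects).
tags_per_group = [['tag1_1', 'tag1_2'], ['tag2_1', 'tag2_2']]

# One combination per future Data: one tag picked from each group.
combinations = [list(combo) for combo in product(*tags_per_group)]
print(combinations)
# [['tag1_1', 'tag2_1'], ['tag1_1', 'tag2_2'], ['tag1_2', 'tag2_1'], ['tag1_2', 'tag2_2']]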
SankeyExcelParser/sankey_utils/functions.py
@@ -0,0 +1,278 @@
+ """
+ Author : Vincent LE DOZE
+ Date : 31/05/23
+
+ This file contains functions used in sankey_utils modules
+
+ """
+
+ # External libs ----------------------------------------------------------------
+ import pandas as pd
+ import re
+ import webcolors
+
+ # External modules ------------------------------------------------------------
+ from unidecode import unidecode
+
+
+ # FUNCTIONS -------------------------------------------------------------------
+ def _stdStr(s):
+     """
+     Returns a standardized format for the input string(s).
+
+     Parameters
+     ----------
+     :param s: input.
+     :type s: str | set | list
+
+     Returns
+     -------
+     :return: formatted string(s).
+     :rtype: same type as input
+     """
+     if type(s) is str:
+         return unidecode(s).strip().lower().replace('.0', '')
+     if type(s) is set:
+         new_s = set()
+         for _ in s:
+             new_s.add(_stdStr(_))
+         return new_s
+     if type(s) is list:
+         return [_stdStr(_) for _ in s]
+
+
+ def _getValueIfPresent(
+     line: pd.Series,
+     index: int,
+     default_value
+ ):
+     """
+     Extract the value from a table line for a given column name.
+     If nothing is found, returns a default value.
+
+     Parameters
+     ----------
+     :param line: Line where the info should be found.
+     :type line: pd.Series
+
+     :param index: Column name under which we should find the value
+     :type index: int
+
+     :param default_value: If no value is found under the given column, this is the default output.
+
+     Returns
+     -------
+     :return: Found value, or default value if nothing is found.
+     """
+     value = line[index] if index in line.index else default_value
+     return value
+
+
+ def _extractFluxFromMatrix(
+     table: pd.DataFrame,
+     origins: list,
+     destinations: list,
+     values: list
+ ):
+     """
+     Extract flux from a matrix as follows:
+
+     +------+------+------+------+
+     | -    | C4   | C5   | C6   |
+     +======+======+======+======+
+     | R3   | None | x    | x    |
+     +------+------+------+------+
+     | R4   | x    | None | None |
+     +------+------+------+------+
+
+     Parameters
+     ----------
+     :param table: Matrix table (rows = Origins, cols = Destinations)
+     :type table: pd.DataFrame
+
+     :param origins: List of all origins
+     :type origins: list, modified in place
+
+     :param destinations: List of all destinations
+     :type destinations: list, modified in place
+
+     :param values: List of all values for flux
+     :type values: list, modified in place
+
+     """
+     for orig in table.index:
+         for dest in table.columns:
+             if orig == dest:
+                 continue
+             v = table.loc[orig][dest]
+             ok = (v is not None) and (v != 0) and (v != "0")
+             if ok:
+                 origins.append(orig)
+                 destinations.append(dest)
+                 if re.fullmatch('[xX]', str(v)) is not None:
+                     values.append(None)
+                 else:
+                     values.append(float(v))
+
+
+ def _createMatrixFromFlux(
+     origins: list,
+     destinations: list,
+     transpose=False
+ ):
+     """
+     Create a matrix table (rows = Origins, cols = Destinations)
+
+     Parameters
+     ----------
+     :param origins: List of all origin nodes
+     :type origins: list[Node]
+
+     :param destinations: List of all destination nodes
+     :type destinations: list[Node]
+
+     :param transpose: If True, return the transposed matrix (rows = Destinations, cols = Origins)
+     :type transpose: bool
+
+     Returns
+     -------
+     :return: matrix table (rows = Origins, cols = Destinations).
+         If "x", we have a flux from origin to destination.
+         If None, we don't have a flux.
+     :rtype: list[list]
+     """
+     table = []
+     if transpose:
+         for destination in destinations:
+             # Init new row
+             row = []
+             # Get all nodes that are registered as a flux origin toward this destination
+             registered_origins = \
+                 [flux.orig for flux in destination.input_flux]
+             # If we have such nodes
+             if len(registered_origins) > 0:
+                 for origin in origins:
+                     # 'x' if we have a flux origin -> destination
+                     # None otherwise
+                     if origin in registered_origins:
+                         row.append('x')
+                     else:
+                         row.append(None)
+             else:
+                 row += [None]*len(origins)
+             # Add row to table
+             table.append(row)
+     else:
+         for origin in origins:
+             # Init new row
+             row = []
+             # Get all nodes that are registered as a flux destination from this origin
+             registered_destinations = \
+                 [flux.dest for flux in origin.output_flux]
+             # If we have such nodes
+             if len(registered_destinations) > 0:
+                 for destination in destinations:
+                     # 'x' if we have a flux origin -> destination
+                     # None otherwise
+                     if destination in registered_destinations:
+                         row.append('x')
+                     else:
+                         row.append(None)
+             else:
+                 row += [None]*len(destinations)
+             # Add row to table
+             table.append(row)
+     # Return the matrix as a list of rows
+     return table
+
+
+ def is_hex(s):
+     return re.fullmatch(r"^\# ?[0-9a-fA-F]+$", s or "") is not None
+
+
+ def _convertColorToHex(color, default_color=''):
+     """
+     Convert a color str to its hex value.
+
+     Parameters
+     ----------
+     :param color: color to convert (if not a str, default_color is returned)
+     :type color: str
+     """
+     if type(color) is str:
+         # Is the color already in hex format?
+         if is_hex(color) or (color == ""):
+             return color
+         else:
+             return webcolors.name_to_hex(color)
+     return default_color
+
+
+ def _reorderTable(
+     table: pd.DataFrame,
+     cols: list,
+     contents: list
+ ):
+     """
+     Reorder table lines according to the values present in the given list of cols
+
+     Example :
+     -------
+     Col 1 | Col 2 | Col 3
+     ---------------------
+       a   |   1   |   2
+       b   |   2   |   2
+       c   |   2   |   1
+       d   |   1   |   2
+
+     Reordering as [Col 2, Col 3] gives
+     Col 1 | Col 2 | Col 3
+     ---------------------
+       a   |   1   |   2
+       d   |   1   |   2
+       c   |   2   |   1
+       b   |   2   |   2
+
+     Parameters
+     ----------
+     :param table: Table to reorder
+     :type table: pandas.DataFrame
+
+     :param cols: List of ref columns for reordering
+     :type cols: list as [str, ...]
+
+     :param contents: List of possible contents per col, ordered as wanted.
+         Contents not in this list are processed at the end.
+     :type contents: list as [list as [str, ...], ...]
+
+     Returns
+     -------
+     :return: Reordered table
+     :rtype: pandas.DataFrame
+
+     """
+     # No more ordering cols -> return table
+     if len(cols) == 0:
+         return table
+     # Create new table to get blocks of sorted subtables
+     new_table = pd.DataFrame(columns=table.columns)
+     sorting_col = cols.pop()
+     sorting_contents = contents.pop()
+     present_contents = table[sorting_col].unique()
+     # Update sorting content with unexpected content
+     prio_sorting_contents = [_ for _ in sorting_contents if _ in present_contents]
+     other_sorting_contents = [_ for _ in present_contents if _ not in sorting_contents]
+     sorting_contents = prio_sorting_contents + other_sorting_contents
+     # Sort
+     for value in sorting_contents:
+         # Get subtable
+         if value is None:
+             sub_table = table.loc[table[sorting_col].isnull()]
+         else:
+             sub_table = table.loc[table[sorting_col] == value]
+         # Recursive filtering of sub_table
+         if len(cols) > 0:
+             sub_table = _reorderTable(sub_table, cols.copy(), contents.copy())
+         # Appending results
+         new_table = new_table._append(sub_table, ignore_index=True)
+     return new_table
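To make the reordering behaviour above concrete, here is a small, hypothetical call that mirrors the docstring example (column names and values are made up). Note that the function pops columns from the end of `cols`, so the last column in the list acts as the primary grouping key:

# Illustrative sketch only -- column names and values mirror the docstring example.
import pandas as pd

from SankeyExcelParser.sankey_utils.functions import _reorderTable

table = pd.DataFrame({
    'Col 1': ['a', 'b', 'c', 'd'],
    'Col 2': [1, 2, 2, 1],
    'Col 3': [2, 2, 1, 2],
})
# 'Col 2' is popped first, so it is the primary grouping key here.
ordered = _reorderTable(table, ['Col 3', 'Col 2'], [[1, 2], [1, 2]])
print(ordered['Col 1'].tolist())  # expected order: ['a', 'd', 'c', 'b']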