PyPI - hydamo-validation - Versions diffs - 1.3.0b1__tar.gz → 1.3.0b3__tar.gz - Mend

hydamo-validation 1.3.0b1.tar.gz → 1.3.0b3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of hydamo-validation might be problematic. Click here for more details.

Files changed (49) hide show

{hydamo_validation-1.3.0b1 → hydamo_validation-1.3.0b3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hydamo_validation
-Version: 1.3.0b1
+Version: 1.3.0b3
 Summary: Validation module for HyDAMO data
 Author-email: Daniel Tollenaar <daniel@d2hydro.nl>
 License: MIT

hydamo_validation-1.3.0b3/hydamo_validation/__init__.py ADDED Viewed

@@ -0,0 +1,22 @@
+# Package metadata
+__author__ = ["Het Waterschapshuis", "D2HYDRO", "HKV", "HydroConsult"]
+__copyright__ = "Copyright 2021, HyDAMO ValidatieTool"
+__credits__ = ["D2HYDRO", "HKV", "HydroConsult"]
+__version__ = "1.3.0b3"
+__license__ = "MIT"
+__maintainer__ = "Daniel Tollenaar"
+__email__ = "daniel@d2hydro.nl"
+# NOTE(review): fiona is imported here and re-exported via __all__ although this
+# module does not use it itself; presumably kept for import order or caller
+# convenience - confirm
+import fiona
+from hydamo_validation.functions import topologic as topologic_functions
+from hydamo_validation.functions import logic as logic_functions
+from hydamo_validation.functions import general as general_functions
+from hydamo_validation.validator import validator
+# Names exported by a star-import of the package
+__all__ = [
+    "fiona",
+    "topologic_functions",
+    "logic_functions",
+    "general_functions",
+    "validator",
+]

hydamo_validation-1.3.0b3/hydamo_validation/functions/__init__.py ADDED Viewed

File without changes

hydamo_validation-1.3.0b3/hydamo_validation/functions/general.py ADDED Viewed

@@ -0,0 +1,319 @@
+"""functions to be executed on gdf."""
+import geopandas as gpd
+from typing import Literal
+import numpy as np
+from pathlib import Path
+from rasterstats import zonal_stats
+import logging
+import pandas as pd
+try:
+    import rasterio
+except ImportError:
+    import gdal  # noqa to avoid rasterio.version error: https://github.com/conda-forge/rasterio-feedstock/issues/240
+    import rasterio
+# Coverage directories keyed by coverage name; filled by _set_coverage and read by buffer
+COVERAGES = {}
+# DATA_MODEL = None
+# OBJECT_LAYER = None
+# We get a false-positive SettingWithCopyWarning in the buffer function, which we suppress
+pd.options.mode.chained_assignment = None
+def _set_coverage(coverage: str, directory: str):
+    """Add a coverage for functions."""
+    global COVERAGES
+    coverage_path = Path(directory)
+    if not coverage_path.exists():
+        logging.error(
+            (
+                f"Path to coverage {coverage} does not exist: ",
+                f"{coverage_path.absolute().resolve()}",
+                ". Functions using this coverage fail without data.",
+            )
+        )
+        raise FileNotFoundError(f"{coverage_path.absolute().resolve()}")
+    COVERAGES[coverage] = coverage_path
+def _buffer_row(row, column):
+    radius = max(row[column], 0.5)
+    return row.geometry.buffer(radius)
+def _get_geometric_attribute(gdf, geom_parameter):
+    geometry, method = geom_parameter.split(".")
+    return getattr(gdf[geometry], method)
+def sum(gdf, array: list):
+    """Return a sum expression."""
+    expression = " + ".join(map(str, array))
+    return gdf.eval(expression)
+def difference(gdf, left, right, absolute=False):
+    """
+    Difference between 'left' and 'right'
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : TYPE
+        Right column or value in expression
+    absolute : bool, optional
+        Absolute (True) or relative difference (False) to left.
+        The default is False.
+    Returns
+    -------
+    result : Series
+        Float series
+    """
+    if left in gdf.columns:
+        left = gdf[left]
+    if right in gdf.columns:
+        right = gdf[right]
+    if absolute:
+        result = (left - right).abs()
+    else:
+        result = left - right
+    return result
+def divide(gdf, left, right):
+    """
+    Division of 'left' by 'right'
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : TYPE
+        Right column or value in expression
+    Returns
+    -------
+    result : Series
+        Float series
+    """
+    expression = " / ".join(map(str, [left, right]))
+    return gdf.eval(expression)
+def multiply(gdf, left, right):
+    """
+    Multiply 'left' with 'right'
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : str, numeric
+        Right column or value in expression
+    Returns
+    -------
+    result : Series
+        Float series
+    """
+    expression = " * ".join(map(str, [left, right]))
+    return gdf.eval(expression)
+def buffer(gdf, radius, percentile, coverage="ahn", fill_value: float = None):
+    """
+    Percentile of coverage-value of an area defined by a radius around the
+    object
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    radius: str, numeric
+        Radius around object used to define a cirular area
+    percentile : int
+        The percentile of the coverage within area around object
+    coverage : str, optional
+        The coverage to use. The default value is 'ahn'
+    fill_value : float, optional
+        The fill_value to use when the area is not intersecting the coverage.
+        The default is None
+    Returns
+    -------
+    result : Series
+        Float series
+    """
+    gdf_out = gdf.copy()
+    gdf_out["result"] = np.nan
+    xmin, ymin, xmax, ymax = gdf_out.total_bounds
+    coverage_path = COVERAGES[coverage]
+    index_gdf = gpd.read_file(coverage_path.joinpath("index.shp"))
+    for idx, row in index_gdf.cx[xmin:xmax, ymin:ymax].iterrows():
+        try:
+            bathymetrie_raster = coverage_path.joinpath(
+                f'{row["bladnr"].upper()}_CM.tif'
+            )
+            gdf_select = gdf_out.loc[
+                gdf_out["geometry"].centroid.within(row["geometry"])
+            ]
+            if not gdf_select.empty:
+                if isinstance(radius, str):
+                    gdf_select.loc[:, ("geometry")] = gdf_select.apply(
+                        _buffer_row, args=(radius,), axis=1
+                    )
+                else:
+                    radius = max(radius, 0.5)
+                    gdf_select.loc[:, ("geometry")] = gdf_select["geometry"].buffer(
+                        radius
+                    )
+                with rasterio.open(bathymetrie_raster, "r") as src:
+                    profile = src.profile
+                    raster_data = src.read(1)
+                    affine = src.transform
+                    scale = src.scales[0]
+                raster_stats = zonal_stats(
+                    gdf_select,
+                    raster_data,
+                    affine=affine,
+                    stats=f"percentile_{percentile}",
+                    nodata=profile["nodata"],
+                    raster_out=True,
+                )
+                gdf_out.loc[gdf_select.index.to_list(), "result"] = [
+                    np.nan if item is None else round(item * scale, 2)
+                    for item in [
+                        item[f"percentile_{percentile}"] for item in raster_stats
+                    ]
+                ]
+        except Exception as e:
+            print(
+                (
+                    f"bathymetrie: {bathymetrie_raster}\n"
+                    f"indices: {gdf_select.index}\n"
+                    f"geometrien: {gdf_select['geometry']}"
+                )
+            )
+            raise e
+    # fill series if if provided
+    if fill_value is not None:
+        gdf_out.loc[gdf_out["result"].isna(), "result"] = fill_value
+    return gdf_out["result"]
+def join_parameter(
+    gdf,
+    join_object: str,
+    join_gdf: gpd.GeoDataFrame,
+    join_parameter: str,
+    fill_value=None,
+):
+    """Joins a parameteer of other object to geodataframe."""
+    _gdf = gdf.copy()
+    _join_gdf = join_gdf.copy()
+    _join_gdf.set_index("globalid", inplace=True)
+    series = _join_gdf[join_parameter]
+    series.name = "result"
+    _gdf = _gdf.merge(series, how="left", left_on=f"{join_object}id", right_index=True)
+    # fill series if if provided
+    if fill_value is not None:
+        _gdf.loc[_gdf["result"].isna(), "result"] = fill_value
+    return _gdf["result"]
+def object_relation(
+    gdf,
+    related_gdf: gpd.GeoDataFrame,
+    code_relation: str,
+    statistic: Literal["min", "max", "sum", "count"],
+    related_parameter: str = None,
+    fill_value=None,
+):
+    """
+    Statistic of related object to geodataframe
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    related_gdf : GeoDataFrame
+        GeoDataFrame with related attributes
+    code_relation : str
+        Column in related_gdf used to relate to gdf. Example 'stuwid'
+    statistic : str, options: 'min', 'max', 'sum', 'count'
+        Statistic to compute over related values
+    related_parameter: str
+        Column in related_gdf over which the statistic is to be computed
+    fill_value : float, optional
+        The fill_value to use when the area is not intersecting the coverage.
+        The default is None
+    Returns
+    -------
+    result : Series
+        Float series
+    """
+    gdf_out = gdf.copy()
+    # remove NaN values in from related_gdf[related_parameter]
+    if related_parameter:
+        if "geometry" in related_parameter:
+            related_gdf[related_parameter] = _get_geometric_attribute(
+                related_gdf, related_parameter
+            )
+        related_gdf = related_gdf.loc[related_gdf[related_parameter].notna()]
+    # compute statistic
+    if statistic == "count":
+        series = related_gdf.groupby(by=[code_relation])[code_relation].count()
+    elif statistic == "sum":
+        series = related_gdf.groupby(by=[code_relation])[related_parameter].sum()
+    elif statistic == "min":
+        series = related_gdf.groupby(by=[code_relation])[related_parameter].min()
+    elif statistic == "max":
+        series = related_gdf.groupby(by=[code_relation])[related_parameter].max()
+    elif statistic == "majority":
+        series = related_gdf.groupby(by=[code_relation])[related_parameter].agg(
+            pd.Series.mode
+        )
+    # join series with gdf
+    series.name = "result"
+    series = pd.DataFrame(series.loc[series.index.isin(gdf["globalid"])]).reset_index()
+    gdf_out = gdf_out.merge(
+        series, how="left", left_on="globalid", right_on=code_relation
+    )
+    # fill series if if provided
+    if fill_value is not None:
+        gdf_out.loc[gdf_out["result"].isna(), "result"] = fill_value
+    return gdf_out["result"]

hydamo_validation-1.3.0b3/hydamo_validation/functions/logic.py ADDED Viewed

@@ -0,0 +1,338 @@
+"""Logic functions to be used in eval-method."""
+import pandas as pd
+def _overlapping_period(row, df, start_date, end_date):
+    _df = df[df.index != row.name]
+    return ~(
+        (row[start_date] <= _df[end_date]) & (row[end_date] >= _df[end_date])
+    ).any()
+def _check_attributes(gdf, attributes):
+    for i in attributes:
+        if type(i) == str:
+            if not i in gdf.columns:
+                raise KeyError(
+                    rf"'{i}' not in columns: {gdf.columns.to_list()}. Rule cannot be executed"
+                )
+def LE(gdf, left, right, dtype=bool):
+    """
+    Evaluate if left is less or equal to/than right
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : TYPE
+        Right column or value in expression
+    dtype : dtype, optional
+        dtype assigned to result Series
+        The default is bool.
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [left, right])
+    expression = f"{left} <= {right}".lower()
+    return gdf.eval(expression).astype(dtype)
+def LT(gdf, left, right, dtype=bool):
+    """
+    Evaluate if left is less than right
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : TYPE
+        Right column or value in expression
+    dtype : dtype, optional
+        dtype assigned to result Series
+        The default is bool.
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [left, right])
+    expression = f"{left} < {right}".lower()
+    return gdf.eval(expression).astype(dtype)
+def GT(gdf, left, right, dtype=bool):
+    """
+    Evaluate if left is greater than right
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : TYPE
+        Right column or value in expression
+    dtype : dtype, optional
+        dtype assigned to result Series
+        The default is bool.
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [left, right])
+    expression = f"{left} > {right}".lower()
+    return gdf.eval(expression).astype(dtype)
+def GE(gdf, left, right, dtype=bool):
+    """Evaluate if left is greater or equal to/than right
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : TYPE
+        Right column or value in expression
+    dtype : dtype, optional
+        dtype assigned to result Series
+        The default is bool.
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [left, right])
+    expression = f"{left} >= {right}".lower()
+    return gdf.eval(expression).astype(dtype)
+def EQ(gdf, left, right, dtype=bool):
+    """Evalate if left an right expression are equal
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    left : str, numeric
+        Left column or value in expression
+    right : TYPE
+        Right column or value in expression
+    dtype : dtype, optional
+        dtype assigned to result Series
+        The default is bool.
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [left, right])
+    expression = f"{left} == {right}".lower()
+    return gdf.eval(expression).astype(dtype)
+def BE(gdf, parameter, min, max, inclusive=False):
+    """Evaluate if parameter-value is between min/max inclusive (true/false)
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    parameter: str
+        Input column with numeric values
+    min : numeric
+        Lower limit of function
+    max : numeric
+        Upper limit of function
+    inclusive : bool, optional
+        To include min and max
+        The default is False.
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [parameter, min, max])
+    if inclusive:
+        series = GE(gdf, parameter, min, dtype=bool) & LE(
+            gdf, parameter, max, dtype=bool
+        )
+    else:
+        series = GT(gdf, parameter, min, dtype=bool) & LT(
+            gdf, parameter, max, dtype=bool
+        )
+    return series
+def ISIN(gdf, parameter, array):
+    """Evaluate if values in parameter are in array
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    parameter: str
+        Input column with numeric values
+    array : list
+        list of possible values that return True
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [parameter])
+    return gdf[parameter].isin(array)
+def NOTIN(gdf, parameter, array):
+    """Evaluate if values in parameter are not in array
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    parameter: str
+        Input column with numeric values
+    array : list
+        list of possible values that return False
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [parameter])
+    return ~ISIN(gdf, parameter, array)
+def NOTNA(gdf, parameter):
+    """Evaluate if values in parameter ar not NaN or None
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    parameter: str
+        Input column with numeric values
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(gdf, [parameter])
+    return gdf[parameter].notna()
+def join_object_exists(gdf, join_gdf, join_object):
+    """Evaluate if defined related_object id exists in globalid parameter of
+    related object-table.
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    related_gdf : GeoDataFrame
+        Input GeoDataFrame with related objects
+    object: str
+        HyDAMO object name of related object-layer
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    _check_attributes(join_gdf, ["globalid"])
+    _check_attributes(gdf, [f"{join_object}id"])
+    return gdf[f"{join_object}id"].isin(join_gdf["globalid"])
+def consistent_period(
+    gdf,
+    max_gap=1,
+    groupers=["pompid", "regelmiddelid"],
+    priority="prioriteit",
+    start_date="beginperiode",
+    date_format="%d%m",
+    end_date="eindperiode",
+):
+    """Check if a periodic-based table is time-consistent
+    Parameters
+    ----------
+    gdf : GeoDataFrame
+        Input GeoDataFrame
+    max_gap: int
+        max gap in days between too adjacent periods
+    Returns
+    -------
+    result : Series
+        Pandas Series (default dtype = bool)
+    """
+    # create an empty result
+    _gdf = gdf.copy()
+    result = pd.Series(index=_gdf.index)
+    # convert start_parameter and end_parameter to datetime
+    _gdf[start_date] = pd.to_datetime(_gdf[start_date], format=date_format)
+    _gdf[end_date] = pd.to_datetime(_gdf[end_date], format=date_format)
+    index_select = _gdf[start_date] > _gdf[end_date]
+    _gdf.loc[index_select, end_date] = _gdf[index_select][
+        end_date
+    ] + pd.offsets.DateOffset(years=1)
+    for group in groupers:
+        grouper = _gdf.groupby(by=[group, "prioriteit"])
+        for _, df in _gdf.groupby(by=["pompid", "prioriteit"]):
+            df.sort_values(by=start_date, inplace=True)
+            # check for overlap
+            bool_series = df.apply(
+                (lambda x: _overlapping_period(x, df, start_date, end_date)), axis=1
+            )
+            # check for gaps
+            gaps_series = df[start_date] - df.shift(1)[end_date]
+            gaps_series.iloc[0] = pd.Timedelta(days=0)  # due to shift we have NaT here
+            # add to result
+            bool_series = (gaps_series <= pd.Timedelta(days=int(max_gap))) & bool_series
+            bool_series = bool_series[
+                bool_series.index.isin(result[result.isna() | (result == True)].index)
+            ]
+            result.loc[result.index.isin(bool_series.index)] = bool_series
+    return result

hydamo-validation 1.3.0b1__tar.gz → 1.3.0b3__tar.gz

Potentially problematic release.

hydamo-validation 1.3.0b1.tar.gz → 1.3.0b3.tar.gz