pthelma 1.1.0__cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,193 @@
1
+ # cython: language_level=3, linetrace=True
2
+ # distutils: define_macros=CYTHON_TRACE=1
3
+
4
+ import datetime as dt
5
+ cimport numpy as np
6
+ import numpy as np
7
+ import pandas as pd
8
+ from libc.math cimport isnan
9
+ from pandas.tseries.frequencies import to_offset
10
+
11
+ from htimeseries import HTimeseries
12
+
13
+ from .haggregate import RegularizationMode as RM
14
+
15
+
16
class RegularizeError(Exception):
    """Raised when a time series cannot be regularized (e.g. missing or
    unsupported time step)."""

    pass
18
+
19
+
20
def regularize(ts, new_date_flag="DATEINSERT", mode=RM.INTERVAL):
    """Return a new HTimeseries with records at regular intervals.

    ``ts`` is an HTimeseries whose ``time_step`` attribute must be set to a
    pandas-compatible offset specified in minutes, hours or days.  The result
    has one record per time step between the first and last source timestamps
    (each rounded to the step); each result record is taken from the source
    record at, or nearest to, the regular timestamp (see _get_record).
    Records created from a nearby (not exact) source timestamp get
    ``new_date_flag`` appended to their flags.  ``mode`` is a
    RegularizationMode enum member controlling how NaN source values and
    multiple nearby records are treated.

    Raises RegularizeError if the time step is missing or unsupported.
    """
    # Sanity checks
    if not hasattr(ts, "time_step"):
        raise RegularizeError("The source time series does not specify a time step")
    try:
        # Steps such as months can't be converted to a fixed Timedelta, so
        # this raises ValueError for them.
        pd.to_timedelta(to_offset(ts.time_step))
    except ValueError:
        raise RegularizeError(
            "The time step is malformed or is specified in months. Only time steps "
            "specified in minutes, hours or days are supported."
        )

    # Set metadata of result (copy attributes verbatim; missing ones become
    # None).
    result = HTimeseries()
    attrs = (
        "unit",
        "timezone",
        "time_step",
        "interval_type",
        "variable",
        "precision",
        "location",
    )
    for attr in attrs:
        setattr(result, attr, getattr(ts, attr, None))
    if hasattr(ts, "title"):
        result.title = "Regularized " + ts.title
    if hasattr(ts, "comment"):
        result.comment = (
            "Created by regularizing step of timeseries that had this comment:\n\n"
            + ts.comment
        )

    # Return immediately if empty
    if len(ts.data) == 0:
        return result

    # Determine first and last timestamps of the result by rounding the
    # source's first/last timestamps to the step.
    step = pd.Timedelta(ts.time_step)
    first_timestamp_of_result = ts.data.index[0].round(step)
    last_timestamp_of_result = ts.data.index[-1].round(step)

    # Transform all pandas information to plain numpy, which is way faster and is also
    # supported by numba and Cython
    # Widest flags string we can produce: longest existing flags, plus a
    # space, plus the new_date_flag we may append.
    max_flags_length = max(ts.data["flags"].str.len()) + 1 + len(new_date_flag)
    flags_dtype = "U" + str(max_flags_length)
    # Index values become C longs (nanoseconds since epoch for a
    # datetime64[ns] index).
    ts_index = ts.data.index.values.astype(long)
    ts_values = ts.data["value"].values
    ts_flags = ts.data["flags"].values.astype(flags_dtype)
    # np.timedelta64 of a datetime.timedelta appears to be in microseconds;
    # the * 1000 converts to nanoseconds to match ts_index — TODO confirm.
    result_step = np.timedelta64(step).astype(int) * 1000
    result_index = pd.date_range(
        first_timestamp_of_result, last_timestamp_of_result, freq=ts.time_step
    ).values
    result_values = np.full(len(result_index), np.nan, dtype=object)
    result_flags = np.full(len(result_index), "", dtype=flags_dtype)

    # Do the job
    _perform_regularization(
        result_index,
        result_values,
        result_flags,
        ts_index,
        ts_values,
        ts_flags,
        result_step,
        new_date_flag,
        mode.value,
    )

    # Rebuild a DataFrame; the date_range values are naive UTC, so localize
    # to UTC and convert back to the source index's time zone.
    result.data = pd.DataFrame(
        index=result_index,
        columns=["value", "flags"],
        data=np.vstack((result_values, result_flags)).transpose(),
    ).tz_localize(dt.timezone.utc).tz_convert(first_timestamp_of_result.tz)
    return result
95
+
96
+
97
def _perform_regularization(
    np.ndarray result_index,
    np.ndarray result_values,
    np.ndarray result_flags,
    np.ndarray ts_index,
    np.ndarray ts_values,
    np.ndarray ts_flags,
    long result_step,
    str new_date_flag,
    int mode,
):
    """Fill result_values/result_flags in place, one record per timestamp of
    result_index, by looking up the matching or nearest source record.

    All index/step arguments are in the same integer time unit (nanoseconds
    as prepared by regularize()).  ``previous_pos`` is threaded through
    _get_record so each lookup resumes the scan where the previous one
    stopped, keeping the overall pass linear in len(ts_index).
    """
    cdef int i, previous_pos
    cdef long t

    previous_pos = 0
    for i in range(result_index.size):
        t = result_index[i]
        result_values[i], result_flags[i], previous_pos = _get_record(
            ts_index,
            ts_values,
            ts_flags,
            t,
            result_step,
            new_date_flag,
            previous_pos,
            mode,
        )
124
+
125
+
126
def _get_record(
    np.ndarray ts_index,
    np.ndarray ts_values,
    np.ndarray ts_flags,
    long t,
    long result_step,
    str new_date_flag,
    int previous_pos,
    int mode,
):
    """Return (value, flags, next_pos) for regular timestamp ``t``.

    Scans ts_index starting at ``previous_pos`` (the arrays are assumed
    sorted ascending — the early ``ts_index[i] > t`` break relies on this).
    If a source record exists exactly at ``t`` it is returned as-is.
    Otherwise the record nearest to ``t`` within the half-open window
    [t - step/2, t + step/2) is returned with ``new_date_flag`` appended to
    its flags.  If no suitable record exists — or more than one exists in
    INTERVAL mode — (nan, "", pos) is returned.  In INSTANTANEOUS mode,
    NaN-valued source records are ignored.  ``next_pos`` is where the next
    call should resume scanning.
    """
    cdef int i, found, count
    cdef int nearest_i = -1
    cdef int INTERVAL = RM.INTERVAL.value
    cdef int INSTANTANEOUS = RM.INSTANTANEOUS.value

    # Return the source record if it already exists
    found = False
    for i in range(previous_pos, ts_index.size):
        if ts_index[i] == t and (mode == INTERVAL or not isnan(ts_values[i])):
            found = True
            break
        if ts_index[i] > t:
            break
    if found:
        return ts_values[i], ts_flags[i], i

    # Otherwise get the nearby record, if it exists
    # NOTE(review): under language_level=3 the "/" here may perform true
    # division, making start/end doubles; at nanosecond epoch magnitudes
    # (>2**53) that loses sub-microsecond precision. Probably harmless for
    # real-world timestamps, but "// 2" would keep it integral — confirm
    # Cython's division semantics for C longs before changing.
    start = t - result_step / 2
    end = t + result_step / 2
    count = 0
    for i in range(previous_pos, ts_index.size):
        ti = ts_index[i]
        if ti >= start and ti < end and (mode == INTERVAL or not isnan(ts_values[i])):
            count += 1
            nearest_i = _get_nearest(nearest_i, i, ts_index, ts_values, t, mode)
        if ts_index[i] >= end:
            # Step back so the returned position points at the last record
            # inside (or before) the window.
            i -= 1
            break
    if count < 1 or (count > 1 and mode == INTERVAL):
        # Nothing usable in the window (or ambiguous in INTERVAL mode).
        return np.nan, "", i
    value = ts_values[nearest_i]
    flags = ts_flags[nearest_i]
    if flags:
        flags += " "
    flags += new_date_flag
    return value, flags, i + 1
172
+
173
+
174
def _get_nearest(
    int previous_nearest_i,
    int current_i,
    np.ndarray ts_index,
    np.ndarray ts_values,
    long t,
    int mode,
):
    """Return whichever of previous_nearest_i/current_i indexes the record
    nearer to timestamp ``t``.

    ``previous_nearest_i`` is -1 when no candidate has been seen yet.  Ties
    keep the earlier candidate (strict "<" comparison below).
    """
    if mode == RM.INTERVAL.value:
        # In that case it doesn't really matter which is the nearest, so long as it's
        # only one (which is checked elsewhere), so we return immediately.
        return current_i
    if previous_nearest_i < 0:
        return current_i
    current_distance = abs(t - ts_index[current_i])
    previous_distance = abs(t - ts_index[previous_nearest_i])
    if current_distance < previous_distance:
        return current_i
    else:
        return previous_nearest_i
hspatial/__init__.py ADDED
@@ -0,0 +1,4 @@
1
+ from .hspatial import * # NOQA
2
+
3
+ __author__ = """Antonis Christofides"""
4
+ __email__ = "antonis@antonischristofides.com"
hspatial/cli.py ADDED
@@ -0,0 +1,310 @@
1
+ import configparser
2
+ import datetime as dt
3
+ import logging
4
+ import os
5
+ import sys
6
+ import traceback
7
+ from glob import glob
8
+ from io import StringIO
9
+
10
+ import click
11
+ import iso8601
12
+ from osgeo import gdal, ogr, osr
13
+ from simpletail import ropen
14
+
15
+ from hspatial import create_ogr_layer_from_timeseries, h_integrate, idw
16
+ from htimeseries import HTimeseries, TzinfoFromString
17
+ from pthelma._version import __version__
18
+
19
+ gdal.UseExceptions()
20
+
21
+
22
class WrongValueError(configparser.Error):
    """Raised when a configuration option has an invalid value."""

    pass
24
+
25
+
26
class App:
    """The ``spatialize`` command-line application.

    Reads a configuration file, then for each of the latest dates common to
    the configured station time series performs spatial integration
    (currently IDW only) over a raster mask, writing one GeoTIFF per date
    and pruning older output files.
    """

    def __init__(self, configfilename):
        self.configfilename = configfilename

    def run(self):
        """Entry point: parse configuration, set up logging, execute."""
        self.config = AppConfig(self.configfilename)
        self.config.read()
        self._setup_logger()
        self._execute_with_error_handling()

    def _execute_with_error_handling(self):
        # Log start/finish/failure timestamps; convert any failure into a
        # ClickException so click prints it and exits nonzero.
        self.logger.info("Starting spatialize, " + dt.datetime.today().isoformat())
        try:
            self._execute()
        except Exception as e:
            self.logger.error(str(e))
            self.logger.debug(traceback.format_exc())
            self.logger.info(
                "spatialize terminated with error, " + dt.datetime.today().isoformat()
            )
            raise click.ClickException(str(e))
        else:
            self.logger.info("Finished spatialize, " + dt.datetime.today().isoformat())

    def _setup_logger(self):
        self.logger = logging.getLogger("spatialize")
        self._set_logger_handler()
        self.logger.setLevel(self.config.loglevel.upper())

    def _set_logger_handler(self):
        # Log to the configured file if given, otherwise to stderr.
        if getattr(self.config, "logfile", None):
            self.logger.addHandler(logging.FileHandler(self.config.logfile))
        else:
            self.logger.addHandler(logging.StreamHandler())

    def _get_last_dates(self, filename, n):
        """
        Assuming specified file contains a time series, scan it from the bottom
        and return the list of the n last dates (may be less than n if the time
        series is too small). 'filename' is used in error messages.
        """
        # Get the time zone
        # NOTE(review): if the file is empty, "line" is never bound and the
        # startswith() below raises NameError instead of the intended
        # ClickException — confirm whether empty files can reach here.
        with open(filename) as fp:
            for line in fp:
                if line.startswith("Timezone") or (line and line[0] in "0123456789"):
                    break
            if not line.startswith("Timezone"):
                raise click.ClickException("{} does not contain Timezone".format(filename))
            zonestr = line.partition("=")[2].strip()
            timezone = TzinfoFromString(zonestr)

        result = []
        previous_line_was_empty = False
        # ropen iterates the file backwards, so i counts lines from the end.
        with ropen(filename) as fp:
            for i, line in enumerate(fp):
                if i >= n:
                    break
                line = line.strip()

                # Ignore empty lines
                if not line:
                    previous_line_was_empty = True
                    continue

                # Is the line in the form of an ini file configuration line?
                items = line.split("=")
                if len(items) and ("," not in items[0]) and previous_line_was_empty:
                    break  # Yes; we reached the start of the file

                previous_line_was_empty = False

                # Data lines are "date,value,flags"; insert at the front so
                # the result ends up in chronological order.
                datestring = line.split(",")[0]
                try:
                    result.insert(
                        0, iso8601.parse_date(datestring, default_timezone=timezone)
                    )
                except iso8601.ParseError as e:
                    raise iso8601.ParseError(
                        str(e)
                        + " (file {}, {} lines from the end)".format(filename, i + 1)
                    )
        return result

    @property
    def _dates_to_calculate(self):
        """
        Generator that yields the dates for which h_integrate should be run;
        this is the latest list of dates such that:
        * At least one of the time series has data
        * The length of the list is the 'number_of_output_files' configuration
          option (maybe less if the time series don't have enough data yet).
        """
        n = self.config.number_of_output_files
        dates = set()
        for filename in self.config.files:
            dates |= set(self._get_last_dates(filename, n))
        dates = list(dates)
        dates.sort()
        dates = dates[-n:]
        for d in dates:
            yield d

    @property
    def _time_step(self):
        """
        Return time step of all time series. If time step is not the same
        for all time series, raises exception.
        """
        time_step = None
        for filename in self.config.files:
            with open(filename, newline="\n") as f:
                # start_date far in the past so only headers are parsed cheaply
                # — presumably; verify against HTimeseries semantics.
                t = HTimeseries(f, start_date="0001-01-01 00:00")
            item_time_step = t.time_step
            if time_step and (item_time_step != time_step):
                raise click.ClickException("Not all time series have the same step")
            time_step = item_time_step
        return time_step

    @property
    def _date_fmt(self):
        """
        Determine date_fmt based on time series time step.
        """
        # The format decides how output filenames/dates are rendered: minute
        # or hour steps keep time-of-day, coarser steps drop it.
        if self._time_step.endswith("min") or self._time_step.endswith("H"):
            return "%Y-%m-%d %H:%M%z"
        elif self._time_step.endswith("D"):
            return "%Y-%m-%d"
        elif self._time_step.endswith("M"):
            return "%Y-%m"
        elif self._time_step.endswith("Y"):
            return "%Y"
        raise click.ClickException("Can't use time step " + str(self._time_step))

    def _delete_obsolete_files(self):
        """
        Delete all tif files produced in the past except the last N,
        where N is the 'number_of_output_files' configuration option.
        """
        pattern = os.path.join(
            self.config.output_dir, "{}-*.tif".format(self.config.filename_prefix)
        )
        files = glob(pattern)
        # Lexicographic sort; works because the date suffix produced by
        # _date_fmt sorts chronologically.
        files.sort()
        for filename in files[: -self.config.number_of_output_files]:
            os.remove(filename)

    def _execute(self):
        """Perform the spatial integration for each date to calculate."""
        # Create stations layer (in-memory OGR data source)
        stations = ogr.GetDriverByName("memory").CreateDataSource("stations")
        stations_layer = create_ogr_layer_from_timeseries(
            self.config.files, self.config.epsg, stations
        )

        # Get mask
        mask = gdal.Open(self.config.mask)

        # Setup integration method
        if self.config.method == "idw":
            funct = idw
            kwargs = {"alpha": self.config.alpha}
        else:
            # AppConfig._check_method already guarantees method == "idw".
            assert False

        for date in self._dates_to_calculate:
            self.logger.info("Processing date " + date.isoformat())
            h_integrate(
                mask,
                stations_layer,
                date,
                os.path.join(self.config.output_dir, self.config.filename_prefix),
                self._date_fmt,
                funct,
                kwargs,
            )
        self._delete_obsolete_files()
201
+
202
+
203
class AppConfig:
    """Reads and validates the spatialize configuration file.

    The file is standard INI syntax; a missing section header is tolerated
    by implicitly wrapping the contents in a [General] section.  After
    read(), each option is available as an attribute (parsed to its proper
    type where applicable).
    """

    # Option name -> kwargs passed to ConfigParser.get(); options without a
    # "fallback" are mandatory and make get() raise if absent.
    config_file_options = {
        "logfile": {"fallback": ""},
        "loglevel": {"fallback": "warning"},
        "mask": {},
        "epsg": {},
        "output_dir": {},
        "filename_prefix": {},
        "number_of_output_files": {},
        "method": {},
        "alpha": {"fallback": "1"},
        "files": {},
    }

    def __init__(self, configfilename):
        self.configfilename = configfilename

    def read(self):
        """Parse the configuration file; raise ClickException on any error."""
        try:
            self._parse_config()
        except (OSError, configparser.Error) as e:
            # NOTE(review): the message is also printed by click when the
            # ClickException propagates, so this stderr write may duplicate
            # it — confirm whether that's intentional.
            sys.stderr.write(str(e))
            raise click.ClickException(str(e))

    def _parse_config(self):
        self._read_config_file()
        self._get_config_options()
        self._parse_config_options()

    def _read_config_file(self):
        # interpolation=None so "%" in values (e.g. date formats) is literal.
        self.config = configparser.ConfigParser(interpolation=None)
        try:
            self._read_config_file_assuming_it_has_section_headers()
        except configparser.MissingSectionHeaderError:
            self._read_config_file_without_sections()

    def _read_config_file_assuming_it_has_section_headers(self):
        with open(self.configfilename) as f:
            self.config.read_file(f)

    def _read_config_file_without_sections(self):
        # Re-read, prefixing an implicit [General] header.
        with open(self.configfilename) as f:
            configuration = "[General]\n" + f.read()
        self.config.read_file(StringIO(configuration))

    def _get_config_options(self):
        # Fetch every known option (raises for missing mandatory ones) and
        # expose each as an attribute on self.
        self.options = {
            opt: self.config.get("General", opt, **kwargs)
            for opt, kwargs in self.config_file_options.items()
        }
        for key, value in self.options.items():
            setattr(self, key, value)

    def _parse_config_options(self):
        self._parse_log_level()
        self._parse_files()
        self._check_method()
        self._parse_epsg()
        self._parse_number_of_output_files()

    def _parse_log_level(self):
        log_levels = ("ERROR", "WARNING", "INFO", "DEBUG")
        self.loglevel = self.loglevel.upper()
        if self.loglevel not in log_levels:
            raise WrongValueError("loglevel must be one of " + ", ".join(log_levels))

    def _parse_files(self):
        # "files" is a newline-separated list of time series filenames.
        # NOTE(review): blank lines would yield empty entries — confirm the
        # option's format never contains them.
        self.files = self.files.split("\n")

    def _check_method(self):
        # Check method
        if self.method != "idw":
            raise WrongValueError('Option "method" can currently only be idw')
        # Check alpha
        try:
            self.alpha = float(self.alpha)
        except ValueError:
            raise WrongValueError('Option "alpha" must be a number')

    def _parse_epsg(self):
        try:
            self.epsg = int(self.epsg)
        except ValueError:
            raise WrongValueError('Option "epsg" must be an integer')
        # ImportFromEPSG returns a nonzero error code on failure.
        srs = osr.SpatialReference()
        result = srs.ImportFromEPSG(self.epsg)
        if result:
            raise WrongValueError(
                "An error occurred when trying to use epsg={}".format(self.epsg)
            )

    def _parse_number_of_output_files(self):
        try:
            self.number_of_output_files = int(self.number_of_output_files)
        except ValueError:
            raise WrongValueError('Option "number_of_output_files" must be an integer')
299
+
300
+
301
# Command-line entry point. The docstring below doubles as the click help
# text, so it is left exactly as-is.
@click.command()
@click.argument("configfile")
@click.version_option(
    version=__version__, message="%(prog)s from pthelma v.%(version)s"
)
def main(configfile):
    """Spatial integration"""

    app = App(configfile)
    app.run()