PyPI - gensor - Versions diffs - 0.0.1__tar.gz → 0.0.3__tar.gz - Mend

gensor 0.0.1tar.gz → 0.0.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{gensor-0.0.1 → gensor-0.0.3}/PKG-INFO RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.1
 Name: gensor
-Version: 0.0.1
+Version: 0.0.3
 Summary: Library for handling groundwater sensor data.
 Home-page: https://github.com/zawadzkim/gensor
 Author: Mateusz Zawadzki
-Author-email: fzawadzkimat@outlook.com
+Author-email: zawadzkimat@outlook.com
 Requires-Python: >=3.11
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.11

{gensor-0.0.1 → gensor-0.0.3}/gensor/compensation.py RENAMED Viewed

@@ -20,8 +20,9 @@ Functions:
     compensate: Compensate raw sensor pressure measurement with barometric pressure.
 """
-from typing import Self
+from typing import Any
+import pandas as pd
 import pydantic as pyd
 from .dtypes import Timeseries
@@ -32,32 +33,35 @@ from .exceptions import (
 class Compensator(pyd.BaseModel):
+    """Compensate raw sensor pressure measurement with barometric pressure.
+    Attributes:
+        ts (Timeseries): Raw sensor timeseries
+        barometric (Timeseries | float): Barometric pressure timeseries or a single
+            float value. If a float value is provided, it is assumed to be in cmH2O.
+        drop_low_wc (bool): Whether to drop records where the absolute water column is
+            less than or equal to the cutoff value. Defaults to True.
+    """
     ts: Timeseries
     barometric: Timeseries | float
     drop_low_wc: bool = True
     @pyd.field_validator("ts", "barometric", mode="before")
-    def validate_timeseries_type(cls, v):
+    def validate_timeseries_type(cls, v: Timeseries) -> Timeseries:
         if isinstance(v, Timeseries) and v.variable != "pressure":
-            raise InvalidMeasurementTypeError(v.location)
+            raise InvalidMeasurementTypeError()
         return v
     @pyd.field_validator("ts")
-    def validate_sensor_information(cls, v: Timeseries):
+    def validate_sensor_information(cls, v: Timeseries) -> Timeseries:
         if v.sensor is not None and not v.sensor_alt:
             raise MissingInputError("sensor_alt")
         return v
-    def compensate(self, **kwargs) -> Self | None:
-        """Compensate raw sensor pressure measurement with barometric pressure.
-        Parameters:
-            ts (Timeseries): Raw sensor timeseries
-            barometric (Timeseries or float): Barometric pressure timeseries or a single
-                float value. If a float value is provided, it is assumed to be in cmH2O.
-            drop_low_wc (bool): Whether to drop records where the absolute water column is
-                less than or equal to the cutoff value. Defaults to True.
-            inplace (bool): Whether to update the timeseries in place. Defaults to True.
+    def compensate(self, **kwargs: Any) -> Timeseries | None:
+        """Perform compensation.
         Keyword Arguments:
             alignment_period (str): The alignment period for the timeseries.
@@ -72,16 +76,17 @@ class Compensator(pyd.BaseModel):
         alignment_period = kwargs.get("alignment_period", "h")
         threshold_wc = kwargs.get("threshold_wc", 0.5)
         resample_params = {"freq": alignment_period, "agg_func": "mean"}
+        resampled_ts = self.ts.resample(**resample_params)
         if isinstance(self.barometric, Timeseries):
             if self.ts == self.barometric:
                 print("Skipping compensation: both timeseries are the same.")
                 return None
             baro = self.barometric.resample(**resample_params).ts
-        else:
-            baro = self.barometric
-        resampled_ts = self.ts.resample(**resample_params)
+        elif isinstance(self.barometric, float):
+            baro = pd.Series(
+                [self.barometric] * len(resampled_ts.ts), index=resampled_ts.ts.index
+            )
         # dividing by 100 to convert water column from cmH2O to mH2O
         watercolumn_ts = resampled_ts.ts.sub(baro).divide(100).dropna()
@@ -94,9 +99,9 @@ class Compensator(pyd.BaseModel):
                 f"{len(watercolumn_ts) - len(watercolumn_ts_filtered)} records \
                     dropped due to low water column."
             )
-            gwl = watercolumn_ts_filtered.add(float(resampled_ts.sensor_alt))
+            gwl = watercolumn_ts_filtered.add(float(resampled_ts.sensor_alt or 0))
         else:
-            gwl = watercolumn_ts.add(float(resampled_ts.sensor_alt))
+            gwl = watercolumn_ts.add(float(resampled_ts.sensor_alt or 0))
         compensated = resampled_ts.model_copy(
             update={"ts": gwl, "unit": "m asl", "variable": "head"}
@@ -105,6 +110,20 @@ class Compensator(pyd.BaseModel):
         return compensated
-def compensate(ts, barometric, drop_low_wc, **kwargs) -> Timeseries:
+def compensate(
+    ts: Timeseries,
+    barometric: Timeseries | float,
+    drop_low_wc: bool,
+    **kwargs: Any,
+) -> Timeseries | None:
+    """Constructor for the Compensator class object.
+    Parameters:
+        ts (Timeseries): Raw sensor timeseries
+        barometric (Timeseries | float): Barometric pressure timeseries or a single
+            float value. If a float value is provided, it is assumed to be in cmH2O.
+        drop_low_wc (bool): Whether to drop records where the absolute water column is
+            less than or equal to the cutoff value. Defaults to True.
+    """
     comp = Compensator(ts=ts, barometric=barometric, drop_low_wc=drop_low_wc)
     return comp.compensate(**kwargs)

gensor-0.0.3/gensor/db/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""
+# DB
+Module handling database connection in case saving and loading from SQLite database is
+used.
+Modules:
+    connection.py
+"""
+from .connection import DatabaseConnection
+__all__ = ["DatabaseConnection"]

{gensor-0.0.1 → gensor-0.0.3}/gensor/db/connection.py RENAMED Viewed

@@ -1,4 +1,8 @@
-"""Module for database connection."""
+"""Module defining database connection object.
+Classes:
+    DatabaseConnection: Database connection object
+"""
 from pathlib import Path
@@ -10,7 +14,7 @@ from ..exceptions import DatabaseNotFound
 class DatabaseConnection(pyd.BaseModel):
-    """Class for handling the database connection.
+    """Database connection object.
     If no database exists at the specified path, it will be created.
     If no database is specified, an in-memory database will be used.

{gensor-0.0.1 → gensor-0.0.3}/gensor/dtypes.py RENAMED Viewed

@@ -18,6 +18,15 @@ ts_schema = pa.SeriesSchema(
     coerce=True,
 )
+VARIABLE_TYPES_AND_UNITS = {
+    "temperature": ["degC"],
+    "pressure": ["cmH2O", "mmH2O"],
+    "conductivity": ["mS/cm"],
+    "flux": ["m/s"],
+    "head": ["m asl"],
+    "depth": ["m"],
+}
 class Timeseries(pyd.BaseModel):
     """Timeseries from a sensor including measurement metadata.
@@ -215,8 +224,14 @@ class Timeseries(pyd.BaseModel):
             str: A message indicating the number of rows inserted into the database.
         """
         schema_name = f"{self.location}_{self.sensor}_{self.variable}_{self.unit}"
-        con = db.engine.connect()
-        self.ts.to_sql(name=schema_name, con=con, if_exists="append", index=False)
+        if db.engine is not None:
+            with db.engine.connect() as con:
+                self.ts.to_sql(
+                    name=schema_name, con=con, if_exists="append", index=False
+                )
+        else:
+            message = "Database engine is not initialized."
+            raise ValueError(message)
         return f"{schema_name} table updated."
@@ -293,7 +308,7 @@ class Dataset(pyd.BaseModel):
     def __repr__(self) -> str:
         return f"Dataset({len(self)})"
-    def __getitem__(self, index: int) -> Timeseries:
+    def __getitem__(self, index: int) -> Timeseries | None:
         """Retrieve a Timeseries object by its index in the dataset.
         Parameters:
@@ -310,11 +325,11 @@ class Dataset(pyd.BaseModel):
         except IndexError:
             raise IndexOutOfRangeError(index, len(self)) from None
-    def get_stations(self):
+    def get_stations(self) -> list:
         """List all unique locations in the dataset."""
         return [ts.location for ts in self.timeseries if ts is not None]
-    def add(self, other: Timeseries):
+    def add(self, other: Timeseries | list[Timeseries]) -> None:
         """Appends a new series to the Dataset or merges series if an equal
         one exists.
@@ -331,7 +346,9 @@ class Dataset(pyd.BaseModel):
         else:
             self._add_single_timeseries(other)
-    def _add_single_timeseries(self, ts: Timeseries):
+        return
+    def _add_single_timeseries(self, ts: Timeseries) -> None:
         """Adds a single Timeseries to the Dataset or merges if an equal one exists."""
         for i, existing_ts in enumerate(self.timeseries):
             if existing_ts == ts:
@@ -340,6 +357,8 @@ class Dataset(pyd.BaseModel):
         self.timeseries.append(ts)
+        return
     def filter(
         self,
         station: str | None = None,
@@ -358,9 +377,11 @@ class Dataset(pyd.BaseModel):
             Timeseries or Dataset: A single Timeseries if exactly one match is found,
                                    or a new Dataset if multiple matches are found.
         """
         matching_timeseries = [
             ts
             for ts in self.timeseries
+            if ts is not None
             if (station is None or ts.location == station)
             and (sensor is None or ts.sensor == sensor)
             and (variable is None or ts.variable == variable)

{gensor-0.0.1 → gensor-0.0.3}/gensor/exceptions.py RENAMED Viewed

@@ -1,10 +1,9 @@
 class InvalidMeasurementTypeError(ValueError):
     """Raised when a timeseries of a wrong measurement type is operated upon."""
-    def __init__(self, timeseries_name: str, expected_type: str = "pressure") -> None:
-        self.timeseries_name = timeseries_name
+    def __init__(self, expected_type: str = "pressure") -> None:
         self.expected_type = expected_type
-        message = f"Timeseries '{self.timeseries_name}' must be of measurement type '{self.expected_type}'."
+        message = f"Timeseries must be of measurement type '{self.expected_type}'."
         super().__init__(message)

{gensor-0.0.1 → gensor-0.0.3}/gensor/getters.py RENAMED Viewed

@@ -1,9 +1,12 @@
-"""Fetching the data from various sources."""
+"""Fetching the data from various sources.
+TODO: Fix up the read_from_sql() function to actually work properly.
+"""
 from pathlib import Path
-from typing import Literal
+from typing import Any, Literal
-from pandas import read_sql
+from pandas import Series, read_sql
 from sqlalchemy import MetaData, Table, select
 from .db.connection import DatabaseConnection
@@ -12,7 +15,9 @@ from .exceptions import NoFilesToLoad
 from .parse import parse_vanessen_csv
-def read_from_csv(path: Path, file_format: Literal["vanessen"] = "vanessen", **kwargs):
+def read_from_csv(
+    path: Path, file_format: Literal["vanessen"] = "vanessen", **kwargs: Any
+) -> Dataset:
     """Loads the data from the Van Essen CSV file(s) and returns a list of Timeseries objects.
     Args:
@@ -42,7 +47,7 @@ def read_from_csv(path: Path, file_format: Literal["vanessen"] = "vanessen", **k
     ds = Dataset()
     for f in files:
         print(f"Loading file: {f}")
-        ts_in_file: list = parser(f, **kwargs)
+        ts_in_file = parser(f, **kwargs)
         ds.add(ts_in_file)
     return ds
@@ -65,10 +70,21 @@ def read_from_sql(
     schema = Table(f"{location}_{sensor}_{variable}", metadata)
     query = select(schema)
-    df = read_sql(query, con=db.engine)
+    if db.engine:
+        with db.engine.connect() as con:
+            df = read_sql(query, con=con, index_col="timestamp")
+    if not isinstance(df, Series):
+        raise TypeError
     ts_object = Timeseries(
-        timeseries=df, variable=variable, location=location, sensor=sensor, unit=unit
+        ts=df,
+        # Validation done in Pydantic
+        variable=variable,
+        location=location,
+        sensor=sensor,
+        # Validation done in Pydantic
+        unit=unit,
     )
     return ts_object

{gensor-0.0.1 → gensor-0.0.3}/gensor/parse/vanessen.py RENAMED Viewed

@@ -9,7 +9,7 @@ import chardet
 import pytz
 from pandas import DataFrame, read_csv, to_datetime
-from ..dtypes import Timeseries
+from ..dtypes import VARIABLE_TYPES_AND_UNITS, Timeseries
 def detect_encoding(path: Path, num_bytes: int = 1024) -> str:
@@ -51,7 +51,7 @@ def handle_timestamps(df: DataFrame, tz: str) -> DataFrame:
     return df
-def parse_vanessen_csv(path: Path, **kwargs) -> list[Any]:
+def parse_vanessen_csv(path: Path, **kwargs: Any) -> list[Timeseries]:
     """Parses a van Essen csv file and returns a list of Timeseries objects. At this point it
     does not matter whether the file is a barometric or piezometric logger file.
@@ -60,12 +60,17 @@ def parse_vanessen_csv(path: Path, **kwargs) -> list[Any]:
     are not working (which most likely will be the case), the user should provide their own patterns. The patterns
     can be provided as keyword arguments to the function and it is possible to use OR (|) in the regex pattern.
-    Args:
+    !!! warning
+        A better check for the variable type and units has to be implemented.
+    Parameters:
         path (Path): The path to the file.
-        **kwargs (dict): Optional keyword arguments to specify the regex patterns for the serial number and station.
-            serial_number_pattern (str): The regex pattern to extract the serial number from the file.
-            location_pattern (str): The regex pattern to extract the station from the file.
-            col_names (list): The column names for the dataframe.
+    Other Parameters:
+        serial_number_pattern (str): The regex pattern to extract the serial number from the file.
+        location_pattern (str): The regex pattern to extract the station from the file.
+        col_names (list): The column names for the dataframe.
     Returns:
         list: A list of Timeseries objects.
@@ -86,7 +91,11 @@ def parse_vanessen_csv(path: Path, **kwargs) -> list[Any]:
         text = f.read()
         try:
-            data = {k: re.search(v, text).group() for k, v in data.items()}
+            data = {
+                k: (match.group() if (match := re.search(v, text)) else None)
+                for k, v in data.items()
+            }
         except AttributeError:
             print(
                 f"Skipping file {path} due to missing patterns. If this is not expected, please provide the correct patterns."
@@ -104,22 +113,33 @@ def parse_vanessen_csv(path: Path, **kwargs) -> list[Any]:
         df = read_csv(
             data_io, skiprows=1, header=None, names=column_names, index_col="timestamp"
         )
-        timezone_match = re.search(
-            kwargs.get("timezone_pattern", r"UTC[+-]?\d+"), text
-        ).group()
-        df = handle_timestamps(df, timezone_match)
-        ts_list = [
-            Timeseries(
-                ts=df[col],
-                variable=col,
-                location=data.get("location"),
-                sensor=data.get("sensor"),
-                unit="cmH2O" if col == "pressure" else "degC",
-            )
-            for col in df.columns
-        ]
+        timezone_pattern = kwargs.get("timezone_pattern", r"UTC[+-]?\d+")
+        timezone_match = re.search(timezone_pattern, text)
+        timezone = timezone_match.group() if timezone_match else "UTC"
+        df = handle_timestamps(df, timezone)
+        ts_list = []
+        for col in df.columns:
+            if col in VARIABLE_TYPES_AND_UNITS:
+                unit = VARIABLE_TYPES_AND_UNITS[col][0]
+                ts_list.append(
+                    Timeseries(
+                        ts=df[col],
+                        # Validation will be done in Pydantic
+                        variable=col,  # type: ignore[arg-type]
+                        location=data.get("location"),
+                        sensor=data.get("sensor"),
+                        # Validation will be done in Pydantic
+                        unit=unit,  # type: ignore[arg-type]
+                    )
+                )
+            else:
+                message = (
+                    "Unsupported variable: {col}. Please provide a valid variable type."
+                )
+                raise ValueError(message)
     return ts_list

{gensor-0.0.1 → gensor-0.0.3}/gensor/preprocessing.py RENAMED Viewed

@@ -58,7 +58,7 @@ class Transform:
     def difference(self, **kwargs: int) -> tuple[Series, str]:
         """Difference the time series data.
-        Args:
+        Keyword Arguments:
             periods (int): The number of periods to shift. Defaults to 1.
         Returns:
@@ -90,7 +90,7 @@ class Transform:
         """Apply the Box-Cox transformation to the time series data. Only works
             for all positive datasets!
-        Args:
+        Keyword Arguments:
             lmbda (float): The transformation parameter. Defaults to 0.
         Returns:

{gensor-0.0.1 → gensor-0.0.3}/gensor/trend.py RENAMED Viewed

@@ -6,7 +6,7 @@ from matplotlib import pyplot as plt
 from .dtypes import Timeseries
-def trend_analysis(ts: Timeseries, plot=True) -> None:
+def trend_analysis(ts: Timeseries, plot: bool = True) -> None:
     time_numeric = np.arange(len(ts.timeseries))
     # Perform linear regression using numpy's polyfit

{gensor-0.0.1 → gensor-0.0.3}/pyproject.toml RENAMED Viewed

@@ -1,8 +1,8 @@
 [tool.poetry]
 name = "gensor"
-version = "v0.0.1"
+version = "0.0.3"
 description = "Library for handling groundwater sensor data."
-authors = ["Mateusz Zawadzki <fzawadzkimat@outlook.com>"]
+authors = ["Mateusz Zawadzki <zawadzkimat@outlook.com>"]
 repository = "https://github.com/zawadzkim/gensor"
 documentation = "https://zawadzkim.github.io/gensor/"
 readme = "README.md"
@@ -38,6 +38,7 @@ pandas-stubs = "^2.2.2.240807"
 mkdocs = "^1.6.1"
 mkdocs-material = "^9.5.34"
 mkdocstrings-python = "^1.11.1"
+mkdocs-autorefs = "^1.2.0"
 [build-system]
 requires = ["poetry-core>=1.0.0"]

gensor-0.0.1/gensor/db/__init__.py DELETED Viewed

@@ -1,3 +0,0 @@
-from .connection import DatabaseConnection
-__all__ = ["DatabaseConnection"]

{gensor-0.0.1 → gensor-0.0.3}/LICENSE RENAMED Viewed

File without changes

{gensor-0.0.1 → gensor-0.0.3}/README.md RENAMED Viewed

File without changes

{gensor-0.0.1 → gensor-0.0.3}/gensor/__init__.py RENAMED Viewed

File without changes

{gensor-0.0.1 → gensor-0.0.3}/gensor/parse/__init__.py RENAMED Viewed

File without changes

{gensor-0.0.1 → gensor-0.0.3}/gensor/smoothing.py RENAMED Viewed

File without changes

{gensor-0.0.1 → gensor-0.0.3}/py.typed RENAMED Viewed

File without changes

gensor 0.0.1__tar.gz → 0.0.3__tar.gz

gensor 0.0.1tar.gz → 0.0.3tar.gz