gensor 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gensor/getters.py ADDED
@@ -0,0 +1,79 @@
1
+ """Fetching the data from various sources."""
2
+
3
+ from pathlib import Path
4
+ from typing import Literal
5
+
6
+ from pandas import read_sql
7
+ from sqlalchemy import MetaData, Table, select
8
+
9
+ from .db.connection import DatabaseConnection
10
+ from .dtypes import Dataset, Timeseries
11
+ from .exceptions import NoFilesToLoad
12
+ from .parse import parse_vanessen_csv
13
+
14
+
15
def read_from_csv(path: Path, file_format: Literal["vanessen"] = "vanessen", **kwargs):
    """Load logger CSV file(s) into a Dataset.

    Args:
        path (Path): A single file, or a directory whose direct child files are
            all loaded (sub-directories are not descended into).
        file_format (str): Key selecting the parser. Only "vanessen" is available.
        **kwargs (dict): Optional keyword arguments passed through to the parser
            (`parse_vanessen_csv()`), e.g.:
            serial_number_pattern (str): Regex extracting the serial number.
            location_pattern (str): Regex extracting the station.
            col_names (list): The column names for the dataframe.

    Returns:
        Dataset: A dataset to which the parsed timeseries of every file were added.

    Raises:
        TypeError: If `path` is not a `Path` instance.
        KeyError: If `file_format` has no registered parser.
        NoFilesToLoad: If `path` is a directory that contains no files.
    """
    if not isinstance(path, Path):
        message = "The path argument must be a Path object."
        raise TypeError(message)

    parsers = {
        "vanessen": parse_vanessen_csv,
    }
    parser = parsers[file_format]

    if path.is_dir():
        # Sorted so the load order does not depend on the OS directory listing.
        files = sorted(entry for entry in path.iterdir() if entry.is_file())
        # Checked after filtering: a directory holding only sub-directories has
        # nothing to load either.
        if not files:
            raise NoFilesToLoad()
    else:
        files = [path]

    ds = Dataset()
    for f in files:
        print(f"Loading file: {f}")
        ts_in_file: list = parser(f, **kwargs)
        ds.add(ts_in_file)

    return ds
49
+
50
+
51
def read_from_sql(
    db: DatabaseConnection, location: str, sensor: str, variable: str, unit: str
) -> Timeseries:
    """Return a timeseries read from a SQL database.

    The table name is built as ``{location}_{sensor}_{variable}`` and the whole
    table is read into a dataframe.

    Parameters:
        db (DatabaseConnection): The database connection object; its `engine`
            attribute is used for reflection and for reading.
        location (str): The station name
        sensor (str): Sensor name
        variable (str): The measurement type
        unit (str): Unit of the measurement

    Returns:
        Timeseries: A single Timeseries object wrapping the table content.
    """
    metadata = MetaData()
    # Reflect the columns from the database: a Table declared without columns
    # has nothing to select, so select() over it would render an empty column list.
    table = Table(
        f"{location}_{sensor}_{variable}", metadata, autoload_with=db.engine
    )

    df = read_sql(select(table), con=db.engine)

    # NOTE(review): the CSV parser builds Timeseries with `ts=` while this call
    # uses `timeseries=` - confirm which keyword the model accepts.
    return Timeseries(
        timeseries=df, variable=variable, location=location, sensor=sensor, unit=unit
    )
75
+
76
+
77
def read_from_api() -> "Dataset":
    """Fetch data from the API.

    Raises:
        NotImplementedError: Always; this data source is not implemented yet.
    """
    # `NotImplemented` is a sentinel reserved for binary-operator dispatch;
    # returning it from a regular function hands callers a truthy non-Dataset.
    message = "Fetching data from an API is not implemented yet."
    raise NotImplementedError(message)
@@ -0,0 +1,3 @@
1
+ from .vanessen import parse_vanessen_csv
2
+
3
+ __all__ = ["parse_vanessen_csv"]
@@ -0,0 +1,125 @@
1
+ """Logic parsing CSV files from van Essen Instruments Divers."""
2
+
3
+ import re
4
+ from io import StringIO
5
+ from pathlib import Path
6
+ from typing import Any
7
+
8
+ import chardet
9
+ import pytz
10
+ from pandas import DataFrame, read_csv, to_datetime
11
+
12
+ from ..dtypes import Timeseries
13
+
14
+
15
def detect_encoding(path: Path, num_bytes: int = 1024) -> str:
    """Guess the text encoding of a file with chardet.

    Only the leading `num_bytes` bytes of the file are inspected.

    Args:
        path (Path): The path to the file.
        num_bytes (int): How many bytes to sample from the start of the file
            (default is 1024).

    Returns:
        str: The encoding reported by chardet, or "utf-8" when chardet reports none.
    """
    with path.open("rb") as handle:
        sample = handle.read(num_bytes)

    guess = chardet.detect(sample)["encoding"]
    if guess:
        return guess
    return "utf-8"
29
+
30
+
31
def handle_timestamps(df: DataFrame, tz: str) -> DataFrame:
    """Make the index of the dataframe timezone-aware.

    The index is parsed to datetimes, localized as UTC and then converted to the
    target timezone. If `tz` starts with "UTC", the remainder is read as a whole
    number of hours (e.g. "UTC+1", "UTC-3"; a bare "UTC" means no offset) and a
    fixed-offset timezone is used. Any other string results in plain UTC.

    The dataframe is modified in place and also returned.

    Args:
        df (pd.DataFrame): The dataframe with the data; its index holds the timestamps.
        tz (str): The timezone string obtained from the file metadata.

    Returns:
        pd.DataFrame: The same dataframe with a timezone-aware index.

    Raises:
        ValueError: If the text after "UTC" is not an integer number of hours.
    """
    if tz.startswith("UTC"):
        offset_text = tz[3:].strip()
        # A bare "UTC" carries no offset; int("") would raise ValueError.
        offset_hours = int(offset_text) if offset_text else 0
        timezone = pytz.FixedOffset(offset_hours * 60)
    else:
        timezone = pytz.UTC

    # NOTE(review): the naive timestamps are interpreted as UTC and then shifted
    # to the target zone, which moves the wall-clock values by the offset. If the
    # logger writes local time, localizing directly to `timezone` would be the
    # correct operation - confirm against the logger documentation.
    df.index = to_datetime(df.index).tz_localize("UTC").tz_convert(timezone)

    return df
52
+
53
+
54
def parse_vanessen_csv(path: Path, **kwargs) -> list[Any]:
    """Parse a van Essen csv file and return a list of Timeseries objects.

    At this point it does not matter whether the file is a barometric or
    piezometric logger file.

    The function uses regex patterns to extract the serial number, the station
    and the timezone from the file. It is important to use appropriate patterns,
    particularly for the station. If the default patterns are not working (which
    most likely will be the case), the user should provide their own patterns as
    keyword arguments; OR (|) can be used inside a pattern.

    Args:
        path (Path): The path to the file.
        **kwargs (dict): Optional keyword arguments.
            serial_number_pattern (str): Regex extracting the serial number.
            location_pattern (str): Regex extracting the station.
            timezone_pattern (str): Regex extracting the timezone string
                (default matches e.g. "UTC+1").
            col_names (list): The column names for the dataframe. The index
                column is looked up by the name "timestamp".

    Returns:
        list: One Timeseries per data column, or an empty list when the file is
        skipped because a pattern could not be found in it.

    Raises:
        ValueError: If the "Date/time" header or the end-of-data marker is
            missing from a file that otherwise matched the patterns.
    """
    patterns = {
        "sensor": kwargs.get("serial_number_pattern", r"[A-Za-z]{2}\d{3,4}"),
        "location": kwargs.get(
            "location_pattern", r"[A-Za-z]{2}\d{2}[A-Za-z]{1}|Barodiver"
        ),
    }
    column_names = kwargs.get("col_names", ["timestamp", "pressure", "temperature"])
    timezone_pattern = kwargs.get("timezone_pattern", r"UTC[+-]?\d+")

    encoding = detect_encoding(path, num_bytes=10_000)

    with path.open(mode="r", encoding=encoding) as f:
        text = f.read()

    # Resolve the metadata patterns; a file lacking any of them is skipped
    # rather than treated as an error, so mixed directories can be loaded.
    metadata = {}
    for key, pattern in patterns.items():
        match = re.search(pattern, text)
        if match is None:
            print(
                f"Skipping file {path} due to missing patterns. If this is not expected, please provide the correct patterns."
            )
            return []
        metadata[key] = match.group()

    # Same best-effort policy for the timezone: without this guard a missing
    # match would crash with AttributeError on `None.group()`.
    timezone_match = re.search(timezone_pattern, text)
    if timezone_match is None:
        print(
            f"Skipping file {path}: no timezone matching the timezone pattern was found. "
            "Provide a `timezone_pattern` if the file uses a different notation."
        )
        return []

    start_marker = "Date/time"
    end_marker = "END OF DATA FILE OF DATALOGGER FOR WINDOWS"
    start = text.find(start_marker)
    end = text.find(end_marker)
    if start == -1 or end == -1:
        message = (
            f"File {path} does not contain the expected data section "
            f"(between '{start_marker}' and '{end_marker}')."
        )
        raise ValueError(message)

    # The header row inside the section is skipped and replaced by `column_names`.
    df = read_csv(
        StringIO(text[start:end]),
        skiprows=1,
        header=None,
        names=column_names,
        index_col="timestamp",
    )

    df = handle_timestamps(df, timezone_match.group())

    return [
        Timeseries(
            ts=df[col],
            variable=col,
            location=metadata.get("location"),
            sensor=metadata.get("sensor"),
            # Every column other than "pressure" is labelled as a temperature.
            unit="cmH2O" if col == "pressure" else "degC",
        )
        for col in df.columns
    ]
@@ -0,0 +1,280 @@
1
+ """Class and methods for preprocessing groundwater level data."""
2
+
3
+ from typing import Any, Literal
4
+
5
+ import numpy as np
6
+ from pandas import Series
7
+ from scipy import stats
8
+ from sklearn.ensemble import IsolationForest
9
+ from sklearn.neighbors import LocalOutlierFactor
10
+ from sklearn.preprocessing import (
11
+ MaxAbsScaler,
12
+ MinMaxScaler,
13
+ RobustScaler,
14
+ StandardScaler,
15
+ )
16
+
17
+
18
class Transform:
    """Apply one transformation or scaling method to a time series.

    The selected method runs at construction time. Its result is stored in
    `transformed_data`; `scaler` holds either the fitted scikit-learn scaler
    object or, for the non-scaler methods, a string naming the transformation.
    """

    def __init__(
        self,
        data: Series,
        method: Literal[
            "difference",
            "log",
            "square_root",
            "box_cox",
            "standard_scaler",
            "minmax_scaler",
            "robust_scaler",
            "maxabs_scaler",
        ],
        **kwargs: Any,
    ) -> None:
        """Run `method` on `data`.

        Args:
            data (pandas.Series): The time series data.
            method (str): Name of the transformation to apply.
            **kwargs: Forwarded to `difference` (`periods`) and `box_cox`
                (`lmbda`); ignored by every other method.

        Raises:
            NotImplementedError: If `method` is not one of the supported names.
        """
        self.data = data

        handlers = {
            "difference": lambda: self.difference(**kwargs),
            "log": self.log,
            "square_root": self.square_root,
            "box_cox": lambda: self.box_cox(**kwargs),
            "standard_scaler": self.standard_scaler,
            "minmax_scaler": self.minmax_scaler,
            "robust_scaler": self.robust_scaler,
            "maxabs_scaler": self.maxabs_scaler,
        }

        if method not in handlers:
            message = f"Unknown transformation method: {method!r}"
            raise NotImplementedError(message)

        self.transformed_data, self.scaler = handlers[method]()

    def get_transformation(self) -> tuple:
        """Return the `(transformed_data, scaler)` pair computed at construction."""
        return self.transformed_data, self.scaler

    def difference(self, **kwargs: int) -> tuple[Series, str]:
        """Difference the time series data.

        Args:
            periods (int): The number of periods to shift. Defaults to 1.

        Returns:
            tuple: The differenced series (leading NaN rows dropped) and the
            label "difference".
        """
        periods = kwargs.get("periods", 1)
        transformed = self.data.diff(periods=periods).dropna()
        return (transformed, "difference")

    def log(self) -> tuple[Series, str]:
        """Take the natural logarithm of the time series data.

        Values that are zero or negative are passed through unchanged.

        Returns:
            tuple: The transformed series and the label "log".
        """
        # NOTE(review): non-positive values stay on the original scale, so the
        # result can mix raw and log-transformed values - confirm this is intended.
        transformed = self.data.apply(lambda x: x if x <= 0 else np.log(x))
        return (transformed, "log")

    def square_root(self) -> tuple[Series, str]:
        """Take the square root of the time series data.

        Values that are zero or negative are passed through unchanged.

        Returns:
            tuple: The transformed series and the label "square_root".
        """
        transformed = self.data.apply(lambda x: x if x <= 0 else np.sqrt(x))
        return (transformed, "square_root")

    def box_cox(self, **kwargs: float) -> tuple[Series, str]:
        """Apply the Box-Cox transformation to the time series data.

        Only works for strictly positive datasets.

        Args:
            lmbda (float | None): The transformation parameter. When omitted or
                None, scipy estimates the lambda that maximizes the
                log-likelihood. `lmbda=0` is a valid value (log transform).

        Returns:
            tuple: The Box-Cox transformed series and the label "box-cox".

        Raises:
            ValueError: If any value is zero or negative.
        """
        lmbda = kwargs.get("lmbda")

        if (self.data <= 0).any():
            message = (
                "Box-Cox transformation requires all values to be strictly positive."
            )
            raise ValueError(message)

        values = self.data.to_numpy()
        # scipy returns `(transformed, fitted_lambda)` only when it has to
        # estimate lambda; with an explicit lambda it returns just the array.
        # Test against None so that an explicit lambda of 0 is honoured.
        if lmbda is None:
            transformed, _fitted_lambda = stats.boxcox(values)
        else:
            transformed = stats.boxcox(values, lmbda=lmbda)

        return Series(transformed, index=self.data.index), "box-cox"

    def _scale(self, scaler: Any) -> tuple[Series, Any]:
        """Fit `scaler` on the data and return the scaled series with the fitted scaler."""
        # scikit-learn scalers expect a 2-D array: one column, one row per sample.
        column = self.data.to_numpy().reshape(-1, 1)
        scaled_values = scaler.fit_transform(column).flatten()
        return Series(scaled_values, index=self.data.index), scaler

    def standard_scaler(self) -> tuple[Series, Any]:
        """Normalize a pandas Series using StandardScaler."""
        return self._scale(StandardScaler())

    def minmax_scaler(self) -> tuple[Series, Any]:
        """Normalize a pandas Series using MinMaxScaler."""
        return self._scale(MinMaxScaler())

    def robust_scaler(self) -> tuple[Series, Any]:
        """Normalize a pandas Series using RobustScaler."""
        return self._scale(RobustScaler())

    def maxabs_scaler(self) -> tuple[Series, Any]:
        """Normalize a pandas Series using MaxAbsScaler."""
        return self._scale(MaxAbsScaler())
152
+
153
+
154
class OutlierDetection:
    """Class for detecting outliers in time series data.

    The selected method runs at construction time and the detected outliers are
    stored in the `outliers` attribute.
    """

    def __init__(
        self,
        data: Series,
        method: Literal["iqr", "zscore", "isolation_forest", "lof"],
        **kwargs: Any,
    ) -> None:
        """Find outliers in a time series using the specified method.

        Args:
            data (pandas.Series): The time series data.
            method (str): Name of the detection method.
            **kwargs: Forwarded to the selected method.

        Raises:
            NotImplementedError: If `method` is not one of the supported names.
        """
        detectors = {
            "iqr": self.iqr,
            "zscore": self.zscore,
            "isolation_forest": self.isolation_forest,
            "lof": self.lof,
        }

        if method not in detectors:
            message = f"Unknown outlier detection method: {method!r}"
            raise NotImplementedError(message)

        self.outliers = detectors[method](data, **kwargs)

    def iqr(self, data: Series, **kwargs: float) -> Series:
        """Use interquartile range (IQR).

        Values below ``Q1 - k * IQR`` or above ``Q3 + k * IQR`` are outliers.

        Parameters:
            data (pandas.Series): The time series data.

        Keyword Args:
            k (float): The multiplier for the IQR to define the range. Defaults to 1.5.

        Returns:
            pandas.Series: Outliers detected in the data.
        """
        k: float = kwargs.get("k", 1.5)

        q1 = data.quantile(0.25)
        q3 = data.quantile(0.75)
        spread = q3 - q1

        lower_bound = q1 - k * spread
        upper_bound = q3 + k * spread

        return data[(data < lower_bound) | (data > upper_bound)]

    def zscore(self, data: Series, **kwargs: float) -> Series:
        """Detect outliers in a time series using the z-score method.

        A value is an outlier when its absolute distance from the mean exceeds
        ``threshold`` standard deviations.

        Args:
            data (pandas.Series): The time series data.

        Keyword Args:
            threshold (float): The threshold for the z-score method. Defaults to 3.0.

        Returns:
            pandas.Series: Outliers detected in the data.
        """
        threshold = kwargs.get("threshold", 3.0)

        mean = data.mean()
        std_dev = data.std()

        outliers: Series = data[(data - mean).abs() > threshold * std_dev]

        return outliers

    def isolation_forest(self, data: Series, **kwargs: Any) -> Series:
        """Detect outliers in a time series using the isolation forest method.

        All keyword arguments are passed unchanged to
        `sklearn.ensemble.IsolationForest`; nothing is defaulted here, so the
        scikit-learn defaults apply.

        Args:
            data (pandas.Series): The time series data.

        Keyword Args:
            n_estimators (int): The number of base estimators in the ensemble.
            max_samples (int | 'auto' | float): The number of samples to draw from X to train each base estimator.
            contamination ('auto' | float): The proportion of outliers in the data.
            max_features (int | float): The number of features to draw from X to train each base estimator.
            bootstrap (bool): Whether to use bootstrapping when sampling the data.
            n_jobs (int | None): The number of jobs to run in parallel.
            random_state (int | RandomState | None): The random state to use.
            verbose (int): The verbosity level.
            warm_start (bool): Whether to reuse the solution of the previous call to fit and add more estimators to the ensemble.

        Returns:
            pandas.Series: Outliers detected in the data.

        Note:
            For details on kwargs and their defaults see: sklearn.ensemble.IsolationForest.
        """
        # scikit-learn expects a 2-D array: one feature column, one row per sample.
        X = data.to_numpy().reshape(-1, 1)

        clf = IsolationForest(**kwargs)
        clf.fit(X)

        # predict() labels outliers with -1.
        is_outlier = clf.predict(X)
        outliers: Series = data[is_outlier == -1]

        return outliers

    def lof(self, data: Series, **kwargs: Any) -> Series:
        """Detect outliers in a time series using the local outlier factor (LOF) method.

        All keyword arguments are passed unchanged to
        `sklearn.neighbors.LocalOutlierFactor`; nothing is defaulted here, so the
        scikit-learn defaults apply.

        Args:
            data (pandas.Series): The time series data.

        Keyword Args:
            n_neighbors (int): The number of neighbors to consider for each sample.
            algorithm (str): The algorithm to use. Either 'auto', 'ball_tree', 'kd_tree' or 'brute'.
            leaf_size (int): The leaf size of the tree.
            metric (str): The distance metric to use.
            p (int): The power parameter for the Minkowski metric.
            contamination ('auto' | float): The proportion of outliers in the data.
            novelty (bool): Must stay False (the scikit-learn default): this
                method relies on `fit_predict`, which is not available in
                novelty-detection mode.
            n_jobs (int | None): The number of jobs to run in parallel.

        Returns:
            pandas.Series: Outliers detected in the data.

        Note:
            For details on kwargs and their defaults see: sklearn.neighbors.LocalOutlierFactor.
        """
        X = data.to_numpy().reshape(-1, 1)

        clf = LocalOutlierFactor(**kwargs)

        # fit_predict() labels outliers with -1.
        is_outlier = clf.fit_predict(X)
        outliers: Series = data[is_outlier == -1]

        return outliers
gensor/smoothing.py ADDED
@@ -0,0 +1,66 @@
1
+ """Tools for smoothing the data."""
2
+
3
+ from matplotlib import pyplot as plt
4
+ from pandas import Series
5
+ from sklearn.metrics import mean_squared_error
6
+
7
+ from .dtypes import Timeseries
8
+
9
+
10
+ def smooth_data(
11
+ data: Timeseries,
12
+ window: int = 5,
13
+ method: str = "rolling_mean",
14
+ print_statistics: bool = False,
15
+ inplace: bool = False,
16
+ plot: bool = False,
17
+ ) -> Series | None:
18
+ """Smooth a time series using a rolling mean or median.
19
+
20
+ Args:
21
+ data (pandas.Series): The time series data.
22
+ window (int): The size of the window for the rolling mean or median. Defaults to 5.
23
+ method (str): The method to use for smoothing. Either 'rolling_mean' or 'rolling_median'. Defaults to 'rolling_mean'.
24
+
25
+ Returns:
26
+ pandas.Series: The smoothed time series.
27
+ """
28
+ if method == "rolling_mean":
29
+ smoothed_data = data.ts.rolling(window=window, center=True).mean()
30
+ elif method == "rolling_median":
31
+ smoothed_data = data.ts.rolling(window=window, center=True).median()
32
+ else:
33
+ raise NotImplementedError()
34
+
35
+ valid_indices = smoothed_data.notna()
36
+ original_data_aligned = data.ts[valid_indices]
37
+ smoothed_data_aligned = smoothed_data[valid_indices]
38
+
39
+ if print_statistics:
40
+ mse = mean_squared_error(original_data_aligned, smoothed_data_aligned)
41
+ print(f"Mean Squared Error of {method}: {mse:.2f}")
42
+
43
+ if plot:
44
+ plt.figure(figsize=(12, 6))
45
+ plt.plot(
46
+ data.timeseries.index, data.timeseries, label="Original Data", color="black"
47
+ )
48
+ plt.plot(
49
+ smoothed_data.index,
50
+ smoothed_data,
51
+ label=f"Moving Average ({method})",
52
+ color="green",
53
+ linestyle="dotted",
54
+ )
55
+
56
+ plt.legend()
57
+ plt.title("Groundwater Level with Moving Average")
58
+ plt.xlabel("Date")
59
+ plt.ylabel("Groundwater Level")
60
+ plt.show()
61
+
62
+ if inplace:
63
+ data.ts = smoothed_data
64
+ return None
65
+ else:
66
+ return smoothed_data
gensor/trend.py ADDED
@@ -0,0 +1,31 @@
1
+ """Analyse trends in the logger data."""
2
+
3
+ import numpy as np
4
+ from matplotlib import pyplot as plt
5
+
6
+ from .dtypes import Timeseries
7
+
8
+
9
def trend_analysis(ts: "Timeseries", plot: bool = True) -> None:
    """Fit a linear trend to a timeseries, print it and optionally plot it.

    The regression runs against the sample position (0, 1, 2, ...), not against
    the timestamps, so the slope is expressed per sample. Missing or non-finite
    values are left out of the fit.

    Args:
        ts (Timeseries): The timeseries object; its `timeseries` attribute holds
            the series.
        plot (bool): Show the series together with the fitted line. Defaults to True.
    """
    # NOTE(review): this reads `ts.timeseries` while other modules of the
    # package access the series as `ts.ts` - confirm the attribute name.
    series = ts.timeseries
    time_numeric = np.arange(len(series))
    values = np.asarray(series, dtype=float)

    # np.polyfit cannot handle NaN/inf in the input, so fit on finite samples only.
    finite = np.isfinite(values)

    # First-degree polyfit returns the slope and intercept of the best fit line.
    slope, intercept = np.polyfit(time_numeric[finite], values[finite], 1)

    print(f"Slope: {slope}, Intercept: {intercept}")

    if plot:
        # Evaluate the line at every position, including those left out of the fit.
        trend_line = intercept + slope * time_numeric

        plt.figure(figsize=(10, 5))
        plt.plot(series.index, series, label="Original Data")
        plt.plot(series.index, trend_line, color="red", label="Trend Line")
        plt.xlabel("Time")
        plt.ylabel("Groundwater Level")
        plt.title("Groundwater Level Trend Analysis")
        plt.legend()
        plt.show()
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024, Mateusz Zawadzki
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,75 @@
1
+ Metadata-Version: 2.1
2
+ Name: gensor
3
+ Version: 0.0.1
4
+ Summary: Library for handling groundwater sensor data.
5
+ Home-page: https://github.com/zawadzkim/gensor
6
+ Author: Mateusz Zawadzki
7
+ Author-email: fzawadzkimat@outlook.com
8
+ Requires-Python: >=3.11
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.11
11
+ Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: chardet (>=5.2.0,<6.0.0)
13
+ Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
14
+ Requires-Dist: numpy (>=2.1.0,<3.0.0)
15
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
16
+ Requires-Dist: pandera (>=0.20.3,<0.21.0)
17
+ Requires-Dist: pydantic (>=2.8.2,<3.0.0)
18
+ Requires-Dist: pytz (>=2024.1,<2025.0)
19
+ Requires-Dist: scikit-learn (>=1.5.1,<2.0.0)
20
+ Requires-Dist: scipy (>=1.14.1,<2.0.0)
21
+ Requires-Dist: sqlalchemy (>=2.0.32,<3.0.0)
22
+ Project-URL: Documentation, https://zawadzkim.github.io/gensor/
23
+ Project-URL: Repository, https://github.com/zawadzkim/gensor
24
+ Description-Content-Type: text/markdown
25
+
26
+ # gensor
27
+
28
+ [![Release](https://img.shields.io/github/v/release/zawadzkim/gensor)](https://img.shields.io/github/v/release/zawadzkim/gensor)
29
+ [![Build status](https://img.shields.io/github/actions/workflow/status/zawadzkim/gensor/main.yml?branch=main)](https://github.com/zawadzkim/gensor/actions/workflows/main.yml?query=branch%3Amain)
30
+ [![codecov](https://codecov.io/gh/zawadzkim/gensor/branch/main/graph/badge.svg)](https://codecov.io/gh/zawadzkim/gensor)
31
+ [![Commit activity](https://img.shields.io/github/commit-activity/m/zawadzkim/gensor)](https://img.shields.io/github/commit-activity/m/zawadzkim/gensor)
32
+ [![License](https://img.shields.io/github/license/zawadzkim/gensor)](https://img.shields.io/github/license/zawadzkim/gensor)
33
+
34
+ Library for handling groundwater sensor data.
35
+
36
+ - **GitHub repository**: <https://github.com/zawadzkim/gensor/>
37
+ - **Documentation**: <https://zawadzkim.github.io/gensor/>
38
+
39
+ ## Getting started with your project
40
+
41
+ First, create a repository on GitHub with the same name as this project, and then run the following commands:
42
+
43
+ ```bash
44
+ git init -b main
45
+ git add .
46
+ git commit -m "init commit"
47
+ git remote add origin git@github.com:zawadzkim/gensor.git
48
+ git push -u origin main
49
+ ```
50
+
51
+ Finally, install the environment and the pre-commit hooks with
52
+
53
+ ```bash
54
+ make install
55
+ ```
56
+
57
+ You are now ready to start development on your project!
58
+ The CI/CD pipeline will be triggered when you open a pull request, merge to main, or when you create a new release.
59
+
60
+ To finalize the set-up for publishing to PyPI or Artifactory, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/publishing/#set-up-for-pypi).
61
+ For activating the automatic documentation with MkDocs, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/mkdocs/#enabling-the-documentation-on-github).
62
+ To enable the code coverage reports, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/codecov/).
63
+
64
+ ## Releasing a new version
65
+
66
+ - Create an API Token on [PyPI](https://pypi.org/).
67
+ - Add the API Token to your project's secrets with the name `PYPI_TOKEN` by visiting [this page](https://github.com/zawadzkim/gensor/settings/secrets/actions/new).
68
+ - Create a [new release](https://github.com/zawadzkim/gensor/releases/new) on GitHub.
69
+ - Create a new tag in the form `*.*.*`.
70
+ - For more details, see [here](https://fpgmaas.github.io/cookiecutter-poetry/features/cicd/#how-to-trigger-a-release).
71
+
72
+ ---
73
+
74
+ Repository initiated with [fpgmaas/cookiecutter-poetry](https://github.com/fpgmaas/cookiecutter-poetry).
75
+