gensor 0.1.2__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. {gensor-0.1.2 → gensor-0.1.3}/PKG-INFO +1 -1
  2. gensor-0.1.2/gensor/core/timeseries.py → gensor-0.1.3/gensor/core/base.py +27 -38
  3. gensor-0.1.3/gensor/core/timeseries.py +169 -0
  4. {gensor-0.1.2 → gensor-0.1.3}/pyproject.toml +1 -1
  5. {gensor-0.1.2 → gensor-0.1.3}/LICENSE +0 -0
  6. {gensor-0.1.2 → gensor-0.1.3}/README.md +0 -0
  7. {gensor-0.1.2 → gensor-0.1.3}/gensor/__init__.py +0 -0
  8. {gensor-0.1.2 → gensor-0.1.3}/gensor/analysis/__init__.py +0 -0
  9. {gensor-0.1.2 → gensor-0.1.3}/gensor/analysis/outliers.py +0 -0
  10. {gensor-0.1.2 → gensor-0.1.3}/gensor/analysis/stats.py +0 -0
  11. {gensor-0.1.2 → gensor-0.1.3}/gensor/config.py +0 -0
  12. {gensor-0.1.2 → gensor-0.1.3}/gensor/core/__init__.py +0 -0
  13. {gensor-0.1.2 → gensor-0.1.3}/gensor/core/dataset.py +0 -0
  14. {gensor-0.1.2 → gensor-0.1.3}/gensor/core/indexer.py +0 -0
  15. {gensor-0.1.2 → gensor-0.1.3}/gensor/db/__init__.py +0 -0
  16. {gensor-0.1.2 → gensor-0.1.3}/gensor/db/connection.py +0 -0
  17. {gensor-0.1.2 → gensor-0.1.3}/gensor/exceptions.py +0 -0
  18. {gensor-0.1.2 → gensor-0.1.3}/gensor/io/__init__.py +0 -0
  19. {gensor-0.1.2 → gensor-0.1.3}/gensor/io/read.py +0 -0
  20. {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/__init__.py +0 -0
  21. {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/plain.py +0 -0
  22. {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/utils.py +0 -0
  23. {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/vanessen.py +0 -0
  24. {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/__init__.py +0 -0
  25. {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/compensation.py +0 -0
  26. {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/smoothing.py +0 -0
  27. {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/transform.py +0 -0
  28. {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/Barodiver_220427183008_BY222.csv +0 -0
  29. {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/PB01A_moni_AV319_220427183019_AV319.csv +0 -0
  30. {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/PB02A_plain.csv +0 -0
  31. {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/__init__.py +0 -0
  32. {gensor-0.1.2 → gensor-0.1.3}/py.typed +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gensor
3
- Version: 0.1.2
3
+ Version: 0.1.3
4
4
  Summary: Library for handling groundwater sensor data.
5
5
  Home-page: https://github.com/zawadzkim/gensor
6
6
  Author: Mateusz Zawadzki
@@ -1,6 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- from typing import Any, Literal
3
+ from typing import Any, Literal, TypeVar
4
4
 
5
5
  import pandas as pd
6
6
  import pandera as pa
@@ -15,6 +15,8 @@ from gensor.db import DatabaseConnection
15
15
  from gensor.exceptions import TimeseriesUnequal
16
16
  from gensor.processing.transform import Transformation
17
17
 
18
+ T = TypeVar("T", bound="BaseTimeseries")
19
+
18
20
  ts_schema = pa.SeriesSchema(
19
21
  float,
20
22
  index=pa.Index(pd.DatetimeTZDtype(tz="UTC"), coerce=False),
@@ -22,18 +24,11 @@ ts_schema = pa.SeriesSchema(
22
24
  )
23
25
 
24
26
 
25
- class Timeseries(pyd.BaseModel):
26
- """Timeseries from a sensor including measurement metadata.
27
-
28
- This is class for any sensor timeseries. The basic required attributes are
29
- just the ts, variable and unit. SensorInfo object is created from the
30
- relevant kwargs if they are passed.
27
+ class BaseTimeseries(pyd.BaseModel):
28
+ """Generic base class for timeseries with metadata.
31
29
 
32
- Timeseries represents a series of measurements of a single variable, from a
33
- single sensor with unique timestamps.
34
-
35
- TODO: Perhaps it would be cool to implement kind of a tracking of which
36
- analyses were performed on the timeseries?
30
+ Timeseries is a series of measurements of a single variable, in the same unit, from a
31
+ single location with unique timestamps.
37
32
 
38
33
  Attributes:
39
34
  ts (pd.Series): The timeseries data.
@@ -41,9 +36,8 @@ class Timeseries(pyd.BaseModel):
41
36
  The type of the measurement.
42
37
  unit (Literal['degc', 'cmh2o', 'ms/cm', 'm/s', 'm asl', 'm']): The unit of
43
38
  the measurement.
44
- sensor (SensorInfo): The serial number of the sensor.
45
- analysis (Analysis): An object containing details of analysis done
46
- on the timeseries.
39
+ outliers (pd.Series): Measurements marked as outliers.
40
+ transformation (Any): Metadata of the transformation the timeseries has undergone.
47
41
 
48
42
  Methods:
49
43
  validate_ts: if the pd.Series is not exactly what is required, coerce.
@@ -59,8 +53,6 @@ class Timeseries(pyd.BaseModel):
59
53
  ]
60
54
  unit: Literal["degc", "cmh2o", "ms/cm", "m/s", "m asl", "m"]
61
55
  location: str | None = None
62
- sensor: str | None = None
63
- sensor_alt: float | None = None
64
56
  outliers: pd.Series | None = pyd.Field(default=None, repr=False)
65
57
  transformation: Any = pyd.Field(default=None, repr=False)
66
58
 
@@ -76,15 +68,13 @@ class Timeseries(pyd.BaseModel):
76
68
 
77
69
  def __eq__(self, other: object) -> bool:
78
70
  """Check equality based on variable, unit and location."""
79
- if not isinstance(other, Timeseries):
71
+ if not isinstance(other, BaseTimeseries):
80
72
  return NotImplemented
81
73
 
82
74
  return (
83
75
  self.variable == other.variable
84
76
  and self.unit == other.unit
85
77
  and self.location == other.location
86
- and self.sensor == other.sensor
87
- and self.sensor_alt == other.sensor_alt
88
78
  )
89
79
 
90
80
  def __getattr__(self, attr: Any) -> Any:
@@ -126,9 +116,9 @@ class Timeseries(pyd.BaseModel):
126
116
  return ts_schema.validate(v)
127
117
  return v
128
118
 
129
- def concatenate(self, other: Timeseries) -> Timeseries:
119
+ def concatenate(self: T, other: T) -> T:
130
120
  """Concatenate two Timeseries objects if they are considered equal."""
131
- if not isinstance(other, Timeseries):
121
+ if not isinstance(other, type(self)):
132
122
  return NotImplemented
133
123
 
134
124
  if self == other:
@@ -140,11 +130,11 @@ class Timeseries(pyd.BaseModel):
140
130
  raise TimeseriesUnequal()
141
131
 
142
132
  def resample(
143
- self,
133
+ self: T,
144
134
  freq: Any,
145
135
  agg_func: Any = pd.Series.mean,
146
136
  **resample_kwargs: Any,
147
- ) -> Timeseries:
137
+ ) -> T:
148
138
  """Resample the timeseries to a new frequency with a specified
149
139
  aggregation function.
150
140
 
@@ -165,7 +155,7 @@ class Timeseries(pyd.BaseModel):
165
155
  return self.model_copy(update={"ts": resampled_ts}, deep=True)
166
156
 
167
157
  def transform(
168
- self,
158
+ self: T,
169
159
  method: Literal[
170
160
  "difference",
171
161
  "log",
@@ -177,7 +167,7 @@ class Timeseries(pyd.BaseModel):
177
167
  "maxabs_scaler",
178
168
  ],
179
169
  **transformer_kwargs: Any,
180
- ) -> Timeseries:
170
+ ) -> T:
181
171
  """Transforms the timeseries using the specified method.
182
172
 
183
173
  Parameters:
@@ -200,13 +190,13 @@ class Timeseries(pyd.BaseModel):
200
190
  )
201
191
 
202
192
  def detect_outliers(
203
- self,
193
+ self: T,
204
194
  method: Literal["iqr", "zscore", "isolation_forest", "lof"],
205
195
  rolling: bool = False,
206
196
  window: int = 6,
207
197
  remove: bool = True,
208
198
  **kwargs: Any,
209
- ) -> Timeseries:
199
+ ) -> T:
210
200
  """Detects outliers in the timeseries using the specified method.
211
201
 
212
202
  Parameters:
@@ -230,8 +220,8 @@ class Timeseries(pyd.BaseModel):
230
220
  return self
231
221
 
232
222
  def mask_with(
233
- self, other: Timeseries | pd.Series, mode: Literal["keep", "remove"] = "remove"
234
- ) -> Timeseries:
223
+ self: T, other: T | pd.Series, mode: Literal["keep", "remove"] = "remove"
224
+ ) -> T:
235
225
  """
236
226
  Removes records not present in 'other' by index.
237
227
 
@@ -246,7 +236,7 @@ class Timeseries(pyd.BaseModel):
246
236
  """
247
237
  if isinstance(other, pd.Series):
248
238
  mask = other
249
- elif isinstance(other, Timeseries):
239
+ elif isinstance(other, BaseTimeseries):
250
240
  mask = other.ts
251
241
 
252
242
  if mode == "keep":
@@ -259,7 +249,7 @@ class Timeseries(pyd.BaseModel):
259
249
 
260
250
  return self.model_copy(update={"ts": masked_data}, deep=True)
261
251
 
262
- def to_sql(self, db: DatabaseConnection) -> str:
252
+ def to_sql(self: T, db: DatabaseConnection) -> str:
263
253
  """Converts the timeseries to a list of dictionaries and uploads it to the database.
264
254
 
265
255
  The Timeseries data is uploaded to the SQL database by using the pandas
@@ -276,7 +266,9 @@ class Timeseries(pyd.BaseModel):
276
266
  timestamp_start_fmt = self.start.strftime("%Y%m%d%H%M%S")
277
267
 
278
268
  # Construct the schema name using the location, variable, unit, and timestamp
279
- schema_name = f"{self.location}_{self.sensor}_{self.variable}_{self.unit}_{timestamp_start_fmt}".lower()
269
+ schema_name = (
270
+ f"{self.location}_{self.variable}_{self.unit}_{timestamp_start_fmt}".lower()
271
+ )
280
272
 
281
273
  # Ensure the index is a pandas DatetimeIndex
282
274
  if isinstance(self.ts.index, pd.DatetimeIndex):
@@ -311,11 +303,8 @@ class Timeseries(pyd.BaseModel):
311
303
  metadata_stmt = sqlite_insert(metadata_schema).values(
312
304
  table_name=schema_name,
313
305
  location=self.location,
314
- sensor=self.sensor,
315
306
  variable=self.variable,
316
307
  unit=self.unit,
317
- logger_alt=self.sensor_alt,
318
- location_alt=self.sensor_alt,
319
308
  timestamp_start=timestamp_start_fmt,
320
309
  timestamp_end=self.end.strftime("%Y%m%d%H%M%S"),
321
310
  )
@@ -334,7 +323,7 @@ class Timeseries(pyd.BaseModel):
334
323
  return f"{schema_name} table and metadata updated."
335
324
 
336
325
  def plot(
337
- self, include_outliers: bool = False, ax: Any = None, **plot_kwargs: Any
326
+ self: T, include_outliers: bool = False, ax: Any = None, **plot_kwargs: Any
338
327
  ) -> tuple:
339
328
  """Plots the timeseries data.
340
329
 
@@ -356,7 +345,7 @@ class Timeseries(pyd.BaseModel):
356
345
  ax.plot(
357
346
  self.ts.index,
358
347
  self.ts,
359
- label=f"{self.location} ({self.sensor})",
348
+ label=f"{self.location}",
360
349
  **plot_kwargs,
361
350
  )
362
351
 
@@ -0,0 +1,169 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ import pandas as pd
6
+ import pandera as pa
7
+ import pydantic as pyd
8
+ from matplotlib import pyplot as plt
9
+ from sqlalchemy import Table
10
+ from sqlalchemy.dialects.sqlite import insert as sqlite_insert
11
+
12
+ from gensor.core.base import BaseTimeseries
13
+ from gensor.db import DatabaseConnection
14
+
15
+ ts_schema = pa.SeriesSchema(
16
+ float,
17
+ index=pa.Index(pd.DatetimeTZDtype(tz="UTC"), coerce=False),
18
+ coerce=True,
19
+ )
20
+
21
+
22
+ class Timeseries(BaseTimeseries):
23
+ """Timeseries for groundwater sensor data
24
+
25
+ Attributes:
26
+ ts (pd.Series): The timeseries data.
27
+ variable (Literal['temperature', 'pressure', 'conductivity', 'flux']):
28
+ The type of the measurement.
29
+ unit (Literal['degc', 'cmh2o', 'ms/cm', 'm/s', 'm asl', 'm']): The unit of
30
+ the measurement.
31
+ sensor (str | None): The serial number of the sensor.
32
+
33
+ Methods:
34
+ validate_ts: if the pd.Series is not exactly what is required, coerce.
35
+ """
36
+
37
+ model_config = pyd.ConfigDict(
38
+ arbitrary_types_allowed=True, validate_assignment=True
39
+ )
40
+
41
+ sensor: str | None = None
42
+ sensor_alt: float | None = None
43
+
44
+ def __eq__(self, other: object) -> bool:
45
+ """Check equality based on location, sensor, variable, unit and sensor_alt."""
46
+ if not isinstance(other, Timeseries):
47
+ return NotImplemented
48
+
49
+ return (
50
+ self.variable == other.variable
51
+ and self.unit == other.unit
52
+ and self.location == other.location
53
+ and self.sensor == other.sensor
54
+ and self.sensor_alt == other.sensor_alt
55
+ )
56
+
57
+ def to_sql(self, db: DatabaseConnection) -> str:
58
+ """Converts the timeseries to a list of dictionaries and uploads it to the database.
59
+
60
+ The Timeseries data is uploaded to the SQL database by using the pandas
61
+ `to_sql` method. Additionally, metadata about the timeseries is stored in the
62
+ 'timeseries_metadata' table.
63
+
64
+ Args:
65
+ db (DatabaseConnection): The database connection object.
66
+
67
+ Returns:
68
+ str: A message confirming that the table and its metadata were updated.
69
+ """
70
+ # Format the start timestamp as 'YYYYMMDDHHMMSS'
71
+ timestamp_start_fmt = self.start.strftime("%Y%m%d%H%M%S")
72
+
73
+ # Construct the schema name using the location, sensor, variable, unit, and timestamp
74
+ schema_name = f"{self.location}_{self.sensor}_{self.variable}_{self.unit}_{timestamp_start_fmt}".lower()
75
+
76
+ # Ensure the index is a pandas DatetimeIndex
77
+ if isinstance(self.ts.index, pd.DatetimeIndex):
78
+ utc_index = (
79
+ self.ts.index.tz_convert("UTC")
80
+ if self.ts.index.tz is not None
81
+ else self.ts.index
82
+ )
83
+ else:
84
+ message = "The index is not a DatetimeIndex and cannot be converted to UTC."
85
+ raise TypeError(message)
86
+
87
+ # Prepare the timeseries data as records for insertion
88
+ series_as_records = list(
89
+ zip(utc_index.strftime("%Y-%m-%dT%H:%M:%S%z"), self.ts, strict=False)
90
+ )
91
+
92
+ with db as con:
93
+ # Create the timeseries table if it doesn't exist
94
+ schema = db.create_table(schema_name, self.variable)
95
+
96
+ # Ensure that the timeseries_metadata table exists
97
+ metadata_schema = db.metadata.tables["__timeseries_metadata__"]
98
+
99
+ if isinstance(schema, Table):
100
+ # Insert the timeseries data
101
+ stmt = sqlite_insert(schema).values(series_as_records)
102
+ stmt = stmt.on_conflict_do_nothing(index_elements=["timestamp"])
103
+ con.execute(stmt)
104
+ con.commit()
105
+
106
+ metadata_stmt = sqlite_insert(metadata_schema).values(
107
+ table_name=schema_name,
108
+ location=self.location,
109
+ sensor=self.sensor,
110
+ variable=self.variable,
111
+ unit=self.unit,
112
+ logger_alt=self.sensor_alt,
113
+ location_alt=self.sensor_alt,
114
+ timestamp_start=timestamp_start_fmt,
115
+ timestamp_end=self.end.strftime("%Y%m%d%H%M%S"),
116
+ )
117
+
118
+ metadata_stmt = metadata_stmt.on_conflict_do_update(
119
+ index_elements=["table_name"],
120
+ set_={
121
+ "timestamp_start": timestamp_start_fmt,
122
+ "timestamp_end": self.end.strftime("%Y%m%d%H%M%S"),
123
+ },
124
+ )
125
+
126
+ con.execute(metadata_stmt)
127
+ con.commit()
128
+
129
+ return f"{schema_name} table and metadata updated."
130
+
131
+ def plot(
132
+ self, include_outliers: bool = False, ax: Any = None, **plot_kwargs: Any
133
+ ) -> tuple:
134
+ """Plots the timeseries data.
135
+
136
+ Args:
137
+ include_outliers (bool): Whether to include outliers in the plot.
138
+ ax (matplotlib.axes.Axes, optional): Matplotlib axes object to plot on.
139
+ If None, a new figure and axes are created.
140
+ **plot_kwargs: Additional keyword arguments passed to plt.plot.
141
+
142
+ Returns:
143
+ (fig, ax): Matplotlib figure and axes to allow further customization.
144
+ """
145
+ # Create new figure and axes if not provided
146
+ if ax is None:
147
+ fig, ax = plt.subplots(figsize=(10, 5))
148
+ else:
149
+ fig = ax.get_figure()
150
+
151
+ ax.plot(
152
+ self.ts.index,
153
+ self.ts,
154
+ label=f"{self.location} ({self.sensor})",
155
+ **plot_kwargs,
156
+ )
157
+
158
+ if include_outliers and self.outliers is not None:
159
+ ax.scatter(
160
+ self.outliers.index, self.outliers, color="red", label="Outliers"
161
+ )
162
+ plt.xticks(rotation=45)
163
+ ax.set_xlabel("Time")
164
+ ax.set_ylabel(f"{self.variable} ({self.unit})")
165
+ ax.set_title(f"{self.variable.capitalize()} at {self.location}")
166
+
167
+ ax.legend()
168
+
169
+ return fig, ax
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "gensor"
3
- version = "0.1.2"
3
+ version = "0.1.3"
4
4
  description = "Library for handling groundwater sensor data."
5
5
  authors = ["Mateusz Zawadzki <zawadzkimat@outlook.com>"]
6
6
  repository = "https://github.com/zawadzkim/gensor"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes