gensor 0.1.2__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gensor-0.1.2 → gensor-0.1.3}/PKG-INFO +1 -1
- gensor-0.1.2/gensor/core/timeseries.py → gensor-0.1.3/gensor/core/base.py +27 -38
- gensor-0.1.3/gensor/core/timeseries.py +169 -0
- {gensor-0.1.2 → gensor-0.1.3}/pyproject.toml +1 -1
- {gensor-0.1.2 → gensor-0.1.3}/LICENSE +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/README.md +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/analysis/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/analysis/outliers.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/analysis/stats.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/config.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/core/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/core/dataset.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/core/indexer.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/db/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/db/connection.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/exceptions.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/io/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/io/read.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/plain.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/utils.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/parse/vanessen.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/compensation.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/smoothing.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/processing/transform.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/Barodiver_220427183008_BY222.csv +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/PB01A_moni_AV319_220427183019_AV319.csv +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/PB02A_plain.csv +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/gensor/testdata/__init__.py +0 -0
- {gensor-0.1.2 → gensor-0.1.3}/py.typed +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from typing import Any, Literal
|
|
3
|
+
from typing import Any, Literal, TypeVar
|
|
4
4
|
|
|
5
5
|
import pandas as pd
|
|
6
6
|
import pandera as pa
|
|
@@ -15,6 +15,8 @@ from gensor.db import DatabaseConnection
|
|
|
15
15
|
from gensor.exceptions import TimeseriesUnequal
|
|
16
16
|
from gensor.processing.transform import Transformation
|
|
17
17
|
|
|
18
|
+
T = TypeVar("T", bound="BaseTimeseries")
|
|
19
|
+
|
|
18
20
|
ts_schema = pa.SeriesSchema(
|
|
19
21
|
float,
|
|
20
22
|
index=pa.Index(pd.DatetimeTZDtype(tz="UTC"), coerce=False),
|
|
@@ -22,18 +24,11 @@ ts_schema = pa.SeriesSchema(
|
|
|
22
24
|
)
|
|
23
25
|
|
|
24
26
|
|
|
25
|
-
class
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
This is class for any sensor timeseries. The basic required attributes are
|
|
29
|
-
just the ts, variable and unit. SensorInfo object is created from the
|
|
30
|
-
relevant kwargs if they are passed.
|
|
27
|
+
class BaseTimeseries(pyd.BaseModel):
|
|
28
|
+
"""Generic base class for timeseries with metadata.
|
|
31
29
|
|
|
32
|
-
Timeseries
|
|
33
|
-
single
|
|
34
|
-
|
|
35
|
-
TODO: Perhaps it would be cool to implement kind of a tracking of which
|
|
36
|
-
analyses were performed on the timeseries?
|
|
30
|
+
Timeseries is a series of measurements of a single variable, in the same unit, from a
|
|
31
|
+
single location with unique timestamps.
|
|
37
32
|
|
|
38
33
|
Attributes:
|
|
39
34
|
ts (pd.Series): The timeseries data.
|
|
@@ -41,9 +36,8 @@ class Timeseries(pyd.BaseModel):
|
|
|
41
36
|
The type of the measurement.
|
|
42
37
|
unit (Literal['degc', 'cmh2o', 'ms/cm', 'm/s', 'm asl', 'm']): The unit of
|
|
43
38
|
the measurement.
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
on the timeseries.
|
|
39
|
+
outliers (pd.Series): Measurements marked as outliers.
|
|
40
|
+
transformation (Any): Metadata of the transformation the timeseries has undergone.
|
|
47
41
|
|
|
48
42
|
Methods:
|
|
49
43
|
validate_ts: if the pd.Series is not exactly what is required, coerce.
|
|
@@ -59,8 +53,6 @@ class Timeseries(pyd.BaseModel):
|
|
|
59
53
|
]
|
|
60
54
|
unit: Literal["degc", "cmh2o", "ms/cm", "m/s", "m asl", "m"]
|
|
61
55
|
location: str | None = None
|
|
62
|
-
sensor: str | None = None
|
|
63
|
-
sensor_alt: float | None = None
|
|
64
56
|
outliers: pd.Series | None = pyd.Field(default=None, repr=False)
|
|
65
57
|
transformation: Any = pyd.Field(default=None, repr=False)
|
|
66
58
|
|
|
@@ -76,15 +68,13 @@ class Timeseries(pyd.BaseModel):
|
|
|
76
68
|
|
|
77
69
|
def __eq__(self, other: object) -> bool:
|
|
78
70
|
"""Check equality based on variable, unit and location."""
|
|
79
|
-
if not isinstance(other,
|
|
71
|
+
if not isinstance(other, BaseTimeseries):
|
|
80
72
|
return NotImplemented
|
|
81
73
|
|
|
82
74
|
return (
|
|
83
75
|
self.variable == other.variable
|
|
84
76
|
and self.unit == other.unit
|
|
85
77
|
and self.location == other.location
|
|
86
|
-
and self.sensor == other.sensor
|
|
87
|
-
and self.sensor_alt == other.sensor_alt
|
|
88
78
|
)
|
|
89
79
|
|
|
90
80
|
def __getattr__(self, attr: Any) -> Any:
|
|
@@ -126,9 +116,9 @@ class Timeseries(pyd.BaseModel):
|
|
|
126
116
|
return ts_schema.validate(v)
|
|
127
117
|
return v
|
|
128
118
|
|
|
129
|
-
def concatenate(self, other:
|
|
119
|
+
def concatenate(self: T, other: T) -> T:
|
|
130
120
|
"""Concatenate two Timeseries objects if they are considered equal."""
|
|
131
|
-
if not isinstance(other,
|
|
121
|
+
if not isinstance(other, type(self)):
|
|
132
122
|
return NotImplemented
|
|
133
123
|
|
|
134
124
|
if self == other:
|
|
@@ -140,11 +130,11 @@ class Timeseries(pyd.BaseModel):
|
|
|
140
130
|
raise TimeseriesUnequal()
|
|
141
131
|
|
|
142
132
|
def resample(
|
|
143
|
-
self,
|
|
133
|
+
self: T,
|
|
144
134
|
freq: Any,
|
|
145
135
|
agg_func: Any = pd.Series.mean,
|
|
146
136
|
**resample_kwargs: Any,
|
|
147
|
-
) ->
|
|
137
|
+
) -> T:
|
|
148
138
|
"""Resample the timeseries to a new frequency with a specified
|
|
149
139
|
aggregation function.
|
|
150
140
|
|
|
@@ -165,7 +155,7 @@ class Timeseries(pyd.BaseModel):
|
|
|
165
155
|
return self.model_copy(update={"ts": resampled_ts}, deep=True)
|
|
166
156
|
|
|
167
157
|
def transform(
|
|
168
|
-
self,
|
|
158
|
+
self: T,
|
|
169
159
|
method: Literal[
|
|
170
160
|
"difference",
|
|
171
161
|
"log",
|
|
@@ -177,7 +167,7 @@ class Timeseries(pyd.BaseModel):
|
|
|
177
167
|
"maxabs_scaler",
|
|
178
168
|
],
|
|
179
169
|
**transformer_kwargs: Any,
|
|
180
|
-
) ->
|
|
170
|
+
) -> T:
|
|
181
171
|
"""Transforms the timeseries using the specified method.
|
|
182
172
|
|
|
183
173
|
Parameters:
|
|
@@ -200,13 +190,13 @@ class Timeseries(pyd.BaseModel):
|
|
|
200
190
|
)
|
|
201
191
|
|
|
202
192
|
def detect_outliers(
|
|
203
|
-
self,
|
|
193
|
+
self: T,
|
|
204
194
|
method: Literal["iqr", "zscore", "isolation_forest", "lof"],
|
|
205
195
|
rolling: bool = False,
|
|
206
196
|
window: int = 6,
|
|
207
197
|
remove: bool = True,
|
|
208
198
|
**kwargs: Any,
|
|
209
|
-
) ->
|
|
199
|
+
) -> T:
|
|
210
200
|
"""Detects outliers in the timeseries using the specified method.
|
|
211
201
|
|
|
212
202
|
Parameters:
|
|
@@ -230,8 +220,8 @@ class Timeseries(pyd.BaseModel):
|
|
|
230
220
|
return self
|
|
231
221
|
|
|
232
222
|
def mask_with(
|
|
233
|
-
self, other:
|
|
234
|
-
) ->
|
|
223
|
+
self: T, other: T | pd.Series, mode: Literal["keep", "remove"] = "remove"
|
|
224
|
+
) -> T:
|
|
235
225
|
"""
|
|
236
226
|
Removes records not present in 'other' by index.
|
|
237
227
|
|
|
@@ -246,7 +236,7 @@ class Timeseries(pyd.BaseModel):
|
|
|
246
236
|
"""
|
|
247
237
|
if isinstance(other, pd.Series):
|
|
248
238
|
mask = other
|
|
249
|
-
elif isinstance(other,
|
|
239
|
+
elif isinstance(other, BaseTimeseries):
|
|
250
240
|
mask = other.ts
|
|
251
241
|
|
|
252
242
|
if mode == "keep":
|
|
@@ -259,7 +249,7 @@ class Timeseries(pyd.BaseModel):
|
|
|
259
249
|
|
|
260
250
|
return self.model_copy(update={"ts": masked_data}, deep=True)
|
|
261
251
|
|
|
262
|
-
def to_sql(self, db: DatabaseConnection) -> str:
|
|
252
|
+
def to_sql(self: T, db: DatabaseConnection) -> str:
|
|
263
253
|
"""Converts the timeseries to a list of dictionaries and uploads it to the database.
|
|
264
254
|
|
|
265
255
|
The Timeseries data is uploaded to the SQL database by using the pandas
|
|
@@ -276,7 +266,9 @@ class Timeseries(pyd.BaseModel):
|
|
|
276
266
|
timestamp_start_fmt = self.start.strftime("%Y%m%d%H%M%S")
|
|
277
267
|
|
|
278
268
|
# Construct the schema name using the location, variable, unit, and timestamp
|
|
279
|
-
schema_name =
|
|
269
|
+
schema_name = (
|
|
270
|
+
f"{self.location}_{self.variable}_{self.unit}_{timestamp_start_fmt}".lower()
|
|
271
|
+
)
|
|
280
272
|
|
|
281
273
|
# Ensure the index is a pandas DatetimeIndex
|
|
282
274
|
if isinstance(self.ts.index, pd.DatetimeIndex):
|
|
@@ -311,11 +303,8 @@ class Timeseries(pyd.BaseModel):
|
|
|
311
303
|
metadata_stmt = sqlite_insert(metadata_schema).values(
|
|
312
304
|
table_name=schema_name,
|
|
313
305
|
location=self.location,
|
|
314
|
-
sensor=self.sensor,
|
|
315
306
|
variable=self.variable,
|
|
316
307
|
unit=self.unit,
|
|
317
|
-
logger_alt=self.sensor_alt,
|
|
318
|
-
location_alt=self.sensor_alt,
|
|
319
308
|
timestamp_start=timestamp_start_fmt,
|
|
320
309
|
timestamp_end=self.end.strftime("%Y%m%d%H%M%S"),
|
|
321
310
|
)
|
|
@@ -334,7 +323,7 @@ class Timeseries(pyd.BaseModel):
|
|
|
334
323
|
return f"{schema_name} table and metadata updated."
|
|
335
324
|
|
|
336
325
|
def plot(
|
|
337
|
-
self, include_outliers: bool = False, ax: Any = None, **plot_kwargs: Any
|
|
326
|
+
self: T, include_outliers: bool = False, ax: Any = None, **plot_kwargs: Any
|
|
338
327
|
) -> tuple:
|
|
339
328
|
"""Plots the timeseries data.
|
|
340
329
|
|
|
@@ -356,7 +345,7 @@ class Timeseries(pyd.BaseModel):
|
|
|
356
345
|
ax.plot(
|
|
357
346
|
self.ts.index,
|
|
358
347
|
self.ts,
|
|
359
|
-
label=f"{self.location}
|
|
348
|
+
label=f"{self.location}",
|
|
360
349
|
**plot_kwargs,
|
|
361
350
|
)
|
|
362
351
|
|
|
@@ -0,0 +1,169 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
import pandera as pa
|
|
7
|
+
import pydantic as pyd
|
|
8
|
+
from matplotlib import pyplot as plt
|
|
9
|
+
from sqlalchemy import Table
|
|
10
|
+
from sqlalchemy.dialects.sqlite import insert as sqlite_insert
|
|
11
|
+
|
|
12
|
+
from gensor.core.base import BaseTimeseries
|
|
13
|
+
from gensor.db import DatabaseConnection
|
|
14
|
+
|
|
15
|
+
ts_schema = pa.SeriesSchema(
|
|
16
|
+
float,
|
|
17
|
+
index=pa.Index(pd.DatetimeTZDtype(tz="UTC"), coerce=False),
|
|
18
|
+
coerce=True,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class Timeseries(BaseTimeseries):
|
|
23
|
+
"""Timeseries for groundwater sensor data
|
|
24
|
+
|
|
25
|
+
Attributes:
|
|
26
|
+
ts (pd.Series): The timeseries data.
|
|
27
|
+
variable (Literal['temperature', 'pressure', 'conductivity', 'flux']):
|
|
28
|
+
The type of the measurement.
|
|
29
|
+
unit (Literal['degc', 'cmh2o', 'ms/cm', 'm/s', 'm asl', 'm']): The unit of
|
|
30
|
+
the measurement.
|
|
31
|
+
sensor (str | None): The serial number of the sensor.
|
|
32
|
+
|
|
33
|
+
Methods:
|
|
34
|
+
validate_ts: if the pd.Series is not exactly what is required, coerce.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
model_config = pyd.ConfigDict(
|
|
38
|
+
arbitrary_types_allowed=True, validate_assignment=True
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
sensor: str | None = None
|
|
42
|
+
sensor_alt: float | None = None
|
|
43
|
+
|
|
44
|
+
def __eq__(self, other: object) -> bool:
|
|
45
|
+
"""Check equality based on location, sensor, variable, unit and sensor_alt."""
|
|
46
|
+
if not isinstance(other, Timeseries):
|
|
47
|
+
return NotImplemented
|
|
48
|
+
|
|
49
|
+
return (
|
|
50
|
+
self.variable == other.variable
|
|
51
|
+
and self.unit == other.unit
|
|
52
|
+
and self.location == other.location
|
|
53
|
+
and self.sensor == other.sensor
|
|
54
|
+
and self.sensor_alt == other.sensor_alt
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
def to_sql(self, db: DatabaseConnection) -> str:
|
|
58
|
+
"""Converts the timeseries to a list of (timestamp, value) records and uploads it to the database.
|
|
59
|
+
|
|
60
|
+
The Timeseries data is uploaded to the SQL database with a SQLAlchemy SQLite
|
|
61
|
+
insert statement. Additionally, metadata about the timeseries is stored in the
|
|
62
|
+
'timeseries_metadata' table.
|
|
63
|
+
|
|
64
|
+
Args:
|
|
65
|
+
db (DatabaseConnection): The database connection object.
|
|
66
|
+
|
|
67
|
+
Returns:
|
|
68
|
+
str: A message confirming that the table and its metadata were updated.
|
|
69
|
+
"""
|
|
70
|
+
# Format the start timestamp as 'YYYYMMDDHHMMSS'
|
|
71
|
+
timestamp_start_fmt = self.start.strftime("%Y%m%d%H%M%S")
|
|
72
|
+
|
|
73
|
+
# Construct the schema name using the location, sensor, variable, unit, and timestamp
|
|
74
|
+
schema_name = f"{self.location}_{self.sensor}_{self.variable}_{self.unit}_{timestamp_start_fmt}".lower()
|
|
75
|
+
|
|
76
|
+
# Ensure the index is a pandas DatetimeIndex
|
|
77
|
+
if isinstance(self.ts.index, pd.DatetimeIndex):
|
|
78
|
+
utc_index = (
|
|
79
|
+
self.ts.index.tz_convert("UTC")
|
|
80
|
+
if self.ts.index.tz is not None
|
|
81
|
+
else self.ts.index
|
|
82
|
+
)
|
|
83
|
+
else:
|
|
84
|
+
message = "The index is not a DatetimeIndex and cannot be converted to UTC."
|
|
85
|
+
raise TypeError(message)
|
|
86
|
+
|
|
87
|
+
# Prepare the timeseries data as records for insertion
|
|
88
|
+
series_as_records = list(
|
|
89
|
+
zip(utc_index.strftime("%Y-%m-%dT%H:%M:%S%z"), self.ts, strict=False)
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
with db as con:
|
|
93
|
+
# Create the timeseries table if it doesn't exist
|
|
94
|
+
schema = db.create_table(schema_name, self.variable)
|
|
95
|
+
|
|
96
|
+
# Ensure that the timeseries_metadata table exists
|
|
97
|
+
metadata_schema = db.metadata.tables["__timeseries_metadata__"]
|
|
98
|
+
|
|
99
|
+
if isinstance(schema, Table):
|
|
100
|
+
# Insert the timeseries data
|
|
101
|
+
stmt = sqlite_insert(schema).values(series_as_records)
|
|
102
|
+
stmt = stmt.on_conflict_do_nothing(index_elements=["timestamp"])
|
|
103
|
+
con.execute(stmt)
|
|
104
|
+
con.commit()
|
|
105
|
+
|
|
106
|
+
metadata_stmt = sqlite_insert(metadata_schema).values(
|
|
107
|
+
table_name=schema_name,
|
|
108
|
+
location=self.location,
|
|
109
|
+
sensor=self.sensor,
|
|
110
|
+
variable=self.variable,
|
|
111
|
+
unit=self.unit,
|
|
112
|
+
logger_alt=self.sensor_alt,
|
|
113
|
+
location_alt=self.sensor_alt,
|
|
114
|
+
timestamp_start=timestamp_start_fmt,
|
|
115
|
+
timestamp_end=self.end.strftime("%Y%m%d%H%M%S"),
|
|
116
|
+
)
|
|
117
|
+
|
|
118
|
+
metadata_stmt = metadata_stmt.on_conflict_do_update(
|
|
119
|
+
index_elements=["table_name"],
|
|
120
|
+
set_={
|
|
121
|
+
"timestamp_start": timestamp_start_fmt,
|
|
122
|
+
"timestamp_end": self.end.strftime("%Y%m%d%H%M%S"),
|
|
123
|
+
},
|
|
124
|
+
)
|
|
125
|
+
|
|
126
|
+
con.execute(metadata_stmt)
|
|
127
|
+
con.commit()
|
|
128
|
+
|
|
129
|
+
return f"{schema_name} table and metadata updated."
|
|
130
|
+
|
|
131
|
+
def plot(
|
|
132
|
+
self, include_outliers: bool = False, ax: Any = None, **plot_kwargs: Any
|
|
133
|
+
) -> tuple:
|
|
134
|
+
"""Plots the timeseries data.
|
|
135
|
+
|
|
136
|
+
Args:
|
|
137
|
+
include_outliers (bool): Whether to include outliers in the plot.
|
|
138
|
+
ax (matplotlib.axes.Axes, optional): Matplotlib axes object to plot on.
|
|
139
|
+
If None, a new figure and axes are created.
|
|
140
|
+
**plot_kwargs: Additional keyword arguments passed to ax.plot.
|
|
141
|
+
|
|
142
|
+
Returns:
|
|
143
|
+
(fig, ax): Matplotlib figure and axes to allow further customization.
|
|
144
|
+
"""
|
|
145
|
+
# Create new figure and axes if not provided
|
|
146
|
+
if ax is None:
|
|
147
|
+
fig, ax = plt.subplots(figsize=(10, 5))
|
|
148
|
+
else:
|
|
149
|
+
fig = ax.get_figure()
|
|
150
|
+
|
|
151
|
+
ax.plot(
|
|
152
|
+
self.ts.index,
|
|
153
|
+
self.ts,
|
|
154
|
+
label=f"{self.location} ({self.sensor})",
|
|
155
|
+
**plot_kwargs,
|
|
156
|
+
)
|
|
157
|
+
|
|
158
|
+
if include_outliers and self.outliers is not None:
|
|
159
|
+
ax.scatter(
|
|
160
|
+
self.outliers.index, self.outliers, color="red", label="Outliers"
|
|
161
|
+
)
|
|
162
|
+
plt.xticks(rotation=45)
|
|
163
|
+
ax.set_xlabel("Time")
|
|
164
|
+
ax.set_ylabel(f"{self.variable} ({self.unit})")
|
|
165
|
+
ax.set_title(f"{self.variable.capitalize()} at {self.location}")
|
|
166
|
+
|
|
167
|
+
ax.legend()
|
|
168
|
+
|
|
169
|
+
return fig, ax
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|