hydroserverpy 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. hydroserverpy/__init__.py +7 -0
  2. hydroserverpy/api/__init__.py +0 -0
  3. hydroserverpy/api/client.py +203 -0
  4. hydroserverpy/api/models/__init__.py +22 -0
  5. hydroserverpy/api/models/base.py +207 -0
  6. hydroserverpy/api/models/etl/__init__.py +26 -0
  7. hydroserverpy/api/models/etl/data_archive.py +77 -0
  8. hydroserverpy/api/models/etl/data_source.py +146 -0
  9. hydroserverpy/api/models/etl/etl_configuration.py +224 -0
  10. hydroserverpy/api/models/etl/extractors/__init__.py +6 -0
  11. hydroserverpy/api/models/etl/extractors/base.py +52 -0
  12. hydroserverpy/api/models/etl/extractors/ftp_extractor.py +50 -0
  13. hydroserverpy/api/models/etl/extractors/http_extractor.py +28 -0
  14. hydroserverpy/api/models/etl/extractors/local_file_extractor.py +20 -0
  15. hydroserverpy/api/models/etl/factories.py +23 -0
  16. hydroserverpy/api/models/etl/loaders/__init__.py +4 -0
  17. hydroserverpy/api/models/etl/loaders/base.py +11 -0
  18. hydroserverpy/api/models/etl/loaders/hydroserver_loader.py +98 -0
  19. hydroserverpy/api/models/etl/orchestration_configuration.py +35 -0
  20. hydroserverpy/api/models/etl/orchestration_system.py +63 -0
  21. hydroserverpy/api/models/etl/schedule.py +16 -0
  22. hydroserverpy/api/models/etl/status.py +14 -0
  23. hydroserverpy/api/models/etl/timestamp_parser.py +112 -0
  24. hydroserverpy/api/models/etl/transformers/__init__.py +5 -0
  25. hydroserverpy/api/models/etl/transformers/base.py +135 -0
  26. hydroserverpy/api/models/etl/transformers/csv_transformer.py +88 -0
  27. hydroserverpy/api/models/etl/transformers/json_transformer.py +48 -0
  28. hydroserverpy/api/models/etl/types.py +7 -0
  29. hydroserverpy/api/models/iam/__init__.py +0 -0
  30. hydroserverpy/api/models/iam/account.py +12 -0
  31. hydroserverpy/api/models/iam/apikey.py +96 -0
  32. hydroserverpy/api/models/iam/collaborator.py +70 -0
  33. hydroserverpy/api/models/iam/role.py +38 -0
  34. hydroserverpy/api/models/iam/workspace.py +297 -0
  35. hydroserverpy/api/models/sta/__init__.py +0 -0
  36. hydroserverpy/api/models/sta/datastream.py +254 -0
  37. hydroserverpy/api/models/sta/observation.py +103 -0
  38. hydroserverpy/api/models/sta/observed_property.py +37 -0
  39. hydroserverpy/api/models/sta/processing_level.py +35 -0
  40. hydroserverpy/api/models/sta/result_qualifier.py +34 -0
  41. hydroserverpy/api/models/sta/sensor.py +44 -0
  42. hydroserverpy/api/models/sta/thing.py +113 -0
  43. hydroserverpy/api/models/sta/unit.py +36 -0
  44. hydroserverpy/api/services/__init__.py +12 -0
  45. hydroserverpy/api/services/base.py +118 -0
  46. hydroserverpy/api/services/etl/__init__.py +0 -0
  47. hydroserverpy/api/services/etl/data_archive.py +166 -0
  48. hydroserverpy/api/services/etl/data_source.py +163 -0
  49. hydroserverpy/api/services/etl/orchestration_system.py +66 -0
  50. hydroserverpy/api/services/iam/__init__.py +0 -0
  51. hydroserverpy/api/services/iam/role.py +38 -0
  52. hydroserverpy/api/services/iam/workspace.py +232 -0
  53. hydroserverpy/api/services/sta/__init__.py +0 -0
  54. hydroserverpy/api/services/sta/datastream.py +296 -0
  55. hydroserverpy/api/services/sta/observed_property.py +82 -0
  56. hydroserverpy/api/services/sta/processing_level.py +72 -0
  57. hydroserverpy/api/services/sta/result_qualifier.py +64 -0
  58. hydroserverpy/api/services/sta/sensor.py +102 -0
  59. hydroserverpy/api/services/sta/thing.py +195 -0
  60. hydroserverpy/api/services/sta/unit.py +78 -0
  61. hydroserverpy/api/utils.py +22 -0
  62. hydroserverpy/quality/__init__.py +1 -0
  63. hydroserverpy/quality/service.py +405 -0
  64. hydroserverpy-1.5.1.dist-info/METADATA +66 -0
  65. hydroserverpy-1.5.1.dist-info/RECORD +69 -0
  66. hydroserverpy-1.5.1.dist-info/WHEEL +5 -0
  67. hydroserverpy-1.5.1.dist-info/licenses/LICENSE +28 -0
  68. hydroserverpy-1.5.1.dist-info/top_level.txt +1 -0
  69. hydroserverpy-1.5.1.dist-info/zip-safe +1 -0
@@ -0,0 +1,195 @@
1
+ import json
2
+ from typing import TYPE_CHECKING, Union, IO, List, Dict, Optional, Tuple
3
+ from uuid import UUID
4
+ from hydroserverpy.api.models import Thing
5
+ from hydroserverpy.api.utils import normalize_uuid
6
+ from ..base import HydroServerBaseService
7
+
8
+ if TYPE_CHECKING:
9
+ from hydroserverpy import HydroServer
10
+ from hydroserverpy.api.models import Workspace
11
+
12
+
13
class ThingService(HydroServerBaseService):
    """CRUD, tagging, and photo operations for HydroServer things (monitoring sites)."""

    def __init__(self, client: "HydroServer"):
        self.model = Thing
        super().__init__(client)

    def list(
        self,
        page: int = ...,
        page_size: int = ...,
        order_by: List[str] = ...,
        workspace: Union["Workspace", UUID, str] = ...,
        bbox: Tuple[float, float, float, float] = ...,
        state: str = ...,
        county: str = ...,
        country: str = ...,
        site_type: str = ...,
        sampling_feature_type: str = ...,
        sampling_feature_code: str = ...,
        tag: Tuple[str, str] = ...,
        is_private: bool = ...,
        fetch_all: bool = False,
    ) -> List["Thing"]:
        """Fetch a collection of things.

        Ellipsis (``...``) marks "parameter not provided", so an explicit
        ``None`` can still be passed through as a filter value.
        """

        return super().list(
            page=page,
            page_size=page_size,
            order_by=order_by,
            workspace_id=normalize_uuid(workspace),
            # The API expects the bounding box as a comma-separated string.
            bbox=",".join([str(i) for i in bbox]) if bbox is not ... else bbox,
            state=state,
            county=county,
            country=country,
            site_type=site_type,
            sampling_feature_type=sampling_feature_type,
            sampling_feature_code=sampling_feature_code,
            # A single (key, value) tag filter is serialized as "key:value".
            tag=[f"{tag[0]}:{tag[1]}"] if tag is not ... else tag,
            is_private=is_private,
            fetch_all=fetch_all,
        )

    def create(
        self,
        workspace: Union["Workspace", UUID, str],
        name: str,
        description: str,
        sampling_feature_type: str,
        sampling_feature_code: str,
        site_type: str,
        # Fixed annotation: the original declared `is_private: False`, which
        # annotates the parameter with the *value* False rather than the type
        # bool. It was never a default (latitude/longitude after it have no
        # defaults), so the parameter remains required.
        is_private: bool,
        latitude: float,
        longitude: float,
        elevation_m: Optional[float] = None,
        elevation_datum: Optional[str] = None,
        state: Optional[str] = None,
        county: Optional[str] = None,
        country: Optional[str] = None,
        data_disclaimer: Optional[str] = None,
    ) -> "Thing":
        """Create a new thing."""

        body = {
            "name": name,
            "description": description,
            "samplingFeatureType": sampling_feature_type,
            "samplingFeatureCode": sampling_feature_code,
            "siteType": site_type,
            "isPrivate": is_private,
            "dataDisclaimer": data_disclaimer,
            "workspaceId": normalize_uuid(workspace),
            "location": {
                "latitude": latitude,
                "longitude": longitude,
                # NOTE(review): "elevation_m" is snake_case while the sibling
                # keys are camelCase — confirm this matches the server schema.
                "elevation_m": elevation_m,
                "elevationDatum": elevation_datum,
                "state": state,
                "county": county,
                "country": country,
            }
        }

        return super().create(**body)

    def update(
        self,
        uid: Union[UUID, str],
        name: str = ...,
        description: str = ...,
        sampling_feature_type: str = ...,
        sampling_feature_code: str = ...,
        site_type: str = ...,
        is_private: bool = ...,
        latitude: float = ...,
        longitude: float = ...,
        elevation_m: Optional[float] = ...,
        elevation_datum: Optional[str] = ...,
        state: Optional[str] = ...,
        county: Optional[str] = ...,
        country: Optional[str] = ...,
        data_disclaimer: Optional[str] = ...,
    ) -> "Thing":
        """Update a thing. Fields left as ``...`` are not modified."""

        body = {
            "name": name,
            "description": description,
            "samplingFeatureType": sampling_feature_type,
            "samplingFeatureCode": sampling_feature_code,
            "siteType": site_type,
            "isPrivate": is_private,
            "dataDisclaimer": data_disclaimer,
            "location": {
                "latitude": latitude,
                "longitude": longitude,
                "elevation_m": elevation_m,
                "elevationDatum": elevation_datum,
                "state": state,
                "county": county,
                "country": country,
            }
        }

        return super().update(uid=str(uid), **body)

    def _tags_request(self, method: str, uid: Union[UUID, str], body: Dict[str, str]):
        """Send *body* as JSON to the thing's /tags sub-route; return the raw response."""

        path = f"/{self.client.base_route}/{self.model.get_route()}/{str(uid)}/tags"
        headers = {"Content-type": "application/json"}
        return self.client.request(
            method, path, headers=headers, data=json.dumps(body, default=self.default_serializer)
        )

    def add_tag(self, uid: Union[UUID, str], key: str, value: str) -> Dict[str, str]:
        """Tag a HydroServer thing."""

        return self._tags_request("post", uid, {"key": key, "value": value}).json()

    def update_tag(self, uid: Union[UUID, str], key: str, value: str) -> Dict[str, str]:
        """Update the tag of a HydroServer thing."""

        return self._tags_request("put", uid, {"key": key, "value": value}).json()

    def delete_tag(self, uid: Union[UUID, str], key: str, value: str) -> None:
        """Remove a tag from a HydroServer thing."""

        self._tags_request("delete", uid, {"key": key, "value": value})

    def add_photo(self, uid: Union[UUID, str], file: IO[bytes]) -> Dict[str, str]:
        """Add a photo of a HydroServer thing."""

        path = f"/{self.client.base_route}/{self.model.get_route()}/{str(uid)}/photos"

        # Sent as multipart/form-data; requests sets the Content-Type itself.
        return self.client.request(
            "post", path, files={"file": file}
        ).json()

    def delete_photo(self, uid: Union[UUID, str], name: str) -> None:
        """Delete a photo of a HydroServer thing."""

        path = f"/{self.client.base_route}/{self.model.get_route()}/{str(uid)}/photos"
        headers = {"Content-type": "application/json"}
        body = {
            "name": name
        }
        self.client.request(
            "delete", path, headers=headers, data=json.dumps(body, default=self.default_serializer)
        )
@@ -0,0 +1,78 @@
1
+ from typing import Optional, Union, List, TYPE_CHECKING
2
+ from uuid import UUID
3
+ from hydroserverpy.api.models import Unit
4
+ from hydroserverpy.api.utils import normalize_uuid
5
+ from ..base import HydroServerBaseService
6
+
7
+ if TYPE_CHECKING:
8
+ from hydroserverpy import HydroServer
9
+ from hydroserverpy.api.models import Workspace, Thing, Datastream
10
+
11
+
12
class UnitService(HydroServerBaseService):
    """Read and manage HydroServer unit definitions."""

    def __init__(self, client: "HydroServer"):
        self.model = Unit
        super().__init__(client)

    def list(
        self,
        page: int = ...,
        page_size: int = ...,
        order_by: List[str] = ...,
        workspace: Union["Workspace", UUID, str] = ...,
        thing: Optional[Union["Thing", UUID, str]] = ...,
        datastream: Optional[Union["Datastream", UUID, str]] = ...,
        unit_type: str = ...,
        fetch_all: bool = False,
    ) -> List["Unit"]:
        """Fetch a collection of units.

        Ellipsis (``...``) marks "filter not provided"; model instances and
        UUIDs are normalized to string IDs before being forwarded.
        """

        filters = dict(
            page=page,
            page_size=page_size,
            order_by=order_by,
            workspace_id=normalize_uuid(workspace),
            thing_id=normalize_uuid(thing),
            datastream_id=normalize_uuid(datastream),
            unit_type=unit_type,
            fetch_all=fetch_all,
        )
        return super().list(**filters)

    def create(
        self,
        name: str,
        symbol: str,
        definition: str,
        unit_type: str,
        workspace: Optional[Union["Workspace", UUID, str]] = None,
    ) -> "Unit":
        """Create a new unit, optionally scoped to a workspace."""

        return super().create(
            name=name,
            symbol=symbol,
            definition=definition,
            type=unit_type,
            workspaceId=normalize_uuid(workspace),
        )

    def update(
        self,
        uid: Union[UUID, str],
        name: str = ...,
        symbol: str = ...,
        definition: str = ...,
        unit_type: str = ...,
    ) -> "Unit":
        """Update a unit. Fields left as ``...`` are not modified."""

        return super().update(
            uid=str(uid),
            name=name,
            symbol=symbol,
            definition=definition,
            type=unit_type,
        )
@@ -0,0 +1,22 @@
1
+ from uuid import UUID
2
+ from typing import Union, Optional, Any
3
+ from pydantic.alias_generators import to_camel
4
+
5
+
6
def normalize_uuid(
    obj: Optional[Union[str, UUID, Any]],
    attr: str = "uid"
):
    """Coerce *obj* to a string identifier.

    The Ellipsis sentinel and ``None`` pass through unchanged. A truthy
    object exposing *attr* (e.g. a model instance with a ``uid``) has that
    attribute stringified; anything else is stringified directly.
    """
    if obj is ...:
        return ...
    if obj is None:
        return None
    target = getattr(obj, attr) if obj and hasattr(obj, attr) else obj
    return str(target)
17
+
18
+
19
def order_by_to_camel(s: str) -> str:
    """Camel-case an order-by field name, preserving a leading '-' (descending)."""
    descending = s.startswith('-')
    core = s[1:] if descending else s
    converted = to_camel(core)
    return '-' + converted if descending else converted
@@ -0,0 +1 @@
1
+ from .service import HydroServerQualityControl, TimeUnit, FilterOperation, Operator
@@ -0,0 +1,405 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ from uuid import UUID
4
+ from typing import Union, Optional, List, Tuple, Dict
5
+ from enum import Enum
6
+
7
+ # Tools
8
+ # [x] Interpolate
9
+ # [x] Set a value to a constant
10
+ # [x] Change value by applying arithmetic (+, -, *, /)
11
+ # [x] Shift
12
+ # [x] Drift correction (linear)
13
+ # [x] Delete values
14
+ # [x] Fill values
15
+
16
+ # Automation
17
+ # [x] Gap filling
18
+
19
+
20
class TimeUnit(Enum):
    """Enumeration for time units.

    Values are the single-character codes understood by
    ``numpy.timedelta64`` (e.g. ``np.timedelta64(5, "m")`` for 5 minutes),
    which is how gap detection and shifting consume them.
    """

    SECOND = "s"
    MINUTE = "m"
    HOUR = "h"
    DAY = "D"
    WEEK = "W"
    MONTH = "M"
    YEAR = "Y"
30
+
31
+
32
class FilterOperation(Enum):
    """Enumeration for filter operations.

    The string values are the dictionary keys accepted by
    ``HydroServerQualityControl.filter``: less-than, less-than-or-equal,
    greater-than, greater-than-or-equal, and equal.
    """

    LT = "LT"
    LTE = "LTE"
    GT = "GT"
    GTE = "GTE"
    E = "E"
40
+
41
+
42
class Operator(Enum):
    """Enumeration for mathematical operations.

    The string values are accepted by
    ``HydroServerQualityControl.change_values``: multiply, divide, add,
    subtract, or assign a constant.
    """

    MULT = "MULT"
    DIV = "DIV"
    ADD = "ADD"
    SUB = "SUB"
    ASSIGN = "ASSIGN"
50
+
51
+
52
+ class HydroServerQualityControl:
53
+ """
54
+ Quality control operations for HydroServer observations.
55
+
56
+ :param datastream_id: The ID of the datastream.
57
+ :type datastream_id: Union[UUID, str]
58
+ :param observations: DataFrame containing 'timestamp' and 'value' columns.
59
+ :type observations: pd.DataFrame
60
+ """
61
+
62
+ datastream_id: Union[UUID, str]
63
+
64
+ def __init__(
65
+ self, datastream_id: Union[UUID, str], observations: pd.DataFrame
66
+ ) -> None:
67
+
68
+ assert (
69
+ "timestamp" in observations.columns
70
+ ), "Observations must have a 'timestamp' column"
71
+ assert pd.api.types.is_datetime64_any_dtype(
72
+ observations["timestamp"]
73
+ ), "Observations 'timestamp' column must be of datetime type"
74
+
75
+ assert (
76
+ "value" in observations.columns
77
+ ), "Observations must have a 'value' column"
78
+ assert pd.api.types.is_float_dtype(
79
+ observations["value"]
80
+ ), "Observations 'value' column must be of float type"
81
+
82
+ self.datastream_id = str(datastream_id)
83
+ self._df = observations
84
+ self._filtered_observations = None
85
+
86
+ @property
87
+ def observations(self) -> pd.DataFrame:
88
+ """
89
+ Returns the observations DataFrame, filtered if a filter has been applied.
90
+
91
+ :return: Observations DataFrame.
92
+ :rtype: pd.DataFrame
93
+ """
94
+
95
+ if self._filtered_observations is None:
96
+ return self._df
97
+
98
+ return self._filtered_observations
99
+
100
+ ###################
101
+ # Filters
102
+ ###################
103
+
104
+ @staticmethod
105
+ def _has_filter(
106
+ data_filter: Dict[str, Union[float, int]], key: FilterOperation
107
+ ) -> bool:
108
+ """
109
+ Checks if a given filter operation exists in the filter dictionary.
110
+
111
+ :param data_filter: Dictionary containing the filters.
112
+ :type data_filter: Dict[str, Union[float, int]]
113
+ :param key: Filter operation to check for.
114
+ :type key: FilterOperation
115
+ :return: True if the filter operation exists, False otherwise.
116
+ :rtype: bool
117
+ """
118
+
119
+ return key.value in data_filter and (
120
+ isinstance(data_filter[key.value], float)
121
+ or isinstance(data_filter[key.value], int)
122
+ )
123
+
124
+ def filter(self, data_filter: Dict[str, Union[float, int]]) -> None:
125
+ """
126
+ Executes the applied filters and returns the resulting DataFrame.
127
+
128
+ :param data_filter: Dictionary containing filter operations and their values.
129
+ :type data_filter: Dict[str, Union[float, int]]
130
+ """
131
+
132
+ query = []
133
+
134
+ if self._has_filter(data_filter, FilterOperation.LT):
135
+ query.append(f"`value` < {data_filter[FilterOperation.LT.value]}")
136
+
137
+ if self._has_filter(data_filter, FilterOperation.LTE):
138
+ query.append(f"`value` <= {data_filter[FilterOperation.LTE.value]}")
139
+
140
+ if self._has_filter(data_filter, FilterOperation.GT):
141
+ query.append(f"`value` > {data_filter[FilterOperation.GT.value]}")
142
+
143
+ if self._has_filter(data_filter, FilterOperation.GTE):
144
+ query.append(f"`value` >= {data_filter[FilterOperation.GTE.value]}")
145
+
146
+ if self._has_filter(data_filter, FilterOperation.E):
147
+ query.append(f"`value` == {data_filter[FilterOperation.E.value]}")
148
+
149
+ if len(query):
150
+ self._filtered_observations = self._df.query(" | ".join(query))
151
+ else:
152
+ self._filtered_observations = None
153
+
154
+ ###################
155
+ # Gap Analysis
156
+ ###################
157
+
158
+ def find_gaps(self, time_value: int, time_unit: str) -> pd.DataFrame:
159
+ """
160
+ Identifies gaps in the observations based on the specified time value and unit.
161
+
162
+ :param time_value: The time value for detecting gaps.
163
+ :type time_value: int
164
+ :param time_unit: The unit of time (e.g., 's', 'm', 'h').
165
+ :type time_unit: str
166
+ :return: DataFrame containing the observations with gaps.
167
+ :rtype: pd.DataFrame
168
+ """
169
+
170
+ return self.observations[
171
+ self._df["timestamp"].diff() > np.timedelta64(time_value, time_unit)
172
+ ]
173
+
174
+ def fill_gap(
175
+ self, gap: Tuple[int, str], fill: Tuple[int, str], interpolate_values: bool
176
+ ) -> pd.DataFrame:
177
+ """
178
+ Fills identified gaps in the observations with placeholder values and optionally interpolates the values.
179
+
180
+ :param gap: Tuple containing the time value and unit for identifying gaps.
181
+ :type gap: Tuple[int, str]
182
+ :param fill: Tuple containing the time value and unit for filling gaps.
183
+ :type fill: Tuple[int, str]
184
+ :param interpolate_values: Whether to interpolate values for the filled gaps.
185
+ :type interpolate_values: bool
186
+ :return: DataFrame of points that filled the gaps.
187
+ :rtype: pd.DataFrame
188
+ """
189
+
190
+ gaps_df = self.find_gaps(gap[0], gap[1])
191
+ time_gap = np.timedelta64(fill[0], fill[1])
192
+ points = []
193
+ index = []
194
+ added_index = []
195
+
196
+ for gap_row in gaps_df.iterrows():
197
+ gap_end_index = gap_row[0]
198
+ gap_start_index = gap_end_index - 1
199
+
200
+ gap_start_date = self._df.iloc[gap_start_index]["timestamp"]
201
+ gap_end_date = self._df.iloc[gap_end_index]["timestamp"]
202
+
203
+ start = gap_start_date + time_gap
204
+ end = gap_end_date
205
+
206
+ # Annotate the points that will fill this gap
207
+ while start < end:
208
+ points.append([start, -9999])
209
+ index.append(gap_start_index)
210
+ start = start + time_gap
211
+
212
+ if interpolate_values:
213
+ # Keep an index of the position where the points will end up
214
+ added_index.append(gap_start_index + len(added_index) + 1)
215
+
216
+ self.add_points(points, index)
217
+
218
+ if interpolate_values:
219
+ self.interpolate(added_index)
220
+
221
+ # Return the list of points that filled the gaps
222
+ return pd.DataFrame(points, columns=["timestamp", "value"])
223
+
224
+ ######################################
225
+ # Data point operations
226
+ ######################################
227
+
228
+ def add_points(
229
+ self, points: List[List[Union[str, float]]], index: Optional[List[int]] = None
230
+ ) -> None:
231
+ """
232
+ Adds new points to the observations, optionally at specified indices.
233
+
234
+ :param points: List of points to be added.
235
+ :type points: List[List[Union[str, float]]]
236
+ :param index: Optional list of indices at which to insert the points.
237
+ :type index: Optional[List[int]]
238
+ """
239
+
240
+ # If an index list was provided, insert the points to the DataFrame at the corresponding index.
241
+ # We do this by creating a dictionary of slices where the key is the index to insert at, and the value is an
242
+ # array of points to insert at that index
243
+ # We iterate through the dictionary keys in reverse order, so that we can insert without altering the position
244
+ # of elements before
245
+ if index is not None:
246
+ # This is the most efficient way to insert into a DataFrame for a large dataset.
247
+
248
+ # create a dictionary of points to insert at each index
249
+ slices = {}
250
+ for idx, value in enumerate(index):
251
+ if value not in slices:
252
+ slices[value] = []
253
+
254
+ slices[value].append(points[idx])
255
+
256
+ for s in sorted(slices.items(), reverse=True):
257
+ # Split DataFrame and insert new row.
258
+ idx = s[0] + 1
259
+ val = s[1]
260
+ df1 = self._df.iloc[:idx, :]
261
+ df2 = self._df.iloc[idx:, :]
262
+
263
+ points_df = pd.DataFrame(val, columns=["timestamp", "value"])
264
+ self._df = pd.concat([df1, points_df, df2]).reset_index(drop=True)
265
+
266
+ else:
267
+ # This way of inserting is not as efficient, but performance should be good enough given that the existing
268
+ # data in the DataFrame is pre-sorted.
269
+
270
+ # Create a new dataframe with the points
271
+ points_df = pd.DataFrame(points, columns=["timestamp", "value"])
272
+
273
+ # Concatenate both dataframes. New rows will be at the end.
274
+ self._df = pd.concat([self._df, points_df])
275
+
276
+ # Sort and reset index
277
+ self._df = self._df.sort_values("timestamp")
278
+ self._df.reset_index(drop=True, inplace=True)
279
+
280
+ def change_values(
281
+ self, index_list: List[int], operator: str, value: Union[int, float]
282
+ ) -> None:
283
+ """
284
+ Changes the values of observations based on the specified operator and value.
285
+
286
+ :param index_list: List of indices for which values will be changed.
287
+ :type index_list: List[int]
288
+ :param operator: The operation to perform ('MULT', 'DIV', 'ADD', 'SUB', 'ASSIGN').
289
+ :type operator: str
290
+ :param value: The value to use in the operation.
291
+ :type value: Union[int, float]
292
+ """
293
+
294
+ def operation(x):
295
+ if operator == Operator.MULT.value:
296
+ return x * value
297
+ elif operator == Operator.DIV.value:
298
+ if value == 0:
299
+ print("Error: cannot divide by 0")
300
+ return x
301
+ return x / value
302
+ elif operator == Operator.ADD.value:
303
+ return x + value
304
+ elif operator == Operator.SUB.value:
305
+ return x - value
306
+ elif operator == Operator.ASSIGN.value:
307
+ return value
308
+ else:
309
+ return x
310
+
311
+ self._df.loc[index_list, "value"] = self._df.loc[index_list, "value"].apply(
312
+ operation
313
+ )
314
+
315
+ def delete_points(self, index_list: List[int]) -> None:
316
+ """
317
+ Deletes points from the observations at the specified indices.
318
+
319
+ :param index_list: List of indices for which points will be deleted.
320
+ :type index_list: List[int]
321
+ """
322
+
323
+ self._df.drop(index=index_list, inplace=True)
324
+ self._df.reset_index(drop=True, inplace=True)
325
+
326
+ def shift_points(
327
+ self, index_list: List[int], time_value: int, time_unit: str
328
+ ) -> None:
329
+ """
330
+ Shifts the timestamps of the observations at the specified indices by a given time value and unit.
331
+
332
+ :param index_list: List of indices where timestamps will be shifted.
333
+ :type index_list: List[int]
334
+ :param time_value: The amount of time to shift the timestamps.
335
+ :type time_value: int
336
+ :param time_unit: The unit of time (e.g., 's' for seconds, 'm' for minutes).
337
+ :type time_unit: str
338
+ """
339
+
340
+ shift_value = np.timedelta64(time_value, time_unit)
341
+ condition = self._df.index.isin(index_list)
342
+
343
+ # Apply the shift
344
+ self._df.loc[condition, "timestamp"] = (
345
+ self._df.loc[condition, "timestamp"] + shift_value
346
+ )
347
+ self._df = self._df.sort_values("timestamp")
348
+ self._df.reset_index(drop=True, inplace=True)
349
+
350
+ def interpolate(self, index_list: List[int]) -> None:
351
+ """
352
+ Interpolates the values of observations at the specified indices using linear interpolation.
353
+
354
+ :param index_list: List of indices where values will be interpolated.
355
+ :type index_list: list[int]
356
+ """
357
+
358
+ condition = self._df.index.isin(index_list)
359
+ self._df["value"].mask(condition, inplace=True)
360
+ self._df["value"].interpolate(method="linear", inplace=True)
361
+
362
+ def drift_correction(self, start: int, end: int, gap_width: float) -> pd.DataFrame:
363
+ """
364
+ Applies drift correction to the values of observations within the specified index range.
365
+
366
+ :param start: Start index of the range to apply drift correction.
367
+ :type start: int
368
+ :param end: End index of the range to apply drift correction.
369
+ :type end: int
370
+ :param gap_width: The width of the drift gap to correct.
371
+ :type gap_width: float
372
+ :return: DataFrame after applying drift correction.
373
+ :rtype: pd.DataFrame
374
+ """
375
+
376
+ # validate range
377
+ if start >= end:
378
+ print("Start and end index cannot overlap")
379
+ return self._df
380
+ elif end > len(self._df) - 1:
381
+ print("End index out of range")
382
+ return self._df
383
+ elif start < 0:
384
+ print("Start index must be greater than or equal to 0")
385
+ return self._df
386
+
387
+ points = self._df.iloc[start : end + 1]
388
+ start_date = points.iloc[0]["timestamp"]
389
+ end_date = points.iloc[-1]["timestamp"]
390
+
391
+ x_l = (end_date - start_date).total_seconds()
392
+ ndv = -9999
393
+ # y_n = y_0 + G(x_i / x_l)
394
+
395
+ def f(row):
396
+ if row["value"] != ndv:
397
+ return row["value"] + (
398
+ gap_width * ((row["timestamp"] - start_date).total_seconds() / x_l)
399
+ )
400
+ else:
401
+ return row["value"]
402
+
403
+ self._df.loc[points.index, "value"] = points.apply(f, axis=1)
404
+
405
+ return self._df