cwms-python 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {cwms_python-0.6.0 → cwms_python-0.7.0}/PKG-INFO +2 -2
  2. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/__init__.py +1 -1
  3. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/api.py +63 -50
  4. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/catalog/blobs.py +38 -24
  5. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/catalog/catalog.py +4 -0
  6. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/locations/physical_locations.py +5 -1
  7. cwms_python-0.7.0/cwms/measurements/measurements.py +177 -0
  8. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/ratings/ratings.py +9 -9
  9. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/ratings/ratings_spec.py +7 -7
  10. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries.py +94 -55
  11. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries_group.py +2 -2
  12. cwms_python-0.7.0/cwms/utils/__init__.py +0 -0
  13. cwms_python-0.7.0/cwms/utils/checks.py +10 -0
  14. {cwms_python-0.6.0 → cwms_python-0.7.0}/pyproject.toml +4 -2
  15. cwms_python-0.6.0/cwms/datafile_imports/shef_critfile_import.py +0 -130
  16. {cwms_python-0.6.0 → cwms_python-0.7.0}/LICENSE +0 -0
  17. {cwms_python-0.6.0 → cwms_python-0.7.0}/README.md +0 -0
  18. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/catalog/clobs.py +0 -0
  19. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/cwms_types.py +0 -0
  20. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/forecast/forecast_instance.py +0 -0
  21. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/forecast/forecast_spec.py +0 -0
  22. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/levels/location_levels.py +0 -0
  23. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/levels/specified_levels.py +0 -0
  24. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/locations/gate_changes.py +0 -0
  25. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/locations/location_groups.py +0 -0
  26. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/outlets/outlets.py +0 -0
  27. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/outlets/virtual_outlets.py +0 -0
  28. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/projects/project_lock_rights.py +0 -0
  29. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/projects/project_locks.py +0 -0
  30. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/projects/projects.py +0 -0
  31. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/ratings/ratings_template.py +0 -0
  32. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/standard_text/standard_text.py +0 -0
  33. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries_bin.py +0 -0
  34. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries_identifier.py +0 -0
  35. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries_profile.py +0 -0
  36. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries_profile_instance.py +0 -0
  37. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries_profile_parser.py +0 -0
  38. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/timeseries/timeseries_txt.py +0 -0
  39. {cwms_python-0.6.0 → cwms_python-0.7.0}/cwms/turbines/turbines.py +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: cwms-python
3
- Version: 0.6.0
4
- Summary: Corps water managerment systems (CWMS) REST API for Data Retrieval of USACE water data
3
+ Version: 0.7.0
4
+ Summary: Corps water management systems (CWMS) REST API for Data Retrieval of USACE water data
5
5
  License: LICENSE
6
6
  Keywords: USACE,water data,CWMS
7
7
  Author: Eric Novotny
@@ -4,7 +4,6 @@ from cwms.api import *
4
4
  from cwms.catalog.blobs import *
5
5
  from cwms.catalog.catalog import *
6
6
  from cwms.catalog.clobs import *
7
- from cwms.datafile_imports.shef_critfile_import import *
8
7
  from cwms.forecast.forecast_instance import *
9
8
  from cwms.forecast.forecast_spec import *
10
9
  from cwms.levels.location_levels import *
@@ -12,6 +11,7 @@ from cwms.levels.specified_levels import *
12
11
  from cwms.locations.gate_changes import *
13
12
  from cwms.locations.location_groups import *
14
13
  from cwms.locations.physical_locations import *
14
+ from cwms.measurements.measurements import *
15
15
  from cwms.outlets.outlets import *
16
16
  from cwms.outlets.virtual_outlets import *
17
17
  from cwms.projects.project_lock_rights import *
@@ -26,6 +26,7 @@ which includes the response object and provides some hints to the user on how to
26
26
  the error.
27
27
  """
28
28
 
29
+ import base64
29
30
  import json
30
31
  import logging
31
32
  from json import JSONDecodeError
@@ -34,6 +35,7 @@ from typing import Any, Optional, cast
34
35
  from requests import Response, adapters
35
36
  from requests_toolbelt import sessions # type: ignore
36
37
  from requests_toolbelt.sessions import BaseUrlSession # type: ignore
38
+ from urllib3.util.retry import Retry
37
39
 
38
40
  from cwms.cwms_types import JSON, RequestParams
39
41
 
@@ -42,8 +44,24 @@ API_ROOT = "https://cwms-data.usace.army.mil/cwms-data/"
42
44
  API_VERSION = 2
43
45
 
44
46
  # Initialize a non-authenticated session with the default root URL and set default pool connections.
47
+
48
+ retry_strategy = Retry(
49
+ total=6,
50
+ backoff_factor=0.5,
51
+ status_forcelist=[
52
+ 403,
53
+ 429,
54
+ 500,
55
+ 502,
56
+ 503,
57
+ 504,
58
+ ], # Example: also retry on these HTTP status codes
59
+ allowed_methods=["GET", "PUT", "POST", "PATCH", "DELETE"], # Methods to retry
60
+ )
45
61
  SESSION = sessions.BaseUrlSession(base_url=API_ROOT)
46
- adapter = adapters.HTTPAdapter(pool_connections=100, pool_maxsize=100)
62
+ adapter = adapters.HTTPAdapter(
63
+ pool_connections=100, pool_maxsize=100, max_retries=retry_strategy
64
+ )
47
65
  SESSION.mount("https://", adapter)
48
66
 
49
67
 
@@ -118,7 +136,9 @@ def init_session(
118
136
  logging.debug(f"Initializing root URL: api_root={api_root}")
119
137
  SESSION = sessions.BaseUrlSession(base_url=api_root)
120
138
  adapter = adapters.HTTPAdapter(
121
- pool_connections=pool_connections, pool_maxsize=pool_connections
139
+ pool_connections=pool_connections,
140
+ pool_maxsize=pool_connections,
141
+ max_retries=retry_strategy,
122
142
  )
123
143
  SESSION.mount("https://", adapter)
124
144
  if api_key:
@@ -188,20 +208,8 @@ def get_xml(
188
208
  Raises:
189
209
  ApiError: If an error response is return by the API.
190
210
  """
191
-
192
- headers = {"Accept": api_version_text(api_version)}
193
- response = SESSION.get(endpoint, params=params, headers=headers)
194
- response.close()
195
-
196
- if response.status_code < 200 or response.status_code >= 300:
197
- logging.error(f"CDA Error: response={response}")
198
- raise ApiError(response)
199
-
200
- try:
201
- return response.content.decode("utf-8")
202
- except JSONDecodeError as error:
203
- logging.error(f"Error decoding CDA response as xml: {error}")
204
- return {}
211
+ # Wrap the primary get for backwards compatibility
212
+ return get(endpoint=endpoint, params=params, api_version=api_version)
205
213
 
206
214
 
207
215
  def get(
@@ -209,7 +217,7 @@ def get(
209
217
  params: Optional[RequestParams] = None,
210
218
  *,
211
219
  api_version: int = API_VERSION,
212
- ) -> JSON:
220
+ ) -> Any:
213
221
  """Make a GET request to the CWMS Data API.
214
222
 
215
223
  Args:
@@ -228,17 +236,28 @@ def get(
228
236
  """
229
237
 
230
238
  headers = {"Accept": api_version_text(api_version)}
231
- response = SESSION.get(endpoint, params=params, headers=headers)
232
- response.close()
233
- if response.status_code < 200 or response.status_code >= 300:
234
- logging.error(f"CDA Error: response={response}")
235
- raise ApiError(response)
236
-
237
- try:
238
- return cast(JSON, response.json())
239
- except JSONDecodeError as error:
240
- logging.error(f"Error decoding CDA response as json: {error}")
241
- return {}
239
+ with SESSION.get(endpoint, params=params, headers=headers) as response:
240
+ if not response.ok:
241
+ logging.error(f"CDA Error: response={response}")
242
+ raise ApiError(response)
243
+ try:
244
+ # Avoid case sensitivity issues with the content type header
245
+ content_type = response.headers.get("Content-Type", "").lower()
246
+ # Most CDA content is JSON
247
+ if "application/json" in content_type or not content_type:
248
+ return cast(JSON, response.json())
249
+ # Use automatic charset detection with .text
250
+ if "text/plain" in content_type or "text/" in content_type:
251
+ return response.text
252
+ if content_type.startswith("image/"):
253
+ return base64.b64encode(response.content).decode("utf-8")
254
+ # Fallback for remaining content types
255
+ return response.content.decode("utf-8")
256
+ except JSONDecodeError as error:
257
+ logging.error(
258
+ f"Error decoding CDA response as JSON: {error} on line {error.lineno}\n\tFalling back to text"
259
+ )
260
+ return response.text
242
261
 
243
262
 
244
263
  def get_with_paging(
@@ -247,7 +266,7 @@ def get_with_paging(
247
266
  params: RequestParams,
248
267
  *,
249
268
  api_version: int = API_VERSION,
250
- ) -> JSON:
269
+ ) -> Any:
251
270
  """Make a GET request to the CWMS Data API with paging.
252
271
 
253
272
  Args:
@@ -312,12 +331,10 @@ def post(
312
331
  if isinstance(data, dict) or isinstance(data, list):
313
332
  data = json.dumps(data)
314
333
 
315
- response = SESSION.post(endpoint, params=params, headers=headers, data=data)
316
- response.close()
317
-
318
- if response.status_code < 200 or response.status_code >= 300:
319
- logging.error(f"CDA Error: response={response}")
320
- raise ApiError(response)
334
+ with SESSION.post(endpoint, params=params, headers=headers, data=data) as response:
335
+ if not response.ok:
336
+ logging.error(f"CDA Error: response={response}")
337
+ raise ApiError(response)
321
338
 
322
339
 
323
340
  def patch(
@@ -346,16 +363,13 @@ def patch(
346
363
  """
347
364
 
348
365
  headers = {"accept": "*/*", "Content-Type": api_version_text(api_version)}
349
- if data is None:
350
- response = SESSION.patch(endpoint, params=params, headers=headers)
351
- else:
352
- if isinstance(data, dict) or isinstance(data, list):
353
- data = json.dumps(data)
354
- response = SESSION.patch(endpoint, params=params, headers=headers, data=data)
355
- response.close()
356
- if response.status_code < 200 or response.status_code >= 300:
357
- logging.error(f"CDA Error: response={response}")
358
- raise ApiError(response)
366
+
367
+ if data and isinstance(data, dict) or isinstance(data, list):
368
+ data = json.dumps(data)
369
+ with SESSION.patch(endpoint, params=params, headers=headers, data=data) as response:
370
+ if not response.ok:
371
+ logging.error(f"CDA Error: response={response}")
372
+ raise ApiError(response)
359
373
 
360
374
 
361
375
  def delete(
@@ -379,8 +393,7 @@ def delete(
379
393
  """
380
394
 
381
395
  headers = {"Accept": api_version_text(api_version)}
382
- response = SESSION.delete(endpoint, params=params, headers=headers)
383
- response.close()
384
- if response.status_code < 200 or response.status_code >= 300:
385
- logging.error(f"CDA Error: response={response}")
386
- raise ApiError(response)
396
+ with SESSION.delete(endpoint, params=params, headers=headers) as response:
397
+ if not response.ok:
398
+ logging.error(f"CDA Error: response={response}")
399
+ raise ApiError(response)
@@ -1,29 +1,40 @@
1
+ import base64
1
2
  from typing import Optional
2
3
 
3
4
  import cwms.api as api
4
5
  from cwms.cwms_types import JSON, Data
6
+ from cwms.utils.checks import is_base64
5
7
 
8
+ STORE_DICT = """data = {
9
+ "office-id": "SWT",
10
+ "id": "MYFILE_OR_BLOB_ID.TXT",
11
+ "description": "Your description here",
12
+ "media-type-id": "application/octet-stream",
13
+ "value": "STRING of content or BASE64_ENCODED_STRING"
14
+ }
15
+ """
6
16
 
7
- def get_blob(blob_id: str, office_id: str) -> Data:
8
- """Get a single clob.
17
+
18
+ def get_blob(blob_id: str, office_id: str) -> str:
19
+ """Get a single BLOB (Binary Large Object).
9
20
 
10
21
  Parameters
11
22
  ----------
12
23
  blob_id: string
13
- Specifies the id of the blob
24
+ Specifies the id of the blob. ALL blob ids are UPPERCASE.
14
25
  office_id: string
15
26
  Specifies the office of the blob.
16
27
 
17
28
 
18
29
  Returns
19
30
  -------
20
- cwms data type. data.json will return the JSON output and data.df will return a dataframe
31
+ str: the value returned based on the content-type it was stored with as a string
21
32
  """
22
33
 
23
34
  endpoint = f"blobs/{blob_id}"
24
35
  params = {"office": office_id}
25
36
  response = api.get(endpoint, params, api_version=1)
26
- return Data(response)
37
+ return str(response)
27
38
 
28
39
 
29
40
  def get_blobs(
@@ -50,36 +61,39 @@ def get_blobs(
50
61
  endpoint = "blobs"
51
62
  params = {"office": office_id, "page-size": page_size, "like": blob_id_like}
52
63
 
53
- response = api.get(endpoint, params, api_version=1)
64
+ response = api.get(endpoint, params, api_version=2)
54
65
  return Data(response, selector="blobs")
55
66
 
56
67
 
57
68
  def store_blobs(data: JSON, fail_if_exists: Optional[bool] = True) -> None:
58
- """Create New Blob
69
+ f"""Create New Blob
59
70
 
60
71
  Parameters
61
- ----------
62
- Data: JSON dictionary
63
- JSON containing information of Blob to be updated
64
- {
65
- "office-id": "string",
66
- "id": "string",
67
- "description": "string",
68
- "media-type-id": "string",
69
- "value": "string"
70
- }
71
- fail_if_exists: Boolean
72
- Create will fail if provided ID already exists. Default: true
72
+ ----------
73
+ **Note**: The "id" field is automatically cast to uppercase.
73
74
 
74
- Returns
75
- -------
76
- None
75
+ Data: JSON dictionary
76
+ JSON containing information of Blob to be updated.
77
+
78
+ {STORE_DICT}
79
+ fail_if_exists: Boolean
80
+ Create will fail if the provided ID already exists. Default: True
81
+
82
+ Returns
83
+ -------
84
+ None
77
85
  """
78
86
 
79
87
  if not isinstance(data, dict):
80
- raise ValueError("Cannot store a Blob without a JSON data dictionary")
88
+ raise ValueError(
89
+ f"Cannot store a Blob without a JSON data dictionary:\n{STORE_DICT}"
90
+ )
91
+
92
+ # Encode value if it's not already Base64-encoded
93
+ if "value" in data and not is_base64(data["value"]):
94
+ # Encode to bytes, then Base64, then decode to string for storing
95
+ data["value"] = base64.b64encode(data["value"].encode("utf-8")).decode("utf-8")
81
96
 
82
97
  endpoint = "blobs"
83
98
  params = {"fail-if-exists": fail_if_exists}
84
-
85
99
  return api.post(endpoint, data, params, api_version=1)
@@ -77,6 +77,7 @@ def get_timeseries_catalog(
77
77
  timeseries_category_like: Optional[str] = None,
78
78
  timeseries_group_like: Optional[str] = "DMZ Include List",
79
79
  bounding_office_like: Optional[str] = None,
80
+ include_extents: Optional[bool] = False,
80
81
  ) -> Data:
81
82
  """Retrieves filters for the timeseries catalog
82
83
 
@@ -101,6 +102,8 @@ def get_timeseries_catalog(
101
102
  The regex for matching against the timeseries group id. This will default to pull only public datasets
102
103
  bounding_office_like: string
103
104
  The regex for matching against the location bounding office
105
+ include_extents: bool
106
+ Whether to include the time series extents in the catalog
104
107
 
105
108
  Returns
106
109
  -------
@@ -122,6 +125,7 @@ def get_timeseries_catalog(
122
125
  "timeseries-category-like": timeseries_category_like,
123
126
  "timeseries-group-like": timeseries_group_like,
124
127
  "bounding-office-like": bounding_office_like,
128
+ "include-extents": include_extents,
125
129
  }
126
130
 
127
131
  response = api.get(endpoint=endpoint, params=params, api_version=2)
@@ -70,7 +70,7 @@ def get_locations(
70
70
  params = {
71
71
  "office": office_id,
72
72
  "names": location_ids,
73
- "units": units,
73
+ "unit": units,
74
74
  "datum": datum,
75
75
  }
76
76
 
@@ -95,6 +95,7 @@ def ExpandLocations(df: DataFrame) -> DataFrame:
95
95
  def delete_location(
96
96
  location_id: str,
97
97
  office_id: Optional[str] = None,
98
+ cascade_delete: Optional[bool] = False,
98
99
  ) -> None:
99
100
  """
100
101
  Deletes location data with the given ID and office ID.
@@ -105,6 +106,8 @@ def delete_location(
105
106
  The ID of the office that the data belongs to.
106
107
  loc_ids : str
107
108
  The ID of the location that the data belongs to.
109
+ cascade_delete: bool
110
+ Whether to delete all data associated with location.
108
111
 
109
112
  Returns
110
113
  -------
@@ -119,6 +122,7 @@ def delete_location(
119
122
  endpoint = f"locations/{location_id}"
120
123
  params = {
121
124
  "office": office_id,
125
+ "cascade-delete": cascade_delete,
122
126
  }
123
127
 
124
128
  return api.delete(endpoint, params=params)
@@ -0,0 +1,177 @@
1
+ from datetime import datetime
2
+ from typing import Optional
3
+
4
+ import cwms.api as api
5
+ from cwms.cwms_types import JSON, Data
6
+
7
+
8
+ def get_measurements(
9
+ office_id: Optional[str] = None,
10
+ location_id_mask: Optional[str] = None,
11
+ min_number_id: Optional[str] = None,
12
+ max_number_id: Optional[str] = None,
13
+ begin: Optional[datetime] = None,
14
+ end: Optional[datetime] = None,
15
+ timezone: Optional[str] = None,
16
+ min_height: Optional[float] = None,
17
+ max_height: Optional[float] = None,
18
+ min_flow: Optional[float] = None,
19
+ max_flow: Optional[float] = None,
20
+ agency: Optional[str] = None,
21
+ quality: Optional[str] = None,
22
+ unit: Optional[str] = "EN",
23
+ ) -> Data:
24
+ """Returns matching measurement data
25
+
26
+ Parameters
27
+ ----------
28
+ office_id: string, optional, default is None
29
+ Office id mask for filtering measurements.
30
+ location_id_mask: string, optional, default is None
31
+ Location id mask for filtering measurements. Use null to retrieve measurements for all locations.
32
+ min_number_id: sting, optional, default is None
33
+ Minimum measurement number-id for filtering measurements.
34
+ max_number_id: string, optional, default is None
35
+ Maximum measurement number-id for filtering measurements.
36
+ begin: datetime, optional, default is None
37
+ Start of the time window for data to be included in the response. If this field is
38
+ not specified, then begin time will be unbounded. Any timezone information should be
39
+ passed within the datetime object. If no timezone information is given, default will be UTC.
40
+ end: datetime, optional, default is None
41
+ End of the time window for data to be included in the response. If this field is
42
+ not specified, then begin time will be unbounded. Any timezone information should
43
+ be passed within the datetime object. If no timezone information is given, default will be UTC.
44
+ timezone: string, optional, default is None
45
+ This field specifies a default timezone to be used if the format of the begin and end
46
+ parameters do not include offset or time zone information. Defaults to UTC
47
+ min_height: float, optional, default is None
48
+ Minimum height for filtering measurements.
49
+ max_height: float, optional, default is None
50
+ Maximum flow for filtering measurements.
51
+ min_flow: float, optional, default is None
52
+ Minimum flow for filtering measurements.
53
+ max_flow: float, optional, default is None
54
+ Maximum flow for filtering measurements.
55
+ agency: string, optional, default is None
56
+ Agency for filtering measurements
57
+ quality: string, optional, default is None
58
+ Quality for filtering measurements
59
+ unit_systems: string, optional, default is EN
60
+ Specifies the unit system of the response. Valid values for the unit field are:
61
+ 1. EN. English unit system.
62
+ 2. SI. SI unit system.
63
+ Returns
64
+ -------
65
+ cwms data type. data.json will return the JSON output and data.df will return a dataframe. Dates returned are all in UTC.
66
+ """
67
+
68
+ # creates the dataframe from the timeseries data
69
+ endpoint = "measurements"
70
+ if begin and not isinstance(begin, datetime):
71
+ raise ValueError("begin needs to be in datetime")
72
+ if end and not isinstance(end, datetime):
73
+ raise ValueError("end needs to be in datetime")
74
+
75
+ params = {
76
+ "office-mask": office_id,
77
+ "id-mask": location_id_mask,
78
+ "min-number": min_number_id,
79
+ "max-number": max_number_id,
80
+ "begin": begin.isoformat() if begin else None,
81
+ "end": end.isoformat() if end else None,
82
+ "timezone": timezone,
83
+ "min-height": min_height,
84
+ "max-height": max_height,
85
+ "min-flow": min_flow,
86
+ "max-flow": max_flow,
87
+ "agency": agency,
88
+ "quality": quality,
89
+ "unit-system": unit,
90
+ }
91
+
92
+ response = api.get(endpoint, params, api_version=1)
93
+ return Data(response) # , selector=selector)
94
+
95
+
96
+ def store_measurements(
97
+ data: JSON,
98
+ fail_if_exists: Optional[bool] = True,
99
+ ) -> None:
100
+ """Will Create new measurement(s)
101
+
102
+ Parameters
103
+ ----------
104
+ data: JSON dictionary
105
+ measurement data to be stored.
106
+ fail_if_exists: bool, optional, default is True
107
+ Create will fail if provided Measurement(s) already exist.
108
+
109
+ Returns
110
+ -------
111
+ response
112
+ """
113
+
114
+ endpoint = "measurements"
115
+ params = {
116
+ "fail-if-exists": fail_if_exists,
117
+ }
118
+
119
+ if not isinstance(data, dict):
120
+ raise ValueError("Cannot store a timeseries without a JSON data dictionary")
121
+
122
+ return api.post(endpoint, data, params, api_version=1)
123
+
124
+
125
+ def delete_measurements(
126
+ location_id: str,
127
+ office_id: str,
128
+ begin: datetime,
129
+ end: datetime,
130
+ timezone: Optional[str] = None,
131
+ min_number_id: Optional[str] = None,
132
+ max_number_id: Optional[str] = None,
133
+ ) -> None:
134
+ """Delete an existing measurement
135
+
136
+ Parameters
137
+ ----------
138
+ office_id: string
139
+ Specifies the office of the measurements to delete
140
+ location_id: string
141
+ Specifies the location-id of the measurement(s) to be deleted.
142
+ begin: datetime
143
+ Start of the time window to delete. Any timezone information should be
144
+ passed within the datetime object. If no timezone information is given, default will be UTC.
145
+ end: datetime
146
+ End of the time window to delete. Any timezone information should
147
+ be passed within the datetime object. If no timezone information is given, default will be UTC.
148
+ timezone: string, optional, default is None
149
+ This field specifies a default timezone to be used if the format of the begin and end
150
+ parameters do not include offset or time zone information. Defaults to UTC
151
+ min_number_id: sting, optional, default is None
152
+ Minimum measurement number-id of the measurement to be deleted.
153
+ max_number_id: string, optional, default is None
154
+ Maximum measurement number-id of the measurement to be deleted.
155
+
156
+ Returns
157
+ -------
158
+ None
159
+ """
160
+
161
+ if location_id is None:
162
+ raise ValueError("Deleting measurements requires a location id")
163
+ if office_id is None:
164
+ raise ValueError("Deleting measurements requires an office")
165
+
166
+ endpoint = f"measurements/{location_id}"
167
+
168
+ params = {
169
+ "office": office_id,
170
+ "begin": begin.isoformat() if begin else None,
171
+ "end": end.isoformat() if end else None,
172
+ "timezone": timezone,
173
+ "min-number": min_number_id,
174
+ "max-number": max_number_id,
175
+ }
176
+
177
+ return api.delete(endpoint, params, api_version=1)
@@ -15,7 +15,7 @@ def rating_current_effective_date(rating_id: str, office_id: str) -> Any:
15
15
  """Retrieve the most recent effective date for a specific rating id.
16
16
 
17
17
  Returns
18
- datatime
18
+ Any
19
19
  the datetime of the most recent effective date for a rating id. If max effective date is
20
20
  not present for rating_id then None will be returned
21
21
 
@@ -46,7 +46,7 @@ def get_current_rating(
46
46
  The owning office of the rating specifications. If no office is provided information from all offices will
47
47
  be returned
48
48
  rating_table_in_df: Bool, Optional Default = True
49
- define if the independant and dependant variables should be stored as a dataframe
49
+ define if the independent and dependant variables should be stored as a dataframe
50
50
  Returns
51
51
  -------
52
52
  Data : Data
@@ -112,7 +112,7 @@ def get_ratings_xml(
112
112
  timezone: Optional[str] = None,
113
113
  method: Optional[str] = "EAGER",
114
114
  ) -> Any:
115
- """Retrives ratings for a specific rating-id
115
+ """Retrieves ratings for a specific rating-id
116
116
 
117
117
  Parameters
118
118
  ----------
@@ -124,7 +124,7 @@ def get_ratings_xml(
124
124
  begin: datetime, optional
125
125
  the start of the time window for data to be included in the response. This is based on the effective date of the ratings
126
126
  end: datetime, optional
127
- the end of the time window for data to be included int he reponse. This is based on the effective date of the ratings
127
+ the end of the time window for data to be included int he response. This is based on the effective date of the ratings
128
128
  timezone:
129
129
  the time zone of the values in the being and end fields if not specified UTC is used
130
130
  method:
@@ -225,13 +225,13 @@ def rating_simple_df_to_json(
225
225
  active: Optional[bool] = True,
226
226
  ) -> JSON:
227
227
  """This function converts a dataframe to a json dictionary in the correct format to be posted using the store_ratings function. Can
228
- only be used for simple ratings with a indenpendant and 1 dependant variable.
228
+ only be used for simple ratings with a independent and 1 dependant variable.
229
229
 
230
230
  Parameters
231
231
  ----------
232
232
  data: pd.Dataframe
233
233
  Rating Table to be stored to an exiting rating specification and template. Can only have 2 columns ind and dep. ind
234
- contained the indenpendant variable and dep contains the dependent variable.
234
+ contained the independent variable and dep contains the dependent variable.
235
235
  ind dep
236
236
  0 9.62 0.01
237
237
  1 9.63 0.01
@@ -249,7 +249,7 @@ def rating_simple_df_to_json(
249
249
  office_id: str
250
250
  the owning office of the rating
251
251
  units: str
252
- units for both the independant and dependent variable seperated by ; i.e. ft;cfs or ft;ft.
252
+ units for both the independent and dependent variable separated by ; i.e. ft;cfs or ft;ft.
253
253
  effective_date: datetime,
254
254
  The effective date of the rating curve to be stored.
255
255
  transition_start_date: datetime Optional = None
@@ -384,7 +384,7 @@ def delete_ratings(
384
384
 
385
385
 
386
386
  def store_rating(data: Any, store_template: Optional[bool] = True) -> None:
387
- """Will create a new ratingset including template/spec and rating
387
+ """Will create a new rating-set including template/spec and rating
388
388
 
389
389
  Parameters
390
390
  ----------
@@ -403,7 +403,7 @@ def store_rating(data: Any, store_template: Optional[bool] = True) -> None:
403
403
 
404
404
  if not isinstance(data, dict) and xml_heading not in data:
405
405
  raise ValueError(
406
- "Cannot store a timeseries without a JSON data dictionaryor in XML"
406
+ "Cannot store a timeseries without a JSON data dictionary or in XML"
407
407
  )
408
408
 
409
409
  if xml_heading in data:
@@ -8,7 +8,7 @@ from cwms.cwms_types import JSON, Data
8
8
 
9
9
 
10
10
  def get_rating_spec(rating_id: str, office_id: str) -> Data:
11
- """Retrives a single rating spec
11
+ """Retrieves a single rating spec
12
12
 
13
13
  Parameters
14
14
  ----------
@@ -37,7 +37,7 @@ def get_rating_specs(
37
37
  rating_id_mask: Optional[str] = None,
38
38
  page_size: int = 500000,
39
39
  ) -> Data:
40
- """Retrives a list of rating specification
40
+ """Retrieves a list of rating specification
41
41
 
42
42
  Parameters
43
43
  ----------
@@ -45,7 +45,7 @@ def get_rating_specs(
45
45
  The owning office of the rating specifications. If no office is provided information from all offices will
46
46
  be returned
47
47
  rating-id-mask: string, optional
48
- Posix regular expression that specifies the rating ids to be included in the reponce. If not specified all
48
+ Posix regular expression that specifies the rating ids to be included in the response. If not specified all
49
49
  rating specs shall be returned.
50
50
  page-size: int, optional, default is 5000000: Specifies the number of records to obtain in
51
51
  a single call.
@@ -111,7 +111,7 @@ def rating_spec_df_to_xml(data: pd.DataFrame) -> str:
111
111
  Parameters
112
112
  ----------
113
113
  data : pd_dataframe
114
- pandas dataframe that contrains rating specification paramters
114
+ pandas dataframe that contains rating specification parameters
115
115
  should follow same formate the is returned from get_rating_spec function
116
116
  Returns
117
117
  -------
@@ -134,10 +134,10 @@ def rating_spec_df_to_xml(data: pd.DataFrame) -> str:
134
134
  <auto-migrate-extension>{str(data.loc[0,'auto-migrate-extension']).lower()}</auto-migrate-extension>
135
135
  <ind-rounding-specs>"""
136
136
 
137
- ind_rouding = data.loc[0, "independent-rounding-specs"]
138
- if isinstance(ind_rouding, list):
137
+ ind_rounding = data.loc[0, "independent-rounding-specs"]
138
+ if isinstance(ind_rounding, list):
139
139
  i = 1
140
- for rounding in ind_rouding:
140
+ for rounding in ind_rounding:
141
141
  spec_xml = (
142
142
  spec_xml
143
143
  + f"""\n <ind-rounding-spec position="{i}">{rounding['value']}</ind-rounding-spec>"""
@@ -1,4 +1,4 @@
1
- import threading
1
+ import concurrent.futures
2
2
  from datetime import datetime
3
3
  from typing import Any, Dict, Optional
4
4
 
@@ -16,13 +16,14 @@ def get_multi_timeseries_df(
16
16
  begin: Optional[datetime] = None,
17
17
  end: Optional[datetime] = None,
18
18
  melted: Optional[bool] = False,
19
+ max_workers: Optional[int] = 30,
19
20
  ) -> DataFrame:
20
21
  """gets multiple timeseries and stores into a single dataframe
21
22
 
22
23
  Parameters
23
24
  ----------
24
- ts_ids: linst
25
- a list of timeseries to get. If the timeseries is a verioned timeseries then serpeate the ts_id from the
25
+ ts_ids: list
26
+ a list of timeseries to get. If the timeseries is a versioned timeseries then separate the ts_id from the
26
27
  version_date using a :. Example "OMA.Stage.Inst.6Hours.0.Fcst-MRBWM-GRFT:2024-04-22 07:00:00-05:00". Make
27
28
  sure that the version date include the timezone offset if not in UTC.
28
29
  office_id: string
@@ -46,6 +47,9 @@ def get_multi_timeseries_df(
46
47
  melted: Boolean, optional, default is false
47
48
  if set to True a melted dataframe will be provided. By default a multi-index column dataframe will be
48
49
  returned.
50
+ max_workers: Int, Optional, default is None
51
+ It is a number of Threads aka size of pool in concurrent.futures.ThreadPoolExecutor. From 3.8 onwards
52
+ default value is min(32, os.cpu_count() + 4). Out of these 5 threads are preserved for I/O bound task.
49
53
 
50
54
 
51
55
  Returns
@@ -53,60 +57,47 @@ def get_multi_timeseries_df(
53
57
  dataframe
54
58
  """
55
59
 
56
- def get_ts_ids(
57
- result_dict: list[Dict[str, Any]],
58
- ts_id: str,
59
- office_id: str,
60
- begin: datetime,
61
- end: datetime,
62
- unit: str,
63
- version_date: datetime,
64
- ) -> None:
65
- data = get_timeseries(
66
- ts_id=ts_id,
67
- office_id=office_id,
68
- unit=unit,
69
- begin=begin,
70
- end=end,
71
- version_date=version_date,
72
- )
73
- result_dict.append(
74
- {
60
+ def get_ts_ids(ts_id: str) -> Any:
61
+ try:
62
+ if ":" in ts_id:
63
+ ts_id, version_date = ts_id.split(":", 1)
64
+ version_date_dt = pd.to_datetime(version_date)
65
+ else:
66
+ version_date_dt = None
67
+ data = get_timeseries(
68
+ ts_id=ts_id,
69
+ office_id=office_id,
70
+ unit=unit,
71
+ begin=begin,
72
+ end=end,
73
+ version_date=version_date_dt,
74
+ )
75
+ result_dict = {
75
76
  "ts_id": ts_id,
76
77
  "unit": data.json["units"],
77
- "version_date": version_date,
78
+ "version_date": version_date_dt,
78
79
  "values": data.df,
79
80
  }
80
- )
81
-
82
- result_dict = [] # type: list[Dict[str,Any]]
83
- threads = []
84
- for ts_id in ts_ids:
85
- if ":" in ts_id:
86
- ts_id, version_date = ts_id.split(":", 1)
87
- version_date_dt = pd.to_datetime(version_date)
88
- else:
89
- version_date_dt = None
90
- t = threading.Thread(
91
- target=get_ts_ids,
92
- args=(result_dict, ts_id, office_id, begin, end, unit, version_date_dt),
93
- )
94
- threads.append(t)
95
- t.start()
81
+ return result_dict
82
+ except Exception as e:
83
+ print(f"Error processing {ts_id}: {e}")
84
+ return None
96
85
 
97
- for t in threads:
98
- t.join()
86
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
87
+ results = executor.map(get_ts_ids, ts_ids)
99
88
 
89
+ result_dict = list(results)
100
90
  data = pd.DataFrame()
101
91
  for row in result_dict:
102
- temp_df = row["values"]
103
- temp_df = temp_df.assign(ts_id=row["ts_id"], units=row["unit"])
104
- if "version_date" in row.keys():
105
- temp_df = temp_df.assign(version_date=row["version_date"])
106
- temp_df.dropna(how="all", axis=1, inplace=True)
107
- data = pd.concat([data, temp_df], ignore_index=True)
108
-
109
- if not melted:
92
+ if row:
93
+ temp_df = row["values"]
94
+ temp_df = temp_df.assign(ts_id=row["ts_id"], units=row["unit"])
95
+ if "version_date" in row.keys():
96
+ temp_df = temp_df.assign(version_date=row["version_date"])
97
+ temp_df.dropna(how="all", axis=1, inplace=True)
98
+ data = pd.concat([data, temp_df], ignore_index=True)
99
+
100
+ if not melted and "date-time" in data.columns:
110
101
  cols = ["ts_id", "units"]
111
102
  if "version_date" in data.columns:
112
103
  cols.append("version_date")
@@ -129,7 +120,7 @@ def get_timeseries(
129
120
  datum: Optional[str] = None,
130
121
  begin: Optional[datetime] = None,
131
122
  end: Optional[datetime] = None,
132
- page_size: Optional[int] = 500000,
123
+ page_size: Optional[int] = 300000,
133
124
  version_date: Optional[datetime] = None,
134
125
  trim: Optional[bool] = True,
135
126
  ) -> Data:
@@ -163,7 +154,7 @@ def get_timeseries(
163
154
  not specified, any required time window ends at the current time. Any timezone
164
155
  information should be passed within the datetime object. If no timezone information
165
156
  is given, default will be UTC.
166
- page_size: int, optional, default is 5000000: Sepcifies the number of records to obtain in
157
+ page_size: int, optional, default is 300000: Specifies the number of records to obtain in
167
158
  a single call.
168
159
  version_date: datetime, optional, default is None
169
160
  Version date of time series values being requested. If this field is not specified and
@@ -208,7 +199,7 @@ def timeseries_df_to_json(
208
199
  office_id: str,
209
200
  version_date: Optional[datetime] = None,
210
201
  ) -> JSON:
211
- """This function converts a dataframe to a json dictionary in the correct format to be posted using the store_timeseries fucntion.
202
+ """This function converts a dataframe to a json dictionary in the correct format to be posted using the store_timeseries function.
212
203
 
213
204
  Parameters
214
205
  ----------
@@ -223,7 +214,7 @@ def timeseries_df_to_json(
223
214
  2 2023-12-20T15:15:00.000-05:00 98.5 0
224
215
  3 2023-12-20T15:30:00.000-05:00 98.5 0
225
216
  ts_id: str
226
- timeseried id:specified name of the timeseries to be posted to
217
+ timeseries id:specified name of the timeseries to be posted to
227
218
  office_id: str
228
219
  the owning office of the time series
229
220
  units: str
@@ -242,7 +233,7 @@ def timeseries_df_to_json(
242
233
  df["quality-code"] = 0
243
234
  if "date-time" not in df:
244
235
  raise TypeError(
245
- "date-time is a required column in data when posting as a dateframe"
236
+ "date-time is a required column in data when posting as a dataframe"
246
237
  )
247
238
  if "value" not in df:
248
239
  raise TypeError(
@@ -268,6 +259,54 @@ def timeseries_df_to_json(
268
259
  return ts_dict
269
260
 
270
261
 
262
+ def store_multi_timeseries_df(
263
+ ts_data: pd.DataFrame, office_id: str, max_workers: Optional[int] = 30
264
+ ) -> None:
265
+
266
+ def store_ts_ids(
267
+ data: pd.DataFrame,
268
+ ts_id: str,
269
+ office_id: str,
270
+ version_date: Optional[datetime] = None,
271
+ ) -> None:
272
+ units = data["units"].iloc[0]
273
+ data_json = timeseries_df_to_json(
274
+ data=data,
275
+ ts_id=ts_id,
276
+ units=units,
277
+ office_id=office_id,
278
+ version_date=version_date,
279
+ )
280
+ store_timeseries(data=data_json)
281
+ return None
282
+
283
+ unique_tsids = (
284
+ ts_data["ts_id"].astype(str) + ":" + ts_data["version_date"].astype(str)
285
+ ).unique()
286
+
287
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
288
+ for ts_id_all in unique_tsids:
289
+ try:
290
+ ts_id, version_date = ts_id_all.split(":", 1)
291
+ if version_date != "NaT":
292
+ version_date_dt = pd.to_datetime(version_date)
293
+ data = ts_data[
294
+ (ts_data["ts_id"] == ts_id)
295
+ & (ts_data["version_date"] == version_date_dt)
296
+ ]
297
+ else:
298
+ version_date_dt = None
299
+ data = ts_data[
300
+ (ts_data["ts_id"] == ts_id) & ts_data["version_date"].isna()
301
+ ]
302
+ if not data.empty:
303
+ executor.submit(
304
+ store_ts_ids, data, ts_id, office_id, version_date_dt
305
+ )
306
+ except Exception as e:
307
+ print(f"Error processing {ts_id}: {e}")
308
+
309
+
271
310
  def store_timeseries(
272
311
  data: JSON,
273
312
  create_as_ltrs: Optional[bool] = False,
@@ -280,7 +319,7 @@ def store_timeseries(
280
319
  ----------
281
320
  data: JSON dictionary
282
321
  Time Series data to be stored.
283
- create_as_ltrs: bool, optional, defualt is False
322
+ create_as_ltrs: bool, optional, default is False
284
323
  Flag indicating if timeseries should be created as Local Regular Time Series.
285
324
  store_rule: str, optional, default is None:
286
325
  The business rule to use when merging the incoming with existing data. Available values :
@@ -12,9 +12,9 @@ from cwms.cwms_types import JSON, Data
12
12
  def get_timeseries_group(
13
13
  group_id: str,
14
14
  category_id: str,
15
- office_id: str,
16
- group_office_id: str,
17
15
  category_office_id: str,
16
+ office_id: Optional[str] = None,
17
+ group_office_id: Optional[str] = None,
18
18
  ) -> Data:
19
19
  """Retrieves time series stored in the requested time series group
20
20
 
File without changes
@@ -0,0 +1,10 @@
1
+ import base64
2
+
3
+
4
+ def is_base64(s: str) -> bool:
5
+ """Check if a string is Base64 encoded."""
6
+ try:
7
+ decoded = base64.b64decode(s, validate=True)
8
+ return base64.b64encode(decoded).decode("utf-8") == s
9
+ except (ValueError, TypeError):
10
+ return False
@@ -1,12 +1,14 @@
1
1
  [tool.poetry]
2
2
  name = "cwms-python"
3
3
  repository = "https://github.com/HydrologicEngineeringCenter/cwms-python"
4
- version = "0.6.0"
4
+
5
+ version = "0.7.0"
6
+
5
7
 
6
8
  packages = [
7
9
  { include = "cwms" },
8
10
  ]
9
- description = "Corps water managerment systems (CWMS) REST API for Data Retrieval of USACE water data"
11
+ description = "Corps water management systems (CWMS) REST API for Data Retrieval of USACE water data"
10
12
  readme = "README.md"
11
13
  license = "LICENSE"
12
14
  keywords = ["USACE", "water data", "CWMS"]
@@ -1,130 +0,0 @@
1
- import re
2
- from typing import Dict, List
3
-
4
- import pandas as pd
5
-
6
- import cwms
7
-
8
-
9
- def import_critfile_to_ts_group(
10
- file_path: str,
11
- office_id: str,
12
- group_id: str = "SHEF Data Acquisition",
13
- category_id: str = "Data Acquisition",
14
- group_office_id: str = "CWMS",
15
- category_office_id: str = "CWMS",
16
- replace_assigned_ts: bool = False,
17
- ) -> None:
18
- """
19
- Processes a .crit file and saves the information to the SHEF Data Acquisition time series group.
20
-
21
- Parameters
22
- ----------
23
- file_path : str
24
- Path to the .crit file.
25
- office_id : str
26
- The ID of the office associated with the specified timeseries.
27
- group_id : str, optional
28
- The specified group associated with the timeseries data. Defaults to "SHEF Data Acquisition".
29
- category_id : str, optional
30
- The category ID that contains the timeseries group. Defaults to "Data Acquisition".
31
- group_office_id : str, optional
32
- The specified office group associated with the timeseries data. Defaults to "CWMS".
33
- replace_assigned_ts : bool, optional
34
- Specifies whether to unassign all existing time series before assigning new time series specified in the content body. Default is False.
35
-
36
- Returns
37
- -------
38
- None
39
- """
40
-
41
- def parse_crit_file(file_path: str) -> List[Dict[str, str]]:
42
- """
43
- Parses a .crit file into a dictionary containing timeseries ID and Alias.
44
-
45
- Parameters
46
- ----------
47
- file_path : str
48
- Path to the .crit file.
49
-
50
- Returns
51
- -------
52
- List[Dict[str, str]]
53
- A list of dictionaries with "Alias" and "Timeseries ID" as keys.
54
- """
55
- parsed_data = []
56
- with open(file_path, "r") as file:
57
- for line in file:
58
- # Ignore comment lines and empty lines
59
- if line.startswith("#") or not line.strip():
60
- continue
61
-
62
- # Extract alias, timeseries ID, and TZ
63
- match = re.match(r"([^=]+)=([^;]+);(.+)", line.strip())
64
-
65
- if match:
66
- alias = match.group(1).strip()
67
- timeseries_id = match.group(2).strip()
68
- alias2 = match.group(3).strip()
69
-
70
- parsed_data.append(
71
- {
72
- "Alias": alias + ":" + alias2,
73
- "Timeseries ID": timeseries_id,
74
- }
75
- )
76
-
77
- return parsed_data
78
-
79
- def append_df(
80
- df: pd.DataFrame, office_id: str, ts_id: str, alias: str
81
- ) -> pd.DataFrame:
82
- """
83
- Appends a row to the DataFrame.
84
-
85
- Parameters
86
- ----------
87
- df : pandas.DataFrame
88
- The DataFrame to append to.
89
- office_id : str
90
- The ID of the office associated with the specified timeseries.
91
- tsId : str
92
- The timeseries ID from the file.
93
- alias : str
94
- The alias from the file.
95
- Returns
96
- -------
97
- pandas.DataFrame
98
- The updated DataFrame.
99
- """
100
- data = {
101
- "office-id": [office_id],
102
- "timeseries-id": [ts_id],
103
- "alias-id": [alias],
104
- }
105
- df = pd.concat([df, pd.DataFrame(data)])
106
- return df
107
-
108
- # Parse the file and get the parsed data
109
- parsed_data = parse_crit_file(file_path)
110
-
111
- df = pd.DataFrame()
112
- for data in parsed_data:
113
- # Create DataFrame for the current row
114
- df = append_df(df, office_id, data["Timeseries ID"], data["Alias"])
115
-
116
- # Generate JSON dictionary
117
- json_dict = cwms.timeseries_group_df_to_json(
118
- data=df,
119
- group_id=group_id,
120
- group_office_id=group_office_id,
121
- category_office_id=category_office_id,
122
- category_id=category_id,
123
- )
124
-
125
- cwms.update_timeseries_groups(
126
- group_id=group_id,
127
- office_id=office_id,
128
- replace_assigned_ts=replace_assigned_ts,
129
- data=json_dict,
130
- )
File without changes
File without changes