discontinuum 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
discontinuum/_version.py CHANGED
@@ -17,5 +17,5 @@ __version__: str
17
17
  __version_tuple__: VERSION_TUPLE
18
18
  version_tuple: VERSION_TUPLE
19
19
 
20
- __version__ = version = '1.0.0'
21
- __version_tuple__ = version_tuple = (1, 0, 0)
20
+ __version__ = version = '1.0.2'
21
+ __version_tuple__ = version_tuple = (1, 0, 2)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: discontinuum
3
- Version: 1.0.0
3
+ Version: 1.0.2
4
4
  Summary: Estimate discontinuous timeseries from continuous covariates.
5
5
  Maintainer-email: Timothy Hodson <thodson@usgs.gov>
6
6
  License: License
@@ -1,5 +1,5 @@
1
1
  discontinuum/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- discontinuum/_version.py,sha256=fo5PXsZuloQZu3LdpIFTUAXvJmY2L9N5sNGe2tvdU98,511
2
+ discontinuum/_version.py,sha256=98Mtoja-WpqDUnppClywkGhX6oxX6l-ZHnSYMYYbBUE,511
3
3
  discontinuum/data_manager.py,sha256=LiZoPR0nnu7YAUfh5L1ZDRfaS3dgfVIELXIHkzUKyBg,4416
4
4
  discontinuum/pipeline.py,sha256=1avuZnFai-b3HmihcpZ8M3WFNQ8lXAFSNTrnfl2NrY0,10074
5
5
  discontinuum/plot.py,sha256=eZQS6-Ydq8FFcEukPtNuDVB-weV6lHyWMyJ1hqTkVrU,2969
@@ -11,7 +11,7 @@ discontinuum/engines/pymc.py,sha256=phbtE-3UCSVcP1MhbXwAHIWDZWDr56wK9U7aRt-w-2o,
11
11
  discontinuum/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  discontinuum/providers/base.py,sha256=Yn2EHS1b4fYl09-m2MYuf2P9VRUXAP-WDpSoZrCbRvY,720
13
13
  discontinuum/tests/test_pipeline.py,sha256=_FhkGxbFIxNb35lGaIdZk7Zjgs6CkxEF3gFUX3PE8EU,918
14
- discontinuum-1.0.0.dist-info/licenses/LICENSE.md,sha256=XElVHHnS2uQ15M_Z2giPH1vmeWMzdpGQ48ItkuZurVA,1650
14
+ discontinuum-1.0.2.dist-info/licenses/LICENSE.md,sha256=XElVHHnS2uQ15M_Z2giPH1vmeWMzdpGQ48ItkuZurVA,1650
15
15
  loadest_gp/__init__.py,sha256=YISfvbc7Zy2y0BOxS1A2KzqxyoNJTz0EnLMnRW6iVT8,740
16
16
  loadest_gp/plot.py,sha256=x2PK7vBCc44dX9lu5YV-rvw1u4pvXSLdcrTSvYLiHMA,2595
17
17
  loadest_gp/utils.py,sha256=m5QaqR_0JiuRXPfryH8nI5lODp8PqvQla5C05WDN3LY,2772
@@ -28,8 +28,8 @@ rating_gp/models/base.py,sha256=e2Kq644I88YLHWPNA0qyRgitF5wimdLW4618vKX-o_s,1474
28
28
  rating_gp/models/gpytorch.py,sha256=rSxuTMoSeGK2LwqANMDMrmxDsOCXyAWSmENm6KFjRZ0,5930
29
29
  rating_gp/models/kernels.py,sha256=3xg2mhY3aEgjI3r5vyAll9MA4c3M5UKqRi3FApNhJJQ,11579
30
30
  rating_gp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- rating_gp/providers/usgs.py,sha256=I3iilF5jHk3DOejD_qoSyR-nY70GGjRI4Aw8yIMNhmQ,6094
32
- discontinuum-1.0.0.dist-info/METADATA,sha256=sT0RWAmZ683JHKq3363yE43-VpJ0G12_vUlxjg4mcQg,6231
33
- discontinuum-1.0.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
- discontinuum-1.0.0.dist-info/top_level.txt,sha256=mwU_PSFrZYSJrBgqIuTJTo7Pp9ODDv6XdDed7kAagXM,34
35
- discontinuum-1.0.0.dist-info/RECORD,,
31
+ rating_gp/providers/usgs.py,sha256=KmKYN3c8Mi-ly2l6X80WT3taEhqCPXeEcRNi9HvbJmY,8134
32
+ discontinuum-1.0.2.dist-info/METADATA,sha256=0oCct8VOrQib5sCzdi2cBNI8KEcpYTYTKj1oCL3DCsc,6231
33
+ discontinuum-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
+ discontinuum-1.0.2.dist-info/top_level.txt,sha256=mwU_PSFrZYSJrBgqIuTJTo7Pp9ODDv6XdDed7kAagXM,34
35
+ discontinuum-1.0.2.dist-info/RECORD,,
@@ -21,6 +21,16 @@ if TYPE_CHECKING:
21
21
  FT_TO_M = 0.3048
22
22
  FT3_TO_M3 = 0.0283168
23
23
 
24
+ # Quantitative values of "measured_rating_diff"
25
+ USGS_QUALITY_CODES = {
26
+ 'Excellent': '0.02',
27
+ 'Good': '0.05',
28
+ 'Fair': '0.08',
29
+ 'Poor': '0.12',
30
+ 'Unspecified': '0.12',
31
+ }
32
+
33
+
24
34
  @dataclass
25
35
  class NWISColumn:
26
36
  column_name: str
@@ -182,6 +192,53 @@ def get_measurements(
182
192
  )
183
193
  df = nwis._read_rdb(response.text)
184
194
 
195
+ return read_measurements_df(df)
196
+
197
+
198
+ def read_measurements_df(df: pd.DataFrame) -> xr.Dataset:
199
+ """Read a DataFrame of USGS discharge measurements and convert to xarray Dataset.
200
+
201
+ Parameters
202
+ ----------
203
+ df : pd.DataFrame
204
+ Dataframe from `dataretrieval.nwis.get_discharge_measurements()`
205
+
206
+ Returns
207
+ -------
208
+ xr.Dataset
209
+
210
+ Example
211
+ -------
212
+ >>> from dataretrieval import nwis
213
+ >>> from rating_gp.providers.usgs import read_measurements_df
214
+ >>> df, _ = nwis.get_discharge_measurements(
215
+ sites='03339000',
216
+ start='2020-01-01',
217
+ end='2020-12-31',
218
+ format='rdb_expanded',
219
+ )
220
+ >>> ds = read_measurements_df(df)
221
+ """
222
+
223
+ # assert the correct columns are present
224
+ required_columns = [
225
+ "measurement_dt",
226
+ "gage_height_va",
227
+ "discharge_va",
228
+ "q_meas_used_fg",
229
+ "control_type_cd",
230
+ "measured_rating_diff",
231
+ "streamflow_method",
232
+ NWISStage.column_name,
233
+ NWISDischarge.column_name,
234
+ ]
235
+
236
+ missing_columns = set(required_columns) - set(df.columns)
237
+ if missing_columns:
238
+ raise ValueError(
239
+ f"Missing required columns in the DataFrame: {missing_columns}"
240
+ )
241
+
185
242
  # convert timezone to UTC? ignore for now
186
243
  df.index = pd.to_datetime(
187
244
  df["measurement_dt"],
@@ -195,20 +252,40 @@ def get_measurements(
195
252
  NWISDischarge.column_name: NWISDischarge.standard_name,
196
253
  }
197
254
  )
198
- # parse uncertainty from measured "measured_rating_diff"
199
- qualitycode_to_uncertainty_fraction = {
200
- 'Excellent': '0.02',
201
- 'Good': '0.05',
202
- 'Fair': '0.08',
203
- 'Poor': '0.12',
204
- 'Unspecified': '0.12',
205
- }
255
+
256
+ # Process the control_type_cd column
257
+ df["control_type_cd"] = (
258
+ df["control_type_cd"]
259
+ .fillna("Unspecified")
260
+ .astype("category")
261
+ )
262
+
263
+ # Filter any measurements that are not used in the rating
264
+ mask = df["q_meas_used_fg"].str.lower().isin(['yes', 'y'])
265
+ df = df[mask]
266
+
267
+ num_not_used = (~mask).sum()
268
+ if num_not_used > 0:
269
+ warnings.warn(
270
+ f"{num_not_used} measurements were not used in the rating and "
271
+ "will be dropped from the dataset.",
272
+ UserWarning,
273
+ )
274
+
275
+ # Replace other values with 'Unspecified'
276
+ df['measured_rating_diff'] = df['measured_rating_diff'].where(
277
+ df['measured_rating_diff'].isin(USGS_QUALITY_CODES.keys()),
278
+ 'Unspecified'
279
+ )
280
+
206
281
  df['discharge_unc_frac'] = (df['measured_rating_diff']
207
- .replace(qualitycode_to_uncertainty_fraction)
282
+ .replace(USGS_QUALITY_CODES)
208
283
  .astype(float))
209
- # set indirect measurements as 20% uncertain regardless of quality code
284
+
285
+ # Set indirect measurements as 20% uncertain regardless of quality code
210
286
  df.loc[df['streamflow_method'] == 'QIDIR', 'discharge_unc_frac'] = 0.2
211
- # convert fractional uncertainty to uncertainty assuming the uncertainty
287
+
288
+ # Convert fractional uncertainty to uncertainty assuming the uncertainty
212
289
  # fraction is a 2 sigma gse interval. (GSE = frac + 1)
213
290
  # (GSE -> exp(sigma_ln(Q)))
214
291
  df['discharge_unc'] = df['discharge_unc_frac'] / 2 + 1
@@ -216,7 +293,14 @@ def get_measurements(
216
293
  # drop data that is <= 0 as we need all positive data
217
294
  df = df[(df['stage'] > 0) & (df['discharge'] > 0)]
218
295
 
219
- ds = xr.Dataset.from_dataframe(df[["stage", "discharge", "discharge_unc"]])
296
+ ds = xr.Dataset.from_dataframe(
297
+ df[[
298
+ "stage",
299
+ "discharge",
300
+ "discharge_unc",
301
+ "control_type_cd",
302
+ ]]
303
+ )
220
304
 
221
305
  for param in [NWISStage, NWISDischarge]:
222
306
  ds[param.name] = ds[param.name] * param.conversion