discontinuum 1.0.0__py3-none-any.whl → 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- discontinuum/_version.py +2 -2
- {discontinuum-1.0.0.dist-info → discontinuum-1.0.2.dist-info}/METADATA +1 -1
- {discontinuum-1.0.0.dist-info → discontinuum-1.0.2.dist-info}/RECORD +7 -7
- rating_gp/providers/usgs.py +96 -12
- {discontinuum-1.0.0.dist-info → discontinuum-1.0.2.dist-info}/WHEEL +0 -0
- {discontinuum-1.0.0.dist-info → discontinuum-1.0.2.dist-info}/licenses/LICENSE.md +0 -0
- {discontinuum-1.0.0.dist-info → discontinuum-1.0.2.dist-info}/top_level.txt +0 -0
discontinuum/_version.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
discontinuum/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
2
|
-
discontinuum/_version.py,sha256=
|
2
|
+
discontinuum/_version.py,sha256=98Mtoja-WpqDUnppClywkGhX6oxX6l-ZHnSYMYYbBUE,511
|
3
3
|
discontinuum/data_manager.py,sha256=LiZoPR0nnu7YAUfh5L1ZDRfaS3dgfVIELXIHkzUKyBg,4416
|
4
4
|
discontinuum/pipeline.py,sha256=1avuZnFai-b3HmihcpZ8M3WFNQ8lXAFSNTrnfl2NrY0,10074
|
5
5
|
discontinuum/plot.py,sha256=eZQS6-Ydq8FFcEukPtNuDVB-weV6lHyWMyJ1hqTkVrU,2969
|
@@ -11,7 +11,7 @@ discontinuum/engines/pymc.py,sha256=phbtE-3UCSVcP1MhbXwAHIWDZWDr56wK9U7aRt-w-2o,
|
|
11
11
|
discontinuum/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
12
12
|
discontinuum/providers/base.py,sha256=Yn2EHS1b4fYl09-m2MYuf2P9VRUXAP-WDpSoZrCbRvY,720
|
13
13
|
discontinuum/tests/test_pipeline.py,sha256=_FhkGxbFIxNb35lGaIdZk7Zjgs6CkxEF3gFUX3PE8EU,918
|
14
|
-
discontinuum-1.0.
|
14
|
+
discontinuum-1.0.2.dist-info/licenses/LICENSE.md,sha256=XElVHHnS2uQ15M_Z2giPH1vmeWMzdpGQ48ItkuZurVA,1650
|
15
15
|
loadest_gp/__init__.py,sha256=YISfvbc7Zy2y0BOxS1A2KzqxyoNJTz0EnLMnRW6iVT8,740
|
16
16
|
loadest_gp/plot.py,sha256=x2PK7vBCc44dX9lu5YV-rvw1u4pvXSLdcrTSvYLiHMA,2595
|
17
17
|
loadest_gp/utils.py,sha256=m5QaqR_0JiuRXPfryH8nI5lODp8PqvQla5C05WDN3LY,2772
|
@@ -28,8 +28,8 @@ rating_gp/models/base.py,sha256=e2Kq644I88YLHWPNA0qyRgitF5wimdLW4618vKX-o_s,1474
|
|
28
28
|
rating_gp/models/gpytorch.py,sha256=rSxuTMoSeGK2LwqANMDMrmxDsOCXyAWSmENm6KFjRZ0,5930
|
29
29
|
rating_gp/models/kernels.py,sha256=3xg2mhY3aEgjI3r5vyAll9MA4c3M5UKqRi3FApNhJJQ,11579
|
30
30
|
rating_gp/providers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
|
-
rating_gp/providers/usgs.py,sha256=
|
32
|
-
discontinuum-1.0.
|
33
|
-
discontinuum-1.0.
|
34
|
-
discontinuum-1.0.
|
35
|
-
discontinuum-1.0.
|
31
|
+
rating_gp/providers/usgs.py,sha256=KmKYN3c8Mi-ly2l6X80WT3taEhqCPXeEcRNi9HvbJmY,8134
|
32
|
+
discontinuum-1.0.2.dist-info/METADATA,sha256=0oCct8VOrQib5sCzdi2cBNI8KEcpYTYTKj1oCL3DCsc,6231
|
33
|
+
discontinuum-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
34
|
+
discontinuum-1.0.2.dist-info/top_level.txt,sha256=mwU_PSFrZYSJrBgqIuTJTo7Pp9ODDv6XdDed7kAagXM,34
|
35
|
+
discontinuum-1.0.2.dist-info/RECORD,,
|
rating_gp/providers/usgs.py
CHANGED
@@ -21,6 +21,16 @@ if TYPE_CHECKING:
|
|
21
21
|
FT_TO_M = 0.3048
|
22
22
|
FT3_TO_M3 = 0.0283168
|
23
23
|
|
24
|
+
# Quantitative values of "measured_rating_diff"
|
25
|
+
USGS_QUALITY_CODES = {
|
26
|
+
'Excellent': '0.02',
|
27
|
+
'Good': '0.05',
|
28
|
+
'Fair': '0.08',
|
29
|
+
'Poor': '0.12',
|
30
|
+
'Unspecified': '0.12',
|
31
|
+
}
|
32
|
+
|
33
|
+
|
24
34
|
@dataclass
|
25
35
|
class NWISColumn:
|
26
36
|
column_name: str
|
@@ -182,6 +192,53 @@ def get_measurements(
|
|
182
192
|
)
|
183
193
|
df = nwis._read_rdb(response.text)
|
184
194
|
|
195
|
+
return read_measurements_df(df)
|
196
|
+
|
197
|
+
|
198
|
+
def read_measurements_df(df: pd.DataFrame) -> xr.Dataset:
|
199
|
+
"""Read a DataFrame of USGS discharge measurements and convert it to an xarray Dataset.
|
200
|
+
|
201
|
+
Parameters
|
202
|
+
----------
|
203
|
+
df : pd.DataFrame
|
204
|
+
Dataframe from `dataretrieval.nwis.get_discharge_measurements()`
|
205
|
+
|
206
|
+
Returns
|
207
|
+
-------
|
208
|
+
xr.Dataset
|
209
|
+
|
210
|
+
Example
|
211
|
+
-------
|
212
|
+
>>> from dataretrieval import nwis
|
213
|
+
>>> from rating_gp.providers.usgs import read_measurements_df
|
214
|
+
>>> df, _ = nwis.get_discharge_measurements(
|
215
|
+
sites='03339000',
|
216
|
+
start='2020-01-01',
|
217
|
+
end='2020-12-31',
|
218
|
+
format='rdb_expanded',
|
219
|
+
)
|
220
|
+
>>> ds = read_measurements_df(df)
|
221
|
+
"""
|
222
|
+
|
223
|
+
# assert the correct columns are present
|
224
|
+
required_columns = [
|
225
|
+
"measurement_dt",
|
226
|
+
"gage_height_va",
|
227
|
+
"discharge_va",
|
228
|
+
"q_meas_used_fg",
|
229
|
+
"control_type_cd",
|
230
|
+
"measured_rating_diff",
|
231
|
+
"streamflow_method",
|
232
|
+
NWISStage.column_name,
|
233
|
+
NWISDischarge.column_name,
|
234
|
+
]
|
235
|
+
|
236
|
+
missing_columns = set(required_columns) - set(df.columns)
|
237
|
+
if missing_columns:
|
238
|
+
raise ValueError(
|
239
|
+
f"Missing required columns in the DataFrame: {missing_columns}"
|
240
|
+
)
|
241
|
+
|
185
242
|
# convert timezone to UTC? ignore for now
|
186
243
|
df.index = pd.to_datetime(
|
187
244
|
df["measurement_dt"],
|
@@ -195,20 +252,40 @@ def get_measurements(
|
|
195
252
|
NWISDischarge.column_name: NWISDischarge.standard_name,
|
196
253
|
}
|
197
254
|
)
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
255
|
+
|
256
|
+
# Process the control_type_cd column
|
257
|
+
df["control_type_cd"] = (
|
258
|
+
df["control_type_cd"]
|
259
|
+
.fillna("Unspecified")
|
260
|
+
.astype("category")
|
261
|
+
)
|
262
|
+
|
263
|
+
# Filter any measurements that are not used in the rating
|
264
|
+
mask = df["q_meas_used_fg"].str.lower().isin(['yes', 'y'])
|
265
|
+
df = df[mask]
|
266
|
+
|
267
|
+
num_not_used = (~mask).sum()
|
268
|
+
if num_not_used > 0:
|
269
|
+
warnings.warn(
|
270
|
+
f"{num_not_used} measurements were not used in the rating and "
|
271
|
+
"will be dropped from the dataset.",
|
272
|
+
UserWarning,
|
273
|
+
)
|
274
|
+
|
275
|
+
# Replace other values with 'Unspecified'
|
276
|
+
df['measured_rating_diff'] = df['measured_rating_diff'].where(
|
277
|
+
df['measured_rating_diff'].isin(USGS_QUALITY_CODES.keys()),
|
278
|
+
'Unspecified'
|
279
|
+
)
|
280
|
+
|
206
281
|
df['discharge_unc_frac'] = (df['measured_rating_diff']
|
207
|
-
.replace(
|
282
|
+
.replace(USGS_QUALITY_CODES)
|
208
283
|
.astype(float))
|
209
|
-
|
284
|
+
|
285
|
+
# Set indirect measurements as 20% uncertain regardless of quality code
|
210
286
|
df.loc[df['streamflow_method'] == 'QIDIR', 'discharge_unc_frac'] = 0.2
|
211
|
-
|
287
|
+
|
288
|
+
# Convert fractional uncertainty to uncertainty assuming the uncertainty
|
212
289
|
# fraction is a 2 sigma gse interval. (GSE = frac + 1)
|
213
290
|
# (GSE -> exp(sigma_ln(Q)))
|
214
291
|
df['discharge_unc'] = df['discharge_unc_frac'] / 2 + 1
|
@@ -216,7 +293,14 @@ def get_measurements(
|
|
216
293
|
# drop data that is <= 0 as we need all positive data
|
217
294
|
df = df[(df['stage'] > 0) & (df['discharge'] > 0)]
|
218
295
|
|
219
|
-
ds = xr.Dataset.from_dataframe(
|
296
|
+
ds = xr.Dataset.from_dataframe(
|
297
|
+
df[[
|
298
|
+
"stage",
|
299
|
+
"discharge",
|
300
|
+
"discharge_unc",
|
301
|
+
"control_type_cd",
|
302
|
+
]]
|
303
|
+
)
|
220
304
|
|
221
305
|
for param in [NWISStage, NWISDischarge]:
|
222
306
|
ds[param.name] = ds[param.name] * param.conversion
|
File without changes
|
File without changes
|
File without changes
|