hspf 2.0.3__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hspf/hbn.py CHANGED
@@ -103,10 +103,22 @@ def get_simulated_perlnd_constituent(hbn,constituent,time_step):
103
103
 
104
104
  return df
105
105
 
106
- def get_simulated_catchment_constituent(hbn,constituent,time_step):
107
- return pd.concat([get_simulated_perlnd_constituent(hbn,constituent,time_step),
108
- get_simulated_implnd_constituent(hbn,constituent,time_step)])
109
-
106
+ def get_catchment_constituent(hbn,constituent,catchment_ids = None,time_step = 5):
107
+ if constituent == 'Q':
108
+ units = 'in/acre'
109
+ else:
110
+ units = 'lb/acre'
111
+
112
+ perlnds = hbn.get_perlnd_constituent(constituent).reset_index().melt(id_vars = ['index'],var_name = 'OPNID')
113
+ perlnds['OPERATION'] = 'PERLND'
114
+ implnds = hbn.get_implnd_constituent(constituent).reset_index().melt(id_vars = ['index'],var_name = 'OPNID')
115
+ implnds['OPERATION'] = 'IMPLND'
116
+
117
+ df = pd.concat([perlnds,implnds],axis=0)
118
+ df['unit'] = units
119
+ df.rename(columns = {'index':'datetime','value':constituent},inplace = True)
120
+ return df
121
+
110
122
 
111
123
  def get_simulated_flow(hbn,time_step,reach_ids,unit = None):
112
124
 
@@ -176,6 +188,13 @@ class hbnInterface:
176
188
  def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None,axis = 1):
177
189
  return pd.concat([hbn.get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity) for hbn in self.hbns],axis = 1)
178
190
 
191
+ def get_perlnd_constituent(self,constituent,perlnd_ids = None,time_step = 5):
192
+ return get_simulated_perlnd_constituent(self,constituent,time_step)
193
+
194
+ def get_implnd_constituent(self,constituent,implnd_ids = None,time_step = 5):
195
+ return get_simulated_implnd_constituent(self,constituent,time_step)
196
+
197
+
179
198
  def get_reach_constituent(self,constituent,reach_ids,time_step,unit = None):
180
199
  if constituent == 'Q':
181
200
  df = get_simulated_flow(self,time_step,reach_ids,unit = unit)
@@ -209,48 +228,17 @@ class hbnInterface:
209
228
 
210
229
  return df
211
230
 
212
-
231
+
213
232
  def get_rchres_data(self,constituent,reach_ids,units = 'mg/l',t_code = 'daily'):
214
233
  '''
215
234
  Convience function for accessing the hbn time series associated with our current
216
235
  calibration method. Assumes you are summing across all dataframes.
217
-
218
- Parameters
219
- ----------
220
- hbn : TYPE
221
- DESCRIPTION.
222
- nutrient_id : TYPE
223
- DESCRIPTION.
224
- reach_ids : TYPE
225
- DESCRIPTION.
226
- flux : TYPE, optional
227
- DESCRIPTION. The default is None.
228
-
229
- Returns
230
- -------
231
- df : TYPE
232
- DESCRIPTION.
233
-
234
- '''
235
-
236
-
236
+ '''
237
237
 
238
- t_cons = helpers.get_tcons(constituent,'RCHRES',units)
239
-
240
-
241
-
242
- df = pd.concat([self.get_multiple_timeseries(t_opn = 'RCHRES',
243
- t_code =t_code,
244
- t_con = t_con,
245
- opnids = reach_ids)
246
- for t_con in t_cons],axis = 1).sum(1).to_frame()
247
-
248
- if (constituent == 'Q') & (units == 'cfs'):
249
- df = df/CF2CFS[t_code]*43560 #Acrfeet/invl to cubic feet/s
250
-
238
+ df = pd.concat([self.get_reach_constituent(constituent,[reach_id],t_code,units) for reach_id in reach_ids], axis = 1)
239
+ df.columns = reach_ids
251
240
  df.attrs['unit'] = units
252
241
  df.attrs['constituent'] = constituent
253
- df.attrs['reach_ids'] = reach_ids
254
242
  return df
255
243
 
256
244
 
hspf/hbn2.py ADDED
@@ -0,0 +1,316 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Created on Wed Mar 30 15:33:52 2022
4
+ Utility functions for accessing data from the hbn files as they relate to the
5
+ nutrients relevant for our current calibration methods. (See calibration_helpers.py)
6
+
7
+ @author: mfratki
8
+
9
+ MODIFIED TO USE CYTHON FOR SPEED
10
+ """
11
+ from . import helpers
12
+ import pandas as pd
13
+ import math
14
+ from struct import unpack
15
+ from pandas import DataFrame
16
+ from datetime import datetime, timedelta
17
+ from collections import defaultdict
18
+
19
+ # Attempt to import the compiled Cython module
20
+ try:
21
+ from hspf import hbn_cy
22
+ except ImportError:
23
+ print("="*80)
24
+ print("WARNING: Could not import compiled 'hbn_cy' module.")
25
+ print("Falling back to slow, pure-Python implementation.")
26
+ print("To fix this, compile the Cython extension by running:")
27
+ print("python setup.py build_ext --inplace --compiler=mingw32")
28
+ print("="*80)
29
+ hbn_cy = None
30
+
31
+ # ... (The rest of your helper functions like get_simulated_flow, etc. remain here) ...
32
+ CF2CFS = {'hourly':3600,
33
+ 'daily':86400,
34
+ 'monthly':2592000,
35
+ 'yearly':31536000,
36
+ 'h':3600,
37
+ 'D':86400,
38
+ 'ME':2592000,
39
+ 'Y':31536000,
40
+ 'YE':31536000,
41
+ 2:3600,
42
+ 3:86400,
43
+ 4:2592000,
44
+ 5:31536000}
45
+
46
+ AGG_DEFAULTS = {'cfs':'mean',
47
+ 'mg/l':'mean',
48
+ 'degF': 'mean',
49
+ 'lb':'sum'}
50
+
51
+ UNIT_DEFAULTS = {'Q': 'cfs',
52
+ 'TSS': 'mg/l',
53
+ 'TP' : 'mg/l',
54
+ 'OP' : 'mg/l',
55
+ 'TKN': 'mg/l',
56
+ 'N' : 'mg/l',
57
+ 'WT' : 'degF',
58
+ 'WL' : 'ft'}
59
+
60
+ def get_simulated_implnd_constituent(hbn,constituent,time_step):
61
+ t_cons = helpers.get_tcons(constituent,'IMPLND')
62
+ df = sum([hbn.get_multiple_timeseries(t_opn='IMPLND',
63
+ t_con= t_con,
64
+ t_code = time_step) for t_con in t_cons])
65
+ if constituent == 'TSS':
66
+ df = df*2000
67
+ return df
68
+
69
+
70
+ def get_simulated_perlnd_constituent(hbn,constituent,time_step):
71
+ t_cons = helpers.get_tcons(constituent,'PERLND')
72
+ df = sum([hbn.get_multiple_timeseries(t_opn='PERLND',
73
+ t_con= t_con,
74
+ t_code = time_step) for t_con in t_cons])
75
+ if constituent == 'TSS':
76
+ df = df*2000
77
+ return df
78
+
79
+ def get_simulated_catchment_constituent(hbn,constituent,time_step):
80
+ return pd.concat([get_simulated_perlnd_constituent(hbn,constituent,time_step),
81
+ get_simulated_implnd_constituent(hbn,constituent,time_step)])
82
+
83
+ def get_simulated_flow(hbn,time_step,reach_ids,unit = None):
84
+ if unit is None:
85
+ unit = 'cfs'
86
+ assert unit in ['cfs','acrft']
87
+ sign = [math.copysign(1,reach_id) for reach_id in reach_ids]
88
+ reach_ids = [abs(reach_id) for reach_id in reach_ids]
89
+ flows = hbn.get_multiple_timeseries('RCHRES',time_step,'ROVOL',reach_ids)
90
+ flows = (flows*sign).sum(axis=1)
91
+ if unit == 'cfs':
92
+ flows = flows/CF2CFS[time_step]*43560
93
+ flows.attrs['unit'] = unit
94
+ return flows
95
+
96
+ def get_simulated_temperature(hbn,units,time_step,reach_ids):
97
+ raise NotImplementedError()
98
+
99
+ def get_simulated_reach_constituent(hbn,constituent,time_step,reach_ids,unit = None):
100
+ sign = [math.copysign(1,reach_id) for reach_id in reach_ids]
101
+ if unit is None:
102
+ unit = UNIT_DEFAULTS[constituent]
103
+ else:
104
+ assert(unit in ['mg/l','lb','cfs','degF'])
105
+ t_cons = helpers.get_tcons(constituent,'RCHRES','lb')
106
+ df = pd.concat([hbn.get_multiple_timeseries('RCHRES',time_step,t_con,[abs(reach_id) for reach_id in reach_ids])*sign for t_con in t_cons],axis=1).sum(axis=1)
107
+ if constituent == 'TSS':
108
+ df = df*2000
109
+ if unit == 'mg/l':
110
+ flow = get_simulated_flow(hbn,time_step,reach_ids,'acrft')*1233481.8375475
111
+ df = df*453592.37 / flow
112
+ df.attrs['unit'] = unit
113
+ df.attrs['constituent'] = constituent
114
+ df.attrs['reach_ids'] = reach_ids
115
+ return df
116
+
117
+ class hbnInterface:
118
+ def __init__(self,file_paths,Map = True):
119
+ self.names = [file_path for file_path in file_paths]
120
+ self.hbns = [hbnClass(file_path,Map) for file_path in file_paths]
121
+ # ... (rest of hbnInterface is unchanged) ...
122
+ def _clear_cache(self):
123
+ [hbn._clear_cache() for hbn in self.hbns]
124
+
125
+ def get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
126
+ return pd.concat([hbn.get_time_series(t_opn, t_cons, t_code, opnid, activity) for hbn in self.hbns],axis = 1)
127
+
128
+ def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None,axis = 1):
129
+ return pd.concat([hbn.get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity) for hbn in self.hbns],axis = 1)
130
+
131
+ def get_reach_constituent(self,constituent,reach_ids,time_step,unit = None):
132
+ if constituent == 'Q':
133
+ df = get_simulated_flow(self,time_step,reach_ids,unit = unit)
134
+ elif constituent == 'WT':
135
+ df = get_simulated_temperature(self,time_step,reach_ids)
136
+ else:
137
+ df = get_simulated_reach_constituent(self,constituent,time_step,reach_ids,unit)
138
+ return df.to_frame()
139
+
140
+ def output_names(self):
141
+ dd = defaultdict(set)
142
+ dics = [hbn.output_names() for hbn in self.hbns]
143
+ for dic in dics:
144
+ for key, vals in dic.items():
145
+ [dd[key].add(val) for val in vals]
146
+ return dd
147
+
148
+ class hbnClass:
149
+ def __init__(self,file_name,Map = True):
150
+ self.file_name = file_name
151
+ self.tcodes = {'minutely':1,'hourly':2,'daily':3,'monthly':4,'yearly':5,
152
+ 1:'minutely',2:'hourly',3:'daily',4:'monthly',5:'yearly',
153
+ 'min':1,'h':2,'D':3,'M':4,'Y':5,'H':2,'ME':4,'YE':5}
154
+ self.pandas_tcodes = {1:'min',2:'h',3:'D',4:'ME',5:'YE'}
155
+ self._clear_cache()
156
+ if Map:
157
+ self.map_hbn()
158
+
159
+ def map_hbn(self):
160
+ """
161
+ Maps the HBN file contents using the fast Cython implementation if available,
162
+ otherwise falls back to the pure Python implementation.
163
+ """
164
+ self._clear_cache()
165
+ if hbn_cy:
166
+ # Use the fast Cython implementation
167
+ self.mapn, self.mapd, self.data = hbn_cy.map_hbn_file(self.file_name)
168
+ else:
169
+ # Fallback to slow Python implementation (from your original file)
170
+ with open(self.file_name, 'rb') as f:
171
+ self.data = f.read()
172
+
173
+ if not self.data or self.data[0] != 0xFD:
174
+ print('BAD HBN FILE - must start with magic number 0xFD')
175
+ return
176
+
177
+ data_view = memoryview(self.data)
178
+ mapn = defaultdict(list)
179
+ mapd = defaultdict(list)
180
+ index = 1
181
+ while index < len(data_view):
182
+ if index + 28 > len(data_view): break
183
+ rc1, rc2, rc3, rc, rectype, operation_bytes, id, activity_bytes = unpack('4BI8sI8s', data_view[index:index + 28])
184
+ reclen = (rc * 4194304) + (rc3 * 16384) + (rc2 * 64) + (rc1 >> 2) - 24
185
+ operation = operation_bytes.decode('ascii', 'ignore').strip()
186
+ activity = activity_bytes.decode('ascii', 'ignore').strip()
187
+
188
+ if rectype == 1:
189
+ if index + 36 > len(data_view): break
190
+ tcode = unpack('I', data_view[index + 32: index + 36])[0]
191
+ mapd[operation, id, activity, tcode].append((index, reclen))
192
+ elif rectype == 0:
193
+ i = index + 28
194
+ slen = 0
195
+ while slen < reclen:
196
+ if i + slen + 4 > len(data_view): break
197
+ ln = unpack('I', data_view[i + slen: i + slen + 4])[0]
198
+ if i + slen + 4 + ln > len(data_view): break
199
+ n = unpack(f'{ln}s', data_view[i + slen + 4: i + slen + 4 + ln])[0].decode('ascii', 'ignore').strip()
200
+ mapn[operation, id, activity].append(n.replace('-', ''))
201
+ slen += 4 + ln
202
+
203
+ if reclen < 36: index += reclen + 29
204
+ else: index += reclen + 30
205
+
206
+ self.mapn = dict(mapn)
207
+ self.mapd = dict(mapd)
208
+
209
+ def read_data(self,operation,id,activity,tcode):
210
+ dfname = f'{operation}_{activity}_{id:03d}_{tcode}'
211
+ if dfname in self.data_frames:
212
+ return self.data_frames[dfname]
213
+
214
+ names = self.mapn.get((operation, id, activity))
215
+ entries = self.mapd.get((operation, id, activity, tcode))
216
+ if not names or not entries:
217
+ return None
218
+ nvals = len(names)
219
+
220
+ if hbn_cy:
221
+ # Use the fast Cython implementation
222
+ times, rows = hbn_cy.read_data_entries(self.data, entries, nvals)
223
+ else:
224
+ # Fallback to slow Python implementation
225
+ rows_list, times_list = [], []
226
+ data_view = memoryview(self.data)
227
+ for (index, reclen) in entries:
228
+ if index + 56 + (4 * nvals) > len(data_view): continue
229
+ yr, mo, dy, hr, mn = unpack('5I', data_view[index + 36: index + 56])
230
+ try:
231
+ dt = datetime(yr, mo, dy, 0, mn) + timedelta(hours=hr-1)
232
+ except ValueError:
233
+ continue # Skip bad date entries
234
+ times_list.append(dt)
235
+ row = unpack(f'{nvals}f', data_view[index + 56: index + 56 + (4 * nvals)])
236
+ rows_list.append(row)
237
+ times, rows = times_list, rows_list
238
+
239
+ if not times: return None
240
+ df = DataFrame(rows, index=times, columns=names).sort_index()
241
+
242
+ if not df.empty:
243
+ self.summaryindx.append(dfname)
244
+ self.summary.append((operation, activity, str(id), self.tcodes[tcode], str(df.shape), df.index[0], df.index[-1]))
245
+ self.output_dictionary[dfname] = names
246
+ resampled_df = df.resample(self.pandas_tcodes[tcode]).mean()
247
+ self.data_frames[dfname] = resampled_df
248
+ return resampled_df
249
+ else:
250
+ return None
251
+
252
+ def _clear_cache(self):
253
+ self.data_frames = {}
254
+ self.summary = []
255
+ self.summarycols = ['Operation', 'Activity', 'segment', 'Frequency', 'Shape', 'Start', 'Stop']
256
+ self.summaryindx = []
257
+ self.output_dictionary = {}
258
+
259
+ # ... (rest of your hbnClass methods like infer_opnids, get_time_series, etc. are unchanged) ...
260
+ def infer_opnids(self,t_opn, t_cons,activity):
261
+ result = [k[-2] for k,v in self.mapn.items() if (t_cons in v) & (k[0] == t_opn) & (k[-1] == activity)]
262
+ if len(result) == 0:
263
+ print('No Constituent-OPNID relationship found')
264
+ return None
265
+ return result
266
+
267
+ def infer_activity(self,t_opn, t_cons):
268
+ result = [k[-1] for k,v in self.mapn.items() if (t_cons in v) & (k[0] == t_opn)]
269
+ if len(result) == 0:
270
+ print('No Constituent-Activity relationship found')
271
+ return None
272
+ assert(len(set(result)) == 1)
273
+ return result[0]
274
+
275
+ def get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
276
+ if isinstance(t_code,str):
277
+ t_code = self.tcodes[t_code]
278
+ if activity is None:
279
+ activity = self.infer_activity(t_opn,t_cons)
280
+ if activity is None:
281
+ return None
282
+ df = self.read_data(t_opn,opnid,activity,t_code)
283
+ if df is not None and t_cons in df.columns:
284
+ series = df[t_cons].copy()
285
+ series = series[series.index >= '1996-01-01']
286
+ return series
287
+ else:
288
+ return None
289
+
290
+ def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None):
291
+ if isinstance(t_code,str):
292
+ t_code = self.tcodes[t_code]
293
+ if activity is None:
294
+ activity = self.infer_activity(t_opn,t_con)
295
+ if activity is None:
296
+ return None
297
+ if opnids is None:
298
+ opnids = self.infer_opnids(t_opn,t_con,activity)
299
+ if opnids is None:
300
+ return None
301
+ frames = []
302
+ for opnid in opnids:
303
+ series = self.get_time_series(t_opn,t_con,t_code,opnid,activity)
304
+ if series is not None:
305
+ frames.append(series.rename(opnid))
306
+ if len(frames) > 0:
307
+ return pd.concat(frames,axis=1)
308
+ return None
309
+
310
+ def output_names(self):
311
+ activities = set([k[-1] for k,v in self.mapn.items()])
312
+ dic = {}
313
+ for activity in activities:
314
+ t_cons = [v for k,v in self.mapn.items() if k[-1] == activity]
315
+ dic[activity] = set([item for sublist in t_cons for item in sublist])
316
+ return dic