hspf 2.1.1__py3-none-any.whl → 2.1.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary files changed (6)
@@ -0,0 +1 @@
+WDM WDM1 C:\Program Files (x86)\HSPEXP+\WinHSPFLt\hspfmsg.wdm
hspf/build_warehouse.py CHANGED
@@ -281,12 +281,49 @@ for key,ts_names in outputs.items():
     dfs.append(df)
 output_df = pd.concat(dfs).reset_index(drop=True)
 
+dfs = []
+for key,data in hbn.hbns[0].data_frames.items():
+    keys = key.split('_')
+    operation = keys[0]
+    activity = keys[1]
+    opnid = int(keys[2])
+    t_code = keys[3]
+    data.reset_index(inplace=True)
+    data.rename(columns={'index': 'datetime'}, inplace=True)
+    data = data.melt(id_vars = ['datetime'],var_name = 'ts_name', value_name = 'value')
+    data['operation'] = operation
+    data['activity'] = activity
+    data['opnid'] = opnid
+    data['t_code'] = t_code
+    data['model_name'] = model_name
+    dfs.append(data)
+output_df = pd.concat(dfs).reset_index(drop=True)
+
+
+
+# Write to Parquet with DuckDB, including "t_code" as a partition
+output_path = "model_outputs"
+
+con = duckdb.connect(database=':memory:')  # Temporary in-memory database
+con.execute(f"""
+    COPY output_df
+    TO '{output_path}'
+    (FORMAT 'parquet', PARTITION_BY ('model_name','operation', 'opnid'))
+""")
+
+print(f"Data written to {output_path}")
 
-ts_name = 'PERO'
-op_type = 'PERLND'
-t_code = 4
 
 
+['PERO',
+ 'SURO',
+ 'IFWO',
+ 'AGWO']
+
+for constituent in ['Q','TSS','TP','N','OP','BOD','TKN']:
+    t_cons = helpers.get_tcons(constituent,'RCHRES','lb')
+    df = hbn.get_rechres_data(constituent, units='lb', freq='daily').reset_index()
+
 
 pero = hbn.get_multiple_timeseries(op_type,t_code,ts_name).reset_index().rename(columns={'index': 'datetime'})
 pero = pero.melt(id_vars = ['datetime'],var_name = 'operation_id', value_name = 'value')
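The loop added above reshapes each wide HBN frame (one column per output time series, timestamps on the index) into a long table before concatenation. A minimal, self-contained sketch of that reshape, with sample values invented for illustration:

```python
import pandas as pd

# Wide frame as read from an HBN file: one column per time series,
# timestamps on the index. Values here are invented.
wide = pd.DataFrame(
    {"PERO": [0.10, 0.20], "SURO": [0.05, 0.07]},
    index=pd.to_datetime(["1996-01-01", "1996-01-02"]),
)

# Same steps as the loop above: promote the index to a 'datetime' column,
# then melt every remaining column into ('ts_name', 'value') pairs.
long = (
    wide.reset_index()
        .rename(columns={"index": "datetime"})
        .melt(id_vars=["datetime"], var_name="ts_name", value_name="value")
)
print(long)
#     datetime ts_name  value
# 0 1996-01-01    PERO   0.10
# 1 1996-01-02    PERO   0.20
# 2 1996-01-01    SURO   0.05
# 3 1996-01-02    SURO   0.07
```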
@@ -295,15 +332,58 @@ pero['t_code'] = t_code
 pero['model_name'] = model_name
 
 
+db_path = 'c:/Users/mfratki/Documents/ucis.duckdb'
 with duckdb.connect(db_path) as con:
     warehouse.insert_model_run(con, model_name, run_id)
 
+db_path = 'c:/Users/mfratki/Documents/ucis.duckdb'
+with duckdb.connect(db_path) as conn:
+    conn.execute("CREATE SCHEMA if not exists reports")
+    conn.execute("CREATE TABLE if not exists reports.catchment_loading AS SELECT * FROM df")
+    conn.close()
+
 
+# Average annual loading by catchment
+db_path = 'c:/Users/mfratki/Documents/ucis.duckdb'
+with duckdb.connect(db_path) as conn:
+    query = f"""
+    SELECT
+        model_name,
+        operation AS operation_type,
+        opnid AS operation_id,
+        t_code,
+        ts_name AS constituent,
+        AVG(value) * 365.25 AS annual_loading
+    FROM reports.catchment_loading
+    WHERE t_code = 'PERLND' AND constituent IN ('Q','TP','TSS','N','OP','BOD','TKN')
+    GROUP BY model_name, TVOLNO, constituent
+    """
+    annual_loadings = conn.execute(query).fetchdf()
+    conn.close()
+
+hbn.hbns[0].data_frames.keys()
+
+
+
+import duckdb
+import pandas as pd
 
 
+# Convert to DataFrame
+df = pd.DataFrame(data)
+df['datetime'] = pd.to_datetime(df['datetime'])  # Ensure datetime column is formatted properly
 
+# Write to Parquet with DuckDB, including "t_code" as a partition
+output_path = "model_outputs"
 
+con = duckdb.connect(database=':memory:')  # Temporary in-memory database
+con.execute(f"""
+    COPY df
+    TO '{output_path}'
+    (FORMAT 'parquet', PARTITION_BY ('operation_type', 'operation_id', 't_code'))
+""")
 
+print(f"Data written to {output_path}")
 
 
 
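The Parquet exports added in both hunks lean on two DuckDB behaviors: the Python client resolves a bare DataFrame name inside SQL against in-scope Python variables (a replacement scan), and `COPY ... (FORMAT 'parquet', PARTITION_BY ...)` writes a Hive-style directory tree. A self-contained sketch of the same pattern, with a toy stand-in for the script's `df`:

```python
import duckdb
import pandas as pd

# Toy stand-in for the script's DataFrame; column names mirror the script,
# values are invented.
df = pd.DataFrame({
    "datetime": pd.to_datetime(["1996-01-01", "1996-01-02"]),
    "operation_type": ["PERLND", "RCHRES"],
    "operation_id": [101, 1],
    "t_code": [4, 4],
    "value": [0.42, 1.70],
})

con = duckdb.connect(database=":memory:")
# DuckDB resolves the bare name `df` via a replacement scan, so no
# explicit con.register() call is needed (same as in the script).
con.execute("""
    COPY df
    TO 'model_outputs'
    (FORMAT 'parquet', PARTITION_BY ('operation_type', 'operation_id', 't_code'))
""")
# Writes model_outputs/operation_type=PERLND/operation_id=101/t_code=4/*.parquet, etc.
```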
hspf/hbn.py CHANGED
@@ -6,7 +6,7 @@ nutrients relevant for our current calibration methods. (See calibration_helpers
 
 @author: mfratki
 """
-from . import helpers
+from hspf import helpers
 import pandas as pd
 import math
 from struct import unpack
@@ -14,6 +14,7 @@ from numpy import fromfile
 from pandas import DataFrame
 from datetime import datetime, timedelta #, timezone
 from collections import defaultdict
+from collections.abc import MutableMapping
 #from pathlib import Path
 
 
@@ -140,8 +141,15 @@ def get_simulated_flow(hbn,time_step,reach_ids,unit = None):
     flows.attrs['unit'] = unit
     return flows
 
-def get_simulated_temperature(hbn,units,time_step,reach_ids):
-    raise NotImplementedError()
+def get_simulated_temperature(hbn,time_step,reach_ids):
+    assert len(reach_ids) == 1, "Temperature can only be retreived for one reach at a time."
+
+
+    wt = hbn.get_multiple_timeseries('RCHRES',time_step,'TW', reach_ids)
+    wt = wt.sum(axis=1)
+    wt.attrs['unit'] = 'degf'
+
+    return wt
 
 
 def get_simulated_reach_constituent(hbn,constituent,time_step,reach_ids,unit = None):
@@ -152,11 +160,11 @@ def get_simulated_reach_constituent(hbn,constituent,time_step,reach_ids,unit = None):
     if unit is None:
         unit = UNIT_DEFAULTS[constituent]
     else:
-        assert(unit in ['mg/l','lb','cfs','degF'])
+        assert(unit in ['mg/l','lb'])
 
     t_cons = helpers.get_tcons(constituent,'RCHRES','lb')
 
-    # Correct instances when a flow needs to be subtracted (rare)
+    # Correct instances when a reach output needs to be subtracted (rare)
     df = pd.concat([hbn.get_multiple_timeseries('RCHRES',time_step,t_con,[abs(reach_id) for reach_id in reach_ids])*sign for t_con in t_cons],axis=1).sum(axis=1)
 
     if constituent == 'TSS':
@@ -182,11 +190,30 @@ class hbnInterface:
     def _clear_cache(self):
         [hbn._clear_cache() for hbn in self.hbns]
 
+
+
     def get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
-        return pd.concat([hbn.get_time_series(t_opn, t_cons, t_code, opnid, activity) for hbn in self.hbns],axis = 1)
+        df = pd.concat([hbn._get_time_series(t_opn, t_cons, t_code, opnid, activity) for hbn in self.hbns],axis = 1)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_cons} {t_code} {opnid} {activity}")
+
+        if long_format:
+            df = df.reset_index().melt(id_vars = ['index'],var_name = 'OPNID',value_name = t_con)
+            df.rename(columns = {'index':'datetime'},inplace = True)
+            df['OPERATION'] = t_opn
+        return df
 
-    def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None,axis = 1):
-        return pd.concat([hbn.get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity) for hbn in self.hbns],axis = 1)
+    def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None,axis = 1,long_format = False):
+        df = pd.concat([hbn._get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity) for hbn in self.hbns],axis = 1)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_con} {t_code} {opnids} {activity}")
+
+        if long_format:
+            df = df.reset_index().melt(id_vars = ['index'],var_name = 'OPNID',value_name = 'value')
+            df.rename(columns = {'index':'datetime'},inplace = True)
+            df['TIMESERIES'] = t_con
+            df['OPERATION'] = t_opn
+        return df
 
     def get_perlnd_constituent(self,constituent,perlnd_ids = None,time_step = 5):
         return get_simulated_perlnd_constituent(self,constituent,time_step)
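The new `long_format` flag on `get_multiple_timeseries` converts the usual wide result (one column per OPNID) into the tidy layout consumed by build_warehouse.py. A sketch of the reshape that branch performs, on an invented two-operation frame (the `t_con`/`t_opn` values here are hypothetical):

```python
import pandas as pd

# Wide result as returned without long_format: one column per OPNID,
# timestamps on the index. Numbers are invented.
df = pd.DataFrame(
    {101: [0.10, 0.20], 102: [0.05, 0.07]},
    index=pd.to_datetime(["1996-01-01", "1996-01-02"]),
)

# Equivalent of the long_format branch above.
df = df.reset_index().melt(id_vars=["index"], var_name="OPNID", value_name="value")
df.rename(columns={"index": "datetime"}, inplace=True)
df["TIMESERIES"] = "PERO"    # t_con (hypothetical)
df["OPERATION"] = "PERLND"   # t_opn (hypothetical)
```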
@@ -210,13 +237,33 @@ class hbnInterface:
         # for dic in dics:
         #     for key, vals in dic.items():
         #         [dd[key].append(val) for val in vals]
-        dd = defaultdict(set)
+        # dd = defaultdict(set)
         dics = [hbn.output_names() for hbn in self.hbns]
+        return merge_dicts(dics)
+        # for dic in dics:
+        #     for operation, vals in dic.items():
+        #         for activity,v in vals.items():
+        #             [dd[operation][activity].add(t) for t in v]
+        # return dd
+
+    def _timeseries(self):
+        mapn = self._mapn()
+        timeseries = []
+        for key, vals in mapn.items():
+            _key = list(key)
+            for val in vals:
+                timeseries.append(_key + [val])
+        return timeseries
+
+
+    def _mapn(self):
+        dd = defaultdict(set)
+        dics = [hbn.mapn for hbn in self.hbns]
         for dic in dics:
             for key, vals in dic.items():
                 [dd[key].add(val) for val in vals]
-        return dd
-
+        return dd
+
     def get_perlnd_data(self,constituent,t_code = 'yearly'):
         t_cons = helpers.get_tcons(constituent,'PERLND')
 
@@ -229,14 +276,13 @@ class hbnInterface:
         return df
 
 
-    def get_rchres_data(self,constituent,reach_ids,units = 'mg/l',t_code = 'daily'):
+    def get_rchres_output(self,constituent,units = 'mg/l',t_code = 5):
         '''
         Convience function for accessing the hbn time series associated with our current
         calibration method. Assumes you are summing across all dataframes.
         '''
-
-        df = pd.concat([self.get_reach_constituent(constituent,[reach_id],t_code,units) for reach_id in reach_ids], axis = 1)
-        df.columns = reach_ids
+        t_cons = helpers.get_tcons(constituent,'RCHRES',units)
+        df = sum([self.get_multiple_timeseries('RCHRES',t_code,t_con) for t_con in t_cons])
         df.attrs['unit'] = units
         df.attrs['constituent'] = constituent
         return df
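`get_rchres_output` now sums one wide frame per `t_con` instead of concatenating per-reach columns. Python's built-in `sum` adds the DataFrames elementwise (it starts from 0, and `0 + DataFrame` broadcasts), so the result keeps one column per RCHRES id. A small sketch with invented values, using the 'N' constituent's two t_cons from helpers.py:

```python
import pandas as pd

# One frame per t_con ('NO3OUTTOT' and 'NO2OUTTOT' for constituent 'N'),
# each with one column per RCHRES opnid; values are invented.
no3 = pd.DataFrame({1: [1.0, 2.0], 2: [0.5, 0.5]})
no2 = pd.DataFrame({1: [0.1, 0.1], 2: [0.2, 0.3]})

total = sum([no3, no2])  # elementwise sum; indexes and columns must align
```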
@@ -392,19 +438,27 @@ class hbnClass:
     def infer_opnids(self,t_opn, t_cons,activity):
         result = [k[-2] for k,v in self.mapn.items() if (t_cons in v) & (k[0] == t_opn) & (k[-1] == activity)]
         if len(result) == 0:
-            return print('No Constituent-OPNID relationship found')
+            result = [-1]
+            # return print('No Constituent-OPNID relationship found')
         return result
 
 
     def infer_activity(self,t_opn, t_cons):
         result = [k[-1] for k,v in self.mapn.items() if (t_cons in v) & (k[0] == t_opn)]
         if len(result) == 0:
-            return print('No Constituent-Activity relationship found')
-        assert(len(set(result)) == 1)
-        return result[0]
-
+            result = ''
+        else:  # return print('No Constituent-Activity relationship found')
+            assert(len(set(result)) == 1)
+            result = result[0]
+        return result
 
     def get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
+        df = self._get_time_series(t_opn, t_cons, t_code, opnid, activity)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_cons} {t_code} {opnid} {activity}")
+        return df
+
+    def _get_time_series(self, t_opn, t_cons, t_code, opnid, activity = None):
         """
         get a single time series based on:
         1. t_opn: RCHRES, IMPLND, PERLND
@@ -413,13 +467,15 @@ class hbnClass:
         4. t_activity: HYDR, IQUAL, etc
         5. time_unit: yearly, monthly, full (default is 'full' simulation duration)
         """
+
+
         if isinstance(t_code,str):
             t_code = self.tcodes[t_code]
 
         if activity is None:
             activity = self.infer_activity(t_opn,t_cons)
-            if activity is None:
-                return None
+
+
         summaryindx = f'{t_opn}_{activity}_{opnid:03d}_{t_code}'
         if summaryindx in self.summaryindx:
             df = self.data_frames[summaryindx][t_cons].copy()
@@ -431,25 +487,31 @@ class hbnClass:
             #df.index = df.index.shift(-1,TCODES2FREQ[t_code])
             df = df[df.index >= '1996-01-01']
         else:
-            df = None
+            df = pd.DataFrame()
 
         return df
+
     def get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None):
+        df = self._get_multiple_timeseries(t_opn,t_code,t_con,opnids,activity)
+        if df.empty:
+            raise ValueError(f"No data found for {t_opn} {t_con} {t_code} {opnids} {activity}")
+        return df
+
+    def _get_multiple_timeseries(self,t_opn,t_code,t_con,opnids = None,activity = None):
         # a single constituent but multiple opnids
+
+
         if isinstance(t_code,str):
             t_code = self.tcodes[t_code]
 
         if activity is None:
-            activity = self.infer_activity(t_opn,t_con)
-            if activity is None:
-                return None
-
+            activity = self.infer_activity(t_opn,t_con)
+
         if opnids is None:
             opnids = self.infer_opnids(t_opn,t_con,activity)
-            if opnids is None:
-                return None
+
 
-        df = None
+        df = pd.DataFrame()
         frames = []
         mapd_list = list(self.mapd.keys())
         for opnid in opnids:
@@ -468,9 +530,76 @@ class hbnClass:
             dic[activity] = set([item for sublist in t_cons for item in sublist])
         return dic
 
+
+    def output_names(self):
+
+        activities = []
+        operations = []
+        for k, v in self.mapn.items():
+            operations.append(k[0])
+            activities.append(k[-1])
+
+        operations = set(operations)
+        activities = set(activities)
+        #activities = set([k[-1] for k,v in self.mapn.items()])
+
+        dic = {}
+        for operation in operations:
+            acitivities = set([k[-1] for k,v in self.mapn.items() if k[0] == operation])
+            dic[operation] = {}
+            for activity in acitivities:
+                t_cons = [v for k,v in self.mapn.items() if (k[0] == operation) & (k[-1] == activity)]
+                dic[operation][activity] = set([item for sublist in t_cons for item in sublist])
+        # for activity in activities:
+        #     t_cons = [v for k,v in self.mapn.items() if k[-1] == activity]
+        #     dic[activity] = set([item for sublist in t_cons for item in sublist])
+        return dic
+
+    def get_timeseries(self):
+        mapn = self.mapn
+        timeseries = []
+        for key, vals in mapn.items():
+            _key = list(key)
+            for val in vals:
+                timeseries.append(_key + [val])
+        return timeseries
+
     @staticmethod
     def get_perlands(summary_indxs):
         perlands = [int(summary_indx.split('_')[-2]) for summary_indx in summary_indxs]
         return perlands
 
-
+
+def merge_dicts(dicts):
+    """
+    Merge a list of dictionaries into a single dictionary, combining sets
+    at the leaf level and properly merging nested dictionaries.
+
+    Args:
+        dicts (list): A list of dictionaries to merge.
+
+    Returns:
+        dict: The merged dictionary.
+    """
+    def recursive_merge(d1, d2):
+        for key, value in d2.items():
+            if key in d1:
+                # If the value is a dictionary, recurse
+                if isinstance(d1[key], MutableMapping) and isinstance(value, MutableMapping):
+                    recursive_merge(d1[key], value)
+                # If the value is a set, merge the sets
+                elif isinstance(d1[key], set) and isinstance(value, set):
+                    d1[key].update(value)
+                else:
+                    raise ValueError(f"Incompatible types for key '{key}': {type(d1[key])} vs {type(value)}")
+            else:
+                # If the key does not exist in d1, copy it
+                d1[key] = value
+
+    # Start with an empty dictionary
+    merged_dict = {}
+
+    for d in dicts:
+        recursive_merge(merged_dict, d)
+
+    return merged_dict
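A quick usage sketch of the new `merge_dicts` helper (operation/activity names chosen arbitrarily), showing how leaf-level sets are unioned while nested dictionaries merge recursively — this is how `hbnInterface.output_names` now folds the per-file dictionaries together:

```python
a = {"PERLND": {"PWATER": {"PERO", "SURO"}}}
b = {"PERLND": {"PWATER": {"AGWO"}, "SEDMNT": {"SOSED"}},
     "RCHRES": {"HYDR": {"ROVOL"}}}

merged = merge_dicts([a, b])
# {'PERLND': {'PWATER': {'PERO', 'SURO', 'AGWO'}, 'SEDMNT': {'SOSED'}},
#  'RCHRES': {'HYDR': {'ROVOL'}}}
```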
hspf/helpers.py CHANGED
@@ -48,9 +48,10 @@ def get_tcons(nutrient_name,operation,units = 'mg/l'):
             'N'   :['NO3OUTTOT','NO2OUTTOT'], # N
             'OP'  :['PO4OUTDIS'], # Ortho
             'TP'  :['PTOTOUT'],
-            'BOD' :['BODOUTTOT']},
+            'BOD' :['BODOUTTOT'],},
         'cfs': {'Q': ['ROVOL']},
-        'acrft' : {'Q': ['ROVOL']}}
+        'acrft' : {'Q': ['ROVOL']},
+        'degf' : {'WT': ['TW']}}
 
         t_cons = MAP[units]
     elif operation == 'PERLND':
hspf/hspfModel.py CHANGED
@@ -7,17 +7,19 @@ Created on Thu Oct 13 09:26:05 2022
 from pathlib import Path
 import os.path
 import subprocess
+import concurrent.futures
 
-from .uci import UCI
-from . import hbn
-from .reports import Reports
-from .wdm import wdmInterface
-from . import wdmReader
+from hspf.uci import UCI
+from hspf import hbn
+from hspf.reports import Reports
+from hspf.wdm import wdmInterface
+from hspf import wdmReader
 
 
 
 
 
+winHSPF = str(Path(__file__).resolve().parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
 
 
 # Only for accessing information regarding a specific uci_file
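The new module-level `winHSPF` constant locates the bundled WinHspfLt.exe relative to the installed package. An equivalent spelling with pathlib joins (same resulting string on Windows, where this executable runs) would be:

```python
from pathlib import Path

# Equivalent to the string concatenation above; Path inserts the separators.
winHSPF = str(Path(__file__).resolve().parent / "bin" / "WinHSPFLt" / "WinHspfLt.exe")
```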
@@ -51,6 +53,15 @@ class hspfModel():
         # Compositions
         self.reports = Reports(self.uci,self.hbns,self.wdms)
 
+    def _reinitialize(self,uci_file:str,run_model:bool = False):
+        self.uci = UCI(uci_file)
+        self.validate_uci(run_model = run_model)
+        self.hbns = hbn.hbnInterface(self.hbn_paths)
+        try:
+            self.wdms = wdmInterface(self.wdm_paths)
+        except:
+            self.wdms = None
+        self.reports = Reports(self.uci,self.hbns,self.wdms)
 
     def validate_wdms(self):
         # Ensure wdm files exist and the folders for the other file types exist relative
@@ -92,15 +103,16 @@ class hspfModel():
         else:
             self.run_model()
 
-    def run_model(self,new_uci_file = None):
+    def run_model(self,new_uci_file = None,):
 
         if new_uci_file is None:
             new_uci_file = self.uci_file
 
         # new_uci_file = self.model_path.joinpath(uci_name)
         # self.uci.write(new_uci_file)
-        subprocess.run([self.winHSPF,self.uci_file.as_posix()]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
-        self.load_uci(new_uci_file,run_model = False)
+
+        subprocess.run([winHSPF,self.uci_file.as_posix()]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
+        self._reinitialize(new_uci_file,run_model = False)
 
     def load_hbn(self,hbn_name):
         self.hbns[hbn_name] = hbn.hbnClass(self.uci_file.parent.joinpath(hbn_name).as_posix())
@@ -177,8 +189,35 @@ class hspfModel():
 
 
 
+def run_uci(uci_file:str, ):
+    """
+    convenience function to run a single model uci file.
+    """
+    print(f"Starting model: {uci_file}")
+    subprocess.run([winHSPF, uci_file])
+    print(f"Completed model: {uci_file}")
 
 
+def run_batch_files(file_list, max_concurrent=4):
+    """
+    Takes a list of .uci file paths and runs them N at a time.
+    """
+    # Create a pool of workers (threads)
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_concurrent) as executor:
+        # Submit all jobs to the pool
+        future_to_file = {
+            executor.submit(run_uci, uci_file): uci_file
+            for uci_file in file_list
+        }
+
+        # Monitor completion (optional, but good for error catching)
+        for future in concurrent.futures.as_completed(future_to_file):
+            uci_file = future_to_file[future]
+            try:
+                future.result()  # This will raise exceptions if run_uci failed
+            except Exception as exc:
+                print(f"File {uci_file} generated an exception: {exc}")
+
 
 # class runManager():
 #     def __init__()
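The two new module-level helpers let a batch of models run concurrently; threads (rather than processes) suffice here because each worker just blocks on the WinHspfLt.exe subprocess, which releases the GIL. A hypothetical invocation, with invented paths:

```python
from hspf.hspfModel import run_batch_files  # import path assumed from this module

uci_files = [
    "c:/models/ucis/base.uci",        # hypothetical .uci paths
    "c:/models/ucis/scenario_a.uci",
    "c:/models/ucis/scenario_b.uci",
]

# Run at most two HSPF models at a time; exceptions from any run are
# caught and reported by run_batch_files itself.
run_batch_files(uci_files, max_concurrent=2)
```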
hspf/parser/graph.py CHANGED
@@ -635,7 +635,8 @@ class reachNetwork():
         areas = areas.groupby(['source_type','source_type_id','source_name'])['area'].sum()[['PERLND','IMPLND']]
 
         if group:
-            areas = pd.concat([areas[operation].groupby('source_name').sum() for operation in ['PERLND','IMPLND']])
+            areas = areas.groupby(['source_type','source_name']).sum()
+            #areas = pd.concat([areas[operation].groupby('source_name').sum() for operation in ['PERLND','IMPLND']])
 
         #areas = pd.concat([areas[operation].groupby(self.uci.opnid_dict[operation].loc[areas[operation].index,'LSID'].values).sum() for operation in ['PERLND','IMPLND']])
         return areas