hspf 2.0.3__py3-none-any.whl → 2.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hspf/uci.py CHANGED
@@ -8,6 +8,7 @@ Created on Mon Jul 11 08:39:57 2022
  
  #lines = reader('C:/Users/mfratki/Documents/Projects/LacQuiParle/ucis/LacQuiParle_0.uci')
  import subprocess
+ import sys
  import numpy as np
  import pandas as pd
  from .parser.parsers import Table
@@ -78,7 +79,7 @@ class UCI():
  
      def table(self,block,table_name = 'na',table_id = 0,drop_comments = True):
          # Dynamic parsing of tables when called by user
-         assert block in ['FILES','PERLND','IMPLND','RCHRES','SCHEMATIC','OPN SEQUENCE','MASS-LINK','EXT SOURCES','NETWORK','GENER','MONTH-DATA','EXT TARGETS','COPY','FTABLES']
+         assert block in ['GLOBAL','FILES','PERLND','IMPLND','RCHRES','SCHEMATIC','OPN SEQUENCE','MASS-LINK','EXT SOURCES','NETWORK','GENER','MONTH-DATA','EXT TARGETS','COPY','FTABLES']
  
          table = self.uci[(block,table_name,table_id)] #[block][table_name][table_id]
          #TODO move the format_opnids into the Table class?
@@ -103,7 +104,7 @@ class UCI():
          self.uci[(block,table_name,table_id)].replace(table)
  
      def table_lines(self,block,table_name = 'na',table_id = 0):
-         return self.uci[(block,table_name,table_id)].lines
+         return self.uci[(block,table_name,table_id)].lines.copy()
  
      def comments(block,table_name = None,table_id = 0): # comments of a table
          raise NotImplementedError()
@@ -177,6 +178,43 @@ class UCI():
          lines += ['END RUN']
          self.lines = lines
  
+     def set_simulation_period(self,start_year,end_year):
+         # Update the GLOBAL table with the new start and end dates; very janky implementation, but not a priority.
+ 
+         # if start_hour < 10:
+         #     start_hour = f'0{int(start_hour+1)}:00'
+         # else:
+         #     start_hour = f'{int(start_hour+1)}:00'
+ 
+         # if end_hour < 10:
+         #     end_hour = f'0{int(end_hour+1)}:00'
+         # else:
+         #     end_hour = f'{int(end_hour+1)}:00'
+ 
+         table_lines = self.table_lines('GLOBAL')
+         for index, line in enumerate(table_lines):
+             if '***' in line: # in case there are comments in the global block
+                 continue
+             elif line.strip().startswith('START'):
+                 table_lines[index] = line[0:14] + f'{start_year}/01/01 00:00 ' + f'END {end_year}/12/31 24:00'
+             else:
+                 continue
+ 
+         self.uci[('GLOBAL','na',0)].lines = table_lines
+ 
+     def set_echo_flags(self,flag1,flag2):
+         table_lines = self.table_lines('GLOBAL')
+         for index, line in enumerate(table_lines):
+             if '***' in line: # in case there are comments in the global block
+                 continue
+             elif line.strip().startswith('RUN INTERP OUTPT LEVELS'):
+                 table_lines[index] = f' RUN INTERP OUTPT LEVELS {flag1} {flag2}'
+             else:
+                 continue
+ 
+ 
+         self.uci[('GLOBAL','na',0)].lines = table_lines
+ 
  
      def _write(self,filepath):
          with open(filepath, 'w') as the_file:
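
For reference, the two new helpers rewrite lines of the UCI GLOBAL block, which looks roughly like the fragment below (illustrative only; real models differ in wording and column spacing). set_simulation_period keeps the first 14 characters of the START line (the line[0:14] slice) and appends the new start and end dates; set_echo_flags replaces the RUN INTERP OUTPT LEVELS line outright.

    GLOBAL
      UCI created for an example watershed
      START       1995/01/01 00:00  END    2015/12/31 24:00
      RUN INTERP OUTPT LEVELS    3    0
      RESUME     0 RUN     1                   UNIT SYSTEM     1
    END GLOBAL
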
@@ -211,6 +249,9 @@ class UCI():
          self.merge_lines()
          self._write(new_uci_path)
  
+     def _run(self,wait_for_completion=True):
+         run_model(self.filepath, wait_for_completion=wait_for_completion)
+ 
      def update_bino(self,name):
          #TODO: Move up to business/presentation layer
          table = self.table('FILES',drop_comments = False) # initialize the table
@@ -325,9 +366,25 @@ class UCI():
  
      #TODO: More convenience methods that should probably be in a separate module
  
-     def run_model(uci_file):
-         winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFLt\\WinHspfLt.exe'
-         subprocess.run([winHSPF,uci_file.as_posix()]) #, stdout=subprocess.PIPE, creationflags=0x08000000)
+     def run_model(uci_file, wait_for_completion=True):
+         winHSPF = str(Path(__file__).resolve().parent.parent) + '\\bin\\WinHSPFlt\\WinHspfLt.exe'
+ 
+         # Arguments for the subprocess
+         args = [winHSPF, uci_file.as_posix()]
+ 
+         if wait_for_completion:
+             # Use subprocess.run to wait for the process to complete (original behavior)
+             subprocess.run(args)
+         else:
+             # Use subprocess.Popen to run the process in the background without waiting
+             # On Windows, you can use creationflags to prevent a console window from appearing
+             if sys.platform.startswith('win'):
+                 # Use a variable for the flag to ensure it's only used on Windows
+                 creationflags = subprocess.CREATE_NO_WINDOW
+                 subprocess.Popen(args, creationflags=creationflags)
+             else:
+                 # For other platforms (like Linux/macOS), Popen without special flags works fine
+                 subprocess.Popen(args)
  
      def get_filepaths(uci,file_extension):
          files = uci.table('FILES')
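
A minimal sketch of how the new conveniences fit together (the UCI constructor signature is an assumption; it is not shown in this diff):

    from hspf.uci import UCI

    uci = UCI('LacQuiParle_0.uci')          # assumed: a UCI object parsed from a .uci file
    uci.set_simulation_period(1995, 2015)   # rewrites the START/END line of the GLOBAL block
    uci.set_echo_flags(3, 0)                # rewrites the RUN INTERP OUTPT LEVELS line
    uci._run(wait_for_completion=False)     # launches WinHspfLt via Popen and returns immediately
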
hspf/validations.py ADDED
@@ -0,0 +1,211 @@
+ # -*- coding: utf-8 -*-
+ """
+ Created on Mon Dec 23 17:33:46 2024
+ 
+ @author: mfratki
+ """
+ 
+ _COLUMN = 'ReachID'
+ _DS_COLUMN = 'DS_ReachID'
+ 
+ # %% Functions
+ # First validate that the UCI file opn-sequence block is correct
+ # Reaches
+ #   Is there an FTABLE?
+ #   Is it a lake reach?
+ #   Is it in the schematic block? Is there any acreage?
+ #   Is it in the ext sources block?
+ #   Is it in the
+ 
+ #%% gis_layer methods
+ 
+ def gis_upstream(reach, gis_layer):
+     return gis_layer.loc[gis_layer[_DS_COLUMN] == reach, _COLUMN].to_list()
+ 
+ 
+ def gis_downstream(reach, gis_layer):
+     return gis_layer.loc[gis_layer[_COLUMN] == reach, _DS_COLUMN].to_list()
+ 
+ 
+ def duplicates(gis_layer):
+     return gis_layer.loc[gis_layer.duplicated(subset=_COLUMN),_COLUMN].to_list()
+ 
+ def is_duplicate(reach, gis_layer):
+     return len(gis_layer.loc[gis_layer[_COLUMN] == reach]) > 1
+ 
+ def is_missing(reach, gis_layer):
+     return not any(gis_layer[_COLUMN].isin([reach]))
+ 
+ #%% gis and uci checks
+ 
+ def gis_only(gis_layer, uci):
+     return gis_layer.loc[~gis_layer[_COLUMN].isin(uci.valid_opnids['RCHRES']), _COLUMN]
+ 
+ def missing(gis_layer, uci):
+     return [reach for reach in uci.valid_opnids['RCHRES'] if is_missing(reach, gis_layer)]
+ 
+ 
+ #%% reach specific gis and uci checks
+ 
+ 
+ def similar_area(reach,gis_layer,uci,tol = .05):
+     uci_area = uci.network.subwatershed_area(reach)
+     gis_area = gis_layer.loc[gis_layer[_COLUMN] == reach].geometry.area*0.000247105
+ 
+     return abs((uci_area-gis_area)/uci_area) <= tol
+ 
+ def test_upstream(reach, gis_layer, uci):
+     # Is it a 0 order reach?
+     upstream = uci.network.upstream(reach)
+ 
+     us_pass = False
+     if len(upstream) == 0:
+         # Make sure the gis layer reach is not in the downstream reach id column
+         if not all(gis_layer[_DS_COLUMN] == reach): # isin([reach])):
+             us_pass = True
+     else:
+         # if any(gis_layer.loc[gis_layer[_DS_COLUMN] == reach,_COLUMN].isin(upstream)):
+         if set(gis_layer.loc[gis_layer[_DS_COLUMN] == reach, _COLUMN]) == set(upstream):
+             us_pass = True
+     return us_pass
+ 
+ 
+ def test_downstream(reach, gis_layer, uci):
+     # Is it a 0 order reach?
+     downstream = uci.network.downstream(reach)
+ 
+     ds_pass = False
+     if len(downstream) == 0:
+         if any(gis_layer.loc[gis_layer[_COLUMN] == reach, _DS_COLUMN].isin([999, -999])):
+             ds_pass = True
+     else:
+         if set(gis_layer.loc[gis_layer[_COLUMN] == reach, _DS_COLUMN]) == set(downstream):
+             ds_pass = True
+     return ds_pass
+ 
+ 
+ #%% UCI checks
+ def same_metzone(reachs, uci):
+     '''
+     Returns True if all reaches are located within the same metzone, otherwise returns False.
+     '''
+ 
+     return len(uci.opnid_dict['RCHRES'].loc[reachs, 'metzone'].unique()) == 1
+ 
+ # NOTE: this second definition shadows the one above.
+ def same_metzone(reach_ids,uci):
+     dsn = uci.get_dsns('RCHRES',reach_ids[0],'PREC')['SVOLNO'].iloc[0]
+     mismatch = [reach_id for reach_id in reach_ids if uci.get_dsns('RCHRES',reach_id,'PREC')['SVOLNO'].iloc[0] != dsn]
+     return len(mismatch) == 0
+ 
+ 
+ # def validate_subwatershed_metzone(reach,uci):
+ #     subwatershed = uci.network.subwatershed(reach)
+ #     reach_dsn = uci.get_dsns('RCHRES',reach,'PREC')
+ #     subwatershed['dsns'] = pd.concat([uci.get_dsns(row['SVOL'],row['SVOLNO'],'PREC')['SVOLNO'] for index,row in subwatershed.iterrows()]).values
+ 
+ 
+ def same_dsns(reach,uci):
+     reach_dsn = uci.get_dsns('RCHRES',reach,'PREC')['SVOLNO'].values[0]
+     diff = []
+     for index,row in uci.network.subwatershed(reach).iterrows():
+         perlnd_dsn = uci.get_dsns(row['SVOL'],row['SVOLNO'],'PREC')['SVOLNO'].values[0]
+         if perlnd_dsn != reach_dsn:
+             diff.append(perlnd_dsn)
+     return len(diff) == 0
+ 
+ def has_ftable(reach, uci):
+     '''
+     Returns True if there is an FTABLE in the uci associated with the reach, otherwise returns False.
+     '''
+     return f'FTABLE{reach}' in uci.table_names('FTABLES')
+ 
+ def isin_open_sequence(operation,opnid,uci):
+     opnseq = uci.table('OPN SEQUENCE')
+     return opnid in opnseq.loc[opnseq['OPERATION'] == operation,'SEGMENT'].values
+ 
+ def isin_geninfo(reach, uci):
+     return reach in uci.table('RCHRES', 'GEN-INFO').index
+ 
+ def isin_network(reach,uci):
+     return reach in uci.network.G.nodes
+ 
+ def isin_schematic(reach, uci):
+     schematic = uci.table('SCHEMATIC')
+     return reach in set(schematic.loc[schematic['TVOL'] == 'RCHRES','TVOLNO'])
+     #return reach in uci.opnid_dict['RCHRES'].index
+ 
+ def svol_isin_schematic(svol,svolnos,uci):
+     schematic = uci.table('SCHEMATIC')
+     schematic_svolnos = set(schematic.loc[schematic['SVOL'] == svol,'SVOLNO'])
+     out = {svolno:svolno in schematic_svolnos for svolno in svolnos}
+     if all(out.values()):
+         out = True
+     return out
+ 
+ def tvol_isin_schematic(tvol,tvolnos,uci):
+     schematic = uci.table('SCHEMATIC')
+     schematic_tvolnos = set(schematic.loc[schematic['TVOL'] == tvol,'TVOLNO'])
+     out = {tvolno:tvolno in schematic_tvolnos for tvolno in tvolnos}
+     if all(out.values()):
+         out = True
+     return out
+ 
+ def number_of_networks(uci):
+     return len(uci.network.outlets())
+ 
+ def is_non_contributing_area(reach,uci):
+     return all([isin_schematic(reach,uci), not isin_network(reach,uci)])
+ 
+ 
+ def isin_uci(reach, uci):
+     return reach in uci.valid_opnids['RCHRES']
+ 
+ 
+ def has_area(reach, uci):
+     subwatersheds = uci.network.subwatersheds()
+     return reach in subwatersheds.index
+ 
+ def gets_precip(reach, uci):
+     return reach in uci.network.G.nodes
+ 
+ def is_routing_reach(reach, uci):
+     #return all([isin_network(reach,uci), not has_area(reach,uci)])
+     return uci.network.subwatershed(reach)['AFACTR'].sum() == 0
+ 
+ 
+ def is_lake(reach, uci):
+     return uci.table('RCHRES', 'GEN-INFO').loc[reach, 'LKFG'] == 1
+ 
+ # def recieves_met(reach,uci):
+ #     ts_names = ['ATEM','CLOU','DEWP','PEVT','PREC','SOLR','WIND']
+ #     return reach in set(ext_sources.loc[(ext_sources['TVOL'] == 'RCHRES') & (ext_sources['SMEMN'].isin(ts_names)),'TOPFST'])
+ 
+ #%% In OPN SEQUENCE but not in SCHEMATIC
+ 
+ 
+ # Scratch check: expects a dict `ucis` of {model_name: UCI} defined elsewhere.
+ for model_name, uci in ucis.items():
+     reach_ids = uci.table('OPN SEQUENCE').query('OPERATION == "RCHRES"')['SEGMENT'].to_list()
+     schem = uci.table('SCHEMATIC')
+     if not all(schem.query('SVOL == "RCHRES"')['SVOLNO'].isin(reach_ids)):
+         print(model_name)
+ 
+ 
+ '''
+ Dummy Terminal Lake (Buffalo)
+ 
+ A reach that acts as a terminal reservoir for upstream inflows.
+ No FTABLE is needed since there is no routing (but perhaps some include them?)
+ 
+ 
+ 
+ 
+ 
+ '''
+ 
+ 
+ 
+ 
+ # opensequence
+ # ext sources
+ # schematic
+ #
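
These checks are meant to be driven with a reach GIS layer (a GeoDataFrame-like table with ReachID and DS_ReachID columns) and a parsed UCI object. A purely illustrative driver, assuming geopandas and a shapefile path:

    import geopandas as gpd
    from hspf import validations as v

    def check_reaches(uci, gis_path):
        """Run a few reach-level checks for every RCHRES in the UCI (illustrative)."""
        gis_layer = gpd.read_file(gis_path)  # expects ReachID / DS_ReachID columns
        for reach in uci.valid_opnids['RCHRES']:
            if v.is_missing(reach, gis_layer):
                print(f'RCHRES {reach}: not in the GIS layer')
                continue
            checks = {
                'ftable': v.has_ftable(reach, uci),
                'schematic': v.isin_schematic(reach, uci),
                'upstream': v.test_upstream(reach, gis_layer, uci),
                'downstream': v.test_downstream(reach, gis_layer, uci),
            }
            failed = [name for name, ok in checks.items() if not ok]
            if failed:
                print(f'RCHRES {reach}: failed {", ".join(failed)}')
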
hspf/warehouse.py ADDED
@@ -0,0 +1,275 @@
+ import duckdb
+ from pathlib import Path
+ import pandas as pd
+ 
+ def init_hspf_db(db_path: str, reset: bool = False):
+     """Initializes the HSPF model structure database."""
+     db_path = Path(db_path)
+     if reset and db_path.exists():
+         db_path.unlink()
+ 
+     with duckdb.connect(db_path.as_posix()) as con:
+         # Create schema
+         con.execute("CREATE SCHEMA IF NOT EXISTS hspf")
+ 
+         # Create tables for HSPF model data
+         create_model_tables(con)
+         create_model_run_table(con)
+         create_structure_tables(con)
+         create_parameter_tables(con)
+         create_timeseries_tables(con)
+         # ...and so on for all HSPF tables...
+ 
+ def load_df_to_table(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str, replace: bool = True):
+     """
+     Persist a pandas DataFrame into a DuckDB table. This will overwrite the table
+     by default (replace=True).
+     """
+     if replace:
+         con.execute(f"DROP TABLE IF EXISTS {table_name}")
+     # register pandas DF and create table
+     con.register("tmp_df", df)
+     con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM tmp_df")
+     con.unregister("tmp_df")
+ 
+ 
+ def create_hspf_model_hierarchy_tables(con: duckdb.DuckDBPyConnection):
+     """
+     Creates the tables that define the model -> version -> scenario -> run hierarchy.
+     """
+     con.execute('''
+         CREATE SEQUENCE IF NOT EXISTS hspf.model_seq START 1;
+         CREATE SEQUENCE IF NOT EXISTS hspf.model_version_seq START 1;
+         CREATE SEQUENCE IF NOT EXISTS hspf.scenario_seq START 1;
+         CREATE SEQUENCE IF NOT EXISTS hspf.model_run_seq START 1;
+ 
+         -- Level 1: The overall Model (e.g., for a specific basin)
+         CREATE TABLE IF NOT EXISTS hspf.models (
+             model_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.model_seq'),
+             model_name VARCHAR NOT NULL UNIQUE, -- e.g., 'Nemadji River Basin Model'
+             description VARCHAR
+         );
+ 
+         -- Level 2: A specific Version of a Model
+         CREATE TABLE IF NOT EXISTS hspf.model_versions (
+             model_version_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.model_version_seq'),
+             model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
+             version_name VARCHAR NOT NULL, -- e.g., 'v2.1', '2025_Update'
+             release_date DATE,
+             description VARCHAR,
+             UNIQUE (model_pk, version_name)
+         );
+ 
+         -- Level 3: A Scenario within a Model Version
+         CREATE TABLE IF NOT EXISTS hspf.scenarios (
+             scenario_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.scenario_seq'),
+             model_version_pk BIGINT NOT NULL REFERENCES hspf.model_versions(model_version_pk),
+             scenario_name VARCHAR NOT NULL, -- e.g., 'Baseline_2020', 'Future_Climate_BMPs'
+             description VARCHAR,
+             UNIQUE (model_version_pk, scenario_name)
+         );
+ 
+         -- Level 4: A single execution (Run) of a Scenario
+         CREATE TABLE IF NOT EXISTS hspf.model_runs (
+             model_run_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.model_run_seq'),
+             scenario_pk BIGINT NOT NULL REFERENCES hspf.scenarios(scenario_pk),
+             run_id BIGINT,
+             run_name VARCHAR, -- e.g., 'Run_1995-2015', 'Calibration_Run_A'
+             start_year INTEGER,
+             end_year INTEGER,
+             run_timestamp TIMESTAMP DEFAULT current_timestamp,
+             notes VARCHAR
+         );
+     ''')
+ 
+ def create_model_run_table(con: duckdb.DuckDBPyConnection):
+     """
+     Creates the table to store individual model runs linked to scenarios.
+     """
+     con.execute(
+         '''
+         CREATE SEQUENCE IF NOT EXISTS model_run_seq START 1;
+ 
+         -- Table: hspf.model_runs
+         -- Purpose: Stores individual model runs linked to scenarios.
+         CREATE TABLE IF NOT EXISTS model_runs (
+             model_run_pk BIGINT PRIMARY KEY DEFAULT nextval('model_run_seq'),
+             model_name VARCHAR NOT NULL, -- e.g., 'Nemadji River Basin Model'
+             run_id BIGINT,
+             run_name VARCHAR, -- e.g., 'Run_1995-2015', 'Calibration_Run_A'
+             notes VARCHAR
+         );
+         ''')
+ 
+ def insert_model_run(con: duckdb.DuckDBPyConnection, model_name: str, run_id: int, run_name: str = None, notes: str = None):
+     """
+     Inserts a new model run into the model_runs table.
+     """
+     con.execute(
+         '''
+         INSERT INTO model_runs (model_name, run_id, run_name, notes)
+         VALUES (?, ?, ?, ?)
+         ''',
+         (model_name, run_id, run_name, notes)
+     )
+ 
+ def create_structure_tables(con: duckdb.DuckDBPyConnection):
+     """
+     Creates tables that define the structural components of an HSPF model,
+     linking them to a core model definition.
+     """
+     con.execute(
+         '''
+         CREATE SEQUENCE IF NOT EXISTS hspf.operation_seq START 1;
+         CREATE SEQUENCE IF NOT EXISTS hspf.catchment_seq START 1;
+ 
+         -- Table: hspf.operations
+         -- Purpose: Registry of all land segments and reaches (e.g., PERLND, RCHRES).
+         CREATE TABLE IF NOT EXISTS hspf.operations (
+             operation_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.operation_seq'),
+             model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
+             operation_id INTEGER NOT NULL, -- e.g., The PERLND number (101)
+             operation_type VARCHAR NOT NULL, -- e.g., 'PERLND', 'RCHRES'
+             UNIQUE (model_pk, operation_id, operation_type)
+         );
+ 
+ 
+         -- Table: hspf.catchments
+         -- Purpose: Defines the subwatersheds or catchments in the model.
+         CREATE TABLE IF NOT EXISTS hspf.catchments (
+             catchment_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.catchment_seq'),
+             model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
+             catchment_id INTEGER NOT NULL,
+             catchment_name VARCHAR,
+             UNIQUE (model_pk, catchment_id)
+         );
+ 
+         -- Table: hspf.catchment_operations
+         -- Purpose: Maps operations (land segments) to catchments, defining the model's spatial structure and connectivity.
+         CREATE TABLE IF NOT EXISTS hspf.catchment_operations (
+             catchment_pk BIGINT REFERENCES hspf.catchments(catchment_pk),
+             source_operation_pk BIGINT REFERENCES hspf.operations(operation_pk),
+             target_operation_pk BIGINT REFERENCES hspf.operations(operation_pk),
+             model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
+             value FLOAT,
+             mlno INTEGER, -- Mass-link number from SCHEMATIC block
+             tmemsb1 INTEGER, -- Mass-link memory storage 1
+             tmemsb2 INTEGER -- Mass-link memory storage 2
+         );
+         ''')
+ 
+ 
+ def create_parameter_tables(con: duckdb.DuckDBPyConnection):
+     """
+     Creates tables to store the parameters, flags, and properties for model operations,
+     linking them to the model structure.
+     """
+     con.execute(
+         '''
+         CREATE SEQUENCE IF NOT EXISTS hspf.parameter_seq START 1;
+         CREATE SEQUENCE IF NOT EXISTS hspf.flag_seq START 1;
+         CREATE SEQUENCE IF NOT EXISTS hspf.property_seq START 1;
+ 
+         -- Table: hspf.parameters
+         -- Purpose: Stores numeric model parameters for each operation (e.g., LZSN, UZSN).
+         CREATE TABLE IF NOT EXISTS hspf.parameters (
+             parameter_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.parameter_seq'),
+             operation_pk BIGINT NOT NULL REFERENCES hspf.operations(operation_pk),
+             parameter_name VARCHAR,
+             parameter_value FLOAT
+         );
+ 
+         -- Table: hspf.flags
+         -- Purpose: Stores integer-based flags for model operations (e.g., snow flags).
+         CREATE TABLE IF NOT EXISTS hspf.flags (
+             flag_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.flag_seq'),
+             operation_pk BIGINT NOT NULL REFERENCES hspf.operations(operation_pk),
+             flag_name VARCHAR,
+             flag_value INTEGER
+         );
+ 
+         -- Table: hspf.properties
+         -- Purpose: Stores string-based properties for model operations (e.g., land use names).
+         CREATE TABLE IF NOT EXISTS hspf.properties (
+             property_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.property_seq'),
+             operation_pk BIGINT NOT NULL REFERENCES hspf.operations(operation_pk),
+             property_name VARCHAR,
+             property_value VARCHAR
+         );
+         ''')
+ 
+ 
+ def create_timeseries_tables(con: duckdb.DuckDBPyConnection):
+     """
+     Creates tables for storing model output timeseries, linking them to a specific model run.
+     """
+     con.execute(
+         '''
+         CREATE SEQUENCE IF NOT EXISTS timeseries_metadata_seq START 1;
+ 
+         -- Table: hspf.timeseries_metadata
+         -- Purpose: Metadata for each unique timeseries produced by a model run.
+         CREATE TABLE IF NOT EXISTS hspf.timeseries_metadata (
+             timeseries_pk BIGINT PRIMARY KEY DEFAULT nextval('timeseries_metadata_seq'),
+             model_run_pk BIGINT NOT NULL REFERENCES model_runs(model_run_pk),
+             operation_pk BIGINT NOT NULL REFERENCES operations(operation_pk),
+             ts_name VARCHAR NOT NULL, -- e.g., 'ROVOL','SOSED'
+             activity VARCHAR NOT NULL, -- e.g., 'SEDTRN','HYDR'
+             timestep VARCHAR NOT NULL, -- e.g., 'hourly','daily'
+             unit VARCHAR NOT NULL, -- e.g., 'cfs','mg/L'
+             timeseries_type VARCHAR NOT NULL -- e.g., 'cumulative', 'instantaneous'
+         );
+ 
+         -- Table: hspf.timeseries
+         -- Purpose: Stores the actual timeseries data points in a narrow/long format.
+         CREATE TABLE IF NOT EXISTS hspf.timeseries (
+             timeseries_pk BIGINT NOT NULL REFERENCES timeseries_metadata(timeseries_pk),
+             datetime TIMESTAMP NOT NULL,
+             value DOUBLE,
+             UNIQUE(timeseries_pk, datetime)
+         );
+         ''')
+ 
+ def connect(db_path: str, read_only: bool = False) -> duckdb.DuckDBPyConnection:
+     db_path = Path(db_path)
+     db_path.parent.mkdir(parents=True, exist_ok=True)
+     return duckdb.connect(database=db_path.as_posix(), read_only=read_only)
+ 
+ def insert_df_into_table(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str, schema: str = 'hspf', clear_before_insert: bool = True):
+     """
+     Inserts a pandas DataFrame into an existing table in a specified schema,
+     matching columns by name, making the operation robust to column order.
+ 
+     Args:
+         con: The DuckDB connection object.
+         df: The pandas DataFrame to insert.
+         table_name: The name of the target table.
+         schema: The schema of the target table (e.g., 'hspf', 'analytics').
+         clear_before_insert: If True, deletes all rows from the table before insertion.
+     """
+     target_table = f"{schema}.{table_name}"
+ 
+     if not df.empty:
+         if clear_before_insert:
+             print(f" Clearing all data from {target_table}...")
+             con.execute(f"DELETE FROM {target_table}")
+ 
+         # Get column names from the DataFrame and format them for the SQL query.
+         # Quoting column names handles special characters, spaces, and case-sensitivity.
+         cols = df.columns
+         col_string = ", ".join([f'"{c}"' for c in cols])
+ 
+         # Register the DataFrame as a temporary view so we can query it
+         temp_view_name = "temp_df_to_insert"
+         con.register(temp_view_name, df)
+ 
+         print(f" Inserting {len(df)} rows into {target_table}...")
+ 
+         # The SQL statement is now robust to column order in the DataFrame
+         sql = f"INSERT INTO {target_table} ({col_string}) SELECT {col_string} FROM {temp_view_name}"
+         con.execute(sql)
+ 
+         # Clean up the temporary view
+         con.unregister(temp_view_name)
+     else:
+         print(f" DataFrame is empty. Skipping insertion into {target_table}.")
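
A rough usage sketch of the warehouse helpers (database path, model name, and the DataFrame below are made-up examples):

    import pandas as pd
    from hspf import warehouse

    con = warehouse.connect('warehouse/hspf_models.duckdb')  # creates parent folders if needed
    con.execute("CREATE SCHEMA IF NOT EXISTS hspf")

    # Register a run in the (unqualified) model_runs table
    warehouse.create_model_run_table(con)
    warehouse.insert_model_run(con, model_name='Example Basin Model',
                               run_id=1, run_name='Calibration_Run_A')

    # Persist an arbitrary DataFrame as its own table in the hspf schema
    df = pd.DataFrame({'operation_id': [101, 102], 'operation_type': ['PERLND', 'RCHRES']})
    warehouse.load_df_to_table(con, df, 'hspf.scratch_operations')

    con.close()
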
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: hspf
- Version: 2.0.3
+ Version: 2.1.1
  Summary: Python package for downloading and running HSPF models
  Project-URL: Homepage, https://github.com/mfratkin1/pyHSPF
  Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>
@@ -1,11 +1,22 @@
+ hspf/Masslink_Timeseries.csv,sha256=TOV6PpR0SBI0FaAU1T-qyD2DyGsBFjUWZenvWXiS3wA,4985
  hspf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- hspf/hbn.py,sha256=SQMxWllZy5OxWGMkhmjiardb8vbSjrmENJrorLBqTDI,19476
- hspf/helpers.py,sha256=djKc12ZZkJmB_cHEbFm-mk8sp4GAbBNfjXxfp7YAELU,3132
- hspf/hspfModel.py,sha256=8XFPd89niSn9bNTjB2UUpoLNAs6wsD6i6Lb9YKoYjUU,8090
- hspf/reports.py,sha256=DfS9DoNwrnD3UvxO879i-bM2gWh5QUMxrV4mdRDgpfE,51878
- hspf/uci.py,sha256=towPqQYFO1JC1yNHG5gHoM_8jeO-XueSmClheSth-5k,31612
+ hspf/build_warehouse.py,sha256=J3fgycY9xkZdY3C9u0wDaEX1n6satr1t0mQmfg4Fh6E,20205
+ hspf/hbn.py,sha256=xUvovcAXXLLLE_ID9kejjiyaAqnh-vwmDLdPLjsGQ8A,19401
+ hspf/hbn2.py,sha256=OmuTVDxd0Boyd3GvBgzEfqvP7CTeYIJYPC7EXPgYu30,13190
+ hspf/hbn_cy.c,sha256=ZIJwWxyGx8fE5nM1HBd8-zNotmStGZscqXijl3KSRdI,593464
+ hspf/hbn_cy.html,sha256=o8wMdvEE547DRXcUHFPgAkkyJ665b6rloGL-qKClaTo,137735
+ hspf/hbn_cy.pyx,sha256=T-itpkvHlxHzQHKtJBS-M8_ToLBa1U_ajpV53hh_oI8,4323
+ hspf/helpers.py,sha256=cd8J3XfCFmpER475Mk5aFL29612rgop75GRGxlofXQo,3242
+ hspf/hspfModel.py,sha256=K_xF7HtuMpDMod56Z3IXDCeGsnUi8KGhly_9tm-mxoY,9070
+ hspf/reports.py,sha256=bU9rU9qaffXosxyA3H5OWi4SqQVPiQh6709tTCMYeeU,46286
+ hspf/uci.py,sha256=rsi_KJqdfBFp0rlKCHyhmQGdB_rgNE8k6abTjH26UqE,33982
+ hspf/validations.py,sha256=BcNT0h5QDZW9lHpXk8KuHQvycl8a_4jQ91srwWFodRo,6666
+ hspf/warehouse.py,sha256=1zm1uu_QvevIuAMNPOkzspnFhNpLQrvkq3x3HXSypGg,11898
  hspf/wdm.py,sha256=q0hNqsMNrTkxHtKEX0q0wWlIZabXv6UX2HjNCF9WEW4,12734
  hspf/wdmReader.py,sha256=-akKWB9SpUzXvXoQMeHLZNi_u584KaeEOyHB-YolTWM,22848
+ hspf/bin/WinHSPFLt/WinHspfLt.exe,sha256=Afs_nJ62r1VnTL2P4XfiRJ1sH2If5DeGTbcCzoqlanE,74752
+ hspf/data/HSPFParameterRanges.csv,sha256=PKz1DRIgpsgTEDrVaSHB9SAGMa5yUBRpyZDc9-CKJJo,28357
+ hspf/data/LandUseNames_Mappings.csv,sha256=Bb2toZn6FkPfZ6_8SnzIQvXJ03ycwCuc8uyv4cUowNY,75899
  hspf/data/ParseTable.csv,sha256=ExqUaZg_uUPF5XHGLJEk5_jadnDenKjbwqC4d-iNX_M,193609
  hspf/data/Timeseries Catalog/IMPLND/IQUAL.txt,sha256=r36wt2gYtHKr5SkOcVnpyk5aYZF743AgkJ5o7CvHlIc,1000
  hspf/data/Timeseries Catalog/IMPLND/IWATER.txt,sha256=JZ03DFMq8e3EcflRSQ_BPYIeKe8TH3WYEUMmTF2OQEs,743
@@ -27,8 +38,8 @@ hspf/data/Timeseries Catalog/RCHRES/OXRX.txt,sha256=NWdRFpJ60LsYzCGHjt8Llay3OI8j
  hspf/data/Timeseries Catalog/RCHRES/PLANK.txt,sha256=0MAehIrF8leYQt0Po-9h6IiujzoWOlw-ADCV-bPiqs0,3508
  hspf/data/Timeseries Catalog/RCHRES/SEDTRN.txt,sha256=SiTgD4_YWctTgEfhoMymZfv8ay74xzCRdnI005dXjyE,659
  hspf/parser/__init__.py,sha256=2HvprGVCaJ9L-egvTj1MI-bekq5CNjtSBZfrCtQi3fs,92
- hspf/parser/graph.py,sha256=bAOCkOwubRoETRWlOP_apOFyepV-yHSeCYPYVyuZ2bE,28610
- hspf/parser/parsers.py,sha256=xlWB-odGNrArdvd5qwGyvNZ0N8oaVmuNZ6z3gRdHm-g,19796
- hspf-2.0.3.dist-info/METADATA,sha256=qyqFAALOQR0L2W62BIsBRD65-CwPF2Ue2iFJEK8-Jdc,605
- hspf-2.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- hspf-2.0.3.dist-info/RECORD,,
+ hspf/parser/graph.py,sha256=jvkjz9eNtBFEmxUeQosuQE7XgsIRlrNH-rSny5KBDoE,33046
+ hspf/parser/parsers.py,sha256=x3othxQogUmGNe_ctCU20atDrRM_B4lEbVJb3EMbwto,20850
+ hspf-2.1.1.dist-info/METADATA,sha256=KtAPnc8v-bT8ow30iHdN1lCm2asH22rvOyKgDGS_kL0,605
+ hspf-2.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ hspf-2.1.1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
  Wheel-Version: 1.0
- Generator: hatchling 1.27.0
+ Generator: hatchling 1.28.0
  Root-Is-Purelib: true
  Tag: py3-none-any