hspf 2.1.0__py3-none-any.whl → 2.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hspf/Masslink_Timeseries.csv +240 -0
- hspf/build_warehouse.py +545 -0
- hspf/data/HSPFParameterRanges.csv +492 -0
- hspf/data/LandUseNames_Mappings.csv +3330 -0
- hspf/hbn.py +17 -4
- hspf/hbn2.py +316 -0
- hspf/hbn_cy.c +14450 -0
- hspf/hbn_cy.html +1540 -0
- hspf/hbn_cy.pyx +107 -0
- hspf/helpers.py +8 -7
- hspf/parser/graph.py +17 -2
- hspf/reports.py +264 -459
- hspf/validations.py +211 -0
- hspf/warehouse.py +275 -0
- {hspf-2.1.0.dist-info → hspf-2.1.1.dist-info}/METADATA +1 -1
- {hspf-2.1.0.dist-info → hspf-2.1.1.dist-info}/RECORD +17 -7
- {hspf-2.1.0.dist-info → hspf-2.1.1.dist-info}/WHEEL +0 -0
hspf/validations.py
ADDED
@@ -0,0 +1,211 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Dec 23 17:33:46 2024

@author: mfratki
"""

_COLUMN = 'ReachID'
_DS_COLUMN = 'DS_ReachID'

# %% Functions
# First validate that the UCI file opn-sequence block is correct
# Reaches
# Is there an FTABLE?
# Is it a lake reach?
# Is it in the schematic block? Is there any acreage?
# Is it in the ext sources block?
# Is it in the

#%% gis_layer methods

def gis_upstream(reach, gis_layer):
    return gis_layer.loc[gis_layer[_DS_COLUMN] == reach, _COLUMN].to_list()


def gis_downstream(reach, gis_layer):
    return gis_layer.loc[gis_layer[_COLUMN] == reach, _DS_COLUMN].to_list()


def duplicates(gis_layer):
    return gis_layer.loc[gis_layer.duplicated(subset=_COLUMN), _COLUMN].to_list()

def is_duplicate(reach, gis_layer):
    return len(gis_layer.loc[gis_layer[_COLUMN] == reach]) > 1

def is_missing(reach, gis_layer):
    return not any(gis_layer[_COLUMN].isin([reach]))

#%% gis and uci checks

def gis_only(gis_layer, uci):
    return gis_layer.loc[~gis_layer[_COLUMN].isin(uci.valid_opnids['RCHRES']), _COLUMN]

def missing(gis_layer, uci):
    return [reach for reach in uci.valid_opnids['RCHRES'] if is_missing(reach, gis_layer)]


#%% reach specific gis and uci checks

def similar_area(reach, gis_layer, uci, tol=.05):
    uci_area = uci.network.subwatershed_area(reach)
    gis_area = gis_layer.loc[gis_layer[_COLUMN] == reach].geometry.area*0.000247105

    return abs((uci_area-gis_area)/uci_area) <= tol

def test_upstream(reach, gis_layer, uci):
    # Is it a 0 order reach?
    upstream = uci.network.upstream(reach)

    us_pass = False
    if len(upstream) == 0:
        # Make sure the gis layer reach is not in the downstream reach id column
        if not all(gis_layer[_DS_COLUMN] == reach):  # isin([reach])):
            us_pass = True
    else:
        # if any(gis_layer.loc[gis_layer[_DS_COLUMN] == reach,_COLUMN].isin(upstream)):
        if set(gis_layer.loc[gis_layer[_DS_COLUMN] == reach, _COLUMN]) == set(upstream):
            us_pass = True
    return us_pass


def test_downstream(reach, gis_layer, uci):
    # Is it a 0 order reach?
    downstream = uci.network.downstream(reach)

    ds_pass = False
    if len(downstream) == 0:
        if any(gis_layer.loc[gis_layer[_COLUMN] == reach, _DS_COLUMN].isin([999, -999])):
            ds_pass = True
    else:
        if set(gis_layer.loc[gis_layer[_COLUMN] == reach, _DS_COLUMN]) == set(downstream):
            ds_pass = True
    return ds_pass


#%% UCI checks
def same_metzone(reachs, uci):
    '''
    Returns True if all reaches are located within the same metzone, otherwise returns False.
    '''
    return len(uci.opnid_dict['RCHRES'].loc[reachs, 'metzone'].unique()) == 1

def same_metzone(reach_ids, uci):
    dsn = uci.get_dsns('RCHRES', reach_ids[0], 'PREC')['SVOLNO'].iloc[0]
    mismatch = [reach_id for reach_id in reach_ids if uci.get_dsns('RCHRES', reach_id, 'PREC')['SVOLNO'].iloc[0] != dsn]
    return len(mismatch) == 0


# def validate_subwatershed_metzone(reach,uci):
#     subwatershed = uci.network.subwatershed(reach)
#     reach_dsn = uci.get_dsns('RCHRES',reach,'PREC')
#     subwatershed['dsns'] = pd.concat([uci.get_dsns(row['SVOL'],row['SVOLNO'],'PREC')['SVOLNO'] for index,row in subwatershed.iterrows()]).values


def same_dsns(reach, uci):
    reach_dsn = uci.get_dsns('RCHRES', reach, 'PREC')['SVOLNO'].values[0]
    diff = []
    for index, row in uci.network.subwatershed(reach).iterrows():
        perlnd_dsn = uci.get_dsns(row['SVOL'], row['SVOLNO'], 'PREC')['SVOLNO'].values[0]
        if perlnd_dsn != reach_dsn:
            diff.append(perlnd_dsn)
    return len(diff) == 0

def has_ftable(reach, uci):
    '''
    Returns True if there is an FTABLE in the uci associated with the reach, otherwise returns False.
    '''
    return f'FTABLE{reach}' in uci.table_names('FTABLES')

def isin_open_sequence(operation, opnid, uci):
    opnseq = uci.table('OPN SEQUENCE')
    return opnid in opnseq.loc[opnseq['OPERATION'] == operation, 'SEGMENT'].values

def isin_geninfo(reach, uci):
    return reach in uci.table('RCHRES', 'GEN-INFO').index

def isin_network(reach, uci):
    return reach in uci.network.G.nodes

def isin_schematic(reach, uci):
    schematic = uci.table('SCHEMATIC')
    return reach in set(schematic.loc[schematic['TVOL'] == 'RCHRES', 'TVOLNO'])
    # return reach in uci.opnid_dict['RCHRES'].index

def svol_isin_schematic(svol, svolnos, uci):
    schematic = uci.table('SCHEMATIC')
    schematic_svolnos = set(schematic.loc[schematic['SVOL'] == svol, 'SVOLNO'])
    out = {svolno: svolno in schematic_svolnos for svolno in svolnos}
    if all(out.values()):
        out = True
    return out

def tvol_isin_schematic(tvol, tvolnos, uci):
    schematic = uci.table('SCHEMATIC')
    schematic_tvolnos = set(schematic.loc[schematic['TVOL'] == tvol, 'TVOLNO'])
    out = {tvolno: tvolno in schematic_tvolnos for tvolno in tvolnos}
    if all(out.values()):
        out = True
    return out

def number_of_networks(uci):
    return len(uci.network.outlets())

def is_non_contributing_area(reach, uci):
    return all([isin_schematic(reach, uci), not isin_network(reach, uci)])


def isin_uci(reach, uci):
    return reach in uci.valid_opnids['RCHRES']


def has_area(reach, uci):
    subwatersheds = uci.network.subwatersheds()
    return reach in subwatersheds.index

def gets_precip(reach, uci):
    return reach in uci.network.G.nodes

def is_routing_reach(reach, uci):
    # return all([isin_network(reach,uci), not has_area(reach,uci)])
    return uci.network.subwatershed(reach)['AFACTR'].sum() == 0


def is_lake(reach, uci):
    return uci.table('RCHRES', 'GEN-INFO').loc[reach, 'LKFG'] == 1

# def recieves_met(reach,uci):
#     ts_names = ['ATEM','CLOU','DEWP','PEVT','PREC','SOLR','WIND']
#     return reach in set(ext_sources.loc[(ext_sources['TVOL'] == 'RCHRES') & (ext_sources['SMEMN'].isin(ts_names)),'TOPFST'])

#%% In opensequence but not in scehamatic

for model_name, uci in ucis.items():
    reach_ids = uci.table('OPN SEQUENCE').query('OPERATION == "RCHRES"')['SEGMENT'].to_list()
    schem = uci.table('SCHEMATIC')
    if not all(schem.query('SVOL == "RCHRES"')['SVOLNO'].isin(reach_ids)):
        print(model_name)


'''
Dummy Terminal Lake (Buffalo)

A reach that acts as a termnial resevoir for upstream inflows.
No Ftable is needed since there is no routing (but perhaps some include them?)
'''

# opensequence
# ext sources
# schematic
#
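The module is a collection of free functions, so the checks compose naturally. Below is a minimal sketch (not part of the package) of how they might be combined into a per-reach report. It assumes the functions above are in scope, that `uci` is a parsed UCI object exposing the attributes used above (valid_opnids, network, table(), get_dsns()), and that `gis_layer` is a geopandas GeoDataFrame with 'ReachID'/'DS_ReachID' columns in a metre-based CRS (the 0.000247105 factor in similar_area converts m² to acres).

import pandas as pd

def reach_report(uci, gis_layer) -> pd.DataFrame:
    """Hypothetical helper: tabulate the validation checks for every RCHRES in the UCI."""
    rows = []
    for reach in uci.valid_opnids['RCHRES']:
        rows.append({
            'reach': reach,
            'in_schematic': isin_schematic(reach, uci),
            'has_ftable': has_ftable(reach, uci),
            'is_lake': is_lake(reach, uci),
            'upstream_matches_gis': test_upstream(reach, gis_layer, uci),
            'downstream_matches_gis': test_downstream(reach, gis_layer, uci),
            # similar_area returns a boolean Series (one entry per matching GIS feature)
            'area_within_5pct': bool(similar_area(reach, gis_layer, uci, tol=0.05).all()),
        })
    return pd.DataFrame(rows).set_index('reach')

# Reaches drawn in the GIS layer that have no RCHRES operation in the UCI:
# orphans = gis_only(gis_layer, uci)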
hspf/warehouse.py
ADDED
@@ -0,0 +1,275 @@
import duckdb
from pathlib import Path
import pandas as pd

def init_hspf_db(db_path: str, reset: bool = False):
    """Initializes the HSPF model structure database."""
    db_path = Path(db_path)
    if reset and db_path.exists():
        db_path.unlink()

    with duckdb.connect(db_path.as_posix()) as con:
        # Create schema
        con.execute("CREATE SCHEMA IF NOT EXISTS hspf")

        # Create tables for HSPF model data
        create_model_tables(con)
        create_model_run_table(con)
        create_structure_tables(con)
        create_parameter_tables(con)
        create_timeseries_tables(con)
        # ...and so on for all HSPF tables...

def load_df_to_table(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str, replace: bool = True):
    """
    Persist a pandas DataFrame into a DuckDB table. This will overwrite the table
    by default (replace=True).
    """
    if replace:
        con.execute(f"DROP TABLE IF EXISTS {table_name}")
    # register pandas DF and create table
    con.register("tmp_df", df)
    con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM tmp_df")
    con.unregister("tmp_df")


def create_hspf_model_hierarchy_tables(con: duckdb.DuckDBPyConnection):
    """
    Creates the tables that define the model -> version -> scenario -> run hierarchy.
    """
    con.execute('''
        CREATE SEQUENCE IF NOT EXISTS hspf.model_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS hspf.model_version_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS hspf.scenario_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS hspf.model_run_seq START 1;

        -- Level 1: The overall Model (e.g., for a specific basin)
        CREATE TABLE IF NOT EXISTS hspf.models (
            model_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.model_seq'),
            model_name VARCHAR NOT NULL UNIQUE, -- e.g., 'Nemadji River Basin Model'
            description VARCHAR
        );

        -- Level 2: A specific Version of a Model
        CREATE TABLE IF NOT EXISTS hspf.model_versions (
            model_version_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.model_version_seq'),
            model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
            version_name VARCHAR NOT NULL, -- e.g., 'v2.1', '2025_Update'
            release_date DATE,
            description VARCHAR,
            UNIQUE (model_pk, version_name)
        );

        -- Level 3: A Scenario within a Model Version
        CREATE TABLE IF NOT EXISTS hspf.scenarios (
            scenario_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.scenario_seq'),
            model_version_pk BIGINT NOT NULL REFERENCES hspf.model_versions(model_version_pk),
            scenario_name VARCHAR NOT NULL, -- e.g., 'Baseline_2020', 'Future_Climate_BMPs'
            description VARCHAR,
            UNIQUE (model_version_pk, scenario_name)
        );

        -- Level 4: A single execution (Run) of a Scenario
        CREATE TABLE IF NOT EXISTS hspf.model_runs (
            model_run_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.model_run_seq'),
            scenario_pk BIGINT NOT NULL REFERENCES hspf.scenarios(scenario_pk),
            run_id BIGINT,
            run_name VARCHAR, -- e.g., 'Run_1995-2015', 'Calibration_Run_A'
            start_year INTEGER,
            end_year INTEGER,
            run_timestamp TIMESTAMP DEFAULT current_timestamp,
            notes VARCHAR
        );
    ''')

def create_model_run_table(con: duckdb.DuckDBPyConnection):
    """
    Creates the table to store individual model runs linked to scenarios.
    """
    con.execute(
        '''
        CREATE SEQUENCE IF NOT EXISTS model_run_seq START 1;

        -- Table: hspf.model_runs
        -- Purpose: Stores individual model runs linked to scenarios.
        CREATE TABLE IF NOT EXISTS model_runs (
            model_run_pk BIGINT PRIMARY KEY DEFAULT nextval('model_run_seq'),
            model_name VARCHAR NOT NULL, -- e.g., 'Nemadji River Basin Model'
            run_id BIGINT,
            run_name VARCHAR, -- e.g., 'Run_1995-2015', 'Calibration_Run_A'
            notes VARCHAR
        );
        ''')

def insert_model_run(con: duckdb.DuckDBPyConnection, model_name: str, run_id: int, run_name: str = None, notes: str = None):
    """
    Inserts a new model run into the model_runs table.
    """
    con.execute(
        '''
        INSERT INTO model_runs (model_name, run_id, run_name, notes)
        VALUES (?, ?, ?, ?)
        ''',
        (model_name, run_id, run_name, notes)
    )

def create_structure_tables(con: duckdb.DuckDBPyConnection):
    """
    Creates tables that define the structural components of an HSPF model,
    linking them to a core model definition.
    """
    con.execute(
        '''
        CREATE SEQUENCE IF NOT EXISTS hspf.operation_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS hspf.catchment_seq START 1;

        -- Table: hspf.operations
        -- Purpose: Registry of all land segments and reaches (e.g., PERLND, RCHRES).
        CREATE TABLE IF NOT EXISTS hspf.operations (
            operation_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.operation_seq'),
            model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
            operation_id INTEGER NOT NULL, -- e.g., The PERLND number (101)
            operation_type VARCHAR NOT NULL, -- e.g., 'PERLND', 'RCHRES'
            UNIQUE (model_pk, operation_id, operation_type)
        );

        -- Table: hspf.catchments
        -- Purpose: Defines the subwatersheds or catchments in the model.
        CREATE TABLE IF NOT EXISTS hspf.catchments (
            catchment_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.catchment_seq'),
            model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
            catchment_id INTEGER NOT NULL,
            catchment_name VARCHAR,
            UNIQUE (model_pk, catchment_id)
        );

        -- Table: hspf.catchment_operations
        -- Purpose: Maps operations (land segments) to catchments, defining the model's spatial structure and connectivity.
        CREATE TABLE IF NOT EXISTS hspf.catchment_operations (
            catchment_pk BIGINT REFERENCES hspf.catchments(catchment_pk),
            source_operation_pk BIGINT REFERENCES hspf.operations(operation_pk),
            target_operation_pk BIGINT REFERENCES hspf.operations(operation_pk),
            model_pk BIGINT NOT NULL REFERENCES hspf.models(model_pk),
            value FLOAT,
            mlno INTEGER, -- Mass-link number from SCHEMATIC block
            tmemsb1 INTEGER, -- Mass-link memory storage 1
            tmemsb2 INTEGER -- Mass-link memory storage 2
        );
        ''')


def create_parameter_tables(con: duckdb.DuckDBPyConnection):
    """
    Creates tables to store the parameters, flags, and properties for model operations,
    linking them to the model structure.
    """
    con.execute(
        '''
        CREATE SEQUENCE IF NOT EXISTS hspf.parameter_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS hspf.flag_seq START 1;
        CREATE SEQUENCE IF NOT EXISTS hspf.property_seq START 1;

        -- Table: hspf.parameters
        -- Purpose: Stores numeric model parameters for each operation (e.g., LZSN, UZSN).
        CREATE TABLE IF NOT EXISTS hspf.parameters (
            parameter_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.parameter_seq'),
            operation_pk BIGINT NOT NULL REFERENCES hspf.operations(operation_pk),
            parameter_name VARCHAR,
            parameter_value FLOAT
        );

        -- Table: hspf.flags
        -- Purpose: Stores integer-based flags for model operations (e.g., snow flags).
        CREATE TABLE IF NOT EXISTS hspf.flags (
            flag_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.flag_seq'),
            operation_pk BIGINT NOT NULL REFERENCES hspf.operations(operation_pk),
            flag_name VARCHAR,
            flag_value INTEGER
        );

        -- Table: hspf.properties
        -- Purpose: Stores string-based properties for model operations (e.g., land use names).
        CREATE TABLE IF NOT EXISTS hspf.properties (
            property_pk BIGINT PRIMARY KEY DEFAULT nextval('hspf.property_seq'),
            operation_pk BIGINT NOT NULL REFERENCES hspf.operations(operation_pk),
            property_name VARCHAR,
            property_value VARCHAR
        );
        ''')


def create_timeseries_tables(con: duckdb.DuckDBPyConnection):
    """
    Creates tables for storing model output timeseries, linking them to a specific model run.
    """
    con.execute(
        '''
        CREATE SEQUENCE IF NOT EXISTS timeseries_metadata_seq START 1;

        -- Table: hspf.timeseries_metadata
        -- Purpose: Metadata for each unique timeseries produced by a model run.
        CREATE TABLE IF NOT EXISTS hspf.timeseries_metadata (
            timeseries_pk BIGINT PRIMARY KEY DEFAULT nextval('timeseries_metadata_seq'),
            model_run_pk BIGINT NOT NULL REFERENCES model_runs(model_run_pk),
            operation_pk BIGINT NOT NULL REFERENCES operations(operation_pk),
            ts_name VARCHAR NOT NULL, -- e.g., 'ROVOL','SOSED'
            activity VARCHAR NOT NULL, -- e.g., 'SEDTRN','HYDR'
            timestep VARCHAR NOT NULL, -- e.g., 'hourly','daily'
            unit VARCHAR NOT NULL, -- e.g., 'cfs','mg/L'
            timeseries_type VARCHAR NOT NULL -- e.g., 'cumulative', 'instantaneous'
        );

        -- Table: hspf.timeseries
        -- Purpose: Stores the actual timeseries data points in a narrow/long format.
        CREATE TABLE IF NOT EXISTS hspf.timeseries (
            timeseries_pk BIGINT NOT NULL REFERENCES timeseries_metadata(timeseries_pk),
            datetime TIMESTAMP NOT NULL,
            value DOUBLE,
            UNIQUE(timeseries_pk, datetime)
        );
        ''')

def connect(db_path: str, read_only: bool = False) -> duckdb.DuckDBPyConnection:
    db_path = Path(db_path)
    db_path.parent.mkdir(parents=True, exist_ok=True)
    return duckdb.connect(database=db_path.as_posix(), read_only=read_only)

def insert_df_into_table(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str, schema: str = 'hspf', clear_before_insert: bool = True):
    """
    Inserts a pandas DataFrame into an existing table in a specified schema,
    matching columns by name, making the operation robust to column order.

    Args:
        con: The DuckDB connection object.
        df: The pandas DataFrame to insert.
        table_name: The name of the target table.
        schema: The schema of the target table (e.g., 'hspf', 'analytics').
        clear_before_insert: If True, deletes all rows from the table before insertion.
    """
    target_table = f"{schema}.{table_name}"

    if not df.empty:
        if clear_before_insert:
            print(f"  Clearing all data from {target_table}...")
            con.execute(f"DELETE FROM {target_table}")

        # Get column names from the DataFrame and format them for the SQL query.
        # Quoting column names handles special characters, spaces, and case-sensitivity.
        cols = df.columns
        col_string = ", ".join([f'"{c}"' for c in cols])

        # Register the DataFrame as a temporary view so we can query it
        temp_view_name = "temp_df_to_insert"
        con.register(temp_view_name, df)

        print(f"  Inserting {len(df)} rows into {target_table}...")

        # The SQL statement is now robust to column order in the DataFrame
        sql = f"INSERT INTO {target_table} ({col_string}) SELECT {col_string} FROM {temp_view_name}"
        con.execute(sql)

        # Clean up the temporary view
        con.unregister(temp_view_name)
    else:
        print(f"  DataFrame is empty. Skipping insertion into {target_table}.")
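A minimal sketch of how these helpers might be wired together (the database path, model name, and DataFrame below are illustrative only; the calls follow the function signatures above and build only the tables whose definitions are self-contained):

import pandas as pd
from hspf import warehouse

con = warehouse.connect('warehouse/hspf_models.duckdb')

# Create the schema plus the hierarchy and run tables defined above.
con.execute('CREATE SCHEMA IF NOT EXISTS hspf')
warehouse.create_hspf_model_hierarchy_tables(con)
warehouse.create_model_run_table(con)

# Register a run in the flat model_runs table.
warehouse.insert_model_run(con, model_name='Nemadji River Basin Model',
                           run_id=1, run_name='Calibration_Run_A')

# Persist an arbitrary DataFrame as its own table (dropped and recreated by default).
reach_params = pd.DataFrame({'reach_id': [101, 102], 'LZSN': [6.5, 5.0]})
warehouse.load_df_to_table(con, reach_params, 'hspf.reach_parameters')

con.close()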
{hspf-2.1.0.dist-info → hspf-2.1.1.dist-info}/RECORD
@@ -1,12 +1,22 @@
+hspf/Masslink_Timeseries.csv,sha256=TOV6PpR0SBI0FaAU1T-qyD2DyGsBFjUWZenvWXiS3wA,4985
 hspf/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-hspf/
-hspf/
+hspf/build_warehouse.py,sha256=J3fgycY9xkZdY3C9u0wDaEX1n6satr1t0mQmfg4Fh6E,20205
+hspf/hbn.py,sha256=xUvovcAXXLLLE_ID9kejjiyaAqnh-vwmDLdPLjsGQ8A,19401
+hspf/hbn2.py,sha256=OmuTVDxd0Boyd3GvBgzEfqvP7CTeYIJYPC7EXPgYu30,13190
+hspf/hbn_cy.c,sha256=ZIJwWxyGx8fE5nM1HBd8-zNotmStGZscqXijl3KSRdI,593464
+hspf/hbn_cy.html,sha256=o8wMdvEE547DRXcUHFPgAkkyJ665b6rloGL-qKClaTo,137735
+hspf/hbn_cy.pyx,sha256=T-itpkvHlxHzQHKtJBS-M8_ToLBa1U_ajpV53hh_oI8,4323
+hspf/helpers.py,sha256=cd8J3XfCFmpER475Mk5aFL29612rgop75GRGxlofXQo,3242
 hspf/hspfModel.py,sha256=K_xF7HtuMpDMod56Z3IXDCeGsnUi8KGhly_9tm-mxoY,9070
-hspf/reports.py,sha256=
+hspf/reports.py,sha256=bU9rU9qaffXosxyA3H5OWi4SqQVPiQh6709tTCMYeeU,46286
 hspf/uci.py,sha256=rsi_KJqdfBFp0rlKCHyhmQGdB_rgNE8k6abTjH26UqE,33982
+hspf/validations.py,sha256=BcNT0h5QDZW9lHpXk8KuHQvycl8a_4jQ91srwWFodRo,6666
+hspf/warehouse.py,sha256=1zm1uu_QvevIuAMNPOkzspnFhNpLQrvkq3x3HXSypGg,11898
 hspf/wdm.py,sha256=q0hNqsMNrTkxHtKEX0q0wWlIZabXv6UX2HjNCF9WEW4,12734
 hspf/wdmReader.py,sha256=-akKWB9SpUzXvXoQMeHLZNi_u584KaeEOyHB-YolTWM,22848
 hspf/bin/WinHSPFLt/WinHspfLt.exe,sha256=Afs_nJ62r1VnTL2P4XfiRJ1sH2If5DeGTbcCzoqlanE,74752
+hspf/data/HSPFParameterRanges.csv,sha256=PKz1DRIgpsgTEDrVaSHB9SAGMa5yUBRpyZDc9-CKJJo,28357
+hspf/data/LandUseNames_Mappings.csv,sha256=Bb2toZn6FkPfZ6_8SnzIQvXJ03ycwCuc8uyv4cUowNY,75899
 hspf/data/ParseTable.csv,sha256=ExqUaZg_uUPF5XHGLJEk5_jadnDenKjbwqC4d-iNX_M,193609
 hspf/data/Timeseries Catalog/IMPLND/IQUAL.txt,sha256=r36wt2gYtHKr5SkOcVnpyk5aYZF743AgkJ5o7CvHlIc,1000
 hspf/data/Timeseries Catalog/IMPLND/IWATER.txt,sha256=JZ03DFMq8e3EcflRSQ_BPYIeKe8TH3WYEUMmTF2OQEs,743
@@ -28,8 +38,8 @@ hspf/data/Timeseries Catalog/RCHRES/OXRX.txt,sha256=NWdRFpJ60LsYzCGHjt8Llay3OI8j
 hspf/data/Timeseries Catalog/RCHRES/PLANK.txt,sha256=0MAehIrF8leYQt0Po-9h6IiujzoWOlw-ADCV-bPiqs0,3508
 hspf/data/Timeseries Catalog/RCHRES/SEDTRN.txt,sha256=SiTgD4_YWctTgEfhoMymZfv8ay74xzCRdnI005dXjyE,659
 hspf/parser/__init__.py,sha256=2HvprGVCaJ9L-egvTj1MI-bekq5CNjtSBZfrCtQi3fs,92
-hspf/parser/graph.py,sha256=
+hspf/parser/graph.py,sha256=jvkjz9eNtBFEmxUeQosuQE7XgsIRlrNH-rSny5KBDoE,33046
 hspf/parser/parsers.py,sha256=x3othxQogUmGNe_ctCU20atDrRM_B4lEbVJb3EMbwto,20850
-hspf-2.1.
-hspf-2.1.
-hspf-2.1.
+hspf-2.1.1.dist-info/METADATA,sha256=KtAPnc8v-bT8ow30iHdN1lCm2asH22rvOyKgDGS_kL0,605
+hspf-2.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+hspf-2.1.1.dist-info/RECORD,,

{hspf-2.1.0.dist-info → hspf-2.1.1.dist-info}/WHEEL
File without changes