pyhcal 1.1.0__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
pyhcal/modl_db.py DELETED
@@ -1,319 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- """
3
- Created on Thu May 1 09:51:51 2025
4
-
5
- @author: mfratki
6
- """
7
- #import sqlite3
8
- from pathlib import Path
9
- import geopandas as gpd
10
- import pandas as pd
11
- import duckdb
12
- #from hspf_tools.calibrator import etlWISKI, etlSWD
13
-
14
-
15
- #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
16
-
17
-
18
# Directory that holds the packaged station tables and the outlets database.
# The path is composed segment by segment: a literal 'data\\...' component is
# only split into directory + file on Windows and fails to resolve elsewhere.
_DATA_DIR = Path(__file__).resolve().parent / 'data'

# WISKI stations: raw table, then only the rows that carry an 'opnids' value.
_stations_wiski = gpd.read_file(str(_DATA_DIR / 'stations_wiski.gpkg'))
# .copy() so the 'source' column is added to an independent frame rather
# than to a filtered slice of the raw table.
stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']].copy()
stations_wiski['source'] = 'wiski'

# EQUIS stations: same treatment; this table has no 'wplmn_flag' column
# selected, so the flag is filled with 0.
_stations_equis = gpd.read_file(str(_DATA_DIR / 'stations_EQUIS.gpkg'))
stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']].copy()
stations_equis['source'] = 'equis'
stations_equis['wplmn_flag'] = 0


# Location of the DuckDB file used by the accessor functions below.
DB_PATH = str(_DATA_DIR / 'outlets.duckdb')

# Combined station table: blank 'opnids' strings are treated as missing and
# dropped, then one row is kept per (station_id, source) pair.
MODL_DB = pd.concat([stations_wiski,stations_equis])
MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
MODL_DB = MODL_DB.dropna(subset='opnids')
MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
33
-
34
def _reload():
    """
    Re-read both station tables from disk and rebuild the module-level tables.

    Rebinds _stations_wiski, stations_wiski, _stations_equis, stations_equis
    and MODL_DB using the same steps that run at import time.
    """
    global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB

    # Compose the path segment by segment: a literal 'data\\...' component is
    # only split into directory + file on Windows.
    data_dir = Path(__file__).resolve().parent / 'data'

    _stations_wiski = gpd.read_file(str(data_dir / 'stations_wiski.gpkg'))
    # .copy() so the added column lands on an independent frame, not a slice.
    stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']].copy()
    stations_wiski['source'] = 'wiski'

    _stations_equis = gpd.read_file(str(data_dir / 'stations_EQUIS.gpkg'))
    stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']].copy()
    stations_equis['source'] = 'equis'
    stations_equis['wplmn_flag'] = 0

    # Blank 'opnids' strings count as missing; keep one row per (station_id, source).
    MODL_DB = pd.concat([stations_wiski,stations_equis])
    MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
    MODL_DB = MODL_DB.dropna(subset='opnids')
    MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
48
-
49
-
50
def get_model_db(model_name: str):
    """
    Return the MODL_DB rows whose repository_name equals model_name.
    """
    selection = 'repository_name == @model_name'
    model_rows = MODL_DB.query(selection)
    return model_rows
52
-
53
def split_opnids(opnids: list):
    """
    Flatten a list of token lists into one list of non-negative integers.

    Each token is converted with float() first, then int(), and its sign is
    discarded with abs().
    """
    flattened = []
    for tokens in opnids:
        for token in tokens:
            flattened.append(abs(int(float(token))))
    return flattened
55
-
56
def valid_models():
    """
    Return the distinct repository_name values present in MODL_DB as a list.
    """
    repository_names = MODL_DB['repository_name']
    return repository_names.unique().tolist()
58
-
59
def wplmn_station_opnids(model_name):
    """
    Return the unsigned opnids of the model's wiski stations that have wplmn_flag == 1.
    """
    flagged_rows = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')
    # Each 'opnids' cell is a comma-separated string; split it into tokens.
    token_lists = flagged_rows['opnids'].str.split(',').to_list()
    return split_opnids(token_lists)
62
-
63
def wiski_station_opnids(model_name):
    """
    Return the unsigned opnids of the model's stations whose source is "wiski".
    """
    wiski_rows = MODL_DB.query('repository_name == @model_name and source == "wiski"')
    # Each 'opnids' cell is a comma-separated string; split it into tokens.
    token_lists = wiski_rows['opnids'].str.split(',').to_list()
    return split_opnids(token_lists)
66
-
67
def equis_station_opnids(model_name):
    """
    Return the unsigned opnids of the model's stations whose source is "equis".
    """
    equis_rows = MODL_DB.query('repository_name == @model_name and source == "equis"')
    # Each 'opnids' cell is a comma-separated string; split it into tokens.
    token_lists = equis_rows['opnids'].str.split(',').to_list()
    return split_opnids(token_lists)
70
-
71
def station_opnids(model_name):
    """
    Return the unsigned opnids of every station listed for the model, any source.
    """
    model_rows = MODL_DB.query('repository_name == @model_name')
    # Each 'opnids' cell is a comma-separated string; split it into tokens.
    token_lists = model_rows['opnids'].str.split(',').to_list()
    return split_opnids(token_lists)
74
-
75
def equis_stations(model_name):
    """
    Return the station_id values of the model's stations whose source is "equis".
    """
    equis_rows = MODL_DB.query('repository_name == @model_name and source == "equis"')
    station_ids = equis_rows['station_id']
    return station_ids.tolist()
77
-
78
def wiski_stations(model_name):
    """
    Return the station_id values of the model's stations whose source is "wiski".
    """
    wiski_rows = MODL_DB.query('repository_name == @model_name and source == "wiski"')
    station_ids = wiski_rows['station_id']
    return station_ids.tolist()
80
-
81
def wplmn_stations(model_name):
    """
    Return the station_id values of the model's wiski stations that have wplmn_flag == 1.
    """
    flagged_rows = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')
    station_ids = flagged_rows['station_id']
    return station_ids.tolist()
83
-
84
def outlets(model_name):
    """
    Return the model's MODL_DB rows as a list of DataFrames, one per
    distinct (opnids, repository_name) combination.
    """
    model_rows = MODL_DB.query('repository_name == @model_name')
    grouped = model_rows.groupby(by = ['opnids','repository_name'])
    # Only the group frames are returned; the group keys are dropped.
    return [frame for _key, frame in grouped]
86
-
87
def outlet_stations(model_name):
    """
    Return one list of station_id values per distinct (opnids, repository_name)
    combination found in the model's MODL_DB rows.
    """
    model_rows = MODL_DB.query('repository_name == @model_name')
    station_lists = []
    for _key, frame in model_rows.groupby(by = ['opnids','repository_name']):
        station_lists.append(frame['station_id'].to_list())
    return station_lists
89
-
90
def _split_opnids(opnids: list):
    """
    Flatten a list of token lists into one list of signed integers.

    Each token is converted with float() first, then int(). Unlike
    split_opnids, the sign of each value is kept.
    """
    flattened = []
    for tokens in opnids:
        for token in tokens:
            flattened.append(int(float(token)))
    return flattened
92
-
93
def connect(db_path):
    """
    Open a DuckDB connection to db_path, creating its parent directory first
    if it does not exist.
    """
    parent_dir = Path(db_path).parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    connection = duckdb.connect(db_path)
    return connection
96
-
97
-
98
def init_db(db_path: str,reset: bool = False):
    """
    Execute OUTLETS_SCHEMA against the DuckDB database at db_path.

    When reset is true and the file already exists, it is deleted first so
    the schema is created in a fresh database.
    """
    target = Path(db_path)
    if reset:
        if target.exists():
            target.unlink()

    # connect() receives the path as a forward-slash string.
    with connect(target.as_posix()) as con:
        con.execute(OUTLETS_SCHEMA)
108
-
109
-
110
-
111
- # Accessors:
112
def get_outlets_by_model(model_name: str):
    """
    Return, as a DataFrame, every station_reach_pairs row whose
    repository_name equals model_name. Reads from the database at DB_PATH.
    """
    sql = """
        SELECT r.*
        FROM station_reach_pairs r
        WHERE r.repository_name = ?
        """
    params = [model_name]
    with connect(DB_PATH) as con:
        return con.execute(sql, params).fetchdf()
123
-
124
def get_outlets_by_reach(reach_id: int, model_name: str):
    """
    Return, as a DataFrame, every station_reach_pairs row that matches both
    the given reach_id and the given model_name (repository_name).
    Reads from the database at DB_PATH.
    """
    sql = """
        SELECT r.*
        FROM station_reach_pairs r
        WHERE r.reach_id = ? AND r.repository_name = ?
        """
    params = [reach_id, model_name]
    with connect(DB_PATH) as con:
        return con.execute(sql, params).fetchdf()
137
-
138
def get_outlets_by_station(station_id: str, station_origin: str):
    """
    Return, as a DataFrame, every station_reach_pairs row that matches both
    the given station_id and the given station_origin.
    Reads from the database at DB_PATH.
    """
    sql = """
        SELECT r.*
        FROM station_reach_pairs r
        WHERE r.station_id = ? AND r.station_origin = ?
        """
    params = [station_id, station_origin]
    with connect(DB_PATH) as con:
        return con.execute(sql, params).fetchdf()
152
-
153
- # constructors:
154
def build_outlet_db(db_path: str = None):
    """
    Recreate the outlet database from MODL_DB.

    The database at db_path (DB_PATH when omitted) is reset, then one outlet
    is inserted per distinct (opnids, repository_name) group, together with
    that group's reaches and stations. The outlet_id is the group's position
    in the iteration.
    """
    target = DB_PATH if db_path is None else db_path
    init_db(target,reset=True)
    with connect(target) as con:
        unique_stations = MODL_DB.drop_duplicates(['station_id','source'])
        grouped = unique_stations.groupby(by = ['opnids','repository_name'])
        for index, (_, group) in enumerate(grouped):
            repo_name = group['repository_name'].iloc[0]
            add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)

            signed_ids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
            for opnid in signed_ids:
                # A negative opnid is stored unsigned with exclude = 1.
                add_reach(con, outlet_id = index, reach_id = abs(opnid), exclude = 1 if opnid < 0 else 0, repository_name = repo_name)

            station_rows = group.drop_duplicates(subset=['station_id', 'source'])
            for _, row in station_rows.iterrows():
                add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
174
-
175
-
176
def create_outlet_schema(con, model_name : str):
    """
    Insert the outlets, reaches and stations of one model through an open connection.

    Each DataFrame returned by outlets(model_name) becomes one outlet whose
    outlet_id is the frame's position in that list.

    con: open database connection passed on to add_outlet/add_reach/add_station.
    model_name: repository_name of the model to load.
    """
    # outlets() is defined in this module and returns a plain list of
    # DataFrames, so it is called directly (there is no 'modl_db' name in
    # scope here) and each item is the group itself, not a (key, group) pair.
    # NOTE(review): outlet_id restarts at 0 on every call; confirm callers
    # never load two models into the same database.
    for index, group in enumerate(outlets(model_name)):
        repo_name = group['repository_name'].iloc[0]
        add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)

        opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))

        for opnid in opnids:
            # A negative opnid is stored unsigned with exclude = 1.
            exclude = 1 if opnid < 0 else 0
            add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)

        for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
            add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
192
-
193
-
194
def add_outlet(con,
               outlet_id: str,
               repository_name: str,
               outlet_name = None,
               notes = None):
    """
    Insert one row into the outlets table.

    outlet_name and notes are optional and are stored as NULL when omitted.
    """
    sql = "INSERT INTO outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)"
    values = [outlet_id, repository_name, outlet_name, notes]
    con.execute(sql, values)
206
-
207
def add_station(con,
                outlet_id: str,
                station_id: str,
                station_origin: str,
                true_opnid: str,
                repository_name: str,
                comments = None):
    """
    Insert one row into the outlet_stations table, attaching a station to an outlet.

    comments is optional and is stored as NULL when omitted.
    """
    sql = """INSERT INTO outlet_stations
        (outlet_id, station_id, station_origin, true_opnid, repository_name, comments)
        VALUES (?, ?, ?, ?, ?, ?)"""
    values = [outlet_id, station_id, station_origin, true_opnid, repository_name, comments]
    con.execute(sql, values)
226
-
227
def add_reach(con,
              outlet_id: str,
              reach_id: str,
              repository_name: str,
              exclude: int = 0):
    """
    Insert one row into the outlet_reaches table, attaching a reach to an outlet.

    exclude is coerced with int() before it is stored; pass 1 to mark the
    reach as excluded, 0 (the default) otherwise.
    """
    sql = """INSERT INTO outlet_reaches (outlet_id, reach_id, repository_name, exclude)
        VALUES (?, ?, ?, ?)"""
    exclude_flag = int(exclude)
    con.execute(sql, [outlet_id, reach_id, repository_name, exclude_flag])
242
-
243
-
244
# DDL executed by init_db(): three tables (outlets, outlet_stations,
# outlet_reaches), the station_reach_pairs view that joins stations to reaches
# through their shared outlet_id, and three indexes.
# The view's SELECT list must not end with a comma before FROM: SQLite, which
# the header below claims compatibility with, rejects that as a syntax error.
OUTLETS_SCHEMA = """-- schema.sql
-- Simple 3-table design to manage associations between model reaches and observation stations via outlets.
-- Compatible with DuckDB and SQLite.

-- Table 1: outlets
-- Represents a logical grouping that ties stations and reaches together.
CREATE TABLE IF NOT EXISTS outlets (
outlet_id TEXT PRIMARY KEY,
repository_name TEXT NOT NULL,
outlet_name TEXT,
notes TEXT -- optional: general notes about the outlet grouping
);

-- Table 2: outlet_stations
-- One-to-many: outlet -> stations
CREATE TABLE IF NOT EXISTS outlet_stations (
outlet_id TEXT NOT NULL,
station_id TEXT NOT NULL,
station_origin TEXT NOT NULL, -- e.g., 'wiski', 'equis'
repository_name TEXT NOT NULL, -- repository model the station is physically located in
true_opnid TEXT NOT NULL, -- The specific reach the station physically sits on (optional)
comments TEXT, -- Per-station comments, issues, etc.
CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Table 3: outlet_reaches
-- One-to-many: outlet -> reaches
-- A reach can appear in multiple outlets, enabling many-to-many overall.
CREATE TABLE IF NOT EXISTS outlet_reaches (
outlet_id TEXT NOT NULL,
reach_id TEXT NOT NULL, -- model reach identifier (aka opind)
repository_name TEXT NOT NULL, -- optional: where the mapping comes from
exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
);

-- Useful views:

-- View: station_reach_pairs
-- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
CREATE VIEW IF NOT EXISTS station_reach_pairs AS
SELECT
s.outlet_id,
s.station_id,
s.station_origin,
r.reach_id,
r.exclude,
r.repository_name
FROM outlet_stations s
JOIN outlet_reaches r
ON s.outlet_id = r.outlet_id;

-- Example indexes (SQLite will accept CREATE INDEX; DuckDB treats them as metadata but it’s okay to define):
CREATE INDEX IF NOT EXISTS idx_outlet_stations_outlet ON outlet_stations(outlet_id);
CREATE INDEX IF NOT EXISTS idx_outlet_reaches_outlet ON outlet_reaches(outlet_id);
CREATE INDEX IF NOT EXISTS idx_station_reach_pairs_station ON outlet_stations(station_id);"""
301
-
302
-
303
- #row = modl_db.MODL_DB.iloc[0]
304
-
305
- #info = etlWISKI.info(row['station_id'])
306
-
307
- #modl_db.MODL_DB.query('source == "equis"')
308
-
309
- # outlet_dict = {'stations': {'wiski': ['E66050001'],
310
- # 'equis': ['S002-118']},
311
- # 'reaches': {'Clearwater': [650]}
312
-
313
-
314
-
315
-
316
- # station_ids = ['S002-118']
317
- # #station_ids = ['E66050001']
318
- # reach_ids = [650]
319
- # flow_station_ids = ['E66050001']
@@ -1,15 +0,0 @@
1
- pyhcal/__init__.py,sha256=4TEpGD-PfEY8yK-od8DpEMA4_iQ-q9y0PBvROXSPdB0,94
2
- pyhcal/calibrators.py,sha256=lDxvXpjLj1Xhp-NPCVUyyWeaMTZdP5FA9Bh-PuwrOHs,30383
3
- pyhcal/figures.py,sha256=Iu7LaN_i2IuDA_nfxj-a8AkG-FTLZVicJ3-efIs5OiE,45534
4
- pyhcal/metrics.py,sha256=GUGHd-op-g1Foj8wnS_JVURSms4ifcC0a5h8ketQ29I,17911
5
- pyhcal/modl_db.py,sha256=z8trT387Gcbg15xAqAcW62c1dbgSSjymv3qOyH3Dyx8,12569
6
- pyhcal/repository.py,sha256=VPHeSbrnFAG1F6tFXdnI2C72xm3dTPb7Z3rkPr--srI,4603
7
- pyhcal/setup_utils.py,sha256=sIINj8_h-MgQhcCH_u95nFTJVw0QEgnR0fZgAitX1iQ,30398
8
- pyhcal/data/HUC_Names.csv,sha256=UGmd3Q5E8DyFWggXzaXWpsRze7sFyrlpYqaYpMWAiGM,18946
9
- pyhcal/data/WISKI_EQUIS_XREF.csv,sha256=bPYq-f4-Qc6jsvUgl81lwXBeFamfDe5TjohqUV1XJlg,1244704
10
- pyhcal/data/outlets.duckdb,sha256=zB1t2NsnrUed-3HOT61DYmtOy89PZ9uWZAwmAEUhleY,2895872
11
- pyhcal/data/stations_EQUIS.gpkg,sha256=SPyxGK5H3bbqMikv45n_ul-KULPNWyad9XcDq_9mXwM,2019328
12
- pyhcal/data/stations_wiski.gpkg,sha256=vlh03SihjlQMIbn25rfPqOKQtJsSYS2FMR65zAznTQQ,905216
13
- pyhcal-1.1.0.dist-info/METADATA,sha256=xRX7vV5LA1UxSsAZze6VMRzTJI6ePZ749Pdf8h-VQgw,560
14
- pyhcal-1.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
15
- pyhcal-1.1.0.dist-info/RECORD,,
File without changes