PyPI - mpcaHydro - Versions diffs - 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl - Mend

mpcaHydro 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

mpcaHydro/data/{outlets.duckdb → outlet.duckdb} +0 -0
mpcaHydro/data/stations_EQUIS.gpkg +0 -0
mpcaHydro/data/stations_wiski.gpkg +0 -0
mpcaHydro/data_manager.py +105 -60
mpcaHydro/etlSWD.py +21 -15
mpcaHydro/outlets.py +70 -74
mpcaHydro/reports.py +1 -1
mpcaHydro/warehouse.py +276 -146
mpcaHydro/warehouseManager.py +8 -0
mpcaHydro/wiski.py +57 -5
{mpcahydro-2.2.0.dist-info → mpcahydro-2.2.1.dist-info}/METADATA +1 -1
mpcahydro-2.2.1.dist-info/RECORD +23 -0
mpcahydro-2.2.0.dist-info/RECORD +0 -23
{mpcahydro-2.2.0.dist-info → mpcahydro-2.2.1.dist-info}/WHEEL +0 -0

mpcaHydro/data/{outlets.duckdb → outlet.duckdb} RENAMED Viewed

Binary file

mpcaHydro/data/stations_EQUIS.gpkg CHANGED Viewed

Binary file

mpcaHydro/data/stations_wiski.gpkg CHANGED Viewed

Binary file

mpcaHydro/data_manager.py CHANGED Viewed

@@ -5,6 +5,7 @@ Created on Fri Jun  3 10:01:14 2022
 @author: mfratki
 """
+from copy import replace
 import pandas as pd
 #from abc import abstractmethod
 from pathlib import Path
@@ -64,88 +65,115 @@ def constituent_summary(db_path):
         return res.fetch_df()
 class dataManager():
-    def __init__(self,folderpath, oracle_user = None, oracle_password =None):
+    def __init__(self,folderpath, oracle_username = None, oracle_password =None, reset = False):
         self.data = {}
         self.folderpath = Path(folderpath)
         self.db_path = self.folderpath.joinpath('observations.duckdb')
-        self.oracle_user = oracle_user
+        self.oracle_username = oracle_username
         self.oracle_password = oracle_password
-        warehouse.init_db(self.db_path,reset = False)
-        self.xref = xref
-        self.outlets = outlets
+        if not self.db_path.exists() or reset:
+            self._build_warehouse()
+        self.xref = xref #TODO: implement xref manager class
+        self.outlets = outlets #TODO: implement outlets manager class
         self.reports = reportManager(self.db_path)
     def connect_to_oracle(self):
         assert (self.credentials_exist(), 'Oracle credentials not found. Set ORACLE_USER and ORACLE_PASSWORD environment variables or use swd as station_origin')
-        equis.connect(user = self.oracle_user, password = self.oracle_password)
+        equis.connect(user = self.oracle_username, password = self.oracle_password)
     def credentials_exist(self):
-        if (self.oracle_user is not None) & (self.oracle_password is not None):
+        if (self.oracle_username is not None) & (self.oracle_password is not None):
             return True
         else:
             return False
     def _build_warehouse(self):
-        build_warehouse(self.folderpath)
+        warehouse.init_db(self.db_path.as_posix(),True)
-    def download_station_data(self,station_id,station_origin,overwrite=True,to_csv = False,filter_qc_codes = True, start_year = 1996, end_year = 2030,baseflow_method = 'Boughton'):
-        '''
-        Method to download data for a specific station and load it into the warehouse.
-        :param self: Description
-        :param station_id: Station identifier
-        :param station_origin: source of station data: wiski, equis, or swd
-        :param overwrite: Whether to overwrite existing data
-        :param to_csv: Whether to export data to CSV
-        :param filter_qc_codes: Whether to filter quality control codes
-        :param start_year: Start year for data download
-        :param end_year: End year for data download
-        :param baseflow_method: Method for baseflow calculation
-        '''
-        with duckdb.connect(self.db_path,read_only=False) as con:
-            if overwrite:
-                warehouse.drop_station_id(con,station_id,station_origin)
-                warehouse.update_views(con)
+    def _process_wiski_data(self,filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
+        with warehouse.connect(self.db_path,read_only = False) as con:
+            df = con.execute("SELECT * FROM staging.wiski").df()
+            df_transformed = wiski.transform(df, filter_qc_codes, data_codes, baseflow_method)
+            warehouse.load_df_to_table(con,df_transformed, 'analytics.wiski')
+            warehouse.update_views(con)
-            if station_origin == 'wiski':
-                df = wiski.download([station_id],start_year = start_year, end_year = end_year)
-                warehouse.load_df_to_staging(con,df, 'wiski_raw', replace = overwrite)
-                warehouse.load_df_to_analytics(con,wiski.transform(df,filter_qc_codes = filter_qc_codes,baseflow_method = baseflow_method),'wiski') # method includes normalization
-            elif station_origin == 'equis':
-                assert (self.credentials_exist(), 'Oracle credentials not found. Set ORACLE_USER and ORACLE_PASSWORD environment variables or use swd as station_origin')
-                df = equis.download([station_id])
-                warehouse.load_df_to_staging(con,df, 'equis_raw',replace = overwrite)
-                warehouse.load_df_to_analytics(con,equis.transform(df),'equis')
-            elif station_origin == 'swd':
-                df = etlSWD.download(station_id)
-                warehouse.load_df_to_staging(con,df, 'swd_raw', replace = overwrite)
-                warehouse.load_df_to_analytics(con,etlSWD.transform(df),'swd')
-            else:
-                raise ValueError('station_origin must be wiski, equis, or swd')
-        with duckdb.connect(self.db_path,read_only=False) as con:
+    def _process_equis_data(self):
+        with warehouse.connect(self.db_path,read_only = False) as con:
+            df = con.execute("SELECT * FROM staging.equis").df()
+            df_transformed = equis.transform(df)
+            warehouse.load_df_to_table(con,df_transformed, 'analytics.equis')
             warehouse.update_views(con)
-        if to_csv:
-            self.to_csv(station_id)
+    def _process_data(self,filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
+        self._process_wiski_data(filter_qc_codes, data_codes, baseflow_method)
+        self._process_equis_data()
+    def _update_views(self):
+        with warehouse.connect(self.db_path,read_only = False) as con:
+            warehouse.update_views(con)
+    def _download_wiski_data(self,station_ids,start_year = 1996, end_year = 2030, filter_qc_codes = True, data_codes = None, baseflow_method = 'Boughton'):
+        with warehouse.connect(self.db_path,read_only = False) as con:
+            df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
+            if not df.empty:
+                warehouse.load_df_to_table(con,df, 'staging.wiski')
+                warehouse.load_df_to_table(con,wiski.transform(df, filter_qc_codes,data_codes,baseflow_method), 'analytics.wiski')
+                warehouse.update_views(con)
+            else:
+                print('No data neccesary for HSPF calibration available from wiski for stations:',station_ids)
+    def _download_equis_data(self,station_ids):
+        if self.credentials_exist():
+            self.connect_to_oracle()
+            print('Connected to Oracle database.')
+            with warehouse.connect(self.db_path,read_only = False) as con:
+                df = equis.download(station_ids)
+                if not df.empty:
+                    warehouse.load_df_to_table(con,df, 'staging.equis')
+                    warehouse.load_df_to_table(con,equis.transform(df.copy()), 'analytics.equis')
+                    warehouse.update_views(con)
+                else:
+                    print('No data neccesary for HSPF calibration available from equis for stations:',station_ids)
+        else:
+            raise ValueError('Oracle credentials not found. Set ORACLE_USER and ORACLE_PASSWORD environment variables or use swd as station_origin')
+    def _get_equis_template(self):
+        with duckdb.connect(self.db_path,read_only=True) as con:
+            query = '''
+            SELECT *
+            FROM staging.equis
+            LIMIT 0'''
+            df = con.execute(query).fetch_df().to_csv(self.folderpath.joinpath('equis_template.csv'), index=False)
         return df
-    def get_outlets(self):
+    def _get_wiski_template(self):
+        with duckdb.connect(self.db_path,read_only=True) as con:
+            query = '''
+            SELECT *
+            FROM staging.wiski
+            LIMIT 0'''
+            df = con.execute(query).fetch_df().to_csv(self.folderpath.joinpath('wiski_template.csv'), index=False)
+        return df
+    def get_outlets(self,model_name):
         with duckdb.connect(self.db_path,read_only=True) as con:
             query = '''
             SELECT *
             FROM outlets.station_reach_pairs
+            WHERE repository_name = ?
             ORDER BY outlet_id'''
-            df = con.execute(query).fetch_df()
+            df = con.execute(query,[model_name]).fetch_df()
         return df
     def get_station_ids(self,station_origin = None):
         with duckdb.connect(self.db_path,read_only=True) as con:
             if station_origin is None:
@@ -163,9 +191,7 @@ class dataManager():
         return df['station_id'].to_list()
-    def get_station_data(self,station_ids,constituent,agg_period = None):
+    def get_observation_data(self,station_ids,constituent,agg_period = None):
         with duckdb.connect(self.db_path,read_only=True) as con:
             query = '''
             SELECT *
@@ -184,9 +210,9 @@ class dataManager():
             df.attrs['agg_period'] = agg_period
         df.rename(columns={'value': 'observed'}, inplace=True)
-        return df
+        return df.dropna(subset=['observed'])
-    def get_outlet_data(self,outlet_id,constituent,agg_period = 'D'):
+    def get_outlet_data(self,outlet_id,constituent,agg_period = 'D',to_csv = False):
         with duckdb.connect(self.db_path,read_only=True) as con:
             query = '''
             SELECT *
@@ -207,16 +233,35 @@ class dataManager():
         df.rename(columns={'value': 'observed',
                            'flow_value': 'observed_flow',
                            'baseflow_value': 'observed_baseflow'}, inplace=True)
-        return df
+        return df.dropna(subset=['observed'])
+    def get_raw_data(self,station_id,station_origin, to_csv = False):
+        with duckdb.connect(self.db_path,read_only=True) as con:
+            if station_origin.lower() == 'equis':
+                query = '''
+                SELECT *
+                FROM staging.equis_raw
+                WHERE station_id = ?'''
+            elif station_origin.lower() == 'wiski':
+                query = '''
+                SELECT *
+                FROM staging.wiski_raw
+                WHERE station_id = ?'''
+            else:
+                raise ValueError(f'Station origin {station_origin} not recognized. Valid options are equis or wiski.')
+            df = con.execute(query,[station_id]).fetch_df()
+        if to_csv:
+            df.to_csv(self.folderpath.joinpath(f'{station_id}_raw.csv'), index=False)
+        return df
-    def to_csv(self,station_id,folderpath = None):
+    def to_csv(self,station_id  ,station_origin,folderpath = None):
         if folderpath is None:
             folderpath = self.folderpath
         else:
             folderpath = Path(folderpath)
-        df = self._load(station_id)
+        df = self.get_station_data([station_id],constituent = 'Q',agg_period = None)
         if len(df) > 0:
             df.to_csv(folderpath.joinpath(station_id + '.csv'))
         else:

mpcaHydro/etlSWD.py CHANGED Viewed

@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
 #     return df
 import requests
-def _download(station_no):
+def _download(station_id):
     # Replace {station_no} in the URL with the actual station number
-    url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
+    #url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
+    url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
     try:
         # Send a GET request to the URL
-        response = requests.get(url)
+        params = {
+            'stationId': station_id,
+            'format': 'json'
+        }
+        response = requests.get(url,params = params)
         response.raise_for_status()  # Raise exception for HTTP errors
         # Parse the JSON data
-        if response.json()['recordCount'] == 0:
-            return pd.DataFrame(columns = response.json()['column_names'])
-        else:
-            return pd.DataFrame(response.json()['data'])
+        return pd.DataFrame(response.json()['data'])
     except requests.exceptions.RequestException as e:
         print(f"An error occurred: {e}")
@@ -46,14 +48,18 @@ def _download(station_no):
-def download(station_no):
+def download(station_ids):
     #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
-    df = _download(station_no)
-    if df.empty:
-        return df
-    else:
-        df['station_id'] = station_no
-        return transform(df)
+    dfs = []
+    for station_id in station_ids:
+        df = _download(station_id)
+        if not df.empty:
+            df['station_id'] = station_id
+            dfs.append(df)
+    return pd.concat(dfs, ignore_index=True)
 def info(station_no):
     #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')

mpcaHydro/outlets.py CHANGED Viewed

@@ -14,17 +14,18 @@ import duckdb
 #stations_wiski = gpd.read_file('C:/Users/mfratki/Documents/GitHub/pyhcal/src/pyhcal/data/stations_wiski.gpkg')
 _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
-stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
+stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
 stations_wiski['source'] = 'wiski'
 _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
-stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
+stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
 stations_equis['source'] = 'equis'
 stations_equis['wplmn_flag'] = 0
-DB_PATH = str(Path(__file__).resolve().parent/'data\\outlets.duckdb')
+DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
 MODL_DB = pd.concat([stations_wiski,stations_equis])
 MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
@@ -34,64 +35,69 @@ MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True
 def _reload():
     global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
     _stations_wiski = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_wiski.gpkg'))
-    stations_wiski = _stations_wiski.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name','wplmn_flag']]
+    stations_wiski = _stations_wiski.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name','wplmn_flag']]
     stations_wiski['source'] = 'wiski'
     _stations_equis = gpd.read_file(str(Path(__file__).resolve().parent/'data\\stations_EQUIS.gpkg'))
-    stations_equis = _stations_equis.dropna(subset='opnids')[['station_id','true_opnid','opnids','comments','modeled','repository_name']]
+    stations_equis = _stations_equis.loc[:,['station_id','true_opnid','opnids','comments','modeled','repo_name']]
     stations_equis['source'] = 'equis'
     stations_equis['wplmn_flag'] = 0
     MODL_DB = pd.concat([stations_wiski,stations_equis])
     MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
     MODL_DB = MODL_DB.dropna(subset='opnids')
     MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
-def get_model_db(model_name: str):
-    return MODL_DB.query('repository_name == @model_name')
 def split_opnids(opnids: list):
-    return [abs(int(float(j))) for i in opnids for j in i]
+    return [int(float(j)) for i in opnids for j in i]
+def get_model_db(model_name: str):
+    return MODL_DB.query('repo_name == @model_name')
 def valid_models():
-    return MODL_DB['repository_name'].unique().tolist()
+    return MODL_DB['repo_name'].unique().tolist()
+def equis_stations(model_name):
+    return _stations_equis.query('repo_name == @model_name')['station_id'].tolist()
+def wiski_stations(model_name):
+    return _stations_wiski.query('repo_name == @model_name')['station_id'].tolist()
+def wplmn_stations(model_name):
+    return MODL_DB.query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
 def wplmn_station_opnids(model_name):
-    opnids = MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
+    opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and wplmn_flag == 1 and source == "wiski"')['opnids'].str.split(',').to_list()
     return split_opnids(opnids)
 def wiski_station_opnids(model_name):
-    opnids = MODL_DB.query('repository_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
+    opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['opnids'].str.split(',').to_list()
     return split_opnids(opnids)
 def equis_station_opnids(model_name):
-    opnids = MODL_DB.query('repository_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
+    opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['opnids'].str.split(',').to_list()
     return split_opnids(opnids)
 def station_opnids(model_name):
-    opnids = MODL_DB.query('repository_name == @model_name')['opnids'].str.split(',').to_list()
+    opnids = MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name')['opnids'].str.split(',').to_list()
     return split_opnids(opnids)
-def equis_stations(model_name):
-    return MODL_DB.query('repository_name == @model_name and source == "equis"')['station_id'].tolist()
-def wiski_stations(model_name):
-    return MODL_DB.query('repository_name == @model_name and source == "wiski"')['station_id'].tolist()
+def mapped_equis_stations(model_name):
+    return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "equis"')['station_id'].tolist()
-def wplmn_stations(model_name):
-    return MODL_DB.query('repository_name == @model_name and wplmn_flag == 1 and source == "wiski"')['station_id'].tolist()
+def mapped_wiski_stations(model_name):
+    return MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name and source == "wiski"')['station_id'].tolist()
 def outlets(model_name):
-    return [group for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
+    return [group for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
 def outlet_stations(model_name):
-    return [group['station_id'].to_list() for _, group in MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
+    return [group['station_id'].to_list() for _, group in MODL_DB.dropna(subset=['opnids']).query('repo_name == @model_name').groupby(by = ['opnids','repo_name'])]
-def _split_opnids(opnids: list):
-    return [int(float(j)) for i in opnids for j in i]
 def connect(db_path, read_only=True):
-    Path(db_path).parent.mkdir(parents=True, exist_ok=True)
+    #Path(db_path).parent.mkdir(parents=True, exist_ok=True)
     return duckdb.connect(db_path,read_only=read_only)
@@ -103,7 +109,7 @@ def init_db(db_path: str,reset: bool = False):
     if reset and db_path.exists():
         db_path.unlink()
-    with connect(db_path.as_posix()) as con:
+    with connect(db_path.as_posix(),False) as con:
         con.execute(OUTLETS_SCHEMA)
@@ -202,7 +208,7 @@ def build_outlet_db(db_path: str = None):
     if db_path is None:
         db_path = DB_PATH
     init_db(db_path,reset=True)
-    with connect(db_path) as con:
+    with connect(db_path,False) as con:
         build_outlets(con)
@@ -212,43 +218,35 @@ def build_outlets(con, model_name: str = None):
     else:
         modl_db = MODL_DB
-    for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repository_name'])):
-        repo_name = group['repository_name'].iloc[0]
+    for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repo_name'])):
+        repo_name = group['repo_name'].iloc[0]
         add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
-        opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
+        opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
         for opnid in opnids:
-            if opnid < 0:
-                exclude = 1
-            else:
-                exclude = 0
-            add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
+            add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
         for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
             add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
 def create_outlet_schema(con, model_name : str):
-    for index, (_, group) in enumerate(modl_db.outlets(model_name)):
-        repo_name = group['repository_name'].iloc[0]
+    for index, (_, group) in enumerate(outlets(model_name)):
+        repo_name = group['repo_name'].iloc[0]
         add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
-        opnids = set(_split_opnids(group['opnids'].str.split(',').to_list()))
+        opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
         for opnid in opnids:
-            if opnid < 0:
-                exclude = 1
-            else:
-                exclude = 0
-            add_reach(con, outlet_id = index, reach_id = abs(opnid),exclude = exclude, repository_name = repo_name)
+            add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
         for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
             add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
 def add_outlet(con,
-               outlet_id: str,
+               outlet_id: int,
                repository_name: str,
                outlet_name = None,
                notes = None):
@@ -256,15 +254,15 @@ def add_outlet(con,
     Insert an outlet. repository_name is required.
     """
     con.execute(
-        "INSERT INTO outlets.outlets (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
+        "INSERT INTO outlets.outlet_groups (outlet_id, repository_name, outlet_name, notes) VALUES (?, ?, ?, ?)",
         [outlet_id, repository_name, outlet_name, notes]
     )
 def add_station(con,
-                outlet_id: str,
-                station_id: str,
+                outlet_id: int,
+                station_id: int,
                 station_origin: str,
-                true_opnid: str,
+                true_opnid: int,
                 repository_name: str,
                 comments = None):
     """
@@ -281,19 +279,17 @@ def add_station(con,
     )
 def add_reach(con,
-              outlet_id: str,
-              reach_id: str,
-              repository_name: str,
-              exclude: int = 0):
+              outlet_id: int,
+              reach_id: int,
+              repository_name: str):
     """
     Insert a reach membership for an outlet.
     - repository_name is required and participates in the PK (reach_id, repository_name).
-    - exclude = 1 to mark a reach as excluded from association views.
     """
     con.execute(
-        """INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name, exclude)
-           VALUES (?, ?, ?, ?)""",
-        [outlet_id, reach_id, repository_name, int(exclude)]
+        """INSERT INTO outlets.outlet_reaches (outlet_id, reach_id, repository_name)
+           VALUES (?, ?, ?)""",
+        [outlet_id, reach_id, repository_name]
     )
@@ -303,8 +299,10 @@ OUTLETS_SCHEMA  = """-- schema.sql
 -- Table 1: outlets
 -- Represents a logical grouping that ties stations and reaches together.
-CREATE TABLE IF NOT EXISTS outlets (
-  outlet_id TEXT PRIMARY KEY,
+CREATE SCHEMA IF NOT EXISTS outlets;
+CREATE TABLE IF NOT EXISTS outlets.outlet_groups  (
+  outlet_id INTEGER PRIMARY KEY,
   repository_name TEXT NOT NULL,
   outlet_name TEXT,
   notes TEXT             -- optional: general notes about the outlet grouping
@@ -312,42 +310,40 @@ CREATE TABLE IF NOT EXISTS outlets (
 -- Table 2: outlet_stations
 -- One-to-many: outlet -> stations
-CREATE TABLE IF NOT EXISTS outlet_stations (
-  outlet_id TEXT NOT NULL,
+CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
+  outlet_id INTEGER NOT NULL,
   station_id TEXT NOT NULL,
   station_origin TEXT NOT NULL,       -- e.g., 'wiski', 'equis'
   repository_name TEXT NOT NULL,  -- repository model the station is physically located in
-  true_opnid TEXT NOT NULL,           -- The specific reach the station physically sits on (optional)
+  true_opnid INTEGER NOT NULL,           -- The specific reach the station physically sits on (optional)
   comments TEXT,             -- Per-station comments, issues, etc.
   CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
-  FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
+  FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
 );
 -- Table 3: outlet_reaches
 -- One-to-many: outlet -> reaches
 -- A reach can appear in multiple outlets, enabling many-to-many overall.
-CREATE TABLE IF NOT EXISTS outlet_reaches (
-  outlet_id TEXT NOT NULL,
-  reach_id TEXT NOT NULL,    -- model reach identifier (aka opind)
+CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
+  outlet_id INTEGER NOT NULL,
+  reach_id INTEGER NOT NULL,    -- model reach identifier (aka opind)
   repository_name TEXT NOT NULL,  -- optional: where the mapping comes from
-  exclude INTEGER DEFAULT 0, -- flag to indicate if this reach should be excluded (1) or included (0)
-  FOREIGN KEY (outlet_id) REFERENCES outlets(outlet_id)
+  FOREIGN KEY (outlet_id) REFERENCES outlets.outlet_groups(outlet_id)
 );
 -- Useful views:
 -- View: station_reach_pairs
 -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
-CREATE VIEW IF NOT EXISTS station_reach_pairs AS
+CREATE OR REPLACE VIEW outlets.station_reach_pairs AS
 SELECT
   s.outlet_id,
   s.station_id,
   s.station_origin,
   r.reach_id,
-  r.exclude,
-  r.repository_name,
-FROM outlet_stations s
-JOIN outlet_reaches r
+  r.repository_name
+FROM outlets.outlet_stations AS s
+JOIN outlets.outlet_reaches AS r
   ON s.outlet_id = r.outlet_id;
 """

mpcaHydro/reports.py CHANGED Viewed

@@ -43,7 +43,7 @@ def wiski_qc_counts(con: duckdb.DuckDBPyConnection):
     query = '''
     SELECT *,
     FROM
-        staging.wiski_qc_count
+        reports.wiski_qc_count
     ORDER BY
         station_no,
         parametertype_name

mpcaHydro 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl

mpcaHydro 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl