PyPI - mpcaHydro - Versions diffs - 2.2.7__tar.gz → 2.2.9__tar.gz - Mend

mpcaHydro 2.2.7 (tar.gz) → 2.2.9 (tar.gz)

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (45) hide show

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: mpcaHydro
-Version: 2.2.7
+Version: 2.2.9
 Summary: Python package for downloading MPCA hydrology data
 Project-URL: Homepage, https://github.com/mfratkin1/mpcaHydro
 Author-email: Mulu Fratkin <michael.fratkin@state.mn.us>

mpcahydro-2.2.9/demo.py ADDED Viewed

@@ -0,0 +1,226 @@
+#%%
+from mpcaHydro.data_manager import dataManager
+from pyhcal.repository import Repository
+from mpcaHydro import outlets
+import duckdb
+from mpcaHydro import equis, warehouse, wiski
+from hspf.hspfModel import hspfModel
+from hspf.uci import UCI
+from mpcaHydro import etlSWD
+#%%
+'''
+New approach. Directly load to warehouse from downloads.
+Store raw and processed data in warehouse. For large timeseries I could store
+as parquet files. The transformations using pandas take a bit of time. I imagine doing them
+within duckdb would be faster.
+'''
+# with warehouse.connect(db_path) as con:
+#    df = con.execute("SELECT * FROM staging.wiski").df()
+#    df = wiski.transform(df,filter_qc_codes = False)
+#%%
+model_name = 'Nemadji'
+db_path = f'C:/Users/mfratki/Documents/{model_name}.duckdb'
+start_year = 1996
+end_year = 2030
+replace = True
+filter_qc_codes = True
+equis_stations = outlets.equis_stations(model_name)
+wiski_stations = outlets.wiski_stations(model_name)
+equis.connect('MFRATKI',password = 'DeltaT#MPCA3')
+warehouse.init_db(db_path,reset = True)
+#%% Old approach. Store as indvidual processed station files then load to warehouse
+#df_equis = equis.download(equis_stations)
+#df_wiski = wiski.download(wiski_stations,start_year = start_year, end_year = end_year)
+#%% equis
+def download_equis_data(db_path,station_ids,replace = False):
+  with warehouse.connect(db_path,read_only = False) as con:
+    df = equis.download(station_ids)
+    if not df.empty:
+      warehouse.load_df_to_table(con,df, 'staging.equis',replace = replace)
+      warehouse.load_df_to_table(con,equis.transform(df), 'analytics.equis',replace = replace)
+    else:
+      print('No data neccesary for HSPF calibration available from equis for stations:',station_ids)
+def download_wiski_data(db_path,station_ids,replace = False):
+  with warehouse.connect(db_path,read_only = False) as con:
+    df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
+    if not df.empty:
+      warehouse.load_df_to_table(con,df, 'staging.wiski', replace = replace)
+      warehouse.load_df_to_table(con,wiski.transform(df), 'analytics.wiski',replace = replace)
+    else:
+      print('No data neccesary for HSPF calibration available from wiski for stations:',station_ids)
+# Add to warehouse from custom df. Must contain required normalized columns.
+with warehouse.connect(db_path,read_only = False) as con:
+  if replace:
+     warehouse.drop_station_id(con,station_id,station_origin='equis')
+  warehouse.add_to_table(con,df, 'staging','equis_normalized')
+  warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
+  df = equis.normalize(df.copy())
+  warehouse.add_to_table(con,df, 'staging','equis_normalized')
+  df = equis.transform(df)
+  warehouse.add_to_table(con,df, 'analytics','equis')
+#%% swd
+df = etlSWD.download(equis_stations)
+with warehouse.connect(db_path,read_only = False) as con:
+  warehouse.load_df_to_staging(con,df, 'equis_raw',replace = replace)
+  df = equis.normalize(df.copy())
+  warehouse.add_to_table(con,df, 'staging','equis_normalized')
+  df = equis.transform(df)
+  warehouse.add_to_table(con,df, 'analytics','equis')
+#%% wiski
+      if station_origin == 'wiski':
+          df = wiski.download(station_ids,start_year = start_year, end_year = end_year)
+          warehouse.load_df_to_staging(con,df, 'wiski_raw', replace = replace)
+          df = wiski.normalize(df.copy())
+          warehouse.add_to_table(con,df, 'staging','wiski_normalized')
+          df = wiski.transform(df,filter_qc_codes = filter_qc_codes)
+          warehouse.add_to_table(con,df, 'analytics','wiski') # method includes normalization
+      if station_origin == 'swd':
+          df = pd.concat([etlSWD.download(station_id) for station_id in station_ids])
+          warehouse.load_df_to_staging(con,df, 'equis_raw', replace = replace)
+          df = etlSWD.transform(df.copy())
+          warehouse.add_to_table(con,df, 'analytics','equis')
+      warehouse.update_views(con)
+with warehouse.connect(db_path) as con:
+  warehouse.update_views(con)
+#%%
+import requests
+url = 'http://ifrshiny.seas.umich.edu/mglp/'
+requests.get(url)
+db_path = 'C:/Users/mfratki/Documents/Rum.duckdb'
+modl_db.build_outlet_db(db_path)
+con = duckdb.connect(db_path)
+con.execute("SELECT * FROM station_reach_pairs").df()
+con.execute('SELECT * FROM station_reach_pairs WHERE outlet_id = 76').df()
+# Need to remove duplicates from MODL_DB
+modl_db.MODL_DB.loc[modl_db.MODL_DB.duplicated(['station_id','source'])]
+#%%
+dm = dataManager('C:/Users/mfratki/Documents/')
+dm._build_warehouse()
+equis_stations = modl_db.equis_stations('Nemadji')
+wiski_stations = modl_db.wiski_stations('Nemadji')
+#%% Old approach. Store as indvidual processed station files then load to warehouse
+for station_id in equis_stations:
+    dm._download_station_data(station_id,'equis', True)
+for station_id in wiski_stations:
+    dm._download_station_data(station_id,'wiski', True)
+#%% Adding HSPF outputs to warehouse
+con = duckdb.connect(db_path)
+model_name = 'Nemadji'
+outlets = [group for _, group in modl_db.MODL_DB.query('repository_name == @model_name').groupby(by = ['opnids','repository_name'])]
+for outlet in outlets:
+    1+1
+dfs = []
+for constituent in ['Q','TSS','TP','N','OP','TKN']:
+    opnids = modl_db.split_opnids([opnid.split(',') for opnid in set(outlet['opnids'].tolist())])
+    for opnid in opnids:
+        df = mod.hbns.get_reach_constituent(constituent,opnids,time_step='h')
+        df.columns = ['value']
+        df['constituent'] = constituent
+        df['operation'] = operation
+        df['opnid'] = opnid
+        dfs.append(df)
+df = pd.concat(dfs).reset_index()
+df['model_name'] = model_name
+station_ids = ['H05018001','S006-214','S015-102']
+target_constituent = 'TSS'
+flow_constituent = 'Q'
+# build placeholders for the IN list (one ? per station id)
+placeholders = ','.join(['?'] * len(station_ids))
+sql = f'''
+SELECT o.*, f.datetime AS flow_datetime, f.value AS flow, f.baseflow, f.station_id AS flow_station_id, f.station_origin AS flow_station_origin
+FROM analytics.observations o
+JOIN analytics.observations f
+  ON o.datetime = f.datetime
+WHERE o.constituent = ?
+  AND o.station_id IN ({placeholders})
+  AND f.constituent = ?;
+'''
+# parameter order must match the ? positions in the query
+params = [target_constituent] + station_ids + [flow_constituent]
+df = con.execute(sql, params).df()
+outlet_id: station_ids
+outlet_id: opnid
+outlets = []
+for index, (_, group) in enumerate(modl_db.MODL_DB.groupby(by = ['opnids','repository_name'])):
+    group['outlet_id'] = index
+    group.reset_index(drop=True, inplace=True)
+    outlets.append(group)
+    for _, row in group.iterrows():
+        opnids = group.split_opnids(row['opnids'].str.split(',').to_list())
+        row*len(opnids)

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/pyproject.toml RENAMED Viewed

@@ -5,7 +5,7 @@ build-backend = "hatchling.build"
 [project]
 name = "mpcaHydro"
 urls = { "Homepage" = "https://github.com/mfratkin1/mpcaHydro" }  # ? Add this!
-version = "2.2.7"
+version = "2.2.9"
 dependencies = [
   "pandas",
   "requests",

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/outlet.duckdb RENAMED Viewed

Binary file

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_EQUIS.gpkg RENAMED Viewed

Binary file

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/data/stations_wiski.gpkg RENAMED Viewed

Binary file

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/equis.py RENAMED Viewed

@@ -265,6 +265,13 @@ def replace_nondetects(df):
     df.loc[df['value'].isna(), 'value'] = 0
     return df
+def filter_years(df, start_year=1996, end_year=None):
+    '''Filter Equis data to include only samples within a certain year range.'''
+    df = df[df['datetime'].dt.year >= start_year]
+    if end_year is not None:
+        df = df[df['datetime'].dt.year <= end_year]
+    return df
 def normalize(df):
     '''Normalize Equis data: select relevant columns.'''
     df = map_constituents(df)
@@ -278,6 +285,7 @@ def transform(df):
     df = normalize(df)
     df = replace_nondetects(df)
+    df = filter_years(df)
     if not df.empty:
         df = average_results(df)
     return df

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/outlets.py RENAMED Viewed

@@ -31,7 +31,15 @@ DB_PATH = str(Path(__file__).resolve().parent/'data\\outlet.duckdb')
 MODL_DB = pd.concat([stations_wiski,stations_equis])
 MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
 MODL_DB = MODL_DB.dropna(subset='opnids')
+MODL_DB = MODL_DB.dropna(subset = 'repo_name')
 MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
+# Add outlet_id column to MODL_DB based on enumerate grouping
+outlet_id_map = {}
+for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
+    for idx in group.index:
+        outlet_id_map[idx] = int(outlet_id)
+MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
 def _reload():
     global _stations_wiski, stations_wiski, _stations_equis, stations_equis, MODL_DB
@@ -47,7 +55,14 @@ def _reload():
     MODL_DB = pd.concat([stations_wiski,stations_equis])
     MODL_DB['opnids'] = MODL_DB['opnids'].str.strip().replace('',pd.NA)
     MODL_DB = MODL_DB.dropna(subset='opnids')
+    MODL_DB = MODL_DB.dropna(subset = 'repo_name')
     MODL_DB = MODL_DB.drop_duplicates(['station_id','source']).reset_index(drop=True)
+    # Add outlet_id column to MODL_DB based on enumerate grouping
+    outlet_id_map = {}
+    for outlet_id, (_, group) in enumerate(MODL_DB.drop_duplicates(['station_id','source']).groupby(by=['opnids','repo_name'])):
+        for idx in group.index:
+            outlet_id_map[idx] = int(outlet_id)
+    MODL_DB['outlet_id'] = MODL_DB.index.map(outlet_id_map)
 def split_opnids(opnids: list):
@@ -144,7 +159,7 @@ def get_outlets_by_reach(reach_id: int, model_name: str):
             """,
         [reach_id, model_name]).fetchdf()
     return df
 def get_outlets_by_station(station_id: str, station_origin: str):
     """
     Return all outlet rows for outlets that include the given reach_id in the given model_name.
@@ -160,6 +175,47 @@ def get_outlets_by_station(station_id: str, station_origin: str):
         [station_id, station_origin]).fetchdf()
     return df
+def get_station_opnids(station_id: str, station_origin: str):
+    """
+    Return all model reach IDs (opnids) associated with the given station ID and origin.
+    """
+    with connect(DB_PATH) as con:
+        df = con.execute(
+        """
+        SELECT r.reach_id
+        FROM outlets.station_reach_pairs r
+        WHERE r.station_id = ? AND r.station_origin = ?
+        """,
+        [station_id, station_origin]).fetchdf()
+    return df['reach_id'].tolist()
+def get_outlet_opnids(outlet_id: int):
+    """
+    Return all model reach IDs (opnids) associated with the given outlet ID.
+    """
+    with connect(DB_PATH) as con:
+        df = con.execute(
+        """
+        SELECT r.reach_id
+        FROM outlets.station_reach_pairs r
+        WHERE r.outlet_id = ?
+        """,
+        [outlet_id]).fetchdf()
+    return list(set(df['reach_id'].tolist()))
+def get_outlet_stations(outlet_id: int):
+    """
+    Return all station IDs and origins associated with the given outlet ID.
+    """
+    with connect(DB_PATH) as con:
+        df = con.execute(
+        """
+        SELECT r.station_id, r.station_origin
+        FROM outlets.station_reach_pairs r
+        WHERE r.outlet_id = ?
+        """,
+        [outlet_id]).fetchdf()
+    return df[['station_id', 'station_origin']].drop_duplicates().to_dict(orient='records')
 class OutletGateway:
@@ -179,7 +235,7 @@ class OutletGateway:
         return equis_station_opnids(self.model_name)
     def station_opnids(self):
-        return station_opnids(self.model_name)
+        return mapped_station_opnids(self.model_name)
     def equis_stations(self):
         return equis_stations(self.model_name)
@@ -207,6 +263,12 @@ class OutletGateway:
         assert(station_id in self.wiski_stations() + self.equis_stations()), f"Station ID {station_id} not found in model {self.model_name}"
         return get_outlets_by_station(station_id, station_origin)
+    def get_outlet_opnids(self, outlet_id: int):
+        return get_outlet_opnids(outlet_id)
+    def get_outlet_stations(self, outlet_id: int):
+        return get_outlet_stations(outlet_id)
 # constructors:
 def build_outlet_db(db_path: str = None):
     if db_path is None:
@@ -222,31 +284,15 @@ def build_outlets(con, model_name: str = None):
     else:
         modl_db = MODL_DB
-    for index, (_, group) in enumerate(modl_db.drop_duplicates(['station_id','source']).groupby(by = ['opnids','repo_name'])):
-        repo_name = group['repo_name'].iloc[0]
-        add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
+    for outlet_id in modl_db['outlet_id'].unique():
+        group = modl_db.query('outlet_id == @outlet_id')
+        repo_name = group['repo_name'].iloc[0]
+        add_outlet(con, outlet_id = int(outlet_id), outlet_name = None, repository_name = repo_name, notes = None)
         opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
         for opnid in opnids:
-            add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
-        for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
-            add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
-def create_outlet_schema(con, model_name : str):
-    for index, (_, group) in enumerate(outlets(model_name)):
-        repo_name = group['repo_name'].iloc[0]
-        add_outlet(con, outlet_id = index, outlet_name = None, repository_name = repo_name, notes = None)
-        opnids = set(split_opnids(group['opnids'].str.split(',').to_list()))
-        for opnid in opnids:
-            add_reach(con, outlet_id = index, reach_id = int(opnid), repository_name = repo_name)
+            add_reach(con, outlet_id = int(outlet_id), reach_id = int(opnid), repository_name = repo_name)
         for _, row in group.drop_duplicates(subset=['station_id', 'source']).iterrows():
-            add_station(con, outlet_id = index, station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
+            add_station(con, outlet_id = int(outlet_id), station_id = row['station_id'], station_origin = row['source'], true_opnid = row['true_opnid'], repository_name= repo_name, comments = row['comments'])
 def add_outlet(con,

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/swd.py RENAMED Viewed

@@ -26,19 +26,21 @@ CONSTITUENT_MAP = {i[0]:i[1] for i in EQUIS_PARAMETER_XREF[['PARAMETER','constit
 #     return df
 import requests
-def _download(station_no):
+def _download(station_id):
     # Replace {station_no} in the URL with the actual station number
-    url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
+    #url = f"https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=json"
+    url = 'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results'
     try:
         # Send a GET request to the URL
-        response = requests.get(url)
+        params = {
+            'stationId': station_id,
+            'format': 'json'
+        }
+        response = requests.get(url,params = params)
         response.raise_for_status()  # Raise exception for HTTP errors
         # Parse the JSON data
-        if response.json()['recordCount'] == 0:
-            return pd.DataFrame(columns = response.json()['column_names'])
-        else:
-            return pd.DataFrame(response.json()['data'])
+        return pd.DataFrame(response.json()['data'])
     except requests.exceptions.RequestException as e:
         print(f"An error occurred: {e}")
@@ -46,14 +48,18 @@ def _download(station_no):
-def download(station_no):
+def download(station_ids):
     #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')
-    df = _download(station_no)
-    if df.empty:
-        return df
-    else:
-        df['station_id'] = station_no
-        return transform(df)
+    dfs = []
+    for station_id in station_ids:
+        df = _download(station_id)
+        if not df.empty:
+            df['station_id'] = station_id
+            dfs.append(df)
+    return pd.concat(dfs, ignore_index=True)
 def info(station_no):
     #df = pd.read_csv(f'https://services.pca.state.mn.us/api/v1/surfacewater/monitoring-stations/results?stationId={station_no}&format=csv')

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse.py RENAMED Viewed

@@ -28,6 +28,23 @@ def init_db(db_path: str,reset: bool = False):
+def validate_schemas(con: duckdb.DuckDBPyConnection):
+    """Validate that the database has the expected schemas and tables."""
+    expected_schemas = {'staging', 'analytics', 'mappings', 'outlets', 'reports'}
+    result = con.execute("SELECT schema_name FROM information_schema.schemata").fetchall()
+    existing_schemas = {row[0] for row in result}
+    missing_schemas = expected_schemas - existing_schemas
+    if missing_schemas:
+        raise ValueError(f"Missing schemas: {missing_schemas}")
+def validate_tables(con: duckdb.DuckDBPyConnection, schema: str, expected_tables: set):
+    """Validate that a schema contains the expected tables."""
+    result = con.execute(f"SELECT table_name FROM information_schema.tables WHERE table_schema = ?", [schema]).fetchall()
+    existing_tables = {row[0] for row in result}
+    missing_tables = expected_tables - existing_tables
+    if missing_tables:
+        raise ValueError(f"Missing tables in {schema} schema: {missing_tables}")
 def create_schemas(con: duckdb.DuckDBPyConnection):
     """Create staging, analytics, hspf, and reports schemas if they do not exist."""
     con.execute(sql_loader.get_schemas_sql())
@@ -96,12 +113,49 @@ def create_mapping_tables(con: duckdb.DuckDBPyConnection):
     else:
         print(f"Warning: WISKI_QUALITY_CODES.csv not found at {wiski_qc_csv_path}")
-def create_outlets_tables(con: duckdb.DuckDBPyConnection):
+def attach_outlets_db(con: duckdb.DuckDBPyConnection, outlets_db_path: str):
+    """
+    Attach an external DuckDB database containing outlet definitions.
+    """
+    create_schemas(con)
+    con.execute(f"ATTACH DATABASE '{outlets_db_path}' AS outlets_db;")
+    tables = con.execute("SHOW TABLES FROM outlets_db").fetchall()
+    print(f"Tables in the source database: {tables}")
+    for table in tables:
+        table_name = table[0]  # Extract table name
+        con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM outlets_db.{table_name}")  # Copy table contents
+    # -- Step 2: Copy all views --
+    # Retrieve the list of views in the source database
+    views = con.execute("SHOW VIEWS FROM outlets_db").fetchall()
+    print(f"Views in the source database: {views}")
+    # Copy each view from source to destination
+    for view in views:
+        view_name = view[0]  # Extract view name
+        # Get the CREATE VIEW statement for the view
+        create_view_sql = con.execute(f"SHOW CREATE VIEW outlets_db.{view_name}").fetchone()[0]
+        # Recreate the view in the destination database (remove the `outlets_db.` prefix if exists)
+        create_view_sql = create_view_sql.replace(f"outlets_db.", "")
+        con.execute(create_view_sql)
+    con.execute(f"ATTACH DATABASE '{outlets_db_path}' AS outlets_db;")
+    # Optional: Detach the source database
+    con.execute("DETACH 'outlets_db'")
+def create_outlets_tables(con: duckdb.DuckDBPyConnection, model_name: str = None):
     """Create tables in the outlets schema to define outlet-station-reach relationships."""
     con.execute(sql_loader.get_outlets_schema_sql())
     con.execute(sql_loader.get_views_outlets_sql())
-    outlets.build_outlets(con)
+    outlets.build_outlets(con, model_name=model_name)
 def create_filtered_wiski_view(con: duckdb.DuckDBPyConnection, data_codes: list):
     """Create a view filtering WISKI data based on specified data codes."""

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/warehouse_functions.py RENAMED Viewed

@@ -101,7 +101,8 @@ def download_wiski_data(
         if overwrite:
             warehouse.drop_station_data(con, station_ids, 'wiski')
         warehouse.add_df_to_table(con, df, 'staging', 'wiski')
-        warehouse.add_df_to_table(con, df_transformed, 'analytics', 'wiski')
+        if not df_transformed.empty:
+            warehouse.add_df_to_table(con, df_transformed, 'analytics', 'wiski')
         warehouse.update_views(con)
     else:
         print('No data necessary for HSPF calibration from wiski for:', station_ids)
@@ -351,7 +352,7 @@ def station_reach_pairs(con: duckdb.DuckDBPyConnection):
     query = '''
     SELECT *,
     FROM
-        reports.station_reach_pairs
+        outlets.station_reach_pairs
     ORDER BY
         outlet_id,
         station_id

{mpcahydro-2.2.7 → mpcahydro-2.2.9}/src/mpcaHydro/wiski.py RENAMED Viewed

@@ -336,6 +336,14 @@ def filter_quality_codes(df, data_codes):
     '''
     return df.loc[df['quality_code'].isin(data_codes)]
+def filter_years(df, start_year=1996, end_year=None):
+    '''Filter Equis data to include only samples within a certain year range.'''
+    df = df[df['datetime'].dt.year >= start_year]
+    if end_year is not None:
+        df = df[df['datetime'].dt.year <= end_year]
+    return df
 def average_results(df):
     #df['datetime'] = pd.to_datetime(df.loc[:,'datetime'])
     df.loc[:,'datetime'] = df.loc[:,'datetime'].dt.round('h')
@@ -392,6 +400,7 @@ def transform(df, filter_qc_codes = True, data_codes = None, baseflow_method = '
             data_codes = DATA_CODES
         df = filter_quality_codes(df, data_codes)
     df = average_results(df)
+    df = filter_years(df, start_year=1996)
     df = calculate_baseflow(df, method = baseflow_method)
     df['station_origin'] = 'wiski'
     #df.set_index('datetime',inplace=True)

mpcahydro-2.2.9/tests/integration/test_dataManager.py ADDED Viewed

@@ -0,0 +1,61 @@
+#%% Imports
+from mpcaHydro.data_manager import dataManager
+from pathlib import Path
+import duckdb
+THIS_DIR = Path(__file__).parent
+WISKI_STATIONS = ['E05011002']
+EQUIS_STATIONS = ['S001-235','S005-115']
+#%%
+def test_build_warehouse():
+    dm = dataManager(THIS_DIR)
+    dm._build_warehouse()
+test_build_warehouse()
+# %%
+def test_equis_data_download():
+    dm = dataManager(THIS_DIR,
+                     oracle_username = 'MFRATKI',
+                     oracle_password = 'DeltaT#MPCA3',
+                     reset=True)
+    dm.connect_to_oracle()
+    dm._download_equis_data(EQUIS_STATIONS)
+test_equis_data_download()
+#%%
+def test_wiski_data_download():
+    dm = dataManager(THIS_DIR, reset=True)
+    dm._download_wiski_data(WISKI_STATIONS)
+test_wiski_data_download()
+#%%
+dm = dataManager(THIS_DIR, reset=False)
+with duckdb.connect(dm.db_path, read_only=True) as con:
+    df = con.execute('SELECT * FROM analytics.outlet_observations').fetch_df()
+    assert(df['outlet_id'].isnull().sum() == 0)
+with duckdb.connect(dm.db_path, read_only=True) as con:
+    df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
+    assert(df['outlet_id'].isnull().sum() == 0)
+    assert(df['value'].isnull().sum() == 0)
+# %%
+dm = dataManager(THIS_DIR, reset=False)
+def test_wiski_download():
+    dm = dataManager(THIS_DIR, reset=False)
+    wiski_stations = WISKI_STATIONS
+    dm._download_wiski_data(wiski_stations)
+    return dm
+test_wiski_download()
+with duckdb.connect(dm.db_path, read_only=True) as con:
+    df = con.execute('SELECT * FROM analytics.outlet_observations_with_flow').fetch_df()
+    assert(df['outlet_id'].isnull().sum() == 0)
+# %%

mpcahydro-2.2.9/tests/integration/test_warehouse.duckdb ADDED Viewed

Binary file

mpcahydro-2.2.9/tests/unit/test_equis.py ADDED Viewed

@@ -0,0 +1,19 @@
+#%%
+from mpcaHydro import equis
+from mpcaHydro import outlets
+#%%
+model_name = 'Rum'
+equis_stations = outlets.equis_stations(model_name)
+equis.connect('MFRATKI',password = 'DeltaT#MPCA3')
+df = equis.download(equis_stations)
+df_normalized = equis.normalize(df.copy())
+expected_columns = ['station_id', 'constituent', 'cas_rn', 'datetime', 'value', 'unit']
+assert all(col in df_normalized.columns for col in expected_columns)
+# %%

mpcahydro-2.2.7/tests/pixi.toml DELETED Viewed

@@ -1,25 +0,0 @@
-[workspace]
-channels   = ["https://prefix.dev/conda-forge"]
-platforms  = ["linux-64", "osx-64", "win-64"]
-[dependencies]
-requests    = "*"
-pandas       = "*"
-time = 			"*"
-pathlib = 		"*"
-spyder = "*"
-jupyter = "*"
-[package]
-name        = "mpcaHydro"
-version     = "0.1.0"
-[package.build]
-backend     = { name = "pixi-build-python", version = "0.1.*" }
-[package.run-dependencies]
-requests    = "*"
-pandas       = "*"
-time = 			"*"
-pathlib = 		"*"