mpcaHydro 2.2.0__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.
- mpcaHydro/data/{outlets.duckdb → outlet.duckdb} +0 -0
- mpcaHydro/data/stations_EQUIS.gpkg +0 -0
- mpcaHydro/data/stations_wiski.gpkg +0 -0
- mpcaHydro/data_manager.py +105 -60
- mpcaHydro/etlSWD.py +21 -15
- mpcaHydro/outlets.py +70 -74
- mpcaHydro/reports.py +1 -1
- mpcaHydro/warehouse.py +276 -146
- mpcaHydro/warehouseManager.py +8 -0
- mpcaHydro/wiski.py +57 -5
- {mpcahydro-2.2.0.dist-info → mpcahydro-2.2.1.dist-info}/METADATA +1 -1
- mpcahydro-2.2.1.dist-info/RECORD +23 -0
- mpcahydro-2.2.0.dist-info/RECORD +0 -23
- {mpcahydro-2.2.0.dist-info → mpcahydro-2.2.1.dist-info}/WHEEL +0 -0
mpcaHydro/warehouse.py
CHANGED
@@ -18,11 +18,14 @@ def init_db(db_path: str, reset: bool = False):
     # Create tables
     create_outlets_tables(con)
     create_mapping_tables(con)
+    create_staging_tables(con)
     create_analytics_tables(con)
+

     # Create views
-
-
+    update_views(con)
+
+

 def create_schemas(con: duckdb.DuckDBPyConnection):
     """
@@ -34,6 +37,100 @@ def create_schemas(con: duckdb.DuckDBPyConnection):
     con.execute("CREATE SCHEMA IF NOT EXISTS outlets")
     con.execute("CREATE SCHEMA IF NOT EXISTS mappings")

+def create_staging_tables(con: duckdb.DuckDBPyConnection):
+    '''
+    Create necessary tables in the staging schema. These were copied directly from database DDL. Would need to be updated if sources change.
+    '''
+    con.execute("""
+        CREATE TABLE IF NOT EXISTS staging.equis(
+            LATITUDE DOUBLE,
+            LONGITUDE DOUBLE,
+            WID_LIST VARCHAR,
+            SAMPLE_METHOD VARCHAR,
+            SAMPLE_REMARK VARCHAR,
+            FACILITY_ID BIGINT,
+            FACILITY_NAME VARCHAR,
+            FACILITY_TYPE VARCHAR,
+            SYS_LOC_CODE VARCHAR,
+            LOC_NAME VARCHAR,
+            LOC_TYPE VARCHAR,
+            LOC_TYPE_2 VARCHAR,
+            TASK_CODE VARCHAR,
+            SAMPLE_ID BIGINT,
+            SYS_SAMPLE_CODE VARCHAR,
+            TEST_ID BIGINT,
+            ANALYTE_TYPE VARCHAR,
+            ANALYTE_TYPE_DESC VARCHAR,
+            ANALYTIC_METHOD VARCHAR,
+            PREFERRED_NAME VARCHAR,
+            PARAMETER VARCHAR,
+            CAS_RN VARCHAR,
+            CHEMICAL_NAME VARCHAR,
+            GTLT VARCHAR,
+            RESULT_TEXT VARCHAR,
+            RESULT_NUMERIC DOUBLE,
+            RESULT_UNIT VARCHAR,
+            STAT_TYPE INTEGER,
+            VALUE_TYPE VARCHAR,
+            DETECT_FLAG VARCHAR,
+            DETECT_DESC VARCHAR,
+            RESULT_REMARK VARCHAR,
+            RESULT_TYPE_CODE VARCHAR,
+            METHOD_DETECTION_LIMIT VARCHAR,
+            REPORTING_DETECTION_LIMIT VARCHAR,
+            QUANTITATION_LIMIT INTEGER,
+            LAB_QUALIFIERS VARCHAR,
+            INTERPRETED_QUALIFIERS VARCHAR,
+            REPORTABLE_RESULT VARCHAR,
+            APPROVAL_CODE VARCHAR,
+            SENSITIVE_NOTPUBLIC VARCHAR,
+            TEST_TYPE VARCHAR,
+            DILUTION_FACTOR DOUBLE,
+            FRACTION VARCHAR,
+            BASIS VARCHAR,
+            TEMP_BASIS VARCHAR,
+            TEST_REMARK VARCHAR,
+            ANALYSIS_DATE_TIME TIMESTAMP_NS,
+            ANALYSIS_DATE VARCHAR,
+            ANALYSIS_TIME VARCHAR,
+            ANALYSIS_DATE_TIMEZONE VARCHAR,
+            COMPANY_NAME VARCHAR,
+            LAB_NAME_CODE VARCHAR,
+            LAB_SAMPLE_ID VARCHAR,
+            SAMPLE_TYPE_GROUP VARCHAR,
+            SAMPLE_TYPE_CODE VARCHAR,
+            SAMPLE_TYPE_DESC VARCHAR,
+            MEDIUM_CODE VARCHAR,
+            MATRIX_CODE VARCHAR,
+            START_DEPTH DOUBLE,
+            DEPTH_UNIT VARCHAR,
+            SAMPLE_DATE_TIME TIMESTAMP_NS,
+            SAMPLE_DATE VARCHAR,
+            SAMPLE_TIME VARCHAR,
+            SAMPLE_DATE_TIMEZONE VARCHAR,
+            EBATCH DOUBLE);
+        """)
+    con.execute("""
+        CREATE TABLE IF NOT EXISTS staging.wiski(
+            "Timestamp" VARCHAR,
+            "Value" DOUBLE,
+            "Quality Code" BIGINT,
+            "Quality Code Name" VARCHAR,
+            ts_unitsymbol VARCHAR,
+            ts_name VARCHAR,
+            ts_id VARCHAR,
+            station_no VARCHAR,
+            station_name VARCHAR,
+            station_latitude VARCHAR,
+            station_longitude VARCHAR,
+            parametertype_id VARCHAR,
+            parametertype_name VARCHAR,
+            stationparameter_no VARCHAR,
+            stationparameter_name VARCHAR,
+            wplmn_flag BIGINT);
+        """)
+
+
 def create_analytics_tables(con: duckdb.DuckDBPyConnection):
     """
     Create necessary tables in the analytics schema.
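The staging DDL above mirrors the source exports column-for-column, so appends only succeed when an incoming frame matches it exactly. A minimal sketch of the pattern, assuming the duckdb and pandas packages and using a trimmed three-column stand-in for staging.wiski:

    import duckdb
    import pandas as pd

    con = duckdb.connect(":memory:")
    con.execute("CREATE SCHEMA IF NOT EXISTS staging")
    con.execute("""
        CREATE TABLE IF NOT EXISTS staging.wiski(
            "Timestamp" VARCHAR,
            "Value" DOUBLE,
            station_no VARCHAR);
    """)
    df = pd.DataFrame({"Timestamp": ["2024-01-01"], "Value": [1.5], "station_no": ["H123"]})
    con.execute("INSERT INTO staging.wiski SELECT * FROM df")  # DuckDB resolves the local frame by name
    print(con.execute("SELECT * FROM staging.wiski").fetchall())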
@@ -117,64 +214,51 @@ def create_mapping_tables(con: duckdb.DuckDBPyConnection):
     else:
         print(f"Warning: WISKI_QUALITY_CODES.csv not found at {wiski_qc_csv_path}")

+
+def attach_outlets_db(con: duckdb.DuckDBPyConnection, outlets_db_path: str):
+    """
+    Attach an external DuckDB database containing outlet definitions.
+    """
+    create_schemas(con)
+
+    con.execute(f"ATTACH DATABASE '{outlets_db_path}' AS outlets_db;")
+
+    tables = con.execute("SHOW TABLES FROM outlets_db").fetchall()
+    print(f"Tables in the source database: {tables}")
+
+    for table in tables:
+        table_name = table[0]  # Extract table name
+        con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM outlets_db.{table_name}")  # Copy table contents
+
+    # -- Step 2: Copy all views --
+    # Retrieve the list of views in the source database
+    views = con.execute("SHOW VIEWS FROM outlets_db").fetchall()
+    print(f"Views in the source database: {views}")
+
+    # Copy each view from source to destination
+    for view in views:
+        view_name = view[0]  # Extract view name
+
+        # Get the CREATE VIEW statement for the view
+        create_view_sql = con.execute(f"SHOW CREATE VIEW outlets_db.{view_name}").fetchone()[0]
+
+        # Recreate the view in the destination database (remove the `outlets_db.` prefix if exists)
+        create_view_sql = create_view_sql.replace(f"outlets_db.", "")
+        con.execute(create_view_sql)
+
+
+    con.execute(f"ATTACH DATABASE '{outlets_db_path}' AS outlets_db;")
+    # Optional: Detach the source database
+    con.execute("DETACH 'outlets_db'")
+
+
 def create_outlets_tables(con: duckdb.DuckDBPyConnection):
     """
-    Create tables in the outlets schema to define outlet-station-reach relationships.
-    """
-
-
-
-
-        -- Table 1: outlets
-        -- Represents a logical grouping that ties stations and reaches together.
-        CREATE TABLE IF NOT EXISTS outlets.outlets (
-            outlet_id TEXT PRIMARY KEY,
-            repository_name TEXT NOT NULL,
-            outlet_name TEXT,
-            notes TEXT  -- optional: general notes about the outlet grouping
-        );
-
-        -- Table 2: outlet_stations
-        -- One-to-many: outlet -> stations
-        CREATE TABLE IF NOT EXISTS outlets.outlet_stations (
-            outlet_id TEXT NOT NULL,
-            station_id TEXT NOT NULL,
-            station_origin TEXT NOT NULL,  -- e.g., 'wiski', 'equis'
-            repository_name TEXT NOT NULL,  -- repository model the station is physically located in
-            true_opnid TEXT NOT NULL,  -- The specific reach the station physically sits on (optional)
-            comments TEXT,  -- Per-station comments, issues, etc.
-            CONSTRAINT uq_station_origin UNIQUE (station_id, station_origin),
-            FOREIGN KEY (outlet_id) REFERENCES outlets.outlets(outlet_id)
-        );
-
-        -- Table 3: outlet_reaches
-        -- One-to-many: outlet -> reaches
-        -- A reach can appear in multiple outlets, enabling many-to-many overall.
-        CREATE TABLE IF NOT EXISTS outlets.outlet_reaches (
-            outlet_id TEXT NOT NULL,
-            reach_id TEXT NOT NULL,  -- model reach identifier (aka opind)
-            repository_name TEXT NOT NULL,  -- optional: where the mapping comes from
-            exclude INTEGER DEFAULT 0,  -- flag to indicate if this reach should be excluded (1) or included (0)
-            FOREIGN KEY (outlet_id) REFERENCES outlets.outlets(outlet_id)
-        );
-
-        -- Useful views:
-
-        -- View: station_reach_pairs
-        -- Derives the implicit many-to-many station <-> reach relationship via shared outlet_id
-        CREATE VIEW IF NOT EXISTS outlets.station_reach_pairs AS
-        SELECT
-            s.outlet_id,
-            s.station_id,
-            s.station_origin,
-            r.reach_id,
-            r.exclude,
-            r.repository_name,
-        FROM outlets.outlet_stations s
-        JOIN outlets.outlet_reaches r
-            ON s.outlet_id = r.outlet_id;
-
-    """)
+    Create tables in the outlets schema to define outlet-station-reach relationships. Copies from outlets module.
+    """
+    query = outlets.OUTLETS_SCHEMA
+    con.execute(query)
+    outlets.build_outlets(con)

 def create_normalized_wiski_view(con: duckdb.DuckDBPyConnection):
     """
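attach_outlets_db copies every table and view out of an attached DuckDB file into the current database. Note that as released it issues a second ATTACH of the same alias just before the DETACH, which DuckDB will reject because the alias is already in use. A minimal sketch of the attach-copy-detach cycle itself, assuming a source.duckdb file exists alongside the script:

    import duckdb

    con = duckdb.connect(":memory:")
    con.execute("ATTACH DATABASE 'source.duckdb' AS src")
    # SHOW TABLES returns one-column rows of table names
    for (name,) in con.execute("SHOW TABLES FROM src").fetchall():
        con.execute(f"CREATE TABLE {name} AS SELECT * FROM src.{name}")
    con.execute("DETACH src")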
@@ -210,7 +294,7 @@ def create_normalized_wiski_view(con: duckdb.DuckDBPyConnection):
         "Quality Code Name" AS quality_code_name,  -- Rename Quality Code Name to quality_code_name
         parametertype_id,  -- Keeps parametertype_id as is
         constituent  -- Keeps constituent as is
-    FROM staging.
+    FROM staging.wiski;""")


 def create_filtered_wiski_view(con: duckdb.DuckDBPyConnection, data_codes: list):
@@ -254,17 +338,22 @@ def create_staging_qc_count_view(con: duckdb.DuckDBPyConnection):
     Create a view in staging schema that counts quality codes for each station and constituent.
     """
     con.execute("""
-
+    CREATE OR REPLACE VIEW reports.wiski_qc_count AS (
     SELECT
         w.station_no,
         w.parametertype_name,
         w."Quality Code",
-        w."Quality Code
-
-
+        COUNT(w."Quality Code") AS count,
+        wqc."Text",
+        wqc.Description,
+
+    FROM staging.wiski w
+    LEFT JOIN mappings.wiski_quality_codes wqc
+        ON w."Quality Code" = wqc.quality_code
+    WHERE wqc.Active = 1
     GROUP BY
-        w."Quality Code",
-    );
+        w."Quality Code",wqc."Text",wqc.Description,w.parametertype_name, w.station_no
+    );
     """)
     # ORDER BY
     # w.station_no,w.parametertype_name, w."Quality Code"
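A hypothetical usage sketch of the rewritten view, which now joins each station/constituent quality-code count to the code's text from mappings.wiski_quality_codes (assumes a connection to an initialized warehouse):

    qc = con.execute("""
        SELECT station_no, parametertype_name, "Quality Code", "Text", count
        FROM reports.wiski_qc_count
        ORDER BY station_no, count DESC
    """).df()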
@@ -283,7 +372,7 @@ def create_combined_observations_view(con: duckdb.DuckDBPyConnection):
     SELECT datetime,value,station_id,station_origin,constituent,unit
     FROM analytics.wiski;
     """)
-
+

 def create_outlet_observations_view(con: duckdb.DuckDBPyConnection):
     """
@@ -299,8 +388,9 @@ def create_outlet_observations_view(con: duckdb.DuckDBPyConnection):
         COUNT(o.value) AS count
     FROM
         analytics.observations AS o
-
+    INNER JOIN
         outlets.outlet_stations AS os ON o.station_id = os.station_id AND o.station_origin = os.station_origin
+    WHERE os.outlet_id IS NOT NULL
     GROUP BY
         os.outlet_id,
         o.constituent,
@@ -316,51 +406,65 @@ def create_outlet_observations_view(con: duckdb.DuckDBPyConnection):
 def create_outlet_observations_with_flow_view(con: duckdb.DuckDBPyConnection):

     con.execute("""
-    (45 removed lines: the previous view body is not rendered in the source diff)
+    CREATE OR REPLACE VIEW analytics.outlet_observations_with_flow AS
+    WITH
+    -- Extract baseflow data (constituent = 'QB')
+    baseflow_data AS (
+        SELECT
+            outlet_id,
+            datetime,
+            "value" AS baseflow_value
+        FROM
+            analytics.outlet_observations
+        WHERE
+            constituent = 'QB'
+    ),
+
+    -- Extract flow data (constituent = 'Q')
+    flow_data AS (
+        SELECT
+            outlet_id,
+            datetime,
+            "value" AS flow_value
+        FROM
+            analytics.outlet_observations
+        WHERE
+            constituent = 'Q'
+    ),
+
+    -- Extract all other constituent data (not 'Q' or 'QB')
+    constituent_data AS (
+        SELECT
+            outlet_id,
+            datetime,
+            constituent,
+            "value",
+            count
+        FROM
+            analytics.outlet_observations
+        WHERE
+            constituent NOT IN ('Q', 'QB')
+    )
+
+    -- Final join: Only include rows that have baseflow, flow, and constituent data
+    SELECT
+        c.outlet_id,
+        c.constituent,
+        c.datetime,
+        c."value",
+        c.count,
+        f.flow_value,
+        b.baseflow_value
+    FROM
+        constituent_data AS c
+    LEFT JOIN
+        flow_data AS f
+        ON c.outlet_id = f.outlet_id
+        AND c.datetime = f.datetime
+    LEFT JOIN
+        baseflow_data AS b
+        ON c.outlet_id = b.outlet_id
+        AND c.datetime = b.datetime;""")
     # ORDER BY
     # constituent_data.outlet_id,
     # constituent_data.datetime;
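The rewritten view left-joins each non-flow observation to the flow ('Q') and baseflow ('QB') values recorded at the same outlet and timestamp, so paired rows can feed load calculations directly. A hypothetical usage sketch ('TSS' is an illustrative constituent code):

    paired = con.execute("""
        SELECT outlet_id, datetime, "value" AS concentration, flow_value, baseflow_value
        FROM analytics.outlet_observations_with_flow
        WHERE constituent = 'TSS'
        ORDER BY outlet_id, datetime
    """).df()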
@@ -390,10 +494,10 @@ def create_constituent_summary_report(con: duckdb.DuckDBPyConnection):

     # ORDER BY
     # constituent,sample_count;''')
-
+
 def create_outlet_summary_report(con: duckdb.DuckDBPyConnection):
     con.execute("""
-    CREATE VIEW reports.outlet_constituent_summary AS
+    CREATE OR REPLACE VIEW reports.outlet_constituent_summary AS
     SELECT
         outlet_id,
         constituent,
@@ -411,16 +515,6 @@ def create_outlet_summary_report(con: duckdb.DuckDBPyConnection):
     """)


-
-def drop_station_id(con: duckdb.DuckDBPyConnection, station_id: str,station_origin: str):
-    """
-    Drop all data for a specific station from staging and analytics schemas.
-    """
-    con.execute(f"DELETE FROM staging.equis_raw WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    con.execute(f"DELETE FROM staging.wiski_raw WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    con.execute(f"DELETE FROM analytics.equis WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    con.execute(f"DELETE FROM analytics.wiski WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
-    update_views(con)

 def update_views(con: duckdb.DuckDBPyConnection):
     """
@@ -444,16 +538,69 @@ def connect(db_path: str, read_only: bool = False) -> duckdb.DuckDBPyConnection:
     return duckdb.connect(database=db_path.as_posix(), read_only=read_only)


-def
+def drop_station_id(con: duckdb.DuckDBPyConnection, station_id: str,station_origin: str):
+    """
+    Drop all data for a specific station from staging and analytics schemas.
+    """
+    con.execute(f"DELETE FROM staging.equis WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
+    con.execute(f"DELETE FROM staging.wiski WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
+    con.execute(f"DELETE FROM analytics.equis WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
+    con.execute(f"DELETE FROM analytics.wiski WHERE station_id = '{station_id}' AND station_origin = '{station_origin}'")
+    update_views(con)
+
+def get_column_names(con: duckdb.DuckDBPyConnection, table_schema: str, table_name: str) -> list:
+    """
+    Get the column names of a DuckDB table.
+    """
+    #table_schema, table_name = table_name.split('.')
+    query = """
+        SELECT column_name
+        FROM information_schema.columns
+        WHERE table_name = ? AND table_schema = ?
+    """
+    result = con.execute(query,[table_name,table_schema]).fetchall()
+    column_names = [row[0] for row in result]
+    return column_names
+
+
+def add_to_table(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_schema: str, table_name: str):
+    """
+    Append a pandas DataFrame into a DuckDB table. This will create the table
+    if it does not exist.
+    """
+
+
+    # get existing columns
+    existing_columns = get_column_names(con, table_schema, table_name)
+    df = df[[existing_columns]]
+
+
+    # register pandas DF and create table if not exists
+    con.register("tmp_df", df)
+
+    con.execute(f"""
+        INSERT INTO {table_schema}.{table_name}
+        SELECT * FROM tmp_df
+    """)
+    con.unregister("tmp_df")
+
+def add_station_data(con: duckdb.DuckDBPyConnection, station_id: str, station_origin: str, table_schema: str, table_name: str, df: pd.DataFrame, replace: bool = False):
+    """
+    Add station data to the staging and analytics schemas.
+    """
+    if replace:
+        drop_station_id(con, station_id, station_origin)
+    add_to_table(con, df, table_schema, table_name)
+
+
+def load_df_to_table(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str):
     """
     Persist a pandas DataFrame into a DuckDB table. This will overwrite the table
     by default (replace=True).
     """
-    if replace:
-        con.execute(f"DROP TABLE IF EXISTS {table_name}")
     # register pandas DF and create table
     con.register("tmp_df", df)
-    con.execute(f"CREATE TABLE {table_name} AS SELECT * FROM tmp_df")
+    con.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM tmp_df")
     con.unregister("tmp_df")

 def load_df_to_staging(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str, replace: bool = True):
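Two details in this hunk are worth flagging. get_column_names binds its parameters with `?`, but drop_station_id interpolates station_id into SQL via f-strings, which the same parameter-binding style would make safer. And add_to_table subsets the frame with df[[existing_columns]], a list nested inside a list, which is not a valid pandas column key and fails at runtime. A corrected sketch of the column-alignment step (the helper name is illustrative):

    import pandas as pd

    def align_to_table(df: pd.DataFrame, existing_columns: list) -> pd.DataFrame:
        # Keep only, and reorder to, the columns the target table already defines.
        # Note df[existing_columns], not df[[existing_columns]] as released.
        return df[existing_columns]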
@@ -468,23 +615,6 @@ def load_df_to_staging(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str, replace: bool = True):
     con.execute(f"CREATE TABLE staging.{table_name} AS SELECT * FROM tmp_df")
     con.unregister("tmp_df")

-def add_df_to_staging(con: duckdb.DuckDBPyConnection, df: pd.DataFrame, table_name: str):
-    """
-    Append a pandas DataFrame into a staging table. This will create the staging
-    table if it does not exist.
-    """
-    # register pandas DF and create table if not exists
-    con.register("tmp_df", df)
-    con.execute(f"""
-        CREATE TABLE IF NOT EXISTS staging.{table_name} AS
-        SELECT * FROM tmp_df
-    """)
-    con.execute(f"""
-        INSERT INTO staging.{table_name}
-        SELECT * FROM tmp_df
-    """)
-    con.unregister("tmp_df")
-
 def load_csv_to_staging(con: duckdb.DuckDBPyConnection, csv_path: str, table_name: str, replace: bool = True, **read_csv_kwargs):
     """
     Persist a CSV file into a staging table. This will overwrite the staging
@@ -496,7 +626,7 @@ def load_csv_to_staging(con: duckdb.DuckDBPyConnection, csv_path: str, table_name: str, replace: bool = True, **read_csv_kwargs):
        CREATE TABLE staging.{table_name} AS
        SELECT * FROM read_csv_auto('{csv_path}', {', '.join(f"{k}={repr(v)}" for k, v in read_csv_kwargs.items())})
    """)
-
+
 def load_parquet_to_staging(con: duckdb.DuckDBPyConnection, parquet_path: str, table_name: str, replace: bool = True):
     """
     Persist a Parquet file into a staging table. This will overwrite the staging
mpcaHydro/warehouseManager.py
CHANGED
@@ -44,4 +44,12 @@ def construct_database(db_path:Path,db_name:str = 'observations')->Path:
     warehouse.init_db(warehouse_path=db_path)


+def create_normalized_wiski_view(con: duckdb.DuckDBPyConnection):
+    """
+    Create a view in the database that contains normalized WISKI data.
+    """
+    con.execute("""
+    CREATE OR REPLACE VIEW analytics.normalized_wiski AS
+    SELECT
+    *""")

mpcaHydro/wiski.py
CHANGED
@@ -19,12 +19,9 @@ PARAMETERTYPE_MAP ={'11522': 'TP',
                     '11504': 'WT',
                     '11533': 'DO',
                     '11507':'WL'}
-#STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*','5034' ,'5035','5005', '5004','5014' ,'5015','5024' ,'5025','5044' ,'5045']
-STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*']

 DATA_CODES = [1,3,10,12,15,20,29,30,31,32,34,45,46,47,48,49]

-
 TS_NAME_SELECTOR = {'Q':{'Internal':{'daily':'20.Day.Mean.Archive',
                                      'unit': '15.Rated'},
                          'External': {'daily': '20.Day.Mean',
@@ -62,7 +59,8 @@ TS_NAME_SELECTOR = {'Q':{'Internal':{'daily':'20.Day.Mean.Archive',
                          'External': {'daily': '20.Day.Mean',
                                       'unit': '08.Provisional.Edited'}}}

-
+#STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*','5034' ,'5035','5005', '5004','5014' ,'5015','5024' ,'5025','5044' ,'5045']
+STATIONPARAMETER_NOS = ['262*','450*','451*','863*','866*']

 CONSTITUENT_NAME_NO = {'Q' :['262*'],#,'263'],
                        'WT' :['450*', '451*'], # '450.42','451.42'],
@@ -74,6 +72,13 @@ CONSTITUENT_NAME_NO = {'Q' :['262*'],#,'263'],
                        'N' :None,
                        'TKN':None}

+STATIONPARAMETER_NOS_MAP = {'262*':'Q',
+                            '450*':'WT',
+                            '451*':'WT',
+                            '863*':'OP',
+                            '866*':'DO',
+                            '811*':'TRB'}
+
 CONSTITUENT_NAME_NO_WPLMN = {'Q' :['262*'],#,'263'],
                              'WT' :['450*', '451*'], # '450.42','451.42'],
                              'OP' :['863*','5034' ,'5035'],
@@ -91,6 +96,38 @@ def test_connection():
     '''
     return pywisk.test_connection()

+def info(station_ids: list,constituent = None):
+    '''
+    Fetch metadata for given station IDs from WISKI database using the KISTERS API.
+    '''
+    if constituent is not None:
+        stationparameter_nos = CONSTITUENT_NAME_NO[constituent]
+    else:
+        stationparameter_nos = STATIONPARAMETER_NOS
+
+    df = pywisk.get_ts_ids(station_nos = station_ids,
+                           stationparameter_no = stationparameter_nos,
+                           ts_name = ['15.Rated','09.Archive','08.Provisional.Edited'])
+
+    df = normalize_columns(df)
+
+    # rows = []
+    # for station_id in df['station_id'].unique():
+    #     for constituent in df.loc[df['station_id'] == station_id,'constituent'].unique():
+    #         df_station_constituent = df.loc[(df['station_id'] == station_id) & (df['constituent'] == constituent) & df['ts_name'].isin(['15.Rated','09.Archive','08.Provisional.Edited'])]
+    #         if not df_station_constituent.empty:
+    #             if station_id.lower().startswith('e'):
+    #                 ts_names = TS_NAME_SELECTOR[constituent]['External']['unit']
+    #             else:
+    #                 ts_names = TS_NAME_SELECTOR[constituent]['Internal']['unit']
+    #             rows.append(df_station_constituent.loc[df_station_constituent['ts_name'] == ts_names,:])
+
+    return df
+
+
+
+
+
 def download(station_ids: list, start_year: int = 1996, end_year: int = 2030,wplmn: bool = False):
     '''
     Fetch data for given station IDs from WISKI database using the KISTERS API.
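A hypothetical usage sketch of the new info helper (the station number is made up; the exact columns depend on what pywisk.get_ts_ids returns):

    from mpcaHydro import wiski

    meta = wiski.info(['H12345'], constituent='Q')  # hypothetical station number
    print(meta.head())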
@@ -219,13 +256,28 @@ def convert_units(df):
     return df


+def map_constituents(df):
+    '''
+    Map stationparameter_no to constituent names
+    '''
+    def map_values(value):
+        for key, replacement in STATIONPARAMETER_NOS_MAP.items():
+            if value.startswith(key.rstrip('*')):  # Match prefix without the wildcard '*'
+                return replacement
+        return value  # If no match, return the original value
+
+    df['constituent'] = df['stationparameter_no'].apply(map_values)
+    return df
+
 def normalize_columns(df):
     '''
     Normalize column names and units
     '''
     # Map parameter numbers to constituent names
-    df['constituent'] = df['
+    #df['constituent'] = df['stationparameter_no'].map(STATIONPARAMETER_NOS_MAP,regex=True)

+    df = map_constituents(df)
+
     df.rename(columns={
         'station_no':'station_id',
         'Timestamp':'datetime',