PyPI - rashdf - Versions diffs - 0.9.0__tar.gz → 0.10.0__tar.gz - Mend

rashdf 0.9.0__tar.gz → 0.10.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

{rashdf-0.9.0 → rashdf-0.10.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rashdf
-Version: 0.9.0
+Version: 0.10.0
 Summary: Read data from HEC-RAS HDF files.
 Project-URL: repository, https://github.com/fema-ffrd/rashdf
 Classifier: Development Status :: 4 - Beta

{rashdf-0.9.0 → rashdf-0.10.0}/pyproject.toml RENAMED Viewed

@@ -12,7 +12,7 @@ classifiers = [
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
 ]
-version = "0.9.0"
+version = "0.10.0"
 dependencies = ["h5py", "geopandas>=1.0,<2.0", "pyarrow", "xarray<=2025.4.0"]
 [project.optional-dependencies]

{rashdf-0.9.0 → rashdf-0.10.0}/src/rashdf/plan.py RENAMED Viewed

@@ -1779,3 +1779,151 @@ class RasPlanHdf(RasGeomHdf):
         """
         ds = self.reference_points_timeseries_output()
         return self._zmeta(ds)
+    def reference_lines_flow(self, use_names: bool = False) -> DataFrame:
+        """Return wide-format DataFrame for reference lines timeseries flow data.
+        Reads the dataset returned by ``reference_lines_timeseries_output()`` and
+        hands its "Flow" variable to ``_timeseries_to_wide_dataframe``.
+        Parameters
+        ----------
+        use_names : bool, optional
+            If False (default), use reference line IDs ("refln_id") as column headers.
+            If True, use reference line names ("refln_name") as column headers.
+        Returns
+        -------
+        DataFrame
+            Wide-format DataFrame with time as index and reference line IDs (or names) as columns.
+        Raises
+        ------
+        ValueError
+            If "Flow" is not present in the timeseries output, or if ``use_names``
+            is True and the reference line names are not unique.
+        """
+        ds = self.reference_lines_timeseries_output()
+        return self._timeseries_to_wide_dataframe(
+            ds=ds,
+            var="Flow",
+            id_column="refln_id",
+            name_column="refln_name",
+            mesh_column="mesh_name",
+            use_names_as_col=use_names,
+        )
+    def reference_points_stage(self, use_names: bool = False) -> DataFrame:
+        """Return wide-format DataFrame for reference points timeseries stage data.
+        Reads the dataset returned by ``reference_points_timeseries_output()`` and
+        hands the variable named by ``WATER_SURFACE`` to
+        ``_timeseries_to_wide_dataframe``.
+        Parameters
+        ----------
+        use_names : bool, optional
+            If False (default), use reference point IDs ("refpt_id") as column headers.
+            If True, use reference point names ("refpt_name") as column headers.
+        Returns
+        -------
+        DataFrame
+            Wide-format DataFrame with time as index and reference point IDs (or names) as columns.
+        Raises
+        ------
+        ValueError
+            If the ``WATER_SURFACE`` variable is not present in the timeseries
+            output, or if ``use_names`` is True and the reference point names
+            are not unique.
+        """
+        ds = self.reference_points_timeseries_output()
+        return self._timeseries_to_wide_dataframe(
+            ds=ds,
+            var=WATER_SURFACE,
+            id_column="refpt_id",
+            name_column="refpt_name",
+            mesh_column="mesh_name",
+            use_names_as_col=use_names,
+        )
+    def bc_lines_flow(self, use_names: bool = False) -> DataFrame:
+        """Return wide-format DataFrame for boundary condition lines timeseries flow data.
+        Reads the dataset returned by ``bc_lines_timeseries_output()`` and hands
+        its "Flow" variable to ``_timeseries_to_wide_dataframe``.
+        Parameters
+        ----------
+        use_names : bool, optional
+            If False (default), use BC line IDs ("bc_line_id") as column headers.
+            If True, use BC line names ("bc_line_name") as column headers.
+        Returns
+        -------
+        DataFrame
+            Wide-format DataFrame with time as index and BC line IDs (or names) as columns.
+        Raises
+        ------
+        ValueError
+            If "Flow" is not present in the timeseries output, or if ``use_names``
+            is True and the BC line names are not unique.
+        """
+        ds = self.bc_lines_timeseries_output()
+        return self._timeseries_to_wide_dataframe(
+            ds=ds,
+            var="Flow",
+            id_column="bc_line_id",
+            name_column="bc_line_name",
+            mesh_column="mesh_name",
+            use_names_as_col=use_names,
+        )
+    def _timeseries_to_wide_dataframe(
+        self,
+        ds: xr.Dataset,
+        var: str,
+        id_column: str,
+        name_column: str,
+        mesh_column: str,
+        use_names_as_col: bool = False,
+    ) -> DataFrame:
+        """Convert xarray timeseries Dataset to wide-format DataFrame with metadata.
+        Parameters
+        ----------
+        ds : xr.Dataset
+            xarray Dataset containing timeseries data
+        var : str
+            Variable name to extract (e.g. "Flow", "Water Surface")
+        id_column : str
+            ID column name for pivoting (e.g. "refln_id", "refpt_id", "bc_line_id")
+        name_column : str
+            Name column for creating readable column names (e.g. "refln_name", "refpt_name")
+        mesh_column : str
+            Mesh column name (e.g. "mesh_name")
+        use_names_as_col : bool, optional
+            If False (default), use IDs as column headers.
+            If True, use names as column headers.
+        Returns
+        -------
+        DataFrame
+            Wide-format DataFrame with time as index and IDs or names as columns.
+            ``DataFrame.attrs`` holds the keys "variable", "units", "hdf_path",
+            "id_mapping" and "mesh_mapping". Both mappings are keyed by the
+            column labels: "id_mapping" maps ID -> name when IDs are the
+            columns and name -> ID when names are the columns; "mesh_mapping"
+            maps the column label to its mesh value.
+        Raises
+        ------
+        ValueError
+            If ``var`` is not in ``ds``, or if ``use_names_as_col`` is True and
+            there are fewer distinct names than distinct IDs.
+        """
+        if var not in ds:
+            raise ValueError(f"{var} data not found in timeseries output")
+        # Long format: index levels become ordinary columns and rows containing
+        # any missing value are dropped.
+        # NOTE(review): assumes to_dataframe() exposes "time", id_column,
+        # name_column and mesh_column as columns of ds[var] - confirm against
+        # the *_timeseries_output() datasets.
+        df = ds[var].to_dataframe().dropna().reset_index()
+        # check for duplicate names when using names as columns
+        if use_names_as_col:
+            unique_names = df[name_column].nunique()
+            unique_ids = df[id_column].nunique()
+            if unique_names < unique_ids:  # should have one name for every one id
+                # Count names per distinct (id, name) pair so the error can
+                # list exactly which names are shared by more than one ID.
+                name_counts = (
+                    df[[id_column, name_column]]
+                    .drop_duplicates()[name_column]
+                    .value_counts()
+                )
+                duplicates = name_counts[name_counts > 1].index.tolist()
+                raise ValueError(
+                    f"Cannot use names as columns. The following names are not unique: {duplicates}. "
+                )
+        pivot_column = name_column if use_names_as_col else id_column
+        # One row per timestamp, one column per ID (or name).
+        wide_df = df.pivot(index="time", columns=pivot_column, values=var)
+        # One row per distinct (id, name, mesh) combination; source for the
+        # mappings stored in attrs below.
+        lookup = df[[id_column, name_column, mesh_column]].drop_duplicates()
+        if use_names_as_col:
+            # when using names as columns, key=name -> value=id
+            id_mapping = lookup.set_index(name_column)[id_column].to_dict()
+            mesh_mapping = lookup.set_index(name_column)[mesh_column].to_dict()
+        else:
+            # when using IDs as columns, key=id -> value=name
+            id_mapping = lookup.set_index(id_column)[name_column].to_dict()
+            mesh_mapping = lookup.set_index(id_column)[mesh_column].to_dict()
+        # "units" and "hdf_path" fall back to None when ds[var] carries no such attribute.
+        wide_df.attrs = {
+            "variable": var,
+            "units": ds[var].attrs.get("units", None),
+            "hdf_path": ds[var].attrs.get("hdf_path", None),
+            "id_mapping": id_mapping,
+            "mesh_mapping": mesh_mapping,
+        }
+        return wide_df

{rashdf-0.9.0 → rashdf-0.10.0}/src/rashdf.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: rashdf
-Version: 0.9.0
+Version: 0.10.0
 Summary: Read data from HEC-RAS HDF files.
 Project-URL: repository, https://github.com/fema-ffrd/rashdf
 Classifier: Development Status :: 4 - Beta

{rashdf-0.9.0 → rashdf-0.10.0}/tests/test_plan.py RENAMED Viewed

@@ -748,3 +748,89 @@ def test_bc_lines_include_output_true():
             plan_hdf.bc_lines(include_output=True, datetime_to_str=True),
             bc_lines_with_output_json,
         )
+def test_reference_lines_flow(tmp_path: Path):
+    """Check shape, columns, attrs and one column of ``reference_lines_flow()``."""
+    # NOTE(review): the tmp_path fixture is requested but not used in this test.
+    plan_hdf = RasPlanHdf(BALD_EAGLE_P18_REF)
+    df = plan_hdf.reference_lines_flow()
+    assert df.index.name == "time"
+    assert df.shape == (37, 4)
+    assert list(df.columns) == [0, 1, 2, 3]
+    # Check metadata
+    assert df.attrs["variable"] == "Flow"
+    assert df.attrs["units"] == "cfs"
+    # Check mappings
+    assert "id_mapping" in df.attrs
+    assert len(df.attrs["id_mapping"]) == 4
+    assert "mesh_mapping" in df.attrs
+    assert len(df.attrs["mesh_mapping"]) == 4
+    # Compare the column for ID 2 against the expected values stored in CSV.
+    df_refln2 = df[2].to_frame(name="Flow")
+    valid_df = pd.read_csv(
+        TEST_CSV / "BaldEagleDamBrk.reflines.2.csv",
+        index_col="time",
+        parse_dates=True,
+        usecols=["time", "Flow"],
+        dtype={"Flow": np.float32},
+    )
+    assert_frame_equal(df_refln2, valid_df, check_dtype=False)
+def test_reference_points_stage(tmp_path: Path):
+    """Check shape, columns, attrs and one column of ``reference_points_stage()``."""
+    # NOTE(review): the tmp_path fixture is requested but not used in this test.
+    plan_hdf = RasPlanHdf(BALD_EAGLE_P18_REF)
+    df = plan_hdf.reference_points_stage()
+    assert df.index.name == "time"
+    assert df.shape == (37, 3)
+    assert list(df.columns) == [0, 1, 2]
+    # Check metadata
+    assert df.attrs["variable"] == "Water Surface"
+    assert df.attrs["units"] == "ft"
+    # Check mappings
+    assert "id_mapping" in df.attrs
+    assert len(df.attrs["id_mapping"]) == 3
+    assert "mesh_mapping" in df.attrs
+    assert len(df.attrs["mesh_mapping"]) == 3
+    # Compare the column for ID 1 against the expected values stored in CSV.
+    df_refpt1 = df[1].to_frame(name="Water Surface")
+    valid_df = pd.read_csv(
+        TEST_CSV / "BaldEagleDamBrk.refpoints.1.csv",
+        index_col="time",
+        parse_dates=True,
+        usecols=["time", "Water Surface"],
+        dtype={"Water Surface": np.float32},
+    )
+    assert_frame_equal(df_refpt1, valid_df, check_dtype=False)
+def test_bc_lines_flow(tmp_path: Path):
+    """Check shape, attrs and one column of ``bc_lines_flow()``."""
+    # NOTE(review): the tmp_path fixture is requested but not used in this test.
+    plan_hdf = RasPlanHdf(LOWER_KANAWHA_P01_BC_LINES)
+    df = plan_hdf.bc_lines_flow()
+    assert df.index.name == "time"
+    assert df.shape == (577, 10)
+    # Check metadata
+    assert df.attrs["variable"] == "Flow"
+    assert df.attrs["units"] == "cfs"
+    # Check mappings
+    assert "id_mapping" in df.attrs
+    assert len(df.attrs["id_mapping"]) == 10
+    assert "mesh_mapping" in df.attrs
+    assert len(df.attrs["mesh_mapping"]) == 10
+    # Compare the column for ID 7 against the expected values stored in CSV.
+    df_bcline7 = df[7].to_frame(name="Flow")
+    valid_df = pd.read_csv(
+        TEST_CSV / "LowerKanawha.p01.bclines.7.csv",
+        index_col="time",
+        parse_dates=True,
+        usecols=["time", "Flow"],
+        dtype={"Flow": np.float32},
+    )
+    assert_frame_equal(df_bcline7, valid_df, check_dtype=False)