PyPI - kestrel-transects - Versions diffs - 1.0.0__tar.gz - Mend

kestrel-transects 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

kestrel_transects-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2026 Mark J. Woodhouse (University of Bristol)
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

kestrel_transects-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,24 @@
+Metadata-Version: 2.4
+Name: kestrel_transects
+Version: 1.0.0
+Summary: Post-processing tool for University of Bristol's Kestrel morphodynamic surface flow modelling software to extract data on transects
+Author-email: "Mark J. Woodhouse" <mark.woodhouse@bristol.ac.uk>
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: click
+Requires-Dist: geopandas
+Requires-Dist: numpy
+Requires-Dist: pandas
+Requires-Dist: netCDF4
+Requires-Dist: xarray
+Requires-Dist: tqdm
+Requires-Dist: scipy
+Provides-Extra: io-h5netcdf
+Requires-Dist: h5netcdf; extra == "io-h5netcdf"
+Requires-Dist: h5py; extra == "io-h5netcdf"
+Provides-Extra: excel
+Requires-Dist: openpyxl; extra == "excel"
+Dynamic: license-file

kestrel_transects-1.0.0/README.md ADDED Viewed

File without changes

kestrel_transects-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,37 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "kestrel_transects"
+version = "1.0.0"
+authors = [
+    { name="Mark J. Woodhouse", email="mark.woodhouse@bristol.ac.uk" },
+]
+description = "Post-processing tool for University of Bristol's Kestrel morphodynamic surface flow modelling software to extract data on transects"
+readme = "README.md"
+requires-python = ">=3.10"
+dependencies = [
+    "click",
+    "geopandas",
+    # matplotlib is imported by kestrel_transects.plot, which the package
+    # __init__ imports unconditionally
+    "matplotlib",
+    "numpy",
+    "pandas",
+    "netCDF4",
+    # rioxarray is imported at module level by kestrel_transects.kestrel_transects
+    "rioxarray",
+    "xarray",
+    "tqdm",
+    "scipy",
+]
+classifiers = [
+    "Programming Language :: Python :: 3",
+    "Operating System :: OS Independent",
+]
+[project.scripts]
+kestrel-transects-fluxes = "kestrel_transects.kestrel_transects:compute_fluxes"
+kestrel-transects-profiles = "kestrel_transects.kestrel_transects:compute_profiles"
+[project.optional-dependencies]
+# Optional IO backends for xarray — install one of these if you need additional netCDF/HDF5 I/O support
+io-h5netcdf = ["h5netcdf", "h5py"]
+# Optional library for writing Excel files from pandas/geopandas
+excel = ["openpyxl"]

kestrel_transects-1.0.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

kestrel_transects-1.0.0/src/kestrel_transects/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ from .kestrel_transects import get_transect_fluxes, get_transect_profile
2	+ from .plot import plot_flux

kestrel_transects-1.0.0/src/kestrel_transects/kestrel_transects.py ADDED Viewed

@@ -0,0 +1,549 @@
+import glob
+import os
+import re
+from pathlib import Path
+import click
+import geopandas as gpd
+import numpy as np
+import pandas as pd
+import xarray as xr
+from numpy.typing import NDArray
+from tqdm.autonotebook import tqdm
+from shapely.geometry import LineString
+import numpy as np
+import pandas as pd
+import geopandas as gpd
+from shapely.geometry import LineString, Point
+import rioxarray as rxr
+# Build cross-channel transects perpendicular to a thalweg line.
+#
+# Parameters
+#   thalweg_file        : vector file readable by geopandas; only the geometry
+#                         of its first feature is used.
+#   thalweg_dist        : optional spacing used to resample the thalweg; when
+#                         None the geometry's own vertices are used.
+#   max_transect_length : full length of each transect before any clipping;
+#                         transects are centred on the midpoint of a segment.
+#   DEM_file            : optional raster opened with rioxarray; when given,
+#                         every transect is clipped to the detected banks.
+#   bank_height         : height above the elevation of the middle sample that
+#                         a DEM sample must reach to be treated as a bank.
+#
+# Returns a GeoDataFrame with an "id" column and one two-point LineString per
+# thalweg segment, carrying the CRS of the thalweg file.
+def thalweg_to_transect(
+    thalweg_file,
+    thalweg_dist=None,
+    max_transect_length=100,
+    DEM_file=None,
+    bank_height=10,
+):
+    # ------------------------------------------------------------
+    # 1. Load thalweg and build coordinate array
+    # ------------------------------------------------------------
+    thalweg = gpd.read_file(thalweg_file)
+    # only the first feature of the file is used
+    geom = thalweg.iloc[0].geometry
+    if thalweg_dist is not None:
+        # resample at a fixed spacing; the last distance may lie just past the
+        # end of the line (presumably clamped to the end point by shapely)
+        d = np.arange(0, geom.length + thalweg_dist, thalweg_dist)
+        coords = np.array([(p.x, p.y) for p in geom.interpolate(d)])
+    else:
+        coords = np.asarray(geom.coords)
+    # ------------------------------------------------------------
+    # 2. Midpoints + normals (vectorized)
+    # ------------------------------------------------------------
+    dx = coords[1:, 0] - coords[:-1, 0]
+    dy = coords[1:, 1] - coords[:-1, 1]
+    pts = 0.5 * (coords[1:] + coords[:-1])  # midpoint of each segment
+    seg_len = np.sqrt(dx**2 + dy**2)
+    # unit normal (dy, -dx) / |segment|
+    # NOTE(review): a repeated vertex gives seg_len == 0 and NaN normals
+    nx = dy / seg_len
+    ny = -dx / seg_len
+    # ------------------------------------------------------------
+    # 3. Compute transect endpoints (p1 → p2)
+    # ------------------------------------------------------------
+    ds = max_transect_length / 2  # half-length either side of the midpoint
+    p1 = pts - ds * np.column_stack([nx, ny])
+    p2 = pts + ds * np.column_stack([nx, ny])
+    n_transects = len(p1)
+    if DEM_file is None:
+        # simple case: return transects as drawn
+        geoms = [LineString([tuple(a), tuple(b)]) for a, b in zip(p1, p2)]
+        return gpd.GeoDataFrame({"id": np.arange(n_transects)}, geometry=geoms, crs=thalweg.crs)
+    # ------------------------------------------------------------
+    # 4. Sample points along each transect (vectorized)
+    # shape → (n_transects, n_samples, 2)
+    # ------------------------------------------------------------
+    DEM = rxr.open_rasterio(DEM_file)
+    # sample spacing = the coarser of the two raster cell sizes
+    resolution = np.max(np.abs(DEM.rio.resolution()))
+    distances = np.arange(0, max_transect_length + resolution, resolution)
+    n_samples = len(distances)
+    # parameter t along p1 → p2
+    # NOTE(review): when max_transect_length is not a multiple of resolution the
+    # last distance overshoots it, so t can exceed 1 and the middle sample is
+    # only approximately on the thalweg
+    t = (distances / max_transect_length).reshape(1, -1, 1)  # shape (1,n,1)
+    # Broadcast to compute all sample points
+    P1 = p1[:, np.newaxis, :]  # (T,1,2)
+    P2 = p2[:, np.newaxis, :]  # (T,1,2)
+    sample_pts = P1 + (P2 - P1) * t  # (T, S, 2)
+    xs = sample_pts[:, :, 0].ravel()
+    ys = sample_pts[:, :, 1].ravel()
+    # ------------------------------------------------------------
+    # 5. DEM interpolation (single batched call)
+    # ------------------------------------------------------------
+    # x and y share the helper dimension "z" so the DEM is sampled pointwise;
+    # [0] keeps the first entry of the leading axis (presumably the raster
+    # band — confirm for multi-band DEMs)
+    elev_flat = DEM.interp(x=("z", xs), y=("z", ys)).values[0]
+    elev = elev_flat.reshape(n_transects, n_samples)
+    # ------------------------------------------------------------
+    # 6. Vectorized bank detection
+    # mid index = thalweg index
+    # ------------------------------------------------------------
+    mid = n_samples // 2
+    # bank threshold per transect: elevation at the middle sample + bank_height
+    thr = elev[:, mid:mid + 1] + bank_height  # (T,1)
+    # samples on the p1 side / p2 side of the middle that reach the threshold
+    left_mask = elev[:, :mid] >= thr  # (T, mid)
+    right_mask = elev[:, mid+1:] >= thr  # (T, n_samples-mid-1)
+    # left index: the qualifying sample closest to the middle (first True when
+    # scanning outwards from mid); falls back to the transect start (0)
+    left_idx = np.where(
+        left_mask.any(axis=1),
+        left_mask[:, ::-1].argmax(axis=1) * -1 + (mid - 1),
+        0,
+    )
+    # right index: first qualifying sample past the middle; falls back to the
+    # transect end (n_samples - 1)
+    right_idx = np.where(
+        right_mask.any(axis=1),
+        (mid + 1) + right_mask.argmax(axis=1),
+        n_samples - 1,
+    )
+    # distances of bank points along transect
+    left_ds = distances[left_idx]
+    right_ds = distances[right_idx]
+    # ------------------------------------------------------------
+    # 7. Vectorized reconstruction of bank endpoints
+    # ------------------------------------------------------------
+    # convert the bank distances back to fractions of the p1 → p2 vector
+    tauL = (left_ds / max_transect_length).reshape(-1, 1)
+    tauR = (right_ds / max_transect_length).reshape(-1, 1)
+    bank_L = p1 + (p2 - p1) * tauL
+    bank_R = p1 + (p2 - p1) * tauR
+    # ------------------------------------------------------------
+    # 8. Build output LineStrings
+    # ------------------------------------------------------------
+    geoms = [LineString([tuple(bank_L[i]), tuple(bank_R[i])]) for i in range(n_transects)]
+    return gpd.GeoDataFrame({"id": np.arange(n_transects)}, geometry=geoms, crs=thalweg.crs)
+# def make_transect(data, pt0, pt1, ds=None, num_pts=100):
+#     t_vec = np.array([[pt1[0]-pt0[0]],[pt1[1]-pt0[1]]]) # vector along transect
+#     dist = np.linalg.norm(t_vec) # length of transect
+#     t_vec = t_vec / dist # normalize
+#     n_vec = np.array([[-t_vec[1]], [t_vec[0]]]) # normal vector to transect
+#     # Make distances along transect
+#     if ds is not None:
+#         s = np.arange(0, dist, ds)
+#     else:
+#         s = np.linspace(0, dist, num_pts, endpoint=True)
+#     # Get points along transect
+#     t = np.array([[pt0[0]],[pt0[1]]]) + t_vec*s
+#     xx = xr.DataArray(t[0,:], dims='s', coords={'s':s})
+#     yy = xr.DataArray(t[1,:], dims='s', coords={'s':s})
+#     # Get values along transect
+#     transect_values = data.interp(x=xx, y=yy, method="linear")
+#     # transect_values['s'] = s
+#     transect_values.attrs['normal_vector'] = n_vec
+#     transect_values.attrs['transect_vector'] = t_vec
+#     transect_values.attrs['transect_resolution'] = s[1]-s[0]
+#     return transect_values
+def make_transect(data: xr.Dataset, pt0, pt1, ds: float | None = None, num_pts: int = 100) -> xr.Dataset:
+    # vector along transect
+    t_vec = np.array([[pt1[0] - pt0[0]], [pt1[1] - pt0[1]]], dtype=float)
+    dist = float(np.linalg.norm(t_vec))
+    if dist == 0:
+        raise ValueError("Transect endpoints are identical; zero-length transect.")
+    t_vec /= dist
+    n_vec = np.array([[-t_vec[1, 0]], [t_vec[0, 0]]])
+    # distances along transect
+    s = np.arange(0.0, dist, ds, dtype=float) if ds is not None else np.linspace(0.0, dist, num_pts, endpoint=True)
+    base = np.array([[pt0[0]], [pt0[1]]], dtype=float)
+    t = base + t_vec * s  # shape (2, S)
+    xx = xr.DataArray(t[0, :], dims='s', coords={'s': s})
+    yy = xr.DataArray(t[1, :], dims='s', coords={'s': s})
+    # interpolate all requested variables
+    transect_values = data.interp(x=xx, y=yy, method="linear")
+    transect_values = transect_values.assign_coords(s=xx)  # ensure s is a proper coord
+    transect_values.attrs['normal_vector'] = n_vec
+    transect_values.attrs['transect_vector'] = t_vec
+    transect_values.attrs['transect_resolution'] = float(s[1] - s[0]) if len(s) > 1 else np.nan
+    return transect_values
+def compute_transect_flux(transect_values):
+    # Get velocity components
+    U = transect_values.x_velocity.values
+    V = transect_values.y_velocity.values
+    # Get flow depth
+    H = transect_values.flow_depth.values
+    # Get bulk density
+    rho = transect_values.density.values
+    # Get solids fraction
+    phi = transect_values.solids_fraction.values
+    # Get solid density
+    rho_s = transect_values.attrs['solids density']
+    # Get normal vector
+    nx = transect_values.attrs['normal_vector'][0]
+    ny = transect_values.attrs['normal_vector'][1]
+    # Get resolution
+    ds = transect_values.attrs['transect_resolution']
+    VolFlux = np.nansum(H*np.abs(U*nx + V*ny))*ds
+    MassFlux = np.nansum(rho*H*np.abs(U*nx + V*ny))*ds
+    SolidsVolFlux = np.nansum(phi*H*np.abs(U*nx + V*ny))*ds
+    SolidsMassFlux = rho_s*SolidsVolFlux
+    return {'VolumeFlux':VolFlux, 'MassFlux':MassFlux, 'SolidsVolumeFlux':SolidsVolFlux, 'SolidsMassFlux':SolidsMassFlux}
+# Extract long-form profiles of one or more variables along every transect of
+# a single snapshot file.
+#
+# NOTE(review): a second function named get_transect_profile is defined further
+# down this module; being later, that definition is the one bound at import
+# time, so this version is never called.
+#
+# Parameters
+#   kestrel_file  : dataset file loaded with xarray.load_dataset; must carry a
+#                   'crs_epsg' attribute.
+#   transect_file : vector file of transects, reprojected to that EPSG code.
+#   variable      : one variable name or a list of names to sample.
+#   resolution    : sample spacing handed to make_transect as ``ds``.
+#
+# Returns a GeoDataFrame with one row per sample point: the sampled values, 's',
+# 'transect_index', 'Time' and the transect geometry.
+# Raises ValueError when 'crs_epsg' is missing or a variable is not in the
+# dataset.
+def get_transect_profile(kestrel_file, transect_file, variable: str | list[str], resolution=1.0):
+    data = xr.load_dataset(kestrel_file)
+    epsg = data.attrs.get('crs_epsg', None)
+    if epsg is None:
+        raise ValueError("Dataset missing 'crs_epsg' attribute.")
+    transects = gpd.read_file(transect_file).to_crs(epsg=epsg)
+    # normalize variable list
+    variables = [variable] if isinstance(variable, str) else list(variable)
+    for v in variables:
+        if v not in data.variables:
+            raise ValueError(f"Variable {v!r} is not in dataset.")
+    # keep only the requested variables so nothing else is interpolated
+    data = data[variables]
+    frames = []
+    for k, g in transects.iterrows():
+        # only the first two vertices of the geometry define the transect
+        p = list(g.geometry.coords)
+        vals = make_transect(data, p[0], p[1], ds=resolution)
+        df = vals.to_dataframe().reset_index()  # has 's' and variables
+        df['transect_index'] = k
+        # 'Time' is None when the dataset has no 'time' attribute
+        df['Time'] = data.attrs.get('time', None)
+        # the same transect geometry is repeated on every sample row
+        frames.append(gpd.GeoDataFrame(df, geometry=g.geometry, crs=transects.crs))
+    long_df = pd.concat(frames, ignore_index=True)
+    return gpd.GeoDataFrame(long_df, geometry='geometry', crs=transects.crs)
+# def get_transect_fluxes(kestrel_file, transect_shpfile):
+#     data = xr.load_dataset(kestrel_file)
+#     transects = gpd.read_file(transect_shpfile)
+#     transects = transects.to_crs(data.attrs['crs_epsg'])
+#     transects['Volume flux'] = np.nan
+#     transects['Mass flux'] = np.nan
+#     transects['Solids Volume flux'] = np.nan
+#     transects['Solids Mass flux'] = np.nan
+#     transects['Time'] = data.attrs['time']
+#     for k, g in transects.iterrows():
+#         p = g.geometry.coords
+#         values = make_transect(data, p[0], p[1], num_pts=100)
+#         fluxes = compute_transect_flux(values)
+#         transects.loc[k,'Volume flux'] = fluxes['VolumeFlux']
+#         transects.loc[k,'Mass flux'] = fluxes['MassFlux']
+#         transects.loc[k,'Solids Volume flux'] = fluxes['SolidsVolumeFlux']
+#         transects.loc[k,'Solids Mass flux'] = fluxes['SolidsMassFlux']
+#     return transects
+def get_transect_fluxes(kestrel_file, transect_shpfile):
+    data = xr.load_dataset(kestrel_file)
+    epsg = data.attrs.get('crs_epsg', None)
+    if epsg is None:
+        raise ValueError("Dataset missing 'crs_epsg' attribute.")
+    transects = gpd.read_file(transect_shpfile).to_crs(epsg)
+    transects['Volume flux'] = np.nan
+    transects['Mass flux'] = np.nan
+    transects['Solids Volume flux'] = np.nan
+    transects['Solids Mass flux'] = np.nan
+    transects['Time'] = data.attrs.get('time', None)
+    for k, g in transects.iterrows():
+        p = list(g.geometry.coords)
+        values = make_transect(data, p[0], p[1], num_pts=100)
+        fluxes = compute_transect_flux(values)
+        transects.loc[k, 'Volume flux'] = fluxes['VolumeFlux']
+        transects.loc[k, 'Mass flux'] = fluxes['MassFlux']
+        transects.loc[k, 'Solids Volume flux'] = fluxes['SolidsVolumeFlux']
+        transects.loc[k, 'Solids Mass flux'] = fluxes['SolidsMassFlux']
+    return transects
+def get_transect_profile(kestrel_file, transect_file, variable: str | list[str], resolution=1):
+    data = xr.load_dataset(kestrel_file)
+    transects = gpd.read_file(transect_file)
+    transects = transects.to_crs(data.attrs['crs_epsg'])
+    if isinstance(variable, str):
+        if variable not in data.variables:
+            raise ValueError(f'Variable {variable} is not in dataset')
+        variable = [variable]
+    else:
+        for v in variable:
+            if v not in data.variables:
+                raise ValueError(f'Variable {v} is not in dataset')
+    data = data[variable]
+    long_rows = []
+    for k, g in transects.iterrows():
+        p = g.geometry.coords
+        profile_values = make_transect(data, p[0], p[1], ds=resolution)
+        s_vals = profile_values['s'].values
+        row_geom = g.geometry
+        time_val = data.attrs['time']
+        for i, s in enumerate(s_vals):
+            row = {
+                'geometry': row_geom,
+                'transect_index': k,
+                's': float(s),
+                'Time': time_val
+            }
+            for v in variable:
+                row[v] = float(profile_values[v].values[i])
+            long_rows.append(row)
+    long_df = gpd.GeoDataFrame(long_rows, geometry='geometry', crs=transects.crs)
+    long_df = long_df.to_crs(data.attrs['crs_epsg'])
+    return long_df
+def arclength(x: NDArray, y: NDArray, a: float, b: float):
+    bounds = (x>=a) & (x<=b)
+    dydx = np.gradient(y[bounds], x[bounds])
+    integrand = np.sqrt(1 + dydx**2)
+    return np.trapezoid(integrand, x[bounds])
+@click.command()
+@click.option("-o", "--out_file", type=str, default="transect_data", help="name of output GeoPackage file (without extension)")
+@click.option("--excel", default=None, help="name of optional output Excel file (without extension)")
+@click.option("-q", "--quiet", default=False, is_flag=True, help="surpress output messages")
+@click.argument("input_dir", type=click.Path(exists=True))
+@click.argument("transects", type=str)
+def compute_fluxes(out_file, excel, quiet, input_dir, transects):
+    """Compute fluxes using kestrel netCDF results in dir across transects in transects.
+    transects should be a georeferenced vector file readable by geopandas (e.g. a shapefile or geopackage).
+    """
+    if not quiet:
+        print(f"Processing {input_dir} to extract data on transects in file {transects}.")
+        print(f"Data will be stored in as a GeoPackage in {out_file}.gpkg")
+        if excel is not None:
+            print(f"and as an Excel spreadsheet in file {excel}.xlsx")
+    file_pattern = re.compile(r'.*?(\d+).nc')
+    def get_order(file):
+        match = file_pattern.match(Path(file).name)
+        if not match:
+            return np.infty
+        return int(match.groups()[0])
+    snapshot_files = sorted(glob.glob(input_dir+'/*[0-9].nc'), key=get_order)
+    transect_data = []
+    if not quiet:
+        it = enumerate(pbar := tqdm(snapshot_files))
+    else:
+        it = enumerate(snapshot_files)
+    for j, f in it:
+        if not quiet:
+            pbar.set_description(f"Processing file: {f}")
+        this_transect = get_transect_fluxes(f, transects)
+        transect_data.append(this_transect)
+    fluxes_on_transects = pd.concat(transect_data)
+    fluxes_on_transects.to_file(f"{out_file}.gpkg", driver="GPKG")
+    if excel is not None:
+        fluxes_on_transects.to_excel(f"{excel}.xlsx")
+@click.command()
+@click.option("-r", "--resolution", type=float, default=1.0, show_default=True, help="Transect sampling resolution")
+@click.option("-o", "--out_file", type=str, default="transect_data", help="name of output GeoPackage file (without extension)")
+@click.option("--excel", default=None, help="name of optional output Excel file (without extension)")
+@click.option("-q", "--quiet", default=False, is_flag=True, help="surpress output messages")
+@click.option("--extrema", type=click.Choice(["on", "off", "only"], case_sensitive=False), default="off", show_default=True, help="Include min/max across s: 'on' adds _min/_max columns; 'only' saves only extrema per transect/time; 'off' disables this")
+@click.argument("input_dir", type=click.Path(exists=True))
+@click.argument("transects", type=str)
+@click.argument("var", nargs=-1, type=str)
+def compute_profiles(resolution, out_file, excel, quiet, extrema, input_dir, transects, var):
+    """Compute profiles using kestrel netCDF results in `input_dir` across `transects`.
+    The command reads kestrel NetCDF snapshots from `input_dir`, extracts transect profiles
+    across the provided transect vector (e.g. a shapefile or GeoPackage), and writes
+    the concatenated results to a GeoPackage and optionally an Excel workbook.
+    Parameters
+    ----------
+    resolution : float
+        Sampling resolution along each transect (units same as dataset). Default 1.0.
+    out_file : str
+        Base output filename (GeoPackage and Excel will use this prefix).
+    excel : str | None
+        Optional Excel filename (without extension) to also write results to.
+    quiet : bool
+        Suppress progress output when True.
+    extrema : {'on', 'off', 'only'}
+        Controls computation and output of per-transect-per-snapshot extrema (min/max):
+          - 'off'  : do not compute extrema (default)
+          - 'on'   : compute extrema and add <var>_min / <var>_max columns to the long-form table
+          - 'only' : save only a compact per-transect-per-time table that contains geometry,
+                     Time and only the <var>_min / <var>_max columns
+    input_dir : str
+        Directory containing kestrel NetCDF snapshots.
+    transects : str
+        Path to a georeferenced vector file containing transect geometries.
+    var : tuple[str]
+        One or more variable names to extract.
+    Notes
+    -----
+    - The in-memory output is a long-form GeoDataFrame: each row corresponds to a single
+      sample point along a transect and includes columns: geometry, transect_index, s, Time
+      and one column per requested variable.
+    - Many GIS formats do not preserve array/list types. The long-form table stores scalars
+      and exports cleanly. For array-preservation, prefer formats like Parquet or storing
+      arrays separately (e.g., .npy).
+    Examples
+    --------
+    CLI (single variable):
+        python -m kestrel_transects.kestrel_transects compute_profiles /path/to/snapshots transects.gpkg flow_depth
+    CLI (multiple variables + extrema):
+        python -m kestrel_transects.kestrel_transects \
+            compute_profiles /path/to/snapshots transects.gpkg flow_depth velocity --resolution 0.5 --extrema on
+    CLI (save only extrema per transect/time):
+        python -m kestrel_transects.kestrel_transects \
+            compute_profiles /path/to/snapshots transects.gpkg flow_depth velocity --extrema only
+    In-Python (formatting the long-form output into a pivot table):
+        # get_transect_profile returns the same long-format rows (useful for single snapshot)
+        profiles_long = get_transect_profile(kestrel_file, transect_file, ['flow_depth','velocity'], resolution=1.0)
+        # Pivot a single variable into a table with rows=transect_index and columns=s
+        flow_table = profiles_long.pivot(index='transect_index', columns='s', values='flow_depth')
+        # For multiple variables: melt then pivot, producing a MultiIndex column with (variable, s)
+        melted = profiles_long.melt(
+            id_vars=['transect_index', 's', 'Time', 'geometry'],
+            value_vars=['flow_depth', 'velocity'],
+            var_name='variable', value_name='value'
+        )
+        pivoted = melted.pivot_table(index='transect_index', columns=['variable', 's'], values='value')
+    """
+    if not quiet:
+        print(f"Processing {input_dir} to extract data on transects in file {transects}.")
+        print(f"Data will be stored in as a GeoPackage in {out_file}.gpkg")
+        if excel is not None:
+            print(f"and as an Excel spreadsheet in file {excel}.xlsx")
+    file_pattern = re.compile(r'.*?(\d+).nc')
+    def get_order(file):
+        match = file_pattern.match(Path(file).name)
+        if not match:
+            return np.infty
+        return int(match.groups()[0])
+    snapshot_files = sorted(glob.glob(input_dir+'/*[0-9].nc'), key=get_order)
+    transect_data = []
+    if not quiet:
+        it = enumerate(pbar := tqdm(snapshot_files))
+    else:
+        it = enumerate(snapshot_files)
+    for j, f in it:
+        if not quiet:
+            pbar.set_description(f"Processing file: {f}")
+        # var is now a tuple of variable names
+        variables = list(var)
+        this_transect = get_transect_profile(f, transects, variables, resolution=resolution)
+        transect_data.append(this_transect)
+    profiles_on_transects = pd.concat(transect_data)
+    if extrema.lower() in ("on", "only"):
+        # Compute per-transect-per-snapshot minima and maxima (group by transect_index and Time)
+        # compute minima and maxima; agg returns MultiIndex columns so flatten them
+        agg_df = profiles_on_transects.groupby(['transect_index', 'Time'])[list(var)].agg(['min', 'max'])
+        # flatten multiindex column tuples (var, stat) -> 'var_min' / 'var_max'
+        newcols = []
+        for c in agg_df.columns:
+            if isinstance(c, tuple):
+                newcols.append(f"{c[0]}_{c[1]}")
+            else:
+                newcols.append(c)
+        agg_df.columns = newcols
+        # move transect_index/Time back to columns
+        extrema_df = agg_df.reset_index()
+        # Add geometry (take the first geometry for each transect_index/Time pair)
+        meta = profiles_on_transects.groupby(['transect_index', 'Time']).agg({'geometry': 'first'}).reset_index()
+        extrema_df = pd.merge(extrema_df, meta, on=['transect_index', 'Time'])
+        if extrema.lower() == "on":
+            # Prepare extrema_df indexed by (transect_index, Time)
+            extrema_idx = extrema_df.set_index(['transect_index', 'Time'])[[c for v in var for c in (f"{v}_min", f"{v}_max")]]
+            # Join the per-transect-per-time extrema onto the long-form table
+            joined = profiles_on_transects.set_index(['transect_index', 'Time']).join(extrema_idx, how='left')
+            profiles_on_transects = joined.reset_index()
+        elif extrema.lower() == "only":
+            # Replace the output object with just the per-transect-per-time extrema (GeoDataFrame)
+            extrema_gdf = gpd.GeoDataFrame(extrema_df, geometry='geometry', crs=profiles_on_transects.crs)
+            profiles_on_transects = extrema_gdf
+    profiles_on_transects.to_file(f"{out_file}.gpkg", driver="GPKG")
+    if excel is not None:
+        profiles_on_transects.to_excel(f"{excel}.xlsx")
+# if __name__=="__main__":
+#     compute_fluxes()

kestrel_transects-1.0.0/src/kestrel_transects/plot/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+from .cm import (conc_colours, depo_colours, depth_colours, ero_colours,
+                 speed_colours)
+from .plot import plot_extrema, plot_flux

kestrel_transects-1.0.0/src/kestrel_transects/plot/cm.py ADDED Viewed

@@ -0,0 +1,66 @@
+from matplotlib.colors import LinearSegmentedColormap
+# Colormaps used by the plotting helpers. Every stop is an RGBA tuple whose
+# colour channels are written as 0-255 values divided by 255 and whose last
+# entry is the alpha; N=100 is the number of quantisation levels.
+#
+# depth_colours: used for flow depth. The first stop has alpha 0, so the low
+# end of the map fades in from fully transparent.
+depth_colours = LinearSegmentedColormap.from_list('depth_cmap',
+                [
+                (209/255.0, 238/255.0 ,234/255.0, 0),
+                (209/255.0, 238/255.0 ,234/255.0, 1),
+                (209/255.0, 238/255.0 ,234/255.0, 1),
+                (168/255.0, 219/255.0 ,217/255.0, 1),
+                (133/255.0, 196/255.0 ,201/255.0, 1),
+                (104/255.0, 171/255.0 ,184/255.0, 1),
+                ( 79/255.0, 144/255.0 ,166/255.0, 1),
+                ( 59/255.0, 115/255.0 ,143/255.0, 1),
+                ( 42/255.0,  86/255.0 ,116/255.0, 1)
+                ],
+                N=100)
+# speed_colours: used for flow speed; fully opaque throughout.
+speed_colours = LinearSegmentedColormap.from_list('speed_cmap',
+                [
+                    (252/255.0, 222/255.0, 156/255.0 , 1),
+                    (250/255.0, 164/255.0, 118/255.0 , 1),
+                    (240/255.0, 116/255.0, 110/255.0 , 1),
+                    (227/255.0,  79/255.0, 111/255.0 , 1),
+                    (220/255.0,  57/255.0, 119/255.0 , 1),
+                    (185/255.0,  37/255.0, 122/255.0 , 1),
+                    (124/255.0,  29/255.0, 111/255.0 , 1)
+                ],
+                N=100)
+# conc_colours: used for solids fraction and the solids fluxes; fully opaque.
+conc_colours = LinearSegmentedColormap.from_list('conc_cmap',
+                [
+                    (127/255.0, 162/255.0, 163/255.0, 1),
+                    ( 97/255.0, 132/255.0, 133/255.0, 1),
+                    ( 80/255.0, 116/255.0, 117/255.0, 1),
+                    (114/255.0, 134/255.0, 123/255.0, 1),
+                    (155/255.0, 127/255.0, 104/255.0, 1),
+                    (196/255.0, 119/255.0,  87/255.0, 1),
+                    (147/255.0,  81/255.0,  22/255.0, 1)
+                ],
+                N=100)
+# ero_colours: used for the minimum elevation change. The last stop has
+# alpha 0, so the high end of the map fades out to fully transparent.
+ero_colours = LinearSegmentedColormap.from_list('ero_cmap',
+                [
+                    (  8/255.0,  48/255.0, 107/255.0, 1),
+                    (  8/255.0,  81/255.0, 156/255.0, 1),
+                    ( 33/255.0, 113/255.0, 181/255.0, 1),
+                    ( 66/255.0, 146/255.0, 198/255.0, 1),
+                    (107/255.0, 174/255.0, 214/255.0, 1),
+                    (158/255.0, 202/255.0, 225/255.0, 1),
+                    (198/255.0, 219/255.0, 239/255.0, 1),
+                    (222/255.0, 235/255.0, 247/255.0, 1),
+                    (255/255.0, 255/255.0, 255/255.0, 0)
+                ],
+                N=100)
+# depo_colours: used for the maximum elevation change. The first stop has
+# alpha 0, so the low end of the map fades in from fully transparent.
+depo_colours = LinearSegmentedColormap.from_list('depo_cmap',
+                [
+                    (255/255.0, 245/255.0, 240/255.0, 0),
+                    (254/255.0, 224/255.0, 210/255.0, 1),
+                    (252/255.0, 187/255.0, 161/255.0, 1),
+                    (251/255.0, 106/255.0,  74/255.0, 1),
+                    (239/255.0,  59/255.0,  44/255.0, 1),
+                    (203/255.0,  24/255.0,  29/255.0, 1),
+                    (165/255.0,  15/255.0,  21/255.0, 1),
+                    (103/255.0,   0/255.0,  13/255.0, 1)
+                ],
+                N=100)

kestrel_transects-1.0.0/src/kestrel_transects/plot/plot.py ADDED Viewed

@@ -0,0 +1,162 @@
+from pathlib import Path
+from typing import Union
+import geopandas as gpd
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.axes import Axes
+from matplotlib.colors import Colormap, LogNorm, Normalize, PowerNorm
+from .cm import (conc_colours, depo_colours, depth_colours, ero_colours,
+                 speed_colours)
+flux_vars = {
+    'Volume flux': {'label':'Volume flux', 'unit': r'm^{3}/s', 'cmap':plt.cm.plasma},
+    'Mass flux': {'label':'Mass flux', 'unit': 'kg/s', 'cmap':plt.cm.viridis},
+    'Solids Volume flux': {'label':'Solids Volume flux', 'unit': r'm^{3}/s', 'cmap':conc_colours},
+    'Solids Mass flux': {'label':'Solids Mass flux', 'unit': 'kg/s', 'cmap':conc_colours},
+}
+extrema_vars = {
+    'flow_depth_min': {'label':'Minimum flow depth', 'unit':'m', 'cmap': depth_colours},
+    'flow_depth_max': {'label':'Maximum flow depth', 'unit':'m', 'cmap': depth_colours},
+    'flow_speed_min': {'label':'Minimum flow speed', 'unit':'m/s', 'cmap': speed_colours},
+    'flow_speed_max': {'label':'Maximum flow speed', 'unit':'m/s', 'cmap': speed_colours},
+    'solids_fraction_min': {'label':'Minimum solids fraction', 'unit':'', 'cmap': conc_colours},
+    'solids_fraction_max': {'label':'Maximum solids fraction', 'unit':'', 'cmap': conc_colours},
+    'elevation_change_min': {'label':'Minimum elevation change', 'unit':'', 'cmap': ero_colours},
+    'elevation_change_max': {'label':'Maximum elevation change', 'unit':'', 'cmap': depo_colours},
+}
+def plot_flux(fluxes_file: Path, transects_file: Path, *,
+              var: str = 'Mass flux',
+              ax: Axes | None = None,
+              cmap: Union[Colormap, str] | None = "viridis",
+              norm: object = Normalize(),
+              vmin: float = 0,
+              vmax: float | None = None) -> None:
+    """
+        var can be one of ['Volume flux', 'Mass flux', 'Solids Volume flux', 'Solids Mass flux']
+    Raises:
+        ValueError: _description_
+    """
+    fluxes = gpd.read_file(fluxes_file)
+    if var not in fluxes.columns:
+        raise ValueError(f"Variable var={var} not found in columns of {fluxes_file}. \n Recognized variables are {list(fluxes.columns)}")
+    transects = gpd.read_file(transects_file)
+    transect_centroid = transects.centroid
+    transect_distance = transect_centroid.distance(transect_centroid.loc[0]) # type: ignore
+    time = fluxes.Time.unique()
+    flux_data = np.nan*np.ones((transect_distance.size, time.size))
+    for it, t in enumerate(time):
+         for i in  fluxes.id.unique():
+              try:
+                  flux_data[i,it] = float(fluxes.loc[(fluxes.id==i) & (fluxes.Time==t), var].iloc[0]) # type: ignore
+              except:
+                  pass
+    flux_data = np.ma.masked_less(flux_data, vmin)
+    if ax is None:
+        fig, ax = plt.subplots()
+    if cmap is None:
+        cmap = flux_vars[var]['cmap']
+    if isinstance(cmap, str):
+        cmap = plt.colormaps.get_cmap(cmap)
+    if vmax is None:
+        vmax = fluxes[var].max()
+    if isinstance(norm, LogNorm):
+        if np.isclose(vmin,0.0):
+            vmin = np.finfo(float).eps*10
+        norm.vmin=vmin
+        norm.vmax=vmax
+    else:
+        norm.vmin=vmin # type: ignore
+        norm.vmax=vmax # type: ignore
+    img = ax.imshow(flux_data,
+                    extent=(time.min(), time.max(), transect_distance.max(), transect_distance.min()),
+                    norm=norm, # type: ignore
+                    cmap=cmap)
+    ax.set_xlabel('Time (s)')
+    ax.set_ylabel('Downstream distance (m)')
+    cbar = plt.colorbar(img)
+    cbar.set_label(f'{flux_vars[var]['label']} ({flux_vars[var]['unit']})')
+def plot_extrema(extrema_file: Path, transects_file: Path, *,
+              var: str = 'flow depth max',
+              ax: Axes | None = None,
+              cmap: Union[Colormap, str] | None = None,
+              norm: object = Normalize(),
+              vmin: float = 0,
+              vmax: float | None = None) -> None:
+    extrema = gpd.read_file(extrema_file)
+    if var not in extrema.columns:
+        raise ValueError(f"Variable var={var} not found in columns of {extrema_file}. \n Recognized variables are {list(extrema.columns)}")
+    transects = gpd.read_file(transects_file)
+    transect_centroid = transects.centroid
+    transect_distance = transect_centroid.distance(transect_centroid.loc[0]) # type: ignore
+    time = extrema.Time.unique()
+    extrema_data = np.zeros((transect_distance.size, time.size))
+    for it, t in enumerate(time):
+         for i in  extrema.transect_index.unique():
+              try:
+                  extrema_data[i,it] = float(extrema.loc[(extrema.transect_index==i) & (extrema.Time==t), var].iloc[0]) # type: ignore
+              except:
+                  pass
+    if ax is None:
+        fig, ax = plt.subplots()
+    if cmap is None:
+        cmap = extrema_vars[var]['cmap']
+    if isinstance(cmap, str):
+        cmap = plt.colormaps.get_cmap(cmap)
+    if vmax is None:
+        vmax = extrema[var].max()
+    if isinstance(norm, LogNorm):
+        if np.isclose(vmin,0.0):
+            vmin = np.finfo(float).eps*10
+        norm.vmin=vmin
+        norm.vmax=vmax
+    else:
+        norm.vmin=vmin # type: ignore
+        norm.vmax=vmax # type: ignore
+    ax.set_xlabel('Time (s)')
+    ax.set_ylabel('Downstream distance (m)')
+    img = ax.imshow(extrema_data,
+                    extent=(time.min(), time.max(), transect_distance.max(), transect_distance.min()),
+                    norm=norm, # type: ignore
+                    cmap=cmap)
+    cbar = plt.colorbar(img)
+    cbar.set_label(f'{extrema_vars[var]['label']} ({extrema_vars[var]['unit']})')

kestrel_transects-1.0.0/src/kestrel_transects.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,24 @@
+Metadata-Version: 2.4
+Name: kestrel_transects
+Version: 1.0.0
+Summary: Post-processing tool for University of Bristol's Kestrel morphodynamic surface flow modelling software to extract data on transects
+Author-email: "Mark J. Woodhouse" <mark.woodhouse@bristol.ac.uk>
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: click
+Requires-Dist: geopandas
+Requires-Dist: numpy
+Requires-Dist: pandas
+Requires-Dist: netCDF4
+Requires-Dist: xarray
+Requires-Dist: tqdm
+Requires-Dist: scipy
+Provides-Extra: io-h5netcdf
+Requires-Dist: h5netcdf; extra == "io-h5netcdf"
+Requires-Dist: h5py; extra == "io-h5netcdf"
+Provides-Extra: excel
+Requires-Dist: openpyxl; extra == "excel"
+Dynamic: license-file

kestrel_transects-1.0.0/src/kestrel_transects.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,14 @@
+LICENSE
+README.md
+pyproject.toml
+src/kestrel_transects/__init__.py
+src/kestrel_transects/kestrel_transects.py
+src/kestrel_transects.egg-info/PKG-INFO
+src/kestrel_transects.egg-info/SOURCES.txt
+src/kestrel_transects.egg-info/dependency_links.txt
+src/kestrel_transects.egg-info/entry_points.txt
+src/kestrel_transects.egg-info/requires.txt
+src/kestrel_transects.egg-info/top_level.txt
+src/kestrel_transects/plot/__init__.py
+src/kestrel_transects/plot/cm.py
+src/kestrel_transects/plot/plot.py

kestrel_transects-1.0.0/src/kestrel_transects.egg-info/dependency_links.txt ADDED Viewed

@@ -0,0 +1 @@
+

kestrel_transects-1.0.0/src/kestrel_transects.egg-info/entry_points.txt ADDED Viewed

@@ -0,0 +1,3 @@
+[console_scripts]
+kestrel-transects-fluxes = kestrel_transects.kestrel_transects:compute_fluxes
+kestrel-transects-profiles = kestrel_transects.kestrel_transects:compute_profiles

kestrel_transects-1.0.0/src/kestrel_transects.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,15 @@
+click
+geopandas
+numpy
+pandas
+netCDF4
+xarray
+tqdm
+scipy
+[excel]
+openpyxl
+[io-h5netcdf]
+h5netcdf
+h5py

kestrel_transects-1.0.0/src/kestrel_transects.egg-info/top_level.txt ADDED Viewed

@@ -0,0 +1 @@
+kestrel_transects