kestrel-transects 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Mark J. Woodhouse (University of Bristol)
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: kestrel_transects
3
+ Version: 1.0.0
4
+ Summary: Post-processing tool for University of Bristol's Kestrel morphodynamic surface flow modelling software to extract data on transects
5
+ Author-email: "Mark J. Woodhouse" <mark.woodhouse@bristol.ac.uk>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: click
12
+ Requires-Dist: geopandas
13
+ Requires-Dist: numpy
14
+ Requires-Dist: pandas
15
+ Requires-Dist: netCDF4
16
+ Requires-Dist: xarray
17
+ Requires-Dist: tqdm
18
+ Requires-Dist: scipy
19
+ Provides-Extra: io-h5netcdf
20
+ Requires-Dist: h5netcdf; extra == "io-h5netcdf"
21
+ Requires-Dist: h5py; extra == "io-h5netcdf"
22
+ Provides-Extra: excel
23
+ Requires-Dist: openpyxl; extra == "excel"
24
+ Dynamic: license-file
File without changes
@@ -0,0 +1,37 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "kestrel_transects"
7
+ version = "1.0.0"
8
+ authors = [
9
+ { name="Mark J. Woodhouse", email="mark.woodhouse@bristol.ac.uk" },
10
+ ]
11
+ description = "Post-processing tool for University of Bristol's Kestrel morphodynamic surface flow modelling software to extract data on transects"
12
+ readme = "README.md"
13
+ requires-python = ">=3.10"
# Runtime dependencies. Every third-party package imported by the sources is
# listed: matplotlib (kestrel_transects.plot), rioxarray and shapely
# (kestrel_transects.kestrel_transects) are imported at module import time, so
# they must be installed together with the package.
dependencies = [
    "click",
    "geopandas",
    "matplotlib",
    "numpy",
    "pandas",
    "netCDF4",
    "rioxarray",
    "shapely",
    "xarray",
    "tqdm",
    "scipy",
]
24
+ classifiers = [
25
+ "Programming Language :: Python :: 3",
26
+ "Operating System :: OS Independent",
27
+ ]
28
+
29
+ [project.scripts]
30
+ kestrel-transects-fluxes = "kestrel_transects.kestrel_transects:compute_fluxes"
31
+ kestrel-transects-profiles = "kestrel_transects.kestrel_transects:compute_profiles"
32
+
33
+ [project.optional-dependencies]
34
+ # Optional IO backends for xarray — install one of these if you need additional netCDF/HDF5 I/O support
35
+ io-h5netcdf = ["h5netcdf", "h5py"]
36
+ # Optional library for writing Excel files from pandas/geopandas
37
+ excel = ["openpyxl"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,2 @@
1
+ from .kestrel_transects import get_transect_fluxes, get_transect_profile
2
+ from .plot import plot_flux
@@ -0,0 +1,549 @@
1
+ import glob
2
+ import os
3
+ import re
4
+ from pathlib import Path
5
+
6
+ import click
7
+ import geopandas as gpd
8
+ import numpy as np
9
+ import pandas as pd
10
+ import xarray as xr
11
+ from numpy.typing import NDArray
12
+ from tqdm.autonotebook import tqdm
13
+
14
+ from shapely.geometry import LineString
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ import geopandas as gpd
19
+ from shapely.geometry import LineString, Point
20
+ import rioxarray as rxr
21
+
22
+
def thalweg_to_transect(
    thalweg_file,
    thalweg_dist=None,
    max_transect_length=100,
    DEM_file=None,
    bank_height=10,
):
    """Build transects perpendicular to a thalweg line.

    The geometry of the first feature in ``thalweg_file`` is used. One transect
    is drawn through the midpoint of every thalweg segment, normal to that
    segment. Without a DEM each transect has length ``max_transect_length``.
    With a DEM each transect is trimmed, on both sides of the thalweg, to the
    nearest sample whose elevation is at least ``bank_height`` above the
    elevation sampled at the thalweg; a side on which no such sample exists
    keeps its full half-length.

    Parameters
    ----------
    thalweg_file
        Vector file readable by ``geopandas.read_file``.
    thalweg_dist
        If given, the thalweg is resampled at this spacing along its length;
        otherwise the vertices of the geometry are used as they are.
    max_transect_length
        Full length of an untrimmed transect (centred on the thalweg).
    DEM_file
        Optional raster readable by ``rioxarray.open_rasterio``; only the first
        band is sampled.
    bank_height
        Elevation above the thalweg elevation that defines a bank.

    Returns
    -------
    geopandas.GeoDataFrame
        One two-point LineString per non-degenerate thalweg segment with an
        ``id`` column numbered from 0, in the CRS of the thalweg file.

    Raises
    ------
    ValueError
        If ``max_transect_length`` or ``thalweg_dist`` is not positive.

    Notes
    -----
    NOTE(review): the DEM is sampled at the thalweg's coordinates without any
    reprojection, so both inputs are assumed to share a CRS -- confirm.
    """
    if max_transect_length <= 0:
        raise ValueError("max_transect_length must be positive.")
    if thalweg_dist is not None and thalweg_dist <= 0:
        raise ValueError("thalweg_dist must be positive.")

    # ------------------------------------------------------------
    # 1. Load thalweg and build an (n, 2) coordinate array
    # ------------------------------------------------------------
    thalweg = gpd.read_file(thalweg_file)
    geom = thalweg.iloc[0].geometry

    if thalweg_dist is not None:
        d = np.arange(0, geom.length + thalweg_dist, thalweg_dist)
        coords = np.array([(p.x, p.y) for p in geom.interpolate(d)], dtype=float)
    else:
        coords = np.asarray(geom.coords, dtype=float)
    # Keep x/y only so geometries carrying a z value broadcast correctly below.
    coords = coords.reshape(len(coords), -1)[:, :2]

    # ------------------------------------------------------------
    # 2. Midpoints + unit normals (vectorized)
    # ------------------------------------------------------------
    dx = np.diff(coords[:, 0])
    dy = np.diff(coords[:, 1])
    seg_len = np.hypot(dx, dy)

    # Repeated vertices (or resampling distances clamped to the line end) give
    # zero-length segments whose normal is undefined; drop them.
    valid = seg_len > 0
    dx, dy, seg_len = dx[valid], dy[valid], seg_len[valid]
    pts = (0.5 * (coords[1:] + coords[:-1]))[valid]
    normals = np.column_stack([dy / seg_len, -dx / seg_len])

    # ------------------------------------------------------------
    # 3. Transect endpoints (p1 -> p2), centred on the thalweg
    # ------------------------------------------------------------
    ds = max_transect_length / 2
    p1 = pts - ds * normals
    p2 = pts + ds * normals

    n_transects = len(p1)
    ids = np.arange(n_transects)

    if DEM_file is None or n_transects == 0:
        # simple case: return transects as drawn
        geoms = [LineString([tuple(a), tuple(b)]) for a, b in zip(p1, p2)]
        return gpd.GeoDataFrame({"id": ids}, geometry=geoms, crs=thalweg.crs)

    # ------------------------------------------------------------
    # 4. Sample the DEM along every transect in one batched call
    # ------------------------------------------------------------
    DEM = rxr.open_rasterio(DEM_file)
    try:
        resolution = float(np.max(np.abs(DEM.rio.resolution())))

        # Use an odd number of evenly spaced samples spanning exactly
        # [0, max_transect_length], so the middle sample lies on the thalweg
        # and the spacing does not exceed the DEM resolution.
        half_steps = max(1, int(np.ceil(ds / resolution)))
        n_samples = 2 * half_steps + 1
        distances = np.linspace(0.0, max_transect_length, n_samples)

        # parameter t in [0, 1], shaped (1, S, 1) for broadcasting
        t = (distances / max_transect_length).reshape(1, -1, 1)
        sample_pts = p1[:, np.newaxis, :] + (p2 - p1)[:, np.newaxis, :] * t  # (T, S, 2)
        xs = sample_pts[:, :, 0].ravel()
        ys = sample_pts[:, :, 1].ravel()

        # Point-wise interpolation along a shared dimension; [0] selects the
        # first band.
        elev_flat = DEM.interp(x=("z", xs), y=("z", ys)).values[0]
    finally:
        DEM.close()
    elev = elev_flat.reshape(n_transects, n_samples)

    # ------------------------------------------------------------
    # 5. Vectorized bank detection (mid index = thalweg sample)
    # ------------------------------------------------------------
    mid = half_steps
    thr = elev[:, mid:mid + 1] + bank_height   # (T, 1)

    left_mask = elev[:, :mid] >= thr           # (T, mid)
    right_mask = elev[:, mid + 1:] >= thr      # (T, S - mid - 1)

    # Left bank: first sample at/above the threshold walking away from the
    # thalweg towards p1; fall back to the transect start.
    left_idx = np.where(
        left_mask.any(axis=1),
        (mid - 1) - left_mask[:, ::-1].argmax(axis=1),
        0,
    )

    # Right bank: first sample at/above the threshold walking towards p2;
    # fall back to the transect end.
    right_idx = np.where(
        right_mask.any(axis=1),
        (mid + 1) + right_mask.argmax(axis=1),
        n_samples - 1,
    )

    # ------------------------------------------------------------
    # 6. Reconstruct bank endpoints and build the output LineStrings
    # ------------------------------------------------------------
    tau_left = (distances[left_idx] / max_transect_length).reshape(-1, 1)
    tau_right = (distances[right_idx] / max_transect_length).reshape(-1, 1)

    bank_left = p1 + (p2 - p1) * tau_left
    bank_right = p1 + (p2 - p1) * tau_right

    geoms = [LineString([tuple(a), tuple(b)]) for a, b in zip(bank_left, bank_right)]

    return gpd.GeoDataFrame({"id": ids}, geometry=geoms, crs=thalweg.crs)
139
+
140
+
141
+
142
+ # def make_transect(data, pt0, pt1, ds=None, num_pts=100):
143
+
144
+ # t_vec = np.array([[pt1[0]-pt0[0]],[pt1[1]-pt0[1]]]) # vector along transect
145
+
146
+ # dist = np.linalg.norm(t_vec) # length of transect
147
+ # t_vec = t_vec / dist # normalize
148
+
149
+ # n_vec = np.array([[-t_vec[1]], [t_vec[0]]]) # normal vector to transect
150
+
151
+ # # Make distances along transect
152
+ # if ds is not None:
153
+ # s = np.arange(0, dist, ds)
154
+ # else:
155
+ # s = np.linspace(0, dist, num_pts, endpoint=True)
156
+
157
+ # # Get points along transect
158
+ # t = np.array([[pt0[0]],[pt0[1]]]) + t_vec*s
159
+
160
+ # xx = xr.DataArray(t[0,:], dims='s', coords={'s':s})
161
+ # yy = xr.DataArray(t[1,:], dims='s', coords={'s':s})
162
+
163
+ # # Get values along transect
164
+ # transect_values = data.interp(x=xx, y=yy, method="linear")
165
+
166
+ # # transect_values['s'] = s
167
+ # transect_values.attrs['normal_vector'] = n_vec
168
+ # transect_values.attrs['transect_vector'] = t_vec
169
+ # transect_values.attrs['transect_resolution'] = s[1]-s[0]
170
+
171
+ # return transect_values
172
+
def make_transect(data: xr.Dataset, pt0, pt1, ds: float | None = None, num_pts: int = 100) -> xr.Dataset:
    """Interpolate ``data`` linearly along the straight line from ``pt0`` to ``pt1``.

    Parameters
    ----------
    data
        Dataset with ``x`` and ``y`` coordinates to interpolate over.
    pt0, pt1
        Start and end points; only elements ``[0]`` (x) and ``[1]`` (y) are used.
    ds
        Sampling interval along the transect. When given, samples are taken at
        ``0, ds, 2*ds, ...`` strictly below the transect length, so the end
        point itself is generally not sampled.
    num_pts
        Number of evenly spaced samples, end points included; used only when
        ``ds`` is ``None``.

    Returns
    -------
    xr.Dataset
        The interpolated values on dimension ``s``, whose coordinate is the
        distance from ``pt0``. The attributes ``normal_vector`` and
        ``transect_vector`` (unit vectors, shape ``(2, 1)``) and
        ``transect_resolution`` (sample spacing, NaN for fewer than two
        samples) are added.

    Raises
    ------
    ValueError
        If the end points coincide or ``ds`` is not positive.
    """
    # vector along transect
    t_vec = np.array([[pt1[0] - pt0[0]], [pt1[1] - pt0[1]]], dtype=float)
    dist = float(np.linalg.norm(t_vec))
    if dist == 0:
        raise ValueError("Transect endpoints are identical; zero-length transect.")
    if ds is not None and not ds > 0:
        raise ValueError("Sampling interval ds must be positive.")
    t_vec /= dist
    # left-hand normal of the transect direction
    n_vec = np.array([[-t_vec[1, 0]], [t_vec[0, 0]]])

    # distances along transect
    if ds is not None:
        s = np.arange(0.0, dist, ds, dtype=float)
    else:
        s = np.linspace(0.0, dist, num_pts, endpoint=True)
    base = np.array([[pt0[0]], [pt0[1]]], dtype=float)
    t = base + t_vec * s  # shape (2, S)

    xx = xr.DataArray(t[0, :], dims='s', coords={'s': s})
    yy = xr.DataArray(t[1, :], dims='s', coords={'s': s})

    # interpolate all requested variables
    transect_values = data.interp(x=xx, y=yy, method="linear")
    # Pin the ``s`` coordinate to the along-transect distance. (Assigning the
    # x-position array here would replace the distances with x positions.)
    transect_values = transect_values.assign_coords(s=("s", s))

    transect_values.attrs['normal_vector'] = n_vec
    transect_values.attrs['transect_vector'] = t_vec
    transect_values.attrs['transect_resolution'] = float(s[1] - s[0]) if len(s) > 1 else np.nan
    return transect_values
198
+
199
+
def compute_transect_flux(transect_values):
    """Integrate fluxes across a sampled transect.

    Parameters
    ----------
    transect_values
        Object exposing ``x_velocity``, ``y_velocity``, ``flow_depth``,
        ``density`` and ``solids_fraction`` (each with a ``.values`` array) and
        an ``attrs`` mapping holding ``'solids density'``, ``'normal_vector'``
        and ``'transect_resolution'``.

    Returns
    -------
    dict
        Keys ``'VolumeFlux'``, ``'MassFlux'``, ``'SolidsVolumeFlux'`` and
        ``'SolidsMassFlux'``.

    Notes
    -----
    The magnitude of the velocity component along the normal is used, so
    contributions from both flow directions add up. Samples that evaluate to
    NaN are skipped by ``np.nansum``, and each sum is scaled by the sample
    spacing.
    """
    # Velocity components, flow depth, bulk density and solids fraction
    U = transect_values.x_velocity.values
    V = transect_values.y_velocity.values
    H = transect_values.flow_depth.values
    rho = transect_values.density.values
    phi = transect_values.solids_fraction.values

    # Density of the solid phase
    rho_s = transect_values.attrs['solids density']

    # Components of the transect normal
    nx = transect_values.attrs['normal_vector'][0]
    ny = transect_values.attrs['normal_vector'][1]

    # Spacing between samples along the transect
    ds = transect_values.attrs['transect_resolution']

    # Speed normal to the transect, shared by all three integrals.
    normal_speed = np.abs(U*nx + V*ny)

    VolFlux = np.nansum(H*normal_speed)*ds
    MassFlux = np.nansum(rho*H*normal_speed)*ds
    SolidsVolFlux = np.nansum(phi*H*normal_speed)*ds
    SolidsMassFlux = rho_s*SolidsVolFlux

    return {'VolumeFlux':VolFlux, 'MassFlux':MassFlux, 'SolidsVolumeFlux':SolidsVolFlux, 'SolidsMassFlux':SolidsMassFlux}
231
+
232
+
def get_transect_profile(kestrel_file, transect_file, variable: str | list[str], resolution=1.0):
    """Sample one or more variables of a snapshot along every transect.

    NOTE(review): this module defines ``get_transect_profile`` a second time
    further down; the later definition is the one bound at import time.

    Parameters
    ----------
    kestrel_file
        NetCDF snapshot readable by ``xarray.load_dataset``; it must carry a
        ``crs_epsg`` attribute.
    transect_file
        Vector file readable by ``geopandas.read_file``; it is reprojected to
        the dataset's EPSG code. Only the first two vertices of each geometry
        are used.
    variable
        A variable name or a list of names present in the dataset.
    resolution
        Sampling interval passed to ``make_transect`` as ``ds``.

    Returns
    -------
    geopandas.GeoDataFrame
        Long-form table with one row per sample: the columns produced by
        ``Dataset.to_dataframe`` for the interpolated transect plus
        ``transect_index``, ``Time`` (the dataset's ``time`` attribute, or
        ``None``) and the transect geometry.

    Raises
    ------
    ValueError
        If ``crs_epsg`` is missing or a requested variable is not in the dataset.
    """
    data = xr.load_dataset(kestrel_file)
    epsg = data.attrs.get('crs_epsg', None)
    if epsg is None:
        raise ValueError("Dataset missing 'crs_epsg' attribute.")

    transects = gpd.read_file(transect_file).to_crs(epsg=epsg)

    # normalize variable list
    variables = [variable] if isinstance(variable, str) else list(variable)
    for v in variables:
        if v not in data.variables:
            raise ValueError(f"Variable {v!r} is not in dataset.")
    data = data[variables]

    frames = []
    for k, g in transects.iterrows():
        p = list(g.geometry.coords)
        vals = make_transect(data, p[0], p[1], ds=resolution)

        df = vals.to_dataframe().reset_index()  # has 's' and variables
        df['transect_index'] = k
        df['Time'] = data.attrs.get('time', None)
        # The ``geometry`` argument must be array-like (a bare geometry object
        # is looked up as a column name), so repeat the transect per sample.
        frames.append(gpd.GeoDataFrame(df, geometry=[g.geometry] * len(df), crs=transects.crs))

    long_df = pd.concat(frames, ignore_index=True)
    return gpd.GeoDataFrame(long_df, geometry='geometry', crs=transects.crs)
260
+
261
+ # def get_transect_fluxes(kestrel_file, transect_shpfile):
262
+
263
+ # data = xr.load_dataset(kestrel_file)
264
+
265
+ # transects = gpd.read_file(transect_shpfile)
266
+ # transects = transects.to_crs(data.attrs['crs_epsg'])
267
+
268
+ # transects['Volume flux'] = np.nan
269
+ # transects['Mass flux'] = np.nan
270
+ # transects['Solids Volume flux'] = np.nan
271
+ # transects['Solids Mass flux'] = np.nan
272
+ # transects['Time'] = data.attrs['time']
273
+
274
+ # for k, g in transects.iterrows():
275
+ # p = g.geometry.coords
276
+ # values = make_transect(data, p[0], p[1], num_pts=100)
277
+ # fluxes = compute_transect_flux(values)
278
+ # transects.loc[k,'Volume flux'] = fluxes['VolumeFlux']
279
+ # transects.loc[k,'Mass flux'] = fluxes['MassFlux']
280
+ # transects.loc[k,'Solids Volume flux'] = fluxes['SolidsVolumeFlux']
281
+ # transects.loc[k,'Solids Mass flux'] = fluxes['SolidsMassFlux']
282
+
283
+ # return transects
284
+
def get_transect_fluxes(kestrel_file, transect_shpfile):
    """Compute the fluxes of one snapshot across every transect.

    The snapshot is loaded with ``xarray.load_dataset`` and the transects are
    read with geopandas and reprojected to the dataset's ``crs_epsg``. Each
    transect is sampled at 100 points between its first two vertices and
    integrated with ``compute_transect_flux``.

    Returns the transect GeoDataFrame with the added columns ``Volume flux``,
    ``Mass flux``, ``Solids Volume flux``, ``Solids Mass flux`` and ``Time``
    (the dataset's ``time`` attribute, or ``None`` when absent).

    Raises ``ValueError`` when the dataset has no ``crs_epsg`` attribute.
    """
    data = xr.load_dataset(kestrel_file)
    epsg = data.attrs.get('crs_epsg', None)
    if epsg is None:
        raise ValueError("Dataset missing 'crs_epsg' attribute.")

    transects = gpd.read_file(transect_shpfile).to_crs(epsg)

    # Output column -> key of the dict returned by compute_transect_flux.
    flux_columns = {
        'Volume flux': 'VolumeFlux',
        'Mass flux': 'MassFlux',
        'Solids Volume flux': 'SolidsVolumeFlux',
        'Solids Mass flux': 'SolidsMassFlux',
    }
    for column in flux_columns:
        transects[column] = np.nan
    transects['Time'] = data.attrs.get('time', None)

    for label, feature in transects.iterrows():
        vertices = list(feature.geometry.coords)
        sampled = make_transect(data, vertices[0], vertices[1], num_pts=100)
        fluxes = compute_transect_flux(sampled)
        for column, key in flux_columns.items():
            transects.loc[label, column] = fluxes[key]
    return transects
308
+
309
+
def get_transect_profile(kestrel_file, transect_file, variable: str | list[str], resolution=1):
    """Sample one or more variables of a snapshot along every transect.

    Parameters
    ----------
    kestrel_file
        NetCDF snapshot readable by ``xarray.load_dataset``; it must carry the
        attributes ``crs_epsg`` and ``time``.
    transect_file
        Vector file readable by ``geopandas.read_file``; it is reprojected to
        the dataset's ``crs_epsg``. Only the first two vertices of each
        geometry are used.
    variable
        A variable name or a sequence of names present in the dataset.
    resolution
        Sampling interval passed to ``make_transect`` as ``ds``.

    Returns
    -------
    geopandas.GeoDataFrame
        Long-form table in the dataset's CRS with one row per sample and the
        columns ``geometry`` (the whole transect), ``transect_index``, ``s``,
        ``Time`` and one float column per requested variable.

    Raises
    ------
    ValueError
        If a required attribute is missing or a variable is not in the dataset.
    """
    data = xr.load_dataset(kestrel_file)

    epsg = data.attrs.get('crs_epsg', None)
    if epsg is None:
        raise ValueError("Dataset missing 'crs_epsg' attribute.")
    if 'time' not in data.attrs:
        raise ValueError("Dataset missing 'time' attribute.")
    time_val = data.attrs['time']

    # Already in the dataset's CRS after this call, so no reprojection is
    # needed on the way out.
    transects = gpd.read_file(transect_file).to_crs(epsg)

    variables = [variable] if isinstance(variable, str) else list(variable)
    for v in variables:
        if v not in data.variables:
            raise ValueError(f'Variable {v} is not in dataset')

    data = data[variables]

    column_order = ['geometry', 'transect_index', 's', 'Time', *variables]

    frames = []
    for k, g in transects.iterrows():
        p = g.geometry.coords
        profile_values = make_transect(data, p[0], p[1], ds=resolution)

        # Build the rows of this transect from whole arrays instead of one
        # Python dict per sample.
        s_vals = np.asarray(profile_values['s'].values, dtype=float)
        columns = {'transect_index': k, 's': s_vals, 'Time': time_val}
        for v in variables:
            columns[v] = np.asarray(profile_values[v].values, dtype=float)
        frames.append(
            gpd.GeoDataFrame(columns, geometry=[g.geometry] * s_vals.size, crs=transects.crs)
        )

    if not frames:
        # No transects in the file: return an empty table with the same columns.
        empty = {name: [] for name in column_order if name != 'geometry'}
        return gpd.GeoDataFrame(empty, geometry=[], crs=transects.crs)[column_order]

    long_df = pd.concat(frames, ignore_index=True)
    return gpd.GeoDataFrame(long_df[column_order], geometry='geometry', crs=transects.crs)
349
+
350
+
351
+
def arclength(x: NDArray, y: NDArray, a: float, b: float):
    """Return the arc length of the sampled curve ``y(x)`` for ``a <= x <= b``.

    The slope is estimated with ``np.gradient`` on the samples inside the
    range and ``sqrt(1 + (dy/dx)**2)`` is integrated with the trapezoidal rule.
    Only samples lying inside ``[a, b]`` take part, so the curve is not
    extended to ``a`` or ``b`` when they fall between samples.

    Parameters
    ----------
    x, y
        Sample positions and values, of equal length.
    a, b
        Inclusive bounds on ``x``.

    Returns
    -------
    float
        The arc length, or ``0.0`` when fewer than two samples lie in range
        (no segment to measure).
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)

    bounds = (x >= a) & (x <= b)
    xs = x[bounds]
    ys = y[bounds]

    # np.gradient needs at least two points.
    if xs.size < 2:
        return 0.0

    dydx = np.gradient(ys, xs)
    integrand = np.sqrt(1 + dydx**2)

    # np.trapezoid exists from NumPy 2.0 on; older releases provide np.trapz.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    return trapezoid(integrand, xs)
360
+
@click.command()
@click.option("-o", "--out_file", type=str, default="transect_data", help="name of output GeoPackage file (without extension)")
@click.option("--excel", default=None, help="name of optional output Excel file (without extension)")
@click.option("-q", "--quiet", default=False, is_flag=True, help="surpress output messages")
@click.argument("input_dir", type=click.Path(exists=True))
@click.argument("transects", type=str)
def compute_fluxes(out_file, excel, quiet, input_dir, transects):
    """Compute fluxes using kestrel netCDF results in dir across transects in transects.

    transects should be a georeferenced vector file readable by geopandas (e.g. a shapefile or geopackage).
    """
    # Snapshots are the files in input_dir whose name ends in a digit followed
    # by ".nc"; they are processed in increasing order of that trailing number
    # and the per-snapshot tables are concatenated before writing.

    if not quiet:
        print(f"Processing {input_dir} to extract data on transects in file {transects}.")
        print(f"Data will be stored in as a GeoPackage in {out_file}.gpkg")
        if excel is not None:
            print(f"and as an Excel spreadsheet in file {excel}.xlsx")

    # Trailing run of digits immediately before the ".nc" extension.
    file_pattern = re.compile(r'(\d+)\.nc$')

    def get_order(file):
        match = file_pattern.search(Path(file).name)
        # Names without a number sort last.
        return int(match.group(1)) if match else float('inf')

    # Escape the directory so glob metacharacters in its name are literal.
    pattern = os.path.join(glob.escape(os.fspath(input_dir)), '*[0-9].nc')
    snapshot_files = sorted(glob.glob(pattern), key=get_order)
    if not snapshot_files:
        raise click.ClickException(f"No snapshot files matching '*[0-9].nc' found in {input_dir}.")

    progress = snapshot_files if quiet else tqdm(snapshot_files)
    transect_data = []
    for f in progress:
        if not quiet:
            progress.set_description(f"Processing file: {f}")
        transect_data.append(get_transect_fluxes(f, transects))

    fluxes_on_transects = pd.concat(transect_data)

    fluxes_on_transects.to_file(f"{out_file}.gpkg", driver="GPKG")

    if excel is not None:
        fluxes_on_transects.to_excel(f"{excel}.xlsx")
406
+
407
+
@click.command()
@click.option("-r", "--resolution", type=float, default=1.0, show_default=True, help="Transect sampling resolution")
@click.option("-o", "--out_file", type=str, default="transect_data", help="name of output GeoPackage file (without extension)")
@click.option("--excel", default=None, help="name of optional output Excel file (without extension)")
@click.option("-q", "--quiet", default=False, is_flag=True, help="surpress output messages")
@click.option("--extrema", type=click.Choice(["on", "off", "only"], case_sensitive=False), default="off", show_default=True, help="Include min/max across s: 'on' adds _min/_max columns; 'only' saves only extrema per transect/time; 'off' disables this")
@click.argument("input_dir", type=click.Path(exists=True))
@click.argument("transects", type=str)
@click.argument("var", nargs=-1, type=str)
def compute_profiles(resolution, out_file, excel, quiet, extrema, input_dir, transects, var):
    """Compute profiles using kestrel netCDF results in `input_dir` across `transects`.

    The command reads kestrel NetCDF snapshots from `input_dir`, extracts transect profiles
    across the provided transect vector (e.g. a shapefile or GeoPackage), and writes
    the concatenated results to a GeoPackage and optionally an Excel workbook.

    Parameters
    ----------
    resolution : float
        Sampling resolution along each transect (units same as dataset). Default 1.0.
    out_file : str
        Output GeoPackage filename (without extension).
    excel : str | None
        Optional Excel filename (without extension) to also write results to.
    quiet : bool
        Suppress progress output when True.
    extrema : {'on', 'off', 'only'}
        Controls computation and output of per-transect-per-snapshot extrema (min/max):
          - 'off'  : do not compute extrema (default)
          - 'on'   : compute extrema and add <var>_min / <var>_max columns to the long-form table
          - 'only' : save only a compact per-transect-per-time table that contains geometry,
                     Time and only the <var>_min / <var>_max columns
    input_dir : str
        Directory containing kestrel NetCDF snapshots.
    transects : str
        Path to a georeferenced vector file containing transect geometries.
    var : tuple[str]
        One or more variable names to extract.

    Notes
    -----
    - The in-memory output is a long-form GeoDataFrame: each row corresponds to a single
      sample point along a transect and includes columns: geometry, transect_index, s, Time
      and one column per requested variable.
    - Many GIS formats do not preserve array/list types. The long-form table stores scalars
      and exports cleanly. For array-preservation, prefer formats like Parquet or storing
      arrays separately (e.g., .npy).

    Examples
    --------
    CLI (single variable):
        kestrel-transects-profiles /path/to/snapshots transects.gpkg flow_depth

    CLI (multiple variables + extrema):
        kestrel-transects-profiles /path/to/snapshots transects.gpkg flow_depth velocity --resolution 0.5 --extrema on

    CLI (save only extrema per transect/time):
        kestrel-transects-profiles /path/to/snapshots transects.gpkg flow_depth velocity --extrema only

    In-Python (formatting the long-form output into a pivot table):
        # get_transect_profile returns the same long-format rows (useful for single snapshot)
        profiles_long = get_transect_profile(kestrel_file, transect_file, ['flow_depth','velocity'], resolution=1.0)

        # Pivot a single variable into a table with rows=transect_index and columns=s
        flow_table = profiles_long.pivot(index='transect_index', columns='s', values='flow_depth')

        # For multiple variables: melt then pivot, producing a MultiIndex column with (variable, s)
        melted = profiles_long.melt(
            id_vars=['transect_index', 's', 'Time', 'geometry'],
            value_vars=['flow_depth', 'velocity'],
            var_name='variable', value_name='value'
        )
        pivoted = melted.pivot_table(index='transect_index', columns=['variable', 's'], values='value')
    """
    # VAR is variadic; drop repeats (keeping order) and insist on at least one name.
    variables = list(dict.fromkeys(var))
    if not variables:
        raise click.UsageError("At least one variable name (VAR) must be given.")

    if not quiet:
        print(f"Processing {input_dir} to extract data on transects in file {transects}.")
        print(f"Data will be stored in as a GeoPackage in {out_file}.gpkg")
        if excel is not None:
            print(f"and as an Excel spreadsheet in file {excel}.xlsx")

    # Trailing run of digits immediately before the ".nc" extension.
    file_pattern = re.compile(r'(\d+)\.nc$')

    def get_order(file):
        match = file_pattern.search(Path(file).name)
        # Names without a number sort last.
        return int(match.group(1)) if match else float('inf')

    # Escape the directory so glob metacharacters in its name are literal.
    pattern = os.path.join(glob.escape(os.fspath(input_dir)), '*[0-9].nc')
    snapshot_files = sorted(glob.glob(pattern), key=get_order)
    if not snapshot_files:
        raise click.ClickException(f"No snapshot files matching '*[0-9].nc' found in {input_dir}.")

    progress = snapshot_files if quiet else tqdm(snapshot_files)
    transect_data = []
    for f in progress:
        if not quiet:
            progress.set_description(f"Processing file: {f}")
        transect_data.append(get_transect_profile(f, transects, variables, resolution=resolution))

    profiles_on_transects = pd.concat(transect_data)

    mode = extrema.lower()
    if mode in ("on", "only"):
        keys = ['transect_index', 'Time']

        # Per-transect-per-snapshot minima and maxima; agg yields (var, stat)
        # column tuples, flattened here to 'var_min' / 'var_max'.
        agg_df = profiles_on_transects.groupby(keys)[variables].agg(['min', 'max'])
        agg_df.columns = [f"{name}_{stat}" for name, stat in agg_df.columns]
        extrema_df = agg_df.reset_index()

        if mode == "on":
            # Join the extrema onto every row of the long-form table.
            joined = profiles_on_transects.set_index(keys).join(agg_df, how='left')
            profiles_on_transects = joined.reset_index()
        else:
            # Keep only the extrema, with the first geometry of each
            # (transect_index, Time) group attached.
            meta = profiles_on_transects.groupby(keys).agg({'geometry': 'first'}).reset_index()
            extrema_df = pd.merge(extrema_df, meta, on=keys)
            profiles_on_transects = gpd.GeoDataFrame(
                extrema_df, geometry='geometry', crs=profiles_on_transects.crs
            )

    profiles_on_transects.to_file(f"{out_file}.gpkg", driver="GPKG")

    if excel is not None:
        profiles_on_transects.to_excel(f"{excel}.xlsx")
547
+
548
+ # if __name__=="__main__":
549
+ # compute_fluxes()
@@ -0,0 +1,3 @@
1
+ from .cm import (conc_colours, depo_colours, depth_colours, ero_colours,
2
+ speed_colours)
3
+ from .plot import plot_extrema, plot_flux
@@ -0,0 +1,66 @@
1
+ from matplotlib.colors import LinearSegmentedColormap
2
+
def _cmap_from_rgb255(name, stops):
    """Build a 100-level colormap from ``(r, g, b, alpha)`` stops.

    ``r``, ``g`` and ``b`` are given on the 0-255 scale and divided by 255.0;
    ``alpha`` is passed through unchanged.
    """
    colours = [(r / 255.0, g / 255.0, b / 255.0, alpha) for r, g, b, alpha in stops]
    return LinearSegmentedColormap.from_list(name, colours, N=100)


# Starts fully transparent, then runs from pale to dark teal.
depth_colours = _cmap_from_rgb255('depth_cmap', [
    (209, 238, 234, 0),
    (209, 238, 234, 1),
    (209, 238, 234, 1),
    (168, 219, 217, 1),
    (133, 196, 201, 1),
    (104, 171, 184, 1),
    (79, 144, 166, 1),
    (59, 115, 143, 1),
    (42, 86, 116, 1),
])

# Fully opaque, pale yellow through to dark magenta.
speed_colours = _cmap_from_rgb255('speed_cmap', [
    (252, 222, 156, 1),
    (250, 164, 118, 1),
    (240, 116, 110, 1),
    (227, 79, 111, 1),
    (220, 57, 119, 1),
    (185, 37, 122, 1),
    (124, 29, 111, 1),
])

# Fully opaque, grey-teal through to brown.
conc_colours = _cmap_from_rgb255('conc_cmap', [
    (127, 162, 163, 1),
    (97, 132, 133, 1),
    (80, 116, 117, 1),
    (114, 134, 123, 1),
    (155, 127, 104, 1),
    (196, 119, 87, 1),
    (147, 81, 22, 1),
])

# Dark blue fading to transparent white at the top of the range.
ero_colours = _cmap_from_rgb255('ero_cmap', [
    (8, 48, 107, 1),
    (8, 81, 156, 1),
    (33, 113, 181, 1),
    (66, 146, 198, 1),
    (107, 174, 214, 1),
    (158, 202, 225, 1),
    (198, 219, 239, 1),
    (222, 235, 247, 1),
    (255, 255, 255, 0),
])

# Transparent near-white at the bottom of the range, rising to dark red.
depo_colours = _cmap_from_rgb255('depo_cmap', [
    (255, 245, 240, 0),
    (254, 224, 210, 1),
    (252, 187, 161, 1),
    (251, 106, 74, 1),
    (239, 59, 44, 1),
    (203, 24, 29, 1),
    (165, 15, 21, 1),
    (103, 0, 13, 1),
])
@@ -0,0 +1,162 @@
1
+ from pathlib import Path
2
+ from typing import Union
3
+
4
+ import geopandas as gpd
5
+ import matplotlib.pyplot as plt
6
+ import numpy as np
7
+ from matplotlib.axes import Axes
8
+ from matplotlib.colors import Colormap, LogNorm, Normalize, PowerNorm
9
+
10
+ from .cm import (conc_colours, depo_colours, depth_colours, ero_colours,
11
+ speed_colours)
12
+
# Display metadata for the flux columns that plot_flux can draw: colour-bar
# label, unit string and default colormap, keyed by column name.
flux_vars = {
    'Volume flux': {'label':'Volume flux', 'unit': r'm^{3}/s', 'cmap':plt.cm.plasma},
    'Mass flux': {'label':'Mass flux', 'unit': 'kg/s', 'cmap':plt.cm.viridis},
    'Solids Volume flux': {'label':'Solids Volume flux', 'unit': r'm^{3}/s', 'cmap':conc_colours},
    'Solids Mass flux': {'label':'Solids Mass flux', 'unit': 'kg/s', 'cmap':conc_colours},
}

# Display metadata for the extrema columns that plot_extrema can draw, keyed by
# column name in the form '<variable>_min' / '<variable>_max'.
# NOTE(review): the elevation_change entries have an empty unit -- confirm
# whether that is intended.
extrema_vars = {
    'flow_depth_min': {'label':'Minimum flow depth', 'unit':'m', 'cmap': depth_colours},
    'flow_depth_max': {'label':'Maximum flow depth', 'unit':'m', 'cmap': depth_colours},
    'flow_speed_min': {'label':'Minimum flow speed', 'unit':'m/s', 'cmap': speed_colours},
    'flow_speed_max': {'label':'Maximum flow speed', 'unit':'m/s', 'cmap': speed_colours},
    'solids_fraction_min': {'label':'Minimum solids fraction', 'unit':'', 'cmap': conc_colours},
    'solids_fraction_max': {'label':'Maximum solids fraction', 'unit':'', 'cmap': conc_colours},
    'elevation_change_min': {'label':'Minimum elevation change', 'unit':'', 'cmap': ero_colours},
    'elevation_change_max': {'label':'Maximum elevation change', 'unit':'', 'cmap': depo_colours},
}
30
+
def plot_flux(fluxes_file: Path, transects_file: Path, *,
              var: str = 'Mass flux',
              ax: Axes | None = None,
              cmap: Union[Colormap, str] | None = "viridis",
              norm: object = None,
              vmin: float = 0,
              vmax: float | None = None) -> None:
    """Draw one flux column as an image of transect (rows) against time (columns).

    Args:
        fluxes_file: File readable by geopandas with the columns ``id``,
            ``Time`` and ``var``.
        transects_file: File readable by geopandas holding the transects. The
            vertical axis is the distance of each transect centroid from the
            centroid of the transect labelled 0.
        var: Column to plot; can be one of ['Volume flux', 'Mass flux',
            'Solids Volume flux', 'Solids Mass flux'] or any other column of
            ``fluxes_file``.
        ax: Axes to draw on; a new figure is created when omitted.
        cmap: Colormap or colormap name. ``None`` selects the colormap
            registered for ``var`` in ``flux_vars`` ("viridis" for other
            columns).
        norm: Matplotlib normalisation. ``None`` creates a fresh
            ``Normalize()`` for this call; an instance passed in has its
            ``vmin`` / ``vmax`` overwritten.
        vmin: Lower colour limit; values below it are masked. A value close to
            zero is replaced by a small positive number when ``norm`` is a
            ``LogNorm``.
        vmax: Upper colour limit; defaults to the column maximum.

    Raises:
        ValueError: If ``var`` is not a column of ``fluxes_file``.
    """

    fluxes = gpd.read_file(fluxes_file)

    if var not in fluxes.columns:
        raise ValueError(f"Variable var={var} not found in columns of {fluxes_file}. \n Recognized variables are {list(fluxes.columns)}")

    transects = gpd.read_file(transects_file)
    transect_centroid = transects.centroid
    transect_distance = transect_centroid.distance(transect_centroid.loc[0])  # type: ignore

    time = fluxes.Time.unique()

    # Rows are addressed by transect id, columns by snapshot; cells with no
    # record stay NaN.
    flux_data = np.full((transect_distance.size, time.size), np.nan)

    for it, t in enumerate(time):
        at_time = fluxes.loc[fluxes.Time == t]
        for i in at_time.id.unique():
            try:
                flux_data[i, it] = float(at_time.loc[at_time.id == i, var].iloc[0])  # type: ignore
            except (IndexError, TypeError, ValueError):
                # Best effort: skip ids that do not address a row of the image
                # and values that cannot be converted to float.
                continue

    flux_data = np.ma.masked_less(flux_data, vmin)

    if ax is None:
        _, ax = plt.subplots()

    # Columns outside flux_vars are still plottable: fall back to the column
    # name as label and to "viridis" as colormap.
    meta = flux_vars.get(var, {})

    if cmap is None:
        cmap = meta.get('cmap', "viridis")

    if isinstance(cmap, str):
        cmap = plt.colormaps.get_cmap(cmap)

    if vmax is None:
        vmax = fluxes[var].max()

    # A Normalize() default in the signature would be one shared object whose
    # limits leak from call to call, so build it here instead.
    if norm is None:
        norm = Normalize()

    if isinstance(norm, LogNorm) and np.isclose(vmin, 0.0):
        # A logarithmic scale cannot start at zero.
        vmin = np.finfo(float).eps*10
    norm.vmin = vmin  # type: ignore
    norm.vmax = vmax  # type: ignore

    img = ax.imshow(flux_data,
                    extent=(time.min(), time.max(), transect_distance.max(), transect_distance.min()),
                    norm=norm,  # type: ignore
                    cmap=cmap)

    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Downstream distance (m)')

    # Attach the colour bar to the figure that owns ``ax`` rather than to
    # pyplot's current figure.
    cbar = ax.figure.colorbar(img, ax=ax)
    label = meta.get('label', var)
    unit = meta.get('unit', '')
    cbar.set_label(f"{label} ({unit})" if meta else label)
100
+
101
+
102
+
def plot_extrema(extrema_file: Path, transects_file: Path, *,
                 var: str = 'flow_depth_max',
                 ax: Axes | None = None,
                 cmap: Union[Colormap, str] | None = None,
                 norm: object = None,
                 vmin: float = 0,
                 vmax: float | None = None) -> None:
    """Draw one extrema column as an image of transect (rows) against time (columns).

    Args:
        extrema_file: File readable by geopandas with the columns
            ``transect_index``, ``Time`` and ``var``.
        transects_file: File readable by geopandas holding the transects. The
            vertical axis is the distance of each transect centroid from the
            centroid of the transect labelled 0.
        var: Column to plot, e.g. one of the keys of ``extrema_vars`` such as
            'flow_depth_max'.
        ax: Axes to draw on; a new figure is created when omitted.
        cmap: Colormap or colormap name. ``None`` selects the colormap
            registered for ``var`` in ``extrema_vars`` ("viridis" for other
            columns).
        norm: Matplotlib normalisation. ``None`` creates a fresh
            ``Normalize()`` for this call; an instance passed in has its
            ``vmin`` / ``vmax`` overwritten.
        vmin: Lower colour limit. A value close to zero is replaced by a small
            positive number when ``norm`` is a ``LogNorm``.
        vmax: Upper colour limit; defaults to the column maximum.

    Raises:
        ValueError: If ``var`` is not a column of ``extrema_file``.
    """

    extrema = gpd.read_file(extrema_file)

    if var not in extrema.columns:
        raise ValueError(f"Variable var={var} not found in columns of {extrema_file}. \n Recognized variables are {list(extrema.columns)}")

    transects = gpd.read_file(transects_file)
    transect_centroid = transects.centroid
    transect_distance = transect_centroid.distance(transect_centroid.loc[0])  # type: ignore

    time = extrema.Time.unique()

    # Rows are addressed by transect index, columns by snapshot; cells with no
    # record stay 0.
    extrema_data = np.zeros((transect_distance.size, time.size))

    for it, t in enumerate(time):
        at_time = extrema.loc[extrema.Time == t]
        for i in at_time.transect_index.unique():
            try:
                extrema_data[i, it] = float(at_time.loc[at_time.transect_index == i, var].iloc[0])  # type: ignore
            except (IndexError, TypeError, ValueError):
                # Best effort: skip indices that do not address a row of the
                # image and values that cannot be converted to float.
                continue

    if ax is None:
        _, ax = plt.subplots()

    # Columns outside extrema_vars are still plottable: fall back to the
    # column name as label and to "viridis" as colormap.
    meta = extrema_vars.get(var, {})

    if cmap is None:
        cmap = meta.get('cmap', "viridis")

    if isinstance(cmap, str):
        cmap = plt.colormaps.get_cmap(cmap)

    if vmax is None:
        vmax = extrema[var].max()

    # A Normalize() default in the signature would be one shared object whose
    # limits leak from call to call, so build it here instead.
    if norm is None:
        norm = Normalize()

    if isinstance(norm, LogNorm) and np.isclose(vmin, 0.0):
        # A logarithmic scale cannot start at zero.
        vmin = np.finfo(float).eps*10
    norm.vmin = vmin  # type: ignore
    norm.vmax = vmax  # type: ignore

    ax.set_xlabel('Time (s)')
    ax.set_ylabel('Downstream distance (m)')

    img = ax.imshow(extrema_data,
                    extent=(time.min(), time.max(), transect_distance.max(), transect_distance.min()),
                    norm=norm,  # type: ignore
                    cmap=cmap)

    # Attach the colour bar to the figure that owns ``ax`` rather than to
    # pyplot's current figure.
    cbar = ax.figure.colorbar(img, ax=ax)
    label = meta.get('label', var)
    unit = meta.get('unit', '')
    cbar.set_label(f"{label} ({unit})" if meta else label)
162
+
@@ -0,0 +1,24 @@
1
+ Metadata-Version: 2.4
2
+ Name: kestrel_transects
3
+ Version: 1.0.0
4
+ Summary: Post-processing tool for University of Bristol's Kestrel morphodynamic surface flow modelling software to extract data on transects
5
+ Author-email: "Mark J. Woodhouse" <mark.woodhouse@bristol.ac.uk>
6
+ Classifier: Programming Language :: Python :: 3
7
+ Classifier: Operating System :: OS Independent
8
+ Requires-Python: >=3.10
9
+ Description-Content-Type: text/markdown
10
+ License-File: LICENSE
11
+ Requires-Dist: click
12
+ Requires-Dist: geopandas
13
+ Requires-Dist: numpy
14
+ Requires-Dist: pandas
15
+ Requires-Dist: netCDF4
16
+ Requires-Dist: xarray
17
+ Requires-Dist: tqdm
18
+ Requires-Dist: scipy
19
+ Provides-Extra: io-h5netcdf
20
+ Requires-Dist: h5netcdf; extra == "io-h5netcdf"
21
+ Requires-Dist: h5py; extra == "io-h5netcdf"
22
+ Provides-Extra: excel
23
+ Requires-Dist: openpyxl; extra == "excel"
24
+ Dynamic: license-file
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ src/kestrel_transects/__init__.py
5
+ src/kestrel_transects/kestrel_transects.py
6
+ src/kestrel_transects.egg-info/PKG-INFO
7
+ src/kestrel_transects.egg-info/SOURCES.txt
8
+ src/kestrel_transects.egg-info/dependency_links.txt
9
+ src/kestrel_transects.egg-info/entry_points.txt
10
+ src/kestrel_transects.egg-info/requires.txt
11
+ src/kestrel_transects.egg-info/top_level.txt
12
+ src/kestrel_transects/plot/__init__.py
13
+ src/kestrel_transects/plot/cm.py
14
+ src/kestrel_transects/plot/plot.py
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ kestrel-transects-fluxes = kestrel_transects.kestrel_transects:compute_fluxes
3
+ kestrel-transects-profiles = kestrel_transects.kestrel_transects:compute_profiles
@@ -0,0 +1,15 @@
1
+ click
2
+ geopandas
3
+ numpy
4
+ pandas
5
+ netCDF4
6
+ xarray
7
+ tqdm
8
+ scipy
9
+
10
+ [excel]
11
+ openpyxl
12
+
13
+ [io-h5netcdf]
14
+ h5netcdf
15
+ h5py
@@ -0,0 +1 @@
1
+ kestrel_transects