ras-commander 0.51.0__py3-none-any.whl → 0.52.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,309 +1,554 @@
1
- """
2
- Class: HdfFluvialPluvial
3
-
4
- All of the methods in this class are static and are designed to be used without instantiation.
5
-
6
- List of Functions in HdfFluvialPluvial:
7
- - calculate_fluvial_pluvial_boundary()
8
- - _process_cell_adjacencies()
9
- - _identify_boundary_edges()
10
-
11
- """
12
-
13
- from typing import Dict, List, Tuple
14
- import pandas as pd
15
- import geopandas as gpd
16
- from collections import defaultdict
17
- from shapely.geometry import LineString, MultiLineString # Added MultiLineString import
18
- from tqdm import tqdm
19
- from .HdfMesh import HdfMesh
20
- from .HdfUtils import HdfUtils
21
- from .Decorators import standardize_input
22
- from .HdfResultsMesh import HdfResultsMesh
23
- from .LoggingConfig import get_logger
24
- from pathlib import Path
25
-
26
- logger = get_logger(__name__)
27
-
28
- class HdfFluvialPluvial:
29
- """
30
- A class for analyzing and visualizing fluvial-pluvial boundaries in HEC-RAS 2D model results.
31
-
32
- This class provides methods to process and visualize HEC-RAS 2D model outputs,
33
- specifically focusing on the delineation of fluvial and pluvial flood areas.
34
- It includes functionality for calculating fluvial-pluvial boundaries based on
35
- the timing of maximum water surface elevations.
36
-
37
- Key Concepts:
38
- - Fluvial flooding: Flooding from rivers/streams
39
- - Pluvial flooding: Flooding from rainfall/surface water
40
- - Delta_t: Time threshold (in hours) used to distinguish between fluvial and pluvial cells.
41
- Cells with max WSE time differences greater than delta_t are considered boundaries.
42
-
43
- Data Requirements:
44
- - HEC-RAS plan HDF file containing:
45
- - 2D mesh cell geometry (accessed via HdfMesh)
46
- - Maximum water surface elevation times (accessed via HdfResultsMesh)
47
-
48
- Usage Example:
49
- >>> ras = init_ras_project(project_path, ras_version)
50
- >>> hdf_path = Path("path/to/plan.hdf")
51
- >>> boundary_gdf = HdfFluvialPluvial.calculate_fluvial_pluvial_boundary(
52
- ... hdf_path,
53
- ... delta_t=12
54
- ... )
55
- """
56
- def __init__(self):
57
- self.logger = get_logger(__name__) # Initialize logger with module name
58
-
59
- @staticmethod
60
- @standardize_input(file_type='plan_hdf')
61
- def calculate_fluvial_pluvial_boundary(hdf_path: Path, delta_t: float = 12) -> gpd.GeoDataFrame:
62
- """
63
- Calculate the fluvial-pluvial boundary based on cell polygons and maximum water surface elevation times.
64
-
65
- Args:
66
- hdf_path (Path): Path to the HEC-RAS plan HDF file
67
- delta_t (float): Threshold time difference in hours. Cells with time differences
68
- greater than this value are considered boundaries. Default is 12 hours.
69
-
70
- Returns:
71
- gpd.GeoDataFrame: GeoDataFrame containing the fluvial-pluvial boundaries with:
72
- - geometry: LineString features representing boundaries
73
- - CRS: Coordinate reference system matching the input HDF file
74
-
75
- Raises:
76
- ValueError: If no cell polygons or maximum water surface data found in HDF file
77
- Exception: If there are errors during boundary calculation
78
-
79
- Note:
80
- The returned boundaries represent locations where the timing of maximum water surface
81
- elevation changes significantly (> delta_t), indicating potential transitions between
82
- fluvial and pluvial flooding mechanisms.
83
- """
84
- try:
85
- # Get cell polygons from HdfMesh
86
- logger.info("Getting cell polygons from HDF file...")
87
- cell_polygons_gdf = HdfMesh.get_mesh_cell_polygons(hdf_path)
88
- if cell_polygons_gdf.empty:
89
- raise ValueError("No cell polygons found in HDF file")
90
-
91
- # Get max water surface data from HdfResultsMesh
92
- logger.info("Getting maximum water surface data from HDF file...")
93
- max_ws_df = HdfResultsMesh.get_mesh_max_ws(hdf_path)
94
- if max_ws_df.empty:
95
- raise ValueError("No maximum water surface data found in HDF file")
96
-
97
- # Convert timestamps using the renamed utility function
98
- logger.info("Converting maximum water surface timestamps...")
99
- if 'maximum_water_surface_time' in max_ws_df.columns:
100
- max_ws_df['maximum_water_surface_time'] = max_ws_df['maximum_water_surface_time'].apply(
101
- lambda x: HdfUtils.parse_ras_datetime(x) if isinstance(x, str) else x
102
- )
103
-
104
- # Process cell adjacencies
105
- logger.info("Processing cell adjacencies...")
106
- cell_adjacency, common_edges = HdfFluvialPluvial._process_cell_adjacencies(cell_polygons_gdf)
107
-
108
- # Get cell times from max_ws_df
109
- logger.info("Extracting cell times from maximum water surface data...")
110
- cell_times = max_ws_df.set_index('cell_id')['maximum_water_surface_time'].to_dict()
111
-
112
- # Identify boundary edges
113
- logger.info("Identifying boundary edges...")
114
- boundary_edges = HdfFluvialPluvial._identify_boundary_edges(
115
- cell_adjacency, common_edges, cell_times, delta_t
116
- )
117
-
118
- # Join adjacent LineStrings into simple LineStrings
119
- logger.info("Joining adjacent LineStrings into simple LineStrings...")
120
- joined_lines = []
121
-
122
- def get_coords(geom):
123
- """Helper function to get coordinates from either LineString or MultiLineString"""
124
- if isinstance(geom, LineString):
125
- return list(geom.coords)
126
- elif isinstance(geom, MultiLineString):
127
- return list(geom.geoms[0].coords)
128
- return None
129
-
130
- # Create a dictionary to store start and end points for each line
131
- line_endpoints = {}
132
- for i, edge in enumerate(boundary_edges):
133
- coords = get_coords(edge)
134
- if coords:
135
- line_endpoints[i] = (coords[0], coords[-1])
136
-
137
- # Process lines in order
138
- used_indices = set()
139
- while len(used_indices) < len(boundary_edges):
140
- current_line = []
141
- current_points = []
142
-
143
- # Find a new starting line if needed
144
- for i in range(len(boundary_edges)):
145
- if i not in used_indices:
146
- current_line.append(boundary_edges[i])
147
- coords = get_coords(boundary_edges[i])
148
- if coords:
149
- current_points.extend(coords)
150
- used_indices.add(i)
151
- break
152
-
153
- # Continue adding connected lines
154
- while True:
155
- found_next = False
156
- current_end = current_points[-1] if current_points else None
157
-
158
- # Look for the next connected line
159
- for i, (start, end) in line_endpoints.items():
160
- if i not in used_indices and current_end:
161
- if start == current_end:
162
- # Add line in forward direction
163
- coords = get_coords(boundary_edges[i])
164
- if coords:
165
- current_points.extend(coords[1:]) # Skip first point to avoid duplication
166
- current_line.append(boundary_edges[i])
167
- used_indices.add(i)
168
- found_next = True
169
- break
170
- elif end == current_end:
171
- # Add line in reverse direction
172
- coords = get_coords(boundary_edges[i])
173
- if coords:
174
- current_points.extend(reversed(coords[:-1])) # Skip last point to avoid duplication
175
- current_line.append(boundary_edges[i])
176
- used_indices.add(i)
177
- found_next = True
178
- break
179
-
180
- if not found_next:
181
- break
182
-
183
- # Create a single LineString from the collected points
184
- if current_points:
185
- joined_lines.append(LineString(current_points))
186
-
187
- # Create final GeoDataFrame with CRS from cell_polygons_gdf
188
- logger.info("Creating final GeoDataFrame for boundaries...")
189
- boundary_gdf = gpd.GeoDataFrame(
190
- geometry=joined_lines,
191
- crs=cell_polygons_gdf.crs
192
- )
193
-
194
- # Clean up intermediate dataframes
195
- logger.info("Cleaning up intermediate dataframes...")
196
- del cell_polygons_gdf
197
- del max_ws_df
198
-
199
- logger.info("Fluvial-pluvial boundary calculation completed successfully.")
200
- return boundary_gdf
201
-
202
- except Exception as e:
203
- self.logger.error(f"Error calculating fluvial-pluvial boundary: {str(e)}")
204
- return None
205
-
206
-
207
- @staticmethod
208
- def _process_cell_adjacencies(cell_polygons_gdf: gpd.GeoDataFrame) -> Tuple[Dict[int, List[int]], Dict[int, Dict[int, LineString]]]:
209
- """
210
- Optimized method to process cell adjacencies by extracting shared edges directly.
211
-
212
- Args:
213
- cell_polygons_gdf (gpd.GeoDataFrame): GeoDataFrame containing 2D mesh cell polygons
214
- with 'cell_id' and 'geometry' columns.
215
-
216
- Returns:
217
- Tuple containing:
218
- - Dict[int, List[int]]: Dictionary mapping cell IDs to lists of adjacent cell IDs.
219
- - Dict[int, Dict[int, LineString]]: Nested dictionary storing common edges between cells,
220
- where common_edges[cell1][cell2] gives the shared boundary.
221
- """
222
- cell_adjacency = defaultdict(list)
223
- common_edges = defaultdict(dict)
224
-
225
- # Build an edge to cells mapping
226
- edge_to_cells = defaultdict(set)
227
-
228
- # Function to generate edge keys
229
- def edge_key(coords1, coords2, precision=8):
230
- # Round coordinates
231
- coords1 = tuple(round(coord, precision) for coord in coords1)
232
- coords2 = tuple(round(coord, precision) for coord in coords2)
233
- # Create sorted key to handle edge direction
234
- return tuple(sorted([coords1, coords2]))
235
-
236
- # For each polygon, extract edges
237
- for idx, row in cell_polygons_gdf.iterrows():
238
- cell_id = row['cell_id']
239
- geom = row['geometry']
240
- if geom.is_empty or not geom.is_valid:
241
- continue
242
- # Get exterior coordinates
243
- coords = list(geom.exterior.coords)
244
- num_coords = len(coords)
245
- for i in range(num_coords - 1):
246
- coord1 = coords[i]
247
- coord2 = coords[i + 1]
248
- key = edge_key(coord1, coord2)
249
- edge_to_cells[key].add(cell_id)
250
-
251
- # Now, process edge_to_cells to build adjacency
252
- for edge, cells in edge_to_cells.items():
253
- cells = list(cells)
254
- if len(cells) >= 2:
255
- # For all pairs of cells sharing this edge
256
- for i in range(len(cells)):
257
- for j in range(i + 1, len(cells)):
258
- cell1 = cells[i]
259
- cell2 = cells[j]
260
- # Update adjacency
261
- if cell2 not in cell_adjacency[cell1]:
262
- cell_adjacency[cell1].append(cell2)
263
- if cell1 not in cell_adjacency[cell2]:
264
- cell_adjacency[cell2].append(cell1)
265
- # Store common edge
266
- common_edge = LineString([edge[0], edge[1]])
267
- common_edges[cell1][cell2] = common_edge
268
- common_edges[cell2][cell1] = common_edge
269
-
270
- logger.info("Cell adjacencies processed successfully.")
271
- return cell_adjacency, common_edges
272
-
273
- @staticmethod
274
- def _identify_boundary_edges(cell_adjacency: Dict[int, List[int]],
275
- common_edges: Dict[int, Dict[int, LineString]],
276
- cell_times: Dict[int, pd.Timestamp],
277
- delta_t: float) -> List[LineString]:
278
- """
279
- Identify boundary edges between cells with significant time differences.
280
-
281
- Args:
282
- cell_adjacency (Dict[int, List[int]]): Dictionary of cell adjacencies
283
- common_edges (Dict[int, Dict[int, LineString]]): Dictionary of shared edges between cells
284
- cell_times (Dict[int, pd.Timestamp]): Dictionary mapping cell IDs to their max WSE times
285
- delta_t (float): Time threshold in hours
286
-
287
- Returns:
288
- List[LineString]: List of LineString geometries representing boundaries where
289
- adjacent cells have time differences greater than delta_t
290
-
291
- Note:
292
- Boundaries are identified where the absolute time difference between adjacent
293
- cells exceeds the specified delta_t threshold.
294
- """
295
- boundary_edges = []
296
- with tqdm(total=len(cell_adjacency), desc="Processing cell adjacencies") as pbar:
297
- for cell_id, neighbors in cell_adjacency.items():
298
- cell_time = cell_times[cell_id]
299
-
300
- for neighbor_id in neighbors:
301
- neighbor_time = cell_times[neighbor_id]
302
- time_diff = abs((cell_time - neighbor_time).total_seconds() / 3600)
303
-
304
- if time_diff >= delta_t:
305
- boundary_edges.append(common_edges[cell_id][neighbor_id])
306
-
307
- pbar.update(1)
308
-
309
- return boundary_edges
1
+ """
2
+ Class: HdfFluvialPluvial
3
+
4
+ All of the methods in this class are static and are designed to be used without instantiation.
5
+
6
+ List of Functions in HdfFluvialPluvial:
7
+ - calculate_fluvial_pluvial_boundary()
8
+ - _process_cell_adjacencies()
9
+ - _identify_boundary_edges()
10
+
11
+ """
12
+
13
+ from typing import Dict, List, Tuple
14
+ import pandas as pd
15
+ import geopandas as gpd
16
+ from collections import defaultdict
17
+ from shapely.geometry import LineString, MultiLineString # Added MultiLineString import
18
+ from tqdm import tqdm
19
+ from .HdfMesh import HdfMesh
20
+ from .HdfUtils import HdfUtils
21
+ from .Decorators import standardize_input
22
+ from .HdfResultsMesh import HdfResultsMesh
23
+ from .LoggingConfig import get_logger
24
+ from pathlib import Path
25
+
26
+ logger = get_logger(__name__)
27
+
28
+ class HdfFluvialPluvial:
29
+ """
30
+ A class for analyzing and visualizing fluvial-pluvial boundaries in HEC-RAS 2D model results.
31
+
32
+ This class provides methods to process and visualize HEC-RAS 2D model outputs,
33
+ specifically focusing on the delineation of fluvial and pluvial flood areas.
34
+ It includes functionality for calculating fluvial-pluvial boundaries based on
35
+ the timing of maximum water surface elevations.
36
+
37
+ Key Concepts:
38
+ - Fluvial flooding: Flooding from rivers/streams
39
+ - Pluvial flooding: Flooding from rainfall/surface water
40
+ - Delta_t: Time threshold (in hours) used to distinguish between fluvial and pluvial cells.
41
+ Cells with max WSE time differences greater than delta_t are considered boundaries.
42
+
43
+ Data Requirements:
44
+ - HEC-RAS plan HDF file containing:
45
+ - 2D mesh cell geometry (accessed via HdfMesh)
46
+ - Maximum water surface elevation times (accessed via HdfResultsMesh)
47
+
48
+ Usage Example:
49
+ >>> ras = init_ras_project(project_path, ras_version)
50
+ >>> hdf_path = Path("path/to/plan.hdf")
51
+ >>> boundary_gdf = HdfFluvialPluvial.calculate_fluvial_pluvial_boundary(
52
+ ... hdf_path,
53
+ ... delta_t=12
54
+ ... )
55
+ """
56
    def __init__(self):
        # All public methods of this class are static; instantiation is optional
        # and only provides a per-instance reference to the module logger.
        self.logger = get_logger(__name__)  # Initialize logger with module name
58
+
59
+ @staticmethod
60
+ @standardize_input(file_type='plan_hdf')
61
+ def calculate_fluvial_pluvial_boundary(hdf_path: Path, delta_t: float = 12) -> gpd.GeoDataFrame:
62
+ """
63
+ Calculate the fluvial-pluvial boundary based on cell polygons and maximum water surface elevation times.
64
+
65
+ Args:
66
+ hdf_path (Path): Path to the HEC-RAS plan HDF file
67
+ delta_t (float): Threshold time difference in hours. Cells with time differences
68
+ greater than this value are considered boundaries. Default is 12 hours.
69
+
70
+ Returns:
71
+ gpd.GeoDataFrame: GeoDataFrame containing the fluvial-pluvial boundaries with:
72
+ - geometry: LineString features representing boundaries
73
+ - CRS: Coordinate reference system matching the input HDF file
74
+
75
+ Raises:
76
+ ValueError: If no cell polygons or maximum water surface data found in HDF file
77
+ Exception: If there are errors during boundary calculation
78
+
79
+ Note:
80
+ The returned boundaries represent locations where the timing of maximum water surface
81
+ elevation changes significantly (> delta_t), indicating potential transitions between
82
+ fluvial and pluvial flooding mechanisms.
83
+ """
84
+ try:
85
+ # Get cell polygons from HdfMesh
86
+ logger.info("Getting cell polygons from HDF file...")
87
+ cell_polygons_gdf = HdfMesh.get_mesh_cell_polygons(hdf_path)
88
+ if cell_polygons_gdf.empty:
89
+ raise ValueError("No cell polygons found in HDF file")
90
+
91
+ # Get max water surface data from HdfResultsMesh
92
+ logger.info("Getting maximum water surface data from HDF file...")
93
+ max_ws_df = HdfResultsMesh.get_mesh_max_ws(hdf_path)
94
+ if max_ws_df.empty:
95
+ raise ValueError("No maximum water surface data found in HDF file")
96
+
97
+ # Convert timestamps using the renamed utility function
98
+ logger.info("Converting maximum water surface timestamps...")
99
+ if 'maximum_water_surface_time' in max_ws_df.columns:
100
+ max_ws_df['maximum_water_surface_time'] = max_ws_df['maximum_water_surface_time'].apply(
101
+ lambda x: HdfUtils.parse_ras_datetime(x) if isinstance(x, str) else x
102
+ )
103
+
104
+ # Process cell adjacencies
105
+ logger.info("Processing cell adjacencies...")
106
+ cell_adjacency, common_edges = HdfFluvialPluvial._process_cell_adjacencies(cell_polygons_gdf)
107
+
108
+ # Get cell times from max_ws_df
109
+ logger.info("Extracting cell times from maximum water surface data...")
110
+ cell_times = max_ws_df.set_index('cell_id')['maximum_water_surface_time'].to_dict()
111
+
112
+ # Identify boundary edges
113
+ logger.info("Identifying boundary edges...")
114
+ boundary_edges = HdfFluvialPluvial._identify_boundary_edges(
115
+ cell_adjacency, common_edges, cell_times, delta_t
116
+ )
117
+
118
+ # FOCUS YOUR REVISIONS HERE:
119
+ # Join adjacent LineStrings into simple LineStrings by connecting them at shared endpoints
120
+ logger.info("Joining adjacent LineStrings into simple LineStrings...")
121
+
122
+ def get_coords(geom):
123
+ """Helper function to extract coordinates from geometry objects
124
+
125
+ Args:
126
+ geom: A Shapely LineString or MultiLineString geometry
127
+
128
+ Returns:
129
+ tuple: Tuple containing:
130
+ - list of original coordinates [(x1,y1), (x2,y2),...]
131
+ - list of rounded coordinates for comparison
132
+ - None if invalid geometry
133
+ """
134
+ if isinstance(geom, LineString):
135
+ orig_coords = list(geom.coords)
136
+ # Round coordinates to 0.01 for comparison
137
+ rounded_coords = [(round(x, 2), round(y, 2)) for x, y in orig_coords]
138
+ return orig_coords, rounded_coords
139
+ elif isinstance(geom, MultiLineString):
140
+ orig_coords = list(geom.geoms[0].coords)
141
+ rounded_coords = [(round(x, 2), round(y, 2)) for x, y in orig_coords]
142
+ return orig_coords, rounded_coords
143
+ return None, None
144
+
145
+ def find_connecting_line(current_end, unused_lines, endpoint_counts, rounded_endpoints):
146
+ """Find a line that connects to the current endpoint
147
+
148
+ Args:
149
+ current_end: Tuple of (x, y) coordinates
150
+ unused_lines: Set of unused line indices
151
+ endpoint_counts: Dict of endpoint occurrence counts
152
+ rounded_endpoints: Dict of rounded endpoint coordinates
153
+
154
+ Returns:
155
+ tuple: (line_index, should_reverse, found) or (None, None, False)
156
+ """
157
+ rounded_end = (round(current_end[0], 2), round(current_end[1], 2))
158
+
159
+ # Skip if current endpoint is connected to more than 2 lines
160
+ if endpoint_counts.get(rounded_end, 0) > 2:
161
+ return None, None, False
162
+
163
+ for i in unused_lines:
164
+ start, end = rounded_endpoints[i]
165
+ if start == rounded_end and endpoint_counts.get(start, 0) <= 2:
166
+ return i, False, True
167
+ elif end == rounded_end and endpoint_counts.get(end, 0) <= 2:
168
+ return i, True, True
169
+ return None, None, False
170
+
171
+ # Initialize data structures
172
+ joined_lines = []
173
+ unused_lines = set(range(len(boundary_edges)))
174
+
175
+ # Create endpoint lookup dictionaries
176
+ line_endpoints = {}
177
+ rounded_endpoints = {}
178
+ for i, edge in enumerate(boundary_edges):
179
+ coords_result = get_coords(edge)
180
+ if coords_result:
181
+ orig_coords, rounded_coords = coords_result
182
+ line_endpoints[i] = (orig_coords[0], orig_coords[-1])
183
+ rounded_endpoints[i] = (rounded_coords[0], rounded_coords[-1])
184
+
185
+ # Count endpoint occurrences
186
+ endpoint_counts = {}
187
+ for start, end in rounded_endpoints.values():
188
+ endpoint_counts[start] = endpoint_counts.get(start, 0) + 1
189
+ endpoint_counts[end] = endpoint_counts.get(end, 0) + 1
190
+
191
+ # Iteratively join lines
192
+ while unused_lines:
193
+ # Start a new line chain
194
+ current_points = []
195
+
196
+ # Find first unused line
197
+ start_idx = unused_lines.pop()
198
+ start_coords, _ = get_coords(boundary_edges[start_idx])
199
+ if start_coords:
200
+ current_points.extend(start_coords)
201
+
202
+ # Try to extend in both directions
203
+ continue_joining = True
204
+ while continue_joining:
205
+ continue_joining = False
206
+
207
+ # Try to extend forward
208
+ next_idx, should_reverse, found = find_connecting_line(
209
+ current_points[-1],
210
+ unused_lines,
211
+ endpoint_counts,
212
+ rounded_endpoints
213
+ )
214
+
215
+ if found:
216
+ unused_lines.remove(next_idx)
217
+ next_coords, _ = get_coords(boundary_edges[next_idx])
218
+ if next_coords:
219
+ if should_reverse:
220
+ current_points.extend(reversed(next_coords[:-1]))
221
+ else:
222
+ current_points.extend(next_coords[1:])
223
+ continue_joining = True
224
+ continue
225
+
226
+ # Try to extend backward
227
+ prev_idx, should_reverse, found = find_connecting_line(
228
+ current_points[0],
229
+ unused_lines,
230
+ endpoint_counts,
231
+ rounded_endpoints
232
+ )
233
+
234
+ if found:
235
+ unused_lines.remove(prev_idx)
236
+ prev_coords, _ = get_coords(boundary_edges[prev_idx])
237
+ if prev_coords:
238
+ if should_reverse:
239
+ current_points[0:0] = reversed(prev_coords[:-1])
240
+ else:
241
+ current_points[0:0] = prev_coords[:-1]
242
+ continue_joining = True
243
+
244
+ # Create final LineString from collected points
245
+ if current_points:
246
+ joined_lines.append(LineString(current_points))
247
+
248
+ # FILL GAPS BETWEEN JOINED LINES
249
+ logger.info(f"Starting gap analysis for {len(joined_lines)} line segments...")
250
+
251
+ def find_endpoints(lines):
252
+ """Get all endpoints of the lines with their indices"""
253
+ endpoints = []
254
+ for i, line in enumerate(lines):
255
+ coords = list(line.coords)
256
+ endpoints.append((coords[0], i, 'start'))
257
+ endpoints.append((coords[-1], i, 'end'))
258
+ return endpoints
259
+
260
+ def find_nearby_points(point1, point2, tolerance=0.01):
261
+ """Check if two points are within tolerance distance"""
262
+ return (abs(point1[0] - point2[0]) <= tolerance and
263
+ abs(point1[1] - point2[1]) <= tolerance)
264
+
265
+ def find_gaps(lines, tolerance=0.01):
266
+ """Find gaps between line endpoints"""
267
+ logger.info("Analyzing line endpoints to identify gaps...")
268
+ endpoints = []
269
+ for i, line in enumerate(lines):
270
+ coords = list(line.coords)
271
+ start = coords[0]
272
+ end = coords[-1]
273
+ endpoints.append({
274
+ 'point': start,
275
+ 'line_idx': i,
276
+ 'position': 'start',
277
+ 'coords': coords
278
+ })
279
+ endpoints.append({
280
+ 'point': end,
281
+ 'line_idx': i,
282
+ 'position': 'end',
283
+ 'coords': coords
284
+ })
285
+
286
+ logger.info(f"Found {len(endpoints)} endpoints to analyze")
287
+ gaps = []
288
+
289
+ # Compare each endpoint with all others
290
+ for i, ep1 in enumerate(endpoints):
291
+ for ep2 in endpoints[i+1:]:
292
+ # Skip if endpoints are from same line
293
+ if ep1['line_idx'] == ep2['line_idx']:
294
+ continue
295
+
296
+ point1 = ep1['point']
297
+ point2 = ep2['point']
298
+
299
+ # Skip if points are too close (already connected)
300
+ if find_nearby_points(point1, point2):
301
+ continue
302
+
303
+ # Check if this could be a gap
304
+ dist = LineString([point1, point2]).length
305
+ if dist < 10.0: # Maximum gap distance threshold
306
+ gaps.append({
307
+ 'start': ep1,
308
+ 'end': ep2,
309
+ 'distance': dist
310
+ })
311
+
312
+ logger.info(f"Identified {len(gaps)} potential gaps to fill")
313
+ return sorted(gaps, key=lambda x: x['distance'])
314
+
315
+ def join_lines_with_gap(line1_coords, line2_coords, gap_start_pos, gap_end_pos):
316
+ """Join two lines maintaining correct point order based on gap positions"""
317
+ if gap_start_pos == 'end' and gap_end_pos == 'start':
318
+ # line1 end connects to line2 start
319
+ return line1_coords + line2_coords
320
+ elif gap_start_pos == 'start' and gap_end_pos == 'end':
321
+ # line1 start connects to line2 end
322
+ return list(reversed(line2_coords)) + line1_coords
323
+ elif gap_start_pos == 'end' and gap_end_pos == 'end':
324
+ # line1 end connects to line2 end
325
+ return line1_coords + list(reversed(line2_coords))
326
+ else: # start to start
327
+ # line1 start connects to line2 start
328
+ return list(reversed(line1_coords)) + line2_coords
329
+
330
+ # Process gaps and join lines
331
+ processed_lines = joined_lines.copy()
332
+ line_groups = [[i] for i in range(len(processed_lines))]
333
+ gaps = find_gaps(processed_lines)
334
+
335
+ filled_gap_count = 0
336
+ for gap_idx, gap in enumerate(gaps, 1):
337
+ logger.info(f"Processing gap {gap_idx}/{len(gaps)} (distance: {gap['distance']:.3f})")
338
+
339
+ line1_idx = gap['start']['line_idx']
340
+ line2_idx = gap['end']['line_idx']
341
+
342
+ # Find the groups containing these lines
343
+ group1 = next(g for g in line_groups if line1_idx in g)
344
+ group2 = next(g for g in line_groups if line2_idx in g)
345
+
346
+ # Skip if lines are already in the same group
347
+ if group1 == group2:
348
+ continue
349
+
350
+ # Get the coordinates for both lines
351
+ line1_coords = gap['start']['coords']
352
+ line2_coords = gap['end']['coords']
353
+
354
+ # Join the lines in correct order
355
+ joined_coords = join_lines_with_gap(
356
+ line1_coords,
357
+ line2_coords,
358
+ gap['start']['position'],
359
+ gap['end']['position']
360
+ )
361
+
362
+ # Create new joined line
363
+ new_line = LineString(joined_coords)
364
+
365
+ # Update processed_lines and line_groups
366
+ new_idx = len(processed_lines)
367
+ processed_lines.append(new_line)
368
+
369
+ # Merge groups and remove old ones
370
+ new_group = group1 + group2
371
+ line_groups.remove(group1)
372
+ line_groups.remove(group2)
373
+ line_groups.append(new_group + [new_idx])
374
+
375
+ filled_gap_count += 1
376
+ logger.info(f"Successfully joined lines {line1_idx} and {line2_idx}")
377
+
378
+ logger.info(f"Gap filling complete. Filled {filled_gap_count} out of {len(gaps)} gaps")
379
+
380
+ # Get final lines (take the last line from each group)
381
+ final_lines = [processed_lines[group[-1]] for group in line_groups]
382
+
383
+ logger.info(f"Final cleanup complete. Resulting in {len(final_lines)} line segments")
384
+ joined_lines = final_lines
385
+
386
+ # Create final GeoDataFrame with CRS from cell_polygons_gdf
387
+ logger.info("Creating final GeoDataFrame for boundaries...")
388
+ boundary_gdf = gpd.GeoDataFrame(
389
+ geometry=joined_lines,
390
+ crs=cell_polygons_gdf.crs
391
+ )
392
+
393
+ # Clean up intermediate dataframes
394
+ logger.info("Cleaning up intermediate dataframes...")
395
+ del cell_polygons_gdf
396
+ del max_ws_df
397
+
398
+ logger.info("Fluvial-pluvial boundary calculation completed successfully.")
399
+ return boundary_gdf
400
+
401
+ except Exception as e:
402
+ self.logger.error(f"Error calculating fluvial-pluvial boundary: {str(e)}")
403
+ return None
404
+
405
+
406
+ @staticmethod
407
+ def _process_cell_adjacencies(cell_polygons_gdf: gpd.GeoDataFrame) -> Tuple[Dict[int, List[int]], Dict[int, Dict[int, LineString]]]:
408
+ """
409
+ Optimized method to process cell adjacencies by extracting shared edges directly.
410
+
411
+ Args:
412
+ cell_polygons_gdf (gpd.GeoDataFrame): GeoDataFrame containing 2D mesh cell polygons
413
+ with 'cell_id' and 'geometry' columns.
414
+
415
+ Returns:
416
+ Tuple containing:
417
+ - Dict[int, List[int]]: Dictionary mapping cell IDs to lists of adjacent cell IDs.
418
+ - Dict[int, Dict[int, LineString]]: Nested dictionary storing common edges between cells,
419
+ where common_edges[cell1][cell2] gives the shared boundary.
420
+ """
421
+ cell_adjacency = defaultdict(list)
422
+ common_edges = defaultdict(dict)
423
+
424
+ # Build an edge to cells mapping
425
+ edge_to_cells = defaultdict(set)
426
+
427
+ # Function to generate edge keys
428
+ def edge_key(coords1, coords2, precision=8):
429
+ # Round coordinates
430
+ coords1 = tuple(round(coord, precision) for coord in coords1)
431
+ coords2 = tuple(round(coord, precision) for coord in coords2)
432
+ # Create sorted key to handle edge direction
433
+ return tuple(sorted([coords1, coords2]))
434
+
435
+ # For each polygon, extract edges
436
+ for idx, row in cell_polygons_gdf.iterrows():
437
+ cell_id = row['cell_id']
438
+ geom = row['geometry']
439
+ if geom.is_empty or not geom.is_valid:
440
+ continue
441
+ # Get exterior coordinates
442
+ coords = list(geom.exterior.coords)
443
+ num_coords = len(coords)
444
+ for i in range(num_coords - 1):
445
+ coord1 = coords[i]
446
+ coord2 = coords[i + 1]
447
+ key = edge_key(coord1, coord2)
448
+ edge_to_cells[key].add(cell_id)
449
+
450
+ # Now, process edge_to_cells to build adjacency
451
+ for edge, cells in edge_to_cells.items():
452
+ cells = list(cells)
453
+ if len(cells) >= 2:
454
+ # For all pairs of cells sharing this edge
455
+ for i in range(len(cells)):
456
+ for j in range(i + 1, len(cells)):
457
+ cell1 = cells[i]
458
+ cell2 = cells[j]
459
+ # Update adjacency
460
+ if cell2 not in cell_adjacency[cell1]:
461
+ cell_adjacency[cell1].append(cell2)
462
+ if cell1 not in cell_adjacency[cell2]:
463
+ cell_adjacency[cell2].append(cell1)
464
+ # Store common edge
465
+ common_edge = LineString([edge[0], edge[1]])
466
+ common_edges[cell1][cell2] = common_edge
467
+ common_edges[cell2][cell1] = common_edge
468
+
469
+ logger.info("Cell adjacencies processed successfully.")
470
+ return cell_adjacency, common_edges
471
+
472
+ @staticmethod
473
+ def _identify_boundary_edges(cell_adjacency: Dict[int, List[int]],
474
+ common_edges: Dict[int, Dict[int, LineString]],
475
+ cell_times: Dict[int, pd.Timestamp],
476
+ delta_t: float) -> List[LineString]:
477
+ """
478
+ Identify boundary edges between cells with significant time differences.
479
+
480
+ Args:
481
+ cell_adjacency (Dict[int, List[int]]): Dictionary of cell adjacencies
482
+ common_edges (Dict[int, Dict[int, LineString]]): Dictionary of shared edges between cells
483
+ cell_times (Dict[int, pd.Timestamp]): Dictionary mapping cell IDs to their max WSE times
484
+ delta_t (float): Time threshold in hours
485
+
486
+ Returns:
487
+ List[LineString]: List of LineString geometries representing boundaries
488
+ """
489
+ # Validate cell_times data
490
+ valid_times = {k: v for k, v in cell_times.items() if pd.notna(v)}
491
+ if len(valid_times) < len(cell_times):
492
+ logger.warning(f"Found {len(cell_times) - len(valid_times)} cells with invalid timestamps")
493
+ cell_times = valid_times
494
+
495
+ # Use a set to store processed cell pairs and avoid duplicates
496
+ processed_pairs = set()
497
+ boundary_edges = []
498
+
499
+ # Track time differences for debugging
500
+ time_diffs = []
501
+
502
+ with tqdm(total=len(cell_adjacency), desc="Processing cell adjacencies") as pbar:
503
+ for cell_id, neighbors in cell_adjacency.items():
504
+ if cell_id not in cell_times:
505
+ logger.debug(f"Skipping cell {cell_id} - no timestamp data")
506
+ pbar.update(1)
507
+ continue
508
+
509
+ cell_time = cell_times[cell_id]
510
+
511
+ for neighbor_id in neighbors:
512
+ if neighbor_id not in cell_times:
513
+ logger.debug(f"Skipping neighbor {neighbor_id} of cell {cell_id} - no timestamp data")
514
+ continue
515
+
516
+ # Create a sorted tuple of the cell pair to ensure uniqueness
517
+ cell_pair = tuple(sorted([cell_id, neighbor_id]))
518
+
519
+ # Skip if we've already processed this pair
520
+ if cell_pair in processed_pairs:
521
+ continue
522
+
523
+ neighbor_time = cell_times[neighbor_id]
524
+
525
+ # Ensure both timestamps are valid
526
+ if pd.isna(cell_time) or pd.isna(neighbor_time):
527
+ continue
528
+
529
+ # Calculate time difference in hours
530
+ time_diff = abs((cell_time - neighbor_time).total_seconds() / 3600)
531
+ time_diffs.append(time_diff)
532
+
533
+ logger.debug(f"Time difference between cells {cell_id} and {neighbor_id}: {time_diff:.2f} hours")
534
+
535
+ if time_diff >= delta_t:
536
+ logger.debug(f"Found boundary edge between cells {cell_id} and {neighbor_id} "
537
+ f"(time diff: {time_diff:.2f} hours)")
538
+ boundary_edges.append(common_edges[cell_id][neighbor_id])
539
+
540
+ # Mark this pair as processed
541
+ processed_pairs.add(cell_pair)
542
+
543
+ pbar.update(1)
544
+
545
+ # Log summary statistics
546
+ if time_diffs:
547
+ logger.info(f"Time difference statistics:")
548
+ logger.info(f" Min: {min(time_diffs):.2f} hours")
549
+ logger.info(f" Max: {max(time_diffs):.2f} hours")
550
+ logger.info(f" Mean: {sum(time_diffs)/len(time_diffs):.2f} hours")
551
+ logger.info(f" Number of boundaries found: {len(boundary_edges)}")
552
+ logger.info(f" Delta-t threshold: {delta_t} hours")
553
+
554
+ return boundary_edges