ras-commander 0.51.0__py3-none-any.whl → 0.53.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -115,75 +115,274 @@ class HdfFluvialPluvial:
115
115
  cell_adjacency, common_edges, cell_times, delta_t
116
116
  )
117
117
 
118
- # Join adjacent LineStrings into simple LineStrings
119
+ # Join adjacent LineStrings into simple LineStrings by connecting them at shared endpoints
119
120
  logger.info("Joining adjacent LineStrings into simple LineStrings...")
120
- joined_lines = []
121
121
 
122
122
  def get_coords(geom):
123
- """Helper function to get coordinates from either LineString or MultiLineString"""
123
+ """Helper function to extract coordinates from geometry objects
124
+
125
+ Args:
126
+ geom: A Shapely LineString or MultiLineString geometry
127
+
128
+ Returns:
129
+ tuple: Tuple containing:
130
+ - list of original coordinates [(x1,y1), (x2,y2),...]
131
+ - list of rounded coordinates for comparison
132
+ - (None, None) if the geometry is not a LineString or MultiLineString
133
+ """
124
134
  if isinstance(geom, LineString):
125
- return list(geom.coords)
135
+ orig_coords = list(geom.coords)
136
+ # Round coordinates to 0.01 for comparison
137
+ rounded_coords = [(round(x, 2), round(y, 2)) for x, y in orig_coords]
138
+ return orig_coords, rounded_coords
126
139
  elif isinstance(geom, MultiLineString):
127
- return list(geom.geoms[0].coords)
128
- return None
140
+ orig_coords = list(geom.geoms[0].coords)
141
+ rounded_coords = [(round(x, 2), round(y, 2)) for x, y in orig_coords]
142
+ return orig_coords, rounded_coords
143
+ return None, None
129
144
 
130
- # Create a dictionary to store start and end points for each line
145
+ def find_connecting_line(current_end, unused_lines, endpoint_counts, rounded_endpoints):
146
+ """Find a line that connects to the current endpoint
147
+
148
+ Args:
149
+ current_end: Tuple of (x, y) coordinates
150
+ unused_lines: Set of unused line indices
151
+ endpoint_counts: Dict of endpoint occurrence counts
152
+ rounded_endpoints: Dict of rounded endpoint coordinates
153
+
154
+ Returns:
155
+ tuple: (line_index, should_reverse, found) or (None, None, False)
156
+ """
157
+ rounded_end = (round(current_end[0], 2), round(current_end[1], 2))
158
+
159
+ # Skip if current endpoint is connected to more than 2 lines
160
+ if endpoint_counts.get(rounded_end, 0) > 2:
161
+ return None, None, False
162
+
163
+ for i in unused_lines:
164
+ start, end = rounded_endpoints[i]
165
+ if start == rounded_end and endpoint_counts.get(start, 0) <= 2:
166
+ return i, False, True
167
+ elif end == rounded_end and endpoint_counts.get(end, 0) <= 2:
168
+ return i, True, True
169
+ return None, None, False
170
+
171
+ # Initialize data structures
172
+ joined_lines = []
173
+ unused_lines = set(range(len(boundary_edges)))
174
+
175
+ # Create endpoint lookup dictionaries
131
176
  line_endpoints = {}
177
+ rounded_endpoints = {}
132
178
  for i, edge in enumerate(boundary_edges):
133
- coords = get_coords(edge)
134
- if coords:
135
- line_endpoints[i] = (coords[0], coords[-1])
136
-
137
- # Process lines in order
138
- used_indices = set()
139
- while len(used_indices) < len(boundary_edges):
140
- current_line = []
179
+ coords_result = get_coords(edge)
180
+ if coords_result:
181
+ orig_coords, rounded_coords = coords_result
182
+ line_endpoints[i] = (orig_coords[0], orig_coords[-1])
183
+ rounded_endpoints[i] = (rounded_coords[0], rounded_coords[-1])
184
+
185
+ # Count endpoint occurrences
186
+ endpoint_counts = {}
187
+ for start, end in rounded_endpoints.values():
188
+ endpoint_counts[start] = endpoint_counts.get(start, 0) + 1
189
+ endpoint_counts[end] = endpoint_counts.get(end, 0) + 1
190
+
191
+ # Iteratively join lines
192
+ while unused_lines:
193
+ # Start a new line chain
141
194
  current_points = []
142
195
 
143
- # Find a new starting line if needed
144
- for i in range(len(boundary_edges)):
145
- if i not in used_indices:
146
- current_line.append(boundary_edges[i])
147
- coords = get_coords(boundary_edges[i])
148
- if coords:
149
- current_points.extend(coords)
150
- used_indices.add(i)
151
- break
196
+ # Find first unused line
197
+ start_idx = unused_lines.pop()
198
+ start_coords, _ = get_coords(boundary_edges[start_idx])
199
+ if start_coords:
200
+ current_points.extend(start_coords)
152
201
 
153
- # Continue adding connected lines
154
- while True:
155
- found_next = False
156
- current_end = current_points[-1] if current_points else None
202
+ # Try to extend in both directions
203
+ continue_joining = True
204
+ while continue_joining:
205
+ continue_joining = False
206
+
207
+ # Try to extend forward
208
+ next_idx, should_reverse, found = find_connecting_line(
209
+ current_points[-1],
210
+ unused_lines,
211
+ endpoint_counts,
212
+ rounded_endpoints
213
+ )
214
+
215
+ if found:
216
+ unused_lines.remove(next_idx)
217
+ next_coords, _ = get_coords(boundary_edges[next_idx])
218
+ if next_coords:
219
+ if should_reverse:
220
+ current_points.extend(reversed(next_coords[:-1]))
221
+ else:
222
+ current_points.extend(next_coords[1:])
223
+ continue_joining = True
224
+ continue
157
225
 
158
- # Look for the next connected line
159
- for i, (start, end) in line_endpoints.items():
160
- if i not in used_indices and current_end:
161
- if start == current_end:
162
- # Add line in forward direction
163
- coords = get_coords(boundary_edges[i])
164
- if coords:
165
- current_points.extend(coords[1:]) # Skip first point to avoid duplication
166
- current_line.append(boundary_edges[i])
167
- used_indices.add(i)
168
- found_next = True
169
- break
170
- elif end == current_end:
171
- # Add line in reverse direction
172
- coords = get_coords(boundary_edges[i])
173
- if coords:
174
- current_points.extend(reversed(coords[:-1])) # Skip last point to avoid duplication
175
- current_line.append(boundary_edges[i])
176
- used_indices.add(i)
177
- found_next = True
178
- break
226
+ # Try to extend backward
227
+ prev_idx, should_reverse, found = find_connecting_line(
228
+ current_points[0],
229
+ unused_lines,
230
+ endpoint_counts,
231
+ rounded_endpoints
232
+ )
179
233
 
180
- if not found_next:
181
- break
234
+ if found:
235
+ unused_lines.remove(prev_idx)
236
+ prev_coords, _ = get_coords(boundary_edges[prev_idx])
237
+ if prev_coords:
238
+ if should_reverse:
239
+ current_points[0:0] = reversed(prev_coords[:-1])
240
+ else:
241
+ current_points[0:0] = prev_coords[:-1]
242
+ continue_joining = True
182
243
 
183
- # Create a single LineString from the collected points
244
+ # Create final LineString from collected points
184
245
  if current_points:
185
246
  joined_lines.append(LineString(current_points))
186
247
 
248
+ # FILL GAPS BETWEEN JOINED LINES
249
+ logger.info(f"Starting gap analysis for {len(joined_lines)} line segments...")
250
+
251
+ def find_endpoints(lines):
252
+ """Get all endpoints of the lines with their indices"""
253
+ endpoints = []
254
+ for i, line in enumerate(lines):
255
+ coords = list(line.coords)
256
+ endpoints.append((coords[0], i, 'start'))
257
+ endpoints.append((coords[-1], i, 'end'))
258
+ return endpoints
259
+
260
+ def find_nearby_points(point1, point2, tolerance=0.01):
261
+ """Check if two points are within tolerance distance"""
262
+ return (abs(point1[0] - point2[0]) <= tolerance and
263
+ abs(point1[1] - point2[1]) <= tolerance)
264
+
265
+ def find_gaps(lines, tolerance=0.01):
266
+ """Find gaps between line endpoints"""
267
+ logger.info("Analyzing line endpoints to identify gaps...")
268
+ endpoints = []
269
+ for i, line in enumerate(lines):
270
+ coords = list(line.coords)
271
+ start = coords[0]
272
+ end = coords[-1]
273
+ endpoints.append({
274
+ 'point': start,
275
+ 'line_idx': i,
276
+ 'position': 'start',
277
+ 'coords': coords
278
+ })
279
+ endpoints.append({
280
+ 'point': end,
281
+ 'line_idx': i,
282
+ 'position': 'end',
283
+ 'coords': coords
284
+ })
285
+
286
+ logger.info(f"Found {len(endpoints)} endpoints to analyze")
287
+ gaps = []
288
+
289
+ # Compare each endpoint with all others
290
+ for i, ep1 in enumerate(endpoints):
291
+ for ep2 in endpoints[i+1:]:
292
+ # Skip if endpoints are from same line
293
+ if ep1['line_idx'] == ep2['line_idx']:
294
+ continue
295
+
296
+ point1 = ep1['point']
297
+ point2 = ep2['point']
298
+
299
+ # Skip if points are too close (already connected)
300
+ if find_nearby_points(point1, point2):
301
+ continue
302
+
303
+ # Check if this could be a gap
304
+ dist = LineString([point1, point2]).length
305
+ if dist < 10.0: # Maximum gap distance threshold
306
+ gaps.append({
307
+ 'start': ep1,
308
+ 'end': ep2,
309
+ 'distance': dist
310
+ })
311
+
312
+ logger.info(f"Identified {len(gaps)} potential gaps to fill")
313
+ return sorted(gaps, key=lambda x: x['distance'])
314
+
315
+ def join_lines_with_gap(line1_coords, line2_coords, gap_start_pos, gap_end_pos):
316
+ """Join two lines maintaining correct point order based on gap positions"""
317
+ if gap_start_pos == 'end' and gap_end_pos == 'start':
318
+ # line1 end connects to line2 start
319
+ return line1_coords + line2_coords
320
+ elif gap_start_pos == 'start' and gap_end_pos == 'end':
321
+ # line1 start connects to line2 end
322
+ return list(reversed(line2_coords)) + line1_coords
323
+ elif gap_start_pos == 'end' and gap_end_pos == 'end':
324
+ # line1 end connects to line2 end
325
+ return line1_coords + list(reversed(line2_coords))
326
+ else: # start to start
327
+ # line1 start connects to line2 start
328
+ return list(reversed(line1_coords)) + line2_coords
329
+
330
+ # Process gaps and join lines
331
+ processed_lines = joined_lines.copy()
332
+ line_groups = [[i] for i in range(len(processed_lines))]
333
+ gaps = find_gaps(processed_lines)
334
+
335
+ filled_gap_count = 0
336
+ for gap_idx, gap in enumerate(gaps, 1):
337
+ logger.info(f"Processing gap {gap_idx}/{len(gaps)} (distance: {gap['distance']:.3f})")
338
+
339
+ line1_idx = gap['start']['line_idx']
340
+ line2_idx = gap['end']['line_idx']
341
+
342
+ # Find the groups containing these lines
343
+ group1 = next(g for g in line_groups if line1_idx in g)
344
+ group2 = next(g for g in line_groups if line2_idx in g)
345
+
346
+ # Skip if lines are already in the same group
347
+ if group1 == group2:
348
+ continue
349
+
350
+ # Get the coordinates for both lines
351
+ line1_coords = gap['start']['coords']
352
+ line2_coords = gap['end']['coords']
353
+
354
+ # Join the lines in correct order
355
+ joined_coords = join_lines_with_gap(
356
+ line1_coords,
357
+ line2_coords,
358
+ gap['start']['position'],
359
+ gap['end']['position']
360
+ )
361
+
362
+ # Create new joined line
363
+ new_line = LineString(joined_coords)
364
+
365
+ # Update processed_lines and line_groups
366
+ new_idx = len(processed_lines)
367
+ processed_lines.append(new_line)
368
+
369
+ # Merge groups and remove old ones
370
+ new_group = group1 + group2
371
+ line_groups.remove(group1)
372
+ line_groups.remove(group2)
373
+ line_groups.append(new_group + [new_idx])
374
+
375
+ filled_gap_count += 1
376
+ logger.info(f"Successfully joined lines {line1_idx} and {line2_idx}")
377
+
378
+ logger.info(f"Gap filling complete. Filled {filled_gap_count} out of {len(gaps)} gaps")
379
+
380
+ # Get final lines (take the last line from each group)
381
+ final_lines = [processed_lines[group[-1]] for group in line_groups]
382
+
383
+ logger.info(f"Final cleanup complete. Resulting in {len(final_lines)} line segments")
384
+ joined_lines = final_lines
385
+
187
386
  # Create final GeoDataFrame with CRS from cell_polygons_gdf
188
387
  logger.info("Creating final GeoDataFrame for boundaries...")
189
388
  boundary_gdf = gpd.GeoDataFrame(
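
Note on the joining step added in the hunk above: the new code chains boundary edges by matching endpoints rounded to 0.01 and stops at junctions where an endpoint is shared by more than two lines. Shapely's linemerge performs a comparable merge, but only where endpoints coincide exactly, which is why the rounding matters; it likewise stops merging at nodes where more than two lines meet, paralleling the endpoint_counts check in the diff. The sketch below is illustrative only and not part of ras-commander; merge_boundary_edges, edges, and the sample coordinates are hypothetical.

from shapely.geometry import LineString, MultiLineString
from shapely.ops import linemerge


def merge_boundary_edges(edges, precision=2):
    """Snap vertices to a grid of 10**-precision, then merge contiguous LineStrings."""
    snapped = [
        LineString([(round(x, precision), round(y, precision)) for x, y in line.coords])
        for line in edges
    ]
    merged = linemerge(snapped)
    # linemerge returns a single LineString when everything chains into one piece,
    # otherwise a MultiLineString of the disjoint pieces.
    return list(merged.geoms) if isinstance(merged, MultiLineString) else [merged]


edges = [LineString([(0, 0), (1, 0)]), LineString([(1.004, 0.003), (2, 0)])]
print(merge_boundary_edges(edges))  # one merged LineString once the endpoints snap together

The hand-rolled version in the diff additionally records which original segment contributed each endpoint, which linemerge does not expose.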
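The gap-filling pass then compares every pair of endpoints from different segments, keeps pairs farther apart than the 0.01 join tolerance but closer than the hard-coded 10.0-unit threshold, and concatenates coordinate lists in whichever orientation closes the gap. A compact sketch of the same idea, under those same thresholds, is given below; bridge_small_gaps and _endpoint_pairs are hypothetical names, and the greedy rescan is a simplification of the group-tracking bookkeeping in the diff.

from itertools import combinations
from math import dist

from shapely.geometry import LineString


def _endpoint_pairs(ci, cj):
    """Yield (oriented_i, oriented_j, gap) for the four possible endpoint pairings."""
    for a in (ci, ci[::-1]):          # a's last point is the candidate connection point
        for b in (cj, cj[::-1]):      # b's first point is the candidate connection point
            yield a, b, dist(a[-1], b[0])


def bridge_small_gaps(lines, min_gap=0.01, max_gap=10.0):
    """Greedily concatenate lines whose endpoints are close but not already touching."""
    coords = [list(ln.coords) for ln in lines]
    merged = True
    while merged:                      # repeat until no pair of lines can be bridged
        merged = False
        for i, j in combinations(range(len(coords)), 2):
            candidates = [
                (a, b, d)
                for a, b, d in _endpoint_pairs(coords[i], coords[j])
                if min_gap < d < max_gap
            ]
            if candidates:
                a, b, _ = min(candidates, key=lambda t: t[2])
                coords[i] = a + b      # bridge the smallest gap for this pair
                del coords[j]
                merged = True
                break                  # indices changed, so rescan from the start
    return [LineString(c) for c in coords]

In the diff, the same effect is achieved by sorting all candidate gaps by distance up front and tracking merged line groups so that two segments are never joined twice.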
@@ -285,25 +484,71 @@ class HdfFluvialPluvial:
285
484
  delta_t (float): Time threshold in hours
286
485
 
287
486
  Returns:
288
- List[LineString]: List of LineString geometries representing boundaries where
289
- adjacent cells have time differences greater than delta_t
290
-
291
- Note:
292
- Boundaries are identified where the absolute time difference between adjacent
293
- cells exceeds the specified delta_t threshold.
487
+ List[LineString]: List of LineString geometries representing boundaries
294
488
  """
489
+ # Validate cell_times data
490
+ valid_times = {k: v for k, v in cell_times.items() if pd.notna(v)}
491
+ if len(valid_times) < len(cell_times):
492
+ logger.warning(f"Found {len(cell_times) - len(valid_times)} cells with invalid timestamps")
493
+ cell_times = valid_times
494
+
495
+ # Use a set to store processed cell pairs and avoid duplicates
496
+ processed_pairs = set()
295
497
  boundary_edges = []
498
+
499
+ # Track time differences for debugging
500
+ time_diffs = []
501
+
296
502
  with tqdm(total=len(cell_adjacency), desc="Processing cell adjacencies") as pbar:
297
503
  for cell_id, neighbors in cell_adjacency.items():
504
+ if cell_id not in cell_times:
505
+ logger.debug(f"Skipping cell {cell_id} - no timestamp data")
506
+ pbar.update(1)
507
+ continue
508
+
298
509
  cell_time = cell_times[cell_id]
299
510
 
300
511
  for neighbor_id in neighbors:
512
+ if neighbor_id not in cell_times:
513
+ logger.debug(f"Skipping neighbor {neighbor_id} of cell {cell_id} - no timestamp data")
514
+ continue
515
+
516
+ # Create a sorted tuple of the cell pair to ensure uniqueness
517
+ cell_pair = tuple(sorted([cell_id, neighbor_id]))
518
+
519
+ # Skip if we've already processed this pair
520
+ if cell_pair in processed_pairs:
521
+ continue
522
+
301
523
  neighbor_time = cell_times[neighbor_id]
524
+
525
+ # Ensure both timestamps are valid
526
+ if pd.isna(cell_time) or pd.isna(neighbor_time):
527
+ continue
528
+
529
+ # Calculate time difference in hours
302
530
  time_diff = abs((cell_time - neighbor_time).total_seconds() / 3600)
531
+ time_diffs.append(time_diff)
532
+
533
+ logger.debug(f"Time difference between cells {cell_id} and {neighbor_id}: {time_diff:.2f} hours")
303
534
 
304
535
  if time_diff >= delta_t:
536
+ logger.debug(f"Found boundary edge between cells {cell_id} and {neighbor_id} "
537
+ f"(time diff: {time_diff:.2f} hours)")
305
538
  boundary_edges.append(common_edges[cell_id][neighbor_id])
539
+
540
+ # Mark this pair as processed
541
+ processed_pairs.add(cell_pair)
306
542
 
307
543
  pbar.update(1)
308
544
 
309
- return boundary_edges
545
+ # Log summary statistics
546
+ if time_diffs:
547
+ logger.info(f"Time difference statistics:")
548
+ logger.info(f" Min: {min(time_diffs):.2f} hours")
549
+ logger.info(f" Max: {max(time_diffs):.2f} hours")
550
+ logger.info(f" Mean: {sum(time_diffs)/len(time_diffs):.2f} hours")
551
+ logger.info(f" Number of boundaries found: {len(boundary_edges)}")
552
+ logger.info(f" Delta-t threshold: {delta_t} hours")
553
+
554
+ return boundary_edges
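
Taken together, the additions in this second hunk amount to the following logic, restated here as a standalone sketch with logging, the progress bar, and the summary statistics omitted (the function name is hypothetical): an edge shared by two cells becomes a boundary when their timestamps differ by at least delta_t hours, each unordered cell pair is evaluated once, and cells without valid timestamps are skipped.

import pandas as pd


def identify_boundary_edges(cell_adjacency, common_edges, cell_times, delta_t):
    """Return the shared edges of adjacent cells whose times differ by >= delta_t hours."""
    boundary_edges = []
    processed_pairs = set()
    for cell_id, neighbors in cell_adjacency.items():
        if cell_id not in cell_times or pd.isna(cell_times[cell_id]):
            continue  # no usable timestamp for this cell
        for neighbor_id in neighbors:
            pair = tuple(sorted((cell_id, neighbor_id)))
            if pair in processed_pairs:
                continue  # each unordered pair is evaluated only once
            processed_pairs.add(pair)
            if neighbor_id not in cell_times or pd.isna(cell_times[neighbor_id]):
                continue
            time_diff = abs((cell_times[cell_id] - cell_times[neighbor_id]).total_seconds()) / 3600
            if time_diff >= delta_t:
                boundary_edges.append(common_edges[cell_id][neighbor_id])
    return boundary_edges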