ras-commander 0.75.0__py3-none-any.whl → 0.77.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1530 +1,1530 @@
1
- """
2
- Class: HdfInfiltration
3
-
4
- A comprehensive class for handling infiltration-related operations in HEC-RAS HDF geometry files.
5
- This class provides methods for managing infiltration parameters, soil statistics, and raster data processing.
6
-
7
- Key Features:
8
- - Infiltration parameter management (scaling, setting, retrieving)
9
- - Soil statistics calculation and analysis
10
- - Raster data processing and mapping
11
- - Weighted parameter calculations
12
- - Data export and file management
13
-
14
- Methods:
15
- 1. Geometry File Base Override Management:
16
- - scale_infiltration_data(): Updates infiltration parameters with scaling factors in geometry file
17
- - get_infiltration_data(): Retrieves current infiltration parameters from geometry file
18
- - set_infiltration_table(): Sets infiltration parameters directly in geometry file
19
-
20
- 2. Raster and Mapping Operations (uses rasmap_df HDF files):
21
- - get_infiltration_map(): Reads infiltration raster map from rasmap_df HDF file
22
- - calculate_soil_statistics(): Processes zonal statistics for soil analysis
23
-
24
- 3. Soil Analysis (uses rasmap_df HDF files):
25
- - get_significant_mukeys(): Identifies mukeys above percentage threshold
26
- - calculate_total_significant_percentage(): Computes total coverage of significant mukeys
27
- - get_infiltration_parameters(): Retrieves parameters for specific mukey
28
- - calculate_weighted_parameters(): Computes weighted average parameters
29
-
30
- 4. Data Management (uses rasmap_df HDF files):
31
- - save_statistics(): Exports soil statistics to CSV
32
-
33
- Constants:
34
- - SQM_TO_ACRE: Conversion factor from square meters to acres (0.000247105)
35
- - SQM_TO_SQMILE: Conversion factor from square meters to square miles (3.861e-7)
36
-
37
- Dependencies:
38
- - pathlib: Path handling
39
- - pandas: Data manipulation
40
- - geopandas: Geospatial data processing
41
- - h5py: HDF file operations
42
- - rasterstats: Zonal statistics calculation (optional)
43
-
44
- Note:
45
- - Methods in section 1 work with base overrides in geometry files
46
- - Methods in sections 2-4 work with HDF files from rasmap_df by default
47
- - All methods are static and decorated with @standardize_input and @log_call
48
- - The class is designed to work with both HEC-RAS geometry files and rasmap_df HDF files
49
- """
50
- from pathlib import Path
51
- import h5py
52
- import numpy as np
53
- import pandas as pd
54
- from typing import Optional, Dict, Any, List, Tuple
55
- import logging
56
- from .HdfBase import HdfBase
57
- from .HdfUtils import HdfUtils
58
- from .Decorators import standardize_input, log_call
59
- from .LoggingConfig import setup_logging, get_logger
60
-
61
- logger = get_logger(__name__)
62
-
63
- from pathlib import Path
64
- import pandas as pd
65
- import geopandas as gpd
66
- import h5py
67
-
68
- from .Decorators import log_call, standardize_input
69
-
70
- class HdfInfiltration:
71
-
72
- """
73
- A class for handling infiltration-related operations on HEC-RAS HDF geometry files.
74
-
75
- This class provides methods to extract and modify infiltration data from HEC-RAS HDF geometry files,
76
- including base overrides of infiltration parameters.
77
- """
78
-
79
- # Constants for unit conversion
80
- SQM_TO_ACRE = 0.000247105
81
- SQM_TO_SQMILE = 3.861e-7
82
-
83
    def __init__(self):
        # Per-instance logger; note that nearly all functionality on this class
        # is exposed as static methods, which use the module-level `logger`
        # rather than this attribute.
        self.logger = logging.getLogger(__name__)
85
-
86
-
87
- @staticmethod
88
- @log_call
89
- def get_infiltration_baseoverrides(hdf_path: Path) -> Optional[pd.DataFrame]:
90
- """
91
- Retrieve current infiltration parameters from a HEC-RAS geometry HDF file.
92
- Dynamically reads whatever columns are present in the table.
93
-
94
- Parameters
95
- ----------
96
- hdf_path : Path
97
- Path to the HEC-RAS geometry HDF file
98
-
99
- Returns
100
- -------
101
- Optional[pd.DataFrame]
102
- DataFrame containing infiltration parameters if successful, None if operation fails
103
- """
104
- try:
105
- with h5py.File(hdf_path, 'r') as hdf_file:
106
- table_path = '/Geometry/Infiltration/Base Overrides'
107
- if table_path not in hdf_file:
108
- logger.warning(f"No infiltration data found in {hdf_path}")
109
- return None
110
-
111
- # Get column info
112
- col_names, _, _ = HdfInfiltration._get_table_info(hdf_file, table_path)
113
- if not col_names:
114
- logger.error(f"No columns found in infiltration table")
115
- return None
116
-
117
- # Read data
118
- data = hdf_file[table_path][()]
119
-
120
- # Convert to DataFrame
121
- df_dict = {}
122
- for col in col_names:
123
- values = data[col]
124
- # Convert byte strings to regular strings if needed
125
- if values.dtype.kind == 'S':
126
- values = [v.decode('utf-8').strip() for v in values]
127
- df_dict[col] = values
128
-
129
- return pd.DataFrame(df_dict)
130
-
131
- except Exception as e:
132
- logger.error(f"Error reading infiltration data from {hdf_path}: {str(e)}")
133
- return None
134
-
135
-
136
-
137
- # set_infiltration_baseoverrides goes here, once finalized, tested, and fixed.
138
-
139
-
140
-
141
- # Since the infiltration base overrides are in the geometry file, the above functions work on the geometry files
142
- # The below functions work on the infiltration layer HDF files. Changes only take effect if no base overrides are present.
143
-
144
- @staticmethod
145
- @log_call
146
- def get_infiltration_layer_data(hdf_path: Path) -> Optional[pd.DataFrame]:
147
- """
148
- Retrieve current infiltration parameters from a HEC-RAS infiltration layer HDF file.
149
- Extracts the Variables dataset which contains the layer data.
150
-
151
- Parameters
152
- ----------
153
- hdf_path : Path
154
- Path to the HEC-RAS infiltration layer HDF file
155
-
156
- Returns
157
- -------
158
- Optional[pd.DataFrame]
159
- DataFrame containing infiltration parameters if successful, None if operation fails
160
- """
161
- try:
162
- with h5py.File(hdf_path, 'r') as hdf_file:
163
- variables_path = '//Variables'
164
- if variables_path not in hdf_file:
165
- logger.warning(f"No Variables dataset found in {hdf_path}")
166
- return None
167
-
168
- # Read data from Variables dataset
169
- data = hdf_file[variables_path][()]
170
-
171
- # Convert to DataFrame
172
- df_dict = {}
173
- for field_name in data.dtype.names:
174
- values = data[field_name]
175
- # Convert byte strings to regular strings if needed
176
- if values.dtype.kind == 'S':
177
- values = [v.decode('utf-8').strip() for v in values]
178
- df_dict[field_name] = values
179
-
180
- return pd.DataFrame(df_dict)
181
-
182
- except Exception as e:
183
- logger.error(f"Error reading infiltration layer data from {hdf_path}: {str(e)}")
184
- return None
185
-
186
-
187
- @staticmethod
188
- @log_call
189
- def set_infiltration_layer_data(
190
- hdf_path: Path,
191
- infiltration_df: pd.DataFrame
192
- ) -> Optional[pd.DataFrame]:
193
- """
194
- Set infiltration layer data in the infiltration layer HDF file directly from the provided DataFrame.
195
- # NOTE: This will not work if there are base overrides present in the Geometry HDF file.
196
- Updates the Variables dataset with the provided data.
197
-
198
- Parameters
199
- ----------
200
- hdf_path : Path
201
- Path to the HEC-RAS infiltration layer HDF file
202
- infiltration_df : pd.DataFrame
203
- DataFrame containing infiltration parameters with columns:
204
- - Name (string)
205
- - Curve Number (float)
206
- - Abstraction Ratio (float)
207
- - Minimum Infiltration Rate (float)
208
-
209
- Returns
210
- -------
211
- Optional[pd.DataFrame]
212
- The infiltration DataFrame if successful, None if operation fails
213
- """
214
- try:
215
- variables_path = '//Variables'
216
-
217
- # Validate required columns
218
- required_columns = ['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate']
219
- missing_columns = [col for col in required_columns if col not in infiltration_df.columns]
220
- if missing_columns:
221
- raise ValueError(f"Missing required columns: {missing_columns}")
222
-
223
- with h5py.File(hdf_path, 'a') as hdf_file:
224
- # Delete existing dataset if it exists
225
- if variables_path in hdf_file:
226
- del hdf_file[variables_path]
227
-
228
- # Create dtype for structured array
229
- dt = np.dtype([
230
- ('Name', f'S{infiltration_df["Name"].str.len().max()}'),
231
- ('Curve Number', 'f4'),
232
- ('Abstraction Ratio', 'f4'),
233
- ('Minimum Infiltration Rate', 'f4')
234
- ])
235
-
236
- # Create structured array
237
- structured_array = np.zeros(infiltration_df.shape[0], dtype=dt)
238
-
239
- # Fill structured array
240
- structured_array['Name'] = infiltration_df['Name'].values.astype(f'|S{dt["Name"].itemsize}')
241
- structured_array['Curve Number'] = infiltration_df['Curve Number'].values
242
- structured_array['Abstraction Ratio'] = infiltration_df['Abstraction Ratio'].values
243
- structured_array['Minimum Infiltration Rate'] = infiltration_df['Minimum Infiltration Rate'].values
244
-
245
- # Create new dataset
246
- hdf_file.create_dataset(
247
- variables_path,
248
- data=structured_array,
249
- dtype=dt,
250
- compression='gzip',
251
- compression_opts=1,
252
- chunks=(100,),
253
- maxshape=(None,)
254
- )
255
-
256
- return infiltration_df
257
-
258
- except Exception as e:
259
- logger.error(f"Error setting infiltration layer data in {hdf_path}: {str(e)}")
260
- return None
261
-
262
-
263
-
264
-
265
- @staticmethod
266
- @standardize_input(file_type='geom_hdf')
267
- @log_call
268
- def scale_infiltration_data(
269
- hdf_path: Path,
270
- infiltration_df: pd.DataFrame,
271
- scale_factors: Dict[str, float]
272
- ) -> Optional[pd.DataFrame]:
273
- """
274
- Update infiltration parameters in the HDF file with scaling factors.
275
- Supports any numeric columns present in the DataFrame.
276
-
277
- Parameters
278
- ----------
279
- hdf_path : Path
280
- Path to the HEC-RAS geometry HDF file
281
- infiltration_df : pd.DataFrame
282
- DataFrame containing infiltration parameters
283
- scale_factors : Dict[str, float]
284
- Dictionary mapping column names to their scaling factors
285
-
286
- Returns
287
- -------
288
- Optional[pd.DataFrame]
289
- The updated infiltration DataFrame if successful, None if operation fails
290
- """
291
- try:
292
- # Make a copy to avoid modifying the input DataFrame
293
- infiltration_df = infiltration_df.copy()
294
-
295
- # Apply scaling factors to specified columns
296
- for col, factor in scale_factors.items():
297
- if col in infiltration_df.columns and pd.api.types.is_numeric_dtype(infiltration_df[col]):
298
- infiltration_df[col] *= factor
299
- else:
300
- logger.warning(f"Column {col} not found or not numeric - skipping scaling")
301
-
302
- # Use set_infiltration_table to write the scaled data
303
- return HdfInfiltration.set_infiltration_table(hdf_path, infiltration_df)
304
-
305
- except Exception as e:
306
- logger.error(f"Error scaling infiltration data in {hdf_path}: {str(e)}")
307
- return None
308
-
309
-
310
-
311
- # Need to reorganize these soil statistics functions so they are more straightforward.
312
-
313
-
314
    @staticmethod
    @log_call
    @standardize_input(file_type='geom_hdf')
    def get_soils_raster_stats(
        geom_hdf_path: Path,
        soil_hdf_path: Path = None,
        ras_object: Any = None
    ) -> pd.DataFrame:
        """
        Calculate soil group statistics for each 2D flow area using the area's perimeter.

        Parameters
        ----------
        geom_hdf_path : Path
            Path to the HEC-RAS geometry HDF file containing the 2D flow areas
        soil_hdf_path : Path, optional
            Path to the soil HDF file. If None, uses soil_layer_path from rasmap_df
        ras_object : Any, optional
            Optional RAS object. If not provided, uses global ras instance

        Returns
        -------
        pd.DataFrame
            DataFrame with soil statistics for each 2D flow area, including:
            - mesh_name: Name of the 2D flow area
            - mukey: Soil mukey identifier
            - percentage: Percentage of 2D flow area covered by this soil type
            - area_sqm: Area in square meters
            - area_acres: Area in acres
            - area_sqmiles: Area in square miles

        Notes
        -----
        Requires the rasterstats package to be installed.
        """
        # Third-party raster dependencies are optional; fail loudly with an
        # install hint rather than a bare ImportError.
        try:
            from rasterstats import zonal_stats
            import shapely
            import geopandas as gpd
            import numpy as np
            import tempfile
            import os
        except ImportError as e:
            logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas'")
            raise e

        # Import here to avoid circular imports
        from .HdfMesh import HdfMesh

        # Resolve the soil HDF path from the project's rasmap_df when the
        # caller did not supply one explicitly.
        if soil_hdf_path is None:
            if ras_object is None:
                from .RasPrj import ras
                ras_object = ras

            # Try to get soil_layer_path from rasmap_df
            try:
                soil_hdf_path = Path(ras_object.rasmap_df.loc[0, 'soil_layer_path'][0])
                if not soil_hdf_path.exists():
                    logger.warning(f"Soil HDF path from rasmap_df does not exist: {soil_hdf_path}")
                    return pd.DataFrame()
            except (KeyError, IndexError, AttributeError, TypeError) as e:
                logger.error(f"Error retrieving soil_layer_path from rasmap_df: {str(e)}")
                return pd.DataFrame()

        # Get infiltration map - pass as hdf_path to ensure standardize_input works correctly
        try:
            raster_map = HdfInfiltration.get_infiltration_map(hdf_path=soil_hdf_path, ras_object=ras_object)
            if not raster_map:
                logger.error(f"No infiltration map found in {soil_hdf_path}")
                return pd.DataFrame()
        except Exception as e:
            logger.error(f"Error getting infiltration map: {str(e)}")
            return pd.DataFrame()

        # Get 2D flow areas (one polygon per mesh).
        mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
        if mesh_areas.empty:
            logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
            return pd.DataFrame()

        # The raster itself lives in a sibling .tif next to the soil HDF.
        tif_path = soil_hdf_path.with_suffix('.tif')
        if not tif_path.exists():
            logger.error(f"No raster file found at {tif_path}")
            return pd.DataFrame()

        # Read the raster data and info
        import rasterio
        with rasterio.open(tif_path) as src:
            grid_data = src.read(1)

            # Get transform directly from rasterio
            transform = src.transform
            no_data = src.nodata if src.nodata is not None else -9999

        # List to store all results
        all_results = []

        # Calculate zonal statistics for each 2D flow area
        for _, mesh_row in mesh_areas.iterrows():
            mesh_name = mesh_row['mesh_name']
            mesh_geom = mesh_row['geometry']

            # Get zonal statistics directly using numpy array
            try:
                # categorical=True yields a {raster_value: pixel_count} dict.
                stats = zonal_stats(
                    mesh_geom,
                    grid_data,
                    affine=transform,
                    categorical=True,
                    nodata=no_data
                )[0]

                # Skip if no stats
                if not stats:
                    logger.warning(f"No soil data found for 2D flow area: {mesh_name}")
                    continue

                # NOTE(review): these values are pixel counts from categorical
                # zonal_stats, treated below as square meters — correct only if
                # the raster has 1 m pixels. Confirm raster resolution.
                total_area_sqm = sum(stats.values())

                # Process each mukey
                for raster_val, area_sqm in stats.items():
                    # Skip NoData values
                    if raster_val is None or raster_val == no_data:
                        continue

                    # Map raster value to mukey; fall back to a synthetic label
                    # when the value is not in the map or not an int.
                    try:
                        mukey = raster_map.get(int(raster_val), f"Unknown-{raster_val}")
                    except (ValueError, TypeError):
                        mukey = f"Unknown-{raster_val}"

                    percentage = (area_sqm / total_area_sqm) * 100 if total_area_sqm > 0 else 0

                    all_results.append({
                        'mesh_name': mesh_name,
                        'mukey': mukey,
                        'percentage': percentage,
                        'area_sqm': area_sqm,
                        'area_acres': area_sqm * HdfInfiltration.SQM_TO_ACRE,
                        'area_sqmiles': area_sqm * HdfInfiltration.SQM_TO_SQMILE
                    })
            except Exception as e:
                # Best-effort per mesh: log and continue with the others.
                logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
                continue

        # Create DataFrame with results
        results_df = pd.DataFrame(all_results)

        # Sort by mesh_name and percentage (descending)
        if not results_df.empty:
            results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])

        return results_df
469
-
470
-
471
-
472
-
473
-
474
-
475
- @staticmethod
476
- @log_call
477
- @standardize_input(file_type='geom_hdf')
478
- def get_soil_raster_stats(
479
- geom_hdf_path: Path,
480
- landcover_hdf_path: Path = None,
481
- soil_hdf_path: Path = None,
482
- ras_object: Any = None
483
- ) -> pd.DataFrame:
484
- """
485
- Calculate combined land cover and soil infiltration statistics for each 2D flow area.
486
-
487
- This function processes both land cover and soil data to calculate statistics
488
- for each combination (Land Cover : Soil Type) within each 2D flow area.
489
-
490
- Parameters
491
- ----------
492
- geom_hdf_path : Path
493
- Path to the HEC-RAS geometry HDF file containing the 2D flow areas
494
- landcover_hdf_path : Path, optional
495
- Path to the land cover HDF file. If None, uses landcover_hdf_path from rasmap_df
496
- soil_hdf_path : Path, optional
497
- Path to the soil HDF file. If None, uses soil_layer_path from rasmap_df
498
- ras_object : Any, optional
499
- Optional RAS object. If not provided, uses global ras instance
500
-
501
- Returns
502
- -------
503
- pd.DataFrame
504
- DataFrame with combined statistics for each 2D flow area, including:
505
- - mesh_name: Name of the 2D flow area
506
- - combined_type: Combined land cover and soil type (e.g. "Mixed Forest : B")
507
- - percentage: Percentage of 2D flow area covered by this combination
508
- - area_sqm: Area in square meters
509
- - area_acres: Area in acres
510
- - area_sqmiles: Area in square miles
511
- - curve_number: Curve number for this combination
512
- - abstraction_ratio: Abstraction ratio for this combination
513
- - min_infiltration_rate: Minimum infiltration rate for this combination
514
-
515
- Notes
516
- -----
517
- Requires the rasterstats package to be installed.
518
- """
519
- try:
520
- from rasterstats import zonal_stats
521
- import shapely
522
- import geopandas as gpd
523
- import numpy as np
524
- import tempfile
525
- import os
526
- import rasterio
527
- from rasterio.merge import merge
528
- except ImportError as e:
529
- logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas rasterio'")
530
- raise e
531
-
532
- # Import here to avoid circular imports
533
- from .HdfMesh import HdfMesh
534
-
535
- # Get RAS object
536
- if ras_object is None:
537
- from .RasPrj import ras
538
- ras_object = ras
539
-
540
- # Get the landcover HDF path
541
- if landcover_hdf_path is None:
542
- try:
543
- landcover_hdf_path = Path(ras_object.rasmap_df.loc[0, 'landcover_hdf_path'][0])
544
- if not landcover_hdf_path.exists():
545
- logger.warning(f"Land cover HDF path from rasmap_df does not exist: {landcover_hdf_path}")
546
- return pd.DataFrame()
547
- except (KeyError, IndexError, AttributeError, TypeError) as e:
548
- logger.error(f"Error retrieving landcover_hdf_path from rasmap_df: {str(e)}")
549
- return pd.DataFrame()
550
-
551
- # Get the soil HDF path
552
- if soil_hdf_path is None:
553
- try:
554
- soil_hdf_path = Path(ras_object.rasmap_df.loc[0, 'soil_layer_path'][0])
555
- if not soil_hdf_path.exists():
556
- logger.warning(f"Soil HDF path from rasmap_df does not exist: {soil_hdf_path}")
557
- return pd.DataFrame()
558
- except (KeyError, IndexError, AttributeError, TypeError) as e:
559
- logger.error(f"Error retrieving soil_layer_path from rasmap_df: {str(e)}")
560
- return pd.DataFrame()
561
-
562
- # Get land cover map (raster to ID mapping)
563
- try:
564
- with h5py.File(landcover_hdf_path, 'r') as hdf:
565
- if '//Raster Map' not in hdf:
566
- logger.error(f"No Raster Map found in {landcover_hdf_path}")
567
- return pd.DataFrame()
568
-
569
- landcover_map_data = hdf['//Raster Map'][()]
570
- landcover_map = {int(item[0]): item[1].decode('utf-8').strip() for item in landcover_map_data}
571
- except Exception as e:
572
- logger.error(f"Error reading land cover data from HDF: {str(e)}")
573
- return pd.DataFrame()
574
-
575
- # Get soil map (raster to ID mapping)
576
- try:
577
- soil_map = HdfInfiltration.get_infiltration_map(hdf_path=soil_hdf_path, ras_object=ras_object)
578
- if not soil_map:
579
- logger.error(f"No soil map found in {soil_hdf_path}")
580
- return pd.DataFrame()
581
- except Exception as e:
582
- logger.error(f"Error getting soil map: {str(e)}")
583
- return pd.DataFrame()
584
-
585
- # Get infiltration parameters
586
- try:
587
- infiltration_params = HdfInfiltration.get_infiltration_layer_data(soil_hdf_path)
588
- if infiltration_params is None or infiltration_params.empty:
589
- logger.warning(f"No infiltration parameters found in {soil_hdf_path}")
590
- infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
591
- except Exception as e:
592
- logger.error(f"Error getting infiltration parameters: {str(e)}")
593
- infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
594
-
595
- # Get 2D flow areas
596
- mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
597
- if mesh_areas.empty:
598
- logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
599
- return pd.DataFrame()
600
-
601
- # Check for the TIF files with same name as HDF
602
- landcover_tif_path = landcover_hdf_path.with_suffix('.tif')
603
- soil_tif_path = soil_hdf_path.with_suffix('.tif')
604
-
605
- if not landcover_tif_path.exists():
606
- logger.error(f"No land cover raster file found at {landcover_tif_path}")
607
- return pd.DataFrame()
608
-
609
- if not soil_tif_path.exists():
610
- logger.error(f"No soil raster file found at {soil_tif_path}")
611
- return pd.DataFrame()
612
-
613
- # List to store all results
614
- all_results = []
615
-
616
- # Read the raster data
617
- try:
618
- with rasterio.open(landcover_tif_path) as landcover_src, rasterio.open(soil_tif_path) as soil_src:
619
- landcover_nodata = landcover_src.nodata if landcover_src.nodata is not None else -9999
620
- soil_nodata = soil_src.nodata if soil_src.nodata is not None else -9999
621
-
622
- # Calculate zonal statistics for each 2D flow area
623
- for _, mesh_row in mesh_areas.iterrows():
624
- mesh_name = mesh_row['mesh_name']
625
- mesh_geom = mesh_row['geometry']
626
-
627
- # Get zonal statistics for land cover
628
- try:
629
- landcover_stats = zonal_stats(
630
- mesh_geom,
631
- landcover_tif_path,
632
- categorical=True,
633
- nodata=landcover_nodata
634
- )[0]
635
-
636
- # Get zonal statistics for soil
637
- soil_stats = zonal_stats(
638
- mesh_geom,
639
- soil_tif_path,
640
- categorical=True,
641
- nodata=soil_nodata
642
- )[0]
643
-
644
- # Skip if no stats
645
- if not landcover_stats or not soil_stats:
646
- logger.warning(f"No land cover or soil data found for 2D flow area: {mesh_name}")
647
- continue
648
-
649
- # Calculate total area
650
- landcover_total = sum(landcover_stats.values())
651
- soil_total = sum(soil_stats.values())
652
-
653
- # Create a cross-tabulation of land cover and soil types
654
- # This is an approximation since we don't have the exact pixel-by-pixel overlap
655
- mesh_area_sqm = mesh_row['geometry'].area
656
-
657
- # Calculate percentage of each land cover type
658
- landcover_pct = {k: v/landcover_total for k, v in landcover_stats.items() if k is not None and k != landcover_nodata}
659
-
660
- # Calculate percentage of each soil type
661
- soil_pct = {k: v/soil_total for k, v in soil_stats.items() if k is not None and k != soil_nodata}
662
-
663
- # Generate combinations
664
- for lc_id, lc_pct in landcover_pct.items():
665
- lc_name = landcover_map.get(int(lc_id), f"Unknown-{lc_id}")
666
-
667
- for soil_id, soil_pct in soil_pct.items():
668
- try:
669
- soil_name = soil_map.get(int(soil_id), f"Unknown-{soil_id}")
670
- except (ValueError, TypeError):
671
- soil_name = f"Unknown-{soil_id}"
672
-
673
- # Calculate combined percentage (approximate)
674
- # This is a simplification; actual overlap would require pixel-by-pixel analysis
675
- combined_pct = lc_pct * soil_pct * 100
676
- combined_area_sqm = mesh_area_sqm * (combined_pct / 100)
677
-
678
- # Create combined name
679
- combined_name = f"{lc_name} : {soil_name}"
680
-
681
- # Look up infiltration parameters
682
- param_row = infiltration_params[infiltration_params['Name'] == combined_name]
683
- if param_row.empty:
684
- # Try with NoData for soil type
685
- param_row = infiltration_params[infiltration_params['Name'] == f"{lc_name} : NoData"]
686
-
687
- if not param_row.empty:
688
- curve_number = param_row.iloc[0]['Curve Number']
689
- abstraction_ratio = param_row.iloc[0]['Abstraction Ratio']
690
- min_infiltration_rate = param_row.iloc[0]['Minimum Infiltration Rate']
691
- else:
692
- curve_number = None
693
- abstraction_ratio = None
694
- min_infiltration_rate = None
695
-
696
- all_results.append({
697
- 'mesh_name': mesh_name,
698
- 'combined_type': combined_name,
699
- 'percentage': combined_pct,
700
- 'area_sqm': combined_area_sqm,
701
- 'area_acres': combined_area_sqm * HdfInfiltration.SQM_TO_ACRE,
702
- 'area_sqmiles': combined_area_sqm * HdfInfiltration.SQM_TO_SQMILE,
703
- 'curve_number': curve_number,
704
- 'abstraction_ratio': abstraction_ratio,
705
- 'min_infiltration_rate': min_infiltration_rate
706
- })
707
- except Exception as e:
708
- logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
709
- continue
710
- except Exception as e:
711
- logger.error(f"Error opening raster files: {str(e)}")
712
- return pd.DataFrame()
713
-
714
- # Create DataFrame with results
715
- results_df = pd.DataFrame(all_results)
716
-
717
- # Sort by mesh_name, percentage (descending)
718
- if not results_df.empty:
719
- results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])
720
-
721
- return results_df
722
-
723
-
724
-
725
-
726
-
727
-
728
- @staticmethod
729
- @log_call
730
- @standardize_input(file_type='geom_hdf')
731
- def get_infiltration_stats(
732
- geom_hdf_path: Path,
733
- landcover_hdf_path: Path = None,
734
- soil_hdf_path: Path = None,
735
- ras_object: Any = None
736
- ) -> pd.DataFrame:
737
- """
738
- Calculate combined land cover and soil infiltration statistics for each 2D flow area.
739
-
740
- This function processes both land cover and soil data to calculate statistics
741
- for each combination (Land Cover : Soil Type) within each 2D flow area.
742
-
743
- Parameters
744
- ----------
745
- geom_hdf_path : Path
746
- Path to the HEC-RAS geometry HDF file containing the 2D flow areas
747
- landcover_hdf_path : Path, optional
748
- Path to the land cover HDF file. If None, uses landcover_hdf_path from rasmap_df
749
- soil_hdf_path : Path, optional
750
- Path to the soil HDF file. If None, uses soil_layer_path from rasmap_df
751
- ras_object : Any, optional
752
- Optional RAS object. If not provided, uses global ras instance
753
-
754
- Returns
755
- -------
756
- pd.DataFrame
757
- DataFrame with combined statistics for each 2D flow area, including:
758
- - mesh_name: Name of the 2D flow area
759
- - combined_type: Combined land cover and soil type (e.g. "Mixed Forest : B")
760
- - percentage: Percentage of 2D flow area covered by this combination
761
- - area_sqm: Area in square meters
762
- - area_acres: Area in acres
763
- - area_sqmiles: Area in square miles
764
- - curve_number: Curve number for this combination
765
- - abstraction_ratio: Abstraction ratio for this combination
766
- - min_infiltration_rate: Minimum infiltration rate for this combination
767
-
768
- Notes
769
- -----
770
- Requires the rasterstats package to be installed.
771
- """
772
- try:
773
- from rasterstats import zonal_stats
774
- import shapely
775
- import geopandas as gpd
776
- import numpy as np
777
- import tempfile
778
- import os
779
- import rasterio
780
- from rasterio.merge import merge
781
- except ImportError as e:
782
- logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas rasterio'")
783
- raise e
784
-
785
- # Import here to avoid circular imports
786
- from .HdfMesh import HdfMesh
787
-
788
- # Get RAS object
789
- if ras_object is None:
790
- from .RasPrj import ras
791
- ras_object = ras
792
-
793
- # Get the landcover HDF path
794
- if landcover_hdf_path is None:
795
- try:
796
- landcover_hdf_path = Path(ras_object.rasmap_df.loc[0, 'landcover_hdf_path'][0])
797
- if not landcover_hdf_path.exists():
798
- logger.warning(f"Land cover HDF path from rasmap_df does not exist: {landcover_hdf_path}")
799
- return pd.DataFrame()
800
- except (KeyError, IndexError, AttributeError, TypeError) as e:
801
- logger.error(f"Error retrieving landcover_hdf_path from rasmap_df: {str(e)}")
802
- return pd.DataFrame()
803
-
804
- # Get the soil HDF path
805
- if soil_hdf_path is None:
806
- try:
807
- soil_hdf_path = Path(ras_object.rasmap_df.loc[0, 'soil_layer_path'][0])
808
- if not soil_hdf_path.exists():
809
- logger.warning(f"Soil HDF path from rasmap_df does not exist: {soil_hdf_path}")
810
- return pd.DataFrame()
811
- except (KeyError, IndexError, AttributeError, TypeError) as e:
812
- logger.error(f"Error retrieving soil_layer_path from rasmap_df: {str(e)}")
813
- return pd.DataFrame()
814
-
815
- # Get land cover map (raster to ID mapping)
816
- try:
817
- with h5py.File(landcover_hdf_path, 'r') as hdf:
818
- if '//Raster Map' not in hdf:
819
- logger.error(f"No Raster Map found in {landcover_hdf_path}")
820
- return pd.DataFrame()
821
-
822
- landcover_map_data = hdf['//Raster Map'][()]
823
- landcover_map = {int(item[0]): item[1].decode('utf-8').strip() for item in landcover_map_data}
824
- except Exception as e:
825
- logger.error(f"Error reading land cover data from HDF: {str(e)}")
826
- return pd.DataFrame()
827
-
828
- # Get soil map (raster to ID mapping)
829
- try:
830
- soil_map = HdfInfiltration.get_infiltration_map(hdf_path=soil_hdf_path, ras_object=ras_object)
831
- if not soil_map:
832
- logger.error(f"No soil map found in {soil_hdf_path}")
833
- return pd.DataFrame()
834
- except Exception as e:
835
- logger.error(f"Error getting soil map: {str(e)}")
836
- return pd.DataFrame()
837
-
838
- # Get infiltration parameters
839
- try:
840
- infiltration_params = HdfInfiltration.get_infiltration_layer_data(soil_hdf_path)
841
- if infiltration_params is None or infiltration_params.empty:
842
- logger.warning(f"No infiltration parameters found in {soil_hdf_path}")
843
- infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
844
- except Exception as e:
845
- logger.error(f"Error getting infiltration parameters: {str(e)}")
846
- infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
847
-
848
- # Get 2D flow areas
849
- mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
850
- if mesh_areas.empty:
851
- logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
852
- return pd.DataFrame()
853
-
854
- # Check for the TIF files with same name as HDF
855
- landcover_tif_path = landcover_hdf_path.with_suffix('.tif')
856
- soil_tif_path = soil_hdf_path.with_suffix('.tif')
857
-
858
- if not landcover_tif_path.exists():
859
- logger.error(f"No land cover raster file found at {landcover_tif_path}")
860
- return pd.DataFrame()
861
-
862
- if not soil_tif_path.exists():
863
- logger.error(f"No soil raster file found at {soil_tif_path}")
864
- return pd.DataFrame()
865
-
866
- # List to store all results
867
- all_results = []
868
-
869
- # Read the raster data
870
- try:
871
- with rasterio.open(landcover_tif_path) as landcover_src, rasterio.open(soil_tif_path) as soil_src:
872
- landcover_nodata = landcover_src.nodata if landcover_src.nodata is not None else -9999
873
- soil_nodata = soil_src.nodata if soil_src.nodata is not None else -9999
874
-
875
- # Calculate zonal statistics for each 2D flow area
876
- for _, mesh_row in mesh_areas.iterrows():
877
- mesh_name = mesh_row['mesh_name']
878
- mesh_geom = mesh_row['geometry']
879
-
880
- # Get zonal statistics for land cover
881
- try:
882
- landcover_stats = zonal_stats(
883
- mesh_geom,
884
- landcover_tif_path,
885
- categorical=True,
886
- nodata=landcover_nodata
887
- )[0]
888
-
889
- # Get zonal statistics for soil
890
- soil_stats = zonal_stats(
891
- mesh_geom,
892
- soil_tif_path,
893
- categorical=True,
894
- nodata=soil_nodata
895
- )[0]
896
-
897
- # Skip if no stats
898
- if not landcover_stats or not soil_stats:
899
- logger.warning(f"No land cover or soil data found for 2D flow area: {mesh_name}")
900
- continue
901
-
902
- # Calculate total area
903
- landcover_total = sum(landcover_stats.values())
904
- soil_total = sum(soil_stats.values())
905
-
906
- # Create a cross-tabulation of land cover and soil types
907
- # This is an approximation since we don't have the exact pixel-by-pixel overlap
908
- mesh_area_sqm = mesh_row['geometry'].area
909
-
910
- # Calculate percentage of each land cover type
911
- landcover_pct = {k: v/landcover_total for k, v in landcover_stats.items() if k is not None and k != landcover_nodata}
912
-
913
- # Calculate percentage of each soil type
914
- soil_pct = {k: v/soil_total for k, v in soil_stats.items() if k is not None and k != soil_nodata}
915
-
916
- # Generate combinations
917
- for lc_id, lc_pct in landcover_pct.items():
918
- lc_name = landcover_map.get(int(lc_id), f"Unknown-{lc_id}")
919
-
920
- for soil_id, soil_pct in soil_pct.items():
921
- try:
922
- soil_name = soil_map.get(int(soil_id), f"Unknown-{soil_id}")
923
- except (ValueError, TypeError):
924
- soil_name = f"Unknown-{soil_id}"
925
-
926
- # Calculate combined percentage (approximate)
927
- # This is a simplification; actual overlap would require pixel-by-pixel analysis
928
- combined_pct = lc_pct * soil_pct * 100
929
- combined_area_sqm = mesh_area_sqm * (combined_pct / 100)
930
-
931
- # Create combined name
932
- combined_name = f"{lc_name} : {soil_name}"
933
-
934
- # Look up infiltration parameters
935
- param_row = infiltration_params[infiltration_params['Name'] == combined_name]
936
- if param_row.empty:
937
- # Try with NoData for soil type
938
- param_row = infiltration_params[infiltration_params['Name'] == f"{lc_name} : NoData"]
939
-
940
- if not param_row.empty:
941
- curve_number = param_row.iloc[0]['Curve Number']
942
- abstraction_ratio = param_row.iloc[0]['Abstraction Ratio']
943
- min_infiltration_rate = param_row.iloc[0]['Minimum Infiltration Rate']
944
- else:
945
- curve_number = None
946
- abstraction_ratio = None
947
- min_infiltration_rate = None
948
-
949
- all_results.append({
950
- 'mesh_name': mesh_name,
951
- 'combined_type': combined_name,
952
- 'percentage': combined_pct,
953
- 'area_sqm': combined_area_sqm,
954
- 'area_acres': combined_area_sqm * HdfInfiltration.SQM_TO_ACRE,
955
- 'area_sqmiles': combined_area_sqm * HdfInfiltration.SQM_TO_SQMILE,
956
- 'curve_number': curve_number,
957
- 'abstraction_ratio': abstraction_ratio,
958
- 'min_infiltration_rate': min_infiltration_rate
959
- })
960
- except Exception as e:
961
- logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
962
- continue
963
- except Exception as e:
964
- logger.error(f"Error opening raster files: {str(e)}")
965
- return pd.DataFrame()
966
-
967
- # Create DataFrame with results
968
- results_df = pd.DataFrame(all_results)
969
-
970
- # Sort by mesh_name, percentage (descending)
971
- if not results_df.empty:
972
- results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])
973
-
974
- return results_df
975
-
976
-
977
-
978
-
979
-
980
-
981
-
982
-
983
-
984
-
985
-
986
-
987
-
988
-
989
-
990
-
991
-
992
-
993
-
994
- @staticmethod
995
- @log_call
996
- @standardize_input(file_type='geom_hdf')
997
- def get_infiltration_map(hdf_path: Path = None, ras_object: Any = None) -> dict:
998
- """Read the infiltration raster map from HDF file
999
-
1000
- Args:
1001
- hdf_path: Optional path to the HDF file. If not provided, uses first infiltration_hdf_path from rasmap_df
1002
- ras_object: Optional RAS object. If not provided, uses global ras instance
1003
-
1004
- Returns:
1005
- Dictionary mapping raster values to mukeys
1006
- """
1007
- if hdf_path is None:
1008
- if ras_object is None:
1009
- from .RasPrj import ras
1010
- ras_object = ras
1011
- hdf_path = Path(ras_object.rasmap_df.iloc[0]['infiltration_hdf_path'][0])
1012
-
1013
- with h5py.File(hdf_path, 'r') as hdf:
1014
- raster_map_data = hdf['Raster Map'][:]
1015
- return {int(item[0]): item[1].decode('utf-8') for item in raster_map_data}
1016
-
1017
- @staticmethod
1018
- @log_call
1019
- def calculate_soil_statistics(zonal_stats: list, raster_map: dict) -> pd.DataFrame:
1020
- """Calculate soil statistics from zonal statistics
1021
-
1022
- Args:
1023
- zonal_stats: List of zonal statistics
1024
- raster_map: Dictionary mapping raster values to mukeys
1025
-
1026
- Returns:
1027
- DataFrame with soil statistics including percentages and areas
1028
- """
1029
-
1030
- try:
1031
- from rasterstats import zonal_stats
1032
- except ImportError as e:
1033
- logger.error("Failed to import rasterstats. Please run 'pip install rasterstats' and try again.")
1034
- raise e
1035
- # Initialize areas dictionary
1036
- mukey_areas = {mukey: 0 for mukey in raster_map.values()}
1037
-
1038
- # Calculate total area and mukey areas
1039
- total_area_sqm = 0
1040
- for stat in zonal_stats:
1041
- for raster_val, area in stat.items():
1042
- mukey = raster_map.get(raster_val)
1043
- if mukey:
1044
- mukey_areas[mukey] += area
1045
- total_area_sqm += area
1046
-
1047
- # Create DataFrame rows
1048
- rows = []
1049
- for mukey, area_sqm in mukey_areas.items():
1050
- if area_sqm > 0:
1051
- rows.append({
1052
- 'mukey': mukey,
1053
- 'Percentage': (area_sqm / total_area_sqm) * 100,
1054
- 'Area in Acres': area_sqm * HdfInfiltration.SQM_TO_ACRE,
1055
- 'Area in Square Miles': area_sqm * HdfInfiltration.SQM_TO_SQMILE
1056
- })
1057
-
1058
- return pd.DataFrame(rows)
1059
-
1060
- @staticmethod
1061
- @log_call
1062
- def get_significant_mukeys(soil_stats: pd.DataFrame,
1063
- threshold: float = 1.0) -> pd.DataFrame:
1064
- """Get mukeys with percentage greater than threshold
1065
-
1066
- Args:
1067
- soil_stats: DataFrame with soil statistics
1068
- threshold: Minimum percentage threshold (default 1.0)
1069
-
1070
- Returns:
1071
- DataFrame with significant mukeys and their statistics
1072
- """
1073
- significant = soil_stats[soil_stats['Percentage'] > threshold].copy()
1074
- significant.sort_values('Percentage', ascending=False, inplace=True)
1075
- return significant
1076
-
1077
- @staticmethod
1078
- @log_call
1079
- def calculate_total_significant_percentage(significant_mukeys: pd.DataFrame) -> float:
1080
- """Calculate total percentage covered by significant mukeys
1081
-
1082
- Args:
1083
- significant_mukeys: DataFrame of significant mukeys
1084
-
1085
- Returns:
1086
- Total percentage covered by significant mukeys
1087
- """
1088
- return significant_mukeys['Percentage'].sum()
1089
-
1090
- @staticmethod
1091
- @log_call
1092
- def save_statistics(soil_stats: pd.DataFrame, output_path: Path,
1093
- include_timestamp: bool = True):
1094
- """Save soil statistics to CSV
1095
-
1096
- Args:
1097
- soil_stats: DataFrame with soil statistics
1098
- output_path: Path to save CSV file
1099
- include_timestamp: Whether to include timestamp in filename
1100
- """
1101
- if include_timestamp:
1102
- timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
1103
- output_path = output_path.with_name(
1104
- f"{output_path.stem}_{timestamp}{output_path.suffix}")
1105
-
1106
- soil_stats.to_csv(output_path, index=False)
1107
-
1108
- @staticmethod
1109
- @log_call
1110
- @standardize_input
1111
- def get_infiltration_parameters(hdf_path: Path = None, mukey: str = None, ras_object: Any = None) -> dict:
1112
- """Get infiltration parameters for a specific mukey from HDF file
1113
-
1114
- Args:
1115
- hdf_path: Optional path to the HDF file. If not provided, uses first infiltration_hdf_path from rasmap_df
1116
- mukey: Mukey identifier
1117
- ras_object: Optional RAS object. If not provided, uses global ras instance
1118
-
1119
- Returns:
1120
- Dictionary of infiltration parameters
1121
- """
1122
- if hdf_path is None:
1123
- if ras_object is None:
1124
- from .RasPrj import ras
1125
- ras_object = ras
1126
- hdf_path = Path(ras_object.rasmap_df.iloc[0]['infiltration_hdf_path'][0])
1127
-
1128
- with h5py.File(hdf_path, 'r') as hdf:
1129
- if 'Infiltration Parameters' not in hdf:
1130
- raise KeyError("No infiltration parameters found in HDF file")
1131
-
1132
- params = hdf['Infiltration Parameters'][:]
1133
- for row in params:
1134
- if row[0].decode('utf-8') == mukey:
1135
- return {
1136
- 'Initial Loss (in)': float(row[1]),
1137
- 'Constant Loss Rate (in/hr)': float(row[2]),
1138
- 'Impervious Area (%)': float(row[3])
1139
- }
1140
- return None
1141
-
1142
- @staticmethod
1143
- @log_call
1144
- def calculate_weighted_parameters(soil_stats: pd.DataFrame,
1145
- infiltration_params: dict) -> dict:
1146
- """Calculate weighted infiltration parameters based on soil statistics
1147
-
1148
- Args:
1149
- soil_stats: DataFrame with soil statistics
1150
- infiltration_params: Dictionary of infiltration parameters by mukey
1151
-
1152
- Returns:
1153
- Dictionary of weighted average infiltration parameters
1154
- """
1155
- total_weight = soil_stats['Percentage'].sum()
1156
-
1157
- weighted_params = {
1158
- 'Initial Loss (in)': 0.0,
1159
- 'Constant Loss Rate (in/hr)': 0.0,
1160
- 'Impervious Area (%)': 0.0
1161
- }
1162
-
1163
- for _, row in soil_stats.iterrows():
1164
- mukey = row['mukey']
1165
- weight = row['Percentage'] / total_weight
1166
-
1167
- if mukey in infiltration_params:
1168
- for param in weighted_params:
1169
- weighted_params[param] += (
1170
- infiltration_params[mukey][param] * weight
1171
- )
1172
-
1173
- return weighted_params
1174
-
1175
-
1176
- @staticmethod
1177
- def _get_table_info(hdf_file: h5py.File, table_path: str) -> Tuple[List[str], List[str], List[str]]:
1178
- """Get column names and types from HDF table
1179
-
1180
- Args:
1181
- hdf_file: Open HDF file object
1182
- table_path: Path to table in HDF file
1183
-
1184
- Returns:
1185
- Tuple of (column names, numpy dtypes, column descriptions)
1186
- """
1187
- if table_path not in hdf_file:
1188
- return [], [], []
1189
-
1190
- dataset = hdf_file[table_path]
1191
- dtype = dataset.dtype
1192
-
1193
- # Extract column names and types
1194
- col_names = []
1195
- col_types = []
1196
- col_descs = []
1197
-
1198
- for name in dtype.names:
1199
- col_names.append(name)
1200
- col_types.append(dtype[name].str)
1201
- col_descs.append(name) # Could be enhanced to get actual descriptions
1202
-
1203
- return col_names, col_types, col_descs
1204
-
1205
-
1206
- @staticmethod
1207
- @log_call
1208
- @standardize_input(file_type='geom_hdf')
1209
- def get_landcover_raster_stats(
1210
- geom_hdf_path: Path,
1211
- landcover_hdf_path: Path = None,
1212
- ras_object: Any = None
1213
- ) -> pd.DataFrame:
1214
- """
1215
- Calculate land cover statistics for each 2D flow area using the area's perimeter.
1216
-
1217
- Parameters
1218
- ----------
1219
- geom_hdf_path : Path
1220
- Path to the HEC-RAS geometry HDF file containing the 2D flow areas
1221
- landcover_hdf_path : Path, optional
1222
- Path to the land cover HDF file. If None, uses landcover_hdf_path from rasmap_df
1223
- ras_object : Any, optional
1224
- Optional RAS object. If not provided, uses global ras instance
1225
-
1226
- Returns
1227
- -------
1228
- pd.DataFrame
1229
- DataFrame with land cover statistics for each 2D flow area, including:
1230
- - mesh_name: Name of the 2D flow area
1231
- - land_cover: Land cover classification name
1232
- - percentage: Percentage of 2D flow area covered by this land cover type
1233
- - area_sqm: Area in square meters
1234
- - area_acres: Area in acres
1235
- - area_sqmiles: Area in square miles
1236
- - mannings_n: Manning's n value for this land cover type
1237
- - percent_impervious: Percent impervious for this land cover type
1238
-
1239
- Notes
1240
- -----
1241
- Requires the rasterstats package to be installed.
1242
- """
1243
- try:
1244
- from rasterstats import zonal_stats
1245
- import shapely
1246
- import geopandas as gpd
1247
- import numpy as np
1248
- import tempfile
1249
- import os
1250
- import rasterio
1251
- except ImportError as e:
1252
- logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas rasterio'")
1253
- raise e
1254
-
1255
- # Import here to avoid circular imports
1256
- from .HdfMesh import HdfMesh
1257
-
1258
- # Get the landcover HDF path
1259
- if landcover_hdf_path is None:
1260
- if ras_object is None:
1261
- from .RasPrj import ras
1262
- ras_object = ras
1263
-
1264
- # Try to get landcover_hdf_path from rasmap_df
1265
- try:
1266
- landcover_hdf_path = Path(ras_object.rasmap_df.loc[0, 'landcover_hdf_path'][0])
1267
- if not landcover_hdf_path.exists():
1268
- logger.warning(f"Land cover HDF path from rasmap_df does not exist: {landcover_hdf_path}")
1269
- return pd.DataFrame()
1270
- except (KeyError, IndexError, AttributeError, TypeError) as e:
1271
- logger.error(f"Error retrieving landcover_hdf_path from rasmap_df: {str(e)}")
1272
- return pd.DataFrame()
1273
-
1274
- # Get land cover map (raster to ID mapping)
1275
- try:
1276
- with h5py.File(landcover_hdf_path, 'r') as hdf:
1277
- if '//Raster Map' not in hdf:
1278
- logger.error(f"No Raster Map found in {landcover_hdf_path}")
1279
- return pd.DataFrame()
1280
-
1281
- raster_map_data = hdf['//Raster Map'][()]
1282
- raster_map = {int(item[0]): item[1].decode('utf-8').strip() for item in raster_map_data}
1283
-
1284
- # Get land cover variables (mannings_n and percent_impervious)
1285
- variables = {}
1286
- if '//Variables' in hdf:
1287
- var_data = hdf['//Variables'][()]
1288
- for row in var_data:
1289
- name = row[0].decode('utf-8').strip()
1290
- mannings_n = float(row[1])
1291
- percent_impervious = float(row[2])
1292
- variables[name] = {
1293
- 'mannings_n': mannings_n,
1294
- 'percent_impervious': percent_impervious
1295
- }
1296
- except Exception as e:
1297
- logger.error(f"Error reading land cover data from HDF: {str(e)}")
1298
- return pd.DataFrame()
1299
-
1300
- # Get 2D flow areas
1301
- mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
1302
- if mesh_areas.empty:
1303
- logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
1304
- return pd.DataFrame()
1305
-
1306
- # Check for the TIF file with same name as HDF
1307
- tif_path = landcover_hdf_path.with_suffix('.tif')
1308
- if not tif_path.exists():
1309
- logger.error(f"No raster file found at {tif_path}")
1310
- return pd.DataFrame()
1311
-
1312
- # List to store all results
1313
- all_results = []
1314
-
1315
- # Read the raster data and info
1316
- try:
1317
- with rasterio.open(tif_path) as src:
1318
- # Get transform directly from rasterio
1319
- transform = src.transform
1320
- no_data = src.nodata if src.nodata is not None else -9999
1321
-
1322
- # Calculate zonal statistics for each 2D flow area
1323
- for _, mesh_row in mesh_areas.iterrows():
1324
- mesh_name = mesh_row['mesh_name']
1325
- mesh_geom = mesh_row['geometry']
1326
-
1327
- # Get zonal statistics directly using rasterio grid
1328
- try:
1329
- stats = zonal_stats(
1330
- mesh_geom,
1331
- tif_path,
1332
- categorical=True,
1333
- nodata=no_data
1334
- )[0]
1335
-
1336
- # Skip if no stats
1337
- if not stats:
1338
- logger.warning(f"No land cover data found for 2D flow area: {mesh_name}")
1339
- continue
1340
-
1341
- # Calculate total area and percentages
1342
- total_area_sqm = sum(stats.values())
1343
-
1344
- # Process each land cover type
1345
- for raster_val, area_sqm in stats.items():
1346
- # Skip NoData values
1347
- if raster_val is None or raster_val == no_data:
1348
- continue
1349
-
1350
- try:
1351
- # Get land cover name from raster map
1352
- land_cover = raster_map.get(int(raster_val), f"Unknown-{raster_val}")
1353
-
1354
- # Get Manning's n and percent impervious
1355
- mannings_n = variables.get(land_cover, {}).get('mannings_n', None)
1356
- percent_impervious = variables.get(land_cover, {}).get('percent_impervious', None)
1357
-
1358
- percentage = (area_sqm / total_area_sqm) * 100 if total_area_sqm > 0 else 0
1359
-
1360
- all_results.append({
1361
- 'mesh_name': mesh_name,
1362
- 'land_cover': land_cover,
1363
- 'percentage': percentage,
1364
- 'area_sqm': area_sqm,
1365
- 'area_acres': area_sqm * HdfInfiltration.SQM_TO_ACRE,
1366
- 'area_sqmiles': area_sqm * HdfInfiltration.SQM_TO_SQMILE,
1367
- 'mannings_n': mannings_n,
1368
- 'percent_impervious': percent_impervious
1369
- })
1370
- except Exception as e:
1371
- logger.warning(f"Error processing raster value {raster_val}: {e}")
1372
- continue
1373
- except Exception as e:
1374
- logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
1375
- continue
1376
- except Exception as e:
1377
- logger.error(f"Error opening raster file {tif_path}: {str(e)}")
1378
- return pd.DataFrame()
1379
-
1380
- # Create DataFrame with results
1381
- results_df = pd.DataFrame(all_results)
1382
-
1383
- # Sort by mesh_name, percentage (descending)
1384
- if not results_df.empty:
1385
- results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])
1386
-
1387
- return results_df
1388
-
1389
-
1390
-
1391
- '''
1392
-
1393
- THIS FUNCTION IS VERY CLOSE BUT DOES NOT WORK BECAUSE IT DOES NOT PRESERVE THE EXACT STRUCTURE OF THE HDF FILE.
1394
- WHEN RAS LOADS THE HDF, IT IGNORES THE DATA IN THE TABLE AND REPLACES IT WITH NULLS.
1395
-
1396
-
1397
- @staticmethod
1398
- @log_call
1399
- def set_infiltration_baseoverrides(
1400
- hdf_path: Path,
1401
- infiltration_df: pd.DataFrame
1402
- ) -> Optional[pd.DataFrame]:
1403
- """
1404
- Set base overrides for infiltration parameters in the HDF file while preserving
1405
- the exact structure of the existing dataset.
1406
-
1407
- This function ensures that the HDF structure is maintained exactly as in the
1408
- original file, including field names, data types, and string lengths. It updates
1409
- the values while preserving all dataset attributes.
1410
-
1411
- Parameters
1412
- ----------
1413
- hdf_path : Path
1414
- Path to the HEC-RAS geometry HDF file
1415
- infiltration_df : pd.DataFrame
1416
- DataFrame containing infiltration parameters with columns matching HDF structure.
1417
- The first column should be 'Name' or 'Land Cover Name'.
1418
-
1419
- Returns
1420
- -------
1421
- Optional[pd.DataFrame]
1422
- The infiltration DataFrame if successful, None if operation fails
1423
- """
1424
- try:
1425
- # Make a copy to avoid modifying the input DataFrame
1426
- infiltration_df = infiltration_df.copy()
1427
-
1428
- # Check for and rename the first column if needed
1429
- if "Land Cover Name" in infiltration_df.columns:
1430
- name_col = "Land Cover Name"
1431
- else:
1432
- name_col = "Name"
1433
- # Rename 'Name' to 'Land Cover Name' for HDF dataset
1434
- infiltration_df = infiltration_df.rename(columns={"Name": "Land Cover Name"})
1435
-
1436
- table_path = '/Geometry/Infiltration/Base Overrides'
1437
-
1438
- with h5py.File(hdf_path, 'r') as hdf_file_read:
1439
- # Check if dataset exists
1440
- if table_path not in hdf_file_read:
1441
- logger.warning(f"No infiltration data found in {hdf_path}. Creating new dataset.")
1442
- # If dataset doesn't exist, use the standard set_infiltration_baseoverrides method
1443
- return HdfInfiltration.set_infiltration_baseoverrides(hdf_path, infiltration_df)
1444
-
1445
- # Get the exact dtype of the existing dataset
1446
- existing_dtype = hdf_file_read[table_path].dtype
1447
-
1448
- # Extract column names from the existing dataset
1449
- existing_columns = existing_dtype.names
1450
-
1451
- # Check if all columns in the DataFrame exist in the HDF dataset
1452
- for col in infiltration_df.columns:
1453
- hdf_col = col
1454
- if col == "Name" and "Land Cover Name" in existing_columns:
1455
- hdf_col = "Land Cover Name"
1456
-
1457
- if hdf_col not in existing_columns:
1458
- logger.warning(f"Column {col} not found in existing dataset - it will be ignored")
1459
-
1460
- # Get current dataset to preserve structure for non-updated fields
1461
- existing_data = hdf_file_read[table_path][()]
1462
-
1463
- # Create a structured array with the exact same dtype as the existing dataset
1464
- structured_array = np.zeros(len(infiltration_df), dtype=existing_dtype)
1465
-
1466
- # Copy data from DataFrame to structured array, preserving existing structure
1467
- for col in existing_columns:
1468
- df_col = col
1469
- # Map 'Land Cover Name' to 'Name' if needed
1470
- if col == "Land Cover Name" and name_col == "Name":
1471
- df_col = "Name"
1472
-
1473
- if df_col in infiltration_df.columns:
1474
- # Handle string fields - need to maintain exact string length
1475
- if existing_dtype[col].kind == 'S':
1476
- # Get the exact string length from dtype
1477
- max_str_len = existing_dtype[col].itemsize
1478
- # Convert to bytes with correct length
1479
- structured_array[col] = infiltration_df[df_col].astype(str).values.astype(f'|S{max_str_len}')
1480
- else:
1481
- # Handle numeric fields - ensure correct numeric type
1482
- if existing_dtype[col].kind in ('f', 'i'):
1483
- structured_array[col] = infiltration_df[df_col].values.astype(existing_dtype[col])
1484
- else:
1485
- # For any other type, just copy as is
1486
- structured_array[col] = infiltration_df[df_col].values
1487
- else:
1488
- logger.warning(f"Column {col} not in DataFrame - using default values")
1489
- # Use zeros for numeric fields or empty strings for string fields
1490
- if existing_dtype[col].kind == 'S':
1491
- structured_array[col] = np.array([''] * len(infiltration_df), dtype=f'|S{existing_dtype[col].itemsize}')
1492
-
1493
- # Write back to HDF file
1494
- with h5py.File(hdf_path, 'a') as hdf_file_write:
1495
- # Delete existing dataset
1496
- if table_path in hdf_file_write:
1497
- del hdf_file_write[table_path]
1498
-
1499
- # Create new dataset with exact same properties as original
1500
- dataset = hdf_file_write.create_dataset(
1501
- table_path,
1502
- data=structured_array,
1503
- dtype=existing_dtype,
1504
- compression='gzip',
1505
- compression_opts=1,
1506
- chunks=(100,),
1507
- maxshape=(None,)
1508
- )
1509
-
1510
- # Return the DataFrame with columns matching what was actually written
1511
- result_df = pd.DataFrame()
1512
- for col in existing_columns:
1513
- if existing_dtype[col].kind == 'S':
1514
- # Convert bytes back to string
1515
- result_df[col] = [val.decode('utf-8').strip() for val in structured_array[col]]
1516
- else:
1517
- result_df[col] = structured_array[col]
1518
-
1519
- return result_df
1520
-
1521
- except Exception as e:
1522
- logger.error(f"Error setting infiltration data in {hdf_path}: {str(e)}")
1523
- return None
1524
-
1525
-
1526
-
1527
-
1528
-
1529
-
1
+ """
2
+ Class: HdfInfiltration
3
+
4
+ A comprehensive class for handling infiltration-related operations in HEC-RAS HDF geometry files.
5
+ This class provides methods for managing infiltration parameters, soil statistics, and raster data processing.
6
+
7
+ Key Features:
8
+ - Infiltration parameter management (scaling, setting, retrieving)
9
+ - Soil statistics calculation and analysis
10
+ - Raster data processing and mapping
11
+ - Weighted parameter calculations
12
+ - Data export and file management
13
+
14
+ Methods:
15
+ 1. Geometry File Base Override Management:
16
+ - scale_infiltration_data(): Updates infiltration parameters with scaling factors in geometry file
17
+ - get_infiltration_data(): Retrieves current infiltration parameters from geometry file
18
+ - set_infiltration_table(): Sets infiltration parameters directly in geometry file
19
+
20
+ 2. Raster and Mapping Operations (uses rasmap_df HDF files):
21
+ - get_infiltration_map(): Reads infiltration raster map from rasmap_df HDF file
22
+ - calculate_soil_statistics(): Processes zonal statistics for soil analysis
23
+
24
+ 3. Soil Analysis (uses rasmap_df HDF files):
25
+ - get_significant_mukeys(): Identifies mukeys above percentage threshold
26
+ - calculate_total_significant_percentage(): Computes total coverage of significant mukeys
27
+ - get_infiltration_parameters(): Retrieves parameters for specific mukey
28
+ - calculate_weighted_parameters(): Computes weighted average parameters
29
+
30
+ 4. Data Management (uses rasmap_df HDF files):
31
+ - save_statistics(): Exports soil statistics to CSV
32
+
33
+ Constants:
34
+ - SQM_TO_ACRE: Conversion factor from square meters to acres (0.000247105)
35
+ - SQM_TO_SQMILE: Conversion factor from square meters to square miles (3.861e-7)
36
+
37
+ Dependencies:
38
+ - pathlib: Path handling
39
+ - pandas: Data manipulation
40
+ - geopandas: Geospatial data processing
41
+ - h5py: HDF file operations
42
+ - rasterstats: Zonal statistics calculation (optional)
43
+
44
+ Note:
45
+ - Methods in section 1 work with base overrides in geometry files
46
+ - Methods in sections 2-4 work with HDF files from rasmap_df by default
47
+ - All methods are static and decorated with @standardize_input and @log_call
48
+ - The class is designed to work with both HEC-RAS geometry files and rasmap_df HDF files
49
+ """
50
+ from pathlib import Path
51
+ import h5py
52
+ import numpy as np
53
+ import pandas as pd
54
+ from typing import Optional, Dict, Any, List, Tuple
55
+ import logging
56
+ from .HdfBase import HdfBase
57
+ from .HdfUtils import HdfUtils
58
+ from .Decorators import standardize_input, log_call
59
+ from .LoggingConfig import setup_logging, get_logger
60
+
61
+ logger = get_logger(__name__)
62
+
63
+ from pathlib import Path
64
+ import pandas as pd
65
+ import geopandas as gpd
66
+ import h5py
67
+
68
+ from .Decorators import log_call, standardize_input
69
+
70
+ class HdfInfiltration:
71
+
72
+ """
73
+ A class for handling infiltration-related operations on HEC-RAS HDF geometry files.
74
+
75
+ This class provides methods to extract and modify infiltration data from HEC-RAS HDF geometry files,
76
+ including base overrides of infiltration parameters.
77
+ """
78
+
79
+ # Constants for unit conversion
80
+ SQM_TO_ACRE = 0.000247105
81
+ SQM_TO_SQMILE = 3.861e-7
82
+
83
+ def __init__(self):
84
+ self.logger = logging.getLogger(__name__)
85
+
86
+
87
+ @staticmethod
88
+ @log_call
89
+ def get_infiltration_baseoverrides(hdf_path: Path) -> Optional[pd.DataFrame]:
90
+ """
91
+ Retrieve current infiltration parameters from a HEC-RAS geometry HDF file.
92
+ Dynamically reads whatever columns are present in the table.
93
+
94
+ Parameters
95
+ ----------
96
+ hdf_path : Path
97
+ Path to the HEC-RAS geometry HDF file
98
+
99
+ Returns
100
+ -------
101
+ Optional[pd.DataFrame]
102
+ DataFrame containing infiltration parameters if successful, None if operation fails
103
+ """
104
+ try:
105
+ with h5py.File(hdf_path, 'r') as hdf_file:
106
+ table_path = '/Geometry/Infiltration/Base Overrides'
107
+ if table_path not in hdf_file:
108
+ logger.warning(f"No infiltration data found in {hdf_path}")
109
+ return None
110
+
111
+ # Get column info
112
+ col_names, _, _ = HdfInfiltration._get_table_info(hdf_file, table_path)
113
+ if not col_names:
114
+ logger.error(f"No columns found in infiltration table")
115
+ return None
116
+
117
+ # Read data
118
+ data = hdf_file[table_path][()]
119
+
120
+ # Convert to DataFrame
121
+ df_dict = {}
122
+ for col in col_names:
123
+ values = data[col]
124
+ # Convert byte strings to regular strings if needed
125
+ if values.dtype.kind == 'S':
126
+ values = [v.decode('utf-8').strip() for v in values]
127
+ df_dict[col] = values
128
+
129
+ return pd.DataFrame(df_dict)
130
+
131
+ except Exception as e:
132
+ logger.error(f"Error reading infiltration data from {hdf_path}: {str(e)}")
133
+ return None
134
+
135
+
136
+
137
+ # set_infiltration_baseoverrides goes here, once finalized tested and fixed.
138
+
139
+
140
+
141
+ # Since the infiltration base overrides are in the geometry file, the above functions work on the geometry files
142
+ # The below functions work on the infiltration layer HDF files. Changes only take effect if no base overrides are present.
143
+
144
+ @staticmethod
145
+ @log_call
146
+ def get_infiltration_layer_data(hdf_path: Path) -> Optional[pd.DataFrame]:
147
+ """
148
+ Retrieve current infiltration parameters from a HEC-RAS infiltration layer HDF file.
149
+ Extracts the Variables dataset which contains the layer data.
150
+
151
+ Parameters
152
+ ----------
153
+ hdf_path : Path
154
+ Path to the HEC-RAS infiltration layer HDF file
155
+
156
+ Returns
157
+ -------
158
+ Optional[pd.DataFrame]
159
+ DataFrame containing infiltration parameters if successful, None if operation fails
160
+ """
161
+ try:
162
+ with h5py.File(hdf_path, 'r') as hdf_file:
163
+ variables_path = '//Variables'
164
+ if variables_path not in hdf_file:
165
+ logger.warning(f"No Variables dataset found in {hdf_path}")
166
+ return None
167
+
168
+ # Read data from Variables dataset
169
+ data = hdf_file[variables_path][()]
170
+
171
+ # Convert to DataFrame
172
+ df_dict = {}
173
+ for field_name in data.dtype.names:
174
+ values = data[field_name]
175
+ # Convert byte strings to regular strings if needed
176
+ if values.dtype.kind == 'S':
177
+ values = [v.decode('utf-8').strip() for v in values]
178
+ df_dict[field_name] = values
179
+
180
+ return pd.DataFrame(df_dict)
181
+
182
+ except Exception as e:
183
+ logger.error(f"Error reading infiltration layer data from {hdf_path}: {str(e)}")
184
+ return None
185
+
186
+
187
+ @staticmethod
188
+ @log_call
189
+ def set_infiltration_layer_data(
190
+ hdf_path: Path,
191
+ infiltration_df: pd.DataFrame
192
+ ) -> Optional[pd.DataFrame]:
193
+ """
194
+ Set infiltration layer data in the infiltration layer HDF file directly from the provided DataFrame.
195
+ # NOTE: This will not work if there are base overrides present in the Geometry HDF file.
196
+ Updates the Variables dataset with the provided data.
197
+
198
+ Parameters
199
+ ----------
200
+ hdf_path : Path
201
+ Path to the HEC-RAS infiltration layer HDF file
202
+ infiltration_df : pd.DataFrame
203
+ DataFrame containing infiltration parameters with columns:
204
+ - Name (string)
205
+ - Curve Number (float)
206
+ - Abstraction Ratio (float)
207
+ - Minimum Infiltration Rate (float)
208
+
209
+ Returns
210
+ -------
211
+ Optional[pd.DataFrame]
212
+ The infiltration DataFrame if successful, None if operation fails
213
+ """
214
+ try:
215
+ variables_path = '//Variables'
216
+
217
+ # Validate required columns
218
+ required_columns = ['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate']
219
+ missing_columns = [col for col in required_columns if col not in infiltration_df.columns]
220
+ if missing_columns:
221
+ raise ValueError(f"Missing required columns: {missing_columns}")
222
+
223
+ with h5py.File(hdf_path, 'a') as hdf_file:
224
+ # Delete existing dataset if it exists
225
+ if variables_path in hdf_file:
226
+ del hdf_file[variables_path]
227
+
228
+ # Create dtype for structured array
229
+ dt = np.dtype([
230
+ ('Name', f'S{infiltration_df["Name"].str.len().max()}'),
231
+ ('Curve Number', 'f4'),
232
+ ('Abstraction Ratio', 'f4'),
233
+ ('Minimum Infiltration Rate', 'f4')
234
+ ])
235
+
236
+ # Create structured array
237
+ structured_array = np.zeros(infiltration_df.shape[0], dtype=dt)
238
+
239
+ # Fill structured array
240
+ structured_array['Name'] = infiltration_df['Name'].values.astype(f'|S{dt["Name"].itemsize}')
241
+ structured_array['Curve Number'] = infiltration_df['Curve Number'].values
242
+ structured_array['Abstraction Ratio'] = infiltration_df['Abstraction Ratio'].values
243
+ structured_array['Minimum Infiltration Rate'] = infiltration_df['Minimum Infiltration Rate'].values
244
+
245
+ # Create new dataset
246
+ hdf_file.create_dataset(
247
+ variables_path,
248
+ data=structured_array,
249
+ dtype=dt,
250
+ compression='gzip',
251
+ compression_opts=1,
252
+ chunks=(100,),
253
+ maxshape=(None,)
254
+ )
255
+
256
+ return infiltration_df
257
+
258
+ except Exception as e:
259
+ logger.error(f"Error setting infiltration layer data in {hdf_path}: {str(e)}")
260
+ return None
261
+
262
+
263
+
264
+
265
+ @staticmethod
266
+ @standardize_input(file_type='geom_hdf')
267
+ @log_call
268
+ def scale_infiltration_data(
269
+ hdf_path: Path,
270
+ infiltration_df: pd.DataFrame,
271
+ scale_factors: Dict[str, float]
272
+ ) -> Optional[pd.DataFrame]:
273
+ """
274
+ Update infiltration parameters in the HDF file with scaling factors.
275
+ Supports any numeric columns present in the DataFrame.
276
+
277
+ Parameters
278
+ ----------
279
+ hdf_path : Path
280
+ Path to the HEC-RAS geometry HDF file
281
+ infiltration_df : pd.DataFrame
282
+ DataFrame containing infiltration parameters
283
+ scale_factors : Dict[str, float]
284
+ Dictionary mapping column names to their scaling factors
285
+
286
+ Returns
287
+ -------
288
+ Optional[pd.DataFrame]
289
+ The updated infiltration DataFrame if successful, None if operation fails
290
+ """
291
+ try:
292
+ # Make a copy to avoid modifying the input DataFrame
293
+ infiltration_df = infiltration_df.copy()
294
+
295
+ # Apply scaling factors to specified columns
296
+ for col, factor in scale_factors.items():
297
+ if col in infiltration_df.columns and pd.api.types.is_numeric_dtype(infiltration_df[col]):
298
+ infiltration_df[col] *= factor
299
+ else:
300
+ logger.warning(f"Column {col} not found or not numeric - skipping scaling")
301
+
302
+ # Use set_infiltration_table to write the scaled data
303
+ return HdfInfiltration.set_infiltration_table(hdf_path, infiltration_df)
304
+
305
+ except Exception as e:
306
+ logger.error(f"Error scaling infiltration data in {hdf_path}: {str(e)}")
307
+ return None
308
+
309
+
310
+
311
+ # Need to reorganize these soil staatistics functions so they are more straightforward.
312
+
313
+
314
+ @staticmethod
315
+ @log_call
316
+ @standardize_input(file_type='geom_hdf')
317
+ def get_soils_raster_stats(
318
+ geom_hdf_path: Path,
319
+ soil_hdf_path: Path = None,
320
+ ras_object: Any = None
321
+ ) -> pd.DataFrame:
322
+ """
323
+ Calculate soil group statistics for each 2D flow area using the area's perimeter.
324
+
325
+ Parameters
326
+ ----------
327
+ geom_hdf_path : Path
328
+ Path to the HEC-RAS geometry HDF file containing the 2D flow areas
329
+ soil_hdf_path : Path, optional
330
+ Path to the soil HDF file. If None, uses soil_layer_path from rasmap_df
331
+ ras_object : Any, optional
332
+ Optional RAS object. If not provided, uses global ras instance
333
+
334
+ Returns
335
+ -------
336
+ pd.DataFrame
337
+ DataFrame with soil statistics for each 2D flow area, including:
338
+ - mesh_name: Name of the 2D flow area
339
+ - mukey: Soil mukey identifier
340
+ - percentage: Percentage of 2D flow area covered by this soil type
341
+ - area_sqm: Area in square meters
342
+ - area_acres: Area in acres
343
+ - area_sqmiles: Area in square miles
344
+
345
+ Notes
346
+ -----
347
+ Requires the rasterstats package to be installed.
348
+ """
349
+ try:
350
+ from rasterstats import zonal_stats
351
+ import shapely
352
+ import geopandas as gpd
353
+ import numpy as np
354
+ import tempfile
355
+ import os
356
+ except ImportError as e:
357
+ logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas'")
358
+ raise e
359
+
360
+ # Import here to avoid circular imports
361
+ from .HdfMesh import HdfMesh
362
+
363
+ # Get the soil HDF path
364
+ if soil_hdf_path is None:
365
+ if ras_object is None:
366
+ from .RasPrj import ras
367
+ ras_object = ras
368
+
369
+ # Try to get soil_layer_path from rasmap_df
370
+ try:
371
+ soil_hdf_path = Path(ras_object.rasmap_df.loc[0, 'soil_layer_path'][0])
372
+ if not soil_hdf_path.exists():
373
+ logger.warning(f"Soil HDF path from rasmap_df does not exist: {soil_hdf_path}")
374
+ return pd.DataFrame()
375
+ except (KeyError, IndexError, AttributeError, TypeError) as e:
376
+ logger.error(f"Error retrieving soil_layer_path from rasmap_df: {str(e)}")
377
+ return pd.DataFrame()
378
+
379
+ # Get infiltration map - pass as hdf_path to ensure standardize_input works correctly
380
+ try:
381
+ raster_map = HdfInfiltration.get_infiltration_map(hdf_path=soil_hdf_path, ras_object=ras_object)
382
+ if not raster_map:
383
+ logger.error(f"No infiltration map found in {soil_hdf_path}")
384
+ return pd.DataFrame()
385
+ except Exception as e:
386
+ logger.error(f"Error getting infiltration map: {str(e)}")
387
+ return pd.DataFrame()
388
+
389
+ # Get 2D flow areas
390
+ mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
391
+ if mesh_areas.empty:
392
+ logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
393
+ return pd.DataFrame()
394
+
395
+ # Extract the raster data for analysis
396
+ tif_path = soil_hdf_path.with_suffix('.tif')
397
+ if not tif_path.exists():
398
+ logger.error(f"No raster file found at {tif_path}")
399
+ return pd.DataFrame()
400
+
401
+ # Read the raster data and info
402
+ import rasterio
403
+ with rasterio.open(tif_path) as src:
404
+ grid_data = src.read(1)
405
+
406
+ # Get transform directly from rasterio
407
+ transform = src.transform
408
+ no_data = src.nodata if src.nodata is not None else -9999
409
+
410
+ # List to store all results
411
+ all_results = []
412
+
413
+ # Calculate zonal statistics for each 2D flow area
414
+ for _, mesh_row in mesh_areas.iterrows():
415
+ mesh_name = mesh_row['mesh_name']
416
+ mesh_geom = mesh_row['geometry']
417
+
418
+ # Get zonal statistics directly using numpy array
419
+ try:
420
+ stats = zonal_stats(
421
+ mesh_geom,
422
+ grid_data,
423
+ affine=transform,
424
+ categorical=True,
425
+ nodata=no_data
426
+ )[0]
427
+
428
+ # Skip if no stats
429
+ if not stats:
430
+ logger.warning(f"No soil data found for 2D flow area: {mesh_name}")
431
+ continue
432
+
433
+ # Calculate total area and percentages
434
+ total_area_sqm = sum(stats.values())
435
+
436
+ # Process each mukey
437
+ for raster_val, area_sqm in stats.items():
438
+ # Skip NoData values
439
+ if raster_val is None or raster_val == no_data:
440
+ continue
441
+
442
+ try:
443
+ mukey = raster_map.get(int(raster_val), f"Unknown-{raster_val}")
444
+ except (ValueError, TypeError):
445
+ mukey = f"Unknown-{raster_val}"
446
+
447
+ percentage = (area_sqm / total_area_sqm) * 100 if total_area_sqm > 0 else 0
448
+
449
+ all_results.append({
450
+ 'mesh_name': mesh_name,
451
+ 'mukey': mukey,
452
+ 'percentage': percentage,
453
+ 'area_sqm': area_sqm,
454
+ 'area_acres': area_sqm * HdfInfiltration.SQM_TO_ACRE,
455
+ 'area_sqmiles': area_sqm * HdfInfiltration.SQM_TO_SQMILE
456
+ })
457
+ except Exception as e:
458
+ logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
459
+ continue
460
+
461
+ # Create DataFrame with results
462
+ results_df = pd.DataFrame(all_results)
463
+
464
+ # Sort by mesh_name and percentage (descending)
465
+ if not results_df.empty:
466
+ results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])
467
+
468
+ return results_df
469
+
470
+
471
+
472
+
473
+
474
+
475
+ @staticmethod
476
+ @log_call
477
+ @standardize_input(file_type='geom_hdf')
478
+ def get_soil_raster_stats(
479
+ geom_hdf_path: Path,
480
+ landcover_hdf_path: Path = None,
481
+ soil_hdf_path: Path = None,
482
+ ras_object: Any = None
483
+ ) -> pd.DataFrame:
484
+ """
485
+ Calculate combined land cover and soil infiltration statistics for each 2D flow area.
486
+
487
+ This function processes both land cover and soil data to calculate statistics
488
+ for each combination (Land Cover : Soil Type) within each 2D flow area.
489
+
490
+ Parameters
491
+ ----------
492
+ geom_hdf_path : Path
493
+ Path to the HEC-RAS geometry HDF file containing the 2D flow areas
494
+ landcover_hdf_path : Path, optional
495
+ Path to the land cover HDF file. If None, uses landcover_hdf_path from rasmap_df
496
+ soil_hdf_path : Path, optional
497
+ Path to the soil HDF file. If None, uses soil_layer_path from rasmap_df
498
+ ras_object : Any, optional
499
+ Optional RAS object. If not provided, uses global ras instance
500
+
501
+ Returns
502
+ -------
503
+ pd.DataFrame
504
+ DataFrame with combined statistics for each 2D flow area, including:
505
+ - mesh_name: Name of the 2D flow area
506
+ - combined_type: Combined land cover and soil type (e.g. "Mixed Forest : B")
507
+ - percentage: Percentage of 2D flow area covered by this combination
508
+ - area_sqm: Area in square meters
509
+ - area_acres: Area in acres
510
+ - area_sqmiles: Area in square miles
511
+ - curve_number: Curve number for this combination
512
+ - abstraction_ratio: Abstraction ratio for this combination
513
+ - min_infiltration_rate: Minimum infiltration rate for this combination
514
+
515
+ Notes
516
+ -----
517
+ Requires the rasterstats package to be installed.
518
+ """
519
+ try:
520
+ from rasterstats import zonal_stats
521
+ import shapely
522
+ import geopandas as gpd
523
+ import numpy as np
524
+ import tempfile
525
+ import os
526
+ import rasterio
527
+ from rasterio.merge import merge
528
+ except ImportError as e:
529
+ logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas rasterio'")
530
+ raise e
531
+
532
+ # Import here to avoid circular imports
533
+ from .HdfMesh import HdfMesh
534
+
535
+ # Get RAS object
536
+ if ras_object is None:
537
+ from .RasPrj import ras
538
+ ras_object = ras
539
+
540
+ # Get the landcover HDF path
541
+ if landcover_hdf_path is None:
542
+ try:
543
+ landcover_hdf_path = Path(ras_object.rasmap_df.loc[0, 'landcover_hdf_path'][0])
544
+ if not landcover_hdf_path.exists():
545
+ logger.warning(f"Land cover HDF path from rasmap_df does not exist: {landcover_hdf_path}")
546
+ return pd.DataFrame()
547
+ except (KeyError, IndexError, AttributeError, TypeError) as e:
548
+ logger.error(f"Error retrieving landcover_hdf_path from rasmap_df: {str(e)}")
549
+ return pd.DataFrame()
550
+
551
+ # Get the soil HDF path
552
+ if soil_hdf_path is None:
553
+ try:
554
+ soil_hdf_path = Path(ras_object.rasmap_df.loc[0, 'soil_layer_path'][0])
555
+ if not soil_hdf_path.exists():
556
+ logger.warning(f"Soil HDF path from rasmap_df does not exist: {soil_hdf_path}")
557
+ return pd.DataFrame()
558
+ except (KeyError, IndexError, AttributeError, TypeError) as e:
559
+ logger.error(f"Error retrieving soil_layer_path from rasmap_df: {str(e)}")
560
+ return pd.DataFrame()
561
+
562
+ # Get land cover map (raster to ID mapping)
563
+ try:
564
+ with h5py.File(landcover_hdf_path, 'r') as hdf:
565
+ if '//Raster Map' not in hdf:
566
+ logger.error(f"No Raster Map found in {landcover_hdf_path}")
567
+ return pd.DataFrame()
568
+
569
+ landcover_map_data = hdf['//Raster Map'][()]
570
+ landcover_map = {int(item[0]): item[1].decode('utf-8').strip() for item in landcover_map_data}
571
+ except Exception as e:
572
+ logger.error(f"Error reading land cover data from HDF: {str(e)}")
573
+ return pd.DataFrame()
574
+
575
+ # Get soil map (raster to ID mapping)
576
+ try:
577
+ soil_map = HdfInfiltration.get_infiltration_map(hdf_path=soil_hdf_path, ras_object=ras_object)
578
+ if not soil_map:
579
+ logger.error(f"No soil map found in {soil_hdf_path}")
580
+ return pd.DataFrame()
581
+ except Exception as e:
582
+ logger.error(f"Error getting soil map: {str(e)}")
583
+ return pd.DataFrame()
584
+
585
+ # Get infiltration parameters
586
+ try:
587
+ infiltration_params = HdfInfiltration.get_infiltration_layer_data(soil_hdf_path)
588
+ if infiltration_params is None or infiltration_params.empty:
589
+ logger.warning(f"No infiltration parameters found in {soil_hdf_path}")
590
+ infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
591
+ except Exception as e:
592
+ logger.error(f"Error getting infiltration parameters: {str(e)}")
593
+ infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
594
+
595
+ # Get 2D flow areas
596
+ mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
597
+ if mesh_areas.empty:
598
+ logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
599
+ return pd.DataFrame()
600
+
601
+ # Check for the TIF files with same name as HDF
602
+ landcover_tif_path = landcover_hdf_path.with_suffix('.tif')
603
+ soil_tif_path = soil_hdf_path.with_suffix('.tif')
604
+
605
+ if not landcover_tif_path.exists():
606
+ logger.error(f"No land cover raster file found at {landcover_tif_path}")
607
+ return pd.DataFrame()
608
+
609
+ if not soil_tif_path.exists():
610
+ logger.error(f"No soil raster file found at {soil_tif_path}")
611
+ return pd.DataFrame()
612
+
613
+ # List to store all results
614
+ all_results = []
615
+
616
+ # Read the raster data
617
+ try:
618
+ with rasterio.open(landcover_tif_path) as landcover_src, rasterio.open(soil_tif_path) as soil_src:
619
+ landcover_nodata = landcover_src.nodata if landcover_src.nodata is not None else -9999
620
+ soil_nodata = soil_src.nodata if soil_src.nodata is not None else -9999
621
+
622
+ # Calculate zonal statistics for each 2D flow area
623
+ for _, mesh_row in mesh_areas.iterrows():
624
+ mesh_name = mesh_row['mesh_name']
625
+ mesh_geom = mesh_row['geometry']
626
+
627
+ # Get zonal statistics for land cover
628
+ try:
629
+ landcover_stats = zonal_stats(
630
+ mesh_geom,
631
+ landcover_tif_path,
632
+ categorical=True,
633
+ nodata=landcover_nodata
634
+ )[0]
635
+
636
+ # Get zonal statistics for soil
637
+ soil_stats = zonal_stats(
638
+ mesh_geom,
639
+ soil_tif_path,
640
+ categorical=True,
641
+ nodata=soil_nodata
642
+ )[0]
643
+
644
+ # Skip if no stats
645
+ if not landcover_stats or not soil_stats:
646
+ logger.warning(f"No land cover or soil data found for 2D flow area: {mesh_name}")
647
+ continue
648
+
649
+ # Calculate total area
650
+ landcover_total = sum(landcover_stats.values())
651
+ soil_total = sum(soil_stats.values())
652
+
653
+ # Create a cross-tabulation of land cover and soil types
654
+ # This is an approximation since we don't have the exact pixel-by-pixel overlap
655
+ mesh_area_sqm = mesh_row['geometry'].area
656
+
657
+ # Calculate percentage of each land cover type
658
+ landcover_pct = {k: v/landcover_total for k, v in landcover_stats.items() if k is not None and k != landcover_nodata}
659
+
660
+ # Calculate percentage of each soil type
661
+ soil_pct = {k: v/soil_total for k, v in soil_stats.items() if k is not None and k != soil_nodata}
662
+
663
+ # Generate combinations
664
+ for lc_id, lc_pct in landcover_pct.items():
665
+ lc_name = landcover_map.get(int(lc_id), f"Unknown-{lc_id}")
666
+
667
+ for soil_id, soil_pct in soil_pct.items():
668
+ try:
669
+ soil_name = soil_map.get(int(soil_id), f"Unknown-{soil_id}")
670
+ except (ValueError, TypeError):
671
+ soil_name = f"Unknown-{soil_id}"
672
+
673
+ # Calculate combined percentage (approximate)
674
+ # This is a simplification; actual overlap would require pixel-by-pixel analysis
675
+ combined_pct = lc_pct * soil_pct * 100
676
+ combined_area_sqm = mesh_area_sqm * (combined_pct / 100)
677
+
678
+ # Create combined name
679
+ combined_name = f"{lc_name} : {soil_name}"
680
+
681
+ # Look up infiltration parameters
682
+ param_row = infiltration_params[infiltration_params['Name'] == combined_name]
683
+ if param_row.empty:
684
+ # Try with NoData for soil type
685
+ param_row = infiltration_params[infiltration_params['Name'] == f"{lc_name} : NoData"]
686
+
687
+ if not param_row.empty:
688
+ curve_number = param_row.iloc[0]['Curve Number']
689
+ abstraction_ratio = param_row.iloc[0]['Abstraction Ratio']
690
+ min_infiltration_rate = param_row.iloc[0]['Minimum Infiltration Rate']
691
+ else:
692
+ curve_number = None
693
+ abstraction_ratio = None
694
+ min_infiltration_rate = None
695
+
696
+ all_results.append({
697
+ 'mesh_name': mesh_name,
698
+ 'combined_type': combined_name,
699
+ 'percentage': combined_pct,
700
+ 'area_sqm': combined_area_sqm,
701
+ 'area_acres': combined_area_sqm * HdfInfiltration.SQM_TO_ACRE,
702
+ 'area_sqmiles': combined_area_sqm * HdfInfiltration.SQM_TO_SQMILE,
703
+ 'curve_number': curve_number,
704
+ 'abstraction_ratio': abstraction_ratio,
705
+ 'min_infiltration_rate': min_infiltration_rate
706
+ })
707
+ except Exception as e:
708
+ logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
709
+ continue
710
+ except Exception as e:
711
+ logger.error(f"Error opening raster files: {str(e)}")
712
+ return pd.DataFrame()
713
+
714
+ # Create DataFrame with results
715
+ results_df = pd.DataFrame(all_results)
716
+
717
+ # Sort by mesh_name, percentage (descending)
718
+ if not results_df.empty:
719
+ results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])
720
+
721
+ return results_df
722
+
723
+
724
+
725
+
726
+
727
+
728
+ @staticmethod
729
+ @log_call
730
+ @standardize_input(file_type='geom_hdf')
731
+ def get_infiltration_stats(
732
+ geom_hdf_path: Path,
733
+ landcover_hdf_path: Path = None,
734
+ soil_hdf_path: Path = None,
735
+ ras_object: Any = None
736
+ ) -> pd.DataFrame:
737
+ """
738
+ Calculate combined land cover and soil infiltration statistics for each 2D flow area.
739
+
740
+ This function processes both land cover and soil data to calculate statistics
741
+ for each combination (Land Cover : Soil Type) within each 2D flow area.
742
+
743
+ Parameters
744
+ ----------
745
+ geom_hdf_path : Path
746
+ Path to the HEC-RAS geometry HDF file containing the 2D flow areas
747
+ landcover_hdf_path : Path, optional
748
+ Path to the land cover HDF file. If None, uses landcover_hdf_path from rasmap_df
749
+ soil_hdf_path : Path, optional
750
+ Path to the soil HDF file. If None, uses soil_layer_path from rasmap_df
751
+ ras_object : Any, optional
752
+ Optional RAS object. If not provided, uses global ras instance
753
+
754
+ Returns
755
+ -------
756
+ pd.DataFrame
757
+ DataFrame with combined statistics for each 2D flow area, including:
758
+ - mesh_name: Name of the 2D flow area
759
+ - combined_type: Combined land cover and soil type (e.g. "Mixed Forest : B")
760
+ - percentage: Percentage of 2D flow area covered by this combination
761
+ - area_sqm: Area in square meters
762
+ - area_acres: Area in acres
763
+ - area_sqmiles: Area in square miles
764
+ - curve_number: Curve number for this combination
765
+ - abstraction_ratio: Abstraction ratio for this combination
766
+ - min_infiltration_rate: Minimum infiltration rate for this combination
767
+
768
+ Notes
769
+ -----
770
+ Requires the rasterstats package to be installed.
771
+ """
772
+ try:
773
+ from rasterstats import zonal_stats
774
+ import shapely
775
+ import geopandas as gpd
776
+ import numpy as np
777
+ import tempfile
778
+ import os
779
+ import rasterio
780
+ from rasterio.merge import merge
781
+ except ImportError as e:
782
+ logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas rasterio'")
783
+ raise e
784
+
785
+ # Import here to avoid circular imports
786
+ from .HdfMesh import HdfMesh
787
+
788
+ # Get RAS object
789
+ if ras_object is None:
790
+ from .RasPrj import ras
791
+ ras_object = ras
792
+
793
+ # Get the landcover HDF path
794
+ if landcover_hdf_path is None:
795
+ try:
796
+ landcover_hdf_path = Path(ras_object.rasmap_df.loc[0, 'landcover_hdf_path'][0])
797
+ if not landcover_hdf_path.exists():
798
+ logger.warning(f"Land cover HDF path from rasmap_df does not exist: {landcover_hdf_path}")
799
+ return pd.DataFrame()
800
+ except (KeyError, IndexError, AttributeError, TypeError) as e:
801
+ logger.error(f"Error retrieving landcover_hdf_path from rasmap_df: {str(e)}")
802
+ return pd.DataFrame()
803
+
804
+ # Get the soil HDF path
805
+ if soil_hdf_path is None:
806
+ try:
807
+ soil_hdf_path = Path(ras_object.rasmap_df.loc[0, 'soil_layer_path'][0])
808
+ if not soil_hdf_path.exists():
809
+ logger.warning(f"Soil HDF path from rasmap_df does not exist: {soil_hdf_path}")
810
+ return pd.DataFrame()
811
+ except (KeyError, IndexError, AttributeError, TypeError) as e:
812
+ logger.error(f"Error retrieving soil_layer_path from rasmap_df: {str(e)}")
813
+ return pd.DataFrame()
814
+
815
+ # Get land cover map (raster to ID mapping)
816
+ try:
817
+ with h5py.File(landcover_hdf_path, 'r') as hdf:
818
+ if '//Raster Map' not in hdf:
819
+ logger.error(f"No Raster Map found in {landcover_hdf_path}")
820
+ return pd.DataFrame()
821
+
822
+ landcover_map_data = hdf['//Raster Map'][()]
823
+ landcover_map = {int(item[0]): item[1].decode('utf-8').strip() for item in landcover_map_data}
824
+ except Exception as e:
825
+ logger.error(f"Error reading land cover data from HDF: {str(e)}")
826
+ return pd.DataFrame()
827
+
828
+ # Get soil map (raster to ID mapping)
829
+ try:
830
+ soil_map = HdfInfiltration.get_infiltration_map(hdf_path=soil_hdf_path, ras_object=ras_object)
831
+ if not soil_map:
832
+ logger.error(f"No soil map found in {soil_hdf_path}")
833
+ return pd.DataFrame()
834
+ except Exception as e:
835
+ logger.error(f"Error getting soil map: {str(e)}")
836
+ return pd.DataFrame()
837
+
838
+ # Get infiltration parameters
839
+ try:
840
+ infiltration_params = HdfInfiltration.get_infiltration_layer_data(soil_hdf_path)
841
+ if infiltration_params is None or infiltration_params.empty:
842
+ logger.warning(f"No infiltration parameters found in {soil_hdf_path}")
843
+ infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
844
+ except Exception as e:
845
+ logger.error(f"Error getting infiltration parameters: {str(e)}")
846
+ infiltration_params = pd.DataFrame(columns=['Name', 'Curve Number', 'Abstraction Ratio', 'Minimum Infiltration Rate'])
847
+
848
+ # Get 2D flow areas
849
+ mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
850
+ if mesh_areas.empty:
851
+ logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
852
+ return pd.DataFrame()
853
+
854
+ # Check for the TIF files with same name as HDF
855
+ landcover_tif_path = landcover_hdf_path.with_suffix('.tif')
856
+ soil_tif_path = soil_hdf_path.with_suffix('.tif')
857
+
858
+ if not landcover_tif_path.exists():
859
+ logger.error(f"No land cover raster file found at {landcover_tif_path}")
860
+ return pd.DataFrame()
861
+
862
+ if not soil_tif_path.exists():
863
+ logger.error(f"No soil raster file found at {soil_tif_path}")
864
+ return pd.DataFrame()
865
+
866
+ # List to store all results
867
+ all_results = []
868
+
869
+ # Read the raster data
870
+ try:
871
+ with rasterio.open(landcover_tif_path) as landcover_src, rasterio.open(soil_tif_path) as soil_src:
872
+ landcover_nodata = landcover_src.nodata if landcover_src.nodata is not None else -9999
873
+ soil_nodata = soil_src.nodata if soil_src.nodata is not None else -9999
874
+
875
+ # Calculate zonal statistics for each 2D flow area
876
+ for _, mesh_row in mesh_areas.iterrows():
877
+ mesh_name = mesh_row['mesh_name']
878
+ mesh_geom = mesh_row['geometry']
879
+
880
+ # Get zonal statistics for land cover
881
+ try:
882
+ landcover_stats = zonal_stats(
883
+ mesh_geom,
884
+ landcover_tif_path,
885
+ categorical=True,
886
+ nodata=landcover_nodata
887
+ )[0]
888
+
889
+ # Get zonal statistics for soil
890
+ soil_stats = zonal_stats(
891
+ mesh_geom,
892
+ soil_tif_path,
893
+ categorical=True,
894
+ nodata=soil_nodata
895
+ )[0]
896
+
897
+ # Skip if no stats
898
+ if not landcover_stats or not soil_stats:
899
+ logger.warning(f"No land cover or soil data found for 2D flow area: {mesh_name}")
900
+ continue
901
+
902
+ # Calculate total area
903
+ landcover_total = sum(landcover_stats.values())
904
+ soil_total = sum(soil_stats.values())
905
+
906
+ # Create a cross-tabulation of land cover and soil types
907
+ # This is an approximation since we don't have the exact pixel-by-pixel overlap
908
+ mesh_area_sqm = mesh_row['geometry'].area
909
+
910
+ # Calculate percentage of each land cover type
911
+ landcover_pct = {k: v/landcover_total for k, v in landcover_stats.items() if k is not None and k != landcover_nodata}
912
+
913
+ # Calculate percentage of each soil type
914
+ soil_pct = {k: v/soil_total for k, v in soil_stats.items() if k is not None and k != soil_nodata}
915
+
916
+ # Generate combinations
917
+ for lc_id, lc_pct in landcover_pct.items():
918
+ lc_name = landcover_map.get(int(lc_id), f"Unknown-{lc_id}")
919
+
920
+ for soil_id, soil_pct in soil_pct.items():
921
+ try:
922
+ soil_name = soil_map.get(int(soil_id), f"Unknown-{soil_id}")
923
+ except (ValueError, TypeError):
924
+ soil_name = f"Unknown-{soil_id}"
925
+
926
+ # Calculate combined percentage (approximate)
927
+ # This is a simplification; actual overlap would require pixel-by-pixel analysis
928
+ combined_pct = lc_pct * soil_pct * 100
929
+ combined_area_sqm = mesh_area_sqm * (combined_pct / 100)
930
+
931
+ # Create combined name
932
+ combined_name = f"{lc_name} : {soil_name}"
933
+
934
+ # Look up infiltration parameters
935
+ param_row = infiltration_params[infiltration_params['Name'] == combined_name]
936
+ if param_row.empty:
937
+ # Try with NoData for soil type
938
+ param_row = infiltration_params[infiltration_params['Name'] == f"{lc_name} : NoData"]
939
+
940
+ if not param_row.empty:
941
+ curve_number = param_row.iloc[0]['Curve Number']
942
+ abstraction_ratio = param_row.iloc[0]['Abstraction Ratio']
943
+ min_infiltration_rate = param_row.iloc[0]['Minimum Infiltration Rate']
944
+ else:
945
+ curve_number = None
946
+ abstraction_ratio = None
947
+ min_infiltration_rate = None
948
+
949
+ all_results.append({
950
+ 'mesh_name': mesh_name,
951
+ 'combined_type': combined_name,
952
+ 'percentage': combined_pct,
953
+ 'area_sqm': combined_area_sqm,
954
+ 'area_acres': combined_area_sqm * HdfInfiltration.SQM_TO_ACRE,
955
+ 'area_sqmiles': combined_area_sqm * HdfInfiltration.SQM_TO_SQMILE,
956
+ 'curve_number': curve_number,
957
+ 'abstraction_ratio': abstraction_ratio,
958
+ 'min_infiltration_rate': min_infiltration_rate
959
+ })
960
+ except Exception as e:
961
+ logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
962
+ continue
963
+ except Exception as e:
964
+ logger.error(f"Error opening raster files: {str(e)}")
965
+ return pd.DataFrame()
966
+
967
+ # Create DataFrame with results
968
+ results_df = pd.DataFrame(all_results)
969
+
970
+ # Sort by mesh_name, percentage (descending)
971
+ if not results_df.empty:
972
+ results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])
973
+
974
+ return results_df
975
+
976
+
977
+
978
+
979
+
980
+
981
+
982
+
983
+
984
+
985
+
986
+
987
+
988
+
989
+
990
+
991
+
992
+
993
+
994
+ @staticmethod
995
+ @log_call
996
+ @standardize_input(file_type='geom_hdf')
997
+ def get_infiltration_map(hdf_path: Path = None, ras_object: Any = None) -> dict:
998
+ """Read the infiltration raster map from HDF file
999
+
1000
+ Args:
1001
+ hdf_path: Optional path to the HDF file. If not provided, uses first infiltration_hdf_path from rasmap_df
1002
+ ras_object: Optional RAS object. If not provided, uses global ras instance
1003
+
1004
+ Returns:
1005
+ Dictionary mapping raster values to mukeys
1006
+ """
1007
+ if hdf_path is None:
1008
+ if ras_object is None:
1009
+ from .RasPrj import ras
1010
+ ras_object = ras
1011
+ hdf_path = Path(ras_object.rasmap_df.iloc[0]['infiltration_hdf_path'][0])
1012
+
1013
+ with h5py.File(hdf_path, 'r') as hdf:
1014
+ raster_map_data = hdf['Raster Map'][:]
1015
+ return {int(item[0]): item[1].decode('utf-8') for item in raster_map_data}
1016
+
1017
+ @staticmethod
1018
+ @log_call
1019
+ def calculate_soil_statistics(zonal_stats: list, raster_map: dict) -> pd.DataFrame:
1020
+ """Calculate soil statistics from zonal statistics
1021
+
1022
+ Args:
1023
+ zonal_stats: List of zonal statistics
1024
+ raster_map: Dictionary mapping raster values to mukeys
1025
+
1026
+ Returns:
1027
+ DataFrame with soil statistics including percentages and areas
1028
+ """
1029
+
1030
+ try:
1031
+ from rasterstats import zonal_stats
1032
+ except ImportError as e:
1033
+ logger.error("Failed to import rasterstats. Please run 'pip install rasterstats' and try again.")
1034
+ raise e
1035
+ # Initialize areas dictionary
1036
+ mukey_areas = {mukey: 0 for mukey in raster_map.values()}
1037
+
1038
+ # Calculate total area and mukey areas
1039
+ total_area_sqm = 0
1040
+ for stat in zonal_stats:
1041
+ for raster_val, area in stat.items():
1042
+ mukey = raster_map.get(raster_val)
1043
+ if mukey:
1044
+ mukey_areas[mukey] += area
1045
+ total_area_sqm += area
1046
+
1047
+ # Create DataFrame rows
1048
+ rows = []
1049
+ for mukey, area_sqm in mukey_areas.items():
1050
+ if area_sqm > 0:
1051
+ rows.append({
1052
+ 'mukey': mukey,
1053
+ 'Percentage': (area_sqm / total_area_sqm) * 100,
1054
+ 'Area in Acres': area_sqm * HdfInfiltration.SQM_TO_ACRE,
1055
+ 'Area in Square Miles': area_sqm * HdfInfiltration.SQM_TO_SQMILE
1056
+ })
1057
+
1058
+ return pd.DataFrame(rows)
1059
+
1060
+ @staticmethod
1061
+ @log_call
1062
+ def get_significant_mukeys(soil_stats: pd.DataFrame,
1063
+ threshold: float = 1.0) -> pd.DataFrame:
1064
+ """Get mukeys with percentage greater than threshold
1065
+
1066
+ Args:
1067
+ soil_stats: DataFrame with soil statistics
1068
+ threshold: Minimum percentage threshold (default 1.0)
1069
+
1070
+ Returns:
1071
+ DataFrame with significant mukeys and their statistics
1072
+ """
1073
+ significant = soil_stats[soil_stats['Percentage'] > threshold].copy()
1074
+ significant.sort_values('Percentage', ascending=False, inplace=True)
1075
+ return significant
1076
+
1077
+ @staticmethod
1078
+ @log_call
1079
+ def calculate_total_significant_percentage(significant_mukeys: pd.DataFrame) -> float:
1080
+ """Calculate total percentage covered by significant mukeys
1081
+
1082
+ Args:
1083
+ significant_mukeys: DataFrame of significant mukeys
1084
+
1085
+ Returns:
1086
+ Total percentage covered by significant mukeys
1087
+ """
1088
+ return significant_mukeys['Percentage'].sum()
1089
+
1090
+ @staticmethod
1091
+ @log_call
1092
+ def save_statistics(soil_stats: pd.DataFrame, output_path: Path,
1093
+ include_timestamp: bool = True):
1094
+ """Save soil statistics to CSV
1095
+
1096
+ Args:
1097
+ soil_stats: DataFrame with soil statistics
1098
+ output_path: Path to save CSV file
1099
+ include_timestamp: Whether to include timestamp in filename
1100
+ """
1101
+ if include_timestamp:
1102
+ timestamp = pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')
1103
+ output_path = output_path.with_name(
1104
+ f"{output_path.stem}_{timestamp}{output_path.suffix}")
1105
+
1106
+ soil_stats.to_csv(output_path, index=False)
1107
+
1108
    @staticmethod
    @log_call
    @standardize_input
    def get_infiltration_parameters(hdf_path: Optional[Path] = None, mukey: Optional[str] = None, ras_object: Any = None) -> Optional[dict]:
        """Get infiltration parameters for a specific mukey from an HDF file.

        Args:
            hdf_path: Optional path to the HDF file. If not provided, the first
                'infiltration_hdf_path' entry of the RAS object's rasmap_df is used.
            mukey: Mukey identifier to look up in the 'Infiltration Parameters' table.
            ras_object: Optional RAS object. If not provided, the global ras
                instance is used.

        Returns:
            Dictionary with 'Initial Loss (in)', 'Constant Loss Rate (in/hr)' and
            'Impervious Area (%)' for the matching mukey, or None when the mukey
            is not found in the table.

        Raises:
            KeyError: If the 'Infiltration Parameters' dataset is absent from the file.
        """
        if hdf_path is None:
            if ras_object is None:
                # Fall back to the globally initialized RAS project instance.
                from .RasPrj import ras
                ras_object = ras
            hdf_path = Path(ras_object.rasmap_df.iloc[0]['infiltration_hdf_path'][0])

        with h5py.File(hdf_path, 'r') as hdf:
            if 'Infiltration Parameters' not in hdf:
                raise KeyError("No infiltration parameters found in HDF file")

            # Linear scan over the table; column 0 holds the mukey as bytes.
            # NOTE(review): the decoded value is compared without strip() —
            # assumes the stored mukey carries no padding; verify against the
            # HDF writer (other readers in this class do strip()).
            params = hdf['Infiltration Parameters'][:]
            for row in params:
                if row[0].decode('utf-8') == mukey:
                    return {
                        'Initial Loss (in)': float(row[1]),
                        'Constant Loss Rate (in/hr)': float(row[2]),
                        'Impervious Area (%)': float(row[3])
                    }
            # Mukey not present in the table.
            return None
1141
+
1142
+ @staticmethod
1143
+ @log_call
1144
+ def calculate_weighted_parameters(soil_stats: pd.DataFrame,
1145
+ infiltration_params: dict) -> dict:
1146
+ """Calculate weighted infiltration parameters based on soil statistics
1147
+
1148
+ Args:
1149
+ soil_stats: DataFrame with soil statistics
1150
+ infiltration_params: Dictionary of infiltration parameters by mukey
1151
+
1152
+ Returns:
1153
+ Dictionary of weighted average infiltration parameters
1154
+ """
1155
+ total_weight = soil_stats['Percentage'].sum()
1156
+
1157
+ weighted_params = {
1158
+ 'Initial Loss (in)': 0.0,
1159
+ 'Constant Loss Rate (in/hr)': 0.0,
1160
+ 'Impervious Area (%)': 0.0
1161
+ }
1162
+
1163
+ for _, row in soil_stats.iterrows():
1164
+ mukey = row['mukey']
1165
+ weight = row['Percentage'] / total_weight
1166
+
1167
+ if mukey in infiltration_params:
1168
+ for param in weighted_params:
1169
+ weighted_params[param] += (
1170
+ infiltration_params[mukey][param] * weight
1171
+ )
1172
+
1173
+ return weighted_params
1174
+
1175
+
1176
+ @staticmethod
1177
+ def _get_table_info(hdf_file: h5py.File, table_path: str) -> Tuple[List[str], List[str], List[str]]:
1178
+ """Get column names and types from HDF table
1179
+
1180
+ Args:
1181
+ hdf_file: Open HDF file object
1182
+ table_path: Path to table in HDF file
1183
+
1184
+ Returns:
1185
+ Tuple of (column names, numpy dtypes, column descriptions)
1186
+ """
1187
+ if table_path not in hdf_file:
1188
+ return [], [], []
1189
+
1190
+ dataset = hdf_file[table_path]
1191
+ dtype = dataset.dtype
1192
+
1193
+ # Extract column names and types
1194
+ col_names = []
1195
+ col_types = []
1196
+ col_descs = []
1197
+
1198
+ for name in dtype.names:
1199
+ col_names.append(name)
1200
+ col_types.append(dtype[name].str)
1201
+ col_descs.append(name) # Could be enhanced to get actual descriptions
1202
+
1203
+ return col_names, col_types, col_descs
1204
+
1205
+
1206
    @staticmethod
    @log_call
    @standardize_input(file_type='geom_hdf')
    def get_landcover_raster_stats(
        geom_hdf_path: Path,
        landcover_hdf_path: Path = None,
        ras_object: Any = None
    ) -> pd.DataFrame:
        """
        Calculate land cover statistics for each 2D flow area using the area's perimeter.

        Parameters
        ----------
        geom_hdf_path : Path
            Path to the HEC-RAS geometry HDF file containing the 2D flow areas
        landcover_hdf_path : Path, optional
            Path to the land cover HDF file. If None, uses landcover_hdf_path from rasmap_df
        ras_object : Any, optional
            Optional RAS object. If not provided, uses global ras instance

        Returns
        -------
        pd.DataFrame
            DataFrame with land cover statistics for each 2D flow area, including:
            - mesh_name: Name of the 2D flow area
            - land_cover: Land cover classification name
            - percentage: Percentage of 2D flow area covered by this land cover type
            - area_sqm: Area in square meters
            - area_acres: Area in acres
            - area_sqmiles: Area in square miles
            - mannings_n: Manning's n value for this land cover type
            - percent_impervious: Percent impervious for this land cover type

            An empty DataFrame is returned on any failure (missing land cover
            HDF, missing companion .tif, no 2D flow areas, or read errors);
            errors are logged rather than raised.

        Notes
        -----
        Requires the rasterstats package to be installed.
        """
        try:
            # Optional heavy geospatial dependencies are imported lazily so the
            # rest of the module works without them.
            from rasterstats import zonal_stats
            import shapely
            import geopandas as gpd
            import numpy as np
            import tempfile
            import os
            import rasterio
        except ImportError as e:
            logger.error(f"Failed to import required package: {e}. Please run 'pip install rasterstats shapely geopandas rasterio'")
            raise e

        # Import here to avoid circular imports
        from .HdfMesh import HdfMesh

        # Get the landcover HDF path
        if landcover_hdf_path is None:
            if ras_object is None:
                from .RasPrj import ras
                ras_object = ras

            # Try to get landcover_hdf_path from rasmap_df
            try:
                landcover_hdf_path = Path(ras_object.rasmap_df.loc[0, 'landcover_hdf_path'][0])
                if not landcover_hdf_path.exists():
                    logger.warning(f"Land cover HDF path from rasmap_df does not exist: {landcover_hdf_path}")
                    return pd.DataFrame()
            except (KeyError, IndexError, AttributeError, TypeError) as e:
                logger.error(f"Error retrieving landcover_hdf_path from rasmap_df: {str(e)}")
                return pd.DataFrame()

        # Get land cover map (raster to ID mapping)
        try:
            with h5py.File(landcover_hdf_path, 'r') as hdf:
                # h5py collapses repeated slashes, so '//Raster Map' resolves
                # to the root-level 'Raster Map' dataset.
                if '//Raster Map' not in hdf:
                    logger.error(f"No Raster Map found in {landcover_hdf_path}")
                    return pd.DataFrame()

                raster_map_data = hdf['//Raster Map'][()]
                # raster ID (int) -> land cover class name
                raster_map = {int(item[0]): item[1].decode('utf-8').strip() for item in raster_map_data}

                # Get land cover variables (mannings_n and percent_impervious)
                variables = {}
                if '//Variables' in hdf:
                    var_data = hdf['//Variables'][()]
                    for row in var_data:
                        name = row[0].decode('utf-8').strip()
                        mannings_n = float(row[1])
                        percent_impervious = float(row[2])
                        variables[name] = {
                            'mannings_n': mannings_n,
                            'percent_impervious': percent_impervious
                        }
        except Exception as e:
            logger.error(f"Error reading land cover data from HDF: {str(e)}")
            return pd.DataFrame()

        # Get 2D flow areas
        mesh_areas = HdfMesh.get_mesh_areas(geom_hdf_path)
        if mesh_areas.empty:
            logger.warning(f"No 2D flow areas found in {geom_hdf_path}")
            return pd.DataFrame()

        # Check for the TIF file with same name as HDF
        tif_path = landcover_hdf_path.with_suffix('.tif')
        if not tif_path.exists():
            logger.error(f"No raster file found at {tif_path}")
            return pd.DataFrame()

        # List to store all results
        all_results = []

        # Read the raster data and info
        try:
            with rasterio.open(tif_path) as src:
                # Get transform directly from rasterio
                # NOTE(review): 'transform' is captured but never used below.
                transform = src.transform
                no_data = src.nodata if src.nodata is not None else -9999

                # Calculate zonal statistics for each 2D flow area
                for _, mesh_row in mesh_areas.iterrows():
                    mesh_name = mesh_row['mesh_name']
                    mesh_geom = mesh_row['geometry']

                    # Get zonal statistics directly using rasterio grid
                    try:
                        stats = zonal_stats(
                            mesh_geom,
                            tif_path,
                            categorical=True,
                            nodata=no_data
                        )[0]

                        # Skip if no stats
                        if not stats:
                            logger.warning(f"No land cover data found for 2D flow area: {mesh_name}")
                            continue

                        # Calculate total area and percentages
                        # NOTE(review): categorical zonal_stats yields PIXEL
                        # COUNTS per class; treating them as square meters
                        # assumes 1 m x 1 m pixels — verify the raster
                        # resolution before trusting area_sqm/acres/sqmiles.
                        total_area_sqm = sum(stats.values())

                        # Process each land cover type
                        for raster_val, area_sqm in stats.items():
                            # Skip NoData values
                            if raster_val is None or raster_val == no_data:
                                continue

                            try:
                                # Get land cover name from raster map
                                land_cover = raster_map.get(int(raster_val), f"Unknown-{raster_val}")

                                # Get Manning's n and percent impervious
                                # (None when the class has no Variables entry)
                                mannings_n = variables.get(land_cover, {}).get('mannings_n', None)
                                percent_impervious = variables.get(land_cover, {}).get('percent_impervious', None)

                                percentage = (area_sqm / total_area_sqm) * 100 if total_area_sqm > 0 else 0

                                all_results.append({
                                    'mesh_name': mesh_name,
                                    'land_cover': land_cover,
                                    'percentage': percentage,
                                    'area_sqm': area_sqm,
                                    'area_acres': area_sqm * HdfInfiltration.SQM_TO_ACRE,
                                    'area_sqmiles': area_sqm * HdfInfiltration.SQM_TO_SQMILE,
                                    'mannings_n': mannings_n,
                                    'percent_impervious': percent_impervious
                                })
                            except Exception as e:
                                logger.warning(f"Error processing raster value {raster_val}: {e}")
                                continue
                    except Exception as e:
                        # Per-mesh failures are logged and skipped so one bad
                        # geometry does not abort the whole run.
                        logger.error(f"Error calculating statistics for mesh {mesh_name}: {str(e)}")
                        continue
        except Exception as e:
            logger.error(f"Error opening raster file {tif_path}: {str(e)}")
            return pd.DataFrame()

        # Create DataFrame with results
        results_df = pd.DataFrame(all_results)

        # Sort by mesh_name, percentage (descending)
        if not results_df.empty:
            results_df = results_df.sort_values(['mesh_name', 'percentage'], ascending=[True, False])

        return results_df
1388
+
1389
+
1390
+
1391
+ '''
1392
+
1393
+ THIS FUNCTION IS VERY CLOSE BUT DOES NOT WORK BECAUSE IT DOES NOT PRESERVE THE EXACT STRUCTURE OF THE HDF FILE.
1394
+ WHEN RAS LOADS THE HDF, IT IGNORES THE DATA IN THE TABLE AND REPLACES IT WITH NULLS.
1395
+
1396
+
1397
+ @staticmethod
1398
+ @log_call
1399
+ def set_infiltration_baseoverrides(
1400
+ hdf_path: Path,
1401
+ infiltration_df: pd.DataFrame
1402
+ ) -> Optional[pd.DataFrame]:
1403
+ """
1404
+ Set base overrides for infiltration parameters in the HDF file while preserving
1405
+ the exact structure of the existing dataset.
1406
+
1407
+ This function ensures that the HDF structure is maintained exactly as in the
1408
+ original file, including field names, data types, and string lengths. It updates
1409
+ the values while preserving all dataset attributes.
1410
+
1411
+ Parameters
1412
+ ----------
1413
+ hdf_path : Path
1414
+ Path to the HEC-RAS geometry HDF file
1415
+ infiltration_df : pd.DataFrame
1416
+ DataFrame containing infiltration parameters with columns matching HDF structure.
1417
+ The first column should be 'Name' or 'Land Cover Name'.
1418
+
1419
+ Returns
1420
+ -------
1421
+ Optional[pd.DataFrame]
1422
+ The infiltration DataFrame if successful, None if operation fails
1423
+ """
1424
+ try:
1425
+ # Make a copy to avoid modifying the input DataFrame
1426
+ infiltration_df = infiltration_df.copy()
1427
+
1428
+ # Check for and rename the first column if needed
1429
+ if "Land Cover Name" in infiltration_df.columns:
1430
+ name_col = "Land Cover Name"
1431
+ else:
1432
+ name_col = "Name"
1433
+ # Rename 'Name' to 'Land Cover Name' for HDF dataset
1434
+ infiltration_df = infiltration_df.rename(columns={"Name": "Land Cover Name"})
1435
+
1436
+ table_path = '/Geometry/Infiltration/Base Overrides'
1437
+
1438
+ with h5py.File(hdf_path, 'r') as hdf_file_read:
1439
+ # Check if dataset exists
1440
+ if table_path not in hdf_file_read:
1441
+ logger.warning(f"No infiltration data found in {hdf_path}. Creating new dataset.")
1442
+ # If dataset doesn't exist, use the standard set_infiltration_baseoverrides method
1443
+ return HdfInfiltration.set_infiltration_baseoverrides(hdf_path, infiltration_df)
1444
+
1445
+ # Get the exact dtype of the existing dataset
1446
+ existing_dtype = hdf_file_read[table_path].dtype
1447
+
1448
+ # Extract column names from the existing dataset
1449
+ existing_columns = existing_dtype.names
1450
+
1451
+ # Check if all columns in the DataFrame exist in the HDF dataset
1452
+ for col in infiltration_df.columns:
1453
+ hdf_col = col
1454
+ if col == "Name" and "Land Cover Name" in existing_columns:
1455
+ hdf_col = "Land Cover Name"
1456
+
1457
+ if hdf_col not in existing_columns:
1458
+ logger.warning(f"Column {col} not found in existing dataset - it will be ignored")
1459
+
1460
+ # Get current dataset to preserve structure for non-updated fields
1461
+ existing_data = hdf_file_read[table_path][()]
1462
+
1463
+ # Create a structured array with the exact same dtype as the existing dataset
1464
+ structured_array = np.zeros(len(infiltration_df), dtype=existing_dtype)
1465
+
1466
+ # Copy data from DataFrame to structured array, preserving existing structure
1467
+ for col in existing_columns:
1468
+ df_col = col
1469
+ # Map 'Land Cover Name' to 'Name' if needed
1470
+ if col == "Land Cover Name" and name_col == "Name":
1471
+ df_col = "Name"
1472
+
1473
+ if df_col in infiltration_df.columns:
1474
+ # Handle string fields - need to maintain exact string length
1475
+ if existing_dtype[col].kind == 'S':
1476
+ # Get the exact string length from dtype
1477
+ max_str_len = existing_dtype[col].itemsize
1478
+ # Convert to bytes with correct length
1479
+ structured_array[col] = infiltration_df[df_col].astype(str).values.astype(f'|S{max_str_len}')
1480
+ else:
1481
+ # Handle numeric fields - ensure correct numeric type
1482
+ if existing_dtype[col].kind in ('f', 'i'):
1483
+ structured_array[col] = infiltration_df[df_col].values.astype(existing_dtype[col])
1484
+ else:
1485
+ # For any other type, just copy as is
1486
+ structured_array[col] = infiltration_df[df_col].values
1487
+ else:
1488
+ logger.warning(f"Column {col} not in DataFrame - using default values")
1489
+ # Use zeros for numeric fields or empty strings for string fields
1490
+ if existing_dtype[col].kind == 'S':
1491
+ structured_array[col] = np.array([''] * len(infiltration_df), dtype=f'|S{existing_dtype[col].itemsize}')
1492
+
1493
+ # Write back to HDF file
1494
+ with h5py.File(hdf_path, 'a') as hdf_file_write:
1495
+ # Delete existing dataset
1496
+ if table_path in hdf_file_write:
1497
+ del hdf_file_write[table_path]
1498
+
1499
+ # Create new dataset with exact same properties as original
1500
+ dataset = hdf_file_write.create_dataset(
1501
+ table_path,
1502
+ data=structured_array,
1503
+ dtype=existing_dtype,
1504
+ compression='gzip',
1505
+ compression_opts=1,
1506
+ chunks=(100,),
1507
+ maxshape=(None,)
1508
+ )
1509
+
1510
+ # Return the DataFrame with columns matching what was actually written
1511
+ result_df = pd.DataFrame()
1512
+ for col in existing_columns:
1513
+ if existing_dtype[col].kind == 'S':
1514
+ # Convert bytes back to string
1515
+ result_df[col] = [val.decode('utf-8').strip() for val in structured_array[col]]
1516
+ else:
1517
+ result_df[col] = structured_array[col]
1518
+
1519
+ return result_df
1520
+
1521
+ except Exception as e:
1522
+ logger.error(f"Error setting infiltration data in {hdf_path}: {str(e)}")
1523
+ return None
1524
+
1525
+
1526
+
1527
+
1528
+
1529
+
1530
1530
  '''