ras-commander 0.42.0__py3-none-any.whl → 0.43.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ras_commander/RasHdf.py DELETED
@@ -1,1619 +0,0 @@
1
- """
2
- RasHdf Module
3
-
4
- This module provides utilities for working with HDF files in HEC-RAS projects.
5
- It contains the RasHdf class, which offers various static methods for extracting,
6
- analyzing, and manipulating data from HEC-RAS HDF files.
7
-
8
- Note:
9
- Methods decorated with @hdf_operation have the opening and closing of the HDF file handled by the decorator.
10
- The decorator should be used for all methods that directly interact with HDF files.
11
- It ensures proper file handling and error management.
12
-
13
- When using the @hdf_operation decorator:
14
- - The method receives an open h5py.File object as its first argument after 'cls'.
15
- - Error handling for file operations is managed by the decorator.
16
- - The HDF file is automatically closed after the method execution.
17
-
18
- Methods without this decorator must manually handle file opening, closing, and error management.
19
- Failure to use the decorator or properly manage the file can lead to resource leaks or file access errors.
20
-
21
- Example:
22
- @classmethod
23
- @hdf_operation
24
- def example_method(cls, hdf_file: h5py.File, other_args):
25
- # Method implementation using hdf_file
26
-
27
- This module is part of the ras-commander library and uses a centralized logging configuration.
28
-
29
- Logging Configuration:
30
- - The logging is set up in the logging_config.py file.
31
- - A @log_call decorator is available to automatically log function calls.
32
- - Log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
33
- - Logs are written to both console and a rotating file handler.
34
- - The default log file is 'ras_commander.log' in the 'logs' directory.
35
- - The default log level is INFO.
36
-
37
- To use logging in this module:
38
- 1. Use the @log_call decorator for automatic function call logging.
39
- 2. For additional logging, use logger.[level]() calls (e.g., logger.info(), logger.debug()).
40
- 3. Obtain the logger using: logger = logging.getLogger(__name__)
41
-
42
- Example:
43
- @log_call
44
- def my_function():
45
- logger = logging.getLogger(__name__)
46
- logger.debug("Additional debug information")
47
- # Function logic here
48
- """
49
- import h5py
50
- import numpy as np
51
- import pandas as pd
52
- from typing import Union, List, Optional, Dict, Tuple, Any, Callable
53
- from scipy.spatial import KDTree
54
- from pathlib import Path
55
- from datetime import datetime
56
- import logging
57
- from functools import wraps
58
- from .RasPrj import RasPrj, ras, init_ras_project
59
-
60
- # If you're using RasPrj in type hints, you might need to use string literals to avoid circular imports
61
- from typing import TYPE_CHECKING
62
- if TYPE_CHECKING:
63
- from .RasPrj import RasPrj
64
- from ras_commander import get_logger
65
- from ras_commander.logging_config import log_call
66
-
67
- logger = get_logger(__name__)
68
-
69
- class RasHdf:
70
- """
71
- A utility class for working with HDF files in HEC-RAS projects.
72
-
73
- This class provides static methods for various operations on HDF files,
74
- including listing paths, extracting data, and performing analyses on
75
- HEC-RAS project data stored in HDF format.
76
- """
77
-
78
-
79
@staticmethod
def hdf_operation(func):
    """
    Decorator that opens the HDF file for a RasHdf method and hands it the open handle.

    The returned wrapper:
    1. Pops the optional ``ras_object`` keyword (falling back to the global ``ras``).
    2. Resolves ``hdf_input`` to a filename via ``cls._get_hdf_filename``.
    3. Opens the file read-only and calls ``func(cls, hdf_file, *args, **kwargs)``.
    4. Logs any exception raised along the way and returns None instead of propagating it.

    Args:
        func (Callable): The function to be decorated. It is called with the class
            and an open h5py.File as its first two arguments.

    Returns:
        Callable: The wrapped function, already converted to a class method.

    Note:
        A missing HDF file produces a ValueError inside the wrapper, but that
        error is caught by the wrapper itself, logged, and turned into a None result.

    Usage:
        @RasHdf.hdf_operation
        def some_hdf_method(cls, hdf_file, ...):
            # Method implementation
    """
    @wraps(func)
    def wrapper(cls, hdf_input: Union[str, Path], *args: Any, **kwargs: Any) -> Any:
        # Deferred import: importing at module load time would be circular.
        from ras_commander import ras
        project = kwargs.pop('ras_object', None)
        if not project:
            project = ras
        try:
            resolved_path = cls._get_hdf_filename(hdf_input, project)
            if resolved_path is None:
                raise ValueError(f"HDF file {hdf_input} not found. Use a try-except block to catch this error.")
            with h5py.File(resolved_path, 'r') as opened:
                outcome = func(cls, opened, *args, **kwargs)
            return outcome
        except Exception as err:
            logger.error(f"Error in {func.__name__}: {err}")
            return None

    as_classmethod = classmethod(wrapper)
    return as_classmethod
118
-
119
-
120
@classmethod
@log_call
def get_runtime_data(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
    """
    Extract runtime and compute time data from a single HDF file.

    Reads the plan name and simulation window from '/Plan Data/Plan Information'
    and the per-process compute times from '/Results/Summary/Compute Processes',
    then builds a one-row summary DataFrame.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: One-row DataFrame containing runtime and compute time data.
            None is returned when the plan information group or the compute processes
            dataset is missing, or when the simulation start/end times are absent or
            cannot be parsed.

    Example:
        >>> runtime_df = RasHdf.get_runtime_data("path/to/file.hdf")
        >>> if runtime_df is not None:
        ...     print(runtime_df.head())
    """
    time_format = "%d%b%Y %H:%M:%S"

    def _as_text(value):
        # HDF attributes may be returned as bytes; normalise them to str.
        return value.decode('utf-8') if isinstance(value, bytes) else value

    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        file_name = Path(hdf_file.filename).name
        logger.info(f"Extracting Plan Information from: {file_name}")
        plan_info = hdf_file.get('/Plan Data/Plan Information')
        if plan_info is None:
            logger.warning("Group '/Plan Data/Plan Information' not found.")
            return None

        plan_name = _as_text(plan_info.attrs.get('Plan Name', 'Unknown'))
        logger.info(f"Plan Name: {plan_name}")

        start_time_str = _as_text(plan_info.attrs.get('Simulation Start Time', 'Unknown'))
        end_time_str = _as_text(plan_info.attrs.get('Simulation End Time', 'Unknown'))

        # A missing attribute falls back to 'Unknown', which strptime rejects.
        # Treat that (and any other malformed value) as "extraction failed"
        # rather than letting the ValueError escape.
        try:
            start_time = datetime.strptime(start_time_str, time_format)
            end_time = datetime.strptime(end_time_str, time_format)
        except (TypeError, ValueError):
            logger.warning(
                f"Could not parse simulation times (start={start_time_str!r}, end={end_time_str!r}); "
                f"expected format {time_format!r}."
            )
            return None

        simulation_duration = end_time - start_time
        simulation_hours = simulation_duration.total_seconds() / 3600

        logger.info(f"Simulation Start Time: {start_time_str}")
        logger.info(f"Simulation End Time: {end_time_str}")
        logger.info(f"Simulation Duration (hours): {simulation_hours}")

        compute_processes = hdf_file.get('/Results/Summary/Compute Processes')
        if compute_processes is None:
            logger.warning("Dataset '/Results/Summary/Compute Processes' not found.")
            return None

        process_names = [name.decode('utf-8') for name in compute_processes['Process'][:]]
        filenames = [filename.decode('utf-8') for filename in compute_processes['Filename'][:]]
        completion_times = compute_processes['Compute Time (ms)'][:]

        compute_processes_df = pd.DataFrame({
            'Process': process_names,
            'Filename': filenames,
            'Compute Time (ms)': completion_times,
            'Compute Time (s)': completion_times / 1000,
            'Compute Time (hours)': completion_times / (1000 * 3600)
        })

        logger.debug("Compute processes DataFrame:")
        logger.debug(compute_processes_df)

        def _process_hours(process_name):
            # Hours spent in the first row matching process_name, or 'N/A' if absent.
            hours = compute_processes_df.loc[
                compute_processes_df['Process'] == process_name, 'Compute Time (hours)'
            ]
            return hours.values[0] if not hours.empty else 'N/A'

        def _speed(compute_hours):
            # Simulated hours per compute hour; 'N/A' propagates unchanged.
            if isinstance(compute_hours, str):
                return 'N/A'
            return simulation_hours / compute_hours

        unsteady_hours = _process_hours('Unsteady Flow Computations')
        complete_hours = compute_processes_df['Compute Time (hours)'].sum()

        compute_processes_summary = {
            'Plan Name': [plan_name],
            'File Name': [file_name],
            'Simulation Start Time': [start_time_str],
            'Simulation End Time': [end_time_str],
            'Simulation Duration (s)': [simulation_duration.total_seconds()],
            'Simulation Time (hr)': [simulation_hours],
            'Completing Geometry (hr)': [_process_hours('Completing Geometry')],
            'Preprocessing Geometry (hr)': [_process_hours('Preprocessing Geometry')],
            'Completing Event Conditions (hr)': [_process_hours('Completing Event Conditions')],
            'Unsteady Flow Computations (hr)': [unsteady_hours],
            'Complete Process (hr)': [complete_hours],
            'Unsteady Flow Speed (hr/hr)': [_speed(unsteady_hours)],
            'Complete Process Speed (hr/hr)': [_speed(complete_hours)],
        }

        compute_summary_df = pd.DataFrame(compute_processes_summary)
        logger.debug("Compute summary DataFrame:")
        logger.debug(compute_summary_df)

        return compute_summary_df
205
-
206
- # List 2D Flow Area Groups (needed for later functions that extract specific datasets)
207
-
208
@classmethod
@log_call
def get_2d_flow_area_names(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[List[str]]:
    """
    List the names of the 2D Flow Areas stored in the HDF file.

    Only members of 'Geometry/2D Flow Areas' that are HDF groups are reported;
    datasets stored alongside them are ignored.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[List[str]]: Names of the 2D Flow Areas, or None (with a logged
            warning) when the group is missing or contains no sub-groups.
    """
    flow_areas_path = 'Geometry/2D Flow Areas'
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        if flow_areas_path not in hdf_file:
            logger.warning("No 2D Flow Areas found in the HDF file")
            return None

        flow_areas = hdf_file[flow_areas_path]
        found = []
        for key in flow_areas.keys():
            if isinstance(flow_areas[key], h5py.Group):
                found.append(key)

        if found:
            logger.info(f"Found {len(found)} 2D Flow Areas")
            return found

        logger.warning("No 2D Flow Areas found in the HDF file")
        return None
236
@classmethod
@log_call
def get_2d_flow_area_attributes(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
    """
    Read the 'Geometry/2D Flow Areas/Attributes' dataset into a DataFrame.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: The attributes dataset as a DataFrame, or None
            when the dataset is not present in the file.

    Example:
        >>> attributes_df = RasHdf.get_2d_flow_area_attributes("path/to/file.hdf")
        >>> if attributes_df is not None:
        ...     print(attributes_df.head())
        ... else:
        ...     print("No 2D Flow Area attributes found")
    """
    attributes_path = 'Geometry/2D Flow Areas/Attributes'
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        if attributes_path not in hdf_file:
            return None
        # [()] reads the whole dataset into memory before the file closes.
        raw_attributes = hdf_file[attributes_path][()]
        return pd.DataFrame(raw_attributes)
263
-
264
@classmethod
@log_call
def get_cell_info(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
    """
    Read the 'Geometry/2D Flow Areas/Cell Info' dataset as a DataFrame.

    The work is delegated to ``_extract_dataset`` with the column names
    'Start' and 'End'.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: DataFrame containing Cell Info, or None if the data is not found.

    Example:
        >>> cell_info_df = RasHdf.get_cell_info("path/to/file.hdf")
        >>> if cell_info_df is not None:
        ...     print(cell_info_df.head())
        ... else:
        ...     print("No Cell Info found")
    """
    dataset_path = 'Geometry/2D Flow Areas/Cell Info'
    columns = ['Start', 'End']
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        return cls._extract_dataset(hdf_file, dataset_path, columns)
287
-
288
@classmethod
@log_call
def get_cell_points(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
    """
    Read the 'Geometry/2D Flow Areas/Cell Points' dataset as a DataFrame.

    The work is delegated to ``_extract_dataset`` with the column names 'X' and 'Y'.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: DataFrame containing Cell Points, or None if the data is not found.

    Example:
        >>> cell_points_df = RasHdf.get_cell_points("path/to/file.hdf")
        >>> if cell_points_df is not None:
        ...     print(cell_points_df.head())
        ... else:
        ...     print("No Cell Points found")
    """
    dataset_path = 'Geometry/2D Flow Areas/Cell Points'
    columns = ['X', 'Y']
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        return cls._extract_dataset(hdf_file, dataset_path, columns)
311
-
312
@classmethod
@log_call
def get_polygon_info_and_parts(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
    """
    Read the Polygon Info and Polygon Parts datasets from the HDF file.

    Both datasets are read from directly under 'Geometry/2D Flow Areas'.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of a 2D Flow Area. It is resolved through
            ``_get_area_name`` (which raises for an unknown name), but the resolved
            name is not part of the dataset paths read here.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
            Polygon Info (columns 'Column1'..'Column4') and Polygon Parts
            (columns 'Start', 'Count'), each None if the corresponding data is not found.

    Example:
        >>> polygon_info_df, polygon_parts_df = RasHdf.get_polygon_info_and_parts("path/to/file.hdf")
        >>> if polygon_info_df is not None and polygon_parts_df is not None:
        ...     print("Polygon Info:")
        ...     print(polygon_info_df.head())
        ...     print("Polygon Parts:")
        ...     print(polygon_parts_df.head())
        ... else:
        ...     print("Polygon data not found")
    """
    info_path = 'Geometry/2D Flow Areas/Polygon Info'
    parts_path = 'Geometry/2D Flow Areas/Polygon Parts'
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        # Kept for its validation side effect; the result is not used in the paths.
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        info_frame = cls._extract_dataset(hdf_file, info_path, ['Column1', 'Column2', 'Column3', 'Column4'])
        parts_frame = cls._extract_dataset(hdf_file, parts_path, ['Start', 'Count'])
        return info_frame, parts_frame
345
-
346
@classmethod
@log_call
def get_polygon_points(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[pd.DataFrame]:
    """
    Read the 'Geometry/2D Flow Areas/Polygon Points' dataset as a DataFrame.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of a 2D Flow Area. It is resolved through
            ``_get_area_name`` (which raises for an unknown name), but the resolved
            name is not part of the dataset path read here.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: DataFrame with columns 'X' and 'Y', or None if the
            dataset is not present.
    """
    points_path = 'Geometry/2D Flow Areas/Polygon Points'
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        # Kept for its validation side effect; the result is not used in the path.
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        if points_path not in hdf_file:
            return None
        raw_points = hdf_file[points_path][()]
        return pd.DataFrame(raw_points, columns=['X', 'Y'])
370
-
371
@classmethod
@log_call
def get_cells_center_data(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
    """
    Extract Cells Center Coordinates and Manning's n from the HDF file.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
            Two DataFrames containing Cells Center Coordinates and Manning's n respectively,
            or None for each if the corresponding data is not found. If any step
            raises, the error is logged and (None, None) is returned.

    Example:
        >>> coords_df, mannings_df = RasHdf.get_cells_center_data("path/to/file.hdf")
        >>> if coords_df is not None and mannings_df is not None:
        ...     print("Cell Center Coordinates:")
        ...     print(coords_df.head())
        ...     print("Manning's n:")
        ...     print(mannings_df.head())
        ... else:
        ...     print("Cell center data not found")
    """
    try:
        hdf_filename = cls._get_hdf_filename(hdf_input, ras_object)
        with h5py.File(hdf_filename, 'r') as hdf_file:
            area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
            base_path = f'Geometry/2D Flow Areas/{area_name}'
            cells_center_coord_path = f'{base_path}/Cells Center Coordinate'
            cells_manning_n_path = f"{base_path}/Cells Center Manning's n"
            cells_center_coord_df = cls._extract_dataset(hdf_file, cells_center_coord_path, ['X', 'Y'])
            cells_manning_n_df = cls._extract_dataset(hdf_file, cells_manning_n_path, ["Manning's n"])
            return cells_center_coord_df, cells_manning_n_df
    except Exception as e:
        # Best-effort contract is preserved (callers still get (None, None)),
        # but the cause is recorded instead of being discarded silently.
        logger.error(f"Error extracting cells center data: {e}")
        return None, None
410
-
411
@classmethod
@log_call
def get_faces_area_elevation_data(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[pd.DataFrame]:
    """
    Read the 'Faces Area Elevation Values' dataset of a 2D Flow Area.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: DataFrame with columns 'Elevation', 'Area',
            'Wetted Perimeter' and "Manning's n", or None if the dataset is not found.

    Example:
        >>> elevation_df = RasHdf.get_faces_area_elevation_data("path/to/file.hdf")
        >>> if elevation_df is not None:
        ...     print(elevation_df.head())
        ... else:
        ...     print("No Faces Area Elevation data found")
    """
    column_labels = ['Elevation', 'Area', 'Wetted Perimeter', "Manning's n"]
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        values_path = f'Geometry/2D Flow Areas/{area_name}/Faces Area Elevation Values'

        if values_path not in hdf_file:
            return None

        raw_values = hdf_file[values_path][()]
        return pd.DataFrame(raw_values, columns=column_labels)
444
-
445
@classmethod
@log_call
def get_faces_indexes(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
    """
    Read the 'Faces Cell Indexes' and 'Faces FacePoint Indexes' datasets of a 2D Flow Area.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
            Faces Cell Indexes (columns 'Left Cell', 'Right Cell') and Faces
            FacePoint Indexes (columns 'Start FacePoint', 'End FacePoint'),
            each None if the corresponding data is not found.

    Example:
        >>> cell_indexes_df, facepoint_indexes_df = RasHdf.get_faces_indexes("path/to/file.hdf")
        >>> if cell_indexes_df is not None and facepoint_indexes_df is not None:
        ...     print("Faces Cell Indexes:")
        ...     print(cell_indexes_df.head())
        ...     print("Faces FacePoint Indexes:")
        ...     print(facepoint_indexes_df.head())
        ... else:
        ...     print("Faces indexes data not found")
    """
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        area_root = f'Geometry/2D Flow Areas/{area_name}'

        cells = cls._extract_dataset(
            hdf_file,
            f'{area_root}/Faces Cell Indexes',
            ['Left Cell', 'Right Cell'],
        )
        face_points = cls._extract_dataset(
            hdf_file,
            f'{area_root}/Faces FacePoint Indexes',
            ['Start FacePoint', 'End FacePoint'],
        )
        return cells, face_points
483
-
484
@classmethod
@log_call
def get_faces_elevation_data(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
    """
    Read the 'Faces Low Elevation Centroid' and 'Faces Minimum Elevation' datasets of a 2D Flow Area.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
            DataFrames containing Faces Low Elevation Centroid and Minimum Elevation,
            in that order.
    """
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        area_root = f'Geometry/2D Flow Areas/{area_name}'

        centroid_frame = cls._extract_dataset(
            hdf_file, f'{area_root}/Faces Low Elevation Centroid', ['Low Elevation Centroid']
        )
        minimum_frame = cls._extract_dataset(
            hdf_file, f'{area_root}/Faces Minimum Elevation', ['Minimum Elevation']
        )
        return centroid_frame, minimum_frame
508
-
509
@classmethod
@log_call
def get_faces_vector_data(
    cls,
    hdf_input: Union[str, Path],
    area_name: Optional[str] = None,
    ras_object=None
) -> Optional[pd.DataFrame]:
    """
    Read the 'Faces NormalUnitVector and Length' dataset of a 2D Flow Area.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: DataFrame with columns 'NormalX', 'NormalY' and 'Length'.
    """
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        dataset_path = f'Geometry/2D Flow Areas/{area_name}/Faces NormalUnitVector and Length'
        return cls._extract_dataset(hdf_file, dataset_path, ['NormalX', 'NormalY', 'Length'])
536
-
537
@classmethod
@log_call
def get_faces_perimeter_data(
    cls,
    hdf_input: Union[str, Path],
    area_name: Optional[str] = None,
    ras_object=None
) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
    """
    Read the 'Faces Perimeter Info' and 'Faces Perimeter Values' datasets of a 2D Flow Area.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
            Faces Perimeter Info (columns 'Start', 'Count') and Faces Perimeter
            Values (columns 'X', 'Y'), in that order.

    Raises:
        ValueError: If the requested 2D Flow Area cannot be resolved by ``_get_area_name``.

    Example:
        >>> perimeter_info_df, perimeter_values_df = RasHdf.get_faces_perimeter_data("path/to/file.hdf")
        >>> if perimeter_info_df is not None and perimeter_values_df is not None:
        ...     print("Perimeter Info:")
        ...     print(perimeter_info_df.head())
        ...     print("Perimeter Values:")
        ...     print(perimeter_values_df.head())
        ... else:
        ...     print("Perimeter data not found")
    """
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        area_root = f'Geometry/2D Flow Areas/{area_name}'

        info_frame = cls._extract_dataset(
            hdf_file, f'{area_root}/Faces Perimeter Info', ['Start', 'Count']
        )
        values_frame = cls._extract_dataset(
            hdf_file, f'{area_root}/Faces Perimeter Values', ['X', 'Y']
        )
        return info_frame, values_frame
580
-
581
@classmethod
@log_call
def get_infiltration_data(
    cls,
    hdf_input: Union[str, Path],
    area_name: Optional[str] = None,
    ras_object=None
) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
    """
    Read the five infiltration datasets stored under a 2D Flow Area's 'Infiltration' group.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
            In order: Cell Center Classifications, Face Center Classifications,
            Initial Deficit, Maximum Deficit, Potential Percolation Rate.
    """
    # (dataset name, single column label), in the order they are returned.
    dataset_specs = (
        ('Cell Center Classifications', 'Cell Classification'),
        ('Face Center Classifications', 'Face Classification'),
        ('Initial Deficit', 'Initial Deficit'),
        ('Maximum Deficit', 'Maximum Deficit'),
        ('Potential Percolation Rate', 'Potential Percolation Rate'),
    )
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        infiltration_root = f'Geometry/2D Flow Areas/{area_name}/Infiltration'

        frames = []
        for dataset_name, column_label in dataset_specs:
            frames.append(
                cls._extract_dataset(hdf_file, f'{infiltration_root}/{dataset_name}', [column_label])
            )
        return tuple(frames)
614
-
615
@classmethod
@log_call
def get_percent_impervious_data(
    cls,
    hdf_input: Union[str, Path],
    area_name: Optional[str] = None,
    ras_object=None
) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
    """
    Read the three datasets stored under a 2D Flow Area's 'Percent Impervious' group.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
            In order: Cell Center Classifications, Face Center Classifications,
            Percent Impervious.
    """
    # (dataset name, single column label), in the order they are returned.
    dataset_specs = (
        ('Cell Center Classifications', 'Cell Classification'),
        ('Face Center Classifications', 'Face Classification'),
        ('Percent Impervious', 'Percent Impervious'),
    )
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        impervious_root = f'Geometry/2D Flow Areas/{area_name}/Percent Impervious'

        frames = []
        for dataset_name, column_label in dataset_specs:
            frames.append(
                cls._extract_dataset(hdf_file, f'{impervious_root}/{dataset_name}', [column_label])
            )
        return tuple(frames)
645
-
646
@classmethod
@log_call
def get_perimeter_data(
    cls,
    hdf_input: Union[str, Path],
    area_name: Optional[str] = None,
    ras_object=None
) -> Optional[pd.DataFrame]:
    """
    Read the 'Perimeter' dataset of a 2D Flow Area.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
            If None, uses the first 2D Area Name found.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: DataFrame with columns 'X' and 'Y' holding the perimeter data.

    Example:
        >>> perimeter_df = RasHdf.get_perimeter_data("path/to/file.hdf")
        >>> if perimeter_df is not None:
        ...     print(perimeter_df.head())
        ... else:
        ...     print("Perimeter data not found")
    """
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
        dataset_path = f'Geometry/2D Flow Areas/{area_name}/Perimeter'
        return cls._extract_dataset(hdf_file, dataset_path, ['X', 'Y'])
680
-
681
@classmethod
@log_call
def _get_area_name(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> str:
    """
    Get the 2D Flow Area name from the HDF file.

    When ``area_name`` is None the first sub-group of 'Geometry/2D Flow Areas'
    is returned; otherwise ``area_name`` is checked for membership in that
    group and returned unchanged.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
            Callers in this class also pass an already-open h5py.File here;
            resolving it is delegated to ``_get_hdf_filename``.
        area_name (Optional[str]): The provided area name, if any.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        str: The 2D Flow Area name.

    Raises:
        ValueError: If the 'Geometry/2D Flow Areas' group is missing, if it
            contains no sub-groups, or if the specified area name is not found.
    """
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        # .get() returns None for a missing group, so the documented ValueError
        # can be raised instead of a bare KeyError from item access.
        flow_areas = hdf_file.get('Geometry/2D Flow Areas')
        if flow_areas is None:
            raise ValueError("No 2D Flow Areas found in the HDF file")

        if area_name is None:
            # Only sub-groups are flow areas; sibling datasets are skipped.
            area_names = [name for name, item in flow_areas.items() if isinstance(item, h5py.Group)]
            if not area_names:
                raise ValueError("No 2D Flow Areas found in the HDF file")
            return area_names[0]

        if area_name not in flow_areas:
            raise ValueError(f"2D Flow Area '{area_name}' not found in the HDF file")
        return area_name
708
-
709
@classmethod
@log_call
def _extract_dataset(cls, hdf_input: Union[str, Path], dataset_path: str, column_names: List[str], ras_object=None) -> Optional[pd.DataFrame]:
    """
    Read one dataset from the HDF file into a DataFrame.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        dataset_path (str): The path to the dataset within the HDF file.
        column_names (List[str]): The names to assign to the DataFrame columns.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[pd.DataFrame]: The dataset contents as a DataFrame, or None when a
            KeyError is raised while reading (e.g. the dataset path is missing).
    """
    source_file = cls._get_hdf_filename(hdf_input, ras_object)
    with h5py.File(source_file, 'r') as hdf_file:
        try:
            # [()] loads the whole dataset into memory before the file closes.
            return pd.DataFrame(hdf_file[dataset_path][()], columns=column_names)
        except KeyError:
            return None
731
-
732
@classmethod
@log_call
def read_hdf_to_dataframe(cls, hdf_input: Union[str, Path], dataset_path: str, fill_value: Union[int, float, str] = -9999, ras_object=None) -> pd.DataFrame:
    """
    Reads an HDF5 dataset and converts it into a pandas DataFrame, handling byte strings and missing values.

    Args:
        hdf_input (Union[str, Path]): Path to the HDF file or plan number.
        dataset_path (str): Path to the dataset within the HDF file.
        fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        pd.DataFrame: The resulting DataFrame with byte strings decoded and
            occurrences of fill_value replaced by NaN.

    Raises:
        KeyError: If the dataset is not found in the HDF file.
    """
    def _decode(value):
        # Leave non-byte values (numbers, already-decoded strings) untouched.
        return value.decode('utf-8') if isinstance(value, (bytes, bytearray)) else value

    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        # A missing dataset_path raises KeyError here and propagates to the caller.
        hdf_dataset = hdf_file[dataset_path]
        hdf_dataframe = cls.convert_to_dataframe_array(hdf_dataset)

        # Byte columns are detected from the first row; an empty frame has no
        # first row, so nothing needs decoding in that case.
        if len(hdf_dataframe) > 0:
            byte_columns = [
                col for col in hdf_dataframe.columns
                if isinstance(hdf_dataframe[col].iloc[0], (bytes, bytearray))
            ]
            # Series.map is used instead of the deprecated DataFrame.applymap.
            for col in byte_columns:
                hdf_dataframe[col] = hdf_dataframe[col].map(_decode)

        # np.nan (not np.NaN, which no longer exists in NumPy 2.x).
        return hdf_dataframe.replace({fill_value: np.nan})
762
-
763
@classmethod
@log_call
def get_group_attributes_as_df(cls, hdf_input: Union[str, Path], group_path: str, ras_object=None) -> pd.DataFrame:
    """
    Convert attributes inside a given HDF group to a DataFrame.

    Args:
        hdf_input (Union[str, Path]): Path to the HDF file or plan number.
        group_path (str): Path of the group in the HDF file.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        pd.DataFrame: One row per attribute with the columns 'Attribute', 'Value',
            'Type', 'Shape', 'Size' and 'Dtype'. The last three are None for
            values that are not NumPy arrays.

    Raises:
        KeyError: If the specified group_path is not found in the file. The
            failure is logged at CRITICAL level before being re-raised.

    Example:
        >>> attributes_df = RasHdf.get_group_attributes_as_df("path/to/file.hdf", "/Results/Unsteady/Output")
        >>> print(attributes_df.head())
    """
    hdf_filename = cls._get_hdf_filename(hdf_input, ras_object)

    with h5py.File(hdf_filename, 'r') as hdf_file:
        # Only the lookup is guarded so that unrelated KeyErrors are not
        # misreported as a missing group.
        try:
            group = hdf_file[group_path]
        except KeyError:
            logger.critical(f"Group path '{group_path}' not found in HDF file '{hdf_filename}'")
            # Re-raise instead of silently returning None, as documented.
            raise

        attributes = []
        for attr, value in group.attrs.items():
            is_array = isinstance(value, np.ndarray)
            attr_info = {
                'Attribute': attr,
                'Value': value,
                'Type': type(value).__name__,
                'Shape': value.shape if is_array else None,
                'Size': value.size if is_array else None,
                'Dtype': value.dtype if is_array else None
            }
            if isinstance(value, bytes):
                attr_info['Value'] = value.decode('utf-8')
            elif is_array:
                if value.dtype.kind == 'S':
                    # Fixed-width byte strings -> list of Python strings.
                    attr_info['Value'] = [v.decode('utf-8') for v in value]
                elif value.dtype.kind in ['i', 'f', 'u']:
                    attr_info['Value'] = value.tolist()
            attributes.append(attr_info)

        return pd.DataFrame(attributes)
812
-
813
- # Last functions from PyHMT2D:
814
-
815
- from ras_commander.logging_config import log_call
816
-
817
@classmethod
@log_call
def get_2d_area_solution_times(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[np.ndarray]:
    """
    Retrieve the unsteady-output solution times stored in the HDF file.

    The values are read from
    'Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/Time'.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Accepted for interface symmetry with the other
            2D-area methods; this implementation does not use it.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[np.ndarray]: Array of solution times, or None if any part of the
            path is missing (KeyError).

    Example:
        >>> solution_times = RasHdf.get_2d_area_solution_times("03", area_name="Area1")
        >>> print(solution_times)
        [0.0, 0.5, 1.0, ...]
    """
    path_parts = (
        'Results', 'Unsteady', 'Output', 'Output Blocks',
        'Base Output', 'Unsteady Time Series', 'Time',
    )
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        try:
            # Walk the hierarchy one level at a time, exactly like chained indexing.
            node = hdf_file
            for part in path_parts:
                node = node[part]
            return np.array(node)
        except KeyError:
            return None
845
-
846
@classmethod
@log_call
def get_2d_area_solution_time_dates(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[np.ndarray]:
    """
    Retrieve the unsteady-output time date stamps stored in the HDF file.

    The values are read from
    'Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/Time Date Stamp'.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Accepted for interface symmetry with the other
            2D-area methods; this implementation does not use it.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[np.ndarray]: Array of time date stamps, or None if any part of
            the path is missing (KeyError).

    Example:
        >>> solution_time_dates = RasHdf.get_2d_area_solution_time_dates("03", area_name="Area1")
        >>> print(solution_time_dates)
        ['2024-01-01T00:00:00', '2024-01-01T00:30:00', ...]
    """
    path_parts = (
        'Results', 'Unsteady', 'Output', 'Output Blocks',
        'Base Output', 'Unsteady Time Series', 'Time Date Stamp',
    )
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        try:
            # Walk the hierarchy one level at a time, exactly like chained indexing.
            node = hdf_file
            for part in path_parts:
                node = node[part]
            return np.array(node)
        except KeyError:
            return None
874
-
875
@classmethod
@log_call
def load_2d_area_solutions(
    cls,
    hdf_file: h5py.File,
    ras_object=None
) -> Optional[Dict[str, pd.DataFrame]]:
    """
    Load 2D Area Solutions (Water Surface Elevation and Face Normal Velocity) from the HDF file
    and provide them as long-format pandas DataFrames (one row per time step and cell/face).

    **Note:**
    - This function has only been tested with HEC-RAS version 6.5.
    - Ensure that the HDF file structure matches the expected paths.

    Args:
        hdf_file (h5py.File): An open HDF5 file object.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[Dict[str, pd.DataFrame]]: None if the solution-time dataset is missing
        or an error occurs (the error is logged); otherwise a dictionary containing:
            - 'solution_times': DataFrame of solution times.
            - For each 2D Flow Area whose datasets are present and consistent:
                - '{Area_Name}_WSE': Water Surface Elevation DataFrame.
                - '{Area_Name}_Face_Velocity': Face Normal Velocity DataFrame.
    """
    series_root = '/Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series'

    try:
        solution_times_path = f'{series_root}/Time'
        if solution_times_path not in hdf_file:
            return None

        solution_times = hdf_file[solution_times_path][()]
        solutions_dict = {
            'solution_times': pd.DataFrame({'Time_Step': solution_times})
        }

        def _to_long_frame(values, coords, id_column, value_column):
            # values is laid out (time step, item); flatten() is therefore
            # time-major. Time is repeated per item, while IDs and coordinates
            # must be tiled per time step so every row stays aligned.
            n_steps, n_items = values.shape
            return pd.DataFrame({
                'Time_Step': np.repeat(solution_times, n_items),
                id_column: np.tile(np.arange(n_items), n_steps),
                'X': np.tile(coords[:, 0], n_steps),
                'Y': np.tile(coords[:, 1], n_steps),
                value_column: values.flatten()
            })

        two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
        if not two_d_area_names:
            return solutions_dict

        for area in two_d_area_names:
            wse_path = f'{series_root}/2D Flow Areas/{area}/Water Surface'
            face_velocity_path = f'{series_root}/2D Flow Areas/{area}/Face Velocity'

            if wse_path not in hdf_file:
                continue

            wse_data = hdf_file[wse_path][()]
            cell_center_coords_path = f'/Geometry/2D Flow Areas/{area}/Cell Center Coordinate'
            if cell_center_coords_path not in hdf_file:
                continue

            cell_center_coords = hdf_file[cell_center_coords_path][()]
            # Skip areas whose geometry does not match the result array.
            if cell_center_coords.shape[0] != wse_data.shape[1]:
                continue

            solutions_dict[f'{area}_WSE'] = _to_long_frame(
                wse_data, cell_center_coords, 'Cell_ID', 'WSE'
            )

            if face_velocity_path not in hdf_file:
                continue

            face_velocity_data = hdf_file[face_velocity_path][()]
            face_center_coords_path = f'/Geometry/2D Flow Areas/{area}/Face Points Coordinates'
            if face_center_coords_path not in hdf_file:
                continue

            face_center_coords = hdf_file[face_center_coords_path][()]
            if face_center_coords.shape[0] != face_velocity_data.shape[1]:
                continue

            solutions_dict[f'{area}_Face_Velocity'] = _to_long_frame(
                face_velocity_data, face_center_coords, 'Face_ID', 'Normal_Velocity_ft_s'
            )

        return solutions_dict

    except Exception as e:
        # Best-effort API: keep returning None, but do not hide the cause.
        logging.getLogger(__name__).error(f"Error loading 2D area solutions: {e}")
        return None
969
-
970
@classmethod
@log_call
def get_hdf_paths_with_properties(cls, hdf_input: Union[str, Path], ras_object=None) -> pd.DataFrame:
    """
    Walk the whole HDF file and tabulate every visited object.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        pd.DataFrame: One row per visited object with the columns 'HDF_Path',
            'Type', 'Shape', 'Size' and 'Dtype'. The last three are None for
            objects that do not expose those attributes.

    Example:
        >>> paths_df = RasHdf.get_hdf_paths_with_properties("path/to/file.hdf")
        >>> print(paths_df.head())
    """
    records = []

    def _record(name: str, node: h5py.Group) -> None:
        # Returning None tells visititems to keep walking.
        records.append({
            "HDF_Path": name,
            "Type": type(node).__name__,
            "Shape": getattr(node, "shape", None),
            "Size": getattr(node, "size", None),
            "Dtype": getattr(node, "dtype", None)
        })

    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        hdf_file.visititems(_record)

    return pd.DataFrame(records)
1000
-
1001
@classmethod
@log_call
def build_2d_area_face_hydraulic_information(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None) -> Optional[List[List[np.ndarray]]]:
    """
    Build face hydraulic information tables for each face in 2D Flow Areas.

    For every face, the rows of 'Faces Area Elevation Values' addressed by that
    face's (start row, count) entry in 'Faces Area Elevation Info' are copied out.

    Args:
        hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
        area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas;
            otherwise only the named area is processed.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[List[List[np.ndarray]]]: One list per processed 2D Flow Area, each holding
            one array per face. None if a required dataset (or the named area) is missing.

    Example:
        >>> hydraulic_info = RasHdf.build_2d_area_face_hydraulic_information("03")
        >>> print(hydraulic_info[0][0])  # First face of first area
        [[Elevation1, Area1, WettedPerim1, ManningN1],
         [Elevation2, Area2, WettedPerim2, ManningN2],
         ...]
    """
    try:
        with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
            # Honor area_name as documented; an unknown name surfaces as the
            # KeyError handled below.
            if area_name is None:
                areas = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
            else:
                areas = [area_name]

            hydraulic_info_table = []
            for area in areas:
                face_elev_info = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces Area Elevation Info'])
                face_elev_values = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces Area Elevation Values'])

                # Each info row is (start row, row count) into the values table.
                area_hydraulic_info = [
                    face_elev_values[start_row:start_row + count].copy()
                    for start_row, count in face_elev_info
                ]
                hydraulic_info_table.append(area_hydraulic_info)

            return hydraulic_info_table

    except KeyError:
        return None
1044
-
1045
@classmethod
@log_call
def build_2d_area_face_point_coordinates_list(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None) -> Optional[List[np.ndarray]]:
    """
    Build a list of face point coordinates for each 2D Flow Area.

    Args:
        hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
        area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas;
            otherwise only the named area is processed.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[List[np.ndarray]]: One 'Face Points Coordinates' array per processed
            2D Flow Area, or None if the dataset (or the named area) is missing.

    Example:
        >>> face_coords_list = RasHdf.build_2d_area_face_point_coordinates_list("03")
        >>> print(face_coords_list[0])  # Coordinates for first area
        [[X1, Y1], [X2, Y2], ...]
    """
    try:
        with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
            # Honor area_name as documented; an unknown name surfaces as the
            # KeyError handled below.
            if area_name is None:
                areas = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
            else:
                areas = [area_name]

            return [
                np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Face Points Coordinates'])
                for area in areas
            ]

    except KeyError:
        return None
1077
-
1078
@classmethod
@log_call
def build_2d_area_face_profile(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None, n_face_profile_points: int = 10) -> Optional[List[np.ndarray]]:
    """
    Build face profiles for each face in 2D Flow Areas.

    Each face is sampled at n_face_profile_points evenly spaced points between its
    two face points; Z values are then attached by interpolate_z_coords.

    Args:
        hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
        area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas;
            otherwise only the named area is processed.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
        n_face_profile_points (int): Number of points to interpolate along each face profile.
            A value of 1 yields only the face's start point.

    Returns:
        Optional[List[np.ndarray]]: One list per processed 2D Flow Area, each holding one
            array of profile points per face. None if a required dataset is missing.

    Example:
        >>> face_profiles = RasHdf.build_2d_area_face_profile("03", n_face_profile_points=20)
        >>> print(face_profiles[0][0])  # Profile points for first face of first area
        [[X1, Y1, Z1], [X2, Y2, Z2], ...]
    """
    try:
        with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
            # Honor area_name as documented; an unknown name surfaces as the
            # KeyError handled below.
            if area_name is None:
                areas = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
            else:
                areas = [area_name]

            face_profiles = []
            for area in areas:
                face_faces = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces FacePoint Indexes'])
                face_point_coords = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Face Points Coordinates'])

                profile_points_all_faces = []
                for face_start, face_end in face_faces:
                    # linspace interpolates both coordinates at once and is
                    # well-defined for a single point (no division by n - 1).
                    interpolated_points = np.linspace(
                        face_point_coords[face_start],
                        face_point_coords[face_end],
                        n_face_profile_points,
                        endpoint=True,
                    )
                    profile_points_all_faces.append(cls.interpolate_z_coords(interpolated_points))

                face_profiles.append(profile_points_all_faces)

            return face_profiles

    except KeyError as e:
        logging.getLogger(__name__).error(f"Error building face profiles: {e}")
        return None
1132
-
1133
@classmethod
@log_call
def build_face_facepoints(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[List[np.ndarray]]:
    """
    Build face's facepoint list for each 2D Flow Area.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas;
            otherwise only the named area is processed.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[List[np.ndarray]]: One 'Faces FacePoint Indexes' array per processed
            2D Flow Area, or None if the dataset (or the named area) is missing.

    Example:
        >>> face_facepoints = RasHdf.build_face_facepoints("03")
        >>> print(face_facepoints[0][0])  # FacePoint indexes for first face of first area
        [start_idx, end_idx]
    """
    try:
        with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
            # Honor area_name as documented; an unknown name surfaces as the
            # KeyError handled below.
            if area_name is None:
                areas = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
            else:
                areas = [area_name]

            return [
                np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces FacePoint Indexes'])
                for area in areas
            ]

    except KeyError as e:
        logger = logging.getLogger(__name__)
        logger.error(f"Error building face facepoints list: {e}")
        return None
1167
-
1168
@classmethod
@log_call
def build_2d_area_boundaries(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[Tuple[int, np.ndarray, List[str], List[str], List[str], np.ndarray, np.ndarray]]:
    """
    Build boundaries with their point lists for each 2D Flow Area.

    Consecutive rows of 'Boundary Points' that share the same value in column 0
    are grouped into one boundary. The point list of a boundary starts with
    columns 2 and 3 of its first row and is extended by column 3 of each
    following row.

    Args:
        hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
        area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas;
            otherwise only the named area is processed.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[Tuple[int, np.ndarray, List[str], List[str], List[str], np.ndarray, np.ndarray]]:
            Tuple containing total boundaries, boundary IDs, boundary names, associated 2D Flow Area names,
            boundary types, total points per boundary, and boundary point lists. The point lists are a
            regular 2D array when all boundaries have the same number of points and a 1D object array
            of arrays otherwise. None if a required dataset is missing.

    Example:
        >>> total_boundaries, boundary_ids, boundary_names, flow_area_names, boundary_types, total_points, boundary_points = RasHdf.build_2d_area_boundaries("03")
        >>> print(total_boundaries)
        5
    """
    logger = logging.getLogger(__name__)
    try:
        with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
            # Honor area_name as documented; an unknown name surfaces as the
            # KeyError handled below.
            if area_name is None:
                areas = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
            else:
                areas = [area_name]

            boundary_ids = []
            boundary_names = []
            flow_area_names = []
            boundary_types = []
            total_points_per_boundary = []
            boundary_points_list = []

            def _save_boundary(boundary_id, last_row, points, area):
                # Name and type are taken from the boundary's OWN last row, so
                # every boundary is recorded the same way as the final one.
                boundary_ids.append(boundary_id)
                boundary_names.append(last_row[0])  # Assuming boundary name is stored here
                flow_area_names.append(area)
                boundary_types.append(last_row[2])  # Assuming boundary type is stored here
                total_points_per_boundary.append(len(points))
                boundary_points_list.append(np.array(points))

            for area in areas:
                boundary_points = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Boundary Points'])
                if boundary_points.size == 0:
                    logger.warning(f"No boundary points found for 2D Flow Area: {area}")
                    continue

                last_row = boundary_points[0]
                current_boundary_id = last_row[0]
                current_boundary_points = [last_row[2], last_row[3]]

                for point in boundary_points[1:]:
                    if point[0] != current_boundary_id:
                        # Boundary ID changed: close the previous boundary and start a new one.
                        _save_boundary(current_boundary_id, last_row, current_boundary_points, area)
                        current_boundary_id = point[0]
                        current_boundary_points = [point[2], point[3]]
                    else:
                        current_boundary_points.append(point[3])
                    last_row = point

                # Save the last boundary of this area.
                _save_boundary(current_boundary_id, last_row, current_boundary_points, area)

            # np.array() rejects ragged input on recent NumPy versions; build an
            # object array explicitly when the boundaries differ in length.
            if len({len(points) for points in boundary_points_list}) <= 1:
                points_array = np.array(boundary_points_list)
            else:
                points_array = np.empty(len(boundary_points_list), dtype=object)
                for index, points in enumerate(boundary_points_list):
                    points_array[index] = points

            return (
                len(boundary_ids),
                np.array(boundary_ids),
                boundary_names,
                flow_area_names,
                boundary_types,
                np.array(total_points_per_boundary),
                points_array,
            )

    except KeyError as e:
        logger.error(f"Error building boundaries: {e}")
        return None
1243
-
1244
- # Helper Methods for New Functionalities
1245
-
1246
@classmethod
@log_call
def horizontal_distance(cls, coord1: np.ndarray, coord2: np.ndarray) -> float:
    """
    Calculate the Euclidean distance between two coordinate points.

    Args:
        coord1 (np.ndarray): First coordinate point [X, Y] (any array-like).
        coord2 (np.ndarray): Second coordinate point [X, Y] (any array-like).

    Returns:
        float: Distance between the two points.

    Example:
        >>> distance = RasHdf.horizontal_distance([0, 0], [3, 4])
        >>> print(distance)
        5.0
    """
    # Coerce to float arrays so plain lists/tuples work (as in the example) and
    # unsigned integer inputs cannot wrap around on subtraction.
    start = np.asarray(coord1, dtype=float)
    end = np.asarray(coord2, dtype=float)
    return np.linalg.norm(end - start)
1265
-
1266
@classmethod
@log_call
def interpolate_z_coords(cls, points: np.ndarray) -> np.ndarray:
    """
    Append a Z column to a set of [X, Y] points.

    This is a placeholder: no terrain lookup is performed and every Z value
    is set to 0.

    Args:
        points (np.ndarray): Array of points with [X, Y].

    Returns:
        np.ndarray: Array of points with [X, Y, Z], where Z is 0 for every point.

    Example:
        >>> interpolated = RasHdf.interpolate_z_coords(np.array([[0,0], [1,1]]))
        >>> print(interpolated)
        [[0, 0, Z0],
         [1, 1, Z1]]
    """
    # Placeholder for actual interpolation logic
    # This should be replaced with the appropriate interpolation method
    point_count = points.shape[0]
    zero_column = np.zeros((point_count, 1))  # Assuming Z=0 for simplicity
    with_z = np.hstack((points, zero_column))
    return with_z
1288
-
1289
@classmethod
@log_call
def extract_string_from_hdf(
    cls,
    hdf_input: Union[str, Path],
    hdf_path: str,
    ras_object: Optional["RasPrj"] = None
) -> Union[str, List[str]]:
    """
    Extract string from HDF object at a given path.

    Args:
        hdf_input (Union[str, Path]): Either the plan number or the full path to the HDF file.
        hdf_path (str): Path of the object in the HDF file.
        ras_object (Optional["RasPrj"]): Specific RAS object to use. If None, uses the global ras instance.

    Returns:
        Union[str, List[str]]:
            - For a group: a description listing the group's keys.
            - For a dataset holding a single bytes value: the decoded string.
            - For a dataset holding a byte-string array: a list of decoded strings.
            - For any other dataset: str() of the loaded data.
            - For any other object type: an "Unsupported object type" message.

    Raises:
        KeyError: If the specified hdf_path is not found in the file.

    Example:
        >>> result = RasHdf.extract_string_from_hdf("path/to/file.hdf", "/Results/Summary/Compute Messages (text)")
        >>> print(result)
    """
    with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
        # Only the lookup is guarded so that unrelated KeyErrors are not
        # misreported as "Path not found".
        try:
            hdf_object = hdf_file[hdf_path]
        except KeyError as err:
            logger = logging.getLogger(__name__)
            logger.error(f"Path not found: {hdf_path}")
            raise KeyError(f"Path not found: {hdf_path}") from err

        if isinstance(hdf_object, h5py.Group):
            return f"Group: {hdf_path}\nContents: {list(hdf_object.keys())}"

        if isinstance(hdf_object, h5py.Dataset):
            data = hdf_object[()]
            if isinstance(data, bytes):
                return data.decode('utf-8')
            if isinstance(data, np.ndarray) and data.dtype.kind == 'S':
                return [v.decode('utf-8') for v in data]
            return str(data)

        return f"Unsupported object type: {type(hdf_object)}"
1336
-
1337
@staticmethod
@log_call
def decode_byte_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Decodes byte strings in a DataFrame to regular string objects.

    Every value of type bytes/bytearray found in an object-dtype column is decoded
    as UTF-8; all other values (including already-decoded strings) are left as is.
    The DataFrame is modified in place and also returned.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing byte-encoded string columns.

    Returns:
        pd.DataFrame: The same DataFrame with byte strings decoded to regular strings.

    Example:
        >>> df = pd.DataFrame({'A': [b'hello', b'world'], 'B': [1, 2]})
        >>> decoded_df = RasHdf.decode_byte_strings(df)
        >>> print(decoded_df)
               A  B
        0  hello  1
        1  world  2
    """
    # Declared as a staticmethod: the function takes no cls argument, so the
    # documented call RasHdf.decode_byte_strings(df) only works this way.
    def _decode(value):
        return value.decode('utf-8') if isinstance(value, (bytes, bytearray)) else value

    # Element-wise decoding keeps non-bytes values intact, whereas
    # Series.str.decode turns them into NaN.
    for col in dataframe.select_dtypes(['object']).columns:
        dataframe[col] = dataframe[col].map(_decode)
    return dataframe
1364
-
1365
@staticmethod
@log_call
def perform_kdtree_query(
    reference_points: np.ndarray,
    query_points: np.ndarray,
    max_distance: float = 2.0
) -> np.ndarray:
    """
    Performs a KDTree query between two datasets and returns indices with distances exceeding max_distance set to -1.

    Args:
        reference_points (np.ndarray): The reference dataset for KDTree.
        query_points (np.ndarray): The query dataset to search against KDTree of reference_points.
        max_distance (float, optional): The maximum distance threshold. Indices with distances greater than this are set to -1. Defaults to 2.0.

    Returns:
        np.ndarray: Array of indices from reference_points that are nearest to each point in query_points.
            Indices with distances > max_distance are set to -1.

    Example:
        >>> ref_points = np.array([[0, 0], [1, 1], [2, 2]])
        >>> query_points = np.array([[0.1, 0.1], [5, 5]])
        >>> result = RasHdf.perform_kdtree_query(ref_points, query_points)
        >>> print(result)
        [ 0 -1]
    """
    # Declared as a staticmethod: the function takes no cls argument, so the
    # documented call RasHdf.perform_kdtree_query(ref, query) only works this way.
    dist, snap = KDTree(reference_points).query(query_points, distance_upper_bound=max_distance)
    # Queries with no neighbour inside the bound come back with an infinite
    # distance; np.where also copes with a single (scalar) query result.
    return np.where(np.asarray(dist) > max_distance, -1, snap)
1394
-
1395
@staticmethod
@log_call
def find_nearest_neighbors(points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
    """
    Creates a self KDTree for dataset points and finds nearest neighbors excluding self,
    with distances above max_distance set to -1.

    Args:
        points (np.ndarray): The dataset to build the KDTree from and query against itself.
        max_distance (float, optional): The maximum distance threshold. Indices with distances
            greater than max_distance are set to -1. Defaults to 2.0.

    Returns:
        np.ndarray: Array of indices representing the nearest neighbor in points for each point in points.
            Points without any other point within max_distance get -1.

    Example:
        >>> points = np.array([[0, 0], [1, 0], [2.5, 0], [10, 0]])
        >>> result = RasHdf.find_nearest_neighbors(points)
        >>> print(result)
        [ 1  0  1 -1]
    """
    # Declared as a staticmethod: the function takes no cls argument, so the
    # documented call RasHdf.find_nearest_neighbors(points) only works this way.
    # k=2 because the closest hit of a self-query is normally the point itself.
    dist, snap = KDTree(points).query(points, k=2, distance_upper_bound=max_distance)

    # Invalidate candidates that are out of range or that are the point itself.
    own_index = np.arange(len(snap))[:, None]
    snap[(dist > max_distance) | (snap == own_index)] = -1

    # Prefer the first candidate; fall back to the second when the first is invalid.
    first, second = snap[:, 0], snap[:, 1]
    return np.where(first != -1, first, second).astype(np.int64)
1427
-
1428
@staticmethod
@log_call
def consolidate_dataframe(
    dataframe: pd.DataFrame,
    group_by: Optional[Union[str, List[str]]] = None,
    pivot_columns: Optional[Union[str, List[str]]] = None,
    level: Optional[int] = None,
    n_dimensional: bool = False,
    aggregation_method: Union[str, Callable] = 'list'
) -> pd.DataFrame:
    """
    Consolidate rows in a DataFrame by merging duplicate values into lists or using a specified aggregation function.

    Args:
        dataframe (pd.DataFrame): The DataFrame to consolidate.
        group_by (Optional[Union[str, List[str]]]): Columns or indices to group by.
        pivot_columns (Optional[Union[str, List[str]]]): Columns to pivot.
        level (Optional[int]): Level of multi-index to group by.
        n_dimensional (bool): If True, use a pivot table for N-Dimensional consolidation.
        aggregation_method (Union[str, Callable]): Aggregation method, e.g., 'list' to aggregate into lists.

    Returns:
        pd.DataFrame: The consolidated DataFrame.

    Example:
        >>> df = pd.DataFrame({'A': [1, 1, 2], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> result = RasHdf.consolidate_dataframe(df, group_by='A')
        >>> print(result)
                B       C
        A
        1  [4, 5]  [7, 8]
        2     [6]     [9]
    """
    # Declared as a staticmethod: the function takes no cls argument, so the
    # documented call RasHdf.consolidate_dataframe(df, ...) only works this way.
    if aggregation_method == 'list':
        # Aggregate into tuples first; they are turned into lists afterwards.
        agg_func = lambda x: tuple(x)
    else:
        agg_func = aggregation_method

    if n_dimensional:
        # NOTE(review): pivot_table's first two positional parameters are
        # `values` and `index`; confirm group_by/pivot_columns are intended
        # for those slots.
        return dataframe.pivot_table(group_by, pivot_columns, aggfunc=agg_func)

    def _tuple_to_list(value):
        # Only tuple results are converted, so scalar aggregations
        # (e.g. 'sum') no longer fail with "object is not iterable".
        return list(value) if isinstance(value, tuple) else value

    result = dataframe.groupby(group_by, level=level).agg(agg_func)
    # Column-wise Series.map replaces the deprecated DataFrame.applymap.
    for column in result.columns:
        result[column] = result[column].map(_tuple_to_list)
    return result
1472
-
1473
@staticmethod
@log_call
def find_nearest_value(array: Union[list, np.ndarray], target_value: Union[int, float]) -> Union[int, float]:
    """
    Find the element of an array that is closest to a target value.

    This is a static method: call it as
    ``RasHdf.find_nearest_value(values, target)``.

    Args:
        array (Union[list, np.ndarray]): The values to search. Lists are
            converted with ``np.asarray``; multi-dimensional input is
            searched element-wise.
        target_value (Union[int, float]): The value to find the nearest neighbor to.

    Returns:
        Union[int, float]: The element with the smallest absolute difference
        to ``target_value``. When several elements are equally close, the
        first one in (flattened) order is returned.

    Raises:
        ValueError: If ``array`` is empty.

    Example:
        >>> arr = np.array([1, 3, 5, 7, 9])
        >>> result = RasHdf.find_nearest_value(arr, 6)
        >>> print(result)
        5
    """
    values = np.asarray(array)
    if values.size == 0:
        raise ValueError("Cannot find the nearest value in an empty array")

    # argmin() returns an index into the flattened array, so the lookup has
    # to go through the flat view to stay correct for N-D input.
    nearest_idx = np.abs(values - target_value).argmin()
    return values.flat[nearest_idx]
1495
-
1496
@staticmethod
@log_call
def _get_hdf_filename(hdf_input: Union[str, Path, h5py.File], ras_object=None) -> Optional[Path]:
    """
    Resolve an HDF input to the path of an existing HDF file.

    The input is tried, in order, as an open ``h5py.File``, as a path to an
    existing file, and finally as a plan number that is looked up in the
    ``plan_df`` table of the RAS project object.

    Args:
        hdf_input (Union[str, Path, h5py.File]): A plan number, a path to the
            HDF file (string or Path), or an open h5py.File object.
        ras_object (RasPrj, optional): The RAS project object used for the
            plan-number lookup. Falls back to the global ``ras`` instance.

    Returns:
        Optional[Path]: Path to the HDF file, or None when it cannot be
        resolved.

    Note:
        Failures are reported through ``logger.critical`` and a None return
        value; no exception is raised for them here.
    """
    # An open HDF handle already knows which file it belongs to.
    if isinstance(hdf_input, h5py.File):
        return Path(hdf_input.filename)

    # Normalise strings to Path so the file check below covers both forms.
    hdf_input = Path(hdf_input) if isinstance(hdf_input, str) else hdf_input

    points_to_file = isinstance(hdf_input, Path) and hdf_input.is_file()
    if points_to_file:
        return hdf_input

    # Anything else is interpreted as a plan number, which needs a project.
    ras_obj = ras_object if ras_object else ras
    if not ras_obj.initialized:
        logger.critical("ras_object is not initialized. ras_object is required when hdf_input is not a direct file path.")
        return None

    plan_table = ras_obj.plan_df
    matching_plans = plan_table[plan_table['plan_number'] == str(hdf_input)]
    if matching_plans.empty:
        logger.critical(f"No HDF file found for plan number {hdf_input}")
        return None

    first_match = matching_plans.iloc[0]
    hdf_filename = first_match['HDF_Results_Path']
    if hdf_filename is None:
        logger.critical(f"HDF_Results_Path is None for plan number {hdf_input}")
        return None

    hdf_path = Path(hdf_filename)
    if hdf_path.is_file():
        return hdf_path

    logger.critical(f"HDF file not found: {hdf_path}")
    return None
1547
-
1548
-
1549
-
1550
@log_call
def save_dataframe_to_hdf(
    dataframe: pd.DataFrame,
    hdf_parent_group: h5py.Group,
    dataset_name: str,
    attributes: Optional[Dict[str, Union[int, float, str]]] = None,
    fill_value: Union[int, float, str] = -9999,
    **kwargs: Any
) -> h5py.Dataset:
    """
    Save a pandas DataFrame to an HDF5 dataset within a specified parent group.

    This function addresses limitations of `pd.to_hdf()` by using h5py to create and save datasets.
    The input DataFrame is copied and never modified. An existing dataset
    with the same name in the parent group is deleted and replaced.

    Args:
        dataframe (pd.DataFrame): The DataFrame to save.
        hdf_parent_group (h5py.Group): The parent HDF5 group where the dataset will be created.
        dataset_name (str): The name of the new dataset to add in the HDF5 parent group.
        attributes (Optional[Dict[str, Union[int, float, str]]]): A dictionary of attributes to add to the dataset.
        fill_value (Union[int, float, str]): The value to use for filling missing data.
            In object (string) columns the fill value is stored as its
            UTF-8 encoded string form, e.g. ``b'-9999'``.
        **kwargs: Additional keyword arguments passed to `hdf_parent_group.create_dataset()`.

    Returns:
        h5py.Dataset: The created HDF5 dataset within the parent group.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
        >>> with h5py.File('data.h5', 'w') as f:
        ...     group = f.create_group('my_group')
        ...     dataset = save_dataframe_to_hdf(df, group, 'my_dataset')
    """
    df = dataframe.copy()

    # Replace '/' in column names with '-' to avoid issues in HDF5
    if df.columns.dtype == 'O':
        df.columns = df.columns.str.replace('/', '-', regex=False)

    # Fill missing values with the specified fill_value
    df = df.fillna(fill_value)

    def _to_bytes(value):
        # Object columns may hold non-string cells, most notably the
        # fill_value that has just replaced missing entries; stringify those
        # before encoding instead of failing on a missing .encode().
        if isinstance(value, bytes):
            return value
        return str(value).encode('utf-8')

    # Encode object (string) columns to bytes
    string_cols = df.select_dtypes(include=['object']).columns
    if len(string_cols) > 0:
        # DataFrame.applymap is deprecated in favour of DataFrame.map; use
        # whichever this pandas version provides.
        elementwise = (
            pd.DataFrame.map if hasattr(pd.DataFrame, "map") else pd.DataFrame.applymap
        )
        df[string_cols] = elementwise(df[string_cols], _to_bytes).astype('bytes')

    # Prepare data for HDF5 dataset creation: a record array when the columns
    # are labelled, the plain value matrix when they are a RangeIndex.
    arr = df.to_records(index=False) if not isinstance(df.columns, pd.RangeIndex) else df.values

    # Remove existing dataset if it exists
    if dataset_name in hdf_parent_group:
        logger.warning(f"Existing dataset {dataset_name} will be overwritten")
        del hdf_parent_group[dataset_name]

    # Create the dataset in the HDF5 file
    dataset = hdf_parent_group.create_dataset(dataset_name, data=arr, **kwargs)

    # Update dataset attributes if provided
    if attributes:
        dataset.attrs.update(attributes)

    logger.info(f"Successfully saved DataFrame to dataset: {dataset_name}")
    return dataset