ras-commander 0.34.0__py3-none-any.whl → 0.36.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ras_commander/RasHdf.py CHANGED
@@ -1,110 +1,1374 @@
+ """
+ RasHdf Module
  
+ This module provides utilities for working with HDF files in HEC-RAS projects.
+ It contains the RasHdf class, which offers various static methods for extracting,
+ analyzing, and manipulating data from HEC-RAS HDF files.
  
+ Note:
+     Methods that interact directly with HDF files should be decorated with
+     @hdf_operation, which handles the opening and closing of the HDF file
+     and ensures proper file handling and error management.
+ 
+     When using the @hdf_operation decorator:
+     - The method receives an open h5py.File object as its first argument after 'cls'.
+     - Error handling for file operations is managed by the decorator.
+     - The HDF file is automatically closed after the method execution.
+ 
+     Methods without this decorator must manually handle file opening, closing,
+     and error management. Failure to use the decorator or to properly manage
+     the file can lead to resource leaks or file access errors.
+ 
+ Example:
+     @classmethod
+     @hdf_operation
+     def example_method(cls, hdf_file: h5py.File, other_args):
+         # Method implementation using hdf_file
+ 
+ This module is part of the ras-commander library and uses a centralized logging configuration.
+ 
+ Logging Configuration:
+     - The logging is set up in the logging_config.py file.
+     - A @log_call decorator is available to automatically log function calls.
+     - Log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL
+     - Logs are written to both console and a rotating file handler.
+     - The default log file is 'ras_commander.log' in the 'logs' directory.
+     - The default log level is INFO.
+ 
+ To use logging in this module:
+     1. Use the @log_call decorator for automatic function call logging.
+     2. For additional logging, use logger.<level>() calls (e.g., logger.info(), logger.debug()).
+     3. Obtain the logger using: logger = logging.getLogger(__name__)
+ 
+ Example:
+     @log_call
+     def my_function():
+         logger = logging.getLogger(__name__)
+         logger.debug("Additional debug information")
+         # Function logic here
+ """
  import h5py
  import numpy as np
  import pandas as pd
- from typing import Union, List, Optional, Dict, Callable
+ from typing import Union, List, Optional, Dict, Tuple, Any, Callable
  from scipy.spatial import KDTree
+ from pathlib import Path
+ from datetime import datetime
+ import logging
+ from functools import wraps
+ from .RasPrj import RasPrj, ras, init_ras_project
+ 
+ # If you're using RasPrj in type hints, you might need to use string literals to avoid circular imports
+ from typing import TYPE_CHECKING
+ if TYPE_CHECKING:
+     from .RasPrj import RasPrj
+ from ras_commander import get_logger
+ from ras_commander.logging_config import log_call
+ 
+ logger = get_logger(__name__)
  
  class RasHdf:
      """
-     A class containing utility functions for working with HDF files in the ras-commander library.
-     """
+     A utility class for working with HDF files in HEC-RAS projects.
  
+     This class provides static methods for various operations on HDF files,
+     including listing paths, extracting data, and performing analyses on
+     HEC-RAS project data stored in HDF format.
+     """
+ 
+ 
      @staticmethod
-     def read_hdf_to_dataframe(hdf_dataset: h5py.Dataset, fill_value: Union[int, float, str] = -9999) -> pd.DataFrame:
+     def hdf_operation(func):
+         """
+         A decorator for HDF file operations in the RasHdf class.
+ 
+         This decorator wraps methods that perform operations on HDF files. It handles:
+         1. Resolving the HDF filename from various input types.
+         2. Opening and closing the HDF file.
+         3. Error handling and logging.
+         4. Applying the decorated function as a class method.
+ 
+         Args:
+             func (Callable): The function to be decorated.
+ 
+         Returns:
+             Callable: A wrapped version of the input function as a class method.
+ 
+         Raises:
+             ValueError: If the HDF file is not found.
+ 
+         Usage:
+             @RasHdf.hdf_operation
+             def some_hdf_method(cls, hdf_file, ...):
+                 # Method implementation
          """
-         Reads an HDF5 table using h5py and converts it into a pandas DataFrame, handling byte strings and missing values.
+         @wraps(func)
+         def wrapper(cls, hdf_input: Union[str, Path], *args: Any, **kwargs: Any) -> Any:
+             from ras_commander import ras  # Import here to avoid circular import
+             ras_obj = kwargs.pop('ras_object', None) or ras
+             try:
+                 hdf_filename = cls._get_hdf_filename(hdf_input, ras_obj)
+                 if hdf_filename is None:
+                     raise ValueError(f"HDF file {hdf_input} not found. Use a try-except block to catch this error.")
+                 with h5py.File(hdf_filename, 'r') as hdf_file:
+                     return func(cls, hdf_file, *args, **kwargs)
+             except Exception as e:
+                 logger.error(f"Error in {func.__name__}: {e}")
+                 return None
+         return classmethod(wrapper)
+ 
+ 
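
A minimal usage sketch for the decorator above (illustrative, not from the package: it assumes `_get_hdf_filename`, which is referenced but not shown in this diff, passes a full path through unchanged, and that `RasHdf` is exported at the package root; the path is hypothetical):

    import h5py
    from pathlib import Path
    from ras_commander import RasHdf

    class PlanHdfTools(RasHdf):
        @RasHdf.hdf_operation
        def root_keys(cls, hdf_file: h5py.File):
            # hdf_file arrives already open in read mode; the decorator
            # closes it afterwards and returns None if anything raises.
            return list(hdf_file.keys())

    keys = PlanHdfTools.root_keys(Path("path/to/plan.p01.hdf"))  # hypothetical path
    print(keys)
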
+     @classmethod
+     @log_call
+     def get_runtime_data(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
+         """
+         Extract runtime and compute time data from a single HDF file.
  
          Args:
-             hdf_dataset (h5py.Dataset): The HDF5 table to read.
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing runtime and compute time data, or None if data extraction fails.
+ 
+         Example:
+             >>> runtime_df = RasHdf.get_runtime_data("path/to/file.hdf")
+             >>> if runtime_df is not None:
+             ...     print(runtime_df.head())
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             logger.info(f"Extracting Plan Information from: {Path(hdf_file.filename).name}")
+             plan_info = hdf_file.get('/Plan Data/Plan Information')
+             if plan_info is None:
+                 logger.warning("Group '/Plan Data/Plan Information' not found.")
+                 return None
+ 
+             plan_name = plan_info.attrs.get('Plan Name', 'Unknown')
+             plan_name = plan_name.decode('utf-8') if isinstance(plan_name, bytes) else plan_name
+             logger.info(f"Plan Name: {plan_name}")
+ 
+             start_time_str = plan_info.attrs.get('Simulation Start Time', 'Unknown')
+             end_time_str = plan_info.attrs.get('Simulation End Time', 'Unknown')
+             start_time_str = start_time_str.decode('utf-8') if isinstance(start_time_str, bytes) else start_time_str
+             end_time_str = end_time_str.decode('utf-8') if isinstance(end_time_str, bytes) else end_time_str
+ 
+             start_time = datetime.strptime(start_time_str, "%d%b%Y %H:%M:%S")
+             end_time = datetime.strptime(end_time_str, "%d%b%Y %H:%M:%S")
+             simulation_duration = end_time - start_time
+             simulation_hours = simulation_duration.total_seconds() / 3600
+ 
+             logger.info(f"Simulation Start Time: {start_time_str}")
+             logger.info(f"Simulation End Time: {end_time_str}")
+             logger.info(f"Simulation Duration (hours): {simulation_hours}")
+ 
+             compute_processes = hdf_file.get('/Results/Summary/Compute Processes')
+             if compute_processes is None:
+                 logger.warning("Dataset '/Results/Summary/Compute Processes' not found.")
+                 return None
+ 
+             process_names = [name.decode('utf-8') for name in compute_processes['Process'][:]]
+             filenames = [filename.decode('utf-8') for filename in compute_processes['Filename'][:]]
+             completion_times = compute_processes['Compute Time (ms)'][:]
+ 
+             compute_processes_df = pd.DataFrame({
+                 'Process': process_names,
+                 'Filename': filenames,
+                 'Compute Time (ms)': completion_times,
+                 'Compute Time (s)': completion_times / 1000,
+                 'Compute Time (hours)': completion_times / (1000 * 3600)
+             })
+ 
+             logger.debug("Compute processes DataFrame:")
+             logger.debug(compute_processes_df)
+ 
+             compute_processes_summary = {
+                 'Plan Name': [plan_name],
+                 'File Name': [Path(hdf_file.filename).name],
+                 'Simulation Start Time': [start_time_str],
+                 'Simulation End Time': [end_time_str],
+                 'Simulation Duration (s)': [simulation_duration.total_seconds()],
+                 'Simulation Time (hr)': [simulation_hours],
+                 'Completing Geometry (hr)': [compute_processes_df[compute_processes_df['Process'] == 'Completing Geometry']['Compute Time (hours)'].values[0] if 'Completing Geometry' in compute_processes_df['Process'].values else 'N/A'],
+                 'Preprocessing Geometry (hr)': [compute_processes_df[compute_processes_df['Process'] == 'Preprocessing Geometry']['Compute Time (hours)'].values[0] if 'Preprocessing Geometry' in compute_processes_df['Process'].values else 'N/A'],
+                 'Completing Event Conditions (hr)': [compute_processes_df[compute_processes_df['Process'] == 'Completing Event Conditions']['Compute Time (hours)'].values[0] if 'Completing Event Conditions' in compute_processes_df['Process'].values else 'N/A'],
+                 'Unsteady Flow Computations (hr)': [compute_processes_df[compute_processes_df['Process'] == 'Unsteady Flow Computations']['Compute Time (hours)'].values[0] if 'Unsteady Flow Computations' in compute_processes_df['Process'].values else 'N/A'],
+                 'Complete Process (hr)': [compute_processes_df['Compute Time (hours)'].sum()]
+             }
+ 
+             compute_processes_summary['Unsteady Flow Speed (hr/hr)'] = [simulation_hours / compute_processes_summary['Unsteady Flow Computations (hr)'][0] if compute_processes_summary['Unsteady Flow Computations (hr)'][0] != 'N/A' else 'N/A']
+             compute_processes_summary['Complete Process Speed (hr/hr)'] = [simulation_hours / compute_processes_summary['Complete Process (hr)'][0] if compute_processes_summary['Complete Process (hr)'][0] != 'N/A' else 'N/A']
+ 
+             compute_summary_df = pd.DataFrame(compute_processes_summary)
+             logger.debug("Compute summary DataFrame:")
+             logger.debug(compute_summary_df)
+ 
+             return compute_summary_df
+ 
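
A hedged usage sketch for `get_runtime_data` (the project folder, version string, and plan number are hypothetical; `init_ras_project` is assumed, per its name and the import above, to set the global `ras` instance):

    from ras_commander import RasHdf, init_ras_project

    init_ras_project("C:/Projects/MyRasProject", "6.5")  # hypothetical folder / version
    summary = RasHdf.get_runtime_data("01")              # "01" = hypothetical plan number
    if summary is not None:
        # Speed ratios greater than 1.0 mean the plan runs faster than real time
        print(summary[['Plan Name', 'Simulation Time (hr)', 'Unsteady Flow Speed (hr/hr)']])
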
+     # List 2D Flow Area Groups (needed for later functions that extract specific datasets)
+ 
+     @classmethod
+     @log_call
+     def get_2d_flow_area_names(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[List[str]]:
+         """
+         List 2D Flow Area names from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[List[str]]: List of 2D Flow Area names, or None if no 2D Flow Areas are found.
+ 
+         Raises:
+             ValueError: If no 2D Flow Areas are found in the HDF file.
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             if 'Geometry/2D Flow Areas' in hdf_file:
+                 group = hdf_file['Geometry/2D Flow Areas']
+                 group_names = [name for name in group.keys() if isinstance(group[name], h5py.Group)]
+                 if not group_names:
+                     logger.warning("No 2D Flow Areas found in the HDF file")
+                     return None
+                 logger.info(f"Found {len(group_names)} 2D Flow Areas")
+                 return group_names
+             else:
+                 logger.warning("No 2D Flow Areas found in the HDF file")
+                 return None
+     @classmethod
+     @log_call
+     def get_2d_flow_area_attributes(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
+         """
+         Extract 2D Flow Area Attributes from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing 2D Flow Area Attributes, or None if attributes are not found.
+ 
+         Example:
+             >>> attributes_df = RasHdf.get_2d_flow_area_attributes("path/to/file.hdf")
+             >>> if attributes_df is not None:
+             ...     print(attributes_df.head())
+             ... else:
+             ...     print("No 2D Flow Area attributes found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             if 'Geometry/2D Flow Areas/Attributes' in hdf_file:
+                 attributes = hdf_file['Geometry/2D Flow Areas/Attributes'][()]
+                 attributes_df = pd.DataFrame(attributes)
+                 return attributes_df
+             else:
+                 return None
+ 
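
An illustrative combination of the two getters above (hypothetical path):

    from ras_commander import RasHdf

    hdf_path = "path/to/plan.p01.hdf"  # hypothetical
    areas = RasHdf.get_2d_flow_area_names(hdf_path)
    if areas:
        print(f"2D Flow Areas: {areas}")
        attributes_df = RasHdf.get_2d_flow_area_attributes(hdf_path)
        if attributes_df is not None:
            print(attributes_df.head())
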
+     @classmethod
+     @log_call
+     def get_cell_info(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
+         """
+         Extract Cell Info from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing Cell Info, or None if the data is not found.
+ 
+         Example:
+             >>> cell_info_df = RasHdf.get_cell_info("path/to/file.hdf")
+             >>> if cell_info_df is not None:
+             ...     print(cell_info_df.head())
+             ... else:
+             ...     print("No Cell Info found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             cell_info_df = cls._extract_dataset(hdf_file, 'Geometry/2D Flow Areas/Cell Info', ['Start', 'End'])
+             return cell_info_df
+ 
+     @classmethod
+     @log_call
+     def get_cell_points(cls, hdf_input: Union[str, Path], ras_object=None) -> Optional[pd.DataFrame]:
+         """
+         Extract Cell Points from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing Cell Points, or None if the data is not found.
+ 
+         Example:
+             >>> cell_points_df = RasHdf.get_cell_points("path/to/file.hdf")
+             >>> if cell_points_df is not None:
+             ...     print(cell_points_df.head())
+             ... else:
+             ...     print("No Cell Points found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             cell_points_df = cls._extract_dataset(hdf_file, 'Geometry/2D Flow Areas/Cell Points', ['X', 'Y'])
+             return cell_points_df
+ 
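
The `KDTree` imported at the top of this module pairs naturally with these point tables; a sketch of a nearest-cell lookup (hypothetical path and query coordinate):

    import numpy as np
    from scipy.spatial import KDTree
    from ras_commander import RasHdf

    cell_points_df = RasHdf.get_cell_points("path/to/plan.p01.hdf")  # hypothetical
    if cell_points_df is not None:
        tree = KDTree(cell_points_df[['X', 'Y']].to_numpy())
        # Distance to, and index of, the cell point nearest an arbitrary coordinate
        distance, index = tree.query(np.array([1234.5, 678.9]))
        print(index, distance)
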
+     @classmethod
+     @log_call
+     def get_polygon_info_and_parts(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+         """
+         Extract Polygon Info and Parts from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+                 Two DataFrames containing Polygon Info and Polygon Parts respectively,
+                 or None for each if the corresponding data is not found.
+ 
+         Example:
+             >>> polygon_info_df, polygon_parts_df = RasHdf.get_polygon_info_and_parts("path/to/file.hdf")
+             >>> if polygon_info_df is not None and polygon_parts_df is not None:
+             ...     print("Polygon Info:")
+             ...     print(polygon_info_df.head())
+             ...     print("Polygon Parts:")
+             ...     print(polygon_parts_df.head())
+             ... else:
+             ...     print("Polygon data not found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+             base_path = f'Geometry/2D Flow Areas'
+             polygon_info_df = cls._extract_dataset(hdf_file, f'{base_path}/Polygon Info', ['Column1', 'Column2', 'Column3', 'Column4'])
+             polygon_parts_df = cls._extract_dataset(hdf_file, f'{base_path}/Polygon Parts', ['Start', 'Count'])
+             return polygon_info_df, polygon_parts_df
+ 
+     @classmethod
+     @log_call
+     def get_polygon_points(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[pd.DataFrame]:
+         """
+         Extract Polygon Points from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing Polygon Points, or None if the data is not found.
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+             polygon_points_path = f'Geometry/2D Flow Areas/Polygon Points'
+             if polygon_points_path in hdf_file:
+                 polygon_points = hdf_file[polygon_points_path][()]
+                 polygon_points_df = pd.DataFrame(polygon_points, columns=['X', 'Y'])
+                 return polygon_points_df
+             else:
+                 return None
+ 
+     @classmethod
+     @log_call
+     def get_cells_center_data(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+         """
+         Extract Cells Center Coordinates and Manning's n from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+                 Two DataFrames containing Cells Center Coordinates and Manning's n respectively,
+                 or None for each if the corresponding data is not found.
+ 
+         Example:
+             >>> coords_df, mannings_df = RasHdf.get_cells_center_data("path/to/file.hdf")
+             >>> if coords_df is not None and mannings_df is not None:
+             ...     print("Cell Center Coordinates:")
+             ...     print(coords_df.head())
+             ...     print("Manning's n:")
+             ...     print(mannings_df.head())
+             ... else:
+             ...     print("Cell center data not found")
+         """
+         try:
+             hdf_filename = cls._get_hdf_filename(hdf_input, ras_object)
+             with h5py.File(hdf_filename, 'r') as hdf_file:
+                 area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+                 base_path = f'Geometry/2D Flow Areas/{area_name}'
+                 cells_center_coord_path = f'{base_path}/Cells Center Coordinate'
+                 cells_manning_n_path = f'{base_path}/Cells Center Manning\'s n'
+                 cells_center_coord_df = cls._extract_dataset(hdf_file, cells_center_coord_path, ['X', 'Y'])
+                 cells_manning_n_df = cls._extract_dataset(hdf_file, cells_manning_n_path, ['Manning\'s n'])
+                 return cells_center_coord_df, cells_manning_n_df
+         except Exception as e:
+             return None, None
+ 
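
An illustrative join of the two frames returned above; both are row-aligned per cell, so a column-wise concat lines them up (hypothetical path):

    import pandas as pd
    from ras_commander import RasHdf

    coords_df, mannings_df = RasHdf.get_cells_center_data("path/to/plan.p01.hdf")  # hypothetical
    if coords_df is not None and mannings_df is not None:
        cells_df = pd.concat([coords_df, mannings_df], axis=1)  # X, Y, Manning's n per cell
        print(cells_df.describe())
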
+     @classmethod
+     @log_call
+     def get_faces_area_elevation_data(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[pd.DataFrame]:
+         """
+         Extract Faces Area Elevation Values from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing Faces Area Elevation Values, or None if the data is not found.
+ 
+         Example:
+             >>> elevation_df = RasHdf.get_faces_area_elevation_data("path/to/file.hdf")
+             >>> if elevation_df is not None:
+             ...     print(elevation_df.head())
+             ... else:
+             ...     print("No Faces Area Elevation data found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+             base_path = f'Geometry/2D Flow Areas/{area_name}'
+             area_elev_values_path = f'{base_path}/Faces Area Elevation Values'
+ 
+             if area_elev_values_path in hdf_file:
+                 area_elev_values = hdf_file[area_elev_values_path][()]
+                 area_elev_values_df = pd.DataFrame(area_elev_values, columns=['Elevation', 'Area', 'Wetted Perimeter', 'Manning\'s n'])
+                 return area_elev_values_df
+             else:
+                 return None
+ 
+     @classmethod
+     @log_call
+     def get_faces_indexes(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+         """
+         Extract Faces Cell and FacePoint Indexes from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+                 Two DataFrames containing Faces Cell Indexes and FacePoint Indexes respectively,
+                 or None for each if the corresponding data is not found.
+ 
+         Example:
+             >>> cell_indexes_df, facepoint_indexes_df = RasHdf.get_faces_indexes("path/to/file.hdf")
+             >>> if cell_indexes_df is not None and facepoint_indexes_df is not None:
+             ...     print("Faces Cell Indexes:")
+             ...     print(cell_indexes_df.head())
+             ...     print("Faces FacePoint Indexes:")
+             ...     print(facepoint_indexes_df.head())
+             ... else:
+             ...     print("Faces indexes data not found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+ 
+             base_path = f'Geometry/2D Flow Areas/{area_name}'
+             cell_indexes_path = f'{base_path}/Faces Cell Indexes'
+             facepoint_indexes_path = f'{base_path}/Faces FacePoint Indexes'
+ 
+             cell_indexes_df = cls._extract_dataset(hdf_file, cell_indexes_path, ['Left Cell', 'Right Cell'])
+             facepoint_indexes_df = cls._extract_dataset(hdf_file, facepoint_indexes_path, ['Start FacePoint', 'End FacePoint'])
+ 
+             return cell_indexes_df, facepoint_indexes_df
+ 
+     @classmethod
+     @log_call
+     def get_faces_elevation_data(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+         """
+         Extract Faces Low Elevation Centroid and Minimum Elevation from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+                 DataFrames containing Faces Low Elevation Centroid and Minimum Elevation.
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+ 
+             base_path = f'Geometry/2D Flow Areas/{area_name}'
+             low_elev_centroid = cls._extract_dataset(hdf_file, f'{base_path}/Faces Low Elevation Centroid', ['Low Elevation Centroid'])
+             min_elevation = cls._extract_dataset(hdf_file, f'{base_path}/Faces Minimum Elevation', ['Minimum Elevation'])
+ 
+             return low_elev_centroid, min_elevation
+ 
+     @classmethod
+     @log_call
+     def get_faces_vector_data(
+         cls,
+         hdf_input: Union[str, Path],
+         area_name: Optional[str] = None,
+         ras_object=None
+     ) -> Optional[pd.DataFrame]:
+         """
+         Extract Faces NormalUnitVector and Length from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing Faces NormalUnitVector and Length.
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+ 
+             base_path = f'Geometry/2D Flow Areas/{area_name}'
+             vector_data = cls._extract_dataset(hdf_file, f'{base_path}/Faces NormalUnitVector and Length', ['NormalX', 'NormalY', 'Length'])
+ 
+             return vector_data
+ 
+     @classmethod
+     @log_call
+     def get_faces_perimeter_data(
+         cls,
+         hdf_input: Union[str, Path],
+         area_name: Optional[str] = None,
+         ras_object=None
+     ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+         """
+         Extract Faces Perimeter Info and Values from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+                 DataFrames containing Faces Perimeter Info and Values.
+ 
+         Raises:
+             ValueError: If no HDF file is found for the given plan number.
+             FileNotFoundError: If the specified HDF file does not exist.
+ 
+         Example:
+             >>> perimeter_info_df, perimeter_values_df = RasHdf.get_faces_perimeter_data("path/to/file.hdf")
+             >>> if perimeter_info_df is not None and perimeter_values_df is not None:
+             ...     print("Perimeter Info:")
+             ...     print(perimeter_info_df.head())
+             ...     print("Perimeter Values:")
+             ...     print(perimeter_values_df.head())
+             ... else:
+             ...     print("Perimeter data not found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+ 
+             base_path = f'Geometry/2D Flow Areas/{area_name}'
+             perimeter_info = cls._extract_dataset(hdf_file, f'{base_path}/Faces Perimeter Info', ['Start', 'Count'])
+             perimeter_values = cls._extract_dataset(hdf_file, f'{base_path}/Faces Perimeter Values', ['X', 'Y'])
+ 
+             return perimeter_info, perimeter_values
+ 
+     @classmethod
+     @log_call
+     def get_infiltration_data(
+         cls,
+         hdf_input: Union[str, Path],
+         area_name: Optional[str] = None,
+         ras_object=None
+     ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+         """
+         Extract Infiltration Data from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+                 DataFrames containing various Infiltration Data
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+ 
+             base_path = f'Geometry/2D Flow Areas/{area_name}/Infiltration'
+ 
+             cell_classifications = cls._extract_dataset(hdf_file, f'{base_path}/Cell Center Classifications', ['Cell Classification'])
+             face_classifications = cls._extract_dataset(hdf_file, f'{base_path}/Face Center Classifications', ['Face Classification'])
+             initial_deficit = cls._extract_dataset(hdf_file, f'{base_path}/Initial Deficit', ['Initial Deficit'])
+             maximum_deficit = cls._extract_dataset(hdf_file, f'{base_path}/Maximum Deficit', ['Maximum Deficit'])
+             potential_percolation_rate = cls._extract_dataset(hdf_file, f'{base_path}/Potential Percolation Rate', ['Potential Percolation Rate'])
+ 
+             return cell_classifications, face_classifications, initial_deficit, maximum_deficit, potential_percolation_rate
+ 
+     @classmethod
+     @log_call
+     def get_percent_impervious_data(
+         cls,
+         hdf_input: Union[str, Path],
+         area_name: Optional[str] = None,
+         ras_object=None
+     ) -> Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+         """
+         Extract Percent Impervious Data from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Tuple[Optional[pd.DataFrame], Optional[pd.DataFrame], Optional[pd.DataFrame]]:
+                 DataFrames containing Cell Classifications, Face Classifications, and Percent Impervious Data
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+ 
+             base_path = f'Geometry/2D Flow Areas/{area_name}/Percent Impervious'
+             cell_classifications = cls._extract_dataset(hdf_file, f'{base_path}/Cell Center Classifications', ['Cell Classification'])
+             face_classifications = cls._extract_dataset(hdf_file, f'{base_path}/Face Center Classifications', ['Face Classification'])
+             percent_impervious = cls._extract_dataset(hdf_file, f'{base_path}/Percent Impervious', ['Percent Impervious'])
+ 
+             return cell_classifications, face_classifications, percent_impervious
+ 
+     @classmethod
+     @log_call
+     def get_perimeter_data(
+         cls,
+         hdf_input: Union[str, Path],
+         area_name: Optional[str] = None,
+         ras_object=None
+     ) -> Optional[pd.DataFrame]:
+         """
+         Extract Perimeter Data from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
+                 If None, uses the first 2D Area Name found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: DataFrame containing Perimeter Data
+ 
+         Example:
+             >>> perimeter_df = RasHdf.get_perimeter_data("path/to/file.hdf")
+             >>> if perimeter_df is not None:
+             ...     print(perimeter_df.head())
+             ... else:
+             ...     print("Perimeter data not found")
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
+ 
+             perimeter_path = f'Geometry/2D Flow Areas/{area_name}/Perimeter'
+             perimeter_df = cls._extract_dataset(hdf_file, perimeter_path, ['X', 'Y'])
+ 
+             return perimeter_df
+ 
+     @classmethod
+     @log_call
+     def _get_area_name(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> str:
+         """
+         Get the 2D Flow Area name from the HDF file.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): The provided area name, if any.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             str: The 2D Flow Area name.
+ 
+         Raises:
+             ValueError: If no 2D Flow Areas are found in the HDF file or if the specified area name is not found.
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             if area_name is None:
+                 area_names = [name for name in hdf_file['Geometry/2D Flow Areas'].keys() if isinstance(hdf_file['Geometry/2D Flow Areas'][name], h5py.Group)]
+                 if not area_names:
+                     raise ValueError("No 2D Flow Areas found in the HDF file")
+                 area_name = area_names[0]
+             else:
+                 if area_name not in hdf_file['Geometry/2D Flow Areas']:
+                     raise ValueError(f"2D Flow Area '{area_name}' not found in the HDF file")
+             return area_name
+ 
+     @classmethod
+     @log_call
+     def _extract_dataset(cls, hdf_input: Union[str, Path], dataset_path: str, column_names: List[str], ras_object=None) -> Optional[pd.DataFrame]:
+         """
+         Extract a dataset from the HDF file and convert it to a DataFrame.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             dataset_path (str): The path to the dataset within the HDF file.
+             column_names (List[str]): The names to assign to the DataFrame columns.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[pd.DataFrame]: The extracted data as a DataFrame, or None if the dataset is not found.
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             try:
+                 dataset = hdf_file[dataset_path][()]
+                 df = pd.DataFrame(dataset, columns=column_names)
+                 return df
+             except KeyError:
+                 return None
+ 
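
The helper above reduces to a short h5py pattern; a standalone sketch under the same assumptions (hypothetical file path; the dataset path and columns mirror `get_cell_points` above):

    from typing import List, Optional
    import h5py
    import pandas as pd

    def extract_dataset(hdf_path: str, dataset_path: str, column_names: List[str]) -> Optional[pd.DataFrame]:
        # Same pattern as RasHdf._extract_dataset: read the whole dataset into
        # memory, wrap it in a DataFrame, and treat a missing path as "no data"
        with h5py.File(hdf_path, 'r') as hdf_file:
            try:
                return pd.DataFrame(hdf_file[dataset_path][()], columns=column_names)
            except KeyError:
                return None

    df = extract_dataset("path/to/plan.p01.hdf", "Geometry/2D Flow Areas/Cell Points", ["X", "Y"])
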
+     @classmethod
+     @log_call
+     def read_hdf_to_dataframe(cls, hdf_input: Union[str, Path], dataset_path: str, fill_value: Union[int, float, str] = -9999, ras_object=None) -> pd.DataFrame:
+         """
+         Reads an HDF5 dataset and converts it into a pandas DataFrame, handling byte strings and missing values.
+ 
+         Args:
+             hdf_input (Union[str, Path]): Path to the HDF file or plan number.
+             dataset_path (str): Path to the dataset within the HDF file.
              fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
  
          Returns:
              pd.DataFrame: The resulting DataFrame with byte strings decoded and missing values replaced.
  
+         Raises:
+             KeyError: If the dataset is not found in the HDF file.
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             try:
+                 hdf_dataset = hdf_file[dataset_path]
+                 hdf_dataframe = cls.convert_to_dataframe_array(hdf_dataset)
+                 byte_columns = [col for col in hdf_dataframe.columns if isinstance(hdf_dataframe[col].iloc[0], (bytes, bytearray))]
+ 
+                 hdf_dataframe[byte_columns] = hdf_dataframe[byte_columns].applymap(lambda x: x.decode('utf-8') if isinstance(x, (bytes, bytearray)) else x)
+                 hdf_dataframe = hdf_dataframe.replace({fill_value: np.NaN})
+ 
+                 return hdf_dataframe
+             except KeyError:
+                 raise
+ 
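
A standalone sketch of the decode-and-fill behavior on a toy file (uses `np.nan`; the method itself spells it `np.NaN`, an older alias removed in NumPy 2.0):

    import h5py
    import numpy as np
    import pandas as pd

    # Toy table with byte strings and -9999 sentinels, decoded and filled the
    # same way read_hdf_to_dataframe does
    arr = np.array([(b'Plan 01', -9999.0), (b'Plan 02', 3.5)],
                   dtype=[('Name', 'S16'), ('Value', 'f4')])
    with h5py.File('toy.h5', 'w') as f:
        f.create_dataset('table', data=arr)

    with h5py.File('toy.h5', 'r') as f:
        df = pd.DataFrame(f['table'][()])
    df['Name'] = df['Name'].str.decode('utf-8')
    df = df.replace({-9999: np.nan})
    print(df)
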
+     @classmethod
+     @log_call
+     def get_group_attributes_as_df(cls, hdf_input: Union[str, Path], group_path: str, ras_object=None) -> pd.DataFrame:
+         """
+         Convert attributes inside a given HDF group to a DataFrame.
+ 
+         Args:
+             hdf_input (Union[str, Path]): Path to the HDF file or plan number.
+             group_path (str): Path of the group in the HDF file.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             pd.DataFrame: DataFrame of all attributes in the specified group with their properties.
+ 
+         Raises:
+             KeyError: If the specified group_path is not found in the file.
+ 
          Example:
-             >>> with h5py.File('data.h5', 'r') as f:
-             ...     dataset = f['my_dataset']
-             ...     df = RasHdf.read_hdf_to_dataframe(dataset)
-             >>> print(df.head())
+             >>> attributes_df = RasHdf.get_group_attributes_as_df("path/to/file.hdf", "/Results/Unsteady/Output")
+             >>> print(attributes_df.head())
          """
-         df = RasHdf.convert_to_dataframe_array(hdf_dataset)
-         byte_cols = [col for col in df.columns if isinstance(df[col].iloc[0], (bytes, bytearray))]
-         test_byte_cols = [col for col in df.columns if isinstance(df[col].iloc[-1], (bytes, bytearray))]
-         assert byte_cols == test_byte_cols, "Inconsistent byte string columns detected"
+         hdf_filename = cls._get_hdf_filename(hdf_input, ras_object)
+ 
+         with h5py.File(hdf_filename, 'r') as hdf_file:
+             try:
+                 group = hdf_file[group_path]
+                 attributes = []
+                 for attr in group.attrs:
+                     value = group.attrs[attr]
+                     attr_info = {
+                         'Attribute': attr,
+                         'Value': value,
+                         'Type': type(value).__name__,
+                         'Shape': value.shape if isinstance(value, np.ndarray) else None,
+                         'Size': value.size if isinstance(value, np.ndarray) else None,
+                         'Dtype': value.dtype if isinstance(value, np.ndarray) else None
+                     }
+                     if isinstance(value, bytes):
+                         attr_info['Value'] = value.decode('utf-8')
+                     elif isinstance(value, np.ndarray):
+                         if value.dtype.kind == 'S':
+                             attr_info['Value'] = [v.decode('utf-8') for v in value]
+                         elif value.dtype.kind in ['i', 'f', 'u']:
+                             attr_info['Value'] = value.tolist()
+                     attributes.append(attr_info)
+ 
+                 return pd.DataFrame(attributes)
+             except KeyError:
+                 logger.critical(f"Group path '{group_path}' not found in HDF file '{hdf_filename}'")
+ 
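
An illustrative call against a group path referenced earlier in this module (hypothetical file path; note the method returns None implicitly if the group is missing):

    from ras_commander import RasHdf

    attrs_df = RasHdf.get_group_attributes_as_df("path/to/plan.p01.hdf", "/Plan Data/Plan Information")
    if attrs_df is not None and not attrs_df.empty:
        print(attrs_df[['Attribute', 'Value', 'Type']])
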
+     # Last functions from PyHMT2D:
+ 
+     from ras_commander.logging_config import log_call
+ 
+     @classmethod
+     @log_call
+     def get_2d_area_solution_times(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[np.ndarray]:
+         """
+         Retrieve solution times for a specified 2D Flow Area.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area. If None, uses the first area found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[np.ndarray]: Array of solution times, or None if not found.
  
+         Example:
+             >>> solution_times = RasHdf.get_2d_area_solution_times("03", area_name="Area1")
+             >>> print(solution_times)
+             [0.0, 0.5, 1.0, ...]
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             try:
+                 solution_times = np.array(
+                     hdf_file['Results']['Unsteady']['Output']['Output Blocks']
+                     ['Base Output']['Unsteady Time Series']['Time']
+                 )
+                 return solution_times
+             except KeyError:
+                 return None
+ 
+     @classmethod
+     @log_call
+     def get_2d_area_solution_time_dates(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[np.ndarray]:
+         """
+         Retrieve solution time dates for a specified 2D Flow Area.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             area_name (Optional[str]): Name of the 2D Flow Area. If None, uses the first area found.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[np.ndarray]: Array of solution time dates, or None if not found.
+ 
+         Example:
+             >>> solution_time_dates = RasHdf.get_2d_area_solution_time_dates("03", area_name="Area1")
+             >>> print(solution_time_dates)
+             ['2024-01-01T00:00:00', '2024-01-01T00:30:00', ...]
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             try:
+                 solution_time_dates = np.array(
+                     hdf_file['Results']['Unsteady']['Output']['Output Blocks']
+                     ['Base Output']['Unsteady Time Series']['Time Date Stamp']
+                 )
+                 return solution_time_dates
+             except KeyError:
+                 return None
+ 
+     @classmethod
+     @log_call
+     def load_2d_area_solutions(
+         cls,
+         hdf_file: h5py.File,
+         ras_object=None
+     ) -> Optional[Dict[str, pd.DataFrame]]:
+         """
+         Load 2D Area Solutions (Water Surface Elevation and Face Normal Velocity) from the HDF file
+         and provide them as pandas DataFrames.
+ 
+         **Note:**
+         - This function has only been tested with HEC-RAS version 6.5.
+         - Ensure that the HDF file structure matches the expected paths.
+ 
+         Args:
+             hdf_file (h5py.File): An open HDF5 file object.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[Dict[str, pd.DataFrame]]: A dictionary containing:
+                 - 'solution_times': DataFrame of solution times.
+                 - For each 2D Flow Area:
+                     - '{Area_Name}_WSE': Water Surface Elevation DataFrame.
+                     - '{Area_Name}_Face_Velocity': Face Normal Velocity DataFrame.
+         """
          try:
-             df[byte_cols] = df[byte_cols].applymap(lambda x: x.decode('utf-8'))
+             solution_times_path = '/Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/Time'
+             if solution_times_path not in hdf_file:
+                 return None
+ 
+             solution_times = hdf_file[solution_times_path][()]
+             solution_times_df = pd.DataFrame({
+                 'Time_Step': solution_times
+             })
+ 
+             solutions_dict = {
+                 'solution_times': solution_times_df
+             }
+ 
+             two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
+             if not two_d_area_names:
+                 return solutions_dict
+ 
+             for area in two_d_area_names:
+                 wse_path = f'/Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/2D Flow Areas/{area}/Water Surface'
+                 face_velocity_path = f'/Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/2D Flow Areas/{area}/Face Velocity'
+ 
+                 if wse_path not in hdf_file:
+                     continue
+ 
+                 wse_data = hdf_file[wse_path][()]
+                 cell_center_coords_path = f'/Geometry/2D Flow Areas/{area}/Cell Center Coordinate'
+                 if cell_center_coords_path not in hdf_file:
+                     continue
+ 
+                 cell_center_coords = hdf_file[cell_center_coords_path][()]
+                 if cell_center_coords.shape[0] != wse_data.shape[1]:
+                     continue
+ 
+                 wse_df = pd.DataFrame({
+                     'Time_Step': np.repeat(solution_times, wse_data.shape[1]),
+                     'Cell_ID': np.tile(np.arange(wse_data.shape[1]), wse_data.shape[0]),
+                     'X': cell_center_coords[:, 0].repeat(wse_data.shape[0]),
+                     'Y': cell_center_coords[:, 1].repeat(wse_data.shape[0]),
+                     'WSE': wse_data.flatten()
+                 })
+                 solutions_dict[f'{area}_WSE'] = wse_df
+ 
+                 if face_velocity_path not in hdf_file:
+                     continue
+ 
+                 face_velocity_data = hdf_file[face_velocity_path][()]
+                 face_center_coords_path = f'/Geometry/2D Flow Areas/{area}/Face Points Coordinates'
+                 if face_center_coords_path not in hdf_file:
+                     continue
+ 
+                 face_center_coords = hdf_file[face_center_coords_path][()]
+                 if face_center_coords.shape[0] != face_velocity_data.shape[1]:
+                     continue
+ 
+                 face_velocity_df = pd.DataFrame({
+                     'Time_Step': np.repeat(solution_times, face_velocity_data.shape[1]),
+                     'Face_ID': np.tile(np.arange(face_velocity_data.shape[1]), face_velocity_data.shape[0]),
+                     'X': face_center_coords[:, 0].repeat(face_velocity_data.shape[0]),
+                     'Y': face_center_coords[:, 1].repeat(face_velocity_data.shape[0]),
+                     'Normal_Velocity_ft_s': face_velocity_data.flatten()
+                 })
+                 solutions_dict[f'{area}_Face_Velocity'] = face_velocity_df
+ 
+             return solutions_dict
+ 
          except Exception as e:
-             print(f'WARNING: {e} while decoding byte strings in {hdf_dataset.name}, resuming')
+             return None
+ 
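
The per-area frames come back in long format; a sketch pivoting one WSE table into a time-by-cell matrix (hypothetical path; the function is passed an open h5py.File, as its signature requires):

    import h5py
    from ras_commander import RasHdf

    with h5py.File("path/to/plan.p01.hdf", 'r') as f:   # hypothetical path
        solutions = RasHdf.load_2d_area_solutions(f)

    if solutions:
        wse_keys = [k for k in solutions if k.endswith('_WSE')]
        if wse_keys:
            wse_long = solutions[wse_keys[0]]
            # Long format -> one row per time step, one column per cell
            wse_matrix = wse_long.pivot(index='Time_Step', columns='Cell_ID', values='WSE')
            print(wse_matrix.shape)
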
+     @classmethod
+     @log_call
+     def get_hdf_paths_with_properties(cls, hdf_input: Union[str, Path], ras_object=None) -> pd.DataFrame:
+         """
+         List all paths in the HDF file with their properties.
+ 
+         Args:
+             hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             pd.DataFrame: DataFrame of all paths and their properties in the HDF file.
+ 
+         Example:
+             >>> paths_df = RasHdf.get_hdf_paths_with_properties("path/to/file.hdf")
+             >>> print(paths_df.head())
+         """
+         with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
+             paths = []
+             def visitor_func(name: str, node: h5py.Group) -> None:
+                 path_info = {
+                     "HDF_Path": name,
+                     "Type": type(node).__name__,
+                     "Shape": getattr(node, "shape", None),
+                     "Size": getattr(node, "size", None),
+                     "Dtype": getattr(node, "dtype", None)
+                 }
+                 paths.append(path_info)
+             hdf_file.visititems(visitor_func)
+             return pd.DataFrame(paths)
+ 
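
A sketch filtering the inventory to datasets under the Geometry tree (hypothetical path; groups have no shape, so their `Shape` entry is None):

    from ras_commander import RasHdf

    paths_df = RasHdf.get_hdf_paths_with_properties("path/to/plan.p01.hdf")  # hypothetical
    geometry_datasets = paths_df[paths_df['HDF_Path'].str.startswith('Geometry')
                                 & paths_df['Shape'].notna()]
    print(geometry_datasets[['HDF_Path', 'Shape', 'Dtype']])
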
+     @classmethod
+     @log_call
+     def build_2d_area_face_hydraulic_information(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None) -> Optional[List[List[np.ndarray]]]:
+         """
+         Build face hydraulic information tables (elevation, area, wetted perimeter, Manning's n) for each face in 2D Flow Areas.
+ 
+         Args:
+             hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
+             area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
+             ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
+ 
+         Returns:
+             Optional[List[List[np.ndarray]]]: Nested lists containing hydraulic information for each face in each 2D Flow Area.
  
+         Example:
+             >>> hydraulic_info = RasHdf.build_2d_area_face_hydraulic_information("03")
+             >>> print(hydraulic_info[0][0])  # First face of first area
+             [[Elevation1, Area1, WettedPerim1, ManningN1],
+              [Elevation2, Area2, WettedPerim2, ManningN2],
+              ...]
+         """
+         try:
+             ras_obj = ras_object if ras_object is not None else ras
+             with h5py.File(cls._get_hdf_filename(hdf_input, ras_obj), 'r') as hdf_file:
+                 two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
+                 hydraulic_info_table = []
  
+                 for area in two_d_area_names:
+                     face_elev_info = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces Area Elevation Info'])
+                     face_elev_values = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces Area Elevation Values'])
+ 
+                     area_hydraulic_info = []
+                     for face in face_elev_info:
+                         start_row, count = face
+                         face_data = face_elev_values[start_row:start_row + count].copy()
+                         area_hydraulic_info.append(face_data)
+ 
+                     hydraulic_info_table.append(area_hydraulic_info)
+ 
+                 return hydraulic_info_table
+ 
+         except KeyError:
+             return None
+ 
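
Each face table holds rows of (Elevation, Area, Wetted Perimeter, Manning's n), per the column names used earlier in this module; a sketch interpolating face flow area at a water surface elevation with `np.interp` (hypothetical path; assumes elevations ascend, as rating tables do):

    import numpy as np
    from ras_commander import RasHdf

    tables = RasHdf.build_2d_area_face_hydraulic_information("path/to/plan.p01.hdf")  # hypothetical
    if tables:
        face_table = tables[0][0]       # first area, first face
        elevations = face_table[:, 0]   # column 0: Elevation
        areas = face_table[:, 1]        # column 1: Area
        wse = float(elevations.mean())  # arbitrary query elevation
        print(np.interp(wse, elevations, areas))
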
1045
+ @classmethod
1046
+ @log_call
1047
+ def build_2d_area_face_point_coordinates_list(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None) -> Optional[List[np.ndarray]]:
1048
+ """
1049
+ Build a list of face point coordinates for each 2D Flow Area.
1050
+
1051
+ Args:
1052
+ hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
1053
+ area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
1054
+ ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
1055
+
1056
+ Returns:
1057
+ Optional[List[np.ndarray]]: List containing arrays of face point coordinates for each 2D Flow Area.
1058
+
1059
+ Example:
1060
+ >>> face_coords_list = RasHdf.build_2d_area_face_point_coordinates_list("03")
1061
+ >>> print(face_coords_list[0]) # Coordinates for first area
1062
+ [[X1, Y1], [X2, Y2], ...]
52
1063
  """
53
- Saves a pandas DataFrame to an HDF5 dataset within a specified parent group.
1064
+ try:
1065
+ with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
1066
+ two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
1067
+ face_point_coords_list = []
1068
+
1069
+ for area in two_d_area_names:
1070
+ face_points = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Face Points Coordinates'])
1071
+ face_point_coords_list.append(face_points)
1072
+
1073
+ return face_point_coords_list
54
1074
 
55
- This function addresses limitations of `pd.to_hdf()` by using h5py to create and save datasets.
1075
+ except KeyError:
1076
+ return None
56
1077
 
1078
+ @classmethod
1079
+ @log_call
1080
+ def build_2d_area_face_profile(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None, n_face_profile_points: int = 10) -> Optional[List[np.ndarray]]:
1081
+ """
1082
+ Build face profiles representing sub-grid terrain for each face in 2D Flow Areas.
1083
+
57
1084
  Args:
58
- dataframe (pd.DataFrame): The DataFrame to save.
59
- hdf_parent_group (h5py.Group): The parent HDF5 group where the dataset will be created.
60
- dataset_name (str): The name of the new dataset to add in the HDF5 parent group.
61
- attributes (Optional[Dict[str, Union[int, float, str]]], optional): A dictionary of attributes to add to the dataset. Defaults to None.
62
- fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.
63
- **kwargs: Additional keyword arguments passed to `hdf_parent_group.create_dataset()`.
1085
+ hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
1086
+ area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
1087
+ ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
1088
+ n_face_profile_points (int): Number of points to interpolate along each face profile.
1089
+
1090
+ Returns:
1091
+ Optional[List[np.ndarray]]: List containing arrays of profile points for each face in each 2D Flow Area.
1092
+
1093
+ Example:
1094
+ >>> face_profiles = RasHdf.build_2d_area_face_profile("03", n_face_profile_points=20)
1095
+ >>> print(face_profiles[0][0]) # Profile points for first face of first area
1096
+ [[X1, Y1, Z1], [X2, Y2, Z2], ...]
1097
+ """
1098
+ try:
1099
+ with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
1100
+ two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
1101
+ face_profiles = []
1102
+
1103
+ for area in two_d_area_names:
1104
+ face_faces = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces FacePoint Indexes'])
1105
+ face_point_coords = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Face Points Coordinates'])
1106
+ profile_points_all_faces = []
64
1107
 
1108
+ for face in face_faces:
1109
+ face_start, face_end = face
1110
+ start_coords = face_point_coords[face_start]
1111
+ end_coords = face_point_coords[face_end]
1112
+
1113
+ length = cls.horizontal_distance(start_coords, end_coords)
1114
+ stations = np.linspace(0, length, n_face_profile_points, endpoint=True)
1115
+
1116
+ interpolated_points = np.array([
1117
+ start_coords + (end_coords - start_coords) * i / (n_face_profile_points - 1)
1118
+ for i in range(n_face_profile_points)
1119
+ ])
1120
+
1121
+ interpolated_points = cls.interpolate_z_coords(interpolated_points)
1122
+
1123
+ profile_points_all_faces.append(interpolated_points)
1124
+
1125
+ face_profiles.append(profile_points_all_faces)
1126
+
1127
+ return face_profiles
1128
+
1129
+ except KeyError as e:
1130
+ logging.error(f"Error building face profiles: {e}")
1131
+ return None
1132
+
1133
+ @classmethod
1134
+ @log_call
1135
+ def build_face_facepoints(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[List[np.ndarray]]:
1136
+ """
1137
+ Build face's facepoint list for each 2D Flow Area.
1138
+
1139
+ Args:
1140
+ hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
1141
+ area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
1142
+ ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
1143
+
65
1144
  Returns:
66
- h5py.Dataset: The created HDF5 dataset within the parent group.
1145
+ Optional[List[np.ndarray]]: List containing arrays of face point indexes for each face in each 2D Flow Area.
1146
+
1147
+ Example:
1148
+ >>> face_facepoints = RasHdf.build_face_facepoints("03")
1149
+ >>> print(face_facepoints[0][0]) # FacePoint indexes for first face of first area
1150
+ [start_idx, end_idx]
1151
+ """
1152
+ try:
1153
+ with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
1154
+ two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
1155
+ face_facepoints_list = []
1156
+
1157
+ for area in two_d_area_names:
1158
+ face_facepoints = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces FacePoint Indexes'])
1159
+ face_facepoints_list.append(face_facepoints)
67
1160
 
1161
+ return face_facepoints_list
1162
+
1163
+ except KeyError as e:
1164
+ logger = logging.getLogger(__name__)
1165
+ logger.error(f"Error building face facepoints list: {e}")
1166
+ return None
1167
+
1168
+ @classmethod
1169
+ @log_call
1170
+ def build_2d_area_boundaries(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[Tuple[int, np.ndarray, List[str], List[str], List[str], np.ndarray, np.ndarray]]:
1171
+ """
1172
+ Build boundaries with their point lists for each 2D Flow Area.
1173
+
1174
+ Args:
1175
+ hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
1176
+ area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
1177
+ ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
1178
+
1179
+ Returns:
1180
+ Optional[Tuple[int, np.ndarray, List[str], List[str], List[str], np.ndarray, np.ndarray]]:
1181
+ Tuple containing total boundaries, boundary IDs, boundary names, associated 2D Flow Area names, boundary types,
1182
+ total points per boundary, and boundary point lists.
1183
+
68
1184
  Example:
69
- >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
70
- >>> with h5py.File('data.h5', 'w') as f:
71
- ... group = f.create_group('my_group')
72
- ... dataset = RasHdf.save_dataframe_to_hdf(df, group, 'my_dataset')
73
- >>> print(dataset)
1185
+ >>> total_boundaries, boundary_ids, boundary_names, flow_area_names, boundary_types, total_points, boundary_points = RasHdf.build_2d_area_boundaries("03")
1186
+ >>> print(total_boundaries)
1187
+ 5
74
1188
  """
75
- df = dataframe.copy()
1189
+ try:
1190
+ with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
1191
+ two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
1192
+ total_boundaries = 0
1193
+ boundary_ids = []
1194
+ boundary_names = []
1195
+ flow_area_names = []
1196
+ boundary_types = []
1197
+ total_points_per_boundary = []
1198
+ boundary_points_list = []
1199
+
1200
+ for area in two_d_area_names:
+ # Honor the area_name argument, which was previously ignored
+ if area_name is not None and area != area_name:
+ continue
+ boundary_points = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Boundary Points'])
+ if boundary_points.size == 0:
+ logger.warning(f"No boundary points found for 2D Flow Area: {area}")
+ continue
1206
+
1207
+ # Rows of 'Boundary Points' are grouped by boundary ID; this loop assumes
+ # each row is laid out as [boundary_id, boundary_name, boundary_type, facepoint_index]
+ first_row = boundary_points[0]
+ current_boundary_id = first_row[0]
+ current_boundary_name = first_row[1] # Assuming boundary name is stored here
+ current_boundary_type = first_row[2] # Assuming boundary type is stored here
+ current_boundary_points = [first_row[3]]
+
+ for point in boundary_points[1:]:
+ if point[0] == current_boundary_id:
+ current_boundary_points.append(point[3])
+ else:
+ # Save the completed boundary, using the values cached from its own
+ # first row rather than the row that starts the next boundary
+ boundary_ids.append(current_boundary_id)
+ boundary_names.append(current_boundary_name)
+ flow_area_names.append(area)
+ boundary_types.append(current_boundary_type)
+ total_points_per_boundary.append(len(current_boundary_points))
+ boundary_points_list.append(np.array(current_boundary_points))
+ total_boundaries += 1
+
+ # Start a new boundary
+ current_boundary_id = point[0]
+ current_boundary_name = point[1] # Assuming boundary name is stored here
+ current_boundary_type = point[2] # Assuming boundary type is stored here
+ current_boundary_points = [point[3]]
76
1227
 
77
- if df.columns.dtype == 'O':
78
- df.columns = df.columns.str.replace('/', '-')
1228
+ # Save the last boundary of this area
+ boundary_ids.append(current_boundary_id)
+ boundary_names.append(current_boundary_name)
+ flow_area_names.append(area)
+ boundary_types.append(current_boundary_type)
+ total_points_per_boundary.append(len(current_boundary_points))
+ boundary_points_list.append(np.array(current_boundary_points))
+ total_boundaries += 1
+
+ # dtype=object keeps the ragged per-boundary point arrays intact
+ return (total_boundaries, np.array(boundary_ids), boundary_names, flow_area_names, boundary_types, np.array(total_points_per_boundary), np.array(boundary_points_list, dtype=object))
1238
+
1239
+ except KeyError as e:
1240
+ logger.error(f"Error building boundaries: {e}")
1242
+ return None
1243
+
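The row-by-row grouping above is a run-length grouping on the boundary ID column. For reference, a compact equivalent with `itertools.groupby`, under the same assumed row layout `[boundary_id, boundary_name, boundary_type, facepoint_index]`:

    from itertools import groupby
    import numpy as np

    def group_boundaries(boundary_points: np.ndarray) -> list:
        """Group 'Boundary Points' rows into one record per boundary.

        Assumes rows are sorted by boundary ID; the column layout is an
        assumption carried over from the method above.
        """
        boundaries = []
        for boundary_id, rows in groupby(boundary_points, key=lambda row: row[0]):
            rows = list(rows)
            boundaries.append({
                "id": boundary_id,
                "name": rows[0][1],  # assumed name column
                "type": rows[0][2],  # assumed type column
                "facepoints": np.array([row[3] for row in rows]),
            })
        return boundaries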
1244
+ # Helper Methods for New Functionalities
1245
+
1246
+ @classmethod
1247
+ @log_call
1248
+ def horizontal_distance(cls, coord1: np.ndarray, coord2: np.ndarray) -> float:
1249
+ """
1250
+ Calculate the horizontal distance between two coordinate points.
79
1251
 
80
- df = df.fillna(fill_value)
1252
+ Args:
1253
+ coord1 (np.ndarray): First coordinate point [X, Y].
1254
+ coord2 (np.ndarray): Second coordinate point [X, Y].
81
1255
 
82
- string_cols = [col for col in df.columns if isinstance(df[col].iloc[0], str)]
83
- test_string_cols = [col for col in df.columns if isinstance(df[col].iloc[-1], str)]
84
- assert string_cols == test_string_cols, "Inconsistent string columns detected"
1256
+ Returns:
1257
+ float: Horizontal distance.
85
1258
 
86
- df[string_cols] = df[string_cols].applymap(lambda x: x.encode('utf-8')).astype('bytes')
1259
+ Example:
1260
+ >>> distance = RasHdf.horizontal_distance([0, 0], [3, 4])
1261
+ >>> print(distance)
1262
+ 5.0
1263
+ """
1264
+ # Cast to arrays so plain Python lists (as in the example) also work
+ return float(np.linalg.norm(np.asarray(coord2, dtype=float) - np.asarray(coord1, dtype=float)))
87
1265
 
88
- if isinstance(df.columns, pd.RangeIndex):
89
- arr = df.values
90
- else:
91
- arr_dt = [(col, df[col].dtype) for col in df.columns]
92
- arr = np.empty((len(df),), dtype=arr_dt)
93
- for col in df.columns:
94
- arr[col] = df[col].values
95
-
96
- if dataset_name in hdf_parent_group:
97
- del hdf_parent_group[dataset_name]
1266
+ @classmethod
1267
+ @log_call
1268
+ def interpolate_z_coords(cls, points: np.ndarray) -> np.ndarray:
1269
+ """
1270
+ Interpolate Z coordinates for a set of points.
98
1271
 
99
- dataset = hdf_parent_group.create_dataset(dataset_name, data=arr, **kwargs)
1272
+ Args:
1273
+ points (np.ndarray): Array of points with [X, Y].
100
1274
 
101
- if attributes:
102
- dataset.attrs.update(attributes)
1275
+ Returns:
1276
+ np.ndarray: Array of points with [X, Y, Z].
103
1277
 
104
- return dataset
1278
+ Example:
1279
+ >>> interpolated = RasHdf.interpolate_z_coords(np.array([[0,0], [1,1]]))
1280
+ >>> print(interpolated)
1281
+ [[0. 0. 0.]
+ [1. 1. 0.]]
1283
+ """
1284
+ # Placeholder for actual interpolation logic
1285
+ # This should be replaced with the appropriate interpolation method
1286
+ z_coords = np.zeros((points.shape[0], 1)) # Assuming Z=0 for simplicity
1287
+ return np.hstack((points, z_coords))
105
1288
 
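Since the body above is an acknowledged placeholder, one plausible replacement (an assumption on my part, not this library's implementation) interpolates Z from a set of known ground points with `scipy.interpolate.griddata`:

    import numpy as np
    from scipy.interpolate import griddata

    def interpolate_z_from_terrain(points: np.ndarray, terrain_xyz: np.ndarray) -> np.ndarray:
        """Attach interpolated Z values to an (N, 2) array of [X, Y] points.

        terrain_xyz is a hypothetical (M, 3) array of known ground points;
        this module does not currently provide one.
        """
        z = griddata(
            terrain_xyz[:, :2],   # known XY locations
            terrain_xyz[:, 2],    # known Z values
            points,               # query XY locations
            method="linear",
            fill_value=np.nan,    # flags points outside the convex hull
        )
        return np.hstack((points, z.reshape(-1, 1)))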
106
- @staticmethod
107
- def perform_kdtree_query(reference_points: np.ndarray, query_points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
1289
+ @classmethod
1290
+ @log_call
1291
+ def extract_string_from_hdf(
1292
+ cls,
1293
+ hdf_input: Union[str, Path],
1294
+ hdf_path: str,
1295
+ ras_object: Optional["RasPrj"] = None
1296
+ ) -> Union[str, List[str]]:
1297
+ """
1298
+ Extract a string (or list of strings) from an HDF object at a given path.
1299
+
1300
+ Args:
1301
+ hdf_input (Union[str, Path]): Either the plan number or the full path to the HDF file.
1302
+ hdf_path (str): Path of the object in the HDF file.
1303
+ ras_object (Optional["RasPrj"]): Specific RAS object to use. If None, uses the global ras instance.
1304
+
1305
+ Returns:
1306
+ Union[str, List[str]]: Extracted string, or a list of decoded strings when the dataset holds a string array.
1307
+
1308
+ Raises:
1309
+ ValueError: If no HDF file is found for the given plan number.
1310
+ FileNotFoundError: If the specified HDF file does not exist.
1311
+ KeyError: If the specified hdf_path is not found in the file.
1312
+
1313
+ Example:
1314
+ >>> result = RasHdf.extract_string_from_hdf("path/to/file.hdf", "/Results/Summary/Compute Messages (text)")
1315
+ >>> print(result)
1316
+ """
1317
+ with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
1318
+ try:
1319
+ hdf_object = hdf_file[hdf_path]
1320
+ if isinstance(hdf_object, h5py.Group):
1321
+ return f"Group: {hdf_path}\nContents: {list(hdf_object.keys())}"
1322
+ elif isinstance(hdf_object, h5py.Dataset):
1323
+ data = hdf_object[()]
1324
+ if isinstance(data, bytes):
1325
+ return data.decode('utf-8')
1326
+ elif isinstance(data, np.ndarray) and data.dtype.kind == 'S':
1327
+ return [v.decode('utf-8') for v in data]
1328
+ else:
1329
+ return str(data)
1330
+ else:
1331
+ return f"Unsupported object type: {type(hdf_object)}"
1332
+ except KeyError:
1333
+ logger.error(f"Path not found: {hdf_path}")
1335
+ raise KeyError(f"Path not found: {hdf_path}")
1336
+
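A short usage sketch, reusing the HDF path from the docstring example (the file path is a placeholder):

    # Compute messages are stored as text; string-array datasets come back as a list.
    messages = RasHdf.extract_string_from_hdf(
        "path/to/file.hdf",
        "/Results/Summary/Compute Messages (text)",
    )
    if isinstance(messages, list):
        for line in messages:
            print(line)
    else:
        print(messages)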
1337
+ @staticmethod
1338
+ @log_call
1339
+ def decode_byte_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
1340
+ """
1341
+ Decodes byte strings in a DataFrame to regular string objects.
1342
+
1343
+ This function converts columns with byte-encoded strings (e.g., b'string') into UTF-8 decoded strings.
1344
+
1345
+ Args:
1346
+ dataframe (pd.DataFrame): The DataFrame containing byte-encoded string columns.
1347
+
1348
+ Returns:
1349
+ pd.DataFrame: The DataFrame with byte strings decoded to regular strings.
1350
+
1351
+ Example:
1352
+ >>> df = pd.DataFrame({'A': [b'hello', b'world'], 'B': [1, 2]})
1353
+ >>> decoded_df = RasHdf.decode_byte_strings(df)
1354
+ >>> print(decoded_df)
1355
+ A B
1356
+ 0 hello 1
1357
+ 1 world 2
1358
+ """
1359
+ str_df = dataframe.select_dtypes(['object'])
1360
+ str_df = str_df.stack().str.decode('utf-8').unstack()
1361
+ for col in str_df:
1362
+ dataframe[col] = str_df[col]
1363
+ return dataframe
1364
+
1365
+ @staticmethod
1366
+ @log_call
1367
+ def perform_kdtree_query(
1368
+ reference_points: np.ndarray,
1369
+ query_points: np.ndarray,
1370
+ max_distance: float = 2.0
1371
+ ) -> np.ndarray:
108
1372
  """
109
1373
  Performs a KDTree query between two datasets and returns indices with distances exceeding max_distance set to -1.
110
1374
 
@@ -120,14 +1384,16 @@ class RasHdf:
120
1384
  Example:
121
1385
  >>> ref_points = np.array([[0, 0], [1, 1], [2, 2]])
122
1386
  >>> query_points = np.array([[0.5, 0.5], [3, 3]])
123
- >>> RasHdf.perform_kdtree_query(ref_points, query_points)
1387
+ >>> result = RasHdf.perform_kdtree_query(ref_points, query_points)
1388
+ >>> print(result)
124
1389
  array([ 0, -1])
125
1390
  """
126
1391
  dist, snap = KDTree(reference_points).query(query_points, distance_upper_bound=max_distance)
127
1392
  snap[dist > max_distance] = -1
128
1393
  return snap
129
1394
 
130
- @staticmethod
1395
+ @staticmethod
1396
+ @log_call
131
1397
  def find_nearest_neighbors(points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
132
1398
  """
133
1399
  Creates a self KDTree for dataset points and finds nearest neighbors excluding self,
@@ -144,7 +1410,8 @@ class RasHdf:
144
1410
 
145
1411
  Example:
146
1412
  >>> points = np.array([[0, 0], [1, 1], [2, 2], [10, 10]])
147
- >>> RasHdf.find_nearest_neighbors(points)
1413
+ >>> result = RasHdf.find_nearest_neighbors(points)
1414
+ >>> print(result)
148
1415
  array([1, 0, 1, -1])
149
1416
  """
150
1417
  dist, snap = KDTree(points).query(points, k=2, distance_upper_bound=max_distance)
@@ -158,31 +1425,35 @@ class RasHdf:
158
1425
  snapped = filled.fillna(-1).astype(np.int64).to_numpy()
159
1426
  return snapped
160
1427
 
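The two KDTree helpers pair naturally: `perform_kdtree_query` snaps one point set onto another, while `find_nearest_neighbors` flags close pairs within a single set. A small self-contained sketch with made-up coordinates:

    import numpy as np

    # Snap surveyed points to mesh cell centers within 2.0 units; -1 means "no match".
    cell_centers = np.array([[0.0, 0.0], [5.0, 5.0], [10.0, 10.0]])
    surveyed = np.array([[0.4, -0.3], [5.2, 4.9], [50.0, 50.0]])
    print(RasHdf.perform_kdtree_query(cell_centers, surveyed))  # array([ 0,  1, -1])

    # Within the surveyed set, find each point's nearest neighbor (excluding itself).
    print(RasHdf.find_nearest_neighbors(surveyed))  # -1 marks isolated points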
161
- @staticmethod
162
- def consolidate_dataframe(dataframe: pd.DataFrame,
163
- group_by: Optional[Union[str, List[str]]] = None,
164
- pivot_columns: Optional[Union[str, List[str]]] = None,
165
- level: Optional[int] = None,
166
- n_dimensional: bool = False,
167
- aggregation_method: Union[str, Callable] = 'list') -> pd.DataFrame:
1428
+ @staticmethod
1429
+ @log_call
1430
+ def consolidate_dataframe(
1431
+ dataframe: pd.DataFrame,
1432
+ group_by: Optional[Union[str, List[str]]] = None,
1433
+ pivot_columns: Optional[Union[str, List[str]]] = None,
1434
+ level: Optional[int] = None,
1435
+ n_dimensional: bool = False,
1436
+ aggregation_method: Union[str, Callable] = 'list'
1437
+ ) -> pd.DataFrame:
168
1438
  """
169
1439
  Consolidate rows in a DataFrame by merging duplicate values into lists or using a specified aggregation function.
170
1440
 
171
1441
  Args:
172
1442
  dataframe (pd.DataFrame): The DataFrame to consolidate.
173
- group_by (Optional[Union[str, List[str]]], optional): Columns or indices to group by. Defaults to None.
174
- pivot_columns (Optional[Union[str, List[str]]], optional): Columns to pivot. Defaults to None.
175
- level (Optional[int], optional): Level of multi-index to group by. Defaults to None.
176
- n_dimensional (bool, optional): If True, use a pivot table for N-Dimensional consolidation. Defaults to False.
177
- aggregation_method (Union[str, Callable], optional): Aggregation method, e.g., 'list' to aggregate into lists. Defaults to 'list'.
1443
+ group_by (Optional[Union[str, List[str]]]): Columns or indices to group by.
1444
+ pivot_columns (Optional[Union[str, List[str]]]): Columns to pivot.
1445
+ level (Optional[int]): Level of multi-index to group by.
1446
+ n_dimensional (bool): If True, use a pivot table for N-Dimensional consolidation.
1447
+ aggregation_method (Union[str, Callable]): Aggregation method, e.g., 'list' to aggregate into lists.
178
1448
 
179
1449
  Returns:
180
1450
  pd.DataFrame: The consolidated DataFrame.
181
1451
 
182
1452
  Example:
183
1453
  >>> df = pd.DataFrame({'A': [1, 1, 2], 'B': [4, 5, 6], 'C': [7, 8, 9]})
184
- >>> RasHdf.consolidate_dataframe(df, group_by='A')
185
- B C
1454
+ >>> result = RasHdf.consolidate_dataframe(df, group_by='A')
1455
+ >>> print(result)
1456
+ B C
186
1457
  A
187
1458
  1 [4, 5] [7, 8]
188
1459
  2 [6] [9]
@@ -198,34 +1469,9 @@ class RasHdf:
198
1469
  result = dataframe.groupby(group_by, level=level).agg(agg_func).applymap(list)
199
1470
 
200
1471
  return result
201
-
202
- @staticmethod
203
- def decode_byte_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
204
- """
205
- Decodes byte strings in a DataFrame to regular string objects.
206
-
207
- This function converts columns with byte-encoded strings (e.g., b'string') into UTF-8 decoded strings.
208
-
209
- Args:
210
- dataframe (pd.DataFrame): The DataFrame containing byte-encoded string columns.
211
-
212
- Returns:
213
- pd.DataFrame: The DataFrame with byte strings decoded to regular strings.
214
-
215
- Example:
216
- >>> df = pd.DataFrame({'A': [b'hello', b'world'], 'B': [1, 2]})
217
- >>> RasHdf.decode_byte_strings(df)
218
- A B
219
- 0 hello 1
220
- 1 world 2
221
- """
222
- str_df = dataframe.select_dtypes(['object'])
223
- str_df = str_df.stack().str.decode('utf-8').unstack()
224
- for col in str_df:
225
- dataframe[col] = str_df[col]
226
- return dataframe
227
-
228
- @staticmethod
1472
+
1473
+ @staticmethod
1474
+ @log_call
229
1475
  def find_nearest_value(array: Union[list, np.ndarray], target_value: Union[int, float]) -> Union[int, float]:
230
1476
  """
231
1477
  Finds the nearest value in a NumPy array to the specified target value.
@@ -239,10 +1485,135 @@ class RasHdf:
239
1485
 
240
1486
  Example:
241
1487
  >>> arr = np.array([1, 3, 5, 7, 9])
242
- >>> RasHdf.find_nearest_value(arr, 6)
1488
+ >>> result = RasHdf.find_nearest_value(arr, 6)
1489
+ >>> print(result)
243
1490
  5
244
1491
  """
245
1492
  array = np.asarray(array)
246
1493
  idx = (np.abs(array - target_value)).argmin()
247
1494
  return array[idx]
1495
+
1496
+ @staticmethod
1497
+ @log_call
1498
+ def _get_hdf_filename(hdf_input: Union[str, Path, h5py.File], ras_object=None) -> Optional[Path]:
1499
+ """
1500
+ Get the HDF filename from the input.
1501
+
1502
+ Args:
1503
+ hdf_input (Union[str, Path, h5py.File]): The plan number, full path to the HDF file as a string, a Path object, or an h5py.File object.
1504
+ ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
1505
+
1506
+ Returns:
1507
+ Optional[Path]: The full path to the HDF file as a Path object, or None if an error occurs.
1508
+
1509
+ Note:
1510
+ This method logs critical errors instead of raising exceptions.
1511
+ """
1512
+
1513
+ # If hdf_input is already an h5py.File object, return its filename
1514
+ if isinstance(hdf_input, h5py.File):
1515
+ return Path(hdf_input.filename)
1516
+
1517
+ # Convert to Path object if it's a string
1518
+ if isinstance(hdf_input, str):
1519
+ hdf_input = Path(hdf_input)
1520
+
1521
+ # If hdf_input is a file path, return it directly
1522
+ if isinstance(hdf_input, Path) and hdf_input.is_file():
1523
+ return hdf_input
1524
+
1525
+ # If hdf_input is not a file path, assume it's a plan number and require ras_object
1526
+ ras_obj = ras_object or ras
1527
+ if not ras_obj.initialized:
1528
+ logger.critical("ras_object is not initialized. ras_object is required when hdf_input is not a direct file path.")
1529
+ return None
1530
+
1531
+ plan_info = ras_obj.plan_df[ras_obj.plan_df['plan_number'] == str(hdf_input)]
1532
+ if plan_info.empty:
1533
+ logger.critical(f"No HDF file found for plan number {hdf_input}")
1534
+ return None
1535
+
1536
+ hdf_filename = plan_info.iloc[0]['HDF_Results_Path']
1537
+ if hdf_filename is None:
1538
+ logger.critical(f"HDF_Results_Path is None for plan number {hdf_input}")
1539
+ return None
1540
+
1541
+ hdf_path = Path(hdf_filename)
1542
+ if not hdf_path.is_file():
1543
+ logger.critical(f"HDF file not found: {hdf_path}")
1544
+ return None
1545
+
1546
+ return hdf_path
1547
+
1548
+
1549
+
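A brief sketch of the two input forms this helper accepts (paths shown are placeholders, and the `init_ras_project` arguments are assumptions about that function's signature):

    # A direct file path needs no project context.
    path = RasHdf._get_hdf_filename("path/to/results.p03.hdf")

    # A plan number is resolved through an initialized project.
    init_ras_project("path/to/project_folder", "6.5")  # placeholder arguments
    path = RasHdf._get_hdf_filename("03", ras_object=ras)
    if path is None:
        print("No results HDF found for plan 03")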
1550
+ @staticmethod
+ @log_call
1551
+ def save_dataframe_to_hdf(
1552
+ dataframe: pd.DataFrame,
1553
+ hdf_parent_group: h5py.Group,
1554
+ dataset_name: str,
1555
+ attributes: Optional[Dict[str, Union[int, float, str]]] = None,
1556
+ fill_value: Union[int, float, str] = -9999,
1557
+ **kwargs: Any
1558
+ ) -> h5py.Dataset:
1559
+ """
1560
+ Save a pandas DataFrame to an HDF5 dataset within a specified parent group.
1561
+
1562
+ This function addresses limitations of `pd.to_hdf()` by using h5py to create and save datasets.
248
1563
 
1564
+ Args:
1565
+ dataframe (pd.DataFrame): The DataFrame to save.
1566
+ hdf_parent_group (h5py.Group): The parent HDF5 group where the dataset will be created.
1567
+ dataset_name (str): The name of the new dataset to add in the HDF5 parent group.
1568
+ attributes (Optional[Dict[str, Union[int, float, str]]]): A dictionary of attributes to add to the dataset.
1569
+ fill_value (Union[int, float, str]): The value to use for filling missing data.
1570
+ **kwargs: Additional keyword arguments passed to `hdf_parent_group.create_dataset()`.
1571
+
1572
+ Returns:
1573
+ h5py.Dataset: The created HDF5 dataset within the parent group.
1574
+
1575
+ Raises:
1576
+ ValueError: If the DataFrame columns are not consistent.
1577
+
1578
+ Example:
1579
+ >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
1580
+ >>> with h5py.File('data.h5', 'w') as f:
1581
+ ... group = f.create_group('my_group')
1582
+ ... dataset = RasHdf.save_dataframe_to_hdf(df, group, 'my_dataset')
1583
+ >>> print(dataset)
1584
+ """
1585
+ df = dataframe.copy()
1586
+
1587
+ # Replace '/' in column names with '-' to avoid issues in HDF5
1588
+ if df.columns.dtype == 'O':
1589
+ df.columns = df.columns.str.replace('/', '-', regex=False)
1590
+
1591
+ # Fill missing values with the specified fill_value
1592
+ df = df.fillna(fill_value)
1593
+
1594
+ # Identify object-dtype columns and verify they hold only strings,
+ # since mixed-type columns cannot be byte-encoded below
+ string_cols = df.select_dtypes(include=['object']).columns
+ for col in string_cols:
+ if not df[col].map(lambda x: isinstance(x, str)).all():
+ logger.error(f"Inconsistent string column detected: {col}")
+ raise ValueError(f"Column '{col}' mixes strings with non-string values")
1599
+
1600
+ # Encode string columns to bytes
1601
+ df[string_cols] = df[string_cols].applymap(lambda x: x.encode('utf-8')).astype('bytes')
1602
+
1603
+ # Prepare data for HDF5 dataset creation
1604
+ arr = df.to_records(index=False) if not isinstance(df.columns, pd.RangeIndex) else df.values
1605
+
1606
+ # Remove existing dataset if it exists
1607
+ if dataset_name in hdf_parent_group:
1608
+ logger.warning(f"Existing dataset {dataset_name} will be overwritten")
1609
+ del hdf_parent_group[dataset_name]
1610
+
1611
+ # Create the dataset in the HDF5 file
1612
+ dataset = hdf_parent_group.create_dataset(dataset_name, data=arr, **kwargs)
1613
+
1614
+ # Update dataset attributes if provided
1615
+ if attributes:
1616
+ dataset.attrs.update(attributes)
1617
+
1618
+ logger.info(f"Successfully saved DataFrame to dataset: {dataset_name}")
1619
+ return dataset
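To close the loop, a round-trip sketch: save a DataFrame with this method, read it back with h5py, and undo the byte encoding with `decode_byte_strings` (file and group names are placeholders):

    import h5py
    import pandas as pd

    df = pd.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})

    with h5py.File("data.h5", "w") as f:
        group = f.create_group("my_group")
        RasHdf.save_dataframe_to_hdf(df, group, "my_dataset", attributes={"source": "example"})

    with h5py.File("data.h5", "r") as f:
        arr = f["my_group/my_dataset"][()]
        restored = RasHdf.decode_byte_strings(pd.DataFrame(arr))
        print(restored)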