ras-commander 0.40.0__py3-none-any.whl → 0.41.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ras_commander/RasExamples.py +10 -113
- ras_commander/RasGeo.py +8 -0
- ras_commander/RasHdf.py +2 -977
- ras_commander/RasPlan.py +88 -297
- ras_commander/RasPrj.py +15 -20
- ras_commander/RasUtils.py +114 -1
- {ras_commander-0.40.0.dist-info → ras_commander-0.41.0.dist-info}/METADATA +3 -2
- ras_commander-0.41.0.dist-info/RECORD +16 -0
- ras_commander/_version.py +0 -16
- ras_commander-0.40.0.dist-info/RECORD +0 -17
- {ras_commander-0.40.0.dist-info → ras_commander-0.41.0.dist-info}/LICENSE +0 -0
- {ras_commander-0.40.0.dist-info → ras_commander-0.41.0.dist-info}/WHEEL +0 -0
- {ras_commander-0.40.0.dist-info → ras_commander-0.41.0.dist-info}/top_level.txt +0 -0
ras_commander/RasHdf.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
"""
|
2
2
|
RasHdf Module
|
3
3
|
|
4
|
-
This module provides utilities for working with HDF files in HEC-RAS projects.
|
4
|
+
This module provides utilities for working with RESULTS (Plan) HDF files in HEC-RAS projects.
|
5
5
|
It contains the RasHdf class, which offers various static methods for extracting,
|
6
|
-
analyzing, and manipulating data from HEC-RAS HDF files.
|
6
|
+
analyzing, and manipulating data from HEC-RAS RESULTS HDF files.
|
7
7
|
|
8
8
|
Note:
|
9
9
|
This method is decorated with @hdf_operation, which handles the opening and closing of the HDF file.
|
@@ -642,978 +642,3 @@ class RasHdf:
|
|
642
642
|
percent_impervious = cls._extract_dataset(hdf_file, f'{base_path}/Percent Impervious', ['Percent Impervious'])
|
643
643
|
|
644
644
|
return cell_classifications, face_classifications, percent_impervious
|
645
|
-
|
646
|
-
@classmethod
|
647
|
-
@log_call
|
648
|
-
def get_perimeter_data(
|
649
|
-
cls,
|
650
|
-
hdf_input: Union[str, Path],
|
651
|
-
area_name: Optional[str] = None,
|
652
|
-
ras_object=None
|
653
|
-
) -> Optional[pd.DataFrame]:
|
654
|
-
"""
|
655
|
-
Extract Perimeter Data from the HDF file.
|
656
|
-
|
657
|
-
Args:
|
658
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
659
|
-
area_name (Optional[str]): Name of the 2D Flow Area to extract data from.
|
660
|
-
If None, uses the first 2D Area Name found.
|
661
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
662
|
-
|
663
|
-
Returns:
|
664
|
-
Optional[pd.DataFrame]: DataFrame containing Perimeter Data
|
665
|
-
|
666
|
-
Example:
|
667
|
-
>>> perimeter_df = RasHdf.get_perimeter_data("path/to/file.hdf")
|
668
|
-
>>> if perimeter_df is not None:
|
669
|
-
... print(perimeter_df.head())
|
670
|
-
... else:
|
671
|
-
... print("Perimeter data not found")
|
672
|
-
"""
|
673
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
674
|
-
area_name = cls._get_area_name(hdf_file, area_name, hdf_file.filename)
|
675
|
-
|
676
|
-
perimeter_path = f'Geometry/2D Flow Areas/{area_name}/Perimeter'
|
677
|
-
perimeter_df = cls._extract_dataset(hdf_file, perimeter_path, ['X', 'Y'])
|
678
|
-
|
679
|
-
return perimeter_df
|
680
|
-
|
681
|
-
@classmethod
|
682
|
-
@log_call
|
683
|
-
def _get_area_name(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> str:
|
684
|
-
"""
|
685
|
-
Get the 2D Flow Area name from the HDF file.
|
686
|
-
|
687
|
-
Args:
|
688
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
689
|
-
area_name (Optional[str]): The provided area name, if any.
|
690
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
691
|
-
|
692
|
-
Returns:
|
693
|
-
str: The 2D Flow Area name.
|
694
|
-
|
695
|
-
Raises:
|
696
|
-
ValueError: If no 2D Flow Areas are found in the HDF file or if the specified area name is not found.
|
697
|
-
"""
|
698
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
699
|
-
if area_name is None:
|
700
|
-
area_names = [name for name in hdf_file['Geometry/2D Flow Areas'].keys() if isinstance(hdf_file['Geometry/2D Flow Areas'][name], h5py.Group)]
|
701
|
-
if not area_names:
|
702
|
-
raise ValueError("No 2D Flow Areas found in the HDF file")
|
703
|
-
area_name = area_names[0]
|
704
|
-
else:
|
705
|
-
if area_name not in hdf_file['Geometry/2D Flow Areas']:
|
706
|
-
raise ValueError(f"2D Flow Area '{area_name}' not found in the HDF file")
|
707
|
-
return area_name
|
708
|
-
|
709
|
-
@classmethod
|
710
|
-
@log_call
|
711
|
-
def _extract_dataset(cls, hdf_input: Union[str, Path], dataset_path: str, column_names: List[str], ras_object=None) -> Optional[pd.DataFrame]:
|
712
|
-
"""
|
713
|
-
Extract a dataset from the HDF file and convert it to a DataFrame.
|
714
|
-
|
715
|
-
Args:
|
716
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
717
|
-
dataset_path (str): The path to the dataset within the HDF file.
|
718
|
-
column_names (List[str]): The names to assign to the DataFrame columns.
|
719
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
720
|
-
|
721
|
-
Returns:
|
722
|
-
Optional[pd.DataFrame]: The extracted data as a DataFrame, or None if the dataset is not found.
|
723
|
-
"""
|
724
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
725
|
-
try:
|
726
|
-
dataset = hdf_file[dataset_path][()]
|
727
|
-
df = pd.DataFrame(dataset, columns=column_names)
|
728
|
-
return df
|
729
|
-
except KeyError:
|
730
|
-
return None
|
731
|
-
|
732
|
-
@classmethod
|
733
|
-
@log_call
|
734
|
-
def read_hdf_to_dataframe(cls, hdf_input: Union[str, Path], dataset_path: str, fill_value: Union[int, float, str] = -9999, ras_object=None) -> pd.DataFrame:
|
735
|
-
"""
|
736
|
-
Reads an HDF5 dataset and converts it into a pandas DataFrame, handling byte strings and missing values.
|
737
|
-
|
738
|
-
Args:
|
739
|
-
hdf_input (Union[str, Path]): Path to the HDF file or plan number.
|
740
|
-
dataset_path (str): Path to the dataset within the HDF file.
|
741
|
-
fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.
|
742
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
743
|
-
|
744
|
-
Returns:
|
745
|
-
pd.DataFrame: The resulting DataFrame with byte strings decoded and missing values replaced.
|
746
|
-
|
747
|
-
Raises:
|
748
|
-
KeyError: If the dataset is not found in the HDF file.
|
749
|
-
"""
|
750
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
751
|
-
try:
|
752
|
-
hdf_dataset = hdf_file[dataset_path]
|
753
|
-
hdf_dataframe = cls.convert_to_dataframe_array(hdf_dataset)
|
754
|
-
byte_columns = [col for col in hdf_dataframe.columns if isinstance(hdf_dataframe[col].iloc[0], (bytes, bytearray))]
|
755
|
-
|
756
|
-
hdf_dataframe[byte_columns] = hdf_dataframe[byte_columns].applymap(lambda x: x.decode('utf-8') if isinstance(x, (bytes, bytearray)) else x)
|
757
|
-
hdf_dataframe = hdf_dataframe.replace({fill_value: np.NaN})
|
758
|
-
|
759
|
-
return hdf_dataframe
|
760
|
-
except KeyError:
|
761
|
-
raise
|
762
|
-
|
763
|
-
@classmethod
|
764
|
-
@log_call
|
765
|
-
def get_group_attributes_as_df(cls, hdf_input: Union[str, Path], group_path: str, ras_object=None) -> pd.DataFrame:
|
766
|
-
"""
|
767
|
-
Convert attributes inside a given HDF group to a DataFrame.
|
768
|
-
|
769
|
-
Args:
|
770
|
-
hdf_input (Union[str, Path]): Path to the HDF file or plan number.
|
771
|
-
group_path (str): Path of the group in the HDF file.
|
772
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
773
|
-
|
774
|
-
Returns:
|
775
|
-
pd.DataFrame: DataFrame of all attributes in the specified group with their properties.
|
776
|
-
|
777
|
-
Raises:
|
778
|
-
KeyError: If the specified group_path is not found in the file.
|
779
|
-
|
780
|
-
Example:
|
781
|
-
>>> attributes_df = RasHdf.get_group_attributes_as_df("path/to/file.hdf", "/Results/Unsteady/Output")
|
782
|
-
>>> print(attributes_df.head())
|
783
|
-
"""
|
784
|
-
hdf_filename = cls._get_hdf_filename(hdf_input, ras_object)
|
785
|
-
|
786
|
-
with h5py.File(hdf_filename, 'r') as hdf_file:
|
787
|
-
try:
|
788
|
-
group = hdf_file[group_path]
|
789
|
-
attributes = []
|
790
|
-
for attr in group.attrs:
|
791
|
-
value = group.attrs[attr]
|
792
|
-
attr_info = {
|
793
|
-
'Attribute': attr,
|
794
|
-
'Value': value,
|
795
|
-
'Type': type(value).__name__,
|
796
|
-
'Shape': value.shape if isinstance(value, np.ndarray) else None,
|
797
|
-
'Size': value.size if isinstance(value, np.ndarray) else None,
|
798
|
-
'Dtype': value.dtype if isinstance(value, np.ndarray) else None
|
799
|
-
}
|
800
|
-
if isinstance(value, bytes):
|
801
|
-
attr_info['Value'] = value.decode('utf-8')
|
802
|
-
elif isinstance(value, np.ndarray):
|
803
|
-
if value.dtype.kind == 'S':
|
804
|
-
attr_info['Value'] = [v.decode('utf-8') for v in value]
|
805
|
-
elif value.dtype.kind in ['i', 'f', 'u']:
|
806
|
-
attr_info['Value'] = value.tolist()
|
807
|
-
attributes.append(attr_info)
|
808
|
-
|
809
|
-
return pd.DataFrame(attributes)
|
810
|
-
except KeyError:
|
811
|
-
logger.critical(f"Group path '{group_path}' not found in HDF file '{hdf_filename}'")
|
812
|
-
|
813
|
-
# Last functions from PyHMT2D:
|
814
|
-
|
815
|
-
from ras_commander.logging_config import log_call
|
816
|
-
|
817
|
-
@classmethod
|
818
|
-
@log_call
|
819
|
-
def get_2d_area_solution_times(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[np.ndarray]:
|
820
|
-
"""
|
821
|
-
Retrieve solution times for a specified 2D Flow Area.
|
822
|
-
|
823
|
-
Args:
|
824
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
825
|
-
area_name (Optional[str]): Name of the 2D Flow Area. If None, uses the first area found.
|
826
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
827
|
-
|
828
|
-
Returns:
|
829
|
-
Optional[np.ndarray]: Array of solution times, or None if not found.
|
830
|
-
|
831
|
-
Example:
|
832
|
-
>>> solution_times = RasHdf.get_2d_area_solution_times("03", area_name="Area1")
|
833
|
-
>>> print(solution_times)
|
834
|
-
[0.0, 0.5, 1.0, ...]
|
835
|
-
"""
|
836
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
837
|
-
try:
|
838
|
-
solution_times = np.array(
|
839
|
-
hdf_file['Results']['Unsteady']['Output']['Output Blocks']
|
840
|
-
['Base Output']['Unsteady Time Series']['Time']
|
841
|
-
)
|
842
|
-
return solution_times
|
843
|
-
except KeyError:
|
844
|
-
return None
|
845
|
-
|
846
|
-
@classmethod
|
847
|
-
@log_call
|
848
|
-
def get_2d_area_solution_time_dates(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[np.ndarray]:
|
849
|
-
"""
|
850
|
-
Retrieve solution time dates for a specified 2D Flow Area.
|
851
|
-
|
852
|
-
Args:
|
853
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
854
|
-
area_name (Optional[str]): Name of the 2D Flow Area. If None, uses the first area found.
|
855
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
856
|
-
|
857
|
-
Returns:
|
858
|
-
Optional[np.ndarray]: Array of solution time dates, or None if not found.
|
859
|
-
|
860
|
-
Example:
|
861
|
-
>>> solution_time_dates = RasHdf.get_2d_area_solution_time_dates("03", area_name="Area1")
|
862
|
-
>>> print(solution_time_dates)
|
863
|
-
['2024-01-01T00:00:00', '2024-01-01T00:30:00', ...]
|
864
|
-
"""
|
865
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
866
|
-
try:
|
867
|
-
solution_time_dates = np.array(
|
868
|
-
hdf_file['Results']['Unsteady']['Output']['Output Blocks']
|
869
|
-
['Base Output']['Unsteady Time Series']['Time Date Stamp']
|
870
|
-
)
|
871
|
-
return solution_time_dates
|
872
|
-
except KeyError:
|
873
|
-
return None
|
874
|
-
|
875
|
-
@classmethod
|
876
|
-
@log_call
|
877
|
-
def load_2d_area_solutions(
|
878
|
-
cls,
|
879
|
-
hdf_file: h5py.File,
|
880
|
-
ras_object=None
|
881
|
-
) -> Optional[Dict[str, pd.DataFrame]]:
|
882
|
-
"""
|
883
|
-
Load 2D Area Solutions (Water Surface Elevation and Face Normal Velocity) from the HDF file
|
884
|
-
and provide them as pandas DataFrames.
|
885
|
-
|
886
|
-
**Note:**
|
887
|
-
- This function has only been tested with HEC-RAS version 6.5.
|
888
|
-
- Ensure that the HDF file structure matches the expected paths.
|
889
|
-
|
890
|
-
Args:
|
891
|
-
hdf_file (h5py.File): An open HDF5 file object.
|
892
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
893
|
-
|
894
|
-
Returns:
|
895
|
-
Optional[Dict[str, pd.DataFrame]]: A dictionary containing:
|
896
|
-
- 'solution_times': DataFrame of solution times.
|
897
|
-
- For each 2D Flow Area:
|
898
|
-
- '{Area_Name}_WSE': Water Surface Elevation DataFrame.
|
899
|
-
- '{Area_Name}_Face_Velocity': Face Normal Velocity DataFrame.
|
900
|
-
"""
|
901
|
-
try:
|
902
|
-
solution_times_path = '/Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/Time'
|
903
|
-
if solution_times_path not in hdf_file:
|
904
|
-
return None
|
905
|
-
|
906
|
-
solution_times = hdf_file[solution_times_path][()]
|
907
|
-
solution_times_df = pd.DataFrame({
|
908
|
-
'Time_Step': solution_times
|
909
|
-
})
|
910
|
-
|
911
|
-
solutions_dict = {
|
912
|
-
'solution_times': solution_times_df
|
913
|
-
}
|
914
|
-
|
915
|
-
two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
|
916
|
-
if not two_d_area_names:
|
917
|
-
return solutions_dict
|
918
|
-
|
919
|
-
for area in two_d_area_names:
|
920
|
-
wse_path = f'/Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/2D Flow Areas/{area}/Water Surface'
|
921
|
-
face_velocity_path = f'/Results/Unsteady/Output/Output Blocks/Base Output/Unsteady Time Series/2D Flow Areas/{area}/Face Velocity'
|
922
|
-
|
923
|
-
if wse_path not in hdf_file:
|
924
|
-
continue
|
925
|
-
|
926
|
-
wse_data = hdf_file[wse_path][()]
|
927
|
-
cell_center_coords_path = f'/Geometry/2D Flow Areas/{area}/Cell Center Coordinate'
|
928
|
-
if cell_center_coords_path not in hdf_file:
|
929
|
-
continue
|
930
|
-
|
931
|
-
cell_center_coords = hdf_file[cell_center_coords_path][()]
|
932
|
-
if cell_center_coords.shape[0] != wse_data.shape[1]:
|
933
|
-
continue
|
934
|
-
|
935
|
-
wse_df = pd.DataFrame({
|
936
|
-
'Time_Step': np.repeat(solution_times, wse_data.shape[1]),
|
937
|
-
'Cell_ID': np.tile(np.arange(wse_data.shape[1]), wse_data.shape[0]),
|
938
|
-
'X': cell_center_coords[:, 0].repeat(wse_data.shape[0]),
|
939
|
-
'Y': cell_center_coords[:, 1].repeat(wse_data.shape[0]),
|
940
|
-
'WSE': wse_data.flatten()
|
941
|
-
})
|
942
|
-
solutions_dict[f'{area}_WSE'] = wse_df
|
943
|
-
|
944
|
-
if face_velocity_path not in hdf_file:
|
945
|
-
continue
|
946
|
-
|
947
|
-
face_velocity_data = hdf_file[face_velocity_path][()]
|
948
|
-
face_center_coords_path = f'/Geometry/2D Flow Areas/{area}/Face Points Coordinates'
|
949
|
-
if face_center_coords_path not in hdf_file:
|
950
|
-
continue
|
951
|
-
|
952
|
-
face_center_coords = hdf_file[face_center_coords_path][()]
|
953
|
-
if face_center_coords.shape[0] != face_velocity_data.shape[1]:
|
954
|
-
continue
|
955
|
-
|
956
|
-
face_velocity_df = pd.DataFrame({
|
957
|
-
'Time_Step': np.repeat(solution_times, face_velocity_data.shape[1]),
|
958
|
-
'Face_ID': np.tile(np.arange(face_velocity_data.shape[1]), face_velocity_data.shape[0]),
|
959
|
-
'X': face_center_coords[:, 0].repeat(face_velocity_data.shape[0]),
|
960
|
-
'Y': face_center_coords[:, 1].repeat(face_velocity_data.shape[0]),
|
961
|
-
'Normal_Velocity_ft_s': face_velocity_data.flatten()
|
962
|
-
})
|
963
|
-
solutions_dict[f'{area}_Face_Velocity'] = face_velocity_df
|
964
|
-
|
965
|
-
return solutions_dict
|
966
|
-
|
967
|
-
except Exception as e:
|
968
|
-
return None
|
969
|
-
|
970
|
-
@classmethod
|
971
|
-
@log_call
|
972
|
-
def get_hdf_paths_with_properties(cls, hdf_input: Union[str, Path], ras_object=None) -> pd.DataFrame:
|
973
|
-
"""
|
974
|
-
List all paths in the HDF file with their properties.
|
975
|
-
|
976
|
-
Args:
|
977
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
978
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
979
|
-
|
980
|
-
Returns:
|
981
|
-
pd.DataFrame: DataFrame of all paths and their properties in the HDF file.
|
982
|
-
|
983
|
-
Example:
|
984
|
-
>>> paths_df = RasHdf.get_hdf_paths_with_properties("path/to/file.hdf")
|
985
|
-
>>> print(paths_df.head())
|
986
|
-
"""
|
987
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
988
|
-
paths = []
|
989
|
-
def visitor_func(name: str, node: h5py.Group) -> None:
|
990
|
-
path_info = {
|
991
|
-
"HDF_Path": name,
|
992
|
-
"Type": type(node).__name__,
|
993
|
-
"Shape": getattr(node, "shape", None),
|
994
|
-
"Size": getattr(node, "size", None),
|
995
|
-
"Dtype": getattr(node, "dtype", None)
|
996
|
-
}
|
997
|
-
paths.append(path_info)
|
998
|
-
hdf_file.visititems(visitor_func)
|
999
|
-
return pd.DataFrame(paths)
|
1000
|
-
|
1001
|
-
@classmethod
|
1002
|
-
@log_call
|
1003
|
-
def build_2d_area_face_hydraulic_information(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None) -> Optional[List[List[np.ndarray]]]:
|
1004
|
-
"""
|
1005
|
-
Build face hydraulic information tables (elevation, area, wetted perimeter, Manning's n) for each face in 2D Flow Areas.
|
1006
|
-
|
1007
|
-
Args:
|
1008
|
-
hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
|
1009
|
-
area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
|
1010
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
1011
|
-
|
1012
|
-
Returns:
|
1013
|
-
Optional[List[List[np.ndarray]]]: Nested lists containing hydraulic information for each face in each 2D Flow Area.
|
1014
|
-
|
1015
|
-
Example:
|
1016
|
-
>>> hydraulic_info = RasHdf.build_2d_area_face_hydraulic_information("03")
|
1017
|
-
>>> print(hydraulic_info[0][0]) # First face of first area
|
1018
|
-
[[Elevation1, Area1, WettedPerim1, ManningN1],
|
1019
|
-
[Elevation2, Area2, WettedPerim2, ManningN2],
|
1020
|
-
...]
|
1021
|
-
"""
|
1022
|
-
try:
|
1023
|
-
ras_obj = ras_object if ras_object is not None else ras
|
1024
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_obj), 'r') as hdf_file:
|
1025
|
-
two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
|
1026
|
-
hydraulic_info_table = []
|
1027
|
-
|
1028
|
-
for area in two_d_area_names:
|
1029
|
-
face_elev_info = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces Area Elevation Info'])
|
1030
|
-
face_elev_values = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces Area Elevation Values'])
|
1031
|
-
|
1032
|
-
area_hydraulic_info = []
|
1033
|
-
for face in face_elev_info:
|
1034
|
-
start_row, count = face
|
1035
|
-
face_data = face_elev_values[start_row:start_row + count].copy()
|
1036
|
-
area_hydraulic_info.append(face_data)
|
1037
|
-
|
1038
|
-
hydraulic_info_table.append(area_hydraulic_info)
|
1039
|
-
|
1040
|
-
return hydraulic_info_table
|
1041
|
-
|
1042
|
-
except KeyError:
|
1043
|
-
return None
|
1044
|
-
|
1045
|
-
@classmethod
|
1046
|
-
@log_call
|
1047
|
-
def build_2d_area_face_point_coordinates_list(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None) -> Optional[List[np.ndarray]]:
|
1048
|
-
"""
|
1049
|
-
Build a list of face point coordinates for each 2D Flow Area.
|
1050
|
-
|
1051
|
-
Args:
|
1052
|
-
hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
|
1053
|
-
area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
|
1054
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
1055
|
-
|
1056
|
-
Returns:
|
1057
|
-
Optional[List[np.ndarray]]: List containing arrays of face point coordinates for each 2D Flow Area.
|
1058
|
-
|
1059
|
-
Example:
|
1060
|
-
>>> face_coords_list = RasHdf.build_2d_area_face_point_coordinates_list("03")
|
1061
|
-
>>> print(face_coords_list[0]) # Coordinates for first area
|
1062
|
-
[[X1, Y1], [X2, Y2], ...]
|
1063
|
-
"""
|
1064
|
-
try:
|
1065
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
1066
|
-
two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
|
1067
|
-
face_point_coords_list = []
|
1068
|
-
|
1069
|
-
for area in two_d_area_names:
|
1070
|
-
face_points = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Face Points Coordinates'])
|
1071
|
-
face_point_coords_list.append(face_points)
|
1072
|
-
|
1073
|
-
return face_point_coords_list
|
1074
|
-
|
1075
|
-
except KeyError:
|
1076
|
-
return None
|
1077
|
-
|
1078
|
-
@classmethod
|
1079
|
-
@log_call
|
1080
|
-
def build_2d_area_face_profile(cls, hdf_input: Union[str, Path, h5py.File], area_name: Optional[str] = None, ras_object=None, n_face_profile_points: int = 10) -> Optional[List[np.ndarray]]:
|
1081
|
-
"""
|
1082
|
-
Build face profiles representing sub-grid terrain for each face in 2D Flow Areas.
|
1083
|
-
|
1084
|
-
Args:
|
1085
|
-
hdf_input (Union[str, Path, h5py.File]): The HDF5 file path or open HDF5 file object.
|
1086
|
-
area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
|
1087
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
1088
|
-
n_face_profile_points (int): Number of points to interpolate along each face profile.
|
1089
|
-
|
1090
|
-
Returns:
|
1091
|
-
Optional[List[np.ndarray]]: List containing arrays of profile points for each face in each 2D Flow Area.
|
1092
|
-
|
1093
|
-
Example:
|
1094
|
-
>>> face_profiles = RasHdf.build_2d_area_face_profile("03", n_face_profile_points=20)
|
1095
|
-
>>> print(face_profiles[0][0]) # Profile points for first face of first area
|
1096
|
-
[[X1, Y1, Z1], [X2, Y2, Z2], ...]
|
1097
|
-
"""
|
1098
|
-
try:
|
1099
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
1100
|
-
two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
|
1101
|
-
face_profiles = []
|
1102
|
-
|
1103
|
-
for area in two_d_area_names:
|
1104
|
-
face_faces = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces FacePoint Indexes'])
|
1105
|
-
face_point_coords = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Face Points Coordinates'])
|
1106
|
-
profile_points_all_faces = []
|
1107
|
-
|
1108
|
-
for face in face_faces:
|
1109
|
-
face_start, face_end = face
|
1110
|
-
start_coords = face_point_coords[face_start]
|
1111
|
-
end_coords = face_point_coords[face_end]
|
1112
|
-
|
1113
|
-
length = cls.horizontal_distance(start_coords, end_coords)
|
1114
|
-
stations = np.linspace(0, length, n_face_profile_points, endpoint=True)
|
1115
|
-
|
1116
|
-
interpolated_points = np.array([
|
1117
|
-
start_coords + (end_coords - start_coords) * i / (n_face_profile_points - 1)
|
1118
|
-
for i in range(n_face_profile_points)
|
1119
|
-
])
|
1120
|
-
|
1121
|
-
interpolated_points = cls.interpolate_z_coords(interpolated_points)
|
1122
|
-
|
1123
|
-
profile_points_all_faces.append(interpolated_points)
|
1124
|
-
|
1125
|
-
face_profiles.append(profile_points_all_faces)
|
1126
|
-
|
1127
|
-
return face_profiles
|
1128
|
-
|
1129
|
-
except KeyError as e:
|
1130
|
-
logging.error(f"Error building face profiles: {e}")
|
1131
|
-
return None
|
1132
|
-
|
1133
|
-
@classmethod
|
1134
|
-
@log_call
|
1135
|
-
def build_face_facepoints(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[List[np.ndarray]]:
|
1136
|
-
"""
|
1137
|
-
Build face's facepoint list for each 2D Flow Area.
|
1138
|
-
|
1139
|
-
Args:
|
1140
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
1141
|
-
area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
|
1142
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
1143
|
-
|
1144
|
-
Returns:
|
1145
|
-
Optional[List[np.ndarray]]: List containing arrays of face point indexes for each face in each 2D Flow Area.
|
1146
|
-
|
1147
|
-
Example:
|
1148
|
-
>>> face_facepoints = RasHdf.build_face_facepoints("03")
|
1149
|
-
>>> print(face_facepoints[0][0]) # FacePoint indexes for first face of first area
|
1150
|
-
[start_idx, end_idx]
|
1151
|
-
"""
|
1152
|
-
try:
|
1153
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
1154
|
-
two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
|
1155
|
-
face_facepoints_list = []
|
1156
|
-
|
1157
|
-
for area in two_d_area_names:
|
1158
|
-
face_facepoints = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Faces FacePoint Indexes'])
|
1159
|
-
face_facepoints_list.append(face_facepoints)
|
1160
|
-
|
1161
|
-
return face_facepoints_list
|
1162
|
-
|
1163
|
-
except KeyError as e:
|
1164
|
-
logger = logging.getLogger(__name__)
|
1165
|
-
logger.error(f"Error building face facepoints list: {e}")
|
1166
|
-
return None
|
1167
|
-
|
1168
|
-
@classmethod
|
1169
|
-
@log_call
|
1170
|
-
def build_2d_area_boundaries(cls, hdf_input: Union[str, Path], area_name: Optional[str] = None, ras_object=None) -> Optional[Tuple[int, np.ndarray, List[str], List[str], List[str], np.ndarray, np.ndarray]]:
|
1171
|
-
"""
|
1172
|
-
Build boundaries with their point lists for each 2D Flow Area.
|
1173
|
-
|
1174
|
-
Args:
|
1175
|
-
hdf_input (Union[str, Path]): The plan number or full path to the HDF file.
|
1176
|
-
area_name (Optional[str]): Name of the 2D Flow Area. If None, builds for all areas.
|
1177
|
-
ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.
|
1178
|
-
|
1179
|
-
Returns:
|
1180
|
-
Optional[Tuple[int, np.ndarray, List[str], List[str], List[str], np.ndarray, np.ndarray]]:
|
1181
|
-
Tuple containing total boundaries, boundary IDs, boundary names, associated 2D Flow Area names, boundary types,
|
1182
|
-
total points per boundary, and boundary point lists.
|
1183
|
-
|
1184
|
-
Example:
|
1185
|
-
>>> total_boundaries, boundary_ids, boundary_names, flow_area_names, boundary_types, total_points, boundary_points = RasHdf.build_2d_area_boundaries("03")
|
1186
|
-
>>> print(total_boundaries)
|
1187
|
-
5
|
1188
|
-
"""
|
1189
|
-
try:
|
1190
|
-
with h5py.File(cls._get_hdf_filename(hdf_input, ras_object), 'r') as hdf_file:
|
1191
|
-
two_d_area_names = cls.get_2d_flow_area_names(hdf_file, ras_object=ras_object)
|
1192
|
-
total_boundaries = 0
|
1193
|
-
boundary_ids = []
|
1194
|
-
boundary_names = []
|
1195
|
-
flow_area_names = []
|
1196
|
-
boundary_types = []
|
1197
|
-
total_points_per_boundary = []
|
1198
|
-
boundary_points_list = []
|
1199
|
-
|
1200
|
-
for area in two_d_area_names:
|
1201
|
-
boundary_points = np.array(hdf_file[f'Geometry/2D Flow Areas/{area}/Boundary Points'])
|
1202
|
-
if boundary_points.size == 0:
|
1203
|
-
logger = logging.getLogger(__name__)
|
1204
|
-
logger.warning(f"No boundary points found for 2D Flow Area: {area}")
|
1205
|
-
continue
|
1206
|
-
|
1207
|
-
current_boundary_id = boundary_points[0][0]
|
1208
|
-
current_boundary_points = [boundary_points[0][2], boundary_points[0][3]]
|
1209
|
-
boundary_id = current_boundary_id
|
1210
|
-
|
1211
|
-
for point in boundary_points[1:]:
|
1212
|
-
if point[0] == current_boundary_id:
|
1213
|
-
current_boundary_points.append(point[3])
|
1214
|
-
else:
|
1215
|
-
# Save the completed boundary
|
1216
|
-
boundary_ids.append(current_boundary_id)
|
1217
|
-
boundary_names.append(point[0]) # Assuming boundary name is stored here
|
1218
|
-
flow_area_names.append(area)
|
1219
|
-
boundary_types.append(point[2]) # Assuming boundary type is stored here
|
1220
|
-
total_points_per_boundary.append(len(current_boundary_points))
|
1221
|
-
boundary_points_list.append(np.array(current_boundary_points))
|
1222
|
-
total_boundaries += 1
|
1223
|
-
|
1224
|
-
# Start a new boundary
|
1225
|
-
current_boundary_id = point[0]
|
1226
|
-
current_boundary_points = [point[2], point[3]]
|
1227
|
-
|
1228
|
-
# Save the last boundary
|
1229
|
-
boundary_ids.append(current_boundary_id)
|
1230
|
-
boundary_names.append(boundary_points[-1][0]) # Assuming boundary name is stored here
|
1231
|
-
flow_area_names.append(area)
|
1232
|
-
boundary_types.append(boundary_points[-1][2]) # Assuming boundary type is stored here
|
1233
|
-
total_points_per_boundary.append(len(current_boundary_points))
|
1234
|
-
boundary_points_list.append(np.array(current_boundary_points))
|
1235
|
-
total_boundaries += 1
|
1236
|
-
|
1237
|
-
return (total_boundaries, np.array(boundary_ids), boundary_names, flow_area_names, boundary_types, np.array(total_points_per_boundary), np.array(boundary_points_list))
|
1238
|
-
|
1239
|
-
except KeyError as e:
|
1240
|
-
logger = logging.getLogger(__name__)
|
1241
|
-
logger.error(f"Error building boundaries: {e}")
|
1242
|
-
return None
|
1243
|
-
|
1244
|
-
# Helper Methods for New Functionalities
|
1245
|
-
|
1246
|
-
@classmethod
|
1247
|
-
@log_call
|
1248
|
-
def horizontal_distance(cls, coord1: np.ndarray, coord2: np.ndarray) -> float:
|
1249
|
-
"""
|
1250
|
-
Calculate the horizontal distance between two coordinate points.
|
1251
|
-
|
1252
|
-
Args:
|
1253
|
-
coord1 (np.ndarray): First coordinate point [X, Y].
|
1254
|
-
coord2 (np.ndarray): Second coordinate point [X, Y].
|
1255
|
-
|
1256
|
-
Returns:
|
1257
|
-
float: Horizontal distance.
|
1258
|
-
|
1259
|
-
Example:
|
1260
|
-
>>> distance = RasHdf.horizontal_distance([0, 0], [3, 4])
|
1261
|
-
>>> print(distance)
|
1262
|
-
5.0
|
1263
|
-
"""
|
1264
|
-
return np.linalg.norm(coord2 - coord1)
|
1265
|
-
|
1266
|
-
@classmethod
|
1267
|
-
@log_call
|
1268
|
-
def interpolate_z_coords(cls, points: np.ndarray) -> np.ndarray:
|
1269
|
-
"""
|
1270
|
-
Interpolate Z coordinates for a set of points.
|
1271
|
-
|
1272
|
-
Args:
|
1273
|
-
points (np.ndarray): Array of points with [X, Y].
|
1274
|
-
|
1275
|
-
Returns:
|
1276
|
-
np.ndarray: Array of points with [X, Y, Z].
|
1277
|
-
|
1278
|
-
Example:
|
1279
|
-
>>> interpolated = RasHdf.interpolate_z_coords(np.array([[0,0], [1,1]]))
|
1280
|
-
>>> print(interpolated)
|
1281
|
-
[[0, 0, Z0],
|
1282
|
-
[1, 1, Z1]]
|
1283
|
-
"""
|
1284
|
-
# Placeholder for actual interpolation logic
|
1285
|
-
# This should be replaced with the appropriate interpolation method
|
1286
|
-
z_coords = np.zeros((points.shape[0], 1)) # Assuming Z=0 for simplicity
|
1287
|
-
return np.hstack((points, z_coords))
|
1288
|
-
|
1289
|
-
@classmethod
@log_call
def extract_string_from_hdf(
    cls,
    hdf_input: Union[str, Path],
    hdf_path: str,
    ras_object: Optional["RasPrj"] = None
) -> Union[str, List[str]]:
    """
    Extract a textual representation of the HDF object at a given path.

    Args:
        hdf_input (Union[str, Path]): Either the plan number or the full path to the HDF file.
        hdf_path (str): Path of the object in the HDF file.
        ras_object (Optional["RasPrj"]): Specific RAS object to use. If None, uses the global ras instance.

    Returns:
        Union[str, List[str]]:
            - for a group: a summary string listing the group's keys;
            - for a scalar bytes dataset: the UTF-8 decoded string;
            - for an array of byte strings: a list of decoded strings;
            - for any other dataset: ``str(data)``;
            - for any other object type: an "Unsupported object type" message.

    Raises:
        ValueError: If hdf_input cannot be resolved to an existing HDF file.
        KeyError: If the specified hdf_path is not found in the file.

    Example:
        >>> result = RasHdf.extract_string_from_hdf("path/to/file.hdf", "/Results/Summary/Compute Messages (text)")
        >>> print(result)
    """
    # _get_hdf_filename logs and returns None on failure; passing that None on
    # to h5py.File would surface as an unrelated low-level error, so fail here
    # with the documented exception type instead.
    hdf_filename = cls._get_hdf_filename(hdf_input, ras_object)
    if hdf_filename is None:
        raise ValueError(f"No HDF file found for input: {hdf_input}")

    with h5py.File(hdf_filename, 'r') as hdf_file:
        # Only the lookup itself is guarded, so a KeyError raised elsewhere is
        # never misreported as a missing path.
        try:
            hdf_object = hdf_file[hdf_path]
        except KeyError as exc:
            logger = logging.getLogger(__name__)
            logger.error(f"Path not found: {hdf_path}")
            raise KeyError(f"Path not found: {hdf_path}") from exc

        if isinstance(hdf_object, h5py.Group):
            return f"Group: {hdf_path}\nContents: {list(hdf_object.keys())}"

        if isinstance(hdf_object, h5py.Dataset):
            data = hdf_object[()]
            if isinstance(data, bytes):
                return data.decode('utf-8')
            if isinstance(data, np.ndarray) and data.dtype.kind == 'S':
                return [v.decode('utf-8') for v in data]
            return str(data)

        return f"Unsupported object type: {type(hdf_object)}"
|
1336
|
-
|
1337
|
-
@staticmethod
@log_call
def decode_byte_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
    """
    Decode byte strings in a DataFrame to regular string objects.

    Every object-dtype column is scanned and each ``bytes`` value (e.g. b'string')
    is replaced by its UTF-8 decoded text. Values that are not bytes (already
    decoded strings, numbers, missing values) are left untouched.

    The DataFrame is modified in place and also returned.

    Args:
        dataframe (pd.DataFrame): The DataFrame containing byte-encoded string columns.

    Returns:
        pd.DataFrame: The same DataFrame with byte strings decoded to regular strings.

    Example:
        >>> df = pd.DataFrame({'A': [b'hello', b'world'], 'B': [1, 2]})
        >>> decoded_df = RasHdf.decode_byte_strings(df)
        >>> print(decoded_df)
               A  B
        0  hello  1
        1  world  2
    """
    # Declared as a staticmethod: the signature has no ``cls`` parameter, so
    # under @classmethod the class itself was bound to ``dataframe`` and every
    # call failed with a TypeError.
    def _decode(value):
        return value.decode('utf-8') if isinstance(value, bytes) else value

    # Decode element-wise instead of stack()/.str.decode()/unstack(): the
    # vectorised decode yields NaN for values that are already str, silently
    # discarding them in mixed columns.
    for col in dataframe.select_dtypes(include=['object']).columns:
        dataframe[col] = dataframe[col].map(_decode)
    return dataframe
|
1364
|
-
|
1365
|
-
@staticmethod
@log_call
def perform_kdtree_query(
    reference_points: np.ndarray,
    query_points: np.ndarray,
    max_distance: float = 2.0
) -> np.ndarray:
    """
    Find, for each query point, the index of the nearest reference point.

    A KDTree is built from reference_points and queried with query_points.
    Query points whose nearest reference point is farther than max_distance
    get the index -1.

    Args:
        reference_points (np.ndarray): The reference dataset for KDTree.
        query_points (np.ndarray): The query dataset to search against KDTree of reference_points.
        max_distance (float, optional): The maximum distance threshold. Indices with distances
            greater than this are set to -1. Defaults to 2.0.

    Returns:
        np.ndarray: Array of indices from reference_points that are nearest to each point in query_points.
                    Indices with distances > max_distance are set to -1.

    Example:
        >>> ref_points = np.array([[0, 0], [1, 1], [2, 2]])
        >>> query_points = np.array([[0.1, 0.1], [5, 5]])
        >>> result = RasHdf.perform_kdtree_query(ref_points, query_points)
        >>> print(result)
        [ 0 -1]
    """
    # Declared as a staticmethod: the signature has no ``cls`` parameter, so
    # under @classmethod the class was bound to ``reference_points`` and the
    # call failed with a TypeError.
    # NOTE(review): assumes KDTree is scipy.spatial.KDTree, whose query()
    # reports "no neighbour within distance_upper_bound" as an infinite
    # distance; the mask below turns those entries into -1.
    dist, snap = KDTree(reference_points).query(query_points, distance_upper_bound=max_distance)
    snap[dist > max_distance] = -1
    return snap
|
1394
|
-
|
1395
|
-
@staticmethod
@log_call
def find_nearest_neighbors(points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
    """
    Find the nearest neighbor of every point within the same dataset.

    A KDTree is built from points and queried against itself for the two
    closest entries of each point. A point's match with itself is ignored,
    and neighbors farther than max_distance are reported as -1.

    Args:
        points (np.ndarray): The dataset to build the KDTree from and query against itself.
        max_distance (float, optional): The maximum distance threshold. Indices with distances
            greater than max_distance are set to -1. Defaults to 2.0.

    Returns:
        np.ndarray: Array of int64 indices giving the nearest neighbor in points for each point.
                    -1 marks points that have no neighbor other than themselves within max_distance.

    Example:
        >>> points = np.array([[0, 0], [1, 0], [2.5, 0], [10, 0]])
        >>> result = RasHdf.find_nearest_neighbors(points)
        >>> print(result)
        [ 1  0  1 -1]
    """
    # Declared as a staticmethod: the signature has no ``cls`` parameter, so
    # under @classmethod the class was bound to ``points`` and the call failed
    # with a TypeError.
    dist, snap = KDTree(points).query(points, k=2, distance_upper_bound=max_distance)
    snap[dist > max_distance] = -1

    own_index = np.arange(len(snap))
    first, second = snap[:, 0], snap[:, 1]

    # A candidate is usable when it is a real hit (not -1) and not the point
    # itself. With duplicate coordinates the self-match may sit in either
    # column, so both columns are checked.
    first_ok = (first != -1) & (first != own_index)
    second_ok = (second != -1) & (second != own_index)

    # Prefer the closest candidate, fall back to the second, else -1.
    nearest = np.where(first_ok, first, np.where(second_ok, second, -1))
    return nearest.astype(np.int64)
|
1427
|
-
|
1428
|
-
@staticmethod
@log_call
def consolidate_dataframe(
    dataframe: pd.DataFrame,
    group_by: Optional[Union[str, List[str]]] = None,
    pivot_columns: Optional[Union[str, List[str]]] = None,
    level: Optional[int] = None,
    n_dimensional: bool = False,
    aggregation_method: Union[str, Callable] = 'list'
) -> pd.DataFrame:
    """
    Consolidate rows in a DataFrame by merging duplicate values into lists or using a specified aggregation function.

    Args:
        dataframe (pd.DataFrame): The DataFrame to consolidate.
        group_by (Optional[Union[str, List[str]]]): Columns or indices to group by.
        pivot_columns (Optional[Union[str, List[str]]]): Columns to pivot.
        level (Optional[int]): Level of multi-index to group by.
        n_dimensional (bool): If True, use a pivot table for N-Dimensional consolidation.
        aggregation_method (Union[str, Callable]): 'list' collects each group's values
            into a list; anything else is handed to pandas as the aggregation function
            (e.g. 'sum' or a callable).

    Returns:
        pd.DataFrame: The consolidated DataFrame. In the group-by path, cells holding
        a collection (tuple, list, set, ndarray) are returned as lists; scalar
        aggregation results are returned unchanged.

    Example:
        >>> df = pd.DataFrame({'A': [1, 1, 2], 'B': [4, 5, 6], 'C': [7, 8, 9]})
        >>> result = RasHdf.consolidate_dataframe(df, group_by='A')
        >>> print(result)
                B       C
        A
        1  [4, 5]  [7, 8]
        2     [6]     [9]
    """
    # Declared as a staticmethod: the signature has no ``cls`` parameter, so
    # under @classmethod the class was bound to ``dataframe`` and the call
    # failed with a TypeError.
    if aggregation_method == 'list':
        # Aggregate into tuples first and convert to lists afterwards, as the
        # original implementation did.
        agg_func = lambda x: tuple(x)
    else:
        agg_func = aggregation_method

    if n_dimensional:
        # NOTE(review): pandas' positional order for pivot_table is
        # (values, index, columns), so group_by is passed as ``values`` and
        # pivot_columns as ``index`` here. Kept unchanged to preserve
        # behavior; confirm this is the intended mapping.
        return dataframe.pivot_table(group_by, pivot_columns, aggfunc=agg_func)

    def _as_list(cell):
        # Only collections are converted; calling list() on a scalar result
        # (e.g. from 'sum') raised TypeError and made every non-'list'
        # aggregation unusable.
        if isinstance(cell, (tuple, list, set, frozenset, np.ndarray)):
            return list(cell)
        return cell

    grouped = dataframe.groupby(group_by, level=level).agg(agg_func)
    # Column-wise Series.map replaces DataFrame.applymap, which newer pandas
    # versions deprecate.
    return grouped.apply(lambda column: column.map(_as_list))
|
1472
|
-
|
1473
|
-
@staticmethod
@log_call
def find_nearest_value(array: Union[list, np.ndarray], target_value: Union[int, float]) -> Union[int, float]:
    """
    Find the value in an array that is closest to the specified target value.

    When several values are equally close, the first one encountered wins.

    Args:
        array (Union[list, np.ndarray]): The array to search within.
        target_value (Union[int, float]): The value to find the nearest neighbor to.

    Returns:
        Union[int, float]: The nearest value in the array to the specified target value.

    Raises:
        ValueError: If the array is empty.

    Example:
        >>> arr = np.array([1, 3, 5, 7, 9])
        >>> result = RasHdf.find_nearest_value(arr, 6)
        >>> print(result)
        5
    """
    # Declared as a staticmethod: the signature has no ``cls`` parameter, so
    # under @classmethod the class was bound to ``array`` and the call failed
    # with a TypeError.
    values = np.asarray(array)
    if values.size == 0:
        raise ValueError("Cannot find the nearest value in an empty array")

    # argmin() returns an index into the flattened array, so index the
    # flattened view; indexing an N-D array with it would select a whole
    # row or go out of bounds.
    flat = values.ravel()
    idx = np.abs(flat - target_value).argmin()
    return flat[idx]
|
1495
|
-
|
1496
|
-
@staticmethod
@log_call
def _get_hdf_filename(hdf_input: Union[str, Path, h5py.File], ras_object=None) -> Optional[Path]:
    """
    Resolve the input to the path of an HDF file on disk.

    Resolution order:
        1. An open h5py.File yields the path of its backing file.
        2. A string or Path naming an existing file is returned as a Path.
        3. Anything else is treated as a plan number and looked up in the
           project's plan_df, using its 'HDF_Results_Path' column.

    Args:
        hdf_input (Union[str, Path, h5py.File]): The plan number, full path to the HDF file as a string, a Path object, or an h5py.File object.
        ras_object (RasPrj, optional): The RAS project object. If None, uses the global ras instance.

    Returns:
        Optional[Path]: The full path to the HDF file as a Path object, or None if it could not be resolved.

    Note:
        Failures are reported through logger.critical and a None return value;
        this method does not raise for them.
    """
    # Case 1: an open handle already knows which file backs it.
    if isinstance(hdf_input, h5py.File):
        return Path(hdf_input.filename)

    # Case 2: a direct path to an existing file.
    hdf_input = Path(hdf_input) if isinstance(hdf_input, str) else hdf_input
    if isinstance(hdf_input, Path) and hdf_input.is_file():
        return hdf_input

    # Case 3: plan-number lookup, which needs an initialized project.
    project = ras_object or ras
    if not project.initialized:
        logger.critical("ras_object is not initialized. ras_object is required when hdf_input is not a direct file path.")
        return None

    plan_number = str(hdf_input)
    plan_info = project.plan_df[project.plan_df['plan_number'] == plan_number]
    if plan_info.empty:
        logger.critical(f"No HDF file found for plan number {hdf_input}")
        return None

    first_match = plan_info.iloc[0]
    hdf_filename = first_match['HDF_Results_Path']
    if hdf_filename is None:
        logger.critical(f"HDF_Results_Path is None for plan number {hdf_input}")
        return None

    hdf_path = Path(hdf_filename)
    if hdf_path.is_file():
        return hdf_path

    logger.critical(f"HDF file not found: {hdf_path}")
    return None
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
@log_call
def save_dataframe_to_hdf(
    dataframe: pd.DataFrame,
    hdf_parent_group: h5py.Group,
    dataset_name: str,
    attributes: Optional[Dict[str, Union[int, float, str]]] = None,
    fill_value: Union[int, float, str] = -9999,
    **kwargs: Any
) -> h5py.Dataset:
    """
    Save a pandas DataFrame to an HDF5 dataset within a specified parent group.

    This function addresses limitations of `pd.to_hdf()` by using h5py to create and save datasets.
    The input DataFrame is copied and never modified. An existing dataset with
    the same name in the parent group is deleted and replaced.

    Args:
        dataframe (pd.DataFrame): The DataFrame to save.
        hdf_parent_group (h5py.Group): The parent HDF5 group where the dataset will be created.
        dataset_name (str): The name of the new dataset to add in the HDF5 parent group.
        attributes (Optional[Dict[str, Union[int, float, str]]]): A dictionary of attributes to add to the dataset.
        fill_value (Union[int, float, str]): The value to use for filling missing data.
        **kwargs: Additional keyword arguments passed to `hdf_parent_group.create_dataset()`.

    Returns:
        h5py.Dataset: The created HDF5 dataset within the parent group.

    Example:
        >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
        >>> with h5py.File('data.h5', 'w') as f:
        ...     group = f.create_group('my_group')
        ...     dataset = save_dataframe_to_hdf(df, group, 'my_dataset')
        >>> print(dataset)
    """
    df = dataframe.copy()

    # Replace '/' in column names with '-' to avoid issues in HDF5
    if df.columns.dtype == 'O':
        df.columns = df.columns.str.replace('/', '-', regex=False)

    # Fill missing values with the specified fill_value
    df = df.fillna(fill_value)

    # The former "consistency" check compared this index with an identical
    # re-computation of itself, so it could never fail; it has been removed.
    string_cols = df.select_dtypes(include=['object']).columns

    def _to_bytes(value):
        # Object columns can hold non-str values, most notably a numeric
        # fill_value inserted by fillna above; calling .encode on those
        # raised AttributeError. Convert them to text before encoding.
        if isinstance(value, bytes):
            return value
        return str(value).encode('utf-8')

    # Encode string columns to bytes. Column-wise Series.map replaces
    # DataFrame.applymap, which newer pandas versions deprecate.
    if len(string_cols) > 0:
        encoded = df[string_cols].apply(lambda column: column.map(_to_bytes))
        df[string_cols] = encoded.astype('bytes')

    # Named columns are written as a record array; default RangeIndex
    # columns are written as a plain array of values.
    arr = df.to_records(index=False) if not isinstance(df.columns, pd.RangeIndex) else df.values

    # Remove existing dataset if it exists
    if dataset_name in hdf_parent_group:
        logger.warning(f"Existing dataset {dataset_name} will be overwritten")
        del hdf_parent_group[dataset_name]

    # Create the dataset in the HDF5 file
    dataset = hdf_parent_group.create_dataset(dataset_name, data=arr, **kwargs)

    # Update dataset attributes if provided
    if attributes:
        dataset.attrs.update(attributes)

    logger.info(f"Successfully saved DataFrame to dataset: {dataset_name}")
    return dataset
|