ras-commander 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ras_commander/RasCmdr.py +163 -131
- ras_commander/RasExamples.py +96 -46
- ras_commander/RasGeo.py +27 -6
- ras_commander/RasHdf.py +248 -0
- ras_commander/RasPlan.py +391 -437
- ras_commander/RasPrj.py +396 -64
- ras_commander/RasUnsteady.py +24 -4
- ras_commander/RasUtils.py +352 -51
- ras_commander/__init__.py +4 -1
- ras_commander-0.34.0.dist-info/METADATA +263 -0
- ras_commander-0.34.0.dist-info/RECORD +15 -0
- ras_commander-0.33.0.dist-info/METADATA +0 -5
- ras_commander-0.33.0.dist-info/RECORD +0 -14
- {ras_commander-0.33.0.dist-info → ras_commander-0.34.0.dist-info}/LICENSE +0 -0
- {ras_commander-0.33.0.dist-info → ras_commander-0.34.0.dist-info}/WHEEL +0 -0
- {ras_commander-0.33.0.dist-info → ras_commander-0.34.0.dist-info}/top_level.txt +0 -0
ras_commander/RasExamples.py
CHANGED
@@ -7,6 +7,16 @@ import shutil
 from typing import Union, List
 import csv
 from datetime import datetime
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,  # Set the logging level to INFO
+    format='%(asctime)s - %(levelname)s - %(message)s',  # Log message format
+    handlers=[
+        logging.StreamHandler()  # Log to stderr
+    ]
+)
 
 class RasExamples:
     """
@@ -38,14 +48,12 @@ class RasExamples:
         folder_df (pd.DataFrame): DataFrame containing folder structure information.
         csv_file_path (Path): Path to the CSV file for caching project metadata.
 
-
     Future Improvements:
     - Implement the ability for user-provided example projects (provided as a zip file) for their own repeatable examples.
    - If the zip file is in the same folder structure as the HEC-RAS example projects, simple replace Example_Projects_6_5.zip and the folder structure will be automatically extracted from the zip file.
    - The actual RAS example projects haven't been updated much, but there is the structure here to handle future versions. Although this version of the code is probably fine for a few years, until HEC-RAS 2025 comes out.
-
     """
-
+
     def __init__(self):
         """
         Initialize the RasExamples class.
@@ -54,7 +62,7 @@ class RasExamples:
         It initializes the base URL for downloads, valid versions, directory paths, and other essential
         attributes. It also creates the projects directory if it doesn't exist and loads the project data.
 
-        The method also
+        The method also logs the location of the example projects folder and calls _load_project_data()
         to initialize the project data.
         """
         self.base_url = 'https://github.com/HydrologicEngineeringCenter/hec-downloads/releases/download/'
@@ -71,7 +79,7 @@ class RasExamples:
         self.csv_file_path = self.examples_dir / 'example_projects.csv'
 
         self.projects_dir.mkdir(parents=True, exist_ok=True)
-
+        logging.info(f"Example projects folder: {self.projects_dir}")
         self._load_project_data()
 
     def _load_project_data(self):
@@ -84,21 +92,29 @@ class RasExamples:
         self._find_zip_file()
 
         if not self.zip_file_path:
-
+            logging.info("No example projects zip file found. Downloading...")
             self.get_example_projects()
 
-
+        try:
+            zip_modified_time = os.path.getmtime(self.zip_file_path)
+        except FileNotFoundError:
+            logging.error(f"Zip file not found at {self.zip_file_path}.")
+            return
 
         if self.csv_file_path.exists():
             csv_modified_time = os.path.getmtime(self.csv_file_path)
 
             if csv_modified_time >= zip_modified_time:
-
-
-
+                logging.info("Loading project data from CSV...")
+                try:
+                    self.folder_df = pd.read_csv(self.csv_file_path)
+                    logging.info(f"Loaded {len(self.folder_df)} projects from CSV. Use list_categories() and list_projects() to explore them.")
+                except Exception as e:
+                    logging.error(f"Failed to read CSV file: {e}")
+                    self.folder_df = None
                 return
 
-
+        logging.info("Extracting folder structure from zip file...")
         self._extract_folder_structure()
         self._save_to_csv()
 
@@ -108,8 +124,10 @@ class RasExamples:
             potential_zip = self.examples_dir / f"Example_Projects_{version.replace('.', '_')}.zip"
             if potential_zip.exists():
                 self.zip_file_path = potential_zip
-
+                logging.info(f"Found zip file: {self.zip_file_path}")
                 break
+        else:
+            logging.warning("No existing example projects zip file found.")
 
     def _extract_folder_structure(self):
         """
@@ -129,18 +147,25 @@ class RasExamples:
                 })
 
             self.folder_df = pd.DataFrame(folder_data).drop_duplicates()
-
-
-
+            logging.info(f"Extracted {len(self.folder_df)} projects.")
+            logging.debug(f"folder_df:\n{self.folder_df}")
+        except zipfile.BadZipFile:
+            logging.error(f"The file {self.zip_file_path} is not a valid zip file.")
+            self.folder_df = pd.DataFrame(columns=['Category', 'Project'])
         except Exception as e:
-
+            logging.error(f"An error occurred while extracting the folder structure: {str(e)}")
             self.folder_df = pd.DataFrame(columns=['Category', 'Project'])
 
     def _save_to_csv(self):
         """Save the extracted folder structure to CSV file."""
         if self.folder_df is not None and not self.folder_df.empty:
-
-
+            try:
+                self.folder_df.to_csv(self.csv_file_path, index=False)
+                logging.info(f"Saved project data to {self.csv_file_path}")
+            except Exception as e:
+                logging.error(f"Failed to save project data to CSV: {e}")
+        else:
+            logging.warning("No folder data to save to CSV.")
 
     def get_example_projects(self, version_number='6.5'):
         """
@@ -155,9 +180,11 @@ class RasExamples:
         Raises:
             ValueError: If an invalid version number is provided.
         """
-
+        logging.info(f"Getting example projects for version {version_number}")
         if version_number not in self.valid_versions:
-
+            error_msg = f"Invalid version number. Valid versions are: {', '.join(self.valid_versions)}"
+            logging.error(error_msg)
+            raise ValueError(error_msg)
 
         zip_url = f"{self.base_url}1.0.31/Example_Projects_{version_number.replace('.', '_')}.zip"
 
@@ -166,13 +193,18 @@ class RasExamples:
         self.zip_file_path = self.examples_dir / f"Example_Projects_{version_number.replace('.', '_')}.zip"
 
         if not self.zip_file_path.exists():
-
-
-
-
-
+            logging.info(f"Downloading HEC-RAS Example Projects from {zip_url}. \nThe file is over 400 MB, so it may take a few minutes to download....")
+            try:
+                response = requests.get(zip_url, stream=True)
+                response.raise_for_status()
+                with open(self.zip_file_path, 'wb') as file:
+                    shutil.copyfileobj(response.raw, file)
+                logging.info(f"Downloaded to {self.zip_file_path}")
+            except requests.exceptions.RequestException as e:
+                logging.error(f"Failed to download the zip file: {e}")
+                raise
         else:
-
+            logging.info("HEC-RAS Example Projects zip file already exists. Skipping download.")
 
         self._load_project_data()
         return self.projects_dir
@@ -185,10 +217,10 @@ class RasExamples:
             list: Available categories.
         """
         if self.folder_df is None or 'Category' not in self.folder_df.columns:
-
+            logging.warning("No categories available. Make sure the zip file is properly loaded.")
             return []
         categories = self.folder_df['Category'].unique()
-
+        logging.info(f"Available categories: {', '.join(categories)}")
         return categories.tolist()
 
     def list_projects(self, category=None):
@@ -202,12 +234,14 @@ class RasExamples:
             list: List of project names.
         """
         if self.folder_df is None:
-
+            logging.warning("No projects available. Make sure the zip file is properly loaded.")
             return []
         if category:
             projects = self.folder_df[self.folder_df['Category'] == category]['Project'].unique()
+            logging.info(f"Projects in category '{category}': {', '.join(projects)}")
         else:
             projects = self.folder_df['Project'].unique()
+            logging.info(f"All available projects: {', '.join(projects)}")
         return projects.tolist()
 
     def extract_project(self, project_names: Union[str, List[str]]):
@@ -229,21 +263,29 @@ class RasExamples:
         extracted_paths = []
 
         for project_name in project_names:
-
-
+            logging.info("----- RasExamples Extracting Project -----")
+            logging.info(f"Extracting project '{project_name}'")
             project_path = self.projects_dir / project_name
 
             if project_path.exists():
-
-
-
+                logging.info(f"Project '{project_name}' already exists. Deleting existing folder...")
+                try:
+                    shutil.rmtree(project_path)
+                    logging.info(f"Existing folder for project '{project_name}' has been deleted.")
+                except Exception as e:
+                    logging.error(f"Failed to delete existing project folder '{project_name}': {e}")
+                    continue
 
             if self.folder_df is None or self.folder_df.empty:
-
+                error_msg = "No project information available. Make sure the zip file is properly loaded."
+                logging.error(error_msg)
+                raise ValueError(error_msg)
 
             project_info = self.folder_df[self.folder_df['Project'] == project_name]
             if project_info.empty:
-
+                error_msg = f"Project '{project_name}' not found in the zip file."
+                logging.error(error_msg)
+                raise ValueError(error_msg)
 
             category = project_info['Category'].iloc[0]
 
@@ -265,15 +307,15 @@ class RasExamples:
                         with zip_ref.open(file) as source, open(extract_path, "wb") as target:
                             shutil.copyfileobj(source, target)
 
-
+                logging.info(f"Successfully extracted project '{project_name}' to {project_path}")
                 extracted_paths.append(project_path)
             except zipfile.BadZipFile:
-
+                logging.error(f"Error: The file {self.zip_file_path} is not a valid zip file.")
             except FileNotFoundError:
-
+                logging.error(f"Error: The file {self.zip_file_path} was not found.")
             except Exception as e:
-
-
+                logging.error(f"An unexpected error occurred while extracting the project: {str(e)}")
+        logging.info("----- RasExamples Extraction Complete -----")
         return extracted_paths
 
     def is_project_extracted(self, project_name):
@@ -287,18 +329,26 @@ class RasExamples:
             bool: True if the project is extracted, False otherwise.
         """
         project_path = self.projects_dir / project_name
-
+        is_extracted = project_path.exists()
+        logging.info(f"Project '{project_name}' extracted: {is_extracted}")
+        return is_extracted
 
     def clean_projects_directory(self):
         """Remove all extracted projects from the example_projects directory."""
-
+        logging.info(f"Cleaning projects directory: {self.projects_dir}")
        if self.projects_dir.exists():
-
+            try:
+                shutil.rmtree(self.projects_dir)
+                logging.info("All projects have been removed.")
+            except Exception as e:
+                logging.error(f"Failed to remove projects directory: {e}")
+        else:
+            logging.warning("Projects directory does not exist.")
         self.projects_dir.mkdir(parents=True, exist_ok=True)
-
+        logging.info("Projects directory cleaned and recreated.")
 
 # Example usage:
 # ras_examples = RasExamples()
 # extracted_paths = ras_examples.extract_project(["Bald Eagle Creek", "BaldEagleCrkMulti2D", "Muncie"])
 # for path in extracted_paths:
-#
+#     logging.info(f"Extracted to: {path}")
ras_commander/RasGeo.py
CHANGED
@@ -5,8 +5,18 @@ from pathlib import Path
 from typing import List, Union
 from .RasPlan import RasPlan
 from .RasPrj import ras
+import logging
 import re
 
+# Configure logging at the module level
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    # You can add a filename parameter here to log to a file
+    # filename='rasgeo.log',
+    # Uncomment the above line to enable file logging
+)
+
 class RasGeo:
     """
     A class for operations on HEC-RAS geometry files.
@@ -62,27 +72,38 @@ class RasGeo:
             geom_preprocessor_file = plan_path.with_suffix(geom_preprocessor_suffix)
             if geom_preprocessor_file.exists():
                 try:
-
+                    logging.info(f"Deleting geometry preprocessor file: {geom_preprocessor_file}")
                     geom_preprocessor_file.unlink()
-
+                    logging.info("File deletion completed successfully.")
                 except PermissionError:
+                    logging.error(f"Permission denied: Unable to delete geometry preprocessor file: {geom_preprocessor_file}.")
                     raise PermissionError(f"Unable to delete geometry preprocessor file: {geom_preprocessor_file}. Permission denied.")
                 except OSError as e:
+                    logging.error(f"Error deleting geometry preprocessor file: {geom_preprocessor_file}. {str(e)}")
                     raise OSError(f"Error deleting geometry preprocessor file: {geom_preprocessor_file}. {str(e)}")
             else:
-
+                logging.warning(f"No geometry preprocessor file found for: {plan_file}")
 
         if plan_files is None:
-
+            logging.info("Clearing all geometry preprocessor files in the project directory.")
             plan_files_to_clear = list(ras_obj.project_folder.glob(r'*.p*'))
         elif isinstance(plan_files, (str, Path)):
             plan_files_to_clear = [plan_files]
+            logging.info(f"Clearing geometry preprocessor file for single plan: {plan_files}")
         elif isinstance(plan_files, list):
             plan_files_to_clear = plan_files
+            logging.info(f"Clearing geometry preprocessor files for multiple plans: {plan_files}")
         else:
+            logging.error("Invalid input type for plan_files.")
             raise ValueError("Invalid input. Please provide a string, Path, list of paths, or None.")
 
         for plan_file in plan_files_to_clear:
             clear_single_file(plan_file, ras_obj)
-
-
+
+        # Update the geometry dataframe
+        try:
+            ras_obj.geom_df = ras_obj.get_geom_entries()
+            logging.info("Geometry dataframe updated successfully.")
+        except Exception as e:
+            logging.error(f"Failed to update geometry dataframe: {str(e)}")
+            raise
ras_commander/RasHdf.py
ADDED
@@ -0,0 +1,248 @@
+
+
+import h5py
+import numpy as np
+import pandas as pd
+from typing import Union, List, Optional, Dict, Callable
+from scipy.spatial import KDTree
+
+class RasHdf:
+    """
+    A class containing utility functions for working with HDF files in the ras-commander library.
+    """
+
+    @staticmethod
+    def read_hdf_to_dataframe(hdf_dataset: h5py.Dataset, fill_value: Union[int, float, str] = -9999) -> pd.DataFrame:
+        """
+        Reads an HDF5 table using h5py and converts it into a pandas DataFrame, handling byte strings and missing values.
+
+        Args:
+            hdf_dataset (h5py.Dataset): The HDF5 table to read.
+            fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.
+
+        Returns:
+            pd.DataFrame: The resulting DataFrame with byte strings decoded and missing values replaced.
+
+        Example:
+            >>> with h5py.File('data.h5', 'r') as f:
+            ...     dataset = f['my_dataset']
+            ...     df = RasHdf.read_hdf_to_dataframe(dataset)
+            >>> print(df.head())
+        """
+        df = RasHdf.convert_to_dataframe_array(hdf_dataset)
+        byte_cols = [col for col in df.columns if isinstance(df[col].iloc[0], (bytes, bytearray))]
+        test_byte_cols = [col for col in df.columns if isinstance(df[col].iloc[-1], (bytes, bytearray))]
+        assert byte_cols == test_byte_cols, "Inconsistent byte string columns detected"
+
+        try:
+            df[byte_cols] = df[byte_cols].applymap(lambda x: x.decode('utf-8'))
+        except Exception as e:
+            print(f'WARNING: {e} while decoding byte strings in {hdf_dataset.name}, resuming')
+
+        df = df.replace({fill_value: np.NaN})
+        return df
+
+    @staticmethod
+    def save_dataframe_to_hdf(dataframe: pd.DataFrame,
+                              hdf_parent_group: h5py.Group,
+                              dataset_name: str,
+                              attributes: Optional[Dict[str, Union[int, float, str]]] = None,
+                              fill_value: Union[int, float, str] = -9999,
+                              **kwargs: Union[int, float, str]) -> h5py.Dataset:
+        """
+        Saves a pandas DataFrame to an HDF5 dataset within a specified parent group.
+
+        This function addresses limitations of `pd.to_hdf()` by using h5py to create and save datasets.
+
+        Args:
+            dataframe (pd.DataFrame): The DataFrame to save.
+            hdf_parent_group (h5py.Group): The parent HDF5 group where the dataset will be created.
+            dataset_name (str): The name of the new dataset to add in the HDF5 parent group.
+            attributes (Optional[Dict[str, Union[int, float, str]]], optional): A dictionary of attributes to add to the dataset. Defaults to None.
+            fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.
+            **kwargs: Additional keyword arguments passed to `hdf_parent_group.create_dataset()`.
+
+        Returns:
+            h5py.Dataset: The created HDF5 dataset within the parent group.
+
+        Example:
+            >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
+            >>> with h5py.File('data.h5', 'w') as f:
+            ...     group = f.create_group('my_group')
+            ...     dataset = RasHdf.save_dataframe_to_hdf(df, group, 'my_dataset')
+            >>> print(dataset)
+        """
+        df = dataframe.copy()
+
+        if df.columns.dtype == 'O':
+            df.columns = df.columns.str.replace('/', '-')
+
+        df = df.fillna(fill_value)
+
+        string_cols = [col for col in df.columns if isinstance(df[col].iloc[0], str)]
+        test_string_cols = [col for col in df.columns if isinstance(df[col].iloc[-1], str)]
+        assert string_cols == test_string_cols, "Inconsistent string columns detected"
+
+        df[string_cols] = df[string_cols].applymap(lambda x: x.encode('utf-8')).astype('bytes')
+
+        if isinstance(df.columns, pd.RangeIndex):
+            arr = df.values
+        else:
+            arr_dt = [(col, df[col].dtype) for col in df.columns]
+            arr = np.empty((len(df),), dtype=arr_dt)
+            for col in df.columns:
+                arr[col] = df[col].values
+
+        if dataset_name in hdf_parent_group:
+            del hdf_parent_group[dataset_name]
+
+        dataset = hdf_parent_group.create_dataset(dataset_name, data=arr, **kwargs)
+
+        if attributes:
+            dataset.attrs.update(attributes)
+
+        return dataset
+
+    @staticmethod
+    def perform_kdtree_query(reference_points: np.ndarray, query_points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
+        """
+        Performs a KDTree query between two datasets and returns indices with distances exceeding max_distance set to -1.
+
+        Args:
+            reference_points (np.ndarray): The reference dataset for KDTree.
+            query_points (np.ndarray): The query dataset to search against KDTree of reference_points.
+            max_distance (float, optional): The maximum distance threshold. Indices with distances greater than this are set to -1. Defaults to 2.0.
+
+        Returns:
+            np.ndarray: Array of indices from reference_points that are nearest to each point in query_points.
+                Indices with distances > max_distance are set to -1.
+
+        Example:
+            >>> ref_points = np.array([[0, 0], [1, 1], [2, 2]])
+            >>> query_points = np.array([[0.5, 0.5], [3, 3]])
+            >>> RasHdf.perform_kdtree_query(ref_points, query_points)
+            array([ 0, -1])
+        """
+        dist, snap = KDTree(reference_points).query(query_points, distance_upper_bound=max_distance)
+        snap[dist > max_distance] = -1
+        return snap
+
+    @staticmethod
+    def find_nearest_neighbors(points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
+        """
+        Creates a self KDTree for dataset points and finds nearest neighbors excluding self,
+        with distances above max_distance set to -1.
+
+        Args:
+            points (np.ndarray): The dataset to build the KDTree from and query against itself.
+            max_distance (float, optional): The maximum distance threshold. Indices with distances
+                greater than max_distance are set to -1. Defaults to 2.0.
+
+        Returns:
+            np.ndarray: Array of indices representing the nearest neighbor in points for each point in points.
+                Indices with distances > max_distance or self-matches are set to -1.
+
+        Example:
+            >>> points = np.array([[0, 0], [1, 1], [2, 2], [10, 10]])
+            >>> RasHdf.find_nearest_neighbors(points)
+            array([1, 0, 1, -1])
+        """
+        dist, snap = KDTree(points).query(points, k=2, distance_upper_bound=max_distance)
+        snap[dist > max_distance] = -1
+
+        snp = pd.DataFrame(snap, index=np.arange(len(snap)))
+        snp = snp.replace(-1, np.nan)
+        snp.loc[snp[0] == snp.index, 0] = np.nan
+        snp.loc[snp[1] == snp.index, 1] = np.nan
+        filled = snp[0].fillna(snp[1])
+        snapped = filled.fillna(-1).astype(np.int64).to_numpy()
+        return snapped
+
+    @staticmethod
+    def consolidate_dataframe(dataframe: pd.DataFrame,
+                              group_by: Optional[Union[str, List[str]]] = None,
+                              pivot_columns: Optional[Union[str, List[str]]] = None,
+                              level: Optional[int] = None,
+                              n_dimensional: bool = False,
+                              aggregation_method: Union[str, Callable] = 'list') -> pd.DataFrame:
+        """
+        Consolidate rows in a DataFrame by merging duplicate values into lists or using a specified aggregation function.
+
+        Args:
+            dataframe (pd.DataFrame): The DataFrame to consolidate.
+            group_by (Optional[Union[str, List[str]]], optional): Columns or indices to group by. Defaults to None.
+            pivot_columns (Optional[Union[str, List[str]]], optional): Columns to pivot. Defaults to None.
+            level (Optional[int], optional): Level of multi-index to group by. Defaults to None.
+            n_dimensional (bool, optional): If True, use a pivot table for N-Dimensional consolidation. Defaults to False.
+            aggregation_method (Union[str, Callable], optional): Aggregation method, e.g., 'list' to aggregate into lists. Defaults to 'list'.
+
+        Returns:
+            pd.DataFrame: The consolidated DataFrame.
+
+        Example:
+            >>> df = pd.DataFrame({'A': [1, 1, 2], 'B': [4, 5, 6], 'C': [7, 8, 9]})
+            >>> RasHdf.consolidate_dataframe(df, group_by='A')
+                    B       C
+            A
+            1  [4, 5]  [7, 8]
+            2     [6]     [9]
+        """
+        if aggregation_method == 'list':
+            agg_func = lambda x: tuple(x)
+        else:
+            agg_func = aggregation_method
+
+        if n_dimensional:
+            result = dataframe.pivot_table(group_by, pivot_columns, aggfunc=agg_func)
+        else:
+            result = dataframe.groupby(group_by, level=level).agg(agg_func).applymap(list)
+
+        return result
+
+    @staticmethod
+    def decode_byte_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
+        """
+        Decodes byte strings in a DataFrame to regular string objects.
+
+        This function converts columns with byte-encoded strings (e.g., b'string') into UTF-8 decoded strings.
+
+        Args:
+            dataframe (pd.DataFrame): The DataFrame containing byte-encoded string columns.
+
+        Returns:
+            pd.DataFrame: The DataFrame with byte strings decoded to regular strings.
+
+        Example:
+            >>> df = pd.DataFrame({'A': [b'hello', b'world'], 'B': [1, 2]})
+            >>> RasHdf.decode_byte_strings(df)
+                   A  B
+            0  hello  1
+            1  world  2
+        """
+        str_df = dataframe.select_dtypes(['object'])
+        str_df = str_df.stack().str.decode('utf-8').unstack()
+        for col in str_df:
+            dataframe[col] = str_df[col]
+        return dataframe
+
+    @staticmethod
+    def find_nearest_value(array: Union[list, np.ndarray], target_value: Union[int, float]) -> Union[int, float]:
+        """
+        Finds the nearest value in a NumPy array to the specified target value.
+
+        Args:
+            array (Union[list, np.ndarray]): The array to search within.
+            target_value (Union[int, float]): The value to find the nearest neighbor to.
+
+        Returns:
+            Union[int, float]: The nearest value in the array to the specified target value.
+
+        Example:
+            >>> arr = np.array([1, 3, 5, 7, 9])
+            >>> RasHdf.find_nearest_value(arr, 6)
+            5
+        """
+        array = np.asarray(array)
+        idx = (np.abs(array - target_value)).argmin()
+        return array[idx]
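The new RasHdf class depends only on h5py, NumPy, pandas, and SciPy, so its helpers can be exercised outside a HEC-RAS project. A short sketch of the DataFrame round trip and the KDTree helpers defined above (file and dataset names are illustrative; note that read_hdf_to_dataframe references a convert_to_dataframe_array helper not present in this file, so the sketch sticks to the self-contained functions):

    import h5py
    import numpy as np
    import pandas as pd
    from ras_commander.RasHdf import RasHdf

    # Write a DataFrame with a missing value; NaN is stored as the -9999 fill value.
    df = pd.DataFrame({'A': [1.0, 2.0, np.nan], 'B': ['x', 'y', 'z']})
    with h5py.File('scratch.h5', 'w') as f:
        group = f.create_group('results')
        RasHdf.save_dataframe_to_hdf(df, group, 'table')

    # Snap query points to reference points within 2.0 units; unmatched points get -1.
    cells = np.array([[0.0, 0.0], [1.0, 1.0], [5.0, 5.0]])
    faces = np.array([[0.9, 1.1], [9.0, 9.0]])
    print(RasHdf.perform_kdtree_query(cells, faces))                # -> [ 1 -1]
    print(RasHdf.find_nearest_value(np.array([1, 3, 5, 7, 9]), 6))  # -> 5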