ras-commander 0.33.0__py3-none-any.whl → 0.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,6 +7,16 @@ import shutil
7
7
  from typing import Union, List
8
8
  import csv
9
9
  from datetime import datetime
10
+ import logging
11
+
12
+ # Configure logging
13
+ logging.basicConfig(
14
+ level=logging.INFO, # Set the logging level to INFO
15
+ format='%(asctime)s - %(levelname)s - %(message)s', # Log message format
16
+ handlers=[
17
+ logging.StreamHandler() # Log to stderr
18
+ ]
19
+ )
10
20
 
11
21
  class RasExamples:
12
22
  """
@@ -38,14 +48,12 @@ class RasExamples:
38
48
  folder_df (pd.DataFrame): DataFrame containing folder structure information.
39
49
  csv_file_path (Path): Path to the CSV file for caching project metadata.
40
50
 
41
-
42
51
  Future Improvements:
43
52
  - Implement the ability for user-provided example projects (provided as a zip file) for their own repeatable examples.
44
53
  - If the zip file is in the same folder structure as the HEC-RAS example projects, simply replace Example_Projects_6_5.zip and the folder structure will be automatically extracted from the zip file.
45
54
  - The actual RAS example projects haven't been updated much, but there is the structure here to handle future versions. Although this version of the code is probably fine for a few years, until HEC-RAS 2025 comes out.
46
-
47
55
  """
48
-
56
+
49
57
  def __init__(self):
50
58
  """
51
59
  Initialize the RasExamples class.
@@ -54,7 +62,7 @@ class RasExamples:
54
62
  It initializes the base URL for downloads, valid versions, directory paths, and other essential
55
63
  attributes. It also creates the projects directory if it doesn't exist and loads the project data.
56
64
 
57
- The method also prints the location of the example projects folder and calls _load_project_data()
65
+ The method also logs the location of the example projects folder and calls _load_project_data()
58
66
  to initialize the project data.
59
67
  """
60
68
  self.base_url = 'https://github.com/HydrologicEngineeringCenter/hec-downloads/releases/download/'
@@ -71,7 +79,7 @@ class RasExamples:
71
79
  self.csv_file_path = self.examples_dir / 'example_projects.csv'
72
80
 
73
81
  self.projects_dir.mkdir(parents=True, exist_ok=True)
74
- print(f"Example projects folder: {self.projects_dir}")
82
+ logging.info(f"Example projects folder: {self.projects_dir}")
75
83
  self._load_project_data()
76
84
 
77
85
  def _load_project_data(self):
@@ -84,21 +92,29 @@ class RasExamples:
84
92
  self._find_zip_file()
85
93
 
86
94
  if not self.zip_file_path:
87
- print("No example projects zip file found. Downloading...")
95
+ logging.info("No example projects zip file found. Downloading...")
88
96
  self.get_example_projects()
89
97
 
90
- zip_modified_time = os.path.getmtime(self.zip_file_path)
98
+ try:
99
+ zip_modified_time = os.path.getmtime(self.zip_file_path)
100
+ except FileNotFoundError:
101
+ logging.error(f"Zip file not found at {self.zip_file_path}.")
102
+ return
91
103
 
92
104
  if self.csv_file_path.exists():
93
105
  csv_modified_time = os.path.getmtime(self.csv_file_path)
94
106
 
95
107
  if csv_modified_time >= zip_modified_time:
96
- print("Loading project data from CSV...")
97
- self.folder_df = pd.read_csv(self.csv_file_path)
98
- print(f"Loaded {len(self.folder_df)} projects from CSV, use list_categories() and list_projects() to explore them")
108
+ logging.info("Loading project data from CSV...")
109
+ try:
110
+ self.folder_df = pd.read_csv(self.csv_file_path)
111
+ logging.info(f"Loaded {len(self.folder_df)} projects from CSV. Use list_categories() and list_projects() to explore them.")
112
+ except Exception as e:
113
+ logging.error(f"Failed to read CSV file: {e}")
114
+ self.folder_df = None
99
115
  return
100
116
 
101
- print("Extracting folder structure from zip file...")
117
+ logging.info("Extracting folder structure from zip file...")
102
118
  self._extract_folder_structure()
103
119
  self._save_to_csv()
104
120
 
@@ -108,8 +124,10 @@ class RasExamples:
108
124
  potential_zip = self.examples_dir / f"Example_Projects_{version.replace('.', '_')}.zip"
109
125
  if potential_zip.exists():
110
126
  self.zip_file_path = potential_zip
111
- print(f"Found zip file: {self.zip_file_path}")
127
+ logging.info(f"Found zip file: {self.zip_file_path}")
112
128
  break
129
+ else:
130
+ logging.warning("No existing example projects zip file found.")
113
131
 
114
132
  def _extract_folder_structure(self):
115
133
  """
@@ -129,18 +147,25 @@ class RasExamples:
129
147
  })
130
148
 
131
149
  self.folder_df = pd.DataFrame(folder_data).drop_duplicates()
132
- print(f"Extracted {len(self.folder_df)} projects")
133
- print("folder_df:")
134
- display(self.folder_df)
150
+ logging.info(f"Extracted {len(self.folder_df)} projects.")
151
+ logging.debug(f"folder_df:\n{self.folder_df}")
152
+ except zipfile.BadZipFile:
153
+ logging.error(f"The file {self.zip_file_path} is not a valid zip file.")
154
+ self.folder_df = pd.DataFrame(columns=['Category', 'Project'])
135
155
  except Exception as e:
136
- print(f"An error occurred while extracting the folder structure: {str(e)}")
156
+ logging.error(f"An error occurred while extracting the folder structure: {str(e)}")
137
157
  self.folder_df = pd.DataFrame(columns=['Category', 'Project'])
138
158
 
139
159
  def _save_to_csv(self):
140
160
  """Save the extracted folder structure to CSV file."""
141
161
  if self.folder_df is not None and not self.folder_df.empty:
142
- self.folder_df.to_csv(self.csv_file_path, index=False)
143
- print(f"Saved project data to {self.csv_file_path}")
162
+ try:
163
+ self.folder_df.to_csv(self.csv_file_path, index=False)
164
+ logging.info(f"Saved project data to {self.csv_file_path}")
165
+ except Exception as e:
166
+ logging.error(f"Failed to save project data to CSV: {e}")
167
+ else:
168
+ logging.warning("No folder data to save to CSV.")
144
169
 
145
170
  def get_example_projects(self, version_number='6.5'):
146
171
  """
@@ -155,9 +180,11 @@ class RasExamples:
155
180
  Raises:
156
181
  ValueError: If an invalid version number is provided.
157
182
  """
158
- print(f"Getting example projects for version {version_number}")
183
+ logging.info(f"Getting example projects for version {version_number}")
159
184
  if version_number not in self.valid_versions:
160
- raise ValueError(f"Invalid version number. Valid versions are: {', '.join(self.valid_versions)}")
185
+ error_msg = f"Invalid version number. Valid versions are: {', '.join(self.valid_versions)}"
186
+ logging.error(error_msg)
187
+ raise ValueError(error_msg)
161
188
 
162
189
  zip_url = f"{self.base_url}1.0.31/Example_Projects_{version_number.replace('.', '_')}.zip"
163
190
 
@@ -166,13 +193,18 @@ class RasExamples:
166
193
  self.zip_file_path = self.examples_dir / f"Example_Projects_{version_number.replace('.', '_')}.zip"
167
194
 
168
195
  if not self.zip_file_path.exists():
169
- print(f"Downloading HEC-RAS Example Projects from {zip_url}. \n The file is over 400 MB, so it may take a few minutes to download....")
170
- response = requests.get(zip_url)
171
- with open(self.zip_file_path, 'wb') as file:
172
- file.write(response.content)
173
- print(f"Downloaded to {self.zip_file_path}")
196
+ logging.info(f"Downloading HEC-RAS Example Projects from {zip_url}. \nThe file is over 400 MB, so it may take a few minutes to download....")
197
+ try:
198
+ response = requests.get(zip_url, stream=True)
199
+ response.raise_for_status()
200
+ with open(self.zip_file_path, 'wb') as file:
201
+ shutil.copyfileobj(response.raw, file)
202
+ logging.info(f"Downloaded to {self.zip_file_path}")
203
+ except requests.exceptions.RequestException as e:
204
+ logging.error(f"Failed to download the zip file: {e}")
205
+ raise
174
206
  else:
175
- print("HEC-RAS Example Projects zip file already exists. Skipping download.")
207
+ logging.info("HEC-RAS Example Projects zip file already exists. Skipping download.")
176
208
 
177
209
  self._load_project_data()
178
210
  return self.projects_dir
@@ -185,10 +217,10 @@ class RasExamples:
185
217
  list: Available categories.
186
218
  """
187
219
  if self.folder_df is None or 'Category' not in self.folder_df.columns:
188
- print("No categories available. Make sure the zip file is properly loaded.")
220
+ logging.warning("No categories available. Make sure the zip file is properly loaded.")
189
221
  return []
190
222
  categories = self.folder_df['Category'].unique()
191
- print(f"Available categories: {', '.join(categories)}")
223
+ logging.info(f"Available categories: {', '.join(categories)}")
192
224
  return categories.tolist()
193
225
 
194
226
  def list_projects(self, category=None):
@@ -202,12 +234,14 @@ class RasExamples:
202
234
  list: List of project names.
203
235
  """
204
236
  if self.folder_df is None:
205
- print("No projects available. Make sure the zip file is properly loaded.")
237
+ logging.warning("No projects available. Make sure the zip file is properly loaded.")
206
238
  return []
207
239
  if category:
208
240
  projects = self.folder_df[self.folder_df['Category'] == category]['Project'].unique()
241
+ logging.info(f"Projects in category '{category}': {', '.join(projects)}")
209
242
  else:
210
243
  projects = self.folder_df['Project'].unique()
244
+ logging.info(f"All available projects: {', '.join(projects)}")
211
245
  return projects.tolist()
212
246
 
213
247
  def extract_project(self, project_names: Union[str, List[str]]):
@@ -229,21 +263,29 @@ class RasExamples:
229
263
  extracted_paths = []
230
264
 
231
265
  for project_name in project_names:
232
- print("----- RasExamples Extracting Project -----")
233
- print(f"Extracting project '{project_name}'")
266
+ logging.info("----- RasExamples Extracting Project -----")
267
+ logging.info(f"Extracting project '{project_name}'")
234
268
  project_path = self.projects_dir / project_name
235
269
 
236
270
  if project_path.exists():
237
- print(f"Project '{project_name}' already exists. Deleting existing folder...")
238
- shutil.rmtree(project_path)
239
- print(f"Existing folder for project '{project_name}' has been deleted.")
271
+ logging.info(f"Project '{project_name}' already exists. Deleting existing folder...")
272
+ try:
273
+ shutil.rmtree(project_path)
274
+ logging.info(f"Existing folder for project '{project_name}' has been deleted.")
275
+ except Exception as e:
276
+ logging.error(f"Failed to delete existing project folder '{project_name}': {e}")
277
+ continue
240
278
 
241
279
  if self.folder_df is None or self.folder_df.empty:
242
- raise ValueError("No project information available. Make sure the zip file is properly loaded.")
280
+ error_msg = "No project information available. Make sure the zip file is properly loaded."
281
+ logging.error(error_msg)
282
+ raise ValueError(error_msg)
243
283
 
244
284
  project_info = self.folder_df[self.folder_df['Project'] == project_name]
245
285
  if project_info.empty:
246
- raise ValueError(f"Project '{project_name}' not found in the zip file.")
286
+ error_msg = f"Project '{project_name}' not found in the zip file."
287
+ logging.error(error_msg)
288
+ raise ValueError(error_msg)
247
289
 
248
290
  category = project_info['Category'].iloc[0]
249
291
 
@@ -265,15 +307,15 @@ class RasExamples:
265
307
  with zip_ref.open(file) as source, open(extract_path, "wb") as target:
266
308
  shutil.copyfileobj(source, target)
267
309
 
268
- print(f"Successfully extracted project '{project_name}' to {project_path}")
310
+ logging.info(f"Successfully extracted project '{project_name}' to {project_path}")
269
311
  extracted_paths.append(project_path)
270
312
  except zipfile.BadZipFile:
271
- print(f"Error: The file {self.zip_file_path} is not a valid zip file.")
313
+ logging.error(f"Error: The file {self.zip_file_path} is not a valid zip file.")
272
314
  except FileNotFoundError:
273
- print(f"Error: The file {self.zip_file_path} was not found.")
315
+ logging.error(f"Error: The file {self.zip_file_path} was not found.")
274
316
  except Exception as e:
275
- print(f"An unexpected error occurred while extracting the project: {str(e)}")
276
- #print("----- RasExamples Extraction Complete -----")
317
+ logging.error(f"An unexpected error occurred while extracting the project: {str(e)}")
318
+ logging.info("----- RasExamples Extraction Complete -----")
277
319
  return extracted_paths
278
320
 
279
321
  def is_project_extracted(self, project_name):
@@ -287,18 +329,26 @@ class RasExamples:
287
329
  bool: True if the project is extracted, False otherwise.
288
330
  """
289
331
  project_path = self.projects_dir / project_name
290
- return project_path.exists()
332
+ is_extracted = project_path.exists()
333
+ logging.info(f"Project '{project_name}' extracted: {is_extracted}")
334
+ return is_extracted
291
335
 
292
336
  def clean_projects_directory(self):
293
337
  """Remove all extracted projects from the example_projects directory."""
294
- print(f"Cleaning projects directory: {self.projects_dir}")
338
+ logging.info(f"Cleaning projects directory: {self.projects_dir}")
295
339
  if self.projects_dir.exists():
296
- shutil.rmtree(self.projects_dir)
340
+ try:
341
+ shutil.rmtree(self.projects_dir)
342
+ logging.info("All projects have been removed.")
343
+ except Exception as e:
344
+ logging.error(f"Failed to remove projects directory: {e}")
345
+ else:
346
+ logging.warning("Projects directory does not exist.")
297
347
  self.projects_dir.mkdir(parents=True, exist_ok=True)
298
- print("Projects directory cleaned.")
348
+ logging.info("Projects directory cleaned and recreated.")
299
349
 
300
350
  # Example usage:
301
351
  # ras_examples = RasExamples()
302
352
  # extracted_paths = ras_examples.extract_project(["Bald Eagle Creek", "BaldEagleCrkMulti2D", "Muncie"])
303
353
  # for path in extracted_paths:
304
- # print(f"Extracted to: {path}")
354
+ # logging.info(f"Extracted to: {path}")
ras_commander/RasGeo.py CHANGED
@@ -5,8 +5,18 @@ from pathlib import Path
5
5
  from typing import List, Union
6
6
  from .RasPlan import RasPlan
7
7
  from .RasPrj import ras
8
+ import logging
8
9
  import re
9
10
 
11
+ # Configure logging at the module level
12
+ logging.basicConfig(
13
+ level=logging.INFO,
14
+ format='%(asctime)s - %(levelname)s - %(message)s',
15
+ # You can add a filename parameter here to log to a file
16
+ # filename='rasgeo.log',
17
+ # Uncomment the above line to enable file logging
18
+ )
19
+
10
20
  class RasGeo:
11
21
  """
12
22
  A class for operations on HEC-RAS geometry files.
@@ -62,27 +72,38 @@ class RasGeo:
62
72
  geom_preprocessor_file = plan_path.with_suffix(geom_preprocessor_suffix)
63
73
  if geom_preprocessor_file.exists():
64
74
  try:
65
- print(f"Deleting geometry preprocessor file: {geom_preprocessor_file}")
75
+ logging.info(f"Deleting geometry preprocessor file: {geom_preprocessor_file}")
66
76
  geom_preprocessor_file.unlink()
67
- print("File deletion completed successfully.")
77
+ logging.info("File deletion completed successfully.")
68
78
  except PermissionError:
79
+ logging.error(f"Permission denied: Unable to delete geometry preprocessor file: {geom_preprocessor_file}.")
69
80
  raise PermissionError(f"Unable to delete geometry preprocessor file: {geom_preprocessor_file}. Permission denied.")
70
81
  except OSError as e:
82
+ logging.error(f"Error deleting geometry preprocessor file: {geom_preprocessor_file}. {str(e)}")
71
83
  raise OSError(f"Error deleting geometry preprocessor file: {geom_preprocessor_file}. {str(e)}")
72
84
  else:
73
- print(f"No geometry preprocessor file found for: {plan_file}")
85
+ logging.warning(f"No geometry preprocessor file found for: {plan_file}")
74
86
 
75
87
  if plan_files is None:
76
- print("Clearing all geometry preprocessor files in the project directory.")
88
+ logging.info("Clearing all geometry preprocessor files in the project directory.")
77
89
  plan_files_to_clear = list(ras_obj.project_folder.glob(r'*.p*'))
78
90
  elif isinstance(plan_files, (str, Path)):
79
91
  plan_files_to_clear = [plan_files]
92
+ logging.info(f"Clearing geometry preprocessor file for single plan: {plan_files}")
80
93
  elif isinstance(plan_files, list):
81
94
  plan_files_to_clear = plan_files
95
+ logging.info(f"Clearing geometry preprocessor files for multiple plans: {plan_files}")
82
96
  else:
97
+ logging.error("Invalid input type for plan_files.")
83
98
  raise ValueError("Invalid input. Please provide a string, Path, list of paths, or None.")
84
99
 
85
100
  for plan_file in plan_files_to_clear:
86
101
  clear_single_file(plan_file, ras_obj)
87
- ras_obj.geom_df = ras_obj.get_geom_entries()
88
-
102
+
103
+ # Update the geometry dataframe
104
+ try:
105
+ ras_obj.geom_df = ras_obj.get_geom_entries()
106
+ logging.info("Geometry dataframe updated successfully.")
107
+ except Exception as e:
108
+ logging.error(f"Failed to update geometry dataframe: {str(e)}")
109
+ raise
@@ -0,0 +1,248 @@
1
+
2
+
3
+ import h5py
4
+ import numpy as np
5
+ import pandas as pd
6
+ from typing import Union, List, Optional, Dict, Callable
7
+ from scipy.spatial import KDTree
8
+
9
class RasHdf:
    """
    A class containing utility functions for working with HDF files in the ras-commander library.

    Every member is a static method; the class is used purely as a namespace.
    """

    @staticmethod
    def _decode_if_bytes(value):
        """Return ``value`` decoded as UTF-8 when it is a byte string, otherwise return it unchanged."""
        if isinstance(value, (bytes, bytearray)):
            return value.decode('utf-8')
        return value

    @staticmethod
    def convert_to_dataframe_array(hdf_dataset: "h5py.Dataset") -> pd.DataFrame:
        """
        Reads an entire HDF5 dataset into memory and wraps it in a pandas DataFrame.

        Compound (structured) datasets produce one column per field; plain 1-D or 2-D
        datasets produce positionally numbered columns. A scalar dataset becomes a
        single-row DataFrame.

        Args:
            hdf_dataset (h5py.Dataset): The dataset to read. Any object that supports
                ``obj[()]`` and yields array-like data is accepted.

        Returns:
            pd.DataFrame: The dataset contents, with no decoding or fill-value handling applied.

        Raises:
            ValueError: If the dataset has more than two dimensions (raised by pandas).
        """
        data = np.asarray(hdf_dataset[()])
        if data.ndim == 0:
            # A scalar dataset has no row axis; give it one so pandas can build a frame.
            data = data.reshape(1)
        return pd.DataFrame(data)

    @staticmethod
    def read_hdf_to_dataframe(hdf_dataset: "h5py.Dataset", fill_value: Union[int, float, str] = -9999) -> pd.DataFrame:
        """
        Reads an HDF5 table using h5py and converts it into a pandas DataFrame, handling byte strings and missing values.

        Byte strings are decoded element by element, so empty tables and columns that mix
        bytes with other values are handled without raising. A column whose bytes are not
        valid UTF-8 is left undecoded and a warning is logged.

        Args:
            hdf_dataset (h5py.Dataset): The HDF5 table to read.
            fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.

        Returns:
            pd.DataFrame: The resulting DataFrame with byte strings decoded and missing values replaced.

        Example:
            >>> with h5py.File('data.h5', 'r') as f:
            ...     dataset = f['my_dataset']
            ...     df = RasHdf.read_hdf_to_dataframe(dataset)
            >>> print(df.head())
        """
        import logging  # local import: the module header does not import logging

        df = RasHdf.convert_to_dataframe_array(hdf_dataset)

        for col in df.columns:
            # Only object ('O') or fixed-width bytes ('S') columns can hold byte strings.
            if df[col].dtype.kind not in ('O', 'S'):
                continue
            try:
                df[col] = df[col].map(RasHdf._decode_if_bytes)
            except UnicodeDecodeError as e:
                logging.warning(
                    "%s while decoding byte strings in column %r of %s, resuming",
                    e, col, getattr(hdf_dataset, 'name', '<unnamed dataset>'),
                )

        # np.nan (not np.NaN, which was removed in NumPy 2.0) marks the missing entries.
        df = df.replace({fill_value: np.nan})
        return df

    @staticmethod
    def save_dataframe_to_hdf(dataframe: pd.DataFrame,
                              hdf_parent_group: "h5py.Group",
                              dataset_name: str,
                              attributes: Optional[Dict[str, Union[int, float, str]]] = None,
                              fill_value: Union[int, float, str] = -9999,
                              **kwargs: Union[int, float, str]) -> "h5py.Dataset":
        """
        Saves a pandas DataFrame to an HDF5 dataset within a specified parent group.

        This function addresses limitations of `pd.to_hdf()` by using h5py to create and save datasets.
        The input DataFrame is copied and never modified. An existing dataset with the same
        name in the parent group is deleted and replaced.

        Args:
            dataframe (pd.DataFrame): The DataFrame to save.
            hdf_parent_group (h5py.Group): The parent HDF5 group where the dataset will be created.
            dataset_name (str): The name of the new dataset to add in the HDF5 parent group.
            attributes (Optional[Dict[str, Union[int, float, str]]], optional): A dictionary of attributes to add to the dataset. Defaults to None.
            fill_value (Union[int, float, str], optional): The value to use for filling missing data. Defaults to -9999.
            **kwargs: Additional keyword arguments passed to `hdf_parent_group.create_dataset()`.

        Returns:
            h5py.Dataset: The created HDF5 dataset within the parent group.

        Raises:
            ValueError: If, after missing values are filled, a column mixes string and
                non-string values and therefore cannot be encoded consistently.

        Example:
            >>> df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'c']})
            >>> with h5py.File('data.h5', 'w') as f:
            ...     group = f.create_group('my_group')
            ...     dataset = RasHdf.save_dataframe_to_hdf(df, group, 'my_dataset')
            >>> print(dataset)
        """
        df = dataframe.copy()

        # Replace '/' in column names (presumably because '/' is the HDF5 path separator).
        # Done per label so it works for any index dtype that carries string labels.
        if any(isinstance(label, str) for label in df.columns):
            df.columns = [label.replace('/', '-') if isinstance(label, str) else label
                          for label in df.columns]

        df = df.fillna(fill_value)

        # Find the columns made up entirely of Python strings; they must be encoded to bytes.
        # An empty frame has nothing to inspect, so it has no string columns.
        string_cols = []
        if len(df) > 0:
            for col in df.columns:
                if df[col].dtype.kind != 'O':
                    continue  # native numeric/bytes dtypes cannot contain str objects
                is_str = df[col].map(lambda value: isinstance(value, str))
                if is_str.all():
                    string_cols.append(col)
                elif is_str.any():
                    raise ValueError(
                        f"Column {col!r} mixes string and non-string values and cannot be saved to HDF5."
                    )

        if string_cols:
            encoded = df[string_cols].apply(lambda column: column.map(lambda value: value.encode('utf-8')))
            df[string_cols] = encoded.astype('bytes')

        if isinstance(df.columns, pd.RangeIndex):
            # Unnamed columns: store as a plain 2-D array.
            arr = df.values
        else:
            # Named columns: store as a 1-D structured array with one field per column.
            arr_dt = [(col, df[col].dtype) for col in df.columns]
            arr = np.empty((len(df),), dtype=arr_dt)
            for col in df.columns:
                arr[col] = df[col].values

        if dataset_name in hdf_parent_group:
            del hdf_parent_group[dataset_name]

        dataset = hdf_parent_group.create_dataset(dataset_name, data=arr, **kwargs)

        if attributes:
            dataset.attrs.update(attributes)

        return dataset

    @staticmethod
    def perform_kdtree_query(reference_points: np.ndarray, query_points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
        """
        Performs a KDTree query between two datasets and returns indices with distances exceeding max_distance set to -1.

        Args:
            reference_points (np.ndarray): The reference dataset for KDTree.
            query_points (np.ndarray): The query dataset to search against KDTree of reference_points.
                A single point (1-D) is also accepted and yields a 0-d result.
            max_distance (float, optional): The maximum distance threshold. Indices with distances greater than this are set to -1. Defaults to 2.0.

        Returns:
            np.ndarray: Array of indices from reference_points that are nearest to each point in query_points.
                        Indices with distances > max_distance are set to -1.

        Example:
            >>> ref_points = np.array([[0, 0], [1, 1], [2, 2]])
            >>> query_points = np.array([[0.1, 0.1], [5, 5]])
            >>> RasHdf.perform_kdtree_query(ref_points, query_points)
            array([ 0, -1])
        """
        dist, snap = KDTree(reference_points).query(query_points, distance_upper_bound=max_distance)
        # Queries with no neighbour inside the bound come back with an infinite distance;
        # np.where (unlike item assignment) also works when the result is a scalar.
        return np.where(np.asarray(dist) > max_distance, -1, snap)

    @staticmethod
    def find_nearest_neighbors(points: np.ndarray, max_distance: float = 2.0) -> np.ndarray:
        """
        Creates a self KDTree for dataset points and finds nearest neighbors excluding self,
        with distances above max_distance set to -1.

        Args:
            points (np.ndarray): The dataset to build the KDTree from and query against itself.
            max_distance (float, optional): The maximum distance threshold. Indices with distances
                                            greater than max_distance are set to -1. Defaults to 2.0.

        Returns:
            np.ndarray: Array of indices representing the nearest neighbor in points for each point in points.
                        Indices with distances > max_distance or self-matches are set to -1.

        Example:
            >>> points = np.array([[0, 0], [1, 0], [2.5, 0], [10, 0]])
            >>> RasHdf.find_nearest_neighbors(points)
            array([ 1,  0,  1, -1])
        """
        points = np.asarray(points)
        n_points = len(points)
        if n_points == 0:
            return np.empty(0, dtype=np.int64)

        # k=2 because the closest hit for each point is normally the point itself.
        dist, snap = KDTree(points).query(points, k=2, distance_upper_bound=max_distance)

        own_index = np.arange(n_points)[:, np.newaxis]
        # A candidate is usable when it lies within range and is not the query point itself.
        # Both columns are checked because coincident points may list the self-match second.
        usable = (dist <= max_distance) & (snap != own_index)
        first_usable = usable.argmax(axis=1)
        nearest = snap[np.arange(n_points), first_usable]
        return np.where(usable.any(axis=1), nearest, -1).astype(np.int64)

    @staticmethod
    def consolidate_dataframe(dataframe: pd.DataFrame,
                              group_by: Optional[Union[str, List[str]]] = None,
                              pivot_columns: Optional[Union[str, List[str]]] = None,
                              level: Optional[int] = None,
                              n_dimensional: bool = False,
                              aggregation_method: Union[str, Callable] = 'list') -> pd.DataFrame:
        """
        Consolidate rows in a DataFrame by merging duplicate values into lists or using a specified aggregation function.

        Args:
            dataframe (pd.DataFrame): The DataFrame to consolidate.
            group_by (Optional[Union[str, List[str]]], optional): Columns or indices to group by. Defaults to None.
            pivot_columns (Optional[Union[str, List[str]]], optional): Columns to pivot. Defaults to None.
            level (Optional[int], optional): Level of multi-index to group by. Defaults to None.
            n_dimensional (bool, optional): If True, use a pivot table for N-Dimensional consolidation. Defaults to False.
            aggregation_method (Union[str, Callable], optional): Aggregation method, e.g., 'list' to aggregate into lists.
                Any other value is handed to pandas unchanged (e.g. 'sum', 'first', or a callable). Defaults to 'list'.

        Returns:
            pd.DataFrame: The consolidated DataFrame. With 'list' aggregation the cells hold lists
                (tuples when n_dimensional is True); otherwise they hold whatever the aggregation returns.

        Example:
            >>> df = pd.DataFrame({'A': [1, 1, 2], 'B': [4, 5, 6], 'C': [7, 8, 9]})
            >>> RasHdf.consolidate_dataframe(df, group_by='A')
                    B       C
            A
            1  [4, 5]  [7, 8]
            2     [6]     [9]
        """
        collect_lists = isinstance(aggregation_method, str) and aggregation_method == 'list'
        if collect_lists:
            # Aggregate into tuples first; they are converted to lists after grouping.
            agg_func = lambda values: tuple(values)
        else:
            agg_func = aggregation_method

        if n_dimensional:
            # NOTE(review): this keeps the original positional binding, where group_by is
            # passed as pivot_table's `values` and pivot_columns as its `index`. That looks
            # inverted relative to the parameter descriptions above - confirm the intent.
            return dataframe.pivot_table(values=group_by, index=pivot_columns, aggfunc=agg_func)

        result = dataframe.groupby(group_by, level=level).agg(agg_func)
        if collect_lists:
            # Only the tuple cells produced above are list-convertible; applying list() to
            # scalar aggregates such as a sum would raise TypeError.
            result = result.apply(lambda column: column.map(list))
        return result

    @staticmethod
    def decode_byte_strings(dataframe: pd.DataFrame) -> pd.DataFrame:
        """
        Decodes byte strings in a DataFrame to regular string objects.

        This function converts columns with byte-encoded strings (e.g., b'string') into UTF-8 decoded strings.
        Values that are not byte strings (including ordinary strings) are left untouched.
        The DataFrame is modified in place and also returned.

        Args:
            dataframe (pd.DataFrame): The DataFrame containing byte-encoded string columns.

        Returns:
            pd.DataFrame: The DataFrame with byte strings decoded to regular strings.

        Raises:
            UnicodeDecodeError: If a byte string is not valid UTF-8.

        Example:
            >>> df = pd.DataFrame({'A': [b'hello', b'world'], 'B': [1, 2]})
            >>> RasHdf.decode_byte_strings(df)
                   A  B
            0  hello  1
            1  world  2
        """
        for col in dataframe.columns:
            # Only object ('O') or fixed-width bytes ('S') columns can hold byte strings.
            if dataframe[col].dtype.kind in ('O', 'S'):
                dataframe[col] = dataframe[col].map(RasHdf._decode_if_bytes)
        return dataframe

    @staticmethod
    def find_nearest_value(array: Union[list, np.ndarray], target_value: Union[int, float]) -> Union[int, float]:
        """
        Finds the nearest value in a NumPy array to the specified target value.

        Arrays of any dimensionality are searched element-wise. When several elements are
        equally close, the first one in row-major order is returned.

        Args:
            array (Union[list, np.ndarray]): The array to search within.
            target_value (Union[int, float]): The value to find the nearest neighbor to.

        Returns:
            Union[int, float]: The nearest value in the array to the specified target value.

        Raises:
            ValueError: If the array is empty.

        Example:
            >>> arr = np.array([1, 3, 5, 7, 9])
            >>> RasHdf.find_nearest_value(arr, 6)
            5
        """
        values = np.asarray(array)
        if values.size == 0:
            raise ValueError("Cannot find the nearest value in an empty array.")
        # argmin returns an index into the flattened array, so index through .flat.
        idx = np.abs(values - target_value).argmin()
        return values.flat[idx]
248
+