ccfx 0.3.0__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ccfx/ccfx.py CHANGED
@@ -26,7 +26,9 @@ import geopandas, pandas
26
26
  from osgeo import gdal, ogr, osr
27
27
  import py7zr
28
28
  import subprocess
29
-
29
+ import multiprocessing
30
+ from mutagen.mp3 import MP3
31
+ from mutagen.id3 import ID3, TPE1, TALB, TIT2, TRCK, TDRC, TCON, APIC, COMM, USLT, TPE2, TCOM, TPE3, TPE4, TCOP, TENC, TSRC, TBPM
30
32
 
31
33
 
32
34
  # functions
def getExtension(filePath:str) -> str:
    '''
    Get the extension of a file.

    filePath: file path

    return: file extension without the leading dot (empty string if none)
    '''
    _, extension = os.path.splitext(filePath)
    return extension.lstrip('.')
65
+
66
+
67
def getMp3Metadata(fn, imagePath=None):
    '''
    Read basic ID3 metadata from an mp3 file.

    fn: path to the mp3 file
    imagePath: optional path; when given, the first embedded APIC
               (album art) frame is written to that file

    return: dict with keys artist, album, title, track_number, year, genre
    '''
    fallbackTitle = os.path.basename(fn).replace('.mp3', '')

    try:
        audio = MP3(fn, ID3=ID3)
        tags = audio.tags

        def readFrame(frameId, default):
            # str() of a mutagen text frame yields its textual content
            return str(tags[frameId]) if frameId in tags else default

        metadata = {
            'artist':       readFrame('TPE1', "Unknown Artist"),
            'album':        readFrame('TALB', "Unknown Album"),
            'title':        readFrame('TIT2', fallbackTitle),
            'track_number': readFrame('TRCK', "0"),
            'year':         readFrame('TDRC', "Unknown Year"),
            'genre':        readFrame('TCON', "Unknown Genre"),
        }

        if imagePath is not None:
            foundImage = False
            if tags:
                for tagKey in tags.keys():
                    # attached pictures are stored under keys like "APIC:<desc>"
                    if tagKey.startswith("APIC:"):
                        with open(imagePath, 'wb') as img_file:
                            img_file.write(tags[tagKey].data)
                        foundImage = True
                        break
            if not foundImage:
                print("No image found in metadata.")

    except Exception as e:
        print(f"Error extracting metadata from {fn}: {e}")
        # fall back to sensible defaults when extraction fails
        metadata = {
            'artist': "Unknown Artist",
            'album': "Unknown Album",
            'title': fallbackTitle,
            'track_number': "0",
            'year': "Unknown Year",
            'genre': "Unknown Genre"
        }

    return metadata
121
+
122
def guessMimeType(imagePath):
    '''
    Guess an image MIME type from the file extension.

    .jpg/.jpeg map to image/jpeg; every other extension (including .png)
    falls back to image/png, matching the previous behavior.
    '''
    extension = os.path.splitext(imagePath.lower())[1]
    return 'image/jpeg' if extension in ('.jpg', '.jpeg') else 'image/png'
129
+
130
def setMp3Metadata(fn, metadata, imagePath=None):
    '''
    Write ID3 tags (and optional cover art) to an mp3 file.

    fn: path to the mp3 file
    metadata: dict; recognised keys are artist, album, title, track_number,
              year, genre, comment, lyrics, publisher, composer, conductor,
              performer, copyright, encoded_by, encoder, isrc, bpm and,
              optionally, imagePath
    imagePath: PNG/JPEG file to embed as front-cover artwork; when omitted,
               metadata['imagePath'] is used if present

    return: True on success, False on failure
    '''
    try:
        # Load existing ID3 tags; untagged files raise, so fall back to a
        # fresh, empty tag block (was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit)
        try:
            audio = ID3(fn)
        except Exception:
            audio = ID3()

        # Text frames: each key is written only when present and non-empty
        if 'artist' in metadata and metadata['artist']: audio['TPE1'] = TPE1(encoding=3, text=metadata['artist'])
        if 'album' in metadata and metadata['album']: audio['TALB'] = TALB(encoding=3, text=metadata['album'])
        if 'title' in metadata and metadata['title']: audio['TIT2'] = TIT2(encoding=3, text=metadata['title'])
        if 'track_number' in metadata and metadata['track_number']: audio['TRCK'] = TRCK(encoding=3, text=metadata['track_number'])
        if 'year' in metadata and metadata['year']: audio['TDRC'] = TDRC(encoding=3, text=metadata['year'])
        if 'genre' in metadata and metadata['genre']: audio['TCON'] = TCON(encoding=3, text=metadata['genre'])
        if 'comment' in metadata and metadata['comment']: audio['COMM'] = COMM(encoding=3, text=metadata['comment'])
        if 'lyrics' in metadata and metadata['lyrics']: audio['USLT'] = USLT(encoding=3, text=metadata['lyrics'])
        if 'publisher' in metadata and metadata['publisher']:
            # BUGFIX: TPUB is the ID3 publisher frame; the previous code
            # stored a TPE2 (band/orchestra) frame here, mis-tagging the file
            from mutagen.id3 import TPUB
            audio['TPUB'] = TPUB(encoding=3, text=metadata['publisher'])
        if 'composer' in metadata and metadata['composer']: audio['TCOM'] = TCOM(encoding=3, text=metadata['composer'])
        if 'conductor' in metadata and metadata['conductor']: audio['TPE3'] = TPE3(encoding=3, text=metadata['conductor'])
        if 'performer' in metadata and metadata['performer']: audio['TPE4'] = TPE4(encoding=3, text=metadata['performer'])
        if 'copyright' in metadata and metadata['copyright']: audio['TCOP'] = TCOP(encoding=3, text=metadata['copyright'])
        # NOTE: 'encoder' and 'encoded_by' both target TENC, so 'encoder'
        # overwrites 'encoded_by' when both are supplied (kept for
        # backward compatibility)
        if 'encoded_by' in metadata and metadata['encoded_by']: audio['TENC'] = TENC(encoding=3, text=metadata['encoded_by'])
        if 'encoder' in metadata and metadata['encoder']: audio['TENC'] = TENC(encoding=3, text=metadata['encoder'])
        if 'isrc' in metadata and metadata['isrc']: audio['TSRC'] = TSRC(encoding=3, text=metadata['isrc'])
        if 'bpm' in metadata and metadata['bpm']: audio['TBPM'] = TBPM(encoding=3, text=metadata['bpm'])

        # Allow the image path to come from the metadata dict as well
        if imagePath is None and 'imagePath' in metadata:
            imagePath = metadata['imagePath']

        # Attach cover art if a readable image file was provided
        if imagePath and os.path.exists(imagePath):
            with open(imagePath, 'rb') as img_file:
                img_data = img_file.read()

            mime = guessMimeType(imagePath)

            audio['APIC'] = APIC(
                encoding=3,     # UTF-8 encoding
                mime=mime,      # MIME type of the image
                type=3,         # 3 means 'Cover (front)'
                desc='Cover',   # description
                data=img_data   # raw image bytes
            )

        # Persist the tag changes to disk
        audio.save(fn)
        return True

    except Exception as e:
        print(f"Error writing metadata to {fn}: {e}")
        return False
61
197
 
62
198
 
63
199
  def deleteFile(filePath:str, v:bool = False) -> bool:
@@ -86,6 +222,47 @@ def deleteFile(filePath:str, v:bool = False) -> bool:
86
222
 
87
223
  return deleted
88
224
 
225
def deletePath(path:str, v:bool = False) -> bool:
    '''
    Delete a directory tree.

    path: directory to remove
    v: verbose (default is False)

    return: True if the directory was deleted, False otherwise
    '''
    deleted = False
    if os.path.exists(path):
        try:
            shutil.rmtree(path)
            deleted = True
            # success message only after rmtree succeeded (previously the
            # message could print even when deletion had failed)
            if v:
                print(f'> {path} deleted')
        except OSError:
            # best-effort semantics: report and return False instead of raising
            print(f'! Could not delete {path}')
    else:
        if v:
            print(f'! {path} does not exist')
    # BUGFIX: the original fell off the end and returned None despite the
    # declared -> bool return type
    return deleted
248
+
249
+
250
def mergeRasterTiles(tileList:list, outFile:str) -> str:
    '''
    Mosaic a list of raster tiles into a single raster.

    tileList: raster file paths to merge
    outFile: path of the merged output raster

    return: outFile
    '''
    mergedPath = outFile
    gdal.Warp(mergedPath, tileList)
    return mergedPath
258
+
259
def mergeRasterFiles(tileList:list, outFile:str) -> str:
    '''
    Alias for mergeRasterTiles, kept for readability at call sites.
    '''
    return mergeRasterTiles(tileList, outFile)
264
+
265
+
89
266
  def systemPlatform() -> str:
90
267
  '''
91
268
  Get the system platform
@@ -112,6 +289,55 @@ def fileCount(path:str = "./", extension:str = ".*", v:bool = True) -> int:
112
289
  print(f'> there are {count} {extension if not extension ==".*" else ""} files in {path}')
113
290
  return count
114
291
 
292
def resampleRaster(inFile:str, outFile:str, resolution:float, dstSRS = None, resamplingMethod = 'bilinear', replaceOutput:bool = True, v:bool = True) -> str:
    '''
    Resample a raster file to a new cell size.

    inFile: input raster file
    outFile: output raster file
    resolution: target cell size, in the units of the raster CRS
    dstSRS: optional target CRS to reproject to while resampling
    resamplingMethod: one of 'nearest', 'bilinear', 'cubic', 'cubicspline',
                      'lanczos', 'average', 'mode', 'max', 'min', 'med',
                      'q1', 'q3'
    replaceOutput: overwrite outFile if it already exists (default is True)
    v: verbose (default is True)

    return: output raster file path, or None on error
    '''
    # fixed typo: was `resamleTypes`
    resampleTypes = {
        'nearest': gdal.GRA_NearestNeighbour,
        'bilinear': gdal.GRA_Bilinear,
        'cubic': gdal.GRA_Cubic,
        'cubicspline': gdal.GRA_CubicSpline,
        'lanczos': gdal.GRA_Lanczos,
        'average': gdal.GRA_Average,
        'mode': gdal.GRA_Mode,
        'max': gdal.GRA_Max,
        'min': gdal.GRA_Min,
        'med': gdal.GRA_Med,
        'q1': gdal.GRA_Q1,
        'q3': gdal.GRA_Q3
    }

    # validate the method up front instead of raising a raw KeyError later,
    # matching this function's print-and-return-None error convention
    if resamplingMethod not in resampleTypes:
        print(f'! unknown resampling method {resamplingMethod}; choose from {list(resampleTypes)}')
        return None

    if not os.path.exists(inFile):
        print(f'! {inFile} does not exist')
        return None

    if os.path.exists(outFile):
        if replaceOutput:
            os.remove(outFile)
        else:
            print(f'! {outFile} already exists')
            return None

    if v:
        print(f'> resampling {inFile} to {outFile} at {resolution}')

    ds = gdal.Open(inFile)
    warpKwargs = {'xRes': resolution, 'yRes': resolution, 'resampleAlg': resampleTypes[resamplingMethod]}
    if dstSRS is not None:
        warpKwargs['dstSRS'] = dstSRS
    gdal.Warp(outFile, ds, **warpKwargs)

    ds = None  # release the GDAL dataset handle
    return outFile
+
115
341
  def watchFileCount(path:str="./", extension:str = ".*", interval:float = 0.2, duration = 3, v:bool = True) -> None:
116
342
  '''
117
343
  Watch the number of files in a directory with a specific extension
@@ -346,8 +572,11 @@ def moveDirectoryFiles(srcDir: str, destDir: str, v: bool = False) -> bool:
346
572
  src_file = os.path.join(root, file)
347
573
  dest_file = os.path.join(dest_root, file)
348
574
  if v:
349
- print(f"\t> Moving file \n\t - {src_file}\n\t - to {dest_file}")
350
- shutil.move(src_file, dest_file)
575
+ print(f"\t> Moving file \n\t - {src_file}\n\t to {dest_file}")
576
+ try:
577
+ shutil.move(src_file, dest_file)
578
+ except Exception as e:
579
+ print(f"! Error moving file: {e}")
351
580
 
352
581
  return True
353
582
 
@@ -639,7 +868,7 @@ def netcdfExportTif(ncFile: str, variable: str, outputFile: str = None, band: in
639
868
  return dataset
640
869
 
641
870
 
642
- def getFileBaseName(filePath:str, extension:bool = False) -> str:
871
+ def getFileBaseName(filePath:str, extension:bool = True) -> str:
643
872
  '''
644
873
  Get the base name of a file
645
874
  filePath: file path
@@ -710,12 +939,64 @@ def copyFile(source:str, destination:str, v:bool = True) -> None:
710
939
  source: source file
711
940
  destination: destination file
712
941
  '''
942
+ if not exists(os.path.dirname(destination)): createPath(f"{os.path.dirname(destination)}/")
713
943
  with open(source, 'rb') as src:
714
944
  with open(destination, 'wb') as dest: dest.write(src.read())
715
945
 
716
946
  if v: print(f'> {source} copied to \t - {destination}')
717
947
 
718
948
 
949
def copyDirectory(source:str, destination:str, recursive = True, v:bool = True, filter = []) -> None:
    '''
    Copy a directory from source to destination.

    source: source directory
    destination: destination directory
    recursive: copy subdirectories as well (default is True)
    v: verbose progress bar (default is True)
    filter: list of file extensions (without the dot) to skip

    NOTE: `filter` is only read, never mutated, so the mutable default is
    safe; it is kept as [] for interface compatibility.
    '''
    if not exists(destination): os.makedirs(destination)

    # Total number of files to copy; used only to size the progress bar
    candidates = listAllFiles(source) if recursive else listFiles(source)
    if len(filter) > 0:
        candidates = [fn for fn in candidates if not getExtension(fn) in filter]
    itemCount = len(candidates)

    counter = 1
    for item in os.listdir(source):
        s = os.path.join(source, item)
        d = os.path.join(destination, item)
        if os.path.isdir(s):
            if recursive:
                copyDirectory(s, d, recursive, v, filter)
        else:
            if len(filter) > 0 and getExtension(s) in filter:
                continue
            copyFile(s, d, v = False)
            # BUGFIX: progress is now shown before the counter advances in
            # both paths; the filtered path previously incremented first,
            # so the bar started at 2/N and could reach N+1/N
            if v: showProgress(counter, itemCount, f'copying {getFileBaseName(item)}\t\t', barLength=50)
            counter += 1
    print()
991
+
992
+
993
def copyFolder(source:str, destination:str, v:bool = True) -> None:
    '''
    Alias for copyDirectory.

    v: verbose (default is True)

    BUGFIX: v is now passed by keyword; the previous positional call
    `copyDirectory(source, destination, v)` bound v to copyDirectory's
    `recursive` parameter, silently disabling recursion when v was False.
    '''
    copyDirectory(source, destination, v = v)
998
+
999
+
719
1000
  def convertCoordinates(lon, lat, srcEPSG, dstCRS) -> tuple:
720
1001
  """
721
1002
  this function converts coordinates from one CRS to another
@@ -806,7 +1087,9 @@ def showProgress(count: int, end: int, message: str, barLength: int = 100) -> No
806
1087
  filled = int(barLength * count / end)
807
1088
  bar = '█' * filled + '░' * (barLength - filled)
808
1089
  print(f'\r{bar}| {percentStr}% [{count}/{end}] | {message} ', end='', flush=True)
809
- if count == end: print()
1090
+ if count == end:
1091
+ print(f'\r{bar}| {percentStr}% [{count}/{end}] ', end='', flush=True)
1092
+ print()
810
1093
 
811
1094
 
812
1095
  def listAllFiles(folder, extension="*"):
@@ -854,4 +1137,338 @@ def createPointGeometry(coords: list, proj: str = "EPSG:4326") -> geopandas.GeoD
854
1137
  gdf.reset_index(inplace=True)
855
1138
  return gdf
856
1139
 
1140
def calculateTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
    '''
    Calculate statistics for a timeseries

    the assumed dataframe structure is:
        date      - DateTime
        observed  - float
        simulated - float

    Parameters:
        data: pandas.DataFrame
            DataFrame containing the timeseries data
        observed: str
            name of the observed column
        simulated: str
            name of the simulated column
        resample: str
            if specified, the data will be resampled to the specified frequency
            available options: 'H' (hourly), 'D' (daily), 'M' (monthly), 'Y' (yearly)

    Returns:
        dict: Dictionary containing the following statistics:
            NSE: Nash-Sutcliffe Efficiency
            KGE: Kling-Gupta Efficiency
            PBIAS: Percent Bias
            LNSE: Log Nash-Sutcliffe Efficiency
            R2: R-squared
            RMSE: Root Mean Square Error
            MAE: Mean Absolute Error
            MSE: Mean Square Error
            MAPE: Mean Absolute Percentage Error
            alpha: Ratio of standard deviations
            beta: Ratio of means

    Raises:
        ValueError: on an invalid resample option, when no datetime column
        can be found for resampling, when observed/simulated columns cannot
        be determined, or when no valid data points remain after NaN removal
    '''

    # map the user-facing frequency letters to pandas resample rules
    options = {'H': '1H', 'D': '1D', 'M': '1M', 'Y': '1Y'}

    if resample:
        if resample not in options:
            raise ValueError(f"Invalid resample option. Choose from {list(options.keys())}")
        if not 'date' in data.columns:
            # no 'date' column: resample on the first datetime column found
            for col in data.columns:
                if data[col].dtype == 'datetime64[ns]':
                    data = data.set_index(col).resample(options[resample]).mean()
                    break
            else:
                # for/else: no datetime column was found at all
                raise ValueError("No datetime column found for resampling.")
        else:
            data = data.set_index('date').resample(options[resample]).mean()

    # Auto-detect columns if not specified: after dropping datetime columns,
    # exactly two columns must remain (observed first, simulated second)
    if not observed and not simulated:
        datetime_cols = [col for col in data.columns if data[col].dtype == 'datetime64[ns]']
        if datetime_cols:
            data = data.drop(datetime_cols, axis=1)

        if len(data.columns) == 2:
            observed = data.columns[0]
            simulated = data.columns[1]
        else:
            raise ValueError("Could not automatically detect observed and simulated columns")
    elif not observed or not simulated:
        raise ValueError("Both observed and simulated columns must be specified if one is specified")

    # Extract data as plain numpy arrays
    obs = data[observed].values
    sim = data[simulated].values

    # Remove any rows where either observed or simulated is NaN
    mask = ~(numpy.isnan(obs) | numpy.isnan(sim))
    obs = obs[mask]
    sim = sim[mask]

    if len(obs) == 0:
        raise ValueError("No valid data points after removing NaN values")

    # Calculate statistics with error handling
    try:
        # Nash-Sutcliffe Efficiency (NSE): 1 - SSE / variance of observations
        denominator = numpy.sum((obs - numpy.mean(obs)) ** 2)
        nse = 1 - numpy.sum((obs - sim) ** 2) / denominator if denominator != 0 else numpy.nan

        # Kling-Gupta Efficiency (KGE) components: correlation r,
        # variability ratio alpha, bias ratio beta
        r = numpy.corrcoef(obs, sim)[0, 1]
        obs_std = numpy.std(obs)
        sim_std = numpy.std(sim)
        obs_mean = numpy.mean(obs)
        sim_mean = numpy.mean(sim)

        alpha = sim_std / obs_std if obs_std != 0 else numpy.nan
        beta = sim_mean / obs_mean if obs_mean != 0 else numpy.nan

        # KGE calculation: euclidean distance of (r, alpha, beta) from ideal (1,1,1)
        if not any(numpy.isnan([r, alpha, beta])):
            kge = 1 - numpy.sqrt((r - 1) ** 2 + (alpha - 1) ** 2 + (beta - 1) ** 2)
        else:
            kge = numpy.nan

        # Percent Bias (PBIAS): positive when simulation over-estimates
        pbias = 100 * numpy.sum(sim - obs) / numpy.sum(obs) if numpy.sum(obs) != 0 else numpy.nan

        # Log Nash-Sutcliffe Efficiency (LNSE); eps guards log(0)
        eps = 0.0001
        log_obs = numpy.log(obs + eps)
        log_sim = numpy.log(sim + eps)
        log_denominator = numpy.sum((log_obs - numpy.mean(log_obs)) ** 2)
        lnse = 1 - numpy.sum((log_obs - log_sim) ** 2) / log_denominator if log_denominator != 0 else numpy.nan

        # R-squared (R2)
        r2 = r ** 2 if not numpy.isnan(r) else numpy.nan

        # Root Mean Square Error (RMSE)
        rmse = numpy.sqrt(numpy.mean((obs - sim) ** 2))

        # Mean Absolute Error (MAE)
        mae = numpy.mean(numpy.abs(obs - sim))

        # Mean Square Error (MSE)
        mse = numpy.mean((obs - sim) ** 2)

        # Mean Absolute Percentage Error (MAPE); division by zero observations
        # yields inf, which is reported as NaN
        with numpy.errstate(divide='ignore', invalid='ignore'):
            mape = numpy.mean(numpy.abs((obs - sim) / obs) * 100)
            mape = numpy.nan if numpy.isinf(mape) else mape

    except Exception as e:
        # any numerical failure degrades gracefully to an all-NaN result
        print(f"Warning: Error in statistical calculations: {str(e)}")
        return {stat: numpy.nan for stat in ['NSE', 'KGE', 'PBIAS', 'LNSE', 'R2', 'RMSE', 'MAE', 'MSE', 'MAPE', 'alpha', 'beta']}

    return {
        'NSE': nse,
        'KGE': kge,
        'PBIAS': pbias,
        'LNSE': lnse,
        'R2': r2,
        'RMSE': rmse,
        'MAE': mae,
        'MSE': mse,
        'MAPE': mape,
        'alpha': alpha,
        'beta': beta
    }
1282
+
1283
+
1284
def getNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the NSE.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: NSE value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['NSE']
1304
+
1305
def getKGE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the KGE.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: KGE value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['KGE']
1325
+
1326
def getPBIAS(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the PBIAS.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: PBIAS value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['PBIAS']
1346
+
1347
+
1348
def getLNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the LNSE.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: LNSE value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['LNSE']
1368
+
1369
def getR2(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the R2.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: R2 value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['R2']
1389
+
1390
def getRMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the RMSE.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: RMSE value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['RMSE']
1410
+
1411
def getMAE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the MAE.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: MAE value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['MAE']
1431
+
1432
def getMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
    '''
    Convenience wrapper around calculateTimeseriesStats returning only the MSE.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: MSE value
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)['MSE']
1452
+
1453
def getTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
    '''
    Convenience wrapper around calculateTimeseriesStats returning all stats.

    data: DataFrame containing the timeseries data
    observed / simulated: column names (auto-detected when both omitted)
    resample: optional frequency: 'H', 'D', 'M' or 'Y'

    return: dict containing all computed statistics
    '''
    return calculateTimeseriesStats(data, observed, simulated, resample)
1473
+
857
1474
  ignoreWarnings()
@@ -1,146 +1,148 @@
1
- Metadata-Version: 2.1
2
- Name: ccfx
3
- Version: 0.3.0
4
- Summary: Your package description here
5
- Author-email: Celray James CHAWANDA <celray@chawanda.com>
6
- License: MIT
7
- Project-URL: Homepage, https://github.com/celray/ccfx
8
- Classifier: Programming Language :: Python :: 3
9
- Classifier: License :: OSI Approved :: MIT License
10
- Classifier: Operating System :: OS Independent
11
- Requires-Python: >=3.9
12
- Description-Content-Type: text/markdown
13
- License-File: LICENSE
14
- Requires-Dist: netCDF4
15
- Requires-Dist: gdal
16
- Requires-Dist: numpy
17
- Requires-Dist: shapely
18
- Requires-Dist: geopandas
19
- Requires-Dist: pandas
20
- Requires-Dist: xlsxwriter
21
- Requires-Dist: pyodbc
22
- Requires-Dist: sqlalchemy
23
- Requires-Dist: python-docx
24
- Requires-Dist: py7zr
25
-
26
- # ccfx
27
-
28
- `ccfx` is a comprehensive Python package designed to streamline file and data management, geospatial analysis, and NetCDF file processing for quick prototyping. The library provides versatile tools for file handling, raster and vector manipulation, database connectivity, and data export for geospatial and scientific computing projects.
29
-
30
- ## Features
31
-
32
- 1. **File Management**:
33
- - List, delete, move, and count files within directories.
34
- - Monitor file count over time in a specific directory.
35
- - Save, load, and manage Python variables via pickle serialization.
36
-
37
- 2. **Geospatial Data Processing**:
38
- - Read, write, and manage raster and vector geospatial data.
39
- - Clip rasters by bounding boxes and extract raster values at specified coordinates.
40
- - Create grids of polygons based on shapefile boundaries with user-defined resolutions.
41
- - Convert coordinates between coordinate reference systems (CRS).
42
- - Write NumPy arrays to GeoTIFF files with projection and geotransform settings.
43
-
44
- 3. **NetCDF File Handling**:
45
- - List available variables and dimensions in NetCDF files.
46
- - Export NetCDF variables to GeoTIFF format.
47
- - Calculate sum and average maps from NetCDF data across multiple files.
48
-
49
- 4. **Database Connectivity**:
50
- - Access and interact with databases using ODBC and SQLAlchemy for flexible database management.
51
- - Connect to both SQL Server and SQLite databases.
52
-
53
- 5. **Progress Tracking and System Info**:
54
- - Display dynamic progress bars for long-running operations.
55
- - Check the system’s platform information.
56
- - Enable or disable warnings programmatically.
57
-
58
- 6. **Excel and Word File Handling**:
59
- - Create and modify Excel files using xlsxwriter.
60
- - Generate Word documents with advanced formatting options using python-docx.
61
-
62
- ## Installation
63
-
64
- Install `ccfx` via pip:
65
- ```bash
66
- pip install ccfx
67
- ```
68
-
69
- ## Dependencies
70
-
71
- `ccfx` relies on the following libraries:
72
-
73
- - **netCDF4**: For working with NetCDF files.
74
- - **gdal**: Required for geospatial raster data manipulation.
75
- - **numpy**: For array processing and numerical operations.
76
- - **pandas**: For data manipulation and analysis.
77
- - **shapely**: Provides geometric operations for spatial data.
78
- - **geopandas**: Extends pandas to handle geospatial data.
79
- - **xlsxwriter**: For creating and writing Excel files.
80
- - **pyodbc**: Enables connectivity to databases through ODBC.
81
- - **sqlalchemy**: Provides SQL toolkit and ORM features for database access.
82
- - **python-docx**: Enables creation and manipulation of Word documents.
83
-
84
- These dependencies will be installed automatically when `ccfx` is installed.
85
-
86
- ## API Reference
87
-
88
- ### `listFiles(path: str, ext: str = None) -> list`
89
- Lists all files in a directory with a specified extension.
90
-
91
- - **Parameters**:
92
- - `path` (str): The directory to search.
93
- - `ext` (str, optional): File extension to filter by, e.g., `'txt'`, `'.txt'`, `'*txt'`, or `'*.txt'`.
94
-
95
- - **Returns**:
96
- - `list`: A list of file paths matching the criteria.
97
-
98
- ### `deleteFile(filePath: str, v: bool = False) -> bool`
99
- Deletes a specified file with optional verbose output.
100
-
101
- - **Parameters**:
102
- - `filePath` (str): Path to the file to be deleted.
103
- - `v` (bool, optional): If `True`, prints a confirmation message. Defaults to `False`.
104
-
105
- - **Returns**:
106
- - `bool`: `True` if deletion was successful; `False` otherwise.
107
-
108
- ### `createGrid(shapefile_path: str, resolution: float, useDegree: bool = True) -> tuple`
109
- Generates a grid of polygons from a shapefile at a given resolution.
110
-
111
- - **Parameters**:
112
- - `shapefile_path` (str): Path to the shapefile.
113
- - `resolution` (float): Resolution of the grid.
114
- - `useDegree` (bool, optional): If `True`, coordinates are in degrees. Defaults to `True`.
115
-
116
- - **Returns**:
117
- - `tuple`: Contains grid coordinates and metadata.
118
-
119
- ### `clipRasterByExtent(inFile: str, outFile: str, bounds: tuple) -> str`
120
- Clips a raster to specified bounding box coordinates.
121
-
122
- - **Parameters**:
123
- - `inFile` (str): Path to the input raster file.
124
- - `outFile` (str): Path to the output clipped raster file.
125
- - `bounds` (tuple): Bounding box as `(minx, miny, maxx, maxy)`.
126
-
127
- - **Returns**:
128
- - `str`: Path to the clipped raster file.
129
-
130
- ### `netcdfVariablesList(ncFile: str) -> list`
131
- Lists all variables in a NetCDF file.
132
-
133
- - **Parameters**:
134
- - `ncFile` (str): Path to the NetCDF file.
135
-
136
- - **Returns**:
137
- - `list`: A list of variable names in the file.
138
-
139
- ### ... And More ...
140
-
141
- ## Contributing
142
-
143
- Contributions are welcome! Please fork the repository, make your changes, and submit a pull request.
144
-
145
- ## License
146
- This project is licensed under the MIT License.
1
+ Metadata-Version: 2.4
2
+ Name: ccfx
3
+ Version: 0.5.0
4
+ Summary: This package simplifies regular common actions for quick prototyping in a user friendly way
5
+ Author-email: Celray James CHAWANDA <celray@chawanda.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/celray/ccfx
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: netCDF4
15
+ Requires-Dist: gdal
16
+ Requires-Dist: numpy
17
+ Requires-Dist: shapely
18
+ Requires-Dist: geopandas
19
+ Requires-Dist: pandas
20
+ Requires-Dist: xlsxwriter
21
+ Requires-Dist: pyodbc
22
+ Requires-Dist: sqlalchemy
23
+ Requires-Dist: python-docx
24
+ Requires-Dist: py7zr
25
+ Requires-Dist: mutagen
26
+ Dynamic: license-file
27
+
28
+ # ccfx
29
+
30
+ `ccfx` is a comprehensive Python package designed to streamline file and data management, geospatial analysis, and NetCDF file processing for quick prototyping. The library provides versatile tools for file handling, raster and vector manipulation, database connectivity, and data export for geospatial and scientific computing projects.
31
+
32
+ ## Features
33
+
34
+ 1. **File Management**:
35
+ - List, delete, move, and count files within directories.
36
+ - Monitor file count over time in a specific directory.
37
+ - Save, load, and manage Python variables via pickle serialization.
38
+
39
+ 2. **Geospatial Data Processing**:
40
+ - Read, write, and manage raster and vector geospatial data.
41
+ - Clip rasters by bounding boxes and extract raster values at specified coordinates.
42
+ - Create grids of polygons based on shapefile boundaries with user-defined resolutions.
43
+ - Convert coordinates between coordinate reference systems (CRS).
44
+ - Write NumPy arrays to GeoTIFF files with projection and geotransform settings.
45
+
46
+ 3. **NetCDF File Handling**:
47
+ - List available variables and dimensions in NetCDF files.
48
+ - Export NetCDF variables to GeoTIFF format.
49
+ - Calculate sum and average maps from NetCDF data across multiple files.
50
+
51
+ 4. **Database Connectivity**:
52
+ - Access and interact with databases using ODBC and SQLAlchemy for flexible database management.
53
+ - Connect to both SQL Server and SQLite databases.
54
+
55
+ 5. **Progress Tracking and System Info**:
56
+ - Display dynamic progress bars for long-running operations.
57
+ - Check the system’s platform information.
58
+ - Enable or disable warnings programmatically.
59
+
60
+ 6. **Excel and Word File Handling**:
61
+ - Create and modify Excel files using xlsxwriter.
62
+ - Generate Word documents with advanced formatting options using python-docx.
63
+
64
+ ## Installation
65
+
66
+ Install `ccfx` via pip:
67
+ ```bash
68
+ pip install ccfx
69
+ ```
70
+
71
+ ## Dependencies
72
+
73
+ `ccfx` relies on the following libraries:
74
+
75
+ - **netCDF4**: For working with NetCDF files.
76
+ - **gdal**: Required for geospatial raster data manipulation.
77
+ - **numpy**: For array processing and numerical operations.
78
+ - **pandas**: For data manipulation and analysis.
79
+ - **shapely**: Provides geometric operations for spatial data.
80
+ - **geopandas**: Extends pandas to handle geospatial data.
81
+ - **xlsxwriter**: For creating and writing Excel files.
82
+ - **pyodbc**: Enables connectivity to databases through ODBC.
83
+ - **sqlalchemy**: Provides SQL toolkit and ORM features for database access.
84
+ - **python-docx**: Enables creation and manipulation of Word documents.
85
+
86
+ These dependencies will be installed automatically when `ccfx` is installed.
87
+
88
+ ## API Reference
89
+
90
+ ### `listFiles(path: str, ext: str = None) -> list`
91
+ Lists all files in a directory with a specified extension.
92
+
93
+ - **Parameters**:
94
+ - `path` (str): The directory to search.
95
+ - `ext` (str, optional): File extension to filter by, e.g., `'txt'`, `'.txt'`, `'*txt'`, or `'*.txt'`.
96
+
97
+ - **Returns**:
98
+ - `list`: A list of file paths matching the criteria.
99
+
100
+ ### `deleteFile(filePath: str, v: bool = False) -> bool`
101
+ Deletes a specified file with optional verbose output.
102
+
103
+ - **Parameters**:
104
+ - `filePath` (str): Path to the file to be deleted.
105
+ - `v` (bool, optional): If `True`, prints a confirmation message. Defaults to `False`.
106
+
107
+ - **Returns**:
108
+ - `bool`: `True` if deletion was successful; `False` otherwise.
109
+
110
+ ### `createGrid(shapefile_path: str, resolution: float, useDegree: bool = True) -> tuple`
111
+ Generates a grid of polygons from a shapefile at a given resolution.
112
+
113
+ - **Parameters**:
114
+ - `shapefile_path` (str): Path to the shapefile.
115
+ - `resolution` (float): Resolution of the grid.
116
+ - `useDegree` (bool, optional): If `True`, coordinates are in degrees. Defaults to `True`.
117
+
118
+ - **Returns**:
119
+ - `tuple`: Contains grid coordinates and metadata.
120
+
121
+ ### `clipRasterByExtent(inFile: str, outFile: str, bounds: tuple) -> str`
122
+ Clips a raster to specified bounding box coordinates.
123
+
124
+ - **Parameters**:
125
+ - `inFile` (str): Path to the input raster file.
126
+ - `outFile` (str): Path to the output clipped raster file.
127
+ - `bounds` (tuple): Bounding box as `(minx, miny, maxx, maxy)`.
128
+
129
+ - **Returns**:
130
+ - `str`: Path to the clipped raster file.
131
+
132
+ ### `netcdfVariablesList(ncFile: str) -> list`
133
+ Lists all variables in a NetCDF file.
134
+
135
+ - **Parameters**:
136
+ - `ncFile` (str): Path to the NetCDF file.
137
+
138
+ - **Returns**:
139
+ - `list`: A list of variable names in the file.
140
+
141
+ ### ... And More ...
142
+
143
+ ## Contributing
144
+
145
+ Contributions are welcome! Please fork the repository, make your changes, and submit a pull request.
146
+
147
+ ## License
148
+ This project is licensed under the MIT License.
@@ -0,0 +1,11 @@
1
+ ccfx/__init__.py,sha256=VmBeF3oj6JTJ_793d4i8PvhyF8_FxaxA1L_FmHWqitc,142
2
+ ccfx/ccfx.py,sha256=A61uV0bKlusLAcLlgNVhKaMQ4KutWxB7hzEQnitJpus,51808
3
+ ccfx/excel.py,sha256=cQ4TQW49XqbMB3sSS0IOhO3-WArIolEBIrvOvhFyPtI,4757
4
+ ccfx/mssqlConnection.py,sha256=TwyZXhHHI7zy6BSfH1pszuHVJ5cmndRC5dVxvEtSTks,7904
5
+ ccfx/sqliteConnection.py,sha256=jEJ94D5ySt84N7AeDpa27Rclt1NaKhkX6nYzidwApIg,11104
6
+ ccfx/word.py,sha256=AGa64jX5Zl5qotZh5L0QmrsjTnktIBhmj_ByRKZ88vw,3061
7
+ ccfx-0.5.0.dist-info/licenses/LICENSE,sha256=2-M3fBUS3FmrSIrqd3cZDmxXxojWVJtZY-SHSRE6RxM,1098
8
+ ccfx-0.5.0.dist-info/METADATA,sha256=6tpyj3FwWBdkm8BCvU26J9rZFsP_mCIoZUxTb71GYRA,5381
9
+ ccfx-0.5.0.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
10
+ ccfx-0.5.0.dist-info/top_level.txt,sha256=_cSvSA1WX2K8TgoV3iBJUdUZZqMKJbOPLNnKLYSLHaw,5
11
+ ccfx-0.5.0.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.3.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,11 +0,0 @@
1
- ccfx/__init__.py,sha256=VmBeF3oj6JTJ_793d4i8PvhyF8_FxaxA1L_FmHWqitc,142
2
- ccfx/ccfx.py,sha256=lB6hTCywvbMpHH6lMgw0UJuoiU-7vA22yvttvKFG5Gc,28450
3
- ccfx/excel.py,sha256=cQ4TQW49XqbMB3sSS0IOhO3-WArIolEBIrvOvhFyPtI,4757
4
- ccfx/mssqlConnection.py,sha256=TwyZXhHHI7zy6BSfH1pszuHVJ5cmndRC5dVxvEtSTks,7904
5
- ccfx/sqliteConnection.py,sha256=jEJ94D5ySt84N7AeDpa27Rclt1NaKhkX6nYzidwApIg,11104
6
- ccfx/word.py,sha256=AGa64jX5Zl5qotZh5L0QmrsjTnktIBhmj_ByRKZ88vw,3061
7
- ccfx-0.3.0.dist-info/LICENSE,sha256=2-M3fBUS3FmrSIrqd3cZDmxXxojWVJtZY-SHSRE6RxM,1098
8
- ccfx-0.3.0.dist-info/METADATA,sha256=9C2wcPC02QWli3N8GPc0_80K4LZ-YI2ZpNPlMadHGjw,5419
9
- ccfx-0.3.0.dist-info/WHEEL,sha256=P9jw-gEje8ByB7_hXoICnHtVCrEwMQh-630tKvQWehc,91
10
- ccfx-0.3.0.dist-info/top_level.txt,sha256=_cSvSA1WX2K8TgoV3iBJUdUZZqMKJbOPLNnKLYSLHaw,5
11
- ccfx-0.3.0.dist-info/RECORD,,