ccfx 0.9.0__tar.gz → 1.0.2__tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries, and is provided for informational purposes only.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ccfx
- Version: 0.9.0
+ Version: 1.0.2
  Summary: This package simplifies regular common actions for quick prototyping in a user friendly way
  Author-email: Celray James CHAWANDA <celray@chawanda.com>
  License-Expression: MIT
@@ -14,7 +14,7 @@ import os, sys
  import glob
  import warnings
  from netCDF4 import Dataset
- from osgeo import gdal, osr
+ from osgeo import gdal, ogr, osr
  import numpy
  from genericpath import exists
  import shutil
@@ -23,7 +23,7 @@ import pickle
  import time
  from shapely.geometry import box, Point
  import geopandas, pandas
- from osgeo import gdal, ogr, osr
+ from collections import defaultdict
  import py7zr
  import subprocess
  import multiprocessing
@@ -35,9 +35,10 @@ import requests
  from tqdm import tqdm
  import yt_dlp
  from typing import Optional
+ from datetime import datetime, timedelta

  # functions
- def listFiles(path: str, ext: str = None) -> list:
+ def listFiles(path: str, ext: Optional[str] = None) -> list:
      '''
      List all files in a directory with a specific extension
      path: directory
@@ -133,7 +134,7 @@ def guessMimeType(imagePath):
      return 'image/png'


- def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFileName: Optional[str] = None ) -> str:
+ def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, cookiesFile: Optional[str] = None, dstFileName: Optional[str] = None ) -> str:
      """
      Download from YouTube via yt-dlp.

@@ -154,6 +155,9 @@ def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFile

      opts = {"outtmpl": template}

+     if cookiesFile:
+         opts["cookiefile"] = cookiesFile
+
      if audioOnly:
          opts.update({
              "format": "bestaudio/best",
@@ -163,6 +167,7 @@ def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFile
                  "preferredquality": "192",
              }],
          })
+
      else:
          # prefer a single MP4 file (progressive), fallback to any best if none
          opts["format"] = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"
@@ -181,6 +186,158 @@ def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFile
      return os.path.join(dstDir, final)


+ def parseYoutubePlaylist(playlistUrl: str) -> list[str]:
+     """
+     Return a list of full video URLs contained in a YouTube playlist.
+
+     Args:
+         playlistUrl: Full URL of the playlist (the one with &list=… or /playlist?list=…).
+
+     Returns:
+         List of video URLs in the order reported by YouTube.
+     """
+     opts = {
+         "quiet": True,
+         "extract_flat": "in_playlist",  # don’t recurse into each video
+     }
+
+     with yt_dlp.YoutubeDL(opts) as ytdl:
+         info = ytdl.extract_info(playlistUrl, download=False)
+
+     entries = info.get("entries", [])
+     return [f"https://www.youtube.com/watch?v={e['id']}" for e in entries if e.get("id")]
+
+
+ def parseYoutubeChannelVideos(channelUrl: str, maxItems: Optional[int] = None) -> list[str]:
+     """
+     Return a list of video URLs published on a channel.
+
+     Args:
+         channelUrl: Any canonical channel URL, e.g.
+             - https://www.youtube.com/@LinusTechTips
+             - https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw
+             - https://www.youtube.com/c/NASA/videos
+         maxItems: Optional hard limit. If None, returns every video the API exposes.
+
+     Returns:
+         List of video URLs, newest-first (YouTube’s default order).
+     """
+     opts = {
+         "quiet": True,
+         "extract_flat": True,  # treat the channel as one big “playlist”
+         "skip_download": True,
+     }
+
+     with yt_dlp.YoutubeDL(opts) as ytdl:
+         info = ytdl.extract_info(channelUrl, download=False)
+
+     entries = info.get("entries", [])
+     if maxItems is not None:
+         entries = entries[:maxItems]
+
+     return [f"https://www.youtube.com/watch?v={e['id']}" for e in entries if e.get("id")]
+
+
+
+ def runSWATPlus(txtinoutDir: str, finalDir: str, executablePath: str = "swatplus", v: bool = True):
+     os.chdir(txtinoutDir)
+
+     if not v:
+         # Run the SWAT+ but ignore output and errors
+         subprocess.run([executablePath], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+     else:
+
+         yrs_line = readFrom('time.sim')[2].strip().split()
+
+         yr_from = int(yrs_line[1])
+         yr_to = int(yrs_line[3])
+
+         delta = datetime(yr_to, 12, 31) - datetime(yr_from, 1, 1)
+
+         CREATE_NO_WINDOW = 0x08000000
+
+         if platform.system() == "Windows":
+             process = subprocess.Popen(executablePath, stdout=subprocess.PIPE, creationflags=CREATE_NO_WINDOW )
+         else:
+             process = subprocess.Popen(executablePath, stdout=subprocess.PIPE)
+
+         current = 0
+         number_of_days = delta.days + 1
+
+         day_cycle = []
+         previous_time = None
+
+         while True:
+             line = process.stdout.readline()
+             line_parts = str(line).strip().split()
+             if not "Simulation" in line_parts: pass
+             elif 'Simulation' in line_parts:
+                 ref_index = str(line).strip().split().index("Simulation")
+                 year = line_parts[ref_index + 3]
+                 month = line_parts[ref_index + 1]
+                 day = line_parts[ref_index + 2]
+
+
+                 month = f"0{month}" if int(month) < 10 else month
+                 day = f"0{day}" if int(day) < 10 else day
+
+                 current += 1
+
+                 if not previous_time is None:
+                     day_cycle.append(datetime.now() - previous_time)
+
+                 if len(day_cycle) > 40:
+                     if len(day_cycle) > (7 * 365.25):
+                         del day_cycle[0]
+
+                     av_cycle_time = sum(day_cycle, timedelta()) / len(day_cycle)
+                     eta = av_cycle_time * (number_of_days - current)
+
+                     eta_str = f" ETA - {formatTimedelta(eta)}:"
+
+
+                 else:
+                     eta_str = ''
+
+                 showProgress(current, number_of_days, bar_length=20, message= f' >> current date: {day}/{month}/{year} - f{yr_to} {eta_str}')
+
+                 previous_time = datetime.now()
+             elif "ntdll.dll" in line_parts:
+                 print("\n! there was an error running SWAT+\n")
+                 if counter < 10:
+                     counter += 1
+                     continue
+
+             if len(line_parts) < 2: break
+
+         showProgress(current, number_of_days, string_after= f' ')
+         print("\n")
+
+     os.chdir(finalDir)
+
+
+
+ def formatTimedelta(delta: timedelta) -> str:
+     """Formats a timedelta duration to [N days] %H:%M:%S format"""
+     seconds = int(delta.total_seconds())
+
+     secs_in_a_day = 86400
+     secs_in_a_hour = 3600
+     secs_in_a_min = 60
+
+     days, seconds = divmod(seconds, secs_in_a_day)
+     hours, seconds = divmod(seconds, secs_in_a_hour)
+     minutes, seconds = divmod(seconds, secs_in_a_min)
+
+     time_fmt = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+
+     if days > 0:
+         suffix = "s" if days > 1 else ""
+         return f"{days} day{suffix} {time_fmt}"
+     else:
+         return f"{time_fmt}"
+
+
  def setMp3Metadata(fn, metadata, imagePath=None):
      '''
      This function takes a path to an mp3 and a metadata dictionary,
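
The two new parsers pair naturally with downloadYoutubeVideo. A hedged sketch of that combination; the playlist URL is a placeholder:

    from ccfx import parseYoutubePlaylist, downloadYoutubeVideo

    # list every video in a playlist, then fetch the first three as MP4
    urls = parseYoutubePlaylist("https://www.youtube.com/playlist?list=PLAYLIST_ID")
    for url in urls[:3]:
        downloadYoutubeVideo(url, dstDir="./playlistVideos")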
@@ -276,6 +433,38 @@ def deleteFile(filePath:str, v:bool = False) -> bool:

      return deleted

+
+ def alert(message:str, server:str = "http://ntfy.sh", topic:str = "pythonAlerts", attachment:Optional[str] = None, messageTitle:str = "info", priority:int = None, tags:list = [], printIt:bool = True, v:bool = False) -> bool:
+     '''
+     This sends an alert to a given server in case you want to be notified of something
+     message : the message to send
+     server : the server to send the message to (default is http://ntfy.sh)
+     topic : the topic to send the message to (default is pythonAlerts)
+     attachment : a file to attach to the message (optional)
+     messageTitle : the title of the message (optional, default is info)
+     priority : the priority of the message (optional, default is None)
+     tags : a list of tags to add to the message (optional, default is empty list)
+     printIt : whether to print the message to the console (default is True)
+     v : verbose (default is False, set to True to print debug info)
+
+     return: True if the alert was sent successfully, False otherwise
+     '''
+     print(message) if printIt else None; header_data = {}
+     if not messageTitle is None: header_data["Title"] = messageTitle
+     if not priority is None: header_data["Priority"] = priority
+     if not len(tags) == 0: header_data["Tags"] = ",".join(tags)
+
+     try:
+         if v: print(f"sending alert to {server}/{topic}")
+         if not attachment is None:
+             header_data["Filename"] = getFileBaseName(attachment)
+             requests.put( f"{server}/{topic}", data=open(attachment, 'rb'), headers=header_data )
+         try: requests.post(f"{server}/{topic}",data=message, headers=header_data )
+         except: return False
+     except: return False
+     return True
+
+
  def deletePath(path:str, v:bool = False) -> bool:
      '''
      Delete a directory
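
alert posts to an ntfy-compatible server, so any device subscribed to the topic receives a push notification. A minimal sketch against the public ntfy.sh instance; the topic name is a placeholder you would choose yourself:

    from ccfx import alert

    # notify when a long-running job finishes; subscribe to the same topic
    # in the ntfy app or at https://ntfy.sh/myPythonJobs to receive it
    alert(
        "model calibration finished",
        topic="myPythonJobs",
        messageTitle="calibration",
        tags=["white_check_mark"],
    )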
@@ -309,6 +498,38 @@ def downloadChunk(url, start, end, path):
          if chunk:
              f.write(chunk)

+
+ def formatStringBlock(input_str, max_chars=70):
+     '''
+     This function takes a string and formats it into a block of text
+     with a maximum number of characters per line.
+
+     input_str: the string to format
+     max_chars: the maximum number of characters per line (default is 70)
+
+     '''
+     words = input_str.split(' ')
+     lines = []
+     current_line = ""
+
+     for word in words:
+         # If adding the next word to the current line would exceed the max_chars limit
+         if len(current_line) + len(word) > max_chars:
+             # Append current line to lines and start a new one
+             lines.append(current_line.strip())
+             current_line = word
+         else:
+             # Add the word to the current line
+             current_line += " " + word
+
+     # Append any remaining words
+     lines.append(current_line.strip())
+
+     return '\n'.join(lines)
+
+
+
+
  def downloadFile(url, save_path, exists_action='resume', num_connections=5, v=False):
      if v:
          print(f"\ndownloading {url}")
@@ -643,21 +864,45 @@ def renameNetCDFvariable(input_file: str, output_file: str, old_var_name: str, n
      except subprocess.CalledProcessError as e:
          print(f"Error: {e.stderr}")

- def compressTo7z(input_dir: str, output_file: str):
+
+ def compressTo7z(input_dir: str, output_file: str, compressionLevel: int = 4, excludeExt: list = None, v: bool = False) -> None:
      """
      Compresses the contents of a directory to a .7z archive with maximum compression.

      :param input_dir: Path to the directory to compress
      :param output_file: Output .7z file path
+     :param compressionLevel: Compression level (0-9), default is 4 (maximum compression)
+     :param excludeExt: List of file extensions to exclude from compression
      """
+     if excludeExt is None:
+         excludeExt = []
+
      # Create the .7z archive with LZMA2 compression
-     with py7zr.SevenZipFile(output_file, 'w', filters=[{'id': py7zr.FILTER_LZMA2, 'preset': 9}]) as archive:
+     with py7zr.SevenZipFile(output_file, 'w', filters=[{'id': py7zr.FILTER_LZMA2, 'preset': compressionLevel}]) as archive:
          # Add each item in the input directory, avoiding the top-level folder in the archive
          for root, _, files in os.walk(input_dir):
              for file in files:
                  file_path = os.path.join(root, file)
+
+                 # Skip excluded file extensions
+                 if any(file.endswith(ext) for ext in excludeExt):
+                     continue
                  # Add file to the archive with a relative path to avoid including the 'tmp' folder itself
                  archive.write(file_path, arcname=os.path.relpath(file_path, start=input_dir))
+     if v:
+         print(f"compressed {input_dir} to {output_file} with compression level {compressionLevel}.")
+
+
+ def uncompress(inputFile: str, outputDir: str, v: bool = False) -> None:
+     """
+     Extracts an archive supported by py7zr (.7z, .zip, .tar, .tar.gz, .tar.bz2, .xz, .tar.xz) to outputDir.
+     inputFile: Path to the input archive file
+     outputDir: Directory where the contents will be extracted
+     v: Verbose flag to print extraction status (default is False)
+     """
+     if not exists(outputDir): createPath(outputDir)
+     with py7zr.SevenZipFile(inputFile, 'r') as archive: archive.extractall(path=outputDir)
+     if v: print(f"extracted {inputFile} to {outputDir}.")


  def moveDirectory(srcDir:str, destDir:str, v:bool = False) -> bool:
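
The two archive helpers round-trip as expected. A sketch with placeholder paths, assuming ./results exists:

    from ccfx import compressTo7z, uncompress

    # pack a results folder, skipping bulky rasters, then unpack it elsewhere
    compressTo7z("./results", "results.7z", compressionLevel=4, excludeExt=[".tif"], v=True)
    uncompress("results.7z", "./restoredResults", v=True)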
@@ -744,6 +989,21 @@ def clipRasterByExtent(inFile: str, outFile: str, bounds: tuple) -> str:
      ds = None
      return outFile

+
+ def clipRasterByVector(inFile: str, outFile: str, vectorFile: str) -> str:
+     '''
+     Clips a raster using GDAL warp with a vector file
+     inFile: input raster path
+     outFile: output path
+     vectorFile: vector file path (e.g., shapefile or GeoJSON)
+     return: output path
+     '''
+     ds = gdal.Open(inFile)
+     gdal.Warp(outFile, ds, cutlineDSName=vectorFile, cropToCutline=True)
+     ds = None
+     return outFile
+
+
  def clipVectorByExtent(inFile: str, outFile: str, bounds: tuple) -> str:
      '''
      Clips a vector using GeoPandas
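
A usage sketch for the new vector clip; the file names are placeholders and GDAL must be able to read both datasets:

    from ccfx import clipRasterByVector

    # cut a DEM down to a watershed boundary polygon
    clipRasterByVector("dem.tif", "dem_clipped.tif", "watershed.gpkg")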
@@ -878,61 +1138,81 @@ def ignoreWarnings(ignore:bool = True, v:bool = False) -> None:
      return None


- def createGrid(shapefile_path: str, resolution: float, useDegree: bool=True) -> tuple:
+ def createGrid(topLeft: list = None, bottomRight: list = None, resolution: float = None,
+                inputShape: str = None, crs: str = "EPSG:4326", saveVector: str = None) -> geopandas.GeoDataFrame:
      '''
-     This function creates a grid of polygons based on a shapefile
-     shapefile_path: path to the shapefile
-     resolution: resolution of the grid
-     useDegree: use degree (default is True)
-
-     return: xx, yy, polygons, within_mask, gdf.crs, minx, miny
-     '''
-     # Read the shapefile
-     gdf = geopandas.read_file(shapefile_path)
-
-     if useDegree:
-         gdf = gdf.to_crs(epsg=4326)
+     This function creates a grid of polygons based on either a shapefile or corner coordinates

-     # Get the bounds of the shapefile
-     minx, miny, maxx, maxy = gdf.total_bounds
+     Parameters:
+         topLeft: list [lon, lat] - top left corner coordinates
+         bottomRight: list [lon, lat] - bottom right corner coordinates
+         resolution: float - resolution of the grid
+         inputShape: str - path to the shapefile (optional, if provided bounds will be taken from here)
+         crs: str - coordinate reference system (default is "EPSG:4326")
+         saveVector: str - path to save the generated grid (optional)
+
+     Returns:
+         geopandas.GeoDataFrame - the generated grid
+     '''
+     # Input validation
+     if inputShape is None and (topLeft is None or bottomRight is None or resolution is None):
+         raise ValueError("Either provide inputShape OR provide topLeft, bottomRight, and resolution")
+
+     if inputShape is not None and resolution is None:
+         raise ValueError("Resolution must be provided")
+
+     # Get bounds from shapefile or coordinates
+     if inputShape is not None:
+         # Read the shapefile and get bounds
+         gdf = geopandas.read_file(inputShape)
+         gdf = gdf.to_crs(crs)
+         minx, miny, maxx, maxy = gdf.total_bounds
+         reference_geometry = gdf.unary_union
+     else:
+         # Use provided corner coordinates [lon, lat]
+         # Extract coordinates and determine actual bounds
+         lon1, lat1 = topLeft[0], topLeft[1]
+         lon2, lat2 = bottomRight[0], bottomRight[1]
+
+         # Determine actual min/max values
+         minx = min(lon1, lon2)
+         maxx = max(lon1, lon2)
+         miny = min(lat1, lat2)
+         maxy = max(lat1, lat2)
+         reference_geometry = None

      # Create a grid based on the bounds and resolution
      x = numpy.arange(minx, maxx, resolution)
      y = numpy.arange(miny, maxy, resolution)
-     xx, yy = numpy.meshgrid(x, y)
-
-     # Create polygons for each grid cell, arranged in 2D array
-     grid_shape = xx.shape
-     polygons = numpy.empty(grid_shape, dtype=object)
-     for i in range(grid_shape[0]):
-         for j in range(grid_shape[1]):
-             x0, y0 = xx[i, j], yy[i, j]
-             x1, y1 = x0 + resolution, y0 + resolution
-             polygons[i, j] = box(x0, y0, x1, y1)

-     # Flatten the polygons for GeoDataFrame creation
-     flat_polygons = polygons.ravel()
+     # Create polygons for each grid cell
+     polygons = []
+     for i in range(len(y)):
+         for j in range(len(x)):
+             x0, y0 = x[j], y[i]
+             x1, y1 = x0 + resolution, y0 + resolution
+             # Ensure we don't exceed the bounds
+             x1 = min(x1, maxx)
+             y1 = min(y1, maxy)
+             polygons.append(box(x0, y0, x1, y1))

      # Create a GeoDataFrame from the grid
-     grid_gdf = geopandas.GeoDataFrame({'geometry': flat_polygons}, crs=gdf.crs)
-
-     minx, miny, maxx, maxy = grid_gdf.total_bounds
-     print(" minx:", minx, "miny:", miny, "maxx:", maxx, "maxy:", maxy)
-
-     minx, miny, maxx, maxy = getVectorBounds(grid_gdf)
+     grid_gdf = geopandas.GeoDataFrame({'geometry': polygons}, crs=crs)
+
      # Add a column to indicate if the cell intersects with the original shapefile
-     grid_gdf['within'] = grid_gdf.intersects(gdf.unary_union)
+     if reference_geometry is not None:
+         grid_gdf['within'] = grid_gdf.intersects(reference_geometry)
+     else:
+         # For coordinate-based grids, set all cells as within
+         grid_gdf['within'] = True

-     # Reshape the 'within' mask to grid shape
-     within_mask = grid_gdf['within'].values.reshape(grid_shape)
+     # Save the grid if path is provided
+     if saveVector is not None:
+         grid_gdf.to_file(saveVector, driver="GPKG")
+         print(f"Grid saved to {saveVector}")

-     # Save the grid
-     reprojectedGrid = grid_gdf.to_crs(epsg=4326)
+     return grid_gdf

-     grid_gdf.to_file("generatedGrid4326.gpkg", driver="GPKG")
-     reprojectedGrid.to_file("generatedGrid.gpkg", driver="GPKG")
-
-     return xx, yy, polygons, within_mask, gdf.crs, minx, miny

  def setHomeDir(path:str) -> str:
      '''
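
The reworked createGrid no longer writes hard-coded GeoPackage files; callers opt in with saveVector and get the GeoDataFrame back. A sketch of the new coordinate-based path; the corner coordinates are arbitrary examples:

    from ccfx import createGrid

    # build a 0.1-degree grid over a bounding box and save it as a GeoPackage
    grid = createGrid(
        topLeft=[32.0, 1.5],        # [lon, lat]
        bottomRight=[35.0, -1.0],   # [lon, lat]
        resolution=0.1,
        saveVector="grid.gpkg",
    )
    print(len(grid), "cells,", int(grid["within"].sum()), "flagged within")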
@@ -989,7 +1269,7 @@ def netcdfVariableDimensions(ncFile: str, variable: str) -> dict:

      return bands_info

- def netcdfExportTif(ncFile: str, variable: str, outputFile: str = None, band: int = None, v:bool = True) -> gdal.Dataset:
+ def netcdfExportTif(ncFile: str, variable: str, outputFile: Optional[str] = None, band: int = None, v:bool = True) -> gdal.Dataset:
      '''
      Export a variable from a NetCDF file to a GeoTiff file
      ncFile: NetCDF file
@@ -1232,7 +1512,7 @@ def showProgress(count: int, end: int, message: str, barLength: int = 100) -> No
      message: message to display
      barLength: length of the progress bar
      '''
-     percent = int(count / end * 100)
+     percent = float(count / end * 100)
      percentStr = f'{percent:03.1f}'
      filled = int(barLength * count / end)
      bar = '█' * filled + '░' * (barLength - filled)
@@ -1310,7 +1590,7 @@ def createPointGeometry(coords: list, proj: str = "EPSG:4326") -> geopandas.GeoD
      gdf.reset_index(inplace=True)
      return gdf

- def calculateTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
+ def calculateTimeseriesStats(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> dict:
      '''
      Calculate statistics for a timeseries

@@ -1454,7 +1734,7 @@ def calculateTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulat
      }


- def getNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getNSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the NSE

@@ -1475,7 +1755,7 @@ def getNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, res

      return stats['NSE']

- def getKGE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getKGE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the KGE

@@ -1496,7 +1776,7 @@ def getKGE(data:pandas.DataFrame, observed:str = None, simulated:str = None, res

      return stats['KGE']

- def getPBIAS(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getPBIAS(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the PBIAS

@@ -1518,7 +1798,7 @@ def getPBIAS(data:pandas.DataFrame, observed:str = None, simulated:str = None, r
      return stats['PBIAS']


- def getLNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getLNSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the LNSE

@@ -1539,7 +1819,7 @@ def getLNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, re

      return stats['LNSE']

- def getR2(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getR2(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the R2

@@ -1560,7 +1840,7 @@ def getR2(data:pandas.DataFrame, observed:str = None, simulated:str = None, resa

      return stats['R2']

- def getRMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getRMSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the RMSE

@@ -1581,7 +1861,7 @@ def getRMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, re

      return stats['RMSE']

- def getMAE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getMAE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the MAE

@@ -1602,7 +1882,7 @@ def getMAE(data:pandas.DataFrame, observed:str = None, simulated:str = None, res

      return stats['MAE']

- def getMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+ def getMSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return the MSE

@@ -1623,7 +1903,7 @@ def getMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, res

      return stats['MSE']

- def getTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
+ def getTimeseriesStats(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> dict:
      '''
      this function is a wrapper for calculateTimeseriesStats specifically to return all stats

@@ -1644,4 +1924,107 @@ def getTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str

      return stats

+ def readSWATPlusOutputs(filePath: str, column: Optional[str] = None, unit: Optional[int] = None, gis_id: Optional[int] = None, name: Optional[str] = None):
+     '''
+     Read SWAT+ output files and return a pandas DataFrame with proper date handling
+     and optional filtering capabilities.
+
+     Parameters:
+     -----------
+     filePath: str
+         Path to the SWAT+ output file
+     column: str, optional
+         Name of the column to extract. If not specified, returns all columns.
+         If specified, returns first match, or specify multiple columns as comma-separated string
+     unit: int, optional
+         Filter by unit number. If not specified, returns all units
+     gis_id: int, optional
+         Filter by gis_id. If not specified, returns all gis_ids
+     name: str, optional
+         Filter by name. If not specified, returns all names
+
+     Returns:
+     --------
+     pandas.DataFrame or None
+         DataFrame with date column and requested data, filtered as specified
+     '''
+
+     if not exists(filePath):
+         print('! SWAT+ result file does not exist')
+         return None
+
+     # Read the header line (line 2, index 1)
+     with open(filePath, 'r') as f:
+         lines = f.readlines()
+
+     header_line = lines[1].strip()
+     headers = header_line.split()
+
+     # Handle duplicate column names
+     column_counts = defaultdict(int)
+     modified_header = []
+     for col_name in headers:
+         column_counts[col_name] += 1
+         if column_counts[col_name] > 1:
+             modified_header.append(f"{col_name}_{column_counts[col_name]}")
+         else:
+             modified_header.append(col_name)
+
+     # Add extra columns to handle potential mismatches
+     modified_header = modified_header + ['extra1', 'extra2']
+
+     try:
+         df = pandas.read_csv(filePath, delim_whitespace=True, skiprows=3, names=modified_header, index_col=False)
+     except:
+         sys.stdout.write(f'\r! could not read {filePath} using pandas, check the number of columns\n')
+         sys.stdout.flush()
+         return None
+
+     # Remove extra columns
+     df = df.drop(columns=['extra1', 'extra2'], errors='ignore')
+
+     # Convert all columns to numeric except 'name' (which is string)
+     for col in df.columns:
+         if col != 'name':
+             df[col] = pandas.to_numeric(df[col], errors='coerce')
+
+     # Create date column from yr, mon, day
+     try:
+         df['date'] = pandas.to_datetime(pandas.DataFrame({'year': df.yr, 'month': df.mon, 'day': df.day}))
+     except KeyError:
+         # If some date columns are missing, create a simple index-based date
+         df['date'] = pandas.date_range(start='2000-01-01', periods=len(df), freq='D')
+     except:
+         # If date creation fails for any other reason, use index-based date
+         df['date'] = pandas.date_range(start='2000-01-01', periods=len(df), freq='D')
+
+     # Filter by unit if specified
+     if unit is not None and 'unit' in df.columns:
+         df = df[df['unit'] == unit]
+
+     # Filter by gis_id if specified
+     if gis_id is not None and 'gis_id' in df.columns:
+         df = df[df['gis_id'] == gis_id]
+
+     # Filter by name if specified
+     if name is not None and 'name' in df.columns:
+         df = df[df['name'] == name]
+
+     # Handle column selection
+     if column is not None and column != "*":
+         # Parse comma-separated columns
+         requested_cols = [col.strip() for col in column.split(',')]
+
+         # Always include date column
+         selected_cols = ['date']
+
+         # Add requested columns if they exist
+         for req_col in requested_cols:
+             if req_col in df.columns:
+                 selected_cols.append(req_col)
+
+         df = df[selected_cols]
+
+     return df
+
  ignoreWarnings()
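
A sketch of reading one reach's outflow from a daily channel output file; the file and column names follow SWAT+ conventions, and the unit number is a placeholder:

    from ccfx import readSWATPlusOutputs

    # channel output filtered to one unit and one variable
    df = readSWATPlusOutputs("channel_sd_day.txt", column="flo_out", unit=1)
    if df is not None:
        print(df.head())  # columns: date, flo_out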
@@ -29,7 +29,7 @@ class excel:
          self.date_format = None

      def create(self):
-         self.create_path(os.path.dirname(self.path))
+         self.createPath(os.path.dirname(self.path))
          self.book = xlsxwriter.Workbook(self.path)

      def addSheet(self, sheet_name):
@@ -42,7 +42,7 @@ class excel:

      def writeDate(self, sheet_name, row, column, datetime_obj):
          if self.date_format is None:
-             self.set_date_format()
+             self.setDateFormat()

          self.sheet_names[sheet_name].write_datetime(
              row, column, datetime_obj, self.date_format)
@@ -21,7 +21,7 @@ import geopandas


  # classes
- class mssql_connection:
+ class mssqlConnection:
      def __init__(self, server, username, password, driver, trust_server_ssl = True) -> None:
          self.server = server
          self.username = username
@@ -85,20 +85,49 @@ class mssql_connection:
              self.cursor = self.connection.cursor()
              self.cursor.execute(query)
              tables = [row[0] for row in self.cursor.fetchall()]
-             print("> list of tables in the active database:")
-             for table in tables:
-                 print(f"\t- {table}")
+             # print("> list of tables in the active database:")
+             # for table in tables:
+             #     print(f"\t- {table}")
          except pyodbc.Error as e:
              print("Error occurred while fetching the list of tables:")
              print(e)

          return tables

+     def listColumns(self, tableName: str, dbName: str | None = None) -> list[str]:
+         if dbName:
+             self.connect_db(dbName)
+
+         schema, tbl = ('dbo', tableName) if '.' not in tableName else tableName.split('.', 1)
+
+         sql = """
+             SELECT column_name
+             FROM information_schema.columns
+             WHERE table_schema = ? AND table_name = ?
+             ORDER BY ordinal_position
+         """
+
+         try:
+             with self.connection.cursor() as cur:
+                 cur.execute(sql, (schema, tbl))
+                 return [row[0] for row in cur.fetchall()]
+         except pyodbc.Error as e:
+             print(f"Could not list columns for {tableName}: {e}")
+             return []
+

      def readTable(self, table_name:str, db_name:str = None, columns:list = None, geom_col:str = None, v = True):
          if db_name is not None:
              self.connect_db(db_name)

+         # ensure geometry column is not in columns if specified
+         if geom_col is not None:
+             if columns is None:
+                 columns = self.listColumns(table_name, db_name)
+
+             columns = [col for col in columns if col != geom_col]
+
+
          if columns is not None and geom_col is not None:
              columns.append(f"{geom_col}.STAsText() as {geom_col}_wkt")
              query = f"SELECT {','.join(columns)} FROM {table_name}"
@@ -221,7 +250,9 @@ class mssql_connection:
              self.connection.close()
              self.connection = None
              self.cursor = None
-             if v: print("> connection closed...")
+             if v: print("> connection closed...")
+         else:
+             if v: print("> no connection to close...")

      def disconnect(self, v = True):
          self.close(v = v)
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ccfx
- Version: 0.9.0
+ Version: 1.0.2
  Summary: This package simplifies regular common actions for quick prototyping in a user friendly way
  Author-email: Celray James CHAWANDA <celray@chawanda.com>
  License-Expression: MIT
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "ccfx"
- version = "0.9.0"
+ version = "1.0.2"
  description = "This package simplifies regular common actions for quick prototyping in a user friendly way"
  readme = "README.md"
  license = "MIT"