ccfx 0.9.0__tar.gz → 1.0.2__tar.gz
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- {ccfx-0.9.0/ccfx.egg-info → ccfx-1.0.2}/PKG-INFO +1 -1
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx/ccfx.py +443 -60
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx/excel.py +2 -2
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx/mssqlConnection.py +36 -5
- {ccfx-0.9.0 → ccfx-1.0.2/ccfx.egg-info}/PKG-INFO +1 -1
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx.egg-info/SOURCES.txt +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx.egg-info/dependency_links.txt +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx.egg-info/requires.txt +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx.egg-info/top_level.txt +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/pyproject.toml +1 -1
- {ccfx-0.9.0 → ccfx-1.0.2}/setup.cfg +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/LICENSE +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/MANIFEST.in +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/README.md +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx/__init__.py +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx/sqliteConnection.py +0 -0
- {ccfx-0.9.0 → ccfx-1.0.2}/ccfx/word.py +0 -0
```diff
@@ -14,7 +14,7 @@ import os, sys
 import glob
 import warnings
 from netCDF4 import Dataset
-from osgeo import gdal, osr
+from osgeo import gdal, ogr, osr
 import numpy
 from genericpath import exists
 import shutil
```
```diff
@@ -23,7 +23,7 @@ import pickle
 import time
 from shapely.geometry import box, Point
 import geopandas, pandas
-from
+from collections import defaultdict
 import py7zr
 import subprocess
 import multiprocessing
```
```diff
@@ -35,9 +35,10 @@ import requests
 from tqdm import tqdm
 import yt_dlp
 from typing import Optional
+from datetime import datetime, timedelta
 
 # functions
-def listFiles(path: str, ext: str = None) -> list:
+def listFiles(path: str, ext: Optional[str] = None) -> list:
     '''
     List all files in a directory with a specific extension
     path: directory
```
```diff
@@ -133,7 +134,7 @@ def guessMimeType(imagePath):
     return 'image/png'
 
 
-def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFileName: Optional[str] = None ) -> str:
+def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, cookiesFile: Optional[str] = None, dstFileName: Optional[str] = None ) -> str:
     """
     Download from YouTube via yt-dlp.
 
```
```diff
@@ -154,6 +155,9 @@ def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFileName: Optional[str] = None ) -> str:
 
     opts = {"outtmpl": template}
 
+    if cookiesFile:
+        opts["cookiefile"] = cookiesFile
+
     if audioOnly:
         opts.update({
             "format": "bestaudio/best",
```
```diff
@@ -163,6 +167,7 @@ def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFileName: Optional[str] = None ) -> str:
                 "preferredquality": "192",
             }],
         })
+
     else:
         # prefer a single MP4 file (progressive), fallback to any best if none
         opts["format"] = "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"
```
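The new `cookiesFile` parameter is passed straight through to yt-dlp's `cookiefile` option, which is what lets age-restricted or members-only videos download. A minimal usage sketch (the URL and paths are placeholders, and the import assumes the function is called from the `ccfx.ccfx` module):

```python
from ccfx.ccfx import downloadYoutubeVideo

# audio-only download, authenticated with cookies exported from a browser
savedPath = downloadYoutubeVideo(
    "https://www.youtube.com/watch?v=XXXXXXXXXXX",  # placeholder URL
    dstDir="downloads",
    audioOnly=True,
    cookiesFile="cookies.txt",  # Netscape-format cookie export
)
print(savedPath)
```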
```diff
@@ -181,6 +186,158 @@ def downloadYoutubeVideo(url: str, dstDir: str, audioOnly: bool = False, dstFileName: Optional[str] = None ) -> str:
     return os.path.join(dstDir, final)
 
 
+def parseYoutubePlaylist(playlistUrl: str) -> list[str]:
+    """
+    Return a list of full video URLs contained in a YouTube playlist.
+
+    Args:
+        playlistUrl: Full URL of the playlist (the one with &list=… or /playlist?list=…).
+
+    Returns:
+        List of video URLs in the order reported by YouTube.
+    """
+    opts = {
+        "quiet": True,
+        "extract_flat": "in_playlist",  # don’t recurse into each video
+    }
+
+    with yt_dlp.YoutubeDL(opts) as ytdl:
+        info = ytdl.extract_info(playlistUrl, download=False)
+
+    entries = info.get("entries", [])
+    return [f"https://www.youtube.com/watch?v={e['id']}" for e in entries if e.get("id")]
+
+
+def parseYoutubeChannelVideos(channelUrl: str, maxItems: Optional[int] = None) -> list[str]:
+    """
+    Return a list of video URLs published on a channel.
+
+    Args:
+        channelUrl: Any canonical channel URL, e.g.
+            - https://www.youtube.com/@LinusTechTips
+            - https://www.youtube.com/channel/UCXuqSBlHAE6Xw-yeJA0Tunw
+            - https://www.youtube.com/c/NASA/videos
+        maxItems: Optional hard limit. If None, returns every video the API exposes.
+
+    Returns:
+        List of video URLs, newest-first (YouTube’s default order).
+    """
+    opts = {
+        "quiet": True,
+        "extract_flat": True,  # treat the channel as one big “playlist”
+        "skip_download": True,
+    }
+
+    with yt_dlp.YoutubeDL(opts) as ytdl:
+        info = ytdl.extract_info(channelUrl, download=False)
+
+    entries = info.get("entries", [])
+    if maxItems is not None:
+        entries = entries[:maxItems]
+
+    return [f"https://www.youtube.com/watch?v={e['id']}" for e in entries if e.get("id")]
+
+
+
+def runSWATPlus(txtinoutDir: str, finalDir: str, executablePath: str = "swatplus", v: bool = True):
+    os.chdir(txtinoutDir)
+
+    if not v:
+        # Run the SWAT+ but ignore output and errors
+        subprocess.run([executablePath], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    else:
+
+        yrs_line = readFrom('time.sim')[2].strip().split()
+
+        yr_from = int(yrs_line[1])
+        yr_to = int(yrs_line[3])
+
+        delta = datetime(yr_to, 12, 31) - datetime(yr_from, 1, 1)
+
+        CREATE_NO_WINDOW = 0x08000000
+
+        if platform.system() == "Windows":
+            process = subprocess.Popen(executablePath, stdout=subprocess.PIPE, creationflags=CREATE_NO_WINDOW )
+        else:
+            process = subprocess.Popen(executablePath, stdout=subprocess.PIPE)
+
+        current = 0
+        number_of_days = delta.days + 1
+
+        day_cycle = []
+        previous_time = None
+
+        while True:
+            line = process.stdout.readline()
+            line_parts = str(line).strip().split()
+            if not "Simulation" in line_parts: pass
+            elif 'Simulation' in line_parts:
+                ref_index = str(line).strip().split().index("Simulation")
+                year = line_parts[ref_index + 3]
+                month = line_parts[ref_index + 1]
+                day = line_parts[ref_index + 2]
+
+
+                month = f"0{month}" if int(month) < 10 else month
+                day = f"0{day}" if int(day) < 10 else day
+
+                current += 1
+
+                if not previous_time is None:
+                    day_cycle.append(datetime.now() - previous_time)
+
+                if len(day_cycle) > 40:
+                    if len(day_cycle) > (7 * 365.25):
+                        del day_cycle[0]
+
+                    av_cycle_time = sum(day_cycle, timedelta()) / len(day_cycle)
+                    eta = av_cycle_time * (number_of_days - current)
+
+                    eta_str = f" ETA - {formatTimedelta(eta)}:"
+
+
+                else:
+                    eta_str = ''
+
+                showProgress(current, number_of_days, bar_length=20, message= f' >> current date: {day}/{month}/{year} - f{yr_to} {eta_str}')
+
+                previous_time = datetime.now()
+            elif "ntdll.dll" in line_parts:
+                print("\n! there was an error running SWAT+\n")
+                if counter < 10:
+                    counter += 1
+                    continue
+
+            if len(line_parts) < 2: break
+
+        showProgress(current, number_of_days, string_after= f' ')
+        print("\n")
+
+    os.chdir(finalDir)
+
+
+
+def formatTimedelta(delta: timedelta) -> str:
+    """Formats a timedelta duration to [N days] %H:%M:%S format"""
+    seconds = int(delta.total_seconds())
+
+    secs_in_a_day = 86400
+    secs_in_a_hour = 3600
+    secs_in_a_min = 60
+
+    days, seconds = divmod(seconds, secs_in_a_day)
+    hours, seconds = divmod(seconds, secs_in_a_hour)
+    minutes, seconds = divmod(seconds, secs_in_a_min)
+
+    time_fmt = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+
+    if days > 0:
+        suffix = "s" if days > 1 else ""
+        return f"{days} day{suffix} {time_fmt}"
+    else:
+        return f"{time_fmt}"
+
+
 def setMp3Metadata(fn, metadata, imagePath=None):
     '''
     This function takes a path to an mp3 and a metadata dictionary,
```
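`parseYoutubePlaylist` and `parseYoutubeChannelVideos` both run yt-dlp in flat-extraction mode, so they return URLs without touching the individual videos; combined with `downloadYoutubeVideo` they make a simple mirroring loop. A sketch (the playlist URL is a placeholder):

```python
from ccfx.ccfx import parseYoutubePlaylist, downloadYoutubeVideo

playlistUrl = "https://www.youtube.com/playlist?list=PLxxxxxxxx"  # placeholder
for videoUrl in parseYoutubePlaylist(playlistUrl):
    downloadYoutubeVideo(videoUrl, dstDir="mirror")
```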
```diff
@@ -276,6 +433,38 @@ def deleteFile(filePath:str, v:bool = False) -> bool:
 
     return deleted
 
+
+def alert(message:str, server:str = "http://ntfy.sh", topic:str = "pythonAlerts", attachment:Optional[str] = None, messageTitle:str = "info", priority:int = None, tags:list = [], printIt:bool = True, v:bool = False) -> bool:
+    '''
+    This sends an alert to a given server in case you want to be notified of something
+    message : the message to send
+    server : the server to send the message to (default is http://ntfy.sh)
+    topic : the topic to send the message to (default is pythonAlerts)
+    attachment : a file to attach to the message (optional)
+    messageTitle : the title of the message (optional, default is info)
+    priority : the priority of the message (optional, default is None)
+    tags : a list of tags to add to the message (optional, default is empty list)
+    printIt : whether to print the message to the console (default is True)
+    v : verbose (default is False, set to True to print debug info)
+
+    return: True if the alert was sent successfully, False otherwise
+    '''
+    print(message) if printIt else None; header_data = {}
+    if not messageTitle is None: header_data["Title"] = messageTitle
+    if not priority is None: header_data["Priority"] = priority
+    if not len(tags) == 0: header_data["Tags"] = ",".join(tags)
+
+    try:
+        if v: print(f"sending alert to {server}/{topic}")
+        if not attachment is None:
+            header_data["Filename"] = getFileBaseName(attachment)
+            requests.put( f"{server}/{topic}", data=open(attachment, 'rb'), headers=header_data )
+        try: requests.post(f"{server}/{topic}", data=message, headers=header_data )
+        except: return False
+    except: return False
+    return True
+
+
 def deletePath(path:str, v:bool = False) -> bool:
     '''
     Delete a directory
```
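`alert()` is a thin wrapper over ntfy-style publishing: the HTTP POST body carries the message, while the title, priority, and tags travel as request headers, and an optional attachment is PUT first. A sketch against the default public server (the topic is a placeholder; on ntfy.sh anyone who knows the topic can read it):

```python
from ccfx.ccfx import alert

alert(
    "calibration run finished",
    topic="my-unguessable-topic",  # placeholder topic name
    messageTitle="ccfx",
    priority=4,  # ntfy priorities run from 1 (min) to 5 (max)
    tags=["tada"],
)
```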
```diff
@@ -309,6 +498,38 @@ def downloadChunk(url, start, end, path):
             if chunk:
                 f.write(chunk)
 
+
+def formatStringBlock(input_str, max_chars=70):
+    '''
+    This function takes a string and formats it into a block of text
+    with a maximum number of characters per line.
+
+    input_str: the string to format
+    max_chars: the maximum number of characters per line (default is 70)
+
+    '''
+    words = input_str.split(' ')
+    lines = []
+    current_line = ""
+
+    for word in words:
+        # If adding the next word to the current line would exceed the max_chars limit
+        if len(current_line) + len(word) > max_chars:
+            # Append current line to lines and start a new one
+            lines.append(current_line.strip())
+            current_line = word
+        else:
+            # Add the word to the current line
+            current_line += " " + word
+
+    # Append any remaining words
+    lines.append(current_line.strip())
+
+    return '\n'.join(lines)
+
+
+
+
 def downloadFile(url, save_path, exists_action='resume', num_connections=5, v=False):
     if v:
         print(f"\ndownloading {url}")
```
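`formatStringBlock` greedily packs whole words until a line would exceed `max_chars`, then starts a new line. For example:

```python
from ccfx.ccfx import formatStringBlock

print(formatStringBlock("the quick brown fox jumps over the lazy dog", max_chars=20))
# the quick brown fox
# jumps over the lazy
# dog
```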
```diff
@@ -643,21 +864,45 @@ def renameNetCDFvariable(input_file: str, output_file: str, old_var_name: str, new_var_name: str):
     except subprocess.CalledProcessError as e:
         print(f"Error: {e.stderr}")
 
-
+
+def compressTo7z(input_dir: str, output_file: str, compressionLevel: int = 4, excludeExt: list = None, v: bool = False) -> None:
     """
     Compresses the contents of a directory to a .7z archive with maximum compression.
 
     :param input_dir: Path to the directory to compress
     :param output_file: Output .7z file path
+    :param compressionLevel: Compression level (0-9), default is 4 (maximum compression)
+    :param excludeExt: List of file extensions to exclude from compression
     """
+    if excludeExt is None:
+        excludeExt = []
+
     # Create the .7z archive with LZMA2 compression
-    with py7zr.SevenZipFile(output_file, 'w', filters=[{'id': py7zr.FILTER_LZMA2, 'preset':
+    with py7zr.SevenZipFile(output_file, 'w', filters=[{'id': py7zr.FILTER_LZMA2, 'preset': compressionLevel}]) as archive:
         # Add each item in the input directory, avoiding the top-level folder in the archive
         for root, _, files in os.walk(input_dir):
             for file in files:
                 file_path = os.path.join(root, file)
+
+                # Skip excluded file extensions
+                if any(file.endswith(ext) for ext in excludeExt):
+                    continue
                 # Add file to the archive with a relative path to avoid including the 'tmp' folder itself
                 archive.write(file_path, arcname=os.path.relpath(file_path, start=input_dir))
+    if v:
+        print(f"compressed {input_dir} to {output_file} with compression level {compressionLevel}.")
+
+
+def uncompress(inputFile: str, outputDir: str, v: bool = False) -> None:
+    """
+    Extracts an archive supported by py7zr (.7z, .zip, .tar, .tar.gz, .tar.bz2, .xz, .tar.xz) to outputDir.
+    inputFile: Path to the input archive file
+    outputDir: Directory where the contents will be extracted
+    v: Verbose flag to print extraction status (default is False)
+    """
+    if not exists(outputDir): createPath(outputDir)
+    with py7zr.SevenZipFile(inputFile, 'r') as archive: archive.extractall(path=outputDir)
+    if v: print(f"extracted {inputFile} to {outputDir}.")
 
 
 def moveDirectory(srcDir:str, destDir:str, v:bool = False) -> bool:
```
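`compressTo7z` and `uncompress` give a simple archive round trip. A sketch, assuming a local `results/` directory exists:

```python
from ccfx.ccfx import compressTo7z, uncompress

# archive everything except shapefile sidecars
compressTo7z("results", "results.7z", compressionLevel=5, excludeExt=[".shx", ".dbf"], v=True)

# restore it elsewhere
uncompress("results.7z", "restored", v=True)
```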
```diff
@@ -744,6 +989,21 @@ def clipRasterByExtent(inFile: str, outFile: str, bounds: tuple) -> str:
     ds = None
     return outFile
 
+
+def clipRasterByVector(inFile: str, outFile: str, vectorFile: str) -> str:
+    '''
+    Clips a raster using GDAL warp with a vector file
+    inFile: input raster path
+    outFile: output path
+    vectorFile: vector file path (e.g., shapefile or GeoJSON)
+    return: output path
+    '''
+    ds = gdal.Open(inFile)
+    gdal.Warp(outFile, ds, cutlineDSName=vectorFile, cropToCutline=True)
+    ds = None
+    return outFile
+
+
 def clipVectorByExtent(inFile: str, outFile: str, bounds: tuple) -> str:
     '''
     Clips a vector using GeoPandas
```
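`clipRasterByVector` is a thin wrapper around `gdal.Warp` with `cutlineDSName` and `cropToCutline`, so any OGR-readable layer can serve as the mask. A sketch with placeholder file names:

```python
from ccfx.ccfx import clipRasterByVector

clipped = clipRasterByVector("dem.tif", "dem_clipped.tif", "watershed.geojson")  # placeholders
```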
```diff
@@ -878,61 +1138,81 @@ def ignoreWarnings(ignore:bool = True, v:bool = False) -> None:
     return None
 
 
-def createGrid(
+def createGrid(topLeft: list = None, bottomRight: list = None, resolution: float = None,
+               inputShape: str = None, crs: str = "EPSG:4326", saveVector: str = None) -> geopandas.GeoDataFrame:
     '''
-    This function creates a grid of polygons based on a shapefile
-    shapefile_path: path to the shapefile
-    resolution: resolution of the grid
-    useDegree: use degree (default is True)
-
-    return: xx, yy, polygons, within_mask, gdf.crs, minx, miny
-    '''
-    # Read the shapefile
-    gdf = geopandas.read_file(shapefile_path)
-
-    if useDegree:
-        gdf = gdf.to_crs(epsg=4326)
+    This function creates a grid of polygons based on either a shapefile or corner coordinates
 
-
-
+    Parameters:
+    topLeft: list [lon, lat] - top left corner coordinates
+    bottomRight: list [lon, lat] - bottom right corner coordinates
+    resolution: float - resolution of the grid
+    inputShape: str - path to the shapefile (optional, if provided bounds will be taken from here)
+    crs: str - coordinate reference system (default is "EPSG:4326")
+    saveVector: str - path to save the generated grid (optional)
+
+    Returns:
+    geopandas.GeoDataFrame - the generated grid
+    '''
+    # Input validation
+    if inputShape is None and (topLeft is None or bottomRight is None or resolution is None):
+        raise ValueError("Either provide inputShape OR provide topLeft, bottomRight, and resolution")
+
+    if inputShape is not None and resolution is None:
+        raise ValueError("Resolution must be provided")
+
+    # Get bounds from shapefile or coordinates
+    if inputShape is not None:
+        # Read the shapefile and get bounds
+        gdf = geopandas.read_file(inputShape)
+        gdf = gdf.to_crs(crs)
+        minx, miny, maxx, maxy = gdf.total_bounds
+        reference_geometry = gdf.unary_union
+    else:
+        # Use provided corner coordinates [lon, lat]
+        # Extract coordinates and determine actual bounds
+        lon1, lat1 = topLeft[0], topLeft[1]
+        lon2, lat2 = bottomRight[0], bottomRight[1]
+
+        # Determine actual min/max values
+        minx = min(lon1, lon2)
+        maxx = max(lon1, lon2)
+        miny = min(lat1, lat2)
+        maxy = max(lat1, lat2)
+        reference_geometry = None
 
     # Create a grid based on the bounds and resolution
     x = numpy.arange(minx, maxx, resolution)
     y = numpy.arange(miny, maxy, resolution)
-    xx, yy = numpy.meshgrid(x, y)
-
-    # Create polygons for each grid cell, arranged in 2D array
-    grid_shape = xx.shape
-    polygons = numpy.empty(grid_shape, dtype=object)
-    for i in range(grid_shape[0]):
-        for j in range(grid_shape[1]):
-            x0, y0 = xx[i, j], yy[i, j]
-            x1, y1 = x0 + resolution, y0 + resolution
-            polygons[i, j] = box(x0, y0, x1, y1)
 
-    #
-
+    # Create polygons for each grid cell
+    polygons = []
+    for i in range(len(y)):
+        for j in range(len(x)):
+            x0, y0 = x[j], y[i]
+            x1, y1 = x0 + resolution, y0 + resolution
+            # Ensure we don't exceed the bounds
+            x1 = min(x1, maxx)
+            y1 = min(y1, maxy)
+            polygons.append(box(x0, y0, x1, y1))
 
     # Create a GeoDataFrame from the grid
-    grid_gdf = geopandas.GeoDataFrame({'geometry':
-
-    minx, miny, maxx, maxy = grid_gdf.total_bounds
-    print(" minx:", minx, "miny:", miny, "maxx:", maxx, "maxy:", maxy)
-
-    minx, miny, maxx, maxy = getVectorBounds(grid_gdf)
+    grid_gdf = geopandas.GeoDataFrame({'geometry': polygons}, crs=crs)
+
     # Add a column to indicate if the cell intersects with the original shapefile
-
+    if reference_geometry is not None:
+        grid_gdf['within'] = grid_gdf.intersects(reference_geometry)
+    else:
+        # For coordinate-based grids, set all cells as within
+        grid_gdf['within'] = True
 
-    #
-
+    # Save the grid if path is provided
+    if saveVector is not None:
+        grid_gdf.to_file(saveVector, driver="GPKG")
+        print(f"Grid saved to {saveVector}")
 
-
-    reprojectedGrid = grid_gdf.to_crs(epsg=4326)
+    return grid_gdf
 
-    grid_gdf.to_file("generatedGrid4326.gpkg", driver="GPKG")
-    reprojectedGrid.to_file("generatedGrid.gpkg", driver="GPKG")
-
-    return xx, yy, polygons, within_mask, gdf.crs, minx, miny
 
 def setHomeDir(path:str) -> str:
     '''
```
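The rewritten `createGrid` accepts either a vector file (`inputShape`) or bare corner coordinates. A sketch of the coordinate-driven path (all values are placeholders):

```python
from ccfx.ccfx import createGrid

# 0.1-degree fishnet over a lon/lat box, saved as a GeoPackage
grid = createGrid(
    topLeft=[32.0, 2.5],  # [lon, lat]
    bottomRight=[35.0, -1.5],
    resolution=0.1,
    saveVector="grid.gpkg",
)
print(len(grid), grid.crs)
```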
```diff
@@ -989,7 +1269,7 @@ def netcdfVariableDimensions(ncFile: str, variable: str) -> dict:
 
     return bands_info
 
-def netcdfExportTif(ncFile: str, variable: str, outputFile: str = None, band: int = None, v:bool = True) -> gdal.Dataset:
+def netcdfExportTif(ncFile: str, variable: str, outputFile: Optional[str] = None, band: int = None, v:bool = True) -> gdal.Dataset:
     '''
     Export a variable from a NetCDF file to a GeoTiff file
     ncFile: NetCDF file
```
```diff
@@ -1232,7 +1512,7 @@ def showProgress(count: int, end: int, message: str, barLength: int = 100) -> None:
     message: message to display
     barLength: length of the progress bar
     '''
-    percent =
+    percent = float(count / end * 100)
     percentStr = f'{percent:03.1f}'
     filled = int(barLength * count / end)
     bar = '█' * filled + '░' * (barLength - filled)
```
```diff
@@ -1310,7 +1590,7 @@ def createPointGeometry(coords: list, proj: str = "EPSG:4326") -> geopandas.GeoDataFrame:
     gdf.reset_index(inplace=True)
     return gdf
 
-def calculateTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
+def calculateTimeseriesStats(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> dict:
     '''
     Calculate statistics for a timeseries
 
```
```diff
@@ -1454,7 +1734,7 @@ def calculateTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
     }
 
 
-def getNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getNSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the NSE
 
```
```diff
@@ -1475,7 +1755,7 @@ def getNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
 
     return stats['NSE']
 
-def getKGE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getKGE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the KGE
 
```
```diff
@@ -1496,7 +1776,7 @@ def getKGE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
 
     return stats['KGE']
 
-def getPBIAS(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getPBIAS(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the PBIAS
 
```
```diff
@@ -1518,7 +1798,7 @@ def getPBIAS(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
     return stats['PBIAS']
 
 
-def getLNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getLNSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the LNSE
 
```
```diff
@@ -1539,7 +1819,7 @@ def getLNSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
 
     return stats['LNSE']
 
-def getR2(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getR2(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the R2
 
```
```diff
@@ -1560,7 +1840,7 @@ def getR2(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
 
     return stats['R2']
 
-def getRMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getRMSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the RMSE
 
```
```diff
@@ -1581,7 +1861,7 @@ def getRMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
 
     return stats['RMSE']
 
-def getMAE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getMAE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the MAE
 
```
```diff
@@ -1602,7 +1882,7 @@ def getMAE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
 
     return stats['MAE']
 
-def getMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
+def getMSE(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> float:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return the MSE
 
```
```diff
@@ -1623,7 +1903,7 @@ def getMSE(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> float:
 
     return stats['MSE']
 
-def getTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
+def getTimeseriesStats(data:pandas.DataFrame, observed:Optional[str] = None, simulated:Optional[str] = None, resample:Optional[str] = None ) -> dict:
     '''
     this function is a wrapper for calculateTimeseriesStats specifically to return all stats
 
```
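All of the `get*` wrappers share one calling convention: a DataFrame with a datetime index plus the observed and simulated column names, with an optional pandas resample rule forwarded to `calculateTimeseriesStats`. A sketch with synthetic data:

```python
import pandas
from ccfx.ccfx import getNSE, getKGE

df = pandas.DataFrame(
    {"obs": [1.0, 2.0, 3.0, 4.0], "sim": [1.1, 1.9, 3.2, 3.8]},
    index=pandas.date_range("2020-01-01", periods=4, freq="D"),
)

print(getNSE(df, observed="obs", simulated="sim"))
print(getKGE(df, observed="obs", simulated="sim"))
```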
```diff
@@ -1644,4 +1924,107 @@ def getTimeseriesStats(data:pandas.DataFrame, observed:str = None, simulated:str = None, resample:str = None ) -> dict:
 
     return stats
 
+def readSWATPlusOutputs(filePath: str, column: Optional[str] = None, unit: Optional[int] = None, gis_id: Optional[int] = None, name: Optional[str] = None):
+    '''
+    Read SWAT+ output files and return a pandas DataFrame with proper date handling
+    and optional filtering capabilities.
+
+    Parameters:
+    -----------
+    filePath: str
+        Path to the SWAT+ output file
+    column: str, optional
+        Name of the column to extract. If not specified, returns all columns.
+        If specified, returns first match, or specify multiple columns as comma-separated string
+    unit: int, optional
+        Filter by unit number. If not specified, returns all units
+    gis_id: int, optional
+        Filter by gis_id. If not specified, returns all gis_ids
+    name: str, optional
+        Filter by name. If not specified, returns all names
+
+    Returns:
+    --------
+    pandas.DataFrame or None
+        DataFrame with date column and requested data, filtered as specified
+    '''
+
+    if not exists(filePath):
+        print('! SWAT+ result file does not exist')
+        return None
+
+    # Read the header line (line 2, index 1)
+    with open(filePath, 'r') as f:
+        lines = f.readlines()
+
+    header_line = lines[1].strip()
+    headers = header_line.split()
+
+    # Handle duplicate column names
+    column_counts = defaultdict(int)
+    modified_header = []
+    for col_name in headers:
+        column_counts[col_name] += 1
+        if column_counts[col_name] > 1:
+            modified_header.append(f"{col_name}_{column_counts[col_name]}")
+        else:
+            modified_header.append(col_name)
+
+    # Add extra columns to handle potential mismatches
+    modified_header = modified_header + ['extra1', 'extra2']
+
+    try:
+        df = pandas.read_csv(filePath, delim_whitespace=True, skiprows=3, names=modified_header, index_col=False)
+    except:
+        sys.stdout.write(f'\r! could not read {filePath} using pandas, check the number of columns\n')
+        sys.stdout.flush()
+        return None
+
+    # Remove extra columns
+    df = df.drop(columns=['extra1', 'extra2'], errors='ignore')
+
+    # Convert all columns to numeric except 'name' (which is string)
+    for col in df.columns:
+        if col != 'name':
+            df[col] = pandas.to_numeric(df[col], errors='coerce')
+
+    # Create date column from yr, mon, day
+    try:
+        df['date'] = pandas.to_datetime(pandas.DataFrame({'year': df.yr, 'month': df.mon, 'day': df.day}))
+    except KeyError:
+        # If some date columns are missing, create a simple index-based date
+        df['date'] = pandas.date_range(start='2000-01-01', periods=len(df), freq='D')
+    except:
+        # If date creation fails for any other reason, use index-based date
+        df['date'] = pandas.date_range(start='2000-01-01', periods=len(df), freq='D')
+
+    # Filter by unit if specified
+    if unit is not None and 'unit' in df.columns:
+        df = df[df['unit'] == unit]
+
+    # Filter by gis_id if specified
+    if gis_id is not None and 'gis_id' in df.columns:
+        df = df[df['gis_id'] == gis_id]
+
+    # Filter by name if specified
+    if name is not None and 'name' in df.columns:
+        df = df[df['name'] == name]
+
+    # Handle column selection
+    if column is not None and column != "*":
+        # Parse comma-separated columns
+        requested_cols = [col.strip() for col in column.split(',')]
+
+        # Always include date column
+        selected_cols = ['date']
+
+        # Add requested columns if they exist
+        for req_col in requested_cols:
+            if req_col in df.columns:
+                selected_cols.append(req_col)
+
+        df = df[selected_cols]
+
+    return df
+
 ignoreWarnings()
```
```diff
@@ -29,7 +29,7 @@ class excel:
         self.date_format = None
 
     def create(self):
-        self.
+        self.createPath(os.path.dirname(self.path))
         self.book = xlsxwriter.Workbook(self.path)
 
     def addSheet(self, sheet_name):
```
```diff
@@ -42,7 +42,7 @@ class excel:
 
     def writeDate(self, sheet_name, row, column, datetime_obj):
         if self.date_format is None:
-            self.
+            self.setDateFormat()
 
         self.sheet_names[sheet_name].write_datetime(
             row, column, datetime_obj, self.date_format)
```
```diff
@@ -21,7 +21,7 @@ import geopandas
 
 
 # classes
-class mssql_connection:
+class mssqlConnection:
     def __init__(self, server, username, password, driver, trust_server_ssl = True) -> None:
         self.server = server
         self.username = username
```
```diff
@@ -85,20 +85,49 @@ class mssql_connection:
             self.cursor = self.connection.cursor()
             self.cursor.execute(query)
             tables = [row[0] for row in self.cursor.fetchall()]
-            print("> list of tables in the active database:")
-            for table in tables:
-                print(f"\t- {table}")
+            # print("> list of tables in the active database:")
+            # for table in tables:
+            #     print(f"\t- {table}")
         except pyodbc.Error as e:
             print("Error occurred while fetching the list of tables:")
             print(e)
 
         return tables
 
+    def listColumns(self, tableName: str, dbName: str | None = None) -> list[str]:
+        if dbName:
+            self.connect_db(dbName)
+
+        schema, tbl = ('dbo', tableName) if '.' not in tableName else tableName.split('.', 1)
+
+        sql = """
+            SELECT column_name
+            FROM information_schema.columns
+            WHERE table_schema = ? AND table_name = ?
+            ORDER BY ordinal_position
+        """
+
+        try:
+            with self.connection.cursor() as cur:
+                cur.execute(sql, (schema, tbl))
+                return [row[0] for row in cur.fetchall()]
+        except pyodbc.Error as e:
+            print(f"Could not list columns for {tableName}: {e}")
+            return []
+
 
     def readTable(self, table_name:str, db_name:str = None, columns:list = None, geom_col:str = None, v = True):
         if db_name is not None:
             self.connect_db(db_name)
 
+        # ensure geometry column is not in columns if specified
+        if geom_col is not None:
+            if columns is None:
+                columns = self.listColumns(table_name, db_name)
+
+            columns = [col for col in columns if col != geom_col]
+
+
         if columns is not None and geom_col is not None:
             columns.append(f"{geom_col}.STAsText() as {geom_col}_wkt")
             query = f"SELECT {','.join(columns)} FROM {table_name}"
```
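`listColumns` queries `information_schema.columns` with parameterized schema and table names, and `readTable` now uses it to drop the raw geometry column before appending the `STAsText()` projection. A sketch (all connection details are placeholders, and it assumes `connect_db` selects the working database as it does elsewhere in the class):

```python
from ccfx.mssqlConnection import mssqlConnection

conn = mssqlConnection(
    server="localhost",                       # placeholder credentials
    username="sa",
    password="***",
    driver="ODBC Driver 18 for SQL Server",
)
conn.connect_db("gisdb")

print(conn.listColumns("dbo.parcels"))
gdf = conn.readTable("dbo.parcels", geom_col="geom")  # geometry returned as WKT
```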
```diff
@@ -221,7 +250,9 @@ class mssql_connection:
             self.connection.close()
             self.connection = None
             self.cursor = None
-
+            if v: print("> connection closed...")
+        else:
+            if v: print("> no connection to close...")
 
     def disconnect(self, v = True):
         self.close(v = v)
```