kardioutils 1.0.10__tar.gz → 1.0.12__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kardioutils-1.0.10/kardioutils.egg-info → kardioutils-1.0.12}/PKG-INFO +1 -1
- kardioutils-1.0.12/dl2050utils/__version__.py +1 -0
- kardioutils-1.0.12/dl2050utils/df_utils.py +77 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/gs.py +254 -103
- {kardioutils-1.0.10 → kardioutils-1.0.12/kardioutils.egg-info}/PKG-INFO +1 -1
- {kardioutils-1.0.10 → kardioutils-1.0.12}/kardioutils.egg-info/SOURCES.txt +1 -0
- kardioutils-1.0.10/dl2050utils/__version__.py +0 -1
- {kardioutils-1.0.10 → kardioutils-1.0.12}/LICENSE.txt +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/README.md +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/__config__.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/__init__.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/api.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/auth.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/com.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/common.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/core.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/db copy.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/db.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/dbdf.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/dbutils.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/df.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/env.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/etl.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/fdb.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/fs.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/graphql.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/ju.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/log.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/mq.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/rest.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/restapp.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/restutils.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/sqlite.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/ulists.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/dl2050utils/wsgi.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/kardioutils.egg-info/dependency_links.txt +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/kardioutils.egg-info/top_level.txt +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/setup.cfg +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/setup.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/test/test_core.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/test/test_db.py +0 -0
- {kardioutils-1.0.10 → kardioutils-1.0.12}/test/test_env.py +0 -0
kardioutils-1.0.12/dl2050utils/__version__.py (new file)

```diff
@@ -0,0 +1 @@
+version = "1.0.12"
```
kardioutils-1.0.12/dl2050utils/df_utils.py (new file)

```diff
@@ -0,0 +1,77 @@
+import pandas as pd
+import os
+
+def list_prefixes(df: pd.DataFrame) -> list:
+    """Return all distinct prefixes in the dataframe."""
+    return df["prefix"].dropna().unique().tolist()
+
+
+def filter_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
+    """Return all rows that match a given prefix exactly."""
+    return df[df["prefix"] == prefix]
+
+
+def filter_prefix_contains(df: pd.DataFrame, text: str) -> pd.DataFrame:
+    """Return all rows where prefix contains the given text."""
+    return df[df["prefix"].str.contains(text, na=False)]
+
+
+def find_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> pd.DataFrame:
+    """Return all rows that match a given uid_suffix."""
+    return df[df["uid_suffix"] == uid_suffix]
+
+
+def find_by_uid_full(df: pd.DataFrame, uid_full: str) -> pd.DataFrame:
+    """Return all rows that match a given uid_full."""
+    return df[df["uid_full"] == uid_full]
+
+
+def holter_only(df: pd.DataFrame) -> pd.DataFrame:
+    """Return only rows where holter == True."""
+    return df[df["holter"] == True]
+
+
+def non_holter_only(df: pd.DataFrame) -> pd.DataFrame:
+    """Return only rows where holter == False."""
+    return df[df["holter"] == False]
+
+
+def get_path_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> str | None:
+    """
+    Return the path for a given uid_suffix.
+    If there are multiple rows, returns the first one.
+    If nothing is found, returns None.
+    """
+    rows = df[df["uid_suffix"] == uid_suffix]
+    if rows.empty:
+        return None
+    return rows.iloc[0]["path"]
+
+
+def get_paths_by_prefix(df: pd.DataFrame, prefix: str, holter_only_flag: bool | None = None) -> list:
+    """
+    Return a list of paths filtered by prefix and optionally holter flag.
+    - holter_only_flag = True → only holter rows
+    - holter_only_flag = False → only non-holter rows
+    - holter_only_flag = None → ignore holter column
+    """
+    subset = df[df["prefix"] == prefix]
+    if holter_only_flag is not None:
+        subset = subset[subset["holter"] == holter_only_flag]
+    return subset["path"].dropna().tolist()
+
+
+def check_missing_files(df):
+    """
+    Return subset of rows whose 'path' does not point to an existing file.
+    """
+    mask = ~df["path"].astype(str).apply(os.path.exists)
+    return df[mask]
+
+
+def check_existing_files(df):
+    """
+    Return subset of rows whose 'path' exists.
+    """
+    mask = df["path"].astype(str).apply(os.path.exists)
+    return df[mask]
```
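The new module assumes an index DataFrame with at least the columns `prefix`, `uid_suffix`, `uid_full`, `holter`, and `path`. A minimal usage sketch; the data below is made up for illustration:

```python
import pandas as pd
from dl2050utils import df_utils

# Hypothetical index of recordings; column names follow the helpers above
df = pd.DataFrame({
    "prefix": ["siteA", "siteA", "siteB"],
    "uid_suffix": ["001", "002", "003"],
    "uid_full": ["siteA-001", "siteA-002", "siteB-003"],
    "holter": [True, False, True],
    "path": ["/data/a1.ecg", "/data/a2.ecg", "/data/b3.ecg"],
})

df_utils.list_prefixes(df)                                         # ['siteA', 'siteB']
df_utils.get_paths_by_prefix(df, "siteA", holter_only_flag=True)   # ['/data/a1.ecg']
df_utils.get_path_by_uid_suffix(df, "002")                         # '/data/a2.ecg'
df_utils.check_missing_files(df)                                   # rows whose path is absent on disk
```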
dl2050utils/gs.py (kardioutils-1.0.10 → kardioutils-1.0.12)

```diff
@@ -8,12 +8,45 @@ import datetime
 import re
 import pickle
 import mimetypes
+import requests
 from google.cloud import storage
 from dl2050utils.core import oget
 from dl2050utils.env import config_load
 from dl2050utils.fs import json_save
+import hashlib
+import hmac
+import urllib.parse
+from pathlib import Path
 
 
+class _URLSigner:
+    """Internal HMAC-based URL signer for local backend."""
+
+    def __init__(self, secret_key: str, base_url: str):
+        self.secret = secret_key.encode("utf-8")
+        self.base_url = base_url.rstrip("/")
+
+    def _make_signature(self, method: str, bucket: str, blob: str, exp: int, max_size: int | None):
+        payload = f"{method}\n{bucket}\n{blob}\n{exp}\n{max_size or ''}"
+        return hmac.new(self.secret, payload.encode("utf-8"), hashlib.sha256).hexdigest()
+
+    def generate_url(self, path: str, method: str, bucket: str, blob: str,
+                     timeout: int, max_size: int | None = None) -> str:
+        import time
+        exp = int(time.time()) + timeout
+        sig = self._make_signature(method, bucket, blob, exp, max_size)
+        query = {
+            "bucket": bucket,
+            "blob": blob,
+            "exp": exp,
+            "method": method,
+            "sig": sig,
+        }
+        if max_size is not None:
+            query["max_size"] = max_size
+        qs = urllib.parse.urlencode(query)
+        return f"{self.base_url}{path}?{qs}"
+
 class GS:
     """
     Google Cloud Storage helper class to manage buckets, files, and URLs.
```
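The diff only adds the client side of this signing scheme; the fs-server endpoint that validates these URLs is not part of the package diff. For reference, a verifier would have to rebuild the exact payload used in `_make_signature`. A minimal sketch, with the function name and parameter passing entirely assumed:

```python
import hashlib
import hmac
import time

def verify_signed_request(secret: str, method: str, bucket: str, blob: str,
                          exp: int, sig: str, max_size: int | None = None) -> bool:
    """Hypothetical server-side check mirroring _URLSigner._make_signature."""
    if time.time() > exp:          # reject expired links first
        return False
    payload = f"{method}\n{bucket}\n{blob}\n{exp}\n{max_size or ''}"
    expected = hmac.new(secret.encode("utf-8"), payload.encode("utf-8"),
                        hashlib.sha256).hexdigest()
    return hmac.compare_digest(expected, sig)   # constant-time comparison
```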
```diff
@@ -27,23 +60,42 @@ class GS:
     """
 
     def __init__(self, service, default_location="europe-west1"):
-        """
-        Initializes the GS class with the specified Google Cloud service and location.
-        Args:
-            service (str): The Google Cloud service name.
-            default_location (str): Default location for bucket creation. Defaults to "europe-west1".
-        """
         cfg = config_load(service)
-        #
+        # Try Google Cloud first
         key_dict = oget(cfg, ["gcloud", "gs_key"])
-
-
-
-
-
-
-
-
+        fs_cfg = oget(cfg, ["fs"]) or {}
+
+        self.mode = None  # 'gcloud' or 'local'
+
+        if key_dict is not None:
+            # ---------- GCS MODE ----------
+            assert key_dict["type"] == "service_account"
+            credentials_p = "./gs-keyfile.json"
+            token = fs_cfg["internal_token"]
+            json_save(credentials_p, key_dict)
+            os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_p
+            os.environ["FS_INTERNAL_TOKEN"] = token
+
+            self.default_location = default_location
+            self.gc = storage.Client()
+            self.mode = "gcloud"
+        elif fs_cfg.get("backend") == "local":
+            # ---------- LOCAL MODE ----------
+            self.mode = "local"
+            self.default_location = "local"
+            self.gc = None  # not used
+
+            self.root_dir = Path(fs_cfg.get("root_dir", f"/data/{service}/fs"))
+            self.root_dir.mkdir(parents=True, exist_ok=True)
+
+            base_url = fs_cfg.get("url", "http://localhost:8001")
+            secret = fs_cfg.get("secret")
+            if not secret:
+                raise RuntimeError("GS local backend enabled but fs.secret not configured")
+
+            self._signer = _URLSigner(secret_key=secret, base_url=base_url)
+        else:
+            raise RuntimeError("GS: neither gcloud.gs_key nor fs.backend=local configured")
 
     # ####################################################################################################
     # Admin
```
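Two config shapes now select the backend. The key paths below come from the `oget`/`fs_cfg.get` calls in the hunk; the values are placeholders, and the on-disk format handled by `config_load` is not visible in this diff:

```python
# GCS mode: gcloud.gs_key present (full service-account key dict)
cfg_gcloud = {
    "gcloud": {"gs_key": {"type": "service_account"}},  # plus the remaining key fields
    "fs": {"internal_token": "<token>"},                # exported as FS_INTERNAL_TOKEN
}

# Local mode: no gs_key, fs.backend == "local"
cfg_local = {
    "fs": {
        "backend": "local",
        "root_dir": "/data/<service>/fs",   # default if omitted
        "url": "http://localhost:8001",     # default if omitted
        "secret": "<hmac-secret>",          # required, else RuntimeError
    },
}
```

Note that the GCS branch reads `fs_cfg["internal_token"]` unconditionally, so a config with `gcloud.gs_key` but no `fs.internal_token` entry raises `KeyError` at construction time.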
```diff
@@ -172,30 +224,56 @@
     # Memmory Download, Upload
     # ###################################################################################################################
 
-    def upload_mem(self, bucket_name, blob_name, data,
+    def upload_mem(self, bucket_name, blob_name, data,
+                   content_type="application/octet-stream",
+                   use_pickle=True):
         """
-        Uploads data from memory to
-        Args:
-            bucket_name (str): Name of the GCS bucket.
-            blob_name (str): Name of the blob to upload.
-            data (str or bytes): Data to upload. Can be a string or bytes.
-            content_type (str, optional): MIME type of the data. Defaults to 'application/octet-stream'.
-            use_pickle (bool, optional): If True, serializes the data using pickle before uploading. Defaults to False.
-        Returns:
-            int: 0 if upload is successful, 1 otherwise.
-        Examples:
-            gs.upload_mem(bucket_name, blob_name, data="Hello, world!", content_type='text/plain')
-            gs.upload_mem(bucket_name, blob_name, data=b'\x89PNG\r\n\x1a...', content_type='image/png')
+        Uploads data from memory to storage (GCS or local FS).
         """
         try:
             if use_pickle:
                 data = pickle.dumps(data)
             elif isinstance(data, str):
                 data = data.encode("utf-8")
-
-
-
-
+
+
+            if self.mode == "gcloud":
+                # --------- Google Cloud ---------
+                bucket = self.gc.bucket(bucket_name)
+                blob = bucket.blob(blob_name)
+                blob.upload_from_string(data, content_type=content_type)
+                return 0
+
+            elif self.mode == "local":
+                # --------- Local fs-server (fs.py) via HTTP ---------
+                size = len(data)
+                upload_url = self.upload_url(
+                    bucket_name,
+                    blob_name,
+                    timeout=15 * 60,
+                    size=size,
+                )
+                if not upload_url:
+                    print("upload_mem (local) ERROR: could not use upload_url")
+                    return 1
+
+                resp = requests.put(
+                    upload_url,
+                    data=data,
+                    headers={"Content-Type": content_type},
+                    timeout=60,
+                )
+
+                if resp.status_code not in (200, 201):
+                    print("upload_mem (local) ERROR:", resp.status_code, resp.text)
+                    return 1
+
+                return 0
+
+            else:
+                print("upload_mem ERROR: unknown mode", self.mode)
+                return 1
+
         except Exception as exc:
             print(f"upload_mem EXCEPTION: {str(exc)}")
             return 1
```
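Worth flagging: the removed docstring said `use_pickle` defaulted to False, while the new signature sets `use_pickle=True`, so callers uploading plain text or bytes now get pickled payloads unless they opt out. A sketch, with service, bucket, and blob names purely illustrative:

```python
gs = GS("myservice")   # hypothetical service name

# Plain text/bytes now need an explicit opt-out from pickling
gs.upload_mem("mybucket", "notes/hello.txt", data="Hello, world!",
              content_type="text/plain", use_pickle=False)

# With the new default, arbitrary Python objects are pickled transparently
gs.upload_mem("mybucket", "objs/stats.pkl", data={"n": 42})
```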
```diff
@@ -215,21 +293,48 @@
         Any: The data from the blob, possibly decoded or deserialized.
         """
         try:
-
-
-
+            if self.mode == "gcloud":
+                # --------- Google Cloud ---------
+                bucket = self.gc.bucket(bucket_name)
+                blob = bucket.blob(blob_name)
+                data = blob.download_as_bytes()
+
+            elif self.mode == "local":
+                # --------- Local fs-server (fs.py) via HTTP ---------
+                download_url = self.download_url(
+                    bucket_name,
+                    blob_name,
+                    timeout=24 * 3600,
+                )
+                if not download_url:
+                    print("download_mem (local) ERROR: could not generate download_url")
+                    return None
+                internal_token = os.environ.get("FS_INTERNAL_TOKEN")
+                headers = {}
+                if internal_token:
+                    headers["X-Internal-Token"] = internal_token
+
+
+                resp = requests.get(download_url, headers=headers, timeout=60)
+
+                if resp.status_code != 200:
+                    print("download_mem (local) ERROR:", resp.status_code, resp.text)
+                    return None
+
+                data = resp.content
+
+            else:
+                print("download_mem ERROR: unknown mode", self.mode)
+                return None
+            # Post-processing is the same for both modes
             if use_pickle:
-                # Deserialize the data using pickle
                 data = pickle.loads(data)
             elif as_string:
-                # Decode the data using the specified encoding
                 data = data.decode(encoding)
-            # If neither use_pickle nor as_string is True, return the raw bytes
             return data
         except Exception as exc:
             print(f"download_mem EXCEPTION: {str(exc)}")
             return None
-
     # ###################################################################################################################
     # File Download, Upload
     # ###################################################################################################################
```
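Reading back mirrors the upload flags. The keyword names (`use_pickle`, `as_string`, `encoding`) appear in the hunk's context, but the full signature and its defaults do not, so everything is passed explicitly in this sketch:

```python
obj = gs.download_mem("mybucket", "objs/stats.pkl", use_pickle=True)       # -> {"n": 42}
txt = gs.download_mem("mybucket", "notes/hello.txt",
                      use_pickle=False, as_string=True, encoding="utf-8")  # -> str
raw = gs.download_mem("mybucket", "notes/hello.txt", use_pickle=False)     # -> bytes
```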
```diff
@@ -243,18 +348,20 @@
     ):
         """
         Uploads a local file to a specified bucket and blob.
-
-
-            blob_name (str): Name of the blob to upload.
-            local_file_path (str): Local path of the file to upload.
-            content_type (str, optional): MIME type of the data. Defaults to 'application/octet-stream'.
-        Returns:
-            int: 0 if upload is successful, 1 otherwise.
+        - In GCS mode: uploads to Google Cloud.
+        - In local mode: copies the file into root_dir / bucket / blob.
         """
         try:
-
-
-
+            if self.mode == "gcloud":
+                bucket = self.gc.bucket(bucket_name)
+                blob = bucket.blob(blob_name)
+                blob.upload_from_filename(local_file_path, content_type=content_type)
+                return 0
+            # LOCAL MODE
+            dst = self.root_dir / bucket_name / blob_name
+            dst.parent.mkdir(parents=True, exist_ok=True)
+            import shutil
+            shutil.copy2(local_file_path, dst)
             return 0
         except Exception as exc:
             print(f"upload_file EXCEPTION: {str(exc)}")
```
```diff
@@ -263,18 +370,51 @@
     def download_file(self, bucket_name, blob_name, local_file_path):
         """
         Downloads a blob from the bucket to a local file.
-
-
-            blob_name (str): Name of the blob to download.
-            local_file_path (str): Local path to save the downloaded file.
-        Returns:
-            int: 0 if upload is successful, 1 otherwise.
+        - In GCS mode: downloads from Google Cloud.
+        - In local mode: copies from root_dir / bucket / blob.
         """
         try:
-
-
-
-
+            if self.mode == "gcloud":
+                bucket = self.gc.bucket(bucket_name)
+                blob = bucket.blob(blob_name)
+                blob.download_to_filename(local_file_path)
+                return 0
+
+            # LOCAL MODE
+            elif self.mode == "local":
+                # --------- Local fs-server (fs.py) via HTTP ---------
+                download_url = self.download_url(
+                    bucket_name,
+                    blob_name,
+                    timeout=24 * 3600,
+                )
+                if not download_url:
+                    print("download_file (local) ERROR: could not generate download_url")
+                    return 1
+
+                internal_token = os.environ.get("FS_INTERNAL_TOKEN")
+
+                headers = {}
+                if internal_token:
+                    headers["X-Internal-Token"] = internal_token
+
+                # stream to not load everyting in ram
+                with requests.get(download_url, headers=headers, stream=True, timeout=60) as r:
+                    if r.status_code != 200:
+                        print("download_file (local) ERROR:", r.status_code, r.text)
+                        return 1
+
+                    Path(local_file_path).parent.mkdir(parents=True, exist_ok=True)
+                    with open(local_file_path, "wb") as f:
+                        for chunk in r.iter_content(chunk_size=1024 * 1024):
+                            if chunk:
+                                f.write(chunk)
+
+                return 0
+
+            else:
+                print("download_file ERROR: unknown mode", self.mode)
+                return 1
         except Exception as exc:
             print(f"download_file EXCEPTION: {str(exc)}")
             return 1
```
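Note the asymmetry in local mode: `upload_file` writes straight into `root_dir` with `shutil.copy2`, while `download_file` fetches over HTTP from the fs-server, even though its docstring still says it copies from `root_dir / bucket / blob`. A round-trip sketch, with the argument order for `upload_file` inferred from the removed docstring and the method body, and all paths illustrative:

```python
assert gs.upload_file("mybucket", "ecg/rec001.bin", "/tmp/rec001.bin") == 0
assert gs.download_file("mybucket", "ecg/rec001.bin", "/tmp/rec001.copy.bin") == 0
```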
```diff
@@ -357,68 +497,79 @@
     def upload_url(self, bucket_name, blob_name, timeout=15 * 60, size=None):
         """
         Generates a signed URL for uploading a blob.
-
-            bucket_name (str): Name of the GCS bucket.
-            blob_name (str): Name of the blob to upload.
-            timeout (int, optional): URL expiration time in seconds. Defaults to 15 minutes.
-            size (int, optional): Maximum allowed size of the upload in bytes.
-        Returns:
-            str or None: Signed URL for uploading or None if an error occurs.
+        - Local mode: signed URL for local fileserver (/upload).
         """
+        if self.mode == "gcloud":
+            try:
+                bucket = self.gc.bucket(bucket_name)
+                blob = bucket.blob(blob_name)
+                query_parameters = (
+                    None if size is None else {"x-goog-content-length-range": f"0,{size}"}
+                )
+                url = blob.generate_signed_url(
+                    version="v4",
+                    expiration=datetime.timedelta(seconds=timeout),
+                    method="PUT",
+                    content_type="application/octet-stream",
+                    query_parameters=query_parameters,
+                )
+                return url
+            except Exception as exc:
+                print(f"upload_url EXCEPTION: {str(exc)}")
+                return None
+
+        # LOCAL MODE
         try:
-
-
-            query_parameters = (
-                None if size is None else {"x-goog-content-length-range": f"0,{size}"}
-            )
-            url = blob.generate_signed_url(
-                version="v4",
-                expiration=datetime.timedelta(seconds=timeout),
+            return self._signer.generate_url(
+                path="/upload",
                 method="PUT",
-
-
+                bucket=bucket_name,
+                blob=blob_name,
+                timeout=timeout,
+                max_size=size,
             )
-            return url
         except Exception as exc:
-            print(f"upload_url EXCEPTION: {str(exc)}")
+            print(f"upload_url (local) EXCEPTION: {str(exc)}")
             return None
 
     def download_url(self, bucket_name, blob_name, timeout=24 * 3600):
         """
         Generates a signed URL for downloading a blob.
-
-            bucket_name (str): Name of the GCS bucket.
-            blob_name (str): Name of the blob to download.
-            timeout (int, optional): URL expiration time in seconds. Defaults to 24 hours.
-        Returns:
-            str or None: Signed URL for downloading or None if an error occurs.
+        - Local mode: signed URL for local fileserver (/download).
         """
+        if self.mode == "gcloud":
+            try:
+                bucket = self.gc.bucket(bucket_name)
+                blob = bucket.blob(blob_name)
+                url = blob.generate_signed_url(
+                    version="v4",
+                    expiration=datetime.timedelta(seconds=timeout),
+                    method="GET",
+                )
+                return url
+            except Exception as exc:
+                print(f"download_url EXCEPTION: {str(exc)}")
+                return None
+
+        # LOCAL MODE
         try:
-
-
-            url = blob.generate_signed_url(
-                version="v4",
-                expiration=datetime.timedelta(seconds=timeout),
+            return self._signer.generate_url(
+                path="/download",
                 method="GET",
+                bucket=bucket_name,
+                blob=blob_name,
+                timeout=timeout,
+                max_size=None,
             )
-            # Append the blob_name for the download client to be able to recover the file name
-            # url = f'{url}&filename={blob_name}'
-            return url
         except Exception as exc:
-            print(f"download_url EXCEPTION: {str(exc)}")
+            print(f"download_url (local) EXCEPTION: {str(exc)}")
             return None
 
     def urls(self, bucket_name, blob_name, timeout=24 * 3600, size=None):
         """
         Generates both upload and download signed URLs for a blob.
-        Args:
-            bucket_name (str): Name of the GCS bucket.
-            blob_name (str): Name of the blob.
-            timeout (int, optional): URL expiration time in seconds. Defaults to 24 hours.
-            size (int, optional): Maximum allowed size of the upload in bytes.
-        Returns:
-            tuple: (upload_url, download_url)
-        """
-        return
-            bucket_name, blob_name, timeout=timeout, size=size
-
+        """
+        return (
+            self.upload_url(bucket_name, blob_name, timeout=timeout, size=size),
+            self.download_url(bucket_name, blob_name, timeout=timeout),
+        )
```
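`urls` now explicitly returns the `(upload_url, download_url)` pair, so a client can push bytes with a plain HTTP PUT and hand out the matching GET link. A usage sketch, with bucket, blob, and file names illustrative; the Content-Type matches the one pinned into the GCS signed URL above:

```python
import requests

up_url, down_url = gs.urls("mybucket", "ecg/rec001.bin", timeout=3600, size=10_000_000)
with open("/tmp/rec001.bin", "rb") as f:
    requests.put(up_url, data=f,
                 headers={"Content-Type": "application/octet-stream"}, timeout=60)
```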
kardioutils-1.0.10/dl2050utils/__version__.py (removed)

```diff
@@ -1 +0,0 @@
-version = "1.0.10"
```