kardioutils 1.0.18__tar.gz → 1.0.20__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. {kardioutils-1.0.18/kardioutils.egg-info → kardioutils-1.0.20}/PKG-INFO +1 -1
  2. kardioutils-1.0.20/dl2050utils/__version__.py +1 -0
  3. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/api.py +11 -2
  4. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/db.py +48 -1
  5. kardioutils-1.0.20/dl2050utils/df_utils.py +155 -0
  6. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/gs.py +14 -0
  7. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/restapp.py +6 -1
  8. {kardioutils-1.0.18 → kardioutils-1.0.20/kardioutils.egg-info}/PKG-INFO +1 -1
  9. kardioutils-1.0.18/dl2050utils/__version__.py +0 -1
  10. kardioutils-1.0.18/dl2050utils/df_utils.py +0 -77
  11. {kardioutils-1.0.18 → kardioutils-1.0.20}/LICENSE.txt +0 -0
  12. {kardioutils-1.0.18 → kardioutils-1.0.20}/README.md +0 -0
  13. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/__config__.py +0 -0
  14. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/__init__.py +0 -0
  15. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/auth.py +0 -0
  16. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/com.py +0 -0
  17. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/common.py +0 -0
  18. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/core.py +0 -0
  19. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/db copy.py +0 -0
  20. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/dbdf.py +0 -0
  21. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/dbutils.py +0 -0
  22. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/df.py +0 -0
  23. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/env.py +0 -0
  24. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/etl.py +0 -0
  25. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/fdb.py +0 -0
  26. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/fs.py +0 -0
  27. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/graphql.py +0 -0
  28. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/ju.py +0 -0
  29. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/log.py +0 -0
  30. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/mq.py +0 -0
  31. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/rest.py +0 -0
  32. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/restutils.py +0 -0
  33. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/sqlite.py +0 -0
  34. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/ulists.py +0 -0
  35. {kardioutils-1.0.18 → kardioutils-1.0.20}/dl2050utils/wsgi.py +0 -0
  36. {kardioutils-1.0.18 → kardioutils-1.0.20}/kardioutils.egg-info/SOURCES.txt +0 -0
  37. {kardioutils-1.0.18 → kardioutils-1.0.20}/kardioutils.egg-info/dependency_links.txt +0 -0
  38. {kardioutils-1.0.18 → kardioutils-1.0.20}/kardioutils.egg-info/top_level.txt +0 -0
  39. {kardioutils-1.0.18 → kardioutils-1.0.20}/setup.cfg +0 -0
  40. {kardioutils-1.0.18 → kardioutils-1.0.20}/setup.py +0 -0
  41. {kardioutils-1.0.18 → kardioutils-1.0.20}/test/test_core.py +0 -0
  42. {kardioutils-1.0.18 → kardioutils-1.0.20}/test/test_db.py +0 -0
  43. {kardioutils-1.0.18 → kardioutils-1.0.20}/test/test_env.py +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kardioutils
-Version: 1.0.18
+Version: 1.0.20
 Summary: Utils lib
 Author: João Neto
 Author-email: joao.filipe.neto@gmail.com
@@ -0,0 +1 @@
+version = "1.0.20"
@@ -149,6 +149,7 @@ def db_create(db):
     key CHAR(256),
     kind CHAR(32),
     status CHAR(32),
+    bucket_key CHAR(64),
     urls TEXT, -- JSON-stringified list of dicts with url and file name
     results_url TEXT,
     eta DATETIME,
@@ -342,8 +343,10 @@ class APIServer:
         data = await request.json()
         urls = get_param(request, 'urls', list, data=data, LOG=self.LOG)
         req_uid = get_uid()
+        bucket_key = get_param(request, 'bucket_key', str, required=False, data=data, LOG=self.LOG)
+
         task = {'req_uid':req_uid, 'created_at':now(), 'key':key, 'kind':'ASYNC', 'status':'REQUESTED',
-                'urls': json_dumps(urls)}
+                'urls': json_dumps(urls), 'bucket_key': bucket_key}
         self.db.insert('api_tasks', task)
         self.LOG(2, 0, label='APIServer', label2='/apiserver/req', msg=f"req_uid={req_uid}")
         return rest_ok({'req_uid':req_uid})
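
For reference, the request body the endpoint now accepts; the field names come from the get_param calls above, the values are placeholders.

    # Illustrative payload for POST /apiserver/req; values are placeholders.
    payload = {
        "urls": ["https://storage.example.com/signed/ecg-001.bin"],
        "bucket_key": "org-7",  # optional; when omitted the task row stores None
    }
    # The server persists it as {'req_uid': ..., 'kind': 'ASYNC',
    # 'status': 'REQUESTED', 'urls': json_dumps(urls), 'bucket_key': bucket_key}.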
@@ -456,7 +459,13 @@ class APIClient:
         return sync_request(f'{self.url}{route}', method=method, headers=headers, payload=payload)
     def get_signed_urls(self, fnames): return self.do_request('/apiserver/get_signed_urls', {'fnames':fnames})
     def proc(self, data): return self.do_request('/apiserver/proc', {'base64':base64_encode(data)})
-    def req(self, urls): return self.do_request('/apiserver/req', {'urls':listify(urls)})
+
+    def req(self, urls, bucket_key=None):
+        payload = {'urls': listify(urls)}
+        if bucket_key is not None:
+            payload['bucket_key'] = bucket_key
+        return self.do_request('/apiserver/req', payload)
+
     def check(self, req_uid): return self.do_request('/apiserver/check', {'req_uid':req_uid})
 
 # #################################################################################################################
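
A hedged usage sketch of the extended client method; the base URL and bucket key are placeholders, and only req's new signature comes from the diff.

    # Hypothetical setup; APIClient's constructor arguments may differ.
    client = APIClient(url="https://api.example.com")
    res = client.req(["https://storage.example.com/ecg-001.bin"], bucket_key="org-7")
    status = client.check(res["req_uid"])  # polling endpoint is unchanged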
@@ -749,8 +749,10 @@ def db_import_tbl(db, p, tbl, delete=False):
     if rows is None:
         return log_and_return(f"Cant read {p}")
     if delete:
-        if db.sync_execute(f"delete from {tbl}"):
+        res = db.sync_execute(f"DELETE FROM {tbl}")
+        if res is None:
             return log_and_return(f"Error deleting tbl {tbl}")
+        print("Delete result:", res)
     n = 0
     for row in rows:
         res = db.sync_insert(tbl, row)
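
The old guard treated any truthy result from sync_execute as an error; the new code treats None as the failure sentinel. A minimal sketch of the assumed convention, where sync_execute returns an asyncpg-style status tag such as "DELETE 42" on success and None on failure (this matches how db_import_tbl_full_compare below parses row counts):

    res = db.sync_execute("DELETE FROM api_tasks")  # e.g. "DELETE 42" on success
    if res is None:
        raise RuntimeError("delete failed")         # None is the error sentinel
    n_deleted = int(res.split()[-1])                # row count from the status tag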
@@ -783,3 +785,48 @@ def db_enable_serial(db, tbl, col):
     db.sync_execute(
         f"ALTER TABLE {tbl} ALTER COLUMN {col} SET DEFAULT nextval('{tbl}_{col}_seq')"
     )
+
+def row_exists_full(db, tbl, row: dict, cols=None):
+    cols = cols or list(row.keys())
+    where = " AND ".join([f"{c} IS NOT DISTINCT FROM ${i+1}" for i, c in enumerate(cols)])
+    q = f"SELECT 1 FROM {tbl} WHERE {where} LIMIT 1"
+    params = [convert_type(row.get(c)) for c in cols]
+    res = db.sync_execute(q, *params)
+    return bool(res)
+
+def db_import_tbl_full_compare(db, p, tbl, delete=False, cols=None):
+    from pathlib import Path
+    p = Path(p)
+    if not p.with_suffix(".pickle").is_file():
+        return log_and_return(f"Error importing {tbl}: file {p} not found")
+
+    rows = pickle_load(p)
+    if rows is None:
+        return log_and_return(f"Cant read {p}")
+    if not rows:
+        print("No rows to import.")
+        return 0
+
+    if delete:
+        res = db.sync_execute(f"DELETE FROM {tbl}")
+        if res is None:
+            return log_and_return(f"Error deleting tbl {tbl}")
+
+    cols = cols or list(rows[0].keys())
+    col_list = ", ".join(cols)
+    placeholders = ", ".join([f"${i}" for i in range(1, len(cols)+1)])
+    qins = f"INSERT INTO {tbl} ({col_list}) VALUES ({placeholders})"
+
+    n_new, n_skip = 0, 0
+    for r in rows:
+        r2 = {c: r.get(c) for c in cols}
+        if not delete and row_exists_full(db, tbl, r2, cols=cols):
+            n_skip += 1
+            continue
+        res = db.sync_execute(qins, *[convert_type(r2.get(c)) for c in cols])
+        if res is not None:
+            try: n_new += int(str(res).split()[-1])
+            except: pass
+
+    print(f"new={n_new} skipped={n_skip}")
+    return 0
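
A sketch of the intended call pattern for the new importer, assuming a db handle exposing sync_execute and a pickle produced by the matching export helper; the path and table name are placeholders.

    # Placeholders throughout; only the signature comes from the code above.
    db_import_tbl_full_compare(db, "/backups/orgs.pickle", "orgs", delete=False)
    # With delete=False, a row whose every column already matches an existing row
    # (NULL-safe, via IS NOT DISTINCT FROM) is counted as skipped, not re-inserted.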
@@ -0,0 +1,155 @@
+from __future__ import annotations  # must be the first statement in the module
+
+import os
+from typing import Tuple, List, Optional
+
+import numpy as np
+import pandas as pd
+
+
+def list_prefixes(df: pd.DataFrame) -> list:
+    """Return all distinct prefixes in the dataframe."""
+    return df["prefix"].dropna().unique().tolist()
+
+
+def filter_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
+    """Return all rows that match a given prefix exactly."""
+    return df[df["prefix"] == prefix]
+
+
+def filter_prefix_contains(df: pd.DataFrame, text: str) -> pd.DataFrame:
+    """Return all rows where prefix contains the given text."""
+    return df[df["prefix"].str.contains(text, na=False)]
+
+
+def find_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> pd.DataFrame:
+    """Return all rows that match a given uid_suffix."""
+    return df[df["uid_suffix"] == uid_suffix]
+
+
+def find_by_uid_full(df: pd.DataFrame, uid_full: str) -> pd.DataFrame:
+    """Return all rows that match a given uid_full."""
+    return df[df["uid_full"] == uid_full]
+
+
+def holter_only(df: pd.DataFrame) -> pd.DataFrame:
+    """Return only rows where holter == True."""
+    return df[df["holter"] == True]
+
+
+def non_holter_only(df: pd.DataFrame) -> pd.DataFrame:
+    """Return only rows where holter == False."""
+    return df[df["holter"] == False]
+
+
+def get_path_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> str | None:
+    """
+    Return the path for a given uid_suffix.
+    If there are multiple rows, returns the first one.
+    If nothing is found, returns None.
+    """
+    rows = df[df["uid_suffix"] == uid_suffix]
+    if rows.empty:
+        return None
+    return rows.iloc[0]["path"]
+
+
+def get_paths_by_prefix(df: pd.DataFrame, prefix: str, holter_only_flag: bool | None = None) -> list:
+    """
+    Return a list of paths filtered by prefix and optionally by the holter flag.
+    - holter_only_flag = True  → only holter rows
+    - holter_only_flag = False → only non-holter rows
+    - holter_only_flag = None  → ignore the holter column
+    """
+    subset = df[df["prefix"] == prefix]
+    if holter_only_flag is not None:
+        subset = subset[subset["holter"] == holter_only_flag]
+    return subset["path"].dropna().tolist()
+
+
+def check_missing_files(df):
+    """Return the subset of rows whose 'path' does not point to an existing file."""
+    mask = ~df["path"].astype(str).apply(os.path.exists)
+    return df[mask]
+
+
+def check_existing_files(df):
+    """Return the subset of rows whose 'path' exists."""
+    mask = df["path"].astype(str).apply(os.path.exists)
+    return df[mask]
+
+
+def load_X_from_index_df(
+    index_df: pd.DataFrame,
+    fdb,
+    *,
+    uid_col: str = "uid_full",
+    pre: str = "x_",
+    ext: str = ".npy",
+    allow_pickle: bool = False,
+    stack: bool = True,
+    on_missing: str = "skip",  # "skip" | "raise" | "keep_none"
+) -> Tuple[np.ndarray, List[str], pd.DataFrame]:
+    """
+    Loads x_ arrays for each row in index_df using fdb.load(row[uid_col], pre, ext).
+
+    Returns:
+        - X: stacked np.ndarray (N, ...) if stack=True; otherwise an object array of length N
+        - ids: list of ids in the same order as X
+        - meta: dataframe aligned with X (rows kept), including a 'loaded' boolean column
+    """
+    if uid_col not in index_df.columns:
+        raise KeyError(f"uid_col '{uid_col}' not found in index_df columns")
+
+    loaded_arrays = []
+    kept_ids: List[str] = []
+    kept_rows = []
+    missing_rows = []
+
+    for _, row in index_df.iterrows():
+        uid = row[uid_col]
+        arr = fdb.load(uid, pre=pre, ext=ext, allow_pickle=allow_pickle)
+
+        if arr is None:
+            if on_missing == "raise":
+                raise FileNotFoundError(f"Missing array for {uid} (pre={pre}, ext={ext})")
+            if on_missing == "keep_none":
+                loaded_arrays.append(None)
+                kept_ids.append(uid)
+                r = row.copy()
+                r["loaded"] = False
+                kept_rows.append(r)
+            else:  # skip
+                r = row.copy()
+                r["loaded"] = False
+                missing_rows.append(r)
+            continue
+
+        loaded_arrays.append(arr)
+        kept_ids.append(uid)
+        r = row.copy()
+        r["loaded"] = True
+        kept_rows.append(r)
+
+    meta = pd.DataFrame(kept_rows).reset_index(drop=True)
+
+    if not stack:
+        # Keep as an object array (useful if shapes can differ or keep_none was used)
+        X = np.array(loaded_arrays, dtype=object)
+        return X, kept_ids, meta
+
+    # stack=True: require that all arrays exist and share the same shape
+    arrays_only = [a for a in loaded_arrays if a is not None]
+    if len(arrays_only) == 0:
+        return np.empty((0,), dtype=float), kept_ids, meta
+
+    try:
+        X = np.stack(arrays_only, axis=0)
+    except Exception as e:
+        raise ValueError(
+            "Could not stack arrays (shapes likely differ). "
+            "Use stack=False or handle padding/truncation."
+        ) from e
+
+    return X, kept_ids, meta
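
A short usage sketch for the new loader. The index dataframe and the fdb handle are placeholders (fdb is whatever file-store object the package's fdb module provides); only load_X_from_index_df's signature comes from the file above.

    # Hypothetical inputs; only the loader's signature comes from the new module.
    X, ids, meta = load_X_from_index_df(index_df, fdb, uid_col="uid_full", on_missing="skip")
    print(X.shape)               # (N, ...) stacked arrays, one per loaded row
    print(meta["loaded"].all())  # with on_missing="skip", kept rows are all loaded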
@@ -64,6 +64,9 @@ class GS:
         # Try Google Cloud first
         key_dict = oget(cfg, ["gcloud", "gs_key"])
         fs_cfg = oget(cfg, ["fs"]) or {}
+        bucket_cfg = oget(cfg, ["bucket"]) or {}
+        self.bucket_map = bucket_cfg if isinstance(bucket_cfg, dict) else {}
+        self.default_bucket = self.bucket_map.get("general")
         self.internal_token = fs_cfg.get("internal_token")
         if self.internal_token:
             os.environ["FS_INTERNAL_TOKEN"] = self.internal_token
@@ -573,3 +576,14 @@ class GS:
             self.upload_url(bucket_name, blob_name, timeout=timeout, size=size),
             self.download_url(bucket_name, blob_name, timeout=timeout),
         )
+
+    def resolve_bucket(self, bucket_name=None, bucket_key=None):
+        if bucket_name:
+            return bucket_name
+        if bucket_key:
+            if bucket_key in self.bucket_map:
+                return self.bucket_map[bucket_key]
+            raise RuntimeError(f"GS: unknown bucket_key '{bucket_key}'")
+        if self.default_bucket:
+            return self.default_bucket
+        raise RuntimeError("GS: missing bucket.general in config")
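
Resolution order is explicit bucket_name first, then bucket_key looked up in the config's bucket map, then the "general" default. A hedged sketch (the bucket names are invented; the "bucket" section and "general" key are implied by the code above):

    # Hypothetical config fragment; real GS initialization also needs the
    # gcloud/fs sections read in the constructor above.
    cfg = {"bucket": {"general": "acme-general", "org-7": "acme-org7-data"}}
    gs = GS(cfg)
    gs.resolve_bucket(bucket_name="explicit-b")  # -> "explicit-b"
    gs.resolve_bucket(bucket_key="org-7")        # -> "acme-org7-data"
    gs.resolve_bucket()                          # -> "acme-general" (default)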
@@ -173,7 +173,12 @@ class App():
         files = args['files']
         uid = get_uid()
         upload_urls,download_urls = [],[]
-        bucket = f'{self.service}-apiserver'
+        u = await self.auth.check_auth(request)
+        org_id = u.get("org")
+        org = await self.db.select_one("orgs", {"id": org_id})
+        bucket_key = org.get("bucket_key")
+        bucket = self.fs.resolve_bucket(bucket_key=bucket_key)
+        # bucket = f'{self.service}-apiserver'
     for file in files:
         upload_url,download_url = self.fs.urls(bucket, f'{uid}/{file}')
         if upload_url is None or download_url is None:
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: kardioutils
-Version: 1.0.18
+Version: 1.0.20
 Summary: Utils lib
 Author: João Neto
 Author-email: joao.filipe.neto@gmail.com
@@ -1 +0,0 @@
-version = "1.0.18"
@@ -1,77 +0,0 @@
-import pandas as pd
-import os
-
-def list_prefixes(df: pd.DataFrame) -> list:
-    """Return all distinct prefixes in the dataframe."""
-    return df["prefix"].dropna().unique().tolist()
-
-
-def filter_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
-    """Return all rows that match a given prefix exactly."""
-    return df[df["prefix"] == prefix]
-
-
-def filter_prefix_contains(df: pd.DataFrame, text: str) -> pd.DataFrame:
-    """Return all rows where prefix contains the given text."""
-    return df[df["prefix"].str.contains(text, na=False)]
-
-
-def find_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> pd.DataFrame:
-    """Return all rows that match a given uid_suffix."""
-    return df[df["uid_suffix"] == uid_suffix]
-
-
-def find_by_uid_full(df: pd.DataFrame, uid_full: str) -> pd.DataFrame:
-    """Return all rows that match a given uid_full."""
-    return df[df["uid_full"] == uid_full]
-
-
-def holter_only(df: pd.DataFrame) -> pd.DataFrame:
-    """Return only rows where holter == True."""
-    return df[df["holter"] == True]
-
-
-def non_holter_only(df: pd.DataFrame) -> pd.DataFrame:
-    """Return only rows where holter == False."""
-    return df[df["holter"] == False]
-
-
-def get_path_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> str | None:
-    """
-    Return the path for a given uid_suffix.
-    If there are multiple rows, returns the first one.
-    If nothing is found, returns None.
-    """
-    rows = df[df["uid_suffix"] == uid_suffix]
-    if rows.empty:
-        return None
-    return rows.iloc[0]["path"]
-
-
-def get_paths_by_prefix(df: pd.DataFrame, prefix: str, holter_only_flag: bool | None = None) -> list:
-    """
-    Return a list of paths filtered by prefix and optionally holter flag.
-    - holter_only_flag = True → only holter rows
-    - holter_only_flag = False → only non-holter rows
-    - holter_only_flag = None → ignore holter column
-    """
-    subset = df[df["prefix"] == prefix]
-    if holter_only_flag is not None:
-        subset = subset[subset["holter"] == holter_only_flag]
-    return subset["path"].dropna().tolist()
-
-
-def check_missing_files(df):
-    """
-    Return subset of rows whose 'path' does not point to an existing file.
-    """
-    mask = ~df["path"].astype(str).apply(os.path.exists)
-    return df[mask]
-
-
-def check_existing_files(df):
-    """
-    Return subset of rows whose 'path' exists.
-    """
-    mask = df["path"].astype(str).apply(os.path.exists)
-    return df[mask]