kardioutils 1.0.18.tar.gz → 1.0.19.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {kardioutils-1.0.18/kardioutils.egg-info → kardioutils-1.0.19}/PKG-INFO +1 -1
- kardioutils-1.0.19/dl2050utils/__version__.py +1 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/db.py +3 -1
- kardioutils-1.0.19/dl2050utils/df_utils.py +155 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19/kardioutils.egg-info}/PKG-INFO +1 -1
- kardioutils-1.0.18/dl2050utils/__version__.py +0 -1
- kardioutils-1.0.18/dl2050utils/df_utils.py +0 -77
- {kardioutils-1.0.18 → kardioutils-1.0.19}/LICENSE.txt +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/README.md +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/__config__.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/__init__.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/api.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/auth.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/com.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/common.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/core.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/db copy.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/dbdf.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/dbutils.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/df.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/env.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/etl.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/fdb.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/fs.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/graphql.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/gs.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/ju.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/log.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/mq.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/rest.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/restapp.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/restutils.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/sqlite.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/ulists.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/wsgi.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/kardioutils.egg-info/SOURCES.txt +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/kardioutils.egg-info/dependency_links.txt +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/kardioutils.egg-info/top_level.txt +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/setup.cfg +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/setup.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/test/test_core.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/test/test_db.py +0 -0
- {kardioutils-1.0.18 → kardioutils-1.0.19}/test/test_env.py +0 -0
kardioutils-1.0.19/dl2050utils/__version__.py
@@ -0,0 +1 @@
+version = "1.0.19"

{kardioutils-1.0.18 → kardioutils-1.0.19}/dl2050utils/db.py
@@ -749,8 +749,10 @@ def db_import_tbl(db, p, tbl, delete=False):
     if rows is None:
         return log_and_return(f"Cant read {p}")
     if delete:
-
+        res = db.sync_execute(f"DELETE FROM {tbl}")
+        if res is None:
             return log_and_return(f"Error deleting tbl {tbl}")
+        print("Delete result:", res)
     n = 0
     for row in rows:
         res = db.sync_insert(tbl, row)

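The practical effect of this hunk: with `delete=True`, 1.0.19 now actually issues the `DELETE`, aborts with a logged error when `sync_execute` returns `None`, and prints the result before re-importing rows. A minimal sketch of that guard in isolation; `StubDB` and `clear_table` below are illustrative stand-ins, not package API:

```python
# Sketch of the new delete guard; StubDB mimics only the sync_execute call
# used in the hunk above and is not the package's DB class.
class StubDB:
    def sync_execute(self, q):
        print("exec:", q)
        return 0  # return None here to simulate a failed DELETE

def clear_table(db, tbl):
    # Mirrors the three added lines in db_import_tbl.
    res = db.sync_execute(f"DELETE FROM {tbl}")
    if res is None:
        return f"Error deleting tbl {tbl}"
    print("Delete result:", res)

clear_table(StubDB(), "patients")  # exec: DELETE FROM patients / Delete result: 0
```
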
kardioutils-1.0.19/dl2050utils/df_utils.py
@@ -0,0 +1,155 @@
+from __future__ import annotations
+import pandas as pd
+import os
+from typing import Tuple, List, Optional
+import numpy as np
+
+def list_prefixes(df: pd.DataFrame) -> list:
+    """Return all distinct prefixes in the dataframe."""
+    return df["prefix"].dropna().unique().tolist()
+
+
+def filter_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
+    """Return all rows that match a given prefix exactly."""
+    return df[df["prefix"] == prefix]
+
+
+def filter_prefix_contains(df: pd.DataFrame, text: str) -> pd.DataFrame:
+    """Return all rows where prefix contains the given text."""
+    return df[df["prefix"].str.contains(text, na=False)]
+
+
+def find_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> pd.DataFrame:
+    """Return all rows that match a given uid_suffix."""
+    return df[df["uid_suffix"] == uid_suffix]
+
+
+def find_by_uid_full(df: pd.DataFrame, uid_full: str) -> pd.DataFrame:
+    """Return all rows that match a given uid_full."""
+    return df[df["uid_full"] == uid_full]
+
+
+def holter_only(df: pd.DataFrame) -> pd.DataFrame:
+    """Return only rows where holter == True."""
+    return df[df["holter"] == True]
+
+
+def non_holter_only(df: pd.DataFrame) -> pd.DataFrame:
+    """Return only rows where holter == False."""
+    return df[df["holter"] == False]
+
+
+def get_path_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> str | None:
+    """
+    Return the path for a given uid_suffix.
+    If there are multiple rows, returns the first one.
+    If nothing is found, returns None.
+    """
+    rows = df[df["uid_suffix"] == uid_suffix]
+    if rows.empty:
+        return None
+    return rows.iloc[0]["path"]
+
+
+def get_paths_by_prefix(df: pd.DataFrame, prefix: str, holter_only_flag: bool | None = None) -> list:
+    """
+    Return a list of paths filtered by prefix and optionally holter flag.
+    - holter_only_flag = True → only holter rows
+    - holter_only_flag = False → only non-holter rows
+    - holter_only_flag = None → ignore holter column
+    """
+    subset = df[df["prefix"] == prefix]
+    if holter_only_flag is not None:
+        subset = subset[subset["holter"] == holter_only_flag]
+    return subset["path"].dropna().tolist()
+
+
+def check_missing_files(df):
+    """
+    Return subset of rows whose 'path' does not point to an existing file.
+    """
+    mask = ~df["path"].astype(str).apply(os.path.exists)
+    return df[mask]
+
+
+def check_existing_files(df):
+    """
+    Return subset of rows whose 'path' exists.
+    """
+    mask = df["path"].astype(str).apply(os.path.exists)
+    return df[mask]
+
+def load_X_from_index_df(
+    index_df: pd.DataFrame,
+    fdb,
+    *,
+    uid_col: str = "uid_full",
+    pre: str = "x_",
+    ext: str = ".npy",
+    allow_pickle: bool = False,
+    stack: bool = True,
+    on_missing: str = "skip",  # "skip" | "raise" | "keep_none"
+) -> Tuple[np.ndarray, List[str], pd.DataFrame]:
+    """
+    Loads x_ arrays for each row in index_df using fdb.load(row[uid_col], pre, ext).
+
+    Returns:
+    - X: stacked np.ndarray (N, ...) if stack=True; otherwise object array of length N
+    - ids: list of ids in the same order as X
+    - meta: dataframe aligned with X (rows kept), including a 'loaded' boolean column
+    """
+    if uid_col not in index_df.columns:
+        raise KeyError(f"uid_col '{uid_col}' not found in index_df columns")
+
+    loaded_arrays = []
+    kept_ids: List[str] = []
+    kept_rows = []
+    missing_rows = []
+
+    for _, row in index_df.iterrows():
+        uid = row[uid_col]
+        arr = fdb.load(uid, pre=pre, ext=ext, allow_pickle=allow_pickle)
+
+        if arr is None:
+            if on_missing == "raise":
+                raise FileNotFoundError(f"Missing array for {uid} (pre={pre}, ext={ext})")
+            if on_missing == "keep_none":
+                loaded_arrays.append(None)
+                kept_ids.append(uid)
+                r = row.copy()
+                r["loaded"] = False
+                kept_rows.append(r)
+            else:  # skip
+                r = row.copy()
+                r["loaded"] = False
+                missing_rows.append(r)
+            continue
+
+        loaded_arrays.append(arr)
+        kept_ids.append(uid)
+        r = row.copy()
+        r["loaded"] = True
+        kept_rows.append(r)
+
+    meta = pd.DataFrame(kept_rows).reset_index(drop=True)
+
+    if not stack:
+        # keep as object array (useful if shapes can differ or you used keep_none)
+        X = np.array(loaded_arrays, dtype=object)
+        return X, kept_ids, meta
+
+    # stack=True: require all arrays exist and have same shape
+    arrays_only = [a for a in loaded_arrays if a is not None]
+    if len(arrays_only) == 0:
+        return np.empty((0,), dtype=float), kept_ids, meta
+
+    try:
+        X = np.stack(arrays_only, axis=0)
+    except Exception as e:
+        raise ValueError(
+            "Could not stack arrays (shapes likely differ). "
+            "Use stack=False or handle padding/truncation."
+        ) from e
+
+
+    return X, kept_ids, meta

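For orientation, a hedged usage sketch of the new `load_X_from_index_df`; the dataframe contents and the `StubFDB` class below are invented for illustration (the real `fdb` object presumably comes from the package's `fdb.py`):

```python
# Hedged usage sketch: StubFDB fakes the single load() call the helper makes.
import numpy as np
import pandas as pd
from dl2050utils.df_utils import load_X_from_index_df

class StubFDB:
    def load(self, uid, pre="x_", ext=".npy", allow_pickle=False):
        # Return an array for "a1", None for anything else (simulated miss).
        return np.zeros(8) if uid == "a1" else None

index_df = pd.DataFrame({"uid_full": ["a1", "a2"]})
X, ids, meta = load_X_from_index_df(index_df, StubFDB(), on_missing="skip")
print(X.shape, ids, meta["loaded"].tolist())  # (1, 8) ['a1'] [True]
```

With `on_missing="skip"` the missing row drops out of all three returns; `"keep_none"` keeps it (use with `stack=False`), and `"raise"` turns it into a `FileNotFoundError`.
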
kardioutils-1.0.18/dl2050utils/__version__.py
@@ -1 +0,0 @@
-version = "1.0.18"

kardioutils-1.0.18/dl2050utils/df_utils.py
@@ -1,77 +0,0 @@
-import pandas as pd
-import os
-
-def list_prefixes(df: pd.DataFrame) -> list:
-    """Return all distinct prefixes in the dataframe."""
-    return df["prefix"].dropna().unique().tolist()
-
-
-def filter_by_prefix(df: pd.DataFrame, prefix: str) -> pd.DataFrame:
-    """Return all rows that match a given prefix exactly."""
-    return df[df["prefix"] == prefix]
-
-
-def filter_prefix_contains(df: pd.DataFrame, text: str) -> pd.DataFrame:
-    """Return all rows where prefix contains the given text."""
-    return df[df["prefix"].str.contains(text, na=False)]
-
-
-def find_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> pd.DataFrame:
-    """Return all rows that match a given uid_suffix."""
-    return df[df["uid_suffix"] == uid_suffix]
-
-
-def find_by_uid_full(df: pd.DataFrame, uid_full: str) -> pd.DataFrame:
-    """Return all rows that match a given uid_full."""
-    return df[df["uid_full"] == uid_full]
-
-
-def holter_only(df: pd.DataFrame) -> pd.DataFrame:
-    """Return only rows where holter == True."""
-    return df[df["holter"] == True]
-
-
-def non_holter_only(df: pd.DataFrame) -> pd.DataFrame:
-    """Return only rows where holter == False."""
-    return df[df["holter"] == False]
-
-
-def get_path_by_uid_suffix(df: pd.DataFrame, uid_suffix: str) -> str | None:
-    """
-    Return the path for a given uid_suffix.
-    If there are multiple rows, returns the first one.
-    If nothing is found, returns None.
-    """
-    rows = df[df["uid_suffix"] == uid_suffix]
-    if rows.empty:
-        return None
-    return rows.iloc[0]["path"]
-
-
-def get_paths_by_prefix(df: pd.DataFrame, prefix: str, holter_only_flag: bool | None = None) -> list:
-    """
-    Return a list of paths filtered by prefix and optionally holter flag.
-    - holter_only_flag = True → only holter rows
-    - holter_only_flag = False → only non-holter rows
-    - holter_only_flag = None → ignore holter column
-    """
-    subset = df[df["prefix"] == prefix]
-    if holter_only_flag is not None:
-        subset = subset[subset["holter"] == holter_only_flag]
-    return subset["path"].dropna().tolist()
-
-
-def check_missing_files(df):
-    """
-    Return subset of rows whose 'path' does not point to an existing file.
-    """
-    mask = ~df["path"].astype(str).apply(os.path.exists)
-    return df[mask]
-
-
-def check_existing_files(df):
-    """
-    Return subset of rows whose 'path' exists.
-    """
-    mask = df["path"].astype(str).apply(os.path.exists)
-    return df[mask]