py2ls 0.2.4.25__py3-none-any.whl → 0.2.4.27__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/usages_sns.json +6 -1
- py2ls/ips.py +1059 -114
- py2ls/ml2ls.py +758 -186
- py2ls/netfinder.py +204 -20
- py2ls/ocr.py +60 -4
- py2ls/plot.py +916 -141
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/METADATA +6 -1
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/RECORD +16 -14
- py2ls/data/usages_pd copy.json +0 -1105
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/WHEEL +0 -0
py2ls/ips.py
CHANGED
@@ -16,7 +16,12 @@ import warnings
|
|
16
16
|
|
17
17
|
warnings.simplefilter("ignore", category=pd.errors.SettingWithCopyWarning)
|
18
18
|
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
|
19
|
-
|
19
|
+
warnings.filterwarnings("ignore")
|
20
|
+
import os
|
21
|
+
import shutil
|
22
|
+
import logging
|
23
|
+
from pathlib import Path
|
24
|
+
from datetime import datetime
|
20
25
|
|
21
26
|
def run_once_within(duration=60,reverse=False): # default 60s
|
22
27
|
import time
|
@@ -541,8 +546,7 @@ def is_text(s):
|
|
541
546
|
|
542
547
|
from typing import Any, Union
|
543
548
|
|
544
|
-
|
545
|
-
def shared(*args, strict=True, n_shared=2, verbose=True):
|
549
|
+
def share(*args, strict=True, n_shared=2, verbose=True):
|
546
550
|
"""
|
547
551
|
check the shared elelements in two list.
|
548
552
|
usage:
|
@@ -587,12 +591,68 @@ def shared(*args, strict=True, n_shared=2, verbose=True):
|
|
587
591
|
elements2show = (
|
588
592
|
shared_elements if len(shared_elements) < 10 else shared_elements[:5]
|
589
593
|
)
|
594
|
+
tail = '' if len(shared_elements) < 10 else '......'
|
595
|
+
elements2show.append(tail)
|
590
596
|
print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
|
591
597
|
print("********* checking shared elements *********")
|
592
598
|
return shared_elements
|
593
599
|
|
600
|
+
def shared(*args, n_shared=None, verbose=True, **kwargs):
    """
    Check the elements shared between several lists.

    Args:
        *args: the lists to compare, or one single list that contains them.
            Every input is passed through ``flatten`` before comparison.
        n_shared (int | None): minimum number of lists an element has to
            appear in. ``None`` (default) requires the element to be present
            in every list.
        verbose (bool): print a short report; also forwarded to ``flatten``.
        **kwargs: accepted for backward compatibility and ignored.

    Returns:
        The shared elements after a final ``flatten`` call; ``[]`` when no
        input is given or when a flattened input is not a list.

    usage:
        list1 = [1, 2, 3, 4, 5]
        list2 = [4, 5, 6, 7, 8]
        list3 = [5, 6, 9, 10]
        a = shared(list1, list2, list3)
    """
    if verbose:
        print("\n********* checking shared elements *********")

    # A single list argument is treated as the collection of lists to compare.
    if len(args) == 1 and isinstance(args[0], list):
        lists = args[0]
    else:
        lists = args
    flattened_lists = [flatten(lst, verbose=verbose) for lst in lists]

    # Nothing to compare: avoid the IndexError on flattened_lists[0].
    if not flattened_lists:
        return []
    # Ensure all arguments are lists
    if any(not isinstance(lst, list) for lst in flattened_lists):
        print(f"{' ' * 2}All inputs must be lists.")
        return []

    if n_shared is None:
        # Strict mode: require elements to be in all lists
        shared_elements = set(flattened_lists[0])
        for lst in flattened_lists[1:]:
            shared_elements.intersection_update(lst)
    else:
        from collections import Counter

        # Count every list at most once per element (dict.fromkeys keeps
        # first-seen order), so a duplicate inside one list cannot satisfy
        # the "appears in at least n_shared lists" threshold on its own.
        element_count = Counter(
            item for sublist in flattened_lists for item in dict.fromkeys(sublist)
        )
        shared_elements = [
            item for item, count in element_count.items() if count >= n_shared
        ]

    shared_elements = flatten(shared_elements, verbose=verbose)
    if verbose:
        # Only a short preview is printed for long results.
        elements2show = (
            shared_elements if len(shared_elements) < 10 else shared_elements[:5]
        )
        print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
        print("********* checking shared elements *********")
    return shared_elements
|
654
|
+
|
655
|
+
def share_not(*args, n_shared=None, verbose=False):
|
596
656
|
"""
|
597
657
|
To find the elements in list1 that are not shared with list2 while maintaining the original order of list1
|
598
658
|
usage:
|
@@ -600,7 +660,19 @@ def not_shared(*args, strict=True, n_shared=2, verbose=False):
|
|
600
660
|
list2 = [4, 5, 6, 7, 8]
|
601
661
|
not_shared(list1,list2)# output [1,3]
|
602
662
|
"""
|
603
|
-
_common = shared(*args,
|
663
|
+
_common = shared(*args, n_shared=n_shared, verbose=verbose)
|
664
|
+
list1 = flatten(args[0], verbose=verbose)
|
665
|
+
_not_shared = [item for item in list1 if item not in _common]
|
666
|
+
return _not_shared
|
667
|
+
def not_shared(*args, n_shared=None, verbose=False):
    """
    Return the elements of the first list that are not shared with the other
    lists, keeping the order in which they occur in the first list.

    Args:
        *args: the lists to compare; forwarded unchanged to ``shared``.
        n_shared (int | None): forwarded to ``shared``.
        verbose (bool): forwarded to ``shared`` and ``flatten``.

    usage:
        list1 = [1, 8, 3, 3, 4, 5]
        list2 = [4, 5, 6, 7, 8]
        not_shared(list1,list2)# output [1,3]
    """
    common_items = shared(*args, n_shared=n_shared, verbose=verbose)
    remaining = []
    for candidate in flatten(args[0], verbose=verbose):
        if candidate not in common_items:
            remaining.append(candidate)
    return remaining
|
@@ -806,6 +878,19 @@ def counter(list_, verbose=True):
|
|
806
878
|
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
807
879
|
# print(f"Compute the sum of the counts:\n{c.total()}")
|
808
880
|
|
881
|
+
def dict2df(dict_, fill=None):
    """
    Build a DataFrame from a dict whose values may have different lengths.

    Shorter columns are padded with ``fill`` up to the longest column.
    The input dict and its lists are left untouched.

    Args:
        dict_ (dict): column name -> values. Lists, tuples and other
            iterables become columns; strings, bytes, mappings and
            non-iterable scalars become one-element columns.
        fill: value used for padding (default ``None``).

    Returns:
        pd.DataFrame: one column per key, all of equal length.
    """
    from collections.abc import Iterable, Mapping

    columns = {}
    for key, value in dict_.items():
        # Every column has to be a list; copy so the caller's data is not mutated.
        if isinstance(value, (str, bytes, Mapping)) or not isinstance(value, Iterable):
            columns[key] = [value]
        else:
            columns[key] = list(value)

    # Length of the longest column (0 for an empty dict).
    len_max = max((len(values) for values in columns.values()), default=0)

    # Pad every column to that length.
    for values in columns.values():
        values.extend([fill] * (len_max - len(values)))
    return pd.DataFrame.from_dict(columns)
|
809
894
|
|
810
895
|
def str2time(time_str, fmt="24"):
|
811
896
|
"""
|
@@ -1254,7 +1339,7 @@ def docx2pdf(dir_docx, dir_pdf=None):
|
|
1254
1339
|
convert(dir_docx)
|
1255
1340
|
|
1256
1341
|
|
1257
|
-
def img2pdf(dir_img, kind=
|
1342
|
+
def img2pdf(dir_img, kind=None, page=None, dir_save=None, page_size="a4", dpi=300):
|
1258
1343
|
import img2pdf as image2pdf
|
1259
1344
|
|
1260
1345
|
def mm_to_point(size):
|
@@ -1263,7 +1348,8 @@ def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=
|
|
1263
1348
|
def set_dpi(x):
|
1264
1349
|
dpix = dpiy = x
|
1265
1350
|
return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
|
1266
|
-
|
1351
|
+
if kind is None:
|
1352
|
+
_, kind = os.path.splitext(dir_img)
|
1267
1353
|
if not kind.startswith("."):
|
1268
1354
|
kind = "." + kind
|
1269
1355
|
if dir_save is None:
|
@@ -1286,8 +1372,10 @@ def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=
|
|
1286
1372
|
continue
|
1287
1373
|
imgs.append(path)
|
1288
1374
|
else:
|
1289
|
-
imgs = [
|
1290
|
-
|
1375
|
+
imgs = [
|
1376
|
+
# os.path.isdir(dir_img),
|
1377
|
+
dir_img]
|
1378
|
+
print(imgs)
|
1291
1379
|
if page_size:
|
1292
1380
|
if isinstance(page_size, str):
|
1293
1381
|
pdf_in_mm = mm_to_point(paper_size(page_size))
|
@@ -1983,7 +2071,6 @@ def fload(fpath, kind=None, **kwargs):
|
|
1983
2071
|
|
1984
2072
|
def load_csv(fpath, **kwargs):
|
1985
2073
|
from pandas.errors import EmptyDataError
|
1986
|
-
|
1987
2074
|
engine = kwargs.pop("engine", "pyarrow")# default: None
|
1988
2075
|
sep = kwargs.pop("sep", None)# default: ','
|
1989
2076
|
index_col = kwargs.pop("index_col", None)# default: None
|
@@ -1994,13 +2081,20 @@ def fload(fpath, kind=None, **kwargs):
|
|
1994
2081
|
comment = kwargs.pop("comment", None)# default: None
|
1995
2082
|
fmt = kwargs.pop("fmt", False)# default:
|
1996
2083
|
chunksize = kwargs.pop("chunksize", None)# default: None
|
2084
|
+
|
2085
|
+
#check filesize
|
2086
|
+
f_size=round(os.path.getsize(fpath) / 1024 / 1024, 3)
|
2087
|
+
if f_size>=50: #50 MB
|
2088
|
+
if chunksize is None:
|
2089
|
+
chunksize = 5000
|
2090
|
+
print(f"file size is {f_size}MB, then set the chunksize with {chunksize}")
|
1997
2091
|
engine = "c" if chunksize else engine # when chunksize, recommend 'c'
|
1998
2092
|
low_memory = kwargs.pop("low_memory", True)# default: True
|
1999
2093
|
low_memory = (
|
2000
2094
|
False if chunksize else True
|
2001
2095
|
) # when chunksize, recommend low_memory=False # default:
|
2002
2096
|
verbose = kwargs.pop("verbose", False)
|
2003
|
-
if run_once_within():
|
2097
|
+
if run_once_within(reverse=True):
|
2004
2098
|
use_pd("read_csv", verbose=verbose)
|
2005
2099
|
|
2006
2100
|
if comment is None:# default: None
|
@@ -2176,7 +2270,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2176
2270
|
def load_excel(fpath, **kwargs):
|
2177
2271
|
engine = kwargs.get("engine", "openpyxl")
|
2178
2272
|
verbose = kwargs.pop("verbose", False)
|
2179
|
-
if run_once_within():
|
2273
|
+
if run_once_within(reverse=True):
|
2180
2274
|
use_pd("read_excel", verbose=verbose)
|
2181
2275
|
df = pd.read_excel(fpath, engine=engine, **kwargs)
|
2182
2276
|
try:
|
@@ -2206,7 +2300,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2206
2300
|
engine = kwargs.get("engine", "pyarrow")
|
2207
2301
|
verbose = kwargs.pop("verbose", False)
|
2208
2302
|
|
2209
|
-
if run_once_within():
|
2303
|
+
if run_once_within(reverse=True):
|
2210
2304
|
use_pd("read_parquet", verbose=verbose)
|
2211
2305
|
try:
|
2212
2306
|
df = pd.read_parquet(fpath, engine=engine, **kwargs)
|
@@ -2383,13 +2477,13 @@ def fload(fpath, kind=None, **kwargs):
|
|
2383
2477
|
return load_xml(fpath)
|
2384
2478
|
elif kind in ["csv", "tsv"]:
|
2385
2479
|
# verbose = kwargs.pop("verbose", False)
|
2386
|
-
if run_once_within():
|
2480
|
+
if run_once_within(reverse=True):
|
2387
2481
|
use_pd("read_csv")
|
2388
2482
|
content = load_csv(fpath, **kwargs)
|
2389
2483
|
return content
|
2390
2484
|
elif kind == "pkl":
|
2391
2485
|
verbose = kwargs.pop("verbose", False)
|
2392
|
-
if run_once_within():
|
2486
|
+
if run_once_within(reverse=True):
|
2393
2487
|
use_pd("read_pickle")
|
2394
2488
|
return pd.read_pickle(fpath, **kwargs)
|
2395
2489
|
elif kind in ["ods", "ods", "odt"]:
|
@@ -2420,12 +2514,12 @@ def fload(fpath, kind=None, **kwargs):
|
|
2420
2514
|
return load_ipynb(fpath, **kwargs)
|
2421
2515
|
elif kind in ["parquet", "snappy"]:
|
2422
2516
|
verbose = kwargs.pop("verbose", False)
|
2423
|
-
if run_once_within():
|
2517
|
+
if run_once_within(reverse=True):
|
2424
2518
|
use_pd("read_parquet")
|
2425
2519
|
return load_parquet(fpath, **kwargs)
|
2426
2520
|
elif kind == "feather":
|
2427
2521
|
verbose = kwargs.pop("verbose", False)
|
2428
|
-
if run_once_within():
|
2522
|
+
if run_once_within(reverse=True):
|
2429
2523
|
use_pd("read_feather")
|
2430
2524
|
content = pd.read_feather(fpath, **kwargs)
|
2431
2525
|
return content
|
@@ -2684,7 +2778,7 @@ def fsave(
|
|
2684
2778
|
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
|
2685
2779
|
|
2686
2780
|
verbose = kwargs.pop("verbose", False)
|
2687
|
-
if run_once_within():
|
2781
|
+
if run_once_within(reverse=True):
|
2688
2782
|
use_pd("to_csv", verbose=verbose)
|
2689
2783
|
kwargs_csv = dict(
|
2690
2784
|
path_or_buf=None,
|
@@ -2716,7 +2810,7 @@ def fsave(
|
|
2716
2810
|
def save_xlsx(fpath, data, **kwargs):
|
2717
2811
|
verbose = kwargs.pop("verbose", False)
|
2718
2812
|
sheet_name = kwargs.pop("sheet_name", "Sheet1")
|
2719
|
-
if run_once_within():
|
2813
|
+
if run_once_within(reverse=True):
|
2720
2814
|
use_pd("to_excel", verbose=verbose)
|
2721
2815
|
if any(kwargs):
|
2722
2816
|
format_excel(df=data, filename=fpath, **kwargs)
|
@@ -3131,21 +3225,437 @@ def isa(content, kind):
|
|
3131
3225
|
return False
|
3132
3226
|
|
3133
3227
|
|
3134
|
-
|
3228
|
+
def get_os(full=False, verbose=False):
    """Return the operating-system family, or a detailed system report.

    Args:
        full (bool): ``False`` (default) returns only the OS family name
            ("macOS", "Windows", "Linux", or ``None`` when unrecognised).
            ``True`` returns a nested dict describing the system, CPU,
            memory, disks, network, GPUs, sensors, processes and environment.
        verbose (bool): when ``True`` the result is also handed to
            ``preview``; an error raised there is printed, not propagated.

    Returns:
        str | None | dict: see ``full``.

    Notes:
        The full report imports ``psutil``, ``GPUtil`` and ``cpuinfo``.
        They are loaded only when ``full=True`` so the cheap call does not
        depend on them. The report contains every environment variable,
        which may include secrets - be careful where it is printed or stored.

    usage:
        info = get_os(full=True, verbose=False)
    """
    import sys

    def get_os_type():
        """Map ``sys.platform`` to "macOS" / "Windows" / "Linux" (``None`` if unknown)."""
        os_name = sys.platform
        # "darwin" also contains "win", so macOS has to be tested first.
        if "dar" in os_name:
            return "macOS"
        if "win" in os_name:
            return "Windows"
        if "linux" in os_name:
            return "Linux"
        print(f"{os_name}, returned 'None'")
        return None

    def show(result):
        """Hand *result* to ``preview`` when verbose output is requested, then return it."""
        if verbose:
            try:
                preview(result)
            except Exception as e:
                print(e)
        return result

    if not full:
        # Cheap path: no subprocesses and no third-party imports needed.
        return show(get_os_type())

    import os
    import platform
    import socket
    import subprocess
    import sysconfig
    import uuid
    from datetime import datetime, timedelta

    import cpuinfo
    import GPUtil
    import psutil

    gib = 1024**3

    def first_field(lines, prefix, sep):
        """Return the text after *sep* on the first line starting with *prefix*."""
        line = next((ln for ln in lines if ln.startswith(prefix)), None)
        if line is None:
            raise ValueError(f"'{prefix}' not found")
        return line.split(sep)[1].strip()

    def get_os_info():
        """Get the detailed OS name, version, and other platform-specific details."""

        def get_mac_os_info():
            """Get detailed macOS version and product name."""
            try:
                lines = (
                    subprocess.check_output(["sw_vers"]).decode("utf-8").split("\n")
                )
                product_name = first_field(lines, "ProductName", ":")
                product_version = first_field(lines, "ProductVersion", ":")
                build_version = first_field(lines, "BuildVersion", ":")
                # Return the formatted macOS name, version, and build
                return f"{product_name} {product_version} (Build {build_version})"
            except Exception as e:
                return f"Error retrieving macOS name: {str(e)}"

        def get_windows_info():
            """Get detailed Windows version and edition."""
            try:
                release = platform.release()
                version = platform.win32_ver()[0]
                # The second line of the wmic output holds the edition caption.
                edition = (
                    subprocess.check_output("wmic os get caption", shell=True)
                    .decode("utf-8")
                    .strip()
                    .split("\n")[1]
                    .strip()
                )
                return f"Windows {version} {release} ({edition})"
            except Exception as e:
                return f"Error retrieving Windows information: {str(e)}"

        def get_linux_info():
            """Get detailed Linux version and distribution info."""
            try:
                # Check /etc/os-release for modern Linux distros
                with open("/etc/os-release") as f:
                    os_info = f.readlines()
                os_name = first_field(os_info, "NAME", "=").replace('"', "")
                os_version = first_field(os_info, "VERSION", "=").replace('"', "")

                # For additional info, check for the package manager (e.g., apt, dnf)
                package_manager = "Unknown"
                if os.path.exists("/usr/bin/apt"):
                    package_manager = "APT (Debian/Ubuntu)"
                elif os.path.exists("/usr/bin/dnf"):
                    package_manager = "DNF (Fedora/RHEL)"

                return f"{os_name} {os_version} (Package Manager: {package_manager})"
            except Exception as e:
                return f"Error retrieving Linux information: {str(e)}"

        os_name = platform.system()
        if os_name == "Darwin":
            return get_mac_os_info()
        if os_name == "Windows":
            return get_windows_info()
        if os_name == "Linux":
            return get_linux_info()
        return f"Unknown OS: {os_name} {platform.release()}"

    def get_system_uptime():
        """Returns system uptime as a human-readable string."""
        boot_time = datetime.fromtimestamp(psutil.boot_time())
        uptime = datetime.now() - boot_time
        return str(uptime).split(".")[0]  # Remove microseconds

    def get_active_processes(limit=10):
        """Return info dicts of the *limit* processes with the highest CPU usage."""
        processes = []
        for proc in psutil.process_iter(
            ["pid", "name", "cpu_percent", "memory_percent"]
        ):
            try:
                processes.append(proc.info)
            except psutil.NoSuchProcess:
                pass
        # Handle NoneType values by treating them as 0
        processes.sort(key=lambda x: x["cpu_percent"] or 0, reverse=True)
        return processes[:limit]

    def get_virtual_environment_info():
        """Checks if the script is running in a virtual environment and returns details."""
        try:
            if hasattr(sys, "real_prefix") or (
                hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
            ):
                return {
                    "Virtual Environment": sys.prefix,
                    # Ask the interpreter for the real location instead of
                    # guessing "lib/python3/site-packages".
                    "Site-Packages Path": sysconfig.get_path("purelib"),
                }
            return {"Virtual Environment": "Not in a virtual environment"}
        except Exception as e:
            return {"Error": str(e)}

    def get_temperatures():
        """Returns temperature sensor readings."""
        try:
            return psutil.sensors_temperatures(fahrenheit=False)
        except AttributeError:
            # psutil does not expose this function on every platform.
            return {"Error": "Temperature sensors not available"}

    def get_battery_status():
        """Returns battery status."""
        battery = psutil.sensors_battery()
        if not battery:
            return {"Status": "No battery detected"}
        if battery.secsleft == psutil.POWER_TIME_UNLIMITED:
            time_left = "Charging/Unlimited"
        elif battery.secsleft == psutil.POWER_TIME_UNKNOWN:
            # Negative sentinel; formatting it as a timedelta would be misleading.
            time_left = "Unknown"
        else:
            time_left = str(timedelta(seconds=battery.secsleft))
        return {
            "Percentage": battery.percent,
            "Plugged In": battery.power_plugged,
            "Time Left": time_left,
        }

    def get_disk_io():
        """Returns disk I/O statistics."""
        disk_io = psutil.disk_io_counters()
        return {
            "Read (GB)": disk_io.read_bytes / gib,
            "Write (GB)": disk_io.write_bytes / gib,
            "Read Count": disk_io.read_count,
            "Write Count": disk_io.write_count,
        }

    def get_network_io():
        """Returns network I/O statistics."""
        net_io = psutil.net_io_counters()
        return {
            "Bytes Sent (GB)": net_io.bytes_sent / gib,
            "Bytes Received (GB)": net_io.bytes_recv / gib,
            "Packets Sent": net_io.packets_sent,
            "Packets Received": net_io.packets_recv,
        }

    def run_shell_command(command):
        """Runs a shell command and returns its output (stderr on a non-zero exit)."""
        try:
            result = subprocess.run(
                command,
                shell=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
            )
            return (
                result.stdout.strip()
                if result.returncode == 0
                else result.stderr.strip()
            )
        except Exception as e:
            return f"Error running command: {e}"

    def get_ip_address(hostname):
        """Resolve *hostname*; "Unknown" when the name cannot be resolved."""
        try:
            return socket.gethostbyname(hostname)
        except OSError:
            return "Unknown"

    def get_mac_address():
        """Format ``uuid.getnode()`` as aa:bb:cc:dd:ee:ff, most significant byte first."""
        node = uuid.getnode()
        return ":".join(
            "{:02x}".format((node >> shift) & 0xFF) for shift in range(40, -1, -8)
        )

    def get_cpu_frequency():
        """Return ``psutil.cpu_freq()`` or ``None`` when it is unavailable."""
        try:
            return psutil.cpu_freq()
        except Exception:
            return None

    hostname = socket.gethostname()
    cpu_freq = get_cpu_frequency()
    shell = os.environ.get("SHELL", "Unknown")
    terminal = run_shell_command("echo $TERM")
    virtualization = run_shell_command("systemd-detect-virt")
    kernel = platform.uname().release
    in_docker = os.path.exists("/.dockerenv")  # Check for Docker

    system_info = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "os": get_os_type(),
        "system": {
            "os": get_os_info(),
            "platform": f"{platform.system()} {platform.release()}",
            "version": platform.version(),
            "machine": platform.machine(),
            "processor": platform.processor(),
            "architecture": platform.architecture()[0],
            "hostname": hostname,
            "ip address": get_ip_address(hostname),
            "mac address": get_mac_address(),
            "cpu brand": cpuinfo.get_cpu_info().get("brand_raw", "Unknown"),
            "python version": platform.python_version(),
            "uptime": get_system_uptime(),
        },
        "cpu": {
            "physical cores": psutil.cpu_count(logical=False),
            "logical cores": psutil.cpu_count(logical=True),
            "max frequency (MHz)": getattr(cpu_freq, "max", None),
            "min frequency (MHz)": getattr(cpu_freq, "min", None),
            "current frequency (MHz)": getattr(cpu_freq, "current", None),
            "usage per core (%)": psutil.cpu_percent(percpu=True),
            "total cpu Usage (%)": psutil.cpu_percent(),
            "load average (1m, 5m, 15m)": (
                os.getloadavg() if hasattr(os, "getloadavg") else "N/A"
            ),
        },
        "memory": {},
        "disk": {},
        "disk io": get_disk_io(),
        "network": {},
        "network io": get_network_io(),
        "gpu": [],
        "temperatures": get_temperatures(),
        "battery": get_battery_status(),
        "active processes": get_active_processes(),
        "environment": {
            # USER is not set on Windows, USERNAME is.
            "user": os.getenv("USER") or os.getenv("USERNAME", "Unknown"),
            "environment variables": dict(os.environ),
            "virtual environment info": get_virtual_environment_info(),
            "docker running": in_docker,
            "shell": shell,
            "default terminal": terminal,
            "kernel version": kernel,
            "virtualization type": virtualization,
        },
        "additional info": {
            "Shell": shell,
            "default terminal": terminal,
            "kernel version": kernel,
            "virtualization type": virtualization,
            "running in docker": in_docker,
        },
    }

    # Memory Information (one snapshot each, so the figures are consistent)
    vmem = psutil.virtual_memory()
    swap = psutil.swap_memory()
    system_info["memory"] = {
        "total memory (GB)": vmem.total / gib,
        "available memory (GB)": vmem.available / gib,
        "used memory (GB)": vmem.used / gib,
        "memory usage (%)": vmem.percent,
        "swap total (GB)": swap.total / gib,
        "swap free (GB)": swap.free / gib,
        "swap used (GB)": swap.used / gib,
        "swap usage (%)": swap.percent,
    }

    # Disk Information
    for partition in psutil.disk_partitions():
        try:
            usage = psutil.disk_usage(partition.mountpoint)
            system_info["disk"][partition.device] = {
                "mountpoint": partition.mountpoint,
                "file system type": partition.fstype,
                "total size (GB)": usage.total / gib,
                "used (GB)": usage.used / gib,
                "free (GB)": usage.free / gib,
                "usage (%)": usage.percent,
            }
        except PermissionError:
            system_info["disk"][partition.device] = "Permission Denied"

    # Network Information
    # Compare the address family itself: since Python 3.11 str() of an
    # IntEnum member is its number, so matching "AddressFamily.AF_INET" fails.
    for interface_name, interface_addresses in psutil.net_if_addrs().items():
        entries = system_info["network"][interface_name] = []
        for address in interface_addresses:
            if address.family == socket.AF_INET:
                entries.append(
                    {
                        "ip address": address.address,
                        "netmask": address.netmask,
                        "broadcast ip": address.broadcast,
                    }
                )
            elif address.family == psutil.AF_LINK:
                # psutil.AF_LINK marks hardware (MAC) addresses on every platform.
                entries.append(
                    {
                        "mac address": address.address,
                        "netmask": address.netmask,
                        "broadcast mac": address.broadcast,
                    }
                )

    # GPU Information
    for gpu in GPUtil.getGPUs():
        gpu_info = {
            "name": gpu.name,
            "load (%)": gpu.load * 100,
            "free memory (MB)": gpu.memoryFree,
            "used memory (MB)": gpu.memoryUsed,
            "total memory (MB)": gpu.memoryTotal,
            "driver version": gpu.driver,
            "temperature (°C)": gpu.temperature,
        }
        if hasattr(gpu, "powerDraw"):
            gpu_info["Power Draw (W)"] = gpu.powerDraw
        if hasattr(gpu, "powerLimit"):
            gpu_info["Power Limit (W)"] = gpu.powerLimit
        system_info["gpu"].append(gpu_info)

    return show(system_info)
|
3149
3659
|
|
3150
3660
|
|
3151
3661
|
def listdir(
|
@@ -3168,8 +3678,9 @@ def listdir(
|
|
3168
3678
|
print(ls)
|
3169
3679
|
df_all = pd.DataFrame(
|
3170
3680
|
{
|
3171
|
-
"
|
3172
|
-
"
|
3681
|
+
"name": ls,
|
3682
|
+
"path": [os.path.join(rootdir, i) for i in ls],
|
3683
|
+
"kind":[os.path.splitext(i)[1] for i in ls]
|
3173
3684
|
}
|
3174
3685
|
)
|
3175
3686
|
if verbose:
|
@@ -3308,7 +3819,94 @@ def listfunc(lib_name, opt="call"):
|
|
3308
3819
|
def func_list(lib_name, opt="call"):
|
3309
3820
|
return list_func(lib_name, opt=opt)
|
3310
3821
|
|
3822
|
+
def copy(src, dst, overwrite=False):
    """Copy a file or a directory tree from src to dst.

    Args:
        src (str | Path): file or directory to copy.
        dst (str | Path): for a file, the target file or an existing
            directory to copy into; for a directory, the parent directory
            that receives a copy named like ``src``.
        overwrite (bool): replace an existing destination. When ``False``
            (default) a time stamp is appended to the destination name.

    Returns:
        Path | None: the path that was written, or ``None`` when the copy
        failed (the error is logged, not raised).
    """
    try:
        src = Path(src)
        dst = Path(dst)
        if not src.is_dir():
            if dst.is_dir():
                dst = dst / src.name

            if dst.exists():
                if overwrite:
                    dst.unlink()
                else:
                    dst = dst.with_name(f"{dst.stem}_{datetime.now().strftime('_%H%M%S')}{dst.suffix}")
            shutil.copy(src, dst)
        else:
            dst = dst / src.name
            if dst.exists():
                if overwrite:
                    # Remove whatever occupies the target, directory or file.
                    if dst.is_dir():
                        shutil.rmtree(dst)
                    else:
                        dst.unlink()
                else:
                    # Use the full name: ``stem`` would drop everything after
                    # the last dot of a directory such as "pkg.v1".
                    dst = dst.with_name(f"{dst.name}_{datetime.now().strftime('%H%M%S')}")
            shutil.copytree(src, dst)
        print(f"\n Done! copy to {dst}\n")
        return dst

    except Exception as e:
        logging.error(f"Failed {e}")
        return None
|
3850
|
+
|
3851
|
+
def move(src, dst, overwrite=False):
    """Move src to dst; an alias that forwards all arguments to ``cut``."""
    outcome = cut(src=src, dst=dst, overwrite=overwrite)
    return outcome
|
3311
3853
|
|
3854
|
+
def cut(src, dst, overwrite=False):
    """Move a file or directory from src to dst.

    Args:
        src (str | Path): file or directory to move.
        dst (str | Path): target path, or an existing directory to move into.
        overwrite (bool): when the destination exists, ``True`` replaces it;
            ``False`` (default) keeps it and appends a time stamp to the
            new name.

    Returns:
        Path | None: the final location, or ``None`` when the move failed
        (the error is logged, not raised).
    """
    try:
        src = Path(src)
        dst = Path(dst)
        if dst.is_dir():
            dst = dst / src.name
        if dst.exists():
            if not overwrite:
                dst = dst.with_name(f"{dst.stem}_{datetime.now().strftime('_%H%M%S')}{dst.suffix}")
            elif not dst.samefile(src):
                # Clear the destination first. Otherwise shutil.move nests src
                # inside an existing directory, and replacing an existing file
                # depends on the platform's os.rename semantics.
                # samefile() also raises for a missing src, so nothing is
                # deleted unless there is something to move.
                if dst.is_dir() and not dst.is_symlink():
                    shutil.rmtree(dst)
                else:
                    dst.unlink()
        shutil.move(src, dst)
        print(f"\n Done! moved to {dst}\n")
        return dst
    except Exception as e:
        logging.error(f"Failed to move file from {src} to {dst}: {e}")
        return None
|
3870
|
+
|
3871
|
+
def delete(fpath):
    """Remove a file, or a folder together with its content.

    A missing path is only reported on stdout. Any error raised while
    deleting is logged instead of being propagated. Returns ``None``.
    """
    try:
        fpath = Path(fpath)
        is_folder = fpath.is_dir()
        if not fpath.exists():
            # Nothing to remove: report it with the matching wording.
            if is_folder:
                print(f"Folder '{fpath}' does not exist.")
            else:
                print(f"File '{fpath}' does not exist.")
            return
        if is_folder:
            shutil.rmtree(fpath)  # removes the whole tree
        else:
            fpath.unlink()
        print(f"\n Done! delete {fpath}\n")
    except Exception as e:
        logging.error(f"Failed to delete {fpath}: {e}")
|
3889
|
+
def rename(fpath, dst, smart=True):
    """Rename a file or folder.

    Args:
        fpath (str): existing path to rename.
        dst (str): new name or path.
        smart (bool): when ``True`` (default), a ``dst`` whose directory part
            differs from that of ``fpath`` is joined onto the directory of
            ``fpath``, and the extension of ``fpath`` is appended whenever
            the extension of ``dst`` differs from it.

    A missing ``fpath`` is reported on stdout; errors are logged, not raised.
    """
    try:
        if smart:
            src_dir, dst_dir = os.path.dirname(fpath), os.path.dirname(dst)
            src_ext = os.path.splitext(fpath)[1]
            dst_ext = os.path.splitext(dst)[1]
            # Keep the renamed item next to the original.
            if dst_dir != src_dir:
                dst = os.path.join(src_dir, dst)
            # Carry the original extension over when dst has a different one.
            if dst_ext != src_ext:
                dst = dst + src_ext
        if not os.path.exists(fpath):
            print(f"Failed: {fpath} does not exist.")
        else:
            os.rename(fpath, dst)
            print(f"Done! rename to {dst}")
    except Exception as e:
        logging.error(f"Failed to rename {fpath} to {dst}: {e}")
|
3312
3910
|
def mkdir_nest(fpath: str) -> str:
|
3313
3911
|
"""
|
3314
3912
|
Create nested directories based on the provided file path.
|
@@ -3327,7 +3925,9 @@ def mkdir_nest(fpath: str) -> str:
|
|
3327
3925
|
dir_parts = fpath.split(f_slash) # Split the path by the OS-specific separator
|
3328
3926
|
|
3329
3927
|
# Start creating directories from the root to the desired path
|
3330
|
-
|
3928
|
+
root_dir = os.path.splitdrive(fpath)[0] # Get the root drive on Windows (e.g., 'C:')
|
3929
|
+
current_path = root_dir if root_dir else f_slash # Start from the root directory or POSIX '/'
|
3930
|
+
|
3331
3931
|
for part in dir_parts:
|
3332
3932
|
if part:
|
3333
3933
|
current_path = os.path.join(current_path, part)
|
@@ -3351,10 +3951,13 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
|
|
3351
3951
|
Returns:
|
3352
3952
|
- str: The path of the created directory or an error message.
|
3353
3953
|
"""
|
3354
|
-
|
3355
3954
|
rootdir = []
|
3955
|
+
pardir= mkdir_nest(pardir)
|
3356
3956
|
if chdir is None:
|
3357
|
-
return
|
3957
|
+
return pardir
|
3958
|
+
else:
|
3959
|
+
pass
|
3960
|
+
print(pardir)
|
3358
3961
|
if isinstance(chdir, str):
|
3359
3962
|
chdir = [chdir]
|
3360
3963
|
chdir = list(set(chdir))
|
@@ -3392,7 +3995,7 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
|
|
3392
3995
|
# Dir is the main output, if only one dir, then str type is inconvenient
|
3393
3996
|
if len(rootdir) == 1:
|
3394
3997
|
rootdir = rootdir[0]
|
3395
|
-
|
3998
|
+
rootdir = rootdir + stype if not rootdir.endswith(stype) else rootdir
|
3396
3999
|
|
3397
4000
|
return rootdir
|
3398
4001
|
|
@@ -3791,6 +4394,114 @@ def apply_filter(img, *args):
|
|
3791
4394
|
)
|
3792
4395
|
return img.filter(supported_filters[filter_name])
|
3793
4396
|
|
4397
|
+
def detect_angle(image, by="median", template=None):
    """Estimate the rotation angle (in degrees) of an image.

    Parameters:
        image: Array-like image, either 2-D (already grayscale) or 3-D with
            the colour channels last (only the first three channels are used).
        by (str): Method selector, matched by substring (case-insensitive):
            * "mo"   -> image moments
            * "me"   -> Hough lines; median when it also contains "di"
                        (e.g. "median"), otherwise mean (e.g. "mean")
            * "pca"  -> PCA on the Canny edge pixels
            * "gra"  -> histogram of gradient orientations
            * "temp" -> template matching over rotations 0..179 degrees
            * "fft"  -> peak of the Fourier magnitude spectrum
        template: Optional template for the "temp" method; when None a patch
            is cut from the centre of the image.

    Returns:
        The estimated angle in degrees. 0 is returned when no lines/edges
        are found or when the method is unknown.
    """
    import numpy as np

    by = str(by).lower()
    image = np.asarray(image)
    if image.ndim == 2:
        # Already single-channel: rgb2gray is only meant for colour input.
        gray_image = image
    else:
        from skimage.color import rgb2gray

        gray_image = rgb2gray(image[..., :3])  # ignore an alpha channel

    # Image Moments-based angle detection.
    # Checked first because "moments" also contains "me" and would otherwise
    # be captured by the median/mean branch below.
    if "mo" in by:
        from skimage import measure

        moments = measure.moments_central(gray_image)
        angle = (
            0.5
            * np.arctan2(2 * moments[1, 1], moments[0, 2] - moments[2, 0])
            * 180
            / np.pi
        )
        return angle

    # Edge based methods (Hough median/mean and PCA)
    if "me" in by or "pca" in by:
        from skimage import feature, transform

        # Detect edges using Canny edge detector
        edges = feature.canny(gray_image, sigma=2)
        # Use Hough transform to detect lines
        lines = transform.probabilistic_hough_line(edges)
        if not lines:
            print("No lines detected. Adjust the edge detection parameters.")
            return 0

        # Hough Transform-based angle detection (Median/Mean)
        if "me" in by:
            angles = []
            for (x0, y0), (x1, y1) in lines:
                angle = np.arctan2(y1 - y0, x1 - x0) * 180 / np.pi
                if 80 < abs(angle) < 100:  # keep near-vertical segments only
                    angles.append(angle)
            if not angles:
                return 0
            central = np.median(angles) if "di" in by else np.mean(angles)
            return 90 - central if central > 0 else -90 - central

        # PCA-based angle detection
        from sklearn.decomposition import PCA

        y, x = np.nonzero(edges)
        if len(x) == 0:
            return 0
        pca = PCA(n_components=2)
        pca.fit(np.vstack((x, y)).T)
        return np.arctan2(pca.components_[0, 1], pca.components_[0, 0]) * 180 / np.pi

    # Gradient Orientation-based angle detection
    if "gra" in by:
        # NOTE(review): np.gradient returns the derivatives in axis order
        # (rows first), so "gx" here is the derivative along axis 0 --
        # confirm that this orientation convention is the intended one.
        gx, gy = np.gradient(gray_image)
        angles = np.arctan2(gy, gx) * 180 / np.pi
        hist, bin_edges = np.histogram(angles, bins=360, range=(-180, 180))
        return bin_edges[np.argmax(hist)]

    # Template Matching-based angle detection
    if "temp" in by:
        import cv2
        from skimage import transform

        if template is None:
            # Automatically extract a template from the center of the image
            height, width = gray_image.shape
            center_x, center_y = width // 2, height // 2
            size = min(height, width) // 4  # half-width of the patch
            template = gray_image[
                center_y - size : center_y + size, center_x - size : center_x + size
            ]
        # cv2.matchTemplate only accepts 8-bit or 32-bit float arrays, while
        # rgb2gray / transform.rotate typically hand back float64.
        search_image = np.asarray(gray_image, dtype=np.float32)
        best_angle = None
        best_corr = -np.inf  # TM_CCOEFF is not normalised and may be < -1
        for angle in range(0, 180, 1):  # Checking every degree
            rotated_template = np.asarray(
                transform.rotate(template, angle), dtype=np.float32
            )
            res = cv2.matchTemplate(search_image, rotated_template, cv2.TM_CCOEFF)
            _, max_val, _, _ = cv2.minMaxLoc(res)
            if max_val > best_corr:
                best_corr = max_val
                best_angle = angle
        return best_angle

    # Fourier Transform-based angle detection
    if "fft" in by:
        from scipy.fftpack import fftshift, fft2

        f = fft2(gray_image)
        fshift = fftshift(f)
        magnitude_spectrum = np.log(np.abs(fshift) + 1)
        rows, cols = magnitude_spectrum.shape
        # NOTE(review): the global maximum may simply be the DC term at the
        # centre of the shifted spectrum -- confirm whether it should be
        # masked out before taking the argmax.
        r, c = np.unravel_index(np.argmax(magnitude_spectrum), (rows, cols))
        return np.arctan2(r - rows // 2, c - cols // 2) * 180 / np.pi

    print(f"Unknown method {by}")
    return 0
|
3794
4505
|
|
3795
4506
|
def imgsets(img, **kwargs):
|
3796
4507
|
"""
|
@@ -5911,6 +6622,9 @@ def df_scaler(
|
|
5911
6622
|
scaler=None,
|
5912
6623
|
method="standard",
|
5913
6624
|
columns=None, # default, select all numeric col/row
|
6625
|
+
feature_range=None,# specific for 'minmax'
|
6626
|
+
vmin=0,
|
6627
|
+
vmax=1,
|
5914
6628
|
inplace=False,
|
5915
6629
|
verbose=False, # show usage
|
5916
6630
|
axis=0, # defalut column-wise
|
@@ -5943,11 +6657,13 @@ def df_scaler(
|
|
5943
6657
|
scaler = StandardScaler(**kwargs)
|
5944
6658
|
elif method == "minmax":
|
5945
6659
|
from sklearn.preprocessing import MinMaxScaler
|
6660
|
+
if feature_range is None:
|
6661
|
+
feature_range=(vmin,vmax)
|
5946
6662
|
if verbose:
|
5947
6663
|
print("don't forget to define the range: e.g., 'feature_range=(0, 1)'. ")
|
5948
6664
|
print("scales the features to the range [0, 1]. Adjust feature_range if you want a different range, like [-1, 1].")
|
5949
6665
|
print("Use when the data does not follow a normal distribution and you need all features in a specific range (e.g., [0, 1]).\nIdeal for algorithms that do not assume a particular distribution, such as k-nearest neighbors and neural networks.")
|
5950
|
-
scaler = MinMaxScaler(
|
6666
|
+
scaler = MinMaxScaler(feature_range=feature_range,**kwargs)
|
5951
6667
|
elif method == "robust":
|
5952
6668
|
from sklearn.preprocessing import RobustScaler
|
5953
6669
|
if verbose:
|
@@ -6035,15 +6751,20 @@ def df_special_characters_cleaner(
|
|
6035
6751
|
|
6036
6752
|
# 1. Clean column names by replacing special characters with underscores
|
6037
6753
|
if "column" in where_:
|
6038
|
-
|
6754
|
+
try:
|
6755
|
+
data.columns = data.columns.str.replace(r"[^\w\s]", "_", regex=True)
|
6756
|
+
except Exception as e:
|
6757
|
+
print(e)
|
6039
6758
|
|
6040
6759
|
# 2. Clean only object-type columns (text columns)
|
6041
|
-
|
6042
|
-
|
6043
|
-
|
6044
|
-
|
6045
|
-
|
6046
|
-
|
6760
|
+
try:
|
6761
|
+
if "content" in where_:
|
6762
|
+
for col in data.select_dtypes(include=["object"]).columns:
|
6763
|
+
data[col] = data[col].str.replace(r"[^\w\s]", "", regex=True)
|
6764
|
+
if data.index.dtype == "object" and index in where_:
|
6765
|
+
data.index = data.index.str.replace(r"[^\w\s]", "_", regex=True)
|
6766
|
+
except:
|
6767
|
+
pass
|
6047
6768
|
return data
|
6048
6769
|
|
6049
6770
|
|
@@ -6426,6 +7147,9 @@ def df_reducer(
|
|
6426
7147
|
# "autoencoder","nmf",
|
6427
7148
|
]
|
6428
7149
|
method = strcmp(method, methods)[0]
|
7150
|
+
if run_once_within(reverse=True):
|
7151
|
+
print(f"support methods:{methods}")
|
7152
|
+
|
6429
7153
|
if verbose:
|
6430
7154
|
print(f"\nprocessing with using {dict_methods[method]}:")
|
6431
7155
|
xlabel, ylabel = None, None
|
@@ -6433,16 +7157,20 @@ def df_reducer(
|
|
6433
7157
|
columns = data.select_dtypes(include="number").columns.tolist()
|
6434
7158
|
if hue is None:
|
6435
7159
|
hue = data.select_dtypes(exclude="number").columns.tolist()
|
7160
|
+
print(f"auto select the non-number as 'hue':{hue}")
|
6436
7161
|
if isinstance(hue, list):
|
6437
7162
|
print("Warning: hue is a list, only select the 1st one")
|
6438
7163
|
hue = hue[0]
|
6439
|
-
if not hue:
|
7164
|
+
if not any(hue):
|
6440
7165
|
# Select columns if specified, else use all columns
|
6441
7166
|
X = data[columns].values if columns else data.values
|
6442
7167
|
else:
|
6443
7168
|
# Select columns to reduce and hue for LDA
|
6444
|
-
|
6445
|
-
|
7169
|
+
try:
|
7170
|
+
X = data[columns].values if columns else data.drop(columns=[hue]).values
|
7171
|
+
y = data[hue].values
|
7172
|
+
except:
|
7173
|
+
pass
|
6446
7174
|
print(X.shape)
|
6447
7175
|
# Handle missing values
|
6448
7176
|
if fill_missing:
|
@@ -6909,33 +7637,49 @@ def df_reducer(
|
|
6909
7637
|
colname_met = "SVD_"
|
6910
7638
|
# Quick plots
|
6911
7639
|
if plot_ and (not method in ["isolation_forest"]):
|
6912
|
-
from .plot import plotxy
|
6913
|
-
if ax is None:
|
6914
|
-
|
6915
|
-
|
6916
|
-
|
6917
|
-
|
6918
|
-
else:
|
6919
|
-
|
7640
|
+
from .plot import plotxy,figsets,get_color
|
7641
|
+
# if ax is None:
|
7642
|
+
# if figsize is None:
|
7643
|
+
# _, ax = plt.subplots(figsize=cm2inch(8, 8))
|
7644
|
+
# else:
|
7645
|
+
# _, ax = plt.subplots(figsize=figsize)
|
7646
|
+
# else:
|
7647
|
+
# ax = ax.cla()
|
6920
7648
|
xlabel = f"{colname_met}1" if xlabel is None else xlabel
|
6921
7649
|
ylabel = f"{colname_met}2" if ylabel is None else ylabel
|
7650
|
+
palette=get_color(len(flatten(data[hue],verbose=0)))
|
7651
|
+
|
7652
|
+
reduced_df=reduced_df.sort_values(by=hue)
|
7653
|
+
print(flatten(reduced_df[hue]))
|
6922
7654
|
ax = plotxy(
|
6923
7655
|
data=reduced_df,
|
6924
7656
|
x=colname_met + "1",
|
6925
7657
|
y=colname_met + "2",
|
6926
7658
|
hue=hue,
|
6927
|
-
|
7659
|
+
palette=palette,
|
7660
|
+
# size=size,
|
6928
7661
|
edgecolor=edgecolor,
|
6929
|
-
kind_="
|
6930
|
-
|
6931
|
-
|
6932
|
-
|
6933
|
-
|
6934
|
-
|
6935
|
-
|
7662
|
+
kind_=["joint",
|
7663
|
+
# "kde",
|
7664
|
+
"ell",
|
7665
|
+
],
|
7666
|
+
kws_kde=dict(
|
7667
|
+
hue=hue,
|
7668
|
+
levels=2,
|
7669
|
+
common_norm=False,
|
7670
|
+
fill=True,
|
7671
|
+
alpha=0.05,
|
7672
|
+
),
|
7673
|
+
kws_joint=dict(kind='scatter',joint_kws=dict(s=size)),
|
7674
|
+
kws_ellipse=dict(alpha=0.1,lw=1,label=None),
|
6936
7675
|
verbose=False,
|
6937
7676
|
**kwargs,
|
6938
7677
|
)
|
7678
|
+
figsets(
|
7679
|
+
legend=dict(loc=legend_loc, markerscale=markerscale,bbox_to_anchor=bbox_to_anchor,ncols=ncols,fontsize=8),
|
7680
|
+
xlabel=xlabel if xlabel else None,
|
7681
|
+
ylabel=ylabel if ylabel else None,
|
7682
|
+
)
|
6939
7683
|
|
6940
7684
|
if inplace:
|
6941
7685
|
# If inplace=True, add components back into the original data
|
@@ -7412,6 +8156,7 @@ def df_qc(
|
|
7412
8156
|
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
7413
8157
|
from scipy.stats import skew, kurtosis, entropy
|
7414
8158
|
|
8159
|
+
pd.options.display.max_seq_items = 10
|
7415
8160
|
#! display(data.select_dtypes(include=[np.number]).describe())
|
7416
8161
|
#!skim
|
7417
8162
|
if columns is not None:
|
@@ -7428,16 +8173,18 @@ def df_qc(
|
|
7428
8173
|
data = data.copy()
|
7429
8174
|
data.loc[:, data.isna().all()] = 0
|
7430
8175
|
res_qc = {}
|
7431
|
-
print(f"data.shape:{data.shape}")
|
8176
|
+
print(f"⤵ data.shape:{data.shape}\n⤵ data.sample(10):")
|
8177
|
+
display(data.sample(10).style.background_gradient(cmap="coolwarm", axis=1))
|
7432
8178
|
|
7433
8179
|
# Missing values
|
7434
8180
|
res_qc["missing_values"] = data.isnull().sum()
|
7435
|
-
res_qc["missing_percentage"] = (res_qc["missing_values"] / len(data)) * 100
|
8181
|
+
res_qc["missing_percentage"] = round((res_qc["missing_values"] / len(data)) * 100,2)
|
7436
8182
|
res_qc["rows_with_missing"] = data.isnull().any(axis=1).sum()
|
7437
8183
|
|
7438
8184
|
# Data types and unique values
|
7439
8185
|
res_qc["data_types"] = data.dtypes
|
7440
|
-
res_qc["
|
8186
|
+
res_qc["unique_counts"] = data.select_dtypes(exclude=np.number).nunique().sort_values()
|
8187
|
+
res_qc["unique_values"] = data.select_dtypes(exclude=np.number).apply(lambda x: x.unique())
|
7441
8188
|
res_qc["constant_columns"] = [
|
7442
8189
|
col for col in data.columns if data[col].nunique() <= 1
|
7443
8190
|
]
|
@@ -7453,33 +8200,42 @@ def df_qc(
|
|
7453
8200
|
data_outliers = df_outlier(data)
|
7454
8201
|
outlier_num = data_outliers.isna().sum() - data.isnull().sum()
|
7455
8202
|
res_qc["outlier_num"] = outlier_num[outlier_num > 0]
|
7456
|
-
outlier_percentage=(outlier_num / len(data_outliers)) * 100
|
8203
|
+
outlier_percentage=round((outlier_num / len(data_outliers)) * 100,2)
|
7457
8204
|
res_qc["outlier_percentage"] = outlier_percentage[outlier_percentage>0]
|
7458
|
-
|
7459
|
-
|
7460
|
-
|
7461
|
-
|
7462
|
-
|
7463
|
-
|
7464
|
-
|
7465
|
-
|
7466
|
-
|
7467
|
-
|
7468
|
-
res_qc["high_correlations"] = high_corr_pairs
|
7469
|
-
|
7470
|
-
# VIF for multicollinearity check
|
7471
|
-
numeric_df = data.select_dtypes(include=[np.number]).dropna()
|
7472
|
-
vif_data = pd.DataFrame()
|
7473
|
-
res_qc["vif"]=vif_data
|
7474
|
-
if numeric_df.shape[1] > 1 and not numeric_df.empty:
|
7475
|
-
vif_data["feature"] = numeric_df.columns
|
7476
|
-
vif_data["VIF"] = [
|
7477
|
-
variance_inflation_factor(numeric_df.values, i)
|
7478
|
-
for i in range(numeric_df.shape[1])
|
8205
|
+
try:
|
8206
|
+
# Correlation and multicollinearity (VIF)
|
8207
|
+
if any(data.dtypes.apply(pd.api.types.is_numeric_dtype)):
|
8208
|
+
numeric_df = data.select_dtypes(include=[np.number]).dropna()
|
8209
|
+
corr_matrix = numeric_df.corr()
|
8210
|
+
high_corr_pairs = [
|
8211
|
+
(col1, col2)
|
8212
|
+
for col1 in corr_matrix.columns
|
8213
|
+
for col2 in corr_matrix.columns
|
8214
|
+
if col1 != col2 and abs(corr_matrix[col1][col2]) > 0.9
|
7479
8215
|
]
|
7480
|
-
res_qc["
|
7481
|
-
|
7482
|
-
|
8216
|
+
res_qc["high_correlations"] = high_corr_pairs
|
8217
|
+
|
8218
|
+
# VIF for multicollinearity check
|
8219
|
+
numeric_df = data.select_dtypes(include=[np.number]).dropna()
|
8220
|
+
if isinstance(numeric_df.columns, pd.MultiIndex):
|
8221
|
+
numeric_df.columns = [
|
8222
|
+
"_".join(col).strip() if isinstance(col, tuple) else col for col in numeric_df.columns
|
8223
|
+
]
|
8224
|
+
|
8225
|
+
|
8226
|
+
vif_data = pd.DataFrame()
|
8227
|
+
res_qc["vif"]=vif_data
|
8228
|
+
if numeric_df.shape[1] > 1 and not numeric_df.empty:
|
8229
|
+
vif_data["feature"] = numeric_df.columns.tolist()
|
8230
|
+
vif_data["VIF"] = [
|
8231
|
+
round(variance_inflation_factor(numeric_df.values, i),2)
|
8232
|
+
for i in range(numeric_df.shape[1])
|
8233
|
+
]
|
8234
|
+
res_qc["vif"] = vif_data[
|
8235
|
+
vif_data["VIF"] > 5
|
8236
|
+
] # Typically VIF > 5 indicates multicollinearity
|
8237
|
+
except Exception as e:
|
8238
|
+
print(e)
|
7483
8239
|
# Skewness and Kurtosis
|
7484
8240
|
skewness = data.skew(numeric_only=True)
|
7485
8241
|
kurtosis_vals = data.kurt(numeric_only=True)
|
@@ -7492,8 +8248,7 @@ def df_qc(
|
|
7492
8248
|
col: entropy(data[col].value_counts(normalize=True), base=2)
|
7493
8249
|
for col in categorical_cols
|
7494
8250
|
}
|
7495
|
-
|
7496
|
-
res_qc["unique_counts"] = data.nunique()
|
8251
|
+
|
7497
8252
|
# dtypes counts
|
7498
8253
|
res_qc['dtype_counts']=data.dtypes.value_counts()
|
7499
8254
|
|
@@ -7540,7 +8295,7 @@ def df_qc(
|
|
7540
8295
|
res_qc["text_length_analysis"] = text_lengths
|
7541
8296
|
|
7542
8297
|
# Summary statistics
|
7543
|
-
res_qc["summary_statistics"] = data.describe().T
|
8298
|
+
res_qc["summary_statistics"] = data.describe().T.style.background_gradient(cmap='coolwarm', axis=0)
|
7544
8299
|
|
7545
8300
|
# Automated warnings
|
7546
8301
|
warnings = []
|
@@ -7562,28 +8317,45 @@ def df_qc(
|
|
7562
8317
|
|
7563
8318
|
# Report generation
|
7564
8319
|
if verbose:
|
7565
|
-
print("=== QC Report Summary ===")
|
7566
8320
|
print("\n⤵ Summary Statistics:")
|
7567
8321
|
display(res_qc["summary_statistics"])
|
7568
8322
|
print("\n⤵ Data Types:")
|
7569
8323
|
display(res_qc["data_types"])
|
7570
8324
|
if any(res_qc["missing_values"][res_qc["missing_values"] > 0]):
|
7571
8325
|
print(" ⤵ Missing Values Counts:")
|
7572
|
-
display(
|
8326
|
+
display(pd.DataFrame(
|
8327
|
+
{
|
8328
|
+
"missing_values": res_qc["missing_values"][res_qc["missing_values"] > 0],
|
8329
|
+
"missing_percent(%)": res_qc["missing_percentage"][
|
8330
|
+
res_qc["missing_percentage"] > 0
|
8331
|
+
],
|
8332
|
+
}
|
8333
|
+
).style.background_gradient(cmap="coolwarm", axis=0)
|
8334
|
+
)
|
7573
8335
|
# print(res_qc["missing_percentage"][res_qc["missing_percentage"] > 0])
|
7574
8336
|
print("\n⤵ Rows with Missing Values:",res_qc["rows_with_missing"])
|
7575
8337
|
|
8338
|
+
print("\n⤵ Constant Columns:", res_qc["constant_columns"]) if any(res_qc["constant_columns"]) else None
|
8339
|
+
print("⤵ Duplicate Rows:", res_qc["duplicate_rows"]) if res_qc["duplicate_rows"] else None
|
8340
|
+
print("⤵ Duplicate Columns:", res_qc["duplicate_columns"]) if any(res_qc["duplicate_columns"]) else None
|
8341
|
+
|
7576
8342
|
if any(res_qc["outlier_num"]):
|
7577
8343
|
print("\n⤵ Outlier Report:")
|
7578
|
-
display(
|
7579
|
-
|
7580
|
-
|
7581
|
-
|
8344
|
+
display(pd.DataFrame(
|
8345
|
+
{
|
8346
|
+
"outlier_num": res_qc["outlier_num"][res_qc["outlier_num"] > 0],
|
8347
|
+
"outlier_percentage(%)": res_qc["outlier_percentage"][
|
8348
|
+
res_qc["outlier_percentage"] > 0
|
8349
|
+
],
|
8350
|
+
}
|
8351
|
+
).style.background_gradient(cmap="coolwarm", axis=0)
|
8352
|
+
)
|
7582
8353
|
|
7583
|
-
|
8354
|
+
if any(res_qc["unique_counts"]):
|
8355
|
+
print("\n⤵ Unique Values per Column:")
|
8356
|
+
display(pd.DataFrame({"unique_counts":res_qc["unique_counts"],
|
8357
|
+
"unique_values":res_qc["unique_values"]}).style.background_gradient(cmap="coolwarm", axis=0))
|
7584
8358
|
|
7585
|
-
print("⤵ Duplicate Rows:", res_qc["duplicate_rows"]) if res_qc["duplicate_rows"] else None
|
7586
|
-
print("⤵ Duplicate Columns:", res_qc["duplicate_columns"]) if any(res_qc["duplicate_columns"]) else None
|
7587
8359
|
|
7588
8360
|
if res_qc["empty_columns"]:
|
7589
8361
|
print("\n⤵ Empty Columns:", res_qc["empty_columns"])
|
@@ -7595,7 +8367,7 @@ def df_qc(
|
|
7595
8367
|
|
7596
8368
|
if "vif" in res_qc:
|
7597
8369
|
print("\n⤵ Features with High VIF (>|5|):")
|
7598
|
-
|
8370
|
+
display(res_qc["vif"].style.background_gradient(cmap="coolwarm", axis=0))
|
7599
8371
|
|
7600
8372
|
if any(res_qc["high_cardinality_categoricals"]):
|
7601
8373
|
print("\n⤵ High Cardinality Categorical Columns (>|50 unique|):")
|
@@ -7614,6 +8386,8 @@ def df_qc(
|
|
7614
8386
|
print("\nWarnings:")
|
7615
8387
|
for warning in res_qc["warnings"]:
|
7616
8388
|
print(" -", warning)
|
8389
|
+
|
8390
|
+
pd.reset_option("display.max_seq_items")
|
7617
8391
|
if plot_:
|
7618
8392
|
df_qc_plots(data=data, res_qc=res_qc, max_cols=max_cols,hue=hue,dir_save=dir_save)
|
7619
8393
|
if output or not plot_:
|
@@ -7632,7 +8406,7 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7632
8406
|
if isinstance(columns, (list,pd.core.indexes.base.Index)):
|
7633
8407
|
data=data[columns]
|
7634
8408
|
len_total = len(res_qc)
|
7635
|
-
n_row, n_col = int((len_total + 10)
|
8409
|
+
n_row, n_col = int((len_total + 10)), 3
|
7636
8410
|
nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row],verbose=False)
|
7637
8411
|
|
7638
8412
|
missing_data = res_qc["missing_values"][res_qc["missing_values"] > 0].sort_values(
|
@@ -7789,8 +8563,10 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7789
8563
|
title="Dtypes",
|
7790
8564
|
ylabel="#",
|
7791
8565
|
ax=ax_dtype_counts,
|
7792
|
-
fontsize=8
|
8566
|
+
fontsize=8 if len(dtype_counts.index)<=20 else 6,
|
7793
8567
|
)
|
8568
|
+
# from .plot import pie
|
8569
|
+
# pie()
|
7794
8570
|
|
7795
8571
|
# High cardinality: Show top categorical columns by unique value count
|
7796
8572
|
high_cardinality = res_qc["high_cardinality_categoricals"]
|
@@ -7871,16 +8647,17 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7871
8647
|
title="Correlation Heatmap",
|
7872
8648
|
ax=ax_heatmap
|
7873
8649
|
)
|
7874
|
-
# save figure
|
7875
|
-
if dir_save:
|
7876
|
-
|
8650
|
+
# # save figure
|
8651
|
+
# if dir_save:
|
8652
|
+
# figsave(dir_save,f"qc_plot_{now_}.pdf")
|
7877
8653
|
|
7878
8654
|
if columns is not None:
|
7879
8655
|
if isinstance(columns, (list,pd.core.indexes.base.Index)):
|
7880
8656
|
data=data[columns]
|
7881
|
-
|
7882
|
-
|
7883
|
-
|
8657
|
+
|
8658
|
+
# len_total = len(res_qc)
|
8659
|
+
# n_row, n_col = int((len_total + 10) / 3), 3
|
8660
|
+
# nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row],verbose=False)
|
7884
8661
|
#! check distribution
|
7885
8662
|
data_num = data.select_dtypes(include=np.number)
|
7886
8663
|
if len(data_num) > max_cols:
|
@@ -7907,7 +8684,43 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7907
8684
|
figsets(ylabel=f'Q-Q Plot:{column}',title=None)
|
7908
8685
|
# save figure
|
7909
8686
|
if dir_save:
|
7910
|
-
figsave(dir_save,f"
|
8687
|
+
figsave(dir_save,f"qc_plot_{now_}.pdf")
|
8688
|
+
|
8689
|
+
def df_corr(df: pd.DataFrame, method="pearson"):
    """
    Compute correlation coefficients and p-values for a DataFrame.

    Parameters:
    - df (pd.DataFrame): Input DataFrame with numeric data.
    - method (str): Correlation method ("pearson", "spearman", "kendall");
      resolved against these names through ``strcmp``.

    Returns:
    - corr_matrix (pd.DataFrame): Correlation coefficient matrix.
    - pval_matrix (pd.DataFrame): P-value matrix.

    Notes:
    - Missing values are dropped pairwise (per column pair) before the test,
      so a single NaN no longer turns the whole coefficient into NaN.
    - A pair with fewer than two complete observations is left as NaN.
    - The diagonal is fixed to corr=1.0 / p=0.0 without running a test.
    """
    from itertools import combinations
    from scipy.stats import pearsonr, spearmanr, kendalltau

    methods = ["pearson", "spearman", "kendall"]
    method = strcmp(method, methods)[0]
    methods_dict = {"pearson": pearsonr, "spearman": spearmanr, "kendall": kendalltau}
    correlation_func = methods_dict[method]

    cols = df.columns
    corr_matrix = pd.DataFrame(index=cols, columns=cols, dtype=float)
    pval_matrix = pd.DataFrame(index=cols, columns=cols, dtype=float)

    for col in cols:
        corr_matrix.loc[col, col] = 1.0
        pval_matrix.loc[col, col] = 0.0

    # All three statistics are symmetric: evaluate each unordered pair once
    # and mirror the result.
    for col1, col2 in combinations(cols, 2):
        pair = df[[col1, col2]].dropna()
        if len(pair) < 2:
            continue  # not enough data; keep NaN in both matrices
        corr, pval = correlation_func(pair[col1], pair[col2])
        corr_matrix.loc[col1, col2] = corr
        corr_matrix.loc[col2, col1] = corr
        pval_matrix.loc[col1, col2] = pval
        pval_matrix.loc[col2, col1] = pval

    return corr_matrix, pval_matrix
|
8723
|
+
|
7911
8724
|
def use_pd(
|
7912
8725
|
func_name="excel",
|
7913
8726
|
verbose=True,
|
@@ -7927,3 +8740,135 @@ def use_pd(
|
|
7927
8740
|
except Exception as e:
|
7928
8741
|
if verbose:
|
7929
8742
|
print(e)
|
8743
|
+
|
8744
|
+
def get_phone(phone_number: str, region: str = None, verbose=True):
    """
    Analyze a phone number and collect what the ``phonenumbers`` package
    knows about it.

    usage:
        info = get_phone(15237654321, "DE")
        preview(info)

    Args:
        phone_number (str): The phone number to analyze (non-strings are
            converted with ``str``).
        region (str): None (Default). Tries to work with international numbers
            including country codes; otherwise, uses the specified region
            (upper-cased before use).
        verbose (bool): When True the result is shown with ``preview``.

    Returns:
        dict: For a valid number: formatting variants, country/region,
            location, carrier, time zone data, number type and dialing hints.
            For an invalid or unparsable number: ``{"valid": False,
            "error": ...}`` (plus ``"suggested_fix"`` for invalid numbers).
    """
    import phonenumbers
    from phonenumbers import geocoder, carrier, timezone, number_type
    from datetime import datetime
    import pytz
    from tzlocal import get_localzone

    if not isinstance(phone_number, str):
        phone_number = str(phone_number)
    if isinstance(region, str):
        region = region.upper()

    type_labels = {
        phonenumbers.PhoneNumberType.FIXED_LINE: "Fixed Line",
        phonenumbers.PhoneNumberType.MOBILE: "Mobile",
        phonenumbers.PhoneNumberType.FIXED_LINE_OR_MOBILE: "Fixed Line or Mobile",
        phonenumbers.PhoneNumberType.TOLL_FREE: "Toll Free",
        phonenumbers.PhoneNumberType.PREMIUM_RATE: "Premium Rate",
        phonenumbers.PhoneNumberType.SHARED_COST: "Shared Cost",
        phonenumbers.PhoneNumberType.VOIP: "VOIP",
        phonenumbers.PhoneNumberType.PERSONAL_NUMBER: "Personal Number",
        phonenumbers.PhoneNumberType.PAGER: "Pager",
        phonenumbers.PhoneNumberType.UAN: "UAN",
        phonenumbers.PhoneNumberType.UNKNOWN: "Unknown",
    }

    try:
        # Parse the phone number
        parsed_number = phonenumbers.parse(phone_number, region)

        # Validate the phone number
        valid = phonenumbers.is_valid_number(parsed_number)
        possible = phonenumbers.is_possible_number(parsed_number)

        if not valid:
            res = {
                "valid": False,
                "error": "Invalid phone number",
                "suggested_fix": (
                    phonenumbers.example_number(region) if region else "Unknown"
                ),
            }
        else:
            # Basic details
            formatted_international = phonenumbers.format_number(
                parsed_number, phonenumbers.PhoneNumberFormat.INTERNATIONAL
            )
            formatted_national = phonenumbers.format_number(
                parsed_number, phonenumbers.PhoneNumberFormat.NATIONAL
            )
            formatted_e164 = phonenumbers.format_number(
                parsed_number, phonenumbers.PhoneNumberFormat.E164
            )
            country_name = geocoder.country_name_for_number(parsed_number, "en")
            location = geocoder.description_for_number(parsed_number, "en")
            carrier_name = (
                carrier.name_for_number(parsed_number, "en") or "Unknown Carrier"
            )
            kind = number_type(parsed_number)  # looked up once, reused below

            # Time zone of the number. The lookup may yield a name that pytz
            # does not know; in that case the time fields are "Unknown"
            # instead of raising.
            zones = timezone.time_zones_for_number(parsed_number)
            time_zones = zones[0] if zones else None
            try:
                number_tz = pytz.timezone(time_zones) if time_zones else None
            except pytz.UnknownTimeZoneError:
                number_tz = None

            # Offset of the local timezone (current computer's time). Aware
            # datetimes are used so the offset is well-defined even around
            # daylight-saving transitions.
            local_offset = (
                datetime.now(get_localzone()).utcoffset().total_seconds() / 3600
            )

            if number_tz is not None:
                now_there = datetime.now(number_tz)
                current_times = now_there.strftime("%Y-%m-%d %H:%M:%S %Z")
                gmt_offsets = now_there.utcoffset().total_seconds() / 3600
                offset_diff = local_offset - gmt_offsets
                head_time = (
                    "earlier" if offset_diff < 0 else "later" if offset_diff > 0 else ""
                )
                # ":g" keeps half-hour differences (e.g. 5.5) and prints
                # whole hours without a trailing ".0".
                time_zone_diff = f"{head_time} {abs(offset_diff):g} h"
            else:
                current_times = "Unknown"
                time_zone_diff = "Unknown"

            res = {
                "valid": True,
                "possible": possible,
                "formatted": {
                    "international": formatted_international,
                    "national": formatted_national,
                    "e164": formatted_e164,
                },
                "country_code": parsed_number.country_code,
                "country_name": country_name,
                "region_code": geocoder.region_code_for_number(parsed_number),
                "location": location if location else "Unknown",
                "carrier": carrier_name,
                "time_zone": time_zones if time_zones else "Unknown",
                "current_times": current_times,
                "local_offset": f"{local_offset} utcoffset",
                "time_zone_diff": time_zone_diff,
                "number_type": type_labels.get(kind, "Unknown"),
                "is_toll_free": kind == phonenumbers.PhoneNumberType.TOLL_FREE,
                "is_premium_rate": kind == phonenumbers.PhoneNumberType.PREMIUM_RATE,
                # Dialing Information
                "dialing_instructions": (
                    f"Dial {formatted_national} within {country_name}. "
                    f"Dial {formatted_e164} from abroad."
                ),
                "suggested_fix": None,  # only filled in for invalid numbers
                "logs": {
                    "number_analysis_completed": datetime.now().strftime(
                        "%Y-%m-%d %H:%M:%S"
                    ),
                    "raw_input": phone_number,
                    "parsed_number": str(parsed_number),
                },
            }

    except phonenumbers.NumberParseException as e:
        res = {"valid": False, "error": str(e)}

    # Every outcome (valid, invalid, unparsable) is previewed the same way.
    if verbose:
        preview(res)
    return res
|