py2ls 0.2.4.25__py3-none-any.whl → 0.2.4.27__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/corr.py +475 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_autogluon_zeroshot2024.json +2383 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/styles/example/.DS_Store +0 -0
- py2ls/data/usages_sns.json +6 -1
- py2ls/ips.py +1059 -114
- py2ls/ml2ls.py +758 -186
- py2ls/netfinder.py +204 -20
- py2ls/ocr.py +60 -4
- py2ls/plot.py +916 -141
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/METADATA +6 -1
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/RECORD +16 -14
- py2ls/data/usages_pd copy.json +0 -1105
- {py2ls-0.2.4.25.dist-info → py2ls-0.2.4.27.dist-info}/WHEEL +0 -0
py2ls/ips.py
CHANGED
@@ -16,7 +16,12 @@ import warnings
|
|
16
16
|
|
17
17
|
warnings.simplefilter("ignore", category=pd.errors.SettingWithCopyWarning)
|
18
18
|
warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
|
19
|
-
|
19
|
+
warnings.filterwarnings("ignore")
|
20
|
+
import os
|
21
|
+
import shutil
|
22
|
+
import logging
|
23
|
+
from pathlib import Path
|
24
|
+
from datetime import datetime
|
20
25
|
|
21
26
|
def run_once_within(duration=60,reverse=False): # default 60s
|
22
27
|
import time
|
@@ -541,8 +546,7 @@ def is_text(s):
|
|
541
546
|
|
542
547
|
from typing import Any, Union
|
543
548
|
|
544
|
-
|
545
|
-
def shared(*args, strict=True, n_shared=2, verbose=True):
|
549
|
+
def share(*args, strict=True, n_shared=2, verbose=True):
|
546
550
|
"""
|
547
551
|
check the shared elelements in two list.
|
548
552
|
usage:
|
@@ -587,12 +591,68 @@ def shared(*args, strict=True, n_shared=2, verbose=True):
|
|
587
591
|
elements2show = (
|
588
592
|
shared_elements if len(shared_elements) < 10 else shared_elements[:5]
|
589
593
|
)
|
594
|
+
tail = '' if len(shared_elements) < 10 else '......'
|
595
|
+
elements2show.append(tail)
|
590
596
|
print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
|
591
597
|
print("********* checking shared elements *********")
|
592
598
|
return shared_elements
|
593
599
|
|
600
|
+
def shared(*args, n_shared=None, verbose=True, **kwargs):
    """
    Find the elements shared between several lists.

    Args:
        *args: Two or more lists, or a single list that contains the lists.
            Every list is passed through ``flatten`` before comparison.
        n_shared (int | None): ``None`` (default) keeps only the elements
            present in *every* list. An integer keeps the elements present
            in at least ``n_shared`` of the lists.
        verbose (bool): Print a short report of the result.
        **kwargs: Accepted and ignored, so that calls written for the older
            signature (e.g. ``strict=True``) keep working.

    Returns:
        list: The shared elements, in order of first appearance. An empty
        list when no lists are given or an input is not a list.

    usage:
        list1 = [1, 2, 3, 4, 5]
        list2 = [4, 5, 6, 7, 8]
        list3 = [5, 6, 9, 10]
        a = shared(list1, list2, list3)  # [5]
    """
    if verbose:
        print("\n********* checking shared elements *********")

    if len(args) == 1 and isinstance(args[0], list):
        lists = args[0]  # Unpack the single list
    else:
        lists = args  # Use the provided arguments as lists
    flattened_lists = [flatten(lst, verbose=verbose) for lst in lists]

    # Nothing to compare: avoid the IndexError on flattened_lists[0].
    if not flattened_lists:
        if verbose:
            print(f"{' ' * 2}No lists were provided.")
        return []

    # Ensure all arguments are lists
    if any(not isinstance(lst, list) for lst in flattened_lists):
        print(f"{' ' * 2}All inputs must be lists.")
        return []

    if n_shared is None:
        # Strict mode: require elements to be in all lists. The first list
        # drives the output order so the result is deterministic.
        common = set(flattened_lists[0]).intersection(*flattened_lists[1:])
        shared_elements = [
            item for item in dict.fromkeys(flattened_lists[0]) if item in common
        ]
    else:
        from collections import Counter

        # Count each element once per list, so duplicates inside a single
        # list cannot reach the threshold on their own.
        element_count = Counter(
            item for sublist in flattened_lists for item in dict.fromkeys(sublist)
        )
        # Get elements that appear in at least n_shared lists
        shared_elements = [
            item for item, count in element_count.items() if count >= n_shared
        ]

    shared_elements = flatten(shared_elements, verbose=verbose)
    if verbose:
        elements2show = (
            shared_elements if len(shared_elements) < 10 else shared_elements[:5]
        )
        print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
        print("********* checking shared elements *********")
    return shared_elements
|
654
|
+
|
655
|
+
def share_not(*args, n_shared=None, verbose=False):
|
596
656
|
"""
|
597
657
|
To find the elements in list1 that are not shared with list2 while maintaining the original order of list1
|
598
658
|
usage:
|
@@ -600,7 +660,19 @@ def not_shared(*args, strict=True, n_shared=2, verbose=False):
|
|
600
660
|
list2 = [4, 5, 6, 7, 8]
|
601
661
|
not_shared(list1,list2)# output [1,3]
|
602
662
|
"""
|
603
|
-
_common = shared(*args,
|
663
|
+
_common = shared(*args, n_shared=n_shared, verbose=verbose)
|
664
|
+
list1 = flatten(args[0], verbose=verbose)
|
665
|
+
_not_shared = [item for item in list1 if item not in _common]
|
666
|
+
return _not_shared
|
667
|
+
def not_shared(*args, n_shared=None, verbose=False):
    """
    Return the elements of the first list that are not among the shared ones.

    The shared elements are computed with ``shared(*args, n_shared=n_shared)``.
    The first argument is then flattened and filtered, so the original order
    (and any duplicates) of the first list is kept.

    usage:
        list1 = [1, 8, 3, 3, 4, 5]
        list2 = [4, 5, 6, 7, 8]
        not_shared(list1, list2)  # output [1, 3, 3]
    """
    common = shared(*args, n_shared=n_shared, verbose=verbose)
    remaining = []
    for element in flatten(args[0], verbose=verbose):
        if element not in common:
            remaining.append(element)
    return remaining
|
@@ -806,6 +878,19 @@ def counter(list_, verbose=True):
|
|
806
878
|
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
807
879
|
# print(f"Compute the sum of the counts:\n{c.total()}")
|
808
880
|
|
881
|
+
def dict2df(dict_, fill=None):
    """
    Build a DataFrame from a dict whose columns may have different lengths.

    Args:
        dict_ (dict): Mapping of column name -> values. Lists and tuples are
            used as columns; any other value is treated as a single cell.
        fill: Value used to pad the shorter columns (default ``None``).

    Returns:
        pd.DataFrame: One column per key, every column padded with ``fill``
        to the length of the longest one.

    The input dict and its lists are left untouched; padding is applied to
    copies.
    """
    columns = {}
    for key, value in dict_.items():
        # The value part needs to be a list: copy list-likes, wrap scalars.
        columns[key] = list(value) if isinstance(value, (list, tuple)) else [value]

    # get the max_length
    len_max = max((len(column) for column in columns.values()), default=0)

    # pad every column to the same length
    for column in columns.values():
        column.extend([fill] * (len_max - len(column)))
    return pd.DataFrame.from_dict(columns)
|
809
894
|
|
810
895
|
def str2time(time_str, fmt="24"):
|
811
896
|
"""
|
@@ -1254,7 +1339,7 @@ def docx2pdf(dir_docx, dir_pdf=None):
|
|
1254
1339
|
convert(dir_docx)
|
1255
1340
|
|
1256
1341
|
|
1257
|
-
def img2pdf(dir_img, kind=
|
1342
|
+
def img2pdf(dir_img, kind=None, page=None, dir_save=None, page_size="a4", dpi=300):
|
1258
1343
|
import img2pdf as image2pdf
|
1259
1344
|
|
1260
1345
|
def mm_to_point(size):
|
@@ -1263,7 +1348,8 @@ def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=
|
|
1263
1348
|
def set_dpi(x):
|
1264
1349
|
dpix = dpiy = x
|
1265
1350
|
return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
|
1266
|
-
|
1351
|
+
if kind is None:
|
1352
|
+
_, kind = os.path.splitext(dir_img)
|
1267
1353
|
if not kind.startswith("."):
|
1268
1354
|
kind = "." + kind
|
1269
1355
|
if dir_save is None:
|
@@ -1286,8 +1372,10 @@ def img2pdf(dir_img, kind="jpeg", page=None, dir_save=None, page_size="a4", dpi=
|
|
1286
1372
|
continue
|
1287
1373
|
imgs.append(path)
|
1288
1374
|
else:
|
1289
|
-
imgs = [
|
1290
|
-
|
1375
|
+
imgs = [
|
1376
|
+
# os.path.isdir(dir_img),
|
1377
|
+
dir_img]
|
1378
|
+
print(imgs)
|
1291
1379
|
if page_size:
|
1292
1380
|
if isinstance(page_size, str):
|
1293
1381
|
pdf_in_mm = mm_to_point(paper_size(page_size))
|
@@ -1983,7 +2071,6 @@ def fload(fpath, kind=None, **kwargs):
|
|
1983
2071
|
|
1984
2072
|
def load_csv(fpath, **kwargs):
|
1985
2073
|
from pandas.errors import EmptyDataError
|
1986
|
-
|
1987
2074
|
engine = kwargs.pop("engine", "pyarrow")# default: None
|
1988
2075
|
sep = kwargs.pop("sep", None)# default: ','
|
1989
2076
|
index_col = kwargs.pop("index_col", None)# default: None
|
@@ -1994,13 +2081,20 @@ def fload(fpath, kind=None, **kwargs):
|
|
1994
2081
|
comment = kwargs.pop("comment", None)# default: None
|
1995
2082
|
fmt = kwargs.pop("fmt", False)# default:
|
1996
2083
|
chunksize = kwargs.pop("chunksize", None)# default: None
|
2084
|
+
|
2085
|
+
#check filesize
|
2086
|
+
f_size=round(os.path.getsize(fpath) / 1024 / 1024, 3)
|
2087
|
+
if f_size>=50: #50 MB
|
2088
|
+
if chunksize is None:
|
2089
|
+
chunksize = 5000
|
2090
|
+
print(f"file size is {f_size}MB, then set the chunksize with {chunksize}")
|
1997
2091
|
engine = "c" if chunksize else engine # when chunksize, recommend 'c'
|
1998
2092
|
low_memory = kwargs.pop("low_memory", True)# default: True
|
1999
2093
|
low_memory = (
|
2000
2094
|
False if chunksize else True
|
2001
2095
|
) # when chunksize, recommend low_memory=False # default:
|
2002
2096
|
verbose = kwargs.pop("verbose", False)
|
2003
|
-
if run_once_within():
|
2097
|
+
if run_once_within(reverse=True):
|
2004
2098
|
use_pd("read_csv", verbose=verbose)
|
2005
2099
|
|
2006
2100
|
if comment is None:# default: None
|
@@ -2176,7 +2270,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2176
2270
|
def load_excel(fpath, **kwargs):
|
2177
2271
|
engine = kwargs.get("engine", "openpyxl")
|
2178
2272
|
verbose = kwargs.pop("verbose", False)
|
2179
|
-
if run_once_within():
|
2273
|
+
if run_once_within(reverse=True):
|
2180
2274
|
use_pd("read_excel", verbose=verbose)
|
2181
2275
|
df = pd.read_excel(fpath, engine=engine, **kwargs)
|
2182
2276
|
try:
|
@@ -2206,7 +2300,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2206
2300
|
engine = kwargs.get("engine", "pyarrow")
|
2207
2301
|
verbose = kwargs.pop("verbose", False)
|
2208
2302
|
|
2209
|
-
if run_once_within():
|
2303
|
+
if run_once_within(reverse=True):
|
2210
2304
|
use_pd("read_parquet", verbose=verbose)
|
2211
2305
|
try:
|
2212
2306
|
df = pd.read_parquet(fpath, engine=engine, **kwargs)
|
@@ -2383,13 +2477,13 @@ def fload(fpath, kind=None, **kwargs):
|
|
2383
2477
|
return load_xml(fpath)
|
2384
2478
|
elif kind in ["csv", "tsv"]:
|
2385
2479
|
# verbose = kwargs.pop("verbose", False)
|
2386
|
-
if run_once_within():
|
2480
|
+
if run_once_within(reverse=True):
|
2387
2481
|
use_pd("read_csv")
|
2388
2482
|
content = load_csv(fpath, **kwargs)
|
2389
2483
|
return content
|
2390
2484
|
elif kind == "pkl":
|
2391
2485
|
verbose = kwargs.pop("verbose", False)
|
2392
|
-
if run_once_within():
|
2486
|
+
if run_once_within(reverse=True):
|
2393
2487
|
use_pd("read_pickle")
|
2394
2488
|
return pd.read_pickle(fpath, **kwargs)
|
2395
2489
|
elif kind in ["ods", "ods", "odt"]:
|
@@ -2420,12 +2514,12 @@ def fload(fpath, kind=None, **kwargs):
|
|
2420
2514
|
return load_ipynb(fpath, **kwargs)
|
2421
2515
|
elif kind in ["parquet", "snappy"]:
|
2422
2516
|
verbose = kwargs.pop("verbose", False)
|
2423
|
-
if run_once_within():
|
2517
|
+
if run_once_within(reverse=True):
|
2424
2518
|
use_pd("read_parquet")
|
2425
2519
|
return load_parquet(fpath, **kwargs)
|
2426
2520
|
elif kind == "feather":
|
2427
2521
|
verbose = kwargs.pop("verbose", False)
|
2428
|
-
if run_once_within():
|
2522
|
+
if run_once_within(reverse=True):
|
2429
2523
|
use_pd("read_feather")
|
2430
2524
|
content = pd.read_feather(fpath, **kwargs)
|
2431
2525
|
return content
|
@@ -2684,7 +2778,7 @@ def fsave(
|
|
2684
2778
|
# https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
|
2685
2779
|
|
2686
2780
|
verbose = kwargs.pop("verbose", False)
|
2687
|
-
if run_once_within():
|
2781
|
+
if run_once_within(reverse=True):
|
2688
2782
|
use_pd("to_csv", verbose=verbose)
|
2689
2783
|
kwargs_csv = dict(
|
2690
2784
|
path_or_buf=None,
|
@@ -2716,7 +2810,7 @@ def fsave(
|
|
2716
2810
|
def save_xlsx(fpath, data, **kwargs):
|
2717
2811
|
verbose = kwargs.pop("verbose", False)
|
2718
2812
|
sheet_name = kwargs.pop("sheet_name", "Sheet1")
|
2719
|
-
if run_once_within():
|
2813
|
+
if run_once_within(reverse=True):
|
2720
2814
|
use_pd("to_excel", verbose=verbose)
|
2721
2815
|
if any(kwargs):
|
2722
2816
|
format_excel(df=data, filename=fpath, **kwargs)
|
@@ -3131,21 +3225,437 @@ def isa(content, kind):
|
|
3131
3225
|
return False
|
3132
3226
|
|
3133
3227
|
|
3134
|
-
|
3228
|
+
def get_os(full=False, verbose=False):
    """Return the OS family name, or a comprehensive system report.

    Args:
        full (bool): False (default) returns only the OS family
            ("macOS", "Windows", "Linux", or None for an unrecognized
            platform). True returns a nested dict with the keys
            "timestamp", "os", "system", "cpu", "memory", "disk", "disk io",
            "network", "network io", "gpu", "temperatures", "battery",
            "active processes", "environment" and "additional info".
        verbose (bool): True, show the result with ``preview``.

    Returns:
        str | None | dict: see ``full``.

    The detailed report needs ``psutil`` (and optionally ``GPUtil`` and
    ``cpuinfo``); these are imported only when ``full=True`` so the quick
    OS check has no third-party requirements.

    usage:
        info = get_os(full=True, verbose=False)
    """
    import os
    import platform
    import subprocess
    import sys

    def get_os_type():
        """Map sys.platform onto a friendly OS family name."""
        os_name = sys.platform
        if "dar" in os_name:  # tested first: "darwin" also contains "win"
            return "macOS"
        if "win" in os_name:
            return "Windows"
        if "linux" in os_name:
            return "Linux"
        print(f"{os_name}, returned 'None'")
        return None

    def collect_system_info():
        """Gather the detailed report returned when full=True."""
        import socket
        import sysconfig
        import uuid
        from datetime import datetime, timedelta

        import psutil

        def get_os_info():
            """Get the detailed OS name, version, and other platform-specific details."""

            def get_mac_os_info():
                """Get detailed macOS version and product name."""
                try:
                    sw_vers = subprocess.check_output(["sw_vers"]).decode("utf-8")
                    fields = {}
                    for line in sw_vers.splitlines():
                        key, sep, value = line.partition(":")
                        if sep:
                            fields[key.strip()] = value.strip()
                    # Return the formatted macOS name, version, and build
                    return (
                        f"{fields['ProductName']} {fields['ProductVersion']} "
                        f"(Build {fields['BuildVersion']})"
                    )
                except Exception as e:
                    return f"Error retrieving macOS name: {str(e)}"

            def get_windows_info():
                """Get detailed Windows version and edition."""
                try:
                    release = platform.release()
                    version = platform.win32_ver()[0]
                    # Edition comes from a fixed system command (no user input).
                    edition = (
                        subprocess.check_output("wmic os get caption", shell=True)
                        .decode("utf-8")
                        .strip()
                        .split("\n")[1]
                        .strip()  # drop the trailing "\r" left by wmic
                    )
                    return f"Windows {version} {release} ({edition})"
                except Exception as e:
                    return f"Error retrieving Windows information: {str(e)}"

            def get_linux_info():
                """Get detailed Linux version and distribution info."""
                try:
                    # Check /etc/os-release for modern Linux distros
                    fields = {}
                    with open("/etc/os-release") as f:
                        for line in f:
                            key, sep, value = line.strip().partition("=")
                            if sep:
                                fields[key] = value.replace('"', "")
                    # Exact keys: a "VERSION" prefix match could pick VERSION_ID.
                    os_name = fields.get("NAME", "Linux")
                    os_version = fields.get("VERSION") or fields.get("VERSION_ID", "")

                    # For additional info, check for the package manager (e.g., apt, dnf)
                    package_manager = "Unknown"
                    if os.path.exists("/usr/bin/apt"):
                        package_manager = "APT (Debian/Ubuntu)"
                    elif os.path.exists("/usr/bin/dnf"):
                        package_manager = "DNF (Fedora/RHEL)"

                    distro = " ".join(part for part in (os_name, os_version) if part)
                    return f"{distro} (Package Manager: {package_manager})"
                except Exception as e:
                    return f"Error retrieving Linux information: {str(e)}"

            os_name = platform.system()
            if os_name == "Darwin":
                return get_mac_os_info()
            if os_name == "Windows":
                return get_windows_info()
            if os_name == "Linux":
                return get_linux_info()
            return f"Unknown OS: {os_name} {platform.release()}"

        def get_system_uptime():
            """Returns system uptime as a human-readable string."""
            boot_time = datetime.fromtimestamp(psutil.boot_time())
            uptime = datetime.now() - boot_time
            return str(uptime).split(".")[0]  # Remove microseconds

        def get_active_processes(limit=10):
            """Return the `limit` processes with the highest CPU usage."""
            processes = []
            for proc in psutil.process_iter(
                ["pid", "name", "cpu_percent", "memory_percent"]
            ):
                try:
                    processes.append(proc.info)
                except psutil.NoSuchProcess:
                    pass
            # Handle NoneType values by treating them as 0
            processes.sort(key=lambda x: x["cpu_percent"] or 0, reverse=True)
            return processes[:limit]

        def get_virtual_environment_info():
            """Checks if the script is running in a virtual environment and returns details."""
            try:
                if hasattr(sys, "real_prefix") or (
                    hasattr(sys, "base_prefix") and sys.base_prefix != sys.prefix
                ):
                    return {
                        "Virtual Environment": sys.prefix,
                        # Ask the interpreter instead of guessing the layout.
                        "Site-Packages Path": sysconfig.get_path("purelib"),
                    }
                return {"Virtual Environment": "Not in a virtual environment"}
            except Exception as e:
                return {"Error": str(e)}

        def get_temperatures():
            """Returns temperature sensor readings."""
            try:
                return psutil.sensors_temperatures(fahrenheit=False)
            except AttributeError:
                # psutil does not expose this function on every platform.
                return {"Error": "Temperature sensors not available"}

        def get_battery_status():
            """Returns battery status."""
            battery = psutil.sensors_battery()
            if not battery:
                return {"Status": "No battery detected"}
            if battery.secsleft == psutil.POWER_TIME_UNLIMITED:
                time_left = "Charging/Unlimited"
            elif battery.secsleft == psutil.POWER_TIME_UNKNOWN:
                # A negative sentinel would otherwise render as "-1 day, ...".
                time_left = "Unknown"
            else:
                time_left = str(timedelta(seconds=battery.secsleft))
            return {
                "Percentage": battery.percent,
                "Plugged In": battery.power_plugged,
                "Time Left": time_left,
            }

        def get_disk_io():
            """Returns disk I/O statistics."""
            disk_io = psutil.disk_io_counters()
            if disk_io is None:  # e.g. no physical disks visible
                return {"Error": "Disk I/O counters not available"}
            return {
                "Read (GB)": disk_io.read_bytes / (1024**3),
                "Write (GB)": disk_io.write_bytes / (1024**3),
                "Read Count": disk_io.read_count,
                "Write Count": disk_io.write_count,
            }

        def get_network_io():
            """Returns network I/O statistics."""
            net_io = psutil.net_io_counters()
            return {
                "Bytes Sent (GB)": net_io.bytes_sent / (1024**3),
                "Bytes Received (GB)": net_io.bytes_recv / (1024**3),
                "Packets Sent": net_io.packets_sent,
                "Packets Received": net_io.packets_recv,
            }

        def run_shell_command(command):
            """Runs a shell command and returns its output."""
            try:
                result = subprocess.run(
                    command,
                    shell=True,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                )
                return (
                    result.stdout.strip()
                    if result.returncode == 0
                    else result.stderr.strip()
                )
            except Exception as e:
                return f"Error running command: {e}"

        hostname = socket.gethostname()
        try:
            ip_address = socket.gethostbyname(hostname)
        except OSError:  # hostname not resolvable
            ip_address = "Unknown"

        # Most-significant byte first, the conventional MAC notation.
        node = uuid.getnode()
        mac_address = ":".join(
            "{:02x}".format((node >> shift) & 0xFF) for shift in range(40, -1, -8)
        )

        try:
            import cpuinfo

            cpu_brand = cpuinfo.get_cpu_info().get("brand_raw", "Unknown")
        except ImportError:
            cpu_brand = "Unknown"

        cpu_freq = psutil.cpu_freq()  # may be None when unsupported
        memory = psutil.virtual_memory()
        swap = psutil.swap_memory()

        # Each value is queried once and reused in both sections below.
        shell = os.environ.get("SHELL", "Unknown")
        terminal = run_shell_command("echo $TERM")
        kernel_version = platform.uname().release
        virtualization = run_shell_command("systemd-detect-virt")
        in_docker = os.path.exists("/.dockerenv")  # Check for Docker

        system_info = {
            "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            "os": get_os_type(),
            "system": {
                "os": get_os_info(),
                "platform": f"{platform.system()} {platform.release()}",
                "version": platform.version(),
                "machine": platform.machine(),
                "processor": platform.processor(),
                "architecture": platform.architecture()[0],
                "hostname": hostname,
                "ip address": ip_address,
                "mac address": mac_address,
                "cpu brand": cpu_brand,
                "python version": platform.python_version(),
                "uptime": get_system_uptime(),
            },
            "cpu": {
                "physical cores": psutil.cpu_count(logical=False),
                "logical cores": psutil.cpu_count(logical=True),
                "max frequency (MHz)": getattr(cpu_freq, "max", "N/A"),
                "min frequency (MHz)": getattr(cpu_freq, "min", "N/A"),
                "current frequency (MHz)": getattr(cpu_freq, "current", "N/A"),
                "usage per core (%)": psutil.cpu_percent(percpu=True),
                "total cpu Usage (%)": psutil.cpu_percent(),
                "load average (1m, 5m, 15m)": (
                    os.getloadavg() if hasattr(os, "getloadavg") else "N/A"
                ),
            },
            "memory": {
                "total memory (GB)": memory.total / (1024**3),
                "available memory (GB)": memory.available / (1024**3),
                "used memory (GB)": memory.used / (1024**3),
                "memory usage (%)": memory.percent,
                "swap total (GB)": swap.total / (1024**3),
                "swap free (GB)": swap.free / (1024**3),
                "swap used (GB)": swap.used / (1024**3),
                "swap usage (%)": swap.percent,
            },
            "disk": {},
            "disk io": get_disk_io(),
            "network": {},
            "network io": get_network_io(),
            "gpu": [],
            "temperatures": get_temperatures(),
            "battery": get_battery_status(),
            "active processes": get_active_processes(),
            "environment": {
                "user": os.getenv("USER") or os.getenv("USERNAME", "Unknown"),
                # NOTE(review): the full environment can contain secrets
                # (tokens, passwords); be careful where this report is shown.
                "environment variables": dict(os.environ),
                "virtual environment info": get_virtual_environment_info(),
                "docker running": in_docker,
                "shell": shell,
                "default terminal": terminal,
                "kernel version": kernel_version,
                "virtualization type": virtualization,
            },
            "additional info": {
                "Shell": shell,
                "default terminal": terminal,
                "kernel version": kernel_version,
                "virtualization type": virtualization,
                "running in docker": in_docker,
            },
        }

        # Disk Information
        for partition in psutil.disk_partitions():
            try:
                usage = psutil.disk_usage(partition.mountpoint)
                system_info["disk"][partition.device] = {
                    "mountpoint": partition.mountpoint,
                    "file system type": partition.fstype,
                    "total size (GB)": usage.total / (1024**3),
                    "used (GB)": usage.used / (1024**3),
                    "free (GB)": usage.free / (1024**3),
                    "usage (%)": usage.percent,
                }
            except PermissionError:
                system_info["disk"][partition.device] = "Permission Denied"

        # Network Information
        for interface_name, interface_addresses in psutil.net_if_addrs().items():
            entries = system_info["network"][interface_name] = []
            for address in interface_addresses:
                # Compare the enum members, not their string form.
                if address.family == socket.AF_INET:
                    entries.append(
                        {
                            "ip address": address.address,
                            "netmask": address.netmask,
                            "broadcast ip": address.broadcast,
                        }
                    )
                elif address.family == psutil.AF_LINK:  # link-layer (MAC) entry
                    entries.append(
                        {
                            "mac address": address.address,
                            "netmask": address.netmask,
                            "broadcast mac": address.broadcast,
                        }
                    )

        # GPU Information
        try:
            import GPUtil

            gpus = GPUtil.getGPUs()
        except ImportError:
            gpus = []
        for gpu in gpus:
            gpu_info = {
                "name": gpu.name,
                "load (%)": gpu.load * 100,
                "free memory (MB)": gpu.memoryFree,
                "used memory (MB)": gpu.memoryUsed,
                "total memory (MB)": gpu.memoryTotal,
                "driver version": gpu.driver,
                "temperature (°C)": gpu.temperature,
            }
            if hasattr(gpu, "powerDraw"):
                gpu_info["Power Draw (W)"] = gpu.powerDraw
            if hasattr(gpu, "powerLimit"):
                gpu_info["Power Limit (W)"] = gpu.powerLimit
            system_info["gpu"].append(gpu_info)

        return system_info

    res = collect_system_info() if full else get_os_type()
    if verbose:
        try:
            preview(res)
        except Exception as e:
            print(e)
    return res
|
3149
3659
|
|
3150
3660
|
|
3151
3661
|
def listdir(
|
@@ -3168,8 +3678,9 @@ def listdir(
|
|
3168
3678
|
print(ls)
|
3169
3679
|
df_all = pd.DataFrame(
|
3170
3680
|
{
|
3171
|
-
"
|
3172
|
-
"
|
3681
|
+
"name": ls,
|
3682
|
+
"path": [os.path.join(rootdir, i) for i in ls],
|
3683
|
+
"kind":[os.path.splitext(i)[1] for i in ls]
|
3173
3684
|
}
|
3174
3685
|
)
|
3175
3686
|
if verbose:
|
@@ -3308,7 +3819,94 @@ def listfunc(lib_name, opt="call"):
|
|
3308
3819
|
def func_list(lib_name, opt="call"):
|
3309
3820
|
return list_func(lib_name, opt=opt)
|
3310
3821
|
|
3822
|
+
def copy(src, dst, overwrite=False):
    """Copy a file or a folder from src to dst.

    Args:
        src: Path of the file or folder to copy.
        dst: Target path. For a file, an existing folder means "copy into
            it". For a folder, the copy is always created as ``dst/<name>``.
        overwrite (bool): True replaces an existing target. False (default)
            keeps it and appends ``_HHMMSS`` to the name of the new copy.

    Returns:
        Path | None: The path that was written, or None when the copy
        failed (the error is logged, not raised).
    """
    try:
        src = Path(src)
        dst = Path(dst)
        stamp = datetime.now().strftime("%H%M%S")
        if not src.is_dir():
            if dst.is_dir():
                dst = dst / src.name
            if dst.exists():
                if overwrite:
                    dst.unlink()
                else:
                    dst = dst.with_name(f"{dst.stem}_{stamp}{dst.suffix}")
            shutil.copy(src, dst)
        else:
            dst = dst / src.name
            if dst.exists():
                if overwrite:
                    shutil.rmtree(dst)  # Remove existing directory
                else:
                    # Use the full name: a folder such as "pkg.v1" must not
                    # lose the part after the dot.
                    dst = dst.with_name(f"{dst.name}_{stamp}")
            shutil.copytree(src, dst)
        print(f"\n Done! copy to {dst}\n")
        return dst
    except Exception as e:
        logging.error(f"Failed to copy {src} to {dst}: {e}")
        return None
|
3850
|
+
|
3851
|
+
def move(src, dst, overwrite=False):
    """Move src to dst; an alias that forwards every argument to cut()."""
    outcome = cut(src=src, dst=dst, overwrite=overwrite)
    return outcome
|
3311
3853
|
|
3854
|
+
def cut(src, dst, overwrite=False):
    """Move (cut and paste) a file or folder from src to dst.

    Args:
        src: Path of the file or folder to move.
        dst: Target path; an existing folder means "move into it".
        overwrite (bool): True replaces an existing target. False (default)
            keeps it and appends ``_HHMMSS`` to the name of the moved item.

    Returns:
        Path | None: The final location, or None when the move failed
        (the error is logged, not raised).
    """
    try:
        src = Path(src)
        dst = Path(dst)
        if dst.is_dir():
            dst = dst / src.name
        if dst.exists():
            if overwrite:
                # Clear the target first; otherwise shutil.move nests a
                # folder inside the existing one or fails on some platforms.
                # Never remove the target when it *is* the source.
                if not dst.samefile(src):
                    if dst.is_dir() and not dst.is_symlink():
                        shutil.rmtree(dst)
                    else:
                        dst.unlink()
            else:
                stamp = datetime.now().strftime("%H%M%S")
                dst = dst.with_name(f"{dst.stem}_{stamp}{dst.suffix}")
        shutil.move(src, dst)
        print(f"\n Done! moved to {dst}\n")
        return dst
    except Exception as e:
        logging.error(f"Failed to move file from {src} to {dst}: {e}")
        return None
|
3870
|
+
|
3871
|
+
def delete(fpath):
    """Delete a file/folder.

    Args:
        fpath: Path of the file, symlink or folder to remove. Folders are
            removed recursively; a symlink is removed without touching its
            target.

    Nothing is raised: a missing path is reported with a message and any
    failure is logged.
    """
    try:
        fpath = Path(fpath)
        if fpath.is_dir() and not fpath.is_symlink():  # real folder
            shutil.rmtree(fpath)  # Remove existing directory
            print(f"\n Done! delete {fpath}\n")
        elif fpath.exists() or fpath.is_symlink():  # file, or (broken) link
            fpath.unlink()
            print(f"\n Done! delete {fpath}\n")
        else:
            print(f"File '{fpath}' does not exist.")
    except Exception as e:
        logging.error(f"Failed to delete {fpath}: {e}")
|
3889
|
+
def rename(fpath, dst, smart=True):
    """Rename a file or folder.

    With ``smart=True`` the new name is completed from the source path:

    * if ``dst`` names a different directory than ``fpath`` it is joined
      onto the directory of ``fpath`` (so a bare name stays next to the
      source; an absolute ``dst`` is kept as given by ``os.path.join``);
    * if the extension of ``dst`` differs from that of ``fpath`` (including
      a missing one), the source extension is appended.

    With ``smart=False`` ``dst`` is used exactly as given. A missing source
    is reported with a message; failures are logged via ``logging.error``
    instead of being raised.

    Args:
        fpath: Existing path (``str`` or ``os.PathLike``) to rename.
        dst: New name or path (``str`` or ``os.PathLike``).
        smart: Enable the directory/extension completion described above.

    Returns:
        None.
    """
    try:
        # Normalise path-like objects to str: the extension is appended by
        # string concatenation, which fails for pathlib.Path operands.
        fpath = os.fspath(fpath)
        dst = os.fspath(dst)
        if smart:
            src_dir = os.path.dirname(fpath)
            src_ext = os.path.splitext(fpath)[1]
            dst_ext = os.path.splitext(dst)[1]
            if os.path.dirname(dst) != src_dir:
                dst = os.path.join(src_dir, dst)
            if dst_ext != src_ext:
                dst = dst + src_ext
        if os.path.exists(fpath):
            os.rename(fpath, dst)
            print(f"Done! rename to {dst}")
        else:
            print(f"Failed: {fpath} does not exist.")
    except Exception as e:
        logging.error(f"Failed to rename {fpath} to {dst}: {e}")
|
3312
3910
|
def mkdir_nest(fpath: str) -> str:
|
3313
3911
|
"""
|
3314
3912
|
Create nested directories based on the provided file path.
|
@@ -3327,7 +3925,9 @@ def mkdir_nest(fpath: str) -> str:
|
|
3327
3925
|
dir_parts = fpath.split(f_slash) # Split the path by the OS-specific separator
|
3328
3926
|
|
3329
3927
|
# Start creating directories from the root to the desired path
|
3330
|
-
|
3928
|
+
root_dir = os.path.splitdrive(fpath)[0] # Get the root drive on Windows (e.g., 'C:')
|
3929
|
+
current_path = root_dir if root_dir else f_slash # Start from the root directory or POSIX '/'
|
3930
|
+
|
3331
3931
|
for part in dir_parts:
|
3332
3932
|
if part:
|
3333
3933
|
current_path = os.path.join(current_path, part)
|
@@ -3351,10 +3951,13 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
|
|
3351
3951
|
Returns:
|
3352
3952
|
- str: The path of the created directory or an error message.
|
3353
3953
|
"""
|
3354
|
-
|
3355
3954
|
rootdir = []
|
3955
|
+
pardir= mkdir_nest(pardir)
|
3356
3956
|
if chdir is None:
|
3357
|
-
return
|
3957
|
+
return pardir
|
3958
|
+
else:
|
3959
|
+
pass
|
3960
|
+
print(pardir)
|
3358
3961
|
if isinstance(chdir, str):
|
3359
3962
|
chdir = [chdir]
|
3360
3963
|
chdir = list(set(chdir))
|
@@ -3392,7 +3995,7 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
|
|
3392
3995
|
# Dir is the main output, if only one dir, then str type is inconvenient
|
3393
3996
|
if len(rootdir) == 1:
|
3394
3997
|
rootdir = rootdir[0]
|
3395
|
-
|
3998
|
+
rootdir = rootdir + stype if not rootdir.endswith(stype) else rootdir
|
3396
3999
|
|
3397
4000
|
return rootdir
|
3398
4001
|
|
@@ -3791,6 +4394,114 @@ def apply_filter(img, *args):
|
|
3791
4394
|
)
|
3792
4395
|
return img.filter(supported_filters[filter_name])
|
3793
4396
|
|
4397
|
+
def detect_angle(image, by="median", template=None):
    """Detect the rotation angle (in degrees) of an image.

    The method is chosen by case-insensitive substring match on ``by``:

    * ``"mo"``   (e.g. "moments")  - orientation from central image moments
    * ``"me"``   (e.g. "median", "mean") - Hough lines that are within 10
      degrees of vertical; the median is used when ``by`` also contains
      ``"di"``, otherwise the mean
    * ``"pca"``  - first principal component of the Canny edge pixels
    * ``"gra"``  (e.g. "gradient") - most frequent gradient orientation
    * ``"temp"`` (e.g. "template") - best match of a rotated template
    * ``"fft"``  - direction of the strongest non-DC Fourier component

    Args:
        image: Image passed to ``skimage.color.rgb2gray``.
        by: Method selector, see above.
        template: Optional 2-D template for the template method; when
            omitted a square patch around the image centre is used.

    Returns:
        The detected angle in degrees. ``0`` is returned when no lines or
        edge pixels are found or when ``by`` names no known method; the
        template method returns the best matching integer angle in
        ``[0, 180)`` (or ``None`` if no correlation exceeded -1).
    """
    from sklearn.decomposition import PCA
    from skimage import transform, feature, measure
    from skimage.color import rgb2gray
    from scipy.fftpack import fftshift, fft2
    import numpy as np
    import cv2

    by = str(by).lower()
    # "moment(s)" contains "me", so the moments method must be recognised
    # before the Hough median/mean test to be reachable by its full name.
    use_moments = "mo" in by
    use_hough = "me" in by and not use_moments
    use_pca = "pca" in by and not (use_moments or use_hough)

    # Convert to grayscale
    gray_image = rgb2gray(image)

    edges = None
    lines = None
    if use_hough or use_pca:
        # Edges / Hough lines are only needed by these two methods.
        edges = feature.canny(gray_image, sigma=2)
        lines = transform.probabilistic_hough_line(edges)
        if not lines:
            print("No lines detected. Adjust the edge detection parameters.")
            return 0

    # Image Moments-based angle detection
    if use_moments:
        moments = measure.moments_central(gray_image)
        angle = (
            0.5
            * np.arctan2(2 * moments[1, 1], moments[0, 2] - moments[2, 0])
            * 180
            / np.pi
        )
        return angle

    # Hough Transform-based angle detection (Median/Mean)
    if use_hough:
        angles = []
        for (x0, y0), (x1, y1) in lines:
            angle = np.arctan2(y1 - y0, x1 - x0) * 180 / np.pi
            if 80 < abs(angle) < 100:  # keep near-vertical segments only
                angles.append(angle)
        if not angles:
            return 0
        central = np.median(angles) if "di" in by else np.mean(angles)
        return 90 - central if central > 0 else -90 - central

    # PCA-based angle detection
    if use_pca:
        y, x = np.nonzero(edges)
        if len(x) == 0:
            return 0
        pca = PCA(n_components=2)
        pca.fit(np.vstack((x, y)).T)
        return np.arctan2(pca.components_[0, 1], pca.components_[0, 0]) * 180 / np.pi

    # Gradient Orientation-based angle detection
    if "gra" in by:
        # np.gradient returns the derivative along axis 0 first, then axis 1.
        # NOTE(review): the orientation convention (which derivative is the
        # arctan2 numerator) is kept as found -- confirm it is intended.
        d_axis0, d_axis1 = np.gradient(gray_image)
        angles = np.arctan2(d_axis1, d_axis0) * 180 / np.pi
        hist, bin_edges = np.histogram(angles, bins=360, range=(-180, 180))
        return bin_edges[np.argmax(hist)]

    # Template Matching-based angle detection
    if "temp" in by:
        if template is None:
            # Automatically extract a template from the center of the image
            height, width = gray_image.shape
            center_x, center_y = width // 2, height // 2
            size = min(height, width) // 4  # half side length of the patch
            template = gray_image[
                center_y - size : center_y + size, center_x - size : center_x + size
            ]
        # cv2.matchTemplate only accepts 8-bit or 32-bit float input, while
        # rgb2gray / transform.rotate produce float64 arrays.
        search_image = np.asarray(gray_image, dtype=np.float32)
        best_angle = None
        best_corr = -1
        for angle in range(0, 180, 1):  # Checking every degree
            rotated_template = transform.rotate(template, angle).astype(np.float32)
            res = cv2.matchTemplate(search_image, rotated_template, cv2.TM_CCOEFF)
            _, max_val, _, _ = cv2.minMaxLoc(res)
            if max_val > best_corr:
                best_corr = max_val
                best_angle = angle
        return best_angle

    # Fourier Transform-based angle detection
    if "fft" in by:
        fshift = fftshift(fft2(gray_image))
        magnitude_spectrum = np.log(np.abs(fshift) + 1)
        rows, cols = magnitude_spectrum.shape
        # After fftshift the DC term (sum of all pixels) sits at the centre;
        # for a non-negative image it is the global maximum, which would pin
        # the result to 0 degrees. Suppress it before looking for the peak.
        magnitude_spectrum[rows // 2, cols // 2] = 0
        r, c = np.unravel_index(np.argmax(magnitude_spectrum), (rows, cols))
        return np.arctan2(r - rows // 2, c - cols // 2) * 180 / np.pi

    print(f"Unknown method {by}")
    return 0
|
3794
4505
|
|
3795
4506
|
def imgsets(img, **kwargs):
|
3796
4507
|
"""
|
@@ -5911,6 +6622,9 @@ def df_scaler(
|
|
5911
6622
|
scaler=None,
|
5912
6623
|
method="standard",
|
5913
6624
|
columns=None, # default, select all numeric col/row
|
6625
|
+
feature_range=None,# specific for 'minmax'
|
6626
|
+
vmin=0,
|
6627
|
+
vmax=1,
|
5914
6628
|
inplace=False,
|
5915
6629
|
verbose=False, # show usage
|
5916
6630
|
axis=0, # defalut column-wise
|
@@ -5943,11 +6657,13 @@ def df_scaler(
|
|
5943
6657
|
scaler = StandardScaler(**kwargs)
|
5944
6658
|
elif method == "minmax":
|
5945
6659
|
from sklearn.preprocessing import MinMaxScaler
|
6660
|
+
if feature_range is None:
|
6661
|
+
feature_range=(vmin,vmax)
|
5946
6662
|
if verbose:
|
5947
6663
|
print("don't forget to define the range: e.g., 'feature_range=(0, 1)'. ")
|
5948
6664
|
print("scales the features to the range [0, 1]. Adjust feature_range if you want a different range, like [-1, 1].")
|
5949
6665
|
print("Use when the data does not follow a normal distribution and you need all features in a specific range (e.g., [0, 1]).\nIdeal for algorithms that do not assume a particular distribution, such as k-nearest neighbors and neural networks.")
|
5950
|
-
scaler = MinMaxScaler(
|
6666
|
+
scaler = MinMaxScaler(feature_range=feature_range,**kwargs)
|
5951
6667
|
elif method == "robust":
|
5952
6668
|
from sklearn.preprocessing import RobustScaler
|
5953
6669
|
if verbose:
|
@@ -6035,15 +6751,20 @@ def df_special_characters_cleaner(
|
|
6035
6751
|
|
6036
6752
|
# 1. Clean column names by replacing special characters with underscores
|
6037
6753
|
if "column" in where_:
|
6038
|
-
|
6754
|
+
try:
|
6755
|
+
data.columns = data.columns.str.replace(r"[^\w\s]", "_", regex=True)
|
6756
|
+
except Exception as e:
|
6757
|
+
print(e)
|
6039
6758
|
|
6040
6759
|
# 2. Clean only object-type columns (text columns)
|
6041
|
-
|
6042
|
-
|
6043
|
-
|
6044
|
-
|
6045
|
-
|
6046
|
-
|
6760
|
+
try:
|
6761
|
+
if "content" in where_:
|
6762
|
+
for col in data.select_dtypes(include=["object"]).columns:
|
6763
|
+
data[col] = data[col].str.replace(r"[^\w\s]", "", regex=True)
|
6764
|
+
if data.index.dtype == "object" and index in where_:
|
6765
|
+
data.index = data.index.str.replace(r"[^\w\s]", "_", regex=True)
|
6766
|
+
except:
|
6767
|
+
pass
|
6047
6768
|
return data
|
6048
6769
|
|
6049
6770
|
|
@@ -6426,6 +7147,9 @@ def df_reducer(
|
|
6426
7147
|
# "autoencoder","nmf",
|
6427
7148
|
]
|
6428
7149
|
method = strcmp(method, methods)[0]
|
7150
|
+
if run_once_within(reverse=True):
|
7151
|
+
print(f"support methods:{methods}")
|
7152
|
+
|
6429
7153
|
if verbose:
|
6430
7154
|
print(f"\nprocessing with using {dict_methods[method]}:")
|
6431
7155
|
xlabel, ylabel = None, None
|
@@ -6433,16 +7157,20 @@ def df_reducer(
|
|
6433
7157
|
columns = data.select_dtypes(include="number").columns.tolist()
|
6434
7158
|
if hue is None:
|
6435
7159
|
hue = data.select_dtypes(exclude="number").columns.tolist()
|
7160
|
+
print(f"auto select the non-number as 'hue':{hue}")
|
6436
7161
|
if isinstance(hue, list):
|
6437
7162
|
print("Warning: hue is a list, only select the 1st one")
|
6438
7163
|
hue = hue[0]
|
6439
|
-
if not hue:
|
7164
|
+
if not any(hue):
|
6440
7165
|
# Select columns if specified, else use all columns
|
6441
7166
|
X = data[columns].values if columns else data.values
|
6442
7167
|
else:
|
6443
7168
|
# Select columns to reduce and hue for LDA
|
6444
|
-
|
6445
|
-
|
7169
|
+
try:
|
7170
|
+
X = data[columns].values if columns else data.drop(columns=[hue]).values
|
7171
|
+
y = data[hue].values
|
7172
|
+
except:
|
7173
|
+
pass
|
6446
7174
|
print(X.shape)
|
6447
7175
|
# Handle missing values
|
6448
7176
|
if fill_missing:
|
@@ -6909,33 +7637,49 @@ def df_reducer(
|
|
6909
7637
|
colname_met = "SVD_"
|
6910
7638
|
# Quick plots
|
6911
7639
|
if plot_ and (not method in ["isolation_forest"]):
|
6912
|
-
from .plot import plotxy
|
6913
|
-
if ax is None:
|
6914
|
-
|
6915
|
-
|
6916
|
-
|
6917
|
-
|
6918
|
-
else:
|
6919
|
-
|
7640
|
+
from .plot import plotxy,figsets,get_color
|
7641
|
+
# if ax is None:
|
7642
|
+
# if figsize is None:
|
7643
|
+
# _, ax = plt.subplots(figsize=cm2inch(8, 8))
|
7644
|
+
# else:
|
7645
|
+
# _, ax = plt.subplots(figsize=figsize)
|
7646
|
+
# else:
|
7647
|
+
# ax = ax.cla()
|
6920
7648
|
xlabel = f"{colname_met}1" if xlabel is None else xlabel
|
6921
7649
|
ylabel = f"{colname_met}2" if ylabel is None else ylabel
|
7650
|
+
palette=get_color(len(flatten(data[hue],verbose=0)))
|
7651
|
+
|
7652
|
+
reduced_df=reduced_df.sort_values(by=hue)
|
7653
|
+
print(flatten(reduced_df[hue]))
|
6922
7654
|
ax = plotxy(
|
6923
7655
|
data=reduced_df,
|
6924
7656
|
x=colname_met + "1",
|
6925
7657
|
y=colname_met + "2",
|
6926
7658
|
hue=hue,
|
6927
|
-
|
7659
|
+
palette=palette,
|
7660
|
+
# size=size,
|
6928
7661
|
edgecolor=edgecolor,
|
6929
|
-
kind_="
|
6930
|
-
|
6931
|
-
|
6932
|
-
|
6933
|
-
|
6934
|
-
|
6935
|
-
|
7662
|
+
kind_=["joint",
|
7663
|
+
# "kde",
|
7664
|
+
"ell",
|
7665
|
+
],
|
7666
|
+
kws_kde=dict(
|
7667
|
+
hue=hue,
|
7668
|
+
levels=2,
|
7669
|
+
common_norm=False,
|
7670
|
+
fill=True,
|
7671
|
+
alpha=0.05,
|
7672
|
+
),
|
7673
|
+
kws_joint=dict(kind='scatter',joint_kws=dict(s=size)),
|
7674
|
+
kws_ellipse=dict(alpha=0.1,lw=1,label=None),
|
6936
7675
|
verbose=False,
|
6937
7676
|
**kwargs,
|
6938
7677
|
)
|
7678
|
+
figsets(
|
7679
|
+
legend=dict(loc=legend_loc, markerscale=markerscale,bbox_to_anchor=bbox_to_anchor,ncols=ncols,fontsize=8),
|
7680
|
+
xlabel=xlabel if xlabel else None,
|
7681
|
+
ylabel=ylabel if ylabel else None,
|
7682
|
+
)
|
6939
7683
|
|
6940
7684
|
if inplace:
|
6941
7685
|
# If inplace=True, add components back into the original data
|
@@ -7412,6 +8156,7 @@ def df_qc(
|
|
7412
8156
|
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
7413
8157
|
from scipy.stats import skew, kurtosis, entropy
|
7414
8158
|
|
8159
|
+
pd.options.display.max_seq_items = 10
|
7415
8160
|
#! display(data.select_dtypes(include=[np.number]).describe())
|
7416
8161
|
#!skim
|
7417
8162
|
if columns is not None:
|
@@ -7428,16 +8173,18 @@ def df_qc(
|
|
7428
8173
|
data = data.copy()
|
7429
8174
|
data.loc[:, data.isna().all()] = 0
|
7430
8175
|
res_qc = {}
|
7431
|
-
print(f"data.shape:{data.shape}")
|
8176
|
+
print(f"⤵ data.shape:{data.shape}\n⤵ data.sample(10):")
|
8177
|
+
display(data.sample(10).style.background_gradient(cmap="coolwarm", axis=1))
|
7432
8178
|
|
7433
8179
|
# Missing values
|
7434
8180
|
res_qc["missing_values"] = data.isnull().sum()
|
7435
|
-
res_qc["missing_percentage"] = (res_qc["missing_values"] / len(data)) * 100
|
8181
|
+
res_qc["missing_percentage"] = round((res_qc["missing_values"] / len(data)) * 100,2)
|
7436
8182
|
res_qc["rows_with_missing"] = data.isnull().any(axis=1).sum()
|
7437
8183
|
|
7438
8184
|
# Data types and unique values
|
7439
8185
|
res_qc["data_types"] = data.dtypes
|
7440
|
-
res_qc["
|
8186
|
+
res_qc["unique_counts"] = data.select_dtypes(exclude=np.number).nunique().sort_values()
|
8187
|
+
res_qc["unique_values"] = data.select_dtypes(exclude=np.number).apply(lambda x: x.unique())
|
7441
8188
|
res_qc["constant_columns"] = [
|
7442
8189
|
col for col in data.columns if data[col].nunique() <= 1
|
7443
8190
|
]
|
@@ -7453,33 +8200,42 @@ def df_qc(
|
|
7453
8200
|
data_outliers = df_outlier(data)
|
7454
8201
|
outlier_num = data_outliers.isna().sum() - data.isnull().sum()
|
7455
8202
|
res_qc["outlier_num"] = outlier_num[outlier_num > 0]
|
7456
|
-
outlier_percentage=(outlier_num / len(data_outliers)) * 100
|
8203
|
+
outlier_percentage=round((outlier_num / len(data_outliers)) * 100,2)
|
7457
8204
|
res_qc["outlier_percentage"] = outlier_percentage[outlier_percentage>0]
|
7458
|
-
|
7459
|
-
|
7460
|
-
|
7461
|
-
|
7462
|
-
|
7463
|
-
|
7464
|
-
|
7465
|
-
|
7466
|
-
|
7467
|
-
|
7468
|
-
res_qc["high_correlations"] = high_corr_pairs
|
7469
|
-
|
7470
|
-
# VIF for multicollinearity check
|
7471
|
-
numeric_df = data.select_dtypes(include=[np.number]).dropna()
|
7472
|
-
vif_data = pd.DataFrame()
|
7473
|
-
res_qc["vif"]=vif_data
|
7474
|
-
if numeric_df.shape[1] > 1 and not numeric_df.empty:
|
7475
|
-
vif_data["feature"] = numeric_df.columns
|
7476
|
-
vif_data["VIF"] = [
|
7477
|
-
variance_inflation_factor(numeric_df.values, i)
|
7478
|
-
for i in range(numeric_df.shape[1])
|
8205
|
+
try:
|
8206
|
+
# Correlation and multicollinearity (VIF)
|
8207
|
+
if any(data.dtypes.apply(pd.api.types.is_numeric_dtype)):
|
8208
|
+
numeric_df = data.select_dtypes(include=[np.number]).dropna()
|
8209
|
+
corr_matrix = numeric_df.corr()
|
8210
|
+
high_corr_pairs = [
|
8211
|
+
(col1, col2)
|
8212
|
+
for col1 in corr_matrix.columns
|
8213
|
+
for col2 in corr_matrix.columns
|
8214
|
+
if col1 != col2 and abs(corr_matrix[col1][col2]) > 0.9
|
7479
8215
|
]
|
7480
|
-
res_qc["
|
7481
|
-
|
7482
|
-
|
8216
|
+
res_qc["high_correlations"] = high_corr_pairs
|
8217
|
+
|
8218
|
+
# VIF for multicollinearity check
|
8219
|
+
numeric_df = data.select_dtypes(include=[np.number]).dropna()
|
8220
|
+
if isinstance(numeric_df.columns, pd.MultiIndex):
|
8221
|
+
numeric_df.columns = [
|
8222
|
+
"_".join(col).strip() if isinstance(col, tuple) else col for col in numeric_df.columns
|
8223
|
+
]
|
8224
|
+
|
8225
|
+
|
8226
|
+
vif_data = pd.DataFrame()
|
8227
|
+
res_qc["vif"]=vif_data
|
8228
|
+
if numeric_df.shape[1] > 1 and not numeric_df.empty:
|
8229
|
+
vif_data["feature"] = numeric_df.columns.tolist()
|
8230
|
+
vif_data["VIF"] = [
|
8231
|
+
round(variance_inflation_factor(numeric_df.values, i),2)
|
8232
|
+
for i in range(numeric_df.shape[1])
|
8233
|
+
]
|
8234
|
+
res_qc["vif"] = vif_data[
|
8235
|
+
vif_data["VIF"] > 5
|
8236
|
+
] # Typically VIF > 5 indicates multicollinearity
|
8237
|
+
except Exception as e:
|
8238
|
+
print(e)
|
7483
8239
|
# Skewness and Kurtosis
|
7484
8240
|
skewness = data.skew(numeric_only=True)
|
7485
8241
|
kurtosis_vals = data.kurt(numeric_only=True)
|
@@ -7492,8 +8248,7 @@ def df_qc(
|
|
7492
8248
|
col: entropy(data[col].value_counts(normalize=True), base=2)
|
7493
8249
|
for col in categorical_cols
|
7494
8250
|
}
|
7495
|
-
|
7496
|
-
res_qc["unique_counts"] = data.nunique()
|
8251
|
+
|
7497
8252
|
# dtypes counts
|
7498
8253
|
res_qc['dtype_counts']=data.dtypes.value_counts()
|
7499
8254
|
|
@@ -7540,7 +8295,7 @@ def df_qc(
|
|
7540
8295
|
res_qc["text_length_analysis"] = text_lengths
|
7541
8296
|
|
7542
8297
|
# Summary statistics
|
7543
|
-
res_qc["summary_statistics"] = data.describe().T
|
8298
|
+
res_qc["summary_statistics"] = data.describe().T.style.background_gradient(cmap='coolwarm', axis=0)
|
7544
8299
|
|
7545
8300
|
# Automated warnings
|
7546
8301
|
warnings = []
|
@@ -7562,28 +8317,45 @@ def df_qc(
|
|
7562
8317
|
|
7563
8318
|
# Report generation
|
7564
8319
|
if verbose:
|
7565
|
-
print("=== QC Report Summary ===")
|
7566
8320
|
print("\n⤵ Summary Statistics:")
|
7567
8321
|
display(res_qc["summary_statistics"])
|
7568
8322
|
print("\n⤵ Data Types:")
|
7569
8323
|
display(res_qc["data_types"])
|
7570
8324
|
if any(res_qc["missing_values"][res_qc["missing_values"] > 0]):
|
7571
8325
|
print(" ⤵ Missing Values Counts:")
|
7572
|
-
display(
|
8326
|
+
display(pd.DataFrame(
|
8327
|
+
{
|
8328
|
+
"missing_values": res_qc["missing_values"][res_qc["missing_values"] > 0],
|
8329
|
+
"missing_percent(%)": res_qc["missing_percentage"][
|
8330
|
+
res_qc["missing_percentage"] > 0
|
8331
|
+
],
|
8332
|
+
}
|
8333
|
+
).style.background_gradient(cmap="coolwarm", axis=0)
|
8334
|
+
)
|
7573
8335
|
# print(res_qc["missing_percentage"][res_qc["missing_percentage"] > 0])
|
7574
8336
|
print("\n⤵ Rows with Missing Values:",res_qc["rows_with_missing"])
|
7575
8337
|
|
8338
|
+
print("\n⤵ Constant Columns:", res_qc["constant_columns"]) if any(res_qc["constant_columns"]) else None
|
8339
|
+
print("⤵ Duplicate Rows:", res_qc["duplicate_rows"]) if res_qc["duplicate_rows"] else None
|
8340
|
+
print("⤵ Duplicate Columns:", res_qc["duplicate_columns"]) if any(res_qc["duplicate_columns"]) else None
|
8341
|
+
|
7576
8342
|
if any(res_qc["outlier_num"]):
|
7577
8343
|
print("\n⤵ Outlier Report:")
|
7578
|
-
display(
|
7579
|
-
|
7580
|
-
|
7581
|
-
|
8344
|
+
display(pd.DataFrame(
|
8345
|
+
{
|
8346
|
+
"outlier_num": res_qc["outlier_num"][res_qc["outlier_num"] > 0],
|
8347
|
+
"outlier_percentage(%)": res_qc["outlier_percentage"][
|
8348
|
+
res_qc["outlier_percentage"] > 0
|
8349
|
+
],
|
8350
|
+
}
|
8351
|
+
).style.background_gradient(cmap="coolwarm", axis=0)
|
8352
|
+
)
|
7582
8353
|
|
7583
|
-
|
8354
|
+
if any(res_qc["unique_counts"]):
|
8355
|
+
print("\n⤵ Unique Values per Column:")
|
8356
|
+
display(pd.DataFrame({"unique_counts":res_qc["unique_counts"],
|
8357
|
+
"unique_values":res_qc["unique_values"]}).style.background_gradient(cmap="coolwarm", axis=0))
|
7584
8358
|
|
7585
|
-
print("⤵ Duplicate Rows:", res_qc["duplicate_rows"]) if res_qc["duplicate_rows"] else None
|
7586
|
-
print("⤵ Duplicate Columns:", res_qc["duplicate_columns"]) if any(res_qc["duplicate_columns"]) else None
|
7587
8359
|
|
7588
8360
|
if res_qc["empty_columns"]:
|
7589
8361
|
print("\n⤵ Empty Columns:", res_qc["empty_columns"])
|
@@ -7595,7 +8367,7 @@ def df_qc(
|
|
7595
8367
|
|
7596
8368
|
if "vif" in res_qc:
|
7597
8369
|
print("\n⤵ Features with High VIF (>|5|):")
|
7598
|
-
|
8370
|
+
display(res_qc["vif"].style.background_gradient(cmap="coolwarm", axis=0))
|
7599
8371
|
|
7600
8372
|
if any(res_qc["high_cardinality_categoricals"]):
|
7601
8373
|
print("\n⤵ High Cardinality Categorical Columns (>|50 unique|):")
|
@@ -7614,6 +8386,8 @@ def df_qc(
|
|
7614
8386
|
print("\nWarnings:")
|
7615
8387
|
for warning in res_qc["warnings"]:
|
7616
8388
|
print(" -", warning)
|
8389
|
+
|
8390
|
+
pd.reset_option("display.max_seq_items")
|
7617
8391
|
if plot_:
|
7618
8392
|
df_qc_plots(data=data, res_qc=res_qc, max_cols=max_cols,hue=hue,dir_save=dir_save)
|
7619
8393
|
if output or not plot_:
|
@@ -7632,7 +8406,7 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7632
8406
|
if isinstance(columns, (list,pd.core.indexes.base.Index)):
|
7633
8407
|
data=data[columns]
|
7634
8408
|
len_total = len(res_qc)
|
7635
|
-
n_row, n_col = int((len_total + 10)
|
8409
|
+
n_row, n_col = int((len_total + 10)), 3
|
7636
8410
|
nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row],verbose=False)
|
7637
8411
|
|
7638
8412
|
missing_data = res_qc["missing_values"][res_qc["missing_values"] > 0].sort_values(
|
@@ -7789,8 +8563,10 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7789
8563
|
title="Dtypes",
|
7790
8564
|
ylabel="#",
|
7791
8565
|
ax=ax_dtype_counts,
|
7792
|
-
fontsize=8
|
8566
|
+
fontsize=8 if len(dtype_counts.index)<=20 else 6,
|
7793
8567
|
)
|
8568
|
+
# from .plot import pie
|
8569
|
+
# pie()
|
7794
8570
|
|
7795
8571
|
# High cardinality: Show top categorical columns by unique value count
|
7796
8572
|
high_cardinality = res_qc["high_cardinality_categoricals"]
|
@@ -7871,16 +8647,17 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7871
8647
|
title="Correlation Heatmap",
|
7872
8648
|
ax=ax_heatmap
|
7873
8649
|
)
|
7874
|
-
# save figure
|
7875
|
-
if dir_save:
|
7876
|
-
|
8650
|
+
# # save figure
|
8651
|
+
# if dir_save:
|
8652
|
+
# figsave(dir_save,f"qc_plot_{now_}.pdf")
|
7877
8653
|
|
7878
8654
|
if columns is not None:
|
7879
8655
|
if isinstance(columns, (list,pd.core.indexes.base.Index)):
|
7880
8656
|
data=data[columns]
|
7881
|
-
|
7882
|
-
|
7883
|
-
|
8657
|
+
|
8658
|
+
# len_total = len(res_qc)
|
8659
|
+
# n_row, n_col = int((len_total + 10) / 3), 3
|
8660
|
+
# nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row],verbose=False)
|
7884
8661
|
#! check distribution
|
7885
8662
|
data_num = data.select_dtypes(include=np.number)
|
7886
8663
|
if len(data_num) > max_cols:
|
@@ -7907,7 +8684,43 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
|
|
7907
8684
|
figsets(ylabel=f'Q-Q Plot:{column}',title=None)
|
7908
8685
|
# save figure
|
7909
8686
|
if dir_save:
|
7910
|
-
figsave(dir_save,f"
|
8687
|
+
figsave(dir_save,f"qc_plot_{now_}.pdf")
|
8688
|
+
|
8689
|
+
def df_corr(df: pd.DataFrame, method="pearson"):
    """
    Pairwise correlation coefficients and p-values for every column pair.

    Parameters:
    - df (pd.DataFrame): Input DataFrame with numeric data.
    - method (str): Correlation method ("pearson", "spearman", "kendall");
      resolved against that list with ``strcmp``.

    Returns:
    - corr_matrix (pd.DataFrame): Correlation coefficient matrix
      (diagonal fixed to 1.0).
    - pval_matrix (pd.DataFrame): P-value matrix (diagonal fixed to 0.0).
    """
    from scipy.stats import pearsonr, spearmanr, kendalltau

    estimators = {"pearson": pearsonr, "spearman": spearmanr, "kendall": kendalltau}
    supported = ["pearson", "spearman", "kendall"]
    method = strcmp(method, supported)[0]
    estimator = estimators[method]

    labels = df.columns
    corr_matrix = pd.DataFrame(index=labels, columns=labels, dtype=float)
    pval_matrix = pd.DataFrame(index=labels, columns=labels, dtype=float)

    for row_label in labels:
        for col_label in labels:
            if row_label == col_label:
                # A column against itself is not tested.
                coef, pval = 1.0, 0.0
            else:
                coef, pval = estimator(df[row_label], df[col_label])
            corr_matrix.loc[row_label, col_label] = coef
            pval_matrix.loc[row_label, col_label] = pval

    return corr_matrix, pval_matrix
|
8723
|
+
|
7911
8724
|
def use_pd(
|
7912
8725
|
func_name="excel",
|
7913
8726
|
verbose=True,
|
@@ -7927,3 +8740,135 @@ def use_pd(
|
|
7927
8740
|
except Exception as e:
|
7928
8741
|
if verbose:
|
7929
8742
|
print(e)
|
8743
|
+
|
8744
|
+
def get_phone(phone_number: str, region: str = None, verbose=True):
    """
    Analyze a phone number and collect what the ``phonenumbers`` package
    knows about it.

    usage:
        info = get_phone(15237654321, "DE")
        preview(info)

    Args:
        phone_number (str): The phone number to analyze; non-string input is
            converted with ``str()``.
        region (str): None (default) expects an international number that
            includes its country code; otherwise the region code used to
            interpret the number (upper-cased automatically, e.g. "DE").
        verbose (bool): If True, the result is passed to ``preview`` before
            it is returned (not done for numbers that fail validation).

    Returns:
        dict: For a valid number: validity flags, formatted variants,
        country/region/location, carrier, time zone information, number
        type, dialing instructions and a small log section. Time related
        entries are "Unknown" when the number maps to no known time zone.
        For a number that fails validation:
        ``{"valid": False, "error": ..., "suggested_fix": ...}``.
        For an unparsable number: ``{"valid": False, "error": <message>}``.
    """
    import phonenumbers
    from phonenumbers import geocoder, carrier, timezone, number_type
    from datetime import datetime
    import pytz
    from tzlocal import get_localzone

    if not isinstance(phone_number, str):
        phone_number = str(phone_number)
    if isinstance(region, str):
        region = region.upper()

    try:
        # Parse the phone number
        parsed_number = phonenumbers.parse(phone_number, region)

        # Validate the phone number
        valid = phonenumbers.is_valid_number(parsed_number)
        possible = phonenumbers.is_possible_number(parsed_number)

        if not valid:
            suggested_fix = phonenumbers.example_number(region) if region else "Unknown"
            return {
                "valid": False,
                "error": "Invalid phone number",
                "suggested_fix": suggested_fix,
            }

        # Basic details
        formatted_international = phonenumbers.format_number(
            parsed_number, phonenumbers.PhoneNumberFormat.INTERNATIONAL
        )
        formatted_national = phonenumbers.format_number(
            parsed_number, phonenumbers.PhoneNumberFormat.NATIONAL
        )
        formatted_e164 = phonenumbers.format_number(
            parsed_number, phonenumbers.PhoneNumberFormat.E164
        )
        country_code = parsed_number.country_code
        region_code = geocoder.region_code_for_number(parsed_number)
        country_name = geocoder.country_name_for_number(parsed_number, "en")

        location = geocoder.description_for_number(parsed_number, "en")
        carrier_name = carrier.name_for_number(parsed_number, "en") or "Unknown Carrier"
        time_zones = timezone.time_zones_for_number(parsed_number)[0]

        kind = number_type(parsed_number)
        number_type_str = {
            phonenumbers.PhoneNumberType.FIXED_LINE: "Fixed Line",
            phonenumbers.PhoneNumberType.MOBILE: "Mobile",
            phonenumbers.PhoneNumberType.FIXED_LINE_OR_MOBILE: "Fixed Line or Mobile",
            phonenumbers.PhoneNumberType.TOLL_FREE: "Toll Free",
            phonenumbers.PhoneNumberType.PREMIUM_RATE: "Premium Rate",
            phonenumbers.PhoneNumberType.SHARED_COST: "Shared Cost",
            phonenumbers.PhoneNumberType.VOIP: "VOIP",
            phonenumbers.PhoneNumberType.PERSONAL_NUMBER: "Personal Number",
            phonenumbers.PhoneNumberType.PAGER: "Pager",
            phonenumbers.PhoneNumberType.UAN: "UAN",
            phonenumbers.PhoneNumberType.UNKNOWN: "Unknown",
        }.get(kind, "Unknown")

        # Advanced Features
        is_toll_free = kind == phonenumbers.PhoneNumberType.TOLL_FREE
        is_premium_rate = kind == phonenumbers.PhoneNumberType.PREMIUM_RATE

        # Dialing Information
        dialing_instructions = f"Dial {formatted_national} within {country_name}. Dial {formatted_e164} from abroad."

        # Offset of the current computer's time zone, taken from an aware
        # "now" so DST transitions cannot make the lookup ambiguous.
        local_offset = (
            datetime.now(get_localzone()).utcoffset().total_seconds() / 3600
        )

        # Time in the number's zone; numbers without a known zone (reported
        # as "Etc/Unknown") are not resolvable by pytz.
        try:
            target_now = datetime.now(pytz.timezone(time_zones))
        except pytz.UnknownTimeZoneError:
            current_times = "Unknown"
            time_zone_diff = "Unknown"
        else:
            current_times = target_now.strftime("%Y-%m-%d %H:%M:%S %Z")
            gmt_offsets = target_now.utcoffset().total_seconds() / 3600
            offset_diff = local_offset - gmt_offsets
            head_time = "earlier" if offset_diff < 0 else "later" if offset_diff > 0 else ""
            time_zone_diff = f"{head_time} {int(abs(offset_diff))} h"

        res = {
            "valid": True,
            "possible": possible,
            "formatted": {
                "international": formatted_international,
                "national": formatted_national,
                "e164": formatted_e164,
            },
            "country_code": country_code,
            "country_name": country_name,
            "region_code": region_code,
            "location": location if location else "Unknown",
            "carrier": carrier_name,
            "time_zone": time_zones,
            "current_times": current_times,
            "local_offset": f"{local_offset} utcoffset",
            "time_zone_diff": time_zone_diff,
            "number_type": number_type_str,
            "is_toll_free": is_toll_free,
            "is_premium_rate": is_premium_rate,
            "dialing_instructions": dialing_instructions,
            "suggested_fix": None,  # Only filled in for invalid numbers
            "logs": {
                "number_analysis_completed": datetime.now().strftime(
                    "%Y-%m-%d %H:%M:%S"
                ),
                "raw_input": phone_number,
                "parsed_number": str(parsed_number),
            },
        }

    except phonenumbers.NumberParseException as e:
        res = {"valid": False, "error": str(e)}
    if verbose:
        preview(res)
    return res
|