py2ls 0.2.4.31__py3-none-any.whl → 0.2.4.33__py3-none-any.whl

py2ls/ips.py CHANGED
@@ -1,6 +1,7 @@
 import numpy as np
 import pandas as pd
-import sys, os
+import sys
+import os
 from IPython.display import display
 from typing import List, Optional, Union
 
@@ -17,13 +18,15 @@ import warnings
 warnings.simplefilter("ignore", category=pd.errors.SettingWithCopyWarning)
 warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
 warnings.filterwarnings("ignore")
-import os
 import shutil
 import logging
 from pathlib import Path
 from datetime import datetime
+import re
+import stat
+import platform
 
-def run_once_within(duration=60,reverse=False): # default 60s
+def run_once_within(duration=60, reverse=False): # default 60s
     import time
 
     """
@@ -546,6 +549,7 @@ def is_text(s):
 
 from typing import Any, Union
 
+
 def share(*args, strict=True, n_shared=2, verbose=True):
     """
     check the shared elelements in two list.
@@ -591,13 +595,14 @@ def share(*args, strict=True, n_shared=2, verbose=True):
     elements2show = (
         shared_elements if len(shared_elements) < 10 else shared_elements[:5]
     )
-    tail = '' if len(shared_elements) < 10 else '......'
+    tail = "" if len(shared_elements) < 10 else "......"
     elements2show.append(tail)
     print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
     print("********* checking shared elements *********")
     return shared_elements
 
-def shared(*args, n_shared=None, verbose=True,**kwargs):
+
+def shared(*args, n_shared=None, verbose=True, **kwargs):
     """
     check the shared elelements in two list.
     usage:
@@ -652,7 +657,8 @@ def shared(*args, n_shared=None, verbose=True,**kwargs):
     print("********* checking shared elements *********")
     return shared_elements
 
-def share_not(*args, n_shared=None, verbose=False):
+
+def share_not(*args, n_shared=None, verbose=False):
     """
     To find the elements in list1 that are not shared with list2 while maintaining the original order of list1
     usage:
@@ -660,10 +666,12 @@ def share_not(*args, n_shared=None, verbose=False):
     list2 = [4, 5, 6, 7, 8]
     not_shared(list1,list2)# output [1,3]
     """
-    _common = shared(*args, n_shared=n_shared, verbose=verbose)
+    _common = shared(*args, n_shared=n_shared, verbose=verbose)
     list1 = flatten(args[0], verbose=verbose)
    _not_shared = [item for item in list1 if item not in _common]
    return _not_shared
+
+
 def not_shared(*args, n_shared=None, verbose=False):
     """
     To find the elements in list1 that are not shared with list2 while maintaining the original order of list1
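For orientation: share/shared return the ordered intersection of the input lists, and share_not/not_shared return what is left of the first list. A minimal usage sketch (assuming py2ls is installed and these helpers are imported from py2ls.ips):

    from py2ls.ips import shared, not_shared

    list1 = [1, 2, 3, 4, 5]
    list2 = [4, 5, 6, 7, 8]
    common = shared(list1, list2, verbose=False)        # expect [4, 5]
    leftover = not_shared(list1, list2, verbose=False)  # expect [1, 2, 3]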
@@ -779,13 +787,23 @@ def strcmp(
         print(f"建议: {best_match}")
     return candidates[best_match_index], best_match_index
 
-def imgcmp(img: list, method='knn', plot_=True, figsize=[12, 6]):
+
+def imgcmp(img: list,
+           method:str ="knn",
+           thr:float =0.75,
+           detector: str = "sift",
+           plot_:bool =True,
+           figsize=[12, 6],
+           grid_size=10,# only for grid detector
+           **kwargs):
     """
     Compare two images using SSIM, Feature Matching (SIFT), or KNN Matching.
 
     Parameters:
-    - img (list): List containing two image file paths [img1, img2].
+    - img (list): List containing two image file paths [img1, img2] or two numpy arrays.
     - method (str): Comparison method ('ssim', 'match', or 'knn').
+    - detector (str): Feature detector ('sift', 'grid', 'pixel').
+    - thr (float): Threshold for filtering matches.
     - plot_ (bool): Whether to display the results visually.
     - figsize (list): Size of the figure for plots.
 
@@ -796,15 +814,21 @@ def imgcmp(img: list, method='knn', plot_=True, figsize=[12, 6]):
     import cv2
     import matplotlib.pyplot as plt
     from skimage.metrics import structural_similarity as ssim
+
     # Load images
-    image1 = cv2.imread(img[0])
-    image2 = cv2.imread(img[1])
+    if isinstance(img, list) and isinstance(img[0],str):
+        image1 = cv2.imread(img[0])
+        image2 = cv2.imread(img[1])
+        bool_cvt=True
+    else:
+        image1, image2 = np.array(img[0]),np.array(img[1])
+        bool_cvt=False
 
     if image1 is None or image2 is None:
         raise ValueError("Could not load one or both images. Check file paths.")
-    methods=['ssim','match','knn']
-    method=strcmp(method, methods)[0]
-    if method == 'ssim':
+    methods = ["ssim", "match", "knn"]
+    method = strcmp(method, methods)[0]
+    if method == "ssim":
         # Convert images to grayscale
         gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
         gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
@@ -819,107 +843,187 @@ def imgcmp(img: list, method='knn', plot_=True, figsize=[12, 6]):
         # Plot if needed
         if plot_:
             fig, ax = plt.subplots(1, 3, figsize=figsize)
-            ax[0].imshow(gray1, cmap='gray')
+            ax[0].imshow(gray1, cmap="gray")
             ax[0].set_title("Image 1")
-            ax[1].imshow(gray2, cmap='gray')
+            ax[1].imshow(gray2, cmap="gray")
             ax[1].set_title("Image 2")
-            ax[2].imshow(diff, cmap='gray')
+            ax[2].imshow(diff, cmap="gray")
             ax[2].set_title("Difference (SSIM)")
             plt.tight_layout()
             plt.show()
-
+
         return diff, score
 
-    elif method in ['match', 'knn']:
+    elif method in ["match", "knn"]:
         # Convert images to grayscale
         gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
-        gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
+        gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)
+
+        if detector == "sift":
+            # SIFT detector
+            sift = cv2.SIFT_create()
+            keypoints1, descriptors1 = sift.detectAndCompute(gray1, None)
+            keypoints2, descriptors2 = sift.detectAndCompute(gray2, None)
+
+        elif detector == "grid":
+            # Grid-based detection
+            keypoints1, descriptors1 = [], []
+            keypoints2, descriptors2 = [], []
+
+            for i in range(0, gray1.shape[0], grid_size):
+                for j in range(0, gray1.shape[1], grid_size):
+                    patch1 = gray1[i:i + grid_size, j:j + grid_size]
+                    patch2 = gray2[i:i + grid_size, j:j + grid_size]
+                    if patch1.size > 0 and patch2.size > 0:
+                        keypoints1.append(cv2.KeyPoint(j + grid_size // 2, i + grid_size // 2, grid_size))
+                        keypoints2.append(cv2.KeyPoint(j + grid_size // 2, i + grid_size // 2, grid_size))
+                        descriptors1.append(np.mean(patch1))
+                        descriptors2.append(np.mean(patch2))
+
+            descriptors1 = np.array(descriptors1).reshape(-1, 1)
+            descriptors2 = np.array(descriptors2).reshape(-1, 1)
+
+        elif detector == "pixel":
+            # Pixel-based direct comparison
+            descriptors1 = gray1.flatten()
+            descriptors2 = gray2.flatten()
+            keypoints1 = [cv2.KeyPoint(x, y, 1) for y in range(gray1.shape[0]) for x in range(gray1.shape[1])]
+            keypoints2 = [cv2.KeyPoint(x, y, 1) for y in range(gray2.shape[0]) for x in range(gray2.shape[1])]
 
-        # Initialize SIFT detector
-        sift = cv2.SIFT_create()
-
-        # Detect and compute features
-        keypoints1, descriptors1 = sift.detectAndCompute(gray1, None)
-        keypoints2, descriptors2 = sift.detectAndCompute(gray2, None)
-
-        if len(keypoints1) == 0 or len(keypoints2) == 0:
-            raise ValueError("No keypoints found in one or both images.")
+        else:
+            raise ValueError("Invalid detector. Use 'sift', 'grid', or 'pixel'.")
+
+        # Handle missing descriptors
+        if descriptors1 is None or descriptors2 is None:
+            raise ValueError("Failed to compute descriptors for one or both images.")
+        # Ensure descriptors are in the correct data type
+        if descriptors1.dtype != np.float32:
+            descriptors1 = descriptors1.astype(np.float32)
+        if descriptors2.dtype != np.float32:
+            descriptors2 = descriptors2.astype(np.float32)
 
         # BFMatcher initialization
         bf = cv2.BFMatcher()
-
-        if method == 'match': # Cross-check matching
+        if method == "match": # Cross-check matching
             bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
             matches = bf.match(descriptors1, descriptors2)
             matches = sorted(matches, key=lambda x: x.distance)
 
             # Filter good matches
-            good_matches = [m for m in matches if m.distance < 0.75 * matches[-1].distance]
+            good_matches = [
+                m for m in matches if m.distance < thr * matches[-1].distance
+            ]
 
-        elif method == 'knn': # KNN matching with ratio test
+        elif method == "knn": # KNN matching with ratio test
+            bf = cv2.BFMatcher()
             matches = bf.knnMatch(descriptors1, descriptors2, k=2)
             # Apply Lowe's ratio test
-            good_matches = [m for m, n in matches if m.distance < 0.75 * n.distance]
+            good_matches = [m for m, n in matches if m.distance < thr * n.distance]
 
         # Calculate similarity score
         similarity_score = len(good_matches) / min(len(keypoints1), len(keypoints2))
         print(f"Number of good matches: {len(good_matches)}")
         print(f"Similarity Score: {similarity_score:.4f}")
-        # Handle case where no good matches are found
+        # Handle case where no good matches are found
         if len(good_matches) == 0:
             print("No good matches found.")
             return good_matches, 0.0, None
 
         # Identify matched keypoints
-        src_pts = np.float32([keypoints1[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
-        dst_pts = np.float32([keypoints2[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
-
-        # Calculate Homography using RANSAC
-        homography_matrix, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
-
+        src_pts = np.float32([keypoints1[m.queryIdx].pt for m in good_matches]).reshape(
+            -1, 1, 2
+        )
+        dst_pts = np.float32([keypoints2[m.trainIdx].pt for m in good_matches]).reshape(
+            -1, 1, 2
+        )
         # Apply the homography to image2
-        h, w = image1.shape[:2]
-        warped_image2 = cv2.warpPerspective(image2, homography_matrix, (w, h))
-
-        # Plot result if needed
-        if plot_:
-            fig, ax = plt.subplots(1, 2, figsize=figsize)
-            ax[0].imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))
-            ax[0].set_title("Image 1")
-            ax[1].imshow(cv2.cvtColor(warped_image2, cv2.COLOR_BGR2RGB))
-            ax[1].set_title("Warped Image 2")
-            plt.tight_layout()
-            plt.show()
+        try:
+            # Calculate Homography using RANSAC
+            homography_matrix, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
+            h, w = image1.shape[:2]
+            warped_image2 = cv2.warpPerspective(image2, homography_matrix, (w, h))
+
+            # Plot result if needed
+            if plot_:
+                fig, ax = plt.subplots(1, 2, figsize=figsize)
+                ax[0].imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB)) if bool_cvt else ax[0].imshow(image1)
+                ax[0].set_title("Image 1")
+                ax[1].imshow(cv2.cvtColor(warped_image2, cv2.COLOR_BGR2RGB)) if bool_cvt else ax[1].imshow(warped_image2)
+                ax[1].set_title("Warped Image 2")
+                plt.tight_layout()
+                plt.show()
+        except Exception as e:
+            print(e)
 
         # Plot matches if needed
         if plot_:
-            result = cv2.drawMatches(image1, keypoints1, image2, keypoints2, good_matches, None, flags=2)
+            result = cv2.drawMatches(
+                image1, keypoints1, image2, keypoints2, good_matches, None, flags=2
+            )
             plt.figure(figsize=figsize)
-            plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
+            plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB)) if bool_cvt else plt.imshow(result)
             plt.title(f"Feature Matches ({len(good_matches)} matches, Score: {similarity_score:.4f})")
-            plt.axis('off')
+            plt.axis("off")
            plt.show()
        # Identify unmatched keypoints
        matched_idx1 = [m.queryIdx for m in good_matches]
        matched_idx2 = [m.trainIdx for m in good_matches]
-
+        matched_kp1 = [kp for i, kp in enumerate(keypoints1) if i in matched_idx1]
+        matched_kp2 = [kp for i, kp in enumerate(keypoints2) if i in matched_idx2]
        unmatched_kp1 = [kp for i, kp in enumerate(keypoints1) if i not in matched_idx1]
        unmatched_kp2 = [kp for i, kp in enumerate(keypoints2) if i not in matched_idx2]
 
-        # Mark unmatched keypoints on the images
-        img1_marked = cv2.drawKeypoints(image1, unmatched_kp1, None, color=(0, 0, 255), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
-        img2_marked = cv2.drawKeypoints(image2, unmatched_kp2, None, color=(0, 0, 255), flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
+        # Mark keypoints on the images
+        img1_match = cv2.drawKeypoints(
+            image1,
+            matched_kp1,
+            None,
+            color=(0, 0, 255),
+            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
+        )
+        img2_match = cv2.drawKeypoints(
+            image2,
+            matched_kp2,
+            None,
+            color=(0, 0, 255),
+            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
+        )
+        img1_unmatch = cv2.drawKeypoints(
+            image1,
+            unmatched_kp1,
+            None,
+            color=(0, 0, 255),
+            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
+        )
+        img2_unmatch = cv2.drawKeypoints(
+            image2,
+            unmatched_kp2,
+            None,
+            color=(0, 0, 255),
+            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
+        )
 
-        # Display results
         if plot_:
             fig, ax = plt.subplots(1, 2, figsize=figsize)
-            ax[0].imshow(cv2.cvtColor(img1_marked, cv2.COLOR_BGR2RGB))
+            ax[0].imshow(cv2.cvtColor(img1_unmatch, cv2.COLOR_BGR2RGB)) if bool_cvt else ax[0].imshow(img1_unmatch)
             ax[0].set_title("Unmatched Keypoints (Image 1)")
-            ax[1].imshow(cv2.cvtColor(img2_marked, cv2.COLOR_BGR2RGB))
+            ax[1].imshow(cv2.cvtColor(img2_unmatch, cv2.COLOR_BGR2RGB)) if bool_cvt else ax[1].imshow(img2_unmatch)
             ax[1].set_title("Unmatched Keypoints (Image 2)")
+            ax[0].axis("off")
+            ax[1].axis("off")
             plt.tight_layout()
             plt.show()
-        return good_matches, similarity_score, homography_matrix
+        if plot_:
+            fig, ax = plt.subplots(1, 2, figsize=figsize)
+            ax[0].imshow(cv2.cvtColor(img1_match, cv2.COLOR_BGR2RGB)) if bool_cvt else ax[0].imshow(img1_match)
+            ax[0].set_title("Matched Keypoints (Image 1)")
+            ax[1].imshow(cv2.cvtColor(img2_match, cv2.COLOR_BGR2RGB)) if bool_cvt else ax[1].imshow(img2_match)
+            ax[1].set_title("Matched Keypoints (Image 2)")
+            ax[0].axis("off")
+            ax[1].axis("off")
+            plt.tight_layout()
+            plt.show()
+        return good_matches, similarity_score#, homography_matrix
 
     else:
         raise ValueError("Invalid method. Use 'ssim', 'match', or 'knn'.")
@@ -939,9 +1043,7 @@ def cn2pinyin(
     Args:
         cn_str (str): Chinese string to convert.
         sep (str): Separator for the output Pinyin string.
-        style (Style): "normal","tone", "tone2","tone3",
-            "finals","finals_tone","finals_tone2","finals_tone3",
-            "initials","bopomofo","bopomofo_first","cyrillic","pl",
+        fmt (Style): "normal","tone", "tone2","tone3","finals","finals_tone","finals_tone2","finals_tone3","initials","bopomofo","bopomofo_first","cyrillic","pl",
     Returns:
         cn_str: The Pinyin representation of the Chinese string.
     """
@@ -1017,6 +1119,7 @@ def counter(list_, verbose=True):
     # print(f"Return a list of the n most common elements:\n{c.most_common()}")
     # print(f"Compute the sum of the counts:\n{c.total()}")
 
+
 def dict2df(dict_, fill=None):
     len_max = 0
     for key, value in dict_.items():
@@ -1031,11 +1134,12 @@ def dict2df(dict_, fill=None):
         dict_[key] = value
     return pd.DataFrame.from_dict(dict_)
 
+
 def text2audio(
     text,
     method=None, # "pyttsx3","gTTS"
     rate=200,
-    slow=False,#"gTTS"
+    slow=False, # "gTTS"
     volume=1.0,
     voice=None,
     lang=None,
@@ -1056,16 +1160,38 @@ def text2audio(
     # )
     """
     if method is not None:
-        methods=["gTTS","pyttsx3","google"]
-        method=strcmp(method, methods)[0]
+        methods = ["gTTS", "pyttsx3", "google"]
+        method = strcmp(method, methods)[0]
     else:
         try:
-            text2audio(text,method='google',rate=rate, slow=slow, volume=volume, voice=voice,lang=lang,gender=gender,age=age,dir_save=dir_save)
+            text2audio(
+                text,
+                method="google",
+                rate=rate,
+                slow=slow,
+                volume=volume,
+                voice=voice,
+                lang=lang,
+                gender=gender,
+                age=age,
+                dir_save=dir_save,
+            )
         except Exception as e:
             print(e)
-            text2audio(text,method='pyttsx3',rate=rate, slow=slow, volume=volume, voice=voice,lang=lang,gender=gender,age=age,dir_save=dir_save)
-
-    if method=="pyttsx3":
+            text2audio(
+                text,
+                method="pyttsx3",
+                rate=rate,
+                slow=slow,
+                volume=volume,
+                voice=voice,
+                lang=lang,
+                gender=gender,
+                age=age,
+                dir_save=dir_save,
+            )
+
+    if method == "pyttsx3":
         import pyttsx3
 
         try:
@@ -1140,27 +1266,29 @@ def text2audio(
             sys.exit()
         except SystemExit:
             pass
-    elif method.lower() in ['google','gtts']:
+    elif method.lower() in ["google", "gtts"]:
         from gtts import gTTS
+
         try:
             if lang is None:
                 from langdetect import detect
+
                 lang = detect(text)
             # Initialize gTTS with the provided parameters
             tts = gTTS(text=text, lang=lang, slow=slow)
         except Exception as e:
             print(f"An error occurred: {e}")
-
+
         print("not realtime reading...")
         if dir_save:
             if "." not in dir_save:
-                dir_save=dir_save+".mp3"
+                dir_save = dir_save + ".mp3"
             tts.save(dir_save)
             print(f"Audio saved to {dir_save}")
         else:
             dir_save = "temp_audio.mp3"
             if "." not in dir_save:
-                dir_save=dir_save+".mp3"
+                dir_save = dir_save + ".mp3"
             tts.save(dir_save)
         try:
             fopen(dir_save)
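Roughly how the gTTS branch above is driven (a sketch; "hello.mp3" is a hypothetical output path, gTTS needs network access, and the language is auto-detected via langdetect when lang is None):

    from py2ls.ips import text2audio

    text2audio("Hello from py2ls", method="google", slow=False, dir_save="hello.mp3")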
@@ -1624,6 +1752,7 @@ def img2pdf(dir_img, kind=None, page=None, dir_save=None, page_size="a4", dpi=30
     def set_dpi(x):
         dpix = dpiy = x
         return image2pdf.get_fixed_dpi_layout_fun((dpix, dpiy))
+
     if kind is None:
         _, kind = os.path.splitext(dir_img)
     if not kind.startswith("."):
@@ -1649,8 +1778,9 @@ def img2pdf(dir_img, kind=None, page=None, dir_save=None, page_size="a4", dpi=30
             imgs.append(path)
     else:
         imgs = [
-            # os.path.isdir(dir_img),
-            dir_img]
+            # os.path.isdir(dir_img),
+            dir_img
+        ]
     print(imgs)
     if page_size:
         if isinstance(page_size, str):
@@ -2196,7 +2326,7 @@ def is_df_abnormal(df: pd.DataFrame, verbose=False) -> bool:
     # Check data types
     data_types = df.dtypes
     # messages.append(f"Data types of columns:\n{data_types}")
-
+
     # Check for an unreasonable number of rows or columns
     if actual_shape[0] < 2 or actual_shape[1] < 2:
         messages.append(
@@ -2347,33 +2477,36 @@ def fload(fpath, kind=None, **kwargs):
 
     def load_csv(fpath, **kwargs):
         from pandas.errors import EmptyDataError
-        engine = kwargs.pop("engine", "pyarrow")# default: None
-        sep = kwargs.pop("sep", None)# default: ','
-        index_col = kwargs.pop("index_col", None)# default: None
-        memory_map = kwargs.pop("memory_map", False)# default: False
-        skipinitialspace = kwargs.pop("skipinitialspace", False)# default: False
-        encoding = kwargs.pop("encoding", "utf-8")# default: "utf-8"
-        on_bad_lines = kwargs.pop("on_bad_lines", "skip")# default: 'error'
-        comment = kwargs.pop("comment", None)# default: None
-        fmt = kwargs.pop("fmt", False)# default:
-        chunksize = kwargs.pop("chunksize", None)# default: None
-
-        #check filesize
-        f_size=round(os.path.getsize(fpath) / 1024 / 1024, 3)
-        if f_size>=50: #50 MB
+
+        engine = kwargs.pop("engine", "pyarrow") # default: None
+        sep = kwargs.pop("sep", None) # default: ','
+        index_col = kwargs.pop("index_col", None) # default: None
+        memory_map = kwargs.pop("memory_map", False) # default: False
+        skipinitialspace = kwargs.pop("skipinitialspace", False) # default: False
+        encoding = kwargs.pop("encoding", "utf-8") # default: "utf-8"
+        on_bad_lines = kwargs.pop("on_bad_lines", "skip") # default: 'error'
+        comment = kwargs.pop("comment", None) # default: None
+        fmt = kwargs.pop("fmt", False) # default:
+        chunksize = kwargs.pop("chunksize", None) # default: None
+
+        # check filesize
+        f_size = round(os.path.getsize(fpath) / 1024 / 1024, 3)
+        if f_size >= 50: # 50 MB
            if chunksize is None:
-                chunksize = 5000
-                print(f"file size is {f_size}MB, then set the chunksize with {chunksize}")
+                chunksize = 5000
+                print(
+                    f"file size is {f_size}MB, then set the chunksize with {chunksize}"
+                )
         engine = "c" if chunksize else engine # when chunksize, recommend 'c'
-        low_memory = kwargs.pop("low_memory", True)# default: True
+        low_memory = kwargs.pop("low_memory", True) # default: True
         low_memory = (
             False if chunksize else True
-        ) # when chunksize, recommend low_memory=False # default:
+        ) # when chunksize, recommend low_memory=False # default:
         verbose = kwargs.pop("verbose", False)
         if run_once_within(reverse=True) and verbose:
             use_pd("read_csv", verbose=verbose)
 
-        if comment is None:# default: None
+        if comment is None: # default: None
             comment = get_comment(
                 fpath, comment=None, encoding="utf-8", lines_to_check=5
             )
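The 50 MB branch above simply switches pandas into chunked reading; the underlying pattern looks like this (a sketch with a hypothetical big.csv, mirroring the engine="c", chunksize=5000, low_memory=False defaults chosen above):

    import pandas as pd

    # chunked reading keeps peak memory flat on large files
    chunks = pd.read_csv("big.csv", engine="c", chunksize=5000, low_memory=False)
    df = pd.concat(chunks, ignore_index=True)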
@@ -2503,7 +2636,9 @@ def fload(fpath, kind=None, **kwargs):
                 try:
                     sep2show = sep if sep != "\t" else "\\t"
                     if verbose:
-                        print(f"trying with: engine={engine}, sep='{sep2show}'")
+                        print(
+                            f"trying with: engine={engine}, sep='{sep2show}'"
+                        )
                     # print(".")
                     df = pd.read_csv(
                         fpath,
@@ -2524,12 +2659,12 @@ def fload(fpath, kind=None, **kwargs):
                 if verbose:
                     (
                         display(df.head(2))
-                        if isinstance(df, pd.DataFrame)
+                        if isinstance(df, pd.DataFrame)
                         else display("it is not a DataFrame")
                     )
                     (
                         print(f"shape: {df.shape}")
-                        if isinstance(df, pd.DataFrame)
+                        if isinstance(df, pd.DataFrame)
                         else display("it is not a DataFrame")
                     )
                 return df
@@ -2663,9 +2798,10 @@ def fload(fpath, kind=None, **kwargs):
         doc = Document(fpath)
         content = [para.text for para in doc.paragraphs]
         return content
-
+
     def load_rtf(file_path):
         from striprtf.striprtf import rtf_to_text
+
         try:
             with open(file_path, "r") as file:
                 rtf_content = file.read()
@@ -2715,7 +2851,7 @@ def fload(fpath, kind=None, **kwargs):
         "xml",
         "ipynb",
         "mtx",
-        "rtf"
+        "rtf",
     ]
     zip_types = [
         "gz",
@@ -2735,7 +2871,7 @@ def fload(fpath, kind=None, **kwargs):
     if kind not in supported_types:
         print(
             f'Warning:\n"{kind}" is not in the supported list '
-        ) # {supported_types}')
+        )  # {supported_types}')
 
     if kind == "docx":
         return load_docx(fpath)
@@ -2760,10 +2896,11 @@ def fload(fpath, kind=None, **kwargs):
         if run_once_within(reverse=True) and verbose:
             use_pd("read_pickle")
         try:
-            res_=pd.read_pickle(fpath, **kwargs)
+            res_ = pd.read_pickle(fpath, **kwargs)
         except Exception as e:
             import pickle
-            with open('sgd_classifier.pkl', 'rb') as f:
+
+            with open("sgd_classifier.pkl", "rb") as f:
                 res_ = pickle.load(f)
         return res_
     elif kind in ["ods", "ods", "odt"]:
@@ -2775,21 +2912,34 @@ def fload(fpath, kind=None, **kwargs):
         engine = kwargs.get("engine", "xlrd")
         kwargs.pop("engine", None)
         content = load_excel(fpath, engine=engine, **kwargs)
-        print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) and verbose else None
+        (
+            print(f"shape: {content.shape}")
+            if isinstance(content, pd.DataFrame) and verbose
+            else None
+        )
         display(content.head(3)) if isinstance(content, pd.DataFrame) else None
         return content
     elif kind == "xlsx":
         verbose = kwargs.pop("verbose", False)
         content = load_excel(fpath, **kwargs)
-        display(content.head(3)) if isinstance(content, pd.DataFrame) and verbose else None
+        (
+            display(content.head(3))
+            if isinstance(content, pd.DataFrame) and verbose
+            else None
+        )
         print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) else None
         return content
     elif kind == "mtx":
         from scipy.io import mmread
+
         verbose = kwargs.pop("verbose", False)
         dat_mtx = mmread(fpath)
         content = pd.DataFrame.sparse.from_spmatrix(dat_mtx, **kwargs)
-        display(content.head(3)) if isinstance(content, pd.DataFrame) and verbose else None
+        (
+            display(content.head(3))
+            if isinstance(content, pd.DataFrame) and verbose
+            else None
+        )
         print(f"shape: {content.shape}")
         return content
     elif kind == "ipynb":
@@ -2904,34 +3054,34 @@ def fopen(fpath):
     import os
     import platform
     import sys
+
     try:
         # Check if the file exists
         if not os.path.isfile(fpath):
             print(f"Error: The file does not exist - {fpath}")
             return
-
+
         # Get the system platform
         system = platform.system()
 
         # Platform-specific file opening commands
         if system == "Darwin": # macOS
-            os.system(f"open \"{fpath}\"")
+            os.system(f'open "{fpath}"')
         elif system == "Windows": # Windows
             # Ensure the path is handled correctly in Windows, escape spaces
-            os.system(f"start \"\" \"{fpath}\"")
+            os.system(f'start "" "{fpath}"')
         elif system == "Linux": # Linux
-            os.system(f"xdg-open \"{fpath}\"")
+            os.system(f'xdg-open "{fpath}"')
         elif system == "Java": # Java (or other unhandled systems)
             print(f"Opening {fpath} on unsupported system.")
         else:
             print(f"Unsupported OS: {system}")
-
+
         print(f"Successfully opened {fpath} with the default application.")
     except Exception as e:
         print(f"Error opening file {fpath}: {e}")
 
 
-
 def fupdate(fpath, content=None, how="head"):
     """
     Update a file by adding new content at the top and moving the old content to the bottom.
@@ -3346,9 +3496,10 @@ def fsave(
     except Exception as e:
         try:
             import pickle
-            with open(fpath, 'wb') as f:
+
+            with open(fpath, "wb") as f:
                 pickle.dump(content, f)
-            print('done!', fpath)
+            print("done!", fpath)
         except Exception as e:
             raise ValueError(
                 f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
@@ -3508,9 +3659,9 @@ def isa(content, kind):
     """
     if "img" in kind.lower() or "image" in kind.lower():
         return is_image(content)
-    elif 'vid' in kind.lower():
+    elif "vid" in kind.lower():
         return is_video(content)
-    elif 'aud' in kind.lower():
+    elif "aud" in kind.lower():
         return is_audio(content)
     elif "doc" in kind.lower():
         return is_document(content)
@@ -3569,8 +3720,8 @@ def get_os(full=False, verbose=False):
     import os
     import subprocess
     from datetime import datetime, timedelta
-    from collections import defaultdict
 
+
     def get_os_type():
         os_name = sys.platform
         if "dar" in os_name:
@@ -3583,7 +3734,8 @@ def get_os(full=False, verbose=False):
         else:
             print(f"{os_name}, returned 'None'")
             return None
-
+    if not full:
+        return get_os_type()
     def get_os_info():
         """Get the detailed OS name, version, and other platform-specific details."""
 
@@ -3755,22 +3907,28 @@ def get_os(full=False, verbose=False):
 
     def get_system_uptime():
         """Returns system uptime as a human-readable string."""
-        boot_time = datetime.fromtimestamp(psutil.boot_time())
-        uptime = datetime.now() - boot_time
-        return str(uptime).split(".")[0] # Remove microseconds
+        try:
+            boot_time = datetime.fromtimestamp(psutil.boot_time())
+            uptime = datetime.now() - boot_time
+            return str(uptime).split(".")[0] # Remove microseconds
+        except:
+            return None
 
     def get_active_processes(limit=10):
-        processes = []
-        for proc in psutil.process_iter(
-            ["pid", "name", "cpu_percent", "memory_percent"]
-        ):
-            try:
-                processes.append(proc.info)
-            except psutil.NoSuchProcess:
-                pass
-        # Handle NoneType values by treating them as 0
-        processes.sort(key=lambda x: x["cpu_percent"] or 0, reverse=True)
-        return processes[:limit]
+        try:
+            processes = []
+            for proc in psutil.process_iter(
+                ["pid", "name", "cpu_percent", "memory_percent"]
+            ):
+                try:
+                    processes.append(proc.info)
+                except psutil.NoSuchProcess:
+                    pass
+            # Handle NoneType values by treating them as 0
+            processes.sort(key=lambda x: x["cpu_percent"] or 0, reverse=True)
+            return processes[:limit]
+        except:
+            return None
 
     def get_virtual_environment_info():
         """Checks if the script is running in a virtual environment and returns details."""
@@ -3801,19 +3959,22 @@ def get_os(full=False, verbose=False):
 
     def get_battery_status():
         """Returns battery status."""
-        battery = psutil.sensors_battery()
-        if battery:
-            time_left = (
-                str(timedelta(seconds=battery.secsleft))
-                if battery.secsleft != psutil.POWER_TIME_UNLIMITED
-                else "Charging/Unlimited"
-            )
-            return {
-                "Percentage": battery.percent,
-                "Plugged In": battery.power_plugged,
-                "Time Left": time_left,
-            }
-        return {"Status": "No battery detected"}
+        try:
+            battery = psutil.sensors_battery()
+            if battery:
+                time_left = (
+                    str(timedelta(seconds=battery.secsleft))
+                    if battery.secsleft != psutil.POWER_TIME_UNLIMITED
+                    else "Charging/Unlimited"
+                )
+                return {
+                    "Percentage": battery.percent,
+                    "Plugged In": battery.power_plugged,
+                    "Time Left": time_left,
+                }
+            return {"Status": "No battery detected"}
+        except:
+            return {"Status": "No battery detected"}
 
     def get_disk_io():
         """Returns disk I/O statistics."""
@@ -3899,8 +4060,8 @@ def get_os(full=False, verbose=False):
         "network": {},
         "network io": get_network_io(),
         "gpu": [],
-        "temperatures": get_temperatures(),
-        "battery": get_battery_status(),
+        # "temperatures": get_temperatures(),
+        # "battery": get_battery_status(),
         "active processes": get_active_processes(),
         "environment": {
             "user": os.getenv("USER", "Unknown"),
@@ -3984,27 +4145,26 @@ def get_os(full=False, verbose=False):
         pnrint(e)
     return res
 
-import re
-import stat
-import platform
+
 def listdir(
     rootdir,
     kind=None,
     sort_by="name",
     ascending=True,
-    contains=None,# filter filenames using re
-    booster=False,# walk in subfolders
-    depth = 0, # 0: no subfolders; None: all subfolders; [int 1,2,3]: levels of subfolders
+    contains=None, # filter filenames using re
+    booster=False, # walk in subfolders
+    depth=0, # 0: no subfolders; None: all subfolders; [int 1,2,3]: levels of subfolders
     hidden=False, # Include hidden files/folders
     orient="list",
     output="df", # "df", 'list','dict','records','index','series'
     verbose=True,
-):
+):
    def is_hidden(filepath):
        """Check if a file or folder is hidden."""
        system = platform.system()
        if system == "Windows":
            import ctypes
+
            attribute = ctypes.windll.kernel32.GetFileAttributesW(filepath)
            if attribute == -1:
                raise FileNotFoundError(f"File {filepath} not found.")
@@ -4019,6 +4179,7 @@ def listdir(
             return os.environ.get("USERNAME", "Unknown")
         else:
             import pwd
+
             return pwd.getpwuid(os.getuid()).pw_name
 
     if isinstance(kind, list):
@@ -4030,7 +4191,7 @@ def listdir(
             sort_by=sort_by,
             ascending=ascending,
             contains=contains,
-            depth=depth,# walk in subfolders
+            depth=depth, # walk in subfolders
             hidden=hidden,
             orient=orient,
             output=output,
@@ -4046,21 +4207,21 @@ def listdir(
     i = 0
     f = {
         "name": [],
-        'kind':[],
+        "kind": [],
         "length": [],
-        "basename":[],
+        "basename": [],
         "path": [],
         "created_time": [],
         "modified_time": [],
         "last_open_time": [],
         "size": [],
-        "permission":[],
-        "owner":[],
-        "rootdir":[],
+        "permission": [],
+        "owner": [],
+        "rootdir": [],
         "fname": [],
         "fpath": [],
-        "num":[],
-        "os":[]
+        "num": [],
+        "os": [],
     }
     root_depth = rootdir.rstrip(os.sep).count(os.sep)
     for dirpath, dirnames, ls in os.walk(rootdir):
@@ -4069,30 +4230,32 @@ def listdir(
         if depth is not None and current_depth > depth:
             dirnames[:] = [] # Prevent further traversal into subfolders
             continue
-
+
         if not hidden:
-            dirnames[:] = [d for d in dirnames if not is_hidden(os.path.join(dirpath, d))]
+            dirnames[:] = [
+                d for d in dirnames if not is_hidden(os.path.join(dirpath, d))
+            ]
             ls = [i for i in ls if not is_hidden(os.path.join(dirpath, i))]
 
         for dirname in dirnames:
-            if kind is not None and kind not in fd: # do not check folders
+            if kind is not None and kind not in fd: # do not check folders
                 continue
             if contains and not re.search(contains, dirname):
                 continue
             dirname_path = os.path.join(dirpath, dirname)
-            fpath = os.path.join(os.path.dirname(dirname_path), dirname)
+            fpath = os.path.join(os.path.dirname(dirname_path), dirname)
             try:
                 stats_file = os.stat(fpath)
             except Exception as e:
                 print(e)
                 continue
             filename, file_extension = os.path.splitext(dirname)
-            file_extension = file_extension if file_extension!='' else None
+            file_extension = file_extension if file_extension != "" else None
             f["name"].append(filename)
-            f['kind'].append(file_extension)
+            f["kind"].append(file_extension)
             f["length"].append(len(filename))
             f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
-            f['basename'].append(os.path.basename(dirname_path))
+            f["basename"].append(os.path.basename(dirname_path))
             f["path"].append(os.path.join(os.path.dirname(dirname_path), dirname))
             f["created_time"].append(
                 pd.to_datetime(int(os.path.getctime(dirname_path)), unit="s")
@@ -4110,7 +4273,7 @@ def listdir(
             f["fpath"].append(fpath) # will be removed
             i += 1
         for item in ls:
-            if kind in fd:# only check folders
+            if kind in fd: # only check folders
                 continue
             if contains and not re.search(contains, item):
                 continue
@@ -4127,7 +4290,16 @@ def listdir(
             is_file = kind.lower() in file_extension.lower() and (
                 os.path.isfile(item_path)
             )
-            if kind in [".doc", ".img", ".zip",".code",".file",".image",".video",".audio"]: # 选择大的类别
+            if kind in [
+                ".doc",
+                ".img",
+                ".zip",
+                ".code",
+                ".file",
+                ".image",
+                ".video",
+                ".audio",
+            ]: # 选择大的类别
                 if kind != ".folder" and not isa(item_path, kind):
                     continue
             elif kind in [".all"]:
@@ -4135,13 +4307,13 @@ def listdir(
             else: # 精确到文件的后缀
                 if not is_folder and not is_file:
                     continue
-            file_extension = file_extension if file_extension!='' else None
+            file_extension = file_extension if file_extension != "" else None
             f["name"].append(filename)
-            f['kind'].append(file_extension)
+            f["kind"].append(file_extension)
             f["length"].append(len(filename))
             f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
-            f['basename'].append(os.path.basename(item_path))
-            f["path"].append(os.path.join(os.path.dirname(item_path), item))
+            f["basename"].append(os.path.basename(item_path))
+            f["path"].append(os.path.join(os.path.dirname(item_path), item))
             f["created_time"].append(
                 pd.to_datetime(int(os.path.getctime(item_path)), unit="s")
             )
@@ -4152,7 +4324,9 @@ def listdir(
                 pd.to_datetime(int(os.path.getatime(item_path)), unit="s")
             )
             f["permission"].append(stat.filemode(stats_file.st_mode)),
-            f["owner"].append(os.getlogin() if platform.system() != "Windows" else "N/A"),
+            f["owner"].append(
+                os.getlogin() if platform.system() != "Windows" else "N/A"
+            ),
             f["fname"].append(filename) # will be removed
             f["fpath"].append(fpath) # will be removed
             f["rootdir"].append(dirpath)
@@ -4162,11 +4336,28 @@ def listdir(
     f["os"] = get_os() # os.uname().machine
     # if not booster: # go deeper subfolders
     # break
-    #* convert to pd.DataFrame
+    # * convert to pd.DataFrame
     f = pd.DataFrame(f)
-    f=f[["basename","name","kind","length","size","num","path","created_time",
-         "modified_time","last_open_time","rootdir",
-         "permission","owner","os","fname","fpath",]]
+    f = f[
+        [
+            "basename",
+            "name",
+            "kind",
+            "length",
+            "size",
+            "num",
+            "path",
+            "created_time",
+            "modified_time",
+            "last_open_time",
+            "rootdir",
+            "permission",
+            "owner",
+            "os",
+            "fname",
+            "fpath",
+        ]
+    ]
     if "nam" in sort_by.lower():
         f = sort_kind(f, by="name", ascending=ascending)
     elif "crea" in sort_by.lower():
@@ -4183,6 +4374,7 @@ def listdir(
         return f
     else:
         from box import Box
+
         if "l" in orient.lower(): # list # default
             res_output = Box(f.to_dict(orient="list"))
             return res_output
@@ -4195,6 +4387,7 @@ def listdir(
         if "se" in orient.lower(): # records
             return Box(f.to_dict(orient="series"))
 
+
 def listfunc(lib_name, opt="call"):
     if opt == "call":
         funcs = [func for func in dir(lib_name) if callable(getattr(lib_name, func))]
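A typical call into the reworked listdir (a sketch; the directory is a placeholder):

    from py2ls.ips import listdir

    # non-recursive listing of PDFs, newest first, returned as a DataFrame
    df = listdir("/path/to/dir", kind=".pdf", sort_by="created_time",
                 ascending=False, depth=0, output="df")
    print(df[["name", "size", "created_time"]].head())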
@@ -4206,6 +4399,7 @@ def listfunc(lib_name, opt="call"):
 def func_list(lib_name, opt="call"):
     return list_func(lib_name, opt=opt)
 
+
 def copy(src, dst, overwrite=False):
     """Copy a file from src to dst."""
     try:
@@ -4223,25 +4417,31 @@ def copy(src, dst, overwrite=False):
                 if overwrite:
                     dst.unlink()
                 else:
-                    dst = dst.with_name(f"{dst.stem}_{datetime.now().strftime('_%H%M%S')}{dst.suffix}")
+                    dst = dst.with_name(
+                        f"{dst.stem}_{datetime.now().strftime('_%H%M%S')}{dst.suffix}"
+                    )
             shutil.copy(src, dst)
             print(f"\n Done! copy to {dst}\n")
         else:
-            dst = dst/src.name
+            dst = dst / src.name
             if dst.exists():
                 if overwrite:
                     shutil.rmtree(dst) # Remove existing directory
                 else:
-                    dst = dst.with_name(f"{dst.stem}_{datetime.now().strftime('%H%M%S')}")
+                    dst = dst.with_name(
+                        f"{dst.stem}_{datetime.now().strftime('%H%M%S')}"
+                    )
             shutil.copytree(src, dst)
             print(f"\n Done! copy to {dst}\n")
 
     except Exception as e:
         logging.error(f"Failed {e}")
-
+
+
 def cut(src, dst, overwrite=False):
     return move(src=src, dst=dst, overwrite=overwrite)
 
+
 def move(src, dst, overwrite=False):
     try:
         dir_par_dst = os.path.dirname(dst)
@@ -4256,23 +4456,26 @@ def move(src, dst, overwrite=False):
                 # dst.unlink() # Delete the existing file
                 pass
             else:
-                dst = dst.with_name(f"{dst.stem}_{datetime.now().strftime('_%H%M%S')}{dst.suffix}")
+                dst = dst.with_name(
+                    f"{dst.stem}_{datetime.now().strftime('_%H%M%S')}{dst.suffix}"
+                )
         shutil.move(src, dst)
         print(f"\n Done! moved to {dst}\n")
     except Exception as e:
         logging.error(f"Failed to move file from {src} to {dst}: {e}")
-
+
+
 def delete(fpath):
-    """Delete a file/folder."""
+    """Delete a file/folder."""
     try:
         fpath = Path(fpath)
-        if not fpath.is_dir(): # file
+        if not fpath.is_dir(): # file
             if fpath.exists():
                 fpath.unlink()
                 print(f"\n Done! delete {fpath}\n")
             else:
                 print(f"File '{fpath}' does not exist.")
-        else:#folder
+        else: # folder
             if fpath.exists():
                 shutil.rmtree(fpath) # Remove existing directory
                 print(f"\n Done! delete {fpath}\n")
@@ -4280,27 +4483,31 @@ def delete(fpath):
                 print(f"Folder '{fpath}' does not exist.")
     except Exception as e:
         logging.error(f"Failed to delete {fpath}: {e}")
+
+
 def rename(fpath, dst, smart=True):
     """Rename a file or folder."""
     try:
-        src_kind,dst_kind = None,None
+        src_kind, dst_kind = None, None
         if smart:
-            dir_name_src=os.path.dirname(fpath)
-            dir_name_dst=os.path.dirname(dst)
-            src_kind=os.path.splitext(fpath)[1]
-            dst_kind=os.path.splitext(dst)[1]
-            if dir_name_dst!=dir_name_src:
-                dst=os.path.join(dir_name_src,dst)
+            dir_name_src = os.path.dirname(fpath)
+            dir_name_dst = os.path.dirname(dst)
+            src_kind = os.path.splitext(fpath)[1]
+            dst_kind = os.path.splitext(dst)[1]
+            if dir_name_dst != dir_name_src:
+                dst = os.path.join(dir_name_src, dst)
         if dst_kind is not None and src_kind is not None:
-            if dst_kind!=src_kind:
-                dst=dst + src_kind
+            if dst_kind != src_kind:
+                dst = dst + src_kind
         if os.path.exists(fpath):
-            os.rename(fpath,dst)
+            os.rename(fpath, dst)
             print(f"Done! rename to {dst}")
         else:
            print(f"Failed: {fpath} does not exist.")
    except Exception as e:
        logging.error(f"Failed to rename {fpath} to {dst}: {e}")
+
+
 def mkdir_nest(fpath: str) -> str:
     """
     Create nested directories based on the provided file path.
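The file helpers above all follow the same shape: pathlib/shutil wrapped in try/except, with a timestamp suffix instead of clobbering when overwrite=False. A sketch (paths are placeholders):

    from py2ls.ips import copy, move, rename, delete

    copy("/data/a.txt", "/backup/a.txt", overwrite=False)  # collision -> a_HHMMSS.txt
    rename("/data/a.txt", "b")   # smart=True keeps the source folder and .txt suffix
    move("/data/b.txt", "/done/b.txt", overwrite=True)
    delete("/done/b.txt")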
@@ -4319,9 +4526,13 @@ def mkdir_nest(fpath: str) -> str:
     dir_parts = fpath.split(f_slash) # Split the path by the OS-specific separator
 
     # Start creating directories from the root to the desired path
-    root_dir = os.path.splitdrive(fpath)[0] # Get the root drive on Windows (e.g., 'C:')
-    current_path = root_dir if root_dir else f_slash # Start from the root directory or POSIX '/'
-
+    root_dir = os.path.splitdrive(fpath)[
+        0
+    ] # Get the root drive on Windows (e.g., 'C:')
+    current_path = (
+        root_dir if root_dir else f_slash
+    ) # Start from the root directory or POSIX '/'
+
     for part in dir_parts:
         if part:
             current_path = os.path.join(current_path, part)
@@ -4346,7 +4557,7 @@ def mkdir(pardir: str = None, chdir: str | list = None, overwrite=False):
     - str: The path of the created directory or an error message.
     """
     rootdir = []
-    pardir= mkdir_nest(pardir)
+    pardir = mkdir_nest(pardir)
     if chdir is None:
         return pardir
     else:
@@ -4465,6 +4676,7 @@ def figsave(*args, dpi=300):
             img.save(fname, format=ftype.upper(), dpi=(dpi, dpi))
         elif isinstance(img, np.ndarray):
             import cv2
+
             # Check the shape of the image to determine color mode
             if img.ndim == 2:
                 # Grayscale image
@@ -4496,8 +4708,13 @@ def figsave(*args, dpi=300):
             )
         else:
             plt.savefig(
-                fname, format=ftype.lower(), dpi=dpi, bbox_inches="tight", transparent=True,pad_inches=0
-            )
+                fname,
+                format=ftype.lower(),
+                dpi=dpi,
+                bbox_inches="tight",
+                transparent=True,
+                pad_inches=0,
+            )
     elif ftype.lower() == "emf":
         plt.savefig(fname, format="emf", dpi=dpi, bbox_inches="tight", pad_inches=0)
     elif ftype.lower() == "fig":
@@ -4534,6 +4751,7 @@ def is_num(s):
 def isnum(s):
     return is_num(s)
 
+
 def is_image(fpath):
     """
     Determine if a given file is an image based on MIME type and file extension.
@@ -4544,37 +4762,60 @@ def is_image(fpath):
     Returns:
         bool: True if the file is a recognized image, False otherwise.
     """
-    import mimetypes
-    # Known image MIME types
-    image_mime_types = {
-        "image/jpeg",
-        "image/png",
-        "image/gif",
-        "image/bmp",
-        "image/webp",
-        "image/tiff",
-        "image/x-icon",
-        "image/svg+xml",
-        "image/heic",
-        "image/heif",
-    }
+    from PIL import Image
+    if isinstance(fpath,str):
+        import mimetypes
+
+        # Known image MIME types
+        image_mime_types = {
+            "image/jpeg",
+            "image/png",
+            "image/gif",
+            "image/bmp",
+            "image/webp",
+            "image/tiff",
+            "image/x-icon",
+            "image/svg+xml",
+            "image/heic",
+            "image/heif",
+        }
 
-    # Known image file extensions
-    image_extensions = {
-        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".tif", ".tiff",
-        ".ico", ".svg", ".heic", ".heif",".fig",".jpg"
-    }
+        # Known image file extensions
+        image_extensions = {
+            ".jpg",
+            ".jpeg",
+            ".png",
+            ".gif",
+            ".bmp",
+            ".webp",
+            ".tif",
+            ".tiff",
+            ".ico",
+            ".svg",
+            ".heic",
+            ".heif",
+            ".fig",
+            ".jpg",
+        }
 
-    # Get MIME type using mimetypes
-    mime_type, _ = mimetypes.guess_type(fpath)
+        # Get MIME type using mimetypes
+        mime_type, _ = mimetypes.guess_type(fpath)
 
-    # Check MIME type
-    if mime_type in image_mime_types:
-        return True
+        # Check MIME type
+        if mime_type in image_mime_types:
+            return True
 
-    # Fallback: Check file extension
-    ext = os.path.splitext(fpath)[-1].lower() # Get the file extension and ensure lowercase
-    if ext in image_extensions:
+        # Fallback: Check file extension
+        ext = os.path.splitext(fpath)[
+            -1
+        ].lower() # Get the file extension and ensure lowercase
+        if ext in image_extensions:
+            return True
+
+        return False
+
+    elif isinstance(fpath, Image.Image):
+        # If the input is a PIL Image object
         return True
 
     return False
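With the change above, is_image accepts either a path or an in-memory PIL image. A sketch (file names are hypothetical):

    from PIL import Image
    from py2ls.ips import is_image

    is_image("photo.jpg")               # True, via MIME type / extension checks
    is_image(Image.new("RGB", (4, 4)))  # True: a PIL.Image.Image instance
    is_image("notes.txt")               # False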
@@ -4590,6 +4831,7 @@ def is_video(fpath):
4590
4831
  bool: True if the file is a recognized video, False otherwise.
4591
4832
  """
4592
4833
  import mimetypes
4834
+
4593
4835
  # Known video MIME types
4594
4836
  video_mime_types = {
4595
4837
  "video/mp4",
@@ -4610,8 +4852,22 @@ def is_video(fpath):
4610
4852
 
4611
4853
  # Known video file extensions
4612
4854
  video_extensions = {
4613
- ".mp4", ".mov", ".avi", ".mkv", ".flv", ".webm", ".ogv", ".wmv",
4614
- ".mpg", ".mpeg", ".3gp", ".mpeg2", ".asf", ".ts", ".m4v", ".divx",
4855
+ ".mp4",
4856
+ ".mov",
4857
+ ".avi",
4858
+ ".mkv",
4859
+ ".flv",
4860
+ ".webm",
4861
+ ".ogv",
4862
+ ".wmv",
4863
+ ".mpg",
4864
+ ".mpeg",
4865
+ ".3gp",
4866
+ ".mpeg2",
4867
+ ".asf",
4868
+ ".ts",
4869
+ ".m4v",
4870
+ ".divx",
4615
4871
  }
4616
4872
 
4617
4873
  # Get MIME type using mimetypes
@@ -4622,12 +4878,15 @@ def is_video(fpath):
4622
4878
  return True
4623
4879
 
4624
4880
  # Fallback: Check file extension
4625
- ext = os.path.splitext(fpath)[-1].lower() # Get the file extension and ensure lowercase
4881
+ ext = os.path.splitext(fpath)[
4882
+ -1
4883
+ ].lower() # Get the file extension and ensure lowercase
4626
4884
  if ext in video_extensions:
4627
4885
  return True
4628
4886
 
4629
4887
  return False
4630
4888
 
4889
+
4631
4890
  def is_document(fpath):
4632
4891
  """
4633
4892
  Determine if a given file is a document based on MIME type and file extension.
@@ -4639,6 +4898,7 @@ def is_document(fpath):
4639
4898
  bool: True if the file is a recognized document, False otherwise.
4640
4899
  """
4641
4900
  import mimetypes
4901
+
4642
4902
  # Define known MIME types for documents
4643
4903
  document_mime_types = {
4644
4904
  "text/",
@@ -4679,18 +4939,23 @@ def is_document(fpath):
4679
4939
 
4680
4940
  # Get MIME type
4681
4941
  mime_type, _ = mimetypes.guess_type(fpath)
4682
-
4942
+
4683
4943
  # Check MIME type
4684
- if mime_type and any(mime_type.startswith(doc_type) for doc_type in document_mime_types):
4944
+ if mime_type and any(
4945
+ mime_type.startswith(doc_type) for doc_type in document_mime_types
4946
+ ):
4685
4947
  return True
4686
4948
 
4687
4949
  # Fallback: Check file extension
4688
- ext = os.path.splitext(fpath)[-1].lower() # Get the extension, ensure it's lowercase
4950
+ ext = os.path.splitext(fpath)[
4951
+ -1
4952
+ ].lower() # Get the extension, ensure it's lowercase
4689
4953
  if ext in document_extensions:
4690
4954
  return True
4691
4955
 
4692
4956
  return False
4693
4957
 
4958
+
4694
4959
  def is_audio(fpath):
4695
4960
  """
4696
4961
  Determine if a given file is an audio file based on MIME type and file extension.
@@ -4702,6 +4967,7 @@ def is_audio(fpath):
4702
4967
  bool: True if the file is a recognized audio file, False otherwise.
4703
4968
  """
4704
4969
  import mimetypes
4970
+
4705
4971
  # Known audio MIME types
4706
4972
  audio_mime_types = {
4707
4973
  "audio/mpeg",
@@ -4720,8 +4986,19 @@ def is_audio(fpath):
4720
4986
 
4721
4987
  # Known audio file extensions
4722
4988
  audio_extensions = {
4723
- ".mp3", ".wav", ".ogg", ".aac", ".flac", ".midi", ".m4a",
4724
- ".aiff", ".pcm", ".wma", ".ape", ".alac", ".opus",
4989
+ ".mp3",
4990
+ ".wav",
4991
+ ".ogg",
4992
+ ".aac",
4993
+ ".flac",
4994
+ ".midi",
4995
+ ".m4a",
4996
+ ".aiff",
4997
+ ".pcm",
4998
+ ".wma",
4999
+ ".ape",
5000
+ ".alac",
5001
+ ".opus",
4725
5002
  }
4726
5003
 
4727
5004
  # Get MIME type using mimetypes
@@ -4732,12 +5009,15 @@ def is_audio(fpath):
4732
5009
  return True
4733
5010
 
4734
5011
  # Fallback: Check file extension
4735
- ext = os.path.splitext(fpath)[-1].lower() # Get the file extension and ensure lowercase
5012
+ ext = os.path.splitext(fpath)[
5013
+ -1
5014
+ ].lower() # Get the file extension and ensure lowercase
4736
5015
  if ext in audio_extensions:
4737
5016
  return True
4738
5017
 
4739
5018
  return False
4740
5019
 
5020
+
4741
5021
  def is_code(fpath):
4742
5022
  """
4743
5023
  Determine if a given file is a code file based on file extension and optionally MIME type.
@@ -4751,16 +5031,37 @@ def is_code(fpath):
4751
5031
  """
4752
5032
  # Known programming and scripting file extensions
4753
5033
  code_extensions = {
4754
- ".m", ".py", ".ipynb", ".js", ".html", ".css", ".java", ".cpp", ".h", ".cs", ".go",
4755
- ".rs", ".sh", ".rb", ".swift", ".ts", ".json", ".xml", ".yaml", ".toml", ".bash", ".r"
5034
+ ".m",
5035
+ ".py",
5036
+ ".ipynb",
5037
+ ".js",
5038
+ ".html",
5039
+ ".css",
5040
+ ".java",
5041
+ ".cpp",
5042
+ ".h",
5043
+ ".cs",
5044
+ ".go",
5045
+ ".rs",
5046
+ ".sh",
5047
+ ".rb",
5048
+ ".swift",
5049
+ ".ts",
5050
+ ".json",
5051
+ ".xml",
5052
+ ".yaml",
5053
+ ".toml",
5054
+ ".bash",
5055
+ ".r",
4756
5056
  }
4757
5057
 
4758
5058
  # Check file extension
4759
- ext = os.path.splitext(fpath)[-1].lower()
5059
+ ext = os.path.splitext(fpath)[-1].lower()
4760
5060
  if ext in code_extensions:
4761
- return True
5061
+ return True
4762
5062
  return False
4763
-
5063
+
5064
+
4764
5065
  def is_zip(fpath):
4765
5066
  import mimetypes
4766
5067
 
@@ -4828,6 +5129,105 @@ def str2list(str_):
4828
5129
  [l.append(x) for x in str_]
4829
5130
  return l
4830
5131
 
5132
+ def str2words(content, method="combined", custom_dict=None, sym_spell_params=None, use_threading=True):
5133
+ """
5134
+ Ultimate text correction function supporting multiple methods,
5135
+ lists or strings, and domain-specific corrections.
5136
+
5137
+ Parameters:
5138
+ content (str or list): Input text or list of strings to correct.
5139
+ method (str): Correction method ('textblob', 'sym', 'combined').
5140
+ custom_dict (dict): Custom dictionary for domain-specific corrections.
5141
+ sym_spell_params (dict): Parameters for initializing SymSpell.
5142
+
5143
+ Returns:
5144
+ str or list: Corrected text or list of corrected strings.
5145
+ """
5146
+ from textblob import TextBlob
5147
+ from symspellpy import SymSpell, Verbosity
5148
+ from functools import lru_cache
5149
+ import pkg_resources
5150
+ from concurrent.futures import ThreadPoolExecutor
5151
+
5152
+ def initialize_symspell(max_edit_distance=2, prefix_length=7):
5153
+ """Initialize SymSpell for advanced spelling correction."""
5154
+ sym_spell = SymSpell(max_edit_distance, prefix_length)
5155
+ dictionary_path = pkg_resources.resource_filename(
5156
+ "symspellpy",
5157
+ # "frequency_bigramdictionary_en_243_342.txt",
5158
+ "frequency_dictionary_en_82_765.txt",
5159
+ )
5160
+
5161
+ sym_spell.load_dictionary(dictionary_path, term_index=0, count_index=1)
5162
+ return sym_spell
5163
+
5164
+ def segment_words(text, sym_spell):
5165
+ """Segment concatenated words into separate words."""
5166
+ segmented = sym_spell.word_segmentation(text)
5167
+ return segmented.corrected_string
5168
+
5169
+ @lru_cache(maxsize=1000) # Cache results for repeated corrections
5170
+ def advanced_correction(word, sym_spell):
5171
+ """Correct a single word using SymSpell."""
5172
+ suggestions = sym_spell.lookup(word, Verbosity.CLOSEST, max_edit_distance=2)
5173
+ return suggestions[0].term if suggestions else word
5174
+
5175
+ def apply_custom_corrections(word, custom_dict):
5176
+ """Apply domain-specific corrections using a custom dictionary."""
5177
+ return custom_dict.get(word.lower(), word)
5178
+ def preserve_case(original_word, corrected_word):
5179
+ """
5180
+ Preserve the case of the original word in the corrected word.
5181
+ """
5182
+ if original_word.isupper():
5183
+ return corrected_word.upper()
5184
+ elif original_word[0].isupper():
5185
+ return corrected_word.capitalize()
5186
+ else:
5187
+ return corrected_word.lower()
5188
+ def process_string(text, method, sym_spell=None, custom_dict=None):
5189
+ """
5190
+ Process a single string for spelling corrections.
5191
+ Handles TextBlob, SymSpell, and custom corrections.
5192
+ """
5193
+ if method in ("sym", "combined") and sym_spell:
5194
+ text = segment_words(text, sym_spell)
5195
+
5196
+ if method in ("textblob", "combined"):
5197
+ text = str(TextBlob(text).correct())
5198
+
5199
+ corrected_words = []
5200
+ for word in text.split():
5201
+ original_word = word
5202
+ if method in ("sym", "combined") and sym_spell:
5203
+ word = advanced_correction(word, sym_spell)
5204
+
5205
+ # Finally, apply custom domain-specific corrections
5206
+ if custom_dict:
5207
+ word = apply_custom_corrections(word, custom_dict)
5208
+ # Preserve original case
5209
+ word = preserve_case(original_word, word)
5210
+ corrected_words.append(word)
5211
+
5212
+ return " ".join(corrected_words)
5213
+
5214
+ # Initialize SymSpell if needed
5215
+ sym_spell = None
5216
+ if method in ("sym", "combined"):
5217
+ if not sym_spell_params:
5218
+ sym_spell_params = {"max_edit_distance": 2, "prefix_length": 7}
5219
+ sym_spell = initialize_symspell(**sym_spell_params)
5220
+
5221
+ # Process lists or strings
5222
+ if isinstance(content, list):
5223
+ if use_threading:
5224
+ with ThreadPoolExecutor() as executor:
5225
+ corrected_content = list(executor.map(lambda x: process_string(x, method, sym_spell, custom_dict), content))
5226
+ return corrected_content
5227
+ else:
5228
+ return [process_string(item, method, sym_spell, custom_dict) for item in content]
5229
+ else:
5230
+ return process_string(content, method, sym_spell, custom_dict)
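
As a quick illustration of the new `str2words` (a sketch, assuming `textblob` and `symspellpy` are installed; the exact corrections depend on their bundled dictionaries):

    # single string, combined TextBlob + SymSpell pipeline
    fixed = str2words("Ths is a tst sentense", method="combined")
    # list input, corrected in parallel threads
    batch = str2words(["speling mistke", "anothr exmple"], method="sym", use_threading=True)
    print(fixed, batch)
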
4831
5231
 
4832
5232
  def load_img(fpath):
4833
5233
  """
@@ -4851,7 +5251,7 @@ def load_img(fpath):
4851
5251
  raise OSError(f"Unable to open file '{fpath}' or it is not a valid image file.")
4852
5252
 
4853
5253
 
4854
- def apply_filter(img, *args):
5254
+ def apply_filter(img, *args,verbose=True):
4855
5255
  # def apply_filter(img, filter_name, filter_value=None):
4856
5256
  """
4857
5257
  Apply the specified filter to the image.
@@ -4865,7 +5265,7 @@ def apply_filter(img, *args):
4865
5265
  from PIL import ImageFilter
4866
5266
 
4867
5267
  def correct_filter_name(filter_name):
4868
- if "bl" in filter_name.lower() and "box" not in filter_name.lower():
5268
+ if all(["b" in filter_name.lower(), "ur" in filter_name.lower(), "box" not in filter_name.lower()]):
4869
5269
  return "BLUR"
4870
5270
  elif "cont" in filter_name.lower():
4871
5271
  return "Contour"
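
The reworked test above matches any name containing both "b" and "ur" ("blur", "Blur", "BLUR") while still excluding box blur. The predicate in isolation:

    def looks_like_blur(name: str) -> bool:
        n = name.lower()
        return all(["b" in n, "ur" in n, "box" not in n])

    for name in ("blur", "BLUR", "box_blur", "contour"):
        print(name, looks_like_blur(name))  # True, True, False, False
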
@@ -4929,10 +5329,11 @@ def apply_filter(img, *args):
4929
5329
 
4930
5330
  for arg in args:
4931
5331
  if isinstance(arg, str):
4932
- filter_name = arg
4933
- filter_name = correct_filter_name(filter_name)
5332
+ filter_name = correct_filter_name(arg)
4934
5333
  else:
4935
5334
  filter_value = arg
5335
+ if verbose:
5336
+ print(f'processing {filter_name}')
4936
5337
  filter_name = filter_name.upper() # Ensure filter name is uppercase
4937
5338
 
4938
5339
  # Supported filters
@@ -4976,12 +5377,13 @@ def apply_filter(img, *args):
4976
5377
  bands = filter_value if filter_value is not None else None
4977
5378
  return img.filter(supported_filters[filter_name](bands))
4978
5379
  else:
4979
- if filter_value is not None:
5380
+ if filter_value is not None and verbose:
4980
5381
  print(
4981
5382
  f"{filter_name} doesn't require a value for {filter_value}, but it remains unaffected"
4982
5383
  )
4983
5384
  return img.filter(supported_filters[filter_name])
4984
5385
 
5386
+
4985
5387
  def detect_angle(image, by="median", template=None):
4986
5388
  """Detect the angle of rotation using various methods."""
4987
5389
  from sklearn.decomposition import PCA
@@ -4989,8 +5391,11 @@ def detect_angle(image, by="median", template=None):
4989
5391
  from skimage.color import rgb2gray
4990
5392
  from scipy.fftpack import fftshift, fft2
4991
5393
  import numpy as np
4992
- import cv2
5394
+ import cv2
5395
+
4993
5396
  # Convert to grayscale
5397
+ if np.array(image).shape[-1] > 3:
5398
+ image = np.array(image)[:, :, :3]
4994
5399
  gray_image = rgb2gray(image)
4995
5400
 
4996
5401
  # Detect edges using Canny edge detector
@@ -5002,9 +5407,10 @@ def detect_angle(image, by="median", template=None):
5002
5407
  if not lines and any(["me" in by, "pca" in by]):
5003
5408
  print("No lines detected. Adjust the edge detection parameters.")
5004
5409
  return 0
5005
-
5410
+ methods = ["mean", "median", "pca", "gradient orientation", "template matching", "moments", "fft"]
5411
+ by = strcmp(by, methods)[0]
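
`strcmp` here is the package's own fuzzy matcher; a rough standard-library stand-in, just to illustrate the normalization idea (difflib is not what strcmp uses internally):

    from difflib import get_close_matches

    methods = ["mean", "median", "pca", "gradient orientation",
               "template matching", "moments", "fft"]

    def normalize(name, candidates=methods):
        hits = get_close_matches(name.lower(), candidates, n=1, cutoff=0.0)
        return hits[0] if hits else name

    print(normalize("medain"))  # "median"
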
5006
5412
  # Hough Transform-based angle detection (Median/Mean)
5007
- if "me" in by:
5413
+ if "me" in by.lower():
5008
5414
  angles = []
5009
5415
  for line in lines:
5010
5416
  (x0, y0), (x1, y1) = line
@@ -5027,7 +5433,7 @@ def detect_angle(image, by="median", template=None):
5027
5433
  return rotation_angle
5028
5434
 
5029
5435
  # PCA-based angle detection
5030
- elif "pca" in by:
5436
+ elif "pca" in by.lower():
5031
5437
  y, x = np.nonzero(edges)
5032
5438
  if len(x) == 0:
5033
5439
  return 0
@@ -5037,14 +5443,14 @@ def detect_angle(image, by="median", template=None):
5037
5443
  return angle
5038
5444
 
5039
5445
  # Gradient Orientation-based angle detection
5040
- elif "gra" in by:
5446
+ elif "gra" in by.lower():
5041
5447
  gx, gy = np.gradient(gray_image)
5042
5448
  angles = np.arctan2(gy, gx) * 180 / np.pi
5043
5449
  hist, bin_edges = np.histogram(angles, bins=360, range=(-180, 180))
5044
5450
  return bin_edges[np.argmax(hist)]
5045
5451
 
5046
5452
  # Template Matching-based angle detection
5047
- elif "temp" in by:
5453
+ elif "temp" in by.lower():
5048
5454
  if template is None:
5049
5455
  # Automatically extract a template from the center of the image
5050
5456
  height, width = gray_image.shape
@@ -5067,7 +5473,7 @@ def detect_angle(image, by="median", template=None):
5067
5473
  return best_angle
5068
5474
 
5069
5475
  # Image Moments-based angle detection
5070
- elif "mo" in by:
5476
+ elif "mo" in by.lower():
5071
5477
  moments = measure.moments_central(gray_image)
5072
5478
  angle = (
5073
5479
  0.5
@@ -5078,7 +5484,7 @@ def detect_angle(image, by="median", template=None):
5078
5484
  return angle
5079
5485
 
5080
5486
  # Fourier Transform-based angle detection
5081
- elif "fft" in by:
5487
+ elif "fft" in by.lower():
5082
5488
  f = fft2(gray_image)
5083
5489
  fshift = fftshift(f)
5084
5490
  magnitude_spectrum = np.log(np.abs(fshift) + 1)
@@ -5088,10 +5494,19 @@ def detect_angle(image, by="median", template=None):
5088
5494
  return angle
5089
5495
 
5090
5496
  else:
5091
- print(f"Unknown method {by}")
5497
+ print(f"Unknown method {by}: supported methods: {methods}")
5092
5498
  return 0
5093
5499
 
5094
- def imgsets(img, **kwargs):
5500
+
5501
+ def imgsets(img,
5502
+ auto: bool = True,
5503
+ size=None,
5504
+ figsize=None,
5505
+ dpi: int = 200,
5506
+ show_axis: bool = False,
5507
+ plot_: bool = True,
5508
+ verbose: bool = False,
5509
+ **kwargs):
5095
5510
  """
5096
5511
  Apply various enhancements and filters to an image using PIL's ImageEnhance and ImageFilter modules.
5097
5512
 
@@ -5125,6 +5540,9 @@ def imgsets(img, **kwargs):
5125
5540
  Note:
5126
5541
  The "color" and "enhance" enhancements are not implemented in this function.
5127
5542
  """
5543
+
5544
+ import matplotlib.pyplot as plt
5545
+ from PIL import ImageEnhance, ImageOps,Image
5128
5546
  supported_filters = [
5129
5547
  "BLUR",
5130
5548
  "CONTOUR",
@@ -5144,8 +5562,22 @@ def imgsets(img, **kwargs):
5144
5562
  "BOX_BLUR",
5145
5563
  "MEDIAN_FILTER",
5146
5564
  ]
5147
- print('usage: imgsets(dir_img, contrast="auto", rm=True, color=2.2)')
5148
- print("\nlog:\n")
5565
+ str_usage = """
5566
+ imgsets(dir_img, auto=1, color=1.5, plot_=0)
5567
+ imgsets(dir_img, color=2)
5568
+ imgsets(dir_img, pad=(300, 300), bgcolor=(73, 162, 127), plot_=0)
5569
+ imgsets(dir_img, contrast=0, color=1.2, plot_=0)
5570
+ imgsets(get_clip(), flip="tb")# flip top and bottom
5571
+ imgsets(get_clip(), contrast=1, rm=[100, 5, 2]) #'foreground_threshold', 'background_threshold' and 'erode_structure_size'
5572
+ """
5573
+ if run_once_within():
5574
+ print(str_usage)
5575
+
5576
+ def gamma_correction(image, gamma=1.0, v_max=255):
5577
+ # adjust gamma value
5578
+ inv_gamma = 1.0 / gamma
5579
+ lut = [int((i / float(v_max)) ** inv_gamma * int(v_max)) for i in range(int(v_max) + 1)]  # 256 entries for an 8-bit band
5580
+ return lut #image.point(lut)
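
For reference, this is how such a table is typically applied with PIL (a self-contained sketch, independent of the helper above; `Image.point` expects one 256-entry table per 8-bit band):

    from PIL import Image

    def gamma_lut(gamma: float, v_max: int = 255):
        inv_gamma = 1.0 / gamma
        # 256 clamped entries for one 8-bit band
        return [min(v_max, max(0, round((i / v_max) ** inv_gamma * v_max))) for i in range(v_max + 1)]

    img = Image.new("RGB", (4, 4), (128, 64, 32))  # tiny in-memory test image
    corrected = img.point(gamma_lut(2.2) * len(img.getbands()))
    print(corrected.getpixel((0, 0)))
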
5149
5581
 
5150
5582
  def confirm_rembg_models(model_name):
5151
5583
  models_support = [
@@ -5169,37 +5601,52 @@ def imgsets(img, **kwargs):
5169
5601
 
5170
5602
  def auto_enhance(img):
5171
5603
  """
5172
- Automatically enhances the image based on its characteristics.
5604
+ Automatically enhances the image based on its characteristics, including brightness,
5605
+ contrast, color range, sharpness, and gamma correction.
5606
+
5173
5607
  Args:
5174
5608
  img (PIL.Image): The input image.
5609
+
5175
5610
  Returns:
5176
- dict: A dictionary containing the optimal enhancement values.
5611
+ dict: A dictionary containing the optimal enhancement values applied.
5612
+ Note: only the dict of factors is returned; the image object is not.
5177
5613
  """
5614
+ from PIL import Image, ImageEnhance, ImageOps, ImageFilter
5615
+ import numpy as np
5178
5616
  # Determine the bit depth based on the image mode
5179
- if img.mode in ["1", "L", "P", "RGB", "YCbCr", "LAB", "HSV"]:
5180
- # 8-bit depth per channel
5181
- bit_depth = 8
5182
- elif img.mode in ["RGBA", "CMYK"]:
5183
- # 8-bit depth per channel + alpha (RGBA) or additional channels (CMYK)
5617
+ try:
5618
+ if img.mode in ["1", "L", "P", "RGB", "YCbCr", "LAB", "HSV"]:
5619
+ bit_depth = 8
5620
+ elif img.mode in ["RGBA", "CMYK"]:
5621
+ bit_depth = 8
5622
+ elif img.mode in ["I", "F"]:
5623
+ bit_depth = 16
5624
+ else:
5625
+ raise ValueError("Unsupported image mode")
5626
+ except Exception:
5184
5627
  bit_depth = 8
5185
- elif img.mode in ["I", "F"]:
5186
- # 16-bit depth per channel (integer or floating-point)
5187
- bit_depth = 16
5188
- else:
5189
- raise ValueError("Unsupported image mode")
5190
- # Calculate the brightness and contrast for each channel
5628
+
5629
+ # Initialize enhancement factors
5630
+ enhancements = {
5631
+ "brightness": 1.0,
5632
+ "contrast": 0,# autocontrasted
5633
+ "color": 1.35,
5634
+ "sharpness": 1.0,
5635
+ "gamma": 1.0
5636
+ }
5637
+
5638
+ # Calculate brightness and contrast for each channel
5191
5639
  num_channels = len(img.getbands())
5192
5640
  brightness_factors = []
5193
5641
  contrast_factors = []
5194
5642
  for channel in range(num_channels):
5195
5643
  channel_histogram = img.split()[channel].histogram()
5196
- brightness = sum(i * w for i, w in enumerate(channel_histogram)) / sum(
5197
- channel_histogram
5198
- )
5644
+ total_pixels = sum(channel_histogram)
5645
+ brightness = sum(i * w for i, w in enumerate(channel_histogram)) / total_pixels
5199
5646
  channel_min, channel_max = img.split()[channel].getextrema()
5200
5647
  contrast = channel_max - channel_min
5201
5648
  # Adjust calculations based on bit depth
5202
- normalization_factor = 2**bit_depth - 1 # Max value for the given bit depth
5649
+ normalization_factor = 2**bit_depth - 1
5203
5650
  brightness_factor = (
5204
5651
  1.0 + (brightness - normalization_factor / 2) / normalization_factor
5205
5652
  )
@@ -5208,37 +5655,62 @@ def imgsets(img, **kwargs):
5208
5655
  )
5209
5656
  brightness_factors.append(brightness_factor)
5210
5657
  contrast_factors.append(contrast_factor)
5211
- # Calculate the average brightness and contrast factors across channels
5212
- avg_brightness_factor = sum(brightness_factors) / num_channels
5213
- avg_contrast_factor = sum(contrast_factors) / num_channels
5214
- return {"brightness": avg_brightness_factor, "contrast": avg_contrast_factor}
5215
5658
 
5216
- import matplotlib.pyplot as plt
5217
- from PIL import ImageEnhance, ImageOps
5659
+ # Calculate average brightness and contrast factors across channels
5660
+ enhancements["brightness"] = sum(brightness_factors) / num_channels
5661
+ # Adjust brightness and contrast
5662
+ img = ImageEnhance.Brightness(img).enhance(enhancements["brightness"])
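
To make the factor arithmetic concrete: for an 8-bit channel (normalization_factor = 255) whose mean brightness is 170, the formula gives 1.0 + (170 - 127.5) / 255, so an already-bright channel is nudged slightly brighter still:

    normalization_factor = 2**8 - 1  # 255 for 8-bit
    brightness = 170.0               # hypothetical channel mean
    factor = 1.0 + (brightness - normalization_factor / 2) / normalization_factor
    print(round(factor, 3))          # 1.167
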
5663
+
5664
+ # # Automatic color enhancement (saturation)
5665
+ # if img.mode == "RGB":
5666
+ # color_enhancer = ImageEnhance.Color(img)
5667
+ # color_histogram = np.array(img.histogram()).reshape(3, -1)
5668
+ # avg_saturation = np.mean([np.std(channel) for channel in color_histogram]) / normalization_factor
5669
+ # print(avg_saturation)
5670
+ # enhancements["color"] = min(0, max(0.5, 1.0 + avg_saturation)) # Clamp to a reasonable range
5671
+ # # img = color_enhancer.enhance(enhancements["color"])
5672
+
5673
+ # Adjust sharpness
5674
+ sharpness_enhancer = ImageEnhance.Sharpness(img)
5675
+ # Use edge detection to estimate sharpness need
5676
+ edges = img.filter(ImageFilter.FIND_EDGES).convert("L")
5677
+ avg_edge_intensity = np.mean(np.array(edges))
5678
+ enhancements["sharpness"] = min(2.0, max(0.5, 1.0 + avg_edge_intensity / normalization_factor))
5679
+ # img = sharpness_enhancer.enhance(enhancements["sharpness"])
5680
+
5681
+ # # Apply gamma correction
5682
+ # def gamma_correction(image, gamma):
5683
+ # inv_gamma = 1.0 / gamma
5684
+ # lut = [min(255, max(0, int((i / 255.0) ** inv_gamma * 255))) for i in range(256)]
5685
+ # return image.point(lut)
5686
+
5687
+ # avg_brightness = np.mean(np.array(img.convert("L"))) / 255
5688
+ # enhancements["gamma"] = min(2.0, max(0.5, 1.0 if avg_brightness > 0.5 else 1.2 - avg_brightness))
5689
+ # img = gamma_correction(img, enhancements["gamma"])
5690
+
5691
+ # Return only the computed enhancement factors
5692
+ return enhancements
5693
+
5218
5694
 
5219
5695
  # Load image if input is a file path
5220
5696
  if isinstance(img, str):
5221
5697
  img = load_img(img)
5222
- img_update = img.copy()
5223
- # Auto-enhance image if requested
5224
-
5225
- auto = kwargs.get("auto", False)
5226
- show = kwargs.get("show", True)
5227
- show_axis = kwargs.get("show_axis", False)
5228
- size = kwargs.get("size", None)
5229
- figsize = kwargs.get("figsize", None)
5230
- dpi = kwargs.get("dpi", 100)
5698
+ img_update = img.copy()
5231
5699
 
5232
5700
  if auto:
5233
5701
  kwargs = {**auto_enhance(img_update), **kwargs}
5234
-
5702
+ params = ["sharp", "color", "contrast", "bright", "crop", "rotate", "size", "resize",
5703
+ "thumbnail", "cover", "contain", "filter", "fit", "pad",
5704
+ "rem", "rm", "back", "bg_color", "cut", "gamma", "flip"]
5235
5705
  for k, value in kwargs.items():
5706
+ k = strcmp(k, params)[0] # correct the param name
5236
5707
  if "shar" in k.lower():
5237
5708
  enhancer = ImageEnhance.Sharpness(img_update)
5238
5709
  img_update = enhancer.enhance(value)
5239
5710
  elif all(
5240
5711
  ["col" in k.lower(), "bg" not in k.lower(), "background" not in k.lower()]
5241
5712
  ):
5713
+ # *color
5242
5714
  enhancer = ImageEnhance.Color(img_update)
5243
5715
  img_update = enhancer.enhance(value)
5244
5716
  elif "contr" in k.lower():
@@ -5246,8 +5718,11 @@ def imgsets(img, **kwargs):
5246
5718
  enhancer = ImageEnhance.Contrast(img_update)
5247
5719
  img_update = enhancer.enhance(value)
5248
5720
  else:
5249
- print("autocontrasted")
5250
- img_update = ImageOps.autocontrast(img_update)
5721
+ try:
5722
+ img_update = ImageOps.autocontrast(img_update)
5723
+ print("autocontrasted")
5724
+ except Exception as e:
5725
+ print(f"Failed 'autocontrasted':{e}")
5251
5726
  elif "bri" in k.lower():
5252
5727
  enhancer = ImageEnhance.Brightness(img_update)
5253
5728
  img_update = enhancer.enhance(value)
@@ -5258,7 +5733,13 @@ def imgsets(img, **kwargs):
5258
5733
  value = detect_angle(img_update, by=value)
5259
5734
  print(f"rotated by {value}°")
5260
5735
  img_update = img_update.rotate(value)
5261
-
5736
+ elif 'flip' in k.lower():
5737
+ if 'l' in value and 'r' in value:
5738
+ # left/right
5739
+ img_update = img_update.transpose(Image.FLIP_LEFT_RIGHT)
5740
+ elif any(['u' in value and 'd' in value, 't' in value and 'b' in value]):
5741
+ # up/down or top/bottom
5742
+ img_update = img_update.transpose(Image.FLIP_TOP_BOTTOM)
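
The flip branch maps "lr"-style strings onto PIL transposes; the equivalent direct calls are:

    from PIL import Image

    img = Image.new("RGB", (2, 1))
    mirrored = img.transpose(Image.FLIP_LEFT_RIGHT)      # flip="lr"
    upside_down = img.transpose(Image.FLIP_TOP_BOTTOM)   # flip="tb" (or "ud")
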
5262
5743
  elif "si" in k.lower():
5263
5744
  if isinstance(value, tuple):
5264
5745
  value = list(value)
@@ -5270,13 +5751,17 @@ def imgsets(img, **kwargs):
5270
5751
  img_update = ImageOps.cover(img_update, size=value)
5271
5752
  elif "contain" in k.lower():
5272
5753
  img_update = ImageOps.contain(img_update, size=value)
5273
- elif "fit" in k.lower():
5754
+ elif "fi" in k.lower() and "t" in k.lower():  # filter (dict) or fit
5274
5755
  if isinstance(value, dict):
5756
+ if verbose:
5757
+ print(f"supported filter: {supported_filters}")
5275
5758
  for filter_name, filter_value in value.items():
5276
- img_update = apply_filter(img_update, filter_name, filter_value)
5759
+ img_update = apply_filter(img_update, filter_name, filter_value, verbose=verbose)
5277
5760
  else:
5278
5761
  img_update = ImageOps.fit(img_update, size=value)
5279
5762
  elif "pad" in k.lower():
5763
+ # *ImageOps.pad ensures that the resized image has the exact size specified by the size parameter while maintaining the aspect ratio.
5764
+ # size: A tuple specifying the target size (width, height).
5280
5765
  img_update = ImageOps.pad(img_update, size=value)
5281
5766
  elif "rem" in k.lower() or "rm" in k.lower() or "back" in k.lower():
5282
5767
  from rembg import remove, new_session
@@ -5285,7 +5770,9 @@ def imgsets(img, **kwargs):
5285
5770
  session = new_session("isnet-general-use")
5286
5771
  img_update = remove(img_update, session=session)
5287
5772
  elif value and isinstance(value, (int, float, list)):
5288
- print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
5773
+ if verbose:
5774
+ print("https://github.com/danielgatis/rembg/blob/main/USAGE.md")
5775
+ print("rm=True  # use the default settings;\nrm=(240,10,10)\nThe 'foreground_threshold' (240) and 'background_threshold' (10) values are used to classify foreground and background pixels;\n'erode_structure_size' (10) sets the size of the erosion structure applied to the mask.")
5289
5776
  if isinstance(value, int):
5290
5777
  value = [value]
5291
5778
  if len(value) < 2:
@@ -5327,8 +5814,11 @@ def imgsets(img, **kwargs):
5327
5814
  if len(value) == 3:
5328
5815
  value += (255,)
5329
5816
  img_update = remove(img_update, bgcolor=value)
5817
+
5818
+ # elif "ga" in k.lower() and "m" in k.lower():
5819
+ # img_update = gamma_correction(img_update, gamma=value)
5330
5820
  # Display the image if requested
5331
- if show:
5821
+ if plot_:
5332
5822
  if figsize is None:
5333
5823
  plt.figure(dpi=dpi)
5334
5824
  else:
@@ -6355,13 +6845,13 @@ def _df_outlier(
6355
6845
  from scipy.stats import zscore
6356
6846
  from sklearn.ensemble import IsolationForest
6357
6847
  from sklearn.preprocessing import StandardScaler
6358
-
6848
+
6359
6849
  # Fill completely NaN columns with a default value (e.g., 0)
6360
6850
  data = data.copy()
6361
6851
  data.loc[:, data.isna().all()] = 0
6362
6852
  if columns is not None:
6363
- if isinstance(columns, (list,pd.core.indexes.base.Index)):
6364
- data=data[columns]
6853
+ if isinstance(columns, (list, pd.core.indexes.base.Index)):
6854
+ data = data[columns]
6365
6855
  col_names_org = data.columns.tolist()
6366
6856
  index_names_org = data.index.tolist()
6367
6857
  # Separate numeric and non-numeric columns
@@ -6527,6 +7017,7 @@ def df_extend(data: pd.DataFrame, column, axis=0, sep=None, prefix="col"):
6527
7017
  data = data.explode(column, ignore_index=True)
6528
7018
  return data
6529
7019
 
7020
+
6530
7021
  def df_cycle(data: pd.DataFrame, columns=None, max_val=None, inplace=False):
6531
7022
  """
6532
7023
  Purpose: transforms a datetime feature (like month or day) into a cyclic encoding for use in machine learning models, particularly neural networks.
@@ -6536,24 +7027,30 @@ def df_cycle(data: pd.DataFrame, columns=None, max_val=None, inplace=False):
6536
7027
  data = df_cycle(data, 'month', 12)
6537
7028
  """
6538
7029
  if columns is None:
6539
- columns = list(data.select_dtypes(include=np.number).columns) # If no columns specified, use all columns
7030
+ columns = list(
7031
+ data.select_dtypes(include=np.number).columns
7032
+ ) # If no columns specified, use all columns
6540
7033
  if max_val is None:
6541
- max_val = np.max(data[columns]) # If no max_val specified, use the maximum value across all columns
7034
+ max_val = np.max(
7035
+ data[columns]
7036
+ ) # If no max_val specified, use the maximum value across all columns
6542
7037
  if isinstance(columns, str):
6543
- columns = [columns] # If a single column name is provided as a string, convert it to a list
6544
-
7038
+ columns = [
7039
+ columns
7040
+ ] # If a single column name is provided as a string, convert it to a list
7041
+
6545
7042
  # Check if inplace is True, so we modify the original dataframe
6546
7043
  if inplace:
6547
7044
  # Modify the data in place, no return statement needed
6548
7045
  for col in columns:
6549
- data[col + '_sin'] = np.sin(2 * np.pi * data[col] / max_val)
6550
- data[col + '_cos'] = np.cos(2 * np.pi * data[col] / max_val)
7046
+ data[col + "_sin"] = np.sin(2 * np.pi * data[col] / max_val)
7047
+ data[col + "_cos"] = np.cos(2 * np.pi * data[col] / max_val)
6551
7048
  else:
6552
7049
  # If inplace is False, return the modified dataframe
6553
7050
  new_data = data.copy()
6554
7051
  for col in columns:
6555
- new_data[col + '_sin'] = np.sin(2 * np.pi * new_data[col] / max_val)
6556
- new_data[col + '_cos'] = np.cos(2 * np.pi * new_data[col] / max_val)
7052
+ new_data[col + "_sin"] = np.sin(2 * np.pi * new_data[col] / max_val)
7053
+ new_data[col + "_cos"] = np.cos(2 * np.pi * new_data[col] / max_val)
6557
7054
  return new_data
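
A quick sanity check of the cyclic encoding: with max_val=12, month 12 lands on sin=0, cos=1, the same point as month 0, which is exactly the wrap-around this transform exists for:

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"month": [1, 6, 12]})
    df["month_sin"] = np.sin(2 * np.pi * df["month"] / 12)
    df["month_cos"] = np.cos(2 * np.pi * df["month"] / 12)
    print(df.round(3))  # month 6 -> (0.0, -1.0); month 12 -> (-0.0, 1.0)
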
6558
7055
 
6559
7056
 
@@ -6561,7 +7058,7 @@ def df_cycle(data: pd.DataFrame, columns=None, max_val=None, inplace=False):
6561
7058
  def df_astype(
6562
7059
  data: pd.DataFrame,
6563
7060
  columns: Optional[Union[str, List[str]]] = None,
6564
- astype: str = None,#"datetime",
7061
+ astype: str = None, # "datetime",
6565
7062
  skip_row: Union[str, list] = None,
6566
7063
  fmt: Optional[str] = None,
6567
7064
  inplace: bool = False,
@@ -6624,7 +7121,7 @@ def df_astype(
6624
7121
  "day",
6625
7122
  "month",
6626
7123
  "year",
6627
- "circular"
7124
+ "circular",
6628
7125
  ]
6629
7126
  # If inplace is False, make a copy of the DataFrame
6630
7127
  if not inplace:
@@ -6720,12 +7217,12 @@ def df_astype(
6720
7217
  data[column] = pd.to_timedelta(data[column], errors=errors, **kwargs)
6721
7218
  # print(f"Successfully converted '{column}' to timedelta.")
6722
7219
  elif astype == "circular":
6723
- max_val = kwargs.get('max_val',None)
6724
- data[column]=df_cycle(data=data,columns=column,max_val=max_val)
7220
+ max_val = kwargs.get("max_val", None)
7221
+ data[column] = df_cycle(data=data, columns=column, max_val=max_val)
6725
7222
  else:
6726
7223
  # Convert to other types (e.g., float, int)
6727
- if astype=='int':
6728
- data[column] = data[column].astype('float').astype('int')
7224
+ if astype == "int":
7225
+ data[column] = data[column].astype("float").astype("int")
6729
7226
  else:
6730
7227
  data[column] = data[column].astype(astype)
6731
7228
  # print(f"Successfully converted '{column}' to {astype}.")
@@ -6775,7 +7272,9 @@ def df_sort_values(data, column, by=None, ascending=True, inplace=True, **kwargs
6775
7272
  ).index.tolist()
6776
7273
 
6777
7274
  # Convert to a categorical type with the new order
6778
- data[column] = pd.Categorical(data[column], categories=sorted_counts, ordered=True)
7275
+ data[column] = pd.Categorical(
7276
+ data[column], categories=sorted_counts, ordered=True
7277
+ )
6779
7278
  # Set ascending to count_ascending for sorting
6780
7279
  ascending = count_ascending # Adjust ascending for the final sort
6781
7280
  elif isinstance(by, list):
@@ -6977,7 +7476,7 @@ def df_fillna(
6977
7476
  # Fill completely NaN columns with a default value (e.g., 0)
6978
7477
  data = data.copy()
6979
7478
  data.loc[:, data.isna().all()] = 0
6980
-
7479
+
6981
7480
  col_names_org = data.columns.tolist()
6982
7481
  index_names_org = data.index.tolist()
6983
7482
  # Separate numeric and non-numeric columns
@@ -7034,7 +7533,7 @@ def df_fillna(
7034
7533
  imputed_data = imputer.fit_transform(numeric_data.T)
7035
7534
  else:
7036
7535
  raise ValueError("Invalid axis. Use 0 for columns or 1 for rows.")
7037
-
7536
+
7038
7537
  imputed_data = pd.DataFrame(
7039
7538
  imputed_data if axis == 0 else imputed_data.T,
7040
7539
  index=numeric_data.index if axis == 0 else numeric_data.columns,
@@ -7179,11 +7678,15 @@ def df_encoder(
7179
7678
 
7180
7679
  encoder = LabelEncoder()
7181
7680
  # Apply LabelEncoder only to non-numeric columns
7182
- non_numeric_columns = [col for col in columns if not pd.api.types.is_numeric_dtype(data[col])]
7681
+ non_numeric_columns = [
7682
+ col for col in columns if not pd.api.types.is_numeric_dtype(data[col])
7683
+ ]
7183
7684
 
7184
7685
  if not non_numeric_columns:
7185
7686
  return data
7186
- encoded_data = data[non_numeric_columns].apply(lambda col: encoder.fit_transform(col))
7687
+ encoded_data = data[non_numeric_columns].apply(
7688
+ lambda col: encoder.fit_transform(col)
7689
+ )
7187
7690
  return pd.concat([data.drop(non_numeric_columns, axis=1), encoded_data], axis=1)
7188
7691
 
7189
7692
  # Target encoding (Mean of the target for each category)
@@ -7210,13 +7713,13 @@ def df_scaler(
7210
7713
  scaler=None,
7211
7714
  method="standard",
7212
7715
  columns=None, # default, select all numeric col/row
7213
- feature_range=None,# specific for 'minmax'
7716
+ feature_range=None, # specific for 'minmax'
7214
7717
  vmin=0,
7215
7718
  vmax=1,
7216
7719
  inplace=False,
7217
7720
  verbose=False, # show usage
7218
7721
  axis=0, # defalut column-wise
7219
- return_scaler:bool=False,# True: return both: return df, scaler
7722
+ return_scaler: bool = False, # True: return both: return df, scaler
7220
7723
  **kwargs,
7221
7724
  ):
7222
7725
  """
@@ -7235,34 +7738,56 @@ def df_scaler(
7235
7738
  if verbose:
7236
7739
  print('df_scaler(data, scaler="standard", inplace=False, axis=0, verbose=True)')
7237
7740
  if scaler is None:
7238
- methods = ["standard", "minmax", "robust","maxabs"]
7741
+ methods = ["standard", "minmax", "robust", "maxabs"]
7239
7742
  method = strcmp(method, methods)[0]
7240
7743
  if method == "standard":
7241
7744
  from sklearn.preprocessing import StandardScaler
7745
+
7242
7746
  if verbose:
7243
- print("performs z-score normalization: This will standardize each feature to have a mean of 0 and a standard deviation of 1.")
7244
- print("Use when the data is approximately normally distributed (Gaussian).\nWorks well with algorithms sensitive to feature distribution, such as SVMs, linear regression, logistic regression, and neural networks.")
7747
+ print(
7748
+ "performs z-score normalization: This will standardize each feature to have a mean of 0 and a standard deviation of 1."
7749
+ )
7750
+ print(
7751
+ "Use when the data is approximately normally distributed (Gaussian).\nWorks well with algorithms sensitive to feature distribution, such as SVMs, linear regression, logistic regression, and neural networks."
7752
+ )
7245
7753
  scaler = StandardScaler(**kwargs)
7246
7754
  elif method == "minmax":
7247
7755
  from sklearn.preprocessing import MinMaxScaler
7756
+
7248
7757
  if feature_range is None:
7249
- feature_range=(vmin,vmax)
7758
+ feature_range = (vmin, vmax)
7250
7759
  if verbose:
7251
- print("don't forget to define the range: e.g., 'feature_range=(0, 1)'. ")
7252
- print("scales the features to the range [0, 1]. Adjust feature_range if you want a different range, like [-1, 1].")
7253
- print("Use when the data does not follow a normal distribution and you need all features in a specific range (e.g., [0, 1]).\nIdeal for algorithms that do not assume a particular distribution, such as k-nearest neighbors and neural networks.")
7254
- scaler = MinMaxScaler(feature_range=feature_range,**kwargs)
7760
+ print(
7761
+ "don't forget to define the range: e.g., 'feature_range=(0, 1)'. "
7762
+ )
7763
+ print(
7764
+ "scales the features to the range [0, 1]. Adjust feature_range if you want a different range, like [-1, 1]."
7765
+ )
7766
+ print(
7767
+ "Use when the data does not follow a normal distribution and you need all features in a specific range (e.g., [0, 1]).\nIdeal for algorithms that do not assume a particular distribution, such as k-nearest neighbors and neural networks."
7768
+ )
7769
+ scaler = MinMaxScaler(feature_range=feature_range, **kwargs)
7255
7770
  elif method == "robust":
7256
7771
  from sklearn.preprocessing import RobustScaler
7772
+
7257
7773
  if verbose:
7258
- print("scales the data based on the median and interquartile range, which is robust to outliers.")
7259
- print("Use when the dataset contains outliers.\nThis method is useful because it scales based on the median and the interquartile range (IQR), which are more robust to outliers than the mean and standard deviation.")
7774
+ print(
7775
+ "scales the data based on the median and interquartile range, which is robust to outliers."
7776
+ )
7777
+ print(
7778
+ "Use when the dataset contains outliers.\nThis method is useful because it scales based on the median and the interquartile range (IQR), which are more robust to outliers than the mean and standard deviation."
7779
+ )
7260
7780
  scaler = RobustScaler(**kwargs)
7261
- elif method=="maxabs":
7781
+ elif method == "maxabs":
7262
7782
  from sklearn.preprocessing import MaxAbsScaler
7783
+
7263
7784
  if verbose:
7264
- print("This scales each feature by its maximum absolute value, resulting in values within the range [-1, 1] for each feature.")
7265
- print("Use for data that is already sparse or when features have positive or negative values that need scaling without shifting the data.\nOften used with sparse data (data with many zeros), where preserving zero entries is essential, such as in text data or recommendation systems.")
7785
+ print(
7786
+ "This scales each feature by its maximum absolute value, resulting in values within the range [-1, 1] for each feature."
7787
+ )
7788
+ print(
7789
+ "Use for data that is already sparse or when features have positive or negative values that need scaling without shifting the data.\nOften used with sparse data (data with many zeros), where preserving zero entries is essential, such as in text data or recommendation systems."
7790
+ )
7266
7791
  scaler = MaxAbsScaler(**kwargs)
7267
7792
  if axis not in [0, 1]:
7268
7793
  raise ValueError("Axis must be 0 (column-wise) or 1 (row-wise).")
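
To compare the four options side by side on a column with an outlier (a sketch using scikit-learn directly, which is what df_scaler wraps):

    import numpy as np
    from sklearn.preprocessing import MaxAbsScaler, MinMaxScaler, RobustScaler, StandardScaler

    x = np.array([[1.0], [2.0], [3.0], [100.0]])  # note the outlier
    for scaler in (StandardScaler(), MinMaxScaler(), RobustScaler(), MaxAbsScaler()):
        print(type(scaler).__name__, scaler.fit_transform(x).ravel().round(2))
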
@@ -7275,7 +7800,7 @@ def df_scaler(
7275
7800
  non_numeric_columns = data.columns.difference(columns)
7276
7801
 
7277
7802
  # scaled_data = scaler.fit_transform(data[columns])
7278
- if scaler is None or not hasattr(scaler, 'mean_'):
7803
+ if scaler is None or not hasattr(scaler, "mean_"):
7279
7804
  scaled_data = scaler.fit_transform(data[columns])
7280
7805
  else:
7281
7806
  scaled_data = scaler.transform(data[columns])
@@ -7293,7 +7818,7 @@ def df_scaler(
7293
7818
  )
7294
7819
  scaled_df = scaled_df[data.columns] # Maintain column order
7295
7820
  if return_scaler:
7296
- return scaled_df,scaler
7821
+ return scaled_df, scaler
7297
7822
  else:
7298
7823
  return scaled_df
7299
7824
 
@@ -7310,7 +7835,11 @@ def df_scaler(
7310
7835
  # scaled_data = scaler.fit_transform(
7311
7836
  # numeric_rows.T
7312
7837
  # ).T # Transpose for scaling and then back
7313
- scaled_data = scaler.fit_transform(numeric_rows.T).T if scaler is None or not hasattr(scaler, 'mean_') else scaler.transform(numeric_rows.T).T
7838
+ scaled_data = (
7839
+ scaler.fit_transform(numeric_rows.T).T
7840
+ if scaler is None or not hasattr(scaler, "mean_")
7841
+ else scaler.transform(numeric_rows.T).T
7842
+ )
7314
7843
 
7315
7844
  if inplace:
7316
7845
  data.loc[numeric_rows.index] = scaled_data
@@ -7319,7 +7848,7 @@ def df_scaler(
7319
7848
  scaled_df = data.copy()
7320
7849
  scaled_df.loc[numeric_rows.index] = scaled_data
7321
7850
  if return_scaler:
7322
- return scaled_df,scaler
7851
+ return scaled_df, scaler
7323
7852
  else:
7324
7853
  return scaled_df
7325
7854
 
@@ -7683,10 +8212,10 @@ def df_reducer(
7683
8212
  hue: str = None, # lda-specific
7684
8213
  scale: bool = True,
7685
8214
  fill_missing: bool = True,
7686
- size=2,# for plot marker size
7687
- markerscale=4,# for plot, legend marker size scale
7688
- edgecolor='none',# for plot,
7689
- legend_loc='best',# for plot,
8215
+ size=2, # for plot marker size
8216
+ markerscale=4, # for plot, legend marker size scale
8217
+ edgecolor="none", # for plot,
8218
+ legend_loc="best", # for plot,
7690
8219
  bbox_to_anchor=None,
7691
8220
  ncols=1,
7692
8221
  debug: bool = False,
@@ -7719,7 +8248,7 @@ def df_reducer(
7719
8248
  "autoencoder": "Autoencoder:\n\tA neural network-based approach for complex feature learning and non-linear dimensionality reduction. Advantage: Can capture very complex relationships. Limitation: Computationally expensive, requires neural network expertise for effective tuning.",
7720
8249
  "nmf": "Non-negative Matrix Factorization:\n\tEffective for parts-based decomposition, commonly used for sparse and non-negative data, e.g., text data or images. Advantage: Interpretability with non-negativity, efficient with sparse data. Limitation: Less effective for negative or zero-centered data.",
7721
8250
  "umap_hdbscan": "UMAP + HDBSCAN:\n\tCombination of UMAP for dimensionality reduction and HDBSCAN for density-based clustering, suitable for cluster discovery in high-dimensional data. Advantage: Effective in discovering clusters in embeddings. Limitation: Requires careful tuning of both UMAP and HDBSCAN parameters.",
7722
- "manifold_learning": "Manifold Learning (Isomap, Hessian LLE, etc.):\n\tMethods designed to capture intrinsic geometrical structure. Advantage: Preserves non-linear relationships in low dimensions. Limitation: Computationally expensive and sensitive to noise."
8251
+ "manifold_learning": "Manifold Learning (Isomap, Hessian LLE, etc.):\n\tMethods designed to capture intrinsic geometrical structure. Advantage: Preserves non-linear relationships in low dimensions. Limitation: Computationally expensive and sensitive to noise.",
7723
8252
  }
7724
8253
 
7725
8254
  from sklearn.preprocessing import StandardScaler
@@ -7730,14 +8259,27 @@ def df_reducer(
7730
8259
  import seaborn as sns
7731
8260
  # Check valid method input
7732
8261
  methods = [
7733
- "pca", "umap", "umap_hdbscan", "tsne", "factor", "isolation_forest","manifold_learning", "lda", "kpca", "ica",
7734
- "mds", "lle", "svd", "truncated_svd", "spectral_embedding",
8262
+ "pca",
8263
+ "umap",
8264
+ "umap_hdbscan",
8265
+ "tsne",
8266
+ "factor",
8267
+ "isolation_forest",
8268
+ "manifold_learning",
8269
+ "lda",
8270
+ "kpca",
8271
+ "ica",
8272
+ "mds",
8273
+ "lle",
8274
+ "svd",
8275
+ "truncated_svd",
8276
+ "spectral_embedding",
7735
8277
  # "autoencoder","nmf",
7736
8278
  ]
7737
8279
  method = strcmp(method, methods)[0]
7738
8280
  if run_once_within(reverse=True):
7739
8281
  print(f"support methods:{methods}")
7740
-
8282
+
7741
8283
  if verbose:
7742
8284
  print(f"\nprocessing with using {dict_methods[method]}:")
7743
8285
  xlabel, ylabel = None, None
@@ -8050,8 +8592,9 @@ def df_reducer(
8050
8592
  svd_df[hue] = y
8051
8593
  if debug:
8052
8594
  print("Singular Value Decomposition (SVD) completed.")
8053
- elif method=="truncated_svd":
8595
+ elif method == "truncated_svd":
8054
8596
  from sklearn.decomposition import TruncatedSVD
8597
+
8055
8598
  svd = TruncatedSVD(n_components=n_components, random_state=random_state)
8056
8599
  X_reduced = svd.fit_transform(X)
8057
8600
  reduced_df = pd.DataFrame(
@@ -8070,7 +8613,9 @@ def df_reducer(
8070
8613
  elif method == "spectral_embedding":
8071
8614
  from sklearn.manifold import SpectralEmbedding
8072
8615
 
8073
- spectral = SpectralEmbedding(n_components=n_components, random_state=random_state)
8616
+ spectral = SpectralEmbedding(
8617
+ n_components=n_components, random_state=random_state
8618
+ )
8074
8619
  X_reduced = spectral.fit_transform(X)
8075
8620
  reduced_df = pd.DataFrame(
8076
8621
  X_reduced,
@@ -8168,7 +8713,7 @@ def df_reducer(
8168
8713
  print("Manifold Learning (Isomap) completed.")
8169
8714
  if hue:
8170
8715
  reduced_df[hue] = y
8171
-
8716
+
8172
8717
  #! Return reduced data and info as a new DataFrame with the same index
8173
8718
  if method == "pca":
8174
8719
  reduced_df = pca_df
@@ -8225,7 +8770,8 @@ def df_reducer(
8225
8770
  colname_met = "SVD_"
8226
8771
  # Quick plots
8227
8772
  if plot_ and (not method in ["isolation_forest"]):
8228
- from .plot import plotxy,figsets,get_color
8773
+ from .plot import plotxy, figsets, get_color
8774
+
8229
8775
  # if ax is None:
8230
8776
  # if figsize is None:
8231
8777
  # _, ax = plt.subplots(figsize=cm2inch(8, 8))
@@ -8235,9 +8781,9 @@ def df_reducer(
8235
8781
  # ax = ax.cla()
8236
8782
  xlabel = f"{colname_met}1" if xlabel is None else xlabel
8237
8783
  ylabel = f"{colname_met}2" if ylabel is None else ylabel
8238
- palette=get_color(len(flatten(data[hue],verbose=0)))
8784
+ palette = get_color(len(flatten(data[hue], verbose=0)))
8239
8785
 
8240
- reduced_df=reduced_df.sort_values(by=hue)
8786
+ reduced_df = reduced_df.sort_values(by=hue)
8241
8787
  print(flatten(reduced_df[hue]))
8242
8788
  ax = plotxy(
8243
8789
  data=reduced_df,
@@ -8247,24 +8793,31 @@ def df_reducer(
8247
8793
  palette=palette,
8248
8794
  # size=size,
8249
8795
  edgecolor=edgecolor,
8250
- kind_=["joint",
8251
- # "kde",
8252
- "ell",
8253
- ],
8796
+ kind_=[
8797
+ "joint",
8798
+ # "kde",
8799
+ "ell",
8800
+ ],
8254
8801
  kws_kde=dict(
8255
- hue=hue,
8256
- levels=2,
8257
- common_norm=False,
8258
- fill=True,
8259
- alpha=0.05,
8260
- ),
8261
- kws_joint=dict(kind='scatter',joint_kws=dict(s=size)),
8262
- kws_ellipse=dict(alpha=0.1,lw=1,label=None),
8802
+ hue=hue,
8803
+ levels=2,
8804
+ common_norm=False,
8805
+ fill=True,
8806
+ alpha=0.05,
8807
+ ),
8808
+ kws_joint=dict(kind="scatter", joint_kws=dict(s=size)),
8809
+ kws_ellipse=dict(alpha=0.1, lw=1, label=None),
8263
8810
  verbose=False,
8264
8811
  **kwargs,
8265
8812
  )
8266
8813
  figsets(
8267
- legend=dict(loc=legend_loc, markerscale=markerscale,bbox_to_anchor=bbox_to_anchor,ncols=ncols,fontsize=8),
8814
+ legend=dict(
8815
+ loc=legend_loc,
8816
+ markerscale=markerscale,
8817
+ bbox_to_anchor=bbox_to_anchor,
8818
+ ncols=ncols,
8819
+ fontsize=8,
8820
+ ),
8268
8821
  xlabel=xlabel if xlabel else None,
8269
8822
  ylabel=ylabel if ylabel else None,
8270
8823
  )
@@ -8297,6 +8850,7 @@ def df_reducer(
8297
8850
  # example:
8298
8851
  # df_reducer(data=data_log, columns=markers, n_components=2)
8299
8852
 
8853
+
8300
8854
  def get_df_format(data, threshold_unique=0.5, verbose=False):
8301
8855
  """
8302
8856
  Detect the table format: long, wide, or uncertain.
@@ -8396,7 +8950,9 @@ def get_df_format(data, threshold_unique=0.5, verbose=False):
8396
8950
  if cluster_labels.nunique() < len(numeric_cols) * 0.5:
8397
8951
  wide_score += 2
8398
8952
  if verbose:
8399
- print("Clustering on columns shows grouping, suggesting wide format.")
8953
+ print(
8954
+ "Clustering on columns shows grouping, suggesting wide format."
8955
+ )
8400
8956
  except Exception as e:
8401
8957
  print(e) if verbose else None
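
Related to these long/wide heuristics: when a table is detected as wide, pandas' melt is the usual way to reshape it to long form (column names here are illustrative):

    import pandas as pd

    wide = pd.DataFrame({"id": [1, 2], "2023": [10, 20], "2024": [30, 40]})
    long = wide.melt(id_vars="id", var_name="year", value_name="value")
    print(long)
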
8402
8958
 
@@ -8487,7 +9043,8 @@ def get_df_format(data, threshold_unique=0.5, verbose=False):
8487
9043
  if verbose:
8488
9044
  print("Final decision: Uncertain format.")
8489
9045
  return "uncertain"
8490
-
9046
+
9047
+
8491
9048
  def plot_cluster(
8492
9049
  data: pd.DataFrame,
8493
9050
  labels: np.ndarray,
@@ -8735,6 +9292,8 @@ def evaluate_cluster(
8735
9292
  metrics["V-Measure"] = np.nan
8736
9293
 
8737
9294
  return metrics
9295
+
9296
+
8738
9297
  def df_qc(
8739
9298
  data: pd.DataFrame,
8740
9299
  columns=None,
@@ -8744,7 +9303,7 @@ def df_qc(
8744
9303
  hue=None,
8745
9304
  output=False,
8746
9305
  verbose=True,
8747
- dir_save=None
9306
+ dir_save=None,
8748
9307
  ):
8749
9308
  """
8750
9309
  Usage example:
@@ -8752,16 +9311,17 @@ def df_qc(
8752
9311
  """
8753
9312
  from statsmodels.stats.outliers_influence import variance_inflation_factor
8754
9313
  from scipy.stats import skew, kurtosis, entropy
8755
-
9314
+
8756
9315
  pd.options.display.max_seq_items = 10
8757
9316
  #! display(data.select_dtypes(include=[np.number]).describe())
8758
9317
  #!skim
8759
9318
  if columns is not None:
8760
- if isinstance(columns, (list,pd.core.indexes.base.Index)):
8761
- data=data[columns]
9319
+ if isinstance(columns, (list, pd.core.indexes.base.Index)):
9320
+ data = data[columns]
8762
9321
  if skim:
8763
9322
  try:
8764
- import skimpy
9323
+ import skimpy
9324
+
8765
9325
  skimpy.skim(data)
8766
9326
  except:
8767
9327
  numerical_data = data.select_dtypes(include=[np.number])
@@ -8775,13 +9335,19 @@ def df_qc(
8775
9335
 
8776
9336
  # Missing values
8777
9337
  res_qc["missing_values"] = data.isnull().sum()
8778
- res_qc["missing_percentage"] = round((res_qc["missing_values"] / len(data)) * 100,2)
9338
+ res_qc["missing_percentage"] = round(
9339
+ (res_qc["missing_values"] / len(data)) * 100, 2
9340
+ )
8779
9341
  res_qc["rows_with_missing"] = data.isnull().any(axis=1).sum()
8780
9342
 
8781
9343
  # Data types and unique values
8782
9344
  res_qc["data_types"] = data.dtypes
8783
- res_qc["unique_counts"] = data.select_dtypes(exclude=np.number).nunique().sort_values()
8784
- res_qc["unique_values"] = data.select_dtypes(exclude=np.number).apply(lambda x: x.unique())
9345
+ res_qc["unique_counts"] = (
9346
+ data.select_dtypes(exclude=np.number).nunique().sort_values()
9347
+ )
9348
+ res_qc["unique_values"] = data.select_dtypes(exclude=np.number).apply(
9349
+ lambda x: x.unique()
9350
+ )
8785
9351
  res_qc["constant_columns"] = [
8786
9352
  col for col in data.columns if data[col].nunique() <= 1
8787
9353
  ]
@@ -8797,8 +9363,8 @@ def df_qc(
8797
9363
  data_outliers = df_outlier(data)
8798
9364
  outlier_num = data_outliers.isna().sum() - data.isnull().sum()
8799
9365
  res_qc["outlier_num"] = outlier_num[outlier_num > 0]
8800
- outlier_percentage=round((outlier_num / len(data_outliers)) * 100,2)
8801
- res_qc["outlier_percentage"] = outlier_percentage[outlier_percentage>0]
9366
+ outlier_percentage = round((outlier_num / len(data_outliers)) * 100, 2)
9367
+ res_qc["outlier_percentage"] = outlier_percentage[outlier_percentage > 0]
8802
9368
  try:
8803
9369
  # Correlation and multicollinearity (VIF)
8804
9370
  if any(data.dtypes.apply(pd.api.types.is_numeric_dtype)):
@@ -8816,16 +9382,16 @@ def df_qc(
8816
9382
  numeric_df = data.select_dtypes(include=[np.number]).dropna()
8817
9383
  if isinstance(numeric_df.columns, pd.MultiIndex):
8818
9384
  numeric_df.columns = [
8819
- "_".join(col).strip() if isinstance(col, tuple) else col for col in numeric_df.columns
9385
+ "_".join(col).strip() if isinstance(col, tuple) else col
9386
+ for col in numeric_df.columns
8820
9387
  ]
8821
9388
 
8822
-
8823
9389
  vif_data = pd.DataFrame()
8824
- res_qc["vif"]=vif_data
9390
+ res_qc["vif"] = vif_data
8825
9391
  if numeric_df.shape[1] > 1 and not numeric_df.empty:
8826
9392
  vif_data["feature"] = numeric_df.columns.tolist()
8827
9393
  vif_data["VIF"] = [
8828
- round(variance_inflation_factor(numeric_df.values, i),2)
9394
+ round(variance_inflation_factor(numeric_df.values, i), 2)
8829
9395
  for i in range(numeric_df.shape[1])
8830
9396
  ]
8831
9397
  res_qc["vif"] = vif_data[
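
The VIF loop follows the standard statsmodels recipe; on two nearly collinear columns the factors explode, which is the multicollinearity signal df_qc screens for. A minimal reproduction:

    import numpy as np
    import pandas as pd
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    rng = np.random.default_rng(0)
    a = rng.normal(size=200)
    X = pd.DataFrame({"a": a, "b": 2 * a + rng.normal(scale=0.1, size=200)})
    print([round(variance_inflation_factor(X.values, i), 2) for i in range(X.shape[1])])
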
@@ -8847,8 +9413,8 @@ def df_qc(
8847
9413
  }
8848
9414
 
8849
9415
  # dtypes counts
8850
- res_qc['dtype_counts']=data.dtypes.value_counts()
8851
-
9416
+ res_qc["dtype_counts"] = data.dtypes.value_counts()
9417
+
8852
9418
  # Distribution Analysis (mean, median, mode, std dev, IQR for numeric columns)
8853
9419
  distribution_stats = data.select_dtypes(include=[np.number]).describe().T
8854
9420
  iqr = data.select_dtypes(include=[np.number]).apply(
@@ -8880,7 +9446,6 @@ def df_qc(
8880
9446
  if len(unique_types) > 1:
8881
9447
  inconsistent_types[col] = unique_types
8882
9448
  res_qc["inconsistent_types"] = inconsistent_types
8883
-
8884
9449
 
8885
9450
  # Text length analysis for text fields
8886
9451
  text_lengths = {}
@@ -8892,7 +9457,9 @@ def df_qc(
8892
9457
  res_qc["text_length_analysis"] = text_lengths
8893
9458
 
8894
9459
  # Summary statistics
8895
- res_qc["summary_statistics"] = data.describe().T.style.background_gradient(cmap='coolwarm', axis=0)
9460
+ res_qc["summary_statistics"] = data.describe().T.style.background_gradient(
9461
+ cmap="coolwarm", axis=0
9462
+ )
8896
9463
 
8897
9464
  # Automated warnings
8898
9465
  warnings = []
@@ -8920,39 +9487,60 @@ def df_qc(
8920
9487
  display(res_qc["data_types"])
8921
9488
  if any(res_qc["missing_values"][res_qc["missing_values"] > 0]):
8922
9489
  print(" ⤵ Missing Values Counts:")
8923
- display(pd.DataFrame(
8924
- {
8925
- "missing_values": res_qc["missing_values"][res_qc["missing_values"] > 0],
8926
- "missing_percent(%)": res_qc["missing_percentage"][
8927
- res_qc["missing_percentage"] > 0
8928
- ],
8929
- }
8930
- ).style.background_gradient(cmap="coolwarm", axis=0)
8931
- )
9490
+ display(
9491
+ pd.DataFrame(
9492
+ {
9493
+ "missing_values": res_qc["missing_values"][
9494
+ res_qc["missing_values"] > 0
9495
+ ],
9496
+ "missing_percent(%)": res_qc["missing_percentage"][
9497
+ res_qc["missing_percentage"] > 0
9498
+ ],
9499
+ }
9500
+ ).style.background_gradient(cmap="coolwarm", axis=0)
9501
+ )
8932
9502
  # print(res_qc["missing_percentage"][res_qc["missing_percentage"] > 0])
8933
- print("\n⤵ Rows with Missing Values:",res_qc["rows_with_missing"])
9503
+ print("\n⤵ Rows with Missing Values:", res_qc["rows_with_missing"])
9504
+
9505
+ (
9506
+ print("\n⤵ Constant Columns:", res_qc["constant_columns"])
9507
+ if any(res_qc["constant_columns"])
9508
+ else None
9509
+ )
9510
+ (
9511
+ print("⤵ Duplicate Rows:", res_qc["duplicate_rows"])
9512
+ if res_qc["duplicate_rows"]
9513
+ else None
9514
+ )
9515
+ (
9516
+ print("⤵ Duplicate Columns:", res_qc["duplicate_columns"])
9517
+ if any(res_qc["duplicate_columns"])
9518
+ else None
9519
+ )
8934
9520
 
8935
- print("\n⤵ Constant Columns:", res_qc["constant_columns"]) if any(res_qc["constant_columns"]) else None
8936
- print("⤵ Duplicate Rows:", res_qc["duplicate_rows"]) if res_qc["duplicate_rows"] else None
8937
- print("⤵ Duplicate Columns:", res_qc["duplicate_columns"]) if any(res_qc["duplicate_columns"]) else None
8938
-
8939
9521
  if any(res_qc["outlier_num"]):
8940
9522
  print("\n⤵ Outlier Report:")
8941
- display(pd.DataFrame(
8942
- {
8943
- "outlier_num": res_qc["outlier_num"][res_qc["outlier_num"] > 0],
8944
- "outlier_percentage(%)": res_qc["outlier_percentage"][
8945
- res_qc["outlier_percentage"] > 0
8946
- ],
8947
- }
8948
- ).style.background_gradient(cmap="coolwarm", axis=0)
8949
- )
9523
+ display(
9524
+ pd.DataFrame(
9525
+ {
9526
+ "outlier_num": res_qc["outlier_num"][res_qc["outlier_num"] > 0],
9527
+ "outlier_percentage(%)": res_qc["outlier_percentage"][
9528
+ res_qc["outlier_percentage"] > 0
9529
+ ],
9530
+ }
9531
+ ).style.background_gradient(cmap="coolwarm", axis=0)
9532
+ )
8950
9533
 
8951
9534
  if any(res_qc["unique_counts"]):
8952
9535
  print("\n⤵ Unique Values per Column:")
8953
- display(pd.DataFrame({"unique_counts":res_qc["unique_counts"],
8954
- "unique_values":res_qc["unique_values"]}).style.background_gradient(cmap="coolwarm", axis=0))
8955
-
9536
+ display(
9537
+ pd.DataFrame(
9538
+ {
9539
+ "unique_counts": res_qc["unique_counts"],
9540
+ "unique_values": res_qc["unique_values"],
9541
+ }
9542
+ ).style.background_gradient(cmap="coolwarm", axis=0)
9543
+ )
8956
9544
 
8957
9545
  if res_qc["empty_columns"]:
8958
9546
  print("\n⤵ Empty Columns:", res_qc["empty_columns"])
@@ -8971,7 +9559,7 @@ def df_qc(
8971
9559
  print(res_qc["high_cardinality_categoricals"])
8972
9560
  if any(res_qc["inconsistent_types"]):
8973
9561
  print("\n⤵ Inconsistent Data Types:")
8974
- display(res_qc["inconsistent_types"])
9562
+ display(res_qc["inconsistent_types"])
8975
9563
  if any(res_qc["text_length_analysis"]):
8976
9564
  print("\n⤵ Text Length Analysis:")
8977
9565
  for col, stats in res_qc["text_length_analysis"].items():
@@ -8986,67 +9574,93 @@ def df_qc(
8986
9574
 
8987
9575
  pd.reset_option("display.max_seq_items")
8988
9576
  if plot_:
8989
- df_qc_plots(data=data, res_qc=res_qc, max_cols=max_cols,hue=hue,dir_save=dir_save)
9577
+ df_qc_plots(
9578
+ data=data, res_qc=res_qc, max_cols=max_cols, hue=hue, dir_save=dir_save
9579
+ )
8990
9580
  if output or not plot_:
8991
9581
  return res_qc
8992
9582
  return None
8993
9583
 
8994
9584
 
8995
- def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,hue=None,dir_save=None):
9585
+ def df_qc_plots(
9586
+ data: pd.DataFrame,
9587
+ columns=None,
9588
+ res_qc: dict = None,
9589
+ max_cols=20,
9590
+ hue=None,
9591
+ dir_save=None,
9592
+ ):
8996
9593
  import matplotlib.pyplot as plt
8997
9594
  import seaborn as sns
8998
9595
  from .plot import subplot, figsets, get_color
8999
9596
  from datetime import datetime
9597
+
9000
9598
  now_ = datetime.now().strftime("%y%m%d_%H%M%S")
9001
-
9599
+
9002
9600
  if columns is not None:
9003
- if isinstance(columns, (list,pd.core.indexes.base.Index)):
9004
- data=data[columns]
9601
+ if isinstance(columns, (list, pd.core.indexes.base.Index)):
9602
+ data = data[columns]
9005
9603
  len_total = len(res_qc)
9006
9604
  n_row, n_col = int((len_total + 10)), 3
9007
- nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row],verbose=False)
9605
+ nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row], verbose=False)
9008
9606
 
9009
9607
  missing_data = res_qc["missing_values"][res_qc["missing_values"] > 0].sort_values(
9010
9608
  ascending=False
9011
9609
  )
9012
9610
  if len(missing_data) > max_cols:
9013
9611
  missing_data = missing_data[:max_cols]
9014
- ax_missing_data=sns.barplot(
9612
+ ax_missing_data = sns.barplot(
9015
9613
  y=missing_data.index,
9016
9614
  x=missing_data.values,
9017
9615
  hue=missing_data.index,
9018
9616
  palette=get_color(len(missing_data), cmap="coolwarm")[::-1],
9019
9617
  ax=nexttile(),
9020
9618
  )
9021
- figsets(title="Missing (#)", xlabel="#",ax=ax_missing_data,ylabel=None,fontsize=8 if len(missing_data)<=20 else 6)
9619
+ figsets(
9620
+ title="Missing (#)",
9621
+ xlabel="#",
9622
+ ax=ax_missing_data,
9623
+ ylabel=None,
9624
+ fontsize=8 if len(missing_data) <= 20 else 6,
9625
+ )
9022
9626
 
9023
9627
  outlier_num = res_qc["outlier_num"].sort_values(ascending=False)
9024
9628
  if len(outlier_num) > max_cols:
9025
9629
  outlier_num = outlier_num[:max_cols]
9026
- ax_outlier_num=sns.barplot(
9630
+ ax_outlier_num = sns.barplot(
9027
9631
  y=outlier_num.index,
9028
9632
  x=outlier_num.values,
9029
- hue=outlier_num.index,
9633
+ hue=outlier_num.index,
9030
9634
  palette=get_color(len(outlier_num), cmap="coolwarm")[::-1],
9031
9635
  ax=nexttile(),
9032
9636
  )
9033
- figsets(ax=ax_outlier_num,title="Outliers (#)", xlabel="#",ylabel=None,fontsize=8 if len(outlier_num)<=20 else 6)
9034
-
9637
+ figsets(
9638
+ ax=ax_outlier_num,
9639
+ title="Outliers (#)",
9640
+ xlabel="#",
9641
+ ylabel=None,
9642
+ fontsize=8 if len(outlier_num) <= 20 else 6,
9643
+ )
9644
+
9035
9645
  #!
9036
9646
  try:
9037
- for col in data.select_dtypes(include='category').columns:
9038
- sns.countplot(y=data[col],
9039
- palette=get_color(data.select_dtypes(include='category').shape[1], cmap="coolwarm")[::-1],
9040
- ax=nexttile())
9647
+ for col in data.select_dtypes(include="category").columns:
9648
+ sns.countplot(
9649
+ y=data[col],
9650
+ palette=get_color(
9651
+ data.select_dtypes(include="category").shape[1], cmap="coolwarm"
9652
+ )[::-1],
9653
+ ax=nexttile(),
9654
+ )
9041
9655
  figsets(title=f"Count Plot: {col}", xlabel="Count", ylabel=col)
9042
9656
  except Exception as e:
9043
- pass
9657
+ pass
9044
9658
 
9045
9659
  # Skewness and Kurtosis Plots
9046
9660
  skewness = res_qc["skewness"].sort_values(ascending=False)
9047
9661
  kurtosis = res_qc["kurtosis"].sort_values(ascending=False)
9048
9662
  if not skewness.empty:
9049
- ax_skewness=sns.barplot(
9663
+ ax_skewness = sns.barplot(
9050
9664
  y=skewness.index,
9051
9665
  x=skewness.values,
9052
9666
  hue=skewness.index,
@@ -9055,11 +9669,13 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
9055
9669
  )
9056
9670
  figsets(
9057
9671
  title="Highly Skewed Numeric Columns (Skewness > 1)",
9058
- xlabel="Skewness",ylabel=None,ax=ax_skewness,
9059
- fontsize=8 if len(skewness)<=20 else 6
9672
+ xlabel="Skewness",
9673
+ ylabel=None,
9674
+ ax=ax_skewness,
9675
+ fontsize=8 if len(skewness) <= 20 else 6,
9060
9676
  )
9061
9677
  if not kurtosis.empty:
9062
- ax_kurtosis=sns.barplot(
9678
+ ax_kurtosis = sns.barplot(
9063
9679
  y=kurtosis.index,
9064
9680
  x=kurtosis.values,
9065
9681
  hue=kurtosis.index,
@@ -9068,59 +9684,68 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
9068
9684
  )
9069
9685
  figsets(
9070
9686
  title="Highly Kurtotic Numeric Columns (Kurtosis > 3)",
9071
- xlabel="Kurtosis",ylabel=None,ax=ax_kurtosis,
9072
- fontsize=8 if len(kurtosis)<=20 else 6
9687
+ xlabel="Kurtosis",
9688
+ ylabel=None,
9689
+ ax=ax_kurtosis,
9690
+ fontsize=8 if len(kurtosis) <= 20 else 6,
9073
9691
  )
9074
9692
 
9075
9693
  # Entropy for Categorical Variables
9076
9694
  entropy_data = pd.Series(res_qc["entropy_categoricals"]).sort_values(
9077
9695
  ascending=False
9078
9696
  )
9079
- ax_entropy_data=sns.barplot(
9080
- y=entropy_data.index, x=entropy_data.values,hue=entropy_data.index,
9697
+ ax_entropy_data = sns.barplot(
9698
+ y=entropy_data.index,
9699
+ x=entropy_data.values,
9700
+ hue=entropy_data.index,
9081
9701
  palette=get_color(len(entropy_data), cmap="coolwarm")[::-1],
9082
- ax=nexttile()
9083
- )
9702
+ ax=nexttile(),
9703
+ )
9084
9704
  figsets(
9085
- ylabel="Categorical Columns",
9086
- title="Entropy of Categorical Variables",
9087
- xlabel="Entropy (bits)",
9088
- ax=ax_entropy_data,
9089
- fontsize=8 if len(entropy_data)<=20 else 6
9090
- )
9705
+ ylabel="Categorical Columns",
9706
+ title="Entropy of Categorical Variables",
9707
+ xlabel="Entropy (bits)",
9708
+ ax=ax_entropy_data,
9709
+ fontsize=8 if len(entropy_data) <= 20 else 6,
9710
+ )
9091
9711
 
9092
9712
  # unique counts
- unique_counts=res_qc["unique_counts"].sort_values(ascending=False)
- ax_unique_counts_=sns.barplot(
- y=unique_counts.index,
- x=unique_counts.values,
- hue=unique_counts.index,
- palette=get_color(len(unique_counts), cmap="coolwarm")[::-1],
- ax=nexttile())
+ unique_counts = res_qc["unique_counts"].sort_values(ascending=False)
+ ax_unique_counts_ = sns.barplot(
+ y=unique_counts.index,
+ x=unique_counts.values,
+ hue=unique_counts.index,
+ palette=get_color(len(unique_counts), cmap="coolwarm")[::-1],
+ ax=nexttile(),
+ )
  figsets(
- title="Unique Counts",
- ylabel=None,
- xlabel="#",
- ax=ax_unique_counts_,
- fontsize=8 if len(unique_counts)<=20 else 6
- )
+ title="Unique Counts",
+ ylabel=None,
+ xlabel="#",
+ ax=ax_unique_counts_,
+ fontsize=8 if len(unique_counts) <= 20 else 6,
+ )
  # Binary Checking
- ax_unique_counts=sns.barplot(y=unique_counts[unique_counts<8].index,
- x=unique_counts[unique_counts<8].values,
- hue=unique_counts[unique_counts<8].index,
- palette=get_color(len(unique_counts[unique_counts<8].index), cmap="coolwarm")[::-1],
- ax=nexttile())
+ ax_unique_counts = sns.barplot(
+ y=unique_counts[unique_counts < 8].index,
+ x=unique_counts[unique_counts < 8].values,
+ hue=unique_counts[unique_counts < 8].index,
+ palette=get_color(len(unique_counts[unique_counts < 8].index), cmap="coolwarm")[
+ ::-1
+ ],
+ ax=nexttile(),
+ )
  plt.axvline(x=2, color="r", linestyle="--", lw=2)
  figsets(
- ylabel=None,
- title="Binary Checking",
- xlabel="#",
- ax=ax_unique_counts,
- fontsize=8 if len(unique_counts[unique_counts<10].index)<=20 else 6
- )
+ ylabel=None,
+ title="Binary Checking",
+ xlabel="#",
+ ax=ax_unique_counts,
+ fontsize=8 if len(unique_counts[unique_counts < 10].index) <= 20 else 6,
+ )

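The "Binary Checking" panel above only plots columns with fewer than 8 unique values and draws a dashed reference line at x=2, so genuinely binary columns sit exactly on the line. The same screen in isolation (df is a stand-in DataFrame):

    import pandas as pd

    df = pd.DataFrame({"sex": ["m", "f", "m", "f"], "age": [20, 30, 40, 50]})
    unique_counts = df.nunique().sort_values(ascending=False)
    low_card = unique_counts[unique_counts < 8]           # what the panel plots
    binary_cols = low_card[low_card == 2].index.tolist()  # bars touching x=2
    print(binary_cols)  # ['sex']

Note the fontsize guard in the figsets call still filters on unique_counts < 10 while the plot itself uses < 8; that looks like a leftover threshold rather than an intentional difference.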
  # dtypes counts
- dtype_counts = res_qc['dtype_counts']
+ dtype_counts = res_qc["dtype_counts"]
  txt = []
  for tp in dtype_counts.index:
  txt.append(list(data.select_dtypes(include=tp).columns))
@@ -9131,9 +9756,9 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
  color="#F3C8B2",
  ax=nexttile(),
  )
- max_columns_per_row = 1 # Maximum number of columns per row
+ max_columns_per_row = 1  # Maximum number of columns per row
  for i, tp in enumerate(dtype_counts.index):
- if i<=20:
+ if i <= 20:
  column_names = txt[i]
  # Split the column names into multiple lines if too long
  column_name_str = ", ".join(column_names)
@@ -9152,7 +9777,7 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
  ha="center",
  va="top",
  c="k",
- fontsize=8 if len(dtype_counts.index)<=20 else 6,
+ fontsize=8 if len(dtype_counts.index) <= 20 else 6,
  rotation=0,
  )
  figsets(
@@ -9160,7 +9785,7 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
  title="Dtypes",
  ylabel="#",
  ax=ax_dtype_counts,
- fontsize=8 if len(dtype_counts.index)<=20 else 6,
+ fontsize=8 if len(dtype_counts.index) <= 20 else 6,
  )
  # from .plot import pie
  # pie()
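The txt list assembled above pairs each dtype bar with the column names it covers, which are then drawn as wrapped text annotations. Reproduced outside the plotting code (df is a stand-in):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2], "b": [1.0, 2.0], "c": ["x", "y"]})
    dtype_counts = df.dtypes.astype(str).value_counts()
    for tp in dtype_counts.index:
        print(tp, dtype_counts[tp], list(df.select_dtypes(include=tp).columns))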
@@ -9175,57 +9800,66 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
  )

  if high_cardinality:
- ax_high_cardinality=sns.barplot(
+ ax_high_cardinality = sns.barplot(
  y=list(high_cardinality.keys()),
  x=list(high_cardinality.values()),
  hue=list(high_cardinality.keys()),
- palette=get_color(len(list(high_cardinality.keys())), cmap="coolwarm")[::-1],
+ palette=get_color(len(list(high_cardinality.keys())), cmap="coolwarm")[
+ ::-1
+ ],
  ax=nexttile(),
  )
  figsets(
  title="High Cardinality Categorical Columns",
  xlabel="Unique Value Count",
  ax=ax_high_cardinality,
- fontsize=8 if len(list(high_cardinality.keys()))<=20 else 6
+ fontsize=8 if len(list(high_cardinality.keys())) <= 20 else 6,
  )
  if res_qc["low_variance_features"]:
  low_variance_data = data[res_qc["low_variance_features"]].copy()
  for col in low_variance_data.columns:
- ax_low_variance_features=sns.histplot(
+ ax_low_variance_features = sns.histplot(
  low_variance_data[col], bins=20, kde=True, color="coral", ax=nexttile()
  )
- figsets(title=f"Low Variance Feature: {col}",ax=ax_low_variance_features,
- fontsize=8 if len(low_variance_data[col])<=20 else 6)
+ figsets(
+ title=f"Low Variance Feature: {col}",
+ ax=ax_low_variance_features,
+ fontsize=8 if len(low_variance_data[col]) <= 20 else 6,
+ )

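A hedged sketch of the screen that presumably populates res_qc["low_variance_features"] upstream; the 0.01 cutoff here is illustrative, not the package's actual threshold:

    import pandas as pd

    df = pd.DataFrame({"flat": [1.0, 1.0, 1.01], "varied": [1.0, 5.0, 9.0]})
    variances = df.var(numeric_only=True)
    low_variance_features = variances[variances < 0.01].index.tolist()
    print(low_variance_features)  # ['flat']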
  # VIF plot for multicollinearity detection
  if "vif" in res_qc and not res_qc["vif"].empty:
  vif_data = res_qc["vif"].sort_values(by="VIF", ascending=False)
  if len(vif_data) > max_cols:
  vif_data = vif_data[:max_cols]
- ax_vif=sns.barplot(data=vif_data,
- x="VIF",
- y="feature",
- hue="VIF",
- palette=get_color(len(vif_data), cmap="coolwarm")[::-1],
- ax=nexttile())
+ ax_vif = sns.barplot(
+ data=vif_data,
+ x="VIF",
+ y="feature",
+ hue="VIF",
+ palette=get_color(len(vif_data), cmap="coolwarm")[::-1],
+ ax=nexttile(),
+ )
  figsets(
  title="Variance Inflation Factor(VIF)",
  xlabel="VIF",
  ylabel="Features",
  legend=None,
  ax=ax_vif,
- fontsize=8 if len(vif_data)<=20 else 6
+ fontsize=8 if len(vif_data) <= 20 else 6,
  )

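res_qc["vif"] is consumed above as a frame with "feature" and "VIF" columns. A sketch of how such a table is conventionally built with statsmodels (assumed here; the actual computation lives elsewhere in ips.py):

    import pandas as pd
    import statsmodels.api as sm
    from statsmodels.stats.outliers_influence import variance_inflation_factor

    X = sm.add_constant(pd.DataFrame({
        "x1": [1.0, 2.0, 3.0, 4.0, 5.0],
        "x2": [2.0, 4.1, 6.2, 7.9, 10.1],  # nearly collinear with x1
        "x3": [1.0, 0.0, 1.0, 0.0, 1.0],
    }))
    vif_data = pd.DataFrame({
        "feature": X.columns[1:],  # skip the intercept
        "VIF": [variance_inflation_factor(X.values, i) for i in range(1, X.shape[1])],
    })
    print(vif_data.sort_values(by="VIF", ascending=False))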
  # Correlation heatmap for numeric columns with high correlation pairs
  if any(data.dtypes.apply(pd.api.types.is_numeric_dtype)):
  corr = data.select_dtypes(include=[np.number]).corr()
- if corr.shape[1]<=33:
+ if corr.shape[1] <= 33:
  mask = np.triu(np.ones_like(corr, dtype=bool))
  num_columns = corr.shape[1]
- fontsize = max(6, min(12, 12 - (num_columns - 10) * 0.2)) # Scale between 8 and 12
+ fontsize = max(
+ 6, min(12, 12 - (num_columns - 10) * 0.2)
+ ) # Scale between 8 and 12

- ax_heatmap=sns.heatmap(
+ ax_heatmap = sns.heatmap(
  corr,
  mask=mask,
  annot=True,
@@ -9233,24 +9867,21 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
  center=0,
  fmt=".1f",
  linewidths=0.5,
- vmin=-1, vmax=1,
+ vmin=-1,
+ vmax=1,
  ax=nexttile(2, 2),
- cbar_kws=dict(shrink=0.2,ticks=np.arange(-1, 2, 1)),
- annot_kws={"size": fontsize}
- )
-
- figsets(
- xangle=45,
- title="Correlation Heatmap",
- ax=ax_heatmap
+ cbar_kws=dict(shrink=0.2, ticks=np.arange(-1, 2, 1)),
+ annot_kws={"size": fontsize},
  )
+
+ figsets(xangle=45, title="Correlation Heatmap", ax=ax_heatmap)
  # # save figure
  # if dir_save:
  # figsave(dir_save,f"qc_plot_{now_}.pdf")

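The np.triu mask above blanks the upper triangle of the symmetric correlation matrix so each pair is annotated once (note the inline comment still says "between 8 and 12" although the clamp is max(6, min(12, ...)), i.e. 6 to 12). The masking pattern in isolation:

    import numpy as np
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt

    df = pd.DataFrame(np.random.default_rng(0).normal(size=(100, 4)), columns=list("abcd"))
    corr = df.corr()
    mask = np.triu(np.ones_like(corr, dtype=bool))  # hide the redundant half
    sns.heatmap(corr, mask=mask, annot=True, fmt=".1f", vmin=-1, vmax=1, center=0)
    plt.show()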
  if columns is not None:
- if isinstance(columns, (list,pd.core.indexes.base.Index)):
- data=data[columns]
+ if isinstance(columns, (list, pd.core.indexes.base.Index)):
+ data = data[columns]

  # len_total = len(res_qc)
  # n_row, n_col = int((len_total + 10) / 3), 3
@@ -9258,30 +9889,36 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
  #! check distribution
  data_num = data.select_dtypes(include=np.number)
  if len(data_num) > max_cols:
- data_num = data_num.iloc[:,:max_cols]
+ data_num = data_num.iloc[:, :max_cols]
+
+ data_num = df_scaler(data=data_num, method="standard")

- data_num = df_scaler(data=data_num, method='standard')
-
  import scipy.stats as stats
+
  for column in data_num.columns:
- #* Shapiro-Wilk test for normality
+ # * Shapiro-Wilk test for normality
  stat, p_value = stats.shapiro(data_num[column])
- normality = "norm" if p_value > 0.05 else "not_norm"
- #* Plot histogram
- ax_hist=sns.histplot(data_num[column], kde=True, ax=nexttile())
+ normality = "norm" if p_value > 0.05 else "not_norm"
+ # * Plot histogram
+ ax_hist = sns.histplot(data_num[column], kde=True, ax=nexttile())
  x_min, x_max = ax_hist.get_xlim()
  y_min, y_max = ax_hist.get_ylim()
- ax_hist.text(x_min+(x_max-x_min)*0.5, y_min+(y_max-y_min)*0.75,
- f'p(Shapiro-Wilk)={p_value:.3f}\n{normality}',
- ha='center', va='top')
- figsets(title=column,ax=ax_hist)
- ax_twin=ax_hist.twinx()
- #* Q-Q plot
+ ax_hist.text(
+ x_min + (x_max - x_min) * 0.5,
+ y_min + (y_max - y_min) * 0.75,
+ f"p(Shapiro-Wilk)={p_value:.3f}\n{normality}",
+ ha="center",
+ va="top",
+ )
+ figsets(title=column, ax=ax_hist)
+ ax_twin = ax_hist.twinx()
+ # * Q-Q plot
  stats.probplot(data_num[column], dist="norm", plot=ax_twin)
- figsets(ylabel=f'Q-Q Plot:{column}',title=None)
+ figsets(ylabel=f"Q-Q Plot:{column}", title=None)
  # save figure
  if dir_save:
- figsave(dir_save,f"qc_plot_{now_}.pdf")
+ figsave(dir_save, f"qc_plot_{now_}.pdf")
+

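The distribution pass above standardizes each numeric column, annotates the Shapiro-Wilk p-value (p > 0.05 read as "norm"), and overlays a Q-Q plot on a twin axis. A self-contained sketch of the same check:

    import numpy as np
    import scipy.stats as stats
    import seaborn as sns
    import matplotlib.pyplot as plt

    x = np.random.default_rng(1).normal(size=200)
    stat, p_value = stats.shapiro(x)
    ax_hist = sns.histplot(x, kde=True)
    ax_hist.set_title(f"p(Shapiro-Wilk)={p_value:.3f}")
    stats.probplot(x, dist="norm", plot=ax_hist.twinx())  # Q-Q overlay
    plt.show()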
  def df_corr(df: pd.DataFrame, method="pearson"):
  """
@@ -9318,6 +9955,7 @@ def df_corr(df: pd.DataFrame, method="pearson"):

  return corr_matrix, pval_matrix

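df_corr returns a correlation matrix together with a matching p-value matrix. One hedged way to produce that pair with pandas and scipy (not necessarily this module's implementation; pandas forces the diagonal of a callable-based corr() to 1.0, so the diagonal "p-values" read 1.0):

    import pandas as pd
    from scipy.stats import pearsonr

    df = pd.DataFrame({"a": [1, 2, 3, 4], "b": [2, 4, 5, 9]})
    corr_matrix = df.corr(method="pearson")
    pval_matrix = df.corr(method=lambda x, y: pearsonr(x, y)[1])
    print(corr_matrix, pval_matrix, sep="\n")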
+
  def use_pd(
  func_name="excel",
  verbose=True,
@@ -9338,7 +9976,8 @@ def use_pd(
  if verbose:
  print(e)

- def get_phone(phone_number: str, region: str = None,verbose=True):
+
+ def get_phone(phone_number: str, region: str = None, verbose=True):
  """
  usage:
  info = get_phone(15237654321, "DE")
@@ -9426,21 +10065,23 @@ def get_phone(phone_number: str, region: str = None,verbose=True):
  dialing_instructions = f"Dial {formatted_national} within {country_name}. Dial {formatted_e164} from abroad."

  # Advanced Timezone Handling
- gmt_offsets = pytz.timezone(time_zones).utcoffset(datetime.now()).total_seconds()/ 3600
+ gmt_offsets = (
+ pytz.timezone(time_zones).utcoffset(datetime.now()).total_seconds() / 3600
+ )
  # Get the local timezone (current computer's time)
  local_timezone = get_localzone()
- #local_timezone = pytz.timezone(pytz.country_timezones[region_code][0])
+ # local_timezone = pytz.timezone(pytz.country_timezones[region_code][0])
  local_offset = local_timezone.utcoffset(datetime.now()).total_seconds() / 3600
  offset_diff = local_offset - gmt_offsets
  head_time = "earlier" if offset_diff < 0 else "later" if offset_diff > 0 else ""
- res= {
+ res = {
  "valid": True,
  "possible": possible,
  "formatted": {
  "international": formatted_international,
  "national": formatted_national,
  "e164": formatted_e164,
- },
+ },
  "country_code": country_code,
  "country_name": country_name,
  "region_code": region_code,
@@ -9448,13 +10089,13 @@ def get_phone(phone_number: str, region: str = None,verbose=True):
  "carrier": carrier_name,
  "time_zone": time_zones,
  "current_times": current_times,
- "local_offset":f"{local_offset} utcoffset",
+ "local_offset": f"{local_offset} utcoffset",
  "time_zone_diff": f"{head_time} {int(np.abs(offset_diff))} h",
  "number_type": number_type_str,
  "is_toll_free": is_toll_free,
- "is_premium_rate": is_premium_rate,
+ "is_premium_rate": is_premium_rate,
  "dialing_instructions": dialing_instructions,
- "suggested_fix": None, # Use phonenumbers.example_number if invalid
+ "suggested_fix": None,  # Use phonenumbers.example_number if invalid
  "logs": {
  "number_analysis_completed": datetime.now().strftime(
  "%Y-%m-%d %H:%M:%S"
@@ -9465,7 +10106,7 @@ def get_phone(phone_number: str, region: str = None,verbose=True):
  }

  except phonenumbers.NumberParseException as e:
- res= {"valid": False, "error": str(e)}
+ res = {"valid": False, "error": str(e)}
  if verbose:
  preview(res)
  return res
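The reformatted gmt_offsets expression converts the number's time zone to a UTC offset in hours and compares it against the machine's own offset from tzlocal. The arithmetic in isolation ("Europe/Berlin" is an illustrative zone):

    from datetime import datetime
    import pytz
    from tzlocal import get_localzone

    gmt_offset = pytz.timezone("Europe/Berlin").utcoffset(datetime.now()).total_seconds() / 3600
    local_offset = get_localzone().utcoffset(datetime.now()).total_seconds() / 3600
    print(f"difference: {local_offset - gmt_offset:+.1f} h")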
@@ -9531,7 +10172,8 @@ def decode_pluscode(

  return latitude, longitude

- def get_loc(input_data, user_agent="0413@mygmail.com)",verbose=True):
+
+ def get_loc(input_data, user_agent="0413@mygmail.com)", verbose=True):
  """
  Determine if the input is a city name, lat/lon, or DMS and perform geocoding or reverse geocoding.
  Usage:
@@ -9562,13 +10204,17 @@ def get_loc(input_data, user_agent="0413@mygmail.com)",verbose=True):
  # Case 1: Input is a city name (string)
  if isinstance(input_data, str) and not re.match(r"^\d+(\.\d+)?$", input_data):
  location = geolocator.geocode(input_data)
- if verbose:
- print(
- f"Latitude and Longitude for {input_data}: {location.latitude}, {location.longitude}"
- )
- else:
- print(f"Could not find {input_data}.")
- return location
+ try:
+ if verbose:
+ print(
+ f"Latitude and Longitude for {input_data}: {location.latitude}, {location.longitude}"
+ )
+ else:
+ print(f"Could not find {input_data}.")
+ return location
+ except Exception as e:
+ print(f'Error: {e}')
+ return

  # Case 2: Input is latitude and longitude (float or tuple)
  elif isinstance(input_data, (float, tuple)):
@@ -9607,7 +10253,8 @@ def get_loc(input_data, user_agent="0413@mygmail.com)",verbose=True):
  "Invalid input format. Please provide a city name, latitude/longitude, or DMS string."
  )

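The new try/except in get_loc guards the attribute access that raises when geocode() returns None; note the else branch pairs with the verbose flag, so with verbose=False it prints "Could not find" even on success. An explicit None check is the more direct test, as in this hedged sketch of the underlying geopy call (the user_agent string is a placeholder):

    from geopy.geocoders import Nominatim

    geolocator = Nominatim(user_agent="example_app")  # placeholder agent
    location = geolocator.geocode("Berlin")
    if location is not None:
        print(location.latitude, location.longitude)
    else:
        print("Could not find Berlin.")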
- def enpass(code: str, method: str="AES", key: str = None):
+
+ def enpass(code: str, method: str = "AES", key: str = None):
  """
  usage: enpass("admin")
  Master encryption function that supports multiple methods: AES, RSA, and SHA256.
@@ -9617,6 +10264,7 @@ def enpass(code: str, method: str="AES", key: str = None):
  :return: The encrypted data or hashed value.
  """
  import hashlib
+
  # AES Encryption (Advanced)
  def aes_encrypt(data: str, key: str):
  """
@@ -9630,9 +10278,10 @@ def enpass(code: str, method: str="AES", key: str = None):
  from cryptography.hazmat.primitives import padding
  import base64
  import os
+
  # Generate a 256-bit key from the provided password
  key = hashlib.sha256(key.encode()).digest()
-
+
  # Generate a random initialization vector (IV)
  iv = os.urandom(16) # 16 bytes for AES block size

@@ -9659,10 +10308,12 @@ def enpass(code: str, method: str="AES", key: str = None):
  import base64
  from Crypto.PublicKey import RSA
  from Crypto.Cipher import PKCS1_OAEP
+
  public_key_obj = RSA.import_key(public_key)
  cipher_rsa = PKCS1_OAEP.new(public_key_obj)
  encrypted_data = cipher_rsa.encrypt(data.encode())
  return base64.b64encode(encrypted_data).decode()
+
  # SHA256 Hashing (Non-reversible)
  def sha256_hash(data: str):
  """
@@ -9671,9 +10322,10 @@ def enpass(code: str, method: str="AES", key: str = None):
  :return: The hashed value (hex string).
  """
  return hashlib.sha256(data.encode()).hexdigest()
+
  if key is None:
- key="worldpeace"
- method=strcmp(method,["AES","RSA",'SHA256'])[0]
+ key = "worldpeace"
+ method = strcmp(method, ["AES", "RSA", "SHA256"])[0]
  if method == "AES":
  return aes_encrypt(code, key)
  elif method == "RSA":
@@ -9685,7 +10337,7 @@ def enpass(code: str, method: str="AES", key: str = None):


  # Master Decryption Function (Supports AES, RSA)
- def depass(encrypted_code: str, method: str='AES', key: str = None):
+ def depass(encrypted_code: str, method: str = "AES", key: str = None):
  """
  Master decryption function that supports multiple methods: AES and RSA.
  :param encrypted_code: The encrypted data to decrypt.
@@ -9694,6 +10346,7 @@ def depass(encrypted_code: str, method: str='AES', key: str = None):
  :return: The decrypted data.
  """
  import hashlib
+
  def aes_decrypt(encrypted_data: str, key: str):
  """
  Decrypts data encrypted using AES in CBC mode.
@@ -9705,12 +10358,13 @@ def depass(encrypted_code: str, method: str='AES', key: str = None):
  from cryptography.hazmat.backends import default_backend
  from cryptography.hazmat.primitives import padding
  import base64
+
  # Generate the same 256-bit key from the password
  key = hashlib.sha256(key.encode()).digest()
-
+
  # Decode the encrypted data from base64
  encrypted_data = base64.b64decode(encrypted_data)
-
+
  # Extract the IV and the actual encrypted data
  iv = encrypted_data[:16] # First 16 bytes are the IV
  encrypted_data = encrypted_data[16:] # Remaining data is the encrypted message
@@ -9724,7 +10378,8 @@ def depass(encrypted_code: str, method: str='AES', key: str = None):
  unpadder = padding.PKCS7(128).unpadder()
  unpadded_data = unpadder.update(decrypted_data) + unpadder.finalize()

- return unpadded_data.decode()
+ return unpadded_data.decode()
+
  def rsa_decrypt(encrypted_data: str, private_key: str):
  """
  Decrypts RSA-encrypted data using the private key.
@@ -9735,6 +10390,7 @@ def depass(encrypted_code: str, method: str='AES', key: str = None):
  from Crypto.PublicKey import RSA
  from Crypto.Cipher import PKCS1_OAEP
  import base64
+
  encrypted_data = base64.b64decode(encrypted_data)
  private_key_obj = RSA.import_key(private_key)
  cipher_rsa = PKCS1_OAEP.new(private_key_obj)
@@ -9742,8 +10398,8 @@ def depass(encrypted_code: str, method: str='AES', key: str = None):
  return decrypted_data.decode()

  if key is None:
- key="worldpeace"
- method=strcmp(method,["AES","RSA",'SHA256'])[0]
+ key = "worldpeace"
+ method = strcmp(method, ["AES", "RSA", "SHA256"])[0]
  if method == "AES":
  return aes_decrypt(encrypted_code, key)
  elif method == "RSA":
@@ -9752,3 +10408,311 @@ def depass(encrypted_code: str, method: str='AES', key: str = None):
  raise ValueError("SHA256 is a hash function and cannot be decrypted.")
  else:
  raise ValueError("Unsupported decryption method")
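Taken together, the AES path derives a 256-bit key with SHA-256, prepends a random 16-byte IV, and applies PKCS7 padding in CBC mode, so depass(enpass(x)) round-trips. A condensed sketch with the same cryptography primitives (the default key mirrors the code above):

    import base64, hashlib, os
    from cryptography.hazmat.backends import default_backend
    from cryptography.hazmat.primitives import padding
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes

    def aes_roundtrip(text: str, password: str = "worldpeace") -> str:
        key = hashlib.sha256(password.encode()).digest()  # 256-bit key
        iv = os.urandom(16)                               # AES block size
        padder = padding.PKCS7(128).padder()
        padded = padder.update(text.encode()) + padder.finalize()
        enc = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend()).encryptor()
        blob = base64.b64encode(iv + enc.update(padded) + enc.finalize())
        # decrypt: split off the IV, then reverse each step
        raw = base64.b64decode(blob)
        dec = Cipher(algorithms.AES(key), modes.CBC(raw[:16]), backend=default_backend()).decryptor()
        unpadder = padding.PKCS7(128).unpadder()
        plain = unpadder.update(dec.update(raw[16:]) + dec.finalize()) + unpadder.finalize()
        return plain.decode()

    print(aes_roundtrip("admin"))  # admin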
+
+ def get_clip(dir_save=None):
+ """
+ Master function to extract content from the clipboard (text, URL, or image).
+
+ Parameters:
+ dir_save (str, optional): If an image is found, save it to this path.
+
+ Returns:
+ dict: A dictionary with extracted content:
+ {
+ "type": "text" | "url" | "image" | "none",
+ "content": <str|Image|None>,
+ "saved_to": <str|None> # Path if an image is saved
+ }
+ """
+ result = {"type": "none", "content": None, "saved_to": None}
+
+ try:
+ import pyperclip
+ from PIL import ImageGrab, Image
+ import validators
+ # 1. Check for text in the clipboard
+ clipboard_content = pyperclip.paste()
+ if clipboard_content:
+ if validators.url(clipboard_content.strip()):
+ result["type"] = "url"
+ result["content"] = clipboard_content.strip()
+
+ else:
+ result["type"] = "text"
+ result["content"] = clipboard_content.strip()
+ return clipboard_content.strip()
+
+ # 2. Check for image in the clipboard
+ image = ImageGrab.grabclipboard()
+ if isinstance(image, Image.Image):
+ result["type"] = "image"
+ result["content"] = image
+ if dir_save:
+ image.save(dir_save)
+ result["saved_to"] = dir_save
+ print(f"Image saved to {dir_save}.")
+ else:
+ print("Image detected in clipboard but not saved.")
+ return image
+ print("No valid text, URL, or image found in clipboard.")
+ return result
+
+ except Exception as e:
+ print(f"An error occurred: {e}")
+ return result
+
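Editor's note on the new get_clip: despite the docstring, it returns the stripped string for plain text, the PIL image for images, and the result dict only on the URL, empty, and error paths (the URL branch sets result but never returns early). Callers should therefore branch on type, as in this usage sketch ("clip.png" is an illustrative path):

    from py2ls.ips import get_clip

    content = get_clip(dir_save="clip.png")
    if isinstance(content, str):
        print("clipboard text:", content)
    elif isinstance(content, dict):
        print(content["type"], content["content"])  # url / none / error paths
    else:
        print("clipboard image:", content.size)     # PIL.Image.Image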
+ def keyboard(*args, action='press', n_click=1,interval=0,verbose=False,**kwargs):
+ """
+ Simulates keyboard input using pyautogui.
+
+ Parameters:
+ input_key (str): The key to simulate. Check the list of supported keys with verbose=True.
+ action (str): The action to perform. Options are 'press', 'keyDown', or 'keyUp'.
+ n_click (int): Number of times to press the key (only for 'press' action).
+ interval (float): Time interval between key presses for 'press' action.
+ verbose (bool): Print detailed output, including supported keys and debug info.
+ kwargs: Additional arguments (reserved for future extensions).
+
+ keyboard("command", "d", action="shorcut")
+ """
+ import pyautogui
+ input_key = args
+
+ actions = ['press','keyDown','keyUp', 'hold','release', 'hotkey','shortcut']
+ action = strcmp(action,actions)[0]
+ keyboard_keys_=['\t', '\n', '\r', ' ', '!', '"', '#', '$', '%', '&', "'", '(',
+ ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7',
+ '8', '9', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', '`',
+ 'a', 'b', 'c', 'd', 'e','f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
+ 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~',
+ 'accept', 'add', 'alt', 'altleft', 'altright', 'apps', 'backspace',
+ 'browserback', 'browserfavorites', 'browserforward', 'browserhome',
+ 'browserrefresh', 'browsersearch', 'browserstop', 'capslock', 'clear',
+ 'convert', 'ctrl', 'ctrlleft', 'ctrlright', 'decimal', 'del', 'delete',
+ 'divide', 'down', 'end', 'enter', 'esc', 'escape', 'execute', 'f1', 'f10',
+ 'f11', 'f12', 'f13', 'f14', 'f15', 'f16', 'f17', 'f18', 'f19', 'f2', 'f20',
+ 'f21', 'f22', 'f23', 'f24', 'f3', 'f4', 'f5', 'f6', 'f7', 'f8', 'f9',
+ 'final', 'fn', 'hanguel', 'hangul', 'hanja', 'help', 'home', 'insert', 'junja',
+ 'kana', 'kanji', 'launchapp1', 'launchapp2', 'launchmail',
+ 'launchmediaselect', 'left', 'modechange', 'multiply', 'nexttrack',
+ 'nonconvert', 'num0', 'num1', 'num2', 'num3', 'num4', 'num5', 'num6',
+ 'num7', 'num8', 'num9', 'numlock', 'pagedown', 'pageup', 'pause', 'pgdn',
+ 'pgup', 'playpause', 'prevtrack', 'print', 'printscreen', 'prntscrn',
+ 'prtsc', 'prtscr', 'return', 'right', 'scrolllock', 'select', 'separator',
+ 'shift', 'shiftleft', 'shiftright', 'sleep', 'space', 'stop', 'subtract', 'tab',
+ 'up', 'volumedown', 'volumemute', 'volumeup', 'win', 'winleft', 'winright', 'yen',
+ 'command', 'option', 'optionleft', 'optionright']
+ if verbose:
+ print(f"supported keys: {keyboard_keys_}")
+
+ if action not in ['hotkey','shortcut']:
+ if not isinstance(input_key, list):
+ input_key=list(input_key)
+ input_key = [strcmp(i, keyboard_keys_)[0] for i in input_key ]
+
+ # correct action
+ cmd_keys = ['command', 'option', 'optionleft', 'optionright','win', 'winleft', 'winright','ctrl', 'ctrlleft', 'ctrlright']
+ try:
+ if any([i in cmd_keys for i in input_key]):
+ action='hotkey'
+ except:
+ pass
+
+ print(f"\n{action}: {input_key}")
+ # keyboard
+ if action in ["press"]:
+ # pyautogui.press(input_key, presses=n_click,interval=interval)
+ for _ in range(n_click):
+ for key in input_key:
+ pyautogui.press(key)
+ pyautogui.sleep(interval)
+ elif action in ['keyDown','hold']:
+ # pyautogui.keyDown(input_key)
+ for _ in range(n_click):
+ for key in input_key:
+ pyautogui.keyDown(key)
+ pyautogui.sleep(interval)
+
+ elif action in ['keyUp','release']:
+ # pyautogui.keyUp(input_key)
+ for _ in range(n_click):
+ for key in input_key:
+ pyautogui.keyUp(key)
+ pyautogui.sleep(interval)
+
+ elif action in ['hotkey','shortcut']:
+ pyautogui.hotkey(input_key)
+
+ def mouse(
10547
+ *args, # loc
10548
+ action: str = "move",
10549
+ duration: float = 0.5,
10550
+ loc_type: str = "absolute", # 'absolute', 'relative'
10551
+ region: tuple = None, # (tuple, optional): A region (x, y, width, height) to search for the image.
10552
+ image_path: str = None,
10553
+ wait:float = 0,
10554
+ text: str = None,
10555
+ confidence: float = 0.8,
10556
+ button: str = "left",
10557
+ n_click: int = 1, # number of clicks
10558
+ interval: float = 0.25, # time between clicks
10559
+ scroll_amount: int = -500,
10560
+ fail_safe: bool = True,
10561
+ grayscale: bool = False,
10562
+ **kwargs,
10563
+ ):
10564
+ """
10565
+ Master function to handle pyautogui actions.
10566
+
10567
+ Parameters:
10568
+ action (str): The action to perform ('click', 'double_click', 'type', 'drag', 'scroll', 'move', 'locate', etc.).
10569
+ image_path (str, optional): Path to the image for 'locate' or 'click' actions.
10570
+ text (str, optional): Text to type for 'type' action.
10571
+ confidence (float, optional): Confidence level for image recognition (default 0.8).
10572
+ duration (float, optional): Duration for smooth movements in seconds (default 0.5).
10573
+ region (tuple, optional): A region (x, y, width, height) to search for the image.
10574
+ button (str, optional): Mouse button to use ('left', 'right', 'middle').
10575
+ n_click (int, optional): Number of times to click for 'click' actions.
10576
+ interval (float, optional): Interval between clicks for 'click' actions.
10577
+ offset (tuple, optional): Horizontal offset from the located image. y_offset (int, optional): Vertical offset from the located image.
10578
+ scroll_amount (int, optional): Amount to scroll (positive for up, negative for down).
10579
+ fail_safe (bool, optional): Enable/disable pyautogui's fail-safe feature.
10580
+ grayscale (bool, optional): Search for the image in grayscale mode.
10581
+
10582
+ Returns:
10583
+ tuple or None: Returns coordinates for 'locate' actions, otherwise None.
10584
+ """
10585
+ import pyautogui
10586
+ import time
10587
+
10588
+ pyautogui.FAILSAFE = fail_safe # Enable/disable fail-safe
10589
+ loc_type = "absolute" if "abs" in loc_type else "relative"
10590
+ if len(args) == 1:
10591
+ if isinstance(args[0], str):
10592
+ image_path = args[0]
10593
+ x_offset, y_offset = None, None
10594
+ else:
10595
+ x_offset, y_offset = args
10596
+
10597
+ elif len(args) == 2:
10598
+ x_offset, y_offset = args
10599
+ elif len(args) == 3:
10600
+ x_offset, y_offset, action = args
10601
+ elif len(args) == 4:
10602
+ x_offset, y_offset, action, duration = args
10603
+ else:
10604
+ x_offset, y_offset = None, None
10605
+
10606
+ what_action = [
10607
+ "locate",
10608
+ "click",
10609
+ "double_click",
10610
+ "triple_click",
10611
+ "input",
10612
+ "write",
10613
+ "type",
10614
+ "drag",
10615
+ "move",
10616
+ "scroll",
10617
+ "down",
10618
+ "up",
10619
+ "hold",
10620
+ "press",
10621
+ "release"
10622
+ ]
10623
+ action = strcmp(action, what_action)[0]
10624
+ # get the locations
10625
+ location = None
10626
+ if any([x_offset is None, y_offset is None]):
10627
+ if region is None:
10628
+ w,h=pyautogui.size()
10629
+ region=(0,0,w,h)
10630
+ print(region)
10631
+ try:
10632
+ print(image_path)
10633
+ location = pyautogui.locateOnScreen(
10634
+ image_path, confidence=confidence, region=region, grayscale=grayscale
10635
+ )
10636
+ print(pyautogui.center(location))
10637
+ except Exception as e:
10638
+ location = None
10639
+
10640
+ # try:
10641
+ if location:
10642
+ x, y = pyautogui.center(location)
10643
+ x += x_offset if x_offset else 0
10644
+ y += y_offset if y_offset else 0
10645
+ x_offset, y_offset = x,y
10646
+ print(action)
10647
+ if action in ['locate']:
10648
+ x, y = pyautogui.position()
10649
+ elif action in ["click", "double_click","triple_click"]:
10650
+ # if location:
10651
+ # x, y = pyautogui.center(location)
10652
+ # x += x_offset
10653
+ # y += y_offset
10654
+ # pyautogui.moveTo(x, y, duration=duration)
10655
+ # if action == "click":
10656
+ # pyautogui.click(x=x, y=y, clicks=n_click, interval=interval, button=button)
10657
+ # elif action == "double_click":
10658
+ # pyautogui.doubleClick(x=x, y=y, interval=interval, button=button)
10659
+ # elif action=='triple_click':
10660
+ # pyautogui.tripleClick(x=x,y=y,interval=interval, button=button)
10661
+ # else:
10662
+ if action == "click":
10663
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10664
+ time.sleep(wait)
10665
+ pyautogui.click(x=x_offset, y=y_offset, clicks=n_click, interval=interval, button=button)
10666
+ elif action == "double_click":
10667
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10668
+ time.sleep(wait)
10669
+ pyautogui.doubleClick(x=x_offset, y=y_offset, interval=interval, button=button)
10670
+ elif action=='triple_click':
10671
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10672
+ time.sleep(wait)
10673
+ pyautogui.tripleClick(x=x_offset, y=y_offset, interval=interval, button=button)
10674
+
10675
+ elif action in ["type", "write", "input"]:
10676
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10677
+ time.sleep(wait)
10678
+ if text is not None:
10679
+ pyautogui.typewrite(text, interval=interval)
10680
+ else:
10681
+ raise ValueError("Text must be provided for the 'type' action.")
10682
+
10683
+ elif action == "drag":
10684
+ if loc_type == "absolute":
10685
+ pyautogui.dragTo(x_offset, y_offset, duration=duration, button=button)
10686
+ else:
10687
+ pyautogui.dragRel(x_offset, y_offset, duration=duration, button=button)
10688
+
10689
+ elif action in ["move"]:
10690
+ if loc_type == "absolute":
10691
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10692
+ else:
10693
+ pyautogui.moveRel(x_offset, y_offset, duration=duration)
10694
+
10695
+ elif action == "scroll":
10696
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10697
+ time.sleep(wait)
10698
+ pyautogui.scroll(scroll_amount)
10699
+
10700
+ elif action in ["down",'hold','press']:
10701
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10702
+ time.sleep(wait)
10703
+ pyautogui.mouseDown(x_offset, y_offset, button=button, duration=duration)
10704
+
10705
+ elif action in ['up','release']:
10706
+ pyautogui.moveTo(x_offset, y_offset, duration=duration)
10707
+ time.sleep(wait)
10708
+ pyautogui.mouseUp(x_offset, y_offset, button=button, duration=duration)
10709
+
10710
+ else:
10711
+ raise ValueError(f"Unsupported action: {action}")
10712
+
10713
+ # except pyautogui.ImageNotFoundException:
10714
+ # print(
10715
+ # "Image not found. Ensure the image is visible and parameters are correct."
10716
+ # )
10717
+ # except Exception as e:
10718
+ # print(f"An error occurred: {e}")
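mouse() takes either explicit coordinates or an image to find on screen: one positional string is treated as image_path, while two positional numbers become x_offset/y_offset. A usage sketch ("button.png" is an illustrative screenshot of a UI element):

    from py2ls.ips import mouse

    mouse(500, 300, action="move")                        # absolute move
    mouse(500, 300, action="click", n_click=2)            # move, then two clicks
    mouse("button.png", action="click", confidence=0.8)   # locate image, click it
    mouse(500, 300, action="scroll", scroll_amount=-300)  # scroll down at (500, 300)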