py2ls 0.2.4.28__py3-none-any.whl → 0.2.4.30__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_tabrepo_2024.py +1753 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/tiles.csv +146 -0
- py2ls/ips.py +964 -117
- py2ls/ml2ls.py +30 -23
- py2ls/netfinder.py +59 -9
- py2ls/plot.py +127 -9
- {py2ls-0.2.4.28.dist-info → py2ls-0.2.4.30.dist-info}/METADATA +7 -4
- {py2ls-0.2.4.28.dist-info → py2ls-0.2.4.30.dist-info}/RECORD +14 -12
- {py2ls-0.2.4.28.dist-info → py2ls-0.2.4.30.dist-info}/WHEEL +0 -0
py2ls/ips.py
CHANGED
@@ -779,11 +779,150 @@ def strcmp(
|
|
779
779
|
print(f"建议: {best_match}")
|
780
780
|
return candidates[best_match_index], best_match_index
|
781
781
|
|
782
|
+
def imgcmp(img: list, method='knn', plot_=True, figsize=[12, 6]):
    """
    Compare two images using SSIM, cross-checked SIFT matching, or SIFT KNN matching.

    Parameters:
    - img (list): List containing two image file paths [img1, img2].
    - method (str): Comparison method ('ssim', 'match', or 'knn'). The value is
      resolved against these three names with strcmp.
    - plot_ (bool): Whether to display the results visually.
    - figsize (list): Size of the figure for plots.

    Returns:
    - For 'ssim': (diff, score): SSIM difference map (scaled to uint8) and similarity score.
    - For 'match' or 'knn': (good_matches, similarity_score, homography_matrix).
      When no good matches are found the result is (good_matches, 0.0, None).
      homography_matrix is also None when fewer than four good matches are
      available or when no homography could be estimated.

    Raises:
    - ValueError: If an image cannot be loaded, if no keypoints are detected in
      one of the images, or if the resolved method is not supported.
    """
    import cv2
    import matplotlib.pyplot as plt
    from skimage.metrics import structural_similarity as ssim

    # Load images
    image1 = cv2.imread(img[0])
    image2 = cv2.imread(img[1])
    if image1 is None or image2 is None:
        raise ValueError("Could not load one or both images. Check file paths.")

    methods = ['ssim', 'match', 'knn']
    method = strcmp(method, methods)[0]
    if method not in methods:
        raise ValueError("Invalid method. Use 'ssim', 'match', or 'knn'.")

    # Every method works on the grayscale versions.
    gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

    if method == 'ssim':
        score, diff = ssim(gray1, gray2, full=True)
        print(f"SSIM Score: {score:.4f}")

        # Convert diff to 8-bit for visualization
        diff = (diff * 255).astype("uint8")

        if plot_:
            fig, ax = plt.subplots(1, 3, figsize=figsize)
            ax[0].imshow(gray1, cmap='gray')
            ax[0].set_title("Image 1")
            ax[1].imshow(gray2, cmap='gray')
            ax[1].set_title("Image 2")
            ax[2].imshow(diff, cmap='gray')
            ax[2].set_title("Difference (SSIM)")
            plt.tight_layout()
            plt.show()

        return diff, score

    # --- 'match' / 'knn': SIFT features + brute-force matching ---
    sift = cv2.SIFT_create()
    keypoints1, descriptors1 = sift.detectAndCompute(gray1, None)
    keypoints2, descriptors2 = sift.detectAndCompute(gray2, None)
    if len(keypoints1) == 0 or len(keypoints2) == 0:
        raise ValueError("No keypoints found in one or both images.")

    if method == 'match':  # Cross-check matching
        bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
        matches = sorted(bf.match(descriptors1, descriptors2), key=lambda m: m.distance)
        # Keep matches clearly better than the worst one; an empty match list
        # simply yields no good matches instead of an IndexError.
        worst_distance = matches[-1].distance if matches else 0.0
        good_matches = [m for m in matches if m.distance < 0.75 * worst_distance]
    else:  # 'knn': KNN matching with Lowe's ratio test
        bf = cv2.BFMatcher()
        neighbour_pairs = bf.knnMatch(descriptors1, descriptors2, k=2)
        # knnMatch may return fewer than two neighbours for a descriptor (e.g.
        # when the second image has a single keypoint); skip those entries.
        good_matches = [
            pair[0]
            for pair in neighbour_pairs
            if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
        ]

    # Calculate similarity score
    similarity_score = len(good_matches) / min(len(keypoints1), len(keypoints2))
    print(f"Number of good matches: {len(good_matches)}")
    print(f"Similarity Score: {similarity_score:.4f}")

    # Handle case where no good matches are found
    if len(good_matches) == 0:
        print("No good matches found.")
        return good_matches, 0.0, None

    # A homography needs at least four point correspondences.
    homography_matrix = None
    if len(good_matches) >= 4:
        src_pts = np.float32(
            [keypoints1[m.queryIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        dst_pts = np.float32(
            [keypoints2[m.trainIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        homography_matrix, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

    if plot_:
        if homography_matrix is not None:
            # The homography maps image-1 coordinates onto image-2 coordinates,
            # so WARP_INVERSE_MAP is required to resample image 2 into the
            # frame of image 1.
            h, w = image1.shape[:2]
            warped_image2 = cv2.warpPerspective(
                image2,
                homography_matrix,
                (w, h),
                flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
            )
            fig, ax = plt.subplots(1, 2, figsize=figsize)
            ax[0].imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))
            ax[0].set_title("Image 1")
            ax[1].imshow(cv2.cvtColor(warped_image2, cv2.COLOR_BGR2RGB))
            ax[1].set_title("Warped Image 2")
            plt.tight_layout()
            plt.show()

        # Plot matches
        result = cv2.drawMatches(
            image1,
            keypoints1,
            image2,
            keypoints2,
            good_matches,
            None,
            flags=cv2.DRAW_MATCHES_FLAGS_NOT_DRAW_SINGLE_POINTS,
        )
        plt.figure(figsize=figsize)
        plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
        plt.title(f"Feature Matches ({len(good_matches)} matches, Score: {similarity_score:.4f})")
        plt.axis('off')
        plt.show()

        # Identify unmatched keypoints (sets give O(1) membership tests)
        matched_idx1 = {m.queryIdx for m in good_matches}
        matched_idx2 = {m.trainIdx for m in good_matches}
        unmatched_kp1 = [kp for i, kp in enumerate(keypoints1) if i not in matched_idx1]
        unmatched_kp2 = [kp for i, kp in enumerate(keypoints2) if i not in matched_idx2]

        # Mark unmatched keypoints on the images
        img1_marked = cv2.drawKeypoints(
            image1, unmatched_kp1, None, color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
        )
        img2_marked = cv2.drawKeypoints(
            image2, unmatched_kp2, None, color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
        )
        fig, ax = plt.subplots(1, 2, figsize=figsize)
        ax[0].imshow(cv2.cvtColor(img1_marked, cv2.COLOR_BGR2RGB))
        ax[0].set_title("Unmatched Keypoints (Image 1)")
        ax[1].imshow(cv2.cvtColor(img2_marked, cv2.COLOR_BGR2RGB))
        ax[1].set_title("Unmatched Keypoints (Image 2)")
        plt.tight_layout()
        plt.show()

    return good_matches, similarity_score, homography_matrix
|
787
926
|
|
788
927
|
|
789
928
|
def cn2pinyin(
|
@@ -892,6 +1031,143 @@ def dict2df(dict_, fill=None):
|
|
892
1031
|
dict_[key] = value
|
893
1032
|
return pd.DataFrame.from_dict(dict_)
|
894
1033
|
|
1034
|
+
def text2audio(
    text,
    method=None,  # "pyttsx3","gTTS"
    rate=200,
    slow=False,  # "gTTS"
    volume=1.0,
    voice=None,
    lang=None,
    gender=None,
    age=None,
    dir_save=None,
):
    """
    Convert text to speech with gTTS ("google"/"gTTS") or pyttsx3.

    When method is None, the "google" backend is tried first and pyttsx3 is
    used as a fallback if it raises.

    Parameters:
    - text: Text to speak.
    - method: "gTTS", "google" or "pyttsx3" (resolved with strcmp), or None.
    - rate: Speech rate passed to pyttsx3.
    - slow: Passed to gTTS as its slow flag.
    - volume: pyttsx3 volume; must be between 0.0 and 1.0.
    - voice: Voice name or id to select in pyttsx3; when None it is derived
      from lang (or from the detected language of text).
    - lang: Language; for gTTS it is detected with langdetect when None.
    - gender: "male" or "female" (pyttsx3 voice selection).
    - age: "child", "adult" or "senior", or a number that is mapped to one of
      them (pyttsx3 voice selection).
    - dir_save: Output file path. With gTTS a temporary "temp_audio.mp3" is
      written and opened when this is not given.

    Returns:
    - None. Errors in the pyttsx3 backend are printed rather than raised.

    Example:
        sample_text = "Hello! This is a test of the pyttsx3 text-to-speech system."
        sample_text = "这个是中文, 测试"
        sample_text = "Hallo, ich bin echo, Wie Heissen Sie"

        text2audio(
            text=sample_text,
            rate=150,
            volume=0.9,
            # voice=None,  # Replace with a voice name or ID available on your system
        )
    """
    import os

    if method is None:
        options = dict(
            rate=rate,
            slow=slow,
            volume=volume,
            voice=voice,
            lang=lang,
            gender=gender,
            age=age,
            dir_save=dir_save,
        )
        try:
            text2audio(text, method="google", **options)
        except Exception as e:
            print(e)
            text2audio(text, method="pyttsx3", **options)
        # The recursive calls did all the work; without this return the code
        # below would call .lower() on None.
        return

    methods = ["gTTS", "pyttsx3", "google"]
    method = strcmp(method, methods)[0]

    if method == "pyttsx3":
        import pyttsx3

        engine = None  # lets the error handler know whether init succeeded
        try:
            engine = pyttsx3.init()
            engine.setProperty("rate", rate)
            if 0.0 <= volume <= 1.0:
                engine.setProperty("volume", volume)
            else:
                raise ValueError("Volume must be between 0.0 and 1.0")

            if gender is not None:
                gender = strcmp(gender, ["male", "female"])[0]
            if age is not None:
                if isinstance(age, (float, int)):
                    # NOTE(review): the 65-year threshold for "senior" is a
                    # chosen cut-off; previously ages 11-17 were labelled
                    # "senior" and no number could map to it.
                    if age < 18:
                        age = "child"
                    elif age >= 65:
                        age = "senior"
                    else:
                        age = "adult"
                elif isinstance(age, str):
                    age = strcmp(age, ["child", "adult", "senior"])[0]
                else:
                    raise ValueError("age: should be in ['child', 'adult', 'senior']")

            voices = engine.getProperty("voices")
            if voice is None:
                voice_names = [v.name for v in voices]
                if lang is None:
                    voice = strcmp(detect_lang(text), voice_names)[0]
                else:
                    if run_once_within():
                        print(voice_names)
                    print(f"lang:{lang}")
                    voice = strcmp(lang, voice_names)[0]

            selected_voice = None
            for v in voices:
                # An explicit voice name/id match wins immediately; otherwise
                # the last voice whose name mentions the gender or age is kept.
                if voice and (voice.lower() in v.name.lower() or voice in v.id):
                    selected_voice = v
                    break
                if gender and gender.lower() in v.name.lower():
                    selected_voice = v
                if age and age.lower() in v.name.lower():
                    selected_voice = v

            if selected_voice:
                engine.setProperty("voice", selected_voice.id)
            elif voice or gender or age:
                raise ValueError(
                    f"No matching voice found for specified criteria. Available voices: {[v.name for v in voices]}"
                )

            # Generate audio
            if dir_save:
                engine.save_to_file(text, dir_save)
            else:
                engine.say(text)
            engine.runAndWait()
            if dir_save:
                # Report only after the queued save has actually run.
                print(f"Audio saved to {dir_save}")
        except Exception as e:
            print(f"An error occurred: {e}")
            # Explicitly stop the pyttsx3 engine to release resources
            if engine is not None:
                try:
                    engine.stop()
                except RuntimeError:
                    pass
    elif method.lower() in ["google", "gtts"]:
        from gtts import gTTS

        try:
            if lang is None:
                from langdetect import detect

                lang = detect(text)
            # Initialize gTTS with the provided parameters
            tts = gTTS(text=text, lang=lang, slow=slow)
        except Exception as e:
            print(f"An error occurred: {e}")
            # Re-raise: nothing can be saved without a gTTS object, and the
            # method=None caller relies on the exception to fall back.
            raise

        print("not realtime reading...")
        save_requested = bool(dir_save)
        if not save_requested:
            dir_save = "temp_audio.mp3"
        elif not os.path.splitext(dir_save)[1]:
            # Only the final path component decides whether an extension is missing.
            dir_save = dir_save + ".mp3"
        tts.save(dir_save)
        if save_requested:
            print(f"Audio saved to {dir_save}")
        try:
            fopen(dir_save)
        except Exception as e:
            print(f"Error opening file: {e}")
        print("done")
|
1170
|
+
|
895
1171
|
def str2time(time_str, fmt="24"):
|
896
1172
|
"""
|
897
1173
|
Convert a time string into the specified format.
|
@@ -2094,7 +2370,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2094
2370
|
False if chunksize else True
|
2095
2371
|
) # when chunksize, recommend low_memory=False # default:
|
2096
2372
|
verbose = kwargs.pop("verbose", False)
|
2097
|
-
if run_once_within(reverse=True):
|
2373
|
+
if run_once_within(reverse=True) and verbose:
|
2098
2374
|
use_pd("read_csv", verbose=verbose)
|
2099
2375
|
|
2100
2376
|
if comment is None:# default: None
|
@@ -2212,7 +2488,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2212
2488
|
if chunksize:
|
2213
2489
|
df = _get_chunks(df)
|
2214
2490
|
print(df.shape)
|
2215
|
-
if not is_df_abnormal(df, verbose=0): # normal
|
2491
|
+
if not is_df_abnormal(df, verbose=0) and verbose: # normal
|
2216
2492
|
display(df.head(2))
|
2217
2493
|
print(f"shape: {df.shape}")
|
2218
2494
|
return df
|
@@ -2245,26 +2521,28 @@ def fload(fpath, kind=None, **kwargs):
|
|
2245
2521
|
df = _get_chunks(df)
|
2246
2522
|
print(df.shape)
|
2247
2523
|
if not is_df_abnormal(df, verbose=0):
|
2248
|
-
|
2249
|
-
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
|
2254
|
-
|
2255
|
-
|
2256
|
-
|
2257
|
-
|
2524
|
+
if verbose:
|
2525
|
+
(
|
2526
|
+
display(df.head(2))
|
2527
|
+
if isinstance(df, pd.DataFrame)
|
2528
|
+
else display("it is not a DataFrame")
|
2529
|
+
)
|
2530
|
+
(
|
2531
|
+
print(f"shape: {df.shape}")
|
2532
|
+
if isinstance(df, pd.DataFrame)
|
2533
|
+
else display("it is not a DataFrame")
|
2534
|
+
)
|
2258
2535
|
return df
|
2259
2536
|
except EmptyDataError as e:
|
2260
2537
|
continue
|
2261
2538
|
else:
|
2262
2539
|
pass
|
2263
|
-
print(kwargs)
|
2540
|
+
# print(kwargs)
|
2264
2541
|
# if is_df_abnormal(df,verbose=verbose):
|
2265
2542
|
# df=pd.read_csv(fpath,**kwargs)
|
2266
|
-
|
2267
|
-
|
2543
|
+
if verbose:
|
2544
|
+
display(df.head(2))
|
2545
|
+
print(f"shape: {df.shape}")
|
2268
2546
|
return df
|
2269
2547
|
|
2270
2548
|
def load_excel(fpath, **kwargs):
|
@@ -2300,7 +2578,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2300
2578
|
engine = kwargs.get("engine", "pyarrow")
|
2301
2579
|
verbose = kwargs.pop("verbose", False)
|
2302
2580
|
|
2303
|
-
if run_once_within(reverse=True):
|
2581
|
+
if run_once_within(reverse=True) and verbose:
|
2304
2582
|
use_pd("read_parquet", verbose=verbose)
|
2305
2583
|
try:
|
2306
2584
|
df = pd.read_parquet(fpath, engine=engine, **kwargs)
|
@@ -2385,6 +2663,16 @@ def fload(fpath, kind=None, **kwargs):
|
|
2385
2663
|
doc = Document(fpath)
|
2386
2664
|
content = [para.text for para in doc.paragraphs]
|
2387
2665
|
return content
|
2666
|
+
|
2667
|
+
def load_rtf(file_path):
    """
    Read an RTF file and return its content converted to plain text.

    Any exception raised while reading or converting is printed, in which
    case the function returns None.
    """
    from striprtf.striprtf import rtf_to_text

    try:
        with open(file_path, "r") as rtf_handle:
            raw_rtf = rtf_handle.read()
        return rtf_to_text(raw_rtf)
    except Exception as e:
        print(f"Error loading RTF file: {e}")
|
2388
2676
|
|
2389
2677
|
if kind is None:
|
2390
2678
|
_, kind = os.path.splitext(fpath)
|
@@ -2427,6 +2715,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2427
2715
|
"xml",
|
2428
2716
|
"ipynb",
|
2429
2717
|
"mtx",
|
2718
|
+
"rtf"
|
2430
2719
|
]
|
2431
2720
|
zip_types = [
|
2432
2721
|
"gz",
|
@@ -2446,22 +2735,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2446
2735
|
if kind not in supported_types:
|
2447
2736
|
print(
|
2448
2737
|
f'Warning:\n"{kind}" is not in the supported list '
|
2449
|
-
) # {supported_types}')
|
2450
|
-
# if os.path.splitext(fpath)[1][1:].lower() in zip_types:
|
2451
|
-
# keep=kwargs.get("keep", False)
|
2452
|
-
# ifile=kwargs.get("ifile",(0,0))
|
2453
|
-
# kwargs.pop("keep",None)
|
2454
|
-
# kwargs.pop("ifile",None)
|
2455
|
-
# fpath_unzip=unzip(fpath)
|
2456
|
-
# if isinstance(fpath_unzip,list):
|
2457
|
-
# fpath_unzip=fpath_unzip[ifile[0]]
|
2458
|
-
# if os.path.isdir(fpath_unzip):
|
2459
|
-
# fpath_selected=listdir(fpath_unzip,kind=kind).fpath[ifile[1]]
|
2460
|
-
# fpath_unzip=fpath_selected
|
2461
|
-
# content_unzip=fload(fpath_unzip, **kwargs)
|
2462
|
-
# if not keep:
|
2463
|
-
# os.remove(fpath_unzip)
|
2464
|
-
# return content_unzip
|
2738
|
+
) # {supported_types}')
|
2465
2739
|
|
2466
2740
|
if kind == "docx":
|
2467
2741
|
return load_docx(fpath)
|
@@ -2477,37 +2751,45 @@ def fload(fpath, kind=None, **kwargs):
|
|
2477
2751
|
return load_xml(fpath)
|
2478
2752
|
elif kind in ["csv", "tsv"]:
|
2479
2753
|
# verbose = kwargs.pop("verbose", False)
|
2480
|
-
if run_once_within(reverse=True):
|
2481
|
-
|
2754
|
+
# if run_once_within(reverse=True) and verbose:
|
2755
|
+
# use_pd("read_csv")
|
2482
2756
|
content = load_csv(fpath, **kwargs)
|
2483
2757
|
return content
|
2484
2758
|
elif kind == "pkl":
|
2485
2759
|
verbose = kwargs.pop("verbose", False)
|
2486
|
-
if run_once_within(reverse=True):
|
2760
|
+
if run_once_within(reverse=True) and verbose:
|
2487
2761
|
use_pd("read_pickle")
|
2488
|
-
|
2762
|
+
try:
|
2763
|
+
res_=pd.read_pickle(fpath, **kwargs)
|
2764
|
+
except Exception as e:
|
2765
|
+
import pickle
|
2766
|
+
with open('sgd_classifier.pkl', 'rb') as f:
|
2767
|
+
res_ = pickle.load(f)
|
2768
|
+
return res_
|
2489
2769
|
elif kind in ["ods", "ods", "odt"]:
|
2490
2770
|
engine = kwargs.get("engine", "odf")
|
2491
2771
|
kwargs.pop("engine", None)
|
2492
2772
|
return load_excel(fpath, engine=engine, **kwargs)
|
2493
2773
|
elif kind == "xls":
|
2774
|
+
verbose = kwargs.pop("verbose", False)
|
2494
2775
|
engine = kwargs.get("engine", "xlrd")
|
2495
2776
|
kwargs.pop("engine", None)
|
2496
2777
|
content = load_excel(fpath, engine=engine, **kwargs)
|
2497
|
-
print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) else None
|
2778
|
+
print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) and verbose else None
|
2498
2779
|
display(content.head(3)) if isinstance(content, pd.DataFrame) else None
|
2499
2780
|
return content
|
2500
2781
|
elif kind == "xlsx":
|
2782
|
+
verbose = kwargs.pop("verbose", False)
|
2501
2783
|
content = load_excel(fpath, **kwargs)
|
2502
|
-
display(content.head(3)) if isinstance(content, pd.DataFrame) else None
|
2784
|
+
display(content.head(3)) if isinstance(content, pd.DataFrame) and verbose else None
|
2503
2785
|
print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) else None
|
2504
2786
|
return content
|
2505
2787
|
elif kind == "mtx":
|
2506
2788
|
from scipy.io import mmread
|
2507
|
-
|
2789
|
+
verbose = kwargs.pop("verbose", False)
|
2508
2790
|
dat_mtx = mmread(fpath)
|
2509
2791
|
content = pd.DataFrame.sparse.from_spmatrix(dat_mtx, **kwargs)
|
2510
|
-
display(content.head(3)) if isinstance(content, pd.DataFrame) else None
|
2792
|
+
display(content.head(3)) if isinstance(content, pd.DataFrame) and verbose else None
|
2511
2793
|
print(f"shape: {content.shape}")
|
2512
2794
|
return content
|
2513
2795
|
elif kind == "ipynb":
|
@@ -2578,6 +2860,8 @@ def fload(fpath, kind=None, **kwargs):
|
|
2578
2860
|
|
2579
2861
|
elif kind == "mplstyle":
|
2580
2862
|
return read_mplstyle(fpath)
|
2863
|
+
elif kind == "rtf":
|
2864
|
+
return load_rtf(fpath)
|
2581
2865
|
|
2582
2866
|
else:
|
2583
2867
|
print("direct reading...")
|
@@ -2616,6 +2900,38 @@ def fload(fpath, kind=None, **kwargs):
|
|
2616
2900
|
# docx_content = fload('sample.docx')
|
2617
2901
|
|
2618
2902
|
|
2903
|
+
def fopen(fpath):
    """
    Open a file with the operating system's default application.

    Parameters:
    - fpath: Path of the file to open.

    Returns:
    - None. A message is printed for a missing file, an unsupported platform,
      a launcher failure, or a successful launch.
    """
    import os
    import platform
    import subprocess

    try:
        # Check if the file exists
        if not os.path.isfile(fpath):
            print(f"Error: The file does not exist - {fpath}")
            return

        # An absolute path cannot be mistaken for a command-line option.
        target = os.path.abspath(fpath)
        system = platform.system()

        # Launchers are invoked with an argument list (no shell), so quotes or
        # other shell metacharacters in the path cannot alter the command.
        if system == "Windows":
            os.startfile(target)
        elif system == "Darwin":  # macOS
            subprocess.run(["open", target], check=True)
        elif system == "Linux":
            subprocess.run(["xdg-open", target], check=True)
        else:
            # Nothing was launched, so do not report success.
            print(f"Unsupported OS: {system}")
            return

        print(f"Successfully opened {fpath} with the default application.")
    except Exception as e:
        print(f"Error opening file {fpath}: {e}")
|
2932
|
+
|
2933
|
+
|
2934
|
+
|
2619
2935
|
def fupdate(fpath, content=None, how="head"):
|
2620
2936
|
"""
|
2621
2937
|
Update a file by adding new content at the top and moving the old content to the bottom.
|
@@ -3025,13 +3341,18 @@ def fsave(
|
|
3025
3341
|
content.to_pickle(fpath, **kwargs)
|
3026
3342
|
else:
|
3027
3343
|
try:
|
3028
|
-
print("trying to convert it as a DataFrame...")
|
3029
3344
|
content = pd.DataFrame(content)
|
3030
3345
|
content.to_pickle(fpath, **kwargs)
|
3031
3346
|
except Exception as e:
|
3032
|
-
|
3033
|
-
|
3034
|
-
|
3347
|
+
try:
|
3348
|
+
import pickle
|
3349
|
+
with open(fpath, 'wb') as f:
|
3350
|
+
pickle.dump(content, f)
|
3351
|
+
print('done!', fpath)
|
3352
|
+
except Exception as e:
|
3353
|
+
raise ValueError(
|
3354
|
+
f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
|
3355
|
+
)
|
3035
3356
|
elif kind.lower() in ["fea", "feather", "ft", "fe", "feat", "fether"]:
|
3036
3357
|
# Feather: The Feather format, based on Apache Arrow, is designed for fast I/O operations. It's
|
3037
3358
|
# optimized for data analytics tasks and is especially fast when working with Pandas.
|
@@ -3187,16 +3508,22 @@ def isa(content, kind):
|
|
3187
3508
|
"""
|
3188
3509
|
if "img" in kind.lower() or "image" in kind.lower():
|
3189
3510
|
return is_image(content)
|
3511
|
+
elif 'vid' in kind.lower():
|
3512
|
+
return is_video(content)
|
3513
|
+
elif 'aud' in kind.lower():
|
3514
|
+
return is_audio(content)
|
3190
3515
|
elif "doc" in kind.lower():
|
3191
3516
|
return is_document(content)
|
3192
3517
|
elif "zip" in kind.lower():
|
3193
3518
|
return is_zip(content)
|
3194
3519
|
elif "dir" in kind.lower() or ("f" in kind.lower() and "d" in kind.lower()):
|
3195
3520
|
return os.path.isdir(content)
|
3521
|
+
elif "code" in kind.lower(): # file
|
3522
|
+
return is_code(content)
|
3196
3523
|
elif "fi" in kind.lower(): # file
|
3197
3524
|
return os.path.isfile(content)
|
3198
3525
|
elif "num" in kind.lower(): # file
|
3199
|
-
return
|
3526
|
+
return isnum(content)
|
3200
3527
|
elif "text" in kind.lower() or "txt" in kind.lower(): # file
|
3201
3528
|
return is_text(content)
|
3202
3529
|
elif "color" in kind.lower(): # file
|
@@ -3607,7 +3934,7 @@ def get_os(full=False, verbose=False):
|
|
3607
3934
|
"usage (%)": usage.percent,
|
3608
3935
|
}
|
3609
3936
|
except PermissionError:
|
3610
|
-
system_info["
|
3937
|
+
system_info["disk"][partition.device] = "Permission Denied"
|
3611
3938
|
|
3612
3939
|
# Network Information
|
3613
3940
|
if_addrs = psutil.net_if_addrs()
|
@@ -3667,11 +3994,33 @@ def listdir(
|
|
3667
3994
|
ascending=True,
|
3668
3995
|
contains=None,# filter filenames using re
|
3669
3996
|
booster=False,# walk in subfolders
|
3997
|
+
depth = 0, # 0: no subfolders; None: all subfolders; [int 1,2,3]: levels of subfolders
|
3670
3998
|
hidden=False, # Include hidden files/folders
|
3671
3999
|
orient="list",
|
3672
4000
|
output="df", # "df", 'list','dict','records','index','series'
|
3673
4001
|
verbose=True,
|
3674
|
-
):
|
4002
|
+
):
|
4003
|
+
def is_hidden(filepath):
    """Tell whether a file or folder counts as hidden on the current platform."""
    if platform.system() != "Windows":
        # macOS/Linux: hidden entries are those whose name starts with a dot
        entry_name = os.path.basename(filepath)
        return entry_name.startswith(".")

    import ctypes

    attrs = ctypes.windll.kernel32.GetFileAttributesW(filepath)
    if attrs == -1:
        raise FileNotFoundError(f"File {filepath} not found.")
    hidden_bit = attrs & 2  # FILE_ATTRIBUTE_HIDDEN
    return bool(hidden_bit)
|
4014
|
+
|
4015
|
+
def get_user():
    """Return the current user's name (USERNAME variable on Windows, passwd entry elsewhere)."""
    if platform.system() != "Windows":
        import pwd

        passwd_entry = pwd.getpwuid(os.getuid())
        return passwd_entry.pw_name
    return os.environ.get("USERNAME", "Unknown")
|
4023
|
+
|
3675
4024
|
if isinstance(kind, list):
|
3676
4025
|
f_ = []
|
3677
4026
|
for kind_ in kind:
|
@@ -3681,7 +4030,7 @@ def listdir(
|
|
3681
4030
|
sort_by=sort_by,
|
3682
4031
|
ascending=ascending,
|
3683
4032
|
contains=contains,
|
3684
|
-
|
4033
|
+
depth=depth,# walk in subfolders
|
3685
4034
|
hidden=hidden,
|
3686
4035
|
orient=orient,
|
3687
4036
|
output=output,
|
@@ -3710,12 +4059,24 @@ def listdir(
|
|
3710
4059
|
"rootdir":[],
|
3711
4060
|
"fname": [],
|
3712
4061
|
"fpath": [],
|
4062
|
+
"num":[],
|
4063
|
+
"os":[]
|
3713
4064
|
}
|
4065
|
+
root_depth = rootdir.rstrip(os.sep).count(os.sep)
|
3714
4066
|
for dirpath, dirnames, ls in os.walk(rootdir):
|
4067
|
+
current_depth = dirpath.rstrip(os.sep).count(os.sep) - root_depth
|
4068
|
+
# Check depth limit
|
4069
|
+
if depth is not None and current_depth > depth:
|
4070
|
+
dirnames[:] = [] # Prevent further traversal into subfolders
|
4071
|
+
continue
|
4072
|
+
|
3715
4073
|
if not hidden:
|
3716
|
-
dirnames[:] = [d for d in dirnames if not
|
3717
|
-
ls = [i for i in ls if not
|
3718
|
-
|
4074
|
+
dirnames[:] = [d for d in dirnames if not is_hidden(os.path.join(dirpath, d))]
|
4075
|
+
ls = [i for i in ls if not is_hidden(os.path.join(dirpath, i))]
|
4076
|
+
|
4077
|
+
for dirname in dirnames:
|
4078
|
+
if kind is not None and kind not in fd: # do not check folders
|
4079
|
+
continue
|
3719
4080
|
if contains and not re.search(contains, dirname):
|
3720
4081
|
continue
|
3721
4082
|
dirname_path = os.path.join(dirpath, dirname)
|
@@ -3734,21 +4095,23 @@ def listdir(
|
|
3734
4095
|
f['basename'].append(os.path.basename(dirname_path))
|
3735
4096
|
f["path"].append(os.path.join(os.path.dirname(dirname_path), dirname))
|
3736
4097
|
f["created_time"].append(
|
3737
|
-
pd.to_datetime(os.path.getctime(dirname_path), unit="s")
|
4098
|
+
pd.to_datetime(int(os.path.getctime(dirname_path)), unit="s")
|
3738
4099
|
)
|
3739
4100
|
f["modified_time"].append(
|
3740
|
-
pd.to_datetime(os.path.getmtime(dirname_path), unit="s")
|
4101
|
+
pd.to_datetime(int(os.path.getmtime(dirname_path)), unit="s")
|
3741
4102
|
)
|
3742
4103
|
f["last_open_time"].append(
|
3743
|
-
pd.to_datetime(os.path.getatime(dirname_path), unit="s")
|
4104
|
+
pd.to_datetime(int(os.path.getatime(dirname_path)), unit="s")
|
3744
4105
|
)
|
3745
4106
|
f["permission"].append(stat.filemode(stats_file.st_mode)),
|
3746
|
-
f["owner"].append(
|
4107
|
+
f["owner"].append(get_user()),
|
3747
4108
|
f["rootdir"].append(dirpath)
|
3748
4109
|
f["fname"].append(filename) # will be removed
|
3749
4110
|
f["fpath"].append(fpath) # will be removed
|
3750
4111
|
i += 1
|
3751
|
-
for item in ls:
|
4112
|
+
for item in ls:
|
4113
|
+
if kind in fd:# only check folders
|
4114
|
+
continue
|
3752
4115
|
if contains and not re.search(contains, item):
|
3753
4116
|
continue
|
3754
4117
|
item_path = os.path.join(dirpath, item)
|
@@ -3760,13 +4123,11 @@ def listdir(
|
|
3760
4123
|
continue
|
3761
4124
|
filename, file_extension = os.path.splitext(item)
|
3762
4125
|
if kind is not None:
|
3763
|
-
if not kind.startswith("."):
|
3764
|
-
kind = "." + kind
|
3765
4126
|
is_folder = kind.lower() in fd and os.path.isdir(item_path)
|
3766
4127
|
is_file = kind.lower() in file_extension.lower() and (
|
3767
4128
|
os.path.isfile(item_path)
|
3768
4129
|
)
|
3769
|
-
if kind in [".doc", ".img", ".zip"]: # 选择大的类别
|
4130
|
+
if kind in [".doc", ".img", ".zip",".code",".file",".image",".video",".audio"]: # 选择大的类别
|
3770
4131
|
if kind != ".folder" and not isa(item_path, kind):
|
3771
4132
|
continue
|
3772
4133
|
elif kind in [".all"]:
|
@@ -3780,15 +4141,15 @@ def listdir(
|
|
3780
4141
|
f["length"].append(len(filename))
|
3781
4142
|
f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
|
3782
4143
|
f['basename'].append(os.path.basename(item_path))
|
3783
|
-
f["path"].append(os.path.join(os.path.dirname(item_path), item))
|
4144
|
+
f["path"].append(os.path.join(os.path.dirname(item_path), item))
|
3784
4145
|
f["created_time"].append(
|
3785
|
-
pd.to_datetime(os.path.getctime(item_path), unit="s")
|
4146
|
+
pd.to_datetime(int(os.path.getctime(item_path)), unit="s")
|
3786
4147
|
)
|
3787
4148
|
f["modified_time"].append(
|
3788
|
-
pd.to_datetime(os.path.getmtime(item_path), unit="s")
|
4149
|
+
pd.to_datetime(int(os.path.getmtime(item_path)), unit="s")
|
3789
4150
|
)
|
3790
4151
|
f["last_open_time"].append(
|
3791
|
-
pd.to_datetime(os.path.getatime(item_path), unit="s")
|
4152
|
+
pd.to_datetime(int(os.path.getatime(item_path)), unit="s")
|
3792
4153
|
)
|
3793
4154
|
f["permission"].append(stat.filemode(stats_file.st_mode)),
|
3794
4155
|
f["owner"].append(os.getlogin() if platform.system() != "Windows" else "N/A"),
|
@@ -3799,13 +4160,13 @@ def listdir(
|
|
3799
4160
|
|
3800
4161
|
f["num"] = i
|
3801
4162
|
f["os"] = get_os() # os.uname().machine
|
3802
|
-
if not booster: # go deeper subfolders
|
3803
|
-
|
4163
|
+
# if not booster: # go deeper subfolders
|
4164
|
+
# break
|
3804
4165
|
#* convert to pd.DataFrame
|
3805
4166
|
f = pd.DataFrame(f)
|
3806
4167
|
f=f[["basename","name","kind","length","size","num","path","created_time",
|
3807
4168
|
"modified_time","last_open_time","rootdir",
|
3808
|
-
"
|
4169
|
+
"permission","owner","os","fname","fpath",]]
|
3809
4170
|
if "nam" in sort_by.lower():
|
3810
4171
|
f = sort_kind(f, by="name", ascending=ascending)
|
3811
4172
|
elif "crea" in sort_by.lower():
|
@@ -4173,39 +4534,233 @@ def is_num(s):
|
|
4173
4534
|
def isnum(s):
|
4174
4535
|
return is_num(s)
|
4175
4536
|
|
4176
|
-
|
4177
4537
|
def is_image(fpath):
    """
    Report whether a path looks like an image file.

    The MIME type guessed from the file name is checked first; the lowercase
    file extension serves as a fallback.

    Args:
        fpath (str): Path to the file.

    Returns:
        bool: True if the MIME type or the extension is a known image kind,
        False otherwise.
    """
    import mimetypes

    # MIME types treated as images
    known_mimes = {
        "image/jpeg",
        "image/png",
        "image/gif",
        "image/bmp",
        "image/webp",
        "image/tiff",
        "image/x-icon",
        "image/svg+xml",
        "image/heic",
        "image/heif",
    }
    # Extensions treated as images when the MIME guess does not settle it
    known_suffixes = {
        ".jpg",
        ".jpeg",
        ".png",
        ".gif",
        ".bmp",
        ".webp",
        ".tif",
        ".tiff",
        ".ico",
        ".svg",
        ".heic",
        ".heif",
        ".fig",
    }

    guessed_mime = mimetypes.guess_type(fpath)[0]
    suffix = os.path.splitext(fpath)[-1].lower()
    return guessed_mime in known_mimes or suffix in known_suffixes
|
4581
|
+
|
4582
|
+
def is_video(fpath):
    """
    Report whether a path names a video file.

    The MIME type guessed from the name is checked against a whitelist first;
    when that does not match, the lower-cased extension is checked against a
    list of video suffixes.

    Args:
        fpath (str): Path to the file.

    Returns:
        bool: True if either lookup succeeds, False otherwise.
    """
    import mimetypes

    # MIME types accepted as video
    recognised_mimes = frozenset(
        (
            "video/mp4",
            "video/quicktime",
            "video/x-msvideo",
            "video/x-matroska",
            "video/x-flv",
            "video/webm",
            "video/ogg",
            "video/x-ms-wmv",
            "video/x-mpeg",
            "video/3gpp",
            "video/avi",
            "video/mpeg",
            "video/x-mpeg2",
            "video/x-ms-asf",
        )
    )

    # Suffixes accepted when the MIME guess is missing or not whitelisted
    recognised_suffixes = frozenset(
        (
            ".mp4", ".mov", ".avi", ".mkv", ".flv", ".webm", ".ogv", ".wmv",
            ".mpg", ".mpeg", ".3gp", ".mpeg2", ".asf", ".ts", ".m4v", ".divx",
        )
    )

    guessed_type = mimetypes.guess_type(fpath)[0]
    if guessed_type in recognised_mimes:
        return True

    # Fallback: compare the lower-cased extension
    suffix = os.path.splitext(fpath)[-1].lower()
    return suffix in recognised_suffixes
|
4186
4630
|
|
4187
4631
|
def is_document(fpath):
    """
    Report whether a path names a document file.

    The MIME type guessed from the name is accepted when it starts with any
    of the listed prefixes (so every ``text/*`` type counts); otherwise the
    lower-cased extension is looked up in a list of document suffixes.

    Args:
        fpath (str): Path to the file.

    Returns:
        bool: True if either check succeeds, False otherwise.
    """
    import mimetypes

    # MIME prefixes accepted as documents ("text/" matches the whole family)
    mime_prefixes = (
        "text/",
        "application/pdf",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/rtf",
        "application/x-latex",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.spreadsheet",
        "application/vnd.oasis.opendocument.presentation",
    )

    # Suffixes accepted when the MIME guess is missing or does not match
    recognised_suffixes = frozenset(
        (
            ".txt", ".log", ".csv", ".json", ".xml", ".pdf",
            ".doc", ".docx", ".xls", ".xlsx", ".ppt", ".pptx",
            ".odt", ".ods", ".odp", ".rtf", ".tex",
        )
    )

    guessed_type = mimetypes.guess_type(fpath)[0]
    # str.startswith accepts a tuple, testing every prefix in one call
    if guessed_type and guessed_type.startswith(mime_prefixes):
        return True

    # Fallback: compare the lower-cased extension
    suffix = os.path.splitext(fpath)[-1].lower()
    return suffix in recognised_suffixes
|
4693
|
+
|
4694
|
+
def is_audio(fpath):
    """
    Report whether a path names an audio file.

    The MIME type guessed from the name is checked against a whitelist first;
    when that does not match, the lower-cased extension is checked against a
    list of audio suffixes.

    Args:
        fpath (str): Path to the file.

    Returns:
        bool: True if either lookup succeeds, False otherwise.
    """
    import mimetypes

    # MIME types accepted as audio
    recognised_mimes = frozenset(
        (
            "audio/mpeg",
            "audio/wav",
            "audio/ogg",
            "audio/aac",
            "audio/flac",
            "audio/midi",
            "audio/x-midi",
            "audio/x-wav",
            "audio/x-flac",
            "audio/pcm",
            "audio/x-aiff",
            "audio/x-m4a",
        )
    )

    # Suffixes accepted when the MIME guess is missing or not whitelisted
    recognised_suffixes = frozenset(
        (
            ".mp3", ".wav", ".ogg", ".aac", ".flac", ".midi", ".m4a",
            ".aiff", ".pcm", ".wma", ".ape", ".alac", ".opus",
        )
    )

    guessed_type = mimetypes.guess_type(fpath)[0]
    if guessed_type in recognised_mimes:
        return True

    # Fallback: compare the lower-cased extension
    suffix = os.path.splitext(fpath)[-1].lower()
    return suffix in recognised_suffixes
|
4208
4740
|
|
4741
|
+
def is_code(fpath):
    """
    Determine if a given file is a code file based on its file extension.

    Only the extension is inspected (case-insensitively); no MIME type check
    is performed and the file itself is not read.

    Args:
        fpath (str): Path to the file.

    Returns:
        bool: True if the extension is in the list of recognized code/markup
            extensions, False otherwise.
    """
    # Known programming, scripting and config/markup file extensions.
    # NOTE(review): ".json"/".xml" are also listed by is_document and ".ts"
    # by is_video, so a file can satisfy more than one predicate.
    code_extensions = {
        ".m", ".py", ".ipynb", ".js", ".html", ".css", ".java", ".cpp", ".h", ".cs", ".go",
        ".rs", ".sh", ".rb", ".swift", ".ts", ".json", ".xml", ".yaml", ".toml", ".bash", ".r",
    }

    # Compare the lower-cased extension so "A.PY" and "a.py" agree
    ext = os.path.splitext(fpath)[-1].lower()
    return ext in code_extensions
|
4763
|
+
|
4209
4764
|
def is_zip(fpath):
|
4210
4765
|
import mimetypes
|
4211
4766
|
|
@@ -6190,12 +6745,12 @@ def df_astype(
|
|
6190
6745
|
|
6191
6746
|
|
6192
6747
|
# ! DataFrame
|
6193
|
-
def df_sort_values(
|
6748
|
+
def df_sort_values(data, column, by=None, ascending=True, inplace=True, **kwargs):
|
6194
6749
|
"""
|
6195
6750
|
Sort a DataFrame by a specified column based on a custom order or by count.
|
6196
6751
|
|
6197
6752
|
Parameters:
|
6198
|
-
-
|
6753
|
+
- data: DataFrame to be sorted.
|
6199
6754
|
- column: The name of the column to sort by.
|
6200
6755
|
- by: List specifying the custom order for sorting or 'count' to sort by frequency.
|
6201
6756
|
- ascending: Boolean or list of booleans, default True.
|
@@ -6211,7 +6766,7 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
|
|
6211
6766
|
|
6212
6767
|
if isinstance(by, str) and "count" in by.lower():
|
6213
6768
|
# Count occurrences of each value in the specified column
|
6214
|
-
value_counts =
|
6769
|
+
value_counts = data[column].value_counts()
|
6215
6770
|
|
6216
6771
|
# Determine the order based on counts
|
6217
6772
|
count_ascending = kwargs.pop("count_ascending", ascending)
|
@@ -6220,12 +6775,12 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
|
|
6220
6775
|
).index.tolist()
|
6221
6776
|
|
6222
6777
|
# Convert to a categorical type with the new order
|
6223
|
-
|
6778
|
+
data[column] = pd.Categorical(data[column], categories=sorted_counts, ordered=True)
|
6224
6779
|
# Set ascending to count_ascending for sorting
|
6225
6780
|
ascending = count_ascending # Adjust ascending for the final sort
|
6226
6781
|
elif isinstance(by, list):
|
6227
6782
|
# Convert the specified column to a categorical type with the custom order
|
6228
|
-
|
6783
|
+
data[column] = pd.Categorical(data[column], categories=by, ordered=True)
|
6229
6784
|
else:
|
6230
6785
|
raise ValueError("Custom order must be a list or 'count'.")
|
6231
6786
|
|
@@ -6240,7 +6795,7 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
|
|
6240
6795
|
return sorted_df
|
6241
6796
|
except Exception as e:
|
6242
6797
|
print(f"Error sorting DataFrame by '{column}': {e}")
|
6243
|
-
return
|
6798
|
+
return data
|
6244
6799
|
|
6245
6800
|
|
6246
6801
|
# # Example usage:
|
@@ -7742,7 +8297,7 @@ def df_reducer(
|
|
7742
8297
|
# example:
|
7743
8298
|
# df_reducer(data=data_log, columns=markers, n_components=2)
|
7744
8299
|
|
7745
|
-
def
|
8300
|
+
def get_df_format(data, threshold_unique=0.5, verbose=False):
|
7746
8301
|
"""
|
7747
8302
|
检测表格: long, wide or uncertain.
|
7748
8303
|
|
@@ -7834,13 +8389,16 @@ def df_format(data, threshold_unique=0.5, verbose=False):
|
|
7834
8389
|
# Step 5: Clustering analysis on numerical columns for correlation in wide format
|
7835
8390
|
numeric_cols = data.select_dtypes(include="number").columns
|
7836
8391
|
if len(numeric_cols) > 1:
|
7837
|
-
|
7838
|
-
|
7839
|
-
|
7840
|
-
|
7841
|
-
|
7842
|
-
|
7843
|
-
|
8392
|
+
try:
|
8393
|
+
scaled_data = StandardScaler().fit_transform(data[numeric_cols].dropna())
|
8394
|
+
clustering = AgglomerativeClustering(n_clusters=2).fit(scaled_data.T)
|
8395
|
+
cluster_labels = pd.Series(clustering.labels_)
|
8396
|
+
if cluster_labels.nunique() < len(numeric_cols) * 0.5:
|
8397
|
+
wide_score += 2
|
8398
|
+
if verbose:
|
8399
|
+
print("Clustering on columns shows grouping, suggesting wide format.")
|
8400
|
+
except Exception as e:
|
8401
|
+
print(e) if verbose else None
|
7844
8402
|
|
7845
8403
|
# Step 6: Inter-column correlation analysis
|
7846
8404
|
if len(numeric_cols) > 1:
|
@@ -7868,11 +8426,14 @@ def df_format(data, threshold_unique=0.5, verbose=False):
|
|
7868
8426
|
|
7869
8427
|
# Step 8: Multi-level clustering on rows to detect block structure for wide format
|
7870
8428
|
if len(numeric_cols) > 1 and n_rows > 5:
|
7871
|
-
|
7872
|
-
|
7873
|
-
|
7874
|
-
|
7875
|
-
|
8429
|
+
try:
|
8430
|
+
clustering_rows = AgglomerativeClustering(n_clusters=2).fit(scaled_data)
|
8431
|
+
if pd.Series(clustering_rows.labels_).nunique() < 2:
|
8432
|
+
wide_score += 2
|
8433
|
+
if verbose:
|
8434
|
+
print("Row clustering reveals homogeneity, suggesting wide format.")
|
8435
|
+
except Exception as e:
|
8436
|
+
print(e) if verbose else None
|
7876
8437
|
|
7877
8438
|
# Step 9: Sequential name detection for time-series pattern in wide format
|
7878
8439
|
if any(col.isdigit() or col.startswith("T") for col in col_names):
|
@@ -7881,15 +8442,18 @@ def df_format(data, threshold_unique=0.5, verbose=False):
|
|
7881
8442
|
print("Detected time-like sequential column names, supporting wide format.")
|
7882
8443
|
|
7883
8444
|
# Step 10: Entropy of numeric columns
|
7884
|
-
|
7885
|
-
|
7886
|
-
|
7887
|
-
|
7888
|
-
|
7889
|
-
|
7890
|
-
|
7891
|
-
|
7892
|
-
|
8445
|
+
try:
|
8446
|
+
numeric_entropy = data[numeric_cols].apply(
|
8447
|
+
lambda x: entropy(pd.cut(x, bins=10).value_counts(normalize=True))
|
8448
|
+
)
|
8449
|
+
if numeric_entropy.mean() < 2:
|
8450
|
+
wide_score += 2
|
8451
|
+
if verbose:
|
8452
|
+
print(
|
8453
|
+
"Low entropy in numeric columns indicates stability across columns, supporting wide format."
|
8454
|
+
)
|
8455
|
+
except Exception as e:
|
8456
|
+
print(e) if verbose else None
|
7893
8457
|
|
7894
8458
|
# Step 11: Tie-breaking strategy if scores are equal
|
7895
8459
|
if wide_score == long_score:
|
@@ -8905,3 +9469,286 @@ def get_phone(phone_number: str, region: str = None,verbose=True):
|
|
8905
9469
|
if verbose:
|
8906
9470
|
preview(res)
|
8907
9471
|
return res
|
9472
|
+
|
9473
|
+
|
9474
|
+
def decode_pluscode(
    pluscode: str, reference: tuple = (52.5200, 13.4050), return_bbox: bool = False
):
    """
    Decode a Plus Code into the latitude/longitude of its cell centre.

    Parameters:
        pluscode (str): The Plus Code to decode. Can be full or short.
        reference (tuple, optional): (latitude, longitude) used to recover the
            full code when `pluscode` is short. Defaults to (52.5200, 13.4050).
            Passing None makes a short code an error.
        return_bbox (bool): If True, also return the bounding box of the
            decoded cell. Default is False.

    Returns:
        tuple: (latitude, longitude) if `return_bbox` is False.
               (latitude, longitude, bbox) if `return_bbox` is True, where
               bbox = (latitudeLo, latitudeHi, longitudeLo, longitudeHi).

    Raises:
        ValueError: If the Plus Code is invalid, or if it is short and
            `reference` is None.

    Usage:
        lat, lon = decode_pluscode("9F4M7FG6+89")  # full code
        print(f"Decoded Full Plus Code: Latitude: {lat}, Longitude: {lon}")

        lat, lon = decode_pluscode("7FG6+89")  # short code, default reference
        print(f"Decoded Short Plus Code: Latitude: {lat}, Longitude: {lon}")

        lat, lon, bbox = decode_pluscode("9F4M7FG6+89", return_bbox=True)
        print(f"Bounding Box: {bbox}")
    """
    from openlocationcode import openlocationcode as olc

    # Validate Plus Code
    if not olc.isValid(pluscode):
        raise ValueError(f"Invalid Plus Code: {pluscode}")

    # Short codes carry no area prefix; rebuild the full code near `reference`
    if olc.isShort(pluscode):
        if reference is None:
            raise ValueError(
                "Reference location (latitude, longitude) is required for decoding short Plus Codes."
            )
        pluscode = olc.recoverNearest(pluscode, reference[0], reference[1])

    # Decode the Plus Code into its bounding cell
    decoded = olc.decode(pluscode)

    # The reported point is the midpoint of the decoded cell
    latitude = (decoded.latitudeLo + decoded.latitudeHi) / 2
    longitude = (decoded.longitudeLo + decoded.longitudeHi) / 2

    if return_bbox:
        bbox = (
            decoded.latitudeLo,
            decoded.latitudeHi,
            decoded.longitudeLo,
            decoded.longitudeHi,
        )
        return latitude, longitude, bbox

    return latitude, longitude
|
9533
|
+
|
9534
|
+
def get_loc(input_data, user_agent="0413@mygmail.com)", verbose=True):
    """
    Geocode a place name, or reverse-geocode coordinates, with Nominatim.

    The input is dispatched on its type:
      1. a string that is not a bare number -> forward geocoding;
      2. a (latitude, longitude) tuple, or a single float -> reverse geocoding;
      3. any other string (i.e. a bare number) -> parsed as DMS and
         reverse-geocoded.

    NOTE(review): because of check 1, a DMS string such as "48 51 24.3 N" is
    geocoded as a place name and never reaches branch 3; the branch order is
    kept as in the original - confirm the intended behaviour.

    Parameters:
        input_data (str | tuple | float): Place name, coordinates, or DMS text.
        user_agent (str): User agent passed to the Nominatim geocoder.
            NOTE(review): the default value ends with a stray ")" - it is left
            unchanged so the default request identity stays the same.
        verbose (bool): Print the resolved coordinates/address.

    Returns:
        The geopy location returned by the geocoder, or None when nothing was
        found or the input could not be interpreted.

    Raises:
        ValueError: If a tuple is given that is not exactly (latitude, longitude).

    Usage:
        get_loc("Berlin, Germany")  # Example city
        # get_loc((48.8566, 2.3522))  # Example latitude and longitude
        # get_loc("48 51 24.3 N")  # Example DMS input
    """
    from geopy.geocoders import Nominatim
    import re

    def dms_to_decimal(dms):
        """
        Convert DMS (Degrees, Minutes, Seconds) to Decimal format.
        Input should be in the format of "DD MM SS" or "D M S".
        """
        # Regex pattern for DMS input
        pattern = r"(\d{1,3})[^\d]*?(\d{1,2})[^\d]*?(\d{1,2})"
        match = re.match(pattern, dms)
        if not match:
            raise ValueError("Invalid DMS format")
        degrees, minutes, seconds = map(float, match.groups())
        return degrees + (minutes / 60) + (seconds / 3600)

    # Honour the caller-supplied user agent (it used to be ignored)
    geolocator = Nominatim(user_agent=user_agent)

    # Case 1: Input is a city name (string)
    if isinstance(input_data, str) and not re.match(r"^\d+(\.\d+)?$", input_data):
        location = geolocator.geocode(input_data)
        if location is None:
            # No match: report it instead of dereferencing None
            print(f"Could not find {input_data}.")
        elif verbose:
            print(
                f"Latitude and Longitude for {input_data}: {location.latitude}, {location.longitude}"
            )
        return location

    # Case 2: Input is latitude and longitude (float or tuple)
    if isinstance(input_data, (float, tuple)):
        if isinstance(input_data, tuple):
            if len(input_data) != 2:
                raise ValueError(
                    "Coordinates must be given as a (latitude, longitude) tuple."
                )
            latitude, longitude = input_data
        else:
            latitude = input_data
            longitude = None  # No longitude provided for a single float

        # Compare against None so a longitude of 0.0 is not mistaken for "missing"
        has_longitude = longitude is not None

        # Reverse geocoding
        location_reversed = geolocator.reverse(
            (latitude, longitude) if has_longitude else latitude
        )
        if location_reversed is None:
            print("Could not reverse geocode the coordinates.")
        elif verbose:
            print(
                f"Address from coordinates ({latitude}, {longitude if has_longitude else ''}): {location_reversed.address}"
            )
        return location_reversed

    # Case 3: Input is a DMS string
    if isinstance(input_data, str):
        try:
            decimal_lat = dms_to_decimal(input_data)
            print(f"Converted DMS to decimal latitude: {decimal_lat}")

            # NOTE(review): only a latitude is available here; whether the
            # geocoder accepts a lone number depends on the geopy version.
            location_reversed = geolocator.reverse(decimal_lat)
            if location_reversed is None:
                print("Could not reverse geocode the coordinates.")
            elif verbose:
                print(f"Address from coordinates: {location_reversed.address}")
            return location_reversed
        except ValueError:
            print(
                "Invalid input format. Please provide a city name, latitude/longitude, or DMS string."
            )

    # Unsupported input type, or DMS parsing/lookup failed
    return None
|
9609
|
+
|
9610
|
+
def enpass(code: str, method: str = "AES", key: str = None):
    """
    usage: enpass("admin")
    Master encryption function that supports multiple methods: AES, RSA, and SHA256.

    :param code: The input data to encrypt or hash.
    :param method: The encryption or hashing method ('AES', 'RSA', or 'SHA256').
        The value is resolved to one of these names via `strcmp`.
    :param key: For AES, a password (a built-in default password is used when
        omitted). For RSA, the public key in PEM format (required). Ignored
        for SHA256.
    :return: AES/RSA: base64 text of the ciphertext; SHA256: hex digest.
    :raises ValueError: If RSA is requested without a key.

    NOTE(security): the AES fallback password is hard-coded, so data encrypted
    without an explicit `key` can be decrypted by anyone who has this source.
    """
    import hashlib

    # AES Encryption (Advanced)
    def aes_encrypt(data: str, key: str):
        """
        Encrypts data using AES algorithm in CBC mode.
        :param data: The data to encrypt.
        :param key: The password the AES key is derived from.
        :return: base64 of (IV + ciphertext).
        """
        from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives import padding
        import base64
        import os

        # NOTE(security): the 256-bit key is a single unsalted SHA-256 of the
        # password and the CBC output carries no authentication tag. Both are
        # kept as-is because `depass` expects exactly this format.
        key = hashlib.sha256(key.encode()).digest()

        # Fresh random IV per message; 16 bytes is the AES block size
        iv = os.urandom(16)

        # Pad the data to be a multiple of 16 bytes using PKCS7
        padder = padding.PKCS7(128).padder()
        padded_data = padder.update(data.encode()) + padder.finalize()

        # Create AES cipher object using CBC mode
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
        encryptor = cipher.encryptor()
        encrypted_data = encryptor.update(padded_data) + encryptor.finalize()

        # The IV is prepended so the decrypting side can recover it
        return base64.b64encode(iv + encrypted_data).decode()

    # RSA Encryption (Advanced)
    def rsa_encrypt(data: str, public_key: str):
        """
        Encrypts data using RSA encryption with OAEP padding.
        :param data: The data to encrypt.
        :param public_key: The public key in PEM format.
        :return: The encrypted data, base64 encoded.
        """
        import base64
        from Crypto.PublicKey import RSA
        from Crypto.Cipher import PKCS1_OAEP

        public_key_obj = RSA.import_key(public_key)
        cipher_rsa = PKCS1_OAEP.new(public_key_obj)
        encrypted_data = cipher_rsa.encrypt(data.encode())
        return base64.b64encode(encrypted_data).decode()

    # SHA256 Hashing (Non-reversible)
    def sha256_hash(data: str):
        """
        Generates a SHA256 hash of the data.
        :param data: The data to hash.
        :return: The hashed value (hex string).
        """
        return hashlib.sha256(data.encode()).hexdigest()

    method = strcmp(method, ["AES", "RSA", "SHA256"])[0]
    if method == "AES":
        # The default password only makes sense for the password-based AES path
        return aes_encrypt(code, "worldpeace" if key is None else key)
    if method == "RSA":
        if key is None:
            # Previously the AES default password was handed to RSA.import_key
            raise ValueError("RSA encryption requires a public key in PEM format.")
        return rsa_encrypt(code, key)
    if method == "SHA256":
        return sha256_hash(code)
    raise ValueError("Unsupported encryption method")
|
9685
|
+
|
9686
|
+
|
9687
|
+
# Master Decryption Function (Supports AES, RSA)
|
9688
|
+
def depass(encrypted_code: str, method: str = "AES", key: str = None):
    """
    Master decryption function that supports multiple methods: AES and RSA.

    :param encrypted_code: The encrypted data to decrypt (base64 text).
    :param method: The encryption method ('AES' or 'RSA'). The value is
        resolved to a known method name via `strcmp`.
    :param key: For AES, the password used for encryption (a built-in default
        password is used when omitted). For RSA, the private key in PEM
        format (required).
    :return: The decrypted data.
    :raises ValueError: If the method resolves to SHA256 (a hash cannot be
        decrypted), or if RSA is requested without a key.
    """
    import hashlib

    def aes_decrypt(encrypted_data: str, key: str):
        """
        Decrypts data encrypted using AES in CBC mode.
        :param encrypted_data: base64 of (IV + ciphertext).
        :param key: The password the AES key is derived from.
        :return: The decrypted data (string).
        """
        from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives import padding
        import base64

        # Derive the 256-bit key from the password as one SHA-256 digest
        key = hashlib.sha256(key.encode()).digest()

        # Decode the encrypted data from base64
        raw = base64.b64decode(encrypted_data)

        # First 16 bytes are the IV, the remainder is the ciphertext
        iv, ciphertext = raw[:16], raw[16:]

        # Create AES cipher object using CBC mode
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
        decryptor = cipher.decryptor()
        decrypted_data = decryptor.update(ciphertext) + decryptor.finalize()

        # Unpad the decrypted data using PKCS7
        unpadder = padding.PKCS7(128).unpadder()
        unpadded_data = unpadder.update(decrypted_data) + unpadder.finalize()

        return unpadded_data.decode()

    def rsa_decrypt(encrypted_data: str, private_key: str):
        """
        Decrypts RSA-encrypted data using the private key.
        :param encrypted_data: The encrypted data, base64 encoded.
        :param private_key: The private key in PEM format.
        :return: The decrypted data (string).
        """
        from Crypto.PublicKey import RSA
        from Crypto.Cipher import PKCS1_OAEP
        import base64

        encrypted_data = base64.b64decode(encrypted_data)
        private_key_obj = RSA.import_key(private_key)
        cipher_rsa = PKCS1_OAEP.new(private_key_obj)
        decrypted_data = cipher_rsa.decrypt(encrypted_data)
        return decrypted_data.decode()

    method = strcmp(method, ["AES", "RSA", "SHA256"])[0]
    if method == "AES":
        # The default password only makes sense for the password-based AES path
        return aes_decrypt(encrypted_code, "worldpeace" if key is None else key)
    if method == "RSA":
        if key is None:
            # Previously the AES default password was handed to RSA.import_key
            raise ValueError("RSA decryption requires a private key in PEM format.")
        return rsa_decrypt(encrypted_code, key)
    if method == "SHA256":
        raise ValueError("SHA256 is a hash function and cannot be decrypted.")
    raise ValueError("Unsupported decryption method")
|