py2ls 0.2.4.29__py3-none-any.whl → 0.2.4.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.DS_Store +0 -0
- py2ls/.git/index +0 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +1 -0
- py2ls/data/.DS_Store +0 -0
- py2ls/data/hyper_param_tabrepo_2024.py +1753 -0
- py2ls/data/styles/.DS_Store +0 -0
- py2ls/data/tiles.csv +146 -0
- py2ls/ips.py +964 -117
- py2ls/ml2ls.py +30 -23
- py2ls/netfinder.py +59 -9
- py2ls/plot.py +127 -9
- {py2ls-0.2.4.29.dist-info → py2ls-0.2.4.31.dist-info}/METADATA +4 -1
- {py2ls-0.2.4.29.dist-info → py2ls-0.2.4.31.dist-info}/RECORD +14 -12
- {py2ls-0.2.4.29.dist-info → py2ls-0.2.4.31.dist-info}/WHEEL +0 -0
py2ls/ips.py
CHANGED
@@ -779,11 +779,150 @@ def strcmp(
|
|
779
779
|
print(f"建议: {best_match}")
|
780
780
|
return candidates[best_match_index], best_match_index
|
781
781
|
|
782
|
+
def imgcmp(img: list, method='knn', plot_=True, figsize=[12, 6]):
    """
    Compare two images using SSIM, cross-checked SIFT matching, or SIFT KNN matching.

    Parameters:
    - img (list): List containing two image file paths [img1, img2].
    - method (str): Comparison method ('ssim', 'match', or 'knn'); the value is
      resolved to one of these names with strcmp().
    - plot_ (bool): Whether to display the results visually.
    - figsize (list): Size of the figure for plots.

    Returns:
    - For 'ssim': (diff, score): SSIM difference map as a uint8 image and the
      SSIM score.
    - For 'match' or 'knn': (good_matches, similarity_score, homography_matrix).
      similarity_score is len(good_matches) divided by the smaller of the two
      keypoint counts. homography_matrix is None when fewer than four good
      matches exist or no homography could be estimated. When there are no
      good matches at all the result is (good_matches, 0.0, None).

    Raises:
    - ValueError: If an image cannot be loaded, or if no keypoints are found
      in one or both images.
    """
    import cv2
    import matplotlib.pyplot as plt
    import numpy as np
    from skimage.metrics import structural_similarity as ssim

    # Load images
    image1 = cv2.imread(img[0])
    image2 = cv2.imread(img[1])
    if image1 is None or image2 is None:
        raise ValueError("Could not load one or both images. Check file paths.")

    methods = ['ssim', 'match', 'knn']
    method = strcmp(method, methods)[0]

    if method not in methods:
        raise ValueError("Invalid method. Use 'ssim', 'match', or 'knn'.")

    # Every method works on the grayscale versions.
    gray1 = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
    gray2 = cv2.cvtColor(image2, cv2.COLOR_BGR2GRAY)

    if method == 'ssim':
        score, diff = ssim(gray1, gray2, full=True)
        print(f"SSIM Score: {score:.4f}")

        # The SSIM map can contain negative values; clip before scaling so the
        # uint8 conversion is well defined instead of wrapping around.
        diff = (np.clip(diff, 0.0, 1.0) * 255).astype("uint8")

        if plot_:
            fig, ax = plt.subplots(1, 3, figsize=figsize)
            ax[0].imshow(gray1, cmap='gray')
            ax[0].set_title("Image 1")
            ax[1].imshow(gray2, cmap='gray')
            ax[1].set_title("Image 2")
            ax[2].imshow(diff, cmap='gray')
            ax[2].set_title("Difference (SSIM)")
            plt.tight_layout()
            plt.show()

        return diff, score

    # --- 'match' / 'knn': SIFT feature matching ---
    sift = cv2.SIFT_create()
    keypoints1, descriptors1 = sift.detectAndCompute(gray1, None)
    keypoints2, descriptors2 = sift.detectAndCompute(gray2, None)

    if len(keypoints1) == 0 or len(keypoints2) == 0:
        raise ValueError("No keypoints found in one or both images.")

    if method == 'match':  # Cross-check matching
        bf = cv2.BFMatcher(cv2.NORM_L2, crossCheck=True)
        matches = sorted(bf.match(descriptors1, descriptors2), key=lambda x: x.distance)
        if matches:
            # Keep matches clearly better than the worst one found.
            cutoff = 0.75 * matches[-1].distance
            good_matches = [m for m in matches if m.distance < cutoff]
        else:
            # Cross-checking can reject every candidate.
            good_matches = []
    else:  # 'knn': KNN matching with Lowe's ratio test
        bf = cv2.BFMatcher()
        matches = bf.knnMatch(descriptors1, descriptors2, k=2)
        # A query may come back with fewer than two neighbours (e.g. when the
        # other image has a single descriptor); such entries cannot be ratio-tested.
        good_matches = [
            pair[0]
            for pair in matches
            if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
        ]

    # Calculate similarity score
    similarity_score = len(good_matches) / min(len(keypoints1), len(keypoints2))
    print(f"Number of good matches: {len(good_matches)}")
    print(f"Similarity Score: {similarity_score:.4f}")

    if len(good_matches) == 0:
        print("No good matches found.")
        return good_matches, 0.0, None

    # A homography needs at least four point correspondences.
    homography_matrix = None
    if len(good_matches) >= 4:
        src_pts = np.float32(
            [keypoints1[m.queryIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        dst_pts = np.float32(
            [keypoints2[m.trainIdx].pt for m in good_matches]
        ).reshape(-1, 1, 2)
        homography_matrix, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)

    if plot_:
        if homography_matrix is not None:
            # The homography maps image-1 points onto image-2 points, so image 2
            # is brought into image 1's frame by applying it as an inverse map.
            h, w = image1.shape[:2]
            warped_image2 = cv2.warpPerspective(
                image2,
                homography_matrix,
                (w, h),
                flags=cv2.INTER_LINEAR | cv2.WARP_INVERSE_MAP,
            )
            fig, ax = plt.subplots(1, 2, figsize=figsize)
            ax[0].imshow(cv2.cvtColor(image1, cv2.COLOR_BGR2RGB))
            ax[0].set_title("Image 1")
            ax[1].imshow(cv2.cvtColor(warped_image2, cv2.COLOR_BGR2RGB))
            ax[1].set_title("Warped Image 2")
            plt.tight_layout()
            plt.show()

        # Plot matches
        result = cv2.drawMatches(
            image1, keypoints1, image2, keypoints2, good_matches, None, flags=2
        )
        plt.figure(figsize=figsize)
        plt.imshow(cv2.cvtColor(result, cv2.COLOR_BGR2RGB))
        plt.title(
            f"Feature Matches ({len(good_matches)} matches, Score: {similarity_score:.4f})"
        )
        plt.axis('off')
        plt.show()

        # Identify unmatched keypoints (sets give O(1) membership tests)
        matched_idx1 = {m.queryIdx for m in good_matches}
        matched_idx2 = {m.trainIdx for m in good_matches}
        unmatched_kp1 = [kp for i, kp in enumerate(keypoints1) if i not in matched_idx1]
        unmatched_kp2 = [kp for i, kp in enumerate(keypoints2) if i not in matched_idx2]

        # Mark unmatched keypoints on the images
        img1_marked = cv2.drawKeypoints(
            image1,
            unmatched_kp1,
            None,
            color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
        )
        img2_marked = cv2.drawKeypoints(
            image2,
            unmatched_kp2,
            None,
            color=(0, 0, 255),
            flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS,
        )

        fig, ax = plt.subplots(1, 2, figsize=figsize)
        ax[0].imshow(cv2.cvtColor(img1_marked, cv2.COLOR_BGR2RGB))
        ax[0].set_title("Unmatched Keypoints (Image 1)")
        ax[1].imshow(cv2.cvtColor(img2_marked, cv2.COLOR_BGR2RGB))
        ax[1].set_title("Unmatched Keypoints (Image 2)")
        plt.tight_layout()
        plt.show()

    return good_matches, similarity_score, homography_matrix
|
787
926
|
|
788
927
|
|
789
928
|
def cn2pinyin(
|
@@ -892,6 +1031,143 @@ def dict2df(dict_, fill=None):
|
|
892
1031
|
dict_[key] = value
|
893
1032
|
return pd.DataFrame.from_dict(dict_)
|
894
1033
|
|
1034
|
+
def text2audio(
    text,
    method=None,  # "pyttsx3","gTTS"
    rate=200,
    slow=False,  # "gTTS"
    volume=1.0,
    voice=None,
    lang=None,
    gender=None,
    age=None,
    dir_save=None,
):
    """
    Speak *text* aloud or save it as an audio file.

    Parameters:
    - text: The text to synthesise.
    - method: "pyttsx3", "gTTS" or "google" (resolved with strcmp()). When
      None, the Google backend is tried first and pyttsx3 is used if it fails.
    - rate: Value for the pyttsx3 "rate" property.
    - slow: Passed to gTTS as ``slow``.
    - volume: Value for the pyttsx3 "volume" property; must be within 0.0-1.0.
    - voice: Voice name or id fragment used to pick a pyttsx3 voice.
    - lang: Language; for gTTS it is passed as ``lang`` (detected from the text
      when None), for pyttsx3 it is matched against the installed voice names.
    - gender: "male" or "female" (pyttsx3 voice selection).
    - age: "child", "adult", "senior", or a number of years (pyttsx3 voice
      selection). Numbers map to child (< 18), adult, or senior (>= 65).
    - dir_save: Output file path. When omitted, pyttsx3 speaks directly and the
      Google backend writes "temp_audio.mp3" and opens it with fopen().

    Returns:
    - None

    Example:
        text2audio(
            text="Hello! This is a test of the pyttsx3 text-to-speech system.",
            rate=150,
            volume=0.9,
        )
    """
    if method is None:
        options = dict(
            rate=rate,
            slow=slow,
            volume=volume,
            voice=voice,
            lang=lang,
            gender=gender,
            age=age,
            dir_save=dir_save,
        )
        try:
            text2audio(text, method='google', **options)
        except Exception as e:
            print(e)
            text2audio(text, method='pyttsx3', **options)
        # The delegated call did all the work; without this return the code
        # below would call .lower() on None.
        return

    methods = ["gTTS", "pyttsx3", "google"]
    method = strcmp(method, methods)[0]

    if method == "pyttsx3":
        import pyttsx3

        engine = None  # stays None if init() fails, so cleanup can tell
        try:
            engine = pyttsx3.init()
            engine.setProperty("rate", rate)
            if 0.0 <= volume <= 1.0:
                engine.setProperty("volume", volume)
            else:
                raise ValueError("Volume must be between 0.0 and 1.0")

            if gender is not None:
                gender = strcmp(gender, ["male", "female"])[0]
            if age is not None:
                if isinstance(age, (float, int)):
                    # Map a numeric age in years to one of the three labels.
                    if age < 18:
                        age = "child"
                    elif age >= 65:
                        age = "senior"
                    else:
                        age = "adult"
                elif isinstance(age, str):
                    age = strcmp(age, ["child", "adult", "senior"])[0]
                else:
                    raise ValueError("age: should be in ['child', 'adult', 'senior']")

            voices = engine.getProperty("voices")
            voice_names = [v.name for v in voices]
            if voice is None:
                if lang is None:
                    voice = strcmp(detect_lang(text), voice_names)[0]
                else:
                    # NOTE(review): nesting of the two prints was reconstructed
                    # from a flattened diff - confirm against the original file.
                    if run_once_within():
                        print(voice_names)
                    print(f"lang:{lang}")
                    voice = strcmp(lang, voice_names)[0]

            selected_voice = None
            for v in voices:
                # An explicit voice match wins immediately; gender/age matches
                # only nominate a candidate that a later voice may replace.
                if voice and (voice.lower() in v.name.lower() or voice in v.id):
                    selected_voice = v
                    break
                if gender and gender.lower() in v.name.lower():
                    selected_voice = v
                if age and age.lower() in v.name.lower():
                    selected_voice = v

            if selected_voice:
                engine.setProperty("voice", selected_voice.id)
            elif voice or gender or age:
                raise ValueError(
                    f"No matching voice found for specified criteria. Available voices: {voice_names}"
                )

            # Generate audio
            if dir_save:
                engine.save_to_file(text, dir_save)
            else:
                engine.say(text)
            engine.runAndWait()
            if dir_save:
                # Report only after the queued save command has been run.
                print(f"Audio saved to {dir_save}")
        except Exception as e:
            print(f"An error occurred: {e}")
        finally:
            # Explicitly stop the pyttsx3 engine to release resources
            if engine is not None:
                try:
                    engine.stop()
                except RuntimeError:
                    pass
    elif method.lower() in ['google', 'gtts']:
        import os

        from gtts import gTTS

        try:
            if lang is None:
                from langdetect import detect

                lang = detect(text)
            # Initialize gTTS with the provided parameters
            tts = gTTS(text=text, lang=lang, slow=slow)
        except Exception as e:
            print(f"An error occurred: {e}")
            # Nothing can be saved without a gTTS object; re-raise so the
            # caller (including the method=None fallback) sees the real error.
            raise

        print("not realtime reading...")
        if dir_save:
            # Look at the file name's extension only, so a dot in a directory
            # name does not suppress the ".mp3" suffix.
            if not os.path.splitext(dir_save)[1]:
                dir_save = dir_save + ".mp3"
            tts.save(dir_save)
            print(f"Audio saved to {dir_save}")
        else:
            dir_save = "temp_audio.mp3"
            tts.save(dir_save)
            try:
                fopen(dir_save)
            except Exception as e:
                print(f"Error opening file: {e}")
        print("done")
|
1170
|
+
|
895
1171
|
def str2time(time_str, fmt="24"):
|
896
1172
|
"""
|
897
1173
|
Convert a time string into the specified format.
|
@@ -2094,7 +2370,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2094
2370
|
False if chunksize else True
|
2095
2371
|
) # when chunksize, recommend low_memory=False # default:
|
2096
2372
|
verbose = kwargs.pop("verbose", False)
|
2097
|
-
if run_once_within(reverse=True):
|
2373
|
+
if run_once_within(reverse=True) and verbose:
|
2098
2374
|
use_pd("read_csv", verbose=verbose)
|
2099
2375
|
|
2100
2376
|
if comment is None:# default: None
|
@@ -2212,7 +2488,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2212
2488
|
if chunksize:
|
2213
2489
|
df = _get_chunks(df)
|
2214
2490
|
print(df.shape)
|
2215
|
-
if not is_df_abnormal(df, verbose=0): # normal
|
2491
|
+
if not is_df_abnormal(df, verbose=0) and verbose: # normal
|
2216
2492
|
display(df.head(2))
|
2217
2493
|
print(f"shape: {df.shape}")
|
2218
2494
|
return df
|
@@ -2245,26 +2521,28 @@ def fload(fpath, kind=None, **kwargs):
|
|
2245
2521
|
df = _get_chunks(df)
|
2246
2522
|
print(df.shape)
|
2247
2523
|
if not is_df_abnormal(df, verbose=0):
|
2248
|
-
|
2249
|
-
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
|
2254
|
-
|
2255
|
-
|
2256
|
-
|
2257
|
-
|
2524
|
+
if verbose:
|
2525
|
+
(
|
2526
|
+
display(df.head(2))
|
2527
|
+
if isinstance(df, pd.DataFrame)
|
2528
|
+
else display("it is not a DataFrame")
|
2529
|
+
)
|
2530
|
+
(
|
2531
|
+
print(f"shape: {df.shape}")
|
2532
|
+
if isinstance(df, pd.DataFrame)
|
2533
|
+
else display("it is not a DataFrame")
|
2534
|
+
)
|
2258
2535
|
return df
|
2259
2536
|
except EmptyDataError as e:
|
2260
2537
|
continue
|
2261
2538
|
else:
|
2262
2539
|
pass
|
2263
|
-
print(kwargs)
|
2540
|
+
# print(kwargs)
|
2264
2541
|
# if is_df_abnormal(df,verbose=verbose):
|
2265
2542
|
# df=pd.read_csv(fpath,**kwargs)
|
2266
|
-
|
2267
|
-
|
2543
|
+
if verbose:
|
2544
|
+
display(df.head(2))
|
2545
|
+
print(f"shape: {df.shape}")
|
2268
2546
|
return df
|
2269
2547
|
|
2270
2548
|
def load_excel(fpath, **kwargs):
|
@@ -2300,7 +2578,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2300
2578
|
engine = kwargs.get("engine", "pyarrow")
|
2301
2579
|
verbose = kwargs.pop("verbose", False)
|
2302
2580
|
|
2303
|
-
if run_once_within(reverse=True):
|
2581
|
+
if run_once_within(reverse=True) and verbose:
|
2304
2582
|
use_pd("read_parquet", verbose=verbose)
|
2305
2583
|
try:
|
2306
2584
|
df = pd.read_parquet(fpath, engine=engine, **kwargs)
|
@@ -2385,6 +2663,16 @@ def fload(fpath, kind=None, **kwargs):
|
|
2385
2663
|
doc = Document(fpath)
|
2386
2664
|
content = [para.text for para in doc.paragraphs]
|
2387
2665
|
return content
|
2666
|
+
|
2667
|
+
def load_rtf(file_path):
|
2668
|
+
from striprtf.striprtf import rtf_to_text
|
2669
|
+
try:
|
2670
|
+
with open(file_path, "r") as file:
|
2671
|
+
rtf_content = file.read()
|
2672
|
+
text = rtf_to_text(rtf_content)
|
2673
|
+
return text
|
2674
|
+
except Exception as e:
|
2675
|
+
print(f"Error loading RTF file: {e}")
|
2388
2676
|
|
2389
2677
|
if kind is None:
|
2390
2678
|
_, kind = os.path.splitext(fpath)
|
@@ -2427,6 +2715,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2427
2715
|
"xml",
|
2428
2716
|
"ipynb",
|
2429
2717
|
"mtx",
|
2718
|
+
"rtf"
|
2430
2719
|
]
|
2431
2720
|
zip_types = [
|
2432
2721
|
"gz",
|
@@ -2446,22 +2735,7 @@ def fload(fpath, kind=None, **kwargs):
|
|
2446
2735
|
if kind not in supported_types:
|
2447
2736
|
print(
|
2448
2737
|
f'Warning:\n"{kind}" is not in the supported list '
|
2449
|
-
) # {supported_types}')
|
2450
|
-
# if os.path.splitext(fpath)[1][1:].lower() in zip_types:
|
2451
|
-
# keep=kwargs.get("keep", False)
|
2452
|
-
# ifile=kwargs.get("ifile",(0,0))
|
2453
|
-
# kwargs.pop("keep",None)
|
2454
|
-
# kwargs.pop("ifile",None)
|
2455
|
-
# fpath_unzip=unzip(fpath)
|
2456
|
-
# if isinstance(fpath_unzip,list):
|
2457
|
-
# fpath_unzip=fpath_unzip[ifile[0]]
|
2458
|
-
# if os.path.isdir(fpath_unzip):
|
2459
|
-
# fpath_selected=listdir(fpath_unzip,kind=kind).fpath[ifile[1]]
|
2460
|
-
# fpath_unzip=fpath_selected
|
2461
|
-
# content_unzip=fload(fpath_unzip, **kwargs)
|
2462
|
-
# if not keep:
|
2463
|
-
# os.remove(fpath_unzip)
|
2464
|
-
# return content_unzip
|
2738
|
+
) # {supported_types}')
|
2465
2739
|
|
2466
2740
|
if kind == "docx":
|
2467
2741
|
return load_docx(fpath)
|
@@ -2477,37 +2751,45 @@ def fload(fpath, kind=None, **kwargs):
|
|
2477
2751
|
return load_xml(fpath)
|
2478
2752
|
elif kind in ["csv", "tsv"]:
|
2479
2753
|
# verbose = kwargs.pop("verbose", False)
|
2480
|
-
if run_once_within(reverse=True):
|
2481
|
-
|
2754
|
+
# if run_once_within(reverse=True) and verbose:
|
2755
|
+
# use_pd("read_csv")
|
2482
2756
|
content = load_csv(fpath, **kwargs)
|
2483
2757
|
return content
|
2484
2758
|
elif kind == "pkl":
|
2485
2759
|
verbose = kwargs.pop("verbose", False)
|
2486
|
-
if run_once_within(reverse=True):
|
2760
|
+
if run_once_within(reverse=True) and verbose:
|
2487
2761
|
use_pd("read_pickle")
|
2488
|
-
|
2762
|
+
try:
|
2763
|
+
res_=pd.read_pickle(fpath, **kwargs)
|
2764
|
+
except Exception as e:
|
2765
|
+
import pickle
|
2766
|
+
with open('sgd_classifier.pkl', 'rb') as f:
|
2767
|
+
res_ = pickle.load(f)
|
2768
|
+
return res_
|
2489
2769
|
elif kind in ["ods", "ods", "odt"]:
|
2490
2770
|
engine = kwargs.get("engine", "odf")
|
2491
2771
|
kwargs.pop("engine", None)
|
2492
2772
|
return load_excel(fpath, engine=engine, **kwargs)
|
2493
2773
|
elif kind == "xls":
|
2774
|
+
verbose = kwargs.pop("verbose", False)
|
2494
2775
|
engine = kwargs.get("engine", "xlrd")
|
2495
2776
|
kwargs.pop("engine", None)
|
2496
2777
|
content = load_excel(fpath, engine=engine, **kwargs)
|
2497
|
-
print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) else None
|
2778
|
+
print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) and verbose else None
|
2498
2779
|
display(content.head(3)) if isinstance(content, pd.DataFrame) else None
|
2499
2780
|
return content
|
2500
2781
|
elif kind == "xlsx":
|
2782
|
+
verbose = kwargs.pop("verbose", False)
|
2501
2783
|
content = load_excel(fpath, **kwargs)
|
2502
|
-
display(content.head(3)) if isinstance(content, pd.DataFrame) else None
|
2784
|
+
display(content.head(3)) if isinstance(content, pd.DataFrame) and verbose else None
|
2503
2785
|
print(f"shape: {content.shape}") if isinstance(content, pd.DataFrame) else None
|
2504
2786
|
return content
|
2505
2787
|
elif kind == "mtx":
|
2506
2788
|
from scipy.io import mmread
|
2507
|
-
|
2789
|
+
verbose = kwargs.pop("verbose", False)
|
2508
2790
|
dat_mtx = mmread(fpath)
|
2509
2791
|
content = pd.DataFrame.sparse.from_spmatrix(dat_mtx, **kwargs)
|
2510
|
-
display(content.head(3)) if isinstance(content, pd.DataFrame) else None
|
2792
|
+
display(content.head(3)) if isinstance(content, pd.DataFrame) and verbose else None
|
2511
2793
|
print(f"shape: {content.shape}")
|
2512
2794
|
return content
|
2513
2795
|
elif kind == "ipynb":
|
@@ -2578,6 +2860,8 @@ def fload(fpath, kind=None, **kwargs):
|
|
2578
2860
|
|
2579
2861
|
elif kind == "mplstyle":
|
2580
2862
|
return read_mplstyle(fpath)
|
2863
|
+
elif kind == "rtf":
|
2864
|
+
return load_rtf(fpath)
|
2581
2865
|
|
2582
2866
|
else:
|
2583
2867
|
print("direct reading...")
|
@@ -2616,6 +2900,38 @@ def fload(fpath, kind=None, **kwargs):
|
|
2616
2900
|
# docx_content = fload('sample.docx')
|
2617
2901
|
|
2618
2902
|
|
2903
|
+
def fopen(fpath):
    """
    Open a file with the operating system's default application.

    Parameters:
    - fpath: Path of the file to open.

    Returns:
    - None. Progress and errors are reported with print(); a missing file,
      an unsupported platform, or a failing launcher does not raise.
    """
    import os
    import platform
    import subprocess

    try:
        # Check if the file exists
        if not os.path.isfile(fpath):
            print(f"Error: The file does not exist - {fpath}")
            return

        # An absolute path cannot be mistaken for a command-line option.
        target = os.path.abspath(fpath)
        system = platform.system()

        # The launchers are called with an argument list (no shell), so quotes,
        # spaces, "$" or backticks in the path are passed through literally.
        if system == "Darwin":  # macOS
            subprocess.run(["open", target], check=True)
        elif system == "Windows":  # Windows
            os.startfile(target)
        elif system == "Linux":  # Linux
            subprocess.run(["xdg-open", target], check=True)
        elif system == "Java":  # Java (or other unhandled systems)
            print(f"Opening {fpath} on unsupported system.")
            return
        else:
            print(f"Unsupported OS: {system}")
            return

        # Reached only when a launcher was actually started without error.
        print(f"Successfully opened {fpath} with the default application.")
    except Exception as e:
        print(f"Error opening file {fpath}: {e}")
|
2932
|
+
|
2933
|
+
|
2934
|
+
|
2619
2935
|
def fupdate(fpath, content=None, how="head"):
|
2620
2936
|
"""
|
2621
2937
|
Update a file by adding new content at the top and moving the old content to the bottom.
|
@@ -3025,13 +3341,18 @@ def fsave(
|
|
3025
3341
|
content.to_pickle(fpath, **kwargs)
|
3026
3342
|
else:
|
3027
3343
|
try:
|
3028
|
-
print("trying to convert it as a DataFrame...")
|
3029
3344
|
content = pd.DataFrame(content)
|
3030
3345
|
content.to_pickle(fpath, **kwargs)
|
3031
3346
|
except Exception as e:
|
3032
|
-
|
3033
|
-
|
3034
|
-
|
3347
|
+
try:
|
3348
|
+
import pickle
|
3349
|
+
with open(fpath, 'wb') as f:
|
3350
|
+
pickle.dump(content, f)
|
3351
|
+
print('done!', fpath)
|
3352
|
+
except Exception as e:
|
3353
|
+
raise ValueError(
|
3354
|
+
f"content is not a DataFrame, cannot be saved as a 'pkl' format: {e}"
|
3355
|
+
)
|
3035
3356
|
elif kind.lower() in ["fea", "feather", "ft", "fe", "feat", "fether"]:
|
3036
3357
|
# Feather: The Feather format, based on Apache Arrow, is designed for fast I/O operations. It's
|
3037
3358
|
# optimized for data analytics tasks and is especially fast when working with Pandas.
|
@@ -3187,16 +3508,22 @@ def isa(content, kind):
|
|
3187
3508
|
"""
|
3188
3509
|
if "img" in kind.lower() or "image" in kind.lower():
|
3189
3510
|
return is_image(content)
|
3511
|
+
elif 'vid' in kind.lower():
|
3512
|
+
return is_video(content)
|
3513
|
+
elif 'aud' in kind.lower():
|
3514
|
+
return is_audio(content)
|
3190
3515
|
elif "doc" in kind.lower():
|
3191
3516
|
return is_document(content)
|
3192
3517
|
elif "zip" in kind.lower():
|
3193
3518
|
return is_zip(content)
|
3194
3519
|
elif "dir" in kind.lower() or ("f" in kind.lower() and "d" in kind.lower()):
|
3195
3520
|
return os.path.isdir(content)
|
3521
|
+
elif "code" in kind.lower(): # file
|
3522
|
+
return is_code(content)
|
3196
3523
|
elif "fi" in kind.lower(): # file
|
3197
3524
|
return os.path.isfile(content)
|
3198
3525
|
elif "num" in kind.lower(): # file
|
3199
|
-
return
|
3526
|
+
return isnum(content)
|
3200
3527
|
elif "text" in kind.lower() or "txt" in kind.lower(): # file
|
3201
3528
|
return is_text(content)
|
3202
3529
|
elif "color" in kind.lower(): # file
|
@@ -3607,7 +3934,7 @@ def get_os(full=False, verbose=False):
|
|
3607
3934
|
"usage (%)": usage.percent,
|
3608
3935
|
}
|
3609
3936
|
except PermissionError:
|
3610
|
-
system_info["
|
3937
|
+
system_info["disk"][partition.device] = "Permission Denied"
|
3611
3938
|
|
3612
3939
|
# Network Information
|
3613
3940
|
if_addrs = psutil.net_if_addrs()
|
@@ -3667,11 +3994,33 @@ def listdir(
|
|
3667
3994
|
ascending=True,
|
3668
3995
|
contains=None,# filter filenames using re
|
3669
3996
|
booster=False,# walk in subfolders
|
3997
|
+
depth = 0, # 0: no subfolders; None: all subfolders; [int 1,2,3]: levels of subfolders
|
3670
3998
|
hidden=False, # Include hidden files/folders
|
3671
3999
|
orient="list",
|
3672
4000
|
output="df", # "df", 'list','dict','records','index','series'
|
3673
4001
|
verbose=True,
|
3674
|
-
):
|
4002
|
+
):
|
4003
|
+
def is_hidden(filepath):
|
4004
|
+
"""Check if a file or folder is hidden."""
|
4005
|
+
system = platform.system()
|
4006
|
+
if system == "Windows":
|
4007
|
+
import ctypes
|
4008
|
+
attribute = ctypes.windll.kernel32.GetFileAttributesW(filepath)
|
4009
|
+
if attribute == -1:
|
4010
|
+
raise FileNotFoundError(f"File {filepath} not found.")
|
4011
|
+
return bool(attribute & 2) # FILE_ATTRIBUTE_HIDDEN
|
4012
|
+
else: # macOS/Linux: Hidden if the name starts with a dot
|
4013
|
+
return os.path.basename(filepath).startswith(".")
|
4014
|
+
|
4015
|
+
def get_user():
|
4016
|
+
"""Retrieve the username of the current user."""
|
4017
|
+
system = platform.system()
|
4018
|
+
if system == "Windows":
|
4019
|
+
return os.environ.get("USERNAME", "Unknown")
|
4020
|
+
else:
|
4021
|
+
import pwd
|
4022
|
+
return pwd.getpwuid(os.getuid()).pw_name
|
4023
|
+
|
3675
4024
|
if isinstance(kind, list):
|
3676
4025
|
f_ = []
|
3677
4026
|
for kind_ in kind:
|
@@ -3681,7 +4030,7 @@ def listdir(
|
|
3681
4030
|
sort_by=sort_by,
|
3682
4031
|
ascending=ascending,
|
3683
4032
|
contains=contains,
|
3684
|
-
|
4033
|
+
depth=depth,# walk in subfolders
|
3685
4034
|
hidden=hidden,
|
3686
4035
|
orient=orient,
|
3687
4036
|
output=output,
|
@@ -3710,12 +4059,24 @@ def listdir(
|
|
3710
4059
|
"rootdir":[],
|
3711
4060
|
"fname": [],
|
3712
4061
|
"fpath": [],
|
4062
|
+
"num":[],
|
4063
|
+
"os":[]
|
3713
4064
|
}
|
4065
|
+
root_depth = rootdir.rstrip(os.sep).count(os.sep)
|
3714
4066
|
for dirpath, dirnames, ls in os.walk(rootdir):
|
4067
|
+
current_depth = dirpath.rstrip(os.sep).count(os.sep) - root_depth
|
4068
|
+
# Check depth limit
|
4069
|
+
if depth is not None and current_depth > depth:
|
4070
|
+
dirnames[:] = [] # Prevent further traversal into subfolders
|
4071
|
+
continue
|
4072
|
+
|
3715
4073
|
if not hidden:
|
3716
|
-
dirnames[:] = [d for d in dirnames if not
|
3717
|
-
ls = [i for i in ls if not
|
3718
|
-
|
4074
|
+
dirnames[:] = [d for d in dirnames if not is_hidden(os.path.join(dirpath, d))]
|
4075
|
+
ls = [i for i in ls if not is_hidden(os.path.join(dirpath, i))]
|
4076
|
+
|
4077
|
+
for dirname in dirnames:
|
4078
|
+
if kind is not None and kind not in fd: # do not check folders
|
4079
|
+
continue
|
3719
4080
|
if contains and not re.search(contains, dirname):
|
3720
4081
|
continue
|
3721
4082
|
dirname_path = os.path.join(dirpath, dirname)
|
@@ -3734,21 +4095,23 @@ def listdir(
|
|
3734
4095
|
f['basename'].append(os.path.basename(dirname_path))
|
3735
4096
|
f["path"].append(os.path.join(os.path.dirname(dirname_path), dirname))
|
3736
4097
|
f["created_time"].append(
|
3737
|
-
pd.to_datetime(os.path.getctime(dirname_path), unit="s")
|
4098
|
+
pd.to_datetime(int(os.path.getctime(dirname_path)), unit="s")
|
3738
4099
|
)
|
3739
4100
|
f["modified_time"].append(
|
3740
|
-
pd.to_datetime(os.path.getmtime(dirname_path), unit="s")
|
4101
|
+
pd.to_datetime(int(os.path.getmtime(dirname_path)), unit="s")
|
3741
4102
|
)
|
3742
4103
|
f["last_open_time"].append(
|
3743
|
-
pd.to_datetime(os.path.getatime(dirname_path), unit="s")
|
4104
|
+
pd.to_datetime(int(os.path.getatime(dirname_path)), unit="s")
|
3744
4105
|
)
|
3745
4106
|
f["permission"].append(stat.filemode(stats_file.st_mode)),
|
3746
|
-
f["owner"].append(
|
4107
|
+
f["owner"].append(get_user()),
|
3747
4108
|
f["rootdir"].append(dirpath)
|
3748
4109
|
f["fname"].append(filename) # will be removed
|
3749
4110
|
f["fpath"].append(fpath) # will be removed
|
3750
4111
|
i += 1
|
3751
|
-
for item in ls:
|
4112
|
+
for item in ls:
|
4113
|
+
if kind in fd:# only check folders
|
4114
|
+
continue
|
3752
4115
|
if contains and not re.search(contains, item):
|
3753
4116
|
continue
|
3754
4117
|
item_path = os.path.join(dirpath, item)
|
@@ -3760,13 +4123,11 @@ def listdir(
|
|
3760
4123
|
continue
|
3761
4124
|
filename, file_extension = os.path.splitext(item)
|
3762
4125
|
if kind is not None:
|
3763
|
-
if not kind.startswith("."):
|
3764
|
-
kind = "." + kind
|
3765
4126
|
is_folder = kind.lower() in fd and os.path.isdir(item_path)
|
3766
4127
|
is_file = kind.lower() in file_extension.lower() and (
|
3767
4128
|
os.path.isfile(item_path)
|
3768
4129
|
)
|
3769
|
-
if kind in [".doc", ".img", ".zip"]: # 选择大的类别
|
4130
|
+
if kind in [".doc", ".img", ".zip",".code",".file",".image",".video",".audio"]: # 选择大的类别
|
3770
4131
|
if kind != ".folder" and not isa(item_path, kind):
|
3771
4132
|
continue
|
3772
4133
|
elif kind in [".all"]:
|
@@ -3780,15 +4141,15 @@ def listdir(
|
|
3780
4141
|
f["length"].append(len(filename))
|
3781
4142
|
f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
|
3782
4143
|
f['basename'].append(os.path.basename(item_path))
|
3783
|
-
f["path"].append(os.path.join(os.path.dirname(item_path), item))
|
4144
|
+
f["path"].append(os.path.join(os.path.dirname(item_path), item))
|
3784
4145
|
f["created_time"].append(
|
3785
|
-
pd.to_datetime(os.path.getctime(item_path), unit="s")
|
4146
|
+
pd.to_datetime(int(os.path.getctime(item_path)), unit="s")
|
3786
4147
|
)
|
3787
4148
|
f["modified_time"].append(
|
3788
|
-
pd.to_datetime(os.path.getmtime(item_path), unit="s")
|
4149
|
+
pd.to_datetime(int(os.path.getmtime(item_path)), unit="s")
|
3789
4150
|
)
|
3790
4151
|
f["last_open_time"].append(
|
3791
|
-
pd.to_datetime(os.path.getatime(item_path), unit="s")
|
4152
|
+
pd.to_datetime(int(os.path.getatime(item_path)), unit="s")
|
3792
4153
|
)
|
3793
4154
|
f["permission"].append(stat.filemode(stats_file.st_mode)),
|
3794
4155
|
f["owner"].append(os.getlogin() if platform.system() != "Windows" else "N/A"),
|
@@ -3799,13 +4160,13 @@ def listdir(
|
|
3799
4160
|
|
3800
4161
|
f["num"] = i
|
3801
4162
|
f["os"] = get_os() # os.uname().machine
|
3802
|
-
if not booster: # go deeper subfolders
|
3803
|
-
|
4163
|
+
# if not booster: # go deeper subfolders
|
4164
|
+
# break
|
3804
4165
|
#* convert to pd.DataFrame
|
3805
4166
|
f = pd.DataFrame(f)
|
3806
4167
|
f=f[["basename","name","kind","length","size","num","path","created_time",
|
3807
4168
|
"modified_time","last_open_time","rootdir",
|
3808
|
-
"
|
4169
|
+
"permission","owner","os","fname","fpath",]]
|
3809
4170
|
if "nam" in sort_by.lower():
|
3810
4171
|
f = sort_kind(f, by="name", ascending=ascending)
|
3811
4172
|
elif "crea" in sort_by.lower():
|
@@ -4173,39 +4534,233 @@ def is_num(s):
|
|
4173
4534
|
def isnum(s):
|
4174
4535
|
return is_num(s)
|
4175
4536
|
|
4176
|
-
|
4177
4537
|
def is_image(fpath):
    """
    Determine if a given file is an image based on MIME type and file extension.

    The path is first checked with mimetypes.guess_type(); any "image/..."
    type counts as an image. If that does not match, the lower-cased file
    extension is looked up in a fixed set of image extensions.

    Args:
        fpath (str): Path to the file.

    Returns:
        bool: True if the file is a recognized image, False otherwise.
    """
    import mimetypes
    import os

    # Known image file extensions (fallback when no image MIME type is guessed)
    image_extensions = {
        ".jpg", ".jpeg", ".png", ".gif", ".bmp", ".webp", ".tif", ".tiff",
        ".ico", ".svg", ".heic", ".heif", ".fig",
    }

    # Accept the whole "image/" family rather than a hand-picked subset, so
    # every type the mimetypes database knows as an image is recognised.
    mime_type, _ = mimetypes.guess_type(fpath)
    if mime_type is not None and mime_type.startswith("image/"):
        return True

    # Fallback: Check file extension
    ext = os.path.splitext(fpath)[-1].lower()
    return ext in image_extensions
|
4581
|
+
|
4582
|
+
def is_video(fpath):
    """
    Report whether a path looks like a video file.

    Only the file name is inspected. The MIME type guessed by ``mimetypes``
    is checked first; if it is not a recognized video type, the lowercased
    extension is checked instead.

    Args:
        fpath (str): Path (or bare file name) to classify.

    Returns:
        bool: True if the file is recognized as a video, False otherwise.
    """
    import mimetypes

    # MIME types accepted as video
    recognized_mimes = frozenset(
        (
            "video/mp4",
            "video/quicktime",
            "video/x-msvideo",
            "video/x-matroska",
            "video/x-flv",
            "video/webm",
            "video/ogg",
            "video/x-ms-wmv",
            "video/x-mpeg",
            "video/3gpp",
            "video/avi",
            "video/mpeg",
            "video/x-mpeg2",
            "video/x-ms-asf",
        )
    )

    # Extensions accepted when the MIME guess does not match
    recognized_suffixes = frozenset(
        (
            ".mp4",
            ".mov",
            ".avi",
            ".mkv",
            ".flv",
            ".webm",
            ".ogv",
            ".wmv",
            ".mpg",
            ".mpeg",
            ".3gp",
            ".mpeg2",
            ".asf",
            ".ts",
            ".m4v",
            ".divx",
        )
    )

    guessed_type = mimetypes.guess_type(fpath)[0]
    if guessed_type in recognized_mimes:
        return True

    # Fallback: decide from the extension alone
    suffix = os.path.splitext(fpath)[-1].lower()
    return suffix in recognized_suffixes
|
4186
4630
|
|
4187
4631
|
def is_document(fpath):
    """
    Report whether a path looks like a document file.

    Only the file name is inspected. A file counts as a document when the
    MIME type guessed by ``mimetypes`` starts with one of the prefixes
    below (so every ``text/...`` type matches), or, failing that, when its
    lowercased extension is in the fallback list.

    Args:
        fpath (str): Path (or bare file name) to classify.

    Returns:
        bool: True if the file is recognized as a document, False otherwise.
    """
    import mimetypes

    # MIME prefixes accepted as documents; "text/" covers the whole family
    doc_mime_prefixes = (
        "text/",
        "application/pdf",
        "application/msword",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "application/vnd.ms-excel",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-powerpoint",
        "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "application/rtf",
        "application/x-latex",
        "application/vnd.oasis.opendocument.text",
        "application/vnd.oasis.opendocument.spreadsheet",
        "application/vnd.oasis.opendocument.presentation",
    )

    # Extensions accepted when the MIME guess does not match
    doc_suffixes = frozenset(
        (
            ".txt",
            ".log",
            ".csv",
            ".json",
            ".xml",
            ".pdf",
            ".doc",
            ".docx",
            ".xls",
            ".xlsx",
            ".ppt",
            ".pptx",
            ".odt",
            ".ods",
            ".odp",
            ".rtf",
            ".tex",
        )
    )

    guessed_type, _encoding = mimetypes.guess_type(fpath)

    # str.startswith accepts a tuple, testing every prefix in one call
    if guessed_type and guessed_type.startswith(doc_mime_prefixes):
        return True

    suffix = os.path.splitext(fpath)[-1].lower()
    return suffix in doc_suffixes
|
4693
|
+
|
4694
|
+
def is_audio(fpath):
    """
    Report whether a path looks like an audio file.

    Only the file name is inspected. The MIME type guessed by ``mimetypes``
    is checked first; the lowercased extension is the fallback.

    Args:
        fpath (str): Path (or bare file name) to classify.

    Returns:
        bool: True if the file is recognized as audio, False otherwise.
    """
    import mimetypes

    # MIME types accepted as audio
    recognized_mimes = frozenset(
        (
            "audio/mpeg",
            "audio/wav",
            "audio/ogg",
            "audio/aac",
            "audio/flac",
            "audio/midi",
            "audio/x-midi",
            "audio/x-wav",
            "audio/x-flac",
            "audio/pcm",
            "audio/x-aiff",
            "audio/x-m4a",
        )
    )

    # Extensions accepted when the MIME guess does not match
    recognized_suffixes = frozenset(
        (
            ".mp3",
            ".wav",
            ".ogg",
            ".aac",
            ".flac",
            ".midi",
            ".m4a",
            ".aiff",
            ".pcm",
            ".wma",
            ".ape",
            ".alac",
            ".opus",
        )
    )

    by_mime = mimetypes.guess_type(fpath)[0] in recognized_mimes
    if by_mime:
        return True

    # Fallback: decide from the extension alone
    _stem, suffix = os.path.splitext(fpath)
    return suffix.lower() in recognized_suffixes
|
4208
4740
|
|
4741
|
+
def is_code(fpath):
    """
    Report whether a path looks like a source-code or script file.

    The decision is made purely from the lowercased file extension; no MIME
    lookup is performed and the file is never opened.

    Args:
        fpath (str): Path (or bare file name) to classify.

    Returns:
        bool: True if the extension is one of the recognized programming,
        scripting or markup/config extensions, False otherwise.
    """
    # Extensions treated as code
    recognized_suffixes = frozenset(
        (
            ".m",
            ".py",
            ".ipynb",
            ".js",
            ".html",
            ".css",
            ".java",
            ".cpp",
            ".h",
            ".cs",
            ".go",
            ".rs",
            ".sh",
            ".rb",
            ".swift",
            ".ts",
            ".json",
            ".xml",
            ".yaml",
            ".toml",
            ".bash",
            ".r",
        )
    )

    _stem, suffix = os.path.splitext(fpath)
    return suffix.lower() in recognized_suffixes
|
4763
|
+
|
4209
4764
|
def is_zip(fpath):
|
4210
4765
|
import mimetypes
|
4211
4766
|
|
@@ -6190,12 +6745,12 @@ def df_astype(
|
|
6190
6745
|
|
6191
6746
|
|
6192
6747
|
# ! DataFrame
|
6193
|
-
def df_sort_values(
|
6748
|
+
def df_sort_values(data, column, by=None, ascending=True, inplace=True, **kwargs):
|
6194
6749
|
"""
|
6195
6750
|
Sort a DataFrame by a specified column based on a custom order or by count.
|
6196
6751
|
|
6197
6752
|
Parameters:
|
6198
|
-
-
|
6753
|
+
- data: DataFrame to be sorted.
|
6199
6754
|
- column: The name of the column to sort by.
|
6200
6755
|
- by: List specifying the custom order for sorting or 'count' to sort by frequency.
|
6201
6756
|
- ascending: Boolean or list of booleans, default True.
|
@@ -6211,7 +6766,7 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
|
|
6211
6766
|
|
6212
6767
|
if isinstance(by, str) and "count" in by.lower():
|
6213
6768
|
# Count occurrences of each value in the specified column
|
6214
|
-
value_counts =
|
6769
|
+
value_counts = data[column].value_counts()
|
6215
6770
|
|
6216
6771
|
# Determine the order based on counts
|
6217
6772
|
count_ascending = kwargs.pop("count_ascending", ascending)
|
@@ -6220,12 +6775,12 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
|
|
6220
6775
|
).index.tolist()
|
6221
6776
|
|
6222
6777
|
# Convert to a categorical type with the new order
|
6223
|
-
|
6778
|
+
data[column] = pd.Categorical(data[column], categories=sorted_counts, ordered=True)
|
6224
6779
|
# Set ascending to count_ascending for sorting
|
6225
6780
|
ascending = count_ascending # Adjust ascending for the final sort
|
6226
6781
|
elif isinstance(by, list):
|
6227
6782
|
# Convert the specified column to a categorical type with the custom order
|
6228
|
-
|
6783
|
+
data[column] = pd.Categorical(data[column], categories=by, ordered=True)
|
6229
6784
|
else:
|
6230
6785
|
raise ValueError("Custom order must be a list or 'count'.")
|
6231
6786
|
|
@@ -6240,7 +6795,7 @@ def df_sort_values(df, column, by=None, ascending=True, inplace=True, **kwargs):
|
|
6240
6795
|
return sorted_df
|
6241
6796
|
except Exception as e:
|
6242
6797
|
print(f"Error sorting DataFrame by '{column}': {e}")
|
6243
|
-
return
|
6798
|
+
return data
|
6244
6799
|
|
6245
6800
|
|
6246
6801
|
# # Example usage:
|
@@ -7742,7 +8297,7 @@ def df_reducer(
|
|
7742
8297
|
# example:
|
7743
8298
|
# df_reducer(data=data_log, columns=markers, n_components=2)
|
7744
8299
|
|
7745
|
-
def
|
8300
|
+
def get_df_format(data, threshold_unique=0.5, verbose=False):
|
7746
8301
|
"""
|
7747
8302
|
检测表格: long, wide or uncertain.
|
7748
8303
|
|
@@ -7834,13 +8389,16 @@ def df_format(data, threshold_unique=0.5, verbose=False):
|
|
7834
8389
|
# Step 5: Clustering analysis on numerical columns for correlation in wide format
|
7835
8390
|
numeric_cols = data.select_dtypes(include="number").columns
|
7836
8391
|
if len(numeric_cols) > 1:
|
7837
|
-
|
7838
|
-
|
7839
|
-
|
7840
|
-
|
7841
|
-
|
7842
|
-
|
7843
|
-
|
8392
|
+
try:
|
8393
|
+
scaled_data = StandardScaler().fit_transform(data[numeric_cols].dropna())
|
8394
|
+
clustering = AgglomerativeClustering(n_clusters=2).fit(scaled_data.T)
|
8395
|
+
cluster_labels = pd.Series(clustering.labels_)
|
8396
|
+
if cluster_labels.nunique() < len(numeric_cols) * 0.5:
|
8397
|
+
wide_score += 2
|
8398
|
+
if verbose:
|
8399
|
+
print("Clustering on columns shows grouping, suggesting wide format.")
|
8400
|
+
except Exception as e:
|
8401
|
+
print(e) if verbose else None
|
7844
8402
|
|
7845
8403
|
# Step 6: Inter-column correlation analysis
|
7846
8404
|
if len(numeric_cols) > 1:
|
@@ -7868,11 +8426,14 @@ def df_format(data, threshold_unique=0.5, verbose=False):
|
|
7868
8426
|
|
7869
8427
|
# Step 8: Multi-level clustering on rows to detect block structure for wide format
|
7870
8428
|
if len(numeric_cols) > 1 and n_rows > 5:
|
7871
|
-
|
7872
|
-
|
7873
|
-
|
7874
|
-
|
7875
|
-
|
8429
|
+
try:
|
8430
|
+
clustering_rows = AgglomerativeClustering(n_clusters=2).fit(scaled_data)
|
8431
|
+
if pd.Series(clustering_rows.labels_).nunique() < 2:
|
8432
|
+
wide_score += 2
|
8433
|
+
if verbose:
|
8434
|
+
print("Row clustering reveals homogeneity, suggesting wide format.")
|
8435
|
+
except Exception as e:
|
8436
|
+
print(e) if verbose else None
|
7876
8437
|
|
7877
8438
|
# Step 9: Sequential name detection for time-series pattern in wide format
|
7878
8439
|
if any(col.isdigit() or col.startswith("T") for col in col_names):
|
@@ -7881,15 +8442,18 @@ def df_format(data, threshold_unique=0.5, verbose=False):
|
|
7881
8442
|
print("Detected time-like sequential column names, supporting wide format.")
|
7882
8443
|
|
7883
8444
|
# Step 10: Entropy of numeric columns
|
7884
|
-
|
7885
|
-
|
7886
|
-
|
7887
|
-
|
7888
|
-
|
7889
|
-
|
7890
|
-
|
7891
|
-
|
7892
|
-
|
8445
|
+
try:
|
8446
|
+
numeric_entropy = data[numeric_cols].apply(
|
8447
|
+
lambda x: entropy(pd.cut(x, bins=10).value_counts(normalize=True))
|
8448
|
+
)
|
8449
|
+
if numeric_entropy.mean() < 2:
|
8450
|
+
wide_score += 2
|
8451
|
+
if verbose:
|
8452
|
+
print(
|
8453
|
+
"Low entropy in numeric columns indicates stability across columns, supporting wide format."
|
8454
|
+
)
|
8455
|
+
except Exception as e:
|
8456
|
+
print(e) if verbose else None
|
7893
8457
|
|
7894
8458
|
# Step 11: Tie-breaking strategy if scores are equal
|
7895
8459
|
if wide_score == long_score:
|
@@ -8905,3 +9469,286 @@ def get_phone(phone_number: str, region: str = None,verbose=True):
|
|
8905
9469
|
if verbose:
|
8906
9470
|
preview(res)
|
8907
9471
|
return res
|
9472
|
+
|
9473
|
+
|
9474
|
+
def decode_pluscode(
    pluscode: str, reference: tuple = (52.5200, 13.4050), return_bbox: bool = False
):
    """
    Decode a Plus Code (Open Location Code) into the center of its area.

    Parameters:
        pluscode (str): The Plus Code to decode; may be full or short.
        reference (tuple, optional): (latitude, longitude) used to expand a
            short Plus Code into a full one. Defaults to (52.5200, 13.4050).
            Only consulted when the code is short.
        return_bbox (bool): If True, also return the bounds of the decoded
            area. Default is False.

    Returns:
        tuple: (latitude, longitude) if `return_bbox` is False.
               (latitude, longitude, bbox) if `return_bbox` is True, where
               bbox = (latitudeLo, latitudeHi, longitudeLo, longitudeHi).

    Raises:
        ValueError: If the Plus Code is invalid, or if it is short and
            `reference` is None.

    Usage:
        lat, lon = decode_pluscode("9F4M7FG6+89")
        print(f"Decoded Full Plus Code: Latitude: {lat}, Longitude: {lon}")

        lat, lon, bbox = decode_pluscode("7FG6+89", return_bbox=True)
        print(f"Decoded Short Plus Code: Latitude: {lat}, Longitude: {lon}, Bounding Box: {bbox}")
    """
    from openlocationcode import openlocationcode as olc

    if not olc.isValid(pluscode):
        raise ValueError(f"Invalid Plus Code: {pluscode}")

    # A short code carries no absolute position; expand it near `reference`
    full_code = pluscode
    if olc.isShort(pluscode):
        if reference is None:
            raise ValueError(
                "Reference location (latitude, longitude) is required for decoding short Plus Codes."
            )
        ref_lat, ref_lon = reference[0], reference[1]
        full_code = olc.recoverNearest(pluscode, ref_lat, ref_lon)

    area = olc.decode(full_code)
    lat_lo, lat_hi = area.latitudeLo, area.latitudeHi
    lon_lo, lon_hi = area.longitudeLo, area.longitudeHi

    # Report the midpoint of the decoded area
    center_lat = (lat_lo + lat_hi) / 2
    center_lon = (lon_lo + lon_hi) / 2

    if not return_bbox:
        return center_lat, center_lon
    return center_lat, center_lon, (lat_lo, lat_hi, lon_lo, lon_hi)
|
9533
|
+
|
9534
|
+
def get_loc(input_data, user_agent="0413@mygmail.com)", verbose=True):
    """
    Geocode a place name, or reverse-geocode coordinates, using Nominatim.

    The input is dispatched as follows:
      1. A string that is not a bare number (e.g. "Berlin, Germany") is
         forward-geocoded.
      2. A (latitude, longitude) tuple, or a single float, is
         reverse-geocoded.
      3. A string that is a bare number is parsed by the DMS helper and
         reverse-geocoded.

    Args:
        input_data (str | tuple | float): Place name, coordinates, or
            numeric string as described above.
        user_agent (str): User agent passed to the Nominatim geocoder.
        verbose (bool): If True, print the resolved coordinates/address.
            "Could not ..." and invalid-input messages are printed
            regardless of this flag.

    Returns:
        The location object returned by geopy, or None when nothing was
        found, the DMS path failed, or the input type is unsupported.

    Raises:
        ValueError: If `input_data` is a tuple that does not contain
            exactly two items.

    Usage:
        get_loc("Berlin, Germany")      # Example city
        # get_loc((48.8566, 2.3522))    # Example latitude and longitude
    """
    import re

    # Fail fast on malformed coordinates (previously an UnboundLocalError),
    # before the geocoder is imported or contacted.
    if isinstance(input_data, tuple) and len(input_data) != 2:
        raise ValueError(
            "Coordinates must be given as a (latitude, longitude) tuple."
        )

    from geopy.geocoders import Nominatim

    def dms_to_decimal(dms):
        """
        Convert DMS (Degrees, Minutes, Seconds) to Decimal format.
        Input should be in the format of "DD MM SS" or "D M S".
        """
        # Three digit groups separated by arbitrary non-digit characters
        pattern = r"(\d{1,3})[^\d]*?(\d{1,2})[^\d]*?(\d{1,2})"
        match = re.match(pattern, dms)
        if not match:
            raise ValueError("Invalid DMS format")
        degrees, minutes, seconds = map(float, match.groups())
        return degrees + (minutes / 60) + (seconds / 3600)

    # Honor the caller-supplied user agent (it used to be ignored)
    geolocator = Nominatim(user_agent=user_agent)

    # Case 1: Input is a place name (any string that is not a bare number).
    # NOTE(review): strings such as "48 51 24.3 N" also land here, not in
    # the DMS branch below, because they are not bare numbers.
    if isinstance(input_data, str) and not re.match(r"^\d+(\.\d+)?$", input_data):
        location = geolocator.geocode(input_data)
        if location is None:
            print(f"Could not find {input_data}.")
        elif verbose:
            print(
                f"Latitude and Longitude for {input_data}: {location.latitude}, {location.longitude}"
            )
        return location

    # Case 2: Input is latitude and longitude (float or tuple)
    if isinstance(input_data, (float, tuple)):
        if isinstance(input_data, tuple):
            latitude, longitude = input_data
        else:
            latitude, longitude = input_data, None  # single float: no longitude

        # Compare against None so a longitude of 0.0 is kept.
        # NOTE(review): a lone latitude is passed through unchanged, as
        # before; geopy may reject it - confirm against the installed version.
        has_longitude = longitude is not None
        location_reversed = geolocator.reverse(
            (latitude, longitude) if has_longitude else latitude
        )
        if location_reversed is None:
            print("Could not reverse geocode the coordinates.")
        elif verbose:
            print(
                f"Address from coordinates ({latitude}, {longitude if has_longitude else ''}): {location_reversed.address}"
            )
        return location_reversed

    # Case 3: Input is a bare-number string, handled as DMS
    if isinstance(input_data, str):
        try:
            decimal_lat = dms_to_decimal(input_data)
            print(f"Converted DMS to decimal latitude: {decimal_lat}")

            location_reversed = geolocator.reverse(decimal_lat)
            if location_reversed is None:
                print("Could not reverse geocode the coordinates.")
            elif verbose:
                print(f"Address from coordinates: {location_reversed.address}")
            return location_reversed
        except ValueError:
            print(
                "Invalid input format. Please provide a city name, latitude/longitude, or DMS string."
            )

    # Unsupported input types fall through and yield None, as before
    return None
|
9609
|
+
|
9610
|
+
def enpass(code: str, method: str="AES", key: str = None):
    """
    usage: enpass("admin")
    Encrypt or hash a string with one of: AES, RSA, SHA256.

    The requested method is matched against the supported names with
    `strcmp` (defined elsewhere in this module) before dispatching.

    :param code: The input data to encrypt or hash.
    :param method: The encryption or hashing method ('AES', 'RSA', or 'SHA256').
    :param key: For AES, a password; for RSA, a public key in PEM format.
                Falls back to a built-in default string when None. Unused
                for SHA256.
    :return: Base64 text for AES/RSA, or a hex digest for SHA256.
    :raises ValueError: If the resolved method is not one of the three above.
    """
    import hashlib

    def _aes(plaintext: str, password: str):
        """AES-CBC encrypt `plaintext`; returns base64 of IV + ciphertext."""
        from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives import padding
        import base64
        import os

        # 256-bit key = SHA-256 digest of the password
        aes_key = hashlib.sha256(password.encode()).digest()

        # Fresh random IV per call; 16 bytes is the AES block size
        init_vector = os.urandom(16)

        # PKCS7-pad the message up to a whole number of 16-byte blocks
        pkcs7 = padding.PKCS7(128).padder()
        padded = pkcs7.update(plaintext.encode()) + pkcs7.finalize()

        engine = Cipher(
            algorithms.AES(aes_key), modes.CBC(init_vector), backend=default_backend()
        ).encryptor()
        ciphertext = engine.update(padded) + engine.finalize()

        # The IV is prepended so the decryptor can recover it
        return base64.b64encode(init_vector + ciphertext).decode()

    def _rsa(plaintext: str, public_pem: str):
        """RSA-OAEP encrypt `plaintext` with a PEM public key; returns base64."""
        import base64
        from Crypto.PublicKey import RSA
        from Crypto.Cipher import PKCS1_OAEP

        oaep = PKCS1_OAEP.new(RSA.import_key(public_pem))
        return base64.b64encode(oaep.encrypt(plaintext.encode())).decode()

    def _sha256(plaintext: str):
        """Return the SHA256 hex digest of `plaintext` (not reversible)."""
        return hashlib.sha256(plaintext.encode()).hexdigest()

    if key is None:
        key = "worldpeace"
    chosen = strcmp(method, ["AES", "RSA", 'SHA256'])[0]

    if chosen == "SHA256":
        return _sha256(code)
    if chosen == "RSA":
        return _rsa(code, key)
    if chosen == "AES":
        return _aes(code, key)
    raise ValueError("Unsupported encryption method")
|
9685
|
+
|
9686
|
+
|
9687
|
+
# Master Decryption Function (Supports AES, RSA)
|
9688
|
+
def depass(encrypted_code: str, method: str='AES', key: str = None):
    """
    Decrypt a string produced with AES or RSA.

    The requested method is matched against the supported names with
    `strcmp` (defined elsewhere in this module) before dispatching.

    :param encrypted_code: The base64-encoded encrypted data to decrypt.
    :param method: The encryption method ('AES' or 'RSA').
    :param key: For AES, the password; for RSA, the private key in PEM
                format. Falls back to a built-in default string when None.
    :return: The decrypted text.
    :raises ValueError: If the method resolves to SHA256 (a hash cannot be
                        decrypted) or to an unsupported name.
    """
    import hashlib

    def _aes(blob_b64: str, password: str):
        """Decrypt base64(IV + AES-CBC ciphertext) and strip PKCS7 padding."""
        from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
        from cryptography.hazmat.backends import default_backend
        from cryptography.hazmat.primitives import padding
        import base64

        # Same derivation as encryption: 256-bit key = SHA-256 of the password
        aes_key = hashlib.sha256(password.encode()).digest()

        raw = base64.b64decode(blob_b64)
        # The first 16 bytes are the IV; the rest is the ciphertext
        init_vector, ciphertext = raw[:16], raw[16:]

        engine = Cipher(
            algorithms.AES(aes_key), modes.CBC(init_vector), backend=default_backend()
        ).decryptor()
        padded = engine.update(ciphertext) + engine.finalize()

        pkcs7 = padding.PKCS7(128).unpadder()
        return (pkcs7.update(padded) + pkcs7.finalize()).decode()

    def _rsa(blob_b64: str, private_pem: str):
        """Decrypt base64 RSA-OAEP ciphertext with a PEM private key."""
        from Crypto.PublicKey import RSA
        from Crypto.Cipher import PKCS1_OAEP
        import base64

        ciphertext = base64.b64decode(blob_b64)
        oaep = PKCS1_OAEP.new(RSA.import_key(private_pem))
        return oaep.decrypt(ciphertext).decode()

    if key is None:
        key = "worldpeace"
    chosen = strcmp(method, ["AES", "RSA", 'SHA256'])[0]

    if chosen == "SHA256":
        raise ValueError("SHA256 is a hash function and cannot be decrypted.")
    if chosen == "AES":
        return _aes(encrypted_code, key)
    if chosen == "RSA":
        return _rsa(encrypted_code, key)
    raise ValueError("Unsupported decryption method")
|