py2ls 0.2.5.5__py3-none-any.whl → 0.2.5.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py
CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
|
|
3
3
|
import sys
|
4
4
|
import os
|
5
5
|
from IPython.display import display
|
6
|
-
from typing import List, Optional, Union
|
6
|
+
from typing import List, Optional, Union,Any
|
7
7
|
|
8
8
|
from regex import X
|
9
9
|
|
@@ -169,6 +169,11 @@ def run_every(when: str = None, job=None, wait: int = 60):
|
|
169
169
|
|
170
170
|
:param when: String specifying the interval, e.g. '2 minutes', '4 hours', '1 day'.
|
171
171
|
:param job: The function to be scheduled.
|
172
|
+
|
173
|
+
# usage:
|
174
|
+
def job():
|
175
|
+
print("1 sec")
|
176
|
+
run_every(when="1 sec", job=job)
|
172
177
|
"""
|
173
178
|
import schedule
|
174
179
|
import time
|
@@ -201,19 +206,17 @@ def run_every(when: str = None, job=None, wait: int = 60):
|
|
201
206
|
while True:
|
202
207
|
schedule.run_pending()
|
203
208
|
time.sleep(wait) # in seconds
|
204
|
-
time.sleep(wait) # in seconds
|
205
|
-
|
206
|
-
|
207
|
-
# # usage:
|
208
|
-
# def job():
|
209
|
-
# print("1 sec")
|
210
|
-
# run_every(when="1 sec", job=job)
|
211
|
-
|
212
|
-
|
209
|
+
time.sleep(wait) # in seconds
|
213
210
|
def run_at(when: str, job=None, wait: int = 60):
|
214
211
|
"""
|
215
212
|
Schedules a job to run at an exact time of the day.
|
216
213
|
|
214
|
+
# Example usage:
|
215
|
+
def my_job():
|
216
|
+
print("Job executed at the exact time!")
|
217
|
+
# Schedule the job at 14:30 when day
|
218
|
+
run_at(when="1.30 pm", job=my_job)
|
219
|
+
|
217
220
|
:param when: String specifying the time, e.g. '1:30 pm','1.30 am','14:30', '1:30 pm', '8:45 am'.
|
218
221
|
:param job: The function to be scheduled.
|
219
222
|
:param wait: The sleep interval between checks in seconds.
|
@@ -241,13 +244,10 @@ def run_at(when: str, job=None, wait: int = 60):
|
|
241
244
|
f"Invalid time format: {when}. Use 'HH:MM' (24-hour) or 'H:MM AM/PM' format."
|
242
245
|
)
|
243
246
|
return
|
244
|
-
|
245
247
|
print(f"Job scheduled to run at {scheduled_time}.")
|
246
|
-
|
247
248
|
# Keep checking the current time
|
248
249
|
while True:
|
249
250
|
now = datetime.now()
|
250
|
-
|
251
251
|
# Check if current time matches the scheduled time
|
252
252
|
if (
|
253
253
|
now.time().hour == scheduled_time.hour
|
@@ -259,14 +259,7 @@ def run_at(when: str, job=None, wait: int = 60):
|
|
259
259
|
) # Sleep for a minute to avoid running the job multiple times in the same minute
|
260
260
|
|
261
261
|
time.sleep(wait) # wait to avoid excessive CPU usage
|
262
|
-
|
263
|
-
|
264
|
-
# # Example usage:
|
265
|
-
# def my_job():
|
266
|
-
# print("Job executed at the exact time!")
|
267
|
-
# # Schedule the job at 14:30 when day
|
268
|
-
# run_at(when="1.30 pm", job=my_job)
|
269
|
-
|
262
|
+
|
270
263
|
# ************* above section: run_when *************
|
271
264
|
|
272
265
|
|
@@ -344,12 +337,7 @@ def get_version(pkg):
|
|
344
337
|
if isinstance(pkg, str):
|
345
338
|
get_v(pkg)
|
346
339
|
elif isinstance(pkg, list):
|
347
|
-
[get_v(pkg_) for pkg_ in pkg]
|
348
|
-
|
349
|
-
|
350
|
-
# usage:
|
351
|
-
# get_version(['pandas','numpy','py2ls'])
|
352
|
-
|
340
|
+
[get_v(pkg_) for pkg_ in pkg]
|
353
341
|
|
354
342
|
def rm_folder(folder_path, verbose=True):
|
355
343
|
import shutil
|
@@ -387,32 +375,11 @@ def fremove(path, verbose=True):
|
|
387
375
|
except Exception as e:
|
388
376
|
if verbose:
|
389
377
|
print(f"Failed to delete {path}. Reason: {e}")
|
390
|
-
|
391
|
-
|
392
|
-
# def get_cwd(verbose: bool = True):
|
393
|
-
# """
|
394
|
-
# get_cwd: to get the current working directory
|
395
|
-
# Args:
|
396
|
-
# verbose (bool, optional): to show which function is use. Defaults to True.
|
397
|
-
# """
|
398
|
-
# try:
|
399
|
-
# script_dir = os.path.dirname(os.path.abspath(__file__))
|
400
|
-
# if verbose:
|
401
|
-
# print("os.path.dirname(os.path.abspath(__file__)):", script_dir)
|
402
|
-
# except NameError:
|
403
|
-
# # This works in an interactive environment (like a Jupyter notebook)
|
404
|
-
# script_dir = os.getcwd()
|
405
|
-
# if verbose:
|
406
|
-
# print("os.getcwd():", script_dir)
|
407
|
-
# return script_dir
|
408
|
-
|
409
|
-
|
378
|
+
|
410
379
|
def get_cwd():
    """Return the directory that contains this module as a ``pathlib.Path``.

    Despite the name, the result is the folder of the file defining this
    function, not the process working directory. In interactive sessions
    where ``__file__`` is not defined, the current working directory is
    returned instead of raising ``NameError``.

    Returns:
        pathlib.Path: absolute directory path.
    """
    from pathlib import Path

    try:
        return Path(__file__).resolve().parent
    except NameError:
        # No __file__ (e.g. code pasted into a REPL / notebook cell).
        return Path.cwd()
|
417
384
|
|
418
385
|
|
@@ -530,12 +497,9 @@ def echo(*args, **kwargs):
|
|
530
497
|
|
531
498
|
def chat(*args, **kwargs):
    """Alias of ``echo``: forward every argument unchanged and return its result."""
    reply = echo(*args, **kwargs)
    return reply
|
533
|
-
|
534
|
-
|
535
500
|
def ai(*args, **kwargs):
    """Alias of ``echo``: forward every argument unchanged and return its result."""
    answer = echo(*args, **kwargs)
    return answer
|
537
502
|
|
538
|
-
|
539
503
|
def detect_lang(text, output="lang", verbose=True):
|
540
504
|
from langdetect import detect
|
541
505
|
|
@@ -565,10 +529,6 @@ def is_text(s):
|
|
565
529
|
# no_special = not re.search(r'[^A-Za-z0-9\s]', s)
|
566
530
|
return has_alpha and has_non_alpha
|
567
531
|
|
568
|
-
|
569
|
-
from typing import Any, Union
|
570
|
-
|
571
|
-
|
572
532
|
def share(*args, strict=True, n_shared=2, verbose=True):
|
573
533
|
"""
|
574
534
|
check the shared elelements in two list.
|
@@ -1097,7 +1057,365 @@ def imgcmp(
|
|
1097
1057
|
else:
|
1098
1058
|
raise ValueError("Invalid method. Use 'ssim', 'match', or 'knn'.")
|
1099
1059
|
|
1060
|
+
def fcmp(file1, file2, kind=None, verbose=True, **kwargs):
    """Compare two files of the same type and summarise their differences.

    Parameters:
    - file1: path to the base/reference file.
    - file2: path to the new file.
    - kind: file type ('xlsx', 'xls', 'xlsm', 'csv', 'json', 'txt'); a leading
      dot and upper case are accepted. Defaults to the extension of ``file1``.
    - verbose: whether to print progress messages.
    - **kwargs: forwarded to the type-specific comparer, e.g. ``ignore_case``,
      ``numeric_tolerance``, ``ignore_columns``, ``sheet_name``,
      ``key_columns``, ``detect_reordered_rows``.

    Returns:
    - dict: the summary produced by the type-specific comparer.

    Raises:
    - ValueError: if the file type is not supported.
    """
    import os

    if kind is None:
        kind = os.path.splitext(file1)[1]
    kind = str(kind).lower().lstrip(".")

    comparers = {
        "xlsx": _fcmp_excel,
        "xls": _fcmp_excel,
        "xlsm": _fcmp_excel,
        "csv": _fcmp_csv,
        "json": _fcmp_json,
        "txt": _fcmp_txt,
    }
    try:
        compare = comparers[kind]
    except KeyError:
        raise ValueError(f"Unsupported file type: {kind}") from None
    return compare(file1=file1, file2=file2, verbose=verbose, **kwargs)


def _fcmp_lower_strings(df):
    """Return a copy of *df* with string column labels and string cells lower-cased."""
    import pandas as pd

    def lower(value):
        return value.lower() if isinstance(value, str) else value

    out = df.copy()
    out.columns = [lower(col) for col in out.columns]
    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1.
    cellwise = getattr(pd.DataFrame, "map", None) or pd.DataFrame.applymap
    return cellwise(out, lower)


def _fcmp_rows_not_in(candidate, reference, with_index=False):
    """Return the rows of *candidate* whose full content does not occur in *reference*."""
    known = set(reference.itertuples(index=with_index, name=None))
    positions = [
        pos
        for pos, row in enumerate(candidate.itertuples(index=with_index, name=None))
        if row not in known
    ]
    return candidate.iloc[positions]


def _fcmp_changed_cells(left, right, tolerance=0):
    """Return ``DataFrame.compare`` output for rows sharing an index label.

    Rows whose only differences are numeric and within *tolerance* are dropped
    when ``tolerance > 0``.
    """
    import numbers

    import pandas as pd

    left_common = left[left.index.isin(right.index)]
    right_common = right[right.index.isin(left.index)]
    # compare() needs identically ordered labels; reordering is only
    # well-defined when the labels are unique.
    if left_common.index.is_unique and right_common.index.is_unique:
        right_common = right_common.reindex(left_common.index)

    diff = left_common.compare(right_common, keep_shape=False, keep_equal=False)
    if tolerance <= 0 or diff.empty:
        return diff

    def is_number(value):
        return isinstance(value, numbers.Number) and not isinstance(value, bool)

    columns = list(diff.columns.get_level_values(0).unique())

    def beyond_tolerance(row):
        for col in columns:
            old, new = row[(col, "self")], row[(col, "other")]
            if pd.isna(old) and pd.isna(new):
                continue  # equal cells are masked as NaN by compare()
            if is_number(old) and is_number(new):
                if abs(old - new) > tolerance:
                    return True
            else:
                return True
        return False

    keep = [pos for pos, (_, row) in enumerate(diff.iterrows()) if beyond_tolerance(row)]
    return diff.iloc[keep]


def _fcmp_excel(
    file1,  # base
    file2,  # new
    sheet_name=None,
    key_columns=None,
    ignore_columns=None,
    numeric_tolerance=0,
    ignore_case=False,
    detect_reordered_rows=False,
    verbose=True,
    **kwargs,
):
    """Compare two Excel workbooks sheet by sheet.

    Parameters:
    - file1 (Base/Reference): str, path to the first Excel file.
    - file2: str, path to the second Excel file.
    - sheet_name: str or list of str, sheets to compare (default: all sheets
      present in both workbooks). Sheets missing from either file are skipped.
    - key_columns: str or list of str, columns used as row identifiers.
    - ignore_columns: list of str, columns to exclude from comparison.
    - numeric_tolerance: float, numeric differences up to this value are not
      reported as modifications (default: 0, exact match).
    - ignore_case: bool, lower-case column labels and string cells first.
    - detect_reordered_rows: bool, also collect rows of file1 whose content
      differs from the file2 row carrying the same index label.
    - verbose: bool, whether to print progress messages.

    Side effects:
    - Creates ``CMP_<file1 basename>/<sheet>/`` in the working directory,
      writes non-empty difference tables there as .xlsx files and
      appends the run to ``comparison_summary.json``.

    Returns:
    - dict, per-sheet counts and output file paths.
    """
    import json
    import os
    from concurrent.futures import ThreadPoolExecutor
    from datetime import datetime

    import pandas as pd

    def as_list(value):
        if value is None:
            return []
        return [value] if isinstance(value, str) else list(value)

    def fold(label):
        # Column labels are lower-cased below, so user-supplied names must be too.
        return label.lower() if ignore_case and isinstance(label, str) else label

    key_columns = [fold(col) for col in as_list(key_columns)]
    ignore_columns = [fold(col) for col in as_list(ignore_columns)]

    output_dir = f"CMP_{os.path.splitext(os.path.basename(file1))[0]}"
    os.makedirs(output_dir, exist_ok=True)
    if verbose:
        print(f"Reference file: '{os.path.basename(file1)}'")

    with pd.ExcelFile(file1) as xl1, pd.ExcelFile(file2) as xl2:
        shared = set(xl1.sheet_names) & set(xl2.sheet_names)
        if sheet_name is None:
            sheets = [name for name in xl1.sheet_names if name in shared]
        else:
            sheets = [name for name in as_list(sheet_name) if name in shared]

        def compare_sheet(sheet):
            if verbose:
                print(f"Comparing sheet: {sheet}...")

            df1 = xl1.parse(sheet).fillna("NA")
            df2 = xl2.parse(sheet).fillna("NA")

            if ignore_case:
                df1 = _fcmp_lower_strings(df1)
                df2 = _fcmp_lower_strings(df2)

            if ignore_columns:
                df1 = df1.drop(columns=ignore_columns, errors="ignore")
                df2 = df2.drop(columns=ignore_columns, errors="ignore")

            # Only columns present in both sheets can be compared.
            common_cols = df1.columns.intersection(df2.columns)
            df1 = df1[common_cols]
            df2 = df2[common_cols]

            if key_columns:
                df1 = df1.set_index(key_columns)
                df2 = df2.set_index(key_columns)

            # Whole-row comparison; the key is part of the row when one is set.
            added_rows = _fcmp_rows_not_in(df2, df1, with_index=bool(key_columns))
            deleted_rows = _fcmp_rows_not_in(df1, df2, with_index=bool(key_columns))

            reordered_rows = pd.DataFrame()
            if detect_reordered_rows:
                moved = []
                for idx in df1.index:
                    if idx in df2.index and not df1.loc[idx].equals(df2.loc[idx]):
                        hit = df1.loc[idx]
                        moved.append(
                            hit.to_frame().T if isinstance(hit, pd.Series) else hit
                        )
                if moved:
                    reordered_rows = pd.concat(moved)

            modified_rows = _fcmp_changed_cells(df1, df2, numeric_tolerance)

            sheet_dir = os.path.join(output_dir, sheet)
            os.makedirs(sheet_dir, exist_ok=True)
            added_path = os.path.join(sheet_dir, f"{sheet}_added.xlsx")
            deleted_path = os.path.join(sheet_dir, f"{sheet}_deleted.xlsx")
            modified_path = os.path.join(sheet_dir, f"{sheet}_modified.xlsx")
            reordered_path = os.path.join(sheet_dir, f"{sheet}_reordered.xlsx")

            if not added_rows.empty:
                added_rows.to_excel(added_path)
            if not deleted_rows.empty:
                deleted_rows.to_excel(deleted_path)
            if not modified_rows.empty:
                modified_rows.to_excel(modified_path)
            if not reordered_rows.empty:
                reordered_rows.to_excel(reordered_path)

            return {
                "added_rows": len(added_rows),
                "deleted_rows": len(deleted_rows),
                "modified_rows": len(modified_rows),
                "reordered_rows": len(reordered_rows),
                "added_file": added_path if not added_rows.empty else None,
                "deleted_file": deleted_path if not deleted_rows.empty else None,
                "modified_file": modified_path if not modified_rows.empty else None,
                "reordered_file": reordered_path if not reordered_rows.empty else None,
            }

        # Sheets are independent of each other, so compare them concurrently.
        with ThreadPoolExecutor() as executor:
            summary = dict(zip(sheets, executor.map(compare_sheet, sheets)))

    # Append this run to the JSON log, newest entry first.
    json_path = os.path.join(output_dir, "comparison_summary.json")
    if os.path.exists(json_path):
        with open(json_path, "r") as f:
            existing_data = json.load(f)
    else:
        existing_data = {}

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    existing_data[timestamp] = summary
    existing_data = dict(sorted(existing_data.items(), reverse=True))

    with open(json_path, "w") as f:
        json.dump(existing_data, f, indent=4)
    if verbose:
        print(f"Comparison complete. Results saved in '{output_dir}'")

    return summary


def _fcmp_csv(
    file1,
    file2,
    ignore_case=False,
    numeric_tolerance=0,
    ignore_columns=None,
    verbose=True,
    **kwargs,
):
    """Compare two CSV files row by row.

    Rows are matched by position for the "modified" table, so files of
    different length are compared over the rows they share.

    Returns:
    - dict with the counts ``added_rows`` / ``deleted_rows`` / ``modified_rows``
      and the corresponding DataFrames under ``added_file`` / ``deleted_file``
      / ``modified_file`` (empty DataFrame when there is nothing to report).
    """
    import pandas as pd

    df1 = pd.read_csv(file1).fillna("NA")
    df2 = pd.read_csv(file2).fillna("NA")

    if ignore_case:
        df1 = _fcmp_lower_strings(df1)
        df2 = _fcmp_lower_strings(df2)

    if ignore_columns:
        if isinstance(ignore_columns, str):
            ignore_columns = [ignore_columns]
        ignore_columns = [
            col.lower() if ignore_case and isinstance(col, str) else col
            for col in ignore_columns
        ]
        df1 = df1.drop(columns=ignore_columns, errors="ignore")
        df2 = df2.drop(columns=ignore_columns, errors="ignore")

    df1 = df1.reset_index(drop=True)
    df2 = df2.reset_index(drop=True)

    # Keep only the columns both files have.
    df1, df2 = df1.align(df2, join="inner", axis=1)

    added_rows = _fcmp_rows_not_in(df2, df1)
    deleted_rows = _fcmp_rows_not_in(df1, df2)
    modified_rows = _fcmp_changed_cells(df1, df2, numeric_tolerance)

    return {
        "added_rows": len(added_rows),
        "deleted_rows": len(deleted_rows),
        "modified_rows": len(modified_rows),
        "added_file": added_rows if not added_rows.empty else pd.DataFrame(),
        "deleted_file": deleted_rows if not deleted_rows.empty else pd.DataFrame(),
        "modified_file": modified_rows if not modified_rows.empty else pd.DataFrame(),
    }


def _fcmp_json(
    file1, file2, ignore_case=False, numeric_tolerance=0, verbose=True, **kwargs
):
    """Compare the top level of two JSON documents.

    ``numeric_tolerance`` is accepted for signature parity with the other
    comparers but is not applied.

    Returns:
    - dict with ``added_keys``, ``deleted_keys`` and ``modified_keys``. For two
      objects these are dicts (modified values are ``(old, new)`` tuples); for
      two arrays they are lists; for anything else a change is reported as a
      single ``(old, new)`` tuple under ``modified_keys``.
    """
    import json

    def load(path):
        with open(path, "r", encoding="utf-8") as fh:
            return json.load(fh)

    def normalize(obj):
        if isinstance(obj, dict):
            return {k.lower(): normalize(v) for k, v in obj.items()}
        if isinstance(obj, list):
            return [normalize(item) for item in obj]
        if isinstance(obj, str):
            return obj.lower()
        return obj

    json1, json2 = load(file1), load(file2)
    if ignore_case:
        json1, json2 = normalize(json1), normalize(json2)

    if isinstance(json1, dict) and isinstance(json2, dict):
        added = {k: json2[k] for k in json2 if k not in json1}
        deleted = {k: json1[k] for k in json1 if k not in json2}
        modified = {
            k: (json1[k], json2[k])
            for k in json1
            if k in json2 and json1[k] != json2[k]
        }
    elif isinstance(json1, list) and isinstance(json2, list):
        added = [item for item in json2 if item not in json1]
        deleted = [item for item in json1 if item not in json2]
        modified = [(a, b) for a, b in zip(json1, json2) if a != b]
    else:
        # Scalars or mismatched container types: nothing is added or deleted,
        # the whole document changed.
        added, deleted = None, None
        modified = (json1, json2) if json1 != json2 else None

    return {"added_keys": added, "deleted_keys": deleted, "modified_keys": modified}


def _fcmp_txt(
    file1,
    file2,
    ignore_case=False,
    numeric_tolerance=0,
    verbose=True,
    encoding=None,
    **kwargs,
):
    """Compare two text files line by line.

    Parameters:
    - ignore_case: lower-case every line before comparing.
    - numeric_tolerance: when > 0, two lines at the same position that both
      parse as numbers count as modified only if they differ by more than
      this value; other lines are compared as text.
    - encoding: text encoding passed to ``open`` (default: platform default).

    Returns:
    - dict with ``added_lines`` (in file2 only), ``deleted_lines`` (in file1
      only) and ``modified_lines`` (``(line1, line2)`` pairs by position).
    """

    def read_lines(path):
        with open(path, "r", encoding=encoding) as fh:
            lines = fh.readlines()
        return [line.lower() for line in lines] if ignore_case else lines

    lines1 = read_lines(file1)
    lines2 = read_lines(file2)

    seen1, seen2 = set(lines1), set(lines2)
    added_lines = [line for line in lines2 if line not in seen1]
    deleted_lines = [line for line in lines1 if line not in seen2]

    def differs(line1, line2):
        if line1 == line2:
            return False
        if numeric_tolerance > 0:
            try:
                return abs(float(line1) - float(line2)) > numeric_tolerance
            except ValueError:
                pass  # not numeric: fall back to the textual difference
        return True

    modified_lines = [
        (line1, line2) for line1, line2 in zip(lines1, lines2) if differs(line1, line2)
    ]

    return {
        "added_lines": added_lines,
        "deleted_lines": deleted_lines,
        "modified_lines": modified_lines,
    }
|
1101
1419
|
def cn2pinyin(
|
1102
1420
|
cn_str: Union[str, list] = None,
|
1103
1421
|
sep: str = " ",
|
@@ -1188,21 +1506,38 @@ def counter(list_, verbose=True):
|
|
1188
1506
|
# print(f"Return a list of the n most common elements:\n{c.most_common()}")
|
1189
1507
|
# print(f"Compute the sum of the counts:\n{c.total()}")
|
1190
1508
|
|
1509
|
+
def dict2df(dict_, fill=None, axis=0):
    """
    Convert a dictionary to a DataFrame, padding shorter values with `fill`.

    The input dictionary and its lists are left untouched; padding is applied
    to copies.

    Parameters:
    - dict_: The dictionary to convert. List values become columns; any
      non-list value is wrapped into a one-element list.
    - fill: Value used to pad lists shorter than the longest one.
    - axis: 0 to use the keys as column names, anything else to use them as
      the index (rows).

    Returns:
    - DataFrame created from the dictionary (empty for an empty dictionary).
    """
    import pandas as pd

    columns = {}
    for key, value in dict_.items():
        if isinstance(value, list):
            columns[key] = list(value)  # copy so the caller's list is not extended
        else:
            columns[key] = [value]
            print(f"'{key}' is not a list. trying to convert it to 'list'")

    if not columns:
        return pd.DataFrame()

    # Pad every list to the length of the longest one.
    len_max = max(len(value) for value in columns.values())
    for value in columns.values():
        value.extend([fill] * (len_max - len(value)))

    frame = pd.DataFrame(columns)
    return frame if axis == 0 else frame.transpose()
|
1206
1541
|
|
1207
1542
|
def text2audio(
|
1208
1543
|
text,
|
@@ -1433,104 +1768,161 @@ def str2time(time_str, fmt="24"):
|
|
1433
1768
|
# print(formatted_time2) # Output: 14:30:45
|
1434
1769
|
|
1435
1770
|
|
1436
|
-
def str2date(date_str, fmt="%Y-%m-%
|
1771
|
+
def str2date(date_str, original_fmt=None, fmt="%Y-%m-%d"):
    """
    Convert a date string to the desired format.

    Usage:
        str2date("25.12.24", original_fmt="%d.%m.%y")                  # '2024-12-25'
        str2date("25.12.24", original_fmt="%d.%m.%y", fmt="%d/%m/%Y")  # '25/12/2024'

    Parameters:
    - date_str (str): The input date string; other types are converted with str().
    - original_fmt (str, optional): The strptime format of the input. If not
      provided, the format is auto-detected with dateutil.
    - fmt (str): The desired strftime format of the output. Defaults to
      '%Y-%m-%d'. If empty or None, the parsed datetime object is returned.

    Returns:
    - str: the re-formatted date; or
    - datetime: the parsed date when `fmt` is empty; or
    - str: the unchanged input when it cannot be parsed (the parser's error
      is printed).

    Raises:
    - ValueError: If the parsed date cannot be rendered with `fmt`.
    """
    # NOTE(review): the pairing of the original "else: date_obj = date_str"
    # branch was not recoverable with certainty; this follows the in-code
    # comment (no `fmt` -> datetime object, unparsable input -> input string).
    from datetime import datetime

    if not isinstance(date_str, str):
        date_str = str(date_str)

    if original_fmt:
        def parse(text):
            return datetime.strptime(text, original_fmt)
    else:
        # Only needed for auto-detection, so import it lazily.
        from dateutil import parser

        parse = parser.parse

    try:
        date_obj = parse(date_str)
    except Exception as e:  # best effort: report and hand the input back
        print(e)
        return date_str

    if not fmt:
        return date_obj
    try:
        return date_obj.strftime(fmt)
    except (ValueError, TypeError) as e:
        raise ValueError(
            f"Unable to process date string: '{date_str}'. Error: {e}"
        ) from e
|
1454
1814
|
|
1455
1815
|
|
1456
1816
|
# str1=str2date(num2str(20240625),fmt="%a %d-%B-%Y")
|
1457
1817
|
# print(str1)
|
1458
1818
|
# str2=str2num(str2date(str1,fmt='%a %Y%m%d'))
|
1459
1819
|
# print(str2)
|
1820
|
+
|
1821
|
+
def str2num(
    s: str,
    *args,
    sep: Optional[Union[str, List[str]]] = None,
    round_digits: Optional[int] = None,
    return_list: bool = True,
    handle_text: bool = True
) -> Union[float, int, List[Union[float, int]], None]:
    """
    Convert a string containing numeric or textual data into an integer, float, or list of numbers.

    # Examples
    str2num("123")                                       # 123
    str2num("123.456", 2)                                # 123.46
    str2num("12,345.6789", ",")                          # 12345.6789
    str2num("one hundred and twenty three")              # 123 (needs numerizer)
    str2num("5.9435 euros for 10.04499 kg", round_digits=3)      # [5.944, 10.045]
    str2num("5.9435 euros for 10.04499 kg", return_list=False)   # 5.9435

    Parameters:
    - s (str): Input string containing a number or textual representation of a number.
    - *args: Positional shortcuts: every str is an extra delimiter, the first
      int is used as round_digits when that keyword is not given.
    - sep (str or list): Delimiter(s) to remove from the string (e.g., ',' or ['.', ',']).
      A list passed here is not modified.
    - round_digits (int): Number of decimal places to round the result to;
      0 converts the result to int.
    - return_list (bool): When several numbers are found, return all of them
      (True) or only the first one (False). A single number is always
      returned as a scalar.
    - handle_text (bool): Whether to process textual numbers using the numerizer
      library; skipped silently when numerizer is not installed.

    Returns:
    - Union[float, int, List[Union[float, int]], None]: Converted number(s), or None
      if `s` is not a string or contains no number.
    """
    import re

    if not isinstance(s, str):
        return None

    # Merge positional shortcuts with the explicit parameters. Work on a copy
    # so a caller-supplied `sep` list is never extended.
    if sep is None:
        delimiters = []
    elif isinstance(sep, str):
        delimiters = [sep]
    else:
        delimiters = list(sep)
    for arg in args:
        if isinstance(arg, str):
            delimiters.append(arg)
        elif isinstance(arg, int) and round_digits is None:
            round_digits = arg

    # Remove all specified delimiters
    for delimiter in delimiters:
        s = s.replace(delimiter, "")

    # Handle textual numbers ("seven million" -> "7000000")
    if handle_text:
        try:
            from numerizer import numerize
        except ImportError:
            numerize = None
        if numerize is not None:
            try:
                s = numerize(s)
            except Exception:
                pass  # deliberate best effort: fall back to the digits already in `s`

    def try_convert(segment: str) -> Union[float, int, None]:
        try:
            return int(segment)
        except ValueError:
            try:
                return float(segment)
            except ValueError:
                return None

    # Extract numeric segments
    # NOTE(review): the optional sign only binds to the decimal alternative,
    # so "-5" yields 5 while "-5.0" yields -5.0; kept as-is for compatibility.
    number_segments = re.findall(r"[-+]?\d*\.\d+|\d+", s)
    numbers = [try_convert(seg) for seg in number_segments if seg]
    numbers = [num for num in numbers if num is not None]

    if not numbers:
        return None  # No valid numbers found

    # A single hit is always a scalar; several hits honour `return_list`.
    if len(numbers) > 1 and return_list:
        result = numbers
    else:
        result = numbers[0]

    if round_digits is not None:
        # The small epsilon nudges values such as 2.675 over the binary
        # representation error before rounding.
        if isinstance(result, list):
            result = [round(num + 1e-10, round_digits) for num in result]
        else:
            result = round(result + 1e-10, round_digits)

        # Convert to int if rounding to 0 digits
        if round_digits == 0:
            if isinstance(result, list):
                result = [int(num) for num in result]
            else:
                result = int(result)

    return result
|
1534
1926
|
def num2str(num, *args, **kwargs):
|
1535
1927
|
delimiter = kwargs.get("sep", None)
|
1536
1928
|
round_digits = kwargs.get("round", None)
|
@@ -1706,6 +2098,68 @@ def cm2inch(*inch) -> list:
|
|
1706
2098
|
return [i / 2.54 for i in inch]
|
1707
2099
|
|
1708
2100
|
|
2101
|
+
|
2102
|
+
def sqlite2sql(db_path, sql_path):
    """
    Export an SQLite database to an SQL file, including schema and data for all tables.

    Internal ``sqlite_*`` tables are skipped. Errors are printed, not raised.

    :param db_path: Path to the SQLite .db file
    :param sql_path: Path to the output .sql file

    # Usage
    db_path = "your_database.db"  # Replace with the path to your SQLite database
    sql_path = "output.sql"  # Replace with your desired output file name
    sqlite2sql(db_path, sql_path)

    """
    import sqlite3

    conn = None  # so `finally` is safe even when connect() itself fails
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        with open(sql_path, "w", encoding="utf-8") as f:
            # Write a header for the SQL dump
            f.write("-- SQLite Database Dump\n")
            f.write(f"-- Source: {db_path}\n\n")

            # Retrieve every user table together with its CREATE statement
            cursor.execute(
                "SELECT name, sql FROM sqlite_master "
                "WHERE type='table' AND name NOT LIKE 'sqlite_%';"
            )
            tables = cursor.fetchall()

            for table, schema in tables:
                if schema:
                    f.write(f"{schema};\n\n")

                quoted_table = _sqlite_quote_identifier(table)
                cursor.execute(f"SELECT * FROM {quoted_table};")
                # Column names come from the result set itself, so they always
                # line up with the row values.
                column_list = ", ".join(
                    _sqlite_quote_identifier(col[0]) for col in cursor.description
                )
                for row in cursor.fetchall():
                    values = ", ".join(_sqlite_literal(val) for val in row)
                    f.write(
                        f"INSERT INTO {quoted_table} ({column_list}) VALUES ({values});\n"
                    )

                f.write("\n")

        print(f"Database exported successfully to {sql_path}")

    except sqlite3.Error as e:
        print(f"SQLite error: {e}")
    except Exception as e:
        print(f"Unexpected error: {e}")
    finally:
        # Ensure the connection is closed
        if conn is not None:
            conn.close()


def _sqlite_quote_identifier(name):
    """Return *name* as a double-quoted SQL identifier with embedded quotes doubled."""
    return '"' + str(name).replace('"', '""') + '"'


def _sqlite_literal(value):
    """Render one Python value fetched from SQLite as an SQL literal."""
    import math

    if value is None:
        return "NULL"
    if isinstance(value, bool):
        return str(int(value))
    if isinstance(value, int):
        return str(value)
    if isinstance(value, float):
        if math.isnan(value):
            return "NULL"
        if math.isinf(value):
            # SQLite has no infinity keyword; an overflowing literal yields it.
            return "9e999" if value > 0 else "-9e999"
        return repr(value)
    if isinstance(value, (bytes, bytearray, memoryview)):
        return "X'" + bytes(value).hex() + "'"
    return "'" + str(value).replace("'", "''") + "'"
|
2161
|
+
|
2162
|
+
|
1709
2163
|
def sreplace(*args, **kwargs):
|
1710
2164
|
"""
|
1711
2165
|
sreplace(text, by=None, robust=True)
|
@@ -2194,6 +2648,12 @@ def unzip(dir_path, output_dir=None):
|
|
2194
2648
|
Unzips or extracts various compressed file formats (.gz, .zip, .7z, .tar, .bz2, .xz, .rar).
|
2195
2649
|
If the output directory already exists, it will be replaced.
|
2196
2650
|
|
2651
|
+
# Example usage:
|
2652
|
+
output_dir = unzip('data.tar.gz')
|
2653
|
+
output_file = unzip('file.csv.gz')
|
2654
|
+
output_dir_zip = unzip('archive.zip')
|
2655
|
+
output_dir_7z = unzip('archive.7z')
|
2656
|
+
|
2197
2657
|
Parameters:
|
2198
2658
|
dir_path (str): Path to the compressed file.
|
2199
2659
|
output_dir (str): Directory where the extracted files will be saved.
|
@@ -2314,21 +2774,12 @@ def unzip(dir_path, output_dir=None):
|
|
2314
2774
|
else:
|
2315
2775
|
raise ValueError(f"Unsupported file format: {os.path.splitext(dir_path)[1]}")
|
2316
2776
|
|
2317
|
-
|
2318
|
-
# Example usage:
|
2319
|
-
# output_dir = unzip('data.tar.gz')
|
2320
|
-
# output_file = unzip('file.csv.gz')
|
2321
|
-
# output_dir_zip = unzip('archive.zip')
|
2322
|
-
# output_dir_7z = unzip('archive.7z')
|
2323
|
-
|
2324
|
-
|
2325
2777
|
def is_df_abnormal(df: pd.DataFrame, verbose=False) -> bool:
|
2326
2778
|
"""
|
2327
2779
|
Usage
|
2328
2780
|
is_abnormal = is_df_abnormal(df, verbose=1)
|
2329
2781
|
True: abnormal
|
2330
2782
|
False: normal
|
2331
|
-
|
2332
2783
|
"""
|
2333
2784
|
if not isinstance(df, pd.DataFrame):
|
2334
2785
|
if verbose:
|
@@ -3405,29 +3856,47 @@ def fsave(
|
|
3405
3856
|
df = pd.DataFrame(data)
|
3406
3857
|
df.to_csv(fpath, **kwargs_valid)
|
3407
3858
|
|
3408
|
-
def save_xlsx(fpath, data, password=None, **kwargs):
|
3859
|
+
def save_xlsx(fpath, data, password=None,apply_format=None, **kwargs):
|
3409
3860
|
import msoffcrypto
|
3410
3861
|
from io import BytesIO
|
3862
|
+
import openpyxl
|
3863
|
+
import pandas.io.formats.style
|
3411
3864
|
|
3412
3865
|
verbose = kwargs.pop("verbose", False)
|
3413
3866
|
sheet_name = kwargs.pop("sheet_name", "Sheet1")
|
3414
|
-
|
3867
|
+
engine = kwargs.pop("engine", "xlsxwriter")
|
3868
|
+
mode = kwargs.pop("mode","a")
|
3869
|
+
if_sheet_exists = strcmp(kwargs.get("if_sheet_exists","new"),['error', 'new', 'replace', 'overlay'])[0]
|
3870
|
+
kwargs.pop("if_sheet_exists",None)
|
3415
3871
|
if run_once_within(reverse=True):
|
3416
3872
|
use_pd("to_excel", verbose=verbose)
|
3417
|
-
|
3418
|
-
if
|
3419
|
-
format_excel(
|
3873
|
+
|
3874
|
+
if apply_format is None:
|
3875
|
+
kwargs_format=list(extract_kwargs(format_excel).keys())[4:]
|
3876
|
+
apply_format=True if any([i in kwargs_format for i in kwargs]) else False
|
3877
|
+
print(f"apply format: {apply_format}")
|
3878
|
+
if apply_format or any([
|
3879
|
+
isinstance(data, openpyxl.worksheet.worksheet.Worksheet),
|
3880
|
+
isinstance(data, openpyxl.workbook.workbook.Workbook),
|
3881
|
+
isinstance(data, pd.io.formats.style.Styler)
|
3882
|
+
]):
|
3883
|
+
format_excel(df=data,
|
3884
|
+
filename=fpath,
|
3885
|
+
sheet_name=sheet_name,
|
3886
|
+
password=password,
|
3887
|
+
if_sheet_exists=if_sheet_exists,
|
3888
|
+
mode=mode,
|
3889
|
+
engine=engine,
|
3890
|
+
verbose=verbose,
|
3891
|
+
**kwargs)
|
3420
3892
|
else:
|
3421
3893
|
# Remove non-relevant kwargs
|
3422
3894
|
irrelevant_keys=list(extract_kwargs(format_excel).keys())[4:]
|
3423
|
-
|
3424
|
-
for key in irrelevant_keys:
|
3425
|
-
kwargs.pop(key, None)
|
3426
|
-
|
3895
|
+
[kwargs.pop(key, None) for key in irrelevant_keys]
|
3427
3896
|
df = pd.DataFrame(data)
|
3428
|
-
|
3429
3897
|
# Write to Excel without password first
|
3430
3898
|
temp_file = BytesIO()
|
3899
|
+
|
3431
3900
|
df.to_excel(
|
3432
3901
|
temp_file,
|
3433
3902
|
sheet_name=sheet_name,
|
@@ -3435,26 +3904,22 @@ def fsave(
|
|
3435
3904
|
engine="xlsxwriter",
|
3436
3905
|
**kwargs,
|
3437
3906
|
)
|
3438
|
-
|
3439
3907
|
# If a password is provided, encrypt the file
|
3440
3908
|
if password:
|
3441
3909
|
temp_file.seek(0)
|
3442
|
-
office_file = msoffcrypto.OfficeFile(temp_file)
|
3443
|
-
office_file.load_key(password=password) # Provide the password
|
3444
|
-
|
3445
|
-
# Encrypt and save the file
|
3910
|
+
office_file = msoffcrypto.OfficeFile(temp_file)
|
3446
3911
|
with open(fpath, "wb") as encrypted_file:
|
3447
|
-
office_file.encrypt(encrypted_file)
|
3448
|
-
else:
|
3449
|
-
# Save the file without encryption if no password is provided
|
3912
|
+
office_file.encrypt(outfile=encrypted_file,password=password)
|
3913
|
+
else: # Save the file without encryption if no password is provided
|
3450
3914
|
try:
|
3451
3915
|
# Use ExcelWriter with append mode if the file exists
|
3452
|
-
|
3453
|
-
|
3454
|
-
|
3455
|
-
|
3456
|
-
|
3457
|
-
)
|
3916
|
+
engine="openpyxl" if mode=="a" else "xlsxwriter"
|
3917
|
+
if mode=="a":
|
3918
|
+
with pd.ExcelWriter(fpath, engine=engine, mode=mode,if_sheet_exists=if_sheet_exists) as writer:
|
3919
|
+
df.to_excel(writer, sheet_name=sheet_name, index=False, **kwargs)
|
3920
|
+
else:
|
3921
|
+
with pd.ExcelWriter(fpath, engine=engine, mode=mode) as writer:
|
3922
|
+
df.to_excel(writer, sheet_name=sheet_name, index=False, **kwargs)
|
3458
3923
|
except FileNotFoundError:
|
3459
3924
|
# If file doesn't exist, create a new one
|
3460
3925
|
df.to_excel(fpath, sheet_name=sheet_name, index=False, **kwargs)
|
@@ -3478,15 +3943,9 @@ def fsave(
|
|
3478
3943
|
nb["cells"] = cells
|
3479
3944
|
# Write the notebook to a file
|
3480
3945
|
with open(fpath, "w", encoding="utf-8") as ipynb_file:
|
3481
|
-
nbformat.write(nb, ipynb_file)
|
3482
|
-
|
3483
|
-
# def save_json(fpath, data, **kwargs):
|
3484
|
-
# with open(fpath, "w") as file:
|
3485
|
-
# json.dump(data, file, **kwargs)
|
3486
|
-
|
3946
|
+
nbformat.write(nb, ipynb_file)
|
3487
3947
|
def save_json(fpath_fname, var_dict_or_df):
|
3488
3948
|
import json
|
3489
|
-
|
3490
3949
|
def _convert_js(data):
|
3491
3950
|
if isinstance(data, pd.DataFrame):
|
3492
3951
|
return data.to_dict(orient="list")
|
@@ -3497,15 +3956,9 @@ def fsave(
|
|
3497
3956
|
return data
|
3498
3957
|
|
3499
3958
|
serializable_data = _convert_js(var_dict_or_df)
|
3500
|
-
|
3501
3959
|
# Save the serializable data to the JSON file
|
3502
3960
|
with open(fpath_fname, "w") as f_json:
|
3503
|
-
json.dump(serializable_data, f_json, indent=4)
|
3504
|
-
|
3505
|
-
# # Example usage:
|
3506
|
-
# sets = {"title": "mse_path_ MSE"}
|
3507
|
-
# jsonsave("/.json", sets)
|
3508
|
-
# # setss = jsonload("/.json")
|
3961
|
+
json.dump(serializable_data, f_json, indent=4)
|
3509
3962
|
|
3510
3963
|
def save_yaml(fpath, data, **kwargs):
|
3511
3964
|
import yaml
|
@@ -4579,10 +5032,11 @@ def copy(src, dst, overwrite=False, verbose=True):
|
|
4579
5032
|
dst.unlink()
|
4580
5033
|
else:
|
4581
5034
|
dst = dst.with_name(
|
4582
|
-
f"{dst.stem}_{datetime.now().strftime('
|
5035
|
+
f"{dst.stem}_{datetime.now().strftime('%y%m%d_%H%M%S')}{dst.suffix}"
|
4583
5036
|
)
|
4584
5037
|
shutil.copy(src, dst)
|
4585
5038
|
print(f"\n Done! copy to {dst}\n") if verbose else None
|
5039
|
+
return dst
|
4586
5040
|
else:
|
4587
5041
|
dst = dst / src.name
|
4588
5042
|
if dst.exists():
|
@@ -4590,14 +5044,33 @@ def copy(src, dst, overwrite=False, verbose=True):
|
|
4590
5044
|
shutil.rmtree(dst) # Remove existing directory
|
4591
5045
|
else:
|
4592
5046
|
dst = dst.with_name(
|
4593
|
-
f"{dst.stem}_{datetime.now().strftime('%H%M%S')}"
|
5047
|
+
f"{dst.stem}_{datetime.now().strftime('%y%m%d%H%M%S')}"
|
4594
5048
|
)
|
4595
5049
|
shutil.copytree(src, dst)
|
4596
5050
|
print(f"\n Done! copy to {dst}\n") if verbose else None
|
5051
|
+
return dst
|
4597
5052
|
|
4598
5053
|
except Exception as e:
|
4599
5054
|
logging.error(f"Failed {e}")
|
4600
|
-
|
5055
|
+
def local_path(fpath,station=r"Q:\\IM\\AGLengerke\\Jeff\\# testing\\temp\\"):
    """Copy a file into a staging folder first, to avoid file conflicts.

    Before copying, the staging folder is tidied on a best-effort basis: when
    its listing reports 10 or more entries, every listed path that contains
    the first five characters of ``fpath``'s base name (other than ``fpath``
    itself) is deleted.

    :param fpath: Path of the file to copy.
    :param station: Staging folder the file is copied into.
    :return: The path returned by ``copy``; ``fpath`` unchanged when the copy
        raised or produced no path.
    """
    try:
        # One listing serves both the entry count and the path iteration.
        listing = listdir(station, verbose=False)
        if listing.loc[0, "num"] >= 10:
            prefix = os.path.basename(fpath)[:5]
            for fpath_ in listing["path"]:
                if prefix in fpath_ and fpath != fpath_:
                    delete(fpath_)
    except Exception:
        # Cleanup is best-effort (e.g. the staging folder may be unreachable);
        # a failure here must not prevent the copy attempt below.
        pass
    try:
        new_path = copy(fpath, station)
    except Exception as e:
        print(f"Path did not update because: Error:{e}")
        new_path = fpath
    # copy() logs its own failures and then returns None; fall back to the
    # original path so callers always get a usable path back.
    if new_path is None:
        new_path = fpath
    return new_path
|
4601
5074
|
|
4602
5075
|
def cut(src, dst, overwrite=False):
|
4603
5076
|
return move(src=src, dst=dst, overwrite=overwrite)
|
@@ -6347,29 +6820,152 @@ def extract_kwargs(func):
|
|
6347
6820
|
|
6348
6821
|
return kwargs
|
6349
6822
|
def format_excel(
|
6350
|
-
df=None,
|
6351
|
-
filename=None,
|
6352
|
-
sheet_name=0,
|
6353
|
-
|
6354
|
-
|
6355
|
-
|
6356
|
-
|
6357
|
-
|
6358
|
-
|
6359
|
-
|
6360
|
-
|
6361
|
-
|
6823
|
+
df: pd.DataFrame=None,
|
6824
|
+
filename:str=None,
|
6825
|
+
sheet_name:Union[str, int]=0,
|
6826
|
+
insert_img:dict=None,# {"A1":img_path}
|
6827
|
+
usage:bool=False,
|
6828
|
+
text_color:Union[dict,bool]=False, # dict: set the text color
|
6829
|
+
bg_color:Union[dict,bool]=False, # dict: set the back_ground color
|
6830
|
+
cell:Union[dict, list]=None, # dict: or list for multiple locs setting:
|
6831
|
+
width:Union[bool, dict]=None, # dict
|
6832
|
+
width_factor:int=2,# calculated with plus this factor
|
6833
|
+
height:Union[bool, dict]=None, # dict e.g., {2: 50, 3: 25}, keys are rows
|
6834
|
+
height_max:int=25,
|
6835
|
+
merge:tuple=None, # tuple e.g., (slice(0, 1), slice(1, 3)),
|
6836
|
+
shade:Union[dict, list]=None, # dict
|
6837
|
+
comment:Union[dict, list]=None, # dict e.g., {(2, 4): "This is a comment"},
|
6362
6838
|
comment_always_visible:bool=True,# always display comment
|
6363
|
-
link=None, # dict e.g., {(2, 2): "https://example.com"},
|
6364
|
-
protect=None, # dict
|
6365
|
-
number_format=None, # dict: e.g., {1:"0.00", 2:"#,##0",3:"0%",4:"$#,##0.00"}
|
6839
|
+
link:Union[dict, list]=None, # dict e.g., {(2, 2): "https://example.com"},
|
6840
|
+
protect:dict=None, # dict
|
6841
|
+
number_format:dict=None, # dict: e.g., {1:"0.00", 2:"#,##0",3:"0%",4:"$#,##0.00"}
|
6366
6842
|
data_validation=None, # dict
|
6367
6843
|
apply_filter:bool=True, # add filter
|
6368
6844
|
freeze :str= False,#"A2",
|
6369
|
-
conditional_format=None, # dict
|
6370
|
-
verbose=
|
6845
|
+
conditional_format:dict=None, # dict
|
6846
|
+
verbose:bool=False,
|
6371
6847
|
**kwargs,
|
6372
6848
|
):
|
6849
|
+
"""
|
6850
|
+
Parameters:
|
6851
|
+
df : pandas.DataFrame, optional
|
6852
|
+
DataFrame to be written to the Excel file.
|
6853
|
+
filename : str, optional
|
6854
|
+
Path to the output Excel file.
|
6855
|
+
sheet_name : str or int, default 0
|
6856
|
+
Name or index of the sheet where data will be written.
|
6857
|
+
insert_img : dict, optional
|
6858
|
+
Dictionary specifying image insert locations, e.g., {"A1": "path/to/image.png"}.
|
6859
|
+
usage : bool, default False
|
6860
|
+
If True, display usage examples.
|
6861
|
+
cell : dict or list, optional
|
6862
|
+
Specifies cell formatting options.
|
6863
|
+
width : dict, optional
|
6864
|
+
Dictionary specifying column widths, e.g., {1: 20, 2: 30}.
|
6865
|
+
width_factor : int, default 2
|
6866
|
+
Additional factor to adjust column width dynamically.
|
6867
|
+
height : dict, optional
|
6868
|
+
Dictionary specifying row heights, e.g., {2: 50, 3: 25}.
|
6869
|
+
height_max : int, default 25
|
6870
|
+
Maximum row height allowed.
|
6871
|
+
merge : tuple, optional
|
6872
|
+
Specifies cell merging, e.g., (slice(0, 1), slice(1, 3)).
|
6873
|
+
shade : dict, optional
|
6874
|
+
Dictionary defining cell shading/styling.
|
6875
|
+
comment : dict, optional
|
6876
|
+
Dictionary adding comments, e.g., {(2, 4): "This is a comment"}.
|
6877
|
+
comment_always_visible : bool, default True
|
6878
|
+
Whether comments should always be visible.
|
6879
|
+
link : dict, optional
|
6880
|
+
Dictionary specifying hyperlinks, e.g., {(2, 2): "https://example.com"}.
|
6881
|
+
protect : dict, optional
|
6882
|
+
Dictionary defining cell protection settings.
|
6883
|
+
number_format : dict, optional
|
6884
|
+
Dictionary specifying number formats, e.g., {1: "0.00", 2: "#,##0"}.
|
6885
|
+
data_validation : dict, optional
|
6886
|
+
Dictionary setting data validation rules.
|
6887
|
+
apply_filter : bool, default True
|
6888
|
+
Whether to apply filters to the header row.
|
6889
|
+
freeze : str, optional
|
6890
|
+
Cell reference (e.g., "A2") to freeze rows/columns.
|
6891
|
+
conditional_format : dict, optional
|
6892
|
+
Dictionary defining conditional formatting rules.
|
6893
|
+
verbose : bool, default False
|
6894
|
+
Whether to print detailed execution logs.
|
6895
|
+
**kwargs : dict
|
6896
|
+
Additional parameters for advanced customization.
|
6897
|
+
"""
|
6898
|
+
|
6899
|
+
usage_str="""
|
6900
|
+
Formats an Excel file with various styling options.
|
6901
|
+
Usage:
|
6902
|
+
fsave(
|
6903
|
+
dir_save,
|
6904
|
+
fload(dir_save, output="bit", sheet_name=sheet_name),
|
6905
|
+
sheet_name=sheet_name,
|
6906
|
+
if_sheet_exists="overlay",
|
6907
|
+
mode="a",
|
6908
|
+
width_factor=0,
|
6909
|
+
height={1: 50},
|
6910
|
+
cell=[
|
6911
|
+
{
|
6912
|
+
(slice(0, 1), slice(0, df_exists.shape[1])): {
|
6913
|
+
"fill": {
|
6914
|
+
"start_color": "61AFEF", # Starting color
|
6915
|
+
"end_color": "61AFEF", # Ending color (useful for gradients)
|
6916
|
+
"fill_type": "solid", # Fill type (solid, gradient, etc.)
|
6917
|
+
},
|
6918
|
+
"font": {
|
6919
|
+
"name": "Arial", # Font name
|
6920
|
+
"size": 11, # Font size
|
6921
|
+
"bold": True, # Bold text
|
6922
|
+
"italic": False, # Italic text
|
6923
|
+
# "underline": "single", # Underline (single, double)
|
6924
|
+
"color": "#000000", # Font color
|
6925
|
+
},
|
6926
|
+
"alignment": {
|
6927
|
+
"horizontal": "center", # Horizontal alignment (left, center, right)
|
6928
|
+
"vertical": "center", # Vertical alignment (top, center, bottom)
|
6929
|
+
"wrap_text": True, # Wrap text in the cell
|
6930
|
+
"shrink_to_fit": True, # Shrink text to fit within cell
|
6931
|
+
"text_rotation": 0, # Text rotation angle
|
6932
|
+
},
|
6933
|
+
}
|
6934
|
+
},
|
6935
|
+
{
|
6936
|
+
(
|
6937
|
+
slice(0, df_exists.shape[0]),
|
6938
|
+
slice(0, df_exists.shape[1]),
|
6939
|
+
): {
|
6940
|
+
"alignment": {
|
6941
|
+
"horizontal": "center", # Horizontal alignment (left, center, right)
|
6942
|
+
"vertical": "center", # Vertical alignment (top, center, bottom)
|
6943
|
+
"wrap_text": True, # Wrap text in the cell
|
6944
|
+
"shrink_to_fit": True, # Shrink text to fit within cell
|
6945
|
+
"text_rotation": 0, # Text rotation angle
|
6946
|
+
},
|
6947
|
+
}
|
6948
|
+
},
|
6949
|
+
{
|
6950
|
+
(slice(0, df_exists.shape[0]), slice(2, 3)): {
|
6951
|
+
"alignment": {
|
6952
|
+
"horizontal": "left", # Horizontal alignment (left, center, right)
|
6953
|
+
},
|
6954
|
+
}
|
6955
|
+
},
|
6956
|
+
{
|
6957
|
+
(slice(0, df_exists.shape[0]), slice(7, 8)): {
|
6958
|
+
"alignment": {
|
6959
|
+
"horizontal": "left", # Horizontal alignment (left, center, right)
|
6960
|
+
},
|
6961
|
+
}
|
6962
|
+
},
|
6963
|
+
],
|
6964
|
+
password=False, # depass("ogB3B7y3xR9iuH4QIQbyy6VXG14I0A8DlsTxyiGqg1U="),
|
6965
|
+
)
|
6966
|
+
"""
|
6967
|
+
if verbose:
|
6968
|
+
print(usage_str)
|
6373
6969
|
import pandas as pd
|
6374
6970
|
from datetime import datetime
|
6375
6971
|
import openpyxl
|
@@ -6394,7 +6990,91 @@ def format_excel(
|
|
6394
6990
|
if end_col_letter
|
6395
6991
|
else f"{start_col_letter}{start_row}"
|
6396
6992
|
)
|
6993
|
+
def apply_color_to_worksheet(ws=None, sheet_name=None, conditions=None, cell_idx=None,where="text"):
|
6994
|
+
"""
|
6995
|
+
Apply text or background color formatting (selected by `where`) to a cell range of an openpyxl worksheet based on conditions.
|
6397
6996
|
|
6997
|
+
Parameters:
|
6998
|
+
ws : openpyxl worksheet
|
6999
|
+
The openpyxl worksheet object to style.
|
7000
|
+
sheet_name : str
|
7001
|
+
The name of the sheet to style.
|
7002
|
+
conditions : dict
|
7003
|
+
Dictionary defining conditions for text coloring.
|
7004
|
+
cell_idx : tuple, optional
|
7005
|
+
A tuple of slices defining the selected row and column range (only for DataFrame).
|
7006
|
+
|
7007
|
+
Returns:
|
7008
|
+
openpyxl.worksheet.worksheet.Worksheet
|
7009
|
+
The worksheet with applied formatting.
|
7010
|
+
"""
|
7011
|
+
def evaluate_condition(value, condition):
|
7012
|
+
"""Evaluate the condition dynamically."""
|
7013
|
+
try:
|
7014
|
+
if "x" in condition and re.search(r"[<>=!]=*", condition):
|
7015
|
+
expr = condition.replace("x", str(value))
|
7016
|
+
else:
|
7017
|
+
expr = condition
|
7018
|
+
return eval(expr)
|
7019
|
+
except Exception as e:
|
7020
|
+
# print(f"Error evaluating condition {condition} for value {value}: {e}")
|
7021
|
+
return False
|
7022
|
+
|
7023
|
+
def apply_condition_to_cell_text_color(cell, value):
|
7024
|
+
"""Apply color to a cell if it matches any condition."""
|
7025
|
+
for condition, color in conditions.items():
|
7026
|
+
if evaluate_condition(value, condition):
|
7027
|
+
# Apply color to font
|
7028
|
+
cell.font = openpyxl.styles.Font(
|
7029
|
+
color=openpyxl.styles.Color(rgb=hex2argb(color))
|
7030
|
+
)
|
7031
|
+
return
|
7032
|
+
def apply_condition_to_cell_bg_color(cell, value):
|
7033
|
+
"""Apply background color to a cell if it matches any condition."""
|
7034
|
+
for condition, color in conditions.items():
|
7035
|
+
if evaluate_condition(value, condition):
|
7036
|
+
if not isinstance(color,list):
|
7037
|
+
color=[color]
|
7038
|
+
if len(color)==1:
|
7039
|
+
cell.fill = PatternFill(
|
7040
|
+
start_color=hex2argb(color[0]),
|
7041
|
+
end_color=hex2argb(color[0]),
|
7042
|
+
fill_type="solid"
|
7043
|
+
)
|
7044
|
+
elif len(color)==2:
|
7045
|
+
cell.fill = PatternFill(
|
7046
|
+
start_color=hex2argb(color[0]),
|
7047
|
+
end_color=hex2argb(color[1]),
|
7048
|
+
fill_type="solid"
|
7049
|
+
)
|
7050
|
+
return
|
7051
|
+
if isinstance(cell_idx, tuple):
|
7052
|
+
# If cell_idx is provided, select a range based on the slice
|
7053
|
+
row_slice, col_slice = cell_idx
|
7054
|
+
rows = list(
|
7055
|
+
ws.iter_rows(
|
7056
|
+
min_row=row_slice.start + 1,
|
7057
|
+
max_row=row_slice.stop,
|
7058
|
+
min_col=col_slice.start + 1,
|
7059
|
+
max_col=col_slice.stop,
|
7060
|
+
)
|
7061
|
+
)
|
7062
|
+
for row in rows:
|
7063
|
+
for cell in row:
|
7064
|
+
if where=="text":
|
7065
|
+
apply_condition_to_cell_text_color(cell, cell.value)
|
7066
|
+
elif where=="bg":
|
7067
|
+
apply_condition_to_cell_bg_color(cell, cell.value)
|
7068
|
+
else:
|
7069
|
+
# If no cell_idx is provided, apply to all cells
|
7070
|
+
for row in ws.iter_rows():
|
7071
|
+
for cell in row:
|
7072
|
+
if where=="text":
|
7073
|
+
apply_condition_to_cell_text_color(cell, cell.value)
|
7074
|
+
elif where=="bg":
|
7075
|
+
apply_condition_to_cell_bg_color(cell,cell.value)
|
7076
|
+
return ws
|
7077
|
+
|
6398
7078
|
def apply_format(ws, cell, cell_range):
|
6399
7079
|
"""Apply cell formatting to a specified range."""
|
6400
7080
|
cell_font, cell_fill, cell_alignment, border = None, None, None, None
|
@@ -6405,7 +7085,7 @@ def format_excel(
|
|
6405
7085
|
font_color = "000000"
|
6406
7086
|
font_name = "Arial"
|
6407
7087
|
font_underline = "none"
|
6408
|
-
font_size =
|
7088
|
+
font_size = 11
|
6409
7089
|
font_bold = False
|
6410
7090
|
font_strike = False
|
6411
7091
|
font_italic = False
|
@@ -6742,6 +7422,8 @@ def format_excel(
|
|
6742
7422
|
counter += 1
|
6743
7423
|
unique_name = f"{sheet_name}_{counter}"
|
6744
7424
|
return unique_name
|
7425
|
+
|
7426
|
+
|
6745
7427
|
# if it is already worksheet format
|
6746
7428
|
if isinstance(df, pd.DataFrame):
|
6747
7429
|
pass
|
@@ -6763,55 +7445,56 @@ def format_excel(
|
|
6763
7445
|
kwargs.pop("format", None) # 更好地跟fsave结合使用
|
6764
7446
|
kwargs.pop("sheet_name", 0) # 更好地跟df.to_excel结合使用
|
6765
7447
|
# 只有openpyxl才支持 append
|
6766
|
-
mode = strcmp(kwargs.get("mode", "
|
7448
|
+
mode = strcmp(kwargs.get("mode", "a"), ["a", "w","auto"])[0]
|
7449
|
+
print(f'mode="{mode}"')
|
6767
7450
|
kwargs.pop("mode", None)
|
6768
7451
|
engine = strcmp(kwargs.get("engine", "openpyxl"), ["xlsxwriter", "openpyxl"])[0]
|
7452
|
+
# corr engine
|
7453
|
+
engine="openpyxl" if mode=="a" else "xlsxwriter"
|
7454
|
+
print(f'engine="{engine}"')
|
7455
|
+
if_sheet_exists=kwargs.get("if_sheet_exists","replace")
|
6769
7456
|
# 通常是不需要保存index的
|
6770
7457
|
index = kwargs.get("index", False)
|
6771
|
-
kwargs.pop("index", None)
|
6772
7458
|
# header
|
6773
7459
|
header=kwargs.pop("header",False)
|
6774
|
-
|
7460
|
+
password = kwargs.pop("password", None) # Use kwargs if provided
|
7461
|
+
|
7462
|
+
kwargs.pop("password", None)
|
7463
|
+
kwargs.pop("header", None)
|
7464
|
+
kwargs.pop("index", None)
|
7465
|
+
kwargs.pop("if_sheet_exists", None)
|
6775
7466
|
if isinstance(df, openpyxl.workbook.workbook.Workbook):
|
7467
|
+
"""打开Sheet_name指定的表格,如果该表不存在,则创建一个新的或从现有文件中加载数据"""
|
6776
7468
|
wb=df
|
6777
7469
|
try:
|
6778
7470
|
ws = wb.worksheets[sheet_name]
|
6779
7471
|
except Exception as e:
|
6780
|
-
print(
|
7472
|
+
print(f'mode="{mode}"')
|
6781
7473
|
if not os.path.exists(filename) or mode=="w":
|
6782
7474
|
ws=wb.active
|
6783
7475
|
ws.title = sheet_name
|
6784
7476
|
else:# file exists
|
6785
7477
|
wb = load_workbook(filename)
|
6786
|
-
|
6787
|
-
sheet_name_corr=generate_unique_sheet_name(wb, sheet_name)
|
6788
|
-
|
6789
|
-
# Save the workbook with the new sheet name
|
6790
|
-
with pd.ExcelWriter(filename, mode="a", engine=engine, if_sheet_exists="new") as writer:
|
7478
|
+
with pd.ExcelWriter(filename, mode="a", engine=engine, if_sheet_exists=if_sheet_exists) as writer:
|
6791
7479
|
for ws in df.worksheets: # Iterate through worksheets in the input workbook
|
6792
7480
|
ws_df = pd.DataFrame(ws.values)
|
6793
|
-
ws_df.to_excel(writer,
|
6794
|
-
|
6795
|
-
|
6796
|
-
|
6797
|
-
|
6798
|
-
|
6799
|
-
|
6800
|
-
|
6801
|
-
|
6802
|
-
|
6803
|
-
if not sheet_name==sheet_name_corr:
|
7481
|
+
ws_df.to_excel(writer,
|
7482
|
+
sheet_name=sheet_name,
|
7483
|
+
index=index,
|
7484
|
+
header=header,
|
7485
|
+
**kwargs)
|
7486
|
+
# 重新打开刚更新过的数据
|
7487
|
+
wb = load_workbook(filename)
|
7488
|
+
if sheet_name in wb.sheetnames:
|
7489
|
+
ws = wb[sheet_name]
|
7490
|
+
if not sheet_name==sheet_name:
|
6804
7491
|
wb.remove(wb[sheet_name])
|
6805
7492
|
else:
|
6806
|
-
raise KeyError(f"Worksheet {
|
7493
|
+
raise KeyError(f"Worksheet {sheet_name} does not exist.")
|
6807
7494
|
else:
|
6808
|
-
if not os.path.exists(filename) or mode=="w": # or overwrite
|
6809
|
-
# save file
|
6810
|
-
sheet_name_corr = (
|
6811
|
-
sheet_name if isinstance(sheet_name, str) else f"Sheet_{sheet_name}"
|
6812
|
-
)
|
7495
|
+
if not os.path.exists(filename) or mode=="w": # or overwrite
|
6813
7496
|
with pd.ExcelWriter(filename, mode="w", engine=engine) as writer:
|
6814
|
-
df.to_excel(writer, sheet_name=
|
7497
|
+
df.to_excel(writer, sheet_name=sheet_name, index=index, header=header,**kwargs)
|
6815
7498
|
wb = load_workbook(filename)
|
6816
7499
|
if isinstance(sheet_name, str):
|
6817
7500
|
ws = wb[sheet_name]
|
@@ -6821,15 +7504,75 @@ def format_excel(
|
|
6821
7504
|
ws = wb.worksheets[sheet_name] # the index of worksheets
|
6822
7505
|
else:# file exists
|
6823
7506
|
wb = load_workbook(filename)
|
6824
|
-
|
6825
|
-
|
6826
|
-
df.to_excel(writer, sheet_name=sheet_name_corr, index=index, header=header,**kwargs)
|
7507
|
+
with pd.ExcelWriter(filename, mode="a", engine=engine, if_sheet_exists=if_sheet_exists) as writer:
|
7508
|
+
df.to_excel(writer, sheet_name=sheet_name, index=index, header=header,**kwargs)
|
6827
7509
|
wb = load_workbook(filename)
|
6828
|
-
if
|
6829
|
-
ws = wb[
|
7510
|
+
if sheet_name in wb.sheetnames:
|
7511
|
+
ws = wb[sheet_name]
|
6830
7512
|
else:
|
6831
|
-
raise KeyError(f"Worksheet {
|
6832
|
-
|
7513
|
+
raise KeyError(f"Worksheet {sheet_name} does not exist.")
|
7514
|
+
# ! Apply Text color
|
7515
|
+
if text_color:
|
7516
|
+
if verbose:
|
7517
|
+
text_color_str="""
|
7518
|
+
text_color=[
|
7519
|
+
{
|
7520
|
+
(slice(1, 2), slice(0, 3)): {
|
7521
|
+
"x>20": "#DD0531", # Numbers > 20 → red
|
7522
|
+
"x<=8": "#35B20C", # Numbers ≤ 10 → blue
|
7523
|
+
"'x'!='available'": "#0510DD", # 'available' → green
|
7524
|
+
"10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
|
7525
|
+
"10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
|
7526
|
+
}
|
7527
|
+
},
|
7528
|
+
{
|
7529
|
+
(slice(3, df.shape[0] + 1), slice(0, 3)): {
|
7530
|
+
"x>20": "#DD0531", # Numbers > 20 → red
|
7531
|
+
"x<=10": "#35B20C", # Numbers ≤ 10 → blue
|
7532
|
+
"'x'!='available'": "#0510DD", # 'available' → green
|
7533
|
+
"10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
|
7534
|
+
"10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
|
7535
|
+
}
|
7536
|
+
},
|
7537
|
+
],
|
7538
|
+
"""
|
7539
|
+
print(text_color_str)
|
7540
|
+
if not isinstance(text_color, list):
|
7541
|
+
text_color=[text_color]
|
7542
|
+
for text_color_ in text_color:
|
7543
|
+
for indices, dict_text_conditions in text_color_.items():
|
7544
|
+
ws = apply_color_to_worksheet(ws, sheet_name=sheet_name, conditions=dict_text_conditions, cell_idx=indices,where="text")
|
7545
|
+
# ! Apply background color
|
7546
|
+
if bg_color:
|
7547
|
+
if verbose:
|
7548
|
+
bg_color_str="""
|
7549
|
+
bg_color=[
|
7550
|
+
{
|
7551
|
+
(slice(1, 2), slice(0, 3)): {
|
7552
|
+
"x>20": ["#DD0531","#35B20C"], # Numbers > 20 → red
|
7553
|
+
"x<=8": "#35B20C", # Numbers ≤ 10 → blue
|
7554
|
+
"'x'!='available'": "#0510DD", # 'available' → green
|
7555
|
+
"10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
|
7556
|
+
"10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
|
7557
|
+
}
|
7558
|
+
},
|
7559
|
+
{
|
7560
|
+
(slice(3, df.shape[0] + 1), slice(0, 3)): {
|
7561
|
+
"x>20": "#DD0531", # Numbers > 20 → red
|
7562
|
+
"x<=10": "#35B20C", # Numbers ≤ 10 → blue
|
7563
|
+
"'x'!='available'": "#0510DD", # 'available' → green
|
7564
|
+
"10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
|
7565
|
+
"10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
|
7566
|
+
}
|
7567
|
+
},
|
7568
|
+
],
|
7569
|
+
"""
|
7570
|
+
print(bg_color_str)
|
7571
|
+
if not isinstance(bg_color, list):
|
7572
|
+
bg_color=[bg_color]
|
7573
|
+
for bg_color_ in bg_color:
|
7574
|
+
for indices, dict_text_conditions in bg_color_.items():
|
7575
|
+
ws = apply_color_to_worksheet(ws, sheet_name=sheet_name, conditions=dict_text_conditions, cell_idx=indices,where="bg")
|
6833
7576
|
# !Apply cell formatting
|
6834
7577
|
if cell:
|
6835
7578
|
if not isinstance(cell, list):
|
@@ -6838,6 +7581,7 @@ def format_excel(
|
|
6838
7581
|
for indices, format_options in cell_.items():
|
6839
7582
|
cell_range = convert_indices_to_range(*indices)
|
6840
7583
|
apply_format(ws, format_options, cell_range)
|
7584
|
+
|
6841
7585
|
if verbose:
|
6842
7586
|
cell_tmp="""cell=[
|
6843
7587
|
{
|
@@ -6918,14 +7662,16 @@ def format_excel(
|
|
6918
7662
|
print(number_format_temp)
|
6919
7663
|
|
6920
7664
|
if freeze:
|
7665
|
+
if isinstance(freeze,bool):
|
7666
|
+
freeze='A2'
|
6921
7667
|
ws.freeze_panes = freeze # Freeze everything above and to the left of A2
|
6922
7668
|
if apply_filter:
|
6923
7669
|
if isinstance(apply_filter, bool):
|
6924
7670
|
# Default: Apply filter to the entire first row (header)
|
6925
7671
|
filter_range = f"A1:{get_column_letter(ws.max_column)}1"
|
6926
7672
|
ws.auto_filter.ref = filter_range
|
6927
|
-
|
6928
|
-
|
7673
|
+
if not freeze:
|
7674
|
+
ws.freeze_panes = "A2" # Freeze everything above and to the left of A2
|
6929
7675
|
elif isinstance(apply_filter, tuple):
|
6930
7676
|
row_slice, col_slice = apply_filter
|
6931
7677
|
# Extract the start and end indices for rows and columns
|
@@ -6949,7 +7695,11 @@ def format_excel(
|
|
6949
7695
|
ws.auto_filter.ref = filter_range
|
6950
7696
|
if freeze:
|
6951
7697
|
ws.freeze_panes = freeze # Freeze everything above and to the left of A2
|
6952
|
-
# !widths
|
7698
|
+
# !widths
|
7699
|
+
if isinstance(width,bool):
|
7700
|
+
width=None if width else False
|
7701
|
+
if isinstance(height,bool):
|
7702
|
+
height=None if height else False
|
6953
7703
|
if width is None: # automatic adust width
|
6954
7704
|
for col in ws.columns:
|
6955
7705
|
max_length = 0
|
@@ -6971,6 +7721,8 @@ def format_excel(
|
|
6971
7721
|
pass
|
6972
7722
|
adjusted_width = max_length + width_factor # You can adjust the padding value as needed
|
6973
7723
|
ws.column_dimensions[column].width = adjusted_width
|
7724
|
+
elif isinstance(width,bool):
|
7725
|
+
pass
|
6974
7726
|
else:
|
6975
7727
|
for col_idx, width_ in width.items():
|
6976
7728
|
col_letter = get_column_letter(col_idx)
|
@@ -6989,6 +7741,8 @@ def format_excel(
|
|
6989
7741
|
estimated_height += 5 * (max_line_length // 20)
|
6990
7742
|
max_height = max(max_height, estimated_height)
|
6991
7743
|
ws.row_dimensions[row[0].row].height = max_height
|
7744
|
+
elif isinstance(height,bool) and not height:
|
7745
|
+
pass
|
6992
7746
|
else:
|
6993
7747
|
for row, height_ in height.items():
|
6994
7748
|
ws.row_dimensions[row].height = height_
|
@@ -7073,7 +7827,6 @@ def format_excel(
|
|
7073
7827
|
)
|
7074
7828
|
# !Protect sheet with a password
|
7075
7829
|
# Fetch the password
|
7076
|
-
password = kwargs.pop("password", None) # Use kwargs if provided
|
7077
7830
|
|
7078
7831
|
if all([password is not None, any([protect, isinstance(password, (str, list, tuple)) and any(password)])]): # Check if protection options are provided
|
7079
7832
|
if protect is None:
|
@@ -7111,39 +7864,71 @@ def format_excel(
|
|
7111
7864
|
for rule in rules:
|
7112
7865
|
# Handle color scale
|
7113
7866
|
if "color_scale" in rule:
|
7114
|
-
|
7115
|
-
|
7116
|
-
|
7117
|
-
|
7867
|
+
if verbose:
|
7868
|
+
color_scale_tmp="""
|
7869
|
+
conditional_format={
|
7870
|
+
(slice(1, df.shape[0] + 1), slice(1, 2)):
|
7871
|
+
{
|
7872
|
+
"color_scale": {
|
7873
|
+
"start_type": "min",
|
7874
|
+
"start_value": 0,
|
7875
|
+
"start_color": "#74ADE9",
|
7876
|
+
"mid_type": "percentile",
|
7877
|
+
"mid_value": 50,
|
7878
|
+
"mid_color": "74ADE9",
|
7879
|
+
"end_type": "max",
|
7880
|
+
"end_value": 100,
|
7881
|
+
"end_color": "#B62833",
|
7882
|
+
}
|
7883
|
+
}}
|
7884
|
+
"""
|
7885
|
+
print(color_scale_tmp)
|
7886
|
+
color_scale = rule["color_scale"]
|
7118
7887
|
|
7119
7888
|
color_scale_rule = ColorScaleRule(
|
7120
7889
|
start_type=color_scale.get("start_type", "min"),
|
7121
|
-
start_value=color_scale.get("start_value"),
|
7122
|
-
start_color=start_color,
|
7123
|
-
mid_type=color_scale.get("mid_type"),
|
7124
|
-
mid_value=color_scale.get("mid_value"),
|
7125
|
-
mid_color=mid_color,
|
7890
|
+
start_value=color_scale.get("start_value",None),
|
7891
|
+
start_color=hex2argb(color_scale.get("start_color", "#74ADE9")),
|
7892
|
+
mid_type=color_scale.get("mid_type","percentile"),
|
7893
|
+
mid_value=color_scale.get("mid_value",None),
|
7894
|
+
mid_color=hex2argb(color_scale.get("mid_color", "FFFFFF")),
|
7126
7895
|
end_type=color_scale.get("end_type", "max"),
|
7127
|
-
end_value=color_scale.get("end_value"),
|
7128
|
-
end_color=end_color,
|
7896
|
+
end_value=color_scale.get("end_value",None),
|
7897
|
+
end_color=hex2argb(color_scale.get("end_color", "#B62833")),
|
7129
7898
|
)
|
7130
7899
|
ws.conditional_formatting.add(cell_range, color_scale_rule)
|
7131
7900
|
# Handle data bar
|
7132
7901
|
if "data_bar" in rule:
|
7902
|
+
if verbose:
|
7903
|
+
data_bar_tmp="""
|
7904
|
+
conditional_format={
|
7905
|
+
(slice(1, df.shape[0] + 1), slice(1, 2)):
|
7906
|
+
{
|
7907
|
+
"data_bar": {
|
7908
|
+
"start_type": "min",
|
7909
|
+
"start_value": None,
|
7910
|
+
"end_type": "max",
|
7911
|
+
"end_value": None,
|
7912
|
+
"color": "F6C9CE",
|
7913
|
+
"show_value": True,
|
7914
|
+
}
|
7915
|
+
}}
|
7916
|
+
"""
|
7917
|
+
print(data_bar_tmp)
|
7133
7918
|
data_bar = rule["data_bar"]
|
7134
7919
|
bar_color = hex2argb(data_bar.get("color", "638EC6"))
|
7135
7920
|
|
7136
7921
|
data_bar_rule = DataBarRule(
|
7137
7922
|
start_type=data_bar.get("start_type", "min"),
|
7138
|
-
start_value=data_bar.get("start_value"),
|
7923
|
+
start_value=data_bar.get("start_value",None),
|
7139
7924
|
end_type=data_bar.get("end_type", "max"),
|
7140
|
-
end_value=data_bar.get("end_value"),
|
7925
|
+
end_value=data_bar.get("end_value",None),
|
7141
7926
|
color=bar_color,
|
7142
7927
|
showValue=data_bar.get("show_value", True),
|
7143
7928
|
)
|
7144
7929
|
ws.conditional_formatting.add(cell_range, data_bar_rule)
|
7145
7930
|
|
7146
|
-
# Handle icon
|
7931
|
+
# Handle icon sets
|
7147
7932
|
if "icon_set" in rule:
|
7148
7933
|
icon_set = rule["icon_set"]
|
7149
7934
|
icon_set_rule = IconSet(
|
@@ -7152,6 +7937,34 @@ def format_excel(
|
|
7152
7937
|
reverse=icon_set.get("reverse", False) # Corrected
|
7153
7938
|
)
|
7154
7939
|
ws.conditional_formatting.add(cell_range, icon_set_rule)
|
7940
|
+
# Handle text-based conditions
|
7941
|
+
if "text_color" in rule: # not work
|
7942
|
+
from openpyxl.styles.differential import DifferentialStyle
|
7943
|
+
from openpyxl.formatting.rule import Rule
|
7944
|
+
from openpyxl.styles import PatternFill
|
7945
|
+
|
7946
|
+
# Extract the fill properties from the rule
|
7947
|
+
fill = rule.get("fill", {})
|
7948
|
+
start_color = fill.get("start_color", "FFFFFF") # Default to white if not specified
|
7949
|
+
end_color = fill.get("end_color", "FFFFFF") # Default to white if not specified
|
7950
|
+
fill_type = fill.get("fill_type", "solid") # Default to solid fill if not specified
|
7951
|
+
|
7952
|
+
# Extract the text condition or default to a space if 'text' is not provided
|
7953
|
+
text = rule.get("text", " ")
|
7954
|
+
|
7955
|
+
# Create the DifferentialStyle using the extracted fill settings
|
7956
|
+
dxf = DifferentialStyle(
|
7957
|
+
fill=PatternFill(start_color=start_color, end_color=end_color, fill_type=fill_type)
|
7958
|
+
)
|
7959
|
+
|
7960
|
+
# Create the text rule based on the text condition
|
7961
|
+
text_rule = Rule(
|
7962
|
+
type="containsText", # The type of condition
|
7963
|
+
operator=rule.get("operator", "equal"), # Default operator is "equal"
|
7964
|
+
text=text,
|
7965
|
+
dxf=dxf, # Apply the fill color from DifferentialStyle
|
7966
|
+
)
|
7967
|
+
ws.conditional_formatting.add(cell_range, text_rule)
|
7155
7968
|
if verbose:
|
7156
7969
|
conditional_format_temp="""
|
7157
7970
|
conditional_format={
|
@@ -7183,10 +7996,49 @@ def format_excel(
|
|
7183
7996
|
}
|
7184
7997
|
"""
|
7185
7998
|
print(conditional_format_temp)
|
7999
|
+
if insert_img:
|
8000
|
+
if not isinstance(insert_img, dict):
|
8001
|
+
raise ValueError(f'insert_img 需要dict格式: e.g., insert_img={"A1":"example.png"}')
|
8002
|
+
try:
|
8003
|
+
from openpyxl import drawing
|
8004
|
+
from PIL import Image
|
8005
|
+
import PIL
|
8006
|
+
for img_cell, img_data in insert_img.items():
|
8007
|
+
img_width = img_height = None
|
8008
|
+
pil_img=img_path = None
|
8009
|
+
if isinstance(img_data, dict):
|
8010
|
+
if "path" in img_data:
|
8011
|
+
img_path = img_data["path"] # File path
|
8012
|
+
img_ = drawing.image.Image(img_path)
|
8013
|
+
elif "image" in img_data:
|
8014
|
+
pil_img = img_data["image"] # PIL Image object
|
8015
|
+
elif "array" in img_data:
|
8016
|
+
pil_img = Image.fromarray(img_data["array"]) # Convert NumPy array to PIL Image
|
8017
|
+
|
8018
|
+
img_width = img_data.get("width", None)
|
8019
|
+
img_height = img_data.get("height", None)
|
8020
|
+
elif isinstance(img_data, str):
|
8021
|
+
img_path = img_data # Direct file path
|
8022
|
+
elif isinstance(img_data, (PIL.Image.Image,PIL.PngImagePlugin.PngImageFile)):
|
8023
|
+
pil_img = img_data # Direct PIL Image object
|
8024
|
+
elif isinstance(img_data, np.ndarray):
|
8025
|
+
pil_img = Image.fromarray(img_data) # Convert NumPy array to PIL Image
|
8026
|
+
elif pil_img:
|
8027
|
+
img_ = drawing.image.Image(pil_img)
|
8028
|
+
|
8029
|
+
# Set width and height if provided
|
8030
|
+
if img_width is not None:
|
8031
|
+
img_.width = img_width
|
8032
|
+
if img_height is not None:
|
8033
|
+
img_.height = img_height
|
8034
|
+
ws.add_image(img_, img_cell)
|
8035
|
+
print(f"✅ at {img_cell} inserted image: {os.path.basename(img_path)}")
|
7186
8036
|
|
8037
|
+
except Exception as e:
|
8038
|
+
print(e)
|
8039
|
+
|
7187
8040
|
# Save the workbook
|
7188
8041
|
wb.save(filename)
|
7189
|
-
print(f"Formatted Excel file saved as:\n{filename}")
|
7190
8042
|
|
7191
8043
|
|
7192
8044
|
def preview(var):
|
@@ -7511,9 +8363,11 @@ def df_astype(
|
|
7511
8363
|
columns: Optional[Union[str, List[str]]] = None,
|
7512
8364
|
astype: str = None, # "datetime",
|
7513
8365
|
skip_row: Union[str, list] = None,
|
8366
|
+
original_fmt:str=None,
|
7514
8367
|
fmt: Optional[str] = None,
|
7515
8368
|
inplace: bool = False,
|
7516
8369
|
errors: str = "coerce", # Can be "ignore", "raise", or "coerce"
|
8370
|
+
verbose:bool=True,
|
7517
8371
|
**kwargs,
|
7518
8372
|
) -> Optional[pd.DataFrame]:
|
7519
8373
|
"""
|
@@ -7585,13 +8439,21 @@ def df_astype(
|
|
7585
8439
|
# correct the astype input
|
7586
8440
|
if isinstance(astype, str):
|
7587
8441
|
astype = strcmp(astype, astypes)[0]
|
7588
|
-
print(f"converting as type: {astype}")
|
8442
|
+
print(f"converting as type: {astype}")
|
7589
8443
|
elif isinstance(astype, dict):
|
7590
8444
|
for col, dtype in astype.items():
|
7591
8445
|
dtype = "date" if dtype == "day" else dtype
|
7592
|
-
|
8446
|
+
target_dtype = strcmp(dtype, astypes)[0]
|
8447
|
+
try:
|
8448
|
+
if target_dtype == "datetime":
|
8449
|
+
data[col] = pd.to_datetime(data[col], format=original_fmt, errors=errors)
|
8450
|
+
elif target_dtype == "timedelta":
|
8451
|
+
data[col] = pd.to_timedelta(data[col], errors=errors)
|
8452
|
+
else:
|
8453
|
+
data[col] = data[col].astype(target_dtype)
|
8454
|
+
except Exception as e:
|
8455
|
+
print(f"Error converting column '{col}' to {target_dtype}: {e}")
|
7593
8456
|
return data if not inplace else None
|
7594
|
-
|
7595
8457
|
# Ensure columns is a list
|
7596
8458
|
if isinstance(columns, str):
|
7597
8459
|
columns = [columns]
|
@@ -7613,9 +8475,17 @@ def df_astype(
|
|
7613
8475
|
kwargs.pop("errors", None)
|
7614
8476
|
# convert it as type: datetime
|
7615
8477
|
if isinstance(column, int):
|
7616
|
-
data.iloc[:, column] = pd.to_datetime(
|
7617
|
-
|
7618
|
-
|
8478
|
+
data.iloc[:, column] = pd.to_datetime(data.iloc[:, column], format=original_fmt, errors=errors, **kwargs) if original_fmt is not None else pd.to_datetime(data[column], errors=errors, **kwargs)
|
8479
|
+
|
8480
|
+
try:
|
8481
|
+
if fmt is not None:
|
8482
|
+
# data[column] = data[column].apply(lambda x: f"{x:{fmt}}")
|
8483
|
+
data[column] = data[column].apply(
|
8484
|
+
lambda x: x.strftime(fmt) if pd.notnull(x) else None
|
8485
|
+
)
|
8486
|
+
except Exception as e:
|
8487
|
+
print(f"设置格式的时候有误: {e}")
|
8488
|
+
|
7619
8489
|
# further convert:
|
7620
8490
|
if astype == "time":
|
7621
8491
|
data.iloc[:, column] = data.iloc[:, column].dt.time
|
@@ -7636,11 +8506,20 @@ def df_astype(
|
|
7636
8506
|
else:
|
7637
8507
|
data[column] = (
|
7638
8508
|
pd.to_datetime(
|
7639
|
-
data[column], format=
|
8509
|
+
data[column], format=original_fmt, errors=errors, **kwargs
|
7640
8510
|
)
|
7641
|
-
if
|
8511
|
+
if original_fmt is not None
|
7642
8512
|
else pd.to_datetime(data[column], errors=errors, **kwargs)
|
7643
8513
|
)
|
8514
|
+
|
8515
|
+
try:
|
8516
|
+
if fmt is not None:
|
8517
|
+
# data[column] = data[column].apply(lambda x: f"{x:{fmt}}")
|
8518
|
+
data[column] = data[column].apply(
|
8519
|
+
lambda x: x.strftime(fmt) if pd.notnull(x) else None
|
8520
|
+
)
|
8521
|
+
except Exception as e:
|
8522
|
+
print(f"设置格式的时候有误: {e}")
|
7644
8523
|
# further convert:
|
7645
8524
|
if astype == "time":
|
7646
8525
|
data[column] = data[column].dt.time
|
@@ -7677,16 +8556,12 @@ def df_astype(
|
|
7677
8556
|
else:
|
7678
8557
|
data[column] = data[column].astype(astype)
|
7679
8558
|
# print(f"Successfully converted '{column}' to {astype}.")
|
7680
|
-
|
7681
|
-
try:
|
7682
|
-
if fmt is not None:
|
7683
|
-
data[column] = data[column].apply(lambda x: f"{x:{fmt}}")
|
7684
|
-
except Exception as e:
|
7685
|
-
print(f"设置格式的时候有误: {e}")
|
8559
|
+
|
7686
8560
|
except Exception as e:
|
7687
8561
|
print(f"Error converting '{column}' to {astype}: {e}")
|
7688
8562
|
try:
|
7689
|
-
|
8563
|
+
if verbose:
|
8564
|
+
display(data.info()[:10])
|
7690
8565
|
except:
|
7691
8566
|
pass
|
7692
8567
|
return data
|
@@ -9867,6 +10742,7 @@ def df_qc(
|
|
9867
10742
|
res_qc["dtype_counts"] = data.dtypes.value_counts()
|
9868
10743
|
|
9869
10744
|
# Distribution Analysis (mean, median, mode, std dev, IQR for numeric columns)
|
10745
|
+
|
9870
10746
|
distribution_stats = data.select_dtypes(include=[np.number]).describe().T
|
9871
10747
|
iqr = data.select_dtypes(include=[np.number]).apply(
|
9872
10748
|
lambda x: x.quantile(0.75) - x.quantile(0.25)
|