py2ls 0.2.5.5__py3-none-any.whl → 0.2.5.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
3
  import sys
4
4
  import os
5
5
  from IPython.display import display
6
- from typing import List, Optional, Union
6
+ from typing import List, Optional, Union, Any
7
7
 
8
8
  from regex import X
9
9
 
@@ -169,6 +169,11 @@ def run_every(when: str = None, job=None, wait: int = 60):
169
169
 
170
170
  :param when: String specifying the interval, e.g. '2 minutes', '4 hours', '1 day'.
171
171
  :param job: The function to be scheduled.
172
+
173
+ # usage:
174
+ def job():
175
+ print("1 sec")
176
+ run_every(when="1 sec", job=job)
172
177
  """
173
178
  import schedule
174
179
  import time
@@ -201,19 +206,17 @@ def run_every(when: str = None, job=None, wait: int = 60):
201
206
  while True:
202
207
  schedule.run_pending()
203
208
  time.sleep(wait) # in seconds
204
- time.sleep(wait) # in seconds
205
-
206
-
207
- # # usage:
208
- # def job():
209
- # print("1 sec")
210
- # run_every(when="1 sec", job=job)
211
-
212
-
209
+ time.sleep(wait) # in seconds
213
210
  def run_at(when: str, job=None, wait: int = 60):
214
211
  """
215
212
  Schedules a job to run at an exact time of the day.
216
213
 
214
+ # Example usage:
215
+ def my_job():
216
+ print("Job executed at the exact time!")
217
+ # Schedule the job to run at 1:30 pm each day
218
+ run_at(when="1.30 pm", job=my_job)
219
+
217
220
  :param when: String specifying the time, e.g. '1:30 pm', '1.30 am', '14:30', '8:45 am'.
218
221
  :param job: The function to be scheduled.
219
222
  :param wait: The sleep interval between checks in seconds.
@@ -241,13 +244,10 @@ def run_at(when: str, job=None, wait: int = 60):
241
244
  f"Invalid time format: {when}. Use 'HH:MM' (24-hour) or 'H:MM AM/PM' format."
242
245
  )
243
246
  return
244
-
245
247
  print(f"Job scheduled to run at {scheduled_time}.")
246
-
247
248
  # Keep checking the current time
248
249
  while True:
249
250
  now = datetime.now()
250
-
251
251
  # Check if current time matches the scheduled time
252
252
  if (
253
253
  now.time().hour == scheduled_time.hour
@@ -259,14 +259,7 @@ def run_at(when: str, job=None, wait: int = 60):
259
259
  ) # Sleep for a minute to avoid running the job multiple times in the same minute
260
260
 
261
261
  time.sleep(wait) # wait to avoid excessive CPU usage
262
-
263
-
264
- # # Example usage:
265
- # def my_job():
266
- # print("Job executed at the exact time!")
267
- # # Schedule the job at 14:30 when day
268
- # run_at(when="1.30 pm", job=my_job)
269
-
262
+
270
263
  # ************* above section: run_when *************
271
264
 
272
265
 
@@ -344,12 +337,7 @@ def get_version(pkg):
344
337
  if isinstance(pkg, str):
345
338
  get_v(pkg)
346
339
  elif isinstance(pkg, list):
347
- [get_v(pkg_) for pkg_ in pkg]
348
-
349
-
350
- # usage:
351
- # get_version(['pandas','numpy','py2ls'])
352
-
340
+ [get_v(pkg_) for pkg_ in pkg]
353
341
 
354
342
  def rm_folder(folder_path, verbose=True):
355
343
  import shutil
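Editor's note: the inline usage comment that previously followed get_version was dropped in this release; for reference, a minimal call looks like this (a sketch, assuming the listed packages are installed):

from py2ls.ips import get_version

# Print the installed version of one package, or of several at once
get_version("pandas")
get_version(["pandas", "numpy", "py2ls"])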
@@ -387,32 +375,11 @@ def fremove(path, verbose=True):
387
375
  except Exception as e:
388
376
  if verbose:
389
377
  print(f"Failed to delete {path}. Reason: {e}")
390
-
391
-
392
- # def get_cwd(verbose: bool = True):
393
- # """
394
- # get_cwd: to get the current working directory
395
- # Args:
396
- # verbose (bool, optional): to show which function is use. Defaults to True.
397
- # """
398
- # try:
399
- # script_dir = os.path.dirname(os.path.abspath(__file__))
400
- # if verbose:
401
- # print("os.path.dirname(os.path.abspath(__file__)):", script_dir)
402
- # except NameError:
403
- # # This works in an interactive environment (like a Jupyter notebook)
404
- # script_dir = os.getcwd()
405
- # if verbose:
406
- # print("os.getcwd():", script_dir)
407
- # return script_dir
408
-
409
-
378
+
410
379
  def get_cwd():
411
380
  from pathlib import Path
412
-
413
381
  # Get the current script's directory as a Path object
414
382
  current_directory = Path(__file__).resolve().parent
415
-
416
383
  return current_directory
417
384
 
418
385
 
@@ -530,12 +497,9 @@ def echo(*args, **kwargs):
530
497
 
531
498
  def chat(*args, **kwargs):
532
499
  return echo(*args, **kwargs)
533
-
534
-
535
500
  def ai(*args, **kwargs):
536
501
  return echo(*args, **kwargs)
537
502
 
538
-
539
503
  def detect_lang(text, output="lang", verbose=True):
540
504
  from langdetect import detect
541
505
 
@@ -565,10 +529,6 @@ def is_text(s):
565
529
  # no_special = not re.search(r'[^A-Za-z0-9\s]', s)
566
530
  return has_alpha and has_non_alpha
567
531
 
568
-
569
- from typing import Any, Union
570
-
571
-
572
532
  def share(*args, strict=True, n_shared=2, verbose=True):
573
533
  """
574
534
  check the shared elements in two lists.
@@ -1097,7 +1057,365 @@ def imgcmp(
1097
1057
  else:
1098
1058
  raise ValueError("Invalid method. Use 'ssim', 'match', or 'knn'.")
1099
1059
 
1060
+ def fcmp(file1, file2, kind= None, verbose=True, **kwargs):
1061
+ import pandas as pd
1062
+ import os
1063
+ from concurrent.futures import ThreadPoolExecutor
1064
+ from datetime import datetime
1065
+ import json
1066
+
1067
+ # --- Compare excel files ---
1068
+ def cmp_excel(
1069
+ file1,# base
1070
+ file2, # new
1071
+ sheet_name=None, # str or list of str; default: sheets common to both files
1072
+ key_columns=None,
1073
+ ignore_columns=None,
1074
+ numeric_tolerance=0,
1075
+ ignore_case=False,
1076
+ detect_reordered_rows=False,
1077
+ verbose=True,
1078
+ **kwargs,
1079
+ ):
1080
+ """
1081
+ Compare two Excel files and identify differences across specified sheets.
1082
+
1083
+ Parameters:
1084
+ - file1 (Base/Reference): str, path to the first Excel file.
1085
+ - file2: str, path to the second Excel file.
1086
+ - sheet_name: list of str, specific sheets to compare (default: all common sheets).
1087
+ - key_columns: list of str, columns to use as unique identifiers (default: None, compares all columns).
1088
+ - ignore_columns: list of str, columns to exclude from comparison (default: None).
1089
+ - numeric_tolerance: float, tolerance for numeric column differences (default: 0, exact match).
1090
+ - ignore_case: bool, whether to ignore case differences (default: False).
1091
+ - detect_reordered_rows: bool, whether to detect reordered rows (default: False).
1092
+ - verbose: bool, whether to print progress messages (default: True).
1093
+
1094
+ Returns:
1095
+ - dict, summary of differences for each sheet.
1096
+ """
1097
+ # Define output directory based on file1 basename
1098
+ file1_basename = os.path.splitext(os.path.basename(file1))[0]
1099
+ output_dir = f"CMP_{file1_basename}"
1100
+ if not os.path.exists(output_dir):
1101
+ os.makedirs(output_dir)
1102
+
1103
+ # Load both files into a dictionary of DataFrames
1104
+ xl1 = pd.ExcelFile(file1)
1105
+ xl2 = pd.ExcelFile(file2)
1106
+
1107
+ # Get the sheets to compare
1108
+ sheets1 = set(xl1.sheet_names)
1109
+ sheets2 = set(xl2.sheet_names)
1110
+ if sheet_name is None:
1111
+ sheet_name = list(sheets1 & sheets2) # Compare only common sheets
1112
+ else:
1113
+ sheet_name = [sheet for sheet in sheet_name if sheet in sheets1 and sheet in sheets2]
1114
+
1115
+ summary = {}
1116
+ print(f"Reference file: '{os.path.basename(file1)}'")
1117
+ def compare_sheet(sheet):
1118
+
1119
+ if verbose:
1120
+ print(f"Comparing sheet: {sheet}...")
1121
+
1122
+ # Read sheets as DataFrames
1123
+ df1 = xl1.parse(sheet).fillna("NA")
1124
+ df2 = xl2.parse(sheet).fillna("NA")
1125
+
1126
+ # Handle case insensitivity
1127
+ if ignore_case:
1128
+ df1.columns = [col.lower() for col in df1.columns]
1129
+ df2.columns = [col.lower() for col in df2.columns]
1130
+ df1 = df1.applymap(lambda x: x.lower() if isinstance(x, str) else x)
1131
+ df2 = df2.applymap(lambda x: x.lower() if isinstance(x, str) else x)
1132
+
1133
+ # Drop ignored columns
1134
+ if ignore_columns:
1135
+ df1 = df1.drop(
1136
+ columns=[col for col in ignore_columns if col in df1.columns],
1137
+ errors="ignore",
1138
+ )
1139
+ df2 = df2.drop(
1140
+ columns=[col for col in ignore_columns if col in df2.columns],
1141
+ errors="ignore",
1142
+ )
1143
+
1144
+ # Normalize column order for comparison
1145
+ common_cols = df1.columns.intersection(df2.columns)
1146
+ df1 = df1[common_cols]
1147
+ df2 = df2[common_cols]
1148
+
1149
+ # Specify key columns for comparison
1150
+ if key_columns:
1151
+ df1 = df1.set_index(key_columns)
1152
+ df2 = df2.set_index(key_columns)
1153
+ # Identify added and deleted rows based on entire row comparison, not just index
1154
+ added_rows = df2[~df2.apply(tuple, 1).isin(df1.apply(tuple, 1))]
1155
+ deleted_rows = df1[~df1.apply(tuple, 1).isin(df2.apply(tuple, 1))]
1156
+
1157
+ # Detect reordered rows
1158
+ reordered_rows = pd.DataFrame()
1159
+ if detect_reordered_rows:
1160
+ # Find rows that exist in both DataFrames but are in different positions
1161
+ for idx in df1.index:
1162
+ if idx in df2.index:
1163
+ if not df1.loc[idx].equals(df2.loc[idx]):
1164
+ reordered_rows = pd.concat([reordered_rows, df1.loc[[idx]]])
1165
+
1166
+ # Detect modified rows (in case of exact matches between the two files)
1167
+ aligned_df1 = df1[df1.index.isin(df2.index)]
1168
+ aligned_df2 = df2[df2.index.isin(df1.index)]
1169
+
1170
+ if numeric_tolerance > 0:
1171
+ modified_rows = aligned_df1.compare(
1172
+ aligned_df2,
1173
+ keep_shape=False,
1174
+ keep_equal=False,
1175
+ result_names=["left", "right"],
1176
+ ).pipe(
1177
+ lambda df: df[
1178
+ ~df.apply(
1179
+ lambda row: (
1180
+ abs(row["left"] - row["right"]) <= numeric_tolerance
1181
+ if pd.api.types.is_numeric_dtype(row["left"])
1182
+ else False
1183
+ ),
1184
+ axis=1,
1185
+ )
1186
+ ]
1187
+ )
1188
+ else:
1189
+ modified_rows = aligned_df1.compare(
1190
+ aligned_df2, keep_shape=False, keep_equal=False
1191
+ )
1192
+
1193
+ # Save differences to Excel files
1194
+ sheet_dir = os.path.join(output_dir, sheet)
1195
+ os.makedirs(sheet_dir, exist_ok=True)
1196
+ added_path = os.path.join(sheet_dir, f"{sheet}_added.xlsx")
1197
+ deleted_path = os.path.join(sheet_dir, f"{sheet}_deleted.xlsx")
1198
+ modified_path = os.path.join(sheet_dir, f"{sheet}_modified.xlsx")
1199
+ reordered_path = os.path.join(sheet_dir, f"{sheet}_reordered.xlsx")
1200
+
1201
+ if not added_rows.empty:
1202
+ added_rows.to_excel(added_path)
1203
+ if not deleted_rows.empty:
1204
+ deleted_rows.to_excel(deleted_path)
1205
+ if not modified_rows.empty:
1206
+ modified_rows.to_excel(modified_path)
1207
+ if not reordered_rows.empty:
1208
+ reordered_rows.to_excel(reordered_path)
1209
+
1210
+ # Return the summary
1211
+ return {
1212
+ "added_rows": len(added_rows),
1213
+ "deleted_rows": len(deleted_rows),
1214
+ "modified_rows": len(modified_rows),
1215
+ "reordered_rows": len(reordered_rows),
1216
+ "added_file": added_path if not added_rows.empty else None,
1217
+ "deleted_file": deleted_path if not deleted_rows.empty else None,
1218
+ "modified_file": modified_path if not modified_rows.empty else None,
1219
+ "reordered_file": reordered_path if not reordered_rows.empty else None,
1220
+ }
1221
+
1222
+ # Use ThreadPoolExecutor for parallel processing
1223
+ with ThreadPoolExecutor() as executor:
1224
+ results = executor.map(compare_sheet, sheet_name)
1225
+
1226
+ # Collect results
1227
+ summary = {sheet: result for sheet, result in zip(sheet_name, results)}
1228
+
1229
+ # Save JSON log
1230
+ json_path = os.path.join(output_dir, "comparison_summary.json")
1231
+ if os.path.exists(json_path):
1232
+ with open(json_path, "r") as f:
1233
+ existing_data = json.load(f)
1234
+ else:
1235
+ existing_data = {}
1236
+
1237
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
1238
+ existing_data[timestamp] = summary
1239
+ # Sort the existing data by the timestamp in descending order (latest first)
1240
+ existing_data = dict(sorted(existing_data.items(), reverse=True))
1241
+
1242
+ with open(json_path, "w") as f:
1243
+ json.dump(existing_data, f, indent=4)
1244
+ if verbose:
1245
+ print(f"Comparison complete. Results saved in '{output_dir}'")
1246
+
1247
+ return summary
1248
+
1249
+ # --- Compare CSV files ---
1250
+ def cmp_csv(
1251
+ file1,
1252
+ file2,
1253
+ ignore_case=False,
1254
+ numeric_tolerance=0,
1255
+ ignore_columns=None,
1256
+ verbose=True,
1257
+ **kwargs,
1258
+ ):
1259
+ import pandas as pd
1260
+
1261
+ # Load data and fill NaNs
1262
+ df1 = pd.read_csv(file1).fillna("NA")
1263
+ df2 = pd.read_csv(file2).fillna("NA")
1264
+
1265
+ # Standardize case if needed
1266
+ if ignore_case:
1267
+ df1.columns = df1.columns.str.lower()
1268
+ df2.columns = df2.columns.str.lower()
1269
+ df1 = df1.applymap(lambda x: x.lower() if isinstance(x, str) else x)
1270
+ df2 = df2.applymap(lambda x: x.lower() if isinstance(x, str) else x)
1271
+
1272
+ # Drop ignored columns
1273
+ if ignore_columns:
1274
+ ignore_columns = [col.lower() if ignore_case else col for col in ignore_columns]
1275
+ df1.drop(columns=[col for col in ignore_columns if col in df1.columns], errors="ignore", inplace=True)
1276
+ df2.drop(columns=[col for col in ignore_columns if col in df2.columns], errors="ignore", inplace=True)
1277
+
1278
+ # Reset index to ensure alignment
1279
+ df1.reset_index(drop=True, inplace=True)
1280
+ df2.reset_index(drop=True, inplace=True)
1281
+
1282
+ # Align DataFrames by columns
1283
+ df1, df2 = df1.align(df2, join="inner", axis=1)
1284
+
1285
+ # Compare rows
1286
+ added_rows = df2[~df2.apply(tuple, axis=1).isin(df1.apply(tuple, axis=1))]
1287
+ deleted_rows = df1[~df1.apply(tuple, axis=1).isin(df2.apply(tuple, axis=1))]
1288
+
1289
+ # Compare modified rows
1290
+ if numeric_tolerance > 0:
1291
+ def numeric_diff(row):
1292
+ if pd.api.types.is_numeric_dtype(row["left"]):
1293
+ return abs(row["left"] - row["right"]) > numeric_tolerance
1294
+ return row["left"] != row["right"]
1295
+
1296
+ modified_rows = df1.compare(df2, keep_shape=True, keep_equal=False)
1297
+ modified_rows = modified_rows[modified_rows.apply(numeric_diff, axis=1)]
1298
+ else:
1299
+ modified_rows = df1.compare(df2, keep_shape=True, keep_equal=False)
1100
1300
 
1301
+ # Return results
1302
+ return {
1303
+ "added_rows": len(added_rows),
1304
+ "deleted_rows": len(deleted_rows),
1305
+ "modified_rows": len(modified_rows),
1306
+ "added_file": added_rows if not added_rows.empty else pd.DataFrame(),
1307
+ "deleted_file": deleted_rows if not deleted_rows.empty else pd.DataFrame(),
1308
+ "modified_file": modified_rows if not modified_rows.empty else pd.DataFrame(),
1309
+ }
1310
+
1311
+ # --- Compare JSON files ---
1312
+ def cmp_json(
1313
+ file1, file2, ignore_case=False, numeric_tolerance=0, verbose=True, **kwargs
1314
+ ):
1315
+ import json
1316
+
1317
+ with open(file1, "r") as f1:
1318
+ json1 = json.load(f1)
1319
+ with open(file2, "r") as f2:
1320
+ json2 = json.load(f2)
1321
+
1322
+ # Normalize case and compare JSONs
1323
+ if ignore_case:
1324
+
1325
+ def normalize(obj):
1326
+ if isinstance(obj, dict):
1327
+ return {k.lower(): normalize(v) for k, v in obj.items()}
1328
+ elif isinstance(obj, list):
1329
+ return [normalize(item) for item in obj]
1330
+ elif isinstance(obj, str):
1331
+ return obj.lower()
1332
+ else:
1333
+ return obj
1334
+
1335
+ json1 = normalize(json1)
1336
+ json2 = normalize(json2)
1337
+
1338
+ # Compare JSONs
1339
+ def compare_json(obj1, obj2):
1340
+ if isinstance(obj1, dict) and isinstance(obj2, dict):
1341
+ added_keys = {k: obj2[k] for k in obj2 if k not in obj1}
1342
+ deleted_keys = {k: obj1[k] for k in obj1 if k not in obj2}
1343
+ modified_keys = {
1344
+ k: (obj1[k], obj2[k])
1345
+ for k in obj1
1346
+ if k in obj2 and obj1[k] != obj2[k]
1347
+ }
1348
+ return added_keys, deleted_keys, modified_keys
1349
+
1350
+ elif isinstance(obj1, list) and isinstance(obj2, list):
1351
+ added_items = [item for item in obj2 if item not in obj1]
1352
+ deleted_items = [item for item in obj1 if item not in obj2]
1353
+ modified_items = [
1354
+ (item1, item2) for item1, item2 in zip(obj1, obj2) if item1 != item2
1355
+ ]
1356
+ return added_items, deleted_items, modified_items
1357
+
1358
+ else:
1359
+ if obj1 != obj2:
1360
+ return obj1, obj2, None
1361
+ else:
1362
+ return None, None, None
1363
+
1364
+ added, deleted, modified = compare_json(json1, json2)
1365
+
1366
+ return {"added_keys": added, "deleted_keys": deleted, "modified_keys": modified}
1367
+
1368
+ # --- Compare Text files ---
1369
+ def cmp_txt(
1370
+ file1, file2, ignore_case=False, numeric_tolerance=0, verbose=True, **kwargs
1371
+ ):
1372
+ def read_lines(file):
1373
+ with open(file, "r") as f:
1374
+ return f.readlines()
1375
+
1376
+ lines1 = read_lines(file1)
1377
+ lines2 = read_lines(file2)
1378
+
1379
+ if ignore_case:
1380
+ lines1 = [line.lower() for line in lines1]
1381
+ lines2 = [line.lower() for line in lines2]
1382
+
1383
+ added_lines = [line for line in lines2 if line not in lines1]
1384
+ deleted_lines = [line for line in lines1 if line not in lines2]
1385
+
1386
+ modified_lines = []
1387
+ if numeric_tolerance > 0:
1388
+ for line1, line2 in zip(lines1, lines2):
1389
+ if abs(float(line1) - float(line2)) > numeric_tolerance:
1390
+ modified_lines.append((line1, line2))
1391
+ else:
1392
+ for line1, line2 in zip(lines1, lines2):
1393
+ if line1 != line2:
1394
+ modified_lines.append((line1, line2))
1395
+
1396
+ return {
1397
+ "added_lines": added_lines,
1398
+ "deleted_lines": deleted_lines,
1399
+ "modified_lines": modified_lines,
1400
+ }
1401
+
1402
+ if kind is None:
1403
+ kind = os.path.splitext(file1)[1].lower()[1:]
1404
+ # Compare based on the file type
1405
+ if kind == "xlsx":
1406
+ return cmp_excel(file1=file1, file2=file2, verbose=verbose, **kwargs)
1407
+
1408
+ elif kind == "csv":
1409
+ return cmp_csv(file1=file1, file2=file2, verbose=verbose, **kwargs)
1410
+
1411
+ elif kind == "json":
1412
+ return cmp_json(file1=file1, file2=file2, verbose=verbose, **kwargs)
1413
+
1414
+ elif kind == "txt":
1415
+ return cmp_txt(file1=file1, file2=file2, verbose=verbose, **kwargs)
1416
+
1417
+ else:
1418
+ raise ValueError(f"Unsupported file type: {kind}")
1101
1419
  def cn2pinyin(
1102
1420
  cn_str: Union[str, list] = None,
1103
1421
  sep: str = " ",
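Editor's note on the new fcmp helper added above: it dispatches on the file extension (xlsx, csv, json, txt) and, for Excel input, writes per-sheet difference files into a CMP_<basename> folder. A minimal usage sketch with placeholder file paths:

from py2ls.ips import fcmp

# CSV comparison; the kind is auto-detected from the extension
summary = fcmp("report_v1.csv", "report_v2.csv", ignore_case=True, ignore_columns=["timestamp"])
print(summary["added_rows"], summary["deleted_rows"], summary["modified_rows"])

# Excel comparison restricted to one sheet, with a small numeric tolerance
summary = fcmp("report_v1.xlsx", "report_v2.xlsx", kind="xlsx", sheet_name=["Sheet1"], numeric_tolerance=0.01)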
@@ -1188,21 +1506,38 @@ def counter(list_, verbose=True):
1188
1506
  # print(f"Return a list of the n most common elements:\n{c.most_common()}")
1189
1507
  # print(f"Compute the sum of the counts:\n{c.total()}")
1190
1508
 
1509
+ def dict2df(dict_, fill=None, axis=0):
1510
+ """
1511
+ Convert a dictionary to a DataFrame with flexible axis and padding options.
1512
+
1513
+ Parameters:
1514
+ - dict_: The dictionary to convert (keys are columns or index).
1515
+ - fill: Value to fill in case of shorter lists.
1516
+ - axis: Axis for DataFrame construction (0 for columns, 1 for rows).
1191
1517
 
1192
- def dict2df(dict_, fill=None):
1193
- len_max = 0
1518
+ Returns:
1519
+ - DataFrame created from the dictionary.
1520
+ """
1194
1521
  for key, value in dict_.items():
1195
- # value部分需要是list
1196
- if isinstance(value, list):
1197
- pass
1198
- # get the max_length
1199
- len_max = len(value) if len(value) > len_max else len_max
1200
- # 补齐长度
1522
+ if not isinstance(value, list):
1523
+ dict_[key] = [value]
1524
+ print(f"'{key}' is not a list. trying to convert it to 'list'")
1525
+
1526
+ # Get the maximum length of values
1527
+ len_max = max(len(value) for value in dict_.values())
1528
+
1529
+ # Extend lists to match the length of the longest list
1201
1530
  for key, value in dict_.items():
1202
- value.extend([fill] * (len_max - len(value)))
1531
+ if isinstance(value, list):
1532
+ value.extend([fill] * (len_max - len(value))) # Fill shorter lists
1203
1533
  dict_[key] = value
1204
- return pd.DataFrame.from_dict(dict_)
1205
1534
 
1535
+ # If axis=0, the dictionary keys will be treated as column names
1536
+ if axis == 0:
1537
+ return pd.DataFrame(dict_)
1538
+ # If axis=1, the dictionary keys will be treated as index names (rows)
1539
+ else:
1540
+ return pd.DataFrame(dict_).transpose()
1206
1541
 
1207
1542
  def text2audio(
1208
1543
  text,
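Editor's note on the reworked dict2df above: it now wraps scalar values in lists, pads shorter lists with fill, and honors the new axis argument. A short sketch of both orientations (values are arbitrary):

from py2ls.ips import dict2df

data = {"a": [1, 2, 3], "b": [4, 5], "c": 6}   # "c" gets wrapped into a list, "b" is padded

df_cols = dict2df(data, fill=None, axis=0)     # keys become column names
df_rows = dict2df(data, fill=None, axis=1)     # keys become the row index (transposed)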
@@ -1433,104 +1768,161 @@ def str2time(time_str, fmt="24"):
1433
1768
  # print(formatted_time2) # Output: 14:30:45
1434
1769
 
1435
1770
 
1436
- def str2date(date_str, fmt="%Y-%m-%d_%H:%M:%S"):
1771
+ def str2date(date_str, original_fmt=None, fmt="%Y-%m-%d"):
1437
1772
  """
1438
- Convert a date string into the specified format.
1773
+ Convert a date string to the desired format and extract components if needed.
1774
+ Usage:
1775
+ str2date(x, fmt="%d.%m.%y",original_fmt="%d.%m.%y")
1439
1776
  Parameters:
1440
- - date_str (str): The date string to be converted.
1441
- - fmt (str): The format to convert the date to. Defaults to '%Y%m%d'.
1777
+ - date_str (str): The input date string.
1778
+ - original_fmt (str, optional): The original format of the date string. If not provided, it will be auto-detected.
1779
+ - fmt (str): The desired format for the output date string. Defaults to '%Y-%m-%d'.
1780
+
1442
1781
  Returns:
1443
- - str: The converted date string.
1444
- """
1782
+ - dict: A dictionary containing the converted date string and its components (year, month, day).
1783
+
1784
+ Raises:
1785
+ - ValueError: If the date cannot be parsed.
1786
+ """
1445
1787
  from dateutil import parser
1446
-
1447
1788
  try:
1448
- date_obj = parser.parse(date_str)
1449
- except ValueError as e:
1450
- raise ValueError(f"Unable to parse date string: {date_str}. Error: {e}")
1451
- # Format the date object to the desired output format
1452
- formatted_date = date_obj.strftime(fmt)
1453
- return formatted_date
1789
+ if not isinstance(date_str,str):
1790
+ date_str=str(date_str)
1791
+ # Parse the date using the provided original format or auto-detect
1792
+ if original_fmt:
1793
+ try:
1794
+ date_obj = datetime.strptime(date_str, original_fmt)
1795
+ except Exception as e:
1796
+ print(e)
1797
+ date_obj=None
1798
+ else:
1799
+ try:
1800
+ date_obj = parser.parse(date_str)
1801
+ except Exception as e:
1802
+ print(e)
1803
+ date_obj=None
1804
+ # Return formatted string if `fmt` is specified, otherwise return the datetime object
1805
+ if date_obj is not None:
1806
+ if fmt:
1807
+ date_obj=date_obj.strftime(fmt)
1808
+ else:
1809
+ date_obj=date_str
1810
+ return date_obj
1811
+
1812
+ except (ValueError, TypeError) as e:
1813
+ raise ValueError(f"Unable to process date string: '{date_str}'. Error: {e}")
1454
1814
 
1455
1815
 
1456
1816
  # str1=str2date(num2str(20240625),fmt="%a %d-%B-%Y")
1457
1817
  # print(str1)
1458
1818
  # str2=str2num(str2date(str1,fmt='%a %Y%m%d'))
1459
1819
  # print(str2)
1820
+
1821
+ def str2num(
1822
+ s: str,
1823
+ *args,
1824
+ sep: Optional[Union[str, List[str]]] = None,
1825
+ round_digits: Optional[int] = None,
1826
+ return_list: bool = True,
1827
+ handle_text: bool = True
1828
+ ) -> Union[float, int, List[Union[float, int]], None]:
1829
+ """
1830
+ # Examples
1831
+ print(str2num("123")) # Output: 123
1832
+ print(str2num("123.456", 2)) # Output: 123.46
1833
+ print(str2num("one hundred and twenty three")) # Output: 123
1834
+ print(str2num("seven million")) # Output: 7000000
1835
+ print(str2num('one thousand thirty one',',')) # Output: 1,031
1836
+ print(str2num("12345.6789", ",")) # Output: 12,345.6789
1837
+ print(str2num("12345.6789", " ", 2)) # Output: 12 345.68
1838
+ print(str2num('111113.34555',3,',')) # Output: 111,113.346
1839
+ print(str2num("123.55555 sec miniuets",3)) # Output: 1.3
1840
+ print(str2num("every 3,300.55 hours and 5.045555 min", sep=",", round=1))
1841
+ print(str2num("five hundred fourty one"), str2num(
1842
+ "this is 5.9435 euros for 10.04499 killograme", round=3
1843
+ )[0])
1844
+ Convert a string containing numeric or textual data into an integer, float, or list of numbers.
1460
1845
 
1846
+ Parameters:
1847
+ - s (str): Input string containing a number or textual representation of a number.
1848
+ - *args: Additional arguments for delimiter or rounding digits.
1849
+ - sep (str or list): Delimiter(s) to remove from the string (e.g., ',' or ['.', ',']).
1850
+ - round_digits (int): Number of decimal places to round the result to.
1851
+ - return_list (bool): Whether to return a list of numbers if multiple are found.
1852
+ - handle_text (bool): Whether to process textual numbers using the numerizer library.
1461
1853
 
1462
- def str2num(s, *args, **kwargs):
1854
+ Returns:
1855
+ - Union[float, int, List[Union[float, int]], None]: Converted number(s) or None if conversion fails.
1856
+ """
1463
1857
  import re
1858
+ from numerizer import numerize
1464
1859
 
1465
- delimiter = kwargs.get("sep", None)
1466
- round_digits = kwargs.get("round", None)
1467
- if delimiter is not None:
1468
- s = s.replace(delimiter, "")
1860
+ if not isinstance(s, str):
1861
+ return None
1862
+
1863
+ # Merge args with explicit parameters
1864
+ if sep is None:
1865
+ sep = []
1866
+ elif isinstance(sep, str):
1867
+ sep = [sep]
1469
1868
  for arg in args:
1470
- if isinstance(arg, str) and delimiter is None:
1471
- delimiter = arg
1869
+ if isinstance(arg, str):
1870
+ sep.append(arg)
1472
1871
  elif isinstance(arg, int) and round_digits is None:
1473
1872
  round_digits = arg
1474
- try:
1475
- num = int(s)
1476
- except ValueError:
1873
+
1874
+ # Remove all specified delimiters
1875
+ for delimiter in sep:
1876
+ s = s.replace(delimiter, "")
1877
+
1878
+ # Attempt conversion
1879
+ def try_convert(segment: str) -> Union[float, int, None]:
1477
1880
  try:
1478
- num = float(s)
1881
+ return int(segment)
1479
1882
  except ValueError:
1480
- from numerizer import numerize
1481
-
1482
1883
  try:
1483
- numerized = numerize(s)
1484
- num = int(numerized) if "." not in numerized else float(numerized)
1485
- except Exception as e:
1486
- # Attempt to handle multiple number segments
1487
- try:
1488
- number_segments = re.findall(r"[-+]?\d*\.\d+|\d+", s)
1489
- nums = []
1490
- for segment in number_segments:
1491
- nums.append(str2num(segment))
1492
- if len(nums) == 1:
1493
- num = nums[0]
1494
- else:
1495
- num = nums
1496
- except Exception as e:
1497
- return None
1884
+ return float(segment)
1885
+ except ValueError:
1886
+ return None
1498
1887
 
1499
- # Apply rounding if specified
1500
- if round_digits is not None:
1501
- if isinstance(num, list):
1502
- num = [round(i + 0.00000000001, round_digits) for i in num]
1503
- else:
1504
- num_adj = num + 0.00000000001 # Ensure precise rounding
1505
- num = round(num_adj, round_digits)
1506
- if round_digits == 0:
1507
- if isinstance(num, list):
1508
- num = [int(i) for i in num]
1509
- else:
1510
- num = int(num)
1511
- # if delimiter is not None:
1512
- # num_str = f"{num:,}".replace(",", delimiter)
1513
- # return num_str#s.replace(delimiter, "")
1888
+ # Handle textual numbers
1889
+ if handle_text:
1890
+ try:
1891
+ s = numerize(s)
1892
+ except Exception:
1893
+ pass
1514
1894
 
1515
- return num
1895
+ # Extract numeric segments
1896
+ number_segments = re.findall(r"[-+]?\d*\.\d+|\d+", s)
1897
+ numbers = [try_convert(seg) for seg in number_segments if seg]
1898
+ numbers = [num for num in numbers if num is not None]
1516
1899
 
1900
+ if not numbers:
1901
+ return None # No valid numbers found
1517
1902
 
1518
- # Examples
1519
- # print(str2num("123")) # Output: 123
1520
- # print(str2num("123.456", 2)) # Output: 123.46
1521
- # print(str2num("one hundred and twenty three")) # Output: 123
1522
- # print(str2num("seven million")) # Output: 7000000
1523
- # print(str2num('one thousand thirty one',',')) # Output: 1,031
1524
- # print(str2num("12345.6789", ",")) # Output: 12,345.6789
1525
- # print(str2num("12345.6789", " ", 2)) # Output: 12 345.68
1526
- # print(str2num('111113.34555',3,',')) # Output: 111,113.346
1527
- # print(str2num("123.55555 sec miniuets",3)) # Output: 1.3
1528
- # print(str2num("every 3,300.55 hours and 5.045555 min", sep=",", round=1))
1529
- # print(str2num("five hundred fourty one"), str2num(
1530
- # "this is 5.9435 euros for 10.04499 killograme", round=3
1531
- # )[0])
1903
+ # Single or multiple numbers
1904
+ if len(numbers) == 1 and not return_list:
1905
+ result = numbers[0]
1906
+ else:
1907
+ result = (
1908
+ numbers[0] if len(numbers) == 1 else numbers if return_list else numbers[0]
1909
+ )
1532
1910
 
1911
+ # Apply rounding if necessary
1912
+ if round_digits is not None:
1913
+ if isinstance(result, list):
1914
+ result = [round(num + 1e-10, round_digits) for num in result]
1915
+ else:
1916
+ result = round(result + 1e-10, round_digits)
1917
+
1918
+ # Convert to int if rounding to 0 digits
1919
+ if round_digits == 0:
1920
+ if isinstance(result, list):
1921
+ result = [int(num) for num in result]
1922
+ else:
1923
+ result = int(result)
1533
1924
 
1925
+ return result
1534
1926
  def num2str(num, *args, **kwargs):
1535
1927
  delimiter = kwargs.get("sep", None)
1536
1928
  round_digits = kwargs.get("round", None)
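Editor's note on the rewritten str2num above: it extracts every numeric segment it finds (after numerize has handled spelled-out numbers) and returns a list when more than one number is present; the old round keyword is now round_digits. A small sketch with expected results in the comments:

from py2ls.ips import str2num

str2num("123")                                                     # 123
str2num("3,300.55 hours and 5.0456 min", sep=",", round_digits=2)  # [3300.55, 5.05]
str2num("seven million", return_list=False)                        # 7000000 (via numerizer)
str2num("no digits here")                                          # None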
@@ -1706,6 +2098,68 @@ def cm2inch(*inch) -> list:
1706
2098
  return [i / 2.54 for i in inch]
1707
2099
 
1708
2100
 
2101
+
2102
+ def sqlite2sql(db_path, sql_path):
2103
+ """
2104
+ Export an SQLite database to an SQL file, including schema and data for all tables.
2105
+
2106
+ :param db_path: Path to the SQLite .db file
2107
+ :param sql_path: Path to the output .sql file
2108
+
2109
+ # Usage
2110
+ db_path = "your_database.db" # Replace with the path to your SQLite database
2111
+ sql_path = "output.sql" # Replace with your desired output file name
2112
+ sqlite2sql(db_path, sql_path)
2113
+
2114
+ """
2115
+ import sqlite3
2116
+ try:
2117
+ # Connect to the SQLite database
2118
+ conn = sqlite3.connect(db_path)
2119
+ cursor = conn.cursor()
2120
+
2121
+ with open(sql_path, 'w') as f:
2122
+ # Write a header for the SQL dump
2123
+ f.write("-- SQLite Database Dump\n")
2124
+ f.write(f"-- Source: {db_path}\n\n")
2125
+
2126
+ # Retrieve all table names
2127
+ cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';")
2128
+ tables = [row[0] for row in cursor.fetchall()]
2129
+
2130
+ for table in tables:
2131
+ # Write the schema for the table
2132
+ cursor.execute(f"SELECT sql FROM sqlite_master WHERE type='table' AND name='{table}';")
2133
+ schema = cursor.fetchone()
2134
+ if schema:
2135
+ f.write(f"{schema[0]};\n\n")
2136
+
2137
+ # Write data for the table
2138
+ cursor.execute(f"SELECT * FROM {table};")
2139
+ rows = cursor.fetchall()
2140
+ if rows:
2141
+ cursor.execute(f"PRAGMA table_info({table});")
2142
+ column_names = [info[1] for info in cursor.fetchall()]
2143
+ column_list = ', '.join(f'"{col}"' for col in column_names)
2144
+
2145
+ for row in rows:
2146
+ escaped = [str(val).replace("'", "''") if val is not None else None for val in row]
+ values = ', '.join(f"'{v}'" if v is not None else 'NULL' for v in escaped)
2147
+ f.write(f"INSERT INTO {table} ({column_list}) VALUES ({values});\n")
2148
+
2149
+ f.write("\n")
2150
+
2151
+ print(f"Database exported successfully to {sql_path}")
2152
+
2153
+ except sqlite3.Error as e:
2154
+ print(f"SQLite error: {e}")
2155
+ except Exception as e:
2156
+ print(f"Unexpected error: {e}")
2157
+ finally:
2158
+ # Ensure the connection is closed
2159
+ if conn:
2160
+ conn.close()
2161
+
2162
+
1709
2163
  def sreplace(*args, **kwargs):
1710
2164
  """
1711
2165
  sreplace(text, by=None, robust=True)
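Editor's note on the new sqlite2sql helper above: it walks sqlite_master for table schemas and emits one INSERT per row. A self-contained sketch with a throwaway database (file names are arbitrary):

import sqlite3
from py2ls.ips import sqlite2sql

# Build a tiny database to export
conn = sqlite3.connect("demo.db")
conn.execute("CREATE TABLE IF NOT EXISTS users (id INTEGER PRIMARY KEY, name TEXT)")
conn.execute("INSERT INTO users (name) VALUES ('alice'), ('bob')")
conn.commit()
conn.close()

# Dump schema and data to a .sql script
sqlite2sql("demo.db", "demo_dump.sql")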
@@ -2194,6 +2648,12 @@ def unzip(dir_path, output_dir=None):
2194
2648
  Unzips or extracts various compressed file formats (.gz, .zip, .7z, .tar, .bz2, .xz, .rar).
2195
2649
  If the output directory already exists, it will be replaced.
2196
2650
 
2651
+ # Example usage:
2652
+ output_dir = unzip('data.tar.gz')
2653
+ output_file = unzip('file.csv.gz')
2654
+ output_dir_zip = unzip('archive.zip')
2655
+ output_dir_7z = unzip('archive.7z')
2656
+
2197
2657
  Parameters:
2198
2658
  dir_path (str): Path to the compressed file.
2199
2659
  output_dir (str): Directory where the extracted files will be saved.
@@ -2314,21 +2774,12 @@ def unzip(dir_path, output_dir=None):
2314
2774
  else:
2315
2775
  raise ValueError(f"Unsupported file format: {os.path.splitext(dir_path)[1]}")
2316
2776
 
2317
-
2318
- # Example usage:
2319
- # output_dir = unzip('data.tar.gz')
2320
- # output_file = unzip('file.csv.gz')
2321
- # output_dir_zip = unzip('archive.zip')
2322
- # output_dir_7z = unzip('archive.7z')
2323
-
2324
-
2325
2777
  def is_df_abnormal(df: pd.DataFrame, verbose=False) -> bool:
2326
2778
  """
2327
2779
  Usage
2328
2780
  is_abnormal = is_df_abnormal(df, verbose=1)
2329
2781
  True: abnormal
2330
2782
  False: normal
2331
-
2332
2783
  """
2333
2784
  if not isinstance(df, pd.DataFrame):
2334
2785
  if verbose:
@@ -3405,29 +3856,47 @@ def fsave(
3405
3856
  df = pd.DataFrame(data)
3406
3857
  df.to_csv(fpath, **kwargs_valid)
3407
3858
 
3408
- def save_xlsx(fpath, data, password=None, **kwargs):
3859
+ def save_xlsx(fpath, data, password=None,apply_format=None, **kwargs):
3409
3860
  import msoffcrypto
3410
3861
  from io import BytesIO
3862
+ import openpyxl
3863
+ import pandas.io.formats.style
3411
3864
 
3412
3865
  verbose = kwargs.pop("verbose", False)
3413
3866
  sheet_name = kwargs.pop("sheet_name", "Sheet1")
3414
-
3867
+ engine = kwargs.pop("engine", "xlsxwriter")
3868
+ mode = kwargs.pop("mode","a")
3869
+ if_sheet_exists = strcmp(kwargs.get("if_sheet_exists","new"),['error', 'new', 'replace', 'overlay'])[0]
3870
+ kwargs.pop("if_sheet_exists",None)
3415
3871
  if run_once_within(reverse=True):
3416
3872
  use_pd("to_excel", verbose=verbose)
3417
-
3418
- if any(kwargs):
3419
- format_excel(df=data, filename=fpath,sheet_name=sheet_name,password=password, **kwargs)
3873
+
3874
+ if apply_format is None:
3875
+ kwargs_format=list(extract_kwargs(format_excel).keys())[4:]
3876
+ apply_format=True if any([i in kwargs_format for i in kwargs]) else False
3877
+ print(f"apply format: {apply_format}")
3878
+ if apply_format or any([
3879
+ isinstance(data, openpyxl.worksheet.worksheet.Worksheet),
3880
+ isinstance(data, openpyxl.workbook.workbook.Workbook),
3881
+ isinstance(data, pd.io.formats.style.Styler)
3882
+ ]):
3883
+ format_excel(df=data,
3884
+ filename=fpath,
3885
+ sheet_name=sheet_name,
3886
+ password=password,
3887
+ if_sheet_exists=if_sheet_exists,
3888
+ mode=mode,
3889
+ engine=engine,
3890
+ verbose=verbose,
3891
+ **kwargs)
3420
3892
  else:
3421
3893
  # Remove non-relevant kwargs
3422
3894
  irrelevant_keys=list(extract_kwargs(format_excel).keys())[4:]
3423
-
3424
- for key in irrelevant_keys:
3425
- kwargs.pop(key, None)
3426
-
3895
+ [kwargs.pop(key, None) for key in irrelevant_keys]
3427
3896
  df = pd.DataFrame(data)
3428
-
3429
3897
  # Write to Excel without password first
3430
3898
  temp_file = BytesIO()
3899
+
3431
3900
  df.to_excel(
3432
3901
  temp_file,
3433
3902
  sheet_name=sheet_name,
@@ -3435,26 +3904,22 @@ def fsave(
3435
3904
  engine="xlsxwriter",
3436
3905
  **kwargs,
3437
3906
  )
3438
-
3439
3907
  # If a password is provided, encrypt the file
3440
3908
  if password:
3441
3909
  temp_file.seek(0)
3442
- office_file = msoffcrypto.OfficeFile(temp_file)
3443
- office_file.load_key(password=password) # Provide the password
3444
-
3445
- # Encrypt and save the file
3910
+ office_file = msoffcrypto.OfficeFile(temp_file)
3446
3911
  with open(fpath, "wb") as encrypted_file:
3447
- office_file.encrypt(encrypted_file)
3448
- else:
3449
- # Save the file without encryption if no password is provided
3912
+ office_file.encrypt(outfile=encrypted_file,password=password)
3913
+ else: # Save the file without encryption if no password is provided
3450
3914
  try:
3451
3915
  # Use ExcelWriter with append mode if the file exists
3452
- with pd.ExcelWriter(
3453
- fpath, engine="openpyxl", mode="a", if_sheet_exists="new"
3454
- ) as writer:
3455
- df.to_excel(
3456
- writer, sheet_name=sheet_name, index=False, **kwargs
3457
- )
3916
+ engine="openpyxl" if mode=="a" else "xlsxwriter"
3917
+ if mode=="a":
3918
+ with pd.ExcelWriter(fpath, engine=engine, mode=mode,if_sheet_exists=if_sheet_exists) as writer:
3919
+ df.to_excel(writer, sheet_name=sheet_name, index=False, **kwargs)
3920
+ else:
3921
+ with pd.ExcelWriter(fpath, engine=engine, mode=mode) as writer:
3922
+ df.to_excel(writer, sheet_name=sheet_name, index=False, **kwargs)
3458
3923
  except FileNotFoundError:
3459
3924
  # If file doesn't exist, create a new one
3460
3925
  df.to_excel(fpath, sheet_name=sheet_name, index=False, **kwargs)
@@ -3478,15 +3943,9 @@ def fsave(
3478
3943
  nb["cells"] = cells
3479
3944
  # Write the notebook to a file
3480
3945
  with open(fpath, "w", encoding="utf-8") as ipynb_file:
3481
- nbformat.write(nb, ipynb_file)
3482
-
3483
- # def save_json(fpath, data, **kwargs):
3484
- # with open(fpath, "w") as file:
3485
- # json.dump(data, file, **kwargs)
3486
-
3946
+ nbformat.write(nb, ipynb_file)
3487
3947
  def save_json(fpath_fname, var_dict_or_df):
3488
3948
  import json
3489
-
3490
3949
  def _convert_js(data):
3491
3950
  if isinstance(data, pd.DataFrame):
3492
3951
  return data.to_dict(orient="list")
@@ -3497,15 +3956,9 @@ def fsave(
3497
3956
  return data
3498
3957
 
3499
3958
  serializable_data = _convert_js(var_dict_or_df)
3500
-
3501
3959
  # Save the serializable data to the JSON file
3502
3960
  with open(fpath_fname, "w") as f_json:
3503
- json.dump(serializable_data, f_json, indent=4)
3504
-
3505
- # # Example usage:
3506
- # sets = {"title": "mse_path_ MSE"}
3507
- # jsonsave("/.json", sets)
3508
- # # setss = jsonload("/.json")
3961
+ json.dump(serializable_data, f_json, indent=4)
3509
3962
 
3510
3963
  def save_yaml(fpath, data, **kwargs):
3511
3964
  import yaml
@@ -4579,10 +5032,11 @@ def copy(src, dst, overwrite=False, verbose=True):
4579
5032
  dst.unlink()
4580
5033
  else:
4581
5034
  dst = dst.with_name(
4582
- f"{dst.stem}_{datetime.now().strftime('_%H%M%S')}{dst.suffix}"
5035
+ f"{dst.stem}_{datetime.now().strftime('%y%m%d_%H%M%S')}{dst.suffix}"
4583
5036
  )
4584
5037
  shutil.copy(src, dst)
4585
5038
  print(f"\n Done! copy to {dst}\n") if verbose else None
5039
+ return dst
4586
5040
  else:
4587
5041
  dst = dst / src.name
4588
5042
  if dst.exists():
@@ -4590,14 +5044,33 @@ def copy(src, dst, overwrite=False, verbose=True):
4590
5044
  shutil.rmtree(dst) # Remove existing directory
4591
5045
  else:
4592
5046
  dst = dst.with_name(
4593
- f"{dst.stem}_{datetime.now().strftime('%H%M%S')}"
5047
+ f"{dst.stem}_{datetime.now().strftime('%y%m%d%H%M%S')}"
4594
5048
  )
4595
5049
  shutil.copytree(src, dst)
4596
5050
  print(f"\n Done! copy to {dst}\n") if verbose else None
5051
+ return dst
4597
5052
 
4598
5053
  except Exception as e:
4599
5054
  logging.error(f"Failed {e}")
4600
-
5055
+ def local_path(fpath,station=r"Q:\\IM\\AGLengerke\\Jeff\\# testing\\temp\\"):
5056
+ """copy file to a specific folder first, to aviod file conflict"""
5057
+ try:
5058
+ f=listdir(station)
5059
+ if listdir(station ,verbose=False).loc[0,"num"]>=10:
5060
+ for fpath_ in f['path']:
5061
+ if os.path.basename(fpath)[:5] in fpath_:
5062
+ if fpath== fpath_:
5063
+ pass
5064
+ else:
5065
+ delete(fpath_)
5066
+ except:
5067
+ pass
5068
+ try:
5069
+ new_path=copy(fpath, station)
5070
+ except Exception as e:
5071
+ print(f"Path did not update because: Error:{e}")
5072
+ new_path=fpath
5073
+ return new_path
4601
5074
 
4602
5075
  def cut(src, dst, overwrite=False):
4603
5076
  return move(src=src, dst=dst, overwrite=overwrite)
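Editor's note: copy() above now returns the destination path (timestamp-suffixed when the target already exists and overwrite=False), which is what the new local_path helper relies on. A short sketch with placeholder paths:

from py2ls.ips import copy

new_path = copy("summary.xlsx", "backup/", overwrite=False, verbose=True)
print(new_path)  # the actual destination, e.g. with a _yymmdd_HHMMSS suffix if it already existed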
@@ -6299,39 +6772,39 @@ def finfo(fpath):
6299
6772
 
6300
6773
 
6301
6774
  # ! format excel file
6302
- def hex2argb(hex_color):
6303
- """
6304
- Convert a hex color code to aARGB format required by openpyxl.
6305
6775
 
6306
- :param hex_color: A hex color code in the format #RRGGBB, RRGGBB, or aARRGGBB.
6307
- :return: A hex color code in the format aARRGGBB.
6308
6776
 
6309
- # Example usage
6310
- print(hex_to_argb("FFFF00")) # Outputs: FFFFFF00
6311
- print(hex_to_argb("#DF4245")) # Outputs: FFFFFF00
6312
- print(hex_to_argb("FF00FF00")) # Outputs: FF00FF00 (already in aARGB format)
6777
+ def hex2argb(color):
6313
6778
  """
6314
- # Remove the hash if present
6315
- if hex_color.startswith("#"):
6316
- hex_color = hex_color[1:]
6779
+ Convert a color name or hex code to aARGB format required by openpyxl.
6317
6780
 
6318
- # Check if it's already in aARGB format (8 characters)
6319
- if len(hex_color) == 8:
6320
- return hex_color
6781
+ :param color: A color in the format: 'blue', '#RRGGBB', 'RRGGBB', 'aARRGGBB'
6782
+ :return: A hex color code in the format aARRGGBB.
6321
6783
 
6322
- # Otherwise, assume it's in RRGGBB format and prepend FF for opaque
6323
- if len(hex_color) == 6:
6324
- return f"FF{hex_color}"
6325
- if len(hex_color) == 7:
6326
- return f"F{hex_color}"
6327
- else:
6328
- if len(hex_color) > 8:
6329
- return hex_color[-9:]
6330
- else:
6331
- return "F" * (9 - len(hex_color)) + hex_color
6332
- raise ValueError(
6333
- "Invalid hex color format. Use RRGGBB, #RRGGBB, or aARRGGBB format."
6334
- )
6784
+ Example:
6785
+ print(hex2argb("blue")) # Output: FF0000FF
6786
+ print(hex2argb("FFFF00")) # Output: FFFFFF00
6787
+ print(hex2argb("#DF4245")) # Output: FFDf4245
6788
+ print(hex2argb("FF00FF00")) # Output: FF00FF00 (already in aARGB format)
6789
+ """
6790
+ import matplotlib.colors as mcolors
6791
+ import re
6792
+ color = color.lower().replace(" ", "")  # e.g. 'light blue' -> 'lightblue'
6793
+ # Convert color name (e.g., "blue") to hex
6794
+ if color.lower() in mcolors.CSS4_COLORS:
6795
+ color = mcolors.CSS4_COLORS[color.lower()].lstrip("#")
6796
+ color = color.lstrip("#").upper()# Remove '#' if present
6797
+
6798
+ # Validate hex format
6799
+ if not re.fullmatch(r"[A-F0-9]{6,8}", color):
6800
+ raise ValueError(f"格式错误❌: {color}, 应该使用 RRGGBB, #RRGGBB, or aARRGGBB format.")
6801
+
6802
+ # If already in aARRGGBB format (8 chars), return as is
6803
+ if len(color) == 8:
6804
+ return color
6805
+
6806
+ # If in RRGGBB format, add FF (full opacity) as alpha
6807
+ return f"FF{color}"
6335
6808
 
6336
6809
  def extract_kwargs(func):
6337
6810
  import inspect
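Editor's note: the rewritten hex2argb above also resolves CSS4 color names via matplotlib before normalizing to the AARRGGBB strings openpyxl expects. For context, a sketch of feeding its output to openpyxl styles (assuming openpyxl and matplotlib are installed):

from openpyxl.styles import Font, PatternFill
from py2ls.ips import hex2argb

argb = hex2argb("light blue")            # color name -> 'FFADD8E6'
font = Font(color=hex2argb("#DF4245"))   # hex -> 'FFDF4245'
fill = PatternFill(start_color=argb, end_color=argb, fill_type="solid")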
@@ -6347,29 +6820,152 @@ def extract_kwargs(func):
6347
6820
 
6348
6821
  return kwargs
6349
6822
  def format_excel(
6350
- df=None,
6351
- filename=None,
6352
- sheet_name=0,
6353
- usage=False,
6354
- cell=None, # dict: or list for multiple locs setting:
6355
- width=None, # dict
6356
- width_factor=2,# calculated with plus this factor
6357
- height=None, # dict e.g., {2: 50, 3: 25}, keys are columns
6358
- height_max=25,
6359
- merge=None, # tuple e.g., (slice(0, 1), slice(1, 3)),
6360
- shade=None, # dict
6361
- comment=None, # dict e.g., {(2, 4): "This is a comment"},
6823
+ df: pd.DataFrame=None,
6824
+ filename:str=None,
6825
+ sheet_name:Union[str, int]=0,
6826
+ insert_img:dict=None,# {"A1":img_path}
6827
+ usage:bool=False,
6828
+ text_color:Union[dict,bool]=False, # dict: set the text color
6829
+ bg_color:Union[dict,bool]=False, # dict: set the back_ground color
6830
+ cell:Union[dict, list]=None, # dict: or list for multiple locs setting:
6831
+ width:Union[bool, dict]=None, # dict
6832
+ width_factor:int=2,# calculated with plus this factor
6833
+ height:Union[bool, dict]=None, # dict e.g., {2: 50, 3: 25}, keys are columns
6834
+ height_max:int=25,
6835
+ merge:tuple=None, # tuple e.g., (slice(0, 1), slice(1, 3)),
6836
+ shade:Union[dict, list]=None, # dict
6837
+ comment:Union[dict, list]=None, # dict e.g., {(2, 4): "This is a comment"},
6362
6838
  comment_always_visible:bool=True,# always display comment
6363
- link=None, # dict e.g., {(2, 2): "https://example.com"},
6364
- protect=None, # dict
6365
- number_format=None, # dict: e.g., {1:"0.00", 2:"#,##0",3:"0%",4:"$#,##0.00"}
6839
+ link:Union[dict, list]=None, # dict e.g., {(2, 2): "https://example.com"},
6840
+ protect:dict=None, # dict
6841
+ number_format:dict=None, # dict: e.g., {1:"0.00", 2:"#,##0",3:"0%",4:"$#,##0.00"}
6366
6842
  data_validation=None, # dict
6367
6843
  apply_filter:bool=True, # add filter
6368
6844
  freeze :str= False,#"A2",
6369
- conditional_format=None, # dict
6370
- verbose=True,
6845
+ conditional_format:dict=None, # dict
6846
+ verbose:bool=False,
6371
6847
  **kwargs,
6372
6848
  ):
6849
+ """
6850
+ Parameters:
6851
+ df : pandas.DataFrame, optional
6852
+ DataFrame to be written to the Excel file.
6853
+ filename : str, optional
6854
+ Path to the output Excel file.
6855
+ sheet_name : str or int, default 0
6856
+ Name or index of the sheet where data will be written.
6857
+ insert_img : dict, optional
6858
+ Dictionary specifying image insert locations, e.g., {"A1": "path/to/image.png"}.
6859
+ usage : bool, default False
6860
+ If True, display usage examples.
6861
+ cell : dict or list, optional
6862
+ Specifies cell formatting options.
6863
+ width : dict, optional
6864
+ Dictionary specifying column widths, e.g., {1: 20, 2: 30}.
6865
+ width_factor : int, default 2
6866
+ Additional factor to adjust column width dynamically.
6867
+ height : dict, optional
6868
+ Dictionary specifying row heights, e.g., {2: 50, 3: 25}.
6869
+ height_max : int, default 25
6870
+ Maximum row height allowed.
6871
+ merge : tuple, optional
6872
+ Specifies cell merging, e.g., (slice(0, 1), slice(1, 3)).
6873
+ shade : dict, optional
6874
+ Dictionary defining cell shading/styling.
6875
+ comment : dict, optional
6876
+ Dictionary adding comments, e.g., {(2, 4): "This is a comment"}.
6877
+ comment_always_visible : bool, default True
6878
+ Whether comments should always be visible.
6879
+ link : dict, optional
6880
+ Dictionary specifying hyperlinks, e.g., {(2, 2): "https://example.com"}.
6881
+ protect : dict, optional
6882
+ Dictionary defining cell protection settings.
6883
+ number_format : dict, optional
6884
+ Dictionary specifying number formats, e.g., {1: "0.00", 2: "#,##0"}.
6885
+ data_validation : dict, optional
6886
+ Dictionary setting data validation rules.
6887
+ apply_filter : bool, default True
6888
+ Whether to apply filters to the header row.
6889
+ freeze : str, optional
6890
+ Cell reference (e.g., "A2") to freeze rows/columns.
6891
+ conditional_format : dict, optional
6892
+ Dictionary defining conditional formatting rules.
6893
+ verbose : bool, default False
6894
+ Whether to print detailed execution logs.
6895
+ **kwargs : dict
6896
+ Additional parameters for advanced customization.
6897
+ """
6898
+
6899
+ usage_str="""
6900
+ Formats an Excel file with various styling options.
6901
+ Usage:
6902
+ fsave(
6903
+ dir_save,
6904
+ fload(dir_save, output="bit", sheet_name=sheet_name),
6905
+ sheet_name=sheet_name,
6906
+ if_sheet_exists="overlay",
6907
+ mode="a",
6908
+ width_factor=0,
6909
+ height={1: 50},
6910
+ cell=[
6911
+ {
6912
+ (slice(0, 1), slice(0, df_exists.shape[1])): {
6913
+ "fill": {
6914
+ "start_color": "61AFEF", # Starting color
6915
+ "end_color": "61AFEF", # Ending color (useful for gradients)
6916
+ "fill_type": "solid", # Fill type (solid, gradient, etc.)
6917
+ },
6918
+ "font": {
6919
+ "name": "Arial", # Font name
6920
+ "size": 11, # Font size
6921
+ "bold": True, # Bold text
6922
+ "italic": False, # Italic text
6923
+ # "underline": "single", # Underline (single, double)
6924
+ "color": "#000000", # Font color
6925
+ },
6926
+ "alignment": {
6927
+ "horizontal": "center", # Horizontal alignment (left, center, right)
6928
+ "vertical": "center", # Vertical alignment (top, center, bottom)
6929
+ "wrap_text": True, # Wrap text in the cell
6930
+ "shrink_to_fit": True, # Shrink text to fit within cell
6931
+ "text_rotation": 0, # Text rotation angle
6932
+ },
6933
+ }
6934
+ },
6935
+ {
6936
+ (
6937
+ slice(0, df_exists.shape[0]),
6938
+ slice(0, df_exists.shape[1]),
6939
+ ): {
6940
+ "alignment": {
6941
+ "horizontal": "center", # Horizontal alignment (left, center, right)
6942
+ "vertical": "center", # Vertical alignment (top, center, bottom)
6943
+ "wrap_text": True, # Wrap text in the cell
6944
+ "shrink_to_fit": True, # Shrink text to fit within cell
6945
+ "text_rotation": 0, # Text rotation angle
6946
+ },
6947
+ }
6948
+ },
6949
+ {
6950
+ (slice(0, df_exists.shape[0]), slice(2, 3)): {
6951
+ "alignment": {
6952
+ "horizontal": "left", # Horizontal alignment (left, center, right)
6953
+ },
6954
+ }
6955
+ },
6956
+ {
6957
+ (slice(0, df_exists.shape[0]), slice(7, 8)): {
6958
+ "alignment": {
6959
+ "horizontal": "left", # Horizontal alignment (left, center, right)
6960
+ },
6961
+ }
6962
+ },
6963
+ ],
6964
+ password=False, # depass("ogB3B7y3xR9iuH4QIQbyy6VXG14I0A8DlsTxyiGqg1U="),
6965
+ )
6966
+ """
6967
+ if verbose:
6968
+ print(usage_str)
6373
6969
  import pandas as pd
6374
6970
  from datetime import datetime
6375
6971
  import openpyxl
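Editor's note: beyond the usage string embedded above, a compact sketch of calling format_excel directly with a few of the documented keywords (file name and values are placeholders):

import pandas as pd
from py2ls.ips import format_excel

df = pd.DataFrame({"name": ["a", "b"], "score": [12.345, 31.2]})
format_excel(
    df=df,
    filename="scores.xlsx",
    sheet_name="Sheet1",
    width={1: 20, 2: 12},            # column widths
    height={1: 30},                  # header row height
    number_format={2: "0.00"},       # score column with two decimals
    comment={(1, 1): "exported by format_excel"},
    freeze="A2",
    apply_filter=True,
)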
@@ -6379,7 +6975,7 @@ def format_excel(
6379
6975
  from openpyxl.worksheet.datavalidation import DataValidation
6380
6976
  from openpyxl.comments import Comment
6381
6977
  from openpyxl.formatting.rule import ColorScaleRule, DataBarRule, IconSetRule,IconSet
6382
-
6978
+ from openpyxl.utils import get_column_letter
6383
6979
  def convert_indices_to_range(row_slice, col_slice):
6384
6980
  """Convert numerical row and column slices to Excel-style range strings."""
6385
6981
  start_row = row_slice.start + 1
@@ -6394,7 +6990,109 @@ def format_excel(
6394
6990
  if end_col_letter
6395
6991
  else f"{start_col_letter}{start_row}"
6396
6992
  )
6993
+ def apply_color_to_worksheet(ws=None, sheet_name=None, conditions=None, cell_idx=None,where="text"):
6994
+ """
6995
+ Apply text color formatting to a specific cell range in an openpyxl workbook based on conditions.
6996
+
6997
+ Parameters:
6998
+ ws : worksheet
6999
+ The openpyxl worksheet object to style.
7000
+ sheet_name : str
7001
+ The name of the sheet to style.
7002
+ conditions : dict
7003
+ Dictionary defining conditions for text or background coloring.
7004
+ Example:
7005
+ {
7006
+ ">10": "#FF0000", # Red if value is greater than 10
7007
+ "contains:Error": "#FFFF00", # Yellow if text contains 'Error'
7008
+ "startswith:Warn": "#FFA500" # Orange if text starts with 'Warn'
7009
+ }
7010
+ cell_idx : tuple, optional
7011
+ A tuple of slices defining the selected row and column range (only for DataFrame).
7012
+ where : str, default="text"
7013
+ "text" -> Apply color to text, "bg" -> Apply color to background.
6397
7014
 
7015
+ Returns:
7016
+ openpyxl.worksheet.worksheet.Worksheet
7017
+ The worksheet with applied formatting.
7018
+ """
7019
+ def evaluate_condition(value, condition):
7020
+ """Evaluate the condition dynamically."""
7021
+ if not isinstance(conditions, dict):
7022
+ raise ValueError(f"condition必须是dict格式:e.g., {'x>=20':'#DD0531', 'startswith:Available':'#DD0531'}")
7023
+ try:
7024
+ if "x" in condition and re.search(r"[<>=!]=*", condition):
7025
+ expr = condition.replace("x", str(value))
7026
+ return eval(expr)
7027
+ elif condition.startswith("startswith:") or condition.startswith("startwith:"):
7028
+ return value.startswith(condition.split(":", 1)[1])
7029
+ elif condition.startswith("endswith:") or condition.startswith("endwith:"):
7030
+ return value.endswith(condition.split(":", 1)[1])
7031
+ elif condition.startswith("contains:") or condition.startswith("contain:") or condition.startswith("include:"):
7032
+ return condition.split(":", 1)[1] in value
7033
+ elif condition.startswith("matches:") or condition.startswith("match:"):
7034
+ return re.search(condition.split(":", 1)[1], value) is not None
7035
+ else:
7036
+ expr = condition
7037
+ return False
7038
+ except Exception as e:
7039
+ return False
7040
+
7041
+ def apply_condition_to_cell_text_color(cell, value):
7042
+ """Apply color to a cell if it matches any condition."""
7043
+ for condition, color in conditions.items():
7044
+ if evaluate_condition(value, condition):
7045
+ # Apply color to font
7046
+ cell.font = openpyxl.styles.Font(
7047
+ color=openpyxl.styles.Color(rgb=hex2argb(color))
7048
+ )
7049
+ return
7050
+ def apply_condition_to_cell_bg_color(cell, value):
7051
+ """Apply background color to a cell if it matches any condition."""
7052
+ for condition, color in conditions.items():
7053
+ if evaluate_condition(value, condition):
7054
+ if not isinstance(color,list):
7055
+ color=[color]
7056
+ if len(color)==1:
7057
+ cell.fill = PatternFill(
7058
+ start_color=hex2argb(color[0]),
7059
+ end_color=hex2argb(color[0]),
7060
+ fill_type="solid"
7061
+ )
7062
+ elif len(color)==2:
7063
+ cell.fill = PatternFill(
7064
+ start_color=hex2argb(color[0]),
7065
+ end_color=hex2argb(color[1]),
7066
+ fill_type="solid"
7067
+ )
7068
+ return
7069
+ if isinstance(cell_idx, tuple):
7070
+ # If cell_idx is provided, select a range based on the slice
7071
+ row_slice, col_slice = cell_idx
7072
+ rows = list(
7073
+ ws.iter_rows(
7074
+ min_row=row_slice.start + 1,
7075
+ max_row=row_slice.stop,
7076
+ min_col=col_slice.start + 1,
7077
+ max_col=col_slice.stop,
7078
+ )
7079
+ )
7080
+ for row in rows:
7081
+ for cell in row:
7082
+ if where=="text":
7083
+ apply_condition_to_cell_text_color(cell, cell.value)
7084
+ elif where=="bg":
7085
+ apply_condition_to_cell_bg_color(cell, cell.value)
7086
+ else:
7087
+ # If no cell_idx is provided, apply to all cells
7088
+ for row in ws.iter_rows():
7089
+ for cell in row:
7090
+ if where=="text":
7091
+ apply_condition_to_cell_text_color(cell, cell.value)
7092
+ elif where=="bg":
7093
+ apply_condition_to_cell_bg_color(cell,cell.value)
7094
+ return ws
7095
+
6398
7096
  def apply_format(ws, cell, cell_range):
6399
7097
  """Apply cell formatting to a specified range."""
6400
7098
  cell_font, cell_fill, cell_alignment, border = None, None, None, None
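Editor's note: the condition keys accepted by apply_color_to_worksheet above (and therefore by the new text_color / bg_color arguments) are either comparison expressions over x or prefixed string tests (startswith:, endswith:, contains:, matches:). A sketch of wiring such a rules dict into format_excel (ranges and colors are illustrative):

import pandas as pd
from py2ls.ips import format_excel

df = pd.DataFrame({"status": ["OK", "Error: disk", "Warn: slow"], "value": [5, 25, 12]})
rules = {
    "x>20": "#DD0531",            # numeric comparison; the cell value is substituted for x
    "contains:Error": "#EAB107",  # substring test
    "startswith:Warn": "#C615BE", # prefix test
}
format_excel(
    df=df,
    filename="status.xlsx",
    sheet_name="Sheet1",
    text_color=[{(slice(1, df.shape[0] + 1), slice(0, df.shape[1])): rules}],
)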
@@ -6405,7 +7103,7 @@ def format_excel(
6405
7103
  font_color = "000000"
6406
7104
  font_name = "Arial"
6407
7105
  font_underline = "none"
6408
- font_size = 14
7106
+ font_size = 11
6409
7107
  font_bold = False
6410
7108
  font_strike = False
6411
7109
  font_italic = False
@@ -6742,6 +7440,8 @@ def format_excel(
6742
7440
  counter += 1
6743
7441
  unique_name = f"{sheet_name}_{counter}"
6744
7442
  return unique_name
7443
+
7444
+
6745
7445
  # if it is already worksheet format
6746
7446
  if isinstance(df, pd.DataFrame):
6747
7447
  pass
@@ -6763,55 +7463,52 @@ def format_excel(
6763
7463
  kwargs.pop("format", None) # 更好地跟fsave结合使用
6764
7464
  kwargs.pop("sheet_name", 0) # 更好地跟df.to_excel结合使用
6765
7465
  # only the openpyxl engine supports append mode
6766
- mode = strcmp(kwargs.get("mode", "auto"), ["a", "w","auto"])[0]
7466
+ mode = strcmp(kwargs.get("mode", "a"), ["a", "w","auto"])[0]
7467
+ # print(f'mode="{mode}"')
6767
7468
  kwargs.pop("mode", None)
6768
7469
  engine = strcmp(kwargs.get("engine", "openpyxl"), ["xlsxwriter", "openpyxl"])[0]
7470
+ # corr engine
7471
+ engine="openpyxl" if mode=="a" else "xlsxwriter"
7472
+ # print(f'engine="{engine}"')
7473
+ if_sheet_exists=kwargs.get("if_sheet_exists","replace")
6769
7474
  # usually there is no need to save the index
6770
7475
  index = kwargs.get("index", False)
6771
- kwargs.pop("index", None)
6772
7476
  # header
6773
7477
  header=kwargs.pop("header",False)
6774
-
7478
+ password = kwargs.pop("password", None) # Use kwargs if provided
7479
+
7480
+ kwargs.pop("password", None)
7481
+ kwargs.pop("header", None)
7482
+ kwargs.pop("index", None)
7483
+ kwargs.pop("if_sheet_exists", None)
6775
7484
  if isinstance(df, openpyxl.workbook.workbook.Workbook):
7485
+ """打开Sheet_name指定的表格,如果该表不存在,则创建一个新的或从现有文件中加载数据"""
6776
7486
  wb=df
6777
7487
  try:
6778
7488
  ws = wb.worksheets[sheet_name]
6779
7489
  except Exception as e:
6780
- print(e)
7490
+ print(f'mode="{mode}"')
6781
7491
  if not os.path.exists(filename) or mode=="w":
6782
7492
  ws=wb.active
6783
7493
  ws.title = sheet_name
6784
7494
  else:# file exists
6785
7495
  wb = load_workbook(filename)
6786
- # check the shtname and get the new sheet_name
6787
- sheet_name_corr=generate_unique_sheet_name(wb, sheet_name)
6788
-
6789
- # Save the workbook with the new sheet name
6790
- with pd.ExcelWriter(filename, mode="a", engine=engine, if_sheet_exists="new") as writer:
7496
+ with pd.ExcelWriter(filename, mode="a", engine=engine, if_sheet_exists=if_sheet_exists) as writer:
6791
7497
  for ws in df.worksheets: # Iterate through worksheets in the input workbook
6792
7498
  ws_df = pd.DataFrame(ws.values)
6793
- ws_df.to_excel(writer,
6794
- sheet_name=sheet_name_corr,
6795
- index=index,
6796
- header=header,
6797
- **kwargs)
6798
- wb = load_workbook(filename)
6799
- print(sheet_name,sheet_name_corr)
6800
- print(wb.sheetnames)
6801
- if sheet_name_corr in wb.sheetnames:
6802
- ws = wb[sheet_name_corr]
6803
- if not sheet_name==sheet_name_corr:
7499
+ ws_df.to_excel(writer,sheet_name=sheet_name,index=index,header=header,**kwargs)
7500
+ # re-open the workbook that was just written
7501
+ wb = load_workbook(filename)
7502
+ if sheet_name in wb.sheetnames:
7503
+ ws = wb[sheet_name]
7504
+ if sheet_name != sheet_name:  # always False; leftover from the removed sheet_name_corr logic, so the removal below never runs
6804
7505
  wb.remove(wb[sheet_name])
6805
7506
  else:
6806
- raise KeyError(f"Worksheet {sheet_name_corr} does not exist.")
7507
+ raise KeyError(f"Worksheet {sheet_name} does not exist.")
6807
7508
  else:
6808
- if not os.path.exists(filename) or mode=="w": # or overwrite
6809
- # save file
6810
- sheet_name_corr = (
6811
- sheet_name if isinstance(sheet_name, str) else f"Sheet_{sheet_name}"
6812
- )
7509
+ if not os.path.exists(filename) or mode=="w": # or overwrite
6813
7510
  with pd.ExcelWriter(filename, mode="w", engine=engine) as writer:
6814
- df.to_excel(writer, sheet_name=sheet_name_corr, index=index, header=header,**kwargs)
7511
+ df.to_excel(writer, sheet_name=sheet_name, index=index, header=header,**kwargs)
6815
7512
  wb = load_workbook(filename)
6816
7513
  if isinstance(sheet_name, str):
6817
7514
  ws = wb[sheet_name]
@@ -6821,15 +7518,75 @@ def format_excel(
6821
7518
  ws = wb.worksheets[sheet_name] # the index of worksheets
6822
7519
  else:# file exists
6823
7520
  wb = load_workbook(filename)
6824
- sheet_name_corr = generate_unique_sheet_name(wb, sheet_name)
6825
- with pd.ExcelWriter(filename, mode="a", engine=engine, if_sheet_exists="new") as writer:
6826
- df.to_excel(writer, sheet_name=sheet_name_corr, index=index, header=header,**kwargs)
7521
+ with pd.ExcelWriter(filename, mode="a", engine=engine, if_sheet_exists=if_sheet_exists) as writer:
7522
+ df.to_excel(writer, sheet_name=sheet_name, index=index, header=header,**kwargs)
6827
7523
  wb = load_workbook(filename)
6828
- if sheet_name_corr in wb.sheetnames:
6829
- ws = wb[sheet_name_corr]
7524
+ if sheet_name in wb.sheetnames:
7525
+ ws = wb[sheet_name]
6830
7526
  else:
6831
- raise KeyError(f"Worksheet {sheet_name_corr} does not exist.")
6832
-
7527
+ raise KeyError(f"Worksheet {sheet_name} does not exist.")
7528
+ # ! Apply Text color
7529
+ if text_color:
7530
+ if verbose:
7531
+ text_color_str="""
7532
+ text_color=[
7533
+ {
7534
+ (slice(1, 2), slice(0, 3)): {
7535
+ "x>20": "#DD0531", # Numbers > 20 → red
7536
+ "x<=8": "#35B20C", # Numbers ≤ 10 → blue
7537
+ "'x'!='available'": "#0510DD", # 'available' → green
7538
+ "10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
7539
+ "10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
7540
+ }
7541
+ },
7542
+ {
7543
+ (slice(3, df.shape[0] + 1), slice(0, 3)): {
7544
+ "x>20": "#DD0531", # Numbers > 20 → red
7545
+ "x<=10": "#35B20C", # Numbers ≤ 10 → blue
7546
+ "'x'!='available'": "#0510DD", # 'available' → green
7547
+ "10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
7548
+ "10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
7549
+ }
7550
+ },
7551
+ ],
7552
+ """
7553
+ print(text_color_str)
7554
+ if not isinstance(text_color, list):
7555
+ text_color=[text_color]
7556
+ for text_color_ in text_color:
7557
+ for indices, dict_text_conditions in text_color_.items():
7558
+ ws = apply_color_to_worksheet(ws, sheet_name=sheet_name, conditions=dict_text_conditions, cell_idx=indices,where="text")
7559
+ # ! Apply background color
7560
+ if bg_color:
7561
+ if verbose:
7562
+ bg_color_str="""
7563
+ bg_color=[
7564
+ {
7565
+ (slice(1, 2), slice(0, 3)): {
7566
+ "x>20": ["#DD0531","#35B20C"], # Numbers > 20 → red
7567
+ "x<=8": "#35B20C", # Numbers ≤ 10 → blue
7568
+ "'x'!='available'": "#0510DD", # 'available' → green
7569
+ "10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
7570
+ "10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
7571
+ }
7572
+ },
7573
+ {
7574
+ (slice(3, df.shape[0] + 1), slice(0, 3)): {
7575
+ "x>20": "#DD0531", # Numbers > 20 → red
7576
+ "x<=10": "#35B20C", # Numbers ≤ 10 → blue
7577
+ "'x'!='available'": "#0510DD", # 'available' → green
7578
+ "10<x<=30": "#EAB107", # 10 < value ≤ 30 → orange
7579
+ "10<=x<30": "#C615BE", # 10 ≤ value < 30 → purple
7580
+ }
7581
+ },
7582
+ ],
7583
+ """
7584
+ print(bg_color_str)
7585
+ if not isinstance(bg_color, list):
7586
+ bg_color=[bg_color]
7587
+ for bg_color_ in bg_color:
7588
+ for indices, dict_text_conditions in bg_color_.items():
7589
+ ws = apply_color_to_worksheet(ws, sheet_name=sheet_name, conditions=dict_text_conditions, cell_idx=indices,where="bg")
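The verbose templates above document the expected shape of text_color and bg_color: a list of dicts keyed by (row slice, column slice) tuples, each mapping a condition string to a colour. A hypothetical call sketch; the keyword names df, filename and sheet_name are inferred from the variables used in this hunk rather than from a visible signature:

    import pandas as pd
    from py2ls.ips import format_excel   # assumed import path

    df = pd.DataFrame({"score": [5, 18, 42], "status": ["available", "sold", "n/a"]})
    format_excel(
        df=df,
        filename="demo.xlsx",
        sheet_name="Sheet1",
        text_color=[{(slice(1, 4), slice(0, 1)): {"x>20": "#DD0531",      # red text
                                                  "x<=8": "#35B20C"}}],   # green text
        bg_color=[{(slice(1, 4), slice(1, 2)): {"'x'!='available'": "#EAB107"}}],  # amber fill
    )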
6833
7590
  # !Apply cell formatting
6834
7591
  if cell:
6835
7592
  if not isinstance(cell, list):
@@ -6838,6 +7595,7 @@ def format_excel(
6838
7595
  for indices, format_options in cell_.items():
6839
7596
  cell_range = convert_indices_to_range(*indices)
6840
7597
  apply_format(ws, format_options, cell_range)
7598
+
6841
7599
  if verbose:
6842
7600
  cell_tmp="""cell=[
6843
7601
  {
@@ -6918,14 +7676,16 @@ def format_excel(
6918
7676
  print(number_format_temp)
6919
7677
 
6920
7678
  if freeze:
7679
+ if isinstance(freeze,bool):
7680
+ freeze='A2'
6921
7681
  ws.freeze_panes = freeze # Freeze everything above and to the left of A2
6922
7682
  if apply_filter:
6923
7683
  if isinstance(apply_filter, bool):
6924
7684
  # Default: Apply filter to the entire first row (header)
6925
7685
  filter_range = f"A1:{get_column_letter(ws.max_column)}1"
6926
7686
  ws.auto_filter.ref = filter_range
6927
- if not freeze:
6928
- ws.freeze_panes = "A2" # Freeze everything above and to the left of A2
7687
+ if not freeze:
7688
+ ws.freeze_panes = "A2" # Freeze everything above and to the left of A2
6929
7689
  elif isinstance(apply_filter, tuple):
6930
7690
  row_slice, col_slice = apply_filter
6931
7691
  # Extract the start and end indices for rows and columns
@@ -6949,7 +7709,11 @@ def format_excel(
6949
7709
  ws.auto_filter.ref = filter_range
6950
7710
  if freeze:
6951
7711
  ws.freeze_panes = freeze # Freeze everything above and to the left of A2
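The freeze and apply_filter handling above maps onto two openpyxl worksheet attributes; a minimal standalone sketch (file name illustrative):

    from openpyxl import Workbook
    from openpyxl.utils import get_column_letter

    wb = Workbook()
    ws = wb.active
    ws.append(["name", "value", "unit"])
    ws.append(["a", 1, "mm"])

    ws.freeze_panes = "A2"                                            # keep the header row visible
    ws.auto_filter.ref = f"A1:{get_column_letter(ws.max_column)}1"    # filter on the header row
    wb.save("frozen_filtered_demo.xlsx")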
6952
- # !widths
7712
+ # !widths
7713
+ if isinstance(width,bool):
7714
+ width = None if width else False  # True → auto-fit widths (None), False → leave widths untouched
7715
+ if isinstance(height,bool):
7716
+ height = None if height else False  # True → auto-fit heights (None), False → leave heights untouched
6953
7717
  if width is None: # automatic adust width
6954
7718
  for col in ws.columns:
6955
7719
  max_length = 0
@@ -6971,6 +7735,8 @@ def format_excel(
6971
7735
  pass
6972
7736
  adjusted_width = max_length + width_factor # You can adjust the padding value as needed
6973
7737
  ws.column_dimensions[column].width = adjusted_width
7738
+ elif isinstance(width,bool):
7739
+ pass
6974
7740
  else:
6975
7741
  for col_idx, width_ in width.items():
6976
7742
  col_letter = get_column_letter(col_idx)
@@ -6989,6 +7755,8 @@ def format_excel(
6989
7755
  estimated_height += 5 * (max_line_length // 20)
6990
7756
  max_height = max(max_height, estimated_height)
6991
7757
  ws.row_dimensions[row[0].row].height = max_height
7758
+ elif isinstance(height,bool) and not height:
7759
+ pass
6992
7760
  else:
6993
7761
  for row, height_ in height.items():
6994
7762
  ws.row_dimensions[row].height = height_
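When width or height is passed as a dict, the branches above end up assigning openpyxl dimension objects directly; a minimal sketch of those assignments (values illustrative):

    from openpyxl import Workbook
    from openpyxl.utils import get_column_letter

    wb = Workbook()
    ws = wb.active
    ws["A1"] = "short"
    ws["B1"] = "a considerably longer header"

    ws.column_dimensions[get_column_letter(2)].width = 32   # column B, in character units
    ws.row_dimensions[1].height = 28                        # row 1, in points
    wb.save("sized_demo.xlsx")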
@@ -7073,7 +7841,6 @@ def format_excel(
7073
7841
  )
7074
7842
  # !Protect sheet with a password
7075
7843
  # Fetch the password
7076
- password = kwargs.pop("password", None) # Use kwargs if provided
7077
7844
 
7078
7845
  if all([password is not None, any([protect, isinstance(password, (str, list, tuple)) and any(password)])]): # Check if protection options are provided
7079
7846
  if protect is None:
@@ -7111,39 +7878,71 @@ def format_excel(
7111
7878
  for rule in rules:
7112
7879
  # Handle color scale
7113
7880
  if "color_scale" in rule:
7114
- color_scale = rule["color_scale"]
7115
- start_color = hex2argb(color_scale.get("start_color", "FFFFFF"))
7116
- mid_color = hex2argb(color_scale.get("mid_color", "FFFFFF"))
7117
- end_color = hex2argb(color_scale.get("end_color", "FFFFFF"))
7881
+ if verbose:
7882
+ color_scale_tmp="""
7883
+ conditional_format={
7884
+ (slice(1, df.shape[0] + 1), slice(1, 2)):
7885
+ {
7886
+ "color_scale": {
7887
+ "start_type": "min",
7888
+ "start_value": 0,
7889
+ "start_color": "#74ADE9",
7890
+ "mid_type": "percentile",
7891
+ "mid_value": 50,
7892
+ "mid_color": "74ADE9",
7893
+ "end_type": "max",
7894
+ "end_value": 100,
7895
+ "end_color": "#B62833",
7896
+ }
7897
+ }}
7898
+ """
7899
+ print(color_scale_tmp)
7900
+ color_scale = rule["color_scale"]
7118
7901
 
7119
7902
  color_scale_rule = ColorScaleRule(
7120
7903
  start_type=color_scale.get("start_type", "min"),
7121
- start_value=color_scale.get("start_value"),
7122
- start_color=start_color,
7123
- mid_type=color_scale.get("mid_type"),
7124
- mid_value=color_scale.get("mid_value"),
7125
- mid_color=mid_color,
7904
+ start_value=color_scale.get("start_value",None),
7905
+ start_color=hex2argb(color_scale.get("start_color", "#74ADE9")),
7906
+ mid_type=color_scale.get("mid_type","percentile"),
7907
+ mid_value=color_scale.get("mid_value",None),
7908
+ mid_color=hex2argb(color_scale.get("mid_color", "FFFFFF")),
7126
7909
  end_type=color_scale.get("end_type", "max"),
7127
- end_value=color_scale.get("end_value"),
7128
- end_color=end_color,
7910
+ end_value=color_scale.get("end_value",None),
7911
+ end_color=hex2argb(color_scale.get("end_color", "#B62833")),
7129
7912
  )
7130
7913
  ws.conditional_formatting.add(cell_range, color_scale_rule)
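The color_scale branch above assembles a standard openpyxl ColorScaleRule; a self-contained sketch using the same default colours as the template (cell range and file name illustrative):

    from openpyxl import Workbook
    from openpyxl.formatting.rule import ColorScaleRule

    wb = Workbook()
    ws = wb.active
    for i, v in enumerate(range(0, 101, 10), start=1):
        ws.cell(row=i, column=1, value=v)

    rule = ColorScaleRule(start_type="min", start_color="FF74ADE9",
                          mid_type="percentile", mid_value=50, mid_color="FFFFFFFF",
                          end_type="max", end_color="FFB62833")
    ws.conditional_formatting.add("A1:A11", rule)
    wb.save("color_scale_demo.xlsx")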
7131
7914
  # Handle data bar
7132
7915
  if "data_bar" in rule:
7916
+ if verbose:
7917
+ data_bar_tmp="""
7918
+ conditional_format={
7919
+ (slice(1, df.shape[0] + 1), slice(1, 2)):
7920
+ {
7921
+ "data_bar": {
7922
+ "start_type": "min",
7923
+ "start_value": None,
7924
+ "end_type": "max",
7925
+ "end_value": None,
7926
+ "color": "F6C9CE",
7927
+ "show_value": True,
7928
+ }
7929
+ }}
7930
+ """
7931
+ print(data_bar_tmp)
7133
7932
  data_bar = rule["data_bar"]
7134
7933
  bar_color = hex2argb(data_bar.get("color", "638EC6"))
7135
7934
 
7136
7935
  data_bar_rule = DataBarRule(
7137
7936
  start_type=data_bar.get("start_type", "min"),
7138
- start_value=data_bar.get("start_value"),
7937
+ start_value=data_bar.get("start_value",None),
7139
7938
  end_type=data_bar.get("end_type", "max"),
7140
- end_value=data_bar.get("end_value"),
7939
+ end_value=data_bar.get("end_value",None),
7141
7940
  color=bar_color,
7142
7941
  showValue=data_bar.get("show_value", True),
7143
7942
  )
7144
7943
  ws.conditional_formatting.add(cell_range, data_bar_rule)
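Likewise, the data_bar branch wraps openpyxl's DataBarRule; a minimal sketch (values illustrative):

    from openpyxl import Workbook
    from openpyxl.formatting.rule import DataBarRule

    wb = Workbook()
    ws = wb.active
    for i, v in enumerate([3, 7, 12, 25, 40], start=1):
        ws.cell(row=i, column=1, value=v)

    bar = DataBarRule(start_type="min", end_type="max",
                      color="FF638EC6", showValue=True)
    ws.conditional_formatting.add("A1:A5", bar)
    wb.save("data_bar_demo.xlsx")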
7145
7944
 
7146
- # Handle icon set
7945
+ # Handle icon sets
7147
7946
  if "icon_set" in rule:
7148
7947
  icon_set = rule["icon_set"]
7149
7948
  icon_set_rule = IconSet(
@@ -7152,6 +7951,34 @@ def format_excel(
7152
7951
  reverse=icon_set.get("reverse", False) # Corrected
7153
7952
  )
7154
7953
  ws.conditional_formatting.add(cell_range, icon_set_rule)
7954
+ # Handle text-based conditions
7955
+ if "text_color" in rule: # not work
7956
+ from openpyxl.styles.differential import DifferentialStyle
7957
+ from openpyxl.formatting.rule import Rule
7958
+ from openpyxl.styles import PatternFill
7959
+
7960
+ # Extract the fill properties from the rule
7961
+ fill = rule.get("fill", {})
7962
+ start_color = fill.get("start_color", "FFFFFF") # Default to white if not specified
7963
+ end_color = fill.get("end_color", "FFFFFF") # Default to white if not specified
7964
+ fill_type = fill.get("fill_type", "solid") # Default to solid fill if not specified
7965
+
7966
+ # Extract the text condition or default to a space if 'text' is not provided
7967
+ text = rule.get("text", " ")
7968
+
7969
+ # Create the DifferentialStyle using the extracted fill settings
7970
+ dxf = DifferentialStyle(
7971
+ fill=PatternFill(start_color=start_color, end_color=end_color, fill_type=fill_type)
7972
+ )
7973
+
7974
+ # Create the text rule based on the text condition
7975
+ text_rule = Rule(
7976
+ type="containsText", # The type of condition
7977
+ operator=rule.get("operator", "equal"), # Default operator is "equal"
7978
+ text=text,
7979
+ dxf=dxf, # Apply the fill color from DifferentialStyle
7980
+ )
7981
+ ws.conditional_formatting.add(cell_range, text_rule)
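The containsText rule above is flagged as not working; the pattern shown in the openpyxl documentation additionally anchors a SEARCH formula on the top-left cell of the range, which may be what is missing here. A hedged sketch of that documented pattern, not the author's code:

    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill
    from openpyxl.styles.differential import DifferentialStyle
    from openpyxl.formatting.rule import Rule

    wb = Workbook()
    ws = wb.active
    ws.append(["available"])
    ws.append(["sold out"])

    dxf = DifferentialStyle(font=Font(color="FF9C0006"),
                            fill=PatternFill(bgColor="FFFFC7CE"))
    rule = Rule(type="containsText", operator="containsText", text="available", dxf=dxf)
    rule.formula = ['NOT(ISERROR(SEARCH("available",A1)))']   # A1 = top-left cell of the target range
    ws.conditional_formatting.add("A1:A2", rule)
    wb.save("contains_text_demo.xlsx")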
7155
7982
  if verbose:
7156
7983
  conditional_format_temp="""
7157
7984
  conditional_format={
@@ -7183,10 +8010,49 @@ def format_excel(
7183
8010
  }
7184
8011
  """
7185
8012
  print(conditional_format_temp)
8013
+ if insert_img:
8014
+ if not isinstance(insert_img, dict):
8015
+ raise ValueError('insert_img requires a dict, e.g. insert_img={"A1": "example.png"}')
8016
+ try:
8017
+ from openpyxl import drawing
8018
+ from PIL import Image
8019
+ import PIL
8020
+ for img_cell, img_data in insert_img.items():
8021
+ img_width = img_height = None
8022
+ pil_img=img_path = None
8023
+ if isinstance(img_data, dict):
8024
+ if "path" in img_data:
8025
+ img_path = img_data["path"] # File path
8026
+ img_ = drawing.image.Image(img_path)
8027
+ elif "image" in img_data:
8028
+ pil_img = img_data["image"] # PIL Image object
8029
+ elif "array" in img_data:
8030
+ pil_img = Image.fromarray(img_data["array"]) # Convert NumPy array to PIL Image
8031
+
8032
+ img_width = img_data.get("width", None)
8033
+ img_height = img_data.get("height", None)
8034
+ elif isinstance(img_data, str):
8035
+ img_path = img_data # Direct file path
8036
+ elif isinstance(img_data, (PIL.Image.Image,PIL.PngImagePlugin.PngImageFile)):
8037
+ pil_img = img_data # Direct PIL Image object
8038
+ elif isinstance(img_data, np.ndarray):
8039
+ pil_img = Image.fromarray(img_data) # Convert NumPy array to PIL Image
8040
+ elif pil_img:
8041
+ img_ = drawing.image.Image(pil_img)
8042
+
8043
+ # Set width and height if provided
8044
+ if img_width is not None:
8045
+ img_.width = img_width
8046
+ if img_height is not None:
8047
+ img_.height = img_height
8048
+ ws.add_image(img_, img_cell)
8049
+ print(f"✅ at {img_cell} inserted image: {os.path.basename(img_path)}")
7186
8050
 
8051
+ except Exception as e:
8052
+ print(e)
8053
+
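The insert_img branch ultimately hands an openpyxl Image to ws.add_image; a minimal sketch of that underlying API (Pillow must be installed, and the image path and anchor cell are illustrative):

    from openpyxl import Workbook
    from openpyxl.drawing.image import Image as XLImage

    wb = Workbook()
    ws = wb.active

    img = XLImage("example.png")        # path to an existing image file
    img.width, img.height = 320, 240    # size in pixels
    ws.add_image(img, "B2")             # top-left anchor cell
    wb.save("image_demo.xlsx")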
7187
8054
  # Save the workbook
7188
8055
  wb.save(filename)
7189
- print(f"Formatted Excel file saved as:\n{filename}")
7190
8056
 
7191
8057
 
7192
8058
  def preview(var):
@@ -7511,9 +8377,11 @@ def df_astype(
7511
8377
  columns: Optional[Union[str, List[str]]] = None,
7512
8378
  astype: str = None, # "datetime",
7513
8379
  skip_row: Union[str, list] = None,
8380
+ original_fmt:str=None,
7514
8381
  fmt: Optional[str] = None,
7515
8382
  inplace: bool = False,
7516
8383
  errors: str = "coerce", # Can be "ignore", "raise", or "coerce"
8384
+ verbose:bool=True,
7517
8385
  **kwargs,
7518
8386
  ) -> Optional[pd.DataFrame]:
7519
8387
  """
@@ -7585,13 +8453,21 @@ def df_astype(
7585
8453
  # correct the astype input
7586
8454
  if isinstance(astype, str):
7587
8455
  astype = strcmp(astype, astypes)[0]
7588
- print(f"converting as type: {astype}")
8456
+ print(f"converting as type: {astype}")
7589
8457
  elif isinstance(astype, dict):
7590
8458
  for col, dtype in astype.items():
7591
8459
  dtype = "date" if dtype == "day" else dtype
7592
- data["col"] = data["col"].adtype(strcmp(dtype, astypes)[0])
8460
+ target_dtype = strcmp(dtype, astypes)[0]
8461
+ try:
8462
+ if target_dtype == "datetime":
8463
+ data[col] = pd.to_datetime(data[col], format=original_fmt, errors=errors)
8464
+ elif target_dtype == "timedelta":
8465
+ data[col] = pd.to_timedelta(data[col], errors=errors)
8466
+ else:
8467
+ data[col] = data[col].astype(target_dtype)
8468
+ except Exception as e:
8469
+ print(f"Error converting column '{col}' to {target_dtype}: {e}")
7593
8470
  return data if not inplace else None
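The dict branch above relies on pandas' errors="coerce" behaviour, which turns unparsable values into NaT/NaN instead of raising; a small illustration:

    import pandas as pd

    df = pd.DataFrame({"when": ["2024-01-05", "2024-02-30", "not a date"]})
    # invalid dates and non-dates become NaT rather than raising
    df["when"] = pd.to_datetime(df["when"], format="%Y-%m-%d", errors="coerce")
    print(df["when"])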
7594
-
7595
8471
  # Ensure columns is a list
7596
8472
  if isinstance(columns, str):
7597
8473
  columns = [columns]
@@ -7613,9 +8489,17 @@ def df_astype(
7613
8489
  kwargs.pop("errors", None)
7614
8490
  # convert it as type: datetime
7615
8491
  if isinstance(column, int):
7616
- data.iloc[:, column] = pd.to_datetime(
7617
- data.iloc[:, column], format=fmt, errors=errors, **kwargs
7618
- )
8492
+ data.iloc[:, column] = pd.to_datetime(data.iloc[:, column], format=original_fmt, errors=errors, **kwargs) if original_fmt is not None else pd.to_datetime(data.iloc[:, column], errors=errors, **kwargs)
8493
+
8494
+ try:
8495
+ if fmt is not None:
8496
+ # data[column] = data[column].apply(lambda x: f"{x:{fmt}}")
8497
+ data.iloc[:, column] = data.iloc[:, column].apply(  # column is a positional index in this branch
8498
+ lambda x: x.strftime(fmt) if pd.notnull(x) else None
8499
+ )
8500
+ except Exception as e:
8501
+ print(f"设置格式的时候有误: {e}")
8502
+
7619
8503
  # further convert:
7620
8504
  if astype == "time":
7621
8505
  data.iloc[:, column] = data.iloc[:, column].dt.time
@@ -7636,11 +8520,20 @@ def df_astype(
7636
8520
  else:
7637
8521
  data[column] = (
7638
8522
  pd.to_datetime(
7639
- data[column], format=fmt, errors=errors, **kwargs
8523
+ data[column], format=original_fmt, errors=errors, **kwargs
7640
8524
  )
7641
- if fmt
8525
+ if original_fmt is not None
7642
8526
  else pd.to_datetime(data[column], errors=errors, **kwargs)
7643
8527
  )
8528
+
8529
+ try:
8530
+ if fmt is not None:
8531
+ # data[column] = data[column].apply(lambda x: f"{x:{fmt}}")
8532
+ data[column] = data[column].apply(
8533
+ lambda x: x.strftime(fmt) if pd.notnull(x) else None
8534
+ )
8535
+ except Exception as e:
8536
+ print(f"设置格式的时候有误: {e}")
7644
8537
  # further convert:
7645
8538
  if astype == "time":
7646
8539
  data[column] = data[column].dt.time
@@ -7677,16 +8570,12 @@ def df_astype(
7677
8570
  else:
7678
8571
  data[column] = data[column].astype(astype)
7679
8572
  # print(f"Successfully converted '{column}' to {astype}.")
7680
- # format
7681
- try:
7682
- if fmt is not None:
7683
- data[column] = data[column].apply(lambda x: f"{x:{fmt}}")
7684
- except Exception as e:
7685
- print(f"设置格式的时候有误: {e}")
8573
+
7686
8574
  except Exception as e:
7687
8575
  print(f"Error converting '{column}' to {astype}: {e}")
7688
8576
  try:
7689
- display(data.info()[:10])
8577
+ if verbose:
8578
+ display(data.info()[:10])
7690
8579
  except:
7691
8580
  pass
7692
8581
  return data
@@ -9867,6 +10756,7 @@ def df_qc(
9867
10756
  res_qc["dtype_counts"] = data.dtypes.value_counts()
9868
10757
 
9869
10758
  # Distribution Analysis (mean, median, mode, std dev, IQR for numeric columns)
10759
+
9870
10760
  distribution_stats = data.select_dtypes(include=[np.number]).describe().T
9871
10761
  iqr = data.select_dtypes(include=[np.number]).apply(
9872
10762
  lambda x: x.quantile(0.75) - x.quantile(0.25)