py2ls 0.2.4.24__py3-none-any.whl → 0.2.4.26__py3-none-any.whl

py2ls/ips.py CHANGED
@@ -16,17 +16,20 @@ import warnings
16
16
 
17
17
  warnings.simplefilter("ignore", category=pd.errors.SettingWithCopyWarning)
18
18
  warnings.filterwarnings("ignore", category=pd.errors.PerformanceWarning)
19
+ warnings.filterwarnings("ignore")
19
20
 
20
21
 
21
- def run_once_within(duration=60): # default 60s
22
+ def run_once_within(duration=60,reverse=False): # default 60s
22
23
  import time
23
24
 
24
25
  """
26
+ If reverse is True, do not run on the first call within the window; run on subsequent calls instead.
25
27
  usage:
26
28
  if run_once_within():
27
29
  print("This code runs once per minute.")
28
30
  else:
29
31
  print("The code has already been run in the last minute.")
32
+
30
33
  """
31
34
  if not hasattr(run_once_within, "time_last"):
32
35
  run_once_within.time_last = None
@@ -36,9 +39,9 @@ def run_once_within(duration=60): # default 60s
36
39
  time_curr - run_once_within.time_last >= duration
37
40
  ):
38
41
  run_once_within.time_last = time_curr # Update the last execution time
39
- return True
42
+ return False if reverse else True
40
43
  else:
41
- return False
44
+ return True if reverse else False
42
45
 
43
46
 
44
47
  def plt_font(dir_font: str = "/System/Library/Fonts/Hiragino Sans GB.ttc"):
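Note: with reverse=True the return value is inverted — the first call inside the duration window returns False and later calls inside the window return True, which is how the fload hunks below defer the use_pd(...) usage hints to repeat calls. A minimal sketch of the behaviour (assuming run_once_within is importable from py2ls.ips):

from py2ls.ips import run_once_within  # assumed import path

for i in range(3):
    if run_once_within(duration=60, reverse=True):
        print(f"call {i}: runs")      # skipped on the 1st call, printed on later calls
    else:
        print(f"call {i}: skipped")   # only the 1st call within the 60 s window lands here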
@@ -539,8 +542,7 @@ def is_text(s):
539
542
 
540
543
  from typing import Any, Union
541
544
 
542
-
543
- def shared(*args, strict=True, n_shared=2, verbose=True):
545
+ def share(*args, strict=True, n_shared=2, verbose=True):
544
546
  """
545
547
  check the shared elements in two lists.
546
548
  usage:
@@ -585,12 +587,80 @@ def shared(*args, strict=True, n_shared=2, verbose=True):
585
587
  elements2show = (
586
588
  shared_elements if len(shared_elements) < 10 else shared_elements[:5]
587
589
  )
590
+ tail = '' if len(shared_elements) < 10 else '......'
591
+ elements2show = elements2show + [tail] if tail else elements2show
588
592
  print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
589
593
  print("********* checking shared elements *********")
590
594
  return shared_elements
591
595
 
596
+ def shared(*args, n_shared=None, verbose=True,**kwargs):
597
+ """
598
+ check the shared elements in the given lists.
599
+ usage:
600
+ list1 = [1, 2, 3, 4, 5]
601
+ list2 = [4, 5, 6, 7, 8]
602
+ list3 = [5, 6, 9, 10]
603
+ a = shared(list1, list2,list3)
604
+ """
605
+ if verbose:
606
+ print("\n********* checking shared elements *********")
607
+
608
+ if len(args) == 1 and isinstance(args[0], list):
609
+ lists = args[0] # Unpack the single list
610
+ else:
611
+ lists = args # Use the provided arguments as lists
612
+ flattened_lists = [flatten(lst, verbose=verbose) for lst in lists]
613
+
614
+ if n_shared is None:
615
+ n_shared = len(flattened_lists)
616
+ strict = True
617
+ else:
618
+ strict = False
619
+ # Ensure all arguments are lists
620
+ if any(not isinstance(lst, list) for lst in flattened_lists):
621
+ print(f"{' ' * 2}All inputs must be lists.")
622
+ return []
623
+ first_list = flattened_lists[0]
624
+ shared_elements = [
625
+ item for item in first_list if all(item in lst for lst in flattened_lists)
626
+ ]
627
+ if strict:
628
+ # Strict mode: require elements to be in all lists
629
+ shared_elements = set(flattened_lists[0])
630
+ for lst in flattened_lists[1:]:
631
+ shared_elements.intersection_update(lst)
632
+ else:
633
+ from collections import Counter
634
+
635
+ all_elements = [item for sublist in flattened_lists for item in sublist]
636
+ element_count = Counter(all_elements)
637
+ # Get elements that appear in at least n_shared lists
638
+ shared_elements = [
639
+ item for item, count in element_count.items() if count >= n_shared
640
+ ]
641
+
642
+ shared_elements = flatten(shared_elements, verbose=verbose)
643
+ if verbose:
644
+ elements2show = (
645
+ shared_elements if len(shared_elements) < 10 else shared_elements[:5]
646
+ )
647
+ print(f"{' '*2}{len(shared_elements)} elements shared: {' '*2}{elements2show}")
648
+ print("********* checking shared elements *********")
649
+ return shared_elements
592
650
 
593
- def not_shared(*args, strict=True, n_shared=2, verbose=False):
651
+ def share_not(*args, n_shared=None, verbose=False):
652
+ """
653
+ To find the elements in list1 that are not shared with list2 while maintaining the original order of list1
654
+ usage:
655
+ list1 = [1, 8, 3, 3, 4, 5]
656
+ list2 = [4, 5, 6, 7, 8]
657
+ share_not(list1, list2)  # output [1,3]
658
+ """
659
+ _common = shared(*args, n_shared=n_shared, verbose=verbose)
660
+ list1 = flatten(args[0], verbose=verbose)
661
+ _not_shared = [item for item in list1 if item not in _common]
662
+ return _not_shared
663
+ def not_shared(*args, n_shared=None, verbose=False):
594
664
  """
595
665
  To find the elements in list1 that are not shared with list2 while maintaining the original order of list1
596
666
  usage:
@@ -598,7 +668,7 @@ def not_shared(*args, strict=True, n_shared=2, verbose=False):
598
668
  list2 = [4, 5, 6, 7, 8]
599
669
  not_shared(list1,list2)# output [1,3]
600
670
  """
601
- _common = shared(*args, strict=strict, n_shared=n_shared, verbose=verbose)
671
+ _common = shared(*args, n_shared=n_shared, verbose=verbose)
602
672
  list1 = flatten(args[0], verbose=verbose)
603
673
  _not_shared = [item for item in list1 if item not in _common]
604
674
  return _not_shared
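Note: shared now defaults to a strict intersection across every input (when n_shared is None) and switches to a counted threshold when n_shared is given; share_not/not_shared keep the order of the first list. A small usage sketch (assumed import path; exact container types depend on flatten):

from py2ls.ips import shared, not_shared  # assumed import path

list1 = [1, 2, 3, 4, 5]
list2 = [4, 5, 6, 7, 8]
list3 = [5, 6, 9, 10]

common_all  = shared(list1, list2, list3)               # strict: present in every list (only 5 here)
common_any2 = shared(list1, list2, list3, n_shared=2)   # present in at least 2 of these 3 lists
only_first  = not_shared(list1, list2)                  # elements of list1 missing from list2, order kept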
@@ -1981,7 +2051,6 @@ def fload(fpath, kind=None, **kwargs):
1981
2051
 
1982
2052
  def load_csv(fpath, **kwargs):
1983
2053
  from pandas.errors import EmptyDataError
1984
-
1985
2054
  engine = kwargs.pop("engine", "pyarrow")# default: None
1986
2055
  sep = kwargs.pop("sep", None)# default: ','
1987
2056
  index_col = kwargs.pop("index_col", None)# default: None
@@ -1992,13 +2061,20 @@ def fload(fpath, kind=None, **kwargs):
1992
2061
  comment = kwargs.pop("comment", None)# default: None
1993
2062
  fmt = kwargs.pop("fmt", False)# default:
1994
2063
  chunksize = kwargs.pop("chunksize", None)# default: None
2064
+
2065
+ #check filesize
2066
+ f_size=round(os.path.getsize(fpath) / 1024 / 1024, 3)
2067
+ if f_size>=50: #50 MB
2068
+ if chunksize is None:
2069
+ chunksize = 5000
2070
+ print(f"file size is {f_size}MB, then set the chunksize with {chunksize}")
1995
2071
  engine = "c" if chunksize else engine # when chunksize, recommend 'c'
1996
2072
  low_memory = kwargs.pop("low_memory", True)# default: True
1997
2073
  low_memory = (
1998
2074
  False if chunksize else True
1999
2075
  ) # when chunksize, recommend low_memory=False # default:
2000
2076
  verbose = kwargs.pop("verbose", False)
2001
- if run_once_within():
2077
+ if run_once_within(reverse=True):
2002
2078
  use_pd("read_csv", verbose=verbose)
2003
2079
 
2004
2080
  if comment is None:# default: None
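Note: the new size check auto-chunks large reads — a CSV of 50 MB or more gets chunksize=5000 (and with it engine="c" and low_memory=False) unless the caller already supplied a chunksize; how the chunks are combined back into a DataFrame happens further down in load_csv and is not shown in this hunk. Usage sketch (file names are illustrative):

from py2ls.ips import fload  # assumed import path

data_auto  = fload("measurements.csv")                    # >= 50 MB: chunksize=5000 is set and printed
data_fixed = fload("measurements.csv", chunksize=20_000)  # an explicit chunksize is respected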
@@ -2174,7 +2250,7 @@ def fload(fpath, kind=None, **kwargs):
2174
2250
  def load_excel(fpath, **kwargs):
2175
2251
  engine = kwargs.get("engine", "openpyxl")
2176
2252
  verbose = kwargs.pop("verbose", False)
2177
- if run_once_within():
2253
+ if run_once_within(reverse=True):
2178
2254
  use_pd("read_excel", verbose=verbose)
2179
2255
  df = pd.read_excel(fpath, engine=engine, **kwargs)
2180
2256
  try:
@@ -2204,7 +2280,7 @@ def fload(fpath, kind=None, **kwargs):
2204
2280
  engine = kwargs.get("engine", "pyarrow")
2205
2281
  verbose = kwargs.pop("verbose", False)
2206
2282
 
2207
- if run_once_within():
2283
+ if run_once_within(reverse=True):
2208
2284
  use_pd("read_parquet", verbose=verbose)
2209
2285
  try:
2210
2286
  df = pd.read_parquet(fpath, engine=engine, **kwargs)
@@ -2381,13 +2457,13 @@ def fload(fpath, kind=None, **kwargs):
2381
2457
  return load_xml(fpath)
2382
2458
  elif kind in ["csv", "tsv"]:
2383
2459
  # verbose = kwargs.pop("verbose", False)
2384
- if run_once_within():
2460
+ if run_once_within(reverse=True):
2385
2461
  use_pd("read_csv")
2386
2462
  content = load_csv(fpath, **kwargs)
2387
2463
  return content
2388
2464
  elif kind == "pkl":
2389
2465
  verbose = kwargs.pop("verbose", False)
2390
- if run_once_within():
2466
+ if run_once_within(reverse=True):
2391
2467
  use_pd("read_pickle")
2392
2468
  return pd.read_pickle(fpath, **kwargs)
2393
2469
  elif kind in ["ods", "ods", "odt"]:
@@ -2418,12 +2494,12 @@ def fload(fpath, kind=None, **kwargs):
2418
2494
  return load_ipynb(fpath, **kwargs)
2419
2495
  elif kind in ["parquet", "snappy"]:
2420
2496
  verbose = kwargs.pop("verbose", False)
2421
- if run_once_within():
2497
+ if run_once_within(reverse=True):
2422
2498
  use_pd("read_parquet")
2423
2499
  return load_parquet(fpath, **kwargs)
2424
2500
  elif kind == "feather":
2425
2501
  verbose = kwargs.pop("verbose", False)
2426
- if run_once_within():
2502
+ if run_once_within(reverse=True):
2427
2503
  use_pd("read_feather")
2428
2504
  content = pd.read_feather(fpath, **kwargs)
2429
2505
  return content
@@ -2682,7 +2758,7 @@ def fsave(
2682
2758
  # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
2683
2759
 
2684
2760
  verbose = kwargs.pop("verbose", False)
2685
- if run_once_within():
2761
+ if run_once_within(reverse=True):
2686
2762
  use_pd("to_csv", verbose=verbose)
2687
2763
  kwargs_csv = dict(
2688
2764
  path_or_buf=None,
@@ -2714,7 +2790,7 @@ def fsave(
2714
2790
  def save_xlsx(fpath, data, **kwargs):
2715
2791
  verbose = kwargs.pop("verbose", False)
2716
2792
  sheet_name = kwargs.pop("sheet_name", "Sheet1")
2717
- if run_once_within():
2793
+ if run_once_within(reverse=True):
2718
2794
  use_pd("to_excel", verbose=verbose)
2719
2795
  if any(kwargs):
2720
2796
  format_excel(df=data, filename=fpath, **kwargs)
@@ -3497,12 +3573,8 @@ def figsave(*args, dpi=300):
3497
3573
  )
3498
3574
  else:
3499
3575
  plt.savefig(
3500
- fname, format=ftype.lower(), dpi=dpi, bbox_inches="tight", pad_inches=0
3501
- )
3502
- # elif ftype.lower() == "png":
3503
- # plt.savefig(fname, format="png", dpi=dpi, bbox_inches="tight", transparent=True,pad_inches=0)
3504
- # elif ftype.lower() in ["tiff", "tif"]:
3505
- # plt.savefig(fname, format="tiff", dpi=dpi, bbox_inches="tight",pad_inches=0)
3576
+ fname, format=ftype.lower(), dpi=dpi, bbox_inches="tight", transparent=True,pad_inches=0
3577
+ )
3506
3578
  elif ftype.lower() == "emf":
3507
3579
  plt.savefig(fname, format="emf", dpi=dpi, bbox_inches="tight", pad_inches=0)
3508
3580
  elif ftype.lower() == "fig":
@@ -5230,16 +5302,16 @@ def df_extend(data: pd.DataFrame, column, axis=0, sep=None, prefix="col"):
5230
5302
  data = data.explode(column, ignore_index=True)
5231
5303
  return data
5232
5304
 
5233
- def df_circular(data: pd.DataFrame, columns=None, max_val=None, inplace=False):
5305
+ def df_cycle(data: pd.DataFrame, columns=None, max_val=None, inplace=False):
5234
5306
  """
5235
5307
  Purpose: transforms a datetime feature (like month or day) into a cyclic encoding for use in machine learning models, particularly neural networks.
5236
5308
  Usage:
5237
5309
  data = pd.DataFrame({'month': [1, 4, 7, 10, 12]}) # Just months as an example
5238
- # df_circular month cyclically
5239
- data = df_circular(data, 'month', 12)
5310
+ # df_cycle month cyclically
5311
+ data = df_cycle(data, 'month', 12)
5240
5312
  """
5241
5313
  if columns is None:
5242
- columns = list(data.columns) # If no columns specified, use all columns
5314
+ columns = list(data.select_dtypes(include=np.number).columns) # If no columns specified, use all columns
5243
5315
  if max_val is None:
5244
5316
  max_val = np.max(data[columns]) # If no max_val specified, use the maximum value across all columns
5245
5317
  if isinstance(columns, str):
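Note: df_circular is renamed to df_cycle and now defaults to the numeric columns only. The encoding itself is not part of this hunk; the conventional cyclic (sine/cosine) transform it refers to looks like this generic pandas/numpy sketch, independent of py2ls:

import numpy as np
import pandas as pd

data = pd.DataFrame({"month": [1, 4, 7, 10, 12]})
max_val = 12
# classic cyclic encoding: month 12 ends up adjacent to month 1
data["month_sin"] = np.sin(2 * np.pi * data["month"] / max_val)
data["month_cos"] = np.cos(2 * np.pi * data["month"] / max_val)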
@@ -5424,7 +5496,7 @@ def df_astype(
5424
5496
  # print(f"Successfully converted '{column}' to timedelta.")
5425
5497
  elif astype == "circular":
5426
5498
  max_val = kwargs.get('max_val',None)
5427
- data[column]=df_circular(data=data,columns=column,max_val=max_val)
5499
+ data[column]=df_cycle(data=data,columns=column,max_val=max_val)
5428
5500
  else:
5429
5501
  # Convert to other types (e.g., float, int)
5430
5502
  if astype=='int':
@@ -5910,11 +5982,16 @@ def df_encoder(
5910
5982
 
5911
5983
  def df_scaler(
5912
5984
  data: pd.DataFrame, # should be numeric dtype
5985
+ scaler=None,
5913
5986
  method="standard",
5914
5987
  columns=None, # default, select all numeric col/row
5988
+ feature_range=None,# specific for 'minmax'
5989
+ vmin=0,
5990
+ vmax=1,
5915
5991
  inplace=False,
5916
5992
  verbose=False, # show usage
5917
5993
  axis=0, # default: column-wise
5994
+ return_scaler: bool = False,  # if True, return (scaled_df, scaler)
5918
5995
  **kwargs,
5919
5996
  ):
5920
5997
  """
@@ -5932,31 +6009,51 @@ def df_scaler(
5932
6009
  """
5933
6010
  if verbose:
5934
6011
  print('df_scaler(data, scaler="standard", inplace=False, axis=0, verbose=True)')
5935
-
5936
- methods = ["standard", "minmax", "robust"]
5937
- method = strcmp(method, methods)[0]
5938
- if method == "standard":
5939
- from sklearn.preprocessing import StandardScaler
5940
-
5941
- scaler = StandardScaler(**kwargs)
5942
- elif method == "minmax":
5943
- from sklearn.preprocessing import MinMaxScaler
5944
-
5945
- scaler = MinMaxScaler(**kwargs)
5946
- elif method == "robust":
5947
- from sklearn.preprocessing import RobustScaler
5948
-
5949
- scaler = RobustScaler(**kwargs)
5950
- if axis not in [0, 1]:
5951
- raise ValueError("Axis must be 0 (column-wise) or 1 (row-wise).")
5952
-
6012
+ if scaler is None:
6013
+ methods = ["standard", "minmax", "robust","maxabs"]
6014
+ method = strcmp(method, methods)[0]
6015
+ if method == "standard":
6016
+ from sklearn.preprocessing import StandardScaler
6017
+ if verbose:
6018
+ print("performs z-score normalization: This will standardize each feature to have a mean of 0 and a standard deviation of 1.")
6019
+ print("Use when the data is approximately normally distributed (Gaussian).\nWorks well with algorithms sensitive to feature distribution, such as SVMs, linear regression, logistic regression, and neural networks.")
6020
+ scaler = StandardScaler(**kwargs)
6021
+ elif method == "minmax":
6022
+ from sklearn.preprocessing import MinMaxScaler
6023
+ if feature_range is None:
6024
+ feature_range=(vmin,vmax)
6025
+ if verbose:
6026
+ print("don't forget to define the range: e.g., 'feature_range=(0, 1)'. ")
6027
+ print("scales the features to the range [0, 1]. Adjust feature_range if you want a different range, like [-1, 1].")
6028
+ print("Use when the data does not follow a normal distribution and you need all features in a specific range (e.g., [0, 1]).\nIdeal for algorithms that do not assume a particular distribution, such as k-nearest neighbors and neural networks.")
6029
+ scaler = MinMaxScaler(feature_range=feature_range,**kwargs)
6030
+ elif method == "robust":
6031
+ from sklearn.preprocessing import RobustScaler
6032
+ if verbose:
6033
+ print("scales the data based on the median and interquartile range, which is robust to outliers.")
6034
+ print("Use when the dataset contains outliers.\nThis method is useful because it scales based on the median and the interquartile range (IQR), which are more robust to outliers than the mean and standard deviation.")
6035
+ scaler = RobustScaler(**kwargs)
6036
+ elif method=="maxabs":
6037
+ from sklearn.preprocessing import MaxAbsScaler
6038
+ if verbose:
6039
+ print("This scales each feature by its maximum absolute value, resulting in values within the range [-1, 1] for each feature.")
6040
+ print("Use for data that is already sparse or when features have positive or negative values that need scaling without shifting the data.\nOften used with sparse data (data with many zeros), where preserving zero entries is essential, such as in text data or recommendation systems.")
6041
+ scaler = MaxAbsScaler(**kwargs)
6042
+ if axis not in [0, 1]:
6043
+ raise ValueError("Axis must be 0 (column-wise) or 1 (row-wise).")
6044
+ if verbose:
6045
+ print(scaler)
5953
6046
  if axis == 0:
5954
6047
  # Column-wise scaling (default)
5955
6048
  if columns is None:
5956
6049
  columns = data.select_dtypes(include=np.number).columns.tolist()
5957
6050
  non_numeric_columns = data.columns.difference(columns)
5958
6051
 
5959
- scaled_data = scaler.fit_transform(data[columns])
6052
+ # scaled_data = scaler.fit_transform(data[columns])
6053
+ if scaler is None or not hasattr(scaler, 'mean_'):
6054
+ scaled_data = scaler.fit_transform(data[columns])
6055
+ else:
6056
+ scaled_data = scaler.transform(data[columns])
5960
6057
 
5961
6058
  if inplace:
5962
6059
  data[columns] = scaled_data
@@ -5970,7 +6067,10 @@ def df_scaler(
5970
6067
  axis=1,
5971
6068
  )
5972
6069
  scaled_df = scaled_df[data.columns] # Maintain column order
5973
- return scaled_df
6070
+ if return_scaler:
6071
+ return scaled_df,scaler
6072
+ else:
6073
+ return scaled_df
5974
6074
 
5975
6075
  elif axis == 1:
5976
6076
  # Row-wise scaling
@@ -5982,9 +6082,10 @@ def df_scaler(
5982
6082
 
5983
6083
  print(f"Scaling rows")
5984
6084
 
5985
- scaled_data = scaler.fit_transform(
5986
- numeric_rows.T
5987
- ).T # Transpose for scaling and then back
6085
+ # scaled_data = scaler.fit_transform(
6086
+ # numeric_rows.T
6087
+ # ).T # Transpose for scaling and then back
6088
+ scaled_data = scaler.fit_transform(numeric_rows.T).T if scaler is None or not hasattr(scaler, 'mean_') else scaler.transform(numeric_rows.T).T
5988
6089
 
5989
6090
  if inplace:
5990
6091
  data.loc[numeric_rows.index] = scaled_data
@@ -5992,7 +6093,10 @@ def df_scaler(
5992
6093
  else:
5993
6094
  scaled_df = data.copy()
5994
6095
  scaled_df.loc[numeric_rows.index] = scaled_data
5995
- return scaled_df
6096
+ if return_scaler:
6097
+ return scaled_df,scaler
6098
+ else:
6099
+ return scaled_df
5996
6100
 
5997
6101
 
5998
6102
  def df_special_characters_cleaner(
@@ -6010,15 +6114,20 @@ def df_special_characters_cleaner(
6010
6114
 
6011
6115
  # 1. Clean column names by replacing special characters with underscores
6012
6116
  if "column" in where_:
6013
- data.columns = data.columns.str.replace(r"[^\w\s]", "_", regex=True)
6117
+ try:
6118
+ data.columns = data.columns.str.replace(r"[^\w\s]", "_", regex=True)
6119
+ except Exception as e:
6120
+ print(e)
6014
6121
 
6015
6122
  # 2. Clean only object-type columns (text columns)
6016
- if "content" in where_:
6017
- for col in data.select_dtypes(include=["object"]).columns:
6018
- data[col] = data[col].str.replace(r"[^\w\s]", "", regex=True)
6019
- if data.index.dtype == "object" and index in where_:
6020
- data.index = data.index.str.replace(r"[^\w\s]", "_", regex=True)
6021
-
6123
+ try:
6124
+ if "content" in where_:
6125
+ for col in data.select_dtypes(include=["object"]).columns:
6126
+ data[col] = data[col].str.replace(r"[^\w\s]", "", regex=True)
6127
+ if data.index.dtype == "object" and index in where_:
6128
+ data.index = data.index.str.replace(r"[^\w\s]", "_", regex=True)
6129
+ except:
6130
+ pass
6022
6131
  return data
6023
6132
 
6024
6133
 
@@ -6401,6 +6510,9 @@ def df_reducer(
6401
6510
  # "autoencoder","nmf",
6402
6511
  ]
6403
6512
  method = strcmp(method, methods)[0]
6513
+ if run_once_within(reverse=True):
6514
+ print(f"support methods:{methods}")
6515
+
6404
6516
  if verbose:
6405
6517
  print(f"\nprocessing with using {dict_methods[method]}:")
6406
6518
  xlabel, ylabel = None, None
@@ -6408,16 +6520,20 @@ def df_reducer(
6408
6520
  columns = data.select_dtypes(include="number").columns.tolist()
6409
6521
  if hue is None:
6410
6522
  hue = data.select_dtypes(exclude="number").columns.tolist()
6523
+ print(f"auto select the non-number as 'hue':{hue}")
6411
6524
  if isinstance(hue, list):
6412
6525
  print("Warning: hue is a list, only select the 1st one")
6413
6526
  hue = hue[0]
6414
- if not hue:
6527
+ if not any(hue):
6415
6528
  # Select columns if specified, else use all columns
6416
6529
  X = data[columns].values if columns else data.values
6417
6530
  else:
6418
6531
  # Select columns to reduce and hue for LDA
6419
- X = data[columns].values if columns else data.drop(columns=[hue]).values
6420
- y = data[hue].values
6532
+ try:
6533
+ X = data[columns].values if columns else data.drop(columns=[hue]).values
6534
+ y = data[hue].values
6535
+ except:
6536
+ pass
6421
6537
  print(X.shape)
6422
6538
  # Handle missing values
6423
6539
  if fill_missing:
@@ -6884,33 +7000,49 @@ def df_reducer(
6884
7000
  colname_met = "SVD_"
6885
7001
  # Quick plots
6886
7002
  if plot_ and (not method in ["isolation_forest"]):
6887
- from .plot import plotxy
6888
- if ax is None:
6889
- if figsize is None:
6890
- _, ax = plt.subplots(figsize=cm2inch(8, 8))
6891
- else:
6892
- _, ax = plt.subplots(figsize=figsize)
6893
- else:
6894
- ax = ax.cla()
7003
+ from .plot import plotxy,figsets,get_color
7004
+ # if ax is None:
7005
+ # if figsize is None:
7006
+ # _, ax = plt.subplots(figsize=cm2inch(8, 8))
7007
+ # else:
7008
+ # _, ax = plt.subplots(figsize=figsize)
7009
+ # else:
7010
+ # ax = ax.cla()
6895
7011
  xlabel = f"{colname_met}1" if xlabel is None else xlabel
6896
7012
  ylabel = f"{colname_met}2" if ylabel is None else ylabel
7013
+ palette=get_color(len(flatten(data[hue],verbose=0)))
7014
+
7015
+ reduced_df=reduced_df.sort_values(by=hue)
7016
+ print(flatten(reduced_df[hue]))
6897
7017
  ax = plotxy(
6898
7018
  data=reduced_df,
6899
7019
  x=colname_met + "1",
6900
7020
  y=colname_met + "2",
6901
7021
  hue=hue,
6902
- s=size,
7022
+ palette=palette,
7023
+ # size=size,
6903
7024
  edgecolor=edgecolor,
6904
- kind_="scater",
6905
- figsets=dict(
6906
- legend=dict(loc=legend_loc, markerscale=markerscale,bbox_to_anchor=bbox_to_anchor,ncols=ncols,fontsize=8),
6907
- xlabel=xlabel if xlabel else None,
6908
- ylabel=ylabel if ylabel else None,
6909
- ),
6910
- ax=ax,
7025
+ kind_=["joint",
7026
+ # "kde",
7027
+ "ell",
7028
+ ],
7029
+ kws_kde=dict(
7030
+ hue=hue,
7031
+ levels=2,
7032
+ common_norm=False,
7033
+ fill=True,
7034
+ alpha=0.05,
7035
+ ),
7036
+ kws_joint=dict(kind='scatter',joint_kws=dict(s=size)),
7037
+ kws_ellipse=dict(alpha=0.1,lw=1,label=None),
6911
7038
  verbose=False,
6912
7039
  **kwargs,
6913
7040
  )
7041
+ figsets(
7042
+ legend=dict(loc=legend_loc, markerscale=markerscale,bbox_to_anchor=bbox_to_anchor,ncols=ncols,fontsize=8),
7043
+ xlabel=xlabel if xlabel else None,
7044
+ ylabel=ylabel if ylabel else None,
7045
+ )
6914
7046
 
6915
7047
  if inplace:
6916
7048
  # If inplace=True, add components back into the original data
@@ -7387,6 +7519,7 @@ def df_qc(
7387
7519
  from statsmodels.stats.outliers_influence import variance_inflation_factor
7388
7520
  from scipy.stats import skew, kurtosis, entropy
7389
7521
 
7522
+ pd.options.display.max_seq_items = 10
7390
7523
  #! display(data.select_dtypes(include=[np.number]).describe())
7391
7524
  #!skim
7392
7525
  if columns is not None:
@@ -7403,16 +7536,18 @@ def df_qc(
7403
7536
  data = data.copy()
7404
7537
  data.loc[:, data.isna().all()] = 0
7405
7538
  res_qc = {}
7406
- print(f"data.shape:{data.shape}")
7539
+ print(f"data.shape:{data.shape}\n⤵ data.sample(10):")
7540
+ display(data.sample(10).style.background_gradient(cmap="coolwarm", axis=1))
7407
7541
 
7408
7542
  # Missing values
7409
7543
  res_qc["missing_values"] = data.isnull().sum()
7410
- res_qc["missing_percentage"] = (res_qc["missing_values"] / len(data)) * 100
7544
+ res_qc["missing_percentage"] = round((res_qc["missing_values"] / len(data)) * 100,2)
7411
7545
  res_qc["rows_with_missing"] = data.isnull().any(axis=1).sum()
7412
7546
 
7413
7547
  # Data types and unique values
7414
7548
  res_qc["data_types"] = data.dtypes
7415
- res_qc["unique_values"] = data.nunique()
7549
+ res_qc["unique_counts"] = data.select_dtypes(exclude=np.number).nunique().sort_values()
7550
+ res_qc["unique_values"] = data.select_dtypes(exclude=np.number).apply(lambda x: x.unique())
7416
7551
  res_qc["constant_columns"] = [
7417
7552
  col for col in data.columns if data[col].nunique() <= 1
7418
7553
  ]
@@ -7428,33 +7563,42 @@ def df_qc(
7428
7563
  data_outliers = df_outlier(data)
7429
7564
  outlier_num = data_outliers.isna().sum() - data.isnull().sum()
7430
7565
  res_qc["outlier_num"] = outlier_num[outlier_num > 0]
7431
- outlier_percentage=(outlier_num / len(data_outliers)) * 100
7566
+ outlier_percentage=round((outlier_num / len(data_outliers)) * 100,2)
7432
7567
  res_qc["outlier_percentage"] = outlier_percentage[outlier_percentage>0]
7433
- # Correlation and multicollinearity (VIF)
7434
- if any(data.dtypes.apply(pd.api.types.is_numeric_dtype)):
7435
- numeric_df = data.select_dtypes(include=[np.number]).dropna()
7436
- corr_matrix = numeric_df.corr()
7437
- high_corr_pairs = [
7438
- (col1, col2)
7439
- for col1 in corr_matrix.columns
7440
- for col2 in corr_matrix.columns
7441
- if col1 != col2 and abs(corr_matrix[col1][col2]) > 0.9
7442
- ]
7443
- res_qc["high_correlations"] = high_corr_pairs
7444
-
7445
- # VIF for multicollinearity check
7446
- numeric_df = data.select_dtypes(include=[np.number]).dropna()
7447
- vif_data = pd.DataFrame()
7448
- res_qc["vif"]=vif_data
7449
- if numeric_df.shape[1] > 1 and not numeric_df.empty:
7450
- vif_data["feature"] = numeric_df.columns
7451
- vif_data["VIF"] = [
7452
- variance_inflation_factor(numeric_df.values, i)
7453
- for i in range(numeric_df.shape[1])
7568
+ try:
7569
+ # Correlation and multicollinearity (VIF)
7570
+ if any(data.dtypes.apply(pd.api.types.is_numeric_dtype)):
7571
+ numeric_df = data.select_dtypes(include=[np.number]).dropna()
7572
+ corr_matrix = numeric_df.corr()
7573
+ high_corr_pairs = [
7574
+ (col1, col2)
7575
+ for col1 in corr_matrix.columns
7576
+ for col2 in corr_matrix.columns
7577
+ if col1 != col2 and abs(corr_matrix[col1][col2]) > 0.9
7454
7578
  ]
7455
- res_qc["vif"] = vif_data[
7456
- vif_data["VIF"] > 5
7457
- ] # Typically VIF > 5 indicates multicollinearity
7579
+ res_qc["high_correlations"] = high_corr_pairs
7580
+
7581
+ # VIF for multicollinearity check
7582
+ numeric_df = data.select_dtypes(include=[np.number]).dropna()
7583
+ if isinstance(numeric_df.columns, pd.MultiIndex):
7584
+ numeric_df.columns = [
7585
+ "_".join(col).strip() if isinstance(col, tuple) else col for col in numeric_df.columns
7586
+ ]
7587
+
7588
+
7589
+ vif_data = pd.DataFrame()
7590
+ res_qc["vif"]=vif_data
7591
+ if numeric_df.shape[1] > 1 and not numeric_df.empty:
7592
+ vif_data["feature"] = numeric_df.columns.tolist()
7593
+ vif_data["VIF"] = [
7594
+ round(variance_inflation_factor(numeric_df.values, i),2)
7595
+ for i in range(numeric_df.shape[1])
7596
+ ]
7597
+ res_qc["vif"] = vif_data[
7598
+ vif_data["VIF"] > 5
7599
+ ] # Typically VIF > 5 indicates multicollinearity
7600
+ except Exception as e:
7601
+ print(e)
7458
7602
  # Skewness and Kurtosis
7459
7603
  skewness = data.skew(numeric_only=True)
7460
7604
  kurtosis_vals = data.kurt(numeric_only=True)
@@ -7467,8 +7611,7 @@ def df_qc(
7467
7611
  col: entropy(data[col].value_counts(normalize=True), base=2)
7468
7612
  for col in categorical_cols
7469
7613
  }
7470
- # number of unique
7471
- res_qc["unique_counts"] = data.nunique()
7614
+
7472
7615
  # dtypes counts
7473
7616
  res_qc['dtype_counts']=data.dtypes.value_counts()
7474
7617
 
@@ -7515,7 +7658,7 @@ def df_qc(
7515
7658
  res_qc["text_length_analysis"] = text_lengths
7516
7659
 
7517
7660
  # Summary statistics
7518
- res_qc["summary_statistics"] = data.describe().T
7661
+ res_qc["summary_statistics"] = data.describe().T.style.background_gradient(cmap='coolwarm', axis=0)
7519
7662
 
7520
7663
  # Automated warnings
7521
7664
  warnings = []
@@ -7537,28 +7680,45 @@ def df_qc(
7537
7680
 
7538
7681
  # Report generation
7539
7682
  if verbose:
7540
- print("=== QC Report Summary ===")
7541
7683
  print("\n⤵ Summary Statistics:")
7542
7684
  display(res_qc["summary_statistics"])
7543
7685
  print("\n⤵ Data Types:")
7544
7686
  display(res_qc["data_types"])
7545
7687
  if any(res_qc["missing_values"][res_qc["missing_values"] > 0]):
7546
7688
  print(" ⤵ Missing Values Counts:")
7547
- display(res_qc["missing_values"][res_qc["missing_values"] > 0])
7689
+ display(pd.DataFrame(
7690
+ {
7691
+ "missing_values": res_qc["missing_values"][res_qc["missing_values"] > 0],
7692
+ "missing_percent(%)": res_qc["missing_percentage"][
7693
+ res_qc["missing_percentage"] > 0
7694
+ ],
7695
+ }
7696
+ ).style.background_gradient(cmap="coolwarm", axis=0)
7697
+ )
7548
7698
  # print(res_qc["missing_percentage"][res_qc["missing_percentage"] > 0])
7549
7699
  print("\n⤵ Rows with Missing Values:",res_qc["rows_with_missing"])
7550
7700
 
7701
+ print("\n⤵ Constant Columns:", res_qc["constant_columns"]) if any(res_qc["constant_columns"]) else None
7702
+ print("⤵ Duplicate Rows:", res_qc["duplicate_rows"]) if res_qc["duplicate_rows"] else None
7703
+ print("⤵ Duplicate Columns:", res_qc["duplicate_columns"]) if any(res_qc["duplicate_columns"]) else None
7704
+
7551
7705
  if any(res_qc["outlier_num"]):
7552
7706
  print("\n⤵ Outlier Report:")
7553
- display(res_qc["outlier_num"])
7554
- if any(res_qc["unique_values"]):
7555
- print("\n⤵ Unique Values per Column:")
7556
- display(res_qc["unique_values"])
7707
+ display(pd.DataFrame(
7708
+ {
7709
+ "outlier_num": res_qc["outlier_num"][res_qc["outlier_num"] > 0],
7710
+ "outlier_percentage(%)": res_qc["outlier_percentage"][
7711
+ res_qc["outlier_percentage"] > 0
7712
+ ],
7713
+ }
7714
+ ).style.background_gradient(cmap="coolwarm", axis=0)
7715
+ )
7557
7716
 
7558
- print("\n⤵ Constant Columns:", res_qc["constant_columns"]) if any(res_qc["constant_columns"]) else None
7717
+ if any(res_qc["unique_counts"]):
7718
+ print("\n⤵ Unique Values per Column:")
7719
+ display(pd.DataFrame({"unique_counts":res_qc["unique_counts"],
7720
+ "unique_values":res_qc["unique_values"]}).style.background_gradient(cmap="coolwarm", axis=0))
7559
7721
 
7560
- print("⤵ Duplicate Rows:", res_qc["duplicate_rows"]) if res_qc["duplicate_rows"] else None
7561
- print("⤵ Duplicate Columns:", res_qc["duplicate_columns"]) if any(res_qc["duplicate_columns"]) else None
7562
7722
 
7563
7723
  if res_qc["empty_columns"]:
7564
7724
  print("\n⤵ Empty Columns:", res_qc["empty_columns"])
@@ -7570,7 +7730,7 @@ def df_qc(
7570
7730
 
7571
7731
  if "vif" in res_qc:
7572
7732
  print("\n⤵ Features with High VIF (>|5|):")
7573
- print(res_qc["vif"])
7733
+ display(res_qc["vif"].style.background_gradient(cmap="coolwarm", axis=0))
7574
7734
 
7575
7735
  if any(res_qc["high_cardinality_categoricals"]):
7576
7736
  print("\n⤵ High Cardinality Categorical Columns (>|50 unique|):")
@@ -7589,28 +7749,27 @@ def df_qc(
7589
7749
  print("\nWarnings:")
7590
7750
  for warning in res_qc["warnings"]:
7591
7751
  print(" -", warning)
7752
+
7753
+ pd.reset_option("display.max_seq_items")
7592
7754
  if plot_:
7593
- df_qc_plots(data=data, res_qc=res_qc, max_cols=max_cols,hue=hue)
7594
- if dir_save:
7595
- try:
7596
- figsave(dir_save)
7597
- except Exception as e:
7598
- print(f"⚠️: {e}")
7599
- if output:
7755
+ df_qc_plots(data=data, res_qc=res_qc, max_cols=max_cols,hue=hue,dir_save=dir_save)
7756
+ if output or not plot_:
7600
7757
  return res_qc
7601
7758
  return None
7602
7759
 
7603
7760
 
7604
- def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,hue=None):
7761
+ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,hue=None,dir_save=None):
7605
7762
  import matplotlib.pyplot as plt
7606
7763
  import seaborn as sns
7607
7764
  from .plot import subplot, figsets, get_color
7765
+ from datetime import datetime
7766
+ now_ = datetime.now().strftime("%y%m%d_%H%M%S")
7608
7767
 
7609
7768
  if columns is not None:
7610
7769
  if isinstance(columns, (list,pd.core.indexes.base.Index)):
7611
7770
  data=data[columns]
7612
7771
  len_total = len(res_qc)
7613
- n_row, n_col = int((len_total + 10) / 3), 3
7772
+ n_row, n_col = int((len_total + 10)), 3
7614
7773
  nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row],verbose=False)
7615
7774
 
7616
7775
  missing_data = res_qc["missing_values"][res_qc["missing_values"] > 0].sort_values(
@@ -7638,15 +7797,7 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
7638
7797
  ax=nexttile(),
7639
7798
  )
7640
7799
  figsets(ax=ax_outlier_num,title="Outliers (#)", xlabel="#",ylabel=None,fontsize=8 if len(outlier_num)<=20 else 6)
7641
-
7642
- #!
7643
- try:
7644
- if data.select_dtypes(include=np.number).shape[1]<=10:
7645
- for col in data.select_dtypes(include=np.number).columns:
7646
- sns.histplot(data[col], kde=True, bins=30, ax=nexttile())
7647
- figsets(title=f"Distribution: {col}", xlabel=col, ylabel="Frequency")
7648
- except:
7649
- pass
7800
+
7650
7801
  #!
7651
7802
  try:
7652
7803
  for col in data.select_dtypes(include='category').columns:
@@ -7775,8 +7926,10 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
7775
7926
  title="Dtypes",
7776
7927
  ylabel="#",
7777
7928
  ax=ax_dtype_counts,
7778
- fontsize=8 if len(dtype_counts.index)<=20 else 6,
7929
+ fontsize=8 if len(dtype_counts.index)<=20 else 6,
7779
7930
  )
7931
+ # from .plot import pie
7932
+ # pie()
7780
7933
 
7781
7934
  # High cardinality: Show top categorical columns by unique value count
7782
7935
  high_cardinality = res_qc["high_cardinality_categoricals"]
@@ -7857,6 +8010,79 @@ def df_qc_plots(data: pd.DataFrame, columns=None,res_qc: dict=None, max_cols=20,
7857
8010
  title="Correlation Heatmap",
7858
8011
  ax=ax_heatmap
7859
8012
  )
8013
+ # # save figure
8014
+ # if dir_save:
8015
+ # figsave(dir_save,f"qc_plot_{now_}.pdf")
8016
+
8017
+ if columns is not None:
8018
+ if isinstance(columns, (list,pd.core.indexes.base.Index)):
8019
+ data=data[columns]
8020
+
8021
+ # len_total = len(res_qc)
8022
+ # n_row, n_col = int((len_total + 10) / 3), 3
8023
+ # nexttile = subplot(n_row, n_col, figsize=[5 * n_col, 5 * n_row],verbose=False)
8024
+ #! check distribution
8025
+ data_num = data.select_dtypes(include=np.number)
8026
+ if data_num.shape[1] > max_cols:
8027
+ data_num = data_num.iloc[:,:max_cols]
8028
+
8029
+ data_num = df_scaler(data=data_num, method='standard')
8030
+
8031
+ import scipy.stats as stats
8032
+ for column in data_num.columns:
8033
+ #* Shapiro-Wilk test for normality
8034
+ stat, p_value = stats.shapiro(data_num[column])
8035
+ normality = "norm" if p_value > 0.05 else "not_norm"
8036
+ #* Plot histogram
8037
+ ax_hist=sns.histplot(data_num[column], kde=True, ax=nexttile())
8038
+ x_min, x_max = ax_hist.get_xlim()
8039
+ y_min, y_max = ax_hist.get_ylim()
8040
+ ax_hist.text(x_min+(x_max-x_min)*0.5, y_min+(y_max-y_min)*0.75,
8041
+ f'p(Shapiro-Wilk)={p_value:.3f}\n{normality}',
8042
+ ha='center', va='top')
8043
+ figsets(title=column,ax=ax_hist)
8044
+ ax_twin=ax_hist.twinx()
8045
+ #* Q-Q plot
8046
+ stats.probplot(data_num[column], dist="norm", plot=ax_twin)
8047
+ figsets(ylabel=f'Q-Q Plot:{column}',title=None)
8048
+ # save figure
8049
+ if dir_save:
8050
+ figsave(dir_save,f"qc_plot_{now_}.pdf")
8051
+
8052
+ def df_corr(df: pd.DataFrame, method="pearson"):
8053
+ """
8054
+ Compute correlation coefficients and p-values for a DataFrame.
8055
+
8056
+ Parameters:
8057
+ - df (pd.DataFrame): Input DataFrame with numeric data.
8058
+ - method (str): Correlation method ("pearson", "spearman", "kendall").
8059
+
8060
+ Returns:
8061
+ - corr_matrix (pd.DataFrame): Correlation coefficient matrix.
8062
+ - pval_matrix (pd.DataFrame): P-value matrix.
8063
+ """
8064
+ from scipy.stats import pearsonr, spearmanr, kendalltau
8065
+
8066
+ methods = ["pearson", "spearman", "kendall"]
8067
+ method = strcmp(method, methods)[0]
8068
+ methods_dict = {"pearson": pearsonr, "spearman": spearmanr, "kendall": kendalltau}
8069
+
8070
+ cols = df.columns
8071
+ corr_matrix = pd.DataFrame(index=cols, columns=cols, dtype=float)
8072
+ pval_matrix = pd.DataFrame(index=cols, columns=cols, dtype=float)
8073
+ correlation_func = methods_dict[method]
8074
+
8075
+ for col1 in cols:
8076
+ for col2 in cols:
8077
+ if col1 == col2:
8078
+ corr_matrix.loc[col1, col2] = 1.0
8079
+ pval_matrix.loc[col1, col2] = 0.0
8080
+ else:
8081
+ corr, pval = correlation_func(df[col1], df[col2])
8082
+ corr_matrix.loc[col1, col2] = corr
8083
+ pval_matrix.loc[col1, col2] = pval
8084
+
8085
+ return corr_matrix, pval_matrix
7860
8086
 
7861
8087
  def use_pd(
7862
8088
  func_name="excel",
@@ -7877,3 +8103,135 @@ def use_pd(
7877
8103
  except Exception as e:
7878
8104
  if verbose:
7879
8105
  print(e)
8106
+
8107
+ def get_phone(phone_number: str, region: str = None,verbose=True):
8108
+ """
8109
+ usage:
8110
+ info = get_phone(15237654321, "DE")
8111
+ preview(info)
8112
+
8113
+ Extremely advanced phone number analysis function.
8114
+
8115
+ Args:
8116
+ phone_number (str): The phone number to analyze.
8117
+ region (str): Default None; with None, the number should include its international country code. Otherwise the specified region is used for parsing.
8118
+
8119
+ Returns:
8120
+ dict: Comprehensive information about the phone number.
8121
+ """
8122
+ import phonenumbers
8123
+ from phonenumbers import geocoder, carrier, timezone, number_type
8124
+ from datetime import datetime
8125
+ import pytz
8126
+ from tzlocal import get_localzone
8127
+
8128
+ if not isinstance(phone_number, str):
8129
+ phone_number = str(phone_number)
8130
+ if isinstance(region, str):
8131
+ region = region.upper()
8132
+
8133
+ try:
8134
+ # Parse the phone number
8135
+ parsed_number = phonenumbers.parse(phone_number, region)
8136
+
8137
+ # Validate the phone number
8138
+ valid = phonenumbers.is_valid_number(parsed_number)
8139
+ possible = phonenumbers.is_possible_number(parsed_number)
8140
+
8141
+ if not valid:
8142
+ suggested_fix = phonenumbers.example_number(region) if region else "Unknown"
8143
+ return {
8144
+ "valid": False,
8145
+ "error": "Invalid phone number",
8146
+ "suggested_fix": suggested_fix,
8147
+ }
8148
+
8149
+ # Basic details
8150
+ formatted_international = phonenumbers.format_number(
8151
+ parsed_number, phonenumbers.PhoneNumberFormat.INTERNATIONAL
8152
+ )
8153
+ formatted_national = phonenumbers.format_number(
8154
+ parsed_number, phonenumbers.PhoneNumberFormat.NATIONAL
8155
+ )
8156
+ formatted_e164 = phonenumbers.format_number(
8157
+ parsed_number, phonenumbers.PhoneNumberFormat.E164
8158
+ )
8159
+ country_code = parsed_number.country_code
8160
+ region_code = geocoder.region_code_for_number(parsed_number)
8161
+ country_name = geocoder.country_name_for_number(parsed_number, "en")
8162
+
8163
+ location = geocoder.description_for_number(parsed_number, "en")
8164
+ carrier_name = carrier.name_for_number(parsed_number, "en") or "Unknown Carrier"
8165
+ time_zones = timezone.time_zones_for_number(parsed_number)[0]
8166
+ current_times = datetime.now(pytz.timezone(time_zones)).strftime(
8167
+ "%Y-%m-%d %H:%M:%S %Z"
8168
+ )
8169
+ number_type_str = {
8170
+ phonenumbers.PhoneNumberType.FIXED_LINE: "Fixed Line",
8171
+ phonenumbers.PhoneNumberType.MOBILE: "Mobile",
8172
+ phonenumbers.PhoneNumberType.FIXED_LINE_OR_MOBILE: "Fixed Line or Mobile",
8173
+ phonenumbers.PhoneNumberType.TOLL_FREE: "Toll Free",
8174
+ phonenumbers.PhoneNumberType.PREMIUM_RATE: "Premium Rate",
8175
+ phonenumbers.PhoneNumberType.SHARED_COST: "Shared Cost",
8176
+ phonenumbers.PhoneNumberType.VOIP: "VOIP",
8177
+ phonenumbers.PhoneNumberType.PERSONAL_NUMBER: "Personal Number",
8178
+ phonenumbers.PhoneNumberType.PAGER: "Pager",
8179
+ phonenumbers.PhoneNumberType.UAN: "UAN",
8180
+ phonenumbers.PhoneNumberType.UNKNOWN: "Unknown",
8181
+ }.get(number_type(parsed_number), "Unknown")
8182
+
8183
+ # Advanced Features
8184
+ is_toll_free = (
8185
+ number_type(parsed_number) == phonenumbers.PhoneNumberType.TOLL_FREE
8186
+ )
8187
+ is_premium_rate = (
8188
+ number_type(parsed_number) == phonenumbers.PhoneNumberType.PREMIUM_RATE
8189
+ )
8190
+
8191
+ # Dialing Information
8192
+ dialing_instructions = f"Dial {formatted_national} within {country_name}. Dial {formatted_e164} from abroad."
8193
+
8194
+ # Advanced Timezone Handling
8195
+ gmt_offsets = pytz.timezone(time_zones).utcoffset(datetime.now()).total_seconds()/ 3600
8196
+ # Get the local timezone (current computer's time)
8197
+ local_timezone = get_localzone()
8198
+ #local_timezone = pytz.timezone(pytz.country_timezones[region_code][0])
8199
+ local_offset = local_timezone.utcoffset(datetime.now()).total_seconds() / 3600
8200
+ offset_diff = local_offset - gmt_offsets
8201
+ head_time = "earlier" if offset_diff < 0 else "later" if offset_diff > 0 else ""
8202
+ res= {
8203
+ "valid": True,
8204
+ "possible": possible,
8205
+ "formatted": {
8206
+ "international": formatted_international,
8207
+ "national": formatted_national,
8208
+ "e164": formatted_e164,
8209
+ },
8210
+ "country_code": country_code,
8211
+ "country_name": country_name,
8212
+ "region_code": region_code,
8213
+ "location": location if location else "Unknown",
8214
+ "carrier": carrier_name,
8215
+ "time_zone": time_zones,
8216
+ "current_times": current_times,
8217
+ "local_offset":f"{local_offset} utcoffset",
8218
+ "time_zone_diff": f"{head_time} {int(np.abs(offset_diff))} h",
8219
+ "number_type": number_type_str,
8220
+ "is_toll_free": is_toll_free,
8221
+ "is_premium_rate": is_premium_rate,
8222
+ "dialing_instructions": dialing_instructions,
8223
+ "suggested_fix": None, # Use phonenumbers.example_number if invalid
8224
+ "logs": {
8225
+ "number_analysis_completed": datetime.now().strftime(
8226
+ "%Y-%m-%d %H:%M:%S"
8227
+ ),
8228
+ "raw_input": phone_number,
8229
+ "parsed_number": str(parsed_number),
8230
+ },
8231
+ }
8232
+
8233
+ except phonenumbers.NumberParseException as e:
8234
+ res= {"valid": False, "error": str(e)}
8235
+ if verbose:
8236
+ preview(res)
8237
+ return res
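
Note: get_phone accepts either a full international number (region=None) or a national number plus a region code; with verbose=True the result is also shown via preview. Usage sketch (the number is made up):

from py2ls.ips import get_phone  # assumed import path

info = get_phone("+49 152 37654321", verbose=False)  # international format, no region needed
if info["valid"]:
    print(info["formatted"]["e164"], info["carrier"], info["time_zone"])

info_de = get_phone(15237654321, region="de", verbose=False)  # national number + region (case-insensitive)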