py2ls 0.1.10.26__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
py2ls/ips.py CHANGED
@@ -46,7 +46,7 @@ from collections import Counter
46
46
  from fuzzywuzzy import fuzz, process
47
47
  from langdetect import detect
48
48
  from duckduckgo_search import DDGS
49
-
49
+ from typing import List, Optional, Union
50
50
  from bs4 import BeautifulSoup
51
51
 
52
52
  from . import netfinder
@@ -1250,6 +1250,119 @@ def get_encoding(fpath, alternative_encodings=None, verbose=False):
1250
1250
  return None
1251
1251
 
1252
1252
 
1253
def unzip(dir_path, output_dir=None):
    """
    Unzips or extracts various compressed file formats
    (.gz, .zip, .7z, .tar, .tar.gz/.tgz, .tar.bz2, .bz2, .xz, .rar).

    The extension is matched case-insensitively and validated *before*
    anything on disk is touched, so an unsupported input never deletes data.
    If the output path already exists, it is replaced.

    Parameters:
        dir_path (str): Path to the compressed file.
        output_dir (str): Directory where the extracted files will be saved.
            If None, it extracts next to the file, using the file name without
            its last extension. Ignored for single-file formats
            (.gz, .bz2, .xz), which always decompress next to the input.

    Returns:
        str: The output directory for archive formats, or the path of the
        decompressed file for .gz / .bz2 / .xz.

    Raises:
        ValueError: If the extension is not supported, or if the output path
            would be the input file itself.
    """
    name = dir_path.lower()
    stem = os.path.splitext(dir_path)[0]

    # Classify first; tar variants must be tested before the bare
    # .gz / .bz2 suffixes they also end with.
    tar_mode = None
    if name.endswith((".tar.gz", ".tgz")):
        kind, tar_mode = "tar", "r:gz"
    elif name.endswith(".tar.bz2"):
        kind, tar_mode = "tar", "r:bz2"
    elif name.endswith(".tar"):
        kind, tar_mode = "tar", "r"
    elif name.endswith(".zip"):
        kind = "zip"
    elif name.endswith(".7z"):
        kind = "7z"
    elif name.endswith(".rar"):
        kind = "rar"
    elif name.endswith((".gz", ".bz2", ".xz")):
        kind = "stream"
    else:
        raise ValueError(f"Unsupported file format: {os.path.splitext(dir_path)[1]}")

    # Single-file compressors write next to the input; archives honour output_dir.
    target = stem if kind == "stream" or output_dir is None else output_dir

    # Guard against names such as ".zip", where stripping the extension
    # yields the input path and "replace" would delete the source file.
    if os.path.abspath(target) == os.path.abspath(dir_path):
        raise ValueError(f"Output path would overwrite the input file: {dir_path}")

    # Replace only the path that is really going to be written.
    if os.path.exists(target):
        if os.path.isdir(target):
            shutil.rmtree(target)
        else:
            os.remove(target)

    # NOTE(review): extractall() trusts member paths stored in the archive;
    # only use this on archives from trusted sources.
    if kind == "tar":
        import tarfile

        with tarfile.open(dir_path, tar_mode) as tar_ref:
            tar_ref.extractall(target)
    elif kind == "zip":
        import zipfile

        with zipfile.ZipFile(dir_path, "r") as zip_ref:
            zip_ref.extractall(target)
    elif kind == "7z":
        import py7zr  # third-party, only needed for .7z

        with py7zr.SevenZipFile(dir_path, mode="r") as z:
            z.extractall(path=target)
    elif kind == "rar":
        import rarfile  # third-party, only needed for .rar

        with rarfile.RarFile(dir_path) as rar_ref:
            rar_ref.extractall(target)
    else:
        if name.endswith(".gz"):
            import gzip as codec
        elif name.endswith(".bz2"):
            import bz2 as codec
        else:
            import lzma as codec

        with codec.open(dir_path, "rb") as src, open(target, "wb") as dst:
            shutil.copyfileobj(src, dst)

    return target
1357
+
1358
+
1359
+ # Example usage:
1360
+ # output_dir = unzip('data.tar.gz')
1361
+ # output_file = unzip('file.csv.gz')
1362
+ # output_dir_zip = unzip('archive.zip')
1363
+ # output_dir_7z = unzip('archive.7z')
1364
+
1365
+
1253
1366
  def fload(fpath, kind=None, **kwargs):
1254
1367
  """
1255
1368
  Load content from a file with specified file type.
@@ -1286,13 +1399,65 @@ def fload(fpath, kind=None, **kwargs):
1286
1399
  root = tree.getroot()
1287
1400
  return etree.tostring(root, pretty_print=True).decode()
1288
1401
 
1289
- def load_csv(fpath, engine="pyarrow", **kwargs):
1290
- print(f"engine={engine}")
1291
- df = pd.read_csv(fpath, engine=engine, **kwargs)
1402
def load_csv(fpath, **kwargs):
    """
    Read a CSV file with pandas, retrying with a detected encoding on failure.

    Recognised keyword arguments (all others are forwarded to pd.read_csv):
    - engine: parser engine, default "pyarrow".
    - index_col: default None.
    - memory_map: default True; only passed when engine is not "pyarrow".
    - skipinitialspace: default True; only passed when engine is not "pyarrow".
    - encoding: first encoding to try, default "utf-8".

    If the first attempt raises UnicodeDecodeError, the encoding returned by
    get_encoding(fpath) is tried once; a second failure propagates.

    Returns:
    - The loaded DataFrame.
    """
    engine = kwargs.pop("engine", "pyarrow")
    index_col = kwargs.pop("index_col", None)
    memory_map = kwargs.pop("memory_map", True)
    skipinitialspace = kwargs.pop("skipinitialspace", True)
    encoding = kwargs.pop("encoding", "utf-8")

    def _read(enc):
        # One place for the read_csv call, used by both attempts.
        options = {"engine": engine, "index_col": index_col, "encoding": enc}
        if engine != "pyarrow":
            # These two options are withheld from the pyarrow engine.
            options["memory_map"] = memory_map
            options["skipinitialspace"] = skipinitialspace
        return pd.read_csv(fpath, **options, **kwargs)

    try:
        df = _read(encoding)
    except UnicodeDecodeError:
        failed_encoding = encoding
        encoding = get_encoding(fpath)
        print(
            f"{failed_encoding} failed. Retrying with detected encoding: {encoding}"
        )
        df = _read(encoding)
    # Report the encoding that actually succeeded, not a hard-coded one.
    print(f"File loaded successfully with {encoding} encoding.")
    return df
1293
1456
 
1294
1457
def load_xlsx(fpath, **kwargs):
    """
    Read a spreadsheet through pd.read_excel.

    The "engine" keyword defaults to "openpyxl" when the caller does not
    supply one; every other keyword is forwarded unchanged.
    """
    reader_engine = kwargs.pop("engine", "openpyxl")
    return pd.read_excel(fpath, engine=reader_engine, **kwargs)
1297
1462
 
1298
1463
  def load_ipynb(fpath, **kwargs):
@@ -1398,11 +1563,37 @@ def fload(fpath, kind=None, **kwargs):
1398
1563
  "pdf",
1399
1564
  "ipynb",
1400
1565
  ]
1401
- supported_types = [*doc_types, *img_types]
1566
+ zip_types = [
1567
+ "gz",
1568
+ "zip",
1569
+ "7z",
1570
+ "tar",
1571
+ "tar.gz",
1572
+ "tar.bz2",
1573
+ "bz2",
1574
+ "xz",
1575
+ "rar",
1576
+ "tgz",
1577
+ ]
1578
+ supported_types = [*doc_types, *img_types, *zip_types]
1402
1579
  if kind not in supported_types:
1403
- raise ValueError(
1404
- f"Error:\n{kind} is not in the supported list {supported_types}"
1405
- )
1580
+ print(f'Error:\n"{kind}" is not in the supported list {supported_types}')
1581
+ # if os.path.splitext(fpath)[1][1:].lower() in zip_types:
1582
+ # keep=kwargs.get("keep", False)
1583
+ # ifile=kwargs.get("ifile",(0,0))
1584
+ # kwargs.pop("keep",None)
1585
+ # kwargs.pop("ifile",None)
1586
+ # fpath_unzip=unzip(fpath)
1587
+ # if isinstance(fpath_unzip,list):
1588
+ # fpath_unzip=fpath_unzip[ifile[0]]
1589
+ # if os.path.isdir(fpath_unzip):
1590
+ # fpath_selected=listdir(fpath_unzip,kind=kind).fpath[ifile[1]]
1591
+ # fpath_unzip=fpath_selected
1592
+ # content_unzip=fload(fpath_unzip, **kwargs)
1593
+ # if not keep:
1594
+ # os.remove(fpath_unzip)
1595
+ # return content_unzip
1596
+
1406
1597
  if kind == "docx":
1407
1598
  return load_docx(fpath)
1408
1599
  elif kind == "txt" or kind == "md":
@@ -1417,6 +1608,14 @@ def fload(fpath, kind=None, **kwargs):
1417
1608
  return load_xml(fpath)
1418
1609
  elif kind == "csv":
1419
1610
  return load_csv(fpath, **kwargs)
1611
+ elif kind in ["ods", "ods", "odt"]:
1612
+ engine = kwargs.get("engine", "odf")
1613
+ kwargs.pop("engine", None)
1614
+ return load_xlsx(fpath, engine=engine, **kwargs)
1615
+ elif kind == "xls":
1616
+ engine = kwargs.get("engine", "xlrd")
1617
+ kwargs.pop("engine", None)
1618
+ return load_xlsx(fpath, engine=engine, **kwargs)
1420
1619
  elif kind == "xlsx":
1421
1620
  return load_xlsx(fpath, **kwargs)
1422
1621
  elif kind == "ipynb":
@@ -1427,10 +1626,55 @@ def fload(fpath, kind=None, **kwargs):
1427
1626
  elif kind.lower() in img_types:
1428
1627
  print(f'Image ".{kind}" is loaded.')
1429
1628
  return load_img(fpath)
1629
+ elif kind.lower() in zip_types:
1630
+ keep = kwargs.get("keep", False)
1631
+ fpath_unzip = unzip(fpath)
1632
+ if os.path.isdir(fpath_unzip):
1633
+ print(f"{fpath_unzip} is a folder. fload stoped.")
1634
+ fpath_list = os.listdir("./datasets/GSE10927_family.xml")
1635
+ print(f"{len(fpath_list)} files within the folder")
1636
+ if len(fpath_list) > 5:
1637
+ pp(fpath_list[:5])
1638
+ print("there are more ...")
1639
+ else:
1640
+ pp(fpath_list)
1641
+ return fpath_list
1642
+ elif os.path.isfile(fpath_unzip):
1643
+ print(f"{fpath_unzip} is a file.")
1644
+ content_unzip = fload(fpath_unzip, **kwargs)
1645
+ if not keep:
1646
+ os.remove(fpath_unzip)
1647
+ return content_unzip
1648
+ else:
1649
+ print(f"{fpath_unzip} does not exist or is a different type.")
1650
+
1651
+ elif kind.lower() == "gmt":
1652
+ import gseapy as gp
1653
+
1654
+ gene_sets = gp.read_gmt(fpath)
1655
+ return gene_sets
1430
1656
  else:
1431
- raise ValueError(
1432
- f"Error:\n{kind} is not in the supported list {supported_types}"
1433
- )
1657
+ try:
1658
+ try:
1659
+ with open(fpath, "r", encoding="utf-8") as f:
1660
+ content = f.readlines()
1661
+ except UnicodeDecodeError:
1662
+ print("Failed to read as utf-8, trying different encoding...")
1663
+ with open(
1664
+ fpath, "r", encoding=get_encoding(fpath)
1665
+ ) as f: # Trying with a different encoding
1666
+ content = f.readlines()
1667
+ except:
1668
+ try:
1669
+ with open(fpath, "r", encoding="utf-8") as f:
1670
+ content = f.read()
1671
+ except UnicodeDecodeError:
1672
+ print("Failed to read as utf-8, trying different encoding...")
1673
+ with open(
1674
+ fpath, "r", encoding=get_encoding(fpath)
1675
+ ) as f: # Trying with a different encoding
1676
+ content = f.read()
1677
+ return content
1434
1678
 
1435
1679
 
1436
1680
  # Example usage
@@ -2030,10 +2274,10 @@ def mkdir(*args, **kwargs):
2030
2274
  if isinstance(arg, (str, list)):
2031
2275
  if "/" in arg or "\\" in arg:
2032
2276
  pardir = arg
2033
- print(f"pardir{pardir}")
2277
+ print(f'pardir: "{pardir}"')
2034
2278
  else:
2035
2279
  chdir = arg
2036
- print(f"chdir{chdir}")
2280
+ print(f'chdir:"{chdir}"')
2037
2281
  elif isinstance(arg, bool):
2038
2282
  overwrite = arg
2039
2283
  print(overwrite)
@@ -2049,6 +2293,13 @@ def mkdir(*args, **kwargs):
2049
2293
  pardir = os.path.normpath(pardir)
2050
2294
  # Get the slash type: "/" or "\"
2051
2295
  stype = "/" if "/" in pardir else "\\"
2296
+ if "mac" in get_os().lower() or "lin" in get_os().lower():
2297
+ stype = "/"
2298
+ elif "win" in get_os().lower():
2299
+ stype = "\\"
2300
+ else:
2301
+ stype = "/"
2302
+
2052
2303
  # Check if the parent directory exists and is a directory path
2053
2304
  if os.path.isdir(pardir):
2054
2305
  os.chdir(pardir) # Set current path
@@ -4046,3 +4297,234 @@ def preview(var):
4046
4297
  # preview("# This is a Markdown header")
4047
4298
  # preview(pd.DataFrame({"Name": ["Alice", "Bob"], "Age": [25, 30]}))
4048
4299
  # preview({"key": "value", "numbers": [1, 2, 3]})
4300
+
4301
+
4302
+ # ! DataFrame
4303
def df_as_type(
    df: pd.DataFrame,
    columns: Optional[Union[str, List[str]]] = None,
    astype: str = "datetime",
    format: Optional[str] = None,
    inplace: bool = True,
    errors: str = "coerce",  # Can be "ignore", "raise", or "coerce"
    **kwargs,
) -> pd.DataFrame:
    """
    Convert specified columns of a DataFrame to a specified type (e.g., datetime, float, int, numeric, timedelta).
    If columns is None, all columns in the DataFrame will be converted.

    Parameters:
    - df: DataFrame containing the columns to convert.
    - columns: A single column name, a list of column names, or None to convert all columns.
      An int is treated as a column position for datetime-like targets; for other
      targets it is used as a label when such a label exists, otherwise as a position.
    - astype: The target type ('datetime', 'float', 'int', 'numeric', 'timedelta', ...).
      The value is passed through strcmp() against the supported names, and the first
      element of its result is used. 'hour', 'minute', 'second', 'time', 'date',
      'month' and 'year' first convert to datetime and then take that component;
      'week' yields the day name.
    - format: Optional datetime format (only used for datetime-like conversions).
    - inplace: If True (the default) the passed DataFrame is modified; if False a copy is modified.
    - errors: Can be "ignore", "raise", or "coerce"; forwarded to pd.to_datetime,
      pd.to_numeric and pd.to_timedelta.
    - **kwargs: Additional keyword arguments for pd.to_datetime / pd.to_numeric / pd.to_timedelta.

    Returns:
    - The converted DataFrame: the original object when inplace=True, otherwise the copy.
      A column whose conversion raises is reported with print() and left unchanged.
    """
    astypes = [
        "datetime",
        "timedelta",
        "numeric",
        "int",
        "int8",
        "int16",
        "int32",
        "int64",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
        "float",
        "float16",
        "float32",
        "float64",
        "complex",
        "complex64",
        "complex128",
        "str",
        "string",
        "bool",
        "datetime64",
        "datetime64[ns]",
        "timedelta64",
        "timedelta64[ns]",
        "category",
        "object",
        "Sparse",
        "hour",
        "minute",
        "second",
        "time",
        "week",
        "date",
        "month",
        "year",
    ]
    # Targets that go through pd.to_datetime; all except "datetime" and "week"
    # are also the name of the .dt attribute to extract afterwards.
    datetime_like = (
        "datetime",
        "hour",
        "minute",
        "second",
        "time",
        "week",
        "date",
        "month",
        "year",
    )

    # correct the astype input
    astype = strcmp(astype, astypes)[0]
    print(f"converting {columns} as type: {astype}")
    # If inplace is False, make a copy of the DataFrame
    if not inplace:
        df = df.copy()
    # If columns is None, apply to all columns
    if columns is None:
        columns = df.columns

    # Ensure columns is a list
    if isinstance(columns, (str, int)):
        columns = [columns]

    # Convert specified columns
    for column in columns:
        try:
            # Resolve an integer position to its label once, so every branch
            # reads and writes the same column.
            positional = isinstance(column, int) and (
                astype in datetime_like or column not in df.columns
            )
            label = df.columns[column] if positional else column
            source = df.iloc[:, column] if positional else df[label]

            if astype in datetime_like:
                dt_kwargs = dict(kwargs)
                if format:
                    dt_kwargs["format"] = format
                converted = pd.to_datetime(source, errors=errors, **dt_kwargs)
                # Take the component from the converted Series itself, not from
                # a column read back out of df: positional assignment can leave
                # the stored column as object dtype, which breaks `.dt`.
                if astype == "week":
                    converted = converted.dt.day_name()
                elif astype != "datetime":
                    converted = getattr(converted.dt, astype)
            elif astype == "numeric":
                converted = pd.to_numeric(source, errors=errors, **kwargs)
            elif astype == "timedelta":
                converted = pd.to_timedelta(source, errors=errors, **kwargs)
            else:
                # Convert to other types (e.g., float, int)
                converted = source.astype(astype)

            df[label] = converted
        except Exception as e:
            print(f"Error converting '{column}' to {astype}: {e}")

    return df
4462
+
4463
+
4464
+ # ! DataFrame
4465
def df_sort_values(df, column, by=None, ascending=True, inplace=False, **kwargs):
    """
    Sort a DataFrame by a specified column, optionally using a custom order.

    Parameters:
    - df: DataFrame to be sorted.
    - column: The name of the column to sort by.
    - by: List specifying the custom order for sorting. If None, the column
      is sorted by its natural order.
    - ascending: Boolean or list of booleans, default True.
                 Sort ascending vs. descending.
    - inplace: If True, perform operation in place and return None.
    - **kwargs: Additional arguments to pass to sort_values.

    Returns:
    - Sorted DataFrame if inplace is False, otherwise None. With a custom
      order the sort column of the result is an ordered Categorical. When
      inplace is False the DataFrame passed in is left untouched.
    - If sorting raises, the error is printed and the original df is returned.

    Raises:
    - ValueError: If `column` is not in the DataFrame, or `by` is neither
      None nor a list.
    """
    if column not in df.columns:
        raise ValueError(f"Column '{column}' does not exist in the DataFrame.")

    if by is not None and not isinstance(by, list):
        raise ValueError("'by' must be a list.")

    try:
        # Only the in-place path may alter the caller's object.
        target = df if inplace else df.copy()
        if by is not None:
            # Convert the specified column to a categorical type with the custom order
            target[column] = pd.Categorical(target[column], categories=by, ordered=True)
        if inplace:  # replace the original
            target.sort_values(column, ascending=ascending, inplace=True, **kwargs)
            print(f"Successfully sorted DataFrame by '{column}'")
            return None
        sorted_df = target.sort_values(column, ascending=ascending, **kwargs)
        print(f"Successfully sorted DataFrame by '{column}' using custom order.")
        return sorted_df
    except Exception as e:
        print(f"Error sorting DataFrame by '{column}': {e}")
        return df
4501
+
4502
+
4503
+ # # Example usage:
4504
+ # # Sample DataFrame
4505
+ # data = {
4506
+ # "month": ["March", "January", "February", "April", "December"],
4507
+ # "Amount": [200, 100, 150, 300, 250],
4508
+ # }
4509
+ # df_month = pd.DataFrame(data)
4510
+
4511
+ # # Define the month order
4512
+ # month_order = [
4513
+ # "January",
4514
+ # "February",
4515
+ # "March",
4516
+ # "April",
4517
+ # "May",
4518
+ # "June",
4519
+ # "July",
4520
+ # "August",
4521
+ # "September",
4522
+ # "October",
4523
+ # "November",
4524
+ # "December",
4525
+ # ]
4526
+ # display(df_month)
4527
+ # sorted_df_month = df_sort_values(df_month, "month", month_order, ascending=True)
4528
+ # display(sorted_df_month)
4529
+ # df_sort_values(df_month, "month", month_order, ascending=True, inplace=True)
4530
+ # display(df_month)
py2ls/plot.py CHANGED
@@ -17,6 +17,96 @@ from .stats import *
17
17
  logging.getLogger("fontTools").setLevel(logging.WARNING)
18
18
 
19
19
 
20
def df_corr(
    df,
    columns="all",
    tri="u",
    mask=True,
    k=1,
    annot=True,
    cmap="coolwarm",
    fmt=".2f",
    cluster=False,  # New parameter for clustermap option
    figsize=(10, 8),
    row_cluster=True,  # Perform clustering on rows
    col_cluster=True,  # Perform clustering on columns
    dendrogram_ratio=(0.2, 0.1),  # Adjust size of dendrograms
    cbar_pos=(0.02, 1, 0.02, 0.1),  # Adjust colorbar position
    xticklabels=True,  # Show column labels
    yticklabels=True,  # Show row labels
    **kwargs,
):
    """
    Plot the correlation matrix of a DataFrame as a heatmap or clustermap.

    Parameters:
    - df: source DataFrame.
    - columns: "all" to use the float/int columns, otherwise a column
      selection passed to df[...].
    - tri: which triangle stays visible; any value containing "u" keeps the
      upper triangle, anything else keeps the lower one.
    - mask: if True, hide the opposite triangle.
    - k: diagonal offset of the mask, mirrored for the two triangles so that
      both behave alike (k=1 keeps the main diagonal visible, k=0 hides it).
    - annot, cmap, fmt: forwarded to seaborn.
    - cluster: if True draw sns.clustermap, otherwise sns.heatmap.
    - figsize: figure size for either plot.
    - row_cluster, col_cluster, dendrogram_ratio, cbar_pos: clustermap only.
    - xticklabels, yticklabels: tick label settings for either plot.
    - **kwargs: extra arguments for sns.clustermap / sns.heatmap.

    Returns:
    - cluster=True: (ax_row_dendrogram, ax_col_dendrogram, ax_heatmap).
    - cluster=False: the heatmap Axes.
    """
    # Select numeric columns or specific subset of columns.
    # The isinstance check keeps list/Index selections from being compared
    # element-wise against the string "all".
    if isinstance(columns, str) and columns == "all":
        df_numeric = df.select_dtypes(include=[float, int])
    else:
        df_numeric = df[columns]

    # Compute the correlation matrix
    correlation_matrix = df_numeric.corr()

    # Build the mask: True cells are hidden.
    if mask:
        ones = np.ones_like(correlation_matrix, dtype=bool)
        if "u" in tri.lower():
            # Keep the upper triangle: hide everything below diagonal -k+1,
            # mirroring what np.triu(k) hides in the lower case.
            mask_array = np.tril(ones, k=-k)
        else:
            # Keep the lower triangle: hide from the k-th super-diagonal up.
            mask_array = np.triu(ones, k=k)
    else:
        mask_array = None

    # The named parameters above can never appear in kwargs; only this
    # legacy key needs to be dropped before forwarding.
    kwargs.pop("clustermap", None)

    # Plot the heatmap or clustermap
    if cluster:
        cluster_obj = sns.clustermap(
            correlation_matrix,
            mask=mask_array,
            annot=annot,
            cmap=cmap,
            fmt=fmt,
            figsize=figsize,
            row_cluster=row_cluster,
            col_cluster=col_cluster,
            dendrogram_ratio=dendrogram_ratio,
            cbar_pos=cbar_pos,
            xticklabels=xticklabels,
            yticklabels=yticklabels,
            **kwargs,  # Pass any additional arguments to sns.clustermap
        )

        return (
            cluster_obj.ax_row_dendrogram,
            cluster_obj.ax_col_dendrogram,
            cluster_obj.ax_heatmap,
        )

    # Create a standard heatmap
    plt.figure(figsize=figsize)
    ax = sns.heatmap(
        correlation_matrix,
        mask=mask_array,
        annot=annot,
        cmap=cmap,
        fmt=fmt,
        xticklabels=xticklabels,
        yticklabels=yticklabels,
        **kwargs,  # Pass any additional arguments to sns.heatmap
    )
    # Return the Axes object for further customization if needed
    return ax
108
+
109
+
20
110
  def catplot(data, *args, **kwargs):
21
111
  """
22
112
  catplot(data, opt=None, ax=None)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: py2ls
3
- Version: 0.1.10.26
3
+ Version: 0.2.1
4
4
  Summary: py(thon)2(too)ls
5
5
  Author: Jianfeng
6
6
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -207,15 +207,15 @@ py2ls/doc.py,sha256=xN3g1OWfoaGUhikbJ0NqbN5eKy1VZVvWwRlhHMgyVEc,4243
207
207
  py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,2325
208
208
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
209
209
  py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
210
- py2ls/ips.py,sha256=nhQBp5J2QjodplBXMCVBW-7w7rVzG6IalqCuaJgf7Xc,147799
210
+ py2ls/ips.py,sha256=HjMZDXzfOiqhgNOdtoX7dxoY2cRsrD78LXilWyIUffE,164940
211
211
  py2ls/netfinder.py,sha256=vgOOMhzwbjRuLWMAPyf_kh3HoOhsJ9dlA-tCkMf7kNU,55371
212
212
  py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
213
- py2ls/plot.py,sha256=yj-AfnYNr1ha_Y5EimTsUVSooFc36nE0KCQ8cP9_Trs,97601
213
+ py2ls/plot.py,sha256=x_bvQyPM6sl7IscgHPUbOEnqR82Iefcyur1JOweEAZw,100536
214
214
  py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
215
215
  py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
216
216
  py2ls/stats.py,sha256=fJmXQ9Lq460StOn-kfEljE97cySq7876HUPTnpB5hLs,38123
217
217
  py2ls/translator.py,sha256=zBeq4pYZeroqw3DT-5g7uHfVqKd-EQptT6LJ-Adi8JY,34244
218
218
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
219
- py2ls-0.1.10.26.dist-info/METADATA,sha256=jwKMA56onOk6i2BpDchh8dsg8HTzNDbODlNNLu61On4,20040
220
- py2ls-0.1.10.26.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
221
- py2ls-0.1.10.26.dist-info/RECORD,,
219
+ py2ls-0.2.1.dist-info/METADATA,sha256=Qr6DFCoJWEj0_JrHmUDLJYRtoPqO7GyHth0Apsq5wOk,20036
220
+ py2ls-0.2.1.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
221
+ py2ls-0.2.1.dist-info/RECORD,,