py2ls 0.2.4.12__py3-none-any.whl → 0.2.4.14__py3-none-any.whl

py2ls/ips.py CHANGED
@@ -6289,6 +6289,12 @@ def df_reducer(
  hue: str = None,  # lda-specific
  scale: bool = True,
  fill_missing: bool = True,
+ size=2,  # plot marker size
+ markerscale=4,  # plot: legend marker size scale
+ edgecolor='none',  # plot: marker edge color
+ legend_loc='best',  # plot: legend location
+ bbox_to_anchor=None,
+ ncols=1,
  debug: bool = False,
  inplace: bool = True,  # replace the original data
  plot_: bool = False,  # plot a scatterplot; without 'hue' it is meaningless
@@ -6312,6 +6318,13 @@ def df_reducer(
  "ica": "ica (Independent Component Analysis):\n\tEffective for blind source separation (e.g., EEG, audio signal processing). It is generally categorized under non-linear dimensionality reduction, but it also serves a distinct role in blind source separation. While ICA is commonly used for dimensionality reduction, particularly in contexts where data sources need to be disentangled (e.g., separating mixed signals like EEG or audio data), it focuses on finding statistically independent components rather than maximizing variance (like PCA) or preserving distances (like MDS or UMAP). Advantage: Extracts independent signals/components, useful in mixed-signal scenarios. Limitation: Assumes statistical independence, sensitive to noise and algorithm choice.",
  #! Anomaly Detection: Specialized for detecting outliers or unusual patterns
  "isolation_forest": "Isolation Forest:\n\tDesigned for anomaly detection, especially in high-dimensional data. Advantage: Effective in detecting outliers, efficient for large datasets. Limitation: Sensitive to the contamination ratio parameter, not ideal for highly structured or non-anomalous data.",
+ #! more methods
+ "truncated_svd": "Truncated Singular Value Decomposition (SVD):\n\tEfficient for large sparse datasets, useful for feature reduction in natural language processing (e.g., Latent Semantic Analysis). Advantage: Efficient in memory usage for large datasets. Limitation: Limited in non-linear transformation.",
+ "spectral_embedding": "Spectral Embedding:\n\tBased on graph theory, it can be useful for clustering and visualization, especially for data with connected structures. Advantage: Preserves global structure, good for graph-type data. Limitation: Sensitive to parameter choice, not ideal for arbitrary non-connected data.",
+ "autoencoder": "Autoencoder:\n\tA neural network-based approach for complex feature learning and non-linear dimensionality reduction. Advantage: Can capture very complex relationships. Limitation: Computationally expensive, requires neural network expertise for effective tuning.",
+ "nmf": "Non-negative Matrix Factorization:\n\tEffective for parts-based decomposition, commonly used for sparse and non-negative data, e.g., text data or images. Advantage: Interpretability with non-negativity, efficient with sparse data. Limitation: Less effective for negative or zero-centered data.",
+ "umap_hdbscan": "UMAP + HDBSCAN:\n\tCombination of UMAP for dimensionality reduction and HDBSCAN for density-based clustering, suitable for cluster discovery in high-dimensional data. Advantage: Effective in discovering clusters in embeddings. Limitation: Requires careful tuning of both UMAP and HDBSCAN parameters.",
+ "manifold_learning": "Manifold Learning (Isomap, Hessian LLE, etc.):\n\tMethods designed to capture intrinsic geometrical structure. Advantage: Preserves non-linear relationships in low dimensions. Limitation: Computationally expensive and sensitive to noise."
  }
 
  from sklearn.preprocessing import StandardScaler
@@ -6322,17 +6335,9 @@ def df_reducer(
  import seaborn as sns
  # Check valid method input
  methods = [
- "pca",
- "umap",
- "tsne",
- "factor",
- "isolation_forest",
- "lda",
- "kpca",
- "ica",
- "mds",
- "lle",
- "svd",
+ "pca", "umap", "umap_hdbscan", "tsne", "factor", "isolation_forest", "manifold_learning", "lda", "kpca", "ica",
+ "mds", "lle", "svd", "truncated_svd", "spectral_embedding",
+ # "autoencoder", "nmf",
  ]
  method = strcmp(method, methods)[0]
  print(f"\nprocessing with using {dict_methods[method]}:")
@@ -6637,12 +6642,131 @@ def df_reducer(
  index=data.index,
  columns=[f"SVD_{i+1}" for i in range(n_components)],
  )
+ colname_met = "SVD_"
  if hue:
  svd_df[hue] = y
  if debug:
  print("Singular Value Decomposition (SVD) completed.")
+ elif method=="truncated_svd":
+ from sklearn.decomposition import TruncatedSVD
+ svd = TruncatedSVD(n_components=n_components, random_state=random_state)
+ X_reduced = svd.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"SVD Component {i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "SVD Component "
+
+ if debug:
+ print("Truncated SVD completed.")
+ print("Explained Variance Ratio:", svd.explained_variance_ratio_)
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "spectral_embedding":
+ from sklearn.manifold import SpectralEmbedding
+
+ spectral = SpectralEmbedding(n_components=n_components, random_state=random_state)
+ X_reduced = spectral.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"Dimension_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "Dimension_"
+
+ if debug:
+ print("Spectral Embedding completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "autoencoder":
+ from tensorflow.keras.models import Model
+ from tensorflow.keras.layers import Input, Dense
+
+ input_dim = X.shape[1]
+ input_layer = Input(shape=(input_dim,))
+ encoded = Dense(n_components * 2, activation="relu")(input_layer)
+ encoded = Dense(n_components, activation="relu")(encoded)
+ autoencoder = Model(input_layer, encoded)
+ autoencoder.compile(optimizer="adam", loss="mean_squared_error")
+ autoencoder.fit(X, X, epochs=50, batch_size=256, shuffle=True, verbose=0)
+
+ X_reduced = autoencoder.predict(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"Score_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "Score_"
+
+ if debug:
+ print("Autoencoder reduction completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "nmf":
+ from sklearn.decomposition import NMF
+
+ nmf = NMF(n_components=n_components, random_state=random_state)
+ X_reduced = nmf.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"NMF_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "NMF_"
 
- # Return reduced data and info as a new DataFrame with the same index
+ if debug:
+ print("Non-negative Matrix Factorization completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "umap_hdbscan":
+ import umap
+ import hdbscan
+
+ umap_model = umap.UMAP(
+ n_neighbors=umap_neighbors,
+ min_dist=umap_min_dist,
+ n_components=n_components,
+ )
+ X_umap = umap_model.fit_transform(X)
+
+ clusterer = hdbscan.HDBSCAN()
+ clusters = clusterer.fit_predict(X_umap)
+
+ reduced_df = pd.DataFrame(
+ X_umap,
+ columns=[f"UMAP_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ reduced_df["Cluster"] = clusters
+ colname_met = "UMAP_"
+ if debug:
+ print("UMAP + HDBSCAN reduction and clustering completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ elif method == "manifold_learning":
+ from sklearn.manifold import Isomap
+
+ isomap = Isomap(n_components=n_components)
+ X_reduced = isomap.fit_transform(X)
+ reduced_df = pd.DataFrame(
+ X_reduced,
+ columns=[f"Manifold_{i+1}" for i in range(n_components)],
+ index=data.index,
+ )
+ colname_met = "Manifold_"
+
+ if debug:
+ print("Manifold Learning (Isomap) completed.")
+ if hue:
+ reduced_df[hue] = y
+
+ #! Return reduced data and info as a new DataFrame with the same index
  if method == "pca":
  reduced_df = pca_df
  colname_met = "PC_"
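As a standalone illustration of the umap_hdbscan branch added above (a UMAP embedding followed by HDBSCAN labelling), here is a minimal sketch outside of df_reducer; the n_neighbors and min_dist values are arbitrary examples, not the function's defaults:

import numpy as np
import pandas as pd
import umap      # umap-learn
import hdbscan

X = np.random.default_rng(0).normal(size=(300, 8))

# 1) non-linear embedding into 2 components
embedding = umap.UMAP(n_neighbors=15, min_dist=0.1, n_components=2).fit_transform(X)

# 2) density-based cluster labels on the embedded points (-1 marks noise)
labels = hdbscan.HDBSCAN().fit_predict(embedding)

reduced = pd.DataFrame(embedding, columns=["UMAP_1", "UMAP_2"])
reduced["Cluster"] = labels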
@@ -6699,7 +6823,6 @@ def df_reducer(
  # Quick plots
  if plot_ and (not method in ["isolation_forest"]):
  from .plot import plotxy
-
  if ax is None:
  if figsize is None:
  _, ax = plt.subplots(figsize=cm2inch(8, 8))
@@ -6707,16 +6830,18 @@ def df_reducer(
  _, ax = plt.subplots(figsize=figsize)
  else:
  ax = ax.cla()
+ xlabel = f"{colname_met}1" if xlabel is None else xlabel
+ ylabel = f"{colname_met}2" if ylabel is None else ylabel
  ax = plotxy(
  data=reduced_df,
  x=colname_met + "1",
  y=colname_met + "2",
  hue=hue,
- s=1,
- edgecolor="none",
+ s=size,
+ edgecolor=edgecolor,
  kind="scater",
  figsets=dict(
- legend=dict(loc="best", markerscale=4),
+ legend=dict(loc=legend_loc, markerscale=markerscale, bbox_to_anchor=bbox_to_anchor, ncols=ncols, fontsize=8),
  xlabel=xlabel if xlabel else None,
  ylabel=ylabel if ylabel else None,
  ),
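For orientation, a minimal usage sketch of the new plotting keywords added to df_reducer above. The data= and columns= argument names are assumptions inferred from the surrounding function (they are not shown in these hunks); only size, markerscale, edgecolor, legend_loc, bbox_to_anchor and ncols are confirmed by this diff, and they only take effect when plot_=True:

import numpy as np
import pandas as pd
from py2ls import ips

rng = np.random.default_rng(0)
df = pd.DataFrame(rng.normal(size=(200, 10)),
                  columns=[f"feat_{i}" for i in range(10)])
df["label"] = rng.integers(0, 3, size=200).astype(str)

reduced = ips.df_reducer(
    data=df,                                   # assumed argument name
    columns=[f"feat_{i}" for i in range(10)],  # assumed argument name
    method="umap_hdbscan",                     # one of the newly registered methods
    n_components=2,
    hue="label",
    plot_=True,
    size=8,                    # marker size forwarded to plotxy(s=...)
    markerscale=2,             # legend marker scaling
    edgecolor="k",             # marker edge color
    legend_loc="upper right",  # legend placement
    bbox_to_anchor=(1.02, 1),
    ncols=1,
)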
py2ls/ml2ls.py CHANGED
@@ -1298,10 +1298,11 @@ def plot_validate_features_single(res_val, figsize=None):
  mean_auc = res_val["roc_curve"][model_name]["auc"]
 
  # Plotting
- plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci, ax=nexttile())
+ plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci,
+ model_name=model_name, ax=nexttile())
  plot.figsets(title=model_name, sp=2)
 
- plot_pr_curve(
+ plot_pr_binary(
  recall=res_val["pr_curve"][model_name]["recall"],
  precision=res_val["pr_curve"][model_name]["precision"],
  avg_precision=res_val["pr_curve"][model_name]["avg_precision"],
@@ -1410,7 +1411,6 @@ def plot_roc_curve(
  # ml2ls.plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci)
  # figsets(title=model_name)
 
-
  def plot_pr_curve(
  recall=None,
  precision=None,
@@ -1436,7 +1436,7 @@ def plot_pr_curve(
  precision,
  lw=lw,
  color=color,
- label=(f"{model_name} (AUC={avg_precision:.2f})"),
+ label=(f"{model_name} (AP={avg_precision:.2f})"),
  clip_on=False,
  **kwargs,
  )
@@ -1453,7 +1453,6 @@ def plot_pr_curve(
  ax.legend(loc=legend_loc)
  return ax
 
-
  # * usage: ml2ls.plot_pr_curve()
  # for md_name in flatten(validation_results["pr_curve"].keys()):
  # ml2ls.plot_pr_curve(
@@ -1466,7 +1465,91 @@ def plot_pr_curve(
  # color="r",
  # )
 
+ def plot_pr_binary(
+ recall=None,
+ precision=None,
+ avg_precision=None,
+ model_name=None,
+ lw=2,
+ figsize=[5, 5],
+ title="Precision-Recall Curve",
+ xlabel="Recall",
+ ylabel="Precision",
+ alpha=0.1,
+ color="#FF8F00",
+ legend_loc="lower left",
+ ax=None,
+ show_avg_precision=False,
+ **kwargs,
+ ):
+ from scipy.interpolate import interp1d
+ if ax is None:
+ fig, ax = plt.subplots(figsize=figsize)
+ model_name = "Binary PR Curve" if model_name is None else model_name
+
+ #* use sklearn's built-in 'PrecisionRecallDisplay' instead?
+ # from sklearn.metrics import PrecisionRecallDisplay
+ # disp = PrecisionRecallDisplay(precision=precision,
+ # recall=recall,
+ # average_precision=avg_precision, **kwargs)
+ # disp.plot(ax=ax, name=model_name, color=color)
+
+ # Plot Precision-Recall curve
+ ax.plot(
+ recall,
+ precision,
+ lw=lw,
+ color=color,
+ label=(f"{model_name} (AP={avg_precision:.2f})"),
+ clip_on=False,
+ **kwargs,
+ )
 
+ # Fill area under the curve
+ ax.fill_between(recall, precision, alpha=alpha, color=color)
+ # Add F1 score iso-contours
+ f_scores = np.linspace(0.2, 0.8, num=4)
+ # for f_score in f_scores:
+ # x = np.linspace(0.01, 1)
+ # y = f_score * x / (2 * x - f_score)
+ # plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=1)
+ # plt.annotate(f"$f_1={f_score:0.1f}$", xy=(0.8, y[45] + 0.02))
+
+ pr_boundary = interp1d(recall, precision, kind="linear", fill_value="extrapolate")
+ for f_score in f_scores:
+ x_vals = np.linspace(0.01, 1, 10000)
+ y_vals = f_score * x_vals / (2 * x_vals - f_score)
+ y_vals_clipped = np.minimum(y_vals, pr_boundary(x_vals))
+ y_vals_clipped = np.clip(y_vals_clipped, 1e-3, None)  # prevent going to zero
+ valid = y_vals_clipped < pr_boundary(x_vals)
+ valid_ = y_vals_clipped > 1e-3
+ valid = valid & valid_
+ x_vals = x_vals[valid]
+ y_vals_clipped = y_vals_clipped[valid]
+ if len(x_vals) > 0:  # annotate only if a line segment exists
+ ax.plot(x_vals, y_vals_clipped, color="gray", alpha=1)
+ plt.annotate(f"$f_1={f_score:0.1f}$", xy=(0.8, y_vals_clipped[-int(len(y_vals_clipped)*0.35)] + 0.02))
+
+ # Plot the average precision line
+ if show_avg_precision:
+ plt.axhline(
+ y=avg_precision,
+ color="red",
+ ls="--",
+ lw=lw,
+ label=f"Avg. precision={avg_precision:.2f}",
+ )
+ # Customize axes
+ ax.set_title(title)
+ ax.set_xlabel(xlabel)
+ ax.set_ylabel(ylabel)
+ ax.set_xlim([-0.01, 1.0])
+ ax.set_ylim([0.0, 1.0])
+ ax.grid(False)
+ ax.legend(loc=legend_loc)
+ return ax
+
  def plot_cm(
  cm,
  labels_name=None,
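A hedged usage sketch of the new plot_pr_binary above, assuming it is exposed at module level like plot_pr_curve; the toy labels and scores below are fabricated purely for illustration:

import numpy as np
from sklearn.metrics import precision_recall_curve, average_precision_score
from py2ls import ml2ls

# toy binary labels and scores, just to produce a curve
rng = np.random.default_rng(0)
y_true = rng.integers(0, 2, size=500)
y_score = np.clip(0.4 * y_true + rng.normal(0.3, 0.25, size=500), 0, 1)

precision, recall, _ = precision_recall_curve(y_true, y_score)
ap = average_precision_score(y_true, y_score)

# AP (average precision), not ROC AUC, is what the corrected label reports
ax = ml2ls.plot_pr_binary(
    recall=recall,
    precision=precision,
    avg_precision=ap,
    model_name="toy classifier",
)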
py2ls/netfinder.py CHANGED
@@ -1,36 +1,11 @@
  from bs4 import BeautifulSoup
  import requests
- from requests.utils import dict_from_cookiejar
- from requests.exceptions import ChunkedEncodingError, ConnectionError
  import os
- from urllib.parse import urlparse, urljoin
- import base64
  import pandas as pd
- from collections import Counter
- import random
  import logging
- from time import sleep
- import stem.process
- from stem import Signal
- from stem.control import Controller
  import json
- from fake_useragent import UserAgent
- from selenium import webdriver
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.common.by import By
- from selenium.webdriver.chrome.options import Options
- from selenium.webdriver.support.ui import WebDriverWait
- from selenium.webdriver.support import expected_conditions as EC
- from webdriver_manager.chrome import ChromeDriverManager
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- from pprint import pp
- import mimetypes
- import io
- import matplotlib.pyplot as plt
- from PIL import Image
- from duckduckgo_search import DDGS
- from datetime import datetime
  import time
+ from selenium.webdriver.common.by import By
  from . import ips
 
  dir_save = "/Users/macjianfeng/Dropbox/Downloads/"
@@ -56,6 +31,8 @@ def user_agent(
  verbose=False,
  os=["windows", "macos", "linux"],
  ):
+ from fake_useragent import UserAgent
+
  ua = UserAgent(browsers=browsers, platforms=platforms, os=os)
  output_ua = ua.random
  if verbose:
@@ -109,6 +86,8 @@ def get_attr(content, where=None, attr=None, **kwargs):
  else:
  print(f"The attribute '{attr}' is not found in the elements.")
  else:
+ from pprint import pp
+
  print(f"Cannot find tag '{where}' in the content.")
  print("Available tags:")
  pp(all_tags)
@@ -237,6 +216,8 @@ def flatten_json(y):
 
 
  def get_proxy():
+ import random
+
  list_ = []
  headers = {"User-Agent": user_agent()}
  response = requests.get(
@@ -275,6 +256,8 @@ def get_cookies(url, login={"username": "your_username", "password": "your_passw
 
  ### Scroll more smoothly; this makes it easier to evade anti-scraping checks
  def scroll_smth_steps(driver, scroll_pause=0.5, min_step=200, max_step=600):
+ import random
+
  """Smoothly scrolls down the page to trigger lazy loading."""
  current_scroll_position = 0
  end_of_page = driver.execute_script("return document.body.scrollHeight")
@@ -383,7 +366,7 @@ def fetch_all(
  if response.status_code == 403:
  logger.warning("403 Forbidden error. Retrying...")
  # Retry the request after a short delay
- sleep(random.uniform(1, 3))
+ time.sleep(random.uniform(1, 3))
  response = requests.get(
  url, headers=headers, proxies=proxies_glob, timeout=30, stream=True
  )
@@ -410,6 +393,18 @@ def fetch_all(
  logger.warning("Unsupported content type")
  return None, None
  elif "se" in driver.lower():
+ import random
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.support.ui import WebDriverWait
+ from selenium.webdriver.support import expected_conditions as EC
+ from webdriver_manager.chrome import ChromeDriverManager
+ from selenium.webdriver.common.desired_capabilities import (
+ DesiredCapabilities,
+ )
+
  chrome_options = Options()
  chrome_options.add_argument("--headless")
  chrome_options.add_argument("--no-sandbox")
@@ -501,7 +496,7 @@ def fetch_all(
  content = BeautifulSoup(page_source, "html.parser")
  if content and content.find_all(by):
  break
- sleep(
+ time.sleep(
  random.uniform(2, 4)
  )  # Wait for a random time before polling again
 
@@ -575,6 +570,8 @@ def fetch_all(
  # else:
  # return None
  def find_links(url, driver="request", booster=False):
+ from urllib.parse import urlparse, urljoin
+
  links_href, cond_ex = [], ["javascript:", "mailto:", "tel:", "fax:"]
  content_type, soup = fetch_all(url, driver=driver)
 
@@ -615,6 +612,8 @@ def find_links(url, driver="request", booster=False):
 
  # To determine which links are related to target domains (e.g., pages) you are interested in
  def filter_links(links, contains="html", driver="requ", booster=False):
+ from urllib.parse import urlparse, urljoin
+
  filtered_links = []
  if isinstance(contains, str):
  contains = [contains]
@@ -631,6 +630,9 @@ def filter_links(links, contains="html", driver="requ", booster=False):
 
 
  def find_domain(links):
+ from urllib.parse import urlparse, urljoin
+ from collections import Counter
+
  if not links:
  return None
  domains = [urlparse(link).netloc for link in links]
@@ -685,6 +687,8 @@ def pdf_detector(url, contains=None, dir_save=None, booster=False):
  pdf_links = filter_links(links=links_all, contains=["pdf"])
 
  if pdf_links:
+ from pprint import pp
+
  pp(f"pdf detected{pdf_links}")
  else:
  print("no pdf file")
@@ -719,6 +723,9 @@ def downloader(
  n_try=3,
  timestamp=False,
  ):
+
+ from requests.exceptions import ChunkedEncodingError, ConnectionError
+
  if verbose:
  print(
  "usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
@@ -742,14 +749,14 @@ def downloader(
  counter_ = str(counter)
  new_filename = f"{base}_{counter_}{ext}"
  counter += 1
- return new_filename
-
+ return new_filename
+
  if url.startswith("ftp"):
  import urllib.request
 
  if dir_save is None:
- dir_save = "./"
- dir_save+= os.path.basename(url)
+ dir_save = "./"
+ dir_save += os.path.basename(url)
  print(dir_save)
  urllib.request.urlretrieve(url, dir_save)
  print(f"Downloaded file to: {dir_save}")
@@ -807,6 +814,8 @@ def downloader(
  file_links = filter_links(links_all, contains=kind_)
  if verbose:
  if file_links:
+ from pprint import pp
+
  print("Files detected:")
  pp(file_links)
  else:
@@ -845,6 +854,8 @@ def downloader(
  dir_save, corrected_fname
  )
  if timestamp:
+ from datetime import datetime
+
  corrected_fname = (
  datetime.now().strftime("%y%m%d_%H%M%S_")
  + corrected_fname
@@ -878,6 +889,8 @@ def downloader(
 
  # print(f"\n{len(fnames)} files were downloaded:")
  if verbose:
+ from pprint import pp
+
  if corrected_fname:
  pp(corrected_fname)
  print(f"\n\nsaved @:\n{dir_save}")
@@ -896,6 +909,9 @@ def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=
  Returns:
  str: HTML content with updated image URLs pointing to local files.
  """
+ from urllib.parse import urlparse, urljoin
+ import base64
+
  if rm_folder:
  ips.rm_folder(dir_save)
  content_type, content = fetch_all(url, driver=driver)
@@ -961,6 +977,9 @@ def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=
 
 
  def svg_to_png(svg_file):
+ import io
+ from PIL import Image
+
  with WandImage(filename=svg_file, resolution=300) as img:
  img.format = "png"
  png_image = img.make_blob()
@@ -1026,6 +1045,16 @@ def fetch_selenium(
  iframe_name=None,  # Add option to handle iframe
  **kwargs,
  ):
+ import random
+ from selenium import webdriver
+ from selenium.webdriver.chrome.service import Service
+ from selenium.webdriver.common.by import By
+ from selenium.webdriver.chrome.options import Options
+ from selenium.webdriver.support.ui import WebDriverWait
+ from selenium.webdriver.support import expected_conditions as EC
+ from webdriver_manager.chrome import ChromeDriverManager
+ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+
  chrome_options = Options()
  chrome_options.add_argument("--headless")
  chrome_options.add_argument("--no-sandbox")
@@ -1085,7 +1114,7 @@ def fetch_selenium(
  if attempt == retry - 1:
  logger.error("Failed to fetch the content after all retries")
  return []
- sleep(random.uniform(1, 3))
+ time.sleep(random.uniform(1, 3))
  # Return empty list if nothing found after all retries
  return []
 
@@ -1102,6 +1131,9 @@ def fetch(
  output="text",
  **kws,
  ):
+ import random
+ from urllib.parse import urlparse, urljoin
+
  if "xt" in output.lower():
  for attempt in range(retry):
  if verbose and attempt == 0:
@@ -1127,7 +1159,7 @@ def fetch(
  else:
  if texts:
  break
- sleep(random.uniform(0.5, 1.5))
+ time.sleep(random.uniform(0.5, 1.5))
  if isinstance(texts, pd.core.frame.DataFrame):
  condition_ = [texts.empty, booster]
  else:
@@ -1453,6 +1485,8 @@ def isa(fpath, kind="img"):
 
 
  def is_image(fpath):
+ import mimetypes
+
  mime_type, _ = mimetypes.guess_type(fpath)
  if mime_type and mime_type.startswith("image"):
  return True
@@ -1461,6 +1495,8 @@ def is_image(fpath):
 
 
  def is_document(fpath):
+ import mimetypes
+
  mime_type, _ = mimetypes.guess_type(fpath)
  if mime_type and (
  mime_type.startswith("text/")
@@ -1481,6 +1517,8 @@ def is_document(fpath):
 
 
  def is_zip(fpath):
+ import mimetypes
+
  mime_type, _ = mimetypes.guess_type(fpath)
  if mime_type == "application/zip":
  return True
@@ -1500,6 +1538,8 @@ def search(
  ):
 
  if "te" in kind.lower():
+ from duckduckgo_search import DDGS
+
  results = DDGS().text(query, max_results=limit)
  res = pd.DataFrame(results)
  res.rename(columns={"href": "links"}, inplace=True)
@@ -1517,6 +1557,8 @@ def search(
 
 
  def echo(query, model="gpt", verbose=True, log=True, dir_save=dir_save):
+ from duckduckgo_search import DDGS
+
  def is_in_any(str_candi_short, str_full, ignore_case=True):
  if isinstance(str_candi_short, str):
  str_candi_short = [str_candi_short]
@@ -1545,8 +1587,12 @@ def echo(query, model="gpt", verbose=True, log=True, dir_save=dir_save):
  model_valid = valid_mod_name(model)
  res = DDGS().chat(query, model=model_valid)
  if verbose:
+ from pprint import pp
+
  pp(res)
  if log:
+ from datetime import datetime
+
  dt_str = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S")
  res_ = f"###{dt_str}\n\n>{res}\n"
  os.makedirs(dir_save, exist_ok=True)
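The netfinder.py changes above follow one pattern: heavy or optional dependencies (selenium, fake_useragent, duckduckgo_search, PIL, ...) move from module level into the functions that use them, and `from time import sleep` is dropped so every delay goes through the module-level `import time` as time.sleep(...). A minimal sketch of the same pattern, using a hypothetical helper that is not part of netfinder.py:

import time  # lightweight stdlib import stays at module level

def fetch_with_browser(url: str) -> str:
    # heavy, optional dependency imported lazily: importing the module
    # stays fast and does not fail when selenium is not installed
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    opts = Options()
    opts.add_argument("--headless")
    driver = webdriver.Chrome(options=opts)
    try:
        driver.get(url)
        time.sleep(1)  # was sleep(1) back when `from time import sleep` existed
        return driver.page_source
    finally:
        driver.quit()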
py2ls/plot.py CHANGED
@@ -735,7 +735,10 @@ def catplot(data, *args, **kwargs):
  bx_opt["EdgeColor"] = "none"
  else:
  bx_opt["EdgeColor"] = bx_opt["EdgeColor"]
-
+ if not isinstance(bx_opt["FaceColor"], list):
+ bx_opt["FaceColor"] = [bx_opt["FaceColor"]]
+ if len(bxp["boxes"]) != len(bx_opt["FaceColor"]) and (len(bx_opt["FaceColor"]) == 1):
+ bx_opt["FaceColor"] = bx_opt["FaceColor"] * len(bxp["boxes"])
  for patch, color in zip(bxp["boxes"], bx_opt["FaceColor"]):
  patch.set_facecolor(to_rgba(color, bx_opt["FaceAlpha"]))
 
@@ -2315,16 +2318,8 @@ def split_legend(ax, n=2, loc=None, title=None, bbox=None, ncol=1, **kwargs):
  return legends
 
 
- def get_colors(
- n: int = 1,
- cmap: str = "auto",
- by: str = "start",
- alpha: float = 1.0,
- output: str = "hue",
- *args,
- **kwargs,
- ):
- return get_color(n, cmap, alpha, output, *args, **kwargs)
+ def get_colors(n: int = 1, cmap: str = "auto", by: str = "start", alpha: float = 1.0, output: str = "hue", *args, **kwargs):
+ return get_color(n=n, cmap=cmap, alpha=alpha, output=output, *args, **kwargs)
 
 
  def get_color(
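The catplot change above broadcasts a single FaceColor to every box before it is zipped with bxp["boxes"]; without it, zip() would stop after the first box. A standalone sketch of that broadcast logic, with hypothetical stand-in values:

# stand-ins for bxp["boxes"] and bx_opt["FaceColor"] in catplot
boxes = ["box_1", "box_2", "box_3"]
face_color = "#1f77b4"          # a single color, not a list

if not isinstance(face_color, list):
    face_color = [face_color]
if len(boxes) != len(face_color) and len(face_color) == 1:
    face_color = face_color * len(boxes)   # repeat so zip() covers every box

for box, color in zip(boxes, face_color):
    print(box, "->", color)     # every box now receives the same face color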
py2ls-0.2.4.12.dist-info/METADATA → py2ls-0.2.4.14.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: py2ls
- Version: 0.2.4.12
+ Version: 0.2.4.14
  Summary: py(thon)2(too)ls
  Author: Jianfeng
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -132,7 +132,7 @@ Requires-Dist: nltk (>=3.8.1)
  Requires-Dist: numba (>=0.59.1)
  Requires-Dist: numcodecs (>=0.13.0)
  Requires-Dist: numerizer (>=0.2.3)
- Requires-Dist: numpy (>=1.26.4)
+ Requires-Dist: numpy (>=1.26.4,<2.0.0)
  Requires-Dist: onnxruntime (>=1.18.1)
  Requires-Dist: opencv-contrib-python (>=4.10.0.84)
  Requires-Dist: opencv-python (>=4.10.0.84)
py2ls-0.2.4.12.dist-info/RECORD → py2ls-0.2.4.14.dist-info/RECORD CHANGED
@@ -234,17 +234,17 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
  py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
  py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
- py2ls/ips.py,sha256=eXDteBS2ODd4qOjKhEQAgvLWerPXOjBvIe1kHQnI-Ww,265294
- py2ls/ml2ls.py,sha256=DPVbitW1Z-YwMXl6DR4ciB-OoCHFMNv5oWnEIi918LA,109898
+ py2ls/ips.py,sha256=O2QdLo6-vPbHvWtlVdtMA49LAn2y0CNVM27cxLbqqYA,271496
+ py2ls/ml2ls.py,sha256=LovnWDV9ptdWuWwJF5EEdf3sGY4EniGBBNxRJJbzStw,112784
  py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
- py2ls/netfinder.py,sha256=RJFr80tGEJiuwEx99IBOhI5-ZuXnPdWnGUYpF7XCEwI,56426
+ py2ls/netfinder.py,sha256=R70NkrnO8LlXjT1y7bf2TN-yE4yOeAYhb0jDBiNp8XA,57536
  py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
- py2ls/plot.py,sha256=5eoCgyQ7Bi4HyB60nrWdOh0tEJJEkLlFHfxM6ydT9PA,171262
+ py2ls/plot.py,sha256=X0R1KK_UTdeJazjnqTqYvP-uWu6wY8szQHyJMsDDz2s,171515
  py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
  py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
  py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
  py2ls/translator.py,sha256=zBeq4pYZeroqw3DT-5g7uHfVqKd-EQptT6LJ-Adi8JY,34244
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
- py2ls-0.2.4.12.dist-info/METADATA,sha256=mvipE6Wd7de3FX-AU3u2IV9oL3zTQuAbcn8yhLcRA_4,20039
- py2ls-0.2.4.12.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
- py2ls-0.2.4.12.dist-info/RECORD,,
+ py2ls-0.2.4.14.dist-info/METADATA,sha256=SSjNh_FXmxwIF_Xx2fZvSGKZaX997x4sfJxUQckMuGY,20046
+ py2ls-0.2.4.14.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+ py2ls-0.2.4.14.dist-info/RECORD,,