py2ls 0.2.4.11__py3-none-any.whl → 0.2.4.13__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
py2ls/ips.py CHANGED
@@ -6289,6 +6289,12 @@ def df_reducer(
6289
6289
  hue: str = None, # lda-specific
6290
6290
  scale: bool = True,
6291
6291
  fill_missing: bool = True,
6292
+ size=2,# for plot marker size
6293
+ markerscale=4,# for plot, legend marker size scale
6294
+ edgecolor='none',# for plot,
6295
+ legend_loc='best',# for plot,
6296
+ bbox_to_anchor=None,
6297
+ ncols=1,
6292
6298
  debug: bool = False,
6293
6299
  inplace: bool = True, # replace the original data
6294
6300
  plot_: bool = False, # plot scatterplot, but no 'hue',so it is meaningless
@@ -6312,6 +6318,13 @@ def df_reducer(
6312
6318
  "ica": "ica(Independent Component Analysis):\n\tEffective for blind source separation (e.g., EEG, audio signal processing).is generally categorized under Non-linear Dimensionality Reduction, but it also serves a distinct role in Blind Source Separation. While ICA is commonly used for dimensionality reduction, particularly in contexts where data sources need to be disentangled (e.g., separating mixed signals like EEG or audio data), it focuses on finding statistically independent components rather than maximizing variance (like PCA) or preserving distances (like MDS or UMAP). Advantage: Extracts independent signals/components, useful in mixed signal scenarios. Limitation: Assumes statistical independence, sensitive to noise and algorithm choice.",
6313
6319
  #! Anomaly Detection: Specialized for detecting outliers or unusual patterns
6314
6320
  "isolation_forest": "Isolation Forest:\n\tDesigned for anomaly detection, especially in high-dimensional data. Advantage: Effective in detecting outliers, efficient for large datasets. Limitation: Sensitive to contamination ratio parameter, not ideal for highly structured or non-anomalous data.",
6321
+ #! more methods
6322
+ "truncated_svd": "Truncated Singular Value Decomposition (SVD):\n\tEfficient for large sparse datasets, useful for feature reduction in natural language processing (e.g., Latent Semantic Analysis). Advantage: Efficient in memory usage for large datasets. Limitation: Limited in non-linear transformation.",
6323
+ "spectral_embedding": "Spectral Embedding:\n\tBased on graph theory, it can be useful for clustering and visualization, especially for data with connected structures. Advantage: Preserves global structure, good for graph-type data. Limitation: Sensitive to parameter choice, not ideal for arbitrary non-connected data.",
6324
+ "autoencoder": "Autoencoder:\n\tA neural network-based approach for complex feature learning and non-linear dimensionality reduction. Advantage: Can capture very complex relationships. Limitation: Computationally expensive, requires neural network expertise for effective tuning.",
6325
+ "nmf": "Non-negative Matrix Factorization:\n\tEffective for parts-based decomposition, commonly used for sparse and non-negative data, e.g., text data or images. Advantage: Interpretability with non-negativity, efficient with sparse data. Limitation: Less effective for negative or zero-centered data.",
6326
+ "umap_hdbscan": "UMAP + HDBSCAN:\n\tCombination of UMAP for dimensionality reduction and HDBSCAN for density-based clustering, suitable for cluster discovery in high-dimensional data. Advantage: Effective in discovering clusters in embeddings. Limitation: Requires careful tuning of both UMAP and HDBSCAN parameters.",
6327
+ "manifold_learning": "Manifold Learning (Isomap, Hessian LLE, etc.):\n\tMethods designed to capture intrinsic geometrical structure. Advantage: Preserves non-linear relationships in low dimensions. Limitation: Computationally expensive and sensitive to noise."
6315
6328
  }
6316
6329
 
6317
6330
  from sklearn.preprocessing import StandardScaler
@@ -6322,17 +6335,9 @@ def df_reducer(
6322
6335
  import seaborn as sns
6323
6336
  # Check valid method input
6324
6337
  methods = [
6325
- "pca",
6326
- "umap",
6327
- "tsne",
6328
- "factor",
6329
- "isolation_forest",
6330
- "lda",
6331
- "kpca",
6332
- "ica",
6333
- "mds",
6334
- "lle",
6335
- "svd",
6338
+ "pca", "umap", "umap_hdbscan", "tsne", "factor", "isolation_forest","manifold_learning", "lda", "kpca", "ica",
6339
+ "mds", "lle", "svd", "truncated_svd", "spectral_embedding",
6340
+ # "autoencoder","nmf",
6336
6341
  ]
6337
6342
  method = strcmp(method, methods)[0]
6338
6343
  print(f"\nprocessing with using {dict_methods[method]}:")
@@ -6637,12 +6642,131 @@ def df_reducer(
6637
6642
  index=data.index,
6638
6643
  columns=[f"SVD_{i+1}" for i in range(n_components)],
6639
6644
  )
6645
+ colname_met = "SVD_"
6640
6646
  if hue:
6641
6647
  svd_df[hue] = y
6642
6648
  if debug:
6643
6649
  print("Singular Value Decomposition (SVD) completed.")
6650
+ elif method=="truncated_svd":
6651
+ from sklearn.decomposition import TruncatedSVD
6652
+ svd = TruncatedSVD(n_components=n_components, random_state=random_state)
6653
+ X_reduced = svd.fit_transform(X)
6654
+ reduced_df = pd.DataFrame(
6655
+ X_reduced,
6656
+ columns=[f"SVD Component {i+1}" for i in range(n_components)],
6657
+ index=data.index,
6658
+ )
6659
+ colname_met = "SVD Component "
6660
+
6661
+ if debug:
6662
+ print("Truncated SVD completed.")
6663
+ print("Explained Variance Ratio:", svd.explained_variance_ratio_)
6664
+ if hue:
6665
+ reduced_df[hue] = y
6666
+
6667
+ elif method == "spectral_embedding":
6668
+ from sklearn.manifold import SpectralEmbedding
6669
+
6670
+ spectral = SpectralEmbedding(n_components=n_components, random_state=random_state)
6671
+ X_reduced = spectral.fit_transform(X)
6672
+ reduced_df = pd.DataFrame(
6673
+ X_reduced,
6674
+ columns=[f"Dimension_{i+1}" for i in range(n_components)],
6675
+ index=data.index,
6676
+ )
6677
+ colname_met = "Dimension_"
6678
+
6679
+ if debug:
6680
+ print("Spectral Embedding completed.")
6681
+ if hue:
6682
+ reduced_df[hue] = y
6683
+
6684
+ elif method == "autoencoder":
6685
+ from tensorflow.keras.models import Model
6686
+ from tensorflow.keras.layers import Input, Dense
6687
+
6688
+ input_dim = X.shape[1]
6689
+ input_layer = Input(shape=(input_dim,))
6690
+ encoded = Dense(n_components * 2, activation="relu")(input_layer)
6691
+ encoded = Dense(n_components, activation="relu")(encoded)
6692
+ autoencoder = Model(input_layer, encoded)
6693
+ autoencoder.compile(optimizer="adam", loss="mean_squared_error")
6694
+ autoencoder.fit(X, X, epochs=50, batch_size=256, shuffle=True, verbose=0)
6695
+
6696
+ X_reduced = autoencoder.predict(X)
6697
+ reduced_df = pd.DataFrame(
6698
+ X_reduced,
6699
+ columns=[f"Score_{i+1}" for i in range(n_components)],
6700
+ index=data.index,
6701
+ )
6702
+ colname_met = "Score_"
6703
+
6704
+ if debug:
6705
+ print("Autoencoder reduction completed.")
6706
+ if hue:
6707
+ reduced_df[hue] = y
6708
+
6709
+ elif method == "nmf":
6710
+ from sklearn.decomposition import NMF
6711
+
6712
+ nmf = NMF(n_components=n_components, random_state=random_state)
6713
+ X_reduced = nmf.fit_transform(X)
6714
+ reduced_df = pd.DataFrame(
6715
+ X_reduced,
6716
+ columns=[f"NMF_{i+1}" for i in range(n_components)],
6717
+ index=data.index,
6718
+ )
6719
+ colname_met = "NMF_"
6644
6720
 
6645
- # Return reduced data and info as a new DataFrame with the same index
6721
+ if debug:
6722
+ print("Non-negative Matrix Factorization completed.")
6723
+ if hue:
6724
+ reduced_df[hue] = y
6725
+
6726
+ elif method == "umap_hdbscan":
6727
+ import umap
6728
+ import hdbscan
6729
+
6730
+ umap_model = umap.UMAP(
6731
+ n_neighbors=umap_neighbors,
6732
+ min_dist=umap_min_dist,
6733
+ n_components=n_components,
6734
+ )
6735
+ X_umap = umap_model.fit_transform(X)
6736
+
6737
+ clusterer = hdbscan.HDBSCAN()
6738
+ clusters = clusterer.fit_predict(X_umap)
6739
+
6740
+ reduced_df = pd.DataFrame(
6741
+ X_umap,
6742
+ columns=[f"UMAP_{i+1}" for i in range(n_components)],
6743
+ index=data.index,
6744
+ )
6745
+ reduced_df["Cluster"] = clusters
6746
+ colname_met = "UMAP_"
6747
+ if debug:
6748
+ print("UMAP + HDBSCAN reduction and clustering completed.")
6749
+ if hue:
6750
+ reduced_df[hue] = y
6751
+
6752
+ elif method == "manifold_learning":
6753
+ from sklearn.manifold import Isomap
6754
+
6755
+ isomap = Isomap(n_components=n_components)
6756
+ X_reduced = isomap.fit_transform(X)
6757
+ reduced_df = pd.DataFrame(
6758
+ X_reduced,
6759
+ columns=[f"Manifold_{i+1}" for i in range(n_components)],
6760
+ index=data.index,
6761
+ )
6762
+ colname_met = "Manifold_"
6763
+
6764
+ if debug:
6765
+ print("Manifold Learning (Isomap) completed.")
6766
+ if hue:
6767
+ reduced_df[hue] = y
6768
+
6769
+ #! Return reduced data and info as a new DataFrame with the same index
6646
6770
  if method == "pca":
6647
6771
  reduced_df = pca_df
6648
6772
  colname_met = "PC_"
@@ -6699,7 +6823,6 @@ def df_reducer(
6699
6823
  # Quick plots
6700
6824
  if plot_ and (not method in ["isolation_forest"]):
6701
6825
  from .plot import plotxy
6702
-
6703
6826
  if ax is None:
6704
6827
  if figsize is None:
6705
6828
  _, ax = plt.subplots(figsize=cm2inch(8, 8))
@@ -6707,16 +6830,18 @@ def df_reducer(
6707
6830
  _, ax = plt.subplots(figsize=figsize)
6708
6831
  else:
6709
6832
  ax = ax.cla()
6833
+ xlabel = f"{colname_met}1" if xlabel is None else xlabel
6834
+ ylabel = f"{colname_met}2" if ylabel is None else ylabel
6710
6835
  ax = plotxy(
6711
6836
  data=reduced_df,
6712
6837
  x=colname_met + "1",
6713
6838
  y=colname_met + "2",
6714
6839
  hue=hue,
6715
- s=1,
6716
- edgecolor="none",
6840
+ s=size,
6841
+ edgecolor=edgecolor,
6717
6842
  kind="scater",
6718
6843
  figsets=dict(
6719
- legend=dict(loc="best", markerscale=4),
6844
+ legend=dict(loc=legend_loc, markerscale=markerscale,bbox_to_anchor=bbox_to_anchor,ncols=ncols,fontsize=8),
6720
6845
  xlabel=xlabel if xlabel else None,
6721
6846
  ylabel=ylabel if ylabel else None,
6722
6847
  ),
py2ls/ml2ls.py CHANGED
@@ -1298,10 +1298,11 @@ def plot_validate_features_single(res_val, figsize=None):
1298
1298
  mean_auc = res_val["roc_curve"][model_name]["auc"]
1299
1299
 
1300
1300
  # Plotting
1301
- plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci, ax=nexttile())
1301
+ plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci,
1302
+ model_name=model_name, ax=nexttile())
1302
1303
  plot.figsets(title=model_name, sp=2)
1303
1304
 
1304
- plot_pr_curve(
1305
+ plot_pr_binary(
1305
1306
  recall=res_val["pr_curve"][model_name]["recall"],
1306
1307
  precision=res_val["pr_curve"][model_name]["precision"],
1307
1308
  avg_precision=res_val["pr_curve"][model_name]["avg_precision"],
@@ -1410,7 +1411,6 @@ def plot_roc_curve(
1410
1411
  # ml2ls.plot_roc_curve(fpr, tpr, mean_auc, lower_ci, upper_ci)
1411
1412
  # figsets(title=model_name)
1412
1413
 
1413
-
1414
1414
  def plot_pr_curve(
1415
1415
  recall=None,
1416
1416
  precision=None,
@@ -1436,7 +1436,7 @@ def plot_pr_curve(
1436
1436
  precision,
1437
1437
  lw=lw,
1438
1438
  color=color,
1439
- label=(f"{model_name} (AUC={avg_precision:.2f})"),
1439
+ label=(f"{model_name} (AP={avg_precision:.2f})"),
1440
1440
  clip_on=False,
1441
1441
  **kwargs,
1442
1442
  )
@@ -1453,7 +1453,6 @@ def plot_pr_curve(
1453
1453
  ax.legend(loc=legend_loc)
1454
1454
  return ax
1455
1455
 
1456
-
1457
1456
  # * usage: ml2ls.plot_pr_curve()
1458
1457
  # for md_name in flatten(validation_results["pr_curve"].keys()):
1459
1458
  # ml2ls.plot_pr_curve(
@@ -1466,7 +1465,91 @@ def plot_pr_curve(
1466
1465
  # color="r",
1467
1466
  # )
1468
1467
 
1468
+ def plot_pr_binary(
1469
+ recall=None,
1470
+ precision=None,
1471
+ avg_precision=None,
1472
+ model_name=None,
1473
+ lw=2,
1474
+ figsize=[5, 5],
1475
+ title="Precision-Recall Curve",
1476
+ xlabel="Recall",
1477
+ ylabel="Precision",
1478
+ alpha=0.1,
1479
+ color="#FF8F00",
1480
+ legend_loc="lower left",
1481
+ ax=None,
1482
+ show_avg_precision=False,
1483
+ **kwargs,
1484
+ ):
1485
+ from scipy.interpolate import interp1d
1486
+ if ax is None:
1487
+ fig, ax = plt.subplots(figsize=figsize)
1488
+ model_name = "Binary PR Curve" if model_name is None else model_name
1489
+
1490
+ #* use sklearn built-in function 'PrecisionRecallDisplay'?
1491
+ # from sklearn.metrics import PrecisionRecallDisplay
1492
+ # disp = PrecisionRecallDisplay(precision=precision,
1493
+ # recall=recall,
1494
+ # average_precision=avg_precision,**kwargs)
1495
+ # disp.plot(ax=ax, name=model_name, color=color)
1496
+
1497
+ # Plot Precision-Recall curve
1498
+ ax.plot(
1499
+ recall,
1500
+ precision,
1501
+ lw=lw,
1502
+ color=color,
1503
+ label=(f"{model_name} (AP={avg_precision:.2f})"),
1504
+ clip_on=False,
1505
+ **kwargs,
1506
+ )
1469
1507
 
1508
+ # Fill area under the curve
1509
+ ax.fill_between(recall, precision, alpha=alpha, color=color)
1510
+ # Add F1 score iso-contours
1511
+ f_scores = np.linspace(0.2, 0.8, num=4)
1512
+ # for f_score in f_scores:
1513
+ # x = np.linspace(0.01, 1)
1514
+ # y = f_score * x / (2 * x - f_score)
1515
+ # plt.plot(x[y >= 0], y[y >= 0], color="gray", alpha=1)
1516
+ # plt.annotate(f"$f_1={f_score:0.1f}$", xy=(0.8, y[45] + 0.02))
1517
+
1518
+ pr_boundary = interp1d(recall, precision, kind="linear", fill_value="extrapolate")
1519
+ for f_score in f_scores:
1520
+ x_vals = np.linspace(0.01, 1, 10000)
1521
+ y_vals = f_score * x_vals / (2 * x_vals - f_score)
1522
+ y_vals_clipped = np.minimum(y_vals, pr_boundary(x_vals))
1523
+ y_vals_clipped = np.clip(y_vals_clipped, 1e-3, None) # Prevent going to zero
1524
+ valid = y_vals_clipped < pr_boundary(x_vals)
1525
+ valid_ = y_vals_clipped > 1e-3
1526
+ valid = valid&valid_
1527
+ x_vals = x_vals[valid]
1528
+ y_vals_clipped = y_vals_clipped[valid]
1529
+ if len(x_vals) > 0: # Ensure annotation is placed only if line segment exists
1530
+ ax.plot(x_vals, y_vals_clipped, color="gray", alpha=1)
1531
+ plt.annotate(f"$f_1={f_score:0.1f}$", xy=(0.8, y_vals_clipped[-int(len(y_vals_clipped)*0.35)] + 0.02))
1532
+
1533
+
1534
+ # # Plot the average precision line
1535
+ if show_avg_precision:
1536
+ plt.axhline(
1537
+ y=avg_precision,
1538
+ color="red",
1539
+ ls="--",
1540
+ lw=lw,
1541
+ label=f"Avg. precision={avg_precision:.2f}",
1542
+ )
1543
+ # Customize axes
1544
+ ax.set_title(title)
1545
+ ax.set_xlabel(xlabel)
1546
+ ax.set_ylabel(ylabel)
1547
+ ax.set_xlim([-0.01, 1.0])
1548
+ ax.set_ylim([0.0, 1.0])
1549
+ ax.grid(False)
1550
+ ax.legend(loc=legend_loc)
1551
+ return ax
1552
+
1470
1553
  def plot_cm(
1471
1554
  cm,
1472
1555
  labels_name=None,
py2ls/netfinder.py CHANGED
@@ -1,35 +1,9 @@
1
1
  from bs4 import BeautifulSoup
2
2
  import requests
3
- from requests.utils import dict_from_cookiejar
4
- from requests.exceptions import ChunkedEncodingError, ConnectionError
5
3
  import os
6
- from urllib.parse import urlparse, urljoin
7
- import base64
8
4
  import pandas as pd
9
- from collections import Counter
10
- import random
11
5
  import logging
12
- from time import sleep
13
- import stem.process
14
- from stem import Signal
15
- from stem.control import Controller
16
6
  import json
17
- from fake_useragent import UserAgent
18
- from selenium import webdriver
19
- from selenium.webdriver.chrome.service import Service
20
- from selenium.webdriver.common.by import By
21
- from selenium.webdriver.chrome.options import Options
22
- from selenium.webdriver.support.ui import WebDriverWait
23
- from selenium.webdriver.support import expected_conditions as EC
24
- from webdriver_manager.chrome import ChromeDriverManager
25
- from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
26
- from pprint import pp
27
- import mimetypes
28
- import io
29
- import matplotlib.pyplot as plt
30
- from PIL import Image
31
- from duckduckgo_search import DDGS
32
- from datetime import datetime
33
7
  import time
34
8
  from . import ips
35
9
 
@@ -56,6 +30,8 @@ def user_agent(
56
30
  verbose=False,
57
31
  os=["windows", "macos", "linux"],
58
32
  ):
33
+ from fake_useragent import UserAgent
34
+
59
35
  ua = UserAgent(browsers=browsers, platforms=platforms, os=os)
60
36
  output_ua = ua.random
61
37
  if verbose:
@@ -109,6 +85,8 @@ def get_attr(content, where=None, attr=None, **kwargs):
109
85
  else:
110
86
  print(f"The attribute '{attr}' is not found in the elements.")
111
87
  else:
88
+ from pprint import pp
89
+
112
90
  print(f"Cannot find tag '{where}' in the content.")
113
91
  print("Available tags:")
114
92
  pp(all_tags)
@@ -237,6 +215,8 @@ def flatten_json(y):
237
215
 
238
216
 
239
217
  def get_proxy():
218
+ import random
219
+
240
220
  list_ = []
241
221
  headers = {"User-Agent": user_agent()}
242
222
  response = requests.get(
@@ -275,6 +255,8 @@ def get_cookies(url, login={"username": "your_username", "password": "your_passw
275
255
 
276
256
  ### Move/scroll more smoothly, which makes it easier to get past anti-scraping checks
277
257
  def scroll_smth_steps(driver, scroll_pause=0.5, min_step=200, max_step=600):
258
+ import random
259
+
278
260
  """Smoothly scrolls down the page to trigger lazy loading."""
279
261
  current_scroll_position = 0
280
262
  end_of_page = driver.execute_script("return document.body.scrollHeight")
@@ -383,7 +365,7 @@ def fetch_all(
383
365
  if response.status_code == 403:
384
366
  logger.warning("403 Forbidden error. Retrying...")
385
367
  # Retry the request after a short delay
386
- sleep(random.uniform(1, 3))
368
+ time.sleep(random.uniform(1, 3))
387
369
  response = requests.get(
388
370
  url, headers=headers, proxies=proxies_glob, timeout=30, stream=True
389
371
  )
@@ -410,6 +392,18 @@ def fetch_all(
410
392
  logger.warning("Unsupported content type")
411
393
  return None, None
412
394
  elif "se" in driver.lower():
395
+ import random
396
+ from selenium import webdriver
397
+ from selenium.webdriver.chrome.service import Service
398
+ from selenium.webdriver.common.by import By
399
+ from selenium.webdriver.chrome.options import Options
400
+ from selenium.webdriver.support.ui import WebDriverWait
401
+ from selenium.webdriver.support import expected_conditions as EC
402
+ from webdriver_manager.chrome import ChromeDriverManager
403
+ from selenium.webdriver.common.desired_capabilities import (
404
+ DesiredCapabilities,
405
+ )
406
+
413
407
  chrome_options = Options()
414
408
  chrome_options.add_argument("--headless")
415
409
  chrome_options.add_argument("--no-sandbox")
@@ -501,7 +495,7 @@ def fetch_all(
501
495
  content = BeautifulSoup(page_source, "html.parser")
502
496
  if content and content.find_all(by):
503
497
  break
504
- sleep(
498
+ time.sleep(
505
499
  random.uniform(2, 4)
506
500
  ) # Wait for a random time before polling again
507
501
 
@@ -575,6 +569,8 @@ def fetch_all(
575
569
  # else:
576
570
  # return None
577
571
  def find_links(url, driver="request", booster=False):
572
+ from urllib.parse import urlparse, urljoin
573
+
578
574
  links_href, cond_ex = [], ["javascript:", "mailto:", "tel:", "fax:"]
579
575
  content_type, soup = fetch_all(url, driver=driver)
580
576
 
@@ -615,6 +611,8 @@ def find_links(url, driver="request", booster=False):
615
611
 
616
612
  # To determine which links are related to target domains(e.g., pages) you are interested in
617
613
  def filter_links(links, contains="html", driver="requ", booster=False):
614
+ from urllib.parse import urlparse, urljoin
615
+
618
616
  filtered_links = []
619
617
  if isinstance(contains, str):
620
618
  contains = [contains]
@@ -631,6 +629,9 @@ def filter_links(links, contains="html", driver="requ", booster=False):
631
629
 
632
630
 
633
631
  def find_domain(links):
632
+ from urllib.parse import urlparse, urljoin
633
+ from collections import Counter
634
+
634
635
  if not links:
635
636
  return None
636
637
  domains = [urlparse(link).netloc for link in links]
@@ -685,6 +686,8 @@ def pdf_detector(url, contains=None, dir_save=None, booster=False):
685
686
  pdf_links = filter_links(links=links_all, contains=["pdf"])
686
687
 
687
688
  if pdf_links:
689
+ from pprint import pp
690
+
688
691
  pp(f"pdf detected{pdf_links}")
689
692
  else:
690
693
  print("no pdf file")
@@ -719,6 +722,9 @@ def downloader(
719
722
  n_try=3,
720
723
  timestamp=False,
721
724
  ):
725
+
726
+ from requests.exceptions import ChunkedEncodingError, ConnectionError
727
+
722
728
  if verbose:
723
729
  print(
724
730
  "usage: downloader(url, dir_save=None, kind=['.pdf','xls'], contains=None, booster=False)"
@@ -742,14 +748,14 @@ def downloader(
742
748
  counter_ = str(counter)
743
749
  new_filename = f"{base}_{counter_}{ext}"
744
750
  counter += 1
745
- return new_filename
746
-
751
+ return new_filename
752
+
747
753
  if url.startswith("ftp"):
748
754
  import urllib.request
749
755
 
750
756
  if dir_save is None:
751
- dir_save = "./"
752
- dir_save+= os.path.basename(url)
757
+ dir_save = "./"
758
+ dir_save += os.path.basename(url)
753
759
  print(dir_save)
754
760
  urllib.request.urlretrieve(url, dir_save)
755
761
  print(f"Downloaded file to: {dir_save}")
@@ -807,6 +813,8 @@ def downloader(
807
813
  file_links = filter_links(links_all, contains=kind_)
808
814
  if verbose:
809
815
  if file_links:
816
+ from pprint import pp
817
+
810
818
  print("Files detected:")
811
819
  pp(file_links)
812
820
  else:
@@ -845,6 +853,8 @@ def downloader(
845
853
  dir_save, corrected_fname
846
854
  )
847
855
  if timestamp:
856
+ from datetime import datetime
857
+
848
858
  corrected_fname = (
849
859
  datetime.now().strftime("%y%m%d_%H%M%S_")
850
860
  + corrected_fname
@@ -878,6 +888,8 @@ def downloader(
878
888
 
879
889
  # print(f"\n{len(fnames)} files were downloaded:")
880
890
  if verbose:
891
+ from pprint import pp
892
+
881
893
  if corrected_fname:
882
894
  pp(corrected_fname)
883
895
  print(f"\n\nsaved @:\n{dir_save}")
@@ -896,6 +908,9 @@ def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=
896
908
  Returns:
897
909
  str: HTML content with updated image URLs pointing to local files.
898
910
  """
911
+ from urllib.parse import urlparse, urljoin
912
+ import base64
913
+
899
914
  if rm_folder:
900
915
  ips.rm_folder(dir_save)
901
916
  content_type, content = fetch_all(url, driver=driver)
@@ -961,6 +976,9 @@ def find_img(url, driver="request", dir_save="images", rm_folder=False, verbose=
961
976
 
962
977
 
963
978
  def svg_to_png(svg_file):
979
+ import io
980
+ from PIL import Image
981
+
964
982
  with WandImage(filename=svg_file, resolution=300) as img:
965
983
  img.format = "png"
966
984
  png_image = img.make_blob()
@@ -1026,6 +1044,16 @@ def fetch_selenium(
1026
1044
  iframe_name=None, # Add option to handle iframe
1027
1045
  **kwargs,
1028
1046
  ):
1047
+ import random
1048
+ from selenium import webdriver
1049
+ from selenium.webdriver.chrome.service import Service
1050
+ from selenium.webdriver.common.by import By
1051
+ from selenium.webdriver.chrome.options import Options
1052
+ from selenium.webdriver.support.ui import WebDriverWait
1053
+ from selenium.webdriver.support import expected_conditions as EC
1054
+ from webdriver_manager.chrome import ChromeDriverManager
1055
+ from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
1056
+
1029
1057
  chrome_options = Options()
1030
1058
  chrome_options.add_argument("--headless")
1031
1059
  chrome_options.add_argument("--no-sandbox")
@@ -1085,7 +1113,7 @@ def fetch_selenium(
1085
1113
  if attempt == retry - 1:
1086
1114
  logger.error("Failed to fetch the content after all retries")
1087
1115
  return []
1088
- sleep(random.uniform(1, 3))
1116
+ time.sleep(random.uniform(1, 3))
1089
1117
  # Return empty list if nothing found after all retries
1090
1118
  return []
1091
1119
 
@@ -1102,6 +1130,9 @@ def fetch(
1102
1130
  output="text",
1103
1131
  **kws,
1104
1132
  ):
1133
+ import random
1134
+ from urllib.parse import urlparse, urljoin
1135
+
1105
1136
  if "xt" in output.lower():
1106
1137
  for attempt in range(retry):
1107
1138
  if verbose and attempt == 0:
@@ -1127,7 +1158,7 @@ def fetch(
1127
1158
  else:
1128
1159
  if texts:
1129
1160
  break
1130
- sleep(random.uniform(0.5, 1.5))
1161
+ time.sleep(random.uniform(0.5, 1.5))
1131
1162
  if isinstance(texts, pd.core.frame.DataFrame):
1132
1163
  condition_ = [texts.empty, booster]
1133
1164
  else:
@@ -1453,6 +1484,8 @@ def isa(fpath, kind="img"):
1453
1484
 
1454
1485
 
1455
1486
  def is_image(fpath):
1487
+ import mimetypes
1488
+
1456
1489
  mime_type, _ = mimetypes.guess_type(fpath)
1457
1490
  if mime_type and mime_type.startswith("image"):
1458
1491
  return True
@@ -1461,6 +1494,8 @@ def is_image(fpath):
1461
1494
 
1462
1495
 
1463
1496
  def is_document(fpath):
1497
+ import mimetypes
1498
+
1464
1499
  mime_type, _ = mimetypes.guess_type(fpath)
1465
1500
  if mime_type and (
1466
1501
  mime_type.startswith("text/")
@@ -1481,6 +1516,8 @@ def is_document(fpath):
1481
1516
 
1482
1517
 
1483
1518
  def is_zip(fpath):
1519
+ import mimetypes
1520
+
1484
1521
  mime_type, _ = mimetypes.guess_type(fpath)
1485
1522
  if mime_type == "application/zip":
1486
1523
  return True
@@ -1500,6 +1537,8 @@ def search(
1500
1537
  ):
1501
1538
 
1502
1539
  if "te" in kind.lower():
1540
+ from duckduckgo_search import DDGS
1541
+
1503
1542
  results = DDGS().text(query, max_results=limit)
1504
1543
  res = pd.DataFrame(results)
1505
1544
  res.rename(columns={"href": "links"}, inplace=True)
@@ -1517,6 +1556,8 @@ def search(
1517
1556
 
1518
1557
 
1519
1558
  def echo(query, model="gpt", verbose=True, log=True, dir_save=dir_save):
1559
+ from duckduckgo_search import DDGS
1560
+
1520
1561
  def is_in_any(str_candi_short, str_full, ignore_case=True):
1521
1562
  if isinstance(str_candi_short, str):
1522
1563
  str_candi_short = [str_candi_short]
@@ -1545,8 +1586,12 @@ def echo(query, model="gpt", verbose=True, log=True, dir_save=dir_save):
1545
1586
  model_valid = valid_mod_name(model)
1546
1587
  res = DDGS().chat(query, model=model_valid)
1547
1588
  if verbose:
1589
+ from pprint import pp
1590
+
1548
1591
  pp(res)
1549
1592
  if log:
1593
+ from datetime import datetime
1594
+
1550
1595
  dt_str = datetime.fromtimestamp(time.time()).strftime("%Y-%m-%d_%H:%M:%S")
1551
1596
  res_ = f"###{dt_str}\n\n>{res}\n"
1552
1597
  os.makedirs(dir_save, exist_ok=True)
py2ls/plot.py CHANGED
@@ -735,7 +735,10 @@ def catplot(data, *args, **kwargs):
735
735
  bx_opt["EdgeColor"] = "none"
736
736
  else:
737
737
  bx_opt["EdgeColor"] = bx_opt["EdgeColor"]
738
-
738
+ if not isinstance(bx_opt["FaceColor"], list):
739
+ bx_opt["FaceColor"]=[bx_opt["FaceColor"]]
740
+ if len(bxp["boxes"])!= len(bx_opt["FaceColor"]) and (len(bx_opt["FaceColor"])==1):
741
+ bx_opt["FaceColor"]=bx_opt["FaceColor"] *len(bxp["boxes"])
739
742
  for patch, color in zip(bxp["boxes"], bx_opt["FaceColor"]):
740
743
  patch.set_facecolor(to_rgba(color, bx_opt["FaceAlpha"]))
741
744
 
@@ -2315,16 +2318,8 @@ def split_legend(ax, n=2, loc=None, title=None, bbox=None, ncol=1, **kwargs):
2315
2318
  return legends
2316
2319
 
2317
2320
 
2318
- def get_colors(
2319
- n: int = 1,
2320
- cmap: str = "auto",
2321
- by: str = "start",
2322
- alpha: float = 1.0,
2323
- output: str = "hue",
2324
- *args,
2325
- **kwargs,
2326
- ):
2327
- return get_color(n, cmap, alpha, output, *args, **kwargs)
2321
+ def get_colors(n: int = 1,cmap: str = "auto",by: str = "start",alpha: float = 1.0,output: str = "hue",*args,**kwargs):
2322
+ return get_color(n=n, cmap=cmap, alpha=alpha, output=output, *args, **kwargs)
2328
2323
 
2329
2324
 
2330
2325
  def get_color(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: py2ls
3
- Version: 0.2.4.11
3
+ Version: 0.2.4.13
4
4
  Summary: py(thon)2(too)ls
5
5
  Author: Jianfeng
6
6
  Author-email: Jianfeng.Liu0413@gmail.com
@@ -132,7 +132,7 @@ Requires-Dist: nltk (>=3.8.1)
132
132
  Requires-Dist: numba (>=0.59.1)
133
133
  Requires-Dist: numcodecs (>=0.13.0)
134
134
  Requires-Dist: numerizer (>=0.2.3)
135
- Requires-Dist: numpy (>=1.26.4)
135
+ Requires-Dist: numpy (>=1.26.4,<2.0.0)
136
136
  Requires-Dist: onnxruntime (>=1.18.1)
137
137
  Requires-Dist: opencv-contrib-python (>=4.10.0.84)
138
138
  Requires-Dist: opencv-python (>=4.10.0.84)
@@ -234,17 +234,17 @@ py2ls/export_requirements.py,sha256=x2WgUF0jYKz9GfA1MVKN-MdsM-oQ8yUeC6Ua8oCymio,
234
234
  py2ls/fetch_update.py,sha256=9LXj661GpCEFII2wx_99aINYctDiHni6DOruDs_fdt8,4752
235
235
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
236
236
  py2ls/ich2ls.py,sha256=3E9R8oVpyYZXH5PiIQgT3CN5NxLe4Dwtm2LwaeacE6I,21381
237
- py2ls/ips.py,sha256=eXDteBS2ODd4qOjKhEQAgvLWerPXOjBvIe1kHQnI-Ww,265294
238
- py2ls/ml2ls.py,sha256=DPVbitW1Z-YwMXl6DR4ciB-OoCHFMNv5oWnEIi918LA,109898
237
+ py2ls/ips.py,sha256=O2QdLo6-vPbHvWtlVdtMA49LAn2y0CNVM27cxLbqqYA,271496
238
+ py2ls/ml2ls.py,sha256=LovnWDV9ptdWuWwJF5EEdf3sGY4EniGBBNxRJJbzStw,112784
239
239
  py2ls/mol.py,sha256=AZnHzarIk_MjueKdChqn1V6e4tUle3X1NnHSFA6n3Nw,10645
240
- py2ls/netfinder.py,sha256=RJFr80tGEJiuwEx99IBOhI5-ZuXnPdWnGUYpF7XCEwI,56426
240
+ py2ls/netfinder.py,sha256=Di0gnolZ8VLFYsAjiW--KtnVwjbIAHrFLRWzSciSqm8,57492
241
241
  py2ls/ocr.py,sha256=5lhUbJufIKRSOL6wAWVLEo8TqMYSjoI_Q-IO-_4u3DE,31419
242
- py2ls/plot.py,sha256=5eoCgyQ7Bi4HyB60nrWdOh0tEJJEkLlFHfxM6ydT9PA,171262
242
+ py2ls/plot.py,sha256=X0R1KK_UTdeJazjnqTqYvP-uWu6wY8szQHyJMsDDz2s,171515
243
243
  py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
244
244
  py2ls/sleep_events_detectors.py,sha256=bQA3HJqv5qnYKJJEIhCyhlDtkXQfIzqksnD0YRXso68,52145
245
245
  py2ls/stats.py,sha256=qBn2rJmNa_QLLUqjwYqXUlGzqmW94sgA1bxJU2FC3r0,39175
246
246
  py2ls/translator.py,sha256=zBeq4pYZeroqw3DT-5g7uHfVqKd-EQptT6LJ-Adi8JY,34244
247
247
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
248
- py2ls-0.2.4.11.dist-info/METADATA,sha256=8COfj0wL1jk5GxFaHaS1q5HozYpmlDIpoHg0Giz8hTc,20039
249
- py2ls-0.2.4.11.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
250
- py2ls-0.2.4.11.dist-info/RECORD,,
248
+ py2ls-0.2.4.13.dist-info/METADATA,sha256=XdPIn3j8oepWoUeQLRlrrd3UQSXGTQicA-I5v5GPNuk,20046
249
+ py2ls-0.2.4.13.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
250
+ py2ls-0.2.4.13.dist-info/RECORD,,