py2ls 0.1.4.4-py3-none-any.whl → 0.1.4.6-py3-none-any.whl

This diff compares two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
Files changed (38)
  1. py2ls/.DS_Store +0 -0
  2. py2ls/.git/COMMIT_EDITMSG +1 -4
  3. py2ls/.git/FETCH_HEAD +1 -1
  4. py2ls/.git/index +0 -0
  5. py2ls/.git/logs/HEAD +5 -0
  6. py2ls/.git/logs/refs/heads/main +5 -0
  7. py2ls/.git/logs/refs/remotes/origin/HEAD +12 -0
  8. py2ls/.git/logs/refs/remotes/origin/main +4 -0
  9. py2ls/.git/objects/1a/b4585881a6a42889f01aa0cfe25fd5acfaf46f +0 -0
  10. py2ls/.git/objects/50/08ddfcf53c02e82d7eee2e57c38e5672ef89f6 +0 -0
  11. py2ls/.git/objects/53/e0deb1cb4c2c606bced6e7f9a66b0fda60980d +0 -0
  12. py2ls/.git/objects/56/e4e8b2d5545e0256090f45aa8fc42c5fe067d0 +0 -0
  13. py2ls/.git/objects/62/7c81b23b4e56e87b042b650b0103653cc9e34a +0 -0
  14. py2ls/.git/objects/62/d90ccf8cbefdc2e4fd475e7c6f4f76e9fdf801 +3 -0
  15. py2ls/.git/objects/6a/52e747a2b349b128d1490d9e896d2323818eb7 +0 -0
  16. py2ls/.git/objects/6c/cebb29b7f3f5b0c889f6dadbf9ff066554587d +0 -0
  17. py2ls/.git/objects/71/36b2074a2754be8b58127d82250e5b37e3c373 +0 -0
  18. py2ls/.git/objects/81/8f26b7bf042269729020cf944fc362d66ba27e +0 -0
  19. py2ls/.git/objects/84/59071b722a255b774a80b27746033f8141ab39 +0 -0
  20. py2ls/.git/objects/8b/84f56978e1de8f2ae82abce5f8b3e182d365cd +0 -0
  21. py2ls/.git/objects/b5/61831c7dce8ea51e7ee6b6fa35745f14d8242d +0 -0
  22. py2ls/.git/objects/c1/20fc812b9ad311c34a3608512d6a9d976bb48e +0 -0
  23. py2ls/.git/objects/d6/9ab1c4aadf279936dd778e8346ba60f74705b6 +0 -0
  24. py2ls/.git/objects/d9/dfa5aee51e92a541b707e8e7baea6f06deff98 +0 -0
  25. py2ls/.git/objects/db/141dbaa93594df2a8156182f361ee4db829359 +0 -0
  26. py2ls/.git/objects/e3/1356f90ea6dd0577b5e0b40b206319adcbf085 +0 -0
  27. py2ls/.git/objects/fa/147e6bb78a2e8db241d231295fd7f1ed061af8 +0 -0
  28. py2ls/.git/refs/heads/main +1 -1
  29. py2ls/.git/refs/remotes/origin/main +1 -1
  30. py2ls/__init__.py +1 -2
  31. py2ls/ips.py +90 -6
  32. py2ls/netfinder.py +18 -10
  33. py2ls/setuptools-70.1.0-py3-none-any.whl +0 -0
  34. {py2ls-0.1.4.4.dist-info → py2ls-0.1.4.6.dist-info}/METADATA +139 -2
  35. {py2ls-0.1.4.4.dist-info → py2ls-0.1.4.6.dist-info}/RECORD +36 -17
  36. py2ls/internet_finder.py +0 -405
  37. py2ls/version.py +0 -1
  38. {py2ls-0.1.4.4.dist-info → py2ls-0.1.4.6.dist-info}/WHEEL +0 -0
py2ls/.DS_Store ADDED
Binary file
py2ls/.git/COMMIT_EDITMSG CHANGED
@@ -1,4 +1 @@
- listdir type
-
- listdir type: broad categories and precise categories,
- e.g., kind='img' means filter all kinds of images: .tiff, tif, jpg, png...
+ update
py2ls/.git/FETCH_HEAD CHANGED
@@ -1 +1 @@
- 6427a4edff08f93d98f511418423f09f2ab90bcd branch 'main' of https://github.com/Jianfengliu0413/py2ls
+ c120fc812b9ad311c34a3608512d6a9d976bb48e branch 'main' of https://github.com/Jianfengliu0413/py2ls
py2ls/.git/index CHANGED
Binary file
py2ls/.git/logs/HEAD CHANGED
@@ -8,3 +8,8 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9e
  3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526957 +0200 commit: Update netfinder.py
  48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553462 +0200 commit: new feature: display_thumbnail_figure
  cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555179 +0200 commit: listdir type
+ 6427a4edff08f93d98f511418423f09f2ab90bcd d9dfa5aee51e92a541b707e8e7baea6f06deff98 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718570098 +0200 commit: imgsets update
+ d9dfa5aee51e92a541b707e8e7baea6f06deff98 e31356f90ea6dd0577b5e0b40b206319adcbf085 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718618413 +0200 commit: add thumbnail
+ e31356f90ea6dd0577b5e0b40b206319adcbf085 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718619568 +0200 commit: Update ips.py
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 627c81b23b4e56e87b042b650b0103653cc9e34a Jianfeng <Jianfeng.Liu0413@gmail.com> 1718698925 +0200 commit: update xample_imgsets
+ 627c81b23b4e56e87b042b650b0103653cc9e34a c120fc812b9ad311c34a3608512d6a9d976bb48e Jianfeng <Jianfeng.Liu0413@gmail.com> 1718893141 +0200 commit: update
py2ls/.git/logs/refs/heads/main CHANGED
@@ -8,3 +8,8 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9e
  3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526957 +0200 commit: Update netfinder.py
  48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553462 +0200 commit: new feature: display_thumbnail_figure
  cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555179 +0200 commit: listdir type
+ 6427a4edff08f93d98f511418423f09f2ab90bcd d9dfa5aee51e92a541b707e8e7baea6f06deff98 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718570098 +0200 commit: imgsets update
+ d9dfa5aee51e92a541b707e8e7baea6f06deff98 e31356f90ea6dd0577b5e0b40b206319adcbf085 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718618413 +0200 commit: add thumbnail
+ e31356f90ea6dd0577b5e0b40b206319adcbf085 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718619568 +0200 commit: Update ips.py
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 627c81b23b4e56e87b042b650b0103653cc9e34a Jianfeng <Jianfeng.Liu0413@gmail.com> 1718698925 +0200 commit: update xample_imgsets
+ 627c81b23b4e56e87b042b650b0103653cc9e34a c120fc812b9ad311c34a3608512d6a9d976bb48e Jianfeng <Jianfeng.Liu0413@gmail.com> 1718893141 +0200 commit: update
py2ls/.git/logs/refs/remotes/origin/HEAD CHANGED
@@ -24,3 +24,15 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd d9c2403fd166ce791b4e9d0c6792ed8342c71fc
  48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718553371 +0200 remote set-head
  cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718553465 +0200 remote set-head
  6427a4edff08f93d98f511418423f09f2ab90bcd 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718555183 +0200 remote set-head
+ 6427a4edff08f93d98f511418423f09f2ab90bcd 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718570095 +0200 remote set-head
+ d9dfa5aee51e92a541b707e8e7baea6f06deff98 d9dfa5aee51e92a541b707e8e7baea6f06deff98 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718570102 +0200 remote set-head
+ e31356f90ea6dd0577b5e0b40b206319adcbf085 e31356f90ea6dd0577b5e0b40b206319adcbf085 Jianfeng Liu <macjianfeng@JFLMBP.local> 1718618416 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718619571 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718698084 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718699917 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718701729 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718703544 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718705358 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718707170 +0200 remote set-head
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng Liu <macjianfeng@JFLMBP.local> 1718708982 +0200 remote set-head
+ c120fc812b9ad311c34a3608512d6a9d976bb48e c120fc812b9ad311c34a3608512d6a9d976bb48e Jianfeng Liu <macjianfeng@JFLMBP.cin.medizin.uni-tuebingen.de> 1718893145 +0200 remote set-head
py2ls/.git/logs/refs/remotes/origin/main CHANGED
@@ -8,3 +8,7 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9e
  3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526959 +0200 update by push
  48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553464 +0200 update by push
  cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555183 +0200 update by push
+ 6427a4edff08f93d98f511418423f09f2ab90bcd d9dfa5aee51e92a541b707e8e7baea6f06deff98 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718570102 +0200 update by push
+ d9dfa5aee51e92a541b707e8e7baea6f06deff98 e31356f90ea6dd0577b5e0b40b206319adcbf085 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718618415 +0200 update by push
+ e31356f90ea6dd0577b5e0b40b206319adcbf085 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718619570 +0200 update by push
+ 1ab4585881a6a42889f01aa0cfe25fd5acfaf46f c120fc812b9ad311c34a3608512d6a9d976bb48e Jianfeng <Jianfeng.Liu0413@gmail.com> 1718893144 +0200 update by push
py2ls/.git/objects/62/d90ccf8cbefdc2e4fd475e7c6f4f76e9fdf801 ADDED
Binary file
py2ls/.git/refs/heads/main CHANGED
@@ -1 +1 @@
- 6427a4edff08f93d98f511418423f09f2ab90bcd
+ c120fc812b9ad311c34a3608512d6a9d976bb48e
py2ls/.git/refs/remotes/origin/main CHANGED
@@ -1 +1 @@
- 6427a4edff08f93d98f511418423f09f2ab90bcd
+ c120fc812b9ad311c34a3608512d6a9d976bb48e
py2ls/__init__.py CHANGED
@@ -7,11 +7,10 @@ from . import (
      correlators,
      dbhandler,
      freqanalysis,
-     internet_finder,
      ips,
      netfinder,
      sleep_events_detectors,
      translator,
-     version,
      wb_detector,
  )
+ from .ips import *
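With "from .ips import *" appended to __init__.py, the helpers defined in ips.py become importable straight from the package namespace. A brief illustrative sketch (it assumes the wildcard import exposes the new str2date helper shown in the ips.py diff below):

    import py2ls
    # str2date is defined in py2ls/ips.py; the wildcard re-export makes it reachable as py2ls.str2date
    print(py2ls.str2date("20240625", fmt="%Y-%m-%d"))  # '2024-06-25'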
py2ls/ips.py CHANGED
@@ -3,9 +3,8 @@ import numpy as np
  import pandas as pd
  import json
  import matplotlib.pyplot as plt
- # from functools import partial
  import seaborn as sns
- import scienceplots
+ # import scienceplots
  import matplotlib
  import sys
  import os
@@ -15,7 +14,7 @@ from scipy import stats
  import matplotlib.ticker as tck
  from cycler import cycler
  import re
- from PIL import ImageEnhance, ImageOps,ImageFilter
+ from PIL import Image,ImageEnhance, ImageOps,ImageFilter
  from rembg import remove,new_session
  from mpl_toolkits.mplot3d import Axes3D
  import docx
@@ -37,17 +36,39 @@ from box import Box, BoxList
  from numerizer import numerize
  from tqdm import tqdm
  import mimetypes
+ from pprint import pp
+ from dateutil import parser
+ from datetime import datetime
+
+ def str2date(date_str, fmt='%Y-%m-%d_%H:%M:%S'):
+     """
+     Convert a date string into the specified format.
+     Parameters:
+     - date_str (str): The date string to be converted.
+     - fmt (str): The format to convert the date to. Defaults to '%Y%m%d'.
+     Returns:
+     - str: The converted date string.
+     """
+     try:
+         date_obj = parser.parse(date_str)
+     except ValueError as e:
+         raise ValueError(f"Unable to parse date string: {date_str}. Error: {e}")
+     # Format the date object to the desired output format
+     formatted_date = date_obj.strftime(fmt)
+     return formatted_date
+ # str1=str2date(num2str(20240625),fmt="%a %d-%B-%Y")
+ # print(str1)
+ # str2=str2num(str2date(str1,fmt='%a %Y%m%d'))
+ # print(str2)

  def str2num(s, *args):
      delimiter = None
      round_digits = None
-
      for arg in args:
          if isinstance(arg, str):
              delimiter = arg
          elif isinstance(arg, int):
              round_digits = arg
-
      try:
          num = int(s)
      except ValueError:
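A short usage sketch of the new str2date helper added in this hunk (illustrative only; it assumes py2ls.ips is importable in the current environment):

    from py2ls.ips import str2date
    # dateutil parses the input, strftime re-formats it
    print(str2date("2024-06-25 14:30:00"))          # '2024-06-25_14:30:00' with the default fmt
    print(str2date("25 June 2024", fmt="%Y%m%d"))   # '20240625'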
@@ -2107,7 +2128,6 @@ def load_img(fpath):
          FileNotFoundError: If the specified file is not found.
          OSError: If the specified file cannot be opened or is not a valid image file.
      """
-     from PIL import Image

      try:
          img = Image.open(fpath)
@@ -2188,6 +2208,7 @@ def apply_filter(img, *args):
          raise ValueError(
              f"Unsupported filter: {filter_name}, should be one of: {supported_filters}"
          )
+
      for arg in args:
          if isinstance(arg, str):
              filter_name = arg
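The filter names that apply_filter validates against map onto Pillow's ImageFilter filters. A minimal, self-contained sketch of applying two of them with Pillow directly (independent of apply_filter's own dispatch, which is only partially shown here; the file names are hypothetical):

    from PIL import Image, ImageFilter
    img = Image.open("example.png")                            # hypothetical input file
    blurred = img.filter(ImageFilter.GaussianBlur(radius=2))   # 'gaussian_blur'
    sharpened = img.filter(ImageFilter.SHARPEN)                # 'sharpen'
    blurred.save("example_blurred.png")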
@@ -2286,6 +2307,29 @@ def imgsets(
      Note:
          The "color" and "enhance" enhancements are not implemented in this function.
      """
+     supported_filters = [
+         "BLUR",
+         "CONTOUR",
+         "DETAIL",
+         "EDGE_ENHANCE",
+         "EDGE_ENHANCE_MORE",
+         "EMBOSS",
+         "FIND_EDGES",
+         "SHARPEN",
+         "SMOOTH",
+         "SMOOTH_MORE",
+         "MIN_FILTER",
+         "MAX_FILTER",
+         "MODE_FILTER",
+         "MULTIBAND_FILTER",
+         "GAUSSIAN_BLUR",
+         "BOX_BLUR",
+         "MEDIAN_FILTER",
+     ]
+     print("sets: a dict,'sharp:1.2','color','contrast:'auto' or 1.2','bright', 'crop: x_upperleft,y_upperleft, x_lowerright, y_lowerright','rotation','resize','rem or background'")
+     print(f"usage: filter_kws 'dict' below:")
+     pp([str(i).lower() for i in supported_filters])
+     print("\nlog:\n")
      def confirm_rembg_models(model_name):
          models_support = [
              "u2net",
@@ -2734,6 +2778,46 @@ def figsets(*args):
      plt.tight_layout()
      plt.gcf().align_labels()

+ def thumbnail(dir_img_list,figsize=(10,10),dpi=100, dir_save=None, kind='.png'):
+     """
+     Display a thumbnail figure of all images in the specified directory.
+     Args:
+         dir_img_list (list): List of the Directory containing the images.
+     """
+     num_images = len(dir_img_list)
+     if not kind.startswith('.'):
+         kind='.'+kind
+
+     if num_images == 0:
+         print("No images found to display.")
+         return
+     grid_size = int(num_images ** 0.5) + 1 # Determine grid size
+     fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)
+     for ax, image_file in zip(axs.flatten(), dir_img_list):
+         try:
+             img = Image.open(image_file)
+             ax.imshow(img)
+             ax.axis('off')
+         except:
+             continue
+     # for ax in axs.flatten():
+     #     ax.axis('off')
+     [ax.axis("off") for ax in axs.flatten()]
+     plt.tight_layout()
+     if dir_save is None:
+         plt.show()
+     else:
+         if basename(dir_save):
+             fname= basename(dir_save) +kind
+         else:
+             fname= "_thumbnail_"+basename(dirname(dir_save)[:-1])+'.png'
+         if dirname(dir_img_list[0]) == dirname(dir_save):
+             figsave(dirname(dir_save[:-1]),fname)
+         else:
+             figsave(dirname(dir_save),fname)
+ # usage:
+ # fpath = "/Users/macjianfeng/Dropbox/github/python/py2ls/tests/xample_netfinder/images/"
+ # thumbnail(listdir(fpath,'png').fpath.to_list(),dir_save=dirname(fpath))
  def read_mplstyle(style_file):
      # Load the style file
      plt.style.use(style_file)
py2ls/netfinder.py CHANGED
@@ -21,8 +21,11 @@ from selenium.webdriver.support.ui import WebDriverWait
  from selenium.webdriver.support import expected_conditions as EC
  from webdriver_manager.chrome import ChromeDriverManager
  from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
- import pprint
+ from pprint import pp
  import mimetypes
+ import io
+ import matplotlib.pyplot as plt
+ from PIL import Image

  # Set up logging
  logging.basicConfig(level=logging.INFO)
@@ -313,7 +316,7 @@ def pdf_detector(url, contains=None, dir_save=None,booster=False):
      pdf_links = filter_links(links=links_all, contains=["pdf"])

      if pdf_links:
-         pprint.pp(f"pdf detected\n{pdf_links}")
+         pp(f"pdf detected{pdf_links}")
      else:
          print('no pdf file')
      if dir_save:
@@ -399,9 +402,13 @@ def find_img(url, dir_save="images", verbose=True):
              print(f"Failed to process image {image_url}: {e}")
      print(f"images were saved at\n{dir_save}")
      if verbose:
-         display_thumbnail_figure(flist(dir_save,filter='img'),dpi=200)
+         display_thumbnail_figure(flist(dir_save,filter='img'),dpi=100)
      return content
-
+ def svg_to_png(svg_file):
+     with WandImage(filename=svg_file, resolution=300) as img:
+         img.format = 'png'
+         png_image = img.make_blob()
+         return Image.open(io.BytesIO(png_image))
  def display_thumbnail_figure(dir_img_list,figsize=(10,10),dpi=100):
      import matplotlib.pyplot as plt
      from PIL import Image
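The new svg_to_png helper references WandImage, which is not imported in any hunk shown here; presumably the Wand (ImageMagick) binding is imported elsewhere in netfinder.py. A self-contained sketch of the same conversion under that assumption:

    import io
    from wand.image import Image as WandImage  # assumption: this is the import the hunk relies on
    from PIL import Image

    def svg_to_png_sketch(svg_file):
        # rasterize the SVG at 300 dpi with ImageMagick, then hand the PNG bytes to Pillow
        with WandImage(filename=svg_file, resolution=300) as img:
            img.format = 'png'
            return Image.open(io.BytesIO(img.make_blob()))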
@@ -422,13 +429,14 @@ def display_thumbnail_figure(dir_img_list,figsize=(10,10),dpi=100):
      fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)

      for ax, image_file in zip(axs.flatten(), dir_img_list):
-         img = Image.open(image_file)
-         ax.imshow(img)
-         ax.axis('off') # Hide axes
-
+         try:
+             img = Image.open(image_file)
+             ax.imshow(img)
+             ax.axis('off') # Hide axes
+         except:
+             continue
      # Hide remaining subplots
-     for ax in axs.flatten()[num_images:]:
-         ax.axis('off')
+     [ax.axis("off") for ax in axs.flatten()]

      plt.tight_layout()
      plt.show()
py2ls/setuptools-70.1.0-py3-none-any.whl ADDED
Binary file
{py2ls-0.1.4.4.dist-info → py2ls-0.1.4.6.dist-info}/METADATA CHANGED
@@ -1,12 +1,149 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: py2ls
3
- Version: 0.1.4.4
3
+ Version: 0.1.4.6
4
4
  Summary: py(thon)2(too)ls
5
5
  Author: Jianfeng
6
6
  Author-email: Jianfeng.Liu0413@gmail.com
7
- Requires-Python: >=3.12,<4.0
7
+ Requires-Python: >=3.10,<4.0
8
8
  Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.10
10
+ Classifier: Programming Language :: Python :: 3.11
9
11
  Classifier: Programming Language :: Python :: 3.12
12
+ Requires-Dist: Deprecated (>=1.2.14,<2.0.0)
13
+ Requires-Dist: Jinja2 (>=3.1.4,<4.0.0)
14
+ Requires-Dist: MarkupSafe (>=2.1.5,<3.0.0)
15
+ Requires-Dist: PyMatting (>=1.1.12,<2.0.0)
16
+ Requires-Dist: PyPDF2 (>=3.0.1,<4.0.0)
17
+ Requires-Dist: PySocks (>=1.7.1,<2.0.0)
18
+ Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
19
+ Requires-Dist: Pygments (>=2.18.0,<3.0.0)
20
+ Requires-Dist: SciencePlots (>=2.1.1,<3.0.0)
21
+ Requires-Dist: appnope (>=0.1.4,<0.2.0)
22
+ Requires-Dist: appscript (>=1.2.5,<2.0.0)
23
+ Requires-Dist: asttokens (>=2.4.1,<3.0.0)
24
+ Requires-Dist: attrs (>=23.2.0,<24.0.0)
25
+ Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
26
+ Requires-Dist: certifi (>=2024.6.2,<2025.0.0)
27
+ Requires-Dist: chardet (>=3.0.4,<4.0.0)
28
+ Requires-Dist: charset-normalizer (>=3.3.2,<4.0.0)
29
+ Requires-Dist: click (>=8.1.7,<9.0.0)
30
+ Requires-Dist: colorcet (>=3.1.0,<4.0.0)
31
+ Requires-Dist: coloredlogs (>=15.0.1,<16.0.0)
32
+ Requires-Dist: comm (>=0.2.2,<0.3.0)
33
+ Requires-Dist: contourpy (>=1.2.1,<2.0.0)
34
+ Requires-Dist: cycler (>=0.12.1,<0.13.0)
35
+ Requires-Dist: debugpy (>=1.8.1,<2.0.0)
36
+ Requires-Dist: decorator (>=5.1.1,<6.0.0)
37
+ Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
38
+ Requires-Dist: docx (>=0.2.4,<0.3.0)
39
+ Requires-Dist: docx2pdf (>=0.1.8,<0.2.0)
40
+ Requires-Dist: executing (>=2.0.1,<3.0.0)
41
+ Requires-Dist: fake-useragent (>=1.5.1,<2.0.0)
42
+ Requires-Dist: flatbuffers (>=24.3.25,<25.0.0)
43
+ Requires-Dist: fonttools (>=4.53.0,<5.0.0)
44
+ Requires-Dist: fpdf (>=1.7.2,<2.0.0)
45
+ Requires-Dist: googletrans (>=4.0.0rc1,<5.0.0)
46
+ Requires-Dist: h11 (>=0.9.0,<0.10.0)
47
+ Requires-Dist: h2 (>=3.2.0,<4.0.0)
48
+ Requires-Dist: hpack (>=3.0.0,<4.0.0)
49
+ Requires-Dist: hstspreload (>=2024.6.1,<2025.0.0)
50
+ Requires-Dist: httpcore (>=0.9.1,<0.10.0)
51
+ Requires-Dist: httpx (>=0.13.3,<0.14.0)
52
+ Requires-Dist: humanfriendly (>=10.0,<11.0)
53
+ Requires-Dist: hyperframe (>=5.2.0,<6.0.0)
54
+ Requires-Dist: idna (>=2.10,<3.0)
55
+ Requires-Dist: imageio (>=2.34.1,<3.0.0)
56
+ Requires-Dist: img2pdf (>=0.5.1,<0.6.0)
57
+ Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
58
+ Requires-Dist: ipython (>=8.25.0,<9.0.0) ; python_version >= "3.9" and python_version < "4.0"
59
+ Requires-Dist: jedi (>=0.19.1,<0.20.0)
60
+ Requires-Dist: joblib (>=1.3.2,<2.0.0)
61
+ Requires-Dist: jsonschema (>=4.22.0,<5.0.0)
62
+ Requires-Dist: jsonschema-specifications (>=2023.12.1,<2024.0.0)
63
+ Requires-Dist: jupyter_client (>=8.6.2,<9.0.0)
64
+ Requires-Dist: jupyter_core (>=5.7.2,<6.0.0)
65
+ Requires-Dist: kiwisolver (>=1.4.5,<2.0.0)
66
+ Requires-Dist: langdetect (>=1.0.9,<2.0.0)
67
+ Requires-Dist: lazy_loader (>=0.4,<0.5)
68
+ Requires-Dist: libretranslatepy (>=2.1.1,<3.0.0)
69
+ Requires-Dist: llvmlite (>=0.42.0,<0.43.0)
70
+ Requires-Dist: lxml (>=4.9.4,<5.0.0)
71
+ Requires-Dist: matplotlib (>=3.8.4,<4.0.0)
72
+ Requires-Dist: matplotlib-inline (>=0.1.7,<0.2.0)
73
+ Requires-Dist: mne (>=1.6.0,<2.0.0)
74
+ Requires-Dist: mpmath (>=1.3.0,<2.0.0)
75
+ Requires-Dist: nest-asyncio (>=1.6.0,<2.0.0)
76
+ Requires-Dist: networkx (>=3.3,<4.0) ; python_version >= "3.10" and python_version < "4.0"
77
+ Requires-Dist: nltk (>=3.8.1,<4.0.0)
78
+ Requires-Dist: numba (>=0.59.1,<0.60.0)
79
+ Requires-Dist: numerizer (>=0.2.3,<0.3.0)
80
+ Requires-Dist: numpy (>=1.26.4,<2.0.0)
81
+ Requires-Dist: onnxruntime (>=1.18.0,<2.0.0)
82
+ Requires-Dist: opencv-contrib-python (>=4.9.0.80,<5.0.0.0)
83
+ Requires-Dist: opencv-python (>=4.9.0.80,<5.0.0.0)
84
+ Requires-Dist: opencv-python-headless (>=4.9.0.80,<5.0.0.0)
85
+ Requires-Dist: outcome (>=1.3.0.post0,<2.0.0)
86
+ Requires-Dist: packaging (>=24.1,<25.0)
87
+ Requires-Dist: pandas (>=2.2.2,<3.0.0)
88
+ Requires-Dist: pandas-flavor (>=0.6.0,<0.7.0)
89
+ Requires-Dist: parso (>=0.8.4,<0.9.0)
90
+ Requires-Dist: patsy (>=0.5.6,<0.6.0)
91
+ Requires-Dist: pdf2image (>=1.17.0,<2.0.0)
92
+ Requires-Dist: pdf2img (>=0.1.2,<0.2.0)
93
+ Requires-Dist: pexpect (>=4.9.0,<5.0.0)
94
+ Requires-Dist: pikepdf (>=9.0.0,<10.0.0)
95
+ Requires-Dist: pillow (>=10.3.0,<11.0.0)
96
+ Requires-Dist: pingouin (>=0.5.4,<0.6.0)
97
+ Requires-Dist: platformdirs (>=4.2.2,<5.0.0)
98
+ Requires-Dist: pooch (>=1.8.2,<2.0.0)
99
+ Requires-Dist: prompt_toolkit (>=3.0.47,<4.0.0)
100
+ Requires-Dist: protobuf (>=5.27.1,<6.0.0)
101
+ Requires-Dist: psutil (>=5.9.8,<6.0.0)
102
+ Requires-Dist: ptyprocess (>=0.7.0,<0.8.0)
103
+ Requires-Dist: pure-eval (>=0.2.2,<0.3.0)
104
+ Requires-Dist: pyparsing (>=3.1.2,<4.0.0)
105
+ Requires-Dist: python-box (>=7.2.0,<8.0.0)
106
+ Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
107
+ Requires-Dist: python-docx (>=1.1.0,<2.0.0)
108
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
109
+ Requires-Dist: pytz (>=2024.1,<2025.0)
110
+ Requires-Dist: pyzmq (>=26.0.3,<27.0.0)
111
+ Requires-Dist: referencing (>=0.35.1,<0.36.0)
112
+ Requires-Dist: regex (>=2024.5.15,<2025.0.0)
113
+ Requires-Dist: rembg (>=2.0.56,<3.0.0) ; python_version >= "3.9" and python_version < "3.13"
114
+ Requires-Dist: requests (>=2.32.3,<3.0.0)
115
+ Requires-Dist: rfc3986 (>=1.5.0,<2.0.0)
116
+ Requires-Dist: rpds-py (>=0.18.1,<0.19.0)
117
+ Requires-Dist: scikit-image (>=0.23.2,<0.24.0) ; python_version >= "3.10" and python_version < "4.0"
118
+ Requires-Dist: scikit-learn (>=1.3.2,<2.0.0)
119
+ Requires-Dist: scipy (>=1.13.1,<2.0.0)
120
+ Requires-Dist: seaborn (>=0.13.2,<0.14.0)
121
+ Requires-Dist: selenium (>=4.21.0,<5.0.0)
122
+ Requires-Dist: six (>=1.16.0,<2.0.0)
123
+ Requires-Dist: sniffio (>=1.3.1,<2.0.0)
124
+ Requires-Dist: sortedcontainers (>=2.4.0,<3.0.0)
125
+ Requires-Dist: soupsieve (>=2.5,<3.0)
126
+ Requires-Dist: stack-data (>=0.6.3,<0.7.0)
127
+ Requires-Dist: statsmodels (>=0.14.1,<0.15.0)
128
+ Requires-Dist: stem (>=1.8.2,<2.0.0)
129
+ Requires-Dist: sympy (>=1.12.1,<2.0.0)
130
+ Requires-Dist: tabulate (>=0.9.0,<0.10.0)
131
+ Requires-Dist: threadpoolctl (>=3.5.0,<4.0.0)
132
+ Requires-Dist: tifffile (>=2024.5.22,<2025.0.0)
133
+ Requires-Dist: tornado (>=6.4.1,<7.0.0)
134
+ Requires-Dist: tqdm (>=4.66.4,<5.0.0)
135
+ Requires-Dist: traitlets (>=5.14.3,<6.0.0)
136
+ Requires-Dist: translate (>=3.6.1,<4.0.0)
137
+ Requires-Dist: trio (>=0.25.1,<0.26.0)
138
+ Requires-Dist: trio-websocket (>=0.11.1,<0.12.0)
139
+ Requires-Dist: typing_extensions (>=4.12.2,<5.0.0)
140
+ Requires-Dist: tzdata (>=2024.1,<2025.0)
141
+ Requires-Dist: urllib3 (>=2.2.1,<3.0.0)
142
+ Requires-Dist: wcwidth (>=0.2.13,<0.3.0)
143
+ Requires-Dist: webdriver-manager (>=4.0.1,<5.0.0)
144
+ Requires-Dist: wrapt (>=1.16.0,<2.0.0)
145
+ Requires-Dist: wsproto (>=1.2.0,<2.0.0)
146
+ Requires-Dist: xarray (>=2024.6.0,<2025.0.0)
10
147
  Description-Content-Type: text/markdown
11
148
 
12
149
  # Install
{py2ls-0.1.4.4.dist-info → py2ls-0.1.4.6.dist-info}/RECORD CHANGED
@@ -1,5 +1,6 @@
1
- py2ls/.git/COMMIT_EDITMSG,sha256=E6ggbgaSjmHbt33GuQe6KX5ipK0Dp29QiuxxpBkqm60,131
2
- py2ls/.git/FETCH_HEAD,sha256=Cdzu13JkjE_s_0YvmkAIac4l2WuokfA-YimhsDkYo1Q,100
1
+ py2ls/.DS_Store,sha256=1lFlJ5EFymdzGAUAaI30vcaaLHt3F1LwpG7xILf9jsM,6148
2
+ py2ls/.git/COMMIT_EDITMSG,sha256=5xj-jWMbrdOc9m7gSn-UcsAQ9FMNvWSbLWSsrOUIO5w,7
3
+ py2ls/.git/FETCH_HEAD,sha256=1FfG9FtKEzbthC4ygl5ci0pnEm7ZaF3ZY7njNqkjz2I,100
3
4
  py2ls/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
4
5
  py2ls/.git/config,sha256=XswTg1Ts7_7IBDlKHh4OF_0Tq7v4wW7BXb6xSVInSec,345
5
6
  py2ls/.git/description,sha256=ZzMxc0Ca26m45Twn1DDnOHqin5VHEZ9uOTBrScIXSjE,16
@@ -16,15 +17,16 @@ py2ls/.git/hooks/pre-receive.sample,sha256=pMPSuce7P9jRRBwxvU7nGlldZrRPz0ndsxAlI
16
17
  py2ls/.git/hooks/prepare-commit-msg.sample,sha256=6d3KpBif3dJe2X_Ix4nsp7bKFjkLI5KuMnbwyOGqRhk,1492
17
18
  py2ls/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
18
19
  py2ls/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
19
- py2ls/.git/index,sha256=Bxygoi0rCfEqm0Lnpaba5ZsAFg_NS8TYUbR2o4uwnnI,1338
20
+ py2ls/.git/index,sha256=XcsDiczPb7unmufhwqWxcgRV2ye2gknaLxmx1rsiyY4,1346
20
21
  py2ls/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
21
- py2ls/.git/logs/HEAD,sha256=Wiu2pJYW-hD-oZ452dEJyYjYf9wfjUjvJONH7wAi8Ug,1638
22
- py2ls/.git/logs/refs/heads/main,sha256=Wiu2pJYW-hD-oZ452dEJyYjYf9wfjUjvJONH7wAi8Ug,1638
23
- py2ls/.git/logs/refs/remotes/origin/HEAD,sha256=032ZeLPOt_lRcwk8gTcVOzQd81zTAkkTk9P1PtPr6Aw,4212
24
- py2ls/.git/logs/refs/remotes/origin/main,sha256=YU2rplaPYPyRcfSuRQ65bFZ8oQwKogJrBlSXYhD9Pa4,1520
22
+ py2ls/.git/logs/HEAD,sha256=ZbfxG7S0VtQedb4Z-bCwPRW4PV1fONzgYIQ0VaZfxvE,2435
23
+ py2ls/.git/logs/refs/heads/main,sha256=ZbfxG7S0VtQedb4Z-bCwPRW4PV1fONzgYIQ0VaZfxvE,2435
24
+ py2ls/.git/logs/refs/remotes/origin/HEAD,sha256=kaJ5UZPeGXVtmL3wpB90jgENFdS20YNyknYovGBS-Gg,6109
25
+ py2ls/.git/logs/refs/remotes/origin/main,sha256=tgL6shcaeZwXHsJo_YIMa_jmUz0_7nRnJ-Z4lLH_bdk,2128
25
26
  py2ls/.git/objects/0b/409e1bc918277010f5679b402d1d1dda53e15c,sha256=y5S1XaGxJz1NXi-SPWjPC_NKIqqSbZv9oOg74MzBihY,156
26
27
  py2ls/.git/objects/14/449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed,sha256=PomZFmCUCQM1ii0wH-OJGSHLQCTqRtIwE5w3C0TtzSY,171
27
28
  py2ls/.git/objects/15/a8e468aacfcb440e090020f36d0b985d45da23,sha256=xiRunMcN5I_B2hHgBUFupR-F0b8H_CQTmmAZG9XkZik,3215
29
+ py2ls/.git/objects/1a/b4585881a6a42889f01aa0cfe25fd5acfaf46f,sha256=iQsKMPNKUs4WQwhiLgXmG5V3xKyIgxmc13ZwbBATvhQ,165
28
30
  py2ls/.git/objects/1d/fe9d9633b24ea560354f4f93d39c6e5f163ea0,sha256=mV_84wLqIitnSYmzfrNpTzwVP9AmksiRI0Fjltwl0Pg,8872
29
31
  py2ls/.git/objects/24/6b368b986f758630c46dc02b7fa512b53422f7,sha256=sw7ERFCFu7m6fnURAqQfQ4GWShaARr-Vc6GRnlOPkxU,8512
30
32
  py2ls/.git/objects/25/b796accd261b9135fd32a2c00785f68edf6c46,sha256=4ic5vOwEdfbGL8oARSVEeAnSoDs14-gggGZEL-61nYE,564
@@ -37,16 +39,27 @@ py2ls/.git/objects/3c/bbe5f4173d165127b9ad96119f1ec24c306ffc,sha256=S1BXemROYtzR
37
39
  py2ls/.git/objects/3f/d6561300938afbb3d11976cf9c8f29549280d9,sha256=91oqbTWfUE1d_hT_1ptYmRUb5pOQ1X4oxQxpF6NXjKU,8501
38
40
  py2ls/.git/objects/43/dbd49b2ee367c5434dd545e3b5795434f2ef0b,sha256=DAzt0dWp2KsuuImCKp7N9ia7KaCDNqwB-tYIx3Wf_c0,565
39
41
  py2ls/.git/objects/48/a88fc5806305d0bb0755ee6801161b79696972,sha256=f3JStE39k_hPGE-WRwqZtDTjQkfOmBVb_6-ELBbScjI,203
42
+ py2ls/.git/objects/50/08ddfcf53c02e82d7eee2e57c38e5672ef89f6,sha256=p0M2WLqiTe6X2FI_k5Aj0IEsE85jqLa58sVdmV8x1vU,255
43
+ py2ls/.git/objects/53/e0deb1cb4c2c606bced6e7f9a66b0fda60980d,sha256=muq6m7_XRSFPzypW-m9mhpKfsomCr4s7GfkgM3gh2pc,482344
44
+ py2ls/.git/objects/56/e4e8b2d5545e0256090f45aa8fc42c5fe067d0,sha256=VsjKo1biAzCV-iIfwCDTPzyfP63K43hdZqJpDP70Iik,529
40
45
  py2ls/.git/objects/58/20a729045d4dc7e37ccaf8aa8eec126850afe2,sha256=3Pf6NS8OTK4EdHZGVeJ421BtK7w4WJncQDBauZI_wW4,34
41
46
  py2ls/.git/objects/60/f273eb1c412d916fa3f11318a7da7a9911b52a,sha256=aJD9iF_LmYSrqDepXFBZKN1yMYbQczVkN_wnrDosBdI,5620
42
47
  py2ls/.git/objects/61/570cec8c061abe74121f27f5face6c69b98f99,sha256=IQZi5MkbRu3ToRUPsRcXuh1Xa3pkAz_HDRCVhNL89ds,5753
48
+ py2ls/.git/objects/62/7c81b23b4e56e87b042b650b0103653cc9e34a,sha256=pv9wgBxnvJUFSrk9G7vApA6lnSykQSMJ4yXT7YnlSDU,167
49
+ py2ls/.git/objects/62/d90ccf8cbefdc2e4fd475e7c6f4f76e9fdf801,sha256=1L473QanNpnumCkE8tG6wtbvLqFtNeoagL9SJmasXNY,155
43
50
  py2ls/.git/objects/64/27a4edff08f93d98f511418423f09f2ab90bcd,sha256=RyNngwk9fvdvvvywmNfllnim718fWNjVauH9U2y8Q2s,258
44
51
  py2ls/.git/objects/69/13c452ca319f7cbf6a0836dc10a5bb033c84e4,sha256=NYLQQZTfd0htZst42ALS2dmryv1q_l1N19ZfHEbz_38,3193
52
+ py2ls/.git/objects/6a/52e747a2b349b128d1490d9e896d2323818eb7,sha256=Qc_B3_xxlWmjooFu274r82b583uf_HpIpDBldr9fqVI,34966
45
53
  py2ls/.git/objects/6b/7fde264d93a7a0986d394c46c7650d0ce2ab92,sha256=iIl0-RF0wd6BSEjzczgUyApxc899PbdTl04JbDn6_-Q,166
54
+ py2ls/.git/objects/6c/cebb29b7f3f5b0c889f6dadbf9ff066554587d,sha256=UylkFWAfhStNVQRQuC9CzpaWaT9uHCVs1mn7ecOma8I,609
55
+ py2ls/.git/objects/71/36b2074a2754be8b58127d82250e5b37e3c373,sha256=cbVFQaBx0Q5QkZ1wQle-iIxNx14JxGSx3G8aQ7EbbAA,586
46
56
  py2ls/.git/objects/78/3d4167bc95c9d2175e0df03ef1c1c880ba75ab,sha256=SK2QDjDBiDhVMG1I5p19g4RbEm2Rax7mYnxawmVZYxs,15523
47
57
  py2ls/.git/objects/79/7ae089b2212a937840e215276005ce76881307,sha256=lQOKF2pb1JvipI3eT79X0-TuMGWsy1A-Yw4BCgKZNOM,33472
48
58
  py2ls/.git/objects/7e/5956c806b5edc344d46dab599dec337891ba1f,sha256=sfqJBiSNj-gyJo4D7xkmRAo76mC2ztjqeZZsl4ifULA,162
59
+ py2ls/.git/objects/81/8f26b7bf042269729020cf944fc362d66ba27e,sha256=mg6FGEyv6EcOgurR8CEvHGovaWrUgMUxTtACAy7-ei4,34960
60
+ py2ls/.git/objects/84/59071b722a255b774a80b27746033f8141ab39,sha256=0pYGJOXFfp4MSu4n5MzE1XN--t0lSs7wcdqboADWMx0,9792
49
61
  py2ls/.git/objects/87/ef1fc3f7f1ddc4d0ab9b3e65381ce9f3388621,sha256=OFrpW6lu31qGBvD3ijPUBSG9JrdU1_mKzeYBzidn9VM,3748
62
+ py2ls/.git/objects/8b/84f56978e1de8f2ae82abce5f8b3e182d365cd,sha256=a8XequnUMBSv9zIQJdcdgDvMQ7PLGdIrgZ-MqQGF87c,573
50
63
  py2ls/.git/objects/8e/55a7d2b96184030211f20c9b9af201eefcac82,sha256=yW-jVYeCTWR-nX3JJgA1g9YLPjzNsKlDmEOH290Ywx0,1221
51
64
  py2ls/.git/objects/91/c69ad88fe0ba94aa7859fb5f7edac5e6f1a3f7,sha256=Kk2MWCO1OcShYuABGzp2O9LiWGDfDkcZtd0oy4nY6RU,9529
52
65
  py2ls/.git/objects/9d/0df52899fe95279059286d9c0ec42287edc168,sha256=67nV3TLo-fwe4lt0wwvxoDnVNHc1IpapRyAY2STP3iI,564
@@ -55,38 +68,44 @@ py2ls/.git/objects/a7/3e13eafee65c5b8d73ad2d3ea46d0eee82f0d3,sha256=iv3uTzna5XBz
55
68
  py2ls/.git/objects/b0/56be4be89ba6b76949dd641df45bb7036050c8,sha256=8Y7z30eNceBd5QIx09QfMp5cYBbrgUllmats0kvJEJ4,132
56
69
  py2ls/.git/objects/b0/9cd7856d58590578ee1a4f3ad45d1310a97f87,sha256=82dx4hIdMpdcB64e5PU1s2gZFVkTvrj1cPwwJ_kasNU,4444
57
70
  py2ls/.git/objects/b2/18e6a0f0f1c4df8cdefa9852058348abc713b7,sha256=hOQfdyzDZctjoge0-pAcEDel5XHVPNfOtrMNyFPUOIE,564
71
+ py2ls/.git/objects/b5/61831c7dce8ea51e7ee6b6fa35745f14d8242d,sha256=wUqxlKjLN1vOUj2tkYStado64QewdcF3CHlSICds1ik,34415
58
72
  py2ls/.git/objects/bb/934eb33bc1a8b85630bf680caffd99560c1b8f,sha256=ggehjexUsWlskHJvHxW7u6U0otB0OCItmIZdT9O-3OU,9670
73
+ py2ls/.git/objects/c1/20fc812b9ad311c34a3608512d6a9d976bb48e,sha256=q-WAKugB-_-g7w0Mlw6oyTBaXQ_Qd7BdLatrDiYN7Wc,156
59
74
  py2ls/.git/objects/c4/cba65f1163661999ee4b8ed23342b63bc1300c,sha256=rwSdKt-C98nUQ_B-7imY4fYRYmn29MQc4SIu9wruHeo,566
60
75
  py2ls/.git/objects/c6/7f17e5707313600efcb85e9a3fedea35dba591,sha256=TL7rDIWiaWlk8iIwqPst7St5Xr2otPs-vp17GPlET7o,565
61
76
  py2ls/.git/objects/cf/0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d,sha256=T_nV0GrgpVu3mOJ4fYcCW98oCunzgqy0DnSX0luy04Q,183
77
+ py2ls/.git/objects/d6/9ab1c4aadf279936dd778e8346ba60f74705b6,sha256=WcfdSMKqfiWT5TOWVUcDj0XDaD2hYxDnyIRNlYGutL8,34976
62
78
  py2ls/.git/objects/d9/005f2cc7fc4e65f14ed5518276007c08cf2fd0,sha256=IJIoz93V7pf9yx43U1JdN8gBq_LWtw8A9Z2YMPnq_B0,1450
63
79
  py2ls/.git/objects/d9/c2403fd166ce791b4e9d0c6792ed8342c71fcd,sha256=uD7BsKdrmN-9FStTpwsRWh-XxVXeDsV4dGjFkaMIIs8,170
80
+ py2ls/.git/objects/d9/dfa5aee51e92a541b707e8e7baea6f06deff98,sha256=jMdhZ1i_L5q_UgjOtjLN15PCSCz3pE51FhD3z74ZUr8,163
81
+ py2ls/.git/objects/db/141dbaa93594df2a8156182f361ee4db829359,sha256=TpKTLvbDc4Blzrp1Pq9JijqDROJyBJ7sCQQBmIuYKZo,845984
64
82
  py2ls/.git/objects/db/ffa8ea7bda721d0cee7b9e4ce5b2ef927733ff,sha256=GhDkvP6JYV26qVg5ETPys1ZEnGlsct9hiXCc24Ky4Xg,565
65
83
  py2ls/.git/objects/df/e0770424b2a19faf507a501ebfc23be8f54e7b,sha256=vCdlxwEidekh8i-5TVMVgSLGk9DPZCZAbWqvGYSKQ9c,76
84
+ py2ls/.git/objects/e3/1356f90ea6dd0577b5e0b40b206319adcbf085,sha256=I9_QNwmmtoqSwq29Ixdfv_PgF2x14u2M6sX1eQumwoY,161
66
85
  py2ls/.git/objects/e3/5a4dafc50850cacac7bf76c56db2715cbda2c4,sha256=GAcBj3YSEbm6tm7fGD6al16uBo8LtEtjZ2Hi-UgIsUg,3290
67
86
  py2ls/.git/objects/e9/391ffe371f1cc43b42ef09b705d9c767c2e14f,sha256=RWTy2n8L2XxZQknBFyPczA0Aa_4gSG_Ybcr8e8v4ccc,10264
68
87
  py2ls/.git/objects/f4/b64d3107b39e3ad6f540c6607004ea34e6c024,sha256=0egAtqc0x8hc7U1z91tIjcRhSd_BT2a_gxZxo_7NTJA,564
69
88
  py2ls/.git/objects/f7/c98ba5c2f903e603b1f5e63d49fbc8a43815cc,sha256=tYbi3A7irrIPB_11bwItuof0Vc9a0MDuLFMNAzRsG3A,33467
89
+ py2ls/.git/objects/fa/147e6bb78a2e8db241d231295fd7f1ed061af8,sha256=G9pg5LXv7AdxnPIQsTm2AF3Un314dLRJQYwxmZem9rQ,574
70
90
  py2ls/.git/objects/fc/292e793ecfd42240ac43be407023bd731fa9e7,sha256=hGIYoxKWNT3IPwk3DE4l3FLBbUYF-kXcHcx7KrH9uS0,1971
71
- py2ls/.git/refs/heads/main,sha256=XWF5fg25wz0SqFKCM1pFdyA5I_3pkrwsU8Ql6LH7X0E,41
91
+ py2ls/.git/refs/heads/main,sha256=CKZwTZ8cZZy9HnCOINHmltX6O90E8kPZFdJQ9peSpMk,41
72
92
  py2ls/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
73
- py2ls/.git/refs/remotes/origin/main,sha256=XWF5fg25wz0SqFKCM1pFdyA5I_3pkrwsU8Ql6LH7X0E,41
93
+ py2ls/.git/refs/remotes/origin/main,sha256=CKZwTZ8cZZy9HnCOINHmltX6O90E8kPZFdJQ9peSpMk,41
74
94
  py2ls/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
75
95
  py2ls/.gitignore,sha256=y7GvbD_zZkjPVVIue8AyiuFkDMuUbvMaV65Lgu89To8,2763
76
96
  py2ls/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
77
97
  py2ls/README.md,sha256=CwvJWAnSXnCnrVHlnEbrxxi6MbjbE_MT6DH2D53S818,11572
78
- py2ls/__init__.py,sha256=k05sUEKwWnAaOAn4VKCiPopcWrbmLLsZOuAt7vmUKWc,260
98
+ py2ls/__init__.py,sha256=ESXjQ9tnqg5mqYH4Gfgs76AoT1HHF_BkJUgnstiVwR8,243
79
99
  py2ls/brain_atlas.py,sha256=w1o5EelRjq89zuFJUNSz4Da8HnTCwAwDAZ4NU4a-bAY,5486
80
100
  py2ls/correlators.py,sha256=RbOaJIPLCHJtUm5SFi_4dCJ7VFUPWR0PErfK3K26ad4,18243
81
101
  py2ls/dbhandler.py,sha256=i9dNrpHyx0oIaFieHI4X4tsrCdN-aFxudPTDOgy9Ppo,3574
82
102
  py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
83
- py2ls/internet_finder.py,sha256=I-jPepbBhpDaOAsD2MqbKMe1CBN8w1PYo0CjNFkaeeU,19149
84
- py2ls/ips.py,sha256=MH_TRP_lz9AuWMLjPNgTTxNFHcvLLN2XKGQcG9txjP0,121806
85
- py2ls/netfinder.py,sha256=EbNH-QMFNYsu_eJ68zqEEJRJ5uAYOCrtkqfoC9ZJpo4,31207
103
+ py2ls/ips.py,sha256=wcA7UITz2Nx5bmDkQvGyZ9mNCvt9ZE9JTRpgCvExNPs,124868
104
+ py2ls/netfinder.py,sha256=dt6hkYeH-ivCHInoUi92MhJMLlXtjRXT3ewKzOwGtWk,31506
105
+ py2ls/setuptools-70.1.0-py3-none-any.whl,sha256=2bi3cUVal8ip86s0SOvgspteEF8SKLukECi-EWmFomc,882588
86
106
  py2ls/sleep_events_detectors.py,sha256=36MCuRrpurn0Uvzpo3p3b3_JlVsRNHSWCXbJxCGM3mg,51546
87
107
  py2ls/translator.py,sha256=QfDUO0-pXHGMBFZBefiBHzOrC93-__N5sUQY_VP4wes,29734
88
- py2ls/version.py,sha256=CactNZqrHHYTPrkHKccy2WKXmaiUdtTgPqSjFyVXnJk,18
89
108
  py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
90
- py2ls-0.1.4.4.dist-info/METADATA,sha256=sar2UcFB1uR1NAUfvx0pxCP-VO_KyT8n_fS6Yu4K0r4,11877
91
- py2ls-0.1.4.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
92
- py2ls-0.1.4.4.dist-info/RECORD,,
109
+ py2ls-0.1.4.6.dist-info/METADATA,sha256=M9tLANmcFhRhKeppFawPAZ4tOTn7lrFw99JBp0Mso2A,17943
110
+ py2ls-0.1.4.6.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
111
+ py2ls-0.1.4.6.dist-info/RECORD,,
py2ls/internet_finder.py DELETED
@@ -1,405 +0,0 @@
1
- from bs4 import BeautifulSoup
2
- import requests
3
- import os
4
- from urllib.parse import urlparse, urljoin
5
- import base64
6
- import pandas as pd
7
- from collections import Counter
8
- import random
9
- import logging
10
- from time import sleep
11
- import stem.process
12
- from stem import Signal
13
- from stem.control import Controller
14
- import json
15
- # Set up logging
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
18
-
19
- # Define supported content types and corresponding parsers
20
- CONTENT_PARSERS = {
21
- "text/html": lambda text, parser: BeautifulSoup(text, parser),
22
- "application/json": lambda text, parser: json.loads(text),
23
- "text/xml": lambda text, parser: BeautifulSoup(text, parser),
24
- "text/plain": lambda text, parser: text.text,
25
- }
26
-
27
- def fetch_all(url, parser="lxml"): # lxml is faster, # parser="html.parser"
28
- try:
29
- # Generate a random user-agent string
30
- headers = {"User-Agent": user_agent()}
31
-
32
- # Send the initial request
33
- response = requests.get(url, headers=headers)
34
-
35
- # If the response is a redirect, follow it
36
- while response.is_redirect:
37
- logger.info(f"Redirecting to: {response.headers['Location']}")
38
- response = requests.get(response.headers["Location"], headers=headers)
39
- # Check for a 403 error
40
- if response.status_code == 403:
41
- logger.warning("403 Forbidden error. Retrying...")
42
- # Retry the request after a short delay
43
- sleep(random.uniform(1, 3))
44
- response = requests.get(url, headers=headers)
45
- # Raise an error if retry also fails
46
- response.raise_for_status()
47
-
48
- # Raise an error for other HTTP status codes
49
- response.raise_for_status()
50
-
51
- # Get the content type
52
- content_type = response.headers.get("content-type", "").split(";")[0].lower()
53
- content = response.content.decode(response.encoding)
54
- # logger.info(f"Content type: {content_type}")
55
-
56
- # Check if content type is supported
57
- if content_type in CONTENT_PARSERS:
58
- return content_type, CONTENT_PARSERS[content_type](content, parser)
59
- else:
60
- logger.warning("Unsupported content type")
61
- return None, None
62
- except requests.RequestException as e:
63
- logger.error(f"Error fetching URL '{url}': {e}")
64
- return None, None
65
- def user_agent():
66
- # Example of generating a random user-agent string
67
- user_agents = [
68
- # Windows (Intel)
69
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4891.0 Safari/537.36",
70
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4893.0 Safari/537.36",
71
- "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4895.0 Safari/537.36",
72
- # Windows (ARM)
73
- "Mozilla/5.0 (Windows NT 10.0; Win64; arm64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4891.0 Safari/537.36",
74
- "Mozilla/5.0 (Windows NT 10.0; Win64; arm64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4893.0 Safari/537.36",
75
- "Mozilla/5.0 (Windows NT 10.0; Win64; arm64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4895.0 Safari/537.36",
76
- # Linux (x86_64)
77
- "Mozilla/5.0 (X11; Linux x86_64; rv:98.0) Gecko/20100101 Firefox/98.0",
78
- "Mozilla/5.0 (X11; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0",
79
- "Mozilla/5.0 (X11; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0",
80
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
81
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4891.0 Safari/537.36",
82
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4893.0 Safari/537.36",
83
- "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4895.0 Safari/537.36",
84
- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:98.0) Gecko/20100101 Firefox/98.0",
85
- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:99.0) Gecko/20100101 Firefox/99.0",
86
- "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:100.0) Gecko/20100101 Firefox/100.0",
87
- # macOS (Intel)
88
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_0_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Safari/605.1.15",
89
- "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_0_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15",
90
- # macOS (ARM)
91
- "Mozilla/5.0 (Macintosh; ARM Mac OS X 12_0_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.3 Safari/605.1.15",
92
- "Mozilla/5.0 (Macintosh; ARM Mac OS X 12_0_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15",
93
- # iOS Devices
94
- "Mozilla/5.0 (iPad; CPU OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1",
95
- "Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1",
96
- # Android Devices
97
- "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4891.0 Mobile Safari/537.36",
98
- "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4893.0 Mobile Safari/537.36",
99
- "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4895.0 Mobile Safari/537.36",
100
- # Smart TVs
101
- "Mozilla/5.0 (SMART-TV; LINUX; Tizen 6.0) AppleWebKit/537.36 (KHTML, like Gecko) SmartTV/1.0",
102
- "Mozilla/5.0 (SMART-TV; LINUX; Tizen 6.0) AppleWebKit/537.36 (KHTML, like Gecko) WebAppManager/1.0",
103
- # Game Consoles
104
- "Mozilla/5.0 (PlayStation 5 3.01) AppleWebKit/605.1.15 (KHTML, like Gecko)",
105
- "Mozilla/5.0 (Xbox One 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36 Edge/44.18363.8740",
106
- ]
107
- agents = random.choice(user_agents)
108
- return agents
109
-
110
- # # Function to change Tor IP address
111
- # def renew_tor_ip():
112
- # with Controller.from_port(port=9051) as controller:
113
- # controller.authenticate()
114
- # controller.signal(Signal.NEWNYM)
115
-
116
- # # Function to make requests through Tor
117
- # def make_tor_request(url, max_retries=3):
118
- # renew_tor_ip()
119
- # headers = {"User-Agent": user_agent()}
120
- # session = requests.Session()
121
- # session.proxies = {"http": "socks5h://localhost:9050", "https": "socks5h://localhost:9050"}
122
-
123
- # for i in range(max_retries):
124
- # try:
125
- # response = session.get(url, headers=headers, timeout=10)
126
- # if response.status_code == 200:
127
- # return response.text
128
- # except requests.exceptions.RequestException as e:
129
- # print(f"Error: {e}")
130
- # time.sleep(2) # Add a delay between retries
131
-
132
- # return None
133
-
134
-
135
- def find_links(url):
136
- links_href = [] # Initialize list to store extracted links
137
- content_type, content = fetch_all(url)
138
- base_url = urlparse(url)
139
- links = content.find_all("a", href=True)
140
- for link in links:
141
- link_href = link["href"]
142
- if not link_href.startswith(('http://', 'https://')):
143
- # Convert relative links to absolute links
144
- link_href = urljoin(base_url.geturl(), link_href)
145
- links_href.append(link_href)
146
- return links_href
147
-
148
- def find_domain(links):
149
- domains = [urlparse(link).netloc for link in links]
150
- domain_counts = Counter(domains)
151
- most_common_domain = domain_counts.most_common(1)[0][0]
152
- # print(f"Most_frequent_domain:{most_common_domain}")
153
- return most_common_domain
154
-
155
- # To determine which links are related to target domains(e.g., pages) you are interested in
156
- def filter_links(links, domain=None, kind='html'):
157
- filtered_links = []
158
- if isinstance(kind, (str, list)):
159
- kind = tuple(kind)
160
- if domain is None:
161
- domain = find_domain(links)
162
- for link in links:
163
- parsed_link = urlparse(link)
164
- if parsed_link.netloc == domain and parsed_link.path.endswith(kind) and 'javascript:' not in parsed_link:
165
- filtered_links.append(link)
166
- return filtered_links
167
-
168
- def find_img(url, dir_save="images"):
169
- """
170
- Save images referenced in HTML content locally.
171
- Args:
172
- content (str or BeautifulSoup): HTML content or BeautifulSoup object.
173
- url (str): URL of the webpage.
174
- content_type (str): Type of content. Default is "html".
175
- dir_save (str): Directory to save images. Default is "images".
176
- Returns:
177
- str: HTML content with updated image URLs pointing to local files.
178
- """
179
- content_type, content = fetch_all(url)
180
- if "html" in content_type.lower():
181
- # Create the directory if it doesn't exist
182
- os.makedirs(dir_save, exist_ok=True)
183
-
184
- # Parse HTML content if it's not already a BeautifulSoup object
185
- if isinstance(content, str):
186
- content = BeautifulSoup(content, "html.parser")
187
- image_links=[]
188
- # Extracting images
189
- images = content.find_all("img", src=True)
190
- for i, image in enumerate(images):
191
- try:
192
- # Get the image URL
193
- image_url = image["src"]
194
-
195
- if image_url.startswith("data:image"):
196
- # Extract the image data from the data URI
197
- mime_type, base64_data = image_url.split(",", 1)
198
- # Determine the file extension from the MIME type
199
- if ":" in mime_type:
200
- # image_extension = mime_type.split(":")[1].split(";")[0]
201
- image_extension = mime_type.split(":")[1].split(";")[0].split("/")[-1]
202
- else:
203
- image_extension = "png" # Default to PNG if extension is not specified
204
- # if 'svg+xml' in image_extension:
205
- # image_extension='svg'
206
- image_data = base64.b64decode(base64_data)
207
- # Save the image data to a file
208
- image_filename = os.path.join(
209
- dir_save, f"image_{i}.{image_extension}"
210
- )
211
- with open(image_filename, "wb") as image_file:
212
- image_file.write(image_data)
213
-
214
- # Update the src attribute of the image tag to point to the local file
215
- image["src"] = image_filename
216
- else:
217
- # Construct the absolute image URL
218
- absolute_image_url = urljoin(url, image_url)
219
-
220
- # Parse the image URL to extract the file extension
221
- parsed_url = urlparse(absolute_image_url)
222
- image_extension = os.path.splitext(parsed_url.path)[1]
223
-
224
- # Download the image
225
- image_response = requests.get(absolute_image_url)
226
-
227
- # Save the image to a file
228
- image_filename = os.path.join(
229
- dir_save, f"image_{i}{image_extension}"
230
- )
231
- with open(image_filename, "wb") as image_file:
232
- image_file.write(image_response.content)
233
-
234
- # Update the src attribute of the image tag to point to the local file
235
- image["src"] = image_filename
236
- except (requests.RequestException, KeyError) as e:
237
- print(f"Failed to process image {image_url}: {e}")
238
- print(f"images were saved at\n{dir_save}")
239
- # Return the HTML content with updated image URLs
240
- return content
241
-
242
- def content_div_class(content, div="div", div_class="highlight"):
243
- texts = [div.text for div in content.find_all(div, class_=div_class)]
244
- return texts
245
- def find(url, where="div", what="highlight"):
246
- _,content = fetch_all(url, parser="html.parser")
247
- texts = [div.text for div in content.find_all(where, class_=what)]
248
- return texts
249
- # usage example:
250
- #### img2local(url, "/Users/macjianfeng/Desktop/@tmp/dd/")
251
- def find_forms(url):
252
- content_type, content = fetch_all(url)
253
- df=pd.DataFrame()
254
- # Extracting forms and inputs
255
- forms = content.find_all("form")
256
- form_data = []
257
- for form in forms:
258
- form_inputs = form.find_all("input")
259
- input_data = {}
260
- for input_tag in form_inputs:
261
- input_type = input_tag.get("type")
262
- input_name = input_tag.get("name")
263
- input_value = input_tag.get("value")
264
- input_data[input_name] = {"type": input_type, "value": input_value}
265
- form_data.append(input_data)
266
- return form_data
267
- # to clean strings
268
- def clean_string(value):
269
- if isinstance(value, str):
270
- return value.replace('\n', '').replace('\r', '').replace('\t', '')
271
- else:
272
- return value
273
- def find_all(url, dir_save=None):
274
- content_type, content = fetch_all(url)
275
-
276
- # Extracting paragraphs
277
- paragraphs_text = [paragraph.text for paragraph in content.find_all("p")]
278
-
279
- # Extracting specific elements by class
280
- specific_elements_text = [element.text for element in content.find_all(class_="specific-class")]
281
-
282
- # Extracting links (anchor tags)
283
- links_href = find_links(url)
284
- links_href = filter_links(links_href)
285
-
286
- # Extracting images
287
- images_src = [image['src'] for image in content.find_all("img", src=True)]
288
-
289
- # Extracting headings (h1, h2, h3, etc.)
290
- headings = [f'h{i}' for i in range(1, 7)]
291
- headings_text = {heading: [tag.text for tag in content.find_all(heading)] for heading in headings}
292
-
293
- # Extracting lists (ul, ol, li)
294
- list_items_text = [item.text for list_ in content.find_all(["ul", "ol"]) for item in list_.find_all("li")]
295
-
296
- # Extracting tables (table, tr, td)
297
- table_cells_text = [cell.text for table in content.find_all("table") for row in table.find_all("tr") for cell in row.find_all("td")]
298
-
299
- # Extracting other elements
300
- divs_content = [div.text.strip() for div in content.find_all("div")]
301
- headers_footer_content = [tag.text for tag in content.find_all(["header", "footer"])]
302
- meta_tags_content = [(tag.name, tag.attrs) for tag in content.find_all("meta")]
303
- spans_content = [span.text for span in content.find_all("span")]
304
- bold_text_content = [text.text for text in content.find_all("b")]
305
- italic_text_content = [text.text for text in content.find_all("i")]
306
- code_snippets_content = [code.text for code in content.find_all("code")]
307
- blockquotes_content = [blockquote.text for blockquote in content.find_all("blockquote")]
308
- preformatted_text_content = [pre.text for pre in content.find_all("pre")]
309
- buttons_content = [button.text for button in content.find_all("button")]
310
- navs_content = [nav.text for nav in content.find_all("nav")]
311
- sections_content = [section.text for section in content.find_all("section")]
312
- articles_content = [article.text for article in content.find_all("article")]
313
- figures_content = [figure.text for figure in content.find_all("figure")]
314
- captions_content = [caption.text for caption in content.find_all("figcaption")]
315
- abbreviations_content = [abbr.text for abbr in content.find_all("abbr")]
316
- definitions_content = [definition.text for definition in content.find_all("dfn")]
317
- addresses_content = [address.text for address in content.find_all("address")]
318
- time_elements_content = [time.text for time in content.find_all("time")]
319
- progress_content = [progress.text for progress in content.find_all("progress")]
320
- meter_content = [meter.text for meter in content.find_all("meter")]
321
- forms = find_forms(url)
322
-
323
- lists_to_fill = [
324
- paragraphs_text, specific_elements_text, links_href, images_src,
325
- headings_text["h1"], headings_text["h2"], headings_text["h3"], headings_text["h4"],
326
- headings_text["h5"], headings_text["h6"], list_items_text, table_cells_text,
327
- divs_content, headers_footer_content, meta_tags_content, spans_content,
328
- bold_text_content, italic_text_content, code_snippets_content,
329
- blockquotes_content, preformatted_text_content, buttons_content,
330
- navs_content, sections_content, articles_content, figures_content,
331
- captions_content, abbreviations_content, definitions_content,
332
- addresses_content, time_elements_content, progress_content,
333
- meter_content,forms
334
- ]
335
- # add new features
336
- script_texts=content_div_class(content, div="div", div_class="highlight")
337
- lists_to_fill.append(script_texts)
338
-
339
- audio_src = [audio['src'] for audio in content.find_all("audio", src=True)]
340
- video_src = [video['src'] for video in content.find_all("video", src=True)]
341
- iframe_src = [iframe['src'] for iframe in content.find_all("iframe", src=True)]
342
- lists_to_fill.extend([audio_src, video_src, iframe_src])
343
-
344
- rss_links = [link['href'] for link in content.find_all('link', type=['application/rss+xml', 'application/atom+xml'])]
345
- lists_to_fill.append(rss_links)
346
-
347
- # Find the maximum length among all lists
348
- max_length = max(len(lst) for lst in lists_to_fill)
349
-
350
- # Fill missing data with empty strings for each list
351
- for lst in lists_to_fill:
352
- lst += [""] * (max_length - len(lst))
353
-
354
- # Create DataFrame
355
- df = pd.DataFrame({
356
- "headings1": headings_text["h1"],
357
- "headings2": headings_text["h2"],
358
- "headings3": headings_text["h3"],
359
- "headings4": headings_text["h4"],
360
- "headings5": headings_text["h5"],
361
- "headings6": headings_text["h6"],
362
- "paragraphs": paragraphs_text,
363
- "list_items": list_items_text,
364
- "table_cells": table_cells_text,
365
- "headers_footer": headers_footer_content,
366
- "meta_tags": meta_tags_content,
367
- "spans": spans_content,
368
- "bold_text": bold_text_content,
369
- "italic_text": italic_text_content,
370
- "code_snippets": code_snippets_content,
371
- "blockquotes": blockquotes_content,
372
- "preformatted_text": preformatted_text_content,
373
- "buttons": buttons_content,
374
- "navs": navs_content,
375
- "sections": sections_content,
376
- "articles": articles_content,
377
- "figures": figures_content,
378
- "captions": captions_content,
379
- "abbreviations": abbreviations_content,
380
- "definitions": definitions_content,
381
- "addresses": addresses_content,
382
- "time_elements": time_elements_content,
383
- "progress": progress_content,
384
- "specific_elements": specific_elements_text,
385
- "meter": meter_content,
386
- "forms":forms,
387
- "scripts":script_texts,
388
- "audio":audio_src,
389
- "video":video_src,
390
- "iframe":iframe_src,
391
- "rss": rss_links,
392
- "images": images_src,
393
- "links": links_href,
394
- "divs": divs_content,
395
- })
396
- # to remove the '\n\t\r'
397
- df=df.apply(lambda x: x.map(clean_string) if x.dtype == "object" else x) # df=df.applymap(clean_string)
398
- if dir_save:
399
- if not dir_save.endswith(".csv"):
400
- dir_save=dir_save+"_df.csv"
401
- df.to_csv(dir_save)
402
- else:
403
- df.to_csv(dir_save)
404
- print(f"file has been saved at\n{dir_save}")
405
- return df
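internet_finder.py is removed from both the wheel and __init__.py; netfinder.py remains and, as the hunks above show, carries comparable helpers (find_img, pdf_detector, filter_links, display_thumbnail_figure). A hedged migration sketch for code that imported the old module (the URL is hypothetical, and it assumes netfinder.find_img keeps the signature shown in its hunk header):

    # py2ls 0.1.4.4:
    #   from py2ls import internet_finder
    #   internet_finder.find_img("https://example.com", dir_save="images")
    # py2ls 0.1.4.6:
    from py2ls import netfinder
    netfinder.find_img("https://example.com", dir_save="images", verbose=False)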
py2ls/version.py DELETED
@@ -1 +0,0 @@
- version = "0.0.1"
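With version.py deleted, the stale hard-coded version = "0.0.1" is gone and the wheel metadata above is the only version source. A hedged sketch of reading the installed version at runtime via the standard library instead:

    from importlib.metadata import version  # stdlib, Python >= 3.8
    print(version("py2ls"))                 # '0.1.4.6' for this wheel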