py2ls 0.1.4.2__py3-none-any.whl → 0.1.4.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.DS_Store +0 -0
- py2ls/.git/COMMIT_EDITMSG +1 -3
- py2ls/.git/FETCH_HEAD +1 -1
- py2ls/.git/index +0 -0
- py2ls/.git/logs/HEAD +6 -0
- py2ls/.git/logs/refs/heads/main +6 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +26 -0
- py2ls/.git/logs/refs/remotes/origin/main +5 -0
- py2ls/.git/objects/1a/b4585881a6a42889f01aa0cfe25fd5acfaf46f +0 -0
- py2ls/.git/objects/36/e56a361f526eafa59c5235a5c990bf288b5f9c +0 -0
- py2ls/.git/objects/43/dbd49b2ee367c5434dd545e3b5795434f2ef0b +0 -0
- py2ls/.git/objects/50/08ddfcf53c02e82d7eee2e57c38e5672ef89f6 +0 -0
- py2ls/.git/objects/53/e0deb1cb4c2c606bced6e7f9a66b0fda60980d +0 -0
- py2ls/.git/objects/56/e4e8b2d5545e0256090f45aa8fc42c5fe067d0 +0 -0
- py2ls/.git/objects/62/7c81b23b4e56e87b042b650b0103653cc9e34a +0 -0
- py2ls/.git/objects/62/d90ccf8cbefdc2e4fd475e7c6f4f76e9fdf801 +3 -0
- py2ls/.git/objects/64/27a4edff08f93d98f511418423f09f2ab90bcd +0 -0
- py2ls/.git/objects/6c/cebb29b7f3f5b0c889f6dadbf9ff066554587d +0 -0
- py2ls/.git/objects/81/8f26b7bf042269729020cf944fc362d66ba27e +0 -0
- py2ls/.git/objects/84/59071b722a255b774a80b27746033f8141ab39 +0 -0
- py2ls/.git/objects/8b/84f56978e1de8f2ae82abce5f8b3e182d365cd +0 -0
- py2ls/.git/objects/b5/61831c7dce8ea51e7ee6b6fa35745f14d8242d +0 -0
- py2ls/.git/objects/bb/934eb33bc1a8b85630bf680caffd99560c1b8f +0 -0
- py2ls/.git/objects/c6/7f17e5707313600efcb85e9a3fedea35dba591 +0 -0
- py2ls/.git/objects/cf/0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d +1 -0
- py2ls/.git/objects/d6/9ab1c4aadf279936dd778e8346ba60f74705b6 +0 -0
- py2ls/.git/objects/d9/dfa5aee51e92a541b707e8e7baea6f06deff98 +0 -0
- py2ls/.git/objects/e3/1356f90ea6dd0577b5e0b40b206319adcbf085 +0 -0
- py2ls/.git/objects/fa/147e6bb78a2e8db241d231295fd7f1ed061af8 +0 -0
- py2ls/.git/refs/heads/main +1 -1
- py2ls/.git/refs/remotes/origin/main +1 -1
- py2ls/__init__.py +1 -2
- py2ls/ips.py +159 -24
- py2ls/netfinder.py +166 -128
- py2ls/setuptools-70.1.0-py3-none-any.whl +0 -0
- {py2ls-0.1.4.2.dist-info → py2ls-0.1.4.5.dist-info}/METADATA +139 -2
- {py2ls-0.1.4.2.dist-info → py2ls-0.1.4.5.dist-info}/RECORD +38 -17
- py2ls/internet_finder.py +0 -405
- py2ls/version.py +0 -1
- {py2ls-0.1.4.2.dist-info → py2ls-0.1.4.5.dist-info}/WHEEL +0 -0
py2ls/netfinder.py CHANGED
@@ -20,7 +20,12 @@ from selenium.webdriver.chrome.options import Options
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from webdriver_manager.chrome import ChromeDriverManager
-import
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
+from pprint import pp
+import mimetypes
+import io
+import matplotlib.pyplot as plt
+from PIL import Image
 
 # Set up logging
 logging.basicConfig(level=logging.INFO)
@@ -42,61 +47,22 @@ def user_agent(browsers=["chrome", "edge", "firefox", "safari"], platforms=["pc"
     output_ua = ua.random
     if verbose:
         print(output_ua)
-    return output_ua
-# def extract_text_from_content(content,where,what,extend=False):
-#     if extend:
-#         texts = ""
-
-#         def extract_text(element):
-#             nonlocal texts
-#             if isinstance(element, str) and element.strip():
-#                 texts += element.strip()
-#             elif hasattr(element, "children"):
-#                 for child in element.children:
-#                     extract_text(child)
-
-#         result_set = (
-#             content.find_all(where, class_=what)
-#             if what
-#             else content.find_all(where)
-#         )
-#         for tag in result_set:
-#             extract_text(tag)
-
-#         text = [tx for tx in texts.split("\n") if tx]
-#         return text
-#     else:
-#         result_set = (
-#             content.find_all(where, class_=what)
-#             if what
-#             else content.find_all(where)
-#         )
-#         texts_ = " ".join(tag.get_text() + "\n" for tag in result_set)
-#         texts = [tx.strip() for tx in texts_.split("\n") if tx]
-#         return texts
-# def extract_text_from_content(content, where, what=None, extend=True):
-#     if extend:
-#         def extract_text(element):
-#             texts = ""
-#             if isinstance(element, str) and element.strip():
-#                 texts += element.strip()
-#             elif hasattr(element, "children"):
-#                 for child in element.children:
-#                     texts += extract_text(child)
-#             return texts
-
-#         result_set = content.find_all(where, class_=what) if what else content.find_all(where)
-#         texts = ""
-#         for tag in result_set:
-#             texts += extract_text(tag) + "\n"
-#         text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
-#         return text_list
-#     else:
-#         result_set = content.find_all(where, class_=what) if what else content.find_all(where)
-#         texts_ = " ".join(tag.get_text() for tag in result_set)
-#         texts = [tx.strip() for tx in texts_.split("\n") if tx.strip()]
-#         return texts
+    return output_ua
 def extract_text_from_content(content, content_type="text/html", where=None, what=None, extend=True, **kwargs):
+    """
+    Extracts text from the given content based on the specified content type and search criteria.
+
+    Parameters:
+    - content (str/BeautifulSoup): The content to extract text from.
+    - content_type (str): The type of content, e.g., "text/html" or "application/json".
+    - where (str/list): The HTML tag or list of tags to search for.
+    - what (str): The class name to filter the tags (optional).
+    - extend (bool): Whether to recursively extract text from child elements.
+    - **kwargs: Additional keyword arguments for the search (e.g., id, attributes).
+
+    Returns:
+    - list: A list of extracted text segments.
+    """
     if content is None:
         logger.error("Content is None, cannot extract text.")
         return []
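For orientation, a minimal usage sketch of the rewritten extract_text_from_content (the HTML snippet, tag, and expected output here are illustrative, not from the package):

```python
# Illustrative sketch of the new extract_text_from_content signature.
# Assumes py2ls 0.1.4.5 is installed; the HTML is made up.
from bs4 import BeautifulSoup
from py2ls.netfinder import extract_text_from_content

soup = BeautifulSoup("<div class='post'><p>Hello</p><p>World</p></div>", "html.parser")

# what= maps onto BeautifulSoup's class_ filter; extend=True walks child nodes.
texts = extract_text_from_content(soup, content_type="text/html",
                                  where="p", what=None, extend=True)
print(texts)  # expected: ['Hello', 'World']
```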
@@ -109,35 +75,41 @@ def extract_text_from_content(content, content_type="text/html", where=None, wha
         where = None
         return extract_text_from_json(content, where)
     elif 'text' in content_type:
-        if extend:
-            def extract_text(element):
-                texts = ""
-                if isinstance(element, str) and element.strip():
-                    texts += element.strip()
-                elif hasattr(element, "children"):
-                    for child in element.children:
-                        texts += extract_text(child)
-                return texts
-
-            search_kwargs = {**kwargs}
-            if what:
-                search_kwargs["class_"] = what
-
-            result_set = content.find_all(where, **search_kwargs)
-            texts = ""
-            for tag in result_set:
-                texts += extract_text(tag) + "\n"
-            text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
-            return text_list
+        if isinstance(where, list):
+            res=[]
+            for where_ in where:
+                res.extend(extract_text_from_content(content, content_type="text/html", where=where_, what=what, extend=extend, **kwargs))
+            return res
         else:
-            search_kwargs = {**kwargs}
-            if what:
-                search_kwargs["class_"] = what
+            if extend:
+                def extract_text(element):
+                    texts = ""
+                    if isinstance(element, str) and element.strip():
+                        texts += element.strip()
+                    elif hasattr(element, "children"):
+                        for child in element.children:
+                            texts += extract_text(child)
+                    return texts
+
+                search_kwargs = {**kwargs}
+                if what:
+                    search_kwargs["class_"] = what
+
+                result_set = content.find_all(where, **search_kwargs)
+                texts = ""
+                for tag in result_set:
+                    texts += extract_text(tag) + "\n"
+                text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
+                return text_list
+            else:
+                search_kwargs = {**kwargs}
+                if what:
+                    search_kwargs["class_"] = what
 
-            result_set = content.find_all(where, **search_kwargs)
-            texts_ = " ".join(tag.get_text() for tag in result_set)
-            texts = [tx.strip() for tx in texts_.split("\n") if tx.strip()]
-            return texts
+                result_set = content.find_all(where, **search_kwargs)
+                texts_ = " ".join(tag.get_text() for tag in result_set)
+                texts = [tx.strip() for tx in texts_.split("\n") if tx.strip()]
+                return texts
 
 def extract_text_from_json(content, key=None):
     if key:
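The new isinstance(where, list) branch fans out into one recursive call per tag and extends the combined result list, so several tags can be queried in one call (illustrative):

```python
# Illustrative: a list for `where` exercises the new fan-out branch above.
from bs4 import BeautifulSoup
from py2ls.netfinder import extract_text_from_content

soup = BeautifulSoup("<h1>Title</h1><p>Body</p>", "html.parser")
texts = extract_text_from_content(soup, content_type="text/html",
                                  where=["h1", "p"], extend=True)
print(texts)  # expected: ['Title', 'Body']
```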
@@ -344,7 +316,7 @@ def pdf_detector(url, contains=None, dir_save=None,booster=False):
     pdf_links = filter_links(links=links_all, contains=["pdf"])
 
     if pdf_links:
-
+        pp(f"pdf detected{pdf_links}")
     else:
         print('no pdf file')
     if dir_save:
@@ -366,7 +338,7 @@ def pdf_detector(url, contains=None, dir_save=None,booster=False):
         print(f'{len(fnames)} files are downloaded:\n{fnames}\n to local: \n{dir_save}')
 
 
-def find_img(url, dir_save="images"):
+def find_img(url, dir_save="images", verbose=True):
     """
     Save images referenced in HTML content locally.
     Args:
@@ -381,7 +353,6 @@ def find_img(url, dir_save="images"):
     if "html" in content_type.lower():
         # Create the directory if it doesn't exist
        os.makedirs(dir_save, exist_ok=True)
-
         # Parse HTML content if it's not already a BeautifulSoup object
         if isinstance(content, str):
             content = BeautifulSoup(content, "html.parser")
@@ -390,13 +361,9 @@ def find_img(url, dir_save="images"):
         images = content.find_all("img", src=True)
         for i, image in enumerate(images):
             try:
-                # Get the image URL
                 image_url = image["src"]
-
                 if image_url.startswith("data:image"):
-                    # Extract the image data from the data URI
                     mime_type, base64_data = image_url.split(",", 1)
-                    # Determine the file extension from the MIME type
                     if ":" in mime_type:
                         # image_extension = mime_type.split(":")[1].split(";")[0]
                         image_extension = (
@@ -406,44 +373,73 @@ def find_img(url, dir_save="images"):
                         image_extension = (
                             "png" # Default to PNG if extension is not specified
                         )
-                    # if 'svg+xml' in image_extension:
-                    #     image_extension='svg'
                     image_data = base64.b64decode(base64_data)
-                    # Save the image data to a file
                     image_filename = os.path.join(
                         dir_save, f"image_{i}.{image_extension}"
                     )
                     with open(image_filename, "wb") as image_file:
                         image_file.write(image_data)
-
-                    # Update the src attribute of the image tag to point to the local file
                     image["src"] = image_filename
+                    if verbose:
+                        plt.imshow(image_data)
                 else:
                     # Construct the absolute image URL
                     absolute_image_url = urljoin(url, image_url)
-
                     # Parse the image URL to extract the file extension
                     parsed_url = urlparse(absolute_image_url)
                     image_extension = os.path.splitext(parsed_url.path)[1]
-
                     # Download the image
                     image_response = requests.get(absolute_image_url,proxies=proxies_glob)
-
                     # Save the image to a file
                     image_filename = os.path.join(
                         dir_save, f"image_{i}{image_extension}"
                     )
                     with open(image_filename, "wb") as image_file:
                         image_file.write(image_response.content)
-
                     # Update the src attribute of the image tag to point to the local file
                     image["src"] = image_filename
             except (requests.RequestException, KeyError) as e:
                 print(f"Failed to process image {image_url}: {e}")
         print(f"images were saved at\n{dir_save}")
-
+        if verbose:
+            display_thumbnail_figure(flist(dir_save,filter='img'),dpi=100)
         return content
+def svg_to_png(svg_file):
+    with WandImage(filename=svg_file, resolution=300) as img:
+        img.format = 'png'
+        png_image = img.make_blob()
+        return Image.open(io.BytesIO(png_image))
+def display_thumbnail_figure(dir_img_list,figsize=(10,10),dpi=100):
+    import matplotlib.pyplot as plt
+    from PIL import Image
+    """
+    Display a thumbnail figure of all images in the specified directory.
+    Args:
+        dir_img_list (list): List of the Directory containing the images.
+    """
+    num_images = len(dir_img_list)
+
+    if num_images == 0:
+        print("No images found to display.")
+        return
+
+    # Determine grid size
+    grid_size = int(num_images ** 0.5) + 1
+
+    fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)
+
+    for ax, image_file in zip(axs.flatten(), dir_img_list):
+        try:
+            img = Image.open(image_file)
+            ax.imshow(img)
+            ax.axis('off') # Hide axes
+        except:
+            continue
+    # Hide remaining subplots
+    [ax.axis("off") for ax in axs.flatten()]
 
+    plt.tight_layout()
+    plt.show()
 
 def content_div_class(content, div="div", div_class="highlight"):
     texts = [div.text for div in content.find_all(div, class_=div_class)]
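A note on the grid heuristic in display_thumbnail_figure above: a side length of int(num_images ** 0.5) + 1 always yields at least num_images cells, since (⌊√n⌋ + 1)² > n. A standalone check (not part of the package):

```python
# Standalone sanity check of the thumbnail grid-size heuristic used above.
for n in (1, 4, 5, 9, 10, 16, 17):
    side = int(n ** 0.5) + 1   # grid side length, as in display_thumbnail_figure
    assert side * side >= n    # enough subplot cells for every image
    print(f"{n} images -> {side}x{side} grid")
```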
@@ -467,7 +463,7 @@ def fetch_selenium(
     username_by=By.NAME,
     password_by=By.NAME,
     submit_by=By.NAME,
-
+    # capability='eager', # eager or none
     proxy=None, # Add proxy parameter
     javascript=True, # Add JavaScript option
     disable_images=False, # Add option to disable images
@@ -479,21 +475,16 @@ def fetch_selenium(
     chrome_options.add_argument("--no-sandbox")
     chrome_options.add_argument("--disable-dev-shm-usage")
     chrome_options.add_argument(f"user-agent={user_agent()}")
-
     if proxy:
         chrome_options.add_argument(f'--proxy-server={proxy}')
-
     if disable_images:
         prefs = {"profile.managed_default_content_settings.images": 2}
         chrome_options.add_experimental_option("prefs", prefs)
-
-    caps = DesiredCapabilities().CHROME
-    caps["pageLoadStrategy"] = "eager" # You can set this to "none" if needed
-
+    # chrome_options.page_load_strategy = capability
     service = Service(ChromeDriverManager().install())
     for attempt in range(retry):
         try:
-            driver = webdriver.Chrome(service=service, options=chrome_options
+            driver = webdriver.Chrome(service=service, options=chrome_options)
 
             if not javascript:
                 driver.execute_cdp_cmd("Emulation.setScriptExecutionDisabled", {"value": True})
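The removed DesiredCapabilities block corresponds, in Selenium 4, to the page_load_strategy attribute on the options object, which the new commented-out line hints at. A minimal sketch, assuming Selenium 4:

```python
# Minimal sketch (Selenium 4): the Options-based replacement for the removed
# DesiredCapabilities "pageLoadStrategy" entry.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

chrome_options = Options()
chrome_options.page_load_strategy = "eager"  # "normal", "eager", or "none"
driver = webdriver.Chrome(options=chrome_options)
```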
@@ -540,36 +531,19 @@ def fetch_selenium(
 
 
 def fetch(url, where="div", what=None, extend=True, booster=False,retry=2,verbose=False, **kws):
-    # for attempt in range(retry):
-    #     if verbose and attempt==0:
-    #         xample = 'fetch(url,where="div",what=None,extend=True,by=By.TAG_NAME,timeout=10,retry=3,login_url=None,username=None,password=None,username_field="username",password_field="password",submit_field="submit",username_by=By.NAME,password_by=By.NAME,submit_by=By.NAME)'
-    #         print(xample)
-    #     content_type, content = fetch_all(url, parser="html.parser")
-    #     texts=extract_text_from_content(content,content_type=content_type,where=where,what=what,extend=extend, **kws)
-    #     if isinstance(texts,pd.core.frame.DataFrame):
-    #         condition=[texts.empty, attempt != retry - 1]
-    #     else:
-    #         condition=[not texts, attempt != retry - 1]
-    #     if all(condition):
-    #         texts = fetch(url=url, where=where, what=what, extend=extend, retry=retry-1, **kws)
-    #     sleep(random.uniform(0.5, 1.5))
     for attempt in range(retry):
         if verbose and attempt==0:
             xample = 'fetch(url,where="div",what=None,extend=True,by=By.TAG_NAME,timeout=10,retry=3,login_url=None,username=None,password=None,username_field="username",password_field="password",submit_field="submit",username_by=By.NAME,password_by=By.NAME,submit_by=By.NAME)'
             print(xample)
         content_type, content = fetch_all(url, parser="html.parser")
         texts=extract_text_from_content(content,content_type=content_type,where=where,what=what,extend=extend, **kws)
-        if isinstance(texts, pd.core.frame.DataFrame):
-            # condition=[texts.empty, attempt != retry - 1]
+        if isinstance(texts, pd.core.frame.DataFrame):
             if not texts.empty:
                 break
-        else:
-            # condition=[not texts, attempt != retry - 1]
+        else:
             if texts:
                 break
-
-            # texts = fetch(url=url, where=where, what=what, extend=extend, retry=retry-1, **kws)
-        sleep(random.uniform(0.5, 1.5))
+        sleep(random.uniform(0.5, 1.5))
     if isinstance(texts,pd.core.frame.DataFrame):
         condition_=[texts.empty, booster]
     else:
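With the commented-out draft deleted, the retry loop now breaks on the first non-empty result and sleeps between attempts. A basic call might look like this (the URL is illustrative):

```python
# Illustrative call into the rewritten retry loop of fetch().
from py2ls.netfinder import fetch

texts = fetch("https://example.com", where="p", retry=2, verbose=False)
print(texts)
```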
@@ -799,4 +773,68 @@ def find_all(url, dir_save=None):
     else:
         df.to_csv(dir_save)
         print(f"file has been saved at\n{dir_save}")
-    return df
+    return df
+
+
+def flist(fpath, filter="all"):
+    all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
+    if isinstance(filter, list):
+        filt_files=[]
+        for filter_ in filter:
+            filt_files.extend(flist(fpath, filter_))
+        return filt_files
+    else:
+        if 'all' in filter.lower():
+            return all_files
+        else:
+            filt_files = [f for f in all_files if istype(f, filter)]
+            return filt_files
+
+def istype(fpath, filter='img'):
+    """
+    Filters file paths based on the specified filter.
+    Args:
+        fpath (str): Path to the file.
+        filter (str): filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
+                      'zip' for ZIP archives, and 'other' for other types of files.
+    Returns:
+        bool: True if the file matches the filter, False otherwise.
+    """
+    if 'img' in filter.lower():
+        return is_image(fpath)
+    elif 'doc' in filter.lower():
+        return is_document(fpath)
+    elif 'zip' in filter.lower():
+        return is_zip(fpath)
+    else:
+        return False
+
+def is_image(fpath):
+    mime_type, _ = mimetypes.guess_type(fpath)
+    if mime_type and mime_type.startswith('image'):
+        return True
+    else:
+        return False
+
+def is_document(fpath):
+    mime_type, _ = mimetypes.guess_type(fpath)
+    if mime_type and (
+        mime_type.startswith('text/') or
+        mime_type == 'application/pdf' or
+        mime_type == 'application/msword' or
+        mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
+        mime_type == 'application/vnd.ms-excel' or
+        mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
+        mime_type == 'application/vnd.ms-powerpoint' or
+        mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
+    ):
+        return True
+    else:
+        return False
+
+def is_zip(fpath):
+    mime_type, _ = mimetypes.guess_type(fpath)
+    if mime_type == 'application/zip':
+        return True
+    else:
+        return False
Binary file

{py2ls-0.1.4.2.dist-info → py2ls-0.1.4.5.dist-info}/METADATA CHANGED
@@ -1,12 +1,149 @@
 Metadata-Version: 2.1
 Name: py2ls
-Version: 0.1.4.2
+Version: 0.1.4.5
 Summary: py(thon)2(too)ls
 Author: Jianfeng
 Author-email: Jianfeng.Liu0413@gmail.com
-Requires-Python: >=3.
+Requires-Python: >=3.10,<4.0
 Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: Deprecated (>=1.2.14,<2.0.0)
+Requires-Dist: Jinja2 (>=3.1.4,<4.0.0)
+Requires-Dist: MarkupSafe (>=2.1.5,<3.0.0)
+Requires-Dist: PyMatting (>=1.1.12,<2.0.0)
+Requires-Dist: PyPDF2 (>=3.0.1,<4.0.0)
+Requires-Dist: PySocks (>=1.7.1,<2.0.0)
+Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+Requires-Dist: Pygments (>=2.18.0,<3.0.0)
+Requires-Dist: SciencePlots (>=2.1.1,<3.0.0)
+Requires-Dist: appnope (>=0.1.4,<0.2.0)
+Requires-Dist: appscript (>=1.2.5,<2.0.0)
+Requires-Dist: asttokens (>=2.4.1,<3.0.0)
+Requires-Dist: attrs (>=23.2.0,<24.0.0)
+Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
+Requires-Dist: certifi (>=2024.6.2,<2025.0.0)
+Requires-Dist: chardet (>=3.0.4,<4.0.0)
+Requires-Dist: charset-normalizer (>=3.3.2,<4.0.0)
+Requires-Dist: click (>=8.1.7,<9.0.0)
+Requires-Dist: colorcet (>=3.1.0,<4.0.0)
+Requires-Dist: coloredlogs (>=15.0.1,<16.0.0)
+Requires-Dist: comm (>=0.2.2,<0.3.0)
+Requires-Dist: contourpy (>=1.2.1,<2.0.0)
+Requires-Dist: cycler (>=0.12.1,<0.13.0)
+Requires-Dist: debugpy (>=1.8.1,<2.0.0)
+Requires-Dist: decorator (>=5.1.1,<6.0.0)
+Requires-Dist: defusedxml (>=0.7.1,<0.8.0)
+Requires-Dist: docx (>=0.2.4,<0.3.0)
+Requires-Dist: docx2pdf (>=0.1.8,<0.2.0)
+Requires-Dist: executing (>=2.0.1,<3.0.0)
+Requires-Dist: fake-useragent (>=1.5.1,<2.0.0)
+Requires-Dist: flatbuffers (>=24.3.25,<25.0.0)
+Requires-Dist: fonttools (>=4.53.0,<5.0.0)
+Requires-Dist: fpdf (>=1.7.2,<2.0.0)
+Requires-Dist: googletrans (>=4.0.0rc1,<5.0.0)
+Requires-Dist: h11 (>=0.9.0,<0.10.0)
+Requires-Dist: h2 (>=3.2.0,<4.0.0)
+Requires-Dist: hpack (>=3.0.0,<4.0.0)
+Requires-Dist: hstspreload (>=2024.6.1,<2025.0.0)
+Requires-Dist: httpcore (>=0.9.1,<0.10.0)
+Requires-Dist: httpx (>=0.13.3,<0.14.0)
+Requires-Dist: humanfriendly (>=10.0,<11.0)
+Requires-Dist: hyperframe (>=5.2.0,<6.0.0)
+Requires-Dist: idna (>=2.10,<3.0)
+Requires-Dist: imageio (>=2.34.1,<3.0.0)
+Requires-Dist: img2pdf (>=0.5.1,<0.6.0)
+Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
+Requires-Dist: ipython (>=8.25.0,<9.0.0) ; python_version >= "3.9" and python_version < "4.0"
+Requires-Dist: jedi (>=0.19.1,<0.20.0)
+Requires-Dist: joblib (>=1.3.2,<2.0.0)
+Requires-Dist: jsonschema (>=4.22.0,<5.0.0)
+Requires-Dist: jsonschema-specifications (>=2023.12.1,<2024.0.0)
+Requires-Dist: jupyter_client (>=8.6.2,<9.0.0)
+Requires-Dist: jupyter_core (>=5.7.2,<6.0.0)
+Requires-Dist: kiwisolver (>=1.4.5,<2.0.0)
+Requires-Dist: langdetect (>=1.0.9,<2.0.0)
+Requires-Dist: lazy_loader (>=0.4,<0.5)
+Requires-Dist: libretranslatepy (>=2.1.1,<3.0.0)
+Requires-Dist: llvmlite (>=0.42.0,<0.43.0)
+Requires-Dist: lxml (>=4.9.4,<5.0.0)
+Requires-Dist: matplotlib (>=3.8.4,<4.0.0)
+Requires-Dist: matplotlib-inline (>=0.1.7,<0.2.0)
+Requires-Dist: mne (>=1.6.0,<2.0.0)
+Requires-Dist: mpmath (>=1.3.0,<2.0.0)
+Requires-Dist: nest-asyncio (>=1.6.0,<2.0.0)
+Requires-Dist: networkx (>=3.3,<4.0) ; python_version >= "3.10" and python_version < "4.0"
+Requires-Dist: nltk (>=3.8.1,<4.0.0)
+Requires-Dist: numba (>=0.59.1,<0.60.0)
+Requires-Dist: numerizer (>=0.2.3,<0.3.0)
+Requires-Dist: numpy (>=1.26.4,<2.0.0)
+Requires-Dist: onnxruntime (>=1.18.0,<2.0.0)
+Requires-Dist: opencv-contrib-python (>=4.9.0.80,<5.0.0.0)
+Requires-Dist: opencv-python (>=4.9.0.80,<5.0.0.0)
+Requires-Dist: opencv-python-headless (>=4.9.0.80,<5.0.0.0)
+Requires-Dist: outcome (>=1.3.0.post0,<2.0.0)
+Requires-Dist: packaging (>=24.1,<25.0)
+Requires-Dist: pandas (>=2.2.2,<3.0.0)
+Requires-Dist: pandas-flavor (>=0.6.0,<0.7.0)
+Requires-Dist: parso (>=0.8.4,<0.9.0)
+Requires-Dist: patsy (>=0.5.6,<0.6.0)
+Requires-Dist: pdf2image (>=1.17.0,<2.0.0)
+Requires-Dist: pdf2img (>=0.1.2,<0.2.0)
+Requires-Dist: pexpect (>=4.9.0,<5.0.0)
+Requires-Dist: pikepdf (>=9.0.0,<10.0.0)
+Requires-Dist: pillow (>=10.3.0,<11.0.0)
+Requires-Dist: pingouin (>=0.5.4,<0.6.0)
+Requires-Dist: platformdirs (>=4.2.2,<5.0.0)
+Requires-Dist: pooch (>=1.8.2,<2.0.0)
+Requires-Dist: prompt_toolkit (>=3.0.47,<4.0.0)
+Requires-Dist: protobuf (>=5.27.1,<6.0.0)
+Requires-Dist: psutil (>=5.9.8,<6.0.0)
+Requires-Dist: ptyprocess (>=0.7.0,<0.8.0)
+Requires-Dist: pure-eval (>=0.2.2,<0.3.0)
+Requires-Dist: pyparsing (>=3.1.2,<4.0.0)
+Requires-Dist: python-box (>=7.2.0,<8.0.0)
+Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0)
+Requires-Dist: python-docx (>=1.1.0,<2.0.0)
+Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
+Requires-Dist: pytz (>=2024.1,<2025.0)
+Requires-Dist: pyzmq (>=26.0.3,<27.0.0)
+Requires-Dist: referencing (>=0.35.1,<0.36.0)
+Requires-Dist: regex (>=2024.5.15,<2025.0.0)
+Requires-Dist: rembg (>=2.0.56,<3.0.0) ; python_version >= "3.9" and python_version < "3.13"
+Requires-Dist: requests (>=2.32.3,<3.0.0)
+Requires-Dist: rfc3986 (>=1.5.0,<2.0.0)
+Requires-Dist: rpds-py (>=0.18.1,<0.19.0)
+Requires-Dist: scikit-image (>=0.23.2,<0.24.0) ; python_version >= "3.10" and python_version < "4.0"
+Requires-Dist: scikit-learn (>=1.3.2,<2.0.0)
+Requires-Dist: scipy (>=1.13.1,<2.0.0)
+Requires-Dist: seaborn (>=0.13.2,<0.14.0)
+Requires-Dist: selenium (>=4.21.0,<5.0.0)
+Requires-Dist: six (>=1.16.0,<2.0.0)
+Requires-Dist: sniffio (>=1.3.1,<2.0.0)
+Requires-Dist: sortedcontainers (>=2.4.0,<3.0.0)
+Requires-Dist: soupsieve (>=2.5,<3.0)
+Requires-Dist: stack-data (>=0.6.3,<0.7.0)
+Requires-Dist: statsmodels (>=0.14.1,<0.15.0)
+Requires-Dist: stem (>=1.8.2,<2.0.0)
+Requires-Dist: sympy (>=1.12.1,<2.0.0)
+Requires-Dist: tabulate (>=0.9.0,<0.10.0)
+Requires-Dist: threadpoolctl (>=3.5.0,<4.0.0)
+Requires-Dist: tifffile (>=2024.5.22,<2025.0.0)
+Requires-Dist: tornado (>=6.4.1,<7.0.0)
+Requires-Dist: tqdm (>=4.66.4,<5.0.0)
+Requires-Dist: traitlets (>=5.14.3,<6.0.0)
+Requires-Dist: translate (>=3.6.1,<4.0.0)
+Requires-Dist: trio (>=0.25.1,<0.26.0)
+Requires-Dist: trio-websocket (>=0.11.1,<0.12.0)
+Requires-Dist: typing_extensions (>=4.12.2,<5.0.0)
+Requires-Dist: tzdata (>=2024.1,<2025.0)
+Requires-Dist: urllib3 (>=2.2.1,<3.0.0)
+Requires-Dist: wcwidth (>=0.2.13,<0.3.0)
+Requires-Dist: webdriver-manager (>=4.0.1,<5.0.0)
+Requires-Dist: wrapt (>=1.16.0,<2.0.0)
+Requires-Dist: wsproto (>=1.2.0,<2.0.0)
+Requires-Dist: xarray (>=2024.6.0,<2025.0.0)
 Description-Content-Type: text/markdown
 
 # Install