py2ls 0.1.4.1__py3-none-any.whl → 0.1.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py2ls/.git/COMMIT_EDITMSG +4 -1
- py2ls/.git/FETCH_HEAD +1 -1
- py2ls/.git/index +0 -0
- py2ls/.git/logs/HEAD +5 -0
- py2ls/.git/logs/refs/heads/main +5 -0
- py2ls/.git/logs/refs/remotes/origin/HEAD +18 -0
- py2ls/.git/logs/refs/remotes/origin/main +5 -0
- py2ls/.git/objects/1d/fe9d9633b24ea560354f4f93d39c6e5f163ea0 +0 -0
- py2ls/.git/objects/24/6b368b986f758630c46dc02b7fa512b53422f7 +0 -0
- py2ls/.git/objects/36/e56a361f526eafa59c5235a5c990bf288b5f9c +0 -0
- py2ls/.git/objects/3b/bd972aa7ad680858f8dfbd0f7fcd97756f0d6f +0 -0
- py2ls/.git/objects/3c/bbe5f4173d165127b9ad96119f1ec24c306ffc +0 -0
- py2ls/.git/objects/43/dbd49b2ee367c5434dd545e3b5795434f2ef0b +0 -0
- py2ls/.git/objects/48/a88fc5806305d0bb0755ee6801161b79696972 +2 -0
- py2ls/.git/objects/64/27a4edff08f93d98f511418423f09f2ab90bcd +0 -0
- py2ls/.git/objects/6b/7fde264d93a7a0986d394c46c7650d0ce2ab92 +0 -0
- py2ls/.git/objects/a7/3e13eafee65c5b8d73ad2d3ea46d0eee82f0d3 +0 -0
- py2ls/.git/objects/bb/934eb33bc1a8b85630bf680caffd99560c1b8f +0 -0
- py2ls/.git/objects/c6/7f17e5707313600efcb85e9a3fedea35dba591 +0 -0
- py2ls/.git/objects/cf/0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d +1 -0
- py2ls/.git/objects/db/ffa8ea7bda721d0cee7b9e4ce5b2ef927733ff +0 -0
- py2ls/.git/objects/f4/b64d3107b39e3ad6f540c6607004ea34e6c024 +0 -0
- py2ls/.git/refs/heads/main +1 -1
- py2ls/.git/refs/remotes/origin/main +1 -1
- py2ls/ips.py +121 -50
- py2ls/netfinder.py +176 -124
- {py2ls-0.1.4.1.dist-info → py2ls-0.1.4.4.dist-info}/METADATA +1 -1
- {py2ls-0.1.4.1.dist-info → py2ls-0.1.4.4.dist-info}/RECORD +29 -14
- {py2ls-0.1.4.1.dist-info → py2ls-0.1.4.4.dist-info}/WHEEL +0 -0
py2ls/.git/COMMIT_EDITMSG
CHANGED
py2ls/.git/FETCH_HEAD
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd branch 'main' of https://github.com/Jianfengliu0413/py2ls
|
py2ls/.git/index
CHANGED
Binary file
|
py2ls/.git/logs/HEAD
CHANGED
@@ -3,3 +3,8 @@ b056be4be89ba6b76949dd641df45bb7036050c8 d9c2403fd166ce791b4e9d0c6792ed8342c71fc
|
|
3
3
|
d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed Jianfeng <Jianfeng.Liu0413@gmail.com> 1718370666 +0200 commit: Update README.md
|
4
4
|
14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed 36ef43e50009e59db11812c258846d9e38718173 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718371326 +0200 commit: Update README.md
|
5
5
|
36ef43e50009e59db11812c258846d9e38718173 0b409e1bc918277010f5679b402d1d1dda53e15c Jianfeng <Jianfeng.Liu0413@gmail.com> 1718392848 +0200 commit: readme
|
6
|
+
0b409e1bc918277010f5679b402d1d1dda53e15c 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718393734 +0200 commit: Update ips.py
|
7
|
+
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718491087 +0200 commit: Update netfinder.py
|
8
|
+
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526957 +0200 commit: Update netfinder.py
|
9
|
+
48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553462 +0200 commit: new feature: display_thumbnail_figure
|
10
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555179 +0200 commit: listdir type
|
py2ls/.git/logs/refs/heads/main
CHANGED
@@ -3,3 +3,8 @@ b056be4be89ba6b76949dd641df45bb7036050c8 d9c2403fd166ce791b4e9d0c6792ed8342c71fc
|
|
3
3
|
d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed Jianfeng <Jianfeng.Liu0413@gmail.com> 1718370666 +0200 commit: Update README.md
|
4
4
|
14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed 36ef43e50009e59db11812c258846d9e38718173 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718371326 +0200 commit: Update README.md
|
5
5
|
36ef43e50009e59db11812c258846d9e38718173 0b409e1bc918277010f5679b402d1d1dda53e15c Jianfeng <Jianfeng.Liu0413@gmail.com> 1718392848 +0200 commit: readme
|
6
|
+
0b409e1bc918277010f5679b402d1d1dda53e15c 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718393734 +0200 commit: Update ips.py
|
7
|
+
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718491087 +0200 commit: Update netfinder.py
|
8
|
+
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526957 +0200 commit: Update netfinder.py
|
9
|
+
48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553462 +0200 commit: new feature: display_thumbnail_figure
|
10
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555179 +0200 commit: listdir type
|
@@ -6,3 +6,21 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd d9c2403fd166ce791b4e9d0c6792ed8342c71fc
|
|
6
6
|
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718390975 +0200 remote set-head
|
7
7
|
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718392810 +0200 remote set-head
|
8
8
|
0b409e1bc918277010f5679b402d1d1dda53e15c 0b409e1bc918277010f5679b402d1d1dda53e15c Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718392852 +0200 remote set-head
|
9
|
+
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718393737 +0200 remote set-head
|
10
|
+
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718491090 +0200 remote set-head
|
11
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718526960 +0200 remote set-head
|
12
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718529062 +0200 remote set-head
|
13
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718531693 +0200 remote set-head
|
14
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718533521 +0200 remote set-head
|
15
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718535485 +0200 remote set-head
|
16
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718537443 +0200 remote set-head
|
17
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718539464 +0200 remote set-head
|
18
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718541421 +0200 remote set-head
|
19
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718543586 +0200 remote set-head
|
20
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718545544 +0200 remote set-head
|
21
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718547501 +0200 remote set-head
|
22
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718549457 +0200 remote set-head
|
23
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718551415 +0200 remote set-head
|
24
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718553371 +0200 remote set-head
|
25
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718553465 +0200 remote set-head
|
26
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718555183 +0200 remote set-head
|
@@ -3,3 +3,8 @@ b056be4be89ba6b76949dd641df45bb7036050c8 d9c2403fd166ce791b4e9d0c6792ed8342c71fc
|
|
3
3
|
d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed Jianfeng <Jianfeng.Liu0413@gmail.com> 1718370668 +0200 update by push
|
4
4
|
14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed 36ef43e50009e59db11812c258846d9e38718173 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718371329 +0200 update by push
|
5
5
|
36ef43e50009e59db11812c258846d9e38718173 0b409e1bc918277010f5679b402d1d1dda53e15c Jianfeng <Jianfeng.Liu0413@gmail.com> 1718392851 +0200 update by push
|
6
|
+
0b409e1bc918277010f5679b402d1d1dda53e15c 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718393736 +0200 update by push
|
7
|
+
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718491090 +0200 update by push
|
8
|
+
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526959 +0200 update by push
|
9
|
+
48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553464 +0200 update by push
|
10
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555183 +0200 update by push
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
Binary file
|
@@ -0,0 +1 @@
|
|
1
|
+
x�NIj1�Y��{����4&�C�`�Qk,�4f�!������K-PU�Zk�,��F�����n&�MJd�u$7=eV�1�{ظu0>x�g�h5R��1[�RZ�d�a��u��Z���ʎF��RC��~�t�ic|�B�7;�] �B����O���~�~�klc���2�!^Q�
|
Binary file
|
Binary file
|
py2ls/.git/refs/heads/main
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd
|
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd
|
py2ls/ips.py
CHANGED
@@ -36,6 +36,7 @@ import time
|
|
36
36
|
from box import Box, BoxList
|
37
37
|
from numerizer import numerize
|
38
38
|
from tqdm import tqdm
|
39
|
+
import mimetypes
|
39
40
|
|
40
41
|
def str2num(s, *args):
|
41
42
|
delimiter = None
|
@@ -779,43 +780,14 @@ def dir_name(fpath):
|
|
779
780
|
def basename(fpath):
|
780
781
|
return os.path.basename(fpath)
|
781
782
|
|
782
|
-
def finfo(fpath):
|
783
|
-
fname, fmt = os.path.splitext(fpath)
|
784
|
-
dir_par = os.path.dirname(fpath) + '/'
|
785
|
-
data = {
|
786
|
-
"size": round(os.path.getsize(fpath) / 1024 / 1024, 3),
|
787
|
-
"creation_time": time.ctime(os.path.getctime(fpath)),
|
788
|
-
"ctime": time.ctime(os.path.getctime(fpath)),
|
789
|
-
"mod_time": time.ctime(os.path.getmtime(fpath)),
|
790
|
-
"mtime": time.ctime(os.path.getmtime(fpath)),
|
791
|
-
"parent_dir": dir_par,
|
792
|
-
"fname": fname.replace(dir_par, ""),
|
793
|
-
"kind": fmt
|
794
|
-
}
|
795
|
-
extra_info = {}
|
796
|
-
if data["kind"] == ".pdf":
|
797
|
-
extra_info = pdfinfo_from_path(fpath)
|
798
|
-
|
799
|
-
return FileInfo(
|
800
|
-
size=data["size"],
|
801
|
-
creation_time=data["creation_time"],
|
802
|
-
ctime=data["ctime"],
|
803
|
-
mod_time=data["mod_time"],
|
804
|
-
mtime=data["mtime"],
|
805
|
-
parent_dir=data["parent_dir"],
|
806
|
-
fname=data["fname"],
|
807
|
-
kind=data["kind"],
|
808
|
-
extra_info=extra_info
|
809
|
-
)
|
810
|
-
|
811
783
|
def listdir(
|
812
784
|
rootdir,
|
813
785
|
kind="folder",
|
814
786
|
sort_by="name",
|
815
787
|
ascending=True,
|
816
788
|
contains=None,
|
817
|
-
orient
|
818
|
-
output=
|
789
|
+
orient="list",
|
790
|
+
output="df"
|
819
791
|
):
|
820
792
|
def sort_kind(df, by="name", ascending=True):
|
821
793
|
if df[by].dtype == 'object': # Check if the column contains string values
|
@@ -831,6 +803,69 @@ def listdir(
|
|
831
803
|
sorted_df = df.iloc[sorted_index].reset_index(drop=True)
|
832
804
|
return sorted_df
|
833
805
|
|
806
|
+
def flist(fpath, filter="all"):
|
807
|
+
all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
|
808
|
+
if isinstance(filter, list):
|
809
|
+
filt_files = []
|
810
|
+
for filter_ in filter:
|
811
|
+
filt_files.extend(flist(fpath, filter_))
|
812
|
+
return filt_files
|
813
|
+
else:
|
814
|
+
if 'all' in filter.lower():
|
815
|
+
return all_files
|
816
|
+
else:
|
817
|
+
filt_files = [f for f in all_files if istype(f, filter)]
|
818
|
+
return filt_files
|
819
|
+
|
820
|
+
def istype(fpath, filter='img'):
|
821
|
+
"""
|
822
|
+
Filters file paths based on the specified filter.
|
823
|
+
Args:
|
824
|
+
fpath (str): Path to the file.
|
825
|
+
filter (str): Filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
|
826
|
+
'zip' for ZIP archives, and 'other' for other types of files.
|
827
|
+
Returns:
|
828
|
+
bool: True if the file matches the filter, False otherwise.
|
829
|
+
"""
|
830
|
+
if 'img' in filter.lower():
|
831
|
+
return is_image(fpath)
|
832
|
+
elif 'doc' in filter.lower():
|
833
|
+
return is_document(fpath)
|
834
|
+
elif 'zip' in filter.lower():
|
835
|
+
return is_zip(fpath)
|
836
|
+
else:
|
837
|
+
return False
|
838
|
+
|
839
|
+
def is_image(fpath):
|
840
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
841
|
+
if mime_type and mime_type.startswith('image'):
|
842
|
+
return True
|
843
|
+
else:
|
844
|
+
return False
|
845
|
+
|
846
|
+
def is_document(fpath):
|
847
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
848
|
+
if mime_type and (
|
849
|
+
mime_type.startswith('text/') or
|
850
|
+
mime_type == 'application/pdf' or
|
851
|
+
mime_type == 'application/msword' or
|
852
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
853
|
+
mime_type == 'application/vnd.ms-excel' or
|
854
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
855
|
+
mime_type == 'application/vnd.ms-powerpoint' or
|
856
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
857
|
+
):
|
858
|
+
return True
|
859
|
+
else:
|
860
|
+
return False
|
861
|
+
|
862
|
+
def is_zip(fpath):
|
863
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
864
|
+
if mime_type == 'application/zip':
|
865
|
+
return True
|
866
|
+
else:
|
867
|
+
return False
|
868
|
+
|
834
869
|
if not kind.startswith("."):
|
835
870
|
kind = "." + kind
|
836
871
|
|
@@ -844,7 +879,7 @@ def listdir(
|
|
844
879
|
"path": [],
|
845
880
|
"created_time": [],
|
846
881
|
"modified_time": [],
|
847
|
-
"last_open_time":[],
|
882
|
+
"last_open_time": [],
|
848
883
|
"size": [],
|
849
884
|
"fname": [],
|
850
885
|
"fpath": [],
|
@@ -858,16 +893,20 @@ def listdir(
|
|
858
893
|
is_file = kind.lower() in file_extension.lower() and (
|
859
894
|
os.path.isfile(item_path)
|
860
895
|
)
|
861
|
-
if
|
862
|
-
|
896
|
+
if kind in ['.doc','.img','.zip']: #选择大的类别
|
897
|
+
if kind != ".folder" and not istype(item_path, kind):
|
898
|
+
continue
|
899
|
+
else: #精确到文件的后缀
|
900
|
+
if not is_folder and not is_file:
|
901
|
+
continue
|
863
902
|
f["name"].append(filename)
|
864
903
|
f["length"].append(len(filename))
|
865
904
|
f["path"].append(os.path.join(os.path.dirname(item_path), item))
|
866
|
-
fpath=os.path.join(os.path.dirname(item_path), item)
|
905
|
+
fpath = os.path.join(os.path.dirname(item_path), item)
|
867
906
|
f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
|
868
|
-
f["created_time"].append(pd.to_datetime(os.path.getctime(item_path),unit='s'))
|
869
|
-
f["modified_time"].append(pd.to_datetime(os.path.getmtime(item_path),unit='s'))
|
870
|
-
f['last_open_time'].append(pd.to_datetime(os.path.getatime(item_path),unit='s'))
|
907
|
+
f["created_time"].append(pd.to_datetime(os.path.getctime(item_path), unit='s'))
|
908
|
+
f["modified_time"].append(pd.to_datetime(os.path.getmtime(item_path), unit='s'))
|
909
|
+
f['last_open_time'].append(pd.to_datetime(os.path.getatime(item_path), unit='s'))
|
871
910
|
f["fname"].append(filename) # will be removed
|
872
911
|
f["fpath"].append(fpath) # will be removed
|
873
912
|
i += 1
|
@@ -880,32 +919,35 @@ def listdir(
|
|
880
919
|
'The directory "{}" does NOT exist. Please check the directory "rootdir".'.format(
|
881
920
|
rootdir
|
882
921
|
)
|
883
|
-
)
|
922
|
+
)
|
923
|
+
|
884
924
|
f = pd.DataFrame(f)
|
925
|
+
|
885
926
|
if contains is not None:
|
886
|
-
f = f[f["name"].str.contains(contains,case=False)]
|
927
|
+
f = f[f["name"].str.contains(contains, case=False)]
|
928
|
+
|
887
929
|
if "nam" in sort_by.lower():
|
888
|
-
|
889
|
-
f=sort_kind(f, by="name", ascending=ascending)
|
930
|
+
f = sort_kind(f, by="name", ascending=ascending)
|
890
931
|
elif "crea" in sort_by.lower():
|
891
|
-
f=sort_kind(f, by="created_time", ascending=ascending)
|
932
|
+
f = sort_kind(f, by="created_time", ascending=ascending)
|
892
933
|
elif "modi" in sort_by.lower():
|
893
|
-
f=sort_kind(f, by="modified_time", ascending=ascending)
|
934
|
+
f = sort_kind(f, by="modified_time", ascending=ascending)
|
894
935
|
elif "s" in sort_by.lower() and "z" in sort_by.lower():
|
895
|
-
f=sort_kind(f, by="size", ascending=ascending)
|
936
|
+
f = sort_kind(f, by="size", ascending=ascending)
|
937
|
+
|
896
938
|
if 'df' in output:
|
897
939
|
return f
|
898
940
|
else:
|
899
|
-
if 'l' in orient.lower():
|
941
|
+
if 'l' in orient.lower(): # list # default
|
900
942
|
res_output = Box(f.to_dict(orient="list"))
|
901
943
|
return res_output
|
902
|
-
if 'd' in orient.lower():
|
944
|
+
if 'd' in orient.lower(): # dict
|
903
945
|
return Box(f.to_dict(orient="dict"))
|
904
|
-
if 'r' in orient.lower():
|
946
|
+
if 'r' in orient.lower(): # records
|
905
947
|
return Box(f.to_dict(orient="records"))
|
906
|
-
if 'in' in orient.lower():
|
948
|
+
if 'in' in orient.lower(): # records
|
907
949
|
return Box(f.to_dict(orient="index"))
|
908
|
-
if 'se' in orient.lower():
|
950
|
+
if 'se' in orient.lower(): # records
|
909
951
|
return Box(f.to_dict(orient="series"))
|
910
952
|
|
911
953
|
# Example usage:
|
@@ -2841,3 +2883,32 @@ class FileInfo:
|
|
2841
2883
|
"kind": self.kind,
|
2842
2884
|
**{key: getattr(self, key) for key in vars(self) if key not in ["size", "creation_time", "ctime", "mod_time", "mtime", "parent_dir", "fname", "kind"]}
|
2843
2885
|
}
|
2886
|
+
|
2887
|
+
def finfo(fpath):
|
2888
|
+
fname, fmt = os.path.splitext(fpath)
|
2889
|
+
dir_par = os.path.dirname(fpath) + '/'
|
2890
|
+
data = {
|
2891
|
+
"size": round(os.path.getsize(fpath) / 1024 / 1024, 3),
|
2892
|
+
"creation_time": time.ctime(os.path.getctime(fpath)),
|
2893
|
+
"ctime": time.ctime(os.path.getctime(fpath)),
|
2894
|
+
"mod_time": time.ctime(os.path.getmtime(fpath)),
|
2895
|
+
"mtime": time.ctime(os.path.getmtime(fpath)),
|
2896
|
+
"parent_dir": dir_par,
|
2897
|
+
"fname": fname.replace(dir_par, ""),
|
2898
|
+
"kind": fmt
|
2899
|
+
}
|
2900
|
+
extra_info = {}
|
2901
|
+
if data["kind"] == ".pdf":
|
2902
|
+
extra_info = pdfinfo_from_path(fpath)
|
2903
|
+
|
2904
|
+
return FileInfo(
|
2905
|
+
size=data["size"],
|
2906
|
+
creation_time=data["creation_time"],
|
2907
|
+
ctime=data["ctime"],
|
2908
|
+
mod_time=data["mod_time"],
|
2909
|
+
mtime=data["mtime"],
|
2910
|
+
parent_dir=data["parent_dir"],
|
2911
|
+
fname=data["fname"],
|
2912
|
+
kind=data["kind"],
|
2913
|
+
extra_info=extra_info
|
2914
|
+
)
|
py2ls/netfinder.py
CHANGED
@@ -20,7 +20,9 @@ from selenium.webdriver.chrome.options import Options
|
|
20
20
|
from selenium.webdriver.support.ui import WebDriverWait
|
21
21
|
from selenium.webdriver.support import expected_conditions as EC
|
22
22
|
from webdriver_manager.chrome import ChromeDriverManager
|
23
|
-
|
23
|
+
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
24
|
+
import pprint
|
25
|
+
import mimetypes
|
24
26
|
|
25
27
|
# Set up logging
|
26
28
|
logging.basicConfig(level=logging.INFO)
|
@@ -42,61 +44,22 @@ def user_agent(browsers=["chrome", "edge", "firefox", "safari"], platforms=["pc"
|
|
42
44
|
output_ua = ua.random
|
43
45
|
if verbose:
|
44
46
|
print(output_ua)
|
45
|
-
return output_ua
|
46
|
-
# def extract_text_from_content(content,where,what,extend=False):
|
47
|
-
# if extend:
|
48
|
-
# texts = ""
|
49
|
-
|
50
|
-
# def extract_text(element):
|
51
|
-
# nonlocal texts
|
52
|
-
# if isinstance(element, str) and element.strip():
|
53
|
-
# texts += element.strip()
|
54
|
-
# elif hasattr(element, "children"):
|
55
|
-
# for child in element.children:
|
56
|
-
# extract_text(child)
|
57
|
-
|
58
|
-
# result_set = (
|
59
|
-
# content.find_all(where, class_=what)
|
60
|
-
# if what
|
61
|
-
# else content.find_all(where)
|
62
|
-
# )
|
63
|
-
# for tag in result_set:
|
64
|
-
# extract_text(tag)
|
65
|
-
|
66
|
-
# text = [tx for tx in texts.split("\n") if tx]
|
67
|
-
# return text
|
68
|
-
# else:
|
69
|
-
# result_set = (
|
70
|
-
# content.find_all(where, class_=what)
|
71
|
-
# if what
|
72
|
-
# else content.find_all(where)
|
73
|
-
# )
|
74
|
-
# texts_ = " ".join(tag.get_text() + "\n" for tag in result_set)
|
75
|
-
# texts = [tx.strip() for tx in texts_.split("\n") if tx]
|
76
|
-
# return texts
|
77
|
-
# def extract_text_from_content(content, where, what=None, extend=True):
|
78
|
-
# if extend:
|
79
|
-
# def extract_text(element):
|
80
|
-
# texts = ""
|
81
|
-
# if isinstance(element, str) and element.strip():
|
82
|
-
# texts += element.strip()
|
83
|
-
# elif hasattr(element, "children"):
|
84
|
-
# for child in element.children:
|
85
|
-
# texts += extract_text(child)
|
86
|
-
# return texts
|
87
|
-
|
88
|
-
# result_set = content.find_all(where, class_=what) if what else content.find_all(where)
|
89
|
-
# texts = ""
|
90
|
-
# for tag in result_set:
|
91
|
-
# texts += extract_text(tag) + "\n"
|
92
|
-
# text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
|
93
|
-
# return text_list
|
94
|
-
# else:
|
95
|
-
# result_set = content.find_all(where, class_=what) if what else content.find_all(where)
|
96
|
-
# texts_ = " ".join(tag.get_text() for tag in result_set)
|
97
|
-
# texts = [tx.strip() for tx in texts_.split("\n") if tx.strip()]
|
98
|
-
# return texts
|
47
|
+
return output_ua
|
99
48
|
def extract_text_from_content(content, content_type="text/html", where=None, what=None, extend=True, **kwargs):
|
49
|
+
"""
|
50
|
+
Extracts text from the given content based on the specified content type and search criteria.
|
51
|
+
|
52
|
+
Parameters:
|
53
|
+
- content (str/BeautifulSoup): The content to extract text from.
|
54
|
+
- content_type (str): The type of content, e.g., "text/html" or "application/json".
|
55
|
+
- where (str/list): The HTML tag or list of tags to search for.
|
56
|
+
- what (str): The class name to filter the tags (optional).
|
57
|
+
- extend (bool): Whether to recursively extract text from child elements.
|
58
|
+
- **kwargs: Additional keyword arguments for the search (e.g., id, attributes).
|
59
|
+
|
60
|
+
Returns:
|
61
|
+
- list: A list of extracted text segments.
|
62
|
+
"""
|
100
63
|
if content is None:
|
101
64
|
logger.error("Content is None, cannot extract text.")
|
102
65
|
return []
|
@@ -109,35 +72,41 @@ def extract_text_from_content(content, content_type="text/html", where=None, wha
|
|
109
72
|
where = None
|
110
73
|
return extract_text_from_json(content, where)
|
111
74
|
elif 'text' in content_type:
|
112
|
-
if
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
elif hasattr(element, "children"):
|
118
|
-
for child in element.children:
|
119
|
-
texts += extract_text(child)
|
120
|
-
return texts
|
121
|
-
|
122
|
-
search_kwargs = {**kwargs}
|
123
|
-
if what:
|
124
|
-
search_kwargs["class_"] = what
|
125
|
-
|
126
|
-
result_set = content.find_all(where, **search_kwargs)
|
127
|
-
texts = ""
|
128
|
-
for tag in result_set:
|
129
|
-
texts += extract_text(tag) + "\n"
|
130
|
-
text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
|
131
|
-
return text_list
|
75
|
+
if isinstance(where, list):
|
76
|
+
res=[]
|
77
|
+
for where_ in where:
|
78
|
+
res.extend(extract_text_from_content(content, content_type="text/html", where=where_, what=what, extend=extend, **kwargs))
|
79
|
+
return res
|
132
80
|
else:
|
133
|
-
|
134
|
-
|
135
|
-
|
81
|
+
if extend:
|
82
|
+
def extract_text(element):
|
83
|
+
texts = ""
|
84
|
+
if isinstance(element, str) and element.strip():
|
85
|
+
texts += element.strip()
|
86
|
+
elif hasattr(element, "children"):
|
87
|
+
for child in element.children:
|
88
|
+
texts += extract_text(child)
|
89
|
+
return texts
|
90
|
+
|
91
|
+
search_kwargs = {**kwargs}
|
92
|
+
if what:
|
93
|
+
search_kwargs["class_"] = what
|
94
|
+
|
95
|
+
result_set = content.find_all(where, **search_kwargs)
|
96
|
+
texts = ""
|
97
|
+
for tag in result_set:
|
98
|
+
texts += extract_text(tag) + "\n"
|
99
|
+
text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
|
100
|
+
return text_list
|
101
|
+
else:
|
102
|
+
search_kwargs = {**kwargs}
|
103
|
+
if what:
|
104
|
+
search_kwargs["class_"] = what
|
136
105
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
106
|
+
result_set = content.find_all(where, **search_kwargs)
|
107
|
+
texts_ = " ".join(tag.get_text() for tag in result_set)
|
108
|
+
texts = [tx.strip() for tx in texts_.split("\n") if tx.strip()]
|
109
|
+
return texts
|
141
110
|
|
142
111
|
def extract_text_from_json(content, key=None):
|
143
112
|
if key:
|
@@ -344,7 +313,7 @@ def pdf_detector(url, contains=None, dir_save=None,booster=False):
|
|
344
313
|
pdf_links = filter_links(links=links_all, contains=["pdf"])
|
345
314
|
|
346
315
|
if pdf_links:
|
347
|
-
|
316
|
+
pprint.pp(f"pdf detected\n{pdf_links}")
|
348
317
|
else:
|
349
318
|
print('no pdf file')
|
350
319
|
if dir_save:
|
@@ -366,7 +335,7 @@ def pdf_detector(url, contains=None, dir_save=None,booster=False):
|
|
366
335
|
print(f'{len(fnames)} files are downloaded:\n{fnames}\n to local: \n{dir_save}')
|
367
336
|
|
368
337
|
|
369
|
-
def find_img(url, dir_save="images"):
|
338
|
+
def find_img(url, dir_save="images", verbose=True):
|
370
339
|
"""
|
371
340
|
Save images referenced in HTML content locally.
|
372
341
|
Args:
|
@@ -381,7 +350,6 @@ def find_img(url, dir_save="images"):
|
|
381
350
|
if "html" in content_type.lower():
|
382
351
|
# Create the directory if it doesn't exist
|
383
352
|
os.makedirs(dir_save, exist_ok=True)
|
384
|
-
|
385
353
|
# Parse HTML content if it's not already a BeautifulSoup object
|
386
354
|
if isinstance(content, str):
|
387
355
|
content = BeautifulSoup(content, "html.parser")
|
@@ -390,13 +358,9 @@ def find_img(url, dir_save="images"):
|
|
390
358
|
images = content.find_all("img", src=True)
|
391
359
|
for i, image in enumerate(images):
|
392
360
|
try:
|
393
|
-
# Get the image URL
|
394
361
|
image_url = image["src"]
|
395
|
-
|
396
362
|
if image_url.startswith("data:image"):
|
397
|
-
# Extract the image data from the data URI
|
398
363
|
mime_type, base64_data = image_url.split(",", 1)
|
399
|
-
# Determine the file extension from the MIME type
|
400
364
|
if ":" in mime_type:
|
401
365
|
# image_extension = mime_type.split(":")[1].split(";")[0]
|
402
366
|
image_extension = (
|
@@ -406,51 +370,74 @@ def find_img(url, dir_save="images"):
|
|
406
370
|
image_extension = (
|
407
371
|
"png" # Default to PNG if extension is not specified
|
408
372
|
)
|
409
|
-
# if 'svg+xml' in image_extension:
|
410
|
-
# image_extension='svg'
|
411
373
|
image_data = base64.b64decode(base64_data)
|
412
|
-
# Save the image data to a file
|
413
374
|
image_filename = os.path.join(
|
414
375
|
dir_save, f"image_{i}.{image_extension}"
|
415
376
|
)
|
416
377
|
with open(image_filename, "wb") as image_file:
|
417
378
|
image_file.write(image_data)
|
418
|
-
|
419
|
-
# Update the src attribute of the image tag to point to the local file
|
420
379
|
image["src"] = image_filename
|
380
|
+
if verbose:
|
381
|
+
plt.imshow(image_data)
|
421
382
|
else:
|
422
383
|
# Construct the absolute image URL
|
423
384
|
absolute_image_url = urljoin(url, image_url)
|
424
|
-
|
425
385
|
# Parse the image URL to extract the file extension
|
426
386
|
parsed_url = urlparse(absolute_image_url)
|
427
387
|
image_extension = os.path.splitext(parsed_url.path)[1]
|
428
|
-
|
429
388
|
# Download the image
|
430
389
|
image_response = requests.get(absolute_image_url,proxies=proxies_glob)
|
431
|
-
|
432
390
|
# Save the image to a file
|
433
391
|
image_filename = os.path.join(
|
434
392
|
dir_save, f"image_{i}{image_extension}"
|
435
393
|
)
|
436
394
|
with open(image_filename, "wb") as image_file:
|
437
395
|
image_file.write(image_response.content)
|
438
|
-
|
439
396
|
# Update the src attribute of the image tag to point to the local file
|
440
397
|
image["src"] = image_filename
|
441
398
|
except (requests.RequestException, KeyError) as e:
|
442
399
|
print(f"Failed to process image {image_url}: {e}")
|
443
400
|
print(f"images were saved at\n{dir_save}")
|
444
|
-
|
401
|
+
if verbose:
|
402
|
+
display_thumbnail_figure(flist(dir_save,filter='img'),dpi=200)
|
445
403
|
return content
|
446
404
|
|
405
|
+
def display_thumbnail_figure(dir_img_list,figsize=(10,10),dpi=100):
|
406
|
+
import matplotlib.pyplot as plt
|
407
|
+
from PIL import Image
|
408
|
+
"""
|
409
|
+
Display a thumbnail figure of all images in the specified directory.
|
410
|
+
Args:
|
411
|
+
dir_img_list (list): List of the Directory containing the images.
|
412
|
+
"""
|
413
|
+
num_images = len(dir_img_list)
|
414
|
+
|
415
|
+
if num_images == 0:
|
416
|
+
print("No images found to display.")
|
417
|
+
return
|
418
|
+
|
419
|
+
# Determine grid size
|
420
|
+
grid_size = int(num_images ** 0.5) + 1
|
421
|
+
|
422
|
+
fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)
|
423
|
+
|
424
|
+
for ax, image_file in zip(axs.flatten(), dir_img_list):
|
425
|
+
img = Image.open(image_file)
|
426
|
+
ax.imshow(img)
|
427
|
+
ax.axis('off') # Hide axes
|
428
|
+
|
429
|
+
# Hide remaining subplots
|
430
|
+
for ax in axs.flatten()[num_images:]:
|
431
|
+
ax.axis('off')
|
432
|
+
|
433
|
+
plt.tight_layout()
|
434
|
+
plt.show()
|
447
435
|
|
448
436
|
def content_div_class(content, div="div", div_class="highlight"):
|
449
437
|
texts = [div.text for div in content.find_all(div, class_=div_class)]
|
450
438
|
return texts
|
451
439
|
|
452
440
|
|
453
|
-
|
454
441
|
def fetch_selenium(
|
455
442
|
url,
|
456
443
|
where="div",
|
@@ -468,7 +455,11 @@ def fetch_selenium(
|
|
468
455
|
username_by=By.NAME,
|
469
456
|
password_by=By.NAME,
|
470
457
|
submit_by=By.NAME,
|
458
|
+
# capability='eager', # eager or none
|
471
459
|
proxy=None, # Add proxy parameter
|
460
|
+
javascript=True, # Add JavaScript option
|
461
|
+
disable_images=False, # Add option to disable images
|
462
|
+
iframe_name=None, # Add option to handle iframe
|
472
463
|
**kwargs
|
473
464
|
):
|
474
465
|
chrome_options = Options()
|
@@ -478,11 +469,18 @@ def fetch_selenium(
|
|
478
469
|
chrome_options.add_argument(f"user-agent={user_agent()}")
|
479
470
|
if proxy:
|
480
471
|
chrome_options.add_argument(f'--proxy-server={proxy}')
|
481
|
-
|
482
|
-
|
472
|
+
if disable_images:
|
473
|
+
prefs = {"profile.managed_default_content_settings.images": 2}
|
474
|
+
chrome_options.add_experimental_option("prefs", prefs)
|
475
|
+
# chrome_options.page_load_strategy = capability
|
476
|
+
service = Service(ChromeDriverManager().install())
|
483
477
|
for attempt in range(retry):
|
484
478
|
try:
|
485
479
|
driver = webdriver.Chrome(service=service, options=chrome_options)
|
480
|
+
|
481
|
+
if not javascript:
|
482
|
+
driver.execute_cdp_cmd("Emulation.setScriptExecutionDisabled", {"value": True})
|
483
|
+
|
486
484
|
if login_url:
|
487
485
|
driver.get(login_url)
|
488
486
|
WebDriverWait(driver, timeout).until(
|
@@ -496,6 +494,13 @@ def fetch_selenium(
|
|
496
494
|
).click()
|
497
495
|
|
498
496
|
driver.get(url)
|
497
|
+
|
498
|
+
if iframe_name:
|
499
|
+
iframe = WebDriverWait(driver, timeout).until(
|
500
|
+
EC.presence_of_element_located((By.NAME, iframe_name))
|
501
|
+
)
|
502
|
+
driver.switch_to.frame(iframe)
|
503
|
+
|
499
504
|
WebDriverWait(driver, timeout).until(
|
500
505
|
EC.presence_of_element_located((by, where))
|
501
506
|
)
|
@@ -503,7 +508,7 @@ def fetch_selenium(
|
|
503
508
|
driver.quit()
|
504
509
|
|
505
510
|
content = BeautifulSoup(page_source, "html.parser")
|
506
|
-
texts=extract_text_from_content(content, where=where,what=what,extend=extend
|
511
|
+
texts = extract_text_from_content(content, where=where, what=what, extend=extend, **kwargs)
|
507
512
|
return texts
|
508
513
|
except Exception as e:
|
509
514
|
# logger.error(f"Attempt {attempt + 1} failed with error ")
|
@@ -518,36 +523,19 @@ def fetch_selenium(
|
|
518
523
|
|
519
524
|
|
520
525
|
def fetch(url, where="div", what=None, extend=True, booster=False,retry=2,verbose=False, **kws):
|
521
|
-
# for attempt in range(retry):
|
522
|
-
# if verbose and attempt==0:
|
523
|
-
# xample = 'fetch(url,where="div",what=None,extend=True,by=By.TAG_NAME,timeout=10,retry=3,login_url=None,username=None,password=None,username_field="username",password_field="password",submit_field="submit",username_by=By.NAME,password_by=By.NAME,submit_by=By.NAME)'
|
524
|
-
# print(xample)
|
525
|
-
# content_type, content = fetch_all(url, parser="html.parser")
|
526
|
-
# texts=extract_text_from_content(content,content_type=content_type,where=where,what=what,extend=extend, **kws)
|
527
|
-
# if isinstance(texts,pd.core.frame.DataFrame):
|
528
|
-
# condition=[texts.empty, attempt != retry - 1]
|
529
|
-
# else:
|
530
|
-
# condition=[not texts, attempt != retry - 1]
|
531
|
-
# if all(condition):
|
532
|
-
# texts = fetch(url=url, where=where, what=what, extend=extend, retry=retry-1, **kws)
|
533
|
-
# sleep(random.uniform(0.5, 1.5))
|
534
526
|
for attempt in range(retry):
|
535
527
|
if verbose and attempt==0:
|
536
528
|
xample = 'fetch(url,where="div",what=None,extend=True,by=By.TAG_NAME,timeout=10,retry=3,login_url=None,username=None,password=None,username_field="username",password_field="password",submit_field="submit",username_by=By.NAME,password_by=By.NAME,submit_by=By.NAME)'
|
537
529
|
print(xample)
|
538
530
|
content_type, content = fetch_all(url, parser="html.parser")
|
539
531
|
texts=extract_text_from_content(content,content_type=content_type,where=where,what=what,extend=extend, **kws)
|
540
|
-
if isinstance(texts, pd.core.frame.DataFrame):
|
541
|
-
# condition=[texts.empty, attempt != retry - 1]
|
532
|
+
if isinstance(texts, pd.core.frame.DataFrame):
|
542
533
|
if not texts.empty:
|
543
534
|
break
|
544
|
-
else:
|
545
|
-
# condition=[not texts, attempt != retry - 1]
|
535
|
+
else:
|
546
536
|
if texts:
|
547
537
|
break
|
548
|
-
|
549
|
-
# texts = fetch(url=url, where=where, what=what, extend=extend, retry=retry-1, **kws)
|
550
|
-
sleep(random.uniform(0.5, 1.5))
|
538
|
+
sleep(random.uniform(0.5, 1.5))
|
551
539
|
if isinstance(texts,pd.core.frame.DataFrame):
|
552
540
|
condition_=[texts.empty, booster]
|
553
541
|
else:
|
@@ -777,4 +765,68 @@ def find_all(url, dir_save=None):
|
|
777
765
|
else:
|
778
766
|
df.to_csv(dir_save)
|
779
767
|
print(f"file has been saved at\n{dir_save}")
|
780
|
-
return df
|
768
|
+
return df
|
769
|
+
|
770
|
+
|
771
|
+
def flist(fpath, filter="all"):
    """
    List the regular files directly inside ``fpath``, optionally by type.

    Args:
        fpath (str): Directory to scan; sub-directories are not descended into.
        filter (str | list): A string containing 'all' (case-insensitive)
            returns every file. Any other string is passed to ``istype`` for
            each file. A list applies each entry in turn.

    Returns:
        list: Full paths (``fpath`` joined with the file name). For a list
        filter the results are grouped in the order of the filter entries,
        and each file is reported once even if several entries match it.
    """
    # Scan the directory once, however many filters are requested.
    all_files = [
        path
        for path in (os.path.join(fpath, name) for name in os.listdir(fpath))
        if os.path.isfile(path)
    ]

    def _select(kind):
        # Nested lists are flattened, as the recursive original did.
        if isinstance(kind, list):
            picked = []
            for sub_kind in kind:
                picked.extend(_select(sub_kind))
            # dict.fromkeys drops duplicates while keeping first-seen order.
            return list(dict.fromkeys(picked))
        if 'all' in kind.lower():
            return all_files
        return [f for f in all_files if istype(f, kind)]

    return _select(filter)
|
784
|
+
|
785
|
+
def istype(fpath, filter='img'):
    """
    Report whether a file path matches the requested file category.

    Args:
        fpath (str): Path to the file.
        filter (str): Category keyword, matched case-insensitively as a
            substring: 'img' checks for images, 'doc' for documents and
            'zip' for ZIP archives. Checked in that order.

    Returns:
        bool: Result of the matching check, or False when the keyword
        names none of the categories above.
    """
    kind = filter.lower()
    if 'img' in kind:
        return is_image(fpath)
    if 'doc' in kind:
        return is_document(fpath)
    if 'zip' in kind:
        return is_zip(fpath)
    return False
|
803
|
+
|
804
|
+
def is_image(fpath):
    """
    Report whether ``fpath`` looks like an image, judged by its file name.

    The decision comes from ``mimetypes.guess_type``; the file is not opened.

    Args:
        fpath (str): Path or file name to classify.

    Returns:
        bool: True when the guessed MIME type is ``image/*``, False
        otherwise (including when no type can be guessed).
    """
    mime_type, _ = mimetypes.guess_type(fpath)
    # bool() keeps the result a real boolean when mime_type is None.
    return bool(mime_type) and mime_type.startswith('image/')
|
810
|
+
|
811
|
+
# Non-text MIME types that is_document treats as documents.
_DOCUMENT_MIME_TYPES = frozenset({
    'application/pdf',
    'application/msword',
    'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
    'application/vnd.ms-excel',
    'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    'application/vnd.ms-powerpoint',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
})


def is_document(fpath):
    """
    Report whether ``fpath`` looks like a document, judged by its file name.

    The decision comes from ``mimetypes.guess_type``; the file is not opened.
    Any ``text/*`` type counts, as do PDF and the Word, Excel and PowerPoint
    types (both legacy and OpenXML).

    Args:
        fpath (str): Path or file name to classify.

    Returns:
        bool: True for a document type, False otherwise (including when no
        type can be guessed).
    """
    mime_type, _ = mimetypes.guess_type(fpath)
    if not mime_type:
        return False
    return mime_type.startswith('text/') or mime_type in _DOCUMENT_MIME_TYPES
|
826
|
+
|
827
|
+
def is_zip(fpath):
    """
    Report whether ``fpath`` looks like a ZIP archive, judged by its file name.

    The decision comes from ``mimetypes.guess_type``; the file is not opened.

    Args:
        fpath (str): Path or file name to classify.

    Returns:
        bool: True when the guessed MIME type is a ZIP type, False otherwise
        (including when no type can be guessed).
    """
    mime_type, _ = mimetypes.guess_type(fpath)
    # 'application/x-zip-compressed' is what Windows registers for .zip, and
    # mimetypes picks it up from the registry there.
    return mime_type in ('application/zip', 'application/x-zip-compressed')
|
@@ -1,5 +1,5 @@
|
|
1
|
-
py2ls/.git/COMMIT_EDITMSG,sha256=
|
2
|
-
py2ls/.git/FETCH_HEAD,sha256=
|
1
|
+
py2ls/.git/COMMIT_EDITMSG,sha256=E6ggbgaSjmHbt33GuQe6KX5ipK0Dp29QiuxxpBkqm60,131
|
2
|
+
py2ls/.git/FETCH_HEAD,sha256=Cdzu13JkjE_s_0YvmkAIac4l2WuokfA-YimhsDkYo1Q,100
|
3
3
|
py2ls/.git/HEAD,sha256=KNJb-Cr0wOK3L1CVmyvrhZ4-YLljCl6MYD2tTdsrboA,21
|
4
4
|
py2ls/.git/config,sha256=XswTg1Ts7_7IBDlKHh4OF_0Tq7v4wW7BXb6xSVInSec,345
|
5
5
|
py2ls/.git/description,sha256=ZzMxc0Ca26m45Twn1DDnOHqin5VHEZ9uOTBrScIXSjE,16
|
@@ -16,24 +16,33 @@ py2ls/.git/hooks/pre-receive.sample,sha256=pMPSuce7P9jRRBwxvU7nGlldZrRPz0ndsxAlI
|
|
16
16
|
py2ls/.git/hooks/prepare-commit-msg.sample,sha256=6d3KpBif3dJe2X_Ix4nsp7bKFjkLI5KuMnbwyOGqRhk,1492
|
17
17
|
py2ls/.git/hooks/push-to-checkout.sample,sha256=pT0HQXmLKHxt16-mSu5HPzBeZdP0lGO7nXQI7DsSv18,2783
|
18
18
|
py2ls/.git/hooks/update.sample,sha256=jV8vqD4QPPCLV-qmdSHfkZT0XL28s32lKtWGCXoU0QY,3650
|
19
|
-
py2ls/.git/index,sha256=
|
19
|
+
py2ls/.git/index,sha256=Bxygoi0rCfEqm0Lnpaba5ZsAFg_NS8TYUbR2o4uwnnI,1338
|
20
20
|
py2ls/.git/info/exclude,sha256=ZnH-g7egfIky7okWTR8nk7IxgFjri5jcXAbuClo7DsE,240
|
21
|
-
py2ls/.git/logs/HEAD,sha256=
|
22
|
-
py2ls/.git/logs/refs/heads/main,sha256=
|
23
|
-
py2ls/.git/logs/refs/remotes/origin/HEAD,sha256=
|
24
|
-
py2ls/.git/logs/refs/remotes/origin/main,sha256=
|
21
|
+
py2ls/.git/logs/HEAD,sha256=Wiu2pJYW-hD-oZ452dEJyYjYf9wfjUjvJONH7wAi8Ug,1638
|
22
|
+
py2ls/.git/logs/refs/heads/main,sha256=Wiu2pJYW-hD-oZ452dEJyYjYf9wfjUjvJONH7wAi8Ug,1638
|
23
|
+
py2ls/.git/logs/refs/remotes/origin/HEAD,sha256=032ZeLPOt_lRcwk8gTcVOzQd81zTAkkTk9P1PtPr6Aw,4212
|
24
|
+
py2ls/.git/logs/refs/remotes/origin/main,sha256=YU2rplaPYPyRcfSuRQ65bFZ8oQwKogJrBlSXYhD9Pa4,1520
|
25
25
|
py2ls/.git/objects/0b/409e1bc918277010f5679b402d1d1dda53e15c,sha256=y5S1XaGxJz1NXi-SPWjPC_NKIqqSbZv9oOg74MzBihY,156
|
26
26
|
py2ls/.git/objects/14/449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed,sha256=PomZFmCUCQM1ii0wH-OJGSHLQCTqRtIwE5w3C0TtzSY,171
|
27
27
|
py2ls/.git/objects/15/a8e468aacfcb440e090020f36d0b985d45da23,sha256=xiRunMcN5I_B2hHgBUFupR-F0b8H_CQTmmAZG9XkZik,3215
|
28
|
+
py2ls/.git/objects/1d/fe9d9633b24ea560354f4f93d39c6e5f163ea0,sha256=mV_84wLqIitnSYmzfrNpTzwVP9AmksiRI0Fjltwl0Pg,8872
|
29
|
+
py2ls/.git/objects/24/6b368b986f758630c46dc02b7fa512b53422f7,sha256=sw7ERFCFu7m6fnURAqQfQ4GWShaARr-Vc6GRnlOPkxU,8512
|
28
30
|
py2ls/.git/objects/25/b796accd261b9135fd32a2c00785f68edf6c46,sha256=4ic5vOwEdfbGL8oARSVEeAnSoDs14-gggGZEL-61nYE,564
|
29
31
|
py2ls/.git/objects/30/a2f8da47ee947811dc8d993f5a06a45de374f4,sha256=u5W33_qNtTs1-U8Fardx-zB_udqKvuCm5kiw1mQGdsU,3218
|
30
32
|
py2ls/.git/objects/36/b4a1b7403abc6c360f8fe2cb656ab945254971,sha256=X18sHo17gQTxusNptoILoWxSfU0ulrsZvbETlBtV5aY,2327
|
33
|
+
py2ls/.git/objects/36/e56a361f526eafa59c5235a5c990bf288b5f9c,sha256=7L1L-iqVvuufrlfEE2myD0-QnAPueFMySKetu08W-Pc,34216
|
31
34
|
py2ls/.git/objects/36/ef43e50009e59db11812c258846d9e38718173,sha256=0nwCwQSuQAdGyD2NfEK-_L12ydE9nGVKBXOfFq_Lndc,169
|
35
|
+
py2ls/.git/objects/3b/bd972aa7ad680858f8dfbd0f7fcd97756f0d6f,sha256=MQWEzML3wbb4GixiHDCHrxgbXLQitrYDstT1plhmQSU,169
|
36
|
+
py2ls/.git/objects/3c/bbe5f4173d165127b9ad96119f1ec24c306ffc,sha256=S1BXemROYtzRaj5WXLPYnTmPTBQDKovMEN0GRLul-I4,33489
|
32
37
|
py2ls/.git/objects/3f/d6561300938afbb3d11976cf9c8f29549280d9,sha256=91oqbTWfUE1d_hT_1ptYmRUb5pOQ1X4oxQxpF6NXjKU,8501
|
38
|
+
py2ls/.git/objects/43/dbd49b2ee367c5434dd545e3b5795434f2ef0b,sha256=DAzt0dWp2KsuuImCKp7N9ia7KaCDNqwB-tYIx3Wf_c0,565
|
39
|
+
py2ls/.git/objects/48/a88fc5806305d0bb0755ee6801161b79696972,sha256=f3JStE39k_hPGE-WRwqZtDTjQkfOmBVb_6-ELBbScjI,203
|
33
40
|
py2ls/.git/objects/58/20a729045d4dc7e37ccaf8aa8eec126850afe2,sha256=3Pf6NS8OTK4EdHZGVeJ421BtK7w4WJncQDBauZI_wW4,34
|
34
41
|
py2ls/.git/objects/60/f273eb1c412d916fa3f11318a7da7a9911b52a,sha256=aJD9iF_LmYSrqDepXFBZKN1yMYbQczVkN_wnrDosBdI,5620
|
35
42
|
py2ls/.git/objects/61/570cec8c061abe74121f27f5face6c69b98f99,sha256=IQZi5MkbRu3ToRUPsRcXuh1Xa3pkAz_HDRCVhNL89ds,5753
|
43
|
+
py2ls/.git/objects/64/27a4edff08f93d98f511418423f09f2ab90bcd,sha256=RyNngwk9fvdvvvywmNfllnim718fWNjVauH9U2y8Q2s,258
|
36
44
|
py2ls/.git/objects/69/13c452ca319f7cbf6a0836dc10a5bb033c84e4,sha256=NYLQQZTfd0htZst42ALS2dmryv1q_l1N19ZfHEbz_38,3193
|
45
|
+
py2ls/.git/objects/6b/7fde264d93a7a0986d394c46c7650d0ce2ab92,sha256=iIl0-RF0wd6BSEjzczgUyApxc899PbdTl04JbDn6_-Q,166
|
37
46
|
py2ls/.git/objects/78/3d4167bc95c9d2175e0df03ef1c1c880ba75ab,sha256=SK2QDjDBiDhVMG1I5p19g4RbEm2Rax7mYnxawmVZYxs,15523
|
38
47
|
py2ls/.git/objects/79/7ae089b2212a937840e215276005ce76881307,sha256=lQOKF2pb1JvipI3eT79X0-TuMGWsy1A-Yw4BCgKZNOM,33472
|
39
48
|
py2ls/.git/objects/7e/5956c806b5edc344d46dab599dec337891ba1f,sha256=sfqJBiSNj-gyJo4D7xkmRAo76mC2ztjqeZZsl4ifULA,162
|
@@ -42,20 +51,26 @@ py2ls/.git/objects/8e/55a7d2b96184030211f20c9b9af201eefcac82,sha256=yW-jVYeCTWR-
|
|
42
51
|
py2ls/.git/objects/91/c69ad88fe0ba94aa7859fb5f7edac5e6f1a3f7,sha256=Kk2MWCO1OcShYuABGzp2O9LiWGDfDkcZtd0oy4nY6RU,9529
|
43
52
|
py2ls/.git/objects/9d/0df52899fe95279059286d9c0ec42287edc168,sha256=67nV3TLo-fwe4lt0wwvxoDnVNHc1IpapRyAY2STP3iI,564
|
44
53
|
py2ls/.git/objects/a5/ec8f74642fbba27f7ea78c53b372ae0c7dedce,sha256=Sl17Ka_UfjSZyEVDLv3yz8TjXL3O1u3gqOn8sXFPvTM,565
|
54
|
+
py2ls/.git/objects/a7/3e13eafee65c5b8d73ad2d3ea46d0eee82f0d3,sha256=iv3uTzna5XBzTTwF5ZTOpdrCiv0wqz1fuDpZ-m8QO2I,565
|
45
55
|
py2ls/.git/objects/b0/56be4be89ba6b76949dd641df45bb7036050c8,sha256=8Y7z30eNceBd5QIx09QfMp5cYBbrgUllmats0kvJEJ4,132
|
46
56
|
py2ls/.git/objects/b0/9cd7856d58590578ee1a4f3ad45d1310a97f87,sha256=82dx4hIdMpdcB64e5PU1s2gZFVkTvrj1cPwwJ_kasNU,4444
|
47
57
|
py2ls/.git/objects/b2/18e6a0f0f1c4df8cdefa9852058348abc713b7,sha256=hOQfdyzDZctjoge0-pAcEDel5XHVPNfOtrMNyFPUOIE,564
|
58
|
+
py2ls/.git/objects/bb/934eb33bc1a8b85630bf680caffd99560c1b8f,sha256=ggehjexUsWlskHJvHxW7u6U0otB0OCItmIZdT9O-3OU,9670
|
48
59
|
py2ls/.git/objects/c4/cba65f1163661999ee4b8ed23342b63bc1300c,sha256=rwSdKt-C98nUQ_B-7imY4fYRYmn29MQc4SIu9wruHeo,566
|
60
|
+
py2ls/.git/objects/c6/7f17e5707313600efcb85e9a3fedea35dba591,sha256=TL7rDIWiaWlk8iIwqPst7St5Xr2otPs-vp17GPlET7o,565
|
61
|
+
py2ls/.git/objects/cf/0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d,sha256=T_nV0GrgpVu3mOJ4fYcCW98oCunzgqy0DnSX0luy04Q,183
|
49
62
|
py2ls/.git/objects/d9/005f2cc7fc4e65f14ed5518276007c08cf2fd0,sha256=IJIoz93V7pf9yx43U1JdN8gBq_LWtw8A9Z2YMPnq_B0,1450
|
50
63
|
py2ls/.git/objects/d9/c2403fd166ce791b4e9d0c6792ed8342c71fcd,sha256=uD7BsKdrmN-9FStTpwsRWh-XxVXeDsV4dGjFkaMIIs8,170
|
64
|
+
py2ls/.git/objects/db/ffa8ea7bda721d0cee7b9e4ce5b2ef927733ff,sha256=GhDkvP6JYV26qVg5ETPys1ZEnGlsct9hiXCc24Ky4Xg,565
|
51
65
|
py2ls/.git/objects/df/e0770424b2a19faf507a501ebfc23be8f54e7b,sha256=vCdlxwEidekh8i-5TVMVgSLGk9DPZCZAbWqvGYSKQ9c,76
|
52
66
|
py2ls/.git/objects/e3/5a4dafc50850cacac7bf76c56db2715cbda2c4,sha256=GAcBj3YSEbm6tm7fGD6al16uBo8LtEtjZ2Hi-UgIsUg,3290
|
53
67
|
py2ls/.git/objects/e9/391ffe371f1cc43b42ef09b705d9c767c2e14f,sha256=RWTy2n8L2XxZQknBFyPczA0Aa_4gSG_Ybcr8e8v4ccc,10264
|
68
|
+
py2ls/.git/objects/f4/b64d3107b39e3ad6f540c6607004ea34e6c024,sha256=0egAtqc0x8hc7U1z91tIjcRhSd_BT2a_gxZxo_7NTJA,564
|
54
69
|
py2ls/.git/objects/f7/c98ba5c2f903e603b1f5e63d49fbc8a43815cc,sha256=tYbi3A7irrIPB_11bwItuof0Vc9a0MDuLFMNAzRsG3A,33467
|
55
70
|
py2ls/.git/objects/fc/292e793ecfd42240ac43be407023bd731fa9e7,sha256=hGIYoxKWNT3IPwk3DE4l3FLBbUYF-kXcHcx7KrH9uS0,1971
|
56
|
-
py2ls/.git/refs/heads/main,sha256=
|
71
|
+
py2ls/.git/refs/heads/main,sha256=XWF5fg25wz0SqFKCM1pFdyA5I_3pkrwsU8Ql6LH7X0E,41
|
57
72
|
py2ls/.git/refs/remotes/origin/HEAD,sha256=K7aiSqD8bEhBAPXVGim7rYQc0sdV9dk_qiBOXbtOsrQ,30
|
58
|
-
py2ls/.git/refs/remotes/origin/main,sha256=
|
73
|
+
py2ls/.git/refs/remotes/origin/main,sha256=XWF5fg25wz0SqFKCM1pFdyA5I_3pkrwsU8Ql6LH7X0E,41
|
59
74
|
py2ls/.gitattributes,sha256=Gh2-F2vCM7SZ01pX23UT8pQcmauXWfF3gwyRSb6ZAFs,66
|
60
75
|
py2ls/.gitignore,sha256=y7GvbD_zZkjPVVIue8AyiuFkDMuUbvMaV65Lgu89To8,2763
|
61
76
|
py2ls/LICENSE,sha256=UOZ1F5fFDe3XXvG4oNnkL1-Ecun7zpHzRxjp-XsMeAo,11324
|
@@ -66,12 +81,12 @@ py2ls/correlators.py,sha256=RbOaJIPLCHJtUm5SFi_4dCJ7VFUPWR0PErfK3K26ad4,18243
|
|
66
81
|
py2ls/dbhandler.py,sha256=i9dNrpHyx0oIaFieHI4X4tsrCdN-aFxudPTDOgy9Ppo,3574
|
67
82
|
py2ls/freqanalysis.py,sha256=F4218VSPbgL5tnngh6xNCYuNnfR-F_QjECUUxrPYZss,32594
|
68
83
|
py2ls/internet_finder.py,sha256=I-jPepbBhpDaOAsD2MqbKMe1CBN8w1PYo0CjNFkaeeU,19149
|
69
|
-
py2ls/ips.py,sha256=
|
70
|
-
py2ls/netfinder.py,sha256=
|
84
|
+
py2ls/ips.py,sha256=MH_TRP_lz9AuWMLjPNgTTxNFHcvLLN2XKGQcG9txjP0,121806
|
85
|
+
py2ls/netfinder.py,sha256=EbNH-QMFNYsu_eJ68zqEEJRJ5uAYOCrtkqfoC9ZJpo4,31207
|
71
86
|
py2ls/sleep_events_detectors.py,sha256=36MCuRrpurn0Uvzpo3p3b3_JlVsRNHSWCXbJxCGM3mg,51546
|
72
87
|
py2ls/translator.py,sha256=QfDUO0-pXHGMBFZBefiBHzOrC93-__N5sUQY_VP4wes,29734
|
73
88
|
py2ls/version.py,sha256=CactNZqrHHYTPrkHKccy2WKXmaiUdtTgPqSjFyVXnJk,18
|
74
89
|
py2ls/wb_detector.py,sha256=7y6TmBUj9exCZeIgBAJ_9hwuhkDh1x_-yg4dvNY1_GQ,6284
|
75
|
-
py2ls-0.1.4.
|
76
|
-
py2ls-0.1.4.
|
77
|
-
py2ls-0.1.4.
|
90
|
+
py2ls-0.1.4.4.dist-info/METADATA,sha256=sar2UcFB1uR1NAUfvx0pxCP-VO_KyT8n_fS6Yu4K0r4,11877
|
91
|
+
py2ls-0.1.4.4.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
92
|
+
py2ls-0.1.4.4.dist-info/RECORD,,
|
File without changes
|