py2ls 0.1.4.2__tar.gz → 0.1.4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/PKG-INFO +1 -1
- py2ls-0.1.4.4/py2ls/.git/COMMIT_EDITMSG +4 -0
- py2ls-0.1.4.4/py2ls/.git/FETCH_HEAD +1 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/index +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/logs/HEAD +2 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/logs/refs/heads/main +2 -0
- py2ls-0.1.4.4/py2ls/.git/logs/refs/remotes/origin/HEAD +26 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/logs/refs/remotes/origin/main +2 -0
- py2ls-0.1.4.4/py2ls/.git/objects/36/e56a361f526eafa59c5235a5c990bf288b5f9c +0 -0
- py2ls-0.1.4.4/py2ls/.git/objects/43/dbd49b2ee367c5434dd545e3b5795434f2ef0b +0 -0
- py2ls-0.1.4.4/py2ls/.git/objects/64/27a4edff08f93d98f511418423f09f2ab90bcd +0 -0
- py2ls-0.1.4.4/py2ls/.git/objects/bb/934eb33bc1a8b85630bf680caffd99560c1b8f +0 -0
- py2ls-0.1.4.4/py2ls/.git/objects/c6/7f17e5707313600efcb85e9a3fedea35dba591 +0 -0
- py2ls-0.1.4.4/py2ls/.git/objects/cf/0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d +1 -0
- py2ls-0.1.4.4/py2ls/.git/refs/heads/main +1 -0
- py2ls-0.1.4.4/py2ls/.git/refs/remotes/origin/main +1 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/ips.py +93 -21
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/netfinder.py +156 -126
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/pyproject.toml +1 -1
- py2ls-0.1.4.2/py2ls/.git/COMMIT_EDITMSG +0 -3
- py2ls-0.1.4.2/py2ls/.git/FETCH_HEAD +0 -1
- py2ls-0.1.4.2/py2ls/.git/logs/refs/remotes/origin/HEAD +0 -11
- py2ls-0.1.4.2/py2ls/.git/refs/heads/main +0 -1
- py2ls-0.1.4.2/py2ls/.git/refs/remotes/origin/main +0 -1
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/README.md +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/HEAD +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/config +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/description +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/applypatch-msg.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/commit-msg.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/fsmonitor-watchman.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/post-update.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/pre-applypatch.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/pre-commit.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/pre-merge-commit.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/pre-push.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/pre-rebase.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/pre-receive.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/prepare-commit-msg.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/push-to-checkout.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/hooks/update.sample +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/info/exclude +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/0b/409e1bc918277010f5679b402d1d1dda53e15c +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/14/449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/15/a8e468aacfcb440e090020f36d0b985d45da23 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/1d/fe9d9633b24ea560354f4f93d39c6e5f163ea0 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/24/6b368b986f758630c46dc02b7fa512b53422f7 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/25/b796accd261b9135fd32a2c00785f68edf6c46 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/30/a2f8da47ee947811dc8d993f5a06a45de374f4 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/36/b4a1b7403abc6c360f8fe2cb656ab945254971 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/36/ef43e50009e59db11812c258846d9e38718173 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/3b/bd972aa7ad680858f8dfbd0f7fcd97756f0d6f +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/3c/bbe5f4173d165127b9ad96119f1ec24c306ffc +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/3f/d6561300938afbb3d11976cf9c8f29549280d9 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/48/a88fc5806305d0bb0755ee6801161b79696972 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/58/20a729045d4dc7e37ccaf8aa8eec126850afe2 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/60/f273eb1c412d916fa3f11318a7da7a9911b52a +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/61/570cec8c061abe74121f27f5face6c69b98f99 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/69/13c452ca319f7cbf6a0836dc10a5bb033c84e4 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/6b/7fde264d93a7a0986d394c46c7650d0ce2ab92 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/78/3d4167bc95c9d2175e0df03ef1c1c880ba75ab +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/79/7ae089b2212a937840e215276005ce76881307 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/7e/5956c806b5edc344d46dab599dec337891ba1f +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/87/ef1fc3f7f1ddc4d0ab9b3e65381ce9f3388621 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/8e/55a7d2b96184030211f20c9b9af201eefcac82 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/91/c69ad88fe0ba94aa7859fb5f7edac5e6f1a3f7 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/9d/0df52899fe95279059286d9c0ec42287edc168 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/a5/ec8f74642fbba27f7ea78c53b372ae0c7dedce +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/a7/3e13eafee65c5b8d73ad2d3ea46d0eee82f0d3 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/b0/56be4be89ba6b76949dd641df45bb7036050c8 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/b0/9cd7856d58590578ee1a4f3ad45d1310a97f87 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/b2/18e6a0f0f1c4df8cdefa9852058348abc713b7 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/c4/cba65f1163661999ee4b8ed23342b63bc1300c +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/d9/005f2cc7fc4e65f14ed5518276007c08cf2fd0 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/d9/c2403fd166ce791b4e9d0c6792ed8342c71fcd +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/db/ffa8ea7bda721d0cee7b9e4ce5b2ef927733ff +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/df/e0770424b2a19faf507a501ebfc23be8f54e7b +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/e3/5a4dafc50850cacac7bf76c56db2715cbda2c4 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/e9/391ffe371f1cc43b42ef09b705d9c767c2e14f +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/f4/b64d3107b39e3ad6f540c6607004ea34e6c024 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/f7/c98ba5c2f903e603b1f5e63d49fbc8a43815cc +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/fc/292e793ecfd42240ac43be407023bd731fa9e7 +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/refs/remotes/origin/HEAD +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.gitattributes +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.gitignore +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/LICENSE +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/README.md +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/__init__.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/brain_atlas.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/correlators.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/dbhandler.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/freqanalysis.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/internet_finder.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/sleep_events_detectors.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/translator.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/version.py +0 -0
- {py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/wb_detector.py +0 -0
@@ -0,0 +1 @@
|
|
1
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd branch 'main' of https://github.com/Jianfengliu0413/py2ls
|
Binary file
|
@@ -6,3 +6,5 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9e
|
|
6
6
|
0b409e1bc918277010f5679b402d1d1dda53e15c 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718393734 +0200 commit: Update ips.py
|
7
7
|
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718491087 +0200 commit: Update netfinder.py
|
8
8
|
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526957 +0200 commit: Update netfinder.py
|
9
|
+
48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553462 +0200 commit: new feature: display_thumbnail_figure
|
10
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555179 +0200 commit: listdir type
|
@@ -6,3 +6,5 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9e
|
|
6
6
|
0b409e1bc918277010f5679b402d1d1dda53e15c 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718393734 +0200 commit: Update ips.py
|
7
7
|
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718491087 +0200 commit: Update netfinder.py
|
8
8
|
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526957 +0200 commit: Update netfinder.py
|
9
|
+
48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553462 +0200 commit: new feature: display_thumbnail_figure
|
10
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555179 +0200 commit: listdir type
|
@@ -0,0 +1,26 @@
|
|
1
|
+
0000000000000000000000000000000000000000 b056be4be89ba6b76949dd641df45bb7036050c8 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718370200 +0200 remote set-head
|
2
|
+
d9c2403fd166ce791b4e9d0c6792ed8342c71fcd d9c2403fd166ce791b4e9d0c6792ed8342c71fcd Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718370431 +0200 remote set-head
|
3
|
+
14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718370669 +0200 remote set-head
|
4
|
+
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718371330 +0200 remote set-head
|
5
|
+
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718389149 +0200 remote set-head
|
6
|
+
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718390975 +0200 remote set-head
|
7
|
+
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718392810 +0200 remote set-head
|
8
|
+
0b409e1bc918277010f5679b402d1d1dda53e15c 0b409e1bc918277010f5679b402d1d1dda53e15c Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718392852 +0200 remote set-head
|
9
|
+
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718393737 +0200 remote set-head
|
10
|
+
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718491090 +0200 remote set-head
|
11
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718526960 +0200 remote set-head
|
12
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718529062 +0200 remote set-head
|
13
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718531693 +0200 remote set-head
|
14
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718533521 +0200 remote set-head
|
15
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718535485 +0200 remote set-head
|
16
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718537443 +0200 remote set-head
|
17
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718539464 +0200 remote set-head
|
18
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718541421 +0200 remote set-head
|
19
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718543586 +0200 remote set-head
|
20
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718545544 +0200 remote set-head
|
21
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718547501 +0200 remote set-head
|
22
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718549457 +0200 remote set-head
|
23
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718551415 +0200 remote set-head
|
24
|
+
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718553371 +0200 remote set-head
|
25
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718553465 +0200 remote set-head
|
26
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718555183 +0200 remote set-head
|
@@ -6,3 +6,5 @@ d9c2403fd166ce791b4e9d0c6792ed8342c71fcd 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9e
|
|
6
6
|
0b409e1bc918277010f5679b402d1d1dda53e15c 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718393736 +0200 update by push
|
7
7
|
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng <Jianfeng.Liu0413@gmail.com> 1718491090 +0200 update by push
|
8
8
|
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng <Jianfeng.Liu0413@gmail.com> 1718526959 +0200 update by push
|
9
|
+
48a88fc5806305d0bb0755ee6801161b79696972 cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d Jianfeng <Jianfeng.Liu0413@gmail.com> 1718553464 +0200 update by push
|
10
|
+
cf0c0d9c6fb09473aaeb7f7e2edbd770c3f2ef3d 6427a4edff08f93d98f511418423f09f2ab90bcd Jianfeng <Jianfeng.Liu0413@gmail.com> 1718555183 +0200 update by push
|
@@ -0,0 +1 @@
|
|
1
|
+
x�NIj1�Y��{����4&�C�`�Qk,�4f�!������K-PU�Zk�,��F�����n&�MJd�u$7=eV�1�{ظu0>x�g�h5R��1[�RZ�d�a��u��Z���ʎF��RC��~�t�ic|�B�7;�] �B����O���~�~�klc���2�!^Q�
|
@@ -0,0 +1 @@
|
|
1
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd
|
@@ -0,0 +1 @@
|
|
1
|
+
6427a4edff08f93d98f511418423f09f2ab90bcd
|
@@ -36,6 +36,7 @@ import time
|
|
36
36
|
from box import Box, BoxList
|
37
37
|
from numerizer import numerize
|
38
38
|
from tqdm import tqdm
|
39
|
+
import mimetypes
|
39
40
|
|
40
41
|
def str2num(s, *args):
|
41
42
|
delimiter = None
|
@@ -778,14 +779,15 @@ def dir_name(fpath):
|
|
778
779
|
return dirname(fpath)
|
779
780
|
def basename(fpath):
|
780
781
|
return os.path.basename(fpath)
|
782
|
+
|
781
783
|
def listdir(
|
782
784
|
rootdir,
|
783
785
|
kind="folder",
|
784
786
|
sort_by="name",
|
785
787
|
ascending=True,
|
786
788
|
contains=None,
|
787
|
-
orient
|
788
|
-
output=
|
789
|
+
orient="list",
|
790
|
+
output="df"
|
789
791
|
):
|
790
792
|
def sort_kind(df, by="name", ascending=True):
|
791
793
|
if df[by].dtype == 'object': # Check if the column contains string values
|
@@ -801,6 +803,69 @@ def listdir(
|
|
801
803
|
sorted_df = df.iloc[sorted_index].reset_index(drop=True)
|
802
804
|
return sorted_df
|
803
805
|
|
806
|
+
def flist(fpath, filter="all"):
|
807
|
+
all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
|
808
|
+
if isinstance(filter, list):
|
809
|
+
filt_files = []
|
810
|
+
for filter_ in filter:
|
811
|
+
filt_files.extend(flist(fpath, filter_))
|
812
|
+
return filt_files
|
813
|
+
else:
|
814
|
+
if 'all' in filter.lower():
|
815
|
+
return all_files
|
816
|
+
else:
|
817
|
+
filt_files = [f for f in all_files if istype(f, filter)]
|
818
|
+
return filt_files
|
819
|
+
|
820
|
+
def istype(fpath, filter='img'):
|
821
|
+
"""
|
822
|
+
Filters file paths based on the specified filter.
|
823
|
+
Args:
|
824
|
+
fpath (str): Path to the file.
|
825
|
+
filter (str): Filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
|
826
|
+
'zip' for ZIP archives, and 'other' for other types of files.
|
827
|
+
Returns:
|
828
|
+
bool: True if the file matches the filter, False otherwise.
|
829
|
+
"""
|
830
|
+
if 'img' in filter.lower():
|
831
|
+
return is_image(fpath)
|
832
|
+
elif 'doc' in filter.lower():
|
833
|
+
return is_document(fpath)
|
834
|
+
elif 'zip' in filter.lower():
|
835
|
+
return is_zip(fpath)
|
836
|
+
else:
|
837
|
+
return False
|
838
|
+
|
839
|
+
def is_image(fpath):
|
840
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
841
|
+
if mime_type and mime_type.startswith('image'):
|
842
|
+
return True
|
843
|
+
else:
|
844
|
+
return False
|
845
|
+
|
846
|
+
def is_document(fpath):
|
847
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
848
|
+
if mime_type and (
|
849
|
+
mime_type.startswith('text/') or
|
850
|
+
mime_type == 'application/pdf' or
|
851
|
+
mime_type == 'application/msword' or
|
852
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
853
|
+
mime_type == 'application/vnd.ms-excel' or
|
854
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
855
|
+
mime_type == 'application/vnd.ms-powerpoint' or
|
856
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
857
|
+
):
|
858
|
+
return True
|
859
|
+
else:
|
860
|
+
return False
|
861
|
+
|
862
|
+
def is_zip(fpath):
|
863
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
864
|
+
if mime_type == 'application/zip':
|
865
|
+
return True
|
866
|
+
else:
|
867
|
+
return False
|
868
|
+
|
804
869
|
if not kind.startswith("."):
|
805
870
|
kind = "." + kind
|
806
871
|
|
@@ -814,7 +879,7 @@ def listdir(
|
|
814
879
|
"path": [],
|
815
880
|
"created_time": [],
|
816
881
|
"modified_time": [],
|
817
|
-
"last_open_time":[],
|
882
|
+
"last_open_time": [],
|
818
883
|
"size": [],
|
819
884
|
"fname": [],
|
820
885
|
"fpath": [],
|
@@ -828,16 +893,20 @@ def listdir(
|
|
828
893
|
is_file = kind.lower() in file_extension.lower() and (
|
829
894
|
os.path.isfile(item_path)
|
830
895
|
)
|
831
|
-
if
|
832
|
-
|
896
|
+
if kind in ['.doc','.img','.zip']: #选择大的类别
|
897
|
+
if kind != ".folder" and not istype(item_path, kind):
|
898
|
+
continue
|
899
|
+
else: #精确到文件的后缀
|
900
|
+
if not is_folder and not is_file:
|
901
|
+
continue
|
833
902
|
f["name"].append(filename)
|
834
903
|
f["length"].append(len(filename))
|
835
904
|
f["path"].append(os.path.join(os.path.dirname(item_path), item))
|
836
|
-
fpath=os.path.join(os.path.dirname(item_path), item)
|
905
|
+
fpath = os.path.join(os.path.dirname(item_path), item)
|
837
906
|
f["size"].append(round(os.path.getsize(fpath) / 1024 / 1024, 3))
|
838
|
-
f["created_time"].append(pd.to_datetime(os.path.getctime(item_path),unit='s'))
|
839
|
-
f["modified_time"].append(pd.to_datetime(os.path.getmtime(item_path),unit='s'))
|
840
|
-
f['last_open_time'].append(pd.to_datetime(os.path.getatime(item_path),unit='s'))
|
907
|
+
f["created_time"].append(pd.to_datetime(os.path.getctime(item_path), unit='s'))
|
908
|
+
f["modified_time"].append(pd.to_datetime(os.path.getmtime(item_path), unit='s'))
|
909
|
+
f['last_open_time'].append(pd.to_datetime(os.path.getatime(item_path), unit='s'))
|
841
910
|
f["fname"].append(filename) # will be removed
|
842
911
|
f["fpath"].append(fpath) # will be removed
|
843
912
|
i += 1
|
@@ -850,32 +919,35 @@ def listdir(
|
|
850
919
|
'The directory "{}" does NOT exist. Please check the directory "rootdir".'.format(
|
851
920
|
rootdir
|
852
921
|
)
|
853
|
-
)
|
922
|
+
)
|
923
|
+
|
854
924
|
f = pd.DataFrame(f)
|
925
|
+
|
855
926
|
if contains is not None:
|
856
|
-
f = f[f["name"].str.contains(contains,case=False)]
|
927
|
+
f = f[f["name"].str.contains(contains, case=False)]
|
928
|
+
|
857
929
|
if "nam" in sort_by.lower():
|
858
|
-
|
859
|
-
f=sort_kind(f, by="name", ascending=ascending)
|
930
|
+
f = sort_kind(f, by="name", ascending=ascending)
|
860
931
|
elif "crea" in sort_by.lower():
|
861
|
-
f=sort_kind(f, by="created_time", ascending=ascending)
|
932
|
+
f = sort_kind(f, by="created_time", ascending=ascending)
|
862
933
|
elif "modi" in sort_by.lower():
|
863
|
-
f=sort_kind(f, by="modified_time", ascending=ascending)
|
934
|
+
f = sort_kind(f, by="modified_time", ascending=ascending)
|
864
935
|
elif "s" in sort_by.lower() and "z" in sort_by.lower():
|
865
|
-
f=sort_kind(f, by="size", ascending=ascending)
|
936
|
+
f = sort_kind(f, by="size", ascending=ascending)
|
937
|
+
|
866
938
|
if 'df' in output:
|
867
939
|
return f
|
868
940
|
else:
|
869
|
-
if 'l' in orient.lower():
|
941
|
+
if 'l' in orient.lower(): # list # default
|
870
942
|
res_output = Box(f.to_dict(orient="list"))
|
871
943
|
return res_output
|
872
|
-
if 'd' in orient.lower():
|
944
|
+
if 'd' in orient.lower(): # dict
|
873
945
|
return Box(f.to_dict(orient="dict"))
|
874
|
-
if 'r' in orient.lower():
|
946
|
+
if 'r' in orient.lower(): # records
|
875
947
|
return Box(f.to_dict(orient="records"))
|
876
|
-
if 'in' in orient.lower():
|
948
|
+
if 'in' in orient.lower(): # records
|
877
949
|
return Box(f.to_dict(orient="index"))
|
878
|
-
if 'se' in orient.lower():
|
950
|
+
if 'se' in orient.lower(): # records
|
879
951
|
return Box(f.to_dict(orient="series"))
|
880
952
|
|
881
953
|
# Example usage:
|
@@ -20,7 +20,9 @@ from selenium.webdriver.chrome.options import Options
|
|
20
20
|
from selenium.webdriver.support.ui import WebDriverWait
|
21
21
|
from selenium.webdriver.support import expected_conditions as EC
|
22
22
|
from webdriver_manager.chrome import ChromeDriverManager
|
23
|
+
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
23
24
|
import pprint
|
25
|
+
import mimetypes
|
24
26
|
|
25
27
|
# Set up logging
|
26
28
|
logging.basicConfig(level=logging.INFO)
|
@@ -42,61 +44,22 @@ def user_agent(browsers=["chrome", "edge", "firefox", "safari"], platforms=["pc"
|
|
42
44
|
output_ua = ua.random
|
43
45
|
if verbose:
|
44
46
|
print(output_ua)
|
45
|
-
return output_ua
|
46
|
-
# def extract_text_from_content(content,where,what,extend=False):
|
47
|
-
# if extend:
|
48
|
-
# texts = ""
|
49
|
-
|
50
|
-
# def extract_text(element):
|
51
|
-
# nonlocal texts
|
52
|
-
# if isinstance(element, str) and element.strip():
|
53
|
-
# texts += element.strip()
|
54
|
-
# elif hasattr(element, "children"):
|
55
|
-
# for child in element.children:
|
56
|
-
# extract_text(child)
|
57
|
-
|
58
|
-
# result_set = (
|
59
|
-
# content.find_all(where, class_=what)
|
60
|
-
# if what
|
61
|
-
# else content.find_all(where)
|
62
|
-
# )
|
63
|
-
# for tag in result_set:
|
64
|
-
# extract_text(tag)
|
65
|
-
|
66
|
-
# text = [tx for tx in texts.split("\n") if tx]
|
67
|
-
# return text
|
68
|
-
# else:
|
69
|
-
# result_set = (
|
70
|
-
# content.find_all(where, class_=what)
|
71
|
-
# if what
|
72
|
-
# else content.find_all(where)
|
73
|
-
# )
|
74
|
-
# texts_ = " ".join(tag.get_text() + "\n" for tag in result_set)
|
75
|
-
# texts = [tx.strip() for tx in texts_.split("\n") if tx]
|
76
|
-
# return texts
|
77
|
-
# def extract_text_from_content(content, where, what=None, extend=True):
|
78
|
-
# if extend:
|
79
|
-
# def extract_text(element):
|
80
|
-
# texts = ""
|
81
|
-
# if isinstance(element, str) and element.strip():
|
82
|
-
# texts += element.strip()
|
83
|
-
# elif hasattr(element, "children"):
|
84
|
-
# for child in element.children:
|
85
|
-
# texts += extract_text(child)
|
86
|
-
# return texts
|
87
|
-
|
88
|
-
# result_set = content.find_all(where, class_=what) if what else content.find_all(where)
|
89
|
-
# texts = ""
|
90
|
-
# for tag in result_set:
|
91
|
-
# texts += extract_text(tag) + "\n"
|
92
|
-
# text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
|
93
|
-
# return text_list
|
94
|
-
# else:
|
95
|
-
# result_set = content.find_all(where, class_=what) if what else content.find_all(where)
|
96
|
-
# texts_ = " ".join(tag.get_text() for tag in result_set)
|
97
|
-
# texts = [tx.strip() for tx in texts_.split("\n") if tx.strip()]
|
98
|
-
# return texts
|
47
|
+
return output_ua
|
99
48
|
def extract_text_from_content(content, content_type="text/html", where=None, what=None, extend=True, **kwargs):
|
49
|
+
"""
|
50
|
+
Extracts text from the given content based on the specified content type and search criteria.
|
51
|
+
|
52
|
+
Parameters:
|
53
|
+
- content (str/BeautifulSoup): The content to extract text from.
|
54
|
+
- content_type (str): The type of content, e.g., "text/html" or "application/json".
|
55
|
+
- where (str/list): The HTML tag or list of tags to search for.
|
56
|
+
- what (str): The class name to filter the tags (optional).
|
57
|
+
- extend (bool): Whether to recursively extract text from child elements.
|
58
|
+
- **kwargs: Additional keyword arguments for the search (e.g., id, attributes).
|
59
|
+
|
60
|
+
Returns:
|
61
|
+
- list: A list of extracted text segments.
|
62
|
+
"""
|
100
63
|
if content is None:
|
101
64
|
logger.error("Content is None, cannot extract text.")
|
102
65
|
return []
|
@@ -109,35 +72,41 @@ def extract_text_from_content(content, content_type="text/html", where=None, wha
|
|
109
72
|
where = None
|
110
73
|
return extract_text_from_json(content, where)
|
111
74
|
elif 'text' in content_type:
|
112
|
-
if
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
elif hasattr(element, "children"):
|
118
|
-
for child in element.children:
|
119
|
-
texts += extract_text(child)
|
120
|
-
return texts
|
121
|
-
|
122
|
-
search_kwargs = {**kwargs}
|
123
|
-
if what:
|
124
|
-
search_kwargs["class_"] = what
|
125
|
-
|
126
|
-
result_set = content.find_all(where, **search_kwargs)
|
127
|
-
texts = ""
|
128
|
-
for tag in result_set:
|
129
|
-
texts += extract_text(tag) + "\n"
|
130
|
-
text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
|
131
|
-
return text_list
|
75
|
+
if isinstance(where, list):
|
76
|
+
res=[]
|
77
|
+
for where_ in where:
|
78
|
+
res.extend(extract_text_from_content(content, content_type="text/html", where=where_, what=what, extend=extend, **kwargs))
|
79
|
+
return res
|
132
80
|
else:
|
133
|
-
|
134
|
-
|
135
|
-
|
81
|
+
if extend:
|
82
|
+
def extract_text(element):
|
83
|
+
texts = ""
|
84
|
+
if isinstance(element, str) and element.strip():
|
85
|
+
texts += element.strip()
|
86
|
+
elif hasattr(element, "children"):
|
87
|
+
for child in element.children:
|
88
|
+
texts += extract_text(child)
|
89
|
+
return texts
|
90
|
+
|
91
|
+
search_kwargs = {**kwargs}
|
92
|
+
if what:
|
93
|
+
search_kwargs["class_"] = what
|
94
|
+
|
95
|
+
result_set = content.find_all(where, **search_kwargs)
|
96
|
+
texts = ""
|
97
|
+
for tag in result_set:
|
98
|
+
texts += extract_text(tag) + "\n"
|
99
|
+
text_list = [tx.strip() for tx in texts.split("\n") if tx.strip()]
|
100
|
+
return text_list
|
101
|
+
else:
|
102
|
+
search_kwargs = {**kwargs}
|
103
|
+
if what:
|
104
|
+
search_kwargs["class_"] = what
|
136
105
|
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
106
|
+
result_set = content.find_all(where, **search_kwargs)
|
107
|
+
texts_ = " ".join(tag.get_text() for tag in result_set)
|
108
|
+
texts = [tx.strip() for tx in texts_.split("\n") if tx.strip()]
|
109
|
+
return texts
|
141
110
|
|
142
111
|
def extract_text_from_json(content, key=None):
|
143
112
|
if key:
|
@@ -366,7 +335,7 @@ def pdf_detector(url, contains=None, dir_save=None,booster=False):
|
|
366
335
|
print(f'{len(fnames)} files are downloaded:\n{fnames}\n to local: \n{dir_save}')
|
367
336
|
|
368
337
|
|
369
|
-
def find_img(url, dir_save="images"):
|
338
|
+
def find_img(url, dir_save="images", verbose=True):
|
370
339
|
"""
|
371
340
|
Save images referenced in HTML content locally.
|
372
341
|
Args:
|
@@ -381,7 +350,6 @@ def find_img(url, dir_save="images"):
|
|
381
350
|
if "html" in content_type.lower():
|
382
351
|
# Create the directory if it doesn't exist
|
383
352
|
os.makedirs(dir_save, exist_ok=True)
|
384
|
-
|
385
353
|
# Parse HTML content if it's not already a BeautifulSoup object
|
386
354
|
if isinstance(content, str):
|
387
355
|
content = BeautifulSoup(content, "html.parser")
|
@@ -390,13 +358,9 @@ def find_img(url, dir_save="images"):
|
|
390
358
|
images = content.find_all("img", src=True)
|
391
359
|
for i, image in enumerate(images):
|
392
360
|
try:
|
393
|
-
# Get the image URL
|
394
361
|
image_url = image["src"]
|
395
|
-
|
396
362
|
if image_url.startswith("data:image"):
|
397
|
-
# Extract the image data from the data URI
|
398
363
|
mime_type, base64_data = image_url.split(",", 1)
|
399
|
-
# Determine the file extension from the MIME type
|
400
364
|
if ":" in mime_type:
|
401
365
|
# image_extension = mime_type.split(":")[1].split(";")[0]
|
402
366
|
image_extension = (
|
@@ -406,44 +370,68 @@ def find_img(url, dir_save="images"):
|
|
406
370
|
image_extension = (
|
407
371
|
"png" # Default to PNG if extension is not specified
|
408
372
|
)
|
409
|
-
# if 'svg+xml' in image_extension:
|
410
|
-
# image_extension='svg'
|
411
373
|
image_data = base64.b64decode(base64_data)
|
412
|
-
# Save the image data to a file
|
413
374
|
image_filename = os.path.join(
|
414
375
|
dir_save, f"image_{i}.{image_extension}"
|
415
376
|
)
|
416
377
|
with open(image_filename, "wb") as image_file:
|
417
378
|
image_file.write(image_data)
|
418
|
-
|
419
|
-
# Update the src attribute of the image tag to point to the local file
|
420
379
|
image["src"] = image_filename
|
380
|
+
if verbose:
|
381
|
+
plt.imshow(image_data)
|
421
382
|
else:
|
422
383
|
# Construct the absolute image URL
|
423
384
|
absolute_image_url = urljoin(url, image_url)
|
424
|
-
|
425
385
|
# Parse the image URL to extract the file extension
|
426
386
|
parsed_url = urlparse(absolute_image_url)
|
427
387
|
image_extension = os.path.splitext(parsed_url.path)[1]
|
428
|
-
|
429
388
|
# Download the image
|
430
389
|
image_response = requests.get(absolute_image_url,proxies=proxies_glob)
|
431
|
-
|
432
390
|
# Save the image to a file
|
433
391
|
image_filename = os.path.join(
|
434
392
|
dir_save, f"image_{i}{image_extension}"
|
435
393
|
)
|
436
394
|
with open(image_filename, "wb") as image_file:
|
437
395
|
image_file.write(image_response.content)
|
438
|
-
|
439
396
|
# Update the src attribute of the image tag to point to the local file
|
440
397
|
image["src"] = image_filename
|
441
398
|
except (requests.RequestException, KeyError) as e:
|
442
399
|
print(f"Failed to process image {image_url}: {e}")
|
443
400
|
print(f"images were saved at\n{dir_save}")
|
444
|
-
|
401
|
+
if verbose:
|
402
|
+
display_thumbnail_figure(flist(dir_save,filter='img'),dpi=200)
|
445
403
|
return content
|
446
404
|
|
405
|
+
def display_thumbnail_figure(dir_img_list,figsize=(10,10),dpi=100):
|
406
|
+
import matplotlib.pyplot as plt
|
407
|
+
from PIL import Image
|
408
|
+
"""
|
409
|
+
Display a thumbnail figure of all images in the specified directory.
|
410
|
+
Args:
|
411
|
+
dir_img_list (list): List of the Directory containing the images.
|
412
|
+
"""
|
413
|
+
num_images = len(dir_img_list)
|
414
|
+
|
415
|
+
if num_images == 0:
|
416
|
+
print("No images found to display.")
|
417
|
+
return
|
418
|
+
|
419
|
+
# Determine grid size
|
420
|
+
grid_size = int(num_images ** 0.5) + 1
|
421
|
+
|
422
|
+
fig, axs = plt.subplots(grid_size, grid_size, figsize=figsize,dpi=dpi)
|
423
|
+
|
424
|
+
for ax, image_file in zip(axs.flatten(), dir_img_list):
|
425
|
+
img = Image.open(image_file)
|
426
|
+
ax.imshow(img)
|
427
|
+
ax.axis('off') # Hide axes
|
428
|
+
|
429
|
+
# Hide remaining subplots
|
430
|
+
for ax in axs.flatten()[num_images:]:
|
431
|
+
ax.axis('off')
|
432
|
+
|
433
|
+
plt.tight_layout()
|
434
|
+
plt.show()
|
447
435
|
|
448
436
|
def content_div_class(content, div="div", div_class="highlight"):
|
449
437
|
texts = [div.text for div in content.find_all(div, class_=div_class)]
|
@@ -467,7 +455,7 @@ def fetch_selenium(
|
|
467
455
|
username_by=By.NAME,
|
468
456
|
password_by=By.NAME,
|
469
457
|
submit_by=By.NAME,
|
470
|
-
|
458
|
+
# capability='eager', # eager or none
|
471
459
|
proxy=None, # Add proxy parameter
|
472
460
|
javascript=True, # Add JavaScript option
|
473
461
|
disable_images=False, # Add option to disable images
|
@@ -479,21 +467,16 @@ def fetch_selenium(
|
|
479
467
|
chrome_options.add_argument("--no-sandbox")
|
480
468
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
481
469
|
chrome_options.add_argument(f"user-agent={user_agent()}")
|
482
|
-
|
483
470
|
if proxy:
|
484
471
|
chrome_options.add_argument(f'--proxy-server={proxy}')
|
485
|
-
|
486
472
|
if disable_images:
|
487
473
|
prefs = {"profile.managed_default_content_settings.images": 2}
|
488
474
|
chrome_options.add_experimental_option("prefs", prefs)
|
489
|
-
|
490
|
-
caps = DesiredCapabilities().CHROME
|
491
|
-
caps["pageLoadStrategy"] = "eager" # You can set this to "none" if needed
|
492
|
-
|
475
|
+
# chrome_options.page_load_strategy = capability
|
493
476
|
service = Service(ChromeDriverManager().install())
|
494
477
|
for attempt in range(retry):
|
495
478
|
try:
|
496
|
-
driver = webdriver.Chrome(service=service, options=chrome_options
|
479
|
+
driver = webdriver.Chrome(service=service, options=chrome_options)
|
497
480
|
|
498
481
|
if not javascript:
|
499
482
|
driver.execute_cdp_cmd("Emulation.setScriptExecutionDisabled", {"value": True})
|
@@ -540,36 +523,19 @@ def fetch_selenium(
|
|
540
523
|
|
541
524
|
|
542
525
|
def fetch(url, where="div", what=None, extend=True, booster=False,retry=2,verbose=False, **kws):
|
543
|
-
# for attempt in range(retry):
|
544
|
-
# if verbose and attempt==0:
|
545
|
-
# xample = 'fetch(url,where="div",what=None,extend=True,by=By.TAG_NAME,timeout=10,retry=3,login_url=None,username=None,password=None,username_field="username",password_field="password",submit_field="submit",username_by=By.NAME,password_by=By.NAME,submit_by=By.NAME)'
|
546
|
-
# print(xample)
|
547
|
-
# content_type, content = fetch_all(url, parser="html.parser")
|
548
|
-
# texts=extract_text_from_content(content,content_type=content_type,where=where,what=what,extend=extend, **kws)
|
549
|
-
# if isinstance(texts,pd.core.frame.DataFrame):
|
550
|
-
# condition=[texts.empty, attempt != retry - 1]
|
551
|
-
# else:
|
552
|
-
# condition=[not texts, attempt != retry - 1]
|
553
|
-
# if all(condition):
|
554
|
-
# texts = fetch(url=url, where=where, what=what, extend=extend, retry=retry-1, **kws)
|
555
|
-
# sleep(random.uniform(0.5, 1.5))
|
556
526
|
for attempt in range(retry):
|
557
527
|
if verbose and attempt==0:
|
558
528
|
xample = 'fetch(url,where="div",what=None,extend=True,by=By.TAG_NAME,timeout=10,retry=3,login_url=None,username=None,password=None,username_field="username",password_field="password",submit_field="submit",username_by=By.NAME,password_by=By.NAME,submit_by=By.NAME)'
|
559
529
|
print(xample)
|
560
530
|
content_type, content = fetch_all(url, parser="html.parser")
|
561
531
|
texts=extract_text_from_content(content,content_type=content_type,where=where,what=what,extend=extend, **kws)
|
562
|
-
if isinstance(texts, pd.core.frame.DataFrame):
|
563
|
-
# condition=[texts.empty, attempt != retry - 1]
|
532
|
+
if isinstance(texts, pd.core.frame.DataFrame):
|
564
533
|
if not texts.empty:
|
565
534
|
break
|
566
|
-
else:
|
567
|
-
# condition=[not texts, attempt != retry - 1]
|
535
|
+
else:
|
568
536
|
if texts:
|
569
537
|
break
|
570
|
-
|
571
|
-
# texts = fetch(url=url, where=where, what=what, extend=extend, retry=retry-1, **kws)
|
572
|
-
sleep(random.uniform(0.5, 1.5))
|
538
|
+
sleep(random.uniform(0.5, 1.5))
|
573
539
|
if isinstance(texts,pd.core.frame.DataFrame):
|
574
540
|
condition_=[texts.empty, booster]
|
575
541
|
else:
|
@@ -799,4 +765,68 @@ def find_all(url, dir_save=None):
|
|
799
765
|
else:
|
800
766
|
df.to_csv(dir_save)
|
801
767
|
print(f"file has been saved at\n{dir_save}")
|
802
|
-
return df
|
768
|
+
return df
|
769
|
+
|
770
|
+
|
771
|
+
def flist(fpath, filter="all"):
|
772
|
+
all_files = [os.path.join(fpath, f) for f in os.listdir(fpath) if os.path.isfile(os.path.join(fpath, f))]
|
773
|
+
if isinstance(filter, list):
|
774
|
+
filt_files=[]
|
775
|
+
for filter_ in filter:
|
776
|
+
filt_files.extend(flist(fpath, filter_))
|
777
|
+
return filt_files
|
778
|
+
else:
|
779
|
+
if 'all' in filter.lower():
|
780
|
+
return all_files
|
781
|
+
else:
|
782
|
+
filt_files = [f for f in all_files if istype(f, filter)]
|
783
|
+
return filt_files
|
784
|
+
|
785
|
+
def istype(fpath, filter='img'):
|
786
|
+
"""
|
787
|
+
Filters file paths based on the specified filter.
|
788
|
+
Args:
|
789
|
+
fpath (str): Path to the file.
|
790
|
+
filter (str): filter of file to filter. Default is 'img' for images. Other options include 'doc' for documents,
|
791
|
+
'zip' for ZIP archives, and 'other' for other types of files.
|
792
|
+
Returns:
|
793
|
+
bool: True if the file matches the filter, False otherwise.
|
794
|
+
"""
|
795
|
+
if 'img' in filter.lower():
|
796
|
+
return is_image(fpath)
|
797
|
+
elif 'doc' in filter.lower():
|
798
|
+
return is_document(fpath)
|
799
|
+
elif 'zip' in filter.lower():
|
800
|
+
return is_zip(fpath)
|
801
|
+
else:
|
802
|
+
return False
|
803
|
+
|
804
|
+
def is_image(fpath):
|
805
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
806
|
+
if mime_type and mime_type.startswith('image'):
|
807
|
+
return True
|
808
|
+
else:
|
809
|
+
return False
|
810
|
+
|
811
|
+
def is_document(fpath):
|
812
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
813
|
+
if mime_type and (
|
814
|
+
mime_type.startswith('text/') or
|
815
|
+
mime_type == 'application/pdf' or
|
816
|
+
mime_type == 'application/msword' or
|
817
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' or
|
818
|
+
mime_type == 'application/vnd.ms-excel' or
|
819
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' or
|
820
|
+
mime_type == 'application/vnd.ms-powerpoint' or
|
821
|
+
mime_type == 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
822
|
+
):
|
823
|
+
return True
|
824
|
+
else:
|
825
|
+
return False
|
826
|
+
|
827
|
+
def is_zip(fpath):
|
828
|
+
mime_type, _ = mimetypes.guess_type(fpath)
|
829
|
+
if mime_type == 'application/zip':
|
830
|
+
return True
|
831
|
+
else:
|
832
|
+
return False
|
@@ -1 +0,0 @@
|
|
1
|
-
48a88fc5806305d0bb0755ee6801161b79696972 branch 'main' of https://github.com/Jianfengliu0413/py2ls
|
@@ -1,11 +0,0 @@
|
|
1
|
-
0000000000000000000000000000000000000000 b056be4be89ba6b76949dd641df45bb7036050c8 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718370200 +0200 remote set-head
|
2
|
-
d9c2403fd166ce791b4e9d0c6792ed8342c71fcd d9c2403fd166ce791b4e9d0c6792ed8342c71fcd Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718370431 +0200 remote set-head
|
3
|
-
14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed 14449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718370669 +0200 remote set-head
|
4
|
-
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718371330 +0200 remote set-head
|
5
|
-
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718389149 +0200 remote set-head
|
6
|
-
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718390975 +0200 remote set-head
|
7
|
-
36ef43e50009e59db11812c258846d9e38718173 36ef43e50009e59db11812c258846d9e38718173 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718392810 +0200 remote set-head
|
8
|
-
0b409e1bc918277010f5679b402d1d1dda53e15c 0b409e1bc918277010f5679b402d1d1dda53e15c Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718392852 +0200 remote set-head
|
9
|
-
6b7fde264d93a7a0986d394c46c7650d0ce2ab92 6b7fde264d93a7a0986d394c46c7650d0ce2ab92 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718393737 +0200 remote set-head
|
10
|
-
3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f 3bbd972aa7ad680858f8dfbd0f7fcd97756f0d6f Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718491090 +0200 remote set-head
|
11
|
-
48a88fc5806305d0bb0755ee6801161b79696972 48a88fc5806305d0bb0755ee6801161b79696972 Jianfeng Liu <macjianfeng@jflmbp.speedport.ip> 1718526960 +0200 remote set-head
|
@@ -1 +0,0 @@
|
|
1
|
-
48a88fc5806305d0bb0755ee6801161b79696972
|
@@ -1 +0,0 @@
|
|
1
|
-
48a88fc5806305d0bb0755ee6801161b79696972
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/0b/409e1bc918277010f5679b402d1d1dda53e15c
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/14/449a0e6ba4ea2f1a73acf63ef91c9c6193f9ed
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/15/a8e468aacfcb440e090020f36d0b985d45da23
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/1d/fe9d9633b24ea560354f4f93d39c6e5f163ea0
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/24/6b368b986f758630c46dc02b7fa512b53422f7
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/25/b796accd261b9135fd32a2c00785f68edf6c46
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/30/a2f8da47ee947811dc8d993f5a06a45de374f4
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/36/b4a1b7403abc6c360f8fe2cb656ab945254971
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/36/ef43e50009e59db11812c258846d9e38718173
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/3b/bd972aa7ad680858f8dfbd0f7fcd97756f0d6f
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/3c/bbe5f4173d165127b9ad96119f1ec24c306ffc
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/3f/d6561300938afbb3d11976cf9c8f29549280d9
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/48/a88fc5806305d0bb0755ee6801161b79696972
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/58/20a729045d4dc7e37ccaf8aa8eec126850afe2
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/60/f273eb1c412d916fa3f11318a7da7a9911b52a
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/61/570cec8c061abe74121f27f5face6c69b98f99
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/69/13c452ca319f7cbf6a0836dc10a5bb033c84e4
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/6b/7fde264d93a7a0986d394c46c7650d0ce2ab92
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/78/3d4167bc95c9d2175e0df03ef1c1c880ba75ab
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/79/7ae089b2212a937840e215276005ce76881307
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/7e/5956c806b5edc344d46dab599dec337891ba1f
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/87/ef1fc3f7f1ddc4d0ab9b3e65381ce9f3388621
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/8e/55a7d2b96184030211f20c9b9af201eefcac82
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/91/c69ad88fe0ba94aa7859fb5f7edac5e6f1a3f7
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/9d/0df52899fe95279059286d9c0ec42287edc168
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/a5/ec8f74642fbba27f7ea78c53b372ae0c7dedce
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/a7/3e13eafee65c5b8d73ad2d3ea46d0eee82f0d3
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/b0/56be4be89ba6b76949dd641df45bb7036050c8
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/b0/9cd7856d58590578ee1a4f3ad45d1310a97f87
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/b2/18e6a0f0f1c4df8cdefa9852058348abc713b7
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/c4/cba65f1163661999ee4b8ed23342b63bc1300c
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/d9/005f2cc7fc4e65f14ed5518276007c08cf2fd0
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/d9/c2403fd166ce791b4e9d0c6792ed8342c71fcd
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/db/ffa8ea7bda721d0cee7b9e4ce5b2ef927733ff
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/df/e0770424b2a19faf507a501ebfc23be8f54e7b
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/e3/5a4dafc50850cacac7bf76c56db2715cbda2c4
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/e9/391ffe371f1cc43b42ef09b705d9c767c2e14f
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/f4/b64d3107b39e3ad6f540c6607004ea34e6c024
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/f7/c98ba5c2f903e603b1f5e63d49fbc8a43815cc
RENAMED
File without changes
|
{py2ls-0.1.4.2 → py2ls-0.1.4.4}/py2ls/.git/objects/fc/292e793ecfd42240ac43be407023bd731fa9e7
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|