crawlo 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crawlo might be problematic. Click here for more details.
- crawlo/__init__.py +1 -0
- crawlo/__version__.py +1 -1
- crawlo/core/engine.py +9 -7
- crawlo/core/processor.py +1 -1
- crawlo/core/scheduler.py +32 -8
- crawlo/downloader/playwright_downloader.py +161 -0
- crawlo/extension/log_stats.py +4 -4
- crawlo/filters/__init__.py +37 -0
- crawlo/filters/aioredis_filter.py +130 -0
- crawlo/filters/memory_filter.py +203 -0
- crawlo/filters/redis_filter.py +120 -0
- crawlo/items/__init__.py +40 -2
- crawlo/items/items.py +36 -5
- crawlo/middleware/retry.py +7 -2
- crawlo/network/request.py +121 -18
- crawlo/pipelines/console_pipeline.py +28 -8
- crawlo/pipelines/mongo_pipeline.py +114 -2
- crawlo/pipelines/mysql_batch_pipline.py +134 -0
- crawlo/pipelines/mysql_pipeline.py +173 -2
- crawlo/pipelines/pipeline_manager.py +3 -3
- crawlo/settings/default_settings.py +51 -1
- crawlo/spider/__init__.py +2 -2
- crawlo/utils/date_tools.py +165 -8
- crawlo/utils/func_tools.py +74 -14
- crawlo/utils/pqueue.py +166 -8
- crawlo/utils/project.py +3 -2
- crawlo/utils/request.py +85 -0
- crawlo/utils/url.py +40 -0
- {crawlo-1.0.0.dist-info → crawlo-1.0.1.dist-info}/METADATA +2 -2
- {crawlo-1.0.0.dist-info → crawlo-1.0.1.dist-info}/RECORD +34 -26
- {crawlo-1.0.0.dist-info → crawlo-1.0.1.dist-info}/WHEEL +0 -0
- {crawlo-1.0.0.dist-info → crawlo-1.0.1.dist-info}/entry_points.txt +0 -0
- {crawlo-1.0.0.dist-info → crawlo-1.0.1.dist-info}/licenses/LICENSE +0 -0
- {crawlo-1.0.0.dist-info → crawlo-1.0.1.dist-info}/top_level.txt +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
crawlo/__init__.py,sha256=
|
|
2
|
-
crawlo/__version__.py,sha256=
|
|
1
|
+
crawlo/__init__.py,sha256=BoRtaB19VFlByP3JKzXQbmg4Jb6i6yVnpTR3jCSrCig,208
|
|
2
|
+
crawlo/__version__.py,sha256=nIKsC1SE3kX8Mn6LwGE7AJDVBp7TTkZgedeO4C6G2Jw,23
|
|
3
3
|
crawlo/crawler.py,sha256=lIx3-_TKXdCDhpfw4lYcizpyoclapjAHfGchBJ9DmTU,3679
|
|
4
4
|
crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
|
|
5
5
|
crawlo/exceptions.py,sha256=trxM2c0jw50QsGSoFAKC2RrKpapOFHQDq0wQuLWqmKE,980
|
|
@@ -7,17 +7,22 @@ crawlo/stats_collector.py,sha256=jhAW8k0SzjqelkpiWpfGmMw2DBkgTjpwnObqTNDOp6A,128
|
|
|
7
7
|
crawlo/subscriber.py,sha256=4stxeXqNK6RB7oqo0wKJdUw2Ym3b6UBMhZ4pRrjSMEU,1001
|
|
8
8
|
crawlo/task_manager.py,sha256=D9m-nqnGj-FZPtGk4CdwZX3Gw7IWyYvTS7CHpRGWc_w,748
|
|
9
9
|
crawlo/core/__init__.py,sha256=JYSAn15r8yWgRK_Nc69t_8tZCyb70MiPZKssA8wrYz0,43
|
|
10
|
-
crawlo/core/engine.py,sha256=
|
|
11
|
-
crawlo/core/processor.py,sha256=
|
|
12
|
-
crawlo/core/scheduler.py,sha256=
|
|
10
|
+
crawlo/core/engine.py,sha256=JFHooPp-5cfHSyxEh87nOOR5NMaPLVDfNSqAsbtx4PM,6030
|
|
11
|
+
crawlo/core/processor.py,sha256=oHLs-cno0bJGTNc9NGD2S7_2-grI3ruvggO0SY2mf3Q,1180
|
|
12
|
+
crawlo/core/scheduler.py,sha256=ZwPoU_QRjs9wwrxdt-MGPwsSmKhvvhgmcnBllDrXnhg,2014
|
|
13
13
|
crawlo/downloader/__init__.py,sha256=72u2Hef4HaMfs9VCqEjbMtiaRXbaXmgNiJn6qy09LHs,2384
|
|
14
14
|
crawlo/downloader/aiohttp_downloader.py,sha256=4C2BDloKzwss16kfD7tH0WPugPbSSFxl-5-_DLWB0vM,3676
|
|
15
15
|
crawlo/downloader/httpx_downloader.py,sha256=ra6Ae_lv8pNyvLzPQYBgTNuBdMVBYi86kNt2OdZlcSo,1704
|
|
16
|
+
crawlo/downloader/playwright_downloader.py,sha256=mEGlSd6A6sN0Wyq-TDkownIElOgxnwVfY3rS5wtLoYY,6726
|
|
16
17
|
crawlo/extension/__init__.py,sha256=O2BVK1U3WwmurZb-PaYVz3g1tZ_iYUjCwilmUKf6844,1170
|
|
17
18
|
crawlo/extension/log_interval.py,sha256=FOWeTOuWtOpCz2UPV5F_--QIa8yomltSpjxbw3F7bkU,1971
|
|
18
|
-
crawlo/extension/log_stats.py,sha256
|
|
19
|
-
crawlo/
|
|
20
|
-
crawlo/
|
|
19
|
+
crawlo/extension/log_stats.py,sha256=-V7ipdIfYMQdp1ZDc4kvNEAIHIR74U1ZHV5FhlLyGGU,1786
|
|
20
|
+
crawlo/filters/__init__.py,sha256=9fJQRVkxWWPChajYbAGe1O6UYB639xWt0hiLUGBs4hQ,1014
|
|
21
|
+
crawlo/filters/aioredis_filter.py,sha256=H_HAFfE9mHnPrzQcjyXXCseQ77iN4nT9a3lukHiu8M8,4874
|
|
22
|
+
crawlo/filters/memory_filter.py,sha256=pk2o0kbX0zrGLJ6poKhSa-cfOmsp68fA65hXEGQCQ5M,6895
|
|
23
|
+
crawlo/filters/redis_filter.py,sha256=m1nRsf_3slnWSb80RtTEURi5kwjdI0xotoFxnscx974,4211
|
|
24
|
+
crawlo/items/__init__.py,sha256=o5BSpS1Byivr-bpdfFgc9GCoGi8ThNuPJiTW7lz85-I,2125
|
|
25
|
+
crawlo/items/items.py,sha256=myOOjWaSByKW0r8SxIQ0bxS3PXjUDE1c-Pe38z6fSF8,4108
|
|
21
26
|
crawlo/middleware/__init__.py,sha256=PSwpRLdBUopaQzBp1S0zK_TZbrRagQ4yzvgyLy4tBk8,570
|
|
22
27
|
crawlo/middleware/default_header.py,sha256=OVW4vpRPp3Y6qYXtiEYlGqVjCYcbuv1Iecc7zEgwCsI,1099
|
|
23
28
|
crawlo/middleware/download_delay.py,sha256=P2eyAJXwdLdC4yYuLhvKZVa1b5YQvQD0GpsR8aDW8-8,994
|
|
@@ -25,19 +30,20 @@ crawlo/middleware/middleware_manager.py,sha256=T4axTY89Z0BOwaWDWcUTABeDNTvyPFiyr
|
|
|
25
30
|
crawlo/middleware/request_ignore.py,sha256=jdybWFVXuA5YsAPfZJFzLTWkYhEAewNgxuhFqczPW9M,1027
|
|
26
31
|
crawlo/middleware/response_code.py,sha256=vgXWv3mMu_v9URvhKA9myIFH4u6L4EwNme80wL4DCGc,677
|
|
27
32
|
crawlo/middleware/response_filter.py,sha256=O2gkV_Yjart8kmmXTGzrtZnb_Uuefap4uL2Cu01iRs4,863
|
|
28
|
-
crawlo/middleware/retry.py,sha256=
|
|
33
|
+
crawlo/middleware/retry.py,sha256=xe2npsVvXrMB8o4SeRvjenZ3_Ejf7-kXaaW_oPtayao,3416
|
|
29
34
|
crawlo/network/__init__.py,sha256=DVz1JpasjxCgOlXvm76gz-S18OXr4emG_J39yi5iVuA,130
|
|
30
|
-
crawlo/network/request.py,sha256=
|
|
35
|
+
crawlo/network/request.py,sha256=5j_YoHl4HR_OSWEpLygiSc0jVKfdiubYDcd0HTJzv7U,4834
|
|
31
36
|
crawlo/network/response.py,sha256=QikuOCgxS5yMh4Uh0QuMpqWfyR78vossCs-Va-sQ2YE,2993
|
|
32
37
|
crawlo/pipelines/__init__.py,sha256=IbXJ6B8LqxVVjeLNgL_12AxV6zbV8hNRQxAfMLjjSaw,273
|
|
33
|
-
crawlo/pipelines/console_pipeline.py,sha256=
|
|
34
|
-
crawlo/pipelines/mongo_pipeline.py,sha256=
|
|
35
|
-
crawlo/pipelines/
|
|
36
|
-
crawlo/pipelines/
|
|
38
|
+
crawlo/pipelines/console_pipeline.py,sha256=bwe5hZgaVSWmh3R8XpOaaeAjJme-Ttrpo6G6f1cnLIg,1287
|
|
39
|
+
crawlo/pipelines/mongo_pipeline.py,sha256=Yr48D0T61-_Y-EpgWXf7BUn9w8e-Pj5P07QDSPZ0pYU,4558
|
|
40
|
+
crawlo/pipelines/mysql_batch_pipline.py,sha256=7KXd0IUV0h3IViD8R0iruyWv5XdZR1pANB8EY9z6iMI,5022
|
|
41
|
+
crawlo/pipelines/mysql_pipeline.py,sha256=X15LJXQC10kcE5UzLoY7VAc6gkeMwEez0tkEQuV5bnw,7219
|
|
42
|
+
crawlo/pipelines/pipeline_manager.py,sha256=k-Rg0os0Havrov99D-Jn3ROpnz154K30tf7aARE5W3k,2174
|
|
37
43
|
crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
|
|
38
|
-
crawlo/settings/default_settings.py,sha256=
|
|
44
|
+
crawlo/settings/default_settings.py,sha256=JS1QKYe7jkdFlOjqZ-eOeOcVS3AXCZynoNH95GuEnds,2556
|
|
39
45
|
crawlo/settings/setting_manager.py,sha256=4xXOzKwZCgAp8ybwvVcs2R--CsOD7c6dBIkj6DJHB3c,2998
|
|
40
|
-
crawlo/spider/__init__.py,sha256=
|
|
46
|
+
crawlo/spider/__init__.py,sha256=1tmKkr2-oJi0w9r2ho9nn6Z_VDn18pjXHXU0Hv2eheY,941
|
|
41
47
|
crawlo/templates/item_template.tmpl,sha256=0bGFnlwJRqstxMNEj1H_pEICybwoueRhs31QaDPXrS0,372
|
|
42
48
|
crawlo/templates/spider_template.tmpl,sha256=JzphuA87Yl_F1xR9zOIi_ZSazyT8eSNPxYYPMv3Uiko,835
|
|
43
49
|
crawlo/templates/project_template/main.py,sha256=BcCP294ycCPsHi_AMN7OAJtcrLvQdf91meH93PqbQgs,626
|
|
@@ -45,15 +51,17 @@ crawlo/templates/project_template/setting.py,sha256=Ce4nMbrdhL1ioRdTcB0vV_vK_50c
|
|
|
45
51
|
crawlo/templates/project_template/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
46
52
|
crawlo/templates/project_template/spiders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
53
|
crawlo/utils/__init__.py,sha256=XCYumI8wJ1jU_Myn_K0LT-LVygPDUCdETCbXM3EWvlo,130
|
|
48
|
-
crawlo/utils/date_tools.py,sha256=
|
|
49
|
-
crawlo/utils/func_tools.py,sha256=
|
|
54
|
+
crawlo/utils/date_tools.py,sha256=Y7pnGNn1-5vkiHtydAgmQ-qR3pSO30k5WEYigOPifPQ,5496
|
|
55
|
+
crawlo/utils/func_tools.py,sha256=y-TYP9H3X67MS_foWy9Z2LIS6GP7Y4Cy3T168ulq3Jc,2451
|
|
50
56
|
crawlo/utils/log.py,sha256=LU0J3boPCL-Kynx3wR_CAryRgScNmPPn4pBitLrrsX4,1028
|
|
51
|
-
crawlo/utils/pqueue.py,sha256=
|
|
52
|
-
crawlo/utils/project.py,sha256=
|
|
57
|
+
crawlo/utils/pqueue.py,sha256=HDgX4HAkc7RqYUtX6q51tzI1ZRTACf8P_4jLqC4-uC0,5559
|
|
58
|
+
crawlo/utils/project.py,sha256=FfBaMfxcau4yL59O-DfD7FAii8k6gXWQmQ1YU6aaUCE,1544
|
|
59
|
+
crawlo/utils/request.py,sha256=BEBtxwejvQw5euEiSclHCbqkNcBwUMY7KPGGkvj8BjE,2936
|
|
53
60
|
crawlo/utils/system.py,sha256=24zGmtHNhDFMGVo7ftMV-Pqg6_5d63zsyNey9udvJJk,248
|
|
54
|
-
crawlo
|
|
55
|
-
crawlo-1.0.
|
|
56
|
-
crawlo-1.0.
|
|
57
|
-
crawlo-1.0.
|
|
58
|
-
crawlo-1.0.
|
|
59
|
-
crawlo-1.0.
|
|
61
|
+
crawlo/utils/url.py,sha256=RKe_iqdjafsNcp-P2GVLYpsL1qbxiuZLiFc-SqOQkcs,1521
|
|
62
|
+
crawlo-1.0.1.dist-info/licenses/LICENSE,sha256=f96mrub4oLJnuMcNwMjHWMksci5brQ2a2Fu0R7YEgnk,1125
|
|
63
|
+
crawlo-1.0.1.dist-info/METADATA,sha256=EaEhkScaNmTXATp3KoE9ECfzkK-VgDHmTydE-eDZVsA,1265
|
|
64
|
+
crawlo-1.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
65
|
+
crawlo-1.0.1.dist-info/entry_points.txt,sha256=GD9PBhKQN83EaxPYtz7NhcGeZeh3bdr2jWbTixOs-lw,59
|
|
66
|
+
crawlo-1.0.1.dist-info/top_level.txt,sha256=Dwuv-Y1aGSJD3mjFrCdNGQ8EHroMj7RgVcxDdcczx4k,7
|
|
67
|
+
crawlo-1.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|