crawlo 1.0.2__py3-none-any.whl → 1.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crawlo might be problematic.
Files changed (79)
  1. crawlo/__init__.py +9 -6
  2. crawlo/__version__.py +1 -2
  3. crawlo/core/__init__.py +2 -2
  4. crawlo/core/engine.py +158 -158
  5. crawlo/core/processor.py +40 -40
  6. crawlo/core/scheduler.py +57 -59
  7. crawlo/crawler.py +242 -222
  8. crawlo/downloader/__init__.py +78 -78
  9. crawlo/downloader/aiohttp_downloader.py +259 -96
  10. crawlo/downloader/httpx_downloader.py +187 -48
  11. crawlo/downloader/playwright_downloader.py +160 -160
  12. crawlo/event.py +11 -11
  13. crawlo/exceptions.py +64 -64
  14. crawlo/extension/__init__.py +31 -31
  15. crawlo/extension/log_interval.py +49 -49
  16. crawlo/extension/log_stats.py +44 -44
  17. crawlo/filters/__init__.py +37 -37
  18. crawlo/filters/aioredis_filter.py +150 -130
  19. crawlo/filters/memory_filter.py +202 -203
  20. crawlo/items/__init__.py +62 -62
  21. crawlo/items/items.py +118 -118
  22. crawlo/middleware/__init__.py +21 -21
  23. crawlo/middleware/default_header.py +32 -32
  24. crawlo/middleware/download_delay.py +28 -28
  25. crawlo/middleware/middleware_manager.py +140 -140
  26. crawlo/middleware/request_ignore.py +30 -30
  27. crawlo/middleware/response_code.py +18 -18
  28. crawlo/middleware/response_filter.py +26 -26
  29. crawlo/middleware/retry.py +90 -90
  30. crawlo/network/__init__.py +7 -7
  31. crawlo/network/request.py +204 -233
  32. crawlo/network/response.py +166 -162
  33. crawlo/pipelines/__init__.py +13 -13
  34. crawlo/pipelines/console_pipeline.py +39 -39
  35. crawlo/pipelines/mongo_pipeline.py +116 -116
  36. crawlo/pipelines/mysql_batch_pipline.py +133 -133
  37. crawlo/pipelines/mysql_pipeline.py +195 -195
  38. crawlo/pipelines/pipeline_manager.py +56 -56
  39. crawlo/settings/__init__.py +7 -7
  40. crawlo/settings/default_settings.py +94 -89
  41. crawlo/settings/setting_manager.py +99 -99
  42. crawlo/spider/__init__.py +36 -36
  43. crawlo/stats_collector.py +59 -47
  44. crawlo/subscriber.py +106 -106
  45. crawlo/task_manager.py +27 -27
  46. crawlo/templates/item_template.tmpl +21 -21
  47. crawlo/templates/project_template/main.py +32 -32
  48. crawlo/templates/project_template/setting.py +189 -189
  49. crawlo/templates/spider_template.tmpl +30 -30
  50. crawlo/utils/__init__.py +7 -7
  51. crawlo/utils/concurrency_manager.py +124 -124
  52. crawlo/utils/date_tools.py +177 -177
  53. crawlo/utils/func_tools.py +82 -82
  54. crawlo/utils/log.py +39 -39
  55. crawlo/utils/pqueue.py +173 -173
  56. crawlo/utils/project.py +59 -59
  57. crawlo/utils/request.py +122 -85
  58. crawlo/utils/system.py +11 -11
  59. crawlo/utils/tools.py +302 -302
  60. crawlo/utils/url.py +39 -39
  61. {crawlo-1.0.2.dist-info → crawlo-1.0.4.dist-info}/METADATA +48 -48
  62. crawlo-1.0.4.dist-info/RECORD +79 -0
  63. {crawlo-1.0.2.dist-info → crawlo-1.0.4.dist-info}/top_level.txt +1 -0
  64. tests/__init__.py +7 -0
  65. tests/baidu_spider/__init__.py +7 -0
  66. tests/baidu_spider/demo.py +94 -0
  67. tests/baidu_spider/items.py +25 -0
  68. tests/baidu_spider/middleware.py +49 -0
  69. tests/baidu_spider/pipeline.py +55 -0
  70. tests/baidu_spider/request_fingerprints.txt +9 -0
  71. tests/baidu_spider/run.py +27 -0
  72. tests/baidu_spider/settings.py +80 -0
  73. tests/baidu_spider/spiders/__init__.py +7 -0
  74. tests/baidu_spider/spiders/bai_du.py +61 -0
  75. tests/baidu_spider/spiders/sina.py +79 -0
  76. crawlo/filters/redis_filter.py +0 -120
  77. crawlo-1.0.2.dist-info/RECORD +0 -68
  78. {crawlo-1.0.2.dist-info → crawlo-1.0.4.dist-info}/WHEEL +0 -0
  79. {crawlo-1.0.2.dist-info → crawlo-1.0.4.dist-info}/entry_points.txt +0 -0
tests/baidu_spider/settings.py
@@ -0,0 +1,80 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+
+ PROJECT_NAME = 'baidu_spider'
+
+ CONCURRENCY = 4
+
+ USE_SESSION = True
+
+ # Download delay
+ DOWNLOAD_DELAY = 0.5
+ RANDOMNESS = False
+
+ # --------------------------------------------------- Shared MySQL configuration -----------------------------------------------------
+ MYSQL_HOST = '43.139.14.225'
+ MYSQL_PORT = 3306
+ MYSQL_USER = 'picker'
+ MYSQL_PASSWORD = 'kmcNbbz6TbSihttZ'
+ MYSQL_DB = 'stock_pro'
+ MYSQL_TABLE = 'articles'  # Optional; defaults to the spider name
+ MYSQL_BATCH_SIZE = 500
+
+ # asyncmy-specific settings
+ MYSQL_POOL_MIN = 5   # Minimum number of pooled connections
+ MYSQL_POOL_MAX = 20  # Maximum number of pooled connections
+
+ # Choose the downloader
+ # DOWNLOADER = "crawlo.downloader.httpx_downloader.HttpXDownloader"
+
+ MIDDLEWARES = [
+     'crawlo.middleware.download_delay.DownloadDelayMiddleware',
+     'crawlo.middleware.default_header.DefaultHeaderMiddleware',
+     'crawlo.middleware.response_filter.ResponseFilterMiddleware',
+     'crawlo.middleware.retry.RetryMiddleware',
+     'crawlo.middleware.response_code.ResponseCodeMiddleware',
+     'crawlo.middleware.request_ignore.RequestIgnoreMiddleware',
+     # 'baidu_spider.middleware.TestMiddleWare',
+     # 'baidu_spider.middleware.TestMiddleWare2'
+ ]
+
+ EXTENSIONS = [
+     'crawlo.extension.log_interval.LogIntervalExtension',
+     'crawlo.extension.log_stats.LogStats',
+ ]
+
+ PIPELINES = [
+     'crawlo.pipelines.console_pipeline.ConsolePipeline',
+     'crawlo.pipelines.mysql_pipeline.AsyncmyMySQLPipeline',  # or AiomysqlMySQLPipeline
+     # 'crawlo.pipelines.mysql_batch_pipline.AsyncmyMySQLPipeline',  # or AiomysqlMySQLPipeline
+     # 'baidu_spider.pipeline.TestPipeline',
+     # 'baidu_spider.pipeline.MongoPipeline',
+ ]
+
+ USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36'
+ DEFAULT_HEADERS = {
+     "accept": "application/json, text/javascript, */*; q=0.01",
+     "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
+     "cache-control": "no-cache",
+     "pragma": "no-cache",
+     "priority": "u=1, i",
+     "sec-ch-ua": "\"Chromium\";v=\"136\", \"Google Chrome\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
+     "sec-ch-ua-mobile": "?0",
+     "sec-ch-ua-platform": "\"macOS\"",
+     "sec-fetch-dest": "empty",
+     "sec-fetch-mode": "cors",
+     "sec-fetch-site": "same-origin",
+     # "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
+     "x-requested-with": "XMLHttpRequest"
+ }
+
+ # -------------------------------------- DB ---------------------------------------------
+ Mongo_Params = ''
+ MONGODB_DB = 'news'
+
+ REDIS_TTL = 0
+ CLEANUP_FP = False
+
+ FILTER_CLASS = 'crawlo.filters.aioredis_filter.AioRedisFilter'
+ # FILTER_CLASS = 'crawlo.filters.redis_filter.RedisFilter'
+ # FILTER_CLASS = 'crawlo.filters.memory_filter.MemoryFileFilter'
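
Note: the demo spider bai_du.py further down does not edit this module to tune itself; it overrides CONCURRENCY through its custom_settings attribute. A minimal sketch of that per-spider override pattern, assuming these project settings are in effect (the spider name, URLs, and print call below are illustrative only and not part of the package):

from crawlo import Request
from crawlo.spider import Spider


class DemoSpider(Spider):
    # Hypothetical spider; only the attribute/override pattern mirrors the bundled demos.
    name = "demo"
    start_urls = ["https://www.baidu.com/"]
    custom_settings = {
        'CONCURRENCY': 1,  # overrides the project-wide CONCURRENCY = 4 above
    }

    async def parse(self, response):
        # Chain a follow-up request, as the bundled demo spiders do.
        yield Request(url="https://www.baidu.com", callback=self.parse_page, dont_filter=True)

    async def parse_page(self, response):
        print(response.url, response.xpath('//title/text()').get())
        # Re-requesting the same URL with dont_filter=False lets the configured
        # AioRedisFilter drop the duplicate, so the chain stops here.
        yield Request(url="https://www.baidu.com", callback=self.parse_page, dont_filter=False)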
tests/baidu_spider/spiders/__init__.py
@@ -0,0 +1,7 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+ """
+ # @Time : 2025-05-11 12:20
+ # @Author : oscar
+ # @Desc : None
+ """
tests/baidu_spider/spiders/bai_du.py
@@ -0,0 +1,61 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+ """
+ # @Time : 2025-02-05 13:05
+ # @Author : oscar
+ # @Desc : None
+ """
+ import asyncio
+ from crawlo import Request
+ from crawlo.spider import Spider
+
+ from items import BauDuItem
+
+
+ class BaiDuSpider(Spider):
+     start_urls = ["https://www.baidu.com/", "https://www.baidu.com/"]
+
+     custom_settings = {
+         'CONCURRENCY': 1
+     }
+
+     name = "bai_du"
+
+     # headers = {
+     #     "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
+     # }
+     #
+     user_gent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
+
+     async def parse(self, response):
+         for i in range(5):
+             url = f"https://www.baidu.com"
+             # url = f"https://www.httpbin.org/404"
+             r = Request(url=url, callback=self.parse_page, dont_filter=True)
+             yield r
+
+     async def parse_page(self, response):
+         for i in range(5):
+             url = f"https://www.baidu.com"
+             meta = {'test': 'hhhh'}
+             r = Request(url=url, callback=self.parse_detail, meta=meta, dont_filter=False)
+             yield r
+
+     def parse_detail(self, response):
+         item = BauDuItem()
+         item['title'] = response.xpath('//title/text()').get()
+
+         item['url'] = response.url
+
+         yield item
+
+     async def spider_opened(self):
+         pass
+
+     async def spider_closed(self):
+         pass
+
+
+ if __name__ == '__main__':
+     b = BaiDuSpider()
+     b.start_requests()
tests/baidu_spider/spiders/sina.py
@@ -0,0 +1,79 @@
+ #!/usr/bin/python
+ # -*- coding:UTF-8 -*-
+ """
+ # @Time : 2025-02-05 13:05
+ # @Author : oscar
+ # @Desc : None
+ """
+ import time
+
+ from crawlo import Request
+ from crawlo.spider import Spider
+ from crawlo.utils.date_tools import timestamp_to_datetime, format_datetime
+
+ from tests.baidu_spider.items import ArticleItem
+
+
+ class SinaSpider(Spider):
+     # Take the current timestamp and subtract 10 minutes (600 seconds)
+     current_time_minus_10min = int(time.time()) - 6000
+     # Build the URL
+     url = f'https://news.10jqka.com.cn/tapp/news/push/stock/?page=1&tag=&track=website&ctime={current_time_minus_10min}'
+
+     start_urls = [url]
+     name = 'sina'
+     # mysql_table = 'news_10jqka'
+
+     allowed_domains = ['*']
+
+     def start_requests(self):
+         for url in self.start_urls:
+             yield Request(url=url, callback=self.parse, dont_filter=True)
+
+     async def parse(self, response):
+         jsonp_str = response.json()
+         rows = jsonp_str.get('data', {}).get('list', [])
+         for row in rows:
+             article_id = row.get('id')
+             title = row.get('title')
+             digest = row.get('digest')
+             short = row.get('short')
+             detail_url = row.get('url')
+             tag = row.get('tag')
+             ctime = row.get('ctime')
+             source = row.get('source')
+             meta = {
+                 'article_id': article_id,
+                 'title': title,
+                 'digest': digest,
+                 'short': short,
+                 'detail_url': detail_url,
+                 'source': source,
+                 'tag': tag,
+                 'ctime': timestamp_to_datetime(int(ctime))
+             }
+
+             yield Request(url=detail_url, callback=self.parse_detail, encoding='gbk', meta=meta)
+
+     @staticmethod
+     async def parse_detail(response):
+         item = ArticleItem()
+         meta = response.meta
+         content = ''.join(response.xpath('//*[@id="contentApp"]/p/text()').extract()).strip()
+         ctime = meta.get('ctime')
+         item['article_id'] = meta.get('article_id')
+         item['title'] = meta.get('title')
+         item['digest'] = content
+         item['short'] = meta.get('short')
+         item['url'] = meta.get('detail_url')
+         item['tag'] = meta.get('tag').strip()
+         item['ctime'] = format_datetime(ctime)
+         item['source'] = meta.get('source')
+
+         yield item
+
+     async def spider_opened(self):
+         pass
+
+     async def spider_closed(self):
+         pass
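
The sina demo converts the feed's Unix ctime with timestamp_to_datetime before storing a formatted string via format_datetime. The crawlo.utils.date_tools implementations are not shown in this diff; as a rough standard-library sketch of what the two calls appear to do (the exact output format is an assumption):

from datetime import datetime

def to_datetime(ts: int) -> datetime:
    # Rough stand-in for crawlo.utils.date_tools.timestamp_to_datetime
    return datetime.fromtimestamp(ts)

def to_string(dt: datetime) -> str:
    # Assumed format; crawlo's format_datetime may use a different pattern
    return dt.strftime("%Y-%m-%d %H:%M:%S")

print(to_string(to_datetime(1746936000)))  # prints a local-time datetime string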
crawlo/filters/redis_filter.py
@@ -1,120 +0,0 @@
- #!/usr/bin/python
- # -*- coding:UTF-8 -*-
- import redis
-
- from crawlo import Request
- from crawlo.filters import BaseFilter
- from crawlo.utils.log import get_logger
- from crawlo.utils.request import request_fingerprint
-
-
- class RedisFilter(BaseFilter):
-     """Synchronous request-deduplication filter backed by a Redis set."""
-
-     def __init__(
-             self,
-             redis_key: str,
-             client: redis.Redis,
-             stats: dict,
-             debug: bool,
-             log_level: str,
-             save_fp: bool
-     ):
-         """
-         Initialise the filter.
-
-         :param redis_key: Redis key under which fingerprints are stored
-         :param client: Redis client instance
-         :param stats: stats dictionary
-         :param debug: whether debug mode is enabled
-         :param log_level: log level
-         :param save_fp: whether to keep the fingerprint data
-         """
-         self.logger = get_logger(self.__class__.__name__, log_level)
-         super().__init__(self.logger, stats, debug)
-
-         self.redis_key = redis_key
-         self.redis = client
-         self.save_fp = save_fp
-
-     @classmethod
-     def create_instance(cls, crawler) -> 'BaseFilter':
-         """Factory method for creating an instance."""
-         redis_url = crawler.settings.get('REDIS_URL', 'redis://localhost:6379')
-         decode_responses = crawler.settings.get_bool('DECODE_RESPONSES', True)
-
-         try:
-             # Configure the connection pool
-             redis_client = redis.from_url(
-                 redis_url,
-                 decode_responses=decode_responses,
-                 socket_timeout=5,  # timeout settings
-                 socket_connect_timeout=5,
-                 max_connections=20  # pool size
-             )
-             # Verify the connection works
-             redis_client.ping()
-         except redis.RedisError as e:
-             raise RuntimeError(f"Redis connection failed: {str(e)}")
-
-         return cls(
-             redis_key=f"{crawler.settings.get('PROJECT_NAME')}:{crawler.settings.get('REDIS_KEY', 'request_fingerprints')}",
-             client=redis_client,
-             stats=crawler.stats,
-             save_fp=crawler.settings.get_bool('SAVE_FP', False),
-             debug=crawler.settings.get_bool('FILTER_DEBUG', False),
-             log_level=crawler.settings.get('LOG_LEVEL', 'INFO')
-         )
-
-     def requested(self, request: Request) -> bool:
-         """
-         Check whether the request has been seen before.
-
-         :param request: the request object
-         :return: whether it is a duplicate
-         """
-         fp = request_fingerprint(request)
-         try:
-             if self.redis.sismember(self.redis_key, fp):
-                 self.logger.debug(f"Duplicate request: {fp}")
-                 return True
-
-             self.add_fingerprint(fp)
-             return False
-         except redis.RedisError as e:
-             self.logger.error(f"Redis operation failed: {str(e)}")
-             raise
-
-     def add_fingerprint(self, fp: str) -> None:
-         """Add a fingerprint to the Redis set."""
-         try:
-             self.redis.sadd(self.redis_key, fp)
-             self.logger.debug(f"Added fingerprint: {fp}")
-         except redis.RedisError as e:
-             self.logger.error(f"Failed to add fingerprint: {str(e)}")
-             raise
-
-     def __contains__(self, item) -> bool:
-         """Support the `in` operator (must return a bool)."""
-         try:
-             # Explicitly convert Redis's 0/1 reply to bool
-             return bool(self.redis.sismember(self.redis_key, item))
-         except redis.RedisError as e:
-             self.logger.error(f"Redis lookup failed: {str(e)}")
-             raise
-
-     def close(self) -> None:
-         """Synchronous cleanup (note: not the async closed)."""
-         if not self.save_fp:
-             try:
-                 count = self.redis.delete(self.redis_key)
-                 self.logger.info(f"Cleared Redis key {self.redis_key}, entries removed: {count}")
-             except redis.RedisError as e:
-                 self.logger.error(f"Cleanup failed: {str(e)}")
-             finally:
-                 # The synchronous client's connection pool must be closed manually
-                 self.redis.close()
-
-     async def closed(self):
-         """Synchronous implementation kept for compatibility with the async interface."""
-         self.close()
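
The removed RedisFilter reduces to keeping request fingerprints in a Redis set and checking membership before scheduling; the new test settings instead select the async crawlo.filters.aioredis_filter.AioRedisFilter. For readers unfamiliar with the pattern, a minimal standalone sketch using the synchronous redis-py client (the SHA-1-of-URL fingerprint below is a stand-in for crawlo.utils.request.request_fingerprint, which derives it from the full request):

import hashlib
import redis

client = redis.from_url("redis://localhost:6379", decode_responses=True)
key = "baidu_spider:request_fingerprints"

def seen(url: str) -> bool:
    # Stand-in fingerprint; crawlo fingerprints the whole request, not just the URL.
    fp = hashlib.sha1(url.encode("utf-8")).hexdigest()
    if client.sismember(key, fp):
        return True
    client.sadd(key, fp)
    return False

print(seen("https://www.baidu.com/"))  # False the first time
print(seen("https://www.baidu.com/"))  # True on repeats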
crawlo-1.0.2.dist-info/RECORD
@@ -1,68 +0,0 @@
- crawlo/__init__.py,sha256=BoRtaB19VFlByP3JKzXQbmg4Jb6i6yVnpTR3jCSrCig,208
- crawlo/__version__.py,sha256=FlR3yFykEvKzEITpEQq_qx6Uq29lYlhxcnSHeRP1LgI,23
- crawlo/crawler.py,sha256=XLiDyFyoiJr8BzhLbCB15zbccVHQiiLGDM1zFUkdrAI,8544
- crawlo/event.py,sha256=ZhoPW5CglCEuZNFEwviSCBIw0pT5O6jT98bqYrDFd3E,324
- crawlo/exceptions.py,sha256=trxM2c0jw50QsGSoFAKC2RrKpapOFHQDq0wQuLWqmKE,980
- crawlo/stats_collector.py,sha256=jhAW8k0SzjqelkpiWpfGmMw2DBkgTjpwnObqTNDOp6A,1286
- crawlo/subscriber.py,sha256=udlHeTR0ymGQhCDxVUGwUzeeeR4TYCEJrJwFnkgr0cU,3836
- crawlo/task_manager.py,sha256=D9m-nqnGj-FZPtGk4CdwZX3Gw7IWyYvTS7CHpRGWc_w,748
- crawlo/core/__init__.py,sha256=JYSAn15r8yWgRK_Nc69t_8tZCyb70MiPZKssA8wrYz0,43
- crawlo/core/engine.py,sha256=JFHooPp-5cfHSyxEh87nOOR5NMaPLVDfNSqAsbtx4PM,6030
- crawlo/core/processor.py,sha256=oHLs-cno0bJGTNc9NGD2S7_2-grI3ruvggO0SY2mf3Q,1180
- crawlo/core/scheduler.py,sha256=ZwPoU_QRjs9wwrxdt-MGPwsSmKhvvhgmcnBllDrXnhg,2014
- crawlo/downloader/__init__.py,sha256=72u2Hef4HaMfs9VCqEjbMtiaRXbaXmgNiJn6qy09LHs,2384
- crawlo/downloader/aiohttp_downloader.py,sha256=4C2BDloKzwss16kfD7tH0WPugPbSSFxl-5-_DLWB0vM,3676
- crawlo/downloader/httpx_downloader.py,sha256=ra6Ae_lv8pNyvLzPQYBgTNuBdMVBYi86kNt2OdZlcSo,1704
- crawlo/downloader/playwright_downloader.py,sha256=mEGlSd6A6sN0Wyq-TDkownIElOgxnwVfY3rS5wtLoYY,6726
- crawlo/extension/__init__.py,sha256=O2BVK1U3WwmurZb-PaYVz3g1tZ_iYUjCwilmUKf6844,1170
- crawlo/extension/log_interval.py,sha256=FOWeTOuWtOpCz2UPV5F_--QIa8yomltSpjxbw3F7bkU,1971
- crawlo/extension/log_stats.py,sha256=-V7ipdIfYMQdp1ZDc4kvNEAIHIR74U1ZHV5FhlLyGGU,1786
- crawlo/filters/__init__.py,sha256=9fJQRVkxWWPChajYbAGe1O6UYB639xWt0hiLUGBs4hQ,1014
- crawlo/filters/aioredis_filter.py,sha256=H_HAFfE9mHnPrzQcjyXXCseQ77iN4nT9a3lukHiu8M8,4874
- crawlo/filters/memory_filter.py,sha256=pk2o0kbX0zrGLJ6poKhSa-cfOmsp68fA65hXEGQCQ5M,6895
- crawlo/filters/redis_filter.py,sha256=m1nRsf_3slnWSb80RtTEURi5kwjdI0xotoFxnscx974,4211
- crawlo/items/__init__.py,sha256=o5BSpS1Byivr-bpdfFgc9GCoGi8ThNuPJiTW7lz85-I,2125
- crawlo/items/items.py,sha256=myOOjWaSByKW0r8SxIQ0bxS3PXjUDE1c-Pe38z6fSF8,4108
- crawlo/middleware/__init__.py,sha256=PSwpRLdBUopaQzBp1S0zK_TZbrRagQ4yzvgyLy4tBk8,570
- crawlo/middleware/default_header.py,sha256=OVW4vpRPp3Y6qYXtiEYlGqVjCYcbuv1Iecc7zEgwCsI,1099
- crawlo/middleware/download_delay.py,sha256=P2eyAJXwdLdC4yYuLhvKZVa1b5YQvQD0GpsR8aDW8-8,994
- crawlo/middleware/middleware_manager.py,sha256=T4axTY89Z0BOwaWDWcUTABeDNTvyPFiyrbwj-H4sbSA,6629
- crawlo/middleware/request_ignore.py,sha256=jdybWFVXuA5YsAPfZJFzLTWkYhEAewNgxuhFqczPW9M,1027
- crawlo/middleware/response_code.py,sha256=vgXWv3mMu_v9URvhKA9myIFH4u6L4EwNme80wL4DCGc,677
- crawlo/middleware/response_filter.py,sha256=O2gkV_Yjart8kmmXTGzrtZnb_Uuefap4uL2Cu01iRs4,863
- crawlo/middleware/retry.py,sha256=9SnE7l3Nhh143AqCiL0gfE6dl_gF1Kr6CjoNxvMH_Ps,3465
- crawlo/network/__init__.py,sha256=DVz1JpasjxCgOlXvm76gz-S18OXr4emG_J39yi5iVuA,130
- crawlo/network/request.py,sha256=yUAL6oecm1TniD9dsBmRedEaEHzh3rtr_6p3dMK2EfQ,8195
- crawlo/network/response.py,sha256=amnLEExKq11dLh6m_YdSqdKLl0srh1eeY5uPz15fwyo,6055
- crawlo/pipelines/__init__.py,sha256=IbXJ6B8LqxVVjeLNgL_12AxV6zbV8hNRQxAfMLjjSaw,273
- crawlo/pipelines/console_pipeline.py,sha256=bwe5hZgaVSWmh3R8XpOaaeAjJme-Ttrpo6G6f1cnLIg,1287
- crawlo/pipelines/mongo_pipeline.py,sha256=Yr48D0T61-_Y-EpgWXf7BUn9w8e-Pj5P07QDSPZ0pYU,4558
- crawlo/pipelines/mysql_batch_pipline.py,sha256=7KXd0IUV0h3IViD8R0iruyWv5XdZR1pANB8EY9z6iMI,5022
- crawlo/pipelines/mysql_pipeline.py,sha256=TzyaBg1oBj9pgzuVUrb5VVtkiwZH6gqP_6IYM2QCkLs,8052
- crawlo/pipelines/pipeline_manager.py,sha256=k-Rg0os0Havrov99D-Jn3ROpnz154K30tf7aARE5W3k,2174
- crawlo/settings/__init__.py,sha256=NgYFLfk_Bw7h6KSoepJn_lMBSqVbCHebjKxaE3_eMgw,130
- crawlo/settings/default_settings.py,sha256=JS1QKYe7jkdFlOjqZ-eOeOcVS3AXCZynoNH95GuEnds,2556
- crawlo/settings/setting_manager.py,sha256=4xXOzKwZCgAp8ybwvVcs2R--CsOD7c6dBIkj6DJHB3c,2998
- crawlo/spider/__init__.py,sha256=1tmKkr2-oJi0w9r2ho9nn6Z_VDn18pjXHXU0Hv2eheY,941
- crawlo/templates/item_template.tmpl,sha256=0bGFnlwJRqstxMNEj1H_pEICybwoueRhs31QaDPXrS0,372
- crawlo/templates/spider_template.tmpl,sha256=JzphuA87Yl_F1xR9zOIi_ZSazyT8eSNPxYYPMv3Uiko,835
- crawlo/templates/project_template/main.py,sha256=BcCP294ycCPsHi_AMN7OAJtcrLvQdf91meH93PqbQgs,626
- crawlo/templates/project_template/setting.py,sha256=Ce4nMbrdhL1ioRdTcB0vV_vK_50cfnwVqSvt49QsNkA,9395
- crawlo/templates/project_template/items/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- crawlo/templates/project_template/spiders/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- crawlo/utils/__init__.py,sha256=XCYumI8wJ1jU_Myn_K0LT-LVygPDUCdETCbXM3EWvlo,130
- crawlo/utils/concurrency_manager.py,sha256=o-_cfeUHdlBOM3eAXF857MtekSrRcVTBJ2jWZvY6weQ,5230
- crawlo/utils/date_tools.py,sha256=Y7pnGNn1-5vkiHtydAgmQ-qR3pSO30k5WEYigOPifPQ,5496
- crawlo/utils/func_tools.py,sha256=y-TYP9H3X67MS_foWy9Z2LIS6GP7Y4Cy3T168ulq3Jc,2451
- crawlo/utils/log.py,sha256=LU0J3boPCL-Kynx3wR_CAryRgScNmPPn4pBitLrrsX4,1028
- crawlo/utils/pqueue.py,sha256=HDgX4HAkc7RqYUtX6q51tzI1ZRTACf8P_4jLqC4-uC0,5559
- crawlo/utils/project.py,sha256=FfBaMfxcau4yL59O-DfD7FAii8k6gXWQmQ1YU6aaUCE,1544
- crawlo/utils/request.py,sha256=BEBtxwejvQw5euEiSclHCbqkNcBwUMY7KPGGkvj8BjE,2936
- crawlo/utils/system.py,sha256=24zGmtHNhDFMGVo7ftMV-Pqg6_5d63zsyNey9udvJJk,248
- crawlo/utils/tools.py,sha256=U7xGKgXe2PmMyvNxZ1vlJYcv4Ei1WhIBMf8XcJZ7XCY,9764
- crawlo/utils/url.py,sha256=RKe_iqdjafsNcp-P2GVLYpsL1qbxiuZLiFc-SqOQkcs,1521
- crawlo-1.0.2.dist-info/METADATA,sha256=kzNv4kckIbSZCADon5g2Ik5QJhubNxWra0-aXX6bMug,1784
- crawlo-1.0.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- crawlo-1.0.2.dist-info/entry_points.txt,sha256=GD9PBhKQN83EaxPYtz7NhcGeZeh3bdr2jWbTixOs-lw,59
- crawlo-1.0.2.dist-info/top_level.txt,sha256=Dwuv-Y1aGSJD3mjFrCdNGQ8EHroMj7RgVcxDdcczx4k,7
- crawlo-1.0.2.dist-info/RECORD,,