crawlo 1.2.1-py3-none-any.whl → 1.2.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release of crawlo has been flagged as potentially problematic.

Files changed (49)
  1. crawlo/__version__.py +1 -1
  2. crawlo/cli.py +16 -0
  3. crawlo/commands/check.py +69 -69
  4. crawlo/commands/genspider.py +25 -25
  5. crawlo/commands/help.py +4 -4
  6. crawlo/commands/list.py +23 -23
  7. crawlo/commands/run.py +34 -34
  8. crawlo/commands/startproject.py +35 -36
  9. crawlo/commands/stats.py +21 -21
  10. crawlo/commands/utils.py +4 -4
  11. crawlo/config.py +4 -4
  12. crawlo/config_validator.py +1 -2
  13. crawlo/data/__init__.py +6 -0
  14. crawlo/data/user_agents.py +108 -0
  15. crawlo/downloader/hybrid_downloader.py +0 -1
  16. crawlo/downloader/selenium_downloader.py +1 -1
  17. crawlo/extension/health_check.py +2 -2
  18. crawlo/items/fields.py +0 -1
  19. crawlo/middleware/offsite.py +0 -1
  20. crawlo/middleware/proxy.py +3 -2
  21. crawlo/middleware/request_ignore.py +0 -1
  22. crawlo/middleware/response_code.py +0 -1
  23. crawlo/middleware/response_filter.py +0 -1
  24. crawlo/middleware/retry.py +1 -1
  25. crawlo/mode_manager.py +3 -3
  26. crawlo/pipelines/database_dedup_pipeline.py +1 -3
  27. crawlo/pipelines/memory_dedup_pipeline.py +2 -2
  28. crawlo/pipelines/mysql_pipeline.py +4 -3
  29. crawlo/pipelines/redis_dedup_pipeline.py +2 -4
  30. crawlo/project.py +2 -2
  31. crawlo/subscriber.py +1 -2
  32. crawlo/templates/project/settings.py.tmpl +1 -3
  33. crawlo/templates/project/settings_distributed.py.tmpl +2 -0
  34. crawlo/utils/batch_processor.py +2 -3
  35. crawlo/utils/controlled_spider_mixin.py +1 -1
  36. crawlo/utils/enhanced_error_handler.py +3 -6
  37. crawlo/utils/env_config.py +1 -1
  38. crawlo/utils/error_handler.py +2 -4
  39. crawlo/utils/large_scale_helper.py +2 -1
  40. crawlo/utils/performance_monitor.py +5 -4
  41. crawlo/utils/redis_connection_pool.py +4 -4
  42. crawlo/utils/redis_key_validator.py +1 -2
  43. crawlo/utils/request_serializer.py +1 -2
  44. crawlo/utils/spider_loader.py +0 -1
  45. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/METADATA +1 -1
  46. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/RECORD +49 -47
  47. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/WHEEL +0 -0
  48. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/entry_points.txt +0 -0
  49. {crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/top_level.txt +0 -0
crawlo/data/user_agents.py ADDED
@@ -0,0 +1,108 @@
+ #!/usr/bin/env python
+ # -*- coding: utf-8 -*-
+ """
+ User-Agent列表
+ 包含各种设备和浏览器的User-Agent字符串,用于爬虫伪装
+ """
+
+ # 桌面浏览器User-Agent
+ DESKTOP_USER_AGENTS = [
+     # Chrome
+     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+     "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+     "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+
+     # Firefox
+     "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
+     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:125.0) Gecko/20100101 Firefox/125.0",
+     "Mozilla/5.0 (X11; Linux i686; rv:125.0) Gecko/20100101 Firefox/125.0",
+     "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
+
+     # Safari
+     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Safari/605.1.15",
+     "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
+
+     # Edge
+     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.2478.80",
+     "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/123.0.2420.40",
+
+     # Internet Explorer
+     "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko",
+ ]
+
+ # 移动设备User-Agent
+ MOBILE_USER_AGENTS = [
+     # iPhone
+     "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1",
+     "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
+
+     # iPad
+     "Mozilla/5.0 (iPad; CPU OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.4 Mobile/15E148 Safari/604.1",
+     "Mozilla/5.0 (iPad; CPU OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1",
+
+     # Android
+     "Mozilla/5.0 (Linux; Android 14; SM-S911B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+     "Mozilla/5.0 (Linux; Android 13; SM-S908B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+     "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+     "Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.118 Mobile Safari/537.36",
+
+     # Android平板
+     "Mozilla/5.0 (Linux; Android 13; SM-X906C Build/TP1A.220624.014; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/124.0.6367.118 Safari/537.36",
+ ]
+
+ # 爬虫/机器人User-Agent (用于测试)
+ BOT_USER_AGENTS = [
+     "Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
+     "Mozilla/5.0 (compatible; Bingbot/2.0; +http://www.bing.com/bingbot.htm)",
+     "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)",
+     "Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)",
+ ]
+
+ # 所有User-Agent的组合列表
+ ALL_USER_AGENTS = DESKTOP_USER_AGENTS + MOBILE_USER_AGENTS
+
+ # 按设备类型分类的User-Agent字典
+ USER_AGENTS_BY_TYPE = {
+     "desktop": DESKTOP_USER_AGENTS,
+     "mobile": MOBILE_USER_AGENTS,
+     "bot": BOT_USER_AGENTS,
+     "all": ALL_USER_AGENTS,
+ }
+
+ def get_user_agents(device_type="all"):
+     """
+     获取指定类型的User-Agent列表
+
+     Args:
+         device_type (str): 设备类型,可选值: "desktop", "mobile", "bot", "all"
+
+     Returns:
+         list: User-Agent字符串列表
+     """
+     return USER_AGENTS_BY_TYPE.get(device_type, ALL_USER_AGENTS)
+
+ def get_random_user_agent(device_type="all"):
+     """
+     获取随机User-Agent
+
+     Args:
+         device_type (str): 设备类型,可选值: "desktop", "mobile", "bot", "all"
+
+     Returns:
+         str: 随机User-Agent字符串
+     """
+     import random
+     user_agents = get_user_agents(device_type)
+     return random.choice(user_agents) if user_agents else ""
+
+ # 导出常用的User-Agent列表
+ __all__ = [
+     "DESKTOP_USER_AGENTS",
+     "MOBILE_USER_AGENTS",
+     "BOT_USER_AGENTS",
+     "ALL_USER_AGENTS",
+     "USER_AGENTS_BY_TYPE",
+     "get_user_agents",
+     "get_random_user_agent"
+ ]
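The new module exposes get_user_agents and get_random_user_agent for selecting a spoofed User-Agent by device type. A minimal usage sketch follows; the crawlo.data.user_agents import path is assumed from the file list above, and the headers dict is illustrative only, not part of this diff:

# Sketch: consume the user_agents module added in 1.2.3 (import path assumed).
from crawlo.data.user_agents import get_user_agents, get_random_user_agent

# All desktop User-Agent strings; unknown types fall back to the combined list.
desktop_agents = get_user_agents("desktop")
print(len(desktop_agents), "desktop agents")

# One random mobile User-Agent, e.g. for a per-request header (hypothetical usage).
headers = {"User-Agent": get_random_user_agent("mobile")}
print(headers)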
crawlo/downloader/hybrid_downloader.py CHANGED
@@ -17,7 +17,6 @@
  4. 统一的接口和响应格式
  5. 自动资源管理和优化
  """
- import asyncio
  from typing import Optional, Dict, Type
  from urllib.parse import urlparse

crawlo/downloader/selenium_downloader.py CHANGED
@@ -14,9 +14,9 @@ Selenium 下载器
  - 支持翻页操作(鼠标滑动、点击翻页)
  - 单浏览器多标签页模式
  """
- import asyncio
  import os
  import time
+ import asyncio
  from typing import Optional, Dict, List

  from selenium import webdriver
crawlo/extension/health_check.py CHANGED
@@ -1,11 +1,11 @@
  #!/usr/bin/python
  # -*- coding:UTF-8 -*-
  import asyncio
+ from datetime import datetime
  from typing import Any, Optional, Dict
- from datetime import datetime, timedelta

- from crawlo.utils.log import get_logger
  from crawlo.event import spider_opened, spider_closed, response_received, request_scheduled
+ from crawlo.utils.log import get_logger


  class HealthCheckExtension:
crawlo/items/fields.py CHANGED
@@ -3,7 +3,6 @@
  """
  Field 类定义
  """
-
  from typing import Any, Optional, Type


crawlo/middleware/offsite.py CHANGED
@@ -4,7 +4,6 @@
  OffsiteMiddleware 中间件
  用于过滤掉不在指定域名范围内的请求
  """
-
  import re
  from urllib.parse import urlparse

crawlo/middleware/proxy.py CHANGED
@@ -1,10 +1,11 @@
  #!/usr/bin/python
  # -*- coding: UTF-8 -*-
+ import time
  import asyncio
  import socket
- from typing import Optional, Dict, Any, Callable, Union, TYPE_CHECKING, List
  from urllib.parse import urlparse
- import time
+ from typing import Optional, Dict, Any, Callable, Union, TYPE_CHECKING, List
+

  from crawlo import Request, Response
  from crawlo.exceptions import NotConfiguredError
crawlo/middleware/request_ignore.py CHANGED
@@ -4,7 +4,6 @@
  RequestIgnoreMiddleware 中间件
  用于处理和记录被忽略的请求
  """
-
  from crawlo.utils.log import get_logger
  from crawlo.exceptions import IgnoreRequestError
  from crawlo.event import ignore_request
crawlo/middleware/response_code.py CHANGED
@@ -4,7 +4,6 @@
  ResponseCodeMiddleware 中间件
  用于处理HTTP响应状态码,记录统计信息并支持特殊状态码处理
  """
-
  from crawlo.utils.log import get_logger


crawlo/middleware/response_filter.py CHANGED
@@ -4,7 +4,6 @@
  ResponseFilterMiddleware 中间件
  用于过滤不符合要求的HTTP响应,支持自定义允许的状态码
  """
-
  from crawlo.utils.log import get_logger
  from crawlo.exceptions import IgnoreRequestError

crawlo/middleware/retry.py CHANGED
@@ -1,7 +1,7 @@
  #!/usr/bin/python
  # -*- coding:UTF-8 -*-
- from typing import List
  import asyncio
+ from typing import List

  try:
      from anyio import EndOfStream
crawlo/mode_manager.py CHANGED
@@ -10,10 +10,10 @@
  2. distributed - 分布式模式
  3. auto - 自动检测模式
  """
-
- from typing import Dict, Any, Optional
- from enum import Enum
  import os
+ from enum import Enum
+ from typing import Dict, Any, Optional
+
  from crawlo.utils.log import get_logger


crawlo/pipelines/database_dedup_pipeline.py CHANGED
@@ -11,15 +11,13 @@
  - 适用性广: 支持多种数据库后端
  - 可扩展: 支持自定义表结构和字段
  """
-
  import hashlib
- from typing import Dict, Any, Optional
  import aiomysql

  from crawlo import Item
+ from crawlo.exceptions import DropItem
  from crawlo.spider import Spider
  from crawlo.utils.log import get_logger
- from crawlo.exceptions import DropItem


  class DatabaseDedupPipeline:
crawlo/pipelines/memory_dedup_pipeline.py CHANGED
@@ -13,12 +13,12 @@
  """

  import hashlib
- from typing import Dict, Any, Set
+ from typing import Set

  from crawlo import Item
+ from crawlo.exceptions import DropItem
  from crawlo.spider import Spider
  from crawlo.utils.log import get_logger
- from crawlo.exceptions import DropItem


  class MemoryDedupPipeline:
crawlo/pipelines/mysql_pipeline.py CHANGED
@@ -1,11 +1,12 @@
  # -*- coding: utf-8 -*-
  import asyncio
  import aiomysql
- from typing import Optional, List, Dict
  from asyncmy import create_pool
- from crawlo.utils.log import get_logger
+ from typing import Optional, List, Dict
+
  from crawlo.exceptions import ItemDiscard
- from crawlo.utils.db_helper import make_insert_sql, make_batch_sql, logger
+ from crawlo.utils.db_helper import make_insert_sql, make_batch_sql
+ from crawlo.utils.log import get_logger


  class AsyncmyMySQLPipeline:
crawlo/pipelines/redis_dedup_pipeline.py CHANGED
@@ -11,15 +11,13 @@
  - 可配置: 支持自定义 Redis 连接参数
  - 容错设计: 网络异常时不会丢失数据
  """
-
+ import redis
  import hashlib
  from typing import Optional

- import redis
-
  from crawlo import Item
- from crawlo.exceptions import DropItem
  from crawlo.spider import Spider
+ from crawlo.exceptions import DropItem
  from crawlo.utils.log import get_logger


crawlo/project.py CHANGED
@@ -14,10 +14,10 @@ import sys
  import configparser
  from importlib import import_module
  from inspect import iscoroutinefunction
- from typing import Callable, Optional, Tuple
+ from typing import Callable, Optional

- from crawlo.utils.log import get_logger
  from crawlo.settings.setting_manager import SettingManager
+ from crawlo.utils.log import get_logger

  logger = get_logger(__name__)

crawlo/subscriber.py CHANGED
@@ -1,10 +1,9 @@
  #!/usr/bin/python
  # -*- coding:UTF-8 -*-
  import asyncio
- import weakref
  from collections import defaultdict
  from inspect import iscoroutinefunction
- from typing import Dict, Set, Callable, Coroutine, Any, TypeAlias, List, Tuple
+ from typing import Dict, Callable, Coroutine, Any, TypeAlias, List, Tuple


  class ReceiverTypeError(TypeError):
crawlo/templates/project/settings.py.tmpl CHANGED
@@ -146,8 +146,6 @@ MONGO_USE_BATCH = False # 是否启用批量插入
  REQUEST_DIR = '.'

  # 根据运行模式自动选择去重管道
- # 单机模式默认使用内存去重管道
- # 分布式模式默认使用Redis去重管道
  if RUN_MODE == 'distributed':
      # 分布式模式下默认使用Redis去重管道
      DEFAULT_DEDUP_PIPELINE = 'crawlo.pipelines.redis_dedup_pipeline.RedisDedupPipeline'
@@ -157,7 +155,6 @@ else:

  # 去重过滤器(推荐分布式项目使用 Redis 过滤器)
  FILTER_CLASS = 'crawlo.filters.memory_filter.MemoryFilter'
- # FILTER_CLASS = 'crawlo.filters.aioredis_filter.AioRedisFilter' # 分布式去重

  # --- Redis 配置(用于分布式去重和队列) ---
  REDIS_HOST = os.getenv('REDIS_HOST', '127.0.0.1')
@@ -190,6 +187,7 @@ MIDDLEWARES = [
      'crawlo.middleware.download_delay.DownloadDelayMiddleware',
      'crawlo.middleware.default_header.DefaultHeaderMiddleware',
      'crawlo.middleware.proxy.ProxyMiddleware',
+     'crawlo.middleware.offsite.OffsiteMiddleware',

      # === 响应处理阶段 ===
      'crawlo.middleware.retry.RetryMiddleware',
crawlo/templates/project/settings_distributed.py.tmpl CHANGED
@@ -83,6 +83,8 @@ MIDDLEWARES = [
      'crawlo.middleware.download_delay.DownloadDelayMiddleware',
      'crawlo.middleware.default_header.DefaultHeaderMiddleware',
      'crawlo.middleware.proxy.ProxyMiddleware',
+     'crawlo.middleware.offsite.OffsiteMiddleware',
+
      'crawlo.middleware.retry.RetryMiddleware',
      'crawlo.middleware.response_code.ResponseCodeMiddleware',
      'crawlo.middleware.response_filter.ResponseFilterMiddleware',
crawlo/utils/batch_processor.py CHANGED
@@ -5,12 +5,11 @@
  提供批量操作的统一接口和优化实现
  """
  import asyncio
- import time
- from typing import List, Callable, Any, Optional, Dict
  from functools import wraps
+ from typing import List, Callable, Any, Optional, Dict

- from crawlo.utils.log import get_logger
  from crawlo.utils.error_handler import ErrorHandler
+ from crawlo.utils.log import get_logger


  class BatchProcessor:
crawlo/utils/controlled_spider_mixin.py CHANGED
@@ -6,8 +6,8 @@
  """
  import asyncio
  import time
- from typing import Generator, Optional, Set
  from collections import deque
+ from typing import Generator, Optional

  from crawlo import Request
  from crawlo.utils.log import get_logger
crawlo/utils/enhanced_error_handler.py CHANGED
@@ -1,16 +1,13 @@
  #!/usr/bin/python
  # -*- coding:UTF-8 -*-
  """
- 增强版错误处理工具
+ 误处理工具
  提供更详细、更一致的错误处理和日志记录机制
  """
  import traceback
- import logging
- import sys
- import os
- from typing import Optional, Callable, Any, Dict, List
- from functools import wraps
  from datetime import datetime
+ from functools import wraps
+ from typing import Optional, Callable, Any, Dict, List

  from crawlo.utils.log import get_logger

crawlo/utils/env_config.py CHANGED
@@ -5,7 +5,7 @@
  提供统一的环境变量读取和配置管理机制
  """
  import os
- from typing import Optional, Union, Any
+ from typing import Any


  class EnvConfigManager:
crawlo/utils/error_handler.py CHANGED
@@ -4,13 +4,11 @@
  统一错误处理工具
  提供一致的错误处理和日志记录机制
  """
- import traceback
- import logging
- from typing import Optional, Callable, Any
  from functools import wraps
+ from typing import Callable, Any

- from crawlo.utils.log import get_logger
  from crawlo.utils.enhanced_error_handler import EnhancedErrorHandler, ErrorContext
+ from crawlo.utils.log import get_logger


  class ErrorHandler:
crawlo/utils/large_scale_helper.py CHANGED
@@ -6,7 +6,8 @@
  import asyncio
  import json
  import time
- from typing import Generator, Iterator, List, Set, Optional, Dict, Any
+ from typing import Generator, List, Dict, Any
+
  from crawlo.utils.log import get_logger


crawlo/utils/performance_monitor.py CHANGED
@@ -4,14 +4,15 @@
  性能监控工具
  提供系统性能监控和资源使用情况跟踪
  """
- import time
- import psutil
  import asyncio
- from typing import Dict, Any, Optional, Callable
+ import time
  from functools import wraps
+ from typing import Dict, Any
+
+ import psutil

- from crawlo.utils.log import get_logger
  from crawlo.utils.error_handler import ErrorHandler
+ from crawlo.utils.log import get_logger


  class PerformanceMonitor:
crawlo/utils/redis_connection_pool.py CHANGED
@@ -4,13 +4,13 @@
  Redis连接池优化工具
  提供优化的Redis连接池管理和配置
  """
- import asyncio
- import redis.asyncio as aioredis
- from typing import Dict, Any, Optional, Union
  from contextlib import asynccontextmanager
+ from typing import Dict, Any, Optional
+
+ import redis.asyncio as aioredis

- from crawlo.utils.log import get_logger
  from crawlo.utils.error_handler import ErrorHandler
+ from crawlo.utils.log import get_logger


  class OptimizedRedisConnectionPool:
crawlo/utils/redis_key_validator.py CHANGED
@@ -5,9 +5,8 @@ Redis Key 验证工具
  =================
  提供 Redis Key 命名规范的验证功能
  """
-
- import re
  from typing import List, Tuple
+
  from crawlo.utils.log import get_logger


crawlo/utils/request_serializer.py CHANGED
@@ -4,10 +4,9 @@
  Request 序列化工具类
  负责处理 Request 对象的序列化前清理工作,解决 logger 等不可序列化对象的问题
  """
+ import gc
  import logging
  import pickle
- import gc
- from typing import Any, Dict
  from crawlo.utils.log import get_logger


crawlo/utils/spider_loader.py CHANGED
@@ -1,5 +1,4 @@
  import importlib
- import inspect
  from pathlib import Path
  from typing import List, Type, Optional, Dict

{crawlo-1.2.1.dist-info → crawlo-1.2.3.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: crawlo
- Version: 1.2.1
+ Version: 1.2.3
  Summary: Crawlo 是一款基于异步IO的高性能Python爬虫框架,支持分布式抓取。
  Home-page: https://github.com/crawl-coder/Crawlo.git
  Author: crawl-coder