aio-scrapy 2.1.4__py3-none-any.whl → 2.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/LICENSE +1 -1
  2. aio_scrapy-2.1.7.dist-info/METADATA +147 -0
  3. aio_scrapy-2.1.7.dist-info/RECORD +134 -0
  4. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/WHEEL +1 -1
  5. aioscrapy/VERSION +1 -1
  6. aioscrapy/cmdline.py +438 -5
  7. aioscrapy/core/downloader/__init__.py +522 -17
  8. aioscrapy/core/downloader/handlers/__init__.py +187 -5
  9. aioscrapy/core/downloader/handlers/aiohttp.py +190 -6
  10. aioscrapy/core/downloader/handlers/curl_cffi.py +126 -5
  11. aioscrapy/core/downloader/handlers/httpx.py +135 -5
  12. aioscrapy/core/downloader/handlers/pyhttpx.py +137 -5
  13. aioscrapy/core/downloader/handlers/requests.py +120 -2
  14. aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
  15. aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
  16. aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
  17. aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
  18. aioscrapy/core/engine.py +381 -20
  19. aioscrapy/core/scheduler.py +350 -36
  20. aioscrapy/core/scraper.py +509 -33
  21. aioscrapy/crawler.py +392 -10
  22. aioscrapy/db/__init__.py +149 -0
  23. aioscrapy/db/absmanager.py +212 -6
  24. aioscrapy/db/aiomongo.py +292 -10
  25. aioscrapy/db/aiomysql.py +363 -10
  26. aioscrapy/db/aiopg.py +299 -2
  27. aioscrapy/db/aiorabbitmq.py +444 -4
  28. aioscrapy/db/aioredis.py +260 -11
  29. aioscrapy/dupefilters/__init__.py +110 -5
  30. aioscrapy/dupefilters/disk.py +124 -2
  31. aioscrapy/dupefilters/redis.py +598 -32
  32. aioscrapy/exceptions.py +151 -13
  33. aioscrapy/http/__init__.py +1 -1
  34. aioscrapy/http/headers.py +237 -3
  35. aioscrapy/http/request/__init__.py +257 -11
  36. aioscrapy/http/request/form.py +83 -3
  37. aioscrapy/http/request/json_request.py +121 -9
  38. aioscrapy/http/response/__init__.py +306 -33
  39. aioscrapy/http/response/html.py +42 -3
  40. aioscrapy/http/response/text.py +496 -49
  41. aioscrapy/http/response/web_driver.py +144 -0
  42. aioscrapy/http/response/xml.py +45 -3
  43. aioscrapy/libs/downloader/defaultheaders.py +66 -2
  44. aioscrapy/libs/downloader/downloadtimeout.py +91 -2
  45. aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
  46. aioscrapy/libs/downloader/retry.py +192 -6
  47. aioscrapy/libs/downloader/stats.py +142 -0
  48. aioscrapy/libs/downloader/useragent.py +93 -2
  49. aioscrapy/libs/extensions/closespider.py +166 -4
  50. aioscrapy/libs/extensions/corestats.py +151 -1
  51. aioscrapy/libs/extensions/logstats.py +145 -1
  52. aioscrapy/libs/extensions/metric.py +370 -1
  53. aioscrapy/libs/extensions/throttle.py +235 -1
  54. aioscrapy/libs/pipelines/__init__.py +345 -2
  55. aioscrapy/libs/pipelines/csv.py +242 -0
  56. aioscrapy/libs/pipelines/excel.py +545 -0
  57. aioscrapy/libs/pipelines/mongo.py +132 -0
  58. aioscrapy/libs/pipelines/mysql.py +67 -0
  59. aioscrapy/libs/pipelines/pg.py +67 -0
  60. aioscrapy/libs/spider/depth.py +141 -3
  61. aioscrapy/libs/spider/httperror.py +144 -4
  62. aioscrapy/libs/spider/offsite.py +202 -2
  63. aioscrapy/libs/spider/referer.py +396 -21
  64. aioscrapy/libs/spider/urllength.py +97 -1
  65. aioscrapy/link.py +115 -8
  66. aioscrapy/logformatter.py +199 -8
  67. aioscrapy/middleware/absmanager.py +328 -2
  68. aioscrapy/middleware/downloader.py +218 -0
  69. aioscrapy/middleware/extension.py +50 -1
  70. aioscrapy/middleware/itempipeline.py +96 -0
  71. aioscrapy/middleware/spider.py +360 -7
  72. aioscrapy/process.py +200 -0
  73. aioscrapy/proxy/__init__.py +142 -3
  74. aioscrapy/proxy/redis.py +136 -2
  75. aioscrapy/queue/__init__.py +168 -16
  76. aioscrapy/scrapyd/runner.py +124 -3
  77. aioscrapy/serializer.py +182 -2
  78. aioscrapy/settings/__init__.py +610 -128
  79. aioscrapy/settings/default_settings.py +314 -14
  80. aioscrapy/signalmanager.py +151 -20
  81. aioscrapy/signals.py +183 -1
  82. aioscrapy/spiderloader.py +165 -12
  83. aioscrapy/spiders/__init__.py +233 -6
  84. aioscrapy/statscollectors.py +312 -1
  85. aioscrapy/utils/conf.py +345 -17
  86. aioscrapy/utils/curl.py +168 -16
  87. aioscrapy/utils/decorators.py +76 -6
  88. aioscrapy/utils/deprecate.py +212 -19
  89. aioscrapy/utils/httpobj.py +55 -3
  90. aioscrapy/utils/log.py +79 -0
  91. aioscrapy/utils/misc.py +189 -21
  92. aioscrapy/utils/ossignal.py +67 -5
  93. aioscrapy/utils/project.py +165 -3
  94. aioscrapy/utils/python.py +254 -44
  95. aioscrapy/utils/reqser.py +75 -1
  96. aioscrapy/utils/request.py +173 -12
  97. aioscrapy/utils/response.py +91 -6
  98. aioscrapy/utils/signal.py +196 -14
  99. aioscrapy/utils/spider.py +51 -4
  100. aioscrapy/utils/template.py +93 -6
  101. aioscrapy/utils/tools.py +191 -17
  102. aioscrapy/utils/trackref.py +198 -12
  103. aioscrapy/utils/url.py +341 -36
  104. aio_scrapy-2.1.4.dist-info/METADATA +0 -239
  105. aio_scrapy-2.1.4.dist-info/RECORD +0 -133
  106. aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -115
  107. aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -59
  108. aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
  109. aioscrapy/http/response/playwright.py +0 -36
  110. aioscrapy/libs/pipelines/execl.py +0 -169
  111. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/entry_points.txt +0 -0
  112. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/top_level.txt +0 -0
aioscrapy/queue/__init__.py CHANGED
@@ -7,7 +7,15 @@ from aioscrapy.utils.reqser import request_from_dict
 
 
 class AbsQueue(metaclass=ABCMeta):
-    """Per-spider base queue class"""
+    """
+    Per-spider base queue class.
+    每个爬虫的基础队列类。
+
+    This abstract class defines the interface for request queues used by spiders.
+    It provides methods for pushing, popping, and managing requests in a queue.
+    此抽象类定义了爬虫使用的请求队列的接口。
+    它提供了推送、弹出和管理队列中请求的方法。
+    """
 
     def __init__(
             self,
@@ -16,52 +24,196 @@ class AbsQueue(metaclass=ABCMeta):
             key: Optional[str] = None,
             serializer: Optional[AbsSerializer] = None
     ) -> None:
-        """Initialize per-spider queue"""
-        self.container = container
-        self.spider = spider
-        self.key = key
-        self.serializer = serializer
+        """
+        Initialize per-spider queue.
+        初始化每个爬虫的队列。
+
+        Args:
+            container: The underlying data structure to store the queue.
+                存储队列的底层数据结构。
+            spider: The spider instance that will use this queue.
+                将使用此队列的爬虫实例。
+            key: Optional key to identify this queue.
+                可选的键,用于标识此队列。
+            serializer: Optional serializer for encoding/decoding requests.
+                可选的序列化器,用于编码/解码请求。
+        """
+        self.container = container  # The underlying data structure
+        # 底层数据结构
+        self.spider = spider  # Associated spider
+        # 关联的爬虫
+        self.key = key  # Queue identifier
+        # 队列标识符
+        self.serializer = serializer  # For serializing requests
+        # 用于序列化请求
 
     @property
     @abstractmethod
     def inc_key(self) -> str:
-        """stats inc_value"""
+        """
+        Get the key used for incrementing stats.
+        获取用于增加统计信息的键。
+
+        This property should return a string key that will be used with
+        the stats collector's inc_value method to track queue operations.
+        此属性应返回一个字符串键,该键将与统计收集器的inc_value方法一起使用,
+        以跟踪队列操作。
+
+        Returns:
+            str: The stats key for this queue.
+                此队列的统计键。
+        """
 
     @classmethod
     @abstractmethod
     async def from_spider(cls, spider: aioscrapy.Spider) -> "AbsQueue":
-        """get queue instance from spider"""
+        """
+        Create a queue instance for a spider.
+        为爬虫创建队列实例。
+
+        This factory method creates a new queue instance configured
+        for the given spider.
+        此工厂方法创建一个为给定爬虫配置的新队列实例。
+
+        Args:
+            spider: The spider that will use the queue.
+                将使用队列的爬虫。
+
+        Returns:
+            AbsQueue: A new queue instance.
+                一个新的队列实例。
+        """
 
     def _encode_request(self, request: aioscrapy.Request) -> Any:
-        """Encode a request object"""
+        """
+        Encode a request object for storage.
+        编码请求对象以进行存储。
+
+        This method converts a Request object to a serialized form that can
+        be stored in the queue's container.
+        此方法将Request对象转换为可以存储在队列容器中的序列化形式。
+
+        Args:
+            request: The Request object to encode.
+                要编码的Request对象。
+
+        Returns:
+            Any: The serialized form of the request.
+                请求的序列化形式。
+        """
         obj = request.to_dict(spider=self.spider)
         return self.serializer.dumps(obj)
 
     async def _decode_request(self, encoded_request: Any) -> aioscrapy.Request:
-        """Decode an request previously encoded"""
+        """
+        Decode a previously encoded request.
+        解码先前编码的请求。
+
+        This method converts a serialized request back into a Request object.
+        此方法将序列化的请求转换回Request对象。
+
+        Args:
+            encoded_request: The serialized request to decode.
+                要解码的序列化请求。
+
+        Returns:
+            Request: The reconstructed Request object.
+                重建的Request对象。
+        """
         obj = self.serializer.loads(encoded_request)
         return await request_from_dict(obj, spider=self.spider)
 
     def __len__(self) -> None:
-        """Return the length of the queue"""
+        """
+        Return the length of the queue (synchronous version).
+        返回队列的长度(同步版本)。
+
+        This method is overridden to prevent synchronous access to the queue length.
+        Use the async len() method instead.
+        此方法被重写以防止同步访问队列长度。
+        请改用异步len()方法。
+
+        Raises:
+            Exception: Always raises an exception to remind users to use the async len() method.
+                始终引发异常,以提醒用户使用异步len()方法。
+        """
         raise Exception('please use len()')
 
     @abstractmethod
     async def len(self) -> int:
-        """Return the length of the queue"""
+        """
+        Return the length of the queue (asynchronous version).
+        返回队列的长度(异步版本)。
+
+        This method should return the number of requests currently in the queue.
+        此方法应返回当前队列中的请求数量。
+
+        Returns:
+            int: The number of requests in the queue.
+                队列中的请求数量。
+        """
 
     @abstractmethod
     async def push(self, request: aioscrapy.Request) -> None:
-        """Push a request"""
+        """
+        Push a request to the queue.
+        将请求推送到队列。
+
+        This method adds a single request to the queue.
+        此方法将单个请求添加到队列中。
+
+        Args:
+            request: The request to add to the queue.
+                要添加到队列的请求。
+        """
 
     @abstractmethod
     async def push_batch(self, requests: List[aioscrapy.Request]) -> None:
-        """Push a batch requests"""
+        """
+        Push multiple requests to the queue.
+        将多个请求推送到队列。
+
+        This method adds multiple requests to the queue at once,
+        which may be more efficient than calling push() multiple times.
+        此方法一次将多个请求添加到队列中,
+        这可能比多次调用push()更有效率。
+
+        Args:
+            requests: A list of requests to add to the queue.
+                要添加到队列的请求列表。
+        """
 
     @abstractmethod
     async def pop(self, timeout: int = 0) -> Optional[aioscrapy.Request]:
-        """Pop a request"""
+        """
+        Pop a request from the queue.
+        从队列中弹出请求。
+
+        This method removes and returns a request from the queue.
+        If the queue is empty, it may wait up to timeout seconds
+        before returning None.
+        此方法从队列中移除并返回一个请求。
+        如果队列为空,它可能会等待最多timeout秒,
+        然后返回None。
+
+        Args:
+            timeout: Maximum time to wait for a request, in seconds.
+                等待请求的最长时间,以秒为单位。
+
+        Returns:
+            Optional[Request]: The next request from the queue, or None if
+                the queue is empty or the timeout expires.
+                队列中的下一个请求,如果队列为空或超时,则为None。
+        """
 
     @abstractmethod
     async def clear(self) -> None:
-        """Clear queue/stack"""
+        """
+        Clear all requests from the queue.
+        清除队列中的所有请求。
+
+        This method removes all pending requests from the queue,
+        effectively resetting it to an empty state.
+        此方法从队列中删除所有待处理的请求,
+        有效地将其重置为空状态。
+        """
aioscrapy/scrapyd/runner.py CHANGED
@@ -1,4 +1,28 @@
 
+"""
+Scrapyd Runner Module
+Scrapyd运行器模块
+
+This module provides utilities for running AioScrapy spiders from egg files deployed
+with Scrapyd. It handles the activation of egg files, setting up the project environment,
+and launching the spider.
+此模块提供了从使用Scrapyd部署的egg文件运行AioScrapy爬虫的实用程序。它处理egg文件的激活、
+设置项目环境和启动爬虫。
+
+The main components are:
+主要组件包括:
+
+1. activate_egg: Activates a Scrapy egg file and sets up the environment
+   激活Scrapy egg文件并设置环境
+2. project_environment: Context manager that sets up the project environment
+   设置项目环境的上下文管理器
+3. main: Entry point for running spiders from Scrapyd
+   从Scrapyd运行爬虫的入口点
+
+This module is designed to be used by Scrapyd to run AioScrapy spiders, but it can
+also be used directly to run spiders from egg files.
+此模块设计用于Scrapyd运行AioScrapy爬虫,但也可以直接用于从egg文件运行爬虫。
+"""
 import os
 import shutil
 import sys
@@ -15,9 +39,28 @@ except ImportError:
 
 
 def activate_egg(eggpath):
-    """Activate a Scrapy egg file. This is meant to be used from egg runners
-    to activate a Scrapy egg file. Don't use it from other code as it may
-    leave unwanted side effects.
+    """
+    Activate a Scrapy egg file.
+    激活aioscrapy egg文件。
+
+    This function activates an aioscrapy egg file by adding it to the Python path
+    and setting the AIOSCRAPY_SETTINGS_MODULE environment variable to the
+    settings module specified in the egg's entry points.
+    此函数通过将aioscrapy egg文件添加到Python路径并将AIOSCRAPY_SETTINGS_MODULE
+    环境变量设置为egg入口点中指定的设置模块来激活它。
+
+    This is meant to be used from egg runners to activate a Scrapy egg file.
+    Don't use it from other code as it may leave unwanted side effects.
+    这旨在从egg运行器使用,以激活Scrapy egg文件。不要从其他代码中使用它,
+    因为它可能会留下不必要的副作用。
+
+    Args:
+        eggpath: Path to the egg file to activate.
+            要激活的egg文件的路径。
+
+    Raises:
+        ValueError: If the egg file is unknown or corrupt.
+            如果egg文件未知或损坏。
     """
     try:
         d = next(pkg_resources.find_distributions(eggpath))
@@ -30,31 +73,109 @@
 
 @contextmanager
 def project_environment(project):
+    """
+    Set up the environment for an aioscrapy project.
+    为aioscrapy项目设置环境。
+
+    This context manager sets up the environment for an aioscrapy project by:
+    此上下文管理器通过以下方式为aioscrapy项目设置环境:
+
+    1. Retrieving the egg file for the project from aioscrapyd's egg storage
+       从aioscrapyd的egg存储中检索项目的egg文件
+    2. Creating a temporary copy of the egg file
+       创建egg文件的临时副本
+    3. Activating the egg file
+       激活egg文件
+    4. Cleaning up the temporary egg file when done
+       完成后清理临时egg文件
+
+    Args:
+        project: The name of the project to set up the environment for.
+            要为其设置环境的项目名称。
+
+    Yields:
+        None: This context manager doesn't yield a value, but sets up the
+            environment for the code inside the with block.
+            此上下文管理器不产生值,但为with块内的代码设置环境。
+
+    Raises:
+        AssertionError: If aioscrapy settings are already loaded.
+            如果aioscrapy设置已加载。
+    """
+    # Get the Scrapyd application and egg storage
+    # 获取Scrapyd应用程序和egg存储
     app = get_application()
     eggstorage = app.getComponent(IEggStorage)
+
+    # Get the egg version from environment or use the latest
+    # 从环境获取egg版本或使用最新版本
     eggversion = os.environ.get('AIOSCRAPY_EGG_VERSION', None)
+
+    # Get the egg file from storage
+    # 从存储中获取egg文件
    version, eggfile = eggstorage.get(project, eggversion)
+
     if eggfile:
+        # Create a temporary copy of the egg file
+        # 创建egg文件的临时副本
         prefix = '%s-%s-' % (project, version)
         fd, eggpath = tempfile.mkstemp(prefix=prefix, suffix='.egg')
         lf = os.fdopen(fd, 'wb')
         shutil.copyfileobj(eggfile, lf)
         lf.close()
+
+        # Activate the egg file
+        # 激活egg文件
         activate_egg(eggpath)
     else:
         eggpath = None
+
     try:
+        # Ensure settings aren't already loaded
+        # 确保设置尚未加载
         assert 'aioscrapy.conf' not in sys.modules, "aioscrapy settings already loaded"
         yield
     finally:
+        # Clean up the temporary egg file
+        # 清理临时egg文件
         if eggpath:
             os.remove(eggpath)
 
 
 def main():
+    """
+    Main entry point for running spiders from Scrapyd.
+    从Scrapyd运行爬虫的主入口点。
+
+    This function:
+    此函数:
+
+    1. Updates environment variables by converting SCRAPY_* variables to AIO* variables
+       通过将SCRAPY_*变量转换为AIO*变量来更新环境变量
+    2. Gets the project name from the AIOSCRAPY_PROJECT environment variable
+       从AIOSCRAPY_PROJECT环境变量获取项目名称
+    3. Sets up the project environment using the project_environment context manager
+       使用project_environment上下文管理器设置项目环境
+    4. Imports and executes the aioscrapy.cmdline.execute function to run the spider
+       导入并执行aioscrapy.cmdline.execute函数来运行爬虫
+
+    This function is designed to be called by Scrapyd to run AioScrapy spiders.
+    此函数设计用于Scrapyd调用以运行AioScrapy爬虫。
+
+    Raises:
+        KeyError: If the AIOSCRAPY_PROJECT environment variable is not set.
+            如果未设置AIOSCRAPY_PROJECT环境变量。
+    """
+    # Update environment variables by converting SCRAPY_* to AIO*
+    # 通过将SCRAPY_*转换为AIO*来更新环境变量
     os.environ.update({f'AIO{k}': v for k, v in os.environ.items() if k.startswith('SCRAPY_')})
 
+    # Get the project name from environment
+    # 从环境获取项目名称
     project = os.environ['AIOSCRAPY_PROJECT']
+
+    # Set up the project environment and run the spider
+    # 设置项目环境并运行爬虫
     with project_environment(project):
         from aioscrapy.cmdline import execute
         execute()
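
One detail worth noting in main() above: the dict comprehension renames every SCRAPY_* variable by prefixing AIO, so Scrapyd's conventional variables become their aioscrapy equivalents (SCRAPY_PROJECT becomes AIOSCRAPY_PROJECT, and so on). A standalone sketch of just that renaming, run against a plain dict with made-up sample values instead of os.environ:

# The SCRAPY_* -> AIOSCRAPY_* renaming from main(); sample values are made up.
env = {
    'SCRAPY_PROJECT': 'mybot',
    'SCRAPY_EGG_VERSION': '1_0',
    'PATH': '/usr/bin',  # untouched: no SCRAPY_ prefix
}
env.update({f'AIO{k}': v for k, v in env.items() if k.startswith('SCRAPY_')})
assert env['AIOSCRAPY_PROJECT'] == 'mybot'
assert env['AIOSCRAPY_EGG_VERSION'] == '1_0'
assert 'AIOPATH' not in env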
aioscrapy/serializer.py CHANGED
@@ -1,3 +1,14 @@
+"""
+Serialization utilities for AioScrapy.
+AioScrapy的序列化实用工具。
+
+This module provides serializer classes for converting Python objects to and from
+serialized formats like JSON and Pickle. These serializers are used throughout
+AioScrapy for data persistence, message passing, and caching.
+此模块提供了用于将Python对象转换为序列化格式(如JSON和Pickle)以及从这些格式转换回来的
+序列化器类。这些序列化器在AioScrapy中用于数据持久化、消息传递和缓存。
+"""
+
 import ujson
 import pickle
 from abc import ABCMeta, abstractmethod
@@ -6,34 +17,203 @@ __all__ = ['PickleSerializer', 'JsonSerializer', 'AbsSerializer']
 
 
 class AbsSerializer(object, metaclass=ABCMeta):
+    """
+    Abstract base class for serializers.
+    序列化器的抽象基类。
+
+    This class defines the interface that all serializers must implement.
+    It provides methods for serializing Python objects to a string format
+    and deserializing strings back to Python objects.
+    此类定义了所有序列化器必须实现的接口。
+    它提供了将Python对象序列化为字符串格式以及将字符串反序列化回Python对象的方法。
+    """
 
     @staticmethod
     @abstractmethod
     def loads(s):
-        """Serializer object"""
+        """
+        Deserialize a string to a Python object.
+        将字符串反序列化为Python对象。
+
+        This method takes a serialized string and converts it back to
+        a Python object.
+        此方法接受一个序列化的字符串并将其转换回Python对象。
+
+        Args:
+            s: The serialized string to deserialize.
+                要反序列化的序列化字符串。
+
+        Returns:
+            The deserialized Python object.
+                反序列化的Python对象。
+
+        Raises:
+            Depends on the implementation.
+            取决于实现。
+        """
+        pass
 
     @staticmethod
     @abstractmethod
     def dumps(obj):
-        """Serializer object"""
+        """
+        Serialize a Python object to a string.
+        将Python对象序列化为字符串。
+
+        This method takes a Python object and converts it to a serialized
+        string format.
+        此方法接受一个Python对象并将其转换为序列化的字符串格式。
+
+        Args:
+            obj: The Python object to serialize.
+                要序列化的Python对象。
+
+        Returns:
+            The serialized string representation of the object.
+                对象的序列化字符串表示。
+
+        Raises:
+            Depends on the implementation.
+            取决于实现。
+        """
+        pass
 
 
 class PickleSerializer(AbsSerializer):
+    """
+    Serializer that uses Python's pickle module.
+    使用Python的pickle模块的序列化器。
+
+    This serializer uses Python's built-in pickle module to serialize and
+    deserialize Python objects. Pickle can handle a wide range of Python
+    objects, including custom classes, but the resulting serialized data
+    is not human-readable and may not be compatible across different
+    Python versions.
+    此序列化器使用Python内置的pickle模块来序列化和反序列化Python对象。
+    Pickle可以处理各种Python对象,包括自定义类,但生成的序列化数据
+    不是人类可读的,并且可能在不同的Python版本之间不兼容。
+
+    Warning:
+        Pickle is not secure against maliciously constructed data. Never unpickle
+        data received from untrusted or unauthenticated sources.
+        Pickle对恶意构造的数据不安全。切勿对来自不受信任或未经身份验证的
+        来源的数据进行反序列化。
+    """
+
     @staticmethod
     def loads(s):
+        """
+        Deserialize a pickle-encoded string to a Python object.
+        将pickle编码的字符串反序列化为Python对象。
+
+        Args:
+            s: The pickle-encoded string to deserialize.
+                要反序列化的pickle编码字符串。
+
+        Returns:
+            The deserialized Python object.
+                反序列化的Python对象。
+
+        Raises:
+            pickle.UnpicklingError: If the data cannot be unpickled.
+                如果数据无法被反序列化。
+            ValueError: If the pickle data is truncated.
+                如果pickle数据被截断。
+            TypeError: If the serialized data is not a bytes-like object.
+                如果序列化数据不是类字节对象。
+        """
         return pickle.loads(s)
 
     @staticmethod
     def dumps(obj):
+        """
+        Serialize a Python object to a pickle-encoded string.
+        将Python对象序列化为pickle编码的字符串。
+
+        Args:
+            obj: The Python object to serialize.
+                要序列化的Python对象。
+
+        Returns:
+            bytes: The pickle-encoded representation of the object.
+                对象的pickle编码表示。
+
+        Raises:
+            pickle.PicklingError: If the object cannot be pickled.
+                如果对象无法被序列化。
+
+        Note:
+            Uses the highest available pickle protocol for maximum efficiency.
+            使用最高可用的pickle协议以获得最大效率。
+        """
+        # protocol=-1 means use the highest available protocol
+        # protocol=-1表示使用最高可用的协议
         return pickle.dumps(obj, protocol=-1)
 
 
 class JsonSerializer(AbsSerializer):
+    """
+    Serializer that uses the ujson module.
+    使用ujson模块的序列化器。
+
+    This serializer uses the ujson module (UltraJSON) to serialize and
+    deserialize Python objects to and from JSON format. UltraJSON is a fast
+    JSON encoder and decoder written in C with Python bindings.
+    此序列化器使用ujson模块(UltraJSON)将Python对象序列化为JSON格式
+    以及从JSON格式反序列化。UltraJSON是一个用C编写的快速JSON编码器和
+    解码器,带有Python绑定。
+
+    JSON serialization is more limited than pickle in terms of the types it can
+    handle (primarily: dict, list, str, int, float, bool, None), but it produces
+    human-readable output and is safe to use with untrusted data.
+    JSON序列化在可以处理的类型方面比pickle更有限(主要是:dict、list、str、
+    int、float、bool、None),但它产生人类可读的输出,并且可以安全地
+    用于不受信任的数据。
+    """
+
     @staticmethod
     def loads(s):
+        """
+        Deserialize a JSON string to a Python object.
+        将JSON字符串反序列化为Python对象。
+
+        Args:
+            s: The JSON string to deserialize.
+                要反序列化的JSON字符串。
+
+        Returns:
+            The deserialized Python object (typically a dict, list, or primitive type).
+                反序列化的Python对象(通常是dict、list或原始类型)。
+
+        Raises:
+            ValueError: If the string is not valid JSON.
+                如果字符串不是有效的JSON。
+        """
         return ujson.loads(s)
 
     @staticmethod
     def dumps(obj):
+        """
+        Serialize a Python object to a JSON string.
+        将Python对象序列化为JSON字符串。
+
+        Args:
+            obj: The Python object to serialize.
+                要序列化的Python对象。
+                Must be a type that can be represented in JSON (dict, list, str,
+                int, float, bool, None, or a combination of these).
+                必须是可以在JSON中表示的类型(dict、list、str、int、float、
+                bool、None或这些的组合)。
+
+        Returns:
+            str: The JSON string representation of the object.
+                对象的JSON字符串表示。
+
+        Raises:
+            TypeError: If the object contains types that cannot be serialized to JSON.
+                如果对象包含无法序列化为JSON的类型。
+            OverflowError: If an integer is too large to be represented in JSON.
+                如果整数太大而无法在JSON中表示。
+        """
         return ujson.dumps(obj)
 
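
Both serializers expose the same static loads/dumps pair, so they are drop-in replacements for each other wherever aio-scrapy accepts a serializer. A short round-trip sketch (the item dict is illustrative):

# Round-trip through the two serializers documented above.
from aioscrapy.serializer import JsonSerializer, PickleSerializer

item = {'url': 'https://example.com', 'status': 200}

s = JsonSerializer.dumps(item)    # str: human-readable JSON
assert JsonSerializer.loads(s) == item

b = PickleSerializer.dumps(item)  # bytes: highest pickle protocol
assert PickleSerializer.loads(b) == item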