aio-scrapy 2.1.4__py3-none-any.whl → 2.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/LICENSE +1 -1
  2. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/METADATA +53 -41
  3. aio_scrapy-2.1.6.dist-info/RECORD +134 -0
  4. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/WHEEL +1 -1
  5. aioscrapy/VERSION +1 -1
  6. aioscrapy/cmdline.py +438 -5
  7. aioscrapy/core/downloader/__init__.py +522 -17
  8. aioscrapy/core/downloader/handlers/__init__.py +187 -5
  9. aioscrapy/core/downloader/handlers/aiohttp.py +187 -3
  10. aioscrapy/core/downloader/handlers/curl_cffi.py +124 -3
  11. aioscrapy/core/downloader/handlers/httpx.py +133 -3
  12. aioscrapy/core/downloader/handlers/pyhttpx.py +132 -3
  13. aioscrapy/core/downloader/handlers/requests.py +120 -2
  14. aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
  15. aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
  16. aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
  17. aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
  18. aioscrapy/core/engine.py +381 -20
  19. aioscrapy/core/scheduler.py +350 -36
  20. aioscrapy/core/scraper.py +509 -33
  21. aioscrapy/crawler.py +392 -10
  22. aioscrapy/db/__init__.py +149 -0
  23. aioscrapy/db/absmanager.py +212 -6
  24. aioscrapy/db/aiomongo.py +292 -10
  25. aioscrapy/db/aiomysql.py +363 -10
  26. aioscrapy/db/aiopg.py +299 -2
  27. aioscrapy/db/aiorabbitmq.py +444 -4
  28. aioscrapy/db/aioredis.py +260 -11
  29. aioscrapy/dupefilters/__init__.py +110 -5
  30. aioscrapy/dupefilters/disk.py +124 -2
  31. aioscrapy/dupefilters/redis.py +598 -32
  32. aioscrapy/exceptions.py +151 -13
  33. aioscrapy/http/__init__.py +1 -1
  34. aioscrapy/http/headers.py +237 -3
  35. aioscrapy/http/request/__init__.py +257 -11
  36. aioscrapy/http/request/form.py +83 -3
  37. aioscrapy/http/request/json_request.py +121 -9
  38. aioscrapy/http/response/__init__.py +306 -33
  39. aioscrapy/http/response/html.py +42 -3
  40. aioscrapy/http/response/text.py +496 -49
  41. aioscrapy/http/response/web_driver.py +144 -0
  42. aioscrapy/http/response/xml.py +45 -3
  43. aioscrapy/libs/downloader/defaultheaders.py +66 -2
  44. aioscrapy/libs/downloader/downloadtimeout.py +91 -2
  45. aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
  46. aioscrapy/libs/downloader/retry.py +192 -6
  47. aioscrapy/libs/downloader/stats.py +142 -0
  48. aioscrapy/libs/downloader/useragent.py +93 -2
  49. aioscrapy/libs/extensions/closespider.py +166 -4
  50. aioscrapy/libs/extensions/corestats.py +151 -1
  51. aioscrapy/libs/extensions/logstats.py +145 -1
  52. aioscrapy/libs/extensions/metric.py +370 -1
  53. aioscrapy/libs/extensions/throttle.py +235 -1
  54. aioscrapy/libs/pipelines/__init__.py +345 -2
  55. aioscrapy/libs/pipelines/csv.py +242 -0
  56. aioscrapy/libs/pipelines/excel.py +545 -0
  57. aioscrapy/libs/pipelines/mongo.py +132 -0
  58. aioscrapy/libs/pipelines/mysql.py +67 -0
  59. aioscrapy/libs/pipelines/pg.py +67 -0
  60. aioscrapy/libs/spider/depth.py +141 -3
  61. aioscrapy/libs/spider/httperror.py +144 -4
  62. aioscrapy/libs/spider/offsite.py +202 -2
  63. aioscrapy/libs/spider/referer.py +396 -21
  64. aioscrapy/libs/spider/urllength.py +97 -1
  65. aioscrapy/link.py +115 -8
  66. aioscrapy/logformatter.py +199 -8
  67. aioscrapy/middleware/absmanager.py +328 -2
  68. aioscrapy/middleware/downloader.py +218 -0
  69. aioscrapy/middleware/extension.py +50 -1
  70. aioscrapy/middleware/itempipeline.py +96 -0
  71. aioscrapy/middleware/spider.py +360 -7
  72. aioscrapy/process.py +200 -0
  73. aioscrapy/proxy/__init__.py +142 -3
  74. aioscrapy/proxy/redis.py +136 -2
  75. aioscrapy/queue/__init__.py +168 -16
  76. aioscrapy/scrapyd/runner.py +124 -3
  77. aioscrapy/serializer.py +182 -2
  78. aioscrapy/settings/__init__.py +610 -128
  79. aioscrapy/settings/default_settings.py +313 -13
  80. aioscrapy/signalmanager.py +151 -20
  81. aioscrapy/signals.py +183 -1
  82. aioscrapy/spiderloader.py +165 -12
  83. aioscrapy/spiders/__init__.py +233 -6
  84. aioscrapy/statscollectors.py +312 -1
  85. aioscrapy/utils/conf.py +345 -17
  86. aioscrapy/utils/curl.py +168 -16
  87. aioscrapy/utils/decorators.py +76 -6
  88. aioscrapy/utils/deprecate.py +212 -19
  89. aioscrapy/utils/httpobj.py +55 -3
  90. aioscrapy/utils/log.py +79 -0
  91. aioscrapy/utils/misc.py +189 -21
  92. aioscrapy/utils/ossignal.py +67 -5
  93. aioscrapy/utils/project.py +165 -3
  94. aioscrapy/utils/python.py +254 -44
  95. aioscrapy/utils/reqser.py +75 -1
  96. aioscrapy/utils/request.py +173 -12
  97. aioscrapy/utils/response.py +91 -6
  98. aioscrapy/utils/signal.py +196 -14
  99. aioscrapy/utils/spider.py +51 -4
  100. aioscrapy/utils/template.py +93 -6
  101. aioscrapy/utils/tools.py +191 -17
  102. aioscrapy/utils/trackref.py +198 -12
  103. aioscrapy/utils/url.py +341 -36
  104. aio_scrapy-2.1.4.dist-info/RECORD +0 -133
  105. aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -115
  106. aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -59
  107. aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
  108. aioscrapy/http/response/playwright.py +0 -36
  109. aioscrapy/libs/pipelines/execl.py +0 -169
  110. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/entry_points.txt +0 -0
  111. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,14 @@
1
+
1
2
  """
2
- This module implements the Request class which is used to represent HTTP
3
- requests in Aioscrapy.
3
+ HTTP Request implementation for aioscrapy.
4
+ aioscrapy的HTTP请求实现。
4
5
 
5
- See documentation in docs/topics/request-response.rst
6
+ This module provides the Request class, which represents an HTTP request to be sent by the crawler.
7
+ It handles URL normalization, fingerprinting, serialization, and other request-related functionality.
8
+ 此模块提供了Request类,表示由爬虫发送的HTTP请求。
9
+ 它处理URL规范化、指纹生成、序列化和其他与请求相关的功能。
6
10
  """
11
+
7
12
  import hashlib
8
13
  import inspect
9
14
  import json
@@ -18,6 +23,8 @@ from aioscrapy.utils.curl import curl_to_request_kwargs
18
23
  from aioscrapy.utils.python import to_unicode
19
24
  from aioscrapy.utils.url import escape_ajax
20
25
 
26
+ # Type variable for Request class to use in class methods
27
+ # 用于在类方法中使用的Request类的类型变量
21
28
  RequestTypeVar = TypeVar("RequestTypeVar", bound="Request")
22
29
 
23
30
 
@@ -47,7 +54,27 @@ class Request(object):
47
54
  fingerprint: Optional[str] = None,
48
55
  use_proxy: bool = True,
49
56
  ):
50
-
57
+ """
58
+ Initialize a Request object.
59
+ 初始化Request对象。
60
+
61
+ Args:
62
+ url: URL for the request. 请求的URL。
63
+ callback: Function to call when the response is received. 接收到响应时调用的函数。
64
+ method: HTTP method. HTTP方法。
65
+ headers: HTTP headers. HTTP头信息。
66
+ body: Request body. 请求体。
67
+ cookies: Cookies to send with the request. 随请求发送的Cookie。
68
+ meta: Additional metadata. 额外的元数据。
69
+ encoding: Encoding for the URL and body. URL和请求体的编码。
70
+ priority: Request priority. 请求优先级。
71
+ dont_filter: Whether to filter this request through the scheduler's dupefilter. 是否通过调度器的去重过滤器过滤此请求。
72
+ errback: Function to call if an error occurs during processing. 处理过程中发生错误时调用的函数。
73
+ flags: Request flags. 请求标志。
74
+ cb_kwargs: Additional keyword arguments to pass to the callback. 传递给回调函数的额外关键字参数。
75
+ fingerprint: Request fingerprint. 请求指纹。
76
+ use_proxy: Whether to use a proxy for this request. 是否为此请求使用代理。
77
+ """
51
78
  self._encoding = encoding
52
79
  self.method = str(method).upper()
53
80
  self._set_url(url)
@@ -71,26 +98,90 @@ class Request(object):
71
98
 
72
99
  @property
73
100
  def cb_kwargs(self) -> dict:
101
+ """
102
+ Get the callback keyword arguments dictionary.
103
+ 获取回调关键字参数字典。
104
+
105
+ This property ensures that the callback keyword arguments dictionary
106
+ is always initialized, creating an empty dictionary if needed.
107
+ 此属性确保回调关键字参数字典始终被初始化,如果需要则创建一个空字典。
108
+
109
+ Returns:
110
+ dict: The callback keyword arguments dictionary.
111
+ 回调关键字参数字典。
112
+ """
74
113
  if self._cb_kwargs is None:
75
114
  self._cb_kwargs = {}
76
115
  return self._cb_kwargs
77
116
 
78
117
  @property
79
118
  def meta(self) -> dict:
119
+ """
120
+ Get the request metadata dictionary.
121
+ 获取请求元数据字典。
122
+
123
+ This property ensures that the metadata dictionary is always initialized,
124
+ creating an empty dictionary if needed. The metadata dictionary is used
125
+ to store arbitrary data associated with the request.
126
+ 此属性确保元数据字典始终被初始化,如果需要则创建一个空字典。
127
+ 元数据字典用于存储与请求相关的任意数据。
128
+
129
+ Returns:
130
+ dict: The request metadata dictionary.
131
+ 请求元数据字典。
132
+ """
80
133
  if self._meta is None:
81
134
  self._meta = {}
82
135
  return self._meta
83
136
 
84
137
  def _get_url(self) -> str:
138
+ """
139
+ Get the request URL.
140
+ 获取请求URL。
141
+
142
+ This is an internal method used by the url property.
143
+ 这是由url属性使用的内部方法。
144
+
145
+ Returns:
146
+ str: The request URL.
147
+ 请求URL。
148
+ """
85
149
  return self._url
86
150
 
87
151
  def _set_url(self, url: str) -> None:
152
+ """
153
+ Set the request URL.
154
+ 设置请求URL。
155
+
156
+ This method normalizes the URL by:
157
+ 此方法通过以下方式规范化URL:
158
+ 1. Converting it to a safe string using the request's encoding
159
+ 使用请求的编码将其转换为安全字符串
160
+ 2. Escaping AJAX-specific characters
161
+ 转义AJAX特定字符
162
+ 3. Validating that the URL has a scheme
163
+ 验证URL具有协议方案
164
+
165
+ Args:
166
+ url: The URL to set.
167
+ 要设置的URL。
168
+
169
+ Raises:
170
+ TypeError: If the URL is not a string.
171
+ 如果URL不是字符串。
172
+ ValueError: If the URL does not have a scheme.
173
+ 如果URL没有协议方案。
174
+ """
88
175
  if not isinstance(url, str):
89
176
  raise TypeError(f'Request url must be str or unicode, got {type(url).__name__}')
90
177
 
178
+ # Normalize the URL
179
+ # 规范化URL
91
180
  s = safe_url_string(url, self.encoding)
92
181
  self._url = escape_ajax(s)
93
182
 
183
+ # Validate that the URL has a scheme
184
+ # 验证URL具有协议方案
94
185
  if (
95
186
  '://' not in self._url
96
187
  and not self._url.startswith('about:')
@@ -98,41 +189,135 @@ class Request(object):
98
189
  ):
99
190
  raise ValueError(f'Missing scheme in request url: {self._url}')
100
191
 
192
+ # Property that uses the getter and setter methods
193
+ # 使用getter和setter方法的属性
101
194
  url = property(_get_url, _set_url)
102
195
 
103
196
  def _get_body(self) -> str:
197
+ """
198
+ Get the request body.
199
+ 获取请求体。
200
+
201
+ This is an internal method used by the body property.
202
+ 这是由body属性使用的内部方法。
203
+
204
+ Returns:
205
+ str: The request body.
206
+ 请求体。
207
+ """
104
208
  return self._body
105
209
 
106
210
  def _set_body(self, body: str) -> None:
211
+ """
212
+ Set the request body.
213
+ 设置请求体。
214
+
215
+ This method sets the request body, converting None to an empty string.
216
+ 此方法设置请求体,将None转换为空字符串。
217
+
218
+ Args:
219
+ body: The body to set.
220
+ 要设置的请求体。
221
+ """
107
222
  self._body = '' if body is None else body
108
223
 
224
+ # Property that uses the getter and setter methods
225
+ # 使用getter和setter方法的属性
109
226
  body = property(_get_body, _set_body)
110
227
 
111
228
  def _set_fingerprint(self, fingerprint: str) -> None:
229
+ """
230
+ Set the request fingerprint.
231
+ 设置请求指纹。
232
+
233
+ This is an internal method used by the fingerprint property.
234
+ The fingerprint is stored in the request's metadata.
235
+ 这是由fingerprint属性使用的内部方法。
236
+ 指纹存储在请求的元数据中。
237
+
238
+ Args:
239
+ fingerprint: The fingerprint to set.
240
+ 要设置的指纹。
241
+ """
112
242
  self._meta['_fingerprint'] = fingerprint
113
243
 
114
244
  def _get_fingerprint(self) -> str:
245
+ """
246
+ Get the request fingerprint.
247
+ 获取请求指纹。
248
+
249
+ This is an internal method used by the fingerprint property.
250
+ If the fingerprint doesn't exist, it's generated using make_fingerprint().
251
+ 这是由fingerprint属性使用的内部方法。
252
+ 如果指纹不存在,则使用make_fingerprint()生成。
253
+
254
+ Returns:
255
+ str: The request fingerprint.
256
+ 请求指纹。
257
+ """
115
258
  if not self._meta.get('_fingerprint'):
116
259
  self._meta['_fingerprint'] = self.make_fingerprint()
117
260
  return self._meta.get('_fingerprint')
118
261
 
262
+ # Property that uses the getter and setter methods
263
+ # 使用getter和setter方法的属性
119
264
  fingerprint = property(_get_fingerprint, _set_fingerprint)
120
265
 
121
266
  @property
122
267
  def encoding(self) -> str:
268
+ """
269
+ Get the request encoding.
270
+ 获取请求编码。
271
+
272
+ This encoding is used for URL and body encoding.
273
+ 此编码用于URL和请求体编码。
274
+
275
+ Returns:
276
+ str: The request encoding.
277
+ 请求编码。
278
+ """
123
279
  return self._encoding
124
280
 
125
281
  def __str__(self) -> str:
282
+ """
283
+ Return a string representation of the request.
284
+ 返回请求的字符串表示。
285
+
286
+ The string representation includes the HTTP method and URL.
287
+ 字符串表示包括HTTP方法和URL。
288
+
289
+ Returns:
290
+ str: A string representation of the request.
291
+ 请求的字符串表示。
292
+ """
126
293
  return f"<{self.method} {self.url}>"
127
294
 
295
+ # Use the same implementation for __repr__
296
+ # 对__repr__使用相同的实现
128
297
  __repr__ = __str__
129
298
 
130
299
  def copy(self) -> "Request":
131
- """Return a copy of this Request"""
300
+ """
301
+ Return a copy of this Request.
302
+ 返回此Request的副本。
303
+
304
+ Returns:
305
+ A copy of this Request. 此Request的副本。
306
+ """
132
307
  return self.replace()
133
308
 
134
309
  def replace(self, *args, **kwargs) -> "Request":
135
- """Create a new Request with the same attributes except for those given new values."""
310
+ """
311
+ Create a new Request with the same attributes except for those given new values.
312
+ 创建一个新的Request,除了给定的新值外,其他属性与当前Request相同。
313
+
314
+ Args:
315
+ *args: Positional arguments for the new Request. 新Request的位置参数。
316
+ **kwargs: Keyword arguments for the new Request. 新Request的关键字参数。
317
+
318
+ Returns:
319
+ A new Request object. 一个新的Request对象。
320
+ """
136
321
  for x in self.attributes:
137
322
  kwargs.setdefault(x, getattr(self, x))
138
323
  cls = kwargs.pop('cls', self.__class__)
@@ -142,7 +327,18 @@ class Request(object):
142
327
  def from_curl(
143
328
  cls: Type[RequestTypeVar], curl_command: str, ignore_unknown_options: bool = True, **kwargs
144
329
  ) -> RequestTypeVar:
145
- """Create a Request object from a string containing a `cURL"""
330
+ """
331
+ Create a Request object from a string containing a cURL command.
332
+ 从包含cURL命令的字符串创建Request对象。
333
+
334
+ Args:
335
+ curl_command: The cURL command. cURL命令。
336
+ ignore_unknown_options: Whether to ignore unknown cURL options. 是否忽略未知的cURL选项。
337
+ **kwargs: Additional keyword arguments for the Request. Request的额外关键字参数。
338
+
339
+ Returns:
340
+ A Request object. Request对象。
341
+ """
146
342
  request_kwargs = curl_to_request_kwargs(curl_command, ignore_unknown_options)
147
343
  request_kwargs.update(kwargs)
148
344
  return cls(**request_kwargs)
@@ -151,7 +347,19 @@ class Request(object):
151
347
  self,
152
348
  keep_fragments: bool = False,
153
349
  ) -> str:
154
- """ make the request fingerprint. """
350
+ """
351
+ Make the request fingerprint.
352
+ 生成请求指纹。
353
+
354
+ The fingerprint is a hash of the request's method, URL, and body.
355
+ 指纹是请求的方法、URL和请求体的哈希值。
356
+
357
+ Args:
358
+ keep_fragments: Whether to keep URL fragments in the fingerprint. 是否在指纹中保留URL片段。
359
+
360
+ Returns:
361
+ The request fingerprint. 请求指纹。
362
+ """
155
363
  return hashlib.sha1(
156
364
  json.dumps({
157
365
  'method': to_unicode(self.method),
@@ -161,12 +369,22 @@ class Request(object):
161
369
  ).hexdigest()
162
370
 
163
371
  def to_dict(self, *, spider: Optional["aioscrapy.Spider"] = None) -> dict:
164
- """Return a dictionary containing the Request's data.
372
+ """
373
+ Return a dictionary containing the Request's data.
374
+ 返回包含Request数据的字典。
165
375
 
166
- Use :func:`~scrapy.utils.request.request_from_dict` to convert back into a :class:`~scrapy.Request` object.
376
+ Use request_from_dict() to convert back into a Request object.
377
+ 使用request_from_dict()将其转换回Request对象。
167
378
 
168
379
  If a spider is given, this method will try to find out the name of the spider methods used as callback
169
380
  and errback and include them in the output dict, raising an exception if they cannot be found.
381
+ 如果提供了爬虫,此方法将尝试找出用作回调和错误回调的爬虫方法的名称,并将它们包含在输出字典中,如果找不到则引发异常。
382
+
383
+ Args:
384
+ spider: The spider instance. 爬虫实例。
385
+
386
+ Returns:
387
+ A dictionary containing the Request's data. 包含Request数据的字典。
170
388
  """
171
389
  d = {
172
390
  "url": self.url, # urls are safe (safe_string_url)
@@ -183,16 +401,44 @@ class Request(object):
183
401
 
184
402
 
185
403
  def _find_method(obj, func):
186
- """Helper function for Request.to_dict"""
404
+ """
405
+ Find the name of a method in an object.
406
+ 在对象中查找方法的名称。
407
+
408
+ This is a helper function for Request.to_dict() that finds the name of a method
409
+ in an object by comparing the underlying function objects.
410
+ 这是Request.to_dict()的辅助函数,通过比较底层函数对象在对象中查找方法的名称。
411
+
412
+ Args:
413
+ obj: The object to search in.
414
+ 要搜索的对象。
415
+ func: The method to find.
416
+ 要查找的方法。
417
+
418
+ Returns:
419
+ str: The name of the method.
420
+ 方法的名称。
421
+
422
+ Raises:
423
+ ValueError: If the function is not an instance method in the object.
424
+ 如果函数不是对象中的实例方法。
425
+ """
187
426
  # Only instance methods contain ``__func__``
427
+ # 只有实例方法包含``__func__``
188
428
  if obj and hasattr(func, '__func__'):
429
+ # Get all methods of the object
430
+ # 获取对象的所有方法
189
431
  members = inspect.getmembers(obj, predicate=inspect.ismethod)
190
432
  for name, obj_func in members:
191
433
  # We need to use __func__ to access the original function object because instance
192
434
  # method objects are generated each time attribute is retrieved from instance.
435
+ # 我们需要使用__func__来访问原始函数对象,因为实例方法对象在每次从实例检索属性时都会生成。
193
436
  #
194
437
  # Reference: The standard type hierarchy
438
+ # 参考:标准类型层次结构
195
439
  # https://docs.python.org/3/reference/datamodel.html
196
440
  if obj_func.__func__ is func.__func__:
197
441
  return name
442
+ # If we get here, the function was not found
443
+ # 如果我们到达这里,则未找到函数
198
444
  raise ValueError(f"Function {func} is not an instance method in: {obj}")
@@ -1,39 +1,119 @@
1
+
1
2
  """
2
- This module implements the FormRequest class which is a more convenient class
3
- (than Request) to generate Requests based on form data.
3
+ Form request implementation for aioscrapy.
4
+ aioscrapy的表单请求实现。
4
5
 
5
- See documentation in docs/topics/request-response.rst
6
+ This module provides the FormRequest class, which is a specialized Request
7
+ that handles HTML form submission, both GET and POST methods.
8
+ 此模块提供了FormRequest类,这是一个专门处理HTML表单提交的Request,
9
+ 支持GET和POST方法。
6
10
  """
11
+
7
12
  from typing import List, Optional, Tuple, Union
8
13
  from urllib.parse import urlencode
9
14
 
10
15
  from aioscrapy.http.request import Request
11
16
  from aioscrapy.utils.python import to_bytes, is_listlike
12
17
 
18
+ # Type definition for form data, which can be a dictionary or a list of key-value tuples
19
+ # 表单数据的类型定义,可以是字典或键值元组列表
13
20
  FormdataType = Optional[Union[dict, List[Tuple[str, str]]]]
14
21
 
15
22
 
16
23
  class FormRequest(Request):
24
+ """
25
+ A Request that submits HTML form data.
26
+ 提交HTML表单数据的Request。
27
+
28
+ This class extends the base Request to handle form submissions,
29
+ automatically setting the appropriate method, headers, and
30
+ encoding the form data either in the URL (for GET requests)
31
+ or in the body (for POST requests).
32
+ 此类扩展了基本Request以处理表单提交,自动设置适当的方法、
33
+ 头部,并将表单数据编码到URL中(对于GET请求)或请求体中
34
+ (对于POST请求)。
35
+ """
36
+
37
+ # Valid HTTP methods for form submission
38
+ # 表单提交的有效HTTP方法
17
39
  valid_form_methods = ['GET', 'POST']
18
40
 
19
41
  def __init__(self, *args, formdata: FormdataType = None, **kwargs) -> None:
42
+ """
43
+ Initialize a FormRequest.
44
+ 初始化FormRequest。
45
+
46
+ This constructor extends the base Request constructor to handle form data.
47
+ If form data is provided and no method is specified, it defaults to POST.
48
+ 此构造函数扩展了基本Request构造函数以处理表单数据。
49
+ 如果提供了表单数据且未指定方法,则默认为POST。
50
+
51
+ Args:
52
+ *args: Positional arguments passed to the Request constructor.
53
+ 传递给Request构造函数的位置参数。
54
+ formdata: Form data to submit, either as a dict or a list of (name, value) tuples.
55
+ 要提交的表单数据,可以是字典或(名称, 值)元组的列表。
56
+ **kwargs: Keyword arguments passed to the Request constructor.
57
+ 传递给Request构造函数的关键字参数。
58
+ """
59
+ # Default to POST method if form data is provided and no method is specified
60
+ # 如果提供了表单数据且未指定方法,则默认为POST方法
20
61
  if formdata and kwargs.get('method') is None:
21
62
  kwargs['method'] = 'POST'
22
63
 
64
+ # Initialize the base Request
65
+ # 初始化基本Request
23
66
  super().__init__(*args, **kwargs)
24
67
 
68
+ # Process form data if provided
69
+ # 如果提供了表单数据,则处理它
25
70
  if formdata:
71
+ # Convert dict to items() iterator if necessary
72
+ # 如果需要,将字典转换为items()迭代器
26
73
  items = formdata.items() if isinstance(formdata, dict) else formdata
74
+
75
+ # URL-encode the form data
76
+ # URL编码表单数据
27
77
  form_query: str = _urlencode(items, self.encoding)
78
+
28
79
  if self.method == 'POST':
80
+ # For POST requests, set the Content-Type header and put form data in the body
81
+ # 对于POST请求,设置Content-Type头部并将表单数据放入请求体
29
82
  self.headers.setdefault('Content-Type', 'application/x-www-form-urlencoded')
30
83
  self._set_body(form_query)
31
84
  else:
85
+ # For GET requests, append form data to the URL
86
+ # 对于GET请求,将表单数据附加到URL
32
87
  self._set_url(self.url + ('&' if '?' in self.url else '?') + form_query)
33
88
 
34
89
 
35
90
  def _urlencode(seq, enc):
91
+ """
92
+ URL-encode a sequence of form data.
93
+ URL编码表单数据序列。
94
+
95
+ This internal function handles the encoding of form data for submission,
96
+ converting keys and values to bytes using the specified encoding and
97
+ properly handling list-like values.
98
+ 此内部函数处理表单数据的编码以便提交,使用指定的编码将键和值转换为字节,
99
+ 并正确处理类似列表的值。
100
+
101
+ Args:
102
+ seq: A sequence of (name, value) pairs to encode.
103
+ 要编码的(名称, 值)对序列。
104
+ enc: The encoding to use for converting strings to bytes.
105
+ 用于将字符串转换为字节的编码。
106
+
107
+ Returns:
108
+ str: The URL-encoded form data string.
109
+ URL编码的表单数据字符串。
110
+ """
111
+ # Convert each key-value pair to bytes and handle list-like values
112
+ # 将每个键值对转换为字节并处理类似列表的值
36
113
  values = [(to_bytes(k, enc), to_bytes(v, enc))
37
114
  for k, vs in seq
38
115
  for v in (vs if is_listlike(vs) else [vs])]
116
+
117
+ # Use urllib's urlencode with doseq=1 to properly handle sequences
118
+ # 使用urllib的urlencode,doseq=1以正确处理序列
39
119
  return urlencode(values, doseq=1)