aio-scrapy 2.1.3__py3-none-any.whl → 2.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/LICENSE +1 -1
  2. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/METADATA +53 -40
  3. aio_scrapy-2.1.6.dist-info/RECORD +134 -0
  4. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/WHEEL +1 -1
  5. aioscrapy/VERSION +1 -1
  6. aioscrapy/cmdline.py +438 -5
  7. aioscrapy/core/downloader/__init__.py +523 -18
  8. aioscrapy/core/downloader/handlers/__init__.py +188 -6
  9. aioscrapy/core/downloader/handlers/aiohttp.py +188 -4
  10. aioscrapy/core/downloader/handlers/curl_cffi.py +125 -4
  11. aioscrapy/core/downloader/handlers/httpx.py +134 -4
  12. aioscrapy/core/downloader/handlers/pyhttpx.py +133 -4
  13. aioscrapy/core/downloader/handlers/requests.py +121 -3
  14. aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
  15. aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
  16. aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
  17. aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
  18. aioscrapy/core/engine.py +381 -20
  19. aioscrapy/core/scheduler.py +350 -36
  20. aioscrapy/core/scraper.py +509 -33
  21. aioscrapy/crawler.py +392 -10
  22. aioscrapy/db/__init__.py +149 -0
  23. aioscrapy/db/absmanager.py +212 -6
  24. aioscrapy/db/aiomongo.py +292 -10
  25. aioscrapy/db/aiomysql.py +363 -10
  26. aioscrapy/db/aiopg.py +299 -2
  27. aioscrapy/db/aiorabbitmq.py +444 -4
  28. aioscrapy/db/aioredis.py +260 -11
  29. aioscrapy/dupefilters/__init__.py +110 -5
  30. aioscrapy/dupefilters/disk.py +124 -2
  31. aioscrapy/dupefilters/redis.py +598 -32
  32. aioscrapy/exceptions.py +170 -14
  33. aioscrapy/http/__init__.py +1 -1
  34. aioscrapy/http/headers.py +237 -3
  35. aioscrapy/http/request/__init__.py +257 -11
  36. aioscrapy/http/request/form.py +83 -3
  37. aioscrapy/http/request/json_request.py +121 -9
  38. aioscrapy/http/response/__init__.py +306 -33
  39. aioscrapy/http/response/html.py +42 -3
  40. aioscrapy/http/response/text.py +496 -49
  41. aioscrapy/http/response/web_driver.py +144 -0
  42. aioscrapy/http/response/xml.py +45 -3
  43. aioscrapy/libs/downloader/defaultheaders.py +66 -2
  44. aioscrapy/libs/downloader/downloadtimeout.py +91 -2
  45. aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
  46. aioscrapy/libs/downloader/retry.py +193 -7
  47. aioscrapy/libs/downloader/stats.py +142 -0
  48. aioscrapy/libs/downloader/useragent.py +93 -2
  49. aioscrapy/libs/extensions/closespider.py +166 -4
  50. aioscrapy/libs/extensions/corestats.py +151 -1
  51. aioscrapy/libs/extensions/logstats.py +145 -1
  52. aioscrapy/libs/extensions/metric.py +370 -1
  53. aioscrapy/libs/extensions/throttle.py +235 -1
  54. aioscrapy/libs/pipelines/__init__.py +345 -2
  55. aioscrapy/libs/pipelines/csv.py +242 -0
  56. aioscrapy/libs/pipelines/excel.py +545 -0
  57. aioscrapy/libs/pipelines/mongo.py +132 -0
  58. aioscrapy/libs/pipelines/mysql.py +67 -0
  59. aioscrapy/libs/pipelines/pg.py +67 -0
  60. aioscrapy/libs/spider/depth.py +141 -3
  61. aioscrapy/libs/spider/httperror.py +144 -4
  62. aioscrapy/libs/spider/offsite.py +202 -2
  63. aioscrapy/libs/spider/referer.py +396 -21
  64. aioscrapy/libs/spider/urllength.py +97 -1
  65. aioscrapy/link.py +115 -8
  66. aioscrapy/logformatter.py +199 -8
  67. aioscrapy/middleware/absmanager.py +328 -2
  68. aioscrapy/middleware/downloader.py +218 -0
  69. aioscrapy/middleware/extension.py +50 -1
  70. aioscrapy/middleware/itempipeline.py +96 -0
  71. aioscrapy/middleware/spider.py +360 -7
  72. aioscrapy/process.py +200 -0
  73. aioscrapy/proxy/__init__.py +142 -3
  74. aioscrapy/proxy/redis.py +136 -2
  75. aioscrapy/queue/__init__.py +168 -16
  76. aioscrapy/scrapyd/runner.py +124 -3
  77. aioscrapy/serializer.py +182 -2
  78. aioscrapy/settings/__init__.py +610 -128
  79. aioscrapy/settings/default_settings.py +313 -13
  80. aioscrapy/signalmanager.py +151 -20
  81. aioscrapy/signals.py +183 -1
  82. aioscrapy/spiderloader.py +165 -12
  83. aioscrapy/spiders/__init__.py +233 -6
  84. aioscrapy/statscollectors.py +312 -1
  85. aioscrapy/utils/conf.py +345 -17
  86. aioscrapy/utils/curl.py +168 -16
  87. aioscrapy/utils/decorators.py +76 -6
  88. aioscrapy/utils/deprecate.py +212 -19
  89. aioscrapy/utils/httpobj.py +55 -3
  90. aioscrapy/utils/log.py +79 -0
  91. aioscrapy/utils/misc.py +189 -21
  92. aioscrapy/utils/ossignal.py +67 -5
  93. aioscrapy/utils/project.py +165 -3
  94. aioscrapy/utils/python.py +254 -44
  95. aioscrapy/utils/reqser.py +75 -1
  96. aioscrapy/utils/request.py +173 -12
  97. aioscrapy/utils/response.py +91 -6
  98. aioscrapy/utils/signal.py +196 -14
  99. aioscrapy/utils/spider.py +51 -4
  100. aioscrapy/utils/template.py +93 -6
  101. aioscrapy/utils/tools.py +191 -17
  102. aioscrapy/utils/trackref.py +198 -12
  103. aioscrapy/utils/url.py +341 -36
  104. aio_scrapy-2.1.3.dist-info/RECORD +0 -133
  105. aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -110
  106. aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -53
  107. aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
  108. aioscrapy/http/response/playwright.py +0 -36
  109. aioscrapy/libs/pipelines/execl.py +0 -169
  110. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/entry_points.txt +0 -0
  111. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/top_level.txt +0 -0
@@ -1,8 +1,13 @@
1
- """
2
- This module implements the JsonRequest class which is a more convenient class
3
- (than Request) to generate JSON Requests.
4
1
 
5
- See documentation in docs/topics/request-response.rst
2
+ """
3
+ JSON request implementation for aioscrapy.
4
+ aioscrapy的JSON请求实现。
5
+
6
+ This module provides the JsonRequest class, which is a specialized Request
7
+ that handles JSON data, automatically setting appropriate headers and
8
+ serializing Python objects to JSON.
9
+ 此模块提供了JsonRequest类,这是一个专门处理JSON数据的Request,
10
+ 自动设置适当的头部并将Python对象序列化为JSON。
6
11
  """
7
12
 
8
13
  import copy
@@ -11,53 +16,160 @@ import warnings
11
16
  from typing import Optional, Tuple
12
17
 
13
18
  from aioscrapy.http.request import Request
14
- from aioscrapy.utils.deprecate import create_deprecated_class
15
19
 
16
20
 
17
21
  class JsonRequest(Request):
22
+ """
23
+ A Request that handles JSON data.
24
+ 处理JSON数据的Request。
25
+
26
+ This class extends the base Request to handle JSON data, automatically
27
+ setting appropriate headers for JSON content and serializing Python
28
+ objects to JSON format.
29
+ 此类扩展了基本Request以处理JSON数据,自动设置JSON内容的
30
+ 适当头部,并将Python对象序列化为JSON格式。
31
+ """
32
+
33
+ # Add dumps_kwargs to the list of attributes to be included in serialization
34
+ # 将dumps_kwargs添加到要包含在序列化中的属性列表中
18
35
  attributes: Tuple[str, ...] = Request.attributes + ("dumps_kwargs",)
19
36
 
20
37
  def __init__(self, *args, dumps_kwargs: Optional[dict] = None, **kwargs) -> None:
38
+ """
39
+ Initialize a JsonRequest.
40
+ 初始化JsonRequest。
41
+
42
+ This constructor extends the base Request constructor to handle JSON data.
43
+ It accepts either a 'body' parameter with pre-serialized JSON or a 'data'
44
+ parameter with a Python object to be serialized to JSON.
45
+ 此构造函数扩展了基本Request构造函数以处理JSON数据。
46
+ 它接受带有预序列化JSON的'body'参数或带有要序列化为JSON的Python对象的'data'参数。
47
+
48
+ Args:
49
+ *args: Positional arguments passed to the Request constructor.
50
+ 传递给Request构造函数的位置参数。
51
+ dumps_kwargs: Optional keyword arguments to pass to json.dumps().
52
+ 可选的关键字参数,传递给json.dumps()。
53
+ **kwargs: Keyword arguments passed to the Request constructor.
54
+ May include 'data' (a Python object to serialize to JSON)
55
+ or 'body' (pre-serialized JSON string).
56
+ 传递给Request构造函数的关键字参数。
57
+ 可能包括'data'(要序列化为JSON的Python对象)
58
+ 或'body'(预序列化的JSON字符串)。
59
+ """
60
+ # Make a deep copy of dumps_kwargs to avoid modifying the original
61
+ # 深拷贝dumps_kwargs以避免修改原始对象
21
62
  dumps_kwargs = copy.deepcopy(dumps_kwargs) if dumps_kwargs is not None else {}
22
63
  self._dumps_kwargs = dumps_kwargs
23
64
 
65
+ # Check if body or data parameters were provided
66
+ # 检查是否提供了body或data参数
24
67
  body_passed = kwargs.get('body', None) is not None
25
68
  data = kwargs.pop('data', None)
26
69
  data_passed = data is not None
27
70
 
71
+ # Handle the case where both body and data are provided
72
+ # 处理同时提供body和data的情况
28
73
  if body_passed and data_passed:
29
74
  warnings.warn('Both body and data passed. data will be ignored')
30
75
 
76
+ # Handle the case where only data is provided
77
+ # 处理只提供data的情况
31
78
  elif not body_passed and data_passed:
79
+ # Serialize the data to JSON and set it as the body
80
+ # 将数据序列化为JSON并将其设置为body
32
81
  kwargs['body'] = self._dumps(data)
33
82
 
83
+ # Default to POST method if not specified
84
+ # 如果未指定,则默认为POST方法
34
85
  if 'method' not in kwargs:
35
86
  kwargs['method'] = 'POST'
36
87
 
88
+ # Initialize the base Request
89
+ # 初始化基本Request
37
90
  super().__init__(*args, **kwargs)
91
+
92
+ # Set default headers for JSON content
93
+ # 设置JSON内容的默认头部
38
94
  self.headers.setdefault('Content-Type', 'application/json')
39
95
  self.headers.setdefault('Accept', 'application/json, text/javascript, */*; q=0.01')
40
96
 
41
97
  @property
42
98
  def dumps_kwargs(self) -> dict:
99
+ """
100
+ Get the keyword arguments used for JSON serialization.
101
+ 获取用于JSON序列化的关键字参数。
102
+
103
+ These arguments are passed to json.dumps() when serializing data.
104
+ 这些参数在序列化数据时传递给json.dumps()。
105
+
106
+ Returns:
107
+ dict: The keyword arguments for json.dumps().
108
+ json.dumps()的关键字参数。
109
+ """
43
110
  return self._dumps_kwargs
44
111
 
45
112
  def replace(self, *args, **kwargs) -> Request:
113
+ """
114
+ Create a new JsonRequest with the same attributes except for those given new values.
115
+ 创建一个新的JsonRequest,除了给定的新值外,其他属性与当前JsonRequest相同。
116
+
117
+ This method extends the base Request.replace() method to handle the 'data'
118
+ parameter, serializing it to JSON if provided.
119
+ 此方法扩展了基本Request.replace()方法以处理'data'参数,
120
+ 如果提供了该参数,则将其序列化为JSON。
121
+
122
+ Args:
123
+ *args: Positional arguments passed to the base replace() method.
124
+ 传递给基本replace()方法的位置参数。
125
+ **kwargs: Keyword arguments passed to the base replace() method.
126
+ May include 'data' (a Python object to serialize to JSON)
127
+ or 'body' (pre-serialized JSON string).
128
+ 传递给基本replace()方法的关键字参数。
129
+ 可能包括'data'(要序列化为JSON的Python对象)
130
+ 或'body'(预序列化的JSON字符串)。
131
+
132
+ Returns:
133
+ Request: A new JsonRequest object.
134
+ 一个新的JsonRequest对象。
135
+ """
136
+ # Check if body or data parameters were provided
137
+ # 检查是否提供了body或data参数
46
138
  body_passed = kwargs.get('body', None) is not None
47
139
  data = kwargs.pop('data', None)
48
140
  data_passed = data is not None
49
141
 
142
+ # Handle the case where both body and data are provided
143
+ # 处理同时提供body和data的情况
50
144
  if body_passed and data_passed:
51
145
  warnings.warn('Both body and data passed. data will be ignored')
52
146
 
147
+ # Handle the case where only data is provided
148
+ # 处理只提供data的情况
53
149
  elif not body_passed and data_passed:
150
+ # Serialize the data to JSON and set it as the body
151
+ # 将数据序列化为JSON并将其设置为body
54
152
  kwargs['body'] = self._dumps(data)
55
153
 
154
+ # Call the base replace() method
155
+ # 调用基本replace()方法
56
156
  return super().replace(*args, **kwargs)
57
157
 
58
158
  def _dumps(self, data: dict) -> str:
59
- """Convert to JSON """
159
+ """
160
+ Convert Python data to a JSON string.
161
+ 将Python数据转换为JSON字符串。
162
+
163
+ This internal method serializes the given data to JSON using the
164
+ json.dumps() function with the configured keyword arguments.
165
+ 此内部方法使用json.dumps()函数和配置的关键字参数将给定数据序列化为JSON。
166
+
167
+ Args:
168
+ data: The Python object to serialize to JSON.
169
+ 要序列化为JSON的Python对象。
170
+
171
+ Returns:
172
+ str: The JSON string representation of the data.
173
+ 数据的JSON字符串表示。
174
+ """
60
175
  return json.dumps(data, **self._dumps_kwargs)
61
-
62
-
63
- JSONRequest = create_deprecated_class("JSONRequest", JsonRequest)
@@ -1,9 +1,15 @@
1
+
1
2
  """
2
- This module implements the Response class which is used to represent HTTP
3
- responses in aioscrapy.
3
+ HTTP Response implementation for aioscrapy.
4
+ aioscrapy的HTTP响应实现。
4
5
 
5
- See documentation in docs/topics/request-response.rst
6
+ This module provides the Response class, which represents an HTTP response
7
+ received by the crawler. It handles response data, headers, cookies, and
8
+ provides methods for URL joining and following links.
9
+ 此模块提供了Response类,表示爬虫接收到的HTTP响应。它处理响应数据、
10
+ 头部、Cookie,并提供URL连接和跟踪链接的方法。
6
11
  """
12
+
7
13
  from typing import Generator, Optional
8
14
  from urllib.parse import urljoin
9
15
 
@@ -24,6 +30,19 @@ class Response(object):
24
30
  flags: Optional[list] = None,
25
31
  request: Optional[Request] = None,
26
32
  ):
33
+ """
34
+ Initialize a Response object.
35
+ 初始化Response对象。
36
+
37
+ Args:
38
+ url: URL for this response. 此响应的URL。
39
+ status: HTTP status code. HTTP状态码。
40
+ headers: HTTP headers. HTTP头信息。
41
+ cookies: Cookies from the response. 响应中的Cookie。
42
+ body: Response body. 响应体。
43
+ flags: Response flags. 响应标志。
44
+ request: The Request object that generated this response. 生成此响应的Request对象。
45
+ """
27
46
  self.headers = headers or {}
28
47
  self.status = int(status)
29
48
  self._set_body(body)
@@ -34,6 +53,24 @@ class Response(object):
34
53
 
35
54
  @property
36
55
  def cb_kwargs(self):
56
+ """
57
+ Get the callback keyword arguments from the request that generated this response.
58
+ 从生成此响应的请求中获取回调关键字参数。
59
+
60
+ This property provides access to the cb_kwargs dictionary of the request
61
+ that generated this response, allowing callback functions to access
62
+ data passed from the request.
63
+ 此属性提供对生成此响应的请求的cb_kwargs字典的访问,
64
+ 允许回调函数访问从请求传递的数据。
65
+
66
+ Returns:
67
+ dict: The callback keyword arguments dictionary.
68
+ 回调关键字参数字典。
69
+
70
+ Raises:
71
+ AttributeError: If this response is not tied to any request.
72
+ 如果此响应未与任何请求关联。
73
+ """
37
74
  try:
38
75
  return self.request.cb_kwargs
39
76
  except AttributeError:
@@ -44,6 +81,24 @@ class Response(object):
44
81
 
45
82
  @property
46
83
  def meta(self):
84
+ """
85
+ Get the metadata from the request that generated this response.
86
+ 从生成此响应的请求中获取元数据。
87
+
88
+ This property provides access to the meta dictionary of the request
89
+ that generated this response, allowing callback functions to access
90
+ metadata passed from the request.
91
+ 此属性提供对生成此响应的请求的meta字典的访问,
92
+ 允许回调函数访问从请求传递的元数据。
93
+
94
+ Returns:
95
+ dict: The request metadata dictionary.
96
+ 请求元数据字典。
97
+
98
+ Raises:
99
+ AttributeError: If this response is not tied to any request.
100
+ 如果此响应未与任何请求关联。
101
+ """
47
102
  try:
48
103
  return self.request.meta
49
104
  except AttributeError:
@@ -53,21 +108,75 @@ class Response(object):
53
108
  )
54
109
 
55
110
  def _get_url(self):
111
+ """
112
+ Get the response URL.
113
+ 获取响应URL。
114
+
115
+ This is an internal method used by the url property.
116
+ 这是由url属性使用的内部方法。
117
+
118
+ Returns:
119
+ str: The response URL.
120
+ 响应URL。
121
+ """
56
122
  return self._url
57
123
 
58
124
  def _set_url(self, url):
125
+ """
126
+ Set the response URL.
127
+ 设置响应URL。
128
+
129
+ This method validates that the URL is a string.
130
+ 此方法验证URL是一个字符串。
131
+
132
+ Args:
133
+ url: The URL to set.
134
+ 要设置的URL。
135
+
136
+ Raises:
137
+ TypeError: If the URL is not a string.
138
+ 如果URL不是字符串。
139
+ """
59
140
  if isinstance(url, str):
60
141
  self._url = url
61
142
  else:
62
143
  raise TypeError(f'{type(self).__name__} url must be str, '
63
144
  f'got {type(url).__name__}')
64
145
 
146
+ # Property that uses the getter and setter methods
147
+ # 使用getter和setter方法的属性
65
148
  url = property(_get_url, _set_url)
66
149
 
67
150
  def _get_body(self):
151
+ """
152
+ Get the response body.
153
+ 获取响应体。
154
+
155
+ This is an internal method used by the body property.
156
+ 这是由body属性使用的内部方法。
157
+
158
+ Returns:
159
+ bytes: The response body.
160
+ 响应体。
161
+ """
68
162
  return self._body
69
163
 
70
164
  def _set_body(self, body):
165
+ """
166
+ Set the response body.
167
+ 设置响应体。
168
+
169
+ This method validates that the body is bytes and converts None to an empty bytes object.
170
+ 此方法验证body是字节对象,并将None转换为空字节对象。
171
+
172
+ Args:
173
+ body: The body to set.
174
+ 要设置的响应体。
175
+
176
+ Raises:
177
+ TypeError: If the body is not bytes.
178
+ 如果body不是字节对象。
179
+ """
71
180
  if body is None:
72
181
  self._body = b''
73
182
  elif not isinstance(body, bytes):
@@ -78,20 +187,49 @@ class Response(object):
78
187
  else:
79
188
  self._body = body
80
189
 
190
+ # Property that uses the getter and setter methods
191
+ # 使用getter和setter方法的属性
81
192
  body = property(_get_body, _set_body)
82
193
 
83
194
  def __str__(self):
195
+ """
196
+ Return a string representation of the response.
197
+ 返回响应的字符串表示。
198
+
199
+ The string representation includes the HTTP status code and URL.
200
+ 字符串表示包括HTTP状态码和URL。
201
+
202
+ Returns:
203
+ str: A string representation of the response.
204
+ 响应的字符串表示。
205
+ """
84
206
  return f"<{self.status} {self.url}>"
85
207
 
208
+ # Use the same implementation for __repr__
209
+ # 对__repr__使用相同的实现
86
210
  __repr__ = __str__
87
211
 
88
212
  def copy(self):
89
- """Return a copy of this Response"""
213
+ """
214
+ Return a copy of this Response.
215
+ 返回此Response的副本。
216
+
217
+ Returns:
218
+ A copy of this Response. 此Response的副本。
219
+ """
90
220
  return self.replace()
91
221
 
92
222
  def replace(self, *args, **kwargs):
93
- """Create a new Response with the same attributes except for those
94
- given new values.
223
+ """
224
+ Create a new Response with the same attributes except for those given new values.
225
+ 创建一个新的Response,除了给定的新值外,其他属性与当前Response相同。
226
+
227
+ Args:
228
+ *args: Positional arguments for the new Response. 新Response的位置参数。
229
+ **kwargs: Keyword arguments for the new Response. 新Response的关键字参数。
230
+
231
+ Returns:
232
+ A new Response object. 一个新的Response对象。
95
233
  """
96
234
  for x in [
97
235
  "url", "status", "headers", "body", "request", "flags"
@@ -101,32 +239,102 @@ class Response(object):
101
239
  return cls(*args, **kwargs)
102
240
 
103
241
  def urljoin(self, url):
104
- """Join this Response's url with a possible relative url to form an
105
- absolute interpretation of the latter."""
242
+ """
243
+ Join this Response's url with a possible relative url to form an absolute interpretation of the latter.
244
+ 将此Response的url与可能的相对url连接,形成后者的绝对解释。
245
+
246
+ Args:
247
+ url: The URL to join. 要连接的URL。
248
+
249
+ Returns:
250
+ The absolute URL. 绝对URL。
251
+ """
106
252
  return urljoin(self.url, url)
107
253
 
108
254
  @property
109
255
  def text(self):
110
- """For subclasses of TextResponse, this will return the body
111
- as str
256
+ """
257
+ Get the response body as text.
258
+ 将响应体作为文本获取。
259
+
260
+ This property is only implemented by subclasses of TextResponse.
261
+ In the base Response class, it raises an AttributeError.
262
+ 此属性仅由TextResponse的子类实现。
263
+ 在基本Response类中,它会引发AttributeError。
264
+
265
+ Returns:
266
+ str: The response body as text (in subclasses).
267
+ 响应体作为文本(在子类中)。
268
+
269
+ Raises:
270
+ AttributeError: In the base Response class.
271
+ 在基本Response类中。
112
272
  """
113
273
  raise AttributeError("Response content isn't text")
114
274
 
115
275
  def css(self, *a, **kw):
116
- """Shortcut method implemented only by responses whose content
117
- is text (subclasses of TextResponse).
276
+ """
277
+ Apply the given CSS selector to this response's content.
278
+ 将给定的CSS选择器应用于此响应的内容。
279
+
280
+ This method is only implemented by subclasses of TextResponse.
281
+ In the base Response class, it raises a NotSupported exception.
282
+ 此方法仅由TextResponse的子类实现。
283
+ 在基本Response类中,它会引发NotSupported异常。
284
+
285
+ Args:
286
+ *a: Positional arguments for the CSS selector.
287
+ CSS选择器的位置参数。
288
+ **kw: Keyword arguments for the CSS selector.
289
+ CSS选择器的关键字参数。
290
+
291
+ Raises:
292
+ NotSupported: In the base Response class.
293
+ 在基本Response类中。
118
294
  """
119
295
  raise NotSupported("Response content isn't text")
120
296
 
121
297
  def xpath(self, *a, **kw):
122
- """Shortcut method implemented only by responses whose content
123
- is text (subclasses of TextResponse).
298
+ """
299
+ Apply the given XPath selector to this response's content.
300
+ 将给定的XPath选择器应用于此响应的内容。
301
+
302
+ This method is only implemented by subclasses of TextResponse.
303
+ In the base Response class, it raises a NotSupported exception.
304
+ 此方法仅由TextResponse的子类实现。
305
+ 在基本Response类中,它会引发NotSupported异常。
306
+
307
+ Args:
308
+ *a: Positional arguments for the XPath selector.
309
+ XPath选择器的位置参数。
310
+ **kw: Keyword arguments for the XPath selector.
311
+ XPath选择器的关键字参数。
312
+
313
+ Raises:
314
+ NotSupported: In the base Response class.
315
+ 在基本Response类中。
124
316
  """
125
317
  raise NotSupported("Response content isn't text")
126
318
 
127
319
  def json(self, *a, **kw):
128
- """Shortcut method implemented only by responses whose content
129
- is text (subclasses of TextResponse).
320
+ """
321
+ Parse this response's body as JSON.
322
+ 将此响应的正文解析为JSON。
323
+
324
+ This method is only implemented by subclasses of TextResponse.
325
+ In the base Response class, it raises a NotSupported exception.
326
+ 此方法仅由TextResponse的子类实现。
327
+ 在基本Response类中,它会引发NotSupported异常。
328
+
329
+ Args:
330
+ *a: Positional arguments for the JSON parser.
331
+ JSON解析器的位置参数。
332
+ **kw: Keyword arguments for the JSON parser.
333
+ JSON解析器的关键字参数。
334
+
335
+ Raises:
336
+ NotSupported: In the base Response class.
337
+ 在基本Response类中。
130
338
  """
131
339
  raise NotSupported("Response content isn't text")
132
340
 
@@ -135,17 +343,48 @@ class Response(object):
135
343
  fingerprint=None, errback=None, cb_kwargs=None, flags=None):
136
344
  # type: (...) -> Request
137
345
  """
138
- Return a :class:`~.Request` instance to follow a link ``url``.
139
- It accepts the same arguments as ``Request.__init__`` method,
140
- but ``url`` can be a relative URL or a ``scrapy.link.Link`` object,
141
- not only an absolute URL.
346
+ Return a Request instance to follow a link.
347
+ 返回一个Request实例以跟踪链接。
348
+
349
+ This method creates a new Request to follow the given URL. The URL can be
350
+ a relative URL, a Link object, or an absolute URL. If it's a relative URL,
351
+ it will be joined with the current response's URL.
352
+ 此方法创建一个新的Request以跟踪给定的URL。URL可以是相对URL、Link对象或绝对URL。
353
+ 如果是相对URL,它将与当前响应的URL连接。
142
354
 
143
- :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow`
144
- method which supports selectors in addition to absolute/relative URLs
145
- and Link objects.
355
+ Args:
356
+ url: The URL to follow. Can be a string or a Link object.
357
+ 要跟踪的URL。可以是字符串或Link对象。
358
+ callback: A function to be called with the response from the request.
359
+ 使用请求的响应调用的函数。
360
+ method: The HTTP method to use.
361
+ 要使用的HTTP方法。
362
+ headers: The headers to use for the request.
363
+ 请求使用的头部。
364
+ body: The body of the request.
365
+ 请求的正文。
366
+ cookies: The cookies to send with the request.
367
+ 与请求一起发送的Cookie。
368
+ meta: Extra data to pass to the request.
369
+ 传递给请求的额外数据。
370
+ encoding: The encoding to use for the request.
371
+ 请求使用的编码。
372
+ priority: The priority of the request.
373
+ 请求的优先级。
374
+ dont_filter: If True, the request is not filtered out as a duplicate.
375
+ 如果为True,则该请求不会被作为重复请求过滤掉。
376
+ fingerprint: The fingerprint for the request.
377
+ 请求的指纹。
378
+ errback: A function to be called if the request fails.
379
+ 如果请求失败时调用的函数。
380
+ cb_kwargs: Additional keyword arguments to pass to the callback.
381
+ 传递给回调的额外关键字参数。
382
+ flags: Flags for the request.
383
+ 请求的标志。
146
384
 
147
- .. versionadded:: 2.0
148
- The *flags* parameter.
385
+ Returns:
386
+ Request: A new Request instance.
387
+ 一个新的Request实例。
149
388
  """
150
389
  if isinstance(url, Link):
151
390
  url = url.url
@@ -175,16 +414,50 @@ class Response(object):
175
414
  dont_filter=False, errback=None, cb_kwargs=None, flags=None):
176
415
  # type: (...) -> Generator[Request, None, None]
177
416
  """
178
- .. versionadded:: 2.0
417
+ Return an iterable of Request instances to follow all links in urls.
418
+ 返回一个Request实例的可迭代对象,以跟踪urls中的所有链接。
419
+
420
+ This method creates multiple Requests to follow the given URLs. Each URL can be
421
+ a relative URL, a Link object, or an absolute URL. If it's a relative URL,
422
+ it will be joined with the current response's URL.
423
+ 此方法创建多个Request以跟踪给定的URL。每个URL可以是相对URL、Link对象或绝对URL。
424
+ 如果是相对URL,它将与当前响应的URL连接。
425
+
426
+ Args:
427
+ urls: An iterable of URLs to follow. Each can be a string or a Link object.
428
+ 要跟踪的URL的可迭代对象。每个可以是字符串或Link对象。
429
+ callback: A function to be called with the response from each request.
430
+ 使用每个请求的响应调用的函数。
431
+ method: The HTTP method to use.
432
+ 要使用的HTTP方法。
433
+ headers: The headers to use for the requests.
434
+ 请求使用的头部。
435
+ body: The body of the requests.
436
+ 请求的正文。
437
+ cookies: The cookies to send with the requests.
438
+ 与请求一起发送的Cookie。
439
+ meta: Extra data to pass to the requests.
440
+ 传递给请求的额外数据。
441
+ encoding: The encoding to use for the requests.
442
+ 请求使用的编码。
443
+ priority: The priority of the requests.
444
+ 请求的优先级。
445
+ dont_filter: If True, the requests are not filtered out as duplicates.
446
+ 如果为True,则这些请求不会被作为重复请求过滤掉。
447
+ errback: A function to be called if the requests fail.
448
+ 如果请求失败时调用的函数。
449
+ cb_kwargs: Additional keyword arguments to pass to the callback.
450
+ 传递给回调的额外关键字参数。
451
+ flags: Flags for the requests.
452
+ 请求的标志。
179
453
 
180
- Return an iterable of :class:`~.Request` instances to follow all links
181
- in ``urls``. It accepts the same arguments as ``Request.__init__`` method,
182
- but elements of ``urls`` can be relative URLs or :class:`~scrapy.link.Link` objects,
183
- not only absolute URLs.
454
+ Returns:
455
+ Generator[Request, None, None]: A generator of Request instances.
456
+ Request实例的生成器。
184
457
 
185
- :class:`~.TextResponse` provides a :meth:`~.TextResponse.follow_all`
186
- method which supports selectors in addition to absolute/relative URLs
187
- and Link objects.
458
+ Raises:
459
+ TypeError: If urls is not an iterable.
460
+ 如果urls不是可迭代的。
188
461
  """
189
462
  if not hasattr(urls, '__iter__'):
190
463
  raise TypeError("'urls' argument must be an iterable")
@@ -1,12 +1,51 @@
1
+
1
2
  """
2
- This module implements the HtmlResponse class which adds encoding
3
- discovering through HTML encoding declarations to the TextResponse class.
3
+ HTML response implementation for aioscrapy.
4
+ aioscrapy的HTML响应实现。
4
5
 
5
- See documentation in docs/topics/request-response.rst
6
+ This module provides the HtmlResponse class, which is a specialized TextResponse
7
+ for handling HTML content. It inherits all functionality from TextResponse
8
+ but is specifically intended for HTML responses.
9
+ 此模块提供了HtmlResponse类,这是一个专门用于处理HTML内容的TextResponse。
10
+ 它继承了TextResponse的所有功能,但专门用于HTML响应。
6
11
  """
7
12
 
8
13
  from aioscrapy.http.response.text import TextResponse
9
14
 
10
15
 
11
16
  class HtmlResponse(TextResponse):
17
+ """
18
+ A Response subclass specifically for HTML responses.
19
+ 专门用于HTML响应的Response子类。
20
+
21
+ This class extends TextResponse to handle HTML content. It inherits all the
22
+ functionality of TextResponse, including:
23
+ 此类扩展了TextResponse以处理HTML内容。它继承了TextResponse的所有功能,包括:
24
+
25
+ - Automatic encoding detection
26
+ 自动编码检测
27
+ - Unicode conversion
28
+ Unicode转换
29
+ - CSS and XPath selectors
30
+ CSS和XPath选择器
31
+ - JSON parsing
32
+ JSON解析
33
+ - Enhanced link following
34
+ 增强的链接跟踪
35
+
36
+ The main purpose of this class is to provide a specific type for HTML responses,
37
+ which can be useful for type checking and middleware processing.
38
+ 此类的主要目的是为HTML响应提供特定类型,这对类型检查和中间件处理很有用。
39
+
40
+ Example:
41
+ ```python
42
+ def parse(self, response):
43
+ if isinstance(response, HtmlResponse):
44
+ # Process HTML response
45
+ title = response.css('title::text').get()
46
+ else:
47
+ # Handle other response types
48
+ pass
49
+ ```
50
+ """
12
51
  pass