aio-scrapy 2.1.4__py3-none-any.whl → 2.1.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/LICENSE +1 -1
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/METADATA +53 -41
- aio_scrapy-2.1.6.dist-info/RECORD +134 -0
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/WHEEL +1 -1
- aioscrapy/VERSION +1 -1
- aioscrapy/cmdline.py +438 -5
- aioscrapy/core/downloader/__init__.py +522 -17
- aioscrapy/core/downloader/handlers/__init__.py +187 -5
- aioscrapy/core/downloader/handlers/aiohttp.py +187 -3
- aioscrapy/core/downloader/handlers/curl_cffi.py +124 -3
- aioscrapy/core/downloader/handlers/httpx.py +133 -3
- aioscrapy/core/downloader/handlers/pyhttpx.py +132 -3
- aioscrapy/core/downloader/handlers/requests.py +120 -2
- aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
- aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
- aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
- aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
- aioscrapy/core/engine.py +381 -20
- aioscrapy/core/scheduler.py +350 -36
- aioscrapy/core/scraper.py +509 -33
- aioscrapy/crawler.py +392 -10
- aioscrapy/db/__init__.py +149 -0
- aioscrapy/db/absmanager.py +212 -6
- aioscrapy/db/aiomongo.py +292 -10
- aioscrapy/db/aiomysql.py +363 -10
- aioscrapy/db/aiopg.py +299 -2
- aioscrapy/db/aiorabbitmq.py +444 -4
- aioscrapy/db/aioredis.py +260 -11
- aioscrapy/dupefilters/__init__.py +110 -5
- aioscrapy/dupefilters/disk.py +124 -2
- aioscrapy/dupefilters/redis.py +598 -32
- aioscrapy/exceptions.py +151 -13
- aioscrapy/http/__init__.py +1 -1
- aioscrapy/http/headers.py +237 -3
- aioscrapy/http/request/__init__.py +257 -11
- aioscrapy/http/request/form.py +83 -3
- aioscrapy/http/request/json_request.py +121 -9
- aioscrapy/http/response/__init__.py +306 -33
- aioscrapy/http/response/html.py +42 -3
- aioscrapy/http/response/text.py +496 -49
- aioscrapy/http/response/web_driver.py +144 -0
- aioscrapy/http/response/xml.py +45 -3
- aioscrapy/libs/downloader/defaultheaders.py +66 -2
- aioscrapy/libs/downloader/downloadtimeout.py +91 -2
- aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
- aioscrapy/libs/downloader/retry.py +192 -6
- aioscrapy/libs/downloader/stats.py +142 -0
- aioscrapy/libs/downloader/useragent.py +93 -2
- aioscrapy/libs/extensions/closespider.py +166 -4
- aioscrapy/libs/extensions/corestats.py +151 -1
- aioscrapy/libs/extensions/logstats.py +145 -1
- aioscrapy/libs/extensions/metric.py +370 -1
- aioscrapy/libs/extensions/throttle.py +235 -1
- aioscrapy/libs/pipelines/__init__.py +345 -2
- aioscrapy/libs/pipelines/csv.py +242 -0
- aioscrapy/libs/pipelines/excel.py +545 -0
- aioscrapy/libs/pipelines/mongo.py +132 -0
- aioscrapy/libs/pipelines/mysql.py +67 -0
- aioscrapy/libs/pipelines/pg.py +67 -0
- aioscrapy/libs/spider/depth.py +141 -3
- aioscrapy/libs/spider/httperror.py +144 -4
- aioscrapy/libs/spider/offsite.py +202 -2
- aioscrapy/libs/spider/referer.py +396 -21
- aioscrapy/libs/spider/urllength.py +97 -1
- aioscrapy/link.py +115 -8
- aioscrapy/logformatter.py +199 -8
- aioscrapy/middleware/absmanager.py +328 -2
- aioscrapy/middleware/downloader.py +218 -0
- aioscrapy/middleware/extension.py +50 -1
- aioscrapy/middleware/itempipeline.py +96 -0
- aioscrapy/middleware/spider.py +360 -7
- aioscrapy/process.py +200 -0
- aioscrapy/proxy/__init__.py +142 -3
- aioscrapy/proxy/redis.py +136 -2
- aioscrapy/queue/__init__.py +168 -16
- aioscrapy/scrapyd/runner.py +124 -3
- aioscrapy/serializer.py +182 -2
- aioscrapy/settings/__init__.py +610 -128
- aioscrapy/settings/default_settings.py +313 -13
- aioscrapy/signalmanager.py +151 -20
- aioscrapy/signals.py +183 -1
- aioscrapy/spiderloader.py +165 -12
- aioscrapy/spiders/__init__.py +233 -6
- aioscrapy/statscollectors.py +312 -1
- aioscrapy/utils/conf.py +345 -17
- aioscrapy/utils/curl.py +168 -16
- aioscrapy/utils/decorators.py +76 -6
- aioscrapy/utils/deprecate.py +212 -19
- aioscrapy/utils/httpobj.py +55 -3
- aioscrapy/utils/log.py +79 -0
- aioscrapy/utils/misc.py +189 -21
- aioscrapy/utils/ossignal.py +67 -5
- aioscrapy/utils/project.py +165 -3
- aioscrapy/utils/python.py +254 -44
- aioscrapy/utils/reqser.py +75 -1
- aioscrapy/utils/request.py +173 -12
- aioscrapy/utils/response.py +91 -6
- aioscrapy/utils/signal.py +196 -14
- aioscrapy/utils/spider.py +51 -4
- aioscrapy/utils/template.py +93 -6
- aioscrapy/utils/tools.py +191 -17
- aioscrapy/utils/trackref.py +198 -12
- aioscrapy/utils/url.py +341 -36
- aio_scrapy-2.1.4.dist-info/RECORD +0 -133
- aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -115
- aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -59
- aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
- aioscrapy/http/response/playwright.py +0 -36
- aioscrapy/libs/pipelines/execl.py +0 -169
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/entry_points.txt +0 -0
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/top_level.txt +0 -0
aioscrapy/cmdline.py
CHANGED
|
@@ -1,3 +1,31 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Command Line Interface Module
|
|
3
|
+
命令行接口模块
|
|
4
|
+
|
|
5
|
+
This module provides the command-line interface for AioScrapy. It handles command
|
|
6
|
+
discovery, parsing command-line arguments, and executing commands.
|
|
7
|
+
此模块提供了AioScrapy的命令行接口。它处理命令发现、解析命令行参数和执行命令。
|
|
8
|
+
|
|
9
|
+
The main components are:
|
|
10
|
+
主要组件包括:
|
|
11
|
+
|
|
12
|
+
1. Command discovery functions: Find and load available commands
|
|
13
|
+
查找并加载可用命令
|
|
14
|
+
2. Command execution functions: Parse arguments and execute commands
|
|
15
|
+
解析参数并执行命令
|
|
16
|
+
3. Helper functions: Print help messages and handle errors
|
|
17
|
+
打印帮助消息并处理错误
|
|
18
|
+
|
|
19
|
+
Commands can be provided by:
|
|
20
|
+
命令可以由以下提供:
|
|
21
|
+
|
|
22
|
+
- Built-in commands in the aioscrapy.commands module
|
|
23
|
+
aioscrapy.commands模块中的内置命令
|
|
24
|
+
- Entry points in the aioscrapy.commands group
|
|
25
|
+
aioscrapy.commands组中的入口点
|
|
26
|
+
- Custom modules specified in the COMMANDS_MODULE setting
|
|
27
|
+
COMMANDS_MODULE设置中指定的自定义模块
|
|
28
|
+
"""
|
|
1
29
|
import sys
|
|
2
30
|
import os
|
|
3
31
|
import optparse
|
|
@@ -15,59 +43,251 @@ from aioscrapy.utils.python import garbage_collect
|
|
|
15
43
|
|
|
16
44
|
|
|
17
45
|
def _iter_command_classes(module_name):
    """Yield the command classes found under ``module_name``.

    Every module produced by ``walk_modules(module_name)`` is scanned and an
    object is yielded when all of the following hold:

    * it is a class,
    * it is a subclass of ``AioScrapyCommand``,
    * its ``__module__`` equals the scanned module's name, i.e. it was
      defined there rather than imported into it,
    * it does not compare equal to ``AioScrapyCommand`` itself.

    Args:
        module_name: Name of the module to search for command classes.

    Yields:
        Command classes, in the order the modules and their attributes are
        visited.

    Note:
        TODO: add a ``name`` attribute to commands and merge this function
        with aioscrapy.utils.spider.iter_spider_classes.
    """
    for mod in walk_modules(module_name):
        for candidate in vars(mod).values():
            if not inspect.isclass(candidate):
                continue
            if not issubclass(candidate, AioScrapyCommand):
                continue
            # Skip classes that were merely imported into this module.
            if candidate.__module__ != mod.__name__:
                continue
            # The abstract base is not a runnable command.
            if candidate == AioScrapyCommand:
                continue
            yield candidate
|
|
29
85
|
|
|
30
86
|
|
|
31
87
|
def _get_commands_from_module(module, inproject):
    """Instantiate the commands found in ``module`` that are usable right now.

    A command class is kept when ``inproject`` is true or when the class does
    not set ``requires_project``. The key used for each command is the last
    dotted component of the class's ``__module__``, so two command classes
    defined in the same module share a key and the later one wins.

    Args:
        module: Module name passed on to ``_iter_command_classes``.
        inproject: Whether we are currently inside a project.

    Returns:
        dict: Mapping of command name -> command instance.
    """
    commands = {}
    for command_cls in _iter_command_classes(module):
        # Project-only commands are hidden when running outside a project.
        if not inproject and command_cls.requires_project:
            continue
        name = command_cls.__module__.rsplit('.', 1)[-1]
        commands[name] = command_cls()
    return commands
|
|
38
127
|
|
|
39
128
|
|
|
40
129
|
def _get_commands_from_entry_points(inproject, group='aioscrapy.commands'):
    """Instantiate the commands advertised through entry points.

    Each entry point in ``group`` is loaded; the loaded object must be a
    class, which is then instantiated and stored under the entry point's
    name.

    Args:
        inproject: Whether we are currently inside a project. Accepted for
            signature parity with ``_get_commands_from_module``; this
            function does not read it.
        group: Entry point group to scan. Defaults to
            ``'aioscrapy.commands'``.

    Returns:
        dict: Mapping of entry point name -> command instance.

    Raises:
        Exception: If an entry point resolves to something that is not a
            class.
    """
    discovered = {}
    for entry in pkg_resources.iter_entry_points(group):
        target = entry.load()
        if not inspect.isclass(target):
            raise Exception(f"Invalid entry point {entry.name}")
        discovered[entry.name] = target()
    return discovered
|
|
49
181
|
|
|
50
182
|
|
|
51
183
|
def _get_commands_dict(settings, inproject):
    """Collect every available command into a single dictionary.

    Sources are merged in this order, with later sources overriding earlier
    ones on a name clash:

    1. commands from the ``aioscrapy.commands`` module,
    2. commands from the entry points (default group),
    3. commands from the module named by ``settings['COMMANDS_MODULE']``,
       when that setting is truthy.

    Args:
        settings: Settings object, indexed with ``'COMMANDS_MODULE'``.
        inproject: Whether we are currently inside a project.

    Returns:
        dict: Mapping of command name -> command instance.
    """
    available = _get_commands_from_module('aioscrapy.commands', inproject)
    available.update(_get_commands_from_entry_points(inproject))

    custom_module = settings['COMMANDS_MODULE']
    if custom_module:
        # User-supplied commands are applied last so they take precedence.
        available.update(_get_commands_from_module(custom_module, inproject))

    return available
|
|
58
226
|
|
|
59
227
|
|
|
60
228
|
def _pop_command_name(argv):
|
|
229
|
+
"""
|
|
230
|
+
Extract the command name from command line arguments.
|
|
231
|
+
从命令行参数中提取命令名称。
|
|
232
|
+
|
|
233
|
+
This function searches for the first argument that doesn't start with a dash,
|
|
234
|
+
which is assumed to be the command name. It removes this argument from the
|
|
235
|
+
list and returns it.
|
|
236
|
+
此函数搜索第一个不以破折号开头的参数,该参数被假定为命令名称。它从列表中
|
|
237
|
+
删除此参数并返回它。
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
argv: List of command line arguments.
|
|
241
|
+
命令行参数列表。
|
|
242
|
+
|
|
243
|
+
Returns:
|
|
244
|
+
str or None: The command name if found, None otherwise.
|
|
245
|
+
如果找到,则为命令名称,否则为None。
|
|
246
|
+
"""
|
|
247
|
+
# Start from index 0 (which corresponds to argv[1], the first argument after the script name)
|
|
248
|
+
# 从索引0开始(对应于argv[1],脚本名称之后的第一个参数)
|
|
61
249
|
i = 0
|
|
250
|
+
|
|
251
|
+
# Iterate through arguments, skipping the script name (argv[0])
|
|
252
|
+
# 迭代参数,跳过脚本名称(argv[0])
|
|
62
253
|
for arg in argv[1:]:
|
|
254
|
+
# If the argument doesn't start with a dash, it's the command name
|
|
255
|
+
# 如果参数不以破折号开头,则它是命令名称
|
|
63
256
|
if not arg.startswith('-'):
|
|
257
|
+
# Remove the command name from the argument list
|
|
258
|
+
# 从参数列表中删除命令名称
|
|
64
259
|
del argv[i]
|
|
260
|
+
# Return the command name
|
|
261
|
+
# 返回命令名称
|
|
65
262
|
return arg
|
|
66
263
|
i += 1
|
|
67
264
|
|
|
265
|
+
# No command name found
|
|
266
|
+
# 未找到命令名称
|
|
267
|
+
return None
|
|
268
|
+
|
|
68
269
|
|
|
69
270
|
def _print_header(settings, inproject):
|
|
271
|
+
"""
|
|
272
|
+
Print the AioScrapy header with version and project information.
|
|
273
|
+
打印带有版本和项目信息的AioScrapy标头。
|
|
274
|
+
|
|
275
|
+
This function prints a header line showing the AioScrapy version and,
|
|
276
|
+
if inside a project, the project name.
|
|
277
|
+
此函数打印一个标头行,显示AioScrapy版本,如果在项目内,则显示项目名称。
|
|
278
|
+
|
|
279
|
+
Args:
|
|
280
|
+
settings: The settings object.
|
|
281
|
+
设置对象。
|
|
282
|
+
inproject: Whether we're currently inside a project.
|
|
283
|
+
我们当前是否在项目内。
|
|
284
|
+
"""
|
|
285
|
+
# Get the AioScrapy version
|
|
286
|
+
# 获取AioScrapy版本
|
|
70
287
|
version = aioscrapy.__version__
|
|
288
|
+
|
|
289
|
+
# Print different headers depending on whether we're in a project
|
|
290
|
+
# 根据我们是否在项目内打印不同的标头
|
|
71
291
|
if inproject:
|
|
72
292
|
print(f"ioscrapy {version} - project: {settings['BOT_NAME']}\n")
|
|
73
293
|
else:
|
|
@@ -75,44 +295,159 @@ def _print_header(settings, inproject):
|
|
|
75
295
|
|
|
76
296
|
|
|
77
297
|
def _print_commands(settings, inproject):
    """Print the header, usage line and the list of available commands.

    Commands are listed in alphabetical order of their names, each followed
    by the text returned by its ``short_desc()``. When not inside a project
    an extra hint is printed saying that more commands exist there.

    Args:
        settings: Settings object, forwarded to the header and to command
            discovery.
        inproject: Whether we are currently inside a project.
    """
    _print_header(settings, inproject)

    print("Usage:")
    print(" aioscrapy <command> [options] [args]\n")

    print("Available commands:")
    available = _get_commands_dict(settings, inproject)
    # Names are unique dict keys, so ordering by name alone is sufficient.
    for name in sorted(available):
        print(f" {name:<13} {available[name].short_desc()}")

    if not inproject:
        print()
        print(" [ more ] More commands available when run from project directory")

    print()
    print('Use "aioscrapy <command> -h" to see more info about a command')
|
|
90
338
|
|
|
91
339
|
|
|
92
340
|
def _print_unknown_command(settings, cmdname, inproject):
    """Report that ``cmdname`` is not a known command.

    Prints the header, then an "Unknown command" line naming ``cmdname``,
    then a hint on how to list the available commands. The function only
    prints; exiting is left to the caller.

    Args:
        settings: Settings object, forwarded to the header.
        cmdname: The command name that was not recognised.
        inproject: Whether we are currently inside a project.
    """
    _print_header(settings, inproject)
    # The trailing newline leaves a blank line before the hint.
    print(f"Unknown command: {cmdname}\n")
    print('Use "aioscrapy" to see available commands')
|
|
96
368
|
|
|
97
369
|
|
|
98
370
|
def _run_print_help(parser, func, *a, **kw):
|
|
371
|
+
"""
|
|
372
|
+
Run a function and handle UsageError exceptions.
|
|
373
|
+
运行函数并处理UsageError异常。
|
|
374
|
+
|
|
375
|
+
This function runs the specified function with the given arguments and handles
|
|
376
|
+
UsageError exceptions by printing an error message and/or help information.
|
|
377
|
+
此函数使用给定的参数运行指定的函数,并通过打印错误消息和/或帮助信息来处理
|
|
378
|
+
UsageError异常。
|
|
379
|
+
|
|
380
|
+
Args:
|
|
381
|
+
parser: The option parser to use for printing help.
|
|
382
|
+
用于打印帮助的选项解析器。
|
|
383
|
+
func: The function to run.
|
|
384
|
+
要运行的函数。
|
|
385
|
+
*a: Positional arguments to pass to the function.
|
|
386
|
+
传递给函数的位置参数。
|
|
387
|
+
**kw: Keyword arguments to pass to the function.
|
|
388
|
+
传递给函数的关键字参数。
|
|
389
|
+
|
|
390
|
+
Raises:
|
|
391
|
+
SystemExit: With exit code 2 if a UsageError occurs.
|
|
392
|
+
如果发生UsageError,则退出代码为2。
|
|
393
|
+
"""
|
|
99
394
|
try:
|
|
395
|
+
# Run the function with the given arguments
|
|
396
|
+
# 使用给定的参数运行函数
|
|
100
397
|
func(*a, **kw)
|
|
101
398
|
except UsageError as e:
|
|
399
|
+
# If the error has a message, print it
|
|
400
|
+
# 如果错误有消息,则打印它
|
|
102
401
|
if str(e):
|
|
103
402
|
parser.error(str(e))
|
|
403
|
+
|
|
404
|
+
# If the error requests help to be printed, print it
|
|
405
|
+
# 如果错误请求打印帮助,则打印它
|
|
104
406
|
if e.print_help:
|
|
105
407
|
parser.print_help()
|
|
408
|
+
|
|
409
|
+
# Exit with code 2 (command line syntax error)
|
|
410
|
+
# 退出代码2(命令行语法错误)
|
|
106
411
|
sys.exit(2)
|
|
107
412
|
|
|
108
413
|
|
|
109
414
|
def execute(argv=None, settings=None):
|
|
415
|
+
"""
|
|
416
|
+
Main entry point for the AioScrapy command line interface.
|
|
417
|
+
AioScrapy命令行接口的主入口点。
|
|
418
|
+
|
|
419
|
+
This function parses command line arguments, finds the appropriate command,
|
|
420
|
+
and runs it with the specified options and arguments.
|
|
421
|
+
此函数解析命令行参数,找到适当的命令,并使用指定的选项和参数运行它。
|
|
422
|
+
|
|
423
|
+
Args:
|
|
424
|
+
argv: The command line arguments.
|
|
425
|
+
命令行参数。
|
|
426
|
+
Defaults to sys.argv.
|
|
427
|
+
默认为sys.argv。
|
|
428
|
+
settings: The settings object.
|
|
429
|
+
设置对象。
|
|
430
|
+
If None, the project settings will be used.
|
|
431
|
+
如果为None,则将使用项目设置。
|
|
432
|
+
|
|
433
|
+
Raises:
|
|
434
|
+
SystemExit: With exit code 0 if no command is specified,
|
|
435
|
+
or exit code 2 if an unknown command or a command that
|
|
436
|
+
requires a project is run outside a project.
|
|
437
|
+
如果未指定命令,则退出代码为0;
|
|
438
|
+
如果在项目外运行未知命令或需要项目的命令,则退出代码为2。
|
|
439
|
+
"""
|
|
440
|
+
# Use sys.argv if no arguments are provided
|
|
441
|
+
# 如果未提供参数,则使用sys.argv
|
|
110
442
|
if argv is None:
|
|
111
443
|
argv = sys.argv
|
|
112
444
|
|
|
445
|
+
# Use project settings if no settings are provided
|
|
446
|
+
# 如果未提供设置,则使用项目设置
|
|
113
447
|
if settings is None:
|
|
114
448
|
settings = get_project_settings()
|
|
115
|
-
#
|
|
449
|
+
# Set EDITOR from environment if available
|
|
450
|
+
# 如果可用,则从环境设置EDITOR
|
|
116
451
|
try:
|
|
117
452
|
editor = os.environ['EDITOR']
|
|
118
453
|
except KeyError:
|
|
@@ -120,45 +455,143 @@ def execute(argv=None, settings=None):
|
|
|
120
455
|
else:
|
|
121
456
|
settings['EDITOR'] = editor
|
|
122
457
|
|
|
458
|
+
# Check if we're inside a project
|
|
459
|
+
# 检查我们是否在项目内
|
|
123
460
|
inproject = inside_project()
|
|
461
|
+
|
|
462
|
+
# Get all available commands
|
|
463
|
+
# 获取所有可用命令
|
|
124
464
|
cmds = _get_commands_dict(settings, inproject)
|
|
465
|
+
|
|
466
|
+
# Extract the command name from the arguments
|
|
467
|
+
# 从参数中提取命令名称
|
|
125
468
|
cmdname = _pop_command_name(argv)
|
|
469
|
+
|
|
470
|
+
# Create an option parser
|
|
471
|
+
# 创建选项解析器
|
|
126
472
|
parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(),
|
|
127
473
|
conflict_handler='resolve')
|
|
474
|
+
|
|
475
|
+
# If no command is specified, print the list of commands and exit
|
|
476
|
+
# 如果未指定命令,则打印命令列表并退出
|
|
128
477
|
if not cmdname:
|
|
129
478
|
_print_commands(settings, inproject)
|
|
130
479
|
sys.exit(0)
|
|
480
|
+
# If the command is unknown, print an error message and exit
|
|
481
|
+
# 如果命令未知,则打印错误消息并退出
|
|
131
482
|
elif cmdname not in cmds:
|
|
132
483
|
_print_unknown_command(settings, cmdname, inproject)
|
|
133
484
|
sys.exit(2)
|
|
134
485
|
|
|
486
|
+
# Get the command instance
|
|
487
|
+
# 获取命令实例
|
|
135
488
|
cmd = cmds[cmdname]
|
|
489
|
+
|
|
490
|
+
# Set up the parser with command-specific information
|
|
491
|
+
# 使用命令特定信息设置解析器
|
|
136
492
|
parser.usage = f"aioscrapy {cmdname} {cmd.syntax()}"
|
|
137
493
|
parser.description = cmd.long_desc()
|
|
494
|
+
|
|
495
|
+
# Apply command-specific settings
|
|
496
|
+
# 应用命令特定设置
|
|
138
497
|
settings.setdict(cmd.default_settings, priority='command')
|
|
139
498
|
cmd.settings = settings
|
|
499
|
+
|
|
500
|
+
# Add command-specific options to the parser
|
|
501
|
+
# 向解析器添加命令特定选项
|
|
140
502
|
cmd.add_options(parser)
|
|
503
|
+
|
|
504
|
+
# Parse the command line arguments
|
|
505
|
+
# 解析命令行参数
|
|
141
506
|
opts, args = parser.parse_args(args=argv[1:])
|
|
507
|
+
|
|
508
|
+
# Process command options
|
|
509
|
+
# 处理命令选项
|
|
142
510
|
_run_print_help(parser, cmd.process_options, args, opts)
|
|
143
511
|
|
|
512
|
+
# Set up the crawler process for the command
|
|
513
|
+
# 为命令设置爬虫进程
|
|
144
514
|
cmd.crawler_process = CrawlerProcess(settings)
|
|
515
|
+
|
|
516
|
+
# Run the command and handle any usage errors
|
|
517
|
+
# 运行命令并处理任何使用错误
|
|
145
518
|
_run_print_help(parser, _run_command, cmd, args, opts)
|
|
519
|
+
|
|
520
|
+
# Exit with the command's exit code
|
|
521
|
+
# 使用命令的退出代码退出
|
|
146
522
|
sys.exit(cmd.exitcode)
|
|
147
523
|
|
|
148
524
|
|
|
149
525
|
def _run_command(cmd, args, opts):
|
|
526
|
+
"""
|
|
527
|
+
Run a command with the given arguments and options.
|
|
528
|
+
使用给定的参数和选项运行命令。
|
|
529
|
+
|
|
530
|
+
This function runs the command either with or without profiling,
|
|
531
|
+
depending on the options.
|
|
532
|
+
此函数根据选项运行命令,可以带有或不带有性能分析。
|
|
533
|
+
|
|
534
|
+
Args:
|
|
535
|
+
cmd: The command to run.
|
|
536
|
+
要运行的命令。
|
|
537
|
+
args: The arguments to pass to the command.
|
|
538
|
+
传递给命令的参数。
|
|
539
|
+
opts: The options to pass to the command.
|
|
540
|
+
传递给命令的选项。
|
|
541
|
+
Must have a 'profile' attribute that specifies whether to run
|
|
542
|
+
with profiling.
|
|
543
|
+
必须有一个'profile'属性,指定是否使用性能分析运行。
|
|
544
|
+
"""
|
|
545
|
+
# If profiling is enabled, run the command with profiling
|
|
546
|
+
# 如果启用了性能分析,则使用性能分析运行命令
|
|
150
547
|
if opts.profile:
|
|
151
548
|
_run_command_profiled(cmd, args, opts)
|
|
152
549
|
else:
|
|
550
|
+
# Otherwise, run the command directly
|
|
551
|
+
# 否则,直接运行命令
|
|
153
552
|
cmd.run(args, opts)
|
|
154
553
|
|
|
155
554
|
|
|
156
555
|
def _run_command_profiled(cmd, args, opts):
|
|
556
|
+
"""
|
|
557
|
+
Run a command with profiling.
|
|
558
|
+
使用性能分析运行命令。
|
|
559
|
+
|
|
560
|
+
This function runs the command with cProfile profiling and optionally
|
|
561
|
+
saves the profiling stats to a file.
|
|
562
|
+
此函数使用cProfile性能分析运行命令,并可选择将性能分析统计信息保存到文件。
|
|
563
|
+
|
|
564
|
+
Args:
|
|
565
|
+
cmd: The command to run.
|
|
566
|
+
要运行的命令。
|
|
567
|
+
args: The arguments to pass to the command.
|
|
568
|
+
传递给命令的参数。
|
|
569
|
+
opts: The options to pass to the command.
|
|
570
|
+
传递给命令的选项。
|
|
571
|
+
Must have a 'profile' attribute that specifies the output file
|
|
572
|
+
for profiling stats, or False to disable saving stats.
|
|
573
|
+
必须有一个'profile'属性,指定性能分析统计信息的输出文件,
|
|
574
|
+
或False以禁用保存统计信息。
|
|
575
|
+
"""
|
|
576
|
+
# If a profile output file is specified, print a message
|
|
577
|
+
# 如果指定了性能分析输出文件,则打印消息
|
|
157
578
|
if opts.profile:
|
|
158
579
|
sys.stderr.write(f"aioscrapy: writing cProfile stats to {opts.profile!r}\n")
|
|
580
|
+
|
|
581
|
+
# Create a local namespace for the profiler
|
|
582
|
+
# 为性能分析器创建本地命名空间
|
|
159
583
|
loc = locals()
|
|
584
|
+
|
|
585
|
+
# Create a profiler
|
|
586
|
+
# 创建性能分析器
|
|
160
587
|
p = cProfile.Profile()
|
|
588
|
+
|
|
589
|
+
# Run the command with profiling
|
|
590
|
+
# 使用性能分析运行命令
|
|
161
591
|
p.runctx('cmd.run(args, opts)', globals(), loc)
|
|
592
|
+
|
|
593
|
+
# If a profile output file is specified, save the stats
|
|
594
|
+
# 如果指定了性能分析输出文件,则保存统计信息
|
|
162
595
|
if opts.profile:
|
|
163
596
|
p.dump_stats(opts.profile)
|
|
164
597
|
|