aio-scrapy 2.1.3__py3-none-any.whl → 2.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/LICENSE +1 -1
  2. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/METADATA +53 -40
  3. aio_scrapy-2.1.6.dist-info/RECORD +134 -0
  4. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/WHEEL +1 -1
  5. aioscrapy/VERSION +1 -1
  6. aioscrapy/cmdline.py +438 -5
  7. aioscrapy/core/downloader/__init__.py +523 -18
  8. aioscrapy/core/downloader/handlers/__init__.py +188 -6
  9. aioscrapy/core/downloader/handlers/aiohttp.py +188 -4
  10. aioscrapy/core/downloader/handlers/curl_cffi.py +125 -4
  11. aioscrapy/core/downloader/handlers/httpx.py +134 -4
  12. aioscrapy/core/downloader/handlers/pyhttpx.py +133 -4
  13. aioscrapy/core/downloader/handlers/requests.py +121 -3
  14. aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
  15. aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
  16. aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
  17. aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
  18. aioscrapy/core/engine.py +381 -20
  19. aioscrapy/core/scheduler.py +350 -36
  20. aioscrapy/core/scraper.py +509 -33
  21. aioscrapy/crawler.py +392 -10
  22. aioscrapy/db/__init__.py +149 -0
  23. aioscrapy/db/absmanager.py +212 -6
  24. aioscrapy/db/aiomongo.py +292 -10
  25. aioscrapy/db/aiomysql.py +363 -10
  26. aioscrapy/db/aiopg.py +299 -2
  27. aioscrapy/db/aiorabbitmq.py +444 -4
  28. aioscrapy/db/aioredis.py +260 -11
  29. aioscrapy/dupefilters/__init__.py +110 -5
  30. aioscrapy/dupefilters/disk.py +124 -2
  31. aioscrapy/dupefilters/redis.py +598 -32
  32. aioscrapy/exceptions.py +170 -14
  33. aioscrapy/http/__init__.py +1 -1
  34. aioscrapy/http/headers.py +237 -3
  35. aioscrapy/http/request/__init__.py +257 -11
  36. aioscrapy/http/request/form.py +83 -3
  37. aioscrapy/http/request/json_request.py +121 -9
  38. aioscrapy/http/response/__init__.py +306 -33
  39. aioscrapy/http/response/html.py +42 -3
  40. aioscrapy/http/response/text.py +496 -49
  41. aioscrapy/http/response/web_driver.py +144 -0
  42. aioscrapy/http/response/xml.py +45 -3
  43. aioscrapy/libs/downloader/defaultheaders.py +66 -2
  44. aioscrapy/libs/downloader/downloadtimeout.py +91 -2
  45. aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
  46. aioscrapy/libs/downloader/retry.py +193 -7
  47. aioscrapy/libs/downloader/stats.py +142 -0
  48. aioscrapy/libs/downloader/useragent.py +93 -2
  49. aioscrapy/libs/extensions/closespider.py +166 -4
  50. aioscrapy/libs/extensions/corestats.py +151 -1
  51. aioscrapy/libs/extensions/logstats.py +145 -1
  52. aioscrapy/libs/extensions/metric.py +370 -1
  53. aioscrapy/libs/extensions/throttle.py +235 -1
  54. aioscrapy/libs/pipelines/__init__.py +345 -2
  55. aioscrapy/libs/pipelines/csv.py +242 -0
  56. aioscrapy/libs/pipelines/excel.py +545 -0
  57. aioscrapy/libs/pipelines/mongo.py +132 -0
  58. aioscrapy/libs/pipelines/mysql.py +67 -0
  59. aioscrapy/libs/pipelines/pg.py +67 -0
  60. aioscrapy/libs/spider/depth.py +141 -3
  61. aioscrapy/libs/spider/httperror.py +144 -4
  62. aioscrapy/libs/spider/offsite.py +202 -2
  63. aioscrapy/libs/spider/referer.py +396 -21
  64. aioscrapy/libs/spider/urllength.py +97 -1
  65. aioscrapy/link.py +115 -8
  66. aioscrapy/logformatter.py +199 -8
  67. aioscrapy/middleware/absmanager.py +328 -2
  68. aioscrapy/middleware/downloader.py +218 -0
  69. aioscrapy/middleware/extension.py +50 -1
  70. aioscrapy/middleware/itempipeline.py +96 -0
  71. aioscrapy/middleware/spider.py +360 -7
  72. aioscrapy/process.py +200 -0
  73. aioscrapy/proxy/__init__.py +142 -3
  74. aioscrapy/proxy/redis.py +136 -2
  75. aioscrapy/queue/__init__.py +168 -16
  76. aioscrapy/scrapyd/runner.py +124 -3
  77. aioscrapy/serializer.py +182 -2
  78. aioscrapy/settings/__init__.py +610 -128
  79. aioscrapy/settings/default_settings.py +313 -13
  80. aioscrapy/signalmanager.py +151 -20
  81. aioscrapy/signals.py +183 -1
  82. aioscrapy/spiderloader.py +165 -12
  83. aioscrapy/spiders/__init__.py +233 -6
  84. aioscrapy/statscollectors.py +312 -1
  85. aioscrapy/utils/conf.py +345 -17
  86. aioscrapy/utils/curl.py +168 -16
  87. aioscrapy/utils/decorators.py +76 -6
  88. aioscrapy/utils/deprecate.py +212 -19
  89. aioscrapy/utils/httpobj.py +55 -3
  90. aioscrapy/utils/log.py +79 -0
  91. aioscrapy/utils/misc.py +189 -21
  92. aioscrapy/utils/ossignal.py +67 -5
  93. aioscrapy/utils/project.py +165 -3
  94. aioscrapy/utils/python.py +254 -44
  95. aioscrapy/utils/reqser.py +75 -1
  96. aioscrapy/utils/request.py +173 -12
  97. aioscrapy/utils/response.py +91 -6
  98. aioscrapy/utils/signal.py +196 -14
  99. aioscrapy/utils/spider.py +51 -4
  100. aioscrapy/utils/template.py +93 -6
  101. aioscrapy/utils/tools.py +191 -17
  102. aioscrapy/utils/trackref.py +198 -12
  103. aioscrapy/utils/url.py +341 -36
  104. aio_scrapy-2.1.3.dist-info/RECORD +0 -133
  105. aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -110
  106. aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -53
  107. aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
  108. aioscrapy/http/response/playwright.py +0 -36
  109. aioscrapy/libs/pipelines/execl.py +0 -169
  110. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/entry_points.txt +0 -0
  111. {aio_scrapy-2.1.3.dist-info → aio_scrapy-2.1.6.dist-info}/top_level.txt +0 -0
aioscrapy/cmdline.py CHANGED
@@ -1,3 +1,31 @@
1
+ """
2
+ Command Line Interface Module
3
+ 命令行接口模块
4
+
5
+ This module provides the command-line interface for AioScrapy. It handles command
6
+ discovery, parsing command-line arguments, and executing commands.
7
+ 此模块提供了AioScrapy的命令行接口。它处理命令发现、解析命令行参数和执行命令。
8
+
9
+ The main components are:
10
+ 主要组件包括:
11
+
12
+ 1. Command discovery functions: Find and load available commands
13
+ 查找并加载可用命令
14
+ 2. Command execution functions: Parse arguments and execute commands
15
+ 解析参数并执行命令
16
+ 3. Helper functions: Print help messages and handle errors
17
+ 打印帮助消息并处理错误
18
+
19
+ Commands can be provided by:
20
+ 命令可以由以下提供:
21
+
22
+ - Built-in commands in the aioscrapy.commands module
23
+ aioscrapy.commands模块中的内置命令
24
+ - Entry points in the aioscrapy.commands group
25
+ aioscrapy.commands组中的入口点
26
+ - Custom modules specified in the COMMANDS_MODULE setting
27
+ COMMANDS_MODULE设置中指定的自定义模块
28
+ """
1
29
  import sys
2
30
  import os
3
31
  import optparse
@@ -15,59 +43,251 @@ from aioscrapy.utils.python import garbage_collect
15
43
 
16
44
 
17
45
  def _iter_command_classes(module_name):
18
- # TODO: add `name` attribute to commands and and merge this function with
19
- # aioscrapy.utils.spider.iter_spider_classes
46
+ """
47
+ Iterate over all command classes in a module.
48
+ 迭代模块中的所有命令类。
49
+
50
+ This function walks through all modules in the given module path and yields
51
+ all classes that are subclasses of AioScrapyCommand and defined in the module
52
+ (not imported).
53
+ 此函数遍历给定模块路径中的所有模块,并产生所有是AioScrapyCommand子类且在模块中
54
+ 定义(非导入)的类。
55
+
56
+ Args:
57
+ module_name: The name of the module to search for command classes.
58
+ 要搜索命令类的模块名称。
59
+
60
+ Yields:
61
+ class: Command classes found in the module.
62
+ 在模块中找到的命令类。
63
+
64
+ Note:
65
+ TODO: add `name` attribute to commands and merge this function with
66
+ aioscrapy.utils.spider.iter_spider_classes
67
+ """
68
+ # Walk through all modules in the given module path
69
+ # 遍历给定模块路径中的所有模块
20
70
  for module in walk_modules(module_name):
71
+ # Iterate over all objects in the module
72
+ # 迭代模块中的所有对象
21
73
  for obj in vars(module).values():
74
+ # Check if the object is a command class
75
+ # 检查对象是否为命令类
22
76
  if (
23
77
  inspect.isclass(obj)
24
78
  and issubclass(obj, AioScrapyCommand)
25
- and obj.__module__ == module.__name__
26
- and not obj == AioScrapyCommand
79
+ and obj.__module__ == module.__name__ # Only classes defined in this module
80
+ # 仅此模块中定义的类
81
+ and not obj == AioScrapyCommand # Exclude the base class
82
+ # 排除基类
27
83
  ):
28
84
  yield obj
29
85
 
30
86
 
31
87
  def _get_commands_from_module(module, inproject):
88
+ """
89
+ Get all commands from a module.
90
+ 从模块获取所有命令。
91
+
92
+ This function creates a dictionary of command name -> command instance for all
93
+ command classes found in the given module. It only includes commands that are
94
+ available in the current context (either we're in a project, or the command
95
+ doesn't require a project).
96
+ 此函数为在给定模块中找到的所有命令类创建一个命令名称 -> 命令实例的字典。它只包括
97
+ 在当前上下文中可用的命令(要么我们在项目中,要么命令不需要项目)。
98
+
99
+ Args:
100
+ module: The module name to search for commands.
101
+ 要搜索命令的模块名称。
102
+ inproject: Whether we're currently inside a project.
103
+ 我们当前是否在项目内。
104
+
105
+ Returns:
106
+ dict: A dictionary of command name -> command instance.
107
+ 命令名称 -> 命令实例的字典。
108
+ """
109
+ # Initialize an empty dictionary to store commands
110
+ # 初始化一个空字典来存储命令
32
111
  d = {}
112
+
113
+ # Iterate over all command classes in the module
114
+ # 迭代模块中的所有命令类
33
115
  for cmd in _iter_command_classes(module):
116
+ # Only include commands that are available in the current context
117
+ # 只包括在当前上下文中可用的命令
34
118
  if inproject or not cmd.requires_project:
119
+ # Use the last part of the module name as the command name
120
+ # 使用模块名称的最后一部分作为命令名称
35
121
  cmdname = cmd.__module__.split('.')[-1]
122
+ # Create an instance of the command class
123
+ # 创建命令类的实例
36
124
  d[cmdname] = cmd()
125
+
37
126
  return d
38
127
 
39
128
 
40
129
  def _get_commands_from_entry_points(inproject, group='aioscrapy.commands'):
130
+ """
131
+ Get commands from entry points.
132
+ 从入口点获取命令。
133
+
134
+ This function loads commands from entry points in the specified group.
135
+ Entry points allow third-party packages to provide AioScrapy commands.
136
+ 此函数从指定组中的入口点加载命令。入口点允许第三方包提供AioScrapy命令。
137
+
138
+ Args:
139
+ inproject: Whether we're currently inside a project.
140
+ 我们当前是否在项目内。
141
+ This parameter is included for interface consistency with
142
+ _get_commands_from_module, but is not used in this implementation.
143
+ 此参数包含是为了与_get_commands_from_module保持接口一致性,
144
+ 但在此实现中未使用。
145
+ group: The entry point group to search for commands.
146
+ 要搜索命令的入口点组。
147
+ Defaults to 'aioscrapy.commands'.
148
+ 默认为'aioscrapy.commands'。
149
+
150
+ Returns:
151
+ dict: A dictionary of command name -> command instance.
152
+ 命令名称 -> 命令实例的字典。
153
+
154
+ Raises:
155
+ Exception: If an entry point doesn't point to a class.
156
+ 如果入口点不指向类。
157
+ """
158
+ # Initialize an empty dictionary to store commands
159
+ # 初始化一个空字典来存储命令
41
160
  cmds = {}
161
+
162
+ # Iterate over all entry points in the specified group
163
+ # 迭代指定组中的所有入口点
42
164
  for entry_point in pkg_resources.iter_entry_points(group):
165
+ # Load the object from the entry point
166
+ # 从入口点加载对象
43
167
  obj = entry_point.load()
168
+
169
+ # Check if the object is a class
170
+ # 检查对象是否为类
44
171
  if inspect.isclass(obj):
172
+ # Create an instance of the class and add it to the commands dictionary
173
+ # 创建类的实例并将其添加到命令字典中
45
174
  cmds[entry_point.name] = obj()
46
175
  else:
176
+ # Raise an exception if the entry point doesn't point to a class
177
+ # 如果入口点不指向类,则引发异常
47
178
  raise Exception(f"Invalid entry point {entry_point.name}")
179
+
48
180
  return cmds
49
181
 
50
182
 
51
183
  def _get_commands_dict(settings, inproject):
184
+ """
185
+ Get a dictionary of all available commands.
186
+ 获取所有可用命令的字典。
187
+
188
+ This function collects commands from three sources:
189
+ 此函数从三个来源收集命令:
190
+
191
+ 1. Built-in commands from the aioscrapy.commands module
192
+ aioscrapy.commands模块中的内置命令
193
+ 2. Commands from entry points in the aioscrapy.commands group
194
+ aioscrapy.commands组中的入口点命令
195
+ 3. Commands from the module specified in the COMMANDS_MODULE setting
196
+ COMMANDS_MODULE设置中指定的模块中的命令
197
+
198
+ Args:
199
+ settings: The settings object.
200
+ 设置对象。
201
+ inproject: Whether we're currently inside a project.
202
+ 我们当前是否在项目内。
203
+
204
+ Returns:
205
+ dict: A dictionary of command name -> command instance.
206
+ 命令名称 -> 命令实例的字典。
207
+ """
208
+ # Get built-in commands from the aioscrapy.commands module
209
+ # 从aioscrapy.commands模块获取内置命令
52
210
  cmds = _get_commands_from_module('aioscrapy.commands', inproject)
211
+
212
+ # Update with commands from entry points
213
+ # 使用入口点中的命令更新
53
214
  cmds.update(_get_commands_from_entry_points(inproject))
215
+
216
+ # Get the custom commands module from settings
217
+ # 从设置获取自定义命令模块
54
218
  cmds_module = settings['COMMANDS_MODULE']
219
+
220
+ # If a custom commands module is specified, add its commands
221
+ # 如果指定了自定义命令模块,则添加其命令
55
222
  if cmds_module:
56
223
  cmds.update(_get_commands_from_module(cmds_module, inproject))
224
+
57
225
  return cmds
58
226
 
59
227
 
60
228
  def _pop_command_name(argv):
229
+ """
230
+ Extract the command name from command line arguments.
231
+ 从命令行参数中提取命令名称。
232
+
233
+ This function searches for the first argument that doesn't start with a dash,
234
+ which is assumed to be the command name. It removes this argument from the
235
+ list and returns it.
236
+ 此函数搜索第一个不以破折号开头的参数,该参数被假定为命令名称。它从列表中
237
+ 删除此参数并返回它。
238
+
239
+ Args:
240
+ argv: List of command line arguments.
241
+ 命令行参数列表。
242
+
243
+ Returns:
244
+ str or None: The command name if found, None otherwise.
245
+ 如果找到,则为命令名称,否则为None。
246
+ """
247
+ # Start from index 0 (which corresponds to argv[1], the first argument after the script name)
248
+ # 从索引0开始(对应于argv[1],脚本名称之后的第一个参数)
61
249
  i = 0
250
+
251
+ # Iterate through arguments, skipping the script name (argv[0])
252
+ # 迭代参数,跳过脚本名称(argv[0])
62
253
  for arg in argv[1:]:
254
+ # If the argument doesn't start with a dash, it's the command name
255
+ # 如果参数不以破折号开头,则它是命令名称
63
256
  if not arg.startswith('-'):
257
+ # Remove the command name from the argument list
258
+ # 从参数列表中删除命令名称
64
259
  del argv[i]
260
+ # Return the command name
261
+ # 返回命令名称
65
262
  return arg
66
263
  i += 1
67
264
 
265
+ # No command name found
266
+ # 未找到命令名称
267
+ return None
268
+
68
269
 
69
270
  def _print_header(settings, inproject):
271
+ """
272
+ Print the AioScrapy header with version and project information.
273
+ 打印带有版本和项目信息的AioScrapy标头。
274
+
275
+ This function prints a header line showing the AioScrapy version and,
276
+ if inside a project, the project name.
277
+ 此函数打印一个标头行,显示AioScrapy版本,如果在项目内,则显示项目名称。
278
+
279
+ Args:
280
+ settings: The settings object.
281
+ 设置对象。
282
+ inproject: Whether we're currently inside a project.
283
+ 我们当前是否在项目内。
284
+ """
285
+ # Get the AioScrapy version
286
+ # 获取AioScrapy版本
70
287
  version = aioscrapy.__version__
288
+
289
+ # Print different headers depending on whether we're in a project
290
+ # 根据我们是否在项目内打印不同的标头
71
291
  if inproject:
72
292
  print(f"ioscrapy {version} - project: {settings['BOT_NAME']}\n")
73
293
  else:
@@ -75,44 +295,159 @@ def _print_header(settings, inproject):
75
295
 
76
296
 
77
297
  def _print_commands(settings, inproject):
298
+ """
299
+ Print a list of available commands.
300
+ 打印可用命令列表。
301
+
302
+ This function prints the AioScrapy header, usage information, and a list
303
+ of all available commands with their short descriptions.
304
+ 此函数打印AioScrapy标头、使用信息和所有可用命令及其简短描述的列表。
305
+
306
+ Args:
307
+ settings: The settings object.
308
+ 设置对象。
309
+ inproject: Whether we're currently inside a project.
310
+ 我们当前是否在项目内。
311
+ """
312
+ # Print the header
313
+ # 打印标头
78
314
  _print_header(settings, inproject)
315
+
316
+ # Print usage information
317
+ # 打印使用信息
79
318
  print("Usage:")
80
319
  print(" aioscrapy <command> [options] [args]\n")
320
+
321
+ # Print available commands
322
+ # 打印可用命令
81
323
  print("Available commands:")
82
324
  cmds = _get_commands_dict(settings, inproject)
83
325
  for cmdname, cmdclass in sorted(cmds.items()):
84
326
  print(f" {cmdname:<13} {cmdclass.short_desc()}")
327
+
328
+ # If not in a project, mention that more commands are available in a project
329
+ # 如果不在项目内,请提及在项目中有更多可用命令
85
330
  if not inproject:
86
331
  print()
87
332
  print(" [ more ] More commands available when run from project directory")
333
+
334
+ # Print help information
335
+ # 打印帮助信息
88
336
  print()
89
337
  print('Use "aioscrapy <command> -h" to see more info about a command')
90
338
 
91
339
 
92
340
  def _print_unknown_command(settings, cmdname, inproject):
341
+ """
342
+ Print an error message for an unknown command.
343
+ 打印未知命令的错误消息。
344
+
345
+ This function prints the AioScrapy header and an error message indicating
346
+ that the specified command is unknown.
347
+ 此函数打印AioScrapy标头和一条错误消息,指示指定的命令未知。
348
+
349
+ Args:
350
+ settings: The settings object.
351
+ 设置对象。
352
+ cmdname: The name of the unknown command.
353
+ 未知命令的名称。
354
+ inproject: Whether we're currently inside a project.
355
+ 我们当前是否在项目内。
356
+ """
357
+ # Print the header
358
+ # 打印标头
93
359
  _print_header(settings, inproject)
360
+
361
+ # Print error message
362
+ # 打印错误消息
94
363
  print(f"Unknown command: {cmdname}\n")
364
+
365
+ # Print help information
366
+ # 打印帮助信息
95
367
  print('Use "aioscrapy" to see available commands')
96
368
 
97
369
 
98
370
  def _run_print_help(parser, func, *a, **kw):
371
+ """
372
+ Run a function and handle UsageError exceptions.
373
+ 运行函数并处理UsageError异常。
374
+
375
+ This function runs the specified function with the given arguments and handles
376
+ UsageError exceptions by printing an error message and/or help information.
377
+ 此函数使用给定的参数运行指定的函数,并通过打印错误消息和/或帮助信息来处理
378
+ UsageError异常。
379
+
380
+ Args:
381
+ parser: The option parser to use for printing help.
382
+ 用于打印帮助的选项解析器。
383
+ func: The function to run.
384
+ 要运行的函数。
385
+ *a: Positional arguments to pass to the function.
386
+ 传递给函数的位置参数。
387
+ **kw: Keyword arguments to pass to the function.
388
+ 传递给函数的关键字参数。
389
+
390
+ Raises:
391
+ SystemExit: With exit code 2 if a UsageError occurs.
392
+ 如果发生UsageError,则退出代码为2。
393
+ """
99
394
  try:
395
+ # Run the function with the given arguments
396
+ # 使用给定的参数运行函数
100
397
  func(*a, **kw)
101
398
  except UsageError as e:
399
+ # If the error has a message, print it
400
+ # 如果错误有消息,则打印它
102
401
  if str(e):
103
402
  parser.error(str(e))
403
+
404
+ # If the error requests help to be printed, print it
405
+ # 如果错误请求打印帮助,则打印它
104
406
  if e.print_help:
105
407
  parser.print_help()
408
+
409
+ # Exit with code 2 (command line syntax error)
410
+ # 退出代码2(命令行语法错误)
106
411
  sys.exit(2)
107
412
 
108
413
 
109
414
  def execute(argv=None, settings=None):
415
+ """
416
+ Main entry point for the AioScrapy command line interface.
417
+ AioScrapy命令行接口的主入口点。
418
+
419
+ This function parses command line arguments, finds the appropriate command,
420
+ and runs it with the specified options and arguments.
421
+ 此函数解析命令行参数,找到适当的命令,并使用指定的选项和参数运行它。
422
+
423
+ Args:
424
+ argv: The command line arguments.
425
+ 命令行参数。
426
+ Defaults to sys.argv.
427
+ 默认为sys.argv。
428
+ settings: The settings object.
429
+ 设置对象。
430
+ If None, the project settings will be used.
431
+ 如果为None,则将使用项目设置。
432
+
433
+ Raises:
434
+ SystemExit: With exit code 0 if no command is specified,
435
+ or exit code 2 if an unknown command or a command that
436
+ requires a project is run outside a project.
437
+ 如果未指定命令,则退出代码为0;
438
+ 如果在项目外运行未知命令或需要项目的命令,则退出代码为2。
439
+ """
440
+ # Use sys.argv if no arguments are provided
441
+ # 如果未提供参数,则使用sys.argv
110
442
  if argv is None:
111
443
  argv = sys.argv
112
444
 
445
+ # Use project settings if no settings are provided
446
+ # 如果未提供设置,则使用项目设置
113
447
  if settings is None:
114
448
  settings = get_project_settings()
115
- # set EDITOR from environment if available
449
+ # Set EDITOR from environment if available
450
+ # 如果可用,则从环境设置EDITOR
116
451
  try:
117
452
  editor = os.environ['EDITOR']
118
453
  except KeyError:
@@ -120,45 +455,143 @@ def execute(argv=None, settings=None):
120
455
  else:
121
456
  settings['EDITOR'] = editor
122
457
 
458
+ # Check if we're inside a project
459
+ # 检查我们是否在项目内
123
460
  inproject = inside_project()
461
+
462
+ # Get all available commands
463
+ # 获取所有可用命令
124
464
  cmds = _get_commands_dict(settings, inproject)
465
+
466
+ # Extract the command name from the arguments
467
+ # 从参数中提取命令名称
125
468
  cmdname = _pop_command_name(argv)
469
+
470
+ # Create an option parser
471
+ # 创建选项解析器
126
472
  parser = optparse.OptionParser(formatter=optparse.TitledHelpFormatter(),
127
473
  conflict_handler='resolve')
474
+
475
+ # If no command is specified, print the list of commands and exit
476
+ # 如果未指定命令,则打印命令列表并退出
128
477
  if not cmdname:
129
478
  _print_commands(settings, inproject)
130
479
  sys.exit(0)
480
+ # If the command is unknown, print an error message and exit
481
+ # 如果命令未知,则打印错误消息并退出
131
482
  elif cmdname not in cmds:
132
483
  _print_unknown_command(settings, cmdname, inproject)
133
484
  sys.exit(2)
134
485
 
486
+ # Get the command instance
487
+ # 获取命令实例
135
488
  cmd = cmds[cmdname]
489
+
490
+ # Set up the parser with command-specific information
491
+ # 使用命令特定信息设置解析器
136
492
  parser.usage = f"aioscrapy {cmdname} {cmd.syntax()}"
137
493
  parser.description = cmd.long_desc()
494
+
495
+ # Apply command-specific settings
496
+ # 应用命令特定设置
138
497
  settings.setdict(cmd.default_settings, priority='command')
139
498
  cmd.settings = settings
499
+
500
+ # Add command-specific options to the parser
501
+ # 向解析器添加命令特定选项
140
502
  cmd.add_options(parser)
503
+
504
+ # Parse the command line arguments
505
+ # 解析命令行参数
141
506
  opts, args = parser.parse_args(args=argv[1:])
507
+
508
+ # Process command options
509
+ # 处理命令选项
142
510
  _run_print_help(parser, cmd.process_options, args, opts)
143
511
 
512
+ # Set up the crawler process for the command
513
+ # 为命令设置爬虫进程
144
514
  cmd.crawler_process = CrawlerProcess(settings)
515
+
516
+ # Run the command and handle any usage errors
517
+ # 运行命令并处理任何使用错误
145
518
  _run_print_help(parser, _run_command, cmd, args, opts)
519
+
520
+ # Exit with the command's exit code
521
+ # 使用命令的退出代码退出
146
522
  sys.exit(cmd.exitcode)
147
523
 
148
524
 
149
525
  def _run_command(cmd, args, opts):
526
+ """
527
+ Run a command with the given arguments and options.
528
+ 使用给定的参数和选项运行命令。
529
+
530
+ This function runs the command either with or without profiling,
531
+ depending on the options.
532
+ 此函数根据选项运行命令,可以带有或不带有性能分析。
533
+
534
+ Args:
535
+ cmd: The command to run.
536
+ 要运行的命令。
537
+ args: The arguments to pass to the command.
538
+ 传递给命令的参数。
539
+ opts: The options to pass to the command.
540
+ 传递给命令的选项。
541
+ Must have a 'profile' attribute that specifies whether to run
542
+ with profiling.
543
+ 必须有一个'profile'属性,指定是否使用性能分析运行。
544
+ """
545
+ # If profiling is enabled, run the command with profiling
546
+ # 如果启用了性能分析,则使用性能分析运行命令
150
547
  if opts.profile:
151
548
  _run_command_profiled(cmd, args, opts)
152
549
  else:
550
+ # Otherwise, run the command directly
551
+ # 否则,直接运行命令
153
552
  cmd.run(args, opts)
154
553
 
155
554
 
156
555
  def _run_command_profiled(cmd, args, opts):
556
+ """
557
+ Run a command with profiling.
558
+ 使用性能分析运行命令。
559
+
560
+ This function runs the command with cProfile profiling and optionally
561
+ saves the profiling stats to a file.
562
+ 此函数使用cProfile性能分析运行命令,并可选择将性能分析统计信息保存到文件。
563
+
564
+ Args:
565
+ cmd: The command to run.
566
+ 要运行的命令。
567
+ args: The arguments to pass to the command.
568
+ 传递给命令的参数。
569
+ opts: The options to pass to the command.
570
+ 传递给命令的选项。
571
+ Must have a 'profile' attribute that specifies the output file
572
+ for profiling stats, or False to disable saving stats.
573
+ 必须有一个'profile'属性,指定性能分析统计信息的输出文件,
574
+ 或False以禁用保存统计信息。
575
+ """
576
+ # If a profile output file is specified, print a message
577
+ # 如果指定了性能分析输出文件,则打印消息
157
578
  if opts.profile:
158
579
  sys.stderr.write(f"aioscrapy: writing cProfile stats to {opts.profile!r}\n")
580
+
581
+ # Create a local namespace for the profiler
582
+ # 为性能分析器创建本地命名空间
159
583
  loc = locals()
584
+
585
+ # Create a profiler
586
+ # 创建性能分析器
160
587
  p = cProfile.Profile()
588
+
589
+ # Run the command with profiling
590
+ # 使用性能分析运行命令
161
591
  p.runctx('cmd.run(args, opts)', globals(), loc)
592
+
593
+ # If a profile output file is specified, save the stats
594
+ # 如果指定了性能分析输出文件,则保存统计信息
162
595
  if opts.profile:
163
596
  p.dump_stats(opts.profile)
164
597