aio-scrapy 2.1.4__py3-none-any.whl → 2.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/LICENSE +1 -1
  2. aio_scrapy-2.1.7.dist-info/METADATA +147 -0
  3. aio_scrapy-2.1.7.dist-info/RECORD +134 -0
  4. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/WHEEL +1 -1
  5. aioscrapy/VERSION +1 -1
  6. aioscrapy/cmdline.py +438 -5
  7. aioscrapy/core/downloader/__init__.py +522 -17
  8. aioscrapy/core/downloader/handlers/__init__.py +187 -5
  9. aioscrapy/core/downloader/handlers/aiohttp.py +190 -6
  10. aioscrapy/core/downloader/handlers/curl_cffi.py +126 -5
  11. aioscrapy/core/downloader/handlers/httpx.py +135 -5
  12. aioscrapy/core/downloader/handlers/pyhttpx.py +137 -5
  13. aioscrapy/core/downloader/handlers/requests.py +120 -2
  14. aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
  15. aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
  16. aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
  17. aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
  18. aioscrapy/core/engine.py +381 -20
  19. aioscrapy/core/scheduler.py +350 -36
  20. aioscrapy/core/scraper.py +509 -33
  21. aioscrapy/crawler.py +392 -10
  22. aioscrapy/db/__init__.py +149 -0
  23. aioscrapy/db/absmanager.py +212 -6
  24. aioscrapy/db/aiomongo.py +292 -10
  25. aioscrapy/db/aiomysql.py +363 -10
  26. aioscrapy/db/aiopg.py +299 -2
  27. aioscrapy/db/aiorabbitmq.py +444 -4
  28. aioscrapy/db/aioredis.py +260 -11
  29. aioscrapy/dupefilters/__init__.py +110 -5
  30. aioscrapy/dupefilters/disk.py +124 -2
  31. aioscrapy/dupefilters/redis.py +598 -32
  32. aioscrapy/exceptions.py +151 -13
  33. aioscrapy/http/__init__.py +1 -1
  34. aioscrapy/http/headers.py +237 -3
  35. aioscrapy/http/request/__init__.py +257 -11
  36. aioscrapy/http/request/form.py +83 -3
  37. aioscrapy/http/request/json_request.py +121 -9
  38. aioscrapy/http/response/__init__.py +306 -33
  39. aioscrapy/http/response/html.py +42 -3
  40. aioscrapy/http/response/text.py +496 -49
  41. aioscrapy/http/response/web_driver.py +144 -0
  42. aioscrapy/http/response/xml.py +45 -3
  43. aioscrapy/libs/downloader/defaultheaders.py +66 -2
  44. aioscrapy/libs/downloader/downloadtimeout.py +91 -2
  45. aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
  46. aioscrapy/libs/downloader/retry.py +192 -6
  47. aioscrapy/libs/downloader/stats.py +142 -0
  48. aioscrapy/libs/downloader/useragent.py +93 -2
  49. aioscrapy/libs/extensions/closespider.py +166 -4
  50. aioscrapy/libs/extensions/corestats.py +151 -1
  51. aioscrapy/libs/extensions/logstats.py +145 -1
  52. aioscrapy/libs/extensions/metric.py +370 -1
  53. aioscrapy/libs/extensions/throttle.py +235 -1
  54. aioscrapy/libs/pipelines/__init__.py +345 -2
  55. aioscrapy/libs/pipelines/csv.py +242 -0
  56. aioscrapy/libs/pipelines/excel.py +545 -0
  57. aioscrapy/libs/pipelines/mongo.py +132 -0
  58. aioscrapy/libs/pipelines/mysql.py +67 -0
  59. aioscrapy/libs/pipelines/pg.py +67 -0
  60. aioscrapy/libs/spider/depth.py +141 -3
  61. aioscrapy/libs/spider/httperror.py +144 -4
  62. aioscrapy/libs/spider/offsite.py +202 -2
  63. aioscrapy/libs/spider/referer.py +396 -21
  64. aioscrapy/libs/spider/urllength.py +97 -1
  65. aioscrapy/link.py +115 -8
  66. aioscrapy/logformatter.py +199 -8
  67. aioscrapy/middleware/absmanager.py +328 -2
  68. aioscrapy/middleware/downloader.py +218 -0
  69. aioscrapy/middleware/extension.py +50 -1
  70. aioscrapy/middleware/itempipeline.py +96 -0
  71. aioscrapy/middleware/spider.py +360 -7
  72. aioscrapy/process.py +200 -0
  73. aioscrapy/proxy/__init__.py +142 -3
  74. aioscrapy/proxy/redis.py +136 -2
  75. aioscrapy/queue/__init__.py +168 -16
  76. aioscrapy/scrapyd/runner.py +124 -3
  77. aioscrapy/serializer.py +182 -2
  78. aioscrapy/settings/__init__.py +610 -128
  79. aioscrapy/settings/default_settings.py +314 -14
  80. aioscrapy/signalmanager.py +151 -20
  81. aioscrapy/signals.py +183 -1
  82. aioscrapy/spiderloader.py +165 -12
  83. aioscrapy/spiders/__init__.py +233 -6
  84. aioscrapy/statscollectors.py +312 -1
  85. aioscrapy/utils/conf.py +345 -17
  86. aioscrapy/utils/curl.py +168 -16
  87. aioscrapy/utils/decorators.py +76 -6
  88. aioscrapy/utils/deprecate.py +212 -19
  89. aioscrapy/utils/httpobj.py +55 -3
  90. aioscrapy/utils/log.py +79 -0
  91. aioscrapy/utils/misc.py +189 -21
  92. aioscrapy/utils/ossignal.py +67 -5
  93. aioscrapy/utils/project.py +165 -3
  94. aioscrapy/utils/python.py +254 -44
  95. aioscrapy/utils/reqser.py +75 -1
  96. aioscrapy/utils/request.py +173 -12
  97. aioscrapy/utils/response.py +91 -6
  98. aioscrapy/utils/signal.py +196 -14
  99. aioscrapy/utils/spider.py +51 -4
  100. aioscrapy/utils/template.py +93 -6
  101. aioscrapy/utils/tools.py +191 -17
  102. aioscrapy/utils/trackref.py +198 -12
  103. aioscrapy/utils/url.py +341 -36
  104. aio_scrapy-2.1.4.dist-info/METADATA +0 -239
  105. aio_scrapy-2.1.4.dist-info/RECORD +0 -133
  106. aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -115
  107. aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -59
  108. aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
  109. aioscrapy/http/response/playwright.py +0 -36
  110. aioscrapy/libs/pipelines/execl.py +0 -169
  111. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/entry_points.txt +0 -0
  112. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/top_level.txt +0 -0
@@ -1,3 +1,14 @@
1
+ """
2
+ Project utility functions for aioscrapy.
3
+ aioscrapy的项目实用函数。
4
+
5
+ This module provides utility functions for working with aioscrapy projects.
6
+ It includes functions for determining if code is running inside a project,
7
+ accessing project data directories, and loading project settings.
8
+ 此模块提供了用于处理aioscrapy项目的实用函数。
9
+ 它包括用于确定代码是否在项目内运行、访问项目数据目录和加载项目设置的函数。
10
+ """
11
+
1
12
  import os
2
13
  import warnings
3
14
 
@@ -9,73 +20,221 @@ from aioscrapy.settings import Settings
9
20
  from aioscrapy.exceptions import NotConfigured, AioScrapyDeprecationWarning
10
21
 
11
22
 
23
# Environment variable that carries the dotted import path of the settings module.
ENVVAR = 'AIOSCRAPY_SETTINGS_MODULE'

# Configuration-file section whose options map a project name to its data directory.
DATADIR_CFG_SECTION = 'datadir'
14
30
 
15
31
 
16
32
def inside_project():
    """
    Tell whether the current process is running inside an aioscrapy project.

    Two signals are consulted, in this order:

    1. The ``AIOSCRAPY_SETTINGS_MODULE`` environment variable. When it is set
       and the module it names imports cleanly, the answer is ``True``.
    2. Otherwise (the variable is unset, or the import failed and a warning
       was issued), the truthiness of ``closest_aioscrapy_cfg()`` decides.

    Returns:
        bool: True when either signal indicates a project, False otherwise.
    """
    settings_module = os.environ.get('AIOSCRAPY_SETTINGS_MODULE')
    importable = False

    if settings_module is not None:
        try:
            import_module(settings_module)
            importable = True
        except ImportError as exc:
            # A broken settings module is not fatal: warn, then fall back to
            # the configuration-file lookup below.
            warnings.warn(f"Cannot import scrapy settings module {settings_module}: {exc}")

    # The configuration-file lookup only runs when the import did not succeed.
    return importable or bool(closest_aioscrapy_cfg())
26
70
 
27
71
 
28
72
def project_data_dir(project='default'):
    """
    Return the data directory of ``project``, creating it when it is missing.

    The directory is resolved in this order:

    1. The option named ``project`` in the ``[datadir]`` section of the
       project configuration, if present. The configured value is returned
       exactly as written; it is not normalised to an absolute path.
    2. Otherwise, the absolute path of a ``.scrapy`` directory located next to
       the file returned by ``closest_aioscrapy_cfg()``.

    Args:
        project: Name of the project whose data directory is wanted.
            Defaults to ``'default'``.

    Returns:
        str: Path of the (now existing) data directory.

    Raises:
        NotConfigured: If not running inside a project, or if no configuration
            file can be located to infer the default directory.
    """
    if not inside_project():
        raise NotConfigured("Not inside a project")

    cfg = get_config()
    if cfg.has_option(DATADIR_CFG_SECTION, project):
        d = cfg.get(DATADIR_CFG_SECTION, project)
    else:
        scrapy_cfg = closest_aioscrapy_cfg()
        if not scrapy_cfg:
            raise NotConfigured("Unable to find scrapy.cfg file to infer project data dir")
        d = abspath(join(dirname(scrapy_cfg), '.scrapy'))

    if not exists(d):
        # exist_ok closes the window between the exists() check and the
        # creation: another process creating the same directory concurrently
        # must not make this call fail with FileExistsError.
        os.makedirs(d, exist_ok=True)

    return d
43
131
 
44
132
 
45
133
def data_path(path, createdir=False):
    """
    Resolve ``path`` against the project data directory.

    An absolute ``path`` is returned unmodified. A relative ``path`` is joined
    onto ``project_data_dir()`` when running inside a project, and onto a
    ``.scrapy`` directory relative to the current working directory otherwise
    (so the result is itself relative in that case).

    Args:
        path: The path to resolve. May be absolute or relative.
        createdir: When true, create the resolved path as a directory
            (including parents) if nothing exists there yet.
            Defaults to ``False``.

    Returns:
        str: The resolved path.
    """
    if not isabs(path):
        if inside_project():
            path = join(project_data_dir(), path)
        else:
            path = join('.scrapy', path)

    if createdir and not exists(path):
        # exist_ok keeps a concurrent creator (another process resolving the
        # same data path) from turning the check-then-create sequence into a
        # FileExistsError.
        os.makedirs(path, exist_ok=True)

    return path
58
176
 
59
177
 
60
178
  def get_project_settings():
179
+ """
180
+ Get a Settings instance with the project settings.
181
+ 获取包含项目设置的Settings实例。
182
+
183
+ This function loads the project settings from the module specified in the
184
+ AIOSCRAPY_SETTINGS_MODULE environment variable. If the variable is not set,
185
+ it tries to initialize the environment using the project name from the
186
+ AIOSCRAPY_PROJECT environment variable (defaulting to 'default').
187
+
188
+ 此函数从AIOSCRAPY_SETTINGS_MODULE环境变量指定的模块加载项目设置。
189
+ 如果未设置该变量,它会尝试使用AIOSCRAPY_PROJECT环境变量中的项目名称
190
+ (默认为'default')初始化环境。
191
+
192
+ The function also handles environment variables prefixed with AIOSCRAPY_
193
+ as a way to override settings, though this method is deprecated.
194
+
195
+ 该函数还处理以AIOSCRAPY_为前缀的环境变量作为覆盖设置的方式,
196
+ 尽管此方法已弃用。
197
+
198
+ Returns:
199
+ Settings: A Settings instance with the project settings loaded.
200
+ 加载了项目设置的Settings实例。
201
+ """
202
+ # Initialize the environment if the settings module is not set
203
+ # 如果未设置设置模块,则初始化环境
61
204
  if ENVVAR not in os.environ:
62
205
  project = os.environ.get('AIOSCRAPY_PROJECT', 'default')
63
206
  init_env(project)
64
207
 
208
+ # Create a new Settings instance
209
+ # 创建一个新的Settings实例
65
210
  settings = Settings()
211
+
212
+ # Load settings from the module specified in the environment variable
213
+ # 从环境变量指定的模块加载设置
66
214
  settings_module_path = os.environ.get(ENVVAR)
67
215
  if settings_module_path:
68
216
  settings.setmodule(settings_module_path, priority='project')
69
217
 
218
+ # Get all environment variables prefixed with AIOSCRAPY_
219
+ # 获取所有以AIOSCRAPY_为前缀的环境变量
70
220
  aioscrapy_envvars = {k[10:]: v for k, v in os.environ.items() if
71
221
  k.startswith('AIOSCRAPY_')}
222
+
223
+ # Define which environment variables are valid and not settings
224
+ # 定义哪些环境变量是有效的且不是设置
72
225
  valid_envvars = {
73
226
  'CHECK',
74
227
  'PROJECT',
75
228
  'PYTHON_SHELL',
76
229
  'SETTINGS_MODULE',
77
230
  }
231
+
232
+ # Find environment variables that are being used to override settings
233
+ # 查找用于覆盖设置的环境变量
78
234
  setting_envvars = {k for k in aioscrapy_envvars if k not in valid_envvars}
235
+
236
+ # Warn about deprecated usage of environment variables to override settings
237
+ # 警告关于使用环境变量覆盖设置的已弃用用法
79
238
  if setting_envvars:
80
239
  setting_envvar_list = ', '.join(sorted(setting_envvars))
81
240
  warnings.warn(
@@ -84,6 +243,9 @@ def get_project_settings():
84
243
  f'currently defined: {setting_envvar_list}',
85
244
  AioScrapyDeprecationWarning
86
245
  )
246
+
247
+ # Apply the environment variable overrides
248
+ # 应用环境变量覆盖
87
249
  settings.setdict(aioscrapy_envvars, priority='project')
88
250
 
89
251
  return settings
aioscrapy/utils/python.py CHANGED
@@ -1,5 +1,12 @@
1
1
  """
2
- This module contains essential stuff that should've come with Python itself ;)
2
+ Python utility functions for aioscrapy.
3
+ aioscrapy的Python实用函数。
4
+
5
+ This module contains essential utility functions for Python that enhance the standard library.
6
+ It provides functions for type conversion, string handling, regular expression searching,
7
+ memoization, and other common operations.
8
+ 此模块包含增强标准库的Python基本实用函数。
9
+ 它提供了用于类型转换、字符串处理、正则表达式搜索、记忆化和其他常见操作的函数。
3
10
  """
4
11
  import gc
5
12
  import re
@@ -12,31 +19,72 @@ from aioscrapy.utils.decorators import deprecated
12
19
 
13
20
def is_listlike(x):
    """
    Report whether ``x`` is iterable without being a ``str`` or ``bytes``.

    An object counts as list-like when it exposes ``__iter__`` and is not a
    text or byte string, so callers can treat strings as scalars while still
    accepting any other iterable.

    Args:
        x: The object to inspect.

    Returns:
        bool: True if ``x`` is list-like, False otherwise.

    Examples:
        >>> is_listlike("foo")
        False
        >>> is_listlike(5)
        False
        >>> is_listlike(b"foo")
        False
        >>> is_listlike([b"foo"])
        True
        >>> is_listlike((b"foo",))
        True
        >>> is_listlike({})
        True
        >>> is_listlike(set())
        True
        >>> is_listlike((x for x in range(3)))
        True
        >>> is_listlike(range(5))
        True
    """
    if not hasattr(x, "__iter__"):
        return False
    return not isinstance(x, (str, bytes))
35
60
 
36
61
 
37
62
  def to_unicode(text, encoding=None, errors='strict'):
38
- """Return the unicode representation of a bytes object ``text``. If
39
- ``text`` is already an unicode object, return it as-is."""
63
+ """
64
+ Convert a bytes object to a unicode (str) object.
65
+ 将字节对象转换为unicode(str)对象。
66
+
67
+ This function converts a bytes object to a unicode string using the specified
68
+ encoding. If the input is already a unicode string, it is returned unchanged.
69
+ 此函数使用指定的编码将字节对象转换为unicode字符串。
70
+ 如果输入已经是unicode字符串,则原样返回。
71
+
72
+ Args:
73
+ text: The text to convert. Must be a bytes or str object.
74
+ 要转换的文本。必须是bytes或str对象。
75
+ encoding: The encoding to use for decoding bytes. Defaults to 'utf-8'.
76
+ 用于解码字节的编码。默认为'utf-8'。
77
+ errors: The error handling scheme for decoding. Defaults to 'strict'.
78
+ 解码的错误处理方案。默认为'strict'。
79
+
80
+ Returns:
81
+ str: The unicode representation of the input text.
82
+ 输入文本的unicode表示。
83
+
84
+ Raises:
85
+ TypeError: If the input is not a bytes or str object.
86
+ 如果输入不是bytes或str对象。
87
+ """
40
88
  if isinstance(text, str):
41
89
  return text
42
90
  if not isinstance(text, (bytes, str)):
@@ -48,8 +96,31 @@ def to_unicode(text, encoding=None, errors='strict'):
48
96
 
49
97
 
50
98
  def to_bytes(text, encoding=None, errors='strict'):
51
- """Return the binary representation of ``text``. If ``text``
52
- is already a bytes object, return it as-is."""
99
+ """
100
+ Convert a unicode (str) object to a bytes object.
101
+ 将unicode(str)对象转换为字节对象。
102
+
103
+ This function converts a unicode string to a bytes object using the specified
104
+ encoding. If the input is already a bytes object, it is returned unchanged.
105
+ 此函数使用指定的编码将unicode字符串转换为字节对象。
106
+ 如果输入已经是字节对象,则原样返回。
107
+
108
+ Args:
109
+ text: The text to convert. Must be a str or bytes object.
110
+ 要转换的文本。必须是str或bytes对象。
111
+ encoding: The encoding to use for encoding the string. Defaults to 'utf-8'.
112
+ 用于编码字符串的编码。默认为'utf-8'。
113
+ errors: The error handling scheme for encoding. Defaults to 'strict'.
114
+ 编码的错误处理方案。默认为'strict'。
115
+
116
+ Returns:
117
+ bytes: The binary representation of the input text.
118
+ 输入文本的二进制表示。
119
+
120
+ Raises:
121
+ TypeError: If the input is not a str or bytes object.
122
+ 如果输入不是str或bytes对象。
123
+ """
53
124
  if isinstance(text, bytes):
54
125
  return text
55
126
  if not isinstance(text, str):
@@ -62,24 +133,71 @@ def to_bytes(text, encoding=None, errors='strict'):
62
133
 
63
134
@deprecated('to_unicode')
def to_native_str(text, encoding=None, errors='strict'):
    """
    Deprecated alias of ``to_unicode``: return ``text`` as a ``str``.

    All arguments are forwarded unchanged to ``to_unicode``; use that
    function directly in new code.

    Args:
        text: The text to convert.
        encoding: Encoding forwarded to ``to_unicode``. ``None`` leaves the
            choice to ``to_unicode``.
        errors: Error handling scheme forwarded to ``to_unicode``.
            Defaults to ``'strict'``.

    Returns:
        str: Whatever ``to_unicode`` returns for the same arguments.
    """
    return to_unicode(text, encoding=encoding, errors=errors)
67
156
 
68
157
 
69
158
  def re_rsearch(pattern, text, chunk_size=1024):
70
159
  """
71
- This function does a reverse search in a text using a regular expression
72
- given in the attribute 'pattern'.
73
- Since the re module does not provide this functionality, we have to find for
74
- the expression into chunks of text extracted from the end (for the sake of efficiency).
75
- At first, a chunk of 'chunk_size' kilobytes is extracted from the end, and searched for
76
- the pattern. If the pattern is not found, another chunk is extracted, and another
77
- search is performed.
78
- This process continues until a match is found, or until the whole file is read.
79
- In case the pattern wasn't found, None is returned, otherwise it returns a tuple containing
80
- the start position of the match, and the ending (regarding the entire text).
81
- """
160
+ Perform a reverse search in text using a regular expression pattern.
161
+ 使用正则表达式模式在文本中执行反向搜索。
162
+
163
+ This function searches for the last occurrence of a pattern in a text,
164
+ starting from the end. Since the re module does not provide reverse search
165
+ functionality, this function implements it by searching in chunks from the
166
+ end of the text for efficiency.
167
+ 此函数从文本末尾开始搜索模式的最后一次出现。
168
+ 由于re模块不提供反向搜索功能,此函数通过从文本末尾的块中搜索来实现它,以提高效率。
169
+
170
+ The algorithm works as follows:
171
+ 1. Extract a chunk of 'chunk_size' kilobytes from the end of the text
172
+ 2. Search for the pattern in this chunk
173
+ 3. If not found, extract another chunk further from the end and search again
174
+ 4. Continue until a match is found or the entire text has been searched
175
+
176
+ 算法工作原理如下:
177
+ 1. 从文本末尾提取'chunk_size'千字节的块
178
+ 2. 在此块中搜索模式
179
+ 3. 如果未找到,从末尾进一步提取另一个块并再次搜索
180
+ 4. 继续直到找到匹配项或已搜索整个文本
181
+
182
+ Args:
183
+ pattern: The regular expression pattern to search for.
184
+ 要搜索的正则表达式模式。
185
+ Can be a string or a compiled regex pattern.
186
+ 可以是字符串或已编译的正则表达式模式。
187
+ text: The text to search in.
188
+ 要搜索的文本。
189
+ chunk_size: The size of each chunk in kilobytes. Defaults to 1024 (1MB).
190
+ 每个块的大小(千字节)。默认为1024(1MB)。
82
191
 
192
+ Returns:
193
+ tuple or None: If a match is found, returns a tuple (start, end) with the
194
+ positions of the match in the entire text. If no match is found,
195
+ returns None.
196
+ 如果找到匹配项,返回一个元组(start, end),其中包含整个文本中
197
+ 匹配项的位置。如果未找到匹配项,返回None。
198
+ """
199
+ # Inner function to generate chunks from the end of the text
200
+ # 从文本末尾生成块的内部函数
83
201
  def _chunk_iter():
84
202
  offset = len(text)
85
203
  while True:
@@ -89,60 +207,152 @@ def re_rsearch(pattern, text, chunk_size=1024):
89
207
  yield (text[offset:], offset)
90
208
  yield (text, 0)
91
209
 
210
+ # Compile the pattern if it's a string
211
+ # 如果模式是字符串,则编译它
92
212
  if isinstance(pattern, str):
93
213
  pattern = re.compile(pattern)
94
214
 
215
+ # Search for the pattern in each chunk
216
+ # 在每个块中搜索模式
95
217
  for chunk, offset in _chunk_iter():
96
218
  matches = [match for match in pattern.finditer(chunk)]
97
219
  if matches:
220
+ # Return the position of the last match in the chunk
221
+ # 返回块中最后一个匹配项的位置
98
222
  start, end = matches[-1].span()
99
223
  return offset + start, offset + end
100
224
  return None
101
225
 
102
226
 
103
227
def memoizemethod_noargs(method):
    """
    Decorate a method so its result is computed once per instance.

    Results are kept in a ``weakref.WeakKeyDictionary`` keyed by the instance,
    so an entry disappears when its instance is garbage collected.

    The wrapper accepts arbitrary arguments and passes them to ``method`` on
    the first call, but the cache key is the instance alone: later calls
    return the first result whatever arguments they are given.

    Args:
        method: The method to decorate.

    Returns:
        function: A wrapper around ``method`` that caches per instance.
    """
    results = weakref.WeakKeyDictionary()

    @wraps(method)
    def new_method(self, *args, **kwargs):
        # Cache hit: hand back the value computed by the first call.
        if self in results:
            return results[self]
        # Cache miss: compute once, remember, and return the stored value.
        results[self] = method(self, *args, **kwargs)
        return results[self]

    return new_method
116
267
 
117
268
 
118
269
def without_none_values(iterable):
    """
    Return a copy of ``iterable`` with every ``None`` entry dropped.

    If ``iterable`` provides ``.items()`` it is treated as a mapping and a
    plain ``dict`` is returned without the pairs whose value is ``None``.
    Otherwise the remaining elements are passed to ``type(iterable)`` to build
    a new container of the same type.

    Args:
        iterable: A mapping or another iterable whose type can be constructed
            from an iterable of elements.

    Returns:
        A ``dict`` for mappings, otherwise a new ``type(iterable)`` instance,
        in both cases without ``None`` values.

    Examples:
        >>> without_none_values({'a': 1, 'b': None, 'c': 3})
        {'a': 1, 'c': 3}
        >>> without_none_values([1, None, 3, None, 5])
        [1, 3, 5]
    """
    try:
        # Mapping path: keep only the pairs that carry a real value.
        kept = {}
        for key, value in iterable.items():
            if value is not None:
                kept[key] = value
        return kept
    except AttributeError:
        # No .items(): rebuild the same container type from the survivors.
        return type(iterable)(filter(lambda item: item is not None, iterable))
128
302
 
129
303
 
130
304
def global_object_name(obj):
    """
    Build the dotted name ``"<module>.<name>"`` of a global object.

    The result is assembled from the object's ``__module__`` and ``__name__``
    attributes.

    Args:
        obj: The object to name. It must expose ``__module__`` and
            ``__name__``.

    Returns:
        str: The name in the form ``"module.name"``.

    Examples:
        >>> from aioscrapy import Request
        >>> global_object_name(Request)
        'aioscrapy.http.request.Request'
    """
    module_path = obj.__module__
    short_name = obj.__name__
    return "{}.{}".format(module_path, short_name)
139
328
 
140
329
 
141
330
if not hasattr(sys, "pypy_version_info"):
    def garbage_collect():
        """
        Force a garbage collection cycle.

        Runs ``gc.collect()`` once.
        """
        gc.collect()
else:
    def garbage_collect():
        """
        Force garbage collection on PyPy.

        Runs ``gc.collect()`` twice, because collecting weak references can
        take two collections on PyPy.
        """
        # First pass, then a second one so weak references are cleared too.
        gc.collect()
        gc.collect()