aio-scrapy 2.1.4__py3-none-any.whl → 2.1.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/LICENSE +1 -1
- aio_scrapy-2.1.7.dist-info/METADATA +147 -0
- aio_scrapy-2.1.7.dist-info/RECORD +134 -0
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/WHEEL +1 -1
- aioscrapy/VERSION +1 -1
- aioscrapy/cmdline.py +438 -5
- aioscrapy/core/downloader/__init__.py +522 -17
- aioscrapy/core/downloader/handlers/__init__.py +187 -5
- aioscrapy/core/downloader/handlers/aiohttp.py +190 -6
- aioscrapy/core/downloader/handlers/curl_cffi.py +126 -5
- aioscrapy/core/downloader/handlers/httpx.py +135 -5
- aioscrapy/core/downloader/handlers/pyhttpx.py +137 -5
- aioscrapy/core/downloader/handlers/requests.py +120 -2
- aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
- aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
- aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
- aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
- aioscrapy/core/engine.py +381 -20
- aioscrapy/core/scheduler.py +350 -36
- aioscrapy/core/scraper.py +509 -33
- aioscrapy/crawler.py +392 -10
- aioscrapy/db/__init__.py +149 -0
- aioscrapy/db/absmanager.py +212 -6
- aioscrapy/db/aiomongo.py +292 -10
- aioscrapy/db/aiomysql.py +363 -10
- aioscrapy/db/aiopg.py +299 -2
- aioscrapy/db/aiorabbitmq.py +444 -4
- aioscrapy/db/aioredis.py +260 -11
- aioscrapy/dupefilters/__init__.py +110 -5
- aioscrapy/dupefilters/disk.py +124 -2
- aioscrapy/dupefilters/redis.py +598 -32
- aioscrapy/exceptions.py +151 -13
- aioscrapy/http/__init__.py +1 -1
- aioscrapy/http/headers.py +237 -3
- aioscrapy/http/request/__init__.py +257 -11
- aioscrapy/http/request/form.py +83 -3
- aioscrapy/http/request/json_request.py +121 -9
- aioscrapy/http/response/__init__.py +306 -33
- aioscrapy/http/response/html.py +42 -3
- aioscrapy/http/response/text.py +496 -49
- aioscrapy/http/response/web_driver.py +144 -0
- aioscrapy/http/response/xml.py +45 -3
- aioscrapy/libs/downloader/defaultheaders.py +66 -2
- aioscrapy/libs/downloader/downloadtimeout.py +91 -2
- aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
- aioscrapy/libs/downloader/retry.py +192 -6
- aioscrapy/libs/downloader/stats.py +142 -0
- aioscrapy/libs/downloader/useragent.py +93 -2
- aioscrapy/libs/extensions/closespider.py +166 -4
- aioscrapy/libs/extensions/corestats.py +151 -1
- aioscrapy/libs/extensions/logstats.py +145 -1
- aioscrapy/libs/extensions/metric.py +370 -1
- aioscrapy/libs/extensions/throttle.py +235 -1
- aioscrapy/libs/pipelines/__init__.py +345 -2
- aioscrapy/libs/pipelines/csv.py +242 -0
- aioscrapy/libs/pipelines/excel.py +545 -0
- aioscrapy/libs/pipelines/mongo.py +132 -0
- aioscrapy/libs/pipelines/mysql.py +67 -0
- aioscrapy/libs/pipelines/pg.py +67 -0
- aioscrapy/libs/spider/depth.py +141 -3
- aioscrapy/libs/spider/httperror.py +144 -4
- aioscrapy/libs/spider/offsite.py +202 -2
- aioscrapy/libs/spider/referer.py +396 -21
- aioscrapy/libs/spider/urllength.py +97 -1
- aioscrapy/link.py +115 -8
- aioscrapy/logformatter.py +199 -8
- aioscrapy/middleware/absmanager.py +328 -2
- aioscrapy/middleware/downloader.py +218 -0
- aioscrapy/middleware/extension.py +50 -1
- aioscrapy/middleware/itempipeline.py +96 -0
- aioscrapy/middleware/spider.py +360 -7
- aioscrapy/process.py +200 -0
- aioscrapy/proxy/__init__.py +142 -3
- aioscrapy/proxy/redis.py +136 -2
- aioscrapy/queue/__init__.py +168 -16
- aioscrapy/scrapyd/runner.py +124 -3
- aioscrapy/serializer.py +182 -2
- aioscrapy/settings/__init__.py +610 -128
- aioscrapy/settings/default_settings.py +314 -14
- aioscrapy/signalmanager.py +151 -20
- aioscrapy/signals.py +183 -1
- aioscrapy/spiderloader.py +165 -12
- aioscrapy/spiders/__init__.py +233 -6
- aioscrapy/statscollectors.py +312 -1
- aioscrapy/utils/conf.py +345 -17
- aioscrapy/utils/curl.py +168 -16
- aioscrapy/utils/decorators.py +76 -6
- aioscrapy/utils/deprecate.py +212 -19
- aioscrapy/utils/httpobj.py +55 -3
- aioscrapy/utils/log.py +79 -0
- aioscrapy/utils/misc.py +189 -21
- aioscrapy/utils/ossignal.py +67 -5
- aioscrapy/utils/project.py +165 -3
- aioscrapy/utils/python.py +254 -44
- aioscrapy/utils/reqser.py +75 -1
- aioscrapy/utils/request.py +173 -12
- aioscrapy/utils/response.py +91 -6
- aioscrapy/utils/signal.py +196 -14
- aioscrapy/utils/spider.py +51 -4
- aioscrapy/utils/template.py +93 -6
- aioscrapy/utils/tools.py +191 -17
- aioscrapy/utils/trackref.py +198 -12
- aioscrapy/utils/url.py +341 -36
- aio_scrapy-2.1.4.dist-info/METADATA +0 -239
- aio_scrapy-2.1.4.dist-info/RECORD +0 -133
- aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -115
- aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -59
- aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
- aioscrapy/http/response/playwright.py +0 -36
- aioscrapy/libs/pipelines/execl.py +0 -169
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/entry_points.txt +0 -0
- {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/top_level.txt +0 -0
aioscrapy/utils/tools.py
CHANGED
|
@@ -1,30 +1,100 @@
|
|
|
1
1
|
# _*_ coding: utf-8 _*_
|
|
2
|
+
"""
|
|
3
|
+
Utility tools for aioscrapy.
|
|
4
|
+
aioscrapy的实用工具。
|
|
5
|
+
|
|
6
|
+
This module provides various utility functions for working with asynchronous code,
|
|
7
|
+
singletons, JavaScript execution, and task creation in aioscrapy.
|
|
8
|
+
此模块提供了各种实用函数,用于在aioscrapy中处理异步代码、单例、JavaScript执行和任务创建。
|
|
9
|
+
"""
|
|
10
|
+
|
|
2
11
|
import asyncio
|
|
3
12
|
from types import CoroutineType, GeneratorType, AsyncGeneratorType
|
|
4
13
|
|
|
5
14
|
|
|
6
|
-
async def call_helper(
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
15
|
+
async def call_helper(fn, *args, **kwargs):
    """
    Invoke ``fn`` with the given arguments, awaiting it when it is a coroutine function.

    This lets callers treat plain callables and ``async def`` callables
    uniformly: both are invoked the same way and the final value is returned.

    Args:
        fn: The callable to invoke (regular function or coroutine function).
        *args: Positional arguments forwarded to ``fn``.
        **kwargs: Keyword arguments forwarded to ``fn``.

    Returns:
        The awaited result when ``asyncio.iscoroutinefunction(fn)`` is true,
        otherwise whatever ``fn`` returned directly (un-awaited).
    """
    outcome = fn(*args, **kwargs)
    # Only results of coroutine *functions* are awaited; an awaitable returned
    # by an ordinary callable is handed back to the caller untouched.
    if asyncio.iscoroutinefunction(fn):
        outcome = await outcome
    return outcome
|
|
10
47
|
|
|
11
48
|
|
|
12
49
|
async def async_generator_wrapper(wrapped):
|
|
13
|
-
"""
|
|
50
|
+
"""
|
|
51
|
+
Convert any object into an asynchronous generator.
|
|
52
|
+
将任何对象转换为异步生成器。
|
|
53
|
+
|
|
54
|
+
This function takes any object and converts it into an AsyncGeneratorType.
|
|
55
|
+
It handles different types of input differently:
|
|
56
|
+
- AsyncGeneratorType: returned as is
|
|
57
|
+
- CoroutineType: wrapped in an async generator that yields the awaited result
|
|
58
|
+
- GeneratorType: wrapped in an async generator that yields each item
|
|
59
|
+
- Any other type: wrapped in an async generator that yields the object itself
|
|
60
|
+
|
|
61
|
+
此函数接受任何对象并将其转换为AsyncGeneratorType。
|
|
62
|
+
它对不同类型的输入有不同的处理方式:
|
|
63
|
+
- AsyncGeneratorType:按原样返回
|
|
64
|
+
- CoroutineType:包装在一个异步生成器中,该生成器产生等待的结果
|
|
65
|
+
- GeneratorType:包装在一个异步生成器中,该生成器产生每个项目
|
|
66
|
+
- 任何其他类型:包装在一个异步生成器中,该生成器产生对象本身
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
wrapped: The object to convert to an async generator.
|
|
70
|
+
要转换为异步生成器的对象。
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
AsyncGeneratorType: An asynchronous generator that yields the appropriate values.
|
|
74
|
+
产生适当值的异步生成器。
|
|
75
|
+
"""
|
|
76
|
+
# If it's already an async generator, return it as is
|
|
77
|
+
# 如果它已经是一个异步生成器,按原样返回它
|
|
14
78
|
if isinstance(wrapped, AsyncGeneratorType):
|
|
15
79
|
return wrapped
|
|
16
80
|
|
|
81
|
+
# If it's a coroutine, wrap it in an async generator that yields the awaited result
|
|
82
|
+
# 如果它是一个协程,将其包装在一个异步生成器中,该生成器产生等待的结果
|
|
17
83
|
elif isinstance(wrapped, CoroutineType):
|
|
18
84
|
async def anonymous(c):
|
|
19
85
|
yield await c
|
|
20
86
|
return anonymous(wrapped)
|
|
21
87
|
|
|
88
|
+
# If it's a generator, wrap it in an async generator that yields each item
|
|
89
|
+
# 如果它是一个生成器,将其包装在一个异步生成器中,该生成器产生每个项目
|
|
22
90
|
elif isinstance(wrapped, GeneratorType):
|
|
23
91
|
async def anonymous(c):
|
|
24
92
|
for r in c:
|
|
25
93
|
yield r
|
|
26
94
|
return anonymous(wrapped)
|
|
27
95
|
|
|
96
|
+
# For any other type, wrap it in an async generator that yields the object itself
|
|
97
|
+
# 对于任何其他类型,将其包装在一个异步生成器中,该生成器产生对象本身
|
|
28
98
|
else:
|
|
29
99
|
async def anonymous(c):
|
|
30
100
|
yield c
|
|
@@ -32,11 +102,45 @@ async def async_generator_wrapper(wrapped):
|
|
|
32
102
|
|
|
33
103
|
|
|
34
104
|
def singleton(cls):
    """
    Class decorator that makes every construction return one shared instance.

    The first call builds the instance with the arguments supplied; every
    later call returns that same object and ignores its arguments.

    Args:
        cls: The class to turn into a singleton.

    Returns:
        function: A factory function that replaces the class name. Because a
        function is returned, the decorated name can no longer be used with
        ``isinstance`` or as a base class.

    Example:
        @singleton
        class MyClass:
            pass

        assert MyClass() is MyClass()
    """
    # Per-decoration cache holding the single instance, keyed by the class.
    created_instances = {}

    def _singleton(*args, **kwargs):
        # Fast path: the instance already exists, so hand it back.
        if cls in created_instances:
            return created_instances[cls]

        # First call: build the instance and remember it. Nothing is cached
        # if the constructor raises.
        instance = cls(*args, **kwargs)
        created_instances[cls] = instance
        return instance

    return _singleton
|
|
@@ -44,35 +148,105 @@ def singleton(cls):
|
|
|
44
148
|
|
|
45
149
|
def exec_js_func(js_file_path, func_name, func_params=None, cwd_path=None, cmd_path='node'):
    """
    Run a function defined in a JavaScript file through Node.js (via PyExecJS).

    The file is read as UTF-8, compiled with an ``execjs`` external runtime
    that launches ``cmd_path``, and ``func_name`` is then called with
    ``func_params`` as positional arguments.

    Note:
        Each call (re-)registers the runtime under the global execjs name
        ``"MyNode"``.

    Args:
        js_file_path (str): Path to the JavaScript source file.
        func_name (str): Name of the JavaScript function to call.
        func_params (list, optional): Positional arguments for the JavaScript
            function. ``None`` means no arguments.
        cwd_path (str, optional): Forwarded to ``compile`` as ``cwd``;
            presumably the directory whose ``node_modules`` should be
            used (confirm against PyExecJS).
        cmd_path (str, optional): Command used to launch Node.js.
            Defaults to ``'node'``.

    Returns:
        Whatever the execjs context's ``call`` returns for the function.

    Raises:
        ImportError: If PyExecJS (``execjs``) is not installed.
        OSError: If the JavaScript file cannot be opened (e.g. ``FileNotFoundError``).
    """
    # Imported lazily so that PyExecJS stays an optional dependency.
    import execjs

    call_args = [] if func_params is None else func_params

    # Build a Node.js runtime bound to the requested executable and register
    # it under a fixed name so it can be fetched with execjs.get() below.
    runtime_key = "MyNode"
    node_runtime = execjs._external_runtime.ExternalRuntime(
        name="Node.js (V8)",
        command=[cmd_path],
        encoding='UTF-8',
        runner_source=execjs._runner_sources.Node
    )
    execjs.register(runtime_key, node_runtime)

    # Load the whole script into memory.
    with open(js_file_path, 'r', encoding='utf-8') as js_file:
        source_text = js_file.read()

    # Compile once, then invoke the requested function.
    compiled = execjs.get(runtime_key).compile(source_text, cwd=cwd_path)
    return compiled.call(func_name, *call_args)
|
|
71
220
|
|
|
72
221
|
|
|
73
222
|
def create_task(coros, name=None):
    """
    Schedule a coroutine as an asyncio task, inheriting the current task's name.

    When ``name`` is not given, the new task is named after the task that is
    currently running, so related tasks share one name. An explicit ``name``
    takes precedence. If there is no current task (for example when called
    from a plain event-loop callback), asyncio's default task naming is used.

    Args:
        coros: The coroutine to schedule for execution.
        name: Optional name for the task. If ``None``, the current task's
            name is used when a current task exists.

    Returns:
        asyncio.Task: The created task.

    Raises:
        RuntimeError: If no event loop is running.
    """
    if name is None:
        parent = asyncio.current_task()
        # current_task() returns None outside of a task; leave name as None
        # in that case so asyncio generates a default name.
        if parent is not None:
            name = parent.get_name()
    return asyncio.create_task(coros, name=name)
|
aioscrapy/utils/trackref.py
CHANGED
|
@@ -1,12 +1,26 @@
|
|
|
1
|
-
"""
|
|
2
|
-
|
|
1
|
+
"""
|
|
2
|
+
Object reference tracking utilities for aioscrapy.
|
|
3
|
+
aioscrapy的对象引用跟踪实用工具。
|
|
4
|
+
|
|
5
|
+
This module provides functions and classes to record and report references to
|
|
6
|
+
live object instances. It's useful for debugging memory leaks and tracking
|
|
7
|
+
object lifetimes in aioscrapy applications.
|
|
8
|
+
此模块提供了用于记录和报告活动对象实例引用的函数和类。
|
|
9
|
+
它对于调试内存泄漏和跟踪aioscrapy应用程序中的对象生命周期很有用。
|
|
3
10
|
|
|
11
|
+
Usage:
|
|
12
|
+
使用方法:
|
|
4
13
|
If you want live objects for a particular class to be tracked, you only have to
|
|
5
14
|
subclass from object_ref (instead of object).
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
15
|
+
如果您希望跟踪特定类的活动对象,只需从object_ref(而不是object)继承即可。
|
|
16
|
+
|
|
17
|
+
About performance:
|
|
18
|
+
关于性能:
|
|
19
|
+
This library has a minimal performance impact when enabled, and no performance
|
|
20
|
+
penalty at all when disabled (as object_ref becomes just an alias to object
|
|
21
|
+
in that case).
|
|
22
|
+
启用时,此库对性能的影响最小,禁用时完全没有性能损失
|
|
23
|
+
(因为在这种情况下,object_ref只是object的别名)。
|
|
10
24
|
"""
|
|
11
25
|
|
|
12
26
|
from collections import defaultdict
|
|
@@ -16,52 +30,224 @@ from typing import DefaultDict
|
|
|
16
30
|
from weakref import WeakKeyDictionary
|
|
17
31
|
|
|
18
32
|
|
|
33
|
+
# Alias for the type of None; it is the default value of the `ignore`
# parameter of format_live_refs.
NoneType = type(None)

# Global registry of tracked objects: maps each class to a WeakKeyDictionary
# whose keys are the live instances of that class and whose values are the
# time() timestamps recorded when each instance was created.
live_refs: DefaultDict[type, WeakKeyDictionary] = defaultdict(WeakKeyDictionary)
|
|
21
42
|
|
|
22
43
|
|
|
23
44
|
class object_ref:
    """
    Base class that records every live instance of its subclasses.

    Subclass this instead of ``object`` to have each new instance registered
    in the module-level ``live_refs`` mapping together with its creation
    time. The registry holds instances as weak keys, so tracking alone does
    not keep an object alive.
    """

    # No per-instance attributes are added by this base class.
    # NOTE(review): with empty __slots__ a direct object_ref() instance has no
    # __weakref__ slot and so cannot be a WeakKeyDictionary key; subclasses
    # that do not define __slots__ are unaffected.
    __slots__ = ()

    def __new__(cls, *args, **kwargs):
        """
        Allocate the instance and register it in ``live_refs``.

        Args:
            *args: Constructor positional arguments (not used here).
            **kwargs: Constructor keyword arguments (not used here).

        Returns:
            The newly created instance.
        """
        instance = object.__new__(cls)
        # Stamp the instance with its creation time under its concrete class.
        live_refs[cls][instance] = time()
        return instance
|
|
32
94
|
|
|
33
95
|
|
|
34
96
|
def format_live_refs(ignore=NoneType):
    """
    Build a plain-text table describing the currently tracked live objects.

    One row is produced per tracked class that has at least one live
    instance, showing the class name, the number of live instances and the
    age in whole seconds of the oldest one. Rows are ordered by class name.

    Args:
        ignore: A class (or tuple of classes, as accepted by ``issubclass``)
            to leave out of the report together with its subclasses.
            Defaults to ``NoneType``.

    Returns:
        str: The report, starting with a ``Live References`` header.
    """
    report_lines = ["Live References\n\n"]
    now = time()

    by_class_name = sorted(live_refs.items(), key=lambda pair: pair[0].__name__)
    for cls, wdict in by_class_name:
        # Skip classes with no live instances and classes the caller excluded.
        if not wdict or issubclass(cls, ignore):
            continue

        # The smallest creation timestamp belongs to the oldest instance.
        oldest = min(wdict.values())
        report_lines.append(
            f"{cls.__name__:<30} {len(wdict):6} oldest: {int(now - oldest)}s ago\n"
        )

    return "".join(report_lines)
|
|
47
154
|
|
|
48
155
|
|
|
49
156
|
def print_live_refs(*a, **kw):
    """
    Print the live-object report produced by ``format_live_refs`` to stdout.

    Args:
        *a: Positional arguments forwarded to ``format_live_refs``.
        **kw: Keyword arguments forwarded to ``format_live_refs``.

    Returns:
        None
    """
    report = format_live_refs(*a, **kw)
    print(report)
|
|
52
179
|
|
|
53
180
|
|
|
54
181
|
def get_oldest(class_name):
    """
    Return the oldest tracked instance of the class named ``class_name``.

    Only the first tracked class whose ``__name__`` matches is considered.
    "Oldest" means the instance with the smallest recorded creation timestamp.

    Args:
        class_name: The ``__name__`` of the class to look up.

    Returns:
        object: The oldest live instance, or ``None`` when no class matches
        or the first matching class has no live instances.
    """
    # Locate the instance registry of the first class with a matching name.
    instances = next(
        (refs for klass, refs in live_refs.items() if klass.__name__ == class_name),
        None,
    )

    # Covers both "no such class" (None) and "class has no live instances" (empty).
    if not instances:
        return None

    # Each item is (instance, creation_time); pick the smallest time.
    oldest_instance, _created_at = min(instances.items(), key=itemgetter(1))
    return oldest_instance
|
|
219
|
+
|
|
62
220
|
|
|
63
221
|
def iter_all(class_name):
    """
    Iterate over the tracked live instances of the class named ``class_name``.

    Only the first tracked class whose ``__name__`` matches is considered.

    Args:
        class_name: The ``__name__`` of the class to look up.

    Returns:
        The result of ``keys()`` on that class's instance registry (an
        iterable of live instances), or ``None`` if no tracked class matches.
    """
    for klass, instances in live_refs.items():
        if klass.__name__ != class_name:
            continue
        # First match wins; expose its live instances.
        return instances.keys()

    # No tracked class carries the requested name.
    return None
|