PyPI - aio-scrapy - Versions diffs - 2.1.4__py3-none-any.whl → 2.1.7__py3-none-any.whl - Mend

aio-scrapy 2.1.4__py3-none-any.whl → 2.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

{aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/LICENSE +1 -1
aio_scrapy-2.1.7.dist-info/METADATA +147 -0
aio_scrapy-2.1.7.dist-info/RECORD +134 -0
{aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/WHEEL +1 -1
aioscrapy/VERSION +1 -1
aioscrapy/cmdline.py +438 -5
aioscrapy/core/downloader/__init__.py +522 -17
aioscrapy/core/downloader/handlers/__init__.py +187 -5
aioscrapy/core/downloader/handlers/aiohttp.py +190 -6
aioscrapy/core/downloader/handlers/curl_cffi.py +126 -5
aioscrapy/core/downloader/handlers/httpx.py +135 -5
aioscrapy/core/downloader/handlers/pyhttpx.py +137 -5
aioscrapy/core/downloader/handlers/requests.py +120 -2
aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
aioscrapy/core/engine.py +381 -20
aioscrapy/core/scheduler.py +350 -36
aioscrapy/core/scraper.py +509 -33
aioscrapy/crawler.py +392 -10
aioscrapy/db/__init__.py +149 -0
aioscrapy/db/absmanager.py +212 -6
aioscrapy/db/aiomongo.py +292 -10
aioscrapy/db/aiomysql.py +363 -10
aioscrapy/db/aiopg.py +299 -2
aioscrapy/db/aiorabbitmq.py +444 -4
aioscrapy/db/aioredis.py +260 -11
aioscrapy/dupefilters/__init__.py +110 -5
aioscrapy/dupefilters/disk.py +124 -2
aioscrapy/dupefilters/redis.py +598 -32
aioscrapy/exceptions.py +151 -13
aioscrapy/http/__init__.py +1 -1
aioscrapy/http/headers.py +237 -3
aioscrapy/http/request/__init__.py +257 -11
aioscrapy/http/request/form.py +83 -3
aioscrapy/http/request/json_request.py +121 -9
aioscrapy/http/response/__init__.py +306 -33
aioscrapy/http/response/html.py +42 -3
aioscrapy/http/response/text.py +496 -49
aioscrapy/http/response/web_driver.py +144 -0
aioscrapy/http/response/xml.py +45 -3
aioscrapy/libs/downloader/defaultheaders.py +66 -2
aioscrapy/libs/downloader/downloadtimeout.py +91 -2
aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
aioscrapy/libs/downloader/retry.py +192 -6
aioscrapy/libs/downloader/stats.py +142 -0
aioscrapy/libs/downloader/useragent.py +93 -2
aioscrapy/libs/extensions/closespider.py +166 -4
aioscrapy/libs/extensions/corestats.py +151 -1
aioscrapy/libs/extensions/logstats.py +145 -1
aioscrapy/libs/extensions/metric.py +370 -1
aioscrapy/libs/extensions/throttle.py +235 -1
aioscrapy/libs/pipelines/__init__.py +345 -2
aioscrapy/libs/pipelines/csv.py +242 -0
aioscrapy/libs/pipelines/excel.py +545 -0
aioscrapy/libs/pipelines/mongo.py +132 -0
aioscrapy/libs/pipelines/mysql.py +67 -0
aioscrapy/libs/pipelines/pg.py +67 -0
aioscrapy/libs/spider/depth.py +141 -3
aioscrapy/libs/spider/httperror.py +144 -4
aioscrapy/libs/spider/offsite.py +202 -2
aioscrapy/libs/spider/referer.py +396 -21
aioscrapy/libs/spider/urllength.py +97 -1
aioscrapy/link.py +115 -8
aioscrapy/logformatter.py +199 -8
aioscrapy/middleware/absmanager.py +328 -2
aioscrapy/middleware/downloader.py +218 -0
aioscrapy/middleware/extension.py +50 -1
aioscrapy/middleware/itempipeline.py +96 -0
aioscrapy/middleware/spider.py +360 -7
aioscrapy/process.py +200 -0
aioscrapy/proxy/__init__.py +142 -3
aioscrapy/proxy/redis.py +136 -2
aioscrapy/queue/__init__.py +168 -16
aioscrapy/scrapyd/runner.py +124 -3
aioscrapy/serializer.py +182 -2
aioscrapy/settings/__init__.py +610 -128
aioscrapy/settings/default_settings.py +314 -14
aioscrapy/signalmanager.py +151 -20
aioscrapy/signals.py +183 -1
aioscrapy/spiderloader.py +165 -12
aioscrapy/spiders/__init__.py +233 -6
aioscrapy/statscollectors.py +312 -1
aioscrapy/utils/conf.py +345 -17
aioscrapy/utils/curl.py +168 -16
aioscrapy/utils/decorators.py +76 -6
aioscrapy/utils/deprecate.py +212 -19
aioscrapy/utils/httpobj.py +55 -3
aioscrapy/utils/log.py +79 -0
aioscrapy/utils/misc.py +189 -21
aioscrapy/utils/ossignal.py +67 -5
aioscrapy/utils/project.py +165 -3
aioscrapy/utils/python.py +254 -44
aioscrapy/utils/reqser.py +75 -1
aioscrapy/utils/request.py +173 -12
aioscrapy/utils/response.py +91 -6
aioscrapy/utils/signal.py +196 -14
aioscrapy/utils/spider.py +51 -4
aioscrapy/utils/template.py +93 -6
aioscrapy/utils/tools.py +191 -17
aioscrapy/utils/trackref.py +198 -12
aioscrapy/utils/url.py +341 -36
aio_scrapy-2.1.4.dist-info/METADATA +0 -239
aio_scrapy-2.1.4.dist-info/RECORD +0 -133
aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -115
aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -59
aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
aioscrapy/http/response/playwright.py +0 -36
aioscrapy/libs/pipelines/execl.py +0 -169
{aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/entry_points.txt +0 -0
{aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.7.dist-info}/top_level.txt +0 -0

aioscrapy/libs/spider/referer.py CHANGED Viewed

@@ -1,6 +1,25 @@
 """
-RefererMiddleware: populates Request referer field, based on the Response which
-originated it.
+Referer Middleware for AioScrapy
+AioScrapy的Referer中间件
+This middleware populates the 'Referer' HTTP header in requests based on the response
+that generated them. It implements various referrer policies as defined in the W3C
+Referrer Policy specification, allowing control over what information is included
+in the Referer header for privacy and security reasons.
+此中间件根据生成请求的响应填充请求中的'Referer' HTTP头。它实现了W3C Referer Policy
+规范中定义的各种引用策略，允许出于隐私和安全原因控制Referer头中包含的信息。
+The middleware supports all standard referrer policies:
+中间件支持所有标准的引用策略：
+- no-referrer
+- no-referrer-when-downgrade
+- same-origin
+- origin
+- strict-origin
+- origin-when-cross-origin
+- strict-origin-when-cross-origin
+- unsafe-url
+- aioscrapy-default (a variant of no-referrer-when-downgrade)
 """
 import warnings
 from typing import Tuple
@@ -30,24 +49,102 @@ POLICY_AIOSCRAPY_DEFAULT = "aioscrapy-default"
 class ReferrerPolicy:
+    """
+    Base class for implementing W3C Referrer Policy.
+    实现W3C引用策略的基类。
+    This abstract class defines the interface and common functionality for all
+    referrer policy implementations. Each subclass implements a specific policy
+    from the W3C Referrer Policy specification.
+    这个抽象类为所有引用策略实现定义了接口和通用功能。每个子类实现W3C引用策略
+    规范中的特定策略。
+    Reference: https://www.w3.org/TR/referrer-policy/
+    参考：https://www.w3.org/TR/referrer-policy/
+    """
+    # Schemes that should never send a referrer
+    # 永远不应该发送引用的方案
     NOREFERRER_SCHEMES: Tuple[str, ...] = LOCAL_SCHEMES
+    # Policy name (to be defined by subclasses)
+    # 策略名称（由子类定义）
     name: str
     def referrer(self, response_url, request_url):
+        """
+        Determine the referrer value based on the policy.
+        根据策略确定引用值。
+        This method must be implemented by subclasses to determine what referrer
+        value (if any) should be sent for a request, based on the response URL
+        that generated the request and the request URL.
+        此方法必须由子类实现，以根据生成请求的响应URL和请求URL确定应为请求
+        发送什么引用值（如果有）。
+        Args:
+            response_url: The URL of the response that generated the request.
+                         生成请求的响应的URL。
+            request_url: The URL of the request being made.
+                        正在发出的请求的URL。
+        Returns:
+            str or None: The referrer value to use, or None if no referrer should be sent.
+                        要使用的引用值，如果不应发送引用，则为None。
+        """
         raise NotImplementedError()
     def stripped_referrer(self, url):
+        """
+        Return a stripped version of the URL suitable for use as a referrer.
+        返回适合用作引用的URL的剥离版本。
+        This method strips sensitive information from a URL according to the
+        referrer policy specification.
+        此方法根据引用策略规范从URL中剥离敏感信息。
+        Args:
+            url: The URL to strip.
+                要剥离的URL。
+        Returns:
+            str or None: The stripped URL, or None if the URL uses a scheme that
+                        should never send a referrer.
+                        剥离后的URL，如果URL使用的方案永远不应该发送引用，则为None。
+        """
         if urlparse(url).scheme not in self.NOREFERRER_SCHEMES:
             return self.strip_url(url)
+        return None
     def origin_referrer(self, url):
+        """
+        Return only the origin portion of a URL for use as a referrer.
+        仅返回URL的源部分以用作引用。
+        This method returns just the scheme, host, and port of a URL, which is
+        useful for policies that only send the origin as the referrer.
+        此方法仅返回URL的方案、主机和端口，这对于仅发送源作为引用的策略很有用。
+        Args:
+            url: The URL to get the origin from.
+                要获取源的URL。
+        Returns:
+            str or None: The origin of the URL, or None if the URL uses a scheme that
+                        should never send a referrer.
+                        URL的源，如果URL使用的方案永远不应该发送引用，则为None。
+        """
         if urlparse(url).scheme not in self.NOREFERRER_SCHEMES:
             return self.origin(url)
+        return None
     def strip_url(self, url, origin_only=False):
         """
-        https://www.w3.org/TR/referrer-policy/#strip-url
+        Strip a URL according to the referrer policy specification.
+        根据引用策略规范剥离URL。
+        Reference: https://www.w3.org/TR/referrer-policy/#strip-url
+        参考：https://www.w3.org/TR/referrer-policy/#strip-url
         If url is null, return no referrer.
         If url's scheme is a local scheme, then return no referrer.
@@ -58,6 +155,16 @@ class ReferrerPolicy:
             Set url's path to null.
             Set url's query to null.
         Return url.
+        Args:
+            url: The URL to strip.
+                要剥离的URL。
+            origin_only: Whether to strip the URL to just its origin.
+                        是否将URL剥离为仅其源。
+        Returns:
+            str or None: The stripped URL, or None if the URL is empty.
+                        剥离后的URL，如果URL为空，则为None。
         """
         if not url:
             return None
@@ -68,10 +175,45 @@ class ReferrerPolicy:
                          origin_only=origin_only)
     def origin(self, url):
-        """Return serialized origin (scheme, host, path) for a request or response URL."""
+        """
+        Return serialized origin (scheme, host, port) for a URL.
+        返回URL的序列化源（方案、主机、端口）。
+        The origin of a URL is just its scheme, host, and port, without path,
+        query, or fragment.
+        URL的源只是其方案、主机和端口，没有路径、查询或片段。
+        Args:
+            url: The URL to get the origin from.
+                要获取源的URL。
+        Returns:
+            str or None: The origin of the URL, or None if the URL is empty.
+                        URL的源，如果URL为空，则为None。
+        """
         return self.strip_url(url, origin_only=True)
     def potentially_trustworthy(self, url):
+        """
+        Determine if a URL is potentially trustworthy.
+        确定URL是否可能值得信任。
+        This is a simplified implementation that considers HTTPS and FTPS URLs
+        as potentially trustworthy, and data URLs as not trustworthy.
+        这是一个简化的实现，将HTTPS和FTPS URL视为可能值得信任，将数据URL视为不值得信任。
+        Note: this does not follow the full algorithm from:
+        注意：这不遵循以下完整算法：
+        https://w3c.github.io/webappsec-secure-contexts/#is-url-trustworthy
+        Args:
+            url: The URL to check.
+                要检查的URL。
+        Returns:
+            bool: True if the URL is potentially trustworthy, False otherwise.
+                 如果URL可能值得信任，则为True，否则为False。
+        """
         # Note: this does not follow https://w3c.github.io/webappsec-secure-contexts/#is-url-trustworthy
         parsed_url = urlparse(url)
         if parsed_url.scheme in ('data',):
@@ -79,20 +221,57 @@ class ReferrerPolicy:
         return self.tls_protected(url)
     def tls_protected(self, url):
+        """
+        Determine if a URL is protected by TLS (HTTPS or FTPS).
+        确定URL是否受TLS（HTTPS或FTPS）保护。
+        Args:
+            url: The URL to check.
+                要检查的URL。
+        Returns:
+            bool: True if the URL uses HTTPS or FTPS, False otherwise.
+                 如果URL使用HTTPS或FTPS，则为True，否则为False。
+        """
         return urlparse(url).scheme in ('https', 'ftps')
 class NoReferrerPolicy(ReferrerPolicy):
     """
-    https://www.w3.org/TR/referrer-policy/#referrer-policy-no-referrer
+    Implementation of the "no-referrer" referrer policy.
+    "no-referrer"引用策略的实现。
+    Reference: https://www.w3.org/TR/referrer-policy/#referrer-policy-no-referrer
+    参考：https://www.w3.org/TR/referrer-policy/#referrer-policy-no-referrer
     The simplest policy is "no-referrer", which specifies that no referrer information
     is to be sent along with requests made from a particular request client to any origin.
     The header will be omitted entirely.
+    最简单的策略是"no-referrer"，它指定不随特定请求客户端向任何源发出的请求
+    发送任何引用信息。头将完全省略。
     """
+    # Policy name
+    # 策略名称
     name: str = POLICY_NO_REFERRER
     def referrer(self, response_url, request_url):
+        """
+        Determine the referrer value based on the no-referrer policy.
+        根据no-referrer策略确定引用值。
+        This policy always returns None, meaning no Referer header should be sent.
+        此策略始终返回None，表示不应发送Referer头。
+        Args:
+            response_url: The URL of the response that generated the request.
+                         生成请求的响应的URL。
+            request_url: The URL of the request being made.
+                        正在发出的请求的URL。
+        Returns:
+            None: Always returns None, indicating no referrer should be sent.
+                 始终返回None，表示不应发送引用。
+        """
         return None
@@ -270,16 +449,42 @@ _policy_classes[''] = NoReferrerWhenDowngradePolicy
 def _load_policy_class(policy, warning_only=False):
     """
-    Expect a string for the path to the policy class,
-    otherwise try to interpret the string as a standard value
-    from https://www.w3.org/TR/referrer-policy/#referrer-policies
+    Load a referrer policy class by name or path.
+    通过名称或路径加载引用策略类。
+    This function attempts to load a referrer policy class either by importing it
+    from a path or by looking it up in the standard policy classes dictionary.
+    此函数尝试通过从路径导入或在标准策略类字典中查找来加载引用策略类。
+    Args:
+        policy: A string representing either a path to a policy class or a standard
+               policy name from the W3C Referrer Policy specification.
+               表示策略类路径或W3C引用策略规范中的标准策略名称的字符串。
+        warning_only: If True, warnings will be issued instead of raising exceptions
+                     when a policy cannot be loaded.
+                     如果为True，当无法加载策略时将发出警告而不是引发异常。
+    Returns:
+        A referrer policy class, or None if the policy could not be loaded and
+        warning_only is True.
+        引用策略类，如果无法加载策略且warning_only为True，则为None。
+    Raises:
+        RuntimeError: If the policy could not be loaded and warning_only is False.
+                     如果无法加载策略且warning_only为False，则引发RuntimeError。
     """
     try:
+        # Try to load the policy as a Python object (e.g., 'mymodule.MyPolicy')
+        # 尝试将策略作为Python对象加载（例如，'mymodule.MyPolicy'）
         return load_object(policy)
     except ValueError:
         try:
+            # Try to load the policy as a standard policy name
+            # 尝试将策略作为标准策略名称加载
             return _policy_classes[policy.lower()]
         except KeyError:
+            # Policy could not be loaded
+            # 无法加载策略
             msg = f"Could not load referrer policy {policy!r}"
             if not warning_only:
                 raise RuntimeError(msg)
@@ -289,76 +494,246 @@ def _load_policy_class(policy, warning_only=False):
 class RefererMiddleware:
+    """
+    Middleware for populating the 'Referer' HTTP header in requests.
+    用于填充请求中的'Referer' HTTP头的中间件。
+    This middleware sets the 'Referer' HTTP header in requests based on the response
+    that generated them, following the W3C Referrer Policy specification. It allows
+    control over what information is included in the Referer header for privacy and
+    security reasons.
+    此中间件根据生成请求的响应设置请求中的'Referer' HTTP头，遵循W3C引用策略规范。
+    它允许出于隐私和安全原因控制Referer头中包含的信息。
+    """
     def __init__(self, settings=None):
+        """
+        Initialize the RefererMiddleware.
+        初始化RefererMiddleware。
+        Args:
+            settings: The AioScrapy settings object.
+                     AioScrapy设置对象。
+                     If None, the default policy will be used.
+                     如果为None，将使用默认策略。
+        """
+        # Set the default policy
+        # 设置默认策略
         self.default_policy = DefaultReferrerPolicy
+        # If settings are provided, load the policy from settings
+        # 如果提供了设置，从设置加载策略
         if settings is not None:
             self.default_policy = _load_policy_class(
                 settings.get('REFERRER_POLICY'))
     @classmethod
     def from_crawler(cls, crawler):
+        """
+        Create a RefererMiddleware instance from a crawler.
+        从爬虫创建RefererMiddleware实例。
+        This is the factory method used by AioScrapy to create the middleware.
+        这是AioScrapy用于创建中间件的工厂方法。
+        Args:
+            crawler: The crawler that will use this middleware.
+                    将使用此中间件的爬虫。
+        Returns:
+            RefererMiddleware: A new RefererMiddleware instance.
+                              一个新的RefererMiddleware实例。
+        Raises:
+            NotConfigured: If REFERER_ENABLED is False in the crawler settings.
+                          如果爬虫设置中的REFERER_ENABLED为False。
+        """
+        # Check if the middleware is enabled
+        # 检查中间件是否已启用
         if not crawler.settings.getbool('REFERER_ENABLED'):
             raise NotConfigured
+        # Create a new instance with the crawler's settings
+        # 使用爬虫的设置创建一个新实例
         mw = cls(crawler.settings)
+        # Connect the request_scheduled method to the request_scheduled signal
+        # to handle redirections
+        # 将request_scheduled方法连接到request_scheduled信号以处理重定向
         # Note: this hook is a bit of a hack to intercept redirections
+        # 注意：这个钩子有点像一个黑客，用于拦截重定向
         crawler.signals.connect(mw.request_scheduled, signal=signals.request_scheduled)
+        # Return the new instance
+        # 返回新实例
         return mw
     def policy(self, resp_or_url, request):
         """
-        Determine Referrer-Policy to use from a parent Response (or URL),
-        and a Request to be sent.
-        - if a valid policy is set in Request meta, it is used.
-        - if the policy is set in meta but is wrong (e.g. a typo error),
-          the policy from settings is used
-        - if the policy is not set in Request meta,
-          but there is a Referrer-policy header in the parent response,
-          it is used if valid
-        - otherwise, the policy from settings is used.
+        Determine the Referrer-Policy to use for a request.
+        确定用于请求的引用策略。
+        This method determines which referrer policy to use based on the following
+        precedence rules:
+        此方法根据以下优先级规则确定要使用的引用策略：
+        - If a valid policy is set in Request meta, it is used.
+          如果在Request meta中设置了有效的策略，则使用它。
+        - If the policy is set in meta but is wrong (e.g. a typo error),
+          the policy from settings is used.
+          如果在meta中设置了策略但是错误的（例如，拼写错误），
+          则使用设置中的策略。
+        - If the policy is not set in Request meta,
+          but there is a Referrer-Policy header in the parent response,
+          it is used if valid.
+          如果在Request meta中未设置策略，
+          但在父响应中有Referrer-Policy头，
+          如果有效，则使用它。
+        - Otherwise, the policy from settings is used.
+          否则，使用设置中的策略。
+        Args:
+            resp_or_url: The parent Response object or URL string.
+                        父Response对象或URL字符串。
+            request: The Request object being processed.
+                    正在处理的Request对象。
+        Returns:
+            ReferrerPolicy: An instance of the appropriate referrer policy class.
+                           适当的引用策略类的实例。
         """
+        # Try to get the policy name from the request meta
+        # 尝试从请求元数据获取策略名称
         policy_name = request.meta.get('referrer_policy')
+        # If no policy in meta, try to get it from the response headers
+        # 如果元数据中没有策略，尝试从响应头获取
         if policy_name is None:
             if isinstance(resp_or_url, Response):
                 policy_header = resp_or_url.headers.get('Referrer-Policy')
                 if policy_header is not None:
                     policy_name = to_unicode(policy_header.decode('latin1') if isinstance(policy_header, bytes) else policy_header)
+        # If no policy was found, use the default
+        # 如果未找到策略，使用默认值
         if policy_name is None:
             return self.default_policy()
+        # Try to load the policy class
+        # 尝试加载策略类
         cls = _load_policy_class(policy_name, warning_only=True)
+        # Return an instance of the policy class, or the default if loading failed
+        # 返回策略类的实例，如果加载失败，则返回默认值
         return cls() if cls else self.default_policy()
     async def process_spider_output(self, response, result, spider):
+        """
+        Process the spider output to set the 'Referer' header in requests.
+        处理爬虫输出以在请求中设置'Referer'头。
+        This method processes each request yielded by the spider and sets the
+        'Referer' header based on the appropriate referrer policy.
+        此方法处理爬虫产生的每个请求，并根据适当的引用策略设置'Referer'头。
+        Args:
+            response: The response being processed.
+                     正在处理的响应。
+            result: The result returned by the spider.
+                   爬虫返回的结果。
+            spider: The spider that generated the result.
+                   生成结果的爬虫。
+        Returns:
+            An async generator yielding processed requests and other items.
+            一个产生处理后的请求和其他项目的异步生成器。
+        """
         def _set_referer(r):
+            """
+            Set the 'Referer' header for a request if it's a Request object.
+            如果是Request对象，则为请求设置'Referer'头。
+            Args:
+                r: The item to process.
+                   要处理的项目。
+            Returns:
+                The processed item.
+                处理后的项目。
+            """
+            # Only process Request objects
+            # 只处理Request对象
             if isinstance(r, Request):
+                # Get the referrer value based on the policy
+                # 根据策略获取引用值
                 referrer = self.policy(response, r).referrer(response.url, r.url)
+                # If a referrer value was returned, set it in the request headers
+                # 如果返回了引用值，则在请求头中设置它
                 if referrer is not None:
                     r.headers.setdefault('Referer', referrer)
+            # Return the item, possibly modified
+            # 返回可能已修改的项目
             return r
+        # Process each item in the result
+        # 处理结果中的每个项目
         return (_set_referer(r) async for r in result or ())
     def request_scheduled(self, request, spider):
-        # check redirected request to patch "Referer" header if necessary
+        """
+        Handle scheduled requests to patch the 'Referer' header if necessary.
+        处理计划的请求，以在必要时修补'Referer'头。
+        This method is called when a request is scheduled. It handles redirected
+        requests by updating the 'Referer' header according to the appropriate
+        referrer policy.
+        当请求被计划时调用此方法。它通过根据适当的引用策略更新'Referer'头来处理
+        重定向的请求。
+        Args:
+            request: The request being scheduled.
+                    正在计划的请求。
+            spider: The spider that generated the request.
+                   生成请求的爬虫。
+        """
+        # Check if this is a redirected request
+        # 检查这是否是重定向的请求
         redirected_urls = request.meta.get('redirect_urls', [])
         if redirected_urls:
+            # Get the current 'Referer' header value
+            # 获取当前的'Referer'头值
             request_referrer = request.headers.get('Referer')
-            # we don't patch the referrer value if there is none
+            # We don't patch the referrer value if there is none
+            # 如果没有引用值，我们不会修补它
             if request_referrer is not None:
-                # the request's referrer header value acts as a surrogate
+                # The request's referrer header value acts as a surrogate
                 # for the parent response URL
+                # 请求的引用头值作为父响应URL的替代品
                 #
                 # Note: if the 3xx response contained a Referrer-Policy header,
                 #       the information is not available using this hook
+                # 注意：如果3xx响应包含Referrer-Policy头，
+                #       使用此钩子无法获取信息
                 parent_url = safe_url_string(request_referrer)
+                # Get the referrer value based on the policy
+                # 根据策略获取引用值
                 policy_referrer = self.policy(parent_url, request).referrer(
                     parent_url, request.url)
+                # If the policy referrer is different from the current referrer,
+                # update the header
+                # 如果策略引用与当前引用不同，则更新头
                 if policy_referrer != request_referrer:
                     if policy_referrer is None:
+                        # Remove the 'Referer' header if the policy says not to send one
+                        # 如果策略说不发送引用，则删除'Referer'头
                         request.headers.pop('Referer')
                     else:
+                        # Update the 'Referer' header with the policy value
+                        # 使用策略值更新'Referer'头
                         request.headers['Referer'] = policy_referrer

aio-scrapy 2.1.4__py3-none-any.whl → 2.1.7__py3-none-any.whl

aio-scrapy 2.1.4__py3-none-any.whl → 2.1.7__py3-none-any.whl