bhfutils 0.2.2__tar.gz → 0.2.4__tar.gz
This diff shows the differences between the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {bhfutils-0.2.2 → bhfutils-0.2.4}/PKG-INFO +1 -1
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/items.py +1 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/meta_passthrough_middleware.py +26 -14
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/PKG-INFO +1 -1
- {bhfutils-0.2.2 → bhfutils-0.2.4}/setup.py +1 -1
- {bhfutils-0.2.2 → bhfutils-0.2.4}/MANIFEST.in +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/README.rst +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/bhf_signals.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/config/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/config/file_pusher.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/custom_cookies.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/distributed_scheduler.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/expire.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/log_retry_middleware.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/pipelines.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_async/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_async/_mouse_helper.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_async/_spoof.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_sync/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_sync/_mouse_helper.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_sync/_spoof.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/js/mouseHelper.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/shared/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/shared/math.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/shared/spoof.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/handler.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/headers.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.app.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.csi.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.loadtimes.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.runtime.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/hairline.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/iframe.contentWindow.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/magic-arrays.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/media.codecs.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.deviceMemory.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.hardwareConcurrency.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.languages.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.permissions.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.platform.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.plugins.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.userAgent.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.vendor.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.webdriver.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/screen.touch.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/user-agent-override.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/utils.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/webgl.vendor.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/window.dimensions.js +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/page.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/stealth.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/policy.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/proxy_rotate.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_domain_max_page_filter.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_dupefilter.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_global_page_per_domain_filter.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_retry_middleware.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_stats_middleware.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/settings_template.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/spiders/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/spiders/redis_spider.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/online.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_distributed_scheduler.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_link_spider.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_log_retry_middleware.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_meta_passthrough_middleware.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_pipelines.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_dupefilter.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_page_limit_filters.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_retry_middleware.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_stats_middleware.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_wandering_spider.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_ah.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_lf.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_mt.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_rq.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_rtq.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_sc.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_sw.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_zw.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/argparse_helper.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/log_factory.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/method_timer.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/redis_queue.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/redis_throttled_queue.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/settings_wrapper.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/stats_collector.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/zookeeper_watcher.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/__init__.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/default_settings.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/online.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/override_defaults.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_argparse_helper.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_log_factory.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_method_timer.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_redis_queue.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_redis_throttled_queue.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_settings_wrapper.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_stats_collector.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_zookeeper_watcher.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/throttled_queue.py +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/SOURCES.txt +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/dependency_links.txt +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/requires.txt +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/top_level.txt +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/zip-safe +0 -0
- {bhfutils-0.2.2 → bhfutils-0.2.4}/setup.cfg +0 -0
|
@@ -9,9 +9,9 @@ class MetaPassthroughMiddleware(object):
|
|
|
9
9
|
self.setup(settings)
|
|
10
10
|
|
|
11
11
|
def setup(self, settings):
|
|
12
|
-
|
|
12
|
+
"""
|
|
13
13
|
Does the actual setup of the middleware
|
|
14
|
-
|
|
14
|
+
"""
|
|
15
15
|
# set up the default sc logger
|
|
16
16
|
my_level = settings.get('SC_LOG_LEVEL', 'INFO')
|
|
17
17
|
my_name = settings.get('SC_LOGGER_NAME', 'sc-logger')
|
|
@@ -35,19 +35,31 @@ class MetaPassthroughMiddleware(object):
|
|
|
35
35
|
def from_crawler(cls, crawler):
|
|
36
36
|
return cls(crawler.settings)
|
|
37
37
|
|
|
38
|
+
@staticmethod
|
|
39
|
+
def _passthrough(response, x):
|
|
40
|
+
# only operate on requests: pass along all known meta fields, but only
|
|
41
|
+
# if they were not already set in the spider's new request
|
|
42
|
+
if isinstance(x, Request):
|
|
43
|
+
for key in list(response.meta.keys()):
|
|
44
|
+
if key not in x.meta and key != 'playwright_page_methods':
|
|
45
|
+
x.meta[key] = response.meta[key]
|
|
46
|
+
return x
|
|
47
|
+
|
|
38
48
|
def process_spider_output(self, response, result, spider):
|
|
39
|
-
|
|
49
|
+
"""
|
|
40
50
|
Ensures the meta data from the response is passed
|
|
41
|
-
through in any Request's generated from the spider
|
|
42
|
-
|
|
51
|
+
through in any Request's generated from the spider (sync path).
|
|
52
|
+
"""
|
|
43
53
|
self.logger.debug("processing meta passthrough middleware")
|
|
44
54
|
for x in result:
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
55
|
+
yield self._passthrough(response, x)
|
|
56
|
+
|
|
57
|
+
async def process_spider_output_async(self, response, result, spider):
|
|
58
|
+
"""
|
|
59
|
+
Async-generator variant required by Scrapy >= 2.13 when the spider
|
|
60
|
+
output is an async iterable (AsyncCrawlerProcess). Defining both makes
|
|
61
|
+
this a "universal" spider middleware compatible with old + new Scrapy.
|
|
62
|
+
"""
|
|
63
|
+
self.logger.debug("processing meta passthrough middleware (async)")
|
|
64
|
+
async for x in result:
|
|
65
|
+
yield self._passthrough(response, x)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_async/_mouse_helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_sync/_mouse_helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.hardwareConcurrency.js
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_meta_passthrough_middleware.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|