bhfutils 0.2.2__tar.gz → 0.2.4__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (115)
  1. {bhfutils-0.2.2 → bhfutils-0.2.4}/PKG-INFO +1 -1
  2. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/items.py +1 -0
  3. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/meta_passthrough_middleware.py +26 -14
  4. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/PKG-INFO +1 -1
  5. {bhfutils-0.2.2 → bhfutils-0.2.4}/setup.py +1 -1
  6. {bhfutils-0.2.2 → bhfutils-0.2.4}/MANIFEST.in +0 -0
  7. {bhfutils-0.2.2 → bhfutils-0.2.4}/README.rst +0 -0
  8. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/__init__.py +0 -0
  9. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/__init__.py +0 -0
  10. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/bhf_signals.py +0 -0
  11. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/config/__init__.py +0 -0
  12. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/config/file_pusher.py +0 -0
  13. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/custom_cookies.py +0 -0
  14. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/distributed_scheduler.py +0 -0
  15. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/expire.py +0 -0
  16. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/log_retry_middleware.py +0 -0
  17. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/pipelines.py +0 -0
  18. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/__init__.py +0 -0
  19. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/__init__.py +0 -0
  20. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_async/__init__.py +0 -0
  21. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_async/_mouse_helper.py +0 -0
  22. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_async/_spoof.py +0 -0
  23. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_sync/__init__.py +0 -0
  24. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_sync/_mouse_helper.py +0 -0
  25. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/do_sync/_spoof.py +0 -0
  26. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/js/mouseHelper.js +0 -0
  27. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/shared/__init__.py +0 -0
  28. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/shared/math.py +0 -0
  29. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/cursor/shared/spoof.py +0 -0
  30. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/handler.py +0 -0
  31. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/headers.py +0 -0
  32. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/__init__.py +0 -0
  33. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.app.js +0 -0
  34. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.csi.js +0 -0
  35. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.loadtimes.js +0 -0
  36. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/chrome.runtime.js +0 -0
  37. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/hairline.js +0 -0
  38. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/iframe.contentWindow.js +0 -0
  39. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/magic-arrays.js +0 -0
  40. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/media.codecs.js +0 -0
  41. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.deviceMemory.js +0 -0
  42. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.hardwareConcurrency.js +0 -0
  43. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.languages.js +0 -0
  44. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.permissions.js +0 -0
  45. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.platform.js +0 -0
  46. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.plugins.js +0 -0
  47. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.userAgent.js +0 -0
  48. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.vendor.js +0 -0
  49. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/navigator.webdriver.js +0 -0
  50. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/screen.touch.js +0 -0
  51. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/user-agent-override.js +0 -0
  52. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/utils.js +0 -0
  53. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/webgl.vendor.js +0 -0
  54. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/js/window.dimensions.js +0 -0
  55. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/page.py +0 -0
  56. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/playwright/stealth.py +0 -0
  57. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/policy.py +0 -0
  58. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/proxy_rotate.py +0 -0
  59. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_domain_max_page_filter.py +0 -0
  60. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_dupefilter.py +0 -0
  61. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_global_page_per_domain_filter.py +0 -0
  62. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_retry_middleware.py +0 -0
  63. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/redis_stats_middleware.py +0 -0
  64. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/settings_template.py +0 -0
  65. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/spiders/__init__.py +0 -0
  66. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/spiders/redis_spider.py +0 -0
  67. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/__init__.py +0 -0
  68. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/online.py +0 -0
  69. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_distributed_scheduler.py +0 -0
  70. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_link_spider.py +0 -0
  71. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_log_retry_middleware.py +0 -0
  72. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_meta_passthrough_middleware.py +0 -0
  73. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_pipelines.py +0 -0
  74. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_dupefilter.py +0 -0
  75. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_page_limit_filters.py +0 -0
  76. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_retry_middleware.py +0 -0
  77. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_redis_stats_middleware.py +0 -0
  78. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/crawler/tests/test_wandering_spider.py +0 -0
  79. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/__init__.py +0 -0
  80. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_ah.py +0 -0
  81. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_lf.py +0 -0
  82. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_mt.py +0 -0
  83. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_rq.py +0 -0
  84. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_rtq.py +0 -0
  85. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_sc.py +0 -0
  86. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_sw.py +0 -0
  87. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/examples/example_zw.py +0 -0
  88. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/__init__.py +0 -0
  89. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/argparse_helper.py +0 -0
  90. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/log_factory.py +0 -0
  91. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/method_timer.py +0 -0
  92. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/redis_queue.py +0 -0
  93. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/redis_throttled_queue.py +0 -0
  94. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/settings_wrapper.py +0 -0
  95. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/stats_collector.py +0 -0
  96. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/scutils/zookeeper_watcher.py +0 -0
  97. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/__init__.py +0 -0
  98. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/default_settings.py +0 -0
  99. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/online.py +0 -0
  100. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/override_defaults.py +0 -0
  101. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_argparse_helper.py +0 -0
  102. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_log_factory.py +0 -0
  103. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_method_timer.py +0 -0
  104. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_redis_queue.py +0 -0
  105. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_redis_throttled_queue.py +0 -0
  106. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_settings_wrapper.py +0 -0
  107. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_stats_collector.py +0 -0
  108. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/test_zookeeper_watcher.py +0 -0
  109. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils/tests/throttled_queue.py +0 -0
  110. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/SOURCES.txt +0 -0
  111. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/dependency_links.txt +0 -0
  112. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/requires.txt +0 -0
  113. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/top_level.txt +0 -0
  114. {bhfutils-0.2.2 → bhfutils-0.2.4}/bhfutils.egg-info/zip-safe +0 -0
  115. {bhfutils-0.2.2 → bhfutils-0.2.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bhfutils
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Utilities that are used by any spider of Behoof project
5
5
  Home-page: https://behoof.app/
6
6
  Author: Teplygin Vladimir
@@ -51,6 +51,7 @@ class ProductDetailsResponseItem(ProductResponseItem):
51
51
  brandName = Field()
52
52
  sellerName = Field()
53
53
  description = Field()
54
+ vendorCode = Field()
54
55
  details = Field()
55
56
  comments = Field()
56
57
 
@@ -9,9 +9,9 @@ class MetaPassthroughMiddleware(object):
9
9
  self.setup(settings)
10
10
 
11
11
  def setup(self, settings):
12
- '''
12
+ """
13
13
  Does the actual setup of the middleware
14
- '''
14
+ """
15
15
  # set up the default sc logger
16
16
  my_level = settings.get('SC_LOG_LEVEL', 'INFO')
17
17
  my_name = settings.get('SC_LOGGER_NAME', 'sc-logger')
@@ -35,19 +35,31 @@ class MetaPassthroughMiddleware(object):
35
35
  def from_crawler(cls, crawler):
36
36
  return cls(crawler.settings)
37
37
 
38
+ @staticmethod
39
+ def _passthrough(response, x):
40
+ # only operate on requests: pass along all known meta fields, but only
41
+ # if they were not already set in the spider's new request
42
+ if isinstance(x, Request):
43
+ for key in list(response.meta.keys()):
44
+ if key not in x.meta and key != 'playwright_page_methods':
45
+ x.meta[key] = response.meta[key]
46
+ return x
47
+
38
48
  def process_spider_output(self, response, result, spider):
39
- '''
49
+ """
40
50
  Ensures the meta data from the response is passed
41
- through in any Request's generated from the spider
42
- '''
51
+ through in any Request's generated from the spider (sync path).
52
+ """
43
53
  self.logger.debug("processing meta passthrough middleware")
44
54
  for x in result:
45
- # only operate on requests
46
- if isinstance(x, Request):
47
- self.logger.debug("found request")
48
- # pass along all known meta fields, only if
49
- # they were not already set in the spider's new request
50
- for key in list(response.meta.keys()):
51
- if key not in x.meta and key != 'playwright_page_methods':
52
- x.meta[key] = response.meta[key]
53
- yield x
55
+ yield self._passthrough(response, x)
56
+
57
+ async def process_spider_output_async(self, response, result, spider):
58
+ """
59
+ Async-generator variant required by Scrapy >= 2.13 when the spider
60
+ output is an async iterable (AsyncCrawlerProcess). Defining both makes
61
+ this a "universal" spider middleware compatible with old + new Scrapy.
62
+ """
63
+ self.logger.debug("processing meta passthrough middleware (async)")
64
+ async for x in result:
65
+ yield self._passthrough(response, x)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bhfutils
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: Utilities that are used by any spider of Behoof project
5
5
  Home-page: https://behoof.app/
6
6
  Author: Teplygin Vladimir
@@ -43,7 +43,7 @@ if 'nosetests' in sys.argv[1:]:
43
43
 
44
44
  setup(
45
45
  name='bhfutils',
46
- version='0.2.2',
46
+ version='0.2.4',
47
47
  description='Utilities that are used by any spider of Behoof project',
48
48
  long_description=readme(),
49
49
  long_description_content_type='text/x-rst',
File without changes
File without changes
File without changes
File without changes