aio-scrapy 2.1.4__py3-none-any.whl → 2.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/LICENSE +1 -1
  2. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/METADATA +53 -41
  3. aio_scrapy-2.1.6.dist-info/RECORD +134 -0
  4. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/WHEEL +1 -1
  5. aioscrapy/VERSION +1 -1
  6. aioscrapy/cmdline.py +438 -5
  7. aioscrapy/core/downloader/__init__.py +522 -17
  8. aioscrapy/core/downloader/handlers/__init__.py +187 -5
  9. aioscrapy/core/downloader/handlers/aiohttp.py +187 -3
  10. aioscrapy/core/downloader/handlers/curl_cffi.py +124 -3
  11. aioscrapy/core/downloader/handlers/httpx.py +133 -3
  12. aioscrapy/core/downloader/handlers/pyhttpx.py +132 -3
  13. aioscrapy/core/downloader/handlers/requests.py +120 -2
  14. aioscrapy/core/downloader/handlers/webdriver/__init__.py +2 -0
  15. aioscrapy/core/downloader/handlers/webdriver/drissionpage.py +493 -0
  16. aioscrapy/core/downloader/handlers/webdriver/driverpool.py +234 -0
  17. aioscrapy/core/downloader/handlers/webdriver/playwright.py +498 -0
  18. aioscrapy/core/engine.py +381 -20
  19. aioscrapy/core/scheduler.py +350 -36
  20. aioscrapy/core/scraper.py +509 -33
  21. aioscrapy/crawler.py +392 -10
  22. aioscrapy/db/__init__.py +149 -0
  23. aioscrapy/db/absmanager.py +212 -6
  24. aioscrapy/db/aiomongo.py +292 -10
  25. aioscrapy/db/aiomysql.py +363 -10
  26. aioscrapy/db/aiopg.py +299 -2
  27. aioscrapy/db/aiorabbitmq.py +444 -4
  28. aioscrapy/db/aioredis.py +260 -11
  29. aioscrapy/dupefilters/__init__.py +110 -5
  30. aioscrapy/dupefilters/disk.py +124 -2
  31. aioscrapy/dupefilters/redis.py +598 -32
  32. aioscrapy/exceptions.py +151 -13
  33. aioscrapy/http/__init__.py +1 -1
  34. aioscrapy/http/headers.py +237 -3
  35. aioscrapy/http/request/__init__.py +257 -11
  36. aioscrapy/http/request/form.py +83 -3
  37. aioscrapy/http/request/json_request.py +121 -9
  38. aioscrapy/http/response/__init__.py +306 -33
  39. aioscrapy/http/response/html.py +42 -3
  40. aioscrapy/http/response/text.py +496 -49
  41. aioscrapy/http/response/web_driver.py +144 -0
  42. aioscrapy/http/response/xml.py +45 -3
  43. aioscrapy/libs/downloader/defaultheaders.py +66 -2
  44. aioscrapy/libs/downloader/downloadtimeout.py +91 -2
  45. aioscrapy/libs/downloader/ja3fingerprint.py +95 -2
  46. aioscrapy/libs/downloader/retry.py +192 -6
  47. aioscrapy/libs/downloader/stats.py +142 -0
  48. aioscrapy/libs/downloader/useragent.py +93 -2
  49. aioscrapy/libs/extensions/closespider.py +166 -4
  50. aioscrapy/libs/extensions/corestats.py +151 -1
  51. aioscrapy/libs/extensions/logstats.py +145 -1
  52. aioscrapy/libs/extensions/metric.py +370 -1
  53. aioscrapy/libs/extensions/throttle.py +235 -1
  54. aioscrapy/libs/pipelines/__init__.py +345 -2
  55. aioscrapy/libs/pipelines/csv.py +242 -0
  56. aioscrapy/libs/pipelines/excel.py +545 -0
  57. aioscrapy/libs/pipelines/mongo.py +132 -0
  58. aioscrapy/libs/pipelines/mysql.py +67 -0
  59. aioscrapy/libs/pipelines/pg.py +67 -0
  60. aioscrapy/libs/spider/depth.py +141 -3
  61. aioscrapy/libs/spider/httperror.py +144 -4
  62. aioscrapy/libs/spider/offsite.py +202 -2
  63. aioscrapy/libs/spider/referer.py +396 -21
  64. aioscrapy/libs/spider/urllength.py +97 -1
  65. aioscrapy/link.py +115 -8
  66. aioscrapy/logformatter.py +199 -8
  67. aioscrapy/middleware/absmanager.py +328 -2
  68. aioscrapy/middleware/downloader.py +218 -0
  69. aioscrapy/middleware/extension.py +50 -1
  70. aioscrapy/middleware/itempipeline.py +96 -0
  71. aioscrapy/middleware/spider.py +360 -7
  72. aioscrapy/process.py +200 -0
  73. aioscrapy/proxy/__init__.py +142 -3
  74. aioscrapy/proxy/redis.py +136 -2
  75. aioscrapy/queue/__init__.py +168 -16
  76. aioscrapy/scrapyd/runner.py +124 -3
  77. aioscrapy/serializer.py +182 -2
  78. aioscrapy/settings/__init__.py +610 -128
  79. aioscrapy/settings/default_settings.py +313 -13
  80. aioscrapy/signalmanager.py +151 -20
  81. aioscrapy/signals.py +183 -1
  82. aioscrapy/spiderloader.py +165 -12
  83. aioscrapy/spiders/__init__.py +233 -6
  84. aioscrapy/statscollectors.py +312 -1
  85. aioscrapy/utils/conf.py +345 -17
  86. aioscrapy/utils/curl.py +168 -16
  87. aioscrapy/utils/decorators.py +76 -6
  88. aioscrapy/utils/deprecate.py +212 -19
  89. aioscrapy/utils/httpobj.py +55 -3
  90. aioscrapy/utils/log.py +79 -0
  91. aioscrapy/utils/misc.py +189 -21
  92. aioscrapy/utils/ossignal.py +67 -5
  93. aioscrapy/utils/project.py +165 -3
  94. aioscrapy/utils/python.py +254 -44
  95. aioscrapy/utils/reqser.py +75 -1
  96. aioscrapy/utils/request.py +173 -12
  97. aioscrapy/utils/response.py +91 -6
  98. aioscrapy/utils/signal.py +196 -14
  99. aioscrapy/utils/spider.py +51 -4
  100. aioscrapy/utils/template.py +93 -6
  101. aioscrapy/utils/tools.py +191 -17
  102. aioscrapy/utils/trackref.py +198 -12
  103. aioscrapy/utils/url.py +341 -36
  104. aio_scrapy-2.1.4.dist-info/RECORD +0 -133
  105. aioscrapy/core/downloader/handlers/playwright/__init__.py +0 -115
  106. aioscrapy/core/downloader/handlers/playwright/driverpool.py +0 -59
  107. aioscrapy/core/downloader/handlers/playwright/webdriver.py +0 -96
  108. aioscrapy/http/response/playwright.py +0 -36
  109. aioscrapy/libs/pipelines/execl.py +0 -169
  110. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/entry_points.txt +0 -0
  111. {aio_scrapy-2.1.4.dist-info → aio_scrapy-2.1.6.dist-info}/top_level.txt +0 -0
aioscrapy/core/downloader/handlers/playwright/__init__.py (removed in 2.1.6)
@@ -1,115 +0,0 @@
- from functools import wraps
-
- try:
-     from playwright._impl._errors import Error
- except ImportError:
-     from playwright._impl._api_types import Error
-
- from playwright.async_api._generated import Response as EventResponse
-
- from aioscrapy import Request, Spider
- from aioscrapy.core.downloader.handlers import BaseDownloadHandler
- from aioscrapy.core.downloader.handlers.playwright.driverpool import WebDriverPool
- from aioscrapy.core.downloader.handlers.playwright.webdriver import PlaywrightDriver
- from aioscrapy.exceptions import DownloadError
- from aioscrapy.http import PlaywrightResponse
- from aioscrapy.settings import Settings
- from aioscrapy.utils.tools import call_helper
-
-
- class PlaywrightHandler(BaseDownloadHandler):
-     def __init__(self, settings: Settings):
-         self.settings = settings
-         playwright_client_args = settings.getdict('PLAYWRIGHT_CLIENT_ARGS')
-         use_pool = settings.getbool('PLAYWRIGHT_USE_POOL', True)
-         self.wait_until = playwright_client_args.get('wait_until', 'domcontentloaded')
-         self.url_regexes = playwright_client_args.pop('url_regexes', [])
-         pool_size = playwright_client_args.pop('pool_size', settings.getint("CONCURRENT_REQUESTS", 1))
-         self._webdriver_pool = WebDriverPool(use_pool=use_pool, pool_size=pool_size, driver_cls=PlaywrightDriver, **playwright_client_args)
-
-     @classmethod
-     def from_settings(cls, settings: Settings):
-         return cls(settings)
-
-     async def download_request(self, request: Request, spider: Spider) -> PlaywrightResponse:
-         try:
-             return await self._download_request(request, spider)
-         except Error as e:
-             raise DownloadError(real_error=e) from e
-
-     async def _download_request(self, request: Request, spider) -> PlaywrightResponse:
-         cookies = dict(request.cookies)
-         timeout = request.meta.get('download_timeout', 30) * 1000
-         user_agent = request.headers.get("User-Agent")
-         proxy: str = request.meta.get("proxy")
-         url = request.url
-
-         cache_response = {}
-
-         # Capture the results returned by the spider's event listeners
-         def on_event_wrap_handler(func):
-             @wraps(func)
-             async def inner(response):
-                 ret = await func(response)
-                 if ret:
-                     cache_response[ret[0]] = ret[1]
-
-             return inner
-
-         kwargs = dict()
-         if proxy:
-             kwargs['proxy'] = proxy
-         if user_agent:
-             kwargs['user_agent'] = user_agent
-
-         driver: PlaywrightDriver = await self._webdriver_pool.get(**kwargs)
-
-         # Remove all existing event listeners, then re-register them
-         driver.page._events = dict()
-         for name in dir(spider):
-             if not name.startswith('on_event_'):
-                 continue
-             driver.page.on(name.replace('on_event_', ''), on_event_wrap_handler(getattr(spider, name)))
-
-         try:
-             if cookies:
-                 driver.url = url
-                 await driver.set_cookies(cookies)
-             await driver.page.goto(url, wait_until=request.meta.get('wait_until', self.wait_until), timeout=timeout)
-
-             if process_action_fn := getattr(spider, 'process_action', None):
-                 action_result = await call_helper(process_action_fn, driver)
-                 if action_result:
-                     cache_response[action_result[0]] = action_result[1]
-
-             for cache_key in list(cache_response.keys()):
-                 if isinstance(cache_response[cache_key], EventResponse):
-                     cache_ret = cache_response[cache_key]
-                     cache_response[cache_key] = PlaywrightResponse(
-                         url=cache_ret.url,
-                         request=request,
-                         intercept_request=dict(
-                             url=cache_ret.request.url,
-                             headers=cache_ret.request.headers,
-                             data=cache_ret.request.post_data,
-                         ),
-                         headers=cache_ret.headers,
-                         body=await cache_ret.body(),
-                         status=cache_ret.status,
-                     )
-
-             return PlaywrightResponse(
-                 url=driver.page.url,
-                 status=200,
-                 text=await driver.page.content(),
-                 cookies=await driver.get_cookies(),
-                 cache_response=cache_response,
-                 driver=driver,
-                 driver_pool=self._webdriver_pool
-             )
-         except Exception as e:
-             await self._webdriver_pool.remove(driver)
-             raise e
-
-     async def close(self):
-         await self._webdriver_pool.close()
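
Note: the removed handler above turns every spider method named on_event_<event> into a Playwright page listener, and any (key, value) tuple such a method returns is stashed in cache_response for later lookup via get_response(key). A minimal sketch of a spider using that convention, assuming the same mechanics carry over to the replacement handler (aioscrapy/core/downloader/handlers/webdriver/playwright.py per the file list); the spider name, URL, and the 'items' key are hypothetical:

from aioscrapy import Spider

class EventSpider(Spider):
    name = 'event_spider'
    start_urls = ['https://example.com']  # hypothetical target

    async def on_event_response(self, event_response):
        # Registered by the handler as page.on('response', ...); returning a
        # (key, value) tuple stores the value in cache_response under that key.
        if '/api/items' in event_response.url:
            return 'items', event_response

    async def parse(self, response):
        # EventResponse entries were converted to PlaywrightResponse objects
        # by the handler before this callback runs; None if never matched.
        items = response.get_response('items')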
aioscrapy/core/downloader/handlers/playwright/driverpool.py (removed in 2.1.6)
@@ -1,59 +0,0 @@
- # -*- coding: utf-8 -*-
-
- from asyncio import Lock
- from asyncio.queues import Queue
-
- from aioscrapy.utils.tools import singleton
-
-
- @singleton
- class WebDriverPool:
-     def __init__(
-             self, use_pool=True, pool_size=5, driver_cls=None, **kwargs
-     ):
-         self.use_pool = use_pool
-         self.pool_size = pool_size
-         self.driver_cls = driver_cls
-         self.kwargs = kwargs
-
-         self.queue = Queue(maxsize=pool_size)
-         self.lock = Lock()
-         self.driver_count = 0
-
-     @property
-     def is_full(self):
-         return self.driver_count >= self.pool_size
-
-     async def create_driver(self, **args):
-         kwargs = self.kwargs.copy()
-         kwargs.update(args)
-         driver = self.driver_cls(**kwargs)
-         await driver.setup()
-         return driver
-
-     async def get(self, **kwargs):
-         async with self.lock:
-             if not self.use_pool:
-                 return await self.create_driver(**kwargs)
-             if not self.is_full:
-                 driver = await self.create_driver(**kwargs)
-                 self.driver_count += 1
-             else:
-                 driver = await self.queue.get()
-             return driver
-
-     async def release(self, driver):
-         if not self.use_pool:
-             await driver.quit()
-             return
-         await self.queue.put(driver)
-
-     async def remove(self, driver):
-         await driver.quit()
-         self.driver_count -= 1
-
-     async def close(self):
-         while not self.queue.empty():
-             driver = await self.queue.get()
-             await driver.quit()
-             self.driver_count -= 1
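
The pool above creates drivers lazily until pool_size is reached, after which get() waits on the queue for a release(); remove() quits a driver and frees its slot so a replacement can be created. A sketch of that lifecycle, assuming the pre-2.1.6 import paths shown in this diff and a working Playwright install:

import asyncio

from aioscrapy.core.downloader.handlers.playwright.driverpool import WebDriverPool
from aioscrapy.core.downloader.handlers.playwright.webdriver import PlaywrightDriver

async def main():
    pool = WebDriverPool(use_pool=True, pool_size=2, driver_cls=PlaywrightDriver)
    driver = await pool.get()           # below pool_size: create; otherwise wait for a release
    try:
        await driver.page.goto('https://example.com')
        await pool.release(driver)      # healthy driver goes back into the queue for reuse
    except Exception:
        await pool.remove(driver)       # broken driver is quit and its slot freed
        raise
    await pool.close()                  # quit whatever is still queued

asyncio.run(main())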
aioscrapy/core/downloader/handlers/playwright/webdriver.py (removed in 2.1.6)
@@ -1,96 +0,0 @@
- # -*- coding: utf-8 -*-
-
- import os
- from typing import Dict, Optional, Tuple
-
- try:
-     from typing import Literal  # python >= 3.8
- except ImportError:  # python < 3.8
-     from typing_extensions import Literal
-
- from urllib.parse import urlparse, urlunparse
-
- from playwright.async_api import Page, BrowserContext, ViewportSize, ProxySettings
- from playwright.async_api import Playwright, Browser
- from playwright.async_api import async_playwright
-
-
- class PlaywrightDriver:
-     def __init__(
-             self,
-             *,
-             driver_type: Literal["chromium", "firefox", "webkit"] = "chromium",
-             proxy: Optional[str] = None,
-             browser_args: Optional[Dict] = None,
-             context_args: Optional[Dict] = None,
-             window_size: Optional[Tuple[int, int]] = None,
-             user_agent: str = None,
-             **kwargs
-     ):
-
-         self.driver_type = driver_type
-         self.proxy = proxy and self.format_context_proxy(proxy)
-         self.viewport = window_size and ViewportSize(width=window_size[0], height=window_size[1])
-         self.browser_args = browser_args or {}
-         self.context_args = context_args or {}
-         self.user_agent = user_agent
-
-         self.driver: Optional[Playwright] = None
-         self.browser: Optional[Browser] = None
-         self.context: Optional[BrowserContext] = None
-         self.page: Optional[Page] = None
-         self.url = None
-
-     async def setup(self):
-         browser_args = self.browser_args.copy()
-         context_args = self.context_args.copy()
-         if browser_args.get('args') is None:
-             browser_args.update({'args': ["--no-sandbox"]})
-
-         if context_args.get("storage_state") is not None:
-             storage_state_path = context_args.get("storage_state")
-             os.makedirs(os.path.dirname(storage_state_path), exist_ok=True)
-
-         if self.proxy:
-             browser_args.update({'proxy': self.proxy})
-             context_args.update({'proxy': self.proxy})
-         if self.viewport:
-             context_args.update({"viewport": self.viewport})
-             context_args.update({"screen": self.viewport})
-         if self.user_agent:
-             context_args.update({'user_agent': self.user_agent})
-
-         self.driver = await async_playwright().start()
-         self.browser: Browser = await getattr(self.driver, self.driver_type).launch(**browser_args)
-         self.context = await self.browser.new_context(**context_args)
-         self.page = await self.context.new_page()
-
-     @staticmethod
-     def format_context_proxy(proxy) -> ProxySettings:
-         parsed_url = urlparse(proxy)
-         return ProxySettings(
-             server=urlunparse(parsed_url._replace(netloc=parsed_url.netloc.split('@')[-1])),
-             username=parsed_url.username,
-             password=parsed_url.password,
-         )
-
-     async def quit(self):
-         await self.page.close()
-         try:
-             await self.context.close()
-         except:
-             pass
-         finally:
-             await self.browser.close()
-             await self.driver.stop()
-
-     async def get_cookies(self):
-         return {
-             cookie["name"]: cookie["value"]
-             for cookie in await self.page.context.cookies()
-         }
-
-     async def set_cookies(self, cookies: dict):
-         await self.page.context.add_cookies([
-             {"name": key, "value": value, "url": self.url or self.page.url} for key, value in cookies.items()
-         ])
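
format_context_proxy exists because Playwright's ProxySettings takes credentials as separate fields rather than embedded in the server URL. A worked example of the transformation it performs (the proxy URL is illustrative):

from urllib.parse import urlparse, urlunparse

proxy = 'http://user:secret@10.0.0.1:8888'
parsed = urlparse(proxy)
# The netloc 'user:secret@10.0.0.1:8888' loses everything before the '@'.
server = urlunparse(parsed._replace(netloc=parsed.netloc.split('@')[-1]))
assert server == 'http://10.0.0.1:8888'
assert parsed.username == 'user' and parsed.password == 'secret'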
aioscrapy/http/response/playwright.py (removed in 2.1.6)
@@ -1,36 +0,0 @@
- from typing import Optional, Any
-
- from aioscrapy.http.response.text import TextResponse
-
-
- class PlaywrightResponse(TextResponse):
-     def __init__(
-             self,
-             *args,
-             text: str = '',
-             cache_response: Optional[dict] = None,
-             driver: Optional["PlaywrightDriver"] = None,
-             driver_pool: Optional["WebDriverPool"] = None,
-             intercept_request: Optional[dict] = None,
-             **kwargs
-     ):
-         self.driver = driver
-         self.driver_pool = driver_pool
-         self._text = text
-         self.cache_response = cache_response or {}
-         self.intercept_request = intercept_request
-         super().__init__(*args, **kwargs)
-
-     async def release(self):
-         self.driver_pool and self.driver and await self.driver_pool.release(self.driver)
-
-     @property
-     def text(self):
-         return self._text or super().text
-
-     @text.setter
-     def text(self, text):
-         self._text = text
-
-     def get_response(self, key) -> Any:
-         return self.cache_response.get(key)
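
On the consuming side, a callback reads the rendered page from text, pulls listener-captured responses out with get_response(key), and should call release() when finished so the driver returns to the pool. A minimal sketch (the 'items' key is hypothetical):

async def parse(self, response):
    html = response.text                        # rendered page content set by the handler
    captured = response.get_response('items')   # cached listener result, or None
    await response.release()                    # hand the driver back to the pool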
aioscrapy/libs/pipelines/execl.py (removed in 2.1.6; replaced by aioscrapy/libs/pipelines/excel.py)
@@ -1,169 +0,0 @@
- import asyncio
- import math
- from io import BytesIO
- from typing import Tuple, Optional
-
- import requests
- import xlsxwriter
- from PIL import Image, ImageFile
-
- from aioscrapy.utils.log import logger
-
- try:
-     resample = Image.LANCZOS
- except:
-     resample = Image.ANTIALIAS
- ImageFile.LOAD_TRUNCATED_IMAGES = True
-
-
- class ExeclSinkMixin:
-     ws_cache = {}
-     wb_cache = {}
-     fields_cache = {}
-     y_cache = {}
-
-     @staticmethod
-     async def deal_img(url: str, img_size: Optional[Tuple[int, int]]) -> Optional[BytesIO]:
-         if url.startswith('//'):
-             url = 'https:' + url
-         try:
-             img_bytes = requests.get(url).content
-         except Exception as e:
-             logger.error(f"download img error: {e}")
-             return
-         im = Image.open(BytesIO(img_bytes))
-         im_format = im.format
-         if img_size:
-             temp = max(im.size[0] / img_size[0], im.size[1] / img_size[1])
-             img_size = (math.ceil(im.size[0] / temp), math.ceil(im.size[1] / temp))
-             im = im.resize(img_size, resample).convert('P')
-         result = BytesIO()
-         im.save(result, format=im_format)
-         return result
-
-     async def save_item(
-             self,
-             item: dict,
-             *,
-             filename: Optional[str] = None,
-             date_fields: Optional[list] = None,
-             date_format: str = 'yyyy-mm-dd HH:MM:SS',
-             img_fields: Optional[list] = None,
-             img_size: Optional[Tuple[int, int]] = None,
-             **options
-     ):
-         assert filename is not None, "the filename parameter is required"
-         if '.xlsx' not in filename:
-             filename = filename + '.xlsx'
-         try:
-             wb, ws, fields, y = self._get_write_class(filename, item, **options)
-             bold_format_1 = wb.add_format({'align': 'left', 'border': 1, 'valign': 'vcenter'})
-             bold_format_2 = wb.add_format({'align': 'left', 'border': 1, 'valign': 'vcenter', 'fg_color': '#D0D3D4'})
-             for x, field in enumerate(fields):
-                 if x % 2 == 0:
-                     bold_format = bold_format_1
-                 else:
-                     bold_format = bold_format_2
-                 if date_fields is not None and field in date_fields:
-                     ws.write_datetime(y, x, item.get(field), wb.add_format({'num_format': date_format}))
-
-                 elif img_fields is not None and field in img_fields:
-                     img_size and ws.set_column_pixels(x, x, width=math.ceil(img_size[0]))
-                     url = item.get(field)
-                     img_bytes = await self.deal_img(url, img_size)
-                     if img_bytes is None or ws.insert_image(y, x, '', {'image_data': img_bytes}) == -1:
-                         ws.write(y, x, url, bold_format)
-                 else:
-                     ws.write(y, x, item.get(field), bold_format)
-             if img_size is not None:
-                 ws.set_column_pixels(0, len(fields), width=math.ceil(img_size[0]))
-                 ws.set_row_pixels(y, height=math.ceil(img_size[1]))
-         except Exception as e:
-             logger.exception(f'Save Execl Error, filename:{filename}, item:{item}, errMsg: {e}')
-
-     def _get_write_class(self, filename, item, sheet='sheet1', **options):
-         filename_sheet = filename + sheet
-         if self.ws_cache.get(filename_sheet) is None:
-             if self.wb_cache.get(filename) is None:
-                 logger.info(f'Create Execl: {filename}')
-                 wb = xlsxwriter.Workbook(filename, options=options)
-                 self.wb_cache[filename] = wb
-             else:
-                 wb = self.wb_cache[filename]
-             ws = wb.add_worksheet(sheet)
-             bold_format = wb.add_format(
-                 {'bold': True, 'font_size': 12, 'border': 1, 'align': 'center', 'valign': 'vcenter'})
-             fields = list(item.keys())
-             ws.write_row('A1', fields, cell_format=bold_format)
-             ws.set_row(0, height=30)
-             self.fields_cache[filename_sheet] = fields
-             self.ws_cache[filename_sheet] = ws
-             self.y_cache[filename_sheet] = 0
-         self.y_cache[filename_sheet] += 1
-         return self.wb_cache[filename], \
-                self.ws_cache[filename_sheet], \
-                self.fields_cache[filename_sheet], \
-                self.y_cache[filename_sheet]
-
-     def close_execl(self, filename=None):
-         if filename not in self.wb_cache:
-             return
-
-         logger.info(f'Closing Execl: {filename}')
-         if wb := self.wb_cache.pop(filename):
-             wb.close()
-         for filename_sheet in list(self.ws_cache.keys()):
-             if not filename_sheet.startswith(filename):
-                 continue
-             self.ws_cache.pop(filename_sheet, None)
-             self.y_cache.pop(filename_sheet, None)
-             self.fields_cache.pop(filename_sheet, None)
-
-     def close(self):
-         for filename in list(self.wb_cache.keys()):
-             self.close_execl(filename)
-
-
- class ExeclPipeline(ExeclSinkMixin):
-     def __init__(self, settings):
-         self.lock = asyncio.Lock()
-
-     @classmethod
-     def from_settings(cls, settings):
-         return cls(settings)
-
-     async def process_item(self, item, spider):
-         execl_kw: Optional[dict] = item.pop('__execl__', None)
-         if not execl_kw:
-             logger.warning(f"item Missing key __execl__, not stored")
-             return item
-
-         execl_kw.setdefault('filename', spider.name)
-         async with self.lock:
-             await self.save_item(item, **execl_kw)
-
-     async def close_spider(self, spider):
-         self.close()
-
-
- if __name__ == '__main__':
-     class TestSpider:
-         name = 'TestSpider'
-
-
-     async def test():
-         p = ExeclPipeline({})
-         await p.process_item({
-             'title': 'tttt',
-             'img': '//www.baidu.com/img/flexible/logo/pc/result.png',
-             '__execl__': {
-                 'sheet': 'sheet1',
-                 # 'filename': 'test',
-                 # 'img_fields': ['img'],
-                 # 'img_size': (100, 500)
-             }
-         }, TestSpider())
-         await p.close_spider(None)
-
-
-     asyncio.run(test())
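
deal_img above scales an image down by the larger of its width and height ratios, so the result fits inside img_size without distorting the aspect ratio. A worked example of that arithmetic (dimensions are illustrative):

import math

im_size, img_size = (800, 600), (100, 500)
temp = max(im_size[0] / img_size[0], im_size[1] / img_size[1])         # max(8.0, 1.2) = 8.0
target = (math.ceil(im_size[0] / temp), math.ceil(im_size[1] / temp))
assert target == (100, 75)  # fits within (100, 500), aspect ratio preserved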