quickquery 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quickquery/__init__.py +49 -0
- quickquery/core.py +688 -0
- quickquery/live.py +195 -0
- quickquery/utils.py +199 -0
- quickquery-0.1.1.dist-info/METADATA +338 -0
- quickquery-0.1.1.dist-info/RECORD +8 -0
- quickquery-0.1.1.dist-info/WHEEL +4 -0
- quickquery-0.1.1.dist-info/licenses/LICENSE +21 -0
quickquery/core.py
ADDED
|
@@ -0,0 +1,688 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Callable, Iterator
|
|
4
|
+
import random
|
|
5
|
+
import re
|
|
6
|
+
import time
|
|
7
|
+
import unicodedata as ud
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Literal
|
|
10
|
+
from urllib.parse import urljoin
|
|
11
|
+
|
|
12
|
+
from loguru import logger
|
|
13
|
+
from patchright.sync_api import Frame as PatchFrame, Page as PatchPage, ElementHandle as PatchElementHandle, Response as PatchResponse
|
|
14
|
+
from playwright.sync_api import Frame as PlayFrame, Page as PlayPage, ElementHandle as PlayElementHandle, Response as PlayResponse
|
|
15
|
+
from selectolax.lexbor import LexborHTMLParser, LexborNode
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Union aliases: every wrapper in this module accepts the sync-API object from
# either patchright or playwright.
Page = PatchPage | PlayPage
ElementHandle = PatchElementHandle | PlayElementHandle
Response = PatchResponse | PlayResponse
Frame = PatchFrame | PlayFrame
|
|
22
|
+
|
|
23
|
+
_UNUSABLE_INLINE_URL = re.compile(r'(?i)^(?:#|javascript:|mailto:|tel:|data:)')
|
|
24
|
+
|
|
25
|
+
_ELEMENT_NEXT = 'nextElementSibling'
|
|
26
|
+
_ELEMENT_PREV = 'previousElementSibling'
|
|
27
|
+
_ELEMENT_PARENT = 'parentElement'
|
|
28
|
+
|
|
29
|
+
_NODE_NEXT = 'next'
|
|
30
|
+
_NODE_PREV = 'prev'
|
|
31
|
+
_NODE_PARENT = 'parent'
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _collect_str[T](items: list[T], getter: Callable[[T], str | None]) -> list[str]:
|
|
35
|
+
return [v for item in items if (v := getter(item))]
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def quick_page(page: Page) -> QuickPage:
    '''Wrap ``page`` in a :class:`QuickPage`.'''
    return QuickPage(page=page)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def quick_element(page: Page, elem: ElementHandle | None) -> QuickElement:
    '''Wrap ``elem`` (which may be ``None``) in a :class:`QuickElement` bound to ``page``.'''
    return QuickElement(page=page, elem=elem)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def quick_element_group(page: Page, elems: list[QuickElement]) -> QuickElementGroup:
    '''Bundle already-wrapped elements into a :class:`QuickElementGroup` bound to ``page``.'''
    return QuickElementGroup(page=page, elems=elems)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def quick_frame(page: Page, frame: Frame | None) -> QuickFrame:
    '''Wrap ``frame`` (which may be ``None``) in a :class:`QuickFrame` bound to ``page``.'''
    return QuickFrame(page=page, frame=frame)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def quick_shadow_root(page: Page, host: ElementHandle | None) -> QuickShadowRoot:
    '''Create a :class:`QuickShadowRoot` for the shadow host ``host`` (may be ``None``).'''
    return QuickShadowRoot(page=page, host=host)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class _PageScoped:
    '''Mixin for wrappers that are bound to a page.

    Subclasses assign ``self._page``; each method here forwards to the
    module-level factory of the same name with that page filled in.
    '''

    # Page that every wrapper created through these helpers is bound to.
    _page: Page

    def quick_element(self, elem: ElementHandle | None) -> QuickElement:
        '''Wrap ``elem`` for this object's page.'''
        return quick_element(page=self._page, elem=elem)

    def quick_element_group(self, elems: list[QuickElement]) -> QuickElementGroup:
        '''Group ``elems`` for this object's page.'''
        return quick_element_group(page=self._page, elems=elems)

    def quick_frame(self, frame: Frame | None) -> QuickFrame:
        '''Wrap ``frame`` for this object's page.'''
        return quick_frame(page=self._page, frame=frame)

    def quick_shadow_root(self, host: ElementHandle | None) -> QuickShadowRoot:
        '''Create a shadow-root wrapper for ``host`` on this object's page.'''
        return quick_shadow_root(page=self._page, host=host)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def quick_parser(parser: LexborHTMLParser) -> QuickParser:
    '''Wrap a selectolax ``LexborHTMLParser`` in a :class:`QuickParser`.'''
    return QuickParser(parser=parser)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def quick_node(node: LexborNode | None) -> QuickNode:
    '''Wrap ``node`` (which may be ``None``) in a :class:`QuickNode`.'''
    return QuickNode(node=node)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def quick_node_group(nodes: list[QuickNode]) -> QuickNodeGroup:
    '''Bundle already-wrapped nodes into a :class:`QuickNodeGroup`.'''
    return QuickNodeGroup(nodes=nodes)
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class QuickPage(_PageScoped):
    '''Convenience wrapper around a patchright/playwright ``Page``.

    Selector lookups return ``QuickElement`` / ``QuickElementGroup`` wrappers.
    ``goto``, ``bytes_at`` and ``w`` log failures and return an empty result
    instead of raising.
    '''

    def __init__(self, page: Page) -> None:
        self._page = page

    @property
    def raw(self) -> Page:
        '''The wrapped page object.'''
        return self._page

    def i(self, selector: str) -> QuickElement:
        '''in: wrap the first element matching ``selector`` (empty wrapper if none).'''
        elem = self._page.query_selector(selector)
        return self.quick_element(elem)

    def ii(self, selector: str) -> QuickElementGroup:
        '''in all: wrap every element matching ``selector``.'''
        elems = self._page.query_selector_all(selector)
        return self.quick_element_group([self.quick_element(e) for e in elems])

    def goto(
        self,
        url: str | None,
        try_cnt: int = 3,
        wait_range: tuple[float, float] = (3, 5),
        sleep_after: tuple[float, float] | None = (1, 2),
    ) -> Response | None:
        '''Navigate to ``url``, retrying on failure.

        Args:
            url: Target URL; a falsy value returns ``None`` without navigating.
            try_cnt: Maximum number of navigation attempts.
            wait_range: ``(low, high)`` seconds; a uniformly random pause from
                this range is taken between failed attempts.
            sleep_after: ``(low, high)`` seconds for a random pause after a
                successful navigation, or ``None`` to skip the pause.

        Returns:
            The navigation response, or ``None`` when every attempt either
            raised or produced no response.
        '''
        if not url:
            return None
        for i in range(try_cnt):
            try:
                response = self._page.goto(url)
                if response is not None:
                    if sleep_after is not None:
                        time.sleep(random.uniform(*sleep_after))
                    return response
                reason = 'response is None'
            except Exception as e:
                reason = f'{type(e).__name__}: {e}'
            logger.warning(f'[goto] retry ({i+1}/{try_cnt}) {reason}: {url!r}')
            # No point in pausing after the final attempt.
            if i + 1 < try_cnt:
                time.sleep(random.uniform(*wait_range))
        logger.error(f'[goto] retries exhausted ({try_cnt}): {url!r}')
        return None

    def bytes_at(self, url: str | None) -> bytes | None:
        '''Fetch the response body of ``url`` through a temporary page.

        A new page is opened in the same browser context, navigated with
        :meth:`goto`, and always closed afterwards.

        Returns:
            The body bytes when the response is ``ok``; ``None`` for a falsy
            ``url``, a failed navigation, a non-ok status, or any exception
            (all of which are logged).
        '''
        if not url:
            return None
        new_page = None
        try:
            # Created inside the try so that a failure to open the page is
            # logged and mapped to None like every other failure here.
            new_page = self._page.context.new_page()
            res = quick_page(new_page).goto(url)
            if not res:
                return None
            if res.ok:
                return res.body()
            logger.warning(
                f'[bytes_at] HTTP {res.status} {res.status_text!r} | url={url!r} | response_url={res.url!r}'
            )
            return None
        except Exception as e:
            logger.warning(f'[bytes_at] {type(e).__name__}: {e} | url={url!r}')
            return None
        finally:
            if new_page is not None:
                try:
                    new_page.close()
                except Exception as e:
                    # A failing close must not replace the value being returned.
                    logger.warning(f'[bytes_at] close failed: {type(e).__name__}: {e} | url={url!r}')

    def w(self, selector: str, state: str = 'attached', timeout: int = 15000) -> QuickElement:
        '''wait: wait for ``selector`` to reach ``state`` and wrap the result.

        ``state`` and ``timeout`` are passed straight to the page's
        ``wait_for_selector``. Any exception is logged and an empty wrapper
        is returned.
        '''
        try:
            elem = self._page.wait_for_selector(selector, state=state, timeout=timeout)
            return self.quick_element(elem)
        except Exception as e:
            logger.warning(f'[wait] {type(e).__name__}: {e} | selector={selector!r} | url={self._page.url!r}')
            return self.quick_element(None)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class QuickElement(_PageScoped):
    '''Null-safe wrapper around an element handle (or ``None``) bound to a page.

    A wrapper holding ``None`` is falsy: its lookups return further empty
    wrappers and its value accessors return ``None``.
    '''

    def __init__(self, page: Page, elem: ElementHandle | None) -> None:
        self._page = page
        self._elem = elem

    def __bool__(self) -> bool:
        '''True when an element handle is wrapped.'''
        return self._elem is not None

    @property
    def raw(self) -> ElementHandle | None:
        '''The wrapped element handle, or ``None``.'''
        return self._elem

    def i(self, selector: str) -> QuickElement:
        '''in: first descendant matching ``selector`` (empty wrapper if none).'''
        handle = None if self._elem is None else self._elem.query_selector(selector)
        return self.quick_element(handle)

    def ii(self, selector: str) -> QuickElementGroup:
        '''in all: every descendant matching ``selector``.'''
        if self._elem is None:
            return self.quick_element_group([])
        wrapped = [self.quick_element(h) for h in self._elem.query_selector_all(selector)]
        return self.quick_element_group(wrapped)

    @property
    def frame(self) -> QuickFrame:
        '''Content frame of this element; an empty wrapper on ``None`` or error.'''
        content = None
        if self._elem is not None:
            try:
                content = self._elem.content_frame()
            except Exception as exc:
                logger.error(f'[frame] {type(exc).__name__}: {exc}')
        return self.quick_frame(content)

    @property
    def shadow(self) -> QuickShadowRoot:
        '''Shadow-root wrapper that uses this element as the host.'''
        return self.quick_shadow_root(self._elem)

    def _walk_relative(self, selector: str, axis: str, label: str) -> QuickElement:
        '''Follow the DOM property ``axis`` from this element until ``selector`` matches.

        The walk runs in the page: starting at ``el[axis]`` it keeps following
        ``axis`` and returns the first element for which ``matches(selector)``
        is true. ``label`` only tags the log message written on failure.
        '''
        if self._elem is None:
            return self.quick_element(None)
        script = '''(el, args) => {
            const [sel, axis] = args;
            let cur = el[axis];
            while (cur) {
                if (cur.matches(sel)) return cur;
                cur = cur[axis];
            }
            return null;
        }'''
        try:
            handle = self._elem.evaluate_handle(script, [selector, axis])
            return self.quick_element(handle.as_element())
        except Exception as exc:
            logger.error(f'[{label}] {self._elem} {type(exc).__name__}: {exc}')
            return self.quick_element(None)

    def n(self, selector: str) -> QuickElement:
        '''next: nearest following sibling element matching ``selector``.'''
        return self._walk_relative(selector, _ELEMENT_NEXT, 'n')

    def p(self, selector: str) -> QuickElement:
        '''prev: nearest preceding sibling element matching ``selector``.'''
        return self._walk_relative(selector, _ELEMENT_PREV, 'p')

    def o(self, selector: str) -> QuickElement:
        '''out: nearest ancestor element matching ``selector``.'''
        return self._walk_relative(selector, _ELEMENT_PARENT, 'o')

    @property
    def text(self) -> str | None:
        '''``text_content()`` of the element; ``None`` when empty or no element.'''
        if self._elem is None:
            return None
        content = self._elem.text_content()
        return content or None

    def attr(self, attr_name: str) -> str | None:
        '''Value of attribute ``attr_name``; ``None`` when missing, empty or no element.'''
        if self._elem is None:
            return None
        value = self._elem.get_attribute(attr_name)
        return value or None

    def _resolved_url_from_attr(self, attr_name: str) -> str | None:
        '''Read ``attr_name`` and resolve it against the page URL.

        Returns ``None`` when there is no element, the attribute is missing or
        blank, or the stripped value matches ``_UNUSABLE_INLINE_URL``.
        '''
        if self._elem is None:
            return None
        raw_value = self._elem.get_attribute(attr_name)
        candidate = raw_value.strip() if raw_value else ''
        if not candidate or _UNUSABLE_INLINE_URL.search(candidate):
            return None
        return urljoin(self._page.url, candidate)

    @property
    def url(self) -> str | None:
        '''Resolved ``href`` of the element, if usable.'''
        return self._resolved_url_from_attr('href')

    @property
    def src(self) -> str | None:
        '''Resolved ``src`` of the element, if usable.'''
        return self._resolved_url_from_attr('src')

    def scroll_into_view(self) -> None:
        '''Smooth-scroll the element to the viewport centre and wait until it is stable.

        Failures (including a missing element) are logged, never raised.
        '''
        if self._elem is None:
            logger.warning('[scroll_into_view] element is None')
            return
        script = '''(el) => el.scrollIntoView({ behavior: "smooth", block: "center", inline: "nearest" });'''
        try:
            self._elem.evaluate(script)
            self._elem.wait_for_element_state('stable')
        except Exception as exc:
            logger.warning(f'[scroll_into_view] {type(exc).__name__}: {exc} | url={self._page.url!r}')

    @staticmethod
    def _isolate_visibility_css(scope: str, attr: str) -> str:
        '''Build CSS that hides everything under ``scope`` except elements marked with ``attr``.'''
        rules = (
            f'{scope} * {{',
            ' visibility: hidden !important;',
            '}',
            f'[{attr}],',
            f'[{attr}] * {{',
            ' visibility: visible !important;',
            '}',
        )
        # Every rule line, including the last, is newline-terminated.
        return '\n'.join(rules) + '\n'

    def _isolate_apply(self, attr: str, css: str, style_id: str) -> None:
        '''Mark the element with ``attr`` and inject a ``<style id=style_id>`` containing ``css``.'''
        script = '''(el, args) => {
            const [attr, css, styleId] = args;
            el.setAttribute(attr, '');
            const s = document.createElement('style');
            s.id = styleId;
            s.textContent = css;
            (document.head || document.documentElement).appendChild(s);
        }'''
        self._elem.evaluate(script, [attr, css, style_id])

    def _isolate_remove(self, attr: str, style_id: str) -> None:
        '''Undo :meth:`_isolate_apply`; errors are logged rather than raised.'''
        script = '''(el, args) => {
            const [attr, styleId] = args;
            el.removeAttribute(attr);
            const node = document.getElementById(styleId);
            if (node) node.remove();
        }'''
        try:
            self._elem.evaluate(script, [attr, style_id])
        except Exception as exc:
            logger.warning(
                f'[screenshot isolate cleanup] {type(exc).__name__}: {exc} | url={self._page.url!r}'
            )

    def screenshot(
        self,
        path: Path,
        image_type: Literal['png', 'jpeg'] = 'png',
        *,
        isolate: bool = False,
        isolate_scope: str = 'body',
        isolate_attr: str = 'data-quickquery-screenshot-root',
        isolate_style_id: str = 'quickquery-screenshot-isolate',
    ) -> bool:
        '''Save a screenshot of the element to ``path``.

        Args:
            path: Output file; missing parent directories are created.
            image_type: Image format passed to the element's ``screenshot``.
            isolate: When true, temporarily hide everything under
                ``isolate_scope`` except this element while capturing.
            isolate_scope: CSS selector whose descendants are hidden.
            isolate_attr: Marker attribute set on the element while isolating.
            isolate_style_id: Prefix of the id given to the injected style tag
                (a ``time.time_ns()`` suffix is appended).

        Returns:
            ``True`` on success; ``False`` when there is no element or any
            step raised (the error is logged).
        '''
        if self._elem is None:
            logger.warning('[screenshot] element is None')
            return False
        style_id = f'{isolate_style_id}-{time.time_ns()}' if isolate else None
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            if isolate:
                self._isolate_apply(
                    isolate_attr,
                    self._isolate_visibility_css(isolate_scope, isolate_attr),
                    style_id,
                )
            self._elem.screenshot(path=path, type=image_type, animations='disabled')
        except Exception as exc:
            logger.warning(f'[screenshot] {type(exc).__name__}: {exc} | url={self._page.url!r}')
            return False
        else:
            return True
        finally:
            # Always restore the page, whether or not the capture succeeded.
            if isolate:
                self._isolate_remove(isolate_attr, style_id)
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
class QuickElementGroup(_PageScoped):
    '''Ordered collection of :class:`QuickElement` objects that share one page.

    Supports iteration, ``len()``, indexing, slicing (returns a new group)
    and ``+`` with another group bound to the same page.
    '''

    def __init__(self, page: Page, elems: list[QuickElement]) -> None:
        self._page = page
        self._elems = elems

    def __iter__(self) -> Iterator[QuickElement]:
        return iter(self._elems)

    def __len__(self) -> int:
        return len(self._elems)

    def __getitem__(self, key: int | slice) -> QuickElement | QuickElementGroup:
        '''Return one element for an index, or a new group for a slice.'''
        if not isinstance(key, slice):
            return self._elems[key]
        return QuickElementGroup(self._page, self._elems[key])

    def __add__(self, other: QuickElementGroup) -> QuickElementGroup:
        '''Concatenate two groups.

        Raises:
            TypeError: ``other`` is not a ``QuickElementGroup``.
            ValueError: the two groups are bound to different page objects.
        '''
        if not isinstance(other, QuickElementGroup):
            other_type = type(other).__name__
            raise TypeError(
                'QuickElementGroup 同士のみ + で結合できます '
                f'(右辺は {other_type})'
            )
        if self._page is not other._page:
            raise ValueError('異なる Page に紐づいた QuickElementGroup は結合できません')
        combined = self._elems + other._elems
        return QuickElementGroup(self._page, combined)

    @property
    def raw(self) -> list[QuickElement]:
        '''The underlying list of wrapped elements (not a copy).'''
        return self._elems

    @property
    def scan(self) -> ElementScan:
        '''Build an :class:`ElementScan` over the elements that have text.

        Each text is NFKC-normalized before it is paired with its element;
        elements whose ``text`` is ``None`` are left out.
        '''
        pairs: list[tuple[str, QuickElement]] = [
            (ud.normalize('NFKC', content), member)
            for member in self._elems
            if (content := member.text)
        ]
        return ElementScan(self._page, pairs)

    @property
    def texts(self) -> list[str]:
        '''Non-empty ``text`` values of the elements, in order.'''
        return _collect_str(self._elems, lambda member: member.text)

    def attrs(self, attr_name: str) -> list[str]:
        '''Non-empty values of attribute ``attr_name`` across the elements, in order.'''
        return _collect_str(self._elems, lambda member: member.attr(attr_name))

    @property
    def urls(self) -> list[str]:
        '''Resolved ``url`` values of the elements, skipping those without one.'''
        return _collect_str(self._elems, lambda member: member.url)

    @property
    def srcs(self) -> list[str]:
        '''Resolved ``src`` values of the elements, skipping those without one.'''
        return _collect_str(self._elems, lambda member: member.src)
|
|
401
|
+
|
|
402
|
+
|
|
403
|
+
class ElementScan(_PageScoped):
    '''Regex search over ``(text, element)`` pairs.'''

    def __init__(self, page: Page, pairs: list[tuple[str, QuickElement]]) -> None:
        self._page = page
        self._pairs = pairs

    def m(self, pattern: str) -> QuickElement:
        '''match: first element whose text contains a match for ``pattern``.

        Returns an empty wrapper when nothing matches or when the pattern
        fails (the error is logged).
        '''
        hit = None
        try:
            regex = re.compile(pattern)
            hit = next((elem for text, elem in self._pairs if regex.search(text)), None)
        except Exception as exc:
            logger.warning(f'[scan] {type(exc).__name__}: {exc} | pattern={pattern!r}')
        return hit if hit is not None else self.quick_element(None)

    def mm(self, pattern: str) -> QuickElementGroup:
        '''match all: every element whose text contains a match for ``pattern``.

        Returns an empty group when the pattern fails (the error is logged).
        '''
        try:
            regex = re.compile(pattern)
            hits = [elem for text, elem in self._pairs if regex.search(text)]
            return self.quick_element_group(hits)
        except Exception as exc:
            logger.warning(f'[scan] {type(exc).__name__}: {exc} | pattern={pattern!r}')
            return self.quick_element_group([])
|
|
428
|
+
|
|
429
|
+
|
|
430
|
+
class QuickFrame(_PageScoped):
    '''Null-safe wrapper around a frame (or ``None``) bound to a page.

    A wrapper holding ``None`` is falsy and every lookup on it returns an
    empty result.
    '''

    def __init__(self, page: Page, frame: Frame | None) -> None:
        self._page = page
        self._frame = frame

    def __bool__(self) -> bool:
        '''True when a frame is wrapped.'''
        return self._frame is not None

    @property
    def raw(self) -> Frame | None:
        '''The wrapped frame, or ``None``.'''
        return self._frame

    def i(self, selector: str) -> QuickElement:
        '''in: first element in the frame matching ``selector``.'''
        handle = None if self._frame is None else self._frame.query_selector(selector)
        return self.quick_element(handle)

    def ii(self, selector: str) -> QuickElementGroup:
        '''in all: every element in the frame matching ``selector``.'''
        if self._frame is None:
            return self.quick_element_group([])
        wrapped = [self.quick_element(h) for h in self._frame.query_selector_all(selector)]
        return self.quick_element_group(wrapped)

    def w(self, selector: str, state: str = 'attached', timeout: int = 15000) -> QuickElement:
        '''wait: wait for ``selector`` to reach ``state`` inside the frame.

        ``state`` and ``timeout`` are passed to the frame's
        ``wait_for_selector``. Any exception is logged and an empty wrapper
        is returned; so is a missing frame (without logging).
        '''
        if self._frame is None:
            return self.quick_element(None)
        try:
            handle = self._frame.wait_for_selector(selector, state=state, timeout=timeout)
            return self.quick_element(handle)
        except Exception as exc:
            logger.warning(
                f'[wait] {type(exc).__name__}: {exc} | selector={selector!r} | url={self._page.url!r}'
            )
            return self.quick_element(None)
|
|
468
|
+
|
|
469
|
+
|
|
470
|
+
class QuickShadowRoot(_PageScoped):
    '''Queries scoped to the shadow root of a host element.

    All lookups run JavaScript against ``host.shadowRoot``. The wrapper is
    falsy when there is no host, when the host exposes no ``shadowRoot``, or
    when checking for it fails.
    '''

    def __init__(self, page: Page, host: ElementHandle | None) -> None:
        self._page = page
        self._host = host

    def __bool__(self) -> bool:
        '''True when the host currently exposes a ``shadowRoot`` (checked in the page).'''
        if self._host is None:
            return False
        try:
            return bool(self._host.evaluate('el => Boolean(el.shadowRoot)'))
        except Exception as e:
            logger.error(f'[shadow] {type(e).__name__}: {e}')
            return False

    def i(self, selector: str) -> QuickElement:
        '''in: first element inside the shadow root matching ``selector``.

        Returns an empty wrapper when the shadow root is unavailable, nothing
        matches, or the lookup raises (the error is logged).
        '''
        if not self:
            return self.quick_element(None)
        try:
            elem = self._host.evaluate_handle(
                '(el, sel) => el.shadowRoot?.querySelector(sel) ?? null',
                selector,
            ).as_element()
            return self.quick_element(elem)
        except Exception as e:
            logger.error(f'[shadow i] {type(e).__name__}: {e} | selector={selector!r}')
            return self.quick_element(None)

    def ii(self, selector: str) -> QuickElementGroup:
        '''in all: every element inside the shadow root matching ``selector``.

        The match count is read first and each match is then fetched by index.
        Returns an empty group when the shadow root is unavailable or the
        lookup raises (the error is logged).
        '''
        if not self:
            return self.quick_element_group([])
        try:
            n = self._host.evaluate(
                '(el, sel) => el.shadowRoot?.querySelectorAll(sel)?.length ?? 0',
                selector,
            )
            elems = []
            for idx in range(n):
                elem = self._host.evaluate_handle(
                    '''(el, args) => {
                        const [sel, i] = args;
                        return el.shadowRoot.querySelectorAll(sel)[i];
                    }''',
                    [selector, idx],
                ).as_element()
                # The DOM may change between round trips; an index that no
                # longer resolves to an element yields None and is skipped so
                # the group never contains empty wrappers.
                if elem is not None:
                    elems.append(self.quick_element(elem))
            return self.quick_element_group(elems)
        except Exception as e:
            logger.error(f'[shadow ii] {type(e).__name__}: {e} | selector={selector!r}')
            return self.quick_element_group([])

    def w(self, selector: str, timeout: int = 15000) -> QuickElement:
        '''wait (attached in shadow root only)

        Polls, in the host's owner frame, until ``selector`` matches inside
        the shadow root, then returns that element via :meth:`i`. Returns an
        empty wrapper when the shadow root is unavailable, the owner frame is
        missing, or waiting fails or times out (logged).
        '''
        if not self:
            return self.quick_element(None)
        try:
            # Inside the try: resolving the owner frame is a browser round
            # trip and may raise like any other call on the handle.
            frame = self._host.owner_frame()
            if frame is None:
                logger.warning('[shadow wait] owner_frame is None')
                return self.quick_element(None)
            frame.wait_for_function(
                '([el, sel]) => Boolean(el.shadowRoot?.querySelector(sel))',
                # Passed by keyword: wait_for_function takes ``arg`` as a
                # keyword argument in the Python sync API.
                arg=[self._host, selector],
                timeout=timeout,
            )
            return self.i(selector)
        except Exception as e:
            logger.warning(
                f'[shadow wait] {type(e).__name__}: {e} | selector={selector!r} | url={self._page.url!r}'
            )
            return self.quick_element(None)
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
class QuickParser:
    '''Thin wrapper around a selectolax ``LexborHTMLParser``.'''

    def __init__(self, parser: LexborHTMLParser) -> None:
        self._parser = parser

    @property
    def raw(self) -> LexborHTMLParser:
        '''The wrapped parser.'''
        return self._parser

    def i(self, selector: str) -> QuickNode:
        '''in: wrap the result of the parser's ``css_first(selector)``.'''
        return quick_node(self._parser.css_first(selector))

    def ii(self, selector: str) -> QuickNodeGroup:
        '''in all: wrap every node returned by the parser's ``css(selector)``.'''
        wrapped = [quick_node(found) for found in self._parser.css(selector)]
        return quick_node_group(wrapped)
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
class QuickNode:
    '''Null-safe wrapper around a selectolax ``LexborNode`` (or ``None``).

    A wrapper holding ``None`` is falsy: its lookups return further empty
    wrappers and its value accessors return ``None``.
    '''

    def __init__(self, node: LexborNode | None) -> None:
        self._node = node

    def __bool__(self) -> bool:
        '''True when a node is wrapped.'''
        return self._node is not None

    @property
    def raw(self) -> LexborNode | None:
        '''The wrapped node, or ``None``.'''
        return self._node

    def i(self, selector: str) -> QuickNode:
        '''in: wrap the result of the node's ``css_first(selector)``.'''
        found = None if self._node is None else self._node.css_first(selector)
        return quick_node(found)

    def ii(self, selector: str) -> QuickNodeGroup:
        '''in all: wrap every node returned by the node's ``css(selector)``.'''
        if self._node is None:
            return quick_node_group([])
        wrapped = [quick_node(found) for found in self._node.css(selector)]
        return quick_node_group(wrapped)

    def _walk_relative(self, selector: str, axis: str) -> QuickNode:
        '''Follow attribute ``axis`` from this node until an element matches ``selector``.

        Nodes that are not element nodes are skipped. Returns an empty wrapper
        when the chain ends without a match.
        '''
        candidate = None if self._node is None else getattr(self._node, axis)
        while candidate is not None and not (
            candidate.is_element_node and candidate.css_matches(selector)
        ):
            candidate = getattr(candidate, axis)
        return quick_node(candidate)

    def n(self, selector: str) -> QuickNode:
        '''next: nearest following element (via ``next``) matching ``selector``.'''
        return self._walk_relative(selector, _NODE_NEXT)

    def p(self, selector: str) -> QuickNode:
        '''prev: nearest preceding element (via ``prev``) matching ``selector``.'''
        return self._walk_relative(selector, _NODE_PREV)

    def o(self, selector: str) -> QuickNode:
        '''out: nearest ancestor element (via ``parent``) matching ``selector``.'''
        return self._walk_relative(selector, _NODE_PARENT)

    @property
    def text(self) -> str | None:
        '''Result of the node's ``text()``; ``None`` when empty or no node.'''
        if self._node is None:
            return None
        content = self._node.text()
        return content or None

    def attr(self, attr_name: str) -> str | None:
        '''Value of attribute ``attr_name``; ``None`` when missing, empty or no node.'''
        if self._node is None:
            return None
        value = self._node.attributes.get(attr_name)
        return value or None
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
class QuickNodeGroup:
    '''Ordered collection of :class:`QuickNode` objects.

    Supports iteration, ``len()``, indexing, slicing (returns a new group)
    and ``+`` with another group.
    '''

    def __init__(self, nodes: list[QuickNode]) -> None:
        self._nodes = nodes

    def __iter__(self) -> Iterator[QuickNode]:
        return iter(self._nodes)

    def __len__(self) -> int:
        return len(self._nodes)

    def __getitem__(self, key: int | slice) -> QuickNode | QuickNodeGroup:
        '''Return one node for an index, or a new group for a slice.'''
        if not isinstance(key, slice):
            return self._nodes[key]
        return QuickNodeGroup(self._nodes[key])

    def __add__(self, other: QuickNodeGroup) -> QuickNodeGroup:
        '''Concatenate two groups.

        Raises:
            TypeError: ``other`` is not a ``QuickNodeGroup``.
        '''
        if not isinstance(other, QuickNodeGroup):
            other_type = type(other).__name__
            raise TypeError(
                'QuickNodeGroup 同士のみ + で結合できます '
                f'(右辺は {other_type})'
            )
        combined = self._nodes + other._nodes
        return QuickNodeGroup(combined)

    @property
    def raw(self) -> list[QuickNode]:
        '''The underlying list of wrapped nodes (not a copy).'''
        return self._nodes

    @property
    def scan(self) -> NodeScan:
        '''Build a :class:`NodeScan` over the nodes that have text.

        Each text is NFKC-normalized before it is paired with its node; nodes
        whose ``text`` is ``None`` are left out.
        '''
        pairs: list[tuple[str, QuickNode]] = [
            (ud.normalize('NFKC', content), member)
            for member in self._nodes
            if (content := member.text)
        ]
        return NodeScan(pairs)

    @property
    def texts(self) -> list[str]:
        '''Non-empty ``text`` values of the nodes, in order.'''
        return _collect_str(self._nodes, lambda member: member.text)

    def attrs(self, attr_name: str) -> list[str]:
        '''Non-empty values of attribute ``attr_name`` across the nodes, in order.'''
        return _collect_str(self._nodes, lambda member: member.attr(attr_name))
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
class NodeScan:
    '''Regex search over ``(text, node)`` pairs.'''

    def __init__(self, pairs: list[tuple[str, QuickNode]]) -> None:
        self._pairs = pairs

    def m(self, pattern: str) -> QuickNode:
        '''match: first node whose text contains a match for ``pattern``.

        Returns an empty wrapper when nothing matches or when the pattern
        fails (the error is logged).
        '''
        hit = None
        try:
            regex = re.compile(pattern)
            hit = next((node for text, node in self._pairs if regex.search(text)), None)
        except Exception as exc:
            logger.warning(f'[scan] {type(exc).__name__}: {exc} | pattern={pattern!r}')
        return hit if hit is not None else quick_node(None)

    def mm(self, pattern: str) -> QuickNodeGroup:
        '''match all: every node whose text contains a match for ``pattern``.

        Returns an empty group when the pattern fails (the error is logged).
        '''
        try:
            regex = re.compile(pattern)
            hits = [node for text, node in self._pairs if regex.search(text)]
            return quick_node_group(hits)
        except Exception as exc:
            logger.warning(f'[scan] {type(exc).__name__}: {exc} | pattern={pattern!r}')
            return quick_node_group([])
|