litescrape 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- litescrape/__init__.py +41 -0
- litescrape/browser.py +180 -0
- litescrape/core.py +672 -0
- litescrape/utils.py +199 -0
- litescrape-0.1.1.dist-info/METADATA +327 -0
- litescrape-0.1.1.dist-info/RECORD +8 -0
- litescrape-0.1.1.dist-info/WHEEL +4 -0
- litescrape-0.1.1.dist-info/licenses/LICENSE +21 -0
litescrape/__init__.py
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
from .core import (
|
|
2
|
+
ElementHandle,
|
|
3
|
+
ElementScan,
|
|
4
|
+
Frame,
|
|
5
|
+
NodeScan,
|
|
6
|
+
Page,
|
|
7
|
+
Response,
|
|
8
|
+
LiteElement,
|
|
9
|
+
LiteElementGroup,
|
|
10
|
+
LiteFrame,
|
|
11
|
+
LiteShadowRoot,
|
|
12
|
+
LiteNode,
|
|
13
|
+
LiteNodeGroup,
|
|
14
|
+
LitePage,
|
|
15
|
+
LiteParser,
|
|
16
|
+
lite_node,
|
|
17
|
+
lite_node_group,
|
|
18
|
+
lite_page,
|
|
19
|
+
lite_parser,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
# Public API of the package; every name here is re-exported from ``.core``.
__all__ = [
    # Browser-facing types
    "Page", "ElementHandle", "Frame", "Response",
    # Factory helpers
    "lite_page", "lite_parser", "lite_node", "lite_node_group",
    # Live-page wrappers
    "LitePage", "LiteFrame", "LiteShadowRoot", "LiteElement", "LiteElementGroup", "ElementScan",
    # Parsed-document wrappers
    "LiteParser", "LiteNode", "LiteNodeGroup", "NodeScan",
]
|
litescrape/browser.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
from contextlib import ExitStack
|
|
2
|
+
from dataclasses import dataclass, fields
|
|
3
|
+
from types import TracebackType
|
|
4
|
+
from typing import Any, Self
|
|
5
|
+
|
|
6
|
+
from camoufox.sync_api import Camoufox
|
|
7
|
+
from patchright.sync_api import (
|
|
8
|
+
Page as PatchrightPage,
|
|
9
|
+
Playwright,
|
|
10
|
+
sync_playwright,
|
|
11
|
+
)
|
|
12
|
+
from playwright.sync_api import Page as PlaywrightPage
|
|
13
|
+
|
|
14
|
+
# Union of the Patchright and Playwright page classes; used as the page type
# in the runner annotations in this module.
Page = PatchrightPage | PlaywrightPage
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass(frozen=True, slots=True)
|
|
18
|
+
class Span:
|
|
19
|
+
browser: int | None = None
|
|
20
|
+
context: int | None = None
|
|
21
|
+
page: int | None = None
|
|
22
|
+
|
|
23
|
+
def __post_init__(self) -> None:
|
|
24
|
+
for f in fields(self):
|
|
25
|
+
value = getattr(self, f.name)
|
|
26
|
+
if value is not None and value < 1:
|
|
27
|
+
raise ValueError(f'{f.name} は 1 以上で指定してください (got {value})')
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class _RunnerBase:
    """Shared page/context lifecycle and rotation logic for browser runners.

    Subclasses are expected to provide ``_open_browser`` and
    ``_close_browser`` and to toggle ``_active`` from their context-manager
    methods; this base owns the context, the page and the call counter that
    drives rotation.

    Args:
        browser: Keyword arguments stored for the browser launch call.
        context: Keyword arguments forwarded to ``new_context``.
        span: Rotation intervals; a default ``Span()`` (never rotate) is used
            when omitted.
    """

    def __init__(
        self,
        *,
        browser: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        span: Span | None = None,
    ) -> None:
        self._span = span or Span()
        # Copy the option dicts so later mutation by the caller has no effect.
        self._browser_kw = dict(browser or {})
        self._context_kw = dict(context or {})
        self._browser = None
        self._ctx = None
        self._page: Page | None = None
        # Number of successful page() calls so far.
        self._i = 0
        self._active = False

    def page(self) -> Page:
        """Return the current page, recycling page/context/browser when due.

        The first call opens the browser. On later calls the call counter is
        compared against the span intervals, coarsest level first (browser,
        then context, then page); at most one level is recycled per call, and
        recycling a level also recreates everything beneath it.

        Raises:
            RuntimeError: If the runner is not active.
        """
        if not self._active:
            raise RuntimeError('with ブロックの外で page() を呼べません')
        if self._page is None:
            # No usable page: either the first call, or an earlier rotation
            # failed part-way. Release any leftover browser/context first so
            # relaunching does not orphan it.
            if self._browser is not None or self._ctx is not None:
                self._close_browser()
            self._open_browser()
        elif (b := self._span.browser) and self._i % b == 0:
            self._close_browser()
            self._open_browser()
        elif (c := self._span.context) and self._i % c == 0:
            self._close_context()
            self._open_context()
        elif (p := self._span.page) and self._i % p == 0:
            self._close_page()
            self._open_page()
        self._i += 1
        return self._page

    def _open_page(self) -> None:
        """Open a new page in the current context."""
        self._page = self._ctx.new_page()

    def _close_page(self) -> None:
        """Close the current page, if any.

        The reference is dropped before closing so a failing ``close()`` does
        not leave a stale page handle behind.
        """
        page, self._page = self._page, None
        if page is not None:
            page.close()

    def _open_context(self) -> None:
        """Create a new context with the stored kwargs and open a page in it."""
        self._ctx = self._browser.new_context(**self._context_kw)
        self._open_page()

    def _close_context(self) -> None:
        """Close the current page and then the current context, if any.

        The context is closed even when closing the page raises, so a page
        failure cannot leak the context.
        """
        try:
            self._close_page()
        finally:
            ctx, self._ctx = self._ctx, None
            if ctx is not None:
                ctx.close()
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class PatchrightRunner(_RunnerBase):
    """Runner that drives Chromium through Patchright's sync API.

    Use as a context manager: entering starts Patchright, leaving closes the
    browser and stops Patchright.

    Args:
        browser: Keyword arguments forwarded to ``chromium.launch``.
        context: Keyword arguments forwarded to ``new_context``.
        span: Rotation intervals passed to the base runner.
    """

    def __init__(
        self,
        *,
        browser: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        span: Span | None = None,
    ) -> None:
        super().__init__(browser=browser, context=context, span=span)
        self._pw: Playwright | None = None

    def __enter__(self) -> Self:
        """Start Patchright and mark the runner as active."""
        self._pw = sync_playwright().start()
        self._active = True
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Close the browser, stop Patchright and reset the runner state.

        Patchright is stopped and the state is reset even when closing the
        browser raises; exceptions are never suppressed.
        """
        if not self._active:
            return
        try:
            self._close_browser()
        finally:
            # Reset state first so a failing stop() cannot leave the runner
            # looking active or holding a dead Playwright handle.
            pw, self._pw = self._pw, None
            self._active = False
            self._i = 0
            if pw is not None:
                pw.stop()

    def _open_browser(self) -> None:
        """Launch Chromium with the stored kwargs, then open a context and page."""
        self._browser = self._pw.chromium.launch(**self._browser_kw)
        self._open_context()

    def _close_browser(self) -> None:
        """Close the context and then the browser, if any.

        The browser is closed even when closing the context raises.
        """
        try:
            self._close_context()
        finally:
            browser, self._browser = self._browser, None
            if browser is not None:
                browser.close()
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class CamoufoxRunner(_RunnerBase):
    """Runner that drives a Camoufox browser.

    Use as a context manager. Each browser is a ``Camoufox`` context manager
    entered on a private ``ExitStack``, which lets it be closed and reopened
    while the runner stays active.

    Args:
        browser: Keyword arguments forwarded to ``Camoufox``.
        context: Keyword arguments forwarded to ``new_context``.
        span: Rotation intervals passed to the base runner.
    """

    def __init__(
        self,
        *,
        browser: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        span: Span | None = None,
    ) -> None:
        super().__init__(browser=browser, context=context, span=span)
        self._fox_stack: ExitStack | None = None

    def __enter__(self) -> Self:
        """Mark the runner as active; the browser is opened by ``_open_browser``."""
        self._active = True
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Close the browser and reset the runner state.

        The state is reset even when closing the browser raises; exceptions
        are never suppressed.
        """
        if not self._active:
            return
        try:
            self._close_browser()
        finally:
            self._active = False
            self._i = 0

    def _open_browser(self) -> None:
        """Enter a new ``Camoufox`` on a fresh stack, then open a context and page."""
        self._fox_stack = ExitStack()
        self._browser = self._fox_stack.enter_context(Camoufox(**self._browser_kw))
        self._open_context()

    def _close_browser(self) -> None:
        """Close the context and then exit the Camoufox context manager.

        The stack is closed even when closing the context raises, so the
        browser it holds is always released.
        """
        try:
            self._close_context()
        finally:
            stack, self._fox_stack = self._fox_stack, None
            self._browser = None
            if stack is not None:
                stack.close()
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def run_patchright(
    *,
    browser: dict[str, Any] | None = None,
    context: dict[str, Any] | None = None,
    span: Span | None = None,
) -> PatchrightRunner:
    """Build a ``PatchrightRunner`` from the given options.

    All three keyword arguments are forwarded unchanged to the runner's
    constructor.
    """
    runner = PatchrightRunner(browser=browser, context=context, span=span)
    return runner
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def run_camoufox(
    *,
    browser: dict[str, Any] | None = None,
    context: dict[str, Any] | None = None,
    span: Span | None = None,
) -> CamoufoxRunner:
    """Build a ``CamoufoxRunner`` from the given options.

    All three keyword arguments are forwarded unchanged to the runner's
    constructor.
    """
    runner = CamoufoxRunner(browser=browser, context=context, span=span)
    return runner
|