litescrape 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
litescrape/__init__.py ADDED
@@ -0,0 +1,41 @@
1
+ from .core import (
2
+ ElementHandle,
3
+ ElementScan,
4
+ Frame,
5
+ NodeScan,
6
+ Page,
7
+ Response,
8
+ LiteElement,
9
+ LiteElementGroup,
10
+ LiteFrame,
11
+ LiteShadowRoot,
12
+ LiteNode,
13
+ LiteNodeGroup,
14
+ LitePage,
15
+ LiteParser,
16
+ lite_node,
17
+ lite_node_group,
18
+ lite_page,
19
+ lite_parser,
20
+ )
21
+
22
# Public API of the package: exactly the names imported from ``.core`` above,
# listed in the same order as before.
__all__ = [
    "Page", "ElementHandle", "Frame", "Response",
    "lite_page", "lite_parser", "lite_node", "lite_node_group",
    "LitePage", "LiteFrame", "LiteShadowRoot",
    "LiteElement", "LiteElementGroup", "ElementScan",
    "LiteParser", "LiteNode", "LiteNodeGroup", "NodeScan",
]
litescrape/browser.py ADDED
@@ -0,0 +1,180 @@
1
+ from contextlib import ExitStack
2
+ from dataclasses import dataclass, fields
3
+ from types import TracebackType
4
+ from typing import Any, Self
5
+
6
+ from camoufox.sync_api import Camoufox
7
+ from patchright.sync_api import (
8
+ Page as PatchrightPage,
9
+ Playwright,
10
+ sync_playwright,
11
+ )
12
+ from playwright.sync_api import Page as PlaywrightPage
13
+
14
+ Page = PatchrightPage | PlaywrightPage
15
+
16
+
17
+ @dataclass(frozen=True, slots=True)
18
+ class Span:
19
+ browser: int | None = None
20
+ context: int | None = None
21
+ page: int | None = None
22
+
23
+ def __post_init__(self) -> None:
24
+ for f in fields(self):
25
+ value = getattr(self, f.name)
26
+ if value is not None and value < 1:
27
+ raise ValueError(f'{f.name} は 1 以上で指定してください (got {value})')
28
+
29
+
30
class _RunnerBase:
    """Shared page-rotation logic for the browser runners.

    A runner hands out a page through :meth:`page` and, depending on the
    configured :class:`Span`, replaces the page, the context or the whole
    browser every N calls.  Subclasses provide ``_open_browser`` /
    ``_close_browser`` and the context-manager protocol that toggles
    ``_active``.
    """

    def __init__(
        self,
        *,
        browser: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        span: Span | None = None,
    ) -> None:
        """Store the options; nothing is opened until :meth:`page` is called.

        Args:
            browser: Keyword arguments kept for the subclass's browser launch.
            context: Keyword arguments forwarded to ``new_context``.
            span: Rotation intervals; defaults to ``Span()`` (no rotation).
        """
        self._span = span or Span()
        # Copy the dicts so later mutation by the caller cannot affect us.
        self._browser_kw = dict(browser or {})
        self._context_kw = dict(context or {})
        self._browser = None
        self._ctx = None
        self._page: Page | None = None
        self._i = 0  # number of page() calls served so far
        self._active = False

    def page(self) -> Page:
        """Return the current page, rotating resources when an interval is hit.

        The first call opens browser, context and page.  On later calls the
        call counter is checked against the span in the order browser,
        context, page; only the first matching level is recycled (recycling
        a level also recreates everything below it).

        Raises:
            RuntimeError: if the runner is not active (``_active`` is False).
        """
        if not self._active:
            raise RuntimeError('with ブロックの外で page() を呼べません')
        if self._page is None:
            self._open_browser()
        elif (b := self._span.browser) and self._i % b == 0:
            self._close_browser()
            self._open_browser()
        elif (c := self._span.context) and self._i % c == 0:
            self._close_context()
            self._open_context()
        elif (p := self._span.page) and self._i % p == 0:
            self._close_page()
            self._open_page()
        self._i += 1
        return self._page

    def _open_browser(self) -> None:
        """Launch the browser and open a context; must be overridden."""
        raise NotImplementedError(f'{type(self).__name__}._open_browser')

    def _close_browser(self) -> None:
        """Tear down the browser and everything below it; must be overridden."""
        raise NotImplementedError(f'{type(self).__name__}._close_browser')

    def _open_page(self) -> None:
        """Open a new page in the current context."""
        self._page = self._ctx.new_page()

    def _close_page(self) -> None:
        """Close the current page, if any.

        The handle is dropped before ``close()`` is called so that a failing
        close never leaves a stale page behind.
        """
        page, self._page = self._page, None
        if page is not None:
            page.close()

    def _open_context(self) -> None:
        """Create a new context with the stored options and open a page in it."""
        self._ctx = self._browser.new_context(**self._context_kw)
        self._open_page()

    def _close_context(self) -> None:
        """Close the current page and context, if any.

        The context is closed even when closing the page raises, and both
        handles are cleared in every case; the error still propagates.
        """
        ctx, self._ctx = self._ctx, None
        try:
            self._close_page()
        finally:
            if ctx is not None:
                ctx.close()
81
+
82
+
83
class PatchrightRunner(_RunnerBase):
    """Runner that launches Chromium through patchright's sync API.

    Use it as a context manager: ``__enter__`` starts the Playwright driver
    and ``__exit__`` closes the browser and stops the driver.
    """

    def __init__(
        self,
        *,
        browser: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        span: Span | None = None,
    ) -> None:
        """Store the options; see ``_RunnerBase.__init__`` for their meaning."""
        super().__init__(browser=browser, context=context, span=span)
        self._pw: Playwright | None = None

    def __enter__(self) -> Self:
        """Start the Playwright driver and mark the runner active."""
        self._pw = sync_playwright().start()
        self._active = True
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Close the browser, stop the driver and reset the runner.

        The driver is stopped and the state reset even when closing the
        browser raises, so a teardown error cannot leak the driver process
        or leave the runner marked active.  Exceptions are not suppressed.
        """
        if not self._active:
            return
        try:
            self._close_browser()
        finally:
            pw, self._pw = self._pw, None
            self._active = False
            self._i = 0
            if pw is not None:
                pw.stop()

    def _open_browser(self) -> None:
        """Launch Chromium with the stored options and open a context."""
        self._browser = self._pw.chromium.launch(**self._browser_kw)
        self._open_context()

    def _close_browser(self) -> None:
        """Close context and browser, if any.

        The browser is closed even when closing the context raises, and the
        handle is cleared in every case.
        """
        browser, self._browser = self._browser, None
        try:
            self._close_context()
        finally:
            if browser is not None:
                browser.close()
123
+
124
+
125
class CamoufoxRunner(_RunnerBase):
    """Runner that launches a browser through ``Camoufox``.

    Use it as a context manager.  Each browser is created by entering a
    ``Camoufox(...)`` context manager on a private ``ExitStack``; closing the
    stack is what shuts that browser down.
    """

    def __init__(
        self,
        *,
        browser: dict[str, Any] | None = None,
        context: dict[str, Any] | None = None,
        span: Span | None = None,
    ) -> None:
        """Store the options; see ``_RunnerBase.__init__`` for their meaning."""
        super().__init__(browser=browser, context=context, span=span)
        self._fox_stack: ExitStack | None = None

    def __enter__(self) -> Self:
        """Mark the runner active; the browser is opened lazily by ``page()``."""
        self._active = True
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        tb: TracebackType | None,
    ) -> None:
        """Close the browser and reset the runner.

        The state is reset even when closing the browser raises.  Exceptions
        are not suppressed.
        """
        if not self._active:
            return
        try:
            self._close_browser()
        finally:
            self._active = False
            self._i = 0

    def _open_browser(self) -> None:
        """Enter a new ``Camoufox`` with the stored options and open a context."""
        self._fox_stack = ExitStack()
        self._browser = self._fox_stack.enter_context(Camoufox(**self._browser_kw))
        self._open_context()

    def _close_browser(self) -> None:
        """Close the context and exit the ``Camoufox`` context manager, if any.

        The stack is closed even when closing the context raises, and the
        handles are cleared in every case.
        """
        stack, self._fox_stack = self._fox_stack, None
        self._browser = None
        try:
            self._close_context()
        finally:
            if stack is not None:
                stack.close()
163
+
164
+
165
def run_patchright(
    *,
    browser: dict[str, Any] | None = None,
    context: dict[str, Any] | None = None,
    span: Span | None = None,
) -> PatchrightRunner:
    """Build a :class:`PatchrightRunner` from the given options.

    The arguments are passed through unchanged.  The returned runner has not
    been entered yet; use it in a ``with`` statement.
    """
    options = {'browser': browser, 'context': context, 'span': span}
    return PatchrightRunner(**options)
172
+
173
+
174
def run_camoufox(
    *,
    browser: dict[str, Any] | None = None,
    context: dict[str, Any] | None = None,
    span: Span | None = None,
) -> CamoufoxRunner:
    """Build a :class:`CamoufoxRunner` from the given options.

    The arguments are passed through unchanged.  The returned runner has not
    been entered yet; use it in a ``with`` statement.
    """
    options = {'browser': browser, 'context': context, 'span': span}
    return CamoufoxRunner(**options)