quickquery 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quickquery
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: 自分用・非汎用
5
5
  Requires-Python: >=3.12
6
6
  Description-Content-Type: text/markdown
@@ -13,66 +13,14 @@ Requires-Dist: camoufox>=0.4
13
13
  Requires-Dist: loguru>=0.7
14
14
  Requires-Dist: tqdm>=4.66
15
15
 
16
- # QuickQuery
17
-
18
16
  自分用・非汎用
19
17
 
20
18
  ## インストール
21
19
  `uv add quickquery`
22
20
 
23
- `open_patchright` を使うとき:Google ChromeをPCにインストールしておく。
24
- `open_camoufox` を使うとき:`uv run camoufox fetch`
25
-
26
- ## 実装機能
27
-
28
- ### quickquery
29
-
30
- - `quick_page(page: Page) -> QuickPage`
31
- - `quick_element(page: Page, elem: ElementHandle | None) -> QuickElement`
32
- - `quick_element_group(page: Page, elems: list[QuickElement]) -> QuickElementGroup`
33
- - `quick_frame(page: Page, frame: Frame | None) -> QuickFrame`
34
- - `quick_shadow_root(page: Page, host: ElementHandle | None) -> QuickShadowRoot`
35
- - `quick_parser(parser: LexborHTMLParser) -> QuickParser`
36
- - `quick_node(node: LexborNode | None) -> QuickNode`
37
- - `quick_node_group(nodes: list[QuickNode]) -> QuickNodeGroup`
38
- - `QuickPage`
39
- - `QuickElement`
40
- - `QuickElementGroup`
41
- - `ElementScan`
42
- - `QuickFrame`
43
- - `QuickShadowRoot`
44
- - `QuickParser`
45
- - `QuickNode`
46
- - `QuickNodeGroup`
47
- - `NodeScan`
48
-
49
- ### quickquery.utils
50
-
51
- - `parse_html(path: Path) -> LexborHTMLParser | None`
52
- - `meta_html(meta: Mapping[str, object | None]) -> str`
53
- - `from_here(file: str) -> Callable[[str], Path]`
54
- - `append_csv(path: Path, row: dict) -> None`
55
- - `write_csv(path: Path, rows: list[dict]) -> None`
56
- - `write_parquet(path: Path, rows: list[dict]) -> None`
57
- - `hash_name(key: str) -> str`
58
- - `write_text(path: Path, data: str) -> bool`
59
- - `write_bytes(path: Path, data: bytes) -> bool`
60
- - `save_log(path: Path, level: str = 'WARNING') -> None`
61
- - `process_map[T, R](worker: Callable[[T], R], items: Iterable[T], workers: int | None = None, *, chunksize: int | None = None) -> list[R | None]`
62
- - `glob_paths(dir_path: Path, pattern: str = '*.html') -> list[str]`
63
- - `counter(start: int = 1) -> Iterator[int]`
64
-
65
- ### quickquery.live
66
-
67
- - `RecycleEvery`
68
- - `PatchrightSession`
69
- - `CamoufoxSession`
70
- - `open_patchright(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> PatchrightSession`
71
- - `open_camoufox(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> CamoufoxSession`
72
- - `PatchrightSession.page() -> Page`
73
- - `CamoufoxSession.page() -> Page`
74
-
75
- `browser_options` / `context_options` は Playwright へ渡す起動オプション。`recycle` は quickquery の再生成間隔(`page()` 呼び出し回数ごとに独立して効く。省略時は再生成しない)。`page()` を呼ぶたびに内部カウントが 1 進む。
21
+ `open_patchright` を使うとき:Google ChromeをPCにインストールしておく。
22
+ `open_camoufox` を使うとき:`uv run camoufox fetch`
23
+
76
24
 
77
25
  ## 使用例
78
26
 
@@ -163,8 +111,10 @@ with open_patchright(
163
111
  append_csv(here('csv/failed.csv'), {
164
112
  'url_index': url_index,
165
113
  'request_url': request_url,
114
+ 'final_url': page.url,
166
115
  'reason': 'write_text',
167
116
  })
117
+ continue
168
118
 
169
119
  page.screenshot(path=here(f'media/{url_index}-full-page.png'), full_page=True)
170
120
 
@@ -198,7 +148,7 @@ def main():
198
148
  write_parquet(here('parquet/extract.parquet'), results)
199
149
 
200
150
  def extract(file_path: str) -> dict | None:
201
- if not (parser := parse_html(Path(file_path))):
151
+ if not (parser := parse_html(Path(file_path).read_bytes())):
202
152
  return None
203
153
  p = quick_parser(parser)
204
154
  dt_scan = p.ii('dt').scan
@@ -1,63 +1,11 @@
1
- # QuickQuery
2
-
3
1
  自分用・非汎用
4
2
 
5
3
  ## インストール
6
4
  `uv add quickquery`
7
5
 
8
- `open_patchright` を使うとき:Google ChromeをPCにインストールしておく。
9
- `open_camoufox` を使うとき:`uv run camoufox fetch`
10
-
11
- ## 実装機能
12
-
13
- ### quickquery
14
-
15
- - `quick_page(page: Page) -> QuickPage`
16
- - `quick_element(page: Page, elem: ElementHandle | None) -> QuickElement`
17
- - `quick_element_group(page: Page, elems: list[QuickElement]) -> QuickElementGroup`
18
- - `quick_frame(page: Page, frame: Frame | None) -> QuickFrame`
19
- - `quick_shadow_root(page: Page, host: ElementHandle | None) -> QuickShadowRoot`
20
- - `quick_parser(parser: LexborHTMLParser) -> QuickParser`
21
- - `quick_node(node: LexborNode | None) -> QuickNode`
22
- - `quick_node_group(nodes: list[QuickNode]) -> QuickNodeGroup`
23
- - `QuickPage`
24
- - `QuickElement`
25
- - `QuickElementGroup`
26
- - `ElementScan`
27
- - `QuickFrame`
28
- - `QuickShadowRoot`
29
- - `QuickParser`
30
- - `QuickNode`
31
- - `QuickNodeGroup`
32
- - `NodeScan`
33
-
34
- ### quickquery.utils
35
-
36
- - `parse_html(path: Path) -> LexborHTMLParser | None`
37
- - `meta_html(meta: Mapping[str, object | None]) -> str`
38
- - `from_here(file: str) -> Callable[[str], Path]`
39
- - `append_csv(path: Path, row: dict) -> None`
40
- - `write_csv(path: Path, rows: list[dict]) -> None`
41
- - `write_parquet(path: Path, rows: list[dict]) -> None`
42
- - `hash_name(key: str) -> str`
43
- - `write_text(path: Path, data: str) -> bool`
44
- - `write_bytes(path: Path, data: bytes) -> bool`
45
- - `save_log(path: Path, level: str = 'WARNING') -> None`
46
- - `process_map[T, R](worker: Callable[[T], R], items: Iterable[T], workers: int | None = None, *, chunksize: int | None = None) -> list[R | None]`
47
- - `glob_paths(dir_path: Path, pattern: str = '*.html') -> list[str]`
48
- - `counter(start: int = 1) -> Iterator[int]`
49
-
50
- ### quickquery.live
51
-
52
- - `RecycleEvery`
53
- - `PatchrightSession`
54
- - `CamoufoxSession`
55
- - `open_patchright(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> PatchrightSession`
56
- - `open_camoufox(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> CamoufoxSession`
57
- - `PatchrightSession.page() -> Page`
58
- - `CamoufoxSession.page() -> Page`
59
-
60
- `browser_options` / `context_options` は Playwright へ渡す起動オプション。`recycle` は quickquery の再生成間隔(`page()` 呼び出し回数ごとに独立して効く。省略時は再生成しない)。`page()` を呼ぶたびに内部カウントが 1 進む。
6
+ `open_patchright` を使うとき:Google ChromeをPCにインストールしておく。
7
+ `open_camoufox` を使うとき:`uv run camoufox fetch`
8
+
61
9
 
62
10
  ## 使用例
63
11
 
@@ -148,8 +96,10 @@ with open_patchright(
148
96
  append_csv(here('csv/failed.csv'), {
149
97
  'url_index': url_index,
150
98
  'request_url': request_url,
99
+ 'final_url': page.url,
151
100
  'reason': 'write_text',
152
101
  })
102
+ continue
153
103
 
154
104
  page.screenshot(path=here(f'media/{url_index}-full-page.png'), full_page=True)
155
105
 
@@ -183,7 +133,7 @@ def main():
183
133
  write_parquet(here('parquet/extract.parquet'), results)
184
134
 
185
135
  def extract(file_path: str) -> dict | None:
186
- if not (parser := parse_html(Path(file_path))):
136
+ if not (parser := parse_html(Path(file_path).read_bytes())):
187
137
  return None
188
138
  p = quick_parser(parser)
189
139
  dt_scan = p.ii('dt').scan
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "quickquery"
3
- version = "0.1.1"
3
+ version = "0.1.3"
4
4
  description = "自分用・非汎用"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.12"
@@ -36,8 +36,8 @@ class _SessionBase:
36
36
  recycle: RecycleEvery | None = None,
37
37
  ) -> None:
38
38
  self._recycle = recycle or RecycleEvery()
39
- self._browser_options = dict(browser_options or {})
40
- self._context_options = dict(context_options or {})
39
+ self._browser_options = browser_options or {}
40
+ self._context_options = context_options or {}
41
41
  self._browser = None
42
42
  self._context = None
43
43
  self._page: Page | None = None
@@ -47,7 +47,7 @@ class _SessionBase:
47
47
  def page(self) -> Page:
48
48
  if not self._entered:
49
49
  raise RuntimeError('with ブロックの外で page() を呼べません')
50
- if self._page is None:
50
+ if self._browser is None:
51
51
  self._open_browser()
52
52
  elif (b := self._recycle.browser) and self._page_calls % b == 0:
53
53
  self._close_browser()
@@ -67,7 +67,7 @@ class _SessionBase:
67
67
  def _close_page(self) -> None:
68
68
  if self._page is not None:
69
69
  self._page.close()
70
- self._page = None
70
+ self._page = None
71
71
 
72
72
  def _open_context(self) -> None:
73
73
  self._context = self._browser.new_context(**self._context_options)
@@ -77,7 +77,7 @@ class _SessionBase:
77
77
  self._close_page()
78
78
  if self._context is not None:
79
79
  self._context.close()
80
- self._context = None
80
+ self._context = None
81
81
 
82
82
 
83
83
  class PatchrightSession(_SessionBase):
@@ -111,8 +111,8 @@ class PatchrightSession(_SessionBase):
111
111
  self._close_browser()
112
112
  self._pw.stop()
113
113
  self._pw = None
114
- self._entered = False
115
114
  self._page_calls = 0
115
+ self._entered = False
116
116
 
117
117
  def _open_browser(self) -> None:
118
118
  self._browser = self._pw.chromium.launch(**self._browser_options)
@@ -153,19 +153,20 @@ class CamoufoxSession(_SessionBase):
153
153
  if not self._entered:
154
154
  return
155
155
  self._close_browser()
156
- self._entered = False
157
156
  self._page_calls = 0
157
+ self._entered = False
158
158
 
159
159
  def _open_browser(self) -> None:
160
+ fox = Camoufox(**self._browser_options)
160
161
  self._stack = ExitStack()
161
- self._browser = self._stack.enter_context(Camoufox(**self._browser_options))
162
+ self._browser = self._stack.enter_context(fox)
162
163
  self._open_context()
163
164
 
164
165
  def _close_browser(self) -> None:
165
166
  self._close_context()
166
167
  if self._stack is not None:
167
168
  self._stack.close()
168
- self._stack = None
169
+ self._stack = None
169
170
  self._browser = None
170
171
 
171
172
 
@@ -16,11 +16,11 @@ def _ensure_parent(path: Path) -> None:
16
16
  path.parent.mkdir(parents=True, exist_ok=True)
17
17
 
18
18
 
19
- def parse_html(path: Path) -> LexborHTMLParser | None:
19
+ def parse_html(html: str | bytes) -> LexborHTMLParser | None:
20
20
  try:
21
- return LexborHTMLParser(path.read_bytes())
21
+ return LexborHTMLParser(html)
22
22
  except Exception as e:
23
- logger.error(f'[parse_html] {path} {type(e).__name__}: {e}')
23
+ logger.error(f'[parse_html] {type(e).__name__}: {e}')
24
24
  return None
25
25
 
26
26
 
File without changes
File without changes
File without changes