quickquery 0.1.1__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {quickquery-0.1.1 → quickquery-0.1.2}/PKG-INFO +7 -57
- {quickquery-0.1.1 → quickquery-0.1.2}/README.md +6 -56
- {quickquery-0.1.1 → quickquery-0.1.2}/pyproject.toml +1 -1
- {quickquery-0.1.1 → quickquery-0.1.2}/quickquery/utils.py +3 -3
- {quickquery-0.1.1 → quickquery-0.1.2}/.gitignore +0 -0
- {quickquery-0.1.1 → quickquery-0.1.2}/.python-version +0 -0
- {quickquery-0.1.1 → quickquery-0.1.2}/LICENSE +0 -0
- {quickquery-0.1.1 → quickquery-0.1.2}/quickquery/__init__.py +0 -0
- {quickquery-0.1.1 → quickquery-0.1.2}/quickquery/core.py +0 -0
- {quickquery-0.1.1 → quickquery-0.1.2}/quickquery/live.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: quickquery
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: 自分用・非汎用
|
|
5
5
|
Requires-Python: >=3.12
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -13,66 +13,14 @@ Requires-Dist: camoufox>=0.4
|
|
|
13
13
|
Requires-Dist: loguru>=0.7
|
|
14
14
|
Requires-Dist: tqdm>=4.66
|
|
15
15
|
|
|
16
|
-
# QuickQuery
|
|
17
|
-
|
|
18
16
|
自分用・非汎用
|
|
19
17
|
|
|
20
18
|
## インストール
|
|
21
19
|
`uv add quickquery`
|
|
22
20
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
## 実装機能
|
|
27
|
-
|
|
28
|
-
### quickquery
|
|
29
|
-
|
|
30
|
-
- `quick_page(page: Page) -> QuickPage`
|
|
31
|
-
- `quick_element(page: Page, elem: ElementHandle | None) -> QuickElement`
|
|
32
|
-
- `quick_element_group(page: Page, elems: list[QuickElement]) -> QuickElementGroup`
|
|
33
|
-
- `quick_frame(page: Page, frame: Frame | None) -> QuickFrame`
|
|
34
|
-
- `quick_shadow_root(page: Page, host: ElementHandle | None) -> QuickShadowRoot`
|
|
35
|
-
- `quick_parser(parser: LexborHTMLParser) -> QuickParser`
|
|
36
|
-
- `quick_node(node: LexborNode | None) -> QuickNode`
|
|
37
|
-
- `quick_node_group(nodes: list[QuickNode]) -> QuickNodeGroup`
|
|
38
|
-
- `QuickPage`
|
|
39
|
-
- `QuickElement`
|
|
40
|
-
- `QuickElementGroup`
|
|
41
|
-
- `ElementScan`
|
|
42
|
-
- `QuickFrame`
|
|
43
|
-
- `QuickShadowRoot`
|
|
44
|
-
- `QuickParser`
|
|
45
|
-
- `QuickNode`
|
|
46
|
-
- `QuickNodeGroup`
|
|
47
|
-
- `NodeScan`
|
|
48
|
-
|
|
49
|
-
### quickquery.utils
|
|
50
|
-
|
|
51
|
-
- `parse_html(path: Path) -> LexborHTMLParser | None`
|
|
52
|
-
- `meta_html(meta: Mapping[str, object | None]) -> str`
|
|
53
|
-
- `from_here(file: str) -> Callable[[str], Path]`
|
|
54
|
-
- `append_csv(path: Path, row: dict) -> None`
|
|
55
|
-
- `write_csv(path: Path, rows: list[dict]) -> None`
|
|
56
|
-
- `write_parquet(path: Path, rows: list[dict]) -> None`
|
|
57
|
-
- `hash_name(key: str) -> str`
|
|
58
|
-
- `write_text(path: Path, data: str) -> bool`
|
|
59
|
-
- `write_bytes(path: Path, data: bytes) -> bool`
|
|
60
|
-
- `save_log(path: Path, level: str = 'WARNING') -> None`
|
|
61
|
-
- `process_map[T, R](worker: Callable[[T], R], items: Iterable[T], workers: int | None = None, *, chunksize: int | None = None) -> list[R | None]`
|
|
62
|
-
- `glob_paths(dir_path: Path, pattern: str = '*.html') -> list[str]`
|
|
63
|
-
- `counter(start: int = 1) -> Iterator[int]`
|
|
64
|
-
|
|
65
|
-
### quickquery.live
|
|
66
|
-
|
|
67
|
-
- `RecycleEvery`
|
|
68
|
-
- `PatchrightSession`
|
|
69
|
-
- `CamoufoxSession`
|
|
70
|
-
- `open_patchright(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> PatchrightSession`
|
|
71
|
-
- `open_camoufox(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> CamoufoxSession`
|
|
72
|
-
- `PatchrightSession.page() -> Page`
|
|
73
|
-
- `CamoufoxSession.page() -> Page`
|
|
74
|
-
|
|
75
|
-
`browser_options` / `context_options` は Playwright へ渡す起動オプション。`recycle` は quickquery の再生成間隔(`page()` 呼び出し回数ごとに独立して効く。省略時は再生成しない)。`page()` を呼ぶたびに内部カウントが 1 進む。
|
|
21
|
+
`open_patchright` を使うとき:Google ChromeをPCにインストールしておく。
|
|
22
|
+
`open_camoufox` を使うとき:`uv run camoufox fetch`
|
|
23
|
+
|
|
76
24
|
|
|
77
25
|
## 使用例
|
|
78
26
|
|
|
@@ -163,8 +111,10 @@ with open_patchright(
|
|
|
163
111
|
append_csv(here('csv/failed.csv'), {
|
|
164
112
|
'url_index': url_index,
|
|
165
113
|
'request_url': request_url,
|
|
114
|
+
'final_url': page.url,
|
|
166
115
|
'reason': 'write_text',
|
|
167
116
|
})
|
|
117
|
+
continue
|
|
168
118
|
|
|
169
119
|
page.screenshot(path=here(f'media/{url_index}-full-page.png'), full_page=True)
|
|
170
120
|
|
|
@@ -198,7 +148,7 @@ def main():
|
|
|
198
148
|
write_parquet(here('parquet/extract.parquet'), results)
|
|
199
149
|
|
|
200
150
|
def extract(file_path: str) -> dict | None:
|
|
201
|
-
if not (parser := parse_html(Path(file_path))):
|
|
151
|
+
if not (parser := parse_html(Path(file_path).read_bytes())):
|
|
202
152
|
return None
|
|
203
153
|
p = quick_parser(parser)
|
|
204
154
|
dt_scan = p.ii('dt').scan
|
|
@@ -1,63 +1,11 @@
|
|
|
1
|
-
# QuickQuery
|
|
2
|
-
|
|
3
1
|
自分用・非汎用
|
|
4
2
|
|
|
5
3
|
## インストール
|
|
6
4
|
`uv add quickquery`
|
|
7
5
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
## 実装機能
|
|
12
|
-
|
|
13
|
-
### quickquery
|
|
14
|
-
|
|
15
|
-
- `quick_page(page: Page) -> QuickPage`
|
|
16
|
-
- `quick_element(page: Page, elem: ElementHandle | None) -> QuickElement`
|
|
17
|
-
- `quick_element_group(page: Page, elems: list[QuickElement]) -> QuickElementGroup`
|
|
18
|
-
- `quick_frame(page: Page, frame: Frame | None) -> QuickFrame`
|
|
19
|
-
- `quick_shadow_root(page: Page, host: ElementHandle | None) -> QuickShadowRoot`
|
|
20
|
-
- `quick_parser(parser: LexborHTMLParser) -> QuickParser`
|
|
21
|
-
- `quick_node(node: LexborNode | None) -> QuickNode`
|
|
22
|
-
- `quick_node_group(nodes: list[QuickNode]) -> QuickNodeGroup`
|
|
23
|
-
- `QuickPage`
|
|
24
|
-
- `QuickElement`
|
|
25
|
-
- `QuickElementGroup`
|
|
26
|
-
- `ElementScan`
|
|
27
|
-
- `QuickFrame`
|
|
28
|
-
- `QuickShadowRoot`
|
|
29
|
-
- `QuickParser`
|
|
30
|
-
- `QuickNode`
|
|
31
|
-
- `QuickNodeGroup`
|
|
32
|
-
- `NodeScan`
|
|
33
|
-
|
|
34
|
-
### quickquery.utils
|
|
35
|
-
|
|
36
|
-
- `parse_html(path: Path) -> LexborHTMLParser | None`
|
|
37
|
-
- `meta_html(meta: Mapping[str, object | None]) -> str`
|
|
38
|
-
- `from_here(file: str) -> Callable[[str], Path]`
|
|
39
|
-
- `append_csv(path: Path, row: dict) -> None`
|
|
40
|
-
- `write_csv(path: Path, rows: list[dict]) -> None`
|
|
41
|
-
- `write_parquet(path: Path, rows: list[dict]) -> None`
|
|
42
|
-
- `hash_name(key: str) -> str`
|
|
43
|
-
- `write_text(path: Path, data: str) -> bool`
|
|
44
|
-
- `write_bytes(path: Path, data: bytes) -> bool`
|
|
45
|
-
- `save_log(path: Path, level: str = 'WARNING') -> None`
|
|
46
|
-
- `process_map[T, R](worker: Callable[[T], R], items: Iterable[T], workers: int | None = None, *, chunksize: int | None = None) -> list[R | None]`
|
|
47
|
-
- `glob_paths(dir_path: Path, pattern: str = '*.html') -> list[str]`
|
|
48
|
-
- `counter(start: int = 1) -> Iterator[int]`
|
|
49
|
-
|
|
50
|
-
### quickquery.live
|
|
51
|
-
|
|
52
|
-
- `RecycleEvery`
|
|
53
|
-
- `PatchrightSession`
|
|
54
|
-
- `CamoufoxSession`
|
|
55
|
-
- `open_patchright(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> PatchrightSession`
|
|
56
|
-
- `open_camoufox(*, browser_options: dict | None = None, context_options: dict | None = None, recycle: RecycleEvery | None = None) -> CamoufoxSession`
|
|
57
|
-
- `PatchrightSession.page() -> Page`
|
|
58
|
-
- `CamoufoxSession.page() -> Page`
|
|
59
|
-
|
|
60
|
-
`browser_options` / `context_options` は Playwright へ渡す起動オプション。`recycle` は quickquery の再生成間隔(`page()` 呼び出し回数ごとに独立して効く。省略時は再生成しない)。`page()` を呼ぶたびに内部カウントが 1 進む。
|
|
6
|
+
`open_patchright` を使うとき:Google ChromeをPCにインストールしておく。
|
|
7
|
+
`open_camoufox` を使うとき:`uv run camoufox fetch`
|
|
8
|
+
|
|
61
9
|
|
|
62
10
|
## 使用例
|
|
63
11
|
|
|
@@ -148,8 +96,10 @@ with open_patchright(
|
|
|
148
96
|
append_csv(here('csv/failed.csv'), {
|
|
149
97
|
'url_index': url_index,
|
|
150
98
|
'request_url': request_url,
|
|
99
|
+
'final_url': page.url,
|
|
151
100
|
'reason': 'write_text',
|
|
152
101
|
})
|
|
102
|
+
continue
|
|
153
103
|
|
|
154
104
|
page.screenshot(path=here(f'media/{url_index}-full-page.png'), full_page=True)
|
|
155
105
|
|
|
@@ -183,7 +133,7 @@ def main():
|
|
|
183
133
|
write_parquet(here('parquet/extract.parquet'), results)
|
|
184
134
|
|
|
185
135
|
def extract(file_path: str) -> dict | None:
|
|
186
|
-
if not (parser := parse_html(Path(file_path))):
|
|
136
|
+
if not (parser := parse_html(Path(file_path).read_bytes())):
|
|
187
137
|
return None
|
|
188
138
|
p = quick_parser(parser)
|
|
189
139
|
dt_scan = p.ii('dt').scan
|
|
@@ -16,11 +16,11 @@ def _ensure_parent(path: Path) -> None:
|
|
|
16
16
|
path.parent.mkdir(parents=True, exist_ok=True)
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def parse_html(path: Path) -> LexborHTMLParser | None:
|
|
19
|
+
def parse_html(html: str | bytes) -> LexborHTMLParser | None:
|
|
20
20
|
try:
|
|
21
|
-
return LexborHTMLParser(
|
|
21
|
+
return LexborHTMLParser(html)
|
|
22
22
|
except Exception as e:
|
|
23
|
-
logger.error(f'[parse_html] {
|
|
23
|
+
logger.error(f'[parse_html] {type(e).__name__}: {e}')
|
|
24
24
|
return None
|
|
25
25
|
|
|
26
26
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|