quickplay 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,8 @@
1
+ __pycache__/
2
+ *.pyc
3
+ .venv/
4
+ .pytest_cache/
5
+ .mypy_cache/
6
+ .DS_Store
7
+ dist/
8
+ *.egg-info/
@@ -0,0 +1 @@
1
+ 3.12
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Nishizawa Takamasa
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.4
2
+ Name: quickplay
3
+ Version: 1.0.0
4
+ Summary: Add your description here
5
+ Requires-Python: >=3.12
6
+ Description-Content-Type: text/markdown
7
+ License-File: LICENSE
8
+
9
+ # quickplay
@@ -0,0 +1 @@
1
+ # quickplay
@@ -0,0 +1,7 @@
1
+ [project]
2
+ name = "quickplay"
3
+ version = "1.0.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = []
@@ -0,0 +1 @@
1
+ from .quickpage import PlayPage, BasePaths, sleep_between, append_csv, run_scraper
@@ -0,0 +1,165 @@
1
+ import random
2
+ import re
3
+ import time
4
+ import unicodedata as ud
5
+ from pathlib import Path
6
+ from typing import Callable
7
+
8
+ import pandas as pd
9
+ from playwright.sync_api import sync_playwright, Page, ElementHandle
10
+
11
+
12
+
13
class PlayPage:
    """None-tolerant convenience helpers around a Playwright ``Page``.

    Naming scheme of the query helpers:

    * ``ss`` returns every match, ``s`` only the first one (or ``None``).
    * ``_re`` keeps only elements whose text matches a regular expression.
    * ``_in`` searches below a given element instead of the whole page.

    Helpers that take an element accept ``None`` and pass it through, so
    lookups can be chained without intermediate ``None`` checks.
    """

    def __init__(self, page: Page) -> None:
        """Store the page that all page-level helpers operate on."""
        self._page = page

    def first(self, elems: list[ElementHandle]) -> ElementHandle | None:
        """Return the first item of ``elems``, or ``None`` if it is empty."""
        if not elems:
            return None
        return elems[0]

    def re_filter(self, pattern: str, elems: list[ElementHandle]) -> list[ElementHandle]:
        """Keep the elements whose text content matches ``pattern``.

        The text is NFKC-normalized before ``re.search`` is applied, so
        e.g. full-width characters match their ASCII counterparts.
        Elements whose text is ``None`` are dropped.
        """
        kept = []
        for candidate in elems:
            content = self.text_c(candidate)
            if content is None:
                continue
            if re.search(pattern, ud.normalize("NFKC", content)):
                kept.append(candidate)
        return kept

    def ss(self, selector: str) -> list[ElementHandle]:
        """Return all elements on the page matching ``selector``."""
        found = self._page.query_selector_all(selector)
        return found

    def s(self, selector: str) -> ElementHandle | None:
        """Return the first element on the page matching ``selector``."""
        found = self.ss(selector)
        return self.first(found)

    def ss_re(self, selector: str, pattern: str) -> list[ElementHandle]:
        """Return page elements matching ``selector`` whose text matches ``pattern``."""
        found = self.ss(selector)
        return self.re_filter(pattern, found)

    def s_re(self, selector: str, pattern: str) -> ElementHandle | None:
        """Return the first page element matching both ``selector`` and ``pattern``."""
        found = self.ss_re(selector, pattern)
        return self.first(found)

    def ss_in(self, selector: str, from_: ElementHandle | None) -> list[ElementHandle]:
        """Return all matches of ``selector`` below ``from_`` (``[]`` if it is ``None``)."""
        if from_ is None:
            return []
        return from_.query_selector_all(selector)

    def s_in(self, selector: str, from_: ElementHandle | None) -> ElementHandle | None:
        """Return the first match of ``selector`` below ``from_``, or ``None``."""
        found = self.ss_in(selector, from_)
        return self.first(found)

    def ss_re_in(self, selector: str, pattern: str, from_: ElementHandle | None) -> list[ElementHandle]:
        """Return matches of ``selector`` below ``from_`` whose text matches ``pattern``."""
        found = self.ss_in(selector, from_)
        return self.re_filter(pattern, found)

    def s_re_in(self, selector: str, pattern: str, from_: ElementHandle | None) -> ElementHandle | None:
        """Return the first match of ``selector`` and ``pattern`` below ``from_``."""
        found = self.ss_re_in(selector, pattern, from_)
        return self.first(found)

    def next(self, elem: ElementHandle | None) -> ElementHandle | None:
        """Return the next element sibling of ``elem``, or ``None``."""
        if elem is None:
            return None
        sibling = elem.evaluate_handle("el => el.nextElementSibling")
        return sibling.as_element()

    def text_c(self, elem: ElementHandle | None) -> str | None:
        """Return the stripped ``textContent`` of ``elem``.

        ``None`` is returned for a ``None`` element; a falsy result from
        the browser (``None`` or an empty string) is returned unchanged.
        """
        if elem is None:
            return None
        raw = elem.evaluate("el => el.textContent")
        if raw:
            return raw.strip()
        return raw

    def i_text(self, elem: ElementHandle | None) -> str | None:
        """Return the stripped ``innerText`` of ``elem``.

        ``None`` is returned for a ``None`` element; a falsy result from
        the browser (``None`` or an empty string) is returned unchanged.
        """
        if elem is None:
            return None
        raw = elem.evaluate("el => el.innerText")
        if raw:
            return raw.strip()
        return raw

    def attr(self, attr_name: str, elem: ElementHandle | None) -> str | None:
        """Return the stripped value of attribute ``attr_name`` on ``elem``.

        ``None`` is returned for a ``None`` element; a falsy attribute
        value (missing or empty) is returned unchanged.
        """
        if elem is None:
            return None
        value = elem.get_attribute(attr_name)
        if value:
            return value.strip()
        return value

    def goto(self, url: str | None) -> bool:
        """Navigate to ``url`` and wait for ``domcontentloaded``.

        Returns ``True`` on success. Returns ``False`` when ``url`` is
        empty/``None`` or when navigation raises; in the latter case the
        exception is printed rather than propagated.
        """
        if not url:
            return False
        try:
            self._page.goto(url, wait_until="domcontentloaded")
        except Exception as e:
            # Best-effort navigation: report the failure and let the caller decide.
            print(f"{type(e).__name__}: {e}")
            return False
        return True

    def wait(self, selector: str, timeout: int = 15000) -> ElementHandle | None:
        """Wait for ``selector`` and return what ``wait_for_selector`` yields.

        ``timeout`` is forwarded unchanged. If waiting raises, the
        exception is printed and ``None`` is returned.
        """
        try:
            handle = self._page.wait_for_selector(selector, timeout=timeout)
        except Exception as e:
            # Best-effort wait: report the failure and signal it with None.
            print(f"{type(e).__name__}: {e}")
            return None
        return handle
81
+
82
+
83
+
84
class BasePaths:
    """Resolve paths relative to the directory of the calling file.

    Usage:
        paths = BasePaths(__file__)
        csv_path = paths.from_here('data/out.csv')
    """

    def __init__(self, file: str) -> None:
        """Remember the resolved parent directory of ``file`` as the base."""
        origin = Path(file).resolve()
        self._base = origin.parent

    def from_here(self, path: str) -> Path:
        """Return ``path`` joined onto the base directory."""
        return self._base.joinpath(path)
98
+
99
+
100
def sleep_between(a: float, b: float) -> None:
    """Sleep for a random duration drawn uniformly between ``a`` and ``b`` seconds."""
    delay = random.uniform(a, b)
    time.sleep(delay)
103
+
104
+
105
+ def append_csv(path: Path | str, row: dict) -> None:
106
+ """dictを1行としてCSVに追記する。ファイルがなければheaderも書く。"""
107
+ p = Path(path)
108
+ pd.DataFrame([row]).to_csv(
109
+ p,
110
+ mode='a',
111
+ index=False,
112
+ header=not p.exists(),
113
+ encoding='utf-8-sig',
114
+ )
115
+
116
+
117
def run_scraper(
    fn: Callable[[Page], None],
    *,
    headless: bool = False,
    channel: str = "chrome",
    viewport: dict | None = {'width': 1920, 'height': 1080},
    user_agent: str | None = None,
    accept_language: str | None = "ja-JP,ja;q=0.9",
    timeout: int = 15000,
    block_resources: set[str] | None = None,
) -> None:
    """Run ``fn`` against a freshly launched Playwright page.

    Bundles the usual start-up boilerplate: start Playwright, launch
    Chromium, create a browser context and a page, then call ``fn(page)``.
    Playwright, the browser and the context are all used as context
    managers, so they are released when ``fn`` returns or raises.

    Args:
        fn: Callback that receives the page, e.g. ``scrape(page)``.
        headless: Whether to launch the browser in headless mode.
        channel: Browser channel passed to ``chromium.launch`` ("chrome" etc.).
        viewport: Viewport size such as ``{'width': 1920, 'height': 1080}``.
            ``None`` leaves it unset.
        user_agent: User-Agent string. ``None`` leaves it unset.
        accept_language: Value for the ``Accept-Language`` header.
            ``None`` leaves it unset.
        timeout: Default page timeout in milliseconds.
        block_resources: Resource types to abort,
            e.g. ``{'image', 'font', 'media'}``. Empty or ``None`` blocks nothing.

    Usage:
        run_scraper(scrape, user_agent='Mozilla/5.0 ...', block_resources={'image', 'font'})
    """
    context_kwargs: dict = {}
    if viewport is not None:
        # Pass a copy: the default dict is shared by every call, so the
        # object itself must never leave this function by reference.
        context_kwargs['viewport'] = dict(viewport)
    if user_agent is not None:
        context_kwargs['user_agent'] = user_agent
    if accept_language is not None:
        context_kwargs['extra_http_headers'] = {'Accept-Language': accept_language}

    with (
        sync_playwright() as pw,
        pw.chromium.launch(headless=headless, channel=channel) as browser,
        browser.new_context(**context_kwargs) as context,
    ):
        page = context.new_page()
        page.set_default_timeout(timeout)

        if block_resources:
            def handler(route):
                # Abort requests of a blocked resource type, pass the rest through.
                if route.request.resource_type in block_resources:
                    route.abort()
                else:
                    route.continue_()

            # Register before fn runs so its first navigation is filtered too.
            page.route('**/*', handler)

        fn(page)