kabigon 0.6.1__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabigon/__init__.py +2 -0
- kabigon/cli.py +10 -6
- kabigon/compose.py +0 -29
- kabigon/errors.py +5 -0
- kabigon/httpx.py +5 -8
- kabigon/ptt.py +32 -0
- kabigon/twitter.py +46 -0
- {kabigon-0.6.1.dist-info → kabigon-0.8.0.dist-info}/METADATA +4 -4
- kabigon-0.8.0.dist-info/RECORD +21 -0
- kabigon-0.6.1.dist-info/RECORD +0 -19
- {kabigon-0.6.1.dist-info → kabigon-0.8.0.dist-info}/WHEEL +0 -0
- {kabigon-0.6.1.dist-info → kabigon-0.8.0.dist-info}/entry_points.txt +0 -0
- {kabigon-0.6.1.dist-info → kabigon-0.8.0.dist-info}/licenses/LICENSE +0 -0
kabigon/__init__.py
CHANGED
@@ -10,7 +10,9 @@ from .httpx import HttpxLoader
|
|
10
10
|
from .loader import Loader
|
11
11
|
from .pdf import PDFLoader
|
12
12
|
from .playwright import PlaywrightLoader
|
13
|
+
from .ptt import PttLoader
|
13
14
|
from .reel import ReelLoader
|
15
|
+
from .twitter import TwitterLoader
|
14
16
|
from .youtube import YoutubeLoader
|
15
17
|
from .ytdlp import YtdlpLoader
|
16
18
|
|
kabigon/cli.py
CHANGED
@@ -1,27 +1,31 @@
|
|
1
|
-
import click
|
1
|
+
import typer
|
2
2
|
from rich import print
|
3
3
|
|
4
4
|
from .compose import Compose
|
5
|
-
from .httpx import HttpxLoader
|
6
5
|
from .pdf import PDFLoader
|
7
6
|
from .playwright import PlaywrightLoader
|
7
|
+
from .ptt import PttLoader
|
8
8
|
from .reel import ReelLoader
|
9
|
+
from .twitter import TwitterLoader
|
9
10
|
from .youtube import YoutubeLoader
|
10
11
|
from .ytdlp import YtdlpLoader
|
11
12
|
|
12
13
|
|
13
|
-
|
14
|
-
@click.argument("url", type=click.STRING)
|
15
|
-
def main(url: str) -> None:
|
14
|
+
def run(url: str) -> None:
|
16
15
|
loader = Compose(
|
17
16
|
[
|
17
|
+
PttLoader(),
|
18
|
+
TwitterLoader(),
|
18
19
|
YoutubeLoader(),
|
19
20
|
ReelLoader(),
|
20
21
|
YtdlpLoader(),
|
21
22
|
PDFLoader(),
|
22
|
-
HttpxLoader(),
|
23
23
|
PlaywrightLoader(),
|
24
24
|
]
|
25
25
|
)
|
26
26
|
result = loader.load(url)
|
27
27
|
print(result)
|
28
|
+
|
29
|
+
|
30
|
+
def main() -> None:
|
31
|
+
typer.run(run)
|
kabigon/compose.py
CHANGED
@@ -1,41 +1,14 @@
|
|
1
|
-
from urllib.parse import urlparse
|
2
|
-
from urllib.parse import urlunparse
|
3
|
-
|
4
1
|
from loguru import logger
|
5
2
|
|
6
3
|
from .errors import KabigonError
|
7
4
|
from .loader import Loader
|
8
5
|
|
9
|
-
REPLACEMENTS = {
|
10
|
-
# fixupx.com seems better than api.fxtwitter.com
|
11
|
-
"fixupx.com": [
|
12
|
-
"twitter.com",
|
13
|
-
"x.com",
|
14
|
-
"fxtwitter.com",
|
15
|
-
"vxtwitter.com",
|
16
|
-
"fixvx.com",
|
17
|
-
"twittpr.com",
|
18
|
-
"api.fxtwitter.com",
|
19
|
-
]
|
20
|
-
}
|
21
|
-
|
22
|
-
|
23
|
-
def replace_domain(url: str) -> str:
|
24
|
-
parsed = urlparse(url)
|
25
|
-
for target, source in REPLACEMENTS.items():
|
26
|
-
if parsed.netloc in source:
|
27
|
-
fixed_url = parsed._replace(netloc=target)
|
28
|
-
return urlunparse(fixed_url)
|
29
|
-
return url
|
30
|
-
|
31
6
|
|
32
7
|
class Compose(Loader):
|
33
8
|
def __init__(self, loaders: list[Loader]) -> None:
|
34
9
|
self.loaders = loaders
|
35
10
|
|
36
11
|
def load(self, url: str) -> str:
|
37
|
-
url = replace_domain(url)
|
38
|
-
|
39
12
|
for loader in self.loaders:
|
40
13
|
try:
|
41
14
|
content = loader.load(url)
|
@@ -53,8 +26,6 @@ class Compose(Loader):
|
|
53
26
|
raise KabigonError(f"Failed to load URL: {url}")
|
54
27
|
|
55
28
|
async def async_load(self, url: str) -> str:
|
56
|
-
url = replace_domain(url)
|
57
|
-
|
58
29
|
for loader in self.loaders:
|
59
30
|
try:
|
60
31
|
content = await loader.async_load(url)
|
kabigon/errors.py
CHANGED
@@ -21,3 +21,8 @@ class NotPDFError(KabigonError):
|
|
21
21
|
class NotReelURLError(KabigonError):
|
22
22
|
def __init__(self, url: str):
|
23
23
|
super().__init__(f"URL is not an Instagram Reel: {url}")
|
24
|
+
|
25
|
+
|
26
|
+
class NotTwitterURLError(KabigonError):
|
27
|
+
def __init__(self, url: str):
|
28
|
+
super().__init__(f"URL is not a Twitter URL: {url}")
|
kabigon/httpx.py
CHANGED
@@ -3,21 +3,18 @@ import httpx
|
|
3
3
|
from .loader import Loader
|
4
4
|
from .utils import html_to_markdown
|
5
5
|
|
6
|
-
DEFAULT_HEADERS = {
|
7
|
-
"Accept-Language": "zh-TW,zh;q=0.9,ja;q=0.8,en-US;q=0.7,en;q=0.6",
|
8
|
-
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", # noqa
|
9
|
-
"Cookie": "over18=1", # Required for some sites like PTT
|
10
|
-
}
|
11
|
-
|
12
6
|
|
13
7
|
class HttpxLoader(Loader):
|
8
|
+
def __init__(self, headers: dict[str, str] | None = None) -> None:
|
9
|
+
self.headers = headers
|
10
|
+
|
14
11
|
def load(self, url: str) -> str:
|
15
|
-
response = httpx.get(url, headers=
|
12
|
+
response = httpx.get(url, headers=self.headers, follow_redirects=False)
|
16
13
|
response.raise_for_status()
|
17
14
|
return html_to_markdown(response.content)
|
18
15
|
|
19
16
|
async def async_load(self, url: str) -> str:
|
20
17
|
async with httpx.AsyncClient() as client:
|
21
|
-
response = await client.get(url, headers=
|
18
|
+
response = await client.get(url, headers=self.headers, follow_redirects=True)
|
22
19
|
response.raise_for_status()
|
23
20
|
return html_to_markdown(response.content)
|
kabigon/ptt.py
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
from urllib.parse import urlparse
|
2
|
+
|
3
|
+
from .errors import NotTwitterURLError
|
4
|
+
from .httpx import HttpxLoader
|
5
|
+
from .loader import Loader
|
6
|
+
|
7
|
+
|
8
|
+
def is_ptt_url(url: str) -> bool:
|
9
|
+
return urlparse(url).netloc == "www.ptt.cc"
|
10
|
+
|
11
|
+
|
12
|
+
class PttLoader(Loader):
|
13
|
+
def __init__(self) -> None:
|
14
|
+
self.httpx_loader = HttpxLoader(
|
15
|
+
headers={
|
16
|
+
"Accept-Language": "zh-TW,zh;q=0.9,ja;q=0.8,en-US;q=0.7,en;q=0.6",
|
17
|
+
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", # noqa
|
18
|
+
"Cookie": "over18=1",
|
19
|
+
}
|
20
|
+
)
|
21
|
+
|
22
|
+
def load(self, url: str) -> str:
|
23
|
+
if not is_ptt_url(url):
|
24
|
+
raise NotTwitterURLError(url)
|
25
|
+
|
26
|
+
return self.httpx_loader.load(url)
|
27
|
+
|
28
|
+
async def async_load(self, url: str):
|
29
|
+
if not is_ptt_url(url):
|
30
|
+
raise NotTwitterURLError(url)
|
31
|
+
|
32
|
+
return await self.httpx_loader.async_load(url)
|
kabigon/twitter.py
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
from urllib.parse import urlparse
|
2
|
+
from urllib.parse import urlunparse
|
3
|
+
|
4
|
+
from .errors import NotTwitterURLError
|
5
|
+
from .loader import Loader
|
6
|
+
from .playwright import PlaywrightLoader
|
7
|
+
|
8
|
+
TWITTER_DOMAINS = [
|
9
|
+
"twitter.com",
|
10
|
+
"x.com",
|
11
|
+
"fxtwitter.com",
|
12
|
+
"vxtwitter.com",
|
13
|
+
"fixvx.com",
|
14
|
+
"twittpr.com",
|
15
|
+
"api.fxtwitter.com",
|
16
|
+
"fixupx.com",
|
17
|
+
]
|
18
|
+
|
19
|
+
|
20
|
+
def replace_domain(url: str, new_domain: str = "x.com") -> str:
|
21
|
+
return urlunparse(urlparse(url)._replace(netloc=new_domain))
|
22
|
+
|
23
|
+
|
24
|
+
def is_x_url(url: str) -> bool:
|
25
|
+
return urlparse(url).netloc in TWITTER_DOMAINS
|
26
|
+
|
27
|
+
|
28
|
+
class TwitterLoader(Loader):
|
29
|
+
def __init__(self) -> None:
|
30
|
+
self.playwright_loader = PlaywrightLoader(wait_until="networkidle")
|
31
|
+
|
32
|
+
def load(self, url: str) -> str:
|
33
|
+
if not is_x_url(url):
|
34
|
+
raise NotTwitterURLError(url)
|
35
|
+
|
36
|
+
url = replace_domain(url)
|
37
|
+
|
38
|
+
return self.playwright_loader.load(url)
|
39
|
+
|
40
|
+
async def async_load(self, url: str):
|
41
|
+
if not is_x_url(url):
|
42
|
+
raise NotTwitterURLError(url)
|
43
|
+
|
44
|
+
url = replace_domain(url)
|
45
|
+
|
46
|
+
return await self.playwright_loader.async_load(url)
|
{kabigon-0.6.1.dist-info → kabigon-0.8.0.dist-info}/METADATA
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kabigon
|
3
|
-
Version: 0.6.1
|
3
|
+
Version: 0.8.0
|
4
4
|
Author-email: narumi <toucans-cutouts0f@icloud.com>
|
5
5
|
License-File: LICENSE
|
6
6
|
Requires-Python: >=3.10
|
7
7
|
Requires-Dist: aioytt>=0.2.4
|
8
|
-
Requires-Dist: click>=8.1.8
|
9
8
|
Requires-Dist: firecrawl-py>=2.4.1
|
10
9
|
Requires-Dist: httpx>=0.28.1
|
11
10
|
Requires-Dist: loguru>=0.7.3
|
12
11
|
Requires-Dist: markdownify>=0.14.1
|
13
12
|
Requires-Dist: openai-whisper>=20240930
|
14
|
-
Requires-Dist: playwright>=1.
|
13
|
+
Requires-Dist: playwright>=1.52.0
|
15
14
|
Requires-Dist: pypdf>=5.3.0
|
16
15
|
Requires-Dist: rich>=13.9.4
|
17
16
|
Requires-Dist: timeout-decorator>=0.5.0
|
17
|
+
Requires-Dist: typer>=0.15.3
|
18
18
|
Requires-Dist: youtube-transcript-api>=0.6.3
|
19
|
-
Requires-Dist: yt-dlp>=2025.
|
19
|
+
Requires-Dist: yt-dlp>=2025.4.30
|
20
20
|
Description-Content-Type: text/markdown
|
21
21
|
|
22
22
|
# kabigon
|
kabigon-0.8.0.dist-info/RECORD
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
kabigon/__init__.py,sha256=MUfTFUe5ezA249L2yuU5_2FiewLu86H3VsIpJSne2vQ,560
|
2
|
+
kabigon/cli.py,sha256=PJ0wnwp_AgHA54YxGr1jNJ_q3ls7fEymgTJaJxCVU7M,650
|
3
|
+
kabigon/compose.py,sha256=l2D5OK91VcN2a6DbjMdwBk3YSqzVV7fOVX0TqNm2gJo,1502
|
4
|
+
kabigon/errors.py,sha256=iri_YS71UsOHwaVtfy5IA6iUfq30DCsptZsChmZaZic,755
|
5
|
+
kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
|
6
|
+
kabigon/httpx.py,sha256=Zup9DURyWLqoWzaxBbCYAaV-5LSlHUuAcNyyUsZTVag,696
|
7
|
+
kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
|
8
|
+
kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
|
9
|
+
kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
|
10
|
+
kabigon/ptt.py,sha256=S2d6SeFGxM4E4kg-n5blN2BX56CWj_eOhapJxzUnxu8,965
|
11
|
+
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
12
|
+
kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
|
13
|
+
kabigon/twitter.py,sha256=U07pa8xA0nHAaaDPeUelQRvXR5ZnUvYJZW35xRAvHA8,1114
|
14
|
+
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
15
|
+
kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
|
16
|
+
kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
|
17
|
+
kabigon-0.8.0.dist-info/METADATA,sha256=SAazEEnqklluyiH2psGQwUI40ee400GELlcxv0OyvuU,1079
|
18
|
+
kabigon-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
19
|
+
kabigon-0.8.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
20
|
+
kabigon-0.8.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
21
|
+
kabigon-0.8.0.dist-info/RECORD,,
|
kabigon-0.6.1.dist-info/RECORD
DELETED
@@ -1,19 +0,0 @@
|
|
1
|
-
kabigon/__init__.py,sha256=_1LdKfp7qm0m7Fa_IJ9zcg4TRB14c9bB-r9M3fwASWI,498
|
2
|
-
kabigon/cli.py,sha256=XSTyD1RFqq2Qok_52kSjJlBLUXl6t-K9QtsxCfB15o4,611
|
3
|
-
kabigon/compose.py,sha256=5H_hWQ7ENUmEdeOpB2bkRv4U_U18_1IyNTran-cLYaM,2166
|
4
|
-
kabigon/errors.py,sha256=3eSPQtLocreKuq9mhwRk7IMdA2xh8KkmIFEUhGmTPxg,618
|
5
|
-
kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
|
6
|
-
kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
|
7
|
-
kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
|
8
|
-
kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
|
9
|
-
kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
|
10
|
-
kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
11
|
-
kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
|
12
|
-
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
13
|
-
kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
|
14
|
-
kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
|
15
|
-
kabigon-0.6.1.dist-info/METADATA,sha256=78J35ClbTdy2-vU8GtlQWjgSTitntFI8J-Cz07FGqoo,1078
|
16
|
-
kabigon-0.6.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
-
kabigon-0.6.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
18
|
-
kabigon-0.6.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
19
|
-
kabigon-0.6.1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|