kabigon 0.7.0__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
kabigon/__init__.py CHANGED
@@ -10,6 +10,7 @@ from .httpx import HttpxLoader
10
10
  from .loader import Loader
11
11
  from .pdf import PDFLoader
12
12
  from .playwright import PlaywrightLoader
13
+ from .ptt import PttLoader
13
14
  from .reel import ReelLoader
14
15
  from .twitter import TwitterLoader
15
16
  from .youtube import YoutubeLoader
kabigon/cli.py CHANGED
@@ -1,29 +1,31 @@
1
- import click
1
+ import typer
2
2
  from rich import print
3
3
 
4
4
  from .compose import Compose
5
- from .httpx import HttpxLoader
6
5
  from .pdf import PDFLoader
7
6
  from .playwright import PlaywrightLoader
7
+ from .ptt import PttLoader
8
8
  from .reel import ReelLoader
9
9
  from .twitter import TwitterLoader
10
10
  from .youtube import YoutubeLoader
11
11
  from .ytdlp import YtdlpLoader
12
12
 
13
13
 
14
- @click.command()
15
- @click.argument("url", type=click.STRING)
16
- def main(url: str) -> None:
14
+ def run(url: str) -> None:
17
15
  loader = Compose(
18
16
  [
17
+ PttLoader(),
19
18
  TwitterLoader(),
20
19
  YoutubeLoader(),
21
20
  ReelLoader(),
22
21
  YtdlpLoader(),
23
22
  PDFLoader(),
24
- HttpxLoader(),
25
23
  PlaywrightLoader(),
26
24
  ]
27
25
  )
28
26
  result = loader.load(url)
29
27
  print(result)
28
+
29
+
30
+ def main() -> None:
31
+ typer.run(run)
kabigon/compose.py CHANGED
@@ -1,6 +1,5 @@
1
1
  from loguru import logger
2
2
 
3
- from .errors import KabigonError
4
3
  from .loader import Loader
5
4
 
6
5
 
@@ -23,7 +22,7 @@ class Compose(Loader):
23
22
  except Exception as e:
24
23
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
25
24
 
26
- raise KabigonError(f"Failed to load URL: {url}")
25
+ raise Exception(f"Failed to load URL: {url}")
27
26
 
28
27
  async def async_load(self, url: str) -> str:
29
28
  for loader in self.loaders:
@@ -40,4 +39,4 @@ class Compose(Loader):
40
39
  except Exception as e:
41
40
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
42
41
 
43
- raise KabigonError(f"Failed to load URL: {url}")
42
+ raise Exception(f"Failed to load URL: {url}")
kabigon/firecrawl.py CHANGED
@@ -2,8 +2,6 @@ import os
2
2
 
3
3
  from firecrawl import FirecrawlApp
4
4
 
5
- from .errors import FirecrawlError
6
- from .errors import FirecrawlKeyError
7
5
  from .loader import Loader
8
6
 
9
7
 
@@ -13,7 +11,7 @@ class FirecrawlLoader(Loader):
13
11
 
14
12
  api_key = os.getenv("FIRECRAWL_API_KEY")
15
13
  if not api_key:
16
- raise FirecrawlKeyError()
14
+ raise ValueError("FIRECRAWL_API_KEY is not set.")
17
15
 
18
16
  self.app = FirecrawlApp(api_key=api_key)
19
17
 
@@ -25,7 +23,7 @@ class FirecrawlLoader(Loader):
25
23
  )
26
24
 
27
25
  if not result.success:
28
- raise FirecrawlError(url, result.error)
26
+ raise Exception(f"Failed to load URL: {url}, got: {result.error}")
29
27
 
30
28
  return result.markdown
31
29
 
kabigon/httpx.py CHANGED
@@ -3,21 +3,18 @@ import httpx
3
3
  from .loader import Loader
4
4
  from .utils import html_to_markdown
5
5
 
6
- DEFAULT_HEADERS = {
7
- "Accept-Language": "zh-TW,zh;q=0.9,ja;q=0.8,en-US;q=0.7,en;q=0.6",
8
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", # noqa
9
- "Cookie": "over18=1", # Required for some sites like PTT
10
- }
11
-
12
6
 
13
7
  class HttpxLoader(Loader):
8
+ def __init__(self, headers: dict[str, str] | None = None) -> None:
9
+ self.headers = headers
10
+
14
11
  def load(self, url: str) -> str:
15
- response = httpx.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
12
+ response = httpx.get(url, headers=self.headers, follow_redirects=False)
16
13
  response.raise_for_status()
17
14
  return html_to_markdown(response.content)
18
15
 
19
16
  async def async_load(self, url: str) -> str:
20
17
  async with httpx.AsyncClient() as client:
21
- response = await client.get(url, headers=DEFAULT_HEADERS, follow_redirects=True)
18
+ response = await client.get(url, headers=self.headers, follow_redirects=True)
22
19
  response.raise_for_status()
23
20
  return html_to_markdown(response.content)
kabigon/pdf.py CHANGED
@@ -6,7 +6,6 @@ from typing import Any
6
6
  import httpx
7
7
  from pypdf import PdfReader
8
8
 
9
- from .errors import NotPDFError
10
9
  from .loader import Loader
11
10
 
12
11
  DEFAULT_HEADERS = {
@@ -15,6 +14,11 @@ DEFAULT_HEADERS = {
15
14
  }
16
15
 
17
16
 
17
+ class NotPDFError(Exception):
18
+ def __init__(self, url: str) -> None:
19
+ super().__init__(f"URL is not a PDF: {url}")
20
+
21
+
18
22
  class PDFLoader(Loader):
19
23
  def load(self, url_or_file: str) -> str:
20
24
  if not url_or_file.startswith("http"):
kabigon/ptt.py ADDED
@@ -0,0 +1,30 @@
1
+ from urllib.parse import urlparse
2
+
3
+ from .httpx import HttpxLoader
4
+ from .loader import Loader
5
+
6
+
7
+ def check_ptt_url(url: str) -> None:
8
+ if urlparse(url).netloc != "www.ptt.cc":
9
+ raise ValueError(f"URL must be from ptt.cc, got {url}")
10
+
11
+
12
+ class PttLoader(Loader):
13
+ def __init__(self) -> None:
14
+ self.httpx_loader = HttpxLoader(
15
+ headers={
16
+ "Accept-Language": "zh-TW,zh;q=0.9,ja;q=0.8,en-US;q=0.7,en;q=0.6",
17
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36", # noqa
18
+ "Cookie": "over18=1",
19
+ }
20
+ )
21
+
22
+ def load(self, url: str) -> str:
23
+ check_ptt_url(url)
24
+
25
+ return self.httpx_loader.load(url)
26
+
27
+ async def async_load(self, url: str):
28
+ check_ptt_url(url)
29
+
30
+ return await self.httpx_loader.async_load(url)
kabigon/reel.py CHANGED
@@ -1,11 +1,11 @@
1
- from .errors import NotReelURLError
2
1
  from .httpx import HttpxLoader
3
2
  from .loader import Loader
4
3
  from .ytdlp import YtdlpLoader
5
4
 
6
5
 
7
- def is_reel_url(url: str) -> bool:
8
- return url.startswith("https://www.instagram.com/reel")
6
+ def check_reel_url(url: str) -> None:
7
+ if not url.startswith("https://www.instagram.com/reel"):
8
+ raise ValueError(f"URL is not an Instagram Reel: {url}")
9
9
 
10
10
 
11
11
  class ReelLoader(Loader):
@@ -14,8 +14,7 @@ class ReelLoader(Loader):
14
14
  self.ytdlp_loader = YtdlpLoader()
15
15
 
16
16
  def load(self, url: str) -> str:
17
- if not is_reel_url(url):
18
- raise NotReelURLError(url)
17
+ check_reel_url(url)
19
18
 
20
19
  audio_content = self.ytdlp_loader.load(url)
21
20
  html_content = self.httpx_loader.load(url)
@@ -23,8 +22,7 @@ class ReelLoader(Loader):
23
22
  return f"{audio_content}\n\n{html_content}"
24
23
 
25
24
  async def async_load(self, url: str):
26
- if not is_reel_url(url):
27
- raise NotReelURLError(url)
25
+ check_reel_url(url)
28
26
 
29
27
  audio_content = await self.ytdlp_loader.async_load(url)
30
28
  html_content = await self.httpx_loader.async_load(url)
kabigon/twitter.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from urllib.parse import urlparse
2
2
  from urllib.parse import urlunparse
3
3
 
4
- from .errors import NotTwitterURLError
5
4
  from .loader import Loader
6
5
  from .playwright import PlaywrightLoader
7
6
 
@@ -21,8 +20,9 @@ def replace_domain(url: str, new_domain: str = "x.com") -> str:
21
20
  return urlunparse(urlparse(url)._replace(netloc=new_domain))
22
21
 
23
22
 
24
- def is_x_url(url: str) -> bool:
25
- return urlparse(url).netloc in TWITTER_DOMAINS
23
+ def check_x_url(url: str) -> None:
24
+ if urlparse(url).netloc not in TWITTER_DOMAINS:
25
+ raise ValueError(f"URL is not a Twitter URL: {url}")
26
26
 
27
27
 
28
28
  class TwitterLoader(Loader):
@@ -30,16 +30,14 @@ class TwitterLoader(Loader):
30
30
  self.playwright_loader = PlaywrightLoader(wait_until="networkidle")
31
31
 
32
32
  def load(self, url: str) -> str:
33
- if not is_x_url(url):
34
- raise NotTwitterURLError(url)
33
+ check_x_url(url)
35
34
 
36
35
  url = replace_domain(url)
37
36
 
38
37
  return self.playwright_loader.load(url)
39
38
 
40
39
  async def async_load(self, url: str):
41
- if not is_x_url(url):
42
- raise NotTwitterURLError(url)
40
+ check_x_url(url)
43
41
 
44
42
  url = replace_domain(url)
45
43
 
{kabigon-0.7.0.dist-info → kabigon-0.8.1.dist-info}/METADATA CHANGED
@@ -1,11 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.7.0
3
+ Version: 0.8.1
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
7
7
  Requires-Dist: aioytt>=0.2.4
8
- Requires-Dist: click>=8.1.8
9
8
  Requires-Dist: firecrawl-py>=2.4.1
10
9
  Requires-Dist: httpx>=0.28.1
11
10
  Requires-Dist: loguru>=0.7.3
@@ -15,6 +14,7 @@ Requires-Dist: playwright>=1.52.0
15
14
  Requires-Dist: pypdf>=5.3.0
16
15
  Requires-Dist: rich>=13.9.4
17
16
  Requires-Dist: timeout-decorator>=0.5.0
17
+ Requires-Dist: typer>=0.15.3
18
18
  Requires-Dist: youtube-transcript-api>=0.6.3
19
19
  Requires-Dist: yt-dlp>=2025.4.30
20
20
  Description-Content-Type: text/markdown
kabigon-0.8.1.dist-info/RECORD ADDED
@@ -0,0 +1,20 @@
1
+ kabigon/__init__.py,sha256=MUfTFUe5ezA249L2yuU5_2FiewLu86H3VsIpJSne2vQ,560
2
+ kabigon/cli.py,sha256=PJ0wnwp_AgHA54YxGr1jNJ_q3ls7fEymgTJaJxCVU7M,650
3
+ kabigon/compose.py,sha256=Kb6_-SNeh08QELMF-r3mWxasDTxJBuJJQamFPLcwQ1I,1463
4
+ kabigon/firecrawl.py,sha256=-5AI9tla_684dtpubY_BRudqLgw28158WdwA1RjJvAA,778
5
+ kabigon/httpx.py,sha256=Zup9DURyWLqoWzaxBbCYAaV-5LSlHUuAcNyyUsZTVag,696
6
+ kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
7
+ kabigon/pdf.py,sha256=PNOx-Dz_VpN-EVnVar_wJJZdxWrDZrAFE-gSuUR9q7o,1870
8
+ kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
9
+ kabigon/ptt.py,sha256=Gyp2nJrjptkjbwZJ9VEQHX0DEgKBe5QRQOmGVHUUgNA,896
10
+ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
+ kabigon/reel.py,sha256=qOwWCvcp7xNKg0JDunq_Bsl8yqqMzrnAOI9k5mSqrOU,874
12
+ kabigon/twitter.py,sha256=aRqAiFxIwln6lteWdoF6SmvbzO62yBTQRzcB7UcVJwk,1046
13
+ kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
14
+ kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
15
+ kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
16
+ kabigon-0.8.1.dist-info/METADATA,sha256=8nZX3ukADj-qndGHMMuZluG_HdI8-wqNu8QJBoplPRc,1079
17
+ kabigon-0.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ kabigon-0.8.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
+ kabigon-0.8.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
+ kabigon-0.8.1.dist-info/RECORD,,
kabigon/errors.py DELETED
@@ -1,28 +0,0 @@
1
- class KabigonError(Exception):
2
- pass
3
-
4
-
5
- class FirecrawlKeyError(KabigonError):
6
- def __init__(self) -> None:
7
- super().__init__("FIRECRAWL_API_KEY is not set.")
8
-
9
-
10
- class FirecrawlError(KabigonError):
11
- def __init__(self, url: str, error: str) -> None:
12
- msg = f"Failed to load URL: {url}, got: {error}"
13
- super().__init__(msg)
14
-
15
-
16
- class NotPDFError(KabigonError):
17
- def __init__(self, url: str) -> None:
18
- super().__init__(f"URL is not a PDF: {url}")
19
-
20
-
21
- class NotReelURLError(KabigonError):
22
- def __init__(self, url: str):
23
- super().__init__(f"URL is not an Instagram Reel: {url}")
24
-
25
-
26
- class NotTwitterURLError(KabigonError):
27
- def __init__(self, url: str):
28
- super().__init__(f"URL is not a Twitter URL: {url}")
kabigon-0.7.0.dist-info/RECORD DELETED
@@ -1,20 +0,0 @@
1
- kabigon/__init__.py,sha256=L-MDyX7PrPZvq6mASyOCJnATebwn-Lj1Nobp9GAPeyU,533
2
- kabigon/cli.py,sha256=co9IczXx0bAnXo6EEzEdCMSfpBetkPe6pB5hd2jtzA8,675
3
- kabigon/compose.py,sha256=l2D5OK91VcN2a6DbjMdwBk3YSqzVV7fOVX0TqNm2gJo,1502
4
- kabigon/errors.py,sha256=iri_YS71UsOHwaVtfy5IA6iUfq30DCsptZsChmZaZic,755
5
- kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
6
- kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
7
- kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
8
- kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
9
- kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
10
- kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
- kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
12
- kabigon/twitter.py,sha256=U07pa8xA0nHAaaDPeUelQRvXR5ZnUvYJZW35xRAvHA8,1114
13
- kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
14
- kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
15
- kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
16
- kabigon-0.7.0.dist-info/METADATA,sha256=4rng3Fe89ifNd6bQoBggRhCoqTMIa4sWhbU6N75puzo,1078
17
- kabigon-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- kabigon-0.7.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
- kabigon-0.7.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
- kabigon-0.7.0.dist-info/RECORD,,