kabigon-0.6.0-py3-none-any.whl → kabigon-0.7.0-py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
kabigon/__init__.py CHANGED
@@ -11,7 +11,7 @@ from .loader import Loader
11
11
  from .pdf import PDFLoader
12
12
  from .playwright import PlaywrightLoader
13
13
  from .reel import ReelLoader
14
- from .singlefile import SinglefileLoader
14
+ from .twitter import TwitterLoader
15
15
  from .youtube import YoutubeLoader
16
16
  from .ytdlp import YtdlpLoader
17
17
 
kabigon/cli.py CHANGED
@@ -6,6 +6,7 @@ from .httpx import HttpxLoader
6
6
  from .pdf import PDFLoader
7
7
  from .playwright import PlaywrightLoader
8
8
  from .reel import ReelLoader
9
+ from .twitter import TwitterLoader
9
10
  from .youtube import YoutubeLoader
10
11
  from .ytdlp import YtdlpLoader
11
12
 
@@ -15,6 +16,7 @@ from .ytdlp import YtdlpLoader
15
16
  def main(url: str) -> None:
16
17
  loader = Compose(
17
18
  [
19
+ TwitterLoader(),
18
20
  YoutubeLoader(),
19
21
  ReelLoader(),
20
22
  YtdlpLoader(),
kabigon/compose.py CHANGED
@@ -1,40 +1,14 @@
1
- from urllib.parse import urlparse
2
- from urllib.parse import urlunparse
3
-
4
1
  from loguru import logger
5
2
 
6
- from .errors import LoaderError
3
+ from .errors import KabigonError
7
4
  from .loader import Loader
8
5
 
9
- REPLACEMENTS = {
10
- "api.fxtwitter.com": [
11
- "twitter.com",
12
- "x.com",
13
- "fxtwitter.com",
14
- "vxtwitter.com",
15
- "fixvx.com",
16
- "twittpr.com",
17
- "fixupx.com",
18
- ]
19
- }
20
-
21
-
22
- def replace_domain(url: str) -> str:
23
- parsed = urlparse(url)
24
- for target, source in REPLACEMENTS.items():
25
- if parsed.netloc in source:
26
- fixed_url = parsed._replace(netloc=target)
27
- return urlunparse(fixed_url)
28
- return url
29
-
30
6
 
31
7
  class Compose(Loader):
32
8
  def __init__(self, loaders: list[Loader]) -> None:
33
9
  self.loaders = loaders
34
10
 
35
11
  def load(self, url: str) -> str:
36
- url = replace_domain(url)
37
-
38
12
  for loader in self.loaders:
39
13
  try:
40
14
  content = loader.load(url)
@@ -49,11 +23,9 @@ class Compose(Loader):
49
23
  except Exception as e:
50
24
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
51
25
 
52
- raise LoaderError(f"Failed to load URL: {url}")
26
+ raise KabigonError(f"Failed to load URL: {url}")
53
27
 
54
28
  async def async_load(self, url: str) -> str:
55
- url = replace_domain(url)
56
-
57
29
  for loader in self.loaders:
58
30
  try:
59
31
  content = await loader.async_load(url)
@@ -68,4 +40,4 @@ class Compose(Loader):
68
40
  except Exception as e:
69
41
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
70
42
 
71
- raise LoaderError(f"Failed to load URL: {url}")
43
+ raise KabigonError(f"Failed to load URL: {url}")
kabigon/errors.py CHANGED
@@ -1,23 +1,28 @@
1
- class LoaderError(Exception):
1
+ class KabigonError(Exception):
2
2
  pass
3
3
 
4
4
 
5
- class FirecrawlKeyError(LoaderError):
5
+ class FirecrawlKeyError(KabigonError):
6
6
  def __init__(self) -> None:
7
7
  super().__init__("FIRECRAWL_API_KEY is not set.")
8
8
 
9
9
 
10
- class FirecrawlError(LoaderError):
10
+ class FirecrawlError(KabigonError):
11
11
  def __init__(self, url: str, error: str) -> None:
12
12
  msg = f"Failed to load URL: {url}, got: {error}"
13
13
  super().__init__(msg)
14
14
 
15
15
 
16
- class NotPDFError(LoaderError):
16
+ class NotPDFError(KabigonError):
17
17
  def __init__(self, url: str) -> None:
18
18
  super().__init__(f"URL is not a PDF: {url}")
19
19
 
20
20
 
21
- class NotReelURLError(LoaderError):
21
+ class NotReelURLError(KabigonError):
22
22
  def __init__(self, url: str):
23
23
  super().__init__(f"URL is not an Instagram Reel: {url}")
24
+
25
+
26
+ class NotTwitterURLError(KabigonError):
27
+ def __init__(self, url: str):
28
+ super().__init__(f"URL is not a Twitter URL: {url}")
kabigon/twitter.py ADDED
@@ -0,0 +1,46 @@
1
+ from urllib.parse import urlparse
2
+ from urllib.parse import urlunparse
3
+
4
+ from .errors import NotTwitterURLError
5
+ from .loader import Loader
6
+ from .playwright import PlaywrightLoader
7
+
8
+ TWITTER_DOMAINS = [
9
+ "twitter.com",
10
+ "x.com",
11
+ "fxtwitter.com",
12
+ "vxtwitter.com",
13
+ "fixvx.com",
14
+ "twittpr.com",
15
+ "api.fxtwitter.com",
16
+ "fixupx.com",
17
+ ]
18
+
19
+
20
+ def replace_domain(url: str, new_domain: str = "x.com") -> str:
21
+ return urlunparse(urlparse(url)._replace(netloc=new_domain))
22
+
23
+
24
+ def is_x_url(url: str) -> bool:
25
+ return urlparse(url).netloc in TWITTER_DOMAINS
26
+
27
+
28
+ class TwitterLoader(Loader):
29
+ def __init__(self) -> None:
30
+ self.playwright_loader = PlaywrightLoader(wait_until="networkidle")
31
+
32
+ def load(self, url: str) -> str:
33
+ if not is_x_url(url):
34
+ raise NotTwitterURLError(url)
35
+
36
+ url = replace_domain(url)
37
+
38
+ return self.playwright_loader.load(url)
39
+
40
+ async def async_load(self, url: str):
41
+ if not is_x_url(url):
42
+ raise NotTwitterURLError(url)
43
+
44
+ url = replace_domain(url)
45
+
46
+ return await self.playwright_loader.async_load(url)
kabigon/ytdlp.py CHANGED
@@ -14,7 +14,7 @@ from loguru import logger
14
14
  from .loader import Loader
15
15
 
16
16
  try:
17
- import mlx_whisper # noqa: F401
17
+ import mlx_whisper # noqa: F401 # type: ignore
18
18
 
19
19
  _mlx_whisper_installed = True
20
20
  except ImportError:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.6.0
3
+ Version: 0.7.0
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
@@ -11,12 +11,12 @@ Requires-Dist: httpx>=0.28.1
11
11
  Requires-Dist: loguru>=0.7.3
12
12
  Requires-Dist: markdownify>=0.14.1
13
13
  Requires-Dist: openai-whisper>=20240930
14
- Requires-Dist: playwright>=1.50.0
14
+ Requires-Dist: playwright>=1.52.0
15
15
  Requires-Dist: pypdf>=5.3.0
16
16
  Requires-Dist: rich>=13.9.4
17
17
  Requires-Dist: timeout-decorator>=0.5.0
18
18
  Requires-Dist: youtube-transcript-api>=0.6.3
19
- Requires-Dist: yt-dlp>=2025.1.26
19
+ Requires-Dist: yt-dlp>=2025.4.30
20
20
  Description-Content-Type: text/markdown
21
21
 
22
22
  # kabigon
@@ -0,0 +1,20 @@
1
+ kabigon/__init__.py,sha256=L-MDyX7PrPZvq6mASyOCJnATebwn-Lj1Nobp9GAPeyU,533
2
+ kabigon/cli.py,sha256=co9IczXx0bAnXo6EEzEdCMSfpBetkPe6pB5hd2jtzA8,675
3
+ kabigon/compose.py,sha256=l2D5OK91VcN2a6DbjMdwBk3YSqzVV7fOVX0TqNm2gJo,1502
4
+ kabigon/errors.py,sha256=iri_YS71UsOHwaVtfy5IA6iUfq30DCsptZsChmZaZic,755
5
+ kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
6
+ kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
7
+ kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
8
+ kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
9
+ kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
10
+ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
+ kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
12
+ kabigon/twitter.py,sha256=U07pa8xA0nHAaaDPeUelQRvXR5ZnUvYJZW35xRAvHA8,1114
13
+ kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
14
+ kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
15
+ kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
16
+ kabigon-0.7.0.dist-info/METADATA,sha256=4rng3Fe89ifNd6bQoBggRhCoqTMIa4sWhbU6N75puzo,1078
17
+ kabigon-0.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ kabigon-0.7.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
+ kabigon-0.7.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
+ kabigon-0.7.0.dist-info/RECORD,,
kabigon/singlefile.py DELETED
@@ -1,65 +0,0 @@
1
- import os
2
- import subprocess
3
- import tempfile
4
- from functools import cache
5
- from pathlib import Path
6
- from typing import Final
7
-
8
- import charset_normalizer
9
- from loguru import logger
10
-
11
- from .loader import Loader
12
- from .utils import html_to_markdown
13
-
14
- DEFAULT_SINGLEFILE_PATH: Final[str] = "single-file"
15
-
16
-
17
- @cache
18
- def get_singlefile_path() -> str:
19
- path = os.getenv("SINGLEFILE_PATH")
20
- if not path:
21
- path = DEFAULT_SINGLEFILE_PATH
22
- logger.warning("SINGLEFILE_PATH not set, using default: {}", DEFAULT_SINGLEFILE_PATH)
23
- return path
24
-
25
-
26
- class SinglefileLoader(Loader):
27
- def __init__(self, cookies_file: str | None = None, browser_headless: bool = False) -> None:
28
- self.cookies_file = cookies_file
29
- self.browser_headless = browser_headless
30
-
31
- def load(self, url: str) -> str:
32
- filename = self.download(url)
33
- content = str(charset_normalizer.from_path(filename).best())
34
- return html_to_markdown(content)
35
-
36
- def download(self, url: str) -> str:
37
- logger.info("Downloading HTML using SingleFile: {}", url)
38
-
39
- filename = tempfile.mktemp(suffix=".html")
40
- singlefile_path = get_singlefile_path()
41
-
42
- cmds = [singlefile_path]
43
-
44
- if self.cookies_file is not None:
45
- cookies_path = Path(self.cookies_file)
46
- if not cookies_path.exists():
47
- raise FileNotFoundError(f"Cookies file not found: {self.cookies_file}")
48
-
49
- cmds += [
50
- "--browser-cookies-file",
51
- str(cookies_path),
52
- ]
53
-
54
- cmds += [
55
- "--filename-conflict-action",
56
- "overwrite",
57
- "--browser-headless",
58
- str(self.browser_headless).lower(),
59
- url,
60
- filename,
61
- ]
62
-
63
- subprocess.run(cmds)
64
-
65
- return filename
@@ -1,20 +0,0 @@
1
- kabigon/__init__.py,sha256=L64FtFCQQxkqXXHDkW2rniWGAnufmC9WG6KBwutXU4w,539
2
- kabigon/cli.py,sha256=XSTyD1RFqq2Qok_52kSjJlBLUXl6t-K9QtsxCfB15o4,611
3
- kabigon/compose.py,sha256=Yhn9fe0mylVLpK01sT9OWWItIY1gD8TZLQ4qY8hAeSQ,2110
4
- kabigon/errors.py,sha256=tYokuDQ1-U25zq3hWu-GsYJBDpDq3VuObUijnd7Nc7M,613
5
- kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
6
- kabigon/httpx.py,sha256=uDdLks6zVzirY7-mnsJkypX86kAI5XmUVfK-lFifdJA,895
7
- kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
8
- kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
9
- kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
10
- kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
- kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
12
- kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
13
- kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
14
- kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
15
- kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
16
- kabigon-0.6.0.dist-info/METADATA,sha256=zsjAoTye8jH1NtI_ZylcCI8Vt-S0CqQWcN-ca39cbLY,1078
17
- kabigon-0.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- kabigon-0.6.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
- kabigon-0.6.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
- kabigon-0.6.0.dist-info/RECORD,,