kabigon 0.8.0__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
kabigon/compose.py CHANGED
@@ -1,6 +1,5 @@
1
1
  from loguru import logger
2
2
 
3
- from .errors import KabigonError
4
3
  from .loader import Loader
5
4
 
6
5
 
@@ -23,7 +22,7 @@ class Compose(Loader):
23
22
  except Exception as e:
24
23
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
25
24
 
26
- raise KabigonError(f"Failed to load URL: {url}")
25
+ raise Exception(f"Failed to load URL: {url}")
27
26
 
28
27
  async def async_load(self, url: str) -> str:
29
28
  for loader in self.loaders:
@@ -40,4 +39,4 @@ class Compose(Loader):
40
39
  except Exception as e:
41
40
  logger.info("[{}] Failed to load URL: {}, got error: {}", loader.__class__.__name__, url, e)
42
41
 
43
- raise KabigonError(f"Failed to load URL: {url}")
42
+ raise Exception(f"Failed to load URL: {url}")
kabigon/firecrawl.py CHANGED
@@ -2,8 +2,6 @@ import os
2
2
 
3
3
  from firecrawl import FirecrawlApp
4
4
 
5
- from .errors import FirecrawlError
6
- from .errors import FirecrawlKeyError
7
5
  from .loader import Loader
8
6
 
9
7
 
@@ -13,7 +11,7 @@ class FirecrawlLoader(Loader):
13
11
 
14
12
  api_key = os.getenv("FIRECRAWL_API_KEY")
15
13
  if not api_key:
16
- raise FirecrawlKeyError()
14
+ raise ValueError("FIRECRAWL_API_KEY is not set.")
17
15
 
18
16
  self.app = FirecrawlApp(api_key=api_key)
19
17
 
@@ -25,7 +23,7 @@ class FirecrawlLoader(Loader):
25
23
  )
26
24
 
27
25
  if not result.success:
28
- raise FirecrawlError(url, result.error)
26
+ raise Exception(f"Failed to load URL: {url}, got: {result.error}")
29
27
 
30
28
  return result.markdown
31
29
 
kabigon/pdf.py CHANGED
@@ -6,7 +6,6 @@ from typing import Any
6
6
  import httpx
7
7
  from pypdf import PdfReader
8
8
 
9
- from .errors import NotPDFError
10
9
  from .loader import Loader
11
10
 
12
11
  DEFAULT_HEADERS = {
@@ -15,6 +14,11 @@ DEFAULT_HEADERS = {
15
14
  }
16
15
 
17
16
 
17
+ class NotPDFError(Exception):
18
+ def __init__(self, url: str) -> None:
19
+ super().__init__(f"URL is not a PDF: {url}")
20
+
21
+
18
22
  class PDFLoader(Loader):
19
23
  def load(self, url_or_file: str) -> str:
20
24
  if not url_or_file.startswith("http"):
kabigon/ptt.py CHANGED
@@ -1,12 +1,12 @@
1
1
  from urllib.parse import urlparse
2
2
 
3
- from .errors import NotTwitterURLError
4
3
  from .httpx import HttpxLoader
5
4
  from .loader import Loader
6
5
 
7
6
 
8
- def is_ptt_url(url: str) -> bool:
9
- return urlparse(url).netloc == "www.ptt.cc"
7
+ def check_ptt_url(url: str) -> None:
8
+ if urlparse(url).netloc != "www.ptt.cc":
9
+ raise ValueError(f"URL must be from ptt.cc, got {url}")
10
10
 
11
11
 
12
12
  class PttLoader(Loader):
@@ -20,13 +20,11 @@ class PttLoader(Loader):
20
20
  )
21
21
 
22
22
  def load(self, url: str) -> str:
23
- if not is_ptt_url(url):
24
- raise NotTwitterURLError(url)
23
+ check_ptt_url(url)
25
24
 
26
25
  return self.httpx_loader.load(url)
27
26
 
28
27
  async def async_load(self, url: str):
29
- if not is_ptt_url(url):
30
- raise NotTwitterURLError(url)
28
+ check_ptt_url(url)
31
29
 
32
30
  return await self.httpx_loader.async_load(url)
kabigon/reel.py CHANGED
@@ -1,11 +1,11 @@
1
- from .errors import NotReelURLError
2
1
  from .httpx import HttpxLoader
3
2
  from .loader import Loader
4
3
  from .ytdlp import YtdlpLoader
5
4
 
6
5
 
7
- def is_reel_url(url: str) -> bool:
8
- return url.startswith("https://www.instagram.com/reel")
6
+ def check_reel_url(url: str) -> None:
7
+ if not url.startswith("https://www.instagram.com/reel"):
8
+ raise ValueError(f"URL is not an Instagram Reel: {url}")
9
9
 
10
10
 
11
11
  class ReelLoader(Loader):
@@ -14,8 +14,7 @@ class ReelLoader(Loader):
14
14
  self.ytdlp_loader = YtdlpLoader()
15
15
 
16
16
  def load(self, url: str) -> str:
17
- if not is_reel_url(url):
18
- raise NotReelURLError(url)
17
+ check_reel_url(url)
19
18
 
20
19
  audio_content = self.ytdlp_loader.load(url)
21
20
  html_content = self.httpx_loader.load(url)
@@ -23,8 +22,7 @@ class ReelLoader(Loader):
23
22
  return f"{audio_content}\n\n{html_content}"
24
23
 
25
24
  async def async_load(self, url: str):
26
- if not is_reel_url(url):
27
- raise NotReelURLError(url)
25
+ check_reel_url(url)
28
26
 
29
27
  audio_content = await self.ytdlp_loader.async_load(url)
30
28
  html_content = await self.httpx_loader.async_load(url)
kabigon/twitter.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from urllib.parse import urlparse
2
2
  from urllib.parse import urlunparse
3
3
 
4
- from .errors import NotTwitterURLError
5
4
  from .loader import Loader
6
5
  from .playwright import PlaywrightLoader
7
6
 
@@ -21,8 +20,9 @@ def replace_domain(url: str, new_domain: str = "x.com") -> str:
21
20
  return urlunparse(urlparse(url)._replace(netloc=new_domain))
22
21
 
23
22
 
24
- def is_x_url(url: str) -> bool:
25
- return urlparse(url).netloc in TWITTER_DOMAINS
23
+ def check_x_url(url: str) -> None:
24
+ if urlparse(url).netloc not in TWITTER_DOMAINS:
25
+ raise ValueError(f"URL is not a Twitter URL: {url}")
26
26
 
27
27
 
28
28
  class TwitterLoader(Loader):
@@ -30,16 +30,14 @@ class TwitterLoader(Loader):
30
30
  self.playwright_loader = PlaywrightLoader(wait_until="networkidle")
31
31
 
32
32
  def load(self, url: str) -> str:
33
- if not is_x_url(url):
34
- raise NotTwitterURLError(url)
33
+ check_x_url(url)
35
34
 
36
35
  url = replace_domain(url)
37
36
 
38
37
  return self.playwright_loader.load(url)
39
38
 
40
39
  async def async_load(self, url: str):
41
- if not is_x_url(url):
42
- raise NotTwitterURLError(url)
40
+ check_x_url(url)
43
41
 
44
42
  url = replace_domain(url)
45
43
 
kabigon/youtube.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import aioytt
2
2
  import aioytt.video_id
3
- import timeout_decorator
4
3
  from youtube_transcript_api import YouTubeTranscriptApi
5
4
 
6
5
  from .loader import Loader
@@ -12,7 +11,6 @@ class YoutubeLoader(Loader):
12
11
  def __init__(self, languages: list[str] | None = None) -> None:
13
12
  self.languages = languages or DEFAULT_LANGUAGES
14
13
 
15
- @timeout_decorator.timeout(20)
16
14
  def load(self, url: str) -> str:
17
15
  video_id = aioytt.video_id.parse_video_id(url)
18
16
 
kabigon/ytdlp.py CHANGED
@@ -1,25 +1,21 @@
1
- import functools
1
+ # import functools
2
2
  import hashlib
3
3
  import os
4
- import subprocess
4
+
5
+ # import subprocess
5
6
  import tempfile
6
7
  from typing import Final
7
8
 
8
- import numpy as np
9
- import timeout_decorator
10
- import whisper
9
+ # import numpy as np
11
10
  import yt_dlp
12
11
  from loguru import logger
13
12
 
14
13
  from .loader import Loader
15
14
 
16
15
  try:
17
- import mlx_whisper # noqa: F401 # type: ignore
18
-
19
- _mlx_whisper_installed = True
16
+ import whisper
20
17
  except ImportError:
21
- _mlx_whisper_installed = False
22
-
18
+ logger.info("OpenAI Whisper not installed. Please install it with `pip install openai-whisper`.")
23
19
 
24
20
  DEFAULT_FFMPEG_PATH: Final[str] = "ffmpeg"
25
21
 
@@ -65,68 +61,58 @@ def download_audio(url: str) -> str:
65
61
  return filename + ".mp3"
66
62
 
67
63
 
68
- def load_audio(file: str, sr: int = 16000):
69
- """
70
- Open an audio file and read as mono waveform, resampling as necessary
71
-
72
- Parameters
73
- ----------
74
- file: str
75
- The audio file to open
76
-
77
- sr: int
78
- The sample rate to resample the audio if necessary
79
-
80
- Returns
81
- -------
82
- A NumPy array containing the audio waveform, in float32 dtype.
83
- """
84
- ffmpeg_path = get_ffmpeg_path()
85
-
86
- # This launches a subprocess to decode audio while down-mixing
87
- # and resampling as necessary. Requires the ffmpeg CLI in PATH.
88
- # fmt: off
89
- cmd = [
90
- ffmpeg_path,
91
- "-nostdin",
92
- "-threads", "0",
93
- "-i", file,
94
- "-f", "s16le",
95
- "-ac", "1",
96
- "-acodec", "pcm_s16le",
97
- "-ar", str(sr),
98
- "-"
99
- ]
100
- # fmt: on
101
- try:
102
- out = subprocess.run(cmd, capture_output=True, check=True).stdout
103
- except subprocess.CalledProcessError as e:
104
- raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
105
-
106
- return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
107
-
108
-
109
- @functools.cache
110
- def _load_whisper_model() -> whisper.Whisper:
111
- return whisper.load_model("tiny")
112
-
113
-
114
- def _transcribe(audio: np.ndarray) -> dict:
115
- if _mlx_whisper_installed:
116
- return mlx_whisper.transcribe(audio, path_or_hf_repo="mlx-community/whisper-tiny")
117
-
118
- model = _load_whisper_model()
119
- return model.transcribe(audio)
64
+ # def load_audio(file: str, sr: int = 16000):
65
+ # """
66
+ # Open an audio file and read as mono waveform, resampling as necessary
67
+
68
+ # Parameters
69
+ # ----------
70
+ # file: str
71
+ # The audio file to open
72
+
73
+ # sr: int
74
+ # The sample rate to resample the audio if necessary
75
+
76
+ # Returns
77
+ # -------
78
+ # A NumPy array containing the audio waveform, in float32 dtype.
79
+ # """
80
+ # ffmpeg_path = get_ffmpeg_path()
81
+
82
+ # # This launches a subprocess to decode audio while down-mixing
83
+ # # and resampling as necessary. Requires the ffmpeg CLI in PATH.
84
+ # # fmt: off
85
+ # cmd = [
86
+ # ffmpeg_path,
87
+ # "-nostdin",
88
+ # "-threads", "0",
89
+ # "-i", file,
90
+ # "-f", "s16le",
91
+ # "-ac", "1",
92
+ # "-acodec", "pcm_s16le",
93
+ # "-ar", str(sr),
94
+ # "-"
95
+ # ]
96
+ # # fmt: on
97
+ # try:
98
+ # out = subprocess.run(cmd, capture_output=True, check=True).stdout
99
+ # except subprocess.CalledProcessError as e:
100
+ # raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
101
+
102
+ # return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
120
103
 
121
104
 
122
105
  class YtdlpLoader(Loader):
123
- @timeout_decorator.timeout(300)
106
+ def __init__(self, model: str = "tiny") -> None:
107
+ self.model = whisper.load_model(model)
108
+
124
109
  def load(self, url: str) -> str:
125
110
  audio_file = download_audio(url)
126
- audio = load_audio(audio_file)
111
+ # audio = load_audio(audio_file)
112
+ audio = whisper.load_audio(audio_file)
127
113
 
128
114
  # Clean up the audio file
129
115
  os.remove(audio_file)
130
116
 
131
- result = _transcribe(audio)
117
+ result = self.model.transcribe(audio)
132
118
  return result.get("text", "")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.8.0
3
+ Version: 0.8.2
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
@@ -9,14 +9,19 @@ Requires-Dist: firecrawl-py>=2.4.1
9
9
  Requires-Dist: httpx>=0.28.1
10
10
  Requires-Dist: loguru>=0.7.3
11
11
  Requires-Dist: markdownify>=0.14.1
12
- Requires-Dist: openai-whisper>=20240930
13
- Requires-Dist: playwright>=1.52.0
12
+ Requires-Dist: numpy>=2.2.5
14
13
  Requires-Dist: pypdf>=5.3.0
15
14
  Requires-Dist: rich>=13.9.4
16
- Requires-Dist: timeout-decorator>=0.5.0
17
15
  Requires-Dist: typer>=0.15.3
18
16
  Requires-Dist: youtube-transcript-api>=0.6.3
19
17
  Requires-Dist: yt-dlp>=2025.4.30
18
+ Provides-Extra: all
19
+ Requires-Dist: openai-whisper>=20240930; extra == 'all'
20
+ Requires-Dist: playwright>=1.52.0; extra == 'all'
21
+ Provides-Extra: playwright
22
+ Requires-Dist: playwright>=1.52.0; extra == 'playwright'
23
+ Provides-Extra: whisper
24
+ Requires-Dist: openai-whisper>=20240930; extra == 'whisper'
20
25
  Description-Content-Type: text/markdown
21
26
 
22
27
  # kabigon
@@ -0,0 +1,20 @@
1
+ kabigon/__init__.py,sha256=MUfTFUe5ezA249L2yuU5_2FiewLu86H3VsIpJSne2vQ,560
2
+ kabigon/cli.py,sha256=PJ0wnwp_AgHA54YxGr1jNJ_q3ls7fEymgTJaJxCVU7M,650
3
+ kabigon/compose.py,sha256=Kb6_-SNeh08QELMF-r3mWxasDTxJBuJJQamFPLcwQ1I,1463
4
+ kabigon/firecrawl.py,sha256=-5AI9tla_684dtpubY_BRudqLgw28158WdwA1RjJvAA,778
5
+ kabigon/httpx.py,sha256=Zup9DURyWLqoWzaxBbCYAaV-5LSlHUuAcNyyUsZTVag,696
6
+ kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
7
+ kabigon/pdf.py,sha256=PNOx-Dz_VpN-EVnVar_wJJZdxWrDZrAFE-gSuUR9q7o,1870
8
+ kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
9
+ kabigon/ptt.py,sha256=Gyp2nJrjptkjbwZJ9VEQHX0DEgKBe5QRQOmGVHUUgNA,896
10
+ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
+ kabigon/reel.py,sha256=qOwWCvcp7xNKg0JDunq_Bsl8yqqMzrnAOI9k5mSqrOU,874
12
+ kabigon/twitter.py,sha256=aRqAiFxIwln6lteWdoF6SmvbzO62yBTQRzcB7UcVJwk,1046
13
+ kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
14
+ kabigon/youtube.py,sha256=F9GpLa0iUy03wYU94RrrnrXa6ExqbG6CZpqx5bPENWE,1106
15
+ kabigon/ytdlp.py,sha256=oeFoE7nWZWaT0dR9nwt_SZh_FE0gJ6Gulh6QzGCB6xo,2956
16
+ kabigon-0.8.2.dist-info/METADATA,sha256=UEq0yUqP0dgzyqMhxiHgLvP0wT_nf0PlQLy8VkufrEg,1287
17
+ kabigon-0.8.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ kabigon-0.8.2.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
+ kabigon-0.8.2.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
+ kabigon-0.8.2.dist-info/RECORD,,
kabigon/errors.py DELETED
@@ -1,28 +0,0 @@
1
- class KabigonError(Exception):
2
- pass
3
-
4
-
5
- class FirecrawlKeyError(KabigonError):
6
- def __init__(self) -> None:
7
- super().__init__("FIRECRAWL_API_KEY is not set.")
8
-
9
-
10
- class FirecrawlError(KabigonError):
11
- def __init__(self, url: str, error: str) -> None:
12
- msg = f"Failed to load URL: {url}, got: {error}"
13
- super().__init__(msg)
14
-
15
-
16
- class NotPDFError(KabigonError):
17
- def __init__(self, url: str) -> None:
18
- super().__init__(f"URL is not a PDF: {url}")
19
-
20
-
21
- class NotReelURLError(KabigonError):
22
- def __init__(self, url: str):
23
- super().__init__(f"URL is not an Instagram Reel: {url}")
24
-
25
-
26
- class NotTwitterURLError(KabigonError):
27
- def __init__(self, url: str):
28
- super().__init__(f"URL is not a Twitter URL: {url}")
@@ -1,21 +0,0 @@
1
- kabigon/__init__.py,sha256=MUfTFUe5ezA249L2yuU5_2FiewLu86H3VsIpJSne2vQ,560
2
- kabigon/cli.py,sha256=PJ0wnwp_AgHA54YxGr1jNJ_q3ls7fEymgTJaJxCVU7M,650
3
- kabigon/compose.py,sha256=l2D5OK91VcN2a6DbjMdwBk3YSqzVV7fOVX0TqNm2gJo,1502
4
- kabigon/errors.py,sha256=iri_YS71UsOHwaVtfy5IA6iUfq30DCsptZsChmZaZic,755
5
- kabigon/firecrawl.py,sha256=Xnrlhco_R58x5kwGy9ZCKTnVqS4Pp-D3G0u-qnuGEsU,800
6
- kabigon/httpx.py,sha256=Zup9DURyWLqoWzaxBbCYAaV-5LSlHUuAcNyyUsZTVag,696
7
- kabigon/loader.py,sha256=KhOJvlzLDM0o05o4VqMRgiyRVjofkzuGEcihQagj-8M,452
8
- kabigon/pdf.py,sha256=yJcgkdMMF52baFsFxJp9Jn89KsTKBboLTLwD3gs6U5U,1775
9
- kabigon/playwright.py,sha256=MZ-r0Ej2wWAOJkDLwYRvO77wcDvh38KXz2wgDsCTgm0,1358
10
- kabigon/ptt.py,sha256=S2d6SeFGxM4E4kg-n5blN2BX56CWj_eOhapJxzUnxu8,965
11
- kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
12
- kabigon/reel.py,sha256=J2QOxGMYi_HaEscQPIipPEoHGN_iksGxR6pV_XvryME,929
13
- kabigon/twitter.py,sha256=U07pa8xA0nHAaaDPeUelQRvXR5ZnUvYJZW35xRAvHA8,1114
14
- kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
15
- kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
16
- kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
17
- kabigon-0.8.0.dist-info/METADATA,sha256=SAazEEnqklluyiH2psGQwUI40ee400GELlcxv0OyvuU,1079
18
- kabigon-0.8.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
19
- kabigon-0.8.0.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
20
- kabigon-0.8.0.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
21
- kabigon-0.8.0.dist-info/RECORD,,