kabigon 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
kabigon/youtube.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import aioytt
2
2
  import aioytt.video_id
3
- import timeout_decorator
4
3
  from youtube_transcript_api import YouTubeTranscriptApi
5
4
 
6
5
  from .loader import Loader
@@ -12,7 +11,6 @@ class YoutubeLoader(Loader):
12
11
  def __init__(self, languages: list[str] | None = None) -> None:
13
12
  self.languages = languages or DEFAULT_LANGUAGES
14
13
 
15
- @timeout_decorator.timeout(20)
16
14
  def load(self, url: str) -> str:
17
15
  video_id = aioytt.video_id.parse_video_id(url)
18
16
 
kabigon/ytdlp.py CHANGED
@@ -1,49 +1,15 @@
1
- import functools
2
1
  import hashlib
3
2
  import os
4
- import subprocess
5
3
  import tempfile
6
- from typing import Final
7
4
 
8
- import numpy as np
9
- import timeout_decorator
10
- import whisper
11
5
  import yt_dlp
12
6
  from loguru import logger
13
7
 
14
8
  from .loader import Loader
15
9
 
16
- try:
17
- import mlx_whisper # noqa: F401 # type: ignore
18
-
19
- _mlx_whisper_installed = True
20
- except ImportError:
21
- _mlx_whisper_installed = False
22
-
23
-
24
- DEFAULT_FFMPEG_PATH: Final[str] = "ffmpeg"
25
-
26
-
27
- def hash_url(url: str) -> str:
28
- return hashlib.sha512(url.encode("utf-8")).hexdigest()
29
-
30
-
31
- def get_ffmpeg_path() -> str:
32
- path = os.getenv("FFMPEG_PATH")
33
- if not path:
34
- path = DEFAULT_FFMPEG_PATH
35
- logger.warning("FFMPEG_PATH not set, using default: {}", DEFAULT_FFMPEG_PATH)
36
-
37
- return path
38
-
39
10
 
40
11
  def download_audio(url: str) -> str:
41
- ffmpeg_path = get_ffmpeg_path()
42
-
43
- filename = os.path.join(
44
- tempfile.gettempdir(),
45
- hash_url(url),
46
- )
12
+ filename = os.path.join(tempfile.gettempdir(), hashlib.sha512(url.encode("utf-8")).hexdigest())
47
13
 
48
14
  ydl_opts = {
49
15
  "format": "bestaudio/best",
@@ -55,7 +21,7 @@ def download_audio(url: str) -> str:
55
21
  }
56
22
  ],
57
23
  "outtmpl": filename,
58
- "ffmpeg_location": ffmpeg_path,
24
+ "ffmpeg_location": os.getenv("FFMPEG_PATH", "ffmpeg"),
59
25
  "match_filter": yt_dlp.match_filter_func(["!is_live"]),
60
26
  }
61
27
 
@@ -65,68 +31,25 @@ def download_audio(url: str) -> str:
65
31
  return filename + ".mp3"
66
32
 
67
33
 
68
- def load_audio(file: str, sr: int = 16000):
69
- """
70
- Open an audio file and read as mono waveform, resampling as necessary
71
-
72
- Parameters
73
- ----------
74
- file: str
75
- The audio file to open
76
-
77
- sr: int
78
- The sample rate to resample the audio if necessary
79
-
80
- Returns
81
- -------
82
- A NumPy array containing the audio waveform, in float32 dtype.
83
- """
84
- ffmpeg_path = get_ffmpeg_path()
85
-
86
- # This launches a subprocess to decode audio while down-mixing
87
- # and resampling as necessary. Requires the ffmpeg CLI in PATH.
88
- # fmt: off
89
- cmd = [
90
- ffmpeg_path,
91
- "-nostdin",
92
- "-threads", "0",
93
- "-i", file,
94
- "-f", "s16le",
95
- "-ac", "1",
96
- "-acodec", "pcm_s16le",
97
- "-ar", str(sr),
98
- "-"
99
- ]
100
- # fmt: on
101
- try:
102
- out = subprocess.run(cmd, capture_output=True, check=True).stdout
103
- except subprocess.CalledProcessError as e:
104
- raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
105
-
106
- return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
107
-
108
-
109
- @functools.cache
110
- def _load_whisper_model() -> whisper.Whisper:
111
- return whisper.load_model("tiny")
112
-
113
-
114
- def _transcribe(audio: np.ndarray) -> dict:
115
- if _mlx_whisper_installed:
116
- return mlx_whisper.transcribe(audio, path_or_hf_repo="mlx-community/whisper-tiny")
117
-
118
- model = _load_whisper_model()
119
- return model.transcribe(audio)
34
+ class YtdlpLoader(Loader):
35
+ def __init__(self, model: str = "tiny") -> None:
36
+ try:
37
+ import whisper
38
+ except ImportError as e:
39
+ raise ImportError(
40
+ "OpenAI Whisper not installed. Please install it with `pip install openai-whisper`."
41
+ ) from e
120
42
 
43
+ self.model = whisper.load_model(model)
44
+ self.load_audio = whisper.load_audio
121
45
 
122
- class YtdlpLoader(Loader):
123
- @timeout_decorator.timeout(300)
124
46
  def load(self, url: str) -> str:
125
47
  audio_file = download_audio(url)
126
- audio = load_audio(audio_file)
48
+ audio = self.load_audio(audio_file)
127
49
 
128
50
  # Clean up the audio file
129
51
  os.remove(audio_file)
130
52
 
131
- result = _transcribe(audio)
53
+ logger.info("Transcribing audio file: {}", audio_file)
54
+ result = self.model.transcribe(audio)
132
55
  return result.get("text", "")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: kabigon
3
- Version: 0.8.1
3
+ Version: 0.8.3
4
4
  Author-email: narumi <toucans-cutouts0f@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.10
@@ -9,14 +9,19 @@ Requires-Dist: firecrawl-py>=2.4.1
9
9
  Requires-Dist: httpx>=0.28.1
10
10
  Requires-Dist: loguru>=0.7.3
11
11
  Requires-Dist: markdownify>=0.14.1
12
- Requires-Dist: openai-whisper>=20240930
13
- Requires-Dist: playwright>=1.52.0
12
+ Requires-Dist: numpy>=2.2.5
14
13
  Requires-Dist: pypdf>=5.3.0
15
14
  Requires-Dist: rich>=13.9.4
16
- Requires-Dist: timeout-decorator>=0.5.0
17
15
  Requires-Dist: typer>=0.15.3
18
16
  Requires-Dist: youtube-transcript-api>=0.6.3
19
17
  Requires-Dist: yt-dlp>=2025.4.30
18
+ Provides-Extra: all
19
+ Requires-Dist: openai-whisper>=20240930; extra == 'all'
20
+ Requires-Dist: playwright>=1.52.0; extra == 'all'
21
+ Provides-Extra: playwright
22
+ Requires-Dist: playwright>=1.52.0; extra == 'playwright'
23
+ Provides-Extra: whisper
24
+ Requires-Dist: openai-whisper>=20240930; extra == 'whisper'
20
25
  Description-Content-Type: text/markdown
21
26
 
22
27
  # kabigon
@@ -24,7 +29,7 @@ Description-Content-Type: text/markdown
24
29
  ## Installation
25
30
 
26
31
  ```shell
27
- pip install kabigon
32
+ pip install kabigon[all]
28
33
  playwright install chromium
29
34
  ```
30
35
 
@@ -11,10 +11,10 @@ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
11
11
  kabigon/reel.py,sha256=qOwWCvcp7xNKg0JDunq_Bsl8yqqMzrnAOI9k5mSqrOU,874
12
12
  kabigon/twitter.py,sha256=aRqAiFxIwln6lteWdoF6SmvbzO62yBTQRzcB7UcVJwk,1046
13
13
  kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
14
- kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
15
- kabigon/ytdlp.py,sha256=_QRcyFx9s7NnI1MvcWdKKxlX-hHLnqtduCSL5_UH6dU,3140
16
- kabigon-0.8.1.dist-info/METADATA,sha256=8nZX3ukADj-qndGHMMuZluG_HdI8-wqNu8QJBoplPRc,1079
17
- kabigon-0.8.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
- kabigon-0.8.1.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
- kabigon-0.8.1.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
- kabigon-0.8.1.dist-info/RECORD,,
14
+ kabigon/youtube.py,sha256=F9GpLa0iUy03wYU94RrrnrXa6ExqbG6CZpqx5bPENWE,1106
15
+ kabigon/ytdlp.py,sha256=OGVrKRGXY_eHePgDmN52Kkui12NN44LsIPonD_61QBM,1501
16
+ kabigon-0.8.3.dist-info/METADATA,sha256=uidye_pVnhpAivxY26MlZMSascT1MmuMjBuCAw84Ics,1292
17
+ kabigon-0.8.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
18
+ kabigon-0.8.3.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
19
+ kabigon-0.8.3.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
20
+ kabigon-0.8.3.dist-info/RECORD,,