kabigon 0.8.1__py3-none-any.whl → 0.8.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabigon/youtube.py +0 -2
- kabigon/ytdlp.py +15 -92
- {kabigon-0.8.1.dist-info → kabigon-0.8.3.dist-info}/METADATA +10 -5
- {kabigon-0.8.1.dist-info → kabigon-0.8.3.dist-info}/RECORD +7 -7
- {kabigon-0.8.1.dist-info → kabigon-0.8.3.dist-info}/WHEEL +0 -0
- {kabigon-0.8.1.dist-info → kabigon-0.8.3.dist-info}/entry_points.txt +0 -0
- {kabigon-0.8.1.dist-info → kabigon-0.8.3.dist-info}/licenses/LICENSE +0 -0
kabigon/youtube.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
import aioytt
|
2
2
|
import aioytt.video_id
|
3
|
-
import timeout_decorator
|
4
3
|
from youtube_transcript_api import YouTubeTranscriptApi
|
5
4
|
|
6
5
|
from .loader import Loader
|
@@ -12,7 +11,6 @@ class YoutubeLoader(Loader):
|
|
12
11
|
def __init__(self, languages: list[str] | None = None) -> None:
|
13
12
|
self.languages = languages or DEFAULT_LANGUAGES
|
14
13
|
|
15
|
-
@timeout_decorator.timeout(20)
|
16
14
|
def load(self, url: str) -> str:
|
17
15
|
video_id = aioytt.video_id.parse_video_id(url)
|
18
16
|
|
kabigon/ytdlp.py
CHANGED
@@ -1,49 +1,15 @@
|
|
1
|
-
import functools
|
2
1
|
import hashlib
|
3
2
|
import os
|
4
|
-
import subprocess
|
5
3
|
import tempfile
|
6
|
-
from typing import Final
|
7
4
|
|
8
|
-
import numpy as np
|
9
|
-
import timeout_decorator
|
10
|
-
import whisper
|
11
5
|
import yt_dlp
|
12
6
|
from loguru import logger
|
13
7
|
|
14
8
|
from .loader import Loader
|
15
9
|
|
16
|
-
try:
|
17
|
-
import mlx_whisper # noqa: F401 # type: ignore
|
18
|
-
|
19
|
-
_mlx_whisper_installed = True
|
20
|
-
except ImportError:
|
21
|
-
_mlx_whisper_installed = False
|
22
|
-
|
23
|
-
|
24
|
-
DEFAULT_FFMPEG_PATH: Final[str] = "ffmpeg"
|
25
|
-
|
26
|
-
|
27
|
-
def hash_url(url: str) -> str:
|
28
|
-
return hashlib.sha512(url.encode("utf-8")).hexdigest()
|
29
|
-
|
30
|
-
|
31
|
-
def get_ffmpeg_path() -> str:
|
32
|
-
path = os.getenv("FFMPEG_PATH")
|
33
|
-
if not path:
|
34
|
-
path = DEFAULT_FFMPEG_PATH
|
35
|
-
logger.warning("FFMPEG_PATH not set, using default: {}", DEFAULT_FFMPEG_PATH)
|
36
|
-
|
37
|
-
return path
|
38
|
-
|
39
10
|
|
40
11
|
def download_audio(url: str) -> str:
|
41
|
-
|
42
|
-
|
43
|
-
filename = os.path.join(
|
44
|
-
tempfile.gettempdir(),
|
45
|
-
hash_url(url),
|
46
|
-
)
|
12
|
+
filename = os.path.join(tempfile.gettempdir(), hashlib.sha512(url.encode("utf-8")).hexdigest())
|
47
13
|
|
48
14
|
ydl_opts = {
|
49
15
|
"format": "bestaudio/best",
|
@@ -55,7 +21,7 @@ def download_audio(url: str) -> str:
|
|
55
21
|
}
|
56
22
|
],
|
57
23
|
"outtmpl": filename,
|
58
|
-
"ffmpeg_location": get_ffmpeg_path(),
|
24
|
+
"ffmpeg_location": os.getenv("FFMPEG_PATH", "ffmpeg"),
|
59
25
|
"match_filter": yt_dlp.match_filter_func(["!is_live"]),
|
60
26
|
}
|
61
27
|
|
@@ -65,68 +31,25 @@ def download_audio(url: str) -> str:
|
|
65
31
|
return filename + ".mp3"
|
66
32
|
|
67
33
|
|
68
|
-
|
69
|
-
"""
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
sr: int
|
78
|
-
The sample rate to resample the audio if necessary
|
79
|
-
|
80
|
-
Returns
|
81
|
-
-------
|
82
|
-
A NumPy array containing the audio waveform, in float32 dtype.
|
83
|
-
"""
|
84
|
-
ffmpeg_path = get_ffmpeg_path()
|
85
|
-
|
86
|
-
# This launches a subprocess to decode audio while down-mixing
|
87
|
-
# and resampling as necessary. Requires the ffmpeg CLI in PATH.
|
88
|
-
# fmt: off
|
89
|
-
cmd = [
|
90
|
-
ffmpeg_path,
|
91
|
-
"-nostdin",
|
92
|
-
"-threads", "0",
|
93
|
-
"-i", file,
|
94
|
-
"-f", "s16le",
|
95
|
-
"-ac", "1",
|
96
|
-
"-acodec", "pcm_s16le",
|
97
|
-
"-ar", str(sr),
|
98
|
-
"-"
|
99
|
-
]
|
100
|
-
# fmt: on
|
101
|
-
try:
|
102
|
-
out = subprocess.run(cmd, capture_output=True, check=True).stdout
|
103
|
-
except subprocess.CalledProcessError as e:
|
104
|
-
raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}") from e
|
105
|
-
|
106
|
-
return np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
|
107
|
-
|
108
|
-
|
109
|
-
@functools.cache
|
110
|
-
def _load_whisper_model() -> whisper.Whisper:
|
111
|
-
return whisper.load_model("tiny")
|
112
|
-
|
113
|
-
|
114
|
-
def _transcribe(audio: np.ndarray) -> dict:
|
115
|
-
if _mlx_whisper_installed:
|
116
|
-
return mlx_whisper.transcribe(audio, path_or_hf_repo="mlx-community/whisper-tiny")
|
117
|
-
|
118
|
-
model = _load_whisper_model()
|
119
|
-
return model.transcribe(audio)
|
34
|
+
class YtdlpLoader(Loader):
|
35
|
+
def __init__(self, model: str = "tiny") -> None:
|
36
|
+
try:
|
37
|
+
import whisper
|
38
|
+
except ImportError as e:
|
39
|
+
raise ImportError(
|
40
|
+
"OpenAI Whisper not installed. Please install it with `pip install openai-whisper`."
|
41
|
+
) from e
|
120
42
|
|
43
|
+
self.model = whisper.load_model(model)
|
44
|
+
self.load_audio = whisper.load_audio
|
121
45
|
|
122
|
-
class YtdlpLoader(Loader):
|
123
|
-
@timeout_decorator.timeout(300)
|
124
46
|
def load(self, url: str) -> str:
|
125
47
|
audio_file = download_audio(url)
|
126
|
-
audio = load_audio(audio_file)
|
48
|
+
audio = self.load_audio(audio_file)
|
127
49
|
|
128
50
|
# Clean up the audio file
|
129
51
|
os.remove(audio_file)
|
130
52
|
|
131
|
-
|
53
|
+
logger.info("Transcribing audio file: {}", audio_file)
|
54
|
+
result = self.model.transcribe(audio)
|
132
55
|
return result.get("text", "")
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: kabigon
|
3
|
-
Version: 0.8.1
|
3
|
+
Version: 0.8.3
|
4
4
|
Author-email: narumi <toucans-cutouts0f@icloud.com>
|
5
5
|
License-File: LICENSE
|
6
6
|
Requires-Python: >=3.10
|
@@ -9,14 +9,19 @@ Requires-Dist: firecrawl-py>=2.4.1
|
|
9
9
|
Requires-Dist: httpx>=0.28.1
|
10
10
|
Requires-Dist: loguru>=0.7.3
|
11
11
|
Requires-Dist: markdownify>=0.14.1
|
12
|
-
Requires-Dist:
|
13
|
-
Requires-Dist: playwright>=1.52.0
|
12
|
+
Requires-Dist: numpy>=2.2.5
|
14
13
|
Requires-Dist: pypdf>=5.3.0
|
15
14
|
Requires-Dist: rich>=13.9.4
|
16
|
-
Requires-Dist: timeout-decorator>=0.5.0
|
17
15
|
Requires-Dist: typer>=0.15.3
|
18
16
|
Requires-Dist: youtube-transcript-api>=0.6.3
|
19
17
|
Requires-Dist: yt-dlp>=2025.4.30
|
18
|
+
Provides-Extra: all
|
19
|
+
Requires-Dist: openai-whisper>=20240930; extra == 'all'
|
20
|
+
Requires-Dist: playwright>=1.52.0; extra == 'all'
|
21
|
+
Provides-Extra: playwright
|
22
|
+
Requires-Dist: playwright>=1.52.0; extra == 'playwright'
|
23
|
+
Provides-Extra: whisper
|
24
|
+
Requires-Dist: openai-whisper>=20240930; extra == 'whisper'
|
20
25
|
Description-Content-Type: text/markdown
|
21
26
|
|
22
27
|
# kabigon
|
@@ -24,7 +29,7 @@ Description-Content-Type: text/markdown
|
|
24
29
|
## Installation
|
25
30
|
|
26
31
|
```shell
|
27
|
-
pip install kabigon
|
32
|
+
pip install kabigon[all]
|
28
33
|
playwright install chromium
|
29
34
|
```
|
30
35
|
|
@@ -11,10 +11,10 @@ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
11
11
|
kabigon/reel.py,sha256=qOwWCvcp7xNKg0JDunq_Bsl8yqqMzrnAOI9k5mSqrOU,874
|
12
12
|
kabigon/twitter.py,sha256=aRqAiFxIwln6lteWdoF6SmvbzO62yBTQRzcB7UcVJwk,1046
|
13
13
|
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
14
|
-
kabigon/youtube.py,sha256=
|
15
|
-
kabigon/ytdlp.py,sha256=
|
16
|
-
kabigon-0.8.
|
17
|
-
kabigon-0.8.
|
18
|
-
kabigon-0.8.
|
19
|
-
kabigon-0.8.
|
20
|
-
kabigon-0.8.
|
14
|
+
kabigon/youtube.py,sha256=F9GpLa0iUy03wYU94RrrnrXa6ExqbG6CZpqx5bPENWE,1106
|
15
|
+
kabigon/ytdlp.py,sha256=OGVrKRGXY_eHePgDmN52Kkui12NN44LsIPonD_61QBM,1501
|
16
|
+
kabigon-0.8.3.dist-info/METADATA,sha256=uidye_pVnhpAivxY26MlZMSascT1MmuMjBuCAw84Ics,1292
|
17
|
+
kabigon-0.8.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
18
|
+
kabigon-0.8.3.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
19
|
+
kabigon-0.8.3.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
20
|
+
kabigon-0.8.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|