kabigon-0.4.1-py3-none-any.whl → kabigon-0.4.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kabigon/youtube.py +3 -68
- {kabigon-0.4.1.dist-info → kabigon-0.4.2.dist-info}/METADATA +1 -1
- {kabigon-0.4.1.dist-info → kabigon-0.4.2.dist-info}/RECORD +6 -6
- {kabigon-0.4.1.dist-info → kabigon-0.4.2.dist-info}/WHEEL +0 -0
- {kabigon-0.4.1.dist-info → kabigon-0.4.2.dist-info}/entry_points.txt +0 -0
- {kabigon-0.4.1.dist-info → kabigon-0.4.2.dist-info}/licenses/LICENSE +0 -0
kabigon/youtube.py
CHANGED
@@ -1,76 +1,11 @@
|
|
1
|
-
from urllib.parse import parse_qs
|
2
|
-
from urllib.parse import urlparse
|
3
|
-
|
4
1
|
import aioytt
|
2
|
+
import aioytt.video_id
|
5
3
|
import timeout_decorator
|
6
4
|
from youtube_transcript_api import YouTubeTranscriptApi
|
7
5
|
|
8
6
|
from .loader import Loader
|
9
|
-
from .loader import LoaderError
|
10
7
|
|
11
8
|
DEFAULT_LANGUAGES = ["zh-TW", "zh-Hant", "zh", "zh-Hans", "ja", "en", "ko"]
|
12
|
-
ALLOWED_SCHEMES = {
|
13
|
-
"http",
|
14
|
-
"https",
|
15
|
-
}
|
16
|
-
ALLOWED_NETLOCS = {
|
17
|
-
"youtu.be",
|
18
|
-
"m.youtube.com",
|
19
|
-
"youtube.com",
|
20
|
-
"www.youtube.com",
|
21
|
-
"www.youtube-nocookie.com",
|
22
|
-
"vid.plus",
|
23
|
-
}
|
24
|
-
|
25
|
-
|
26
|
-
class UnsupportedURLSchemeError(LoaderError):
|
27
|
-
def __init__(self, scheme: str) -> None:
|
28
|
-
super().__init__(f"unsupported URL scheme: {scheme}")
|
29
|
-
|
30
|
-
|
31
|
-
class UnsupportedURLNetlocError(LoaderError):
|
32
|
-
def __init__(self, netloc: str) -> None:
|
33
|
-
super().__init__(f"unsupported URL netloc: {netloc}")
|
34
|
-
|
35
|
-
|
36
|
-
class VideoIDError(LoaderError):
|
37
|
-
def __init__(self, video_id: str) -> None:
|
38
|
-
super().__init__(f"invalid video ID: {video_id}")
|
39
|
-
|
40
|
-
|
41
|
-
class NoVideoIDFoundError(LoaderError):
|
42
|
-
def __init__(self, url: str) -> None:
|
43
|
-
super().__init__(f"no video found in URL: {url}")
|
44
|
-
|
45
|
-
|
46
|
-
def parse_video_id(url: str) -> str:
|
47
|
-
"""Parse a YouTube URL and return the video ID if valid, otherwise None."""
|
48
|
-
parsed_url = urlparse(url)
|
49
|
-
|
50
|
-
if parsed_url.scheme not in ALLOWED_SCHEMES:
|
51
|
-
raise UnsupportedURLSchemeError(parsed_url.scheme)
|
52
|
-
|
53
|
-
if parsed_url.netloc not in ALLOWED_NETLOCS:
|
54
|
-
raise UnsupportedURLNetlocError(parsed_url.netloc)
|
55
|
-
|
56
|
-
path = parsed_url.path
|
57
|
-
|
58
|
-
if path.endswith("/watch"):
|
59
|
-
query = parsed_url.query
|
60
|
-
parsed_query = parse_qs(query)
|
61
|
-
if "v" in parsed_query:
|
62
|
-
ids = parsed_query["v"]
|
63
|
-
video_id = ids if isinstance(ids, str) else ids[0]
|
64
|
-
else:
|
65
|
-
raise NoVideoIDFoundError(url)
|
66
|
-
else:
|
67
|
-
path = parsed_url.path.lstrip("/")
|
68
|
-
video_id = path.split("/")[-1]
|
69
|
-
|
70
|
-
if len(video_id) != 11: # Video IDs are 11 characters long
|
71
|
-
raise VideoIDError(video_id)
|
72
|
-
|
73
|
-
return video_id
|
74
9
|
|
75
10
|
|
76
11
|
class YoutubeLoader(Loader):
|
@@ -79,7 +14,7 @@ class YoutubeLoader(Loader):
|
|
79
14
|
|
80
15
|
@timeout_decorator.timeout(20)
|
81
16
|
def load(self, url: str) -> str:
|
82
|
-
video_id = parse_video_id(url)
|
17
|
+
video_id = aioytt.video_id.parse_video_id(url)
|
83
18
|
|
84
19
|
transcript_pieces: list[dict[str, str | float]] = YouTubeTranscriptApi().get_transcript(
|
85
20
|
video_id, self.languages
|
@@ -98,5 +33,5 @@ class YoutubeLoader(Loader):
|
|
98
33
|
for piece in transcript:
|
99
34
|
text = piece.text.strip()
|
100
35
|
if text:
|
101
|
-
lines
|
36
|
+
lines.append(text)
|
102
37
|
return "\n".join(lines)
|
@@ -10,10 +10,10 @@ kabigon/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
10
10
|
kabigon/reel.py,sha256=1JTcn7qVH7FcD0Oj-Rz-pnjI-xS1UtkoJcuClGb8ExQ,1124
|
11
11
|
kabigon/singlefile.py,sha256=CeTT2WPYm0vb1xWPNdyBN4uHRw9hRqfZm68D-nEcUA8,1800
|
12
12
|
kabigon/utils.py,sha256=eNTLtHLSB2erDac2HH3jWemgfr8Ou_ozwVb8h9BD-4g,922
|
13
|
-
kabigon/youtube.py,sha256=
|
13
|
+
kabigon/youtube.py,sha256=HoiFNq0ookPL7_rO_wloBaY8yTIX6xP8A77F7y02q64,1166
|
14
14
|
kabigon/ytdlp.py,sha256=kG1fXqU650otOWespjOSkGK_-jk1wO-sWiR60_UPJxY,3125
|
15
|
-
kabigon-0.4.
|
16
|
-
kabigon-0.4.
|
17
|
-
kabigon-0.4.
|
18
|
-
kabigon-0.4.
|
19
|
-
kabigon-0.4.
|
15
|
+
kabigon-0.4.2.dist-info/METADATA,sha256=JHbf13Nnhr05WfSS1hijT-YkeCewuWr5kYbzxjlJ-M8,1049
|
16
|
+
kabigon-0.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
17
|
+
kabigon-0.4.2.dist-info/entry_points.txt,sha256=O3FYAO9w-NQvlGMJrBvtrnGHSK2QkUnQBTa30YXRbVE,45
|
18
|
+
kabigon-0.4.2.dist-info/licenses/LICENSE,sha256=H2T3_RTgmcngMeC7p_SXT3GwBLkd2DaNgAZuxulcfiA,1066
|
19
|
+
kabigon-0.4.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|