getscript 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- getscript/__init__.py +6 -0
- getscript/apple.py +255 -0
- getscript/cli.py +344 -0
- getscript/completions.py +81 -0
- getscript/config.py +64 -0
- getscript/detect.py +56 -0
- getscript/output.py +97 -0
- getscript/picker.py +69 -0
- getscript/progress.py +34 -0
- getscript/search.py +83 -0
- getscript/upload.py +131 -0
- getscript/youtube.py +58 -0
- getscript-0.12.0.dist-info/METADATA +125 -0
- getscript-0.12.0.dist-info/RECORD +18 -0
- getscript-0.12.0.dist-info/WHEEL +5 -0
- getscript-0.12.0.dist-info/entry_points.txt +2 -0
- getscript-0.12.0.dist-info/licenses/LICENSE +21 -0
- getscript-0.12.0.dist-info/top_level.txt +1 -0
getscript/__init__.py
ADDED
getscript/apple.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""Apple Podcasts transcript fetching."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import tempfile
|
|
9
|
+
import defusedxml.ElementTree as ET
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from urllib.error import HTTPError
|
|
12
|
+
from urllib.request import Request, urlopen
|
|
13
|
+
|
|
14
|
+
# Maximum age of the cached bearer token, in seconds (30 days).
CACHE_VALIDITY = 30 * 24 * 60 * 60
|
|
15
|
+
|
|
16
|
+
# Obj-C source for bearer token via AMSMescal (FairPlay signing).
|
|
17
|
+
# Compiled and run as a subprocess to isolate potential segfaults
|
|
18
|
+
# from the thenWithBlock: cleanup in Apple's promise implementation.
|
|
19
|
+
# Objective-C program text that is written to disk, compiled with clang and run
# as a child process. On success it prints the bearer token (and nothing else)
# to stdout; diagnostics go to stderr. The semaphore/promise sequencing is kept
# exactly as originally written because it interacts with Apple's private
# promise implementation.
OBJC_TOKEN_SOURCE = r'''
#import <Foundation/Foundation.h>
#import <objc/runtime.h>
#import <objc/message.h>
#import <dlfcn.h>

// Cast objc_msgSend to typed function pointers to avoid selector validation
typedef id (*msg_id)(id, SEL, ...);
typedef id (*msg_id_id)(id, SEL, id, ...);
typedef id (*msg_id_id_id)(id, SEL, id, id, ...);
typedef void (*msg_void_id_str)(id, SEL, id, NSString *);

int main() {
    @autoreleasepool {
        dlopen("/System/Library/PrivateFrameworks/PodcastsFoundation.framework/PodcastsFoundation", RTLD_LAZY);

        Class AMSMescal = objc_getClass("AMSMescal");
        Class AMSMescalSession = objc_getClass("AMSMescalSession");
        Class AMSURLRequestClass = objc_getClass("AMSURLRequest");
        Class IMURLBag = objc_getClass("IMURLBag");

        if (!AMSMescal || !AMSMescalSession || !AMSURLRequestClass || !IMURLBag) {
            fprintf(stderr, "Failed to load required Apple private frameworks. macOS 15.5+ required.\n");
            return 1;
        }

        NSString *storeFront = @"143441-1,42 t:podcasts1";
        NSDateFormatter *formatter = [[NSDateFormatter alloc] init];
        // Fixed-format timestamps must not follow the user's calendar or
        // 12/24-hour preferences, so pin the formatter to en_US_POSIX.
        [formatter setLocale:[NSLocale localeWithLocaleIdentifier:@"en_US_POSIX"]];
        [formatter setDateFormat:@"yyyy-MM-dd'T'HH:mm:ss'Z'"];
        [formatter setTimeZone:[NSTimeZone timeZoneWithAbbreviation:@"UTC"]];
        NSString *timestamp = [formatter stringFromDate:[NSDate date]];

        NSURL *tokenURL = [NSURL URLWithString:@"https://sf-api-token-service.itunes.apple.com/apiToken?clientClass=apple&clientId=com.apple.podcasts.macos&os=OS%20X&osVersion=15.5&productVersion=1.1.0&version=2"];
        NSMutableURLRequest *nsRequest = [NSMutableURLRequest requestWithURL:tokenURL];

        id urlRequest = ((msg_id_id)objc_msgSend)(
            [AMSURLRequestClass alloc],
            sel_registerName("initWithRequest:"),
            nsRequest
        );
        ((msg_void_id_str)objc_msgSend)(urlRequest, sel_registerName("setValue:forHTTPHeaderField:"), timestamp, @"x-request-timestamp");
        ((msg_void_id_str)objc_msgSend)(urlRequest, sel_registerName("setValue:forHTTPHeaderField:"), storeFront, @"X-Apple-Store-Front");

        NSDictionary *policy = @{
            @"fields": @[@"clientId"],
            @"headers": @[@"x-apple-store-front", @"x-apple-client-application", @"x-request-timestamp"]
        };
        id signature = ((msg_id_id_id)objc_msgSend)(
            (id)AMSMescal,
            sel_registerName("_signedActionDataFromRequest:policy:"),
            urlRequest, policy
        );

        id session = ((msg_id)objc_msgSend)((id)AMSMescalSession, sel_registerName("defaultSession"));
        id urlBag = ((msg_id)objc_msgSend)([IMURLBag alloc], sel_registerName("init"));

        dispatch_semaphore_t sema = dispatch_semaphore_create(0);

        id signedPromise = ((msg_id_id_id)objc_msgSend)(session, sel_registerName("signData:bag:"), signature, urlBag);

        ((msg_id_id)objc_msgSend)(signedPromise, sel_registerName("thenWithBlock:"), ^(id result) {
            NSString *sig = [(NSData *)result base64EncodedStringWithOptions:0];

            NSMutableURLRequest *signedRequest = [NSMutableURLRequest requestWithURL:tokenURL];
            [signedRequest setValue:timestamp forHTTPHeaderField:@"x-request-timestamp"];
            [signedRequest setValue:storeFront forHTTPHeaderField:@"X-Apple-Store-Front"];
            [signedRequest setValue:sig forHTTPHeaderField:@"X-Apple-ActionSignature"];

            NSURLSessionDataTask *task = [[NSURLSession sharedSession] dataTaskWithRequest:signedRequest completionHandler:^(NSData *data, NSURLResponse *response, NSError *error) {
                // JSONObjectWithData: raises on nil data, so handle transport
                // failures before parsing and always release the waiter.
                if (data == nil) {
                    fprintf(stderr, "Token request failed: %s\n",
                            [[error localizedDescription] UTF8String] ?: "no response data");
                    dispatch_semaphore_signal(sema);
                    return;
                }

                id json = [NSJSONSerialization JSONObjectWithData:data options:0 error:nil];
                id token = nil;
                if ([json isKindOfClass:[NSDictionary class]]) {
                    token = ((NSDictionary *)json)[@"token"];
                }

                // Only a string token is ever written to stdout; the parent
                // process treats anything else as a failure.
                if ([token isKindOfClass:[NSString class]]) {
                    printf("%s", [(NSString *)token UTF8String]);
                    fflush(stdout);
                } else {
                    fprintf(stderr, "Token response did not contain a token string.\n");
                }
                dispatch_semaphore_signal(sema);
            }];
            [task resume];
            dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
        });

        dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
    }
    return 0;
}
'''
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _get_cache_path(cache_dir: str) -> str:
|
|
104
|
+
return os.path.join(cache_dir, "apple_token")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_bearer_token(cache_dir: str) -> str | None:
    """Return an Apple bearer token, preferring a still-valid cached copy.

    The cache file is used when it is younger than ``CACHE_VALIDITY`` seconds
    and its content starts with "ey". Otherwise a new token is requested via
    ``_compile_and_fetch_token`` and, on success, written back to the cache.

    Args:
        cache_dir: Directory that holds the token cache file.

    Returns:
        The token string, or None when no token could be obtained.
    """
    cache_path = _get_cache_path(cache_dir)

    # A missing, unreadable or undecodable cache file is simply a cache miss.
    try:
        age = datetime.now().timestamp() - os.path.getmtime(cache_path)
        if age < CACHE_VALIDITY:
            with open(cache_path) as f:
                cached = f.read().strip()
            if cached.startswith("ey"):
                return cached
    except (OSError, UnicodeDecodeError):
        pass

    token = _compile_and_fetch_token()
    if token:
        try:
            os.makedirs(os.path.dirname(cache_path) or ".", exist_ok=True)
            # Create the file with owner-only permissions from the start so the
            # token is never readable by other users, not even briefly.
            fd = os.open(cache_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "w") as f:
                f.write(token)
            # Tighten the mode of a cache file that already existed.
            os.chmod(cache_path, 0o600)
        except OSError as e:
            # Failing to cache must not discard a token that was just fetched.
            print(f"Warning: could not cache Apple token: {e}", file=sys.stderr)
    return token
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _compile_and_fetch_token() -> str | None:
|
|
129
|
+
"""Compile Obj-C helper, run it, return bearer token."""
|
|
130
|
+
if sys.platform != "darwin":
|
|
131
|
+
print(
|
|
132
|
+
"Apple Podcasts transcripts require macOS with Xcode CLI tools.",
|
|
133
|
+
file=sys.stderr,
|
|
134
|
+
)
|
|
135
|
+
return None
|
|
136
|
+
|
|
137
|
+
tmpdir = tempfile.mkdtemp(prefix="getscript-")
|
|
138
|
+
src_path = os.path.join(tmpdir, "token.m")
|
|
139
|
+
bin_path = os.path.join(tmpdir, "token")
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
with open(src_path, "w") as src:
|
|
143
|
+
src.write(OBJC_TOKEN_SOURCE)
|
|
144
|
+
|
|
145
|
+
comp = subprocess.run(
|
|
146
|
+
[
|
|
147
|
+
"clang",
|
|
148
|
+
"-o",
|
|
149
|
+
bin_path,
|
|
150
|
+
src_path,
|
|
151
|
+
"-Wno-objc-method-access",
|
|
152
|
+
"-framework",
|
|
153
|
+
"Foundation",
|
|
154
|
+
"-F/System/Library/PrivateFrameworks",
|
|
155
|
+
"-framework",
|
|
156
|
+
"AppleMediaServices",
|
|
157
|
+
"-fobjc-arc",
|
|
158
|
+
],
|
|
159
|
+
capture_output=True,
|
|
160
|
+
text=True,
|
|
161
|
+
)
|
|
162
|
+
if comp.returncode != 0:
|
|
163
|
+
print(f"Compilation failed: {comp.stderr}", file=sys.stderr)
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
result = subprocess.run([bin_path], capture_output=True, text=True, timeout=30)
|
|
167
|
+
|
|
168
|
+
token = result.stdout.strip()
|
|
169
|
+
if not token.startswith("ey"):
|
|
170
|
+
print(f"Invalid token. stderr: {result.stderr}", file=sys.stderr)
|
|
171
|
+
return None
|
|
172
|
+
|
|
173
|
+
return token
|
|
174
|
+
except subprocess.TimeoutExpired:
|
|
175
|
+
print("Token fetch timed out", file=sys.stderr)
|
|
176
|
+
return None
|
|
177
|
+
finally:
|
|
178
|
+
shutil.rmtree(tmpdir, ignore_errors=True)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def fetch_ttml(episode_id: str, bearer_token: str) -> str:
    """Fetch the TTML transcript document for an Apple Podcasts episode.

    First asks the AMP API for the episode's transcript metadata, then
    downloads the TTML asset the metadata points to.

    Args:
        episode_id: Apple Podcasts episode identifier.
        bearer_token: Token sent in the ``Authorization: Bearer`` header.

    Returns:
        The TTML document decoded as UTF-8 text.

    Raises:
        RuntimeError: The API answered with an HTTP error or an ``errors``
            payload, the response carries no transcript asset, or the asset
            URL is not an http(s) URL.
    """
    url = (
        f"https://amp-api.podcasts.apple.com/v1/catalog/us/podcast-episodes/"
        f"{episode_id}/transcripts?fields=ttmlToken,ttmlAssetUrls"
        f"&include%5Bpodcast-episodes%5D=podcast&l=en-US&with=entitlements"
    )

    req = Request(url)
    req.add_header("Authorization", f"Bearer {bearer_token}")

    try:
        with urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
    except HTTPError as e:
        # The error body is diagnostic only; never let its decoding fail.
        body = e.read().decode("utf-8", errors="replace") if e.fp else ""
        raise RuntimeError(f"AMP API returned {e.code}: {body}") from e

    if "errors" in data:
        raise RuntimeError(f"API error: {data['errors']}")

    try:
        ttml_url = data["data"][0]["attributes"]["ttmlAssetUrls"]["ttml"]
    except (KeyError, IndexError, TypeError) as e:
        raise RuntimeError(
            f"No transcript available for episode {episode_id}"
        ) from e

    # urlopen also accepts file:// and other schemes; only follow web URLs.
    if not str(ttml_url).lower().startswith(("http://", "https://")):
        raise RuntimeError(f"Unexpected transcript URL: {ttml_url}")

    with urlopen(ttml_url, timeout=15) as resp:
        return resp.read().decode("utf-8")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def ttml_to_segments(ttml_content: str) -> list[dict]:
    """Parse TTML XML into a list of segment dicts, one per non-empty ``<p>``.

    Args:
        ttml_content: TTML document as text.

    Returns:
        A list of dicts. Each has ``"text"`` (the paragraph's text chunks,
        stripped and joined with single spaces). ``"start"`` / ``"end"`` are
        present when the paragraph has a non-empty ``begin`` / ``end``
        attribute, and ``"duration"`` is present only when both are.
    """
    root = ET.fromstring(ttml_content)
    segments = []

    for elem in root.iter():
        if not isinstance(elem.tag, str):
            # Non-element nodes (comments, processing instructions) have no tag name.
            continue
        # Drop a leading "{namespace}" so namespaced and plain <p> both match.
        tag = elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
        if tag != "p":
            continue

        # itertext() yields every text chunk exactly once, including nested spans.
        words = [chunk.strip() for chunk in elem.itertext() if chunk.strip()]
        if not words:
            continue

        segment = {"text": " ".join(words)}
        begin = elem.get("begin", "")
        end = elem.get("end", "")
        if begin:
            segment["start"] = _parse_ttml_time(begin)
        if end:
            segment["end"] = _parse_ttml_time(end)
        # A duration needs both endpoints; a paragraph with only ``begin``
        # previously raised KeyError here.
        if "start" in segment and "end" in segment:
            segment["duration"] = segment["end"] - segment["start"]
        segments.append(segment)

    return segments
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def ttml_to_text(ttml_content: str) -> str:
    """Return the transcript as one string: all segment texts joined by spaces."""
    pieces = [segment["text"] for segment in ttml_to_segments(ttml_content)]
    return " ".join(pieces)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _parse_ttml_time(time_str: str) -> float:
|
|
247
|
+
"""Parse TTML time format (HH:MM:SS.mmm) to seconds."""
|
|
248
|
+
parts = time_str.split(":")
|
|
249
|
+
if len(parts) == 3:
|
|
250
|
+
h, m, s = parts
|
|
251
|
+
return int(h) * 3600 + int(m) * 60 + float(s)
|
|
252
|
+
if len(parts) == 2:
|
|
253
|
+
m, s = parts
|
|
254
|
+
return int(m) * 60 + float(s)
|
|
255
|
+
return float(time_str)
|
getscript/cli.py
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
"""CLI entry point for getscript."""
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import sys
|
|
5
|
+
|
|
6
|
+
from getscript import __version__
|
|
7
|
+
from getscript.completions import generate as generate_completions
|
|
8
|
+
from getscript.config import get_cache_dir, load_config, merge_config
|
|
9
|
+
from getscript.detect import detect_source
|
|
10
|
+
from getscript.output import format_output
|
|
11
|
+
from getscript.progress import Progress
|
|
12
|
+
|
|
13
|
+
EXAMPLES = """\
|
|
14
|
+
examples:
|
|
15
|
+
getscript "https://youtube.com/watch?v=dQw4w9WgXcQ"
|
|
16
|
+
getscript "https://youtu.be/dQw4w9WgXcQ" --timestamps
|
|
17
|
+
getscript "https://youtube.com/watch?v=dQw4w9WgXcQ" --json | jq .
|
|
18
|
+
getscript "https://youtube.com/watch?v=dQw4w9WgXcQ" --markdown > notes.md
|
|
19
|
+
getscript 1000753754819 # Apple episode ID
|
|
20
|
+
getscript 1000753754819 --ttml # raw TTML XML
|
|
21
|
+
getscript "https://podcasts.apple.com/...?i=12345"
|
|
22
|
+
getscript "https://youtube.com/watch?v=..." -o transcript.txt
|
|
23
|
+
getscript --search "my favorite YouTuber" # search YouTube, pick via fzf
|
|
24
|
+
getscript --search "my favorite podcaster" --apple # search Apple Podcasts
|
|
25
|
+
getscript --search "topic" --list # print results, no fzf
|
|
26
|
+
getscript --search "topic" --limit 20 # control result count
|
|
27
|
+
getscript VIDEO_ID --proxy socks5://127.0.0.1:1080 # use proxy for YouTube
|
|
28
|
+
getscript VIDEO_ID --cookies ~/cookies.txt # use browser cookies
|
|
29
|
+
getscript VIDEO_ID --no-upload # skip shared library indexing
|
|
30
|
+
getscript --completions zsh >> ~/.zshrc"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the argument parser for the ``getscript`` command line.

    Most boolean switches default to None rather than False; only ``--apple``
    and ``--list`` default to False.
    """
    cli = argparse.ArgumentParser(
        prog="getscript",
        description="Fetch transcripts from YouTube and Apple Podcasts.",
        epilog=EXAMPLES,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    add = cli.add_argument

    def switch(flag: str, help_text: str, default=None) -> None:
        # Register one on/off option.
        add(flag, action="store_true", default=default, help=help_text)

    add("input", nargs="?", help="URL or ID to fetch transcript for")
    add("-o", "--output", metavar="FILE", help="write output to file")
    add("--search", metavar="QUERY", help="search for content by topic or creator")
    switch("--apple", "search Apple Podcasts instead of YouTube", default=False)
    add(
        "--limit",
        type=int,
        default=None,
        metavar="N",
        help="number of search results (default: 10)",
    )
    switch("--list", "print search results without interactive selection", default=False)
    switch("--json", "structured JSON output")
    switch("--ttml", "raw TTML XML (Apple only)")
    switch("--timestamps", "include timestamps")
    switch("--markdown", "Markdown output")
    add(
        "--proxy",
        metavar="URL",
        default=None,
        help="proxy URL for YouTube requests (e.g. socks5://host:port)",
    )
    add(
        "--cookies",
        metavar="FILE",
        default=None,
        help="Netscape cookie file for YouTube auth (e.g. cookies.txt)",
    )
    switch("--no-upload", "disable contributing transcript to shared library")
    switch("--no-color", "disable colors")
    switch("--quiet", "suppress progress output")
    switch("--verbose", "show detailed errors")
    add(
        "--completions",
        metavar="SHELL",
        choices=["bash", "zsh", "fish"],
        help="generate shell completions (bash, zsh, fish)",
    )
    add("--version", action="version", version=f"getscript {__version__}")
    return cli
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _handle_search(args, config) -> int:
    """Handle --search mode: search, let the user pick a result, fetch it.

    Args:
        args: Parsed command-line namespace; ``search``, ``apple``, ``list``
            and ``limit`` are read. On success ``input``, ``search`` and
            ``_title`` are overwritten before delegating to
            ``_fetch_transcript``.
        config: Merged configuration mapping.

    Returns:
        Process exit code: 0 on success, 2 for an invalid limit, 130 when the
        picker returned no selection, 1 for other failures, or whatever
        ``_fetch_transcript`` returns for the selected item.
    """
    from getscript.picker import format_list, pick_result
    from getscript.search import search_apple, search_youtube

    verbose = config.get("verbose", False)
    quiet = config.get("quiet", False)
    limit = args.limit or config.get("search_limit", 10)

    # Reject a nonsensical limit up front instead of passing it to the backends.
    if isinstance(limit, int) and limit < 1:
        print("Error: search limit must be a positive integer.", file=sys.stderr)
        return 2

    # Apple transcript fetch requires macOS — warn before searching unless --list
    if args.apple and not args.list:
        if sys.platform != "darwin":
            print(
                "Apple Podcasts transcripts require macOS 15.5+ with Xcode CLI tools.\n"
                "Use --list to browse search results without fetching transcripts.",
                file=sys.stderr,
            )
            return 1

    progress = Progress(quiet=quiet)

    try:
        if args.apple:
            progress.update("Searching Apple Podcasts...")
            results = search_apple(args.search, limit=limit)
        else:
            api_key = config.get("youtube_api_key")
            if not api_key:
                print(
                    "YouTube API key required for --search.\n"
                    "Set GETSCRIPT_YOUTUBE_API_KEY env var or add "
                    '"youtube_api_key" to ~/.config/getscript/config.json\n'
                    "Get a key: https://console.cloud.google.com/apis/credentials",
                    file=sys.stderr,
                )
                return 1
            progress.update("Searching YouTube...")
            results = search_youtube(args.search, api_key, limit=limit)

        progress.done()

        if not results:
            print(f"No results for: {args.search}", file=sys.stderr)
            return 1

        # --list mode: print results and exit
        if args.list:
            print(format_list(results))
            return 0

        # Interactive selection via fzf
        selected = pick_result(results)
        if selected is None:
            return 130

        # The selected item's ID becomes the input for the transcript fetch.
        source_input = selected["id"]

    except RuntimeError as e:
        # Typically "fzf not installed". Finish the progress display like the
        # other handlers do, in case the error came from the search step.
        progress.done()
        print(f"Error: {e}", file=sys.stderr)
        return 1
    except KeyboardInterrupt:
        progress.done()
        print("\nInterrupted.", file=sys.stderr)
        return 1
    except Exception as e:
        progress.done()
        if verbose:
            import traceback

            traceback.print_exc(file=sys.stderr)
        else:
            print(f"Error: {e}", file=sys.stderr)
        return 1

    # Now fetch the transcript using the selected ID, re-using args.
    args.input = source_input
    args.search = None  # prevent re-entry
    args._title = selected.get("title")  # pass title for upload
    return _fetch_transcript(args, config)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _fetch_transcript(args, config) -> int:
    """Fetch a transcript for ``args.input``, emit it, and optionally upload it.

    The output format is chosen from the ``ttml``, ``json`` and ``markdown``
    config flags (in that priority), falling back to ``output_format`` and
    finally ``"text"``. The result goes to ``args.output`` when set, otherwise
    to stdout. Unless ``no_upload`` is set, the transcript is then submitted to
    the shared library on a best-effort basis.

    Args:
        args: Parsed command-line namespace; ``input`` and ``output`` are read,
            plus the optional ``_title`` attribute.
        config: Merged configuration mapping.

    Returns:
        Process exit code: 0 on success, 2 when a ValueError is raised
        (including an unsupported source), 1 for other failures.
    """
    timestamps = config.get("timestamps", False)
    verbose = config.get("verbose", False)
    quiet = config.get("quiet", False)

    # Determine output format
    if config.get("ttml"):
        fmt = "ttml"
    elif config.get("json"):
        fmt = "json"
    elif config.get("markdown"):
        fmt = "markdown"
    else:
        fmt = config.get("output_format", "text")

    progress = Progress(quiet=quiet)

    try:
        # Detect source
        progress.update("Detecting source...")
        source, source_id = detect_source(args.input)

        ttml_raw = None

        if source == "youtube":
            progress.update("Fetching YouTube transcript...")
            from getscript.youtube import fetch_transcript

            segments = fetch_transcript(source_id, config)
            progress.done()

        elif source == "apple":
            from getscript.apple import fetch_ttml, get_bearer_token, ttml_to_segments

            cache_dir = get_cache_dir()

            progress.update("Authenticating with Apple...")
            token = get_bearer_token(cache_dir)
            if not token:
                progress.done()
                print(
                    "Failed to get Apple bearer token. Requires macOS 15.5+.",
                    file=sys.stderr,
                )
                return 1

            progress.update("Fetching Apple Podcasts transcript...")
            ttml_raw = fetch_ttml(source_id, token)
            segments = ttml_to_segments(ttml_raw)
            progress.done()

        else:
            # Without this branch ``segments`` would be unbound further down.
            raise ValueError(f"Unsupported source: {source}")

        # Format output
        result = format_output(
            segments,
            fmt=fmt,
            source=source,
            source_id=source_id,
            timestamps=timestamps,
            ttml_raw=ttml_raw,
        )

        # Write output
        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(result)
            print(f"Written to {args.output}", file=sys.stderr)
        else:
            print(result)

        # Upload to shared library (on by default, disable with --no-upload)
        if not config.get("no_upload"):
            from getscript.upload import fetch_title, upload_transcript

            try:
                title = getattr(args, "_title", None)
                if not title:
                    title = fetch_title(source, source_id)
                resp = upload_transcript(source, source_id, segments, title, config)
            except Exception as e:
                # The transcript has already been delivered; a failed upload
                # must not turn a successful fetch into an error exit.
                resp = None
                if verbose:
                    import traceback

                    traceback.print_exc(file=sys.stderr)
                elif not quiet:
                    print(f"Warning: upload skipped: {e}", file=sys.stderr)

            if resp and not quiet:
                status = resp.get("status", "unknown")
                if status == "already_indexed":
                    print(
                        "Transcript indexed at voxlytranscribes.com",
                        file=sys.stderr,
                    )
                elif status == "queued":
                    print(
                        "Transcript submitted to voxlytranscribes.com (verifying)",
                        file=sys.stderr,
                    )

        return 0

    except ValueError as e:
        progress.done()
        print(f"Error: {e}", file=sys.stderr)
        return 2
    except KeyboardInterrupt:
        progress.done()
        print("\nInterrupted.", file=sys.stderr)
        return 1
    except Exception as e:
        progress.done()
        if verbose:
            import traceback

            traceback.print_exc(file=sys.stderr)
        else:
            print(f"Error: {e}", file=sys.stderr)
        return 1
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the CLI and return its exit code.

    Handles ``--completions`` first, validates that exactly one of the
    positional input and ``--search`` was given, merges file configuration with
    command-line flags, and dispatches to search or direct-fetch mode.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Shell completions: print the script and stop.
    if args.completions:
        print(generate_completions(args.completions))
        return 0

    has_input = bool(args.input)
    has_search = bool(args.search)

    # Nothing to do: show usage.
    if not (has_input or has_search):
        parser.print_help(sys.stderr)
        return 2

    # The two ways of naming what to fetch cannot be combined.
    if has_input and has_search:
        print("Error: --search and positional input are mutually exclusive.",
              file=sys.stderr)
        return 2

    # Config key -> attribute on the parsed namespace that supplies it.
    flag_sources = (
        ("json", "json"),
        ("ttml", "ttml"),
        ("timestamps", "timestamps"),
        ("markdown", "markdown"),
        ("no_color", "no_color"),
        ("quiet", "quiet"),
        ("verbose", "verbose"),
        ("proxy", "proxy"),
        ("cookie_file", "cookies"),
        ("no_upload", "no_upload"),
    )
    file_config = load_config()
    cli_flags = {key: getattr(args, attr) for key, attr in flag_sources}
    config = merge_config(file_config, cli_flags)

    handler = _handle_search if has_search else _fetch_transcript
    return handler(args, config)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
# Script entry point: exit the process with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|
getscript/completions.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Shell completion script generation."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def generate(shell: str) -> str:
    """Return the completion script for ``shell``.

    Raises:
        ValueError: ``shell`` is not one of "bash", "zsh" or "fish".
    """
    builders = {"bash": _bash, "zsh": _zsh, "fish": _fish}
    if shell not in builders:
        raise ValueError(f"Unsupported shell: {shell}. Choose from: bash, zsh, fish")
    return builders[shell]()
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _bash() -> str:
|
|
18
|
+
return """\
|
|
19
|
+
_getscript() {
|
|
20
|
+
local cur opts
|
|
21
|
+
COMPREPLY=()
|
|
22
|
+
cur="${COMP_WORDS[COMP_CWORD]}"
|
|
23
|
+
opts="--search --apple --limit --list --json --ttml --timestamps --markdown --proxy --cookies --no-color --quiet --verbose -o --output --completions -h --help --version"
|
|
24
|
+
|
|
25
|
+
if [[ ${cur} == -* ]]; then
|
|
26
|
+
COMPREPLY=( $(compgen -W "${opts}" -- ${cur}) )
|
|
27
|
+
return 0
|
|
28
|
+
fi
|
|
29
|
+
}
|
|
30
|
+
complete -F _getscript getscript"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _zsh() -> str:
|
|
34
|
+
return """\
|
|
35
|
+
#compdef getscript
|
|
36
|
+
|
|
37
|
+
_getscript() {
|
|
38
|
+
_arguments \\
|
|
39
|
+
'1:url or id:' \\
|
|
40
|
+
'--search[Search for content by topic or creator]:query:' \\
|
|
41
|
+
'--apple[Search Apple Podcasts instead of YouTube]' \\
|
|
42
|
+
'--limit[Number of search results]:count:' \\
|
|
43
|
+
'--list[Print search results without interactive selection]' \\
|
|
44
|
+
'--json[Output as JSON]' \\
|
|
45
|
+
'--ttml[Output raw TTML XML (Apple only)]' \\
|
|
46
|
+
'--timestamps[Include timestamps]' \\
|
|
47
|
+
'--markdown[Output as Markdown]' \\
|
|
48
|
+
'--proxy[Proxy URL for YouTube requests]:url:' \\
|
|
49
|
+
'--cookies[Netscape cookie file for YouTube auth]:cookie file:_files' \\
|
|
50
|
+
'--no-color[Disable colors]' \\
|
|
51
|
+
'--quiet[Suppress progress output]' \\
|
|
52
|
+
'--verbose[Show detailed errors]' \\
|
|
53
|
+
{-o,--output}'[Write to file]:output file:_files' \\
|
|
54
|
+
'--completions[Generate shell completions]:shell:(bash zsh fish)' \\
|
|
55
|
+
{-h,--help}'[Show help]' \\
|
|
56
|
+
'--version[Show version]'
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
_getscript "$@"
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _fish() -> str:
|
|
64
|
+
return """\
|
|
65
|
+
complete -c getscript -l search -d 'Search for content by topic or creator' -r
|
|
66
|
+
complete -c getscript -l apple -d 'Search Apple Podcasts instead of YouTube'
|
|
67
|
+
complete -c getscript -l limit -d 'Number of search results' -r
|
|
68
|
+
complete -c getscript -l list -d 'Print search results without interactive selection'
|
|
69
|
+
complete -c getscript -l json -d 'Output as JSON'
|
|
70
|
+
complete -c getscript -l ttml -d 'Output raw TTML XML (Apple only)'
|
|
71
|
+
complete -c getscript -l timestamps -d 'Include timestamps'
|
|
72
|
+
complete -c getscript -l markdown -d 'Output as Markdown'
|
|
73
|
+
complete -c getscript -l proxy -d 'Proxy URL for YouTube requests' -r
|
|
74
|
+
complete -c getscript -l cookies -d 'Netscape cookie file for YouTube auth' -r -F
|
|
75
|
+
complete -c getscript -l no-color -d 'Disable colors'
|
|
76
|
+
complete -c getscript -l quiet -d 'Suppress progress output'
|
|
77
|
+
complete -c getscript -l verbose -d 'Show detailed errors'
|
|
78
|
+
complete -c getscript -s o -l output -d 'Write to file' -r -F
|
|
79
|
+
complete -c getscript -l completions -d 'Generate shell completions' -r -fa 'bash zsh fish'
|
|
80
|
+
complete -c getscript -s h -l help -d 'Show help'
|
|
81
|
+
complete -c getscript -l version -d 'Show version'"""
|