getscript 0.12.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Voxly
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.4
2
+ Name: getscript
3
+ Version: 0.12.0
4
+ Summary: Fast, Unix-friendly CLI for fetching transcripts from YouTube and Apple Podcasts
5
+ Author: Voxly
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/outerbanks73/cli-tools
8
+ Project-URL: Documentation, https://voxlytranscribes.com/docs/getscript
9
+ Project-URL: Repository, https://github.com/outerbanks73/cli-tools
10
+ Project-URL: Issues, https://github.com/outerbanks73/cli-tools/issues
11
+ Keywords: transcript,youtube,podcast,apple-podcasts,cli
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Environment :: Console
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Multimedia :: Sound/Audio
22
+ Classifier: Topic :: Utilities
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: youtube-transcript-api>=1.0.0
27
+ Requires-Dist: requests>=2.28.0
28
+ Requires-Dist: defusedxml>=0.7.1
29
+ Dynamic: license-file
30
+
31
+ # getscript
32
+
33
+ A fast, Unix-friendly CLI for fetching transcripts from YouTube and Apple Podcasts.
34
+
35
+ ## Install
36
+
37
+ ```bash
38
+ pip install .
39
+ ```
40
+
41
+ Requires Python 3.10+.
42
+
43
+ **Apple Podcasts** transcripts additionally require macOS 15.5+ with Xcode CLI tools.
44
+
45
+ ## Usage
46
+
47
+ ```bash
48
+ # Fetch from URL
49
+ getscript "https://youtube.com/watch?v=VIDEO_ID"
50
+ getscript "https://podcasts.apple.com/...?i=EPISODE_ID"
51
+
52
+ # Fetch from bare ID
53
+ getscript dQw4w9WgXcQ # YouTube (11-char ID)
54
+ getscript 1000753754819 # Apple (numeric ID)
55
+
56
+ # Output formats
57
+ getscript VIDEO_ID --json | jq .
58
+ getscript VIDEO_ID --markdown > notes.md
59
+ getscript VIDEO_ID --timestamps
60
+ getscript EPISODE_ID --ttml # raw TTML XML (Apple only)
61
+
62
+ # Write to file
63
+ getscript VIDEO_ID -o transcript.txt
64
+
65
+ # Search & pick interactively (requires fzf)
66
+ getscript --search "topic keywords"
67
+ getscript --search "topic" --apple
68
+ getscript --search "topic" --list # print results, no fzf
69
+ getscript --search "topic" --limit 20
70
+
71
+ # YouTube auth options
72
+ getscript VIDEO_ID --proxy socks5://127.0.0.1:1080
73
+ getscript VIDEO_ID --cookies ~/cookies.txt
74
+
75
+ # Transcripts are automatically indexed at voxlytranscribes.com
76
+ # To disable:
77
+ getscript VIDEO_ID --no-upload
78
+ GETSCRIPT_UPLOAD=0 getscript VIDEO_ID
79
+
80
+ # Shell completions
81
+ getscript --completions bash >> ~/.bashrc
82
+ getscript --completions zsh >> ~/.zshrc
83
+ getscript --completions fish > ~/.config/fish/completions/getscript.fish
84
+ ```
85
+
86
+ ## Configuration
87
+
88
+ Config file: `~/.config/getscript/config.json`
89
+
90
+ ```json
91
+ {
92
+ "youtube_api_key": "YOUR_KEY",
93
+ "output_format": "text",
94
+ "timestamps": false,
95
+ "search_limit": 10,
96
+ "no_upload": false
97
+ }
98
+ ```
99
+
100
+ Environment variables:
101
+ - `GETSCRIPT_YOUTUBE_API_KEY` — YouTube Data API v3 key (required for `--search`)
102
+ - `GETSCRIPT_PROXY` — proxy URL for YouTube requests
103
+ - `GETSCRIPT_COOKIE_FILE` — Netscape cookie file for YouTube auth
104
+ - `GETSCRIPT_UPLOAD` — set to `0` to disable automatic indexing of fetched transcripts at voxlytranscribes.com (same as `--no-upload`)
105
+ - `GETSCRIPT_SUPABASE_URL` — custom Supabase URL (for development)
106
+ - `GETSCRIPT_SUPABASE_ANON_KEY` — custom Supabase anon key (for development)
107
+ - `NO_COLOR` — disable colors
108
+
109
+ Priority: config file < environment variables < CLI flags.
110
+
111
+ ## How it works
112
+
113
+ **YouTube:** Wraps [youtube-transcript-api](https://github.com/jdepoix/youtube-transcript-api) with proxy and cookie support.
114
+
115
+ **Apple Podcasts:** Compiles a small Obj-C helper that uses Apple's private AMSMescal framework (FairPlay) to obtain a bearer token, then fetches TTML transcripts from the AMP API. The token is cached for 30 days at `~/.cache/getscript/apple_token`.
116
+
117
+ ## Dependencies
118
+
119
+ - `youtube-transcript-api` — YouTube transcript fetching
120
+ - `requests` — HTTP sessions for cookie-based auth
+ - `defusedxml` — hardened XML parsing of Apple Podcasts TTML transcripts
121
+ - `fzf` (optional, system binary) — interactive search result selection
122
+
123
+ ## License
124
+
125
+ MIT
@@ -0,0 +1,95 @@
1
+ # getscript
2
+
3
+ A fast, Unix-friendly CLI for fetching transcripts from YouTube and Apple Podcasts.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install .
9
+ ```
10
+
11
+ Requires Python 3.10+.
12
+
13
+ **Apple Podcasts** transcripts additionally require macOS 15.5+ with Xcode CLI tools.
14
+
15
+ ## Usage
16
+
17
+ ```bash
18
+ # Fetch from URL
19
+ getscript "https://youtube.com/watch?v=VIDEO_ID"
20
+ getscript "https://podcasts.apple.com/...?i=EPISODE_ID"
21
+
22
+ # Fetch from bare ID
23
+ getscript dQw4w9WgXcQ # YouTube (11-char ID)
24
+ getscript 1000753754819 # Apple (numeric ID)
25
+
26
+ # Output formats
27
+ getscript VIDEO_ID --json | jq .
28
+ getscript VIDEO_ID --markdown > notes.md
29
+ getscript VIDEO_ID --timestamps
30
+ getscript EPISODE_ID --ttml # raw TTML XML (Apple only)
31
+
32
+ # Write to file
33
+ getscript VIDEO_ID -o transcript.txt
34
+
35
+ # Search & pick interactively (requires fzf)
36
+ getscript --search "topic keywords"
37
+ getscript --search "topic" --apple
38
+ getscript --search "topic" --list # print results, no fzf
39
+ getscript --search "topic" --limit 20
40
+
41
+ # YouTube auth options
42
+ getscript VIDEO_ID --proxy socks5://127.0.0.1:1080
43
+ getscript VIDEO_ID --cookies ~/cookies.txt
44
+
45
+ # Transcripts are automatically indexed at voxlytranscribes.com
46
+ # To disable:
47
+ getscript VIDEO_ID --no-upload
48
+ GETSCRIPT_UPLOAD=0 getscript VIDEO_ID
49
+
50
+ # Shell completions
51
+ getscript --completions bash >> ~/.bashrc
52
+ getscript --completions zsh >> ~/.zshrc
53
+ getscript --completions fish > ~/.config/fish/completions/getscript.fish
54
+ ```
55
+
56
+ ## Configuration
57
+
58
+ Config file: `~/.config/getscript/config.json`
59
+
60
+ ```json
61
+ {
62
+ "youtube_api_key": "YOUR_KEY",
63
+ "output_format": "text",
64
+ "timestamps": false,
65
+ "search_limit": 10,
66
+ "no_upload": false
67
+ }
68
+ ```
69
+
70
+ Environment variables:
71
+ - `GETSCRIPT_YOUTUBE_API_KEY` — YouTube Data API v3 key (required for `--search`)
72
+ - `GETSCRIPT_PROXY` — proxy URL for YouTube requests
73
+ - `GETSCRIPT_COOKIE_FILE` — Netscape cookie file for YouTube auth
74
+ - `GETSCRIPT_UPLOAD` — set to `0` to disable automatic indexing of fetched transcripts at voxlytranscribes.com (same as `--no-upload`)
75
+ - `GETSCRIPT_SUPABASE_URL` — custom Supabase URL (for development)
76
+ - `GETSCRIPT_SUPABASE_ANON_KEY` — custom Supabase anon key (for development)
77
+ - `NO_COLOR` — disable colors
78
+
79
+ Priority: config file < environment variables < CLI flags.
80
+
81
+ ## How it works
82
+
83
+ **YouTube:** Wraps [youtube-transcript-api](https://github.com/jdepoix/youtube-transcript-api) with proxy and cookie support.
84
+
85
+ **Apple Podcasts:** Compiles a small Obj-C helper that uses Apple's private AMSMescal framework (FairPlay) to obtain a bearer token, then fetches TTML transcripts from the AMP API. The token is cached for 30 days at `~/.cache/getscript/apple_token`.
86
+
87
+ ## Dependencies
88
+
89
+ - `youtube-transcript-api` — YouTube transcript fetching
90
+ - `requests` — HTTP sessions for cookie-based auth
+ - `defusedxml` — hardened XML parsing of Apple Podcasts TTML transcripts
91
+ - `fzf` (optional, system binary) — interactive search result selection
92
+
93
+ ## License
94
+
95
+ MIT
@@ -0,0 +1,6 @@
1
+ from importlib.metadata import version, PackageNotFoundError
2
+
3
def _resolve_version() -> str:
    """Return the installed distribution's version, or the bundled fallback."""
    try:
        return version("getscript")
    except PackageNotFoundError:
        # fallback for editable installs without metadata
        return "0.12.0"


__version__ = _resolve_version()
@@ -0,0 +1,255 @@
1
+ """Apple Podcasts transcript fetching."""
2
+
3
+ import json
4
+ import os
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+ import tempfile
9
+ import defusedxml.ElementTree as ET
10
+ from datetime import datetime
11
+ from urllib.error import HTTPError
12
+ from urllib.request import Request, urlopen
13
+
14
+ CACHE_VALIDITY = 60 * 60 * 24 * 30 # 30 days
15
+
16
+ # Obj-C helper that prints an Apple bearer token to stdout. The token request
17
+ # is signed via AMSMescal (FairPlay). The helper is compiled and run as a
18
+ # subprocess so that a segfault in the thenWithBlock: cleanup of Apple's promise implementation cannot crash the Python process.
19
+ OBJC_TOKEN_SOURCE = r'''
20
+ #import <Foundation/Foundation.h>
21
+ #import <objc/runtime.h>
22
+ #import <objc/message.h>
23
+ #import <dlfcn.h>
24
+
25
+ // Cast objc_msgSend to typed function pointers to avoid selector validation
26
+ typedef id (*msg_id)(id, SEL, ...);
27
+ typedef id (*msg_id_id)(id, SEL, id, ...);
28
+ typedef id (*msg_id_id_id)(id, SEL, id, id, ...);
29
+ typedef void (*msg_void_id_str)(id, SEL, id, NSString *);
30
+
31
+ int main() {
32
+ @autoreleasepool {
33
+ dlopen("/System/Library/PrivateFrameworks/PodcastsFoundation.framework/PodcastsFoundation", RTLD_LAZY);
34
+
35
+ Class AMSMescal = objc_getClass("AMSMescal");
36
+ Class AMSMescalSession = objc_getClass("AMSMescalSession");
37
+ Class AMSURLRequestClass = objc_getClass("AMSURLRequest");
38
+ Class IMURLBag = objc_getClass("IMURLBag");
39
+
40
+ if (!AMSMescal || !AMSMescalSession || !AMSURLRequestClass || !IMURLBag) {
41
+ fprintf(stderr, "Failed to load required Apple private frameworks. macOS 15.5+ required.\n");
42
+ return 1;
43
+ }
44
+
45
+ NSString *storeFront = @"143441-1,42 t:podcasts1";
46
+ NSDateFormatter *formatter = [[NSDateFormatter alloc] init];
47
+ [formatter setDateFormat:@"yyyy-MM-dd'T'HH:mm:ss'Z'"];
48
+ [formatter setTimeZone:[NSTimeZone timeZoneWithAbbreviation:@"UTC"]];
49
+ NSString *timestamp = [formatter stringFromDate:[NSDate date]];
50
+
51
+ NSURL *tokenURL = [NSURL URLWithString:@"https://sf-api-token-service.itunes.apple.com/apiToken?clientClass=apple&clientId=com.apple.podcasts.macos&os=OS%20X&osVersion=15.5&productVersion=1.1.0&version=2"];
52
+ NSMutableURLRequest *nsRequest = [NSMutableURLRequest requestWithURL:tokenURL];
53
+
54
+ id urlRequest = ((msg_id_id)objc_msgSend)(
55
+ [AMSURLRequestClass alloc],
56
+ sel_registerName("initWithRequest:"),
57
+ nsRequest
58
+ );
59
+ ((msg_void_id_str)objc_msgSend)(urlRequest, sel_registerName("setValue:forHTTPHeaderField:"), timestamp, @"x-request-timestamp");
60
+ ((msg_void_id_str)objc_msgSend)(urlRequest, sel_registerName("setValue:forHTTPHeaderField:"), storeFront, @"X-Apple-Store-Front");
61
+
62
+ NSDictionary *policy = @{
63
+ @"fields": @[@"clientId"],
64
+ @"headers": @[@"x-apple-store-front", @"x-apple-client-application", @"x-request-timestamp"]
65
+ };
66
+ id signature = ((msg_id_id_id)objc_msgSend)(
67
+ (id)AMSMescal,
68
+ sel_registerName("_signedActionDataFromRequest:policy:"),
69
+ urlRequest, policy
70
+ );
71
+
72
+ id session = ((msg_id)objc_msgSend)((id)AMSMescalSession, sel_registerName("defaultSession"));
73
+ id urlBag = ((msg_id)objc_msgSend)([IMURLBag alloc], sel_registerName("init"));
74
+
75
+ dispatch_semaphore_t sema = dispatch_semaphore_create(0);
76
+
77
+ id signedPromise = ((msg_id_id_id)objc_msgSend)(session, sel_registerName("signData:bag:"), signature, urlBag);
78
+
79
+ ((msg_id_id)objc_msgSend)(signedPromise, sel_registerName("thenWithBlock:"), ^(id result) {
80
+ NSString *sig = [(NSData *)result base64EncodedStringWithOptions:0];
81
+
82
+ NSMutableURLRequest *signedRequest = [NSMutableURLRequest requestWithURL:tokenURL];
83
+ [signedRequest setValue:timestamp forHTTPHeaderField:@"x-request-timestamp"];
84
+ [signedRequest setValue:storeFront forHTTPHeaderField:@"X-Apple-Store-Front"];
85
+ [signedRequest setValue:sig forHTTPHeaderField:@"X-Apple-ActionSignature"];
86
+
87
+ NSURLSessionDataTask *task = [[NSURLSession sharedSession] dataTaskWithRequest:signedRequest completionHandler:^(NSData *data, NSURLResponse *response, NSError *error) {
88
+ NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data options:0 error:nil];
89
+ printf("%s", [json[@"token"] UTF8String]);
90
+ dispatch_semaphore_signal(sema);
91
+ }];
92
+ [task resume];
93
+ dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
94
+ });
95
+
96
+ dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
97
+ }
98
+ return 0;
99
+ }
100
+ '''
101
+
102
+
103
def _get_cache_path(cache_dir: str) -> str:
    """Return the path of the ``apple_token`` cache file inside *cache_dir*."""
    token_filename = "apple_token"
    return os.path.join(cache_dir, token_filename)
105
+
106
+
107
def get_bearer_token(cache_dir: str) -> str | None:
    """Return an Apple bearer token, reusing the cached copy while it is fresh.

    The cache file's modification time is treated as the token's age. A cached
    token is reused only if it is younger than ``CACHE_VALIDITY`` seconds and
    starts with ``"ey"``. Otherwise a new token is requested through
    ``_compile_and_fetch_token`` and, when one is obtained, written back to
    the cache with owner-only permissions.

    Args:
        cache_dir: Directory that holds the ``apple_token`` cache file.

    Returns:
        The bearer token, or ``None`` when no token could be obtained.
    """
    cache_path = _get_cache_path(cache_dir)

    # EAFP: stat/read directly instead of exists()-then-read, so a cache file
    # that vanishes or is unreadable simply falls through to a fresh fetch.
    try:
        age = datetime.now().timestamp() - os.path.getmtime(cache_path)
        if age < CACHE_VALIDITY:
            with open(cache_path, encoding="utf-8") as f:
                cached = f.read().strip()
            if cached.startswith("ey"):
                return cached
    except (OSError, UnicodeDecodeError):
        pass

    token = _compile_and_fetch_token()
    if token:
        # Caching is best-effort: a read-only or missing cache location must
        # not discard a token that was fetched successfully.
        try:
            os.makedirs(os.path.dirname(cache_path) or ".", exist_ok=True)
            # Create the file with 0o600 from the start so the token is never
            # briefly readable by other users.
            fd = os.open(cache_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
            with os.fdopen(fd, "w", encoding="utf-8") as f:
                f.write(token)
            # The mode passed to os.open only applies to newly created files;
            # tighten a pre-existing cache file as well.
            os.chmod(cache_path, 0o600)
        except OSError as exc:
            print(f"Warning: could not cache Apple token: {exc}", file=sys.stderr)
    return token
126
+
127
+
128
+ def _compile_and_fetch_token() -> str | None:
129
+ """Compile Obj-C helper, run it, return bearer token."""
130
+ if sys.platform != "darwin":
131
+ print(
132
+ "Apple Podcasts transcripts require macOS with Xcode CLI tools.",
133
+ file=sys.stderr,
134
+ )
135
+ return None
136
+
137
+ tmpdir = tempfile.mkdtemp(prefix="getscript-")
138
+ src_path = os.path.join(tmpdir, "token.m")
139
+ bin_path = os.path.join(tmpdir, "token")
140
+
141
+ try:
142
+ with open(src_path, "w") as src:
143
+ src.write(OBJC_TOKEN_SOURCE)
144
+
145
+ comp = subprocess.run(
146
+ [
147
+ "clang",
148
+ "-o",
149
+ bin_path,
150
+ src_path,
151
+ "-Wno-objc-method-access",
152
+ "-framework",
153
+ "Foundation",
154
+ "-F/System/Library/PrivateFrameworks",
155
+ "-framework",
156
+ "AppleMediaServices",
157
+ "-fobjc-arc",
158
+ ],
159
+ capture_output=True,
160
+ text=True,
161
+ )
162
+ if comp.returncode != 0:
163
+ print(f"Compilation failed: {comp.stderr}", file=sys.stderr)
164
+ return None
165
+
166
+ result = subprocess.run([bin_path], capture_output=True, text=True, timeout=30)
167
+
168
+ token = result.stdout.strip()
169
+ if not token.startswith("ey"):
170
+ print(f"Invalid token. stderr: {result.stderr}", file=sys.stderr)
171
+ return None
172
+
173
+ return token
174
+ except subprocess.TimeoutExpired:
175
+ print("Token fetch timed out", file=sys.stderr)
176
+ return None
177
+ finally:
178
+ shutil.rmtree(tmpdir, ignore_errors=True)
179
+
180
+
181
def fetch_ttml(episode_id: str, bearer_token: str) -> str:
    """Fetch the raw TTML transcript for an episode from Apple's AMP API.

    Two requests are made: the transcripts endpoint is queried (authorized
    with *bearer_token*) for the TTML asset URL, then the TTML document is
    downloaded from that URL without the Authorization header.

    Args:
        episode_id: Apple Podcasts episode ID inserted into the request path.
        bearer_token: Token sent as ``Authorization: Bearer <token>``.

    Returns:
        The TTML document decoded as UTF-8.

    Raises:
        Exception: If the API answers with an HTTP error status, reports
            ``errors`` in its JSON payload, or does not provide a usable
            TTML asset URL. Other network errors raised by ``urlopen``
            propagate unchanged.
    """
    url = (
        "https://amp-api.podcasts.apple.com/v1/catalog/us/podcast-episodes/"
        f"{episode_id}/transcripts?fields=ttmlToken,ttmlAssetUrls"
        "&include%5Bpodcast-episodes%5D=podcast&l=en-US&with=entitlements"
    )

    req = Request(url)
    req.add_header("Authorization", f"Bearer {bearer_token}")

    try:
        with urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
    except HTTPError as e:
        # Decode leniently: an error body with bad bytes must not mask the
        # HTTP status being reported.
        body = e.read().decode("utf-8", errors="replace") if e.fp else ""
        raise Exception(f"AMP API returned {e.code}: {body}") from e

    if "errors" in data:
        raise Exception(f"API error: {data['errors']}")

    # An episode without a transcript can yield an empty or partial payload;
    # surface that as a readable error instead of a bare IndexError/KeyError.
    try:
        ttml_url = data["data"][0]["attributes"]["ttmlAssetUrls"]["ttml"]
    except (KeyError, IndexError, TypeError) as e:
        raise Exception(
            "AMP API response did not include a TTML transcript URL"
        ) from e

    # urlopen also accepts file: and other schemes; only follow web URLs
    # taken from the API response.
    if not isinstance(ttml_url, str) or not ttml_url.startswith(
        ("https://", "http://")
    ):
        raise Exception(f"Unexpected TTML URL: {ttml_url!r}")

    with urlopen(ttml_url, timeout=15) as resp:
        return resp.read().decode("utf-8")
207
+
208
+
209
def ttml_to_segments(ttml_content: str) -> list[dict]:
    """Parse TTML XML into a list of segment dicts, one per non-empty ``<p>``.

    Each segment has a ``"text"`` key holding the paragraph's text chunks,
    individually stripped and joined with single spaces. ``"start"`` and
    ``"end"`` (seconds) are added when the paragraph carries ``begin`` /
    ``end`` attributes, and ``"duration"`` only when both are present.

    Args:
        ttml_content: TTML document as a string.

    Returns:
        Segments in document order; paragraphs without text are skipped.
    """
    root = ET.fromstring(ttml_content)
    segments = []

    for elem in root.iter():
        tag = elem.tag
        # Skip nodes whose tag is not a string, then drop any "{namespace}"
        # prefix so namespaced and plain <p> elements are treated alike.
        if not isinstance(tag, str) or tag.rsplit("}", 1)[-1] != "p":
            continue

        # itertext() yields the text of the element and all descendants once
        # each, so nested <span> content is not duplicated.
        words = [word for chunk in elem.itertext() if (word := chunk.strip())]
        if not words:
            continue

        segment = {"text": " ".join(words)}
        begin = elem.get("begin", "")
        end = elem.get("end", "")
        if begin:
            segment["start"] = _parse_ttml_time(begin)
        if end:
            segment["end"] = _parse_ttml_time(end)
        # Duration needs both endpoints; checking only one of them would
        # raise KeyError for a paragraph that has a single timing attribute.
        if begin and end:
            segment["duration"] = segment["end"] - segment["start"]
        segments.append(segment)

    return segments
238
+
239
+
240
def ttml_to_text(ttml_content: str) -> str:
    """Return the transcript text of a TTML document as one string.

    The text of every segment produced by ``ttml_to_segments`` is joined
    with single spaces.
    """
    texts = [segment["text"] for segment in ttml_to_segments(ttml_content)]
    return " ".join(texts)
244
+
245
+
246
def _parse_ttml_time(time_str: str) -> float:
    """Convert a TTML time expression to seconds.

    Accepted forms:
        * clock time ``HH:MM:SS.mmm`` or ``MM:SS.mmm``
        * offset time with a metric suffix: ``h``, ``m``, ``s`` or ``ms``
          (for example ``"12.5s"`` or ``"250ms"``)
        * a bare number of seconds

    Args:
        time_str: The value of a ``begin`` / ``end`` attribute.

    Returns:
        The time in seconds.

    Raises:
        ValueError: If the expression is not in one of the accepted forms
            (frame- and tick-based offsets are not supported).
    """
    value = time_str.strip()
    parts = value.split(":")
    if len(parts) == 3:
        h, m, s = parts
        return int(h) * 3600 + int(m) * 60 + float(s)
    if len(parts) == 2:
        m, s = parts
        return int(m) * 60 + float(s)

    # Offset time. "ms" must be tested before "m" and "s" because it ends
    # with "s".
    if value.endswith("ms"):
        return float(value[:-2]) / 1000
    for suffix, seconds_per_unit in (("h", 3600), ("m", 60), ("s", 1)):
        if value.endswith(suffix):
            return float(value[:-1]) * seconds_per_unit

    return float(value)