getscript 0.12.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- getscript-0.12.0/LICENSE +21 -0
- getscript-0.12.0/PKG-INFO +125 -0
- getscript-0.12.0/README.md +95 -0
- getscript-0.12.0/getscript/__init__.py +6 -0
- getscript-0.12.0/getscript/apple.py +255 -0
- getscript-0.12.0/getscript/cli.py +344 -0
- getscript-0.12.0/getscript/completions.py +81 -0
- getscript-0.12.0/getscript/config.py +64 -0
- getscript-0.12.0/getscript/detect.py +56 -0
- getscript-0.12.0/getscript/output.py +97 -0
- getscript-0.12.0/getscript/picker.py +69 -0
- getscript-0.12.0/getscript/progress.py +34 -0
- getscript-0.12.0/getscript/search.py +83 -0
- getscript-0.12.0/getscript/upload.py +131 -0
- getscript-0.12.0/getscript/youtube.py +58 -0
- getscript-0.12.0/getscript.egg-info/PKG-INFO +125 -0
- getscript-0.12.0/getscript.egg-info/SOURCES.txt +30 -0
- getscript-0.12.0/getscript.egg-info/dependency_links.txt +1 -0
- getscript-0.12.0/getscript.egg-info/entry_points.txt +2 -0
- getscript-0.12.0/getscript.egg-info/requires.txt +3 -0
- getscript-0.12.0/getscript.egg-info/top_level.txt +1 -0
- getscript-0.12.0/pyproject.toml +46 -0
- getscript-0.12.0/setup.cfg +4 -0
- getscript-0.12.0/tests/test_apple.py +47 -0
- getscript-0.12.0/tests/test_config.py +99 -0
- getscript-0.12.0/tests/test_detect.py +67 -0
- getscript-0.12.0/tests/test_integration.py +109 -0
- getscript-0.12.0/tests/test_output.py +99 -0
- getscript-0.12.0/tests/test_picker.py +85 -0
- getscript-0.12.0/tests/test_search.py +125 -0
- getscript-0.12.0/tests/test_upload.py +213 -0
- getscript-0.12.0/tests/test_youtube.py +109 -0
getscript-0.12.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Voxly
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: getscript
|
|
3
|
+
Version: 0.12.0
|
|
4
|
+
Summary: Fast, Unix-friendly CLI for fetching transcripts from YouTube and Apple Podcasts
|
|
5
|
+
Author: Voxly
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/outerbanks73/cli-tools
|
|
8
|
+
Project-URL: Documentation, https://voxlytranscribes.com/docs/getscript
|
|
9
|
+
Project-URL: Repository, https://github.com/outerbanks73/cli-tools
|
|
10
|
+
Project-URL: Issues, https://github.com/outerbanks73/cli-tools/issues
|
|
11
|
+
Keywords: transcript,youtube,podcast,apple-podcasts,cli
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Multimedia :: Sound/Audio
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Description-Content-Type: text/markdown
|
|
25
|
+
License-File: LICENSE
|
|
26
|
+
Requires-Dist: youtube-transcript-api>=1.0.0
|
|
27
|
+
Requires-Dist: requests>=2.28.0
|
|
28
|
+
Requires-Dist: defusedxml>=0.7.1
|
|
29
|
+
Dynamic: license-file
|
|
30
|
+
|
|
31
|
+
# getscript
|
|
32
|
+
|
|
33
|
+
A fast, Unix-friendly CLI for fetching transcripts from YouTube and Apple Podcasts.
|
|
34
|
+
|
|
35
|
+
## Install
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install .
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Requires Python 3.10+.
|
|
42
|
+
|
|
43
|
+
**Apple Podcasts** transcripts additionally require macOS 15.5+ with Xcode CLI tools.
|
|
44
|
+
|
|
45
|
+
## Usage
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
# Fetch from URL
|
|
49
|
+
getscript "https://youtube.com/watch?v=VIDEO_ID"
|
|
50
|
+
getscript "https://podcasts.apple.com/...?i=EPISODE_ID"
|
|
51
|
+
|
|
52
|
+
# Fetch from bare ID
|
|
53
|
+
getscript dQw4w9WgXcQ # YouTube (11-char ID)
|
|
54
|
+
getscript 1000753754819 # Apple (numeric ID)
|
|
55
|
+
|
|
56
|
+
# Output formats
|
|
57
|
+
getscript VIDEO_ID --json | jq .
|
|
58
|
+
getscript VIDEO_ID --markdown > notes.md
|
|
59
|
+
getscript VIDEO_ID --timestamps
|
|
60
|
+
getscript EPISODE_ID --ttml # raw TTML XML (Apple only)
|
|
61
|
+
|
|
62
|
+
# Write to file
|
|
63
|
+
getscript VIDEO_ID -o transcript.txt
|
|
64
|
+
|
|
65
|
+
# Search & pick interactively (requires fzf)
|
|
66
|
+
getscript --search "topic keywords"
|
|
67
|
+
getscript --search "topic" --apple
|
|
68
|
+
getscript --search "topic" --list # print results, no fzf
|
|
69
|
+
getscript --search "topic" --limit 20
|
|
70
|
+
|
|
71
|
+
# YouTube auth options
|
|
72
|
+
getscript VIDEO_ID --proxy socks5://127.0.0.1:1080
|
|
73
|
+
getscript VIDEO_ID --cookies ~/cookies.txt
|
|
74
|
+
|
|
75
|
+
# Transcripts are automatically indexed at voxlytranscribes.com
|
|
76
|
+
# To disable:
|
|
77
|
+
getscript VIDEO_ID --no-upload
|
|
78
|
+
GETSCRIPT_UPLOAD=0 getscript VIDEO_ID
|
|
79
|
+
|
|
80
|
+
# Shell completions
|
|
81
|
+
getscript --completions bash >> ~/.bashrc
|
|
82
|
+
getscript --completions zsh >> ~/.zshrc
|
|
83
|
+
getscript --completions fish > ~/.config/fish/completions/getscript.fish
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Configuration
|
|
87
|
+
|
|
88
|
+
Config file: `~/.config/getscript/config.json`
|
|
89
|
+
|
|
90
|
+
```json
|
|
91
|
+
{
|
|
92
|
+
"youtube_api_key": "YOUR_KEY",
|
|
93
|
+
"output_format": "text",
|
|
94
|
+
"timestamps": false,
|
|
95
|
+
"search_limit": 10,
|
|
96
|
+
"no_upload": false
|
|
97
|
+
}
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Environment variables:
|
|
101
|
+
- `GETSCRIPT_YOUTUBE_API_KEY` — YouTube Data API v3 key (required for `--search`)
|
|
102
|
+
- `GETSCRIPT_PROXY` — proxy URL for YouTube requests
|
|
103
|
+
- `GETSCRIPT_COOKIE_FILE` — Netscape cookie file for YouTube auth
|
|
104
|
+
- `GETSCRIPT_UPLOAD` — set to `0` to disable automatic shared library indexing
|
|
105
|
+
- `GETSCRIPT_SUPABASE_URL` — custom Supabase URL (for development)
|
|
106
|
+
- `GETSCRIPT_SUPABASE_ANON_KEY` — custom Supabase anon key (for development)
|
|
107
|
+
- `NO_COLOR` — disable colors
|
|
108
|
+
|
|
109
|
+
Priority: config file < environment variables < CLI flags.
|
|
110
|
+
|
|
111
|
+
## How it works
|
|
112
|
+
|
|
113
|
+
**YouTube:** Wraps [youtube-transcript-api](https://github.com/jdepoix/youtube-transcript-api) with proxy and cookie support.
|
|
114
|
+
|
|
115
|
+
**Apple Podcasts:** Compiles a small Obj-C helper that uses Apple's private AMSMescal framework (FairPlay) to obtain a bearer token, then fetches TTML transcripts from the AMP API. The token is cached for 30 days at `~/.cache/getscript/apple_token`.
|
|
116
|
+
|
|
117
|
+
## Dependencies
|
|
118
|
+
|
|
119
|
+
- `youtube-transcript-api` — YouTube transcript fetching
|
|
120
|
+
- `requests` — HTTP sessions for cookie-based auth
- `defusedxml` — hardened XML parsing of Apple Podcasts TTML transcripts
|
|
121
|
+
- `fzf` (optional, system binary) — interactive search result selection
|
|
122
|
+
|
|
123
|
+
## License
|
|
124
|
+
|
|
125
|
+
MIT
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
# getscript
|
|
2
|
+
|
|
3
|
+
A fast, Unix-friendly CLI for fetching transcripts from YouTube and Apple Podcasts.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install .
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Requires Python 3.10+.
|
|
12
|
+
|
|
13
|
+
**Apple Podcasts** transcripts additionally require macOS 15.5+ with Xcode CLI tools.
|
|
14
|
+
|
|
15
|
+
## Usage
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Fetch from URL
|
|
19
|
+
getscript "https://youtube.com/watch?v=VIDEO_ID"
|
|
20
|
+
getscript "https://podcasts.apple.com/...?i=EPISODE_ID"
|
|
21
|
+
|
|
22
|
+
# Fetch from bare ID
|
|
23
|
+
getscript dQw4w9WgXcQ # YouTube (11-char ID)
|
|
24
|
+
getscript 1000753754819 # Apple (numeric ID)
|
|
25
|
+
|
|
26
|
+
# Output formats
|
|
27
|
+
getscript VIDEO_ID --json | jq .
|
|
28
|
+
getscript VIDEO_ID --markdown > notes.md
|
|
29
|
+
getscript VIDEO_ID --timestamps
|
|
30
|
+
getscript EPISODE_ID --ttml # raw TTML XML (Apple only)
|
|
31
|
+
|
|
32
|
+
# Write to file
|
|
33
|
+
getscript VIDEO_ID -o transcript.txt
|
|
34
|
+
|
|
35
|
+
# Search & pick interactively (requires fzf)
|
|
36
|
+
getscript --search "topic keywords"
|
|
37
|
+
getscript --search "topic" --apple
|
|
38
|
+
getscript --search "topic" --list # print results, no fzf
|
|
39
|
+
getscript --search "topic" --limit 20
|
|
40
|
+
|
|
41
|
+
# YouTube auth options
|
|
42
|
+
getscript VIDEO_ID --proxy socks5://127.0.0.1:1080
|
|
43
|
+
getscript VIDEO_ID --cookies ~/cookies.txt
|
|
44
|
+
|
|
45
|
+
# Transcripts are automatically indexed at voxlytranscribes.com
|
|
46
|
+
# To disable:
|
|
47
|
+
getscript VIDEO_ID --no-upload
|
|
48
|
+
GETSCRIPT_UPLOAD=0 getscript VIDEO_ID
|
|
49
|
+
|
|
50
|
+
# Shell completions
|
|
51
|
+
getscript --completions bash >> ~/.bashrc
|
|
52
|
+
getscript --completions zsh >> ~/.zshrc
|
|
53
|
+
getscript --completions fish > ~/.config/fish/completions/getscript.fish
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Configuration
|
|
57
|
+
|
|
58
|
+
Config file: `~/.config/getscript/config.json`
|
|
59
|
+
|
|
60
|
+
```json
|
|
61
|
+
{
|
|
62
|
+
"youtube_api_key": "YOUR_KEY",
|
|
63
|
+
"output_format": "text",
|
|
64
|
+
"timestamps": false,
|
|
65
|
+
"search_limit": 10,
|
|
66
|
+
"no_upload": false
|
|
67
|
+
}
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Environment variables:
|
|
71
|
+
- `GETSCRIPT_YOUTUBE_API_KEY` — YouTube Data API v3 key (required for `--search`)
|
|
72
|
+
- `GETSCRIPT_PROXY` — proxy URL for YouTube requests
|
|
73
|
+
- `GETSCRIPT_COOKIE_FILE` — Netscape cookie file for YouTube auth
|
|
74
|
+
- `GETSCRIPT_UPLOAD` — set to `0` to disable automatic shared library indexing
|
|
75
|
+
- `GETSCRIPT_SUPABASE_URL` — custom Supabase URL (for development)
|
|
76
|
+
- `GETSCRIPT_SUPABASE_ANON_KEY` — custom Supabase anon key (for development)
|
|
77
|
+
- `NO_COLOR` — disable colors
|
|
78
|
+
|
|
79
|
+
Priority: config file < environment variables < CLI flags.
|
|
80
|
+
|
|
81
|
+
## How it works
|
|
82
|
+
|
|
83
|
+
**YouTube:** Wraps [youtube-transcript-api](https://github.com/jdepoix/youtube-transcript-api) with proxy and cookie support.
|
|
84
|
+
|
|
85
|
+
**Apple Podcasts:** Compiles a small Obj-C helper that uses Apple's private AMSMescal framework (FairPlay) to obtain a bearer token, then fetches TTML transcripts from the AMP API. The token is cached for 30 days at `~/.cache/getscript/apple_token`.
|
|
86
|
+
|
|
87
|
+
## Dependencies
|
|
88
|
+
|
|
89
|
+
- `youtube-transcript-api` — YouTube transcript fetching
|
|
90
|
+
- `requests` — HTTP sessions for cookie-based auth
- `defusedxml` — hardened XML parsing of Apple Podcasts TTML transcripts
|
|
91
|
+
- `fzf` (optional, system binary) — interactive search result selection
|
|
92
|
+
|
|
93
|
+
## License
|
|
94
|
+
|
|
95
|
+
MIT
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""Apple Podcasts transcript fetching."""
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
import subprocess
|
|
7
|
+
import sys
|
|
8
|
+
import tempfile
|
|
9
|
+
import defusedxml.ElementTree as ET
|
|
10
|
+
from datetime import datetime
|
|
11
|
+
from urllib.error import HTTPError
|
|
12
|
+
from urllib.request import Request, urlopen
|
|
13
|
+
|
|
14
|
+
CACHE_VALIDITY = 60 * 60 * 24 * 30 # 30 days
|
|
15
|
+
|
|
16
|
+
# Obj-C source for bearer token via AMSMescal (FairPlay signing).
|
|
17
|
+
# Compiled and run as a subprocess to isolate potential segfaults
|
|
18
|
+
# from the thenWithBlock: cleanup in Apple's promise implementation.
|
|
19
|
+
OBJC_TOKEN_SOURCE = r'''
|
|
20
|
+
#import <Foundation/Foundation.h>
|
|
21
|
+
#import <objc/runtime.h>
|
|
22
|
+
#import <objc/message.h>
|
|
23
|
+
#import <dlfcn.h>
|
|
24
|
+
|
|
25
|
+
// Cast objc_msgSend to typed function pointers to avoid selector validation
|
|
26
|
+
typedef id (*msg_id)(id, SEL, ...);
|
|
27
|
+
typedef id (*msg_id_id)(id, SEL, id, ...);
|
|
28
|
+
typedef id (*msg_id_id_id)(id, SEL, id, id, ...);
|
|
29
|
+
typedef void (*msg_void_id_str)(id, SEL, id, NSString *);
|
|
30
|
+
|
|
31
|
+
int main() {
|
|
32
|
+
@autoreleasepool {
|
|
33
|
+
dlopen("/System/Library/PrivateFrameworks/PodcastsFoundation.framework/PodcastsFoundation", RTLD_LAZY);
|
|
34
|
+
|
|
35
|
+
Class AMSMescal = objc_getClass("AMSMescal");
|
|
36
|
+
Class AMSMescalSession = objc_getClass("AMSMescalSession");
|
|
37
|
+
Class AMSURLRequestClass = objc_getClass("AMSURLRequest");
|
|
38
|
+
Class IMURLBag = objc_getClass("IMURLBag");
|
|
39
|
+
|
|
40
|
+
if (!AMSMescal || !AMSMescalSession || !AMSURLRequestClass || !IMURLBag) {
|
|
41
|
+
fprintf(stderr, "Failed to load required Apple private frameworks. macOS 15.5+ required.\n");
|
|
42
|
+
return 1;
|
|
43
|
+
}
|
|
44
|
+
|
|
45
|
+
NSString *storeFront = @"143441-1,42 t:podcasts1";
|
|
46
|
+
NSDateFormatter *formatter = [[NSDateFormatter alloc] init];
|
|
47
|
+
[formatter setDateFormat:@"yyyy-MM-dd'T'HH:mm:ss'Z'"];
|
|
48
|
+
[formatter setTimeZone:[NSTimeZone timeZoneWithAbbreviation:@"UTC"]];
|
|
49
|
+
NSString *timestamp = [formatter stringFromDate:[NSDate date]];
|
|
50
|
+
|
|
51
|
+
NSURL *tokenURL = [NSURL URLWithString:@"https://sf-api-token-service.itunes.apple.com/apiToken?clientClass=apple&clientId=com.apple.podcasts.macos&os=OS%20X&osVersion=15.5&productVersion=1.1.0&version=2"];
|
|
52
|
+
NSMutableURLRequest *nsRequest = [NSMutableURLRequest requestWithURL:tokenURL];
|
|
53
|
+
|
|
54
|
+
id urlRequest = ((msg_id_id)objc_msgSend)(
|
|
55
|
+
[AMSURLRequestClass alloc],
|
|
56
|
+
sel_registerName("initWithRequest:"),
|
|
57
|
+
nsRequest
|
|
58
|
+
);
|
|
59
|
+
((msg_void_id_str)objc_msgSend)(urlRequest, sel_registerName("setValue:forHTTPHeaderField:"), timestamp, @"x-request-timestamp");
|
|
60
|
+
((msg_void_id_str)objc_msgSend)(urlRequest, sel_registerName("setValue:forHTTPHeaderField:"), storeFront, @"X-Apple-Store-Front");
|
|
61
|
+
|
|
62
|
+
NSDictionary *policy = @{
|
|
63
|
+
@"fields": @[@"clientId"],
|
|
64
|
+
@"headers": @[@"x-apple-store-front", @"x-apple-client-application", @"x-request-timestamp"]
|
|
65
|
+
};
|
|
66
|
+
id signature = ((msg_id_id_id)objc_msgSend)(
|
|
67
|
+
(id)AMSMescal,
|
|
68
|
+
sel_registerName("_signedActionDataFromRequest:policy:"),
|
|
69
|
+
urlRequest, policy
|
|
70
|
+
);
|
|
71
|
+
|
|
72
|
+
id session = ((msg_id)objc_msgSend)((id)AMSMescalSession, sel_registerName("defaultSession"));
|
|
73
|
+
id urlBag = ((msg_id)objc_msgSend)([IMURLBag alloc], sel_registerName("init"));
|
|
74
|
+
|
|
75
|
+
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
|
|
76
|
+
|
|
77
|
+
id signedPromise = ((msg_id_id_id)objc_msgSend)(session, sel_registerName("signData:bag:"), signature, urlBag);
|
|
78
|
+
|
|
79
|
+
((msg_id_id)objc_msgSend)(signedPromise, sel_registerName("thenWithBlock:"), ^(id result) {
|
|
80
|
+
NSString *sig = [(NSData *)result base64EncodedStringWithOptions:0];
|
|
81
|
+
|
|
82
|
+
NSMutableURLRequest *signedRequest = [NSMutableURLRequest requestWithURL:tokenURL];
|
|
83
|
+
[signedRequest setValue:timestamp forHTTPHeaderField:@"x-request-timestamp"];
|
|
84
|
+
[signedRequest setValue:storeFront forHTTPHeaderField:@"X-Apple-Store-Front"];
|
|
85
|
+
[signedRequest setValue:sig forHTTPHeaderField:@"X-Apple-ActionSignature"];
|
|
86
|
+
|
|
87
|
+
NSURLSessionDataTask *task = [[NSURLSession sharedSession] dataTaskWithRequest:signedRequest completionHandler:^(NSData *data, NSURLResponse *response, NSError *error) {
|
|
88
|
+
NSDictionary *json = [NSJSONSerialization JSONObjectWithData:data options:0 error:nil];
|
|
89
|
+
printf("%s", [json[@"token"] UTF8String]);
|
|
90
|
+
dispatch_semaphore_signal(sema);
|
|
91
|
+
}];
|
|
92
|
+
[task resume];
|
|
93
|
+
dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
dispatch_semaphore_wait(sema, DISPATCH_TIME_FOREVER);
|
|
97
|
+
}
|
|
98
|
+
return 0;
|
|
99
|
+
}
|
|
100
|
+
'''
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _get_cache_path(cache_dir: str) -> str:
    """Return the path of the cached Apple bearer token inside *cache_dir*."""
    token_filename = "apple_token"
    return os.path.join(cache_dir, token_filename)
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_bearer_token(cache_dir: str) -> str | None:
    """Return an Apple bearer token, reusing the cached copy while it is fresh.

    The cache file lives at ``_get_cache_path(cache_dir)``. It is reused when
    its modification time is younger than ``CACHE_VALIDITY`` seconds and its
    content starts with ``"ey"``. Otherwise a new token is obtained via
    ``_compile_and_fetch_token()`` and, if that succeeds, written back to the
    cache with owner-only permissions.

    Args:
        cache_dir: Directory that holds (or will hold) the token cache file.

    Returns:
        The token, or ``None`` when no valid cached token exists and a new
        one could not be fetched.

    Raises:
        OSError: If a freshly fetched token cannot be written to the cache.
    """
    cache_path = _get_cache_path(cache_dir)

    # EAFP: a missing, vanished or unreadable cache is not an error, it just
    # means a new token has to be fetched.
    try:
        age = datetime.now().timestamp() - os.path.getmtime(cache_path)
        if age < CACHE_VALIDITY:
            with open(cache_path, encoding="utf-8") as f:
                token = f.read().strip()
            if token.startswith("ey"):
                return token
    except (OSError, UnicodeDecodeError):
        pass

    token = _compile_and_fetch_token()
    if token:
        # dirname is "" when cache_path has no directory part; makedirs("")
        # would raise, so fall back to the current directory.
        os.makedirs(os.path.dirname(cache_path) or ".", exist_ok=True)
        # Create the file with mode 0600 from the start so the secret is never
        # readable by other users, not even briefly before a later chmod.
        fd = os.open(cache_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            # The creation mode is ignored for a pre-existing file, so tighten
            # it explicitly before any token bytes are written.
            os.chmod(cache_path, 0o600)
            f.write(token)
    return token
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _compile_and_fetch_token() -> str | None:
|
|
129
|
+
"""Compile Obj-C helper, run it, return bearer token."""
|
|
130
|
+
if sys.platform != "darwin":
|
|
131
|
+
print(
|
|
132
|
+
"Apple Podcasts transcripts require macOS with Xcode CLI tools.",
|
|
133
|
+
file=sys.stderr,
|
|
134
|
+
)
|
|
135
|
+
return None
|
|
136
|
+
|
|
137
|
+
tmpdir = tempfile.mkdtemp(prefix="getscript-")
|
|
138
|
+
src_path = os.path.join(tmpdir, "token.m")
|
|
139
|
+
bin_path = os.path.join(tmpdir, "token")
|
|
140
|
+
|
|
141
|
+
try:
|
|
142
|
+
with open(src_path, "w") as src:
|
|
143
|
+
src.write(OBJC_TOKEN_SOURCE)
|
|
144
|
+
|
|
145
|
+
comp = subprocess.run(
|
|
146
|
+
[
|
|
147
|
+
"clang",
|
|
148
|
+
"-o",
|
|
149
|
+
bin_path,
|
|
150
|
+
src_path,
|
|
151
|
+
"-Wno-objc-method-access",
|
|
152
|
+
"-framework",
|
|
153
|
+
"Foundation",
|
|
154
|
+
"-F/System/Library/PrivateFrameworks",
|
|
155
|
+
"-framework",
|
|
156
|
+
"AppleMediaServices",
|
|
157
|
+
"-fobjc-arc",
|
|
158
|
+
],
|
|
159
|
+
capture_output=True,
|
|
160
|
+
text=True,
|
|
161
|
+
)
|
|
162
|
+
if comp.returncode != 0:
|
|
163
|
+
print(f"Compilation failed: {comp.stderr}", file=sys.stderr)
|
|
164
|
+
return None
|
|
165
|
+
|
|
166
|
+
result = subprocess.run([bin_path], capture_output=True, text=True, timeout=30)
|
|
167
|
+
|
|
168
|
+
token = result.stdout.strip()
|
|
169
|
+
if not token.startswith("ey"):
|
|
170
|
+
print(f"Invalid token. stderr: {result.stderr}", file=sys.stderr)
|
|
171
|
+
return None
|
|
172
|
+
|
|
173
|
+
return token
|
|
174
|
+
except subprocess.TimeoutExpired:
|
|
175
|
+
print("Token fetch timed out", file=sys.stderr)
|
|
176
|
+
return None
|
|
177
|
+
finally:
|
|
178
|
+
shutil.rmtree(tmpdir, ignore_errors=True)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def fetch_ttml(episode_id: str, bearer_token: str) -> str:
    """Fetch the TTML transcript of an episode from Apple's AMP API.

    Two requests are made: the first asks the AMP API for the episode's
    transcript metadata using the bearer token, the second downloads the TTML
    document from the asset URL contained in that metadata.

    Args:
        episode_id: Episode identifier interpolated into the API URL.
        bearer_token: Token sent in the ``Authorization: Bearer`` header.

    Returns:
        The TTML document decoded as UTF-8.

    Raises:
        Exception: If the AMP API answers with an HTTP error status or the
            JSON payload contains an ``"errors"`` key.
        KeyError, IndexError: If the payload lacks the expected
            ``data[0].attributes.ttmlAssetUrls.ttml`` entry.
    """
    url = (
        "https://amp-api.podcasts.apple.com/v1/catalog/us/podcast-episodes/"
        f"{episode_id}/transcripts?fields=ttmlToken,ttmlAssetUrls"
        "&include%5Bpodcast-episodes%5D=podcast&l=en-US&with=entitlements"
    )

    req = Request(url)
    req.add_header("Authorization", f"Bearer {bearer_token}")

    try:
        with urlopen(req, timeout=15) as resp:
            data = json.loads(resp.read())
    except HTTPError as e:
        # Decode leniently: a non-UTF-8 error body must not replace the real
        # HTTP error with a UnicodeDecodeError.
        body = e.read().decode("utf-8", errors="replace") if e.fp else ""
        raise Exception(f"AMP API returned {e.code}: {body}") from e

    if "errors" in data:
        raise Exception(f"API error: {data['errors']}")

    attrs = data["data"][0]["attributes"]
    ttml_url = attrs["ttmlAssetUrls"]["ttml"]

    with urlopen(ttml_url, timeout=15) as resp:
        return resp.read().decode("utf-8")
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def ttml_to_segments(ttml_content: str) -> list[dict]:
    """Convert a TTML document into a list of transcript segments.

    Every ``<p>`` element (in any namespace) with non-blank text yields one
    dict. ``"text"`` is always present; ``"start"`` and ``"end"`` are added
    when the element has ``begin`` / ``end`` attributes, and ``"duration"``
    when both are available.
    """
    collected = []

    for node in ET.fromstring(ttml_content).iter():
        # Strip a leading "{namespace}" so only the local tag name is compared.
        local_name = node.tag.rsplit("}", 1)[-1]
        if local_name != "p":
            continue

        # itertext() walks nested spans exactly once, so no text is repeated.
        pieces = [chunk.strip() for chunk in node.itertext()]
        pieces = [piece for piece in pieces if piece]
        if not pieces:
            continue

        entry = {"text": " ".join(pieces)}
        begin_attr = node.get("begin", "")
        end_attr = node.get("end", "")
        if begin_attr:
            entry["start"] = _parse_ttml_time(begin_attr)
        if end_attr:
            entry["end"] = _parse_ttml_time(end_attr)
            if "start" in entry:
                entry["duration"] = entry["end"] - entry["start"]
        collected.append(entry)

    return collected
|
|
238
|
+
|
|
239
|
+
|
|
240
|
+
def ttml_to_text(ttml_content: str) -> str:
    """Flatten Apple Podcasts TTML into one space-separated string of text."""
    texts = [segment["text"] for segment in ttml_to_segments(ttml_content)]
    return " ".join(texts)
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def _parse_ttml_time(time_str: str) -> float:
    """Convert a TTML time expression to seconds.

    Accepted forms:
      * clock time ``HH:MM:SS.mmm`` or ``MM:SS.mmm``
      * a bare number of seconds, e.g. ``"12.5"``
      * offset time with an ``h``, ``m``, ``s`` or ``ms`` metric, e.g.
        ``"12.5s"`` or ``"500ms"``

    Args:
        time_str: Value of a TTML ``begin`` / ``end`` attribute.

    Returns:
        The time in seconds.

    Raises:
        ValueError: If the value matches none of the forms above. This
            includes frame- and tick-based expressions, which cannot be
            converted without a frame or tick rate.
    """
    parts = time_str.split(":")
    if len(parts) == 3:
        h, m, s = parts
        return int(h) * 3600 + int(m) * 60 + float(s)
    if len(parts) == 2:
        m, s = parts
        return int(m) * 60 + float(s)

    value = time_str.strip()
    # "ms" has to be tested before "s", otherwise "500ms" would be read as
    # the (invalid) number "500m" followed by the seconds metric.
    for suffix, scale in (("ms", 0.001), ("s", 1.0), ("m", 60.0), ("h", 3600.0)):
        if value.endswith(suffix):
            return float(value[: -len(suffix)]) * scale
    return float(value)
|