getscript 0.12.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
getscript/config.py ADDED
@@ -0,0 +1,64 @@
1
+ """Configuration loading with XDG base directory compliance."""
2
+
3
+ import json
4
+ import os
5
+
6
+
7
def get_config_dir() -> str:
    """Return the per-user config directory: $XDG_CONFIG_HOME/getscript (default ~/.config/getscript)."""
    base = os.environ.get("XDG_CONFIG_HOME", os.path.expanduser("~/.config"))
    return os.path.join(base, "getscript")
12
+
13
+
14
def get_cache_dir() -> str:
    """Return the per-user cache directory: $XDG_CACHE_HOME/getscript (default ~/.cache/getscript)."""
    base = os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache"))
    return os.path.join(base, "getscript")
19
+
20
+
21
def load_config() -> dict:
    """Load config from the XDG config file.

    Returns:
        The parsed JSON config dict, or an empty dict when the file is
        missing, unreadable, or malformed. A bad config file warns on
        stderr but never raises, so it cannot break the CLI.
    """
    import sys

    config_path = os.path.join(get_config_dir(), "config.json")
    try:
        # EAFP: open directly rather than exists()+open(), which both
        # double-checks and races if the file is removed in between.
        with open(config_path) as f:
            return json.load(f)
    except FileNotFoundError:
        # No config file is the normal first-run state — stay silent.
        pass
    except (json.JSONDecodeError, OSError) as e:
        # Malformed JSON or an unreadable file (e.g. permissions):
        # warn and fall back to defaults instead of crashing.
        print(f"Warning: invalid config at {config_path}: {e}", file=sys.stderr)
    return {}
33
+
34
+
35
def merge_config(file_config: dict, cli_args: dict) -> dict:
    """Merge config sources: file < env vars < CLI flags.

    Environment variables override file values; CLI flags override both.
    A CLI value of None means "flag not given" and is skipped.
    """
    merged = dict(file_config)

    # Environment-variable overrides, applied in a fixed order.
    if os.environ.get("NO_COLOR"):
        merged["no_color"] = True
    if api_key := os.environ.get("GETSCRIPT_YOUTUBE_API_KEY"):
        merged["youtube_api_key"] = api_key
    if proxy := os.environ.get("GETSCRIPT_PROXY"):
        merged["proxy"] = proxy
    if cookie_file := os.environ.get("GETSCRIPT_COOKIE_FILE"):
        merged["cookie_file"] = cookie_file
    # GETSCRIPT_UPLOAD is an opt-out switch: only explicit falsy values disable.
    if os.environ.get("GETSCRIPT_UPLOAD", "").lower() in ("0", "false", "no"):
        merged["no_upload"] = True
    if supabase_url := os.environ.get("GETSCRIPT_SUPABASE_URL"):
        merged["supabase_url"] = supabase_url
    if anon_key := os.environ.get("GETSCRIPT_SUPABASE_ANON_KEY"):
        merged["supabase_anon_key"] = anon_key

    # CLI flags win last, but only when explicitly provided (non-None).
    merged.update({key: value for key, value in cli_args.items() if value is not None})
    return merged
getscript/detect.py ADDED
@@ -0,0 +1,56 @@
1
+ """Auto-detect transcript source from URL or ID."""
2
+
3
+ import re
4
+ from urllib.parse import urlparse, parse_qs
5
+
6
+
7
def detect_source(input_str: str) -> tuple[str, str]:
    """Detect source and extract ID from a URL or bare ID.

    Returns:
        ("youtube", video_id) or ("apple", episode_id)

    Raises:
        ValueError with a helpful message if input can't be identified.
    """
    candidate = input_str.strip()

    # URL-style YouTube inputs: watch?v=, youtu.be links, and shorts.
    for pattern in (
        r"v=([A-Za-z0-9_-]{11})",
        r"youtu\.be/([A-Za-z0-9_-]{11})",
        r"shorts/([A-Za-z0-9_-]{11})",
    ):
        match = re.search(pattern, candidate)
        if match is not None:
            return ("youtube", match.group(1))

    # Any bare 11-character token is treated as a YouTube video ID.
    if re.fullmatch(r"[A-Za-z0-9_-]{11}", candidate):
        return ("youtube", candidate)

    # All-digit input is taken as an Apple Podcasts episode ID.
    if candidate.isdigit():
        return ("apple", candidate)

    if "podcasts.apple.com" in candidate:
        # The episode ID lives in the ?i= query parameter.
        episode_ids = parse_qs(urlparse(candidate).query).get("i")
        if episode_ids:
            return ("apple", episode_ids[0])
        raise ValueError(
            f"Apple Podcasts URL missing episode ID (?i=...). "
            f"Open the episode in Apple Podcasts and copy the share link."
        )

    raise ValueError(
        f"Could not detect source from: {candidate}\n"
        f"Supported inputs:\n"
        f"  YouTube: https://youtube.com/watch?v=VIDEO_ID\n"
        f"  YouTube: https://youtu.be/VIDEO_ID\n"
        f"  Apple: https://podcasts.apple.com/...?i=EPISODE_ID\n"
        f"  Apple: EPISODE_ID (numeric)"
    )
getscript/output.py ADDED
@@ -0,0 +1,97 @@
1
+ """Output formatting: plain text, JSON, TTML, Markdown."""
2
+
3
+ import json
4
+ import sys
5
+ from datetime import date
6
+
7
+
8
def is_tty() -> bool:
    """True when stdout is attached to an interactive terminal."""
    stream = sys.stdout
    return stream.isatty()
10
+
11
+
12
def format_timestamp(seconds: float) -> str:
    """Render a second count as a zero-padded clock string.

    HH:MM:SS when the value is an hour or more, MM:SS otherwise.
    """
    hours = int(seconds // 3600)
    minutes = int(seconds % 3600 // 60)
    secs = int(seconds % 60)
    short = f"{minutes:02d}:{secs:02d}"
    return f"{hours:02d}:{short}" if hours > 0 else short
20
+
21
+
22
def format_text(segments: list[dict], timestamps: bool = False) -> str:
    """Render segments as plain text.

    With timestamps, one "[MM:SS] text" line per segment; otherwise all
    segment texts joined into a single space-separated blob.
    """
    if not timestamps:
        return " ".join(seg["text"] for seg in segments)
    return "\n".join(
        f"[{format_timestamp(seg.get('start', 0))}] {seg['text']}"
        for seg in segments
    )
31
+
32
+
33
def format_json(
    segments: list[dict],
    source: str,
    source_id: str,
    timestamps: bool = False,
) -> str:
    """Serialize segments plus source metadata as pretty-printed JSON.

    Segment timing data is included only when timestamps is True;
    otherwise each segment is stripped down to its text.
    """
    full_text = " ".join(seg["text"] for seg in segments)
    if timestamps:
        segment_payload = segments
    else:
        segment_payload = [{"text": seg["text"]} for seg in segments]
    payload = {
        "source": source,
        "id": source_id,
        "text": full_text,
        "segments": segment_payload,
    }
    return json.dumps(payload, indent=2, ensure_ascii=False)
50
+
51
+
52
def format_markdown(
    segments: list[dict],
    source: str,
    source_id: str,
    timestamps: bool = False,
) -> str:
    """Render segments as Markdown with a YAML frontmatter header.

    Frontmatter carries source, id, and today's date; the body is either
    one bold-stamped line per segment or a single joined paragraph.
    """
    today = date.today().isoformat()
    out = [
        "---",
        f"source: {source}",
        f"id: \"{source_id}\"",
        f"date: \"{today}\"",
        "---",
        "",
        "# Transcript",
        "",
    ]
    if timestamps:
        for seg in segments:
            stamp = format_timestamp(seg.get("start", 0))
            out.append(f"**[{stamp}]** {seg['text']}")
        out.append("")
    else:
        out.append(" ".join(seg["text"] for seg in segments))
        out.append("")
    return "\n".join(out)
78
+
79
+
80
+ def format_output(
81
+ segments: list[dict],
82
+ fmt: str = "text",
83
+ source: str = "",
84
+ source_id: str = "",
85
+ timestamps: bool = False,
86
+ ttml_raw: str | None = None,
87
+ ) -> str:
88
+ """Route to the appropriate formatter."""
89
+ if fmt == "ttml":
90
+ if ttml_raw is None:
91
+ raise ValueError("--ttml is only supported for Apple Podcasts transcripts")
92
+ return ttml_raw
93
+ if fmt == "json":
94
+ return format_json(segments, source, source_id, timestamps)
95
+ if fmt == "markdown":
96
+ return format_markdown(segments, source, source_id, timestamps)
97
+ return format_text(segments, timestamps)
getscript/picker.py ADDED
@@ -0,0 +1,69 @@
1
+ """Interactive selection via fzf."""
2
+
3
+ import shutil
4
+ import subprocess
5
+ import sys
6
+
7
+
8
def pick_result(results: list[dict]) -> dict | None:
    """Let the user choose one search result interactively via fzf.

    Returns the chosen result dict, or None when the user cancels
    (Esc / Ctrl-C) or no selection is made.

    Raises:
        RuntimeError: when fzf is not on PATH.
    """
    if shutil.which("fzf") is None:
        raise RuntimeError(
            "fzf required for --search. "
            "Install: https://github.com/junegunn/fzf#installation"
        )

    # One tab-separated line per result; the leading ID column is hidden
    # from the fzf display via --with-nth=2..
    rows = []
    for result in results:
        columns = [result["id"], result["title"], result["channel"]]
        if result.get("duration"):
            columns.append(result["duration"])
        rows.append("\t".join(columns))

    try:
        proc = subprocess.run(
            ["fzf", "--delimiter=\t", "--with-nth=2..", "--header=Select a result:"],
            input="\n".join(rows),
            capture_output=True,
            text=True,
        )
    except KeyboardInterrupt:
        return None

    # 130 = fzf aborted by Esc/Ctrl-C; any non-zero exit means no pick.
    if proc.returncode != 0:
        return None

    choice = proc.stdout.strip()
    if not choice:
        return None

    # First tab-separated column is the ID; map it back to its result.
    chosen_id = choice.split("\t", 1)[0]
    return next((r for r in results if r["id"] == chosen_id), None)
59
+
60
+
61
def format_list(results: list[dict]) -> str:
    """Render results as a numbered, tab-separated list for --list mode."""
    rendered = []
    for number, result in enumerate(results, start=1):
        columns = [result["id"], result["title"], result["channel"]]
        if result.get("duration"):
            columns.append(result["duration"])
        rendered.append(f"{number:3d}. " + "\t".join(columns))
    return "\n".join(rendered)
getscript/progress.py ADDED
@@ -0,0 +1,34 @@
1
+ """Lightweight stderr progress spinner (TTY-aware)."""
2
+
3
+ import sys
4
+ import time
5
+
6
# Braille-dot spinner frames; Progress.update() cycles one frame per call.
SPINNER_CHARS = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"
7
+
8
+
9
+ class Progress:
10
+ """Simple stderr status indicator. Auto-disabled when stderr is not a TTY."""
11
+
12
+ def __init__(self, quiet: bool = False):
13
+ self._enabled = sys.stderr.isatty() and not quiet
14
+ self._idx = 0
15
+ self._last_msg = ""
16
+
17
+ def update(self, message: str) -> None:
18
+ if not self._enabled:
19
+ return
20
+ char = SPINNER_CHARS[self._idx % len(SPINNER_CHARS)]
21
+ self._idx += 1
22
+ # Clear line and write status
23
+ sys.stderr.write(f"\r\033[K{char} {message}")
24
+ sys.stderr.flush()
25
+ self._last_msg = message
26
+
27
+ def done(self, message: str | None = None) -> None:
28
+ if not self._enabled:
29
+ return
30
+ if message:
31
+ sys.stderr.write(f"\r\033[K{message}\n")
32
+ else:
33
+ sys.stderr.write("\r\033[K")
34
+ sys.stderr.flush()
getscript/search.py ADDED
@@ -0,0 +1,83 @@
1
+ """Search backends for YouTube and Apple Podcasts."""
2
+
3
+ import json
4
+ import urllib.request
5
+ import urllib.parse
6
+
7
+
8
def search_youtube(query: str, api_key: str, limit: int = 10) -> list[dict]:
    """Search YouTube via Data API v3.

    Returns list of {"id", "title", "channel", "duration"} dicts.
    The search endpoint does not report durations, so "duration" is "".
    """
    # The API caps maxResults at 50.
    query_string = urllib.parse.urlencode({
        "q": query,
        "type": "video",
        "part": "snippet",
        "maxResults": min(limit, 50),
        "key": api_key,
    })
    request = urllib.request.Request(
        f"https://www.googleapis.com/youtube/v3/search?{query_string}"
    )
    request.add_header("Accept", "application/json")

    with urllib.request.urlopen(request, timeout=15) as response:
        payload = json.loads(response.read())

    hits = []
    for item in payload.get("items", []):
        # Non-video hits (channels, playlists) carry no videoId; skip them.
        video_id = item.get("id", {}).get("videoId")
        if not video_id:
            continue
        snippet = item.get("snippet", {})
        hits.append({
            "id": video_id,
            "title": snippet.get("title", ""),
            "channel": snippet.get("channelTitle", ""),
            "duration": "",
        })
    return hits
43
+
44
+
45
def search_apple(query: str, limit: int = 10) -> list[dict]:
    """Search Apple Podcasts via iTunes Search API (free, no auth).

    Returns list of {"id", "title", "channel", "duration"} dicts.
    """
    # The iTunes API caps limit at 200.
    query_string = urllib.parse.urlencode({
        "term": query,
        "media": "podcast",
        "entity": "podcastEpisode",
        "limit": min(limit, 200),
    })
    request = urllib.request.Request(f"https://itunes.apple.com/search?{query_string}")
    with urllib.request.urlopen(request, timeout=15) as response:
        payload = json.loads(response.read())

    episodes = []
    for item in payload.get("results", []):
        track_id = item.get("trackId")
        if not track_id:
            continue
        # trackTimeMillis is in milliseconds; render as H:MM:SS or M:SS.
        ms = item.get("trackTimeMillis", 0)
        if ms:
            minutes, secs = divmod(ms // 1000, 60)
            hours, minutes = divmod(minutes, 60)
            clock = (
                f"{hours}:{minutes:02d}:{secs:02d}"
                if hours
                else f"{minutes}:{secs:02d}"
            )
        else:
            clock = ""
        episodes.append({
            "id": str(track_id),
            "title": item.get("trackName", ""),
            "channel": item.get("collectionName", ""),
            "duration": clock,
        })
    return episodes
getscript/upload.py ADDED
@@ -0,0 +1,131 @@
1
+ """Upload transcripts to the shared Voxly transcript pool."""
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+ import urllib.error
7
+ import urllib.request
8
+ import uuid
9
+
10
+ from getscript import __version__
11
+ from getscript.config import get_config_dir
12
+
13
# Default Supabase endpoint for the shared transcript pool; upload_transcript()
# lets config keys "supabase_url" / "supabase_anon_key" override both values.
SUPABASE_URL = "https://ohxuifdseybxckmprcry.supabase.co"
# NOTE(review): hardcoded Supabase anon-role JWT. Supabase anon keys are
# presumably intended as public client credentials — confirm the project's
# row-level-security policies gate what this key can do.
SUPABASE_ANON_KEY = (
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9."
    "eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6Im9oeHVpZmRzZXlieGNrbXByY3J5Iiwicm9sZSI6ImFub24iLCJpYXQiOjE3NzA2NDE5NDgsImV4cCI6MjA4NjIxNzk0OH0."
    "_4NFs2SY98gIL6Z0tgiTxIVSX7FBJ8b_46oF7Vi7p6M"
)

# Maps CLI source names ("youtube"/"apple") to the backend's source_type values.
SOURCE_TYPE_MAP = {
    "youtube": "youtube_transcript",
    "apple": "podcast",
}
24
+
25
+
26
def get_device_id() -> str:
    """Get or create a persistent anonymous device ID.

    The ID is a random UUID4 stored in <config_dir>/device.json with
    owner-only (0o600) permissions. Any failure reading the existing
    file — missing, corrupt, unreadable, or wrong shape — falls through
    to generating a fresh ID, so this never raises on bad state.

    Returns:
        The device ID string.
    """
    config_dir = get_config_dir()
    device_path = os.path.join(config_dir, "device.json")
    try:
        with open(device_path) as f:
            return str(json.load(f)["device_id"])
    except (OSError, json.JSONDecodeError, KeyError, TypeError):
        # Missing, unreadable, or malformed state file: regenerate below.
        # (Original crashed on OSError/TypeError instead of recovering.)
        pass
    device_id = str(uuid.uuid4())
    os.makedirs(config_dir, exist_ok=True)
    # Create the file with 0o600 from the start rather than chmod-after-write,
    # so it is never momentarily readable by other users.
    fd = os.open(device_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
    with os.fdopen(fd, "w") as f:
        json.dump({"device_id": device_id}, f)
    return device_id
43
+
44
+
45
+ def fetch_title(source: str, source_id: str) -> str | None:
46
+ """Fetch video/episode title via oembed. Returns None on failure."""
47
+ if source == "youtube":
48
+ url = f"https://www.youtube.com/oembed?url=https://www.youtube.com/watch?v={source_id}&format=json"
49
+ else:
50
+ return None
51
+ try:
52
+ req = urllib.request.Request(url)
53
+ with urllib.request.urlopen(req, timeout=5) as resp:
54
+ data = json.loads(resp.read().decode("utf-8"))
55
+ return data.get("title")
56
+ except Exception:
57
+ return None
58
+
59
+
60
+ def _build_source_url(source: str, source_id: str) -> str:
61
+ if source == "youtube":
62
+ return f"https://www.youtube.com/watch?v={source_id}"
63
+ elif source == "apple":
64
+ return f"https://podcasts.apple.com/podcast/ep?i={source_id}"
65
+ return source_id
66
+
67
+
68
def upload_transcript(
    source: str,
    source_id: str,
    segments: list[dict],
    title: str | None,
    config: dict,
) -> dict | None:
    """Upload a transcript to the shared pool. Returns response dict or None on failure.

    Never raises — all errors are printed to stderr.
    """
    try:
        # Config may point at a self-hosted backend; fall back to defaults.
        endpoint_base = config.get("supabase_url", SUPABASE_URL).rstrip("/")
        anon_key = config.get("supabase_anon_key", SUPABASE_ANON_KEY)

        # Assemble the payload the ingest function expects.
        full_text = " ".join(seg.get("text", "") for seg in segments)
        payload = {
            "device_id": get_device_id(),
            "source_type": SOURCE_TYPE_MAP.get(source, source),
            "source_id": source_id,
            "source_url": _build_source_url(source, source_id),
            "title": title,
            "segments": segments,
            "full_text": full_text,
            "word_count": len(full_text.split()),
            "cli_version": __version__,
        }

        request = urllib.request.Request(
            f"{endpoint_base}/functions/v1/ingest-transcript",
            data=json.dumps(payload).encode("utf-8"),
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {anon_key}",
            },
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=10) as response:
            return json.loads(response.read().decode("utf-8"))

    except urllib.error.HTTPError as e:
        # Server rejected the upload; surface status plus any body detail.
        try:
            detail = e.read().decode("utf-8", errors="replace")
        except Exception:
            detail = ""
        print(f"Warning: upload failed (HTTP {e.code}): {detail}", file=sys.stderr)
        return None
    except urllib.error.URLError as e:
        print(f"Warning: upload failed (network): {e.reason}", file=sys.stderr)
        return None
    except Exception as e:
        # Catch-all: upload is strictly best-effort and must not crash the CLI.
        print(f"Warning: upload failed: {e}", file=sys.stderr)
        return None
getscript/youtube.py ADDED
@@ -0,0 +1,58 @@
1
+ """YouTube transcript fetching."""
2
+
3
+ import http.cookiejar
4
+ import os
5
+
6
+ from requests import Session
7
+ from youtube_transcript_api import YouTubeTranscriptApi
8
+ from youtube_transcript_api.proxies import GenericProxyConfig
9
+
10
+
11
+ def _load_cookies(cookie_path: str) -> http.cookiejar.MozillaCookieJar:
12
+ """Load Netscape/Mozilla format cookies from file."""
13
+ jar = http.cookiejar.MozillaCookieJar(cookie_path)
14
+ jar.load(ignore_discard=True, ignore_expires=True)
15
+ return jar
16
+
17
+
18
def _build_api(config: dict) -> YouTubeTranscriptApi:
    """Build YouTubeTranscriptApi with optional proxy and cookie config.

    Raises:
        FileNotFoundError: when config names a cookie file that doesn't exist.
    """
    proxy_url = config.get("proxy")
    cookie_file = config.get("cookie_file")

    proxy_config = GenericProxyConfig(https_url=proxy_url) if proxy_url else None

    http_client = None
    if cookie_file:
        cookie_path = os.path.expanduser(cookie_file)
        if not os.path.exists(cookie_path):
            raise FileNotFoundError(f"Cookie file not found: {cookie_path}")
        # Hand the API a requests session pre-loaded with the user's cookies.
        http_client = Session()
        http_client.cookies = _load_cookies(cookie_path)

    return YouTubeTranscriptApi(proxy_config=proxy_config, http_client=http_client)
37
+
38
+
39
def fetch_transcript(video_id: str, config: dict | None = None) -> list[dict]:
    """Fetch transcript segments for a YouTube video.

    Args:
        video_id: YouTube video ID.
        config: Optional config dict with proxy/cookie_file keys.

    Returns:
        List of {"text": str, "start": float, "duration": float}
    """
    client = _build_api(config or {})
    fetched = client.fetch(video_id)
    return [
        {"text": seg.text, "start": seg.start, "duration": seg.duration}
        for seg in fetched
    ]