@openthread/claude-code-plugin 0.1.4 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +111 -17
- package/bin/__tests__/settings-writer.test.js +122 -0
- package/bin/cli.sh +77 -102
- package/bin/lib/settings-writer.js +108 -0
- package/bin/postinstall.js +80 -34
- package/commands/export.md +22 -0
- package/commands/import.md +26 -0
- package/commands/search.md +15 -0
- package/commands/share.md +24 -3
- package/package.json +23 -5
- package/scripts/auth.sh +21 -3
- package/scripts/lib/__init__.py +1 -0
- package/scripts/lib/export_client.py +666 -0
- package/scripts/lib/import_client.py +510 -0
- package/scripts/lib/jsonl.py +88 -0
- package/scripts/lib/keychain.js +59 -0
- package/scripts/lib/mask.py +669 -0
- package/scripts/lib/sanitize.py +92 -0
- package/scripts/lib/search_client.py +218 -0
- package/scripts/lib/thread_to_md.py +156 -0
- package/scripts/share.sh +230 -47
- package/scripts/token.sh +215 -23
- package/skills/export-thread/SKILL.md +166 -0
- package/skills/import-thread/SKILL.md +171 -0
- package/skills/search-threads/SKILL.md +103 -0
- package/skills/share-thread/SKILL.md +25 -43
|
@@ -0,0 +1,666 @@
|
|
|
1
|
+
"""Download an OpenThread post as a local archive file.
|
|
2
|
+
|
|
3
|
+
Invoked by ``scripts/export.sh``. Reads its configuration from
|
|
4
|
+
environment variables and either writes a file to disk (mode ``0644``,
|
|
5
|
+
atomic rename from ``<path>.part``) or streams the body to stdout.
|
|
6
|
+
|
|
7
|
+
The file is meant for archival / sharing — the provenance banner is
|
|
8
|
+
plain (no "untrusted data" warning) and the mode allows other users
|
|
9
|
+
on the machine to read it. The body is still re-masked locally with
|
|
10
|
+
the shared ``sanitize`` + ``mask`` libraries as defense-in-depth on
|
|
11
|
+
top of the server's ingest-time masking.
|
|
12
|
+
|
|
13
|
+
Error reporting: this script never raises an uncaught exception. On
|
|
14
|
+
any failure it prints a single JSON object to stderr of the form
|
|
15
|
+
``{"error": "<CODE>", "message": "..."}`` and exits with a non-zero
|
|
16
|
+
status code. Success always ends with a single JSON object on stdout
|
|
17
|
+
(or on stderr if ``STDOUT=1``, so the body stream remains clean).
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import json
|
|
23
|
+
import os
|
|
24
|
+
import pathlib
|
|
25
|
+
import re
|
|
26
|
+
import socket
|
|
27
|
+
import ssl
|
|
28
|
+
import sys
|
|
29
|
+
import tempfile
|
|
30
|
+
import unicodedata
|
|
31
|
+
import urllib.error
|
|
32
|
+
import urllib.parse
|
|
33
|
+
import urllib.request
|
|
34
|
+
from datetime import datetime, timezone
|
|
35
|
+
from typing import Any
|
|
36
|
+
|
|
37
|
+
# Allow ``from lib import ...`` when invoked as a standalone script.
# PLUGIN_SCRIPTS_DIR is expected to point at the directory containing the
# ``lib`` package — presumably exported by scripts/export.sh (TODO confirm).
_SCRIPTS_DIR = os.environ.get("PLUGIN_SCRIPTS_DIR")
if _SCRIPTS_DIR and _SCRIPTS_DIR not in sys.path:
    sys.path.insert(0, _SCRIPTS_DIR)

try:
    from lib import mask as mask_mod  # type: ignore
    from lib import sanitize as sanitize_mod  # type: ignore
except ImportError:  # pragma: no cover - fallback for direct invocation
    # Running from inside the lib directory itself — import siblings flat.
    import mask as mask_mod  # type: ignore
    import sanitize as sanitize_mod  # type: ignore
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Hard cap on bytes accepted from a single HTTP response; also applied to
# the locally rendered body before writing (see main()).
MAX_RESPONSE_BYTES = 5 * 1024 * 1024  # 5 MB, matches the server cap
# Streaming read size used by _fetch().
READ_CHUNK = 64 * 1024
REQUEST_TIMEOUT = 30  # seconds

# Export formats accepted via the FORMAT env var (see parse_format()).
VALID_FORMATS = frozenset({"markdown", "text", "json"})

# File extension per export format (see default_filename()).
FORMAT_EXT = {
    "markdown": "md",
    "text": "txt",
    "json": "json",
}

# Absolute paths that land inside any of these directories are refused
# even if the user explicitly asked for them. Export is user-facing so
# we pick a conservative denylist that still allows ``~``, ``/tmp``,
# ``/private/tmp``, ``/Users``, ``/home``, etc.
SYSTEM_DENYLIST = (
    "/etc/",
    "/dev/",
    "/proc/",
    "/sys/",
    "/bin/",
    "/sbin/",
    "/usr/",
    "/var/",
    "/boot/",
    "/lib/",
    "/lib64/",
)

# Canonical 8-4-4-4-12 hex UUID, matched anywhere in the input string so
# extract_uuid() accepts bare UUIDs, paths, and full URLs alike.
UUID_RE = re.compile(
    r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}",
    re.IGNORECASE,
)
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
# ---------------------------------------------------------------------------
|
|
91
|
+
# Error helpers
|
|
92
|
+
# ---------------------------------------------------------------------------
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _emit_error(code: str, message: str, extra: dict[str, Any] | None = None) -> None:
|
|
96
|
+
payload: dict[str, Any] = {"error": code, "message": message}
|
|
97
|
+
if extra:
|
|
98
|
+
payload.update(extra)
|
|
99
|
+
sys.stderr.write(json.dumps(payload) + "\n")
|
|
100
|
+
sys.stderr.flush()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _fail(code: str, message: str, extra: dict[str, Any] | None = None) -> None:
    """Report a structured error via ``_emit_error`` and abort with status 1."""
    _emit_error(code, message, extra)
    raise SystemExit(1)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
# Input parsing
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def extract_uuid(raw: str) -> str | None:
    """Pull the first UUID out of a bare UUID, path, or full URL."""
    if not raw:
        return None
    found = UUID_RE.search(raw)
    if found is None:
        return None
    # Normalise to lowercase so downstream URL building is stable.
    return found.group(0).lower()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def parse_format(fmt: str) -> str:
    """Normalise the FORMAT value; abort via ``_fail`` on unknown formats."""
    normalized = (fmt or "markdown").strip().lower()
    if normalized in VALID_FORMATS:
        return normalized
    _fail(
        "INVALID_FORMAT",
        f"--format must be one of markdown|text|json (got: {normalized!r})",
    )
    return normalized  # unreachable — _fail exits
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# ---------------------------------------------------------------------------
|
|
132
|
+
# HTTP
|
|
133
|
+
# ---------------------------------------------------------------------------
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _build_request(url: str, token: str | None, accept: str) -> urllib.request.Request:
|
|
137
|
+
req = urllib.request.Request(url, method="GET")
|
|
138
|
+
req.add_header("Accept", accept)
|
|
139
|
+
req.add_header("User-Agent", "openthread-plugin/export")
|
|
140
|
+
if token:
|
|
141
|
+
req.add_header("Authorization", f"Bearer {token}")
|
|
142
|
+
return req
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _validate_scheme(url: str) -> None:
|
|
146
|
+
parsed = urllib.parse.urlparse(url)
|
|
147
|
+
if parsed.scheme == "https":
|
|
148
|
+
return
|
|
149
|
+
if parsed.scheme == "http":
|
|
150
|
+
host = (parsed.hostname or "").lower()
|
|
151
|
+
if host in ("localhost", "127.0.0.1", "::1"):
|
|
152
|
+
return
|
|
153
|
+
_fail(
|
|
154
|
+
"INSECURE_SCHEME",
|
|
155
|
+
f"Refusing plain-http fetch to non-loopback host: {host}",
|
|
156
|
+
)
|
|
157
|
+
_fail("HTTP_ERROR", f"Unsupported URL scheme: {parsed.scheme}")
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _fetch(url: str, token: str | None, accept: str) -> tuple[bytes, dict[str, str]]:
    """Fetch ``url`` returning (body, headers). Enforces the 5 MB cap.

    Raises via ``_fail`` on any error so the caller never sees exceptions.
    Header names are lowercased in the returned dict. The body is read in
    READ_CHUNK pieces so the MAX_RESPONSE_BYTES cap is enforced before the
    whole payload is buffered.
    """
    _validate_scheme(url)
    req = _build_request(url, token, accept)
    try:
        ctx = ssl.create_default_context()
        with urllib.request.urlopen(req, timeout=REQUEST_TIMEOUT, context=ctx) as resp:
            status = getattr(resp, "status", 200)
            headers = {k.lower(): v for k, v in resp.headers.items()}
            if status == 304:
                # Should not happen since we don't send If-None-Match, but
                # be defensive.
                _fail("HTTP_ERROR", "Server returned 304 without a cached copy")
            if status != 200:
                _fail("HTTP_ERROR", f"Unexpected status {status}")
            buf = bytearray()
            while True:
                chunk = resp.read(READ_CHUNK)
                if not chunk:
                    break
                # Check before extending so we never hold more than the cap.
                if len(buf) + len(chunk) > MAX_RESPONSE_BYTES:
                    _fail(
                        "SIZE_EXCEEDED",
                        f"Response exceeded {MAX_RESPONSE_BYTES} bytes",
                    )
                buf.extend(chunk)
            return bytes(buf), headers
    # NOTE: HTTPError/URLError must be caught before OSError — URLError is
    # an OSError subclass, so reordering these clauses would change which
    # error code the user sees.
    except urllib.error.HTTPError as exc:  # type: ignore[attr-defined]
        status = exc.code
        headers = {k.lower(): v for k, v in (exc.headers or {}).items()}
        # Map well-known statuses to user-facing error codes.
        if status == 403:
            _fail("FORBIDDEN", "You don't have access to this community's exports")
        if status == 404:
            _fail("NOT_FOUND", "Post not found")
        if status == 413:
            _fail("TOO_LARGE", "Thread exceeds the maximum export size")
        if status == 429:
            retry = headers.get("retry-after", "")
            _fail(
                "RATE_LIMITED",
                "Rate limited by server",
                {"retryAfter": retry},
            )
        _fail("HTTP_ERROR", f"HTTP {status}")
    except urllib.error.URLError as exc:
        _fail("HTTP_ERROR", f"Network error: {exc.reason}")
    except (TimeoutError, socket.timeout):
        _fail("HTTP_ERROR", "Request timed out")
    except (ssl.SSLError, OSError) as exc:
        _fail("HTTP_ERROR", f"Connection error: {exc}")
    # Unreachable — _fail exits — but keeps the type checker happy.
    return b"", {}
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
# ---------------------------------------------------------------------------
|
|
218
|
+
# Metadata (title / author / community) fetch
|
|
219
|
+
# ---------------------------------------------------------------------------
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def fetch_metadata(
    api_base: str, uuid: str, token: str | None
) -> dict[str, Any]:
    """GET /api/posts/<uuid> for a lightweight title/author/community.

    Returns a dict even on failure — missing fields fall back to
    sensible placeholders so the export can still succeed on posts
    whose metadata endpoint is unreachable.
    """
    url = f"{api_base.rstrip('/')}/api/posts/{uuid}"
    try:
        body, _ = _fetch(url, token, "application/json")
    except SystemExit:
        # _fetch already called _fail — we swallow it here because the
        # metadata fetch is best-effort. If the main export call fails
        # the user will see that error instead.
        # NOTE(review): by this point _fail has already written its JSON
        # error object to stderr, so a failed best-effort metadata fetch
        # still leaves a stray error line there — confirm callers tolerate
        # that.
        return {}
    try:
        parsed = json.loads(body.decode("utf-8", errors="replace"))
    except (ValueError, UnicodeDecodeError):
        # Non-JSON or undecodable body: treat as "no metadata".
        return {}

    # API envelope is typically `{"data": {...}}`.
    data = parsed.get("data") if isinstance(parsed, dict) else None
    if not isinstance(data, dict):
        # Fall back to treating the top-level object as the post itself.
        data = parsed if isinstance(parsed, dict) else {}

    meta: dict[str, Any] = {}
    meta["title"] = str(data.get("title") or "")
    # ``author`` may be an expanded object or a plain string.
    author = data.get("author")
    if isinstance(author, dict):
        meta["author"] = str(
            author.get("username") or author.get("name") or ""
        )
    elif isinstance(author, str):
        meta["author"] = author
    else:
        meta["author"] = ""
    # Same shape tolerance for ``community``.
    community = data.get("community")
    if isinstance(community, dict):
        meta["community"] = str(
            community.get("slug") or community.get("name") or ""
        )
    elif isinstance(community, str):
        meta["community"] = community
    else:
        meta["community"] = ""
    return meta
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
# ---------------------------------------------------------------------------
|
|
273
|
+
# Slugification & filename
|
|
274
|
+
# ---------------------------------------------------------------------------
|
|
275
|
+
|
|
276
|
+
# Characters that are neither word chars, whitespace, nor dashes are dropped.
_SLUG_STRIP_RE = re.compile(r"[^\w\s-]", re.UNICODE)
# Runs of whitespace and/or dashes collapse into a single dash.
_SLUG_DASH_RE = re.compile(r"[-\s]+")


def slugify(title: str, fallback: str = "thread") -> str:
    """Lowercase, ASCII-ish, dash-separated slug capped at 40 chars."""
    if not title:
        return fallback
    decomposed = unicodedata.normalize("NFKD", title)
    base = decomposed.encode("ascii", "ignore").decode("ascii")
    if not base.strip():
        # Unicode-only title (e.g. CJK) — use word chars from the original.
        base = _SLUG_STRIP_RE.sub("", decomposed)
    stripped = _SLUG_STRIP_RE.sub("", base).strip().lower()
    slug = _SLUG_DASH_RE.sub("-", stripped).strip("-")
    if not slug:
        return fallback
    # Truncation may leave a trailing dash; strip it and re-check emptiness.
    return slug[:40].strip("-") or fallback
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
def default_filename(title: str, uuid: str, fmt: str) -> str:
    """Build ``ot-<slug>-<first-uuid-group>.<ext>`` for the given format."""
    short = uuid.split("-", 1)[0]
    stem = f"ot-{slugify(title)}-{short}"
    return ".".join([stem, FORMAT_EXT[fmt]])
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
# ---------------------------------------------------------------------------
|
|
304
|
+
# Path traversal guard
|
|
305
|
+
# ---------------------------------------------------------------------------
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def resolve_output_path(
    out_flag: str,
    title: str,
    uuid: str,
    fmt: str,
    cwd: pathlib.Path,
) -> pathlib.Path:
    """Return an absolute, safe output path. Aborts on traversal.

    Rules:
      * If ``out_flag`` is empty, use ``<cwd>/<default_filename>``.
      * If ``out_flag`` is relative, resolve under ``cwd``. Reject if
        the result escapes ``cwd``.
      * If ``out_flag`` starts with ``~``, expand it. The result is
        then treated as absolute.
      * If ``out_flag`` is absolute, reject if it lives inside any
        system denylist directory.
      * If ``out_flag`` resolves to an existing directory, append the
        default filename inside it.
    """
    if not out_flag:
        return cwd / default_filename(title, uuid, fmt)

    # Reject NUL bytes up front — they'd corrupt any subsequent
    # filesystem syscall.
    if "\x00" in out_flag:
        _fail("UNSAFE_PATH", "Output path contains NUL byte")

    expanded = os.path.expanduser(out_flag)
    candidate = pathlib.Path(expanded)

    if not candidate.is_absolute():
        # Relative: resolve under cwd and ensure it stays under cwd.
        # .resolve() also collapses ".." components and symlinks before
        # the containment check.
        joined = (cwd / candidate).resolve()
        try:
            joined.relative_to(cwd.resolve())
        except ValueError:
            _fail(
                "UNSAFE_PATH",
                f"Relative --out escapes current working directory: {out_flag}",
            )
        candidate = joined
    else:
        # Check BOTH the raw path and the realpath-resolved one, because
        # macOS silently resolves ``/etc`` -> ``/private/etc`` and we
        # want to reject both forms. We also strip a leading ``/private``
        # prefix before comparing so a user-supplied ``/private/etc/…``
        # is caught too.
        raw_candidate = pathlib.Path(expanded)
        resolved = candidate.resolve()

        def _is_denylisted(p: pathlib.Path) -> str | None:
            # Returns the matching denylist entry, or None when clean.
            s = str(p)
            # macOS realpath prefix.
            variants = [s]
            if s.startswith("/private/"):
                variants.append(s[len("/private") :])
            for v in variants:
                for bad in SYSTEM_DENYLIST:
                    # Match the directory itself ("/etc") as well as
                    # anything beneath it ("/etc/...").
                    bad_rstrip = bad.rstrip("/")
                    if v == bad_rstrip or v.startswith(bad):
                        return bad
            return None

        hit = _is_denylisted(raw_candidate) or _is_denylisted(resolved)
        if hit:
            _fail(
                "UNSAFE_PATH",
                f"Refusing to write into system directory: {hit}",
            )
        candidate = resolved

    # If the resolved path is an existing directory, drop the default
    # filename inside it.
    if candidate.exists() and candidate.is_dir():
        candidate = candidate / default_filename(title, uuid, fmt)

    return candidate
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
# ---------------------------------------------------------------------------
|
|
389
|
+
# Banner construction
|
|
390
|
+
# ---------------------------------------------------------------------------
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def _now_iso() -> str:
|
|
394
|
+
return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
def build_markdown_banner(
    uuid: str,
    author: str,
    community: str,
    fmt: str,
    source_url: str,
) -> str:
    """HTML-comment provenance banner prepended to markdown exports."""
    lines = [
        "<!--",
        "Exported from OpenThread",
        f"post: {uuid}",
        f"author: @{author or 'unknown'}",
        f"community: c/{community or 'unknown'}",
        f"fetched: {_now_iso()}",
        f"format: {fmt}",
        f"source: {source_url}",
        "",
        "This content was masked at ingest on OpenThread and re-masked",
        "locally before being written to this file. Review before",
        "publishing elsewhere.",
        "-->",
        "",
    ]
    return "\n".join(lines) + "\n"
|
|
419
|
+
|
|
420
|
+
|
|
421
|
+
def build_text_banner(
    uuid: str,
    author: str,
    community: str,
    fmt: str,
) -> str:
    """Plain-text provenance banner prepended to text exports."""
    rule = "=" * 60
    fields = [
        rule,
        "Exported from OpenThread",
        f"post: {uuid}",
        f"author: @{author or 'unknown'}",
        f"community: c/{community or 'unknown'}",
        f"fetched: {_now_iso()}",
        f"format: {fmt}",
        rule,
        "",
    ]
    return "\n".join(fields) + "\n"
|
|
438
|
+
|
|
439
|
+
|
|
440
|
+
def wrap_json_banner(
    data: Any,
    uuid: str,
    author: str,
    community: str,
) -> dict[str, Any]:
    """Wrap exported JSON in an envelope carrying provenance metadata."""
    meta: dict[str, Any] = {
        "source": "openthread",
        "post": uuid,
        "author": author if author else "unknown",
        "community": community if community else "unknown",
        "fetched": _now_iso(),
        "maskedLocally": True,
        "notice": (
            "Masked at ingest and re-masked locally. "
            "Review before publishing."
        ),
    }
    return {"_meta": meta, "data": data}
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# ---------------------------------------------------------------------------
|
|
464
|
+
# Post-processing
|
|
465
|
+
# ---------------------------------------------------------------------------
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def _local_mask_text(body: str) -> str:
    """Strip control characters, then re-apply local masking."""
    cleaned = sanitize_mod.strip_controls(body)
    return mask_mod.mask(cleaned, home=str(pathlib.Path.home()))
|
|
472
|
+
|
|
473
|
+
|
|
474
|
+
def _mask_json_strings(value: Any) -> Any:
    """Walk a parsed JSON document and mask every string leaf.

    Lists and dicts are rebuilt (never mutated in place); non-string
    scalars pass through unchanged. The home directory is resolved once
    up front — the original recomputed ``pathlib.Path.home()`` on every
    recursive call, i.e. once per node of the document.
    """
    home = str(pathlib.Path.home())

    def _walk(node: Any) -> Any:
        # One-line purpose: recursively rebuild ``node`` with string
        # leaves passed through strip_controls + mask.
        if isinstance(node, str):
            return mask_mod.mask(sanitize_mod.strip_controls(node), home=home)
        if isinstance(node, list):
            return [_walk(item) for item in node]
        if isinstance(node, dict):
            return {key: _walk(item) for key, item in node.items()}
        return node

    return _walk(value)
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
# ---------------------------------------------------------------------------
|
|
487
|
+
# Atomic write
|
|
488
|
+
# ---------------------------------------------------------------------------
|
|
489
|
+
|
|
490
|
+
|
|
491
|
+
def atomic_write(path: pathlib.Path, data: bytes, overwrite: bool) -> None:
    """Write ``data`` to ``path`` via a ``.part`` rename. Mode 0644."""
    if not overwrite and path.exists():
        _fail(
            "EXISTS",
            f"Target file already exists: {path}",
            {"path": str(path)},
        )
    parent = path.parent
    parent.mkdir(parents=True, exist_ok=True)
    # Create the temp file next to the target so the final rename stays
    # on the same filesystem (atomic).
    fd, tmp_name = tempfile.mkstemp(
        prefix=f"{path.name}.",
        suffix=".part",
        dir=str(parent),
    )
    tmp = pathlib.Path(tmp_name)
    try:
        with os.fdopen(fd, "wb") as out:
            out.write(data)
            out.flush()
            try:
                os.fsync(out.fileno())
            except OSError:
                # fsync may not be supported on some filesystems — best effort.
                pass
        os.chmod(tmp, 0o644)
        os.replace(tmp, path)
    except Exception:
        # Never leave a stray .part file behind on failure.
        try:
            tmp.unlink()
        except OSError:
            pass
        raise
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
# ---------------------------------------------------------------------------
|
|
529
|
+
# Main
|
|
530
|
+
# ---------------------------------------------------------------------------
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
def main() -> None:
    """Entry point: read env config, fetch the export, mask, and write it.

    All configuration comes from environment variables: API_BASE, INPUT,
    FORMAT, OUT, STDOUT, NO_BANNER, TOKEN, OVERWRITE. On success a single
    JSON summary object is printed (stdout normally; stderr in STDOUT=1
    mode so the body stream stays clean). All failures go through _fail.
    """
    api_base = os.environ.get("API_BASE", "https://openthread.me")
    raw_input = os.environ.get("INPUT", "")
    fmt = parse_format(os.environ.get("FORMAT", "markdown"))
    out_flag = os.environ.get("OUT", "") or ""
    use_stdout = os.environ.get("STDOUT", "0") == "1"
    no_banner = os.environ.get("NO_BANNER", "0") == "1"
    # Empty TOKEN is normalised to None so _build_request skips the header.
    token = os.environ.get("TOKEN", "") or None
    overwrite = os.environ.get("OVERWRITE", "0") == "1"

    uuid = extract_uuid(raw_input)
    if not uuid:
        _fail("INVALID_UUID", f"Could not extract UUID from input: {raw_input!r}")

    # Step 1: metadata for title/author/community. Best effort.
    meta = fetch_metadata(api_base, uuid, token)

    # Step 2: actual export.
    export_url = (
        f"{api_base.rstrip('/')}/api/posts/{uuid}/export?format={fmt}"
    )
    accept_header = {
        "markdown": "text/markdown",
        "text": "text/plain",
        "json": "application/json",
    }[fmt]
    raw_body, _headers = _fetch(export_url, token, accept_header)

    # Step 3: decode + defense-in-depth mask.
    if fmt == "json":
        try:
            parsed = json.loads(raw_body.decode("utf-8", errors="replace"))
        except ValueError as exc:
            _fail("HTTP_ERROR", f"Response was not valid JSON: {exc}")
        masked_json = _mask_json_strings(parsed)
        # Use the server's title if we didn't get one from metadata.
        if not meta.get("title") and isinstance(parsed, dict):
            server_title = parsed.get("title")
            if isinstance(server_title, str):
                meta["title"] = server_title
        if no_banner:
            out_obj: Any = masked_json
        else:
            out_obj = wrap_json_banner(
                masked_json,
                uuid,
                meta.get("author", ""),
                meta.get("community", ""),
            )
        body_bytes = json.dumps(
            out_obj, indent=2, ensure_ascii=False
        ).encode("utf-8") + b"\n"
    else:
        text_body = raw_body.decode("utf-8", errors="replace")
        masked_text = _local_mask_text(text_body)
        if not meta.get("title"):
            # Try to fish a title out of the first heading or line.
            first_line = masked_text.lstrip().splitlines()[0] if masked_text.strip() else ""
            meta["title"] = first_line.lstrip("# ").strip()
        if no_banner:
            final_text = masked_text
        elif fmt == "markdown":
            banner = build_markdown_banner(
                uuid,
                meta.get("author", ""),
                meta.get("community", ""),
                fmt,
                export_url,
            )
            final_text = banner + masked_text
        else:  # text
            banner = build_text_banner(
                uuid,
                meta.get("author", ""),
                meta.get("community", ""),
                fmt,
            )
            final_text = banner + masked_text
        body_bytes = final_text.encode("utf-8")

    # The local banner/envelope can push an at-cap body over the limit,
    # so re-check the rendered size.
    if len(body_bytes) > MAX_RESPONSE_BYTES:
        _fail(
            "SIZE_EXCEEDED",
            f"Rendered body exceeded {MAX_RESPONSE_BYTES} bytes",
        )

    title = meta.get("title") or "thread"

    # Step 4: stdout mode writes body to stdout, metadata to stderr.
    if use_stdout:
        try:
            sys.stdout.buffer.write(body_bytes)
            sys.stdout.flush()
        except BrokenPipeError:
            # Reader went away (e.g. piped into head) — not an error.
            pass
        summary = {
            "path": None,
            "bytes": len(body_bytes),
            "uuid": uuid,
            "format": fmt,
            "title": title,
        }
        sys.stderr.write(json.dumps(summary) + "\n")
        sys.stderr.flush()
        return

    # Step 5: resolve the target path and write atomically.
    cwd = pathlib.Path(os.getcwd()).resolve()
    target = resolve_output_path(out_flag, title, uuid, fmt, cwd)

    atomic_write(target, body_bytes, overwrite=overwrite)

    summary = {
        "path": str(target),
        "bytes": len(body_bytes),
        "uuid": uuid,
        "format": fmt,
        "title": title,
    }
    sys.stdout.write(json.dumps(summary) + "\n")
    sys.stdout.flush()
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
if __name__ == "__main__":
    try:
        main()
    except SystemExit:
        # _fail() already emitted its JSON error; just propagate the code.
        raise
    except KeyboardInterrupt:
        # 130 = conventional 128 + SIGINT exit status.
        _emit_error("INTERRUPTED", "Interrupted by user")
        sys.exit(130)
    except Exception as exc:  # pragma: no cover - last-chance safety net
        # Upholds the module contract: never an uncaught traceback —
        # always a single JSON error object on stderr.
        _emit_error("INTERNAL_ERROR", f"{type(exc).__name__}: {exc}")
        sys.exit(1)
|