@openthread/claude-code-plugin 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,92 @@
1
+ """Normalization and control-char stripping. Used by mask.py and by
2
+ inbound content paths like /ot:import that must defang terminal escapes.
3
+
4
+ This module is a Python port of the normalization prelude of
5
+ ``apps/api/src/lib/privacy-mask.ts``. The semantics are intentionally
6
+ kept in lock-step with the TypeScript source so that masked output
7
+ produced by the plugin matches output produced by the server.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import re
13
+ import unicodedata
14
+ import urllib.parse
15
+
16
# Control characters to delete: the C0 range minus TAB (\t) and LF (\n),
# DEL (0x7F), and the C1 range (0x80-0x9F). DEL is included because it is
# a terminal control code with no textual use.
_CONTROL_RE = re.compile(r"[\x00-\x08\x0b-\x1f\x7f-\x9f]")

# Invisible or direction-changing code points (zero-width joiners, bidi
# overrides, soft hyphens, BOM) that can smuggle data past a naive regex
# matcher. The fragments below are concatenated into one character class.
_ZW_BIDI_RE = re.compile(
    "".join(
        (
            "[",
            "\u00ad",  # SOFT HYPHEN
            "\u200b-\u200f",  # ZWSP, ZWNJ, ZWJ, LRM, RLM
            "\u2028-\u202f",  # LS, PS, LRE..RLO, narrow NBSP
            "\u2060-\u206f",  # word-joiner, invisible times/plus, etc.
            "\ufeff",  # BOM / ZWNBSP
            "]",
        )
    )
)

# ANSI CSI sequence: ESC [ <digits ; ?>* <letter>
_ANSI_CSI_RE = re.compile(r"\x1b\[[0-9;?]*[a-zA-Z]")

# ANSI OSC sequence: ESC ] <payload> terminated by BEL or by ESC \
_ANSI_OSC_RE = re.compile(r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)")

# One %xx escape; used to decode escapes one at a time.
_PERCENT_ESCAPE_RE = re.compile(r"%[0-9a-fA-F]{2}")
40
+
41
+
42
def strip_ansi(text: str) -> str:
    """Return ``text`` with every ANSI OSC and CSI escape sequence deleted."""
    # OSC sequences are removed first, then CSI sequences.
    without_osc = _ANSI_OSC_RE.sub("", text)
    return _ANSI_CSI_RE.sub("", without_osc)
47
+
48
+
49
def strip_controls(text: str) -> str:
    """Defang ``text`` for terminal display.

    Whole ANSI escape sequences are removed first, then zero-width / bidi
    code points, then any remaining control characters. TAB and LF are
    kept so code blocks survive.
    """
    cleaned = strip_ansi(text)
    for pattern in (_ZW_BIDI_RE, _CONTROL_RE):
        cleaned = pattern.sub("", cleaned)
    return cleaned
56
+
57
+
58
def _safe_decode_uri(text: str) -> str:
    """Percent-decode ``text`` one ``%xx`` escape at a time.

    Each escape is decoded on its own so that a single malformed sequence
    cannot disable normalization for the rest of the string. Because the
    decode is strict and per-escape, an escape that is not valid text on
    its own is left exactly as written.

    Mirrors the ``safeDecodeURI`` helper in the TypeScript source.
    """

    def _decode_one(found: "re.Match[str]") -> str:
        escape = found.group(0)
        try:
            decoded = urllib.parse.unquote(escape, errors="strict")
        except (UnicodeDecodeError, ValueError):
            # Undecodable escape: keep the original three characters.
            return escape
        return decoded

    return _PERCENT_ESCAPE_RE.sub(_decode_one, text)
73
+
74
+
75
def normalize(text: str) -> str:
    """Canonicalize ``text`` so bypass classes vanish before regex matching.

    The steps run in this order, which must match the TypeScript pipeline:
      1. NFC (combining marks collapsed into precomposed forms)
      2. strip zero-width / bidi / control chars (keeps TAB + LF)
      3. unescape JSON-style ``\\/`` slashes (``\\\\`` is left alone so
         Windows UNC paths survive the path-masking step)
      4. URL-decode ``%xx`` escapes, tolerant of malformed encoding

    Non-string or empty input is returned unchanged.

    NOTE(review): step 4 runs after step 2, so a percent-encoded control
    character is decoded after stripping has already happened. This looks
    intentional for matching purposes; confirm that callers which print
    the result still pass it through ``strip_controls``.
    """
    if not (isinstance(text, str) and text):
        return text
    out = unicodedata.normalize("NFC", text)
    for pattern in (_ZW_BIDI_RE, _CONTROL_RE):
        out = pattern.sub("", out)
    # Only the escaped forward slash is unescaped; backslash pairs stay.
    return _safe_decode_uri(out.replace("\\/", "/"))
@@ -0,0 +1,218 @@
1
+ #!/usr/bin/env python3
2
+ """Search client for /ot:search.
3
+
4
+ Calls ``GET $API_BASE/search`` (where ``$API_BASE`` already points at the
5
+ OpenThread API root, e.g. ``https://openthread.me/api``) and emits a
6
+ plugin-consumable JSON response to stdout with every string field routed
7
+ through ``sanitize.strip_controls`` so the terminal cannot be tricked by
8
+ ANSI escapes, zero-width characters, or C0/C1 control bytes embedded in
9
+ user-generated content.
10
+
11
+ No external dependencies — stdlib only.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import os
18
+ import sys
19
+ import urllib.error
20
+ import urllib.parse
21
+ import urllib.request
22
+
23
+ # Allow importing sanitize from the same lib dir regardless of cwd.
24
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
25
+
26
+ from sanitize import strip_controls # noqa: E402
27
+
28
+
29
# Runtime configuration, read once from the environment at import time.
# Every value stays a string here; validation happens in main().
API_BASE = os.getenv("API_BASE", "https://openthread.me/api")
QUERY = os.getenv("QUERY", "").strip()
TYPE = os.getenv("TYPE", "posts")
COMMUNITY = os.getenv("COMMUNITY", "")
PROVIDER = os.getenv("PROVIDER", "")
TIME_RANGE = os.getenv("TIME_RANGE", "")
LIMIT = os.getenv("LIMIT", "10")
TOKEN = os.getenv("TOKEN", "")

# Values accepted for TYPE.
_VALID_TYPES = {"posts", "comments", "communities", "users", "all"}
39
+
40
+
41
def _emit_error(code: str, message: str, exit_code: int = 1) -> None:
    """Report a failure as one line of JSON on stderr, then terminate.

    The object has the keys ``error`` (``code``) and ``message``. The
    process exits with ``exit_code``; this function never returns.
    """
    payload = {"error": code, "message": message}
    sys.stderr.write(json.dumps(payload, ensure_ascii=False))
    sys.stderr.write("\n")
    raise SystemExit(exit_code)
46
+
47
+
48
def _clean(text: object) -> str:
    """Return a display-safe version of ``text``; non-strings give ``""``.

    Server-side ``<mark>`` / ``</mark>`` highlight markers are dropped
    before control-character stripping, and the result is trimmed of
    surrounding whitespace.
    """
    if isinstance(text, str):
        for marker in ("<mark>", "</mark>"):
            text = text.replace(marker, "")
        return strip_controls(text).strip()
    return ""
55
+
56
+
57
def _int(value: object) -> int:
    """Coerce ``value`` to ``int``; anything non-numeric becomes ``0``.

    Accepts ``bool``, ``int``, ``float`` (truncated toward zero) and
    integer-looking strings. Every other type, and every value that cannot
    be converted, yields ``0`` instead of raising.

    ``json.loads`` accepts the non-standard literals ``NaN`` and
    ``Infinity`` and turns out-of-range numbers into ``inf``, so a float
    taken from a decoded response may not be finite. ``int()`` raises
    ``ValueError`` for NaN and ``OverflowError`` for infinities; both are
    mapped to ``0`` here.
    """
    # bool is a subclass of int, so it is covered by the int check.
    if not isinstance(value, (int, float, str)):
        return 0
    try:
        return int(value)
    except (ValueError, OverflowError):
        return 0
71
+
72
+
73
def build_url() -> str:
    """Assemble the ``GET /search`` URL from the module-level settings.

    ``q``, ``type`` and ``limit`` are always sent; ``community``,
    ``provider`` and ``time`` are added only when their setting is
    non-empty. Values are percent-encoded with ``urllib.parse.quote``.
    """
    query: dict[str, str] = {"q": QUERY, "type": TYPE, "limit": LIMIT}
    optional = (
        ("community", COMMUNITY),
        ("provider", PROVIDER),
        ("time", TIME_RANGE),
    )
    query.update({key: val for key, val in optional if val})
    encoded = urllib.parse.urlencode(query, quote_via=urllib.parse.quote)
    # A trailing slash on the base is dropped to avoid a double slash.
    return API_BASE.rstrip("/") + "/search?" + encoded
85
+
86
+
87
def fetch(url: str) -> dict:
    """GET ``url`` and return the decoded JSON object.

    An ``Authorization: Bearer`` header is added when ``TOKEN`` is set.
    The body is decoded as UTF-8 with replacement of invalid bytes.

    Every failure is reported through ``_emit_error`` (which exits):

    * HTTP error status -> a code from the status table, else ``HTTP_ERROR``
    * timeout (including one wrapped in ``URLError``) -> ``TIMEOUT``
    * any other transport failure -> ``NETWORK_ERROR``
    * body that is not JSON, or JSON that is not an object -> ``BAD_RESPONSE``

    Returns:
        The parsed response, guaranteed to be a ``dict``.
    """
    # Function-scope imports: only this function needs these modules.
    import http.client
    import socket

    # socket.timeout is only an alias of TimeoutError from Python 3.10 on;
    # naming both keeps older interpreters covered.
    timeout_errors = (TimeoutError, socket.timeout)

    req = urllib.request.Request(url)
    req.add_header("Accept", "application/json")
    if TOKEN:
        req.add_header("Authorization", f"Bearer {TOKEN}")

    raw = ""
    try:
        with urllib.request.urlopen(req, timeout=10) as resp:
            raw = resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as e:
        code_map = {
            400: "BAD_REQUEST",
            401: "UNAUTHORIZED",
            403: "FORBIDDEN",
            404: "NOT_FOUND",
            422: "VALIDATION_ERROR",
            429: "RATE_LIMITED",
            500: "SERVER_ERROR",
            502: "BAD_GATEWAY",
            503: "SERVICE_UNAVAILABLE",
            504: "GATEWAY_TIMEOUT",
        }
        _emit_error(
            code_map.get(e.code, "HTTP_ERROR"),
            f"HTTP {e.code}: {e.reason}",
        )
    except urllib.error.URLError as e:
        # Failures while connecting or sending arrive wrapped in URLError;
        # a wrapped timeout is still a timeout.
        if isinstance(e.reason, timeout_errors):
            _emit_error("TIMEOUT", f"Request to {API_BASE} timed out")
        _emit_error("NETWORK_ERROR", f"Failed to reach {API_BASE}: {e.reason}")
    except timeout_errors:
        _emit_error("TIMEOUT", f"Request to {API_BASE} timed out")
    except (OSError, http.client.HTTPException) as e:
        # Errors raised while reading the body (connection reset, truncated
        # response) are not wrapped in URLError.
        _emit_error("NETWORK_ERROR", f"Failed to reach {API_BASE}: {e}")

    parsed = None
    try:
        parsed = json.loads(raw)
    except (ValueError, RecursionError):
        # JSONDecodeError is a ValueError; RecursionError covers bodies
        # nested too deeply for the decoder.
        _emit_error("BAD_RESPONSE", "Server returned invalid JSON")
    if not isinstance(parsed, dict):
        # Callers use .get() on the result, so a list/null/scalar body is
        # rejected here rather than failing later with AttributeError.
        _emit_error("BAD_RESPONSE", "Server returned a non-object JSON body")
        return {}  # unreachable
    return parsed
124
+
125
+
126
def normalize_result(type_: str, item: dict) -> dict:
    """Map one server search hit onto the plugin's result shape.

    ``type_`` is stored verbatim under ``type``. String fields go through
    ``_clean`` and numeric fields through ``_int``. The snippet is the
    first highlight when it is a string, otherwise ``body``, and is cut to
    320 characters after cleaning. A non-dict ``item`` yields ``{}``.
    """
    if not isinstance(item, dict):
        return {}

    lead = ""
    found = item.get("highlights")
    if isinstance(found, list) and found and isinstance(found[0], str):
        lead = found[0]
    raw_snippet = lead or item.get("body") or ""

    name = item.get("name")
    username = item.get("username")
    shaped = {}
    shaped["id"] = _clean(item.get("id"))
    shaped["type"] = type_
    shaped["title"] = _clean(item.get("title") or name or username)
    shaped["community"] = _clean(item.get("communityName") or name)
    shaped["author"] = _clean(item.get("authorUsername") or username)
    shaped["voteScore"] = _int(item.get("voteScore"))
    shaped["commentCount"] = _int(item.get("commentCount"))
    shaped["provider"] = _clean(item.get("provider"))
    shaped["createdAt"] = _clean(item.get("createdAt"))
    shaped["snippet"] = _clean(raw_snippet)[:320]
    return shaped
147
+
148
+
149
def _bucket_for_type(type_: str) -> list[str]:
    """Return the result buckets to read for ``type_``.

    ``"all"`` expands to every bucket in display order; any other value is
    returned as a single-element list.
    """
    everything = ["posts", "comments", "communities", "users"]
    return everything if type_ == "all" else [type_]
153
+
154
+
155
# Plural bucket name -> singular label.
_SINGULAR = dict(
    posts="post",
    comments="comment",
    communities="community",
    users="user",
)
161
+
162
+
163
def main() -> None:
    """Validate the settings, run the search, and print the result as JSON.

    Invalid settings are reported through ``_emit_error`` with exit code 2
    (missing query, unknown type, limit not an integer in 1..25). On
    success one JSON object with the keys ``query``, ``total``, ``counts``
    and ``results`` is written to stdout, followed by a newline. Results
    whose cleaned ``id`` is empty are dropped.
    """
    if not QUERY:
        _emit_error("MISSING_QUERY", "Query is required", 2)
    if TYPE not in _VALID_TYPES:
        _emit_error(
            "INVALID_TYPE",
            f"--type must be one of {sorted(_VALID_TYPES)}, got: {TYPE}",
            2,
        )
    try:
        limit_int = int(LIMIT)
    except ValueError:
        _emit_error("INVALID_LIMIT", f"--limit must be an integer, got: {LIMIT}", 2)
        return
    if limit_int < 1 or limit_int > 25:
        _emit_error("INVALID_LIMIT", "--limit must be between 1 and 25", 2)

    url = build_url()
    resp = fetch(url)
    if not isinstance(resp, dict):
        # The response is untrusted; anything but an object cannot be read
        # with .get() below.
        _emit_error("BAD_RESPONSE", "Server returned an unexpected JSON shape")
        return

    # Each container is checked independently so that a wrongly typed field
    # degrades to "no data" instead of raising AttributeError.
    data = resp.get("data")
    if not isinstance(data, dict):
        data = {}
    raw_counts = resp.get("counts")
    if not isinstance(raw_counts, dict):
        raw_counts = {}

    counts = {
        bucket: _int(raw_counts.get(bucket))
        for bucket in ("posts", "comments", "communities", "users")
    }
    total = _int(resp.get("total"))

    results: list[dict] = []
    for bucket in _bucket_for_type(TYPE):
        singular = _SINGULAR[bucket]
        items = data.get(bucket)
        if not isinstance(items, list):
            continue
        for item in items:
            normalized = normalize_result(singular, item)
            if normalized.get("id"):
                results.append(normalized)

    json.dump(
        {
            "query": _clean(QUERY),
            "total": total,
            "counts": counts,
            "results": results,
        },
        sys.stdout,
        ensure_ascii=False,
    )
    sys.stdout.write("\n")


if __name__ == "__main__":
    main()
@@ -0,0 +1,156 @@
1
+ """Render a thread JSON object to Markdown.
2
+
3
+ This is the Python counterpart to
4
+ ``packages/thread-parser/src/serializers/markdown.ts`` but is used on
5
+ the inbound (/ot:import) path. It only handles layout; callers must
6
+ already have sanitized and masked every string field before invoking
7
+ ``render_thread`` — see ``lib/import_client.py``.
8
+
9
+ Shape expected:
10
+
11
+ .. code-block:: python
12
+
13
+ {
14
+ "provider": "claude",
15
+ "model": "claude-sonnet-4.5",
16
+ "messages": [
17
+ {
18
+ "role": "user" | "assistant" | "system" | "tool",
19
+ "sequenceNum": 0,
20
+ "blocks": [ ... ContentBlock union ... ],
21
+ "tokenCount": 1234,
22
+ "model": "claude-sonnet-4.5"
23
+ },
24
+ ...
25
+ ]
26
+ }
27
+
28
+ Every ContentBlock variant defined in
29
+ ``packages/validators/src/thread.ts`` is covered:
30
+ ``text``, ``code``, ``thinking``, ``tool_use``, ``tool_result``,
31
+ ``image``, ``file``, ``artifact``, ``error``, ``math``.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import json
37
+ from typing import Any
38
+
39
+
40
def _as_str(value: Any, default: str = "") -> str:
    """Return ``value`` as a string.

    Strings pass through unchanged, ``None`` becomes ``default``, and any
    other value is converted with ``str()``.
    """
    if isinstance(value, str):
        return value
    return default if value is None else str(value)
46
+
47
+
48
def _role_label(role: str) -> str:
    """Return ``role`` with its first character upper-cased.

    The rest of the string is left as-is. An empty role gives "Unknown".
    """
    return role[:1].upper() + role[1:] if role else "Unknown"
52
+
53
+
54
def _blockquote(text: str) -> str:
    """Prefix every line of ``text`` with the markdown quote marker."""
    return "\n".join(f"> {line}" for line in text.split("\n"))


def _fence(language: str, content: str, info_suffix: str = "") -> str:
    """Wrap ``content`` in a triple-backtick fence tagged ``language``."""
    return f"```{language}{info_suffix}\n{content}\n```"


def render_block(block: dict) -> str:
    """Render a single ContentBlock to markdown.

    Handles the ``text``, ``code``, ``thinking``, ``tool_use``,
    ``tool_result``, ``image``, ``file``, ``artifact``, ``error`` and
    ``math`` block types. A non-dict ``block`` or an unknown type returns
    an empty string so it can be filtered out at the caller.

    Missing, null and empty values are all treated alike for fields that
    have a fallback label (``language``, ``filename``, ``title``), so a
    heading is never rendered blank. Multi-line error messages keep every
    line inside the blockquote, matching the thinking and tool-result
    branches.
    """
    if not isinstance(block, dict):
        return ""
    btype = block.get("type", "")

    if btype == "text":
        return _as_str(block.get("content", ""))

    if btype == "code":
        language = _as_str(block.get("language", "text"), "text") or "text"
        content = _as_str(block.get("content", ""))
        filename = block.get("filename")
        title_suffix = ""
        if isinstance(filename, str) and filename:
            title_suffix = f' title="{filename}"'
        return _fence(language, content, title_suffix)

    if btype == "thinking":
        content = _as_str(block.get("content", ""))
        return f"> [thinking]\n{_blockquote(content)}"

    if btype == "tool_use":
        tool_name = _as_str(block.get("toolName", ""))
        raw_input = block.get("input", {})
        try:
            input_json = json.dumps(raw_input, indent=2, ensure_ascii=False)
        except (TypeError, ValueError):
            # Not JSON-serializable: fall back to its string form.
            input_json = json.dumps(str(raw_input))
        return f"**tool:** {tool_name}\n\n{_fence('json', input_json)}"

    if btype == "tool_result":
        content = _as_str(block.get("content", ""))
        header = "> [tool error]" if block.get("isError", False) else "> [tool result]"
        return f"{header}\n{_blockquote(content)}"

    if btype == "image":
        alt = _as_str(block.get("alt", ""))
        url = _as_str(block.get("url", ""))
        label = alt or url or "image"
        return f"[image: {label}]"

    if btype == "file":
        # `or` covers an explicit null/empty filename, not just a missing key.
        filename = _as_str(block.get("filename"), "untitled") or "untitled"
        language = _as_str(block.get("language", "text"), "text") or "text"
        content = _as_str(block.get("content", ""))
        return f"### {filename}\n\n{_fence(language, content)}"

    if btype == "artifact":
        title = _as_str(block.get("title"), "artifact") or "artifact"
        language = _as_str(block.get("language", "text"), "text") or "text"
        content = _as_str(block.get("content", ""))
        return f"**artifact:** {title}\n\n{_fence(language, content)}"

    if btype == "error":
        code = _as_str(block.get("code", ""))
        message = _as_str(block.get("message", ""))
        header = f"> [error {code}]" if code else "> [error]"
        # The first line shares the header line; continuation lines get
        # their own quote marker so they stay inside the blockquote.
        first_line, *more_lines = message.split("\n")
        quoted = [f"{header}: {first_line}"]
        quoted.extend(f"> {line}" for line in more_lines)
        return "\n".join(quoted)

    if btype == "math":
        expression = _as_str(block.get("expression", ""))
        display = bool(block.get("display", False))
        return f"$$\n{expression}\n$$" if display else f"${expression}$"

    return ""
124
+
125
+
126
def render_message(message: dict) -> str:
    """Render one message as a markdown section.

    The section starts with ``## <Role> (msg #<n>)``. Blocks that render
    to a non-empty string are joined with blank lines and followed by a
    ``---`` rule; a message with no renderable blocks is just the heading.
    A non-dict ``message`` renders as an empty string.

    ``sequenceNum`` falls back to 0 when it cannot be converted to an
    integer. That includes infinite floats, for which ``int()`` raises
    ``OverflowError`` rather than ``ValueError``.
    """
    if not isinstance(message, dict):
        return ""

    role = _role_label(_as_str(message.get("role", "user")))
    try:
        seq_int = int(message.get("sequenceNum", 0))
    except (TypeError, ValueError, OverflowError):
        seq_int = 0

    blocks_raw = message.get("blocks", [])
    rendered_blocks: list[str] = []
    if isinstance(blocks_raw, list):
        # Unknown or empty blocks render as "" and are skipped.
        rendered_blocks = [text for text in map(render_block, blocks_raw) if text]

    header = f"## {role} (msg #{seq_int})"
    if not rendered_blocks:
        return f"{header}\n"
    body = "\n\n".join(rendered_blocks)
    return f"{header}\n\n{body}\n\n---"
147
+
148
+
149
def render_thread(thread: dict) -> str:
    """Render a whole thread JSON object to markdown.

    Every string in ``thread`` must already be sanitized and masked; this
    function only lays the messages out. Non-dict entries in ``messages``
    are skipped. Returns ``""`` when ``thread`` is not a dict or has no
    non-empty ``messages`` list; otherwise the text ends with exactly one
    newline.
    """
    if not isinstance(thread, dict):
        return ""
    messages = thread.get("messages")
    if not (isinstance(messages, list) and messages):
        return ""
    sections = []
    for entry in messages:
        if isinstance(entry, dict):
            sections.append(render_message(entry))
    joined = "\n\n".join(sections)
    return joined.rstrip() + "\n"