ghostwriter-cli 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ghostwriter/__init__.py +28 -0
- ghostwriter/__main__.py +5 -0
- ghostwriter/cleaner.py +130 -0
- ghostwriter/cli.py +500 -0
- ghostwriter/config.py +242 -0
- ghostwriter/ghost.py +126 -0
- ghostwriter/lexical.py +252 -0
- ghostwriter/normalize.py +23 -0
- ghostwriter/pipeline.py +386 -0
- ghostwriter/wechat.py +125 -0
- ghostwriter_cli-0.1.0.dist-info/METADATA +172 -0
- ghostwriter_cli-0.1.0.dist-info/RECORD +15 -0
- ghostwriter_cli-0.1.0.dist-info/WHEEL +4 -0
- ghostwriter_cli-0.1.0.dist-info/entry_points.txt +2 -0
- ghostwriter_cli-0.1.0.dist-info/licenses/LICENSE +21 -0
ghostwriter/config.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Configuration management.
|
|
2
|
+
|
|
3
|
+
Configuration is read from two sources, in priority order:
|
|
4
|
+
1. Environment variables (GHOSTWRITER_GHOST_*, GHOSTWRITER_WECHAT_*)
|
|
5
|
+
2. Config file (~/.config/ghostwriter/config.json)
|
|
6
|
+
|
|
7
|
+
Use `ghostwriter config set <key> <value>` to manage the config file.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import os
|
|
12
|
+
import sys
|
|
13
|
+
import textwrap
|
|
14
|
+
|
|
15
|
+
# All required config keys mapped to their env var names.
|
|
16
|
+
_REQUIRED_KEYS = {
|
|
17
|
+
"ghost.api_url": "GHOSTWRITER_GHOST_API_URL",
|
|
18
|
+
"ghost.admin_key_id": "GHOSTWRITER_GHOST_ADMIN_KEY_ID",
|
|
19
|
+
"ghost.admin_key": "GHOSTWRITER_GHOST_ADMIN_KEY",
|
|
20
|
+
"wechat.appid": "GHOSTWRITER_WECHAT_APPID",
|
|
21
|
+
"wechat.secret": "GHOSTWRITER_WECHAT_SECRET",
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
# Keys that should be masked when displayed.
|
|
25
|
+
_SECRET_KEYS = {
|
|
26
|
+
"ghost.admin_key_id",
|
|
27
|
+
"ghost.admin_key",
|
|
28
|
+
"wechat.appid",
|
|
29
|
+
"wechat.secret",
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
_CONFIG_DIR = os.path.expanduser("~/.config/ghostwriter")
|
|
33
|
+
_CONFIG_PATH = os.path.join(_CONFIG_DIR, "config.json")
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _nested_get(d, key_path):
|
|
37
|
+
"""Get a nested dict value by dot-separated key path.
|
|
38
|
+
|
|
39
|
+
Example: _nested_get(d, "ghost.api_url") → d["ghost"]["api_url"]
|
|
40
|
+
"""
|
|
41
|
+
parts = key_path.split(".")
|
|
42
|
+
for part in parts:
|
|
43
|
+
if not isinstance(d, dict) or part not in d:
|
|
44
|
+
return None
|
|
45
|
+
d = d[part]
|
|
46
|
+
return d
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _nested_set(d, key_path, value):
|
|
50
|
+
"""Set a nested dict value by dot-separated key path.
|
|
51
|
+
|
|
52
|
+
Example: _nested_set(d, "ghost.api_url", "https://...")
|
|
53
|
+
"""
|
|
54
|
+
parts = key_path.split(".")
|
|
55
|
+
for part in parts[:-1]:
|
|
56
|
+
if part not in d:
|
|
57
|
+
d[part] = {}
|
|
58
|
+
d = d[part]
|
|
59
|
+
d[parts[-1]] = value
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _load_from_env():
    """Assemble a config dict purely from environment variables.

    Returns:
        A dict shaped like the config file when every variable listed in
        ``_REQUIRED_KEYS`` is set to a non-empty value; ``None`` as soon as
        one of them is missing or empty.
    """
    env_config = {"ghost": {}, "wechat": {}}
    for dotted_key, var_name in _REQUIRED_KEYS.items():
        raw = os.environ.get(var_name)
        if raw:
            _nested_set(env_config, dotted_key, raw)
        else:
            # Environment configuration is all-or-nothing.
            return None
    return env_config
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _load_from_file():
    """Read the JSON config file at ``_CONFIG_PATH``.

    Returns:
        The parsed dict, or ``None`` when the file is missing, unreadable,
        not valid UTF-8 / JSON, or when its top-level value is not a JSON
        object.
    """
    try:
        with open(_CONFIG_PATH, encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers both
        # json.JSONDecodeError and UnicodeDecodeError.
        return None
    # Callers index the result like a dict, so reject lists, strings, etc.
    return data if isinstance(data, dict) else None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _validate_config(config, source):
    """Ensure every key in ``_REQUIRED_KEYS`` has a truthy value in *config*.

    Args:
        config: The loaded config dict.
        source: Human-readable description of where *config* came from;
            included in the error message.

    Returns:
        ``None`` when nothing is missing. Otherwise prints a report listing
        the missing keys and how to set them, then exits with status 1.
    """
    absent = [
        (dotted, var)
        for dotted, var in _REQUIRED_KEYS.items()
        if not _nested_get(config, dotted)
    ]
    if absent:
        report = [f"[!] 配置不完整 ({source}),缺少以下字段:"]
        report.extend(f" {dotted} (env: {var})" for dotted, var in absent)
        report += ["", "设置方法:", " # 环境变量(推荐用于 CI/Docker):"]
        report.extend(f" export {var}=<value>" for _, var in absent)
        report += ["", " # 或使用 CLI 写入配置文件:"]
        report.extend(
            f" ghostwriter config set {dotted} <value>" for dotted, _ in absent
        )
        report += ["", f" # 配置文件位置: {_CONFIG_PATH}"]

        print("\n".join(report))
        sys.exit(1)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def load_config():
    """Return the effective configuration.

    Environment variables win when all of them are set; otherwise the config
    file is loaded and validated.

    Returns:
        A dict shaped like::

            {"ghost": {"api_url", "admin_key_id", "admin_key"},
             "wechat": {"appid", "secret"}}

    If neither source is usable, or the file lacks required keys, a help
    message is printed and the process exits with status 1.
    """
    env_config = _load_from_env()
    if env_config is not None:
        return env_config

    file_config = _load_from_file()
    if file_config is not None:
        _validate_config(file_config, f"文件 {_CONFIG_PATH}")
        return file_config

    # Nothing configured anywhere: explain both options and bail out.
    template = " ghostwriter config set {key} <value>"
    help_lines = [
        "[!] 未找到配置",
        "",
        f"配置文件路径: {_CONFIG_PATH}",
        "",
        "请选择一种方式配置:",
        "",
        " 方式 1 — 环境变量(适合 CI/Docker):",
    ]
    help_lines += [
        f" export {env_var}=<value>" for env_var in _REQUIRED_KEYS.values()
    ]
    help_lines += ["", " 方式 2 — 配置文件(适合本地使用):"]
    help_lines += [
        f" {template.format(key=key)}"
        for key in (
            "ghost.api_url",
            "ghost.admin_key_id",
            "ghost.admin_key",
            "wechat.appid",
            "wechat.secret",
        )
    ]
    print("\n".join(help_lines))
    sys.exit(1)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
# ── CLI config management ──────────────────────────────────────
|
|
162
|
+
|
|
163
|
+
def config_path():
    """Return the filesystem path of the JSON config file (``_CONFIG_PATH``)."""
    return _CONFIG_PATH
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def show_config():
    """Print the configuration currently in effect.

    The environment is reported when it supplies every required key;
    otherwise the config file is shown. Values of keys listed in
    ``_SECRET_KEYS`` are masked: the first and last four characters are
    shown for values longer than eight characters, ``***`` otherwise. The
    optional ``authors`` map is listed after the required keys.
    """
    print(f"配置文件: {_CONFIG_PATH}")

    # Environment variables are the primary source when complete.
    config = _load_from_env()
    if config:
        print("来源: 环境变量")
    else:
        config = _load_from_file()
        if not config:
            print("来源: (无)")
            print("")
            print("未设置任何配置。使用以下命令开始:")
            print(" ghostwriter config set ghost.api_url <url>")
            return
        print("来源: 配置文件")

    print("")
    for key_path in _REQUIRED_KEYS:
        value = _nested_get(config, key_path)
        if not value:
            shown = "(未设置)"
        elif key_path in _SECRET_KEYS:
            # A hand-edited file may hold a non-string (e.g. a number);
            # stringify first so masking never raises.
            text = str(value)
            shown = text[:4] + "***" + text[-4:] if len(text) > 8 else "***"
        else:
            shown = value
        print(f" {key_path} = {shown}")

    # The optional authors map is only listed when it really is a mapping.
    authors = config.get("authors") if isinstance(config, dict) else None
    if isinstance(authors, dict) and authors:
        print("")
        for slug, author_id in authors.items():
            print(f" authors.{slug} = {author_id}")
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def set_config_value(key_path, value):
    """Persist a single setting to the config file.

    The config directory and file are created when absent, and keys already
    in the file are preserved. Only keys in ``_REQUIRED_KEYS`` and
    ``authors.<slug>`` entries are accepted; any other key prints the list
    of valid keys and exits with status 1.

    Args:
        key_path: Dot-separated key, e.g. ``"ghost.api_url"``.
        value: The string value to store.
    """
    if key_path not in _REQUIRED_KEYS and not key_path.startswith("authors."):
        valid = "\n".join(f" {k}" for k in _REQUIRED_KEYS)
        valid += "\n authors.<slug> (可选) 作者 slug → ID 映射"
        print(f"[!] 未知的配置键: {key_path}")
        print(f"有效的键:\n{valid}")
        sys.exit(1)

    # Start from the existing file; fall back to an empty skeleton when it is
    # missing, invalid, or does not hold a JSON object.
    config = _load_from_file()
    if not isinstance(config, dict):
        config = {"ghost": {}, "wechat": {}}

    _nested_set(config, key_path, value)

    os.makedirs(_CONFIG_DIR, exist_ok=True)
    with open(_CONFIG_PATH, "w", encoding="utf-8") as f:
        json.dump(config, f, indent=2, ensure_ascii=False)

    # Never echo a secret in full: short secrets are fully masked, matching
    # what show_config prints for them.
    display = value
    if key_path in _SECRET_KEYS:
        display = value[:4] + "***" + value[-4:] if len(value) > 8 else "***"
    print(f"[+] {key_path} = {display}")
|
ghostwriter/ghost.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
"""Ghost Admin API client.
|
|
2
|
+
|
|
3
|
+
Handles JWT authentication, CRUD operations, image upload, and author
|
|
4
|
+
lookup via the Ghost Admin and Content APIs.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import time
|
|
9
|
+
|
|
10
|
+
import jwt
|
|
11
|
+
import requests
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_ghost_token(key_id, key_secret):
    """Build a short-lived HS256 JWT for the Ghost Admin API.

    Args:
        key_id: Admin API key id; sent as the ``kid`` JWT header.
        key_secret: Admin API secret as a hex string; decoded to raw bytes
            and used as the signing key.

    Returns:
        The encoded token as returned by ``jwt.encode``. It expires 300
        seconds after the time of the call.

    Raises:
        ValueError: If *key_secret* is not valid hexadecimal.
    """
    signing_key = bytes.fromhex(key_secret)
    issued_at = int(time.time())
    claims = dict(aud="/admin/", iat=issued_at, exp=issued_at + 300, type="admin")
    jwt_headers = dict(alg="HS256", typ="JWT", kid=key_id)
    return jwt.encode(claims, signing_key, algorithm="HS256", headers=jwt_headers)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def _api_request(method, path, config, data=None):
    """Send one authenticated request to the Ghost Admin API.

    Args:
        method: HTTP verb, e.g. ``"GET"``.
        path: Path appended to ``config["ghost"]["api_url"]``.
        config: Config dict providing ``ghost.api_url``,
            ``ghost.admin_key_id`` and ``ghost.admin_key``.
        data: Optional JSON-serialisable body; sent as UTF-8 JSON.

    Returns:
        The decoded JSON response, or ``{}`` when the body is empty.

    Raises:
        requests.HTTPError: For non-2xx responses (``raise_for_status``).
    """
    ghost_cfg = config["ghost"]
    key_id = ghost_cfg["admin_key_id"]
    key_secret = ghost_cfg["admin_key"]
    base_url = ghost_cfg["api_url"]
    token = get_ghost_token(key_id, key_secret)

    request_args = {
        "headers": {"Authorization": f"Ghost {token}"},
        "timeout": 30,
    }
    if data is not None:
        request_args["headers"]["Content-Type"] = "application/json"
        request_args["data"] = json.dumps(data, ensure_ascii=False).encode("utf-8")

    # Join base and path with exactly one slash between them.
    url = base_url.rstrip('/') + "/" + path.lstrip('/')
    response = requests.request(method, url, **request_args)
    response.raise_for_status()
    if not response.text:
        return {}
    return response.json()
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def ghost_api_get(path, config):
    """Issue an authenticated GET to *path* and return the decoded response."""
    return _api_request("GET", path, config, data=None)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def ghost_api_post(path, data, config):
    """Issue an authenticated POST of *data* (as JSON) to *path*."""
    response_body = _api_request("POST", path, config, data=data)
    return response_body
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def ghost_api_put(path, data, config):
    """Issue an authenticated PUT of *data* (as JSON) to *path*."""
    response_body = _api_request("PUT", path, config, data=data)
    return response_body
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def ghost_api_delete(path, config):
    """Issue an authenticated DELETE to *path* and return the decoded response."""
    return _api_request("DELETE", path, config, data=None)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def upload_image_to_ghost(config, image_path):
    """Upload a local image file to Ghost and return its public URL.

    Args:
        config: Config dict providing ``ghost.api_url``,
            ``ghost.admin_key_id`` and ``ghost.admin_key``.
        image_path: Path of the image file to upload.

    Returns:
        The ``url`` of the first entry in the ``images`` list of the response.

    Raises:
        FileNotFoundError: If *image_path* is not an existing file.
        requests.HTTPError: For non-2xx responses (``raise_for_status``).
    """
    import os

    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"封面图片不存在: {image_path}")

    key_id = config["ghost"]["admin_key_id"]
    key_secret = config["ghost"]["admin_key"]
    api_url = config["ghost"]["api_url"]
    token = get_ghost_token(key_id, key_secret)

    # Pick the MIME type from the file extension; unknown extensions fall
    # back to PNG.
    mime_map = {
        ".jpg": "image/jpeg", ".jpeg": "image/jpeg",
        ".png": "image/png", ".gif": "image/gif",
        ".webp": "image/webp",
    }
    ext = os.path.splitext(image_path)[1].lower()
    content_type = mime_map.get(ext, "image/png")

    # Strip a trailing slash so a configured "https://host/" does not yield
    # "https://host//ghost/..." (same normalisation as _api_request).
    upload_url = f"{api_url.rstrip('/')}/ghost/api/admin/images/upload/"

    with open(image_path, "rb") as f:
        r = requests.post(
            upload_url,
            files={"file": (os.path.basename(image_path), f, content_type)},
            headers={"Authorization": f"Ghost {token}"},
            timeout=60,
        )
    r.raise_for_status()
    result = r.json()
    return result["images"][0]["url"]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def get_ghost_posts(config, limit=20, status="all"):
    """Fetch a page of posts from the Ghost Admin API.

    Args:
        config: Config dict passed through to ``ghost_api_get``.
        limit: Value of the ``limit`` query parameter.
        status: Value of the ``status`` query parameter.

    Returns:
        Whatever ``ghost_api_get`` returns for the posts endpoint.
    """
    endpoint = f"/ghost/api/admin/posts/?limit={limit}&status={status}"
    return ghost_api_get(endpoint, config)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def get_ghost_article(article_id, config):
    """Fetch one post by id from the Ghost Admin API, requesting ``formats=html``."""
    endpoint = f"/ghost/api/admin/posts/{article_id}/?formats=html"
    return ghost_api_get(endpoint, config)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_ghost_authors(config):
    """Fetch up to 50 authors from the Ghost Content API (best effort).

    The request is sent without credentials. Any failure (bad or missing
    config, network error, non-2xx status, undecodable body) yields the
    empty fallback instead of raising.

    NOTE(review): the Ghost Content API normally expects a ``key`` query
    parameter; without it the request is probably rejected and this function
    returns the fallback. Confirm against the target Ghost instance.

    Returns:
        The decoded JSON response on success, otherwise ``{"authors": []}``.
    """
    try:
        # Normalise a trailing slash, matching how _api_request builds URLs.
        base_url = config["ghost"]["api_url"].rstrip("/")
        r = requests.get(
            f"{base_url}/ghost/api/content/authors/?limit=50",
            timeout=10,
        )
        if r.ok:
            return r.json()
    except (requests.RequestException, ValueError,
            KeyError, TypeError, AttributeError):
        # Deliberately best-effort: fall through to the empty result, but do
        # not hide unrelated programming errors behind a blanket except.
        pass
    return {"authors": []}
|
ghostwriter/lexical.py
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
"""Markdown to Ghost Lexical JSON converter.
|
|
2
|
+
|
|
3
|
+
Converts Markdown text to Ghost's Lexical editor format (JSON). The output
|
|
4
|
+
can be posted directly to the Ghost Admin API and is editable in Ghost's
|
|
5
|
+
editor afterward.
|
|
6
|
+
|
|
7
|
+
Supported Markdown:
|
|
8
|
+
- Headings (h1-h6)
|
|
9
|
+
- Paragraphs, bold (**), italic (*), inline code (`)
|
|
10
|
+
- Links [text](url)
|
|
11
|
+
- Fenced code blocks with language tags
|
|
12
|
+
- Tables (converted to html-card nodes)
|
|
13
|
+
- Ordered and unordered lists
|
|
14
|
+
- Horizontal rules (---, ***, ___)
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import json
|
|
18
|
+
import re
|
|
19
|
+
|
|
20
|
+
# Lexical format constants
|
|
21
|
+
_LEXICAL_FORMATS = {1: "bold", 2: "italic", 8: "code"}
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _lex_text(text, fmt=0):
|
|
25
|
+
return {
|
|
26
|
+
"type": "extended-text", "text": text, "format": fmt,
|
|
27
|
+
"version": 1, "detail": 0, "style": "", "mode": "normal",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _lex_link(text, url):
    """Build a ``link`` node pointing at *url* with one plain text child."""
    return dict(
        type="link",
        url=url,
        children=[_lex_text(text)],
        format=0,
        version=1,
        detail=0,
        style="",
        mode="normal",
        rel=None,
        target=None,
        title=None,
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _lex_para(children):
|
|
41
|
+
return {
|
|
42
|
+
"type": "paragraph", "children": children,
|
|
43
|
+
"format": "", "indent": 0, "version": 1, "direction": "ltr",
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _lex_heading(tag, children):
|
|
48
|
+
return {
|
|
49
|
+
"type": "extended-heading", "tag": tag, "children": children,
|
|
50
|
+
"format": "", "indent": 0, "version": 1, "direction": "ltr",
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _lex_codeblock(code, lang=""):
|
|
55
|
+
return {
|
|
56
|
+
"type": "codeblock", "code": code, "language": lang,
|
|
57
|
+
"caption": "", "version": 1,
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def _lex_hr():
|
|
62
|
+
return {"type": "horizontalrule", "version": 1}
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _lex_listitem(children):
|
|
66
|
+
return {
|
|
67
|
+
"type": "listitem", "children": children,
|
|
68
|
+
"format": "", "indent": 0, "value": 1, "version": 1,
|
|
69
|
+
"direction": "ltr",
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def _lex_list(items, ordered=False):
|
|
74
|
+
return {
|
|
75
|
+
"type": "list",
|
|
76
|
+
"listType": "number" if ordered else "bullet",
|
|
77
|
+
"start": 1, "children": items,
|
|
78
|
+
"format": "", "indent": 0, "version": 1, "direction": "ltr",
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _lex_html_card(html):
|
|
83
|
+
return {"type": "html", "html": html, "version": 1}
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _extract_fenced_codes(md_text):
|
|
87
|
+
"""Extract fenced code blocks to placeholders.
|
|
88
|
+
|
|
89
|
+
Returns (processed_text, code_map) where code_map maps placeholder
|
|
90
|
+
keys to {"lang": ..., "code": ...} dicts.
|
|
91
|
+
"""
|
|
92
|
+
code_map = {}
|
|
93
|
+
idx = 0
|
|
94
|
+
|
|
95
|
+
def _save(m):
|
|
96
|
+
nonlocal idx
|
|
97
|
+
lang = m.group(1) or ""
|
|
98
|
+
code = m.group(2).rstrip("\n")
|
|
99
|
+
code_map[f"__CB_{idx}__"] = {"lang": lang, "code": code}
|
|
100
|
+
r = f"\n__CB_{idx}__\n"
|
|
101
|
+
idx += 1
|
|
102
|
+
return r
|
|
103
|
+
|
|
104
|
+
text = re.sub(
|
|
105
|
+
r'```(\w*)\n(.*?)```', _save, md_text,
|
|
106
|
+
flags=re.DOTALL,
|
|
107
|
+
)
|
|
108
|
+
return text, code_map
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _parse_inline(text):
    """Split *text* into Lexical inline nodes.

    Recognises ``**bold**``, `` `code` ``, ``*italic*`` and ``[label](url)``;
    everything between matches becomes a plain text node.

    Args:
        text: One line or block of Markdown without block-level syntax.

    Returns:
        A non-empty list of nodes; a single empty text node when *text*
        yields nothing.
    """
    pattern = (
        r'\*\*(?P<bold>.+?)\*\*|'
        r'`(?P<code>[^`]+)`|'
        r'\*(?P<italic>.+?)\*|'
        r'\[(?P<label>[^\]]+)\]\((?P<url>[^)]+)\)'
    )
    children = []
    last = 0
    for m in re.finditer(pattern, text):
        # Plain text between the previous match and this one.
        if m.start() > last:
            children.append(_lex_text(text[last:m.start()]))
        if m.group("bold") is not None:
            children.append(_lex_text(m.group("bold"), 1))
        elif m.group("code") is not None:
            # Lexical's code flag is 1 << 4 (16); 8 is the underline flag and
            # would render inline code as underlined text.
            children.append(_lex_text(m.group("code"), 16))
        elif m.group("italic") is not None:
            children.append(_lex_text(m.group("italic"), 2))
        else:
            children.append(_lex_link(m.group("label"), m.group("url")))
        last = m.end()
    if last < len(text):
        children.append(_lex_text(text[last:]))
    return children if children else [_lex_text("")]
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _table_to_html(raw):
|
|
140
|
+
"""Convert a Markdown table to an HTML <table> string."""
|
|
141
|
+
rows = [
|
|
142
|
+
r for r in raw.split('\n')
|
|
143
|
+
if r.strip() and not re.match(r'^\|[\s:-]+\|', r)
|
|
144
|
+
]
|
|
145
|
+
parts = ['<table>']
|
|
146
|
+
for i, row in enumerate(rows):
|
|
147
|
+
tag = 'th' if i == 0 else 'td'
|
|
148
|
+
cells = [c.strip() for c in row.split('|')[1:-1]]
|
|
149
|
+
parts.append('<tr>')
|
|
150
|
+
for cell in cells:
|
|
151
|
+
ch = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', cell)
|
|
152
|
+
ch = re.sub(r'`([^`]+)`', r'<code>\1</code>', ch)
|
|
153
|
+
parts.append(f'<{tag}>{ch}</{tag}>')
|
|
154
|
+
parts.append('</tr>')
|
|
155
|
+
parts.append('</table>')
|
|
156
|
+
return '\n'.join(parts)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def md_to_ghost_lexical(md_text):
    """Convert Markdown text into a Ghost Lexical document.

    Blocks are separated by blank lines and classified in this order:
    fenced code, horizontal rule (``---``, ``***``, ``___``), heading
    (``#`` to ``######``), pipe table (rendered as an html card), list
    (ordered or unordered), and finally plain paragraph. Inline bold,
    italic, code and links are handled by ``_parse_inline``.

    Args:
        md_text: The Markdown source.

    Returns:
        ``(title, lexical_json)``: the text of a leading ``# `` heading on
        the first line (or ``""``), and the Lexical document serialised as a
        JSON string. The title line is not part of the document body.
    """
    source_lines = md_text.split('\n')
    title = ""
    if source_lines and source_lines[0].startswith('# '):
        title = source_lines[0][2:].strip()
        source_lines = source_lines[1:]

    # Hide fenced code behind placeholders before splitting into blocks.
    body, code_map = _extract_fenced_codes('\n'.join(source_lines))

    nodes = []
    for block in re.split(r'\n{2,}', body):
        block = block.strip()
        if not block:
            continue

        # Fenced-code placeholder.
        if re.match(r'^__CB_(\d+)__$', block):
            saved = code_map.get(block)
            if saved:
                nodes.append(_lex_codeblock(saved["code"], saved["lang"]))
                continue

        # Horizontal rule.
        if re.match(r'^[-*_]{3,}\s*$', block):
            nodes.append(_lex_hr())
            continue

        # Heading.
        heading = re.match(r'^(#{1,6})\s+(.+)$', block)
        if heading:
            hashes, heading_text = heading.groups()
            nodes.append(
                _lex_heading(f"h{len(hashes)}", _parse_inline(heading_text))
            )
            continue

        # Table: any block with at least four pipes.
        if block.count('|') >= 4:
            nodes.append(_lex_html_card(_table_to_html(block)))
            continue

        # List: every non-blank line starts with a bullet or number marker.
        block_lines = block.split('\n')
        is_list = all(
            re.match(r'^(\s*[-*+]\s+|\s*\d+[.)]\s+)', ln)
            for ln in block_lines if ln.strip()
        )
        if is_list:
            items = []
            ordered = None
            for ln in block_lines:
                found = re.match(r'^\s*([-*+]|\d+[.)])\s+(.*)$', ln)
                if found is None:
                    continue
                marker, item_text = found.groups()
                if ordered is None:
                    # The first item decides the list type; numeric markers
                    # always end in "." or ")".
                    ordered = marker.endswith((')', '.'))
                items.append(_lex_listitem(_parse_inline(item_text.strip())))
            if items:
                nodes.append(_lex_list(items, ordered or False))
                continue

        # Anything else is a paragraph.
        nodes.append(_lex_para(_parse_inline(block)))

    document = {
        "root": {
            "children": nodes,
            "direction": None,
            "format": "",
            "indent": 0,
            "type": "root",
            "version": 1,
        }
    }
    return title, json.dumps(document, ensure_ascii=False)
|
ghostwriter/normalize.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Unicode title normalization for WeChat compatibility.
|
|
2
|
+
|
|
3
|
+
WeChat's draft API (error 45003) rejects titles containing certain
|
|
4
|
+
Unicode characters. This module maps them to safe ASCII equivalents.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def normalize_title(title):
    """Replace special Unicode punctuation in *title* with ASCII equivalents.

    Mappings:
        U+201C / U+201D (curly double quotes) -> "
        U+2018 / U+2019 (curly single quotes) -> '
        U+2014 / U+2013 (em / en dash)        -> -
        U+3000 (fullwidth space)              -> regular space

    The characters are written as escapes so the table survives editors and
    tools that normalise whitespace or quotes; a literal U+3000 that gets
    turned into a plain space would make that replacement a silent no-op.

    Args:
        title: The title string to normalise.

    Returns:
        The title with the characters above replaced; all other characters
        are unchanged.
    """
    replacements = str.maketrans({
        "\u201c": '"',
        "\u201d": '"',
        "\u2018": "'",
        "\u2019": "'",
        "\u2014": "-",
        "\u2013": "-",
        "\u3000": " ",
    })
    return title.translate(replacements)
|