@ai-welopc/opc-content-factory 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,240 @@
1
+ #!/usr/bin/env python
2
+ import argparse
3
+ import json
4
+ import mimetypes
5
+ import os
6
+ import re
7
+ import sys
8
+ import time
9
+ import urllib.error
10
+ import urllib.parse
11
+ import urllib.request
12
+ import uuid
13
+ from pathlib import Path
14
+
15
+
16
# Root of the official-account "cgi-bin" HTTP API; endpoint paths are appended to it.
API_BASE = "https://api.weixin.qq.com/cgi-bin"

# Opener built from a ProxyHandler with an empty mapping, which disables
# auto-detected proxies for every request sent through it.
DIRECT_OPENER = urllib.request.build_opener(
    urllib.request.ProxyHandler(proxies={}),
)
18
+
19
+
20
class DraftError(RuntimeError):
    """Error raised when a remote API call fails or returns an unusable reply."""
22
+
23
+
24
def load_env_file(env_file: str) -> str:
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. A leading ``export`` keyword is accepted. Variables already
    present in the environment are never overwritten.

    Args:
        env_file: Path of the file to read; an empty string means "nothing
            to load".

    Returns:
        The resolved path of the file that was read, or ``""`` when
        ``env_file`` is empty.

    Raises:
        SystemExit: If ``env_file`` is non-empty but is not an existing file.
    """
    if not env_file:
        return ""
    path = Path(env_file).resolve()
    if not path.is_file():
        raise SystemExit(f"env 文件不存在:{path}")
    # "utf-8-sig" drops a leading BOM so the first key is not polluted by it.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        key, _, value = line.partition("=")
        key = key.strip()
        # Accept the shell form "export KEY=VALUE".
        parts = key.split(None, 1)
        if len(parts) == 2 and parts[0] == "export":
            key = parts[1].strip()
        value = value.strip()
        # Remove exactly one *matching* pair of surrounding quotes. Stripping
        # both quote characters unconditionally would eat quotes that belong
        # to the value itself (e.g. "say 'hi'" would lose its last quote).
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        if key:
            # Values already exported by the caller take precedence.
            os.environ.setdefault(key, value)
    return str(path)
40
+
41
+
42
def api_json(url: str, data: bytes | None = None, headers: dict[str, str] | None = None) -> dict:
    """Send one HTTP request through ``DIRECT_OPENER`` and return the decoded JSON object.

    Args:
        url: Full request URL, including any query string.
        data: Request body; when given, it is passed to ``urllib.request.Request``
            as the body of the request.
        headers: Extra request headers.

    Returns:
        The parsed JSON object of the response.

    Raises:
        DraftError: On an HTTP error status, a network failure, a body that
            is not valid UTF-8 JSON, a JSON value that is not an object, or
            an object whose ``errcode`` is present and non-zero.
    """
    request = urllib.request.Request(url, data=data, headers=headers or {})
    try:
        with DIRECT_OPENER.open(request, timeout=60) as response:
            raw = response.read()
    except urllib.error.HTTPError as exc:
        raise DraftError(f"HTTP {exc.code}: {exc.read().decode('utf-8', errors='replace')}") from exc
    except urllib.error.URLError as exc:
        raise DraftError(f"网络错误:{exc.reason}") from exc
    except OSError as exc:
        # Timeouts or connection resets while reading the body are raised as
        # plain OSError subclasses, not wrapped in URLError.
        raise DraftError(f"网络错误:{exc}") from exc
    try:
        payload = json.loads(raw.decode("utf-8"))
    except ValueError as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        raise DraftError(raw.decode("utf-8", errors="replace")) from exc
    if not isinstance(payload, dict):
        # A bare list/number/string has no "errcode" to inspect and is not a
        # reply any caller in this file can use.
        raise DraftError(json.dumps(payload, ensure_ascii=False))
    if payload.get("errcode") not in (None, 0):
        raise DraftError(json.dumps(payload, ensure_ascii=False))
    return payload
58
+
59
+
60
def get_token(appid: str, secret: str) -> str:
    """Request an access token for the given credentials.

    Calls the ``token`` endpoint with ``grant_type=client_credential`` and
    returns the ``access_token`` field of the reply.

    Raises:
        DraftError: If the reply has no (or an empty) ``access_token``; the
            message is the JSON-encoded reply.
    """
    params = {"grant_type": "client_credential", "appid": appid, "secret": secret}
    reply = api_json(API_BASE + "/token?" + urllib.parse.urlencode(params))
    access_token = reply.get("access_token")
    if access_token:
        return access_token
    raise DraftError(json.dumps(reply, ensure_ascii=False))
67
+
68
+
69
def multipart_file(field_name: str, file_path: Path) -> tuple[bytes, str]:
    """Encode one file as a single-part ``multipart/form-data`` body.

    Args:
        field_name: Form field name for the file part.
        file_path: File whose bytes become the part content; its base name is
            sent as the ``filename`` parameter.

    Returns:
        ``(body, boundary)``: the complete request body and the boundary
        string to put in the request's ``Content-Type`` header.
    """

    def header_value(text: str) -> str:
        # A raw double quote or line break would terminate the quoted
        # parameter (or the header line) early, so percent-escape them the
        # way browsers do for form-data names and filenames.
        return text.replace('"', "%22").replace("\r", "%0D").replace("\n", "%0A")

    boundary = f"----opcContentFactory{uuid.uuid4().hex}"
    # Fall back to a generic binary type when the extension is unknown.
    mime_type = mimetypes.guess_type(str(file_path))[0] or "application/octet-stream"
    part_header = (
        f"--{boundary}\r\n"
        f'Content-Disposition: form-data; name="{header_value(field_name)}"; '
        f'filename="{header_value(file_path.name)}"\r\n'
        f"Content-Type: {mime_type}\r\n\r\n"
    )
    closing = f"\r\n--{boundary}--\r\n"
    body = b"".join(
        (
            part_header.encode("utf-8"),
            file_path.read_bytes(),
            closing.encode("utf-8"),
        )
    )
    return body, boundary
81
+
82
+
83
def upload_permanent_thumb(token: str, cover_path: Path) -> str:
    """Upload ``cover_path`` to ``material/add_material`` with ``type=thumb``.

    Returns:
        The ``media_id`` field of the reply.

    Raises:
        DraftError: If the reply has no (or an empty) ``media_id``; the
            message is the JSON-encoded reply.
    """
    form_body, form_boundary = multipart_file("media", cover_path)
    params = urllib.parse.urlencode({"access_token": token, "type": "thumb"})
    request_headers = {"Content-Type": f"multipart/form-data; boundary={form_boundary}"}
    reply = api_json(
        f"{API_BASE}/material/add_material?{params}",
        data=form_body,
        headers=request_headers,
    )
    thumb_id = reply.get("media_id")
    if thumb_id:
        return thumb_id
    raise DraftError(json.dumps(reply, ensure_ascii=False))
95
+
96
+
97
def upload_article_image(token: str, image_path: Path) -> str:
    """Upload ``image_path`` to ``media/uploadimg`` and return the hosted URL.

    Returns:
        The ``url`` field of the reply.

    Raises:
        DraftError: If the reply has no (or an empty) ``url``; the message is
            the JSON-encoded reply.
    """
    form_body, form_boundary = multipart_file("media", image_path)
    params = urllib.parse.urlencode({"access_token": token})
    request_headers = {"Content-Type": f"multipart/form-data; boundary={form_boundary}"}
    reply = api_json(
        f"{API_BASE}/media/uploadimg?{params}",
        data=form_body,
        headers=request_headers,
    )
    hosted_url = reply.get("url")
    if hosted_url:
        return hosted_url
    raise DraftError(json.dumps(reply, ensure_ascii=False))
109
+
110
+
111
+ def rewrite_local_images(html: str, html_file: Path, token: str | None, execute: bool) -> tuple[str, list[dict]]:
112
+ uploaded = []
113
+
114
+ def replace(match: re.Match) -> str:
115
+ quote = match.group(1)
116
+ src = match.group(2)
117
+ if re.match(r"^(https?:)?//|^data:", src):
118
+ return match.group(0)
119
+ image_path = (html_file.parent / src).resolve()
120
+ record = {"local": str(image_path), "original_src": src}
121
+ if not image_path.is_file():
122
+ record["error"] = "文件不存在"
123
+ uploaded.append(record)
124
+ return match.group(0)
125
+ if execute and token:
126
+ url = upload_article_image(token, image_path)
127
+ record["url"] = url
128
+ uploaded.append(record)
129
+ return f'src={quote}{url}{quote}'
130
+ record["url"] = "DRY_RUN_NEEDS_UPLOAD"
131
+ uploaded.append(record)
132
+ return match.group(0)
133
+
134
+ rewritten = re.sub(r"src=(['\"])([^'\"]+)\1", replace, html)
135
+ return rewritten, uploaded
136
+
137
+
138
def create_draft(token: str, payload: dict) -> str:
    """POST ``payload`` as UTF-8 JSON to ``draft/add`` and return the new ``media_id``.

    Raises:
        DraftError: If the reply has no (or an empty) ``media_id``; the
            message is the JSON-encoded reply.
    """
    # ensure_ascii=False keeps non-ASCII text as-is instead of \uXXXX escapes.
    encoded = json.dumps(payload, ensure_ascii=False).encode("utf-8")
    endpoint = f"{API_BASE}/draft/add?" + urllib.parse.urlencode({"access_token": token})
    reply = api_json(
        endpoint,
        data=encoded,
        headers={"Content-Type": "application/json; charset=utf-8"},
    )
    draft_id = reply.get("media_id")
    if draft_id:
        return draft_id
    raise DraftError(json.dumps(reply, ensure_ascii=False))
150
+
151
+
152
def parse_args() -> argparse.Namespace:
    """Define the command-line interface and parse the process arguments."""
    cli = argparse.ArgumentParser(description="把完成后的 HTML 文章导入公众号草稿箱。默认 dry-run。")

    # Mandatory inputs.
    for flag, help_text in (
        ("--title", "文章标题。"),
        ("--digest", "分享摘要。"),
        ("--html-file", "准备导入的 HTML 文件。"),
        ("--cover-image", "封面图,本地 JPG/PNG。"),
    ):
        cli.add_argument(flag, required=True, help=help_text)

    # Optional strings; an empty string means "not given".
    for flag, help_text in (
        ("--env-file", "可选 env 文件,包含 WECHAT_APPID、WECHAT_SECRET、WECHAT_AUTHOR。"),
        ("--author", "作者名;为空时读取 WECHAT_AUTHOR。"),
        ("--content-source-url", "原文链接。"),
    ):
        cli.add_argument(flag, default="", help=help_text)

    # 0/1 switches copied into the draft article.
    for flag, initial in (
        ("--show-cover-pic", 0),
        ("--need-open-comment", 1),
        ("--only-fans-can-comment", 0),
    ):
        cli.add_argument(flag, type=int, choices=[0, 1], default=initial)

    cli.add_argument("--out", default="", help="dry-run 或执行结果 JSON 输出路径。")
    cli.add_argument("--execute", action="store_true", help="真实创建公众号草稿。")
    return cli.parse_args()
167
+
168
+
169
def main() -> int:
    """Run the command-line tool: build (and optionally submit) a draft.

    Without ``--execute`` nothing is sent over the network; the result
    contains placeholder values instead. With ``--execute`` a token is
    fetched, local images and the cover are uploaded, and the draft is
    created.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If the HTML file or cover image is missing, or if
            ``--execute`` is used without ``WECHAT_APPID``/``WECHAT_SECRET``.
        DraftError: Propagated from the API helpers in execute mode.
    """
    # Force UTF-8 on the standard streams where the stream supports it, so
    # the non-ASCII JSON and messages print regardless of console encoding.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8")

    args = parse_args()
    html_file = Path(args.html_file).resolve()
    cover_image = Path(args.cover_image).resolve()
    if not html_file.is_file():
        raise SystemExit(f"HTML 文件不存在:{html_file}")
    if not cover_image.is_file():
        raise SystemExit(f"封面图不存在:{cover_image}")

    # An explicit --env-file wins; otherwise fall back to the first
    # conventional file found next to the HTML file.
    loaded_env_file = load_env_file(args.env_file)
    if not loaded_env_file:
        for candidate in [html_file.parent / "draft" / "wechat_draft.env", html_file.parent / "wechat_draft.env"]:
            if candidate.is_file():
                loaded_env_file = load_env_file(str(candidate))
                break

    appid = os.environ.get("WECHAT_APPID", "")
    secret = os.environ.get("WECHAT_SECRET", "")
    author = args.author or os.environ.get("WECHAT_AUTHOR", "")
    if args.execute and (not appid or not secret):
        raise SystemExit("执行模式需要配置 WECHAT_APPID 和 WECHAT_SECRET。不要把密钥写进文章。")

    token = get_token(appid, secret) if args.execute else None
    # "utf-8-sig" drops a leading BOM so it does not end up in the article
    # content (the env loader reads its file the same way).
    html_text = html_file.read_text(encoding="utf-8-sig")
    content, uploaded_images = rewrite_local_images(html_text, html_file, token, args.execute)
    thumb_media_id = upload_permanent_thumb(token, cover_image) if args.execute and token else "DRY_RUN_THUMB_MEDIA_ID"
    payload = {
        "articles": [
            {
                "title": args.title,
                "author": author,
                "digest": args.digest,
                "content": content,
                "content_source_url": args.content_source_url,
                "thumb_media_id": thumb_media_id,
                "show_cover_pic": args.show_cover_pic,
                "need_open_comment": args.need_open_comment,
                "only_fans_can_comment": args.only_fans_can_comment,
            }
        ]
    }
    result = {
        "ok": True,
        "dry_run": not args.execute,
        "title": args.title,
        "env_file": loaded_env_file,
        "thumb_media_id": thumb_media_id,
        "uploaded_images": uploaded_images,
        "payload": payload,
        "backend_url": "https://mp.weixin.qq.com",
        "created_at": int(time.time()),
    }
    if args.execute and token:
        result["draft_media_id"] = create_draft(token, payload)

    text = json.dumps(result, ensure_ascii=False, indent=2)
    if args.out:
        out_path = Path(args.out)
        # By this point the draft may already exist remotely; do not lose the
        # result just because the output directory has not been created yet.
        out_path.parent.mkdir(parents=True, exist_ok=True)
        out_path.write_text(text, encoding="utf-8")
    print(text)
    return 0
233
+
234
+
235
if __name__ == "__main__":
    # Script entry point: the process exit status is main()'s return value.
    # A DraftError is reported as a one-line JSON object on stderr and turns
    # into exit status 1; every other exception propagates unchanged.
    try:
        exit_status = main()
    except DraftError as exc:
        failure = {"ok": False, "error": str(exc)}
        print(json.dumps(failure, ensure_ascii=False), file=sys.stderr)
        raise SystemExit(1)
    raise SystemExit(exit_status)