sophhub 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/compact-context/skill.json +20 -0
- package/skills/compact-context/src/SKILL.md +133 -0
- package/skills/compact-context/src/scripts/check.sh +381 -0
- package/skills/compact-context/src/scripts/set-keep-recent.mjs +1337 -0
- package/skills/compact-context/src/scripts/setup.sh +96 -0
- package/skills/feishu-notes-assistant-universal/skill.json +20 -0
- package/skills/feishu-notes-assistant-universal/src/README.md +55 -0
- package/skills/feishu-notes-assistant-universal/src/SKILL.md +159 -0
- package/skills/feishu-notes-assistant-universal/src/bin/linux-amd64/lark-cli-openclaw +0 -0
- package/skills/feishu-notes-assistant-universal/src/bin/linux-arm64/lark-cli-openclaw +0 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/_resolve_lark_cli.py +58 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/openclaw_meeting_minutes.py +462 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/openclaw_notes_crud.py +547 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/openclaw_notes_crud_test.py +181 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/run_meeting_minutes.py +80 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/run_meeting_minutes.sh +5 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/run_note_crud.py +32 -0
- package/skills/feishu-notes-assistant-universal/src/scripts/run_note_crud.sh +5 -0
- package/skills/flight-booking/skill.json +9 -2
- package/skills/flight-booking/src/scripts/flight_booking.py +2 -1
- package/skills/image-classify/skill.json +5 -5
- package/skills/image-classify/src/SKILL.md +60 -67
- package/skills/image-classify/src/scripts/face_search.py +400 -15
- package/skills/image-classify/src/scripts/send_dm_message.py +332 -0
- package/skills/md2pdf-converter/skill.json +20 -0
- package/skills/md2pdf-converter/src/SKILL.md +244 -0
- package/skills/md2pdf-converter/src/_meta.json +6 -0
- package/skills/md2pdf-converter/src/scripts/generate_emoji_mapping.py +74 -0
- package/skills/md2pdf-converter/src/scripts/md2pdf-local.sh +291 -0
- package/skills/sophnet-bot-client/skill.json +20 -0
- package/skills/sophnet-bot-client/src/SKILL.md +255 -0
- package/skills/sophnet-bot-client/src/pyproject.toml +13 -0
- package/skills/sophnet-bot-client/src/scripts/__init__.py +0 -0
- package/skills/sophnet-bot-client/src/scripts/bot_client_proxy.py +165 -0
- package/skills/sophnet-bot-client/src/scripts/bot_client_safe.sh +29 -0
- package/skills/sophnet-bot-client/src/scripts/bot_client_setup.py +502 -0
- package/skills/sophnet-bot-client/src/tests/__init__.py +0 -0
- package/skills/sophnet-bot-client/src/tests/test_bot_client_proxy.py +255 -0
- package/skills/sophnet-bot-client/src/tests/test_bot_client_setup.py +679 -0
- package/skills/sophnet-bot-client/src/uv.lock +8 -0
- package/skills/sophnet-docx/skill.json +20 -0
- package/skills/sophnet-docx/src/SKILL.md +463 -0
- package/skills/sophnet-docx/src/package-lock.json +208 -0
- package/skills/sophnet-docx/src/package.json +16 -0
- package/skills/sophnet-docx/src/pyproject.toml +11 -0
- package/skills/sophnet-docx/src/scripts/__init__.py +1 -0
- package/skills/sophnet-docx/src/scripts/accept_changes.py +135 -0
- package/skills/sophnet-docx/src/scripts/comment.py +318 -0
- package/skills/sophnet-docx/src/scripts/ensure_uv_env.sh +68 -0
- package/skills/sophnet-docx/src/scripts/office/helpers/__init__.py +0 -0
- package/skills/sophnet-docx/src/scripts/office/helpers/merge_runs.py +199 -0
- package/skills/sophnet-docx/src/scripts/office/helpers/simplify_redlines.py +197 -0
- package/skills/sophnet-docx/src/scripts/office/pack.py +159 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/sophnet-docx/src/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/sophnet-docx/src/scripts/office/soffice.py +183 -0
- package/skills/sophnet-docx/src/scripts/office/unpack.py +132 -0
- package/skills/sophnet-docx/src/scripts/office/validate.py +111 -0
- package/skills/sophnet-docx/src/scripts/office/validators/__init__.py +15 -0
- package/skills/sophnet-docx/src/scripts/office/validators/base.py +847 -0
- package/skills/sophnet-docx/src/scripts/office/validators/docx.py +446 -0
- package/skills/sophnet-docx/src/scripts/office/validators/pptx.py +275 -0
- package/skills/sophnet-docx/src/scripts/office/validators/redlining.py +247 -0
- package/skills/sophnet-docx/src/scripts/templates/comments.xml +3 -0
- package/skills/sophnet-docx/src/scripts/templates/commentsExtended.xml +3 -0
- package/skills/sophnet-docx/src/scripts/templates/commentsExtensible.xml +3 -0
- package/skills/sophnet-docx/src/scripts/templates/commentsIds.xml +3 -0
- package/skills/sophnet-docx/src/scripts/templates/people.xml +3 -0
- package/skills/sophnet-docx/src/scripts/upload_file.sh +96 -0
- package/skills/sophnet-docx/src/uv.lock +320 -0
- package/skills/sophnet-pdf/skill.json +20 -0
- package/skills/sophnet-pdf/src/SKILL.md +413 -0
- package/skills/sophnet-pdf/src/forms.md +297 -0
- package/skills/sophnet-pdf/src/pyproject.toml +14 -0
- package/skills/sophnet-pdf/src/reference.md +612 -0
- package/skills/sophnet-pdf/src/scripts/check_bounding_boxes.py +65 -0
- package/skills/sophnet-pdf/src/scripts/check_fillable_fields.py +11 -0
- package/skills/sophnet-pdf/src/scripts/convert_pdf_to_images.py +33 -0
- package/skills/sophnet-pdf/src/scripts/create_validation_image.py +37 -0
- package/skills/sophnet-pdf/src/scripts/enhance_tutorial.py +558 -0
- package/skills/sophnet-pdf/src/scripts/ensure_uv_env.sh +68 -0
- package/skills/sophnet-pdf/src/scripts/extract_form_field_info.py +122 -0
- package/skills/sophnet-pdf/src/scripts/extract_form_structure.py +115 -0
- package/skills/sophnet-pdf/src/scripts/extract_pdf_content.py +35 -0
- package/skills/sophnet-pdf/src/scripts/fill_fillable_fields.py +98 -0
- package/skills/sophnet-pdf/src/scripts/fill_pdf_form_with_annotations.py +107 -0
- package/skills/sophnet-pdf/src/scripts/upload_file.sh +88 -0
- package/skills/sophnet-pdf/src/uv.lock +537 -0
- package/skills/sophnet-xlsx/skill.json +20 -0
- package/skills/sophnet-xlsx/src/SKILL.md +399 -0
- package/skills/sophnet-xlsx/src/pyproject.toml +11 -0
- package/skills/sophnet-xlsx/src/scripts/ensure_uv_env.sh +68 -0
- package/skills/sophnet-xlsx/src/scripts/office/helpers/__init__.py +0 -0
- package/skills/sophnet-xlsx/src/scripts/office/helpers/merge_runs.py +199 -0
- package/skills/sophnet-xlsx/src/scripts/office/helpers/simplify_redlines.py +197 -0
- package/skills/sophnet-xlsx/src/scripts/office/pack.py +159 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +1499 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +146 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +1085 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +11 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +3081 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +23 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +185 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +287 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +1676 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +28 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +144 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +174 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +25 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +18 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +59 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +56 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +195 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +582 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +25 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +4439 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +570 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +509 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +12 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +108 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +96 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +3646 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +116 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +42 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +50 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +49 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +33 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/mce/mc.xsd +75 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/microsoft/wml-2010.xsd +560 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/microsoft/wml-2012.xsd +67 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/microsoft/wml-2018.xsd +14 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/microsoft/wml-cex-2018.xsd +20 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/microsoft/wml-cid-2016.xsd +13 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +4 -0
- package/skills/sophnet-xlsx/src/scripts/office/schemas/microsoft/wml-symex-2015.xsd +8 -0
- package/skills/sophnet-xlsx/src/scripts/office/soffice.py +183 -0
- package/skills/sophnet-xlsx/src/scripts/office/unpack.py +132 -0
- package/skills/sophnet-xlsx/src/scripts/office/validate.py +111 -0
- package/skills/sophnet-xlsx/src/scripts/office/validators/__init__.py +15 -0
- package/skills/sophnet-xlsx/src/scripts/office/validators/base.py +847 -0
- package/skills/sophnet-xlsx/src/scripts/office/validators/docx.py +446 -0
- package/skills/sophnet-xlsx/src/scripts/office/validators/pptx.py +275 -0
- package/skills/sophnet-xlsx/src/scripts/office/validators/redlining.py +247 -0
- package/skills/sophnet-xlsx/src/scripts/recalc.py +184 -0
- package/skills/sophnet-xlsx/src/scripts/upload_file.sh +96 -0
- package/skills/sophnet-xlsx/src/uv.lock +319 -0
- package/skills/wechat-article-publisher/skill.json +20 -0
- package/skills/wechat-article-publisher/src/SKILL.md +60 -0
- package/skills/wechat-article-publisher/src/config.json +7 -0
- package/skills/wechat-article-publisher/src/pyproject.toml +12 -0
- package/skills/wechat-article-publisher/src/scripts/publish_wechat.py +825 -0
|
@@ -0,0 +1,825 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Publish Markdown/URL articles to WeChat Official Account."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import re
|
|
10
|
+
import subprocess
|
|
11
|
+
import sys
|
|
12
|
+
import textwrap
|
|
13
|
+
from dataclasses import dataclass
|
|
14
|
+
from pathlib import Path
|
|
15
|
+
from typing import Any
|
|
16
|
+
from urllib.parse import urljoin, urlparse
|
|
17
|
+
|
|
18
|
+
try:
|
|
19
|
+
import markdown
|
|
20
|
+
import requests
|
|
21
|
+
import yaml
|
|
22
|
+
from bs4 import BeautifulSoup
|
|
23
|
+
except Exception:
|
|
24
|
+
markdown = None
|
|
25
|
+
requests = None
|
|
26
|
+
yaml = None
|
|
27
|
+
BeautifulSoup = None
|
|
28
|
+
|
|
29
|
+
# WeChat Official Account API endpoints (all under api.weixin.qq.com/cgi-bin).
# The short notes below are read off the URL paths only.
TOKEN_URL = "https://api.weixin.qq.com/cgi-bin/token"  # token endpoint
DRAFT_ADD_URL = "https://api.weixin.qq.com/cgi-bin/draft/add"  # draft/add
PUBLISH_SUBMIT_URL = "https://api.weixin.qq.com/cgi-bin/freepublish/submit"  # freepublish/submit
PUBLISH_GET_URL = "https://api.weixin.qq.com/cgi-bin/freepublish/get"  # freepublish/get
MATERIAL_ADD_URL = "https://api.weixin.qq.com/cgi-bin/material/add_material"  # material/add_material
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class WeChatPublishError(RuntimeError):
    """Error type used to signal a failure reported by the WeChat API."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class Article:
    """One article extracted from a Markdown file or a web page."""

    # Article title.
    title: str
    # Article body as an HTML string.
    content: str
    # URL the article originates from; may be an empty string.
    source_url: str
    # Reference to the first image found in the body, or "" when there is none.
    first_image_url: str
    # Short plain-text summary of the body.
    digest: str
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def ensure_dependencies() -> None:
    """Verify that the optional third-party imports are available.

    Raises:
        RuntimeError: if any of ``markdown``, ``requests``, ``yaml`` or
            ``BeautifulSoup`` is falsy (they are set to ``None`` when the
            guarded import at the top of the file fails).
    """
    required = (markdown, requests, yaml, BeautifulSoup)
    if all(required):
        return
    raise RuntimeError(
        "依赖未安装。请在项目根目录执行 uv sync,或 uv run scripts/publish_wechat.py --install"
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def install_dependencies() -> None:
    """Install the script's dependencies.

    When ``pyproject.toml`` exists in the parent of this script's directory,
    ``uv sync`` is run there. If launching ``uv`` raises ``FileNotFoundError``
    (or there is no ``pyproject.toml``), the function falls back to
    ``pip install -r requirements.txt`` using the file next to this script.

    Raises:
        RuntimeError: if the fallback is needed but ``requirements.txt`` is
            missing.
        subprocess.CalledProcessError: if either install command exits with a
            non-zero status.
    """
    script_dir = Path(__file__).resolve().parent
    project_root = script_dir.parent
    requirements = script_dir / "requirements.txt"

    if (project_root / "pyproject.toml").exists():
        try:
            subprocess.check_call(["uv", "sync"], cwd=str(project_root))
        except FileNotFoundError:
            # ``uv`` could not be started; fall through to the pip route.
            pass
        else:
            return

    if not requirements.exists():
        raise RuntimeError(f"未找到依赖文件: {requirements}")
    subprocess.check_call([sys.executable, "-m", "pip", "install", "-r", str(requirements)])
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def slugify(text: str) -> str:
    """Turn *text* into a lowercase, dash-separated slug.

    Runs of characters other than word characters, CJK ideographs and ``-``
    become a single ``-``; repeated dashes are collapsed and leading/trailing
    dashes removed. Returns ``"wechat-article"`` when nothing is left.
    """
    lowered = text.strip().lower()
    dashed = re.sub(r"[^\w\u4e00-\u9fff-]+", "-", lowered)
    collapsed = re.sub(r"-{2,}", "-", dashed)
    slug = collapsed.strip("-")
    if slug:
        return slug
    return "wechat-article"
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def trim_utf8_bytes(text: str, max_bytes: int) -> str:
    """Return *text* cut so its UTF-8 encoding is at most *max_bytes* long.

    A multi-byte character split by the cut is dropped rather than left
    half-encoded. A falsy *text* yields ``""``.
    """
    value = text or ""
    encoded = value.encode("utf-8")
    if len(encoded) > max_bytes:
        # errors="ignore" discards the incomplete trailing byte sequence.
        return encoded[:max_bytes].decode("utf-8", errors="ignore")
    return value
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def decode_escaped_unicode(text: str) -> str:
    """Convert literal \\uXXXX sequences to real Unicode characters.

    Only ``\\uXXXX`` escapes are rewritten. Everything else, including real
    non-ASCII characters and other backslash sequences such as ``\\n``, is
    left untouched. (Round-tripping through the ``unicode_escape`` codec
    would decode the UTF-8 bytes as Latin-1 and corrupt existing non-ASCII
    text, so the substitution is done with a regular expression instead.)

    A high/low surrogate pair written as two escapes is combined into the
    single character it encodes. A lone surrogate escape is kept as literal
    text, because it cannot be encoded to UTF-8 later.

    Args:
        text: Input string; a falsy value is treated as empty.

    Returns:
        The text with ``\\uXXXX`` escapes decoded, or ``""`` for falsy input.
    """
    if not text:
        return ""
    if "\\u" not in text:
        return text

    escape_pattern = (
        r"\\u([dD][89abAB][0-9a-fA-F]{2})\\u([dD][c-fC-F][0-9a-fA-F]{2})"
        r"|\\u([0-9a-fA-F]{4})"
    )

    def _decode(match: "re.Match[str]") -> str:
        high, low, single = match.groups()
        if high is not None:
            # Surrogate pair -> one supplementary-plane code point.
            code_point = (
                0x10000
                + ((int(high, 16) - 0xD800) << 10)
                + (int(low, 16) - 0xDC00)
            )
            return chr(code_point)
        code_point = int(single, 16)
        if 0xD800 <= code_point <= 0xDFFF:
            # Unpaired surrogate: keep the escape text as written.
            return match.group(0)
        return chr(code_point)

    return re.sub(escape_pattern, _decode, text)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def normalize_wechat_title(text: str) -> str:
    """Clean up *text* for use as a WeChat article title.

    Decodes literal ``\\uXXXX`` escapes, collapses whitespace, removes leading
    Markdown ``#`` markers, replaces curly quotes and em dashes with ASCII
    equivalents and limits the result to 64 characters. Returns the
    placeholder title when nothing remains.
    """
    title = decode_escaped_unicode(text or "").strip()
    title = re.sub(r"\s+", " ", title)
    title = re.sub(r"^#+\s*", "", title)
    for fancy, plain in (("“", "\""), ("”", "\""), ("—", "-")):
        title = title.replace(fancy, plain)
    # The title limit is applied per character: at most 64 characters,
    # with each Chinese character counted as one.
    title = title[:64].strip()
    return title if title else "未命名文章"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def is_url(input_value: str) -> bool:
    """Return True when *input_value* (ignoring surrounding whitespace) starts
    with ``http://`` or ``https://``, case-insensitively."""
    candidate = input_value.strip()
    return re.match(r"^https?://", candidate, re.I) is not None
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def load_config(path: Path) -> dict[str, Any]:
    """Load the JSON configuration file and return its WeChat section.

    Two layouts are accepted:

    1. ``{"wechat": {...}}`` - the parsed document is returned as is.
    2. ``{"platforms": {"wechat": {...}}}`` - returned as ``{"wechat": {...}}``.

    Args:
        path: Location of the JSON configuration file.

    Returns:
        A dict with a ``"wechat"`` key whose value is a dict.

    Raises:
        RuntimeError: if the file does not exist, or if neither layout is
            present. That includes a top-level value that is not a JSON
            object and a ``platforms`` entry that is null or not an object.
        json.JSONDecodeError: if the file is not valid JSON.
    """
    if not path.exists():
        raise RuntimeError(f"配置文件不存在: {path}")
    data = json.loads(path.read_text(encoding="utf-8"))

    # Type-check every level before calling .get so malformed configs report
    # the "missing wechat config" error instead of an AttributeError.
    if isinstance(data, dict):
        if isinstance(data.get("wechat"), dict):
            return data
        platforms = data.get("platforms")
        if isinstance(platforms, dict) and isinstance(platforms.get("wechat"), dict):
            return {"wechat": platforms["wechat"]}

    raise RuntimeError("配置文件缺少 wechat 配置")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def parse_frontmatter(md_text: str) -> tuple[dict[str, Any], str]:
    """Split Markdown text into YAML front matter and body.

    The text is first passed through ``decode_escaped_unicode`` and stripped
    of a leading BOM. If it starts with ``---`` and a second ``---`` follows,
    the part in between is parsed with ``yaml.safe_load``.

    Returns:
        ``(frontmatter, body)``. ``frontmatter`` is always a dict. When no
        usable front matter is found the result is ``({}, full_text)``.
    """
    text = decode_escaped_unicode(md_text).lstrip("\ufeff")
    if not text.startswith("---"):
        return {}, text

    parts = text.split("---", 2)
    if len(parts) < 3:
        return {}, text

    try:
        frontmatter = yaml.safe_load(parts[1])
    except Exception:
        # Best effort: unparseable front matter means the whole text is body.
        return {}, text

    if not frontmatter:
        # Empty front matter block: nothing to report, body follows it.
        return {}, parts[2].strip()
    if not isinstance(frontmatter, dict):
        # A scalar or list here means the leading '---' was not really front
        # matter (e.g. a horizontal rule). Keep the full text as the body so
        # callers can rely on a dict and no content is lost.
        return {}, text
    return frontmatter, parts[2].strip()
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def extract_title_from_md(md_text: str) -> str:
    """Pick a title out of Markdown text.

    Preference order: the first level-1 heading (``# ...``); otherwise the
    first non-empty line with any leading ``#`` markers removed, cut to 64
    characters; otherwise the placeholder title.
    """
    text = decode_escaped_unicode(md_text).lstrip("\ufeff")

    heading = re.search(r"^#\s+(.+)$", text, re.M)
    if heading is not None:
        return heading.group(1).strip()

    stripped_lines = (re.sub(r"^#+\s*", "", raw.strip()) for raw in text.splitlines())
    first_non_empty = next((entry for entry in stripped_lines if entry), None)
    if first_non_empty is None:
        return "未命名文章"
    return first_non_empty[:64]
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def first_markdown_image(md_text: str) -> str:
    """Return the destination of the first Markdown image in *md_text*.

    Handles the optional parts of the ``![alt](dest "title")`` syntax so that
    only the destination is returned: a trailing quoted title is dropped and
    surrounding ``<...>`` angle brackets are removed.

    Returns:
        The image destination, or ``""`` when the text contains no image.
    """
    match = re.search(r"!\[[^\]]*\]\(([^)]+)\)", md_text)
    if match is None:
        return ""

    target = match.group(1).strip()
    # Drop a trailing "title" / 'title' so it is not mistaken for part of
    # the URL.
    titled = re.match(r"""^(.*?)\s+(?:"[^"]*"|'[^']*')$""", target, re.S)
    if titled:
        target = titled.group(1).strip()
    # Angle brackets are delimiters for destinations that contain spaces.
    if len(target) >= 2 and target.startswith("<") and target.endswith(">"):
        target = target[1:-1].strip()
    return target
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def extract_from_markdown(path: Path, source_url_override: str = "") -> Article:
    """Build an ``Article`` from a Markdown file.

    The title comes from the front matter ``title`` key, falling back to
    ``extract_title_from_md`` on the body, and is limited to 64 characters.
    The body is converted with ``markdown_to_html``; the digest is the first
    120 characters of its plain text.

    Args:
        path: Markdown file to read (UTF-8).
        source_url_override: Used as the source URL when non-empty; otherwise
            the front matter ``source_url`` key is used.

    Returns:
        The populated ``Article``.
    """
    md_text = decode_escaped_unicode(path.read_text(encoding="utf-8")).lstrip("\ufeff")
    frontmatter, body = parse_frontmatter(md_text)

    # YAML may parse front matter values as int/float/date (e.g. `title: 2024`);
    # coerce to str so the string methods below cannot fail.
    raw_title = frontmatter.get("title") or extract_title_from_md(body)
    title = str(raw_title).strip()[:64]
    raw_source = source_url_override or frontmatter.get("source_url") or ""
    source_url = str(raw_source).strip()

    html_content = markdown_to_html(body)
    plain = html_to_plain_text(html_content)
    return Article(
        title=title or "未命名文章",
        content=html_content,
        source_url=source_url,
        first_image_url=first_markdown_image(body),
        digest=plain[:120],
    )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def extract_from_url(url: str) -> Article:
    """Download a web page and build an ``Article`` from it.

    Title lookup order: ``#cb_post_title_url``, first ``<h1>``, ``<title>``.
    Body lookup order: ``#cnblogs_post_body``, ``<article>``, ``<main>``, a
    ``<div>`` whose class matches content/article/post, then ``<body>``.
    Scripts, styles, iframes and ad/comment containers are removed from the
    body, and image/link URLs are made absolute against *url*.

    Args:
        url: Page address to fetch (30 second timeout).

    Returns:
        The populated ``Article``; its digest is the first 120 characters of
        the body's plain text.

    Raises:
        requests.HTTPError: for an unsuccessful HTTP status.
        RuntimeError: if no body element can be located.
    """
    resp = requests.get(url, timeout=30)
    resp.raise_for_status()

    # Without a charset in the Content-Type header, requests falls back to
    # ISO-8859-1 for text responses, which garbles UTF-8/GBK pages. Use the
    # encoding detected from the payload in that case.
    declared_encoding = (resp.encoding or "").lower()
    if declared_encoding in ("", "iso-8859-1"):
        resp.encoding = resp.apparent_encoding

    soup = BeautifulSoup(resp.text, "html.parser")
    title_node = (
        soup.select_one("#cb_post_title_url")
        or soup.find("h1")
        or soup.find("title")
    )
    title = title_node.get_text(strip=True)[:64] if title_node else ""
    # Same placeholder the Markdown path uses when the title is empty.
    title = title or "未命名文章"

    body_node = (
        soup.select_one("#cnblogs_post_body")
        or soup.find("article")
        or soup.find("main")
        or soup.find("div", class_=re.compile(r"content|article|post", re.I))
        or soup.body
    )
    if not body_node:
        raise RuntimeError("无法从网页提取正文")

    for tag in body_node.select("script,style,iframe,.ad,.ads,.advertisement,.comment"):
        tag.decompose()

    # Resolve relative image and link targets against the page URL.
    for img in body_node.find_all("img"):
        src = (img.get("src") or "").strip()
        if src:
            img["src"] = urljoin(url, src)
    for a in body_node.find_all("a"):
        href = (a.get("href") or "").strip()
        if href:
            a["href"] = urljoin(url, href)

    first_image_url = ""
    first_img = body_node.find("img")
    if first_img and first_img.get("src"):
        first_image_url = first_img["src"]

    html_content = str(body_node)
    plain = html_to_plain_text(html_content)

    return Article(
        title=title,
        content=html_content,
        source_url=url,
        first_image_url=first_image_url,
        digest=plain[:120],
    )
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def is_probably_html(text: str) -> bool:
    """Guess whether *text* is already HTML.

    Only the first 500 characters (after stripping, lowercased) are checked
    for the opening of a common block-level tag.
    """
    head = (text or "").strip()[:500].lower()
    block_tag = r"<(article|section|div|p|h[1-6]|ul|ol|pre|table|img|blockquote)\b"
    return re.search(block_tag, head) is not None
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def markdown_to_html(text: str) -> str:
    """Render Markdown to HTML.

    Literal ``\\uXXXX`` escapes are decoded first. Input that
    ``is_probably_html`` recognises as HTML is returned as is; anything else
    is rendered with the ``extra``, ``tables``, ``nl2br`` and ``codehilite``
    extensions.
    """
    source = decode_escaped_unicode(text or "")
    if not is_probably_html(source):
        return markdown.markdown(
            source,
            extensions=["extra", "tables", "nl2br", "codehilite"],
            output_format="html",
        )
    return source
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def html_to_plain_text(html: str) -> str:
    """Return the visible text of *html*, one text fragment per line, with
    consecutive newlines collapsed to one."""
    extracted = BeautifulSoup(html, "html.parser").get_text("\n", strip=True)
    return re.sub(r"\n+", "\n", extracted)
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def _next_element_sibling(node):
    """Return the element that directly follows *node*, skipping blank text.

    Siblings without a ``name`` are treated as text. Whitespace-only text is
    skipped; any other text in between means there is no directly following
    element and ``None`` is returned.
    """
    candidate = node.next_sibling
    while candidate is not None:
        if getattr(candidate, "name", None):
            return candidate
        if str(candidate).strip():
            # Real text sits between node and the next element.
            break
        candidate = candidate.next_sibling
    return None
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _normalize_wechat_dom(soup: BeautifulSoup) -> None:
    """Tidy lists and code blocks in *soup* in place.

    Three passes are made:

    1. Direct children of ``<ul>``/``<ol>`` that are not ``<li>``: non-blank
       text is wrapped in a new ``<li>``; everything else is removed.
    2. ``<li>`` items whose text is empty or only a bullet glyph are removed.
    3. The text of each ``<pre>`` (or its ``<code>`` child) has line endings
       normalised, tabs and special spaces replaced, trailing whitespace
       trimmed, and common indentation removed.
    """
    # Pass 1: make sure lists only contain <li> children.
    for list_tag in soup.find_all(["ul", "ol"]):
        for item in list(list_tag.contents):
            tag_name = getattr(item, "name", None)
            if tag_name == "li":
                continue
            stray_text = "" if tag_name else str(item).strip()
            if stray_text and stray_text not in ("\n", "\r\n"):
                wrapper = soup.new_tag("li")
                wrapper.string = stray_text
                item.replace_with(wrapper)
                continue
            try:
                item.extract()
            except Exception:
                pass

    # Pass 2: drop list items that carry no real text.
    bullet_only = {"•", "-", "·"}
    for list_item in soup.find_all("li"):
        label = list_item.get_text(" ", strip=True)
        if label == "" or label in bullet_only:
            list_item.decompose()

    # Pass 3: normalise whitespace inside preformatted blocks.
    for pre in soup.find_all("pre"):
        target = pre.find("code") or pre
        raw = target.get_text() if target else ""
        if not raw:
            continue
        raw = raw.replace("\r\n", "\n").replace("\r", "\n").replace("\t", " ")
        rows = [
            row.replace("\u00a0", " ").replace("\u3000", " ").rstrip()
            for row in raw.split("\n")
        ]
        dedented = textwrap.dedent("\n".join(rows).strip("\n"))
        target.clear()
        target.append(dedented)
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
def _enhance_reference_links(soup: BeautifulSoup) -> None:
    """Show the raw URL after each link in reference sections, in place.

    A reference section starts at a heading whose text contains one of the
    reference keywords and runs until the next heading of the same or a
    higher level. For every link in it whose URL is not already part of its
    parent's text, a styled block containing the URL is inserted right after
    the link.
    """
    heading_re = re.compile(r"^h[1-6]$")
    keywords = ("参考资源", "相关链接", "references", "reference")
    link_css = "color:#576b95;text-decoration:underline;word-break:break-all;"
    holder_css = "display:block;margin-top:2px;font-size:13px;line-height:1.6;color:#7a7a7a;"

    for heading in soup.find_all(heading_re):
        heading_text = heading.get_text(" ", strip=True).lower()
        if all(word not in heading_text for word in keywords):
            continue

        section_level = int(heading.name[1])
        sibling = heading.next_sibling
        while sibling is not None:
            sibling_name = getattr(sibling, "name", None)
            if sibling_name and heading_re.match(sibling_name):
                # A heading at the same or a higher level ends the section.
                if int(sibling_name[1]) <= section_level:
                    break

            links = sibling.find_all("a") if getattr(sibling, "find_all", None) else []
            for link in links:
                href = (link.get("href") or "").strip()
                if not href:
                    continue
                container_text = link.parent.get_text(" ", strip=True) if link.parent else ""
                if href in container_text:
                    # URL is already visible next to the link.
                    continue
                visible_url = soup.new_tag("a", href=href)
                visible_url["style"] = link_css
                visible_url["target"] = "_blank"
                visible_url.string = href
                holder = soup.new_tag("span")
                holder["style"] = holder_css
                holder.append(visible_url)
                link.insert_after(holder)
            sibling = sibling.next_sibling
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _style_lead_paragraph(soup: BeautifulSoup, template: str) -> None:
    """Apply the lead-paragraph style to the first eligible ``<p>`` in place.

    Paragraphs nested in ``li``/``blockquote``/``td``/``th`` and paragraphs
    without text are skipped. Only the first remaining paragraph is styled;
    the ``"viral"`` template gets its own look, every other template shares
    the default one.
    """
    viral_css = (
        "margin:1em 0;padding:0.88em 0.96em;background:#fff7e6;border:1px solid #ffe1a6;"
        "border-radius:8px;line-height:1.9;font-size:16px;color:#2b2f38;text-indent:0;"
        "box-shadow:0 2px 10px rgba(251,146,60,0.12);"
    )
    default_css = (
        "margin:1em 0;padding:0.75em 0.9em;background:#f8fafc;border:1px solid #edf2f7;"
        "border-radius:6px;line-height:1.9;font-size:16px;color:#243044;text-indent:0;"
    )

    for paragraph in soup.find_all("p"):
        nested = paragraph.find_parent(["li", "blockquote", "td", "th"])
        if nested or not paragraph.get_text(" ", strip=True):
            continue
        paragraph["style"] = viral_css if template == "viral" else default_css
        return
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def _decorate_highlights(soup: BeautifulSoup, template: str) -> None:
    """Style "highlights" label paragraphs and the list that follows, in place.

    A paragraph whose whole text equals the label gets a pill-shaped badge
    style. If the element directly after it is a ``<ul>`` or ``<ol>``, that
    list gets a matching card style. The ``"viral"`` template has its own
    colours; every other template uses the default ones.
    """
    if template == "viral":
        label_css = (
            "margin:0.95em 0 0.35em;display:inline-block;padding:0.24em 0.72em;"
            "font-size:14px;line-height:1.4;color:#b42318;font-weight:700;"
            "background:#fff1f1;border-radius:999px;border:1px solid #ffcece;"
        )
        list_css = (
            "margin:0.45em 0 1em;padding:0.78em 0.98em 0.78em 1.6em;line-height:1.85;"
            "color:#2d3445;background:#fff7ec;border-radius:8px;"
            "border:1px solid #ffe3bf;box-shadow:0 2px 12px rgba(251,146,60,0.10);"
        )
    else:
        label_css = (
            "margin:0.95em 0 0.35em;display:inline-block;padding:0.22em 0.65em;"
            "font-size:14px;line-height:1.4;color:#2457c5;font-weight:700;"
            "background:#eef4ff;border-radius:999px;border:1px solid #dce9ff;"
        )
        list_css = (
            "margin:0.45em 0 1em;padding:0.75em 0.95em 0.75em 1.55em;line-height:1.82;"
            "color:#2f3441;background:#f8fafc;border-radius:6px;border:1px solid #e8eef8;"
        )

    # NOTE(review): both label literals are identical as they appear here;
    # presumably one was meant to use a full-width colon - confirm against
    # the published file.
    labels = ("看点:", "看点:")

    for paragraph in soup.find_all("p"):
        if paragraph.get_text(" ", strip=True) not in labels:
            continue
        paragraph["style"] = label_css

        following = _next_element_sibling(paragraph)
        if following and following.name in ("ul", "ol"):
            following["style"] = list_css
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def optimize_for_wechat_html(content_html: str, template: str = "standard") -> str:
    """Inline presentation styles into article HTML and wrap it in a ``<section>``.

    Steps, in order:

    1. Parse ``content_html`` and remove every ``<script>`` and ``<style>`` tag.
    2. Run ``_normalize_wechat_dom`` and ``_enhance_reference_links``.
    3. Remove the first ``<h1>`` from the body.
    4. Overwrite the ``style`` attribute of every ``h2``, ``p``, ``ul``, ``ol``,
       ``li``, ``a``, ``strong`` and ``blockquote``; anchors that have an
       ``href`` also get ``target="_blank"``.
    5. Run ``_decorate_highlights`` and then ``_style_lead_paragraph``.  They
       run after the blanket styling because step 4 assigns ``style`` on
       every ``p``/``ul``/``ol``; running the highlight decoration earlier
       would have its list styling overwritten by the generic list style.
    6. Serialise the top-level nodes and wrap them in a styled ``<section>``.

    Args:
        content_html: Article body as an HTML string.
        template: ``"viral"`` selects the warm palette; any other value
            (after strip/lower, with ``None``/empty treated as "standard")
            selects the standard palette.

    Returns:
        The wrapped HTML string.
    """
    template = (template or "standard").strip().lower()
    is_viral = template == "viral"

    soup = BeautifulSoup(content_html, "html.parser")
    for tag in soup.find_all(["script", "style"]):
        tag.decompose()

    _normalize_wechat_dom(soup)
    _enhance_reference_links(soup)

    first_h1 = soup.find("h1")
    if first_h1:
        first_h1.decompose()

    # Every style string below depends only on the template, so each one is
    # built once and then assigned to all matching tags.
    if is_viral:
        h2_style = (
            "margin:1.45em 0 0.72em;padding:0.46em 0.72em;font-size:1.2em;line-height:1.45;"
            "color:#7a1f1f;font-weight:800;background:#fff2f2;border-left:4px solid #ef4444;border-radius:4px;"
        )
        list_style = (
            "margin:0.98em 0;padding:0.82em 1em 0.82em 1.62em;line-height:1.86;color:#2d3445;"
            "background:#fffdf8;border-radius:8px;border:1px solid #ffe8c4;"
            "box-shadow:0 2px 10px rgba(251,146,60,0.08);list-style-position:outside;"
        )
        link_style = "color:#c2410c;text-decoration:underline;word-break:break-all;"
        strong_style = (
            "font-weight:800;color:#9a3412;background:#fff3d6;padding:0 2px;border-radius:3px;"
        )
        quote_style = (
            "margin:1.1em 0;padding:0.82em 1.02em;border-left:3px solid #f59e0b;"
            "background:#fff8e6;color:#5f4b20;line-height:1.85;border-radius:6px;"
        )
    else:
        h2_style = (
            "margin:1.4em 0 0.7em;padding:0.38em 0.65em;font-size:1.18em;line-height:1.45;"
            "color:#1f2937;font-weight:700;background:#f5f8ff;border-left:4px solid #3b6dd8;border-radius:2px;"
        )
        list_style = (
            "margin:0.95em 0;padding:0.75em 0.95em 0.75em 1.55em;line-height:1.8;color:#2f3441;"
            "background:#f8fafc;border-radius:6px;border:1px solid #edf2f7;list-style-position:outside;"
        )
        link_style = "color:#1f57c3;text-decoration:underline;word-break:break-all;"
        strong_style = "font-weight:700;color:#1f2937;"
        quote_style = (
            "margin:1.1em 0;padding:0.8em 1em;border-left:3px solid #7aa2ff;"
            "background:#f4f7ff;color:#43506a;line-height:1.8;border-radius:4px;"
        )

    paragraph_style = (
        "margin:0.95em 0;line-height:1.92;font-size:16px;color:#2b2f38;"
        "text-align:justify;letter-spacing:0.01em;text-indent:0;"
    )
    item_style = "margin:0.28em 0;font-size:16px;line-height:1.82;text-indent:0;"

    for h2 in soup.find_all("h2"):
        h2["style"] = h2_style

    # Highlight labels receive the generic style here too; the
    # _decorate_highlights call below replaces it on the labels it matches.
    for p in soup.find_all("p"):
        p["style"] = paragraph_style

    for ul in soup.find_all("ul"):
        ul["style"] = list_style + "list-style-type:disc;"
    for ol in soup.find_all("ol"):
        ol["style"] = list_style + "list-style-type:decimal;"
    for li in soup.find_all("li"):
        li["style"] = item_style

    for a in soup.find_all("a"):
        a["style"] = link_style
        if a.get("href"):
            a["target"] = "_blank"

    for strong in soup.find_all("strong"):
        strong["style"] = strong_style

    for blockquote in soup.find_all("blockquote"):
        blockquote["style"] = quote_style

    # Targeted decorations go last so the blanket loops above cannot
    # overwrite them.  The lead-paragraph pass stays the final one.
    _decorate_highlights(soup, template)
    _style_lead_paragraph(soup, template)

    body_html = "".join(str(node) for node in soup.contents)
    wrapper = (
        "<section style=\"margin:0 auto;max-width:690px;padding:0;"
        "font-family:-apple-system,BlinkMacSystemFont,'Segoe UI','PingFang SC','Hiragino Sans GB','Microsoft YaHei',sans-serif;"
        "color:#1f2937;font-size:16px;line-height:1.9;\">"
        f"{body_html}"
        "</section>"
    )
    return wrapper
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
def generate_cover_image(title: str, output_path: Path) -> Path | None:
    """Render a 900x383 gradient JPEG cover with the article title drawn on it.

    The background is a vertical blue-to-green gradient.  ``title`` is
    stripped of HTML markup, whitespace-collapsed and cut to 26 characters
    (plus ``...`` when longer) before being drawn in white at (50, 130).

    Args:
        title: Article title; may contain HTML markup.
        output_path: Destination file.  Missing parent directories are
            created.

    Returns:
        ``output_path`` on success.  ``None`` when Pillow cannot be imported,
        or when the selected font cannot encode the title, so the caller can
        fall back to another cover source.
    """
    try:
        from PIL import Image, ImageDraw, ImageFont
    except Exception:
        # Pillow is optional: a failed import means "no generated cover".
        return None

    width, height = 900, 383
    img = Image.new("RGB", (width, height))
    draw = ImageDraw.Draw(img)
    top = (60, 130, 246)
    bottom = (33, 197, 147)
    for y in range(height):
        ratio = y / max(1, height - 1)
        r = int(top[0] + (bottom[0] - top[0]) * ratio)
        g = int(top[1] + (bottom[1] - top[1]) * ratio)
        b = int(top[2] + (bottom[2] - top[2]) * ratio)
        draw.line([(0, y), (width, y)], fill=(r, g, b))

    # CJK-capable fonts, tried in order.  The Windows entries come first so
    # the choice on Windows is unchanged; the remaining entries are common
    # install locations on macOS and Linux.
    font_candidates = [
        "C:\\Windows\\Fonts\\msyhbd.ttc",
        "C:\\Windows\\Fonts\\msyh.ttc",
        "C:\\Windows\\Fonts\\simhei.ttf",
        "/System/Library/Fonts/PingFang.ttc",
        "/System/Library/Fonts/Hiragino Sans GB.ttc",
        "/System/Library/Fonts/STHeiti Medium.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Bold.ttc",
        "/usr/share/fonts/opentype/noto/NotoSansCJK-Regular.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-zenhei.ttc",
        "/usr/share/fonts/truetype/wqy/wqy-microhei.ttc",
    ]
    font = None
    for candidate in font_candidates:
        try:
            font = ImageFont.truetype(candidate, 56)
            break
        except Exception:
            # Missing or unreadable font file: try the next candidate.
            continue
    if font is None:
        font = ImageFont.load_default()

    clean = re.sub(r"\s+", " ", BeautifulSoup(title, "html.parser").get_text(" ", strip=True))
    line = clean[:26] + ("..." if len(clean) > 26 else "")
    try:
        draw.text((50, 130), line, fill=(255, 255, 255), font=font)
    except UnicodeEncodeError:
        # Pillow's bitmap default font cannot encode non-Latin-1 text.
        # Report "no cover" instead of aborting the whole run.
        return None

    output_path.parent.mkdir(parents=True, exist_ok=True)
    img.save(output_path, "JPEG", quality=92)
    return output_path
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
class WeChatClient:
    """Thin wrapper over the WeChat Official Account HTTP endpoints used here.

    Every method performs blocking ``requests`` calls with ``self.timeout``.
    HTTP-level failures surface through ``raise_for_status()``; a non-zero
    ``errcode`` or a missing id in the JSON reply raises
    ``WeChatPublishError``.
    """

    def __init__(self, app_id: str, app_secret: str, timeout: int = 30):
        """Store the credentials and the per-request timeout (seconds)."""
        self.app_id = app_id
        self.app_secret = app_secret
        self.timeout = timeout

    def get_token(self) -> str:
        """Fetch an access token with the ``client_credential`` grant.

        Raises:
            WeChatPublishError: The reply has a non-zero ``errcode`` or no
                ``access_token``.
        """
        resp = requests.get(
            TOKEN_URL,
            params={
                "grant_type": "client_credential",
                "appid": self.app_id,
                "secret": self.app_secret,
            },
            timeout=self.timeout,
        )
        resp.raise_for_status()
        data = resp.json()
        if data.get("errcode", 0) != 0:
            raise WeChatPublishError(f"get_token failed: {data}")
        token = data.get("access_token", "")
        if not token:
            raise WeChatPublishError("get_token failed: missing access_token")
        return token

    def _post_json_utf8(self, url: str, params: dict[str, Any], payload: dict[str, Any]) -> dict[str, Any]:
        """POST ``payload`` as raw UTF-8 JSON and return the decoded JSON reply.

        The body is serialised with ``ensure_ascii=False`` so non-ASCII text
        is sent as literal UTF-8 instead of ``\\uXXXX`` escapes, and the
        reply is decoded as UTF-8 explicitly.
        """
        body = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        resp = requests.post(
            url,
            params=params,
            data=body,
            headers={"Content-Type": "application/json; charset=utf-8"},
            timeout=self.timeout,
        )
        resp.raise_for_status()
        return json.loads(resp.content.decode("utf-8"))

    def _upload_image(self, token: str, files: dict[str, Any], label: str) -> str:
        """Send a multipart image upload and return the resulting ``media_id``.

        ``label`` is the prefix of the error message, e.g. ``"upload image"``.
        """
        resp = requests.post(
            MATERIAL_ADD_URL,
            params={"access_token": token, "type": "image"},
            files=files,
            timeout=self.timeout,
        )
        resp.raise_for_status()
        data = resp.json()
        if data.get("errcode", 0) != 0:
            raise WeChatPublishError(f"{label} failed: {data}")
        media_id = data.get("media_id", "")
        if not media_id:
            raise WeChatPublishError(f"{label} failed: missing media_id")
        return media_id

    def upload_image_from_path(self, token: str, image_path: Path) -> str:
        """Upload a local image file and return its ``media_id``.

        The multipart content type is guessed from the file name and falls
        back to ``image/jpeg`` when the extension is unknown.
        """
        import mimetypes

        content_type = mimetypes.guess_type(image_path.name)[0] or "image/jpeg"
        with image_path.open("rb") as fh:
            files = {"media": (image_path.name, fh, content_type)}
            return self._upload_image(token, files, "upload image")

    def upload_image_from_url(self, token: str, image_url: str) -> str:
        """Download ``image_url`` and upload its bytes; return the ``media_id``.

        The download's ``Content-Type`` header is forwarded (default
        ``image/jpeg``); the upload file name is always ``thumb.jpg``.
        """
        img = requests.get(image_url, timeout=self.timeout)
        img.raise_for_status()
        content_type = img.headers.get("Content-Type", "image/jpeg")
        files = {"media": ("thumb.jpg", img.content, content_type)}
        return self._upload_image(token, files, "upload image url")

    @staticmethod
    def _truncation_candidates(text: str, limits: list[int]) -> list[str]:
        """Return the distinct non-empty stripped prefixes of ``text``, one per limit, in order."""
        candidates: list[str] = []
        for limit in limits:
            candidate = text[:limit].strip()
            if candidate and candidate not in candidates:
                candidates.append(candidate)
        return candidates

    def add_draft(
        self,
        token: str,
        title: str,
        author: str,
        digest: str,
        content_html: str,
        source_url: str,
        thumb_media_id: str,
    ) -> str:
        """Create a draft article, shortening title or digest when rejected.

        Titles are tried at 64/48/36/28/20/12 characters and digests at
        120/90/60/40/20 characters.  ``errcode`` 45003 moves on to the next
        shorter title, 45004 to the next shorter digest; any other non-zero
        code is raised immediately.  A digest length that has been rejected
        is not sent again for later titles, and once every digest has been
        rejected the method gives up instead of cycling through the
        remaining titles.

        Returns:
            The ``media_id`` of the created draft.

        Raises:
            WeChatPublishError: The API rejected every attempt, returned an
                unexpected error code, or replied without a ``media_id``.
        """
        safe_title_base = normalize_wechat_title(title)
        safe_author = trim_utf8_bytes(author, 16)
        safe_digest_base = decode_escaped_unicode(digest or "")
        safe_digest_base = re.sub(r"\s+", " ", safe_digest_base).strip()

        title_candidates = self._truncation_candidates(
            safe_title_base, [64, 48, 36, 28, 20, 12]
        ) or ["未命名文章"]
        digest_candidates = self._truncation_candidates(
            safe_digest_base, [120, 90, 60, 40, 20]
        ) or ["AI 资讯速览"]

        last_error: dict[str, Any] | None = None
        digest_index = 0  # first digest candidate not yet rejected as too long
        for safe_title in title_candidates:
            title_rejected = False
            while digest_index < len(digest_candidates):
                payload = {
                    "articles": [
                        {
                            "title": safe_title,
                            "author": safe_author,
                            "digest": digest_candidates[digest_index],
                            "content": content_html,
                            # Tolerate a missing source URL instead of
                            # failing on None[:200].
                            "content_source_url": (source_url or "")[:200],
                            "thumb_media_id": thumb_media_id,
                            "need_open_comment": 1,
                            "only_fans_can_comment": 0,
                        }
                    ]
                }
                data = self._post_json_utf8(
                    DRAFT_ADD_URL,
                    {"access_token": token},
                    payload,
                )
                if data.get("errcode", 0) == 0:
                    media_id = data.get("media_id", "")
                    if not media_id:
                        raise WeChatPublishError("draft add failed: missing media_id")
                    return media_id

                last_error = data
                errcode = data.get("errcode")
                if errcode == 45003:
                    # Title rejected: retry with the next shorter title.
                    title_rejected = True
                    break
                if errcode == 45004:
                    # Digest rejected: retry with the next shorter digest.
                    digest_index += 1
                    continue
                raise WeChatPublishError(f"draft add failed: {data}")

            if not title_rejected:
                # Digest candidates are exhausted; a shorter title cannot help.
                break

        raise WeChatPublishError(f"draft add failed: {last_error}")

    def submit_publish(self, token: str, media_id: str) -> str:
        """Submit the draft ``media_id`` for publishing and return the ``publish_id``.

        Raises:
            WeChatPublishError: Non-zero ``errcode`` or missing ``publish_id``.
        """
        data = self._post_json_utf8(
            PUBLISH_SUBMIT_URL,
            {"access_token": token},
            {"media_id": media_id},
        )
        if data.get("errcode", 0) != 0:
            raise WeChatPublishError(f"publish submit failed: {data}")
        publish_id = data.get("publish_id", "")
        if not publish_id:
            raise WeChatPublishError("publish submit failed: missing publish_id")
        return publish_id

    def get_publish_status(self, token: str, publish_id: str) -> dict[str, Any]:
        """Query the publish status once and return the raw JSON reply.

        Raises:
            WeChatPublishError: The reply has a non-zero ``errcode``.
        """
        data = self._post_json_utf8(
            PUBLISH_GET_URL,
            {"access_token": token},
            {"publish_id": publish_id},
        )
        if data.get("errcode", 0) != 0:
            raise WeChatPublishError(f"publish get failed: {data}")
        return data
|
|
688
|
+
|
|
689
|
+
|
|
690
|
+
def parse_args() -> argparse.Namespace:
    """Parse the command line of the WeChat publishing script.

    The positional ``input`` is optional so that ``--install`` can be used on
    its own.  ``--config`` defaults to ``config.json`` in the parent of the
    directory that holds this script.  String overrides default to the empty
    string, which callers treat as "not supplied".
    """
    script_dir = Path(__file__).resolve().parent
    config_default = script_dir.parent / "config.json"

    cli = argparse.ArgumentParser(description="Publish article to WeChat Official Account")
    cli.add_argument("input", nargs="?", help="Markdown file path or article URL")
    cli.add_argument("--config", default=str(config_default), help="Path to config.json")
    cli.add_argument("--template", choices=["standard", "viral"], default="", help="Override template")

    # Free-form string overrides.
    text_options = (
        ("--author", "Override author"),
        ("--source-url", "Override source url"),
        ("--cover-image", "Local cover image path"),
    )
    for flag, description in text_options:
        cli.add_argument(flag, default="", help=description)

    cli.add_argument("--timeout", type=int, default=30, help="HTTP timeout seconds")

    # Boolean switches, all off by default.
    switches = (
        ("--dry-run", "Render only, no WeChat API calls"),
        ("--publish", "Submit draft for publish"),
        ("--status", "Query publish status once"),
        ("--install", "Install Python dependencies"),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    return cli.parse_args()
|
|
705
|
+
|
|
706
|
+
|
|
707
|
+
def main() -> None:
    """Run the command-line workflow: render, then optionally draft and publish.

    Flow:
      * ``--install`` installs dependencies, prints a JSON result and returns.
      * Otherwise the input (URL or Markdown path) is extracted, styled with
        ``optimize_for_wechat_html`` and written to a preview HTML file in
        the current working directory.
      * ``--dry-run`` prints a JSON summary and returns before any WeChat call.
      * Otherwise a cover is chosen (``--cover-image`` if the file exists,
        else a generated cover, else the article's first http(s) image), a
        draft is created, and with ``--publish`` the draft is submitted;
        ``--status`` additionally queries the publish status once.

    Raises:
        RuntimeError: Missing input, missing input file, missing WeChat
            credentials, or no usable cover image.
    """
    args = parse_args()

    if args.install:
        install_dependencies()
        print(json.dumps({"success": True, "installed": True}, ensure_ascii=False))
        return

    if not args.input:
        raise RuntimeError("缺少输入参数:请传入 Markdown 文件路径或 URL")
    ensure_dependencies()

    cfg = load_config(Path(args.config))
    wechat_cfg = cfg["wechat"]

    # The command line falls back to "viral"; anything unexpected becomes "standard".
    template = (args.template or "viral").strip().lower()
    if template not in {"standard", "viral"}:
        template = "standard"
    author = (args.author or wechat_cfg.get("author") or "").strip()

    source = args.input.strip()
    if is_url(source):
        article = extract_from_url(source)
        if args.source_url:
            article.source_url = args.source_url.strip()
    else:
        markdown_path = Path(source).resolve()
        if not markdown_path.exists():
            raise RuntimeError(f"文件不存在: {markdown_path}")
        article = extract_from_markdown(markdown_path, source_url_override=args.source_url.strip())

    article.content = optimize_for_wechat_html(article.content, template=template)

    # The preview is written in every mode, including real publishing runs.
    preview_path = Path.cwd() / f"{slugify(article.title)}-wechat-preview.html"
    preview_path.write_text(article.content, encoding="utf-8")

    if args.dry_run:
        summary = {
            "success": True,
            "mode": "dry-run",
            "title": article.title,
            "digest": article.digest,
            "template": template,
            "preview_html": str(preview_path),
        }
        print(json.dumps(summary, ensure_ascii=False))
        return

    app_id = (wechat_cfg.get("app_id") or "").strip()
    app_secret = (wechat_cfg.get("app_secret") or "").strip()
    if not (app_id and app_secret):
        raise RuntimeError("配置缺少 wechat.app_id 或 wechat.app_secret")

    client = WeChatClient(app_id=app_id, app_secret=app_secret, timeout=args.timeout)
    token = client.get_token()

    # Cover priority: explicit file, then generated image, then first article image.
    thumb_media_id = ""
    explicit_cover = Path(args.cover_image).resolve() if args.cover_image else None
    if explicit_cover is not None and explicit_cover.exists():
        thumb_media_id = client.upload_image_from_path(token, explicit_cover)
    else:
        generated_target = Path(__file__).resolve().parent.parent / "assets" / "generated_cover.jpg"
        generated = generate_cover_image(article.title, generated_target)
        first_image = article.first_image_url
        if generated and generated.exists():
            thumb_media_id = client.upload_image_from_path(token, generated)
        elif first_image and first_image.startswith(("http://", "https://")):
            thumb_media_id = client.upload_image_from_url(token, first_image)

    if not thumb_media_id:
        raise RuntimeError(
            "无法获取封面素材,请提供 --cover-image,或确保正文含可下载图片,或安装 Pillow 以自动生成封面"
        )

    draft_media_id = client.add_draft(
        token=token,
        title=article.title,
        author=author,
        digest=article.digest,
        content_html=article.content,
        source_url=article.source_url,
        thumb_media_id=thumb_media_id,
    )

    result: dict[str, Any] = {
        "success": True,
        "title": article.title,
        "template": template,
        "draft_media_id": draft_media_id,
        "preview_html": str(preview_path),
    }

    if args.publish:
        publish_id = client.submit_publish(token, draft_media_id)
        result["publish_id"] = publish_id
        # --status only has an effect together with --publish.
        if args.status:
            result["status"] = client.get_publish_status(token, publish_id)

    print(json.dumps(result, ensure_ascii=False))
|
|
813
|
+
|
|
814
|
+
|
|
815
|
+
if __name__ == "__main__":
    # Any failure is reported as a single JSON object on stdout, followed by
    # exit status 1.
    try:
        main()
    except Exception as exc:
        failure = {"success": False, "error": str(exc), "type": type(exc).__name__}
        print(json.dumps(failure, ensure_ascii=False))
        sys.exit(1)
|