htmlgen-mcp 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of htmlgen-mcp might be problematic. Click here for more details.
- MCP/__init__.py +6 -0
- MCP/web_agent_server.py +1257 -0
- agents/__init__.py +6 -0
- agents/smart_web_agent.py +2384 -0
- agents/web_tools/__init__.py +84 -0
- agents/web_tools/bootstrap.py +49 -0
- agents/web_tools/browser.py +28 -0
- agents/web_tools/colors.py +137 -0
- agents/web_tools/css.py +1473 -0
- agents/web_tools/edgeone_deploy.py +541 -0
- agents/web_tools/html_templates.py +1770 -0
- agents/web_tools/images.py +600 -0
- agents/web_tools/images_fixed.py +195 -0
- agents/web_tools/js.py +235 -0
- agents/web_tools/navigation.py +386 -0
- agents/web_tools/project.py +34 -0
- agents/web_tools/simple_builder.py +346 -0
- agents/web_tools/simple_css.py +475 -0
- agents/web_tools/simple_js.py +454 -0
- agents/web_tools/simple_templates.py +220 -0
- agents/web_tools/validation.py +65 -0
- htmlgen_mcp-0.2.0.dist-info/METADATA +171 -0
- htmlgen_mcp-0.2.0.dist-info/RECORD +26 -0
- htmlgen_mcp-0.2.0.dist-info/WHEEL +5 -0
- htmlgen_mcp-0.2.0.dist-info/entry_points.txt +2 -0
- htmlgen_mcp-0.2.0.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
"""图片生成与注入工具"""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import html
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import re
|
|
8
|
+
import urllib.parse
|
|
9
|
+
import urllib.request
|
|
10
|
+
from collections import OrderedDict
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def fetch_generated_images(
    project_path: str,
    provider: str = "pollinations",
    prompts: list | str | None = None,
    count: int = 1,
    size: str = "1200x800",
    seed: str | int | None = None,
    save: bool = False,
    subdir: str = "assets/images",
    prefix: str = "img",
) -> str:
    """Build generated-image URLs and optionally download them.

    Supported providers are Pollinations (text-to-image, JPG), DiceBear
    (``bottts`` SVG avatars) and RoboHash (PNG avatars).

    Args:
        project_path: Project root; downloads go to ``project_path/subdir``.
        provider: ``pollinations`` | ``dicebear`` | ``robohash`` (a few short
            aliases are accepted, case-insensitively).
        prompts: Prompt/seed list. A string is parsed as a JSON array when it
            starts with ``[`` (falling back to comma splitting when the JSON
            is malformed), otherwise it is split on commas.
        count: Number of placeholder prompts to generate when ``prompts`` is
            empty; ignored otherwise.
        size: Pixel size such as ``"1200x800"`` (``*`` is accepted as the
            separator). Unparseable values fall back to 1200x800.
        seed: Base seed. Used as the ``seed`` query parameter for
            Pollinations; for the avatar providers it is only used when the
            prompt itself is empty. Defaults to the 1-based item index.
        save: Download each image into ``project_path/subdir`` when true.
        subdir: Download directory relative to ``project_path``.
        prefix: File-name prefix for downloaded images.

    Returns:
        A multi-line summary: one header line followed by one
        ``- prompt: url -> saved path`` line per image.

    Raises:
        RuntimeError: On any failure, including an unsupported provider. The
            original exception is attached as ``__cause__``.
    """
    try:
        # Parse "WxH"; anything malformed falls back to the default size.
        try:
            width_px, height_px = [
                int(x) for x in str(size).lower().replace("*", "x").split("x")[:2]
            ]
        except (TypeError, ValueError):
            width_px, height_px = 1200, 800

        # Normalise prompts into a list.
        if prompts is None:
            prompts = []
        if isinstance(prompts, str):
            s = prompts.strip()
            parsed = None
            if s.startswith("["):
                try:
                    parsed = json.loads(s)
                except ValueError:
                    # Malformed JSON: treat the text as a comma-separated list.
                    parsed = None
            if parsed is None:
                parsed = [p.strip() for p in s.split(",") if p.strip()]
            prompts = parsed
        if not isinstance(prompts, list):
            prompts = [str(prompts)]
        if not prompts:
            # No prompts given: synthesise "<provider>-<n>" placeholders.
            base = provider or "image"
            prompts = [f"{base}-{i+1}" for i in range(max(1, int(count)))]

        # Normalise the provider once; tolerates None and stray whitespace.
        prov = str(provider or "").lower().strip()
        if prov in ("pollinations", "polls", "poll"):
            kind, ext = "pollinations", "jpg"
        elif prov in ("dicebear", "avatar", "bottts"):
            kind, ext = "dicebear", "svg"
        elif prov in ("robohash", "robo", "cats"):
            kind, ext = "robohash", "png"
        else:
            raise ValueError("不支持的 provider,可用:pollinations | dicebear | robohash")

        results = []
        save_dir = Path(project_path) / subdir
        if save:
            save_dir.mkdir(parents=True, exist_ok=True)

        for i, prompt in enumerate(prompts):
            prompt_str = str(prompt)
            item_seed = str(seed) if seed is not None else str(i + 1)

            if kind == "pollinations":
                q = urllib.parse.quote(prompt_str)
                # The seed is user-supplied text, so it must be URL-encoded
                # before it is placed in the query string.
                seed_q = urllib.parse.quote(item_seed, safe="")
                url = (
                    f"https://image.pollinations.ai/prompt/{q}"
                    f"?nologo=true&seed={seed_q}&width={width_px}&height={height_px}"
                )
            elif kind == "dicebear":
                seed_val = urllib.parse.quote(prompt_str or item_seed)
                url = f"https://api.dicebear.com/7.x/bottts/svg?seed={seed_val}"
            else:
                text = urllib.parse.quote(prompt_str or item_seed)
                url = f"https://robohash.org/{text}.png?set=set4&bgset=bg1&size={width_px}x{height_px}"

            saved_path = None
            if save:
                try:
                    out_path = save_dir / f"{prefix}-{provider}-{i+1}.{ext}"
                    with urllib.request.urlopen(url, timeout=10) as resp:
                        data = resp.read()
                    with open(out_path, "wb") as f:
                        f.write(data)
                    saved_path = str(out_path)
                except Exception:
                    # Deliberately best-effort: when the network is
                    # unavailable or the download fails, only the URL is
                    # reported.
                    saved_path = None

            results.append({
                "provider": provider,
                "prompt": prompt_str,
                "url": url,
                "saved_path": saved_path,
            })

        # Assemble the human-readable summary.
        lines = [
            f"图片生成完成(provider={provider}, save={'yes' if save else 'no'}): {len(results)} 张",
        ]
        for r in results:
            loc = r["saved_path"] or "(未保存)"
            lines.append(f"- {r['prompt']}: {r['url']} -> {loc}")
        return "\n".join(lines)

    except Exception as e:
        raise RuntimeError(f"获取图片失败: {e}") from e
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def inject_images(
    file_path: str,
    provider: str = "pollinations",
    topics: list | str | None = None,
    size: str = "1200x800",
    seed: str | int | None = None,
    save: bool = False,
    subdir: str = "assets/images",
    prefix: str = "img",
) -> str:
    """Inject generated images into an existing HTML file, in place.

    Steps, in order:
      1. Elements carrying ``data-bg-topic="..."`` get a ``background-image``
         inline style.
      2. ``<img data-topic="...">`` placeholders get ``src``, ``alt``,
         ``loading`` and fallback attributes.
      3. ``<svg data-topic="...">...</svg>`` placeholders are replaced by an
         ``<img>`` tag.
      4. If nothing was replaced so far, the first ``<header>``/``<section>``
         with ``id="home"`` or a class containing
         hero|banner|masthead|showcase gets the first topic as background.
      5. If the page had no ``<img data-topic>`` placeholder, card-like
         containers without an image get one inserted at their top.
      6. An empty ``product-grid`` container is filled with generated cards.
      7. ``<img>`` tags whose local ``src`` file does not exist are pointed
         at a generated image instead.

    Args:
        file_path: Target HTML file (read and rewritten as UTF-8).
        provider: ``pollinations`` | ``dicebear`` | ``robohash``.
        topics: Topic/prompt list. A string is parsed as a JSON array when it
            starts with ``[``, otherwise split on commas. When omitted or
            ``"<auto>"``, topics come from the page placeholders, or are
            guessed from the page title/headings and the file name.
        size: Size string such as ``1200x800``; falls back to 1200x800.
        seed: Optional seed, appended to Pollinations URLs when given.
        save: Download images next to the HTML file when true. If a download
            fails, a placehold.co placeholder is used as the image source.
        subdir: Download directory, relative to the HTML file's directory.
        prefix: File-name prefix for downloaded images.

    Returns:
        A one-line summary containing the number of injections performed.

    Raises:
        RuntimeError: On any failure (unreadable file, unsupported provider,
            ...). The original exception is attached as ``__cause__``.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Parse "WxH"; anything malformed falls back to the default size.
        try:
            width_px, height_px = [
                int(x) for x in str(size).lower().replace("*", "x").split("x")[:2]
            ]
        except (TypeError, ValueError):
            width_px, height_px = 1200, 800

        prov = str(provider or "").lower().strip()
        html_dir = Path(file_path).parent
        # Only double-quoted attributes are recognised by this module.
        attr_re = re.compile(r'([a-zA-Z_:][\w:.-]*)\s*=\s*"([^"]*)"')

        # Collect placeholders. `\s` must be a single backslash in a raw
        # string: `\\s` would only match a literal backslash followed by "s".
        bg_matches = list(re.finditer(
            r'<([a-zA-Z0-9]+)([^>]*?)\sdata-bg-topic="([^"]+)"([^>]*)>', content))
        img_matches = list(re.finditer(
            r'<img([^>]*?)\sdata-topic="([^"]+)"([^>]*)>', content))
        svg_matches = list(re.finditer(
            r'<svg([^>]*?)\sdata-topic="([^"]+)"([^>]*)>.*?</svg>', content, re.I | re.S))

        def topic_of(match, group: int) -> str:
            # Attribute text is HTML-escaped in the page; work on plain text.
            return html.unescape(match.group(group)).strip()

        page_topics = []
        page_topics += [topic_of(m, 3) for m in bg_matches]
        page_topics += [topic_of(m, 2) for m in img_matches]
        page_topics += [topic_of(m, 2) for m in svg_matches]

        def _to_list(val) -> list:
            """Normalise the `topics` argument to a list of strings."""
            if val is None:
                return []
            if isinstance(val, str):
                s = val.strip()
                if s.startswith("["):
                    try:
                        parsed = json.loads(s)
                    except ValueError:
                        parsed = None  # malformed JSON: fall back to commas
                    if isinstance(parsed, list):
                        return [str(t) for t in parsed]
                return [t.strip() for t in s.split(",") if t.strip()]
            if isinstance(val, (list, tuple)):
                return [str(t) for t in val]
            return [str(val)]

        topics = _to_list(topics)

        def _extract_text(markup: str) -> str:
            """Return lower-cased title/h1/h2 text as a semantic hint."""
            parts = []
            for pat in [r"<title[^>]*>(.*?)</title>", r"<h1[^>]*>(.*?)</h1>", r"<h2[^>]*>(.*?)</h2>"]:
                for m in re.finditer(pat, markup, re.I | re.S):
                    parts.append(re.sub(r"<[^>]+>", " ", m.group(1)))
            return re.sub(r"\s+", " ", " ".join(parts)).strip().lower()

        def _guess_topics(markup: str, path: str, n: int = 3) -> list:
            """Pick up to `n` prompts from keywords in headings and file name."""
            base = _extract_text(markup)
            base += " " + os.path.basename(path).replace('-', ' ').lower()
            # keyword group -> prompt templates; the first matching group wins
            mapping = [
                (("phone", "mobile", "smartphone", "iphone", "android"), [
                    "modern smartphone store interior, depth of field, soft lighting",
                    "close-up product photo of latest smartphone on gradient background",
                    "lifestyle shot using smartphone in cafe daylight"
                ]),
                (("cafe", "coffee", "latte", "espresso", "barista"), [
                    "coffee shop interior hero, warm light, depth of field",
                    "close-up latte art on wooden table, high detail",
                    "lifestyle shot people enjoying coffee"
                ]),
                (("restaurant", "food", "menu", "dishes", "burger", "pizza"), [
                    "restaurant hero background, cozy interior, bokeh",
                    "close-up gourmet dish, food photography",
                    "lifestyle people dining, warm ambience"
                ]),
                (("saas", "software", "startup", "app", "tech"), [
                    "abstract tech hero background, gradient, 3d shapes",
                    "dashboard ui on laptop close-up, product shot",
                    "team collaboration lifestyle office scene"
                ]),
                (("education", "course", "school", "study"), [
                    "education hero background, campus or classroom light",
                    "notebook and laptop study flat lay",
                    "students studying lifestyle"
                ]),
            ]
            for keys, topics_tpl in mapping:
                if any(k in base for k in keys):
                    return topics_tpl[:n]
            # Fallback: generic modern style.
            return [
                "modern gradient abstract background, soft shapes",
                "product close-up on neutral background",
                "lifestyle people using product"
            ][:n]

        if not topics or topics == ["<auto>"]:
            # Prefer topics declared by page placeholders; otherwise guess.
            topics = page_topics if page_topics else _guess_topics(content, file_path, 3)

        def build_url(topic: str, idx: int) -> tuple[str, str | None, str]:
            """Return (remote url, saved path or None, file extension)."""
            topic_q = urllib.parse.quote_plus(topic)
            if prov in ("pollinations", "pollination", "image", "ai"):
                url = f"https://image.pollinations.ai/prompt/{topic_q}?width={width_px}&height={height_px}"
                if seed is not None:
                    url += "&seed=" + urllib.parse.quote(str(seed), safe="")
                ext = "jpg"
            elif prov in ("dicebear", "avatar", "bottts"):
                q = urllib.parse.quote(topic)
                url = f"https://api.dicebear.com/7.x/bottts/svg?seed={q}"
                ext = "svg"
            elif prov in ("robohash", "robo", "cats"):
                q = urllib.parse.quote(topic)
                url = f"https://robohash.org/{q}.png?set=set4&bgset=bg1&size={width_px}x{height_px}"
                ext = "png"
            else:
                raise ValueError("不支持的 provider")

            saved_path = None
            if save:
                try:
                    out_dir = html_dir / subdir
                    out_dir.mkdir(parents=True, exist_ok=True)
                    out_file = out_dir / f"{prefix}-{idx+1}.{ext}"
                    with urllib.request.urlopen(url, timeout=10) as resp:
                        data = resp.read()
                    with open(out_file, "wb") as fo:
                        fo.write(data)
                    # The path ends up in src/url(), so it must be relative
                    # to the HTML file, not to the process working directory.
                    try:
                        rel = os.path.relpath(out_file, html_dir)
                    except ValueError:
                        rel = str(out_file)  # e.g. different drive on Windows
                    saved_path = rel.replace(os.sep, "/")
                except Exception:
                    # Deliberately best-effort: callers fall back to a
                    # placeholder when the download fails.
                    saved_path = None
            return url, saved_path, ext

        def build_placeholder(topic: str) -> str:
            """Return a placehold.co URL labelled with (at most 24 chars of) topic."""
            plain = re.sub(r"\s+", " ", topic).strip()
            plain = plain[:24] if plain else "Placeholder"
            encoded = urllib.parse.quote_plus(plain)
            return f"https://placehold.co/{width_px}x{height_px}/EEE/31343C?text={encoded}"

        def resolve_source(topic: str, idx: int) -> tuple[str, str | None, str, str]:
            """Return (remote url, saved path, placeholder url, source to embed)."""
            url, saved, _ = build_url(topic, idx)
            placeholder_url = build_placeholder(topic)
            target = saved or url
            if save and not saved:
                # A download was requested but failed: embed the placeholder.
                target = placeholder_url
            return url, saved, placeholder_url, target

        def add_attr(tag_html: str, attr: str) -> str:
            """Append `attr` to an opening tag, before its closing `>` or `/>`."""
            if tag_html.endswith("/>"):
                return f"{tag_html[:-2].rstrip()} {attr} />"
            if tag_html.endswith(">"):
                return f"{tag_html[:-1]} {attr}>"
            return tag_html

        def set_bg(tag_html: str, url: str) -> str:
            """Set `background-image` in the tag's inline style and mark the tag."""
            style_re = re.compile(r"style=\"([^\"]*)\"")
            css = f"background-image:url('{url}');background-size:cover;background-position:center;background-repeat:no-repeat;"
            if style_re.search(tag_html):
                def _repl(m):
                    val = m.group(1)
                    # Drop any previous background-image declaration.
                    val = re.sub(r"background-image\s*:[^;]*;?", "", val)
                    val = (val + (";" if val and not val.strip().endswith(";") else "") + css).strip(";")
                    return f"style=\"{val}\""
                tag_html = style_re.sub(_repl, tag_html, count=1)
            else:
                tag_html = add_attr(tag_html, f"style=\"{css}\"")
            # Marker attribute so later runs can recognise injected tags.
            if "data-ai-image=\"bg\"" not in tag_html:
                tag_html = add_attr(tag_html, "data-ai-image=\"bg\"")
            return tag_html

        def set_img(tag_html: str, url: str, topic: str, placeholder_url: str, remote_url: str | None = None) -> str:
            """Rebuild an `<img>` tag with src/alt/loading/fallback attributes set."""
            body_match = re.match(r'<img\s*(.*?)(/?>)$', tag_html, re.I | re.S)
            original_attrs = body_match.group(1) if body_match else ''
            terminator = body_match.group(2) if body_match else ('/>' if tag_html.strip().endswith('/>') else '>')

            # lower-cased name -> [original name, plain-text value]. A dict
            # keeps first-insertion order, which preserves attribute order.
            attrs = {}
            for name, value in attr_re.findall(original_attrs):
                # Unescape here because every value is escaped again when the
                # tag is rebuilt; otherwise "&amp;" becomes "&amp;amp;".
                attrs[name.lower()] = [name, html.unescape(value)]

            def set_attr(name: str, value: str):
                key = name.lower()
                if key in attrs:
                    attrs[key][1] = value
                else:
                    attrs[key] = [name, value]

            set_attr('src', url)
            set_attr('alt', topic or '网站图片')
            set_attr('loading', 'lazy')

            class_parts = (attrs['class'][1] if 'class' in attrs else '').split()
            if 'img-fluid' not in class_parts:
                class_parts.append('img-fluid')
            set_attr('class', ' '.join(class_parts))

            set_attr('data-ai-image', 'img')
            if topic:
                # Keep an existing data-topic; otherwise record the topic.
                set_attr('data-topic', attrs.get('data-topic', ['data-topic', topic])[1])
            set_attr('data-placeholder', placeholder_url)
            if remote_url:
                set_attr('data-remote-src', remote_url)
            # Swap to the placeholder if the image fails to load in the browser.
            set_attr('onerror', f"this.onerror=null;this.src='{placeholder_url}'")

            rebuilt = ['<img']
            for name, value in attrs.values():
                rebuilt.append(f' {name}="{html.escape(value, quote=True)}"')
            rebuilt.append(terminator if terminator else '>')
            return ''.join(rebuilt)

        def new_img_tag(topic: str, idx: int) -> str:
            """Create a fresh, fully populated `<img>` tag for `topic`."""
            url, _, placeholder_url, target = resolve_source(topic, idx)
            template = (
                f'<img data-topic="{html.escape(topic, quote=True)}" '
                f'class="img-fluid mb-3 rounded shadow-sm" />'
            )
            return set_img(template, target, topic, placeholder_url, url)

        def apply_bg(old: str, topic: str, idx: int) -> str:
            """Return tag `old` with the background for `topic` applied."""
            url, _, _, target = resolve_source(topic, idx)
            new = set_bg(old, target)
            if url != target and 'data-remote-bg' not in new:
                # Remember the remote URL when a local/placeholder one is used.
                new = add_attr(new, f'data-remote-bg="{html.escape(url, quote=True)}"')
            return new

        replacements = 0
        hero_used = False

        # 1) Background placeholders.
        for idx, m in enumerate(bg_matches):
            old = m.group(0)
            content = content.replace(old, apply_bg(old, topic_of(m, 3), idx), 1)
            replacements += 1
            hero_used = True

        # 2) <img data-topic> placeholders.
        for jdx, m in enumerate(img_matches):
            topic = topic_of(m, 2)
            url, _, placeholder_url, target = resolve_source(topic, len(bg_matches) + jdx)
            old = m.group(0)
            content = content.replace(old, set_img(old, target, topic, placeholder_url, url), 1)
            replacements += 1

        # 3) <svg data-topic> placeholders are replaced wholesale by <img>.
        # Replace by text rather than by match offsets: the offsets refer to
        # the original text, which steps 1 and 2 have already changed.
        for sdx, m in enumerate(svg_matches):
            img_tag = new_img_tag(topic_of(m, 2), len(bg_matches) + len(img_matches) + sdx)
            content = content.replace(m.group(0), img_tag, 1)
            replacements += 1

        # 4) No placeholders at all: use the first topic as hero background.
        if replacements == 0 and topics:
            m = re.search(
                r'<(header|section)[^>]*(id="home"|class="[^"]*(hero|banner|masthead|showcase)[^"]*")[^>]*>',
                content, re.I)
            if m:
                topic = topics[0]
                old = m.group(0)
                new = apply_bg(old, topic, 0)
                if 'data-bg-topic="' not in new:
                    new = add_attr(new, f'data-bg-topic="{html.escape(topic, quote=True)}"')
                content = content.replace(old, new, 1)
                replacements += 1
                hero_used = True

        # 5) No <img data-topic> placeholder: insert an image at the top of
        # each card-like container, consuming topics in order.
        if not img_matches:
            wrappers = list(re.finditer(
                r'(<(div|section|article)[^>]*class="[^"]*(feature-card|card|product-card|device-card|item-card|phone-card)[^"]*"[^>]*>)',
                content, re.I))
            if wrappers:
                start_idx = 1 if hero_used else 0  # topic 0 went to the hero
                fill_topics = topics[:] if topics else []
                inserted = 0
                offset = 0  # growth of `content` caused by earlier insertions
                for wrapper_idx, wrapper_match in enumerate(wrappers):
                    class_match = re.search(r'class="([^"]+)"', wrapper_match.group(0), re.I)
                    if class_match:
                        tokens = {tok.strip().lower() for tok in class_match.group(1).split() if tok.strip()}
                        # Skip inner card parts and classes that merely
                        # contain "card" as a substring.
                        if tokens & {"card-body", "card-text", "card-footer", "card-header"}:
                            continue
                        if not tokens & {"card", "feature-card", "product-card", "device-card", "item-card", "phone-card"}:
                            continue
                    # Skip cards that already hold an <img> (looking at most
                    # 1200 characters ahead) or look like a JS template.
                    tag_end = wrapper_match.end() + offset
                    look_ahead = content[tag_end:tag_end + 1200]
                    if re.search(r"<img", look_ahead, re.I):
                        continue
                    if "${" in look_ahead:
                        continue
                    t_index = start_idx + inserted
                    topic = (fill_topics[t_index] if t_index < len(fill_topics) else f"image-{wrapper_idx+1}")
                    injection = "\n " + new_img_tag(topic, t_index)
                    content = content[:tag_end] + injection + content[tag_end:]
                    offset += len(injection)
                    inserted += 1
                    replacements += 1

        # 6) Extra fallback: fill a product-grid container that has no <img>.
        # NOTE(review): the nesting of this step was reconstructed from a
        # flattened listing; it is assumed to run regardless of step 5 —
        # confirm against the packaged file.
        grid_m = re.search(r"(<div[^>]*(?:id=(?:\"|')product-grid(?:\"|')|class=(?:\"|')[^\"']*\bproduct-grid\b[^\"']*(?:\"|'))[^>]*>)", content, re.I)
        if grid_m and (topics or page_topics):
            peek = content[grid_m.end():grid_m.end()+1200]
            if not re.search(r"<img\b", peek, re.I):
                start_idx = 1 if hero_used else 0
                fill_topics = topics[:] if topics else page_topics[:]

                def detect_brand(t: str) -> str:
                    """Map a topic to a brand slug by substring, else "other"."""
                    s = t.lower()
                    for needle, brand_name in [
                        ("apple", "apple"), ("iphone", "apple"),
                        ("samsung", "samsung"), ("galaxy", "samsung"),
                        ("google", "google"), ("pixel", "google"),
                        ("xiaomi", "xiaomi"), ("mi ", "xiaomi"), ("redmi", "xiaomi"),
                        ("huawei", "huawei"), ("mate", "huawei"), ("p60", "huawei"),
                        ("oneplus", "oneplus"), ("oppo", "oppo"), ("vivo", "vivo"), ("honor", "honor")
                    ]:
                        if needle in s:
                            return brand_name
                    return "other"

                # Demo prices, cycled through in order.
                price_table = [4699, 5199, 5799, 6299, 6899, 7399, 7999, 8599, 9199]

                cards = []
                for i, topic in enumerate(fill_topics[start_idx:], start=start_idx):
                    url, saved, _ = build_url(topic, i)
                    brand = detect_brand(topic)
                    price = price_table[(i - start_idx) % len(price_table)]
                    # Topics are free text: escape everything put into markup.
                    topic_attr = html.escape(topic, quote=True)
                    src_attr = html.escape(saved or url, quote=True)
                    title = html.escape(topic.split(",")[0][:80])
                    cards.append(
                        f'\n <div class="card" data-brand="{brand}" data-price="{price}">\n'
                        f' <img data-ai-image="img" data-topic="{topic_attr}" class="img-fluid mb-3 rounded shadow-sm" loading="lazy" alt="{topic_attr}" src="{src_attr}">\n'
                        f' <h3 style="font-size:1.125rem; margin-bottom:0.5rem;">{title}</h3>\n'
                        f' <div style="display:flex; align-items:center; justify-content:space-between;">\n'
                        f' <span style="font-weight:700; color:#111827;">¥{price}</span>\n'
                        f' <a href="#" class="btn btn-primary">立即购买</a>\n'
                        f' </div>\n'
                        f' </div>'
                    )
                    replacements += 1

                if cards:
                    insert_pos = grid_m.end(1)
                    content = content[:insert_pos] + "\n".join(cards) + "\n" + content[insert_pos:]

        # 7) Images pointing at missing local files switch to a generated
        # image. `\s` before `src` keeps data-src / data-remote-src from
        # being mistaken for the real attribute.
        missing_pattern = re.compile(r'<img([^>]*?\s)src="([^"]+)"([^>]*)>', re.I | re.S)
        search_offset = 0
        missing_counter = 0

        while True:
            m = missing_pattern.search(content, search_offset)
            if not m:
                break
            before, src, after = m.groups()
            src_stripped = src.strip()
            # Remote, protocol-relative and inline sources are left alone, as
            # are JS template expressions (mirrors the "${" guard of step 5).
            if src_stripped.lower().startswith(('http://', 'https://', '//', 'data:')) or "${" in src_stripped:
                search_offset = m.end()
                continue

            if (html_dir / src_stripped).resolve().exists():
                search_offset = m.end()
                continue

            attrs_local = {name.lower(): html.unescape(value) for name, value in attr_re.findall(before + after)}
            topic_source = attrs_local.get('data-topic') or attrs_local.get('alt') or Path(src_stripped).stem or 'image'
            topic = re.sub(r"\s+", " ", topic_source).strip() or "image"

            inject_idx = len(bg_matches) + len(img_matches) + len(svg_matches) + missing_counter
            url, _, placeholder_url, target = resolve_source(topic, inject_idx)

            new_tag = set_img(m.group(0), target, topic, placeholder_url, url)
            content = content[:m.start()] + new_tag + content[m.end():]
            search_offset = m.start() + len(new_tag)
            replacements += 1
            missing_counter += 1

        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)

        return f"已注入图片: {replacements} 处(provider={provider}, save={'yes' if save else 'no'})"

    except Exception as e:
        raise RuntimeError(f"注入图片失败: {e}") from e
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
|
|
600
|
+
__all__ = ["fetch_generated_images", "inject_images"]
|