htmlgen-mcp 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of htmlgen-mcp might be problematic. Click here for more details.
- htmlgen_mcp/context_aware_executor.py +262 -0
- htmlgen_mcp/improved_progress.py +392 -0
- htmlgen_mcp/nas_log_manager.py +308 -0
- htmlgen_mcp/prompt_enhancer.py +240 -0
- htmlgen_mcp/web_agent_server.py +52 -27
- {htmlgen_mcp-0.3.4.dist-info → htmlgen_mcp-0.3.6.dist-info}/METADATA +1 -1
- {htmlgen_mcp-0.3.4.dist-info → htmlgen_mcp-0.3.6.dist-info}/RECORD +10 -6
- {htmlgen_mcp-0.3.4.dist-info → htmlgen_mcp-0.3.6.dist-info}/WHEEL +0 -0
- {htmlgen_mcp-0.3.4.dist-info → htmlgen_mcp-0.3.6.dist-info}/entry_points.txt +0 -0
- {htmlgen_mcp-0.3.4.dist-info → htmlgen_mcp-0.3.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""简化的 NAS 日志管理器 - 直接在 NAS 上读写日志"""
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, List, Optional, Any
|
|
6
|
+
import time
|
|
7
|
+
from datetime import datetime
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class NASLogManager:
    """Simple NAS log manager -- reads and writes job logs directly on the NAS.

    Logs are JSON-lines files stored under ``<nas_base>/mcp_data/make_web``,
    with small JSON index files that map both job_id and plan_id to the
    log file path so either identifier can be used for lookups.
    """

    def __init__(self, nas_base_path: str = "/app/mcp-servers/mcp-servers/html_agent"):
        self.nas_base = Path(nas_base_path)

        # Storage layout on the NAS share.
        self.logs_dir = self.nas_base / "mcp_data" / "make_web" / "progress_logs"
        self.jobs_dir = self.nas_base / "mcp_data" / "make_web" / "jobs"
        self.index_dir = self.nas_base / "mcp_data" / "make_web" / "job_index"

        # Create directories eagerly so later writes never fail on mkdir.
        for dir_path in [self.logs_dir, self.jobs_dir, self.index_dir]:
            dir_path.mkdir(parents=True, exist_ok=True)

    def create_job_log(self, job_id: str, plan_id: Optional[str] = None) -> str:
        """Create the log file for a job (idempotent).

        Args:
            job_id: Job identifier.
            plan_id: Optional plan identifier; when given, a second index
                entry is written so the log can also be found by plan_id.

        Returns:
            Path of the log file as a string.
        """
        log_file = self.logs_dir / f"{job_id}.jsonl"

        # Index record used for fast lookups (by job_id and/or plan_id).
        index_data = {
            "job_id": job_id,
            "plan_id": plan_id,
            "log_file": str(log_file),
            "created_at": datetime.now().isoformat(),
            "node_id": os.environ.get("NODE_ID", "unknown")
        }

        # Save the job_id index.
        job_index_file = self.index_dir / f"{job_id}.json"
        with open(job_index_file, 'w', encoding='utf-8') as f:
            json.dump(index_data, f, ensure_ascii=False, indent=2)

        # If a plan_id was supplied, index by it as well.
        if plan_id:
            plan_index_file = self.index_dir / f"{plan_id}.json"
            with open(plan_index_file, 'w', encoding='utf-8') as f:
                json.dump(index_data, f, ensure_ascii=False, indent=2)

        # Seed the log file with a creation event exactly once.
        if not log_file.exists():
            with open(log_file, 'w', encoding='utf-8') as f:
                init_event = {
                    "timestamp": time.time(),
                    "event": "job_created",
                    "job_id": job_id,
                    "plan_id": plan_id,
                    "created_at": datetime.now().isoformat()
                }
                f.write(json.dumps(init_event, ensure_ascii=False) + '\n')

        return str(log_file)

    def find_log_file(self, identifier: str) -> Optional[str]:
        """Locate a log file by job_id or plan_id.

        Args:
            identifier: job_id or plan_id.

        Returns:
            The log file path, or None when nothing matches.
        """
        # 1) Direct index-file hit.
        index_file = self.index_dir / f"{identifier}.json"
        if index_file.exists():
            try:
                with open(index_file, 'r', encoding='utf-8') as f:
                    index_data = json.load(f)
                log_file = index_data.get("log_file")
                if log_file and Path(log_file).exists():
                    return log_file
            except Exception:
                pass  # best-effort: fall through to the slower lookups

        # 2) Treat the identifier as a job_id and probe the logs directory.
        direct_log = self.logs_dir / f"{identifier}.jsonl"
        if direct_log.exists():
            return str(direct_log)

        # 3) Full scan of all index files (covers plan_id-only matches).
        for index_file in self.index_dir.glob("*.json"):
            try:
                with open(index_file, 'r', encoding='utf-8') as f:
                    index_data = json.load(f)
                if (index_data.get("job_id") == identifier or
                        index_data.get("plan_id") == identifier):
                    log_file = index_data.get("log_file")
                    if log_file and Path(log_file).exists():
                        return log_file
            except Exception:
                continue

        return None

    def write_progress(self, identifier: str, event: Dict[str, Any]) -> bool:
        """Append a progress event to the job's log.

        Args:
            identifier: job_id or plan_id.
            event: Progress event payload (not mutated).

        Returns:
            True when the event was written, False on failure.
        """
        log_file_path = self.find_log_file(identifier)

        # No log yet: assume the identifier is a job_id and create one.
        if not log_file_path:
            log_file_path = self.create_job_log(identifier)

        try:
            # Work on a copy so the caller's dict is never mutated.
            record = dict(event)
            if "timestamp" not in record:
                record["timestamp"] = time.time()

            # Append and flush immediately so readers on other nodes see it.
            with open(log_file_path, 'a', encoding='utf-8') as f:
                f.write(json.dumps(record, ensure_ascii=False) + '\n')
                f.flush()

            return True

        except Exception as e:
            print(f"写入进度失败 {identifier}: {e}")
            return False

    def read_progress(self, identifier: str, limit: int = 100) -> List[Dict[str, Any]]:
        """Read progress events in chronological order.

        Args:
            identifier: job_id or plan_id.
            limit: Maximum number of (most recent) events to return.

        Returns:
            List of events, oldest first; empty when the job is unknown.
        """
        log_file_path = self.find_log_file(identifier)
        if not log_file_path:
            return []

        events = []
        try:
            with open(log_file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # The file is append-only, so the newest `limit` lines are
            # already in chronological order; one forward pass suffices.
            for line in lines[-limit:]:
                if line.strip():
                    try:
                        events.append(json.loads(line))
                    except json.JSONDecodeError:
                        continue  # skip partially written / corrupt lines

        except Exception as e:
            print(f"读取进度失败 {identifier}: {e}")

        return events

    def get_all_jobs(self) -> List[Dict[str, Any]]:
        """Return all known jobs, newest first."""
        jobs = []

        # Scan every index file; plan_id indexes duplicate job_id indexes.
        for index_file in self.index_dir.glob("*.json"):
            try:
                with open(index_file, 'r', encoding='utf-8') as f:
                    index_data = json.load(f)
                # Only keep records indexed by job_id to avoid duplicates.
                if index_data.get("job_id") == index_file.stem:
                    jobs.append(index_data)
            except Exception:
                continue

        # Sort by creation time, newest first.
        jobs.sort(key=lambda x: x.get("created_at", ""), reverse=True)

        return jobs

    def job_exists(self, identifier: str) -> bool:
        """Return True when a log exists for the given job_id or plan_id."""
        return self.find_log_file(identifier) is not None

    def get_job_summary(self, identifier: str) -> Optional[Dict[str, Any]]:
        """Build a short summary of a job from its recent events.

        Args:
            identifier: job_id or plan_id.

        Returns:
            Summary dict, or None when the job is unknown or has no events.
        """
        log_file_path = self.find_log_file(identifier)
        if not log_file_path:
            return None

        try:
            # Only the most recent events are needed for a summary.
            events = self.read_progress(identifier, limit=10)
            if not events:
                return None

            first_event = events[0]
            latest_event = events[-1]

            summary = {
                "identifier": identifier,
                "log_file": log_file_path,
                "total_events": len(events),
                "first_event_time": first_event.get("timestamp"),
                "latest_event_time": latest_event.get("timestamp"),
                "latest_status": latest_event.get("status", "unknown"),
                "latest_message": latest_event.get("message", ""),
            }

            # Enrich with index metadata when available.
            index_file = self.index_dir / f"{identifier}.json"
            if index_file.exists():
                try:
                    with open(index_file, 'r', encoding='utf-8') as f:
                        index_data = json.load(f)
                    summary.update({
                        "job_id": index_data.get("job_id"),
                        "plan_id": index_data.get("plan_id"),
                        "created_at": index_data.get("created_at"),
                        "node_id": index_data.get("node_id")
                    })
                except Exception:
                    pass  # index is optional; the summary is still useful

            return summary

        except Exception as e:
            print(f"获取任务摘要失败 {identifier}: {e}")
            return None
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
# Module-level singleton instance
_nas_log_manager: Optional[NASLogManager] = None


def get_nas_log_manager() -> NASLogManager:
    """Return the process-wide NASLogManager, creating it on first use.

    The NAS root is taken from the ``NAS_STORAGE_PATH`` environment
    variable, falling back to the container default path.
    """
    global _nas_log_manager
    if _nas_log_manager is None:
        default_root = "/app/mcp-servers/mcp-servers/html_agent"
        _nas_log_manager = NASLogManager(
            os.environ.get("NAS_STORAGE_PATH", default_root)
        )
    return _nas_log_manager
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
# Convenience helpers


def log_progress(job_id: str, **kwargs):
    """Append a progress event for *job_id* built from keyword arguments."""
    return get_nas_log_manager().write_progress(job_id, kwargs)
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
def query_progress(identifier: str, limit: int = 20):
    """Return up to *limit* recent progress events for *identifier*."""
    return get_nas_log_manager().read_progress(identifier, limit)
|
|
303
|
+
|
|
304
|
+
|
|
305
|
+
def ensure_job_log(job_id: str, plan_id: Optional[str] = None):
    """Make sure the job's log file exists and return its path."""
    return get_nas_log_manager().create_job_log(job_id, plan_id)
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"""提示词增强模块 - 确保AI使用真实数据而非生成示例"""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class PromptEnhancer:
    """Prompt enhancer that steers the model toward using real business data.

    All methods are static: the class is a namespace for prompt-building
    and validation helpers.
    """

    @staticmethod
    def enhance_for_real_data(base_prompt: str, context_data: str = None) -> str:
        """Augment a prompt so the model must use the supplied real data.

        Args:
            base_prompt: The base prompt text.
            context_data: Real business data to embed; when falsy, the
                base prompt is returned unchanged.

        Returns:
            The enhanced prompt.
        """
        if not context_data:
            return base_prompt

        # Classify the data so type-specific instructions can be added.
        data_type = PromptEnhancer._analyze_data_type(context_data)

        # Build the instructions for that data type.
        specific_instructions = PromptEnhancer._get_specific_instructions(data_type, context_data)

        enhanced = f"""
{base_prompt}

【🔴 极其重要的数据使用规则 🔴】
=====================================
以下是必须严格遵守的数据使用规则:

1. 【数据来源】下面提供的是真实的业务数据,不是示例或模板
2. 【使用要求】必须100%使用这些数据,不得修改、省略或虚构
3. 【禁止行为】严禁生成以下虚构内容:
- ❌ 虚构的客户评价(如"设计质感与转化率提升明显")
- ❌ 虚构的定价方案(如"¥9,999起步套餐")
- ❌ 虚构的服务内容(如"品牌升级与重构")
- ❌ 占位符内容(如"Lorem ipsum"或"示例文本")

4. 【正确做法】:
- ✅ 完整展示所有提供的数据项
- ✅ 保持数据的原始格式和内容
- ✅ 使用合适的布局展示(卡片、列表、表格等)
- ✅ 可以添加导航、样式,但内容必须是提供的真实数据

【必须使用的真实数据】
=====================================
{context_data}
=====================================

{specific_instructions}

【工具调用要求】
=====================================
在调用以下工具时,必须包含真实数据:
- create_html_file: content参数必须包含上述真实数据
- add_content_section: 必须使用真实数据填充内容
- create_text_content: 文本内容必须来自上述数据
- add_hero_section: 标题和描述要反映真实业务
- create_card_grid: 卡片内容必须是真实数据项

【验证要求】
=====================================
生成的每个HTML文件都必须包含:
1. 完整的数据列表(不得遗漏任何一项)
2. 准确的名称和地址信息
3. 正确的数据展示格式

记住:这是一个数据展示任务,不是创意写作任务!
"""
        return enhanced

    @staticmethod
    def _analyze_data_type(context_data: str) -> str:
        """Classify the context data.

        Args:
            context_data: Raw context text.

        Returns:
            One of "store_list", "product_list", "menu_list",
            "pricing_list", "contact_info" or "general_list".
        """
        lower_data = context_data.lower()

        # A store list needs both a shop keyword and an address keyword;
        # shop keywords without addresses fall through to the other checks.
        if "咖啡" in context_data or "coffee" in lower_data or "店" in context_data:
            if "地址" in context_data or "address" in lower_data:
                return "store_list"

        if "产品" in context_data or "product" in lower_data:
            return "product_list"

        if "菜单" in context_data or "menu" in lower_data:
            return "menu_list"

        if "价格" in context_data or "price" in lower_data:
            return "pricing_list"

        if "联系" in context_data or "contact" in lower_data:
            return "contact_info"

        return "general_list"

    @staticmethod
    def _get_specific_instructions(data_type: str, context_data: str) -> str:
        """Build data-type-specific instructions.

        Args:
            data_type: Data type identifier from _analyze_data_type.
            context_data: Raw context text, used to count list items.

        Returns:
            Instruction text to append to the prompt.
        """
        # Count list items: every "N." numbered line plus dash bullets.
        # (The previous implementation only counted "1."/"2."/"3.", so any
        # list longer than three entries was under-reported.)
        numbered = 0
        for raw_line in context_data.splitlines():
            head = raw_line.lstrip().split('.', 1)[0]
            if head.isdigit():
                numbered += 1
        item_count = numbered + context_data.count('\n-')

        if data_type == "store_list":
            return f"""
【针对店铺列表的特定要求】
- 必须展示所有{item_count}个店铺
- 每个店铺必须包含:名称、地址
- 使用卡片布局,每行2-3个
- 可以添加地图链接按钮
- 可以按区域或品牌分组展示
"""

        elif data_type == "product_list":
            return f"""
【针对产品列表的特定要求】
- 必须展示所有产品信息
- 保持原始的产品名称和描述
- 使用产品卡片或展示网格
- 可以添加产品图片占位符
"""

        elif data_type == "menu_list":
            return f"""
【针对菜单的特定要求】
- 必须展示完整菜单
- 保持原始的菜品名称和价格
- 可以按类别分组
- 使用清晰的表格或列表格式
"""

        else:
            return f"""
【通用数据展示要求】
- 必须展示所有数据项(共约{item_count}项)
- 保持数据的原始格式
- 使用适合的布局展示
- 不得添加虚构内容
"""

    @staticmethod
    def validate_content_usage(generated_content: str, original_data: str) -> dict:
        """Check whether generated content actually uses the original data.

        Args:
            generated_content: Content produced by the model.
            original_data: The real data it was supposed to use.

        Returns:
            Dict with validity flag, found/missing items, detected fake
            content, and coverage rate.
        """
        # Extract key items (shop names, addresses) from the original data.
        key_items = []
        lines = original_data.split('\n')
        for line in lines:
            line = line.strip()
            # Numbered entry: take the text after "N. ", drop a trailing
            # parenthesised note if present.
            if '. ' in line and line[0].isdigit():
                item = line.split('. ', 1)[1] if '. ' in line else line
                if '(' in item:
                    item = item.split('(')[0].strip()
                key_items.append(item)
            # Address bullet ("- 地址:..."): keep the address text.
            elif '- 地址:' in line:
                address = line.replace('- 地址:', '').strip()
                key_items.append(address)

        # Check which key items made it into the generated content.
        missing_items = []
        found_items = []

        for item in key_items:
            if item in generated_content:
                found_items.append(item)
            else:
                missing_items.append(item)

        # Markers that typically indicate fabricated / placeholder content.
        fake_content_patterns = [
            "转化率提升",
            "品牌形象",
            "¥9,999",
            "¥29,999",
            "¥59,999",
            "起步套餐",
            "专业套餐",
            "旗舰套餐",
            "Alex Chen",
            "Liang Wu",
            "Yvonne Zhao",
            "设计质感",
            "交付质量",
            "Lorem ipsum",
            "示例文本"
        ]

        detected_fake = [pattern for pattern in fake_content_patterns
                         if pattern in generated_content]

        return {
            "valid": len(missing_items) == 0 and len(detected_fake) == 0,
            "found_items": found_items,
            "missing_items": missing_items,
            "detected_fake_content": detected_fake,
            "coverage_rate": len(found_items) / len(key_items) if key_items else 0,
            "has_fake_content": len(detected_fake) > 0
        }
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
# Module-level convenience wrappers
def enhance_prompt_for_real_data(prompt: str, context: str = None) -> str:
    """Augment *prompt* so the model must use the real *context* data."""
    return PromptEnhancer().enhance_for_real_data(prompt, context)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def validate_data_usage(content: str, original: str) -> dict:
    """Check that generated *content* actually uses the data in *original*."""
    return PromptEnhancer().validate_content_usage(content, original)
|
htmlgen_mcp/web_agent_server.py
CHANGED
|
@@ -32,6 +32,8 @@ from pathlib import Path
|
|
|
32
32
|
|
|
33
33
|
from htmlgen_mcp.agents.smart_web_agent import SmartWebAgent
|
|
34
34
|
from htmlgen_mcp.nas_storage import get_nas_storage
|
|
35
|
+
from htmlgen_mcp.nas_log_manager import get_nas_log_manager, ensure_job_log, log_progress, query_progress
|
|
36
|
+
from htmlgen_mcp.prompt_enhancer import enhance_prompt_for_real_data
|
|
35
37
|
from datetime import datetime
|
|
36
38
|
|
|
37
39
|
# 使用 NAS 作为默认存储路径
|
|
@@ -87,6 +89,8 @@ def _persist_job_state(job_id: str) -> None:
|
|
|
87
89
|
return
|
|
88
90
|
job_copy = {k: v for k, v in job.items() if k not in {"agent"}}
|
|
89
91
|
job_copy["updated_at"] = time.time()
|
|
92
|
+
|
|
93
|
+
# 同时保存到本地和 NAS
|
|
90
94
|
path = _job_state_path(job_id)
|
|
91
95
|
try:
|
|
92
96
|
path.write_text(
|
|
@@ -94,6 +98,15 @@ def _persist_job_state(job_id: str) -> None:
|
|
|
94
98
|
)
|
|
95
99
|
except Exception:
|
|
96
100
|
pass
|
|
101
|
+
|
|
102
|
+
# 保存到 NAS 日志
|
|
103
|
+
try:
|
|
104
|
+
log_manager = get_nas_log_manager()
|
|
105
|
+
plan_id = job.get("plan_id")
|
|
106
|
+
log_manager.create_job_log(job_id, plan_id)
|
|
107
|
+
log_progress(job_id, status="registered", job_info=job_copy)
|
|
108
|
+
except Exception:
|
|
109
|
+
pass
|
|
97
110
|
|
|
98
111
|
|
|
99
112
|
def _load_job_states() -> None:
|
|
@@ -257,12 +270,15 @@ def _execute_plan(
|
|
|
257
270
|
plan: Dict[str, Any],
|
|
258
271
|
*,
|
|
259
272
|
progress_log_path: Optional[str] = None,
|
|
273
|
+
job_id: Optional[str] = None,
|
|
260
274
|
) -> Dict[str, Any]:
|
|
261
275
|
progress_events: list[Dict[str, Any]] = []
|
|
262
276
|
|
|
263
277
|
def _collect(event: Dict[str, Any]) -> None:
|
|
264
278
|
if isinstance(event, dict):
|
|
265
279
|
progress_events.append(event)
|
|
280
|
+
|
|
281
|
+
# 写入本地日志
|
|
266
282
|
if progress_log_path:
|
|
267
283
|
try:
|
|
268
284
|
log_record = dict(event)
|
|
@@ -272,6 +288,13 @@ def _execute_plan(
|
|
|
272
288
|
log_file.write("\n")
|
|
273
289
|
except Exception:
|
|
274
290
|
pass
|
|
291
|
+
|
|
292
|
+
# 同时写入 NAS 日志
|
|
293
|
+
if job_id:
|
|
294
|
+
try:
|
|
295
|
+
log_progress(job_id, **event)
|
|
296
|
+
except Exception:
|
|
297
|
+
pass
|
|
275
298
|
|
|
276
299
|
results = agent._execute_plan_with_recovery(
|
|
277
300
|
plan,
|
|
@@ -754,42 +777,45 @@ async def create_simple_site(
|
|
|
754
777
|
force_single_page=True,
|
|
755
778
|
)
|
|
756
779
|
|
|
757
|
-
#
|
|
758
|
-
|
|
780
|
+
# 如果有上下文内容,将其整合到描述中
|
|
781
|
+
enhanced_description = description
|
|
759
782
|
if context_data:
|
|
760
|
-
|
|
783
|
+
enhanced_description = f"""{description}
|
|
761
784
|
|
|
762
|
-
|
|
785
|
+
【必须使用的具体数据内容】:
|
|
763
786
|
{context_data}
|
|
764
787
|
|
|
765
|
-
|
|
788
|
+
【重要提示】:上述数据是真实的业务数据,必须完整准确地展示在网页中,不要生成虚构的示例内容。"""
|
|
766
789
|
|
|
767
|
-
|
|
790
|
+
# 构建改进的提示词,强调使用真实数据
|
|
791
|
+
simple_prompt = f"""请为以下需求创建一个网站,并严格使用提供的真实数据:
|
|
768
792
|
|
|
769
|
-
|
|
770
|
-
|
|
793
|
+
**网站标题**: {site_title}
|
|
794
|
+
**具体需求和数据**:
|
|
795
|
+
{enhanced_description}
|
|
771
796
|
|
|
772
|
-
|
|
773
|
-
1.
|
|
774
|
-
2.
|
|
775
|
-
3.
|
|
776
|
-
4.
|
|
777
|
-
5.
|
|
778
|
-
6. 智能图片集成,根据内容类型匹配合适主题
|
|
779
|
-
7. 如果有上下文内容,请充分利用这些信息来丰富网页内容
|
|
797
|
+
**执行要求**:
|
|
798
|
+
1. 【数据要求】如果提供了具体数据(如店铺列表、产品信息等),必须100%使用这些真实数据,不要创建虚构内容
|
|
799
|
+
2. 【内容展示】将所有提供的数据项完整展示,使用合适的布局(如卡片、列表、表格等)
|
|
800
|
+
3. 【样式设计】保持简洁美观,使用响应式设计
|
|
801
|
+
4. 【代码限制】CSS不超过300行,避免复杂特效
|
|
802
|
+
5. 【功能实现】包含基础交互功能(导航、滚动等)
|
|
780
803
|
|
|
781
|
-
|
|
782
|
-
|
|
804
|
+
**特别强调**:
|
|
805
|
+
- 当创建HTML内容时,必须使用上面提供的真实数据
|
|
806
|
+
- 不要生成"示例客户评价"、"虚拟定价方案"等占位内容
|
|
807
|
+
- 如果是咖啡馆列表,就展示真实的咖啡馆名称和地址
|
|
808
|
+
- 如果是产品信息,就展示真实的产品数据
|
|
809
|
+
- 每个create_html_file或add_content_section工具调用时,都要包含真实数据
|
|
783
810
|
|
|
784
|
-
|
|
785
|
-
- 每个步骤都要具体可执行
|
|
786
|
-
- 优先使用简单模板函数而非复杂模板
|
|
787
|
-
- 注重实用性和美观性的平衡
|
|
788
|
-
- 充分利用提供的上下文信息来生成个性化内容
|
|
811
|
+
请生成3-6个步骤的执行计划,确保每个步骤都能正确使用提供的数据。
|
|
789
812
|
"""
|
|
790
813
|
|
|
791
814
|
# 生成简化计划(仅规划,不执行)
|
|
792
|
-
|
|
815
|
+
# 传递强化后的提示词,确保AI使用真实数据
|
|
816
|
+
# 使用提示词增强器进一步强化
|
|
817
|
+
final_prompt = enhance_prompt_for_real_data(simple_prompt, context_data)
|
|
818
|
+
plan = agent._get_execution_plan(final_prompt)
|
|
793
819
|
|
|
794
820
|
# 在计划中标记为简单网站类型和相关信息
|
|
795
821
|
plan["site_type"] = "simple"
|
|
@@ -804,9 +830,8 @@ async def create_simple_site(
|
|
|
804
830
|
plan_id = str(uuid.uuid4())
|
|
805
831
|
|
|
806
832
|
# 构建完整的源描述(包含上下文)
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
source_description = f"{description}\n\n【附加内容】\n{context_data}"
|
|
833
|
+
# 使用enhanced_description以确保数据被传递
|
|
834
|
+
source_description = enhanced_description
|
|
810
835
|
|
|
811
836
|
# 在计划中添加源描述字段
|
|
812
837
|
plan["__source_description"] = source_description
|