myagent-ai 1.15.91 → 1.15.92
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/search_skill.py +11 -11
package/package.json
CHANGED
package/skills/search_skill.py
CHANGED
|
@@ -331,18 +331,18 @@ class WebReadSkill(Skill):
|
|
|
331
331
|
tag.decompose()
|
|
332
332
|
# 移除所有 class 含特定关键词的元素
|
|
333
333
|
# 注意: decompose() 会连带销毁子元素,子元素的 attrs 变为 None,
|
|
334
|
-
#
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
334
|
+
# 因此在访问属性前必须先检查 attrs,以跳过已被销毁的标签
|
|
335
|
+
try:
|
|
336
|
+
for tag in list(soup.find_all(True, class_=True)):
|
|
337
|
+
if not getattr(tag, 'attrs', None):
|
|
338
|
+
continue
|
|
339
339
|
cls_str = " ".join(tag.get("class", []))
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
340
|
+
if any(kw in cls_str.lower() for kw in
|
|
341
|
+
["sidebar", "footer", "nav", "header", "advertisement",
|
|
342
|
+
"cookie", "popup", "modal", "banner", "social", "share"]):
|
|
343
|
+
tag.decompose()
|
|
344
|
+
except Exception:
|
|
345
|
+
pass # 解析异常时跳过清理,不影响主要内容提取
|
|
346
346
|
|
|
347
347
|
text = soup.get_text(separator="\n", strip=True)
|
|
348
348
|
text = "\n".join(line for line in text.split("\n") if line.strip())
|