myagent-ai 1.15.90 → 1.15.92
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/search_skill.py +13 -6
package/package.json
CHANGED
package/skills/search_skill.py
CHANGED
|
@@ -330,12 +330,19 @@ class WebReadSkill(Skill):
|
|
|
330
330
|
for tag in soup(["script", "style", "nav", "footer", "header", "aside"]):
|
|
331
331
|
tag.decompose()
|
|
332
332
|
# 移除所有 class 含特定关键词的元素
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
333
|
+
# Note: decompose() also destroys child elements, whose attrs then become None,
|
|
334
|
+
# so tags with no attrs are skipped as already destroyed (parent is not checked)
|
|
335
|
+
try:
|
|
336
|
+
for tag in list(soup.find_all(True, class_=True)):
|
|
337
|
+
if not getattr(tag, 'attrs', None):
|
|
338
|
+
continue
|
|
339
|
+
cls_str = " ".join(tag.get("class", []))
|
|
340
|
+
if any(kw in cls_str.lower() for kw in
|
|
341
|
+
["sidebar", "footer", "nav", "header", "advertisement",
|
|
342
|
+
"cookie", "popup", "modal", "banner", "social", "share"]):
|
|
343
|
+
tag.decompose()
|
|
344
|
+
except Exception:
|
|
345
|
+
pass # 解析异常时跳过清理,不影响主要内容提取
|
|
339
346
|
|
|
340
347
|
text = soup.get_text(separator="\n", strip=True)
|
|
341
348
|
text = "\n".join(line for line in text.split("\n") if line.strip())
|