npm - union_kb_ingest - Versions diffs - 1.0.5 → 1.0.6 - Mend

union_kb_ingest 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/normalizer.py CHANGED

@@ -156,8 +156,12 @@ def _normalize_with_llm(block: ParsedBlock, status: str) -> List[KnowledgeItem]:
                     f"preview={_preview('；'.join(coverage_issues[:3]))}"
                 )
                 if attempt >= LLM_MAX_RETRIES:
-                    _abort_llm("source fact coverage failed after 10 attempts", block)
-                coverage_retry_feedback = _coverage_retry_prompt(coverage_issues)
+                    print(
+                        "WARNING: source fact coverage failed after "
+                        f"{LLM_MAX_RETRIES} attempts; releasing draft for manual review"
+                    )
+                    return _items_with_coverage_warning(items, block, coverage_issues)
+                coverage_retry_feedback = _coverage_retry_prompt(block, coverage_issues, items)
                 time.sleep(min(2 ** (attempt - 1), 30))
                 continue
             return items
@@ -195,14 +199,31 @@ def _compact_retry_prompt(base_prompt: str) -> str:
     )
-def _coverage_retry_prompt(missing_facts: List[str]) -> str:
+def _coverage_retry_prompt(
+    block: ParsedBlock,
+    missing_facts: List[str],
+    items: List[KnowledgeItem],
+) -> str:
     """构造事实覆盖不足时的重试提示。"""
     lines = "\n".join(f"- {fact}" for fact in missing_facts[:8])
+    source_excerpt = _preview(block.content)[:4000] or "无"
+    context_excerpt = _preview(block.context)[:2000] or "无"
+    current_core = _preview(
+        "\n\n".join(_core_sections_for_coverage(item.body) for item in items)
+    )[:3000] or "无"
     return (
         "重试补充要求：上一次输出遗漏了以下来源正文事实。"
         "请重新生成 JSON，并把这些事实写入 ## 1. 核心内容、## 2. 适用边界 或 ## 3. 使用要求。"
         "短定义句、简称句、阈值句和规则句应优先保留原句或等价完整表述，不要只概括关键词。\n"
-        f"{lines}"
+        f"{lines}\n\n"
+        f"来源文档：{block.source_doc}\n"
+        f"来源章节：{block.source_section or '全文'}\n\n"
+        "当前来源原文片段如下，请结合上下文判断缺失事实应补入哪个正文小节：\n"
+        f"{source_excerpt}\n\n"
+        "辅助上下文如下。辅助上下文只用于理解位置和术语，不要把其中独有事实写入正文：\n"
+        f"{context_excerpt}\n\n"
+        "上一轮生成的核心正文如下，请在此基础上补齐遗漏事实并保持 JSON 根结构不变：\n"
+        f"{current_core}"
     )
@@ -719,6 +740,42 @@ def _normalize_heuristically(block: ParsedBlock, status: str) -> KnowledgeItem:
     )
+def _items_with_coverage_warning(
+    items: List[KnowledgeItem],
+    block: ParsedBlock,
+    missing_facts: List[str],
+) -> List[KnowledgeItem]:
+    """把覆盖不足的 LLM 结果降级为需人工复核的草稿。"""
+    for item in items:
+        item.status = "draft"
+        item.review_status = "coverage_warning"
+        if "需人工复核" not in item.tags:
+            item.tags.append("需人工复核")
+        item.body = _body_with_coverage_warning(item.body, block, missing_facts)
+    return items
+def _body_with_coverage_warning(
+    body: str,
+    block: ParsedBlock,
+    missing_facts: List[str],
+) -> str:
+    """在正文核心章节标注覆盖不足的来源事实。"""
+    fact_lines = "\n".join(f"> - {fact}" for fact in missing_facts[:20])
+    note = (
+        "\n\n"
+        "> WARNING: LLM 多次重试后仍未通过来源事实覆盖校验，本条目已降为草稿，需人工复核。\n"
+        f"> 来源文档：{block.source_doc}\n"
+        f"> 来源章节：{block.source_section or '全文'}\n"
+        "> 待人工补齐或确认的来源事实：\n"
+        f"{fact_lines}"
+    )
+    pattern = r"(^##\s+1\.\s+核心内容\s*$)"
+    if re.search(pattern, body, flags=re.M):
+        return re.sub(pattern, r"\1" + note, body, count=1, flags=re.M)
+    return f"{body.rstrip()}\n\n## 人工复核提示{note}"
 def _source_fact_coverage_issues(block: ParsedBlock, items: List[KnowledgeItem]) -> List[str]:
     """检查来源事实是否进入核心正文。"""
     facts = _source_fact_sentences(block.content)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "union_kb_ingest",
-  "version": "1.0.5",
+  "version": "1.0.6",
   "description": "Offline knowledge-base ingest helper for PDF, Word, Markdown and TXT documents.",
   "bin": {
     "union_kb_ingest": "bin/union_kb_ingest"

package/schemas.py CHANGED

@@ -104,6 +104,7 @@ class KnowledgeItem:
             "applicable_roles": self.applicable_roles,
             "tags": self.tags,
             "status": self.status,
+            "review_status": self.review_status,
         }