npm - union_kb_ingest - Versions diffs - 1.0.7 → 1.0.9 - Mend

union_kb_ingest 1.0.7 → 1.0.9

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (3)

package/config/config.yaml CHANGED Viewed

@@ -1,9 +1,9 @@
 llm:
-  enabled: true
+  enabled: false
   timeout_seconds: 120
   max_tokens: 8192
   temperature: 0.1
-  api_key: "15f066c4509845038027ea5746524af5.w4CLSC6ODiKVC1wK"
+  api_key: ""
   model: "GLM-4.7-Flash"
   base_url: "https://open.bigmodel.cn/api/paas/v4/"

package/normalizer.py CHANGED Viewed

@@ -453,22 +453,76 @@ def _get_zhipu_client_class():
 def _extract_response_content(response) -> str:
     """从模型响应中提取正文内容。"""
-    if isinstance(response, dict):
-        choices = response.get("choices") or []
-        if not choices:
-            return ""
-        message = choices[0].get("message") if isinstance(choices[0], dict) else None
-        return str((message or {}).get("content") or "")
-    choices = getattr(response, "choices", None) or []
-    if not choices:
+    message = _first_message(response)
+    if message is None:
         return ""
-    message = getattr(choices[0], "message", None)
-    if message is None and isinstance(choices[0], dict):
-        message = choices[0].get("message")
     if isinstance(message, dict):
-        return str(message.get("content") or "")
-    return str(getattr(message, "content", "") or "")
+        content = _stringify_message_content(message.get("content"))
+        if content:
+            return content
+        content = _extract_tool_call_content(message.get("function_call"))
+        if content:
+            return content
+        content = _extract_tool_call_content(message.get("tool_calls"))
+        if content:
+            return content
+        return _stringify_message_content(message.get("reasoning_content"))
+    content = _stringify_message_content(getattr(message, "content", ""))
+    if content:
+        return content
+    content = _extract_tool_call_content(getattr(message, "function_call", None))
+    if content:
+        return content
+    content = _extract_tool_call_content(getattr(message, "tool_calls", None))
+    if content:
+        return content
+    return _stringify_message_content(getattr(message, "reasoning_content", ""))
+def _stringify_message_content(content) -> str:
+    """兼容不同 SDK 返回的纯文本、分段文本和结构化 content。"""
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts = [_stringify_message_content(part) for part in content]
+        return "\n".join(part for part in parts if part)
+    if isinstance(content, dict):
+        for key in ("text", "content", "output_text", "json", "arguments"):
+            value = content.get(key)
+            text = _stringify_message_content(value)
+            if text:
+                return text
+        try:
+            return json.dumps(content, ensure_ascii=False)
+        except TypeError:
+            return str(content)
+    for attr in ("text", "content", "output_text"):
+        value = getattr(content, attr, None)
+        text = _stringify_message_content(value)
+        if text:
+            return text
+    return str(content)
+def _extract_tool_call_content(tool_calls) -> str:
+    """从工具/函数调用参数里兜底提取 JSON 文本。"""
+    if not tool_calls:
+        return ""
+    calls = tool_calls if isinstance(tool_calls, list) else [tool_calls]
+    for call in calls:
+        function = call.get("function") if isinstance(call, dict) else getattr(call, "function", None)
+        if function is None:
+            function = call
+        arguments = function.get("arguments") if isinstance(function, dict) else getattr(function, "arguments", None)
+        text = _stringify_message_content(arguments)
+        if text:
+            return text
+    return ""
 def _extract_reasoning_content(response) -> str:
@@ -525,7 +579,19 @@ def _coerce_raw_items(parsed):
         if isinstance(items, list):
             return items
-        for key in ("knowledge_items", "records", "data", "result", "results"):
+        for key in (
+            "knowledge_items",
+            "records",
+            "data",
+            "payload",
+            "output",
+            "response",
+            "answer",
+            "content",
+            "message",
+            "result",
+            "results",
+        ):
             value = parsed.get(key)
             if isinstance(value, list):
                 print(f"llm parse notice: using non-standard list field '{key}' as items")
@@ -535,6 +601,13 @@ def _coerce_raw_items(parsed):
                 if isinstance(nested, list):
                     print(f"llm parse notice: using nested field '{key}' as items")
                     return nested
+            if isinstance(value, str) and value.strip():
+                nested = _extract_json_with_diagnostics(value)
+                if nested.value is not None:
+                    nested_items = _coerce_raw_items(nested.value)
+                    if isinstance(nested_items, list):
+                        print(f"llm parse notice: parsed JSON string field '{key}' as items")
+                        return nested_items
         if _looks_like_single_item(parsed):
             print("llm parse notice: wrapping single item object as items[0]")

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "union_kb_ingest",
-  "version": "1.0.7",
+  "version": "1.0.9",
   "description": "Offline knowledge-base ingest helper for PDF, Word, Markdown and TXT documents.",
   "bin": {
     "union_kb_ingest": "bin/union_kb_ingest"