union_kb_ingest 1.0.7 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,9 @@
1
1
  llm:
2
- enabled: true
2
+ enabled: false
3
3
  timeout_seconds: 120
4
4
  max_tokens: 8192
5
5
  temperature: 0.1
6
- api_key: "15f066c4509845038027ea5746524af5.w4CLSC6ODiKVC1wK"
6
+ api_key: ""
7
7
  model: "GLM-4.7-Flash"
8
8
  base_url: "https://open.bigmodel.cn/api/paas/v4/"
9
9
 
package/normalizer.py CHANGED
@@ -453,22 +453,76 @@ def _get_zhipu_client_class():
453
453
 
454
454
  def _extract_response_content(response) -> str:
455
455
  """从模型响应中提取正文内容。"""
456
- if isinstance(response, dict):
457
- choices = response.get("choices") or []
458
- if not choices:
459
- return ""
460
- message = choices[0].get("message") if isinstance(choices[0], dict) else None
461
- return str((message or {}).get("content") or "")
462
-
463
- choices = getattr(response, "choices", None) or []
464
- if not choices:
456
+ message = _first_message(response)
457
+ if message is None:
465
458
  return ""
466
- message = getattr(choices[0], "message", None)
467
- if message is None and isinstance(choices[0], dict):
468
- message = choices[0].get("message")
459
+
469
460
  if isinstance(message, dict):
470
- return str(message.get("content") or "")
471
- return str(getattr(message, "content", "") or "")
461
+ content = _stringify_message_content(message.get("content"))
462
+ if content:
463
+ return content
464
+ content = _extract_tool_call_content(message.get("function_call"))
465
+ if content:
466
+ return content
467
+ content = _extract_tool_call_content(message.get("tool_calls"))
468
+ if content:
469
+ return content
470
+ return _stringify_message_content(message.get("reasoning_content"))
471
+
472
+ content = _stringify_message_content(getattr(message, "content", ""))
473
+ if content:
474
+ return content
475
+ content = _extract_tool_call_content(getattr(message, "function_call", None))
476
+ if content:
477
+ return content
478
+ content = _extract_tool_call_content(getattr(message, "tool_calls", None))
479
+ if content:
480
+ return content
481
+ return _stringify_message_content(getattr(message, "reasoning_content", ""))
482
+
483
+
484
+ def _stringify_message_content(content) -> str:
485
+ """兼容不同 SDK 返回的纯文本、分段文本和结构化 content。"""
486
+ if content is None:
487
+ return ""
488
+ if isinstance(content, str):
489
+ return content
490
+ if isinstance(content, list):
491
+ parts = [_stringify_message_content(part) for part in content]
492
+ return "\n".join(part for part in parts if part)
493
+ if isinstance(content, dict):
494
+ for key in ("text", "content", "output_text", "json", "arguments"):
495
+ value = content.get(key)
496
+ text = _stringify_message_content(value)
497
+ if text:
498
+ return text
499
+ try:
500
+ return json.dumps(content, ensure_ascii=False)
501
+ except TypeError:
502
+ return str(content)
503
+
504
+ for attr in ("text", "content", "output_text"):
505
+ value = getattr(content, attr, None)
506
+ text = _stringify_message_content(value)
507
+ if text:
508
+ return text
509
+ return str(content)
510
+
511
+
512
+ def _extract_tool_call_content(tool_calls) -> str:
513
+ """从工具/函数调用参数里兜底提取 JSON 文本。"""
514
+ if not tool_calls:
515
+ return ""
516
+ calls = tool_calls if isinstance(tool_calls, list) else [tool_calls]
517
+ for call in calls:
518
+ function = call.get("function") if isinstance(call, dict) else getattr(call, "function", None)
519
+ if function is None:
520
+ function = call
521
+ arguments = function.get("arguments") if isinstance(function, dict) else getattr(function, "arguments", None)
522
+ text = _stringify_message_content(arguments)
523
+ if text:
524
+ return text
525
+ return ""
472
526
 
473
527
 
474
528
  def _extract_reasoning_content(response) -> str:
@@ -525,7 +579,19 @@ def _coerce_raw_items(parsed):
525
579
  if isinstance(items, list):
526
580
  return items
527
581
 
528
- for key in ("knowledge_items", "records", "data", "result", "results"):
582
+ for key in (
583
+ "knowledge_items",
584
+ "records",
585
+ "data",
586
+ "payload",
587
+ "output",
588
+ "response",
589
+ "answer",
590
+ "content",
591
+ "message",
592
+ "result",
593
+ "results",
594
+ ):
529
595
  value = parsed.get(key)
530
596
  if isinstance(value, list):
531
597
  print(f"llm parse notice: using non-standard list field '{key}' as items")
@@ -535,6 +601,13 @@ def _coerce_raw_items(parsed):
535
601
  if isinstance(nested, list):
536
602
  print(f"llm parse notice: using nested field '{key}' as items")
537
603
  return nested
604
+ if isinstance(value, str) and value.strip():
605
+ nested = _extract_json_with_diagnostics(value)
606
+ if nested.value is not None:
607
+ nested_items = _coerce_raw_items(nested.value)
608
+ if isinstance(nested_items, list):
609
+ print(f"llm parse notice: parsed JSON string field '{key}' as items")
610
+ return nested_items
538
611
 
539
612
  if _looks_like_single_item(parsed):
540
613
  print("llm parse notice: wrapping single item object as items[0]")
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "union_kb_ingest",
3
- "version": "1.0.7",
3
+ "version": "1.0.9",
4
4
  "description": "Offline knowledge-base ingest helper for PDF, Word, Markdown and TXT documents.",
5
5
  "bin": {
6
6
  "union_kb_ingest": "bin/union_kb_ingest"