union_kb_ingest 1.0.7 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/config/config.yaml +2 -2
- package/normalizer.py +88 -15
- package/package.json +1 -1
package/config/config.yaml
CHANGED
package/normalizer.py
CHANGED
|
@@ -453,22 +453,76 @@ def _get_zhipu_client_class():
|
|
|
453
453
|
|
|
454
454
|
def _extract_response_content(response) -> str:
    """Extract the main text content from a model response.

    The message is obtained via ``_first_message(response)`` and may be either
    a dict or an SDK object; fields are read with ``.get`` or ``getattr``
    accordingly. Sources are tried in order and the first non-empty text wins:

    1. ``content``
    2. ``function_call`` arguments
    3. ``tool_calls`` arguments
    4. ``reasoning_content`` (returned as-is, even when empty)

    Returns an empty string when no message is available.
    """
    message = _first_message(response)
    if message is None:
        return ""

    # One accessor for both message shapes; a missing field reads as None,
    # which both helpers below turn into "".
    if isinstance(message, dict):
        read_field = message.get
    else:
        def read_field(name):
            return getattr(message, name, None)

    text = _stringify_message_content(read_field("content"))
    if text:
        return text

    # Some models put the payload into function/tool call arguments instead
    # of the message body.
    for field_name in ("function_call", "tool_calls"):
        text = _extract_tool_call_content(read_field(field_name))
        if text:
            return text

    return _stringify_message_content(read_field("reasoning_content"))
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def _stringify_message_content(content) -> str:
|
|
485
|
+
"""兼容不同 SDK 返回的纯文本、分段文本和结构化 content。"""
|
|
486
|
+
if content is None:
|
|
487
|
+
return ""
|
|
488
|
+
if isinstance(content, str):
|
|
489
|
+
return content
|
|
490
|
+
if isinstance(content, list):
|
|
491
|
+
parts = [_stringify_message_content(part) for part in content]
|
|
492
|
+
return "\n".join(part for part in parts if part)
|
|
493
|
+
if isinstance(content, dict):
|
|
494
|
+
for key in ("text", "content", "output_text", "json", "arguments"):
|
|
495
|
+
value = content.get(key)
|
|
496
|
+
text = _stringify_message_content(value)
|
|
497
|
+
if text:
|
|
498
|
+
return text
|
|
499
|
+
try:
|
|
500
|
+
return json.dumps(content, ensure_ascii=False)
|
|
501
|
+
except TypeError:
|
|
502
|
+
return str(content)
|
|
503
|
+
|
|
504
|
+
for attr in ("text", "content", "output_text"):
|
|
505
|
+
value = getattr(content, attr, None)
|
|
506
|
+
text = _stringify_message_content(value)
|
|
507
|
+
if text:
|
|
508
|
+
return text
|
|
509
|
+
return str(content)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def _extract_tool_call_content(tool_calls) -> str:
|
|
513
|
+
"""从工具/函数调用参数里兜底提取 JSON 文本。"""
|
|
514
|
+
if not tool_calls:
|
|
515
|
+
return ""
|
|
516
|
+
calls = tool_calls if isinstance(tool_calls, list) else [tool_calls]
|
|
517
|
+
for call in calls:
|
|
518
|
+
function = call.get("function") if isinstance(call, dict) else getattr(call, "function", None)
|
|
519
|
+
if function is None:
|
|
520
|
+
function = call
|
|
521
|
+
arguments = function.get("arguments") if isinstance(function, dict) else getattr(function, "arguments", None)
|
|
522
|
+
text = _stringify_message_content(arguments)
|
|
523
|
+
if text:
|
|
524
|
+
return text
|
|
525
|
+
return ""
|
|
472
526
|
|
|
473
527
|
|
|
474
528
|
def _extract_reasoning_content(response) -> str:
|
|
@@ -525,7 +579,19 @@ def _coerce_raw_items(parsed):
|
|
|
525
579
|
if isinstance(items, list):
|
|
526
580
|
return items
|
|
527
581
|
|
|
528
|
-
for key in (
|
|
582
|
+
for key in (
|
|
583
|
+
"knowledge_items",
|
|
584
|
+
"records",
|
|
585
|
+
"data",
|
|
586
|
+
"payload",
|
|
587
|
+
"output",
|
|
588
|
+
"response",
|
|
589
|
+
"answer",
|
|
590
|
+
"content",
|
|
591
|
+
"message",
|
|
592
|
+
"result",
|
|
593
|
+
"results",
|
|
594
|
+
):
|
|
529
595
|
value = parsed.get(key)
|
|
530
596
|
if isinstance(value, list):
|
|
531
597
|
print(f"llm parse notice: using non-standard list field '{key}' as items")
|
|
@@ -535,6 +601,13 @@ def _coerce_raw_items(parsed):
|
|
|
535
601
|
if isinstance(nested, list):
|
|
536
602
|
print(f"llm parse notice: using nested field '{key}' as items")
|
|
537
603
|
return nested
|
|
604
|
+
if isinstance(value, str) and value.strip():
|
|
605
|
+
nested = _extract_json_with_diagnostics(value)
|
|
606
|
+
if nested.value is not None:
|
|
607
|
+
nested_items = _coerce_raw_items(nested.value)
|
|
608
|
+
if isinstance(nested_items, list):
|
|
609
|
+
print(f"llm parse notice: parsed JSON string field '{key}' as items")
|
|
610
|
+
return nested_items
|
|
538
611
|
|
|
539
612
|
if _looks_like_single_item(parsed):
|
|
540
613
|
print("llm parse notice: wrapping single item object as items[0]")
|