union_kb_ingest 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
4. 可选调用大模型,把内容整理为项目知识库规范要求的 Markdown 文件。
|
|
11
11
|
5. 默认生成可直接交给知识库项目使用的 `status: active` Markdown 文件。
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
启用大模型时,工具只会读取 `prompts/知识库建立规范.md` 作为格式和质量约束,并由代码按当前片段、辅助上下文和输出 JSON 结构组装生成提示词。模型需依据原文语义判断业务场景、模块、角色、标签和风险等级;代码中的启发式生成只作为未启用大模型时的兜底,不使用预设业务关键词去指导大模型输出。
|
|
14
14
|
|
|
15
15
|
## 安装可选依赖
|
|
16
16
|
|
package/normalizer.py
CHANGED
|
@@ -19,6 +19,7 @@ CURRENT_DIR = Path(__file__).resolve().parent
|
|
|
19
19
|
KB_SPEC_PATH = CURRENT_DIR / "prompts" / "知识库建立规范.md"
|
|
20
20
|
TOOLS_PATH = CURRENT_DIR / "input" / "function" / "tools.json"
|
|
21
21
|
LLM_MAX_RETRIES = 10
|
|
22
|
+
COVERAGE_MAX_RETRIES = 3
|
|
22
23
|
|
|
23
24
|
|
|
24
25
|
@dataclass(frozen=True)
|
|
@@ -59,6 +60,7 @@ def _normalize_with_llm(block: ParsedBlock, status: str) -> List[KnowledgeItem]:
|
|
|
59
60
|
client = client_cls(api_key=config.api_key, base_url=config.base_url)
|
|
60
61
|
|
|
61
62
|
compact_retry = False
|
|
63
|
+
coverage_retry_count = 0
|
|
62
64
|
coverage_retry_feedback = ""
|
|
63
65
|
json_retry_feedback = ""
|
|
64
66
|
for attempt in range(1, LLM_MAX_RETRIES + 1):
|
|
@@ -150,18 +152,29 @@ def _normalize_with_llm(block: ParsedBlock, status: str) -> List[KnowledgeItem]:
|
|
|
150
152
|
if items:
|
|
151
153
|
coverage_issues = _source_fact_coverage_issues(block, items)
|
|
152
154
|
if coverage_issues:
|
|
155
|
+
high_relevance_issues = _high_relevance_coverage_issues(
|
|
156
|
+
client, config, block, items, coverage_issues
|
|
157
|
+
)
|
|
158
|
+
if not high_relevance_issues:
|
|
159
|
+
print(
|
|
160
|
+
"llm coverage warning ignored: "
|
|
161
|
+
"no highly relevant missing facts after relevance review"
|
|
162
|
+
)
|
|
163
|
+
return items
|
|
153
164
|
print(
|
|
154
165
|
"llm coverage failed: "
|
|
155
166
|
f"missing_facts={len(coverage_issues)} "
|
|
156
|
-
f"
|
|
167
|
+
f"high_relevance_missing_facts={len(high_relevance_issues)} "
|
|
168
|
+
f"preview={_preview(';'.join(high_relevance_issues[:3]))}"
|
|
157
169
|
)
|
|
158
|
-
if attempt >= LLM_MAX_RETRIES:
|
|
170
|
+
if coverage_retry_count >= COVERAGE_MAX_RETRIES or attempt >= LLM_MAX_RETRIES:
|
|
159
171
|
print(
|
|
160
172
|
"WARNING: source fact coverage failed after "
|
|
161
|
-
f"{
|
|
173
|
+
f"{coverage_retry_count} coverage retries; releasing draft for manual review"
|
|
162
174
|
)
|
|
163
|
-
return _items_with_coverage_warning(items, block,
|
|
164
|
-
|
|
175
|
+
return _items_with_coverage_warning(items, block, high_relevance_issues)
|
|
176
|
+
coverage_retry_count += 1
|
|
177
|
+
coverage_retry_feedback = _coverage_retry_prompt(block, high_relevance_issues, items)
|
|
165
178
|
time.sleep(min(2 ** (attempt - 1), 30))
|
|
166
179
|
continue
|
|
167
180
|
return items
|
|
@@ -199,6 +212,167 @@ def _compact_retry_prompt(base_prompt: str) -> str:
|
|
|
199
212
|
)
|
|
200
213
|
|
|
201
214
|
|
|
215
|
+
def _high_relevance_coverage_issues(
    client,
    config,
    block: ParsedBlock,
    items: List[KnowledgeItem],
    missing_facts: List[str],
) -> List[str]:
    """Ask the LLM which missing facts are highly relevant to the current items.

    Args:
        client: LLM client handed to ``_create_zhipu_completion``.
        config: LLM configuration handed to ``_create_zhipu_completion``.
        block: Source block the items were generated from.
        items: Knowledge items produced for ``block``.
        missing_facts: Facts the coverage check reported as missing.

    Returns:
        The subset of ``missing_facts`` judged highly relevant. When the
        review call fails or its answer cannot be interpreted, the result of
        ``_fallback_high_relevance_coverage_issues`` is returned instead.
    """
    if not missing_facts:
        return []

    # The relevance prompt only lists the first 20 facts. Anything beyond that
    # is never shown to the model, so it must not be treated as "reviewed and
    # irrelevant". Keep this in sync with the cap in _coverage_relevance_prompt.
    review_limit = 20
    reviewed = missing_facts[:review_limit]
    unreviewed = missing_facts[review_limit:]

    high_relevance = None
    try:
        prompt = _coverage_relevance_prompt(block, items, reviewed)
        response = _create_zhipu_completion(client, config, prompt)
        content = _extract_response_content(response)
        parsed = _extract_json_with_diagnostics(content).value
        high_relevance = _high_relevance_facts_from_analysis(parsed, reviewed)
    except Exception as exc:
        # Best effort: a failed review must not abort normalization.
        print(f"llm coverage relevance failed: {type(exc).__name__} detail={exc}")

    if high_relevance is not None:
        if unreviewed:
            # Facts the model never saw go through the conservative heuristic.
            for fact in _fallback_high_relevance_coverage_issues(block, items, unreviewed):
                if fact not in high_relevance:
                    high_relevance.append(fact)
        print(
            "llm coverage relevance: "
            f"missing_facts={len(missing_facts)} high_relevance={len(high_relevance)}"
        )
        return high_relevance

    fallback = _fallback_high_relevance_coverage_issues(block, items, missing_facts)
    print(
        "llm coverage relevance fallback: "
        f"missing_facts={len(missing_facts)} high_relevance={len(fallback)}"
    )
    return fallback
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _coverage_relevance_prompt(
    block: ParsedBlock,
    items: List[KnowledgeItem],
    missing_facts: List[str],
) -> str:
    """Build the prompt that asks the model to grade missing-fact relevance.

    Only the first 20 entries of ``missing_facts`` are listed. The source
    excerpt, auxiliary context and item digest are each passed through
    ``_preview`` and then cut to a fixed length before being embedded.
    """
    bullet_rows = [f"- {fact}" for fact in missing_facts[:20]]
    fact_bullets = "\n".join(bullet_rows)

    item_chunks = []
    for item in items:
        core_text = _core_sections_for_coverage(item.body)
        item_chunks.append(f"标题:{item.title}\n核心正文:{core_text}")
    items_digest = "\n\n".join(item_chunks)

    section_label = block.source_section or "全文"
    source_excerpt = _preview(block.content)[:4000] or "无"
    context_excerpt = _preview(block.context)[:2000] or "无"
    items_excerpt = _preview(items_digest)[:4000] or "无"

    return f"""
请判断以下“覆盖校验缺失事实”是否与当前知识条目的主题极高相关。

判定规则:
1. 只有缺失事实是回答当前条目标题或核心正文所必须保留的定义、规则、阈值、条件、主体、简称、例外或限制时,才标记为“极高”。
2. 来源文件标题、章节标题、目录项、上级主题名称、页眉页脚、纯标签、仅用于定位的小标题,通常不是“极高”,除非它本身就是当前条目要解释的完整定义或规则。
3. 辅助上下文只用于理解位置和主题,不要把辅助上下文中独有的信息作为缺失事实依据。
4. 只能返回 JSON object,不要 Markdown 或解释文字。

返回格式:
{{
"facts": [
{{"fact": "必须原样复制待判断事实", "relevance": "极高|一般|低", "reason": "一句话原因"}}
]
}}

来源文档:{block.source_doc}
来源章节:{section_label}

当前来源原文片段:
{source_excerpt}

辅助上下文:
{context_excerpt}

当前已生成条目:
{items_excerpt}

待判断事实:
{fact_bullets}
""".strip()
|
|
290
|
+
|
|
291
|
+
|
|
292
|
+
def _high_relevance_facts_from_analysis(parsed, missing_facts: List[str]):
    """Pick the facts that the relevance review marked as highly relevant.

    Args:
        parsed: Decoded JSON from the relevance review. A dict with a list
            under ``facts`` is expected; ``results`` or ``items`` are used
            when ``facts`` is missing or null.
        missing_facts: The original missing facts that were reviewed.

    Returns:
        ``None`` when ``parsed`` holds no usable list, so the caller can fall
        back. Otherwise the de-duplicated original facts, in answer order,
        whose relevance label contains "极高" or "high".
    """
    if not isinstance(parsed, dict):
        return None

    entries = parsed.get("facts")
    if entries is None:
        # Tolerate the alternative container names some answers use.
        for alias in ("results", "items"):
            candidate = parsed.get(alias)
            if isinstance(candidate, list):
                entries = candidate
                break
    if not isinstance(entries, list):
        return None

    lookup = {_coverage_text(fact): fact for fact in missing_facts}
    chosen: List[str] = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        label = str(entry.get("relevance") or entry.get("关联度") or "").strip().lower()
        if "极高" not in label and "high" not in label:
            continue
        text = str(entry.get("fact") or entry.get("事实") or entry.get("text") or "").strip()
        original = _match_missing_fact(text, lookup)
        if not original or original in chosen:
            continue
        chosen.append(original)
    return chosen
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
def _match_missing_fact(fact: str, missing_by_norm: Dict[str, str]) -> str:
    """Map a fact echoed by the model back to the original missing fact.

    Args:
        fact: Fact text as returned by the model.
        missing_by_norm: Original missing facts keyed by their
            ``_coverage_text`` normalization.

    Returns:
        The original fact on an exact normalized match, otherwise the first
        entry whose normalized text contains, or is contained in, the
        normalized ``fact``. Returns ``""`` when nothing matches.
    """
    fact_norm = _coverage_text(fact)
    if not fact_norm:
        return ""
    if fact_norm in missing_by_norm:
        return missing_by_norm[fact_norm]
    for missing_norm, missing in missing_by_norm.items():
        # An empty key is a substring of every string; without this guard a
        # fact that normalizes to "" would absorb every model answer.
        if not missing_norm:
            continue
        if fact_norm in missing_norm or missing_norm in fact_norm:
            return missing
    return ""
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def _fallback_high_relevance_coverage_issues(
    block: ParsedBlock,
    items: List[KnowledgeItem],
    missing_facts: List[str],
) -> List[str]:
    """Conservative fallback used when the relevance review is unavailable.

    Keeps every missing fact except those that
    ``_looks_like_structural_missing_fact`` classifies as structural.
    """
    kept: List[str] = []
    for fact in missing_facts:
        if _looks_like_structural_missing_fact(block, items, fact):
            continue
        kept.append(fact)
    return kept
|
|
342
|
+
|
|
343
|
+
|
|
344
|
+
def _looks_like_structural_missing_fact(
    block: ParsedBlock,
    items: List[KnowledgeItem],
    fact: str,
) -> bool:
    """Tell whether a missing fact is only a title, section or locator label.

    A fact counts as structural when its normalized text is empty, when it
    equals the normalized form of one of the block's naming fields or an item
    title, or when it is at most 30 normalized characters long and contains
    none of the definition, obligation, comparison or digit markers in the
    pattern below.
    """
    fact_norm = _coverage_text(fact)
    if not fact_norm:
        return True

    labels = [
        block.source_doc,
        Path(block.source_doc).stem,
        block.source_section,
        block.category,
        block.subcategory,
        block.source_doc_description,
        block.subcategory_description,
    ]
    labels += list(block.category_path)
    labels += list(block.related_categories)
    labels += [item.title for item in items]

    label_norms = set()
    for label in labels:
        if label:
            label_norms.add(_coverage_text(label))
    if fact_norm in label_norms:
        return True

    if len(fact_norm) > 30:
        return False
    # Short text without any rule-like marker is treated as a bare heading.
    has_rule_marker = re.search(
        r"是|为|指|称|简称|英文|应|需|必须|不得|禁止|超过|低于|大于|小于|不少于|不超过|\d",
        fact,
    )
    return not has_rule_marker
|
|
374
|
+
|
|
375
|
+
|
|
202
376
|
def _coverage_retry_prompt(
|
|
203
377
|
block: ParsedBlock,
|
|
204
378
|
missing_facts: List[str],
|
|
@@ -279,22 +453,76 @@ def _get_zhipu_client_class():
|
|
|
279
453
|
|
|
280
454
|
def _extract_response_content(response) -> str:
    """Extract the answer text from a model response.

    The first message (via ``_first_message``) may be a dict or an object.
    Sources are tried in order and the first non-empty text wins: ``content``,
    then ``function_call`` arguments, then ``tool_calls`` arguments, and
    finally ``reasoning_content``. Returns ``""`` when there is no message.
    """
    message = _first_message(response)
    if message is None:
        return ""

    # Same lookup order for both shapes; only the field access differs.
    if isinstance(message, dict):
        read_field = message.get
    else:
        def read_field(name):
            return getattr(message, name, None)

    text = _stringify_message_content(read_field("content"))
    if text:
        return text
    for call_field in ("function_call", "tool_calls"):
        text = _extract_tool_call_content(read_field(call_field))
        if text:
            return text
    return _stringify_message_content(read_field("reasoning_content"))
|
|
482
|
+
|
|
483
|
+
|
|
484
|
+
def _stringify_message_content(content) -> str:
|
|
485
|
+
"""兼容不同 SDK 返回的纯文本、分段文本和结构化 content。"""
|
|
486
|
+
if content is None:
|
|
487
|
+
return ""
|
|
488
|
+
if isinstance(content, str):
|
|
489
|
+
return content
|
|
490
|
+
if isinstance(content, list):
|
|
491
|
+
parts = [_stringify_message_content(part) for part in content]
|
|
492
|
+
return "\n".join(part for part in parts if part)
|
|
493
|
+
if isinstance(content, dict):
|
|
494
|
+
for key in ("text", "content", "output_text", "json", "arguments"):
|
|
495
|
+
value = content.get(key)
|
|
496
|
+
text = _stringify_message_content(value)
|
|
497
|
+
if text:
|
|
498
|
+
return text
|
|
499
|
+
try:
|
|
500
|
+
return json.dumps(content, ensure_ascii=False)
|
|
501
|
+
except TypeError:
|
|
502
|
+
return str(content)
|
|
503
|
+
|
|
504
|
+
for attr in ("text", "content", "output_text"):
|
|
505
|
+
value = getattr(content, attr, None)
|
|
506
|
+
text = _stringify_message_content(value)
|
|
507
|
+
if text:
|
|
508
|
+
return text
|
|
509
|
+
return str(content)
|
|
510
|
+
|
|
511
|
+
|
|
512
|
+
def _extract_tool_call_content(tool_calls) -> str:
    """Recover JSON text from tool or function call arguments as a fallback.

    Accepts a single call or a list of calls, each a dict or an object. For
    every call the ``function`` member is used when present, otherwise the
    call itself. The first non-empty stringified ``arguments`` is returned,
    or ``""`` when none is found.
    """
    if not tool_calls:
        return ""

    if isinstance(tool_calls, list):
        pending = tool_calls
    else:
        pending = [tool_calls]

    for entry in pending:
        if isinstance(entry, dict):
            target = entry.get("function")
        else:
            target = getattr(entry, "function", None)
        if target is None:
            # Legacy function_call shape: the arguments sit on the call itself.
            target = entry

        if isinstance(target, dict):
            raw_args = target.get("arguments")
        else:
            raw_args = getattr(target, "arguments", None)

        rendered = _stringify_message_content(raw_args)
        if rendered:
            return rendered
    return ""
|
|
298
526
|
|
|
299
527
|
|
|
300
528
|
def _extract_reasoning_content(response) -> str:
|
|
@@ -351,7 +579,19 @@ def _coerce_raw_items(parsed):
|
|
|
351
579
|
if isinstance(items, list):
|
|
352
580
|
return items
|
|
353
581
|
|
|
354
|
-
for key in (
|
|
582
|
+
for key in (
|
|
583
|
+
"knowledge_items",
|
|
584
|
+
"records",
|
|
585
|
+
"data",
|
|
586
|
+
"payload",
|
|
587
|
+
"output",
|
|
588
|
+
"response",
|
|
589
|
+
"answer",
|
|
590
|
+
"content",
|
|
591
|
+
"message",
|
|
592
|
+
"result",
|
|
593
|
+
"results",
|
|
594
|
+
):
|
|
355
595
|
value = parsed.get(key)
|
|
356
596
|
if isinstance(value, list):
|
|
357
597
|
print(f"llm parse notice: using non-standard list field '{key}' as items")
|
|
@@ -361,6 +601,13 @@ def _coerce_raw_items(parsed):
|
|
|
361
601
|
if isinstance(nested, list):
|
|
362
602
|
print(f"llm parse notice: using nested field '{key}' as items")
|
|
363
603
|
return nested
|
|
604
|
+
if isinstance(value, str) and value.strip():
|
|
605
|
+
nested = _extract_json_with_diagnostics(value)
|
|
606
|
+
if nested.value is not None:
|
|
607
|
+
nested_items = _coerce_raw_items(nested.value)
|
|
608
|
+
if isinstance(nested_items, list):
|
|
609
|
+
print(f"llm parse notice: parsed JSON string field '{key}' as items")
|
|
610
|
+
return nested_items
|
|
364
611
|
|
|
365
612
|
if _looks_like_single_item(parsed):
|
|
366
613
|
print("llm parse notice: wrapping single item object as items[0]")
|
|
@@ -418,7 +665,7 @@ def _build_prompt(block: ParsedBlock, status: str) -> str:
|
|
|
418
665
|
要求:
|
|
419
666
|
1. 严格参照《知识库建立规范》的元数据字段、正文 5 节结构、内容切分原则和质量校验要求生成。
|
|
420
667
|
2. 只依据原文理解知识点、对象、模块、角色、标签和风险等级,不要依据示例或常见关键词进行套写。
|
|
421
|
-
3.
|
|
668
|
+
3. 如果一个片段包含多个独立定义、规则、流程、指标、接口或判定标准,请拆成多个 items。
|
|
422
669
|
4. 每个 item 必须可独立检索、独立回答,颗粒度控制在 800 到 1500 中文字符左右;复杂表格可适当放宽。
|
|
423
670
|
5. 不要编造来源、阈值、角色、日期、版本;原文没有的信息留空、空数组或使用规范允许的通用值。
|
|
424
671
|
6. 涉及表格、阈值、比较符、单位、持续时间、笔数、适用对象时必须保留原始逻辑。
|
|
@@ -447,7 +694,7 @@ doc_type 只能取:
|
|
|
447
694
|
"items": [
|
|
448
695
|
{{
|
|
449
696
|
"title": "",
|
|
450
|
-
"doc_type": "
|
|
697
|
+
"doc_type": "biz",
|
|
451
698
|
"category": "",
|
|
452
699
|
"subcategory": "",
|
|
453
700
|
"related_items": [],
|
package/package.json
CHANGED
|
@@ -14,18 +14,12 @@
|
|
|
14
14
|
|
|
15
15
|
## 2. 条目类型
|
|
16
16
|
|
|
17
|
-
`doc_type`
|
|
17
|
+
`doc_type` 只做粗粒度标识,用于区分普通知识条目和明确的可调用能力说明。无法明确归类时一律使用 `biz`,不要为了贴合某类业务场景创造或套用细分类型。
|
|
18
18
|
|
|
19
19
|
| 类型 | 说明 |
|
|
20
20
|
| --- | --- |
|
|
21
|
-
| `biz` |
|
|
22
|
-
| `
|
|
23
|
-
| `sop` | 操作步骤、处理流程、执行要求 |
|
|
24
|
-
| `metric` | 指标定义、统计口径、计算方式、阈值 |
|
|
25
|
-
| `severity` | 分级、定级、优先级或等级判断 |
|
|
26
|
-
| `change` | 变更、发布、评估、通知或回退要求 |
|
|
27
|
-
| `function` | 可调用能力、工具、接口或函数说明 |
|
|
28
|
-
| `evaluation` | 评价对象、评价周期、评价指标、评价结果 |
|
|
21
|
+
| `biz` | 普通知识条目,覆盖概念、对象、范围、规则、流程、指标、阈值、分级、要求等来源事实 |
|
|
22
|
+
| `function` | 来源正文明确描述可调用工具、接口、函数名称、入参或出参时使用 |
|
|
29
23
|
|
|
30
24
|
## 3. 单篇知识文档格式
|
|
31
25
|
|
|
@@ -101,7 +95,7 @@ status: "active"
|
|
|
101
95
|
| --- | --- |
|
|
102
96
|
| `low` | 解释、查询、制度说明、定义说明 |
|
|
103
97
|
| `medium` | 需要判断条件、组合信息或给出建议 |
|
|
104
|
-
| `high` |
|
|
98
|
+
| `high` | 涉及高影响动作、降级、暂停、回退等需谨慎确认的建议 |
|
|
105
99
|
| `critical` | 涉及不可逆动作、敏感操作或必须人工审批的内容 |
|
|
106
100
|
|
|
107
101
|
## 5. 正文内容规范
|
|
@@ -152,7 +146,7 @@ status: "active"
|
|
|
152
146
|
2. 一组紧密相关的规则或条件。
|
|
153
147
|
3. 一个流程或操作要求。
|
|
154
148
|
4. 一个指标口径或阈值表。
|
|
155
|
-
5.
|
|
149
|
+
5. 一组等级、优先级或判定标准。
|
|
156
150
|
6. 一个接口、函数或工具说明。
|
|
157
151
|
|
|
158
152
|
不要为了凑栏目把无关内容合并,也不要把整份制度原文作为一个向量文档直接入库。
|
package/schemas.py
CHANGED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
你是业务知识库整理助手。
|
|
2
|
-
|
|
3
|
-
请基于输入原文生成标准知识库条目,并严格参照 `prompts/知识库建立规范.md`。必须遵守:
|
|
4
|
-
|
|
5
|
-
1. 只依据原文,不编造阈值、角色、日期、版本。
|
|
6
|
-
2. 如果一个片段包含多个独立定义、规则、流程、指标、接口或评价标准,拆成多个 items。
|
|
7
|
-
3. 每个 item 需要可独立检索、独立回答。
|
|
8
|
-
4. 保留表格、阈值、比较符、单位、持续时间和适用对象。
|
|
9
|
-
5. 输出严格 JSON,不要 Markdown 代码围栏。
|
|
10
|
-
6. 不要依据预设业务关键词套写业务模块、角色、标签或风险等级,应根据原文语义判断;原文缺失时使用空数组或规范允许的默认值。
|
|
11
|
-
|
|
12
|
-
输出格式:
|
|
13
|
-
|
|
14
|
-
{
|
|
15
|
-
"items": [
|
|
16
|
-
{
|
|
17
|
-
"title": "",
|
|
18
|
-
"doc_type": "scenario",
|
|
19
|
-
"business_modules": [],
|
|
20
|
-
"source_version": "",
|
|
21
|
-
"risk_level": "low",
|
|
22
|
-
"applicable_roles": [],
|
|
23
|
-
"tags": [],
|
|
24
|
-
"body": "# 标题\n\n## 1. 核心内容\n\n...\n\n## 2. 适用边界\n\n...\n\n## 3. 使用要求\n\n...\n\n## 4. 关联能力\n\n...\n\n## 5. 来源依据\n\n..."
|
|
25
|
-
}
|
|
26
|
-
]
|
|
27
|
-
}
|