@tikomni/skills 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. package/package.json +1 -1
  2. package/skills/creator-analysis/SKILL.md +34 -10
  3. package/skills/creator-analysis/references/contracts/creator-card-fields.md +2 -0
  4. package/skills/creator-analysis/references/contracts/work-card-fields.md +40 -4
  5. package/skills/creator-analysis/references/platform-guides/douyin.md +41 -36
  6. package/skills/creator-analysis/references/platform-guides/generic.md +11 -7
  7. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +45 -30
  8. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +224 -95
  9. package/skills/creator-analysis/references/workflow.md +8 -3
  10. package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +205 -21
  11. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +54 -11
  12. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +200 -13
  13. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +113 -42
  14. package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +65 -7
  15. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +82 -18
  16. package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +198 -32
  17. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +374 -31
  18. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +68 -12
  19. package/skills/creator-analysis/scripts/core/storage_router.py +3 -0
  20. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +3 -2
  21. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +314 -137
@@ -47,6 +47,9 @@ def resolve_default_card_root() -> str:
47
47
  # Keep import-time compatibility for other scripts without crashing when env is absent.
48
48
  DEFAULT_CARD_ROOT = ""
49
49
  CARD_TYPES = ["work", "author", "author_sample_work"]
50
+ AUTHOR_SAMPLE_CARD_ROLE = "author_sample_card"
51
+ SAMPLE_WORK_CARD_ROLE = "sample_work_card"
52
+ AUTHOR_CARD_ROLE = "author_card"
50
53
 
51
54
 
52
55
  def _normalize_lines(value: Any) -> List[str]:
@@ -92,6 +95,39 @@ def _safe_optional_int(value: Any) -> Optional[int]:
92
95
  return None
93
96
 
94
97
 
98
+ def _safe_text(value: Any) -> str:
99
+ if value is None:
100
+ return ""
101
+ if isinstance(value, str):
102
+ return value.strip()
103
+ if isinstance(value, (int, float, bool)):
104
+ return str(value).strip()
105
+ return ""
106
+
107
+
108
+ def _safe_text_list(value: Any) -> List[str]:
109
+ if isinstance(value, list):
110
+ result: List[str] = []
111
+ for item in value:
112
+ if isinstance(item, dict):
113
+ for key in ("name", "value", "label", "hashtag_name", "search_text", "tag_name", "text"):
114
+ text = _safe_text(item.get(key))
115
+ if text:
116
+ result.append(text)
117
+ break
118
+ continue
119
+ text = _safe_text(item)
120
+ if text:
121
+ result.append(text)
122
+ return list(dict.fromkeys(result))
123
+ if isinstance(value, str):
124
+ text = _safe_text(value)
125
+ if not text:
126
+ return []
127
+ return [item for item in re.split(r"[,,\s]+", text) if item]
128
+ return []
129
+
130
+
95
131
  def _to_unix_sec(value: Any) -> int:
96
132
  if value is None:
97
133
  return 0
@@ -417,7 +453,7 @@ def _extract_tags(payload: Dict[str, Any]) -> List[str]:
417
453
  for key in ("tags", "tag_list", "hashtags"):
418
454
  value = payload.get(key)
419
455
  if isinstance(value, list):
420
- tags = [normalize_text(item).lstrip("#") for item in value if normalize_text(item)]
456
+ tags = [item.lstrip("#") for item in _safe_text_list(value)]
421
457
  if tags:
422
458
  return list(dict.fromkeys(tags))
423
459
  if isinstance(value, str) and normalize_text(value):
@@ -430,7 +466,7 @@ def _extract_tags(payload: Dict[str, Any]) -> List[str]:
430
466
  for key in ("tags", "tag_list", "hashtags"):
431
467
  value = source.get(key)
432
468
  if isinstance(value, list):
433
- tags = [normalize_text(item).lstrip("#") for item in value if normalize_text(item)]
469
+ tags = [item.lstrip("#") for item in _safe_text_list(value)]
434
470
  if tags:
435
471
  return list(dict.fromkeys(tags))
436
472
 
@@ -476,11 +512,14 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
476
512
  if create_time_sec <= 0:
477
513
  create_time_sec = _to_unix_sec(_source_dict(payload).get("create_time"))
478
514
 
479
- digg_count = _safe_int(payload.get("digg_count"), default=0)
480
- comment_count = _safe_int(payload.get("comment_count"), default=0)
481
- collect_count = _safe_int(payload.get("collect_count"), default=0)
482
- share_count = _safe_int(payload.get("share_count"), default=0)
515
+ metrics = payload.get("metrics") if isinstance(payload.get("metrics"), dict) else {}
516
+ digg_count = _safe_int(payload.get("digg_count"), default=_safe_int(metrics.get("like"), default=0))
517
+ comment_count = _safe_int(payload.get("comment_count"), default=_safe_int(metrics.get("comment"), default=0))
518
+ collect_count = _safe_int(payload.get("collect_count"), default=_safe_int(metrics.get("collect"), default=0))
519
+ share_count = _safe_int(payload.get("share_count"), default=_safe_int(metrics.get("share"), default=0))
483
520
  play_count = _safe_optional_int(payload.get("play_count"))
521
+ if play_count is None:
522
+ play_count = _safe_optional_int(metrics.get("play"))
484
523
 
485
524
  summary = normalize_text(payload.get("summary"))
486
525
  raw_content = normalize_text(payload.get("raw_content"))
@@ -512,6 +551,8 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
512
551
  if not primary_text:
513
552
  primary_text = asr_clean if primary_text_source == "asr_clean" else normalize_text(payload.get("desc"))
514
553
 
554
+ sampled_explanation = payload.get("sampled_explanation") if isinstance(payload.get("sampled_explanation"), dict) else {}
555
+
515
556
  return {
516
557
  "title": title,
517
558
  "platform": platform,
@@ -540,6 +581,18 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
540
581
  "raw_content": raw_content,
541
582
  "primary_text": primary_text,
542
583
  "asr_clean": asr_clean,
584
+ "performance_score": payload.get("performance_score"),
585
+ "performance_score_norm": payload.get("performance_score_norm"),
586
+ "bucket": normalize_text(payload.get("bucket")),
587
+ "hook_type": normalize_text(payload.get("hook_type")),
588
+ "structure_type": normalize_text(payload.get("structure_type")),
589
+ "cta_type": normalize_text(payload.get("cta_type")),
590
+ "content_form": normalize_text(payload.get("content_form")),
591
+ "style_markers": _safe_text_list(payload.get("style_markers")),
592
+ "analysis_eligibility": normalize_text(payload.get("analysis_eligibility")) or "eligible",
593
+ "analysis_exclusion_reason": normalize_text(payload.get("analysis_exclusion_reason")),
594
+ "card_role": normalize_text(payload.get("card_role")),
595
+ "sampled_explanation": sampled_explanation,
543
596
  "platform_native_refs": payload.get("platform_native_refs") if isinstance(payload.get("platform_native_refs"), dict) else {},
544
597
  "request_id": payload.get("request_id"),
545
598
  "confidence": normalize_text(payload.get("confidence")) or "low",
@@ -552,6 +605,9 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
552
605
  "sampled_work_explanations": payload.get("sampled_work_explanations") if isinstance(payload.get("sampled_work_explanations"), dict) else {},
553
606
  "author_card_highlights": payload.get("author_card_highlights") if isinstance(payload.get("author_card_highlights"), dict) else {},
554
607
  "validation": payload.get("validation") if isinstance(payload.get("validation"), dict) else {},
608
+ "quality_tier": normalize_text(payload.get("quality_tier")),
609
+ "stage_status": payload.get("stage_status") if isinstance(payload.get("stage_status"), dict) else {},
610
+ "sampled_work_ids": _safe_text_list(payload.get("sampled_work_ids")),
555
611
  "business_score": _safe_int(payload.get("business_score"), default=0),
556
612
  "benchmark_gap_score": _safe_int(payload.get("benchmark_gap_score"), default=0),
557
613
  "style_radar": payload.get("style_radar") if isinstance(payload.get("style_radar"), dict) else {},
@@ -965,6 +1021,7 @@ def _build_output_path(
965
1021
  now: dt.datetime,
966
1022
  sample_author: Optional[str],
967
1023
  storage_config: Optional[Dict[str, Any]],
1024
+ extra_route_parts: Optional[List[str]] = None,
968
1025
  ) -> Dict[str, str]:
969
1026
  author_slug = _pick_author_slug(payload, author_hint=sample_author)
970
1027
  title_slug = _pick_title_slug(payload)
@@ -979,6 +1036,7 @@ def _build_output_path(
979
1036
  year_month=now.strftime("%Y-%m"),
980
1037
  timestamp=now.strftime("%Y%m%d-%H%M%S"),
981
1038
  storage_config=storage_config,
1039
+ extra_route_parts=extra_route_parts,
982
1040
  )
983
1041
  return {
984
1042
  "path": path,
@@ -989,7 +1047,30 @@ def _build_output_path(
989
1047
  }
990
1048
 
991
1049
 
992
- def _render_author_markdown(
1050
+ def _json_details_block(title: str, payload: Any) -> List[str]:
1051
+ return [
1052
+ "<details>",
1053
+ f"<summary>{title}</summary>",
1054
+ "",
1055
+ "```json",
1056
+ json.dumps(payload, ensure_ascii=False, indent=2),
1057
+ "```",
1058
+ "",
1059
+ "</details>",
1060
+ ]
1061
+
1062
+
1063
def _display_list(values: Any, *, fallback: str = "数据不足") -> str:
    """Render ``values`` on one line, joined by ``、``.

    ``values`` is first normalized with ``_safe_text_list``; when nothing
    usable remains, ``fallback`` is returned instead.
    """
    entries = _safe_text_list(values)
    if not entries:
        return fallback
    return "、".join(entries)
1066
+
1067
+
1068
def _display_scalar(value: Any, *, fallback: str = "数据不足") -> str:
    """Return ``normalize_text(value)``, or ``fallback`` when that result is empty.

    ``normalize_text`` is defined elsewhere in this module; this helper only
    substitutes the placeholder for a falsy result.
    """
    rendered = normalize_text(value)
    return rendered if rendered else fallback
1071
+
1072
+
1073
+ def _render_author_card_markdown(
993
1074
  *,
994
1075
  card_id: str,
995
1076
  card_type: str,
@@ -998,59 +1079,38 @@ def _render_author_markdown(
998
1079
  ) -> str:
999
1080
  analysis_output = fields.get("analysis_output") if isinstance(fields.get("analysis_output"), dict) else {}
1000
1081
  author_analysis_v2 = fields.get("author_analysis_v2") if isinstance(fields.get("author_analysis_v2"), dict) else analysis_output.get("author_analysis_v2", {})
1001
- if not isinstance(author_analysis_v2, dict):
1002
- author_analysis_v2 = {}
1003
- sampled_work_explanations = fields.get("sampled_work_explanations") if isinstance(fields.get("sampled_work_explanations"), dict) else analysis_output.get("sampled_work_explanations", {})
1004
- if not isinstance(sampled_work_explanations, dict):
1005
- sampled_work_explanations = {}
1006
- author_card_highlights = fields.get("author_card_highlights") if isinstance(fields.get("author_card_highlights"), dict) else {}
1007
- if not isinstance(author_card_highlights, dict):
1008
- author_card_highlights = {}
1082
+ author_analysis_v2 = author_analysis_v2 if isinstance(author_analysis_v2, dict) else {}
1009
1083
  validation = fields.get("validation") if isinstance(fields.get("validation"), dict) else analysis_output.get("validation", {})
1010
- if not isinstance(validation, dict):
1011
- validation = {}
1012
-
1013
- business_score = _safe_int(fields.get("business_score"), default=_safe_int(analysis_output.get("business_score"), default=0))
1014
- benchmark_gap_score = _safe_int(fields.get("benchmark_gap_score"), default=_safe_int(analysis_output.get("benchmark_gap_score"), default=0))
1015
- style_radar = fields.get("style_radar") if isinstance(fields.get("style_radar"), dict) else analysis_output.get("style_radar", {})
1016
- if not isinstance(style_radar, dict):
1017
- style_radar = {}
1018
-
1019
- core_contradictions = fields.get("core_contradictions") if isinstance(fields.get("core_contradictions"), list) else analysis_output.get("core_contradictions", [])
1020
- if not isinstance(core_contradictions, list):
1021
- core_contradictions = []
1084
+ validation = validation if isinstance(validation, dict) else {}
1085
+ stage_status = fields.get("stage_status") if isinstance(fields.get("stage_status"), dict) else {}
1086
+ stage_status = stage_status if isinstance(stage_status, dict) else {}
1087
+ sampled_work_explanations = fields.get("sampled_work_explanations") if isinstance(fields.get("sampled_work_explanations"), dict) else analysis_output.get("sampled_work_explanations", {})
1088
+ sampled_work_explanations = sampled_work_explanations if isinstance(sampled_work_explanations, dict) else {}
1089
+ quality_tier = _display_scalar(fields.get("quality_tier"), fallback="unknown")
1022
1090
 
1023
- recommendations = fields.get("recommendations") if isinstance(fields.get("recommendations"), list) else analysis_output.get("recommendations", [])
1024
- if not isinstance(recommendations, list):
1025
- recommendations = []
1091
+ positioning = author_analysis_v2.get("author_positioning") if isinstance(author_analysis_v2.get("author_positioning"), dict) else {}
1092
+ trust_model = author_analysis_v2.get("trust_model") if isinstance(author_analysis_v2.get("trust_model"), dict) else {}
1093
+ content_mechanism = author_analysis_v2.get("content_mechanism") if isinstance(author_analysis_v2.get("content_mechanism"), dict) else {}
1094
+ commercial_bridge = author_analysis_v2.get("commercial_bridge") if isinstance(author_analysis_v2.get("commercial_bridge"), dict) else {}
1095
+ core_tensions = author_analysis_v2.get("core_tensions") if isinstance(author_analysis_v2.get("core_tensions"), dict) else {}
1096
+ clone_guidance = author_analysis_v2.get("clone_guidance") if isinstance(author_analysis_v2.get("clone_guidance"), dict) else {}
1097
+ evidence_pack = author_analysis_v2.get("evidence_pack") if isinstance(author_analysis_v2.get("evidence_pack"), dict) else {}
1026
1098
 
1027
- business_analysis = normalize_text(fields.get("business_analysis")) or normalize_text(analysis_output.get("business_analysis"))
1028
- benchmark_analysis = normalize_text(fields.get("benchmark_analysis")) or normalize_text(analysis_output.get("benchmark_analysis"))
1029
- author_portrait = normalize_text(author_card_highlights.get("one_liner")) or normalize_text(fields.get("summary")) or normalize_text(analysis_output.get("author_portrait"))
1099
+ sampled_work_ids = _safe_text_list(fields.get("sampled_work_ids"))
1100
+ representative_works = _safe_text_list(evidence_pack.get("representative_works")) or sampled_work_ids[:8]
1030
1101
 
1031
1102
  fm = {
1032
1103
  "card_id": card_id,
1033
1104
  "card_type": card_type,
1105
+ "card_role": fields.get("card_role") or AUTHOR_CARD_ROLE,
1034
1106
  "platform": fields.get("platform"),
1035
1107
  "generated_at": generated_at,
1036
1108
  "updated_at": generated_at,
1037
1109
  "title": fields.get("title"),
1038
- "platform_work_id": fields.get("platform_work_id"),
1039
- "author": fields.get("author"),
1040
- "author_handle": fields.get("author_handle"),
1041
1110
  "platform_author_id": fields.get("platform_author_id"),
1111
+ "author_handle": fields.get("author_handle"),
1042
1112
  "nickname": fields.get("nickname"),
1043
- "ip_location": fields.get("ip_location"),
1044
- "avatar_url": fields.get("avatar_url"),
1045
- "signature": fields.get("signature"),
1046
- "fans_count": fields.get("fans_count"),
1047
- "liked_count": fields.get("liked_count"),
1048
- "collected_count": fields.get("collected_count"),
1049
- "works_count": fields.get("works_count"),
1050
- "verified": fields.get("verified"),
1051
- "snapshot_at": fields.get("snapshot_at"),
1052
- "business_score": business_score,
1053
- "benchmark_gap_score": benchmark_gap_score,
1113
+ "quality_tier": quality_tier,
1054
1114
  "request_id": fields.get("request_id"),
1055
1115
  }
1056
1116
 
@@ -1062,91 +1122,209 @@ def _render_author_markdown(
1062
1122
  lines = [
1063
1123
  *frontmatter,
1064
1124
  "",
1065
- "## 基础事实",
1066
- f"- 平台:{fields.get('platform') or '未知'}",
1067
- f"- 作者ID:{fields.get('platform_author_id') or '未知'}",
1068
- f"- 账号标识:{fields.get('author_handle') or 'N/A'}",
1069
- f"- 昵称:{fields.get('nickname') or fields.get('author') or '未知'}",
1070
- f"- IP属地:{fields.get('ip_location') or 'N/A'}",
1071
- f"- 签名:{fields.get('signature') or 'N/A'}",
1072
- f"- 头像:{fields.get('avatar_url') or 'N/A'}",
1125
+ "## 基础主页事实",
1126
+ f"- 平台:{_display_scalar(fields.get('platform'), fallback='未知')}",
1127
+ f"- 作者ID:{_display_scalar(fields.get('platform_author_id'), fallback='未知')}",
1128
+ f"- 账号标识:{_display_scalar(fields.get('author_handle'), fallback='N/A')}",
1129
+ f"- 昵称:{_display_scalar(fields.get('nickname') or fields.get('author'), fallback='未知')}",
1130
+ f"- IP属地:{_display_scalar(fields.get('ip_location'), fallback='N/A')}",
1131
+ f"- 签名:{_display_scalar(fields.get('signature'), fallback='N/A')}",
1073
1132
  f"- 粉丝数:{_display_metric(fields.get('fans_count'))}",
1074
1133
  f"- 累计获赞:{_display_metric(fields.get('liked_count'))}",
1075
1134
  f"- 累计收藏:{_display_metric(fields.get('collected_count'))}",
1076
1135
  f"- 作品数:{_display_metric(fields.get('works_count'))}",
1077
- f"- 认证状态:{'是' if fields.get('verified') else '否'}" if fields.get('verified') is not None else "- 认证状态:N/A",
1078
- f"- 抓取时间:{fields.get('snapshot_at') or 'N/A'}",
1136
+ f"- 质量档:{quality_tier}",
1079
1137
  "",
1080
- "## 作者画像",
1081
- author_portrait or "数据不足",
1138
+ "## 作者定位",
1139
+ _display_scalar(positioning.get("one_liner") or fields.get("summary")),
1140
+ f"- 作者类型:{_display_scalar(positioning.get('author_type'))}",
1141
+ f"- 主要角色:{_display_scalar(positioning.get('primary_role'))}",
1142
+ f"- 目标受众:{_display_scalar(positioning.get('target_audience'))}",
1143
+ f"- 核心问题:{_display_scalar(positioning.get('core_problem_solved'))}",
1144
+ f"- 核心价值:{_display_scalar(positioning.get('core_value_proposition'))}",
1082
1145
  "",
1083
- "## 主页摘要卡",
1084
- f"- 核心价值:{normalize_text(author_card_highlights.get('core_value_proposition')) or '数据不足'}",
1085
- f"- 主要信任源:{normalize_text(author_card_highlights.get('primary_trust_source')) or '数据不足'}",
1086
- f"- 胜率结构:{('、'.join([normalize_text(x) for x in author_card_highlights.get('winning_content_structures', []) if normalize_text(x)])) or '数据不足'}",
1087
- f"- 可能产品:{('、'.join([normalize_text(x) for x in author_card_highlights.get('likely_products', []) if normalize_text(x)])) or '证据不足'}",
1088
- f"- 最大张力:{normalize_text(author_card_highlights.get('most_important_tension')) or '数据不足'}",
1089
- f"- 只学一件事:{normalize_text(author_card_highlights.get('if_only_learn_one_thing')) or '数据不足'}",
1146
+ "## 信任模型",
1147
+ f"- 主要信任源:{_display_scalar(trust_model.get('primary_trust_source'))}",
1148
+ f"- 次级信任源:{_display_list(trust_model.get('secondary_trust_sources'))}",
1149
+ f"- 建立机制:{_display_list(trust_model.get('trust_building_mechanisms'))}",
1150
+ f"- 风险:{_display_list(trust_model.get('trust_risks'))}",
1090
1151
  "",
1091
- "## 商业分析",
1092
- business_analysis or "数据不足",
1152
+ "## 内容机制",
1153
+ f"- 世界观:{_display_scalar((author_analysis_v2.get('cognitive_engine') or {}).get('worldview'))}",
1154
+ f"- 推理模式:{_display_list((author_analysis_v2.get('cognitive_engine') or {}).get('reasoning_modes'))}",
1155
+ f"- 内容来源:{_display_list(content_mechanism.get('topic_sources'))}",
1156
+ f"- 内容目标:{_display_list(content_mechanism.get('topic_goals'))}",
1157
+ f"- 优势结构:{_display_list(content_mechanism.get('winning_content_structures'))}",
1158
+ f"- 流量驱动:{_display_list(content_mechanism.get('traffic_drivers'))}",
1159
+ f"- 主导主题:{_display_list(content_mechanism.get('dominant_themes'))}",
1093
1160
  "",
1094
- "## 对标分析",
1095
- benchmark_analysis or "数据不足",
1161
+ "## 商业桥",
1162
+ f"- 漏斗角色:{_display_list(commercial_bridge.get('content_role_in_funnel'))}",
1163
+ f"- 可能产品:{_display_list(commercial_bridge.get('likely_products'), fallback='证据不足')}",
1164
+ f"- 转化路径:{_display_scalar(commercial_bridge.get('conversion_path'), fallback='证据不足')}",
1165
+ f"- 商业信号:{_display_list(commercial_bridge.get('business_model_signals'), fallback='证据不足')}",
1096
1166
  "",
1097
- "## 评分",
1098
- f"- business_score: {business_score}",
1099
- f"- benchmark_gap_score: {benchmark_gap_score}",
1167
+ "## 核心张力",
1168
+ f"- 最重要张力:{_display_scalar(core_tensions.get('most_important_tension'))}",
1169
+ f"- 张力列表:{_display_list(core_tensions.get('tensions'))}",
1100
1170
  "",
1101
- "## 风格雷达",
1102
- "```json",
1103
- json.dumps(style_radar, ensure_ascii=False, indent=2),
1104
- "```",
1171
+ "## 建议动作",
1172
+ f"- 可复制要素:{_display_list(clone_guidance.get('copyable_elements'))}",
1173
+ f"- 不可复制要素:{_display_list(clone_guidance.get('non_copyable_elements'))}",
1174
+ f"- 风险区:{_display_list(clone_guidance.get('danger_zones'))}",
1175
+ f"- 只学一件事:{_display_scalar(clone_guidance.get('if_only_learn_one_thing'))}",
1105
1176
  "",
1106
- "## 核心矛盾",
1177
+ "## 代表样本",
1107
1178
  ]
1108
1179
 
1109
- if core_contradictions:
1110
- lines.extend([f"- {normalize_text(item)}" for item in core_contradictions if normalize_text(item)])
1111
- else:
1112
- lines.append("- 数据不足")
1113
-
1114
- lines.extend(["", "## 建议动作"])
1115
- if recommendations:
1116
- lines.extend([f"- {normalize_text(item)}" for item in recommendations if normalize_text(item)])
1180
+ if representative_works:
1181
+ lines.extend([f"- {item}" for item in representative_works[:8]])
1117
1182
  else:
1118
1183
  lines.append("- 数据不足")
1119
1184
 
1120
1185
  lines.extend(
1121
1186
  [
1122
1187
  "",
1123
- "## author_analysis_v2",
1124
- "```json",
1125
- json.dumps(author_analysis_v2, ensure_ascii=False, indent=2),
1126
- "```",
1127
- "",
1128
- "## sampled_work_explanations",
1129
- "```json",
1130
- json.dumps(sampled_work_explanations, ensure_ascii=False, indent=2),
1131
- "```",
1188
+ "## 附录",
1189
+ f"- confidence: {fields.get('confidence')}",
1190
+ f"- error_reason: {fields.get('error_reason') or 'N/A'}",
1191
+ ]
1192
+ )
1193
+
1194
+ if quality_tier == "fallback":
1195
+ lines.append("- note: 当前作者分析使用 fallback 结果,请优先复核正文结论。")
1196
+ elif quality_tier == "degraded_author_only":
1197
+ lines.append("- note: 批量解释缺失,作者分析基于聚合统计与样本事实完成。")
1198
+
1199
+ lines.extend([""] + _json_details_block("author_analysis_v2", author_analysis_v2))
1200
+ lines.extend([""] + _json_details_block("sampled_work_explanations", sampled_work_explanations))
1201
+ lines.extend([""] + _json_details_block("validation", validation))
1202
+ lines.extend([""] + _json_details_block("stage_status", stage_status))
1203
+ lines.extend([""] + _json_details_block("extract_trace", fields.get("extract_trace", [])))
1204
+ return "\n".join(lines)
1205
+
1206
+
1207
def _module_section_items(value: Any) -> List[Any]:
    """Coerce one ``modules`` entry into the items printed under its heading."""
    if isinstance(value, str):
        # A bare string is one paragraph, not a sequence of characters.
        return [value]
    if isinstance(value, (list, tuple)) and value:
        return list(value)
    # Missing, empty or unexpected shapes all render the placeholder line.
    return ["数据不足"]


def _render_author_sample_markdown(
    *,
    card_id: str,
    card_type: str,
    fields: Dict[str, Any],
    generated_at: str,
) -> str:
    """Render the markdown card for one sampled work of an author.

    The card consists of JSON-encoded front matter, a basic-info section, a
    performance/structure section, one section per ``DEFAULT_MODULE_SECTIONS``
    heading (taken from ``fields["analysis_sections"]["modules"]``), an
    optional batch-explanation section when the card role is
    ``SAMPLE_WORK_CARD_ROLE``, the primary text, and an appendix that ends
    with a collapsible ``extract_trace`` dump.

    Args:
        card_id: Identifier written to the front matter.
        card_type: Card type written to the front matter.
        fields: Extracted card fields; missing keys fall back to placeholders.
        generated_at: Timestamp used for both ``generated_at`` and ``updated_at``.

    Returns:
        The full markdown document as a single newline-joined string.
    """
    card_role = normalize_text(fields.get("card_role")) or AUTHOR_SAMPLE_CARD_ROLE
    sampled_explanation = fields.get("sampled_explanation")
    if not isinstance(sampled_explanation, dict):
        sampled_explanation = {}

    fm = {
        "card_id": card_id,
        "card_type": card_type,
        "card_role": card_role,
        "platform": fields.get("platform"),
        "generated_at": generated_at,
        "updated_at": generated_at,
        "title": fields.get("title"),
        "platform_work_id": fields.get("platform_work_id"),
        "author": fields.get("author"),
        "author_handle": fields.get("author_handle"),
        "platform_author_id": fields.get("platform_author_id"),
        "share_url": fields.get("share_url"),
        "source_url": fields.get("source_url"),
    }

    # Every front-matter value is JSON-encoded so quoting/escaping is uniform.
    frontmatter = ["---"]
    frontmatter.extend(f"{key}: {json.dumps(value, ensure_ascii=False)}" for key, value in fm.items())
    frontmatter.append("---")

    metrics_line = (
        f"赞 {_display_metric(fields.get('digg_count'))} / 评 {_display_metric(fields.get('comment_count'))} / "
        f"藏 {_display_metric(fields.get('collect_count'))} / 转 {_display_metric(fields.get('share_count'))} / 播 {_display_metric(fields.get('play_count'))}"
    )

    # Convert the duration once; non-positive values are shown as N/A.
    duration_ms = _safe_int(fields.get("duration_ms"), default=0)
    duration_text = _format_duration(duration_ms) if duration_ms > 0 else "N/A"

    performance_score = fields.get("performance_score")
    performance_score_norm = fields.get("performance_score_norm")

    lines = [
        *frontmatter,
        "",
        "## 基础信息",
        f"- 作者:{_display_scalar(fields.get('author') or fields.get('author_handle') or fields.get('platform_author_id'), fallback='未知作者')}",
        f"- 标题:{_display_scalar(fields.get('title'), fallback='(标题缺失)')}",
        f"- 原始文案:{_display_scalar(fields.get('caption_raw'), fallback='N/A')}",
        f"- 作品模态:{_display_scalar(fields.get('work_modality'), fallback='未知')}",
        f"- 发布时间:{_display_scalar(fields.get('published_date'), fallback='N/A')}",
        f"- 时长:{duration_text}",
        f"- 互动:{metrics_line}",
        f"- 标签:{_display_list(fields.get('tags'), fallback='无')}",
        f"- 链接:{_display_scalar(fields.get('share_url'), fallback='(未提供)')}",
        "",
        "## 表现与结构",
        f"- performance_score:{performance_score if performance_score is not None else 'N/A'}",
        f"- performance_score_norm:{performance_score_norm if performance_score_norm is not None else 'N/A'}",
        f"- bucket:{_display_scalar(fields.get('bucket'), fallback='unknown')}",
        f"- hook_type:{_display_scalar(fields.get('hook_type'), fallback='unknown')}",
        f"- structure_type:{_display_scalar(fields.get('structure_type'), fallback='unknown')}",
        f"- cta_type:{_display_scalar(fields.get('cta_type'), fallback='unknown')}",
        f"- content_form:{_display_scalar(fields.get('content_form'), fallback='unknown')}",
        f"- style_markers:{_display_list(fields.get('style_markers'), fallback='未命中显著标记')}",
    ]

    precomputed_sections = fields.get("analysis_sections")
    modules = precomputed_sections.get("modules") if isinstance(precomputed_sections, dict) else None
    if not isinstance(modules, dict):
        modules = {}
    for heading in DEFAULT_MODULE_SECTIONS:
        lines.append("")
        lines.append(f"## {heading}")
        # Guard against a module entry that is a plain string or None.
        for item in _module_section_items(modules.get(heading)):
            lines.append(_display_scalar(item))

    if card_role == SAMPLE_WORK_CARD_ROLE:
        lines.extend(
            [
                "",
                "## 批量解释",
                f"- why_it_worked_or_failed:{_display_scalar(sampled_explanation.get('why_it_worked_or_failed'), fallback='批量解释未生成')}",
                f"- copyable_elements:{_display_list(sampled_explanation.get('copyable_elements'), fallback='批量解释未生成')}",
                f"- non_copyable_elements:{_display_list(sampled_explanation.get('non_copyable_elements'), fallback='批量解释未生成')}",
                f"- emotional_triggers:{_display_list(sampled_explanation.get('emotional_triggers'), fallback='批量解释未生成')}",
                f"- cognitive_gap:{_display_scalar(sampled_explanation.get('cognitive_gap'), fallback='批量解释未生成')}",
                f"- commercial_signal:{_display_scalar(sampled_explanation.get('commercial_signal'), fallback='批量解释未生成')}",
            ]
        )

    lines.extend(
        [
            "",
            "## 主文本",
            _display_scalar(fields.get("primary_text"), fallback="(无可用主文本)"),
            "",
            "## 附录",
            f"- analysis_eligibility: {_display_scalar(fields.get('analysis_eligibility'), fallback='unknown')}",
            f"- analysis_exclusion_reason: {_display_scalar(fields.get('analysis_exclusion_reason'), fallback='N/A')}",
            f"- request_id: {_display_scalar(fields.get('request_id'), fallback='N/A')}",
            f"- confidence: {_display_scalar(fields.get('confidence'), fallback='low')}",
            f"- error_reason: {_display_scalar(fields.get('error_reason'), fallback='N/A')}",
            "",
            "### ASR_RAW",
            _display_scalar(fields.get("raw_content"), fallback="(无可用 ASR 原文)"),
            "",
        ]
    )
    lines.extend(_json_details_block("extract_trace", fields.get("extract_trace", [])))
    return "\n".join(lines)
1148
1311
 
1149
1312
 
1313
def _render_author_markdown(
    *,
    card_id: str,
    card_type: str,
    fields: Dict[str, Any],
    generated_at: str,
) -> str:
    """Render an author card by delegating to ``_render_author_card_markdown``.

    All four keyword arguments are forwarded unchanged and the delegate's
    return value is returned as-is.
    """
    forwarded = {
        "card_id": card_id,
        "card_type": card_type,
        "fields": fields,
        "generated_at": generated_at,
    }
    return _render_author_card_markdown(**forwarded)
1326
+
1327
+
1150
1328
  def _render_markdown(
1151
1329
  *,
1152
1330
  card_id: str,
@@ -1161,6 +1339,14 @@ def _render_markdown(
1161
1339
  fields=fields,
1162
1340
  generated_at=generated_at,
1163
1341
  )
1342
+ if card_type == "author_sample_work":
1343
+ return _render_author_sample_markdown(
1344
+ card_id=card_id,
1345
+ card_type=card_type,
1346
+ fields=fields,
1347
+ generated_at=generated_at,
1348
+ )
1349
+
1164
1350
  author_name = fields.get("author") or fields.get("author_handle") or fields.get("platform_author_id") or "未知作者"
1165
1351
  title = fields.get("title") or "(标题缺失)"
1166
1352
  metrics_line = (
@@ -1168,10 +1354,7 @@ def _render_markdown(
1168
1354
  f"藏 {_display_metric(fields.get('collect_count'))} / 转 {_display_metric(fields.get('share_count'))} / 播 {_display_metric(fields.get('play_count'))}"
1169
1355
  )
1170
1356
  precomputed_sections = fields.get("analysis_sections") if isinstance(fields.get("analysis_sections"), dict) else {}
1171
- if precomputed_sections:
1172
- analysis_sections = precomputed_sections
1173
- else:
1174
- analysis_sections = {} if card_type == "author_sample_work" else build_analysis_sections(fields)
1357
+ analysis_sections = precomputed_sections or build_analysis_sections(fields)
1175
1358
  creative_modules = analysis_sections.get("modules", {})
1176
1359
  insight_lines = analysis_sections.get("insight", ["数据不足"])
1177
1360
  extract_trace_json = json.dumps(fields.get("extract_trace", []), ensure_ascii=False, indent=2)
@@ -1187,21 +1370,6 @@ def _render_markdown(
1187
1370
  "author": fields.get("author"),
1188
1371
  "author_handle": fields.get("author_handle"),
1189
1372
  "platform_author_id": fields.get("platform_author_id"),
1190
- "caption_raw": fields.get("caption_raw"),
1191
- "primary_text": fields.get("primary_text"),
1192
- "share_url": fields.get("share_url"),
1193
- "source_url": fields.get("source_url"),
1194
- "cover_image": fields.get("cover_image"),
1195
- "video_download_url": fields.get("video_download_url"),
1196
- "published_date": fields.get("published_date"),
1197
- "duration_ms": fields.get("duration_ms"),
1198
- "digg_count": fields.get("digg_count"),
1199
- "comment_count": fields.get("comment_count"),
1200
- "collect_count": fields.get("collect_count"),
1201
- "share_count": fields.get("share_count"),
1202
- "play_count": fields.get("play_count"),
1203
- "tags": fields.get("tags", []),
1204
- "work_modality": fields.get("work_modality"),
1205
1373
  }
1206
1374
 
1207
1375
  frontmatter = ["---"]
@@ -1218,10 +1386,8 @@ def _render_markdown(
1218
1386
  f"- 原始文案:{fields.get('caption_raw') or 'N/A'}",
1219
1387
  f"- 作品模态:{fields.get('work_modality') or '未知'}",
1220
1388
  f"- 发布时间:{fields.get('published_date') or 'N/A'}",
1221
- f"- {'视频时长' if fields.get('work_modality') == 'video' else '阅读载体'}:{_format_duration(fields.get('duration_ms', 0)) if fields.get('work_modality') == 'video' else '文本'}",
1222
1389
  f"- 互动:{metrics_line}",
1223
1390
  f"- 链接:{fields.get('share_url') or '(未提供)'}",
1224
- f"- 下载链接:{fields.get('video_download_url') or 'N/A'}" if fields.get("work_modality") == "video" else "- 下载链接:N/A",
1225
1391
  ]
1226
1392
 
1227
1393
  for heading in DEFAULT_MODULE_SECTIONS:
@@ -1235,20 +1401,11 @@ def _render_markdown(
1235
1401
  for item in insight_lines:
1236
1402
  lines.append(item)
1237
1403
 
1238
- transcript_heading = "## 主文本"
1239
- transcript_body = fields.get("primary_text")
1240
- transcript_fallback = "(无可用主文本)"
1241
-
1242
1404
  lines.extend(
1243
1405
  [
1244
1406
  "",
1245
- transcript_heading,
1246
- transcript_body or transcript_fallback,
1247
- ]
1248
- )
1249
-
1250
- lines.extend(
1251
- [
1407
+ "## 主文本",
1408
+ fields.get("primary_text") or "(无可用主文本)",
1252
1409
  "",
1253
1410
  "## 附录",
1254
1411
  "### ASR_RAW",
@@ -1299,6 +1456,9 @@ def write_benchmark_card(
1299
1456
  content_kind: Optional[str] = None,
1300
1457
  storage_config: Optional[Dict[str, Any]] = None,
1301
1458
  force_card_type: bool = False,
1459
+ route_card_type: Optional[str] = None,
1460
+ route_extra_parts: Optional[List[str]] = None,
1461
+ card_role: Optional[str] = None,
1302
1462
  ) -> Dict[str, Any]:
1303
1463
  now = dt.datetime.now()
1304
1464
  generated_at = now.isoformat(timespec="seconds")
@@ -1313,17 +1473,30 @@ def write_benchmark_card(
1313
1473
  storage_config=storage_config,
1314
1474
  force_card_type=force_card_type,
1315
1475
  )
1316
- fields = _extract_required_fields(payload, platform=platform)
1476
+ effective_route_card_type = normalize_card_type(route_card_type) if route_card_type else effective_card_type
1477
+ effective_card_role = normalize_text(card_role) or normalize_text(payload.get("card_role"))
1478
+ explicit_route_override = bool(route_card_type or route_extra_parts)
1479
+
1480
+ payload_for_fields = dict(payload)
1481
+ if effective_card_role:
1482
+ payload_for_fields["card_role"] = effective_card_role
1483
+ fields = _extract_required_fields(payload_for_fields, platform=platform)
1484
+ if not fields.get("card_role"):
1485
+ if effective_card_type == "author":
1486
+ fields["card_role"] = AUTHOR_CARD_ROLE
1487
+ elif effective_card_type == "author_sample_work":
1488
+ fields["card_role"] = AUTHOR_SAMPLE_CARD_ROLE
1317
1489
  resolved_card_root = _resolve_card_root(card_root)
1318
1490
 
1319
1491
  primary_target = _build_output_path(
1320
1492
  card_root=resolved_card_root,
1321
1493
  platform=platform,
1322
- card_type=effective_card_type,
1494
+ card_type=effective_route_card_type,
1323
1495
  payload=payload,
1324
1496
  now=now,
1325
1497
  sample_author=sample_author,
1326
1498
  storage_config=storage_config,
1499
+ extra_route_parts=route_extra_parts,
1327
1500
  )
1328
1501
  primary_path = primary_target["path"]
1329
1502
 
@@ -1340,12 +1513,16 @@ def write_benchmark_card(
1340
1513
  "ok": True,
1341
1514
  "platform": platform,
1342
1515
  "card_type": effective_card_type,
1516
+ "card_role": fields.get("card_role"),
1343
1517
  "requested_card_type": normalized_card_type,
1344
1518
  "force_card_type": bool(force_card_type),
1345
1519
  "content_kind": resolved_content_kind or None,
1346
1520
  "primary_card_path": primary_path,
1347
1521
  "routing": {
1522
+ "card_role": fields.get("card_role"),
1523
+ "route_key": effective_route_card_type,
1348
1524
  "primary_route_parts": primary_target["route_parts"],
1525
+ "explicit_override": explicit_route_override,
1349
1526
  "storage_routes_configured": bool(isinstance(storage_config, dict) and isinstance(storage_config.get("storage_routes"), dict)),
1350
1527
  },
1351
1528
  "required_fields": fields,