@tikomni/skills 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/creator-analysis/SKILL.md +34 -10
- package/skills/creator-analysis/references/contracts/creator-card-fields.md +2 -0
- package/skills/creator-analysis/references/contracts/work-card-fields.md +40 -4
- package/skills/creator-analysis/references/platform-guides/douyin.md +41 -36
- package/skills/creator-analysis/references/platform-guides/generic.md +11 -7
- package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +45 -30
- package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +224 -95
- package/skills/creator-analysis/references/workflow.md +8 -3
- package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +205 -21
- package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +54 -11
- package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +200 -13
- package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +113 -42
- package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +65 -7
- package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +82 -18
- package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +198 -32
- package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +374 -31
- package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +68 -12
- package/skills/creator-analysis/scripts/core/storage_router.py +3 -0
- package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +3 -2
- package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +314 -137
|
@@ -47,6 +47,9 @@ def resolve_default_card_root() -> str:
|
|
|
47
47
|
# Keep import-time compatibility for other scripts without crashing when env is absent.
|
|
48
48
|
DEFAULT_CARD_ROOT = ""
|
|
49
49
|
CARD_TYPES = ["work", "author", "author_sample_work"]
|
|
50
|
+
AUTHOR_SAMPLE_CARD_ROLE = "author_sample_card"
|
|
51
|
+
SAMPLE_WORK_CARD_ROLE = "sample_work_card"
|
|
52
|
+
AUTHOR_CARD_ROLE = "author_card"
|
|
50
53
|
|
|
51
54
|
|
|
52
55
|
def _normalize_lines(value: Any) -> List[str]:
|
|
@@ -92,6 +95,39 @@ def _safe_optional_int(value: Any) -> Optional[int]:
|
|
|
92
95
|
return None
|
|
93
96
|
|
|
94
97
|
|
|
98
|
+
def _safe_text(value: Any) -> str:
|
|
99
|
+
if value is None:
|
|
100
|
+
return ""
|
|
101
|
+
if isinstance(value, str):
|
|
102
|
+
return value.strip()
|
|
103
|
+
if isinstance(value, (int, float, bool)):
|
|
104
|
+
return str(value).strip()
|
|
105
|
+
return ""
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _safe_text_list(value: Any) -> List[str]:
|
|
109
|
+
if isinstance(value, list):
|
|
110
|
+
result: List[str] = []
|
|
111
|
+
for item in value:
|
|
112
|
+
if isinstance(item, dict):
|
|
113
|
+
for key in ("name", "value", "label", "hashtag_name", "search_text", "tag_name", "text"):
|
|
114
|
+
text = _safe_text(item.get(key))
|
|
115
|
+
if text:
|
|
116
|
+
result.append(text)
|
|
117
|
+
break
|
|
118
|
+
continue
|
|
119
|
+
text = _safe_text(item)
|
|
120
|
+
if text:
|
|
121
|
+
result.append(text)
|
|
122
|
+
return list(dict.fromkeys(result))
|
|
123
|
+
if isinstance(value, str):
|
|
124
|
+
text = _safe_text(value)
|
|
125
|
+
if not text:
|
|
126
|
+
return []
|
|
127
|
+
return [item for item in re.split(r"[,,\s]+", text) if item]
|
|
128
|
+
return []
|
|
129
|
+
|
|
130
|
+
|
|
95
131
|
def _to_unix_sec(value: Any) -> int:
|
|
96
132
|
if value is None:
|
|
97
133
|
return 0
|
|
@@ -417,7 +453,7 @@ def _extract_tags(payload: Dict[str, Any]) -> List[str]:
|
|
|
417
453
|
for key in ("tags", "tag_list", "hashtags"):
|
|
418
454
|
value = payload.get(key)
|
|
419
455
|
if isinstance(value, list):
|
|
420
|
-
tags = [
|
|
456
|
+
tags = [item.lstrip("#") for item in _safe_text_list(value)]
|
|
421
457
|
if tags:
|
|
422
458
|
return list(dict.fromkeys(tags))
|
|
423
459
|
if isinstance(value, str) and normalize_text(value):
|
|
@@ -430,7 +466,7 @@ def _extract_tags(payload: Dict[str, Any]) -> List[str]:
|
|
|
430
466
|
for key in ("tags", "tag_list", "hashtags"):
|
|
431
467
|
value = source.get(key)
|
|
432
468
|
if isinstance(value, list):
|
|
433
|
-
tags = [
|
|
469
|
+
tags = [item.lstrip("#") for item in _safe_text_list(value)]
|
|
434
470
|
if tags:
|
|
435
471
|
return list(dict.fromkeys(tags))
|
|
436
472
|
|
|
@@ -476,11 +512,14 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
|
|
|
476
512
|
if create_time_sec <= 0:
|
|
477
513
|
create_time_sec = _to_unix_sec(_source_dict(payload).get("create_time"))
|
|
478
514
|
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
515
|
+
metrics = payload.get("metrics") if isinstance(payload.get("metrics"), dict) else {}
|
|
516
|
+
digg_count = _safe_int(payload.get("digg_count"), default=_safe_int(metrics.get("like"), default=0))
|
|
517
|
+
comment_count = _safe_int(payload.get("comment_count"), default=_safe_int(metrics.get("comment"), default=0))
|
|
518
|
+
collect_count = _safe_int(payload.get("collect_count"), default=_safe_int(metrics.get("collect"), default=0))
|
|
519
|
+
share_count = _safe_int(payload.get("share_count"), default=_safe_int(metrics.get("share"), default=0))
|
|
483
520
|
play_count = _safe_optional_int(payload.get("play_count"))
|
|
521
|
+
if play_count is None:
|
|
522
|
+
play_count = _safe_optional_int(metrics.get("play"))
|
|
484
523
|
|
|
485
524
|
summary = normalize_text(payload.get("summary"))
|
|
486
525
|
raw_content = normalize_text(payload.get("raw_content"))
|
|
@@ -512,6 +551,8 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
|
|
|
512
551
|
if not primary_text:
|
|
513
552
|
primary_text = asr_clean if primary_text_source == "asr_clean" else normalize_text(payload.get("desc"))
|
|
514
553
|
|
|
554
|
+
sampled_explanation = payload.get("sampled_explanation") if isinstance(payload.get("sampled_explanation"), dict) else {}
|
|
555
|
+
|
|
515
556
|
return {
|
|
516
557
|
"title": title,
|
|
517
558
|
"platform": platform,
|
|
@@ -540,6 +581,18 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
|
|
|
540
581
|
"raw_content": raw_content,
|
|
541
582
|
"primary_text": primary_text,
|
|
542
583
|
"asr_clean": asr_clean,
|
|
584
|
+
"performance_score": payload.get("performance_score"),
|
|
585
|
+
"performance_score_norm": payload.get("performance_score_norm"),
|
|
586
|
+
"bucket": normalize_text(payload.get("bucket")),
|
|
587
|
+
"hook_type": normalize_text(payload.get("hook_type")),
|
|
588
|
+
"structure_type": normalize_text(payload.get("structure_type")),
|
|
589
|
+
"cta_type": normalize_text(payload.get("cta_type")),
|
|
590
|
+
"content_form": normalize_text(payload.get("content_form")),
|
|
591
|
+
"style_markers": _safe_text_list(payload.get("style_markers")),
|
|
592
|
+
"analysis_eligibility": normalize_text(payload.get("analysis_eligibility")) or "eligible",
|
|
593
|
+
"analysis_exclusion_reason": normalize_text(payload.get("analysis_exclusion_reason")),
|
|
594
|
+
"card_role": normalize_text(payload.get("card_role")),
|
|
595
|
+
"sampled_explanation": sampled_explanation,
|
|
543
596
|
"platform_native_refs": payload.get("platform_native_refs") if isinstance(payload.get("platform_native_refs"), dict) else {},
|
|
544
597
|
"request_id": payload.get("request_id"),
|
|
545
598
|
"confidence": normalize_text(payload.get("confidence")) or "low",
|
|
@@ -552,6 +605,9 @@ def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str
|
|
|
552
605
|
"sampled_work_explanations": payload.get("sampled_work_explanations") if isinstance(payload.get("sampled_work_explanations"), dict) else {},
|
|
553
606
|
"author_card_highlights": payload.get("author_card_highlights") if isinstance(payload.get("author_card_highlights"), dict) else {},
|
|
554
607
|
"validation": payload.get("validation") if isinstance(payload.get("validation"), dict) else {},
|
|
608
|
+
"quality_tier": normalize_text(payload.get("quality_tier")),
|
|
609
|
+
"stage_status": payload.get("stage_status") if isinstance(payload.get("stage_status"), dict) else {},
|
|
610
|
+
"sampled_work_ids": _safe_text_list(payload.get("sampled_work_ids")),
|
|
555
611
|
"business_score": _safe_int(payload.get("business_score"), default=0),
|
|
556
612
|
"benchmark_gap_score": _safe_int(payload.get("benchmark_gap_score"), default=0),
|
|
557
613
|
"style_radar": payload.get("style_radar") if isinstance(payload.get("style_radar"), dict) else {},
|
|
@@ -965,6 +1021,7 @@ def _build_output_path(
|
|
|
965
1021
|
now: dt.datetime,
|
|
966
1022
|
sample_author: Optional[str],
|
|
967
1023
|
storage_config: Optional[Dict[str, Any]],
|
|
1024
|
+
extra_route_parts: Optional[List[str]] = None,
|
|
968
1025
|
) -> Dict[str, str]:
|
|
969
1026
|
author_slug = _pick_author_slug(payload, author_hint=sample_author)
|
|
970
1027
|
title_slug = _pick_title_slug(payload)
|
|
@@ -979,6 +1036,7 @@ def _build_output_path(
|
|
|
979
1036
|
year_month=now.strftime("%Y-%m"),
|
|
980
1037
|
timestamp=now.strftime("%Y%m%d-%H%M%S"),
|
|
981
1038
|
storage_config=storage_config,
|
|
1039
|
+
extra_route_parts=extra_route_parts,
|
|
982
1040
|
)
|
|
983
1041
|
return {
|
|
984
1042
|
"path": path,
|
|
@@ -989,7 +1047,30 @@ def _build_output_path(
|
|
|
989
1047
|
}
|
|
990
1048
|
|
|
991
1049
|
|
|
992
|
-
def
|
|
1050
|
+
def _json_details_block(title: str, payload: Any) -> List[str]:
|
|
1051
|
+
return [
|
|
1052
|
+
"<details>",
|
|
1053
|
+
f"<summary>{title}</summary>",
|
|
1054
|
+
"",
|
|
1055
|
+
"```json",
|
|
1056
|
+
json.dumps(payload, ensure_ascii=False, indent=2),
|
|
1057
|
+
"```",
|
|
1058
|
+
"",
|
|
1059
|
+
"</details>",
|
|
1060
|
+
]
|
|
1061
|
+
|
|
1062
|
+
|
|
1063
|
+
def _display_list(values: Any, *, fallback: str = "数据不足") -> str:
|
|
1064
|
+
items = _safe_text_list(values)
|
|
1065
|
+
return "、".join(items) if items else fallback
|
|
1066
|
+
|
|
1067
|
+
|
|
1068
|
+
def _display_scalar(value: Any, *, fallback: str = "数据不足") -> str:
|
|
1069
|
+
text = normalize_text(value)
|
|
1070
|
+
return text or fallback
|
|
1071
|
+
|
|
1072
|
+
|
|
1073
|
+
def _render_author_card_markdown(
|
|
993
1074
|
*,
|
|
994
1075
|
card_id: str,
|
|
995
1076
|
card_type: str,
|
|
@@ -998,59 +1079,38 @@ def _render_author_markdown(
|
|
|
998
1079
|
) -> str:
|
|
999
1080
|
analysis_output = fields.get("analysis_output") if isinstance(fields.get("analysis_output"), dict) else {}
|
|
1000
1081
|
author_analysis_v2 = fields.get("author_analysis_v2") if isinstance(fields.get("author_analysis_v2"), dict) else analysis_output.get("author_analysis_v2", {})
|
|
1001
|
-
if
|
|
1002
|
-
author_analysis_v2 = {}
|
|
1003
|
-
sampled_work_explanations = fields.get("sampled_work_explanations") if isinstance(fields.get("sampled_work_explanations"), dict) else analysis_output.get("sampled_work_explanations", {})
|
|
1004
|
-
if not isinstance(sampled_work_explanations, dict):
|
|
1005
|
-
sampled_work_explanations = {}
|
|
1006
|
-
author_card_highlights = fields.get("author_card_highlights") if isinstance(fields.get("author_card_highlights"), dict) else {}
|
|
1007
|
-
if not isinstance(author_card_highlights, dict):
|
|
1008
|
-
author_card_highlights = {}
|
|
1082
|
+
author_analysis_v2 = author_analysis_v2 if isinstance(author_analysis_v2, dict) else {}
|
|
1009
1083
|
validation = fields.get("validation") if isinstance(fields.get("validation"), dict) else analysis_output.get("validation", {})
|
|
1010
|
-
if
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
if not isinstance(style_radar, dict):
|
|
1017
|
-
style_radar = {}
|
|
1018
|
-
|
|
1019
|
-
core_contradictions = fields.get("core_contradictions") if isinstance(fields.get("core_contradictions"), list) else analysis_output.get("core_contradictions", [])
|
|
1020
|
-
if not isinstance(core_contradictions, list):
|
|
1021
|
-
core_contradictions = []
|
|
1084
|
+
validation = validation if isinstance(validation, dict) else {}
|
|
1085
|
+
stage_status = fields.get("stage_status") if isinstance(fields.get("stage_status"), dict) else {}
|
|
1086
|
+
stage_status = stage_status if isinstance(stage_status, dict) else {}
|
|
1087
|
+
sampled_work_explanations = fields.get("sampled_work_explanations") if isinstance(fields.get("sampled_work_explanations"), dict) else analysis_output.get("sampled_work_explanations", {})
|
|
1088
|
+
sampled_work_explanations = sampled_work_explanations if isinstance(sampled_work_explanations, dict) else {}
|
|
1089
|
+
quality_tier = _display_scalar(fields.get("quality_tier"), fallback="unknown")
|
|
1022
1090
|
|
|
1023
|
-
|
|
1024
|
-
if
|
|
1025
|
-
|
|
1091
|
+
positioning = author_analysis_v2.get("author_positioning") if isinstance(author_analysis_v2.get("author_positioning"), dict) else {}
|
|
1092
|
+
trust_model = author_analysis_v2.get("trust_model") if isinstance(author_analysis_v2.get("trust_model"), dict) else {}
|
|
1093
|
+
content_mechanism = author_analysis_v2.get("content_mechanism") if isinstance(author_analysis_v2.get("content_mechanism"), dict) else {}
|
|
1094
|
+
commercial_bridge = author_analysis_v2.get("commercial_bridge") if isinstance(author_analysis_v2.get("commercial_bridge"), dict) else {}
|
|
1095
|
+
core_tensions = author_analysis_v2.get("core_tensions") if isinstance(author_analysis_v2.get("core_tensions"), dict) else {}
|
|
1096
|
+
clone_guidance = author_analysis_v2.get("clone_guidance") if isinstance(author_analysis_v2.get("clone_guidance"), dict) else {}
|
|
1097
|
+
evidence_pack = author_analysis_v2.get("evidence_pack") if isinstance(author_analysis_v2.get("evidence_pack"), dict) else {}
|
|
1026
1098
|
|
|
1027
|
-
|
|
1028
|
-
|
|
1029
|
-
author_portrait = normalize_text(author_card_highlights.get("one_liner")) or normalize_text(fields.get("summary")) or normalize_text(analysis_output.get("author_portrait"))
|
|
1099
|
+
sampled_work_ids = _safe_text_list(fields.get("sampled_work_ids"))
|
|
1100
|
+
representative_works = _safe_text_list(evidence_pack.get("representative_works")) or sampled_work_ids[:8]
|
|
1030
1101
|
|
|
1031
1102
|
fm = {
|
|
1032
1103
|
"card_id": card_id,
|
|
1033
1104
|
"card_type": card_type,
|
|
1105
|
+
"card_role": fields.get("card_role") or AUTHOR_CARD_ROLE,
|
|
1034
1106
|
"platform": fields.get("platform"),
|
|
1035
1107
|
"generated_at": generated_at,
|
|
1036
1108
|
"updated_at": generated_at,
|
|
1037
1109
|
"title": fields.get("title"),
|
|
1038
|
-
"platform_work_id": fields.get("platform_work_id"),
|
|
1039
|
-
"author": fields.get("author"),
|
|
1040
|
-
"author_handle": fields.get("author_handle"),
|
|
1041
1110
|
"platform_author_id": fields.get("platform_author_id"),
|
|
1111
|
+
"author_handle": fields.get("author_handle"),
|
|
1042
1112
|
"nickname": fields.get("nickname"),
|
|
1043
|
-
"
|
|
1044
|
-
"avatar_url": fields.get("avatar_url"),
|
|
1045
|
-
"signature": fields.get("signature"),
|
|
1046
|
-
"fans_count": fields.get("fans_count"),
|
|
1047
|
-
"liked_count": fields.get("liked_count"),
|
|
1048
|
-
"collected_count": fields.get("collected_count"),
|
|
1049
|
-
"works_count": fields.get("works_count"),
|
|
1050
|
-
"verified": fields.get("verified"),
|
|
1051
|
-
"snapshot_at": fields.get("snapshot_at"),
|
|
1052
|
-
"business_score": business_score,
|
|
1053
|
-
"benchmark_gap_score": benchmark_gap_score,
|
|
1113
|
+
"quality_tier": quality_tier,
|
|
1054
1114
|
"request_id": fields.get("request_id"),
|
|
1055
1115
|
}
|
|
1056
1116
|
|
|
@@ -1062,91 +1122,209 @@ def _render_author_markdown(
|
|
|
1062
1122
|
lines = [
|
|
1063
1123
|
*frontmatter,
|
|
1064
1124
|
"",
|
|
1065
|
-
"##
|
|
1066
|
-
f"- 平台:{fields.get('platform')
|
|
1067
|
-
f"- 作者ID:{fields.get('platform_author_id')
|
|
1068
|
-
f"- 账号标识:{fields.get('author_handle')
|
|
1069
|
-
f"- 昵称:{fields.get('nickname') or fields.get('author')
|
|
1070
|
-
f"- IP属地:{fields.get('ip_location')
|
|
1071
|
-
f"- 签名:{fields.get('signature')
|
|
1072
|
-
f"- 头像:{fields.get('avatar_url') or 'N/A'}",
|
|
1125
|
+
"## 基础主页事实",
|
|
1126
|
+
f"- 平台:{_display_scalar(fields.get('platform'), fallback='未知')}",
|
|
1127
|
+
f"- 作者ID:{_display_scalar(fields.get('platform_author_id'), fallback='未知')}",
|
|
1128
|
+
f"- 账号标识:{_display_scalar(fields.get('author_handle'), fallback='N/A')}",
|
|
1129
|
+
f"- 昵称:{_display_scalar(fields.get('nickname') or fields.get('author'), fallback='未知')}",
|
|
1130
|
+
f"- IP属地:{_display_scalar(fields.get('ip_location'), fallback='N/A')}",
|
|
1131
|
+
f"- 签名:{_display_scalar(fields.get('signature'), fallback='N/A')}",
|
|
1073
1132
|
f"- 粉丝数:{_display_metric(fields.get('fans_count'))}",
|
|
1074
1133
|
f"- 累计获赞:{_display_metric(fields.get('liked_count'))}",
|
|
1075
1134
|
f"- 累计收藏:{_display_metric(fields.get('collected_count'))}",
|
|
1076
1135
|
f"- 作品数:{_display_metric(fields.get('works_count'))}",
|
|
1077
|
-
f"-
|
|
1078
|
-
f"- 抓取时间:{fields.get('snapshot_at') or 'N/A'}",
|
|
1136
|
+
f"- 质量档:{quality_tier}",
|
|
1079
1137
|
"",
|
|
1080
|
-
"##
|
|
1081
|
-
|
|
1138
|
+
"## 作者定位",
|
|
1139
|
+
_display_scalar(positioning.get("one_liner") or fields.get("summary")),
|
|
1140
|
+
f"- 作者类型:{_display_scalar(positioning.get('author_type'))}",
|
|
1141
|
+
f"- 主要角色:{_display_scalar(positioning.get('primary_role'))}",
|
|
1142
|
+
f"- 目标受众:{_display_scalar(positioning.get('target_audience'))}",
|
|
1143
|
+
f"- 核心问题:{_display_scalar(positioning.get('core_problem_solved'))}",
|
|
1144
|
+
f"- 核心价值:{_display_scalar(positioning.get('core_value_proposition'))}",
|
|
1082
1145
|
"",
|
|
1083
|
-
"##
|
|
1084
|
-
f"-
|
|
1085
|
-
f"-
|
|
1086
|
-
f"-
|
|
1087
|
-
f"-
|
|
1088
|
-
f"- 最大张力:{normalize_text(author_card_highlights.get('most_important_tension')) or '数据不足'}",
|
|
1089
|
-
f"- 只学一件事:{normalize_text(author_card_highlights.get('if_only_learn_one_thing')) or '数据不足'}",
|
|
1146
|
+
"## 信任模型",
|
|
1147
|
+
f"- 主要信任源:{_display_scalar(trust_model.get('primary_trust_source'))}",
|
|
1148
|
+
f"- 次级信任源:{_display_list(trust_model.get('secondary_trust_sources'))}",
|
|
1149
|
+
f"- 建立机制:{_display_list(trust_model.get('trust_building_mechanisms'))}",
|
|
1150
|
+
f"- 风险:{_display_list(trust_model.get('trust_risks'))}",
|
|
1090
1151
|
"",
|
|
1091
|
-
"##
|
|
1092
|
-
|
|
1152
|
+
"## 内容机制",
|
|
1153
|
+
f"- 世界观:{_display_scalar((author_analysis_v2.get('cognitive_engine') or {}).get('worldview'))}",
|
|
1154
|
+
f"- 推理模式:{_display_list((author_analysis_v2.get('cognitive_engine') or {}).get('reasoning_modes'))}",
|
|
1155
|
+
f"- 内容来源:{_display_list(content_mechanism.get('topic_sources'))}",
|
|
1156
|
+
f"- 内容目标:{_display_list(content_mechanism.get('topic_goals'))}",
|
|
1157
|
+
f"- 优势结构:{_display_list(content_mechanism.get('winning_content_structures'))}",
|
|
1158
|
+
f"- 流量驱动:{_display_list(content_mechanism.get('traffic_drivers'))}",
|
|
1159
|
+
f"- 主导主题:{_display_list(content_mechanism.get('dominant_themes'))}",
|
|
1093
1160
|
"",
|
|
1094
|
-
"##
|
|
1095
|
-
|
|
1161
|
+
"## 商业桥",
|
|
1162
|
+
f"- 漏斗角色:{_display_list(commercial_bridge.get('content_role_in_funnel'))}",
|
|
1163
|
+
f"- 可能产品:{_display_list(commercial_bridge.get('likely_products'), fallback='证据不足')}",
|
|
1164
|
+
f"- 转化路径:{_display_scalar(commercial_bridge.get('conversion_path'), fallback='证据不足')}",
|
|
1165
|
+
f"- 商业信号:{_display_list(commercial_bridge.get('business_model_signals'), fallback='证据不足')}",
|
|
1096
1166
|
"",
|
|
1097
|
-
"##
|
|
1098
|
-
f"-
|
|
1099
|
-
f"-
|
|
1167
|
+
"## 核心张力",
|
|
1168
|
+
f"- 最重要张力:{_display_scalar(core_tensions.get('most_important_tension'))}",
|
|
1169
|
+
f"- 张力列表:{_display_list(core_tensions.get('tensions'))}",
|
|
1100
1170
|
"",
|
|
1101
|
-
"##
|
|
1102
|
-
"
|
|
1103
|
-
|
|
1104
|
-
"
|
|
1171
|
+
"## 建议动作",
|
|
1172
|
+
f"- 可复制要素:{_display_list(clone_guidance.get('copyable_elements'))}",
|
|
1173
|
+
f"- 不可复制要素:{_display_list(clone_guidance.get('non_copyable_elements'))}",
|
|
1174
|
+
f"- 风险区:{_display_list(clone_guidance.get('danger_zones'))}",
|
|
1175
|
+
f"- 只学一件事:{_display_scalar(clone_guidance.get('if_only_learn_one_thing'))}",
|
|
1105
1176
|
"",
|
|
1106
|
-
"##
|
|
1177
|
+
"## 代表样本",
|
|
1107
1178
|
]
|
|
1108
1179
|
|
|
1109
|
-
if
|
|
1110
|
-
lines.extend([f"- {
|
|
1111
|
-
else:
|
|
1112
|
-
lines.append("- 数据不足")
|
|
1113
|
-
|
|
1114
|
-
lines.extend(["", "## 建议动作"])
|
|
1115
|
-
if recommendations:
|
|
1116
|
-
lines.extend([f"- {normalize_text(item)}" for item in recommendations if normalize_text(item)])
|
|
1180
|
+
if representative_works:
|
|
1181
|
+
lines.extend([f"- {item}" for item in representative_works[:8]])
|
|
1117
1182
|
else:
|
|
1118
1183
|
lines.append("- 数据不足")
|
|
1119
1184
|
|
|
1120
1185
|
lines.extend(
|
|
1121
1186
|
[
|
|
1122
1187
|
"",
|
|
1123
|
-
"##
|
|
1124
|
-
"
|
|
1125
|
-
|
|
1126
|
-
|
|
1127
|
-
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
|
|
1131
|
-
|
|
1188
|
+
"## 附录",
|
|
1189
|
+
f"- confidence: {fields.get('confidence')}",
|
|
1190
|
+
f"- error_reason: {fields.get('error_reason') or 'N/A'}",
|
|
1191
|
+
]
|
|
1192
|
+
)
|
|
1193
|
+
|
|
1194
|
+
if quality_tier == "fallback":
|
|
1195
|
+
lines.append("- note: 当前作者分析使用 fallback 结果,请优先复核正文结论。")
|
|
1196
|
+
elif quality_tier == "degraded_author_only":
|
|
1197
|
+
lines.append("- note: 批量解释缺失,作者分析基于聚合统计与样本事实完成。")
|
|
1198
|
+
|
|
1199
|
+
lines.extend([""] + _json_details_block("author_analysis_v2", author_analysis_v2))
|
|
1200
|
+
lines.extend([""] + _json_details_block("sampled_work_explanations", sampled_work_explanations))
|
|
1201
|
+
lines.extend([""] + _json_details_block("validation", validation))
|
|
1202
|
+
lines.extend([""] + _json_details_block("stage_status", stage_status))
|
|
1203
|
+
lines.extend([""] + _json_details_block("extract_trace", fields.get("extract_trace", [])))
|
|
1204
|
+
return "\n".join(lines)
|
|
1205
|
+
|
|
1206
|
+
|
|
1207
|
+
def _render_author_sample_markdown(
|
|
1208
|
+
*,
|
|
1209
|
+
card_id: str,
|
|
1210
|
+
card_type: str,
|
|
1211
|
+
fields: Dict[str, Any],
|
|
1212
|
+
generated_at: str,
|
|
1213
|
+
) -> str:
|
|
1214
|
+
card_role = normalize_text(fields.get("card_role")) or AUTHOR_SAMPLE_CARD_ROLE
|
|
1215
|
+
sampled_explanation = fields.get("sampled_explanation") if isinstance(fields.get("sampled_explanation"), dict) else {}
|
|
1216
|
+
sampled_explanation = sampled_explanation if isinstance(sampled_explanation, dict) else {}
|
|
1217
|
+
|
|
1218
|
+
fm = {
|
|
1219
|
+
"card_id": card_id,
|
|
1220
|
+
"card_type": card_type,
|
|
1221
|
+
"card_role": card_role,
|
|
1222
|
+
"platform": fields.get("platform"),
|
|
1223
|
+
"generated_at": generated_at,
|
|
1224
|
+
"updated_at": generated_at,
|
|
1225
|
+
"title": fields.get("title"),
|
|
1226
|
+
"platform_work_id": fields.get("platform_work_id"),
|
|
1227
|
+
"author": fields.get("author"),
|
|
1228
|
+
"author_handle": fields.get("author_handle"),
|
|
1229
|
+
"platform_author_id": fields.get("platform_author_id"),
|
|
1230
|
+
"share_url": fields.get("share_url"),
|
|
1231
|
+
"source_url": fields.get("source_url"),
|
|
1232
|
+
}
|
|
1233
|
+
|
|
1234
|
+
frontmatter = ["---"]
|
|
1235
|
+
for key, value in fm.items():
|
|
1236
|
+
frontmatter.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
|
|
1237
|
+
frontmatter.append("---")
|
|
1238
|
+
|
|
1239
|
+
metrics_line = (
|
|
1240
|
+
f"赞 {_display_metric(fields.get('digg_count'))} / 评 {_display_metric(fields.get('comment_count'))} / "
|
|
1241
|
+
f"藏 {_display_metric(fields.get('collect_count'))} / 转 {_display_metric(fields.get('share_count'))} / 播 {_display_metric(fields.get('play_count'))}"
|
|
1242
|
+
)
|
|
1243
|
+
|
|
1244
|
+
lines = [
|
|
1245
|
+
*frontmatter,
|
|
1246
|
+
"",
|
|
1247
|
+
"## 基础信息",
|
|
1248
|
+
f"- 作者:{_display_scalar(fields.get('author') or fields.get('author_handle') or fields.get('platform_author_id'), fallback='未知作者')}",
|
|
1249
|
+
f"- 标题:{_display_scalar(fields.get('title'), fallback='(标题缺失)')}",
|
|
1250
|
+
f"- 原始文案:{_display_scalar(fields.get('caption_raw'), fallback='N/A')}",
|
|
1251
|
+
f"- 作品模态:{_display_scalar(fields.get('work_modality'), fallback='未知')}",
|
|
1252
|
+
f"- 发布时间:{_display_scalar(fields.get('published_date'), fallback='N/A')}",
|
|
1253
|
+
f"- 时长:{_format_duration(_safe_int(fields.get('duration_ms'), default=0)) if _safe_int(fields.get('duration_ms'), default=0) > 0 else 'N/A'}",
|
|
1254
|
+
f"- 互动:{metrics_line}",
|
|
1255
|
+
f"- 标签:{_display_list(fields.get('tags'), fallback='无')}",
|
|
1256
|
+
f"- 链接:{_display_scalar(fields.get('share_url'), fallback='(未提供)')}",
|
|
1257
|
+
"",
|
|
1258
|
+
"## 表现与结构",
|
|
1259
|
+
f"- performance_score:{fields.get('performance_score') if fields.get('performance_score') is not None else 'N/A'}",
|
|
1260
|
+
f"- performance_score_norm:{fields.get('performance_score_norm') if fields.get('performance_score_norm') is not None else 'N/A'}",
|
|
1261
|
+
f"- bucket:{_display_scalar(fields.get('bucket'), fallback='unknown')}",
|
|
1262
|
+
f"- hook_type:{_display_scalar(fields.get('hook_type'), fallback='unknown')}",
|
|
1263
|
+
f"- structure_type:{_display_scalar(fields.get('structure_type'), fallback='unknown')}",
|
|
1264
|
+
f"- cta_type:{_display_scalar(fields.get('cta_type'), fallback='unknown')}",
|
|
1265
|
+
f"- content_form:{_display_scalar(fields.get('content_form'), fallback='unknown')}",
|
|
1266
|
+
f"- style_markers:{_display_list(fields.get('style_markers'), fallback='未命中显著标记')}",
|
|
1267
|
+
]
|
|
1268
|
+
|
|
1269
|
+
precomputed_sections = fields.get("analysis_sections") if isinstance(fields.get("analysis_sections"), dict) else {}
|
|
1270
|
+
modules = precomputed_sections.get("modules") if isinstance(precomputed_sections.get("modules"), dict) else {}
|
|
1271
|
+
for heading in DEFAULT_MODULE_SECTIONS:
|
|
1272
|
+
lines.append("")
|
|
1273
|
+
lines.append(f"## {heading}")
|
|
1274
|
+
for item in modules.get(heading, ["数据不足"]):
|
|
1275
|
+
lines.append(_display_scalar(item))
|
|
1276
|
+
|
|
1277
|
+
if card_role == SAMPLE_WORK_CARD_ROLE:
|
|
1278
|
+
lines.extend(
|
|
1279
|
+
[
|
|
1280
|
+
"",
|
|
1281
|
+
"## 批量解释",
|
|
1282
|
+
f"- why_it_worked_or_failed:{_display_scalar(sampled_explanation.get('why_it_worked_or_failed'), fallback='批量解释未生成')}",
|
|
1283
|
+
f"- copyable_elements:{_display_list(sampled_explanation.get('copyable_elements'), fallback='批量解释未生成')}",
|
|
1284
|
+
f"- non_copyable_elements:{_display_list(sampled_explanation.get('non_copyable_elements'), fallback='批量解释未生成')}",
|
|
1285
|
+
f"- emotional_triggers:{_display_list(sampled_explanation.get('emotional_triggers'), fallback='批量解释未生成')}",
|
|
1286
|
+
f"- cognitive_gap:{_display_scalar(sampled_explanation.get('cognitive_gap'), fallback='批量解释未生成')}",
|
|
1287
|
+
f"- commercial_signal:{_display_scalar(sampled_explanation.get('commercial_signal'), fallback='批量解释未生成')}",
|
|
1288
|
+
]
|
|
1289
|
+
)
|
|
1290
|
+
|
|
1291
|
+
lines.extend(
|
|
1292
|
+
[
|
|
1132
1293
|
"",
|
|
1133
|
-
"##
|
|
1134
|
-
|
|
1135
|
-
f"- validation_error_count: {len(validation.get('errors') or [])}",
|
|
1294
|
+
"## 主文本",
|
|
1295
|
+
_display_scalar(fields.get("primary_text"), fallback="(无可用主文本)"),
|
|
1136
1296
|
"",
|
|
1137
1297
|
"## 附录",
|
|
1138
|
-
f"-
|
|
1139
|
-
f"-
|
|
1298
|
+
f"- analysis_eligibility: {_display_scalar(fields.get('analysis_eligibility'), fallback='unknown')}",
|
|
1299
|
+
f"- analysis_exclusion_reason: {_display_scalar(fields.get('analysis_exclusion_reason'), fallback='N/A')}",
|
|
1300
|
+
f"- request_id: {_display_scalar(fields.get('request_id'), fallback='N/A')}",
|
|
1301
|
+
f"- confidence: {_display_scalar(fields.get('confidence'), fallback='low')}",
|
|
1302
|
+
f"- error_reason: {_display_scalar(fields.get('error_reason'), fallback='N/A')}",
|
|
1140
1303
|
"",
|
|
1141
|
-
"
|
|
1142
|
-
|
|
1143
|
-
"```",
|
|
1304
|
+
"### ASR_RAW",
|
|
1305
|
+
_display_scalar(fields.get("raw_content"), fallback="(无可用 ASR 原文)"),
|
|
1144
1306
|
"",
|
|
1145
1307
|
]
|
|
1146
1308
|
)
|
|
1309
|
+
lines.extend(_json_details_block("extract_trace", fields.get("extract_trace", [])))
|
|
1147
1310
|
return "\n".join(lines)
|
|
1148
1311
|
|
|
1149
1312
|
|
|
1313
|
+
def _render_author_markdown(
|
|
1314
|
+
*,
|
|
1315
|
+
card_id: str,
|
|
1316
|
+
card_type: str,
|
|
1317
|
+
fields: Dict[str, Any],
|
|
1318
|
+
generated_at: str,
|
|
1319
|
+
) -> str:
|
|
1320
|
+
return _render_author_card_markdown(
|
|
1321
|
+
card_id=card_id,
|
|
1322
|
+
card_type=card_type,
|
|
1323
|
+
fields=fields,
|
|
1324
|
+
generated_at=generated_at,
|
|
1325
|
+
)
|
|
1326
|
+
|
|
1327
|
+
|
|
1150
1328
|
def _render_markdown(
|
|
1151
1329
|
*,
|
|
1152
1330
|
card_id: str,
|
|
@@ -1161,6 +1339,14 @@ def _render_markdown(
|
|
|
1161
1339
|
fields=fields,
|
|
1162
1340
|
generated_at=generated_at,
|
|
1163
1341
|
)
|
|
1342
|
+
if card_type == "author_sample_work":
|
|
1343
|
+
return _render_author_sample_markdown(
|
|
1344
|
+
card_id=card_id,
|
|
1345
|
+
card_type=card_type,
|
|
1346
|
+
fields=fields,
|
|
1347
|
+
generated_at=generated_at,
|
|
1348
|
+
)
|
|
1349
|
+
|
|
1164
1350
|
author_name = fields.get("author") or fields.get("author_handle") or fields.get("platform_author_id") or "未知作者"
|
|
1165
1351
|
title = fields.get("title") or "(标题缺失)"
|
|
1166
1352
|
metrics_line = (
|
|
@@ -1168,10 +1354,7 @@ def _render_markdown(
|
|
|
1168
1354
|
f"藏 {_display_metric(fields.get('collect_count'))} / 转 {_display_metric(fields.get('share_count'))} / 播 {_display_metric(fields.get('play_count'))}"
|
|
1169
1355
|
)
|
|
1170
1356
|
precomputed_sections = fields.get("analysis_sections") if isinstance(fields.get("analysis_sections"), dict) else {}
|
|
1171
|
-
|
|
1172
|
-
analysis_sections = precomputed_sections
|
|
1173
|
-
else:
|
|
1174
|
-
analysis_sections = {} if card_type == "author_sample_work" else build_analysis_sections(fields)
|
|
1357
|
+
analysis_sections = precomputed_sections or build_analysis_sections(fields)
|
|
1175
1358
|
creative_modules = analysis_sections.get("modules", {})
|
|
1176
1359
|
insight_lines = analysis_sections.get("insight", ["数据不足"])
|
|
1177
1360
|
extract_trace_json = json.dumps(fields.get("extract_trace", []), ensure_ascii=False, indent=2)
|
|
@@ -1187,21 +1370,6 @@ def _render_markdown(
|
|
|
1187
1370
|
"author": fields.get("author"),
|
|
1188
1371
|
"author_handle": fields.get("author_handle"),
|
|
1189
1372
|
"platform_author_id": fields.get("platform_author_id"),
|
|
1190
|
-
"caption_raw": fields.get("caption_raw"),
|
|
1191
|
-
"primary_text": fields.get("primary_text"),
|
|
1192
|
-
"share_url": fields.get("share_url"),
|
|
1193
|
-
"source_url": fields.get("source_url"),
|
|
1194
|
-
"cover_image": fields.get("cover_image"),
|
|
1195
|
-
"video_download_url": fields.get("video_download_url"),
|
|
1196
|
-
"published_date": fields.get("published_date"),
|
|
1197
|
-
"duration_ms": fields.get("duration_ms"),
|
|
1198
|
-
"digg_count": fields.get("digg_count"),
|
|
1199
|
-
"comment_count": fields.get("comment_count"),
|
|
1200
|
-
"collect_count": fields.get("collect_count"),
|
|
1201
|
-
"share_count": fields.get("share_count"),
|
|
1202
|
-
"play_count": fields.get("play_count"),
|
|
1203
|
-
"tags": fields.get("tags", []),
|
|
1204
|
-
"work_modality": fields.get("work_modality"),
|
|
1205
1373
|
}
|
|
1206
1374
|
|
|
1207
1375
|
frontmatter = ["---"]
|
|
@@ -1218,10 +1386,8 @@ def _render_markdown(
|
|
|
1218
1386
|
f"- 原始文案:{fields.get('caption_raw') or 'N/A'}",
|
|
1219
1387
|
f"- 作品模态:{fields.get('work_modality') or '未知'}",
|
|
1220
1388
|
f"- 发布时间:{fields.get('published_date') or 'N/A'}",
|
|
1221
|
-
f"- {'视频时长' if fields.get('work_modality') == 'video' else '阅读载体'}:{_format_duration(fields.get('duration_ms', 0)) if fields.get('work_modality') == 'video' else '文本'}",
|
|
1222
1389
|
f"- 互动:{metrics_line}",
|
|
1223
1390
|
f"- 链接:{fields.get('share_url') or '(未提供)'}",
|
|
1224
|
-
f"- 下载链接:{fields.get('video_download_url') or 'N/A'}" if fields.get("work_modality") == "video" else "- 下载链接:N/A",
|
|
1225
1391
|
]
|
|
1226
1392
|
|
|
1227
1393
|
for heading in DEFAULT_MODULE_SECTIONS:
|
|
@@ -1235,20 +1401,11 @@ def _render_markdown(
|
|
|
1235
1401
|
for item in insight_lines:
|
|
1236
1402
|
lines.append(item)
|
|
1237
1403
|
|
|
1238
|
-
transcript_heading = "## 主文本"
|
|
1239
|
-
transcript_body = fields.get("primary_text")
|
|
1240
|
-
transcript_fallback = "(无可用主文本)"
|
|
1241
|
-
|
|
1242
1404
|
lines.extend(
|
|
1243
1405
|
[
|
|
1244
1406
|
"",
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
]
|
|
1248
|
-
)
|
|
1249
|
-
|
|
1250
|
-
lines.extend(
|
|
1251
|
-
[
|
|
1407
|
+
"## 主文本",
|
|
1408
|
+
fields.get("primary_text") or "(无可用主文本)",
|
|
1252
1409
|
"",
|
|
1253
1410
|
"## 附录",
|
|
1254
1411
|
"### ASR_RAW",
|
|
@@ -1299,6 +1456,9 @@ def write_benchmark_card(
|
|
|
1299
1456
|
content_kind: Optional[str] = None,
|
|
1300
1457
|
storage_config: Optional[Dict[str, Any]] = None,
|
|
1301
1458
|
force_card_type: bool = False,
|
|
1459
|
+
route_card_type: Optional[str] = None,
|
|
1460
|
+
route_extra_parts: Optional[List[str]] = None,
|
|
1461
|
+
card_role: Optional[str] = None,
|
|
1302
1462
|
) -> Dict[str, Any]:
|
|
1303
1463
|
now = dt.datetime.now()
|
|
1304
1464
|
generated_at = now.isoformat(timespec="seconds")
|
|
@@ -1313,17 +1473,30 @@ def write_benchmark_card(
|
|
|
1313
1473
|
storage_config=storage_config,
|
|
1314
1474
|
force_card_type=force_card_type,
|
|
1315
1475
|
)
|
|
1316
|
-
|
|
1476
|
+
effective_route_card_type = normalize_card_type(route_card_type) if route_card_type else effective_card_type
|
|
1477
|
+
effective_card_role = normalize_text(card_role) or normalize_text(payload.get("card_role"))
|
|
1478
|
+
explicit_route_override = bool(route_card_type or route_extra_parts)
|
|
1479
|
+
|
|
1480
|
+
payload_for_fields = dict(payload)
|
|
1481
|
+
if effective_card_role:
|
|
1482
|
+
payload_for_fields["card_role"] = effective_card_role
|
|
1483
|
+
fields = _extract_required_fields(payload_for_fields, platform=platform)
|
|
1484
|
+
if not fields.get("card_role"):
|
|
1485
|
+
if effective_card_type == "author":
|
|
1486
|
+
fields["card_role"] = AUTHOR_CARD_ROLE
|
|
1487
|
+
elif effective_card_type == "author_sample_work":
|
|
1488
|
+
fields["card_role"] = AUTHOR_SAMPLE_CARD_ROLE
|
|
1317
1489
|
resolved_card_root = _resolve_card_root(card_root)
|
|
1318
1490
|
|
|
1319
1491
|
primary_target = _build_output_path(
|
|
1320
1492
|
card_root=resolved_card_root,
|
|
1321
1493
|
platform=platform,
|
|
1322
|
-
card_type=effective_card_type,
|
|
1494
|
+
card_type=effective_route_card_type,
|
|
1323
1495
|
payload=payload,
|
|
1324
1496
|
now=now,
|
|
1325
1497
|
sample_author=sample_author,
|
|
1326
1498
|
storage_config=storage_config,
|
|
1499
|
+
extra_route_parts=route_extra_parts,
|
|
1327
1500
|
)
|
|
1328
1501
|
primary_path = primary_target["path"]
|
|
1329
1502
|
|
|
@@ -1340,12 +1513,16 @@ def write_benchmark_card(
|
|
|
1340
1513
|
"ok": True,
|
|
1341
1514
|
"platform": platform,
|
|
1342
1515
|
"card_type": effective_card_type,
|
|
1516
|
+
"card_role": fields.get("card_role"),
|
|
1343
1517
|
"requested_card_type": normalized_card_type,
|
|
1344
1518
|
"force_card_type": bool(force_card_type),
|
|
1345
1519
|
"content_kind": resolved_content_kind or None,
|
|
1346
1520
|
"primary_card_path": primary_path,
|
|
1347
1521
|
"routing": {
|
|
1522
|
+
"card_role": fields.get("card_role"),
|
|
1523
|
+
"route_key": effective_route_card_type,
|
|
1348
1524
|
"primary_route_parts": primary_target["route_parts"],
|
|
1525
|
+
"explicit_override": explicit_route_override,
|
|
1349
1526
|
"storage_routes_configured": bool(isinstance(storage_config, dict) and isinstance(storage_config.get("storage_routes"), dict)),
|
|
1350
1527
|
},
|
|
1351
1528
|
"required_fields": fields,
|