@tikomni/skills 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/skills/single-work-analysis/env.example +3 -3
- package/skills/single-work-analysis/references/config-templates/defaults.yaml +8 -19
- package/skills/single-work-analysis/references/prompt-contracts/{insight.md → analysis-bundle.md} +43 -8
- package/skills/single-work-analysis/scripts/core/analysis_adapter.py +384 -0
- package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +399 -76
- package/skills/single-work-analysis/scripts/core/config_loader.py +18 -42
- package/skills/single-work-analysis/scripts/core/progress_report.py +163 -16
- package/skills/single-work-analysis/scripts/core/storage_router.py +24 -57
- package/skills/single-work-analysis/scripts/core/tikomni_common.py +13 -3
- package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +154 -7
- package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +3 -1
- package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +243 -44
- package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +263 -25
- package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +244 -894
- package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
- package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
- package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
- package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
- package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
--- a/package/skills/single-work-analysis/scripts/core/analysis_pipeline.py
+++ b/package/skills/single-work-analysis/scripts/core/analysis_pipeline.py
@@ -1,24 +1,32 @@
 #!/usr/bin/env python3
-"""Shared analysis
+"""Shared analysis helpers for single-work benchmark card generation."""
+
+from __future__ import annotations

 import json
 import os
 import re
-import
+import time
 from typing import Any, Dict, List, Optional

-
-
-
-
-
-
-
-
-}
+from scripts.core.analysis_adapter import (
+    resolve_analysis_timeout,
+    resolve_preferred_provider,
+    run_structured_analysis,
+)
+from scripts.core.progress_report import ProgressReporter
+from scripts.core.tikomni_common import normalize_text
+

+ANALYSIS_PROMPT_FILE = "analysis-bundle.md"
 DEFAULT_MODULE_SECTIONS: List[str] = ["选题", "文风", "Hook", "结构"]
 DEFAULT_INSIGHT_SECTION = "洞察分析"
+SECTION_FIELD_MAP = {
+    "选题": "topic",
+    "文风": "style",
+    "Hook": "hook",
+    "结构": "structure",
+}


 def contracts_dir() -> str:
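This first hunk swaps the old per-module prompt plumbing for a single `analysis-bundle.md` contract, and `SECTION_FIELD_MAP` is what ties the bundle's English field names back to the card's Chinese section headings. A minimal sketch of that mapping with an invented payload (the real normalization lives in `_normalize_llm_sections`, added in a later hunk):

```python
# Sketch: mapping a structured analysis-bundle payload onto card sections.
# The payload values here are invented for illustration.
SECTION_FIELD_MAP = {"选题": "topic", "文风": "style", "Hook": "hook", "结构": "structure"}

bundle = {
    "topic": ["- 基础类型:流量型。"],
    "style": ["- 句式结构:短句为主。"],
    "hook": ["- 开头钩子(疑问式):为什么你的口播没流量?"],
    "structure": ["- 模板判定:钩子→冲突→转折→举证→CTA。"],
}

modules = {section: bundle.get(field) or ["数据不足"] for section, field in SECTION_FIELD_MAP.items()}
print(modules["Hook"][0])  # - 开头钩子(疑问式):为什么你的口播没流量?
```

The next hunk adds the rule-based helpers that back the local fallback path.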
@@ -40,12 +48,78 @@ def load_contract_prompt(filename: str) -> str:
     return content.strip()


+def _normalize_lines(value: Any) -> List[str]:
+    if isinstance(value, list):
+        return [normalize_text(item) for item in value if normalize_text(item)]
+    text = normalize_text(value)
+    return [text] if text else []
+
+
+def _safe_int(value: Any, default: int = 0) -> int:
+    if value is None:
+        return default
+    if isinstance(value, bool):
+        return int(value)
+    if isinstance(value, int):
+        return value
+    if isinstance(value, float):
+        return int(value)
+    text = str(value).strip()
+    if not text:
+        return default
+    try:
+        return int(float(text))
+    except Exception:
+        return default
+
+
+def _sentence_units(text: str) -> List[str]:
+    return [normalize_text(part) for part in re.split(r"[。!?!?;;\n]+", normalize_text(text)) if normalize_text(part)]
+
+
+def _first_sentence(text: str) -> str:
+    units = _sentence_units(text)
+    return units[0] if units else ""
+
+
+def _hit_count(text: str, keywords: List[str]) -> int:
+    base = normalize_text(text)
+    if not base:
+        return 0
+    return sum(1 for token in keywords if token in base)
+
+
+def _top_keywords(text: str, candidates: List[str], topn: int = 3) -> List[str]:
+    base = normalize_text(text)
+    if not base:
+        return []
+    scored = []
+    for token in candidates:
+        count = base.count(token)
+        if count > 0:
+            scored.append((count, token))
+    scored.sort(key=lambda item: (-item[0], len(item[1])))
+    return [token for _, token in scored[:topn]]
+
+
+def _score_from_hits(hits: int, full_score_hits: int = 4) -> int:
+    if hits <= 0:
+        return 2
+    if hits >= full_score_hits:
+        return 5
+    return min(5, hits + 2)
+
+
 def _analysis_payload(fields: Dict[str, Any]) -> Dict[str, Any]:
-    asr_raw = fields.get("raw_content")
+    asr_raw = normalize_text(fields.get("asr_raw") or fields.get("raw_content"))
     return {
-        "
+        "platform": normalize_text(fields.get("platform")),
+        "title": normalize_text(fields.get("title")),
+        "caption_raw": normalize_text(fields.get("caption_raw")),
         "asr_raw": asr_raw,
-        "asr_clean": fields.get("asr_clean") or asr_raw,
+        "asr_clean": normalize_text(fields.get("asr_clean")) or asr_raw,
+        "work_modality": normalize_text(fields.get("work_modality")),
+        "tags": fields.get("tags") if isinstance(fields.get("tags"), list) else [],
         "metrics": {
             "digg_count": fields.get("digg_count"),
             "comment_count": fields.get("comment_count"),
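These helpers are small pure functions, so their rules are easy to pin down in isolation. A condensed, standalone sketch of two of them (`normalize_text` is elided for brevity; the behavior otherwise mirrors the hunk above):

```python
from typing import Any, List

def _safe_int(value: Any, default: int = 0) -> int:
    # bool is a subclass of int, so it is handled before the numeric branch
    if value is None:
        return default
    if isinstance(value, bool):
        return int(value)
    if isinstance(value, (int, float)):
        return int(value)
    text = str(value).strip()
    if not text:
        return default
    try:
        return int(float(text))  # accepts "3500.0" as well as "3500"
    except Exception:
        return default

def _top_keywords(text: str, candidates: List[str], topn: int = 3) -> List[str]:
    scored = [(text.count(t), t) for t in candidates if t in text]
    scored.sort(key=lambda item: (-item[0], len(item[1])))  # frequent first, shorter breaks ties
    return [t for _, t in scored[:topn]]

print(_safe_int("1.2w"))    # 0 -> non-numeric strings fall back to the default
print(_safe_int("3500.0"))  # 3500
print(_top_keywords("AI 工作流,AI 变现,AI 口播", ["AI", "变现", "口播", "创业"]))
# ['AI', '变现', '口播']
```

The bulk of the change follows: per-module analyzers for the local path, plus the new orchestration entry points.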
@@ -56,78 +130,327 @@ def _analysis_payload(fields: Dict[str, Any]) -> Dict[str, Any]:
     }


-def
-
-
-
-
-
-
-
-
-
-
-        "
-        "
-
+def _analyze_topic(fields: Dict[str, Any]) -> Dict[str, Any]:
+    title = normalize_text(fields.get("title"))
+    asr = normalize_text(fields.get("asr_clean"))
+    category = normalize_text(fields.get("category"))
+    text = f"{title} {asr}".strip()
+    if not text:
+        return {
+            "lines": ["- 类型:数据不足。", "- 细分主题:数据不足。", "- 受众痛点:数据不足。"],
+        }
+
+    type_rules = {
+        "流量型": ["热点", "挑战", "反转", "揭秘", "真相", "别再", "为什么", "踩坑"],
+        "人设型": ["我是", "我们", "日常", "分享", "经历", "成长", "复盘", "带你"],
+        "营销型": ["领取", "私信", "咨询", "下单", "课程", "优惠", "报名", "合作"],
+    }
+    type_scores = {name: _hit_count(text, rules) for name, rules in type_rules.items()}
+    if category in {"教程", "知识", "方法"}:
+        type_scores["营销型"] += 1
+    if category in {"观点", "人设", "日常"}:
+        type_scores["人设型"] += 1
+
+    main_type = max(type_scores, key=lambda key: type_scores[key])
+    themes = _top_keywords(
+        text,
+        ["AI", "智能体", "变现", "副业", "教程", "工作流", "流量", "涨粉", "口播", "创业", "营销"],
+        topn=3,
     )
+    pains = _top_keywords(text, ["不会", "焦虑", "卡住", "没流量", "转化", "时间不够", "风险"], topn=2)
+    return {
+        "lines": [
+            f"- 基础类型:{main_type}(命中信号 {type_scores[main_type]} 个)。",
+            f"- 细分主题:{'、'.join(themes) if themes else '数据不足'}。",
+            f"- 受众痛点:{'、'.join(pains) if pains else '以快速落地/降低门槛为主'}。",
+        ],
+    }

-    try:
-        run = subprocess.run(
-            ["openclaw", "agent", "--agent", "main", "--message", message, "--json"],
-            capture_output=True,
-            text=True,
-            timeout=timeout_sec,
-            check=False,
-        )
-        data = json.loads(run.stdout or "{}")
-        texts: List[str] = []
-        for payload in data.get("result", {}).get("payloads", []):
-            text = payload.get("text") if isinstance(payload, dict) else None
-            if isinstance(text, str) and text.strip():
-                texts.append(text.strip())
-        if texts:
-            return "\n".join(texts).strip()
-    except Exception:
-        pass

-
+def _analyze_style(fields: Dict[str, Any]) -> Dict[str, Any]:
+    asr = normalize_text(fields.get("asr_clean"))
+    title = normalize_text(fields.get("title"))
+    text = f"{title} {asr}".strip()
+    units = _sentence_units(asr)
+    if not text:
+        return {
+            "lines": ["- 句式结构:数据不足。", "- 语气分布:数据不足。", "- 人设与修辞:数据不足。"],
+        }
+
+    avg_len = int(sum(len(unit) for unit in units) / max(1, len(units))) if units else 0
+    if avg_len <= 14:
+        length_type = "短句为主"
+    elif avg_len <= 24:
+        length_type = "中短句混合"
+    else:
+        length_type = "中长句为主"
+    q_count = text.count("?") + text.count("?")
+    e_count = text.count("!") + text.count("!")
+    statement_count = max(0, len(units) - q_count - e_count)
+    persona_hits = _hit_count(text, ["我", "我们", "你", "大家", "朋友们"])
+    rhetoric_hits = _hit_count(text, ["不是", "而是", "其实", "真的", "一定", "必须", "先", "再"])
+    return {
+        "lines": [
+            f"- 句式结构:{length_type},平均句长约 {avg_len} 字。",
+            f"- 语气分布:疑问 {q_count} / 感叹 {e_count} / 陈述 {statement_count}。",
+            f"- 人设与修辞:人设代词命中 {persona_hits} 次,强调/转折词命中 {rhetoric_hits} 次。",
+        ],
+    }
+

+def _analyze_hook(fields: Dict[str, Any]) -> Dict[str, Any]:
+    title = normalize_text(fields.get("title"))
+    asr = normalize_text(fields.get("asr_clean"))
+    units = _sentence_units(asr)
+    first = _first_sentence(asr) or title
+    middle = units[len(units) // 2] if units else ""
+    if not first:
+        return {
+            "lines": ["- 开头钩子:数据不足。", "- 中段钩子:数据不足。", "- 结尾钩子:数据不足。"],
+        }

-
-
-
-
-
-
-
-
+    hook_type = "陈述式"
+    if any(token in first for token in ["?", "?", "为什么", "怎么"]):
+        hook_type = "疑问式"
+    elif any(token in first for token in ["别再", "误区", "真相", "不是"]):
+        hook_type = "反常识式"
+    elif any(token in first for token in ["当你", "如果", "今天"]):
+        hook_type = "场景代入式"
+    end_candidates = [
+        unit for unit in units if _hit_count(unit, ["关注", "评论", "私信", "收藏", "转发", "领取"]) > 0
+    ]
+    end = end_candidates[-1] if end_candidates else "未检测到明确结尾钩子"
+    return {
+        "lines": [
+            f"- 开头钩子({hook_type}):{first}",
+            f"- 中段钩子:{middle or '数据不足'}",
+            f"- 结尾钩子:{end}",
+        ],
+    }

-    for section in picked_sections:
-        prompt_text = load_contract_prompt(mapping.get(section, ""))
-        content = call_prompt_llm(section, prompt_text, fields)
-        outputs[section] = [content if content else "数据不足"]

-
+def _analyze_structure(fields: Dict[str, Any]) -> Dict[str, Any]:
+    units = _sentence_units(fields.get("asr_clean"))
+    if not units:
+        return {
+            "lines": ["- 结构标签:数据不足。", "- 模板判定:数据不足。", "- 缺失模块:数据不足。"],
+        }

+    label_rules = {
+        "钩子": ["?", "?", "为什么", "怎么", "别再", "真相", "当你", "如果"],
+        "冲突": ["但是", "却", "问题", "误区", "卡住", "焦虑", "失败"],
+        "转折": ["所以", "于是", "然后", "接着", "这时候", "其实"],
+        "举证": ["数据", "案例", "比如", "步骤", "第一", "第二", "第三"],
+        "CTA": ["评论", "关注", "私信", "收藏", "转发", "领取"],
+    }
+    coverage = {label: 0 for label in label_rules}
+    for sentence in units:
+        for label, keywords in label_rules.items():
+            if any(keyword in sentence for keyword in keywords):
+                coverage[label] += 1

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    present = [label for label, count in coverage.items() if count > 0]
+    missing = [label for label, count in coverage.items() if count == 0]
+    template = "钩子→冲突→转折→举证→CTA" if len(present) >= 4 else "钩子→观点→补充说明"
+    return {
+        "lines": [
+            f"- 结构标签覆盖:{', '.join([f'{label}:{count}' for label, count in coverage.items()])}。",
+            f"- 模板判定:{template}。",
+            f"- 缺失模块:{'、'.join(missing) if missing else '无'}。",
+        ],
+    }
+
+
+def _insight_metric_snapshot(fields: Dict[str, Any]) -> Dict[str, Any]:
+    digg = _safe_int(fields.get("digg_count"), default=0)
+    comment = _safe_int(fields.get("comment_count"), default=0)
+    collect = _safe_int(fields.get("collect_count"), default=0)
+    share = _safe_int(fields.get("share_count"), default=0)
+    play = _safe_int(fields.get("play_count"), default=0)
+    interaction = digg + comment * 2 + collect * 3 + share * 4
+    interaction_rate = interaction / play if play > 0 else 0.0
+    return {
+        "interaction": interaction,
+        "interaction_rate": interaction_rate,
+    }
+
+
+def _build_local_sections(fields: Dict[str, Any]) -> Dict[str, Any]:
+    topic = _analyze_topic(fields)
+    style = _analyze_style(fields)
+    hook = _analyze_hook(fields)
+    structure = _analyze_structure(fields)
+    metrics = _insight_metric_snapshot(fields)
+
+    strongest_signals = []
+    for section_name, section_payload in {
+        "选题": topic,
+        "文风": style,
+        "Hook": hook,
+        "结构": structure,
+    }.items():
+        first_line = _normalize_lines(section_payload.get("lines"))
+        if first_line:
+            strongest_signals.append(f"- {section_name}:{first_line[0].lstrip('- ').strip()}")
+
+    insight = strongest_signals[:3]
+    insight.extend(
+        [
+            f"- 互动折算值:{metrics.get('interaction', 0)}。",
+            f"- 粗略互动率:{metrics.get('interaction_rate', 0.0):.4f}。",
+        ]
+    )
+
+    return {
+        "modules": {
+            "选题": _normalize_lines(topic.get("lines")) or ["数据不足"],
+            "文风": _normalize_lines(style.get("lines")) or ["数据不足"],
+            "Hook": _normalize_lines(hook.get("lines")) or ["数据不足"],
+            "结构": _normalize_lines(structure.get("lines")) or ["数据不足"],
+        },
+        "insight": insight or ["数据不足"],
+    }
+
+
+def _normalize_llm_sections(payload: Dict[str, Any]) -> Dict[str, Any]:
+    modules: Dict[str, List[str]] = {}
+    for section_name, field_name in SECTION_FIELD_MAP.items():
+        lines = _normalize_lines(payload.get(field_name))
+        modules[section_name] = lines or ["数据不足"]
+    insight = _normalize_lines(payload.get("insight")) or ["数据不足"]
     return {
         "modules": modules,
         "insight": insight,
     }
+
+
+def _analysis_status_from_reason(reason: str) -> str:
+    if not reason:
+        return "completed"
+    if "timeout" in reason:
+        return "timeout"
+    if "unavailable" in reason:
+        return "unavailable"
+    if reason == "analysis_mode_local":
+        return "skipped"
+    return "failed"
+
+
+def ensure_analysis_sections_schema(
+    payload: Optional[Dict[str, Any]],
+    *,
+    provider: str = "local",
+    llm_used: bool = False,
+    degraded: bool = False,
+    reason: str = "",
+    duration_ms: int = 0,
+) -> Dict[str, Any]:
+    source = payload if isinstance(payload, dict) else {}
+    modules_raw = source.get("modules") if isinstance(source.get("modules"), dict) else {}
+    modules = {
+        section: _normalize_lines(modules_raw.get(section)) or ["数据不足"]
+        for section in DEFAULT_MODULE_SECTIONS
+    }
+    insight = _normalize_lines(source.get("insight")) or ["数据不足"]
+    return {
+        "version": "v2",
+        "provider": provider,
+        "modules": modules,
+        "insight": insight,
+        "meta": {
+            "llm_used": bool(llm_used),
+            "degraded": bool(degraded),
+            "reason": normalize_text(reason),
+            "duration_ms": max(0, int(duration_ms or 0)),
+        },
+    }
+
+
+def build_analysis_sections(
+    fields: Dict[str, Any],
+    *,
+    analysis_mode: str = "auto",
+    analysis_config: Optional[Dict[str, Any]] = None,
+    progress: Optional[ProgressReporter] = None,
+) -> Dict[str, Any]:
+    mode = str(analysis_mode or "auto").strip().lower()
+    if mode not in {"auto", "local"}:
+        mode = "auto"
+
+    local_sections = _build_local_sections(fields)
+    start_at = time.perf_counter()
+    if not normalize_text(fields.get("title")) and not normalize_text(fields.get("asr_clean")):
+        duration_ms = int((time.perf_counter() - start_at) * 1000)
+        return ensure_analysis_sections_schema(
+            local_sections,
+            provider="local",
+            llm_used=False,
+            degraded=True,
+            reason="analysis_input_missing",
+            duration_ms=duration_ms,
+        )
+
+    if mode == "local":
+        duration_ms = int((time.perf_counter() - start_at) * 1000)
+        return ensure_analysis_sections_schema(
+            local_sections,
+            provider="local",
+            llm_used=False,
+            degraded=False,
+            reason="analysis_mode_local",
+            duration_ms=duration_ms,
+        )
+
+    prompt_text = load_contract_prompt(ANALYSIS_PROMPT_FILE)
+    provider_preference = resolve_preferred_provider(analysis_config)
+    if provider_preference == "local":
+        duration_ms = int((time.perf_counter() - start_at) * 1000)
+        return ensure_analysis_sections_schema(
+            local_sections,
+            provider="local",
+            llm_used=False,
+            degraded=False,
+            reason="analysis_mode_local",
+            duration_ms=duration_ms,
+        )
+    timeout_sec = resolve_analysis_timeout(analysis_config)
+    llm_result = run_structured_analysis(
+        prompt_text=prompt_text,
+        payload=_analysis_payload(fields),
+        provider=provider_preference,
+        timeout_sec=timeout_sec,
+        progress=progress.child(scope="analysis.host") if progress is not None else None,
+    )
+
+    if llm_result.get("ok"):
+        normalized = _normalize_llm_sections(llm_result.get("structured", {}))
+        duration_ms = int(llm_result.get("duration_ms") or 0)
+        return ensure_analysis_sections_schema(
+            normalized,
+            provider=str(llm_result.get("provider") or "openclaw"),
+            llm_used=True,
+            degraded=False,
+            reason="",
+            duration_ms=duration_ms,
+        )
+
+    duration_ms = int(llm_result.get("duration_ms") or int((time.perf_counter() - start_at) * 1000))
+    fallback_reason = normalize_text(llm_result.get("error_reason")) or "analysis_provider_unavailable"
+    if progress is not None:
+        progress.failed(
+            stage="analysis.fallback",
+            message="structured analysis degraded to local rules",
+            data={
+                "provider": llm_result.get("provider"),
+                "error_reason": fallback_reason,
+                "duration_ms": duration_ms,
+            },
+        )
+    return ensure_analysis_sections_schema(
+        local_sections,
+        provider="local",
+        llm_used=False,
+        degraded=True,
+        reason=fallback_reason,
+        duration_ms=duration_ms,
+    )
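Whichever path runs, callers now get the same `v2` envelope back, so downstream writers no longer need to care whether the LLM or the local rules produced the sections. A hypothetical call (the module path `scripts.core.analysis_pipeline` is inferred from the file list above, not stated in the hunks):

```python
# Hypothetical usage, assuming the skill's tree is importable from a checkout.
from scripts.core.analysis_pipeline import build_analysis_sections

sections = build_analysis_sections(
    {"title": "为什么你的口播没流量", "asr_clean": "别再乱拍了。其实方法很简单。", "digg_count": "1200"},
    analysis_mode="local",  # skip the LLM entirely; use the rule-based analyzers
)
assert sections["version"] == "v2"
assert sections["provider"] == "local"
assert sections["meta"]["llm_used"] is False
assert sections["meta"]["reason"] == "analysis_mode_local"
print(sections["modules"]["Hook"][0])  # e.g. - 开头钩子(反常识式):别再乱拍了
```

The remaining hunks are in scripts/core/config_loader.py, where the defaults and environment overrides pick up a matching `analysis` block.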
--- a/package/skills/single-work-analysis/scripts/core/config_loader.py
+++ b/package/skills/single-work-analysis/scripts/core/config_loader.py
@@ -35,43 +35,34 @@ BUILTIN_DEFAULT_CONFIG: Dict[str, Any] = {
         },
         "content_kind_card_type": {
             "single_video": "work",
+            "note": "work",
             "work": "work",
-            "author_home": "author_sample_work",
-            "author_sample_work": "author_sample_work",
-            "author_analysis": "author",
         },
         "card_type_routes": {
             "work": {
                 "prefix": "CBV",
                 "parts": ["内容系统", "对标研究", "作品卡"],
             },
-            "author": {
-                "prefix": "CBA",
-                "parts": ["内容系统", "对标研究", "作者卡"],
-            },
-            "author_sample_work": {
-                "prefix": "CBV",
-                "parts": ["内容系统", "对标研究", "作者样本卡", "{platform}-{author_slug}"],
-            },
         },
     },
     "naming_rules": {
-        "card_filename_pattern": "{prefix}-{author_slug}-{title_slug}{ext}",
+        "card_filename_pattern": "{prefix}-{platform}-{author_slug}-{title_slug}{ext}",
         "json_filename_pattern": "{timestamp}-{platform}-{identifier}{ext}",
     },
+    "analysis": {
+        "provider": "auto",
+        "timeout_sec": 90,
+    },
     "asr_strategy": {
         "poll_interval_sec": 3.0,
-        "max_polls":
+        "max_polls": 10,
         "submit_retry": {
             "douyin_video": {"max_retries": 2, "backoff_ms": 1500},
             "xiaohongshu_note": {"max_retries": 0, "backoff_ms": 0},
         },
         "u2_timeout_retry": {
             "enabled": True,
-            "max_retries":
-        },
-        "author_home": {
-            "batch_submit_size": 50,
+            "max_retries": 0,
         },
     },
 }
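With `{platform}` now part of the default card filename, works pulled from different platforms by the same author no longer collide. A quick sketch of the substitution (slug values invented):

```python
# Sketch: the new default card_filename_pattern, with invented slug values.
pattern = "{prefix}-{platform}-{author_slug}-{title_slug}{ext}"
print(pattern.format(
    prefix="CBV",                 # the "work" route prefix above
    platform="douyin",            # hypothetical platform slug
    author_slug="zhangsan",       # hypothetical author slug
    title_slug="koubo-liuliang",  # hypothetical title slug
    ext=".md",
))
# CBV-douyin-zhangsan-koubo-liuliang.md
```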
@@ -82,46 +73,21 @@ LOCALE_ROUTE_PRESETS: Dict[str, Dict[str, Dict[str, Any]]] = {
             "prefix": "CBV",
             "parts": ["内容系统", "对标研究", "作品卡"],
         },
-        "author": {
-            "prefix": "CBA",
-            "parts": ["内容系统", "对标研究", "作者卡"],
-        },
-        "author_sample_work": {
-            "prefix": "CBV",
-            "parts": ["内容系统", "对标研究", "作者样本卡", "{platform}-{author_slug}"],
-        },
     },
     "en": {
         "work": {
             "prefix": "CBV",
             "parts": ["content-system", "benchmark-research", "work-cards"],
         },
-        "author": {
-            "prefix": "CBA",
-            "parts": ["content-system", "benchmark-research", "author-cards"],
-        },
-        "author_sample_work": {
-            "prefix": "CBV",
-            "parts": [
-                "content-system",
-                "benchmark-research",
-                "author-sample-cards",
-                "{platform}-{author_slug}",
-            ],
-        },
     },
 }

 CARD_ROUTE_ENV_KEYS: Dict[str, str] = {
     "work": "TIKOMNI_CARD_ROUTE_WORK",
-    "author": "TIKOMNI_CARD_ROUTE_AUTHOR",
-    "author_sample_work": "TIKOMNI_CARD_ROUTE_AUTHOR_SAMPLE_WORK",
 }

 CARD_PREFIX_ENV_KEYS: Dict[str, str] = {
     "work": "TIKOMNI_CARD_PREFIX_WORK",
-    "author": "TIKOMNI_CARD_PREFIX_AUTHOR",
-    "author_sample_work": "TIKOMNI_CARD_PREFIX_AUTHOR_SAMPLE_WORK",
 }
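With the author and author-sample routes gone, the presets are work-only in both locales; which locale applies is still driven by `TIKOMNI_PATH_LOCALE`, as the `apply_env_overrides` hunks below show. A reduced sketch of that selection (`_normalize_path_locale` is approximated here by a lower-cased strip):

```python
# Reduced sketch of locale route selection; falls back to "zh" when unset.
import os

LOCALE_ROUTE_PRESETS = {
    "zh": {"work": {"prefix": "CBV", "parts": ["内容系统", "对标研究", "作品卡"]}},
    "en": {"work": {"prefix": "CBV", "parts": ["content-system", "benchmark-research", "work-cards"]}},
}

path_locale = (os.environ.get("TIKOMNI_PATH_LOCALE") or "zh").strip().lower()
routes = LOCALE_ROUTE_PRESETS.get(path_locale, LOCALE_ROUTE_PRESETS["zh"])
print("/".join(routes["work"]["parts"]))  # 内容系统/对标研究/作品卡 when unset
```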
@@ -289,6 +255,8 @@ def apply_env_overrides(config: Dict[str, Any], env_values: Optional[Dict[str, s

     naming_rules = config.setdefault("naming_rules", {}) if isinstance(config.get("naming_rules"), dict) else {}
     config["naming_rules"] = naming_rules
+    analysis = config.setdefault("analysis", {}) if isinstance(config.get("analysis"), dict) else {}
+    config["analysis"] = analysis

     timeout_ms = _env_int("TIKOMNI_TIMEOUT_MS", env_values=env_values)
     if timeout_ms is not None:
@@ -315,6 +283,14 @@ def apply_env_overrides(config: Dict[str, Any], env_values: Optional[Dict[str, s
     if json_filename_pattern is not None:
         naming_rules["json_filename_pattern"] = json_filename_pattern

+    analysis_provider = _env_text("TIKOMNI_ANALYSIS_PROVIDER", env_values=env_values)
+    if analysis_provider is not None:
+        analysis["provider"] = analysis_provider
+
+    analysis_timeout_sec = _env_int("TIKOMNI_ANALYSIS_TIMEOUT_SEC", env_values=env_values)
+    if analysis_timeout_sec is not None:
+        analysis["timeout_sec"] = analysis_timeout_sec
+
     path_locale = _normalize_path_locale(_read_env("TIKOMNI_PATH_LOCALE", env_values=env_values) or "zh")
     locale_routes = LOCALE_ROUTE_PRESETS.get(path_locale, LOCALE_ROUTE_PRESETS["zh"])
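Together with the new `analysis` defaults above, these two overrides give deploy-time control of the provider and timeout without editing `defaults.yaml`. A reduced sketch of how they land (reading the environment directly instead of the package's `_env_text`/`_env_int` helpers):

```python
# Reduced sketch: environment overrides for the analysis block.
import os

os.environ["TIKOMNI_ANALYSIS_PROVIDER"] = "local"  # force the rule-based path
os.environ["TIKOMNI_ANALYSIS_TIMEOUT_SEC"] = "30"  # tighten the LLM budget otherwise

config = {"analysis": {"provider": "auto", "timeout_sec": 90}}  # built-in defaults
provider = os.environ.get("TIKOMNI_ANALYSIS_PROVIDER")
if provider:
    config["analysis"]["provider"] = provider
timeout_sec = os.environ.get("TIKOMNI_ANALYSIS_TIMEOUT_SEC")
if timeout_sec:
    config["analysis"]["timeout_sec"] = int(timeout_sec)
print(config["analysis"])  # {'provider': 'local', 'timeout_sec': 30}
```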