@tikomni/skills 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. package/package.json +4 -2
  2. package/skills/single-work-analysis/env.example +3 -3
  3. package/skills/single-work-analysis/references/config-templates/defaults.yaml +8 -19
  4. package/skills/single-work-analysis/references/prompt-contracts/{insight.md → analysis-bundle.md} +43 -8
  5. package/skills/single-work-analysis/scripts/core/analysis_adapter.py +384 -0
  6. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +399 -76
  7. package/skills/single-work-analysis/scripts/core/config_loader.py +18 -42
  8. package/skills/single-work-analysis/scripts/core/progress_report.py +163 -16
  9. package/skills/single-work-analysis/scripts/core/storage_router.py +24 -57
  10. package/skills/single-work-analysis/scripts/core/tikomni_common.py +13 -3
  11. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +154 -7
  12. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +3 -1
  13. package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +243 -44
  14. package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +263 -25
  15. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +244 -894
  16. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  17. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  18. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  19. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  20. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  21. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  22. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
@@ -1,24 +1,32 @@
1
1
  #!/usr/bin/env python3
2
- """Shared analysis pipeline helpers for benchmark card generation."""
2
+ """Shared analysis helpers for single-work benchmark card generation."""
3
+
4
+ from __future__ import annotations
3
5
 
4
6
  import json
5
7
  import os
6
8
  import re
7
- import subprocess
9
+ import time
8
10
  from typing import Any, Dict, List, Optional
9
11
 
10
- PROMPT_CONTRACT_FILES: Dict[str, str] = {
11
- "选题": "topic.md",
12
- "文风": "style.md",
13
- "Hook": "hook.md",
14
- "结构": "structure.md",
15
- "洞察分析": "insight.md",
16
- "CTA": "cta.md",
17
- "总结": "summary.md",
18
- }
12
+ from scripts.core.analysis_adapter import (
13
+ resolve_analysis_timeout,
14
+ resolve_preferred_provider,
15
+ run_structured_analysis,
16
+ )
17
+ from scripts.core.progress_report import ProgressReporter
18
+ from scripts.core.tikomni_common import normalize_text
19
+
19
20
 
21
# Prompt-contract file (resolved relative to contracts_dir()) that drives the
# single bundled LLM analysis request.
ANALYSIS_PROMPT_FILE = "analysis-bundle.md"
# Ordered module sections rendered on the benchmark card.
DEFAULT_MODULE_SECTIONS: List[str] = ["选题", "文风", "Hook", "结构"]
# Section name for the aggregated insight block.
DEFAULT_INSIGHT_SECTION = "洞察分析"
# Maps the Chinese section names to the field names expected in the LLM's
# structured reply payload.
SECTION_FIELD_MAP = {
    "选题": "topic",
    "文风": "style",
    "Hook": "hook",
    "结构": "structure",
}
22
30
 
23
31
 
24
32
  def contracts_dir() -> str:
@@ -40,12 +48,78 @@ def load_contract_prompt(filename: str) -> str:
40
48
  return content.strip()
41
49
 
42
50
 
51
def _normalize_lines(value: Any) -> List[str]:
    """Coerce *value* into a list of non-empty normalized text lines.

    Lists keep their order with empty entries dropped; any other value is
    normalized to at most one line.
    """
    if isinstance(value, list):
        # Normalize each item once (the previous form evaluated
        # normalize_text() twice per element: once for the filter, once
        # for the result).
        collected: List[str] = []
        for item in value:
            line = normalize_text(item)
            if line:
                collected.append(line)
        return collected
    text = normalize_text(value)
    return [text] if text else []
56
+
57
+
58
+ def _safe_int(value: Any, default: int = 0) -> int:
59
+ if value is None:
60
+ return default
61
+ if isinstance(value, bool):
62
+ return int(value)
63
+ if isinstance(value, int):
64
+ return value
65
+ if isinstance(value, float):
66
+ return int(value)
67
+ text = str(value).strip()
68
+ if not text:
69
+ return default
70
+ try:
71
+ return int(float(text))
72
+ except Exception:
73
+ return default
74
+
75
+
76
def _sentence_units(text: str) -> List[str]:
    """Split *text* into normalized sentence fragments.

    Splits on CJK and ASCII sentence terminators plus newlines; empty
    fragments are dropped.
    """
    units: List[str] = []
    # Normalize each fragment once; the one-liner it replaces called
    # normalize_text() twice per fragment (filter + result).
    for part in re.split(r"[。!?!?;;\n]+", normalize_text(text)):
        cleaned = normalize_text(part)
        if cleaned:
            units.append(cleaned)
    return units
78
+
79
+
80
def _first_sentence(text: str) -> str:
    """Return the leading sentence unit of *text*, or "" when none exist."""
    return next(iter(_sentence_units(text)), "")
83
+
84
+
85
def _hit_count(text: str, keywords: List[str]) -> int:
    """Count how many of *keywords* occur at least once in the normalized text."""
    haystack = normalize_text(text)
    if not haystack:
        return 0
    return len([needle for needle in keywords if needle in haystack])
90
+
91
+
92
def _top_keywords(text: str, candidates: List[str], topn: int = 3) -> List[str]:
    """Return up to *topn* candidates present in *text*.

    Ranked by occurrence count (descending), ties broken by shorter token
    first (stable for equal lengths).
    """
    haystack = normalize_text(text)
    if not haystack:
        return []
    occurrences = [(haystack.count(token), token) for token in candidates]
    ranked = sorted(
        (pair for pair in occurrences if pair[0] > 0),
        key=lambda pair: (-pair[0], len(pair[1])),
    )
    return [token for _, token in ranked[:topn]]
103
+
104
+
105
+ def _score_from_hits(hits: int, full_score_hits: int = 4) -> int:
106
+ if hits <= 0:
107
+ return 2
108
+ if hits >= full_score_hits:
109
+ return 5
110
+ return min(5, hits + 2)
111
+
112
+
43
113
  def _analysis_payload(fields: Dict[str, Any]) -> Dict[str, Any]:
44
- asr_raw = fields.get("raw_content")
114
+ asr_raw = normalize_text(fields.get("asr_raw") or fields.get("raw_content"))
45
115
  return {
46
- "title": fields.get("title"),
116
+ "platform": normalize_text(fields.get("platform")),
117
+ "title": normalize_text(fields.get("title")),
118
+ "caption_raw": normalize_text(fields.get("caption_raw")),
47
119
  "asr_raw": asr_raw,
48
- "asr_clean": fields.get("asr_clean") or asr_raw,
120
+ "asr_clean": normalize_text(fields.get("asr_clean")) or asr_raw,
121
+ "work_modality": normalize_text(fields.get("work_modality")),
122
+ "tags": fields.get("tags") if isinstance(fields.get("tags"), list) else [],
49
123
  "metrics": {
50
124
  "digg_count": fields.get("digg_count"),
51
125
  "comment_count": fields.get("comment_count"),
@@ -56,78 +130,327 @@ def _analysis_payload(fields: Dict[str, Any]) -> Dict[str, Any]:
56
130
  }
57
131
 
58
132
 
59
- def call_prompt_llm(section: str, prompt_text: str, fields: Dict[str, Any], timeout_sec: int = 240) -> str:
60
- if not prompt_text:
61
- return "数据不足"
62
-
63
- message = (
64
- f"请严格根据下面提示词原文完成【{section}】段落输出。\n"
65
- "要求:\n"
66
- "1) 不要解释提示词,不要输出思考过程。\n"
67
- "2) 不要套额外模板,不要输出打分。\n"
68
- "3) 仅输出该段正文内容。\n\n"
69
- "=== 提示词原文开始 ===\n"
70
- f"{prompt_text}\n"
71
- "=== 提示词原文结束 ===\n\n"
72
- "=== 输入数据(JSON) ===\n"
73
- f"{json.dumps(_analysis_payload(fields), ensure_ascii=False)}"
133
def _analyze_topic(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Classify the work's topic type, sub-themes and audience pain points.

    Keyword-hit heuristics over title + cleaned ASR transcript; returns
    {"lines": [...]} ready for card rendering.
    """
    title = normalize_text(fields.get("title"))
    asr = normalize_text(fields.get("asr_clean"))
    category = normalize_text(fields.get("category"))
    text = f"{title} {asr}".strip()
    if not text:
        # No usable text at all: emit "insufficient data" placeholder lines.
        return {
            "lines": ["- 类型:数据不足。", "- 细分主题:数据不足。", "- 受众痛点:数据不足。"],
        }

    # Signal keywords per coarse content type (traffic / persona / marketing).
    type_rules = {
        "流量型": ["热点", "挑战", "反转", "揭秘", "真相", "别再", "为什么", "踩坑"],
        "人设型": ["我是", "我们", "日常", "分享", "经历", "成长", "复盘", "带你"],
        "营销型": ["领取", "私信", "咨询", "下单", "课程", "优惠", "报名", "合作"],
    }
    type_scores = {name: _hit_count(text, rules) for name, rules in type_rules.items()}
    # Nudge scores with the upstream category tag when present.
    if category in {"教程", "知识", "方法"}:
        type_scores["营销型"] += 1
    if category in {"观点", "人设", "日常"}:
        type_scores["人设型"] += 1

    # max() keeps the first key on ties (dict insertion order above).
    main_type = max(type_scores, key=lambda key: type_scores[key])
    themes = _top_keywords(
        text,
        ["AI", "智能体", "变现", "副业", "教程", "工作流", "流量", "涨粉", "口播", "创业", "营销"],
        topn=3,
    )
    pains = _top_keywords(text, ["不会", "焦虑", "卡住", "没流量", "转化", "时间不够", "风险"], topn=2)
    return {
        "lines": [
            f"- 基础类型:{main_type}(命中信号 {type_scores[main_type]} 个)。",
            f"- 细分主题:{'、'.join(themes) if themes else '数据不足'}。",
            f"- 受众痛点:{'、'.join(pains) if pains else '以快速落地/降低门槛为主'}。",
        ],
    }
75
168
 
76
- try:
77
- run = subprocess.run(
78
- ["openclaw", "agent", "--agent", "main", "--message", message, "--json"],
79
- capture_output=True,
80
- text=True,
81
- timeout=timeout_sec,
82
- check=False,
83
- )
84
- data = json.loads(run.stdout or "{}")
85
- texts: List[str] = []
86
- for payload in data.get("result", {}).get("payloads", []):
87
- text = payload.get("text") if isinstance(payload, dict) else None
88
- if isinstance(text, str) and text.strip():
89
- texts.append(text.strip())
90
- if texts:
91
- return "\n".join(texts).strip()
92
- except Exception:
93
- pass
94
169
 
95
- return "数据不足"
170
def _analyze_style(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Summarize sentence length, tone distribution and persona/rhetoric signals."""
    asr = normalize_text(fields.get("asr_clean"))
    title = normalize_text(fields.get("title"))
    text = f"{title} {asr}".strip()
    units = _sentence_units(asr)
    if not text:
        return {
            "lines": ["- 句式结构:数据不足。", "- 语气分布:数据不足。", "- 人设与修辞:数据不足。"],
        }

    # Average sentence length in characters over the ASR units only.
    avg_len = int(sum(len(unit) for unit in units) / max(1, len(units))) if units else 0
    if avg_len <= 14:
        length_type = "短句为主"
    elif avg_len <= 24:
        length_type = "中短句混合"
    else:
        length_type = "中长句为主"
    # Tone marks counted over title+ASR (both halfwidth and fullwidth forms).
    q_count = text.count("?") + text.count("?")
    e_count = text.count("!") + text.count("!")
    # NOTE(review): units come from the ASR only while q/e counts include the
    # title, so this "statement" figure is an approximation — confirm intent.
    statement_count = max(0, len(units) - q_count - e_count)
    persona_hits = _hit_count(text, ["我", "我们", "你", "大家", "朋友们"])
    rhetoric_hits = _hit_count(text, ["不是", "而是", "其实", "真的", "一定", "必须", "先", "再"])
    return {
        "lines": [
            f"- 句式结构:{length_type},平均句长约 {avg_len} 字。",
            f"- 语气分布:疑问 {q_count} / 感叹 {e_count} / 陈述 {statement_count}。",
            f"- 人设与修辞:人设代词命中 {persona_hits} 次,强调/转折词命中 {rhetoric_hits} 次。",
        ],
    }
199
+
96
200
 
201
def _analyze_hook(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Identify the opening / mid / closing hooks of the work's transcript."""
    title = normalize_text(fields.get("title"))
    asr = normalize_text(fields.get("asr_clean"))
    units = _sentence_units(asr)
    # Opening hook: first ASR sentence, falling back to the title.
    first = _first_sentence(asr) or title
    # Mid hook: the sentence at the midpoint of the transcript.
    middle = units[len(units) // 2] if units else ""
    if not first:
        return {
            "lines": ["- 开头钩子:数据不足。", "- 中段钩子:数据不足。", "- 结尾钩子:数据不足。"],
        }

    # Classify the opening hook; order matters — question beats
    # counter-intuitive beats scene-setting; default is plain statement.
    hook_type = "陈述式"
    if any(token in first for token in ["?", "?", "为什么", "怎么"]):
        hook_type = "疑问式"
    elif any(token in first for token in ["别再", "误区", "真相", "不是"]):
        hook_type = "反常识式"
    elif any(token in first for token in ["当你", "如果", "今天"]):
        hook_type = "场景代入式"
    # Closing hook: last sentence containing a call-to-action keyword.
    end_candidates = [
        unit for unit in units if _hit_count(unit, ["关注", "评论", "私信", "收藏", "转发", "领取"]) > 0
    ]
    end = end_candidates[-1] if end_candidates else "未检测到明确结尾钩子"
    return {
        "lines": [
            f"- 开头钩子({hook_type}):{first}",
            f"- 中段钩子:{middle or '数据不足'}",
            f"- 结尾钩子:{end}",
        ],
    }
106
230
 
107
- for section in picked_sections:
108
- prompt_text = load_contract_prompt(mapping.get(section, ""))
109
- content = call_prompt_llm(section, prompt_text, fields)
110
- outputs[section] = [content if content else "数据不足"]
111
231
 
112
- return outputs
232
def _analyze_structure(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Label the transcript's narrative structure by keyword coverage.

    Counts sentences matching each structural label (hook / conflict / turn /
    evidence / CTA), then judges which narrative template fits.
    """
    units = _sentence_units(fields.get("asr_clean"))
    if not units:
        return {
            "lines": ["- 结构标签:数据不足。", "- 模板判定:数据不足。", "- 缺失模块:数据不足。"],
        }

    label_rules = {
        "钩子": ["?", "?", "为什么", "怎么", "别再", "真相", "当你", "如果"],
        "冲突": ["但是", "却", "问题", "误区", "卡住", "焦虑", "失败"],
        "转折": ["所以", "于是", "然后", "接着", "这时候", "其实"],
        "举证": ["数据", "案例", "比如", "步骤", "第一", "第二", "第三"],
        "CTA": ["评论", "关注", "私信", "收藏", "转发", "领取"],
    }
    # A sentence can contribute to several labels but at most once per label.
    coverage = {label: 0 for label in label_rules}
    for sentence in units:
        for label, keywords in label_rules.items():
            if any(keyword in sentence for keyword in keywords):
                coverage[label] += 1

    present = [label for label, count in coverage.items() if count > 0]
    missing = [label for label, count in coverage.items() if count == 0]
    # 4+ of the 5 labels present → full five-beat template; else minimal form.
    template = "钩子→冲突→转折→举证→CTA" if len(present) >= 4 else "钩子→观点→补充说明"
    return {
        "lines": [
            f"- 结构标签覆盖:{', '.join([f'{label}:{count}' for label, count in coverage.items()])}。",
            f"- 模板判定:{template}。",
            f"- 缺失模块:{'、'.join(missing) if missing else '无'}。",
        ],
    }
262
+
263
+
264
def _insight_metric_snapshot(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Compute a weighted interaction total and a rough per-play rate."""
    counts = {
        name: _safe_int(fields.get(name), default=0)
        for name in ("digg_count", "comment_count", "collect_count", "share_count", "play_count")
    }
    # Weighted sum: like=1, comment=2, collect=3, share=4 (stronger signals
    # weigh more).
    weighted = (
        counts["digg_count"]
        + counts["comment_count"] * 2
        + counts["collect_count"] * 3
        + counts["share_count"] * 4
    )
    plays = counts["play_count"]
    return {
        "interaction": weighted,
        "interaction_rate": weighted / plays if plays > 0 else 0.0,
    }
276
+
277
+
278
def _build_local_sections(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Build the full analysis payload from local keyword rules (no LLM).

    Returns {"modules": {...section lines...}, "insight": [...]} where the
    insight block leads with the strongest signal from each module plus two
    metric-derived lines.
    """
    topic = _analyze_topic(fields)
    style = _analyze_style(fields)
    hook = _analyze_hook(fields)
    structure = _analyze_structure(fields)
    metrics = _insight_metric_snapshot(fields)

    strongest_signals = []
    for section_name, section_payload in {
        "选题": topic,
        "文风": style,
        "Hook": hook,
        "结构": structure,
    }.items():
        lines = _normalize_lines(section_payload.get("lines"))
        if lines:
            # Strip only the literal "- " bullet prefix. The previous
            # lstrip('- ') treated the argument as a character SET and would
            # also eat leading dashes/spaces belonging to the content.
            headline = lines[0]
            if headline.startswith("- "):
                headline = headline[2:]
            strongest_signals.append(f"- {section_name}:{headline.strip()}")

    insight = strongest_signals[:3]
    insight.extend(
        [
            f"- 互动折算值:{metrics.get('interaction', 0)}。",
            f"- 粗略互动率:{metrics.get('interaction_rate', 0.0):.4f}。",
        ]
    )

    return {
        "modules": {
            "选题": _normalize_lines(topic.get("lines")) or ["数据不足"],
            "文风": _normalize_lines(style.get("lines")) or ["数据不足"],
            "Hook": _normalize_lines(hook.get("lines")) or ["数据不足"],
            "结构": _normalize_lines(structure.get("lines")) or ["数据不足"],
        },
        "insight": insight or ["数据不足"],
    }
313
+
314
+
315
def _normalize_llm_sections(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Map the LLM bundle's field names back onto the Chinese section names."""
    modules = {
        section_name: _normalize_lines(payload.get(field_name)) or ["数据不足"]
        for section_name, field_name in SECTION_FIELD_MAP.items()
    }
    return {
        "modules": modules,
        "insight": _normalize_lines(payload.get("insight")) or ["数据不足"],
    }
325
+
326
+
327
+ def _analysis_status_from_reason(reason: str) -> str:
328
+ if not reason:
329
+ return "completed"
330
+ if "timeout" in reason:
331
+ return "timeout"
332
+ if "unavailable" in reason:
333
+ return "unavailable"
334
+ if reason == "analysis_mode_local":
335
+ return "skipped"
336
+ return "failed"
337
+
338
+
339
def ensure_analysis_sections_schema(
    payload: Optional[Dict[str, Any]],
    *,
    provider: str = "local",
    llm_used: bool = False,
    degraded: bool = False,
    reason: str = "",
    duration_ms: int = 0,
) -> Dict[str, Any]:
    """Coerce *payload* into the canonical v2 analysis-sections envelope.

    Missing or malformed modules/insight collapse to the "数据不足"
    placeholder; meta flags are normalized to their declared types.
    """
    source = payload if isinstance(payload, dict) else {}
    raw_modules = source.get("modules")
    if not isinstance(raw_modules, dict):
        raw_modules = {}
    normalized_modules: Dict[str, List[str]] = {}
    for section in DEFAULT_MODULE_SECTIONS:
        normalized_modules[section] = _normalize_lines(raw_modules.get(section)) or ["数据不足"]
    meta = {
        "llm_used": bool(llm_used),
        "degraded": bool(degraded),
        "reason": normalize_text(reason),
        # Negative or falsy durations clamp to 0.
        "duration_ms": max(0, int(duration_ms or 0)),
    }
    return {
        "version": "v2",
        "provider": provider,
        "modules": normalized_modules,
        "insight": _normalize_lines(source.get("insight")) or ["数据不足"],
        "meta": meta,
    }
367
+
368
+
369
def build_analysis_sections(
    fields: Dict[str, Any],
    *,
    analysis_mode: str = "auto",
    analysis_config: Optional[Dict[str, Any]] = None,
    progress: Optional[ProgressReporter] = None,
) -> Dict[str, Any]:
    """Produce the card analysis sections, preferring the LLM path in "auto" mode.

    Order of resolution:
      1. missing title AND asr_clean  -> local rules, degraded=True
      2. mode == "local"              -> local rules
      3. provider preference "local"  -> local rules
      4. structured LLM analysis      -> normalized LLM sections
      5. LLM failure                  -> local rules, degraded=True, reason recorded

    Always returns the canonical envelope via ensure_analysis_sections_schema.
    """
    mode = str(analysis_mode or "auto").strip().lower()
    # Unknown modes fall back to "auto" rather than erroring.
    if mode not in {"auto", "local"}:
        mode = "auto"

    # Local sections are always computed up front so every fallback path has
    # a ready result.
    local_sections = _build_local_sections(fields)
    start_at = time.perf_counter()
    if not normalize_text(fields.get("title")) and not normalize_text(fields.get("asr_clean")):
        duration_ms = int((time.perf_counter() - start_at) * 1000)
        return ensure_analysis_sections_schema(
            local_sections,
            provider="local",
            llm_used=False,
            degraded=True,
            reason="analysis_input_missing",
            duration_ms=duration_ms,
        )

    if mode == "local":
        duration_ms = int((time.perf_counter() - start_at) * 1000)
        return ensure_analysis_sections_schema(
            local_sections,
            provider="local",
            llm_used=False,
            degraded=False,
            reason="analysis_mode_local",
            duration_ms=duration_ms,
        )

    prompt_text = load_contract_prompt(ANALYSIS_PROMPT_FILE)
    provider_preference = resolve_preferred_provider(analysis_config)
    # Config may force local even in "auto" mode; treated the same as
    # mode=="local" (not a degradation).
    if provider_preference == "local":
        duration_ms = int((time.perf_counter() - start_at) * 1000)
        return ensure_analysis_sections_schema(
            local_sections,
            provider="local",
            llm_used=False,
            degraded=False,
            reason="analysis_mode_local",
            duration_ms=duration_ms,
        )
    timeout_sec = resolve_analysis_timeout(analysis_config)
    llm_result = run_structured_analysis(
        prompt_text=prompt_text,
        payload=_analysis_payload(fields),
        provider=provider_preference,
        timeout_sec=timeout_sec,
        # Scope progress events to the analysis host when a reporter exists.
        progress=progress.child(scope="analysis.host") if progress is not None else None,
    )

    if llm_result.get("ok"):
        normalized = _normalize_llm_sections(llm_result.get("structured", {}))
        duration_ms = int(llm_result.get("duration_ms") or 0)
        return ensure_analysis_sections_schema(
            normalized,
            provider=str(llm_result.get("provider") or "openclaw"),
            llm_used=True,
            degraded=False,
            reason="",
            duration_ms=duration_ms,
        )

    # LLM path failed: report the fallback and return local rules, flagged
    # as degraded with the provider's failure reason.
    duration_ms = int(llm_result.get("duration_ms") or int((time.perf_counter() - start_at) * 1000))
    fallback_reason = normalize_text(llm_result.get("error_reason")) or "analysis_provider_unavailable"
    if progress is not None:
        progress.failed(
            stage="analysis.fallback",
            message="structured analysis degraded to local rules",
            data={
                "provider": llm_result.get("provider"),
                "error_reason": fallback_reason,
                "duration_ms": duration_ms,
            },
        )
    return ensure_analysis_sections_schema(
        local_sections,
        provider="local",
        llm_used=False,
        degraded=True,
        reason=fallback_reason,
        duration_ms=duration_ms,
    )
@@ -35,43 +35,34 @@ BUILTIN_DEFAULT_CONFIG: Dict[str, Any] = {
35
35
  },
36
36
  "content_kind_card_type": {
37
37
  "single_video": "work",
38
+ "note": "work",
38
39
  "work": "work",
39
- "author_home": "author_sample_work",
40
- "author_sample_work": "author_sample_work",
41
- "author_analysis": "author",
42
40
  },
43
41
  "card_type_routes": {
44
42
  "work": {
45
43
  "prefix": "CBV",
46
44
  "parts": ["内容系统", "对标研究", "作品卡"],
47
45
  },
48
- "author": {
49
- "prefix": "CBA",
50
- "parts": ["内容系统", "对标研究", "作者卡"],
51
- },
52
- "author_sample_work": {
53
- "prefix": "CBV",
54
- "parts": ["内容系统", "对标研究", "作者样本卡", "{platform}-{author_slug}"],
55
- },
56
46
  },
57
47
  },
58
48
  "naming_rules": {
59
- "card_filename_pattern": "{prefix}-{author_slug}-{title_slug}{ext}",
49
+ "card_filename_pattern": "{prefix}-{platform}-{author_slug}-{title_slug}{ext}",
60
50
  "json_filename_pattern": "{timestamp}-{platform}-{identifier}{ext}",
61
51
  },
52
+ "analysis": {
53
+ "provider": "auto",
54
+ "timeout_sec": 90,
55
+ },
62
56
  "asr_strategy": {
63
57
  "poll_interval_sec": 3.0,
64
- "max_polls": 30,
58
+ "max_polls": 10,
65
59
  "submit_retry": {
66
60
  "douyin_video": {"max_retries": 2, "backoff_ms": 1500},
67
61
  "xiaohongshu_note": {"max_retries": 0, "backoff_ms": 0},
68
62
  },
69
63
  "u2_timeout_retry": {
70
64
  "enabled": True,
71
- "max_retries": 3,
72
- },
73
- "author_home": {
74
- "batch_submit_size": 50,
65
+ "max_retries": 0,
75
66
  },
76
67
  },
77
68
  }
@@ -82,46 +73,21 @@ LOCALE_ROUTE_PRESETS: Dict[str, Dict[str, Dict[str, Any]]] = {
82
73
  "prefix": "CBV",
83
74
  "parts": ["内容系统", "对标研究", "作品卡"],
84
75
  },
85
- "author": {
86
- "prefix": "CBA",
87
- "parts": ["内容系统", "对标研究", "作者卡"],
88
- },
89
- "author_sample_work": {
90
- "prefix": "CBV",
91
- "parts": ["内容系统", "对标研究", "作者样本卡", "{platform}-{author_slug}"],
92
- },
93
76
  },
94
77
  "en": {
95
78
  "work": {
96
79
  "prefix": "CBV",
97
80
  "parts": ["content-system", "benchmark-research", "work-cards"],
98
81
  },
99
- "author": {
100
- "prefix": "CBA",
101
- "parts": ["content-system", "benchmark-research", "author-cards"],
102
- },
103
- "author_sample_work": {
104
- "prefix": "CBV",
105
- "parts": [
106
- "content-system",
107
- "benchmark-research",
108
- "author-sample-cards",
109
- "{platform}-{author_slug}",
110
- ],
111
- },
112
82
  },
113
83
  }
114
84
 
115
85
# Env var names that override the storage route per card type.
CARD_ROUTE_ENV_KEYS: Dict[str, str] = {
    "work": "TIKOMNI_CARD_ROUTE_WORK",
}
120
88
 
121
89
# Env var names that override the card filename prefix per card type.
CARD_PREFIX_ENV_KEYS: Dict[str, str] = {
    "work": "TIKOMNI_CARD_PREFIX_WORK",
}
126
92
 
127
93
 
@@ -289,6 +255,8 @@ def apply_env_overrides(config: Dict[str, Any], env_values: Optional[Dict[str, s
289
255
 
290
256
  naming_rules = config.setdefault("naming_rules", {}) if isinstance(config.get("naming_rules"), dict) else {}
291
257
  config["naming_rules"] = naming_rules
258
+ analysis = config.setdefault("analysis", {}) if isinstance(config.get("analysis"), dict) else {}
259
+ config["analysis"] = analysis
292
260
 
293
261
  timeout_ms = _env_int("TIKOMNI_TIMEOUT_MS", env_values=env_values)
294
262
  if timeout_ms is not None:
@@ -315,6 +283,14 @@ def apply_env_overrides(config: Dict[str, Any], env_values: Optional[Dict[str, s
315
283
  if json_filename_pattern is not None:
316
284
  naming_rules["json_filename_pattern"] = json_filename_pattern
317
285
 
286
+ analysis_provider = _env_text("TIKOMNI_ANALYSIS_PROVIDER", env_values=env_values)
287
+ if analysis_provider is not None:
288
+ analysis["provider"] = analysis_provider
289
+
290
+ analysis_timeout_sec = _env_int("TIKOMNI_ANALYSIS_TIMEOUT_SEC", env_values=env_values)
291
+ if analysis_timeout_sec is not None:
292
+ analysis["timeout_sec"] = analysis_timeout_sec
293
+
318
294
  path_locale = _normalize_path_locale(_read_env("TIKOMNI_PATH_LOCALE", env_values=env_values) or "zh")
319
295
  locale_routes = LOCALE_ROUTE_PRESETS.get(path_locale, LOCALE_ROUTE_PRESETS["zh"])
320
296