@tikomni/skills 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (480)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -3
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +252 -9
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +108 -167
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +6 -2
  30. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/{creator-analysis → social-media-crawl}/scripts/core/tikomni_common.py +13 -3
  33. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  34. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  36. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  37. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  38. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  39. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  40. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +282 -174
  41. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  42. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +290 -141
  43. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  45. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  46. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  47. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  48. package/skills/creator-analysis/SKILL.md +0 -95
  49. package/skills/creator-analysis/agents/openai.yaml +0 -4
  50. package/skills/creator-analysis/env.example +0 -36
  51. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  52. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  53. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  54. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  55. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  56. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  57. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  59. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  60. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  61. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  63. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  64. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  65. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  66. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  67. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  68. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  69. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  71. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  72. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  73. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  74. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  75. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  76. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  77. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  78. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  79. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  80. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  81. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  82. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  83. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  84. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  85. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  86. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  87. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  88. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  89. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  90. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  91. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  92. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  93. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  94. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  96. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  100. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  101. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  103. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  104. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  105. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  106. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  107. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  108. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  109. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  110. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  113. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  114. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  116. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  117. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  118. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  119. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  120. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  121. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  122. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  123. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  124. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  125. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  126. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  127. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  128. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  129. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  130. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  131. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  132. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  133. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  134. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  135. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  136. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  137. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  138. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  139. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  140. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  141. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  142. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  143. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  144. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  145. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  146. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  147. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  148. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  149. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  153. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  154. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  155. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  156. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  157. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  158. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  159. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  160. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  161. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  162. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  163. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  164. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  165. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  167. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  168. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  169. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  170. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  171. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  172. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  173. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  174. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  175. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  176. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  177. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  178. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  179. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  180. package/skills/creator-analysis/references/workflow.md +0 -23
  181. package/skills/creator-analysis/scripts/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  185. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  186. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  187. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  188. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  189. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  190. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  191. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  194. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  195. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  196. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  197. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  198. package/skills/creator-analysis/scripts/core/progress_report.py +0 -111
  199. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  200. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  202. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  203. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  205. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  207. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  208. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  209. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  210. package/skills/meta-capability/SKILL.md +0 -69
  211. package/skills/meta-capability/agents/openai.yaml +0 -4
  212. package/skills/meta-capability/env.example +0 -42
  213. package/skills/meta-capability/references/api-capability-index.md +0 -92
  214. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  215. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  216. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  217. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  218. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  219. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  220. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  222. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  223. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  224. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  226. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  227. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  228. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  229. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  230. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  231. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  232. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  233. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  234. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  235. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  236. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  237. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  238. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  239. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  240. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  241. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  242. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  243. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  244. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  245. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  246. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  247. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  248. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  249. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  250. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  251. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  252. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  253. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  254. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  255. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  256. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  257. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  258. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  259. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  263. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  264. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  265. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  266. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  267. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  268. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  269. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  270. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  271. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  272. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  273. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  274. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  276. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  277. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  279. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  280. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  281. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  282. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  283. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  284. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  285. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  286. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  287. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  288. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  289. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  290. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  291. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  292. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  293. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  294. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  295. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  296. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  297. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  298. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  299. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  300. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  301. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  302. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  303. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  304. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  305. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  306. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  307. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  308. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  309. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  310. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  311. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  312. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  316. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  317. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  318. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  319. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  320. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  321. package/skills/meta-capability/references/dispatch.md +0 -27
  322. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  323. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  324. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  325. package/skills/meta-capability/scripts/__init__.py +0 -1
  326. package/skills/meta-capability/scripts/call_route.py +0 -141
  327. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  328. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  329. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  330. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  331. package/skills/meta-capability/scripts/test_auth.py +0 -98
  332. package/skills/single-work-analysis/SKILL.md +0 -62
  333. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  334. package/skills/single-work-analysis/env.example +0 -36
  335. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  336. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  337. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  339. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  340. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  341. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  342. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  344. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  345. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  346. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  348. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  349. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  350. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  353. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  354. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  356. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  357. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  358. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  359. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  360. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  361. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  362. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  363. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  364. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  366. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  373. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  375. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  376. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  377. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  378. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  379. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  381. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  385. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  386. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  388. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  389. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  390. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  391. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  392. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  393. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  394. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  395. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  398. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  399. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  401. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  402. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  403. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  404. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  405. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  406. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  407. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  409. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  410. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  411. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  412. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  413. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  414. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  415. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  416. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  417. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  418. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  419. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  420. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  421. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  422. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  423. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  424. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  425. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  426. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  427. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  428. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  429. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  430. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  431. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  432. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  434. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  438. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  439. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  441. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  442. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  443. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -58
  444. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  445. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  446. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  447. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  448. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  449. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  450. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  451. package/skills/single-work-analysis/references/prompt-contracts/insight.md +0 -47
  452. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  453. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  454. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  455. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
  456. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  457. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  458. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  459. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  460. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -133
  461. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  462. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  463. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -253
  464. package/skills/single-work-analysis/scripts/core/tikomni_common.py +0 -588
  465. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  466. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  468. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  469. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  470. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  471. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  472. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  473. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  474. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  475. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -1402
  476. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  477. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  478. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  479. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  480. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -1,1402 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- if __package__ in {None, ""}:
4
- import sys
5
- from pathlib import Path
6
-
7
- _self = Path(__file__).resolve()
8
- for _parent in _self.parents:
9
- if (_parent / "scripts").is_dir():
10
- sys.path.insert(0, str(_parent))
11
- break
12
-
13
- """Write benchmark markdown cards into card root zones."""
14
-
15
- import argparse
16
- import datetime as dt
17
- import json
18
- import os
19
- import re
20
- import unicodedata
21
- from pathlib import Path
22
- from typing import Any, Dict, List, Optional
23
-
24
- try:
25
- from zoneinfo import ZoneInfo
26
- except Exception: # pragma: no cover - py<3.9 fallback
27
- ZoneInfo = None
28
-
29
- from scripts.core.analysis_pipeline import DEFAULT_MODULE_SECTIONS, build_analysis_sections
30
- from scripts.core.config_loader import load_tikomni_config
31
- from scripts.core.storage_router import build_card_output_path, normalize_card_type, resolve_effective_card_type
32
- from scripts.core.tikomni_common import normalize_text, read_json_file, write_json_stdout
33
-
34
- def resolve_default_card_root() -> str:
35
- raw = os.getenv("TIKOMNI_CARD_ROOT", "").strip()
36
- if not raw:
37
- raise ValueError(
38
- "missing_card_root: set --card-root or define TIKOMNI_CARD_ROOT in .env/.env.local"
39
- )
40
-
41
- candidate = Path(raw).expanduser()
42
- if not candidate.is_absolute():
43
- raise ValueError("TIKOMNI_CARD_ROOT must be an absolute path")
44
- return str(candidate.resolve())
45
-
46
-
47
- # Keep import-time compatibility for other scripts without crashing when env is absent.
48
- DEFAULT_CARD_ROOT = ""
49
- CARD_TYPES = ["work", "author", "author_sample_work"]
50
-
51
-
52
- def _normalize_lines(value: Any) -> List[str]:
53
- if isinstance(value, list):
54
- return [normalize_text(item) for item in value if normalize_text(item)]
55
- if isinstance(value, str):
56
- text = normalize_text(value)
57
- return [text] if text else []
58
- return []
59
-
60
-
61
- def _safe_int(value: Any, default: int = 0) -> int:
62
- if value is None:
63
- return default
64
- if isinstance(value, bool):
65
- return int(value)
66
- if isinstance(value, int):
67
- return value
68
- if isinstance(value, float):
69
- return int(value)
70
- if isinstance(value, str):
71
- text = value.strip()
72
- if text.isdigit() or (text.startswith("-") and text[1:].isdigit()):
73
- return int(text)
74
- return default
75
-
76
-
77
- def _safe_optional_int(value: Any) -> Optional[int]:
78
- if value is None:
79
- return None
80
- if isinstance(value, bool):
81
- return int(value)
82
- if isinstance(value, int):
83
- return value
84
- if isinstance(value, float):
85
- return int(value)
86
- if isinstance(value, str):
87
- text = value.strip()
88
- if not text:
89
- return None
90
- if text.isdigit() or (text.startswith("-") and text[1:].isdigit()):
91
- return int(text)
92
- return None
93
-
94
-
95
- def _to_unix_sec(value: Any) -> int:
96
- if value is None:
97
- return 0
98
- parsed = _safe_int(value, default=0)
99
- if parsed <= 0:
100
- return 0
101
- if parsed > 1_000_000_000_000:
102
- parsed //= 1000
103
- return parsed
104
-
105
-
106
- def _format_shanghai_datetime(value: Any) -> str:
107
- ts = _to_unix_sec(value)
108
- if ts <= 0:
109
- return ""
110
- try:
111
- if ZoneInfo is not None:
112
- dt_obj = dt.datetime.fromtimestamp(ts, tz=ZoneInfo("Asia/Shanghai"))
113
- else:
114
- dt_obj = dt.datetime.fromtimestamp(ts, tz=dt.timezone(dt.timedelta(hours=8)))
115
- return dt_obj.strftime("%Y-%m-%d %H:%M:%S")
116
- except Exception:
117
- return ""
118
-
119
-
120
- def _resolve_publish_time(payload: Dict[str, Any], create_time_sec: int) -> Dict[str, str]:
121
- publish_time_text = normalize_text(payload.get("publish_time_text"))
122
- if publish_time_text:
123
- return {"publish_time_text": publish_time_text, "publish_time_source": "payload.publish_time_text"}
124
-
125
- source = _source_dict(payload)
126
- candidates = [
127
- ("payload.publish_time", payload.get("publish_time")),
128
- ("payload.create_time", payload.get("create_time")),
129
- ("source.publish_time", source.get("publish_time")),
130
- ("source.create_time", source.get("create_time")),
131
- ("source.time", source.get("time")),
132
- ]
133
- for source_key, raw in candidates:
134
- text = normalize_text(raw)
135
- if not text:
136
- continue
137
- ts_text = _format_shanghai_datetime(raw)
138
- if ts_text:
139
- return {"publish_time_text": ts_text, "publish_time_source": source_key}
140
- return {"publish_time_text": text, "publish_time_source": source_key}
141
-
142
- fallback_text = _format_shanghai_datetime(create_time_sec)
143
- if fallback_text:
144
- return {"publish_time_text": fallback_text, "publish_time_source": "create_time_sec"}
145
-
146
- return {"publish_time_text": "未知", "publish_time_source": "unknown"}
147
-
148
-
149
- def _resolve_published_date(payload: Dict[str, Any], create_time_sec: int) -> str:
150
- published_date = normalize_text(payload.get("published_date"))
151
- if published_date:
152
- return published_date
153
- publish_time_info = _resolve_publish_time(payload, create_time_sec)
154
- text = normalize_text(publish_time_info.get("publish_time_text"))
155
- if not text or text == "未知":
156
- return "N/A"
157
- return text[:10]
158
-
159
-
160
- def _display_metric(value: Optional[int]) -> str:
161
- if value is None:
162
- return "N/A"
163
- return str(value)
164
-
165
-
166
- def _source_dict(payload: Dict[str, Any]) -> Dict[str, Any]:
167
- source = payload.get("source")
168
- return source if isinstance(source, dict) else {}
169
-
170
-
171
- def _extract_duration_ms(payload: Dict[str, Any]) -> int:
172
- source = _source_dict(payload)
173
-
174
- def _pick_int(keys: List[str], from_source: bool = False) -> int:
175
- base = source if from_source else payload
176
- for key in keys:
177
- value = _safe_int(base.get(key), default=0)
178
- if value > 0:
179
- return value
180
- return 0
181
-
182
- duration_ms = _pick_int(["duration_ms"])
183
- if duration_ms <= 0:
184
- duration_ms = _pick_int(["duration_ms"], from_source=True)
185
-
186
- if duration_ms <= 0:
187
- raw_duration = _pick_int(["duration", "duration_sec"])
188
- if raw_duration <= 0:
189
- raw_duration = _pick_int(["duration", "duration_sec"], from_source=True)
190
- if raw_duration > 0:
191
- duration_ms = raw_duration * 1000 if raw_duration < 10000 else raw_duration
192
-
193
- return duration_ms
194
-
195
-
196
- def _ensure_sentence_end(text: str) -> str:
197
- if not text:
198
- return text
199
- if text[-1] in "。!?!?" or text.endswith("..."):
200
- return text
201
- return f"{text}。"
202
-
203
-
204
- def _clean_asr_text(raw: str, provided_clean: str) -> str:
205
- """ASR_CLEAN prompt-contracts/asr-clean.md@v1
206
-
207
- Steps:
208
- 1) base select: provided_clean > raw
209
- 2) denoise: remove filler/repetition/whitespace noise
210
- 3) sentence split + punctuation restore
211
- 4) paragraphize: one sentence per line, 2-4 sentences per paragraph
212
- """
213
- base = normalize_text(provided_clean) or normalize_text(raw)
214
- if not base:
215
- return ""
216
-
217
- # step2: 去噪(口头禅/重复)
218
- base = re.sub(r"\b(嗯|啊|呃|额|那个|这个|然后|就是)\b", " ", base)
219
- base = re.sub(r"(嗯+|啊+|呃+)", " ", base)
220
- base = re.sub(r"(就是就是|然后然后|这个这个|那个那个)", " ", base)
221
- base = re.sub(r"\s+", " ", base).strip()
222
-
223
- # step3: 断句 + 句尾标点
224
- units = [normalize_text(part) for part in re.split(r"[。!?!?;;\n]+", base)]
225
- sentences = [_ensure_sentence_end(unit) for unit in units if unit]
226
- if not sentences:
227
- fallback = _ensure_sentence_end(base)
228
- return fallback if fallback else ""
229
-
230
- # step4: 每句一行;每段 2~4 句(默认 3 句)
231
- paragraphs: List[str] = []
232
- bucket: List[str] = []
233
- for sentence in sentences:
234
- bucket.append(sentence)
235
- if len(bucket) >= 3:
236
- paragraphs.append("\n".join(bucket))
237
- bucket = []
238
-
239
- if bucket:
240
- if len(bucket) == 1 and paragraphs:
241
- paragraphs[-1] = f"{paragraphs[-1]}\n{bucket[0]}"
242
- else:
243
- paragraphs.append("\n".join(bucket))
244
-
245
- return "\n\n".join(paragraphs)
246
-
247
-
248
- def _pick_text(payload: Dict[str, Any], keys: List[str], source_keys: Optional[List[str]] = None) -> str:
249
- source = _source_dict(payload)
250
- for key in keys:
251
- text = normalize_text(payload.get(key))
252
- if text:
253
- return text
254
- for key in (source_keys or keys):
255
- text = normalize_text(source.get(key))
256
- if text:
257
- return text
258
- return ""
259
-
260
-
261
- def _extract_platform_work_id(payload: Dict[str, Any]) -> str:
262
- return _pick_text(
263
- payload,
264
- ["platform_work_id", "aweme_id", "note_id", "item_id", "id"],
265
- ["platform_work_id", "aweme_id", "note_id", "item_id", "id"],
266
- )
267
-
268
-
269
- def _extract_author(payload: Dict[str, Any]) -> Dict[str, str]:
270
- author_raw = payload.get("author")
271
- author = author_raw if isinstance(author_raw, dict) else {}
272
-
273
- source = _source_dict(payload)
274
- source_author = source.get("author") if isinstance(source.get("author"), dict) else {}
275
-
276
- author_text = normalize_text(author_raw) if isinstance(author_raw, str) else ""
277
- nickname = (
278
- normalize_text(author.get("nickname"))
279
- or author_text
280
- or normalize_text(source_author.get("nickname"))
281
- )
282
-
283
- author_handle = (
284
- normalize_text(payload.get("author_handle"))
285
- or normalize_text(author.get("author_handle"))
286
- or normalize_text(source_author.get("author_handle"))
287
- or nickname
288
- )
289
- platform_author_id = (
290
- normalize_text(payload.get("platform_author_id"))
291
- or normalize_text(payload.get("author_platform_id"))
292
- or normalize_text(author.get("platform_author_id"))
293
- or normalize_text(author.get("author_platform_id"))
294
- or normalize_text(source_author.get("platform_author_id"))
295
- or normalize_text(source_author.get("author_platform_id"))
296
- )
297
-
298
- xhs_user_id = (
299
- normalize_text(payload.get("xhs_user_id"))
300
- or normalize_text(author.get("xhs_user_id"))
301
- or normalize_text(source_author.get("xhs_user_id"))
302
- )
303
- xhs_sec_token = (
304
- normalize_text(payload.get("xhs_sec_token"))
305
- or normalize_text(author.get("xhs_sec_token"))
306
- or normalize_text(source_author.get("xhs_sec_token"))
307
- )
308
-
309
- douyin_sec_uid = (
310
- normalize_text(payload.get("douyin_sec_uid"))
311
- or normalize_text(author.get("douyin_sec_uid"))
312
- or normalize_text(source_author.get("douyin_sec_uid"))
313
- )
314
- douyin_aweme_author_id = (
315
- normalize_text(payload.get("douyin_aweme_author_id"))
316
- or normalize_text(author.get("douyin_aweme_author_id"))
317
- or normalize_text(source_author.get("douyin_aweme_author_id"))
318
- )
319
-
320
- return {
321
- "nickname": nickname,
322
- "author_handle": author_handle,
323
- "platform_author_id": platform_author_id,
324
- "xhs_user_id": xhs_user_id,
325
- "xhs_sec_token": xhs_sec_token,
326
- "douyin_sec_uid": douyin_sec_uid,
327
- "douyin_aweme_author_id": douyin_aweme_author_id,
328
- }
329
-
330
-
331
- def _is_cjk(char: str) -> bool:
332
- code = ord(char)
333
- return 0x4E00 <= code <= 0x9FFF
334
-
335
-
336
- def _clean_for_filename(text: str) -> str:
337
- if not text:
338
- return ""
339
-
340
- normalized = unicodedata.normalize("NFKC", text)
341
- normalized = re.sub(r"[##][^\s##]+", " ", normalized)
342
- normalized = re.sub(r"\[[^\]]+\]", " ", normalized)
343
- normalized = normalized.replace("\n", " ").replace("\r", " ")
344
-
345
- kept: List[str] = []
346
- for ch in normalized:
347
- cat = unicodedata.category(ch)
348
- if _is_cjk(ch) or ch.isalnum() or ch in {" ", "-", "_"}:
349
- kept.append(ch)
350
- elif cat.startswith("Z"):
351
- kept.append(" ")
352
-
353
- compact = "".join(kept)
354
- compact = re.sub(r"\s+", "", compact)
355
- compact = re.sub(r"[\\/:*?\"<>|]", "", compact)
356
- return compact.strip("._-")
357
-
358
-
359
- def _clip_with_min(text: str, min_len: int, max_len: int, fallback: str) -> str:
360
- candidate = _clean_for_filename(text)
361
- fallback_clean = _clean_for_filename(fallback)
362
-
363
- if not candidate:
364
- candidate = fallback_clean
365
- if len(candidate) < min_len:
366
- candidate = (candidate + fallback_clean)[:max_len]
367
- if len(candidate) < min_len:
368
- candidate = (candidate + "内容速览")[:max_len]
369
-
370
- candidate = candidate[:max_len]
371
- if len(candidate) < min_len:
372
- candidate = (candidate + "作品卡")[:max_len]
373
- return candidate[:max_len] if candidate else fallback_clean[:max_len]
374
-
375
-
376
- def _pick_author_slug(payload: Dict[str, Any], author_hint: Optional[str] = None) -> str:
377
- base = normalize_text(author_hint)
378
- if not base:
379
- author = _extract_author(payload)
380
- base = author["nickname"] or author["author_handle"] or author["platform_author_id"] or "作者"
381
- slug = _clip_with_min(base, min_len=2, max_len=18, fallback="作者")
382
- return slug if len(slug) >= 2 else "作者"
383
-
384
-
385
- def _pick_title_source(payload: Dict[str, Any]) -> str:
386
- for key in ("title", "desc", "summary"):
387
- text = normalize_text(payload.get(key))
388
- if text:
389
- return text
390
-
391
- source = _source_dict(payload)
392
- for key in ("title", "desc"):
393
- text = normalize_text(source.get(key))
394
- if text:
395
- return text
396
-
397
- raw_content = normalize_text(payload.get("raw_content"))
398
- if raw_content:
399
- return raw_content[:48]
400
-
401
- platform_work_id = _extract_platform_work_id(payload)
402
- if platform_work_id:
403
- return f"作品拆解{platform_work_id[-8:]}"
404
-
405
- return "内容拆解速览"
406
-
407
-
408
- def _pick_title_slug(payload: Dict[str, Any]) -> str:
409
- title_source = _pick_title_source(payload)
410
- platform_work_id = _extract_platform_work_id(payload)
411
- fallback = f"内容拆解{platform_work_id[-8:]}" if platform_work_id else "内容拆解速览"
412
- slug = _clip_with_min(title_source, min_len=8, max_len=28, fallback=fallback)
413
- return slug if slug else "内容拆解速览"
414
-
415
-
416
- def _extract_tags(payload: Dict[str, Any]) -> List[str]:
417
- for key in ("tags", "tag_list", "hashtags"):
418
- value = payload.get(key)
419
- if isinstance(value, list):
420
- tags = [normalize_text(item).lstrip("#") for item in value if normalize_text(item)]
421
- if tags:
422
- return list(dict.fromkeys(tags))
423
- if isinstance(value, str) and normalize_text(value):
424
- parts = re.split(r"[,,\s]+", normalize_text(value))
425
- tags = [part.lstrip("#") for part in parts if part]
426
- if tags:
427
- return list(dict.fromkeys(tags))
428
-
429
- source = _source_dict(payload)
430
- for key in ("tags", "tag_list", "hashtags"):
431
- value = source.get(key)
432
- if isinstance(value, list):
433
- tags = [normalize_text(item).lstrip("#") for item in value if normalize_text(item)]
434
- if tags:
435
- return list(dict.fromkeys(tags))
436
-
437
- return []
438
-
439
-
440
- def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str, Any]:
441
- author = _extract_author(payload)
442
-
443
- title = _pick_text(payload, ["title", "desc"], ["title", "desc"])
444
- caption_raw = normalize_text(payload.get("caption_raw") or payload.get("desc"))
445
- platform_work_id = _extract_platform_work_id(payload)
446
-
447
- source_url = _pick_text(
448
- payload,
449
- ["source_url", "share_url", "url"],
450
- ["source_url", "share_url", "url", "share_text"],
451
- )
452
- share_url = _pick_text(
453
- payload,
454
- ["share_url", "canonical_share_url"],
455
- ["share_url", "canonical_share_url", "url", "source_url", "share_text"],
456
- ) or source_url
457
-
458
- cover_image = _pick_text(
459
- payload,
460
- ["cover_image", "cover_url", "cover"],
461
- ["cover_image", "cover_url", "cover", "origin_cover"],
462
- )
463
- selected_images = payload.get("selected_image_urls")
464
- if not cover_image and isinstance(selected_images, list) and selected_images:
465
- cover_image = normalize_text(selected_images[0])
466
-
467
- video_download_url = _pick_text(
468
- payload,
469
- ["video_download_url", "video_down_url", "selected_video_url", "original_video_url", "video_url", "download_url"],
470
- ["video_download_url", "video_down_url", "selected_video_url", "original_video_url", "video_url", "download_url"],
471
- )
472
-
473
- create_time_sec = _to_unix_sec(payload.get("create_time_sec"))
474
- if create_time_sec <= 0:
475
- create_time_sec = _to_unix_sec(payload.get("create_time"))
476
- if create_time_sec <= 0:
477
- create_time_sec = _to_unix_sec(_source_dict(payload).get("create_time"))
478
-
479
- digg_count = _safe_int(payload.get("digg_count"), default=0)
480
- comment_count = _safe_int(payload.get("comment_count"), default=0)
481
- collect_count = _safe_int(payload.get("collect_count"), default=0)
482
- share_count = _safe_int(payload.get("share_count"), default=0)
483
- play_count = _safe_optional_int(payload.get("play_count"))
484
-
485
- summary = normalize_text(payload.get("summary"))
486
- raw_content = normalize_text(payload.get("raw_content"))
487
- primary_text = normalize_text(payload.get("primary_text"))
488
- provided_asr_clean = normalize_text(payload.get("asr_clean"))
489
- asr_clean = _clean_asr_text(raw_content, provided_asr_clean)
490
-
491
- duration_ms = _extract_duration_ms(payload)
492
-
493
- category = normalize_text(payload.get("category"))
494
- if not category:
495
- category = "观点"
496
-
497
- hot_score = _safe_int(payload.get("hot_score"), default=0)
498
- if hot_score <= 0:
499
- hot_score = digg_count + comment_count * 2 + collect_count * 3 + share_count * 4
500
-
501
- work_modality = normalize_text(payload.get("work_modality"))
502
- if not work_modality:
503
- work_modality = "video" if video_download_url or raw_content else "text"
504
-
505
- published_date = _resolve_published_date(payload, create_time_sec)
506
- primary_text_source_raw = normalize_text(payload.get("primary_text_source"))
507
- primary_text_source = (
508
- primary_text_source_raw
509
- if primary_text_source_raw in {"asr_clean", "caption_raw"}
510
- else ("asr_clean" if work_modality == "video" else "caption_raw")
511
- )
512
- if not primary_text:
513
- primary_text = asr_clean if primary_text_source == "asr_clean" else normalize_text(payload.get("desc"))
514
-
515
- return {
516
- "title": title,
517
- "platform": platform,
518
- "platform_work_id": platform_work_id,
519
- "author": author.get("nickname") or "",
520
- "author_handle": author.get("author_handle") or "",
521
- "platform_author_id": author.get("platform_author_id") or "",
522
- "caption_raw": caption_raw,
523
- "share_url": share_url,
524
- "source_url": source_url,
525
- "cover_image": cover_image,
526
- "video_download_url": video_download_url,
527
- "published_date": published_date,
528
- "duration_ms": duration_ms,
529
- "digg_count": digg_count,
530
- "comment_count": comment_count,
531
- "collect_count": collect_count,
532
- "share_count": share_count,
533
- "play_count": play_count,
534
- "tags": _extract_tags(payload),
535
- "work_modality": work_modality,
536
- "category": category,
537
- "content_kind": normalize_text(payload.get("content_kind")),
538
- "summary": summary,
539
- "hot_score": hot_score,
540
- "raw_content": raw_content,
541
- "primary_text": primary_text,
542
- "asr_clean": asr_clean,
543
- "platform_native_refs": payload.get("platform_native_refs") if isinstance(payload.get("platform_native_refs"), dict) else {},
544
- "request_id": payload.get("request_id"),
545
- "confidence": normalize_text(payload.get("confidence")) or "low",
546
- "error_reason": payload.get("error_reason"),
547
- "extract_trace": payload.get("extract_trace", []),
548
- "analysis_sections": payload.get("analysis_sections") if isinstance(payload.get("analysis_sections"), dict) else {},
549
- "analysis_output": payload.get("analysis_output") if isinstance(payload.get("analysis_output"), dict) else {},
550
- "author_analysis_v2": payload.get("author_analysis_v2") if isinstance(payload.get("author_analysis_v2"), dict) else {},
551
- "author_analysis_input_v1": payload.get("author_analysis_input_v1") if isinstance(payload.get("author_analysis_input_v1"), dict) else {},
552
- "sampled_work_explanations": payload.get("sampled_work_explanations") if isinstance(payload.get("sampled_work_explanations"), dict) else {},
553
- "author_card_highlights": payload.get("author_card_highlights") if isinstance(payload.get("author_card_highlights"), dict) else {},
554
- "validation": payload.get("validation") if isinstance(payload.get("validation"), dict) else {},
555
- "business_score": _safe_int(payload.get("business_score"), default=0),
556
- "benchmark_gap_score": _safe_int(payload.get("benchmark_gap_score"), default=0),
557
- "style_radar": payload.get("style_radar") if isinstance(payload.get("style_radar"), dict) else {},
558
- "core_contradictions": payload.get("core_contradictions") if isinstance(payload.get("core_contradictions"), list) else [],
559
- "recommendations": payload.get("recommendations") if isinstance(payload.get("recommendations"), list) else [],
560
- "business_analysis": normalize_text(payload.get("business_analysis")),
561
- "benchmark_analysis": normalize_text(payload.get("benchmark_analysis")),
562
- "nickname": normalize_text(payload.get("nickname")),
563
- "ip_location": normalize_text(payload.get("ip_location")),
564
- "signature": normalize_text(payload.get("signature")),
565
- "avatar_url": normalize_text(payload.get("avatar_url")),
566
- "fans_count": _safe_optional_int(payload.get("fans_count")),
567
- "liked_count": _safe_optional_int(payload.get("liked_count")),
568
- "collected_count": _safe_optional_int(payload.get("collected_count")),
569
- "works_count": _safe_optional_int(payload.get("works_count")),
570
- "verified": payload.get("verified") if isinstance(payload.get("verified"), bool) else None,
571
- "snapshot_at": normalize_text(payload.get("snapshot_at")),
572
- }
573
-
574
-
575
def _format_create_time(create_time_sec: int) -> str:
    """Render a unix timestamp (seconds) for display.

    Prefers the Shanghai-local formatted string; a non-positive timestamp is
    reported as unknown, any other unformattable value is echoed verbatim.
    """
    formatted = _format_shanghai_datetime(create_time_sec)
    if formatted:
        return formatted
    if create_time_sec > 0:
        return str(create_time_sec)
    return "未知"
582
-
583
-
584
- def _format_duration(duration_ms: int) -> str:
585
- if duration_ms <= 0:
586
- return "未知"
587
- total_sec = duration_ms // 1000
588
- minute, second = divmod(total_sec, 60)
589
- if minute:
590
- return f"{minute}分{second:02d}秒"
591
- return f"{second}秒"
592
-
593
-
594
def _sentence_units(text: str) -> List[str]:
    """Split *text* into normalized sentence units.

    Sentences are delimited by CJK/ASCII terminators (。!?!?;;) and line
    breaks; empty fragments are dropped after normalization.

    Bug fix: the previous pattern used r"\\n" inside the character class. In
    a raw string that is a literal backslash plus the letter "n", so the text
    was split on every "n" character instead of on newlines; r"\n" matches an
    actual line break.
    """
    if not text:
        return []
    return [normalize_text(x) for x in re.split(r"[。!?!?;;\n]+", text) if normalize_text(x)]
598
-
599
-
600
def _first_sentence(text: str) -> str:
    """Return the first sentence unit of *text*, or "" when there is none."""
    units = _sentence_units(text)
    if not units:
        return ""
    return units[0]
603
-
604
-
605
- def _hit_count(text: str, keywords: List[str]) -> int:
606
- if not text:
607
- return 0
608
- return sum(1 for token in keywords if token in text)
609
-
610
-
611
- def _top_keywords(text: str, candidates: List[str], topn: int = 3) -> List[str]:
612
- if not text:
613
- return []
614
- scored = []
615
- for token in candidates:
616
- count = text.count(token)
617
- if count > 0:
618
- scored.append((count, token))
619
- scored.sort(key=lambda x: (-x[0], len(x[1])))
620
- return [token for _, token in scored[:topn]]
621
-
622
-
623
- def _score_from_hits(hits: int, full_score_hits: int = 4) -> int:
624
- if hits <= 0:
625
- return 2
626
- if hits >= full_score_hits:
627
- return 5
628
- return min(5, hits + 2)
629
-
630
-
631
-
632
def _analyze_topic(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Classify the work's topic archetype, sub-themes and audience pain points.

    Works off the concatenated title + cleaned-ASR text. Returns a module dict
    with keys: ``score`` (2-5), ``lines`` (rendered markdown bullets),
    ``gaps`` (follow-up data needs) and ``evidence`` (debug string).
    """
    title = normalize_text(fields.get("title") or "")
    asr = normalize_text(fields.get("asr_clean") or "")
    category = normalize_text(fields.get("category") or "")
    text = f"{title} {asr}"

    # No usable text at all: emit the "insufficient data" placeholder module.
    if not text.strip():
        return {
            "score": 2,
            "lines": ["- 类型:数据不足。", "- 细分主题:数据不足。", "- 受众痛点:数据不足,需补充标题或ASR。"],
            "gaps": ["补齐标题或ASR文本,才能完成选题分类与主题归因"],
            "evidence": "输入文本缺失",
        }

    # Keyword signals per archetype: traffic-bait / persona / marketing.
    type_rules = {
        "流量型": ["热点", "挑战", "反转", "揭秘", "真相", "别再", "为什么", "踩坑", "3秒", "爆款"],
        "人设型": ["我是", "我们", "日常", "系列", "分享", "经历", "成长", "复盘", "带你", "我"],
        "营销型": ["领取", "私信", "咨询", "下单", "课程", "优惠", "链接", "报名", "合作", "购买"],
    }
    type_scores = {name: _hit_count(text, kws) for name, kws in type_rules.items()}

    # Category metadata nudges the relevant archetype score by one point.
    if category in ["教程", "知识", "方法"]:
        type_scores["营销型"] += 1
    if category in ["观点", "人设", "日常"]:
        type_scores["人设型"] += 1

    # Archetype with the most keyword hits wins (ties resolve to dict order).
    main_type = max(type_scores, key=lambda k: type_scores[k])
    main_hits = type_scores[main_type]

    # Sub-theme and pain-point vocabularies; top hits become the report lines.
    theme_candidates = [
        "AI", "智能体", "变现", "副业", "教程", "工作流", "流量", "涨粉", "投流", "口播", "脚本", "工具", "私域", "创业", "营销",
    ]
    themes = _top_keywords(text, theme_candidates, topn=3)
    pain_candidates = ["不会", "焦虑", "卡住", "没流量", "转化", "不会写", "不会做", "时间不够", "担心", "风险"]
    pains = _top_keywords(text, pain_candidates, topn=2)

    lines = [
        f"- 基础类型:{main_type}(命中信号 {main_hits} 个)。",
        f"- 细分主题:{'、'.join(themes) if themes else '数据不足(未检测到显著主题词)'}。",
        f"- 受众痛点:{'、'.join(pains) if pains else '以“快速落地/降低门槛”为主(显性痛点词不足)'}。",
    ]

    return {
        "score": _score_from_hits(main_hits),
        "lines": lines,
        "gaps": [] if themes else ["补充更完整ASR,提高细分主题识别稳定性"],
        "evidence": f"类型命中分布={type_scores}",
    }
680
-
681
-
682
def _analyze_style(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Profile writing style: sentence length, tone mix, persona and rhetoric.

    Returns the standard module dict (score / lines / gaps / evidence).
    """
    asr = normalize_text(fields.get("asr_clean") or "")
    title = normalize_text(fields.get("title") or "")
    text = f"{title} {asr}".strip()
    # Sentence metrics are computed on the ASR only; tone counts on title+ASR.
    units = _sentence_units(asr)

    if not text:
        return {
            "score": 2,
            "lines": ["- 人设匹配:数据不足。", "- 句式结构:数据不足。", "- 语气与情绪:数据不足。"],
            "gaps": ["补齐ASR文本后再做文风拆解"],
            "evidence": "输入文本缺失",
        }

    # Average sentence length (chars) buckets the style into three bands.
    avg_len = int(sum(len(u) for u in units) / max(1, len(units))) if units else 0
    if avg_len <= 14:
        length_type = "短句为主"
    elif avg_len <= 24:
        length_type = "中短句混合"
    else:
        length_type = "中长句为主"

    # Count both full-width and ASCII punctuation variants.
    q_count = text.count("?") + text.count("?")
    e_count = text.count("!") + text.count("!")
    # Remaining sentences are treated as plain statements (floored at 0).
    statement_count = max(0, len(units) - q_count - e_count)
    persona_hits = _hit_count(text, ["我", "我们", "你", "大家", "朋友们", "聪明的你"])
    rhetoric_hits = _hit_count(text, ["不是", "而是", "其实", "真的", "一定", "必须", "先", "再"])

    lines = [
        f"- 句式结构:{length_type},平均句长约 {avg_len} 字。",
        f"- 语气分布:疑问 {q_count} / 感叹 {e_count} / 陈述 {statement_count}。",
        f"- 人设与修辞:人设代词命中 {persona_hits} 次,强调/转折词命中 {rhetoric_hits} 次。",
    ]

    # One point per present signal family (length / persona / rhetoric).
    strength_hits = int(avg_len > 0) + int(persona_hits > 0) + int(rhetoric_hits > 0)
    return {
        "score": _score_from_hits(strength_hits, full_score_hits=3),
        "lines": lines,
        "gaps": [] if units else ["ASR分句失败,建议人工复核"],
        "evidence": f"avg_len={avg_len}, persona_hits={persona_hits}, rhetoric_hits={rhetoric_hits}",
    }
723
-
724
-
725
def _analyze_hook(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Identify the opening / middle / closing hooks of the work.

    The opening hook is the first ASR sentence (falling back to the title),
    the middle hook is the median sentence, and the closing hook is the last
    sentence containing an engagement keyword.

    Returns the standard module dict (score / lines / gaps / evidence).

    Fix: the original recomputed ``_sentence_units(asr)`` four times; the
    split is hoisted into ``units`` once with identical results.
    """
    title = normalize_text(fields.get("title") or "")
    asr = normalize_text(fields.get("asr_clean") or "")
    units = _sentence_units(asr)
    first = _first_sentence(asr) or title
    middle = units[len(units) // 2] if units else ""

    if not first:
        return {
            "score": 2,
            "lines": ["- 开头钩子:数据不足。", "- 中段钩子:数据不足。", "- 结尾钩子:数据不足。"],
            "gaps": ["缺少标题与ASR,无法提取钩子原话"],
            "evidence": "开头句缺失",
        }

    # Classify the opening hook; first matching rule wins.
    hook_type = "陈述式"
    if any(k in first for k in ["?", "?", "为什么", "怎么"]):
        hook_type = "疑问式"
    elif any(k in first for k in ["别再", "误区", "真相", "不是"]):
        hook_type = "反常识式"
    elif any(k in first for k in ["当你", "如果", "今天"]):
        hook_type = "场景代入式"

    # Closing hook: last sentence that carries an engagement/CTA keyword.
    end_candidates = [u for u in units if _hit_count(u, ["关注", "评论", "私信", "收藏", "转发", "下次见", "领取"]) > 0]
    end = end_candidates[-1] if end_candidates else "未检测到明确结尾钩子"

    lines = [
        f"- 开头钩子({hook_type}):{first}",
        f"- 中段钩子:{middle or '数据不足(中段文本不足)'}",
        f"- 结尾钩子:{end}",
    ]

    # One point per hook actually found (open / middle / close).
    hook_hits = int(first != "") + int(bool(middle)) + int(end != "未检测到明确结尾钩子")
    return {
        "score": _score_from_hits(hook_hits, full_score_hits=3),
        "lines": lines,
        "gaps": [] if hook_hits >= 2 else ["建议补充中段转折钩子与结尾动作钩子"],
        "evidence": f"hook_type={hook_type}, hook_hits={hook_hits}",
    }
763
-
764
-
765
def _analyze_structure(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Tag each ASR sentence with structural labels and infer the template.

    Labels: hook / conflict / pivot / evidence / CTA. The template verdict is
    the full five-part funnel when at least four labels are present.

    Returns the standard module dict (score / lines / gaps / evidence).
    """
    asr = normalize_text(fields.get("asr_clean") or "")
    units = _sentence_units(asr)
    if not units:
        return {
            "score": 2,
            "lines": ["- 结构标签:数据不足。", "- 模板判定:数据不足。"],
            "gaps": ["补充ASR后再进行结构标注"],
            "evidence": "分句为空",
        }

    # Keyword vocabulary per structural label.
    label_rules = {
        "钩子": ["?", "?", "为什么", "怎么", "别再", "真相", "当你", "如果"],
        "冲突": ["但是", "却", "问题", "误区", "卡住", "焦虑", "失败"],
        "转折": ["所以", "于是", "然后", "接着", "这时候", "其实"],
        "举证": ["数据", "案例", "比如", "步骤", "第一", "第二", "第三"],
        "CTA": ["评论", "关注", "私信", "收藏", "转发", "点击", "领取", "报名"],
    }
    # A sentence may count toward several labels (one increment per label).
    coverage = {k: 0 for k in label_rules}
    for sent in units:
        for label, kws in label_rules.items():
            if any(kw in sent for kw in kws):
                coverage[label] += 1

    present = [k for k, v in coverage.items() if v > 0]
    missing = [k for k, v in coverage.items() if v == 0]
    # >=4 labels present implies the full funnel template.
    template = "钩子→冲突→转折→举证→CTA" if len(present) >= 4 else "钩子→观点→补充说明"

    lines = [
        f"- 结构标签覆盖:{', '.join([f'{k}:{v}' for k, v in coverage.items()])}。",
        f"- 模板判定:{template}。",
        f"- 缺失模块:{'、'.join(missing) if missing else '无'}。",
    ]

    return {
        "score": _score_from_hits(len(present), full_score_hits=5),
        "lines": lines,
        "gaps": [f"优先补齐结构模块:{'、'.join(missing)}"] if missing else [],
        "evidence": f"coverage={coverage}",
    }
805
-
806
-
807
def _analyze_cta(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Detect call-to-action sentences in the ASR and classify their intent.

    CTA categories: engagement (互动型), lead-gen (线索型), conversion
    (转化型). Returns the standard module dict (score / lines / gaps /
    evidence).
    """
    asr = normalize_text(fields.get("asr_clean") or "")
    units = _sentence_units(asr)
    cta_tokens = ["评论", "关注", "私信", "收藏", "转发", "点击", "领取", "报名", "下单", "咨询", "试试"]
    cta_sentences = [u for u in units if any(token in u for token in cta_tokens)]

    # No sentences at all: cannot analyze anything.
    if not units:
        return {
            "score": 2,
            "lines": ["- CTA策略:数据不足。", "- 行动指令:数据不足。"],
            "gaps": ["缺少ASR,无法识别CTA"],
            "evidence": "分句为空",
        }

    # Sentences exist but none carry a CTA keyword.
    if not cta_sentences:
        return {
            "score": 2,
            "lines": ["- CTA策略:未检测到明确行动号召。", "- 行动指令:建议补一句“评论区/私信领取”。"],
            "gaps": ["补充单一明确CTA,避免只有信息陈述"],
            "evidence": "cta_sentences=0",
        }

    # The last CTA sentence is treated as the primary action request.
    primary_cta = cta_sentences[-1]
    # Categories are checked against the whole ASR, not single sentences.
    cta_types = []
    if any(k in asr for k in ["评论", "点赞", "收藏", "转发", "关注"]):
        cta_types.append("互动型")
    if any(k in asr for k in ["私信", "领取", "链接", "资料"]):
        cta_types.append("线索型")
    if any(k in asr for k in ["下单", "报名", "咨询", "购买"]):
        cta_types.append("转化型")

    lines = [
        f"- CTA类型:{'、'.join(cta_types) if cta_types else '互动型(弱)'}。",
        f"- 关键动作句:{primary_cta}",
        f"- CTA密度:{len(cta_sentences)}/{len(units)} 句。",
    ]

    return {
        "score": _score_from_hits(len(cta_types) + int(len(cta_sentences) > 0), full_score_hits=3),
        "lines": lines,
        "gaps": [] if len(cta_types) > 0 else ["补充线索型或转化型CTA,提高商业闭环"],
        "evidence": f"cta_types={cta_types}, cta_count={len(cta_sentences)}",
    }
850
-
851
-
852
- def _build_summary_module(results: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
853
- ordered = ["选题", "文风", "Hook", "结构", "CTA"]
854
- scored = [(name, results[name]["score"]) for name in ordered]
855
- avg_score = round(sum(score for _, score in scored) / max(1, len(scored)), 2)
856
- weakest = sorted(scored, key=lambda x: x[1])[:2]
857
-
858
- if avg_score >= 4.2:
859
- verdict = "可直接复用"
860
- elif avg_score >= 3.4:
861
- verdict = "可用,但需小幅优化"
862
- else:
863
- verdict = "需重写关键模块后再投放"
864
-
865
- suggestions = []
866
- for name, _ in weakest:
867
- gaps = results[name].get("gaps") or []
868
- if gaps:
869
- suggestions.append(f"- [{name}] {gaps[0]}")
870
- if not suggestions:
871
- suggestions = ["- 保持当前结构,持续做A/B测试验证Hook与CTA。"]
872
-
873
- return {
874
- "score": int(round(avg_score)),
875
- "lines": [
876
- f"- 结论:综合评分 {avg_score}/5,判定为“{verdict}”。",
877
- "- 建议:",
878
- *suggestions[:3],
879
- ],
880
- "gaps": [],
881
- "evidence": f"scores={dict(scored)}",
882
- }
883
-
884
-
885
def _insight_metric_snapshot(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Collect engagement counters and a weighted interaction summary.

    The weighted sum values heavier actions more (comment x2, collect x3,
    share x4); the rough rate divides by play count when plays are known.
    """
    counters = {
        name: _safe_int(fields.get(f"{name}_count"), default=0)
        for name in ("digg", "comment", "collect", "share", "play")
    }
    interaction = (
        counters["digg"]
        + counters["comment"] * 2
        + counters["collect"] * 3
        + counters["share"] * 4
    )
    plays = counters["play"]
    return {
        "interaction": interaction,
        "interaction_rate": interaction / plays if plays > 0 else 0.0,
        "digg": counters["digg"],
        "comment": counters["comment"],
        "collect": counters["collect"],
        "share": counters["share"],
    }
902
-
903
-
904
def _build_local_analysis_sections(fields: Dict[str, Any]) -> Dict[str, Any]:
    """Run every local analyzer and assemble the modules/insight sections."""
    module_results = {
        "选题": _analyze_topic(fields),
        "文风": _analyze_style(fields),
        "Hook": _analyze_hook(fields),
        "结构": _analyze_structure(fields),
        "CTA": _analyze_cta(fields),
    }
    summary = _build_summary_module(module_results)
    metrics = _insight_metric_snapshot(fields)

    insight_lines = list(summary.get("lines") or [])
    insight_lines.append(f"- 互动折算值:{metrics.get('interaction', 0)}。")
    insight_lines.append(f"- 粗略互动率:{metrics.get('interaction_rate', 0.0):.4f}。")

    # NOTE(review): CTA findings surface only through the summary/insight
    # lines; the rendered "modules" block lists 选题/文风/Hook/结构 only —
    # confirm this asymmetry is intentional.
    return {
        "modules": {
            name: module_results[name].get("lines", ["数据不足"])
            for name in ("选题", "文风", "Hook", "结构")
        },
        "insight": insight_lines or ["数据不足"],
    }
936
-
937
-
938
def build_card_analysis_artifact(
    *,
    payload: Dict[str, Any],
    platform: str,
    card_type: str,
) -> Dict[str, Any]:
    """Extract card fields and attach their analysis sections.

    Precomputed ``analysis_sections`` already present on the extracted fields
    take precedence; author cards get no computed sections; every other card
    type is analyzed via ``build_analysis_sections``.
    """
    fields = _extract_required_fields(payload, platform=platform)
    existing = fields.get("analysis_sections")
    if isinstance(existing, dict) and existing:
        sections = existing
    else:
        sections = {} if card_type == "author" else build_analysis_sections(fields)
    fields["analysis_sections"] = sections
    return {
        "fields": fields,
        "analysis_sections": sections,
    }
957
-
958
-
959
def _build_output_path(
    *,
    card_root: str,
    platform: str,
    card_type: str,
    payload: Dict[str, Any],
    now: dt.datetime,
    sample_author: Optional[str],
    storage_config: Optional[Dict[str, Any]],
) -> Dict[str, str]:
    """Resolve the markdown output path plus routing metadata for one card.

    Slugs come from the payload (with *sample_author* as an author hint);
    the date components are derived from *now* for path templating.
    """
    author_slug = _pick_author_slug(payload, author_hint=sample_author)
    title_slug = _pick_title_slug(payload)

    path, route_parts = build_card_output_path(
        card_root=card_root,
        platform=platform,
        card_type=card_type,
        author_slug=author_slug,
        title_slug=title_slug,
        year=now.strftime("%Y"),
        year_month=now.strftime("%Y-%m"),
        timestamp=now.strftime("%Y%m%d-%H%M%S"),
        storage_config=storage_config,
    )
    return {
        "path": path,
        "route_parts": route_parts,
        "author_slug": author_slug,
        "title_slug": title_slug,
        "target_type": card_type,
    }
990
-
991
-
992
def _render_author_markdown(
    *,
    card_id: str,
    card_type: str,
    fields: Dict[str, Any],
    generated_at: str,
) -> str:
    """Render an author (creator profile) card as YAML-frontmatter markdown.

    Analysis artifacts are read from *fields* first, then from the nested
    ``analysis_output`` dict as a fallback; everything is defensively coerced
    to the expected container type (dict/list) before use.
    """
    # Fallback container for values not promoted to the top level of fields.
    analysis_output = fields.get("analysis_output") if isinstance(fields.get("analysis_output"), dict) else {}
    author_analysis_v2 = fields.get("author_analysis_v2") if isinstance(fields.get("author_analysis_v2"), dict) else analysis_output.get("author_analysis_v2", {})
    if not isinstance(author_analysis_v2, dict):
        author_analysis_v2 = {}
    sampled_work_explanations = fields.get("sampled_work_explanations") if isinstance(fields.get("sampled_work_explanations"), dict) else analysis_output.get("sampled_work_explanations", {})
    if not isinstance(sampled_work_explanations, dict):
        sampled_work_explanations = {}
    author_card_highlights = fields.get("author_card_highlights") if isinstance(fields.get("author_card_highlights"), dict) else {}
    if not isinstance(author_card_highlights, dict):
        author_card_highlights = {}
    validation = fields.get("validation") if isinstance(fields.get("validation"), dict) else analysis_output.get("validation", {})
    if not isinstance(validation, dict):
        validation = {}

    business_score = _safe_int(fields.get("business_score"), default=_safe_int(analysis_output.get("business_score"), default=0))
    benchmark_gap_score = _safe_int(fields.get("benchmark_gap_score"), default=_safe_int(analysis_output.get("benchmark_gap_score"), default=0))
    style_radar = fields.get("style_radar") if isinstance(fields.get("style_radar"), dict) else analysis_output.get("style_radar", {})
    if not isinstance(style_radar, dict):
        style_radar = {}

    core_contradictions = fields.get("core_contradictions") if isinstance(fields.get("core_contradictions"), list) else analysis_output.get("core_contradictions", [])
    if not isinstance(core_contradictions, list):
        core_contradictions = []

    recommendations = fields.get("recommendations") if isinstance(fields.get("recommendations"), list) else analysis_output.get("recommendations", [])
    if not isinstance(recommendations, list):
        recommendations = []

    # Free-text sections: prefer the field-level value, fall back to analysis.
    business_analysis = normalize_text(fields.get("business_analysis")) or normalize_text(analysis_output.get("business_analysis"))
    benchmark_analysis = normalize_text(fields.get("benchmark_analysis")) or normalize_text(analysis_output.get("benchmark_analysis"))
    author_portrait = normalize_text(author_card_highlights.get("one_liner")) or normalize_text(fields.get("summary")) or normalize_text(analysis_output.get("author_portrait"))

    # YAML frontmatter key/value pairs; values are JSON-encoded per line.
    fm = {
        "card_id": card_id,
        "card_type": card_type,
        "platform": fields.get("platform"),
        "generated_at": generated_at,
        "updated_at": generated_at,
        "title": fields.get("title"),
        "platform_work_id": fields.get("platform_work_id"),
        "author": fields.get("author"),
        "author_handle": fields.get("author_handle"),
        "platform_author_id": fields.get("platform_author_id"),
        "nickname": fields.get("nickname"),
        "ip_location": fields.get("ip_location"),
        "avatar_url": fields.get("avatar_url"),
        "signature": fields.get("signature"),
        "fans_count": fields.get("fans_count"),
        "liked_count": fields.get("liked_count"),
        "collected_count": fields.get("collected_count"),
        "works_count": fields.get("works_count"),
        "verified": fields.get("verified"),
        "snapshot_at": fields.get("snapshot_at"),
        "business_score": business_score,
        "benchmark_gap_score": benchmark_gap_score,
        "request_id": fields.get("request_id"),
    }

    frontmatter = ["---"]
    for key, value in fm.items():
        frontmatter.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
    frontmatter.append("---")

    # Markdown body: base facts, portrait, highlight card, analyses, scores.
    lines = [
        *frontmatter,
        "",
        "## 基础事实",
        f"- 平台:{fields.get('platform') or '未知'}",
        f"- 作者ID:{fields.get('platform_author_id') or '未知'}",
        f"- 账号标识:{fields.get('author_handle') or 'N/A'}",
        f"- 昵称:{fields.get('nickname') or fields.get('author') or '未知'}",
        f"- IP属地:{fields.get('ip_location') or 'N/A'}",
        f"- 签名:{fields.get('signature') or 'N/A'}",
        f"- 头像:{fields.get('avatar_url') or 'N/A'}",
        f"- 粉丝数:{_display_metric(fields.get('fans_count'))}",
        f"- 累计获赞:{_display_metric(fields.get('liked_count'))}",
        f"- 累计收藏:{_display_metric(fields.get('collected_count'))}",
        f"- 作品数:{_display_metric(fields.get('works_count'))}",
        # verified is tri-state: True/False render 是/否, None renders N/A.
        f"- 认证状态:{'是' if fields.get('verified') else '否'}" if fields.get('verified') is not None else "- 认证状态:N/A",
        f"- 抓取时间:{fields.get('snapshot_at') or 'N/A'}",
        "",
        "## 作者画像",
        author_portrait or "数据不足",
        "",
        "## 主页摘要卡",
        f"- 核心价值:{normalize_text(author_card_highlights.get('core_value_proposition')) or '数据不足'}",
        f"- 主要信任源:{normalize_text(author_card_highlights.get('primary_trust_source')) or '数据不足'}",
        f"- 胜率结构:{('、'.join([normalize_text(x) for x in author_card_highlights.get('winning_content_structures', []) if normalize_text(x)])) or '数据不足'}",
        f"- 可能产品:{('、'.join([normalize_text(x) for x in author_card_highlights.get('likely_products', []) if normalize_text(x)])) or '证据不足'}",
        f"- 最大张力:{normalize_text(author_card_highlights.get('most_important_tension')) or '数据不足'}",
        f"- 只学一件事:{normalize_text(author_card_highlights.get('if_only_learn_one_thing')) or '数据不足'}",
        "",
        "## 商业分析",
        business_analysis or "数据不足",
        "",
        "## 对标分析",
        benchmark_analysis or "数据不足",
        "",
        "## 评分",
        f"- business_score: {business_score}",
        f"- benchmark_gap_score: {benchmark_gap_score}",
        "",
        "## 风格雷达",
        "```json",
        json.dumps(style_radar, ensure_ascii=False, indent=2),
        "```",
        "",
        "## 核心矛盾",
    ]

    if core_contradictions:
        lines.extend([f"- {normalize_text(item)}" for item in core_contradictions if normalize_text(item)])
    else:
        lines.append("- 数据不足")

    lines.extend(["", "## 建议动作"])
    if recommendations:
        lines.extend([f"- {normalize_text(item)}" for item in recommendations if normalize_text(item)])
    else:
        lines.append("- 数据不足")

    # Raw JSON appendices + validation summary + trace for debugging.
    lines.extend(
        [
            "",
            "## author_analysis_v2",
            "```json",
            json.dumps(author_analysis_v2, ensure_ascii=False, indent=2),
            "```",
            "",
            "## sampled_work_explanations",
            "```json",
            json.dumps(sampled_work_explanations, ensure_ascii=False, indent=2),
            "```",
            "",
            "## 校验",
            f"- validation_ok: {bool(validation.get('ok'))}",
            f"- validation_error_count: {len(validation.get('errors') or [])}",
            "",
            "## 附录",
            f"- confidence: {fields.get('confidence')}",
            f"- error_reason: {fields.get('error_reason')}",
            "",
            "```json",
            json.dumps(fields.get("extract_trace", []), ensure_ascii=False, indent=2),
            "```",
            "",
        ]
    )
    return "\n".join(lines)
1148
-
1149
-
1150
def _render_markdown(
    *,
    card_id: str,
    card_type: str,
    fields: Dict[str, Any],
    generated_at: str,
) -> str:
    """Render a work card as YAML-frontmatter markdown.

    Author cards are delegated to ``_render_author_markdown``; everything
    else gets base info, analysis modules, insight, primary text and an
    appendix with the raw ASR and extraction trace.
    """
    if card_type == "author":
        return _render_author_markdown(
            card_id=card_id,
            card_type=card_type,
            fields=fields,
            generated_at=generated_at,
        )
    author_name = fields.get("author") or fields.get("author_handle") or fields.get("platform_author_id") or "未知作者"
    title = fields.get("title") or "(标题缺失)"
    metrics_line = (
        f"赞 {_display_metric(fields.get('digg_count'))} / 评 {_display_metric(fields.get('comment_count'))} / "
        f"藏 {_display_metric(fields.get('collect_count'))} / 转 {_display_metric(fields.get('share_count'))} / 播 {_display_metric(fields.get('play_count'))}"
    )
    # Precomputed analysis wins; author-sample works skip local analysis.
    precomputed_sections = fields.get("analysis_sections") if isinstance(fields.get("analysis_sections"), dict) else {}
    if precomputed_sections:
        analysis_sections = precomputed_sections
    else:
        analysis_sections = {} if card_type == "author_sample_work" else build_analysis_sections(fields)
    creative_modules = analysis_sections.get("modules", {})
    insight_lines = analysis_sections.get("insight", ["数据不足"])
    extract_trace_json = json.dumps(fields.get("extract_trace", []), ensure_ascii=False, indent=2)

    # YAML frontmatter key/value pairs; values are JSON-encoded per line.
    fm = {
        "card_id": card_id,
        "card_type": card_type,
        "platform": fields.get("platform"),
        "generated_at": generated_at,
        "updated_at": generated_at,
        "title": fields.get("title"),
        "platform_work_id": fields.get("platform_work_id"),
        "author": fields.get("author"),
        "author_handle": fields.get("author_handle"),
        "platform_author_id": fields.get("platform_author_id"),
        "caption_raw": fields.get("caption_raw"),
        "primary_text": fields.get("primary_text"),
        "share_url": fields.get("share_url"),
        "source_url": fields.get("source_url"),
        "cover_image": fields.get("cover_image"),
        "video_download_url": fields.get("video_download_url"),
        "published_date": fields.get("published_date"),
        "duration_ms": fields.get("duration_ms"),
        "digg_count": fields.get("digg_count"),
        "comment_count": fields.get("comment_count"),
        "collect_count": fields.get("collect_count"),
        "share_count": fields.get("share_count"),
        "play_count": fields.get("play_count"),
        "tags": fields.get("tags", []),
        "work_modality": fields.get("work_modality"),
    }

    frontmatter = ["---"]
    for key, value in fm.items():
        frontmatter.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
    frontmatter.append("---")

    lines = [
        *frontmatter,
        "",
        "## 基础信息",
        f"- 作者:{author_name}",
        f"- 标题:{title}",
        f"- 原始文案:{fields.get('caption_raw') or 'N/A'}",
        f"- 作品模态:{fields.get('work_modality') or '未知'}",
        f"- 发布时间:{fields.get('published_date') or 'N/A'}",
        # Video works show a formatted duration; text works show the medium.
        f"- {'视频时长' if fields.get('work_modality') == 'video' else '阅读载体'}:{_format_duration(fields.get('duration_ms', 0)) if fields.get('work_modality') == 'video' else '文本'}",
        f"- 互动:{metrics_line}",
        f"- 链接:{fields.get('share_url') or '(未提供)'}",
        f"- 下载链接:{fields.get('video_download_url') or 'N/A'}" if fields.get("work_modality") == "video" else "- 下载链接:N/A",
    ]

    # One section per configured analysis module heading.
    for heading in DEFAULT_MODULE_SECTIONS:
        lines.append("")
        lines.append(f"## {heading}")
        for item in creative_modules.get(heading, ["数据不足"]):
            lines.append(item)

    lines.append("")
    lines.append("## 洞察分析")
    for item in insight_lines:
        lines.append(item)

    transcript_heading = "## 主文本"
    transcript_body = fields.get("primary_text")
    transcript_fallback = "(无可用主文本)"

    lines.extend(
        [
            "",
            transcript_heading,
            transcript_body or transcript_fallback,
        ]
    )

    # Appendix: raw ASR, trace identifiers, collapsible extraction trace.
    lines.extend(
        [
            "",
            "## 附录",
            "### ASR_RAW",
            fields.get("raw_content") or "(无可用 ASR 原文)",
            "",
            "### trace",
            f"- request_id: {fields.get('request_id')}",
            f"- confidence: {fields.get('confidence')}",
            f"- error_reason: {fields.get('error_reason')}",
            "",
            "<details>",
            "<summary>extract_trace(点击展开)</summary>",
            "",
            "```json",
            extract_trace_json,
            "```",
            "",
            "</details>",
            "",
        ]
    )
    return "\n".join(lines)
1274
-
1275
-
1276
- def _write_file(path: str, content: str) -> None:
1277
- with open(path, "w", encoding="utf-8") as handle:
1278
- handle.write(content)
1279
-
1280
-
1281
- def _resolve_card_root(card_root: Optional[str]) -> str:
1282
- raw = (card_root or "").strip()
1283
- if not raw:
1284
- return resolve_default_card_root()
1285
-
1286
- candidate = Path(raw).expanduser()
1287
- if not candidate.is_absolute():
1288
- raise ValueError("card_root must be an absolute path")
1289
- return str(candidate.resolve())
1290
-
1291
-
1292
def write_benchmark_card(
    *,
    payload: Dict[str, Any],
    platform: str,
    card_type: str,
    card_root: Optional[str],
    sample_author: Optional[str] = None,
    content_kind: Optional[str] = None,
    storage_config: Optional[Dict[str, Any]] = None,
    force_card_type: bool = False,
) -> Dict[str, Any]:
    """Render one benchmark card to markdown and write it under the card root.

    The effective card type is derived from the requested *card_type* plus
    the resolved content kind (payload value overridden by the explicit
    *content_kind* argument), unless *force_card_type* pins the manual one.
    Returns a result dict with routing info and the extracted fields.
    """
    now = dt.datetime.now()
    generated_at = now.isoformat(timespec="seconds")

    # Explicit content_kind argument wins over the payload's own value.
    payload_content_kind = normalize_text(payload.get("content_kind"))
    resolved_content_kind = normalize_text(content_kind) or payload_content_kind

    normalized_card_type = normalize_card_type(card_type)
    effective_card_type = resolve_effective_card_type(
        card_type=normalized_card_type,
        content_kind=resolved_content_kind,
        storage_config=storage_config,
        force_card_type=force_card_type,
    )
    fields = _extract_required_fields(payload, platform=platform)
    resolved_card_root = _resolve_card_root(card_root)

    # Resolve the destination path (and routing parts) for the primary card.
    primary_target = _build_output_path(
        card_root=resolved_card_root,
        platform=platform,
        card_type=effective_card_type,
        payload=payload,
        now=now,
        sample_author=sample_author,
        storage_config=storage_config,
    )
    primary_path = primary_target["path"]

    # Card id is the output filename without the .md extension.
    primary_card_id = os.path.basename(primary_path).replace(".md", "")
    primary_markdown = _render_markdown(
        card_id=primary_card_id,
        card_type=effective_card_type,
        fields=fields,
        generated_at=generated_at,
    )
    _write_file(primary_path, primary_markdown)

    return {
        "ok": True,
        "platform": platform,
        "card_type": effective_card_type,
        "requested_card_type": normalized_card_type,
        "force_card_type": bool(force_card_type),
        "content_kind": resolved_content_kind or None,
        "primary_card_path": primary_path,
        "routing": {
            "primary_route_parts": primary_target["route_parts"],
            "storage_routes_configured": bool(isinstance(storage_config, dict) and isinstance(storage_config.get("storage_routes"), dict)),
        },
        "required_fields": fields,
    }
1353
-
1354
-
1355
def _read_payload_from_input(input_json: str) -> Dict[str, Any]:
    """Load the JSON payload from a file path, or from stdin when given "-".

    Empty stdin yields an empty dict.

    Raises:
        json.JSONDecodeError: if the input carries malformed JSON.
    """
    if input_json == "-":
        import sys  # local import keeps the module's import surface unchanged

        # Bug fix: a single os.read(0, 1 MiB) call can return a partial chunk
        # on pipes and silently truncated any payload larger than 1 MiB;
        # sys.stdin.buffer.read() drains stdin completely.
        raw = sys.stdin.buffer.read().decode("utf-8", errors="replace").strip()
        if not raw:
            return {}
        return json.loads(raw)
    return read_json_file(input_json)
1362
-
1363
-
1364
def main() -> None:
    """CLI entry point: parse args, load config + payload, write one card.

    Reads the payload JSON from --input-json (or stdin with "-"), writes the
    rendered card under the resolved card root and prints the result dict as
    JSON on stdout.
    """
    parser = argparse.ArgumentParser(description="Write benchmark card markdown to card root")
    parser.add_argument("--platform", required=True, help="Platform name, e.g. douyin or xiaohongshu")
    parser.add_argument("--card-type", choices=CARD_TYPES, default="work", help="Primary card type")
    parser.add_argument("--config", default=None, help="Runtime config YAML path")
    parser.add_argument("--env-file", default=None, help="Shared env file path; defaults to <skills_root>/.env")
    parser.add_argument("--allow-process-env", action="store_true", help="Allow process env to override .env/.env.local")
    parser.add_argument("--sample-author", default=None, help="Optional author slug override for author_sample_work")
    parser.add_argument("--content-kind", default=None, help="Optional workflow kind, e.g. single_video/author_home/author_analysis")
    parser.add_argument("--force-card-type", action="store_true", help="Force manual --card-type to override content_kind mapping")
    parser.add_argument("--card-root", default=None, help="Card root path (absolute); falls back to TIKOMNI_CARD_ROOT when omitted")
    parser.add_argument(
        "--input-json",
        default="-",
        help="Input JSON path or '-' to read from stdin",
    )
    args = parser.parse_args()

    # Load runtime config (second tuple element — config source — is unused).
    config, _ = load_tikomni_config(
        args.config,
        env_file=args.env_file,
        allow_process_env=args.allow_process_env,
    )
    payload = _read_payload_from_input(args.input_json)
    result = write_benchmark_card(
        payload=payload,
        platform=args.platform,
        card_type=args.card_type,
        card_root=args.card_root,
        sample_author=args.sample_author,
        content_kind=args.content_kind,
        storage_config=config,
        force_card_type=args.force_card_type,
    )
    write_json_stdout(result)
1399
-
1400
-
1401
# Script entry point: run the CLI when executed directly.
if __name__ == "__main__":
    main()