@tikomni/skills 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (474)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -5
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{single-work-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +98 -2
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +106 -141
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{single-work-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +3 -1
  30. package/skills/{creator-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  33. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  34. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  36. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  37. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  38. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  39. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +58 -149
  40. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  41. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +30 -119
  42. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  43. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  45. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  46. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  47. package/skills/creator-analysis/SKILL.md +0 -95
  48. package/skills/creator-analysis/agents/openai.yaml +0 -4
  49. package/skills/creator-analysis/env.example +0 -36
  50. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  51. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  52. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  53. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  54. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  55. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  56. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  57. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  59. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  60. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  61. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  63. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  64. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  65. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  66. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  67. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  68. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  69. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  71. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  72. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  73. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  74. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  75. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  76. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  77. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  78. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  79. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  80. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  81. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  82. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  83. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  84. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  85. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  86. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  87. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  88. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  89. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  90. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  91. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  92. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  93. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  94. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  96. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  100. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  101. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  103. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  104. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  105. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  106. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  107. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  108. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  109. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  110. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  113. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  114. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  116. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  117. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  118. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  119. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  120. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  121. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  122. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  123. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  124. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  125. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  126. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  127. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  128. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  129. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  130. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  131. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  132. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  133. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  134. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  135. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  136. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  137. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  138. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  139. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  140. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  141. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  142. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  143. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  144. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  145. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  146. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  147. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  148. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  149. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  153. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  154. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  155. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  156. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  157. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  158. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  159. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  160. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  161. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  162. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  163. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  164. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  165. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  167. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  168. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  169. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  170. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  171. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  172. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  173. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  174. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  175. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  176. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  177. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  178. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  179. package/skills/creator-analysis/references/workflow.md +0 -23
  180. package/skills/creator-analysis/scripts/__init__.py +0 -0
  181. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  185. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  186. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  187. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  188. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  189. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  190. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  191. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  194. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  195. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  196. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  197. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  198. package/skills/creator-analysis/scripts/core/tikomni_common.py +0 -588
  199. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  200. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  202. package/skills/creator-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  203. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  205. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  207. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  208. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  209. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  210. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  211. package/skills/meta-capability/SKILL.md +0 -69
  212. package/skills/meta-capability/agents/openai.yaml +0 -4
  213. package/skills/meta-capability/env.example +0 -42
  214. package/skills/meta-capability/references/api-capability-index.md +0 -92
  215. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  216. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  217. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  218. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  219. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  220. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  222. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  223. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  224. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  226. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  227. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  228. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  229. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  230. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  231. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  232. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  233. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  234. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  235. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  236. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  237. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  238. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  239. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  240. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  241. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  242. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  243. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  244. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  245. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  246. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  247. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  248. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  249. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  250. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  251. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  252. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  253. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  254. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  255. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  256. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  257. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  258. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  259. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  263. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  264. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  265. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  266. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  267. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  268. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  269. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  270. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  271. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  272. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  273. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  274. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  276. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  277. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  279. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  280. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  281. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  282. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  283. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  284. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  285. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  286. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  287. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  288. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  289. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  290. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  291. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  292. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  293. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  294. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  295. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  296. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  297. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  298. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  299. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  300. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  301. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  302. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  303. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  304. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  305. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  306. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  307. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  308. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  309. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  310. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  311. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  312. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  316. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  317. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  318. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  319. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  320. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  321. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  322. package/skills/meta-capability/references/dispatch.md +0 -27
  323. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  324. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  325. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  326. package/skills/meta-capability/scripts/__init__.py +0 -1
  327. package/skills/meta-capability/scripts/call_route.py +0 -141
  328. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  329. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  330. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  331. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  332. package/skills/meta-capability/scripts/test_auth.py +0 -98
  333. package/skills/single-work-analysis/SKILL.md +0 -62
  334. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  335. package/skills/single-work-analysis/env.example +0 -36
  336. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  337. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  339. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  340. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  341. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  342. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  344. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  345. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  346. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  348. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  349. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  350. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  353. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  354. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  356. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  357. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  358. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  359. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  360. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  361. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  362. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  363. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  364. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  366. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  373. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  375. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  376. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  377. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  378. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  379. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  381. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  385. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  386. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  388. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  389. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  390. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  391. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  392. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  393. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  394. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  395. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  398. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  399. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  401. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  402. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  403. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  404. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  405. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  406. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  407. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  409. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  410. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  411. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  412. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  413. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  414. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  415. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  416. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  417. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  418. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  419. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  420. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  421. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  422. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  423. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  424. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  425. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  426. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  427. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  428. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  429. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  430. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  431. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  432. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  434. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  438. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  439. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  441. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  442. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  443. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  444. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -47
  445. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  446. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  447. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  448. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  449. package/skills/single-work-analysis/references/prompt-contracts/analysis-bundle.md +0 -82
  450. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  451. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  452. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  453. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  454. package/skills/single-work-analysis/scripts/core/analysis_adapter.py +0 -384
  455. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -456
  456. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  457. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  458. package/skills/single-work-analysis/scripts/core/progress_report.py +0 -258
  459. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -220
  460. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  461. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  462. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  463. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  464. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  465. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  466. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  468. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -752
  469. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  470. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  471. /package/skills/{single-work-analysis → social-media-crawl}/scripts/core/tikomni_common.py +0 -0
  472. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  473. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  474. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -1,1579 +0,0 @@
1
- #!/usr/bin/env python3
2
-
3
- if __package__ in {None, ""}:
4
- import sys
5
- from pathlib import Path
6
-
7
- _self = Path(__file__).resolve()
8
- for _parent in _self.parents:
9
- if (_parent / "scripts").is_dir():
10
- sys.path.insert(0, str(_parent))
11
- break
12
-
13
- """Write benchmark markdown cards into card root zones."""
14
-
15
- import argparse
16
- import datetime as dt
17
- import json
18
- import os
19
- import re
20
- import unicodedata
21
- from pathlib import Path
22
- from typing import Any, Dict, List, Optional
23
-
24
- try:
25
- from zoneinfo import ZoneInfo
26
- except Exception: # pragma: no cover - py<3.9 fallback
27
- ZoneInfo = None
28
-
29
- from scripts.core.analysis_pipeline import DEFAULT_MODULE_SECTIONS, build_analysis_sections
30
- from scripts.core.config_loader import load_tikomni_config
31
- from scripts.core.storage_router import build_card_output_path, normalize_card_type, resolve_effective_card_type
32
- from scripts.core.tikomni_common import normalize_text, read_json_file, write_json_stdout
33
-
34
- def resolve_default_card_root() -> str:
35
- raw = os.getenv("TIKOMNI_CARD_ROOT", "").strip()
36
- if not raw:
37
- raise ValueError(
38
- "missing_card_root: set --card-root or define TIKOMNI_CARD_ROOT in .env/.env.local"
39
- )
40
-
41
- candidate = Path(raw).expanduser()
42
- if not candidate.is_absolute():
43
- raise ValueError("TIKOMNI_CARD_ROOT must be an absolute path")
44
- return str(candidate.resolve())
45
-
46
-
47
- # Keep import-time compatibility for other scripts without crashing when env is absent.
48
- DEFAULT_CARD_ROOT = ""
49
- CARD_TYPES = ["work", "author", "author_sample_work"]
50
- AUTHOR_SAMPLE_CARD_ROLE = "author_sample_card"
51
- SAMPLE_WORK_CARD_ROLE = "sample_work_card"
52
- AUTHOR_CARD_ROLE = "author_card"
53
-
54
-
55
- def _normalize_lines(value: Any) -> List[str]:
56
- if isinstance(value, list):
57
- return [normalize_text(item) for item in value if normalize_text(item)]
58
- if isinstance(value, str):
59
- text = normalize_text(value)
60
- return [text] if text else []
61
- return []
62
-
63
-
64
- def _safe_int(value: Any, default: int = 0) -> int:
65
- if value is None:
66
- return default
67
- if isinstance(value, bool):
68
- return int(value)
69
- if isinstance(value, int):
70
- return value
71
- if isinstance(value, float):
72
- return int(value)
73
- if isinstance(value, str):
74
- text = value.strip()
75
- if text.isdigit() or (text.startswith("-") and text[1:].isdigit()):
76
- return int(text)
77
- return default
78
-
79
-
80
- def _safe_optional_int(value: Any) -> Optional[int]:
81
- if value is None:
82
- return None
83
- if isinstance(value, bool):
84
- return int(value)
85
- if isinstance(value, int):
86
- return value
87
- if isinstance(value, float):
88
- return int(value)
89
- if isinstance(value, str):
90
- text = value.strip()
91
- if not text:
92
- return None
93
- if text.isdigit() or (text.startswith("-") and text[1:].isdigit()):
94
- return int(text)
95
- return None
96
-
97
-
98
- def _safe_text(value: Any) -> str:
99
- if value is None:
100
- return ""
101
- if isinstance(value, str):
102
- return value.strip()
103
- if isinstance(value, (int, float, bool)):
104
- return str(value).strip()
105
- return ""
106
-
107
-
108
- def _safe_text_list(value: Any) -> List[str]:
109
- if isinstance(value, list):
110
- result: List[str] = []
111
- for item in value:
112
- if isinstance(item, dict):
113
- for key in ("name", "value", "label", "hashtag_name", "search_text", "tag_name", "text"):
114
- text = _safe_text(item.get(key))
115
- if text:
116
- result.append(text)
117
- break
118
- continue
119
- text = _safe_text(item)
120
- if text:
121
- result.append(text)
122
- return list(dict.fromkeys(result))
123
- if isinstance(value, str):
124
- text = _safe_text(value)
125
- if not text:
126
- return []
127
- return [item for item in re.split(r"[,,\s]+", text) if item]
128
- return []
129
-
130
-
131
- def _to_unix_sec(value: Any) -> int:
132
- if value is None:
133
- return 0
134
- parsed = _safe_int(value, default=0)
135
- if parsed <= 0:
136
- return 0
137
- if parsed > 1_000_000_000_000:
138
- parsed //= 1000
139
- return parsed
140
-
141
-
142
- def _format_shanghai_datetime(value: Any) -> str:
143
- ts = _to_unix_sec(value)
144
- if ts <= 0:
145
- return ""
146
- try:
147
- if ZoneInfo is not None:
148
- dt_obj = dt.datetime.fromtimestamp(ts, tz=ZoneInfo("Asia/Shanghai"))
149
- else:
150
- dt_obj = dt.datetime.fromtimestamp(ts, tz=dt.timezone(dt.timedelta(hours=8)))
151
- return dt_obj.strftime("%Y-%m-%d %H:%M:%S")
152
- except Exception:
153
- return ""
154
-
155
-
156
- def _resolve_publish_time(payload: Dict[str, Any], create_time_sec: int) -> Dict[str, str]:
157
- publish_time_text = normalize_text(payload.get("publish_time_text"))
158
- if publish_time_text:
159
- return {"publish_time_text": publish_time_text, "publish_time_source": "payload.publish_time_text"}
160
-
161
- source = _source_dict(payload)
162
- candidates = [
163
- ("payload.publish_time", payload.get("publish_time")),
164
- ("payload.create_time", payload.get("create_time")),
165
- ("source.publish_time", source.get("publish_time")),
166
- ("source.create_time", source.get("create_time")),
167
- ("source.time", source.get("time")),
168
- ]
169
- for source_key, raw in candidates:
170
- text = normalize_text(raw)
171
- if not text:
172
- continue
173
- ts_text = _format_shanghai_datetime(raw)
174
- if ts_text:
175
- return {"publish_time_text": ts_text, "publish_time_source": source_key}
176
- return {"publish_time_text": text, "publish_time_source": source_key}
177
-
178
- fallback_text = _format_shanghai_datetime(create_time_sec)
179
- if fallback_text:
180
- return {"publish_time_text": fallback_text, "publish_time_source": "create_time_sec"}
181
-
182
- return {"publish_time_text": "未知", "publish_time_source": "unknown"}
183
-
184
-
185
- def _resolve_published_date(payload: Dict[str, Any], create_time_sec: int) -> str:
186
- published_date = normalize_text(payload.get("published_date"))
187
- if published_date:
188
- return published_date
189
- publish_time_info = _resolve_publish_time(payload, create_time_sec)
190
- text = normalize_text(publish_time_info.get("publish_time_text"))
191
- if not text or text == "未知":
192
- return "N/A"
193
- return text[:10]
194
-
195
-
196
- def _display_metric(value: Optional[int]) -> str:
197
- if value is None:
198
- return "N/A"
199
- return str(value)
200
-
201
-
202
- def _source_dict(payload: Dict[str, Any]) -> Dict[str, Any]:
203
- source = payload.get("source")
204
- return source if isinstance(source, dict) else {}
205
-
206
-
207
- def _extract_duration_ms(payload: Dict[str, Any]) -> int:
208
- source = _source_dict(payload)
209
-
210
- def _pick_int(keys: List[str], from_source: bool = False) -> int:
211
- base = source if from_source else payload
212
- for key in keys:
213
- value = _safe_int(base.get(key), default=0)
214
- if value > 0:
215
- return value
216
- return 0
217
-
218
- duration_ms = _pick_int(["duration_ms"])
219
- if duration_ms <= 0:
220
- duration_ms = _pick_int(["duration_ms"], from_source=True)
221
-
222
- if duration_ms <= 0:
223
- raw_duration = _pick_int(["duration", "duration_sec"])
224
- if raw_duration <= 0:
225
- raw_duration = _pick_int(["duration", "duration_sec"], from_source=True)
226
- if raw_duration > 0:
227
- duration_ms = raw_duration * 1000 if raw_duration < 10000 else raw_duration
228
-
229
- return duration_ms
230
-
231
-
232
- def _ensure_sentence_end(text: str) -> str:
233
- if not text:
234
- return text
235
- if text[-1] in "。!?!?" or text.endswith("..."):
236
- return text
237
- return f"{text}。"
238
-
239
-
240
- def _clean_asr_text(raw: str, provided_clean: str) -> str:
241
- """ASR_CLEAN prompt-contracts/asr-clean.md@v1
242
-
243
- Steps:
244
- 1) base select: provided_clean > raw
245
- 2) denoise: remove filler/repetition/whitespace noise
246
- 3) sentence split + punctuation restore
247
- 4) paragraphize: one sentence per line, 2-4 sentences per paragraph
248
- """
249
- base = normalize_text(provided_clean) or normalize_text(raw)
250
- if not base:
251
- return ""
252
-
253
- # step2: 去噪(口头禅/重复)
254
- base = re.sub(r"\b(嗯|啊|呃|额|那个|这个|然后|就是)\b", " ", base)
255
- base = re.sub(r"(嗯+|啊+|呃+)", " ", base)
256
- base = re.sub(r"(就是就是|然后然后|这个这个|那个那个)", " ", base)
257
- base = re.sub(r"\s+", " ", base).strip()
258
-
259
- # step3: 断句 + 句尾标点
260
- units = [normalize_text(part) for part in re.split(r"[。!?!?;;\n]+", base)]
261
- sentences = [_ensure_sentence_end(unit) for unit in units if unit]
262
- if not sentences:
263
- fallback = _ensure_sentence_end(base)
264
- return fallback if fallback else ""
265
-
266
- # step4: 每句一行;每段 2~4 句(默认 3 句)
267
- paragraphs: List[str] = []
268
- bucket: List[str] = []
269
- for sentence in sentences:
270
- bucket.append(sentence)
271
- if len(bucket) >= 3:
272
- paragraphs.append("\n".join(bucket))
273
- bucket = []
274
-
275
- if bucket:
276
- if len(bucket) == 1 and paragraphs:
277
- paragraphs[-1] = f"{paragraphs[-1]}\n{bucket[0]}"
278
- else:
279
- paragraphs.append("\n".join(bucket))
280
-
281
- return "\n\n".join(paragraphs)
282
-
283
-
284
- def _pick_text(payload: Dict[str, Any], keys: List[str], source_keys: Optional[List[str]] = None) -> str:
285
- source = _source_dict(payload)
286
- for key in keys:
287
- text = normalize_text(payload.get(key))
288
- if text:
289
- return text
290
- for key in (source_keys or keys):
291
- text = normalize_text(source.get(key))
292
- if text:
293
- return text
294
- return ""
295
-
296
-
297
- def _extract_platform_work_id(payload: Dict[str, Any]) -> str:
298
- return _pick_text(
299
- payload,
300
- ["platform_work_id", "aweme_id", "note_id", "item_id", "id"],
301
- ["platform_work_id", "aweme_id", "note_id", "item_id", "id"],
302
- )
303
-
304
-
305
- def _extract_author(payload: Dict[str, Any]) -> Dict[str, str]:
306
- author_raw = payload.get("author")
307
- author = author_raw if isinstance(author_raw, dict) else {}
308
-
309
- source = _source_dict(payload)
310
- source_author = source.get("author") if isinstance(source.get("author"), dict) else {}
311
-
312
- author_text = normalize_text(author_raw) if isinstance(author_raw, str) else ""
313
- nickname = (
314
- normalize_text(author.get("nickname"))
315
- or author_text
316
- or normalize_text(source_author.get("nickname"))
317
- )
318
-
319
- author_handle = (
320
- normalize_text(payload.get("author_handle"))
321
- or normalize_text(author.get("author_handle"))
322
- or normalize_text(source_author.get("author_handle"))
323
- or nickname
324
- )
325
- platform_author_id = (
326
- normalize_text(payload.get("platform_author_id"))
327
- or normalize_text(payload.get("author_platform_id"))
328
- or normalize_text(author.get("platform_author_id"))
329
- or normalize_text(author.get("author_platform_id"))
330
- or normalize_text(source_author.get("platform_author_id"))
331
- or normalize_text(source_author.get("author_platform_id"))
332
- )
333
-
334
- xhs_user_id = (
335
- normalize_text(payload.get("xhs_user_id"))
336
- or normalize_text(author.get("xhs_user_id"))
337
- or normalize_text(source_author.get("xhs_user_id"))
338
- )
339
- xhs_sec_token = (
340
- normalize_text(payload.get("xhs_sec_token"))
341
- or normalize_text(author.get("xhs_sec_token"))
342
- or normalize_text(source_author.get("xhs_sec_token"))
343
- )
344
-
345
- douyin_sec_uid = (
346
- normalize_text(payload.get("douyin_sec_uid"))
347
- or normalize_text(author.get("douyin_sec_uid"))
348
- or normalize_text(source_author.get("douyin_sec_uid"))
349
- )
350
- douyin_aweme_author_id = (
351
- normalize_text(payload.get("douyin_aweme_author_id"))
352
- or normalize_text(author.get("douyin_aweme_author_id"))
353
- or normalize_text(source_author.get("douyin_aweme_author_id"))
354
- )
355
-
356
- return {
357
- "nickname": nickname,
358
- "author_handle": author_handle,
359
- "platform_author_id": platform_author_id,
360
- "xhs_user_id": xhs_user_id,
361
- "xhs_sec_token": xhs_sec_token,
362
- "douyin_sec_uid": douyin_sec_uid,
363
- "douyin_aweme_author_id": douyin_aweme_author_id,
364
- }
365
-
366
-
367
- def _is_cjk(char: str) -> bool:
368
- code = ord(char)
369
- return 0x4E00 <= code <= 0x9FFF
370
-
371
-
372
- def _clean_for_filename(text: str) -> str:
373
- if not text:
374
- return ""
375
-
376
- normalized = unicodedata.normalize("NFKC", text)
377
- normalized = re.sub(r"[##][^\s##]+", " ", normalized)
378
- normalized = re.sub(r"\[[^\]]+\]", " ", normalized)
379
- normalized = normalized.replace("\n", " ").replace("\r", " ")
380
-
381
- kept: List[str] = []
382
- for ch in normalized:
383
- cat = unicodedata.category(ch)
384
- if _is_cjk(ch) or ch.isalnum() or ch in {" ", "-", "_"}:
385
- kept.append(ch)
386
- elif cat.startswith("Z"):
387
- kept.append(" ")
388
-
389
- compact = "".join(kept)
390
- compact = re.sub(r"\s+", "", compact)
391
- compact = re.sub(r"[\\/:*?\"<>|]", "", compact)
392
- return compact.strip("._-")
393
-
394
-
395
- def _clip_with_min(text: str, min_len: int, max_len: int, fallback: str) -> str:
396
- candidate = _clean_for_filename(text)
397
- fallback_clean = _clean_for_filename(fallback)
398
-
399
- if not candidate:
400
- candidate = fallback_clean
401
- if len(candidate) < min_len:
402
- candidate = (candidate + fallback_clean)[:max_len]
403
- if len(candidate) < min_len:
404
- candidate = (candidate + "内容速览")[:max_len]
405
-
406
- candidate = candidate[:max_len]
407
- if len(candidate) < min_len:
408
- candidate = (candidate + "作品卡")[:max_len]
409
- return candidate[:max_len] if candidate else fallback_clean[:max_len]
410
-
411
-
412
- def _pick_author_slug(payload: Dict[str, Any], author_hint: Optional[str] = None) -> str:
413
- base = normalize_text(author_hint)
414
- if not base:
415
- author = _extract_author(payload)
416
- base = author["nickname"] or author["author_handle"] or author["platform_author_id"] or "作者"
417
- slug = _clip_with_min(base, min_len=2, max_len=18, fallback="作者")
418
- return slug if len(slug) >= 2 else "作者"
419
-
420
-
421
- def _pick_title_source(payload: Dict[str, Any]) -> str:
422
- for key in ("title", "desc", "summary"):
423
- text = normalize_text(payload.get(key))
424
- if text:
425
- return text
426
-
427
- source = _source_dict(payload)
428
- for key in ("title", "desc"):
429
- text = normalize_text(source.get(key))
430
- if text:
431
- return text
432
-
433
- raw_content = normalize_text(payload.get("raw_content"))
434
- if raw_content:
435
- return raw_content[:48]
436
-
437
- platform_work_id = _extract_platform_work_id(payload)
438
- if platform_work_id:
439
- return f"作品拆解{platform_work_id[-8:]}"
440
-
441
- return "内容拆解速览"
442
-
443
-
444
- def _pick_title_slug(payload: Dict[str, Any]) -> str:
445
- title_source = _pick_title_source(payload)
446
- platform_work_id = _extract_platform_work_id(payload)
447
- fallback = f"内容拆解{platform_work_id[-8:]}" if platform_work_id else "内容拆解速览"
448
- slug = _clip_with_min(title_source, min_len=8, max_len=28, fallback=fallback)
449
- return slug if slug else "内容拆解速览"
450
-
451
-
452
- def _extract_tags(payload: Dict[str, Any]) -> List[str]:
453
- for key in ("tags", "tag_list", "hashtags"):
454
- value = payload.get(key)
455
- if isinstance(value, list):
456
- tags = [item.lstrip("#") for item in _safe_text_list(value)]
457
- if tags:
458
- return list(dict.fromkeys(tags))
459
- if isinstance(value, str) and normalize_text(value):
460
- parts = re.split(r"[,,\s]+", normalize_text(value))
461
- tags = [part.lstrip("#") for part in parts if part]
462
- if tags:
463
- return list(dict.fromkeys(tags))
464
-
465
- source = _source_dict(payload)
466
- for key in ("tags", "tag_list", "hashtags"):
467
- value = source.get(key)
468
- if isinstance(value, list):
469
- tags = [item.lstrip("#") for item in _safe_text_list(value)]
470
- if tags:
471
- return list(dict.fromkeys(tags))
472
-
473
- return []
474
-
475
-
476
- def _extract_required_fields(payload: Dict[str, Any], platform: str) -> Dict[str, Any]:
477
- author = _extract_author(payload)
478
-
479
- title = _pick_text(payload, ["title", "desc"], ["title", "desc"])
480
- caption_raw = normalize_text(payload.get("caption_raw") or payload.get("desc"))
481
- platform_work_id = _extract_platform_work_id(payload)
482
-
483
- source_url = _pick_text(
484
- payload,
485
- ["source_url", "share_url", "url"],
486
- ["source_url", "share_url", "url", "share_text"],
487
- )
488
- share_url = _pick_text(
489
- payload,
490
- ["share_url", "canonical_share_url"],
491
- ["share_url", "canonical_share_url", "url", "source_url", "share_text"],
492
- ) or source_url
493
-
494
- cover_image = _pick_text(
495
- payload,
496
- ["cover_image", "cover_url", "cover"],
497
- ["cover_image", "cover_url", "cover", "origin_cover"],
498
- )
499
- selected_images = payload.get("selected_image_urls")
500
- if not cover_image and isinstance(selected_images, list) and selected_images:
501
- cover_image = normalize_text(selected_images[0])
502
-
503
- video_download_url = _pick_text(
504
- payload,
505
- ["video_download_url", "video_down_url", "selected_video_url", "original_video_url", "video_url", "download_url"],
506
- ["video_download_url", "video_down_url", "selected_video_url", "original_video_url", "video_url", "download_url"],
507
- )
508
-
509
- create_time_sec = _to_unix_sec(payload.get("create_time_sec"))
510
- if create_time_sec <= 0:
511
- create_time_sec = _to_unix_sec(payload.get("create_time"))
512
- if create_time_sec <= 0:
513
- create_time_sec = _to_unix_sec(_source_dict(payload).get("create_time"))
514
-
515
- metrics = payload.get("metrics") if isinstance(payload.get("metrics"), dict) else {}
516
- digg_count = _safe_int(payload.get("digg_count"), default=_safe_int(metrics.get("like"), default=0))
517
- comment_count = _safe_int(payload.get("comment_count"), default=_safe_int(metrics.get("comment"), default=0))
518
- collect_count = _safe_int(payload.get("collect_count"), default=_safe_int(metrics.get("collect"), default=0))
519
- share_count = _safe_int(payload.get("share_count"), default=_safe_int(metrics.get("share"), default=0))
520
- play_count = _safe_optional_int(payload.get("play_count"))
521
- if play_count is None:
522
- play_count = _safe_optional_int(metrics.get("play"))
523
-
524
- summary = normalize_text(payload.get("summary"))
525
- raw_content = normalize_text(payload.get("raw_content"))
526
- primary_text = normalize_text(payload.get("primary_text"))
527
- provided_asr_clean = normalize_text(payload.get("asr_clean"))
528
- asr_clean = _clean_asr_text(raw_content, provided_asr_clean)
529
-
530
- duration_ms = _extract_duration_ms(payload)
531
-
532
- category = normalize_text(payload.get("category"))
533
- if not category:
534
- category = "观点"
535
-
536
- hot_score = _safe_int(payload.get("hot_score"), default=0)
537
- if hot_score <= 0:
538
- hot_score = digg_count + comment_count * 2 + collect_count * 3 + share_count * 4
539
-
540
- work_modality = normalize_text(payload.get("work_modality"))
541
- if not work_modality:
542
- work_modality = "video" if video_download_url or raw_content else "text"
543
-
544
- published_date = _resolve_published_date(payload, create_time_sec)
545
- primary_text_source_raw = normalize_text(payload.get("primary_text_source"))
546
- primary_text_source = (
547
- primary_text_source_raw
548
- if primary_text_source_raw in {"asr_clean", "caption_raw"}
549
- else ("asr_clean" if work_modality == "video" else "caption_raw")
550
- )
551
- if not primary_text:
552
- primary_text = asr_clean if primary_text_source == "asr_clean" else normalize_text(payload.get("desc"))
553
-
554
- sampled_explanation = payload.get("sampled_explanation") if isinstance(payload.get("sampled_explanation"), dict) else {}
555
-
556
- return {
557
- "title": title,
558
- "platform": platform,
559
- "platform_work_id": platform_work_id,
560
- "author": author.get("nickname") or "",
561
- "author_handle": author.get("author_handle") or "",
562
- "platform_author_id": author.get("platform_author_id") or "",
563
- "caption_raw": caption_raw,
564
- "share_url": share_url,
565
- "source_url": source_url,
566
- "cover_image": cover_image,
567
- "video_download_url": video_download_url,
568
- "published_date": published_date,
569
- "duration_ms": duration_ms,
570
- "digg_count": digg_count,
571
- "comment_count": comment_count,
572
- "collect_count": collect_count,
573
- "share_count": share_count,
574
- "play_count": play_count,
575
- "tags": _extract_tags(payload),
576
- "work_modality": work_modality,
577
- "category": category,
578
- "content_kind": normalize_text(payload.get("content_kind")),
579
- "summary": summary,
580
- "hot_score": hot_score,
581
- "raw_content": raw_content,
582
- "primary_text": primary_text,
583
- "asr_clean": asr_clean,
584
- "performance_score": payload.get("performance_score"),
585
- "performance_score_norm": payload.get("performance_score_norm"),
586
- "bucket": normalize_text(payload.get("bucket")),
587
- "hook_type": normalize_text(payload.get("hook_type")),
588
- "structure_type": normalize_text(payload.get("structure_type")),
589
- "cta_type": normalize_text(payload.get("cta_type")),
590
- "content_form": normalize_text(payload.get("content_form")),
591
- "style_markers": _safe_text_list(payload.get("style_markers")),
592
- "analysis_eligibility": normalize_text(payload.get("analysis_eligibility")) or "eligible",
593
- "analysis_exclusion_reason": normalize_text(payload.get("analysis_exclusion_reason")),
594
- "card_role": normalize_text(payload.get("card_role")),
595
- "sampled_explanation": sampled_explanation,
596
- "platform_native_refs": payload.get("platform_native_refs") if isinstance(payload.get("platform_native_refs"), dict) else {},
597
- "request_id": payload.get("request_id"),
598
- "confidence": normalize_text(payload.get("confidence")) or "low",
599
- "error_reason": payload.get("error_reason"),
600
- "extract_trace": payload.get("extract_trace", []),
601
- "analysis_sections": payload.get("analysis_sections") if isinstance(payload.get("analysis_sections"), dict) else {},
602
- "analysis_output": payload.get("analysis_output") if isinstance(payload.get("analysis_output"), dict) else {},
603
- "author_analysis_v2": payload.get("author_analysis_v2") if isinstance(payload.get("author_analysis_v2"), dict) else {},
604
- "author_analysis_input_v1": payload.get("author_analysis_input_v1") if isinstance(payload.get("author_analysis_input_v1"), dict) else {},
605
- "sampled_work_explanations": payload.get("sampled_work_explanations") if isinstance(payload.get("sampled_work_explanations"), dict) else {},
606
- "author_card_highlights": payload.get("author_card_highlights") if isinstance(payload.get("author_card_highlights"), dict) else {},
607
- "validation": payload.get("validation") if isinstance(payload.get("validation"), dict) else {},
608
- "quality_tier": normalize_text(payload.get("quality_tier")),
609
- "stage_status": payload.get("stage_status") if isinstance(payload.get("stage_status"), dict) else {},
610
- "sampled_work_ids": _safe_text_list(payload.get("sampled_work_ids")),
611
- "business_score": _safe_int(payload.get("business_score"), default=0),
612
- "benchmark_gap_score": _safe_int(payload.get("benchmark_gap_score"), default=0),
613
- "style_radar": payload.get("style_radar") if isinstance(payload.get("style_radar"), dict) else {},
614
- "core_contradictions": payload.get("core_contradictions") if isinstance(payload.get("core_contradictions"), list) else [],
615
- "recommendations": payload.get("recommendations") if isinstance(payload.get("recommendations"), list) else [],
616
- "business_analysis": normalize_text(payload.get("business_analysis")),
617
- "benchmark_analysis": normalize_text(payload.get("benchmark_analysis")),
618
- "nickname": normalize_text(payload.get("nickname")),
619
- "ip_location": normalize_text(payload.get("ip_location")),
620
- "signature": normalize_text(payload.get("signature")),
621
- "avatar_url": normalize_text(payload.get("avatar_url")),
622
- "fans_count": _safe_optional_int(payload.get("fans_count")),
623
- "liked_count": _safe_optional_int(payload.get("liked_count")),
624
- "collected_count": _safe_optional_int(payload.get("collected_count")),
625
- "works_count": _safe_optional_int(payload.get("works_count")),
626
- "verified": payload.get("verified") if isinstance(payload.get("verified"), bool) else None,
627
- "snapshot_at": normalize_text(payload.get("snapshot_at")),
628
- }
629
-
630
-
631
- def _format_create_time(create_time_sec: int) -> str:
632
- text = _format_shanghai_datetime(create_time_sec)
633
- if text:
634
- return text
635
- if create_time_sec <= 0:
636
- return "未知"
637
- return str(create_time_sec)
638
-
639
-
640
- def _format_duration(duration_ms: int) -> str:
641
- if duration_ms <= 0:
642
- return "未知"
643
- total_sec = duration_ms // 1000
644
- minute, second = divmod(total_sec, 60)
645
- if minute:
646
- return f"{minute}分{second:02d}秒"
647
- return f"{second}秒"
648
-
649
-
650
- def _sentence_units(text: str) -> List[str]:
651
- if not text:
652
- return []
653
- return [normalize_text(x) for x in re.split(r"[。!?!?;;\\n]+", text) if normalize_text(x)]
654
-
655
-
656
- def _first_sentence(text: str) -> str:
657
- units = _sentence_units(text)
658
- return units[0] if units else ""
659
-
660
-
661
- def _hit_count(text: str, keywords: List[str]) -> int:
662
- if not text:
663
- return 0
664
- return sum(1 for token in keywords if token in text)
665
-
666
-
667
- def _top_keywords(text: str, candidates: List[str], topn: int = 3) -> List[str]:
668
- if not text:
669
- return []
670
- scored = []
671
- for token in candidates:
672
- count = text.count(token)
673
- if count > 0:
674
- scored.append((count, token))
675
- scored.sort(key=lambda x: (-x[0], len(x[1])))
676
- return [token for _, token in scored[:topn]]
677
-
678
-
679
- def _score_from_hits(hits: int, full_score_hits: int = 4) -> int:
680
- if hits <= 0:
681
- return 2
682
- if hits >= full_score_hits:
683
- return 5
684
- return min(5, hits + 2)
685
-
686
-
687
-
688
- def _analyze_topic(fields: Dict[str, Any]) -> Dict[str, Any]:
689
- title = normalize_text(fields.get("title") or "")
690
- asr = normalize_text(fields.get("asr_clean") or "")
691
- category = normalize_text(fields.get("category") or "")
692
- text = f"{title} {asr}"
693
-
694
- if not text.strip():
695
- return {
696
- "score": 2,
697
- "lines": ["- 类型:数据不足。", "- 细分主题:数据不足。", "- 受众痛点:数据不足,需补充标题或ASR。"],
698
- "gaps": ["补齐标题或ASR文本,才能完成选题分类与主题归因"],
699
- "evidence": "输入文本缺失",
700
- }
701
-
702
- type_rules = {
703
- "流量型": ["热点", "挑战", "反转", "揭秘", "真相", "别再", "为什么", "踩坑", "3秒", "爆款"],
704
- "人设型": ["我是", "我们", "日常", "系列", "分享", "经历", "成长", "复盘", "带你", "我"],
705
- "营销型": ["领取", "私信", "咨询", "下单", "课程", "优惠", "链接", "报名", "合作", "购买"],
706
- }
707
- type_scores = {name: _hit_count(text, kws) for name, kws in type_rules.items()}
708
-
709
- if category in ["教程", "知识", "方法"]:
710
- type_scores["营销型"] += 1
711
- if category in ["观点", "人设", "日常"]:
712
- type_scores["人设型"] += 1
713
-
714
- main_type = max(type_scores, key=lambda k: type_scores[k])
715
- main_hits = type_scores[main_type]
716
-
717
- theme_candidates = [
718
- "AI", "智能体", "变现", "副业", "教程", "工作流", "流量", "涨粉", "投流", "口播", "脚本", "工具", "私域", "创业", "营销",
719
- ]
720
- themes = _top_keywords(text, theme_candidates, topn=3)
721
- pain_candidates = ["不会", "焦虑", "卡住", "没流量", "转化", "不会写", "不会做", "时间不够", "担心", "风险"]
722
- pains = _top_keywords(text, pain_candidates, topn=2)
723
-
724
- lines = [
725
- f"- 基础类型:{main_type}(命中信号 {main_hits} 个)。",
726
- f"- 细分主题:{'、'.join(themes) if themes else '数据不足(未检测到显著主题词)'}。",
727
- f"- 受众痛点:{'、'.join(pains) if pains else '以“快速落地/降低门槛”为主(显性痛点词不足)'}。",
728
- ]
729
-
730
- return {
731
- "score": _score_from_hits(main_hits),
732
- "lines": lines,
733
- "gaps": [] if themes else ["补充更完整ASR,提高细分主题识别稳定性"],
734
- "evidence": f"类型命中分布={type_scores}",
735
- }
736
-
737
-
738
- def _analyze_style(fields: Dict[str, Any]) -> Dict[str, Any]:
739
- asr = normalize_text(fields.get("asr_clean") or "")
740
- title = normalize_text(fields.get("title") or "")
741
- text = f"{title} {asr}".strip()
742
- units = _sentence_units(asr)
743
-
744
- if not text:
745
- return {
746
- "score": 2,
747
- "lines": ["- 人设匹配:数据不足。", "- 句式结构:数据不足。", "- 语气与情绪:数据不足。"],
748
- "gaps": ["补齐ASR文本后再做文风拆解"],
749
- "evidence": "输入文本缺失",
750
- }
751
-
752
- avg_len = int(sum(len(u) for u in units) / max(1, len(units))) if units else 0
753
- if avg_len <= 14:
754
- length_type = "短句为主"
755
- elif avg_len <= 24:
756
- length_type = "中短句混合"
757
- else:
758
- length_type = "中长句为主"
759
-
760
- q_count = text.count("?") + text.count("?")
761
- e_count = text.count("!") + text.count("!")
762
- statement_count = max(0, len(units) - q_count - e_count)
763
- persona_hits = _hit_count(text, ["我", "我们", "你", "大家", "朋友们", "聪明的你"])
764
- rhetoric_hits = _hit_count(text, ["不是", "而是", "其实", "真的", "一定", "必须", "先", "再"])
765
-
766
- lines = [
767
- f"- 句式结构:{length_type},平均句长约 {avg_len} 字。",
768
- f"- 语气分布:疑问 {q_count} / 感叹 {e_count} / 陈述 {statement_count}。",
769
- f"- 人设与修辞:人设代词命中 {persona_hits} 次,强调/转折词命中 {rhetoric_hits} 次。",
770
- ]
771
-
772
- strength_hits = int(avg_len > 0) + int(persona_hits > 0) + int(rhetoric_hits > 0)
773
- return {
774
- "score": _score_from_hits(strength_hits, full_score_hits=3),
775
- "lines": lines,
776
- "gaps": [] if units else ["ASR分句失败,建议人工复核"],
777
- "evidence": f"avg_len={avg_len}, persona_hits={persona_hits}, rhetoric_hits={rhetoric_hits}",
778
- }
779
-
780
-
781
- def _analyze_hook(fields: Dict[str, Any]) -> Dict[str, Any]:
782
- title = normalize_text(fields.get("title") or "")
783
- asr = normalize_text(fields.get("asr_clean") or "")
784
- first = _first_sentence(asr) or title
785
- middle = _sentence_units(asr)[len(_sentence_units(asr)) // 2] if _sentence_units(asr) else ""
786
-
787
- if not first:
788
- return {
789
- "score": 2,
790
- "lines": ["- 开头钩子:数据不足。", "- 中段钩子:数据不足。", "- 结尾钩子:数据不足。"],
791
- "gaps": ["缺少标题与ASR,无法提取钩子原话"],
792
- "evidence": "开头句缺失",
793
- }
794
-
795
- hook_type = "陈述式"
796
- if any(k in first for k in ["?", "?", "为什么", "怎么"]):
797
- hook_type = "疑问式"
798
- elif any(k in first for k in ["别再", "误区", "真相", "不是"]):
799
- hook_type = "反常识式"
800
- elif any(k in first for k in ["当你", "如果", "今天"]):
801
- hook_type = "场景代入式"
802
-
803
- end_candidates = [u for u in _sentence_units(asr) if _hit_count(u, ["关注", "评论", "私信", "收藏", "转发", "下次见", "领取"]) > 0]
804
- end = end_candidates[-1] if end_candidates else "未检测到明确结尾钩子"
805
-
806
- lines = [
807
- f"- 开头钩子({hook_type}):{first}",
808
- f"- 中段钩子:{middle or '数据不足(中段文本不足)'}",
809
- f"- 结尾钩子:{end}",
810
- ]
811
-
812
- hook_hits = int(first != "") + int(bool(middle)) + int(end != "未检测到明确结尾钩子")
813
- return {
814
- "score": _score_from_hits(hook_hits, full_score_hits=3),
815
- "lines": lines,
816
- "gaps": [] if hook_hits >= 2 else ["建议补充中段转折钩子与结尾动作钩子"],
817
- "evidence": f"hook_type={hook_type}, hook_hits={hook_hits}",
818
- }
819
-
820
-
821
- def _analyze_structure(fields: Dict[str, Any]) -> Dict[str, Any]:
822
- asr = normalize_text(fields.get("asr_clean") or "")
823
- units = _sentence_units(asr)
824
- if not units:
825
- return {
826
- "score": 2,
827
- "lines": ["- 结构标签:数据不足。", "- 模板判定:数据不足。"],
828
- "gaps": ["补充ASR后再进行结构标注"],
829
- "evidence": "分句为空",
830
- }
831
-
832
- label_rules = {
833
- "钩子": ["?", "?", "为什么", "怎么", "别再", "真相", "当你", "如果"],
834
- "冲突": ["但是", "却", "问题", "误区", "卡住", "焦虑", "失败"],
835
- "转折": ["所以", "于是", "然后", "接着", "这时候", "其实"],
836
- "举证": ["数据", "案例", "比如", "步骤", "第一", "第二", "第三"],
837
- "CTA": ["评论", "关注", "私信", "收藏", "转发", "点击", "领取", "报名"],
838
- }
839
- coverage = {k: 0 for k in label_rules}
840
- for sent in units:
841
- for label, kws in label_rules.items():
842
- if any(kw in sent for kw in kws):
843
- coverage[label] += 1
844
-
845
- present = [k for k, v in coverage.items() if v > 0]
846
- missing = [k for k, v in coverage.items() if v == 0]
847
- template = "钩子→冲突→转折→举证→CTA" if len(present) >= 4 else "钩子→观点→补充说明"
848
-
849
- lines = [
850
- f"- 结构标签覆盖:{', '.join([f'{k}:{v}' for k, v in coverage.items()])}。",
851
- f"- 模板判定:{template}。",
852
- f"- 缺失模块:{'、'.join(missing) if missing else '无'}。",
853
- ]
854
-
855
- return {
856
- "score": _score_from_hits(len(present), full_score_hits=5),
857
- "lines": lines,
858
- "gaps": [f"优先补齐结构模块:{'、'.join(missing)}"] if missing else [],
859
- "evidence": f"coverage={coverage}",
860
- }
861
-
862
-
863
- def _analyze_cta(fields: Dict[str, Any]) -> Dict[str, Any]:
864
- asr = normalize_text(fields.get("asr_clean") or "")
865
- units = _sentence_units(asr)
866
- cta_tokens = ["评论", "关注", "私信", "收藏", "转发", "点击", "领取", "报名", "下单", "咨询", "试试"]
867
- cta_sentences = [u for u in units if any(token in u for token in cta_tokens)]
868
-
869
- if not units:
870
- return {
871
- "score": 2,
872
- "lines": ["- CTA策略:数据不足。", "- 行动指令:数据不足。"],
873
- "gaps": ["缺少ASR,无法识别CTA"],
874
- "evidence": "分句为空",
875
- }
876
-
877
- if not cta_sentences:
878
- return {
879
- "score": 2,
880
- "lines": ["- CTA策略:未检测到明确行动号召。", "- 行动指令:建议补一句“评论区/私信领取”。"],
881
- "gaps": ["补充单一明确CTA,避免只有信息陈述"],
882
- "evidence": "cta_sentences=0",
883
- }
884
-
885
- primary_cta = cta_sentences[-1]
886
- cta_types = []
887
- if any(k in asr for k in ["评论", "点赞", "收藏", "转发", "关注"]):
888
- cta_types.append("互动型")
889
- if any(k in asr for k in ["私信", "领取", "链接", "资料"]):
890
- cta_types.append("线索型")
891
- if any(k in asr for k in ["下单", "报名", "咨询", "购买"]):
892
- cta_types.append("转化型")
893
-
894
- lines = [
895
- f"- CTA类型:{'、'.join(cta_types) if cta_types else '互动型(弱)'}。",
896
- f"- 关键动作句:{primary_cta}",
897
- f"- CTA密度:{len(cta_sentences)}/{len(units)} 句。",
898
- ]
899
-
900
- return {
901
- "score": _score_from_hits(len(cta_types) + int(len(cta_sentences) > 0), full_score_hits=3),
902
- "lines": lines,
903
- "gaps": [] if len(cta_types) > 0 else ["补充线索型或转化型CTA,提高商业闭环"],
904
- "evidence": f"cta_types={cta_types}, cta_count={len(cta_sentences)}",
905
- }
906
-
907
-
908
- def _build_summary_module(results: Dict[str, Dict[str, Any]]) -> Dict[str, Any]:
909
- ordered = ["选题", "文风", "Hook", "结构", "CTA"]
910
- scored = [(name, results[name]["score"]) for name in ordered]
911
- avg_score = round(sum(score for _, score in scored) / max(1, len(scored)), 2)
912
- weakest = sorted(scored, key=lambda x: x[1])[:2]
913
-
914
- if avg_score >= 4.2:
915
- verdict = "可直接复用"
916
- elif avg_score >= 3.4:
917
- verdict = "可用,但需小幅优化"
918
- else:
919
- verdict = "需重写关键模块后再投放"
920
-
921
- suggestions = []
922
- for name, _ in weakest:
923
- gaps = results[name].get("gaps") or []
924
- if gaps:
925
- suggestions.append(f"- [{name}] {gaps[0]}")
926
- if not suggestions:
927
- suggestions = ["- 保持当前结构,持续做A/B测试验证Hook与CTA。"]
928
-
929
- return {
930
- "score": int(round(avg_score)),
931
- "lines": [
932
- f"- 结论:综合评分 {avg_score}/5,判定为“{verdict}”。",
933
- "- 建议:",
934
- *suggestions[:3],
935
- ],
936
- "gaps": [],
937
- "evidence": f"scores={dict(scored)}",
938
- }
939
-
940
-
941
- def _insight_metric_snapshot(fields: Dict[str, Any]) -> Dict[str, Any]:
942
- digg = _safe_int(fields.get("digg_count"), default=0)
943
- comment = _safe_int(fields.get("comment_count"), default=0)
944
- collect = _safe_int(fields.get("collect_count"), default=0)
945
- share = _safe_int(fields.get("share_count"), default=0)
946
- play = _safe_int(fields.get("play_count"), default=0)
947
-
948
- interaction = digg + comment * 2 + collect * 3 + share * 4
949
- interaction_rate = interaction / play if play > 0 else 0.0
950
- return {
951
- "interaction": interaction,
952
- "interaction_rate": interaction_rate,
953
- "digg": digg,
954
- "comment": comment,
955
- "collect": collect,
956
- "share": share,
957
- }
958
-
959
-
960
- def _build_local_analysis_sections(fields: Dict[str, Any]) -> Dict[str, Any]:
961
- topic = _analyze_topic(fields)
962
- style = _analyze_style(fields)
963
- hook = _analyze_hook(fields)
964
- structure = _analyze_structure(fields)
965
- cta = _analyze_cta(fields)
966
- summary = _build_summary_module(
967
- {
968
- "选题": topic,
969
- "文风": style,
970
- "Hook": hook,
971
- "结构": structure,
972
- "CTA": cta,
973
- }
974
- )
975
- metrics = _insight_metric_snapshot(fields)
976
- insight_lines = list(summary.get("lines") or [])
977
- insight_lines.extend(
978
- [
979
- f"- 互动折算值:{metrics.get('interaction', 0)}。",
980
- f"- 粗略互动率:{metrics.get('interaction_rate', 0.0):.4f}。",
981
- ]
982
- )
983
- return {
984
- "modules": {
985
- "选题": topic.get("lines", ["数据不足"]),
986
- "文风": style.get("lines", ["数据不足"]),
987
- "Hook": hook.get("lines", ["数据不足"]),
988
- "结构": structure.get("lines", ["数据不足"]),
989
- },
990
- "insight": insight_lines or ["数据不足"],
991
- }
992
-
993
-
994
- def build_card_analysis_artifact(
995
- *,
996
- payload: Dict[str, Any],
997
- platform: str,
998
- card_type: str,
999
- ) -> Dict[str, Any]:
1000
- fields = _extract_required_fields(payload, platform=platform)
1001
- precomputed = fields.get("analysis_sections") if isinstance(fields.get("analysis_sections"), dict) else {}
1002
- if precomputed:
1003
- analysis_sections = precomputed
1004
- elif card_type == "author":
1005
- analysis_sections = {}
1006
- else:
1007
- analysis_sections = build_analysis_sections(fields)
1008
- fields["analysis_sections"] = analysis_sections
1009
- return {
1010
- "fields": fields,
1011
- "analysis_sections": analysis_sections,
1012
- }
1013
-
1014
-
1015
- def _build_output_path(
1016
- *,
1017
- card_root: str,
1018
- platform: str,
1019
- card_type: str,
1020
- payload: Dict[str, Any],
1021
- now: dt.datetime,
1022
- sample_author: Optional[str],
1023
- storage_config: Optional[Dict[str, Any]],
1024
- extra_route_parts: Optional[List[str]] = None,
1025
- ) -> Dict[str, str]:
1026
- author_slug = _pick_author_slug(payload, author_hint=sample_author)
1027
- title_slug = _pick_title_slug(payload)
1028
-
1029
- path, route_parts = build_card_output_path(
1030
- card_root=card_root,
1031
- platform=platform,
1032
- card_type=card_type,
1033
- author_slug=author_slug,
1034
- title_slug=title_slug,
1035
- year=now.strftime("%Y"),
1036
- year_month=now.strftime("%Y-%m"),
1037
- timestamp=now.strftime("%Y%m%d-%H%M%S"),
1038
- storage_config=storage_config,
1039
- extra_route_parts=extra_route_parts,
1040
- )
1041
- return {
1042
- "path": path,
1043
- "route_parts": route_parts,
1044
- "author_slug": author_slug,
1045
- "title_slug": title_slug,
1046
- "target_type": card_type,
1047
- }
1048
-
1049
-
1050
- def _json_details_block(title: str, payload: Any) -> List[str]:
1051
- return [
1052
- "<details>",
1053
- f"<summary>{title}</summary>",
1054
- "",
1055
- "```json",
1056
- json.dumps(payload, ensure_ascii=False, indent=2),
1057
- "```",
1058
- "",
1059
- "</details>",
1060
- ]
1061
-
1062
-
1063
- def _display_list(values: Any, *, fallback: str = "数据不足") -> str:
1064
- items = _safe_text_list(values)
1065
- return "、".join(items) if items else fallback
1066
-
1067
-
1068
- def _display_scalar(value: Any, *, fallback: str = "数据不足") -> str:
1069
- text = normalize_text(value)
1070
- return text or fallback
1071
-
1072
-
1073
- def _render_author_card_markdown(
1074
- *,
1075
- card_id: str,
1076
- card_type: str,
1077
- fields: Dict[str, Any],
1078
- generated_at: str,
1079
- ) -> str:
1080
- analysis_output = fields.get("analysis_output") if isinstance(fields.get("analysis_output"), dict) else {}
1081
- author_analysis_v2 = fields.get("author_analysis_v2") if isinstance(fields.get("author_analysis_v2"), dict) else analysis_output.get("author_analysis_v2", {})
1082
- author_analysis_v2 = author_analysis_v2 if isinstance(author_analysis_v2, dict) else {}
1083
- validation = fields.get("validation") if isinstance(fields.get("validation"), dict) else analysis_output.get("validation", {})
1084
- validation = validation if isinstance(validation, dict) else {}
1085
- stage_status = fields.get("stage_status") if isinstance(fields.get("stage_status"), dict) else {}
1086
- stage_status = stage_status if isinstance(stage_status, dict) else {}
1087
- sampled_work_explanations = fields.get("sampled_work_explanations") if isinstance(fields.get("sampled_work_explanations"), dict) else analysis_output.get("sampled_work_explanations", {})
1088
- sampled_work_explanations = sampled_work_explanations if isinstance(sampled_work_explanations, dict) else {}
1089
- quality_tier = _display_scalar(fields.get("quality_tier"), fallback="unknown")
1090
-
1091
- positioning = author_analysis_v2.get("author_positioning") if isinstance(author_analysis_v2.get("author_positioning"), dict) else {}
1092
- trust_model = author_analysis_v2.get("trust_model") if isinstance(author_analysis_v2.get("trust_model"), dict) else {}
1093
- content_mechanism = author_analysis_v2.get("content_mechanism") if isinstance(author_analysis_v2.get("content_mechanism"), dict) else {}
1094
- commercial_bridge = author_analysis_v2.get("commercial_bridge") if isinstance(author_analysis_v2.get("commercial_bridge"), dict) else {}
1095
- core_tensions = author_analysis_v2.get("core_tensions") if isinstance(author_analysis_v2.get("core_tensions"), dict) else {}
1096
- clone_guidance = author_analysis_v2.get("clone_guidance") if isinstance(author_analysis_v2.get("clone_guidance"), dict) else {}
1097
- evidence_pack = author_analysis_v2.get("evidence_pack") if isinstance(author_analysis_v2.get("evidence_pack"), dict) else {}
1098
-
1099
- sampled_work_ids = _safe_text_list(fields.get("sampled_work_ids"))
1100
- representative_works = _safe_text_list(evidence_pack.get("representative_works")) or sampled_work_ids[:8]
1101
-
1102
- fm = {
1103
- "card_id": card_id,
1104
- "card_type": card_type,
1105
- "card_role": fields.get("card_role") or AUTHOR_CARD_ROLE,
1106
- "platform": fields.get("platform"),
1107
- "generated_at": generated_at,
1108
- "updated_at": generated_at,
1109
- "title": fields.get("title"),
1110
- "platform_author_id": fields.get("platform_author_id"),
1111
- "author_handle": fields.get("author_handle"),
1112
- "nickname": fields.get("nickname"),
1113
- "quality_tier": quality_tier,
1114
- "request_id": fields.get("request_id"),
1115
- }
1116
-
1117
- frontmatter = ["---"]
1118
- for key, value in fm.items():
1119
- frontmatter.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
1120
- frontmatter.append("---")
1121
-
1122
- lines = [
1123
- *frontmatter,
1124
- "",
1125
- "## 基础主页事实",
1126
- f"- 平台:{_display_scalar(fields.get('platform'), fallback='未知')}",
1127
- f"- 作者ID:{_display_scalar(fields.get('platform_author_id'), fallback='未知')}",
1128
- f"- 账号标识:{_display_scalar(fields.get('author_handle'), fallback='N/A')}",
1129
- f"- 昵称:{_display_scalar(fields.get('nickname') or fields.get('author'), fallback='未知')}",
1130
- f"- IP属地:{_display_scalar(fields.get('ip_location'), fallback='N/A')}",
1131
- f"- 签名:{_display_scalar(fields.get('signature'), fallback='N/A')}",
1132
- f"- 粉丝数:{_display_metric(fields.get('fans_count'))}",
1133
- f"- 累计获赞:{_display_metric(fields.get('liked_count'))}",
1134
- f"- 累计收藏:{_display_metric(fields.get('collected_count'))}",
1135
- f"- 作品数:{_display_metric(fields.get('works_count'))}",
1136
- f"- 质量档:{quality_tier}",
1137
- "",
1138
- "## 作者定位",
1139
- _display_scalar(positioning.get("one_liner") or fields.get("summary")),
1140
- f"- 作者类型:{_display_scalar(positioning.get('author_type'))}",
1141
- f"- 主要角色:{_display_scalar(positioning.get('primary_role'))}",
1142
- f"- 目标受众:{_display_scalar(positioning.get('target_audience'))}",
1143
- f"- 核心问题:{_display_scalar(positioning.get('core_problem_solved'))}",
1144
- f"- 核心价值:{_display_scalar(positioning.get('core_value_proposition'))}",
1145
- "",
1146
- "## 信任模型",
1147
- f"- 主要信任源:{_display_scalar(trust_model.get('primary_trust_source'))}",
1148
- f"- 次级信任源:{_display_list(trust_model.get('secondary_trust_sources'))}",
1149
- f"- 建立机制:{_display_list(trust_model.get('trust_building_mechanisms'))}",
1150
- f"- 风险:{_display_list(trust_model.get('trust_risks'))}",
1151
- "",
1152
- "## 内容机制",
1153
- f"- 世界观:{_display_scalar((author_analysis_v2.get('cognitive_engine') or {}).get('worldview'))}",
1154
- f"- 推理模式:{_display_list((author_analysis_v2.get('cognitive_engine') or {}).get('reasoning_modes'))}",
1155
- f"- 内容来源:{_display_list(content_mechanism.get('topic_sources'))}",
1156
- f"- 内容目标:{_display_list(content_mechanism.get('topic_goals'))}",
1157
- f"- 优势结构:{_display_list(content_mechanism.get('winning_content_structures'))}",
1158
- f"- 流量驱动:{_display_list(content_mechanism.get('traffic_drivers'))}",
1159
- f"- 主导主题:{_display_list(content_mechanism.get('dominant_themes'))}",
1160
- "",
1161
- "## 商业桥",
1162
- f"- 漏斗角色:{_display_list(commercial_bridge.get('content_role_in_funnel'))}",
1163
- f"- 可能产品:{_display_list(commercial_bridge.get('likely_products'), fallback='证据不足')}",
1164
- f"- 转化路径:{_display_scalar(commercial_bridge.get('conversion_path'), fallback='证据不足')}",
1165
- f"- 商业信号:{_display_list(commercial_bridge.get('business_model_signals'), fallback='证据不足')}",
1166
- "",
1167
- "## 核心张力",
1168
- f"- 最重要张力:{_display_scalar(core_tensions.get('most_important_tension'))}",
1169
- f"- 张力列表:{_display_list(core_tensions.get('tensions'))}",
1170
- "",
1171
- "## 建议动作",
1172
- f"- 可复制要素:{_display_list(clone_guidance.get('copyable_elements'))}",
1173
- f"- 不可复制要素:{_display_list(clone_guidance.get('non_copyable_elements'))}",
1174
- f"- 风险区:{_display_list(clone_guidance.get('danger_zones'))}",
1175
- f"- 只学一件事:{_display_scalar(clone_guidance.get('if_only_learn_one_thing'))}",
1176
- "",
1177
- "## 代表样本",
1178
- ]
1179
-
1180
- if representative_works:
1181
- lines.extend([f"- {item}" for item in representative_works[:8]])
1182
- else:
1183
- lines.append("- 数据不足")
1184
-
1185
- lines.extend(
1186
- [
1187
- "",
1188
- "## 附录",
1189
- f"- confidence: {fields.get('confidence')}",
1190
- f"- error_reason: {fields.get('error_reason') or 'N/A'}",
1191
- ]
1192
- )
1193
-
1194
- if quality_tier == "fallback":
1195
- lines.append("- note: 当前作者分析使用 fallback 结果,请优先复核正文结论。")
1196
- elif quality_tier == "degraded_author_only":
1197
- lines.append("- note: 批量解释缺失,作者分析基于聚合统计与样本事实完成。")
1198
-
1199
- lines.extend([""] + _json_details_block("author_analysis_v2", author_analysis_v2))
1200
- lines.extend([""] + _json_details_block("sampled_work_explanations", sampled_work_explanations))
1201
- lines.extend([""] + _json_details_block("validation", validation))
1202
- lines.extend([""] + _json_details_block("stage_status", stage_status))
1203
- lines.extend([""] + _json_details_block("extract_trace", fields.get("extract_trace", [])))
1204
- return "\n".join(lines)
1205
-
1206
-
1207
- def _render_author_sample_markdown(
1208
- *,
1209
- card_id: str,
1210
- card_type: str,
1211
- fields: Dict[str, Any],
1212
- generated_at: str,
1213
- ) -> str:
1214
- card_role = normalize_text(fields.get("card_role")) or AUTHOR_SAMPLE_CARD_ROLE
1215
- sampled_explanation = fields.get("sampled_explanation") if isinstance(fields.get("sampled_explanation"), dict) else {}
1216
- sampled_explanation = sampled_explanation if isinstance(sampled_explanation, dict) else {}
1217
-
1218
- fm = {
1219
- "card_id": card_id,
1220
- "card_type": card_type,
1221
- "card_role": card_role,
1222
- "platform": fields.get("platform"),
1223
- "generated_at": generated_at,
1224
- "updated_at": generated_at,
1225
- "title": fields.get("title"),
1226
- "platform_work_id": fields.get("platform_work_id"),
1227
- "author": fields.get("author"),
1228
- "author_handle": fields.get("author_handle"),
1229
- "platform_author_id": fields.get("platform_author_id"),
1230
- "share_url": fields.get("share_url"),
1231
- "source_url": fields.get("source_url"),
1232
- }
1233
-
1234
- frontmatter = ["---"]
1235
- for key, value in fm.items():
1236
- frontmatter.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
1237
- frontmatter.append("---")
1238
-
1239
- metrics_line = (
1240
- f"赞 {_display_metric(fields.get('digg_count'))} / 评 {_display_metric(fields.get('comment_count'))} / "
1241
- f"藏 {_display_metric(fields.get('collect_count'))} / 转 {_display_metric(fields.get('share_count'))} / 播 {_display_metric(fields.get('play_count'))}"
1242
- )
1243
-
1244
- lines = [
1245
- *frontmatter,
1246
- "",
1247
- "## 基础信息",
1248
- f"- 作者:{_display_scalar(fields.get('author') or fields.get('author_handle') or fields.get('platform_author_id'), fallback='未知作者')}",
1249
- f"- 标题:{_display_scalar(fields.get('title'), fallback='(标题缺失)')}",
1250
- f"- 原始文案:{_display_scalar(fields.get('caption_raw'), fallback='N/A')}",
1251
- f"- 作品模态:{_display_scalar(fields.get('work_modality'), fallback='未知')}",
1252
- f"- 发布时间:{_display_scalar(fields.get('published_date'), fallback='N/A')}",
1253
- f"- 时长:{_format_duration(_safe_int(fields.get('duration_ms'), default=0)) if _safe_int(fields.get('duration_ms'), default=0) > 0 else 'N/A'}",
1254
- f"- 互动:{metrics_line}",
1255
- f"- 标签:{_display_list(fields.get('tags'), fallback='无')}",
1256
- f"- 链接:{_display_scalar(fields.get('share_url'), fallback='(未提供)')}",
1257
- "",
1258
- "## 表现与结构",
1259
- f"- performance_score:{fields.get('performance_score') if fields.get('performance_score') is not None else 'N/A'}",
1260
- f"- performance_score_norm:{fields.get('performance_score_norm') if fields.get('performance_score_norm') is not None else 'N/A'}",
1261
- f"- bucket:{_display_scalar(fields.get('bucket'), fallback='unknown')}",
1262
- f"- hook_type:{_display_scalar(fields.get('hook_type'), fallback='unknown')}",
1263
- f"- structure_type:{_display_scalar(fields.get('structure_type'), fallback='unknown')}",
1264
- f"- cta_type:{_display_scalar(fields.get('cta_type'), fallback='unknown')}",
1265
- f"- content_form:{_display_scalar(fields.get('content_form'), fallback='unknown')}",
1266
- f"- style_markers:{_display_list(fields.get('style_markers'), fallback='未命中显著标记')}",
1267
- ]
1268
-
1269
- precomputed_sections = fields.get("analysis_sections") if isinstance(fields.get("analysis_sections"), dict) else {}
1270
- modules = precomputed_sections.get("modules") if isinstance(precomputed_sections.get("modules"), dict) else {}
1271
- for heading in DEFAULT_MODULE_SECTIONS:
1272
- lines.append("")
1273
- lines.append(f"## {heading}")
1274
- for item in modules.get(heading, ["数据不足"]):
1275
- lines.append(_display_scalar(item))
1276
-
1277
- if card_role == SAMPLE_WORK_CARD_ROLE:
1278
- lines.extend(
1279
- [
1280
- "",
1281
- "## 批量解释",
1282
- f"- why_it_worked_or_failed:{_display_scalar(sampled_explanation.get('why_it_worked_or_failed'), fallback='批量解释未生成')}",
1283
- f"- copyable_elements:{_display_list(sampled_explanation.get('copyable_elements'), fallback='批量解释未生成')}",
1284
- f"- non_copyable_elements:{_display_list(sampled_explanation.get('non_copyable_elements'), fallback='批量解释未生成')}",
1285
- f"- emotional_triggers:{_display_list(sampled_explanation.get('emotional_triggers'), fallback='批量解释未生成')}",
1286
- f"- cognitive_gap:{_display_scalar(sampled_explanation.get('cognitive_gap'), fallback='批量解释未生成')}",
1287
- f"- commercial_signal:{_display_scalar(sampled_explanation.get('commercial_signal'), fallback='批量解释未生成')}",
1288
- ]
1289
- )
1290
-
1291
- lines.extend(
1292
- [
1293
- "",
1294
- "## 主文本",
1295
- _display_scalar(fields.get("primary_text"), fallback="(无可用主文本)"),
1296
- "",
1297
- "## 附录",
1298
- f"- analysis_eligibility: {_display_scalar(fields.get('analysis_eligibility'), fallback='unknown')}",
1299
- f"- analysis_exclusion_reason: {_display_scalar(fields.get('analysis_exclusion_reason'), fallback='N/A')}",
1300
- f"- request_id: {_display_scalar(fields.get('request_id'), fallback='N/A')}",
1301
- f"- confidence: {_display_scalar(fields.get('confidence'), fallback='low')}",
1302
- f"- error_reason: {_display_scalar(fields.get('error_reason'), fallback='N/A')}",
1303
- "",
1304
- "### ASR_RAW",
1305
- _display_scalar(fields.get("raw_content"), fallback="(无可用 ASR 原文)"),
1306
- "",
1307
- ]
1308
- )
1309
- lines.extend(_json_details_block("extract_trace", fields.get("extract_trace", [])))
1310
- return "\n".join(lines)
1311
-
1312
-
1313
- def _render_author_markdown(
1314
- *,
1315
- card_id: str,
1316
- card_type: str,
1317
- fields: Dict[str, Any],
1318
- generated_at: str,
1319
- ) -> str:
1320
- return _render_author_card_markdown(
1321
- card_id=card_id,
1322
- card_type=card_type,
1323
- fields=fields,
1324
- generated_at=generated_at,
1325
- )
1326
-
1327
-
1328
- def _render_markdown(
1329
- *,
1330
- card_id: str,
1331
- card_type: str,
1332
- fields: Dict[str, Any],
1333
- generated_at: str,
1334
- ) -> str:
1335
- if card_type == "author":
1336
- return _render_author_markdown(
1337
- card_id=card_id,
1338
- card_type=card_type,
1339
- fields=fields,
1340
- generated_at=generated_at,
1341
- )
1342
- if card_type == "author_sample_work":
1343
- return _render_author_sample_markdown(
1344
- card_id=card_id,
1345
- card_type=card_type,
1346
- fields=fields,
1347
- generated_at=generated_at,
1348
- )
1349
-
1350
- author_name = fields.get("author") or fields.get("author_handle") or fields.get("platform_author_id") or "未知作者"
1351
- title = fields.get("title") or "(标题缺失)"
1352
- metrics_line = (
1353
- f"赞 {_display_metric(fields.get('digg_count'))} / 评 {_display_metric(fields.get('comment_count'))} / "
1354
- f"藏 {_display_metric(fields.get('collect_count'))} / 转 {_display_metric(fields.get('share_count'))} / 播 {_display_metric(fields.get('play_count'))}"
1355
- )
1356
- precomputed_sections = fields.get("analysis_sections") if isinstance(fields.get("analysis_sections"), dict) else {}
1357
- analysis_sections = precomputed_sections or build_analysis_sections(fields)
1358
- creative_modules = analysis_sections.get("modules", {})
1359
- insight_lines = analysis_sections.get("insight", ["数据不足"])
1360
- extract_trace_json = json.dumps(fields.get("extract_trace", []), ensure_ascii=False, indent=2)
1361
-
1362
- fm = {
1363
- "card_id": card_id,
1364
- "card_type": card_type,
1365
- "platform": fields.get("platform"),
1366
- "generated_at": generated_at,
1367
- "updated_at": generated_at,
1368
- "title": fields.get("title"),
1369
- "platform_work_id": fields.get("platform_work_id"),
1370
- "author": fields.get("author"),
1371
- "author_handle": fields.get("author_handle"),
1372
- "platform_author_id": fields.get("platform_author_id"),
1373
- }
1374
-
1375
- frontmatter = ["---"]
1376
- for key, value in fm.items():
1377
- frontmatter.append(f"{key}: {json.dumps(value, ensure_ascii=False)}")
1378
- frontmatter.append("---")
1379
-
1380
- lines = [
1381
- *frontmatter,
1382
- "",
1383
- "## 基础信息",
1384
- f"- 作者:{author_name}",
1385
- f"- 标题:{title}",
1386
- f"- 原始文案:{fields.get('caption_raw') or 'N/A'}",
1387
- f"- 作品模态:{fields.get('work_modality') or '未知'}",
1388
- f"- 发布时间:{fields.get('published_date') or 'N/A'}",
1389
- f"- 互动:{metrics_line}",
1390
- f"- 链接:{fields.get('share_url') or '(未提供)'}",
1391
- ]
1392
-
1393
- for heading in DEFAULT_MODULE_SECTIONS:
1394
- lines.append("")
1395
- lines.append(f"## {heading}")
1396
- for item in creative_modules.get(heading, ["数据不足"]):
1397
- lines.append(item)
1398
-
1399
- lines.append("")
1400
- lines.append("## 洞察分析")
1401
- for item in insight_lines:
1402
- lines.append(item)
1403
-
1404
- lines.extend(
1405
- [
1406
- "",
1407
- "## 主文本",
1408
- fields.get("primary_text") or "(无可用主文本)",
1409
- "",
1410
- "## 附录",
1411
- "### ASR_RAW",
1412
- fields.get("raw_content") or "(无可用 ASR 原文)",
1413
- "",
1414
- "### trace",
1415
- f"- request_id: {fields.get('request_id')}",
1416
- f"- confidence: {fields.get('confidence')}",
1417
- f"- error_reason: {fields.get('error_reason')}",
1418
- "",
1419
- "<details>",
1420
- "<summary>extract_trace(点击展开)</summary>",
1421
- "",
1422
- "```json",
1423
- extract_trace_json,
1424
- "```",
1425
- "",
1426
- "</details>",
1427
- "",
1428
- ]
1429
- )
1430
- return "\n".join(lines)
1431
-
1432
-
1433
- def _write_file(path: str, content: str) -> None:
1434
- with open(path, "w", encoding="utf-8") as handle:
1435
- handle.write(content)
1436
-
1437
-
1438
- def _resolve_card_root(card_root: Optional[str]) -> str:
1439
- raw = (card_root or "").strip()
1440
- if not raw:
1441
- return resolve_default_card_root()
1442
-
1443
- candidate = Path(raw).expanduser()
1444
- if not candidate.is_absolute():
1445
- raise ValueError("card_root must be an absolute path")
1446
- return str(candidate.resolve())
1447
-
1448
-
1449
- def write_benchmark_card(
1450
- *,
1451
- payload: Dict[str, Any],
1452
- platform: str,
1453
- card_type: str,
1454
- card_root: Optional[str],
1455
- sample_author: Optional[str] = None,
1456
- content_kind: Optional[str] = None,
1457
- storage_config: Optional[Dict[str, Any]] = None,
1458
- force_card_type: bool = False,
1459
- route_card_type: Optional[str] = None,
1460
- route_extra_parts: Optional[List[str]] = None,
1461
- card_role: Optional[str] = None,
1462
- ) -> Dict[str, Any]:
1463
- now = dt.datetime.now()
1464
- generated_at = now.isoformat(timespec="seconds")
1465
-
1466
- payload_content_kind = normalize_text(payload.get("content_kind"))
1467
- resolved_content_kind = normalize_text(content_kind) or payload_content_kind
1468
-
1469
- normalized_card_type = normalize_card_type(card_type)
1470
- effective_card_type = resolve_effective_card_type(
1471
- card_type=normalized_card_type,
1472
- content_kind=resolved_content_kind,
1473
- storage_config=storage_config,
1474
- force_card_type=force_card_type,
1475
- )
1476
- effective_route_card_type = normalize_card_type(route_card_type) if route_card_type else effective_card_type
1477
- effective_card_role = normalize_text(card_role) or normalize_text(payload.get("card_role"))
1478
- explicit_route_override = bool(route_card_type or route_extra_parts)
1479
-
1480
- payload_for_fields = dict(payload)
1481
- if effective_card_role:
1482
- payload_for_fields["card_role"] = effective_card_role
1483
- fields = _extract_required_fields(payload_for_fields, platform=platform)
1484
- if not fields.get("card_role"):
1485
- if effective_card_type == "author":
1486
- fields["card_role"] = AUTHOR_CARD_ROLE
1487
- elif effective_card_type == "author_sample_work":
1488
- fields["card_role"] = AUTHOR_SAMPLE_CARD_ROLE
1489
- resolved_card_root = _resolve_card_root(card_root)
1490
-
1491
- primary_target = _build_output_path(
1492
- card_root=resolved_card_root,
1493
- platform=platform,
1494
- card_type=effective_route_card_type,
1495
- payload=payload,
1496
- now=now,
1497
- sample_author=sample_author,
1498
- storage_config=storage_config,
1499
- extra_route_parts=route_extra_parts,
1500
- )
1501
- primary_path = primary_target["path"]
1502
-
1503
- primary_card_id = os.path.basename(primary_path).replace(".md", "")
1504
- primary_markdown = _render_markdown(
1505
- card_id=primary_card_id,
1506
- card_type=effective_card_type,
1507
- fields=fields,
1508
- generated_at=generated_at,
1509
- )
1510
- _write_file(primary_path, primary_markdown)
1511
-
1512
- return {
1513
- "ok": True,
1514
- "platform": platform,
1515
- "card_type": effective_card_type,
1516
- "card_role": fields.get("card_role"),
1517
- "requested_card_type": normalized_card_type,
1518
- "force_card_type": bool(force_card_type),
1519
- "content_kind": resolved_content_kind or None,
1520
- "primary_card_path": primary_path,
1521
- "routing": {
1522
- "card_role": fields.get("card_role"),
1523
- "route_key": effective_route_card_type,
1524
- "primary_route_parts": primary_target["route_parts"],
1525
- "explicit_override": explicit_route_override,
1526
- "storage_routes_configured": bool(isinstance(storage_config, dict) and isinstance(storage_config.get("storage_routes"), dict)),
1527
- },
1528
- "required_fields": fields,
1529
- }
1530
-
1531
-
1532
- def _read_payload_from_input(input_json: str) -> Dict[str, Any]:
1533
- if input_json == "-":
1534
- raw = os.read(0, 1024 * 1024).decode("utf-8", errors="replace").strip()
1535
- if not raw:
1536
- return {}
1537
- return json.loads(raw)
1538
- return read_json_file(input_json)
1539
-
1540
-
1541
- def main() -> None:
1542
- parser = argparse.ArgumentParser(description="Write benchmark card markdown to card root")
1543
- parser.add_argument("--platform", required=True, help="Platform name, e.g. douyin or xiaohongshu")
1544
- parser.add_argument("--card-type", choices=CARD_TYPES, default="work", help="Primary card type")
1545
- parser.add_argument("--config", default=None, help="Runtime config YAML path")
1546
- parser.add_argument("--env-file", default=None, help="Shared env file path; defaults to <skills_root>/.env")
1547
- parser.add_argument("--allow-process-env", action="store_true", help="Allow process env to override .env/.env.local")
1548
- parser.add_argument("--sample-author", default=None, help="Optional author slug override for author_sample_work")
1549
- parser.add_argument("--content-kind", default=None, help="Optional workflow kind, e.g. single_video/author_home/author_analysis")
1550
- parser.add_argument("--force-card-type", action="store_true", help="Force manual --card-type to override content_kind mapping")
1551
- parser.add_argument("--card-root", default=None, help="Card root path (absolute); falls back to TIKOMNI_CARD_ROOT when omitted")
1552
- parser.add_argument(
1553
- "--input-json",
1554
- default="-",
1555
- help="Input JSON path or '-' to read from stdin",
1556
- )
1557
- args = parser.parse_args()
1558
-
1559
- config, _ = load_tikomni_config(
1560
- args.config,
1561
- env_file=args.env_file,
1562
- allow_process_env=args.allow_process_env,
1563
- )
1564
- payload = _read_payload_from_input(args.input_json)
1565
- result = write_benchmark_card(
1566
- payload=payload,
1567
- platform=args.platform,
1568
- card_type=args.card_type,
1569
- card_root=args.card_root,
1570
- sample_author=args.sample_author,
1571
- content_kind=args.content_kind,
1572
- storage_config=config,
1573
- force_card_type=args.force_card_type,
1574
- )
1575
- write_json_stdout(result)
1576
-
1577
-
1578
- if __name__ == "__main__":
1579
- main()