@tikomni/skills 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (480)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -3
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +252 -9
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +108 -167
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +6 -2
  30. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/{creator-analysis → social-media-crawl}/scripts/core/tikomni_common.py +13 -3
  33. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  34. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  36. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  37. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  38. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  39. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  40. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +282 -174
  41. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  42. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +290 -141
  43. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  45. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  46. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  47. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  48. package/skills/creator-analysis/SKILL.md +0 -95
  49. package/skills/creator-analysis/agents/openai.yaml +0 -4
  50. package/skills/creator-analysis/env.example +0 -36
  51. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  52. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  53. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  54. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  55. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  56. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  57. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  59. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  60. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  61. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  63. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  64. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  65. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  66. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  67. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  68. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  69. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  71. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  72. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  73. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  74. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  75. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  76. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  77. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  78. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  79. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  80. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  81. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  82. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  83. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  84. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  85. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  86. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  87. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  88. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  89. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  90. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  91. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  92. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  93. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  94. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  96. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  100. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  101. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  103. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  104. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  105. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  106. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  107. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  108. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  109. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  110. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  113. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  114. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  116. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  117. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  118. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  119. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  120. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  121. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  122. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  123. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  124. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  125. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  126. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  127. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  128. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  129. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  130. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  131. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  132. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  133. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  134. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  135. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  136. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  137. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  138. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  139. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  140. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  141. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  142. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  143. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  144. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  145. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  146. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  147. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  148. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  149. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  153. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  154. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  155. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  156. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  157. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  158. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  159. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  160. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  161. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  162. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  163. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  164. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  165. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  167. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  168. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  169. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  170. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  171. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  172. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  173. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  174. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  175. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  176. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  177. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  178. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  179. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  180. package/skills/creator-analysis/references/workflow.md +0 -23
  181. package/skills/creator-analysis/scripts/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  185. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  186. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  187. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  188. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  189. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  190. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  191. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  194. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  195. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  196. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  197. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  198. package/skills/creator-analysis/scripts/core/progress_report.py +0 -111
  199. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  200. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  202. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  203. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  205. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  207. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  208. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  209. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  210. package/skills/meta-capability/SKILL.md +0 -69
  211. package/skills/meta-capability/agents/openai.yaml +0 -4
  212. package/skills/meta-capability/env.example +0 -42
  213. package/skills/meta-capability/references/api-capability-index.md +0 -92
  214. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  215. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  216. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  217. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  218. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  219. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  220. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  222. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  223. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  224. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  226. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  227. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  228. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  229. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  230. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  231. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  232. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  233. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  234. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  235. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  236. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  237. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  238. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  239. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  240. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  241. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  242. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  243. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  244. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  245. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  246. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  247. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  248. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  249. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  250. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  251. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  252. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  253. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  254. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  255. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  256. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  257. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  258. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  259. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  263. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  264. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  265. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  266. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  267. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  268. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  269. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  270. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  271. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  272. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  273. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  274. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  276. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  277. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  279. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  280. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  281. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  282. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  283. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  284. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  285. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  286. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  287. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  288. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  289. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  290. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  291. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  292. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  293. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  294. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  295. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  296. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  297. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  298. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  299. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  300. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  301. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  302. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  303. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  304. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  305. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  306. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  307. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  308. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  309. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  310. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  311. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  312. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  316. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  317. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  318. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  319. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  320. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  321. package/skills/meta-capability/references/dispatch.md +0 -27
  322. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  323. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  324. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  325. package/skills/meta-capability/scripts/__init__.py +0 -1
  326. package/skills/meta-capability/scripts/call_route.py +0 -141
  327. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  328. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  329. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  330. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  331. package/skills/meta-capability/scripts/test_auth.py +0 -98
  332. package/skills/single-work-analysis/SKILL.md +0 -62
  333. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  334. package/skills/single-work-analysis/env.example +0 -36
  335. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  336. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  337. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  339. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  340. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  341. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  342. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  344. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  345. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  346. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  348. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  349. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  350. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  353. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  354. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  356. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  357. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  358. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  359. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  360. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  361. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  362. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  363. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  364. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  366. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  373. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  375. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  376. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  377. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  378. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  379. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  381. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  385. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  386. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  388. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  389. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  390. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  391. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  392. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  393. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  394. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  395. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  398. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  399. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  401. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  402. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  403. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  404. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  405. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  406. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  407. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  409. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  410. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  411. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  412. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  413. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  414. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  415. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  416. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  417. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  418. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  419. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  420. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  421. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  422. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  423. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  424. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  425. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  426. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  427. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  428. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  429. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  430. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  431. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  432. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  434. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  438. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  439. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  441. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  442. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  443. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -58
  444. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  445. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  446. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  447. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  448. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  449. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  450. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  451. package/skills/single-work-analysis/references/prompt-contracts/insight.md +0 -47
  452. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  453. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  454. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  455. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
  456. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  457. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  458. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  459. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  460. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -133
  461. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  462. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  463. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -253
  464. package/skills/single-work-analysis/scripts/core/tikomni_common.py +0 -588
  465. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  466. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  468. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  469. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  470. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  471. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  472. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  473. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  474. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  475. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -1402
  476. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  477. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  478. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  479. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  480. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -1,1165 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Support helpers for author_home v2 standardized input / aggregate stats / validation."""
3
-
4
- from __future__ import annotations
5
-
6
- import json
7
- import math
8
- import re
9
- from collections import Counter
10
- from datetime import datetime, timezone
11
- from pathlib import Path
12
- from typing import Any, Dict, List, Optional, Sequence, Tuple
13
-
14
- import jsonschema
15
-
16
- SKILL_ROOT = Path(__file__).resolve().parents[3]
17
- INPUT_SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "author-analysis-input-v1.schema.json"
18
- OUTPUT_SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "author-analysis-v2.schema.json"
19
- PROMPT_CONTRACT_PATH = SKILL_ROOT / "references" / "prompt-contracts" / "author-analysis-v2.md"
20
-
21
- LOW_HIGH_MID = {"low", "mid", "high"}
22
- RELATIONSHIP_DISTANCE = {"near", "mid", "far"}
23
- AUTHOR_TYPES = {
24
- "ai_content_growth", "benchmark_deconstruction", "workflow_automation", "tool_education", "business_cognition",
25
- "ip_growth", "industry_commentary", "case_breakdown", "efficiency_hacking", "creator_education",
26
- }
27
- PRIMARY_ROLES = {"coach", "operator", "researcher", "builder", "companion", "commentator", "teacher", "practitioner", "evangelist", "critic"}
28
- TRUST_SOURCES = {"results", "experience", "case_studies", "systematized_method", "demonstration", "strong_judgment", "consistency", "authority_signal", "community_signal"}
29
- PROBLEM_DEFINITION_STYLES = {"cognition_problem", "execution_problem", "model_problem", "stage_mismatch", "positioning_problem", "traffic_problem", "conversion_problem", "offer_problem", "capability_problem"}
30
- REASONING_MODES = {"benchmark_reasoning", "concept_deconstruction", "contrast_reasoning", "case_induction", "result_backtracking", "anti_common_sense", "framework_building", "workflow_packaging", "data_validation"}
31
- HOOK_TYPES_ENUM = {"result_hook", "curiosity_hook", "shortcut_hook", "pain_point_hook", "comparison_hook", "proof_hook"}
32
- STRUCTURE_TYPES = {"hook_demo_result", "benchmark_then_clone", "problem_solution_cta", "proof_then_pitch"}
33
- CTA_TYPES = {"comment_cta", "private_message_cta", "follow_cta", "collect_cta", "share_cta", "lead_magnet_cta", "weak_cta", "no_cta"}
34
- CONTENT_FORMS = {"talking_head", "voiceover", "screen_recording", "slideshow", "mixed_edit", "live_clip", "interview_clip"}
35
- STYLE_MARKERS_ENUM = {"rhetorical_question", "imperative_tone", "strong_assertion", "self_mockery", "quote_like_phrase", "emotional_wording"}
36
- FUNNEL_ROLES = {"acquire_attention", "build_trust", "educate", "qualify", "convert", "upsell", "retain", "occupy_mindshare"}
37
- LIKELY_PRODUCTS = {"workflow_templates", "prompt_pack", "training_camp", "community", "consulting", "done_for_you_service", "course", "membership", "software_tool", "report"}
38
- BUSINESS_MODEL_SIGNALS = {"high_ip_dependency", "strong_toolization", "template_scalability", "service_extension", "low_marginal_distribution", "high_touch_delivery", "community_leverage", "content_led_growth"}
39
- CORE_COGNITIVE_ACTIONS = {"benchmark_deconstruction", "workflow_packaging", "concept_deconstruction", "contrast_reasoning", "case_induction", "framework_extraction", "result_compression", "anti_common_sense_reframing"}
40
- TOPIC_SOURCES = {"benchmark_authors", "viral_cases", "audience_questions", "workflow_demos", "industry_pain_points", "personal_experience", "tool_updates", "client_cases", "market_signals"}
41
- TOPIC_GOALS = {"acquire_attention", "build_trust", "show_capability", "drive_conversion", "occupy_mindshare", "differentiate_positioning", "nurture_audience"}
42
- WINNING_CONTENT_STRUCTURES = set(STRUCTURE_TYPES)
43
- MEMORABILITY_SOURCES = {"strong_keywords", "sharp_judgment", "identity_projection", "repeatable_phrases", "result_imagery", "emotional_contrast", "unexpected_framing"}
44
- EMOTION_PATTERNS = {"calm_assertive", "sharp_direct", "playful_mocking", "high_energy_excitement", "pragmatic_cold", "empathetic_supportive", "provocative_challenge"}
45
- TRAFFIC_DRIVERS = {"benchmark_target", "result_promise", "shortcut_imagination", "curiosity_gap", "identity_desire", "fear_of_missing_out", "proof_signal", "controversy_edge"}
46
- STOPWORDS = {
47
- "我们", "你们", "他们", "这个", "那个", "一个", "一种", "已经", "如果", "因为", "所以", "然后", "就是", "没有", "自己",
48
- "可以", "还是", "需要", "以及", "并且", "真的", "今天", "现在", "内容", "作者", "账号", "作品", "视频", "老师", "方法",
49
- "the", "and", "for", "that", "with", "from", "this", "you", "your", "are", "was", "were", "have", "has", "had", "into",
50
- }
51
- SCHEMA_CACHE: Dict[Path, Dict[str, Any]] = {}
52
-
53
-
54
- class AnalysisResourceError(RuntimeError):
55
- def __init__(self, *, code: str, path: Path, detail: str = "") -> None:
56
- self.code = code
57
- self.path = path
58
- self.detail = detail
59
- message = f"{code}:{path}"
60
- if detail:
61
- message = f"{message}:{detail}"
62
- super().__init__(message)
63
-
64
- REQUIRED_V2_FIELDS = {
65
- "author_positioning": ["one_liner", "author_type", "primary_role", "target_audience", "core_problem_solved", "core_value_proposition", "evidence"],
66
- "trust_model": ["primary_trust_source", "secondary_trust_sources", "trust_building_mechanisms", "trust_risks", "relationship_posture", "evidence"],
67
- "cognitive_engine": ["worldview", "value_priority", "problem_definition_style", "reasoning_modes", "knowledge_sources", "judgment_style", "core_cognitive_actions", "evidence"],
68
- "expression_hooks": ["language_style", "hook_keywords", "hook_types", "argument_patterns", "emotion_patterns", "memorability_sources", "evidence"],
69
- "content_mechanism": ["topic_sources", "topic_goals", "winning_content_structures", "repeatable_series", "traffic_drivers", "content_flywheel", "cross_platform_variation", "dominant_themes", "theme_clusters", "evidence"],
70
- "commercial_bridge": ["content_role_in_funnel", "likely_products", "conversion_path", "content_product_fit", "business_model_signals", "commercial_risks", "evidence"],
71
- "core_tensions": ["tensions", "most_important_tension", "evidence"],
72
- "evidence_pack": ["sample_size", "sample_confidence", "representative_works", "top_keywords", "observed_hooks", "observed_ctas", "observed_structures", "notes"],
73
- "clone_guidance": ["copyable_elements", "non_copyable_elements", "borrowable_flavor", "danger_zones", "if_only_learn_one_thing"],
74
- }
75
-
76
-
77
- def _safe_text(value: Any) -> str:
78
- if value is None:
79
- return ""
80
- if isinstance(value, str):
81
- return value.strip()
82
- return str(value).strip()
83
-
84
-
85
- def _safe_int(value: Any, default: int = 0) -> int:
86
- try:
87
- if value is None:
88
- return default
89
- if isinstance(value, bool):
90
- return int(value)
91
- if isinstance(value, (int, float)):
92
- return int(value)
93
- text = _safe_text(value).replace(",", "")
94
- return int(float(text)) if text else default
95
- except Exception:
96
- return default
97
-
98
-
99
- def _safe_float(value: Any, default: float = 0.0) -> float:
100
- try:
101
- if value is None:
102
- return default
103
- if isinstance(value, bool):
104
- return float(int(value))
105
- if isinstance(value, (int, float)):
106
- return float(value)
107
- text = _safe_text(value).replace(",", "")
108
- return float(text) if text else default
109
- except Exception:
110
- return default
111
-
112
-
113
- def _clamp(value: float, low: float, high: float) -> float:
114
- return max(low, min(high, value))
115
-
116
-
117
- def load_json_schema(path: Path) -> Dict[str, Any]:
118
- try:
119
- return json.loads(path.read_text(encoding="utf-8"))
120
- except Exception as error:
121
- raise AnalysisResourceError(code="schema_load_failed", path=path, detail=f"{type(error).__name__}:{error}") from error
122
-
123
-
124
- def prompt_contract_text() -> str:
125
- try:
126
- return PROMPT_CONTRACT_PATH.read_text(encoding="utf-8").strip()
127
- except Exception as error:
128
- raise AnalysisResourceError(code="contract_load_failed", path=PROMPT_CONTRACT_PATH, detail=f"{type(error).__name__}:{error}") from error
129
-
130
-
131
- def _load_schema(path: Path) -> Dict[str, Any]:
132
- cached = SCHEMA_CACHE.get(path)
133
- if cached is not None:
134
- return cached
135
- schema = load_json_schema(path)
136
- SCHEMA_CACHE[path] = schema
137
- return schema
138
-
139
-
140
- def _schema_errors(payload: Any, path: Path) -> List[Dict[str, str]]:
141
- schema = _load_schema(path)
142
- if not schema:
143
- raise AnalysisResourceError(code="schema_empty", path=path)
144
- try:
145
- validator = jsonschema.Draft202012Validator(schema)
146
- rows: List[Dict[str, str]] = []
147
- for error in sorted(validator.iter_errors(payload), key=lambda item: list(item.absolute_path)):
148
- field = ".".join(str(part) for part in error.absolute_path) or "$"
149
- rows.append({"field": field, "reason": f"schema:{error.message}"})
150
- return rows
151
- except Exception as error:
152
- return [{"field": "$", "reason": f"schema_runtime:{type(error).__name__}:{error}"}]
153
-
154
-
155
- def _dedupe_keep_order(values: Sequence[str]) -> List[str]:
156
- result: List[str] = []
157
- seen = set()
158
- for value in values:
159
- clean = _safe_text(value)
160
- if not clean or clean in seen:
161
- continue
162
- seen.add(clean)
163
- result.append(clean)
164
- return result
165
-
166
-
167
- def _safe_text_list(value: Any) -> List[str]:
168
- if not isinstance(value, list):
169
- return []
170
- result: List[str] = []
171
- for item in value:
172
- if isinstance(item, dict):
173
- for key in ("name", "value", "label", "hashtag_name", "search_text", "tag_name", "text"):
174
- text = _safe_text(item.get(key))
175
- if text:
176
- result.append(text)
177
- break
178
- continue
179
- text = _safe_text(item)
180
- if text:
181
- result.append(text)
182
- return _dedupe_keep_order(result)
183
-
184
-
185
- def _dedupe_error_list(errors: Sequence[Dict[str, str]]) -> List[Dict[str, str]]:
186
- result: List[Dict[str, str]] = []
187
- seen = set()
188
- for item in errors:
189
- field = _safe_text(item.get("field"))
190
- reason = _safe_text(item.get("reason"))
191
- key = (field, reason)
192
- if key in seen:
193
- continue
194
- seen.add(key)
195
- result.append({"field": field, "reason": reason})
196
- return result
197
-
198
-
199
- def _parse_datetime(value: Any) -> Optional[datetime]:
200
- if value is None:
201
- return None
202
- if isinstance(value, datetime):
203
- return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
204
- num = _safe_int(value, default=0)
205
- if num > 0:
206
- if num > 1_000_000_000_000:
207
- num //= 1000
208
- try:
209
- return datetime.fromtimestamp(num, tz=timezone.utc)
210
- except Exception:
211
- return None
212
- text = _safe_text(value)
213
- if not text:
214
- return None
215
- for candidate in (text, text.replace("Z", "+00:00")):
216
- try:
217
- parsed = datetime.fromisoformat(candidate)
218
- return parsed if parsed.tzinfo else parsed.replace(tzinfo=timezone.utc)
219
- except Exception:
220
- continue
221
- for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d %H:%M:%S", "%Y/%m/%d"):
222
- try:
223
- return datetime.strptime(text, fmt).replace(tzinfo=timezone.utc)
224
- except Exception:
225
- continue
226
- return None
227
-
228
-
229
- def _publish_days_ago(value: Any) -> Optional[int]:
230
- parsed = _parse_datetime(value)
231
- if parsed is None:
232
- return None
233
- return max(int((datetime.now(timezone.utc) - parsed).total_seconds() // 86400), 0)
234
-
235
-
236
- def _first_sentence(text: str) -> str:
237
- if not text:
238
- return ""
239
- units = [part.strip() for part in re.split(r"[。!?!?\n]+", text) if part.strip()]
240
- return units[0] if units else text[:80]
241
-
242
-
243
- def _unique_tokens(text: str) -> List[str]:
244
- if not text:
245
- return []
246
- lowered = text.lower()
247
- tokens: List[str] = []
248
- for token in re.findall(r"[a-z0-9_]{3,24}", lowered):
249
- if token not in STOPWORDS:
250
- tokens.append(token)
251
- for block in re.findall(r"[\u4e00-\u9fff]{2,8}", text):
252
- if block not in STOPWORDS:
253
- tokens.append(block)
254
- return _dedupe_keep_order(tokens)
255
-
256
-
257
- def _top_counter(counter: Counter, *, limit: int = 10) -> List[Dict[str, Any]]:
258
- total = sum(counter.values())
259
- rows: List[Dict[str, Any]] = []
260
- for key, count in counter.most_common(limit):
261
- rows.append({"value": key, "count": int(count), "ratio": round((count / total), 4) if total else 0.0})
262
- return rows
263
-
264
-
265
- def _distribution_from_values(values: Sequence[str], *, limit: int = 10) -> Dict[str, Any]:
266
- counter = Counter(_safe_text(value) for value in values if _safe_text(value))
267
- return {"total": int(sum(counter.values())), "items": _top_counter(counter, limit=limit)}
268
-
269
-
270
- def _merged_text(work: Dict[str, Any]) -> str:
271
- parts = [
272
- _safe_text(work.get("title")),
273
- _safe_text(work.get("caption_raw") or work.get("desc")),
274
- _safe_text(work.get("primary_text") or work.get("asr_clean") or work.get("asr_raw")),
275
- ]
276
- return "\n".join(part for part in parts if part)
277
-
278
-
279
- def _performance_metrics(work: Dict[str, Any]) -> Tuple[int, int, int, int, int]:
280
- metrics = work.get("metrics") if isinstance(work.get("metrics"), dict) else {}
281
- return (
282
- _safe_int(work.get("digg_count"), _safe_int(metrics.get("like"), 0)),
283
- _safe_int(work.get("comment_count"), _safe_int(metrics.get("comment"), 0)),
284
- _safe_int(work.get("collect_count"), _safe_int(metrics.get("collect"), 0)),
285
- _safe_int(work.get("share_count"), _safe_int(metrics.get("share"), 0)),
286
- _safe_int(work.get("play_count"), _safe_int(metrics.get("play"), 0)),
287
- )
288
-
289
-
290
- def _score_signals(texts: Sequence[Tuple[str, float]], mapping: Sequence[Tuple[str, Sequence[str]]], *, default: str) -> str:
291
- scores = {label: 0.0 for label, _ in mapping}
292
- for text, weight in texts:
293
- lowered = _safe_text(text).lower()
294
- if not lowered:
295
- continue
296
- for label, tokens in mapping:
297
- hit_count = sum(1 for token in tokens if token and token.lower() in lowered)
298
- if hit_count:
299
- scores[label] += weight * hit_count
300
- ranked = sorted(scores.items(), key=lambda item: (-item[1], item[0]))
301
- return ranked[0][0] if ranked and ranked[0][1] > 0 else default
302
-
303
-
304
- def _pick_hook_type(text: str, existing: Any = None) -> str:
305
- clean = _safe_text(existing)
306
- if clean in HOOK_TYPES_ENUM:
307
- return clean
308
- first = _first_sentence(text)
309
- return _score_signals(
310
- [(first, 2.0), (text[:180], 1.0)],
311
- [
312
- ("result_hook", ["结果", "涨粉", "成交", "翻倍", "跑通", "案例结果"]),
313
- ("curiosity_hook", ["为什么", "怎么", "?", "?", "真相", "你知道吗"]),
314
- ("shortcut_hook", ["一键", "直接", "立刻", "马上", "不用", "三步", "复制"]),
315
- ("pain_point_hook", ["不会", "卡住", "焦虑", "没流量", "做不出来", "误区"]),
316
- ("comparison_hook", ["对比", "vs", "还是", "比", "A还是B"]),
317
- ("proof_hook", ["案例", "证明", "数据", "实测", "截图", "后台"]),
318
- ],
319
- default="curiosity_hook" if first else "result_hook",
320
- )
321
-
322
-
323
- def _pick_structure_type(text: str, existing: Any = None) -> str:
324
- clean = _safe_text(existing)
325
- if clean in STRUCTURE_TYPES:
326
- return clean
327
- return _score_signals(
328
- [(text, 1.0)],
329
- [
330
- ("benchmark_then_clone", ["对标", "拆解", "复刻", "照着做", "临摹"]),
331
- ("problem_solution_cta", ["问题", "解决", "评论", "私信", "领取", "回复"]),
332
- ("proof_then_pitch", ["案例", "证明", "结果", "报名", "咨询", "私信"]),
333
- ("hook_demo_result", ["演示", "实操", "结果", "前后对比", "跑一遍"]),
334
- ],
335
- default="hook_demo_result",
336
- )
337
-
338
-
339
- def _pick_cta_type(text: str, existing: Any = None) -> str:
340
- clean = _safe_text(existing)
341
- if clean in CTA_TYPES:
342
- return clean
343
- tail = "\n".join([line.strip() for line in _safe_text(text).splitlines()[-3:] if line.strip()])
344
- return _score_signals(
345
- [(tail, 2.0), (text, 0.8)],
346
- [
347
- ("comment_cta", ["评论", "留言", "扣1", "回复区"]),
348
- ("private_message_cta", ["私信", "加我", "vx", "微信", "主页联系"]),
349
- ("lead_magnet_cta", ["领取", "模板", "资料", "清单", "关键词"]),
350
- ("collect_cta", ["收藏", "存下", "保存"]),
351
- ("share_cta", ["转发", "分享给", "发给"]),
352
- ("follow_cta", ["关注", "下期见"]),
353
- ],
354
- default="no_cta",
355
- )
356
-
357
-
358
- def _style_markers(text: str, existing: Any = None) -> List[str]:
359
- markers: List[str] = []
360
- if isinstance(existing, list):
361
- markers.extend([_safe_text(item) for item in existing if _safe_text(item) in STYLE_MARKERS_ENUM])
362
- lowered = _safe_text(text).lower()
363
- mapping = [
364
- ("rhetorical_question", ["为什么", "怎么", "?", "?"]),
365
- ("imperative_tone", ["一定", "必须", "直接", "马上", "立刻"]),
366
- ("strong_assertion", ["就是", "本质上", "根本", "一定要"]),
367
- ("self_mockery", ["我自己也踩过", "我之前也傻", "我也翻车", "我也被打脸"]),
368
- ("quote_like_phrase", ["‘", "’", "“", "”", "所谓", "一句话说"]),
369
- ("emotional_wording", ["焦虑", "崩溃", "爽", "绝了", "离谱", "炸裂"]),
370
- ]
371
- for label, tokens in mapping:
372
- if any(token.lower() in lowered for token in tokens):
373
- markers.append(label)
374
- return _dedupe_keep_order(markers)[:6]
375
-
376
-
377
- def _pick_content_form(work: Dict[str, Any]) -> str:
378
- existing = _safe_text(work.get("content_form"))
379
- if existing in CONTENT_FORMS:
380
- return existing
381
- work_modality = _safe_text(work.get("work_modality")).lower()
382
- raw_text = _merged_text(work)
383
- if work_modality == "text":
384
- return "slideshow"
385
- if "screen" in raw_text.lower() or "录屏" in raw_text:
386
- return "screen_recording"
387
- if "采访" in raw_text or "对谈" in raw_text:
388
- return "interview_clip"
389
- if "直播" in raw_text:
390
- return "live_clip"
391
- return "talking_head" if work_modality == "video" else "voiceover"
392
-
393
-
394
- def _normalize_work(profile: Dict[str, Any], work: Dict[str, Any]) -> Dict[str, Any]:
395
- digg, comment, collect, share, play = _performance_metrics(work)
396
- publish_time = work.get("publish_time") or work.get("create_time") or work.get("create_time_sec")
397
- publish_days_ago = _publish_days_ago(publish_time)
398
- performance_score = round(0.15 * digg + 0.20 * comment + 0.35 * collect + 0.30 * share, 4)
399
- norm_divisor = math.log((publish_days_ago or 0) + 2)
400
- performance_score_norm = round((performance_score / norm_divisor) if norm_divisor > 0 else performance_score, 4)
401
- title = _safe_text(work.get("title"))
402
- caption_raw = _safe_text(work.get("caption_raw") or work.get("desc"))
403
- primary_text = _safe_text(work.get("primary_text") or work.get("asr_clean") or work.get("asr_raw") or caption_raw)
404
- primary_text_source_raw = _safe_text(work.get("primary_text_source"))
405
- primary_text_source = (
406
- primary_text_source_raw
407
- if primary_text_source_raw in {"asr_clean", "caption_raw"}
408
- else ("asr_clean" if _safe_text(work.get("work_modality")) == "video" else "caption_raw")
409
- )
410
- work_modality = _safe_text(work.get("work_modality")) or ("video" if _safe_text(work.get("video_download_url") or work.get("video_down_url") or work.get("asr_raw")) else "text")
411
- merged = "\n".join(part for part in [title, caption_raw, primary_text] if part)
412
- return {
413
- "platform_work_id": _safe_text(work.get("platform_work_id")),
414
- "title": title,
415
- "caption_raw": caption_raw,
416
- "work_modality": work_modality,
417
- "primary_text": primary_text,
418
- "primary_text_source": primary_text_source,
419
- "published_date": _safe_text(work.get("published_date")) or "",
420
- "publish_time": publish_time,
421
- "publish_days_ago": publish_days_ago,
422
- "duration_ms": _safe_int(work.get("duration_ms"), 0),
423
- "digg_count": digg,
424
- "comment_count": comment,
425
- "collect_count": collect,
426
- "share_count": share,
427
- "play_count": play,
428
- "content_form": _pick_content_form(work),
429
- "tags": _safe_text_list(work.get("tags")),
430
- "author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
431
- "author_name": _safe_text(profile.get("nickname")) or "作者",
432
- "performance_score": performance_score,
433
- "performance_score_norm": performance_score_norm,
434
- "bucket": "",
435
- "hook_type": _pick_hook_type(merged, work.get("hook_type") or work.get("hook")),
436
- "structure_type": _pick_structure_type(merged, work.get("structure_type") or work.get("content_structure")),
437
- "cta_type": _pick_cta_type(merged, work.get("cta_type") or work.get("cta")),
438
- "style_markers": _style_markers(merged, work.get("style_markers") or work.get("style_tags")),
439
- "analysis_eligibility": _safe_text(work.get("analysis_eligibility")) or "eligible",
440
- "analysis_exclusion_reason": _safe_text(work.get("analysis_exclusion_reason")),
441
- "analysis_artifact_status": work.get("analysis_artifact_status"),
442
- "recent_30d_score_rank": None,
443
- }
444
-
445
-
446
def _assign_recent_30d_ranks(items: List[Dict[str, Any]]) -> None:
    """Rank works published within the last 30 days by normalized score.

    Mutates items in place: works with ``publish_days_ago`` <= 30 receive a
    1-based ``recent_30d_score_rank`` (higher score ranks first, work id as a
    deterministic tie-breaker); every other work gets ``None``.
    """
    def _rank_key(row: Dict[str, Any]) -> Tuple[float, str]:
        return (-_safe_float(row.get("performance_score_norm"), 0.0), _safe_text(row.get("platform_work_id")))

    recent_rows = sorted(
        (row for row in items if _safe_int(row.get("publish_days_ago"), 999999) <= 30),
        key=_rank_key,
    )
    recent_ids: set = set()
    for position, row in enumerate(recent_rows, start=1):
        row["recent_30d_score_rank"] = position
        recent_ids.add(_safe_text(row.get("platform_work_id")))
    # Anything outside the 30-day window is explicitly unranked.
    for row in items:
        if _safe_text(row.get("platform_work_id")) not in recent_ids:
            row["recent_30d_score_rank"] = None
455
-
456
-
457
def _assign_buckets(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Sort works by normalized score and label each with a performance bucket.

    For fewer than 20 works fixed rank cut-offs are used; otherwise the split
    is percentile-based (Top <=20%, Strong <=50%, Mid <=85%, rest Bottom).
    Also stamps ``all_time_score_rank`` and delegates 30-day ranking to
    :func:`_assign_recent_30d_ranks`. Returns the sorted list.
    """
    total = len(items)
    ordered = sorted(
        items,
        key=lambda row: (-_safe_float(row.get("performance_score_norm"), 0.0), _safe_text(row.get("platform_work_id"))),
    )
    for rank0, row in enumerate(ordered):
        if total < 20:
            # Small sample: fixed rank cut-offs instead of percentiles.
            if rank0 < 4:
                label = "Top"
            elif rank0 < 8:
                label = "Strong"
            elif rank0 < max(10, total - 2):
                label = "Mid"
            else:
                label = "Bottom"
        else:
            share = (rank0 + 1) / total
            if share <= 0.20:
                label = "Top"
            elif share <= 0.50:
                label = "Strong"
            elif share <= 0.85:
                label = "Mid"
            else:
                label = "Bottom"
        row["bucket"] = label
        row["all_time_score_rank"] = rank0 + 1
    _assign_recent_30d_ranks(ordered)
    return ordered
476
-
477
-
478
- def _scaled_bucket_quota(sample_size: int) -> Dict[str, int]:
479
- if sample_size <= 0:
480
- return {"Top": 0, "Strong": 0, "Mid": 0, "Bottom": 0}
481
- base = {"Top": 18, "Strong": 18, "Mid": 14, "Bottom": 10}
482
- raw = {key: sample_size * (value / 60.0) for key, value in base.items()}
483
- quota = {key: int(math.floor(value)) for key, value in raw.items()}
484
- quota["Bottom"] = max(quota.get("Bottom", 0), 1)
485
- deficit = sample_size - sum(quota.values())
486
- order = sorted(raw.items(), key=lambda item: raw[item[0]] - quota[item[0]], reverse=True)
487
- idx = 0
488
- while deficit > 0 and order:
489
- key = order[idx % len(order)][0]
490
- quota[key] += 1
491
- deficit -= 1
492
- idx += 1
493
- while sum(quota.values()) > sample_size:
494
- for key in ("Mid", "Strong", "Top", "Bottom"):
495
- min_keep = 1 if key == "Bottom" else 0
496
- if quota[key] > min_keep and sum(quota.values()) > sample_size:
497
- quota[key] -= 1
498
- if quota["Top"] + quota["Strong"] < math.ceil(sample_size * 0.55):
499
- needed = math.ceil(sample_size * 0.55) - (quota["Top"] + quota["Strong"])
500
- for _ in range(needed):
501
- if quota["Mid"] > 0:
502
- quota["Mid"] -= 1
503
- quota["Top"] += 1
504
- elif quota["Bottom"] > 1:
505
- quota["Bottom"] -= 1
506
- quota["Strong"] += 1
507
- return quota
508
-
509
-
510
- def _pick_sample_size(total: int) -> int:
511
- if total <= 0:
512
- return 0
513
- return min(max(int(round(total * 0.30)), 40), 80, total)
514
-
515
-
516
def _value_variants(item: Dict[str, Any], field: str) -> List[str]:
    """Return the non-empty text value(s) stored under *field* as a flat list."""
    raw = item.get(field)
    if isinstance(raw, list):
        variants: List[str] = []
        for entry in raw:
            text = _safe_text(entry)
            if text:
                variants.append(text)
        return variants
    single = _safe_text(raw)
    return [single] if single else []
522
-
523
-
524
def _pick_diverse_items(pool: List[Dict[str, Any]], *, selected_ids: set, limits: Sequence[Tuple[str, int]], cap: int) -> List[Dict[str, Any]]:
    """Greedily pick items from *pool* maximizing value diversity per field.

    ``limits`` is a sequence of ``(field, minimum)`` pairs: for each field, items
    are picked until at least ``minimum`` distinct values of that field are
    covered. Items whose id is already in *selected_ids* (or picked here) are
    skipped, and no more than *cap* items are returned in total.
    """
    picked: List[Dict[str, Any]] = []
    # Per-field set of values already covered by picks made in this call.
    covered: Dict[str, set] = {field: set() for field, _ in limits}
    local_ids: set = set()
    for field, minimum in limits:
        if minimum <= 0:
            continue
        for item in pool:
            if len(picked) >= cap:
                # Hard cap reached — stop immediately across all fields.
                return picked
            work_id = _safe_text(item.get("platform_work_id"))
            if not work_id or work_id in selected_ids or work_id in local_ids:
                continue
            # Only pick the item if it adds at least one new value for this field.
            candidates = [value for value in _value_variants(item, field) if value not in covered[field]]
            if not candidates:
                continue
            picked.append(item)
            local_ids.add(work_id)
            # A pick can also cover values for the other limit fields, so sync
            # every field's coverage from this item.
            for sync_field, _ in limits:
                covered[sync_field].update(_value_variants(item, sync_field))
            if len(covered[field]) >= minimum:
                break
    return picked
547
-
548
-
549
def _sample_standard_works(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Pick a representative sample of works honoring per-bucket quotas.

    Selection happens in three passes:
      1. Diversity-first picks from the "Top" bucket (varied content forms,
         hook types and structure types) up to the Top quota.
      2. Quota fill per bucket, in Top/Strong/Mid/Bottom order.
      3. Backfill from the full list if the sample is still short.

    Returns at most ``_pick_sample_size(len(items))`` works; input order
    within each bucket is preserved.
    """
    total = len(items)
    if total <= 0:
        return []
    sample_size = _pick_sample_size(total)
    quota = _scaled_bucket_quota(sample_size)
    bucket_groups: Dict[str, List[Dict[str, Any]]] = {"Top": [], "Strong": [], "Mid": [], "Bottom": []}
    for item in items:
        # Works with an unknown/empty bucket label are pooled with "Mid".
        bucket_groups.setdefault(_safe_text(item.get("bucket")) or "Mid", []).append(item)

    selected: List[Dict[str, Any]] = []
    selected_ids: set = set()
    # PERF: incremental per-bucket tally of selected works (keyed by each
    # work's own bucket text). The previous code re-scanned ``selected`` for
    # every candidate, making the quota check O(n^2); the tally makes it O(1)
    # per candidate with identical results.
    bucket_counts: Counter = Counter()

    def _take(item: Dict[str, Any], work_id: str) -> None:
        # Record a selection, keeping the id set and bucket tally in sync.
        selected.append(item)
        selected_ids.add(work_id)
        bucket_counts[_safe_text(item.get("bucket"))] += 1

    top_pool = bucket_groups.get("Top", [])
    for item in _pick_diverse_items(top_pool, selected_ids=selected_ids, limits=(("content_form", 3), ("hook_type", 2), ("structure_type", 2)), cap=quota.get("Top", 0)):
        _take(item, _safe_text(item.get("platform_work_id")))

    for bucket in ("Top", "Strong", "Mid", "Bottom"):
        for item in bucket_groups.get(bucket, []):
            if bucket_counts[bucket] >= quota.get(bucket, 0):
                break
            work_id = _safe_text(item.get("platform_work_id"))
            if work_id in selected_ids:
                continue
            _take(item, work_id)

    if len(selected) < sample_size:
        # Backfill in original order, ignoring buckets, until the target size.
        for item in items:
            work_id = _safe_text(item.get("platform_work_id"))
            if work_id in selected_ids:
                continue
            _take(item, work_id)
            if len(selected) >= sample_size:
                break
    return selected[:sample_size]
586
-
587
-
588
def _keyword_distribution_from_texts(texts: Sequence[str], *, limit: int = 20) -> Dict[str, Any]:
    """Aggregate token counts across *texts*; each text contributes each token at most once."""
    tally = Counter(token for text in texts for token in _unique_tokens(text))
    return {"items": _top_counter(tally, limit=limit), "counting_mode": "work_occurrence"}
593
-
594
-
595
def _field_keyword_distribution(works: List[Dict[str, Any]], field: str, *, limit: int = 20) -> Dict[str, Any]:
    """Keyword distribution over a single text *field* of every work."""
    texts = [_safe_text(work.get(field)) for work in works]
    return _keyword_distribution_from_texts(texts, limit=limit)
597
-
598
-
599
def _publish_time_distribution(works: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Histograms of publish weekday and hour, plus a count of unparsable timestamps."""
    weekday_counts: Counter = Counter()
    hour_counts: Counter = Counter()
    missing = 0
    for work in works:
        moment = _parse_datetime(work.get("publish_time"))
        if moment is None:
            missing += 1
        else:
            # Keys are stringified so the result is JSON-friendly.
            weekday_counts[str(moment.weekday())] += 1
            hour_counts[str(moment.hour)] += 1
    return {
        "weekday": _top_counter(weekday_counts, limit=7),
        "hour": _top_counter(hour_counts, limit=24),
        "unavailable_count": missing,
    }
611
-
612
-
613
def _duration_distribution(works: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Bucket positive durations into short (<30s) / medium (<120s) / long."""
    length_counts: Counter = Counter()
    any_duration = False
    for work in works:
        ms = _safe_int(work.get("duration_ms"), 0)
        if ms <= 0:
            continue
        any_duration = True
        if ms < 30000:
            length_counts["short"] += 1
        elif ms < 120000:
            length_counts["medium"] += 1
        else:
            length_counts["long"] += 1
    if not any_duration:
        return {"available": False, "reason": "duration_unavailable"}
    return {"available": True, "items": _top_counter(length_counts, limit=3)}
621
-
622
-
623
def _performance_distribution(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summary stats (max/min and top-down percentiles) of normalized scores."""
    scores = [_safe_float(row.get("performance_score_norm"), 0.0) for row in items]
    if not scores:
        return {"available": False, "reason": "no_scores"}
    descending = sorted(scores, reverse=True)
    last = len(descending) - 1

    def _at(position: float) -> float:
        # Index into the descending list, clamped to valid bounds.
        slot = min(max(int(math.floor(last * position)), 0), last)
        return round(descending[slot], 4)

    return {
        "available": True,
        "max": round(descending[0], 4),
        "min": round(descending[-1], 4),
        "p20": _at(0.20),
        "p50": _at(0.50),
        "p85": _at(0.85),
    }
632
-
633
-
634
def _engagement_pattern(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Average engagement counters (digg/comment/collect/share/play) over *items*."""
    metrics = ("digg_count", "comment_count", "collect_count", "share_count", "play_count")
    if not items:
        return {"count": 0, "avg_digg_count": 0, "avg_comment_count": 0, "avg_collect_count": 0, "avg_share_count": 0, "avg_play_count": 0}
    count = len(items)
    sums = {name: 0 for name in metrics}
    for row in items:
        for name in metrics:
            sums[name] += _safe_int(row.get(name), 0)
    result: Dict[str, Any] = {"count": count}
    for name in metrics:
        result[f"avg_{name}"] = round(sums[name] / count, 2)
    return result
650
-
651
-
652
def _compare_bucket_groups(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Per-bucket (Top/Mid/Bottom) keyword, type and engagement summaries, keyed lowercase."""
    summary: Dict[str, Any] = {}
    for name in ("Top", "Mid", "Bottom"):
        works = [row for row in items if row.get("bucket") == name]
        summary[name.lower()] = {
            "title_keywords": _field_keyword_distribution(works, "title", limit=8).get("items", []),
            "caption_keywords": _field_keyword_distribution(works, "caption_raw", limit=8).get("items", []),
            "primary_text_keywords": _field_keyword_distribution(works, "primary_text", limit=8).get("items", []),
            "hook_types": _distribution_from_values([_safe_text(row.get("hook_type")) for row in works], limit=6).get("items", []),
            "structure_types": _distribution_from_values([_safe_text(row.get("structure_type")) for row in works], limit=6).get("items", []),
            "cta_types": _distribution_from_values([_safe_text(row.get("cta_type")) for row in works], limit=6).get("items", []),
            "content_forms": _distribution_from_values([_safe_text(row.get("content_form")) for row in works], limit=6).get("items", []),
            "engagement_pattern": _engagement_pattern(works),
        }
    return summary
667
-
668
-
669
def prepare_author_analysis_bundle(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Dict[str, Any]:
    """Build the full author-analysis bundle from a profile and its raw works.

    Pipeline: normalize each work dict, keep only works marked "eligible",
    rank/bucket them, draw a quota-based sample, then assemble the
    ``analysis_input`` payload (profile summary + global aggregate
    distributions + sampled works + metadata). Returns a dict with the
    payload plus the intermediate lists for callers that need them.
    """
    # Non-dict entries in ``works`` are silently dropped during normalization.
    normalized = [_normalize_work(profile, work) for work in works if isinstance(work, dict)]
    eligible = [item for item in normalized if _safe_text(item.get("analysis_eligibility")) == "eligible"]
    excluded_count = len(normalized) - len(eligible)
    ranked = _assign_buckets(eligible)
    sampled = _sample_standard_works(ranked)
    # Only works with a non-empty id make it into the id list.
    sampled_work_ids = [_safe_text(item.get("platform_work_id")) for item in sampled if _safe_text(item.get("platform_work_id"))]
    # Corpus-wide statistics computed over ALL eligible (ranked) works, not
    # just the sample, so the model sees the global shape of the account.
    aggregate_stats = {
        "total_works": len(ranked),
        "excluded_works_count": excluded_count,
        "sampled_works_count": len(sampled),
        "sampling_ratio": round((len(sampled) / len(ranked)), 4) if ranked else 0.0,
        "sampling_mode": "standard",
        "analysis_window": "mixed",
        "global_title_keyword_distribution": _field_keyword_distribution(ranked, "title"),
        "global_caption_keyword_distribution": _field_keyword_distribution(ranked, "caption_raw"),
        "global_primary_text_keyword_distribution": _field_keyword_distribution(ranked, "primary_text"),
        "global_hook_type_distribution": _distribution_from_values([_safe_text(item.get("hook_type")) for item in ranked]),
        "global_structure_type_distribution": _distribution_from_values([_safe_text(item.get("structure_type")) for item in ranked]),
        "global_cta_type_distribution": _distribution_from_values([_safe_text(item.get("cta_type")) for item in ranked]),
        "global_content_form_distribution": _distribution_from_values([_safe_text(item.get("content_form")) for item in ranked]),
        "global_work_modality_distribution": _distribution_from_values([_safe_text(item.get("work_modality")) for item in ranked]),
        "global_performance_distribution": _performance_distribution(ranked),
        "global_publish_time_distribution": _publish_time_distribution(ranked),
        "global_duration_distribution": _duration_distribution(ranked),
        "global_bucket_distribution": _distribution_from_values([_safe_text(item.get("bucket")) for item in ranked], limit=4),
        "global_top_vs_mid_vs_bottom_deltas": _compare_bucket_groups(ranked),
    }
    analysis_input = {
        "author_profile": {
            "platform": _safe_text(profile.get("platform")) or platform,
            # Profile sources disagree on the id key; accept either spelling.
            "platform_author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
            "nickname": _safe_text(profile.get("nickname")),
            "author_handle": _safe_text(profile.get("author_handle")),
            "signature": _safe_text(profile.get("signature")),
            "fans_count": _safe_int(profile.get("fans_count"), 0),
            "liked_count": _safe_int(profile.get("liked_count"), 0),
            "collected_count": _safe_int(profile.get("collected_count"), 0),
            # Falls back to the eligible-work count when the profile omits it.
            "works_count": _safe_int(profile.get("works_count"), len(ranked)),
            "verified": bool(profile.get("verified", False)),
            "ip_location": _safe_text(profile.get("ip_location")),
        },
        "sampled_works": sampled,
        "aggregate_stats": aggregate_stats,
        "platform_context": {
            "platform": platform,
            "content_kind": "author_home",
            # Modality of the top-ranked work is used as a proxy for the account.
            "primary_work_modality": ranked[0].get("work_modality") if ranked else None,
        },
        "analysis_metadata": {
            "input_object_name": "author_analysis_input_v1",
            "prompt_contract_path": str(PROMPT_CONTRACT_PATH),
            "input_schema_path": str(INPUT_SCHEMA_PATH),
            "output_schema_path": str(OUTPUT_SCHEMA_PATH),
            "analysis_mode": "standard",
            "total_works": len(normalized),
            "eligible_works_count": len(ranked),
            "excluded_works_count": excluded_count,
            "sampled_works_count": len(sampled),
        },
    }
    return {
        "analysis_input": analysis_input,
        "normalized_works": normalized,
        "ranked_works": ranked,
        "sampled_works": sampled,
        "sampled_work_ids": sampled_work_ids,
        "excluded_works_count": excluded_count,
    }
738
-
739
-
740
def build_author_analysis_input_v1(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Tuple[Dict[str, Any], List[Dict[str, str]]]:
    """Build the v1 analysis-input payload and return it with its validation errors."""
    bundle = prepare_author_analysis_bundle(profile=profile, works=works, platform=platform)
    payload = bundle.get("analysis_input") or {}
    errors = validate_author_analysis_input_v1(payload)
    return payload, errors
743
-
744
-
745
def _require_non_empty_string(errors: List[Dict[str, str]], field: str, value: Any) -> None:
    """Append an ``empty_string`` error for *field* when *value* has no usable text."""
    if _safe_text(value):
        return
    errors.append({"field": field, "reason": "empty_string"})
748
-
749
-
750
- def _require_dict(errors: List[Dict[str, str]], field: str, value: Any) -> Dict[str, Any]:
751
- if not isinstance(value, dict):
752
- errors.append({"field": field, "reason": "type_error:dict"})
753
- return {}
754
- return value
755
-
756
-
757
- def _require_list(errors: List[Dict[str, str]], field: str, value: Any) -> List[Any]:
758
- if not isinstance(value, list):
759
- errors.append({"field": field, "reason": "type_error:list"})
760
- return []
761
- return value
762
-
763
-
764
def _validate_distribution_object(errors: List[Dict[str, str]], field: str, value: Any) -> None:
    """Validate a distribution dict: an ``items`` list of {value, count} rows.

    Only the first 20 rows are inspected; each must be a dict with a
    non-empty ``value`` and a ``count`` key.
    """
    block = _require_dict(errors, field, value)
    if not block:
        return
    rows = _require_list(errors, f"{field}.items", block.get("items"))
    for index, entry in enumerate(rows[:20]):
        row = _require_dict(errors, f"{field}.items.{index}", entry)
        if not row:
            continue
        _require_non_empty_string(errors, f"{field}.items.{index}.value", row.get("value"))
        if "count" not in row:
            errors.append({"field": f"{field}.items.{index}.count", "reason": "missing"})
775
-
776
-
777
def validate_author_analysis_input_v1(payload: Dict[str, Any]) -> List[Dict[str, str]]:
    """Validate an ``author_analysis_input_v1`` payload.

    Runs the JSON-schema check first, then structural checks: required
    profile/context/metadata strings, presence and shape of the aggregate
    distributions, sample-count consistency, and enum membership for each
    sampled work's classification fields. Returns a de-duplicated list of
    ``{"field", "reason"}`` error records; an empty list means valid.
    """
    errors: List[Dict[str, str]] = []
    errors.extend(_schema_errors(payload, INPUT_SCHEMA_PATH))
    author_profile = _require_dict(errors, "author_profile", payload.get("author_profile"))
    aggregate = _require_dict(errors, "aggregate_stats", payload.get("aggregate_stats"))
    platform_context = _require_dict(errors, "platform_context", payload.get("platform_context"))
    analysis_metadata = _require_dict(errors, "analysis_metadata", payload.get("analysis_metadata"))
    sampled_works = _require_list(errors, "sampled_works", payload.get("sampled_works"))

    _require_non_empty_string(errors, "author_profile.platform", author_profile.get("platform"))
    _require_non_empty_string(errors, "author_profile.platform_author_id", author_profile.get("platform_author_id"))
    _require_non_empty_string(errors, "author_profile.nickname", author_profile.get("nickname"))
    _require_non_empty_string(errors, "platform_context.platform", platform_context.get("platform"))
    _require_non_empty_string(errors, "platform_context.content_kind", platform_context.get("content_kind"))
    _require_non_empty_string(errors, "analysis_metadata.input_object_name", analysis_metadata.get("input_object_name"))
    _require_non_empty_string(errors, "analysis_metadata.analysis_mode", analysis_metadata.get("analysis_mode"))

    for key in [
        "global_title_keyword_distribution",
        "global_caption_keyword_distribution",
        "global_primary_text_keyword_distribution",
        "global_hook_type_distribution",
        "global_structure_type_distribution",
        "global_cta_type_distribution",
        "global_content_form_distribution",
        "global_work_modality_distribution",
        "global_performance_distribution",
        "global_publish_time_distribution",
        "global_bucket_distribution",
        "global_top_vs_mid_vs_bottom_deltas",
    ]:
        if key not in aggregate:
            errors.append({"field": f"aggregate_stats.{key}", "reason": "missing"})

    # BUGFIX: the previous implementation re-fetched each block via
    # payload.get("aggregate_stats", {}).get(...), which raises AttributeError
    # when "aggregate_stats" is present but not a dict. Read from the already
    # type-checked ``aggregate`` (empty dict on type error) so validation
    # reports errors instead of crashing.
    for key in [
        "global_title_keyword_distribution",
        "global_caption_keyword_distribution",
        "global_primary_text_keyword_distribution",
        "global_hook_type_distribution",
        "global_structure_type_distribution",
        "global_cta_type_distribution",
        "global_content_form_distribution",
        "global_work_modality_distribution",
        "global_bucket_distribution",
    ]:
        _validate_distribution_object(errors, f"aggregate_stats.{key}", aggregate.get(key))

    total_works = _safe_int(aggregate.get("total_works"), 0)
    if total_works > 0 and not sampled_works:
        errors.append({"field": "sampled_works", "reason": "empty_collection"})
    if sampled_works and _safe_int(aggregate.get("sampled_works_count"), -1) != len(sampled_works):
        errors.append({"field": "aggregate_stats.sampled_works_count", "reason": "count_mismatch"})

    # Only the first 120 sampled works are inspected to bound validation cost.
    for index, item in enumerate(sampled_works[:120]):
        row = _require_dict(errors, f"sampled_works.{index}", item)
        if not row:
            continue
        _require_non_empty_string(errors, f"sampled_works.{index}.platform_work_id", row.get("platform_work_id"))
        # At least one of the three text fields must carry content.
        if not any(_safe_text(row.get(key)) for key in ("title", "caption_raw", "primary_text")):
            errors.append({"field": f"sampled_works.{index}", "reason": "all_text_fields_empty"})
        if _safe_text(row.get("work_modality")) not in {"video", "text"}:
            errors.append({"field": f"sampled_works.{index}.work_modality", "reason": "enum_required"})
        if _safe_text(row.get("primary_text_source")) not in {"asr_clean", "caption_raw"}:
            errors.append({"field": f"sampled_works.{index}.primary_text_source", "reason": "enum_required"})
        if _safe_text(row.get("hook_type")) not in HOOK_TYPES_ENUM:
            errors.append({"field": f"sampled_works.{index}.hook_type", "reason": "enum_required"})
        if _safe_text(row.get("structure_type")) not in STRUCTURE_TYPES:
            errors.append({"field": f"sampled_works.{index}.structure_type", "reason": "enum_required"})
        if _safe_text(row.get("cta_type")) not in CTA_TYPES:
            errors.append({"field": f"sampled_works.{index}.cta_type", "reason": "enum_required"})
        if _safe_text(row.get("content_form")) not in CONTENT_FORMS:
            errors.append({"field": f"sampled_works.{index}.content_form", "reason": "enum_required"})
        markers = _require_list(errors, f"sampled_works.{index}.style_markers", row.get("style_markers"))
        for marker in markers:
            if _safe_text(marker) not in STYLE_MARKERS_ENUM:
                errors.append({"field": f"sampled_works.{index}.style_markers", "reason": f"enum_required:{marker}"})
    return _dedupe_error_list(errors)
854
-
855
-
856
- def _fallback_sample_confidence(sample_size: int) -> str:
857
- return "low" if sample_size < 5 else ("mid" if sample_size < 15 else "high")
858
-
859
-
860
def build_fallback_author_analysis_v2(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Build a minimal, deterministic v2 author analysis from the input payload.

    Used when the model-generated analysis is unavailable: every module of the
    v2 output is filled from the payload's aggregate distributions and sample,
    with generic templated wording. Evidence fields embed the raw observed
    values so downstream consumers can tell this is a fallback (see the
    "fallback_generated" note in evidence_pack).
    """
    # Defensively re-type the three top-level sections; malformed input
    # degrades to empty containers instead of raising.
    author = payload.get("author_profile") if isinstance(payload.get("author_profile"), dict) else {}
    aggregate = payload.get("aggregate_stats") if isinstance(payload.get("aggregate_stats"), dict) else {}
    sampled = payload.get("sampled_works") if isinstance(payload.get("sampled_works"), list) else []
    nickname = _safe_text(author.get("nickname")) or "该作者"
    # Top observed values pulled from the global distributions (bounded slices).
    top_keywords = [item.get("value") for item in ((aggregate.get("global_primary_text_keyword_distribution") or {}).get("items") or []) if isinstance(item, dict)][:5]
    theme_items = [item.get("value") for item in ((aggregate.get("global_caption_keyword_distribution") or {}).get("items") or []) if isinstance(item, dict)][:6]
    hook_items = [item.get("value") for item in ((aggregate.get("global_hook_type_distribution") or {}).get("items") or []) if isinstance(item, dict)][:3]
    structure_items = [item.get("value") for item in ((aggregate.get("global_structure_type_distribution") or {}).get("items") or []) if isinstance(item, dict)][:3]
    cta_items = [item.get("value") for item in ((aggregate.get("global_cta_type_distribution") or {}).get("items") or []) if isinstance(item, dict)][:3]
    sample_size = len(sampled)
    confidence = _fallback_sample_confidence(sample_size)
    # First available of title/caption/primary text serves as the display title.
    representative = [{"platform_work_id": item.get("platform_work_id"), "title": item.get("title") or item.get("caption_raw") or item.get("primary_text"), "bucket": item.get("bucket")} for item in sampled[:5] if isinstance(item, dict)]
    dominant_themes = theme_items[:3]
    theme_clusters = []
    if dominant_themes:
        # One single-keyword cluster per dominant theme.
        theme_clusters = [{"name": theme, "keywords": [theme]} for theme in dominant_themes]
    return {
        "author_positioning": {
            "one_liner": f"{nickname} 是一个围绕{dominant_themes[0] if dominant_themes else '内容增长'}持续输出的创作者,主要用高密度结构化表达帮助目标受众缩短试错路径。",
            "author_type": "creator_education",
            "primary_role": "teacher",
            "secondary_roles": ["operator"],
            "target_audience": "想提升内容与增长效率的创作者/操盘手",
            "core_problem_solved": "把复杂方法压缩成可快速模仿的内容动作",
            "core_value_proposition": "用短平快的机制化表达降低学习和执行门槛",
            "evidence": [f"top_keywords={top_keywords}", f"dominant_themes={dominant_themes}"],
        },
        "trust_model": {
            "primary_trust_source": "systematized_method",
            "secondary_trust_sources": ["demonstration", "consistency"],
            "trust_building_mechanisms": ["重复输出同类方法", "把观点包装成步骤/模板", "用案例或结果句强化可信度"],
            "trust_risks": ["样本主要来自单平台表达,真实性与转化深度证据有限"],
            "relationship_posture": {"distance": "mid", "authority_level": "mid", "affinity_level": "mid"},
            "evidence": [f"sample_size={sample_size}", f"structures={structure_items}"],
        },
        "cognitive_engine": {
            "worldview": "优先追求可执行、可复用、可放大的内容动作。",
            "value_priority": ["效率", "结果感", "方法压缩"],
            "problem_definition_style": "execution_problem",
            "reasoning_modes": ["workflow_packaging", "case_induction"],
            "knowledge_sources": ["作品标题/文案/字幕中的步骤化表达"],
            "judgment_style": {"certainty_level": "mid", "notes": "基于单平台主页样本初判"},
            "core_cognitive_actions": ["workflow_packaging", "result_compression"],
            "evidence": [f"keywords={top_keywords}", f"sample_size={sample_size}"],
        },
        "expression_hooks": {
            "language_style": {"oral_level": "mid", "assertiveness": "high", "emotional_intensity": "mid", "professional_density": "mid"},
            "hook_keywords": top_keywords[:5],
            # Only enum-valid observed hooks are kept; otherwise a safe default.
            "hook_types": [item for item in hook_items if item in HOOK_TYPES_ENUM] or ["result_hook"],
            "argument_patterns": ["先给结果/判断,再给步骤或解释", "用反差或对比压缩注意力获取"],
            "emotion_patterns": ["sharp_direct"],
            "memorability_sources": ["strong_keywords", "repeatable_phrases"],
            "evidence": [f"hook_types={hook_items}", f"keywords={top_keywords}"],
        },
        "content_mechanism": {
            "topic_sources": ["audience_questions", "workflow_demos"],
            "topic_goals": ["acquire_attention", "build_trust", "show_capability"],
            # Same filter-or-default pattern as hook_types above.
            "winning_content_structures": [item for item in structure_items if item in WINNING_CONTENT_STRUCTURES] or ["hook_demo_result"],
            "repeatable_series": ["高频重复的母题/模板仍在持续输出"],
            "traffic_drivers": ["result_promise", "shortcut_imagination"],
            "content_flywheel": "用结果型标题拉点击,用结构化拆解留住注意力,再用 CTA 做后续动作承接。",
            "cross_platform_variation": "当前不可判断:缺少跨平台样本。",
            "dominant_themes": dominant_themes,
            "theme_clusters": theme_clusters,
            "evidence": [f"themes={dominant_themes}", f"structures={structure_items}", f"cta_types={cta_items}"],
        },
        "commercial_bridge": {
            "content_role_in_funnel": ["acquire_attention", "build_trust", "qualify"],
            # Products are only inferred when a lead-capturing CTA was observed.
            "likely_products": ["course", "consulting"] if any(item in {"private_message_cta", "lead_magnet_cta"} for item in cta_items) else [],
            "conversion_path": "通过内容建立方法可信度,再用评论/私信/领取资料承接线索。",
            "content_product_fit": "mid",
            "business_model_signals": ["content_led_growth", "high_ip_dependency"],
            "commercial_risks": ["明确成交链路和产品证据不足,不能强断言单一模式。"],
            "evidence": [f"cta_types={cta_items}"],
        },
        "core_tensions": {
            "tensions": [
                {"label": "效率 vs 深度", "surface": "表达短促、结果先行", "deep_reason": "优先追求短视频环境下的注意力效率", "strategic_implication": "适合拉新,但要防止过度压缩导致信任深度不足"},
                {"label": "模板复用 vs 差异化", "surface": "高频复用相似母题", "deep_reason": "成熟模板能稳定产出", "strategic_implication": "需要持续刷新案例与视角,避免模板疲劳"},
            ],
            "most_important_tension": "高效率表达很强,但商业深度证据未必同步充足。",
            "evidence": [f"sample_size={sample_size}", f"themes={dominant_themes}"],
        },
        "evidence_pack": {
            "sample_size": sample_size,
            "sample_confidence": confidence,
            "representative_works": representative,
            "top_keywords": top_keywords,
            "observed_hooks": hook_items,
            "observed_ctas": cta_items,
            "observed_structures": structure_items,
            "notes": ["fallback_generated", "基于标准化输入的最小保底分析"],
        },
        "clone_guidance": {
            "copyable_elements": ["结果先行的标题结构", "步骤化表达", "稳定重复的母模板"],
            "non_copyable_elements": ["作者长期经验背书", "既有受众信任资产"],
            "borrowable_flavor": ["短句高密度", "判断明确", "先给结果再给解释"],
            "danger_zones": ["不要照抄口头禅和具体句子", "证据不足时别硬抄商业承诺"],
            "if_only_learn_one_thing": "学他把复杂方法压缩成高可执行内容动作的能力,而不是抄表面语气。",
        },
    }
962
-
963
-
964
- def _enum_error(errors: List[Dict[str, str]], field: str, value: Any, allowed: set) -> None:
965
- if value in (None, "", []):
966
- return
967
- if isinstance(value, list):
968
- for item in value:
969
- if item not in allowed:
970
- errors.append({"field": field, "reason": f"enum_preferred:{item}"})
971
- elif value not in allowed:
972
- errors.append({"field": field, "reason": f"enum_preferred:{value}"})
973
-
974
-
975
- def validate_author_analysis_v2(payload: Dict[str, Any], *, analysis_input: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
976
- errors: List[Dict[str, str]] = []
977
- errors.extend(_schema_errors(payload, OUTPUT_SCHEMA_PATH))
978
- for module, fields in REQUIRED_V2_FIELDS.items():
979
- block = payload.get(module)
980
- if not isinstance(block, dict):
981
- errors.append({"field": module, "reason": "missing_or_type_error:dict"})
982
- continue
983
- for field in fields:
984
- if field not in block:
985
- errors.append({"field": f"{module}.{field}", "reason": "missing"})
986
-
987
- author_positioning = _require_dict(errors, "author_positioning", payload.get("author_positioning"))
988
- trust_model = _require_dict(errors, "trust_model", payload.get("trust_model"))
989
- cognitive_engine = _require_dict(errors, "cognitive_engine", payload.get("cognitive_engine"))
990
- expression_hooks = _require_dict(errors, "expression_hooks", payload.get("expression_hooks"))
991
- content_mechanism = _require_dict(errors, "content_mechanism", payload.get("content_mechanism"))
992
- commercial_bridge = _require_dict(errors, "commercial_bridge", payload.get("commercial_bridge"))
993
- core_tensions = _require_dict(errors, "core_tensions", payload.get("core_tensions"))
994
- evidence_pack = _require_dict(errors, "evidence_pack", payload.get("evidence_pack"))
995
- clone_guidance = _require_dict(errors, "clone_guidance", payload.get("clone_guidance"))
996
-
997
- for field, value in [
998
- ("author_positioning.one_liner", author_positioning.get("one_liner")),
999
- ("author_positioning.target_audience", author_positioning.get("target_audience")),
1000
- ("author_positioning.core_problem_solved", author_positioning.get("core_problem_solved")),
1001
- ("author_positioning.core_value_proposition", author_positioning.get("core_value_proposition")),
1002
- ("commercial_bridge.conversion_path", commercial_bridge.get("conversion_path")),
1003
- ("content_mechanism.content_flywheel", content_mechanism.get("content_flywheel")),
1004
- ("content_mechanism.cross_platform_variation", content_mechanism.get("cross_platform_variation")),
1005
- ("core_tensions.most_important_tension", core_tensions.get("most_important_tension")),
1006
- ("clone_guidance.if_only_learn_one_thing", clone_guidance.get("if_only_learn_one_thing")),
1007
- ]:
1008
- _require_non_empty_string(errors, field, value)
1009
-
1010
- posture = _require_dict(errors, "trust_model.relationship_posture", trust_model.get("relationship_posture"))
1011
- lang_style = _require_dict(errors, "expression_hooks.language_style", expression_hooks.get("language_style"))
1012
- judgment_style = _require_dict(errors, "cognitive_engine.judgment_style", cognitive_engine.get("judgment_style"))
1013
-
1014
- for list_field, value in [
1015
- ("trust_model.secondary_trust_sources", trust_model.get("secondary_trust_sources")),
1016
- ("trust_model.trust_building_mechanisms", trust_model.get("trust_building_mechanisms")),
1017
- ("trust_model.trust_risks", trust_model.get("trust_risks")),
1018
- ("cognitive_engine.value_priority", cognitive_engine.get("value_priority")),
1019
- ("cognitive_engine.reasoning_modes", cognitive_engine.get("reasoning_modes")),
1020
- ("cognitive_engine.knowledge_sources", cognitive_engine.get("knowledge_sources")),
1021
- ("cognitive_engine.core_cognitive_actions", cognitive_engine.get("core_cognitive_actions")),
1022
- ("expression_hooks.hook_keywords", expression_hooks.get("hook_keywords")),
1023
- ("expression_hooks.hook_types", expression_hooks.get("hook_types")),
1024
- ("expression_hooks.argument_patterns", expression_hooks.get("argument_patterns")),
1025
- ("expression_hooks.emotion_patterns", expression_hooks.get("emotion_patterns")),
1026
- ("expression_hooks.memorability_sources", expression_hooks.get("memorability_sources")),
1027
- ("content_mechanism.topic_sources", content_mechanism.get("topic_sources")),
1028
- ("content_mechanism.topic_goals", content_mechanism.get("topic_goals")),
1029
- ("content_mechanism.winning_content_structures", content_mechanism.get("winning_content_structures")),
1030
- ("content_mechanism.repeatable_series", content_mechanism.get("repeatable_series")),
1031
- ("content_mechanism.traffic_drivers", content_mechanism.get("traffic_drivers")),
1032
- ("content_mechanism.dominant_themes", content_mechanism.get("dominant_themes")),
1033
- ("content_mechanism.theme_clusters", content_mechanism.get("theme_clusters")),
1034
- ("commercial_bridge.content_role_in_funnel", commercial_bridge.get("content_role_in_funnel")),
1035
- ("commercial_bridge.likely_products", commercial_bridge.get("likely_products")),
1036
- ("commercial_bridge.business_model_signals", commercial_bridge.get("business_model_signals")),
1037
- ("commercial_bridge.commercial_risks", commercial_bridge.get("commercial_risks")),
1038
- ("evidence_pack.representative_works", evidence_pack.get("representative_works")),
1039
- ("evidence_pack.top_keywords", evidence_pack.get("top_keywords")),
1040
- ("evidence_pack.observed_hooks", evidence_pack.get("observed_hooks")),
1041
- ("evidence_pack.observed_ctas", evidence_pack.get("observed_ctas")),
1042
- ("evidence_pack.observed_structures", evidence_pack.get("observed_structures")),
1043
- ("evidence_pack.notes", evidence_pack.get("notes")),
1044
- ("clone_guidance.copyable_elements", clone_guidance.get("copyable_elements")),
1045
- ("clone_guidance.non_copyable_elements", clone_guidance.get("non_copyable_elements")),
1046
- ("clone_guidance.borrowable_flavor", clone_guidance.get("borrowable_flavor")),
1047
- ("clone_guidance.danger_zones", clone_guidance.get("danger_zones")),
1048
- ]:
1049
- _require_list(errors, list_field, value)
1050
-
1051
- _enum_error(errors, "author_positioning.author_type", author_positioning.get("author_type"), AUTHOR_TYPES)
1052
- _enum_error(errors, "author_positioning.primary_role", author_positioning.get("primary_role"), PRIMARY_ROLES)
1053
- _enum_error(errors, "trust_model.primary_trust_source", trust_model.get("primary_trust_source"), TRUST_SOURCES)
1054
- _enum_error(errors, "cognitive_engine.problem_definition_style", cognitive_engine.get("problem_definition_style"), PROBLEM_DEFINITION_STYLES)
1055
- _enum_error(errors, "cognitive_engine.reasoning_modes", cognitive_engine.get("reasoning_modes"), REASONING_MODES)
1056
- _enum_error(errors, "cognitive_engine.core_cognitive_actions", cognitive_engine.get("core_cognitive_actions"), CORE_COGNITIVE_ACTIONS)
1057
- _enum_error(errors, "expression_hooks.hook_types", expression_hooks.get("hook_types"), HOOK_TYPES_ENUM)
1058
- _enum_error(errors, "expression_hooks.emotion_patterns", expression_hooks.get("emotion_patterns"), EMOTION_PATTERNS)
1059
- _enum_error(errors, "expression_hooks.memorability_sources", expression_hooks.get("memorability_sources"), MEMORABILITY_SOURCES)
1060
- _enum_error(errors, "content_mechanism.topic_sources", content_mechanism.get("topic_sources"), TOPIC_SOURCES)
1061
- _enum_error(errors, "content_mechanism.topic_goals", content_mechanism.get("topic_goals"), TOPIC_GOALS)
1062
- _enum_error(errors, "content_mechanism.winning_content_structures", content_mechanism.get("winning_content_structures"), WINNING_CONTENT_STRUCTURES)
1063
- _enum_error(errors, "content_mechanism.traffic_drivers", content_mechanism.get("traffic_drivers"), TRAFFIC_DRIVERS)
1064
- _enum_error(errors, "commercial_bridge.content_role_in_funnel", commercial_bridge.get("content_role_in_funnel"), FUNNEL_ROLES)
1065
- _enum_error(errors, "commercial_bridge.likely_products", commercial_bridge.get("likely_products"), LIKELY_PRODUCTS)
1066
- _enum_error(errors, "commercial_bridge.business_model_signals", commercial_bridge.get("business_model_signals"), BUSINESS_MODEL_SIGNALS)
1067
- _enum_error(errors, "trust_model.relationship_posture.distance", posture.get("distance"), RELATIONSHIP_DISTANCE)
1068
- _enum_error(errors, "trust_model.relationship_posture.authority_level", posture.get("authority_level"), LOW_HIGH_MID)
1069
- _enum_error(errors, "trust_model.relationship_posture.affinity_level", posture.get("affinity_level"), LOW_HIGH_MID)
1070
- _enum_error(errors, "cognitive_engine.judgment_style.certainty_level", judgment_style.get("certainty_level"), LOW_HIGH_MID)
1071
- _enum_error(errors, "expression_hooks.language_style.oral_level", lang_style.get("oral_level"), LOW_HIGH_MID)
1072
- _enum_error(errors, "expression_hooks.language_style.assertiveness", lang_style.get("assertiveness"), LOW_HIGH_MID)
1073
- _enum_error(errors, "expression_hooks.language_style.emotional_intensity", lang_style.get("emotional_intensity"), LOW_HIGH_MID)
1074
- _enum_error(errors, "expression_hooks.language_style.professional_density", lang_style.get("professional_density"), LOW_HIGH_MID)
1075
- _enum_error(errors, "commercial_bridge.content_product_fit", commercial_bridge.get("content_product_fit"), LOW_HIGH_MID)
1076
- _enum_error(errors, "evidence_pack.sample_confidence", evidence_pack.get("sample_confidence"), LOW_HIGH_MID)
1077
- _enum_error(errors, "evidence_pack.observed_hooks", evidence_pack.get("observed_hooks"), HOOK_TYPES_ENUM)
1078
- _enum_error(errors, "evidence_pack.observed_ctas", evidence_pack.get("observed_ctas"), CTA_TYPES)
1079
- _enum_error(errors, "evidence_pack.observed_structures", evidence_pack.get("observed_structures"), STRUCTURE_TYPES)
1080
-
1081
- tensions = _require_list(errors, "core_tensions.tensions", core_tensions.get("tensions"))
1082
- if len(tensions) < 2:
1083
- errors.append({"field": "core_tensions.tensions", "reason": "guardrail:need_at_least_2"})
1084
- for index, tension in enumerate(tensions[:10]):
1085
- block = _require_dict(errors, f"core_tensions.tensions.{index}", tension)
1086
- for sub in ("label", "surface", "deep_reason", "strategic_implication"):
1087
- _require_non_empty_string(errors, f"core_tensions.tensions.{index}.{sub}", block.get(sub))
1088
-
1089
- representative_works = evidence_pack.get("representative_works") if isinstance(evidence_pack.get("representative_works"), list) else []
1090
- for index, work in enumerate(representative_works[:10]):
1091
- block = _require_dict(errors, f"evidence_pack.representative_works.{index}", work)
1092
- _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.platform_work_id", block.get("platform_work_id"))
1093
- _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.title", block.get("title"))
1094
- _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.bucket", block.get("bucket"))
1095
-
1096
- for field, value in [
1097
- ("author_positioning.evidence", author_positioning.get("evidence")),
1098
- ("trust_model.evidence", trust_model.get("evidence")),
1099
- ("cognitive_engine.evidence", cognitive_engine.get("evidence")),
1100
- ("expression_hooks.evidence", expression_hooks.get("evidence")),
1101
- ("content_mechanism.evidence", content_mechanism.get("evidence")),
1102
- ("commercial_bridge.evidence", commercial_bridge.get("evidence")),
1103
- ("core_tensions.evidence", core_tensions.get("evidence")),
1104
- ]:
1105
- items = _require_list(errors, field, value)
1106
- if not items:
1107
- errors.append({"field": field, "reason": "empty_collection"})
1108
-
1109
- sample_size = _safe_int(evidence_pack.get("sample_size"), 0)
1110
- sample_confidence = _safe_text(evidence_pack.get("sample_confidence"))
1111
- if sample_size < 5 and sample_confidence == "high":
1112
- errors.append({"field": "evidence_pack.sample_confidence", "reason": "guardrail:sample_lt_5_cannot_be_high"})
1113
-
1114
- if analysis_input is not None:
1115
- platform_context = analysis_input.get("platform_context") if isinstance(analysis_input.get("platform_context"), dict) else {}
1116
- if len({platform_context.get("platform")} - {None, ""}) <= 1:
1117
- cross_platform_variation = _safe_text(content_mechanism.get("cross_platform_variation"))
1118
- if cross_platform_variation and "不可判断" not in cross_platform_variation and "unknown" not in cross_platform_variation.lower():
1119
- errors.append({"field": "content_mechanism.cross_platform_variation", "reason": "guardrail:single_platform_should_stay_weak"})
1120
- aggregate = analysis_input.get("aggregate_stats") if isinstance(analysis_input.get("aggregate_stats"), dict) else {}
1121
- cta_items = ((aggregate.get("global_cta_type_distribution") or {}).get("items") or []) if isinstance((aggregate.get("global_cta_type_distribution") or {}), dict) else []
1122
- explicit_conversion = any(isinstance(item, dict) and item.get("value") in {"private_message_cta", "lead_magnet_cta"} for item in cta_items)
1123
- likely_products = commercial_bridge.get("likely_products") if isinstance(commercial_bridge.get("likely_products"), list) else []
1124
- if likely_products and not explicit_conversion:
1125
- errors.append({"field": "commercial_bridge.likely_products", "reason": "guardrail:weak_conversion_signal"})
1126
- return _dedupe_error_list(errors)
1127
-
1128
-
1129
- def derive_legacy_summary(author_analysis_v2: Dict[str, Any], *, analysis_input: Dict[str, Any], validation_errors: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]:
1130
- positioning = author_analysis_v2.get("author_positioning") if isinstance(author_analysis_v2.get("author_positioning"), dict) else {}
1131
- trust = author_analysis_v2.get("trust_model") if isinstance(author_analysis_v2.get("trust_model"), dict) else {}
1132
- content = author_analysis_v2.get("content_mechanism") if isinstance(author_analysis_v2.get("content_mechanism"), dict) else {}
1133
- bridge = author_analysis_v2.get("commercial_bridge") if isinstance(author_analysis_v2.get("commercial_bridge"), dict) else {}
1134
- tensions = author_analysis_v2.get("core_tensions") if isinstance(author_analysis_v2.get("core_tensions"), dict) else {}
1135
- clone = author_analysis_v2.get("clone_guidance") if isinstance(author_analysis_v2.get("clone_guidance"), dict) else {}
1136
- evidence = author_analysis_v2.get("evidence_pack") if isinstance(author_analysis_v2.get("evidence_pack"), dict) else {}
1137
- aggregate = analysis_input.get("aggregate_stats") if isinstance(analysis_input.get("aggregate_stats"), dict) else {}
1138
-
1139
- sample_confidence = _safe_text(evidence.get("sample_confidence")) or _fallback_sample_confidence(_safe_int(evidence.get("sample_size"), 0))
1140
- score_base = {"low": 58, "mid": 72, "high": 84}.get(sample_confidence, 60)
1141
- if validation_errors:
1142
- score_base -= min(len(validation_errors) * 2, 12)
1143
- business_score = int(_clamp(score_base + (6 if (bridge.get("likely_products") or []) else -4), 40, 92))
1144
- benchmark_gap_score = int(_clamp(100 - business_score + 8, 35, 88))
1145
- hook_items = [item.get("value") for item in ((aggregate.get("global_hook_type_distribution") or {}).get("items") or []) if isinstance(item, dict)]
1146
- structure_items = [item.get("value") for item in ((aggregate.get("global_structure_type_distribution") or {}).get("items") or []) if isinstance(item, dict)]
1147
- dominant_themes = content.get("dominant_themes") if isinstance(content.get("dominant_themes"), list) else []
1148
- return {
1149
- "author_portrait": _safe_text(positioning.get("one_liner")) or "作者画像数据不足。",
1150
- "business_analysis": ";".join([
1151
- _safe_text(positioning.get("core_value_proposition")),
1152
- f"主要信任来源:{_safe_text(trust.get('primary_trust_source')) or '待确认'}",
1153
- f"商业承接:{_safe_text(bridge.get('conversion_path')) or '当前证据不足'}",
1154
- ]).strip(";"),
1155
- "benchmark_analysis": ";".join([
1156
- f"高频 hook:{', '.join(hook_items[:3]) or '待补'}",
1157
- f"常见结构:{', '.join(structure_items[:3]) or '待补'}",
1158
- f"主主题:{', '.join(dominant_themes[:3]) or '待补'}",
1159
- ]).strip(";"),
1160
- "business_score": business_score,
1161
- "benchmark_gap_score": benchmark_gap_score,
1162
- "style_radar": {"选题": 76, "表达": 78, "结构": 79, "节奏": 74, "人设": 73, "转化": 70, "差异化": 71, "稳定性": 79},
1163
- "core_contradictions": [tensions.get("most_important_tension") or "张力信息不足"],
1164
- "recommendations": [clone.get("if_only_learn_one_thing") or "优先学习其可复用的结构机制"],
1165
- }