@tikomni/skills 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (474)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -5
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{single-work-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +98 -2
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +106 -141
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{single-work-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +3 -1
  30. package/skills/{creator-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  33. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  34. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  36. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  37. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  38. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  39. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +58 -149
  40. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  41. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +30 -119
  42. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  43. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  45. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  46. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  47. package/skills/creator-analysis/SKILL.md +0 -95
  48. package/skills/creator-analysis/agents/openai.yaml +0 -4
  49. package/skills/creator-analysis/env.example +0 -36
  50. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  51. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  52. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  53. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  54. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  55. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  56. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  57. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  59. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  60. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  61. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  63. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  64. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  65. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  66. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  67. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  68. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  69. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  71. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  72. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  73. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  74. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  75. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  76. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  77. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  78. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  79. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  80. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  81. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  82. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  83. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  84. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  85. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  86. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  87. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  88. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  89. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  90. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  91. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  92. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  93. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  94. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  96. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  100. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  101. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  103. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  104. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  105. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  106. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  107. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  108. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  109. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  110. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  113. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  114. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  116. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  117. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  118. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  119. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  120. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  121. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  122. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  123. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  124. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  125. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  126. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  127. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  128. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  129. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  130. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  131. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  132. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  133. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  134. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  135. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  136. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  137. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  138. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  139. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  140. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  141. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  142. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  143. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  144. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  145. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  146. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  147. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  148. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  149. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  153. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  154. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  155. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  156. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  157. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  158. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  159. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  160. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  161. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  162. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  163. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  164. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  165. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  167. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  168. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  169. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  170. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  171. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  172. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  173. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  174. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  175. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  176. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  177. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  178. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  179. package/skills/creator-analysis/references/workflow.md +0 -23
  180. package/skills/creator-analysis/scripts/__init__.py +0 -0
  181. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  185. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  186. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  187. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  188. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  189. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  190. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  191. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  194. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  195. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  196. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  197. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  198. package/skills/creator-analysis/scripts/core/tikomni_common.py +0 -588
  199. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  200. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  202. package/skills/creator-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  203. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  205. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  207. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  208. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  209. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  210. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  211. package/skills/meta-capability/SKILL.md +0 -69
  212. package/skills/meta-capability/agents/openai.yaml +0 -4
  213. package/skills/meta-capability/env.example +0 -42
  214. package/skills/meta-capability/references/api-capability-index.md +0 -92
  215. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  216. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  217. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  218. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  219. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  220. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  222. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  223. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  224. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  226. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  227. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  228. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  229. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  230. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  231. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  232. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  233. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  234. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  235. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  236. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  237. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  238. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  239. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  240. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  241. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  242. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  243. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  244. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  245. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  246. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  247. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  248. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  249. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  250. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  251. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  252. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  253. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  254. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  255. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  256. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  257. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  258. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  259. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  263. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  264. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  265. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  266. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  267. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  268. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  269. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  270. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  271. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  272. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  273. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  274. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  276. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  277. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  279. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  280. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  281. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  282. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  283. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  284. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  285. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  286. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  287. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  288. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  289. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  290. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  291. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  292. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  293. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  294. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  295. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  296. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  297. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  298. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  299. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  300. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  301. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  302. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  303. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  304. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  305. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  306. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  307. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  308. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  309. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  310. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  311. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  312. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  316. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  317. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  318. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  319. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  320. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  321. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  322. package/skills/meta-capability/references/dispatch.md +0 -27
  323. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  324. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  325. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  326. package/skills/meta-capability/scripts/__init__.py +0 -1
  327. package/skills/meta-capability/scripts/call_route.py +0 -141
  328. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  329. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  330. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  331. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  332. package/skills/meta-capability/scripts/test_auth.py +0 -98
  333. package/skills/single-work-analysis/SKILL.md +0 -62
  334. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  335. package/skills/single-work-analysis/env.example +0 -36
  336. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  337. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  339. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  340. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  341. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  342. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  344. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  345. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  346. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  348. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  349. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  350. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  353. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  354. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  356. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  357. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  358. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  359. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  360. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  361. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  362. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  363. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  364. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  366. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  373. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  375. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  376. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  377. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  378. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  379. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  381. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  385. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  386. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  388. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  389. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  390. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  391. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  392. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  393. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  394. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  395. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  398. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  399. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  401. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  402. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  403. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  404. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  405. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  406. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  407. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  409. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  410. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  411. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  412. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  413. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  414. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  415. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  416. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  417. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  418. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  419. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  420. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  421. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  422. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  423. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  424. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  425. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  426. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  427. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  428. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  429. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  430. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  431. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  432. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  434. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  438. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  439. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  441. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  442. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  443. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  444. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -47
  445. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  446. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  447. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  448. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  449. package/skills/single-work-analysis/references/prompt-contracts/analysis-bundle.md +0 -82
  450. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  451. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  452. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  453. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  454. package/skills/single-work-analysis/scripts/core/analysis_adapter.py +0 -384
  455. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -456
  456. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  457. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  458. package/skills/single-work-analysis/scripts/core/progress_report.py +0 -258
  459. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -220
  460. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  461. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  462. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  463. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  464. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  465. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  466. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  468. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -752
  469. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  470. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  471. /package/skills/{single-work-analysis → social-media-crawl}/scripts/core/tikomni_common.py +0 -0
  472. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  473. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  474. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -1,1165 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Support helpers for author_home v2 standardized input / aggregate stats / validation."""
3
-
4
- from __future__ import annotations
5
-
6
- import json
7
- import math
8
- import re
9
- from collections import Counter
10
- from datetime import datetime, timezone
11
- from pathlib import Path
12
- from typing import Any, Dict, List, Optional, Sequence, Tuple
13
-
14
- import jsonschema
15
-
16
- SKILL_ROOT = Path(__file__).resolve().parents[3]
17
- INPUT_SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "author-analysis-input-v1.schema.json"
18
- OUTPUT_SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "author-analysis-v2.schema.json"
19
- PROMPT_CONTRACT_PATH = SKILL_ROOT / "references" / "prompt-contracts" / "author-analysis-v2.md"
20
-
21
- LOW_HIGH_MID = {"low", "mid", "high"}
22
- RELATIONSHIP_DISTANCE = {"near", "mid", "far"}
23
- AUTHOR_TYPES = {
24
- "ai_content_growth", "benchmark_deconstruction", "workflow_automation", "tool_education", "business_cognition",
25
- "ip_growth", "industry_commentary", "case_breakdown", "efficiency_hacking", "creator_education",
26
- }
27
- PRIMARY_ROLES = {"coach", "operator", "researcher", "builder", "companion", "commentator", "teacher", "practitioner", "evangelist", "critic"}
28
- TRUST_SOURCES = {"results", "experience", "case_studies", "systematized_method", "demonstration", "strong_judgment", "consistency", "authority_signal", "community_signal"}
29
- PROBLEM_DEFINITION_STYLES = {"cognition_problem", "execution_problem", "model_problem", "stage_mismatch", "positioning_problem", "traffic_problem", "conversion_problem", "offer_problem", "capability_problem"}
30
- REASONING_MODES = {"benchmark_reasoning", "concept_deconstruction", "contrast_reasoning", "case_induction", "result_backtracking", "anti_common_sense", "framework_building", "workflow_packaging", "data_validation"}
31
- HOOK_TYPES_ENUM = {"result_hook", "curiosity_hook", "shortcut_hook", "pain_point_hook", "comparison_hook", "proof_hook"}
32
- STRUCTURE_TYPES = {"hook_demo_result", "benchmark_then_clone", "problem_solution_cta", "proof_then_pitch"}
33
- CTA_TYPES = {"comment_cta", "private_message_cta", "follow_cta", "collect_cta", "share_cta", "lead_magnet_cta", "weak_cta", "no_cta"}
34
- CONTENT_FORMS = {"talking_head", "voiceover", "screen_recording", "slideshow", "mixed_edit", "live_clip", "interview_clip"}
35
- STYLE_MARKERS_ENUM = {"rhetorical_question", "imperative_tone", "strong_assertion", "self_mockery", "quote_like_phrase", "emotional_wording"}
36
- FUNNEL_ROLES = {"acquire_attention", "build_trust", "educate", "qualify", "convert", "upsell", "retain", "occupy_mindshare"}
37
- LIKELY_PRODUCTS = {"workflow_templates", "prompt_pack", "training_camp", "community", "consulting", "done_for_you_service", "course", "membership", "software_tool", "report"}
38
- BUSINESS_MODEL_SIGNALS = {"high_ip_dependency", "strong_toolization", "template_scalability", "service_extension", "low_marginal_distribution", "high_touch_delivery", "community_leverage", "content_led_growth"}
39
- CORE_COGNITIVE_ACTIONS = {"benchmark_deconstruction", "workflow_packaging", "concept_deconstruction", "contrast_reasoning", "case_induction", "framework_extraction", "result_compression", "anti_common_sense_reframing"}
40
- TOPIC_SOURCES = {"benchmark_authors", "viral_cases", "audience_questions", "workflow_demos", "industry_pain_points", "personal_experience", "tool_updates", "client_cases", "market_signals"}
41
- TOPIC_GOALS = {"acquire_attention", "build_trust", "show_capability", "drive_conversion", "occupy_mindshare", "differentiate_positioning", "nurture_audience"}
42
- WINNING_CONTENT_STRUCTURES = set(STRUCTURE_TYPES)
43
- MEMORABILITY_SOURCES = {"strong_keywords", "sharp_judgment", "identity_projection", "repeatable_phrases", "result_imagery", "emotional_contrast", "unexpected_framing"}
44
- EMOTION_PATTERNS = {"calm_assertive", "sharp_direct", "playful_mocking", "high_energy_excitement", "pragmatic_cold", "empathetic_supportive", "provocative_challenge"}
45
- TRAFFIC_DRIVERS = {"benchmark_target", "result_promise", "shortcut_imagination", "curiosity_gap", "identity_desire", "fear_of_missing_out", "proof_signal", "controversy_edge"}
46
- STOPWORDS = {
47
- "我们", "你们", "他们", "这个", "那个", "一个", "一种", "已经", "如果", "因为", "所以", "然后", "就是", "没有", "自己",
48
- "可以", "还是", "需要", "以及", "并且", "真的", "今天", "现在", "内容", "作者", "账号", "作品", "视频", "老师", "方法",
49
- "the", "and", "for", "that", "with", "from", "this", "you", "your", "are", "was", "were", "have", "has", "had", "into",
50
- }
51
- SCHEMA_CACHE: Dict[Path, Dict[str, Any]] = {}
52
-
53
-
54
- class AnalysisResourceError(RuntimeError):
55
- def __init__(self, *, code: str, path: Path, detail: str = "") -> None:
56
- self.code = code
57
- self.path = path
58
- self.detail = detail
59
- message = f"{code}:{path}"
60
- if detail:
61
- message = f"{message}:{detail}"
62
- super().__init__(message)
63
-
64
- REQUIRED_V2_FIELDS = {
65
- "author_positioning": ["one_liner", "author_type", "primary_role", "target_audience", "core_problem_solved", "core_value_proposition", "evidence"],
66
- "trust_model": ["primary_trust_source", "secondary_trust_sources", "trust_building_mechanisms", "trust_risks", "relationship_posture", "evidence"],
67
- "cognitive_engine": ["worldview", "value_priority", "problem_definition_style", "reasoning_modes", "knowledge_sources", "judgment_style", "core_cognitive_actions", "evidence"],
68
- "expression_hooks": ["language_style", "hook_keywords", "hook_types", "argument_patterns", "emotion_patterns", "memorability_sources", "evidence"],
69
- "content_mechanism": ["topic_sources", "topic_goals", "winning_content_structures", "repeatable_series", "traffic_drivers", "content_flywheel", "cross_platform_variation", "dominant_themes", "theme_clusters", "evidence"],
70
- "commercial_bridge": ["content_role_in_funnel", "likely_products", "conversion_path", "content_product_fit", "business_model_signals", "commercial_risks", "evidence"],
71
- "core_tensions": ["tensions", "most_important_tension", "evidence"],
72
- "evidence_pack": ["sample_size", "sample_confidence", "representative_works", "top_keywords", "observed_hooks", "observed_ctas", "observed_structures", "notes"],
73
- "clone_guidance": ["copyable_elements", "non_copyable_elements", "borrowable_flavor", "danger_zones", "if_only_learn_one_thing"],
74
- }
75
-
76
-
77
- def _safe_text(value: Any) -> str:
78
- if value is None:
79
- return ""
80
- if isinstance(value, str):
81
- return value.strip()
82
- return str(value).strip()
83
-
84
-
85
- def _safe_int(value: Any, default: int = 0) -> int:
86
- try:
87
- if value is None:
88
- return default
89
- if isinstance(value, bool):
90
- return int(value)
91
- if isinstance(value, (int, float)):
92
- return int(value)
93
- text = _safe_text(value).replace(",", "")
94
- return int(float(text)) if text else default
95
- except Exception:
96
- return default
97
-
98
-
99
- def _safe_float(value: Any, default: float = 0.0) -> float:
100
- try:
101
- if value is None:
102
- return default
103
- if isinstance(value, bool):
104
- return float(int(value))
105
- if isinstance(value, (int, float)):
106
- return float(value)
107
- text = _safe_text(value).replace(",", "")
108
- return float(text) if text else default
109
- except Exception:
110
- return default
111
-
112
-
113
- def _clamp(value: float, low: float, high: float) -> float:
114
- return max(low, min(high, value))
115
-
116
-
117
- def load_json_schema(path: Path) -> Dict[str, Any]:
118
- try:
119
- return json.loads(path.read_text(encoding="utf-8"))
120
- except Exception as error:
121
- raise AnalysisResourceError(code="schema_load_failed", path=path, detail=f"{type(error).__name__}:{error}") from error
122
-
123
-
124
- def prompt_contract_text() -> str:
125
- try:
126
- return PROMPT_CONTRACT_PATH.read_text(encoding="utf-8").strip()
127
- except Exception as error:
128
- raise AnalysisResourceError(code="contract_load_failed", path=PROMPT_CONTRACT_PATH, detail=f"{type(error).__name__}:{error}") from error
129
-
130
-
131
- def _load_schema(path: Path) -> Dict[str, Any]:
132
- cached = SCHEMA_CACHE.get(path)
133
- if cached is not None:
134
- return cached
135
- schema = load_json_schema(path)
136
- SCHEMA_CACHE[path] = schema
137
- return schema
138
-
139
-
140
- def _schema_errors(payload: Any, path: Path) -> List[Dict[str, str]]:
141
- schema = _load_schema(path)
142
- if not schema:
143
- raise AnalysisResourceError(code="schema_empty", path=path)
144
- try:
145
- validator = jsonschema.Draft202012Validator(schema)
146
- rows: List[Dict[str, str]] = []
147
- for error in sorted(validator.iter_errors(payload), key=lambda item: list(item.absolute_path)):
148
- field = ".".join(str(part) for part in error.absolute_path) or "$"
149
- rows.append({"field": field, "reason": f"schema:{error.message}"})
150
- return rows
151
- except Exception as error:
152
- return [{"field": "$", "reason": f"schema_runtime:{type(error).__name__}:{error}"}]
153
-
154
-
155
- def _dedupe_keep_order(values: Sequence[str]) -> List[str]:
156
- result: List[str] = []
157
- seen = set()
158
- for value in values:
159
- clean = _safe_text(value)
160
- if not clean or clean in seen:
161
- continue
162
- seen.add(clean)
163
- result.append(clean)
164
- return result
165
-
166
-
167
- def _safe_text_list(value: Any) -> List[str]:
168
- if not isinstance(value, list):
169
- return []
170
- result: List[str] = []
171
- for item in value:
172
- if isinstance(item, dict):
173
- for key in ("name", "value", "label", "hashtag_name", "search_text", "tag_name", "text"):
174
- text = _safe_text(item.get(key))
175
- if text:
176
- result.append(text)
177
- break
178
- continue
179
- text = _safe_text(item)
180
- if text:
181
- result.append(text)
182
- return _dedupe_keep_order(result)
183
-
184
-
185
- def _dedupe_error_list(errors: Sequence[Dict[str, str]]) -> List[Dict[str, str]]:
186
- result: List[Dict[str, str]] = []
187
- seen = set()
188
- for item in errors:
189
- field = _safe_text(item.get("field"))
190
- reason = _safe_text(item.get("reason"))
191
- key = (field, reason)
192
- if key in seen:
193
- continue
194
- seen.add(key)
195
- result.append({"field": field, "reason": reason})
196
- return result
197
-
198
-
199
- def _parse_datetime(value: Any) -> Optional[datetime]:
200
- if value is None:
201
- return None
202
- if isinstance(value, datetime):
203
- return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
204
- num = _safe_int(value, default=0)
205
- if num > 0:
206
- if num > 1_000_000_000_000:
207
- num //= 1000
208
- try:
209
- return datetime.fromtimestamp(num, tz=timezone.utc)
210
- except Exception:
211
- return None
212
- text = _safe_text(value)
213
- if not text:
214
- return None
215
- for candidate in (text, text.replace("Z", "+00:00")):
216
- try:
217
- parsed = datetime.fromisoformat(candidate)
218
- return parsed if parsed.tzinfo else parsed.replace(tzinfo=timezone.utc)
219
- except Exception:
220
- continue
221
- for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d %H:%M:%S", "%Y/%m/%d"):
222
- try:
223
- return datetime.strptime(text, fmt).replace(tzinfo=timezone.utc)
224
- except Exception:
225
- continue
226
- return None
227
-
228
-
229
- def _publish_days_ago(value: Any) -> Optional[int]:
230
- parsed = _parse_datetime(value)
231
- if parsed is None:
232
- return None
233
- return max(int((datetime.now(timezone.utc) - parsed).total_seconds() // 86400), 0)
234
-
235
-
236
- def _first_sentence(text: str) -> str:
237
- if not text:
238
- return ""
239
- units = [part.strip() for part in re.split(r"[。!?!?\n]+", text) if part.strip()]
240
- return units[0] if units else text[:80]
241
-
242
-
243
- def _unique_tokens(text: str) -> List[str]:
244
- if not text:
245
- return []
246
- lowered = text.lower()
247
- tokens: List[str] = []
248
- for token in re.findall(r"[a-z0-9_]{3,24}", lowered):
249
- if token not in STOPWORDS:
250
- tokens.append(token)
251
- for block in re.findall(r"[\u4e00-\u9fff]{2,8}", text):
252
- if block not in STOPWORDS:
253
- tokens.append(block)
254
- return _dedupe_keep_order(tokens)
255
-
256
-
257
- def _top_counter(counter: Counter, *, limit: int = 10) -> List[Dict[str, Any]]:
258
- total = sum(counter.values())
259
- rows: List[Dict[str, Any]] = []
260
- for key, count in counter.most_common(limit):
261
- rows.append({"value": key, "count": int(count), "ratio": round((count / total), 4) if total else 0.0})
262
- return rows
263
-
264
-
265
- def _distribution_from_values(values: Sequence[str], *, limit: int = 10) -> Dict[str, Any]:
266
- counter = Counter(_safe_text(value) for value in values if _safe_text(value))
267
- return {"total": int(sum(counter.values())), "items": _top_counter(counter, limit=limit)}
268
-
269
-
270
- def _merged_text(work: Dict[str, Any]) -> str:
271
- parts = [
272
- _safe_text(work.get("title")),
273
- _safe_text(work.get("caption_raw") or work.get("desc")),
274
- _safe_text(work.get("primary_text") or work.get("asr_clean") or work.get("asr_raw")),
275
- ]
276
- return "\n".join(part for part in parts if part)
277
-
278
-
279
- def _performance_metrics(work: Dict[str, Any]) -> Tuple[int, int, int, int, int]:
280
- metrics = work.get("metrics") if isinstance(work.get("metrics"), dict) else {}
281
- return (
282
- _safe_int(work.get("digg_count"), _safe_int(metrics.get("like"), 0)),
283
- _safe_int(work.get("comment_count"), _safe_int(metrics.get("comment"), 0)),
284
- _safe_int(work.get("collect_count"), _safe_int(metrics.get("collect"), 0)),
285
- _safe_int(work.get("share_count"), _safe_int(metrics.get("share"), 0)),
286
- _safe_int(work.get("play_count"), _safe_int(metrics.get("play"), 0)),
287
- )
288
-
289
-
290
- def _score_signals(texts: Sequence[Tuple[str, float]], mapping: Sequence[Tuple[str, Sequence[str]]], *, default: str) -> str:
291
- scores = {label: 0.0 for label, _ in mapping}
292
- for text, weight in texts:
293
- lowered = _safe_text(text).lower()
294
- if not lowered:
295
- continue
296
- for label, tokens in mapping:
297
- hit_count = sum(1 for token in tokens if token and token.lower() in lowered)
298
- if hit_count:
299
- scores[label] += weight * hit_count
300
- ranked = sorted(scores.items(), key=lambda item: (-item[1], item[0]))
301
- return ranked[0][0] if ranked and ranked[0][1] > 0 else default
302
-
303
-
304
- def _pick_hook_type(text: str, existing: Any = None) -> str:
305
- clean = _safe_text(existing)
306
- if clean in HOOK_TYPES_ENUM:
307
- return clean
308
- first = _first_sentence(text)
309
- return _score_signals(
310
- [(first, 2.0), (text[:180], 1.0)],
311
- [
312
- ("result_hook", ["结果", "涨粉", "成交", "翻倍", "跑通", "案例结果"]),
313
- ("curiosity_hook", ["为什么", "怎么", "?", "?", "真相", "你知道吗"]),
314
- ("shortcut_hook", ["一键", "直接", "立刻", "马上", "不用", "三步", "复制"]),
315
- ("pain_point_hook", ["不会", "卡住", "焦虑", "没流量", "做不出来", "误区"]),
316
- ("comparison_hook", ["对比", "vs", "还是", "比", "A还是B"]),
317
- ("proof_hook", ["案例", "证明", "数据", "实测", "截图", "后台"]),
318
- ],
319
- default="curiosity_hook" if first else "result_hook",
320
- )
321
-
322
-
323
- def _pick_structure_type(text: str, existing: Any = None) -> str:
324
- clean = _safe_text(existing)
325
- if clean in STRUCTURE_TYPES:
326
- return clean
327
- return _score_signals(
328
- [(text, 1.0)],
329
- [
330
- ("benchmark_then_clone", ["对标", "拆解", "复刻", "照着做", "临摹"]),
331
- ("problem_solution_cta", ["问题", "解决", "评论", "私信", "领取", "回复"]),
332
- ("proof_then_pitch", ["案例", "证明", "结果", "报名", "咨询", "私信"]),
333
- ("hook_demo_result", ["演示", "实操", "结果", "前后对比", "跑一遍"]),
334
- ],
335
- default="hook_demo_result",
336
- )
337
-
338
-
339
- def _pick_cta_type(text: str, existing: Any = None) -> str:
340
- clean = _safe_text(existing)
341
- if clean in CTA_TYPES:
342
- return clean
343
- tail = "\n".join([line.strip() for line in _safe_text(text).splitlines()[-3:] if line.strip()])
344
- return _score_signals(
345
- [(tail, 2.0), (text, 0.8)],
346
- [
347
- ("comment_cta", ["评论", "留言", "扣1", "回复区"]),
348
- ("private_message_cta", ["私信", "加我", "vx", "微信", "主页联系"]),
349
- ("lead_magnet_cta", ["领取", "模板", "资料", "清单", "关键词"]),
350
- ("collect_cta", ["收藏", "存下", "保存"]),
351
- ("share_cta", ["转发", "分享给", "发给"]),
352
- ("follow_cta", ["关注", "下期见"]),
353
- ],
354
- default="no_cta",
355
- )
356
-
357
-
358
- def _style_markers(text: str, existing: Any = None) -> List[str]:
359
- markers: List[str] = []
360
- if isinstance(existing, list):
361
- markers.extend([_safe_text(item) for item in existing if _safe_text(item) in STYLE_MARKERS_ENUM])
362
- lowered = _safe_text(text).lower()
363
- mapping = [
364
- ("rhetorical_question", ["为什么", "怎么", "?", "?"]),
365
- ("imperative_tone", ["一定", "必须", "直接", "马上", "立刻"]),
366
- ("strong_assertion", ["就是", "本质上", "根本", "一定要"]),
367
- ("self_mockery", ["我自己也踩过", "我之前也傻", "我也翻车", "我也被打脸"]),
368
- ("quote_like_phrase", ["‘", "’", "“", "”", "所谓", "一句话说"]),
369
- ("emotional_wording", ["焦虑", "崩溃", "爽", "绝了", "离谱", "炸裂"]),
370
- ]
371
- for label, tokens in mapping:
372
- if any(token.lower() in lowered for token in tokens):
373
- markers.append(label)
374
- return _dedupe_keep_order(markers)[:6]
375
-
376
-
377
- def _pick_content_form(work: Dict[str, Any]) -> str:
378
- existing = _safe_text(work.get("content_form"))
379
- if existing in CONTENT_FORMS:
380
- return existing
381
- work_modality = _safe_text(work.get("work_modality")).lower()
382
- raw_text = _merged_text(work)
383
- if work_modality == "text":
384
- return "slideshow"
385
- if "screen" in raw_text.lower() or "录屏" in raw_text:
386
- return "screen_recording"
387
- if "采访" in raw_text or "对谈" in raw_text:
388
- return "interview_clip"
389
- if "直播" in raw_text:
390
- return "live_clip"
391
- return "talking_head" if work_modality == "video" else "voiceover"
392
-
393
-
394
- def _normalize_work(profile: Dict[str, Any], work: Dict[str, Any]) -> Dict[str, Any]:
395
- digg, comment, collect, share, play = _performance_metrics(work)
396
- publish_time = work.get("publish_time") or work.get("create_time") or work.get("create_time_sec")
397
- publish_days_ago = _publish_days_ago(publish_time)
398
- performance_score = round(0.15 * digg + 0.20 * comment + 0.35 * collect + 0.30 * share, 4)
399
- norm_divisor = math.log((publish_days_ago or 0) + 2)
400
- performance_score_norm = round((performance_score / norm_divisor) if norm_divisor > 0 else performance_score, 4)
401
- title = _safe_text(work.get("title"))
402
- caption_raw = _safe_text(work.get("caption_raw") or work.get("desc"))
403
- primary_text = _safe_text(work.get("primary_text") or work.get("asr_clean") or work.get("asr_raw") or caption_raw)
404
- primary_text_source_raw = _safe_text(work.get("primary_text_source"))
405
- primary_text_source = (
406
- primary_text_source_raw
407
- if primary_text_source_raw in {"asr_clean", "caption_raw"}
408
- else ("asr_clean" if _safe_text(work.get("work_modality")) == "video" else "caption_raw")
409
- )
410
- work_modality = _safe_text(work.get("work_modality")) or ("video" if _safe_text(work.get("video_download_url") or work.get("video_down_url") or work.get("asr_raw")) else "text")
411
- merged = "\n".join(part for part in [title, caption_raw, primary_text] if part)
412
- return {
413
- "platform_work_id": _safe_text(work.get("platform_work_id")),
414
- "title": title,
415
- "caption_raw": caption_raw,
416
- "work_modality": work_modality,
417
- "primary_text": primary_text,
418
- "primary_text_source": primary_text_source,
419
- "published_date": _safe_text(work.get("published_date")) or "",
420
- "publish_time": publish_time,
421
- "publish_days_ago": publish_days_ago,
422
- "duration_ms": _safe_int(work.get("duration_ms"), 0),
423
- "digg_count": digg,
424
- "comment_count": comment,
425
- "collect_count": collect,
426
- "share_count": share,
427
- "play_count": play,
428
- "content_form": _pick_content_form(work),
429
- "tags": _safe_text_list(work.get("tags")),
430
- "author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
431
- "author_name": _safe_text(profile.get("nickname")) or "作者",
432
- "performance_score": performance_score,
433
- "performance_score_norm": performance_score_norm,
434
- "bucket": "",
435
- "hook_type": _pick_hook_type(merged, work.get("hook_type") or work.get("hook")),
436
- "structure_type": _pick_structure_type(merged, work.get("structure_type") or work.get("content_structure")),
437
- "cta_type": _pick_cta_type(merged, work.get("cta_type") or work.get("cta")),
438
- "style_markers": _style_markers(merged, work.get("style_markers") or work.get("style_tags")),
439
- "analysis_eligibility": _safe_text(work.get("analysis_eligibility")) or "eligible",
440
- "analysis_exclusion_reason": _safe_text(work.get("analysis_exclusion_reason")),
441
- "analysis_artifact_status": work.get("analysis_artifact_status"),
442
- "recent_30d_score_rank": None,
443
- }
444
-
445
-
446
def _assign_recent_30d_ranks(items: List[Dict[str, Any]]) -> None:
    """Write ``recent_30d_score_rank`` onto every work in ``items`` (in place).

    Works whose ``publish_days_ago`` is at most 30 are ordered by descending
    ``performance_score_norm`` (ties broken by ``platform_work_id``) and
    numbered from 1. Every other work, matched by id, gets ``None``.
    """

    def _order(entry: Dict[str, Any]) -> Tuple[float, str]:
        score = _safe_float(entry.get("performance_score_norm"), 0.0)
        return (-score, _safe_text(entry.get("platform_work_id")))

    # 999999 is the fallback handed to _safe_int, keeping such works out of the window.
    recent_window = [entry for entry in items if _safe_int(entry.get("publish_days_ago"), 999999) <= 30]
    recent_window.sort(key=_order)

    ranked_ids = set()
    for position, entry in enumerate(recent_window, start=1):
        entry["recent_30d_score_rank"] = position
        ranked_ids.add(_safe_text(entry.get("platform_work_id")))

    for entry in items:
        if _safe_text(entry.get("platform_work_id")) not in ranked_ids:
            entry["recent_30d_score_rank"] = None
455
-
456
-
457
def _assign_buckets(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Rank works by normalized score and label each with a performance bucket.

    Returns a new list sorted by descending ``performance_score_norm`` (ties
    broken by ``platform_work_id``). Each work is mutated in place with
    ``bucket``, ``all_time_score_rank`` and, through
    ``_assign_recent_30d_ranks``, ``recent_30d_score_rank``.

    With fewer than 20 works the buckets are fixed-size (first 4 Top, next 4
    Strong, Mid up to ``max(10, total - 2)``, rest Bottom). Otherwise they are
    percentile cut-offs at 20% / 50% / 85%.
    """
    total = len(items)

    def _order(entry: Dict[str, Any]) -> Tuple[float, str]:
        score = _safe_float(entry.get("performance_score_norm"), 0.0)
        return (-score, _safe_text(entry.get("platform_work_id")))

    ranked = sorted(items, key=_order)
    small_set_mid_limit = max(10, total - 2)

    for position, entry in enumerate(ranked):
        if total < 20:
            if position < 4:
                label = "Top"
            elif position < 8:
                label = "Strong"
            elif position < small_set_mid_limit:
                label = "Mid"
            else:
                label = "Bottom"
        else:
            percentile = (position + 1) / total
            if percentile <= 0.20:
                label = "Top"
            elif percentile <= 0.50:
                label = "Strong"
            elif percentile <= 0.85:
                label = "Mid"
            else:
                label = "Bottom"
        entry["bucket"] = label
        entry["all_time_score_rank"] = position + 1

    _assign_recent_30d_ranks(ranked)
    return ranked
476
-
477
-
478
- def _scaled_bucket_quota(sample_size: int) -> Dict[str, int]:
479
- if sample_size <= 0:
480
- return {"Top": 0, "Strong": 0, "Mid": 0, "Bottom": 0}
481
- base = {"Top": 18, "Strong": 18, "Mid": 14, "Bottom": 10}
482
- raw = {key: sample_size * (value / 60.0) for key, value in base.items()}
483
- quota = {key: int(math.floor(value)) for key, value in raw.items()}
484
- quota["Bottom"] = max(quota.get("Bottom", 0), 1)
485
- deficit = sample_size - sum(quota.values())
486
- order = sorted(raw.items(), key=lambda item: raw[item[0]] - quota[item[0]], reverse=True)
487
- idx = 0
488
- while deficit > 0 and order:
489
- key = order[idx % len(order)][0]
490
- quota[key] += 1
491
- deficit -= 1
492
- idx += 1
493
- while sum(quota.values()) > sample_size:
494
- for key in ("Mid", "Strong", "Top", "Bottom"):
495
- min_keep = 1 if key == "Bottom" else 0
496
- if quota[key] > min_keep and sum(quota.values()) > sample_size:
497
- quota[key] -= 1
498
- if quota["Top"] + quota["Strong"] < math.ceil(sample_size * 0.55):
499
- needed = math.ceil(sample_size * 0.55) - (quota["Top"] + quota["Strong"])
500
- for _ in range(needed):
501
- if quota["Mid"] > 0:
502
- quota["Mid"] -= 1
503
- quota["Top"] += 1
504
- elif quota["Bottom"] > 1:
505
- quota["Bottom"] -= 1
506
- quota["Strong"] += 1
507
- return quota
508
-
509
-
510
- def _pick_sample_size(total: int) -> int:
511
- if total <= 0:
512
- return 0
513
- return min(max(int(round(total * 0.30)), 40), 80, total)
514
-
515
-
516
def _value_variants(item: Dict[str, Any], field: str) -> List[str]:
    """Return the non-empty ``_safe_text`` values stored under ``field``.

    A list value contributes one entry per non-empty element. Any other
    value contributes at most one entry.
    """
    raw = item.get(field)
    candidates = raw if isinstance(raw, list) else [raw]
    variants: List[str] = []
    for candidate in candidates:
        text = _safe_text(candidate)
        if text:
            variants.append(text)
    return variants
522
-
523
-
524
def _pick_diverse_items(pool: List[Dict[str, Any]], *, selected_ids: set, limits: Sequence[Tuple[str, int]], cap: int) -> List[Dict[str, Any]]:
    """Greedily pick works from ``pool`` that add not-yet-seen field values.

    ``limits`` is a sequence of ``(field, minimum)`` pairs processed in order.
    For each field with a positive minimum, the pool is scanned and a work is
    taken when it contributes a value of that field that has not been covered.
    Scanning of that field stops once ``minimum`` distinct values are covered.
    Works without an id, or whose id is in ``selected_ids`` or already picked
    here, are skipped. At most ``cap`` works are returned. ``selected_ids`` is
    only read, never modified.
    """
    chosen: List[Dict[str, Any]] = []
    chosen_ids: set = set()
    seen_values: Dict[str, set] = {name: set() for name, _ in limits}

    for target_field, required in limits:
        if required <= 0:
            continue
        for candidate in pool:
            if len(chosen) >= cap:
                return chosen
            candidate_id = _safe_text(candidate.get("platform_work_id"))
            if not candidate_id:
                continue
            if candidate_id in selected_ids or candidate_id in chosen_ids:
                continue
            known = seen_values[target_field]
            if all(value in known for value in _value_variants(candidate, target_field)):
                # Nothing new for this field (this also covers "no values at all").
                continue
            chosen.append(candidate)
            chosen_ids.add(candidate_id)
            # A picked work counts towards the coverage of every tracked field.
            for tracked_field, _ in limits:
                seen_values[tracked_field].update(_value_variants(candidate, tracked_field))
            if len(seen_values[target_field]) >= required:
                break
    return chosen
547
-
548
-
549
def _sample_standard_works(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Select a bucket-balanced sample of works for analysis.

    Steps:
      1. Size the sample with ``_pick_sample_size`` and split it into
         per-bucket quotas with ``_scaled_bucket_quota``.
      2. Seed the sample with diverse Top works (distinct ``content_form``,
         ``hook_type`` and ``structure_type`` values), capped at the Top quota.
      3. Fill each bucket, in input order, up to its quota.
      4. If the sample is still short, top it up from ``items`` in order.

    Works are grouped by their ``bucket`` text, and a blank bucket is grouped
    under "Mid". Quota usage is tracked per group with a running counter, so
    blank-bucket works count against the Mid quota they are drawn from and
    the fill loop no longer rescans ``selected`` on every iteration.

    Returns at most ``sample_size`` works; the input list is not modified.
    """
    total = len(items)
    if total <= 0:
        return []
    sample_size = _pick_sample_size(total)
    quota = _scaled_bucket_quota(sample_size)

    bucket_groups: Dict[str, List[Dict[str, Any]]] = {"Top": [], "Strong": [], "Mid": [], "Bottom": []}
    for item in items:
        bucket_groups.setdefault(_safe_text(item.get("bucket")) or "Mid", []).append(item)

    selected: List[Dict[str, Any]] = []
    selected_ids: set = set()
    # Number of works already taken from each group; replaces the per-iteration recount.
    taken: Dict[str, int] = {name: 0 for name in bucket_groups}

    diverse_top = _pick_diverse_items(
        bucket_groups["Top"],
        selected_ids=selected_ids,
        limits=(("content_form", 3), ("hook_type", 2), ("structure_type", 2)),
        cap=quota.get("Top", 0),
    )
    for item in diverse_top:
        selected.append(item)
        selected_ids.add(_safe_text(item.get("platform_work_id")))
        taken["Top"] += 1

    for bucket in ("Top", "Strong", "Mid", "Bottom"):
        limit = quota.get(bucket, 0)
        for item in bucket_groups[bucket]:
            if taken[bucket] >= limit:
                break
            work_id = _safe_text(item.get("platform_work_id"))
            if work_id in selected_ids:
                continue
            selected.append(item)
            selected_ids.add(work_id)
            taken[bucket] += 1

    if len(selected) < sample_size:
        for item in items:
            work_id = _safe_text(item.get("platform_work_id"))
            if work_id in selected_ids:
                continue
            selected.append(item)
            selected_ids.add(work_id)
            if len(selected) >= sample_size:
                break
    return selected[:sample_size]
586
-
587
-
588
def _keyword_distribution_from_texts(texts: Sequence[str], *, limit: int = 20) -> Dict[str, Any]:
    """Count tokens across ``texts`` and return the ``limit`` most frequent.

    Each text contributes whatever ``_unique_tokens`` yields for it. The
    result is tagged with ``counting_mode`` "work_occurrence".
    """
    tally: Counter = Counter()
    for body in texts:
        tokens = _unique_tokens(body)
        tally.update(tokens)
    top_items = _top_counter(tally, limit=limit)
    return {"items": top_items, "counting_mode": "work_occurrence"}
593
-
594
-
595
def _field_keyword_distribution(works: List[Dict[str, Any]], field: str, *, limit: int = 20) -> Dict[str, Any]:
    """Build the keyword distribution of one text ``field`` across ``works``."""
    field_texts = []
    for entry in works:
        field_texts.append(_safe_text(entry.get(field)))
    return _keyword_distribution_from_texts(field_texts, limit=limit)
597
-
598
-
599
def _publish_time_distribution(works: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Tally publish weekday and hour across ``works``.

    Works whose ``publish_time`` cannot be parsed by ``_parse_datetime`` are
    counted in ``unavailable_count`` instead.
    """
    by_weekday: Counter = Counter()
    by_hour: Counter = Counter()
    missing = 0
    for entry in works:
        moment = _parse_datetime(entry.get("publish_time"))
        if moment is not None:
            by_weekday[str(moment.weekday())] += 1
            by_hour[str(moment.hour)] += 1
        else:
            missing += 1
    return {
        "weekday": _top_counter(by_weekday, limit=7),
        "hour": _top_counter(by_hour, limit=24),
        "unavailable_count": missing,
    }
611
-
612
-
613
def _duration_distribution(works: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Band positive ``duration_ms`` values into short / medium / long.

    Short is under 30000 ms, medium is under 120000 ms, long is everything
    else. Returns an ``available: False`` marker when no work has a positive
    duration.
    """
    positive_durations = []
    for entry in works:
        millis = _safe_int(entry.get("duration_ms"), 0)
        if millis > 0:
            positive_durations.append(millis)
    if not positive_durations:
        return {"available": False, "reason": "duration_unavailable"}

    bands: Counter = Counter()
    for millis in positive_durations:
        if millis < 30000:
            band = "short"
        elif millis < 120000:
            band = "medium"
        else:
            band = "long"
        bands[band] += 1
    return {"available": True, "items": _top_counter(bands, limit=3)}
621
-
622
-
623
def _performance_distribution(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarise ``performance_score_norm`` across ``items``.

    Scores are sorted from highest to lowest, so ``p20`` / ``p50`` / ``p85``
    are the scores found 20% / 50% / 85% of the way down that descending
    list. All values are rounded to 4 decimals.
    """
    scores = [_safe_float(entry.get("performance_score_norm"), 0.0) for entry in items]
    if not scores:
        return {"available": False, "reason": "no_scores"}

    descending = sorted(scores, reverse=True)
    last_index = len(descending) - 1

    def _score_at(fraction: float) -> float:
        position = int(math.floor(last_index * fraction))
        position = min(max(position, 0), last_index)
        return round(descending[position], 4)

    return {
        "available": True,
        "max": round(max(descending), 4),
        "min": round(min(descending), 4),
        "p20": _score_at(0.20),
        "p50": _score_at(0.50),
        "p85": _score_at(0.85),
    }
632
-
633
-
634
def _engagement_pattern(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Return the work count and per-work averages of the engagement counters.

    Averages cover digg, comment, collect, share and play counts, rounded to
    2 decimals. An empty input yields zeros.
    """
    metrics = ("digg_count", "comment_count", "collect_count", "share_count", "play_count")
    count = len(items)
    summary: Dict[str, Any] = {"count": count}
    for metric in metrics:
        if not items:
            summary["avg_" + metric] = 0
            continue
        metric_total = sum(_safe_int(entry.get(metric), 0) for entry in items)
        summary["avg_" + metric] = round(metric_total / count, 2)
    return summary
650
-
651
-
652
def _compare_bucket_groups(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Profile the Top, Mid and Bottom buckets side by side.

    For each of the three buckets (keyed in lowercase) the result holds
    keyword lists for title / caption / primary text, distributions of hook,
    structure, CTA and content-form labels, and the engagement averages.
    The Strong bucket is not included.
    """
    comparison: Dict[str, Any] = {}
    for bucket_name in ("Top", "Mid", "Bottom"):
        members = [entry for entry in items if entry.get("bucket") == bucket_name]

        def _keywords(field: str, members: List[Dict[str, Any]] = members) -> Any:
            return _field_keyword_distribution(members, field, limit=8).get("items", [])

        def _labels(field: str, members: List[Dict[str, Any]] = members) -> Any:
            values = [_safe_text(entry.get(field)) for entry in members]
            return _distribution_from_values(values, limit=6).get("items", [])

        comparison[bucket_name.lower()] = {
            "title_keywords": _keywords("title"),
            "caption_keywords": _keywords("caption_raw"),
            "primary_text_keywords": _keywords("primary_text"),
            "hook_types": _labels("hook_type"),
            "structure_types": _labels("structure_type"),
            "cta_types": _labels("cta_type"),
            "content_forms": _labels("content_form"),
            "engagement_pattern": _engagement_pattern(members),
        }
    return comparison
667
-
668
-
669
def prepare_author_analysis_bundle(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Dict[str, Any]:
    """Normalize, rank and sample an author's works into an analysis bundle.

    Pipeline: every dict in ``works`` is normalized with ``_normalize_work``.
    Works whose ``analysis_eligibility`` is not "eligible" are set aside. The
    rest are ranked and bucketed, then sampled.

    Returns a dict with:
      * ``analysis_input``: the payload (author profile, sampled works,
        aggregate stats, platform context, metadata);
      * ``normalized_works``, ``ranked_works``, ``sampled_works``;
      * ``sampled_work_ids``: non-empty ids of the sampled works;
      * ``excluded_works_count``.

    Note that ``aggregate_stats.total_works`` counts eligible works only,
    while ``analysis_metadata.total_works`` counts all normalized works.
    """
    normalized = [_normalize_work(profile, raw_work) for raw_work in works if isinstance(raw_work, dict)]
    eligible = [entry for entry in normalized if _safe_text(entry.get("analysis_eligibility")) == "eligible"]
    excluded_count = len(normalized) - len(eligible)
    ranked = _assign_buckets(eligible)
    sampled = _sample_standard_works(ranked)

    sampled_work_ids = []
    for entry in sampled:
        work_id = _safe_text(entry.get("platform_work_id"))
        if work_id:
            sampled_work_ids.append(work_id)

    eligible_total = len(ranked)
    sampled_total = len(sampled)

    def _ranked_values(field: str) -> List[str]:
        """Collect one field, as text, from every ranked work."""
        return [_safe_text(entry.get(field)) for entry in ranked]

    aggregate_stats = {
        "total_works": eligible_total,
        "excluded_works_count": excluded_count,
        "sampled_works_count": sampled_total,
        "sampling_ratio": round((sampled_total / eligible_total), 4) if ranked else 0.0,
        "sampling_mode": "standard",
        "analysis_window": "mixed",
        "global_title_keyword_distribution": _field_keyword_distribution(ranked, "title"),
        "global_caption_keyword_distribution": _field_keyword_distribution(ranked, "caption_raw"),
        "global_primary_text_keyword_distribution": _field_keyword_distribution(ranked, "primary_text"),
        "global_hook_type_distribution": _distribution_from_values(_ranked_values("hook_type")),
        "global_structure_type_distribution": _distribution_from_values(_ranked_values("structure_type")),
        "global_cta_type_distribution": _distribution_from_values(_ranked_values("cta_type")),
        "global_content_form_distribution": _distribution_from_values(_ranked_values("content_form")),
        "global_work_modality_distribution": _distribution_from_values(_ranked_values("work_modality")),
        "global_performance_distribution": _performance_distribution(ranked),
        "global_publish_time_distribution": _publish_time_distribution(ranked),
        "global_duration_distribution": _duration_distribution(ranked),
        "global_bucket_distribution": _distribution_from_values(_ranked_values("bucket"), limit=4),
        "global_top_vs_mid_vs_bottom_deltas": _compare_bucket_groups(ranked),
    }

    author_profile = {
        "platform": _safe_text(profile.get("platform")) or platform,
        "platform_author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
        "nickname": _safe_text(profile.get("nickname")),
        "author_handle": _safe_text(profile.get("author_handle")),
        "signature": _safe_text(profile.get("signature")),
        "fans_count": _safe_int(profile.get("fans_count"), 0),
        "liked_count": _safe_int(profile.get("liked_count"), 0),
        "collected_count": _safe_int(profile.get("collected_count"), 0),
        "works_count": _safe_int(profile.get("works_count"), eligible_total),
        "verified": bool(profile.get("verified", False)),
        "ip_location": _safe_text(profile.get("ip_location")),
    }

    platform_context = {
        "platform": platform,
        "content_kind": "author_home",
        # Modality of the highest-ranked work, not the most frequent one.
        "primary_work_modality": ranked[0].get("work_modality") if ranked else None,
    }

    analysis_metadata = {
        "input_object_name": "author_analysis_input_v1",
        "prompt_contract_path": str(PROMPT_CONTRACT_PATH),
        "input_schema_path": str(INPUT_SCHEMA_PATH),
        "output_schema_path": str(OUTPUT_SCHEMA_PATH),
        "analysis_mode": "standard",
        "total_works": len(normalized),
        "eligible_works_count": eligible_total,
        "excluded_works_count": excluded_count,
        "sampled_works_count": sampled_total,
    }

    analysis_input = {
        "author_profile": author_profile,
        "sampled_works": sampled,
        "aggregate_stats": aggregate_stats,
        "platform_context": platform_context,
        "analysis_metadata": analysis_metadata,
    }
    return {
        "analysis_input": analysis_input,
        "normalized_works": normalized,
        "ranked_works": ranked,
        "sampled_works": sampled,
        "sampled_work_ids": sampled_work_ids,
        "excluded_works_count": excluded_count,
    }
738
-
739
-
740
def build_author_analysis_input_v1(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Tuple[Dict[str, Any], List[Dict[str, str]]]:
    """Build the ``author_analysis_input_v1`` payload and validate it.

    Returns ``(payload, errors)`` where ``errors`` is the list produced by
    ``validate_author_analysis_input_v1`` for that payload.
    """
    bundle = prepare_author_analysis_bundle(profile=profile, works=works, platform=platform)
    payload = bundle.get("analysis_input") or {}
    validation_errors = validate_author_analysis_input_v1(payload)
    return payload, validation_errors
743
-
744
-
745
def _require_non_empty_string(errors: List[Dict[str, str]], field: str, value: Any) -> None:
    """Append an ``empty_string`` error for ``field`` when ``_safe_text(value)`` is falsy."""
    if _safe_text(value):
        return
    errors.append({"field": field, "reason": "empty_string"})
748
-
749
-
750
- def _require_dict(errors: List[Dict[str, str]], field: str, value: Any) -> Dict[str, Any]:
751
- if not isinstance(value, dict):
752
- errors.append({"field": field, "reason": "type_error:dict"})
753
- return {}
754
- return value
755
-
756
-
757
- def _require_list(errors: List[Dict[str, str]], field: str, value: Any) -> List[Any]:
758
- if not isinstance(value, list):
759
- errors.append({"field": field, "reason": "type_error:list"})
760
- return []
761
- return value
762
-
763
-
764
def _validate_distribution_object(errors: List[Dict[str, str]], field: str, value: Any) -> None:
    """Check that ``value`` looks like ``{"items": [{"value": ..., "count": ...}, ...]}``.

    Problems are appended to ``errors`` under dotted paths rooted at
    ``field``. Only the first 20 items are inspected. An empty or non-dict
    ``value`` is not inspected any further.
    """
    container = _require_dict(errors, field, value)
    if not container:
        return
    entries = _require_list(errors, f"{field}.items", container.get("items"))
    for position, entry in enumerate(entries[:20]):
        entry_path = f"{field}.items.{position}"
        record = _require_dict(errors, entry_path, entry)
        if not record:
            continue
        _require_non_empty_string(errors, f"{entry_path}.value", record.get("value"))
        if "count" not in record:
            errors.append({"field": f"{entry_path}.count", "reason": "missing"})
775
-
776
-
777
def validate_author_analysis_input_v1(payload: Dict[str, Any]) -> List[Dict[str, str]]:
    """Validate an ``author_analysis_input_v1`` payload and return its errors.

    Runs the JSON-schema check first, then structural checks: the top-level
    sections must have the right container type, key identity strings must be
    non-empty, the aggregate distributions must be present and well-formed,
    the sample count must match, and each sampled work (first 120) must carry
    an id, some text and valid enum labels.

    Returns the de-duplicated list of ``{"field", "reason"}`` errors (empty
    when the payload is valid).
    """
    errors: List[Dict[str, str]] = []
    errors.extend(_schema_errors(payload, INPUT_SCHEMA_PATH))
    author_profile = _require_dict(errors, "author_profile", payload.get("author_profile"))
    aggregate = _require_dict(errors, "aggregate_stats", payload.get("aggregate_stats"))
    platform_context = _require_dict(errors, "platform_context", payload.get("platform_context"))
    analysis_metadata = _require_dict(errors, "analysis_metadata", payload.get("analysis_metadata"))
    sampled_works = _require_list(errors, "sampled_works", payload.get("sampled_works"))

    _require_non_empty_string(errors, "author_profile.platform", author_profile.get("platform"))
    _require_non_empty_string(errors, "author_profile.platform_author_id", author_profile.get("platform_author_id"))
    _require_non_empty_string(errors, "author_profile.nickname", author_profile.get("nickname"))
    _require_non_empty_string(errors, "platform_context.platform", platform_context.get("platform"))
    _require_non_empty_string(errors, "platform_context.content_kind", platform_context.get("content_kind"))
    _require_non_empty_string(errors, "analysis_metadata.input_object_name", analysis_metadata.get("input_object_name"))
    _require_non_empty_string(errors, "analysis_metadata.analysis_mode", analysis_metadata.get("analysis_mode"))

    for key in [
        "global_title_keyword_distribution",
        "global_caption_keyword_distribution",
        "global_primary_text_keyword_distribution",
        "global_hook_type_distribution",
        "global_structure_type_distribution",
        "global_cta_type_distribution",
        "global_content_form_distribution",
        "global_work_modality_distribution",
        "global_performance_distribution",
        "global_publish_time_distribution",
        "global_bucket_distribution",
        "global_top_vs_mid_vs_bottom_deltas",
    ]:
        if key not in aggregate:
            errors.append({"field": f"aggregate_stats.{key}", "reason": "missing"})

    # Read from the type-checked `aggregate` dict rather than the raw payload:
    # a non-dict `aggregate_stats` (e.g. None) is then reported as an error
    # instead of raising AttributeError on `.get`.
    for key in [
        "global_title_keyword_distribution",
        "global_caption_keyword_distribution",
        "global_primary_text_keyword_distribution",
        "global_hook_type_distribution",
        "global_structure_type_distribution",
        "global_cta_type_distribution",
        "global_content_form_distribution",
        "global_work_modality_distribution",
        "global_bucket_distribution",
    ]:
        _validate_distribution_object(errors, f"aggregate_stats.{key}", aggregate.get(key))

    total_works = _safe_int(aggregate.get("total_works"), 0)
    if total_works > 0 and not sampled_works:
        errors.append({"field": "sampled_works", "reason": "empty_collection"})
    if sampled_works and _safe_int(aggregate.get("sampled_works_count"), -1) != len(sampled_works):
        errors.append({"field": "aggregate_stats.sampled_works_count", "reason": "count_mismatch"})

    for index, item in enumerate(sampled_works[:120]):
        row = _require_dict(errors, f"sampled_works.{index}", item)
        if not row:
            continue
        _require_non_empty_string(errors, f"sampled_works.{index}.platform_work_id", row.get("platform_work_id"))
        if not any(_safe_text(row.get(key)) for key in ("title", "caption_raw", "primary_text")):
            errors.append({"field": f"sampled_works.{index}", "reason": "all_text_fields_empty"})
        if _safe_text(row.get("work_modality")) not in {"video", "text"}:
            errors.append({"field": f"sampled_works.{index}.work_modality", "reason": "enum_required"})
        if _safe_text(row.get("primary_text_source")) not in {"asr_clean", "caption_raw"}:
            errors.append({"field": f"sampled_works.{index}.primary_text_source", "reason": "enum_required"})
        if _safe_text(row.get("hook_type")) not in HOOK_TYPES_ENUM:
            errors.append({"field": f"sampled_works.{index}.hook_type", "reason": "enum_required"})
        if _safe_text(row.get("structure_type")) not in STRUCTURE_TYPES:
            errors.append({"field": f"sampled_works.{index}.structure_type", "reason": "enum_required"})
        if _safe_text(row.get("cta_type")) not in CTA_TYPES:
            errors.append({"field": f"sampled_works.{index}.cta_type", "reason": "enum_required"})
        if _safe_text(row.get("content_form")) not in CONTENT_FORMS:
            errors.append({"field": f"sampled_works.{index}.content_form", "reason": "enum_required"})
        markers = _require_list(errors, f"sampled_works.{index}.style_markers", row.get("style_markers"))
        for marker in markers:
            if _safe_text(marker) not in STYLE_MARKERS_ENUM:
                errors.append({"field": f"sampled_works.{index}.style_markers", "reason": f"enum_required:{marker}"})
    return _dedupe_error_list(errors)
854
-
855
-
856
- def _fallback_sample_confidence(sample_size: int) -> str:
857
- return "low" if sample_size < 5 else ("mid" if sample_size < 15 else "high")
858
-
859
-
860
def build_fallback_author_analysis_v2(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Build a minimal, mostly templated v2 analysis from an analysis input.

    Only a few parts are data-driven: the top values of the aggregate
    distributions (keywords, themes, hooks, structures, CTAs), the sample
    size and its confidence label, and up to five representative works.
    Everything else is fixed template text.
    """

    def _section(name: str, expected: type) -> Any:
        """Return ``payload[name]`` when it has the expected type, else an empty one."""
        section = payload.get(name)
        return section if isinstance(section, expected) else expected()

    author = _section("author_profile", dict)
    aggregate = _section("aggregate_stats", dict)
    sampled = _section("sampled_works", list)

    def _top_values(distribution_key: str, limit: int) -> List[Any]:
        """First ``limit`` ``value`` entries of one aggregate distribution."""
        rows = (aggregate.get(distribution_key) or {}).get("items") or []
        return [row.get("value") for row in rows if isinstance(row, dict)][:limit]

    nickname = _safe_text(author.get("nickname")) or "该作者"
    top_keywords = _top_values("global_primary_text_keyword_distribution", 5)
    theme_items = _top_values("global_caption_keyword_distribution", 6)
    hook_items = _top_values("global_hook_type_distribution", 3)
    structure_items = _top_values("global_structure_type_distribution", 3)
    cta_items = _top_values("global_cta_type_distribution", 3)

    sample_size = len(sampled)
    confidence = _fallback_sample_confidence(sample_size)

    representative = []
    for work in sampled[:5]:
        if not isinstance(work, dict):
            continue
        representative.append({
            "platform_work_id": work.get("platform_work_id"),
            "title": work.get("title") or work.get("caption_raw") or work.get("primary_text"),
            "bucket": work.get("bucket"),
        })

    dominant_themes = theme_items[:3]
    theme_clusters = [{"name": theme, "keywords": [theme]} for theme in dominant_themes]
    lead_theme = dominant_themes[0] if dominant_themes else '内容增长'
    has_lead_cta = any(item in {"private_message_cta", "lead_magnet_cta"} for item in cta_items)

    author_positioning = {
        "one_liner": f"{nickname} 是一个围绕{lead_theme}持续输出的创作者,主要用高密度结构化表达帮助目标受众缩短试错路径。",
        "author_type": "creator_education",
        "primary_role": "teacher",
        "secondary_roles": ["operator"],
        "target_audience": "想提升内容与增长效率的创作者/操盘手",
        "core_problem_solved": "把复杂方法压缩成可快速模仿的内容动作",
        "core_value_proposition": "用短平快的机制化表达降低学习和执行门槛",
        "evidence": [f"top_keywords={top_keywords}", f"dominant_themes={dominant_themes}"],
    }
    trust_model = {
        "primary_trust_source": "systematized_method",
        "secondary_trust_sources": ["demonstration", "consistency"],
        "trust_building_mechanisms": ["重复输出同类方法", "把观点包装成步骤/模板", "用案例或结果句强化可信度"],
        "trust_risks": ["样本主要来自单平台表达,真实性与转化深度证据有限"],
        "relationship_posture": {"distance": "mid", "authority_level": "mid", "affinity_level": "mid"},
        "evidence": [f"sample_size={sample_size}", f"structures={structure_items}"],
    }
    cognitive_engine = {
        "worldview": "优先追求可执行、可复用、可放大的内容动作。",
        "value_priority": ["效率", "结果感", "方法压缩"],
        "problem_definition_style": "execution_problem",
        "reasoning_modes": ["workflow_packaging", "case_induction"],
        "knowledge_sources": ["作品标题/文案/字幕中的步骤化表达"],
        "judgment_style": {"certainty_level": "mid", "notes": "基于单平台主页样本初判"},
        "core_cognitive_actions": ["workflow_packaging", "result_compression"],
        "evidence": [f"keywords={top_keywords}", f"sample_size={sample_size}"],
    }
    expression_hooks = {
        "language_style": {"oral_level": "mid", "assertiveness": "high", "emotional_intensity": "mid", "professional_density": "mid"},
        "hook_keywords": top_keywords[:5],
        "hook_types": [item for item in hook_items if item in HOOK_TYPES_ENUM] or ["result_hook"],
        "argument_patterns": ["先给结果/判断,再给步骤或解释", "用反差或对比压缩注意力获取"],
        "emotion_patterns": ["sharp_direct"],
        "memorability_sources": ["strong_keywords", "repeatable_phrases"],
        "evidence": [f"hook_types={hook_items}", f"keywords={top_keywords}"],
    }
    content_mechanism = {
        "topic_sources": ["audience_questions", "workflow_demos"],
        "topic_goals": ["acquire_attention", "build_trust", "show_capability"],
        "winning_content_structures": [item for item in structure_items if item in WINNING_CONTENT_STRUCTURES] or ["hook_demo_result"],
        "repeatable_series": ["高频重复的母题/模板仍在持续输出"],
        "traffic_drivers": ["result_promise", "shortcut_imagination"],
        "content_flywheel": "用结果型标题拉点击,用结构化拆解留住注意力,再用 CTA 做后续动作承接。",
        "cross_platform_variation": "当前不可判断:缺少跨平台样本。",
        "dominant_themes": dominant_themes,
        "theme_clusters": theme_clusters,
        "evidence": [f"themes={dominant_themes}", f"structures={structure_items}", f"cta_types={cta_items}"],
    }
    commercial_bridge = {
        "content_role_in_funnel": ["acquire_attention", "build_trust", "qualify"],
        "likely_products": ["course", "consulting"] if has_lead_cta else [],
        "conversion_path": "通过内容建立方法可信度,再用评论/私信/领取资料承接线索。",
        "content_product_fit": "mid",
        "business_model_signals": ["content_led_growth", "high_ip_dependency"],
        "commercial_risks": ["明确成交链路和产品证据不足,不能强断言单一模式。"],
        "evidence": [f"cta_types={cta_items}"],
    }
    core_tensions = {
        "tensions": [
            {"label": "效率 vs 深度", "surface": "表达短促、结果先行", "deep_reason": "优先追求短视频环境下的注意力效率", "strategic_implication": "适合拉新,但要防止过度压缩导致信任深度不足"},
            {"label": "模板复用 vs 差异化", "surface": "高频复用相似母题", "deep_reason": "成熟模板能稳定产出", "strategic_implication": "需要持续刷新案例与视角,避免模板疲劳"},
        ],
        "most_important_tension": "高效率表达很强,但商业深度证据未必同步充足。",
        "evidence": [f"sample_size={sample_size}", f"themes={dominant_themes}"],
    }
    evidence_pack = {
        "sample_size": sample_size,
        "sample_confidence": confidence,
        "representative_works": representative,
        "top_keywords": top_keywords,
        "observed_hooks": hook_items,
        "observed_ctas": cta_items,
        "observed_structures": structure_items,
        "notes": ["fallback_generated", "基于标准化输入的最小保底分析"],
    }
    clone_guidance = {
        "copyable_elements": ["结果先行的标题结构", "步骤化表达", "稳定重复的母模板"],
        "non_copyable_elements": ["作者长期经验背书", "既有受众信任资产"],
        "borrowable_flavor": ["短句高密度", "判断明确", "先给结果再给解释"],
        "danger_zones": ["不要照抄口头禅和具体句子", "证据不足时别硬抄商业承诺"],
        "if_only_learn_one_thing": "学他把复杂方法压缩成高可执行内容动作的能力,而不是抄表面语气。",
    }

    return {
        "author_positioning": author_positioning,
        "trust_model": trust_model,
        "cognitive_engine": cognitive_engine,
        "expression_hooks": expression_hooks,
        "content_mechanism": content_mechanism,
        "commercial_bridge": commercial_bridge,
        "core_tensions": core_tensions,
        "evidence_pack": evidence_pack,
        "clone_guidance": clone_guidance,
    }
962
-
963
-
964
- def _enum_error(errors: List[Dict[str, str]], field: str, value: Any, allowed: set) -> None:
965
- if value in (None, "", []):
966
- return
967
- if isinstance(value, list):
968
- for item in value:
969
- if item not in allowed:
970
- errors.append({"field": field, "reason": f"enum_preferred:{item}"})
971
- elif value not in allowed:
972
- errors.append({"field": field, "reason": f"enum_preferred:{value}"})
973
-
974
-
975
- def validate_author_analysis_v2(payload: Dict[str, Any], *, analysis_input: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
976
- errors: List[Dict[str, str]] = []
977
- errors.extend(_schema_errors(payload, OUTPUT_SCHEMA_PATH))
978
- for module, fields in REQUIRED_V2_FIELDS.items():
979
- block = payload.get(module)
980
- if not isinstance(block, dict):
981
- errors.append({"field": module, "reason": "missing_or_type_error:dict"})
982
- continue
983
- for field in fields:
984
- if field not in block:
985
- errors.append({"field": f"{module}.{field}", "reason": "missing"})
986
-
987
- author_positioning = _require_dict(errors, "author_positioning", payload.get("author_positioning"))
988
- trust_model = _require_dict(errors, "trust_model", payload.get("trust_model"))
989
- cognitive_engine = _require_dict(errors, "cognitive_engine", payload.get("cognitive_engine"))
990
- expression_hooks = _require_dict(errors, "expression_hooks", payload.get("expression_hooks"))
991
- content_mechanism = _require_dict(errors, "content_mechanism", payload.get("content_mechanism"))
992
- commercial_bridge = _require_dict(errors, "commercial_bridge", payload.get("commercial_bridge"))
993
- core_tensions = _require_dict(errors, "core_tensions", payload.get("core_tensions"))
994
- evidence_pack = _require_dict(errors, "evidence_pack", payload.get("evidence_pack"))
995
- clone_guidance = _require_dict(errors, "clone_guidance", payload.get("clone_guidance"))
996
-
997
- for field, value in [
998
- ("author_positioning.one_liner", author_positioning.get("one_liner")),
999
- ("author_positioning.target_audience", author_positioning.get("target_audience")),
1000
- ("author_positioning.core_problem_solved", author_positioning.get("core_problem_solved")),
1001
- ("author_positioning.core_value_proposition", author_positioning.get("core_value_proposition")),
1002
- ("commercial_bridge.conversion_path", commercial_bridge.get("conversion_path")),
1003
- ("content_mechanism.content_flywheel", content_mechanism.get("content_flywheel")),
1004
- ("content_mechanism.cross_platform_variation", content_mechanism.get("cross_platform_variation")),
1005
- ("core_tensions.most_important_tension", core_tensions.get("most_important_tension")),
1006
- ("clone_guidance.if_only_learn_one_thing", clone_guidance.get("if_only_learn_one_thing")),
1007
- ]:
1008
- _require_non_empty_string(errors, field, value)
1009
-
1010
- posture = _require_dict(errors, "trust_model.relationship_posture", trust_model.get("relationship_posture"))
1011
- lang_style = _require_dict(errors, "expression_hooks.language_style", expression_hooks.get("language_style"))
1012
- judgment_style = _require_dict(errors, "cognitive_engine.judgment_style", cognitive_engine.get("judgment_style"))
1013
-
1014
- for list_field, value in [
1015
- ("trust_model.secondary_trust_sources", trust_model.get("secondary_trust_sources")),
1016
- ("trust_model.trust_building_mechanisms", trust_model.get("trust_building_mechanisms")),
1017
- ("trust_model.trust_risks", trust_model.get("trust_risks")),
1018
- ("cognitive_engine.value_priority", cognitive_engine.get("value_priority")),
1019
- ("cognitive_engine.reasoning_modes", cognitive_engine.get("reasoning_modes")),
1020
- ("cognitive_engine.knowledge_sources", cognitive_engine.get("knowledge_sources")),
1021
- ("cognitive_engine.core_cognitive_actions", cognitive_engine.get("core_cognitive_actions")),
1022
- ("expression_hooks.hook_keywords", expression_hooks.get("hook_keywords")),
1023
- ("expression_hooks.hook_types", expression_hooks.get("hook_types")),
1024
- ("expression_hooks.argument_patterns", expression_hooks.get("argument_patterns")),
1025
- ("expression_hooks.emotion_patterns", expression_hooks.get("emotion_patterns")),
1026
- ("expression_hooks.memorability_sources", expression_hooks.get("memorability_sources")),
1027
- ("content_mechanism.topic_sources", content_mechanism.get("topic_sources")),
1028
- ("content_mechanism.topic_goals", content_mechanism.get("topic_goals")),
1029
- ("content_mechanism.winning_content_structures", content_mechanism.get("winning_content_structures")),
1030
- ("content_mechanism.repeatable_series", content_mechanism.get("repeatable_series")),
1031
- ("content_mechanism.traffic_drivers", content_mechanism.get("traffic_drivers")),
1032
- ("content_mechanism.dominant_themes", content_mechanism.get("dominant_themes")),
1033
- ("content_mechanism.theme_clusters", content_mechanism.get("theme_clusters")),
1034
- ("commercial_bridge.content_role_in_funnel", commercial_bridge.get("content_role_in_funnel")),
1035
- ("commercial_bridge.likely_products", commercial_bridge.get("likely_products")),
1036
- ("commercial_bridge.business_model_signals", commercial_bridge.get("business_model_signals")),
1037
- ("commercial_bridge.commercial_risks", commercial_bridge.get("commercial_risks")),
1038
- ("evidence_pack.representative_works", evidence_pack.get("representative_works")),
1039
- ("evidence_pack.top_keywords", evidence_pack.get("top_keywords")),
1040
- ("evidence_pack.observed_hooks", evidence_pack.get("observed_hooks")),
1041
- ("evidence_pack.observed_ctas", evidence_pack.get("observed_ctas")),
1042
- ("evidence_pack.observed_structures", evidence_pack.get("observed_structures")),
1043
- ("evidence_pack.notes", evidence_pack.get("notes")),
1044
- ("clone_guidance.copyable_elements", clone_guidance.get("copyable_elements")),
1045
- ("clone_guidance.non_copyable_elements", clone_guidance.get("non_copyable_elements")),
1046
- ("clone_guidance.borrowable_flavor", clone_guidance.get("borrowable_flavor")),
1047
- ("clone_guidance.danger_zones", clone_guidance.get("danger_zones")),
1048
- ]:
1049
- _require_list(errors, list_field, value)
1050
-
1051
- _enum_error(errors, "author_positioning.author_type", author_positioning.get("author_type"), AUTHOR_TYPES)
1052
- _enum_error(errors, "author_positioning.primary_role", author_positioning.get("primary_role"), PRIMARY_ROLES)
1053
- _enum_error(errors, "trust_model.primary_trust_source", trust_model.get("primary_trust_source"), TRUST_SOURCES)
1054
- _enum_error(errors, "cognitive_engine.problem_definition_style", cognitive_engine.get("problem_definition_style"), PROBLEM_DEFINITION_STYLES)
1055
- _enum_error(errors, "cognitive_engine.reasoning_modes", cognitive_engine.get("reasoning_modes"), REASONING_MODES)
1056
- _enum_error(errors, "cognitive_engine.core_cognitive_actions", cognitive_engine.get("core_cognitive_actions"), CORE_COGNITIVE_ACTIONS)
1057
- _enum_error(errors, "expression_hooks.hook_types", expression_hooks.get("hook_types"), HOOK_TYPES_ENUM)
1058
- _enum_error(errors, "expression_hooks.emotion_patterns", expression_hooks.get("emotion_patterns"), EMOTION_PATTERNS)
1059
- _enum_error(errors, "expression_hooks.memorability_sources", expression_hooks.get("memorability_sources"), MEMORABILITY_SOURCES)
1060
- _enum_error(errors, "content_mechanism.topic_sources", content_mechanism.get("topic_sources"), TOPIC_SOURCES)
1061
- _enum_error(errors, "content_mechanism.topic_goals", content_mechanism.get("topic_goals"), TOPIC_GOALS)
1062
- _enum_error(errors, "content_mechanism.winning_content_structures", content_mechanism.get("winning_content_structures"), WINNING_CONTENT_STRUCTURES)
1063
- _enum_error(errors, "content_mechanism.traffic_drivers", content_mechanism.get("traffic_drivers"), TRAFFIC_DRIVERS)
1064
- _enum_error(errors, "commercial_bridge.content_role_in_funnel", commercial_bridge.get("content_role_in_funnel"), FUNNEL_ROLES)
1065
- _enum_error(errors, "commercial_bridge.likely_products", commercial_bridge.get("likely_products"), LIKELY_PRODUCTS)
1066
- _enum_error(errors, "commercial_bridge.business_model_signals", commercial_bridge.get("business_model_signals"), BUSINESS_MODEL_SIGNALS)
1067
- _enum_error(errors, "trust_model.relationship_posture.distance", posture.get("distance"), RELATIONSHIP_DISTANCE)
1068
- _enum_error(errors, "trust_model.relationship_posture.authority_level", posture.get("authority_level"), LOW_HIGH_MID)
1069
- _enum_error(errors, "trust_model.relationship_posture.affinity_level", posture.get("affinity_level"), LOW_HIGH_MID)
1070
- _enum_error(errors, "cognitive_engine.judgment_style.certainty_level", judgment_style.get("certainty_level"), LOW_HIGH_MID)
1071
- _enum_error(errors, "expression_hooks.language_style.oral_level", lang_style.get("oral_level"), LOW_HIGH_MID)
1072
- _enum_error(errors, "expression_hooks.language_style.assertiveness", lang_style.get("assertiveness"), LOW_HIGH_MID)
1073
- _enum_error(errors, "expression_hooks.language_style.emotional_intensity", lang_style.get("emotional_intensity"), LOW_HIGH_MID)
1074
- _enum_error(errors, "expression_hooks.language_style.professional_density", lang_style.get("professional_density"), LOW_HIGH_MID)
1075
- _enum_error(errors, "commercial_bridge.content_product_fit", commercial_bridge.get("content_product_fit"), LOW_HIGH_MID)
1076
- _enum_error(errors, "evidence_pack.sample_confidence", evidence_pack.get("sample_confidence"), LOW_HIGH_MID)
1077
- _enum_error(errors, "evidence_pack.observed_hooks", evidence_pack.get("observed_hooks"), HOOK_TYPES_ENUM)
1078
- _enum_error(errors, "evidence_pack.observed_ctas", evidence_pack.get("observed_ctas"), CTA_TYPES)
1079
- _enum_error(errors, "evidence_pack.observed_structures", evidence_pack.get("observed_structures"), STRUCTURE_TYPES)
1080
-
1081
- tensions = _require_list(errors, "core_tensions.tensions", core_tensions.get("tensions"))
1082
- if len(tensions) < 2:
1083
- errors.append({"field": "core_tensions.tensions", "reason": "guardrail:need_at_least_2"})
1084
- for index, tension in enumerate(tensions[:10]):
1085
- block = _require_dict(errors, f"core_tensions.tensions.{index}", tension)
1086
- for sub in ("label", "surface", "deep_reason", "strategic_implication"):
1087
- _require_non_empty_string(errors, f"core_tensions.tensions.{index}.{sub}", block.get(sub))
1088
-
1089
- representative_works = evidence_pack.get("representative_works") if isinstance(evidence_pack.get("representative_works"), list) else []
1090
- for index, work in enumerate(representative_works[:10]):
1091
- block = _require_dict(errors, f"evidence_pack.representative_works.{index}", work)
1092
- _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.platform_work_id", block.get("platform_work_id"))
1093
- _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.title", block.get("title"))
1094
- _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.bucket", block.get("bucket"))
1095
-
1096
- for field, value in [
1097
- ("author_positioning.evidence", author_positioning.get("evidence")),
1098
- ("trust_model.evidence", trust_model.get("evidence")),
1099
- ("cognitive_engine.evidence", cognitive_engine.get("evidence")),
1100
- ("expression_hooks.evidence", expression_hooks.get("evidence")),
1101
- ("content_mechanism.evidence", content_mechanism.get("evidence")),
1102
- ("commercial_bridge.evidence", commercial_bridge.get("evidence")),
1103
- ("core_tensions.evidence", core_tensions.get("evidence")),
1104
- ]:
1105
- items = _require_list(errors, field, value)
1106
- if not items:
1107
- errors.append({"field": field, "reason": "empty_collection"})
1108
-
1109
- sample_size = _safe_int(evidence_pack.get("sample_size"), 0)
1110
- sample_confidence = _safe_text(evidence_pack.get("sample_confidence"))
1111
- if sample_size < 5 and sample_confidence == "high":
1112
- errors.append({"field": "evidence_pack.sample_confidence", "reason": "guardrail:sample_lt_5_cannot_be_high"})
1113
-
1114
- if analysis_input is not None:
1115
- platform_context = analysis_input.get("platform_context") if isinstance(analysis_input.get("platform_context"), dict) else {}
1116
- if len({platform_context.get("platform")} - {None, ""}) <= 1:
1117
- cross_platform_variation = _safe_text(content_mechanism.get("cross_platform_variation"))
1118
- if cross_platform_variation and "不可判断" not in cross_platform_variation and "unknown" not in cross_platform_variation.lower():
1119
- errors.append({"field": "content_mechanism.cross_platform_variation", "reason": "guardrail:single_platform_should_stay_weak"})
1120
- aggregate = analysis_input.get("aggregate_stats") if isinstance(analysis_input.get("aggregate_stats"), dict) else {}
1121
- cta_items = ((aggregate.get("global_cta_type_distribution") or {}).get("items") or []) if isinstance((aggregate.get("global_cta_type_distribution") or {}), dict) else []
1122
- explicit_conversion = any(isinstance(item, dict) and item.get("value") in {"private_message_cta", "lead_magnet_cta"} for item in cta_items)
1123
- likely_products = commercial_bridge.get("likely_products") if isinstance(commercial_bridge.get("likely_products"), list) else []
1124
- if likely_products and not explicit_conversion:
1125
- errors.append({"field": "commercial_bridge.likely_products", "reason": "guardrail:weak_conversion_signal"})
1126
- return _dedupe_error_list(errors)
1127
-
1128
-
1129
def derive_legacy_summary(author_analysis_v2: Dict[str, Any], *, analysis_input: Dict[str, Any], validation_errors: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]:
    """Project a v2 author analysis onto the legacy flat summary shape.

    The summary is produced even when validation reported problems, so every
    lookup tolerates missing or wrongly typed sections instead of raising.

    Args:
        author_analysis_v2: The structured analysis; the sections read here are
            ``author_positioning``, ``trust_model``, ``content_mechanism``,
            ``commercial_bridge``, ``core_tensions``, ``clone_guidance`` and
            ``evidence_pack``.
        analysis_input: The analysis input; only ``aggregate_stats`` (its
            ``global_hook_type_distribution`` and
            ``global_structure_type_distribution`` entries) is read.
        validation_errors: Optional validation findings. Each one lowers the
            base score by 2, capped at a total penalty of 12.

    Returns:
        A dict with the keys ``author_portrait``, ``business_analysis``,
        ``benchmark_analysis``, ``business_score``, ``benchmark_gap_score``,
        ``style_radar``, ``core_contradictions`` and ``recommendations``.
    """

    def _section(source: Any, key: str) -> Dict[str, Any]:
        # Return source[key] when it is a dict, otherwise an empty dict.
        value = source.get(key) if isinstance(source, dict) else None
        return value if isinstance(value, dict) else {}

    def _labels(values: Any) -> List[str]:
        # Keep only usable text labels so that ', '.join never sees None or
        # another non-string (which would raise TypeError).
        if not isinstance(values, list):
            return []
        labels: List[str] = []
        for value in values:
            if value is None:
                continue
            # Strings pass through untouched; other values go through the
            # file's own text coercion helper.
            text = value if isinstance(value, str) else _safe_text(value)
            if text:
                labels.append(text)
        return labels

    def _distribution_values(name: str) -> List[str]:
        # Collect the "value" of every dict entry in aggregate[name]["items"].
        items = _section(aggregate, name).get("items")
        if not isinstance(items, list):
            return []
        return _labels([item.get("value") for item in items if isinstance(item, dict)])

    positioning = _section(author_analysis_v2, "author_positioning")
    trust = _section(author_analysis_v2, "trust_model")
    content = _section(author_analysis_v2, "content_mechanism")
    bridge = _section(author_analysis_v2, "commercial_bridge")
    tensions = _section(author_analysis_v2, "core_tensions")
    clone = _section(author_analysis_v2, "clone_guidance")
    evidence = _section(author_analysis_v2, "evidence_pack")
    aggregate = _section(analysis_input, "aggregate_stats")

    # Fall back to a confidence derived from the sample size when the analysis
    # did not state one.
    sample_confidence = _safe_text(evidence.get("sample_confidence")) or _fallback_sample_confidence(_safe_int(evidence.get("sample_size"), 0))
    score_base = {"low": 58, "mid": 72, "high": 84}.get(sample_confidence, 60)
    if validation_errors:
        score_base -= min(len(validation_errors) * 2, 12)
    # Any likely product adds 6 points; none subtracts 4.
    product_bonus = 6 if (bridge.get("likely_products") or []) else -4
    business_score = int(_clamp(score_base + product_bonus, 40, 92))
    benchmark_gap_score = int(_clamp(100 - business_score + 8, 35, 88))

    hook_items = _distribution_values("global_hook_type_distribution")
    structure_items = _distribution_values("global_structure_type_distribution")
    dominant_themes = _labels(content.get("dominant_themes"))

    return {
        "author_portrait": _safe_text(positioning.get("one_liner")) or "作者画像数据不足。",
        # strip(";") removes the leading separator left behind when the value
        # proposition is empty.
        "business_analysis": ";".join([
            _safe_text(positioning.get("core_value_proposition")),
            f"主要信任来源:{_safe_text(trust.get('primary_trust_source')) or '待确认'}",
            f"商业承接:{_safe_text(bridge.get('conversion_path')) or '当前证据不足'}",
        ]).strip(";"),
        "benchmark_analysis": ";".join([
            f"高频 hook:{', '.join(hook_items[:3]) or '待补'}",
            f"常见结构:{', '.join(structure_items[:3]) or '待补'}",
            f"主主题:{', '.join(dominant_themes[:3]) or '待补'}",
        ]).strip(";"),
        "business_score": business_score,
        "benchmark_gap_score": benchmark_gap_score,
        # Fixed placeholder values; nothing in this function derives them from
        # the analysis.
        "style_radar": {"选题": 76, "表达": 78, "结构": 79, "节奏": 74, "人设": 73, "转化": 70, "差异化": 71, "稳定性": 79},
        "core_contradictions": [tensions.get("most_important_tension") or "张力信息不足"],
        "recommendations": [clone.get("if_only_learn_one_thing") or "优先学习其可复用的结构机制"],
    }