@tikomni/skills 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (451)
  1. package/.skill-package-allowlist.txt +4 -0
  2. package/LICENSE +21 -0
  3. package/README.md +167 -0
  4. package/README.zh-CN.md +167 -0
  5. package/bin/tikomni-skills.js +127 -0
  6. package/env.example +160 -0
  7. package/lib/installer.js +176 -0
  8. package/package.json +44 -0
  9. package/skills/creator-analysis/SKILL.md +71 -0
  10. package/skills/creator-analysis/agents/openai.yaml +4 -0
  11. package/skills/creator-analysis/env.example +36 -0
  12. package/skills/creator-analysis/references/api-capability-index.md +92 -0
  13. package/skills/creator-analysis/references/api-contracts/asr-api.md +130 -0
  14. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +776 -0
  15. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +2017 -0
  16. package/skills/creator-analysis/references/api-contracts/demo-api.md +717 -0
  17. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +3594 -0
  18. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +2274 -0
  19. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +1575 -0
  20. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +3254 -0
  21. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +4118 -0
  22. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +5544 -0
  23. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +1916 -0
  24. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +1540 -0
  25. package/skills/creator-analysis/references/api-contracts/health-check.md +69 -0
  26. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +78 -0
  27. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +2256 -0
  28. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +2011 -0
  29. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +2630 -0
  30. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +44 -0
  31. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +1518 -0
  32. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +1242 -0
  33. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +1088 -0
  34. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +1949 -0
  35. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +126 -0
  36. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +1142 -0
  37. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +2025 -0
  38. package/skills/creator-analysis/references/api-contracts/sora2-api.md +2266 -0
  39. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +208 -0
  40. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +897 -0
  41. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +134 -0
  42. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +494 -0
  43. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +5947 -0
  44. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +968 -0
  45. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +5735 -0
  46. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +1951 -0
  47. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +742 -0
  48. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +1890 -0
  49. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +4448 -0
  50. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +342 -0
  51. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +143 -0
  52. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +989 -0
  53. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +809 -0
  54. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +677 -0
  55. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +1547 -0
  56. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +798 -0
  57. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +2459 -0
  58. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +1291 -0
  59. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +1683 -0
  60. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +1324 -0
  61. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +1209 -0
  62. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +489 -0
  63. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +2636 -0
  64. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +2660 -0
  65. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +2315 -0
  66. package/skills/creator-analysis/references/api-tags/asr-api.md +100 -0
  67. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +482 -0
  68. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +1267 -0
  69. package/skills/creator-analysis/references/api-tags/demo-api.md +365 -0
  70. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +2012 -0
  71. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +1428 -0
  72. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +694 -0
  73. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +694 -0
  74. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +1059 -0
  75. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +3314 -0
  76. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +935 -0
  77. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +925 -0
  78. package/skills/creator-analysis/references/api-tags/health-check.md +40 -0
  79. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +57 -0
  80. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +1224 -0
  81. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +1147 -0
  82. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +1123 -0
  83. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +45 -0
  84. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +846 -0
  85. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +551 -0
  86. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +687 -0
  87. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +1105 -0
  88. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +112 -0
  89. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +721 -0
  90. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +1057 -0
  91. package/skills/creator-analysis/references/api-tags/sora2-api.md +737 -0
  92. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +136 -0
  93. package/skills/creator-analysis/references/api-tags/threads-web-api.md +472 -0
  94. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +65 -0
  95. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +253 -0
  96. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +1393 -0
  97. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +179 -0
  98. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +3264 -0
  99. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +709 -0
  100. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +366 -0
  101. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +663 -0
  102. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +2516 -0
  103. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +220 -0
  104. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +96 -0
  105. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +562 -0
  106. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +405 -0
  107. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +431 -0
  108. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +851 -0
  109. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +470 -0
  110. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +1405 -0
  111. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +534 -0
  112. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +934 -0
  113. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +757 -0
  114. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +762 -0
  115. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +308 -0
  116. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +934 -0
  117. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +717 -0
  118. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +1384 -0
  119. package/skills/creator-analysis/references/asr-orchestration.md +33 -0
  120. package/skills/creator-analysis/references/config-templates/defaults.yaml +60 -0
  121. package/skills/creator-analysis/references/contracts/creator-card-fields.md +23 -0
  122. package/skills/creator-analysis/references/contracts/work-card-fields.md +32 -0
  123. package/skills/creator-analysis/references/platform-guides/douyin.md +49 -0
  124. package/skills/creator-analysis/references/platform-guides/generic.md +46 -0
  125. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +54 -0
  126. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +28 -0
  127. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +46 -0
  128. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +49 -0
  129. package/skills/creator-analysis/references/prompt-contracts/cta.md +24 -0
  130. package/skills/creator-analysis/references/prompt-contracts/hook.md +25 -0
  131. package/skills/creator-analysis/references/prompt-contracts/insight.md +47 -0
  132. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +30 -0
  133. package/skills/creator-analysis/references/prompt-contracts/structure.md +25 -0
  134. package/skills/creator-analysis/references/prompt-contracts/style.md +27 -0
  135. package/skills/creator-analysis/references/prompt-contracts/summary.md +29 -0
  136. package/skills/creator-analysis/references/prompt-contracts/topic.md +29 -0
  137. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +325 -0
  138. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +158 -0
  139. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +41 -0
  140. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +75 -0
  141. package/skills/creator-analysis/references/workflow.md +18 -0
  142. package/skills/creator-analysis/scripts/__init__.py +0 -0
  143. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  144. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  145. package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +299 -0
  146. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  147. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +1122 -0
  148. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +260 -0
  149. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +260 -0
  150. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +5 -0
  151. package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +961 -0
  152. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  153. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +149 -0
  154. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  155. package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +636 -0
  156. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  157. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +491 -0
  158. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +553 -0
  159. package/skills/creator-analysis/scripts/author_home/schema.py +417 -0
  160. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  161. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +133 -0
  162. package/skills/creator-analysis/scripts/core/bootstrap_env.py +35 -0
  163. package/skills/creator-analysis/scripts/core/config_loader.py +418 -0
  164. package/skills/creator-analysis/scripts/core/extract_pipeline.py +173 -0
  165. package/skills/creator-analysis/scripts/core/progress_report.py +111 -0
  166. package/skills/creator-analysis/scripts/core/storage_router.py +253 -0
  167. package/skills/creator-analysis/scripts/core/tikomni_common.py +588 -0
  168. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  169. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  170. package/skills/creator-analysis/scripts/pipeline/asr/asr_pipeline.py +1189 -0
  171. package/skills/creator-analysis/scripts/pipeline/asr/poll_u2_task.py +95 -0
  172. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  173. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  174. package/skills/creator-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +224 -0
  175. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +1208 -0
  176. package/skills/creator-analysis/scripts/platform/douyin/select_low_quality_video_url.py +200 -0
  177. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  178. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +2128 -0
  179. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  180. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +106 -0
  181. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +1402 -0
  182. package/skills/meta-capability/SKILL.md +69 -0
  183. package/skills/meta-capability/agents/openai.yaml +4 -0
  184. package/skills/meta-capability/env.example +42 -0
  185. package/skills/meta-capability/references/api-capability-index.md +92 -0
  186. package/skills/meta-capability/references/api-contracts/asr-api.md +130 -0
  187. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +776 -0
  188. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +2017 -0
  189. package/skills/meta-capability/references/api-contracts/demo-api.md +717 -0
  190. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +3594 -0
  191. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +2274 -0
  192. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +1575 -0
  193. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +3254 -0
  194. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +4118 -0
  195. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +5544 -0
  196. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +1916 -0
  197. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +1540 -0
  198. package/skills/meta-capability/references/api-contracts/health-check.md +69 -0
  199. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +78 -0
  200. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +2256 -0
  201. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +2011 -0
  202. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +2630 -0
  203. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +44 -0
  204. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +1518 -0
  205. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +1242 -0
  206. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +1088 -0
  207. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +1949 -0
  208. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +126 -0
  209. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +1142 -0
  210. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +2025 -0
  211. package/skills/meta-capability/references/api-contracts/sora2-api.md +2266 -0
  212. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +208 -0
  213. package/skills/meta-capability/references/api-contracts/threads-web-api.md +897 -0
  214. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +134 -0
  215. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +494 -0
  216. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +5947 -0
  217. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +968 -0
  218. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +5735 -0
  219. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +1951 -0
  220. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +742 -0
  221. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +1890 -0
  222. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +4448 -0
  223. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +342 -0
  224. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +143 -0
  225. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +989 -0
  226. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +809 -0
  227. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +677 -0
  228. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +1547 -0
  229. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +798 -0
  230. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +2459 -0
  231. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +1291 -0
  232. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +1683 -0
  233. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +1324 -0
  234. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +1209 -0
  235. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +489 -0
  236. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +2636 -0
  237. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +2660 -0
  238. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +2315 -0
  239. package/skills/meta-capability/references/api-tags/asr-api.md +100 -0
  240. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +482 -0
  241. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +1267 -0
  242. package/skills/meta-capability/references/api-tags/demo-api.md +365 -0
  243. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +2012 -0
  244. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +1428 -0
  245. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +694 -0
  246. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +694 -0
  247. package/skills/meta-capability/references/api-tags/douyin-search-api.md +1059 -0
  248. package/skills/meta-capability/references/api-tags/douyin-web-api.md +3314 -0
  249. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +935 -0
  250. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +925 -0
  251. package/skills/meta-capability/references/api-tags/health-check.md +40 -0
  252. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +57 -0
  253. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +1224 -0
  254. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +1147 -0
  255. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +1123 -0
  256. package/skills/meta-capability/references/api-tags/ios-shortcut.md +45 -0
  257. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +846 -0
  258. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +551 -0
  259. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +687 -0
  260. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +1105 -0
  261. package/skills/meta-capability/references/api-tags/media-ingest-api.md +112 -0
  262. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +721 -0
  263. package/skills/meta-capability/references/api-tags/reddit-app-api.md +1057 -0
  264. package/skills/meta-capability/references/api-tags/sora2-api.md +737 -0
  265. package/skills/meta-capability/references/api-tags/temp-mail-api.md +136 -0
  266. package/skills/meta-capability/references/api-tags/threads-web-api.md +472 -0
  267. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +65 -0
  268. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +253 -0
  269. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +1393 -0
  270. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +179 -0
  271. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +3264 -0
  272. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +709 -0
  273. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +366 -0
  274. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +663 -0
  275. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +2516 -0
  276. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +220 -0
  277. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +96 -0
  278. package/skills/meta-capability/references/api-tags/twitter-web-api.md +562 -0
  279. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +405 -0
  280. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +431 -0
  281. package/skills/meta-capability/references/api-tags/weibo-app-api.md +851 -0
  282. package/skills/meta-capability/references/api-tags/weibo-web-api.md +470 -0
  283. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +1405 -0
  284. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +534 -0
  285. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +934 -0
  286. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +757 -0
  287. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +762 -0
  288. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +308 -0
  289. package/skills/meta-capability/references/api-tags/youtube-web-api.md +934 -0
  290. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +717 -0
  291. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +1384 -0
  292. package/skills/meta-capability/references/config-templates/defaults.yaml +18 -0
  293. package/skills/meta-capability/references/dispatch.md +27 -0
  294. package/skills/meta-capability/references/execution-guidelines.md +25 -0
  295. package/skills/meta-capability/references/implemented-route-map.md +177 -0
  296. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +75 -0
  297. package/skills/meta-capability/scripts/__init__.py +1 -0
  298. package/skills/meta-capability/scripts/call_route.py +141 -0
  299. package/skills/meta-capability/scripts/core/__init__.py +1 -0
  300. package/skills/meta-capability/scripts/core/bootstrap_env.py +32 -0
  301. package/skills/meta-capability/scripts/core/config_loader.py +204 -0
  302. package/skills/meta-capability/scripts/core/tikomni_common.py +443 -0
  303. package/skills/meta-capability/scripts/test_auth.py +98 -0
  304. package/skills/single-work-analysis/SKILL.md +62 -0
  305. package/skills/single-work-analysis/agents/openai.yaml +4 -0
  306. package/skills/single-work-analysis/env.example +36 -0
  307. package/skills/single-work-analysis/references/api-capability-index.md +92 -0
  308. package/skills/single-work-analysis/references/api-contracts/asr-api.md +130 -0
  309. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +776 -0
  310. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +2017 -0
  311. package/skills/single-work-analysis/references/api-contracts/demo-api.md +717 -0
  312. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +3594 -0
  313. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +2274 -0
  314. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +1575 -0
  315. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +3254 -0
  316. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +4118 -0
  317. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +5544 -0
  318. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +1916 -0
  319. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +1540 -0
  320. package/skills/single-work-analysis/references/api-contracts/health-check.md +69 -0
  321. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +78 -0
  322. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +2256 -0
  323. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +2011 -0
  324. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +2630 -0
  325. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +44 -0
  326. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +1518 -0
  327. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +1242 -0
  328. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +1088 -0
  329. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +1949 -0
  330. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +126 -0
  331. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +1142 -0
  332. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +2025 -0
  333. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +2266 -0
  334. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +208 -0
  335. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +897 -0
  336. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +134 -0
  337. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +494 -0
  338. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +5947 -0
  339. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +968 -0
  340. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +5735 -0
  341. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +1951 -0
  342. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +742 -0
  343. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +1890 -0
  344. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +4448 -0
  345. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +342 -0
  346. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +143 -0
  347. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +989 -0
  348. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +809 -0
  349. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +677 -0
  350. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +1547 -0
  351. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +798 -0
  352. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +2459 -0
  353. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +1291 -0
  354. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +1683 -0
  355. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +1324 -0
  356. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +1209 -0
  357. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +489 -0
  358. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +2636 -0
  359. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +2660 -0
  360. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +2315 -0
  361. package/skills/single-work-analysis/references/api-tags/asr-api.md +100 -0
  362. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +482 -0
  363. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +1267 -0
  364. package/skills/single-work-analysis/references/api-tags/demo-api.md +365 -0
  365. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +2012 -0
  366. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +1428 -0
  367. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +694 -0
  368. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +694 -0
  369. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +1059 -0
  370. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +3314 -0
  371. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +935 -0
  372. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +925 -0
  373. package/skills/single-work-analysis/references/api-tags/health-check.md +40 -0
  374. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +57 -0
  375. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +1224 -0
  376. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +1147 -0
  377. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +1123 -0
  378. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +45 -0
  379. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +846 -0
  380. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +551 -0
  381. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +687 -0
  382. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +1105 -0
  383. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +112 -0
  384. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +721 -0
  385. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +1057 -0
  386. package/skills/single-work-analysis/references/api-tags/sora2-api.md +737 -0
  387. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +136 -0
  388. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +472 -0
  389. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +65 -0
  390. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +253 -0
  391. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +1393 -0
  392. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +179 -0
  393. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +3264 -0
  394. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +709 -0
  395. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +366 -0
  396. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +663 -0
  397. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +2516 -0
  398. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +220 -0
  399. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +96 -0
  400. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +562 -0
  401. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +405 -0
  402. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +431 -0
  403. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +851 -0
  404. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +470 -0
  405. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +1405 -0
  406. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +534 -0
  407. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +934 -0
  408. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +757 -0
  409. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +762 -0
  410. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +308 -0
  411. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +934 -0
  412. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +717 -0
  413. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +1384 -0
  414. package/skills/single-work-analysis/references/asr-and-fallback.md +20 -0
  415. package/skills/single-work-analysis/references/config-templates/defaults.yaml +58 -0
  416. package/skills/single-work-analysis/references/contracts/work-card-fields.md +41 -0
  417. package/skills/single-work-analysis/references/platform-guides/douyin.md +47 -0
  418. package/skills/single-work-analysis/references/platform-guides/generic.md +43 -0
  419. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +54 -0
  420. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +28 -0
  421. package/skills/single-work-analysis/references/prompt-contracts/cta.md +24 -0
  422. package/skills/single-work-analysis/references/prompt-contracts/hook.md +25 -0
  423. package/skills/single-work-analysis/references/prompt-contracts/insight.md +47 -0
  424. package/skills/single-work-analysis/references/prompt-contracts/structure.md +25 -0
  425. package/skills/single-work-analysis/references/prompt-contracts/style.md +27 -0
  426. package/skills/single-work-analysis/references/prompt-contracts/summary.md +29 -0
  427. package/skills/single-work-analysis/references/prompt-contracts/topic.md +29 -0
  428. package/skills/single-work-analysis/references/schemas/work-card.schema.json +39 -0
  429. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +75 -0
  430. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  431. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  432. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +133 -0
  433. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +35 -0
  434. package/skills/single-work-analysis/scripts/core/config_loader.py +418 -0
  435. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +173 -0
  436. package/skills/single-work-analysis/scripts/core/progress_report.py +111 -0
  437. package/skills/single-work-analysis/scripts/core/storage_router.py +253 -0
  438. package/skills/single-work-analysis/scripts/core/tikomni_common.py +588 -0
  439. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  440. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  441. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +1189 -0
  442. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +95 -0
  443. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  444. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  445. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +224 -0
  446. package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +1233 -0
  447. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +200 -0
  448. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  449. package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +2156 -0
  450. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  451. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +1402 -0
@@ -0,0 +1,1122 @@
1
+ #!/usr/bin/env python3
2
+ """Support helpers for author_home v2 standardized input / aggregate stats / validation."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import json
7
+ import math
8
+ import re
9
+ from collections import Counter
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional, Sequence, Tuple
13
+
14
+ import jsonschema
15
+
# Schema / prompt-contract assets live three directory levels above this
# script, under the skill's references/ tree.
INPUT_SCHEMA_PATH = Path(__file__).resolve().parents[2] / "references" / "schemas" / "author-analysis-input-v1.schema.json"
OUTPUT_SCHEMA_PATH = Path(__file__).resolve().parents[2] / "references" / "schemas" / "author-analysis-v2.schema.json"
PROMPT_CONTRACT_PATH = Path(__file__).resolve().parents[2] / "references" / "prompt-contracts" / "author-analysis-v2.md"

# Closed vocabularies used to recognize / validate labelled field values.
LOW_HIGH_MID = {"low", "mid", "high"}
RELATIONSHIP_DISTANCE = {"near", "mid", "far"}
AUTHOR_TYPES = {
    "ai_content_growth", "benchmark_deconstruction", "workflow_automation", "tool_education", "business_cognition",
    "ip_growth", "industry_commentary", "case_breakdown", "efficiency_hacking", "creator_education",
}
PRIMARY_ROLES = {"coach", "operator", "researcher", "builder", "companion", "commentator", "teacher", "practitioner", "evangelist", "critic"}
TRUST_SOURCES = {"results", "experience", "case_studies", "systematized_method", "demonstration", "strong_judgment", "consistency", "authority_signal", "community_signal"}
PROBLEM_DEFINITION_STYLES = {"cognition_problem", "execution_problem", "model_problem", "stage_mismatch", "positioning_problem", "traffic_problem", "conversion_problem", "offer_problem", "capability_problem"}
REASONING_MODES = {"benchmark_reasoning", "concept_deconstruction", "contrast_reasoning", "case_induction", "result_backtracking", "anti_common_sense", "framework_building", "workflow_packaging", "data_validation"}
HOOK_TYPES_ENUM = {"result_hook", "curiosity_hook", "shortcut_hook", "pain_point_hook", "comparison_hook", "proof_hook"}
STRUCTURE_TYPES = {"hook_demo_result", "benchmark_then_clone", "problem_solution_cta", "proof_then_pitch"}
CTA_TYPES = {"comment_cta", "private_message_cta", "follow_cta", "collect_cta", "share_cta", "lead_magnet_cta", "weak_cta", "no_cta"}
CONTENT_FORMS = {"talking_head", "voiceover", "screen_recording", "slideshow", "mixed_edit", "live_clip", "interview_clip"}
STYLE_MARKERS_ENUM = {"rhetorical_question", "imperative_tone", "strong_assertion", "self_mockery", "quote_like_phrase", "emotional_wording"}
FUNNEL_ROLES = {"acquire_attention", "build_trust", "educate", "qualify", "convert", "upsell", "retain", "occupy_mindshare"}
LIKELY_PRODUCTS = {"workflow_templates", "prompt_pack", "training_camp", "community", "consulting", "done_for_you_service", "course", "membership", "software_tool", "report"}
BUSINESS_MODEL_SIGNALS = {"high_ip_dependency", "strong_toolization", "template_scalability", "service_extension", "low_marginal_distribution", "high_touch_delivery", "community_leverage", "content_led_growth"}
CORE_COGNITIVE_ACTIONS = {"benchmark_deconstruction", "workflow_packaging", "concept_deconstruction", "contrast_reasoning", "case_induction", "framework_extraction", "result_compression", "anti_common_sense_reframing"}
TOPIC_SOURCES = {"benchmark_authors", "viral_cases", "audience_questions", "workflow_demos", "industry_pain_points", "personal_experience", "tool_updates", "client_cases", "market_signals"}
TOPIC_GOALS = {"acquire_attention", "build_trust", "show_capability", "drive_conversion", "occupy_mindshare", "differentiate_positioning", "nurture_audience"}
WINNING_CONTENT_STRUCTURES = set(STRUCTURE_TYPES)
MEMORABILITY_SOURCES = {"strong_keywords", "sharp_judgment", "identity_projection", "repeatable_phrases", "result_imagery", "emotional_contrast", "unexpected_framing"}
EMOTION_PATTERNS = {"calm_assertive", "sharp_direct", "playful_mocking", "high_energy_excitement", "pragmatic_cold", "empathetic_supportive", "provocative_challenge"}
TRAFFIC_DRIVERS = {"benchmark_target", "result_promise", "shortcut_imagination", "curiosity_gap", "identity_desire", "fear_of_missing_out", "proof_signal", "controversy_edge"}
# Tokens (Chinese and English) too generic to count as keywords in
# _unique_tokens.
STOPWORDS = {
    "我们", "你们", "他们", "这个", "那个", "一个", "一种", "已经", "如果", "因为", "所以", "然后", "就是", "没有", "自己",
    "可以", "还是", "需要", "以及", "并且", "真的", "今天", "现在", "内容", "作者", "账号", "作品", "视频", "老师", "方法",
    "the", "and", "for", "that", "with", "from", "this", "you", "your", "are", "was", "were", "have", "has", "had", "into",
}
# Per-path memo of parsed JSON schemas; populated lazily by _load_schema.
SCHEMA_CACHE: Dict[Path, Dict[str, Any]] = {}

# Required keys for each top-level section of an author-analysis v2 payload.
REQUIRED_V2_FIELDS = {
    "author_positioning": ["one_liner", "author_type", "primary_role", "target_audience", "core_problem_solved", "core_value_proposition", "evidence"],
    "trust_model": ["primary_trust_source", "secondary_trust_sources", "trust_building_mechanisms", "trust_risks", "relationship_posture", "evidence"],
    "cognitive_engine": ["worldview", "value_priority", "problem_definition_style", "reasoning_modes", "knowledge_sources", "judgment_style", "core_cognitive_actions", "evidence"],
    "expression_hooks": ["language_style", "hook_keywords", "hook_types", "argument_patterns", "emotion_patterns", "memorability_sources", "evidence"],
    "content_mechanism": ["topic_sources", "topic_goals", "winning_content_structures", "repeatable_series", "traffic_drivers", "content_flywheel", "cross_platform_variation", "dominant_themes", "theme_clusters", "evidence"],
    "commercial_bridge": ["content_role_in_funnel", "likely_products", "conversion_path", "content_product_fit", "business_model_signals", "commercial_risks", "evidence"],
    "core_tensions": ["tensions", "most_important_tension", "evidence"],
    "evidence_pack": ["sample_size", "sample_confidence", "representative_works", "top_keywords", "observed_hooks", "observed_ctas", "observed_structures", "notes"],
    "clone_guidance": ["copyable_elements", "non_copyable_elements", "borrowable_flavor", "danger_zones", "if_only_learn_one_thing"],
}
63
+
64
+
65
+ def _safe_text(value: Any) -> str:
66
+ if value is None:
67
+ return ""
68
+ if isinstance(value, str):
69
+ return value.strip()
70
+ return str(value).strip()
71
+
72
+
73
+ def _safe_int(value: Any, default: int = 0) -> int:
74
+ try:
75
+ if value is None:
76
+ return default
77
+ if isinstance(value, bool):
78
+ return int(value)
79
+ if isinstance(value, (int, float)):
80
+ return int(value)
81
+ text = _safe_text(value).replace(",", "")
82
+ return int(float(text)) if text else default
83
+ except Exception:
84
+ return default
85
+
86
+
87
+ def _safe_float(value: Any, default: float = 0.0) -> float:
88
+ try:
89
+ if value is None:
90
+ return default
91
+ if isinstance(value, bool):
92
+ return float(int(value))
93
+ if isinstance(value, (int, float)):
94
+ return float(value)
95
+ text = _safe_text(value).replace(",", "")
96
+ return float(text) if text else default
97
+ except Exception:
98
+ return default
99
+
100
+
101
+ def _clamp(value: float, low: float, high: float) -> float:
102
+ return max(low, min(high, value))
103
+
104
+
105
def load_json_schema(path: Path) -> Dict[str, Any]:
    """Read and parse a JSON schema file; any failure yields an empty dict."""
    try:
        raw = path.read_text(encoding="utf-8")
        return json.loads(raw)
    except Exception:
        return {}
110
+
111
+
112
def prompt_contract_text() -> str:
    """Return the stripped author-analysis prompt contract, or "" if unreadable."""
    try:
        text = PROMPT_CONTRACT_PATH.read_text(encoding="utf-8")
    except Exception:
        return ""
    return text.strip()
117
+
118
+
119
def _load_schema(path: Path) -> Dict[str, Any]:
    """Return the parsed schema at *path*, memoized in SCHEMA_CACHE."""
    hit = SCHEMA_CACHE.get(path)
    if hit is not None:
        return hit
    parsed = load_json_schema(path)
    SCHEMA_CACHE[path] = parsed
    return parsed
126
+
127
+
128
def _schema_errors(payload: Any, path: Path) -> List[Dict[str, str]]:
    """Validate *payload* against the JSON schema stored at *path*.

    Returns one {"field", "reason"} row per validation error, ordered by the
    instance path ("$" for root-level errors); an empty list when the schema
    is missing or empty; and a single "schema_runtime" row if the validator
    itself raises.
    """
    schema = _load_schema(path)
    if not schema:
        return []
    try:
        checker = jsonschema.Draft202012Validator(schema)
        found = sorted(checker.iter_errors(payload), key=lambda err: list(err.absolute_path))
        rows: List[Dict[str, str]] = []
        for err in found:
            dotted = ".".join(str(seg) for seg in err.absolute_path)
            rows.append({"field": dotted or "$", "reason": f"schema:{err.message}"})
        return rows
    except Exception as error:
        return [{"field": "$", "reason": f"schema_runtime:{type(error).__name__}:{error}"}]
141
+
142
+
143
+ def _dedupe_keep_order(values: Sequence[str]) -> List[str]:
144
+ result: List[str] = []
145
+ seen = set()
146
+ for value in values:
147
+ clean = _safe_text(value)
148
+ if not clean or clean in seen:
149
+ continue
150
+ seen.add(clean)
151
+ result.append(clean)
152
+ return result
153
+
154
+
155
+ def _dedupe_error_list(errors: Sequence[Dict[str, str]]) -> List[Dict[str, str]]:
156
+ result: List[Dict[str, str]] = []
157
+ seen = set()
158
+ for item in errors:
159
+ field = _safe_text(item.get("field"))
160
+ reason = _safe_text(item.get("reason"))
161
+ key = (field, reason)
162
+ if key in seen:
163
+ continue
164
+ seen.add(key)
165
+ result.append({"field": field, "reason": reason})
166
+ return result
167
+
168
+
169
+ def _parse_datetime(value: Any) -> Optional[datetime]:
170
+ if value is None:
171
+ return None
172
+ if isinstance(value, datetime):
173
+ return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
174
+ num = _safe_int(value, default=0)
175
+ if num > 0:
176
+ if num > 1_000_000_000_000:
177
+ num //= 1000
178
+ try:
179
+ return datetime.fromtimestamp(num, tz=timezone.utc)
180
+ except Exception:
181
+ return None
182
+ text = _safe_text(value)
183
+ if not text:
184
+ return None
185
+ for candidate in (text, text.replace("Z", "+00:00")):
186
+ try:
187
+ parsed = datetime.fromisoformat(candidate)
188
+ return parsed if parsed.tzinfo else parsed.replace(tzinfo=timezone.utc)
189
+ except Exception:
190
+ continue
191
+ for fmt in ("%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y/%m/%d %H:%M:%S", "%Y/%m/%d"):
192
+ try:
193
+ return datetime.strptime(text, fmt).replace(tzinfo=timezone.utc)
194
+ except Exception:
195
+ continue
196
+ return None
197
+
198
+
199
def _publish_days_ago(value: Any) -> Optional[int]:
    """Whole days elapsed since *value* (never negative); None if unparseable."""
    moment = _parse_datetime(value)
    if moment is None:
        return None
    elapsed = datetime.now(timezone.utc) - moment
    return max(int(elapsed.total_seconds() // 86400), 0)
204
+
205
+
206
+ def _first_sentence(text: str) -> str:
207
+ if not text:
208
+ return ""
209
+ units = [part.strip() for part in re.split(r"[。!?!?\n]+", text) if part.strip()]
210
+ return units[0] if units else text[:80]
211
+
212
+
213
+ def _unique_tokens(text: str) -> List[str]:
214
+ if not text:
215
+ return []
216
+ lowered = text.lower()
217
+ tokens: List[str] = []
218
+ for token in re.findall(r"[a-z0-9_]{3,24}", lowered):
219
+ if token not in STOPWORDS:
220
+ tokens.append(token)
221
+ for block in re.findall(r"[\u4e00-\u9fff]{2,8}", text):
222
+ if block not in STOPWORDS:
223
+ tokens.append(block)
224
+ return _dedupe_keep_order(tokens)
225
+
226
+
227
+ def _top_counter(counter: Counter, *, limit: int = 10) -> List[Dict[str, Any]]:
228
+ total = sum(counter.values())
229
+ rows: List[Dict[str, Any]] = []
230
+ for key, count in counter.most_common(limit):
231
+ rows.append({"value": key, "count": int(count), "ratio": round((count / total), 4) if total else 0.0})
232
+ return rows
233
+
234
+
235
def _distribution_from_values(values: Sequence[str], *, limit: int = 10) -> Dict[str, Any]:
    """Build a {"total", "items"} frequency summary of the non-blank values."""
    cleaned = (_safe_text(value) for value in values)
    counter = Counter(item for item in cleaned if item)
    return {"total": int(sum(counter.values())), "items": _top_counter(counter, limit=limit)}
238
+
239
+
240
def _merged_text(work: Dict[str, Any]) -> str:
    """Join title, caption and primary transcript text with newlines, skipping blanks."""
    title = _safe_text(work.get("title"))
    caption = _safe_text(work.get("caption_raw") or work.get("desc"))
    body = _safe_text(work.get("primary_text") or work.get("asr_clean") or work.get("asr_raw"))
    return "\n".join(part for part in (title, caption, body) if part)
247
+
248
+
249
def _performance_metrics(work: Dict[str, Any]) -> Tuple[int, int, int, int, int]:
    """Extract (digg, comment, collect, share, play) counts for a work.

    Flat "*_count" fields take precedence; the nested "metrics" dict is the
    fallback for each counter.
    """
    nested = work.get("metrics")
    metrics = nested if isinstance(nested, dict) else {}
    pairs = (
        ("digg_count", "like"),
        ("comment_count", "comment"),
        ("collect_count", "collect"),
        ("share_count", "share"),
        ("play_count", "play"),
    )
    return tuple(_safe_int(work.get(flat), _safe_int(metrics.get(key), 0)) for flat, key in pairs)
258
+
259
+
260
+ def _score_signals(texts: Sequence[Tuple[str, float]], mapping: Sequence[Tuple[str, Sequence[str]]], *, default: str) -> str:
261
+ scores = {label: 0.0 for label, _ in mapping}
262
+ for text, weight in texts:
263
+ lowered = _safe_text(text).lower()
264
+ if not lowered:
265
+ continue
266
+ for label, tokens in mapping:
267
+ hit_count = sum(1 for token in tokens if token and token.lower() in lowered)
268
+ if hit_count:
269
+ scores[label] += weight * hit_count
270
+ ranked = sorted(scores.items(), key=lambda item: (-item[1], item[0]))
271
+ return ranked[0][0] if ranked and ranked[0][1] > 0 else default
272
+
273
+
274
def _pick_hook_type(text: str, existing: Any = None) -> str:
    """Classify a work's opening hook.

    A pre-labelled value already in HOOK_TYPES_ENUM wins; otherwise the first
    sentence (weight 2.0) and the first 180 chars (weight 1.0) are scored
    against keyword groups.
    """
    labelled = _safe_text(existing)
    if labelled in HOOK_TYPES_ENUM:
        return labelled
    opening = _first_sentence(text)
    keyword_groups = [
        ("result_hook", ["结果", "涨粉", "成交", "翻倍", "跑通", "案例结果"]),
        ("curiosity_hook", ["为什么", "怎么", "?", "?", "真相", "你知道吗"]),
        ("shortcut_hook", ["一键", "直接", "立刻", "马上", "不用", "三步", "复制"]),
        ("pain_point_hook", ["不会", "卡住", "焦虑", "没流量", "做不出来", "误区"]),
        ("comparison_hook", ["对比", "vs", "还是", "比", "A还是B"]),
        ("proof_hook", ["案例", "证明", "数据", "实测", "截图", "后台"]),
    ]
    fallback = "curiosity_hook" if opening else "result_hook"
    return _score_signals([(opening, 2.0), (text[:180], 1.0)], keyword_groups, default=fallback)
291
+
292
+
293
def _pick_structure_type(text: str, existing: Any = None) -> str:
    """Classify the work's narrative structure (pre-labelled value wins)."""
    labelled = _safe_text(existing)
    if labelled in STRUCTURE_TYPES:
        return labelled
    keyword_groups = [
        ("benchmark_then_clone", ["对标", "拆解", "复刻", "照着做", "临摹"]),
        ("problem_solution_cta", ["问题", "解决", "评论", "私信", "领取", "回复"]),
        ("proof_then_pitch", ["案例", "证明", "结果", "报名", "咨询", "私信"]),
        ("hook_demo_result", ["演示", "实操", "结果", "前后对比", "跑一遍"]),
    ]
    return _score_signals([(text, 1.0)], keyword_groups, default="hook_demo_result")
307
+
308
+
309
def _pick_cta_type(text: str, existing: Any = None) -> str:
    """Classify the call-to-action; the last three non-blank lines get double weight."""
    labelled = _safe_text(existing)
    if labelled in CTA_TYPES:
        return labelled
    closing_lines = [line.strip() for line in _safe_text(text).splitlines()[-3:] if line.strip()]
    tail = "\n".join(closing_lines)
    keyword_groups = [
        ("comment_cta", ["评论", "留言", "扣1", "回复区"]),
        ("private_message_cta", ["私信", "加我", "vx", "微信", "主页联系"]),
        ("lead_magnet_cta", ["领取", "模板", "资料", "清单", "关键词"]),
        ("collect_cta", ["收藏", "存下", "保存"]),
        ("share_cta", ["转发", "分享给", "发给"]),
        ("follow_cta", ["关注", "下期见"]),
    ]
    return _score_signals([(tail, 2.0), (text, 0.8)], keyword_groups, default="no_cta")
326
+
327
+
328
def _style_markers(text: str, existing: Any = None) -> List[str]:
    """Collect up to six style-marker labels for a work.

    Pre-labelled markers already in STYLE_MARKERS_ENUM come first; keyword
    sniffing over the lowercased text then appends detected markers.
    """
    found: List[str] = []
    if isinstance(existing, list):
        for item in existing:
            clean = _safe_text(item)
            if clean in STYLE_MARKERS_ENUM:
                found.append(clean)
    haystack = _safe_text(text).lower()
    detectors = [
        ("rhetorical_question", ["为什么", "怎么", "?", "?"]),
        ("imperative_tone", ["一定", "必须", "直接", "马上", "立刻"]),
        ("strong_assertion", ["就是", "本质上", "根本", "一定要"]),
        ("self_mockery", ["我自己也踩过", "我之前也傻", "我也翻车", "我也被打脸"]),
        ("quote_like_phrase", ["‘", "’", "“", "”", "所谓", "一句话说"]),
        ("emotional_wording", ["焦虑", "崩溃", "爽", "绝了", "离谱", "炸裂"]),
    ]
    for label, tokens in detectors:
        if any(token.lower() in haystack for token in tokens):
            found.append(label)
    return _dedupe_keep_order(found)[:6]
345
+
346
+
347
def _pick_content_form(work: Dict[str, Any]) -> str:
    """Infer the content form of a work (pre-labelled value wins).

    Falls back to keyword checks over the merged text, then to the work
    modality: text -> slideshow, video -> talking_head, else voiceover.
    """
    labelled = _safe_text(work.get("content_form"))
    if labelled in CONTENT_FORMS:
        return labelled
    modality = _safe_text(work.get("work_modality")).lower()
    text = _merged_text(work)
    if modality == "text":
        return "slideshow"
    if "screen" in text.lower() or "录屏" in text:
        return "screen_recording"
    if "采访" in text or "对谈" in text:
        return "interview_clip"
    if "直播" in text:
        return "live_clip"
    if modality == "video":
        return "talking_head"
    return "voiceover"
362
+
363
+
364
def _normalize_work(profile: Dict[str, Any], work: Dict[str, Any]) -> Dict[str, Any]:
    """Flatten one raw work record into the standardized analysis shape.

    Pulls engagement counts (flat fields with nested "metrics" fallback),
    derives a weighted performance score plus a recency-normalized variant,
    resolves the primary text / modality, and pre-classifies hook, structure,
    CTA and style markers.  "bucket" and the rank fields are placeholders
    filled later by _assign_buckets / _assign_recent_30d_ranks.
    """
    digg, comment, collect, share, play = _performance_metrics(work)
    publish_time = work.get("publish_time") or work.get("create_time") or work.get("create_time_sec")
    publish_days_ago = _publish_days_ago(publish_time)
    # Weighted engagement: collects and shares weigh more than likes/comments.
    performance_score = round(0.15 * digg + 0.20 * comment + 0.35 * collect + 0.30 * share, 4)
    # Dampen older works via a log-of-age divisor (>= log(2), never zero).
    norm_divisor = math.log((publish_days_ago or 0) + 2)
    performance_score_norm = round((performance_score / norm_divisor) if norm_divisor > 0 else performance_score, 4)
    title = _safe_text(work.get("title"))
    caption_raw = _safe_text(work.get("caption_raw") or work.get("desc"))
    primary_text = _safe_text(work.get("primary_text") or work.get("asr_clean") or work.get("asr_raw") or caption_raw)
    primary_text_source_raw = _safe_text(work.get("primary_text_source"))
    # Trust an explicit source label when valid; otherwise infer from modality.
    primary_text_source = (
        primary_text_source_raw
        if primary_text_source_raw in {"asr_clean", "caption_raw"}
        else ("asr_clean" if _safe_text(work.get("work_modality")) == "video" else "caption_raw")
    )
    # Any downloadable video URL or raw ASR implies a video work.
    work_modality = _safe_text(work.get("work_modality")) or ("video" if _safe_text(work.get("video_download_url") or work.get("video_down_url") or work.get("asr_raw")) else "text")
    merged = "\n".join(part for part in [title, caption_raw, primary_text] if part)
    return {
        "platform_work_id": _safe_text(work.get("platform_work_id")),
        "title": title,
        "caption_raw": caption_raw,
        "work_modality": work_modality,
        "primary_text": primary_text,
        "primary_text_source": primary_text_source,
        "published_date": _safe_text(work.get("published_date")) or "",
        "publish_time": publish_time,
        "publish_days_ago": publish_days_ago,
        "duration_ms": _safe_int(work.get("duration_ms"), 0),
        "digg_count": digg,
        "comment_count": comment,
        "collect_count": collect,
        "share_count": share,
        "play_count": play,
        "content_form": _pick_content_form(work),
        "tags": list(work.get("tags") or []) if isinstance(work.get("tags"), list) else [],
        "author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
        "author_name": _safe_text(profile.get("nickname")) or "作者",
        "performance_score": performance_score,
        "performance_score_norm": performance_score_norm,
        "bucket": "",  # filled by _assign_buckets
        "hook_type": _pick_hook_type(merged, work.get("hook_type") or work.get("hook")),
        "structure_type": _pick_structure_type(merged, work.get("structure_type") or work.get("content_structure")),
        "cta_type": _pick_cta_type(merged, work.get("cta_type") or work.get("cta")),
        "style_markers": _style_markers(merged, work.get("style_markers") or work.get("style_tags")),
        "analysis_eligibility": _safe_text(work.get("analysis_eligibility")) or "eligible",
        "analysis_exclusion_reason": _safe_text(work.get("analysis_exclusion_reason")),
        "analysis_artifact_status": work.get("analysis_artifact_status"),
        "recent_30d_score_rank": None,  # filled by _assign_recent_30d_ranks
    }
414
+
415
+
416
+ def _assign_recent_30d_ranks(items: List[Dict[str, Any]]) -> None:
417
+ recent = [item for item in items if _safe_int(item.get("publish_days_ago"), 999999) <= 30]
418
+ ranked = sorted(recent, key=lambda item: (-_safe_float(item.get("performance_score_norm"), 0.0), _safe_text(item.get("platform_work_id"))))
419
+ for idx, item in enumerate(ranked):
420
+ item["recent_30d_score_rank"] = idx + 1
421
+ recent_ids = {_safe_text(item.get("platform_work_id")) for item in ranked}
422
+ for item in items:
423
+ if _safe_text(item.get("platform_work_id")) not in recent_ids:
424
+ item["recent_30d_score_rank"] = None
425
+
426
+
427
def _assign_buckets(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Sort works by normalized score and tag each with a quality bucket.

    Small corpora (< 20 works) use fixed index cutoffs; otherwise percentile
    cutoffs at 20% / 50% / 85% split Top / Strong / Mid / Bottom.  Also
    writes the 1-based "all_time_score_rank" and delegates recent-30d ranks.
    Returns the sorted list; items are mutated in place.
    """
    total = len(items)
    ordered = sorted(
        items,
        key=lambda row: (-_safe_float(row.get("performance_score_norm"), 0.0), _safe_text(row.get("platform_work_id"))),
    )
    for position, row in enumerate(ordered):
        if total < 20:
            if position < 4:
                label = "Top"
            elif position < 8:
                label = "Strong"
            elif position < max(10, total - 2):
                label = "Mid"
            else:
                label = "Bottom"
        else:
            percentile = (position + 1) / total
            if percentile <= 0.20:
                label = "Top"
            elif percentile <= 0.50:
                label = "Strong"
            elif percentile <= 0.85:
                label = "Mid"
            else:
                label = "Bottom"
        row["bucket"] = label
        row["all_time_score_rank"] = position + 1
    _assign_recent_30d_ranks(ordered)
    return ordered
446
+
447
+
448
+ def _scaled_bucket_quota(sample_size: int) -> Dict[str, int]:
449
+ if sample_size <= 0:
450
+ return {"Top": 0, "Strong": 0, "Mid": 0, "Bottom": 0}
451
+ base = {"Top": 18, "Strong": 18, "Mid": 14, "Bottom": 10}
452
+ raw = {key: sample_size * (value / 60.0) for key, value in base.items()}
453
+ quota = {key: int(math.floor(value)) for key, value in raw.items()}
454
+ quota["Bottom"] = max(quota.get("Bottom", 0), 1)
455
+ deficit = sample_size - sum(quota.values())
456
+ order = sorted(raw.items(), key=lambda item: raw[item[0]] - quota[item[0]], reverse=True)
457
+ idx = 0
458
+ while deficit > 0 and order:
459
+ key = order[idx % len(order)][0]
460
+ quota[key] += 1
461
+ deficit -= 1
462
+ idx += 1
463
+ while sum(quota.values()) > sample_size:
464
+ for key in ("Mid", "Strong", "Top", "Bottom"):
465
+ min_keep = 1 if key == "Bottom" else 0
466
+ if quota[key] > min_keep and sum(quota.values()) > sample_size:
467
+ quota[key] -= 1
468
+ if quota["Top"] + quota["Strong"] < math.ceil(sample_size * 0.55):
469
+ needed = math.ceil(sample_size * 0.55) - (quota["Top"] + quota["Strong"])
470
+ for _ in range(needed):
471
+ if quota["Mid"] > 0:
472
+ quota["Mid"] -= 1
473
+ quota["Top"] += 1
474
+ elif quota["Bottom"] > 1:
475
+ quota["Bottom"] -= 1
476
+ quota["Strong"] += 1
477
+ return quota
478
+
479
+
480
+ def _pick_sample_size(total: int) -> int:
481
+ if total <= 0:
482
+ return 0
483
+ return min(max(int(round(total * 0.30)), 40), 80, total)
484
+
485
+
486
+ def _value_variants(item: Dict[str, Any], field: str) -> List[str]:
487
+ value = item.get(field)
488
+ if isinstance(value, list):
489
+ return [_safe_text(v) for v in value if _safe_text(v)]
490
+ clean = _safe_text(value)
491
+ return [clean] if clean else []
492
+
493
+
494
def _pick_diverse_items(pool: List[Dict[str, Any]], *, selected_ids: set, limits: Sequence[Tuple[str, int]], cap: int) -> List[Dict[str, Any]]:
    """Greedy diversity pass over *pool*.

    For each (field, minimum) in *limits*, pick items until at least
    *minimum* distinct values of that field are covered, never exceeding
    *cap* picks overall.  Items already present in *selected_ids* (or picked
    earlier in this call) are skipped; *selected_ids* is not mutated here.
    """
    picked: List[Dict[str, Any]] = []
    # Per-field sets of values already represented by picked items.
    covered: Dict[str, set] = {field: set() for field, _ in limits}
    local_ids: set = set()
    for field, minimum in limits:
        if minimum <= 0:
            continue
        for item in pool:
            if len(picked) >= cap:
                return picked
            work_id = _safe_text(item.get("platform_work_id"))
            if not work_id or work_id in selected_ids or work_id in local_ids:
                continue
            # Only pick items contributing at least one new value for this field.
            candidates = [value for value in _value_variants(item, field) if value not in covered[field]]
            if not candidates:
                continue
            picked.append(item)
            local_ids.add(work_id)
            # A pick counts toward coverage of every tracked field, not just the current one.
            for sync_field, _ in limits:
                covered[sync_field].update(_value_variants(item, sync_field))
            # NOTE(review): break placement reconstructed from flattened source —
            # stopping this field's scan once its minimum coverage is reached; confirm.
            if len(covered[field]) >= minimum:
                break
    return picked
517
+
518
+
519
def _sample_standard_works(items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Sample works for analysis: diversity-first from Top, then per-bucket quotas.

    Selection phases:
      1. From the Top bucket, pick items maximizing coverage of
         content_form / hook_type / structure_type (up to the Top quota).
      2. Fill each bucket (Top, Strong, Mid, Bottom) up to its quota.
      3. Backfill from the full ranked list until ``sample_size`` is reached.
    """
    total = len(items)
    if total <= 0:
        return []
    sample_size = _pick_sample_size(total)
    quota = _scaled_bucket_quota(sample_size)
    bucket_groups: Dict[str, List[Dict[str, Any]]] = {"Top": [], "Strong": [], "Mid": [], "Bottom": []}
    for item in items:
        # setdefault keeps any unexpected bucket label as its own group;
        # empty labels fall back to "Mid".
        bucket_groups.setdefault(_safe_text(item.get("bucket")) or "Mid", []).append(item)
    selected: List[Dict[str, Any]] = []
    selected_ids: set = set()

    top_pool = bucket_groups.get("Top", [])
    for item in _pick_diverse_items(top_pool, selected_ids=selected_ids, limits=(("content_form", 3), ("hook_type", 2), ("structure_type", 2)), cap=quota.get("Top", 0)):
        selected.append(item)
        selected_ids.add(_safe_text(item.get("platform_work_id")))

    for bucket in ("Top", "Strong", "Mid", "Bottom"):
        pool = bucket_groups.get(bucket, [])
        # PERF FIX: previously the per-bucket count of already-selected rows was
        # recomputed with a full scan of `selected` for every candidate, making
        # this loop quadratic in the sample size. Count once, then increment
        # only when the appended item's bucket text matches (preserving the
        # original counting semantics exactly).
        bucket_count = sum(1 for row in selected if _safe_text(row.get("bucket")) == bucket)
        for item in pool:
            if bucket_count >= quota.get(bucket, 0):
                break
            work_id = _safe_text(item.get("platform_work_id"))
            if work_id in selected_ids:
                continue
            selected.append(item)
            selected_ids.add(work_id)
            if _safe_text(item.get("bucket")) == bucket:
                bucket_count += 1
    if len(selected) < sample_size:
        # Backfill in ranked order, skipping anything already picked.
        for item in items:
            work_id = _safe_text(item.get("platform_work_id"))
            if work_id in selected_ids:
                continue
            selected.append(item)
            selected_ids.add(work_id)
            if len(selected) >= sample_size:
                break
    return selected[:sample_size]
556
+
557
+
558
def _keyword_distribution_from_texts(texts: Sequence[str], *, limit: int = 20) -> Dict[str, Any]:
    """Aggregate unique tokens per text into a capped frequency distribution."""
    tally: Counter = Counter()
    for text in texts:
        # One count per token per text (work-level occurrence, not raw frequency).
        tally.update(_unique_tokens(text))
    return {"items": _top_counter(tally, limit=limit), "counting_mode": "work_occurrence"}
563
+
564
+
565
def _field_keyword_distribution(works: List[Dict[str, Any]], field: str, *, limit: int = 20) -> Dict[str, Any]:
    """Keyword distribution over a single text field across all works."""
    texts = [_safe_text(work.get(field)) for work in works]
    return _keyword_distribution_from_texts(texts, limit=limit)
567
+
568
+
569
def _publish_time_distribution(works: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Count publishes by weekday and hour; unparseable timestamps are tallied separately."""
    weekday_counts: Counter = Counter()
    hour_counts: Counter = Counter()
    missing = 0
    for work in works:
        moment = _parse_datetime(work.get("publish_time"))
        if moment is None:
            missing += 1
        else:
            # Counter keys are stringified so the output is JSON-friendly.
            weekday_counts[str(moment.weekday())] += 1
            hour_counts[str(moment.hour)] += 1
    return {"weekday": _top_counter(weekday_counts, limit=7), "hour": _top_counter(hour_counts, limit=24), "unavailable_count": missing}
581
+
582
+
583
def _duration_distribution(works: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Bucket work durations into short (<30s) / medium (<120s) / long.

    Returns ``{"available": False, "reason": "duration_unavailable"}`` when no
    work carries a positive ``duration_ms``.
    """
    durations: List[int] = []
    for work in works:
        # FIX: parse duration_ms once per work (previously `_safe_int` ran
        # twice per item — once in the filter and once for the value).
        duration = _safe_int(work.get("duration_ms"), 0)
        if duration > 0:
            durations.append(duration)
    if not durations:
        return {"available": False, "reason": "duration_unavailable"}
    counter = Counter()
    for duration in durations:
        counter["short" if duration < 30000 else ("medium" if duration < 120000 else "long")] += 1
    return {"available": True, "items": _top_counter(counter, limit=3)}
591
+
592
+
593
def _performance_distribution(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Summarize normalized performance scores: max/min plus descending percentiles."""
    scores = [_safe_float(entry.get("performance_score_norm"), 0.0) for entry in items]
    if not scores:
        return {"available": False, "reason": "no_scores"}
    ordered = sorted(scores, reverse=True)
    last = len(ordered) - 1

    def _pct(position: float) -> float:
        # Index into the DESCENDING list; clamp to valid bounds.
        idx = int(math.floor(last * position))
        idx = max(0, min(idx, last))
        return round(ordered[idx], 4)

    # ordered[0] / ordered[-1] are the max / min because the sort is descending.
    return {"available": True, "max": round(ordered[0], 4), "min": round(ordered[-1], 4), "p20": _pct(0.20), "p50": _pct(0.50), "p85": _pct(0.85)}
602
+
603
+
604
def _engagement_pattern(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Average digg/comment/collect/share/play counts across ``items``."""
    metrics = ("digg_count", "comment_count", "collect_count", "share_count", "play_count")
    if not items:
        # Zeroed averages (ints) for the empty group.
        empty: Dict[str, Any] = {"count": 0}
        for metric in metrics:
            empty[f"avg_{metric}"] = 0
        return empty
    totals = {metric: 0 for metric in metrics}
    for item in items:
        for metric in metrics:
            totals[metric] += _safe_int(item.get(metric), 0)
    count = len(items)
    result: Dict[str, Any] = {"count": count}
    for metric in metrics:
        result[f"avg_{metric}"] = round(totals[metric] / count, 2)
    return result
620
+
621
+
622
def _compare_bucket_groups(items: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Build per-bucket (Top/Mid/Bottom) summaries for delta comparison.

    Each summary carries keyword distributions for the text fields, enum-value
    distributions, and the averaged engagement pattern.
    """
    summary: Dict[str, Any] = {}
    for bucket in ("Top", "Mid", "Bottom"):
        works = [item for item in items if item.get("bucket") == bucket]
        block: Dict[str, Any] = {}
        for out_key, field in (("title_keywords", "title"), ("caption_keywords", "caption_raw"), ("primary_text_keywords", "primary_text")):
            block[out_key] = _field_keyword_distribution(works, field, limit=8).get("items", [])
        for out_key, field in (("hook_types", "hook_type"), ("structure_types", "structure_type"), ("cta_types", "cta_type"), ("content_forms", "content_form")):
            block[out_key] = _distribution_from_values([_safe_text(work.get(field)) for work in works], limit=6).get("items", [])
        block["engagement_pattern"] = _engagement_pattern(works)
        summary[bucket.lower()] = block
    return summary
637
+
638
+
639
def build_author_analysis_input_v1(*, profile: Dict[str, Any], works: List[Dict[str, Any]], platform: str) -> Tuple[Dict[str, Any], List[Dict[str, str]]]:
    """Build the author_analysis_input_v1 payload from a profile and raw works.

    Pipeline: normalize each dict work, keep only works whose
    analysis_eligibility is "eligible", assign buckets, then sample.
    Returns (payload, errors) where errors come from
    validate_author_analysis_input_v1 run on the freshly built payload.
    """
    normalized = [_normalize_work(profile, work) for work in works if isinstance(work, dict)]
    eligible = [item for item in normalized if _safe_text(item.get("analysis_eligibility")) == "eligible"]
    excluded_count = len(normalized) - len(eligible)
    ranked = _assign_buckets(eligible)
    sampled = _sample_standard_works(ranked)
    # Global distributions are computed over ALL ranked works, not just the sample.
    aggregate_stats = {
        "total_works": len(ranked),
        "excluded_works_count": excluded_count,
        "sampled_works_count": len(sampled),
        "sampling_ratio": round((len(sampled) / len(ranked)), 4) if ranked else 0.0,
        "sampling_mode": "standard",
        "analysis_window": "mixed",
        "global_title_keyword_distribution": _field_keyword_distribution(ranked, "title"),
        "global_caption_keyword_distribution": _field_keyword_distribution(ranked, "caption_raw"),
        "global_primary_text_keyword_distribution": _field_keyword_distribution(ranked, "primary_text"),
        "global_hook_type_distribution": _distribution_from_values([_safe_text(item.get("hook_type")) for item in ranked]),
        "global_structure_type_distribution": _distribution_from_values([_safe_text(item.get("structure_type")) for item in ranked]),
        "global_cta_type_distribution": _distribution_from_values([_safe_text(item.get("cta_type")) for item in ranked]),
        "global_content_form_distribution": _distribution_from_values([_safe_text(item.get("content_form")) for item in ranked]),
        "global_work_modality_distribution": _distribution_from_values([_safe_text(item.get("work_modality")) for item in ranked]),
        "global_performance_distribution": _performance_distribution(ranked),
        "global_publish_time_distribution": _publish_time_distribution(ranked),
        "global_duration_distribution": _duration_distribution(ranked),
        "global_bucket_distribution": _distribution_from_values([_safe_text(item.get("bucket")) for item in ranked], limit=4),
        "global_top_vs_mid_vs_bottom_deltas": _compare_bucket_groups(ranked),
    }
    payload = {
        "author_profile": {
            # Profile platform wins; the caller-supplied platform is the fallback.
            "platform": _safe_text(profile.get("platform")) or platform,
            # Accepts either legacy key (author_platform_id) or the new one.
            "platform_author_id": _safe_text(profile.get("author_platform_id") or profile.get("platform_author_id")),
            "nickname": _safe_text(profile.get("nickname")),
            "author_handle": _safe_text(profile.get("author_handle")),
            "signature": _safe_text(profile.get("signature")),
            "fans_count": _safe_int(profile.get("fans_count"), 0),
            "liked_count": _safe_int(profile.get("liked_count"), 0),
            "collected_count": _safe_int(profile.get("collected_count"), 0),
            # Falls back to the eligible-work count when the profile has no works_count.
            "works_count": _safe_int(profile.get("works_count"), len(ranked)),
            "verified": bool(profile.get("verified", False)),
            "ip_location": _safe_text(profile.get("ip_location")),
        },
        "sampled_works": sampled,
        "aggregate_stats": aggregate_stats,
        "platform_context": {
            "platform": platform,
            "content_kind": "author_home",
            # Modality of the top-ranked work stands in for the account's primary modality.
            "primary_work_modality": ranked[0].get("work_modality") if ranked else None,
        },
        "analysis_metadata": {
            "input_object_name": "author_analysis_input_v1",
            "prompt_contract_path": str(PROMPT_CONTRACT_PATH),
            "input_schema_path": str(INPUT_SCHEMA_PATH),
            "output_schema_path": str(OUTPUT_SCHEMA_PATH),
            "analysis_mode": "standard",
            "total_works": len(normalized),
            "eligible_works_count": len(ranked),
            "excluded_works_count": excluded_count,
            "sampled_works_count": len(sampled),
        },
    }
    return payload, validate_author_analysis_input_v1(payload)
700
+
701
+
702
def _require_non_empty_string(errors: List[Dict[str, str]], field: str, value: Any) -> None:
    """Record an empty_string error when ``value`` cleans to an empty string."""
    if _safe_text(value):
        return
    errors.append({"field": field, "reason": "empty_string"})
705
+
706
+
707
+ def _require_dict(errors: List[Dict[str, str]], field: str, value: Any) -> Dict[str, Any]:
708
+ if not isinstance(value, dict):
709
+ errors.append({"field": field, "reason": "type_error:dict"})
710
+ return {}
711
+ return value
712
+
713
+
714
+ def _require_list(errors: List[Dict[str, str]], field: str, value: Any) -> List[Any]:
715
+ if not isinstance(value, list):
716
+ errors.append({"field": field, "reason": "type_error:list"})
717
+ return []
718
+ return value
719
+
720
+
721
def _validate_distribution_object(errors: List[Dict[str, str]], field: str, value: Any) -> None:
    """Validate a distribution block shaped like {"items": [{"value": ..., "count": ...}]}.

    Only the first 20 rows are inspected; each must be a dict with a non-empty
    "value" and a present "count" key.
    """
    block = _require_dict(errors, field, value)
    if not block:
        return
    rows = _require_list(errors, f"{field}.items", block.get("items"))
    for position, entry in enumerate(rows[:20]):
        row = _require_dict(errors, f"{field}.items.{position}", entry)
        if not row:
            continue
        _require_non_empty_string(errors, f"{field}.items.{position}.value", row.get("value"))
        if "count" not in row:
            errors.append({"field": f"{field}.items.{position}.count", "reason": "missing"})
732
+
733
+
734
def validate_author_analysis_input_v1(payload: Dict[str, Any]) -> List[Dict[str, str]]:
    """Validate an author_analysis_input_v1 payload.

    Returns a deduplicated list of ``{"field": ..., "reason": ...}`` error
    records. Combines JSON-schema errors with structural/enum checks; a
    malformed payload must produce errors, never raise.
    """
    errors: List[Dict[str, str]] = []
    errors.extend(_schema_errors(payload, INPUT_SCHEMA_PATH))
    # Top-level containers; _require_* fall back to empty containers on type
    # errors so the checks below are safe on malformed input.
    author_profile = _require_dict(errors, "author_profile", payload.get("author_profile"))
    aggregate = _require_dict(errors, "aggregate_stats", payload.get("aggregate_stats"))
    platform_context = _require_dict(errors, "platform_context", payload.get("platform_context"))
    analysis_metadata = _require_dict(errors, "analysis_metadata", payload.get("analysis_metadata"))
    sampled_works = _require_list(errors, "sampled_works", payload.get("sampled_works"))

    _require_non_empty_string(errors, "author_profile.platform", author_profile.get("platform"))
    _require_non_empty_string(errors, "author_profile.platform_author_id", author_profile.get("platform_author_id"))
    _require_non_empty_string(errors, "author_profile.nickname", author_profile.get("nickname"))
    _require_non_empty_string(errors, "platform_context.platform", platform_context.get("platform"))
    _require_non_empty_string(errors, "platform_context.content_kind", platform_context.get("content_kind"))
    _require_non_empty_string(errors, "analysis_metadata.input_object_name", analysis_metadata.get("input_object_name"))
    _require_non_empty_string(errors, "analysis_metadata.analysis_mode", analysis_metadata.get("analysis_mode"))

    # Presence checks for the aggregate distributions.
    for key in [
        "global_title_keyword_distribution",
        "global_caption_keyword_distribution",
        "global_primary_text_keyword_distribution",
        "global_hook_type_distribution",
        "global_structure_type_distribution",
        "global_cta_type_distribution",
        "global_content_form_distribution",
        "global_work_modality_distribution",
        "global_performance_distribution",
        "global_publish_time_distribution",
        "global_bucket_distribution",
        "global_top_vs_mid_vs_bottom_deltas",
    ]:
        if key not in aggregate:
            errors.append({"field": f"aggregate_stats.{key}", "reason": "missing"})

    # BUG FIX: the original re-read payload["aggregate_stats"] with a chained
    # `.get`, which raised AttributeError when aggregate_stats was present but
    # not a dict. Read from the already-normalized `aggregate` instead (it is
    # {} in that case, so the type error is reported rather than crashing).
    for key in [
        "global_title_keyword_distribution",
        "global_caption_keyword_distribution",
        "global_primary_text_keyword_distribution",
        "global_hook_type_distribution",
        "global_structure_type_distribution",
        "global_cta_type_distribution",
        "global_content_form_distribution",
        "global_work_modality_distribution",
        "global_bucket_distribution",
    ]:
        _validate_distribution_object(errors, f"aggregate_stats.{key}", aggregate.get(key))

    total_works = _safe_int(aggregate.get("total_works"), 0)
    if total_works > 0 and not sampled_works:
        errors.append({"field": "sampled_works", "reason": "empty_collection"})
    if sampled_works and _safe_int(aggregate.get("sampled_works_count"), -1) != len(sampled_works):
        errors.append({"field": "aggregate_stats.sampled_works_count", "reason": "count_mismatch"})

    # Per-work checks (bounded to the first 120 sampled works).
    for index, item in enumerate(sampled_works[:120]):
        row = _require_dict(errors, f"sampled_works.{index}", item)
        if not row:
            continue
        _require_non_empty_string(errors, f"sampled_works.{index}.platform_work_id", row.get("platform_work_id"))
        # At least one of the three text fields must carry content.
        if not any(_safe_text(row.get(key)) for key in ("title", "caption_raw", "primary_text")):
            errors.append({"field": f"sampled_works.{index}", "reason": "all_text_fields_empty"})
        if _safe_text(row.get("work_modality")) not in {"video", "text"}:
            errors.append({"field": f"sampled_works.{index}.work_modality", "reason": "enum_required"})
        if _safe_text(row.get("primary_text_source")) not in {"asr_clean", "caption_raw"}:
            errors.append({"field": f"sampled_works.{index}.primary_text_source", "reason": "enum_required"})
        if _safe_text(row.get("hook_type")) not in HOOK_TYPES_ENUM:
            errors.append({"field": f"sampled_works.{index}.hook_type", "reason": "enum_required"})
        if _safe_text(row.get("structure_type")) not in STRUCTURE_TYPES:
            errors.append({"field": f"sampled_works.{index}.structure_type", "reason": "enum_required"})
        if _safe_text(row.get("cta_type")) not in CTA_TYPES:
            errors.append({"field": f"sampled_works.{index}.cta_type", "reason": "enum_required"})
        if _safe_text(row.get("content_form")) not in CONTENT_FORMS:
            errors.append({"field": f"sampled_works.{index}.content_form", "reason": "enum_required"})
        markers = _require_list(errors, f"sampled_works.{index}.style_markers", row.get("style_markers"))
        for marker in markers:
            if _safe_text(marker) not in STYLE_MARKERS_ENUM:
                errors.append({"field": f"sampled_works.{index}.style_markers", "reason": f"enum_required:{marker}"})
    return _dedupe_error_list(errors)
811
+
812
+
813
+ def _fallback_sample_confidence(sample_size: int) -> str:
814
+ return "low" if sample_size < 5 else ("mid" if sample_size < 15 else "high")
815
+
816
+
817
def build_fallback_author_analysis_v2(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Build a minimal template-based author analysis when the LLM path fails.

    Derives headline keywords/themes/hooks/structures/CTAs from the
    aggregate distributions in ``payload`` and fills a complete v2 analysis
    object with templated text (Chinese copy is part of the product output).
    The "notes" field marks the result as fallback_generated.
    """
    # Defensive reads: tolerate missing or mistyped top-level sections.
    author = payload.get("author_profile") if isinstance(payload.get("author_profile"), dict) else {}
    aggregate = payload.get("aggregate_stats") if isinstance(payload.get("aggregate_stats"), dict) else {}
    sampled = payload.get("sampled_works") if isinstance(payload.get("sampled_works"), list) else []
    nickname = _safe_text(author.get("nickname")) or "该作者"
    # Pull the leading values out of each aggregate distribution.
    top_keywords = [item.get("value") for item in ((aggregate.get("global_primary_text_keyword_distribution") or {}).get("items") or []) if isinstance(item, dict)][:5]
    theme_items = [item.get("value") for item in ((aggregate.get("global_caption_keyword_distribution") or {}).get("items") or []) if isinstance(item, dict)][:6]
    hook_items = [item.get("value") for item in ((aggregate.get("global_hook_type_distribution") or {}).get("items") or []) if isinstance(item, dict)][:3]
    structure_items = [item.get("value") for item in ((aggregate.get("global_structure_type_distribution") or {}).get("items") or []) if isinstance(item, dict)][:3]
    cta_items = [item.get("value") for item in ((aggregate.get("global_cta_type_distribution") or {}).get("items") or []) if isinstance(item, dict)][:3]
    sample_size = len(sampled)
    confidence = _fallback_sample_confidence(sample_size)
    representative = [{"platform_work_id": item.get("platform_work_id"), "title": item.get("title") or item.get("caption_raw") or item.get("primary_text"), "bucket": item.get("bucket")} for item in sampled[:5] if isinstance(item, dict)]
    dominant_themes = theme_items[:3]
    theme_clusters: List[Dict[str, Any]] = []
    if dominant_themes:
        # One single-keyword cluster per dominant theme.
        theme_clusters = [{"name": theme, "keywords": [theme]} for theme in dominant_themes]
    return {
        "author_positioning": {
            "one_liner": f"{nickname} 是一个围绕{dominant_themes[0] if dominant_themes else '内容增长'}持续输出的创作者,主要用高密度结构化表达帮助目标受众缩短试错路径。",
            "author_type": "creator_education",
            "primary_role": "teacher",
            "secondary_roles": ["operator"],
            "target_audience": "想提升内容与增长效率的创作者/操盘手",
            "core_problem_solved": "把复杂方法压缩成可快速模仿的内容动作",
            "core_value_proposition": "用短平快的机制化表达降低学习和执行门槛",
            "evidence": [f"top_keywords={top_keywords}", f"dominant_themes={dominant_themes}"],
        },
        "trust_model": {
            "primary_trust_source": "systematized_method",
            "secondary_trust_sources": ["demonstration", "consistency"],
            "trust_building_mechanisms": ["重复输出同类方法", "把观点包装成步骤/模板", "用案例或结果句强化可信度"],
            "trust_risks": ["样本主要来自单平台表达,真实性与转化深度证据有限"],
            "relationship_posture": {"distance": "mid", "authority_level": "mid", "affinity_level": "mid"},
            "evidence": [f"sample_size={sample_size}", f"structures={structure_items}"],
        },
        "cognitive_engine": {
            "worldview": "优先追求可执行、可复用、可放大的内容动作。",
            "value_priority": ["效率", "结果感", "方法压缩"],
            "problem_definition_style": "execution_problem",
            "reasoning_modes": ["workflow_packaging", "case_induction"],
            "knowledge_sources": ["作品标题/文案/字幕中的步骤化表达"],
            "judgment_style": {"certainty_level": "mid", "notes": "基于单平台主页样本初判"},
            "core_cognitive_actions": ["workflow_packaging", "result_compression"],
            "evidence": [f"keywords={top_keywords}", f"sample_size={sample_size}"],
        },
        "expression_hooks": {
            "language_style": {"oral_level": "mid", "assertiveness": "high", "emotional_intensity": "mid", "professional_density": "mid"},
            "hook_keywords": top_keywords[:5],
            # Keep only hooks that are valid enum members; default to result_hook.
            "hook_types": [item for item in hook_items if item in HOOK_TYPES_ENUM] or ["result_hook"],
            "argument_patterns": ["先给结果/判断,再给步骤或解释", "用反差或对比压缩注意力获取"],
            "emotion_patterns": ["sharp_direct"],
            "memorability_sources": ["strong_keywords", "repeatable_phrases"],
            "evidence": [f"hook_types={hook_items}", f"keywords={top_keywords}"],
        },
        "content_mechanism": {
            "topic_sources": ["audience_questions", "workflow_demos"],
            "topic_goals": ["acquire_attention", "build_trust", "show_capability"],
            # Same enum-filtering pattern as hook_types above.
            "winning_content_structures": [item for item in structure_items if item in WINNING_CONTENT_STRUCTURES] or ["hook_demo_result"],
            "repeatable_series": ["高频重复的母题/模板仍在持续输出"],
            "traffic_drivers": ["result_promise", "shortcut_imagination"],
            "content_flywheel": "用结果型标题拉点击,用结构化拆解留住注意力,再用 CTA 做后续动作承接。",
            "cross_platform_variation": "当前不可判断:缺少跨平台样本。",
            "dominant_themes": dominant_themes,
            "theme_clusters": theme_clusters,
            "evidence": [f"themes={dominant_themes}", f"structures={structure_items}", f"cta_types={cta_items}"],
        },
        "commercial_bridge": {
            "content_role_in_funnel": ["acquire_attention", "build_trust", "qualify"],
            # Products are only inferred when a lead-capture CTA is observed.
            "likely_products": ["course", "consulting"] if any(item in {"private_message_cta", "lead_magnet_cta"} for item in cta_items) else [],
            "conversion_path": "通过内容建立方法可信度,再用评论/私信/领取资料承接线索。",
            "content_product_fit": "mid",
            "business_model_signals": ["content_led_growth", "high_ip_dependency"],
            "commercial_risks": ["明确成交链路和产品证据不足,不能强断言单一模式。"],
            "evidence": [f"cta_types={cta_items}"],
        },
        "core_tensions": {
            "tensions": [
                {"label": "效率 vs 深度", "surface": "表达短促、结果先行", "deep_reason": "优先追求短视频环境下的注意力效率", "strategic_implication": "适合拉新,但要防止过度压缩导致信任深度不足"},
                {"label": "模板复用 vs 差异化", "surface": "高频复用相似母题", "deep_reason": "成熟模板能稳定产出", "strategic_implication": "需要持续刷新案例与视角,避免模板疲劳"},
            ],
            "most_important_tension": "高效率表达很强,但商业深度证据未必同步充足。",
            "evidence": [f"sample_size={sample_size}", f"themes={dominant_themes}"],
        },
        "evidence_pack": {
            "sample_size": sample_size,
            "sample_confidence": confidence,
            "representative_works": representative,
            "top_keywords": top_keywords,
            "observed_hooks": hook_items,
            "observed_ctas": cta_items,
            "observed_structures": structure_items,
            "notes": ["fallback_generated", "基于标准化输入的最小保底分析"],
        },
        "clone_guidance": {
            "copyable_elements": ["结果先行的标题结构", "步骤化表达", "稳定重复的母模板"],
            "non_copyable_elements": ["作者长期经验背书", "既有受众信任资产"],
            "borrowable_flavor": ["短句高密度", "判断明确", "先给结果再给解释"],
            "danger_zones": ["不要照抄口头禅和具体句子", "证据不足时别硬抄商业承诺"],
            "if_only_learn_one_thing": "学他把复杂方法压缩成高可执行内容动作的能力,而不是抄表面语气。",
        },
    }
919
+
920
+
921
+ def _enum_error(errors: List[Dict[str, str]], field: str, value: Any, allowed: set) -> None:
922
+ if value in (None, "", []):
923
+ return
924
+ if isinstance(value, list):
925
+ for item in value:
926
+ if item not in allowed:
927
+ errors.append({"field": field, "reason": f"enum_preferred:{item}"})
928
+ elif value not in allowed:
929
+ errors.append({"field": field, "reason": f"enum_preferred:{value}"})
930
+
931
+
932
+ def validate_author_analysis_v2(payload: Dict[str, Any], *, analysis_input: Optional[Dict[str, Any]] = None) -> List[Dict[str, str]]:
933
+ errors: List[Dict[str, str]] = []
934
+ errors.extend(_schema_errors(payload, OUTPUT_SCHEMA_PATH))
935
+ for module, fields in REQUIRED_V2_FIELDS.items():
936
+ block = payload.get(module)
937
+ if not isinstance(block, dict):
938
+ errors.append({"field": module, "reason": "missing_or_type_error:dict"})
939
+ continue
940
+ for field in fields:
941
+ if field not in block:
942
+ errors.append({"field": f"{module}.{field}", "reason": "missing"})
943
+
944
+ author_positioning = _require_dict(errors, "author_positioning", payload.get("author_positioning"))
945
+ trust_model = _require_dict(errors, "trust_model", payload.get("trust_model"))
946
+ cognitive_engine = _require_dict(errors, "cognitive_engine", payload.get("cognitive_engine"))
947
+ expression_hooks = _require_dict(errors, "expression_hooks", payload.get("expression_hooks"))
948
+ content_mechanism = _require_dict(errors, "content_mechanism", payload.get("content_mechanism"))
949
+ commercial_bridge = _require_dict(errors, "commercial_bridge", payload.get("commercial_bridge"))
950
+ core_tensions = _require_dict(errors, "core_tensions", payload.get("core_tensions"))
951
+ evidence_pack = _require_dict(errors, "evidence_pack", payload.get("evidence_pack"))
952
+ clone_guidance = _require_dict(errors, "clone_guidance", payload.get("clone_guidance"))
953
+
954
+ for field, value in [
955
+ ("author_positioning.one_liner", author_positioning.get("one_liner")),
956
+ ("author_positioning.target_audience", author_positioning.get("target_audience")),
957
+ ("author_positioning.core_problem_solved", author_positioning.get("core_problem_solved")),
958
+ ("author_positioning.core_value_proposition", author_positioning.get("core_value_proposition")),
959
+ ("commercial_bridge.conversion_path", commercial_bridge.get("conversion_path")),
960
+ ("content_mechanism.content_flywheel", content_mechanism.get("content_flywheel")),
961
+ ("content_mechanism.cross_platform_variation", content_mechanism.get("cross_platform_variation")),
962
+ ("core_tensions.most_important_tension", core_tensions.get("most_important_tension")),
963
+ ("clone_guidance.if_only_learn_one_thing", clone_guidance.get("if_only_learn_one_thing")),
964
+ ]:
965
+ _require_non_empty_string(errors, field, value)
966
+
967
+ posture = _require_dict(errors, "trust_model.relationship_posture", trust_model.get("relationship_posture"))
968
+ lang_style = _require_dict(errors, "expression_hooks.language_style", expression_hooks.get("language_style"))
969
+ judgment_style = _require_dict(errors, "cognitive_engine.judgment_style", cognitive_engine.get("judgment_style"))
970
+
971
+ for list_field, value in [
972
+ ("trust_model.secondary_trust_sources", trust_model.get("secondary_trust_sources")),
973
+ ("trust_model.trust_building_mechanisms", trust_model.get("trust_building_mechanisms")),
974
+ ("trust_model.trust_risks", trust_model.get("trust_risks")),
975
+ ("cognitive_engine.value_priority", cognitive_engine.get("value_priority")),
976
+ ("cognitive_engine.reasoning_modes", cognitive_engine.get("reasoning_modes")),
977
+ ("cognitive_engine.knowledge_sources", cognitive_engine.get("knowledge_sources")),
978
+ ("cognitive_engine.core_cognitive_actions", cognitive_engine.get("core_cognitive_actions")),
979
+ ("expression_hooks.hook_keywords", expression_hooks.get("hook_keywords")),
980
+ ("expression_hooks.hook_types", expression_hooks.get("hook_types")),
981
+ ("expression_hooks.argument_patterns", expression_hooks.get("argument_patterns")),
982
+ ("expression_hooks.emotion_patterns", expression_hooks.get("emotion_patterns")),
983
+ ("expression_hooks.memorability_sources", expression_hooks.get("memorability_sources")),
984
+ ("content_mechanism.topic_sources", content_mechanism.get("topic_sources")),
985
+ ("content_mechanism.topic_goals", content_mechanism.get("topic_goals")),
986
+ ("content_mechanism.winning_content_structures", content_mechanism.get("winning_content_structures")),
987
+ ("content_mechanism.repeatable_series", content_mechanism.get("repeatable_series")),
988
+ ("content_mechanism.traffic_drivers", content_mechanism.get("traffic_drivers")),
989
+ ("content_mechanism.dominant_themes", content_mechanism.get("dominant_themes")),
990
+ ("content_mechanism.theme_clusters", content_mechanism.get("theme_clusters")),
991
+ ("commercial_bridge.content_role_in_funnel", commercial_bridge.get("content_role_in_funnel")),
992
+ ("commercial_bridge.likely_products", commercial_bridge.get("likely_products")),
993
+ ("commercial_bridge.business_model_signals", commercial_bridge.get("business_model_signals")),
994
+ ("commercial_bridge.commercial_risks", commercial_bridge.get("commercial_risks")),
995
+ ("evidence_pack.representative_works", evidence_pack.get("representative_works")),
996
+ ("evidence_pack.top_keywords", evidence_pack.get("top_keywords")),
997
+ ("evidence_pack.observed_hooks", evidence_pack.get("observed_hooks")),
998
+ ("evidence_pack.observed_ctas", evidence_pack.get("observed_ctas")),
999
+ ("evidence_pack.observed_structures", evidence_pack.get("observed_structures")),
1000
+ ("evidence_pack.notes", evidence_pack.get("notes")),
1001
+ ("clone_guidance.copyable_elements", clone_guidance.get("copyable_elements")),
1002
+ ("clone_guidance.non_copyable_elements", clone_guidance.get("non_copyable_elements")),
1003
+ ("clone_guidance.borrowable_flavor", clone_guidance.get("borrowable_flavor")),
1004
+ ("clone_guidance.danger_zones", clone_guidance.get("danger_zones")),
1005
+ ]:
1006
+ _require_list(errors, list_field, value)
1007
+
1008
+ _enum_error(errors, "author_positioning.author_type", author_positioning.get("author_type"), AUTHOR_TYPES)
1009
+ _enum_error(errors, "author_positioning.primary_role", author_positioning.get("primary_role"), PRIMARY_ROLES)
1010
+ _enum_error(errors, "trust_model.primary_trust_source", trust_model.get("primary_trust_source"), TRUST_SOURCES)
1011
+ _enum_error(errors, "cognitive_engine.problem_definition_style", cognitive_engine.get("problem_definition_style"), PROBLEM_DEFINITION_STYLES)
1012
+ _enum_error(errors, "cognitive_engine.reasoning_modes", cognitive_engine.get("reasoning_modes"), REASONING_MODES)
1013
+ _enum_error(errors, "cognitive_engine.core_cognitive_actions", cognitive_engine.get("core_cognitive_actions"), CORE_COGNITIVE_ACTIONS)
1014
+ _enum_error(errors, "expression_hooks.hook_types", expression_hooks.get("hook_types"), HOOK_TYPES_ENUM)
1015
+ _enum_error(errors, "expression_hooks.emotion_patterns", expression_hooks.get("emotion_patterns"), EMOTION_PATTERNS)
1016
+ _enum_error(errors, "expression_hooks.memorability_sources", expression_hooks.get("memorability_sources"), MEMORABILITY_SOURCES)
1017
+ _enum_error(errors, "content_mechanism.topic_sources", content_mechanism.get("topic_sources"), TOPIC_SOURCES)
1018
+ _enum_error(errors, "content_mechanism.topic_goals", content_mechanism.get("topic_goals"), TOPIC_GOALS)
1019
+ _enum_error(errors, "content_mechanism.winning_content_structures", content_mechanism.get("winning_content_structures"), WINNING_CONTENT_STRUCTURES)
1020
+ _enum_error(errors, "content_mechanism.traffic_drivers", content_mechanism.get("traffic_drivers"), TRAFFIC_DRIVERS)
1021
+ _enum_error(errors, "commercial_bridge.content_role_in_funnel", commercial_bridge.get("content_role_in_funnel"), FUNNEL_ROLES)
1022
+ _enum_error(errors, "commercial_bridge.likely_products", commercial_bridge.get("likely_products"), LIKELY_PRODUCTS)
1023
+ _enum_error(errors, "commercial_bridge.business_model_signals", commercial_bridge.get("business_model_signals"), BUSINESS_MODEL_SIGNALS)
1024
+ _enum_error(errors, "trust_model.relationship_posture.distance", posture.get("distance"), RELATIONSHIP_DISTANCE)
1025
+ _enum_error(errors, "trust_model.relationship_posture.authority_level", posture.get("authority_level"), LOW_HIGH_MID)
1026
+ _enum_error(errors, "trust_model.relationship_posture.affinity_level", posture.get("affinity_level"), LOW_HIGH_MID)
1027
+ _enum_error(errors, "cognitive_engine.judgment_style.certainty_level", judgment_style.get("certainty_level"), LOW_HIGH_MID)
1028
+ _enum_error(errors, "expression_hooks.language_style.oral_level", lang_style.get("oral_level"), LOW_HIGH_MID)
1029
+ _enum_error(errors, "expression_hooks.language_style.assertiveness", lang_style.get("assertiveness"), LOW_HIGH_MID)
1030
+ _enum_error(errors, "expression_hooks.language_style.emotional_intensity", lang_style.get("emotional_intensity"), LOW_HIGH_MID)
1031
+ _enum_error(errors, "expression_hooks.language_style.professional_density", lang_style.get("professional_density"), LOW_HIGH_MID)
1032
+ _enum_error(errors, "commercial_bridge.content_product_fit", commercial_bridge.get("content_product_fit"), LOW_HIGH_MID)
1033
+ _enum_error(errors, "evidence_pack.sample_confidence", evidence_pack.get("sample_confidence"), LOW_HIGH_MID)
1034
+ _enum_error(errors, "evidence_pack.observed_hooks", evidence_pack.get("observed_hooks"), HOOK_TYPES_ENUM)
1035
+ _enum_error(errors, "evidence_pack.observed_ctas", evidence_pack.get("observed_ctas"), CTA_TYPES)
1036
+ _enum_error(errors, "evidence_pack.observed_structures", evidence_pack.get("observed_structures"), STRUCTURE_TYPES)
1037
+
1038
+ tensions = _require_list(errors, "core_tensions.tensions", core_tensions.get("tensions"))
1039
+ if len(tensions) < 2:
1040
+ errors.append({"field": "core_tensions.tensions", "reason": "guardrail:need_at_least_2"})
1041
+ for index, tension in enumerate(tensions[:10]):
1042
+ block = _require_dict(errors, f"core_tensions.tensions.{index}", tension)
1043
+ for sub in ("label", "surface", "deep_reason", "strategic_implication"):
1044
+ _require_non_empty_string(errors, f"core_tensions.tensions.{index}.{sub}", block.get(sub))
1045
+
1046
+ representative_works = evidence_pack.get("representative_works") if isinstance(evidence_pack.get("representative_works"), list) else []
1047
+ for index, work in enumerate(representative_works[:10]):
1048
+ block = _require_dict(errors, f"evidence_pack.representative_works.{index}", work)
1049
+ _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.platform_work_id", block.get("platform_work_id"))
1050
+ _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.title", block.get("title"))
1051
+ _require_non_empty_string(errors, f"evidence_pack.representative_works.{index}.bucket", block.get("bucket"))
1052
+
1053
+ for field, value in [
1054
+ ("author_positioning.evidence", author_positioning.get("evidence")),
1055
+ ("trust_model.evidence", trust_model.get("evidence")),
1056
+ ("cognitive_engine.evidence", cognitive_engine.get("evidence")),
1057
+ ("expression_hooks.evidence", expression_hooks.get("evidence")),
1058
+ ("content_mechanism.evidence", content_mechanism.get("evidence")),
1059
+ ("commercial_bridge.evidence", commercial_bridge.get("evidence")),
1060
+ ("core_tensions.evidence", core_tensions.get("evidence")),
1061
+ ]:
1062
+ items = _require_list(errors, field, value)
1063
+ if not items:
1064
+ errors.append({"field": field, "reason": "empty_collection"})
1065
+
1066
+ sample_size = _safe_int(evidence_pack.get("sample_size"), 0)
1067
+ sample_confidence = _safe_text(evidence_pack.get("sample_confidence"))
1068
+ if sample_size < 5 and sample_confidence == "high":
1069
+ errors.append({"field": "evidence_pack.sample_confidence", "reason": "guardrail:sample_lt_5_cannot_be_high"})
1070
+
1071
+ if analysis_input is not None:
1072
+ platform_context = analysis_input.get("platform_context") if isinstance(analysis_input.get("platform_context"), dict) else {}
1073
+ if len({platform_context.get("platform")} - {None, ""}) <= 1:
1074
+ cross_platform_variation = _safe_text(content_mechanism.get("cross_platform_variation"))
1075
+ if cross_platform_variation and "不可判断" not in cross_platform_variation and "unknown" not in cross_platform_variation.lower():
1076
+ errors.append({"field": "content_mechanism.cross_platform_variation", "reason": "guardrail:single_platform_should_stay_weak"})
1077
+ aggregate = analysis_input.get("aggregate_stats") if isinstance(analysis_input.get("aggregate_stats"), dict) else {}
1078
+ cta_items = ((aggregate.get("global_cta_type_distribution") or {}).get("items") or []) if isinstance((aggregate.get("global_cta_type_distribution") or {}), dict) else []
1079
+ explicit_conversion = any(isinstance(item, dict) and item.get("value") in {"private_message_cta", "lead_magnet_cta"} for item in cta_items)
1080
+ likely_products = commercial_bridge.get("likely_products") if isinstance(commercial_bridge.get("likely_products"), list) else []
1081
+ if likely_products and not explicit_conversion:
1082
+ errors.append({"field": "commercial_bridge.likely_products", "reason": "guardrail:weak_conversion_signal"})
1083
+ return _dedupe_error_list(errors)
1084
+
1085
+
1086
def derive_legacy_summary(author_analysis_v2: Dict[str, Any], *, analysis_input: Dict[str, Any], validation_errors: Optional[List[Dict[str, str]]] = None) -> Dict[str, Any]:
    """Project the rich v2 author analysis onto the legacy summary schema.

    Args:
        author_analysis_v2: Structured v2 analysis payload. Missing or
            malformed sections degrade to empty dicts instead of raising.
        analysis_input: Raw analysis input; only ``aggregate_stats`` (hook /
            structure distributions) is read here.
        validation_errors: Optional validator output. Each error lowers the
            derived business score by 2 points, capped at -12 total.

    Returns:
        A legacy-shaped summary dict: portrait, business/benchmark analyses,
        scores, style radar, core contradictions, and recommendations.
    """

    def _section(container: Dict[str, Any], key: str) -> Dict[str, Any]:
        # Tolerate absent or non-dict sections without raising.
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    positioning = _section(author_analysis_v2, "author_positioning")
    trust = _section(author_analysis_v2, "trust_model")
    content = _section(author_analysis_v2, "content_mechanism")
    bridge = _section(author_analysis_v2, "commercial_bridge")
    tensions = _section(author_analysis_v2, "core_tensions")
    clone = _section(author_analysis_v2, "clone_guidance")
    evidence = _section(author_analysis_v2, "evidence_pack")
    aggregate = _section(analysis_input, "aggregate_stats")

    sample_confidence = _safe_text(evidence.get("sample_confidence")) or _fallback_sample_confidence(_safe_int(evidence.get("sample_size"), 0))
    score_base = {"low": 58, "mid": 72, "high": 84}.get(sample_confidence, 60)
    if validation_errors:
        # Each validation error costs 2 points, capped at 12 total.
        score_base -= min(len(validation_errors) * 2, 12)
    business_score = int(_clamp(score_base + (6 if (bridge.get("likely_products") or []) else -4), 40, 92))
    benchmark_gap_score = int(_clamp(100 - business_score + 8, 35, 88))

    def _distribution_values(key: str) -> List[str]:
        # Extract the string "value" entries of a distribution; drop None and
        # non-string entries so the ', '.join calls below cannot raise
        # TypeError, and tolerate a non-dict distribution container.
        bucket = aggregate.get(key)
        items = bucket.get("items") if isinstance(bucket, dict) else None
        if not isinstance(items, list):
            return []
        return [item.get("value") for item in items if isinstance(item, dict) and isinstance(item.get("value"), str)]

    hook_items = _distribution_values("global_hook_type_distribution")
    structure_items = _distribution_values("global_structure_type_distribution")
    raw_themes = content.get("dominant_themes") if isinstance(content.get("dominant_themes"), list) else []
    # Keep only string themes so the join below is always safe.
    dominant_themes = [theme for theme in raw_themes if isinstance(theme, str)]
    return {
        "author_portrait": _safe_text(positioning.get("one_liner")) or "作者画像数据不足。",
        "business_analysis": ";".join([
            _safe_text(positioning.get("core_value_proposition")),
            f"主要信任来源:{_safe_text(trust.get('primary_trust_source')) or '待确认'}",
            f"商业承接:{_safe_text(bridge.get('conversion_path')) or '当前证据不足'}",
        ]).strip(";"),
        "benchmark_analysis": ";".join([
            f"高频 hook:{', '.join(hook_items[:3]) or '待补'}",
            f"常见结构:{', '.join(structure_items[:3]) or '待补'}",
            f"主主题:{', '.join(dominant_themes[:3]) or '待补'}",
        ]).strip(";"),
        "business_score": business_score,
        "benchmark_gap_score": benchmark_gap_score,
        "style_radar": {"选题": 76, "表达": 78, "结构": 79, "节奏": 74, "人设": 73, "转化": 70, "差异化": 71, "稳定性": 79},
        "core_contradictions": [tensions.get("most_important_tension") or "张力信息不足"],
        "recommendations": [clone.get("if_only_learn_one_thing") or "优先学习其可复用的结构机制"],
    }