@tikomni/skills 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (480)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -3
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +252 -9
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +108 -167
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +6 -2
  30. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/{creator-analysis → social-media-crawl}/scripts/core/tikomni_common.py +13 -3
  33. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  34. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  36. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  37. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  38. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  39. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  40. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +282 -174
  41. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  42. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +290 -141
  43. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  45. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  46. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  47. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  48. package/skills/creator-analysis/SKILL.md +0 -95
  49. package/skills/creator-analysis/agents/openai.yaml +0 -4
  50. package/skills/creator-analysis/env.example +0 -36
  51. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  52. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  53. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  54. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  55. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  56. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  57. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  59. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  60. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  61. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  63. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  64. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  65. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  66. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  67. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  68. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  69. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  71. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  72. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  73. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  74. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  75. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  76. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  77. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  78. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  79. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  80. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  81. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  82. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  83. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  84. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  85. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  86. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  87. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  88. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  89. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  90. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  91. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  92. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  93. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  94. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  96. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  100. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  101. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  103. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  104. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  105. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  106. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  107. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  108. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  109. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  110. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  113. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  114. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  116. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  117. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  118. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  119. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  120. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  121. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  122. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  123. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  124. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  125. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  126. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  127. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  128. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  129. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  130. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  131. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  132. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  133. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  134. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  135. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  136. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  137. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  138. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  139. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  140. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  141. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  142. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  143. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  144. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  145. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  146. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  147. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  148. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  149. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  153. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  154. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  155. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  156. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  157. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  158. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  159. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  160. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  161. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  162. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  163. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  164. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  165. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  167. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  168. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  169. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  170. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  171. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  172. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  173. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  174. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  175. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  176. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  177. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  178. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  179. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  180. package/skills/creator-analysis/references/workflow.md +0 -23
  181. package/skills/creator-analysis/scripts/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  185. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  186. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  187. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  188. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  189. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  190. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  191. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  194. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  195. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  196. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  197. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  198. package/skills/creator-analysis/scripts/core/progress_report.py +0 -111
  199. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  200. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  202. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  203. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  205. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  207. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  208. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  209. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  210. package/skills/meta-capability/SKILL.md +0 -69
  211. package/skills/meta-capability/agents/openai.yaml +0 -4
  212. package/skills/meta-capability/env.example +0 -42
  213. package/skills/meta-capability/references/api-capability-index.md +0 -92
  214. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  215. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  216. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  217. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  218. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  219. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  220. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  222. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  223. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  224. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  226. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  227. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  228. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  229. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  230. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  231. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  232. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  233. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  234. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  235. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  236. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  237. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  238. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  239. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  240. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  241. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  242. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  243. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  244. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  245. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  246. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  247. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  248. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  249. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  250. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  251. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  252. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  253. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  254. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  255. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  256. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  257. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  258. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  259. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  263. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  264. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  265. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  266. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  267. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  268. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  269. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  270. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  271. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  272. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  273. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  274. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  276. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  277. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  279. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  280. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  281. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  282. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  283. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  284. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  285. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  286. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  287. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  288. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  289. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  290. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  291. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  292. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  293. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  294. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  295. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  296. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  297. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  298. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  299. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  300. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  301. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  302. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  303. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  304. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  305. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  306. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  307. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  308. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  309. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  310. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  311. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  312. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  316. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  317. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  318. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  319. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  320. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  321. package/skills/meta-capability/references/dispatch.md +0 -27
  322. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  323. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  324. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  325. package/skills/meta-capability/scripts/__init__.py +0 -1
  326. package/skills/meta-capability/scripts/call_route.py +0 -141
  327. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  328. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  329. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  330. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  331. package/skills/meta-capability/scripts/test_auth.py +0 -98
  332. package/skills/single-work-analysis/SKILL.md +0 -62
  333. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  334. package/skills/single-work-analysis/env.example +0 -36
  335. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  336. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  337. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  339. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  340. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  341. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  342. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  344. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  345. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  346. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  348. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  349. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  350. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  353. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  354. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  356. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  357. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  358. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  359. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  360. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  361. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  362. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  363. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  364. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  366. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  373. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  375. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  376. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  377. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  378. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  379. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  381. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  385. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  386. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  388. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  389. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  390. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  391. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  392. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  393. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  394. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  395. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  398. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  399. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  401. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  402. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  403. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  404. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  405. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  406. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  407. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  409. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  410. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  411. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  412. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  413. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  414. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  415. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  416. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  417. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  418. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  419. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  420. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  421. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  422. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  423. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  424. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  425. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  426. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  427. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  428. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  429. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  430. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  431. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  432. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  434. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  438. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  439. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  441. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  442. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  443. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -58
  444. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  445. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  446. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  447. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  448. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  449. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  450. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  451. package/skills/single-work-analysis/references/prompt-contracts/insight.md +0 -47
  452. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  453. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  454. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  455. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
  456. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  457. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  458. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  459. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  460. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -133
  461. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  462. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  463. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -253
  464. package/skills/single-work-analysis/scripts/core/tikomni_common.py +0 -588
  465. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  466. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  468. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  469. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  470. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  471. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  472. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  473. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  474. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  475. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -1402
  476. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  477. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  478. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  479. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  480. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -1,1208 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Douyin single-video low-quality runner (APP first, WEB fallback)."""
3
-
4
- from __future__ import annotations
5
-
6
- if __package__ in {None, ""}:
7
- import sys
8
- from pathlib import Path
9
-
10
- _self = Path(__file__).resolve()
11
- for _parent in _self.parents:
12
- if (_parent / "scripts" / "core" / "bootstrap_env.py").is_file():
13
- sys.path.insert(0, str(_parent))
14
- break
15
-
16
-
17
- import argparse
18
-
19
- from scripts.core.bootstrap_env import bootstrap_for_direct_run
20
-
21
- bootstrap_for_direct_run(__file__, __package__)
22
- import hashlib
23
- import json
24
- import re
25
- from datetime import datetime
26
- from pathlib import Path
27
- from typing import Any, Dict, List, Optional
28
-
29
- from scripts.core.config_loader import config_get, load_tikomni_config, resolve_storage_paths
30
- from scripts.core.extract_pipeline import resolve_trace_error_context
31
- from scripts.core.progress_report import ProgressReporter
32
- from scripts.core.storage_router import render_output_filename, resolve_json_filename_pattern
33
- from scripts.platform.douyin.douyin_video_type_matrix import normalize_douyin_video_type
34
- from scripts.pipeline.asr.asr_pipeline import submit_u2_asr_with_retry
35
- from scripts.pipeline.asr.poll_u2_task import poll_u2_task
36
- from scripts.platform.douyin.select_low_quality_video_url import select_low_quality_video_url
37
- from scripts.core.tikomni_common import (
38
- call_json_api,
39
- deep_find_all,
40
- normalize_text,
41
- resolve_runtime,
42
- summarize_content,
43
- write_json_stdout,
44
- )
45
- from scripts.writers.write_benchmark_card import write_benchmark_card
46
-
47
- APP_ENDPOINT = "/api/u1/v1/douyin/app/v3/fetch_one_video_by_share_url"
48
- WEB_ENDPOINT = "/api/u1/v1/douyin/web/fetch_one_video_by_share_url"
49
- U2_SUBMIT_ENDPOINT = "/api/u2/v1/services/audio/asr/transcription"
50
-
51
-
52
-
53
-
54
def _safe_slug(value: Optional[str], fallback: str = "unknown") -> str:
    """Return a lowercase, filename-friendly slug for *value*.

    Every run of characters outside ``a-z0-9_-`` collapses into a single
    ``-``; leading and trailing dashes are removed and the slug is capped
    at 64 characters. *fallback* is returned when nothing usable remains.
    """
    normalized = normalize_text(value)
    if normalized:
        collapsed = re.sub(r"[^a-z0-9_-]+", "-", normalized.lower())
        trimmed = collapsed.strip("-")[:64]
        if trimmed:
            return trimmed
    return fallback
61
-
62
-
63
def _traceable_identifier(source_input: Dict[str, Optional[str]], platform_work_id: Optional[str]) -> str:
    """Derive a stable identifier for naming persisted artifacts.

    Preference order: the slugged platform work id, then ``url-`` plus the
    first 10 hex chars of the SHA-1 of the share URL, then the literal
    ``"missing_input"`` when neither is available.
    """
    if not platform_work_id:
        url_text = normalize_text(source_input.get("share_url"))
        if url_text:
            short_hash = hashlib.sha1(url_text.encode("utf-8")).hexdigest()[:10]
            return f"url-{short_hash}"
        return "missing_input"
    return _safe_slug(platform_work_id)
73
-
74
-
75
def _build_persist_payload(
    *,
    result: Dict[str, Any],
    source_input: Dict[str, Optional[str]],
    platform_work_id: Optional[str],
    status: str,
    written_at: datetime,
) -> Dict[str, Any]:
    """Assemble the JSON document written to disk for one run.

    The document has four sections: ``meta`` (timestamp, status, platform,
    identifier), ``summary`` (digest fields copied from *result*),
    ``normalized`` (a flat subset of *result* plus the source input) and
    ``raw`` (the untouched *result* dict).
    """
    summary_view = {
        "summary": result.get("summary", ""),
        "insights": result.get("insights", []),
        "confidence": result.get("confidence"),
        "error_reason": result.get("error_reason"),
    }

    # Fields copied verbatim from the result, in output order.
    passthrough_keys = (
        "platform_work_id",
        "title",
        "duration_ms",
        "is_video",
        "u2_task_id",
        "u2_task_status",
        "request_id",
    )
    normalized_view: Dict[str, Any] = {
        "platform": result.get("platform", "douyin"),
        "content_kind": result.get("content_kind", "single_video"),
    }
    normalized_view.update((key, result.get(key)) for key in passthrough_keys)
    normalized_view["source"] = source_input

    meta_view = {
        "written_at": written_at.isoformat(timespec="seconds"),
        "status": status,
        "platform": "douyin",
        "identifier": _traceable_identifier(source_input, platform_work_id),
    }
    return {
        "meta": meta_view,
        "summary": summary_view,
        "normalized": normalized_view,
        "raw": result,
    }
112
-
113
-
114
def _persist_output_artifact(
    *,
    result: Dict[str, Any],
    source_input: Dict[str, Optional[str]],
    platform_work_id: Optional[str],
    storage_config: Optional[Dict[str, Any]],
    persist_output: bool,
) -> Dict[str, Any]:
    """Write *result* as a JSON artifact and report what happened.

    Returns a small status dict: a "skipped" report when *persist_output*
    is false, an ``ok: False`` report when the storage paths cannot be
    resolved, otherwise ``ok: True`` with the written file path. Results
    carrying an ``error_reason`` go under ``errors_root``, all others
    under ``results_root``, each in a ``YYYYMMDD`` sub-directory.
    """
    if not persist_output:
        return {"enabled": False, "skipped": True, "reason": "disabled_by_flag"}

    try:
        storage_paths = resolve_storage_paths(storage_config or {})
    except Exception as error:
        return {"enabled": True, "ok": False, "error": f"resolve_storage_paths_failed:{error}"}

    written_at = datetime.now()
    day_key = written_at.strftime("%Y%m%d")
    stamp = written_at.strftime("%Y%m%dT%H%M%S")
    identifier = _traceable_identifier(source_input, platform_work_id)
    failed = bool(result.get("error_reason"))
    status = "error" if failed else "success"

    root_key = "errors_root" if failed else "results_root"
    output_dir = Path(storage_paths.get(root_key, "")) / day_key
    output_dir.mkdir(parents=True, exist_ok=True)

    # Values made available to the configurable filename pattern.
    naming_context = {
        "prefix": status,
        "platform": "douyin",
        "card_type": "single_work_result",
        "author_slug": identifier,
        "title_slug": identifier,
        "identifier": identifier,
        "timestamp": stamp,
        "date": day_key,
        "ext": ".json",
    }
    output_name = render_output_filename(
        pattern=resolve_json_filename_pattern(storage_config),
        context=naming_context,
        default_filename=f"{stamp}-douyin-{identifier}.json",
        default_ext=".json",
    )
    output_path = output_dir / output_name

    document = _build_persist_payload(
        result=result,
        source_input=source_input,
        platform_work_id=platform_work_id,
        status=status,
        written_at=written_at,
    )
    serialized = json.dumps(document, ensure_ascii=False, indent=2)
    output_path.write_text(serialized, encoding="utf-8")

    return {
        "enabled": True,
        "ok": True,
        "status": status,
        "path": str(output_path),
    }
176
-
177
-
178
def _finalize_result(
    *,
    result: Dict[str, Any],
    source_input: Dict[str, Optional[str]],
    platform_work_id: Optional[str],
    storage_config: Optional[Dict[str, Any]],
    persist_output: bool,
) -> Dict[str, Any]:
    """Persist *result* and attach the persistence report to it.

    The report is stored in place under ``result["output_persist"]`` and
    the same (mutated) dict is returned.
    """
    persist_report = _persist_output_artifact(
        result=result,
        source_input=source_input,
        platform_work_id=platform_work_id,
        storage_config=storage_config,
        persist_output=persist_output,
    )
    result["output_persist"] = persist_report
    return result
194
-
195
- def _normalize_input(
196
- input_value: Optional[str],
197
- share_url: Optional[str],
198
- ) -> Dict[str, Optional[str]]:
199
- normalized_share = (share_url or "").strip() or None
200
-
201
- if input_value and not normalized_share:
202
- candidate = input_value.strip()
203
- if candidate.startswith("http://") or candidate.startswith("https://"):
204
- normalized_share = candidate
205
-
206
- return {"share_url": normalized_share}
207
-
208
-
209
- def _extract_aweme_detail(payload: Any) -> Optional[Dict[str, Any]]:
210
- if isinstance(payload, dict):
211
- direct = payload.get("aweme_detail")
212
- if isinstance(direct, dict):
213
- return direct
214
-
215
- for list_key in ("aweme_details", "aweme_list", "item_list", "items"):
216
- values = payload.get(list_key)
217
- if isinstance(values, list) and values:
218
- first = values[0]
219
- if isinstance(first, dict):
220
- return first
221
-
222
- nested_data = payload.get("data")
223
- if nested_data is not None:
224
- hit = _extract_aweme_detail(nested_data)
225
- if hit:
226
- return hit
227
-
228
- for value in payload.values():
229
- hit = _extract_aweme_detail(value)
230
- if hit:
231
- return hit
232
-
233
- elif isinstance(payload, list):
234
- for item in payload:
235
- hit = _extract_aweme_detail(item)
236
- if hit:
237
- return hit
238
-
239
- return None
240
-
241
-
242
- def _safe_int(value: Any) -> Optional[int]:
243
- if value is None:
244
- return None
245
- if isinstance(value, bool):
246
- return 1 if value else 0
247
- if isinstance(value, int):
248
- return value
249
- if isinstance(value, float):
250
- return int(value)
251
- if isinstance(value, str):
252
- text = value.strip()
253
- if text.startswith("-"):
254
- sign = -1
255
- text = text[1:]
256
- else:
257
- sign = 1
258
- if text.isdigit():
259
- return sign * int(text)
260
- return None
261
-
262
-
263
def _normalize_duration_ms(item: Dict[str, Any]) -> Optional[int]:
    """Return the work duration in milliseconds, or ``None`` if unknown.

    Reads ``item["duration"]`` and falls back to ``item["video"]["duration"]``.
    Positive values below 10000 are treated as seconds and multiplied by
    1000; everything else is returned as parsed.
    """
    duration = item.get("duration")
    if duration is None:
        video_section = item.get("video")
        if isinstance(video_section, dict):
            duration = video_section.get("duration")

    millis = _safe_int(duration)
    # Heuristic: small positive numbers are seconds, not milliseconds.
    if millis is not None and 0 < millis < 10000:
        return millis * 1000
    return millis
278
-
279
-
280
def _pick_title(item: Dict[str, Any]) -> str:
    """Return the first non-empty title-like field, preferring ``item_title``.

    Falls back through ``title``, ``desc`` and ``preview_title``; returns an
    empty string when none of them normalizes to text.
    """
    preference = ("item_title", "title", "desc", "preview_title")
    for candidate in map(normalize_text, map(item.get, preference)):
        if candidate:
            return candidate
    return ""
287
-
288
-
289
def _pick_desc(item: Dict[str, Any]) -> str:
    """Return the first non-empty description-like field, preferring ``desc``.

    Falls back through ``item_title``, ``title`` and ``preview_title``;
    returns an empty string when none of them normalizes to text.
    """
    preference = ("desc", "item_title", "title", "preview_title")
    for candidate in map(normalize_text, map(item.get, preference)):
        if candidate:
            return candidate
    return ""
296
-
297
-
298
def _extract_author(item: Dict[str, Any]) -> Dict[str, Optional[str]]:
    """Collect author identity fields from a work item.

    Reads the nested ``author`` dict (treated as empty when absent or not a
    dict) plus the item-level ``author_user_id``. Every value in the
    returned dict is either non-empty normalized text or ``None``. The
    platform id is exposed under both ``platform_author_id`` and
    ``author_platform_id``.
    """
    profile = item.get("author")
    if not isinstance(profile, dict):
        profile = {}

    def profile_text(key: str) -> Any:
        return normalize_text(profile.get(key))

    platform_id = profile_text("uid") or profile_text("id") or normalize_text(item.get("author_user_id"))
    handle = profile_text("short_id") or profile_text("nickname")
    sec_uid = profile_text("sec_uid")
    aweme_author_id = normalize_text(item.get("author_user_id")) or platform_id

    return {
        "author_handle": handle or None,
        "platform_author_id": platform_id or None,
        "author_platform_id": platform_id or None,
        "douyin_sec_uid": sec_uid or None,
        "douyin_aweme_author_id": aweme_author_id or None,
        "nickname": profile_text("nickname") or None,
        "signature": profile_text("signature") or None,
    }
317
-
318
-
319
def _extract_metrics(item: Dict[str, Any]) -> Dict[str, int]:
    """Extract engagement counters from a work item, defaulting each to 0.

    Each counter is looked up in ``item["statistics"]`` first and then on
    the item itself. ``share_count`` additionally falls back to
    ``forward_count``.
    """
    stats = item.get("statistics")
    if not isinstance(stats, dict):
        stats = {}

    def first_count(*names: str) -> int:
        # For each candidate name, statistics wins over the item itself.
        for name in names:
            for source in (stats, item):
                parsed = _safe_int(source.get(name))
                if parsed is not None:
                    return parsed
        return 0

    return {
        "digg_count": first_count("digg_count"),
        "comment_count": first_count("comment_count"),
        "collect_count": first_count("collect_count"),
        "share_count": first_count("share_count", "forward_count"),
        "play_count": first_count("play_count"),
    }
341
-
342
-
343
def _extract_platform_work_id(item: Dict[str, Any]) -> Optional[str]:
    """Return the work id as normalized text, or ``None`` when absent.

    Tries ``aweme_id``, ``item_id`` and ``id`` on the item, then
    ``statistics["aweme_id"]``.
    """
    for field in ("aweme_id", "item_id", "id"):
        raw_id = item.get(field)
        work_id = normalize_text(raw_id) if raw_id is not None else None
        if work_id:
            return work_id

    stats = item.get("statistics")
    if not isinstance(stats, dict):
        return None
    nested_id = normalize_text(stats.get("aweme_id"))
    return nested_id if nested_id else None
359
-
360
-
361
def _extract_create_time_sec(item: Dict[str, Any]) -> Optional[int]:
    """Return the first positive creation timestamp found on the item.

    Checks ``create_time`` then ``aweme_create_time``; values that do not
    parse to a positive integer are ignored.
    """
    for field in ("create_time", "aweme_create_time"):
        seconds = _safe_int(item.get(field))
        if seconds is None or seconds <= 0:
            continue
        return seconds
    return None
367
-
368
-
369
- def _pick_first_url(value: Any) -> Optional[str]:
370
- if isinstance(value, str):
371
- text = value.strip()
372
- return text or None
373
- if isinstance(value, list):
374
- for item in value:
375
- if isinstance(item, str) and item.strip():
376
- return item.strip()
377
- if isinstance(value, dict):
378
- for key in ("url_list", "url", "uri"):
379
- hit = _pick_first_url(value.get(key))
380
- if hit:
381
- return hit
382
- return None
383
-
384
-
385
def _extract_cover_image(item: Dict[str, Any]) -> Optional[str]:
    """Return a cover image URL for the work, or ``None`` if none is found.

    Covers nested under ``item["video"]`` are preferred over covers on the
    item itself; within each, ``cover``, ``origin_cover`` and
    ``dynamic_cover`` are tried in that order.
    """
    video_section = item.get("video")
    if not isinstance(video_section, dict):
        video_section = {}

    for container in (video_section, item):
        for cover_key in ("cover", "origin_cover", "dynamic_cover"):
            url = _pick_first_url(container.get(cover_key))
            if url:
                return url
    return None
400
-
401
-
402
def _clean_tag_text(value: Any) -> str:
    """Normalize a raw tag: trim whitespace, then drop surrounding ``#``.

    Returns an empty string when *value* normalizes to nothing.
    """
    normalized = normalize_text(value)
    return normalized.strip().strip("#") if normalized else ""
408
-
409
-
410
def _append_tag(raw: Any, output: List[str], seen: set) -> None:
    """Append the cleaned form of *raw* to *output* unless empty or already seen.

    Both *output* and *seen* are mutated in place; *seen* tracks the tags
    already emitted so that *output* keeps first-occurrence order.
    """
    cleaned = _clean_tag_text(raw)
    if cleaned and cleaned not in seen:
        seen.add(cleaned)
        output.append(cleaned)
416
-
417
-
418
def _extract_tags_from_container(value: Any, output: List[str], seen: set) -> None:
    """Collect tags from a str, list or dict into *output* (deduplicated via *seen*).

    * dict -> each of the known tag-name keys is read and appended.
    * list -> every element is processed recursively.
    * str  -> text that looks like JSON is decoded and processed
      recursively; otherwise (or if that fails) it is split on commas and
      whitespace and each piece is appended.
    Any other type is ignored.
    """
    if isinstance(value, dict):
        for name_key in ("hashtag_name", "cha_name", "name", "tag_name", "topic_name", "hashtag"):
            _append_tag(value.get(name_key), output, seen)
        return

    if isinstance(value, list):
        for element in value:
            _extract_tags_from_container(element, output, seen)
        return

    if not isinstance(value, str):
        return

    stripped = value.strip()
    if not stripped:
        return

    if stripped.startswith(("{", "[")):
        # Best effort: any failure here falls back to plain splitting below.
        try:
            _extract_tags_from_container(json.loads(stripped), output, seen)
            return
        except Exception:
            pass

    # NOTE(review): the character class lists "," twice; the second one may
    # originally have been a full-width comma — confirm against upstream.
    for piece in re.split(r"[,,\s]+", stripped):
        _append_tag(piece, output, seen)
442
-
443
-
444
def _extract_douyin_tags(item: Dict[str, Any]) -> List[str]:
    """Return the deduplicated tags of a work item, in first-seen order.

    Sources, in order: values found under ``text_extra``, ``cha_list`` and
    ``hashtag``; the ``hashtag`` entry of any JSON-encoded ``common_flags``
    string; and ``#tag`` mentions in the ``caption`` and ``desc`` text.
    """
    tags: List[str] = []
    seen: set = set()

    # presumably deep_find_all yields every value stored under the given
    # key anywhere inside item — verify against its definition.
    for container_key in ("text_extra", "cha_list", "hashtag"):
        for value in deep_find_all(item, [container_key]):
            _extract_tags_from_container(value, tags, seen)

    for common_flags in deep_find_all(item, ["common_flags"]):
        if not isinstance(common_flags, str):
            continue
        try:
            parsed = json.loads(common_flags)
        except (ValueError, RecursionError):
            continue
        # Valid JSON is not necessarily an object (it may be a list, number,
        # string or null); only a dict can carry a "hashtag" entry.
        if isinstance(parsed, dict):
            _extract_tags_from_container(parsed.get("hashtag"), tags, seen)

    for text_field in (normalize_text(item.get("caption")), normalize_text(item.get("desc"))):
        if not text_field:
            continue
        for match in re.findall(r"#([^#\s]+)", text_field):
            _append_tag(match, tags, seen)

    return tags
473
-
474
-
475
def _u1_fetch_one_video(
    *,
    base_url: str,
    token: str,
    share_url: str,
    app_timeout_ms: int,
    web_timeout_ms: int,
) -> Dict[str, Any]:
    """Fetch one work by share URL: APP endpoint first, WEB endpoint as fallback.

    The returned response dict is tagged with ``_endpoint``. When the APP
    call is not ``ok``, the WEB response is returned instead, carrying the
    failed APP response under ``_app_failed`` and a
    ``fallback_trigger_reason`` that distinguishes exhausted timeout
    retries from any other failure.
    """

    def fetch(path: str, timeout: int) -> Dict[str, Any]:
        response = call_json_api(
            base_url=base_url,
            path=path,
            token=token,
            method="GET",
            timeout_ms=timeout,
            params={"share_url": share_url},
        )
        response["_endpoint"] = path
        return response

    primary = fetch(APP_ENDPOINT, app_timeout_ms)
    if primary.get("ok"):
        return primary

    if primary.get("timeout_retry_exhausted"):
        primary["fallback_trigger_reason"] = "primary_timeout_retry_exhausted"
    else:
        primary["fallback_trigger_reason"] = "primary_non_timeout_failure"

    secondary = fetch(WEB_ENDPOINT, web_timeout_ms)
    secondary["_app_failed"] = primary
    secondary["fallback_trigger_reason"] = primary.get("fallback_trigger_reason")
    return secondary
510
-
511
-
512
- def _trace_step(
513
- *,
514
- step: str,
515
- endpoint: Optional[str] = None,
516
- response: Optional[Dict[str, Any]] = None,
517
- extra: Optional[Dict[str, Any]] = None,
518
- ) -> Dict[str, Any]:
519
- payload: Dict[str, Any] = {"step": step}
520
- if endpoint:
521
- payload["endpoint"] = endpoint
522
- if response is not None:
523
- payload.update(
524
- {
525
- "ok": response.get("ok"),
526
- "status_code": response.get("status_code"),
527
- "request_id": response.get("request_id"),
528
- "error_reason": response.get("error_reason"),
529
- "rate_limit_wait_ms": response.get("rate_limit_wait_ms", 0),
530
- "retry_attempt": response.get("retry_attempt", 0),
531
- "fallback_trigger_reason": response.get("fallback_trigger_reason"),
532
- }
533
- )
534
- if extra:
535
- payload.update(extra)
536
- return payload
537
-
538
-
539
def _build_missing_fields(
    *,
    title: str,
    desc: str,
    platform_work_id: Optional[str],
    video_down_url: Optional[str],
    author: Dict[str, Optional[str]],
) -> List[Dict[str, str]]:
    """List the metadata fields that are empty for this work.

    Each entry is ``{"field": <name>, "reason": "missing_metadata"}``.
    Scalar fields are reported first, followed by author fields. An author
    field counts as present when any of its alias keys holds text.
    """
    scalar_fields = (
        ("title", title),
        ("desc", desc),
        ("platform_work_id", platform_work_id),
        ("video_down_url", video_down_url),
    )
    absent = [name for name, content in scalar_fields if not normalize_text(content)]

    author_fields = (
        ("author_handle", ("author_handle",)),
        ("platform_author_id", ("platform_author_id", "author_platform_id")),
        ("douyin_sec_uid", ("douyin_sec_uid",)),
        ("douyin_aweme_author_id", ("douyin_aweme_author_id",)),
    )
    for name, aliases in author_fields:
        if not any(normalize_text(author.get(alias)) for alias in aliases):
            absent.append(name)

    return [{"field": name, "reason": "missing_metadata"} for name in absent]
572
-
573
-
574
def _build_result(
    *,
    source_input: Dict[str, Optional[str]],
    platform_work_id: Optional[str],
    title: str,
    desc: str,
    duration_ms: Optional[int],
    video_down_url: Optional[str],
    author: Dict[str, Optional[str]],
    metrics: Dict[str, int],
    tags: List[str],
    is_video: bool,
    video_type_reason: str,
    raw_content: str,
    confidence: str,
    error_reason: Optional[str],
    extract_trace: List[Dict[str, Any]],
    fallback_trace: List[Dict[str, Any]],
    request_id: Optional[str],
    u2_task_id: Optional[str],
    u2_task_status: str,
    u2_gate_reason: str,
    create_time_sec: Optional[int] = None,
    cover_image: Optional[str] = None,
    asr_source: str = "fallback_none",
) -> Dict[str, Any]:
    """Assemble the flat result dict for a Douyin single-video run.

    Besides echoing the supplied fields, the result carries a summary and
    insights derived from *raw_content*, the list of missing metadata
    fields, and ``endpoint_list`` — the string endpoints recorded in
    *extract_trace*.
    """
    digest = summarize_content(raw_content, source="douyin:single-video-low-quality")
    insights = [
        *digest.get("insights", []),
        f"platform_work_id={platform_work_id or ''}",
        f"is_video={is_video}",
        f"video_type_reason={video_type_reason}",
        f"u2_gate_reason={u2_gate_reason}",
    ]

    endpoints_used = []
    for entry in extract_trace:
        if not isinstance(entry, dict):
            continue
        endpoint = entry.get("endpoint")
        if isinstance(endpoint, str):
            endpoints_used.append(endpoint)

    counter_names = ("digg_count", "comment_count", "collect_count", "share_count", "play_count")

    return {
        "platform": "douyin",
        "content_kind": "single_video",
        "source": source_input,
        "platform_work_id": platform_work_id,
        "title": title,
        "desc": desc,
        "duration_ms": duration_ms,
        "create_time_sec": create_time_sec,
        "cover_image": cover_image,
        "video_down_url": video_down_url,
        "author": author,
        "author_handle": author.get("author_handle"),
        "platform_author_id": author.get("platform_author_id") or author.get("author_platform_id"),
        "douyin_sec_uid": author.get("douyin_sec_uid"),
        "douyin_aweme_author_id": author.get("douyin_aweme_author_id"),
        # Engagement counters, each defaulting to 0 when absent.
        **{name: metrics.get(name, 0) for name in counter_names},
        "tags": tags or [],
        "is_video": is_video,
        "video_type_reason": video_type_reason,
        "u2_task_id": u2_task_id,
        "u2_task_status": u2_task_status,
        "raw_content": raw_content,
        "asr_source": asr_source,
        "summary": digest.get("summary", ""),
        "insights": insights,
        "confidence": confidence,
        "error_reason": error_reason,
        "missing_fields": _build_missing_fields(
            title=title,
            desc=desc,
            platform_work_id=platform_work_id,
            video_down_url=video_down_url,
            author=author,
        ),
        "extract_trace": extract_trace,
        "fallback_trace": fallback_trace,
        "request_id": request_id,
        "endpoint_list": endpoints_used,
    }
662
-
663
-
664
def _emit_workflow_outcome(progress: Optional[ProgressReporter], finalized: Dict[str, Any]) -> None:
    """Emit the terminal ``single_video.workflow`` progress event.

    Calls ``progress.failed`` when ``finalized`` carries a truthy
    ``error_reason`` and ``progress.done`` otherwise. Does nothing when no
    reporter was supplied.
    """
    if progress is None:
        return
    failed = bool(finalized.get("error_reason"))
    emit = progress.failed if failed else progress.done
    emit(
        stage="single_video.workflow",
        message="douyin single_video workflow failed" if failed else "douyin single_video workflow finished",
        data={
            "request_id": finalized.get("request_id"),
            "card_write_ok": bool((finalized.get("card_write") or {}).get("ok")),
            "output_persist_ok": bool((finalized.get("output_persist") or {}).get("ok")),
        },
    )


def _empty_work_result(
    *,
    source_input: Dict[str, Any],
    video_type_reason: str,
    u2_gate_reason: str,
    error_reason: Optional[str],
    extract_trace: List[Dict[str, Any]],
    fallback_trace: List[Any],
    request_id: Optional[str],
) -> Dict[str, Any]:
    """Build a low-confidence result for a run that produced no work data.

    Every work-level field is blank/None/zero; only the reason codes, traces
    and request id vary between the failure points.
    """
    return _build_result(
        source_input=source_input,
        platform_work_id=None,
        title="",
        desc="",
        duration_ms=None,
        video_down_url=None,
        author={
            "author_handle": None,
            "author_platform_id": None,
            "douyin_sec_uid": None,
            "douyin_aweme_author_id": None,
            "nickname": None,
            "signature": None,
        },
        metrics={
            "digg_count": 0,
            "comment_count": 0,
            "collect_count": 0,
            "share_count": 0,
            "play_count": 0,
        },
        tags=[],
        is_video=False,
        video_type_reason=video_type_reason,
        raw_content="",
        confidence="low",
        error_reason=error_reason,
        extract_trace=extract_trace,
        fallback_trace=fallback_trace,
        request_id=request_id,
        u2_task_id=None,
        u2_task_status="UNKNOWN",
        u2_gate_reason=u2_gate_reason,
    )


def run_douyin_single_video(
    *,
    input_value: Optional[str],
    share_url: Optional[str],
    env_file: Optional[str],
    api_key_env: str,
    base_url: Optional[str],
    timeout_ms: Optional[int],
    app_timeout_ms: Optional[int],
    web_timeout_ms: Optional[int],
    poll_interval_sec: float,
    max_polls: int,
    u2_submit_max_retries: int,
    u2_submit_backoff_ms: int,
    write_card: bool,
    card_type: str,
    card_root: Optional[str],
    content_kind: str = "single_video",
    storage_config: Optional[Dict[str, Any]] = None,
    allow_process_env: bool = False,
    persist_output: bool = True,
    progress: Optional[ProgressReporter] = None,
) -> Dict[str, Any]:
    """Run the Douyin single-video pipeline and return the finalized result.

    Steps, in order: normalize the input, fetch the work (U1), derive work
    fields and the lowest-quality download URL, decide whether the work is
    eligible for U2 transcription, optionally submit and poll the U2 task,
    then build the result, write the card and finalize.

    Every exit path -- including the early failures (no share URL, U1 fetch
    failed, no aweme detail) -- writes a card, finalizes the result and emits
    a terminal ``single_video.workflow`` progress event.

    Args:
        input_value: Positional input, passed to ``_normalize_input``.
        share_url: Explicit share URL, passed to ``_normalize_input``.
        env_file: Forwarded to ``resolve_runtime``.
        api_key_env: Forwarded to ``resolve_runtime``.
        base_url: Forwarded to ``resolve_runtime``.
        timeout_ms: Forwarded to ``resolve_runtime``.
        app_timeout_ms: Timeout passed to the U1 fetch for the app endpoint;
            the runtime timeout is used when this is falsy.
        web_timeout_ms: Timeout passed to the U1 fetch for the web endpoint;
            the runtime timeout is used when this is falsy.
        poll_interval_sec: Forwarded to ``poll_u2_task``.
        max_polls: Forwarded to ``poll_u2_task``.
        u2_submit_max_retries: Forwarded to ``submit_u2_asr_with_retry``.
        u2_submit_backoff_ms: Forwarded to ``submit_u2_asr_with_retry``.
        write_card: Must be truthy; see Raises.
        card_type: Forwarded to ``write_benchmark_card``.
        card_root: Forwarded to ``write_benchmark_card``.
        content_kind: Forwarded to ``write_benchmark_card`` and included in
            the guard's error message.
        storage_config: Forwarded to ``write_benchmark_card`` and
            ``_finalize_result``.
        allow_process_env: Forwarded to ``resolve_runtime``.
        persist_output: Must be truthy; see Raises. Forwarded to
            ``_finalize_result``.
        progress: Optional reporter that receives stage events.

    Returns:
        The value returned by ``_finalize_result`` for the built result.

    Raises:
        ValueError: If ``write_card`` or ``persist_output`` is falsy.
    """
    if not write_card or not persist_output:
        raise ValueError(
            f"fixed_pipeline_requires_full_persistence:douyin:{content_kind}:write_card={bool(write_card)}:persist_output={bool(persist_output)}"
        )

    source_input = _normalize_input(input_value, share_url)
    if progress is not None:
        progress.started(stage="single_video.workflow", message="douyin single_video workflow started")

    def _finish(result: Dict[str, Any], work_id: Optional[str]) -> Dict[str, Any]:
        # The guard above guarantees write_card is truthy, so the card is
        # written unconditionally on every exit path.
        result["card_write"] = write_benchmark_card(
            payload=result,
            platform="douyin",
            card_type=card_type,
            card_root=card_root,
            content_kind=content_kind,
            storage_config=storage_config,
        )
        finalized = _finalize_result(
            result=result,
            source_input=source_input,
            platform_work_id=work_id,
            storage_config=storage_config,
            persist_output=persist_output,
        )
        # Always close the workflow stage that progress.started() opened.
        _emit_workflow_outcome(progress, finalized)
        return finalized

    if not source_input.get("share_url"):
        return _finish(
            _empty_work_result(
                source_input=source_input,
                video_type_reason="missing_share_url",
                u2_gate_reason="not_started",
                error_reason="missing_share_url",
                extract_trace=[],
                fallback_trace=[],
                request_id=None,
            ),
            None,
        )

    runtime = resolve_runtime(
        env_file=env_file,
        api_key_env=api_key_env,
        base_url=base_url,
        timeout_ms=timeout_ms,
        allow_process_env=allow_process_env,
    )

    app_timeout = int(app_timeout_ms or runtime["timeout_ms"])
    web_timeout = int(web_timeout_ms or runtime["timeout_ms"])

    trace: List[Dict[str, Any]] = []

    if progress is not None:
        progress.progress(stage="single_video.fetch", message="fetching douyin single_video payload")
    one_video_response = _u1_fetch_one_video(
        base_url=runtime["base_url"],
        token=runtime["token"],
        share_url=source_input["share_url"] or "",
        app_timeout_ms=app_timeout,
        web_timeout_ms=web_timeout,
    )

    # When present, "_app_failed" holds the failed primary (app endpoint)
    # response; record it ahead of the response that was actually used.
    app_failed = one_video_response.get("_app_failed")
    if app_failed:
        trace.append(
            _trace_step(
                step="u1_fetch_one_video_primary",
                endpoint=APP_ENDPOINT,
                response=app_failed,
                extra={"timeout_ms": app_timeout},
            )
        )

    trace.append(
        _trace_step(
            step="u1_fetch_one_video_effective",
            endpoint=one_video_response.get("_endpoint"),
            response=one_video_response,
            extra={
                "app_timeout_ms": app_timeout,
                "web_timeout_ms": web_timeout,
            },
        )
    )

    def _fail_after_fetch(default_error_reason: str, reason: str) -> Dict[str, Any]:
        # Shared exit for failures detected right after the U1 fetch.
        failure_ctx = resolve_trace_error_context(
            responses=[one_video_response],
            extract_trace=trace,
            default_error_reason=default_error_reason,
        )
        return _finish(
            _empty_work_result(
                source_input=source_input,
                video_type_reason=reason,
                u2_gate_reason=reason,
                error_reason=failure_ctx.get("error_reason"),
                extract_trace=trace,
                fallback_trace=failure_ctx.get("fallback_trace", []),
                request_id=failure_ctx.get("request_id"),
            ),
            None,
        )

    if not one_video_response.get("ok"):
        return _fail_after_fetch("u1_fetch_one_video_failed", "u1_failed")

    aweme_detail = _extract_aweme_detail(one_video_response.get("data"))
    if not aweme_detail:
        return _fail_after_fetch("aweme_detail_missing", "aweme_detail_missing")

    video_type_info = normalize_douyin_video_type(aweme_detail)
    duration_ms = _normalize_duration_ms(aweme_detail)
    platform_work_id = _extract_platform_work_id(aweme_detail)
    title = _pick_title(aweme_detail)
    desc = _pick_desc(aweme_detail)
    author = _extract_author(aweme_detail)
    metrics = _extract_metrics(aweme_detail)
    tags = _extract_douyin_tags(aweme_detail)
    create_time_sec = _extract_create_time_sec(aweme_detail)
    cover_image = _extract_cover_image(aweme_detail)

    video_obj = aweme_detail.get("video") if isinstance(aweme_detail.get("video"), dict) else {}
    down_url_selection = select_low_quality_video_url(video_obj)
    video_down_url = down_url_selection.get("video_down_url")

    trace.append(
        {
            "step": "video_type_normalization",
            "is_video": video_type_info.get("is_video"),
            "video_type_reason": video_type_info.get("video_type_reason"),
            "matched_field": video_type_info.get("matched_field"),
            "duration_ms": duration_ms,
        }
    )

    trace.append(
        {
            "step": "select_low_quality_video_url",
            "selection_reason": down_url_selection.get("selection_reason"),
            "selected_from": down_url_selection.get("selected_from"),
            "video_down_url": video_down_url,
            "min_bit_rate": down_url_selection.get("min_bit_rate"),
            "download_url_count": down_url_selection.get("download_url_count"),
            "bit_rate_count": down_url_selection.get("bit_rate_count"),
        }
    )

    # U2 gate: the work must be a video, longer than 13000 ms, at most
    # 1800000 ms, and have a download URL. The first failing check names the
    # skip reason; can_u2 is derived from it so the thresholds live in one place.
    if not video_type_info.get("is_video"):
        gate_reason = "skip:not_video"
    elif not duration_ms:
        gate_reason = "skip:duration_missing"
    elif duration_ms <= 13000:
        gate_reason = "skip:duration_too_short"
    elif duration_ms > 1800000:
        gate_reason = "skip:duration_too_long"
    elif not video_down_url:
        gate_reason = "skip:video_down_url_missing"
    else:
        gate_reason = "pass"
    can_u2 = gate_reason == "pass"

    trace.append(
        {
            "step": "u2_gate",
            "can_u2": can_u2,
            "gate_reason": gate_reason,
            "rule": "is_video && 13000<duration_ms<=1800000 && video_down_url_present",
            "is_video": bool(video_type_info.get("is_video")),
            "video_type_reason": video_type_info.get("video_type_reason"),
            "duration_ms": duration_ms,
            "video_down_url_present": bool(video_down_url),
        }
    )

    raw_content = ""
    error_reason: Optional[str] = None
    u2_task_id: Optional[str] = None
    u2_task_status = "SKIPPED"
    submit_response: Dict[str, Any] = {}
    poll_result: Dict[str, Any] = {}

    if can_u2:
        if progress is not None:
            progress.progress(
                stage="single_video.u2",
                message="starting douyin u2 submit",
                data={"video_down_url_present": True},
            )
        submit_bundle = submit_u2_asr_with_retry(
            base_url=runtime["base_url"],
            token=runtime["token"],
            timeout_ms=runtime["timeout_ms"],
            video_url=video_down_url,
            max_retries=u2_submit_max_retries,
            backoff_ms=u2_submit_backoff_ms,
        )
        submit_response = submit_bundle["submit_response"]
        u2_task_id = submit_bundle.get("task_id")

        trace.append(
            _trace_step(
                step="u2_submit_transcription",
                endpoint=U2_SUBMIT_ENDPOINT,
                response=submit_response,
                extra={
                    "task_id": u2_task_id,
                    "video_down_url": video_down_url,
                    "final_submit_status": submit_bundle.get("final_submit_status"),
                },
            )
        )

        trace.append(
            {
                "step": "u2_submit_retry_chain",
                "final_submit_status": submit_bundle.get("final_submit_status"),
                "retries_config": {
                    "u2_submit_max_retries": max(0, int(u2_submit_max_retries)),
                    "u2_submit_backoff_ms": max(0, int(u2_submit_backoff_ms)),
                },
                "attempts": submit_bundle.get("retry_chain", []),
            }
        )

        if not submit_response.get("ok") or not u2_task_id:
            submit_error = submit_response.get("error_reason") or "u2_submit_failed_or_missing_task_id"
            if progress is not None:
                progress.failed(
                    stage="single_video.u2",
                    message="douyin u2 submit failed",
                    data={"error_reason": submit_error},
                )
            error_reason = submit_error
            u2_task_status = "UNKNOWN"
        else:
            if progress is not None:
                progress.progress(stage="single_video.u2", message="polling douyin u2 task", data={"task_id": u2_task_id})
            poll_result = poll_u2_task(
                base_url=runtime["base_url"],
                token=runtime["token"],
                timeout_ms=runtime["timeout_ms"],
                task_id=u2_task_id,
                poll_interval_sec=poll_interval_sec,
                max_polls=max_polls,
            )
            poll_ok = poll_result.get("ok")
            poll_attempts = len(poll_result.get("trace", []))
            u2_task_status = poll_result.get("task_status") or "UNKNOWN"
            # Keep the transcript only when polling reported success.
            raw_content = poll_result.get("transcript_text", "") if poll_ok else ""
            error_reason = poll_result.get("error_reason")

            trace.append(
                {
                    "step": "u2_poll_task",
                    "endpoint": "/api/u2/v1/tasks/{task_id}",
                    "task_id": u2_task_id,
                    "ok": poll_ok,
                    "task_status": u2_task_status,
                    "request_id": poll_result.get("request_id"),
                    "error_reason": poll_result.get("error_reason"),
                    "attempts": poll_attempts,
                }
            )
            if progress is not None:
                (progress.done if poll_ok else progress.failed)(
                    stage="single_video.u2",
                    message="douyin u2 polling finished" if poll_ok else "douyin u2 polling failed",
                    data={"task_id": u2_task_id, "task_status": u2_task_status, "attempts": poll_attempts},
                )

    error_ctx = resolve_trace_error_context(
        responses=[poll_result, submit_response, one_video_response],
        extract_trace=trace,
        explicit_error_reason=error_reason,
        explicit_request_id=poll_result.get("request_id") or submit_response.get("request_id") or one_video_response.get("request_id"),
    )
    error_reason = error_ctx.get("error_reason")

    # "high" needs an eligible video that actually produced a transcript;
    # any error forces "low"; everything else is "medium".
    if error_reason:
        confidence = "low"
    elif can_u2 and raw_content:
        confidence = "high"
    else:
        confidence = "medium"

    result = _build_result(
        source_input=source_input,
        platform_work_id=platform_work_id,
        title=title,
        desc=desc,
        duration_ms=duration_ms,
        create_time_sec=create_time_sec,
        cover_image=cover_image,
        video_down_url=video_down_url,
        author=author,
        metrics=metrics,
        tags=tags,
        is_video=bool(video_type_info.get("is_video")),
        video_type_reason=str(video_type_info.get("video_type_reason") or ""),
        raw_content=raw_content,
        confidence=confidence,
        error_reason=error_reason,
        extract_trace=trace,
        fallback_trace=error_ctx.get("fallback_trace", []),
        request_id=error_ctx.get("request_id"),
        u2_task_id=u2_task_id,
        u2_task_status=u2_task_status,
        u2_gate_reason=gate_reason,
        asr_source="u2" if raw_content else "fallback_none",
    )

    if progress is not None:
        progress.progress(stage="single_video.card_write", message="writing douyin single_video card")
    return _finish(result, platform_work_id)
1123
-
1124
-
1125
def main() -> None:
    """Command-line entry point for the Douyin single-video pipeline.

    Parses the CLI options, loads the Tikomni config, resolves runtime
    settings (an explicit CLI value is preferred over the config value), runs
    ``run_douyin_single_video`` with card writing and output persistence
    enabled, and hands the result to ``write_json_stdout``.

    Raises:
        SystemExit: Always. Code 0 when the result has no ``error_reason``,
            code 1 otherwise.
    """
    cli = argparse.ArgumentParser(description="Run Douyin single-video low-quality extraction")
    # Declared in display order; each entry is (flags, add_argument kwargs).
    option_specs = (
        (("input",), {"nargs": "?", "default": None, "help": "Douyin share URL"}),
        (("--share-url",), {"default": None, "help": "Douyin share URL"}),
        (("--config",), {"default": None, "help": "Runtime config YAML path"}),
        (("--env-file",), {"default": None, "help": "Optional env file path"}),
        (
            ("--allow-process-env",),
            {"action": "store_true", "help": "Allow process env to override .env/.env.local"},
        ),
        (("--api-key-env",), {"default": None, "help": "API key env variable name"}),
        (("--base-url",), {"default": None, "help": "Tikomni base URL"}),
        (("--timeout-ms",), {"type": int, "default": None, "help": "Global timeout ms"}),
        (
            ("--app-timeout-ms",),
            {"type": int, "default": None, "help": "APP endpoint timeout ms (optional)"},
        ),
        (
            ("--web-timeout-ms",),
            {"type": int, "default": None, "help": "WEB endpoint timeout ms (optional)"},
        ),
        (("--poll-interval-sec",), {"type": float, "default": 3.0, "help": "U2 poll interval seconds"}),
        (("--max-polls",), {"type": int, "default": 30, "help": "Max U2 polls"}),
        (
            ("--u2-submit-max-retries",),
            {"type": int, "default": 2, "help": "Max retries for retriable U2 submit failures"},
        ),
        (
            ("--u2-submit-backoff-ms",),
            {
                "type": int,
                "default": 1500,
                "help": "Base backoff ms for retriable U2 submit failures (exponential)",
            },
        ),
        (
            ("--card-type",),
            {
                "choices": ["work", "author", "author_sample_work"],
                "default": "work",
                "help": "Primary card type",
            },
        ),
        (
            ("--content-kind",),
            {
                "default": "single_video",
                "help": "Routing kind, e.g. single_video/author_home/author_analysis",
            },
        ),
        (
            ("--card-root",),
            {
                "default": None,
                "help": "Card root (absolute); falls back to TIKOMNI_CARD_ROOT when writing cards",
            },
        ),
    )
    for flags, settings in option_specs:
        cli.add_argument(*flags, **settings)
    opts = cli.parse_args()

    config, _ = load_tikomni_config(
        opts.config,
        env_file=opts.env_file,
        allow_process_env=opts.allow_process_env,
    )

    # A CLI value wins; otherwise fall back to the loaded config.
    if opts.timeout_ms is not None:
        effective_timeout_ms = opts.timeout_ms
    else:
        effective_timeout_ms = config_get(config, "runtime.timeout_ms", None)

    pipeline_kwargs = {
        "input_value": opts.input,
        "share_url": opts.share_url,
        "env_file": opts.env_file or config_get(config, "runtime.env_file", None),
        "api_key_env": opts.api_key_env or config_get(config, "runtime.auth_env_key", "TIKOMNI_API_KEY"),
        "base_url": opts.base_url or config_get(config, "runtime.base_url", None),
        "timeout_ms": effective_timeout_ms,
        "app_timeout_ms": opts.app_timeout_ms,
        "web_timeout_ms": opts.web_timeout_ms,
        "poll_interval_sec": opts.poll_interval_sec,
        "max_polls": opts.max_polls,
        "u2_submit_max_retries": opts.u2_submit_max_retries,
        "u2_submit_backoff_ms": opts.u2_submit_backoff_ms,
        "write_card": True,
        "card_type": opts.card_type,
        "card_root": opts.card_root,
        "content_kind": opts.content_kind,
        "storage_config": config,
        "allow_process_env": opts.allow_process_env,
        "persist_output": True,
    }

    try:
        outcome = run_douyin_single_video(**pipeline_kwargs)
    except ValueError as error:
        # A ValueError from the pipeline is reported as a minimal
        # low-confidence result instead of a traceback.
        outcome = {
            "platform": "douyin",
            "raw_content": "",
            "summary": "",
            "insights": ["source=douyin:single-video-low-quality", "runtime_not_ready"],
            "confidence": "low",
            "error_reason": str(error),
            "missing_fields": [],
            "extract_trace": [],
            "fallback_trace": [],
            "request_id": None,
            "endpoint_list": [],
        }

    write_json_stdout(outcome)
    exit_code = 1 if outcome.get("error_reason") else 0
    raise SystemExit(exit_code)
1205
-
1206
-
1207
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()