@tikomni/skills 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (451)
  1. package/.skill-package-allowlist.txt +4 -0
  2. package/LICENSE +21 -0
  3. package/README.md +167 -0
  4. package/README.zh-CN.md +167 -0
  5. package/bin/tikomni-skills.js +127 -0
  6. package/env.example +160 -0
  7. package/lib/installer.js +176 -0
  8. package/package.json +44 -0
  9. package/skills/creator-analysis/SKILL.md +71 -0
  10. package/skills/creator-analysis/agents/openai.yaml +4 -0
  11. package/skills/creator-analysis/env.example +36 -0
  12. package/skills/creator-analysis/references/api-capability-index.md +92 -0
  13. package/skills/creator-analysis/references/api-contracts/asr-api.md +130 -0
  14. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +776 -0
  15. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +2017 -0
  16. package/skills/creator-analysis/references/api-contracts/demo-api.md +717 -0
  17. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +3594 -0
  18. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +2274 -0
  19. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +1575 -0
  20. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +3254 -0
  21. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +4118 -0
  22. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +5544 -0
  23. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +1916 -0
  24. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +1540 -0
  25. package/skills/creator-analysis/references/api-contracts/health-check.md +69 -0
  26. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +78 -0
  27. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +2256 -0
  28. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +2011 -0
  29. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +2630 -0
  30. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +44 -0
  31. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +1518 -0
  32. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +1242 -0
  33. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +1088 -0
  34. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +1949 -0
  35. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +126 -0
  36. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +1142 -0
  37. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +2025 -0
  38. package/skills/creator-analysis/references/api-contracts/sora2-api.md +2266 -0
  39. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +208 -0
  40. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +897 -0
  41. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +134 -0
  42. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +494 -0
  43. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +5947 -0
  44. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +968 -0
  45. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +5735 -0
  46. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +1951 -0
  47. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +742 -0
  48. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +1890 -0
  49. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +4448 -0
  50. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +342 -0
  51. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +143 -0
  52. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +989 -0
  53. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +809 -0
  54. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +677 -0
  55. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +1547 -0
  56. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +798 -0
  57. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +2459 -0
  58. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +1291 -0
  59. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +1683 -0
  60. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +1324 -0
  61. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +1209 -0
  62. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +489 -0
  63. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +2636 -0
  64. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +2660 -0
  65. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +2315 -0
  66. package/skills/creator-analysis/references/api-tags/asr-api.md +100 -0
  67. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +482 -0
  68. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +1267 -0
  69. package/skills/creator-analysis/references/api-tags/demo-api.md +365 -0
  70. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +2012 -0
  71. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +1428 -0
  72. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +694 -0
  73. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +694 -0
  74. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +1059 -0
  75. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +3314 -0
  76. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +935 -0
  77. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +925 -0
  78. package/skills/creator-analysis/references/api-tags/health-check.md +40 -0
  79. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +57 -0
  80. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +1224 -0
  81. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +1147 -0
  82. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +1123 -0
  83. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +45 -0
  84. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +846 -0
  85. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +551 -0
  86. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +687 -0
  87. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +1105 -0
  88. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +112 -0
  89. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +721 -0
  90. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +1057 -0
  91. package/skills/creator-analysis/references/api-tags/sora2-api.md +737 -0
  92. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +136 -0
  93. package/skills/creator-analysis/references/api-tags/threads-web-api.md +472 -0
  94. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +65 -0
  95. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +253 -0
  96. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +1393 -0
  97. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +179 -0
  98. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +3264 -0
  99. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +709 -0
  100. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +366 -0
  101. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +663 -0
  102. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +2516 -0
  103. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +220 -0
  104. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +96 -0
  105. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +562 -0
  106. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +405 -0
  107. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +431 -0
  108. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +851 -0
  109. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +470 -0
  110. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +1405 -0
  111. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +534 -0
  112. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +934 -0
  113. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +757 -0
  114. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +762 -0
  115. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +308 -0
  116. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +934 -0
  117. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +717 -0
  118. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +1384 -0
  119. package/skills/creator-analysis/references/asr-orchestration.md +33 -0
  120. package/skills/creator-analysis/references/config-templates/defaults.yaml +60 -0
  121. package/skills/creator-analysis/references/contracts/creator-card-fields.md +23 -0
  122. package/skills/creator-analysis/references/contracts/work-card-fields.md +32 -0
  123. package/skills/creator-analysis/references/platform-guides/douyin.md +49 -0
  124. package/skills/creator-analysis/references/platform-guides/generic.md +46 -0
  125. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +54 -0
  126. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +28 -0
  127. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +46 -0
  128. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +49 -0
  129. package/skills/creator-analysis/references/prompt-contracts/cta.md +24 -0
  130. package/skills/creator-analysis/references/prompt-contracts/hook.md +25 -0
  131. package/skills/creator-analysis/references/prompt-contracts/insight.md +47 -0
  132. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +30 -0
  133. package/skills/creator-analysis/references/prompt-contracts/structure.md +25 -0
  134. package/skills/creator-analysis/references/prompt-contracts/style.md +27 -0
  135. package/skills/creator-analysis/references/prompt-contracts/summary.md +29 -0
  136. package/skills/creator-analysis/references/prompt-contracts/topic.md +29 -0
  137. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +325 -0
  138. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +158 -0
  139. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +41 -0
  140. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +75 -0
  141. package/skills/creator-analysis/references/workflow.md +18 -0
  142. package/skills/creator-analysis/scripts/__init__.py +0 -0
  143. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  144. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  145. package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +299 -0
  146. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  147. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +1122 -0
  148. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +260 -0
  149. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +260 -0
  150. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +5 -0
  151. package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +961 -0
  152. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  153. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +149 -0
  154. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  155. package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +636 -0
  156. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  157. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +491 -0
  158. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +553 -0
  159. package/skills/creator-analysis/scripts/author_home/schema.py +417 -0
  160. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  161. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +133 -0
  162. package/skills/creator-analysis/scripts/core/bootstrap_env.py +35 -0
  163. package/skills/creator-analysis/scripts/core/config_loader.py +418 -0
  164. package/skills/creator-analysis/scripts/core/extract_pipeline.py +173 -0
  165. package/skills/creator-analysis/scripts/core/progress_report.py +111 -0
  166. package/skills/creator-analysis/scripts/core/storage_router.py +253 -0
  167. package/skills/creator-analysis/scripts/core/tikomni_common.py +588 -0
  168. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  169. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  170. package/skills/creator-analysis/scripts/pipeline/asr/asr_pipeline.py +1189 -0
  171. package/skills/creator-analysis/scripts/pipeline/asr/poll_u2_task.py +95 -0
  172. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  173. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  174. package/skills/creator-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +224 -0
  175. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +1208 -0
  176. package/skills/creator-analysis/scripts/platform/douyin/select_low_quality_video_url.py +200 -0
  177. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  178. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +2128 -0
  179. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  180. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +106 -0
  181. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +1402 -0
  182. package/skills/meta-capability/SKILL.md +69 -0
  183. package/skills/meta-capability/agents/openai.yaml +4 -0
  184. package/skills/meta-capability/env.example +42 -0
  185. package/skills/meta-capability/references/api-capability-index.md +92 -0
  186. package/skills/meta-capability/references/api-contracts/asr-api.md +130 -0
  187. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +776 -0
  188. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +2017 -0
  189. package/skills/meta-capability/references/api-contracts/demo-api.md +717 -0
  190. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +3594 -0
  191. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +2274 -0
  192. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +1575 -0
  193. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +3254 -0
  194. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +4118 -0
  195. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +5544 -0
  196. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +1916 -0
  197. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +1540 -0
  198. package/skills/meta-capability/references/api-contracts/health-check.md +69 -0
  199. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +78 -0
  200. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +2256 -0
  201. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +2011 -0
  202. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +2630 -0
  203. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +44 -0
  204. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +1518 -0
  205. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +1242 -0
  206. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +1088 -0
  207. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +1949 -0
  208. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +126 -0
  209. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +1142 -0
  210. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +2025 -0
  211. package/skills/meta-capability/references/api-contracts/sora2-api.md +2266 -0
  212. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +208 -0
  213. package/skills/meta-capability/references/api-contracts/threads-web-api.md +897 -0
  214. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +134 -0
  215. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +494 -0
  216. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +5947 -0
  217. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +968 -0
  218. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +5735 -0
  219. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +1951 -0
  220. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +742 -0
  221. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +1890 -0
  222. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +4448 -0
  223. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +342 -0
  224. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +143 -0
  225. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +989 -0
  226. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +809 -0
  227. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +677 -0
  228. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +1547 -0
  229. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +798 -0
  230. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +2459 -0
  231. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +1291 -0
  232. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +1683 -0
  233. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +1324 -0
  234. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +1209 -0
  235. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +489 -0
  236. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +2636 -0
  237. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +2660 -0
  238. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +2315 -0
  239. package/skills/meta-capability/references/api-tags/asr-api.md +100 -0
  240. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +482 -0
  241. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +1267 -0
  242. package/skills/meta-capability/references/api-tags/demo-api.md +365 -0
  243. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +2012 -0
  244. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +1428 -0
  245. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +694 -0
  246. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +694 -0
  247. package/skills/meta-capability/references/api-tags/douyin-search-api.md +1059 -0
  248. package/skills/meta-capability/references/api-tags/douyin-web-api.md +3314 -0
  249. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +935 -0
  250. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +925 -0
  251. package/skills/meta-capability/references/api-tags/health-check.md +40 -0
  252. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +57 -0
  253. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +1224 -0
  254. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +1147 -0
  255. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +1123 -0
  256. package/skills/meta-capability/references/api-tags/ios-shortcut.md +45 -0
  257. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +846 -0
  258. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +551 -0
  259. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +687 -0
  260. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +1105 -0
  261. package/skills/meta-capability/references/api-tags/media-ingest-api.md +112 -0
  262. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +721 -0
  263. package/skills/meta-capability/references/api-tags/reddit-app-api.md +1057 -0
  264. package/skills/meta-capability/references/api-tags/sora2-api.md +737 -0
  265. package/skills/meta-capability/references/api-tags/temp-mail-api.md +136 -0
  266. package/skills/meta-capability/references/api-tags/threads-web-api.md +472 -0
  267. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +65 -0
  268. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +253 -0
  269. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +1393 -0
  270. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +179 -0
  271. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +3264 -0
  272. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +709 -0
  273. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +366 -0
  274. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +663 -0
  275. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +2516 -0
  276. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +220 -0
  277. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +96 -0
  278. package/skills/meta-capability/references/api-tags/twitter-web-api.md +562 -0
  279. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +405 -0
  280. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +431 -0
  281. package/skills/meta-capability/references/api-tags/weibo-app-api.md +851 -0
  282. package/skills/meta-capability/references/api-tags/weibo-web-api.md +470 -0
  283. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +1405 -0
  284. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +534 -0
  285. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +934 -0
  286. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +757 -0
  287. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +762 -0
  288. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +308 -0
  289. package/skills/meta-capability/references/api-tags/youtube-web-api.md +934 -0
  290. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +717 -0
  291. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +1384 -0
  292. package/skills/meta-capability/references/config-templates/defaults.yaml +18 -0
  293. package/skills/meta-capability/references/dispatch.md +27 -0
  294. package/skills/meta-capability/references/execution-guidelines.md +25 -0
  295. package/skills/meta-capability/references/implemented-route-map.md +177 -0
  296. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +75 -0
  297. package/skills/meta-capability/scripts/__init__.py +1 -0
  298. package/skills/meta-capability/scripts/call_route.py +141 -0
  299. package/skills/meta-capability/scripts/core/__init__.py +1 -0
  300. package/skills/meta-capability/scripts/core/bootstrap_env.py +32 -0
  301. package/skills/meta-capability/scripts/core/config_loader.py +204 -0
  302. package/skills/meta-capability/scripts/core/tikomni_common.py +443 -0
  303. package/skills/meta-capability/scripts/test_auth.py +98 -0
  304. package/skills/single-work-analysis/SKILL.md +62 -0
  305. package/skills/single-work-analysis/agents/openai.yaml +4 -0
  306. package/skills/single-work-analysis/env.example +36 -0
  307. package/skills/single-work-analysis/references/api-capability-index.md +92 -0
  308. package/skills/single-work-analysis/references/api-contracts/asr-api.md +130 -0
  309. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +776 -0
  310. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +2017 -0
  311. package/skills/single-work-analysis/references/api-contracts/demo-api.md +717 -0
  312. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +3594 -0
  313. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +2274 -0
  314. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +1575 -0
  315. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +3254 -0
  316. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +4118 -0
  317. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +5544 -0
  318. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +1916 -0
  319. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +1540 -0
  320. package/skills/single-work-analysis/references/api-contracts/health-check.md +69 -0
  321. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +78 -0
  322. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +2256 -0
  323. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +2011 -0
  324. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +2630 -0
  325. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +44 -0
  326. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +1518 -0
  327. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +1242 -0
  328. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +1088 -0
  329. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +1949 -0
  330. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +126 -0
  331. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +1142 -0
  332. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +2025 -0
  333. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +2266 -0
  334. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +208 -0
  335. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +897 -0
  336. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +134 -0
  337. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +494 -0
  338. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +5947 -0
  339. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +968 -0
  340. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +5735 -0
  341. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +1951 -0
  342. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +742 -0
  343. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +1890 -0
  344. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +4448 -0
  345. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +342 -0
  346. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +143 -0
  347. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +989 -0
  348. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +809 -0
  349. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +677 -0
  350. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +1547 -0
  351. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +798 -0
  352. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +2459 -0
  353. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +1291 -0
  354. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +1683 -0
  355. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +1324 -0
  356. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +1209 -0
  357. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +489 -0
  358. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +2636 -0
  359. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +2660 -0
  360. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +2315 -0
  361. package/skills/single-work-analysis/references/api-tags/asr-api.md +100 -0
  362. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +482 -0
  363. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +1267 -0
  364. package/skills/single-work-analysis/references/api-tags/demo-api.md +365 -0
  365. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +2012 -0
  366. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +1428 -0
  367. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +694 -0
  368. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +694 -0
  369. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +1059 -0
  370. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +3314 -0
  371. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +935 -0
  372. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +925 -0
  373. package/skills/single-work-analysis/references/api-tags/health-check.md +40 -0
  374. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +57 -0
  375. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +1224 -0
  376. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +1147 -0
  377. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +1123 -0
  378. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +45 -0
  379. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +846 -0
  380. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +551 -0
  381. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +687 -0
  382. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +1105 -0
  383. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +112 -0
  384. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +721 -0
  385. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +1057 -0
  386. package/skills/single-work-analysis/references/api-tags/sora2-api.md +737 -0
  387. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +136 -0
  388. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +472 -0
  389. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +65 -0
  390. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +253 -0
  391. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +1393 -0
  392. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +179 -0
  393. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +3264 -0
  394. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +709 -0
  395. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +366 -0
  396. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +663 -0
  397. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +2516 -0
  398. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +220 -0
  399. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +96 -0
  400. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +562 -0
  401. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +405 -0
  402. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +431 -0
  403. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +851 -0
  404. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +470 -0
  405. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +1405 -0
  406. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +534 -0
  407. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +934 -0
  408. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +757 -0
  409. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +762 -0
  410. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +308 -0
  411. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +934 -0
  412. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +717 -0
  413. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +1384 -0
  414. package/skills/single-work-analysis/references/asr-and-fallback.md +20 -0
  415. package/skills/single-work-analysis/references/config-templates/defaults.yaml +58 -0
  416. package/skills/single-work-analysis/references/contracts/work-card-fields.md +41 -0
  417. package/skills/single-work-analysis/references/platform-guides/douyin.md +47 -0
  418. package/skills/single-work-analysis/references/platform-guides/generic.md +43 -0
  419. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +54 -0
  420. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +28 -0
  421. package/skills/single-work-analysis/references/prompt-contracts/cta.md +24 -0
  422. package/skills/single-work-analysis/references/prompt-contracts/hook.md +25 -0
  423. package/skills/single-work-analysis/references/prompt-contracts/insight.md +47 -0
  424. package/skills/single-work-analysis/references/prompt-contracts/structure.md +25 -0
  425. package/skills/single-work-analysis/references/prompt-contracts/style.md +27 -0
  426. package/skills/single-work-analysis/references/prompt-contracts/summary.md +29 -0
  427. package/skills/single-work-analysis/references/prompt-contracts/topic.md +29 -0
  428. package/skills/single-work-analysis/references/schemas/work-card.schema.json +39 -0
  429. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +75 -0
  430. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  431. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  432. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +133 -0
  433. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +35 -0
  434. package/skills/single-work-analysis/scripts/core/config_loader.py +418 -0
  435. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +173 -0
  436. package/skills/single-work-analysis/scripts/core/progress_report.py +111 -0
  437. package/skills/single-work-analysis/scripts/core/storage_router.py +253 -0
  438. package/skills/single-work-analysis/scripts/core/tikomni_common.py +588 -0
  439. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  440. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  441. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +1189 -0
  442. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +95 -0
  443. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  444. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  445. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +224 -0
  446. package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +1233 -0
  447. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +200 -0
  448. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  449. package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +2156 -0
  450. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  451. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +1402 -0
@@ -0,0 +1,1189 @@
1
+ #!/usr/bin/env python3
2
+ """Shared ASR pipeline helpers for runner scripts."""
3
+
4
+ import json
5
+ import time
6
+ import urllib.error
7
+ import urllib.request
8
+ from urllib.parse import urlparse, urlunparse
9
+ from typing import Any, Dict, List, Optional
10
+
11
+ from scripts.core.tikomni_common import (
12
+ call_json_api,
13
+ deep_find_first,
14
+ extract_task_id,
15
+ extract_task_status,
16
+ extract_transcript_text,
17
+ is_terminal_status,
18
+ normalize_text,
19
+ )
20
+
21
+ U2_BATCH_SUBMIT_HARD_LIMIT = 100
22
+
23
+
24
def clamp_u2_batch_submit_size(size: int, *, default: int = 50, hard_limit: int = U2_BATCH_SUBMIT_HARD_LIMIT) -> int:
    """Coerce a requested batch size into the inclusive range ``1..hard_limit``.

    ``size`` is converted with ``int()``; when that conversion fails,
    ``default`` is used instead. The upper bound itself is never allowed to
    drop below 1.
    """
    try:
        requested = int(size)
    except Exception:
        requested = int(default)

    # Apply the lower bound first, then the (at least 1) upper bound.
    lower_bounded = max(1, requested)
    ceiling = max(1, int(hard_limit))
    return min(lower_bounded, ceiling)
31
+
32
+
33
def submit_u2_asr(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    video_url: str,
) -> Dict[str, Any]:
    """Submit a single media URL for U2 ASR.

    Convenience wrapper: delegates to ``submit_u2_asr_batch`` with a
    one-element URL list and returns its response unchanged.
    """
    single_item_batch = [video_url]
    return submit_u2_asr_batch(
        base_url=base_url,
        token=token,
        timeout_ms=timeout_ms,
        file_urls=single_item_batch,
    )
46
+
47
+
48
def submit_u2_asr_batch(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    file_urls: List[str],
) -> Dict[str, Any]:
    """POST a batch of media URLs to the U2 ASR transcription endpoint.

    The URLs are passed through ``normalize_media_candidates`` and truncated
    to ``U2_BATCH_SUBMIT_HARD_LIMIT`` entries before being sent. The result of
    ``call_json_api`` is returned as-is.
    """
    accepted_urls = normalize_media_candidates(file_urls)[:U2_BATCH_SUBMIT_HARD_LIMIT]
    request_body = {"input": {"file_urls": accepted_urls}}
    return call_json_api(
        base_url=base_url,
        path="/api/u2/v1/services/audio/asr/transcription",
        token=token,
        method="POST",
        timeout_ms=timeout_ms,
        body=request_body,
    )
65
+
66
+
67
def is_retriable_submit_failure(response: Dict[str, Any]) -> bool:
    """Tell whether a failed submit response is worth retrying.

    A response is retriable when its ``status_code`` is 502, 503 or 504
    (given as a number or a decimal string), or when its ``error_reason``
    contains ``TIMEOUT`` (case-insensitive; this also covers
    ``UPSTREAM_TIMEOUT``).

    Status codes that cannot be converted to an integer (NaN, infinity,
    non-decimal strings) are treated as non-matching instead of raising.
    """
    status_code = response.get("status_code")
    # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
    if isinstance(status_code, str) and status_code.isdecimal():
        status_code = int(status_code)
    if isinstance(status_code, (int, float)):
        try:
            numeric_code = int(status_code)
        except (ValueError, OverflowError):
            # NaN / +-inf floats have no integer value.
            numeric_code = None
        if numeric_code in {502, 503, 504}:
            return True

    error_reason = str(response.get("error_reason") or "").upper()
    return "TIMEOUT" in error_reason
76
+
77
+
78
def submit_u2_asr_batch_with_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    file_urls: List[str],
    max_retries: int,
    backoff_ms: int,
) -> Dict[str, Any]:
    """Submit a U2 ASR batch, retrying retriable failures with exponential backoff.

    The first attempt runs immediately; retry ``k`` (1-based) waits
    ``backoff_ms * 2 ** (k - 1)`` milliseconds beforehand. A retry happens
    only when ``is_retriable_submit_failure`` says so and attempts remain.

    Returns a dict with ``submit_response`` (last response), ``task_id``,
    ``retry_chain`` (one entry per attempt), ``final_submit_status`` and the
    ``file_urls`` that were actually submitted. When no URL survives
    normalization, no request is made and a ``failed_no_valid_file_urls``
    result is returned.
    """
    retry_budget = max(0, int(max_retries))
    backoff_unit_ms = max(0, int(backoff_ms))
    attempt_ceiling = retry_budget + 1

    urls_to_submit = normalize_media_candidates(file_urls)[:U2_BATCH_SUBMIT_HARD_LIMIT]
    if not urls_to_submit:
        return {
            "submit_response": {
                "ok": False,
                "status_code": None,
                "error_reason": "no_valid_file_urls",
                "data": {},
                "request_id": None,
            },
            "task_id": None,
            "retry_chain": [],
            "final_submit_status": "failed_no_valid_file_urls",
            "file_urls": [],
        }

    attempts_log: List[Dict[str, Any]] = []
    last_response: Dict[str, Any] = {}
    last_task_id: Optional[str] = None
    outcome = "failed_unknown"

    attempt = 0
    while attempt < attempt_ceiling:
        attempt += 1

        # No delay before the first attempt; doubling delay afterwards.
        if attempt == 1:
            delay_ms = 0
        else:
            delay_ms = backoff_unit_ms * (2 ** (attempt - 2))
        if delay_ms > 0:
            time.sleep(delay_ms / 1000.0)

        response = submit_u2_asr_batch(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            file_urls=urls_to_submit,
        )
        task_id = extract_task_id(response.get("data"))
        can_retry = is_retriable_submit_failure(response)

        attempts_log.append(
            {
                "attempt": attempt,
                "wait_ms": delay_ms,
                "status_code": response.get("status_code"),
                "error_reason": response.get("error_reason"),
                "ok": response.get("ok"),
                "task_id": task_id,
                "retriable": can_retry,
                "file_url_count": len(urls_to_submit),
            }
        )
        last_response, last_task_id = response, task_id

        if response.get("ok"):
            # An OK response is final either way; it only counts as success
            # when a task id came back with it.
            outcome = "success" if task_id else "failed_missing_task_id"
            break

        if can_retry and attempt < attempt_ceiling:
            outcome = "retrying"
            continue

        outcome = "failed_retries_exhausted" if can_retry else "failed_non_retriable"
        break

    return {
        "submit_response": last_response,
        "task_id": last_task_id,
        "retry_chain": attempts_log,
        "final_submit_status": outcome,
        "file_urls": urls_to_submit,
    }
165
+
166
+
167
def submit_u2_asr_with_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    video_url: str,
    max_retries: int,
    backoff_ms: int,
) -> Dict[str, Any]:
    """Submit one media URL for U2 ASR with retry.

    Delegates to ``submit_u2_asr_batch_with_retry`` with a one-element URL
    list and returns its result unchanged.
    """
    single_item_batch = [video_url]
    return submit_u2_asr_batch_with_retry(
        base_url=base_url,
        token=token,
        timeout_ms=timeout_ms,
        file_urls=single_item_batch,
        max_retries=max_retries,
        backoff_ms=backoff_ms,
    )
184
+
185
+
186
def clean_transcript_text(raw_text: Any) -> str:
    """Return ``raw_text`` as a whitespace-trimmed string; ``None`` becomes ``""``."""
    return "" if raw_text is None else str(raw_text).strip()
190
+
191
+
192
def extract_u2_task_metrics(payload: Any) -> Dict[str, Any]:
    """Return the value ``deep_find_first`` locates for ``task_metrics``/``metrics``.

    Anything that is not a dict (including a miss) yields an empty dict.
    """
    located = deep_find_first(payload, ["task_metrics", "metrics"])
    if isinstance(located, dict):
        return located
    return {}
195
+
196
+
197
+ def _safe_int(value: Any) -> int:
198
+ if isinstance(value, bool):
199
+ return int(value)
200
+ if isinstance(value, (int, float)):
201
+ return int(value)
202
+ if isinstance(value, str):
203
+ text = value.strip()
204
+ if not text:
205
+ return 0
206
+ try:
207
+ return int(float(text))
208
+ except Exception:
209
+ return 0
210
+ return 0
211
+
212
+
213
+ def _status_upper(value: Any) -> str:
214
+ return str(value or "").strip().upper()
215
+
216
+
217
+ def _is_success_status(status: str) -> bool:
218
+ return status in {"SUCCEEDED", "SUCCESS", "COMPLETED", "DONE"}
219
+
220
+
221
+ def _is_failed_status(status: str) -> bool:
222
+ return status in {"FAILED", "FAILURE", "ERROR", "CANCELED", "CANCELLED"}
223
+
224
+
225
def extract_platform_task_status(payload: Any) -> str:
    """Return the payload's ``platform_task_status`` (via ``deep_find_first``), trimmed and upper-cased."""
    return _status_upper(deep_find_first(payload, ["platform_task_status"]))
228
+
229
+
230
def extract_pending_count(payload: Any) -> int:
    """Return the payload's ``pending_count`` as a non-negative integer."""
    raw_pending = deep_find_first(payload, ["pending_count"])
    pending = _safe_int(raw_pending)
    return max(0, pending)
232
+
233
+
234
def extract_u2_batch_result_items(payload: Any) -> List[Dict[str, Any]]:
    """Collect per-file result items from an arbitrarily nested U2 payload.

    The payload is walked breadth-first. Every dict that carries a URL under
    ``file_url``, ``source_url``, ``media_url`` or ``url`` is turned into a
    candidate item keyed by its normalized URL. When several dicts map to the
    same URL, the candidate with the higher score wins, where the score is
    the tuple (ok, transcript length, has transcription_url, has no
    error_reason); on a tie the earlier candidate is kept.

    Returns the surviving items in first-seen order of their URL.
    """
    # Local import: deque gives O(1) pops from the left, whereas list.pop(0)
    # made this traversal quadratic on large payloads.
    from collections import deque

    def _score(entry: Dict[str, Any]) -> tuple:
        # Ranking used to choose between two candidates for the same URL.
        return (
            1 if entry.get("ok") else 0,
            len(str(entry.get("transcript_text") or "")),
            1 if entry.get("transcription_url") else 0,
            1 if not entry.get("error_reason") else 0,
        )

    found: Dict[str, Dict[str, Any]] = {}

    queue = deque([payload])
    while queue:
        node = queue.popleft()
        if isinstance(node, dict):
            raw_file_url = (
                node.get("file_url")
                or node.get("source_url")
                or node.get("media_url")
                or node.get("url")
            )
            file_url = normalize_media_url(str(raw_file_url or ""))
            if file_url:
                transcript = clean_transcript_text(
                    node.get("transcript_text")
                    or node.get("text")
                    or node.get("transcript")
                    or node.get("transcription")
                    or node.get("content")
                    or ""
                )
                if not transcript:
                    transcript = clean_transcript_text(extract_transcript_text(node))

                status = _status_upper(node.get("status") or node.get("task_status") or node.get("state"))
                error_reason = str(node.get("error_reason") or node.get("error") or "").strip()
                transcription_url = normalize_text(node.get("transcription_url"))
                ok = _is_success_status(status) or bool(transcript) or bool(transcription_url)

                candidate = {
                    "file_url": file_url,
                    "transcript_text": transcript,
                    "task_status": status,
                    "transcription_url": transcription_url,
                    "error_reason": error_reason,
                    "ok": ok,
                }

                existing = found.get(file_url)
                if existing is None or _score(candidate) > _score(existing):
                    found[file_url] = candidate

            # Keep descending: result items may sit below other result-like dicts.
            queue.extend(value for value in node.values() if isinstance(value, (dict, list)))
        elif isinstance(node, list):
            queue.extend(item for item in node if isinstance(item, (dict, list)))

    return list(found.values())
302
+
303
+
304
def map_u2_batch_results_by_file_url(payload: Any) -> Dict[str, Dict[str, Any]]:
    """Index the items from ``extract_u2_batch_result_items`` by normalized ``file_url``.

    Items whose URL normalizes to an empty string are dropped; a later item
    with the same key replaces an earlier one.
    """
    by_url: Dict[str, Dict[str, Any]] = {}
    for entry in extract_u2_batch_result_items(payload):
        key = normalize_media_url(entry.get("file_url"))
        if key:
            by_url[key] = entry
    return by_url
312
+
313
+
314
+ def _parse_non_negative_item_index(value: Any) -> Optional[int]:
315
+ if isinstance(value, bool):
316
+ return None
317
+ if isinstance(value, int):
318
+ return value if value >= 0 else None
319
+ if isinstance(value, float):
320
+ if value < 0 or not value.is_integer():
321
+ return None
322
+ return int(value)
323
+ if isinstance(value, str):
324
+ text = value.strip()
325
+ if not text or not text.isdigit():
326
+ return None
327
+ return int(text)
328
+ return None
329
+
330
+
331
def map_u2_batch_results_by_item_index(payload: Any) -> Dict[int, Dict[str, Any]]:
    """Index per-item results of a nested U2 payload by their ``item_index``.

    The payload is walked breadth-first. Every dict whose ``item_index``
    parses as a non-negative integer becomes a candidate. When several dicts
    share an index, the candidate with the higher score wins, where the score
    is the tuple (ok, transcript length, has transcription_url, has no
    error_reason); on a tie the earlier candidate is kept.
    """
    # Local import: deque gives O(1) pops from the left, whereas list.pop(0)
    # made this traversal quadratic on large payloads.
    from collections import deque

    def _score(entry: Dict[str, Any]) -> tuple:
        # Ranking used to choose between two candidates for the same index.
        return (
            1 if entry.get("ok") else 0,
            len(str(entry.get("transcript_text") or "")),
            1 if entry.get("transcription_url") else 0,
            1 if not entry.get("error_reason") else 0,
        )

    mapped: Dict[int, Dict[str, Any]] = {}
    queue = deque([payload])

    while queue:
        node = queue.popleft()
        if isinstance(node, dict):
            item_index = _parse_non_negative_item_index(node.get("item_index"))
            if item_index is not None:
                transcript = clean_transcript_text(
                    node.get("transcript_text")
                    or node.get("text")
                    or node.get("transcript")
                    or node.get("transcription")
                    or node.get("content")
                    or ""
                )
                if not transcript:
                    transcript = clean_transcript_text(extract_transcript_text(node))

                status = _status_upper(node.get("task_status") or node.get("status") or node.get("state"))
                error_reason = str(node.get("error_reason") or node.get("error") or "").strip()
                transcription_url = normalize_text(node.get("transcription_url"))
                ok = _is_success_status(status) or bool(transcript) or bool(transcription_url)

                candidate = {
                    "item_index": item_index,
                    "transcript_text": transcript,
                    "task_status": status,
                    "error_reason": error_reason,
                    "transcription_url": transcription_url,
                    "ok": ok,
                }

                existing = mapped.get(item_index)
                if existing is None or _score(candidate) > _score(existing):
                    mapped[item_index] = candidate

            queue.extend(value for value in node.values() if isinstance(value, (dict, list)))
        elif isinstance(node, list):
            queue.extend(item for item in node if isinstance(item, (dict, list)))

    return mapped
394
+
395
+
396
def _extract_transcript_from_transcription_payload(payload: Any) -> str:
    """Pull transcript text out of a fetched transcription payload.

    String payloads: blank text yields ``""``. Text that decodes as a JSON
    object or array is decoded and handled like a structured payload (the
    previous code returned such JSON verbatim as the transcript because its
    ``json.loads`` branch was unreachable for non-empty text). Any other text
    is returned trimmed.

    Structured payloads are probed in order: a ``full_text`` field, then
    ``extract_transcript_text``, then a ``sentences`` list whose entries'
    ``text``/``sentence``/``content`` values are joined with newlines.
    Returns ``""`` when nothing is found.
    """
    if isinstance(payload, str):
        text = clean_transcript_text(payload)
        if not text:
            return ""
        try:
            decoded = json.loads(text)
        except (ValueError, RecursionError):
            return text  # plain, non-JSON transcript text
        if not isinstance(decoded, (dict, list)):
            # JSON scalars (numbers, quoted strings, ...) are kept as the raw text.
            return text
        payload = decoded

    transcript = clean_transcript_text(deep_find_first(payload, ["full_text"]))
    if transcript:
        return transcript

    transcript = clean_transcript_text(extract_transcript_text(payload))
    if transcript:
        return transcript

    sentences = deep_find_first(payload, ["sentences"])
    if isinstance(sentences, list):
        lines: List[str] = []
        for sentence in sentences:
            if not isinstance(sentence, dict):
                continue
            line = clean_transcript_text(
                sentence.get("text") or sentence.get("sentence") or sentence.get("content")
            )
            if line:
                lines.append(line)
        if lines:
            return "\n".join(lines)

    return ""
429
+
430
+
431
def fetch_transcription_text_by_url(*, transcription_url: str, timeout_ms: int) -> Dict[str, Any]:
    """Download a transcription document and extract its transcript text.

    The URL is first passed through ``normalize_media_url``; an empty result
    or a non-http(s) URL is rejected without any network access. The response
    body is decoded as UTF-8 (invalid bytes replaced), parsed as JSON when
    possible, and handed to ``_extract_transcript_from_transcription_payload``.

    Always returns a dict with ``ok``, ``transcription_url``,
    ``error_reason`` and ``transcript_text``; fetch errors are reported
    through ``error_reason`` rather than raised.
    """

    def _outcome(ok: bool, resolved_url: str, reason: str, text: str) -> Dict[str, Any]:
        # Single place that fixes the shape (and key order) of the result.
        return {
            "ok": ok,
            "transcription_url": resolved_url,
            "error_reason": reason,
            "transcript_text": text,
        }

    url = normalize_media_url(transcription_url)
    if not url:
        return _outcome(False, "", "transcription_url_missing", "")
    if not url.startswith(("http://", "https://")):
        return _outcome(False, url, "transcription_url_invalid", "")

    request = urllib.request.Request(url=url, method="GET", headers={"Accept": "application/json"})
    try:
        # The timeout is given in seconds and never drops below one second.
        with urllib.request.urlopen(request, timeout=max(timeout_ms / 1000.0, 1.0)) as response:
            raw_text = response.read().decode("utf-8", errors="replace")
    except urllib.error.URLError as error:
        return _outcome(
            False,
            url,
            f"transcription_fetch_failed:{normalize_text(getattr(error, 'reason', error)) or 'unknown'}",
            "",
        )
    except Exception as error:
        return _outcome(
            False,
            url,
            f"transcription_fetch_failed:{normalize_text(error) or 'unknown'}",
            "",
        )

    # Prefer the decoded JSON document; fall back to the raw body text.
    try:
        payload: Any = json.loads(raw_text)
    except Exception:
        payload = raw_text

    transcript = _extract_transcript_from_transcription_payload(payload)
    if not transcript:
        return _outcome(False, url, "transcription_payload_empty", "")
    return _outcome(True, url, "", transcript)
488
+
489
+
490
def hydrate_u2_batch_results_from_transcription_urls(
    *,
    mapped_results: Dict[str, Dict[str, Any]],
    timeout_ms: int,
) -> Dict[str, Dict[str, Any]]:
    """Fill in missing transcripts by downloading each item's ``transcription_url``.

    Works on shallow copies, so ``mapped_results`` itself is not modified.
    A download is attempted only for items that have no transcript yet, a
    success status and a transcription URL; its outcome is recorded under
    ``transcription_fetch``. The per-download timeout is ``timeout_ms``
    clamped to 1000..15000 ms. Non-dict entries are skipped.
    """
    per_fetch_timeout_ms = max(1000, min(int(timeout_ms), 15000))
    result: Dict[str, Dict[str, Any]] = {}

    for file_url, original in mapped_results.items():
        if not isinstance(original, dict):
            continue

        entry = dict(original)
        status = _status_upper(entry.get("task_status"))
        transcript = clean_transcript_text(entry.get("transcript_text"))
        transcription_url = normalize_text(entry.get("transcription_url"))

        needs_download = (not transcript) and _is_success_status(status) and bool(transcription_url)
        if needs_download:
            fetched = fetch_transcription_text_by_url(
                transcription_url=transcription_url,
                timeout_ms=per_fetch_timeout_ms,
            )
            downloaded_text = clean_transcript_text(fetched.get("transcript_text"))
            entry["transcription_fetch"] = {
                "ok": bool(fetched.get("ok")),
                "error_reason": fetched.get("error_reason"),
            }
            if downloaded_text:
                transcript = downloaded_text
                entry["transcript_text"] = downloaded_text
            elif not entry.get("error_reason"):
                # Surface the fetch failure only when the item had no error of its own.
                entry["error_reason"] = fetched.get("error_reason") or "transcription_payload_empty"

        entry["task_status"] = status
        entry["transcription_url"] = transcription_url
        entry["transcript_text"] = transcript
        entry["ok"] = bool(entry.get("ok") or transcript)
        result[file_url] = entry

    return result
530
+
531
+
532
def build_u2_batch_progress(*, payload: Any, expected_total: int = 0) -> Dict[str, Any]:
    """Summarise how far a U2 batch task has progressed.

    Three independent views are computed from ``payload``: counters from the
    task metrics dict, provider-level counters (``input_count``/
    ``total_count``, ``succeeded_count``, ``failed_count``,
    ``pending_count``, ``platform_task_status``), and counts derived from the
    per-file result items. The batch is reported ``complete`` when any view
    says so; ``completion_basis`` names the first view that did, in the order
    metrics, platform, results.
    """
    metrics_raw = extract_u2_task_metrics(payload)
    if isinstance(metrics_raw, dict):
        # Upper-case the keys so lookups below are case-insensitive.
        metrics = {str(name).strip().upper(): count for name, count in metrics_raw.items()}
    else:
        metrics = {}

    metrics_total = _safe_int(metrics.get("TOTAL") or metrics.get("TASK_TOTAL") or metrics.get("COUNT"))
    metrics_succeeded = _safe_int(metrics.get("SUCCEEDED") or metrics.get("SUCCESS"))
    metrics_failed = sum(
        _safe_int(metrics.get(label))
        for label in ("FAILED", "FAILURE", "ERROR", "CANCELED", "CANCELLED")
    )
    metrics_completed = metrics_succeeded + metrics_failed

    provider_total = _safe_int(deep_find_first(payload, ["input_count", "total_count"]))
    provider_succeeded = _safe_int(deep_find_first(payload, ["succeeded_count"]))
    provider_failed = _safe_int(deep_find_first(payload, ["failed_count"]))
    provider_pending = max(0, _safe_int(deep_find_first(payload, ["pending_count"])))
    provider_completed = provider_succeeded + provider_failed
    provider_status = extract_platform_task_status(payload)

    mapped_results = map_u2_batch_results_by_file_url(payload)
    result_total = len(mapped_results)
    result_succeeded = 0
    result_failed = 0
    for entry in mapped_results.values():
        entry_status = _status_upper(entry.get("task_status"))
        entry_transcript = clean_transcript_text(entry.get("transcript_text"))
        if _is_success_status(entry_status) or entry_transcript:
            result_succeeded += 1
        elif _is_failed_status(entry_status):
            result_failed += 1
    result_completed = result_succeeded + result_failed

    caller_total = max(0, int(expected_total or 0))
    # Trust the metrics total first, then the provider total, then the caller.
    if metrics_total > 0:
        target_total = metrics_total
    elif provider_total > 0:
        target_total = provider_total
    else:
        target_total = caller_total

    has_target = target_total > 0
    nothing_pending = provider_pending == 0
    complete_by_metrics = has_target and metrics_completed >= target_total
    complete_by_provider_counts = has_target and nothing_pending and provider_completed >= target_total
    complete_by_provider_status = nothing_pending and provider_status in {"SUCCEEDED", "PARTIAL_SUCCEEDED", "FAILED"}
    complete_by_results = has_target and result_completed >= target_total

    if complete_by_metrics:
        completion_basis = "task_metrics"
    elif complete_by_provider_counts or complete_by_provider_status:
        completion_basis = "platform_status"
    elif complete_by_results:
        completion_basis = "results"
    else:
        completion_basis = "pending"

    return {
        "expected_total": caller_total,
        "target_total": target_total,
        "metrics_total": metrics_total,
        "metrics_succeeded": metrics_succeeded,
        "metrics_failed": metrics_failed,
        "metrics_completed": metrics_completed,
        "provider_total": provider_total,
        "provider_succeeded": provider_succeeded,
        "provider_failed": provider_failed,
        "provider_pending": provider_pending,
        "platform_task_status": provider_status,
        "results_total": result_total,
        "results_succeeded": result_succeeded,
        "results_failed": result_failed,
        "results_completed": result_completed,
        "complete": bool(complete_by_metrics or complete_by_provider_counts or complete_by_provider_status or complete_by_results),
        "completion_basis": completion_basis,
        "metrics": metrics_raw if isinstance(metrics_raw, dict) else {},
    }
603
+
604
+
605
def poll_u2_task_core(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    task_id: str,
    poll_interval_sec: float,
    max_polls: int,
    require_batch_complete: bool = False,
    expected_total: int = 0,
) -> Dict[str, Any]:
    """Poll a U2 task until it finishes or ``max_polls`` requests were made.

    Each poll calls ``/api/u2/v1/tasks/{task_id}`` and appends a summary of
    the response to ``trace``. Between polls the function sleeps
    ``poll_interval_sec`` seconds (at least 0.2 s).

    Completion rules:
      * A task is complete when ``is_terminal_status`` accepts its status, or
        when the platform status is SUCCEEDED / PARTIAL_SUCCEEDED / FAILED
        with a pending count of zero.
      * With ``require_batch_complete=True`` the poll instead waits until
        ``build_u2_batch_progress`` reports the batch complete
        (``expected_total`` is forwarded to it).

    Failed HTTP calls are retried on the next poll; on the last poll they are
    returned as a failure carrying the response's ``error_reason`` (or
    ``u2_poll_http_error``).

    Returns a result dict that always contains ``ok``, ``task_id``,
    ``task_status``, ``request_id``, ``error_reason``, ``raw_task``,
    ``task_metrics``, ``batch_results``, ``batch_progress``,
    ``batch_complete`` and ``trace``. Completed tasks additionally carry
    ``platform_task_status``, ``pending_count`` and ``transcript_text``.
    Running out of polls yields ``u2_poll_timeout`` or
    ``u2_batch_incomplete_timeout``.
    """
    trace: List[Dict[str, Any]] = []
    last_request_id = None

    # State of the most recent poll, reported if the loop runs out of polls.
    last_status = "UNKNOWN"
    last_payload: Any = {}
    last_batch_results: Dict[str, Dict[str, Any]] = {}
    last_metrics: Dict[str, Any] = {}
    last_progress: Dict[str, Any] = {
        "expected_total": max(0, int(expected_total or 0)),
        "target_total": 0,
        "complete": False,
        "completion_basis": "pending",
    }

    for attempt in range(1, max_polls + 1):
        response = call_json_api(
            base_url=base_url,
            path=f"/api/u2/v1/tasks/{task_id}",
            token=token,
            method="POST",
            timeout_ms=timeout_ms,
        )

        payload = response.get("data")
        status = extract_task_status(payload)
        platform_status = extract_platform_task_status(payload)
        pending_count = extract_pending_count(payload)
        last_request_id = response.get("request_id") or last_request_id

        metrics = extract_u2_task_metrics(payload)
        batch_results = map_u2_batch_results_by_file_url(payload)
        batch_progress = build_u2_batch_progress(payload=payload, expected_total=expected_total)

        # The platform-level status, when present, takes precedence.
        effective_status = platform_status or status
        last_status = effective_status or last_status
        last_payload = payload
        last_batch_results = batch_results
        last_metrics = metrics if isinstance(metrics, dict) else {}
        last_progress = batch_progress

        trace.append(
            {
                "attempt": attempt,
                "status_code": response.get("status_code"),
                "task_status": status,
                "platform_task_status": platform_status,
                "pending_count": pending_count,
                "request_id": response.get("request_id"),
                "error_reason": response.get("error_reason"),
                "batch_progress": batch_progress,
            }
        )

        if not response.get("ok"):
            # Transport/API failure: retry unless this was the last poll.
            if attempt < max_polls:
                time.sleep(max(poll_interval_sec, 0.2))
                continue
            return {
                "ok": False,
                "task_id": task_id,
                "task_status": status or "UNKNOWN",
                "request_id": last_request_id,
                "error_reason": response.get("error_reason") or "u2_poll_http_error",
                "raw_task": payload,
                "task_metrics": last_metrics,
                "batch_results": batch_results,
                "batch_progress": batch_progress,
                "batch_complete": bool(batch_progress.get("complete")),
                "trace": trace,
            }

        status_terminal = is_terminal_status(status)
        platform_terminal = pending_count == 0 and platform_status in {"SUCCEEDED", "PARTIAL_SUCCEEDED", "FAILED"}
        task_complete = status_terminal or platform_terminal
        batch_complete = bool(batch_progress.get("complete")) if require_batch_complete else task_complete

        if require_batch_complete and not batch_complete:
            if attempt < max_polls:
                time.sleep(max(poll_interval_sec, 0.2))
                continue
            return {
                "ok": False,
                "task_id": task_id,
                "task_status": effective_status or "UNKNOWN",
                "request_id": last_request_id,
                "error_reason": "u2_batch_incomplete_timeout",
                "raw_task": payload,
                "task_metrics": last_metrics,
                "batch_results": batch_results,
                "batch_progress": batch_progress,
                "batch_complete": False,
                "trace": trace,
            }

        if task_complete or batch_complete:
            success_signal = (
                platform_status == "SUCCEEDED" and pending_count == 0
            ) or _is_success_status(status)
            # Only read a transcript from a task that reported success.
            transcript = extract_transcript_text(payload) if success_signal else ""
            return {
                "ok": bool(success_signal),
                "task_id": task_id,
                "task_status": effective_status or status,
                "platform_task_status": platform_status,
                "pending_count": pending_count,
                "request_id": last_request_id,
                "error_reason": None if success_signal else (None if batch_complete and require_batch_complete else "u2_task_failed"),
                "transcript_text": clean_transcript_text(transcript),
                "raw_task": payload,
                "task_metrics": last_metrics,
                "batch_results": batch_results,
                "batch_progress": batch_progress,
                "batch_complete": bool(batch_complete),
                "trace": trace,
            }

        # Wait before the next poll; after the final poll there is nothing
        # left to wait for, so skip the sleep and fall through to the timeout.
        if attempt < max_polls:
            time.sleep(max(poll_interval_sec, 0.2))

    timeout_reason = "u2_batch_incomplete_timeout" if require_batch_complete else "u2_poll_timeout"
    return {
        "ok": False,
        "task_id": task_id,
        "task_status": last_status or "TIMEOUT",
        "request_id": last_request_id,
        "error_reason": timeout_reason,
        "raw_task": last_payload,
        "task_metrics": last_metrics,
        "batch_results": last_batch_results,
        "batch_progress": last_progress,
        "batch_complete": bool(last_progress.get("complete")) if require_batch_complete else False,
        "trace": trace,
    }
748
+
749
+
750
def normalize_media_url(url: str) -> str:
    """Return *url* trimmed, with an ``http`` scheme upgraded to ``https``.

    Args:
        url: Candidate media URL. Falsy values (``None``, ``""``) are accepted.

    Returns:
        ``""`` for empty or whitespace-only input; the trimmed text unchanged
        when it cannot be parsed as a URL; otherwise the URL re-assembled from
        its parsed components, with ``http`` replaced by ``https``.
    """
    text = str(url or "").strip()
    if not text:
        return ""
    try:
        parsed = urlparse(text)
    except ValueError:
        # urlparse raises ValueError for malformed authorities (for example an
        # unbalanced "[" in the host). Hand such input back untouched instead
        # of failing, but do not hide unrelated programming errors.
        return text

    scheme = (parsed.scheme or "").lower()
    if scheme == "http":
        parsed = parsed._replace(scheme="https")
    return urlunparse(parsed)
763
+
764
+
765
def is_valid_u2_media_candidate(url: str) -> bool:
    """Heuristically decide whether *url* looks like an audio/video link.

    A candidate qualifies when it starts with ``http://`` or ``https://``,
    contains none of the image-related markers, and contains at least one of
    the media-related markers. All checks are case-insensitive substring
    checks over the whole URL.
    """
    image_markers = (".jpg", ".jpeg", ".png", ".webp", "imageview2", "imagemogr2", "redimage", "frame/")
    media_markers = (".mp4", ".m3u8", ".m4a", ".mp3", "video", "stream", "audio", "vod")

    lowered = str(url or "").lower()
    if not lowered.startswith(("http://", "https://")):
        return False

    # An image marker anywhere in the URL disqualifies it, even if a media
    # marker is also present.
    for marker in image_markers:
        if marker in lowered:
            return False

    for marker in media_markers:
        if marker in lowered:
            return True
    return False
774
+
775
+
776
def normalize_media_candidates(candidates: List[str]) -> List[str]:
    """Normalize every candidate URL and drop blanks and duplicates.

    Each entry is passed through ``normalize_media_url``. The first occurrence
    of every non-empty normalized URL is kept, in input order. A falsy
    *candidates* value yields an empty list.
    """
    kept: List[str] = []
    already_seen = set()
    for entry in candidates or []:
        cleaned = normalize_media_url(entry)
        if cleaned and cleaned not in already_seen:
            already_seen.add(cleaned)
            kept.append(cleaned)
    return kept
786
+
787
+
788
def run_u2_asr_candidates_with_timeout_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    candidates: List[str],
    submit_max_retries: int,
    submit_backoff_ms: int,
    poll_interval_sec: float,
    max_polls: int,
    timeout_retry_enabled: bool = True,
    timeout_retry_max_retries: int = 3,
) -> Dict[str, Any]:
    """Try media candidates in order until one of them transcribes successfully.

    The candidates are normalized and de-duplicated, then walked in order.
    Entries rejected by ``is_valid_u2_media_candidate`` are recorded as skipped
    and never submitted. Every remaining entry is run through
    ``run_u2_asr_with_timeout_retry``; the walk stops at the first one whose
    poll result is ``ok``.

    Args:
        base_url, token, timeout_ms, submit_max_retries, submit_backoff_ms,
        poll_interval_sec, max_polls, timeout_retry_enabled,
        timeout_retry_max_retries: Forwarded unchanged to
            ``run_u2_asr_with_timeout_retry`` for every submitted candidate.
        candidates: Media URLs to try, in priority order.

    Returns:
        The bundle of the last candidate that was submitted (or a placeholder
        bundle with ``error_reason == "no_candidates"`` when none was),
        extended with:

        * ``candidate_attempts`` - one record per normalized candidate visited,
        * ``chosen_candidate`` - the last submitted URL, successful or not
          (``None`` when nothing was submitted),
        * ``normalized_candidates`` - the normalized, de-duplicated input.
    """
    normalized_candidates = normalize_media_candidates(candidates)
    attempts: List[Dict[str, Any]] = []

    # Placeholder result, used only when no candidate is ever submitted.
    final_bundle: Dict[str, Any] = {
        "submit_bundle": {},
        "poll_result": {"ok": False, "task_status": "UNKNOWN", "error_reason": "no_candidates"},
        "rounds": [],
        "timeout_retry": {
            "enabled": bool(timeout_retry_enabled),
            "configured_max_retries": max(0, min(3, int(timeout_retry_max_retries))),
            "triggered": False,
            "result": "not_triggered",
        },
    }
    chosen_url: Optional[str] = None

    for index, candidate in enumerate(normalized_candidates, start=1):
        if not is_valid_u2_media_candidate(candidate):
            attempts.append({
                "index": index,
                "candidate": candidate,
                "valid": False,
                "result": "skipped_non_media_candidate",
            })
            continue

        bundle = run_u2_asr_with_timeout_retry(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            video_url=candidate,
            submit_max_retries=submit_max_retries,
            submit_backoff_ms=submit_backoff_ms,
            poll_interval_sec=poll_interval_sec,
            max_polls=max_polls,
            timeout_retry_enabled=timeout_retry_enabled,
            timeout_retry_max_retries=timeout_retry_max_retries,
        )
        poll_result = bundle.get("poll_result", {})
        error_reason = str(poll_result.get("error_reason") or "")
        ok = bool(poll_result.get("ok"))

        attempts.append({
            "index": index,
            "candidate": candidate,
            "valid": True,
            "ok": ok,
            "error_reason": error_reason,
            "task_status": poll_result.get("task_status"),
        })

        final_bundle = bundle
        chosen_url = candidate
        if ok:
            break
        # Any failure, whatever its error_reason, falls through to the next
        # candidate. A former trailing `continue` guarded by
        # error_reason == "INVALID_SOURCE_URL" was a no-op and has been dropped.

    final_bundle["candidate_attempts"] = attempts
    final_bundle["chosen_candidate"] = chosen_url
    final_bundle["normalized_candidates"] = normalized_candidates
    return final_bundle
860
+
861
+
862
def _u2_batch_result_rank(item: Dict[str, Any]) -> tuple:
    """Rank a per-file result so the more complete of two duplicates wins."""
    return (
        1 if item.get("ok") else 0,
        len(str(item.get("transcript_text") or "")),
        1 if item.get("transcription_url") else 0,
        1 if not item.get("error_reason") else 0,
    )


def run_u2_asr_batch_with_timeout_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    file_urls: List[str],
    submit_max_retries: int,
    submit_backoff_ms: int,
    poll_interval_sec: float,
    max_polls: int,
    timeout_retry_enabled: bool = True,
    timeout_retry_max_retries: int = 3,
) -> Dict[str, Any]:
    """Submit a batch of media URLs for U2 ASR and poll until the batch completes.

    The URLs are normalized, de-duplicated and truncated to
    ``U2_BATCH_SUBMIT_HARD_LIMIT``. The whole batch is submitted and polled
    with ``require_batch_complete=True``. When polling ends with
    ``u2_poll_timeout`` or ``u2_batch_incomplete_timeout`` the batch is
    resubmitted, up to the configured number of retries. Per-file results of
    the last round are then collected by file URL and by item index, merged,
    and hydrated through ``hydrate_u2_batch_results_from_transcription_urls``.

    Args:
        base_url, token, timeout_ms: Forwarded to the submit/poll helpers;
            ``timeout_ms`` is also forwarded to the hydration helper.
        file_urls: Media URLs to transcribe.
        submit_max_retries, submit_backoff_ms: Forwarded to
            ``submit_u2_asr_batch_with_retry``.
        poll_interval_sec, max_polls: Forwarded to ``poll_u2_task_core``.
        timeout_retry_enabled: When false, no resubmission happens on timeout.
        timeout_retry_max_retries: Resubmission budget, clamped to ``0..3``.

    Returns:
        A dict with ``submit_bundle``, ``poll_result``, ``rounds``,
        ``timeout_retry``, ``normalized_file_urls``, ``mapped_results``,
        ``result_items``, ``task_metrics``, ``batch_progress`` and
        ``batch_complete``. The same keys are present when there is nothing to
        submit; in that case both error reasons are ``"no_valid_file_urls"``.
    """
    normalized_urls = normalize_media_candidates(file_urls)
    limited_urls = normalized_urls[:U2_BATCH_SUBMIT_HARD_LIMIT]

    conservative_retries = max(0, min(3, int(timeout_retry_max_retries)))
    retries = conservative_retries if timeout_retry_enabled else 0
    max_rounds = 1 + retries
    timeout_reasons = {"u2_poll_timeout", "u2_batch_incomplete_timeout"}

    if not limited_urls:
        return {
            "submit_bundle": {
                "submit_response": {"ok": False, "error_reason": "no_valid_file_urls"},
                "task_id": None,
                "retry_chain": [],
                "final_submit_status": "failed_no_valid_file_urls",
                "file_urls": [],
            },
            "poll_result": {
                "ok": False,
                "task_status": "UNKNOWN",
                "error_reason": "no_valid_file_urls",
                "task_metrics": {},
                "batch_results": {},
            },
            "rounds": [],
            "timeout_retry": {
                "enabled": bool(timeout_retry_enabled),
                "configured_max_retries": conservative_retries,
                "triggered": False,
                "result": "not_triggered",
            },
            "normalized_file_urls": [],
            "mapped_results": {},
            "result_items": [],
            "task_metrics": {},
            # Keep the key set identical to the regular return below. The
            # progress shape mirrors the placeholder used when a submit fails.
            "batch_progress": {
                "expected_total": 0,
                "target_total": 0,
                "complete": False,
                "completion_basis": "pending",
            },
            "batch_complete": False,
        }

    rounds: List[Dict[str, Any]] = []
    final_submit_bundle: Dict[str, Any] = {}
    final_poll_result: Dict[str, Any] = {
        "ok": False,
        "task_status": "UNKNOWN",
        "error_reason": "u2_submit_failed_or_missing_task_id",
        "task_metrics": {},
        "batch_results": {},
    }
    timeout_retry_triggered = False

    for round_index in range(1, max_rounds + 1):
        submit_bundle = submit_u2_asr_batch_with_retry(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            file_urls=limited_urls,
            max_retries=submit_max_retries,
            backoff_ms=submit_backoff_ms,
        )
        submit_response = submit_bundle.get("submit_response", {})
        task_id = submit_bundle.get("task_id")

        poll_result: Dict[str, Any]
        if submit_response.get("ok") and task_id:
            poll_result = poll_u2_task_core(
                base_url=base_url,
                token=token,
                timeout_ms=timeout_ms,
                task_id=str(task_id),
                poll_interval_sec=poll_interval_sec,
                max_polls=max_polls,
                require_batch_complete=True,
                expected_total=len(limited_urls),
            )
        else:
            # Nothing to poll: synthesize a failed poll result from the submit error.
            poll_result = {
                "ok": False,
                "task_id": task_id,
                "task_status": "UNKNOWN",
                "request_id": submit_response.get("request_id"),
                "error_reason": submit_response.get("error_reason") or "u2_submit_failed_or_missing_task_id",
                "task_metrics": {},
                "batch_results": {},
                "batch_progress": {
                    "expected_total": len(limited_urls),
                    "target_total": len(limited_urls),
                    "complete": False,
                    "completion_basis": "pending",
                },
                "batch_complete": False,
                "trace": [],
            }

        rounds.append(
            {
                "round": round_index,
                "submit": {
                    "task_id": task_id,
                    "final_submit_status": submit_bundle.get("final_submit_status"),
                    "request_id": submit_response.get("request_id"),
                    "status_code": submit_response.get("status_code"),
                    "ok": submit_response.get("ok"),
                    "error_reason": submit_response.get("error_reason"),
                    "retry_chain": submit_bundle.get("retry_chain", []),
                    "file_url_count": len(limited_urls),
                },
                "poll": {
                    "task_id": poll_result.get("task_id") or task_id,
                    "task_status": poll_result.get("task_status"),
                    "request_id": poll_result.get("request_id"),
                    "ok": poll_result.get("ok"),
                    "error_reason": poll_result.get("error_reason"),
                    "attempts": len(poll_result.get("trace", [])),
                    "task_metrics": poll_result.get("task_metrics", {}),
                    "batch_complete": bool(poll_result.get("batch_complete")),
                    "batch_progress": poll_result.get("batch_progress", {}),
                },
            }
        )

        final_submit_bundle = submit_bundle
        final_poll_result = poll_result

        # Only a timeout earns another round; any other outcome is final.
        if poll_result.get("error_reason") in timeout_reasons and round_index < max_rounds:
            timeout_retry_triggered = True
            continue

        break

    if final_poll_result.get("ok"):
        timeout_retry_result = "retry_succeeded" if timeout_retry_triggered else "not_needed"
    elif final_poll_result.get("error_reason") in timeout_reasons:
        timeout_retry_result = "retry_timeout_exhausted" if timeout_retry_triggered else "timeout_no_retry"
    elif timeout_retry_triggered:
        timeout_retry_result = "retry_failed_non_timeout"
    else:
        timeout_retry_result = "not_triggered"

    raw_task_payload = final_poll_result.get("raw_task")
    mapped_results = map_u2_batch_results_by_file_url(raw_task_payload)

    # Results addressed by position are attached to the URL submitted at that
    # position; when both sources describe the same URL the richer entry wins.
    index_mapped = map_u2_batch_results_by_item_index(raw_task_payload)
    for item_index, item in index_mapped.items():
        if item_index < 0 or item_index >= len(limited_urls):
            continue
        file_url = normalize_media_url(limited_urls[item_index])
        if not file_url:
            continue

        candidate = {
            "file_url": file_url,
            "transcript_text": clean_transcript_text(item.get("transcript_text")),
            "task_status": _status_upper(item.get("task_status")),
            "error_reason": str(item.get("error_reason") or "").strip(),
            "transcription_url": normalize_text(item.get("transcription_url")),
            "ok": bool(item.get("ok")),
        }

        existing = mapped_results.get(file_url)
        if existing is None or _u2_batch_result_rank(candidate) > _u2_batch_result_rank(existing):
            mapped_results[file_url] = candidate

    mapped_results = hydrate_u2_batch_results_from_transcription_urls(
        mapped_results=mapped_results,
        timeout_ms=timeout_ms,
    )
    result_items = list(mapped_results.values())

    # Prefer what the poll reported; fall back to deriving from the raw payload.
    task_metrics = final_poll_result.get("task_metrics")
    if not isinstance(task_metrics, dict):
        task_metrics = extract_u2_task_metrics(raw_task_payload)

    batch_progress = final_poll_result.get("batch_progress")
    if not isinstance(batch_progress, dict):
        batch_progress = build_u2_batch_progress(payload=raw_task_payload, expected_total=len(limited_urls))

    return {
        "submit_bundle": final_submit_bundle,
        "poll_result": final_poll_result,
        "rounds": rounds,
        "timeout_retry": {
            "enabled": bool(timeout_retry_enabled),
            "configured_max_retries": conservative_retries,
            "triggered": timeout_retry_triggered,
            "result": timeout_retry_result,
        },
        "normalized_file_urls": limited_urls,
        "mapped_results": mapped_results,
        "result_items": result_items,
        "task_metrics": task_metrics,
        "batch_progress": batch_progress,
        "batch_complete": bool(final_poll_result.get("batch_complete")),
    }
1075
+
1076
+
1077
def run_u2_asr_with_timeout_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    video_url: str,
    submit_max_retries: int,
    submit_backoff_ms: int,
    poll_interval_sec: float,
    max_polls: int,
    timeout_retry_enabled: bool = True,
    timeout_retry_max_retries: int = 3,
) -> Dict[str, Any]:
    """Submit one media URL for U2 ASR and poll it, resubmitting on poll timeout.

    Each round submits the normalized URL via ``submit_u2_asr_with_retry`` and,
    when the submit succeeded and produced a task id, polls it with
    ``poll_u2_task_core``. A round ending in ``u2_poll_timeout`` triggers a
    fresh round while the retry budget (clamped to ``0..3``, or zero when
    retries are disabled) lasts; any other outcome ends the loop.

    Returns:
        A dict with the last round's ``submit_bundle`` and ``poll_result``, a
        per-round summary list under ``rounds``, and a ``timeout_retry``
        section describing whether a retry was triggered and how it ended.
    """
    video_url = normalize_media_url(video_url)
    conservative_retries = max(0, min(3, int(timeout_retry_max_retries)))
    max_rounds = 1 + (conservative_retries if timeout_retry_enabled else 0)

    rounds: List[Dict[str, Any]] = []
    final_submit_bundle: Dict[str, Any] = {}
    final_poll_result: Dict[str, Any] = {
        "ok": False,
        "task_status": "UNKNOWN",
        "error_reason": "u2_submit_failed_or_missing_task_id",
    }
    timeout_retry_triggered = False

    for round_index in range(1, max_rounds + 1):
        submit_bundle = submit_u2_asr_with_retry(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            video_url=video_url,
            max_retries=submit_max_retries,
            backoff_ms=submit_backoff_ms,
        )
        submit_response = submit_bundle.get("submit_response", {})
        task_id = submit_bundle.get("task_id")

        poll_result: Dict[str, Any]
        if not (submit_response.get("ok") and task_id):
            # Submit failed or returned no task id: report it as a failed poll.
            poll_result = {
                "ok": False,
                "task_id": task_id,
                "task_status": "UNKNOWN",
                "request_id": submit_response.get("request_id"),
                "error_reason": submit_response.get("error_reason") or "u2_submit_failed_or_missing_task_id",
                "trace": [],
            }
        else:
            poll_result = poll_u2_task_core(
                base_url=base_url,
                token=token,
                timeout_ms=timeout_ms,
                task_id=str(task_id),
                poll_interval_sec=poll_interval_sec,
                max_polls=max_polls,
            )

        submit_summary = {
            "task_id": task_id,
            "final_submit_status": submit_bundle.get("final_submit_status"),
            "request_id": submit_response.get("request_id"),
            "status_code": submit_response.get("status_code"),
            "ok": submit_response.get("ok"),
            "error_reason": submit_response.get("error_reason"),
            "retry_chain": submit_bundle.get("retry_chain", []),
        }
        poll_summary = {
            "task_id": poll_result.get("task_id") or task_id,
            "task_status": poll_result.get("task_status"),
            "request_id": poll_result.get("request_id"),
            "ok": poll_result.get("ok"),
            "error_reason": poll_result.get("error_reason"),
            "attempts": len(poll_result.get("trace", [])),
        }
        rounds.append({"round": round_index, "submit": submit_summary, "poll": poll_summary})

        final_submit_bundle = submit_bundle
        final_poll_result = poll_result

        round_timed_out = poll_result.get("error_reason") == "u2_poll_timeout"
        if not round_timed_out or round_index >= max_rounds:
            break
        timeout_retry_triggered = True

    succeeded = bool(final_poll_result.get("ok"))
    ended_in_timeout = final_poll_result.get("error_reason") == "u2_poll_timeout"
    if timeout_retry_triggered:
        if succeeded:
            timeout_retry_result = "retry_succeeded"
        elif ended_in_timeout:
            timeout_retry_result = "retry_timeout_exhausted"
        else:
            timeout_retry_result = "retry_failed_non_timeout"
    else:
        if succeeded:
            timeout_retry_result = "not_needed"
        elif ended_in_timeout:
            timeout_retry_result = "timeout_no_retry"
        else:
            timeout_retry_result = "not_triggered"

    return {
        "submit_bundle": final_submit_bundle,
        "poll_result": final_poll_result,
        "rounds": rounds,
        "timeout_retry": {
            "enabled": bool(timeout_retry_enabled),
            "configured_max_retries": conservative_retries,
            "triggered": timeout_retry_triggered,
            "result": timeout_retry_result,
        },
    }