@tikomni/skills 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (474)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -5
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{single-work-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +98 -2
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +106 -141
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{single-work-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +3 -1
  30. package/skills/{creator-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  33. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  34. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  36. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  37. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  38. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  39. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +58 -149
  40. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  41. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +30 -119
  42. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  43. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  45. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  46. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  47. package/skills/creator-analysis/SKILL.md +0 -95
  48. package/skills/creator-analysis/agents/openai.yaml +0 -4
  49. package/skills/creator-analysis/env.example +0 -36
  50. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  51. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  52. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  53. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  54. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  55. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  56. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  57. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  59. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  60. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  61. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  63. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  64. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  65. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  66. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  67. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  68. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  69. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  71. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  72. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  73. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  74. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  75. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  76. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  77. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  78. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  79. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  80. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  81. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  82. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  83. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  84. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  85. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  86. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  87. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  88. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  89. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  90. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  91. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  92. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  93. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  94. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  96. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  100. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  101. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  103. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  104. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  105. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  106. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  107. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  108. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  109. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  110. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  113. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  114. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  116. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  117. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  118. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  119. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  120. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  121. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  122. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  123. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  124. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  125. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  126. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  127. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  128. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  129. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  130. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  131. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  132. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  133. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  134. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  135. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  136. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  137. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  138. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  139. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  140. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  141. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  142. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  143. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  144. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  145. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  146. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  147. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  148. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  149. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  153. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  154. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  155. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  156. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  157. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  158. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  159. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  160. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  161. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  162. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  163. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  164. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  165. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  167. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  168. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  169. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  170. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  171. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  172. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  173. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  174. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  175. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  176. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  177. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  178. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  179. package/skills/creator-analysis/references/workflow.md +0 -23
  180. package/skills/creator-analysis/scripts/__init__.py +0 -0
  181. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  185. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  186. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  187. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  188. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  189. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  190. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  191. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  194. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  195. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  196. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  197. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  198. package/skills/creator-analysis/scripts/core/tikomni_common.py +0 -588
  199. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  200. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  202. package/skills/creator-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  203. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  205. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  207. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  208. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  209. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  210. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  211. package/skills/meta-capability/SKILL.md +0 -69
  212. package/skills/meta-capability/agents/openai.yaml +0 -4
  213. package/skills/meta-capability/env.example +0 -42
  214. package/skills/meta-capability/references/api-capability-index.md +0 -92
  215. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  216. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  217. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  218. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  219. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  220. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  222. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  223. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  224. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  226. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  227. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  228. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  229. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  230. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  231. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  232. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  233. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  234. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  235. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  236. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  237. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  238. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  239. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  240. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  241. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  242. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  243. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  244. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  245. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  246. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  247. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  248. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  249. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  250. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  251. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  252. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  253. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  254. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  255. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  256. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  257. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  258. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  259. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  263. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  264. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  265. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  266. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  267. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  268. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  269. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  270. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  271. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  272. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  273. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  274. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  276. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  277. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  279. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  280. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  281. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  282. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  283. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  284. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  285. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  286. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  287. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  288. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  289. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  290. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  291. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  292. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  293. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  294. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  295. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  296. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  297. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  298. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  299. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  300. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  301. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  302. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  303. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  304. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  305. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  306. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  307. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  308. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  309. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  310. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  311. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  312. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  316. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  317. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  318. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  319. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  320. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  321. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  322. package/skills/meta-capability/references/dispatch.md +0 -27
  323. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  324. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  325. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  326. package/skills/meta-capability/scripts/__init__.py +0 -1
  327. package/skills/meta-capability/scripts/call_route.py +0 -141
  328. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  329. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  330. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  331. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  332. package/skills/meta-capability/scripts/test_auth.py +0 -98
  333. package/skills/single-work-analysis/SKILL.md +0 -62
  334. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  335. package/skills/single-work-analysis/env.example +0 -36
  336. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  337. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  339. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  340. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  341. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  342. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  344. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  345. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  346. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  348. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  349. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  350. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  353. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  354. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  356. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  357. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  358. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  359. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  360. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  361. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  362. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  363. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  364. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  366. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  373. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  375. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  376. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  377. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  378. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  379. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  381. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  385. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  386. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  388. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  389. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  390. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  391. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  392. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  393. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  394. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  395. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  398. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  399. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  401. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  402. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  403. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  404. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  405. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  406. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  407. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  409. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  410. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  411. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  412. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  413. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  414. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  415. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  416. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  417. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  418. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  419. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  420. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  421. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  422. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  423. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  424. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  425. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  426. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  427. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  428. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  429. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  430. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  431. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  432. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  434. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  438. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  439. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  441. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  442. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  443. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  444. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -47
  445. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  446. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  447. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  448. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  449. package/skills/single-work-analysis/references/prompt-contracts/analysis-bundle.md +0 -82
  450. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  451. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  452. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  453. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  454. package/skills/single-work-analysis/scripts/core/analysis_adapter.py +0 -384
  455. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -456
  456. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  457. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  458. package/skills/single-work-analysis/scripts/core/progress_report.py +0 -258
  459. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -220
  460. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  461. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  462. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  463. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  464. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  465. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  466. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  468. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -752
  469. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  470. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  471. /package/skills/{single-work-analysis → social-media-crawl}/scripts/core/tikomni_common.py +0 -0
  472. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  473. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  474. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -1,1189 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Shared ASR pipeline helpers for runner scripts."""
3
-
4
- import json
5
- import time
6
- import urllib.error
7
- import urllib.request
8
- from urllib.parse import urlparse, urlunparse
9
- from typing import Any, Dict, List, Optional
10
-
11
- from scripts.core.tikomni_common import (
12
- call_json_api,
13
- deep_find_first,
14
- extract_task_id,
15
- extract_task_status,
16
- extract_transcript_text,
17
- is_terminal_status,
18
- normalize_text,
19
- )
20
-
21
# Maximum number of file URLs sent in one U2 batch submission.
U2_BATCH_SUBMIT_HARD_LIMIT = 100


def clamp_u2_batch_submit_size(size: int, *, default: int = 50, hard_limit: int = U2_BATCH_SUBMIT_HARD_LIMIT) -> int:
    """Coerce a requested batch size into the range ``1..hard_limit``.

    Args:
        size: Requested batch size; anything ``int()`` cannot convert is
            replaced by ``default``.
        default: Fallback used when ``size`` is not convertible.
        hard_limit: Upper bound; values below 1 are treated as 1.

    Returns:
        The requested size, raised to at least 1 and capped at the limit.
    """
    try:
        requested = int(size)
    except Exception:
        # Unparseable sizes fall back to the default instead of failing.
        requested = int(default)
    if requested < 1:
        requested = 1
    ceiling = int(hard_limit)
    if ceiling < 1:
        ceiling = 1
    return ceiling if requested > ceiling else requested
31
-
32
-
33
def submit_u2_asr(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    video_url: str,
) -> Dict[str, Any]:
    """Submit one media URL for U2 ASR transcription.

    Convenience wrapper that forwards to ``submit_u2_asr_batch`` with a
    single-element URL list and returns its response unchanged.
    """
    single_item_batch = [video_url]
    return submit_u2_asr_batch(
        file_urls=single_item_batch,
        timeout_ms=timeout_ms,
        token=token,
        base_url=base_url,
    )
46
-
47
-
48
def submit_u2_asr_batch(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    file_urls: List[str],
) -> Dict[str, Any]:
    """POST a batch of media URLs to the U2 ASR transcription endpoint.

    The URLs are passed through ``normalize_media_candidates`` (defined
    outside this excerpt) and truncated to ``U2_BATCH_SUBMIT_HARD_LIMIT``
    entries before being sent as ``{"input": {"file_urls": [...]}}``.

    Returns:
        Whatever ``call_json_api`` returns for the request.
    """
    capped_urls = normalize_media_candidates(file_urls)[:U2_BATCH_SUBMIT_HARD_LIMIT]
    request_body = {"input": {"file_urls": capped_urls}}
    return call_json_api(
        base_url=base_url,
        path="/api/u2/v1/services/audio/asr/transcription",
        token=token,
        method="POST",
        timeout_ms=timeout_ms,
        body=request_body,
    )
65
-
66
-
67
def is_retriable_submit_failure(response: Dict[str, Any]) -> bool:
    """Decide whether a failed submit response is worth retrying.

    A response is retriable when its ``status_code`` is 502, 503 or 504
    (given as an int, a whole-number float, or a decimal string), or when
    its ``error_reason`` contains ``TIMEOUT`` (case-insensitive).

    Args:
        response: Submit response mapping; ``status_code`` and
            ``error_reason`` are read, both optional.

    Returns:
        ``True`` for gateway-style status codes or timeout reasons.
    """
    status_code = response.get("status_code")
    if isinstance(status_code, str):
        # isdecimal() (unlike isdigit()) only accepts characters int() can
        # parse, so inputs such as "²" no longer raise ValueError.
        status_code = int(status_code) if status_code.isdecimal() else None
    elif isinstance(status_code, float):
        try:
            status_code = int(status_code)
        except (ValueError, OverflowError):
            # NaN / infinity cannot be a real HTTP status.
            status_code = None
    if isinstance(status_code, int) and status_code in {502, 503, 504}:
        return True

    error_reason = str(response.get("error_reason") or "").upper()
    # "UPSTREAM_TIMEOUT" is already covered by the plain "TIMEOUT" check.
    return "TIMEOUT" in error_reason
76
-
77
-
78
def submit_u2_asr_batch_with_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    file_urls: List[str],
    max_retries: int,
    backoff_ms: int,
) -> Dict[str, Any]:
    """Submit a U2 ASR batch, retrying retriable failures with backoff.

    The first attempt runs immediately; retry ``k`` waits
    ``backoff_ms * 2 ** (k - 1)`` milliseconds. Retrying stops on success,
    on an OK response without a task id, on a non-retriable failure, or
    when ``max_retries`` extra attempts are used up.

    Returns:
        A dict with ``submit_response`` (last response), ``task_id``,
        ``retry_chain`` (one entry per attempt), ``final_submit_status``
        and ``file_urls`` (the URLs actually submitted).
    """
    extra_attempts = max(0, int(max_retries))
    backoff_unit_ms = max(0, int(backoff_ms))
    attempt_budget = extra_attempts + 1

    urls_to_submit = normalize_media_candidates(file_urls)[:U2_BATCH_SUBMIT_HARD_LIMIT]
    if not urls_to_submit:
        # Nothing usable to send: report a synthetic failure without calling the API.
        empty_response = {
            "ok": False,
            "status_code": None,
            "error_reason": "no_valid_file_urls",
            "data": {},
            "request_id": None,
        }
        return {
            "submit_response": empty_response,
            "task_id": None,
            "retry_chain": [],
            "final_submit_status": "failed_no_valid_file_urls",
            "file_urls": [],
        }

    attempts_log: List[Dict[str, Any]] = []
    last_response: Dict[str, Any] = {}
    last_task_id: Optional[str] = None
    outcome = "failed_unknown"

    attempt = 0
    while attempt < attempt_budget:
        attempt += 1

        # Exponential backoff: no wait before the first attempt.
        delay_ms = backoff_unit_ms * (2 ** (attempt - 2)) if attempt != 1 else 0
        if delay_ms > 0:
            time.sleep(delay_ms / 1000.0)

        submit_response = submit_u2_asr_batch(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            file_urls=urls_to_submit,
        )
        task_id = extract_task_id(submit_response.get("data"))
        retriable = is_retriable_submit_failure(submit_response)

        attempts_log.append(
            {
                "attempt": attempt,
                "wait_ms": delay_ms,
                "status_code": submit_response.get("status_code"),
                "error_reason": submit_response.get("error_reason"),
                "ok": submit_response.get("ok"),
                "task_id": task_id,
                "retriable": retriable,
                "file_url_count": len(urls_to_submit),
            }
        )
        last_response = submit_response
        last_task_id = task_id

        if submit_response.get("ok"):
            # An accepted request is final either way; a missing task id is not retried.
            outcome = "success" if task_id else "failed_missing_task_id"
            break

        if retriable and attempt < attempt_budget:
            outcome = "retrying"
            continue

        outcome = "failed_retries_exhausted" if retriable else "failed_non_retriable"
        break

    return {
        "submit_response": last_response,
        "task_id": last_task_id,
        "retry_chain": attempts_log,
        "final_submit_status": outcome,
        "file_urls": urls_to_submit,
    }
165
-
166
-
167
def submit_u2_asr_with_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    video_url: str,
    max_retries: int,
    backoff_ms: int,
) -> Dict[str, Any]:
    """Submit one media URL for U2 ASR with retry handling.

    Forwards to ``submit_u2_asr_batch_with_retry`` with a single-element
    URL list and returns its result dict unchanged.
    """
    single_item_batch = [video_url]
    return submit_u2_asr_batch_with_retry(
        file_urls=single_item_batch,
        backoff_ms=backoff_ms,
        max_retries=max_retries,
        timeout_ms=timeout_ms,
        token=token,
        base_url=base_url,
    )
184
-
185
-
186
def clean_transcript_text(raw_text: Any) -> str:
    """Return ``raw_text`` as a whitespace-trimmed string; ``None`` becomes ``""``."""
    return "" if raw_text is None else str(raw_text).strip()
190
-
191
-
192
def extract_u2_task_metrics(payload: Any) -> Dict[str, Any]:
    """Return the task metrics mapping found in ``payload``, or ``{}``.

    Looks up the keys ``task_metrics`` / ``metrics`` through
    ``deep_find_first`` and discards anything that is not a dict.
    """
    located = deep_find_first(payload, ["task_metrics", "metrics"])
    if isinstance(located, dict):
        return located
    return {}
195
-
196
-
197
- def _safe_int(value: Any) -> int:
198
- if isinstance(value, bool):
199
- return int(value)
200
- if isinstance(value, (int, float)):
201
- return int(value)
202
- if isinstance(value, str):
203
- text = value.strip()
204
- if not text:
205
- return 0
206
- try:
207
- return int(float(text))
208
- except Exception:
209
- return 0
210
- return 0
211
-
212
-
213
- def _status_upper(value: Any) -> str:
214
- return str(value or "").strip().upper()
215
-
216
-
217
- def _is_success_status(status: str) -> bool:
218
- return status in {"SUCCEEDED", "SUCCESS", "COMPLETED", "DONE"}
219
-
220
-
221
- def _is_failed_status(status: str) -> bool:
222
- return status in {"FAILED", "FAILURE", "ERROR", "CANCELED", "CANCELLED"}
223
-
224
-
225
def extract_platform_task_status(payload: Any) -> str:
    """Return the payload's ``platform_task_status`` as trimmed upper-case text.

    The value is located with ``deep_find_first``; a missing or falsy
    value yields ``""``.
    """
    raw_status = deep_find_first(payload, ["platform_task_status"])
    return str(raw_status or "").strip().upper()
228
-
229
-
230
def extract_pending_count(payload: Any) -> int:
    """Return the payload's ``pending_count`` as a non-negative integer.

    The value is located with ``deep_find_first`` and converted with
    ``_safe_int``; negative results are reported as 0.
    """
    pending = _safe_int(deep_find_first(payload, ["pending_count"]))
    return pending if pending > 0 else 0
232
-
233
-
234
def extract_u2_batch_result_items(payload: Any) -> List[Dict[str, Any]]:
    """Collect per-file ASR results from anywhere inside a task payload.

    The payload is walked breadth-first. Every dict carrying a usable
    ``file_url`` / ``source_url`` / ``media_url`` / ``url`` value (after
    ``normalize_media_url``, defined outside this excerpt) yields one
    candidate record. When several dicts resolve to the same file URL the
    better-scoring candidate is kept; the first one seen wins ties.

    Returns:
        One record per distinct file URL, in first-seen order, with keys
        ``file_url``, ``transcript_text``, ``task_status``,
        ``transcription_url``, ``error_reason`` and ``ok``.
    """
    # Local import keeps this block self-contained; deque gives O(1) pops
    # from the front, where list.pop(0) is O(n) per node.
    from collections import deque

    def _rank(entry: Dict[str, Any]) -> tuple:
        # Preference order: ok flag, transcript length, has a transcription
        # URL, has no error reason.
        return (
            1 if entry.get("ok") else 0,
            len(str(entry.get("transcript_text") or "")),
            1 if entry.get("transcription_url") else 0,
            1 if not entry.get("error_reason") else 0,
        )

    found: Dict[str, Dict[str, Any]] = {}
    queue = deque([payload])

    while queue:
        node = queue.popleft()
        if isinstance(node, dict):
            raw_file_url = (
                node.get("file_url")
                or node.get("source_url")
                or node.get("media_url")
                or node.get("url")
            )
            file_url = normalize_media_url(str(raw_file_url or ""))
            if file_url:
                transcript = clean_transcript_text(
                    node.get("transcript_text")
                    or node.get("text")
                    or node.get("transcript")
                    or node.get("transcription")
                    or node.get("content")
                    or ""
                )
                if not transcript:
                    # No direct text field: fall back to the shared extractor.
                    transcript = clean_transcript_text(extract_transcript_text(node))

                status = _status_upper(node.get("status") or node.get("task_status") or node.get("state"))
                error_reason = str(node.get("error_reason") or node.get("error") or "").strip()
                transcription_url = normalize_text(node.get("transcription_url"))
                ok = _is_success_status(status) or bool(transcript) or bool(transcription_url)

                candidate = {
                    "file_url": file_url,
                    "transcript_text": transcript,
                    "task_status": status,
                    "transcription_url": transcription_url,
                    "error_reason": error_reason,
                    "ok": ok,
                }

                existing = found.get(file_url)
                if existing is None or _rank(candidate) > _rank(existing):
                    found[file_url] = candidate

            queue.extend(value for value in node.values() if isinstance(value, (dict, list)))
        elif isinstance(node, list):
            queue.extend(item for item in node if isinstance(item, (dict, list)))

    return list(found.values())
302
-
303
-
304
def map_u2_batch_results_by_file_url(payload: Any) -> Dict[str, Dict[str, Any]]:
    """Index the payload's per-file result records by normalised file URL.

    Records come from ``extract_u2_batch_result_items``; entries whose URL
    normalises to an empty value are dropped, and a later record replaces
    an earlier one with the same key.
    """
    by_url: Dict[str, Dict[str, Any]] = {}
    for record in extract_u2_batch_result_items(payload):
        key = normalize_media_url(record.get("file_url"))
        if key:
            by_url[key] = record
    return by_url
312
-
313
-
314
- def _parse_non_negative_item_index(value: Any) -> Optional[int]:
315
- if isinstance(value, bool):
316
- return None
317
- if isinstance(value, int):
318
- return value if value >= 0 else None
319
- if isinstance(value, float):
320
- if value < 0 or not value.is_integer():
321
- return None
322
- return int(value)
323
- if isinstance(value, str):
324
- text = value.strip()
325
- if not text or not text.isdigit():
326
- return None
327
- return int(text)
328
- return None
329
-
330
-
331
def map_u2_batch_results_by_item_index(payload: Any) -> Dict[int, Dict[str, Any]]:
    """Index per-item ASR results by their ``item_index``.

    The payload is walked breadth-first. Every dict whose ``item_index``
    parses as a non-negative integer yields one candidate record. When
    several dicts share an index the better-scoring candidate is kept;
    the first one seen wins ties.

    Returns:
        Mapping of item index to a record with keys ``item_index``,
        ``transcript_text``, ``task_status``, ``error_reason``,
        ``transcription_url`` and ``ok``.
    """
    # Local import keeps this block self-contained; deque gives O(1) pops
    # from the front, where list.pop(0) is O(n) per node.
    from collections import deque

    def _rank(entry: Dict[str, Any]) -> tuple:
        # Preference order: ok flag, transcript length, has a transcription
        # URL, has no error reason.
        return (
            1 if entry.get("ok") else 0,
            len(str(entry.get("transcript_text") or "")),
            1 if entry.get("transcription_url") else 0,
            1 if not entry.get("error_reason") else 0,
        )

    mapped: Dict[int, Dict[str, Any]] = {}
    queue = deque([payload])

    while queue:
        node = queue.popleft()
        if isinstance(node, dict):
            item_index = _parse_non_negative_item_index(node.get("item_index"))
            if item_index is not None:
                transcript = clean_transcript_text(
                    node.get("transcript_text")
                    or node.get("text")
                    or node.get("transcript")
                    or node.get("transcription")
                    or node.get("content")
                    or ""
                )
                if not transcript:
                    # No direct text field: fall back to the shared extractor.
                    transcript = clean_transcript_text(extract_transcript_text(node))

                status = _status_upper(node.get("task_status") or node.get("status") or node.get("state"))
                error_reason = str(node.get("error_reason") or node.get("error") or "").strip()
                transcription_url = normalize_text(node.get("transcription_url"))
                ok = _is_success_status(status) or bool(transcript) or bool(transcription_url)

                candidate = {
                    "item_index": item_index,
                    "transcript_text": transcript,
                    "task_status": status,
                    "error_reason": error_reason,
                    "transcription_url": transcription_url,
                    "ok": ok,
                }

                existing = mapped.get(item_index)
                if existing is None or _rank(candidate) > _rank(existing):
                    mapped[item_index] = candidate

            queue.extend(value for value in node.values() if isinstance(value, (dict, list)))
        elif isinstance(node, list):
            queue.extend(item for item in node if isinstance(item, (dict, list)))

    return mapped
394
-
395
-
396
def _extract_transcript_from_transcription_payload(payload: Any) -> str:
    """Pull transcript text out of a fetched transcription payload.

    String payloads that decode to a JSON object or array are searched
    like any structured payload; if nothing is found in them, or the
    string is not such JSON, the trimmed string itself is returned.
    Structured payloads are searched in this order: a ``full_text``
    value, then ``extract_transcript_text``, then the ``sentences`` list
    joined with newlines.

    Returns:
        The transcript text, or ``""`` when none can be found.
    """
    fallback_text = ""
    if isinstance(payload, str):
        fallback_text = clean_transcript_text(payload)
        if not fallback_text:
            return ""
        # Decode first: previously any non-empty string was returned
        # verbatim, so a JSON document came back as the "transcript".
        try:
            decoded = json.loads(fallback_text)
        except (ValueError, RecursionError):
            return fallback_text
        if not isinstance(decoded, (dict, list)):
            # Bare scalars (e.g. "123") are plain text, not a document.
            return fallback_text
        payload = decoded

    transcript = clean_transcript_text(deep_find_first(payload, ["full_text"]))
    if transcript:
        return transcript

    transcript = clean_transcript_text(extract_transcript_text(payload))
    if transcript:
        return transcript

    sentences = deep_find_first(payload, ["sentences"])
    if isinstance(sentences, list):
        lines: List[str] = []
        for sentence in sentences:
            if not isinstance(sentence, dict):
                continue
            line = clean_transcript_text(
                sentence.get("text") or sentence.get("sentence") or sentence.get("content")
            )
            if line:
                lines.append(line)
        if lines:
            return "\n".join(lines)

    # Nothing structured was found: keep the original string rather than
    # dropping text that merely happened to parse as JSON.
    return fallback_text
429
-
430
-
431
def fetch_transcription_text_by_url(*, transcription_url: str, timeout_ms: int) -> Dict[str, Any]:
    """Download a transcription document and extract its transcript text.

    Only ``http://`` and ``https://`` URLs are fetched. The response body
    is decoded as UTF-8 (undecodable bytes replaced), parsed as JSON when
    possible, and handed to ``_extract_transcript_from_transcription_payload``.
    Failures are reported in the result instead of being raised.

    Returns:
        A dict with ``ok``, ``transcription_url``, ``error_reason`` and
        ``transcript_text``.
    """

    def _outcome(ok: bool, target: str, reason: str, text: str = "") -> Dict[str, Any]:
        return {
            "ok": ok,
            "transcription_url": target,
            "error_reason": reason,
            "transcript_text": text,
        }

    url = normalize_media_url(transcription_url)
    if not url:
        return _outcome(False, "", "transcription_url_missing")
    if not url.startswith(("http://", "https://")):
        return _outcome(False, url, "transcription_url_invalid")

    request = urllib.request.Request(url=url, method="GET", headers={"Accept": "application/json"})
    try:
        # The socket timeout is never allowed to drop below one second.
        with urllib.request.urlopen(request, timeout=max(timeout_ms / 1000.0, 1.0)) as response:
            body_text = response.read().decode("utf-8", errors="replace")
    except urllib.error.URLError as error:
        detail = normalize_text(getattr(error, "reason", error)) or "unknown"
        return _outcome(False, url, f"transcription_fetch_failed:{detail}")
    except Exception as error:
        detail = normalize_text(error) or "unknown"
        return _outcome(False, url, f"transcription_fetch_failed:{detail}")

    try:
        document: Any = json.loads(body_text)
    except Exception:
        # Not JSON: treat the body as plain transcript text.
        document = body_text

    transcript = _extract_transcript_from_transcription_payload(document)
    if not transcript:
        return _outcome(False, url, "transcription_payload_empty")
    return _outcome(True, url, "", transcript)
488
-
489
-
490
def hydrate_u2_batch_results_from_transcription_urls(
    *,
    mapped_results: Dict[str, Dict[str, Any]],
    timeout_ms: int,
) -> Dict[str, Dict[str, Any]]:
    """Fill in missing transcripts by downloading each ``transcription_url``.

    Every dict entry is copied. When a copy has a success status, a
    transcription URL and no transcript yet, the URL is fetched (timeout
    clamped to 1000-15000 ms), the outcome is recorded under
    ``transcription_fetch``, and either the fetched text or, if no error
    is already recorded, the fetch error is stored on the copy.
    Non-dict entries are skipped.

    Returns:
        A new mapping with the same keys; the input records are not mutated.
    """
    per_fetch_timeout_ms = max(1000, min(int(timeout_ms), 15000))
    enriched: Dict[str, Dict[str, Any]] = {}

    for file_url, original in mapped_results.items():
        if not isinstance(original, dict):
            continue

        record = dict(original)
        state = _status_upper(record.get("task_status"))
        text = clean_transcript_text(record.get("transcript_text"))
        source_url = normalize_text(record.get("transcription_url"))

        needs_fetch = not text and _is_success_status(state) and source_url
        if needs_fetch:
            fetched = fetch_transcription_text_by_url(
                transcription_url=source_url,
                timeout_ms=per_fetch_timeout_ms,
            )
            fetched_text = clean_transcript_text(fetched.get("transcript_text"))
            record["transcription_fetch"] = {
                "ok": bool(fetched.get("ok")),
                "error_reason": fetched.get("error_reason"),
            }
            if fetched_text:
                text = fetched_text
                record["transcript_text"] = fetched_text
            elif not record.get("error_reason"):
                # Only surface the fetch error when no earlier error exists.
                record["error_reason"] = fetched.get("error_reason") or "transcription_payload_empty"

        record["task_status"] = state
        record["transcription_url"] = source_url
        record["transcript_text"] = text
        record["ok"] = bool(record.get("ok") or text)
        enriched[file_url] = record

    return enriched
530
-
531
-
532
def build_u2_batch_progress(*, payload: Any, expected_total: int = 0) -> Dict[str, Any]:
    """Summarise how far a U2 batch task has progressed.

    Three independent views are read from ``payload``: the task metrics
    mapping, the provider counters (``input_count`` / ``total_count``,
    ``succeeded_count``, ``failed_count``, ``pending_count``,
    ``platform_task_status``), and the per-file results. The target total
    is the metrics total, else the provider total, else ``expected_total``.
    The batch is complete as soon as any view says so.

    Returns:
        A dict with the individual counters, ``target_total``,
        ``complete``, ``completion_basis`` (``task_metrics``,
        ``platform_status``, ``results`` or ``pending``) and the raw
        ``metrics`` mapping.
    """
    raw_metrics = extract_u2_task_metrics(payload)
    have_metrics = isinstance(raw_metrics, dict)

    # Metric names are matched case-insensitively.
    by_name: Dict[str, Any] = {}
    if have_metrics:
        for key, value in raw_metrics.items():
            by_name[str(key).strip().upper()] = value

    metrics_total = _safe_int(by_name.get("TOTAL") or by_name.get("TASK_TOTAL") or by_name.get("COUNT"))
    metrics_succeeded = _safe_int(by_name.get("SUCCEEDED") or by_name.get("SUCCESS"))
    metrics_failed = sum(
        _safe_int(by_name.get(label))
        for label in ("FAILED", "FAILURE", "ERROR", "CANCELED", "CANCELLED")
    )
    metrics_completed = metrics_succeeded + metrics_failed

    provider_total = _safe_int(deep_find_first(payload, ["input_count", "total_count"]))
    provider_succeeded = _safe_int(deep_find_first(payload, ["succeeded_count"]))
    provider_failed = _safe_int(deep_find_first(payload, ["failed_count"]))
    provider_pending = max(0, _safe_int(deep_find_first(payload, ["pending_count"])))
    provider_completed = provider_succeeded + provider_failed
    provider_status = extract_platform_task_status(payload)

    per_file = map_u2_batch_results_by_file_url(payload)
    result_total = len(per_file)
    result_succeeded = 0
    result_failed = 0
    for entry in per_file.values():
        state = _status_upper(entry.get("task_status"))
        text = clean_transcript_text(entry.get("transcript_text"))
        # A transcript counts as success even without a success status.
        if _is_success_status(state) or text:
            result_succeeded += 1
            continue
        if _is_failed_status(state):
            result_failed += 1
    result_completed = result_succeeded + result_failed

    expected = max(0, int(expected_total or 0))
    if metrics_total > 0:
        target_total = metrics_total
    elif provider_total > 0:
        target_total = provider_total
    else:
        target_total = expected

    have_target = target_total > 0
    nothing_pending = provider_pending == 0
    complete_by_metrics = have_target and metrics_completed >= target_total
    complete_by_provider_counts = have_target and nothing_pending and provider_completed >= target_total
    complete_by_provider_status = nothing_pending and provider_status in {"SUCCEEDED", "PARTIAL_SUCCEEDED", "FAILED"}
    complete_by_results = have_target and result_completed >= target_total

    if complete_by_metrics:
        completion_basis = "task_metrics"
    elif complete_by_provider_counts or complete_by_provider_status:
        completion_basis = "platform_status"
    elif complete_by_results:
        completion_basis = "results"
    else:
        completion_basis = "pending"

    is_complete = any(
        (
            complete_by_metrics,
            complete_by_provider_counts,
            complete_by_provider_status,
            complete_by_results,
        )
    )

    return {
        "expected_total": expected,
        "target_total": target_total,
        "metrics_total": metrics_total,
        "metrics_succeeded": metrics_succeeded,
        "metrics_failed": metrics_failed,
        "metrics_completed": metrics_completed,
        "provider_total": provider_total,
        "provider_succeeded": provider_succeeded,
        "provider_failed": provider_failed,
        "provider_pending": provider_pending,
        "platform_task_status": provider_status,
        "results_total": result_total,
        "results_succeeded": result_succeeded,
        "results_failed": result_failed,
        "results_completed": result_completed,
        "complete": is_complete,
        "completion_basis": completion_basis,
        "metrics": raw_metrics if have_metrics else {},
    }
603
-
604
-
605
def poll_u2_task_core(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    task_id: str,
    poll_interval_sec: float,
    max_polls: int,
    require_batch_complete: bool = False,
    expected_total: int = 0,
) -> Dict[str, Any]:
    """Poll ``/api/u2/v1/tasks/{task_id}`` until the task settles or polls run out.

    Each attempt calls ``call_json_api`` and derives the task status, platform
    status, pending count, metrics, per-file batch results and batch progress
    from the ``data`` field of the response.

    Args:
        base_url: Forwarded to ``call_json_api``.
        token: Forwarded to ``call_json_api``.
        timeout_ms: Per-request timeout forwarded to ``call_json_api``.
        task_id: Task identifier interpolated into the request path.
        poll_interval_sec: Delay between polls; floored at 0.2 seconds.
        max_polls: Maximum number of poll attempts. A value <= 0 performs no
            request and returns the timeout result immediately.
        require_batch_complete: When true, completion is decided by the
            ``complete`` flag of the batch progress rather than by the task
            status alone.
        expected_total: Expected number of batch items, forwarded to
            ``build_u2_batch_progress``.

    Returns:
        A dict with at least ``ok``, ``task_id``, ``task_status``,
        ``request_id``, ``error_reason``, ``raw_task``, ``task_metrics``,
        ``batch_results``, ``batch_progress``, ``batch_complete`` and
        ``trace``. The completion branch additionally carries
        ``platform_task_status``, ``pending_count`` and ``transcript_text``.
    """
    trace = []
    last_request_id = None

    last_status = "UNKNOWN"
    last_payload: Any = {}
    last_batch_results: Dict[str, Dict[str, Any]] = {}
    last_metrics: Dict[str, Any] = {}
    last_progress: Dict[str, Any] = {
        "expected_total": max(0, int(expected_total or 0)),
        "target_total": 0,
        "complete": False,
        "completion_basis": "pending",
    }

    for attempt in range(1, max_polls + 1):
        response = call_json_api(
            base_url=base_url,
            path=f"/api/u2/v1/tasks/{task_id}",
            token=token,
            method="POST",
            timeout_ms=timeout_ms,
        )

        payload = response.get("data")
        status = extract_task_status(payload)
        platform_status = extract_platform_task_status(payload)
        pending_count = extract_pending_count(payload)
        last_request_id = response.get("request_id") or last_request_id

        metrics = extract_u2_task_metrics(payload)
        batch_results = map_u2_batch_results_by_file_url(payload)
        batch_progress = build_u2_batch_progress(payload=payload, expected_total=expected_total)

        # The platform-level status wins over the plain task status when present.
        effective_status = platform_status or status
        last_status = effective_status or last_status
        last_payload = payload
        last_batch_results = batch_results
        last_metrics = metrics if isinstance(metrics, dict) else {}
        last_progress = batch_progress

        trace.append(
            {
                "attempt": attempt,
                "status_code": response.get("status_code"),
                "task_status": status,
                "platform_task_status": platform_status,
                "pending_count": pending_count,
                "request_id": response.get("request_id"),
                "error_reason": response.get("error_reason"),
                "batch_progress": batch_progress,
            }
        )

        if not response.get("ok"):
            # A failed request is retried until the poll budget is exhausted.
            if attempt < max_polls:
                time.sleep(max(poll_interval_sec, 0.2))
                continue
            return {
                "ok": False,
                "task_id": task_id,
                "task_status": status or "UNKNOWN",
                "request_id": last_request_id,
                "error_reason": response.get("error_reason") or "u2_poll_http_error",
                "raw_task": payload,
                "task_metrics": last_metrics,
                "batch_results": batch_results,
                "batch_progress": batch_progress,
                "batch_complete": bool(batch_progress.get("complete")),
                "trace": trace,
            }

        status_terminal = is_terminal_status(status)
        platform_terminal = pending_count == 0 and platform_status in {"SUCCEEDED", "PARTIAL_SUCCEEDED", "FAILED"}
        task_complete = status_terminal or platform_terminal
        batch_complete = bool(batch_progress.get("complete")) if require_batch_complete else task_complete

        if require_batch_complete and not batch_complete:
            if attempt < max_polls:
                time.sleep(max(poll_interval_sec, 0.2))
                continue
            return {
                "ok": False,
                "task_id": task_id,
                "task_status": effective_status or "UNKNOWN",
                "request_id": last_request_id,
                "error_reason": "u2_batch_incomplete_timeout",
                "raw_task": payload,
                "task_metrics": last_metrics,
                "batch_results": batch_results,
                "batch_progress": batch_progress,
                "batch_complete": False,
                "trace": trace,
            }

        if task_complete or batch_complete:
            success_signal = (
                platform_status == "SUCCEEDED" and pending_count == 0
            ) or _is_success_status(status)
            # The transcript is only extracted for a successful task.
            transcript = extract_transcript_text(payload) if success_signal else ""
            return {
                "ok": bool(success_signal),
                "task_id": task_id,
                "task_status": effective_status or status,
                "platform_task_status": platform_status,
                "pending_count": pending_count,
                "request_id": last_request_id,
                "error_reason": None if success_signal else (None if batch_complete and require_batch_complete else "u2_task_failed"),
                "transcript_text": clean_transcript_text(transcript),
                "raw_task": payload,
                "task_metrics": last_metrics,
                "batch_results": batch_results,
                "batch_progress": batch_progress,
                "batch_complete": bool(batch_complete),
                "trace": trace,
            }

        # Only wait when another poll will follow; sleeping after the final
        # attempt would just delay the timeout result below.
        if attempt < max_polls:
            time.sleep(max(poll_interval_sec, 0.2))

    timeout_reason = "u2_batch_incomplete_timeout" if require_batch_complete else "u2_poll_timeout"
    return {
        "ok": False,
        "task_id": task_id,
        "task_status": last_status or "TIMEOUT",
        "request_id": last_request_id,
        "error_reason": timeout_reason,
        "raw_task": last_payload,
        "task_metrics": last_metrics,
        "batch_results": last_batch_results,
        "batch_progress": last_progress,
        "batch_complete": bool(last_progress.get("complete")) if require_batch_complete else False,
        "trace": trace,
    }
748
-
749
-
750
def normalize_media_url(url: str) -> str:
    """Return *url* stripped, with an ``http`` scheme upgraded to ``https``.

    Falsy input yields an empty string. Text that ``urlparse`` rejects is
    returned stripped but otherwise untouched. Every other value is passed
    through a ``urlparse``/``urlunparse`` round trip, so the result is the
    re-serialised form of the URL.
    """
    text = str(url or "").strip()
    if not text:
        return ""
    try:
        parsed = urlparse(text)
    except ValueError:
        # urlparse signals malformed input (e.g. an unbalanced IPv6 bracket)
        # with ValueError; keep the text rather than failing.
        return text

    scheme = (parsed.scheme or "").lower()
    if scheme == "http":
        parsed = parsed._replace(scheme="https")
    return urlunparse(parsed)
763
-
764
-
765
def is_valid_u2_media_candidate(url: str) -> bool:
    """Tell whether *url* looks like an audio/video link rather than an image.

    The check is a case-insensitive substring heuristic: the URL must start
    with ``http://`` or ``https://``, must not contain any image marker, and
    must contain at least one media marker.
    """
    lowered = str(url or "").lower()
    if not lowered.startswith(("http://", "https://")):
        return False

    # Any image marker disqualifies the URL, even if a media marker is present.
    image_markers = (".jpg", ".jpeg", ".png", ".webp", "imageview2", "imagemogr2", "redimage", "frame/")
    for marker in image_markers:
        if marker in lowered:
            return False

    media_markers = (".mp4", ".m3u8", ".m4a", ".mp3", "video", "stream", "audio", "vod")
    for marker in media_markers:
        if marker in lowered:
            return True
    return False
774
-
775
-
776
def normalize_media_candidates(candidates: List[str]) -> List[str]:
    """Normalise each candidate URL and drop empties and duplicates.

    Every entry goes through ``normalize_media_url``. The first occurrence of
    each normalised URL is kept, in input order. A falsy *candidates* value
    yields an empty list.
    """
    cleaned = (normalize_media_url(entry) for entry in (candidates or []))
    # dict.fromkeys keeps first-seen order while discarding repeats.
    return list(dict.fromkeys(link for link in cleaned if link))
786
-
787
-
788
def run_u2_asr_candidates_with_timeout_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    candidates: List[str],
    submit_max_retries: int,
    submit_backoff_ms: int,
    poll_interval_sec: float,
    max_polls: int,
    timeout_retry_enabled: bool = True,
    timeout_retry_max_retries: int = 3,
) -> Dict[str, Any]:
    """Try candidate media URLs in order until one ASR run succeeds.

    Candidates are normalised and de-duplicated with
    ``normalize_media_candidates``. Entries rejected by
    ``is_valid_u2_media_candidate`` are recorded as skipped. Each remaining
    entry is handed to ``run_u2_asr_with_timeout_retry``; the loop stops at
    the first run whose poll result is ``ok``. Any failure, whatever its
    ``error_reason``, moves on to the next candidate.

    Returns:
        The bundle of the last candidate that was actually run (or a
        ``no_candidates`` placeholder when none was), extended with
        ``candidate_attempts``, ``chosen_candidate`` and
        ``normalized_candidates``.
    """
    normalized_candidates = normalize_media_candidates(candidates)
    attempts: List[Dict[str, Any]] = []

    final_bundle: Dict[str, Any] = {
        "submit_bundle": {},
        "poll_result": {"ok": False, "task_status": "UNKNOWN", "error_reason": "no_candidates"},
        "rounds": [],
        "timeout_retry": {"enabled": bool(timeout_retry_enabled), "configured_max_retries": max(0, min(3, int(timeout_retry_max_retries))), "triggered": False, "result": "not_triggered"},
    }
    chosen_url: Optional[str] = None

    for index, candidate in enumerate(normalized_candidates, start=1):
        if not is_valid_u2_media_candidate(candidate):
            attempts.append({
                "index": index,
                "candidate": candidate,
                "valid": False,
                "result": "skipped_non_media_candidate",
            })
            continue

        bundle = run_u2_asr_with_timeout_retry(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            video_url=candidate,
            submit_max_retries=submit_max_retries,
            submit_backoff_ms=submit_backoff_ms,
            poll_interval_sec=poll_interval_sec,
            max_polls=max_polls,
            timeout_retry_enabled=timeout_retry_enabled,
            timeout_retry_max_retries=timeout_retry_max_retries,
        )
        poll_result = bundle.get("poll_result", {})
        error_reason = str(poll_result.get("error_reason") or "")
        ok = bool(poll_result.get("ok"))

        attempts.append({
            "index": index,
            "candidate": candidate,
            "valid": True,
            "ok": ok,
            "error_reason": error_reason,
            "task_status": poll_result.get("task_status"),
        })

        # Remember the most recent run so a total failure still reports it.
        final_bundle = bundle
        chosen_url = candidate
        if ok:
            break
        # NOTE(review): a former trailing `continue` that fired only for
        # "INVALID_SOURCE_URL" was a no-op here, since every failure already
        # proceeds to the next candidate. Confirm whether other errors were
        # meant to stop the iteration instead.

    final_bundle["candidate_attempts"] = attempts
    final_bundle["chosen_candidate"] = chosen_url
    final_bundle["normalized_candidates"] = normalized_candidates
    return final_bundle
860
-
861
-
862
def run_u2_asr_batch_with_timeout_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    file_urls: List[str],
    submit_max_retries: int,
    submit_backoff_ms: int,
    poll_interval_sec: float,
    max_polls: int,
    timeout_retry_enabled: bool = True,
    timeout_retry_max_retries: int = 3,
) -> Dict[str, Any]:
    """Submit a batch ASR task and poll it, resubmitting when polling times out.

    The URLs are normalised and de-duplicated with
    ``normalize_media_candidates`` and truncated to
    ``U2_BATCH_SUBMIT_HARD_LIMIT``. Each round submits the batch via
    ``submit_u2_asr_batch_with_retry`` and, when a task id is obtained, polls
    it with ``poll_u2_task_core(require_batch_complete=True)``. A round ending
    in ``u2_poll_timeout`` or ``u2_batch_incomplete_timeout`` triggers a fresh
    round, up to ``timeout_retry_max_retries`` extra rounds (capped at 3, and
    0 when ``timeout_retry_enabled`` is false).

    Returns:
        A dict with ``submit_bundle``, ``poll_result``, ``rounds``,
        ``timeout_retry``, ``normalized_file_urls``, ``mapped_results``
        (keyed by file URL), ``result_items``, ``task_metrics``,
        ``batch_progress`` and ``batch_complete``.
    """
    timeout_reasons = {"u2_poll_timeout", "u2_batch_incomplete_timeout"}

    normalized_urls = normalize_media_candidates(file_urls)
    limited_urls = normalized_urls[:U2_BATCH_SUBMIT_HARD_LIMIT]

    conservative_retries = max(0, min(3, int(timeout_retry_max_retries)))
    retries = conservative_retries if timeout_retry_enabled else 0
    max_rounds = 1 + retries

    rounds: List[Dict[str, Any]] = []
    final_submit_bundle: Dict[str, Any] = {}
    final_poll_result: Dict[str, Any] = {
        "ok": False,
        "task_status": "UNKNOWN",
        "error_reason": "u2_submit_failed_or_missing_task_id",
        "task_metrics": {},
        "batch_results": {},
    }
    timeout_retry_triggered = False
    timeout_retry_result = "not_triggered"

    if not limited_urls:
        return {
            "submit_bundle": {
                "submit_response": {"ok": False, "error_reason": "no_valid_file_urls"},
                "task_id": None,
                "retry_chain": [],
                "final_submit_status": "failed_no_valid_file_urls",
                "file_urls": [],
            },
            "poll_result": {
                "ok": False,
                "task_status": "UNKNOWN",
                "error_reason": "no_valid_file_urls",
                "task_metrics": {},
                "batch_results": {},
            },
            "rounds": [],
            "timeout_retry": {
                "enabled": bool(timeout_retry_enabled),
                "configured_max_retries": conservative_retries,
                "triggered": False,
                "result": "not_triggered",
            },
            "normalized_file_urls": [],
            "mapped_results": {},
            "result_items": [],
            "task_metrics": {},
            # Keep the same key set as the regular return so callers can read
            # these fields without special-casing empty input.
            "batch_progress": {
                "expected_total": 0,
                "target_total": 0,
                "complete": False,
                "completion_basis": "pending",
            },
            "batch_complete": False,
        }

    for round_index in range(1, max_rounds + 1):
        submit_bundle = submit_u2_asr_batch_with_retry(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            file_urls=limited_urls,
            max_retries=submit_max_retries,
            backoff_ms=submit_backoff_ms,
        )
        submit_response = submit_bundle.get("submit_response", {})
        task_id = submit_bundle.get("task_id")

        poll_result: Dict[str, Any]
        if submit_response.get("ok") and task_id:
            poll_result = poll_u2_task_core(
                base_url=base_url,
                token=token,
                timeout_ms=timeout_ms,
                task_id=str(task_id),
                poll_interval_sec=poll_interval_sec,
                max_polls=max_polls,
                require_batch_complete=True,
                expected_total=len(limited_urls),
            )
        else:
            # Submission failed: synthesise a poll result so the round record
            # and the final return keep a uniform shape.
            poll_result = {
                "ok": False,
                "task_id": task_id,
                "task_status": "UNKNOWN",
                "request_id": submit_response.get("request_id"),
                "error_reason": submit_response.get("error_reason") or "u2_submit_failed_or_missing_task_id",
                "task_metrics": {},
                "batch_results": {},
                "batch_progress": {
                    "expected_total": len(limited_urls),
                    "target_total": len(limited_urls),
                    "complete": False,
                    "completion_basis": "pending",
                },
                "batch_complete": False,
                "trace": [],
            }

        rounds.append(
            {
                "round": round_index,
                "submit": {
                    "task_id": task_id,
                    "final_submit_status": submit_bundle.get("final_submit_status"),
                    "request_id": submit_response.get("request_id"),
                    "status_code": submit_response.get("status_code"),
                    "ok": submit_response.get("ok"),
                    "error_reason": submit_response.get("error_reason"),
                    "retry_chain": submit_bundle.get("retry_chain", []),
                    "file_url_count": len(limited_urls),
                },
                "poll": {
                    "task_id": poll_result.get("task_id") or task_id,
                    "task_status": poll_result.get("task_status"),
                    "request_id": poll_result.get("request_id"),
                    "ok": poll_result.get("ok"),
                    "error_reason": poll_result.get("error_reason"),
                    "attempts": len(poll_result.get("trace", [])),
                    "task_metrics": poll_result.get("task_metrics", {}),
                    "batch_complete": bool(poll_result.get("batch_complete")),
                    "batch_progress": poll_result.get("batch_progress", {}),
                },
            }
        )

        final_submit_bundle = submit_bundle
        final_poll_result = poll_result

        if poll_result.get("error_reason") in timeout_reasons and round_index < max_rounds:
            timeout_retry_triggered = True
            timeout_retry_result = "retrying"
            continue

        break

    if final_poll_result.get("ok"):
        timeout_retry_result = "retry_succeeded" if timeout_retry_triggered else "not_needed"
    elif final_poll_result.get("error_reason") in timeout_reasons:
        timeout_retry_result = "retry_timeout_exhausted" if timeout_retry_triggered else "timeout_no_retry"
    elif timeout_retry_triggered:
        timeout_retry_result = "retry_failed_non_timeout"
    else:
        timeout_retry_result = "not_triggered"

    raw_task_payload = final_poll_result.get("raw_task")
    mapped_results = map_u2_batch_results_by_file_url(raw_task_payload)

    def _result_score(entry: Dict[str, Any]) -> tuple:
        """Rank a result: ok first, then transcript length, URL, no error."""
        return (
            1 if entry.get("ok") else 0,
            len(str(entry.get("transcript_text") or "")),
            1 if entry.get("transcription_url") else 0,
            1 if not entry.get("error_reason") else 0,
        )

    # Results keyed by submission index are merged in as a second source; an
    # index-derived entry replaces a URL-derived one only when it scores higher.
    index_mapped = map_u2_batch_results_by_item_index(raw_task_payload)
    for item_index, item in index_mapped.items():
        if item_index < 0 or item_index >= len(limited_urls):
            continue
        file_url = normalize_media_url(limited_urls[item_index])
        if not file_url:
            continue

        candidate = {
            "file_url": file_url,
            "transcript_text": clean_transcript_text(item.get("transcript_text")),
            "task_status": _status_upper(item.get("task_status")),
            "error_reason": str(item.get("error_reason") or "").strip(),
            "transcription_url": normalize_text(item.get("transcription_url")),
            "ok": bool(item.get("ok")),
        }

        existing = mapped_results.get(file_url)
        if existing is None or _result_score(candidate) > _result_score(existing):
            mapped_results[file_url] = candidate

    mapped_results = hydrate_u2_batch_results_from_transcription_urls(
        mapped_results=mapped_results,
        timeout_ms=timeout_ms,
    )
    result_items = list(mapped_results.values())

    return {
        "submit_bundle": final_submit_bundle,
        "poll_result": final_poll_result,
        "rounds": rounds,
        "timeout_retry": {
            "enabled": bool(timeout_retry_enabled),
            "configured_max_retries": conservative_retries,
            "triggered": timeout_retry_triggered,
            "result": timeout_retry_result,
        },
        "normalized_file_urls": limited_urls,
        "mapped_results": mapped_results,
        "result_items": result_items,
        "task_metrics": final_poll_result.get("task_metrics") if isinstance(final_poll_result.get("task_metrics"), dict) else extract_u2_task_metrics(raw_task_payload),
        "batch_progress": final_poll_result.get("batch_progress") if isinstance(final_poll_result.get("batch_progress"), dict) else build_u2_batch_progress(payload=raw_task_payload, expected_total=len(limited_urls)),
        "batch_complete": bool(final_poll_result.get("batch_complete")),
    }
1075
-
1076
-
1077
def run_u2_asr_with_timeout_retry(
    *,
    base_url: str,
    token: str,
    timeout_ms: int,
    video_url: str,
    submit_max_retries: int,
    submit_backoff_ms: int,
    poll_interval_sec: float,
    max_polls: int,
    timeout_retry_enabled: bool = True,
    timeout_retry_max_retries: int = 3,
) -> Dict[str, Any]:
    """Submit one ASR task and poll it, resubmitting when polling times out.

    The URL is normalised with ``normalize_media_url``. Each round submits via
    ``submit_u2_asr_with_retry`` and, when a task id is obtained, polls with
    ``poll_u2_task_core``. Only a ``u2_poll_timeout`` result starts another
    round; the number of extra rounds is ``timeout_retry_max_retries`` clamped
    to the range 0..3, or 0 when ``timeout_retry_enabled`` is false.

    Returns:
        A dict with ``submit_bundle`` and ``poll_result`` of the last round,
        a per-round summary list under ``rounds``, and a ``timeout_retry``
        report.
    """
    video_url = normalize_media_url(video_url)
    conservative_retries = max(0, min(3, int(timeout_retry_max_retries)))
    max_rounds = 1 + (conservative_retries if timeout_retry_enabled else 0)

    history: List[Dict[str, Any]] = []
    last_submit: Dict[str, Any] = {}
    last_poll: Dict[str, Any] = {
        "ok": False,
        "task_status": "UNKNOWN",
        "error_reason": "u2_submit_failed_or_missing_task_id",
    }
    retried = False

    for round_no in range(1, max_rounds + 1):
        submitted = submit_u2_asr_with_retry(
            base_url=base_url,
            token=token,
            timeout_ms=timeout_ms,
            video_url=video_url,
            max_retries=submit_max_retries,
            backoff_ms=submit_backoff_ms,
        )
        reply = submitted.get("submit_response", {})
        task_id = submitted.get("task_id")

        if not (reply.get("ok") and task_id):
            # No usable task: fabricate a failed poll result of the usual shape.
            polled = {
                "ok": False,
                "task_id": task_id,
                "task_status": "UNKNOWN",
                "request_id": reply.get("request_id"),
                "error_reason": reply.get("error_reason") or "u2_submit_failed_or_missing_task_id",
                "trace": [],
            }
        else:
            polled = poll_u2_task_core(
                base_url=base_url,
                token=token,
                timeout_ms=timeout_ms,
                task_id=str(task_id),
                poll_interval_sec=poll_interval_sec,
                max_polls=max_polls,
            )

        submit_summary = {
            "task_id": task_id,
            "final_submit_status": submitted.get("final_submit_status"),
            "request_id": reply.get("request_id"),
            "status_code": reply.get("status_code"),
            "ok": reply.get("ok"),
            "error_reason": reply.get("error_reason"),
            "retry_chain": submitted.get("retry_chain", []),
        }
        poll_summary = {
            "task_id": polled.get("task_id") or task_id,
            "task_status": polled.get("task_status"),
            "request_id": polled.get("request_id"),
            "ok": polled.get("ok"),
            "error_reason": polled.get("error_reason"),
            "attempts": len(polled.get("trace", [])),
        }
        history.append({"round": round_no, "submit": submit_summary, "poll": poll_summary})

        last_submit = submitted
        last_poll = polled

        # Stop unless this round timed out and the retry budget allows another.
        timed_out = polled.get("error_reason") == "u2_poll_timeout"
        if not (timed_out and round_no < max_rounds):
            break
        retried = True

    # Classify the outcome from the final poll result and whether we retried.
    if last_poll.get("ok"):
        outcome = "retry_succeeded" if retried else "not_needed"
    elif last_poll.get("error_reason") == "u2_poll_timeout":
        outcome = "retry_timeout_exhausted" if retried else "timeout_no_retry"
    elif retried:
        outcome = "retry_failed_non_timeout"
    else:
        outcome = "not_triggered"

    return {
        "submit_bundle": last_submit,
        "poll_result": last_poll,
        "rounds": history,
        "timeout_retry": {
            "enabled": bool(timeout_retry_enabled),
            "configured_max_retries": conservative_retries,
            "triggered": retried,
            "result": outcome,
        },
    }