@tikomni/skills 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (480)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -3
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +252 -9
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +108 -167
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +6 -2
  30. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/{creator-analysis → social-media-crawl}/scripts/core/tikomni_common.py +13 -3
  33. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  34. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  36. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  37. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  38. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  39. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  40. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +282 -174
  41. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  42. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +290 -141
  43. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  45. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  46. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  47. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  48. package/skills/creator-analysis/SKILL.md +0 -95
  49. package/skills/creator-analysis/agents/openai.yaml +0 -4
  50. package/skills/creator-analysis/env.example +0 -36
  51. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  52. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  53. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  54. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  55. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  56. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  57. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  59. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  60. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  61. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  63. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  64. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  65. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  66. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  67. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  68. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  69. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  71. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  72. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  73. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  74. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  75. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  76. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  77. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  78. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  79. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  80. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  81. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  82. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  83. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  84. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  85. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  86. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  87. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  88. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  89. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  90. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  91. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  92. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  93. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  94. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  96. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  100. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  101. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  103. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  104. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  105. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  106. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  107. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  108. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  109. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  110. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  113. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  114. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  116. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  117. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  118. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  119. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  120. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  121. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  122. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  123. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  124. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  125. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  126. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  127. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  128. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  129. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  130. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  131. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  132. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  133. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  134. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  135. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  136. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  137. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  138. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  139. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  140. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  141. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  142. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  143. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  144. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  145. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  146. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  147. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  148. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  149. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  153. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  154. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  155. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  156. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  157. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  158. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  159. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  160. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  161. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  162. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  163. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  164. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  165. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  167. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  168. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  169. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  170. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  171. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  172. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  173. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  174. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  175. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  176. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  177. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  178. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  179. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  180. package/skills/creator-analysis/references/workflow.md +0 -23
  181. package/skills/creator-analysis/scripts/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  185. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  186. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  187. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  188. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  189. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  190. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  191. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  194. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  195. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  196. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  197. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  198. package/skills/creator-analysis/scripts/core/progress_report.py +0 -111
  199. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  200. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  202. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  203. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  205. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  207. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  208. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  209. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  210. package/skills/meta-capability/SKILL.md +0 -69
  211. package/skills/meta-capability/agents/openai.yaml +0 -4
  212. package/skills/meta-capability/env.example +0 -42
  213. package/skills/meta-capability/references/api-capability-index.md +0 -92
  214. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  215. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  216. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  217. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  218. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  219. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  220. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  222. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  223. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  224. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  226. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  227. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  228. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  229. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  230. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  231. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  232. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  233. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  234. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  235. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  236. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  237. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  238. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  239. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  240. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  241. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  242. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  243. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  244. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  245. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  246. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  247. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  248. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  249. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  250. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  251. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  252. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  253. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  254. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  255. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  256. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  257. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  258. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  259. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  263. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  264. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  265. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  266. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  267. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  268. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  269. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  270. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  271. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  272. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  273. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  274. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  276. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  277. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  279. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  280. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  281. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  282. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  283. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  284. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  285. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  286. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  287. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  288. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  289. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  290. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  291. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  292. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  293. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  294. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  295. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  296. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  297. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  298. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  299. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  300. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  301. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  302. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  303. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  304. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  305. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  306. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  307. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  308. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  309. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  310. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  311. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  312. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  316. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  317. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  318. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  319. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  320. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  321. package/skills/meta-capability/references/dispatch.md +0 -27
  322. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  323. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  324. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  325. package/skills/meta-capability/scripts/__init__.py +0 -1
  326. package/skills/meta-capability/scripts/call_route.py +0 -141
  327. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  328. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  329. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  330. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  331. package/skills/meta-capability/scripts/test_auth.py +0 -98
  332. package/skills/single-work-analysis/SKILL.md +0 -62
  333. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  334. package/skills/single-work-analysis/env.example +0 -36
  335. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  336. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  337. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  339. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  340. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  341. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  342. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  344. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  345. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  346. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  348. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  349. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  350. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  353. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  354. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  356. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  357. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  358. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  359. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  360. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  361. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  362. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  363. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  364. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  366. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  373. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  375. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  376. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  377. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  378. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  379. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  381. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  385. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  386. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  388. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  389. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  390. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  391. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  392. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  393. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  394. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  395. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  398. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  399. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  401. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  402. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  403. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  404. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  405. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  406. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  407. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  409. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  410. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  411. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  412. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  413. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  414. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  415. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  416. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  417. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  418. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  419. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  420. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  421. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  422. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  423. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  424. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  425. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  426. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  427. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  428. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  429. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  430. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  431. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  432. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  434. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  438. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  439. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  441. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  442. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  443. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -58
  444. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  445. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  446. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  447. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  448. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  449. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  450. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  451. package/skills/single-work-analysis/references/prompt-contracts/insight.md +0 -47
  452. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  453. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  454. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  455. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
  456. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  457. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  458. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  459. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  460. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -133
  461. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  462. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  463. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -253
  464. package/skills/single-work-analysis/scripts/core/tikomni_common.py +0 -588
  465. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  466. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  468. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  469. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  470. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  471. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  472. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  473. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  474. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  475. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -1402
  476. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  477. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  478. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  479. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  480. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -2,11 +2,13 @@
2
2
  """Shared ASR pipeline helpers for runner scripts."""
3
3
 
4
4
  import json
5
+ import re
6
+ import threading
5
7
  import time
6
8
  import urllib.error
7
9
  import urllib.request
8
10
  from urllib.parse import urlparse, urlunparse
9
- from typing import Any, Dict, List, Optional
11
+ from typing import Any, Callable, Dict, List, Optional
10
12
 
11
13
  from scripts.core.tikomni_common import (
12
14
  call_json_api,
@@ -17,8 +19,10 @@ from scripts.core.tikomni_common import (
17
19
  is_terminal_status,
18
20
  normalize_text,
19
21
  )
22
+ from scripts.core.u3_fallback import run_u3_public_url_fallback
20
23
 
21
24
  U2_BATCH_SUBMIT_HARD_LIMIT = 100
25
+ DEFAULT_U2_PENDING_TIMEOUT_SEC = 60
22
26
 
23
27
 
24
28
  def clamp_u2_batch_submit_size(size: int, *, default: int = 50, hard_limit: int = U2_BATCH_SUBMIT_HARD_LIMIT) -> int:
@@ -83,6 +87,7 @@ def submit_u2_asr_batch_with_retry(
83
87
  file_urls: List[str],
84
88
  max_retries: int,
85
89
  backoff_ms: int,
90
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
86
91
  ) -> Dict[str, Any]:
87
92
  retries = max(0, int(max_retries))
88
93
  base_backoff = max(0, int(backoff_ms))
@@ -110,20 +115,75 @@ def submit_u2_asr_batch_with_retry(
110
115
  final_task_id: Optional[str] = None
111
116
  final_submit_status = "failed_unknown"
112
117
 
118
+ def _emit_submit_progress(event: Dict[str, Any]) -> None:
119
+ if progress_callback is None:
120
+ return
121
+ try:
122
+ progress_callback(event)
123
+ except Exception:
124
+ pass
125
+
113
126
  for attempt in range(1, max_attempts + 1):
114
127
  wait_ms = 0 if attempt == 1 else base_backoff * (2 ** (attempt - 2))
115
128
  if wait_ms > 0:
116
129
  time.sleep(wait_ms / 1000.0)
117
130
 
118
- submit_response = submit_u2_asr_batch(
119
- base_url=base_url,
120
- token=token,
121
- timeout_ms=timeout_ms,
122
- file_urls=limited_urls,
131
+ _emit_submit_progress(
132
+ {
133
+ "phase": "submit",
134
+ "state": "started",
135
+ "attempt": attempt,
136
+ "wait_ms": wait_ms,
137
+ "candidate_count": len(limited_urls),
138
+ }
123
139
  )
140
+
141
+ heartbeat_stop = threading.Event()
142
+
143
+ def _heartbeat() -> None:
144
+ while not heartbeat_stop.wait(5.0):
145
+ _emit_submit_progress(
146
+ {
147
+ "phase": "submit",
148
+ "state": "heartbeat",
149
+ "attempt": attempt,
150
+ "wait_ms": wait_ms,
151
+ "candidate_count": len(limited_urls),
152
+ }
153
+ )
154
+
155
+ heartbeat_thread = threading.Thread(target=_heartbeat, daemon=True)
156
+ heartbeat_thread.start()
157
+
158
+ try:
159
+ submit_response = submit_u2_asr_batch(
160
+ base_url=base_url,
161
+ token=token,
162
+ timeout_ms=timeout_ms,
163
+ file_urls=limited_urls,
164
+ )
165
+ finally:
166
+ heartbeat_stop.set()
167
+ heartbeat_thread.join(timeout=0.2)
124
168
  task_id = extract_task_id(submit_response.get("data"))
125
169
  retriable = is_retriable_submit_failure(submit_response)
126
170
 
171
+ _emit_submit_progress(
172
+ {
173
+ "phase": "submit",
174
+ "state": "finished",
175
+ "attempt": attempt,
176
+ "wait_ms": wait_ms,
177
+ "candidate_count": len(limited_urls),
178
+ "task_id": task_id,
179
+ "status_code": submit_response.get("status_code"),
180
+ "ok": bool(submit_response.get("ok")),
181
+ "error_reason": submit_response.get("error_reason"),
182
+ "request_id": submit_response.get("request_id"),
183
+ "retriable": retriable,
184
+ }
185
+ )
186
+
127
187
  retry_chain.append(
128
188
  {
129
189
  "attempt": attempt,
@@ -172,6 +232,7 @@ def submit_u2_asr_with_retry(
172
232
  video_url: str,
173
233
  max_retries: int,
174
234
  backoff_ms: int,
235
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
175
236
  ) -> Dict[str, Any]:
176
237
  return submit_u2_asr_batch_with_retry(
177
238
  base_url=base_url,
@@ -180,13 +241,55 @@ def submit_u2_asr_with_retry(
180
241
  file_urls=[video_url],
181
242
  max_retries=max_retries,
182
243
  backoff_ms=backoff_ms,
244
+ progress_callback=progress_callback,
183
245
  )
184
246
 
185
247
 
186
248
  def clean_transcript_text(raw_text: Any) -> str:
187
249
  if raw_text is None:
188
250
  return ""
189
- return str(raw_text).strip()
251
+ return normalize_text(raw_text)
252
+
253
+
254
def _ensure_sentence_end(text: str) -> str:
    """Return *text* with a closing ``。`` appended when it lacks a terminator.

    Empty input is returned unchanged. Text that already ends with a sentence
    terminator (ideographic full stop, ASCII or full-width ``!``/``?``, a
    three-dot ``...`` or a ``…`` ellipsis) is returned unchanged as well.
    """
    if not text:
        return text
    # The full-width forms are written as escapes so that tools which fold
    # character width cannot silently collapse them into their ASCII twins
    # (which would make "好!" come back as "好!。").
    terminators = (
        "。",
        "!",
        "?",
        "\uff01",  # full-width exclamation mark
        "\uff1f",  # full-width question mark
        "\u2026",  # horizontal ellipsis
        "...",
    )
    if text.endswith(terminators):
        return text
    return f"{text}。"
260
+
261
+
262
def derive_asr_clean_text(asr_raw: Any, legacy_clean: Any = None) -> str:
    """Build a readable, paragraphed transcript from raw ASR text.

    Args:
        asr_raw: Raw transcript; passed through ``clean_transcript_text``.
        legacy_clean: Used only when ``asr_raw`` cleans to an empty string.

    Returns:
        Sentences joined by ``"\\n"`` inside a paragraph and paragraphs joined
        by a blank line, or ``""`` when neither input yields any text.

    Steps:
        1. Replace filler words with a space: fillers standing alone between
           word boundaries, any run of 嗯/啊/呃, and the doubled forms
           就是就是/然后然后/这个这个/那个那个.
        2. Collapse whitespace, then split on sentence punctuation.
        3. Re-terminate every non-empty unit via ``_ensure_sentence_end``.
        4. Group sentences three per paragraph; a single left-over sentence is
           folded into the previous paragraph instead of standing alone.
    """
    base = clean_transcript_text(asr_raw) or clean_transcript_text(legacy_clean)
    if not base:
        return ""

    # ``\b`` only fires next to a non-word character, so inside unbroken CJK
    # text these fillers are left alone; only isolated ones are dropped.
    denoised = re.sub(r"\b(嗯|啊|呃|额|那个|这个|然后|就是)\b", " ", base)
    denoised = re.sub(r"(嗯+|啊+|呃+)", " ", denoised)
    denoised = re.sub(r"(就是就是|然后然后|这个这个|那个那个)", " ", denoised)
    denoised = re.sub(r"\s+", " ", denoised).strip()

    # Full-width "!", "?" and ";" are spelled as escapes so they survive any
    # width folding of this file. NOTE(review): if ``normalize_text`` already
    # folds full-width punctuation these extra members are simply inert.
    units = [
        clean_transcript_text(part)
        for part in re.split(r"[。!?\uff01\uff1f;\uff1b\n]+", denoised)
    ]
    sentences = [_ensure_sentence_end(unit) for unit in units if unit]
    if not sentences:
        # Nothing but punctuation survived; hand back what is left.
        return _ensure_sentence_end(denoised)

    paragraphs: List[str] = [
        "\n".join(sentences[start:start + 3])
        for start in range(0, len(sentences), 3)
    ]
    # A lone trailing sentence joins the preceding paragraph rather than
    # forming a one-line paragraph of its own.
    if len(sentences) > 3 and len(sentences) % 3 == 1:
        orphan = paragraphs.pop()
        paragraphs[-1] = f"{paragraphs[-1]}\n{orphan}"

    return "\n\n".join(paragraphs)
190
293
 
191
294
 
192
295
  def extract_u2_task_metrics(payload: Any) -> Dict[str, Any]:
@@ -612,9 +715,12 @@ def poll_u2_task_core(
612
715
  max_polls: int,
613
716
  require_batch_complete: bool = False,
614
717
  expected_total: int = 0,
718
+ pending_timeout_sec: Optional[int] = None,
719
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
615
720
  ) -> Dict[str, Any]:
616
721
  trace = []
617
722
  last_request_id = None
723
+ started_at = time.perf_counter()
618
724
 
619
725
  last_status = "UNKNOWN"
620
726
  last_payload: Any = {}
@@ -656,6 +762,7 @@ def poll_u2_task_core(
656
762
  trace.append(
657
763
  {
658
764
  "attempt": attempt,
765
+ "elapsed_ms": int((time.perf_counter() - started_at) * 1000),
659
766
  "status_code": response.get("status_code"),
660
767
  "task_status": status,
661
768
  "platform_task_status": platform_status,
@@ -666,6 +773,26 @@ def poll_u2_task_core(
666
773
  }
667
774
  )
668
775
 
776
+ if progress_callback is not None:
777
+ try:
778
+ progress_callback(
779
+ {
780
+ "attempt": attempt,
781
+ "elapsed_ms": int((time.perf_counter() - started_at) * 1000),
782
+ "task_id": task_id,
783
+ "task_status": status or "UNKNOWN",
784
+ "platform_task_status": platform_status or "UNKNOWN",
785
+ "pending_count": pending_count,
786
+ "request_id": response.get("request_id"),
787
+ "status_code": response.get("status_code"),
788
+ "ok": bool(response.get("ok")),
789
+ "error_reason": response.get("error_reason"),
790
+ "batch_progress": batch_progress,
791
+ }
792
+ )
793
+ except Exception:
794
+ pass
795
+
669
796
  if not response.get("ok"):
670
797
  if attempt < max_polls:
671
798
  time.sleep(max(poll_interval_sec, 0.2))
@@ -689,6 +816,26 @@ def poll_u2_task_core(
689
816
  task_complete = status_terminal or platform_terminal
690
817
  batch_complete = bool(batch_progress.get("complete")) if require_batch_complete else task_complete
691
818
 
819
+ elapsed_sec = time.perf_counter() - started_at
820
+ if pending_timeout_sec and elapsed_sec >= max(float(pending_timeout_sec), 1.0):
821
+ timeout_reason = "u2_batch_pending_timeout" if require_batch_complete else "u2_pending_timeout"
822
+ return {
823
+ "ok": False,
824
+ "task_id": task_id,
825
+ "task_status": effective_status or last_status or "PENDING",
826
+ "platform_task_status": platform_status,
827
+ "pending_count": pending_count,
828
+ "request_id": last_request_id,
829
+ "error_reason": timeout_reason,
830
+ "transcript_text": "",
831
+ "raw_task": payload,
832
+ "task_metrics": last_metrics,
833
+ "batch_results": batch_results,
834
+ "batch_progress": batch_progress,
835
+ "batch_complete": bool(batch_complete),
836
+ "trace": trace,
837
+ }
838
+
692
839
  if require_batch_complete and not batch_complete:
693
840
  if attempt < max_polls:
694
841
  time.sleep(max(poll_interval_sec, 0.2))
@@ -797,6 +944,8 @@ def run_u2_asr_candidates_with_timeout_retry(
797
944
  max_polls: int,
798
945
  timeout_retry_enabled: bool = True,
799
946
  timeout_retry_max_retries: int = 3,
947
+ pending_timeout_sec: int = DEFAULT_U2_PENDING_TIMEOUT_SEC,
948
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
800
949
  ) -> Dict[str, Any]:
801
950
  normalized_candidates = normalize_media_candidates(candidates)
802
951
  attempts: List[Dict[str, Any]] = []
@@ -831,6 +980,8 @@ def run_u2_asr_candidates_with_timeout_retry(
831
980
  max_polls=max_polls,
832
981
  timeout_retry_enabled=timeout_retry_enabled,
833
982
  timeout_retry_max_retries=timeout_retry_max_retries,
983
+ pending_timeout_sec=pending_timeout_sec,
984
+ progress_callback=progress_callback,
834
985
  )
835
986
  poll_result = bundle.get("poll_result", {})
836
987
  error_reason = str(poll_result.get("error_reason") or "")
@@ -843,6 +994,7 @@ def run_u2_asr_candidates_with_timeout_retry(
843
994
  "ok": ok,
844
995
  "error_reason": error_reason,
845
996
  "task_status": poll_result.get("task_status"),
997
+ "u3_fallback": bundle.get("u3_fallback", {}),
846
998
  })
847
999
 
848
1000
  final_bundle = bundle
@@ -850,6 +1002,9 @@ def run_u2_asr_candidates_with_timeout_retry(
850
1002
  if ok:
851
1003
  break
852
1004
 
1005
+ if (bundle.get("u3_fallback") or {}).get("triggered"):
1006
+ break
1007
+
853
1008
  if error_reason == "INVALID_SOURCE_URL":
854
1009
  continue
855
1010
 
@@ -871,6 +1026,8 @@ def run_u2_asr_batch_with_timeout_retry(
871
1026
  max_polls: int,
872
1027
  timeout_retry_enabled: bool = True,
873
1028
  timeout_retry_max_retries: int = 3,
1029
+ pending_timeout_sec: int = DEFAULT_U2_PENDING_TIMEOUT_SEC,
1030
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
874
1031
  ) -> Dict[str, Any]:
875
1032
  normalized_urls = normalize_media_candidates(file_urls)
876
1033
  limited_urls = normalized_urls[:U2_BATCH_SUBMIT_HARD_LIMIT]
@@ -921,6 +1078,7 @@ def run_u2_asr_batch_with_timeout_retry(
921
1078
  }
922
1079
 
923
1080
  for round_index in range(1, max_rounds + 1):
1081
+ submit_started_at = time.perf_counter()
924
1082
  submit_bundle = submit_u2_asr_batch_with_retry(
925
1083
  base_url=base_url,
926
1084
  token=token,
@@ -928,12 +1086,16 @@ def run_u2_asr_batch_with_timeout_retry(
928
1086
  file_urls=limited_urls,
929
1087
  max_retries=submit_max_retries,
930
1088
  backoff_ms=submit_backoff_ms,
1089
+ progress_callback=progress_callback,
931
1090
  )
1091
+ submit_duration_ms = int((time.perf_counter() - submit_started_at) * 1000)
932
1092
  submit_response = submit_bundle.get("submit_response", {})
933
1093
  task_id = submit_bundle.get("task_id")
934
1094
 
935
1095
  poll_result: Dict[str, Any]
1096
+ poll_duration_ms = 0
936
1097
  if submit_response.get("ok") and task_id:
1098
+ poll_started_at = time.perf_counter()
937
1099
  poll_result = poll_u2_task_core(
938
1100
  base_url=base_url,
939
1101
  token=token,
@@ -943,7 +1105,10 @@ def run_u2_asr_batch_with_timeout_retry(
943
1105
  max_polls=max_polls,
944
1106
  require_batch_complete=True,
945
1107
  expected_total=len(limited_urls),
1108
+ pending_timeout_sec=pending_timeout_sec,
1109
+ progress_callback=progress_callback,
946
1110
  )
1111
+ poll_duration_ms = int((time.perf_counter() - poll_started_at) * 1000)
947
1112
  else:
948
1113
  poll_result = {
949
1114
  "ok": False,
@@ -975,6 +1140,7 @@ def run_u2_asr_batch_with_timeout_retry(
975
1140
  "error_reason": submit_response.get("error_reason"),
976
1141
  "retry_chain": submit_bundle.get("retry_chain", []),
977
1142
  "file_url_count": len(limited_urls),
1143
+ "duration_ms": submit_duration_ms,
978
1144
  },
979
1145
  "poll": {
980
1146
  "task_id": poll_result.get("task_id") or task_id,
@@ -986,6 +1152,7 @@ def run_u2_asr_batch_with_timeout_retry(
986
1152
  "task_metrics": poll_result.get("task_metrics", {}),
987
1153
  "batch_complete": bool(poll_result.get("batch_complete")),
988
1154
  "batch_progress": poll_result.get("batch_progress", {}),
1155
+ "duration_ms": poll_duration_ms,
989
1156
  },
990
1157
  }
991
1158
  )
@@ -993,7 +1160,7 @@ def run_u2_asr_batch_with_timeout_retry(
993
1160
  final_submit_bundle = submit_bundle
994
1161
  final_poll_result = poll_result
995
1162
 
996
- if poll_result.get("error_reason") in {"u2_poll_timeout", "u2_batch_incomplete_timeout"} and round_index < max_rounds:
1163
+ if poll_result.get("error_reason") in {"u2_poll_timeout", "u2_batch_incomplete_timeout", "u2_batch_pending_timeout"} and round_index < max_rounds:
997
1164
  timeout_retry_triggered = True
998
1165
  timeout_retry_result = "retrying"
999
1166
  continue
@@ -1002,7 +1169,7 @@ def run_u2_asr_batch_with_timeout_retry(
1002
1169
 
1003
1170
  if final_poll_result.get("ok"):
1004
1171
  timeout_retry_result = "retry_succeeded" if timeout_retry_triggered else "not_needed"
1005
- elif final_poll_result.get("error_reason") in {"u2_poll_timeout", "u2_batch_incomplete_timeout"}:
1172
+ elif final_poll_result.get("error_reason") in {"u2_poll_timeout", "u2_batch_incomplete_timeout", "u2_batch_pending_timeout"}:
1006
1173
  timeout_retry_result = "retry_timeout_exhausted" if timeout_retry_triggered else "timeout_no_retry"
1007
1174
  elif timeout_retry_triggered:
1008
1175
  timeout_retry_result = "retry_failed_non_timeout"
@@ -1071,6 +1238,8 @@ def run_u2_asr_batch_with_timeout_retry(
1071
1238
  "task_metrics": final_poll_result.get("task_metrics") if isinstance(final_poll_result.get("task_metrics"), dict) else extract_u2_task_metrics(raw_task_payload),
1072
1239
  "batch_progress": final_poll_result.get("batch_progress") if isinstance(final_poll_result.get("batch_progress"), dict) else build_u2_batch_progress(payload=raw_task_payload, expected_total=len(limited_urls)),
1073
1240
  "batch_complete": bool(final_poll_result.get("batch_complete")),
1241
+ "submit_duration_ms": _safe_int((rounds[-1].get("submit") if rounds else {}).get("duration_ms")),
1242
+ "poll_duration_ms": _safe_int((rounds[-1].get("poll") if rounds else {}).get("duration_ms")),
1074
1243
  }
1075
1244
 
1076
1245
 
@@ -1086,6 +1255,9 @@ def run_u2_asr_with_timeout_retry(
1086
1255
  max_polls: int,
1087
1256
  timeout_retry_enabled: bool = True,
1088
1257
  timeout_retry_max_retries: int = 3,
1258
+ pending_timeout_sec: int = DEFAULT_U2_PENDING_TIMEOUT_SEC,
1259
+ u3_fallback_enabled: bool = True,
1260
+ progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
1089
1261
  ) -> Dict[str, Any]:
1090
1262
  video_url = normalize_media_url(video_url)
1091
1263
  conservative_retries = max(0, min(3, int(timeout_retry_max_retries)))
@@ -1101,8 +1273,17 @@ def run_u2_asr_with_timeout_retry(
1101
1273
  }
1102
1274
  timeout_retry_triggered = False
1103
1275
  timeout_retry_result = "not_triggered"
1276
+ u3_fallback_bundle: Dict[str, Any] = {
1277
+ "enabled": bool(u3_fallback_enabled),
1278
+ "triggered": False,
1279
+ "ok": False,
1280
+ "result": "not_triggered",
1281
+ "public_url": "",
1282
+ "trace": [],
1283
+ }
1104
1284
 
1105
1285
  for round_index in range(1, max_rounds + 1):
1286
+ submit_started_at = time.perf_counter()
1106
1287
  submit_bundle = submit_u2_asr_with_retry(
1107
1288
  base_url=base_url,
1108
1289
  token=token,
@@ -1110,12 +1291,16 @@ def run_u2_asr_with_timeout_retry(
1110
1291
  video_url=video_url,
1111
1292
  max_retries=submit_max_retries,
1112
1293
  backoff_ms=submit_backoff_ms,
1294
+ progress_callback=progress_callback,
1113
1295
  )
1296
+ submit_duration_ms = int((time.perf_counter() - submit_started_at) * 1000)
1114
1297
  submit_response = submit_bundle.get("submit_response", {})
1115
1298
  task_id = submit_bundle.get("task_id")
1116
1299
 
1117
1300
  poll_result: Dict[str, Any]
1301
+ poll_duration_ms = 0
1118
1302
  if submit_response.get("ok") and task_id:
1303
+ poll_started_at = time.perf_counter()
1119
1304
  poll_result = poll_u2_task_core(
1120
1305
  base_url=base_url,
1121
1306
  token=token,
@@ -1123,7 +1308,10 @@ def run_u2_asr_with_timeout_retry(
1123
1308
  task_id=str(task_id),
1124
1309
  poll_interval_sec=poll_interval_sec,
1125
1310
  max_polls=max_polls,
1311
+ pending_timeout_sec=pending_timeout_sec,
1312
+ progress_callback=progress_callback,
1126
1313
  )
1314
+ poll_duration_ms = int((time.perf_counter() - poll_started_at) * 1000)
1127
1315
  else:
1128
1316
  poll_result = {
1129
1317
  "ok": False,
@@ -1145,6 +1333,7 @@ def run_u2_asr_with_timeout_retry(
1145
1333
  "ok": submit_response.get("ok"),
1146
1334
  "error_reason": submit_response.get("error_reason"),
1147
1335
  "retry_chain": submit_bundle.get("retry_chain", []),
1336
+ "duration_ms": submit_duration_ms,
1148
1337
  },
1149
1338
  "poll": {
1150
1339
  "task_id": poll_result.get("task_id") or task_id,
@@ -1153,6 +1342,7 @@ def run_u2_asr_with_timeout_retry(
1153
1342
  "ok": poll_result.get("ok"),
1154
1343
  "error_reason": poll_result.get("error_reason"),
1155
1344
  "attempts": len(poll_result.get("trace", [])),
1345
+ "duration_ms": poll_duration_ms,
1156
1346
  },
1157
1347
  }
1158
1348
  )
@@ -1160,6 +1350,56 @@ def run_u2_asr_with_timeout_retry(
1160
1350
  final_submit_bundle = submit_bundle
1161
1351
  final_poll_result = poll_result
1162
1352
 
1353
+ if (
1354
+ u3_fallback_enabled
1355
+ and poll_result.get("error_reason") in {"u2_pending_timeout", "u2_poll_timeout"}
1356
+ ):
1357
+ u3_fallback_bundle["triggered"] = True
1358
+ u3_result = run_u3_public_url_fallback(
1359
+ base_url=base_url,
1360
+ token=token,
1361
+ timeout_ms=timeout_ms,
1362
+ source_url=video_url,
1363
+ )
1364
+ u3_fallback_bundle.update(
1365
+ {
1366
+ "ok": bool(u3_result.get("ok")),
1367
+ "result": "u3_completed" if u3_result.get("ok") else "u3_failed",
1368
+ "public_url": u3_result.get("public_url") or "",
1369
+ "request_id": u3_result.get("request_id"),
1370
+ "error_reason": u3_result.get("error_reason"),
1371
+ "trace": u3_result.get("trace", []),
1372
+ }
1373
+ )
1374
+ if u3_result.get("ok") and u3_result.get("public_url"):
1375
+ retry_bundle = run_u2_asr_with_timeout_retry(
1376
+ base_url=base_url,
1377
+ token=token,
1378
+ timeout_ms=timeout_ms,
1379
+ video_url=str(u3_result.get("public_url")),
1380
+ submit_max_retries=submit_max_retries,
1381
+ submit_backoff_ms=submit_backoff_ms,
1382
+ poll_interval_sec=poll_interval_sec,
1383
+ max_polls=max_polls,
1384
+ timeout_retry_enabled=timeout_retry_enabled,
1385
+ timeout_retry_max_retries=timeout_retry_max_retries,
1386
+ pending_timeout_sec=pending_timeout_sec,
1387
+ u3_fallback_enabled=False,
1388
+ progress_callback=progress_callback,
1389
+ )
1390
+ retry_bundle["u3_fallback"] = u3_fallback_bundle
1391
+ retry_bundle["original_source_url"] = video_url
1392
+ return retry_bundle
1393
+ final_poll_result = {
1394
+ "ok": False,
1395
+ "task_id": poll_result.get("task_id") or task_id,
1396
+ "task_status": poll_result.get("task_status") or "UNKNOWN",
1397
+ "request_id": u3_result.get("request_id") or poll_result.get("request_id"),
1398
+ "error_reason": u3_result.get("error_reason") or "u3_fallback_failed",
1399
+ "trace": list(poll_result.get("trace", [])) + list(u3_result.get("trace", [])),
1400
+ }
1401
+ break
1402
+
1163
1403
  if poll_result.get("error_reason") == "u2_poll_timeout" and round_index < max_rounds:
1164
1404
  timeout_retry_triggered = True
1165
1405
  timeout_retry_result = "retrying"
@@ -1186,4 +1426,7 @@ def run_u2_asr_with_timeout_retry(
1186
1426
  "triggered": timeout_retry_triggered,
1187
1427
  "result": timeout_retry_result,
1188
1428
  },
1429
+ "u3_fallback": u3_fallback_bundle,
1430
+ "submit_duration_ms": _safe_int((rounds[-1].get("submit") if rounds else {}).get("duration_ms")),
1431
+ "poll_duration_ms": _safe_int((rounds[-1].get("poll") if rounds else {}).get("duration_ms")),
1189
1432
  }
@@ -0,0 +1,83 @@
1
+ #!/usr/bin/env python3
2
+ """Completeness evaluation helpers."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import hashlib
7
+ import time
8
+ from typing import Any, Dict, List, Optional
9
+
10
+
11
def normalize_missing_fields(items: Any) -> List[Dict[str, str]]:
    """Return a cleaned, de-duplicated list of missing-field records.

    Args:
        items: Expected to be a list whose entries are either dicts with
            "field"/"reason" keys or bare values naming a field. Any
            non-list input yields an empty list.

    Returns:
        A list of {"field": ..., "reason": ...} dicts in first-seen order.
        Entries whose field name is empty after stripping are dropped, an
        empty reason becomes "missing", and duplicate (field, reason) pairs
        are collapsed to their first occurrence.
    """
    if not isinstance(items, list):
        return []

    # Dicts keep insertion order, so keying on (field, reason) both
    # de-duplicates and preserves the order of first appearance.
    unique: Dict[tuple, Dict[str, str]] = {}
    for raw in items:
        if isinstance(raw, dict):
            name = str(raw.get("field") or "").strip()
            why = str(raw.get("reason") or "").strip() or "missing"
        else:
            name, why = str(raw or "").strip(), "missing"
        if name:
            unique.setdefault((name, why), {"field": name, "reason": why})
    return list(unique.values())
31
+
32
+
33
def ensure_request_id(value: Optional[str], fallback_seed: str = "") -> str:
    """Return a usable request id, synthesising a local one if needed.

    Args:
        value: Candidate request id; returned stripped when non-empty.
        fallback_seed: Optional text hashed into the synthetic id.

    Returns:
        The stripped ``value`` when it is non-empty; otherwise a string of
        the form ``local-<unix seconds>-<suffix>``, where the suffix is the
        first 8 hex characters of the SHA-1 of ``fallback_seed`` or the
        literal "unknown" when no seed is given.
    """
    cleaned = str(value or "").strip()
    if cleaned:
        return cleaned

    if fallback_seed:
        suffix = hashlib.sha1(fallback_seed.encode("utf-8")).hexdigest()[:8]
    else:
        suffix = "unknown"
    return f"local-{int(time.time())}-{suffix}"
39
+
40
+
41
def evaluate_work_fact_card(card: Dict[str, Any]) -> Dict[str, Any]:
    """Grade a work fact card and list the fields it is missing.

    Args:
        card: Mapping read via ``.get`` for the required identity fields,
            "primary_text", and an optional pre-existing "missing_fields".

    Returns:
        A dict with:
          * "completeness": "incomplete" when any missing entry carries the
            reason "missing_required", otherwise "complete" when
            primary_text is non-empty, otherwise "partial".
          * "missing_fields": the normalized incoming entries followed by
            the entries added here.

    NOTE(review): a required field that is empty but already listed in the
    card's own "missing_fields" under another reason is not re-added, so it
    does not by itself produce "incomplete" -- confirm this is intended.
    """

    def _text(key: str) -> str:
        # Treat None/falsy values as empty and ignore surrounding whitespace.
        return str(card.get(key) or "").strip()

    gaps = normalize_missing_fields(card.get("missing_fields"))
    reported = {gap["field"] for gap in gaps}

    required_keys = (
        "platform",
        "platform_work_id",
        "platform_author_id",
        "author_handle",
        "title",
        "source_url",
        "share_url",
    )
    for key in required_keys:
        if _text(key) or key in reported:
            continue
        gaps.append({"field": key, "reason": "missing_required"})
        reported.add(key)

    body = _text("primary_text")
    if not body and "primary_text" not in reported:
        gaps.append({"field": "primary_text", "reason": "missing_primary_text"})

    # Severity is driven by the reason tag, not by which field is absent.
    if any(gap["reason"] == "missing_required" for gap in gaps):
        level = "incomplete"
    elif body:
        level = "complete"
    else:
        level = "partial"

    return {"completeness": level, "missing_fields": gaps}
76
+
77
+
78
def evaluate_collection(profile: Dict[str, Any], works: List[Dict[str, Any]]) -> str:
    """Grade a creator collection from its profile and list of works.

    Args:
        profile: Mapping whose "platform_author_id" is checked.
        works: Works gathered for the creator.

    Returns:
        "incomplete" when the profile has no non-blank platform_author_id,
        "complete" when that id is present and ``works`` is non-empty, and
        "partial" when the id is present but ``works`` is empty.
    """
    author_id = str(profile.get("platform_author_id") or "").strip()
    if not author_id:
        return "incomplete"
    return "complete" if works else "partial"