@tikomni/skills 0.1.2 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (480)
  1. package/.skill-package-allowlist.txt +1 -3
  2. package/README.md +41 -49
  3. package/README.zh-CN.md +43 -51
  4. package/bin/tikomni-skills.js +2 -2
  5. package/env.example +37 -56
  6. package/package.json +7 -3
  7. package/skills/social-media-crawl/SKILL.md +53 -0
  8. package/skills/social-media-crawl/agents/openai.yaml +5 -0
  9. package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
  10. package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
  11. package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
  12. package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
  13. package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
  14. package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
  15. package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
  16. package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
  17. package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
  18. package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
  19. package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
  20. package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
  21. package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
  22. package/skills/social-media-crawl/scripts/__init__.py +2 -0
  23. package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
  24. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +252 -9
  25. package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
  26. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +108 -167
  27. package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
  28. package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
  29. package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +6 -2
  30. package/skills/{single-work-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
  31. package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
  32. package/skills/{creator-analysis → social-media-crawl}/scripts/core/tikomni_common.py +13 -3
  33. package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
  34. package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
  35. package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
  36. package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
  37. package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
  38. package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
  39. package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
  40. package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +282 -174
  41. package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
  42. package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +290 -141
  43. package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
  44. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
  45. package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
  46. package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
  47. package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
  48. package/skills/creator-analysis/SKILL.md +0 -95
  49. package/skills/creator-analysis/agents/openai.yaml +0 -4
  50. package/skills/creator-analysis/env.example +0 -36
  51. package/skills/creator-analysis/references/api-capability-index.md +0 -92
  52. package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
  53. package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  54. package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  55. package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
  56. package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  57. package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  58. package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  59. package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  60. package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  61. package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  62. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  63. package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  64. package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
  65. package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  66. package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  67. package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  68. package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  69. package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
  70. package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  71. package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  72. package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  73. package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  74. package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
  75. package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  76. package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  77. package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
  78. package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
  79. package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
  80. package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  81. package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  82. package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  83. package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  84. package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  85. package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  86. package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  87. package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  88. package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  89. package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  90. package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  91. package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
  92. package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  93. package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  94. package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  95. package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
  96. package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  97. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  98. package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  99. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  100. package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  101. package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  102. package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  103. package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  104. package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  105. package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
  106. package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
  107. package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  108. package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
  109. package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  110. package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  111. package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
  112. package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  113. package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
  114. package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
  115. package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  116. package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  117. package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
  118. package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
  119. package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  120. package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  121. package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  122. package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
  123. package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  124. package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  125. package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
  126. package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  127. package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
  128. package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
  129. package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
  130. package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
  131. package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
  132. package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
  133. package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  134. package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
  135. package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  136. package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  137. package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  138. package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  139. package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  140. package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  141. package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  142. package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
  143. package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
  144. package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
  145. package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
  146. package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  147. package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
  148. package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
  149. package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  150. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  151. package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  152. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  153. package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  154. package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  155. package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
  156. package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  157. package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  158. package/skills/creator-analysis/references/asr-orchestration.md +0 -33
  159. package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
  160. package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
  161. package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
  162. package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
  163. package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
  164. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
  165. package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
  166. package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
  167. package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
  168. package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
  169. package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
  170. package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
  171. package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
  172. package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
  173. package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
  174. package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
  175. package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
  176. package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
  177. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
  178. package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
  179. package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  180. package/skills/creator-analysis/references/workflow.md +0 -23
  181. package/skills/creator-analysis/scripts/__init__.py +0 -0
  182. package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
  183. package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
  184. package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
  185. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
  186. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
  187. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
  188. package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
  189. package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
  190. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
  191. package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
  192. package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
  193. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
  194. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
  195. package/skills/creator-analysis/scripts/core/__init__.py +0 -0
  196. package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
  197. package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
  198. package/skills/creator-analysis/scripts/core/progress_report.py +0 -111
  199. package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
  200. package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
  201. package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
  202. package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
  203. package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
  204. package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
  205. package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  206. package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
  207. package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
  208. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
  209. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
  210. package/skills/meta-capability/SKILL.md +0 -69
  211. package/skills/meta-capability/agents/openai.yaml +0 -4
  212. package/skills/meta-capability/env.example +0 -42
  213. package/skills/meta-capability/references/api-capability-index.md +0 -92
  214. package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
  215. package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
  216. package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
  217. package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
  218. package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
  219. package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
  220. package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
  221. package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  222. package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
  223. package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
  224. package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
  225. package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  226. package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
  227. package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
  228. package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
  229. package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
  230. package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
  231. package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
  232. package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
  233. package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
  234. package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
  235. package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
  236. package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
  237. package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
  238. package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
  239. package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
  240. package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
  241. package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
  242. package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
  243. package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
  244. package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
  245. package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
  246. package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  247. package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
  248. package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
  249. package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  250. package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
  251. package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
  252. package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
  253. package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
  254. package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
  255. package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  256. package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
  257. package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
  258. package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
  259. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  260. package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  261. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  262. package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  263. package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
  264. package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
  265. package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
  266. package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
  267. package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
  268. package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
  269. package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
  270. package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
  271. package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
  272. package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
  273. package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
  274. package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
  275. package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
  276. package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
  277. package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
  278. package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  279. package/skills/meta-capability/references/api-tags/health-check.md +0 -40
  280. package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
  281. package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
  282. package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
  283. package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
  284. package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
  285. package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
  286. package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
  287. package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
  288. package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
  289. package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
  290. package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
  291. package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
  292. package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
  293. package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
  294. package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
  295. package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
  296. package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
  297. package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
  298. package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
  299. package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
  300. package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
  301. package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
  302. package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
  303. package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
  304. package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
  305. package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
  306. package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
  307. package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
  308. package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
  309. package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
  310. package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
  311. package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
  312. package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
  313. package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  314. package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
  315. package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  316. package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
  317. package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
  318. package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
  319. package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
  320. package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
  321. package/skills/meta-capability/references/dispatch.md +0 -27
  322. package/skills/meta-capability/references/execution-guidelines.md +0 -25
  323. package/skills/meta-capability/references/implemented-route-map.md +0 -177
  324. package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
  325. package/skills/meta-capability/scripts/__init__.py +0 -1
  326. package/skills/meta-capability/scripts/call_route.py +0 -141
  327. package/skills/meta-capability/scripts/core/__init__.py +0 -1
  328. package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
  329. package/skills/meta-capability/scripts/core/config_loader.py +0 -204
  330. package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
  331. package/skills/meta-capability/scripts/test_auth.py +0 -98
  332. package/skills/single-work-analysis/SKILL.md +0 -62
  333. package/skills/single-work-analysis/agents/openai.yaml +0 -4
  334. package/skills/single-work-analysis/env.example +0 -36
  335. package/skills/single-work-analysis/references/api-capability-index.md +0 -92
  336. package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
  337. package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
  338. package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
  339. package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
  340. package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
  341. package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
  342. package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
  343. package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
  344. package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
  345. package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
  346. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
  347. package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
  348. package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
  349. package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
  350. package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
  351. package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
  352. package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
  353. package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
  354. package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
  355. package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
  356. package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
  357. package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
  358. package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
  359. package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
  360. package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
  361. package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
  362. package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
  363. package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
  364. package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
  365. package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
  366. package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
  367. package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
  368. package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
  369. package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
  370. package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
  371. package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
  372. package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
  373. package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
  374. package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
  375. package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
  376. package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
  377. package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
  378. package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
  379. package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
  380. package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
  381. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
  382. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
  383. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
  384. package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
  385. package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
  386. package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
  387. package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
  388. package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
  389. package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
  390. package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
  391. package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
  392. package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
  393. package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
  394. package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
  395. package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
  396. package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
  397. package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
  398. package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
  399. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
  400. package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
  401. package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
  402. package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
  403. package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
  404. package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
  405. package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
  406. package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
  407. package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
  408. package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
  409. package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
  410. package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
  411. package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
  412. package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
  413. package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
  414. package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
  415. package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
  416. package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
  417. package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
  418. package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
  419. package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
  420. package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
  421. package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
  422. package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
  423. package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
  424. package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
  425. package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
  426. package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
  427. package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
  428. package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
  429. package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
  430. package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
  431. package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
  432. package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
  433. package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
  434. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
  435. package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
  436. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
  437. package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
  438. package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
  439. package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
  440. package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
  441. package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
  442. package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
  443. package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -58
  444. package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
  445. package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
  446. package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
  447. package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
  448. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  449. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  450. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  451. package/skills/single-work-analysis/references/prompt-contracts/insight.md +0 -47
  452. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  453. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  454. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  455. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
  456. package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
  457. package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
  458. package/skills/single-work-analysis/scripts/__init__.py +0 -0
  459. package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
  460. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -133
  461. package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
  462. package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
  463. package/skills/single-work-analysis/scripts/core/storage_router.py +0 -253
  464. package/skills/single-work-analysis/scripts/core/tikomni_common.py +0 -588
  465. package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
  466. package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
  467. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
  468. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
  469. package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
  470. package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
  471. package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
  472. package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
  473. package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
  474. package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
  475. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -1402
  476. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
  477. /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
  478. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
  479. /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
  480. /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
@@ -20,16 +20,16 @@ import argparse
20
20
  import hashlib
21
21
  import json
22
22
  import re
23
+ import time
23
24
  import urllib.parse
24
25
  import urllib.request
25
26
  from datetime import datetime
26
27
  from pathlib import Path
27
28
  from typing import Any, Dict, List, Optional, Tuple
28
29
 
29
- from scripts.pipeline.asr.asr_pipeline import run_u2_asr_candidates_with_timeout_retry
30
- from scripts.core.config_loader import config_get, load_tikomni_config, resolve_storage_paths
31
- from scripts.core.progress_report import ProgressReporter
32
- from scripts.core.storage_router import render_output_filename, resolve_json_filename_pattern
30
+ from scripts.core.asr_pipeline import derive_asr_clean_text, run_u2_asr_candidates_with_timeout_retry
31
+ from scripts.core.config_loader import config_get, load_tikomni_config
32
+ from scripts.core.progress_report import ProgressReporter, build_progress_reporter
33
33
  from scripts.core.extract_pipeline import build_api_trace, resolve_trace_error_context
34
34
  from scripts.core.tikomni_common import (
35
35
  call_json_api,
@@ -40,7 +40,11 @@ from scripts.core.tikomni_common import (
40
40
  summarize_content,
41
41
  write_json_stdout,
42
42
  )
43
- from scripts.writers.write_benchmark_card import write_benchmark_card
43
+ from scripts.writers.write_work_fact_card import (
44
+ build_work_output_envelope,
45
+ persist_output_envelope,
46
+ write_work_fact_card,
47
+ )
44
48
 
45
49
  APP_V2_VIDEO_ENDPOINT = "/api/u1/v1/xiaohongshu/app_v2/get_video_note_detail"
46
50
  APP_V2_IMAGE_ENDPOINT = "/api/u1/v1/xiaohongshu/app_v2/get_image_note_detail"
@@ -49,6 +53,7 @@ APP_V1_ENDPOINT = "/api/u1/v1/xiaohongshu/app/get_note_info"
49
53
  WEB_V2_V2_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v2"
50
54
  WEB_V2_V3_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v3"
51
55
  WEB_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v7"
56
+ U2_REQUEST_TIMEOUT_CAP_MS = 15000
52
57
  U2_GATE_MIN_DURATION_MS = 13000
53
58
  U2_GATE_MAX_DURATION_MS = 1800000
54
59
  U2_GATE_RULE = "is_video && 13000<duration_ms<=1800000 && video_download_url_present"
@@ -80,6 +85,43 @@ def _to_int_or_none(value: Any) -> Optional[int]:
80
85
  return None
81
86
 
82
87
 
88
+ def _resolve_u2_timeout_ms(timeout_ms: Any) -> int:
89
+ parsed = _to_int_or_none(timeout_ms)
90
+ if parsed is None or parsed <= 0:
91
+ return U2_REQUEST_TIMEOUT_CAP_MS
92
+ return max(5000, min(parsed, U2_REQUEST_TIMEOUT_CAP_MS))
93
+
94
+
95
+ def _report_u2_progress(progress: Optional[ProgressReporter], *, stage: str, event: Dict[str, Any], label: str) -> None:
96
+ if progress is None:
97
+ return
98
+
99
+ phase = normalize_text(event.get("phase")).lower()
100
+ state = normalize_text(event.get("state")).lower()
101
+ payload = {
102
+ "phase": phase or "poll",
103
+ "state": state or "",
104
+ "task_id": event.get("task_id"),
105
+ "attempt": event.get("attempt"),
106
+ "task_status": event.get("task_status"),
107
+ "platform_task_status": event.get("platform_task_status"),
108
+ "pending_count": event.get("pending_count"),
109
+ "status_code": event.get("status_code"),
110
+ "batch_progress": event.get("batch_progress"),
111
+ "wait_ms": event.get("wait_ms"),
112
+ "candidate_count": event.get("candidate_count"),
113
+ "ok": event.get("ok"),
114
+ "error_reason": event.get("error_reason"),
115
+ "retriable": event.get("retriable"),
116
+ "request_id": event.get("request_id"),
117
+ }
118
+ message = f"{label} u2 {phase or 'poll'} {state or 'progress'}"
119
+ if phase == "submit" and state == "heartbeat":
120
+ progress.heartbeat(stage=stage, message=message, data=payload)
121
+ return
122
+ progress.progress(stage=stage, message=message, data=payload)
123
+
124
+
83
125
  def _evaluate_u2_gate_for_xhs(*, note_content_type: str, duration_ms: Any, video_down_url: Optional[str]) -> Dict[str, Any]:
84
126
  content_type = normalize_text(note_content_type).lower()
85
127
  is_video = content_type in {"video", "mixed"}
@@ -128,106 +170,6 @@ def _traceable_identifier(source_input: Dict[str, Optional[str]], note_id: Optio
128
170
  return f"url-{digest}"
129
171
 
130
172
 
131
- def _build_persist_payload(
132
- *,
133
- result: Dict[str, Any],
134
- source_input: Dict[str, Optional[str]],
135
- note_id: Optional[str],
136
- status: str,
137
- written_at: datetime,
138
- ) -> Dict[str, Any]:
139
- summary = {
140
- "summary": result.get("summary", ""),
141
- "insights": result.get("insights", []),
142
- "confidence": result.get("confidence"),
143
- "error_reason": result.get("error_reason"),
144
- }
145
- normalized = {
146
- "platform": "xiaohongshu",
147
- "content_kind": result.get("content_kind", "note"),
148
- "note_id": result.get("note_id") or note_id,
149
- "note_content_type": result.get("note_content_type"),
150
- "text_source": result.get("text_source"),
151
- "request_id": result.get("request_id"),
152
- "source": source_input,
153
- }
154
- return {
155
- "meta": {
156
- "written_at": written_at.isoformat(timespec="seconds"),
157
- "status": status,
158
- "platform": "xiaohongshu",
159
- "identifier": _traceable_identifier(source_input, note_id),
160
- },
161
- "summary": summary,
162
- "normalized": normalized,
163
- "raw": result,
164
- }
165
-
166
-
167
- def _persist_output_artifact(
168
- *,
169
- result: Dict[str, Any],
170
- source_input: Dict[str, Optional[str]],
171
- note_id: Optional[str],
172
- storage_config: Optional[Dict[str, Any]],
173
- persist_output: bool,
174
- ) -> Dict[str, Any]:
175
- if not persist_output:
176
- return {"enabled": False, "skipped": True, "reason": "disabled_by_flag"}
177
-
178
- try:
179
- paths = resolve_storage_paths(storage_config or {})
180
- except Exception as error:
181
- return {"enabled": True, "ok": False, "error": f"resolve_storage_paths_failed:{error}"}
182
-
183
- now = datetime.now()
184
- date_key = now.strftime("%Y%m%d")
185
- timestamp = now.strftime("%Y%m%dT%H%M%S")
186
- identifier = _traceable_identifier(source_input, note_id)
187
- has_error = bool(result.get("error_reason"))
188
- status = "error" if has_error else "success"
189
-
190
- if has_error:
191
- target_dir = Path(paths.get("errors_root", "")) / date_key
192
- else:
193
- target_dir = Path(paths.get("results_root", "")) / date_key
194
-
195
- target_dir.mkdir(parents=True, exist_ok=True)
196
- file_name = render_output_filename(
197
- pattern=resolve_json_filename_pattern(storage_config),
198
- context={
199
- "prefix": status,
200
- "platform": "xiaohongshu",
201
- "card_type": "single_work_result",
202
- "author_slug": identifier,
203
- "title_slug": identifier,
204
- "identifier": identifier,
205
- "timestamp": timestamp,
206
- "date": date_key,
207
- "ext": ".json",
208
- },
209
- default_filename=f"{timestamp}-xiaohongshu-{identifier}.json",
210
- default_ext=".json",
211
- )
212
- file_path = target_dir / file_name
213
-
214
- payload = _build_persist_payload(
215
- result=result,
216
- source_input=source_input,
217
- note_id=note_id,
218
- status=status,
219
- written_at=now,
220
- )
221
- file_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
222
-
223
- return {
224
- "enabled": True,
225
- "ok": True,
226
- "status": status,
227
- "path": str(file_path),
228
- }
229
-
230
-
231
173
  def _finalize_result(
232
174
  *,
233
175
  result: Dict[str, Any],
@@ -236,14 +178,19 @@ def _finalize_result(
236
178
  storage_config: Optional[Dict[str, Any]],
237
179
  persist_output: bool,
238
180
  ) -> Dict[str, Any]:
239
- result["output_persist"] = _persist_output_artifact(
240
- result=result,
241
- source_input=source_input,
242
- note_id=note_id,
181
+ envelope = build_work_output_envelope(result, platform="xiaohongshu")
182
+ if "card_write" in result:
183
+ envelope["card_write"] = result.get("card_write")
184
+ if not persist_output:
185
+ envelope["output_persist"] = {"enabled": False, "skipped": True, "reason": "disabled_by_flag"}
186
+ return envelope
187
+ envelope["output_persist"] = persist_output_envelope(
188
+ envelope=envelope,
243
189
  storage_config=storage_config,
244
- persist_output=persist_output,
190
+ platform="xiaohongshu",
191
+ fallback_identifier=note_id or _traceable_identifier(source_input, note_id),
245
192
  )
246
- return result
193
+ return envelope
247
194
 
248
195
 
249
196
  def _normalize_input(input_value: Optional[str], share_text: Optional[str], note_id: Optional[str]) -> Dict[str, Optional[str]]:
@@ -760,6 +707,70 @@ def _append_missing_metadata_fields(missing_fields: List[Dict[str, str]], metada
760
707
  _append(key)
761
708
 
762
709
 
710
+ def _empty_timings() -> Dict[str, int]:
711
+ return {
712
+ "url_parse_ms": 0,
713
+ "u1_total_ms": 0,
714
+ "u2_submit_ms": 0,
715
+ "u2_poll_ms": 0,
716
+ "card_write_ms": 0,
717
+ "llm_analysis_ms": 0,
718
+ "total_ms": 0,
719
+ }
720
+
721
+
722
+ def _elapsed_ms(started_at: float) -> int:
723
+ return int((time.perf_counter() - started_at) * 1000)
724
+
725
+
726
+ def _http_summary_for_note(response: Dict[str, Any], source_input: Dict[str, Optional[str]]) -> Dict[str, Any]:
727
+ completeness = response.get("_field_completeness") if isinstance(response.get("_field_completeness"), dict) else {}
728
+ payload = response.get("data")
729
+ metadata = _extract_xhs_metadata(
730
+ payload=payload,
731
+ source_input=source_input,
732
+ selected_video_url=None,
733
+ selected_image_urls=[],
734
+ ) if response.get("ok") else {}
735
+ return {
736
+ "note_id": normalize_text(metadata.get("note_id")) or normalize_text(source_input.get("note_id")),
737
+ "title_hit": bool(normalize_text(metadata.get("title"))),
738
+ "author_hit": bool(normalize_text(metadata.get("author"))),
739
+ "media_present": bool(normalize_text(metadata.get("video_down_url")) or metadata.get("cover_image")),
740
+ "filled_count": completeness.get("filled_count"),
741
+ "ratio": completeness.get("ratio"),
742
+ }
743
+
744
+
745
+ def _emit_http_progress(
746
+ progress: Optional[ProgressReporter],
747
+ *,
748
+ stage: str,
749
+ response: Dict[str, Any],
750
+ route_label: str,
751
+ source_input: Dict[str, Optional[str]],
752
+ ) -> None:
753
+ if progress is None:
754
+ return
755
+ progress.http_event(
756
+ stage=stage,
757
+ endpoint=str(response.get("_endpoint") or route_label),
758
+ response=response,
759
+ route_label=route_label,
760
+ summary=_http_summary_for_note(response, source_input),
761
+ )
762
+
763
+
764
+ def _update_pipeline_status(result: Dict[str, Any]) -> None:
765
+ card_write = result.get("card_write") if isinstance(result.get("card_write"), dict) else {}
766
+ deep_analysis = result.get("deep_analysis") if isinstance(result.get("deep_analysis"), dict) else {}
767
+ result["pipeline_status"] = {
768
+ "facts_ready": True,
769
+ "card_ready": bool(card_write.get("ok")),
770
+ "deep_analysis": deep_analysis.get("status") or "skipped",
771
+ }
772
+
773
+
763
774
  def _fetch_sparse_metadata_enrich(
764
775
  *,
765
776
  base_url: str,
@@ -767,6 +778,7 @@ def _fetch_sparse_metadata_enrich(
767
778
  timeout_ms: int,
768
779
  source_input: Dict[str, Optional[str]],
769
780
  note_id: Optional[str],
781
+ progress: Optional[ProgressReporter] = None,
770
782
  ) -> Dict[str, Any]:
771
783
  share_text = source_input.get("share_text")
772
784
  resolved_note_id = note_id or source_input.get("note_id") or _extract_note_id_from_share(share_text)
@@ -782,6 +794,7 @@ def _fetch_sparse_metadata_enrich(
782
794
  )
783
795
  response["_endpoint"] = WEB_V2_V3_ENDPOINT
784
796
  response["_route_label"] = "web_v2_v3_sparse_enrich"
797
+ _emit_http_progress(progress, stage="note.fetch", response=response, route_label="web_v2_v3_sparse_enrich", source_input=source_input)
785
798
  return response
786
799
 
787
800
  if resolved_note_id:
@@ -795,6 +808,7 @@ def _fetch_sparse_metadata_enrich(
795
808
  )
796
809
  response["_endpoint"] = WEB_V2_V2_ENDPOINT
797
810
  response["_route_label"] = "web_v2_v2_sparse_enrich"
811
+ _emit_http_progress(progress, stage="note.fetch", response=response, route_label="web_v2_v2_sparse_enrich", source_input=source_input)
798
812
  return response
799
813
 
800
814
  return {
@@ -805,7 +819,14 @@ def _fetch_sparse_metadata_enrich(
805
819
  }
806
820
 
807
821
 
808
- def _fetch_note_info(*, base_url: str, token: str, timeout_ms: int, source_input: Dict[str, Optional[str]]) -> Dict[str, Any]:
822
+ def _fetch_note_info(
823
+ *,
824
+ base_url: str,
825
+ token: str,
826
+ timeout_ms: int,
827
+ source_input: Dict[str, Optional[str]],
828
+ progress: Optional[ProgressReporter] = None,
829
+ ) -> Dict[str, Any]:
809
830
  attempts: List[Dict[str, Any]] = []
810
831
 
811
832
  share_text = source_input.get("share_text")
@@ -832,6 +853,7 @@ def _fetch_note_info(*, base_url: str, token: str, timeout_ms: int, source_input
832
853
  "missing_core": ["note_id", "title_or_desc", "media"],
833
854
  "core_ready": False,
834
855
  }
856
+ _emit_http_progress(progress, stage="note.fetch", response=response, route_label=label, source_input=source_input)
835
857
  attempts.append({"label": label, "endpoint": path, "response": response})
836
858
  return response
837
859
 
@@ -1323,6 +1345,7 @@ def _build_result(
1323
1345
  missing_fields: Optional[List[Dict[str, str]]] = None,
1324
1346
  metadata_fields: Optional[Dict[str, Any]] = None,
1325
1347
  asr_source: Optional[str] = None,
1348
+ timings: Optional[Dict[str, int]] = None,
1326
1349
  ) -> Dict[str, Any]:
1327
1350
  metadata = metadata_fields or {}
1328
1351
  summary_block = summarize_content(raw_content, source=f"xiaohongshu:{text_source}")
@@ -1344,7 +1367,8 @@ def _build_result(
1344
1367
 
1345
1368
  work_modality = "video" if normalize_text(note_content_type).lower() in {"video", "mixed"} else "text"
1346
1369
  caption_raw = normalize_text(metadata.get("caption_raw"))
1347
- primary_text = raw_content if work_modality == "video" else (caption_raw or raw_content)
1370
+ asr_clean = derive_asr_clean_text(raw_content)
1371
+ primary_text = asr_clean if work_modality == "video" else (caption_raw or raw_content)
1348
1372
  primary_text_source = "asr_clean" if work_modality == "video" else "caption_raw"
1349
1373
  analysis_eligibility = "eligible" if primary_text else "incomplete"
1350
1374
  analysis_exclusion_reason = "" if analysis_eligibility == "eligible" else ("video_asr_unavailable" if work_modality == "video" else "caption_raw_missing")
@@ -1389,6 +1413,8 @@ def _build_result(
1389
1413
  "xhs_sec_token": metadata.get("xhs_sec_token"),
1390
1414
  "downloaded_assets": downloaded_assets,
1391
1415
  "raw_content": raw_content,
1416
+ "asr_raw": raw_content,
1417
+ "asr_clean": asr_clean,
1392
1418
  "primary_text": primary_text,
1393
1419
  "primary_text_source": primary_text_source,
1394
1420
  "analysis_eligibility": analysis_eligibility,
@@ -1401,6 +1427,7 @@ def _build_result(
1401
1427
  "extract_trace": extract_trace,
1402
1428
  "fallback_trace": fallback_trace,
1403
1429
  "request_id": request_id,
1430
+ "timings": dict(timings or {}),
1404
1431
  }
1405
1432
 
1406
1433
 
@@ -1421,6 +1448,7 @@ def run_xiaohongshu_extract(
1421
1448
  u2_timeout_retry_max_retries: int,
1422
1449
  force_u2_fallback: bool,
1423
1450
  write_card: bool,
1451
+ analysis_mode: str,
1424
1452
  card_type: str,
1425
1453
  card_root: Optional[str],
1426
1454
  storage_config: Optional[Dict[str, Any]] = None,
@@ -1428,14 +1456,17 @@ def run_xiaohongshu_extract(
1428
1456
  persist_output: bool = True,
1429
1457
  progress: Optional[ProgressReporter] = None,
1430
1458
  ) -> Dict[str, Any]:
1431
- if not write_card or not persist_output:
1432
- raise ValueError(
1433
- f"fixed_pipeline_requires_full_persistence:xiaohongshu:note:write_card={bool(write_card)}:persist_output={bool(persist_output)}"
1434
- )
1435
-
1459
+ workflow_started_at = time.perf_counter()
1460
+ timings = _empty_timings()
1461
+ parse_started_at = time.perf_counter()
1436
1462
  source_input = _normalize_input(input_value, share_text, note_id)
1463
+ timings["url_parse_ms"] = _elapsed_ms(parse_started_at)
1437
1464
  if progress is not None:
1438
- progress.started(stage="note.workflow", message="xiaohongshu note workflow started")
1465
+ progress.started(
1466
+ stage="note.workflow",
1467
+ message="xiaohongshu note workflow started",
1468
+ data={"analysis_mode": analysis_mode, "write_card": bool(write_card), "persist_output": bool(persist_output)},
1469
+ )
1439
1470
  metadata_fields: Dict[str, Any] = {}
1440
1471
  if not source_input["share_text"] and not source_input["note_id"]:
1441
1472
  result = _build_result(
@@ -1452,23 +1483,32 @@ def run_xiaohongshu_extract(
1452
1483
  u2_task_id=None,
1453
1484
  u2_task_status="UNKNOWN",
1454
1485
  note_content_type="unknown",
1455
- analysis_mode="none",
1486
+ analysis_mode=analysis_mode,
1456
1487
  selected_video_url=None,
1457
1488
  selected_video_candidates=[],
1458
1489
  selected_image_urls=[],
1459
1490
  downloaded_assets=[],
1460
1491
  missing_fields=[{"field": "share_text_or_note_id", "reason": "missing_input"}],
1461
1492
  metadata_fields=metadata_fields,
1493
+ timings=timings,
1462
1494
  )
1463
1495
  if write_card:
1464
- result["card_write"] = write_benchmark_card(
1496
+ card_started_at = time.perf_counter()
1497
+ result["card_write"] = write_work_fact_card(
1465
1498
  payload=result,
1466
1499
  platform="xiaohongshu",
1467
1500
  card_type=card_type,
1468
1501
  card_root=card_root,
1469
1502
  content_kind="note",
1470
1503
  storage_config=storage_config,
1504
+ analysis_mode=analysis_mode,
1505
+ progress=progress.child(scope="card_write") if progress is not None else None,
1471
1506
  )
1507
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1508
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1509
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1510
+ result["timings"] = dict(timings)
1511
+ _update_pipeline_status(result)
1472
1512
  return _finalize_result(
1473
1513
  result=result,
1474
1514
  source_input=source_input,
@@ -1487,6 +1527,7 @@ def run_xiaohongshu_extract(
1487
1527
 
1488
1528
  trace: List[Dict[str, Any]] = []
1489
1529
 
1530
+ u1_started_at = time.perf_counter()
1490
1531
  if progress is not None:
1491
1532
  progress.progress(stage="note.fetch", message="fetching xiaohongshu note payload")
1492
1533
  note_response = _fetch_note_info(
@@ -1494,7 +1535,9 @@ def run_xiaohongshu_extract(
1494
1535
  token=runtime["token"],
1495
1536
  timeout_ms=runtime["timeout_ms"],
1496
1537
  source_input=source_input,
1538
+ progress=progress,
1497
1539
  )
1540
+ timings["u1_total_ms"] = _elapsed_ms(u1_started_at)
1498
1541
 
1499
1542
  attempts = note_response.get("_attempts") or []
1500
1543
  for index, attempt in enumerate(attempts, start=1):
@@ -1548,23 +1591,32 @@ def run_xiaohongshu_extract(
1548
1591
  u2_task_id=None,
1549
1592
  u2_task_status="UNKNOWN",
1550
1593
  note_content_type="unknown",
1551
- analysis_mode="none",
1594
+ analysis_mode=analysis_mode,
1552
1595
  selected_video_url=None,
1553
1596
  selected_video_candidates=[],
1554
1597
  selected_image_urls=[],
1555
1598
  downloaded_assets=[],
1556
1599
  missing_fields=[{"field": "u1_note_info", "reason": "all_routes_failed"}],
1557
1600
  metadata_fields=metadata_fields,
1601
+ timings=timings,
1558
1602
  )
1559
1603
  if write_card:
1560
- result["card_write"] = write_benchmark_card(
1604
+ card_started_at = time.perf_counter()
1605
+ result["card_write"] = write_work_fact_card(
1561
1606
  payload=result,
1562
1607
  platform="xiaohongshu",
1563
1608
  card_type=card_type,
1564
1609
  card_root=card_root,
1565
1610
  content_kind="note",
1566
1611
  storage_config=storage_config,
1612
+ analysis_mode=analysis_mode,
1613
+ progress=progress.child(scope="card_write") if progress is not None else None,
1567
1614
  )
1615
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1616
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1617
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1618
+ result["timings"] = dict(timings)
1619
+ _update_pipeline_status(result)
1568
1620
  return _finalize_result(
1569
1621
  result=result,
1570
1622
  source_input=source_input,
@@ -1589,13 +1641,16 @@ def run_xiaohongshu_extract(
1589
1641
  enrich_payload: Any = None
1590
1642
 
1591
1643
  if sparse_metadata_detected:
1644
+ enrich_started_at = time.perf_counter()
1592
1645
  enrich_response = _fetch_sparse_metadata_enrich(
1593
1646
  base_url=runtime["base_url"],
1594
1647
  token=runtime["token"],
1595
1648
  timeout_ms=runtime["timeout_ms"],
1596
1649
  source_input=source_input,
1597
1650
  note_id=source_input.get("note_id"),
1651
+ progress=progress,
1598
1652
  )
1653
+ timings["u1_total_ms"] += _elapsed_ms(enrich_started_at)
1599
1654
  trace.append(
1600
1655
  build_api_trace(
1601
1656
  step="u1_sparse_metadata_enrich",
@@ -1710,23 +1765,32 @@ def run_xiaohongshu_extract(
1710
1765
  u2_task_id=None,
1711
1766
  u2_task_status="SKIPPED",
1712
1767
  note_content_type=note_content_type,
1713
- analysis_mode="video_full",
1768
+ analysis_mode=analysis_mode,
1714
1769
  selected_video_url=selected_video_url,
1715
1770
  selected_video_candidates=video_candidates,
1716
1771
  selected_image_urls=image_candidates,
1717
1772
  downloaded_assets=[],
1718
1773
  missing_fields=missing_fields,
1719
1774
  metadata_fields=metadata_fields,
1775
+ timings=timings,
1720
1776
  )
1721
1777
  if write_card:
1722
- result["card_write"] = write_benchmark_card(
1778
+ card_started_at = time.perf_counter()
1779
+ result["card_write"] = write_work_fact_card(
1723
1780
  payload=result,
1724
1781
  platform="xiaohongshu",
1725
1782
  card_type=card_type,
1726
1783
  card_root=card_root,
1727
1784
  content_kind="single_video",
1728
1785
  storage_config=storage_config,
1786
+ analysis_mode=analysis_mode,
1787
+ progress=progress.child(scope="card_write") if progress is not None else None,
1729
1788
  )
1789
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1790
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1791
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1792
+ result["timings"] = dict(timings)
1793
+ _update_pipeline_status(result)
1730
1794
  return _finalize_result(
1731
1795
  result=result,
1732
1796
  source_input=source_input,
@@ -1766,23 +1830,32 @@ def run_xiaohongshu_extract(
1766
1830
  u2_task_id=None,
1767
1831
  u2_task_status="SKIPPED",
1768
1832
  note_content_type=note_content_type,
1769
- analysis_mode="video_full",
1833
+ analysis_mode=analysis_mode,
1770
1834
  selected_video_url=u2_gate.get("video_down_url") or selected_video_url,
1771
1835
  selected_video_candidates=video_candidates,
1772
1836
  selected_image_urls=image_candidates,
1773
1837
  downloaded_assets=[],
1774
1838
  missing_fields=missing_fields,
1775
1839
  metadata_fields=metadata_fields,
1840
+ timings=timings,
1776
1841
  )
1777
1842
  if write_card:
1778
- result["card_write"] = write_benchmark_card(
1843
+ card_started_at = time.perf_counter()
1844
+ result["card_write"] = write_work_fact_card(
1779
1845
  payload=result,
1780
1846
  platform="xiaohongshu",
1781
1847
  card_type=card_type,
1782
1848
  card_root=card_root,
1783
1849
  content_kind="single_video",
1784
1850
  storage_config=storage_config,
1851
+ analysis_mode=analysis_mode,
1852
+ progress=progress.child(scope="card_write") if progress is not None else None,
1785
1853
  )
1854
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1855
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1856
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1857
+ result["timings"] = dict(timings)
1858
+ _update_pipeline_status(result)
1786
1859
  return _finalize_result(
1787
1860
  result=result,
1788
1861
  source_input=source_input,
@@ -1792,16 +1865,18 @@ def run_xiaohongshu_extract(
1792
1865
  )
1793
1866
 
1794
1867
  u2_candidates = _dedupe_keep_order([u2_gate.get("video_down_url")] + list(video_candidates))
1868
+ u2_timeout_ms = _resolve_u2_timeout_ms(runtime["timeout_ms"])
1795
1869
  if progress is not None:
1796
1870
  progress.progress(
1797
1871
  stage="note.u2",
1798
1872
  message="starting xiaohongshu u2 flow",
1799
- data={"candidate_count": len(u2_candidates)},
1873
+ data={"candidate_count": len(u2_candidates), "timeout_ms": u2_timeout_ms},
1800
1874
  )
1875
+ u2_started_at = time.perf_counter()
1801
1876
  u2_bundle = run_u2_asr_candidates_with_timeout_retry(
1802
1877
  base_url=runtime["base_url"],
1803
1878
  token=runtime["token"],
1804
- timeout_ms=runtime["timeout_ms"],
1879
+ timeout_ms=u2_timeout_ms,
1805
1880
  candidates=u2_candidates,
1806
1881
  submit_max_retries=u2_submit_max_retries,
1807
1882
  submit_backoff_ms=u2_submit_backoff_ms,
@@ -1809,7 +1884,13 @@ def run_xiaohongshu_extract(
1809
1884
  max_polls=max_polls,
1810
1885
  timeout_retry_enabled=u2_timeout_retry_enabled,
1811
1886
  timeout_retry_max_retries=u2_timeout_retry_max_retries,
1887
+ pending_timeout_sec=int(config_get(storage_config or {}, "runtime.u2_pending_timeout_sec", 60) or 60),
1888
+ progress_callback=(
1889
+ lambda event: _report_u2_progress(progress, stage="note.u2", event=event, label="xiaohongshu")
1890
+ ) if progress is not None else None,
1812
1891
  )
1892
+ timings["u2_submit_ms"] = _to_int_or_none(u2_bundle.get("submit_duration_ms")) or 0
1893
+ timings["u2_poll_ms"] = _to_int_or_none(u2_bundle.get("poll_duration_ms")) or _elapsed_ms(u2_started_at)
1813
1894
  submit_bundle = u2_bundle.get("submit_bundle", {})
1814
1895
  submit_response = submit_bundle.get("submit_response", {})
1815
1896
  task_id = submit_bundle.get("task_id")
@@ -1818,6 +1899,19 @@ def run_xiaohongshu_extract(
1818
1899
  if selected_video_url and not normalize_text(metadata_fields.get("video_down_url")):
1819
1900
  metadata_fields["video_down_url"] = selected_video_url
1820
1901
 
1902
+ if progress is not None:
1903
+ progress.http_event(
1904
+ stage="note.u2",
1905
+ endpoint="/api/u2/v1/services/audio/asr/transcription",
1906
+ response=submit_response,
1907
+ route_label="u2_submit",
1908
+ summary={
1909
+ "task_id": task_id,
1910
+ "retry_count": len(submit_bundle.get("retry_chain", [])),
1911
+ "candidate_count": len(u2_candidates),
1912
+ },
1913
+ )
1914
+
1821
1915
  trace.append(
1822
1916
  {
1823
1917
  "step": "u2_asr_timeout_retry",
@@ -1830,6 +1924,7 @@ def run_xiaohongshu_extract(
1830
1924
  "u2_submit_backoff_ms": max(0, int(u2_submit_backoff_ms)),
1831
1925
  },
1832
1926
  "timeout_retry": u2_bundle.get("timeout_retry", {}),
1927
+ "u3_fallback": u2_bundle.get("u3_fallback", {}),
1833
1928
  "rounds": u2_bundle.get("rounds", []),
1834
1929
  "final_task_id": poll_result.get("task_id") or task_id,
1835
1930
  "final_task_status": poll_result.get("task_status"),
@@ -1879,23 +1974,32 @@ def run_xiaohongshu_extract(
1879
1974
  u2_task_id=poll_result.get("task_id") or task_id,
1880
1975
  u2_task_status=poll_result.get("task_status") or "UNKNOWN",
1881
1976
  note_content_type=note_content_type,
1882
- analysis_mode="video_full",
1977
+ analysis_mode=analysis_mode,
1883
1978
  selected_video_url=selected_video_url,
1884
1979
  selected_video_candidates=u2_candidates,
1885
1980
  selected_image_urls=image_candidates,
1886
1981
  downloaded_assets=[],
1887
1982
  missing_fields=missing_fields,
1888
1983
  metadata_fields=metadata_fields,
1984
+ timings=timings,
1889
1985
  )
1890
1986
  if write_card:
1891
- result["card_write"] = write_benchmark_card(
1987
+ card_started_at = time.perf_counter()
1988
+ result["card_write"] = write_work_fact_card(
1892
1989
  payload=result,
1893
1990
  platform="xiaohongshu",
1894
1991
  card_type=card_type,
1895
1992
  card_root=card_root,
1896
1993
  content_kind="single_video",
1897
1994
  storage_config=storage_config,
1995
+ analysis_mode=analysis_mode,
1996
+ progress=progress.child(scope="card_write") if progress is not None else None,
1898
1997
  )
1998
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1999
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
2000
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
2001
+ result["timings"] = dict(timings)
2002
+ _update_pipeline_status(result)
1899
2003
  return _finalize_result(
1900
2004
  result=result,
1901
2005
  source_input=source_input,
@@ -1911,38 +2015,57 @@ def run_xiaohongshu_extract(
1911
2015
  explicit_error_reason=poll_result.get("error_reason"),
1912
2016
  explicit_request_id=poll_result.get("request_id") or submit_response.get("request_id") or note_response.get("request_id"),
1913
2017
  )
2018
+ text_source = "u2"
2019
+ confidence = "high" if poll_result.get("ok") and raw_content else "low"
2020
+ error_reason = final_ctx.get("error_reason")
2021
+ if not raw_content and caption_text:
2022
+ missing_fields.append({"field": "asr_transcript", "reason": f"u2_failed:{error_reason or 'u2_poll_timeout'}"})
2023
+ raw_content = caption_text
2024
+ text_source = "caption_fallback"
2025
+ confidence = "medium"
2026
+ error_reason = None
1914
2027
  result = _build_result(
1915
2028
  source_input=source_input,
1916
2029
  raw_content=raw_content,
1917
- confidence="high" if poll_result.get("ok") and raw_content else "low",
1918
- error_reason=final_ctx.get("error_reason"),
2030
+ confidence=confidence,
2031
+ error_reason=error_reason,
1919
2032
  extract_trace=trace,
1920
2033
  fallback_trace=final_ctx.get("fallback_trace", []),
1921
2034
  request_id=final_ctx.get("request_id"),
1922
- text_source="u2",
2035
+ text_source=text_source,
1923
2036
  note_id=str(resolved_note_id) if resolved_note_id else source_input.get("note_id"),
1924
2037
  subtitle_hit=False,
1925
2038
  u2_task_id=poll_result.get("task_id") or task_id,
1926
2039
  u2_task_status=poll_result.get("task_status"),
1927
2040
  note_content_type=note_content_type,
1928
- analysis_mode="video_full",
2041
+ analysis_mode=analysis_mode,
1929
2042
  selected_video_url=selected_video_url,
1930
2043
  selected_video_candidates=u2_candidates,
1931
2044
  selected_image_urls=image_candidates,
1932
2045
  downloaded_assets=[],
1933
2046
  missing_fields=missing_fields,
1934
2047
  metadata_fields=metadata_fields,
2048
+ timings=timings,
1935
2049
  )
1936
2050
 
1937
2051
  if write_card:
1938
- result["card_write"] = write_benchmark_card(
2052
+ card_started_at = time.perf_counter()
2053
+ result["card_write"] = write_work_fact_card(
1939
2054
  payload=result,
1940
2055
  platform="xiaohongshu",
1941
2056
  card_type=card_type,
1942
2057
  card_root=card_root,
1943
2058
  content_kind="single_video",
1944
2059
  storage_config=storage_config,
2060
+ analysis_mode=analysis_mode,
2061
+ progress=progress.child(scope="card_write") if progress is not None else None,
1945
2062
  )
2063
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
2064
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
2065
+
2066
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
2067
+ result["timings"] = dict(timings)
2068
+ _update_pipeline_status(result)
1946
2069
 
1947
2070
  return _finalize_result(
1948
2071
  result=result,
@@ -1989,24 +2112,34 @@ def run_xiaohongshu_extract(
1989
2112
  u2_task_id=None,
1990
2113
  u2_task_status="SKIPPED",
1991
2114
  note_content_type="image" if note_content_type == "unknown" else note_content_type,
1992
- analysis_mode="image_light_analysis",
2115
+ analysis_mode=analysis_mode,
1993
2116
  selected_video_url=None,
1994
2117
  selected_video_candidates=video_candidates,
1995
2118
  selected_image_urls=image_candidates,
1996
2119
  downloaded_assets=downloaded_assets,
1997
2120
  missing_fields=missing_fields,
1998
2121
  metadata_fields=metadata_fields,
2122
+ timings=timings,
1999
2123
  )
2000
2124
 
2001
2125
  if write_card:
2002
- result["card_write"] = write_benchmark_card(
2126
+ card_started_at = time.perf_counter()
2127
+ result["card_write"] = write_work_fact_card(
2003
2128
  payload=result,
2004
2129
  platform="xiaohongshu",
2005
2130
  card_type=card_type,
2006
2131
  card_root=card_root,
2007
2132
  content_kind="note",
2008
2133
  storage_config=storage_config,
2134
+ analysis_mode=analysis_mode,
2135
+ progress=progress.child(scope="card_write") if progress is not None else None,
2009
2136
  )
2137
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
2138
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
2139
+
2140
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
2141
+ result["timings"] = dict(timings)
2142
+ _update_pipeline_status(result)
2010
2143
 
2011
2144
  finalized = _finalize_result(
2012
2145
  result=result,
@@ -2025,6 +2158,7 @@ def run_xiaohongshu_extract(
2025
2158
  "card_write_ok": bool((finalized.get("card_write") or {}).get("ok")),
2026
2159
  "output_persist_ok": bool((finalized.get("output_persist") or {}).get("ok")),
2027
2160
  "text_source": finalized.get("text_source"),
2161
+ "deep_analysis_status": ((finalized.get("deep_analysis") or {}).get("status")),
2028
2162
  },
2029
2163
  )
2030
2164
  return finalized
@@ -2069,7 +2203,14 @@ def main() -> None:
2069
2203
  help="Conservative max retries for U2 timeout-only retry (0~3)",
2070
2204
  )
2071
2205
  parser.add_argument("--force-u2-fallback", action="store_true", help="Skip subtitle usage and force U2 fallback (test)")
2072
- parser.add_argument("--card-type", choices=["work", "author", "author_sample_work"], default="work", help="Primary card type")
2206
+ parser.add_argument("--card-type", choices=["work"], default="work", help="Primary card type")
2207
+ parser.add_argument("--card-mode", dest="analysis_mode", choices=["standard", "auto", "local"], default="standard", help="Fact-card mode")
2208
+ parser.add_argument("--analysis-mode", dest="analysis_mode", choices=["standard", "auto", "local"], help=argparse.SUPPRESS)
2209
+ parser.set_defaults(write_card=True, persist_output=True)
2210
+ parser.add_argument("--write-card", dest="write_card", action="store_true", help="Write final work card")
2211
+ parser.add_argument("--no-write-card", dest="write_card", action="store_false", help="Skip card writing")
2212
+ parser.add_argument("--persist-output", dest="persist_output", action="store_true", help="Persist result JSON")
2213
+ parser.add_argument("--no-persist-output", dest="persist_output", action="store_false", help="Skip result JSON persist")
2073
2214
  parser.add_argument("--card-root", default=None, help="Card root (absolute); falls back to TIKOMNI_CARD_ROOT when writing cards")
2074
2215
  args = parser.parse_args()
2075
2216
 
@@ -2109,6 +2250,12 @@ def main() -> None:
2109
2250
  if args.u2_timeout_retry_max_retries is not None
2110
2251
  else config_get(config, "asr_strategy.u2_timeout_retry.max_retries", 3)
2111
2252
  )
2253
+ progress = build_progress_reporter(
2254
+ workflow="social-media-crawl",
2255
+ platform="xiaohongshu",
2256
+ content_kind="note",
2257
+ input_value=args.share_text or args.note_id or args.input,
2258
+ )
2112
2259
 
2113
2260
  try:
2114
2261
  result = run_xiaohongshu_extract(
@@ -2126,12 +2273,14 @@ def main() -> None:
2126
2273
  u2_timeout_retry_enabled=bool(u2_timeout_retry_enabled),
2127
2274
  u2_timeout_retry_max_retries=int(u2_timeout_retry_max_retries),
2128
2275
  force_u2_fallback=args.force_u2_fallback,
2129
- write_card=True,
2276
+ write_card=bool(args.write_card),
2277
+ analysis_mode=args.analysis_mode,
2130
2278
  card_type=args.card_type,
2131
2279
  card_root=args.card_root,
2132
2280
  storage_config=config,
2133
2281
  allow_process_env=args.allow_process_env,
2134
- persist_output=True,
2282
+ persist_output=bool(args.persist_output),
2283
+ progress=progress,
2135
2284
  )
2136
2285
  except ValueError as error:
2137
2286
  result = {