@tikomni/skills 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.skill-package-allowlist.txt +1 -3
- package/README.md +41 -49
- package/README.zh-CN.md +43 -51
- package/bin/tikomni-skills.js +2 -2
- package/env.example +37 -56
- package/package.json +7 -3
- package/skills/social-media-crawl/SKILL.md +53 -0
- package/skills/social-media-crawl/agents/openai.yaml +5 -0
- package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
- package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
- package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
- package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
- package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
- package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
- package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
- package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
- package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
- package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
- package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
- package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
- package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
- package/skills/social-media-crawl/scripts/__init__.py +2 -0
- package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
- package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +252 -9
- package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
- package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +108 -167
- package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
- package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
- package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +6 -2
- package/skills/{single-work-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
- package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
- package/skills/{creator-analysis → social-media-crawl}/scripts/core/tikomni_common.py +13 -3
- package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
- package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
- package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
- package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
- package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
- package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
- package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +282 -174
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
- package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +290 -141
- package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
- package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
- package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
- package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
- package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
- package/skills/creator-analysis/SKILL.md +0 -95
- package/skills/creator-analysis/agents/openai.yaml +0 -4
- package/skills/creator-analysis/env.example +0 -36
- package/skills/creator-analysis/references/api-capability-index.md +0 -92
- package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
- package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
- package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
- package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
- package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
- package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
- package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
- package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
- package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
- package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
- package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
- package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
- package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
- package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
- package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
- package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
- package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
- package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
- package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
- package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
- package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
- package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
- package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
- package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
- package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
- package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
- package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
- package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
- package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
- package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
- package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
- package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
- package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
- package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
- package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
- package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
- package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
- package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
- package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
- package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
- package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
- package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
- package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
- package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
- package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
- package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
- package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
- package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
- package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
- package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
- package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
- package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
- package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
- package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
- package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
- package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
- package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
- package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
- package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
- package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
- package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
- package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
- package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
- package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
- package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
- package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
- package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
- package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
- package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
- package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
- package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
- package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
- package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
- package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
- package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
- package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
- package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
- package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
- package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
- package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
- package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
- package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
- package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
- package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
- package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
- package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
- package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
- package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
- package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
- package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
- package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
- package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
- package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
- package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
- package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
- package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
- package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
- package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
- package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
- package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
- package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
- package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
- package/skills/creator-analysis/references/asr-orchestration.md +0 -33
- package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
- package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
- package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
- package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
- package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
- package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
- package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
- package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
- package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
- package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
- package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
- package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
- package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
- package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
- package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
- package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
- package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
- package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
- package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
- package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
- package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
- package/skills/creator-analysis/references/workflow.md +0 -23
- package/skills/creator-analysis/scripts/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
- package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
- package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
- package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
- package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
- package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
- package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
- package/skills/creator-analysis/scripts/core/__init__.py +0 -0
- package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
- package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
- package/skills/creator-analysis/scripts/core/progress_report.py +0 -111
- package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
- package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
- package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
- package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
- package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
- package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
- package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
- package/skills/meta-capability/SKILL.md +0 -69
- package/skills/meta-capability/agents/openai.yaml +0 -4
- package/skills/meta-capability/env.example +0 -42
- package/skills/meta-capability/references/api-capability-index.md +0 -92
- package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
- package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
- package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
- package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
- package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
- package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
- package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
- package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
- package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
- package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
- package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
- package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
- package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
- package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
- package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
- package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
- package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
- package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
- package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
- package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
- package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
- package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
- package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
- package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
- package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
- package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
- package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
- package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
- package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
- package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
- package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
- package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
- package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
- package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
- package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
- package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
- package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
- package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
- package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
- package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
- package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
- package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
- package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
- package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
- package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
- package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
- package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
- package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
- package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
- package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
- package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
- package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
- package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
- package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
- package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
- package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
- package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
- package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
- package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
- package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
- package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
- package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
- package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
- package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
- package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
- package/skills/meta-capability/references/api-tags/health-check.md +0 -40
- package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
- package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
- package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
- package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
- package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
- package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
- package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
- package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
- package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
- package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
- package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
- package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
- package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
- package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
- package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
- package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
- package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
- package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
- package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
- package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
- package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
- package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
- package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
- package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
- package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
- package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
- package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
- package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
- package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
- package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
- package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
- package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
- package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
- package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
- package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
- package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
- package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
- package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
- package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
- package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
- package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
- package/skills/meta-capability/references/dispatch.md +0 -27
- package/skills/meta-capability/references/execution-guidelines.md +0 -25
- package/skills/meta-capability/references/implemented-route-map.md +0 -177
- package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
- package/skills/meta-capability/scripts/__init__.py +0 -1
- package/skills/meta-capability/scripts/call_route.py +0 -141
- package/skills/meta-capability/scripts/core/__init__.py +0 -1
- package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
- package/skills/meta-capability/scripts/core/config_loader.py +0 -204
- package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
- package/skills/meta-capability/scripts/test_auth.py +0 -98
- package/skills/single-work-analysis/SKILL.md +0 -62
- package/skills/single-work-analysis/agents/openai.yaml +0 -4
- package/skills/single-work-analysis/env.example +0 -36
- package/skills/single-work-analysis/references/api-capability-index.md +0 -92
- package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
- package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
- package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
- package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
- package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
- package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
- package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
- package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
- package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
- package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
- package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
- package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
- package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
- package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
- package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
- package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
- package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
- package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
- package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
- package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
- package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
- package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
- package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
- package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
- package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
- package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
- package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
- package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
- package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
- package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
- package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
- package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
- package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
- package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
- package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
- package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
- package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
- package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
- package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
- package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
- package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
- package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
- package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
- package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
- package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
- package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
- package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
- package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
- package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
- package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
- package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
- package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
- package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
- package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
- package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
- package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
- package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
- package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
- package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
- package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
- package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
- package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
- package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
- package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
- package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
- package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
- package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
- package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
- package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
- package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
- package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
- package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
- package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
- package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
- package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
- package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
- package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
- package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
- package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
- package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
- package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
- package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
- package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
- package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
- package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
- package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
- package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
- package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
- package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
- package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
- package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
- package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
- package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
- package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
- package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
- package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
- package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
- package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
- package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
- package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -58
- package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
- package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
- package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
- package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
- package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
- package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
- package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/insight.md +0 -47
- package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
- package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
- package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
- package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
- package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
- package/skills/single-work-analysis/scripts/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -133
- package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
- package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
- package/skills/single-work-analysis/scripts/core/storage_router.py +0 -253
- package/skills/single-work-analysis/scripts/core/tikomni_common.py +0 -588
- package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
- package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
- package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
- package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
- package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -1402
- /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
- /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
- /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
- /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
- /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""Douyin single-
|
|
2
|
+
"""Douyin single-work fixed pipeline runner (APP first, WEB fallback)."""
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
@@ -22,18 +22,17 @@ bootstrap_for_direct_run(__file__, __package__)
|
|
|
22
22
|
import hashlib
|
|
23
23
|
import json
|
|
24
24
|
import re
|
|
25
|
+
import time
|
|
25
26
|
from datetime import datetime
|
|
26
27
|
from pathlib import Path
|
|
27
28
|
from typing import Any, Dict, List, Optional
|
|
28
29
|
|
|
29
|
-
from scripts.core.config_loader import config_get, load_tikomni_config
|
|
30
|
+
from scripts.core.config_loader import config_get, load_tikomni_config
|
|
30
31
|
from scripts.core.extract_pipeline import resolve_trace_error_context
|
|
31
|
-
from scripts.core.progress_report import ProgressReporter
|
|
32
|
-
from scripts.
|
|
33
|
-
from scripts.
|
|
34
|
-
from scripts.
|
|
35
|
-
from scripts.pipeline.asr.poll_u2_task import poll_u2_task
|
|
36
|
-
from scripts.platform.douyin.select_low_quality_video_url import select_low_quality_video_url
|
|
32
|
+
from scripts.core.progress_report import ProgressReporter, build_progress_reporter
|
|
33
|
+
from scripts.pipelines.douyin_video_type_matrix import normalize_douyin_video_type
|
|
34
|
+
from scripts.core.asr_pipeline import derive_asr_clean_text, run_u2_asr_with_timeout_retry
|
|
35
|
+
from scripts.pipelines.select_low_quality_video_url import select_low_quality_video_url
|
|
37
36
|
from scripts.core.tikomni_common import (
|
|
38
37
|
call_json_api,
|
|
39
38
|
deep_find_all,
|
|
@@ -42,11 +41,16 @@ from scripts.core.tikomni_common import (
|
|
|
42
41
|
summarize_content,
|
|
43
42
|
write_json_stdout,
|
|
44
43
|
)
|
|
45
|
-
from scripts.writers.
|
|
44
|
+
from scripts.writers.write_work_fact_card import (
|
|
45
|
+
build_work_output_envelope,
|
|
46
|
+
persist_output_envelope,
|
|
47
|
+
write_work_fact_card,
|
|
48
|
+
)
|
|
46
49
|
|
|
47
50
|
APP_ENDPOINT = "/api/u1/v1/douyin/app/v3/fetch_one_video_by_share_url"
|
|
48
51
|
WEB_ENDPOINT = "/api/u1/v1/douyin/web/fetch_one_video_by_share_url"
|
|
49
52
|
U2_SUBMIT_ENDPOINT = "/api/u2/v1/services/audio/asr/transcription"
|
|
53
|
+
U2_REQUEST_TIMEOUT_CAP_MS = 15000
|
|
50
54
|
|
|
51
55
|
|
|
52
56
|
def _format_published_date(value: Any) -> str:
|
|
@@ -86,107 +90,44 @@ def _traceable_identifier(source_input: Dict[str, Optional[str]], platform_work_
|
|
|
86
90
|
return f"url-{digest}"
|
|
87
91
|
|
|
88
92
|
|
|
89
|
-
def
|
|
90
|
-
*,
|
|
91
|
-
result: Dict[str, Any],
|
|
92
|
-
source_input: Dict[str, Optional[str]],
|
|
93
|
-
platform_work_id: Optional[str],
|
|
94
|
-
status: str,
|
|
95
|
-
written_at: datetime,
|
|
96
|
-
) -> Dict[str, Any]:
|
|
97
|
-
summary = {
|
|
98
|
-
"summary": result.get("summary", ""),
|
|
99
|
-
"insights": result.get("insights", []),
|
|
100
|
-
"confidence": result.get("confidence"),
|
|
101
|
-
"error_reason": result.get("error_reason"),
|
|
102
|
-
}
|
|
103
|
-
normalized = {
|
|
104
|
-
"platform": result.get("platform", "douyin"),
|
|
105
|
-
"content_kind": result.get("content_kind", "single_video"),
|
|
106
|
-
"platform_work_id": result.get("platform_work_id"),
|
|
107
|
-
"title": result.get("title"),
|
|
108
|
-
"duration_ms": result.get("duration_ms"),
|
|
109
|
-
"is_video": result.get("is_video"),
|
|
110
|
-
"u2_task_id": result.get("u2_task_id"),
|
|
111
|
-
"u2_task_status": result.get("u2_task_status"),
|
|
112
|
-
"request_id": result.get("request_id"),
|
|
113
|
-
"source": source_input,
|
|
114
|
-
}
|
|
115
|
-
return {
|
|
116
|
-
"meta": {
|
|
117
|
-
"written_at": written_at.isoformat(timespec="seconds"),
|
|
118
|
-
"status": status,
|
|
119
|
-
"platform": "douyin",
|
|
120
|
-
"identifier": _traceable_identifier(source_input, platform_work_id),
|
|
121
|
-
},
|
|
122
|
-
"summary": summary,
|
|
123
|
-
"normalized": normalized,
|
|
124
|
-
"raw": result,
|
|
125
|
-
}
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def _persist_output_artifact(
|
|
129
|
-
*,
|
|
130
|
-
result: Dict[str, Any],
|
|
131
|
-
source_input: Dict[str, Optional[str]],
|
|
132
|
-
platform_work_id: Optional[str],
|
|
133
|
-
storage_config: Optional[Dict[str, Any]],
|
|
134
|
-
persist_output: bool,
|
|
135
|
-
) -> Dict[str, Any]:
|
|
136
|
-
if not persist_output:
|
|
137
|
-
return {"enabled": False, "skipped": True, "reason": "disabled_by_flag"}
|
|
138
|
-
|
|
93
|
+
def _resolve_u2_timeout_ms(timeout_ms: Any) -> int:
|
|
139
94
|
try:
|
|
140
|
-
|
|
141
|
-
except Exception
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
timestamp = now.strftime("%Y%m%dT%H%M%S")
|
|
147
|
-
identifier = _traceable_identifier(source_input, platform_work_id)
|
|
148
|
-
has_error = bool(result.get("error_reason"))
|
|
149
|
-
status = "error" if has_error else "success"
|
|
150
|
-
|
|
151
|
-
if has_error:
|
|
152
|
-
target_dir = Path(paths.get("errors_root", "")) / date_key
|
|
153
|
-
else:
|
|
154
|
-
target_dir = Path(paths.get("results_root", "")) / date_key
|
|
95
|
+
parsed = int(timeout_ms)
|
|
96
|
+
except Exception:
|
|
97
|
+
parsed = U2_REQUEST_TIMEOUT_CAP_MS
|
|
98
|
+
if parsed <= 0:
|
|
99
|
+
return U2_REQUEST_TIMEOUT_CAP_MS
|
|
100
|
+
return max(5000, min(parsed, U2_REQUEST_TIMEOUT_CAP_MS))
|
|
155
101
|
|
|
156
|
-
target_dir.mkdir(parents=True, exist_ok=True)
|
|
157
|
-
file_name = render_output_filename(
|
|
158
|
-
pattern=resolve_json_filename_pattern(storage_config),
|
|
159
|
-
context={
|
|
160
|
-
"prefix": status,
|
|
161
|
-
"platform": "douyin",
|
|
162
|
-
"card_type": "single_work_result",
|
|
163
|
-
"author_slug": identifier,
|
|
164
|
-
"title_slug": identifier,
|
|
165
|
-
"identifier": identifier,
|
|
166
|
-
"timestamp": timestamp,
|
|
167
|
-
"date": date_key,
|
|
168
|
-
"ext": ".json",
|
|
169
|
-
},
|
|
170
|
-
default_filename=f"{timestamp}-douyin-{identifier}.json",
|
|
171
|
-
default_ext=".json",
|
|
172
|
-
)
|
|
173
|
-
file_path = target_dir / file_name
|
|
174
102
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
platform_work_id=platform_work_id,
|
|
179
|
-
status=status,
|
|
180
|
-
written_at=now,
|
|
181
|
-
)
|
|
182
|
-
file_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
103
|
+
def _report_u2_progress(progress: Optional[ProgressReporter], *, stage: str, event: Dict[str, Any], label: str) -> None:
|
|
104
|
+
if progress is None:
|
|
105
|
+
return
|
|
183
106
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
"
|
|
188
|
-
"
|
|
107
|
+
phase = normalize_text(event.get("phase")).lower()
|
|
108
|
+
state = normalize_text(event.get("state")).lower()
|
|
109
|
+
payload = {
|
|
110
|
+
"phase": phase or "poll",
|
|
111
|
+
"state": state or "",
|
|
112
|
+
"task_id": event.get("task_id"),
|
|
113
|
+
"attempt": event.get("attempt"),
|
|
114
|
+
"task_status": event.get("task_status"),
|
|
115
|
+
"platform_task_status": event.get("platform_task_status"),
|
|
116
|
+
"pending_count": event.get("pending_count"),
|
|
117
|
+
"status_code": event.get("status_code"),
|
|
118
|
+
"batch_progress": event.get("batch_progress"),
|
|
119
|
+
"wait_ms": event.get("wait_ms"),
|
|
120
|
+
"candidate_count": event.get("candidate_count"),
|
|
121
|
+
"ok": event.get("ok"),
|
|
122
|
+
"error_reason": event.get("error_reason"),
|
|
123
|
+
"retriable": event.get("retriable"),
|
|
124
|
+
"request_id": event.get("request_id"),
|
|
189
125
|
}
|
|
126
|
+
message = f"{label} u2 {phase or 'poll'} {state or 'progress'}"
|
|
127
|
+
if phase == "submit" and state == "heartbeat":
|
|
128
|
+
progress.heartbeat(stage=stage, message=message, data=payload)
|
|
129
|
+
return
|
|
130
|
+
progress.progress(stage=stage, message=message, data=payload)
|
|
190
131
|
|
|
191
132
|
|
|
192
133
|
def _finalize_result(
|
|
@@ -197,14 +138,19 @@ def _finalize_result(
|
|
|
197
138
|
storage_config: Optional[Dict[str, Any]],
|
|
198
139
|
persist_output: bool,
|
|
199
140
|
) -> Dict[str, Any]:
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
141
|
+
envelope = build_work_output_envelope(result, platform="douyin")
|
|
142
|
+
if "card_write" in result:
|
|
143
|
+
envelope["card_write"] = result.get("card_write")
|
|
144
|
+
if not persist_output:
|
|
145
|
+
envelope["output_persist"] = {"enabled": False, "skipped": True, "reason": "disabled_by_flag"}
|
|
146
|
+
return envelope
|
|
147
|
+
envelope["output_persist"] = persist_output_envelope(
|
|
148
|
+
envelope=envelope,
|
|
204
149
|
storage_config=storage_config,
|
|
205
|
-
|
|
150
|
+
platform="douyin",
|
|
151
|
+
fallback_identifier=platform_work_id or _traceable_identifier(source_input, platform_work_id),
|
|
206
152
|
)
|
|
207
|
-
return
|
|
153
|
+
return envelope
|
|
208
154
|
|
|
209
155
|
def _normalize_input(
|
|
210
156
|
input_value: Optional[str],
|
|
@@ -330,12 +276,12 @@ def _extract_author(item: Dict[str, Any]) -> Dict[str, Optional[str]]:
|
|
|
330
276
|
}
|
|
331
277
|
|
|
332
278
|
|
|
333
|
-
def _extract_metrics(item: Dict[str, Any]) -> Dict[str, int]:
|
|
279
|
+
def _extract_metrics(item: Dict[str, Any]) -> Dict[str, Optional[int]]:
|
|
334
280
|
statistics = item.get("statistics")
|
|
335
281
|
if not isinstance(statistics, dict):
|
|
336
282
|
statistics = {}
|
|
337
283
|
|
|
338
|
-
def metric(*keys: str) -> int:
|
|
284
|
+
def metric(*keys: str, default: Optional[int] = 0) -> Optional[int]:
|
|
339
285
|
for key in keys:
|
|
340
286
|
value = _safe_int(statistics.get(key))
|
|
341
287
|
if value is not None:
|
|
@@ -343,15 +289,25 @@ def _extract_metrics(item: Dict[str, Any]) -> Dict[str, int]:
|
|
|
343
289
|
value = _safe_int(item.get(key))
|
|
344
290
|
if value is not None:
|
|
345
291
|
return value
|
|
346
|
-
return
|
|
292
|
+
return default
|
|
347
293
|
|
|
348
|
-
|
|
294
|
+
metrics = {
|
|
349
295
|
"digg_count": metric("digg_count"),
|
|
350
296
|
"comment_count": metric("comment_count"),
|
|
351
297
|
"collect_count": metric("collect_count"),
|
|
352
298
|
"share_count": metric("share_count", "forward_count"),
|
|
353
|
-
"play_count": metric("play_count"),
|
|
299
|
+
"play_count": metric("play_count", default=None),
|
|
354
300
|
}
|
|
301
|
+
play_count = metrics.get("play_count")
|
|
302
|
+
engagement_floor = max(
|
|
303
|
+
int(metrics.get("digg_count") or 0),
|
|
304
|
+
int(metrics.get("comment_count") or 0),
|
|
305
|
+
int(metrics.get("collect_count") or 0),
|
|
306
|
+
int(metrics.get("share_count") or 0),
|
|
307
|
+
)
|
|
308
|
+
if play_count is not None and int(play_count) <= 0 and engagement_floor > 0:
|
|
309
|
+
metrics["play_count"] = None
|
|
310
|
+
return metrics
|
|
355
311
|
|
|
356
312
|
|
|
357
313
|
def _extract_platform_work_id(item: Dict[str, Any]) -> Optional[str]:
|
|
@@ -550,6 +506,71 @@ def _trace_step(
|
|
|
550
506
|
return payload
|
|
551
507
|
|
|
552
508
|
|
|
509
|
+
def _empty_metrics() -> Dict[str, Optional[int]]:
|
|
510
|
+
return {
|
|
511
|
+
"digg_count": 0,
|
|
512
|
+
"comment_count": 0,
|
|
513
|
+
"collect_count": 0,
|
|
514
|
+
"share_count": 0,
|
|
515
|
+
"play_count": None,
|
|
516
|
+
}
|
|
517
|
+
|
|
518
|
+
|
|
519
|
+
def _empty_timings() -> Dict[str, int]:
|
|
520
|
+
return {
|
|
521
|
+
"url_parse_ms": 0,
|
|
522
|
+
"u1_total_ms": 0,
|
|
523
|
+
"u2_submit_ms": 0,
|
|
524
|
+
"u2_poll_ms": 0,
|
|
525
|
+
"card_write_ms": 0,
|
|
526
|
+
"llm_analysis_ms": 0,
|
|
527
|
+
"total_ms": 0,
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def _elapsed_ms(started_at: float) -> int:
|
|
532
|
+
return int((time.perf_counter() - started_at) * 1000)
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def _u1_response_summary(response: Dict[str, Any]) -> Dict[str, Any]:
|
|
536
|
+
payload = response.get("data")
|
|
537
|
+
item = _extract_aweme_detail(payload)
|
|
538
|
+
return {
|
|
539
|
+
"platform_work_id": _extract_platform_work_id(item or {}) if isinstance(item, dict) else None,
|
|
540
|
+
"title_hit": bool(_pick_title(item or {})) if isinstance(item, dict) else False,
|
|
541
|
+
"desc_hit": bool(_pick_desc(item or {})) if isinstance(item, dict) else False,
|
|
542
|
+
"video_url_present": bool(normalize_text((item or {}).get("video_down_url"))) if isinstance(item, dict) else False,
|
|
543
|
+
}
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def _emit_http_progress(
|
|
547
|
+
progress: Optional[ProgressReporter],
|
|
548
|
+
*,
|
|
549
|
+
stage: str,
|
|
550
|
+
response: Dict[str, Any],
|
|
551
|
+
route_label: str,
|
|
552
|
+
) -> None:
|
|
553
|
+
if progress is None:
|
|
554
|
+
return
|
|
555
|
+
progress.http_event(
|
|
556
|
+
stage=stage,
|
|
557
|
+
endpoint=str(response.get("_endpoint") or route_label),
|
|
558
|
+
response=response,
|
|
559
|
+
route_label=route_label,
|
|
560
|
+
summary=_u1_response_summary(response),
|
|
561
|
+
)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
def _update_pipeline_status(result: Dict[str, Any]) -> None:
|
|
565
|
+
card_write = result.get("card_write") if isinstance(result.get("card_write"), dict) else {}
|
|
566
|
+
deep_analysis = result.get("deep_analysis") if isinstance(result.get("deep_analysis"), dict) else {}
|
|
567
|
+
result["pipeline_status"] = {
|
|
568
|
+
"facts_ready": True,
|
|
569
|
+
"card_ready": bool(card_write.get("ok")),
|
|
570
|
+
"deep_analysis": deep_analysis.get("status") or "skipped",
|
|
571
|
+
}
|
|
572
|
+
|
|
573
|
+
|
|
553
574
|
def _build_missing_fields(
|
|
554
575
|
*,
|
|
555
576
|
title: str,
|
|
@@ -594,7 +615,7 @@ def _build_result(
|
|
|
594
615
|
duration_ms: Optional[int],
|
|
595
616
|
video_down_url: Optional[str],
|
|
596
617
|
author: Dict[str, Optional[str]],
|
|
597
|
-
metrics: Dict[str, int],
|
|
618
|
+
metrics: Dict[str, Optional[int]],
|
|
598
619
|
tags: List[str],
|
|
599
620
|
is_video: bool,
|
|
600
621
|
video_type_reason: str,
|
|
@@ -607,9 +628,11 @@ def _build_result(
|
|
|
607
628
|
u2_task_id: Optional[str],
|
|
608
629
|
u2_task_status: str,
|
|
609
630
|
u2_gate_reason: str,
|
|
631
|
+
analysis_mode: str,
|
|
610
632
|
create_time_sec: Optional[int] = None,
|
|
611
633
|
cover_image: Optional[str] = None,
|
|
612
634
|
asr_source: str = "fallback_none",
|
|
635
|
+
timings: Optional[Dict[str, int]] = None,
|
|
613
636
|
) -> Dict[str, Any]:
|
|
614
637
|
summary_block = summarize_content(raw_content, source="douyin:single-video-low-quality")
|
|
615
638
|
insights = list(summary_block.get("insights", []))
|
|
@@ -627,13 +650,15 @@ def _build_result(
|
|
|
627
650
|
for step in extract_trace
|
|
628
651
|
if isinstance(step, dict) and isinstance(step.get("endpoint"), str)
|
|
629
652
|
]
|
|
630
|
-
|
|
653
|
+
asr_clean = derive_asr_clean_text(raw_content)
|
|
654
|
+
primary_text = asr_clean or raw_content
|
|
631
655
|
analysis_eligibility = "eligible" if raw_content else "incomplete"
|
|
632
656
|
analysis_exclusion_reason = "" if raw_content else "video_asr_unavailable"
|
|
633
657
|
|
|
634
658
|
payload: Dict[str, Any] = {
|
|
635
659
|
"platform": "douyin",
|
|
636
660
|
"content_kind": "single_video",
|
|
661
|
+
"analysis_mode": analysis_mode,
|
|
637
662
|
"source": source_input,
|
|
638
663
|
"platform_work_id": platform_work_id,
|
|
639
664
|
"title": title,
|
|
@@ -655,13 +680,15 @@ def _build_result(
|
|
|
655
680
|
"comment_count": metrics.get("comment_count", 0),
|
|
656
681
|
"collect_count": metrics.get("collect_count", 0),
|
|
657
682
|
"share_count": metrics.get("share_count", 0),
|
|
658
|
-
"play_count": metrics.get("play_count"
|
|
683
|
+
"play_count": metrics.get("play_count"),
|
|
659
684
|
"tags": tags or [],
|
|
660
685
|
"is_video": is_video,
|
|
661
686
|
"video_type_reason": video_type_reason,
|
|
662
687
|
"u2_task_id": u2_task_id,
|
|
663
688
|
"u2_task_status": u2_task_status,
|
|
664
689
|
"raw_content": raw_content,
|
|
690
|
+
"asr_raw": raw_content,
|
|
691
|
+
"asr_clean": asr_clean,
|
|
665
692
|
"primary_text": primary_text,
|
|
666
693
|
"primary_text_source": "asr_clean",
|
|
667
694
|
"analysis_eligibility": analysis_eligibility,
|
|
@@ -682,6 +709,7 @@ def _build_result(
|
|
|
682
709
|
"fallback_trace": fallback_trace,
|
|
683
710
|
"request_id": request_id,
|
|
684
711
|
"endpoint_list": endpoint_list,
|
|
712
|
+
"timings": dict(timings or {}),
|
|
685
713
|
}
|
|
686
714
|
return payload
|
|
687
715
|
|
|
@@ -701,6 +729,7 @@ def run_douyin_single_video(
|
|
|
701
729
|
u2_submit_max_retries: int,
|
|
702
730
|
u2_submit_backoff_ms: int,
|
|
703
731
|
write_card: bool,
|
|
732
|
+
analysis_mode: str,
|
|
704
733
|
card_type: str,
|
|
705
734
|
card_root: Optional[str],
|
|
706
735
|
content_kind: str = "single_video",
|
|
@@ -709,14 +738,17 @@ def run_douyin_single_video(
|
|
|
709
738
|
persist_output: bool = True,
|
|
710
739
|
progress: Optional[ProgressReporter] = None,
|
|
711
740
|
) -> Dict[str, Any]:
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
)
|
|
716
|
-
|
|
741
|
+
workflow_started_at = time.perf_counter()
|
|
742
|
+
timings = _empty_timings()
|
|
743
|
+
parse_started_at = time.perf_counter()
|
|
717
744
|
source_input = _normalize_input(input_value, share_url)
|
|
745
|
+
timings["url_parse_ms"] = _elapsed_ms(parse_started_at)
|
|
718
746
|
if progress is not None:
|
|
719
|
-
progress.started(
|
|
747
|
+
progress.started(
|
|
748
|
+
stage="single_video.workflow",
|
|
749
|
+
message="douyin single_video workflow started",
|
|
750
|
+
data={"analysis_mode": analysis_mode, "write_card": bool(write_card), "persist_output": bool(persist_output)},
|
|
751
|
+
)
|
|
720
752
|
if not source_input.get("share_url"):
|
|
721
753
|
result = _build_result(
|
|
722
754
|
source_input=source_input,
|
|
@@ -726,13 +758,7 @@ def run_douyin_single_video(
|
|
|
726
758
|
duration_ms=None,
|
|
727
759
|
video_down_url=None,
|
|
728
760
|
author={"author_handle": None, "author_platform_id": None, "douyin_sec_uid": None, "douyin_aweme_author_id": None, "nickname": None, "signature": None},
|
|
729
|
-
metrics=
|
|
730
|
-
"digg_count": 0,
|
|
731
|
-
"comment_count": 0,
|
|
732
|
-
"collect_count": 0,
|
|
733
|
-
"share_count": 0,
|
|
734
|
-
"play_count": 0,
|
|
735
|
-
},
|
|
761
|
+
metrics=_empty_metrics(),
|
|
736
762
|
tags=[],
|
|
737
763
|
is_video=False,
|
|
738
764
|
video_type_reason="missing_share_url",
|
|
@@ -745,16 +771,26 @@ def run_douyin_single_video(
|
|
|
745
771
|
u2_task_id=None,
|
|
746
772
|
u2_task_status="UNKNOWN",
|
|
747
773
|
u2_gate_reason="not_started",
|
|
774
|
+
analysis_mode=analysis_mode,
|
|
775
|
+
timings=timings,
|
|
748
776
|
)
|
|
749
777
|
if write_card:
|
|
750
|
-
|
|
778
|
+
card_started_at = time.perf_counter()
|
|
779
|
+
result["card_write"] = write_work_fact_card(
|
|
751
780
|
payload=result,
|
|
752
781
|
platform="douyin",
|
|
753
782
|
card_type=card_type,
|
|
754
783
|
card_root=card_root,
|
|
755
784
|
content_kind=content_kind,
|
|
756
785
|
storage_config=storage_config,
|
|
786
|
+
analysis_mode=analysis_mode,
|
|
787
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
757
788
|
)
|
|
789
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
790
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
791
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
792
|
+
result["timings"] = dict(timings)
|
|
793
|
+
_update_pipeline_status(result)
|
|
758
794
|
return _finalize_result(
|
|
759
795
|
result=result,
|
|
760
796
|
source_input=source_input,
|
|
@@ -776,6 +812,7 @@ def run_douyin_single_video(
|
|
|
776
812
|
|
|
777
813
|
trace: List[Dict[str, Any]] = []
|
|
778
814
|
|
|
815
|
+
u1_started_at = time.perf_counter()
|
|
779
816
|
if progress is not None:
|
|
780
817
|
progress.progress(stage="single_video.fetch", message="fetching douyin single_video payload")
|
|
781
818
|
one_video_response = _u1_fetch_one_video(
|
|
@@ -785,9 +822,11 @@ def run_douyin_single_video(
|
|
|
785
822
|
app_timeout_ms=app_timeout,
|
|
786
823
|
web_timeout_ms=web_timeout,
|
|
787
824
|
)
|
|
825
|
+
timings["u1_total_ms"] = _elapsed_ms(u1_started_at)
|
|
788
826
|
|
|
789
827
|
app_failed = one_video_response.get("_app_failed")
|
|
790
828
|
if app_failed:
|
|
829
|
+
_emit_http_progress(progress, stage="single_video.fetch", response=app_failed, route_label="app_primary")
|
|
791
830
|
trace.append(
|
|
792
831
|
_trace_step(
|
|
793
832
|
step="u1_fetch_one_video_primary",
|
|
@@ -797,6 +836,12 @@ def run_douyin_single_video(
|
|
|
797
836
|
)
|
|
798
837
|
)
|
|
799
838
|
|
|
839
|
+
_emit_http_progress(
|
|
840
|
+
progress,
|
|
841
|
+
stage="single_video.fetch",
|
|
842
|
+
response=one_video_response,
|
|
843
|
+
route_label="effective_route",
|
|
844
|
+
)
|
|
800
845
|
trace.append(
|
|
801
846
|
_trace_step(
|
|
802
847
|
step="u1_fetch_one_video_effective",
|
|
@@ -823,13 +868,7 @@ def run_douyin_single_video(
|
|
|
823
868
|
duration_ms=None,
|
|
824
869
|
video_down_url=None,
|
|
825
870
|
author={"author_handle": None, "author_platform_id": None, "douyin_sec_uid": None, "douyin_aweme_author_id": None, "nickname": None, "signature": None},
|
|
826
|
-
metrics=
|
|
827
|
-
"digg_count": 0,
|
|
828
|
-
"comment_count": 0,
|
|
829
|
-
"collect_count": 0,
|
|
830
|
-
"share_count": 0,
|
|
831
|
-
"play_count": 0,
|
|
832
|
-
},
|
|
871
|
+
metrics=_empty_metrics(),
|
|
833
872
|
tags=[],
|
|
834
873
|
is_video=False,
|
|
835
874
|
video_type_reason="u1_failed",
|
|
@@ -842,16 +881,26 @@ def run_douyin_single_video(
|
|
|
842
881
|
u2_task_id=None,
|
|
843
882
|
u2_task_status="UNKNOWN",
|
|
844
883
|
u2_gate_reason="u1_failed",
|
|
884
|
+
analysis_mode=analysis_mode,
|
|
885
|
+
timings=timings,
|
|
845
886
|
)
|
|
846
887
|
if write_card:
|
|
847
|
-
|
|
888
|
+
card_started_at = time.perf_counter()
|
|
889
|
+
result["card_write"] = write_work_fact_card(
|
|
848
890
|
payload=result,
|
|
849
891
|
platform="douyin",
|
|
850
892
|
card_type=card_type,
|
|
851
893
|
card_root=card_root,
|
|
852
894
|
content_kind=content_kind,
|
|
853
895
|
storage_config=storage_config,
|
|
896
|
+
analysis_mode=analysis_mode,
|
|
897
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
854
898
|
)
|
|
899
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
900
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
901
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
902
|
+
result["timings"] = dict(timings)
|
|
903
|
+
_update_pipeline_status(result)
|
|
855
904
|
return _finalize_result(
|
|
856
905
|
result=result,
|
|
857
906
|
source_input=source_input,
|
|
@@ -875,13 +924,7 @@ def run_douyin_single_video(
|
|
|
875
924
|
duration_ms=None,
|
|
876
925
|
video_down_url=None,
|
|
877
926
|
author={"author_handle": None, "author_platform_id": None, "douyin_sec_uid": None, "douyin_aweme_author_id": None, "nickname": None, "signature": None},
|
|
878
|
-
metrics=
|
|
879
|
-
"digg_count": 0,
|
|
880
|
-
"comment_count": 0,
|
|
881
|
-
"collect_count": 0,
|
|
882
|
-
"share_count": 0,
|
|
883
|
-
"play_count": 0,
|
|
884
|
-
},
|
|
927
|
+
metrics=_empty_metrics(),
|
|
885
928
|
tags=[],
|
|
886
929
|
is_video=False,
|
|
887
930
|
video_type_reason="aweme_detail_missing",
|
|
@@ -894,16 +937,26 @@ def run_douyin_single_video(
|
|
|
894
937
|
u2_task_id=None,
|
|
895
938
|
u2_task_status="UNKNOWN",
|
|
896
939
|
u2_gate_reason="aweme_detail_missing",
|
|
940
|
+
analysis_mode=analysis_mode,
|
|
941
|
+
timings=timings,
|
|
897
942
|
)
|
|
898
943
|
if write_card:
|
|
899
|
-
|
|
944
|
+
card_started_at = time.perf_counter()
|
|
945
|
+
result["card_write"] = write_work_fact_card(
|
|
900
946
|
payload=result,
|
|
901
947
|
platform="douyin",
|
|
902
948
|
card_type=card_type,
|
|
903
949
|
card_root=card_root,
|
|
904
950
|
content_kind=content_kind,
|
|
905
951
|
storage_config=storage_config,
|
|
952
|
+
analysis_mode=analysis_mode,
|
|
953
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
906
954
|
)
|
|
955
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
956
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
957
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
958
|
+
result["timings"] = dict(timings)
|
|
959
|
+
_update_pipeline_status(result)
|
|
907
960
|
return _finalize_result(
|
|
908
961
|
result=result,
|
|
909
962
|
source_input=source_input,
|
|
@@ -986,22 +1039,43 @@ def run_douyin_single_video(
|
|
|
986
1039
|
poll_result: Dict[str, Any] = {}
|
|
987
1040
|
|
|
988
1041
|
if can_u2 and video_down_url:
|
|
1042
|
+
u2_timeout_ms = _resolve_u2_timeout_ms(runtime["timeout_ms"])
|
|
989
1043
|
if progress is not None:
|
|
990
1044
|
progress.progress(
|
|
991
1045
|
stage="single_video.u2",
|
|
992
1046
|
message="starting douyin u2 submit",
|
|
993
|
-
data={"video_down_url_present": True},
|
|
1047
|
+
data={"video_down_url_present": True, "timeout_ms": u2_timeout_ms},
|
|
994
1048
|
)
|
|
995
|
-
|
|
1049
|
+
submit_started_at = time.perf_counter()
|
|
1050
|
+
submit_bundle = run_u2_asr_with_timeout_retry(
|
|
996
1051
|
base_url=runtime["base_url"],
|
|
997
1052
|
token=runtime["token"],
|
|
998
|
-
timeout_ms=
|
|
1053
|
+
timeout_ms=u2_timeout_ms,
|
|
999
1054
|
video_url=video_down_url,
|
|
1000
|
-
|
|
1001
|
-
|
|
1055
|
+
submit_max_retries=u2_submit_max_retries,
|
|
1056
|
+
submit_backoff_ms=u2_submit_backoff_ms,
|
|
1057
|
+
poll_interval_sec=poll_interval_sec,
|
|
1058
|
+
max_polls=max_polls,
|
|
1059
|
+
pending_timeout_sec=int(config_get(storage_config or {}, "runtime.u2_pending_timeout_sec", 60) or 60),
|
|
1060
|
+
progress_callback=(
|
|
1061
|
+
lambda event: _report_u2_progress(progress, stage="single_video.u2", event=event, label="douyin")
|
|
1062
|
+
) if progress is not None else None,
|
|
1002
1063
|
)
|
|
1003
|
-
|
|
1004
|
-
|
|
1064
|
+
timings["u2_submit_ms"] = _safe_int(submit_bundle.get("submit_duration_ms")) or _elapsed_ms(submit_started_at)
|
|
1065
|
+
submit_response = (submit_bundle.get("submit_bundle") or {}).get("submit_response", {})
|
|
1066
|
+
u2_task_id = (submit_bundle.get("submit_bundle") or {}).get("task_id")
|
|
1067
|
+
if progress is not None:
|
|
1068
|
+
progress.http_event(
|
|
1069
|
+
stage="single_video.u2",
|
|
1070
|
+
endpoint=U2_SUBMIT_ENDPOINT,
|
|
1071
|
+
response=submit_response,
|
|
1072
|
+
route_label="u2_submit",
|
|
1073
|
+
summary={
|
|
1074
|
+
"task_id": u2_task_id,
|
|
1075
|
+
"final_submit_status": (submit_bundle.get("submit_bundle") or {}).get("final_submit_status"),
|
|
1076
|
+
"retry_count": len(((submit_bundle.get("submit_bundle") or {}).get("retry_chain") or [])),
|
|
1077
|
+
},
|
|
1078
|
+
)
|
|
1005
1079
|
|
|
1006
1080
|
trace.append(
|
|
1007
1081
|
_trace_step(
|
|
@@ -1011,7 +1085,7 @@ def run_douyin_single_video(
|
|
|
1011
1085
|
extra={
|
|
1012
1086
|
"task_id": u2_task_id,
|
|
1013
1087
|
"video_down_url": video_down_url,
|
|
1014
|
-
"final_submit_status": submit_bundle.get("final_submit_status"),
|
|
1088
|
+
"final_submit_status": (submit_bundle.get("submit_bundle") or {}).get("final_submit_status"),
|
|
1015
1089
|
},
|
|
1016
1090
|
)
|
|
1017
1091
|
)
|
|
@@ -1024,7 +1098,7 @@ def run_douyin_single_video(
|
|
|
1024
1098
|
"u2_submit_max_retries": max(0, int(u2_submit_max_retries)),
|
|
1025
1099
|
"u2_submit_backoff_ms": max(0, int(u2_submit_backoff_ms)),
|
|
1026
1100
|
},
|
|
1027
|
-
"attempts": submit_bundle.get("retry_chain"
|
|
1101
|
+
"attempts": ((submit_bundle.get("submit_bundle") or {}).get("retry_chain") or []),
|
|
1028
1102
|
}
|
|
1029
1103
|
)
|
|
1030
1104
|
|
|
@@ -1038,16 +1112,8 @@ def run_douyin_single_video(
|
|
|
1038
1112
|
error_reason = submit_response.get("error_reason") or "u2_submit_failed_or_missing_task_id"
|
|
1039
1113
|
u2_task_status = "UNKNOWN"
|
|
1040
1114
|
else:
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
poll_result = poll_u2_task(
|
|
1044
|
-
base_url=runtime["base_url"],
|
|
1045
|
-
token=runtime["token"],
|
|
1046
|
-
timeout_ms=runtime["timeout_ms"],
|
|
1047
|
-
task_id=u2_task_id,
|
|
1048
|
-
poll_interval_sec=poll_interval_sec,
|
|
1049
|
-
max_polls=max_polls,
|
|
1050
|
-
)
|
|
1115
|
+
poll_result = submit_bundle.get("poll_result", {})
|
|
1116
|
+
timings["u2_poll_ms"] = _safe_int(submit_bundle.get("poll_duration_ms"))
|
|
1051
1117
|
u2_task_status = poll_result.get("task_status") or "UNKNOWN"
|
|
1052
1118
|
raw_content = poll_result.get("transcript_text", "") if poll_result.get("ok") else ""
|
|
1053
1119
|
error_reason = poll_result.get("error_reason")
|
|
@@ -1062,8 +1128,22 @@ def run_douyin_single_video(
|
|
|
1062
1128
|
"request_id": poll_result.get("request_id"),
|
|
1063
1129
|
"error_reason": poll_result.get("error_reason"),
|
|
1064
1130
|
"attempts": len(poll_result.get("trace", [])),
|
|
1131
|
+
"timeout_retry": submit_bundle.get("timeout_retry", {}),
|
|
1132
|
+
"u3_fallback": submit_bundle.get("u3_fallback", {}),
|
|
1065
1133
|
}
|
|
1066
1134
|
)
|
|
1135
|
+
if submit_bundle.get("u3_fallback", {}).get("triggered"):
|
|
1136
|
+
trace.append(
|
|
1137
|
+
{
|
|
1138
|
+
"step": "u3_fallback",
|
|
1139
|
+
"triggered": True,
|
|
1140
|
+
"ok": submit_bundle.get("u3_fallback", {}).get("ok"),
|
|
1141
|
+
"result": submit_bundle.get("u3_fallback", {}).get("result"),
|
|
1142
|
+
"public_url": submit_bundle.get("u3_fallback", {}).get("public_url"),
|
|
1143
|
+
"error_reason": submit_bundle.get("u3_fallback", {}).get("error_reason"),
|
|
1144
|
+
"trace": submit_bundle.get("u3_fallback", {}).get("trace", []),
|
|
1145
|
+
}
|
|
1146
|
+
)
|
|
1067
1147
|
if progress is not None:
|
|
1068
1148
|
(progress.done if poll_result.get("ok") else progress.failed)(
|
|
1069
1149
|
stage="single_video.u2",
|
|
@@ -1111,20 +1191,31 @@ def run_douyin_single_video(
|
|
|
1111
1191
|
u2_task_id=u2_task_id,
|
|
1112
1192
|
u2_task_status=u2_task_status,
|
|
1113
1193
|
u2_gate_reason=gate_reason,
|
|
1194
|
+
analysis_mode=analysis_mode,
|
|
1114
1195
|
asr_source="u2" if raw_content else "fallback_none",
|
|
1196
|
+
timings=timings,
|
|
1115
1197
|
)
|
|
1116
1198
|
|
|
1117
1199
|
if write_card:
|
|
1118
1200
|
if progress is not None:
|
|
1119
1201
|
progress.progress(stage="single_video.card_write", message="writing douyin single_video card")
|
|
1120
|
-
|
|
1202
|
+
card_started_at = time.perf_counter()
|
|
1203
|
+
result["card_write"] = write_work_fact_card(
|
|
1121
1204
|
payload=result,
|
|
1122
1205
|
platform="douyin",
|
|
1123
1206
|
card_type=card_type,
|
|
1124
1207
|
card_root=card_root,
|
|
1125
1208
|
content_kind=content_kind,
|
|
1126
1209
|
storage_config=storage_config,
|
|
1210
|
+
analysis_mode=analysis_mode,
|
|
1211
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1127
1212
|
)
|
|
1213
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1214
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
1215
|
+
|
|
1216
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1217
|
+
result["timings"] = dict(timings)
|
|
1218
|
+
_update_pipeline_status(result)
|
|
1128
1219
|
|
|
1129
1220
|
finalized = _finalize_result(
|
|
1130
1221
|
result=result,
|
|
@@ -1142,13 +1233,14 @@ def run_douyin_single_video(
|
|
|
1142
1233
|
"request_id": finalized.get("request_id"),
|
|
1143
1234
|
"card_write_ok": bool((finalized.get("card_write") or {}).get("ok")),
|
|
1144
1235
|
"output_persist_ok": bool((finalized.get("output_persist") or {}).get("ok")),
|
|
1236
|
+
"deep_analysis_status": ((finalized.get("deep_analysis") or {}).get("status")),
|
|
1145
1237
|
},
|
|
1146
1238
|
)
|
|
1147
1239
|
return finalized
|
|
1148
1240
|
|
|
1149
1241
|
|
|
1150
1242
|
def main() -> None:
|
|
1151
|
-
parser = argparse.ArgumentParser(description="Run Douyin single-
|
|
1243
|
+
parser = argparse.ArgumentParser(description="Run Douyin single-work fixed pipeline")
|
|
1152
1244
|
parser.add_argument("input", nargs="?", default=None, help="Douyin share URL")
|
|
1153
1245
|
parser.add_argument("--share-url", default=None, help="Douyin share URL")
|
|
1154
1246
|
parser.add_argument("--config", default=None, help="Runtime config YAML path")
|
|
@@ -1173,8 +1265,15 @@ def main() -> None:
|
|
|
1173
1265
|
default=1500,
|
|
1174
1266
|
help="Base backoff ms for retriable U2 submit failures (exponential)",
|
|
1175
1267
|
)
|
|
1176
|
-
parser.add_argument("--card-type", choices=["work"
|
|
1177
|
-
parser.add_argument("--content-kind", default="single_video", help="Routing kind, e.g. single_video/
|
|
1268
|
+
parser.add_argument("--card-type", choices=["work"], default="work", help="Primary card type")
|
|
1269
|
+
parser.add_argument("--content-kind", default="single_video", help="Routing kind, e.g. single_video/work")
|
|
1270
|
+
parser.add_argument("--card-mode", dest="analysis_mode", choices=["standard", "auto", "local"], default="standard", help="Fact-card mode")
|
|
1271
|
+
parser.add_argument("--analysis-mode", dest="analysis_mode", choices=["standard", "auto", "local"], help=argparse.SUPPRESS)
|
|
1272
|
+
parser.set_defaults(write_card=True, persist_output=True)
|
|
1273
|
+
parser.add_argument("--write-card", dest="write_card", action="store_true", help="Write final work card")
|
|
1274
|
+
parser.add_argument("--no-write-card", dest="write_card", action="store_false", help="Skip card writing")
|
|
1275
|
+
parser.add_argument("--persist-output", dest="persist_output", action="store_true", help="Persist result JSON")
|
|
1276
|
+
parser.add_argument("--no-persist-output", dest="persist_output", action="store_false", help="Skip result JSON persist")
|
|
1178
1277
|
parser.add_argument("--card-root", default=None, help="Card root (absolute); falls back to TIKOMNI_CARD_ROOT when writing cards")
|
|
1179
1278
|
args = parser.parse_args()
|
|
1180
1279
|
|
|
@@ -1188,6 +1287,13 @@ def main() -> None:
|
|
|
1188
1287
|
base_url = args.base_url or config_get(config, "runtime.base_url", None)
|
|
1189
1288
|
timeout_ms = args.timeout_ms if args.timeout_ms is not None else config_get(config, "runtime.timeout_ms", None)
|
|
1190
1289
|
|
|
1290
|
+
progress = build_progress_reporter(
|
|
1291
|
+
workflow="social-media-crawl",
|
|
1292
|
+
platform="douyin",
|
|
1293
|
+
content_kind=args.content_kind,
|
|
1294
|
+
input_value=args.share_url or args.input,
|
|
1295
|
+
)
|
|
1296
|
+
|
|
1191
1297
|
try:
|
|
1192
1298
|
result = run_douyin_single_video(
|
|
1193
1299
|
input_value=args.input,
|
|
@@ -1202,13 +1308,15 @@ def main() -> None:
|
|
|
1202
1308
|
max_polls=args.max_polls,
|
|
1203
1309
|
u2_submit_max_retries=args.u2_submit_max_retries,
|
|
1204
1310
|
u2_submit_backoff_ms=args.u2_submit_backoff_ms,
|
|
1205
|
-
write_card=
|
|
1311
|
+
write_card=bool(args.write_card),
|
|
1312
|
+
analysis_mode=args.analysis_mode,
|
|
1206
1313
|
card_type=args.card_type,
|
|
1207
1314
|
card_root=args.card_root,
|
|
1208
1315
|
content_kind=args.content_kind,
|
|
1209
1316
|
storage_config=config,
|
|
1210
1317
|
allow_process_env=args.allow_process_env,
|
|
1211
|
-
persist_output=
|
|
1318
|
+
persist_output=bool(args.persist_output),
|
|
1319
|
+
progress=progress,
|
|
1212
1320
|
)
|
|
1213
1321
|
except ValueError as error:
|
|
1214
1322
|
result = {
|