@tikomni/skills 0.1.2 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.skill-package-allowlist.txt +1 -3
- package/README.md +41 -49
- package/README.zh-CN.md +43 -51
- package/bin/tikomni-skills.js +2 -2
- package/env.example +37 -56
- package/package.json +7 -3
- package/skills/social-media-crawl/SKILL.md +53 -0
- package/skills/social-media-crawl/agents/openai.yaml +5 -0
- package/skills/social-media-crawl/references/contracts/output-envelope.md +22 -0
- package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md +48 -0
- package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +30 -0
- package/skills/social-media-crawl/references/mcp-usage-contract.md +30 -0
- package/skills/social-media-crawl/references/pipelines/douyin-creator-home.md +7 -0
- package/skills/social-media-crawl/references/pipelines/douyin-single-work.md +7 -0
- package/skills/social-media-crawl/references/pipelines/xiaohongshu-creator-home.md +7 -0
- package/skills/social-media-crawl/references/pipelines/xiaohongshu-single-work.md +7 -0
- package/skills/social-media-crawl/references/schemas/creator-profile.schema.json +33 -0
- package/skills/social-media-crawl/references/schemas/output-envelope.schema.json +41 -0
- package/skills/social-media-crawl/references/schemas/work-collection.schema.json +29 -0
- package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json +67 -0
- package/skills/social-media-crawl/references/service-guides/u2-u3-mandatory-fallback.md +21 -0
- package/skills/social-media-crawl/scripts/__init__.py +2 -0
- package/skills/social-media-crawl/scripts/core/__init__.py +2 -0
- package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/asr_pipeline.py +252 -9
- package/skills/social-media-crawl/scripts/core/completeness.py +83 -0
- package/skills/{single-work-analysis → social-media-crawl}/scripts/core/config_loader.py +108 -167
- package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +145 -0
- package/skills/social-media-crawl/scripts/core/object_detection.py +63 -0
- package/skills/{creator-analysis/scripts/pipeline/asr → social-media-crawl/scripts/core}/poll_u2_task.py +6 -2
- package/skills/{single-work-analysis → social-media-crawl}/scripts/core/progress_report.py +32 -0
- package/skills/social-media-crawl/scripts/core/storage_router.py +160 -0
- package/skills/{creator-analysis → social-media-crawl}/scripts/core/tikomni_common.py +13 -3
- package/skills/social-media-crawl/scripts/core/u3_fallback.py +328 -0
- package/skills/social-media-crawl/scripts/pipelines/__init__.py +2 -0
- package/skills/social-media-crawl/scripts/pipelines/douyin_creator_home_helpers.py +35 -0
- package/skills/social-media-crawl/scripts/pipelines/douyin_platform_adapter.py +7 -0
- package/skills/{creator-analysis/scripts/author_home/asr → social-media-crawl/scripts/pipelines}/home_asr.py +1 -1
- package/skills/{creator-analysis/scripts/author_home/adapters → social-media-crawl/scripts/pipelines}/platform_adapters.py +8 -2
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +237 -0
- package/skills/{single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py → social-media-crawl/scripts/pipelines/run_douyin_single_work.py} +282 -174
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +237 -0
- package/skills/{single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py → social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py} +290 -141
- package/skills/{creator-analysis/scripts/author_home → social-media-crawl/scripts/pipelines}/schema.py +1 -1
- package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_creator_home_helpers.py +35 -0
- package/skills/social-media-crawl/scripts/pipelines/xiaohongshu_platform_adapter.py +7 -0
- package/skills/social-media-crawl/scripts/writers/__init__.py +1 -0
- package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +391 -0
- package/skills/creator-analysis/SKILL.md +0 -95
- package/skills/creator-analysis/agents/openai.yaml +0 -4
- package/skills/creator-analysis/env.example +0 -36
- package/skills/creator-analysis/references/api-capability-index.md +0 -92
- package/skills/creator-analysis/references/api-contracts/asr-api.md +0 -130
- package/skills/creator-analysis/references/api-contracts/bilibili-app-api.md +0 -776
- package/skills/creator-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
- package/skills/creator-analysis/references/api-contracts/demo-api.md +0 -717
- package/skills/creator-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
- package/skills/creator-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
- package/skills/creator-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
- package/skills/creator-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
- package/skills/creator-analysis/references/api-contracts/douyin-search-api.md +0 -4118
- package/skills/creator-analysis/references/api-contracts/douyin-web-api.md +0 -5544
- package/skills/creator-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
- package/skills/creator-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
- package/skills/creator-analysis/references/api-contracts/health-check.md +0 -69
- package/skills/creator-analysis/references/api-contracts/hybrid-parsing.md +0 -78
- package/skills/creator-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
- package/skills/creator-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
- package/skills/creator-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
- package/skills/creator-analysis/references/api-contracts/ios-shortcut.md +0 -44
- package/skills/creator-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
- package/skills/creator-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
- package/skills/creator-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
- package/skills/creator-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
- package/skills/creator-analysis/references/api-contracts/media-ingest-api.md +0 -126
- package/skills/creator-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
- package/skills/creator-analysis/references/api-contracts/reddit-app-api.md +0 -2025
- package/skills/creator-analysis/references/api-contracts/sora2-api.md +0 -2266
- package/skills/creator-analysis/references/api-contracts/temp-mail-api.md +0 -208
- package/skills/creator-analysis/references/api-contracts/threads-web-api.md +0 -897
- package/skills/creator-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
- package/skills/creator-analysis/references/api-contracts/tikhub-user-api.md +0 -494
- package/skills/creator-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
- package/skills/creator-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
- package/skills/creator-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
- package/skills/creator-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
- package/skills/creator-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
- package/skills/creator-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
- package/skills/creator-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
- package/skills/creator-analysis/references/api-contracts/toutiao-app-api.md +0 -342
- package/skills/creator-analysis/references/api-contracts/toutiao-web-api.md +0 -143
- package/skills/creator-analysis/references/api-contracts/twitter-web-api.md +0 -989
- package/skills/creator-analysis/references/api-contracts/wechat-channels-api.md +0 -809
- package/skills/creator-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
- package/skills/creator-analysis/references/api-contracts/weibo-app-api.md +0 -1547
- package/skills/creator-analysis/references/api-contracts/weibo-web-api.md +0 -798
- package/skills/creator-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
- package/skills/creator-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
- package/skills/creator-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
- package/skills/creator-analysis/references/api-contracts/youtube-web-api.md +0 -2636
- package/skills/creator-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
- package/skills/creator-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
- package/skills/creator-analysis/references/api-tags/asr-api.md +0 -100
- package/skills/creator-analysis/references/api-tags/bilibili-app-api.md +0 -482
- package/skills/creator-analysis/references/api-tags/bilibili-web-api.md +0 -1267
- package/skills/creator-analysis/references/api-tags/demo-api.md +0 -365
- package/skills/creator-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
- package/skills/creator-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
- package/skills/creator-analysis/references/api-tags/douyin-creator-api.md +0 -694
- package/skills/creator-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
- package/skills/creator-analysis/references/api-tags/douyin-search-api.md +0 -1059
- package/skills/creator-analysis/references/api-tags/douyin-web-api.md +0 -3314
- package/skills/creator-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
- package/skills/creator-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
- package/skills/creator-analysis/references/api-tags/health-check.md +0 -40
- package/skills/creator-analysis/references/api-tags/hybrid-parsing.md +0 -57
- package/skills/creator-analysis/references/api-tags/instagram-v1-api.md +0 -1224
- package/skills/creator-analysis/references/api-tags/instagram-v2-api.md +0 -1147
- package/skills/creator-analysis/references/api-tags/instagram-v3-api.md +0 -1123
- package/skills/creator-analysis/references/api-tags/ios-shortcut.md +0 -45
- package/skills/creator-analysis/references/api-tags/kuaishou-app-api.md +0 -846
- package/skills/creator-analysis/references/api-tags/kuaishou-web-api.md +0 -551
- package/skills/creator-analysis/references/api-tags/lemon8-app-api.md +0 -687
- package/skills/creator-analysis/references/api-tags/linkedin-web-api.md +0 -1105
- package/skills/creator-analysis/references/api-tags/media-ingest-api.md +0 -112
- package/skills/creator-analysis/references/api-tags/pipixia-app-api.md +0 -721
- package/skills/creator-analysis/references/api-tags/reddit-app-api.md +0 -1057
- package/skills/creator-analysis/references/api-tags/sora2-api.md +0 -737
- package/skills/creator-analysis/references/api-tags/temp-mail-api.md +0 -136
- package/skills/creator-analysis/references/api-tags/threads-web-api.md +0 -472
- package/skills/creator-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
- package/skills/creator-analysis/references/api-tags/tikhub-user-api.md +0 -253
- package/skills/creator-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
- package/skills/creator-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
- package/skills/creator-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
- package/skills/creator-analysis/references/api-tags/tiktok-creator-api.md +0 -709
- package/skills/creator-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
- package/skills/creator-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
- package/skills/creator-analysis/references/api-tags/tiktok-web-api.md +0 -2516
- package/skills/creator-analysis/references/api-tags/toutiao-app-api.md +0 -220
- package/skills/creator-analysis/references/api-tags/toutiao-web-api.md +0 -96
- package/skills/creator-analysis/references/api-tags/twitter-web-api.md +0 -562
- package/skills/creator-analysis/references/api-tags/wechat-channels-api.md +0 -405
- package/skills/creator-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
- package/skills/creator-analysis/references/api-tags/weibo-app-api.md +0 -851
- package/skills/creator-analysis/references/api-tags/weibo-web-api.md +0 -470
- package/skills/creator-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
- package/skills/creator-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
- package/skills/creator-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
- package/skills/creator-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
- package/skills/creator-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
- package/skills/creator-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
- package/skills/creator-analysis/references/api-tags/youtube-web-api.md +0 -934
- package/skills/creator-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
- package/skills/creator-analysis/references/api-tags/zhihu-web-api.md +0 -1384
- package/skills/creator-analysis/references/asr-orchestration.md +0 -33
- package/skills/creator-analysis/references/config-templates/defaults.yaml +0 -60
- package/skills/creator-analysis/references/contracts/creator-card-fields.md +0 -25
- package/skills/creator-analysis/references/contracts/work-card-fields.md +0 -68
- package/skills/creator-analysis/references/platform-guides/douyin.md +0 -54
- package/skills/creator-analysis/references/platform-guides/generic.md +0 -50
- package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +0 -69
- package/skills/creator-analysis/references/prompt-contracts/asr-clean.md +0 -28
- package/skills/creator-analysis/references/prompt-contracts/author-analysis-v2.md +0 -46
- package/skills/creator-analysis/references/prompt-contracts/author-analysis.md +0 -49
- package/skills/creator-analysis/references/prompt-contracts/cta.md +0 -24
- package/skills/creator-analysis/references/prompt-contracts/hook.md +0 -25
- package/skills/creator-analysis/references/prompt-contracts/insight.md +0 -47
- package/skills/creator-analysis/references/prompt-contracts/sampled-work-batch-explanations.md +0 -30
- package/skills/creator-analysis/references/prompt-contracts/structure.md +0 -25
- package/skills/creator-analysis/references/prompt-contracts/style.md +0 -27
- package/skills/creator-analysis/references/prompt-contracts/summary.md +0 -29
- package/skills/creator-analysis/references/prompt-contracts/topic.md +0 -29
- package/skills/creator-analysis/references/schemas/author-analysis-input-v1.schema.json +0 -325
- package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +0 -287
- package/skills/creator-analysis/references/schemas/sampled-work-batch-explanations.schema.json +0 -41
- package/skills/creator-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
- package/skills/creator-analysis/references/workflow.md +0 -23
- package/skills/creator-analysis/scripts/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/adapters/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/analyzers/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +0 -1165
- package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +0 -447
- package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +0 -331
- package/skills/creator-analysis/scripts/author_home/asr/__init__.py +0 -5
- package/skills/creator-analysis/scripts/author_home/builders/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +0 -213
- package/skills/creator-analysis/scripts/author_home/collectors/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/orchestrator/__init__.py +0 -0
- package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +0 -834
- package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +0 -609
- package/skills/creator-analysis/scripts/core/__init__.py +0 -0
- package/skills/creator-analysis/scripts/core/analysis_pipeline.py +0 -133
- package/skills/creator-analysis/scripts/core/config_loader.py +0 -418
- package/skills/creator-analysis/scripts/core/progress_report.py +0 -111
- package/skills/creator-analysis/scripts/core/storage_router.py +0 -256
- package/skills/creator-analysis/scripts/pipeline/__init__.py +0 -0
- package/skills/creator-analysis/scripts/pipeline/asr/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/douyin/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/douyin/run_douyin_single_video.py +0 -1208
- package/skills/creator-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
- package/skills/creator-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +0 -2128
- package/skills/creator-analysis/scripts/writers/__init__.py +0 -0
- package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +0 -107
- package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +0 -1579
- package/skills/meta-capability/SKILL.md +0 -69
- package/skills/meta-capability/agents/openai.yaml +0 -4
- package/skills/meta-capability/env.example +0 -42
- package/skills/meta-capability/references/api-capability-index.md +0 -92
- package/skills/meta-capability/references/api-contracts/asr-api.md +0 -130
- package/skills/meta-capability/references/api-contracts/bilibili-app-api.md +0 -776
- package/skills/meta-capability/references/api-contracts/bilibili-web-api.md +0 -2017
- package/skills/meta-capability/references/api-contracts/demo-api.md +0 -717
- package/skills/meta-capability/references/api-contracts/douyin-app-v3-api.md +0 -3594
- package/skills/meta-capability/references/api-contracts/douyin-billboard-api.md +0 -2274
- package/skills/meta-capability/references/api-contracts/douyin-creator-api.md +0 -1575
- package/skills/meta-capability/references/api-contracts/douyin-creator-v2-api.md +0 -3254
- package/skills/meta-capability/references/api-contracts/douyin-search-api.md +0 -4118
- package/skills/meta-capability/references/api-contracts/douyin-web-api.md +0 -5544
- package/skills/meta-capability/references/api-contracts/douyin-xingtu-api.md +0 -1916
- package/skills/meta-capability/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
- package/skills/meta-capability/references/api-contracts/health-check.md +0 -69
- package/skills/meta-capability/references/api-contracts/hybrid-parsing.md +0 -78
- package/skills/meta-capability/references/api-contracts/instagram-v1-api.md +0 -2256
- package/skills/meta-capability/references/api-contracts/instagram-v2-api.md +0 -2011
- package/skills/meta-capability/references/api-contracts/instagram-v3-api.md +0 -2630
- package/skills/meta-capability/references/api-contracts/ios-shortcut.md +0 -44
- package/skills/meta-capability/references/api-contracts/kuaishou-app-api.md +0 -1518
- package/skills/meta-capability/references/api-contracts/kuaishou-web-api.md +0 -1242
- package/skills/meta-capability/references/api-contracts/lemon8-app-api.md +0 -1088
- package/skills/meta-capability/references/api-contracts/linkedin-web-api.md +0 -1949
- package/skills/meta-capability/references/api-contracts/media-ingest-api.md +0 -126
- package/skills/meta-capability/references/api-contracts/pipixia-app-api.md +0 -1142
- package/skills/meta-capability/references/api-contracts/reddit-app-api.md +0 -2025
- package/skills/meta-capability/references/api-contracts/sora2-api.md +0 -2266
- package/skills/meta-capability/references/api-contracts/temp-mail-api.md +0 -208
- package/skills/meta-capability/references/api-contracts/threads-web-api.md +0 -897
- package/skills/meta-capability/references/api-contracts/tikhub-downloader-api.md +0 -134
- package/skills/meta-capability/references/api-contracts/tikhub-user-api.md +0 -494
- package/skills/meta-capability/references/api-contracts/tiktok-ads-api.md +0 -5947
- package/skills/meta-capability/references/api-contracts/tiktok-analytics-api.md +0 -968
- package/skills/meta-capability/references/api-contracts/tiktok-app-v3-api.md +0 -5735
- package/skills/meta-capability/references/api-contracts/tiktok-creator-api.md +0 -1951
- package/skills/meta-capability/references/api-contracts/tiktok-interaction-api.md +0 -742
- package/skills/meta-capability/references/api-contracts/tiktok-shop-web-api.md +0 -1890
- package/skills/meta-capability/references/api-contracts/tiktok-web-api.md +0 -4448
- package/skills/meta-capability/references/api-contracts/toutiao-app-api.md +0 -342
- package/skills/meta-capability/references/api-contracts/toutiao-web-api.md +0 -143
- package/skills/meta-capability/references/api-contracts/twitter-web-api.md +0 -989
- package/skills/meta-capability/references/api-contracts/wechat-channels-api.md +0 -809
- package/skills/meta-capability/references/api-contracts/wechat-media-platform-web-api.md +0 -677
- package/skills/meta-capability/references/api-contracts/weibo-app-api.md +0 -1547
- package/skills/meta-capability/references/api-contracts/weibo-web-api.md +0 -798
- package/skills/meta-capability/references/api-contracts/weibo-web-v2-api.md +0 -2459
- package/skills/meta-capability/references/api-contracts/xiaohongshu-app-api.md +0 -1291
- package/skills/meta-capability/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
- package/skills/meta-capability/references/api-contracts/xiaohongshu-web-api.md +0 -1324
- package/skills/meta-capability/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
- package/skills/meta-capability/references/api-contracts/xigua-app-v2-api.md +0 -489
- package/skills/meta-capability/references/api-contracts/youtube-web-api.md +0 -2636
- package/skills/meta-capability/references/api-contracts/youtube-web-v2-api.md +0 -2660
- package/skills/meta-capability/references/api-contracts/zhihu-web-api.md +0 -2315
- package/skills/meta-capability/references/api-tags/asr-api.md +0 -100
- package/skills/meta-capability/references/api-tags/bilibili-app-api.md +0 -482
- package/skills/meta-capability/references/api-tags/bilibili-web-api.md +0 -1267
- package/skills/meta-capability/references/api-tags/demo-api.md +0 -365
- package/skills/meta-capability/references/api-tags/douyin-app-v3-api.md +0 -2012
- package/skills/meta-capability/references/api-tags/douyin-billboard-api.md +0 -1428
- package/skills/meta-capability/references/api-tags/douyin-creator-api.md +0 -694
- package/skills/meta-capability/references/api-tags/douyin-creator-v2-api.md +0 -694
- package/skills/meta-capability/references/api-tags/douyin-search-api.md +0 -1059
- package/skills/meta-capability/references/api-tags/douyin-web-api.md +0 -3314
- package/skills/meta-capability/references/api-tags/douyin-xingtu-api.md +0 -935
- package/skills/meta-capability/references/api-tags/douyin-xingtu-v2-api.md +0 -925
- package/skills/meta-capability/references/api-tags/health-check.md +0 -40
- package/skills/meta-capability/references/api-tags/hybrid-parsing.md +0 -57
- package/skills/meta-capability/references/api-tags/instagram-v1-api.md +0 -1224
- package/skills/meta-capability/references/api-tags/instagram-v2-api.md +0 -1147
- package/skills/meta-capability/references/api-tags/instagram-v3-api.md +0 -1123
- package/skills/meta-capability/references/api-tags/ios-shortcut.md +0 -45
- package/skills/meta-capability/references/api-tags/kuaishou-app-api.md +0 -846
- package/skills/meta-capability/references/api-tags/kuaishou-web-api.md +0 -551
- package/skills/meta-capability/references/api-tags/lemon8-app-api.md +0 -687
- package/skills/meta-capability/references/api-tags/linkedin-web-api.md +0 -1105
- package/skills/meta-capability/references/api-tags/media-ingest-api.md +0 -112
- package/skills/meta-capability/references/api-tags/pipixia-app-api.md +0 -721
- package/skills/meta-capability/references/api-tags/reddit-app-api.md +0 -1057
- package/skills/meta-capability/references/api-tags/sora2-api.md +0 -737
- package/skills/meta-capability/references/api-tags/temp-mail-api.md +0 -136
- package/skills/meta-capability/references/api-tags/threads-web-api.md +0 -472
- package/skills/meta-capability/references/api-tags/tikhub-downloader-api.md +0 -65
- package/skills/meta-capability/references/api-tags/tikhub-user-api.md +0 -253
- package/skills/meta-capability/references/api-tags/tiktok-ads-api.md +0 -1393
- package/skills/meta-capability/references/api-tags/tiktok-analytics-api.md +0 -179
- package/skills/meta-capability/references/api-tags/tiktok-app-v3-api.md +0 -3264
- package/skills/meta-capability/references/api-tags/tiktok-creator-api.md +0 -709
- package/skills/meta-capability/references/api-tags/tiktok-interaction-api.md +0 -366
- package/skills/meta-capability/references/api-tags/tiktok-shop-web-api.md +0 -663
- package/skills/meta-capability/references/api-tags/tiktok-web-api.md +0 -2516
- package/skills/meta-capability/references/api-tags/toutiao-app-api.md +0 -220
- package/skills/meta-capability/references/api-tags/toutiao-web-api.md +0 -96
- package/skills/meta-capability/references/api-tags/twitter-web-api.md +0 -562
- package/skills/meta-capability/references/api-tags/wechat-channels-api.md +0 -405
- package/skills/meta-capability/references/api-tags/wechat-media-platform-web-api.md +0 -431
- package/skills/meta-capability/references/api-tags/weibo-app-api.md +0 -851
- package/skills/meta-capability/references/api-tags/weibo-web-api.md +0 -470
- package/skills/meta-capability/references/api-tags/weibo-web-v2-api.md +0 -1405
- package/skills/meta-capability/references/api-tags/xiaohongshu-app-api.md +0 -534
- package/skills/meta-capability/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
- package/skills/meta-capability/references/api-tags/xiaohongshu-web-api.md +0 -757
- package/skills/meta-capability/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
- package/skills/meta-capability/references/api-tags/xigua-app-v2-api.md +0 -308
- package/skills/meta-capability/references/api-tags/youtube-web-api.md +0 -934
- package/skills/meta-capability/references/api-tags/youtube-web-v2-api.md +0 -717
- package/skills/meta-capability/references/api-tags/zhihu-web-api.md +0 -1384
- package/skills/meta-capability/references/config-templates/defaults.yaml +0 -18
- package/skills/meta-capability/references/dispatch.md +0 -27
- package/skills/meta-capability/references/execution-guidelines.md +0 -25
- package/skills/meta-capability/references/implemented-route-map.md +0 -177
- package/skills/meta-capability/references/service-guides/asr-u2-u3-fallback.md +0 -75
- package/skills/meta-capability/scripts/__init__.py +0 -1
- package/skills/meta-capability/scripts/call_route.py +0 -141
- package/skills/meta-capability/scripts/core/__init__.py +0 -1
- package/skills/meta-capability/scripts/core/bootstrap_env.py +0 -32
- package/skills/meta-capability/scripts/core/config_loader.py +0 -204
- package/skills/meta-capability/scripts/core/tikomni_common.py +0 -443
- package/skills/meta-capability/scripts/test_auth.py +0 -98
- package/skills/single-work-analysis/SKILL.md +0 -62
- package/skills/single-work-analysis/agents/openai.yaml +0 -4
- package/skills/single-work-analysis/env.example +0 -36
- package/skills/single-work-analysis/references/api-capability-index.md +0 -92
- package/skills/single-work-analysis/references/api-contracts/asr-api.md +0 -130
- package/skills/single-work-analysis/references/api-contracts/bilibili-app-api.md +0 -776
- package/skills/single-work-analysis/references/api-contracts/bilibili-web-api.md +0 -2017
- package/skills/single-work-analysis/references/api-contracts/demo-api.md +0 -717
- package/skills/single-work-analysis/references/api-contracts/douyin-app-v3-api.md +0 -3594
- package/skills/single-work-analysis/references/api-contracts/douyin-billboard-api.md +0 -2274
- package/skills/single-work-analysis/references/api-contracts/douyin-creator-api.md +0 -1575
- package/skills/single-work-analysis/references/api-contracts/douyin-creator-v2-api.md +0 -3254
- package/skills/single-work-analysis/references/api-contracts/douyin-search-api.md +0 -4118
- package/skills/single-work-analysis/references/api-contracts/douyin-web-api.md +0 -5544
- package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-api.md +0 -1916
- package/skills/single-work-analysis/references/api-contracts/douyin-xingtu-v2-api.md +0 -1540
- package/skills/single-work-analysis/references/api-contracts/health-check.md +0 -69
- package/skills/single-work-analysis/references/api-contracts/hybrid-parsing.md +0 -78
- package/skills/single-work-analysis/references/api-contracts/instagram-v1-api.md +0 -2256
- package/skills/single-work-analysis/references/api-contracts/instagram-v2-api.md +0 -2011
- package/skills/single-work-analysis/references/api-contracts/instagram-v3-api.md +0 -2630
- package/skills/single-work-analysis/references/api-contracts/ios-shortcut.md +0 -44
- package/skills/single-work-analysis/references/api-contracts/kuaishou-app-api.md +0 -1518
- package/skills/single-work-analysis/references/api-contracts/kuaishou-web-api.md +0 -1242
- package/skills/single-work-analysis/references/api-contracts/lemon8-app-api.md +0 -1088
- package/skills/single-work-analysis/references/api-contracts/linkedin-web-api.md +0 -1949
- package/skills/single-work-analysis/references/api-contracts/media-ingest-api.md +0 -126
- package/skills/single-work-analysis/references/api-contracts/pipixia-app-api.md +0 -1142
- package/skills/single-work-analysis/references/api-contracts/reddit-app-api.md +0 -2025
- package/skills/single-work-analysis/references/api-contracts/sora2-api.md +0 -2266
- package/skills/single-work-analysis/references/api-contracts/temp-mail-api.md +0 -208
- package/skills/single-work-analysis/references/api-contracts/threads-web-api.md +0 -897
- package/skills/single-work-analysis/references/api-contracts/tikhub-downloader-api.md +0 -134
- package/skills/single-work-analysis/references/api-contracts/tikhub-user-api.md +0 -494
- package/skills/single-work-analysis/references/api-contracts/tiktok-ads-api.md +0 -5947
- package/skills/single-work-analysis/references/api-contracts/tiktok-analytics-api.md +0 -968
- package/skills/single-work-analysis/references/api-contracts/tiktok-app-v3-api.md +0 -5735
- package/skills/single-work-analysis/references/api-contracts/tiktok-creator-api.md +0 -1951
- package/skills/single-work-analysis/references/api-contracts/tiktok-interaction-api.md +0 -742
- package/skills/single-work-analysis/references/api-contracts/tiktok-shop-web-api.md +0 -1890
- package/skills/single-work-analysis/references/api-contracts/tiktok-web-api.md +0 -4448
- package/skills/single-work-analysis/references/api-contracts/toutiao-app-api.md +0 -342
- package/skills/single-work-analysis/references/api-contracts/toutiao-web-api.md +0 -143
- package/skills/single-work-analysis/references/api-contracts/twitter-web-api.md +0 -989
- package/skills/single-work-analysis/references/api-contracts/wechat-channels-api.md +0 -809
- package/skills/single-work-analysis/references/api-contracts/wechat-media-platform-web-api.md +0 -677
- package/skills/single-work-analysis/references/api-contracts/weibo-app-api.md +0 -1547
- package/skills/single-work-analysis/references/api-contracts/weibo-web-api.md +0 -798
- package/skills/single-work-analysis/references/api-contracts/weibo-web-v2-api.md +0 -2459
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-api.md +0 -1291
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-app-v2-api.md +0 -1683
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-api.md +0 -1324
- package/skills/single-work-analysis/references/api-contracts/xiaohongshu-web-v2-api.md +0 -1209
- package/skills/single-work-analysis/references/api-contracts/xigua-app-v2-api.md +0 -489
- package/skills/single-work-analysis/references/api-contracts/youtube-web-api.md +0 -2636
- package/skills/single-work-analysis/references/api-contracts/youtube-web-v2-api.md +0 -2660
- package/skills/single-work-analysis/references/api-contracts/zhihu-web-api.md +0 -2315
- package/skills/single-work-analysis/references/api-tags/asr-api.md +0 -100
- package/skills/single-work-analysis/references/api-tags/bilibili-app-api.md +0 -482
- package/skills/single-work-analysis/references/api-tags/bilibili-web-api.md +0 -1267
- package/skills/single-work-analysis/references/api-tags/demo-api.md +0 -365
- package/skills/single-work-analysis/references/api-tags/douyin-app-v3-api.md +0 -2012
- package/skills/single-work-analysis/references/api-tags/douyin-billboard-api.md +0 -1428
- package/skills/single-work-analysis/references/api-tags/douyin-creator-api.md +0 -694
- package/skills/single-work-analysis/references/api-tags/douyin-creator-v2-api.md +0 -694
- package/skills/single-work-analysis/references/api-tags/douyin-search-api.md +0 -1059
- package/skills/single-work-analysis/references/api-tags/douyin-web-api.md +0 -3314
- package/skills/single-work-analysis/references/api-tags/douyin-xingtu-api.md +0 -935
- package/skills/single-work-analysis/references/api-tags/douyin-xingtu-v2-api.md +0 -925
- package/skills/single-work-analysis/references/api-tags/health-check.md +0 -40
- package/skills/single-work-analysis/references/api-tags/hybrid-parsing.md +0 -57
- package/skills/single-work-analysis/references/api-tags/instagram-v1-api.md +0 -1224
- package/skills/single-work-analysis/references/api-tags/instagram-v2-api.md +0 -1147
- package/skills/single-work-analysis/references/api-tags/instagram-v3-api.md +0 -1123
- package/skills/single-work-analysis/references/api-tags/ios-shortcut.md +0 -45
- package/skills/single-work-analysis/references/api-tags/kuaishou-app-api.md +0 -846
- package/skills/single-work-analysis/references/api-tags/kuaishou-web-api.md +0 -551
- package/skills/single-work-analysis/references/api-tags/lemon8-app-api.md +0 -687
- package/skills/single-work-analysis/references/api-tags/linkedin-web-api.md +0 -1105
- package/skills/single-work-analysis/references/api-tags/media-ingest-api.md +0 -112
- package/skills/single-work-analysis/references/api-tags/pipixia-app-api.md +0 -721
- package/skills/single-work-analysis/references/api-tags/reddit-app-api.md +0 -1057
- package/skills/single-work-analysis/references/api-tags/sora2-api.md +0 -737
- package/skills/single-work-analysis/references/api-tags/temp-mail-api.md +0 -136
- package/skills/single-work-analysis/references/api-tags/threads-web-api.md +0 -472
- package/skills/single-work-analysis/references/api-tags/tikhub-downloader-api.md +0 -65
- package/skills/single-work-analysis/references/api-tags/tikhub-user-api.md +0 -253
- package/skills/single-work-analysis/references/api-tags/tiktok-ads-api.md +0 -1393
- package/skills/single-work-analysis/references/api-tags/tiktok-analytics-api.md +0 -179
- package/skills/single-work-analysis/references/api-tags/tiktok-app-v3-api.md +0 -3264
- package/skills/single-work-analysis/references/api-tags/tiktok-creator-api.md +0 -709
- package/skills/single-work-analysis/references/api-tags/tiktok-interaction-api.md +0 -366
- package/skills/single-work-analysis/references/api-tags/tiktok-shop-web-api.md +0 -663
- package/skills/single-work-analysis/references/api-tags/tiktok-web-api.md +0 -2516
- package/skills/single-work-analysis/references/api-tags/toutiao-app-api.md +0 -220
- package/skills/single-work-analysis/references/api-tags/toutiao-web-api.md +0 -96
- package/skills/single-work-analysis/references/api-tags/twitter-web-api.md +0 -562
- package/skills/single-work-analysis/references/api-tags/wechat-channels-api.md +0 -405
- package/skills/single-work-analysis/references/api-tags/wechat-media-platform-web-api.md +0 -431
- package/skills/single-work-analysis/references/api-tags/weibo-app-api.md +0 -851
- package/skills/single-work-analysis/references/api-tags/weibo-web-api.md +0 -470
- package/skills/single-work-analysis/references/api-tags/weibo-web-v2-api.md +0 -1405
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-api.md +0 -534
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-app-v2-api.md +0 -934
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-api.md +0 -757
- package/skills/single-work-analysis/references/api-tags/xiaohongshu-web-v2-api.md +0 -762
- package/skills/single-work-analysis/references/api-tags/xigua-app-v2-api.md +0 -308
- package/skills/single-work-analysis/references/api-tags/youtube-web-api.md +0 -934
- package/skills/single-work-analysis/references/api-tags/youtube-web-v2-api.md +0 -717
- package/skills/single-work-analysis/references/api-tags/zhihu-web-api.md +0 -1384
- package/skills/single-work-analysis/references/asr-and-fallback.md +0 -20
- package/skills/single-work-analysis/references/config-templates/defaults.yaml +0 -58
- package/skills/single-work-analysis/references/contracts/work-card-fields.md +0 -41
- package/skills/single-work-analysis/references/platform-guides/douyin.md +0 -47
- package/skills/single-work-analysis/references/platform-guides/generic.md +0 -43
- package/skills/single-work-analysis/references/platform-guides/xiaohongshu.md +0 -54
- package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
- package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
- package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/insight.md +0 -47
- package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
- package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
- package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
- package/skills/single-work-analysis/references/schemas/work-card.schema.json +0 -39
- package/skills/single-work-analysis/references/service-guides/asr-u2-u3-fallback.md +0 -75
- package/skills/single-work-analysis/scripts/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/core/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +0 -133
- package/skills/single-work-analysis/scripts/core/bootstrap_env.py +0 -35
- package/skills/single-work-analysis/scripts/core/extract_pipeline.py +0 -173
- package/skills/single-work-analysis/scripts/core/storage_router.py +0 -253
- package/skills/single-work-analysis/scripts/core/tikomni_common.py +0 -588
- package/skills/single-work-analysis/scripts/pipeline/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/pipeline/asr/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +0 -1189
- package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +0 -95
- package/skills/single-work-analysis/scripts/platform/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/platform/douyin/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/platform/douyin/douyin_video_type_matrix.py +0 -224
- package/skills/single-work-analysis/scripts/platform/douyin/select_low_quality_video_url.py +0 -200
- package/skills/single-work-analysis/scripts/platform/xiaohongshu/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/writers/__init__.py +0 -0
- package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +0 -1402
- /package/skills/{creator-analysis → social-media-crawl}/scripts/core/bootstrap_env.py +0 -0
- /package/skills/{creator-analysis → social-media-crawl}/scripts/core/extract_pipeline.py +0 -0
- /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/douyin_video_type_matrix.py +0 -0
- /package/skills/{creator-analysis/scripts/author_home/collectors → social-media-crawl/scripts/pipelines}/homepage_collectors.py +0 -0
- /package/skills/{creator-analysis/scripts/platform/douyin → social-media-crawl/scripts/pipelines}/select_low_quality_video_url.py +0 -0
|
@@ -20,16 +20,16 @@ import argparse
|
|
|
20
20
|
import hashlib
|
|
21
21
|
import json
|
|
22
22
|
import re
|
|
23
|
+
import time
|
|
23
24
|
import urllib.parse
|
|
24
25
|
import urllib.request
|
|
25
26
|
from datetime import datetime
|
|
26
27
|
from pathlib import Path
|
|
27
28
|
from typing import Any, Dict, List, Optional, Tuple
|
|
28
29
|
|
|
29
|
-
from scripts.
|
|
30
|
-
from scripts.core.config_loader import config_get, load_tikomni_config
|
|
31
|
-
from scripts.core.progress_report import ProgressReporter
|
|
32
|
-
from scripts.core.storage_router import render_output_filename, resolve_json_filename_pattern
|
|
30
|
+
from scripts.core.asr_pipeline import derive_asr_clean_text, run_u2_asr_candidates_with_timeout_retry
|
|
31
|
+
from scripts.core.config_loader import config_get, load_tikomni_config
|
|
32
|
+
from scripts.core.progress_report import ProgressReporter, build_progress_reporter
|
|
33
33
|
from scripts.core.extract_pipeline import build_api_trace, resolve_trace_error_context
|
|
34
34
|
from scripts.core.tikomni_common import (
|
|
35
35
|
call_json_api,
|
|
@@ -40,7 +40,11 @@ from scripts.core.tikomni_common import (
|
|
|
40
40
|
summarize_content,
|
|
41
41
|
write_json_stdout,
|
|
42
42
|
)
|
|
43
|
-
from scripts.writers.
|
|
43
|
+
from scripts.writers.write_work_fact_card import (
|
|
44
|
+
build_work_output_envelope,
|
|
45
|
+
persist_output_envelope,
|
|
46
|
+
write_work_fact_card,
|
|
47
|
+
)
|
|
44
48
|
|
|
45
49
|
APP_V2_VIDEO_ENDPOINT = "/api/u1/v1/xiaohongshu/app_v2/get_video_note_detail"
|
|
46
50
|
APP_V2_IMAGE_ENDPOINT = "/api/u1/v1/xiaohongshu/app_v2/get_image_note_detail"
|
|
@@ -49,6 +53,7 @@ APP_V1_ENDPOINT = "/api/u1/v1/xiaohongshu/app/get_note_info"
|
|
|
49
53
|
WEB_V2_V2_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v2"
|
|
50
54
|
WEB_V2_V3_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v3"
|
|
51
55
|
WEB_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v7"
|
|
56
|
+
U2_REQUEST_TIMEOUT_CAP_MS = 15000
|
|
52
57
|
U2_GATE_MIN_DURATION_MS = 13000
|
|
53
58
|
U2_GATE_MAX_DURATION_MS = 1800000
|
|
54
59
|
U2_GATE_RULE = "is_video && 13000<duration_ms<=1800000 && video_download_url_present"
|
|
@@ -80,6 +85,43 @@ def _to_int_or_none(value: Any) -> Optional[int]:
|
|
|
80
85
|
return None
|
|
81
86
|
|
|
82
87
|
|
|
88
|
+
def _resolve_u2_timeout_ms(timeout_ms: Any) -> int:
|
|
89
|
+
parsed = _to_int_or_none(timeout_ms)
|
|
90
|
+
if parsed is None or parsed <= 0:
|
|
91
|
+
return U2_REQUEST_TIMEOUT_CAP_MS
|
|
92
|
+
return max(5000, min(parsed, U2_REQUEST_TIMEOUT_CAP_MS))
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _report_u2_progress(progress: Optional[ProgressReporter], *, stage: str, event: Dict[str, Any], label: str) -> None:
|
|
96
|
+
if progress is None:
|
|
97
|
+
return
|
|
98
|
+
|
|
99
|
+
phase = normalize_text(event.get("phase")).lower()
|
|
100
|
+
state = normalize_text(event.get("state")).lower()
|
|
101
|
+
payload = {
|
|
102
|
+
"phase": phase or "poll",
|
|
103
|
+
"state": state or "",
|
|
104
|
+
"task_id": event.get("task_id"),
|
|
105
|
+
"attempt": event.get("attempt"),
|
|
106
|
+
"task_status": event.get("task_status"),
|
|
107
|
+
"platform_task_status": event.get("platform_task_status"),
|
|
108
|
+
"pending_count": event.get("pending_count"),
|
|
109
|
+
"status_code": event.get("status_code"),
|
|
110
|
+
"batch_progress": event.get("batch_progress"),
|
|
111
|
+
"wait_ms": event.get("wait_ms"),
|
|
112
|
+
"candidate_count": event.get("candidate_count"),
|
|
113
|
+
"ok": event.get("ok"),
|
|
114
|
+
"error_reason": event.get("error_reason"),
|
|
115
|
+
"retriable": event.get("retriable"),
|
|
116
|
+
"request_id": event.get("request_id"),
|
|
117
|
+
}
|
|
118
|
+
message = f"{label} u2 {phase or 'poll'} {state or 'progress'}"
|
|
119
|
+
if phase == "submit" and state == "heartbeat":
|
|
120
|
+
progress.heartbeat(stage=stage, message=message, data=payload)
|
|
121
|
+
return
|
|
122
|
+
progress.progress(stage=stage, message=message, data=payload)
|
|
123
|
+
|
|
124
|
+
|
|
83
125
|
def _evaluate_u2_gate_for_xhs(*, note_content_type: str, duration_ms: Any, video_down_url: Optional[str]) -> Dict[str, Any]:
|
|
84
126
|
content_type = normalize_text(note_content_type).lower()
|
|
85
127
|
is_video = content_type in {"video", "mixed"}
|
|
@@ -128,106 +170,6 @@ def _traceable_identifier(source_input: Dict[str, Optional[str]], note_id: Optio
|
|
|
128
170
|
return f"url-{digest}"
|
|
129
171
|
|
|
130
172
|
|
|
131
|
-
def _build_persist_payload(
|
|
132
|
-
*,
|
|
133
|
-
result: Dict[str, Any],
|
|
134
|
-
source_input: Dict[str, Optional[str]],
|
|
135
|
-
note_id: Optional[str],
|
|
136
|
-
status: str,
|
|
137
|
-
written_at: datetime,
|
|
138
|
-
) -> Dict[str, Any]:
|
|
139
|
-
summary = {
|
|
140
|
-
"summary": result.get("summary", ""),
|
|
141
|
-
"insights": result.get("insights", []),
|
|
142
|
-
"confidence": result.get("confidence"),
|
|
143
|
-
"error_reason": result.get("error_reason"),
|
|
144
|
-
}
|
|
145
|
-
normalized = {
|
|
146
|
-
"platform": "xiaohongshu",
|
|
147
|
-
"content_kind": result.get("content_kind", "note"),
|
|
148
|
-
"note_id": result.get("note_id") or note_id,
|
|
149
|
-
"note_content_type": result.get("note_content_type"),
|
|
150
|
-
"text_source": result.get("text_source"),
|
|
151
|
-
"request_id": result.get("request_id"),
|
|
152
|
-
"source": source_input,
|
|
153
|
-
}
|
|
154
|
-
return {
|
|
155
|
-
"meta": {
|
|
156
|
-
"written_at": written_at.isoformat(timespec="seconds"),
|
|
157
|
-
"status": status,
|
|
158
|
-
"platform": "xiaohongshu",
|
|
159
|
-
"identifier": _traceable_identifier(source_input, note_id),
|
|
160
|
-
},
|
|
161
|
-
"summary": summary,
|
|
162
|
-
"normalized": normalized,
|
|
163
|
-
"raw": result,
|
|
164
|
-
}
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def _persist_output_artifact(
|
|
168
|
-
*,
|
|
169
|
-
result: Dict[str, Any],
|
|
170
|
-
source_input: Dict[str, Optional[str]],
|
|
171
|
-
note_id: Optional[str],
|
|
172
|
-
storage_config: Optional[Dict[str, Any]],
|
|
173
|
-
persist_output: bool,
|
|
174
|
-
) -> Dict[str, Any]:
|
|
175
|
-
if not persist_output:
|
|
176
|
-
return {"enabled": False, "skipped": True, "reason": "disabled_by_flag"}
|
|
177
|
-
|
|
178
|
-
try:
|
|
179
|
-
paths = resolve_storage_paths(storage_config or {})
|
|
180
|
-
except Exception as error:
|
|
181
|
-
return {"enabled": True, "ok": False, "error": f"resolve_storage_paths_failed:{error}"}
|
|
182
|
-
|
|
183
|
-
now = datetime.now()
|
|
184
|
-
date_key = now.strftime("%Y%m%d")
|
|
185
|
-
timestamp = now.strftime("%Y%m%dT%H%M%S")
|
|
186
|
-
identifier = _traceable_identifier(source_input, note_id)
|
|
187
|
-
has_error = bool(result.get("error_reason"))
|
|
188
|
-
status = "error" if has_error else "success"
|
|
189
|
-
|
|
190
|
-
if has_error:
|
|
191
|
-
target_dir = Path(paths.get("errors_root", "")) / date_key
|
|
192
|
-
else:
|
|
193
|
-
target_dir = Path(paths.get("results_root", "")) / date_key
|
|
194
|
-
|
|
195
|
-
target_dir.mkdir(parents=True, exist_ok=True)
|
|
196
|
-
file_name = render_output_filename(
|
|
197
|
-
pattern=resolve_json_filename_pattern(storage_config),
|
|
198
|
-
context={
|
|
199
|
-
"prefix": status,
|
|
200
|
-
"platform": "xiaohongshu",
|
|
201
|
-
"card_type": "single_work_result",
|
|
202
|
-
"author_slug": identifier,
|
|
203
|
-
"title_slug": identifier,
|
|
204
|
-
"identifier": identifier,
|
|
205
|
-
"timestamp": timestamp,
|
|
206
|
-
"date": date_key,
|
|
207
|
-
"ext": ".json",
|
|
208
|
-
},
|
|
209
|
-
default_filename=f"{timestamp}-xiaohongshu-{identifier}.json",
|
|
210
|
-
default_ext=".json",
|
|
211
|
-
)
|
|
212
|
-
file_path = target_dir / file_name
|
|
213
|
-
|
|
214
|
-
payload = _build_persist_payload(
|
|
215
|
-
result=result,
|
|
216
|
-
source_input=source_input,
|
|
217
|
-
note_id=note_id,
|
|
218
|
-
status=status,
|
|
219
|
-
written_at=now,
|
|
220
|
-
)
|
|
221
|
-
file_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")
|
|
222
|
-
|
|
223
|
-
return {
|
|
224
|
-
"enabled": True,
|
|
225
|
-
"ok": True,
|
|
226
|
-
"status": status,
|
|
227
|
-
"path": str(file_path),
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
|
|
231
173
|
def _finalize_result(
|
|
232
174
|
*,
|
|
233
175
|
result: Dict[str, Any],
|
|
@@ -236,14 +178,19 @@ def _finalize_result(
|
|
|
236
178
|
storage_config: Optional[Dict[str, Any]],
|
|
237
179
|
persist_output: bool,
|
|
238
180
|
) -> Dict[str, Any]:
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
181
|
+
envelope = build_work_output_envelope(result, platform="xiaohongshu")
|
|
182
|
+
if "card_write" in result:
|
|
183
|
+
envelope["card_write"] = result.get("card_write")
|
|
184
|
+
if not persist_output:
|
|
185
|
+
envelope["output_persist"] = {"enabled": False, "skipped": True, "reason": "disabled_by_flag"}
|
|
186
|
+
return envelope
|
|
187
|
+
envelope["output_persist"] = persist_output_envelope(
|
|
188
|
+
envelope=envelope,
|
|
243
189
|
storage_config=storage_config,
|
|
244
|
-
|
|
190
|
+
platform="xiaohongshu",
|
|
191
|
+
fallback_identifier=note_id or _traceable_identifier(source_input, note_id),
|
|
245
192
|
)
|
|
246
|
-
return
|
|
193
|
+
return envelope
|
|
247
194
|
|
|
248
195
|
|
|
249
196
|
def _normalize_input(input_value: Optional[str], share_text: Optional[str], note_id: Optional[str]) -> Dict[str, Optional[str]]:
|
|
@@ -760,6 +707,70 @@ def _append_missing_metadata_fields(missing_fields: List[Dict[str, str]], metada
|
|
|
760
707
|
_append(key)
|
|
761
708
|
|
|
762
709
|
|
|
710
|
+
def _empty_timings() -> Dict[str, int]:
|
|
711
|
+
return {
|
|
712
|
+
"url_parse_ms": 0,
|
|
713
|
+
"u1_total_ms": 0,
|
|
714
|
+
"u2_submit_ms": 0,
|
|
715
|
+
"u2_poll_ms": 0,
|
|
716
|
+
"card_write_ms": 0,
|
|
717
|
+
"llm_analysis_ms": 0,
|
|
718
|
+
"total_ms": 0,
|
|
719
|
+
}
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
def _elapsed_ms(started_at: float) -> int:
|
|
723
|
+
return int((time.perf_counter() - started_at) * 1000)
|
|
724
|
+
|
|
725
|
+
|
|
726
|
+
def _http_summary_for_note(response: Dict[str, Any], source_input: Dict[str, Optional[str]]) -> Dict[str, Any]:
|
|
727
|
+
completeness = response.get("_field_completeness") if isinstance(response.get("_field_completeness"), dict) else {}
|
|
728
|
+
payload = response.get("data")
|
|
729
|
+
metadata = _extract_xhs_metadata(
|
|
730
|
+
payload=payload,
|
|
731
|
+
source_input=source_input,
|
|
732
|
+
selected_video_url=None,
|
|
733
|
+
selected_image_urls=[],
|
|
734
|
+
) if response.get("ok") else {}
|
|
735
|
+
return {
|
|
736
|
+
"note_id": normalize_text(metadata.get("note_id")) or normalize_text(source_input.get("note_id")),
|
|
737
|
+
"title_hit": bool(normalize_text(metadata.get("title"))),
|
|
738
|
+
"author_hit": bool(normalize_text(metadata.get("author"))),
|
|
739
|
+
"media_present": bool(normalize_text(metadata.get("video_down_url")) or metadata.get("cover_image")),
|
|
740
|
+
"filled_count": completeness.get("filled_count"),
|
|
741
|
+
"ratio": completeness.get("ratio"),
|
|
742
|
+
}
|
|
743
|
+
|
|
744
|
+
|
|
745
|
+
def _emit_http_progress(
|
|
746
|
+
progress: Optional[ProgressReporter],
|
|
747
|
+
*,
|
|
748
|
+
stage: str,
|
|
749
|
+
response: Dict[str, Any],
|
|
750
|
+
route_label: str,
|
|
751
|
+
source_input: Dict[str, Optional[str]],
|
|
752
|
+
) -> None:
|
|
753
|
+
if progress is None:
|
|
754
|
+
return
|
|
755
|
+
progress.http_event(
|
|
756
|
+
stage=stage,
|
|
757
|
+
endpoint=str(response.get("_endpoint") or route_label),
|
|
758
|
+
response=response,
|
|
759
|
+
route_label=route_label,
|
|
760
|
+
summary=_http_summary_for_note(response, source_input),
|
|
761
|
+
)
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
def _update_pipeline_status(result: Dict[str, Any]) -> None:
|
|
765
|
+
card_write = result.get("card_write") if isinstance(result.get("card_write"), dict) else {}
|
|
766
|
+
deep_analysis = result.get("deep_analysis") if isinstance(result.get("deep_analysis"), dict) else {}
|
|
767
|
+
result["pipeline_status"] = {
|
|
768
|
+
"facts_ready": True,
|
|
769
|
+
"card_ready": bool(card_write.get("ok")),
|
|
770
|
+
"deep_analysis": deep_analysis.get("status") or "skipped",
|
|
771
|
+
}
|
|
772
|
+
|
|
773
|
+
|
|
763
774
|
def _fetch_sparse_metadata_enrich(
|
|
764
775
|
*,
|
|
765
776
|
base_url: str,
|
|
@@ -767,6 +778,7 @@ def _fetch_sparse_metadata_enrich(
|
|
|
767
778
|
timeout_ms: int,
|
|
768
779
|
source_input: Dict[str, Optional[str]],
|
|
769
780
|
note_id: Optional[str],
|
|
781
|
+
progress: Optional[ProgressReporter] = None,
|
|
770
782
|
) -> Dict[str, Any]:
|
|
771
783
|
share_text = source_input.get("share_text")
|
|
772
784
|
resolved_note_id = note_id or source_input.get("note_id") or _extract_note_id_from_share(share_text)
|
|
@@ -782,6 +794,7 @@ def _fetch_sparse_metadata_enrich(
|
|
|
782
794
|
)
|
|
783
795
|
response["_endpoint"] = WEB_V2_V3_ENDPOINT
|
|
784
796
|
response["_route_label"] = "web_v2_v3_sparse_enrich"
|
|
797
|
+
_emit_http_progress(progress, stage="note.fetch", response=response, route_label="web_v2_v3_sparse_enrich", source_input=source_input)
|
|
785
798
|
return response
|
|
786
799
|
|
|
787
800
|
if resolved_note_id:
|
|
@@ -795,6 +808,7 @@ def _fetch_sparse_metadata_enrich(
|
|
|
795
808
|
)
|
|
796
809
|
response["_endpoint"] = WEB_V2_V2_ENDPOINT
|
|
797
810
|
response["_route_label"] = "web_v2_v2_sparse_enrich"
|
|
811
|
+
_emit_http_progress(progress, stage="note.fetch", response=response, route_label="web_v2_v2_sparse_enrich", source_input=source_input)
|
|
798
812
|
return response
|
|
799
813
|
|
|
800
814
|
return {
|
|
@@ -805,7 +819,14 @@ def _fetch_sparse_metadata_enrich(
|
|
|
805
819
|
}
|
|
806
820
|
|
|
807
821
|
|
|
808
|
-
def _fetch_note_info(
|
|
822
|
+
def _fetch_note_info(
|
|
823
|
+
*,
|
|
824
|
+
base_url: str,
|
|
825
|
+
token: str,
|
|
826
|
+
timeout_ms: int,
|
|
827
|
+
source_input: Dict[str, Optional[str]],
|
|
828
|
+
progress: Optional[ProgressReporter] = None,
|
|
829
|
+
) -> Dict[str, Any]:
|
|
809
830
|
attempts: List[Dict[str, Any]] = []
|
|
810
831
|
|
|
811
832
|
share_text = source_input.get("share_text")
|
|
@@ -832,6 +853,7 @@ def _fetch_note_info(*, base_url: str, token: str, timeout_ms: int, source_input
|
|
|
832
853
|
"missing_core": ["note_id", "title_or_desc", "media"],
|
|
833
854
|
"core_ready": False,
|
|
834
855
|
}
|
|
856
|
+
_emit_http_progress(progress, stage="note.fetch", response=response, route_label=label, source_input=source_input)
|
|
835
857
|
attempts.append({"label": label, "endpoint": path, "response": response})
|
|
836
858
|
return response
|
|
837
859
|
|
|
@@ -1323,6 +1345,7 @@ def _build_result(
|
|
|
1323
1345
|
missing_fields: Optional[List[Dict[str, str]]] = None,
|
|
1324
1346
|
metadata_fields: Optional[Dict[str, Any]] = None,
|
|
1325
1347
|
asr_source: Optional[str] = None,
|
|
1348
|
+
timings: Optional[Dict[str, int]] = None,
|
|
1326
1349
|
) -> Dict[str, Any]:
|
|
1327
1350
|
metadata = metadata_fields or {}
|
|
1328
1351
|
summary_block = summarize_content(raw_content, source=f"xiaohongshu:{text_source}")
|
|
@@ -1344,7 +1367,8 @@ def _build_result(
|
|
|
1344
1367
|
|
|
1345
1368
|
work_modality = "video" if normalize_text(note_content_type).lower() in {"video", "mixed"} else "text"
|
|
1346
1369
|
caption_raw = normalize_text(metadata.get("caption_raw"))
|
|
1347
|
-
|
|
1370
|
+
asr_clean = derive_asr_clean_text(raw_content)
|
|
1371
|
+
primary_text = asr_clean if work_modality == "video" else (caption_raw or raw_content)
|
|
1348
1372
|
primary_text_source = "asr_clean" if work_modality == "video" else "caption_raw"
|
|
1349
1373
|
analysis_eligibility = "eligible" if primary_text else "incomplete"
|
|
1350
1374
|
analysis_exclusion_reason = "" if analysis_eligibility == "eligible" else ("video_asr_unavailable" if work_modality == "video" else "caption_raw_missing")
|
|
@@ -1389,6 +1413,8 @@ def _build_result(
|
|
|
1389
1413
|
"xhs_sec_token": metadata.get("xhs_sec_token"),
|
|
1390
1414
|
"downloaded_assets": downloaded_assets,
|
|
1391
1415
|
"raw_content": raw_content,
|
|
1416
|
+
"asr_raw": raw_content,
|
|
1417
|
+
"asr_clean": asr_clean,
|
|
1392
1418
|
"primary_text": primary_text,
|
|
1393
1419
|
"primary_text_source": primary_text_source,
|
|
1394
1420
|
"analysis_eligibility": analysis_eligibility,
|
|
@@ -1401,6 +1427,7 @@ def _build_result(
|
|
|
1401
1427
|
"extract_trace": extract_trace,
|
|
1402
1428
|
"fallback_trace": fallback_trace,
|
|
1403
1429
|
"request_id": request_id,
|
|
1430
|
+
"timings": dict(timings or {}),
|
|
1404
1431
|
}
|
|
1405
1432
|
|
|
1406
1433
|
|
|
@@ -1421,6 +1448,7 @@ def run_xiaohongshu_extract(
|
|
|
1421
1448
|
u2_timeout_retry_max_retries: int,
|
|
1422
1449
|
force_u2_fallback: bool,
|
|
1423
1450
|
write_card: bool,
|
|
1451
|
+
analysis_mode: str,
|
|
1424
1452
|
card_type: str,
|
|
1425
1453
|
card_root: Optional[str],
|
|
1426
1454
|
storage_config: Optional[Dict[str, Any]] = None,
|
|
@@ -1428,14 +1456,17 @@ def run_xiaohongshu_extract(
|
|
|
1428
1456
|
persist_output: bool = True,
|
|
1429
1457
|
progress: Optional[ProgressReporter] = None,
|
|
1430
1458
|
) -> Dict[str, Any]:
|
|
1431
|
-
|
|
1432
|
-
|
|
1433
|
-
|
|
1434
|
-
)
|
|
1435
|
-
|
|
1459
|
+
workflow_started_at = time.perf_counter()
|
|
1460
|
+
timings = _empty_timings()
|
|
1461
|
+
parse_started_at = time.perf_counter()
|
|
1436
1462
|
source_input = _normalize_input(input_value, share_text, note_id)
|
|
1463
|
+
timings["url_parse_ms"] = _elapsed_ms(parse_started_at)
|
|
1437
1464
|
if progress is not None:
|
|
1438
|
-
progress.started(
|
|
1465
|
+
progress.started(
|
|
1466
|
+
stage="note.workflow",
|
|
1467
|
+
message="xiaohongshu note workflow started",
|
|
1468
|
+
data={"analysis_mode": analysis_mode, "write_card": bool(write_card), "persist_output": bool(persist_output)},
|
|
1469
|
+
)
|
|
1439
1470
|
metadata_fields: Dict[str, Any] = {}
|
|
1440
1471
|
if not source_input["share_text"] and not source_input["note_id"]:
|
|
1441
1472
|
result = _build_result(
|
|
@@ -1452,23 +1483,32 @@ def run_xiaohongshu_extract(
|
|
|
1452
1483
|
u2_task_id=None,
|
|
1453
1484
|
u2_task_status="UNKNOWN",
|
|
1454
1485
|
note_content_type="unknown",
|
|
1455
|
-
analysis_mode=
|
|
1486
|
+
analysis_mode=analysis_mode,
|
|
1456
1487
|
selected_video_url=None,
|
|
1457
1488
|
selected_video_candidates=[],
|
|
1458
1489
|
selected_image_urls=[],
|
|
1459
1490
|
downloaded_assets=[],
|
|
1460
1491
|
missing_fields=[{"field": "share_text_or_note_id", "reason": "missing_input"}],
|
|
1461
1492
|
metadata_fields=metadata_fields,
|
|
1493
|
+
timings=timings,
|
|
1462
1494
|
)
|
|
1463
1495
|
if write_card:
|
|
1464
|
-
|
|
1496
|
+
card_started_at = time.perf_counter()
|
|
1497
|
+
result["card_write"] = write_work_fact_card(
|
|
1465
1498
|
payload=result,
|
|
1466
1499
|
platform="xiaohongshu",
|
|
1467
1500
|
card_type=card_type,
|
|
1468
1501
|
card_root=card_root,
|
|
1469
1502
|
content_kind="note",
|
|
1470
1503
|
storage_config=storage_config,
|
|
1504
|
+
analysis_mode=analysis_mode,
|
|
1505
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1471
1506
|
)
|
|
1507
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1508
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
1509
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1510
|
+
result["timings"] = dict(timings)
|
|
1511
|
+
_update_pipeline_status(result)
|
|
1472
1512
|
return _finalize_result(
|
|
1473
1513
|
result=result,
|
|
1474
1514
|
source_input=source_input,
|
|
@@ -1487,6 +1527,7 @@ def run_xiaohongshu_extract(
|
|
|
1487
1527
|
|
|
1488
1528
|
trace: List[Dict[str, Any]] = []
|
|
1489
1529
|
|
|
1530
|
+
u1_started_at = time.perf_counter()
|
|
1490
1531
|
if progress is not None:
|
|
1491
1532
|
progress.progress(stage="note.fetch", message="fetching xiaohongshu note payload")
|
|
1492
1533
|
note_response = _fetch_note_info(
|
|
@@ -1494,7 +1535,9 @@ def run_xiaohongshu_extract(
|
|
|
1494
1535
|
token=runtime["token"],
|
|
1495
1536
|
timeout_ms=runtime["timeout_ms"],
|
|
1496
1537
|
source_input=source_input,
|
|
1538
|
+
progress=progress,
|
|
1497
1539
|
)
|
|
1540
|
+
timings["u1_total_ms"] = _elapsed_ms(u1_started_at)
|
|
1498
1541
|
|
|
1499
1542
|
attempts = note_response.get("_attempts") or []
|
|
1500
1543
|
for index, attempt in enumerate(attempts, start=1):
|
|
@@ -1548,23 +1591,32 @@ def run_xiaohongshu_extract(
|
|
|
1548
1591
|
u2_task_id=None,
|
|
1549
1592
|
u2_task_status="UNKNOWN",
|
|
1550
1593
|
note_content_type="unknown",
|
|
1551
|
-
analysis_mode=
|
|
1594
|
+
analysis_mode=analysis_mode,
|
|
1552
1595
|
selected_video_url=None,
|
|
1553
1596
|
selected_video_candidates=[],
|
|
1554
1597
|
selected_image_urls=[],
|
|
1555
1598
|
downloaded_assets=[],
|
|
1556
1599
|
missing_fields=[{"field": "u1_note_info", "reason": "all_routes_failed"}],
|
|
1557
1600
|
metadata_fields=metadata_fields,
|
|
1601
|
+
timings=timings,
|
|
1558
1602
|
)
|
|
1559
1603
|
if write_card:
|
|
1560
|
-
|
|
1604
|
+
card_started_at = time.perf_counter()
|
|
1605
|
+
result["card_write"] = write_work_fact_card(
|
|
1561
1606
|
payload=result,
|
|
1562
1607
|
platform="xiaohongshu",
|
|
1563
1608
|
card_type=card_type,
|
|
1564
1609
|
card_root=card_root,
|
|
1565
1610
|
content_kind="note",
|
|
1566
1611
|
storage_config=storage_config,
|
|
1612
|
+
analysis_mode=analysis_mode,
|
|
1613
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1567
1614
|
)
|
|
1615
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1616
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
1617
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1618
|
+
result["timings"] = dict(timings)
|
|
1619
|
+
_update_pipeline_status(result)
|
|
1568
1620
|
return _finalize_result(
|
|
1569
1621
|
result=result,
|
|
1570
1622
|
source_input=source_input,
|
|
@@ -1589,13 +1641,16 @@ def run_xiaohongshu_extract(
|
|
|
1589
1641
|
enrich_payload: Any = None
|
|
1590
1642
|
|
|
1591
1643
|
if sparse_metadata_detected:
|
|
1644
|
+
enrich_started_at = time.perf_counter()
|
|
1592
1645
|
enrich_response = _fetch_sparse_metadata_enrich(
|
|
1593
1646
|
base_url=runtime["base_url"],
|
|
1594
1647
|
token=runtime["token"],
|
|
1595
1648
|
timeout_ms=runtime["timeout_ms"],
|
|
1596
1649
|
source_input=source_input,
|
|
1597
1650
|
note_id=source_input.get("note_id"),
|
|
1651
|
+
progress=progress,
|
|
1598
1652
|
)
|
|
1653
|
+
timings["u1_total_ms"] += _elapsed_ms(enrich_started_at)
|
|
1599
1654
|
trace.append(
|
|
1600
1655
|
build_api_trace(
|
|
1601
1656
|
step="u1_sparse_metadata_enrich",
|
|
@@ -1710,23 +1765,32 @@ def run_xiaohongshu_extract(
|
|
|
1710
1765
|
u2_task_id=None,
|
|
1711
1766
|
u2_task_status="SKIPPED",
|
|
1712
1767
|
note_content_type=note_content_type,
|
|
1713
|
-
analysis_mode=
|
|
1768
|
+
analysis_mode=analysis_mode,
|
|
1714
1769
|
selected_video_url=selected_video_url,
|
|
1715
1770
|
selected_video_candidates=video_candidates,
|
|
1716
1771
|
selected_image_urls=image_candidates,
|
|
1717
1772
|
downloaded_assets=[],
|
|
1718
1773
|
missing_fields=missing_fields,
|
|
1719
1774
|
metadata_fields=metadata_fields,
|
|
1775
|
+
timings=timings,
|
|
1720
1776
|
)
|
|
1721
1777
|
if write_card:
|
|
1722
|
-
|
|
1778
|
+
card_started_at = time.perf_counter()
|
|
1779
|
+
result["card_write"] = write_work_fact_card(
|
|
1723
1780
|
payload=result,
|
|
1724
1781
|
platform="xiaohongshu",
|
|
1725
1782
|
card_type=card_type,
|
|
1726
1783
|
card_root=card_root,
|
|
1727
1784
|
content_kind="single_video",
|
|
1728
1785
|
storage_config=storage_config,
|
|
1786
|
+
analysis_mode=analysis_mode,
|
|
1787
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1729
1788
|
)
|
|
1789
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1790
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
1791
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1792
|
+
result["timings"] = dict(timings)
|
|
1793
|
+
_update_pipeline_status(result)
|
|
1730
1794
|
return _finalize_result(
|
|
1731
1795
|
result=result,
|
|
1732
1796
|
source_input=source_input,
|
|
@@ -1766,23 +1830,32 @@ def run_xiaohongshu_extract(
|
|
|
1766
1830
|
u2_task_id=None,
|
|
1767
1831
|
u2_task_status="SKIPPED",
|
|
1768
1832
|
note_content_type=note_content_type,
|
|
1769
|
-
analysis_mode=
|
|
1833
|
+
analysis_mode=analysis_mode,
|
|
1770
1834
|
selected_video_url=u2_gate.get("video_down_url") or selected_video_url,
|
|
1771
1835
|
selected_video_candidates=video_candidates,
|
|
1772
1836
|
selected_image_urls=image_candidates,
|
|
1773
1837
|
downloaded_assets=[],
|
|
1774
1838
|
missing_fields=missing_fields,
|
|
1775
1839
|
metadata_fields=metadata_fields,
|
|
1840
|
+
timings=timings,
|
|
1776
1841
|
)
|
|
1777
1842
|
if write_card:
|
|
1778
|
-
|
|
1843
|
+
card_started_at = time.perf_counter()
|
|
1844
|
+
result["card_write"] = write_work_fact_card(
|
|
1779
1845
|
payload=result,
|
|
1780
1846
|
platform="xiaohongshu",
|
|
1781
1847
|
card_type=card_type,
|
|
1782
1848
|
card_root=card_root,
|
|
1783
1849
|
content_kind="single_video",
|
|
1784
1850
|
storage_config=storage_config,
|
|
1851
|
+
analysis_mode=analysis_mode,
|
|
1852
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1785
1853
|
)
|
|
1854
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1855
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
1856
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1857
|
+
result["timings"] = dict(timings)
|
|
1858
|
+
_update_pipeline_status(result)
|
|
1786
1859
|
return _finalize_result(
|
|
1787
1860
|
result=result,
|
|
1788
1861
|
source_input=source_input,
|
|
@@ -1792,16 +1865,18 @@ def run_xiaohongshu_extract(
|
|
|
1792
1865
|
)
|
|
1793
1866
|
|
|
1794
1867
|
u2_candidates = _dedupe_keep_order([u2_gate.get("video_down_url")] + list(video_candidates))
|
|
1868
|
+
u2_timeout_ms = _resolve_u2_timeout_ms(runtime["timeout_ms"])
|
|
1795
1869
|
if progress is not None:
|
|
1796
1870
|
progress.progress(
|
|
1797
1871
|
stage="note.u2",
|
|
1798
1872
|
message="starting xiaohongshu u2 flow",
|
|
1799
|
-
data={"candidate_count": len(u2_candidates)},
|
|
1873
|
+
data={"candidate_count": len(u2_candidates), "timeout_ms": u2_timeout_ms},
|
|
1800
1874
|
)
|
|
1875
|
+
u2_started_at = time.perf_counter()
|
|
1801
1876
|
u2_bundle = run_u2_asr_candidates_with_timeout_retry(
|
|
1802
1877
|
base_url=runtime["base_url"],
|
|
1803
1878
|
token=runtime["token"],
|
|
1804
|
-
timeout_ms=
|
|
1879
|
+
timeout_ms=u2_timeout_ms,
|
|
1805
1880
|
candidates=u2_candidates,
|
|
1806
1881
|
submit_max_retries=u2_submit_max_retries,
|
|
1807
1882
|
submit_backoff_ms=u2_submit_backoff_ms,
|
|
@@ -1809,7 +1884,13 @@ def run_xiaohongshu_extract(
|
|
|
1809
1884
|
max_polls=max_polls,
|
|
1810
1885
|
timeout_retry_enabled=u2_timeout_retry_enabled,
|
|
1811
1886
|
timeout_retry_max_retries=u2_timeout_retry_max_retries,
|
|
1887
|
+
pending_timeout_sec=int(config_get(storage_config or {}, "runtime.u2_pending_timeout_sec", 60) or 60),
|
|
1888
|
+
progress_callback=(
|
|
1889
|
+
lambda event: _report_u2_progress(progress, stage="note.u2", event=event, label="xiaohongshu")
|
|
1890
|
+
) if progress is not None else None,
|
|
1812
1891
|
)
|
|
1892
|
+
timings["u2_submit_ms"] = _to_int_or_none(u2_bundle.get("submit_duration_ms")) or 0
|
|
1893
|
+
timings["u2_poll_ms"] = _to_int_or_none(u2_bundle.get("poll_duration_ms")) or _elapsed_ms(u2_started_at)
|
|
1813
1894
|
submit_bundle = u2_bundle.get("submit_bundle", {})
|
|
1814
1895
|
submit_response = submit_bundle.get("submit_response", {})
|
|
1815
1896
|
task_id = submit_bundle.get("task_id")
|
|
@@ -1818,6 +1899,19 @@ def run_xiaohongshu_extract(
|
|
|
1818
1899
|
if selected_video_url and not normalize_text(metadata_fields.get("video_down_url")):
|
|
1819
1900
|
metadata_fields["video_down_url"] = selected_video_url
|
|
1820
1901
|
|
|
1902
|
+
if progress is not None:
|
|
1903
|
+
progress.http_event(
|
|
1904
|
+
stage="note.u2",
|
|
1905
|
+
endpoint="/api/u2/v1/services/audio/asr/transcription",
|
|
1906
|
+
response=submit_response,
|
|
1907
|
+
route_label="u2_submit",
|
|
1908
|
+
summary={
|
|
1909
|
+
"task_id": task_id,
|
|
1910
|
+
"retry_count": len(submit_bundle.get("retry_chain", [])),
|
|
1911
|
+
"candidate_count": len(u2_candidates),
|
|
1912
|
+
},
|
|
1913
|
+
)
|
|
1914
|
+
|
|
1821
1915
|
trace.append(
|
|
1822
1916
|
{
|
|
1823
1917
|
"step": "u2_asr_timeout_retry",
|
|
@@ -1830,6 +1924,7 @@ def run_xiaohongshu_extract(
|
|
|
1830
1924
|
"u2_submit_backoff_ms": max(0, int(u2_submit_backoff_ms)),
|
|
1831
1925
|
},
|
|
1832
1926
|
"timeout_retry": u2_bundle.get("timeout_retry", {}),
|
|
1927
|
+
"u3_fallback": u2_bundle.get("u3_fallback", {}),
|
|
1833
1928
|
"rounds": u2_bundle.get("rounds", []),
|
|
1834
1929
|
"final_task_id": poll_result.get("task_id") or task_id,
|
|
1835
1930
|
"final_task_status": poll_result.get("task_status"),
|
|
@@ -1879,23 +1974,32 @@ def run_xiaohongshu_extract(
|
|
|
1879
1974
|
u2_task_id=poll_result.get("task_id") or task_id,
|
|
1880
1975
|
u2_task_status=poll_result.get("task_status") or "UNKNOWN",
|
|
1881
1976
|
note_content_type=note_content_type,
|
|
1882
|
-
analysis_mode=
|
|
1977
|
+
analysis_mode=analysis_mode,
|
|
1883
1978
|
selected_video_url=selected_video_url,
|
|
1884
1979
|
selected_video_candidates=u2_candidates,
|
|
1885
1980
|
selected_image_urls=image_candidates,
|
|
1886
1981
|
downloaded_assets=[],
|
|
1887
1982
|
missing_fields=missing_fields,
|
|
1888
1983
|
metadata_fields=metadata_fields,
|
|
1984
|
+
timings=timings,
|
|
1889
1985
|
)
|
|
1890
1986
|
if write_card:
|
|
1891
|
-
|
|
1987
|
+
card_started_at = time.perf_counter()
|
|
1988
|
+
result["card_write"] = write_work_fact_card(
|
|
1892
1989
|
payload=result,
|
|
1893
1990
|
platform="xiaohongshu",
|
|
1894
1991
|
card_type=card_type,
|
|
1895
1992
|
card_root=card_root,
|
|
1896
1993
|
content_kind="single_video",
|
|
1897
1994
|
storage_config=storage_config,
|
|
1995
|
+
analysis_mode=analysis_mode,
|
|
1996
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1898
1997
|
)
|
|
1998
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1999
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
2000
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
2001
|
+
result["timings"] = dict(timings)
|
|
2002
|
+
_update_pipeline_status(result)
|
|
1899
2003
|
return _finalize_result(
|
|
1900
2004
|
result=result,
|
|
1901
2005
|
source_input=source_input,
|
|
@@ -1911,38 +2015,57 @@ def run_xiaohongshu_extract(
|
|
|
1911
2015
|
explicit_error_reason=poll_result.get("error_reason"),
|
|
1912
2016
|
explicit_request_id=poll_result.get("request_id") or submit_response.get("request_id") or note_response.get("request_id"),
|
|
1913
2017
|
)
|
|
2018
|
+
text_source = "u2"
|
|
2019
|
+
confidence = "high" if poll_result.get("ok") and raw_content else "low"
|
|
2020
|
+
error_reason = final_ctx.get("error_reason")
|
|
2021
|
+
if not raw_content and caption_text:
|
|
2022
|
+
missing_fields.append({"field": "asr_transcript", "reason": f"u2_failed:{error_reason or 'u2_poll_timeout'}"})
|
|
2023
|
+
raw_content = caption_text
|
|
2024
|
+
text_source = "caption_fallback"
|
|
2025
|
+
confidence = "medium"
|
|
2026
|
+
error_reason = None
|
|
1914
2027
|
result = _build_result(
|
|
1915
2028
|
source_input=source_input,
|
|
1916
2029
|
raw_content=raw_content,
|
|
1917
|
-
confidence=
|
|
1918
|
-
error_reason=
|
|
2030
|
+
confidence=confidence,
|
|
2031
|
+
error_reason=error_reason,
|
|
1919
2032
|
extract_trace=trace,
|
|
1920
2033
|
fallback_trace=final_ctx.get("fallback_trace", []),
|
|
1921
2034
|
request_id=final_ctx.get("request_id"),
|
|
1922
|
-
text_source=
|
|
2035
|
+
text_source=text_source,
|
|
1923
2036
|
note_id=str(resolved_note_id) if resolved_note_id else source_input.get("note_id"),
|
|
1924
2037
|
subtitle_hit=False,
|
|
1925
2038
|
u2_task_id=poll_result.get("task_id") or task_id,
|
|
1926
2039
|
u2_task_status=poll_result.get("task_status"),
|
|
1927
2040
|
note_content_type=note_content_type,
|
|
1928
|
-
analysis_mode=
|
|
2041
|
+
analysis_mode=analysis_mode,
|
|
1929
2042
|
selected_video_url=selected_video_url,
|
|
1930
2043
|
selected_video_candidates=u2_candidates,
|
|
1931
2044
|
selected_image_urls=image_candidates,
|
|
1932
2045
|
downloaded_assets=[],
|
|
1933
2046
|
missing_fields=missing_fields,
|
|
1934
2047
|
metadata_fields=metadata_fields,
|
|
2048
|
+
timings=timings,
|
|
1935
2049
|
)
|
|
1936
2050
|
|
|
1937
2051
|
if write_card:
|
|
1938
|
-
|
|
2052
|
+
card_started_at = time.perf_counter()
|
|
2053
|
+
result["card_write"] = write_work_fact_card(
|
|
1939
2054
|
payload=result,
|
|
1940
2055
|
platform="xiaohongshu",
|
|
1941
2056
|
card_type=card_type,
|
|
1942
2057
|
card_root=card_root,
|
|
1943
2058
|
content_kind="single_video",
|
|
1944
2059
|
storage_config=storage_config,
|
|
2060
|
+
analysis_mode=analysis_mode,
|
|
2061
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1945
2062
|
)
|
|
2063
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
2064
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
2065
|
+
|
|
2066
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
2067
|
+
result["timings"] = dict(timings)
|
|
2068
|
+
_update_pipeline_status(result)
|
|
1946
2069
|
|
|
1947
2070
|
return _finalize_result(
|
|
1948
2071
|
result=result,
|
|
@@ -1989,24 +2112,34 @@ def run_xiaohongshu_extract(
|
|
|
1989
2112
|
u2_task_id=None,
|
|
1990
2113
|
u2_task_status="SKIPPED",
|
|
1991
2114
|
note_content_type="image" if note_content_type == "unknown" else note_content_type,
|
|
1992
|
-
analysis_mode=
|
|
2115
|
+
analysis_mode=analysis_mode,
|
|
1993
2116
|
selected_video_url=None,
|
|
1994
2117
|
selected_video_candidates=video_candidates,
|
|
1995
2118
|
selected_image_urls=image_candidates,
|
|
1996
2119
|
downloaded_assets=downloaded_assets,
|
|
1997
2120
|
missing_fields=missing_fields,
|
|
1998
2121
|
metadata_fields=metadata_fields,
|
|
2122
|
+
timings=timings,
|
|
1999
2123
|
)
|
|
2000
2124
|
|
|
2001
2125
|
if write_card:
|
|
2002
|
-
|
|
2126
|
+
card_started_at = time.perf_counter()
|
|
2127
|
+
result["card_write"] = write_work_fact_card(
|
|
2003
2128
|
payload=result,
|
|
2004
2129
|
platform="xiaohongshu",
|
|
2005
2130
|
card_type=card_type,
|
|
2006
2131
|
card_root=card_root,
|
|
2007
2132
|
content_kind="note",
|
|
2008
2133
|
storage_config=storage_config,
|
|
2134
|
+
analysis_mode=analysis_mode,
|
|
2135
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
2009
2136
|
)
|
|
2137
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
2138
|
+
timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
|
|
2139
|
+
|
|
2140
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
2141
|
+
result["timings"] = dict(timings)
|
|
2142
|
+
_update_pipeline_status(result)
|
|
2010
2143
|
|
|
2011
2144
|
finalized = _finalize_result(
|
|
2012
2145
|
result=result,
|
|
@@ -2025,6 +2158,7 @@ def run_xiaohongshu_extract(
|
|
|
2025
2158
|
"card_write_ok": bool((finalized.get("card_write") or {}).get("ok")),
|
|
2026
2159
|
"output_persist_ok": bool((finalized.get("output_persist") or {}).get("ok")),
|
|
2027
2160
|
"text_source": finalized.get("text_source"),
|
|
2161
|
+
"deep_analysis_status": ((finalized.get("deep_analysis") or {}).get("status")),
|
|
2028
2162
|
},
|
|
2029
2163
|
)
|
|
2030
2164
|
return finalized
|
|
@@ -2069,7 +2203,14 @@ def main() -> None:
|
|
|
2069
2203
|
help="Conservative max retries for U2 timeout-only retry (0~3)",
|
|
2070
2204
|
)
|
|
2071
2205
|
parser.add_argument("--force-u2-fallback", action="store_true", help="Skip subtitle usage and force U2 fallback (test)")
|
|
2072
|
-
parser.add_argument("--card-type", choices=["work"
|
|
2206
|
+
parser.add_argument("--card-type", choices=["work"], default="work", help="Primary card type")
|
|
2207
|
+
parser.add_argument("--card-mode", dest="analysis_mode", choices=["standard", "auto", "local"], default="standard", help="Fact-card mode")
|
|
2208
|
+
parser.add_argument("--analysis-mode", dest="analysis_mode", choices=["standard", "auto", "local"], help=argparse.SUPPRESS)
|
|
2209
|
+
parser.set_defaults(write_card=True, persist_output=True)
|
|
2210
|
+
parser.add_argument("--write-card", dest="write_card", action="store_true", help="Write final work card")
|
|
2211
|
+
parser.add_argument("--no-write-card", dest="write_card", action="store_false", help="Skip card writing")
|
|
2212
|
+
parser.add_argument("--persist-output", dest="persist_output", action="store_true", help="Persist result JSON")
|
|
2213
|
+
parser.add_argument("--no-persist-output", dest="persist_output", action="store_false", help="Skip result JSON persist")
|
|
2073
2214
|
parser.add_argument("--card-root", default=None, help="Card root (absolute); falls back to TIKOMNI_CARD_ROOT when writing cards")
|
|
2074
2215
|
args = parser.parse_args()
|
|
2075
2216
|
|
|
@@ -2109,6 +2250,12 @@ def main() -> None:
|
|
|
2109
2250
|
if args.u2_timeout_retry_max_retries is not None
|
|
2110
2251
|
else config_get(config, "asr_strategy.u2_timeout_retry.max_retries", 3)
|
|
2111
2252
|
)
|
|
2253
|
+
progress = build_progress_reporter(
|
|
2254
|
+
workflow="social-media-crawl",
|
|
2255
|
+
platform="xiaohongshu",
|
|
2256
|
+
content_kind="note",
|
|
2257
|
+
input_value=args.share_text or args.note_id or args.input,
|
|
2258
|
+
)
|
|
2112
2259
|
|
|
2113
2260
|
try:
|
|
2114
2261
|
result = run_xiaohongshu_extract(
|
|
@@ -2126,12 +2273,14 @@ def main() -> None:
|
|
|
2126
2273
|
u2_timeout_retry_enabled=bool(u2_timeout_retry_enabled),
|
|
2127
2274
|
u2_timeout_retry_max_retries=int(u2_timeout_retry_max_retries),
|
|
2128
2275
|
force_u2_fallback=args.force_u2_fallback,
|
|
2129
|
-
write_card=
|
|
2276
|
+
write_card=bool(args.write_card),
|
|
2277
|
+
analysis_mode=args.analysis_mode,
|
|
2130
2278
|
card_type=args.card_type,
|
|
2131
2279
|
card_root=args.card_root,
|
|
2132
2280
|
storage_config=config,
|
|
2133
2281
|
allow_process_env=args.allow_process_env,
|
|
2134
|
-
persist_output=
|
|
2282
|
+
persist_output=bool(args.persist_output),
|
|
2283
|
+
progress=progress,
|
|
2135
2284
|
)
|
|
2136
2285
|
except ValueError as error:
|
|
2137
2286
|
result = {
|