@tikomni/skills 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/skills/single-work-analysis/env.example +3 -3
- package/skills/single-work-analysis/references/config-templates/defaults.yaml +8 -19
- package/skills/single-work-analysis/references/prompt-contracts/{insight.md → analysis-bundle.md} +43 -8
- package/skills/single-work-analysis/scripts/core/analysis_adapter.py +384 -0
- package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +399 -76
- package/skills/single-work-analysis/scripts/core/config_loader.py +18 -42
- package/skills/single-work-analysis/scripts/core/progress_report.py +163 -16
- package/skills/single-work-analysis/scripts/core/storage_router.py +24 -57
- package/skills/single-work-analysis/scripts/core/tikomni_common.py +13 -3
- package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +154 -7
- package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +3 -1
- package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +243 -44
- package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +263 -25
- package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +244 -894
- package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
- package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
- package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
- package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
- package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
|
@@ -22,16 +22,17 @@ bootstrap_for_direct_run(__file__, __package__)
|
|
|
22
22
|
import hashlib
|
|
23
23
|
import json
|
|
24
24
|
import re
|
|
25
|
+
import time
|
|
25
26
|
from datetime import datetime
|
|
26
27
|
from pathlib import Path
|
|
27
28
|
from typing import Any, Dict, List, Optional
|
|
28
29
|
|
|
29
30
|
from scripts.core.config_loader import config_get, load_tikomni_config, resolve_storage_paths
|
|
30
31
|
from scripts.core.extract_pipeline import resolve_trace_error_context
|
|
31
|
-
from scripts.core.progress_report import ProgressReporter
|
|
32
|
+
from scripts.core.progress_report import ProgressReporter, build_progress_reporter
|
|
32
33
|
from scripts.core.storage_router import render_output_filename, resolve_json_filename_pattern
|
|
33
34
|
from scripts.platform.douyin.douyin_video_type_matrix import normalize_douyin_video_type
|
|
34
|
-
from scripts.pipeline.asr.asr_pipeline import submit_u2_asr_with_retry
|
|
35
|
+
from scripts.pipeline.asr.asr_pipeline import derive_asr_clean_text, submit_u2_asr_with_retry
|
|
35
36
|
from scripts.pipeline.asr.poll_u2_task import poll_u2_task
|
|
36
37
|
from scripts.platform.douyin.select_low_quality_video_url import select_low_quality_video_url
|
|
37
38
|
from scripts.core.tikomni_common import (
|
|
@@ -47,6 +48,7 @@ from scripts.writers.write_benchmark_card import write_benchmark_card
|
|
|
47
48
|
APP_ENDPOINT = "/api/u1/v1/douyin/app/v3/fetch_one_video_by_share_url"
|
|
48
49
|
WEB_ENDPOINT = "/api/u1/v1/douyin/web/fetch_one_video_by_share_url"
|
|
49
50
|
U2_SUBMIT_ENDPOINT = "/api/u2/v1/services/audio/asr/transcription"
|
|
51
|
+
U2_REQUEST_TIMEOUT_CAP_MS = 15000
|
|
50
52
|
|
|
51
53
|
|
|
52
54
|
def _format_published_date(value: Any) -> str:
|
|
@@ -86,6 +88,46 @@ def _traceable_identifier(source_input: Dict[str, Optional[str]], platform_work_
|
|
|
86
88
|
return f"url-{digest}"
|
|
87
89
|
|
|
88
90
|
|
|
91
|
+
def _resolve_u2_timeout_ms(timeout_ms: Any) -> int:
|
|
92
|
+
try:
|
|
93
|
+
parsed = int(timeout_ms)
|
|
94
|
+
except Exception:
|
|
95
|
+
parsed = U2_REQUEST_TIMEOUT_CAP_MS
|
|
96
|
+
if parsed <= 0:
|
|
97
|
+
return U2_REQUEST_TIMEOUT_CAP_MS
|
|
98
|
+
return max(5000, min(parsed, U2_REQUEST_TIMEOUT_CAP_MS))
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _report_u2_progress(progress: Optional[ProgressReporter], *, stage: str, event: Dict[str, Any], label: str) -> None:
|
|
102
|
+
if progress is None:
|
|
103
|
+
return
|
|
104
|
+
|
|
105
|
+
phase = normalize_text(event.get("phase")).lower()
|
|
106
|
+
state = normalize_text(event.get("state")).lower()
|
|
107
|
+
payload = {
|
|
108
|
+
"phase": phase or "poll",
|
|
109
|
+
"state": state or "",
|
|
110
|
+
"task_id": event.get("task_id"),
|
|
111
|
+
"attempt": event.get("attempt"),
|
|
112
|
+
"task_status": event.get("task_status"),
|
|
113
|
+
"platform_task_status": event.get("platform_task_status"),
|
|
114
|
+
"pending_count": event.get("pending_count"),
|
|
115
|
+
"status_code": event.get("status_code"),
|
|
116
|
+
"batch_progress": event.get("batch_progress"),
|
|
117
|
+
"wait_ms": event.get("wait_ms"),
|
|
118
|
+
"candidate_count": event.get("candidate_count"),
|
|
119
|
+
"ok": event.get("ok"),
|
|
120
|
+
"error_reason": event.get("error_reason"),
|
|
121
|
+
"retriable": event.get("retriable"),
|
|
122
|
+
"request_id": event.get("request_id"),
|
|
123
|
+
}
|
|
124
|
+
message = f"{label} u2 {phase or 'poll'} {state or 'progress'}"
|
|
125
|
+
if phase == "submit" and state == "heartbeat":
|
|
126
|
+
progress.heartbeat(stage=stage, message=message, data=payload)
|
|
127
|
+
return
|
|
128
|
+
progress.progress(stage=stage, message=message, data=payload)
|
|
129
|
+
|
|
130
|
+
|
|
89
131
|
def _build_persist_payload(
|
|
90
132
|
*,
|
|
91
133
|
result: Dict[str, Any],
|
|
@@ -330,12 +372,12 @@ def _extract_author(item: Dict[str, Any]) -> Dict[str, Optional[str]]:
|
|
|
330
372
|
}
|
|
331
373
|
|
|
332
374
|
|
|
333
|
-
def _extract_metrics(item: Dict[str, Any]) -> Dict[str, int]:
|
|
375
|
+
def _extract_metrics(item: Dict[str, Any]) -> Dict[str, Optional[int]]:
|
|
334
376
|
statistics = item.get("statistics")
|
|
335
377
|
if not isinstance(statistics, dict):
|
|
336
378
|
statistics = {}
|
|
337
379
|
|
|
338
|
-
def metric(*keys: str) -> int:
|
|
380
|
+
def metric(*keys: str, default: Optional[int] = 0) -> Optional[int]:
|
|
339
381
|
for key in keys:
|
|
340
382
|
value = _safe_int(statistics.get(key))
|
|
341
383
|
if value is not None:
|
|
@@ -343,15 +385,25 @@ def _extract_metrics(item: Dict[str, Any]) -> Dict[str, int]:
|
|
|
343
385
|
value = _safe_int(item.get(key))
|
|
344
386
|
if value is not None:
|
|
345
387
|
return value
|
|
346
|
-
return
|
|
388
|
+
return default
|
|
347
389
|
|
|
348
|
-
|
|
390
|
+
metrics = {
|
|
349
391
|
"digg_count": metric("digg_count"),
|
|
350
392
|
"comment_count": metric("comment_count"),
|
|
351
393
|
"collect_count": metric("collect_count"),
|
|
352
394
|
"share_count": metric("share_count", "forward_count"),
|
|
353
|
-
"play_count": metric("play_count"),
|
|
395
|
+
"play_count": metric("play_count", default=None),
|
|
354
396
|
}
|
|
397
|
+
play_count = metrics.get("play_count")
|
|
398
|
+
engagement_floor = max(
|
|
399
|
+
int(metrics.get("digg_count") or 0),
|
|
400
|
+
int(metrics.get("comment_count") or 0),
|
|
401
|
+
int(metrics.get("collect_count") or 0),
|
|
402
|
+
int(metrics.get("share_count") or 0),
|
|
403
|
+
)
|
|
404
|
+
if play_count is not None and int(play_count) <= 0 and engagement_floor > 0:
|
|
405
|
+
metrics["play_count"] = None
|
|
406
|
+
return metrics
|
|
355
407
|
|
|
356
408
|
|
|
357
409
|
def _extract_platform_work_id(item: Dict[str, Any]) -> Optional[str]:
|
|
@@ -550,6 +602,71 @@ def _trace_step(
|
|
|
550
602
|
return payload
|
|
551
603
|
|
|
552
604
|
|
|
605
|
+
def _empty_metrics() -> Dict[str, Optional[int]]:
|
|
606
|
+
return {
|
|
607
|
+
"digg_count": 0,
|
|
608
|
+
"comment_count": 0,
|
|
609
|
+
"collect_count": 0,
|
|
610
|
+
"share_count": 0,
|
|
611
|
+
"play_count": None,
|
|
612
|
+
}
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
def _empty_timings() -> Dict[str, int]:
|
|
616
|
+
return {
|
|
617
|
+
"url_parse_ms": 0,
|
|
618
|
+
"u1_total_ms": 0,
|
|
619
|
+
"u2_submit_ms": 0,
|
|
620
|
+
"u2_poll_ms": 0,
|
|
621
|
+
"card_write_ms": 0,
|
|
622
|
+
"llm_analysis_ms": 0,
|
|
623
|
+
"total_ms": 0,
|
|
624
|
+
}
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def _elapsed_ms(started_at: float) -> int:
|
|
628
|
+
return int((time.perf_counter() - started_at) * 1000)
|
|
629
|
+
|
|
630
|
+
|
|
631
|
+
def _u1_response_summary(response: Dict[str, Any]) -> Dict[str, Any]:
|
|
632
|
+
payload = response.get("data")
|
|
633
|
+
item = _extract_aweme_detail(payload)
|
|
634
|
+
return {
|
|
635
|
+
"platform_work_id": _extract_platform_work_id(item or {}) if isinstance(item, dict) else None,
|
|
636
|
+
"title_hit": bool(_pick_title(item or {})) if isinstance(item, dict) else False,
|
|
637
|
+
"desc_hit": bool(_pick_desc(item or {})) if isinstance(item, dict) else False,
|
|
638
|
+
"video_url_present": bool(normalize_text((item or {}).get("video_down_url"))) if isinstance(item, dict) else False,
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
|
|
642
|
+
def _emit_http_progress(
|
|
643
|
+
progress: Optional[ProgressReporter],
|
|
644
|
+
*,
|
|
645
|
+
stage: str,
|
|
646
|
+
response: Dict[str, Any],
|
|
647
|
+
route_label: str,
|
|
648
|
+
) -> None:
|
|
649
|
+
if progress is None:
|
|
650
|
+
return
|
|
651
|
+
progress.http_event(
|
|
652
|
+
stage=stage,
|
|
653
|
+
endpoint=str(response.get("_endpoint") or route_label),
|
|
654
|
+
response=response,
|
|
655
|
+
route_label=route_label,
|
|
656
|
+
summary=_u1_response_summary(response),
|
|
657
|
+
)
|
|
658
|
+
|
|
659
|
+
|
|
660
|
+
def _update_pipeline_status(result: Dict[str, Any]) -> None:
|
|
661
|
+
card_write = result.get("card_write") if isinstance(result.get("card_write"), dict) else {}
|
|
662
|
+
deep_analysis = result.get("deep_analysis") if isinstance(result.get("deep_analysis"), dict) else {}
|
|
663
|
+
result["pipeline_status"] = {
|
|
664
|
+
"facts_ready": True,
|
|
665
|
+
"card_ready": bool(card_write.get("ok")),
|
|
666
|
+
"deep_analysis": deep_analysis.get("status") or "skipped",
|
|
667
|
+
}
|
|
668
|
+
|
|
669
|
+
|
|
553
670
|
def _build_missing_fields(
|
|
554
671
|
*,
|
|
555
672
|
title: str,
|
|
@@ -594,7 +711,7 @@ def _build_result(
|
|
|
594
711
|
duration_ms: Optional[int],
|
|
595
712
|
video_down_url: Optional[str],
|
|
596
713
|
author: Dict[str, Optional[str]],
|
|
597
|
-
metrics: Dict[str, int],
|
|
714
|
+
metrics: Dict[str, Optional[int]],
|
|
598
715
|
tags: List[str],
|
|
599
716
|
is_video: bool,
|
|
600
717
|
video_type_reason: str,
|
|
@@ -607,9 +724,11 @@ def _build_result(
|
|
|
607
724
|
u2_task_id: Optional[str],
|
|
608
725
|
u2_task_status: str,
|
|
609
726
|
u2_gate_reason: str,
|
|
727
|
+
analysis_mode: str,
|
|
610
728
|
create_time_sec: Optional[int] = None,
|
|
611
729
|
cover_image: Optional[str] = None,
|
|
612
730
|
asr_source: str = "fallback_none",
|
|
731
|
+
timings: Optional[Dict[str, int]] = None,
|
|
613
732
|
) -> Dict[str, Any]:
|
|
614
733
|
summary_block = summarize_content(raw_content, source="douyin:single-video-low-quality")
|
|
615
734
|
insights = list(summary_block.get("insights", []))
|
|
@@ -627,13 +746,15 @@ def _build_result(
|
|
|
627
746
|
for step in extract_trace
|
|
628
747
|
if isinstance(step, dict) and isinstance(step.get("endpoint"), str)
|
|
629
748
|
]
|
|
630
|
-
|
|
749
|
+
asr_clean = derive_asr_clean_text(raw_content)
|
|
750
|
+
primary_text = asr_clean or raw_content
|
|
631
751
|
analysis_eligibility = "eligible" if raw_content else "incomplete"
|
|
632
752
|
analysis_exclusion_reason = "" if raw_content else "video_asr_unavailable"
|
|
633
753
|
|
|
634
754
|
payload: Dict[str, Any] = {
|
|
635
755
|
"platform": "douyin",
|
|
636
756
|
"content_kind": "single_video",
|
|
757
|
+
"analysis_mode": analysis_mode,
|
|
637
758
|
"source": source_input,
|
|
638
759
|
"platform_work_id": platform_work_id,
|
|
639
760
|
"title": title,
|
|
@@ -655,13 +776,15 @@ def _build_result(
|
|
|
655
776
|
"comment_count": metrics.get("comment_count", 0),
|
|
656
777
|
"collect_count": metrics.get("collect_count", 0),
|
|
657
778
|
"share_count": metrics.get("share_count", 0),
|
|
658
|
-
"play_count": metrics.get("play_count"
|
|
779
|
+
"play_count": metrics.get("play_count"),
|
|
659
780
|
"tags": tags or [],
|
|
660
781
|
"is_video": is_video,
|
|
661
782
|
"video_type_reason": video_type_reason,
|
|
662
783
|
"u2_task_id": u2_task_id,
|
|
663
784
|
"u2_task_status": u2_task_status,
|
|
664
785
|
"raw_content": raw_content,
|
|
786
|
+
"asr_raw": raw_content,
|
|
787
|
+
"asr_clean": asr_clean,
|
|
665
788
|
"primary_text": primary_text,
|
|
666
789
|
"primary_text_source": "asr_clean",
|
|
667
790
|
"analysis_eligibility": analysis_eligibility,
|
|
@@ -682,6 +805,7 @@ def _build_result(
|
|
|
682
805
|
"fallback_trace": fallback_trace,
|
|
683
806
|
"request_id": request_id,
|
|
684
807
|
"endpoint_list": endpoint_list,
|
|
808
|
+
"timings": dict(timings or {}),
|
|
685
809
|
}
|
|
686
810
|
return payload
|
|
687
811
|
|
|
@@ -701,6 +825,7 @@ def run_douyin_single_video(
|
|
|
701
825
|
u2_submit_max_retries: int,
|
|
702
826
|
u2_submit_backoff_ms: int,
|
|
703
827
|
write_card: bool,
|
|
828
|
+
analysis_mode: str,
|
|
704
829
|
card_type: str,
|
|
705
830
|
card_root: Optional[str],
|
|
706
831
|
content_kind: str = "single_video",
|
|
@@ -709,14 +834,17 @@ def run_douyin_single_video(
|
|
|
709
834
|
persist_output: bool = True,
|
|
710
835
|
progress: Optional[ProgressReporter] = None,
|
|
711
836
|
) -> Dict[str, Any]:
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
)
|
|
716
|
-
|
|
837
|
+
workflow_started_at = time.perf_counter()
|
|
838
|
+
timings = _empty_timings()
|
|
839
|
+
parse_started_at = time.perf_counter()
|
|
717
840
|
source_input = _normalize_input(input_value, share_url)
|
|
841
|
+
timings["url_parse_ms"] = _elapsed_ms(parse_started_at)
|
|
718
842
|
if progress is not None:
|
|
719
|
-
progress.started(
|
|
843
|
+
progress.started(
|
|
844
|
+
stage="single_video.workflow",
|
|
845
|
+
message="douyin single_video workflow started",
|
|
846
|
+
data={"analysis_mode": analysis_mode, "write_card": bool(write_card), "persist_output": bool(persist_output)},
|
|
847
|
+
)
|
|
720
848
|
if not source_input.get("share_url"):
|
|
721
849
|
result = _build_result(
|
|
722
850
|
source_input=source_input,
|
|
@@ -726,13 +854,7 @@ def run_douyin_single_video(
|
|
|
726
854
|
duration_ms=None,
|
|
727
855
|
video_down_url=None,
|
|
728
856
|
author={"author_handle": None, "author_platform_id": None, "douyin_sec_uid": None, "douyin_aweme_author_id": None, "nickname": None, "signature": None},
|
|
729
|
-
metrics=
|
|
730
|
-
"digg_count": 0,
|
|
731
|
-
"comment_count": 0,
|
|
732
|
-
"collect_count": 0,
|
|
733
|
-
"share_count": 0,
|
|
734
|
-
"play_count": 0,
|
|
735
|
-
},
|
|
857
|
+
metrics=_empty_metrics(),
|
|
736
858
|
tags=[],
|
|
737
859
|
is_video=False,
|
|
738
860
|
video_type_reason="missing_share_url",
|
|
@@ -745,8 +867,11 @@ def run_douyin_single_video(
|
|
|
745
867
|
u2_task_id=None,
|
|
746
868
|
u2_task_status="UNKNOWN",
|
|
747
869
|
u2_gate_reason="not_started",
|
|
870
|
+
analysis_mode=analysis_mode,
|
|
871
|
+
timings=timings,
|
|
748
872
|
)
|
|
749
873
|
if write_card:
|
|
874
|
+
card_started_at = time.perf_counter()
|
|
750
875
|
result["card_write"] = write_benchmark_card(
|
|
751
876
|
payload=result,
|
|
752
877
|
platform="douyin",
|
|
@@ -754,7 +879,14 @@ def run_douyin_single_video(
|
|
|
754
879
|
card_root=card_root,
|
|
755
880
|
content_kind=content_kind,
|
|
756
881
|
storage_config=storage_config,
|
|
882
|
+
analysis_mode=analysis_mode,
|
|
883
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
757
884
|
)
|
|
885
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
886
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
887
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
888
|
+
result["timings"] = dict(timings)
|
|
889
|
+
_update_pipeline_status(result)
|
|
758
890
|
return _finalize_result(
|
|
759
891
|
result=result,
|
|
760
892
|
source_input=source_input,
|
|
@@ -776,6 +908,7 @@ def run_douyin_single_video(
|
|
|
776
908
|
|
|
777
909
|
trace: List[Dict[str, Any]] = []
|
|
778
910
|
|
|
911
|
+
u1_started_at = time.perf_counter()
|
|
779
912
|
if progress is not None:
|
|
780
913
|
progress.progress(stage="single_video.fetch", message="fetching douyin single_video payload")
|
|
781
914
|
one_video_response = _u1_fetch_one_video(
|
|
@@ -785,9 +918,11 @@ def run_douyin_single_video(
|
|
|
785
918
|
app_timeout_ms=app_timeout,
|
|
786
919
|
web_timeout_ms=web_timeout,
|
|
787
920
|
)
|
|
921
|
+
timings["u1_total_ms"] = _elapsed_ms(u1_started_at)
|
|
788
922
|
|
|
789
923
|
app_failed = one_video_response.get("_app_failed")
|
|
790
924
|
if app_failed:
|
|
925
|
+
_emit_http_progress(progress, stage="single_video.fetch", response=app_failed, route_label="app_primary")
|
|
791
926
|
trace.append(
|
|
792
927
|
_trace_step(
|
|
793
928
|
step="u1_fetch_one_video_primary",
|
|
@@ -797,6 +932,12 @@ def run_douyin_single_video(
|
|
|
797
932
|
)
|
|
798
933
|
)
|
|
799
934
|
|
|
935
|
+
_emit_http_progress(
|
|
936
|
+
progress,
|
|
937
|
+
stage="single_video.fetch",
|
|
938
|
+
response=one_video_response,
|
|
939
|
+
route_label="effective_route",
|
|
940
|
+
)
|
|
800
941
|
trace.append(
|
|
801
942
|
_trace_step(
|
|
802
943
|
step="u1_fetch_one_video_effective",
|
|
@@ -823,13 +964,7 @@ def run_douyin_single_video(
|
|
|
823
964
|
duration_ms=None,
|
|
824
965
|
video_down_url=None,
|
|
825
966
|
author={"author_handle": None, "author_platform_id": None, "douyin_sec_uid": None, "douyin_aweme_author_id": None, "nickname": None, "signature": None},
|
|
826
|
-
metrics=
|
|
827
|
-
"digg_count": 0,
|
|
828
|
-
"comment_count": 0,
|
|
829
|
-
"collect_count": 0,
|
|
830
|
-
"share_count": 0,
|
|
831
|
-
"play_count": 0,
|
|
832
|
-
},
|
|
967
|
+
metrics=_empty_metrics(),
|
|
833
968
|
tags=[],
|
|
834
969
|
is_video=False,
|
|
835
970
|
video_type_reason="u1_failed",
|
|
@@ -842,8 +977,11 @@ def run_douyin_single_video(
|
|
|
842
977
|
u2_task_id=None,
|
|
843
978
|
u2_task_status="UNKNOWN",
|
|
844
979
|
u2_gate_reason="u1_failed",
|
|
980
|
+
analysis_mode=analysis_mode,
|
|
981
|
+
timings=timings,
|
|
845
982
|
)
|
|
846
983
|
if write_card:
|
|
984
|
+
card_started_at = time.perf_counter()
|
|
847
985
|
result["card_write"] = write_benchmark_card(
|
|
848
986
|
payload=result,
|
|
849
987
|
platform="douyin",
|
|
@@ -851,7 +989,14 @@ def run_douyin_single_video(
|
|
|
851
989
|
card_root=card_root,
|
|
852
990
|
content_kind=content_kind,
|
|
853
991
|
storage_config=storage_config,
|
|
992
|
+
analysis_mode=analysis_mode,
|
|
993
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
854
994
|
)
|
|
995
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
996
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
997
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
998
|
+
result["timings"] = dict(timings)
|
|
999
|
+
_update_pipeline_status(result)
|
|
855
1000
|
return _finalize_result(
|
|
856
1001
|
result=result,
|
|
857
1002
|
source_input=source_input,
|
|
@@ -875,13 +1020,7 @@ def run_douyin_single_video(
|
|
|
875
1020
|
duration_ms=None,
|
|
876
1021
|
video_down_url=None,
|
|
877
1022
|
author={"author_handle": None, "author_platform_id": None, "douyin_sec_uid": None, "douyin_aweme_author_id": None, "nickname": None, "signature": None},
|
|
878
|
-
metrics=
|
|
879
|
-
"digg_count": 0,
|
|
880
|
-
"comment_count": 0,
|
|
881
|
-
"collect_count": 0,
|
|
882
|
-
"share_count": 0,
|
|
883
|
-
"play_count": 0,
|
|
884
|
-
},
|
|
1023
|
+
metrics=_empty_metrics(),
|
|
885
1024
|
tags=[],
|
|
886
1025
|
is_video=False,
|
|
887
1026
|
video_type_reason="aweme_detail_missing",
|
|
@@ -894,8 +1033,11 @@ def run_douyin_single_video(
|
|
|
894
1033
|
u2_task_id=None,
|
|
895
1034
|
u2_task_status="UNKNOWN",
|
|
896
1035
|
u2_gate_reason="aweme_detail_missing",
|
|
1036
|
+
analysis_mode=analysis_mode,
|
|
1037
|
+
timings=timings,
|
|
897
1038
|
)
|
|
898
1039
|
if write_card:
|
|
1040
|
+
card_started_at = time.perf_counter()
|
|
899
1041
|
result["card_write"] = write_benchmark_card(
|
|
900
1042
|
payload=result,
|
|
901
1043
|
platform="douyin",
|
|
@@ -903,7 +1045,14 @@ def run_douyin_single_video(
|
|
|
903
1045
|
card_root=card_root,
|
|
904
1046
|
content_kind=content_kind,
|
|
905
1047
|
storage_config=storage_config,
|
|
1048
|
+
analysis_mode=analysis_mode,
|
|
1049
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
906
1050
|
)
|
|
1051
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1052
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
1053
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1054
|
+
result["timings"] = dict(timings)
|
|
1055
|
+
_update_pipeline_status(result)
|
|
907
1056
|
return _finalize_result(
|
|
908
1057
|
result=result,
|
|
909
1058
|
source_input=source_input,
|
|
@@ -986,22 +1135,40 @@ def run_douyin_single_video(
|
|
|
986
1135
|
poll_result: Dict[str, Any] = {}
|
|
987
1136
|
|
|
988
1137
|
if can_u2 and video_down_url:
|
|
1138
|
+
u2_timeout_ms = _resolve_u2_timeout_ms(runtime["timeout_ms"])
|
|
989
1139
|
if progress is not None:
|
|
990
1140
|
progress.progress(
|
|
991
1141
|
stage="single_video.u2",
|
|
992
1142
|
message="starting douyin u2 submit",
|
|
993
|
-
data={"video_down_url_present": True},
|
|
1143
|
+
data={"video_down_url_present": True, "timeout_ms": u2_timeout_ms},
|
|
994
1144
|
)
|
|
1145
|
+
submit_started_at = time.perf_counter()
|
|
995
1146
|
submit_bundle = submit_u2_asr_with_retry(
|
|
996
1147
|
base_url=runtime["base_url"],
|
|
997
1148
|
token=runtime["token"],
|
|
998
|
-
timeout_ms=
|
|
1149
|
+
timeout_ms=u2_timeout_ms,
|
|
999
1150
|
video_url=video_down_url,
|
|
1000
1151
|
max_retries=u2_submit_max_retries,
|
|
1001
1152
|
backoff_ms=u2_submit_backoff_ms,
|
|
1153
|
+
progress_callback=(
|
|
1154
|
+
lambda event: _report_u2_progress(progress, stage="single_video.u2", event=event, label="douyin")
|
|
1155
|
+
) if progress is not None else None,
|
|
1002
1156
|
)
|
|
1157
|
+
timings["u2_submit_ms"] = _elapsed_ms(submit_started_at)
|
|
1003
1158
|
submit_response = submit_bundle["submit_response"]
|
|
1004
1159
|
u2_task_id = submit_bundle.get("task_id")
|
|
1160
|
+
if progress is not None:
|
|
1161
|
+
progress.http_event(
|
|
1162
|
+
stage="single_video.u2",
|
|
1163
|
+
endpoint=U2_SUBMIT_ENDPOINT,
|
|
1164
|
+
response=submit_response,
|
|
1165
|
+
route_label="u2_submit",
|
|
1166
|
+
summary={
|
|
1167
|
+
"task_id": u2_task_id,
|
|
1168
|
+
"final_submit_status": submit_bundle.get("final_submit_status"),
|
|
1169
|
+
"retry_count": len(submit_bundle.get("retry_chain", [])),
|
|
1170
|
+
},
|
|
1171
|
+
)
|
|
1005
1172
|
|
|
1006
1173
|
trace.append(
|
|
1007
1174
|
_trace_step(
|
|
@@ -1040,14 +1207,19 @@ def run_douyin_single_video(
|
|
|
1040
1207
|
else:
|
|
1041
1208
|
if progress is not None:
|
|
1042
1209
|
progress.progress(stage="single_video.u2", message="polling douyin u2 task", data={"task_id": u2_task_id})
|
|
1210
|
+
poll_started_at = time.perf_counter()
|
|
1043
1211
|
poll_result = poll_u2_task(
|
|
1044
1212
|
base_url=runtime["base_url"],
|
|
1045
1213
|
token=runtime["token"],
|
|
1046
|
-
timeout_ms=
|
|
1214
|
+
timeout_ms=u2_timeout_ms,
|
|
1047
1215
|
task_id=u2_task_id,
|
|
1048
1216
|
poll_interval_sec=poll_interval_sec,
|
|
1049
1217
|
max_polls=max_polls,
|
|
1218
|
+
progress_callback=(
|
|
1219
|
+
lambda event: _report_u2_progress(progress, stage="single_video.u2", event=event, label="douyin")
|
|
1220
|
+
) if progress is not None else None,
|
|
1050
1221
|
)
|
|
1222
|
+
timings["u2_poll_ms"] = _elapsed_ms(poll_started_at)
|
|
1051
1223
|
u2_task_status = poll_result.get("task_status") or "UNKNOWN"
|
|
1052
1224
|
raw_content = poll_result.get("transcript_text", "") if poll_result.get("ok") else ""
|
|
1053
1225
|
error_reason = poll_result.get("error_reason")
|
|
@@ -1111,12 +1283,15 @@ def run_douyin_single_video(
|
|
|
1111
1283
|
u2_task_id=u2_task_id,
|
|
1112
1284
|
u2_task_status=u2_task_status,
|
|
1113
1285
|
u2_gate_reason=gate_reason,
|
|
1286
|
+
analysis_mode=analysis_mode,
|
|
1114
1287
|
asr_source="u2" if raw_content else "fallback_none",
|
|
1288
|
+
timings=timings,
|
|
1115
1289
|
)
|
|
1116
1290
|
|
|
1117
1291
|
if write_card:
|
|
1118
1292
|
if progress is not None:
|
|
1119
1293
|
progress.progress(stage="single_video.card_write", message="writing douyin single_video card")
|
|
1294
|
+
card_started_at = time.perf_counter()
|
|
1120
1295
|
result["card_write"] = write_benchmark_card(
|
|
1121
1296
|
payload=result,
|
|
1122
1297
|
platform="douyin",
|
|
@@ -1124,7 +1299,15 @@ def run_douyin_single_video(
|
|
|
1124
1299
|
card_root=card_root,
|
|
1125
1300
|
content_kind=content_kind,
|
|
1126
1301
|
storage_config=storage_config,
|
|
1302
|
+
analysis_mode=analysis_mode,
|
|
1303
|
+
progress=progress.child(scope="card_write") if progress is not None else None,
|
|
1127
1304
|
)
|
|
1305
|
+
timings["card_write_ms"] = _elapsed_ms(card_started_at)
|
|
1306
|
+
timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
|
|
1307
|
+
|
|
1308
|
+
timings["total_ms"] = _elapsed_ms(workflow_started_at)
|
|
1309
|
+
result["timings"] = dict(timings)
|
|
1310
|
+
_update_pipeline_status(result)
|
|
1128
1311
|
|
|
1129
1312
|
finalized = _finalize_result(
|
|
1130
1313
|
result=result,
|
|
@@ -1142,6 +1325,7 @@ def run_douyin_single_video(
|
|
|
1142
1325
|
"request_id": finalized.get("request_id"),
|
|
1143
1326
|
"card_write_ok": bool((finalized.get("card_write") or {}).get("ok")),
|
|
1144
1327
|
"output_persist_ok": bool((finalized.get("output_persist") or {}).get("ok")),
|
|
1328
|
+
"deep_analysis_status": ((finalized.get("deep_analysis") or {}).get("status")),
|
|
1145
1329
|
},
|
|
1146
1330
|
)
|
|
1147
1331
|
return finalized
|
|
@@ -1173,8 +1357,14 @@ def main() -> None:
|
|
|
1173
1357
|
default=1500,
|
|
1174
1358
|
help="Base backoff ms for retriable U2 submit failures (exponential)",
|
|
1175
1359
|
)
|
|
1176
|
-
parser.add_argument("--card-type", choices=["work"
|
|
1177
|
-
parser.add_argument("--content-kind", default="single_video", help="Routing kind, e.g. single_video/
|
|
1360
|
+
parser.add_argument("--card-type", choices=["work"], default="work", help="Primary card type")
|
|
1361
|
+
parser.add_argument("--content-kind", default="single_video", help="Routing kind, e.g. single_video/work")
|
|
1362
|
+
parser.add_argument("--analysis-mode", choices=["auto", "local"], default="auto", help="Card analysis mode")
|
|
1363
|
+
parser.set_defaults(write_card=True, persist_output=True)
|
|
1364
|
+
parser.add_argument("--write-card", dest="write_card", action="store_true", help="Write final work card")
|
|
1365
|
+
parser.add_argument("--no-write-card", dest="write_card", action="store_false", help="Skip card writing")
|
|
1366
|
+
parser.add_argument("--persist-output", dest="persist_output", action="store_true", help="Persist result JSON")
|
|
1367
|
+
parser.add_argument("--no-persist-output", dest="persist_output", action="store_false", help="Skip result JSON persist")
|
|
1178
1368
|
parser.add_argument("--card-root", default=None, help="Card root (absolute); falls back to TIKOMNI_CARD_ROOT when writing cards")
|
|
1179
1369
|
args = parser.parse_args()
|
|
1180
1370
|
|
|
@@ -1188,6 +1378,13 @@ def main() -> None:
|
|
|
1188
1378
|
base_url = args.base_url or config_get(config, "runtime.base_url", None)
|
|
1189
1379
|
timeout_ms = args.timeout_ms if args.timeout_ms is not None else config_get(config, "runtime.timeout_ms", None)
|
|
1190
1380
|
|
|
1381
|
+
progress = build_progress_reporter(
|
|
1382
|
+
workflow="single-work-analysis",
|
|
1383
|
+
platform="douyin",
|
|
1384
|
+
content_kind=args.content_kind,
|
|
1385
|
+
input_value=args.share_url or args.input,
|
|
1386
|
+
)
|
|
1387
|
+
|
|
1191
1388
|
try:
|
|
1192
1389
|
result = run_douyin_single_video(
|
|
1193
1390
|
input_value=args.input,
|
|
@@ -1202,13 +1399,15 @@ def main() -> None:
|
|
|
1202
1399
|
max_polls=args.max_polls,
|
|
1203
1400
|
u2_submit_max_retries=args.u2_submit_max_retries,
|
|
1204
1401
|
u2_submit_backoff_ms=args.u2_submit_backoff_ms,
|
|
1205
|
-
write_card=
|
|
1402
|
+
write_card=bool(args.write_card),
|
|
1403
|
+
analysis_mode=args.analysis_mode,
|
|
1206
1404
|
card_type=args.card_type,
|
|
1207
1405
|
card_root=args.card_root,
|
|
1208
1406
|
content_kind=args.content_kind,
|
|
1209
1407
|
storage_config=config,
|
|
1210
1408
|
allow_process_env=args.allow_process_env,
|
|
1211
|
-
persist_output=
|
|
1409
|
+
persist_output=bool(args.persist_output),
|
|
1410
|
+
progress=progress,
|
|
1212
1411
|
)
|
|
1213
1412
|
except ValueError as error:
|
|
1214
1413
|
result = {
|