@tikomni/skills 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22):
  1. package/package.json +4 -2
  2. package/skills/single-work-analysis/env.example +3 -3
  3. package/skills/single-work-analysis/references/config-templates/defaults.yaml +8 -19
  4. package/skills/single-work-analysis/references/prompt-contracts/{insight.md → analysis-bundle.md} +43 -8
  5. package/skills/single-work-analysis/scripts/core/analysis_adapter.py +384 -0
  6. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +399 -76
  7. package/skills/single-work-analysis/scripts/core/config_loader.py +18 -42
  8. package/skills/single-work-analysis/scripts/core/progress_report.py +163 -16
  9. package/skills/single-work-analysis/scripts/core/storage_router.py +24 -57
  10. package/skills/single-work-analysis/scripts/core/tikomni_common.py +13 -3
  11. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +154 -7
  12. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +3 -1
  13. package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +243 -44
  14. package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +263 -25
  15. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +244 -894
  16. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  17. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  18. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  19. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  20. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  21. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  22. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
@@ -20,15 +20,16 @@ import argparse
20
20
  import hashlib
21
21
  import json
22
22
  import re
23
+ import time
23
24
  import urllib.parse
24
25
  import urllib.request
25
26
  from datetime import datetime
26
27
  from pathlib import Path
27
28
  from typing import Any, Dict, List, Optional, Tuple
28
29
 
29
- from scripts.pipeline.asr.asr_pipeline import run_u2_asr_candidates_with_timeout_retry
30
+ from scripts.pipeline.asr.asr_pipeline import derive_asr_clean_text, run_u2_asr_candidates_with_timeout_retry
30
31
  from scripts.core.config_loader import config_get, load_tikomni_config, resolve_storage_paths
31
- from scripts.core.progress_report import ProgressReporter
32
+ from scripts.core.progress_report import ProgressReporter, build_progress_reporter
32
33
  from scripts.core.storage_router import render_output_filename, resolve_json_filename_pattern
33
34
  from scripts.core.extract_pipeline import build_api_trace, resolve_trace_error_context
34
35
  from scripts.core.tikomni_common import (
@@ -49,6 +50,7 @@ APP_V1_ENDPOINT = "/api/u1/v1/xiaohongshu/app/get_note_info"
49
50
  WEB_V2_V2_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v2"
50
51
  WEB_V2_V3_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v3"
51
52
  WEB_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v7"
53
+ U2_REQUEST_TIMEOUT_CAP_MS = 15000
52
54
  U2_GATE_MIN_DURATION_MS = 13000
53
55
  U2_GATE_MAX_DURATION_MS = 1800000
54
56
  U2_GATE_RULE = "is_video && 13000<duration_ms<=1800000 && video_download_url_present"
@@ -80,6 +82,43 @@ def _to_int_or_none(value: Any) -> Optional[int]:
80
82
  return None
81
83
 
82
84
 
85
def _resolve_u2_timeout_ms(timeout_ms: Any) -> int:
    """Return the per-request timeout (in ms) to use for U2 calls.

    The incoming value is parsed with ``_to_int_or_none``. When it cannot be
    parsed or is not strictly positive, ``U2_REQUEST_TIMEOUT_CAP_MS`` is
    returned. Otherwise the value is limited to at most
    ``U2_REQUEST_TIMEOUT_CAP_MS`` and then raised to at least 5000.
    """
    requested = _to_int_or_none(timeout_ms)
    if requested is not None and requested > 0:
        # Apply the upper bound first, then the 5000 ms floor.
        capped = requested if requested < U2_REQUEST_TIMEOUT_CAP_MS else U2_REQUEST_TIMEOUT_CAP_MS
        return capped if capped > 5000 else 5000
    return U2_REQUEST_TIMEOUT_CAP_MS
90
+
91
+
92
def _report_u2_progress(
    progress: Optional[ProgressReporter],
    *,
    stage: str,
    event: Dict[str, Any],
    label: str,
) -> None:
    """Forward one U2 progress event to the progress reporter.

    Does nothing when ``progress`` is ``None``. The event's ``phase`` and
    ``state`` are normalized and lower-cased; a fixed set of the remaining
    event fields is copied verbatim into the reported payload (missing keys
    become ``None``). A ``submit``/``heartbeat`` event is sent through
    ``progress.heartbeat``; every other event goes through
    ``progress.progress``.
    """
    if progress is None:
        return

    phase = normalize_text(event.get("phase")).lower()
    state = normalize_text(event.get("state")).lower()

    payload = {"phase": phase or "poll", "state": state or ""}
    passthrough_keys = (
        "task_id",
        "attempt",
        "task_status",
        "platform_task_status",
        "pending_count",
        "status_code",
        "batch_progress",
        "wait_ms",
        "candidate_count",
        "ok",
        "error_reason",
        "retriable",
        "request_id",
    )
    for key in passthrough_keys:
        payload[key] = event.get(key)

    message = f"{label} u2 {phase or 'poll'} {state or 'progress'}"
    is_submit_heartbeat = phase == "submit" and state == "heartbeat"
    emit = progress.heartbeat if is_submit_heartbeat else progress.progress
    emit(stage=stage, message=message, data=payload)
120
+
121
+
83
122
  def _evaluate_u2_gate_for_xhs(*, note_content_type: str, duration_ms: Any, video_down_url: Optional[str]) -> Dict[str, Any]:
84
123
  content_type = normalize_text(note_content_type).lower()
85
124
  is_video = content_type in {"video", "mixed"}
@@ -760,6 +799,70 @@ def _append_missing_metadata_fields(missing_fields: List[Dict[str, str]], metada
760
799
  _append(key)
761
800
 
762
801
 
802
+ def _empty_timings() -> Dict[str, int]:
803
+ return {
804
+ "url_parse_ms": 0,
805
+ "u1_total_ms": 0,
806
+ "u2_submit_ms": 0,
807
+ "u2_poll_ms": 0,
808
+ "card_write_ms": 0,
809
+ "llm_analysis_ms": 0,
810
+ "total_ms": 0,
811
+ }
812
+
813
+
814
+ def _elapsed_ms(started_at: float) -> int:
815
+ return int((time.perf_counter() - started_at) * 1000)
816
+
817
+
818
def _http_summary_for_note(response: Dict[str, Any], source_input: Dict[str, Optional[str]]) -> Dict[str, Any]:
    """Build a compact summary of a note-fetch response for progress events.

    Metadata is extracted with ``_extract_xhs_metadata`` only when
    ``response["ok"]`` is truthy; otherwise every metadata lookup falls back
    to an empty dict. ``filled_count`` and ``ratio`` are copied from the
    response's ``_field_completeness`` entry when that entry is a dict.
    """
    completeness = response.get("_field_completeness")
    if not isinstance(completeness, dict):
        completeness = {}

    payload = response.get("data")
    if response.get("ok"):
        # NOTE(review): assumes _extract_xhs_metadata returns a dict — confirm.
        metadata = _extract_xhs_metadata(
            payload=payload,
            source_input=source_input,
            selected_video_url=None,
            selected_image_urls=[],
        )
    else:
        metadata = {}

    # Prefer the note id found in the payload, then the one supplied as input.
    note_id = normalize_text(metadata.get("note_id")) or normalize_text(source_input.get("note_id"))
    has_title = bool(normalize_text(metadata.get("title")))
    has_author = bool(normalize_text(metadata.get("author")))
    has_media = bool(normalize_text(metadata.get("video_down_url")) or metadata.get("cover_image"))

    return {
        "note_id": note_id,
        "title_hit": has_title,
        "author_hit": has_author,
        "media_present": has_media,
        "filled_count": completeness.get("filled_count"),
        "ratio": completeness.get("ratio"),
    }
835
+
836
+
837
def _emit_http_progress(
    progress: Optional[ProgressReporter],
    *,
    stage: str,
    response: Dict[str, Any],
    route_label: str,
    source_input: Dict[str, Optional[str]],
) -> None:
    """Report one HTTP response to the progress reporter, if there is one.

    The reported endpoint is the response's ``_endpoint`` value, falling back
    to ``route_label`` when that value is missing or falsy. The summary
    attached to the event comes from ``_http_summary_for_note``.
    """
    if progress is not None:
        notify = progress.http_event
        notify(
            stage=stage,
            endpoint=str(response.get("_endpoint") or route_label),
            response=response,
            route_label=route_label,
            summary=_http_summary_for_note(response, source_input),
        )
854
+
855
+
856
+ def _update_pipeline_status(result: Dict[str, Any]) -> None:
857
+ card_write = result.get("card_write") if isinstance(result.get("card_write"), dict) else {}
858
+ deep_analysis = result.get("deep_analysis") if isinstance(result.get("deep_analysis"), dict) else {}
859
+ result["pipeline_status"] = {
860
+ "facts_ready": True,
861
+ "card_ready": bool(card_write.get("ok")),
862
+ "deep_analysis": deep_analysis.get("status") or "skipped",
863
+ }
864
+
865
+
763
866
  def _fetch_sparse_metadata_enrich(
764
867
  *,
765
868
  base_url: str,
@@ -767,6 +870,7 @@ def _fetch_sparse_metadata_enrich(
767
870
  timeout_ms: int,
768
871
  source_input: Dict[str, Optional[str]],
769
872
  note_id: Optional[str],
873
+ progress: Optional[ProgressReporter] = None,
770
874
  ) -> Dict[str, Any]:
771
875
  share_text = source_input.get("share_text")
772
876
  resolved_note_id = note_id or source_input.get("note_id") or _extract_note_id_from_share(share_text)
@@ -782,6 +886,7 @@ def _fetch_sparse_metadata_enrich(
782
886
  )
783
887
  response["_endpoint"] = WEB_V2_V3_ENDPOINT
784
888
  response["_route_label"] = "web_v2_v3_sparse_enrich"
889
+ _emit_http_progress(progress, stage="note.fetch", response=response, route_label="web_v2_v3_sparse_enrich", source_input=source_input)
785
890
  return response
786
891
 
787
892
  if resolved_note_id:
@@ -795,6 +900,7 @@ def _fetch_sparse_metadata_enrich(
795
900
  )
796
901
  response["_endpoint"] = WEB_V2_V2_ENDPOINT
797
902
  response["_route_label"] = "web_v2_v2_sparse_enrich"
903
+ _emit_http_progress(progress, stage="note.fetch", response=response, route_label="web_v2_v2_sparse_enrich", source_input=source_input)
798
904
  return response
799
905
 
800
906
  return {
@@ -805,7 +911,14 @@ def _fetch_sparse_metadata_enrich(
805
911
  }
806
912
 
807
913
 
808
- def _fetch_note_info(*, base_url: str, token: str, timeout_ms: int, source_input: Dict[str, Optional[str]]) -> Dict[str, Any]:
914
+ def _fetch_note_info(
915
+ *,
916
+ base_url: str,
917
+ token: str,
918
+ timeout_ms: int,
919
+ source_input: Dict[str, Optional[str]],
920
+ progress: Optional[ProgressReporter] = None,
921
+ ) -> Dict[str, Any]:
809
922
  attempts: List[Dict[str, Any]] = []
810
923
 
811
924
  share_text = source_input.get("share_text")
@@ -832,6 +945,7 @@ def _fetch_note_info(*, base_url: str, token: str, timeout_ms: int, source_input
832
945
  "missing_core": ["note_id", "title_or_desc", "media"],
833
946
  "core_ready": False,
834
947
  }
948
+ _emit_http_progress(progress, stage="note.fetch", response=response, route_label=label, source_input=source_input)
835
949
  attempts.append({"label": label, "endpoint": path, "response": response})
836
950
  return response
837
951
 
@@ -1323,6 +1437,7 @@ def _build_result(
1323
1437
  missing_fields: Optional[List[Dict[str, str]]] = None,
1324
1438
  metadata_fields: Optional[Dict[str, Any]] = None,
1325
1439
  asr_source: Optional[str] = None,
1440
+ timings: Optional[Dict[str, int]] = None,
1326
1441
  ) -> Dict[str, Any]:
1327
1442
  metadata = metadata_fields or {}
1328
1443
  summary_block = summarize_content(raw_content, source=f"xiaohongshu:{text_source}")
@@ -1344,7 +1459,8 @@ def _build_result(
1344
1459
 
1345
1460
  work_modality = "video" if normalize_text(note_content_type).lower() in {"video", "mixed"} else "text"
1346
1461
  caption_raw = normalize_text(metadata.get("caption_raw"))
1347
- primary_text = raw_content if work_modality == "video" else (caption_raw or raw_content)
1462
+ asr_clean = derive_asr_clean_text(raw_content)
1463
+ primary_text = asr_clean if work_modality == "video" else (caption_raw or raw_content)
1348
1464
  primary_text_source = "asr_clean" if work_modality == "video" else "caption_raw"
1349
1465
  analysis_eligibility = "eligible" if primary_text else "incomplete"
1350
1466
  analysis_exclusion_reason = "" if analysis_eligibility == "eligible" else ("video_asr_unavailable" if work_modality == "video" else "caption_raw_missing")
@@ -1389,6 +1505,8 @@ def _build_result(
1389
1505
  "xhs_sec_token": metadata.get("xhs_sec_token"),
1390
1506
  "downloaded_assets": downloaded_assets,
1391
1507
  "raw_content": raw_content,
1508
+ "asr_raw": raw_content,
1509
+ "asr_clean": asr_clean,
1392
1510
  "primary_text": primary_text,
1393
1511
  "primary_text_source": primary_text_source,
1394
1512
  "analysis_eligibility": analysis_eligibility,
@@ -1401,6 +1519,7 @@ def _build_result(
1401
1519
  "extract_trace": extract_trace,
1402
1520
  "fallback_trace": fallback_trace,
1403
1521
  "request_id": request_id,
1522
+ "timings": dict(timings or {}),
1404
1523
  }
1405
1524
 
1406
1525
 
@@ -1421,6 +1540,7 @@ def run_xiaohongshu_extract(
1421
1540
  u2_timeout_retry_max_retries: int,
1422
1541
  force_u2_fallback: bool,
1423
1542
  write_card: bool,
1543
+ analysis_mode: str,
1424
1544
  card_type: str,
1425
1545
  card_root: Optional[str],
1426
1546
  storage_config: Optional[Dict[str, Any]] = None,
@@ -1428,14 +1548,17 @@ def run_xiaohongshu_extract(
1428
1548
  persist_output: bool = True,
1429
1549
  progress: Optional[ProgressReporter] = None,
1430
1550
  ) -> Dict[str, Any]:
1431
- if not write_card or not persist_output:
1432
- raise ValueError(
1433
- f"fixed_pipeline_requires_full_persistence:xiaohongshu:note:write_card={bool(write_card)}:persist_output={bool(persist_output)}"
1434
- )
1435
-
1551
+ workflow_started_at = time.perf_counter()
1552
+ timings = _empty_timings()
1553
+ parse_started_at = time.perf_counter()
1436
1554
  source_input = _normalize_input(input_value, share_text, note_id)
1555
+ timings["url_parse_ms"] = _elapsed_ms(parse_started_at)
1437
1556
  if progress is not None:
1438
- progress.started(stage="note.workflow", message="xiaohongshu note workflow started")
1557
+ progress.started(
1558
+ stage="note.workflow",
1559
+ message="xiaohongshu note workflow started",
1560
+ data={"analysis_mode": analysis_mode, "write_card": bool(write_card), "persist_output": bool(persist_output)},
1561
+ )
1439
1562
  metadata_fields: Dict[str, Any] = {}
1440
1563
  if not source_input["share_text"] and not source_input["note_id"]:
1441
1564
  result = _build_result(
@@ -1452,15 +1575,17 @@ def run_xiaohongshu_extract(
1452
1575
  u2_task_id=None,
1453
1576
  u2_task_status="UNKNOWN",
1454
1577
  note_content_type="unknown",
1455
- analysis_mode="none",
1578
+ analysis_mode=analysis_mode,
1456
1579
  selected_video_url=None,
1457
1580
  selected_video_candidates=[],
1458
1581
  selected_image_urls=[],
1459
1582
  downloaded_assets=[],
1460
1583
  missing_fields=[{"field": "share_text_or_note_id", "reason": "missing_input"}],
1461
1584
  metadata_fields=metadata_fields,
1585
+ timings=timings,
1462
1586
  )
1463
1587
  if write_card:
1588
+ card_started_at = time.perf_counter()
1464
1589
  result["card_write"] = write_benchmark_card(
1465
1590
  payload=result,
1466
1591
  platform="xiaohongshu",
@@ -1468,7 +1593,14 @@ def run_xiaohongshu_extract(
1468
1593
  card_root=card_root,
1469
1594
  content_kind="note",
1470
1595
  storage_config=storage_config,
1596
+ analysis_mode=analysis_mode,
1597
+ progress=progress.child(scope="card_write") if progress is not None else None,
1471
1598
  )
1599
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1600
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1601
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1602
+ result["timings"] = dict(timings)
1603
+ _update_pipeline_status(result)
1472
1604
  return _finalize_result(
1473
1605
  result=result,
1474
1606
  source_input=source_input,
@@ -1487,6 +1619,7 @@ def run_xiaohongshu_extract(
1487
1619
 
1488
1620
  trace: List[Dict[str, Any]] = []
1489
1621
 
1622
+ u1_started_at = time.perf_counter()
1490
1623
  if progress is not None:
1491
1624
  progress.progress(stage="note.fetch", message="fetching xiaohongshu note payload")
1492
1625
  note_response = _fetch_note_info(
@@ -1494,7 +1627,9 @@ def run_xiaohongshu_extract(
1494
1627
  token=runtime["token"],
1495
1628
  timeout_ms=runtime["timeout_ms"],
1496
1629
  source_input=source_input,
1630
+ progress=progress,
1497
1631
  )
1632
+ timings["u1_total_ms"] = _elapsed_ms(u1_started_at)
1498
1633
 
1499
1634
  attempts = note_response.get("_attempts") or []
1500
1635
  for index, attempt in enumerate(attempts, start=1):
@@ -1548,15 +1683,17 @@ def run_xiaohongshu_extract(
1548
1683
  u2_task_id=None,
1549
1684
  u2_task_status="UNKNOWN",
1550
1685
  note_content_type="unknown",
1551
- analysis_mode="none",
1686
+ analysis_mode=analysis_mode,
1552
1687
  selected_video_url=None,
1553
1688
  selected_video_candidates=[],
1554
1689
  selected_image_urls=[],
1555
1690
  downloaded_assets=[],
1556
1691
  missing_fields=[{"field": "u1_note_info", "reason": "all_routes_failed"}],
1557
1692
  metadata_fields=metadata_fields,
1693
+ timings=timings,
1558
1694
  )
1559
1695
  if write_card:
1696
+ card_started_at = time.perf_counter()
1560
1697
  result["card_write"] = write_benchmark_card(
1561
1698
  payload=result,
1562
1699
  platform="xiaohongshu",
@@ -1564,7 +1701,14 @@ def run_xiaohongshu_extract(
1564
1701
  card_root=card_root,
1565
1702
  content_kind="note",
1566
1703
  storage_config=storage_config,
1704
+ analysis_mode=analysis_mode,
1705
+ progress=progress.child(scope="card_write") if progress is not None else None,
1567
1706
  )
1707
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1708
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1709
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1710
+ result["timings"] = dict(timings)
1711
+ _update_pipeline_status(result)
1568
1712
  return _finalize_result(
1569
1713
  result=result,
1570
1714
  source_input=source_input,
@@ -1589,13 +1733,16 @@ def run_xiaohongshu_extract(
1589
1733
  enrich_payload: Any = None
1590
1734
 
1591
1735
  if sparse_metadata_detected:
1736
+ enrich_started_at = time.perf_counter()
1592
1737
  enrich_response = _fetch_sparse_metadata_enrich(
1593
1738
  base_url=runtime["base_url"],
1594
1739
  token=runtime["token"],
1595
1740
  timeout_ms=runtime["timeout_ms"],
1596
1741
  source_input=source_input,
1597
1742
  note_id=source_input.get("note_id"),
1743
+ progress=progress,
1598
1744
  )
1745
+ timings["u1_total_ms"] += _elapsed_ms(enrich_started_at)
1599
1746
  trace.append(
1600
1747
  build_api_trace(
1601
1748
  step="u1_sparse_metadata_enrich",
@@ -1710,15 +1857,17 @@ def run_xiaohongshu_extract(
1710
1857
  u2_task_id=None,
1711
1858
  u2_task_status="SKIPPED",
1712
1859
  note_content_type=note_content_type,
1713
- analysis_mode="video_full",
1860
+ analysis_mode=analysis_mode,
1714
1861
  selected_video_url=selected_video_url,
1715
1862
  selected_video_candidates=video_candidates,
1716
1863
  selected_image_urls=image_candidates,
1717
1864
  downloaded_assets=[],
1718
1865
  missing_fields=missing_fields,
1719
1866
  metadata_fields=metadata_fields,
1867
+ timings=timings,
1720
1868
  )
1721
1869
  if write_card:
1870
+ card_started_at = time.perf_counter()
1722
1871
  result["card_write"] = write_benchmark_card(
1723
1872
  payload=result,
1724
1873
  platform="xiaohongshu",
@@ -1726,7 +1875,14 @@ def run_xiaohongshu_extract(
1726
1875
  card_root=card_root,
1727
1876
  content_kind="single_video",
1728
1877
  storage_config=storage_config,
1878
+ analysis_mode=analysis_mode,
1879
+ progress=progress.child(scope="card_write") if progress is not None else None,
1729
1880
  )
1881
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1882
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1883
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1884
+ result["timings"] = dict(timings)
1885
+ _update_pipeline_status(result)
1730
1886
  return _finalize_result(
1731
1887
  result=result,
1732
1888
  source_input=source_input,
@@ -1766,15 +1922,17 @@ def run_xiaohongshu_extract(
1766
1922
  u2_task_id=None,
1767
1923
  u2_task_status="SKIPPED",
1768
1924
  note_content_type=note_content_type,
1769
- analysis_mode="video_full",
1925
+ analysis_mode=analysis_mode,
1770
1926
  selected_video_url=u2_gate.get("video_down_url") or selected_video_url,
1771
1927
  selected_video_candidates=video_candidates,
1772
1928
  selected_image_urls=image_candidates,
1773
1929
  downloaded_assets=[],
1774
1930
  missing_fields=missing_fields,
1775
1931
  metadata_fields=metadata_fields,
1932
+ timings=timings,
1776
1933
  )
1777
1934
  if write_card:
1935
+ card_started_at = time.perf_counter()
1778
1936
  result["card_write"] = write_benchmark_card(
1779
1937
  payload=result,
1780
1938
  platform="xiaohongshu",
@@ -1782,7 +1940,14 @@ def run_xiaohongshu_extract(
1782
1940
  card_root=card_root,
1783
1941
  content_kind="single_video",
1784
1942
  storage_config=storage_config,
1943
+ analysis_mode=analysis_mode,
1944
+ progress=progress.child(scope="card_write") if progress is not None else None,
1785
1945
  )
1946
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
1947
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
1948
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
1949
+ result["timings"] = dict(timings)
1950
+ _update_pipeline_status(result)
1786
1951
  return _finalize_result(
1787
1952
  result=result,
1788
1953
  source_input=source_input,
@@ -1792,16 +1957,18 @@ def run_xiaohongshu_extract(
1792
1957
  )
1793
1958
 
1794
1959
  u2_candidates = _dedupe_keep_order([u2_gate.get("video_down_url")] + list(video_candidates))
1960
+ u2_timeout_ms = _resolve_u2_timeout_ms(runtime["timeout_ms"])
1795
1961
  if progress is not None:
1796
1962
  progress.progress(
1797
1963
  stage="note.u2",
1798
1964
  message="starting xiaohongshu u2 flow",
1799
- data={"candidate_count": len(u2_candidates)},
1965
+ data={"candidate_count": len(u2_candidates), "timeout_ms": u2_timeout_ms},
1800
1966
  )
1967
+ u2_started_at = time.perf_counter()
1801
1968
  u2_bundle = run_u2_asr_candidates_with_timeout_retry(
1802
1969
  base_url=runtime["base_url"],
1803
1970
  token=runtime["token"],
1804
- timeout_ms=runtime["timeout_ms"],
1971
+ timeout_ms=u2_timeout_ms,
1805
1972
  candidates=u2_candidates,
1806
1973
  submit_max_retries=u2_submit_max_retries,
1807
1974
  submit_backoff_ms=u2_submit_backoff_ms,
@@ -1809,7 +1976,12 @@ def run_xiaohongshu_extract(
1809
1976
  max_polls=max_polls,
1810
1977
  timeout_retry_enabled=u2_timeout_retry_enabled,
1811
1978
  timeout_retry_max_retries=u2_timeout_retry_max_retries,
1979
+ progress_callback=(
1980
+ lambda event: _report_u2_progress(progress, stage="note.u2", event=event, label="xiaohongshu")
1981
+ ) if progress is not None else None,
1812
1982
  )
1983
+ timings["u2_submit_ms"] = _to_int_or_none(u2_bundle.get("submit_duration_ms")) or 0
1984
+ timings["u2_poll_ms"] = _to_int_or_none(u2_bundle.get("poll_duration_ms")) or _elapsed_ms(u2_started_at)
1813
1985
  submit_bundle = u2_bundle.get("submit_bundle", {})
1814
1986
  submit_response = submit_bundle.get("submit_response", {})
1815
1987
  task_id = submit_bundle.get("task_id")
@@ -1818,6 +1990,19 @@ def run_xiaohongshu_extract(
1818
1990
  if selected_video_url and not normalize_text(metadata_fields.get("video_down_url")):
1819
1991
  metadata_fields["video_down_url"] = selected_video_url
1820
1992
 
1993
+ if progress is not None:
1994
+ progress.http_event(
1995
+ stage="note.u2",
1996
+ endpoint="/api/u2/v1/services/audio/asr/transcription",
1997
+ response=submit_response,
1998
+ route_label="u2_submit",
1999
+ summary={
2000
+ "task_id": task_id,
2001
+ "retry_count": len(submit_bundle.get("retry_chain", [])),
2002
+ "candidate_count": len(u2_candidates),
2003
+ },
2004
+ )
2005
+
1821
2006
  trace.append(
1822
2007
  {
1823
2008
  "step": "u2_asr_timeout_retry",
@@ -1879,15 +2064,17 @@ def run_xiaohongshu_extract(
1879
2064
  u2_task_id=poll_result.get("task_id") or task_id,
1880
2065
  u2_task_status=poll_result.get("task_status") or "UNKNOWN",
1881
2066
  note_content_type=note_content_type,
1882
- analysis_mode="video_full",
2067
+ analysis_mode=analysis_mode,
1883
2068
  selected_video_url=selected_video_url,
1884
2069
  selected_video_candidates=u2_candidates,
1885
2070
  selected_image_urls=image_candidates,
1886
2071
  downloaded_assets=[],
1887
2072
  missing_fields=missing_fields,
1888
2073
  metadata_fields=metadata_fields,
2074
+ timings=timings,
1889
2075
  )
1890
2076
  if write_card:
2077
+ card_started_at = time.perf_counter()
1891
2078
  result["card_write"] = write_benchmark_card(
1892
2079
  payload=result,
1893
2080
  platform="xiaohongshu",
@@ -1895,7 +2082,14 @@ def run_xiaohongshu_extract(
1895
2082
  card_root=card_root,
1896
2083
  content_kind="single_video",
1897
2084
  storage_config=storage_config,
2085
+ analysis_mode=analysis_mode,
2086
+ progress=progress.child(scope="card_write") if progress is not None else None,
1898
2087
  )
2088
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
2089
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
2090
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
2091
+ result["timings"] = dict(timings)
2092
+ _update_pipeline_status(result)
1899
2093
  return _finalize_result(
1900
2094
  result=result,
1901
2095
  source_input=source_input,
@@ -1911,30 +2105,41 @@ def run_xiaohongshu_extract(
1911
2105
  explicit_error_reason=poll_result.get("error_reason"),
1912
2106
  explicit_request_id=poll_result.get("request_id") or submit_response.get("request_id") or note_response.get("request_id"),
1913
2107
  )
2108
+ text_source = "u2"
2109
+ confidence = "high" if poll_result.get("ok") and raw_content else "low"
2110
+ error_reason = final_ctx.get("error_reason")
2111
+ if not raw_content and caption_text:
2112
+ missing_fields.append({"field": "asr_transcript", "reason": f"u2_failed:{error_reason or 'u2_poll_timeout'}"})
2113
+ raw_content = caption_text
2114
+ text_source = "caption_fallback"
2115
+ confidence = "medium"
2116
+ error_reason = None
1914
2117
  result = _build_result(
1915
2118
  source_input=source_input,
1916
2119
  raw_content=raw_content,
1917
- confidence="high" if poll_result.get("ok") and raw_content else "low",
1918
- error_reason=final_ctx.get("error_reason"),
2120
+ confidence=confidence,
2121
+ error_reason=error_reason,
1919
2122
  extract_trace=trace,
1920
2123
  fallback_trace=final_ctx.get("fallback_trace", []),
1921
2124
  request_id=final_ctx.get("request_id"),
1922
- text_source="u2",
2125
+ text_source=text_source,
1923
2126
  note_id=str(resolved_note_id) if resolved_note_id else source_input.get("note_id"),
1924
2127
  subtitle_hit=False,
1925
2128
  u2_task_id=poll_result.get("task_id") or task_id,
1926
2129
  u2_task_status=poll_result.get("task_status"),
1927
2130
  note_content_type=note_content_type,
1928
- analysis_mode="video_full",
2131
+ analysis_mode=analysis_mode,
1929
2132
  selected_video_url=selected_video_url,
1930
2133
  selected_video_candidates=u2_candidates,
1931
2134
  selected_image_urls=image_candidates,
1932
2135
  downloaded_assets=[],
1933
2136
  missing_fields=missing_fields,
1934
2137
  metadata_fields=metadata_fields,
2138
+ timings=timings,
1935
2139
  )
1936
2140
 
1937
2141
  if write_card:
2142
+ card_started_at = time.perf_counter()
1938
2143
  result["card_write"] = write_benchmark_card(
1939
2144
  payload=result,
1940
2145
  platform="xiaohongshu",
@@ -1942,7 +2147,15 @@ def run_xiaohongshu_extract(
1942
2147
  card_root=card_root,
1943
2148
  content_kind="single_video",
1944
2149
  storage_config=storage_config,
2150
+ analysis_mode=analysis_mode,
2151
+ progress=progress.child(scope="card_write") if progress is not None else None,
1945
2152
  )
2153
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
2154
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
2155
+
2156
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
2157
+ result["timings"] = dict(timings)
2158
+ _update_pipeline_status(result)
1946
2159
 
1947
2160
  return _finalize_result(
1948
2161
  result=result,
@@ -1989,16 +2202,18 @@ def run_xiaohongshu_extract(
1989
2202
  u2_task_id=None,
1990
2203
  u2_task_status="SKIPPED",
1991
2204
  note_content_type="image" if note_content_type == "unknown" else note_content_type,
1992
- analysis_mode="image_light_analysis",
2205
+ analysis_mode=analysis_mode,
1993
2206
  selected_video_url=None,
1994
2207
  selected_video_candidates=video_candidates,
1995
2208
  selected_image_urls=image_candidates,
1996
2209
  downloaded_assets=downloaded_assets,
1997
2210
  missing_fields=missing_fields,
1998
2211
  metadata_fields=metadata_fields,
2212
+ timings=timings,
1999
2213
  )
2000
2214
 
2001
2215
  if write_card:
2216
+ card_started_at = time.perf_counter()
2002
2217
  result["card_write"] = write_benchmark_card(
2003
2218
  payload=result,
2004
2219
  platform="xiaohongshu",
@@ -2006,7 +2221,15 @@ def run_xiaohongshu_extract(
2006
2221
  card_root=card_root,
2007
2222
  content_kind="note",
2008
2223
  storage_config=storage_config,
2224
+ analysis_mode=analysis_mode,
2225
+ progress=progress.child(scope="card_write") if progress is not None else None,
2009
2226
  )
2227
+ timings["card_write_ms"] = _elapsed_ms(card_started_at)
2228
+ timings["llm_analysis_ms"] = _to_int_or_none((result.get("card_write") or {}).get("llm_analysis_ms")) or 0
2229
+
2230
+ timings["total_ms"] = _elapsed_ms(workflow_started_at)
2231
+ result["timings"] = dict(timings)
2232
+ _update_pipeline_status(result)
2010
2233
 
2011
2234
  finalized = _finalize_result(
2012
2235
  result=result,
@@ -2025,6 +2248,7 @@ def run_xiaohongshu_extract(
2025
2248
  "card_write_ok": bool((finalized.get("card_write") or {}).get("ok")),
2026
2249
  "output_persist_ok": bool((finalized.get("output_persist") or {}).get("ok")),
2027
2250
  "text_source": finalized.get("text_source"),
2251
+ "deep_analysis_status": ((finalized.get("deep_analysis") or {}).get("status")),
2028
2252
  },
2029
2253
  )
2030
2254
  return finalized
@@ -2069,7 +2293,13 @@ def main() -> None:
2069
2293
  help="Conservative max retries for U2 timeout-only retry (0~3)",
2070
2294
  )
2071
2295
  parser.add_argument("--force-u2-fallback", action="store_true", help="Skip subtitle usage and force U2 fallback (test)")
2072
- parser.add_argument("--card-type", choices=["work", "author", "author_sample_work"], default="work", help="Primary card type")
2296
+ parser.add_argument("--card-type", choices=["work"], default="work", help="Primary card type")
2297
+ parser.add_argument("--analysis-mode", choices=["auto", "local"], default="auto", help="Card analysis mode")
2298
+ parser.set_defaults(write_card=True, persist_output=True)
2299
+ parser.add_argument("--write-card", dest="write_card", action="store_true", help="Write final work card")
2300
+ parser.add_argument("--no-write-card", dest="write_card", action="store_false", help="Skip card writing")
2301
+ parser.add_argument("--persist-output", dest="persist_output", action="store_true", help="Persist result JSON")
2302
+ parser.add_argument("--no-persist-output", dest="persist_output", action="store_false", help="Skip result JSON persist")
2073
2303
  parser.add_argument("--card-root", default=None, help="Card root (absolute); falls back to TIKOMNI_CARD_ROOT when writing cards")
2074
2304
  args = parser.parse_args()
2075
2305
 
@@ -2109,6 +2339,12 @@ def main() -> None:
2109
2339
  if args.u2_timeout_retry_max_retries is not None
2110
2340
  else config_get(config, "asr_strategy.u2_timeout_retry.max_retries", 3)
2111
2341
  )
2342
+ progress = build_progress_reporter(
2343
+ workflow="single-work-analysis",
2344
+ platform="xiaohongshu",
2345
+ content_kind="note",
2346
+ input_value=args.share_text or args.note_id or args.input,
2347
+ )
2112
2348
 
2113
2349
  try:
2114
2350
  result = run_xiaohongshu_extract(
@@ -2126,12 +2362,14 @@ def main() -> None:
2126
2362
  u2_timeout_retry_enabled=bool(u2_timeout_retry_enabled),
2127
2363
  u2_timeout_retry_max_retries=int(u2_timeout_retry_max_retries),
2128
2364
  force_u2_fallback=args.force_u2_fallback,
2129
- write_card=True,
2365
+ write_card=bool(args.write_card),
2366
+ analysis_mode=args.analysis_mode,
2130
2367
  card_type=args.card_type,
2131
2368
  card_root=args.card_root,
2132
2369
  storage_config=config,
2133
2370
  allow_process_env=args.allow_process_env,
2134
- persist_output=True,
2371
+ persist_output=bool(args.persist_output),
2372
+ progress=progress,
2135
2373
  )
2136
2374
  except ValueError as error:
2137
2375
  result = {