@tikomni/skills 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ bootstrap_for_direct_run(__file__, __package__)
19
19
  import argparse
20
20
  import hashlib
21
21
  import json
22
+ import os
22
23
  import re
23
24
  import time
24
25
  import urllib.parse
@@ -30,7 +31,13 @@ from typing import Any, Dict, List, Optional, Tuple
30
31
  from scripts.core.asr_pipeline import derive_asr_clean_text, run_u3_then_u2_asr_candidates_with_timeout_retry
31
32
  from scripts.core.config_loader import config_get, load_tikomni_config
32
33
  from scripts.core.progress_report import ProgressReporter, build_progress_reporter
33
- from scripts.core.extract_pipeline import build_api_trace, resolve_trace_error_context
34
+ from scripts.core.extract_pipeline import (
35
+ build_api_trace,
36
+ build_attempted_route,
37
+ build_route_plan_entry,
38
+ build_stage_status,
39
+ resolve_trace_error_context,
40
+ )
34
41
  from scripts.core.tikomni_common import (
35
42
  call_json_api,
36
43
  deep_find_all,
@@ -55,10 +62,14 @@ from scripts.writers.write_work_fact_card import (
55
62
  APP_V2_VIDEO_ENDPOINT = "/api/u1/v1/xiaohongshu/app_v2/get_video_note_detail"
56
63
  APP_V2_IMAGE_ENDPOINT = "/api/u1/v1/xiaohongshu/app_v2/get_image_note_detail"
57
64
  APP_V2_MIXED_ENDPOINT = "/api/u1/v1/xiaohongshu/app_v2/get_mixed_note_detail"
65
+ APP_V1_V2_ENDPOINT = "/api/u1/v1/xiaohongshu/app/get_note_info_v2"
58
66
  APP_V1_ENDPOINT = "/api/u1/v1/xiaohongshu/app/get_note_info"
59
67
  WEB_V2_V2_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v2"
60
68
  WEB_V2_V3_ENDPOINT = "/api/u1/v1/xiaohongshu/web_v2/fetch_feed_notes_v3"
61
- WEB_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v7"
69
+ WEB_V1_V7_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v7"
70
+ WEB_V1_V5_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v5"
71
+ WEB_V1_V4_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v4"
72
+ WEB_V1_V2_ENDPOINT = "/api/u1/v1/xiaohongshu/web/get_note_info_v2"
62
73
  U2_REQUEST_TIMEOUT_CAP_MS = 15000
63
74
  U2_GATE_MIN_DURATION_MS = 13000
64
75
  U2_GATE_MAX_DURATION_MS = 1800000
@@ -320,6 +331,218 @@ def _route_success_for_note(response: Dict[str, Any], source_input: Dict[str, Op
320
331
  return bool(completeness.get("core_ready"))
321
332
 
322
333
 
334
+ def _response_failure_reason(response: Dict[str, Any]) -> str:
335
+ if response.get("timeout_retry_exhausted"):
336
+ return "primary_timeout_retry_exhausted"
337
+ if response.get("error_reason"):
338
+ return "primary_non_timeout_failure"
339
+ return "primary_unknown_failure"
340
+
341
+
342
+ def _route_accept_decision(response: Dict[str, Any], source_input: Dict[str, Optional[str]]) -> Dict[str, Any]:
343
+ if not response.get("ok"):
344
+ return {
345
+ "accepted": False,
346
+ "accept_reason": "response_not_ok",
347
+ "fallback_reason": _response_failure_reason(response),
348
+ }
349
+
350
+ completeness = response.get("_field_completeness")
351
+ if not isinstance(completeness, dict):
352
+ completeness = _route_field_completeness(response.get("data"), source_input)
353
+ response["_field_completeness"] = completeness
354
+
355
+ missing_core = list(completeness.get("missing_core") or [])
356
+ if missing_core:
357
+ return {
358
+ "accepted": False,
359
+ "accept_reason": "note_missing_core_fields",
360
+ "fallback_reason": f"note_missing_core:{','.join(missing_core)}",
361
+ }
362
+
363
+ fields = completeness.get("fields") if isinstance(completeness.get("fields"), dict) else {}
364
+ optional_missing = [field_name for field_name in ("author", "subtitle", "metrics") if not fields.get(field_name)]
365
+ accept_reason = "note_core_fields_ready"
366
+ if optional_missing:
367
+ accept_reason = f"note_core_fields_ready_optional_missing:{','.join(optional_missing)}"
368
+ return {
369
+ "accepted": True,
370
+ "accept_reason": accept_reason,
371
+ "fallback_reason": "",
372
+ }
373
+
374
+
375
def _extract_xsec_token_from_input(share_text: Optional[str]) -> str:
    """Return the ``xsec_token`` query parameter found in *share_text*, or ``""``.

    *share_text* may be a bare URL or free-form share text with one or more
    URLs embedded in it. Every ``http(s)://`` URL found in the text is tried
    first, in order of appearance; the whole text is tried last as a fallback.
    The first non-empty token wins.
    """
    text = normalize_text(share_text)
    if not text:
        return ""

    # In a raw string the whitespace class is spelled ``\S``. The previous
    # ``\\S`` matched a literal backslash followed by "S", so no embedded URL
    # was ever extracted.
    candidates = re.findall(r"https?://\S+", text)
    # Embedded URLs are tried before the raw text: parsing the raw text lets
    # prose that follows the URL leak into the last query value.
    candidates.append(text)

    for candidate in candidates:
        try:
            query = urllib.parse.parse_qs(urllib.parse.urlparse(candidate).query)
        except ValueError:
            # urlparse/parse_qs signal malformed input with ValueError; skip it.
            continue
        token = normalize_text((query.get("xsec_token") or [""])[0])
        if token:
            # parse_qs has already percent-decoded once; the second unquote is
            # kept from the original, presumably for double-encoded tokens.
            return urllib.parse.unquote(token)
    return ""
391
+
392
+
393
def _build_unavailable_attempt(
    *,
    route_label: str,
    endpoint: str,
    method: str,
    reason: str,
) -> Dict[str, Any]:
    """Build the attempt record for a route skipped because its params were unavailable.

    The record is produced by ``build_attempted_route`` with ``accepted=False``
    and ``skipped=True``. *reason* is used for both ``fallback_reason`` and
    ``param_reason``, and the HTTP method is stored upper-cased under ``extra``.
    """
    attempt_fields: Dict[str, Any] = {
        "route_label": route_label,
        "endpoint": endpoint,
        "accepted": False,
        "accept_reason": "skipped_param_unavailable",
        "fallback_reason": reason,
        "param_readiness": "unavailable",
        "param_reason": reason,
        "skipped": True,
        "extra": {"method": method.upper()},
    }
    return build_attempted_route(**attempt_fields)
411
+
412
+
413
def _build_note_fetch_routes(source_input: Dict[str, Optional[str]]) -> List[Dict[str, Any]]:
    """Return the ordered list of note-fetch routes with their request params.

    Each entry is a dict with ``route_label``, ``endpoint``, ``method``,
    ``params``, ``body``, ``param_readiness`` ("ready" or "unavailable") and
    ``param_reason`` (empty when ready). List order is the fallback order:
    app routes first, then the web_v2 routes, then the web_v1 routes.

    The ``web_v1_v5`` route is the only POST route. It is ready only when a
    note id, an ``xsec_token`` taken from the share text, and the
    ``TIKOMNI_XHS_WEB_COOKIE`` environment variable are all present.
    """
    share_text = source_input.get("share_text")
    note_id = source_input.get("note_id") or _extract_note_id_from_share(share_text)

    # share_text / note_id params shared by the app routes and the web_v1 GET
    # routes; each route receives its own copy.
    identity_params: Dict[str, Any] = {}
    if share_text:
        identity_params["share_text"] = share_text
    if note_id:
        identity_params["note_id"] = note_id

    short_url_ready = bool(_is_short_share_url(share_text) and share_text)
    xsec_token = _extract_xsec_token_from_input(share_text)
    web_cookie = os.getenv("TIKOMNI_XHS_WEB_COOKIE", "").strip()
    web_v5_ready = bool(note_id and xsec_token and web_cookie)

    def _route(
        label: str,
        endpoint: str,
        params: Dict[str, Any],
        ready: Any,
        unavailable_reason: str,
        *,
        method: str = "GET",
        body: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        return {
            "route_label": label,
            "endpoint": endpoint,
            "method": method,
            "params": params,
            "body": body,
            "param_readiness": "ready" if ready else "unavailable",
            "param_reason": "" if ready else unavailable_reason,
        }

    def _identity_route(label: str, endpoint: str) -> Dict[str, Any]:
        return _route(
            label,
            endpoint,
            dict(identity_params),
            bool(identity_params),
            "missing_note_id_or_share_text",
        )

    # Reason reported for web_v1_v5 when it is not ready: the first missing
    # prerequisite, checked in the order note id, xsec token, cookie.
    if not note_id:
        v5_unavailable_reason = "missing_note_id"
    elif not xsec_token:
        v5_unavailable_reason = "missing_xsec_token"
    else:
        v5_unavailable_reason = "fallback_requires_cookie"

    v5_body: Optional[Dict[str, Any]] = None
    if web_v5_ready:
        v5_body = {
            "note_id": note_id,
            "xsec_token": xsec_token,
            "cookie": web_cookie,
        }

    routes: List[Dict[str, Any]] = [
        _identity_route("app_v2_video", APP_V2_VIDEO_ENDPOINT),
        _identity_route("app_v2_image", APP_V2_IMAGE_ENDPOINT),
        _identity_route("app_v2_mixed", APP_V2_MIXED_ENDPOINT),
        _identity_route("app_v1_v2", APP_V1_V2_ENDPOINT),
        _identity_route("app_v1", APP_V1_ENDPOINT),
        _route(
            "web_v2_v3",
            WEB_V2_V3_ENDPOINT,
            {"short_url": share_text} if short_url_ready else {},
            short_url_ready,
            "missing_short_share_url",
        ),
        _route(
            "web_v2_v2",
            WEB_V2_V2_ENDPOINT,
            {"note_id": note_id} if note_id else {},
            note_id,
            "missing_note_id",
        ),
        _identity_route("web_v1_v7", WEB_V1_V7_ENDPOINT),
        _route(
            "web_v1_v5",
            WEB_V1_V5_ENDPOINT,
            {},
            web_v5_ready,
            v5_unavailable_reason,
            method="POST",
            body=v5_body,
        ),
        _identity_route("web_v1_v4", WEB_V1_V4_ENDPOINT),
        _identity_route("web_v1_v2", WEB_V1_V2_ENDPOINT),
    ]
    return routes
544
+
545
+
323
546
  def _pick_text_from_paths(payload: Any, paths: List[List[str]]) -> str:
324
547
  for path in paths:
325
548
  raw = deep_find_first(payload, path)
@@ -817,18 +1040,35 @@ def _fetch_note_info(
817
1040
  progress: Optional[ProgressReporter] = None,
818
1041
  ) -> Dict[str, Any]:
819
1042
  attempts: List[Dict[str, Any]] = []
1043
+ routes = _build_note_fetch_routes(source_input)
1044
+ route_plan = [
1045
+ build_route_plan_entry(
1046
+ route_label=str(route["route_label"]),
1047
+ endpoint=str(route["endpoint"]),
1048
+ method=str(route["method"]),
1049
+ param_readiness=str(route.get("param_readiness") or "ready"),
1050
+ param_reason=str(route.get("param_reason") or ""),
1051
+ )
1052
+ for route in routes
1053
+ ]
820
1054
 
821
- share_text = source_input.get("share_text")
822
- note_id = source_input.get("note_id") or _extract_note_id_from_share(share_text)
823
-
824
- def _call(path: str, params: Dict[str, Any], label: str, fallback_reason: Optional[str] = None) -> Dict[str, Any]:
1055
+ def _call(
1056
+ *,
1057
+ path: str,
1058
+ method: str,
1059
+ params: Optional[Dict[str, Any]],
1060
+ body: Optional[Dict[str, Any]],
1061
+ label: str,
1062
+ fallback_reason: Optional[str] = None,
1063
+ ) -> Dict[str, Any]:
825
1064
  response = call_json_api(
826
1065
  base_url=base_url,
827
1066
  path=path,
828
1067
  token=token,
829
- method="GET",
1068
+ method=method,
830
1069
  timeout_ms=timeout_ms,
831
1070
  params=params,
1071
+ body=body,
832
1072
  )
833
1073
  response["_endpoint"] = path
834
1074
  response["_route_label"] = label
@@ -843,78 +1083,64 @@ def _fetch_note_info(
843
1083
  "core_ready": False,
844
1084
  }
845
1085
  _emit_http_progress(progress, stage="note.fetch", response=response, route_label=label, source_input=source_input)
846
- attempts.append({"label": label, "endpoint": path, "response": response})
847
1086
  return response
848
1087
 
849
- app_params: Dict[str, Any] = {}
850
- if share_text:
851
- app_params["share_text"] = share_text
852
- if note_id:
853
- app_params["note_id"] = note_id
854
-
855
- app_v2_attempts = [
856
- (APP_V2_VIDEO_ENDPOINT, "app_v2_video"),
857
- (APP_V2_IMAGE_ENDPOINT, "app_v2_image"),
858
- (APP_V2_MIXED_ENDPOINT, "app_v2_mixed"),
859
- ]
860
1088
  next_reason: Optional[str] = None
1089
+ final_response: Dict[str, Any] = {
1090
+ "ok": False,
1091
+ "error_reason": "single_fetch_all_routes_failed",
1092
+ "_endpoint": None,
1093
+ "_route_label": "",
1094
+ }
861
1095
 
862
- for path, label in app_v2_attempts:
863
- app_v2_response = _call(path, app_params, label, fallback_reason=next_reason)
864
- if _route_success_for_note(app_v2_response, source_input):
865
- app_v2_response["_attempts"] = attempts
866
- return app_v2_response
867
- if app_v2_response.get("ok"):
868
- app_v2_response["fallback_trigger_reason"] = "field_completeness_below_threshold"
869
- next_reason = "field_completeness_below_threshold" if app_v2_response.get("ok") else (
870
- "primary_timeout_retry_exhausted" if app_v2_response.get("timeout_retry_exhausted") else "primary_non_timeout_failure"
871
- )
1096
+ for route in routes:
1097
+ if route.get("param_readiness") != "ready":
1098
+ attempts.append(
1099
+ _build_unavailable_attempt(
1100
+ route_label=str(route["route_label"]),
1101
+ endpoint=str(route["endpoint"]),
1102
+ method=str(route["method"]),
1103
+ reason=str(route.get("param_reason") or "fallback_param_unavailable"),
1104
+ )
1105
+ )
1106
+ continue
872
1107
 
873
- app_response = _call(APP_V1_ENDPOINT, app_params, "app_v1", fallback_reason=next_reason)
874
- if _route_success_for_note(app_response, source_input):
875
- app_response["_attempts"] = attempts
876
- return app_response
877
- if app_response.get("ok"):
878
- app_response["fallback_trigger_reason"] = "field_completeness_below_threshold"
879
-
880
- app_fallback_reason = (
881
- "field_completeness_below_threshold"
882
- if app_response.get("ok")
883
- else ("primary_timeout_retry_exhausted" if app_response.get("timeout_retry_exhausted") else "primary_non_timeout_failure")
884
- )
885
- is_short = _is_short_share_url(share_text)
886
-
887
- if is_short and share_text:
888
- v3_response = _call(
889
- WEB_V2_V3_ENDPOINT,
890
- {"short_url": share_text},
891
- "web_v2_v3_short",
892
- fallback_reason=app_fallback_reason,
1108
+ response = _call(
1109
+ path=str(route["endpoint"]),
1110
+ method=str(route["method"]),
1111
+ params=dict(route.get("params") or {}),
1112
+ body=dict(route.get("body") or {}) if isinstance(route.get("body"), dict) else None,
1113
+ label=str(route["route_label"]),
1114
+ fallback_reason=next_reason,
893
1115
  )
894
- if v3_response.get("ok"):
895
- v3_response["_attempts"] = attempts
896
- return v3_response
897
-
898
- if note_id:
899
- v2_response = _call(
900
- WEB_V2_V2_ENDPOINT,
901
- {"note_id": note_id},
902
- "web_v2_v2_note_id",
903
- fallback_reason=app_fallback_reason,
1116
+ decision = _route_accept_decision(response, source_input)
1117
+ attempts.append(
1118
+ build_attempted_route(
1119
+ route_label=str(route["route_label"]),
1120
+ endpoint=str(route["endpoint"]),
1121
+ response=response,
1122
+ accepted=bool(decision.get("accepted")),
1123
+ accept_reason=str(decision.get("accept_reason") or ""),
1124
+ fallback_reason=str(decision.get("fallback_reason") or ""),
1125
+ extra={
1126
+ "method": str(route["method"]).upper(),
1127
+ "field_completeness": response.get("_field_completeness"),
1128
+ "response": response,
1129
+ },
1130
+ )
904
1131
  )
905
- if v2_response.get("ok"):
906
- v2_response["_attempts"] = attempts
907
- return v2_response
908
-
909
- web_params: Dict[str, Any] = {}
910
- if share_text:
911
- web_params["share_text"] = share_text
912
- if note_id:
913
- web_params["note_id"] = note_id
1132
+ final_response = response
1133
+ if decision.get("accepted"):
1134
+ response["_attempts"] = attempts
1135
+ response["_route_plan"] = route_plan
1136
+ response["_accept_reason"] = decision.get("accept_reason")
1137
+ return response
1138
+ next_reason = str(decision.get("fallback_reason") or "field_completeness_below_threshold")
1139
+ response["fallback_trigger_reason"] = next_reason
914
1140
 
915
- web_response = _call(WEB_ENDPOINT, web_params, "web_v7", fallback_reason=app_fallback_reason)
916
- web_response["_attempts"] = attempts
917
- return web_response
1141
+ final_response["_attempts"] = attempts
1142
+ final_response["_route_plan"] = route_plan
1143
+ return final_response
918
1144
 
919
1145
 
920
1146
  def _extract_subtitle_urls(payload: Any) -> List[str]:
@@ -1344,6 +1570,7 @@ def _build_result(
1344
1570
  metadata_fields: Optional[Dict[str, Any]] = None,
1345
1571
  asr_source: Optional[str] = None,
1346
1572
  timings: Optional[Dict[str, int]] = None,
1573
+ stage_status: Optional[Dict[str, Any]] = None,
1347
1574
  ) -> Dict[str, Any]:
1348
1575
  metadata = metadata_fields or {}
1349
1576
  summary_block = summarize_content(raw_content, source=f"xiaohongshu:{text_source}")
@@ -1371,7 +1598,7 @@ def _build_result(
1371
1598
  analysis_eligibility = "eligible" if primary_text else "incomplete"
1372
1599
  analysis_exclusion_reason = "" if analysis_eligibility == "eligible" else ("video_asr_unavailable" if work_modality == "video" else "caption_raw_missing")
1373
1600
 
1374
- return {
1601
+ payload = {
1375
1602
  "platform": "xiaohongshu",
1376
1603
  "content_kind": "note",
1377
1604
  "source": source_input,
@@ -1427,6 +1654,9 @@ def _build_result(
1427
1654
  "request_id": request_id,
1428
1655
  "timings": dict(timings or {}),
1429
1656
  }
1657
+ if isinstance(stage_status, dict):
1658
+ payload["stage_status"] = dict(stage_status)
1659
+ return payload
1430
1660
 
1431
1661
 
1432
1662
  def run_xiaohongshu_extract(
@@ -1601,11 +1831,35 @@ def run_xiaohongshu_extract(
1601
1831
  timings["u1_total_ms"] = _elapsed_ms(u1_started_at)
1602
1832
 
1603
1833
  attempts = note_response.get("_attempts") or []
1834
+ stage_status = build_stage_status(
1835
+ stage="fetch",
1836
+ status="succeeded" if note_response.get("ok") else "failed",
1837
+ route_plan=list(note_response.get("_route_plan") or []),
1838
+ attempted_routes=list(attempts),
1839
+ chosen_route=str(note_response.get("_route_label") or ""),
1840
+ accept_reason=str(note_response.get("_accept_reason") or ""),
1841
+ fallback_reason=str(note_response.get("fallback_trigger_reason") or ""),
1842
+ error_reason=None if note_response.get("ok") else "single_fetch_all_routes_failed",
1843
+ all_routes_failed=not bool(note_response.get("ok")),
1844
+ )
1604
1845
  for index, attempt in enumerate(attempts, start=1):
1605
1846
  response = attempt.get("response") if isinstance(attempt, dict) else None
1606
1847
  endpoint = attempt.get("endpoint") if isinstance(attempt, dict) else None
1607
- label = attempt.get("label") if isinstance(attempt, dict) else None
1848
+ label = attempt.get("route_label") if isinstance(attempt, dict) else None
1608
1849
  if not isinstance(response, dict):
1850
+ if attempt.get("skipped"):
1851
+ trace.append(
1852
+ {
1853
+ "step": f"u1_get_note_info_attempt_{index}",
1854
+ "route_label": label,
1855
+ "endpoint": endpoint,
1856
+ "accept_reason": attempt.get("accept_reason"),
1857
+ "fallback_reason": attempt.get("fallback_reason"),
1858
+ "param_readiness": attempt.get("param_readiness"),
1859
+ "param_reason": attempt.get("param_reason"),
1860
+ "skipped": True,
1861
+ }
1862
+ )
1609
1863
  continue
1610
1864
  step = "u1_get_note_info_effective" if index == len(attempts) else f"u1_get_note_info_attempt_{index}"
1611
1865
  trace.append(
@@ -1625,10 +1879,7 @@ def run_xiaohongshu_extract(
1625
1879
  trace.append(
1626
1880
  {
1627
1881
  "step": "u1_get_note_info_route_decision",
1628
- "chosen_route": note_response.get("_route_label"),
1629
- "request_id": note_response.get("request_id"),
1630
- "field_completeness": note_response.get("_field_completeness"),
1631
- "attempt_count": len(attempts),
1882
+ **stage_status,
1632
1883
  }
1633
1884
  )
1634
1885
 
@@ -1636,7 +1887,7 @@ def run_xiaohongshu_extract(
1636
1887
  error_ctx = resolve_trace_error_context(
1637
1888
  responses=[note_response],
1638
1889
  extract_trace=trace,
1639
- default_error_reason="u1_get_note_info_failed",
1890
+ default_error_reason="single_fetch_all_routes_failed",
1640
1891
  )
1641
1892
  result = _build_result(
1642
1893
  source_input=source_input,
@@ -1660,6 +1911,7 @@ def run_xiaohongshu_extract(
1660
1911
  missing_fields=[{"field": "u1_note_info", "reason": "all_routes_failed"}],
1661
1912
  metadata_fields=metadata_fields,
1662
1913
  timings=timings,
1914
+ stage_status={"fetch": stage_status},
1663
1915
  )
1664
1916
  if write_card:
1665
1917
  card_started_at = time.perf_counter()
@@ -1835,6 +2087,7 @@ def run_xiaohongshu_extract(
1835
2087
  missing_fields=missing_fields,
1836
2088
  metadata_fields=metadata_fields,
1837
2089
  timings=timings,
2090
+ stage_status={"fetch": stage_status},
1838
2091
  )
1839
2092
  if write_card:
1840
2093
  card_started_at = time.perf_counter()
@@ -1900,6 +2153,7 @@ def run_xiaohongshu_extract(
1900
2153
  missing_fields=missing_fields,
1901
2154
  metadata_fields=metadata_fields,
1902
2155
  timings=timings,
2156
+ stage_status={"fetch": stage_status},
1903
2157
  )
1904
2158
  if write_card:
1905
2159
  card_started_at = time.perf_counter()
@@ -2046,6 +2300,7 @@ def run_xiaohongshu_extract(
2046
2300
  missing_fields=missing_fields,
2047
2301
  metadata_fields=metadata_fields,
2048
2302
  timings=timings,
2303
+ stage_status={"fetch": stage_status},
2049
2304
  )
2050
2305
  if write_card:
2051
2306
  card_started_at = time.perf_counter()
@@ -2110,6 +2365,7 @@ def run_xiaohongshu_extract(
2110
2365
  missing_fields=missing_fields,
2111
2366
  metadata_fields=metadata_fields,
2112
2367
  timings=timings,
2368
+ stage_status={"fetch": stage_status},
2113
2369
  )
2114
2370
 
2115
2371
  if write_card:
@@ -2184,6 +2440,7 @@ def run_xiaohongshu_extract(
2184
2440
  missing_fields=missing_fields,
2185
2441
  metadata_fields=metadata_fields,
2186
2442
  timings=timings,
2443
+ stage_status={"fetch": stage_status},
2187
2444
  )
2188
2445
 
2189
2446
  if write_card:
@@ -295,7 +295,7 @@ def build_work_output_envelope(payload: Dict[str, Any], platform: Optional[str]
295
295
  card = build_work_fact_card(payload, platform=platform)
296
296
  source = _source_dict(payload)
297
297
  input_value = source.get("share_url") or source.get("share_text") or source.get("source_url") or source
298
- return {
298
+ envelope = {
299
299
  "object_type": "work",
300
300
  "platform": card["platform"],
301
301
  "input": input_value,
@@ -306,6 +306,13 @@ def build_work_output_envelope(payload: Dict[str, Any], platform: Optional[str]
306
306
  "extract_trace": card.get("extract_trace", []),
307
307
  "request_id": card["request_id"],
308
308
  }
309
+ stage_status = payload.get("stage_status")
310
+ if isinstance(stage_status, dict):
311
+ envelope["stage_status"] = stage_status
312
+ pipeline_status = payload.get("pipeline_status")
313
+ if isinstance(pipeline_status, dict):
314
+ envelope["pipeline_status"] = pipeline_status
315
+ return envelope
309
316
 
310
317
 
311
318
  def _yaml_scalar(value: Any) -> str: