npm - @tikomni/skills - Versions diffs - 0.1.7 → 0.1.8 - Mend

@tikomni/skills 0.1.7 → 0.1.8

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (12)

package/skills/social-media-crawl/scripts/pipelines/media_url_rules.py ADDED Viewed

@@ -0,0 +1,86 @@
+#!/usr/bin/env python3
+"""Shared media URL classification helpers."""
+from __future__ import annotations
+from typing import Iterable, List
+def _is_http_url(url: str) -> bool:  # True only when url starts with http:// or https:// (case-insensitive)
+    lower = (url or "").lower()  # falsy input (None, "") is treated as an empty string
+    return lower.startswith("http://") or lower.startswith("https://")
+def is_probable_image_url(url: str) -> bool:  # True when url is http(s) and contains any token from image_tokens
+    lower = (url or "").lower()  # all matching below is done on the lowercased URL
+    if not _is_http_url(lower):
+        return False  # non-http(s) or empty input is never classified as an image
+    image_tokens = [  # substrings tested with `in`, so they match anywhere in the URL, not only as a suffix
+        ".jpg",
+        ".jpeg",
+        ".png",
+        ".webp",
+        ".gif",
+        "imageview2",  # from here on: non-extension markers — presumably CDN/path hints; TODO confirm
+        "imagemogr2",
+        "redimage",
+        "frame/",
+        "sns-img",
+        "sns-webpic",
+        "notes_pre_post",
+        "/image/",
+        "/img/",
+    ]
+    return any(token in lower for token in image_tokens)
+def is_probable_audio_url(url: str) -> bool:  # True when url is http(s) and contains any token from audio_tokens
+    lower = (url or "").lower()  # all matching below is done on the lowercased URL
+    if not _is_http_url(lower):
+        return False  # non-http(s) or empty input is never classified as audio
+    audio_tokens = [  # substrings tested with `in`, so they match anywhere in the URL, not only as a suffix
+        ".m4a",
+        ".mp3",
+        ".aac",
+        ".wav",
+        ".flac",
+        ".ogg",
+        "/audio/",  # from here on: non-extension markers — presumably CDN/path hints; TODO confirm
+        "sns-audio",
+        "redaudio",
+    ]
+    return any(token in lower for token in audio_tokens)
+def is_probable_video_url(url: str) -> bool:  # True when url is http(s), is not image/audio, and contains a video token
+    lower = (url or "").lower()  # all matching below is done on the lowercased URL
+    if not _is_http_url(lower):
+        return False  # non-http(s) or empty input is never classified as video
+    if is_probable_image_url(lower) or is_probable_audio_url(lower):
+        return False  # image/audio classification wins: such a URL is never reported as video
+    video_tokens = [  # substrings tested with `in`, so they match anywhere in the URL, not only as a suffix
+        ".mp4",
+        ".m3u8",
+        ".mov",
+        ".flv",
+        "/video/",
+        "sns-video",
+        "redvideo",
+        "play",  # NOTE(review): bare words below also match inside other words (e.g. "play" in "display") — confirm intended
+        "stream",
+        "master",
+        "vod",
+    ]
+    return any(token in lower for token in video_tokens)
+def filter_video_urls(urls: Iterable[str]) -> List[str]:  # Return the stripped, video-like URLs, deduplicated in first-seen order
+    unique: List[str] = []  # result list; preserves the order in which URLs were first seen
+    seen = set()  # stripped URLs already accepted, used for the duplicate check
+    for raw in urls:
+        url = str(raw or "").strip()  # falsy entries (None, "") become "" and are skipped below
+        if not url or url in seen or not is_probable_video_url(url):
+            continue  # skip empty, duplicate, or non-video entries
+        unique.append(url)
+        seen.add(url)
+    return unique

package/skills/social-media-crawl/scripts/pipelines/platform_adapters.py CHANGED Viewed

@@ -13,6 +13,14 @@ from scripts.pipelines.schema import (
     validate_work_item,
     validate_works_collection,
 )
+from scripts.pipelines.douyin_metadata import (
+    extract_douyin_author,
+    extract_douyin_caption,
+    extract_douyin_metrics,
+    extract_douyin_title,
+    normalize_douyin_author_handle,
+)
+from scripts.pipelines.media_url_rules import is_probable_video_url as is_shared_probable_video_url
 from scripts.core.tikomni_common import deep_find_all, deep_find_first
 from scripts.pipelines.select_low_quality_video_url import select_low_quality_video_url
@@ -117,12 +125,7 @@ def _normalize_douyin_tags(value: Any) -> List[str]:
 def _is_probable_video_url(url: str) -> bool:
-    lower = (url or "").lower()
-    if not (lower.startswith("http://") or lower.startswith("https://")):
-        return False
-    if any(token in lower for token in [".jpg", ".jpeg", ".png", ".webp", "image", "img"]):
-        return False
-    return any(token in lower for token in [".mp4", ".m3u8", ".m4a", "video", "stream", "play"])
+    return is_shared_probable_video_url(url)
 def _extract_douyin_video_down_url(item: Dict[str, Any]) -> str:
@@ -348,14 +351,21 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
     internal_author_id = _t(_first(profile_data, ["sec_user_id", "sec_uid"], raw.get("resolved_author_id")))
     stable_author_id = _t(_first(profile_data, ["uid", "user_id", "id"]))
-    author_handle = _t(_first(profile_data, ["short_id", "unique_id", "douyin_id", "display_id"]))
+    author_handle = normalize_douyin_author_handle(
+        _first(profile_data, ["unique_id"]),
+        _first(profile_data, ["short_id"]),
+        _first(profile_data, ["douyin_id"]),
+        _first(profile_data, ["display_id"]),
+        _first(profile_data, ["nickname", "name"]),
+    )
+    nickname = _t(_first(profile_data, ["nickname", "name"]))
     author_id = internal_author_id or stable_author_id
     profile = build_author_profile(
         platform="douyin",
         platform_author_id=author_id,
         author_handle=author_handle,
-        nickname=_t(_first(profile_data, ["nickname", "name"])),
+        nickname=nickname,
         ip_location=_t(_first(profile_data, ["ip_location", "ip_label", "ipLocation"])),
         fans_count=_i(_first(profile_data, ["follower_count", "fans_count", "mplatform_followers_count"])),
         liked_count=_i(_first(profile_data, ["total_favorited", "liked_count", "favoriting_count"])),
@@ -383,23 +393,27 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
         if not isinstance(item, dict):
             continue
         aweme_id = _t(_first(item, ["aweme_id", "item_id", "id"]))
-        metrics = {
-            "like": _i(_first(item, ["digg_count", "like_count"], 0)),
-            "comment": _i(_first(item, ["comment_count"], 0)),
-            "collect": _i(_first(item, ["collect_count"], 0)),
-            "share": _i(_first(item, ["share_count"], 0)),
-            "play": _optional_i(_first(item, ["play_count", "view_count"], None)),
-        }
+        author_info = extract_douyin_author(item)
+        metrics = extract_douyin_metrics(item)
         video_down_url = _extract_douyin_video_down_url(item)
         tags = _normalize_douyin_tags(_first(item, ["hashtags", "tags", "text_extra"], []))
+        work_author_handle = normalize_douyin_author_handle(
+            author_info.get("author_handle"),
+            author_handle,
+            nickname,
+        )
+        work_platform_author_id = _t(author_info.get("platform_author_id") or author_id)
+        work_author_platform_id = _t(author_info.get("author_platform_id") or stable_author_id or author_id)
+        work_nickname = _t(author_info.get("nickname") or nickname)
+        work_signature = _t(author_info.get("signature") or profile.get("signature"))
         work = build_work_item(
             platform="douyin",
             platform_work_id=aweme_id,
-            platform_author_id=author_id,
-            author_handle=author_handle,
-            author_platform_id=stable_author_id or author_id,
-            title=_t(_first(item, ["title"])),
-            caption_raw=_t(_first(item, ["desc"])),
+            platform_author_id=work_platform_author_id,
+            author_handle=work_author_handle,
+            author_platform_id=work_author_platform_id,
+            title=extract_douyin_title(item),
+            caption_raw=extract_douyin_caption(item),
             subtitle_raw="",
             subtitle_source="missing",
             publish_time=_t(_first(item, ["create_time", "publish_time"])),
@@ -407,7 +421,12 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
             content_type="video",
             duration_ms=_i(_first(item, ["duration_ms", "duration"], 0)),
             tags=tags,
-            metrics=metrics,
+            metrics={
+                "digg_count": int(metrics.get("digg_count") or 0),
+                "comment_count": int(metrics.get("comment_count") or 0),
+                "collect_count": int(metrics.get("collect_count") or 0),
+                "share_count": int(metrics.get("share_count") or 0),
+            },
             cover_image=(
                 _extract_first_url(_first(item, ["cover_url"], ""))
                 or _extract_first_url(_first(item, ["cover"], ""))
@@ -420,18 +439,31 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
             asr_error_reason="",
             asr_source="fallback_none",
             platform_native_refs={
-                "douyin_sec_uid": internal_author_id,
-                "douyin_aweme_author_id": stable_author_id or author_id,
+                "douyin_sec_uid": _t(author_info.get("douyin_sec_uid") or internal_author_id),
+                "douyin_aweme_author_id": _t(author_info.get("douyin_aweme_author_id") or stable_author_id or author_id),
+                "douyin_unique_id": _t(author_info.get("unique_id")),
             },
             raw_ref={"aweme_id": aweme_id, "raw_item": item},
         )
         work.update(
             {
-                "digg_count": metrics["like"],
-                "comment_count": metrics["comment"],
-                "collect_count": metrics["collect"],
-                "share_count": metrics["share"],
-                "play_count": metrics["play"],
+                "author": {
+                    "author_handle": work_author_handle,
+                    "platform_author_id": work_platform_author_id,
+                    "author_platform_id": work_author_platform_id,
+                    "douyin_sec_uid": _t(author_info.get("douyin_sec_uid") or internal_author_id),
+                    "douyin_aweme_author_id": _t(author_info.get("douyin_aweme_author_id") or stable_author_id or author_id),
+                    "unique_id": _t(author_info.get("unique_id")),
+                    "nickname": work_nickname,
+                    "signature": work_signature,
+                },
+                "nickname": work_nickname,
+                "signature": work_signature,
+                "digg_count": int(metrics.get("digg_count") or 0),
+                "comment_count": int(metrics.get("comment_count") or 0),
+                "collect_count": int(metrics.get("collect_count") or 0),
+                "share_count": int(metrics.get("share_count") or 0),
+                "play_count": metrics.get("play_count"),
             }
         )
@@ -448,16 +480,18 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
     author_id = _t(_first(profile_data, ["user_id", "userid", "id"], raw.get("resolved_author_id")))
     author_handle = _t(_first(profile_data, ["red_id", "redid", "display_id", "username"]))
+    nickname = _t(_first(profile_data, ["nickname", "name"]))
+    signature = _t(_first(profile_data, ["desc", "signature", "bio", "introduction"]))
     profile = build_author_profile(
         platform="xiaohongshu",
         platform_author_id=author_id,
         author_handle=author_handle,
-        nickname=_t(_first(profile_data, ["nickname", "name"])),
+        nickname=nickname,
         ip_location=_t(_first(profile_data, ["ip_location", "ip_location_desc", "ipLocation"])),
         fans_count=_i(_first(profile_data, ["fans", "fans_count", "follower_count", "followers"])),
         liked_count=_i(_first(profile_data, ["liked_count", "likes", "total_liked", "like_count"])),
         collected_count=_i(_first(profile_data, ["collected_count", "collect_count", "total_collected", "favorite_count"])),
-        signature=_t(_first(profile_data, ["desc", "signature", "bio", "introduction"])),
+        signature=signature,
         avatar_url=_extract_xhs_avatar_url(profile_data),
         works_count=_i(_first(profile_data, ["notes", "note_count", "works_count", "post_count"])),
         verified=bool(_first(profile_data, ["official_verified", "verified"], False)),
@@ -480,6 +514,8 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
             "share": _i(_first(item, ["share_count"], 0)),
             "play": _optional_i(_first(item, ["view_count", "play_count"], None)),
         }
+        if (metrics["play"] or 0) <= 0 and max(metrics["like"], metrics["comment"], metrics["collect"], metrics["share"]) > 0:
+            metrics["play"] = None
         subtitle_inline = _extract_xhs_subtitle_inline(item)
         subtitle_urls = _extract_xhs_subtitle_urls(item)
         video_down_url = _extract_xhs_video_down_url(item)
@@ -489,6 +525,8 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
         cover_image = _extract_xhs_cover_image(item)
         source_url = _extract_xhs_source_url(item, note_id)
         share_url = _extract_xhs_share_url(item, note_id)
+        work_nickname = nickname
+        work_signature = signature
         work = build_work_item(
             platform="xiaohongshu",
@@ -523,6 +561,15 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
         )
         work.update(
             {
+                "author": {
+                    "author_handle": author_handle,
+                    "platform_author_id": author_id,
+                    "author_platform_id": author_id,
+                    "nickname": work_nickname,
+                    "signature": work_signature,
+                },
+                "nickname": work_nickname,
+                "signature": work_signature,
                 "digg_count": metrics["like"],
                 "comment_count": metrics["comment"],
                 "collect_count": metrics["collect"],

package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py CHANGED Viewed

@@ -27,10 +27,15 @@ from scripts.core.config_loader import config_get, load_tikomni_config, resolve_
 from scripts.core.progress_report import build_progress_reporter
 from scripts.core.storage_router import resolve_author_directory_name
 from scripts.core.tikomni_common import resolve_runtime, write_json_stdout
+from scripts.pipelines.input_contracts import normalize_douyin_creator_input
+from scripts.pipelines.schema import build_author_profile
 from scripts.pipelines.douyin_creator_home_helpers import collect_and_adapt
 from scripts.pipelines.home_asr import enrich_author_home_asr
 from scripts.writers.write_work_fact_card import build_work_fact_card, persist_output_envelope, write_work_fact_card
+DEFAULT_MAX_ITEMS = 200
+MAX_ITEMS_HARD_LIMIT = 200
 def _write_collection_artifacts(
     *,
@@ -81,11 +86,12 @@ def run_douyin_creator_home(
     *,
     input_value: str,
     config: Dict[str, Any],
-    runtime: Dict[str, Any],
+    runtime: Dict[str, Any] | None,
     max_items: int,
     write_card: bool,
     persist_output: bool,
 ) -> Dict[str, Any]:
+    bounded_max_items = max(1, min(int(max_items), MAX_ITEMS_HARD_LIMIT))
     progress = build_progress_reporter(
         workflow="social-media-crawl",
         platform="douyin",
@@ -94,15 +100,69 @@ def run_douyin_creator_home(
         scope="workflow",
     )
     progress.started(stage="author_home.workflow", message="douyin author_home workflow started")
+    preflight = normalize_douyin_creator_input(input_value)
+    normalized_input_value = str(preflight.get("input_value") or "")
+    if preflight.get("error_reason"):
+        request_id = ensure_request_id(None, fallback_seed=input_value)
+        empty_profile = build_author_profile(platform="douyin", request_id=request_id)
+        extract_trace = [
+            {
+                "step": "input.preflight",
+                "ok": False,
+                "input_kind": "creator_url_or_sec_uid",
+                "normalized_input_value": normalized_input_value or None,
+                "error_reason": preflight.get("error_reason"),
+                "missing_fields": list(preflight.get("missing_fields") or []),
+            }
+        ]
+        envelope = {
+            "object_type": "creator",
+            "platform": "douyin",
+            "input": input_value,
+            "normalized": {
+                "creator_profile": {**empty_profile, "request_id": request_id, "extract_trace": extract_trace},
+                "work_collection": {
+                    "platform": "douyin",
+                    "platform_author_id": "",
+                    "count": 0,
+                    "items": [],
+                    "request_id": request_id,
+                    "extract_trace": extract_trace,
+                },
+            },
+            "completeness": evaluate_collection(empty_profile, []),
+            "missing_fields": normalize_missing_fields(preflight.get("missing_fields")),
+            "error_reason": str(preflight.get("error_reason") or "invalid_creator_input"),
+            "extract_trace": extract_trace,
+            "request_id": request_id,
+            "card_write": {
+                "enabled": bool(write_card),
+                "ok": False,
+                "count": 0,
+                "results": [],
+                "reason": "skipped_invalid_input",
+            },
+            "collection_artifacts": {},
+            "output_persist": {"enabled": False, "skipped": True, "reason": "invalid_input"},
+        }
+        progress.done(
+            stage="author_home.workflow",
+            message="douyin author_home workflow finished",
+            data={"request_id": request_id, "works_count": 0, "error_reason": envelope["error_reason"]},
+        )
+        return envelope
+    if runtime is None:
+        raise ValueError("runtime_required_for_valid_input")
     raw, profile, works, missing = collect_and_adapt(
-        input_value=input_value,
+        input_value=normalized_input_value or input_value,
         base_url=runtime["base_url"],
         token=runtime["token"],
         timeout_ms=runtime["timeout_ms"],
         page_size=20,
         pages_max=50,
-        max_items=max(1, int(max_items)),
+        max_items=bounded_max_items,
         progress=progress.child(scope="author_home.collect"),
     )
@@ -138,7 +198,7 @@ def run_douyin_creator_home(
     request_id = ensure_request_id(
         raw.get("request_id") or profile.get("request_id"),
-        fallback_seed=input_value,
+        fallback_seed=normalized_input_value or input_value,
     )
     extract_trace = list(raw.get("extract_trace") or []) + list(asr_bundle.get("trace") or [])
@@ -206,7 +266,12 @@ def main() -> None:
     parser.add_argument("--allow-process-env", action="store_true", help="Allow process env overrides")
     parser.add_argument("--base-url", default=None, help="Override Tikomni base URL")
     parser.add_argument("--timeout-ms", type=int, default=None, help="Override timeout in ms")
-    parser.add_argument("--max-items", type=int, default=5, help="Max works to collect from homepage")
+    parser.add_argument(
+        "--max-items",
+        type=int,
+        default=DEFAULT_MAX_ITEMS,
+        help=f"Max works to collect from homepage (default full crawl, capped at {MAX_ITEMS_HARD_LIMIT})",
+    )
     parser.set_defaults(write_card=True, persist_output=True)
     parser.add_argument("--write-card", dest="write_card", action="store_true", help="Write work fact cards")
     parser.add_argument("--no-write-card", dest="write_card", action="store_false", help="Skip card writing")
@@ -215,6 +280,19 @@ def main() -> None:
     args = parser.parse_args()
     config, _ = load_tikomni_config(args.config, env_file=args.env_file, allow_process_env=args.allow_process_env)
+    preflight = normalize_douyin_creator_input(args.input)
+    if preflight.get("error_reason"):
+        write_json_stdout(
+            run_douyin_creator_home(
+                input_value=args.input,
+                config=config,
+                runtime=None,
+                max_items=int(args.max_items),
+                write_card=bool(args.write_card),
+                persist_output=bool(args.persist_output),
+            )
+        )
+        return
     runtime = resolve_runtime(
         env_file=args.env_file,
         api_key_env=str(config_get(config, "runtime.auth_env_key", "TIKOMNI_API_KEY")),
@@ -224,7 +302,7 @@ def main() -> None:
     )
     write_json_stdout(
         run_douyin_creator_home(
-            input_value=args.input,
+            input_value=str(preflight.get("input_value") or args.input),
             config=config,
             runtime=runtime,
             max_items=int(args.max_items),

package/skills/social-media-crawl/scripts/pipelines/run_douyin_single_work.py CHANGED Viewed

@@ -31,6 +31,13 @@ from scripts.core.config_loader import config_get, load_tikomni_config
 from scripts.core.extract_pipeline import resolve_trace_error_context
 from scripts.core.progress_report import ProgressReporter, build_progress_reporter
 from scripts.pipelines.douyin_video_type_matrix import normalize_douyin_video_type
+from scripts.pipelines.douyin_metadata import (
+    extract_douyin_author as extract_shared_douyin_author,
+    extract_douyin_caption as extract_shared_douyin_caption,
+    extract_douyin_metrics as extract_shared_douyin_metrics,
+    extract_douyin_title as extract_shared_douyin_title,
+)
+from scripts.pipelines.input_contracts import normalize_douyin_work_input
 from scripts.core.asr_pipeline import derive_asr_clean_text, run_u2_asr_with_timeout_retry
 from scripts.pipelines.select_low_quality_video_url import select_low_quality_video_url
 from scripts.core.tikomni_common import (
@@ -156,14 +163,8 @@ def _normalize_input(
     input_value: Optional[str],
     share_url: Optional[str],
 ) -> Dict[str, Optional[str]]:
-    normalized_share = (share_url or "").strip() or None
-    if input_value and not normalized_share:
-        candidate = input_value.strip()
-        if candidate.startswith("http://") or candidate.startswith("https://"):
-            normalized_share = candidate
-    return {"share_url": normalized_share}
+    normalized = normalize_douyin_work_input(input_value, share_url)
+    return {"share_url": normalize_text(normalized.get("share_url")) or None}
 def _extract_aweme_detail(payload: Any) -> Optional[Dict[str, Any]]:
@@ -238,76 +239,19 @@ def _normalize_duration_ms(item: Dict[str, Any]) -> Optional[int]:
 def _pick_title(item: Dict[str, Any]) -> str:
-    for key in ("item_title", "title", "desc", "preview_title"):
-        value = item.get(key)
-        text = normalize_text(value)
-        if text:
-            return text
-    return ""
+    return extract_shared_douyin_title(item)
 def _pick_desc(item: Dict[str, Any]) -> str:
-    for key in ("desc", "item_title", "title", "preview_title"):
-        value = item.get(key)
-        text = normalize_text(value)
-        if text:
-            return text
-    return ""
+    return extract_shared_douyin_caption(item)
 def _extract_author(item: Dict[str, Any]) -> Dict[str, Optional[str]]:
-    author = item.get("author")
-    if not isinstance(author, dict):
-        author = {}
-    author_platform_id = normalize_text(author.get("uid")) or normalize_text(author.get("id")) or normalize_text(item.get("author_user_id"))
-    author_handle = normalize_text(author.get("short_id")) or normalize_text(author.get("nickname"))
-    douyin_sec_uid = normalize_text(author.get("sec_uid"))
-    douyin_aweme_author_id = normalize_text(item.get("author_user_id")) or author_platform_id
-    return {
-        "author_handle": author_handle or None,
-        "platform_author_id": author_platform_id or None,
-        "author_platform_id": author_platform_id or None,
-        "douyin_sec_uid": douyin_sec_uid or None,
-        "douyin_aweme_author_id": douyin_aweme_author_id or None,
-        "nickname": normalize_text(author.get("nickname")) or None,
-        "signature": normalize_text(author.get("signature")) or None,
-    }
+    return extract_shared_douyin_author(item)
 def _extract_metrics(item: Dict[str, Any]) -> Dict[str, Optional[int]]:
-    statistics = item.get("statistics")
-    if not isinstance(statistics, dict):
-        statistics = {}
-    def metric(*keys: str, default: Optional[int] = 0) -> Optional[int]:
-        for key in keys:
-            value = _safe_int(statistics.get(key))
-            if value is not None:
-                return value
-            value = _safe_int(item.get(key))
-            if value is not None:
-                return value
-        return default
-    metrics = {
-        "digg_count": metric("digg_count"),
-        "comment_count": metric("comment_count"),
-        "collect_count": metric("collect_count"),
-        "share_count": metric("share_count", "forward_count"),
-        "play_count": metric("play_count", default=None),
-    }
-    play_count = metrics.get("play_count")
-    engagement_floor = max(
-        int(metrics.get("digg_count") or 0),
-        int(metrics.get("comment_count") or 0),
-        int(metrics.get("collect_count") or 0),
-        int(metrics.get("share_count") or 0),
-    )
-    if play_count is not None and int(play_count) <= 0 and engagement_floor > 0:
-        metrics["play_count"] = None
-    return metrics
+    return extract_shared_douyin_metrics(item)
 def _extract_platform_work_id(item: Dict[str, Any]) -> Optional[str]:
@@ -633,6 +577,7 @@ def _build_result(
     cover_image: Optional[str] = None,
     asr_source: str = "fallback_none",
     timings: Optional[Dict[str, int]] = None,
+    missing_fields: Optional[List[Dict[str, str]]] = None,
 ) -> Dict[str, Any]:
     summary_block = summarize_content(raw_content, source="douyin:single-video-low-quality")
     insights = list(summary_block.get("insights", []))
@@ -698,13 +643,13 @@ def _build_result(
         "insights": insights,
         "confidence": confidence,
         "error_reason": error_reason,
-        "missing_fields": _build_missing_fields(
+        "missing_fields": list(missing_fields or _build_missing_fields(
             title=title,
             desc=desc,
             platform_work_id=platform_work_id,
             video_down_url=video_down_url,
             author=author,
-        ),
+        )),
         "extract_trace": extract_trace,
         "fallback_trace": fallback_trace,
         "request_id": request_id,
@@ -741,7 +686,8 @@ def run_douyin_single_video(
     workflow_started_at = time.perf_counter()
     timings = _empty_timings()
     parse_started_at = time.perf_counter()
-    source_input = _normalize_input(input_value, share_url)
+    preflight = normalize_douyin_work_input(input_value, share_url)
+    source_input = {"share_url": normalize_text(preflight.get("share_url")) or None}
     timings["url_parse_ms"] = _elapsed_ms(parse_started_at)
     if progress is not None:
         progress.started(
@@ -749,6 +695,66 @@ def run_douyin_single_video(
             message="douyin single_video workflow started",
             data={"analysis_mode": analysis_mode, "write_card": bool(write_card), "persist_output": bool(persist_output)},
         )
+    preflight_trace = [
+        {
+            "step": "input.preflight",
+            "ok": preflight.get("error_reason") is None,
+            "input_kind": "share_url",
+            "normalized_share_url": source_input.get("share_url"),
+            "error_reason": preflight.get("error_reason"),
+            "missing_fields": list(preflight.get("missing_fields") or []),
+        }
+    ]
+    if preflight.get("error_reason"):
+        result = _build_result(
+            source_input=source_input,
+            platform_work_id=None,
+            title="",
+            desc="",
+            duration_ms=None,
+            video_down_url=None,
+            author={"author_handle": None, "author_platform_id": None, "douyin_sec_uid": None, "douyin_aweme_author_id": None, "nickname": None, "signature": None},
+            metrics=_empty_metrics(),
+            tags=[],
+            is_video=False,
+            video_type_reason="invalid_input",
+            raw_content="",
+            confidence="low",
+            error_reason=str(preflight.get("error_reason") or "invalid_share_url"),
+            extract_trace=preflight_trace,
+            fallback_trace=[],
+            request_id=None,
+            u2_task_id=None,
+            u2_task_status="UNKNOWN",
+            u2_gate_reason="invalid_input",
+            analysis_mode=analysis_mode,
+            timings=timings,
+            missing_fields=list(preflight.get("missing_fields") or []),
+        )
+        if write_card:
+            card_started_at = time.perf_counter()
+            result["card_write"] = write_work_fact_card(
+                payload=result,
+                platform="douyin",
+                card_type=card_type,
+                card_root=card_root,
+                content_kind=content_kind,
+                storage_config=storage_config,
+                analysis_mode=analysis_mode,
+                progress=progress.child(scope="card_write") if progress is not None else None,
+            )
+            timings["card_write_ms"] = _elapsed_ms(card_started_at)
+            timings["llm_analysis_ms"] = _safe_int((result.get("card_write") or {}).get("llm_analysis_ms"))
+        timings["total_ms"] = _elapsed_ms(workflow_started_at)
+        result["timings"] = dict(timings)
+        _update_pipeline_status(result)
+        return _finalize_result(
+            result=result,
+            source_input=source_input,
+            platform_work_id=None,
+            storage_config=storage_config,
+            persist_output=persist_output,
+        )
     if not source_input.get("share_url"):
         result = _build_result(
             source_input=source_input,
@@ -765,7 +771,7 @@ def run_douyin_single_video(
             raw_content="",
             confidence="low",
             error_reason="missing_share_url",
-            extract_trace=[],
+            extract_trace=preflight_trace,
             fallback_trace=[],
             request_id=None,
             u2_task_id=None,