@tikomni/skills 0.1.1 → 0.1.2

This diff shows the content of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
Files changed (21)
  1. package/package.json +1 -1
  2. package/skills/creator-analysis/SKILL.md +34 -10
  3. package/skills/creator-analysis/references/contracts/creator-card-fields.md +2 -0
  4. package/skills/creator-analysis/references/contracts/work-card-fields.md +40 -4
  5. package/skills/creator-analysis/references/platform-guides/douyin.md +41 -36
  6. package/skills/creator-analysis/references/platform-guides/generic.md +11 -7
  7. package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +45 -30
  8. package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +224 -95
  9. package/skills/creator-analysis/references/workflow.md +8 -3
  10. package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +205 -21
  11. package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +54 -11
  12. package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +200 -13
  13. package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +113 -42
  14. package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +65 -7
  15. package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +82 -18
  16. package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +198 -32
  17. package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +374 -31
  18. package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +68 -12
  19. package/skills/creator-analysis/scripts/core/storage_router.py +3 -0
  20. package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +3 -2
  21. package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +314 -137
package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py

@@ -10,9 +10,13 @@ import subprocess
 from typing import Any, Dict, List, Tuple
 
 from scripts.author_home.analyzers.author_analysis_v2_support import (
+    AnalysisResourceError,
+    OUTPUT_SCHEMA_PATH,
+    PROMPT_CONTRACT_PATH,
     build_author_analysis_input_v1,
     build_fallback_author_analysis_v2,
     derive_legacy_summary,
+    prepare_author_analysis_bundle,
     prompt_contract_text,
     validate_author_analysis_v2,
 )
@@ -106,8 +110,7 @@ def _compact_analysis_input_for_prompt(analysis_input: Dict[str, Any]) -> Dict[s
 
 
 
-def _build_prompt(analysis_input: Dict[str, Any], sampled_work_explanations: Dict[str, Any]) -> str:
-    contract_prompt = prompt_contract_text()
+def _build_prompt(analysis_input: Dict[str, Any], sampled_work_explanations: Dict[str, Any], *, contract_prompt: str) -> str:
     prompt_input = _compact_analysis_input_for_prompt(analysis_input)
     prompt_payload = {"author_analysis_input_v1": prompt_input}
     if isinstance(sampled_work_explanations, dict) and sampled_work_explanations.get("sampled_work_explanations"):
@@ -126,6 +129,17 @@ def _build_prompt(analysis_input: Dict[str, Any], sampled_work_explanations: Dic
     )
 
 
+def _stage_status(*, status: str, ok_count: int, failed_count: int, degraded_count: int, reason_codes: List[str], failure_kind: str = "") -> Dict[str, Any]:
+    return {
+        "status": status,
+        "ok_count": ok_count,
+        "failed_count": failed_count,
+        "degraded_count": degraded_count,
+        "reason_codes": list(dict.fromkeys([code for code in reason_codes if code])),
+        "failure_kind": failure_kind or None,
+    }
+
+
 def _extract_json_block(text: str) -> Dict[str, Any]:
     content = (text or "").strip()
     if not content:
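Side note on the new helper: `_stage_status` normalizes every stage report into one dict shape, deduplicating reason codes (order-preserving, empty strings dropped) and mapping an empty `failure_kind` to None. A minimal illustration with hypothetical values:

    _stage_status(
        status="degraded",
        ok_count=0,
        failed_count=0,
        degraded_count=3,
        reason_codes=["timeout", "", "timeout", "rate_limit"],
    )
    # -> {"status": "degraded", "ok_count": 0, "failed_count": 0, "degraded_count": 3,
    #     "reason_codes": ["timeout", "rate_limit"], "failure_kind": None}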
@@ -159,9 +173,29 @@ def _unwrap_author_analysis(payload: Dict[str, Any]) -> Dict[str, Any]:
     return payload
 
 
-def run_prompt_first_author_analysis(profile: Dict[str, Any], works: List[Dict[str, Any]]) -> Tuple[Dict[str, Any], List[Dict[str, str]], List[Dict[str, Any]]]:
-    analysis_input, input_errors = build_author_analysis_input_v1(profile=profile, works=works, platform=str(profile.get("platform") or "unknown"))
-    sampled_work_explanations, sampled_explanation_errors, sampled_explanation_trace = run_sampled_work_batch_explanations(analysis_input)
+def run_prompt_first_author_analysis(
+    profile: Dict[str, Any],
+    works: List[Dict[str, Any]],
+    *,
+    analysis_bundle: Dict[str, Any] | None = None,
+) -> Tuple[Dict[str, Any], List[Dict[str, str]], List[Dict[str, Any]]]:
+    prepared = analysis_bundle if isinstance(analysis_bundle, dict) else prepare_author_analysis_bundle(
+        profile=profile,
+        works=works,
+        platform=str(profile.get("platform") or "unknown"),
+    )
+    analysis_input = prepared.get("analysis_input") if isinstance(prepared.get("analysis_input"), dict) else {}
+    input_errors: List[Dict[str, str]] = []
+    input_resource_error: AnalysisResourceError | None = None
+    try:
+        input_errors = build_author_analysis_input_v1(
+            profile=profile,
+            works=works,
+            platform=str(profile.get("platform") or "unknown"),
+        )[1]
+    except AnalysisResourceError as error:
+        input_resource_error = error
+    sampled_work_explanations, sampled_explanation_errors, sampled_explanation_trace, sampled_explanations_status = run_sampled_work_batch_explanations(analysis_input)
     sampled_works_count = len(analysis_input.get("sampled_works") or [])
     total_works = ((analysis_input.get("aggregate_stats") or {}).get("total_works") if isinstance(analysis_input.get("aggregate_stats"), dict) else 0)
     llm_timeout_sec = max(int(os.getenv("TIKOMNI_AUTHOR_ANALYSIS_TIMEOUT_SEC", str(DEFAULT_ANALYSIS_TIMEOUT_SEC))), 5)
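Callers that have already run `prepare_author_analysis_bundle` can pass the result through the new keyword to avoid rebuilding the input. A hypothetical call site (the platform value is illustrative):

    bundle = prepare_author_analysis_bundle(profile=profile, works=works, platform="douyin")
    result, errors, trace = run_prompt_first_author_analysis(profile, works, analysis_bundle=bundle)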
@@ -173,20 +207,46 @@ def run_prompt_first_author_analysis(profile: Dict[str, Any], works: List[Dict[s
             "total_works": total_works,
             "sampled_works_count": sampled_works_count,
             "prompt_contract": f"prompt-contracts/{AUTHOR_ANALYSIS_PROMPT_FILE}@v1",
+            "contract_path": str(PROMPT_CONTRACT_PATH),
+            "schema_path": str(OUTPUT_SCHEMA_PATH),
             "llm_timeout_sec": llm_timeout_sec,
             "small_sample_skip_threshold": small_sample_skip_threshold,
         }
     ] + sampled_explanation_trace
     if input_errors:
         trace.append({"step": "analysis.input_validation_failed", "error_count": len(input_errors)})
+    if input_resource_error is not None:
+        trace.append({"step": "analysis.input_resource_error", "error": str(input_resource_error)})
+        result = {
+            **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=[]),
+            "author_analysis_v2": {},
+            "author_analysis_input_v1": analysis_input,
+            "sampled_work_explanations": sampled_work_explanations,
+            "sampled_explanations_status": sampled_explanations_status,
+            "author_analysis_status": _stage_status(
+                status="failed",
+                ok_count=0,
+                failed_count=1,
+                degraded_count=0,
+                reason_codes=[input_resource_error.code],
+                failure_kind="configuration",
+            ),
+            "quality_tier": "failed",
+            "validation": {
+                "ok": False,
+                "errors": [],
+            },
+        }
+        return result, [], trace
     if sampled_explanation_errors:
         trace.append({"step": "analysis.sampled_work_explanations_validation_failed", "error_count": len(sampled_explanation_errors)})
 
-    prompt = _build_prompt(analysis_input, sampled_work_explanations)
     response_text = ""
     analysis_v2: Dict[str, Any] = {}
     llm_ok = False
     skip_llm = sampled_works_count < small_sample_skip_threshold
+    author_reason_codes: List[str] = []
+    author_status = _stage_status(status="failed", ok_count=0, failed_count=1, degraded_count=0, reason_codes=["analysis_not_started"])
     if skip_llm:
         trace.append(
             {
@@ -196,7 +256,42 @@ def run_prompt_first_author_analysis(profile: Dict[str, Any], works: List[Dict[s
                 "threshold": small_sample_skip_threshold,
             }
         )
+        author_reason_codes.append("small_sample_below_threshold")
     else:
+        try:
+            contract_prompt = prompt_contract_text()
+            trace.append(
+                {
+                    "step": "analysis.resources_loaded",
+                    "contract_loaded": True,
+                    "contract_chars": len(contract_prompt),
+                }
+            )
+            prompt = _build_prompt(analysis_input, sampled_work_explanations, contract_prompt=contract_prompt)
+        except AnalysisResourceError as error:
+            trace.append({"step": "analysis.resource_error", "error": str(error)})
+            author_status = _stage_status(
+                status="failed",
+                ok_count=0,
+                failed_count=1,
+                degraded_count=0,
+                reason_codes=[error.code],
+                failure_kind="configuration",
+            )
+            result = {
+                **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=input_errors),
+                "author_analysis_v2": {},
+                "author_analysis_input_v1": analysis_input,
+                "sampled_work_explanations": sampled_work_explanations,
+                "sampled_explanations_status": sampled_explanations_status,
+                "author_analysis_status": author_status,
+                "quality_tier": "failed",
+                "validation": {
+                    "ok": False,
+                    "errors": input_errors,
+                },
+            }
+            return result, input_errors, trace
         try:
             run = subprocess.run(
                 ["openclaw", "agent", "--agent", "main", "--message", prompt, "--json"],
@@ -226,32 +321,124 @@ def run_prompt_first_author_analysis(profile: Dict[str, Any], works: List[Dict[s
             )
         except Exception as error:
             trace.append({"step": "analysis.llm_error", "error": f"{type(error).__name__}:{error}"})
+            author_reason_codes.append("author_llm_runtime_error")
+
+    validation_errors: List[Dict[str, str]] = []
+    resource_error: AnalysisResourceError | None = None
+    if analysis_v2:
+        try:
+            validation_errors = validate_author_analysis_v2(analysis_v2, analysis_input=analysis_input)
+            trace.append({"step": "analysis.output_schema_loaded", "schema_loaded": True})
+        except AnalysisResourceError as error:
+            resource_error = error
+
+    if resource_error is not None:
+        trace.append(
+            {
+                "step": "analysis.resource_error",
+                "error": str(resource_error),
+                "contract_path": str(resource_error.path) if resource_error.code == "contract_load_failed" else str(resource_error.path),
+            }
+        )
+        author_status = _stage_status(
+            status="failed",
+            ok_count=0,
+            failed_count=1,
+            degraded_count=0,
+            reason_codes=[resource_error.code],
+            failure_kind="configuration",
+        )
+        result = {
+            **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=input_errors),
+            "author_analysis_v2": {},
+            "author_analysis_input_v1": analysis_input,
+            "sampled_work_explanations": sampled_work_explanations,
+            "sampled_explanations_status": sampled_explanations_status,
+            "author_analysis_status": author_status,
+            "quality_tier": "failed",
+            "validation": {
+                "ok": False,
+                "errors": input_errors,
+            },
+        }
+        return result, input_errors, trace
+
+    if not analysis_v2 or validation_errors or skip_llm:
+        try:
+            fallback = build_fallback_author_analysis_v2(analysis_input)
+            fallback_errors = validate_author_analysis_v2(fallback, analysis_input=analysis_input)
+        except AnalysisResourceError as error:
+            trace.append({"step": "analysis.fallback_resource_error", "error": str(error)})
+            author_status = _stage_status(
+                status="failed",
+                ok_count=0,
+                failed_count=1,
+                degraded_count=0,
+                reason_codes=[error.code],
+                failure_kind="configuration",
+            )
+            result = {
+                **derive_legacy_summary({}, analysis_input=analysis_input, validation_errors=input_errors),
+                "author_analysis_v2": {},
+                "author_analysis_input_v1": analysis_input,
+                "sampled_work_explanations": sampled_work_explanations,
+                "sampled_explanations_status": sampled_explanations_status,
+                "author_analysis_status": author_status,
+                "quality_tier": "failed",
+                "validation": {
+                    "ok": False,
+                    "errors": input_errors,
+                },
+            }
+            return result, input_errors, trace
 
-    validation_errors = validate_author_analysis_v2(analysis_v2, analysis_input=analysis_input) if analysis_v2 else []
-    if not analysis_v2 or validation_errors:
-        fallback = build_fallback_author_analysis_v2(analysis_input)
-        fallback_errors = validate_author_analysis_v2(fallback, analysis_input=analysis_input)
         trace.append(
             {
                 "step": "analysis.fallback_used",
-                "reason": "llm_empty_or_validation_failed",
+                "reason": "small_sample_below_threshold" if skip_llm else "llm_empty_or_validation_failed",
                 "llm_ok": llm_ok,
                 "validation_error_count": len(validation_errors),
                 "fallback_error_count": len(fallback_errors),
             }
         )
         analysis_v2 = fallback
-        validation_errors = input_errors + sampled_explanation_errors + validation_errors + fallback_errors
+        validation_errors = input_errors + validation_errors + fallback_errors
+        author_status = _stage_status(
+            status="fallback",
+            ok_count=1 if analysis_v2 else 0,
+            failed_count=0 if analysis_v2 else 1,
+            degraded_count=0,
+            reason_codes=author_reason_codes or ["fallback_used"],
+            failure_kind="runtime",
+        )
     else:
-        validation_errors = input_errors + sampled_explanation_errors + validation_errors
+        validation_errors = input_errors + validation_errors
         trace.append({"step": "analysis.schema_validation_passed"})
+        author_status = _stage_status(
+            status="full",
+            ok_count=1,
+            failed_count=0,
+            degraded_count=0,
+            reason_codes=[],
+        )
 
     legacy = derive_legacy_summary(analysis_v2, analysis_input=analysis_input, validation_errors=validation_errors)
+    if author_status.get("status") == "failed":
+        quality_tier = "failed"
+    elif author_status.get("status") == "fallback":
+        quality_tier = "fallback"
+    elif sampled_explanations_status.get("status") != "full":
+        quality_tier = "degraded_author_only"
+    else:
+        quality_tier = "full"
     result = {
         **legacy,
         "author_analysis_v2": analysis_v2,
         "author_analysis_input_v1": analysis_input,
         "sampled_work_explanations": sampled_work_explanations,
+        "sampled_explanations_status": sampled_explanations_status,
+        "author_analysis_status": author_status,
+        "quality_tier": quality_tier,
         "validation": {
             "ok": not bool(validation_errors),
             "errors": validation_errors,
package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py

@@ -13,8 +13,9 @@ from typing import Any, Dict, List, Tuple
 import jsonschema
 
 
-PROMPT_CONTRACT_PATH = Path(__file__).resolve().parents[2] / "references" / "prompt-contracts" / "sampled-work-batch-explanations.md"
-SCHEMA_PATH = Path(__file__).resolve().parents[2] / "references" / "schemas" / "sampled-work-batch-explanations.schema.json"
+SKILL_ROOT = Path(__file__).resolve().parents[3]
+PROMPT_CONTRACT_PATH = SKILL_ROOT / "references" / "prompt-contracts" / "sampled-work-batch-explanations.md"
+SCHEMA_PATH = SKILL_ROOT / "references" / "schemas" / "sampled-work-batch-explanations.schema.json"
 DEFAULT_TIMEOUT_SEC = 45
 TEXT_LIMITS = {
     "title": 120,
@@ -24,6 +25,17 @@ TEXT_LIMITS = {
 }
 
 
+class SampledExplanationResourceError(RuntimeError):
+    def __init__(self, *, code: str, path: Path, detail: str = "") -> None:
+        self.code = code
+        self.path = path
+        self.detail = detail
+        message = f"{code}:{path}"
+        if detail:
+            message = f"{message}:{detail}"
+        super().__init__(message)
+
+
 def _safe_text(value: Any) -> str:
     if value is None:
         return ""
@@ -42,14 +54,13 @@ def _truncate_text(value: Any, limit: int) -> str:
 def _load_json(path: Path) -> Dict[str, Any]:
     try:
         return json.loads(path.read_text(encoding="utf-8"))
-    except Exception:
-        return {}
+    except Exception as error:
+        raise SampledExplanationResourceError(code="schema_load_failed", path=path, detail=f"{type(error).__name__}:{error}") from error
 
 
-def _schema_errors(payload: Any) -> List[Dict[str, str]]:
-    schema = _load_json(SCHEMA_PATH)
+def _schema_errors(payload: Any, schema: Dict[str, Any]) -> List[Dict[str, str]]:
     if not schema:
-        return []
+        raise SampledExplanationResourceError(code="schema_empty", path=SCHEMA_PATH)
     try:
         validator = jsonschema.Draft202012Validator(schema)
         errors: List[Dict[str, str]] = []
@@ -64,8 +75,8 @@ def _schema_errors(payload: Any) -> List[Dict[str, str]]:
 def _prompt_contract_text() -> str:
     try:
         return PROMPT_CONTRACT_PATH.read_text(encoding="utf-8").strip()
-    except Exception:
-        return ""
+    except Exception as error:
+        raise SampledExplanationResourceError(code="contract_load_failed", path=PROMPT_CONTRACT_PATH, detail=f"{type(error).__name__}:{error}") from error
 
 
 def _extract_json_block(text: str) -> Dict[str, Any]:
@@ -144,40 +155,20 @@ def _compact_input(analysis_input: Dict[str, Any]) -> Dict[str, Any]:
     }
 
 
-def _build_prompt(analysis_input: Dict[str, Any]) -> str:
+def _build_prompt(analysis_input: Dict[str, Any], *, contract_text: str) -> str:
     compacted = _compact_input(analysis_input)
     return (
         "请严格根据以下提示词原文输出,结果必须是 JSON 对象,且只输出 JSON。\n"
         "顶层对象必须是 sampled_work_explanations。\n"
         "不得输出 markdown,不得输出解释。\n\n"
         "=== 提示词原文开始 ===\n"
-        f"{_prompt_contract_text()}\n"
+        f"{contract_text}\n"
         "=== 提示词原文结束 ===\n\n"
         "=== 输入数据(JSON) ===\n"
         f"{json.dumps(compacted, ensure_ascii=False)}"
     )
 
 
-def _fallback_explanations(analysis_input: Dict[str, Any]) -> Dict[str, Any]:
-    sampled = analysis_input.get("sampled_works") if isinstance(analysis_input.get("sampled_works"), list) else []
-    explanations: Dict[str, Any] = {}
-    for item in sampled:
-        if not isinstance(item, dict):
-            continue
-        work_id = _safe_text(item.get("platform_work_id"))
-        if not work_id:
-            continue
-        explanations[work_id] = {
-            "why_it_worked_or_failed": f"该样本主要依赖 { _safe_text(item.get('hook_type')) or 'hook' }、{ _safe_text(item.get('structure_type')) or 'structure' } 与 { _safe_text(item.get('content_form')) or 'content_form' } 的组合。",
-            "copyable_elements": [value for value in [_safe_text(item.get("hook_type")), _safe_text(item.get("structure_type")), _safe_text(item.get("cta_type"))] if value],
-            "non_copyable_elements": ["具体个人经历或原始案例背书"],
-            "emotional_triggers": [_safe_text(item.get("hook_type")) or "结果预期"],
-            "cognitive_gap": "观众想知道为什么这个结构能成立,以及自己如何快速套用。",
-            "commercial_signal": "从 CTA 与内容结构看,具备基础商业承接意图,但证据仍有限。",
-        }
-    return {"sampled_work_explanations": explanations}
-
-
 def _coverage_errors(payload: Dict[str, Any], analysis_input: Dict[str, Any]) -> List[Dict[str, str]]:
     sampled = analysis_input.get("sampled_works") if isinstance(analysis_input.get("sampled_works"), list) else []
     explanations = payload.get("sampled_work_explanations") if isinstance(payload.get("sampled_work_explanations"), dict) else {}
@@ -193,24 +184,84 @@ def _coverage_errors(payload: Dict[str, Any], analysis_input: Dict[str, Any]) ->
     return errors
 
 
-def run_sampled_work_batch_explanations(analysis_input: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dict[str, str]], List[Dict[str, Any]]]:
+def _classify_runtime_reason(text: str) -> str:
+    lowered = (text or "").lower()
+    if "timeoutexpired" in lowered or "timeout" in lowered:
+        return "timeout"
+    if "rate limit" in lowered or "ratelimit" in lowered or "429" in lowered:
+        return "rate_limit"
+    return "transient_llm_error"
+
+
+def _stage_status(*, status: str, ok_count: int, failed_count: int, degraded_count: int, reason_codes: List[str], failure_kind: str = "") -> Dict[str, Any]:
+    return {
+        "status": status,
+        "ok_count": ok_count,
+        "failed_count": failed_count,
+        "degraded_count": degraded_count,
+        "reason_codes": list(dict.fromkeys([code for code in reason_codes if code])),
+        "failure_kind": failure_kind or None,
+    }
+
+
+def run_sampled_work_batch_explanations(analysis_input: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dict[str, str]], List[Dict[str, Any]], Dict[str, Any]]:
     sampled = analysis_input.get("sampled_works") if isinstance(analysis_input.get("sampled_works"), list) else []
     trace: List[Dict[str, Any]] = [
         {
             "step": "sampled_work_explanations.input_built",
             "sampled_works_count": len(sampled),
             "prompt_contract": "prompt-contracts/sampled-work-batch-explanations.md@v1",
+            "contract_path": str(PROMPT_CONTRACT_PATH),
+            "schema_path": str(SCHEMA_PATH),
         }
     ]
 
     if not sampled:
         trace.append({"step": "sampled_work_explanations.skipped", "reason": "empty_sampled_works"})
-        return {"sampled_work_explanations": {}}, [], trace
+        return {"sampled_work_explanations": {}}, [], trace, _stage_status(
+            status="skipped",
+            ok_count=0,
+            failed_count=0,
+            degraded_count=0,
+            reason_codes=["empty_sampled_works"],
+        )
 
     llm_timeout_sec = max(int(os.getenv("TIKOMNI_SAMPLED_EXPLANATION_TIMEOUT_SEC", str(DEFAULT_TIMEOUT_SEC))), 5)
-    prompt = _build_prompt(analysis_input)
     result: Dict[str, Any] = {}
     errors: List[Dict[str, str]] = []
+    reason_codes: List[str] = []
+
+    try:
+        contract_text = _prompt_contract_text()
+        schema = _load_json(SCHEMA_PATH)
+        trace.append(
+            {
+                "step": "sampled_work_explanations.resources_loaded",
+                "contract_loaded": True,
+                "contract_chars": len(contract_text),
+                "schema_loaded": True,
+            }
+        )
+    except SampledExplanationResourceError as error:
+        trace.append(
+            {
+                "step": "sampled_work_explanations.resource_error",
+                "contract_loaded": error.code != "contract_load_failed",
+                "contract_chars": 0,
+                "schema_loaded": error.code not in {"schema_load_failed", "schema_empty"},
+                "error": str(error),
+            }
+        )
+        return {"sampled_work_explanations": {}}, [], trace, _stage_status(
+            status="failed",
+            ok_count=0,
+            failed_count=len(sampled),
+            degraded_count=0,
+            reason_codes=[error.code],
+            failure_kind="configuration",
+        )
+
+    prompt = _build_prompt(analysis_input, contract_text=contract_text)
 
     try:
         run = subprocess.run(
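The classifier keys off substrings of the stringified exception, so a subprocess.TimeoutExpired lands in "timeout" and an HTTP 429 in "rate_limit"; everything else falls through to the generic bucket. Illustrative calls:

    _classify_runtime_reason("TimeoutExpired:Command timed out")  # -> "timeout"
    _classify_runtime_reason("HTTPError:429 Too Many Requests")   # -> "rate_limit"
    _classify_runtime_reason("JSONDecodeError:Expecting value")   # -> "transient_llm_error"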
@@ -238,23 +289,43 @@ def run_sampled_work_batch_explanations(analysis_input: Dict[str, Any]) -> Tuple
             }
         )
     except Exception as error:
-        trace.append({"step": "sampled_work_explanations.llm_error", "error": f"{type(error).__name__}:{error}"})
+        reason_code = _classify_runtime_reason(f"{type(error).__name__}:{error}")
+        trace.append({"step": "sampled_work_explanations.llm_error", "error": f"{type(error).__name__}:{error}", "reason_code": reason_code})
+        return {"sampled_work_explanations": {}}, [], trace, _stage_status(
+            status="degraded",
+            ok_count=0,
+            failed_count=0,
+            degraded_count=len(sampled),
+            reason_codes=[reason_code],
+            failure_kind="runtime",
+        )
 
-    errors = _schema_errors(result) if result else [{"field": "$", "reason": "empty_result"}]
+    errors = _schema_errors(result, schema) if result else [{"field": "$", "reason": "empty_result"}]
     if not errors:
         errors.extend(_coverage_errors(result, analysis_input))
     if errors:
-        fallback = _fallback_explanations(analysis_input)
-        fallback_errors = _schema_errors(fallback) + _coverage_errors(fallback, analysis_input)
         trace.append(
             {
-                "step": "sampled_work_explanations.fallback_used",
+                "step": "sampled_work_explanations.validation_failed",
                 "reason": "llm_empty_or_validation_failed",
                 "validation_error_count": len(errors),
-                "fallback_error_count": len(fallback_errors),
             }
         )
-        return fallback, errors + fallback_errors, trace
+        reason_codes.append("validation_failed")
+        return {"sampled_work_explanations": {}}, errors, trace, _stage_status(
+            status="degraded",
+            ok_count=0,
+            failed_count=0,
+            degraded_count=len(sampled),
+            reason_codes=reason_codes,
+            failure_kind="runtime",
+        )
 
     trace.append({"step": "sampled_work_explanations.schema_validation_passed"})
-    return result, [], trace
+    return result, [], trace, _stage_status(
+        status="full",
+        ok_count=len(sampled),
+        failed_count=0,
+        degraded_count=0,
+        reason_codes=[],
+    )
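Call sites must adjust for the widened return: the function now yields a four-tuple whose last element is the stage status dict. A hypothetical consumer (the logger is illustrative):

    payload, errors, trace, status = run_sampled_work_batch_explanations(analysis_input)
    if status["status"] != "full":
        log.warning("sampled explanations degraded: %s", status["reason_codes"])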
package/skills/creator-analysis/scripts/author_home/asr/home_asr.py

@@ -363,17 +363,68 @@ def _iter_xhs_interface_text_candidates(work: Dict[str, Any]) -> List[Tuple[str,
     return deduped
 
 
-def _resolve_xhs_subtitle(work: Dict[str, Any], timeout_ms: int) -> Tuple[str, str, List[str], str]:
+def _classify_xhs_subtitle_failure(*, work: Dict[str, Any], interface_candidates: List[Tuple[str, str]], subtitle_urls: List[str], invalid_reason: str) -> str:
+    raw_ref = work.get("raw_ref") if isinstance(work.get("raw_ref"), dict) else {}
+    has_subtitle_signal = any(
+        normalize_text(raw_ref.get(key))
+        for key in (
+            "subtitle_inline",
+            "subtitle_text",
+            "subtitle_raw",
+            "caption_text",
+            "transcript_text",
+        )
+    )
+    if interface_candidates:
+        return "subtitle_content_invalid"
+    if subtitle_urls:
+        return "subtitle_url_unavailable"
+    if has_subtitle_signal:
+        return "subtitle_structure_unrecognized"
+    if invalid_reason == "subtitle_empty":
+        return "subtitle_missing"
+    return "subtitle_content_invalid"
+
+
+def _resolve_xhs_subtitle(work: Dict[str, Any], timeout_ms: int) -> Dict[str, Any]:
     raw_ref = work.get("raw_ref") if isinstance(work.get("raw_ref"), dict) else {}
     subtitle_urls = raw_ref.get("subtitle_urls") if isinstance(raw_ref.get("subtitle_urls"), list) else []
     subtitle_urls = [normalize_text(item) for item in subtitle_urls if normalize_text(item)]
-
-    for source, candidate in _iter_xhs_interface_text_candidates(work):
-        if _invalid_subtitle_reason(candidate) is None:
-            return candidate, "interface", subtitle_urls, source
+    interface_candidates = _iter_xhs_interface_text_candidates(work)
+    invalid_reasons: List[Dict[str, str]] = []
+
+    for source, candidate in interface_candidates:
+        invalid_reason = _invalid_subtitle_reason(candidate)
+        if invalid_reason is None:
+            return {
+                "text": candidate,
+                "subtitle_source": "interface",
+                "subtitle_field": source,
+                "subtitle_urls": subtitle_urls,
+                "invalid_reasons": invalid_reasons,
+                "failure_category": "",
+            }
+        invalid_reasons.append({"field": source, "reason": invalid_reason})
 
     fetched = _fetch_subtitle_text(subtitle_urls, timeout_ms=timeout_ms)
-    return _clean_text(fetched), "url", subtitle_urls, "subtitle_url"
+    cleaned = _clean_text(fetched)
+    fetched_invalid = _invalid_subtitle_reason(cleaned)
+    if fetched_invalid is not None and subtitle_urls:
+        invalid_reasons.append({"field": "subtitle_url", "reason": fetched_invalid})
+
+    return {
+        "text": cleaned,
+        "subtitle_source": "url" if subtitle_urls else "missing",
+        "subtitle_field": "subtitle_url" if subtitle_urls else "",
+        "subtitle_urls": subtitle_urls,
+        "invalid_reasons": invalid_reasons,
+        "failure_category": _classify_xhs_subtitle_failure(
+            work=work,
+            interface_candidates=interface_candidates,
+            subtitle_urls=subtitle_urls,
+            invalid_reason=fetched_invalid or "subtitle_empty",
+        ),
+    }
 
 
 def _dedupe_works_by_platform_id(works: List[Dict[str, Any]]) -> Tuple[List[Dict[str, Any]], int]:
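The failure categories are checked in strict priority order, so the taxonomy can be read straight off the branches (outcomes below are illustrative):

    # interface candidates existed but none validated  -> "subtitle_content_invalid"
    # no candidates, but subtitle URLs were listed      -> "subtitle_url_unavailable"
    # no candidates/URLs, yet raw_ref carries subtitle
    # fields that could not be parsed                   -> "subtitle_structure_unrecognized"
    # nothing at all and the text was simply empty      -> "subtitle_missing"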
@@ -734,7 +785,11 @@ def enrich_author_home_asr(
             )
             continue
 
-        subtitle_text, subtitle_source, subtitle_urls, subtitle_field = _resolve_xhs_subtitle(work, timeout_ms=timeout_ms)
+        subtitle_probe = _resolve_xhs_subtitle(work, timeout_ms=timeout_ms)
+        subtitle_text = normalize_text(subtitle_probe.get("text"))
+        subtitle_source = normalize_text(subtitle_probe.get("subtitle_source"))
+        subtitle_urls = subtitle_probe.get("subtitle_urls") if isinstance(subtitle_probe.get("subtitle_urls"), list) else []
+        subtitle_field = normalize_text(subtitle_probe.get("subtitle_field"))
         subtitle_invalid = _invalid_subtitle_reason(subtitle_text)
         if subtitle_invalid is None:
             work.update(
@@ -761,6 +816,7 @@ def enrich_author_home_asr(
                     "subtitle_source": subtitle_source,
                     "subtitle_field": subtitle_field,
                     "subtitle_url_count": len(subtitle_urls),
+                    "failure_category": "",
                 }
             )
         else:
@@ -774,6 +830,8 @@ def enrich_author_home_asr(
                     "subtitle_source": subtitle_source,
                     "subtitle_field": subtitle_field,
                     "subtitle_url_count": len(subtitle_urls),
+                    "failure_category": subtitle_probe.get("failure_category"),
+                    "invalid_reasons": subtitle_probe.get("invalid_reasons"),
                 }
             )