@tikomni/skills 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/social-media-crawl/scripts/core/asr_pipeline.py +2 -16
- package/skills/social-media-crawl/scripts/core/extract_pipeline.py +93 -1
- package/skills/social-media-crawl/scripts/pipelines/homepage_collectors.py +1066 -102
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +9 -4
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_single_work.py +102 -25
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +9 -4
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py +335 -78
- package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +8 -1
- package/skills/social-media-crawl/tests/test_fixed_pipeline_fallback.py +235 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tikomni/skills",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.0.5",
|
|
4
4
|
"description": "TikOmni skill installer CLI for structured social media crawling in Codex, Claude Code, and OpenClaw",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/mark-ly-wang/TikOmni-Skills#readme",
|
|
package/skills/social-media-crawl/scripts/core/asr_pipeline.py
CHANGED
|
@@ -1586,22 +1586,8 @@ def run_u2_asr_batch_with_timeout_retry(
|
|
|
1586
1586
|
existing = mapped_results.get(file_url)
|
|
1587
1587
|
if existing is None:
|
|
1588
1588
|
mapped_results[file_url] = candidate
|
|
1589
|
-
|
|
1590
|
-
|
|
1591
|
-
old_score = (
|
|
1592
|
-
1 if existing.get("ok") else 0,
|
|
1593
|
-
len(str(existing.get("transcript_text") or "")),
|
|
1594
|
-
1 if existing.get("transcription_url") else 0,
|
|
1595
|
-
1 if not existing.get("error_reason") else 0,
|
|
1596
|
-
)
|
|
1597
|
-
new_score = (
|
|
1598
|
-
1 if candidate.get("ok") else 0,
|
|
1599
|
-
len(str(candidate.get("transcript_text") or "")),
|
|
1600
|
-
1 if candidate.get("transcription_url") else 0,
|
|
1601
|
-
1 if not candidate.get("error_reason") else 0,
|
|
1602
|
-
)
|
|
1603
|
-
if new_score > old_score:
|
|
1604
|
-
mapped_results[file_url] = candidate
|
|
1589
|
+
# When the provider returns file_url, treat it as the source of truth.
|
|
1590
|
+
# item_index remains a fallback only for older payloads without file_url.
|
|
1605
1591
|
|
|
1606
1592
|
mapped_results = hydrate_u2_batch_results_from_transcription_urls(
|
|
1607
1593
|
mapped_results=mapped_results,
|
|
package/skills/social-media-crawl/scripts/core/extract_pipeline.py
CHANGED
|
@@ -30,13 +30,105 @@ def build_api_trace(
|
|
|
30
30
|
return payload
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
def build_route_plan_entry(
|
|
34
|
+
*,
|
|
35
|
+
route_label: str,
|
|
36
|
+
endpoint: Optional[str],
|
|
37
|
+
method: str = "GET",
|
|
38
|
+
param_readiness: str = "ready",
|
|
39
|
+
param_reason: str = "",
|
|
40
|
+
) -> Dict[str, Any]:
|
|
41
|
+
return {
|
|
42
|
+
"route_label": route_label,
|
|
43
|
+
"endpoint": endpoint,
|
|
44
|
+
"method": method.upper(),
|
|
45
|
+
"param_readiness": param_readiness,
|
|
46
|
+
"param_reason": param_reason,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def build_attempted_route(
|
|
51
|
+
*,
|
|
52
|
+
route_label: str,
|
|
53
|
+
endpoint: Optional[str],
|
|
54
|
+
response: Optional[Dict[str, Any]] = None,
|
|
55
|
+
accepted: bool = False,
|
|
56
|
+
accept_reason: str = "",
|
|
57
|
+
fallback_reason: str = "",
|
|
58
|
+
param_readiness: str = "ready",
|
|
59
|
+
param_reason: str = "",
|
|
60
|
+
skipped: bool = False,
|
|
61
|
+
extra: Optional[Dict[str, Any]] = None,
|
|
62
|
+
) -> Dict[str, Any]:
|
|
63
|
+
attempt: Dict[str, Any] = {
|
|
64
|
+
"route_label": route_label,
|
|
65
|
+
"endpoint": endpoint,
|
|
66
|
+
"accepted": bool(accepted),
|
|
67
|
+
"accept_reason": accept_reason,
|
|
68
|
+
"fallback_reason": fallback_reason,
|
|
69
|
+
"param_readiness": param_readiness,
|
|
70
|
+
"param_reason": param_reason,
|
|
71
|
+
"skipped": bool(skipped),
|
|
72
|
+
}
|
|
73
|
+
if isinstance(response, dict):
|
|
74
|
+
attempt.update(
|
|
75
|
+
{
|
|
76
|
+
"ok": response.get("ok"),
|
|
77
|
+
"status_code": response.get("status_code"),
|
|
78
|
+
"request_id": response.get("request_id"),
|
|
79
|
+
"error_reason": response.get("error_reason"),
|
|
80
|
+
"retry_attempt": response.get("retry_attempt", 0),
|
|
81
|
+
"rate_limit_wait_ms": response.get("rate_limit_wait_ms", 0),
|
|
82
|
+
}
|
|
83
|
+
)
|
|
84
|
+
else:
|
|
85
|
+
attempt.update(
|
|
86
|
+
{
|
|
87
|
+
"ok": None,
|
|
88
|
+
"status_code": None,
|
|
89
|
+
"request_id": None,
|
|
90
|
+
"error_reason": None,
|
|
91
|
+
"retry_attempt": 0,
|
|
92
|
+
"rate_limit_wait_ms": 0,
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
if extra:
|
|
96
|
+
attempt.update(extra)
|
|
97
|
+
return attempt
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def build_stage_status(
|
|
101
|
+
*,
|
|
102
|
+
stage: str,
|
|
103
|
+
status: str,
|
|
104
|
+
route_plan: Optional[List[Dict[str, Any]]] = None,
|
|
105
|
+
attempted_routes: Optional[List[Dict[str, Any]]] = None,
|
|
106
|
+
chosen_route: Optional[str] = None,
|
|
107
|
+
accept_reason: str = "",
|
|
108
|
+
fallback_reason: str = "",
|
|
109
|
+
error_reason: Optional[str] = None,
|
|
110
|
+
all_routes_failed: bool = False,
|
|
111
|
+
) -> Dict[str, Any]:
|
|
112
|
+
return {
|
|
113
|
+
"stage": stage,
|
|
114
|
+
"status": status,
|
|
115
|
+
"route_plan": list(route_plan or []),
|
|
116
|
+
"attempted_routes": list(attempted_routes or []),
|
|
117
|
+
"chosen_route": chosen_route or "",
|
|
118
|
+
"accept_reason": accept_reason,
|
|
119
|
+
"fallback_reason": fallback_reason,
|
|
120
|
+
"error_reason": error_reason,
|
|
121
|
+
"all_routes_failed": bool(all_routes_failed),
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
|
|
33
125
|
def build_fallback_trace_from_extract_trace(extract_trace: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
|
|
34
126
|
"""Build a compact fallback trace from runner extract trace."""
|
|
35
127
|
if not extract_trace:
|
|
36
128
|
return []
|
|
37
129
|
|
|
38
130
|
trace: List[Dict[str, Any]] = []
|
|
39
|
-
include_tokens = ("primary", "effective", "fallback", "gate", "retry")
|
|
131
|
+
include_tokens = ("primary", "effective", "fallback", "gate", "retry", "attempt", "route_decision")
|
|
40
132
|
|
|
41
133
|
for step in extract_trace:
|
|
42
134
|
if not isinstance(step, dict):
|