@tikomni/skills 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/social-media-crawl/scripts/core/extract_pipeline.py +93 -1
- package/skills/social-media-crawl/scripts/core/storage_router.py +15 -6
- package/skills/social-media-crawl/scripts/pipelines/homepage_collectors.py +1066 -102
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py +9 -4
- package/skills/social-media-crawl/scripts/pipelines/run_douyin_single_work.py +102 -25
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py +9 -4
- package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_single_work.py +335 -78
- package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py +9 -2
- package/skills/social-media-crawl/tests/test_fixed_pipeline_fallback.py +169 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@tikomni/skills",
|
|
3
|
-
"version": "1.0.2",
|
|
3
|
+
"version": "1.0.4",
|
|
4
4
|
"description": "TikOmni skill installer CLI for structured social media crawling in Codex, Claude Code, and OpenClaw",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"homepage": "https://github.com/mark-ly-wang/TikOmni-Skills#readme",
|
|
package/skills/social-media-crawl/scripts/core/extract_pipeline.py
CHANGED
|
@@ -30,13 +30,105 @@ def build_api_trace(
|
|
|
30
30
|
return payload
|
|
31
31
|
|
|
32
32
|
|
|
33
|
+
def build_route_plan_entry(
|
|
34
|
+
*,
|
|
35
|
+
route_label: str,
|
|
36
|
+
endpoint: Optional[str],
|
|
37
|
+
method: str = "GET",
|
|
38
|
+
param_readiness: str = "ready",
|
|
39
|
+
param_reason: str = "",
|
|
40
|
+
) -> Dict[str, Any]:
|
|
41
|
+
return {
|
|
42
|
+
"route_label": route_label,
|
|
43
|
+
"endpoint": endpoint,
|
|
44
|
+
"method": method.upper(),
|
|
45
|
+
"param_readiness": param_readiness,
|
|
46
|
+
"param_reason": param_reason,
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def build_attempted_route(
|
|
51
|
+
*,
|
|
52
|
+
route_label: str,
|
|
53
|
+
endpoint: Optional[str],
|
|
54
|
+
response: Optional[Dict[str, Any]] = None,
|
|
55
|
+
accepted: bool = False,
|
|
56
|
+
accept_reason: str = "",
|
|
57
|
+
fallback_reason: str = "",
|
|
58
|
+
param_readiness: str = "ready",
|
|
59
|
+
param_reason: str = "",
|
|
60
|
+
skipped: bool = False,
|
|
61
|
+
extra: Optional[Dict[str, Any]] = None,
|
|
62
|
+
) -> Dict[str, Any]:
|
|
63
|
+
attempt: Dict[str, Any] = {
|
|
64
|
+
"route_label": route_label,
|
|
65
|
+
"endpoint": endpoint,
|
|
66
|
+
"accepted": bool(accepted),
|
|
67
|
+
"accept_reason": accept_reason,
|
|
68
|
+
"fallback_reason": fallback_reason,
|
|
69
|
+
"param_readiness": param_readiness,
|
|
70
|
+
"param_reason": param_reason,
|
|
71
|
+
"skipped": bool(skipped),
|
|
72
|
+
}
|
|
73
|
+
if isinstance(response, dict):
|
|
74
|
+
attempt.update(
|
|
75
|
+
{
|
|
76
|
+
"ok": response.get("ok"),
|
|
77
|
+
"status_code": response.get("status_code"),
|
|
78
|
+
"request_id": response.get("request_id"),
|
|
79
|
+
"error_reason": response.get("error_reason"),
|
|
80
|
+
"retry_attempt": response.get("retry_attempt", 0),
|
|
81
|
+
"rate_limit_wait_ms": response.get("rate_limit_wait_ms", 0),
|
|
82
|
+
}
|
|
83
|
+
)
|
|
84
|
+
else:
|
|
85
|
+
attempt.update(
|
|
86
|
+
{
|
|
87
|
+
"ok": None,
|
|
88
|
+
"status_code": None,
|
|
89
|
+
"request_id": None,
|
|
90
|
+
"error_reason": None,
|
|
91
|
+
"retry_attempt": 0,
|
|
92
|
+
"rate_limit_wait_ms": 0,
|
|
93
|
+
}
|
|
94
|
+
)
|
|
95
|
+
if extra:
|
|
96
|
+
attempt.update(extra)
|
|
97
|
+
return attempt
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def build_stage_status(
|
|
101
|
+
*,
|
|
102
|
+
stage: str,
|
|
103
|
+
status: str,
|
|
104
|
+
route_plan: Optional[List[Dict[str, Any]]] = None,
|
|
105
|
+
attempted_routes: Optional[List[Dict[str, Any]]] = None,
|
|
106
|
+
chosen_route: Optional[str] = None,
|
|
107
|
+
accept_reason: str = "",
|
|
108
|
+
fallback_reason: str = "",
|
|
109
|
+
error_reason: Optional[str] = None,
|
|
110
|
+
all_routes_failed: bool = False,
|
|
111
|
+
) -> Dict[str, Any]:
|
|
112
|
+
return {
|
|
113
|
+
"stage": stage,
|
|
114
|
+
"status": status,
|
|
115
|
+
"route_plan": list(route_plan or []),
|
|
116
|
+
"attempted_routes": list(attempted_routes or []),
|
|
117
|
+
"chosen_route": chosen_route or "",
|
|
118
|
+
"accept_reason": accept_reason,
|
|
119
|
+
"fallback_reason": fallback_reason,
|
|
120
|
+
"error_reason": error_reason,
|
|
121
|
+
"all_routes_failed": bool(all_routes_failed),
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
|
|
33
125
|
def build_fallback_trace_from_extract_trace(extract_trace: Optional[List[Dict[str, Any]]]) -> List[Dict[str, Any]]:
|
|
34
126
|
"""Build a compact fallback trace from runner extract trace."""
|
|
35
127
|
if not extract_trace:
|
|
36
128
|
return []
|
|
37
129
|
|
|
38
130
|
trace: List[Dict[str, Any]] = []
|
|
39
|
-
include_tokens = ("primary", "effective", "fallback", "gate", "retry")
|
|
131
|
+
include_tokens = ("primary", "effective", "fallback", "gate", "retry", "attempt", "route_decision")
|
|
40
132
|
|
|
41
133
|
for step in extract_trace:
|
|
42
134
|
if not isinstance(step, dict):
|
|
package/skills/social-media-crawl/scripts/core/storage_router.py
CHANGED
|
@@ -26,14 +26,15 @@ def slugify_token(value: Any, fallback: str = "unknown") -> str:
|
|
|
26
26
|
return text or fallback
|
|
27
27
|
|
|
28
28
|
|
|
29
|
-
def cardify_token(value: Any, fallback: str = "unknown") -> str:
|
|
29
|
+
def cardify_token(value: Any, fallback: str = "unknown", keep_trailing_dash: bool = False) -> str:
|
|
30
30
|
text = str(value or "").strip()
|
|
31
31
|
if not text:
|
|
32
32
|
text = fallback
|
|
33
33
|
text = _INVALID_FILENAME_CHARS.sub("-", text)
|
|
34
34
|
text = _SPACE_RUN.sub("", text)
|
|
35
35
|
text = _CARD_TOKEN_INVALID_CHARS.sub("", text)
|
|
36
|
-
text = re.sub(r"-{2,}", "-", text).strip("-_.")
|
|
36
|
+
text = re.sub(r"-{2,}", "-", text)
|
|
37
|
+
text = text.strip("_.") if keep_trailing_dash else text.strip("-_.")
|
|
37
38
|
return text or fallback
|
|
38
39
|
|
|
39
40
|
|
|
@@ -85,7 +86,10 @@ def render_card_filename(
|
|
|
85
86
|
default_filename: str,
|
|
86
87
|
default_ext: str,
|
|
87
88
|
) -> str:
|
|
88
|
-
safe_context = {key: cardify_token(value, fallback="") for key, value in context.items()}
|
|
89
|
+
safe_context = {
|
|
90
|
+
key: cardify_token(value, fallback="", keep_trailing_dash=(key == "identifier"))
|
|
91
|
+
for key, value in context.items()
|
|
92
|
+
}
|
|
89
93
|
safe_context["ext"] = default_ext
|
|
90
94
|
try:
|
|
91
95
|
rendered = str(pattern).format(**safe_context).strip()
|
|
@@ -159,10 +163,15 @@ def build_card_identifier(
|
|
|
159
163
|
return f"{published_token}-{title_token}"
|
|
160
164
|
if title_token:
|
|
161
165
|
return title_token
|
|
166
|
+
work_id_token = cardify_token(platform_work_id, fallback="")
|
|
167
|
+
if published_token and work_id_token:
|
|
168
|
+
return f"{published_token}-{work_id_token}"
|
|
169
|
+
if work_id_token:
|
|
170
|
+
return work_id_token
|
|
171
|
+
if published_token:
|
|
172
|
+
return f"{published_token}-"
|
|
162
173
|
fallback_token = cardify_token(fallback_identifier, fallback="")
|
|
163
|
-
|
|
164
|
-
return f"{published_token}-{fallback_token}"
|
|
165
|
-
return fallback_token or slugify_token(platform_work_id, fallback="unknown")
|
|
174
|
+
return fallback_token or "unknown"
|
|
166
175
|
|
|
167
176
|
|
|
168
177
|
def resolve_card_route_parts(
|