@tikomni/skills 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/skills/creator-analysis/SKILL.md +34 -10
- package/skills/creator-analysis/references/contracts/creator-card-fields.md +2 -0
- package/skills/creator-analysis/references/contracts/work-card-fields.md +40 -4
- package/skills/creator-analysis/references/platform-guides/douyin.md +41 -36
- package/skills/creator-analysis/references/platform-guides/generic.md +11 -7
- package/skills/creator-analysis/references/platform-guides/xiaohongshu.md +45 -30
- package/skills/creator-analysis/references/schemas/author-analysis-v2.schema.json +224 -95
- package/skills/creator-analysis/references/workflow.md +8 -3
- package/skills/creator-analysis/scripts/author_home/adapters/platform_adapters.py +205 -21
- package/skills/creator-analysis/scripts/author_home/analyzers/author_analysis_v2_support.py +54 -11
- package/skills/creator-analysis/scripts/author_home/analyzers/prompt_first_analyzers.py +200 -13
- package/skills/creator-analysis/scripts/author_home/analyzers/sampled_work_batch_explainer.py +113 -42
- package/skills/creator-analysis/scripts/author_home/asr/home_asr.py +65 -7
- package/skills/creator-analysis/scripts/author_home/builders/home_builders.py +82 -18
- package/skills/creator-analysis/scripts/author_home/collectors/homepage_collectors.py +198 -32
- package/skills/creator-analysis/scripts/author_home/orchestrator/run_author_analysis.py +374 -31
- package/skills/creator-analysis/scripts/author_home/orchestrator/work_analysis_artifacts.py +68 -12
- package/skills/creator-analysis/scripts/core/storage_router.py +3 -0
- package/skills/creator-analysis/scripts/writers/write_author_homepage_samples.py +3 -2
- package/skills/creator-analysis/scripts/writers/write_benchmark_card.py +314 -137
|
@@ -17,7 +17,11 @@ from scripts.core.config_loader import config_get, resolve_storage_paths
|
|
|
17
17
|
from scripts.core.progress_report import ProgressReporter
|
|
18
18
|
from scripts.writers.write_benchmark_card import build_card_analysis_artifact
|
|
19
19
|
|
|
20
|
-
WORK_ANALYSIS_ARTIFACT_VERSION = "creator_analysis.work_analysis_artifact@
|
|
20
|
+
WORK_ANALYSIS_ARTIFACT_VERSION = "creator_analysis.work_analysis_artifact@v4"
|
|
21
|
+
AUTHOR_HOME_TIMING_VERSION = "creator_analysis.author_home_timing@v2"
|
|
22
|
+
AUTHOR_HOME_CARD_CONTRACT_VERSION = "creator_analysis.author_sample_card@v2"
|
|
23
|
+
DOUYIN_NORMALIZATION_VERSION = "douyin_author_home_normalization@v2"
|
|
24
|
+
XHS_NORMALIZATION_VERSION = "xiaohongshu_author_home_normalization@v1"
|
|
21
25
|
DEFAULT_MAX_WORKERS = 3
|
|
22
26
|
MAX_MAX_WORKERS = 5
|
|
23
27
|
PERF_FIELDS = ("digg_count", "comment_count", "collect_count", "share_count", "play_count")
|
|
@@ -45,6 +49,28 @@ def _safe_int(value: Any, default: int = 0) -> int:
|
|
|
45
49
|
return default
|
|
46
50
|
|
|
47
51
|
|
|
52
|
+
def _safe_text_list(value: Any) -> List[str]:
    """Flatten a list of tag-like entries into unique, non-empty strings.

    Anything that is not a ``list`` yields an empty list. For a ``dict``
    entry, the label keys below are probed in order and the first one whose
    ``_safe_text`` result is truthy is kept; a dict with no usable key
    contributes nothing. Every other entry is passed to ``_safe_text``
    directly. Duplicates are dropped while first-seen order is preserved.
    """
    if not isinstance(value, list):
        return []

    label_keys = ("name", "value", "label", "hashtag_name", "search_text", "tag_name", "text")
    # A dict doubles as an insertion-ordered set for de-duplication.
    ordered = {}
    for entry in value:
        if isinstance(entry, dict):
            # Lazy generator: probing stops at the first key that produces text.
            candidates = (_safe_text(entry.get(key)) for key in label_keys)
            cleaned = next((candidate for candidate in candidates if candidate), None)
        else:
            cleaned = _safe_text(entry)
        if cleaned:
            ordered.setdefault(cleaned, None)
    return list(ordered)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _normalization_version(platform: str) -> str:
    """Return the normalization version tag used for *platform*.

    Only the exact string ``"douyin"`` selects the Douyin tag; every other
    value falls back to the Xiaohongshu tag.
    """
    if platform == "douyin":
        return DOUYIN_NORMALIZATION_VERSION
    return XHS_NORMALIZATION_VERSION
|
|
72
|
+
|
|
73
|
+
|
|
48
74
|
def _coerce_unix_sec(value: Any) -> int:
|
|
49
75
|
parsed = _safe_int(value, default=0)
|
|
50
76
|
if parsed > 1_000_000_000_000:
|
|
@@ -91,8 +117,8 @@ def _format_metric_summary(work: Dict[str, Any]) -> str:
|
|
|
91
117
|
|
|
92
118
|
|
|
93
119
|
def _author_home_structural_sections(work: Dict[str, Any]) -> Dict[str, Any]:
|
|
94
|
-
tags =
|
|
95
|
-
style_markers =
|
|
120
|
+
tags = _safe_text_list(work.get("tags"))
|
|
121
|
+
style_markers = _safe_text_list(work.get("style_markers"))
|
|
96
122
|
first_sentence = _first_sentence(work.get("primary_text") or work.get("asr_raw") or work.get("caption_raw") or work.get("title"))
|
|
97
123
|
bucket = _safe_text(work.get("bucket")) or "unknown"
|
|
98
124
|
all_time_rank = work.get("all_time_score_rank")
|
|
@@ -136,11 +162,11 @@ def _author_home_structural_sections(work: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
136
162
|
def _metrics_from_work(work: Dict[str, Any]) -> Dict[str, int]:
|
|
137
163
|
metrics = work.get("metrics") if isinstance(work.get("metrics"), dict) else {}
|
|
138
164
|
return {
|
|
139
|
-
"digg_count": _safe_int(metrics.get("like"), default=0),
|
|
140
|
-
"comment_count": _safe_int(metrics.get("comment"), default=0),
|
|
141
|
-
"collect_count": _safe_int(metrics.get("collect"), default=0),
|
|
142
|
-
"share_count": _safe_int(metrics.get("share"), default=0),
|
|
143
|
-
"play_count": _safe_int(metrics.get("play"), default=0),
|
|
165
|
+
"digg_count": _safe_int(work.get("digg_count"), default=_safe_int(metrics.get("like"), default=0)),
|
|
166
|
+
"comment_count": _safe_int(work.get("comment_count"), default=_safe_int(metrics.get("comment"), default=0)),
|
|
167
|
+
"collect_count": _safe_int(work.get("collect_count"), default=_safe_int(metrics.get("collect"), default=0)),
|
|
168
|
+
"share_count": _safe_int(work.get("share_count"), default=_safe_int(metrics.get("share"), default=0)),
|
|
169
|
+
"play_count": _safe_int(work.get("play_count"), default=_safe_int(metrics.get("play"), default=0)),
|
|
144
170
|
}
|
|
145
171
|
|
|
146
172
|
|
|
@@ -158,7 +184,7 @@ def build_single_work_payload(
|
|
|
158
184
|
"author_handle": author_handle,
|
|
159
185
|
}
|
|
160
186
|
payload: Dict[str, Any] = {
|
|
161
|
-
"content_kind": "
|
|
187
|
+
"content_kind": "author_home",
|
|
162
188
|
"platform_work_id": work.get("platform_work_id"),
|
|
163
189
|
"title": work.get("title") or work.get("desc"),
|
|
164
190
|
"caption_raw": work.get("caption_raw") or work.get("desc") or "",
|
|
@@ -183,6 +209,14 @@ def build_single_work_payload(
|
|
|
183
209
|
"work_modality": _safe_text(work.get("work_modality")) or ("video" if platform == "douyin" else "text"),
|
|
184
210
|
"analysis_eligibility": _safe_text(work.get("analysis_eligibility")) or "eligible",
|
|
185
211
|
"analysis_exclusion_reason": _safe_text(work.get("analysis_exclusion_reason")),
|
|
212
|
+
"performance_score": work.get("performance_score"),
|
|
213
|
+
"performance_score_norm": work.get("performance_score_norm"),
|
|
214
|
+
"bucket": _safe_text(work.get("bucket")),
|
|
215
|
+
"hook_type": _safe_text(work.get("hook_type")),
|
|
216
|
+
"structure_type": _safe_text(work.get("structure_type")),
|
|
217
|
+
"cta_type": _safe_text(work.get("cta_type")),
|
|
218
|
+
"content_form": _safe_text(work.get("content_form")),
|
|
219
|
+
"style_markers": _safe_text_list(work.get("style_markers")),
|
|
186
220
|
"platform_native_refs": dict(work.get("platform_native_refs") or {}),
|
|
187
221
|
"request_id": work.get("request_id"),
|
|
188
222
|
"confidence": "medium" if _safe_text(work.get("primary_text") or work.get("title") or work.get("caption_raw")) else "low",
|
|
@@ -202,6 +236,8 @@ def _artifact_cache_key(*, platform: str, content_kind: str, platform_work_id: s
|
|
|
202
236
|
platform_work_id,
|
|
203
237
|
WORK_ANALYSIS_ARTIFACT_VERSION,
|
|
204
238
|
PROMPT_CONTRACT_HASH,
|
|
239
|
+
AUTHOR_HOME_TIMING_VERSION,
|
|
240
|
+
AUTHOR_HOME_CARD_CONTRACT_VERSION,
|
|
205
241
|
]
|
|
206
242
|
)
|
|
207
243
|
return hashlib.sha1(raw.encode("utf-8")).hexdigest()[:20]
|
|
@@ -252,6 +288,12 @@ def _load_cached_artifact(
|
|
|
252
288
|
return None
|
|
253
289
|
if meta.get("prompt_contract_hash") != PROMPT_CONTRACT_HASH:
|
|
254
290
|
return None
|
|
291
|
+
if meta.get("author_home_timing_version") != AUTHOR_HOME_TIMING_VERSION:
|
|
292
|
+
return None
|
|
293
|
+
if meta.get("author_sample_card_contract_version") != AUTHOR_HOME_CARD_CONTRACT_VERSION:
|
|
294
|
+
return None
|
|
295
|
+
if meta.get("normalization_version") != _normalization_version(platform):
|
|
296
|
+
return None
|
|
255
297
|
return payload if isinstance(payload, dict) else None
|
|
256
298
|
|
|
257
299
|
|
|
@@ -295,6 +337,8 @@ def _refresh_cached_payload(
|
|
|
295
337
|
"artifact_cache_key": ((artifact.get("meta") or {}).get("cache_key") if isinstance(artifact.get("meta"), dict) else None),
|
|
296
338
|
"artifact_path": artifact_path,
|
|
297
339
|
"from_cache": from_cache,
|
|
340
|
+
"artifact_version": ((artifact.get("meta") or {}).get("analysis_logic_version") if isinstance(artifact.get("meta"), dict) else None),
|
|
341
|
+
"normalization_version": ((artifact.get("meta") or {}).get("normalization_version") if isinstance(artifact.get("meta"), dict) else None),
|
|
298
342
|
}
|
|
299
343
|
)
|
|
300
344
|
payload["request_id"] = work.get("request_id") or payload.get("request_id")
|
|
@@ -326,6 +370,8 @@ def _build_artifact(
|
|
|
326
370
|
"platform_work_id": work.get("platform_work_id"),
|
|
327
371
|
"ok": True,
|
|
328
372
|
"prompt_contract_hash": PROMPT_CONTRACT_HASH,
|
|
373
|
+
"artifact_version": WORK_ANALYSIS_ARTIFACT_VERSION,
|
|
374
|
+
"normalization_version": _normalization_version(platform),
|
|
329
375
|
}
|
|
330
376
|
)
|
|
331
377
|
platform_work_id = _safe_text(work.get("platform_work_id"))
|
|
@@ -333,12 +379,15 @@ def _build_artifact(
|
|
|
333
379
|
"meta": {
|
|
334
380
|
"platform": platform,
|
|
335
381
|
"platform_work_id": platform_work_id,
|
|
336
|
-
"content_kind": _safe_text(payload.get("content_kind")) or "
|
|
382
|
+
"content_kind": _safe_text(payload.get("content_kind")) or "author_home",
|
|
337
383
|
"analysis_logic_version": WORK_ANALYSIS_ARTIFACT_VERSION,
|
|
338
384
|
"prompt_contract_hash": PROMPT_CONTRACT_HASH,
|
|
385
|
+
"author_home_timing_version": AUTHOR_HOME_TIMING_VERSION,
|
|
386
|
+
"author_sample_card_contract_version": AUTHOR_HOME_CARD_CONTRACT_VERSION,
|
|
387
|
+
"normalization_version": _normalization_version(platform),
|
|
339
388
|
"cache_key": _artifact_cache_key(
|
|
340
389
|
platform=platform,
|
|
341
|
-
content_kind=_safe_text(payload.get("content_kind")) or "
|
|
390
|
+
content_kind=_safe_text(payload.get("content_kind")) or "author_home",
|
|
342
391
|
platform_work_id=platform_work_id,
|
|
343
392
|
) if platform_work_id else None,
|
|
344
393
|
"written_at": datetime.now().isoformat(timespec="seconds"),
|
|
@@ -393,7 +442,7 @@ def orchestrate_work_analysis_artifacts(
|
|
|
393
442
|
|
|
394
443
|
for work in works:
|
|
395
444
|
platform_work_id = _safe_text(work.get("platform_work_id"))
|
|
396
|
-
content_kind = "
|
|
445
|
+
content_kind = "author_home"
|
|
397
446
|
if not platform_work_id:
|
|
398
447
|
stats["failed_count"] += 1
|
|
399
448
|
failed_items.append({"platform_work_id": "", "error_reason": "missing_platform_work_id"})
|
|
@@ -430,6 +479,9 @@ def orchestrate_work_analysis_artifacts(
|
|
|
430
479
|
artifact_manifest[platform_work_id] = {
|
|
431
480
|
"from_cache": True,
|
|
432
481
|
"artifact_path": str(artifact_path) if artifact_path is not None else None,
|
|
482
|
+
"artifact_version": WORK_ANALYSIS_ARTIFACT_VERSION,
|
|
483
|
+
"prompt_contract_hash": PROMPT_CONTRACT_HASH,
|
|
484
|
+
"normalization_version": _normalization_version(platform),
|
|
433
485
|
}
|
|
434
486
|
stats["cache_hit_count"] += 1
|
|
435
487
|
trace.append(
|
|
@@ -484,6 +536,9 @@ def orchestrate_work_analysis_artifacts(
|
|
|
484
536
|
artifact_manifest[platform_work_id] = {
|
|
485
537
|
"from_cache": False,
|
|
486
538
|
"artifact_path": artifact_path,
|
|
539
|
+
"artifact_version": WORK_ANALYSIS_ARTIFACT_VERSION,
|
|
540
|
+
"prompt_contract_hash": PROMPT_CONTRACT_HASH,
|
|
541
|
+
"normalization_version": _normalization_version(platform),
|
|
487
542
|
}
|
|
488
543
|
stats["finished_count"] += 1
|
|
489
544
|
trace.append(
|
|
@@ -550,4 +605,5 @@ def orchestrate_work_analysis_artifacts(
|
|
|
550
605
|
"artifact_root": str(artifact_root) if artifact_root is not None else None,
|
|
551
606
|
"analysis_logic_version": WORK_ANALYSIS_ARTIFACT_VERSION,
|
|
552
607
|
"prompt_contract_hash": PROMPT_CONTRACT_HASH,
|
|
608
|
+
"normalization_version": _normalization_version(platform),
|
|
553
609
|
}
|
|
@@ -214,6 +214,7 @@ def build_card_output_path(
|
|
|
214
214
|
year_month: str,
|
|
215
215
|
timestamp: str,
|
|
216
216
|
storage_config: Optional[Dict[str, Any]],
|
|
217
|
+
extra_route_parts: Optional[List[str]] = None,
|
|
217
218
|
) -> Tuple[str, str]:
|
|
218
219
|
card_routes = _configured_card_routes(storage_config)
|
|
219
220
|
route = card_routes.get(card_type) or DEFAULT_CARD_TYPE_ROUTES["work"]
|
|
@@ -229,6 +230,8 @@ def build_card_output_path(
|
|
|
229
230
|
"timestamp": timestamp,
|
|
230
231
|
}
|
|
231
232
|
rendered_parts = render_route_parts(parts, context=route_context)
|
|
233
|
+
if isinstance(extra_route_parts, list) and extra_route_parts:
|
|
234
|
+
rendered_parts.extend(render_route_parts(extra_route_parts, context=route_context))
|
|
232
235
|
directory = os.path.join(card_root, *rendered_parts)
|
|
233
236
|
os.makedirs(directory, exist_ok=True)
|
|
234
237
|
|
|
@@ -10,7 +10,7 @@ if __package__ in {None, ""}:
|
|
|
10
10
|
sys.path.insert(0, str(_parent))
|
|
11
11
|
break
|
|
12
12
|
|
|
13
|
-
"""
|
|
13
|
+
"""Creator-analysis batch writer for author sample cards."""
|
|
14
14
|
|
|
15
15
|
import argparse
|
|
16
16
|
import json
|
|
@@ -60,7 +60,7 @@ def _items(data: Any) -> List[Dict[str, Any]]:
|
|
|
60
60
|
|
|
61
61
|
|
|
62
62
|
def main() -> None:
|
|
63
|
-
parser = argparse.ArgumentParser(description="Write
|
|
63
|
+
parser = argparse.ArgumentParser(description="Write creator-analysis author sample cards")
|
|
64
64
|
parser.add_argument("--platform", required=True, help="Platform name, e.g. douyin/xiaohongshu")
|
|
65
65
|
parser.add_argument("--config", default=None, help="Runtime config YAML path")
|
|
66
66
|
parser.add_argument("--input-json", default="-", help="JSON list/dict path or '-' for stdin")
|
|
@@ -89,6 +89,7 @@ def main() -> None:
|
|
|
89
89
|
sample_author=_author_hint(payload, args.sample_author),
|
|
90
90
|
content_kind="author_home",
|
|
91
91
|
storage_config=config,
|
|
92
|
+
card_role="author_sample_card",
|
|
92
93
|
)
|
|
93
94
|
results.append(result)
|
|
94
95
|
|