npm - @tikomni/skills - Versions diffs - 0.1.4 → 0.1.5 - Mend

@tikomni/skills 0.1.4 → 0.1.5

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (9)

package/env.example CHANGED Viewed

@@ -88,8 +88,8 @@ TIKOMNI_PATH_LOCALE="zh"
 # [ZH] 卡片文件名模板。默认值：{identifier}{ext}
 # [EN] Card filename pattern. Default: {identifier}{ext}
-# [ZH] 当前可用占位符：identifier / platform / author_slug / ext
-# [EN] Supported placeholders: identifier / platform / author_slug / ext
+# [ZH] 当前可用占位符：identifier / platform / author_slug / published_at / published_date / title / title_slug / ext
+# [EN] Supported placeholders: identifier / platform / author_slug / published_at / published_date / title / title_slug / ext
 TIKOMNI_FILENAME_PATTERN_CARD="{identifier}{ext}"
 # [ZH] JSON 结果文件名模板。默认值：{timestamp}-{platform}-{identifier}{ext}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@tikomni/skills",
-  "version": "0.1.4",
+  "version": "0.1.5",
   "description": "TikOmni skill installer CLI for structured social media crawling in Codex, Claude Code, and OpenClaw",
   "license": "MIT",
   "homepage": "https://github.com/mark-ly-wang/TikOmni-Skills#readme",

package/skills/social-media-crawl/references/contracts/work-fact-card-fields.md CHANGED Viewed

@@ -37,7 +37,9 @@
 ## Field Rules
 - `author` 是展示名，不是对象。
+- Markdown 卡片的事实字段进入 frontmatter，不再输出 `## Facts` 章节。
 - `primary_text` 为当前任务最适合阅读和索引的主文本。
+- `play_count` 允许为 `null`；缺失时卡片展示为空，只有平台明确返回 `0` 时才保留 `0`。
 - 视频优先顺序：
   - `subtitle_raw`
   - `asr_clean`
@@ -45,4 +47,3 @@
 - 文本作品优先顺序：
   - `caption_raw`
 - 不允许出现分析字段。

package/skills/social-media-crawl/references/schemas/work-fact-card.schema.json CHANGED Viewed

@@ -36,7 +36,7 @@
     "comment_count": { "type": "integer" },
     "collect_count": { "type": "integer" },
     "share_count": { "type": "integer" },
-    "play_count": { "type": "integer" },
+    "play_count": { "type": ["integer", "null"] },
     "cover_image": { "type": "string" },
     "source_url": { "type": "string" },
     "share_url": { "type": "string" },
@@ -64,4 +64,3 @@
   },
   "additionalProperties": false
 }

package/skills/social-media-crawl/scripts/core/storage_router.py CHANGED Viewed

@@ -12,6 +12,7 @@ DEFAULT_JSON_FILENAME_PATTERN = "{timestamp}-{platform}-{identifier}{ext}"
 _INVALID_FILENAME_CHARS = re.compile(r"[\\\\/:*?\"<>|]+")
 _SPACE_RUN = re.compile(r"\s+")
 _INVALID_AUTHOR_SLUGS = {"0", "unknown", "none", "null", "nil", "na", "n-a"}
+_CARD_TOKEN_INVALID_CHARS = re.compile(r"[^\w\-.]+", re.UNICODE)
 def slugify_token(value: Any, fallback: str = "unknown") -> str:
@@ -25,6 +26,17 @@ def slugify_token(value: Any, fallback: str = "unknown") -> str:
     return text or fallback
+def cardify_token(value: Any, fallback: str = "unknown") -> str:
+    text = str(value or "").strip()
+    if not text:
+        text = fallback
+    text = _INVALID_FILENAME_CHARS.sub("-", text)
+    text = _SPACE_RUN.sub("", text)
+    text = _CARD_TOKEN_INVALID_CHARS.sub("", text)
+    text = re.sub(r"-{2,}", "-", text).strip("-_.")
+    return text or fallback
 def resolve_card_filename_pattern(storage_config: Optional[Dict[str, Any]]) -> str:
     if not isinstance(storage_config, dict):
         return DEFAULT_CARD_FILENAME_PATTERN
@@ -66,6 +78,30 @@ def render_output_filename(
     return rendered
+def render_card_filename(
+    *,
+    pattern: str,
+    context: Dict[str, Any],
+    default_filename: str,
+    default_ext: str,
+) -> str:
+    safe_context = {key: cardify_token(value, fallback="") for key, value in context.items()}
+    safe_context["ext"] = default_ext
+    try:
+        rendered = str(pattern).format(**safe_context).strip()
+    except Exception:
+        rendered = default_filename
+    rendered = _INVALID_FILENAME_CHARS.sub("-", rendered)
+    rendered = _SPACE_RUN.sub("", rendered)
+    rendered = _CARD_TOKEN_INVALID_CHARS.sub("", rendered)
+    rendered = re.sub(r"-{2,}", "-", rendered).strip("-_.")
+    if not rendered:
+        rendered = default_filename
+    if not Path(rendered).suffix:
+        rendered = f"{rendered}{default_ext}"
+    return rendered
 def _storage_routes_cfg(storage_config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
     if not isinstance(storage_config, dict):
         return {}
@@ -88,18 +124,45 @@ def resolve_card_root(storage_config: Optional[Dict[str, Any]], explicit_card_ro
     return str(Path(card_root).expanduser().resolve())
-def resolve_author_slug(platform: str, author_handle: str, platform_author_id: str) -> str:
-    handle_slug = slugify_token(author_handle, fallback="")
-    if handle_slug and handle_slug not in _INVALID_AUTHOR_SLUGS:
+def resolve_author_slug(platform: str, author_handle: str, platform_author_id: str, author_display: str = "") -> str:
+    display_slug = cardify_token(author_display, fallback="")
+    if display_slug and display_slug.lower() not in _INVALID_AUTHOR_SLUGS:
+        return display_slug
+    handle_slug = cardify_token(author_handle, fallback="")
+    if handle_slug and handle_slug.lower() not in _INVALID_AUTHOR_SLUGS:
         return handle_slug
-    author_id_slug = slugify_token(platform_author_id, fallback="")
+    author_id_slug = cardify_token(platform_author_id, fallback="")
     if author_id_slug:
         return author_id_slug
     return f"{slugify_token(platform)}-unknown"
-def resolve_author_directory_name(platform: str, author_handle: str, platform_author_id: str) -> str:
-    return f"{slugify_token(platform)}-{resolve_author_slug(platform, author_handle, platform_author_id)}"
+def resolve_author_directory_name(
+    platform: str,
+    author_handle: str,
+    platform_author_id: str,
+    author_display: str = "",
+) -> str:
+    return f"{slugify_token(platform)}-{resolve_author_slug(platform, author_handle, platform_author_id, author_display)}"
+def build_card_identifier(
+    *,
+    published_date: str,
+    title: str,
+    fallback_identifier: str,
+    platform_work_id: str,
+) -> str:
+    published_token = cardify_token(published_date, fallback="")
+    title_token = cardify_token(title, fallback="")
+    if published_token and title_token:
+        return f"{published_token}-{title_token}"
+    if title_token:
+        return title_token
+    fallback_token = cardify_token(fallback_identifier, fallback="")
+    if published_token and fallback_token:
+        return f"{published_token}-{fallback_token}"
+    return fallback_token or slugify_token(platform_work_id, fallback="unknown")
 def resolve_card_route_parts(
@@ -129,29 +192,58 @@ def build_work_fact_card_paths(
     platform_work_id: str,
     author_handle: str,
     platform_author_id: str,
+    author_name: str,
+    title: str,
+    published_date: str,
     storage_config: Optional[Dict[str, Any]],
     fallback_identifier: str,
 ) -> Dict[str, str]:
-    author_slug = resolve_author_slug(platform, author_handle, platform_author_id)
+    author_slug = resolve_author_slug(platform, author_handle, platform_author_id, author_name)
     route_parts = resolve_card_route_parts(storage_config, platform=platform, author_slug=author_slug)
     directory = Path(card_root).joinpath(*route_parts)
     directory.mkdir(parents=True, exist_ok=True)
-    identifier = slugify_token(platform_work_id, fallback="") or slugify_token(fallback_identifier, fallback="unknown")
+    json_identifier = slugify_token(platform_work_id, fallback="") or slugify_token(fallback_identifier, fallback="unknown")
+    card_identifier = build_card_identifier(
+        published_date=published_date,
+        title=title,
+        fallback_identifier=fallback_identifier,
+        platform_work_id=platform_work_id,
+    )
     json_filename = render_output_filename(
         pattern=resolve_card_filename_pattern(storage_config),
-        context={"identifier": identifier, "platform": platform, "author_slug": author_slug, "ext": ".json"},
-        default_filename=f"{identifier}.json",
+        context={
+            "identifier": json_identifier,
+            "platform": platform,
+            "author_slug": author_slug,
+            "published_at": published_date,
+            "published_date": published_date,
+            "title": title,
+            "title_slug": title,
+            "ext": ".json",
+        },
+        default_filename=f"{json_identifier}.json",
         default_ext=".json",
     )
-    markdown_filename = render_output_filename(
+    markdown_filename = render_card_filename(
         pattern=resolve_card_filename_pattern(storage_config),
-        context={"identifier": identifier, "platform": platform, "author_slug": author_slug, "ext": ".md"},
-        default_filename=f"{identifier}.md",
+        context={
+            "identifier": card_identifier,
+            "platform": platform,
+            "author_slug": author_slug,
+            "published_at": published_date,
+            "published_date": published_date,
+            "title": title,
+            "title_slug": title,
+            "ext": ".md",
+        },
+        default_filename=f"{card_identifier}.md",
         default_ext=".md",
     )
     return {
-        "identifier": identifier,
+        "identifier": card_identifier,
+        "json_identifier": json_identifier,
+        "card_identifier": card_identifier,
         "author_slug": author_slug,
         "directory": str(directory),
         "route": "/".join(route_parts),

package/skills/social-media-crawl/scripts/pipelines/platform_adapters.py CHANGED Viewed

@@ -34,6 +34,18 @@ def _i(value: Any, default: int = 0) -> int:
         return default
+def _optional_i(value: Any) -> int | None:
+    try:
+        if value is None:
+            return None
+        if isinstance(value, (int, float)):
+            return int(value)
+        text = _t(value)
+        return int(float(text.replace(",", ""))) if text else None
+    except Exception:
+        return None
 def _first(payload: Any, keys: List[str], default: Any = "") -> Any:
     hit = deep_find_first(payload, keys)
     return default if hit is None else hit
@@ -339,7 +351,7 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
             "comment": _i(_first(item, ["comment_count"], 0)),
             "collect": _i(_first(item, ["collect_count"], 0)),
             "share": _i(_first(item, ["share_count"], 0)),
-            "play": _i(_first(item, ["play_count", "view_count"], 0)),
+            "play": _optional_i(_first(item, ["play_count", "view_count"], None)),
         }
         video_down_url = _extract_douyin_video_down_url(item)
         tags = _normalize_douyin_tags(_first(item, ["hashtags", "tags", "text_extra"], []))
@@ -429,7 +441,7 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
             "comment": _i(_first(item, ["comment_count"], 0)),
             "collect": _i(_first(item, ["collected_count", "collect_count"], 0)),
             "share": _i(_first(item, ["share_count"], 0)),
-            "play": _i(_first(item, ["view_count", "play_count"], 0)),
+            "play": _optional_i(_first(item, ["view_count", "play_count"], None)),
         }
         subtitle_inline = _extract_xhs_subtitle_inline(item)
         subtitle_urls = _extract_xhs_subtitle_urls(item)

package/skills/social-media-crawl/scripts/pipelines/run_douyin_creator_home.py CHANGED Viewed

@@ -44,6 +44,7 @@ def _write_collection_artifacts(
         "douyin",
         str(profile.get("author_handle") or ""),
         str(profile.get("platform_author_id") or ""),
+        str(profile.get("nickname") or ""),
     )
     author_dir = Path(card_root) / "内容系统" / "作品库" / author_dir_name
     author_dir.mkdir(parents=True, exist_ok=True)

package/skills/social-media-crawl/scripts/pipelines/run_xiaohongshu_creator_home.py CHANGED Viewed

@@ -44,6 +44,7 @@ def _write_collection_artifacts(
         "xiaohongshu",
         str(profile.get("author_handle") or ""),
         str(profile.get("platform_author_id") or ""),
+        str(profile.get("nickname") or ""),
     )
     author_dir = Path(card_root) / "内容系统" / "作品库" / author_dir_name
     author_dir.mkdir(parents=True, exist_ok=True)

package/skills/social-media-crawl/scripts/writers/write_work_fact_card.py CHANGED Viewed

@@ -3,7 +3,6 @@
 from __future__ import annotations
-import hashlib
 import json
 from datetime import datetime, timedelta, timezone
 from pathlib import Path
@@ -45,6 +44,24 @@ def _safe_int(value: Any) -> int:
         return 0
+def _safe_optional_int(value: Any) -> Optional[int]:
+    if value is None:
+        return None
+    if isinstance(value, bool):
+        return int(value)
+    if isinstance(value, int):
+        return value
+    if isinstance(value, float):
+        return int(value)
+    text = _safe_text(value)
+    if not text:
+        return None
+    try:
+        return int(float(text.replace(",", "")))
+    except Exception:
+        return None
 def _source_dict(payload: Dict[str, Any]) -> Dict[str, Any]:
     source = payload.get("source")
     return source if isinstance(source, dict) else {}
@@ -234,7 +251,7 @@ def build_work_fact_card(payload: Dict[str, Any], platform: Optional[str] = None
         "comment_count": _safe_int(payload.get("comment_count")),
         "collect_count": _safe_int(payload.get("collect_count")),
         "share_count": _safe_int(payload.get("share_count")),
-        "play_count": _safe_int(payload.get("play_count")),
+        "play_count": _safe_optional_int(payload.get("play_count")),
         "cover_image": _safe_text(payload.get("cover_image")),
         "source_url": source_url,
         "share_url": share_url,
@@ -280,38 +297,68 @@ def build_work_output_envelope(payload: Dict[str, Any], platform: Optional[str]
     }
-def _markdown_lines(card: Dict[str, Any]) -> List[str]:
-    lines = [
-        f"# {card.get('title') or card.get('platform_work_id') or 'Work'}",
-        "",
-        "## Facts",
-        f"- platform: {card.get('platform') or ''}",
-        f"- platform_work_id: {card.get('platform_work_id') or ''}",
-        f"- platform_author_id: {card.get('platform_author_id') or ''}",
-        f"- author_handle: {card.get('author_handle') or ''}",
-        f"- author: {card.get('author') or ''}",
-        f"- published_date: {card.get('published_date') or ''}",
-        f"- work_modality: {card.get('work_modality') or ''}",
-        f"- digg_count: {card.get('digg_count')}",
-        f"- comment_count: {card.get('comment_count')}",
-        f"- collect_count: {card.get('collect_count')}",
-        f"- share_count: {card.get('share_count')}",
-        f"- play_count: {card.get('play_count')}",
-        f"- source_url: {card.get('source_url') or ''}",
-        f"- share_url: {card.get('share_url') or ''}",
-        f"- video_download_url: {card.get('video_download_url') or ''}",
-        "",
-        "## Text",
-        card.get("primary_text") or "",
-        "",
-        "## Meta",
-        f"- primary_text_source: {card.get('primary_text_source') or ''}",
-        f"- completeness: {card.get('completeness') or ''}",
-        f"- request_id: {card.get('request_id') or ''}",
-        f"- error_reason: {card.get('error_reason') or ''}",
+def _yaml_scalar(value: Any) -> str:
+    if value is None:
+        return ""
+    if isinstance(value, bool):
+        return "true" if value else "false"
+    if isinstance(value, (int, float)) and not isinstance(value, bool):
+        return str(value)
+    text = _safe_text(value)
+    if not text:
+        return ""
+    return json.dumps(text, ensure_ascii=False)
+def _frontmatter_lines(card: Dict[str, Any]) -> List[str]:
+    fields = [
+        ("card_type", "work"),
+        ("platform", card.get("platform")),
+        ("platform_work_id", card.get("platform_work_id")),
+        ("platform_author_id", card.get("platform_author_id")),
+        ("author_handle", card.get("author_handle")),
+        ("author", card.get("author")),
+        ("title", card.get("title")),
+        ("published_date", card.get("published_date")),
+        ("work_modality", card.get("work_modality")),
+        ("digg_count", card.get("digg_count")),
+        ("comment_count", card.get("comment_count")),
+        ("collect_count", card.get("collect_count")),
+        ("share_count", card.get("share_count")),
+        ("play_count", card.get("play_count")),
+        ("cover_image", card.get("cover_image")),
+        ("source_url", card.get("source_url")),
+        ("share_url", card.get("share_url")),
+        ("video_download_url", card.get("video_download_url")),
+        ("primary_text_source", card.get("primary_text_source")),
+        ("completeness", card.get("completeness")),
+        ("request_id", card.get("request_id")),
+        ("error_reason", card.get("error_reason")),
     ]
+    lines = ["---"]
+    for key, value in fields:
+        rendered = _yaml_scalar(value)
+        lines.append(f"{key}: {rendered}" if rendered else f"{key}:")
+    lines.append("---")
+    return lines
+def _markdown_lines(card: Dict[str, Any]) -> List[str]:
+    lines = _frontmatter_lines(card)
+    primary_text = _safe_text(card.get("primary_text"))
+    caption_raw = _safe_text(card.get("caption_raw"))
+    subtitle_raw = _safe_text(card.get("subtitle_raw"))
+    asr_raw = _safe_text(card.get("asr_raw"))
+    lines.extend(["", "## 主文本", primary_text or ""])
+    if caption_raw and caption_raw != primary_text:
+        lines.extend(["", "## 原始文案", caption_raw])
+    if subtitle_raw and subtitle_raw != primary_text:
+        lines.extend(["", "## 原始字幕", subtitle_raw])
+    if asr_raw and asr_raw not in {primary_text, subtitle_raw}:
+        lines.extend(["", "## 原始转写", asr_raw])
     if card.get("missing_fields"):
-        lines.extend(["", "## Missing Fields"])
+        lines.extend(["", "## 缺失字段"])
         for entry in card["missing_fields"]:
             lines.append(f"- {entry.get('field')}: {entry.get('reason')}")
     return lines
@@ -326,6 +373,7 @@ def write_work_fact_card(
     **_: Any,
 ) -> Dict[str, Any]:
     card = build_work_fact_card(payload, platform=platform)
+    published_date = card["published_date"] or _resolve_published_date(payload)
     resolved_card_root = resolve_card_root(storage_config, explicit_card_root=card_root)
     fallback_identifier = card["share_url"] or card["source_url"] or card["title"] or card["request_id"]
     paths = build_work_fact_card_paths(
@@ -334,12 +382,18 @@ def write_work_fact_card(
         platform_work_id=card["platform_work_id"],
         author_handle=card["author_handle"],
         platform_author_id=card["platform_author_id"],
+        author_name=card["author"],
+        title=card["title"],
+        published_date=published_date,
         storage_config=storage_config,
         fallback_identifier=fallback_identifier,
     )
     Path(paths["json_path"]).write_text(json.dumps(card, ensure_ascii=False, indent=2), encoding="utf-8")
-    Path(paths["markdown_path"]).write_text("\n".join(_markdown_lines(card)).strip() + "\n", encoding="utf-8")
+    Path(paths["markdown_path"]).write_text(
+        "\n".join(_markdown_lines(card)).strip() + "\n",
+        encoding="utf-8",
+    )
     return {
         "enabled": True,