@tikomni/skills 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/env.example CHANGED
@@ -88,8 +88,8 @@ TIKOMNI_PATH_LOCALE="zh"
88
88
 
89
89
  # [ZH] 卡片文件名模板。默认值:{identifier}{ext}
90
90
  # [EN] Card filename pattern. Default: {identifier}{ext}
91
- # [ZH] 当前可用占位符:identifier / platform / author_slug / ext
92
- # [EN] Supported placeholders: identifier / platform / author_slug / ext
91
+ # [ZH] 当前可用占位符:identifier / platform / author_slug / published_at / published_date / title / title_slug / ext
92
+ # [EN] Supported placeholders: identifier / platform / author_slug / published_at / published_date / title / title_slug / ext
93
93
  TIKOMNI_FILENAME_PATTERN_CARD="{identifier}{ext}"
94
94
 
95
95
  # [ZH] JSON 结果文件名模板。默认值:{timestamp}-{platform}-{identifier}{ext}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@tikomni/skills",
3
- "version": "0.1.4",
3
+ "version": "0.1.5",
4
4
  "description": "TikOmni skill installer CLI for structured social media crawling in Codex, Claude Code, and OpenClaw",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/mark-ly-wang/TikOmni-Skills#readme",
@@ -37,7 +37,9 @@
37
37
  ## Field Rules
38
38
 
39
39
  - `author` 是展示名,不是对象。
40
+ - Markdown 卡片的事实字段进入 frontmatter,不再输出 `## Facts` 章节。
40
41
  - `primary_text` 为当前任务最适合阅读和索引的主文本。
42
+ - `play_count` 允许为 `null`;缺失时卡片展示为空,只有平台明确返回 `0` 时才保留 `0`。
41
43
  - 视频优先顺序:
42
44
  - `subtitle_raw`
43
45
  - `asr_clean`
@@ -45,4 +47,3 @@
45
47
  - 文本作品优先顺序:
46
48
  - `caption_raw`
47
49
  - 不允许出现分析字段。
48
-
@@ -36,7 +36,7 @@
36
36
  "comment_count": { "type": "integer" },
37
37
  "collect_count": { "type": "integer" },
38
38
  "share_count": { "type": "integer" },
39
- "play_count": { "type": "integer" },
39
+ "play_count": { "type": ["integer", "null"] },
40
40
  "cover_image": { "type": "string" },
41
41
  "source_url": { "type": "string" },
42
42
  "share_url": { "type": "string" },
@@ -64,4 +64,3 @@
64
64
  },
65
65
  "additionalProperties": false
66
66
  }
67
-
@@ -12,6 +12,7 @@ DEFAULT_JSON_FILENAME_PATTERN = "{timestamp}-{platform}-{identifier}{ext}"
12
12
  _INVALID_FILENAME_CHARS = re.compile(r"[\\\\/:*?\"<>|]+")
13
13
  _SPACE_RUN = re.compile(r"\s+")
14
14
  _INVALID_AUTHOR_SLUGS = {"0", "unknown", "none", "null", "nil", "na", "n-a"}
15
+ _CARD_TOKEN_INVALID_CHARS = re.compile(r"[^\w\-.]+", re.UNICODE)
15
16
 
16
17
 
17
18
  def slugify_token(value: Any, fallback: str = "unknown") -> str:
@@ -25,6 +26,17 @@ def slugify_token(value: Any, fallback: str = "unknown") -> str:
25
26
  return text or fallback
26
27
 
27
28
 
29
def cardify_token(value: Any, fallback: str = "unknown") -> str:
    """Turn *value* into a token that is safe to embed in a card filename.

    A falsy or blank *value* is replaced by *fallback* before cleaning.
    Characters matched by ``_INVALID_FILENAME_CHARS`` become ``-``,
    whitespace runs are removed entirely, and anything still matched by
    ``_CARD_TOKEN_INVALID_CHARS`` is dropped. Repeated hyphens are collapsed
    and leading/trailing ``-``, ``_`` and ``.`` are trimmed.

    Returns the cleaned token, or *fallback* unchanged when nothing is left.
    """
    candidate = str(value or "").strip() or fallback
    # Order matters: reserved characters are turned into hyphens first so
    # they survive as separators, then whitespace and leftovers are removed.
    for pattern, replacement in (
        (_INVALID_FILENAME_CHARS, "-"),
        (_SPACE_RUN, ""),
        (_CARD_TOKEN_INVALID_CHARS, ""),
    ):
        candidate = pattern.sub(replacement, candidate)
    candidate = re.sub(r"-{2,}", "-", candidate).strip("-_.")
    return candidate if candidate else fallback
38
+
39
+
28
40
  def resolve_card_filename_pattern(storage_config: Optional[Dict[str, Any]]) -> str:
29
41
  if not isinstance(storage_config, dict):
30
42
  return DEFAULT_CARD_FILENAME_PATTERN
@@ -66,6 +78,30 @@ def render_output_filename(
66
78
  return rendered
67
79
 
68
80
 
81
def render_card_filename(
    *,
    pattern: str,
    context: Dict[str, Any],
    default_filename: str,
    default_ext: str,
) -> str:
    """Render the card filename from a configurable pattern.

    Every value in *context* is cleaned with ``cardify_token`` before it is
    substituted into *pattern*; the ``ext`` placeholder always resolves to
    *default_ext* regardless of what *context* carries. The rendered name is
    cleaned again so literal text in the pattern cannot introduce reserved
    characters.

    Args:
        pattern: ``str.format`` template, e.g. ``"{identifier}{ext}"``.
        context: Placeholder values available to the pattern.
        default_filename: Name used when the pattern cannot be rendered or
            renders to nothing.
        default_ext: Extension (including the leading dot) the result must
            end with.

    Returns:
        A cleaned filename that ends with *default_ext*.
    """
    safe_context = {key: cardify_token(value, fallback="") for key, value in context.items()}
    safe_context["ext"] = default_ext
    try:
        rendered = str(pattern).format(**safe_context).strip()
    except Exception:
        # Deliberate best-effort: a malformed or unknown placeholder in a
        # user-supplied pattern must not abort the write.
        rendered = default_filename
    rendered = _INVALID_FILENAME_CHARS.sub("-", rendered)
    rendered = _SPACE_RUN.sub("", rendered)
    rendered = _CARD_TOKEN_INVALID_CHARS.sub("", rendered)
    rendered = re.sub(r"-{2,}", "-", rendered).strip("-_.")
    if not rendered:
        rendered = default_filename
    # Compare against the expected extension instead of testing for "any
    # suffix": a dot inside a title (e.g. "Python3.12notes") looks like a
    # suffix to pathlib and would otherwise leave the file without its
    # extension when the pattern omits {ext}.
    if default_ext and not rendered.lower().endswith(default_ext.lower()):
        rendered = f"{rendered}{default_ext}"
    return rendered
103
+
104
+
69
105
  def _storage_routes_cfg(storage_config: Optional[Dict[str, Any]]) -> Dict[str, Any]:
70
106
  if not isinstance(storage_config, dict):
71
107
  return {}
@@ -88,18 +124,45 @@ def resolve_card_root(storage_config: Optional[Dict[str, Any]], explicit_card_ro
88
124
  return str(Path(card_root).expanduser().resolve())
89
125
 
90
126
 
91
- def resolve_author_slug(platform: str, author_handle: str, platform_author_id: str) -> str:
92
- handle_slug = slugify_token(author_handle, fallback="")
93
- if handle_slug and handle_slug not in _INVALID_AUTHOR_SLUGS:
127
+ def resolve_author_slug(platform: str, author_handle: str, platform_author_id: str, author_display: str = "") -> str:
128
+ display_slug = cardify_token(author_display, fallback="")
129
+ if display_slug and display_slug.lower() not in _INVALID_AUTHOR_SLUGS:
130
+ return display_slug
131
+ handle_slug = cardify_token(author_handle, fallback="")
132
+ if handle_slug and handle_slug.lower() not in _INVALID_AUTHOR_SLUGS:
94
133
  return handle_slug
95
- author_id_slug = slugify_token(platform_author_id, fallback="")
134
+ author_id_slug = cardify_token(platform_author_id, fallback="")
96
135
  if author_id_slug:
97
136
  return author_id_slug
98
137
  return f"{slugify_token(platform)}-unknown"
99
138
 
100
139
 
101
- def resolve_author_directory_name(platform: str, author_handle: str, platform_author_id: str) -> str:
102
- return f"{slugify_token(platform)}-{resolve_author_slug(platform, author_handle, platform_author_id)}"
140
+ def resolve_author_directory_name(
141
+ platform: str,
142
+ author_handle: str,
143
+ platform_author_id: str,
144
+ author_display: str = "",
145
+ ) -> str:
146
+ return f"{slugify_token(platform)}-{resolve_author_slug(platform, author_handle, platform_author_id, author_display)}"
147
+
148
+
149
def build_card_identifier(
    *,
    published_date: str,
    title: str,
    fallback_identifier: str,
    platform_work_id: str,
) -> str:
    """Choose the identifier used for a card's filename.

    Preference order:
      1. ``<published_date>-<title>`` when both clean to non-empty tokens
      2. ``<title>`` alone
      3. ``<published_date>-<fallback_identifier>``
      4. ``<fallback_identifier>`` alone
      5. the slugified *platform_work_id*, or ``"unknown"``
    """
    date_part = cardify_token(published_date, fallback="")
    title_part = cardify_token(title, fallback="")
    if title_part:
        return f"{date_part}-{title_part}" if date_part else title_part

    fallback_part = cardify_token(fallback_identifier, fallback="")
    if date_part and fallback_part:
        return f"{date_part}-{fallback_part}"
    if fallback_part:
        return fallback_part
    return slugify_token(platform_work_id, fallback="unknown")
103
166
 
104
167
 
105
168
  def resolve_card_route_parts(
@@ -129,29 +192,58 @@ def build_work_fact_card_paths(
129
192
  platform_work_id: str,
130
193
  author_handle: str,
131
194
  platform_author_id: str,
195
+ author_name: str,
196
+ title: str,
197
+ published_date: str,
132
198
  storage_config: Optional[Dict[str, Any]],
133
199
  fallback_identifier: str,
134
200
  ) -> Dict[str, str]:
135
- author_slug = resolve_author_slug(platform, author_handle, platform_author_id)
201
+ author_slug = resolve_author_slug(platform, author_handle, platform_author_id, author_name)
136
202
  route_parts = resolve_card_route_parts(storage_config, platform=platform, author_slug=author_slug)
137
203
  directory = Path(card_root).joinpath(*route_parts)
138
204
  directory.mkdir(parents=True, exist_ok=True)
139
205
 
140
- identifier = slugify_token(platform_work_id, fallback="") or slugify_token(fallback_identifier, fallback="unknown")
206
+ json_identifier = slugify_token(platform_work_id, fallback="") or slugify_token(fallback_identifier, fallback="unknown")
207
+ card_identifier = build_card_identifier(
208
+ published_date=published_date,
209
+ title=title,
210
+ fallback_identifier=fallback_identifier,
211
+ platform_work_id=platform_work_id,
212
+ )
141
213
  json_filename = render_output_filename(
142
214
  pattern=resolve_card_filename_pattern(storage_config),
143
- context={"identifier": identifier, "platform": platform, "author_slug": author_slug, "ext": ".json"},
144
- default_filename=f"{identifier}.json",
215
+ context={
216
+ "identifier": json_identifier,
217
+ "platform": platform,
218
+ "author_slug": author_slug,
219
+ "published_at": published_date,
220
+ "published_date": published_date,
221
+ "title": title,
222
+ "title_slug": title,
223
+ "ext": ".json",
224
+ },
225
+ default_filename=f"{json_identifier}.json",
145
226
  default_ext=".json",
146
227
  )
147
- markdown_filename = render_output_filename(
228
+ markdown_filename = render_card_filename(
148
229
  pattern=resolve_card_filename_pattern(storage_config),
149
- context={"identifier": identifier, "platform": platform, "author_slug": author_slug, "ext": ".md"},
150
- default_filename=f"{identifier}.md",
230
+ context={
231
+ "identifier": card_identifier,
232
+ "platform": platform,
233
+ "author_slug": author_slug,
234
+ "published_at": published_date,
235
+ "published_date": published_date,
236
+ "title": title,
237
+ "title_slug": title,
238
+ "ext": ".md",
239
+ },
240
+ default_filename=f"{card_identifier}.md",
151
241
  default_ext=".md",
152
242
  )
153
243
  return {
154
- "identifier": identifier,
244
+ "identifier": card_identifier,
245
+ "json_identifier": json_identifier,
246
+ "card_identifier": card_identifier,
155
247
  "author_slug": author_slug,
156
248
  "directory": str(directory),
157
249
  "route": "/".join(route_parts),
@@ -34,6 +34,18 @@ def _i(value: Any, default: int = 0) -> int:
34
34
  return default
35
35
 
36
36
 
37
+ def _optional_i(value: Any) -> int | None:
38
+ try:
39
+ if value is None:
40
+ return None
41
+ if isinstance(value, (int, float)):
42
+ return int(value)
43
+ text = _t(value)
44
+ return int(float(text.replace(",", ""))) if text else None
45
+ except Exception:
46
+ return None
47
+
48
+
37
49
  def _first(payload: Any, keys: List[str], default: Any = "") -> Any:
38
50
  hit = deep_find_first(payload, keys)
39
51
  return default if hit is None else hit
@@ -339,7 +351,7 @@ def adapt_douyin_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[
339
351
  "comment": _i(_first(item, ["comment_count"], 0)),
340
352
  "collect": _i(_first(item, ["collect_count"], 0)),
341
353
  "share": _i(_first(item, ["share_count"], 0)),
342
- "play": _i(_first(item, ["play_count", "view_count"], 0)),
354
+ "play": _optional_i(_first(item, ["play_count", "view_count"], None)),
343
355
  }
344
356
  video_down_url = _extract_douyin_video_down_url(item)
345
357
  tags = _normalize_douyin_tags(_first(item, ["hashtags", "tags", "text_extra"], []))
@@ -429,7 +441,7 @@ def adapt_xhs_author_home(raw: Dict[str, Any]) -> Tuple[Dict[str, Any], List[Dic
429
441
  "comment": _i(_first(item, ["comment_count"], 0)),
430
442
  "collect": _i(_first(item, ["collected_count", "collect_count"], 0)),
431
443
  "share": _i(_first(item, ["share_count"], 0)),
432
- "play": _i(_first(item, ["view_count", "play_count"], 0)),
444
+ "play": _optional_i(_first(item, ["view_count", "play_count"], None)),
433
445
  }
434
446
  subtitle_inline = _extract_xhs_subtitle_inline(item)
435
447
  subtitle_urls = _extract_xhs_subtitle_urls(item)
@@ -44,6 +44,7 @@ def _write_collection_artifacts(
44
44
  "douyin",
45
45
  str(profile.get("author_handle") or ""),
46
46
  str(profile.get("platform_author_id") or ""),
47
+ str(profile.get("nickname") or ""),
47
48
  )
48
49
  author_dir = Path(card_root) / "内容系统" / "作品库" / author_dir_name
49
50
  author_dir.mkdir(parents=True, exist_ok=True)
@@ -44,6 +44,7 @@ def _write_collection_artifacts(
44
44
  "xiaohongshu",
45
45
  str(profile.get("author_handle") or ""),
46
46
  str(profile.get("platform_author_id") or ""),
47
+ str(profile.get("nickname") or ""),
47
48
  )
48
49
  author_dir = Path(card_root) / "内容系统" / "作品库" / author_dir_name
49
50
  author_dir.mkdir(parents=True, exist_ok=True)
@@ -3,7 +3,6 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
- import hashlib
7
6
  import json
8
7
  from datetime import datetime, timedelta, timezone
9
8
  from pathlib import Path
@@ -45,6 +44,24 @@ def _safe_int(value: Any) -> int:
45
44
  return 0
46
45
 
47
46
 
47
+ def _safe_optional_int(value: Any) -> Optional[int]:
48
+ if value is None:
49
+ return None
50
+ if isinstance(value, bool):
51
+ return int(value)
52
+ if isinstance(value, int):
53
+ return value
54
+ if isinstance(value, float):
55
+ return int(value)
56
+ text = _safe_text(value)
57
+ if not text:
58
+ return None
59
+ try:
60
+ return int(float(text.replace(",", "")))
61
+ except Exception:
62
+ return None
63
+
64
+
48
65
  def _source_dict(payload: Dict[str, Any]) -> Dict[str, Any]:
49
66
  source = payload.get("source")
50
67
  return source if isinstance(source, dict) else {}
@@ -234,7 +251,7 @@ def build_work_fact_card(payload: Dict[str, Any], platform: Optional[str] = None
234
251
  "comment_count": _safe_int(payload.get("comment_count")),
235
252
  "collect_count": _safe_int(payload.get("collect_count")),
236
253
  "share_count": _safe_int(payload.get("share_count")),
237
- "play_count": _safe_int(payload.get("play_count")),
254
+ "play_count": _safe_optional_int(payload.get("play_count")),
238
255
  "cover_image": _safe_text(payload.get("cover_image")),
239
256
  "source_url": source_url,
240
257
  "share_url": share_url,
@@ -280,38 +297,68 @@ def build_work_output_envelope(payload: Dict[str, Any], platform: Optional[str]
280
297
  }
281
298
 
282
299
 
283
- def _markdown_lines(card: Dict[str, Any]) -> List[str]:
284
- lines = [
285
- f"# {card.get('title') or card.get('platform_work_id') or 'Work'}",
286
- "",
287
- "## Facts",
288
- f"- platform: {card.get('platform') or ''}",
289
- f"- platform_work_id: {card.get('platform_work_id') or ''}",
290
- f"- platform_author_id: {card.get('platform_author_id') or ''}",
291
- f"- author_handle: {card.get('author_handle') or ''}",
292
- f"- author: {card.get('author') or ''}",
293
- f"- published_date: {card.get('published_date') or ''}",
294
- f"- work_modality: {card.get('work_modality') or ''}",
295
- f"- digg_count: {card.get('digg_count')}",
296
- f"- comment_count: {card.get('comment_count')}",
297
- f"- collect_count: {card.get('collect_count')}",
298
- f"- share_count: {card.get('share_count')}",
299
- f"- play_count: {card.get('play_count')}",
300
- f"- source_url: {card.get('source_url') or ''}",
301
- f"- share_url: {card.get('share_url') or ''}",
302
- f"- video_download_url: {card.get('video_download_url') or ''}",
303
- "",
304
- "## Text",
305
- card.get("primary_text") or "",
306
- "",
307
- "## Meta",
308
- f"- primary_text_source: {card.get('primary_text_source') or ''}",
309
- f"- completeness: {card.get('completeness') or ''}",
310
- f"- request_id: {card.get('request_id') or ''}",
311
- f"- error_reason: {card.get('error_reason') or ''}",
300
+ def _yaml_scalar(value: Any) -> str:
301
+ if value is None:
302
+ return ""
303
+ if isinstance(value, bool):
304
+ return "true" if value else "false"
305
+ if isinstance(value, (int, float)) and not isinstance(value, bool):
306
+ return str(value)
307
+ text = _safe_text(value)
308
+ if not text:
309
+ return ""
310
+ return json.dumps(text, ensure_ascii=False)
311
+
312
+
313
def _frontmatter_lines(card: Dict[str, Any]) -> List[str]:
    """Build the frontmatter block for a work card.

    The block opens and closes with ``---``. It starts with a constant
    ``card_type: work`` entry, followed by one line per card field in a
    fixed order. A field whose rendered value is empty is written as a bare
    ``key:``.
    """
    card_keys = (
        "platform",
        "platform_work_id",
        "platform_author_id",
        "author_handle",
        "author",
        "title",
        "published_date",
        "work_modality",
        "digg_count",
        "comment_count",
        "collect_count",
        "share_count",
        "play_count",
        "cover_image",
        "source_url",
        "share_url",
        "video_download_url",
        "primary_text_source",
        "completeness",
        "request_id",
        "error_reason",
    )
    entries = [("card_type", "work")]
    entries.extend((name, card.get(name)) for name in card_keys)

    body = []
    for name, raw in entries:
        scalar = _yaml_scalar(raw)
        body.append(f"{name}: {scalar}" if scalar else f"{name}:")
    return ["---", *body, "---"]
344
+
345
+
346
+ def _markdown_lines(card: Dict[str, Any]) -> List[str]:
347
+ lines = _frontmatter_lines(card)
348
+ primary_text = _safe_text(card.get("primary_text"))
349
+ caption_raw = _safe_text(card.get("caption_raw"))
350
+ subtitle_raw = _safe_text(card.get("subtitle_raw"))
351
+ asr_raw = _safe_text(card.get("asr_raw"))
352
+
353
+ lines.extend(["", "## 主文本", primary_text or ""])
354
+ if caption_raw and caption_raw != primary_text:
355
+ lines.extend(["", "## 原始文案", caption_raw])
356
+ if subtitle_raw and subtitle_raw != primary_text:
357
+ lines.extend(["", "## 原始字幕", subtitle_raw])
358
+ if asr_raw and asr_raw not in {primary_text, subtitle_raw}:
359
+ lines.extend(["", "## 原始转写", asr_raw])
313
360
  if card.get("missing_fields"):
314
- lines.extend(["", "## Missing Fields"])
361
+ lines.extend(["", "## 缺失字段"])
315
362
  for entry in card["missing_fields"]:
316
363
  lines.append(f"- {entry.get('field')}: {entry.get('reason')}")
317
364
  return lines
@@ -326,6 +373,7 @@ def write_work_fact_card(
326
373
  **_: Any,
327
374
  ) -> Dict[str, Any]:
328
375
  card = build_work_fact_card(payload, platform=platform)
376
+ published_date = card["published_date"] or _resolve_published_date(payload)
329
377
  resolved_card_root = resolve_card_root(storage_config, explicit_card_root=card_root)
330
378
  fallback_identifier = card["share_url"] or card["source_url"] or card["title"] or card["request_id"]
331
379
  paths = build_work_fact_card_paths(
@@ -334,12 +382,18 @@ def write_work_fact_card(
334
382
  platform_work_id=card["platform_work_id"],
335
383
  author_handle=card["author_handle"],
336
384
  platform_author_id=card["platform_author_id"],
385
+ author_name=card["author"],
386
+ title=card["title"],
387
+ published_date=published_date,
337
388
  storage_config=storage_config,
338
389
  fallback_identifier=fallback_identifier,
339
390
  )
340
391
 
341
392
  Path(paths["json_path"]).write_text(json.dumps(card, ensure_ascii=False, indent=2), encoding="utf-8")
342
- Path(paths["markdown_path"]).write_text("\n".join(_markdown_lines(card)).strip() + "\n", encoding="utf-8")
393
+ Path(paths["markdown_path"]).write_text(
394
+ "\n".join(_markdown_lines(card)).strip() + "\n",
395
+ encoding="utf-8",
396
+ )
343
397
 
344
398
  return {
345
399
  "enabled": True,