@tikomni/skills 0.1.2 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. package/package.json +4 -2
  2. package/skills/single-work-analysis/env.example +3 -3
  3. package/skills/single-work-analysis/references/config-templates/defaults.yaml +8 -19
  4. package/skills/single-work-analysis/references/prompt-contracts/{insight.md → analysis-bundle.md} +43 -8
  5. package/skills/single-work-analysis/scripts/core/analysis_adapter.py +384 -0
  6. package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +399 -76
  7. package/skills/single-work-analysis/scripts/core/config_loader.py +18 -42
  8. package/skills/single-work-analysis/scripts/core/progress_report.py +163 -16
  9. package/skills/single-work-analysis/scripts/core/storage_router.py +24 -57
  10. package/skills/single-work-analysis/scripts/core/tikomni_common.py +13 -3
  11. package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +154 -7
  12. package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +3 -1
  13. package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +243 -44
  14. package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +263 -25
  15. package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +244 -894
  16. package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
  17. package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
  18. package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
  19. package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
  20. package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
  21. package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
  22. package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
@@ -1,11 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- """Structured stderr progress reporting for long-running TikOmni workflows.
3
-
4
- Design goals:
5
- - emit machine-readable progress events to stderr only
6
- - keep final JSON stdout contract untouched
7
- - offer a tiny shared helper so handlers do not duplicate logging logic
8
- """
2
+ """Structured stderr progress reporting for long-running TikOmni workflows."""
9
3
 
10
4
  from __future__ import annotations
11
5
 
@@ -13,9 +7,95 @@ import json
13
7
  import sys
14
8
  from datetime import datetime, timezone
15
9
  from typing import Any, Dict, Optional
10
+ from urllib.parse import urlparse
16
11
 
17
12
 
18
13
  VALID_EVENTS = {"started", "progress", "done", "failed"}
14
+ _MASKED_TEXT = "<redacted>"
15
+ _MAX_TEXT_PREVIEW = 180
16
+ _MAX_LIST_ITEMS = 8
17
+ _SENSITIVE_KEYS = {
18
+ "api_key",
19
+ "authorization",
20
+ "cookie",
21
+ "cookies",
22
+ "set_cookie",
23
+ "token",
24
+ "xsec_token",
25
+ }
26
+ _LONG_TEXT_KEYS = {
27
+ "asr_clean",
28
+ "asr_raw",
29
+ "prompt",
30
+ "prompt_text",
31
+ "raw_content",
32
+ "stderr",
33
+ "stdout",
34
+ "transcript",
35
+ "transcript_text",
36
+ }
37
+
38
+
39
+ def _looks_like_url(text: str) -> bool:
40
+ return text.startswith("http://") or text.startswith("https://")
41
+
42
+
43
+ def _mask_key(key: str) -> bool:
44
+ lowered = key.lower()
45
+ if lowered in _SENSITIVE_KEYS:
46
+ return True
47
+ return any(token in lowered for token in ("api_key", "token", "cookie", "authorization"))
48
+
49
+
50
+ def _sanitize_url(text: str) -> str:
51
+ try:
52
+ parsed = urlparse(text)
53
+ except Exception:
54
+ return text[:_MAX_TEXT_PREVIEW]
55
+ if not parsed.scheme or not parsed.netloc:
56
+ return text[:_MAX_TEXT_PREVIEW]
57
+ return f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
58
+
59
+
60
+ def _truncate_text(text: str) -> str:
61
+ if len(text) <= _MAX_TEXT_PREVIEW:
62
+ return text
63
+ return f"{text[:_MAX_TEXT_PREVIEW]}…(len={len(text)})"
64
+
65
+
66
+ def _sanitize_scalar(value: Any, *, key: str = "") -> Any:
67
+ if value is None or isinstance(value, (bool, int, float)):
68
+ return value
69
+
70
+ text = str(value)
71
+ if _mask_key(key):
72
+ return _MASKED_TEXT
73
+ if key.lower() in _LONG_TEXT_KEYS:
74
+ return f"<redacted:{key}:len={len(text)}>"
75
+ if _looks_like_url(text):
76
+ return _sanitize_url(text)
77
+ return _truncate_text(text)
78
+
79
+
80
+ def _sanitize_payload(value: Any, *, key: str = "") -> Any:
81
+ if isinstance(value, dict):
82
+ sanitized: Dict[str, Any] = {}
83
+ for child_key, child_value in value.items():
84
+ child_key_text = str(child_key)
85
+ if _mask_key(child_key_text):
86
+ sanitized[child_key_text] = _MASKED_TEXT
87
+ continue
88
+ sanitized[child_key_text] = _sanitize_payload(child_value, key=child_key_text)
89
+ return sanitized
90
+
91
+ if isinstance(value, list):
92
+ items = value[:_MAX_LIST_ITEMS]
93
+ sanitized_items = [_sanitize_payload(item, key=key) for item in items]
94
+ if len(value) > _MAX_LIST_ITEMS:
95
+ sanitized_items.append(f"...({len(value) - _MAX_LIST_ITEMS} more)")
96
+ return sanitized_items
97
+
98
+ return _sanitize_scalar(value, key=key)
19
99
 
20
100
 
21
101
  class ProgressReporter:
@@ -36,12 +116,12 @@ class ProgressReporter:
36
116
  self.run_id = str(run_id or f"{self.platform}.{self.content_kind}")
37
117
  self.scope = str(scope or "workflow")
38
118
  self.enabled = bool(enabled)
39
- self.defaults = dict(defaults or {})
119
+ self.defaults = _sanitize_payload(dict(defaults or {}))
40
120
 
41
121
  def child(self, *, scope: str, defaults: Optional[Dict[str, Any]] = None) -> "ProgressReporter":
42
122
  merged = dict(self.defaults)
43
123
  if defaults:
44
- merged.update(defaults)
124
+ merged.update(_sanitize_payload(defaults))
45
125
  return ProgressReporter(
46
126
  workflow=self.workflow,
47
127
  platform=self.platform,
@@ -70,13 +150,12 @@ class ProgressReporter:
70
150
  "stage": str(stage or "unknown"),
71
151
  }
72
152
  if message:
73
- payload["message"] = str(message)
74
- if self.defaults:
75
- payload["data"] = dict(self.defaults)
153
+ payload["message"] = _truncate_text(str(message))
154
+ merged_data = dict(self.defaults)
76
155
  if isinstance(data, dict) and data:
77
- merged = dict(payload.get("data") or {})
78
- merged.update(data)
79
- payload["data"] = merged
156
+ merged_data.update(_sanitize_payload(data))
157
+ if merged_data:
158
+ payload["data"] = merged_data
80
159
  sys.stderr.write(json.dumps(payload, ensure_ascii=False) + "\n")
81
160
  sys.stderr.flush()
82
161
 
@@ -92,6 +171,74 @@ class ProgressReporter:
92
171
  def failed(self, *, stage: str, message: str = "", data: Optional[Dict[str, Any]] = None) -> None:
93
172
  self.emit("failed", stage=stage, message=message, data=data)
94
173
 
174
+ def heartbeat(self, *, stage: str, message: str = "", data: Optional[Dict[str, Any]] = None) -> None:
175
+ heartbeat_data = dict(data or {})
176
+ heartbeat_data["heartbeat"] = True
177
+ self.progress(stage=stage, message=message or "heartbeat", data=heartbeat_data)
178
+
179
+ def http_event(
180
+ self,
181
+ *,
182
+ stage: str,
183
+ endpoint: str,
184
+ response: Optional[Dict[str, Any]],
185
+ summary: Optional[Dict[str, Any]] = None,
186
+ route_label: Optional[str] = None,
187
+ ) -> None:
188
+ payload: Dict[str, Any] = {
189
+ "kind": "http",
190
+ "endpoint": endpoint,
191
+ }
192
+ if route_label:
193
+ payload["route_label"] = route_label
194
+ if isinstance(response, dict):
195
+ payload.update(
196
+ {
197
+ "ok": bool(response.get("ok")),
198
+ "status_code": response.get("status_code"),
199
+ "request_id": response.get("request_id"),
200
+ "attempt": int(response.get("retry_attempt", 0)) + 1,
201
+ "fallback_trigger_reason": response.get("fallback_trigger_reason"),
202
+ "timeout_retry_exhausted": bool(response.get("timeout_retry_exhausted")),
203
+ }
204
+ )
205
+ if isinstance(summary, dict) and summary:
206
+ payload["summary"] = summary
207
+ event_fn = self.done if isinstance(response, dict) and response.get("ok") else self.failed
208
+ event_fn(stage=stage, message="http request finished", data=payload)
209
+
210
+ def subprocess_event(
211
+ self,
212
+ *,
213
+ stage: str,
214
+ provider: str,
215
+ operation: str,
216
+ event: str,
217
+ duration_ms: Optional[int] = None,
218
+ exit_code: Optional[int] = None,
219
+ summary: Optional[Dict[str, Any]] = None,
220
+ ) -> None:
221
+ payload: Dict[str, Any] = {
222
+ "kind": "subprocess",
223
+ "provider": provider,
224
+ "operation": operation,
225
+ }
226
+ if duration_ms is not None:
227
+ payload["duration_ms"] = int(duration_ms)
228
+ if exit_code is not None:
229
+ payload["exit_code"] = int(exit_code)
230
+ if isinstance(summary, dict) and summary:
231
+ payload["summary"] = summary
232
+
233
+ if event == "started":
234
+ self.started(stage=stage, message="subprocess started", data=payload)
235
+ elif event == "done":
236
+ self.done(stage=stage, message="subprocess finished", data=payload)
237
+ elif event == "failed":
238
+ self.failed(stage=stage, message="subprocess failed", data=payload)
239
+ else:
240
+ self.progress(stage=stage, message="subprocess progress", data=payload)
241
+
95
242
 
96
243
  def build_progress_reporter(
97
244
  *,
@@ -107,5 +254,5 @@ def build_progress_reporter(
107
254
  content_kind=content_kind,
108
255
  run_id=f"{platform}.{content_kind}",
109
256
  enabled=enabled,
110
- defaults={"input_value": str(input_value or "")[:240]},
257
+ defaults={"input_value": _sanitize_scalar(str(input_value or ""), key="input_value")},
111
258
  )
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env python3
2
- """Storage routing helpers for benchmark card outputs."""
2
+ """Storage routing helpers for single-work card outputs."""
3
3
 
4
4
  from __future__ import annotations
5
5
 
@@ -8,53 +8,33 @@ import re
8
8
  from pathlib import Path
9
9
  from typing import Any, Dict, List, Optional, Tuple
10
10
 
11
+
11
12
  DEFAULT_CARD_TYPE_ROUTES: Dict[str, Dict[str, Any]] = {
12
13
  "work": {
13
14
  "prefix": "CBV",
14
15
  "parts": ["内容系统", "对标研究", "作品卡"],
15
16
  },
16
- "author": {
17
- "prefix": "CBA",
18
- "parts": ["内容系统", "对标研究", "作者卡"],
19
- },
20
- "author_sample_work": {
21
- "prefix": "CBV",
22
- "parts": ["内容系统", "对标研究", "作者样本卡", "{platform}-{author_slug}"],
23
- },
24
17
  }
25
18
 
26
19
  DEFAULT_CONTENT_KIND_CARD_TYPE: Dict[str, str] = {
27
20
  "single_video": "work",
21
+ "note": "work",
28
22
  "work": "work",
29
- "author_home": "author_sample_work",
30
- "author_sample_work": "author_sample_work",
31
- "author_analysis": "author",
32
- }
33
-
34
- CARD_TYPE_ALIASES: Dict[str, str] = {
35
- "sample": "author_sample_work",
36
- "sample_work": "author_sample_work",
37
- "homepage_sample": "author_sample_work",
38
- "author_homepage_sample": "author_sample_work",
39
- "author_home": "author_sample_work",
40
- "author_analysis": "author",
41
23
  }
42
24
 
43
25
  CONTENT_KIND_ALIASES: Dict[str, str] = {
44
- "author_homepage": "author_home",
45
- "author_homepage_sample": "author_home",
46
- "homepage_sample": "author_home",
47
- "analysis_author": "author_analysis",
26
+ "single-work": "work",
48
27
  }
49
28
 
29
+ DEFAULT_CARD_FILENAME_PATTERN = "{prefix}-{platform}-{author_slug}-{title_slug}{ext}"
30
+ DEFAULT_JSON_FILENAME_PATTERN = "{timestamp}-{platform}-{identifier}{ext}"
31
+ _INVALID_FILENAME_CHARS = re.compile(r"[\\\\/:*?\"<>|]+")
32
+ _SPACE_RUN = re.compile(r"\s+")
33
+
50
34
 
51
35
  def normalize_card_type(card_type: str) -> str:
52
36
  normalized = (card_type or "").strip().lower().replace("-", "_")
53
- if normalized in CARD_TYPE_ALIASES:
54
- normalized = CARD_TYPE_ALIASES[normalized]
55
- if normalized in {"work", "author", "author_sample_work"}:
56
- return normalized
57
- return "work"
37
+ return "work" if normalized == "work" else "work"
58
38
 
59
39
 
60
40
  def normalize_content_kind(content_kind: Optional[str]) -> str:
@@ -75,14 +55,13 @@ def _configured_content_kind_map(storage_config: Optional[Dict[str, Any]]) -> Di
75
55
  routes = _storage_routes_cfg(storage_config)
76
56
  configured = routes.get("content_kind_card_type")
77
57
  if not isinstance(configured, dict):
78
- return DEFAULT_CONTENT_KIND_CARD_TYPE
58
+ return dict(DEFAULT_CONTENT_KIND_CARD_TYPE)
79
59
 
80
60
  merged = dict(DEFAULT_CONTENT_KIND_CARD_TYPE)
81
61
  for key, value in configured.items():
82
- k = normalize_content_kind(str(key))
83
- v = normalize_card_type(str(value))
84
- if k:
85
- merged[k] = v
62
+ normalized_key = normalize_content_kind(str(key))
63
+ if normalized_key:
64
+ merged[normalized_key] = normalize_card_type(str(value))
86
65
  return merged
87
66
 
88
67
 
@@ -90,32 +69,24 @@ def _configured_card_routes(storage_config: Optional[Dict[str, Any]]) -> Dict[st
90
69
  routes = _storage_routes_cfg(storage_config)
91
70
  configured = routes.get("card_type_routes")
92
71
  if not isinstance(configured, dict):
93
- return DEFAULT_CARD_TYPE_ROUTES
72
+ return {key: dict(value) for key, value in DEFAULT_CARD_TYPE_ROUTES.items()}
94
73
 
95
- merged: Dict[str, Dict[str, Any]] = {k: dict(v) for k, v in DEFAULT_CARD_TYPE_ROUTES.items()}
74
+ merged = {key: dict(value) for key, value in DEFAULT_CARD_TYPE_ROUTES.items()}
96
75
  for key, value in configured.items():
97
76
  card_type = normalize_card_type(str(key))
98
- if not isinstance(value, dict):
77
+ if card_type != "work" or not isinstance(value, dict):
99
78
  continue
100
-
101
- prefix = value.get("prefix")
102
79
  parts = value.get("parts")
103
- if not isinstance(parts, list) or not all(isinstance(x, str) and x for x in parts):
80
+ if not isinstance(parts, list) or not all(isinstance(item, str) and item for item in parts):
104
81
  continue
105
-
106
- merged[card_type] = {
107
- "prefix": str(prefix) if isinstance(prefix, str) and prefix else merged.get(card_type, {}).get("prefix", ""),
82
+ prefix = str(value.get("prefix") or merged["work"]["prefix"])
83
+ merged["work"] = {
84
+ "prefix": prefix,
108
85
  "parts": parts,
109
86
  }
110
87
  return merged
111
88
 
112
89
 
113
- DEFAULT_CARD_FILENAME_PATTERN = "{prefix}-{author_slug}-{title_slug}{ext}"
114
- DEFAULT_JSON_FILENAME_PATTERN = "{timestamp}-{platform}-{identifier}{ext}"
115
- _INVALID_FILENAME_CHARS = re.compile(r"[\\\\/:*?\"<>|]+")
116
- _SPACE_RUN = re.compile(r"\s+")
117
-
118
-
119
90
  def _sanitize_filename_token(value: Any, fallback: str = "item") -> str:
120
91
  text = str(value or "").strip()
121
92
  if not text:
@@ -185,12 +156,8 @@ def resolve_effective_card_type(
185
156
  if not normalized_content_kind:
186
157
  return normalized_card_type
187
158
 
188
- card_type_map = _configured_content_kind_map(storage_config)
189
- mapped = card_type_map.get(normalized_content_kind)
190
- if mapped is not None:
191
- return normalize_card_type(str(mapped))
192
-
193
- return normalized_card_type
159
+ mapped = _configured_content_kind_map(storage_config).get(normalized_content_kind)
160
+ return normalize_card_type(str(mapped)) if mapped is not None else normalized_card_type
194
161
 
195
162
 
196
163
  def render_route_parts(parts: List[str], *, context: Dict[str, str]) -> List[str]:
@@ -232,7 +199,7 @@ def build_card_output_path(
232
199
  directory = os.path.join(card_root, *rendered_parts)
233
200
  os.makedirs(directory, exist_ok=True)
234
201
 
235
- default_filename = f"{prefix}-{author_slug}-{title_slug}.md"
202
+ default_filename = f"{prefix}-{platform}-{author_slug}-{title_slug}.md"
236
203
  filename = render_output_filename(
237
204
  pattern=resolve_card_filename_pattern(storage_config),
238
205
  context={
@@ -69,9 +69,19 @@ def _resolve_env_file_path(env_file: Optional[str]) -> Path:
69
69
  return (skills_root / ".env").resolve()
70
70
 
71
71
  candidate = Path(env_file).expanduser()
72
- if not candidate.is_absolute():
73
- candidate = skills_root / candidate
74
- return candidate.resolve()
72
+ if candidate.is_absolute():
73
+ return candidate.resolve()
74
+
75
+ search_roots = [
76
+ Path.cwd(),
77
+ get_repo_root(),
78
+ skills_root,
79
+ ]
80
+ for root in search_roots:
81
+ resolved = (root / candidate).resolve()
82
+ if resolved.exists():
83
+ return resolved
84
+ return (Path.cwd() / candidate).resolve()
75
85
 
76
86
 
77
87
  def _infer_default_env_paths(primary_env_file: Optional[str]) -> Tuple[Path, Path]: