@tikomni/skills 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/skills/single-work-analysis/env.example +3 -3
- package/skills/single-work-analysis/references/config-templates/defaults.yaml +8 -19
- package/skills/single-work-analysis/references/prompt-contracts/{insight.md → analysis-bundle.md} +43 -8
- package/skills/single-work-analysis/scripts/core/analysis_adapter.py +384 -0
- package/skills/single-work-analysis/scripts/core/analysis_pipeline.py +399 -76
- package/skills/single-work-analysis/scripts/core/config_loader.py +18 -42
- package/skills/single-work-analysis/scripts/core/progress_report.py +163 -16
- package/skills/single-work-analysis/scripts/core/storage_router.py +24 -57
- package/skills/single-work-analysis/scripts/core/tikomni_common.py +13 -3
- package/skills/single-work-analysis/scripts/pipeline/asr/asr_pipeline.py +154 -7
- package/skills/single-work-analysis/scripts/pipeline/asr/poll_u2_task.py +3 -1
- package/skills/single-work-analysis/scripts/platform/douyin/run_douyin_single_video.py +243 -44
- package/skills/single-work-analysis/scripts/platform/xiaohongshu/run_xiaohongshu_extract.py +263 -25
- package/skills/single-work-analysis/scripts/writers/write_benchmark_card.py +244 -894
- package/skills/single-work-analysis/references/prompt-contracts/asr-clean.md +0 -28
- package/skills/single-work-analysis/references/prompt-contracts/cta.md +0 -24
- package/skills/single-work-analysis/references/prompt-contracts/hook.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/structure.md +0 -25
- package/skills/single-work-analysis/references/prompt-contracts/style.md +0 -27
- package/skills/single-work-analysis/references/prompt-contracts/summary.md +0 -29
- package/skills/single-work-analysis/references/prompt-contracts/topic.md +0 -29
package/skills/single-work-analysis/scripts/core/progress_report.py

@@ -1,11 +1,5 @@
 #!/usr/bin/env python3
-"""Structured stderr progress reporting for long-running TikOmni workflows.
-
-Design goals:
-- emit machine-readable progress events to stderr only
-- keep final JSON stdout contract untouched
-- offer a tiny shared helper so handlers do not duplicate logging logic
-"""
+"""Structured stderr progress reporting for long-running TikOmni workflows."""
 
 from __future__ import annotations
 
@@ -13,9 +7,95 @@ import json
 import sys
 from datetime import datetime, timezone
 from typing import Any, Dict, Optional
+from urllib.parse import urlparse
 
 
 VALID_EVENTS = {"started", "progress", "done", "failed"}
+_MASKED_TEXT = "<redacted>"
+_MAX_TEXT_PREVIEW = 180
+_MAX_LIST_ITEMS = 8
+_SENSITIVE_KEYS = {
+    "api_key",
+    "authorization",
+    "cookie",
+    "cookies",
+    "set_cookie",
+    "token",
+    "xsec_token",
+}
+_LONG_TEXT_KEYS = {
+    "asr_clean",
+    "asr_raw",
+    "prompt",
+    "prompt_text",
+    "raw_content",
+    "stderr",
+    "stdout",
+    "transcript",
+    "transcript_text",
+}
+
+
+def _looks_like_url(text: str) -> bool:
+    return text.startswith("http://") or text.startswith("https://")
+
+
+def _mask_key(key: str) -> bool:
+    lowered = key.lower()
+    if lowered in _SENSITIVE_KEYS:
+        return True
+    return any(token in lowered for token in ("api_key", "token", "cookie", "authorization"))
+
+
+def _sanitize_url(text: str) -> str:
+    try:
+        parsed = urlparse(text)
+    except Exception:
+        return text[:_MAX_TEXT_PREVIEW]
+    if not parsed.scheme or not parsed.netloc:
+        return text[:_MAX_TEXT_PREVIEW]
+    return f"{parsed.scheme}://{parsed.netloc}{parsed.path}"
+
+
+def _truncate_text(text: str) -> str:
+    if len(text) <= _MAX_TEXT_PREVIEW:
+        return text
+    return f"{text[:_MAX_TEXT_PREVIEW]}…(len={len(text)})"
+
+
+def _sanitize_scalar(value: Any, *, key: str = "") -> Any:
+    if value is None or isinstance(value, (bool, int, float)):
+        return value
+
+    text = str(value)
+    if _mask_key(key):
+        return _MASKED_TEXT
+    if key.lower() in _LONG_TEXT_KEYS:
+        return f"<redacted:{key}:len={len(text)}>"
+    if _looks_like_url(text):
+        return _sanitize_url(text)
+    return _truncate_text(text)
+
+
+def _sanitize_payload(value: Any, *, key: str = "") -> Any:
+    if isinstance(value, dict):
+        sanitized: Dict[str, Any] = {}
+        for child_key, child_value in value.items():
+            child_key_text = str(child_key)
+            if _mask_key(child_key_text):
+                sanitized[child_key_text] = _MASKED_TEXT
+                continue
+            sanitized[child_key_text] = _sanitize_payload(child_value, key=child_key_text)
+        return sanitized
+
+    if isinstance(value, list):
+        items = value[:_MAX_LIST_ITEMS]
+        sanitized_items = [_sanitize_payload(item, key=key) for item in items]
+        if len(value) > _MAX_LIST_ITEMS:
+            sanitized_items.append(f"...({len(value) - _MAX_LIST_ITEMS} more)")
+        return sanitized_items
+
+    return _sanitize_scalar(value, key=key)
 
 
 class ProgressReporter:
@@ -36,12 +116,12 @@ class ProgressReporter:
         self.run_id = str(run_id or f"{self.platform}.{self.content_kind}")
         self.scope = str(scope or "workflow")
         self.enabled = bool(enabled)
-        self.defaults = dict(defaults or {})
+        self.defaults = _sanitize_payload(dict(defaults or {}))
 
     def child(self, *, scope: str, defaults: Optional[Dict[str, Any]] = None) -> "ProgressReporter":
         merged = dict(self.defaults)
         if defaults:
-            merged.update(defaults)
+            merged.update(_sanitize_payload(defaults))
         return ProgressReporter(
             workflow=self.workflow,
             platform=self.platform,
@@ -70,13 +150,12 @@
             "stage": str(stage or "unknown"),
         }
         if message:
-            payload["message"] = str(message)
-
-        payload["data"] = dict(self.defaults)
+            payload["message"] = _truncate_text(str(message))
+        merged_data = dict(self.defaults)
         if isinstance(data, dict) and data:
-
-
-            payload["data"] =
+            merged_data.update(_sanitize_payload(data))
+        if merged_data:
+            payload["data"] = merged_data
         sys.stderr.write(json.dumps(payload, ensure_ascii=False) + "\n")
         sys.stderr.flush()
 
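Taken together, the new module-level helpers mean that any defaults or per-event data now pass through a redaction layer before reaching stderr: credential-like keys are masked, known long-text fields (transcripts, prompts, raw stdout/stderr) become a length marker, URLs lose their query string and fragment, scalar previews are capped at 180 characters, and lists at 8 items. A self-contained sketch of those rules, simplified for illustration rather than the package module itself:

    from urllib.parse import urlparse

    MASKED = "<redacted>"
    MAX_PREVIEW = 180
    SENSITIVE = ("api_key", "token", "cookie", "authorization")
    LONG_TEXT = {"transcript", "prompt", "stdout", "stderr"}

    def sanitize(value, key=""):
        # Dicts: mask credential-like keys, recurse into everything else.
        if isinstance(value, dict):
            return {
                str(k): (MASKED if any(t in str(k).lower() for t in SENSITIVE) else sanitize(v, str(k)))
                for k, v in value.items()
            }
        # Lists: keep the first 8 items and note how many were dropped.
        if isinstance(value, list):
            out = [sanitize(v, key) for v in value[:8]]
            if len(value) > 8:
                out.append(f"...({len(value) - 8} more)")
            return out
        if value is None or isinstance(value, (bool, int, float)):
            return value
        text = str(value)
        if key.lower() in LONG_TEXT:
            # Long free-text fields are replaced by a length marker.
            return f"<redacted:{key}:len={len(text)}>"
        if text.startswith(("http://", "https://")):
            # URLs keep scheme, host and path only.
            p = urlparse(text)
            return f"{p.scheme}://{p.netloc}{p.path}"
        return text if len(text) <= MAX_PREVIEW else f"{text[:MAX_PREVIEW]}...(len={len(text)})"

    sample = {"xsec_token": "abc", "share_url": "https://example.com/v/1?sig=secret", "transcript": "x" * 5000}
    print(sanitize(sample))
    # {'xsec_token': '<redacted>', 'share_url': 'https://example.com/v/1', 'transcript': '<redacted:transcript:len=5000>'}

The packaged helpers cover a longer key list (cookies, set_cookie, xsec_token, asr_raw, prompt_text, raw_content) and are applied both to the reporter's constructor defaults and to each event's data, as the hunks above and below show.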
@@ -92,6 +171,74 @@
     def failed(self, *, stage: str, message: str = "", data: Optional[Dict[str, Any]] = None) -> None:
         self.emit("failed", stage=stage, message=message, data=data)
 
+    def heartbeat(self, *, stage: str, message: str = "", data: Optional[Dict[str, Any]] = None) -> None:
+        heartbeat_data = dict(data or {})
+        heartbeat_data["heartbeat"] = True
+        self.progress(stage=stage, message=message or "heartbeat", data=heartbeat_data)
+
+    def http_event(
+        self,
+        *,
+        stage: str,
+        endpoint: str,
+        response: Optional[Dict[str, Any]],
+        summary: Optional[Dict[str, Any]] = None,
+        route_label: Optional[str] = None,
+    ) -> None:
+        payload: Dict[str, Any] = {
+            "kind": "http",
+            "endpoint": endpoint,
+        }
+        if route_label:
+            payload["route_label"] = route_label
+        if isinstance(response, dict):
+            payload.update(
+                {
+                    "ok": bool(response.get("ok")),
+                    "status_code": response.get("status_code"),
+                    "request_id": response.get("request_id"),
+                    "attempt": int(response.get("retry_attempt", 0)) + 1,
+                    "fallback_trigger_reason": response.get("fallback_trigger_reason"),
+                    "timeout_retry_exhausted": bool(response.get("timeout_retry_exhausted")),
+                }
+            )
+        if isinstance(summary, dict) and summary:
+            payload["summary"] = summary
+        event_fn = self.done if isinstance(response, dict) and response.get("ok") else self.failed
+        event_fn(stage=stage, message="http request finished", data=payload)
+
+    def subprocess_event(
+        self,
+        *,
+        stage: str,
+        provider: str,
+        operation: str,
+        event: str,
+        duration_ms: Optional[int] = None,
+        exit_code: Optional[int] = None,
+        summary: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        payload: Dict[str, Any] = {
+            "kind": "subprocess",
+            "provider": provider,
+            "operation": operation,
+        }
+        if duration_ms is not None:
+            payload["duration_ms"] = int(duration_ms)
+        if exit_code is not None:
+            payload["exit_code"] = int(exit_code)
+        if isinstance(summary, dict) and summary:
+            payload["summary"] = summary
+
+        if event == "started":
+            self.started(stage=stage, message="subprocess started", data=payload)
+        elif event == "done":
+            self.done(stage=stage, message="subprocess finished", data=payload)
+        elif event == "failed":
+            self.failed(stage=stage, message="subprocess failed", data=payload)
+        else:
+            self.progress(stage=stage, message="subprocess progress", data=payload)
+
 
 def build_progress_reporter(
     *,
@@ -107,5 +254,5 @@ def build_progress_reporter(
         content_kind=content_kind,
         run_id=f"{platform}.{content_kind}",
         enabled=enabled,
-        defaults={"input_value": str(input_value or "")
+        defaults={"input_value": _sanitize_scalar(str(input_value or ""), key="input_value")},
     )
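The other half of the progress_report.py change adds convenience emitters: heartbeat() for long-running polls, http_event(), which routes to done() or failed() based on response["ok"] and records status code, request id, attempt count and fallback reason, and subprocess_event(), which maps a started/done/failed event name onto the matching reporter call. A hypothetical caller sketch, assuming the module is importable as progress_report and that build_progress_reporter accepts roughly the keyword arguments visible above (the workflow argument in particular is an assumption, not shown in this hunk):

    # Hypothetical caller sketch; argument names for build_progress_reporter are
    # partly inferred from the hunk above.
    from progress_report import build_progress_reporter

    reporter = build_progress_reporter(
        workflow="single-work-analysis",  # assumed parameter name
        platform="douyin",
        content_kind="single_video",
        enabled=True,
        input_value="https://v.douyin.com/xxxx/",
    )

    # Long poll loops can emit a heartbeat so callers can see the task is still alive.
    reporter.heartbeat(stage="asr.poll", data={"task_id": "demo-task"})

    # One structured event per HTTP call; response["ok"] decides between done() and failed().
    reporter.http_event(
        stage="asr.submit",
        endpoint="https://api.example.com/v1/tasks",
        response={"ok": True, "status_code": 200, "retry_attempt": 0, "request_id": "req-1"},
        route_label="primary",
    )

    # Subprocess lifecycle: "started"/"done"/"failed" map onto the matching reporter calls.
    reporter.subprocess_event(
        stage="download",
        provider="downloader",
        operation="fetch_video",
        event="done",
        duration_ms=5300,
        exit_code=0,
    )

Because these helpers delegate to started()/progress()/done()/failed(), their data payloads go through the same sanitization shown earlier.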
package/skills/single-work-analysis/scripts/core/storage_router.py

@@ -1,5 +1,5 @@
 #!/usr/bin/env python3
-"""Storage routing helpers for
+"""Storage routing helpers for single-work card outputs."""
 
 from __future__ import annotations
 
@@ -8,53 +8,33 @@ import re
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
+
 DEFAULT_CARD_TYPE_ROUTES: Dict[str, Dict[str, Any]] = {
     "work": {
         "prefix": "CBV",
         "parts": ["内容系统", "对标研究", "作品卡"],
     },
-    "author": {
-        "prefix": "CBA",
-        "parts": ["内容系统", "对标研究", "作者卡"],
-    },
-    "author_sample_work": {
-        "prefix": "CBV",
-        "parts": ["内容系统", "对标研究", "作者样本卡", "{platform}-{author_slug}"],
-    },
 }
 
 DEFAULT_CONTENT_KIND_CARD_TYPE: Dict[str, str] = {
     "single_video": "work",
+    "note": "work",
     "work": "work",
-    "author_home": "author_sample_work",
-    "author_sample_work": "author_sample_work",
-    "author_analysis": "author",
-}
-
-CARD_TYPE_ALIASES: Dict[str, str] = {
-    "sample": "author_sample_work",
-    "sample_work": "author_sample_work",
-    "homepage_sample": "author_sample_work",
-    "author_homepage_sample": "author_sample_work",
-    "author_home": "author_sample_work",
-    "author_analysis": "author",
 }
 
 CONTENT_KIND_ALIASES: Dict[str, str] = {
-    "
-    "author_homepage_sample": "author_home",
-    "homepage_sample": "author_home",
-    "analysis_author": "author_analysis",
+    "single-work": "work",
 }
 
+DEFAULT_CARD_FILENAME_PATTERN = "{prefix}-{platform}-{author_slug}-{title_slug}{ext}"
+DEFAULT_JSON_FILENAME_PATTERN = "{timestamp}-{platform}-{identifier}{ext}"
+_INVALID_FILENAME_CHARS = re.compile(r"[\\\\/:*?\"<>|]+")
+_SPACE_RUN = re.compile(r"\s+")
+
 
 def normalize_card_type(card_type: str) -> str:
     normalized = (card_type or "").strip().lower().replace("-", "_")
-    if normalized
-        normalized = CARD_TYPE_ALIASES[normalized]
-    if normalized in {"work", "author", "author_sample_work"}:
-        return normalized
-    return "work"
+    return "work" if normalized == "work" else "work"
 
 
 def normalize_content_kind(content_kind: Optional[str]) -> str:
@@ -75,14 +55,13 @@ def _configured_content_kind_map(storage_config: Optional[Dict[str, Any]]) -> Di
     routes = _storage_routes_cfg(storage_config)
     configured = routes.get("content_kind_card_type")
     if not isinstance(configured, dict):
-        return DEFAULT_CONTENT_KIND_CARD_TYPE
+        return dict(DEFAULT_CONTENT_KIND_CARD_TYPE)
 
     merged = dict(DEFAULT_CONTENT_KIND_CARD_TYPE)
     for key, value in configured.items():
-
-
-
-        merged[k] = v
+        normalized_key = normalize_content_kind(str(key))
+        if normalized_key:
+            merged[normalized_key] = normalize_card_type(str(value))
     return merged
 
 
@@ -90,32 +69,24 @@ def _configured_card_routes(storage_config: Optional[Dict[str, Any]]) -> Dict[st
     routes = _storage_routes_cfg(storage_config)
     configured = routes.get("card_type_routes")
     if not isinstance(configured, dict):
-        return DEFAULT_CARD_TYPE_ROUTES
+        return {key: dict(value) for key, value in DEFAULT_CARD_TYPE_ROUTES.items()}
 
-    merged
+    merged = {key: dict(value) for key, value in DEFAULT_CARD_TYPE_ROUTES.items()}
     for key, value in configured.items():
         card_type = normalize_card_type(str(key))
-        if not isinstance(value, dict):
+        if card_type != "work" or not isinstance(value, dict):
            continue
-
-        prefix = value.get("prefix")
         parts = value.get("parts")
-        if not isinstance(parts, list) or not all(isinstance(
+        if not isinstance(parts, list) or not all(isinstance(item, str) and item for item in parts):
            continue
-
-        merged[
-            "prefix":
+        prefix = str(value.get("prefix") or merged["work"]["prefix"])
+        merged["work"] = {
+            "prefix": prefix,
             "parts": parts,
         }
     return merged
 
 
-DEFAULT_CARD_FILENAME_PATTERN = "{prefix}-{author_slug}-{title_slug}{ext}"
-DEFAULT_JSON_FILENAME_PATTERN = "{timestamp}-{platform}-{identifier}{ext}"
-_INVALID_FILENAME_CHARS = re.compile(r"[\\\\/:*?\"<>|]+")
-_SPACE_RUN = re.compile(r"\s+")
-
-
 def _sanitize_filename_token(value: Any, fallback: str = "item") -> str:
     text = str(value or "").strip()
     if not text:
@@ -185,12 +156,8 @@ def resolve_effective_card_type(
     if not normalized_content_kind:
         return normalized_card_type
 
-
-    mapped
-    if mapped is not None:
-        return normalize_card_type(str(mapped))
-
-    return normalized_card_type
+    mapped = _configured_content_kind_map(storage_config).get(normalized_content_kind)
+    return normalize_card_type(str(mapped)) if mapped is not None else normalized_card_type
 
 
 def render_route_parts(parts: List[str], *, context: Dict[str, str]) -> List[str]:
@@ -232,7 +199,7 @@ def build_card_output_path(
     directory = os.path.join(card_root, *rendered_parts)
     os.makedirs(directory, exist_ok=True)
 
-    default_filename = f"{prefix}-{author_slug}-{title_slug}.md"
+    default_filename = f"{prefix}-{platform}-{author_slug}-{title_slug}.md"
     filename = render_output_filename(
         pattern=resolve_card_filename_pattern(storage_config),
         context={
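storage_router.py is cut down to a single route: the author and author-sample route tables and the card-type aliases are removed, normalize_card_type collapses every input to "work", the note content kind and the single-work alias map to the same route, and both the default filename pattern and build_card_output_path now embed the platform in the card filename. A rough sketch of the path that comes out under the new defaults (the values are invented; the real code goes through render_output_filename and filename-token sanitization):

    import os

    DEFAULT_CARD_TYPE_ROUTES = {
        "work": {"prefix": "CBV", "parts": ["内容系统", "对标研究", "作品卡"]},
    }
    DEFAULT_CARD_FILENAME_PATTERN = "{prefix}-{platform}-{author_slug}-{title_slug}{ext}"

    def sketch_card_path(card_root, platform, author_slug, title_slug):
        # Every card type now resolves to the single "work" route.
        route = DEFAULT_CARD_TYPE_ROUTES["work"]
        directory = os.path.join(card_root, *route["parts"])
        filename = DEFAULT_CARD_FILENAME_PATTERN.format(
            prefix=route["prefix"],
            platform=platform,
            author_slug=author_slug,
            title_slug=title_slug,
            ext=".md",
        )
        return os.path.join(directory, filename)

    print(sketch_card_path("cards", "douyin", "some-author", "some-title"))
    # cards/内容系统/对标研究/作品卡/CBV-douyin-some-author-some-title.md

For installs that keep the default pattern, the {platform} token is the visible change: cards still land in the same directory, but the filename now begins with the prefix followed by the platform.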
package/skills/single-work-analysis/scripts/core/tikomni_common.py

@@ -69,9 +69,19 @@ def _resolve_env_file_path(env_file: Optional[str]) -> Path:
         return (skills_root / ".env").resolve()
 
     candidate = Path(env_file).expanduser()
-    if
-
-
+    if candidate.is_absolute():
+        return candidate.resolve()
+
+    search_roots = [
+        Path.cwd(),
+        get_repo_root(),
+        skills_root,
+    ]
+    for root in search_roots:
+        resolved = (root / candidate).resolve()
+        if resolved.exists():
+            return resolved
+    return (Path.cwd() / candidate).resolve()
 
 
 def _infer_default_env_paths(primary_env_file: Optional[str]) -> Tuple[Path, Path]:
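The tikomni_common.py hunk reworks how a relative env_file path is resolved: an absolute path is used as given, otherwise the same relative path is tried under the current working directory, the repository root, and the skills root, in that order, with a cwd-relative fallback when none of them exists yet. A standalone sketch of that search order (repo_root and skills_root stand in for the module's own lookups such as get_repo_root()):

    from pathlib import Path

    def resolve_env_file(env_file: str, repo_root: Path, skills_root: Path) -> Path:
        candidate = Path(env_file).expanduser()
        if candidate.is_absolute():
            return candidate.resolve()
        # Same relative path tried under several roots; the first existing hit wins.
        for root in (Path.cwd(), repo_root, skills_root):
            resolved = (root / candidate).resolve()
            if resolved.exists():
                return resolved
        # Nothing exists yet: fall back to a cwd-relative location.
        return (Path.cwd() / candidate).resolve()

    print(resolve_env_file(".env", Path("/repo"), Path("/repo/skills")))

A relative env_file value therefore works whether the scripts are launched from the repo root or from inside the skill directory; note that an existing file in the current working directory wins over one at the repo root.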