@tikomni/skills 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/README.zh-CN.md +3 -3
- package/env.example +2 -2
- package/package.json +2 -2
- package/skills/social-media-crawl/SKILL.md +20 -16
- package/skills/social-media-crawl/agents/openai.yaml +2 -2
- package/skills/social-media-crawl/references/api-catalog/aliases.json +237 -0
- package/skills/social-media-crawl/references/api-catalog/capabilities.json +362 -0
- package/skills/social-media-crawl/references/api-catalog/metadata.json +11 -0
- package/skills/social-media-crawl/references/api-catalog/operations.json +100875 -0
- package/skills/social-media-crawl/references/api-catalog/overrides.json +10 -0
- package/skills/social-media-crawl/references/api-catalog/platforms.json +463 -0
- package/skills/social-media-crawl/references/api-routing-contract.md +73 -0
- package/skills/social-media-crawl/references/contracts/output-envelope.md +1 -1
- package/skills/social-media-crawl/scripts/core/api_catalog.py +104 -0
- package/skills/social-media-crawl/scripts/core/call_tikomni_api.py +269 -0
- package/skills/social-media-crawl/scripts/core/config_loader.py +0 -2
- package/skills/social-media-crawl/scripts/core/resolve_api_endpoint.py +176 -0
- package/skills/social-media-crawl/scripts/run_task.py +96 -0
- package/skills/social-media-crawl/tests/test_api_only_routing.py +130 -0
- package/skills/social-media-crawl/references/guides/generic-mcp-objects.md +0 -40
- package/skills/social-media-crawl/references/mcp-usage-contract.md +0 -40
- package/skills/social-media-crawl/scripts/core/mcp_dispatch.py +0 -161
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
# Generic MCP Objects Guide
|
|
2
|
-
|
|
3
|
-
The following objects do not freeze a fine-grained schema in the first release:
|
|
4
|
-
|
|
5
|
-
- Comment threads
|
|
6
|
-
- Search results
|
|
7
|
-
- Ranking pages
|
|
8
|
-
- Livestream rooms
|
|
9
|
-
- Product pages
|
|
10
|
-
|
|
11
|
-
In addition, every platform and object combination that does not match a fixed pipeline falls under this guide, for example:
|
|
12
|
-
|
|
13
|
-
- A single X/Twitter post
|
|
14
|
-
- An X/Twitter thread
|
|
15
|
-
- An X/Twitter long-form post
|
|
16
|
-
- An X/Twitter creator homepage
|
|
17
|
-
- The top N comments from a comment section
|
|
18
|
-
|
|
19
|
-
## Rules
|
|
20
|
-
|
|
21
|
-
- Route these objects through the generic MCP workflow inside this skill.
|
|
22
|
-
- The platform is not limited to the Douyin and Xiaohongshu cases covered by fixed pipelines. If the platform is discoverable in the MCP catalog, try this workflow first.
|
|
23
|
-
- Detect the object first, then use `catalog.search` and `endpoint.describe` to choose the smallest toolchain.
|
|
24
|
-
- Do not jump to browser/CDP only because the platform is not Douyin or Xiaohongshu.
|
|
25
|
-
- Use browser/CDP only when the generic MCP path is unavailable, or when the task explicitly requires page-level interaction that the API cannot satisfy. Explain the reason in the output.
|
|
26
|
-
- The output must satisfy the unified envelope.
|
|
27
|
-
- No card write is required in the first release.
|
|
28
|
-
- Do not fabricate fields only to satisfy schema completeness.
|
|
29
|
-
|
|
30
|
-
## Minimum Deliverable
|
|
31
|
-
|
|
32
|
-
- `object_type`
|
|
33
|
-
- `platform`
|
|
34
|
-
- `input`
|
|
35
|
-
- `normalized`
|
|
36
|
-
- `request_id`
|
|
37
|
-
- `completeness`
|
|
38
|
-
- `missing_fields`
|
|
39
|
-
- `error_reason`
|
|
40
|
-
- `extract_trace`
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
# MCP Usage Contract
|
|
2
|
-
|
|
3
|
-
## Scope
|
|
4
|
-
|
|
5
|
-
- This contract applies to every social-media object that does not match a fixed pipeline, not only Douyin and Xiaohongshu.
|
|
6
|
-
- The currently supported platforms include Douyin, Xiaohongshu, Kuaishou, Bilibili, Weibo, TikTok, YouTube, Instagram, Threads, Twitter/X, Reddit, LinkedIn, WeChat Channels, Official Accounts, Toutiao, Xigua, Zhihu, Lemon8, and Pipixia.
|
|
7
|
-
- Typical objects include X/Twitter posts, threads, long-form posts, creator homepages, comment sections, search results, ranking pages, livestream rooms, and product pages.
|
|
8
|
-
- Fixed pipelines are frozen only for Douyin and Xiaohongshu single-work and creator-home cases. All other supported platform and object combinations should use the generic MCP path defined here.
|
|
9
|
-
|
|
10
|
-
## Fixed Inputs
|
|
11
|
-
|
|
12
|
-
- MCP URL: `https://mcp.tikomni.com/mcp`
|
|
13
|
-
- Auth: `Authorization: Bearer <TIKOMNI_API_KEY>`
|
|
14
|
-
- Do not repeat the API key inside tool parameters.
|
|
15
|
-
|
|
16
|
-
## Required Tool Order
|
|
17
|
-
|
|
18
|
-
1. Detect the platform and object type.
|
|
19
|
-
2. Decide whether a fixed pipeline matches.
|
|
20
|
-
3. If a fixed pipeline matches, run the fixed script directly and do not enter the generic MCP path.
|
|
21
|
-
4. If no fixed pipeline matches:
|
|
22
|
-
- `tools/list`
|
|
23
|
-
- `catalog.search`
|
|
24
|
-
- `endpoint.describe`
|
|
25
|
-
- `api.call`
|
|
26
|
-
5. If video text is required:
|
|
27
|
-
- `u2.submit`
|
|
28
|
-
- `u2.query`
|
|
29
|
-
- Enter the U3 fallback path if the task is still `pending` after 60 seconds.
|
|
30
|
-
6. Use browser/CDP only when the generic MCP path is unavailable or clearly insufficient. Do not skip step 4 and jump straight to browser/CDP.
|
|
31
|
-
|
|
32
|
-
## Output Rules
|
|
33
|
-
|
|
34
|
-
- Keep factual fields separate from derived metadata.
|
|
35
|
-
- The result must include `request_id`.
|
|
36
|
-
- The result must include `completeness`.
|
|
37
|
-
- The result must include `missing_fields`.
|
|
38
|
-
- The result must include `error_reason`.
|
|
39
|
-
- The result must include `extract_trace`.
|
|
40
|
-
- If the flow ends in browser/CDP fallback, `extract_trace` must also include the earlier MCP attempts and the fallback reason.
|
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Minimal MCP HTTP client for social-media-crawl."""
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
import json
|
|
7
|
-
import urllib.request
|
|
8
|
-
from dataclasses import dataclass
|
|
9
|
-
from typing import Any, Dict, Optional
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def _parse_response_body(body: bytes) -> Any:
|
|
13
|
-
text = body.decode("utf-8", errors="replace").strip()
|
|
14
|
-
if not text:
|
|
15
|
-
return {}
|
|
16
|
-
if text.startswith("data:"):
|
|
17
|
-
payloads = []
|
|
18
|
-
for line in text.splitlines():
|
|
19
|
-
if not line.startswith("data:"):
|
|
20
|
-
continue
|
|
21
|
-
payload = line.split("data:", 1)[1].strip()
|
|
22
|
-
if payload:
|
|
23
|
-
payloads.append(payload)
|
|
24
|
-
if payloads:
|
|
25
|
-
return json.loads(payloads[-1])
|
|
26
|
-
return json.loads(text)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
@dataclass
|
|
30
|
-
class McpResponse:
|
|
31
|
-
ok: bool
|
|
32
|
-
status_code: int
|
|
33
|
-
data: Any
|
|
34
|
-
session_id: Optional[str]
|
|
35
|
-
error_reason: Optional[str] = None
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class McpHttpClient:
|
|
39
|
-
def __init__(self, *, url: str, api_key: str, timeout_ms: int = 60000) -> None:
|
|
40
|
-
self.url = url.rstrip("/")
|
|
41
|
-
self.api_key = api_key
|
|
42
|
-
self.timeout_ms = timeout_ms
|
|
43
|
-
self.session_id: Optional[str] = None
|
|
44
|
-
self._next_id = 1
|
|
45
|
-
|
|
46
|
-
def _headers(self) -> Dict[str, str]:
|
|
47
|
-
headers = {
|
|
48
|
-
"Authorization": f"Bearer {self.api_key}",
|
|
49
|
-
"Content-Type": "application/json",
|
|
50
|
-
"Accept": "application/json, text/event-stream",
|
|
51
|
-
"User-Agent": "OpenClaw-SocialMediaCrawl/0.1",
|
|
52
|
-
"X-Client-Name": "social-media-crawl",
|
|
53
|
-
"X-Client-Version": "0.1.0",
|
|
54
|
-
}
|
|
55
|
-
if self.session_id:
|
|
56
|
-
headers["mcp-session-id"] = self.session_id
|
|
57
|
-
return headers
|
|
58
|
-
|
|
59
|
-
def _request(self, payload: Dict[str, Any]) -> McpResponse:
|
|
60
|
-
req = urllib.request.Request(
|
|
61
|
-
self.url,
|
|
62
|
-
data=json.dumps(payload).encode("utf-8"),
|
|
63
|
-
headers=self._headers(),
|
|
64
|
-
method="POST",
|
|
65
|
-
)
|
|
66
|
-
try:
|
|
67
|
-
with urllib.request.urlopen(req, timeout=max(self.timeout_ms / 1000.0, 1.0)) as response:
|
|
68
|
-
body = response.read()
|
|
69
|
-
data = _parse_response_body(body)
|
|
70
|
-
session_id = response.headers.get("mcp-session-id") or response.headers.get("Mcp-Session-Id")
|
|
71
|
-
if session_id:
|
|
72
|
-
self.session_id = session_id
|
|
73
|
-
return McpResponse(ok=True, status_code=response.getcode(), data=data, session_id=self.session_id)
|
|
74
|
-
except urllib.error.HTTPError as error:
|
|
75
|
-
body = error.read()
|
|
76
|
-
try:
|
|
77
|
-
data = _parse_response_body(body)
|
|
78
|
-
except Exception:
|
|
79
|
-
data = {"raw": body.decode("utf-8", errors="replace")}
|
|
80
|
-
return McpResponse(
|
|
81
|
-
ok=False,
|
|
82
|
-
status_code=error.code,
|
|
83
|
-
data=data,
|
|
84
|
-
session_id=self.session_id,
|
|
85
|
-
error_reason=f"http_error:{error.code}",
|
|
86
|
-
)
|
|
87
|
-
except Exception as error:
|
|
88
|
-
return McpResponse(
|
|
89
|
-
ok=False,
|
|
90
|
-
status_code=0,
|
|
91
|
-
data={},
|
|
92
|
-
session_id=self.session_id,
|
|
93
|
-
error_reason=f"request_failed:{type(error).__name__}:{error}",
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
def _rpc(self, method: str, params: Optional[Dict[str, Any]] = None, notification: bool = False) -> McpResponse:
|
|
97
|
-
payload: Dict[str, Any] = {"jsonrpc": "2.0", "method": method}
|
|
98
|
-
if not notification:
|
|
99
|
-
payload["id"] = self._next_id
|
|
100
|
-
self._next_id += 1
|
|
101
|
-
if params is not None:
|
|
102
|
-
payload["params"] = params
|
|
103
|
-
return self._request(payload)
|
|
104
|
-
|
|
105
|
-
def initialize(self) -> McpResponse:
|
|
106
|
-
response = self._rpc(
|
|
107
|
-
"initialize",
|
|
108
|
-
{
|
|
109
|
-
"protocolVersion": "2025-03-26",
|
|
110
|
-
"capabilities": {},
|
|
111
|
-
"clientInfo": {"name": "social-media-crawl", "version": "0.1.0"},
|
|
112
|
-
},
|
|
113
|
-
)
|
|
114
|
-
if response.ok:
|
|
115
|
-
self._rpc("notifications/initialized", notification=True)
|
|
116
|
-
return response
|
|
117
|
-
|
|
118
|
-
def tools_list(self) -> McpResponse:
|
|
119
|
-
if not self.session_id:
|
|
120
|
-
init = self.initialize()
|
|
121
|
-
if not init.ok:
|
|
122
|
-
return init
|
|
123
|
-
return self._rpc("tools/list")
|
|
124
|
-
|
|
125
|
-
def tool_call(self, name: str, arguments: Optional[Dict[str, Any]] = None) -> McpResponse:
|
|
126
|
-
if not self.session_id:
|
|
127
|
-
init = self.initialize()
|
|
128
|
-
if not init.ok:
|
|
129
|
-
return init
|
|
130
|
-
return self._rpc("tools/call", {"name": name, "arguments": arguments or {}})
|
|
131
|
-
|
|
132
|
-
def catalog_search(self, query: str) -> McpResponse:
|
|
133
|
-
return self.tool_call("catalog.search", {"query": query})
|
|
134
|
-
|
|
135
|
-
def endpoint_describe(self, method: str, path: str) -> McpResponse:
|
|
136
|
-
return self.tool_call("endpoint.describe", {"method": method, "path": path})
|
|
137
|
-
|
|
138
|
-
def api_call(
|
|
139
|
-
self,
|
|
140
|
-
method: str,
|
|
141
|
-
path: str,
|
|
142
|
-
query: Optional[Dict[str, Any]] = None,
|
|
143
|
-
headers: Optional[Dict[str, Any]] = None,
|
|
144
|
-
body: Optional[Any] = None,
|
|
145
|
-
) -> McpResponse:
|
|
146
|
-
return self.tool_call(
|
|
147
|
-
"api.call",
|
|
148
|
-
{
|
|
149
|
-
"method": method,
|
|
150
|
-
"path": path,
|
|
151
|
-
"query": query or {},
|
|
152
|
-
"headers": headers or {},
|
|
153
|
-
"body": {} if body is None else body,
|
|
154
|
-
},
|
|
155
|
-
)
|
|
156
|
-
|
|
157
|
-
def u2_submit(self, file_url: str) -> McpResponse:
|
|
158
|
-
return self.tool_call("u2.submit", {"file_url": file_url})
|
|
159
|
-
|
|
160
|
-
def u2_query(self, task_id: str) -> McpResponse:
|
|
161
|
-
return self.tool_call("u2.query", {"task_id": task_id})
|