sophhub 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. package/bin/sophhub.js +21 -0
  2. package/package.json +32 -0
  3. package/skills/VERSIONS.md +27 -0
  4. package/skills/builtin/clawhub/SKILL.md +77 -0
  5. package/skills/builtin/flight-booking/SKILL.md +288 -0
  6. package/skills/builtin/flight-booking/scripts/flight_booking.py +1232 -0
  7. package/skills/builtin/inventory-management/SKILL.md +241 -0
  8. package/skills/builtin/inventory-management/scripts/inventory.py +1844 -0
  9. package/skills/builtin/schedule-reminder/SKILL.md +619 -0
  10. package/skills/builtin/schedule-reminder/schedule_template.md +68 -0
  11. package/skills/builtin/schedule-reminder/scripts/append_event.py +204 -0
  12. package/skills/builtin/schedule-reminder/scripts/create_reminders.sh +163 -0
  13. package/skills/builtin/schedule-reminder/scripts/daily_activate.sh +175 -0
  14. package/skills/builtin/schedule-reminder/scripts/parse_schedule.py +704 -0
  15. package/skills/builtin/schedule-reminder/scripts/setup.sh +242 -0
  16. package/skills/builtin/schedule-reminder//347/224/250/346/210/267/346/214/207/345/215/227.md +311 -0
  17. package/skills/builtin/skill-creator/SKILL.md +370 -0
  18. package/skills/builtin/skill-creator/license.txt +202 -0
  19. package/skills/builtin/skill-creator/scripts/init_skill.py +378 -0
  20. package/skills/builtin/skill-creator/scripts/package_skill.py +111 -0
  21. package/skills/builtin/skill-creator/scripts/quick_validate.py +101 -0
  22. package/skills/builtin/sophnet-customer-management/SKILL.md +271 -0
  23. package/skills/builtin/sophnet-customer-management/pyproject.toml +15 -0
  24. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/__init__.py +2 -0
  25. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/__main__.py +5 -0
  26. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/cli.py +67 -0
  27. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/commands/__init__.py +2 -0
  28. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/commands/customer.py +60 -0
  29. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/commands/export_file.py +18 -0
  30. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/commands/import_file.py +15 -0
  31. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/commands/reminder.py +26 -0
  32. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/commands/schema.py +28 -0
  33. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_cli/config.py +54 -0
  34. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/__init__.py +2 -0
  35. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/exporter.py +85 -0
  36. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/models.py +84 -0
  37. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/normalizer.py +144 -0
  38. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/parser.py +241 -0
  39. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/query.py +109 -0
  40. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/reminder.py +121 -0
  41. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/repository.py +397 -0
  42. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/schema.py +106 -0
  43. package/skills/builtin/sophnet-customer-management/src/customer_mgmt_core/service.py +565 -0
  44. package/skills/builtin/sophnet-customer-management/uv.lock +48 -0
  45. package/skills/builtin/sophnet-customized-marketing/SKILL.md +144 -0
  46. package/skills/builtin/sophnet-customized-marketing/playbooks/campaign-planning.md +187 -0
  47. package/skills/builtin/sophnet-customized-marketing/playbooks/content-generation.md +124 -0
  48. package/skills/builtin/sophnet-customized-marketing/playbooks/marketing-calendar.md +59 -0
  49. package/skills/builtin/sophnet-customized-marketing/playbooks/multi-channel-bundle.md +94 -0
  50. package/skills/builtin/sophnet-customized-marketing/playbooks/poster-generation.md +182 -0
  51. package/skills/builtin/sophnet-customized-marketing/playbooks/style-profile-workflow.md +103 -0
  52. package/skills/builtin/sophnet-customized-marketing/pyproject.toml +9 -0
  53. package/skills/builtin/sophnet-customized-marketing/references/campaign-mechanics.md +168 -0
  54. package/skills/builtin/sophnet-customized-marketing/references/content-safety.md +26 -0
  55. package/skills/builtin/sophnet-customized-marketing/references/marketing-date-checklist.md +99 -0
  56. package/skills/builtin/sophnet-customized-marketing/references/platform-writing-guidelines.md +88 -0
  57. package/skills/builtin/sophnet-customized-marketing/references/quality-checklist.md +44 -0
  58. package/skills/builtin/sophnet-customized-marketing/scripts/generate_poster.py +585 -0
  59. package/skills/builtin/sophnet-customized-marketing/scripts/style_profile.py +215 -0
  60. package/skills/builtin/sophnet-face-search/SKILL.md +115 -0
  61. package/skills/builtin/sophnet-face-search/pyproject.toml +11 -0
  62. package/skills/builtin/sophnet-face-search/scripts/face_search.py +336 -0
  63. package/skills/builtin/sophnet-face-search/uv.lock +508 -0
  64. package/skills/builtin/sophnet-image-edit/SKILL.md +140 -0
  65. package/skills/builtin/sophnet-image-edit/pyproject.toml +9 -0
  66. package/skills/builtin/sophnet-image-edit/scripts/edit_and_preview.sh +68 -0
  67. package/skills/builtin/sophnet-image-edit/scripts/edit_image.py +279 -0
  68. package/skills/builtin/sophnet-image-edit/uv.lock +234 -0
  69. package/skills/builtin/sophnet-image-generate/SKILL.md +62 -0
  70. package/skills/builtin/sophnet-image-generate/pyproject.toml +9 -0
  71. package/skills/builtin/sophnet-image-generate/scripts/generate_image.py +156 -0
  72. package/skills/builtin/sophnet-image-generate/uv.lock +234 -0
  73. package/skills/builtin/sophnet-image-ocr/SKILL.md +167 -0
  74. package/skills/builtin/sophnet-image-ocr/pyproject.toml +13 -0
  75. package/skills/builtin/sophnet-image-ocr/scripts/ocr.py +226 -0
  76. package/skills/builtin/sophnet-image-ocr/uv.lock +234 -0
  77. package/skills/builtin/sophnet-infinite-talk/SKILL.md +140 -0
  78. package/skills/builtin/sophnet-infinite-talk/pyproject.toml +9 -0
  79. package/skills/builtin/sophnet-infinite-talk/scripts/gen.py +172 -0
  80. package/skills/builtin/sophnet-oss/SKILL.md +109 -0
  81. package/skills/builtin/sophnet-oss/pyproject.toml +8 -0
  82. package/skills/builtin/sophnet-oss/scripts/upload_file.py +43 -0
  83. package/skills/builtin/sophnet-qa-install/SKILL.md +210 -0
  84. package/skills/builtin/sophnet-qa-install/pyproject.toml +6 -0
  85. package/skills/builtin/sophnet-qa-install/scripts/backup_md.py +35 -0
  86. package/skills/builtin/sophnet-qa-install/scripts/check_installed.py +143 -0
  87. package/skills/builtin/sophnet-qa-install/scripts/update_config.py +142 -0
  88. package/skills/builtin/sophnet-qa-install/scripts/update_md.py +73 -0
  89. package/skills/builtin/sophnet-training-install/SKILL.md +211 -0
  90. package/skills/builtin/sophnet-training-install/pyproject.toml +6 -0
  91. package/skills/builtin/sophnet-training-install/scripts/backup_md.py +35 -0
  92. package/skills/builtin/sophnet-training-install/scripts/check_installed.py +144 -0
  93. package/skills/builtin/sophnet-training-install/scripts/update_config.py +142 -0
  94. package/skills/builtin/sophnet-training-install/scripts/update_md.py +73 -0
  95. package/skills/builtin/sophnet-tts/SKILL.md +79 -0
  96. package/skills/builtin/sophnet-tts/pyproject.toml +9 -0
  97. package/skills/builtin/sophnet-tts/scripts/gen_tts.py +130 -0
  98. package/skills/builtin/sophnet-video-generate/SKILL.md +116 -0
  99. package/skills/builtin/sophnet-video-generate/scripts/gen_video.py +304 -0
  100. package/skills/builtin/video-understand/SKILL.md +79 -0
  101. package/skills/builtin/video-understand/scripts/video_understand.py +204 -0
  102. package/skills/builtin/weather/SKILL.md +112 -0
  103. package/skills/builtin/web-scraper/SKILL.md +101 -0
  104. package/skills/builtin/web-scraper/scripts/scrape.py +270 -0
  105. package/skills/builtin/website-builder/SKILL.md +266 -0
  106. package/skills/builtin/website-builder/scripts/deploy_site.sh +46 -0
  107. package/skills/store/didi-ride/SKILL.md +309 -0
  108. package/skills/store/didi-ride/_meta.json +6 -0
  109. package/skills/store/didi-ride/assets/PREFERENCE.md +58 -0
  110. package/skills/store/didi-ride/package.json +15 -0
  111. package/skills/store/didi-ride/references/api_references.md +171 -0
  112. package/skills/store/didi-ride/references/error_handling.md +68 -0
  113. package/skills/store/didi-ride/references/setup.md +73 -0
  114. package/skills/store/didi-ride/references/workflow.md +150 -0
  115. package/skills/store/flyai/SKILL.md +119 -0
  116. package/skills/store/flyai/references/fliggy-fast-search.md +53 -0
  117. package/skills/store/flyai/references/search-flight.md +89 -0
  118. package/skills/store/flyai/references/search-hotels.md +57 -0
  119. package/skills/store/flyai/references/search-poi.md +49 -0
  120. package/src/commands/download.js +103 -0
  121. package/src/commands/list.js +67 -0
  122. package/src/utils/config.js +24 -0
  123. package/src/utils/gitlab.js +67 -0
  124. package/src/utils/paths.js +19 -0
  125. package/src/utils/versions.js +38 -0
@@ -0,0 +1,101 @@
1
+ ---
2
+ name: web-scraper
3
+ description: Scrape web pages on the server side. Supports static HTTP scraping (curl) and dynamic JavaScript rendering (Chrome print-to-pdf + PyMuPDF). Zero extra pip dependencies. Use when web_fetch returns empty or minimal content.
4
+ metadata: { "openclaw": { "emoji": "🕸️", "requires": { "bins": ["python3", "curl"] } } }
5
+ ---
6
+
7
+ # Web Scraper
8
+
9
+ Server-side web page scraping with two modes. **No extra pip packages needed** -- uses only curl, Chrome, and PyMuPDF (already in the Docker image).
10
+
11
+ ## When to Use
12
+
13
+ - `web_fetch` returned empty or very short content (common with SPA/JS-heavy sites)
14
+ - The user explicitly asks to scrape or crawl a URL
15
+
16
+ ## Quick Start
17
+
18
+ ```bash
19
+ # Static scrape (fast, curl + text extraction, works for most pages)
20
+ python3 {baseDir}/scripts/scrape.py "https://example.com"
21
+
22
+ # Dynamic scrape (Chrome renders JS, saves PDF, PyMuPDF extracts text)
23
+ python3 {baseDir}/scripts/scrape.py "https://www.bitmain.com/" --mode dynamic
24
+ ```
25
+
26
+ ## Options
27
+
28
+ | Option | Default | Description |
29
+ | ------------------------ | -------- | ---------------------------------------------------------------------------------- |
30
+ | `--mode static\|dynamic` | `static` | `static`: curl fetch + regex text extract. `dynamic`: Chrome PDF render + PyMuPDF. |
31
+ | `--timeout <seconds>` | `15` | Request/render timeout |
32
+ | `--max-chars <n>` | `50000` | Truncate output beyond this length |
33
+
34
+ ## Decision Logic
35
+
36
+ 1. **Try static mode first** (fast, < 2 seconds).
37
+ 2. If the result `text` is very short (< 200 characters) or empty, the page likely needs JavaScript.
38
+ 3. **Retry with `--mode dynamic`** to render JavaScript via headless Chrome.
39
+ 4. If dynamic mode fails (no Chrome), report the error to the user.
40
+
41
+ ## Output Format
42
+
43
+ JSON to stdout:
44
+
45
+ ```json
46
+ {
47
+ "status": "ok",
48
+ "mode": "dynamic",
49
+ "url": "https://www.bitmain.com/",
50
+ "title": "BITMAIN",
51
+ "text": "BITMAIN\nProducts\nBitcoin Miner S23 Hyd...",
52
+ "length": 1779
53
+ }
54
+ ```
55
+
56
+ ## Modes Explained
57
+
58
+ ### Static Mode (`--mode static`)
59
+
60
+ - Uses `curl` to fetch raw HTML
61
+ - Regex-based tag stripping for text extraction
62
+ - Fast (< 2 seconds), no Python dependencies needed
63
+ - Does NOT execute JavaScript
64
+ - Good for: documentation, blogs, news, static content
65
+
66
+ ### Dynamic Mode (`--mode dynamic`)
67
+
68
+ - Chrome `--headless=new --print-to-pdf` renders the page including all JavaScript
69
+ - PyMuPDF (`fitz`) extracts text from the resulting PDF
70
+ - Slower (15-40 seconds) but captures dynamically generated content
71
+ - Requires `google-chrome` or `chromium` on PATH
72
+ - Good for: SPA (React/Vue/Angular), JS-rendered dashboards
73
+
74
+ ## Examples
75
+
76
+ ```bash
77
+ # Basic static scrape
78
+ python3 {baseDir}/scripts/scrape.py "https://docs.python.org/3/tutorial/index.html"
79
+
80
+ # SPA page (needs JavaScript rendering)
81
+ python3 {baseDir}/scripts/scrape.py "https://www.bitmain.com/" --mode dynamic
82
+
83
+ # Longer timeout for slow pages
84
+ python3 {baseDir}/scripts/scrape.py "https://slow-site.example.com" --mode dynamic --timeout 30
85
+ ```
86
+
87
+ ## Dependencies
88
+
89
+ | Tool | Required For | Already Installed |
90
+ | ---------------------------- | ------------------------ | ---------------------- |
91
+ | `python3` | Script runtime | Yes (Docker image) |
92
+ | `curl` | Static mode | Yes (Docker image) |
93
+ | `google-chrome` / `chromium` | Dynamic mode only | Depends on image |
94
+ | `PyMuPDF` (fitz) | Dynamic mode PDF reading | Yes (requirements.txt) |
95
+ | `pdfminer` | Fallback PDF reading | Yes (requirements.txt) |
96
+
97
+ ## Security
98
+
99
+ - Only `http://` and `https://` URLs are allowed
100
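+ - Requests to `localhost` and to literal private, loopback, link-local, or reserved IP addresses are blocked (SSRF protection). Hostnames are not resolved before the check, so a DNS name that points at an internal address is not blocked.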
101
+ - The `text` field is truncated at `--max-chars` to keep the JSON returned to the agent small
@@ -0,0 +1,270 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Web scraper with two modes: static (curl + regex text extraction) and dynamic (headless Chrome print-to-pdf + PyMuPDF text extraction).
4
+ No requests/beautifulsoup4 needed -- only uses packages already in the Docker image.
5
+ Outputs JSON to stdout for agent consumption.
6
+ """
7
+
8
+ import argparse
9
+ import ipaddress
10
+ import json
11
+ import os
12
+ import shutil
13
+ import subprocess
14
+ import sys
15
+ import tempfile
16
+ from typing import Dict, Optional
17
+ from urllib.parse import urlparse
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # SSRF guard
21
+ # ---------------------------------------------------------------------------
22
+
23
+ BLOCKED_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "[::1]", "metadata.google.internal"}
24
+
25
+
26
def is_private_ip(hostname):
    # type: (str) -> bool
    """Return True when ``hostname`` is an IP literal that must not be fetched.

    Covers private, loopback, link-local, reserved, multicast, unspecified and
    other non-global ranges (for example carrier-grade NAT 100.64.0.0/10).
    Two spellings that the plain ``ipaddress`` parser would let through are
    also handled:

    * IPv4-mapped IPv6 literals such as ``::ffff:127.0.0.1`` are judged by the
      IPv4 address they embed.
    * Legacy IPv4 forms accepted by ``inet_aton`` (``2130706433``, ``127.1``)
      are converted to a real address before being judged.

    Args:
        hostname: Host component of a URL, without brackets or port.

    Returns:
        True if the host is an IP literal in a blocked range; False for
        public addresses and for anything that is not an IP literal (DNS
        names are NOT resolved here).
    """
    import socket  # local import: only this guard needs it

    try:
        addr = ipaddress.ip_address(hostname)
    except ValueError:
        # Not a canonical literal; try the legacy IPv4 spellings.
        try:
            addr = ipaddress.ip_address(socket.inet_aton(hostname))
        except (OSError, ValueError, TypeError):
            return False

    # Unwrap ::ffff:a.b.c.d so the embedded IPv4 address is what gets checked.
    mapped = getattr(addr, "ipv4_mapped", None)
    if mapped is not None:
        addr = mapped

    return (
        addr.is_private
        or addr.is_loopback
        or addr.is_link_local
        or addr.is_reserved
        or addr.is_multicast
        or addr.is_unspecified
        or not addr.is_global
    )
33
+
34
+
35
def validate_url(url):
    # type: (str) -> Optional[str]
    """Check that ``url`` is safe to fetch.

    Args:
        url: The user-supplied URL.

    Returns:
        None when the URL is acceptable, otherwise a human-readable reason
        for rejecting it.

    The URL is rejected when it cannot be parsed, uses a scheme other than
    http/https, has no host, names a host in ``BLOCKED_HOSTS`` (or any
    ``*.localhost`` name), or uses an IP literal that ``is_private_ip``
    flags.

    NOTE(review): hostnames are not resolved, so a DNS name pointing at an
    internal address still passes this check.
    """
    try:
        parsed = urlparse(url)
    except ValueError as exc:
        # e.g. an unterminated IPv6 bracket; report it instead of crashing.
        return "Invalid URL: %s" % exc

    if parsed.scheme not in ("http", "https"):
        return "Unsupported scheme: %s. Only http/https allowed." % parsed.scheme

    # A trailing dot names the same host ("localhost." == "localhost"), so
    # drop it before comparing against the blocklist.
    hostname = (parsed.hostname or "").rstrip(".")
    if not hostname:
        return "Invalid URL: missing host"

    if hostname in BLOCKED_HOSTS or hostname.endswith(".localhost"):
        return "Blocked host: %s" % hostname
    if is_private_ip(hostname):
        return "Private/reserved IP blocked: %s" % hostname
    return None
46
+
47
+
48
+ # ---------------------------------------------------------------------------
49
+ # Chrome binary detection
50
+ # ---------------------------------------------------------------------------
51
+
52
+ CHROME_CANDIDATES = ["google-chrome", "google-chrome-stable", "chromium", "chromium-browser"]
53
+
54
+
55
def find_chrome():
    # type: () -> Optional[str]
    """Locate a Chrome/Chromium executable on PATH.

    Returns:
        The path of the first entry in ``CHROME_CANDIDATES`` that
        ``shutil.which`` can resolve, or None when none of them is found.
    """
    resolved = (shutil.which(candidate) for candidate in CHROME_CANDIDATES)
    return next((binary for binary in resolved if binary), None)
62
+
63
+
64
+ # ---------------------------------------------------------------------------
65
+ # Static scrape via curl (no extra deps)
66
+ # ---------------------------------------------------------------------------
67
+
68
def _decode_html_bytes(raw):
    # type: (bytes) -> str
    """Decode fetched bytes, honouring a charset declared in the page."""
    import re

    # Sniff only the head of the document; that is where <meta charset>
    # and http-equiv declarations live.
    declared = re.search(
        br"charset\s*=\s*[\"']?\s*([A-Za-z0-9._:-]+)", raw[:4096], re.IGNORECASE)
    encodings = []
    if declared:
        encodings.append(declared.group(1).decode("ascii"))
    encodings.append("utf-8")

    for name in encodings:
        try:
            return raw.decode(name)
        except (LookupError, UnicodeDecodeError):
            continue
    # Last resort: never fail on undecodable bytes.
    return raw.decode("utf-8", errors="replace")


def scrape_static(url, timeout):
    # type: (str, int) -> Dict
    """Fetch ``url`` with curl and extract its text without running JavaScript.

    Args:
        url: Already-validated http/https URL.
        timeout: curl ``--max-time`` in seconds; the subprocess itself is
            killed 5 seconds later.

    Returns:
        On success a dict with ``status`` "ok", ``mode``, ``url``, ``title``,
        ``text`` and ``length``. On failure a dict with ``status`` "error"
        and an ``error`` message.

    NOTE(review): redirects are followed by curl and their targets are not
    re-checked against the SSRF guard; only the redirect protocol is
    restricted here.
    """
    curl = shutil.which("curl")
    if not curl:
        return {"status": "error", "mode": "static", "url": url,
                "error": "curl not found on PATH"}

    cmd = [
        curl,
        "-sSL",  # -S keeps curl's error text on stderr despite -s
        "--max-time", str(timeout),
        "--proto", "=http,https",
        "--proto-redir", "=http,https",
        "-H", "User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/137.0",
        "-H", "Accept: text/html,*/*;q=0.8",
        url,
    ]

    try:
        # Capture raw bytes: decoding with the locale codec in strict mode
        # raises on pages that are not valid in that encoding (e.g. GBK).
        result = subprocess.run(cmd, capture_output=True, timeout=timeout + 5)
    except subprocess.TimeoutExpired:
        return {"status": "error", "mode": "static", "url": url,
                "error": "curl timed out after %ds" % timeout}

    if result.returncode != 0:
        stderr_text = result.stderr.decode("utf-8", errors="replace")
        return {"status": "error", "mode": "static", "url": url,
                "error": "curl failed (code %d): %s" % (result.returncode, stderr_text[:300])}

    html = _decode_html_bytes(result.stdout)
    # Same newline normalisation that text-mode pipes used to apply.
    html = html.replace("\r\n", "\n").replace("\r", "\n")
    text = _extract_text_from_html(html)
    title = _extract_title_from_html(html)

    return {
        "status": "ok",
        "mode": "static",
        "url": url,
        "title": title,
        "text": text,
        "length": len(text),
    }
104
+
105
+
106
+ def _extract_title_from_html(html):
107
+ # type: (str) -> str
108
+ """Extract <title> content without bs4."""
109
+ import re
110
+ m = re.search(r"<title[^>]*>(.*?)</title>", html, re.IGNORECASE | re.DOTALL)
111
+ return m.group(1).strip() if m else ""
112
+
113
+
114
+ def _extract_text_from_html(html):
115
+ # type: (str) -> str
116
+ """Rough HTML-to-text without bs4: strip tags, collapse whitespace."""
117
+ import re
118
+ text = re.sub(r"<script[^>]*>.*?</script>", "", html, flags=re.DOTALL | re.IGNORECASE)
119
+ text = re.sub(r"<style[^>]*>.*?</style>", "", text, flags=re.DOTALL | re.IGNORECASE)
120
+ text = re.sub(r"<[^>]+>", "\n", text)
121
+ text = re.sub(r"&nbsp;", " ", text)
122
+ text = re.sub(r"&amp;", "&", text)
123
+ text = re.sub(r"&lt;", "<", text)
124
+ text = re.sub(r"&gt;", ">", text)
125
+ text = re.sub(r"&#\d+;", "", text)
126
+ text = re.sub(r"&\w+;", "", text)
127
+ lines = [line.strip() for line in text.split("\n") if line.strip()]
128
+ return "\n".join(lines)
129
+
130
+
131
+ # ---------------------------------------------------------------------------
132
+ # Dynamic scrape: Chrome --print-to-pdf + PyMuPDF
133
+ # ---------------------------------------------------------------------------
134
+
135
def scrape_dynamic(url, timeout):
    # type: (str, int) -> Dict
    """Render ``url`` in headless Chrome, print it to PDF and extract the text.

    Args:
        url: Already-validated http/https URL.
        timeout: Seconds used for Chrome's ``--virtual-time-budget`` (with a
            5 second floor); the Chrome process is killed ``timeout + 30``
            seconds after launch.

    Returns:
        On success a dict with ``status`` "ok", ``mode``, ``url``, ``title``,
        ``text`` and ``length``. On failure a dict with ``status`` "error"
        and an ``error`` message.

    The temporary directory holding the PDF is always removed.
    """
    chrome = find_chrome()
    if not chrome:
        return {
            "status": "error", "mode": "dynamic", "url": url,
            "error": "No Chrome/Chromium found. Searched: %s. Use --mode static instead."
                     % ", ".join(CHROME_CANDIDATES),
        }

    tmpdir = tempfile.mkdtemp(prefix="web-scraper-")
    pdf_path = os.path.join(tmpdir, "page.pdf")
    hard_timeout = timeout + 30  # wall-clock limit for the Chrome process

    try:
        js_budget_ms = max(5000, timeout * 1000)
        cmd = [
            chrome,
            "--headless=new",
            "--print-to-pdf=" + pdf_path,
            "--no-sandbox",
            "--disable-gpu",
            "--disable-software-rasterizer",
            "--disable-dev-shm-usage",
            "--no-pdf-header-footer",
            "--virtual-time-budget=%d" % js_budget_ms,
            url,
        ]

        try:
            # errors="replace": Chrome's log output must never abort the
            # scrape with a decode error.
            proc = subprocess.run(
                cmd, capture_output=True, text=True, errors="replace",
                timeout=hard_timeout,
            )
        except subprocess.TimeoutExpired:
            return {"status": "error", "mode": "dynamic", "url": url,
                    "error": "Chrome timed out after %ds" % hard_timeout}

        if not os.path.exists(pdf_path) or os.path.getsize(pdf_path) == 0:
            # Surface Chrome's own diagnostics so the failure is actionable.
            message = "Chrome did not produce a PDF file (exit code %d)" % proc.returncode
            detail = (proc.stderr or "").strip()[-300:]
            if detail:
                message += ": " + detail
            return {"status": "error", "mode": "dynamic", "url": url,
                    "error": message}

        text = _extract_text_from_pdf(pdf_path)
        title = _guess_title(text)

        return {
            "status": "ok",
            "mode": "dynamic",
            "url": url,
            "title": title,
            "text": text,
            "length": len(text),
        }
    finally:
        # ignore_errors already makes this best-effort.
        shutil.rmtree(tmpdir, ignore_errors=True)
192
+
193
+
194
+ def _extract_text_from_pdf(pdf_path):
195
+ # type: (str) -> str
196
+ """Extract text from PDF using PyMuPDF (already in requirements.txt)."""
197
+ try:
198
+ import fitz # PyMuPDF
199
+ except ImportError:
200
+ return _extract_text_from_pdf_fallback(pdf_path)
201
+
202
+ doc = fitz.open(pdf_path)
203
+ pages = []
204
+ for page in doc:
205
+ pages.append(page.get_text())
206
+ doc.close()
207
+ text = "\n".join(pages).strip()
208
+ lines = [line.strip() for line in text.split("\n") if line.strip()]
209
+ return "\n".join(lines)
210
+
211
+
212
+ def _extract_text_from_pdf_fallback(pdf_path):
213
+ # type: (str) -> str
214
+ """Fallback: try pdfminer if PyMuPDF is not available."""
215
+ try:
216
+ from pdfminer.high_level import extract_text
217
+ return extract_text(pdf_path).strip()
218
+ except ImportError:
219
+ return "[error] Neither PyMuPDF nor pdfminer available. Install with: pip3 install --break-system-packages PyMuPDF"
220
+
221
+
222
+ def _guess_title(text):
223
+ # type: (str) -> str
224
+ """Use the first non-empty line as title."""
225
+ for line in text.split("\n"):
226
+ stripped = line.strip()
227
+ if stripped and len(stripped) < 200:
228
+ return stripped
229
+ return ""
230
+
231
+
232
+ # ---------------------------------------------------------------------------
233
+ # Main
234
+ # ---------------------------------------------------------------------------
235
+
236
def main():
    """Command-line entry point: parse arguments, scrape, print JSON.

    A URL rejected by ``validate_url`` produces a one-line error JSON and
    exit status 1. Otherwise the selected scraper runs; any exception it
    raises is converted into an error result. Successful results longer than
    ``--max-chars`` are truncated and flagged before being printed as
    indented JSON.
    """
    cli = argparse.ArgumentParser(description="Web scraper for OpenClaw agents")
    cli.add_argument("url", help="URL to scrape (http/https only)")
    cli.add_argument(
        "--mode", choices=["static", "dynamic"], default="static",
        help="static: curl + text extract; dynamic: Chrome PDF + PyMuPDF (default: static)")
    cli.add_argument(
        "--timeout", type=int, default=15,
        help="Request timeout in seconds (default: 15)")
    cli.add_argument(
        "--max-chars", type=int, default=50000,
        help="Max output characters (default: 50000)")
    opts = cli.parse_args()

    problem = validate_url(opts.url)
    if problem:
        rejection = {"status": "error", "url": opts.url, "error": problem}
        print(json.dumps(rejection, ensure_ascii=False))
        sys.exit(1)

    try:
        scraper = scrape_static if opts.mode == "static" else scrape_dynamic
        result = scraper(opts.url, opts.timeout)
    except Exception as exc:
        result = {"status": "error", "url": opts.url, "mode": opts.mode,
                  "error": "%s: %s" % (type(exc).__name__, exc)}

    succeeded = result.get("status") == "ok"
    if succeeded and len(result.get("text", "")) > opts.max_chars:
        clipped = result["text"][:opts.max_chars] + "\n\n... [truncated]"
        result["text"] = clipped
        result["truncated"] = True
        # The reported length includes the truncation marker.
        result["length"] = len(clipped)

    print(json.dumps(result, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
@@ -0,0 +1,266 @@
1
+ ---
2
+ name: website-builder
3
+ description: >
4
+ 一键建站助手:根据用户需求生成完整网站(HTML/CSS/JS),自动部署到 moltbot 内置静态托管,
5
+ 立即给出可公开访问的链接(http://<host>:18789/canvas/<site-name>/)。
6
+ 支持:上传截图/UI图进行像素级复刻、多轮自然语言修改、响应式设计、多页面网站。
7
+ 触发场景:用户说"帮我做一个网站"、"做一个landing page"、"做一个作品集"、"复刻这个界面"、
8
+ "帮我建一个xxx网站"等。
9
+ metadata: { "openclaw": { "emoji": "🌐", "requires": { "bins": ["node"] } } }
10
+ ---
11
+
12
+ ## 加载时说明
13
+
14
+ 当用户启用此 skill 时,发送以下欢迎语:
15
+
16
+ > 🌐 **建站助手已就绪!**
17
+ >
18
+ > 我可以帮你:
19
+ >
20
+ > - 用一句话描述需求,自动生成并部署网站
21
+ > - 上传截图 / UI 图,像素级复刻
22
+ > - 多轮对话实时修改,所见即所得
23
+ >
24
+ > 生成后你会收到一个可直接访问的链接,无需额外配置。
25
+ >
26
+ > 你想做什么样的网站?
27
+
28
+ ---
29
+
30
+ # 建站助手 (Website Builder)
31
+
32
+ 根据需求生成完整网站,部署到 moltbot gateway 的 `/canvas/` 静态路由,立即可访问。
33
+
34
+ ---
35
+
36
+ ## 一、访问路径说明
37
+
38
+ 网站文件存放在:
39
+
40
+ ```
41
+ <workspace>/canvas/<site-name>/index.html
42
+ ```
43
+
44
+ 访问地址:
45
+
46
+ ```
47
+ http://<moltbot-host>:18789/canvas/<site-name>/
48
+ ```
49
+
50
+ 其中 `<moltbot-host>` 是运行 moltbot 的服务器 IP 或域名(如 `192.168.1.100`)。
51
+
52
+ ---
53
+
54
+ ## 二、工作流程
55
+
56
+ ### Step 1:理解需求,输出确认卡片
57
+
58
+ 收到建站需求后,先展示理解摘要让用户确认,**不要直接开始写代码**:
59
+
60
+ ```
61
+ 🌐 建站方案确认
62
+
63
+ - 网站类型:个人作品集
64
+ - 风格:极简黑白
65
+ - 页面:首页、作品、关于、联系
66
+ - 技术栈:纯 HTML + CSS + JS(无需构建)
67
+ - 站点名称(URL路径):portfolio
68
+ - 访问地址:http://<your-ip>:18789/canvas/portfolio/
69
+
70
+ 确认开始生成?
71
+ ```
72
+
73
+ ### Step 2:生成网站文件
74
+
75
+ 用户确认后,调用部署脚本:
76
+
77
+ ```bash
78
+ bash /app/skills/website-builder/scripts/deploy_site.sh \
79
+ --name "<site-name>" \
80
+ --workspace "<workspace-dir>"
81
+ ```
82
+
83
+ 脚本负责:
84
+
85
+ 1. 创建 `<workspace>/canvas/<site-name>/` 目录
86
+ 2. 输出站点目录(`SITE_DIR`)和 URL 路径(`SITE_URL_PATH`),供拼接访问链接
87
+ 3. 不写入任何网站文件——HTML/CSS/JS 由 Agent 在 Step 3 写入该目录
88
+
89
+ #### 生成代码的要求
90
+
91
+ - **单文件优先**:能写在一个 `index.html` 里就不拆分(内联 CSS + JS),更易修改
92
+ - **多页面**:每个页面一个 HTML 文件(`about.html`、`works.html` 等),共享一个 `style.css`
93
+ - **响应式**:必须适配手机和桌面
94
+ - **无外部依赖**:不引用 CDN,所有样式和脚本内联或本地,确保离线可用
95
+ - **现代设计**:使用 CSS Grid / Flexbox,避免过时的表格布局
96
+
97
+ ### Step 3:写入文件
98
+
99
+ 使用 exec 工具直接写文件到 canvas 目录:
100
+
101
+ ```python
102
+ # 写入 index.html
103
+ with open('<workspace>/canvas/<site-name>/index.html', 'w') as f:
104
+ f.write('''...生成的HTML内容...''')
105
+ ```
106
+
107
+ ### Step 4:输出访问链接
108
+
109
+ ```
110
+ ✅ 网站已部署!
111
+
112
+ 🔗 访问链接:http://<your-ip>:18789/canvas/<site-name>/
113
+
114
+ 📁 文件位置:<workspace>/canvas/<site-name>/
115
+ ├── index.html
116
+ ├── about.html(如有)
117
+ └── style.css(如有)
118
+
119
+ 需要修改?直接告诉我(如"把导航栏改成深色"、"加一个联系表单")。
120
+ ```
121
+
122
+ ---
123
+
124
+ ## 三、获取 moltbot 访问地址
125
+
126
+ Agent 需要告知用户正确的访问地址。通过以下方式获取 host:
127
+
128
+ ```bash
129
+ # 获取容器内可用的网络地址
130
+ hostname -I 2>/dev/null | awk '{print $1}'
131
+ ```
132
+
133
+ 或直接提示用户:
134
+
135
+ > 请将 `<your-ip>` 替换为你访问 moltbot 的 IP 地址(就是你打开 moltbot 界面时用的那个 IP)。
136
+
137
+ ---
138
+
139
+ ## 四、部署脚本
140
+
141
+ ```bash
142
+ bash /app/skills/website-builder/scripts/deploy_site.sh --name <site-name> --workspace <workspace-dir>
143
+ ```
144
+
145
+ 脚本仅负责创建目录,文件由 agent 用 exec 写入。
146
+
147
+ ---
148
+
149
+ ## 五、多轮修改流程
150
+
151
+ 用户发出修改指令后:
152
+
153
+ 1. 读取现有文件:`cat <workspace>/canvas/<site-name>/index.html`
154
+ 2. 按需修改代码
155
+ 3. 重新写入文件
156
+ 4. 告知用户刷新浏览器即可看到效果(如已开启 live reload,页面会自动刷新)
157
+
158
+ **常见修改示例:**
159
+
160
+ | 用户说 | Agent 动作 |
161
+ | ---------------------- | ---------------------------------- |
162
+ | "把导航栏改成深色背景" | 修改 CSS `.navbar` 背景色 |
163
+ | "按钮换成蓝色渐变" | 修改 button 样式 |
164
+ | "加一个联系我表单" | 新增 form 区块 + 样式 |
165
+ | "首页加个轮播图" | 添加 JS 轮播逻辑 |
166
+ | "字体换成更现代的" | 换用更现代的系统字体栈(遵循"无外部依赖"要求,不引入 Google Fonts 等外部资源) |
167
+ | "把这个部分删掉" | 删除对应 HTML 区块 |
168
+
169
+ ---
170
+
171
+ ## 六、上传截图复刻流程
172
+
173
+ 用户上传网页截图或 UI 图时:
174
+
175
+ 1. **读取图片描述**:系统会在消息上下文中提供图片的文字描述(`[Image] Description: ...`)
176
+ 2. **提取设计元素**:从描述中理解布局、配色、字体、组件结构
177
+ 3. **生成复刻代码**:按理解的设计还原 HTML/CSS
178
+ 4. **展示确认**:告知用户已识别的设计要素,确认后部署
179
+
180
+ 若需要更精确的 OCR 识别,可配合 `sophnet-image-ocr` skill 提取图片中的文字内容。
181
+
182
+ ---
183
+
184
+ ## 七、站点命名规则
185
+
186
+ 站点名称(`<site-name>`)用于 URL 路径,规则:
187
+
188
+ - 只含小写字母、数字、连字符
189
+ - 不含空格和特殊字符
190
+ - 示例:`portfolio`、`my-blog`、`landing-2026`
191
+
192
+ 若用户未指定,Agent 根据需求自动生成一个合适的名称并告知用户。
193
+
194
+ ---
195
+
196
+ ## 八、管理已部署站点
197
+
198
+ ```bash
199
+ # 查看所有已部署站点
200
+ ls <workspace>/canvas/
201
+
202
+ # 删除某个站点
203
+ rm -rf <workspace>/canvas/<site-name>/
204
+
205
+ # 更新站点(直接覆盖文件即可)
206
+ ```
207
+
208
+ ---
209
+
210
+ ## 九、快速建站示例
211
+
212
+ ### 示例 A:个人作品集
213
+
214
+ **用户**:帮我做一个极简风格的个人作品集网站,包含首页、作品、关于我三个页面
215
+
216
+ **Bot**:
217
+
218
+ ```
219
+ 🌐 建站方案确认
220
+
221
+ - 类型:个人作品集
222
+ - 风格:极简(白底、细线、大量留白)
223
+ - 页面:首页(index.html)、作品(works.html)、关于(about.html)
224
+ - 站点名称:portfolio
225
+ - 访问地址:http://<your-ip>:18789/canvas/portfolio/
226
+
227
+ 确认开始?
228
+ ```
229
+
230
+ ### 示例 B:产品 Landing Page
231
+
232
+ **用户**:帮我做一个卖运动鞋的 landing page,苹果官网风格
233
+
234
+ **Bot**:
235
+
236
+ ```
237
+ 🌐 建站方案确认
238
+
239
+ - 类型:产品 Landing Page
240
+ - 风格:极简(深色背景、全屏大图、苹果式排版)
241
+ - 内容:Hero区、产品特性、图片展示、购买CTA
242
+ - 站点名称:sneaker-landing
243
+ - 访问地址:http://<your-ip>:18789/canvas/sneaker-landing/
244
+
245
+ 确认开始?
246
+ ```
247
+
248
+ ### 示例 C:复刻截图
249
+
250
+ **用户**:(上传网页截图)帮我复刻这个界面
251
+
252
+ **Bot**:
253
+
254
+ ```
255
+ 📸 识别到以下设计要素:
256
+
257
+ - 布局:顶部导航 + Hero大图 + 三栏特性介绍 + 底部
258
+ - 配色:深蓝主色 (#1a237e),白色文字,橙色强调色
259
+ - 字体:无衬线,标题大号加粗
260
+ - 组件:固定导航栏、全屏背景图、卡片组件
261
+
262
+ 站点名称:cloned-site
263
+ 访问地址:http://<your-ip>:18789/canvas/cloned-site/
264
+
265
+ 开始复刻?
266
+ ```
@@ -0,0 +1,46 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # deploy_site.sh - Create canvas directory for a website
4
+ #
5
+ # Usage:
6
+ # bash deploy_site.sh --name <site-name> --workspace <workspace-dir>
7
+ #
8
+ # Outputs:
9
+ # SITE_DIR=<absolute path to site directory>
10
+ # SITE_URL_PATH=/canvas/<site-name>/
11
+ #
12
+
13
set -euo pipefail

# Print a message to stderr and abort with exit status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Turn an arbitrary string into a URL-safe site slug: lower-case it, replace
# every character outside [a-z0-9-] with '-', collapse runs of '-', and trim
# a leading/trailing '-'.
# Arguments: $1 - raw site name
# Outputs:   the slug on stdout (empty if nothing usable remains)
#######################################
sanitize_site_name() {
  # printf instead of echo: a name such as "-n" must be treated as data.
  # LC_ALL=C keeps the character classes byte-wise and locale-independent.
  printf '%s' "$1" \
    | LC_ALL=C tr '[:upper:]' '[:lower:]' \
    | LC_ALL=C tr -c 'a-z0-9-' '-' \
    | sed -e 's/--*/-/g' -e 's/^-//' -e 's/-$//'
}

#######################################
# Parse options, create <workspace>/canvas/<site-name>/ and report it.
# Arguments: --name <site-name> (required), --workspace <dir> (optional;
#            defaults to $HOME/.openclaw/workspace)
# Outputs:   SITE_DIR=<absolute path> and SITE_URL_PATH=/canvas/<name>/
# Returns:   exits 1 on any usage error
#######################################
main() {
  local site_name="" workspace_dir="" site_dir

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --name)
        # Explicit check: under `set -u` a missing value would otherwise
        # die with a cryptic "unbound variable" error.
        [[ $# -ge 2 ]] || die "Error: --name requires a value"
        site_name=$2
        shift 2
        ;;
      --workspace)
        [[ $# -ge 2 ]] || die "Error: --workspace requires a value"
        workspace_dir=$2
        shift 2
        ;;
      *)
        die "Unknown option: $1"
        ;;
    esac
  done

  [[ -n "$site_name" ]] || die "Error: --name is required"

  site_name=$(sanitize_site_name "$site_name")
  # A name made only of special characters sanitizes to nothing; without
  # this check the script would operate on the canvas root itself.
  [[ -n "$site_name" ]] \
    || die "Error: --name must contain at least one letter or digit"

  if [[ -z "$workspace_dir" ]]; then
    # Default workspace location under the user's home directory.
    workspace_dir="${HOME:?HOME is not set; pass --workspace}/.openclaw/workspace"
  fi

  site_dir="${workspace_dir}/canvas/${site_name}"
  mkdir -p -- "$site_dir"

  # Report an absolute path even when --workspace was given as a relative
  # one. CDPATH is cleared so `cd` cannot land somewhere else.
  site_dir=$(CDPATH='' cd -- "$site_dir" && pwd)

  printf 'SITE_DIR=%s\n' "$site_dir"
  printf 'SITE_URL_PATH=/canvas/%s/\n' "$site_name"
}

main "$@"