contentclaw-openclaw-plugin 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,71 @@
1
+ # ContentClaw OpenClaw Plugin
2
+
3
+ Native [OpenClaw](https://docs.openclaw.ai) plugin for [ContentClaw](https://www.npmjs.com/package/contentclaw) - generate AI-powered content pages at scale, analyze competitor sitemaps, and serve via REST API.
4
+
5
+ ## Installation
6
+
7
+ ### Prerequisites
8
+
9
+ ```bash
10
+ npm install -g contentclaw
11
+ ```
12
+
13
+ ### Install the plugin
14
+
15
+ ```bash
16
+ openclaw plugins install contentclaw-openclaw-plugin
17
+ ```
18
+
19
+ Or install from a local path:
20
+
21
+ ```bash
22
+ openclaw plugins install ./openclaw-plugin
23
+ ```
24
+
25
+ ## Registered Tools
26
+
27
+ | Tool | Description |
28
+ |------|-------------|
29
+ | `contentclaw_generate` | Generate content pages from keywords (auto, blog, glossary, comparison, listicle, how-to, alternatives, review, landing, hub) |
30
+ | `contentclaw_competitor` | Analyze competitor sitemap and generate competing content |
31
+ | `contentclaw_pages` | List all generated pages from the database |
32
+ | `contentclaw_page` | Get a specific page by slug with full HTML body and links |
33
+ | `contentclaw_serve` | Start the ContentClaw API server and dashboard |
34
+
35
+ ## Configuration
36
+
37
+ Add to your OpenClaw config under `plugins.entries.contentclaw.config`:
38
+
39
+ ```json
40
+ {
41
+ "plugins": {
42
+ "entries": {
43
+ "contentclaw": {
44
+ "config": {
45
+ "defaultProvider": "openai",
46
+ "defaultModel": "gpt-5.4",
47
+ "language": "en",
48
+ "webSearch": true
49
+ }
50
+ }
51
+ }
52
+ }
53
+ }
54
+ ```
55
+
56
+ ## Example Agent Prompts
57
+
58
+ ```
59
+ Generate 20 pages about "technical SEO" using OpenAI
60
+ Analyze ahrefs.com sitemap and create competing content about SEO
61
+ Show me all generated pages
62
+ Start the content dashboard
63
+ ```
64
+
65
+ ## Links
66
+
67
+ - [ContentClaw npm](https://www.npmjs.com/package/contentclaw)
68
+ - [ContentClaw GitHub](https://github.com/metehan777/contentclaw)
69
+ - [OpenClaw Docs](https://docs.openclaw.ai/plugin)
70
+
71
+ Built by [metehan.ai](https://metehan.ai)
@@ -0,0 +1 @@
1
/** Plugin entry point: receives the host `api` object and registers the ContentClaw tools on it via `api.registerTool`. */
+ export default function register(api: any): void;
package/dist/index.js ADDED
@@ -0,0 +1,331 @@
1
+ import { execSync } from "node:child_process";
2
/**
 * Execute a shell command synchronously and hand back its trimmed stdout.
 *
 * @param cmd Command line passed to the shell as-is.
 * @param cwd Directory to run in; any falsy value means the current process directory.
 * @returns Captured stdout as a UTF-8 string with surrounding whitespace removed.
 * @throws Whatever `execSync` throws (non-zero exit, timeout after 5 minutes,
 *         or more than 10 MiB of output).
 */
function run(cmd, cwd) {
    const workingDir = cwd || process.cwd();
    const stdout = execSync(cmd, {
        maxBuffer: 10 * 1024 * 1024,
        timeout: 5 * 60 * 1000,
        encoding: "utf-8",
        cwd: workingDir,
    });
    return stdout.trim();
}
10
+ function ensureBinary() {
11
+ try {
12
+ run("contentclaw --version");
13
+ return true;
14
+ }
15
+ catch {
16
+ return false;
17
+ }
18
+ }
19
+ export default function register(api) {
20
+ // Tool: Generate content pages
21
+ api.registerTool({
22
+ name: "contentclaw_generate",
23
+ description: "Generate AI-powered content pages from keywords. Supports auto (AI-planned), blog, glossary, comparison, listicle, how-to, alternatives, review, landing, and hub content types. Pages are stored in a local SQLite database and served via REST API.",
24
+ parameters: {
25
+ type: "object",
26
+ properties: {
27
+ keywords: {
28
+ type: "array",
29
+ items: { type: "string" },
30
+ description: "Keywords or topics to generate content for",
31
+ },
32
+ provider: {
33
+ type: "string",
34
+ enum: ["openai", "gemini", "anthropic", "xai", "qwen", "ollama"],
35
+ description: "AI provider to use (default: openai)",
36
+ },
37
+ type: {
38
+ type: "string",
39
+ enum: [
40
+ "auto",
41
+ "blog",
42
+ "landing",
43
+ "glossary",
44
+ "comparison",
45
+ "listicle",
46
+ "how-to",
47
+ "alternatives",
48
+ "review",
49
+ "hub",
50
+ ],
51
+ description: "Content type. Use 'auto' (default) to let AI plan the best mix.",
52
+ },
53
+ language: {
54
+ type: "string",
55
+ description: "Content language code (default: en)",
56
+ },
57
+ force: {
58
+ type: "boolean",
59
+ description: "Overwrite existing pages with the same slug",
60
+ },
61
+ noWebSearch: {
62
+ type: "boolean",
63
+ description: "Disable web search/grounding (external links will be stripped)",
64
+ },
65
+ },
66
+ required: ["keywords"],
67
+ },
68
+ async execute(_id, params) {
69
+ if (!ensureBinary()) {
70
+ return {
71
+ content: [
72
+ {
73
+ type: "text",
74
+ text: JSON.stringify({
75
+ success: false,
76
+ error: "contentclaw is not installed. Run: npm install -g contentclaw",
77
+ }),
78
+ },
79
+ ],
80
+ };
81
+ }
82
+ const args = ["contentclaw", "generate"];
83
+ args.push(...(params.keywords || []));
84
+ args.push("--json", "--yes");
85
+ if (params.provider)
86
+ args.push("-p", params.provider);
87
+ if (params.type)
88
+ args.push("-t", params.type);
89
+ if (params.language)
90
+ args.push("-l", params.language);
91
+ if (params.force)
92
+ args.push("--force");
93
+ if (params.noWebSearch)
94
+ args.push("--no-web-search");
95
+ try {
96
+ const output = run(args.join(" "));
97
+ return { content: [{ type: "text", text: output }] };
98
+ }
99
+ catch (err) {
100
+ return {
101
+ content: [
102
+ {
103
+ type: "text",
104
+ text: JSON.stringify({
105
+ success: false,
106
+ error: err.stderr || err.message || String(err),
107
+ }),
108
+ },
109
+ ],
110
+ };
111
+ }
112
+ },
113
+ });
114
+ // Tool: Analyze competitor sitemap
115
+ api.registerTool({
116
+ name: "contentclaw_competitor",
117
+ description: "Analyze a competitor's sitemap and generate content to beat them. Crawls the sitemap, finds content gaps and opportunities, and creates competing pages.",
118
+ parameters: {
119
+ type: "object",
120
+ properties: {
121
+ topic: {
122
+ type: "string",
123
+ description: "Topic focus for the competitor analysis",
124
+ },
125
+ sitemapUrl: {
126
+ type: "string",
127
+ description: "Full URL to the competitor's sitemap.xml",
128
+ },
129
+ provider: {
130
+ type: "string",
131
+ enum: ["openai", "gemini", "anthropic", "xai", "qwen", "ollama"],
132
+ description: "AI provider to use",
133
+ },
134
+ },
135
+ required: ["topic", "sitemapUrl"],
136
+ },
137
+ async execute(_id, params) {
138
+ if (!ensureBinary()) {
139
+ return {
140
+ content: [
141
+ {
142
+ type: "text",
143
+ text: JSON.stringify({
144
+ success: false,
145
+ error: "contentclaw is not installed. Run: npm install -g contentclaw",
146
+ }),
147
+ },
148
+ ],
149
+ };
150
+ }
151
+ const args = [
152
+ "contentclaw",
153
+ "generate",
154
+ JSON.stringify(params.topic),
155
+ "--competitor",
156
+ params.sitemapUrl,
157
+ "--json",
158
+ "--yes",
159
+ ];
160
+ if (params.provider)
161
+ args.push("-p", params.provider);
162
+ try {
163
+ const output = run(args.join(" "));
164
+ return { content: [{ type: "text", text: output }] };
165
+ }
166
+ catch (err) {
167
+ return {
168
+ content: [
169
+ {
170
+ type: "text",
171
+ text: JSON.stringify({
172
+ success: false,
173
+ error: err.stderr || err.message || String(err),
174
+ }),
175
+ },
176
+ ],
177
+ };
178
+ }
179
+ },
180
+ });
181
+ // Tool: List generated pages
182
+ api.registerTool({
183
+ name: "contentclaw_pages",
184
+ description: "List all generated content pages from the local ContentClaw database. Returns titles, slugs, types, and dates.",
185
+ parameters: {
186
+ type: "object",
187
+ properties: {
188
+ page: {
189
+ type: "number",
190
+ description: "Page number for pagination (default: 1)",
191
+ },
192
+ limit: {
193
+ type: "number",
194
+ description: "Results per page (default: 20)",
195
+ },
196
+ type: {
197
+ type: "string",
198
+ description: "Filter by content type",
199
+ },
200
+ },
201
+ },
202
+ async execute(_id, params) {
203
+ if (!ensureBinary()) {
204
+ return {
205
+ content: [
206
+ {
207
+ type: "text",
208
+ text: JSON.stringify({
209
+ success: false,
210
+ error: "contentclaw is not installed. Run: npm install -g contentclaw",
211
+ }),
212
+ },
213
+ ],
214
+ };
215
+ }
216
+ const port = 3099;
217
+ let url = `http://localhost:${port}/api/pages?`;
218
+ if (params.page)
219
+ url += `page=${params.page}&`;
220
+ if (params.limit)
221
+ url += `limit=${params.limit}&`;
222
+ if (params.type)
223
+ url += `type=${params.type}&`;
224
+ try {
225
+ const output = run(`curl -s "${url}"`);
226
+ return { content: [{ type: "text", text: output }] };
227
+ }
228
+ catch (err) {
229
+ return {
230
+ content: [
231
+ {
232
+ type: "text",
233
+ text: JSON.stringify({
234
+ success: false,
235
+ error: "ContentClaw server is not running. Start it with: contentclaw serve",
236
+ }),
237
+ },
238
+ ],
239
+ };
240
+ }
241
+ },
242
+ });
243
+ // Tool: Get a specific page
244
+ api.registerTool({
245
+ name: "contentclaw_page",
246
+ description: "Get a specific generated page by slug. Returns title, meta description, body HTML, internal links, and external links.",
247
+ parameters: {
248
+ type: "object",
249
+ properties: {
250
+ slug: {
251
+ type: "string",
252
+ description: "The page slug to retrieve",
253
+ },
254
+ },
255
+ required: ["slug"],
256
+ },
257
+ async execute(_id, params) {
258
+ try {
259
+ const output = run(`curl -s "http://localhost:3099/api/pages/${params.slug}"`);
260
+ return { content: [{ type: "text", text: output }] };
261
+ }
262
+ catch (err) {
263
+ return {
264
+ content: [
265
+ {
266
+ type: "text",
267
+ text: JSON.stringify({
268
+ success: false,
269
+ error: "ContentClaw server is not running. Start it with: contentclaw serve",
270
+ }),
271
+ },
272
+ ],
273
+ };
274
+ }
275
+ },
276
+ });
277
+ // Tool: Start the server
278
+ api.registerTool({
279
+ name: "contentclaw_serve",
280
+ description: "Start the ContentClaw API server and dashboard on localhost:3099. The server provides REST endpoints for pages and a web dashboard with real-time updates.",
281
+ parameters: {
282
+ type: "object",
283
+ properties: {
284
+ port: {
285
+ type: "number",
286
+ description: "Server port (default: 3099)",
287
+ },
288
+ },
289
+ },
290
+ async execute(_id, params) {
291
+ if (!ensureBinary()) {
292
+ return {
293
+ content: [
294
+ {
295
+ type: "text",
296
+ text: JSON.stringify({
297
+ success: false,
298
+ error: "contentclaw is not installed. Run: npm install -g contentclaw",
299
+ }),
300
+ },
301
+ ],
302
+ };
303
+ }
304
+ const port = params.port || 3099;
305
+ try {
306
+ execSync(`contentclaw serve --port ${port} &`, {
307
+ encoding: "utf-8",
308
+ timeout: 5000,
309
+ stdio: "ignore",
310
+ });
311
+ }
312
+ catch {
313
+ // background process - expected to "fail" exec since it keeps running
314
+ }
315
+ return {
316
+ content: [
317
+ {
318
+ type: "text",
319
+ text: JSON.stringify({
320
+ success: true,
321
+ message: `ContentClaw server starting on port ${port}`,
322
+ dashboard: `http://localhost:${port}`,
323
+ api: `http://localhost:${port}/api`,
324
+ docs: `http://localhost:${port}/docs`,
325
+ }),
326
+ },
327
+ ],
328
+ };
329
+ },
330
+ });
331
+ }
@@ -0,0 +1,59 @@
1
+ {
2
+ "id": "contentclaw",
3
+ "name": "ContentClaw",
4
+ "version": "1.0.0",
5
+ "description": "Generate AI-powered content pages at scale, analyze competitor sitemaps, and serve via REST API. Supports OpenAI, Gemini, Anthropic, xAI, Qwen, and Ollama.",
6
+ "skills": ["./skills"],
7
+ "configSchema": {
8
+ "type": "object",
9
+ "additionalProperties": false,
10
+ "properties": {
11
+ "defaultProvider": {
12
+ "type": "string",
13
+ "enum": ["openai", "gemini", "anthropic", "xai", "qwen", "ollama"],
14
+ "default": "openai",
15
+ "description": "Default AI provider for content generation"
16
+ },
17
+ "defaultModel": {
18
+ "type": "string",
19
+ "description": "Default model override (uses provider default if empty)"
20
+ },
21
+ "language": {
22
+ "type": "string",
23
+ "default": "en",
24
+ "description": "Content language code"
25
+ },
26
+ "webSearch": {
27
+ "type": "boolean",
28
+ "default": true,
29
+ "description": "Enable web search/grounding for real external links"
30
+ },
31
+ "workingDirectory": {
32
+ "type": "string",
33
+ "description": "Directory where contentclaw.db and config live (defaults to cwd)"
34
+ }
35
+ }
36
+ },
37
+ "uiHints": {
38
+ "defaultProvider": {
39
+ "label": "AI Provider",
40
+ "placeholder": "openai"
41
+ },
42
+ "defaultModel": {
43
+ "label": "Model",
44
+ "placeholder": "gpt-5.4"
45
+ },
46
+ "language": {
47
+ "label": "Language",
48
+ "placeholder": "en"
49
+ },
50
+ "webSearch": {
51
+ "label": "Web Search",
52
+ "placeholder": "true"
53
+ },
54
+ "workingDirectory": {
55
+ "label": "Working Directory",
56
+ "placeholder": "/path/to/project"
57
+ }
58
+ }
59
+ }
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "contentclaw-openclaw-plugin",
3
+ "version": "1.0.0",
4
+ "description": "OpenClaw plugin for ContentClaw - generate AI content pages, analyze competitors, serve via REST API",
5
+ "main": "dist/index.js",
6
+ "type": "module",
7
+ "scripts": {
8
+ "build": "tsc",
9
+ "prepublishOnly": "npm run build"
10
+ },
11
+ "keywords": [
12
+ "openclaw",
13
+ "openclaw-plugin",
14
+ "contentclaw",
15
+ "seo",
16
+ "content-generation",
17
+ "ai-content",
18
+ "programmatic-seo"
19
+ ],
20
+ "author": "metehan.ai",
21
+ "license": "MIT",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/metehan777/contentclaw"
25
+ },
26
+ "files": [
27
+ "dist/",
28
+ "skills/",
29
+ "openclaw.plugin.json",
30
+ "README.md"
31
+ ],
32
+ "devDependencies": {
33
+ "typescript": "^5.0.0"
34
+ }
35
+ }
@@ -0,0 +1,541 @@
1
+ ---
2
+ name: contentclaw
3
+ description: Universal content engine - generate pages with AI from any topic, analyze competitor sitemaps, and serve via local REST API. Use when the user asks to create content, generate pages, analyze competitors, create glossary entries, landing pages, comparisons, listicles, how-to guides, alternatives, reviews, hub pages, bulk-create articles, expand topics, or start a content API. Features AI content planning with strict accuracy rules, competitor sitemap analysis with Parallel.ai deep extraction, web-grounded external links, natural internal linking with no 404s, emdash stripping, rate limiting, parallel generation, duplicate detection, and template mode. Supports OpenAI, Gemini, Anthropic, xAI, Qwen, and Ollama.
4
+ metadata: {"openclaw":{"emoji":"🦞","requires":{"anyBins":["contentclaw"]},"os":["linux","darwin","win32"]}}
5
+ ---
6
+
7
+ # ContentClaw - Universal Content Engine
8
+
9
+ Generate pages with AI for any topic, analyze competitor sitemaps, and serve via local REST API for any CMS. Works for any subject - cooking, fitness, law, SaaS, travel, education, not just SEO. Built by [metehan.ai](https://metehan.ai).
10
+
11
+ **npm:** [contentclaw](https://www.npmjs.com/package/contentclaw) | **GitHub:** [metehan777/contentclaw](https://github.com/metehan777/contentclaw)
12
+
13
+ ## When to Use
14
+
15
+ - User wants to generate content pages at scale for any topic
16
+ - User wants to analyze a competitor's website/sitemap and generate competing content
17
+ - User wants glossary, comparison, listicle, how-to, alternatives, review, landing, hub, or blog pages
18
+ - User wants to expand a single topic into a full content strategy (15-25 pages)
19
+ - User needs a local REST API to serve generated content to any CMS (WordPress, Webflow, Framer, custom)
20
+ - User asks to bulk-create articles with AI using templates and variable files
21
+ - User wants duplicate-aware generation that skips existing slugs or refreshes stale content
22
+
23
+ ## Installation
24
+
25
+ ```bash
26
+ npm install -g contentclaw
27
+ ```
28
+
29
+ ## Prerequisites
30
+
31
+ ### API Keys
32
+
33
+ At least one AI provider API key is required (except for Ollama, which runs locally and needs none). Set it as an environment variable, pass it via `--api-key`, or store it in `contentclaw.config.json`:
34
+
35
+ | Provider | Environment Variable | Default Model | Notes |
36
+ |----------|---------------------|---------------|-------|
37
+ | OpenAI | `OPENAI_API_KEY` | `gpt-5.4` | Web search via `web_search` tool |
38
+ | Gemini | `GEMINI_API_KEY` | `gemini-3-flash-preview` | Web search via `googleSearch` grounding |
39
+ | Anthropic | `ANTHROPIC_API_KEY` | `claude-opus-4-6` | Web search via `web_search_20250305` tool |
40
+ | xAI (Grok) | `XAI_API_KEY` | `grok-4-1-fast` | Web search via `web_search` tool |
41
+ | Qwen | `QWEN_API_KEY` | `qwen-max` | No native web search |
42
+ | Ollama | *(none needed)* | `llama3` | Local models, no web search |
43
+
44
+ ### Optional Keys
45
+
46
+ | Key | Purpose |
47
+ |-----|---------|
48
+ | `PARALLEL_API_KEY` | Deep competitor page extraction via Parallel.ai Extract API |
49
+
50
+ ## Configuration
51
+
52
+ ### Interactive Setup
53
+
54
+ ```bash
55
+ contentclaw init
56
+ ```
57
+
58
+ This creates `contentclaw.config.json` in the current directory with all settings.
59
+
60
+ ### Config File Structure (`contentclaw.config.json`)
61
+
62
+ ```json
63
+ {
64
+ "provider": "openai",
65
+ "model": "gpt-5.4",
66
+ "apiKey": "sk-...",
67
+ "language": "en",
68
+ "tone": "informative",
69
+ "wordCount": 1500,
70
+ "webSearch": true,
71
+ "brand": {
72
+ "name": "Your Brand",
73
+ "url": "https://yourbrand.com",
74
+ "description": "Short brand description for natural mentions in content"
75
+ },
76
+ "internalLinking": {
77
+ "enabled": true,
78
+ "source": "sitemap",
79
+ "sitemapUrl": "https://yourbrand.com/sitemap.xml",
80
+ "urls": [],
81
+ "slugPrefix": "blog"
82
+ },
83
+ "server": {
84
+ "port": 3099,
85
+ "host": "localhost"
86
+ },
87
+ "rateLimit": {
88
+ "openai": { "rpm": 500, "concurrency": 5, "batchSize": 5 },
89
+ "gemini": { "rpm": 1000, "concurrency": 10, "batchSize": 10 }
90
+ }
91
+ }
92
+ ```
93
+
94
+ ### Config Properties
95
+
96
+ | Property | Type | Default | Description |
97
+ |----------|------|---------|-------------|
98
+ | `provider` | string | `"openai"` | Default AI provider |
99
+ | `model` | string | *(per provider)* | Model override |
100
+ | `apiKey` | string | *(env var)* | API key (overrides env var) |
101
+ | `language` | string | `"en"` | Content language code |
102
+ | `tone` | string | `"informative"` | Tone: informative, professional, casual, academic, persuasive, conversational |
103
+ | `wordCount` | number | `1500` | Target word count per page |
104
+ | `webSearch` | boolean | `true` | Enable web search/grounding for real external links |
105
+ | `brand.name` | string | — | Brand name for natural mentions |
106
+ | `brand.url` | string | — | Brand website URL |
107
+ | `brand.description` | string | — | Short brand description |
108
+ | `internalLinking.enabled` | boolean | `false` | Enable internal linking |
109
+ | `internalLinking.source` | string | `"manual"` | Link source: `sitemap`, `manual`, or `both` |
110
+ | `internalLinking.sitemapUrl` | string | — | Sitemap URL to fetch link targets from |
111
+ | `internalLinking.urls` | string[] | `[]` | Manual URL list for internal links |
112
+ | `internalLinking.slugPrefix` | string | — | URL path prefix for generated pages (e.g. `"blog"` → `/blog/slug`) |
113
+ | `server.port` | number | `3099` | API server port |
114
+ | `server.host` | string | `"localhost"` | API server host |
115
+ | `rateLimit.<provider>` | object | *(see defaults)* | Per-provider rate limit override |
116
+
117
+ ### Rate Limit Defaults (Tier 1)
118
+
119
+ | Provider | RPM | Concurrency | Batch Size |
120
+ |----------|-----|-------------|------------|
121
+ | OpenAI | 500 | 5 | 5 |
122
+ | Gemini | 1000 | 10 | 10 |
123
+ | Anthropic | 50 | 3 | 3 |
124
+ | xAI | 60 | 3 | 3 |
125
+ | Qwen | 60 | 3 | 3 |
126
+ | Ollama | 999 | 1 | 1 |
127
+
128
+ You can override any of these in `contentclaw.config.json` under `rateLimit.<provider>`.
129
+
130
+ ## Content Types
131
+
132
+ | Type | What it generates | Approx. words |
133
+ |------|-------------------|---------------|
134
+ | `blog` | Long-form article with sections, key takeaways | ~1500 |
135
+ | `landing` | Conversion-focused page with hero, benefits, CTA | ~800 |
136
+ | `glossary` | Definition with Schema.org markup, related terms | ~500 |
137
+ | `comparison` | Head-to-head with comparison table, verdict | ~1200 |
138
+ | `listicle` | Ranked list with dynamic count in title (e.g. "7 Best...") | ~1200 |
139
+ | `how-to` | Step-by-step with HowTo schema markup | ~1200 |
140
+ | `alternatives` | 5-8 alternatives with comparison table, use cases | ~1200 |
141
+ | `review` | Pros/cons, pricing, features, verdict | ~1200 |
142
+ | `hub` | Pillar page linking to sub-pages, topic overview | ~2000 |
143
+ | `auto` | **AI plans a full content strategy** — picks types and keywords automatically (default) | varies |
144
+
145
+ ## Commands
146
+
147
+ ### `contentclaw init`
148
+
149
+ Interactive setup wizard. Creates `contentclaw.config.json` with provider, model, API key, language, tone, word count, brand info, internal linking config, server port, and web search preference.
150
+
151
+ ### `contentclaw generate [keywords...]`
152
+
153
+ Main generation command. Generates content pages and stores them in a local SQLite database (`contentclaw.db`).
154
+
155
+ #### All CLI Flags
156
+
157
+ | Flag | Description | Default |
158
+ |------|-------------|---------|
159
+ | `-i, --input <file>` | Seed data file (CSV or JSON) | — |
160
+ | `-e, --expand <count>` | Expand each keyword into N long-tail variations | — |
161
+ | `-p, --provider <name>` | AI provider: `openai`, `gemini`, `anthropic`, `xai`, `qwen`, `ollama` | config or `openai` |
162
+ | `-m, --model <name>` | Model name override | config or provider default |
163
+ | `-k, --api-key <key>` | API key override | config or env var |
164
+ | `-l, --language <lang>` | Content language | config or `en` |
165
+ | `-t, --type <type>` | Content type: `auto`, `blog`, `landing`, `glossary`, `comparison`, `listicle`, `how-to`, `alternatives`, `review`, `hub` | `auto` |
166
+ | `--template <pattern>` | Template pattern with `{variables}`, e.g. `"{service} in {city}"` | — |
167
+ | `--vars <files...>` | Variable files for template (one value per line, one file per variable) | — |
168
+ | `--competitor <sitemap>` | Analyze competitor sitemap URL and generate competing content | — |
169
+ | `--no-web-search` | Disable web search/grounding (strips external links from body) | web search on |
170
+ | `--force` | Overwrite existing pages with the same slug | skip existing |
171
+ | `--refresh <days>` | Only regenerate pages older than N days | — |
172
+ | `--json` | Machine-readable JSON output (no spinners, no colors) | — |
173
+ | `-y, --yes` | Skip all interactive prompts, use defaults | — |
174
+
175
+ ### `contentclaw serve`
176
+
177
+ Start the API server with dashboard.
178
+
179
+ | Flag | Description | Default |
180
+ |------|-------------|---------|
181
+ | `--port <number>` | Server port | `3099` |
182
+ | `--host <address>` | Server host | `localhost` |
183
+
184
+ ## Usage Examples
185
+
186
+ ### AI-Planned Content (Default - `auto` mode)
187
+
188
+ The AI analyzes the topic and generates 15-25 pages with the best content types and specific, keyword-rich slugs. It's aware of existing pages and won't duplicate them.
189
+
190
+ ```bash
191
+ contentclaw generate "sourdough bread" -p openai
192
+ contentclaw generate "email marketing" -p gemini
193
+ contentclaw generate "kubernetes" -p anthropic --json --yes
194
+ ```
195
+
196
+ ### Competitor Analysis
197
+
198
+ Fetches the competitor's sitemap, analyzes their content, finds gaps and opportunities, and generates a plan to beat them. If `PARALLEL_API_KEY` is set, it uses deep page extraction for richer analysis.
199
+
200
+ ```bash
201
+ contentclaw generate "seo" --competitor https://ahrefs.com/sitemap.xml -p xai
202
+ contentclaw generate "coffee" --competitor https://competitor.com/sitemap.xml -p openai
203
+
204
+ # With Parallel.ai deep extraction
205
+ PARALLEL_API_KEY="key" contentclaw generate "seo" --competitor https://moz.com/sitemap.xml -p gemini
206
+ ```
207
+
208
+ ### Force a Content Type
209
+
210
+ ```bash
211
+ contentclaw generate "what is crawl budget" --type glossary -p openai
212
+ contentclaw generate "plumber in Austin" --type landing -p gemini
213
+ contentclaw generate "Ahrefs vs Semrush" --type comparison -p xai
214
+ contentclaw generate "best project management tools" --type listicle -p anthropic
215
+ contentclaw generate "Notion alternatives" --type alternatives -p openai
216
+ contentclaw generate "Figma review" --type review -p gemini
217
+ contentclaw generate "how to set up Google Analytics" --type how-to -p openai
218
+ contentclaw generate "complete guide to technical SEO" --type hub -p anthropic
219
+ ```
220
+
221
+ ### Expand a Keyword into Variations
222
+
223
+ ```bash
224
+ contentclaw generate "coffee" --expand 20 --type glossary -p openai
225
+ contentclaw generate "yoga" --expand 15 --type blog -p gemini
226
+ ```
227
+
228
+ ### Template Mode (Bulk Cross-Multiplication)
229
+
230
+ Create variable files (one value per line) and a template pattern. ContentClaw generates the Cartesian product.
231
+
232
+ ```bash
233
+ # services.txt: plumber, electrician, roofer
234
+ # cities.txt: Austin, Dallas, Houston
235
+
236
+ contentclaw generate --template "{service} in {city}" --vars services.txt cities.txt --type landing -p openai
237
+ # Generates 9 pages: plumber in Austin, plumber in Dallas, ... roofer in Houston
238
+ ```
239
+
240
+ ### Seed File Input
241
+
242
+ CSV or JSON files with keyword, optional slug, optional type, optional custom_prompt:
243
+
244
+ ```bash
245
+ contentclaw generate --input seeds.csv -p gemini
246
+ contentclaw generate --input seeds.json -p openai --json --yes
247
+ ```
248
+
249
+ **CSV format:**
250
+ ```csv
251
+ keyword,type,slug
252
+ best seo tools 2026,listicle,best-seo-tools-2026
253
+ what is E-E-A-T,glossary,what-is-eeat
254
+ Ahrefs vs Semrush,comparison,ahrefs-vs-semrush
255
+ ```
256
+
257
+ **JSON format:**
258
+ ```json
259
+ [
260
+ { "keyword": "best seo tools 2026", "type": "listicle" },
261
+ { "keyword": "what is E-E-A-T", "type": "glossary" }
262
+ ]
263
+ ```
264
+
265
+ ### Duplicate Handling
266
+
267
+ ```bash
268
+ # Skip existing slugs (default behavior)
269
+ contentclaw generate "topic" -p openai
270
+
271
+ # Force overwrite existing pages
272
+ contentclaw generate "topic" --force -p openai
273
+
274
+ # Only regenerate pages older than 30 days
275
+ contentclaw generate "topic" --refresh 30 -p openai
276
+ ```
277
+
278
+ ### Disable Web Search
279
+
280
+ ```bash
281
+ contentclaw generate "topic" --no-web-search -p openai
282
+ ```
283
+
284
+ When web search is disabled, external links are stripped from the body to prevent hallucinated URLs.
285
+
286
+ ### Start the API Server
287
+
288
+ ```bash
289
+ contentclaw serve
290
+ contentclaw serve --port 8080 --host 0.0.0.0
291
+ ```
292
+
293
+ ## API Endpoints
294
+
295
+ Default base URL: `http://localhost:3099`
296
+
297
+ | Method | Endpoint | Description | Parameters |
298
+ |--------|----------|-------------|------------|
299
+ | `GET` | `/api/health` | Health check | — |
300
+ | `GET` | `/api/pages` | List pages (paginated) | `?page=1&limit=20&type=blog` |
301
+ | `GET` | `/api/pages/:slug` | Get single page with all fields | — |
302
+ | `DELETE` | `/api/pages/:slug` | Delete a page | — |
303
+ | `POST` | `/api/generate` | Generate pages via API | JSON body (see below) |
304
+
305
+ ### POST `/api/generate` Body
306
+
307
+ ```json
308
+ {
309
+ "keywords": ["best seo tools 2026"],
310
+ "provider": "openai",
311
+ "type": "listicle",
312
+ "language": "en"
313
+ }
314
+ ```
315
+
316
+ ### Page Response Shape
317
+
318
+ ```json
319
+ {
320
+ "slug": "best-seo-tools-2026",
321
+ "keyword": "best SEO tools 2026",
322
+ "title": "10 Best SEO Tools for 2026",
323
+ "meta_description": "Compare the top SEO tools...",
324
+ "body": "<h2>...</h2><p>...</p>",
325
+ "internal_links": [
326
+ { "anchor": "technical SEO guide", "url": "/blog/technical-seo" }
327
+ ],
328
+ "external_links": [
329
+ { "anchor": "Ahrefs", "url": "https://ahrefs.com", "title": "Ahrefs" }
330
+ ],
331
+ "page_type": "listicle",
332
+ "published_date": "2026-03-18",
333
+ "created_at": "2026-03-18T19:00:00.000Z"
334
+ }
335
+ ```
336
+
337
+ ## Dashboard
338
+
339
+ Accessible at `http://localhost:3099` when the server is running. Features:
340
+
341
+ - **Stats cards**: Total pages, content types breakdown, latest page
342
+ - **Page grid**: All generated pages with title, slug, type badge, date, internal/external link counts
343
+ - **Page detail modal**: Full HTML body preview, meta description, all links
344
+ - **Real-time updates**: Auto-refreshes every 3 seconds with toast notifications for new pages (no manual refresh needed)
345
+ - **Live indicator**: Pulsing green dot shows the dashboard is connected and updating
346
+ - **Warning banner**: Content-at-scale warning about Google penalties
347
+ - **Follow SEO Experts page**: Curated list of SEO experts with X and LinkedIn handles
348
+ - **API Docs page**: Full endpoint documentation with examples
349
+
350
+ ## Content Quality Features
351
+
352
+ ### Web Search & External Links
353
+
354
+ When `webSearch` is enabled (default):
355
+ - **OpenAI**: Uses `web_search` tool for grounded responses
356
+ - **Gemini**: Uses `googleSearch` grounding with citation extraction from `groundingChunks` (filters out Google redirect URLs)
357
+ - **Anthropic**: Uses `web_search_20250305` tool
358
+ - **xAI**: Uses `web_search` tool
359
+ - External links are embedded inline in the HTML body
360
+ - For `listicle`, `alternatives`, `review`: links to every product/tool homepage
361
+ - For other types: 2-5 authoritative external links
362
+ - Provider citations are merged with body-extracted links and deduplicated
363
+
364
+ When `webSearch` is disabled (`--no-web-search`):
365
+ - No external links in body (stripped by post-processor)
366
+ - AI is instructed not to include any external URLs
367
+
368
+ ### Internal Linking
369
+
370
+ - Pulls link targets from sitemap XML, manual URL list, or both
371
+ - Queries existing pages from the local SQLite database
372
+ - Builds prompt section with available internal pages (filtered by `slugPrefix`, excludes current page)
373
+ - Post-processes body HTML to validate all internal links exist — removes any link to a non-existent page
374
+ - 2-5 internal links per page with natural, contextual anchor text
375
+
376
+ ### Content Accuracy Rules
377
+
378
+ The AI planner (`auto` mode and `--competitor` mode) enforces strict rules:
379
+ - **Comparisons**: Only products in the same category serving the same purpose (e.g. "Ahrefs vs Semrush" is valid; "Ahrefs vs Perplexity" is invalid)
380
+ - **Alternatives**: Main product must be real and well-known; alternatives must serve the same function
381
+ - **Reviews**: Only real, currently available products relevant to the topic
382
+ - **Listicles**: Every item must be a real, verifiable thing
383
+ - **Glossaries**: Real industry terminology that practitioners actually use
384
+ - **How-to**: Specific, achievable tasks — not vague concepts
385
+
386
+ ### Post-Processing
387
+
388
+ All generated content goes through:
389
+ 1. **Emdash stripping** — `—` → ` - ` and `–` → `-`
390
+ 2. **External link validation** — strips all `<a href="https://...">` if web search is off
391
+ 3. **Internal link validation** — removes links to non-existent pages
392
+ 4. **Link deduplication** — both internal and external links are deduplicated
393
+ 5. **50+ forbidden AI phrases** blocked (e.g. "in today's fast-paced", "dive deep", "game-changer", "navigate the landscape", etc.)
394
+
395
+ ### Other Quality Features
396
+
397
+ - **Current year awareness**: Every prompt includes today's date and current year (2026)
398
+ - **Brand awareness**: Natural brand mentions when configured (not forced)
399
+ - **YMYL disclaimers**: Automatic disclaimer for health, finance, legal topics
400
+ - **3+ FAQs**: Added at the end of every content page
401
+ - **Comparison tables**: Included in comparison, alternatives, and relevant content types
402
+ - **Dynamic listicle titles**: Count in title matches actual list items (not a static number)
403
+ - **HowTo schema**: Step-by-step pages include Schema.org HowTo markup
404
+ - **Glossary schema**: Definition pages include Schema.org DefinedTerm markup
405
+
406
+ ## Database
407
+
408
+ ContentClaw uses SQLite (`contentclaw.db`) in the current working directory. Features:
409
+
410
+ - **Auto-reconnection**: The server detects when the database file changes (inode-based) and reconnects automatically. No need to restart the server after running `generate`.
411
+ - **WAL mode**: Write-Ahead Logging for better concurrent read/write performance.
412
+ - **Schema migrations**: Automatic column additions when upgrading versions.
413
+
414
+ ### Stored Fields
415
+
416
+ | Column | Type | Description |
417
+ |--------|------|-------------|
418
+ | `id` | INTEGER | Auto-increment primary key |
419
+ | `slug` | TEXT | Unique URL slug |
420
+ | `keyword` | TEXT | Source keyword |
421
+ | `title` | TEXT | Page title |
422
+ | `meta_description` | TEXT | Meta description |
423
+ | `body` | TEXT | Full HTML body content |
424
+ | `internal_links` | TEXT | JSON array of `{anchor, url}` |
425
+ | `external_links` | TEXT | JSON array of `{anchor, url, title}` |
426
+ | `page_type` | TEXT | Content type (blog, glossary, etc.) |
427
+ | `published_date` | TEXT | ISO date string |
428
+ | `created_at` | DATETIME | Auto-set on insert |
429
+ | `updated_at` | DATETIME | Auto-set on insert/update |
430
+
431
+ ## Rate Limiting & Parallel Generation
432
+
433
+ - **Per-provider RPM limits**: Enforced via a 60-second sliding window
434
+ - **Concurrency control**: Max simultaneous requests per provider
435
+ - **Batch processing**: Pages generated in configurable batch sizes
436
+ - **Automatic throttling**: When RPM limit is hit, waits until the window clears
437
+ - **In-memory cache**: Identical prompts return cached responses (1-hour TTL, max 10,000 entries)
438
+ - **Parallel mode**: Automatically enabled when generating >3 pages with batch size >1
439
+
440
+ Override defaults in config:
441
+
442
+ ```json
443
+ {
444
+ "rateLimit": {
445
+ "gemini": { "rpm": 2000, "concurrency": 15, "batchSize": 15 },
446
+ "openai": { "rpm": 1000, "concurrency": 10, "batchSize": 10 }
447
+ }
448
+ }
449
+ ```
450
+
451
+ ## Competitor Analysis
452
+
453
+ The `--competitor` flag enables sitemap-based competitor analysis:
454
+
455
+ 1. **Sitemap crawling**: Recursively fetches and parses the competitor's sitemap (supports nested sitemap indexes)
456
+ 2. **Page extraction** (optional): If `PARALLEL_API_KEY` is set, uses Parallel.ai Extract API to deeply analyze up to 20 relevant competitor pages — extracting titles, topics, and content structure
457
+ 3. **AI planning**: Feeds the competitor's content map to the AI, which generates a plan to:
458
+ - Find content gaps the competitor doesn't cover
459
+ - Identify weak pages you can do better
460
+ - Discover missing angles, subtopics, and comparisons
461
+ - Match their strong pages with even better versions
462
+ 4. **Duplicate awareness**: The planner receives your existing page slugs and generates complementary content only
463
+
464
+ ```bash
465
+ # Basic competitor analysis
466
+ contentclaw generate "seo" --competitor https://ahrefs.com/sitemap.xml -p xai
467
+
468
+ # With deep page extraction
469
+ PARALLEL_API_KEY="key" contentclaw generate "seo" --competitor https://moz.com/sitemap.xml -p gemini --json --yes
470
+ ```
471
+
472
+ ## Agent Best Practices (OpenClaw Integration)
473
+
474
+ 1. Always use `--json --yes` for parseable, non-interactive output.
475
+ 2. Use `auto` mode (default) for AI-planned content — it generates the best mix of content types.
476
+ 3. Use `--competitor` with a sitemap URL to beat a specific competitor.
477
+ 4. Use `--type` only when the user specifies a single format.
478
+ 5. For bulk generation (1000+ pages), use `--template` with `--vars` files.
479
+ 6. After generating, suggest `contentclaw serve` to start the dashboard and API.
480
+ 7. Set `PARALLEL_API_KEY` for deeper competitor page analysis.
481
+ 8. Keep web search enabled (default) so external links are real and grounded.
482
+ 9. Use `--refresh 30` to regenerate stale content without duplicating fresh pages.
483
+ 10. Check `--json` output's `errors` array to handle failures gracefully.
484
+ 11. The database auto-reconnects — no need to restart the server between generate runs.
485
+ 12. For non-English content, pass `-l <lang>` (e.g. `-l tr`, `-l de`, `-l fr`).
486
+
487
+ ## Output Format (`--json`)
488
+
489
+ ### Successful Generation
490
+
491
+ ```json
492
+ {
493
+ "success": true,
494
+ "generated": 20,
495
+ "failed": 0,
496
+ "pages": [
497
+ {
498
+ "slug": "best-seo-tools-2026",
499
+ "title": "10 Best SEO Tools for 2026",
500
+ "keyword": "best SEO tools 2026",
501
+ "meta_description": "Compare the top SEO tools for 2026...",
502
+ "published_date": "2026-03-18",
503
+ "page_type": "listicle"
504
+ }
505
+ ],
506
+ "errors": []
507
+ }
508
+ ```
509
+
510
+ ### All Pages Already Exist
511
+
512
+ ```json
513
+ {
514
+ "success": true,
515
+ "generated": 0,
516
+ "failed": 0,
517
+ "pages": [],
518
+ "errors": [],
519
+ "message": "All pages already exist. Use --force to regenerate."
520
+ }
521
+ ```
522
+
523
+ ### Error
524
+
525
+ ```json
526
+ {
527
+ "success": false,
528
+ "error": "No keywords provided. Use arguments, --input, or --template flag."
529
+ }
530
+ ```
531
+
532
+ ## Supported Providers
533
+
534
+ | Provider | Web Search | Default Model | Rate Limit (Tier 1) |
535
+ |----------|-----------|---------------|---------------------|
536
+ | `openai` | Yes (`web_search` tool) | `gpt-5.4` | 500 RPM, 5 concurrent |
537
+ | `gemini` | Yes (`googleSearch` grounding) | `gemini-3-flash-preview` | 1000 RPM, 10 concurrent |
538
+ | `anthropic` | Yes (`web_search_20250305`) | `claude-opus-4-6` | 50 RPM, 3 concurrent |
539
+ | `xai` | Yes (`web_search` tool) | `grok-4-1-fast` | 60 RPM, 3 concurrent |
540
+ | `qwen` | No | `qwen-max` | 60 RPM, 3 concurrent |
541
+ | `ollama` | No | `llama3` | 999 RPM, 1 concurrent |