seo-intel 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/.env.example +41 -0
  2. package/LICENSE +75 -0
  3. package/README.md +243 -0
  4. package/Start SEO Intel.bat +9 -0
  5. package/Start SEO Intel.command +8 -0
  6. package/cli.js +3727 -0
  7. package/config/example.json +29 -0
  8. package/config/setup-wizard.js +522 -0
  9. package/crawler/index.js +566 -0
  10. package/crawler/robots.js +103 -0
  11. package/crawler/sanitize.js +124 -0
  12. package/crawler/schema-parser.js +168 -0
  13. package/crawler/sitemap.js +103 -0
  14. package/crawler/stealth.js +393 -0
  15. package/crawler/subdomain-discovery.js +341 -0
  16. package/db/db.js +213 -0
  17. package/db/schema.sql +120 -0
  18. package/exports/competitive.js +186 -0
  19. package/exports/heuristics.js +67 -0
  20. package/exports/queries.js +197 -0
  21. package/exports/suggestive.js +230 -0
  22. package/exports/technical.js +180 -0
  23. package/exports/templates.js +77 -0
  24. package/lib/gate.js +204 -0
  25. package/lib/license.js +369 -0
  26. package/lib/oauth.js +432 -0
  27. package/lib/updater.js +324 -0
  28. package/package.json +68 -0
  29. package/reports/generate-html.js +6194 -0
  30. package/reports/generate-site-graph.js +949 -0
  31. package/reports/gsc-loader.js +190 -0
  32. package/scheduler.js +142 -0
  33. package/seo-audit.js +619 -0
  34. package/seo-intel.png +0 -0
  35. package/server.js +602 -0
  36. package/setup/ROADMAP.md +109 -0
  37. package/setup/checks.js +483 -0
  38. package/setup/config-builder.js +227 -0
  39. package/setup/engine.js +65 -0
  40. package/setup/installers.js +197 -0
  41. package/setup/models.js +328 -0
  42. package/setup/openclaw-bridge.js +329 -0
  43. package/setup/validator.js +395 -0
  44. package/setup/web-routes.js +688 -0
  45. package/setup/wizard.html +2920 -0
  46. package/start-seo-intel.sh +8 -0
package/.env.example ADDED
@@ -0,0 +1,41 @@
1
+ # SEO Intel Configuration
2
+ # Run `node cli.js setup` to configure interactively
3
+
4
+ # ── License (Pro features) ───────────────────────────────────────────────
5
+ # Get your key at https://froggo.pro/seo-intel
6
+ # SEO_INTEL_LICENSE=SI-xxxx-xxxx-xxxx-xxxx
7
+
8
+ # ── Analysis Model (cloud, pick one) ──────────────────────────────────────
9
+ # Gemini: Best value — 1M context, cheapest (~$0.01-0.05/analysis)
10
+ GEMINI_API_KEY=
11
+
12
+ # Claude: Best quality — nuanced strategic reasoning (~$0.10-0.30/analysis)
13
+ # ANTHROPIC_API_KEY=
14
+
15
+ # OpenAI: Solid all-around (~$0.05-0.15/analysis)
16
+ # OPENAI_API_KEY=
17
+
18
+ # DeepSeek: Budget cloud option (~$0.02-0.08/analysis)
19
+ # DEEPSEEK_API_KEY=
20
+
21
+ # ── OAuth (Google Search Console, Analytics) ─────────────────────────────
22
+ # Create at: https://console.cloud.google.com/apis/credentials
23
+ # Type: OAuth 2.0 Client ID → Desktop app
24
+ # GOOGLE_CLIENT_ID=
25
+ # GOOGLE_CLIENT_SECRET=
26
+
27
+ # ── Extraction Model (local Ollama) ───────────────────────────────────────
28
+ # Recommended: qwen3.5:9b (balanced), qwen3.5:4b (budget), qwen3.5:27b (quality)
29
+ OLLAMA_URL=http://localhost:11434
30
+ OLLAMA_MODEL=qwen3.5:9b
31
+ OLLAMA_CTX=8192
32
+ OLLAMA_TIMEOUT_MS=60000 # 60s default — increase to 120000 on slow hardware (BUG-008)
33
+
34
+ # Fallback Ollama host (optional — for LAN setups)
35
+ # OLLAMA_FALLBACK_URL=http://192.168.0.xxx:11434
36
+ # OLLAMA_FALLBACK_MODEL=qwen3:4b
37
+
38
+ # ── Crawler Settings ──────────────────────────────────────────────────────
39
+ CRAWL_DELAY_MS=1500
40
+ CRAWL_MAX_PAGES=50
41
+ CRAWL_TIMEOUT_MS=15000
package/LICENSE ADDED
@@ -0,0 +1,75 @@
1
+ SEO Intel — Dual License
2
+
3
+ Copyright (c) 2024-2026 froggo.pro
4
+
5
+ This project uses a dual license structure:
6
+
7
+ ================================================================================
8
+ PART 1 — MIT License (Open Source Layer)
9
+ ================================================================================
10
+
11
+ The following directories and their contents are licensed under the MIT License:
12
+
13
+ crawler/
14
+ db/
15
+ setup/
16
+ lib/
17
+ config/example.json
18
+ config/setup-wizard.js
19
+ cli.js (command routing and setup commands only)
20
+ server.js
21
+ .env.example
22
+
23
+ Permission is hereby granted, free of charge, to any person obtaining a copy
24
+ of the above software and associated documentation files (the "Software"), to
25
+ deal in the Software without restriction, including without limitation the
26
+ rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
27
+ sell copies of the Software, and to permit persons to whom the Software is
28
+ furnished to do so, subject to the following conditions:
29
+
30
+ The above copyright notice and this permission notice shall be included in all
31
+ copies or substantial portions of the Software.
32
+
33
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39
+ SOFTWARE.
40
+
41
+ ================================================================================
42
+ PART 2 — Commercial License (Proprietary Layer)
43
+ ================================================================================
44
+
45
+ The following directories and their contents are proprietary and require a
46
+ valid SEO Intel Solo license for use:
47
+
48
+ analysis/
49
+ extractor/
50
+ reports/generate-html.js
51
+ reports/gsc-loader.js
52
+
53
+ These files contain the competitive analysis prompts, intelligence logic,
54
+ and report generation code that form the core intellectual property of
55
+ SEO Intel.
56
+
57
+ You MAY NOT:
58
+ - Copy, modify, or redistribute these files
59
+ - Reverse-engineer the analysis prompts or logic
60
+ - Include these files in derivative works without a license
61
+
62
+ You MAY:
63
+ - Use these files as part of a validly licensed SEO Intel installation
64
+ - Run analyses and generate reports for your own use or client work
65
+ - Share generated reports and dashboards (outputs are yours)
66
+
67
+ License:
68
+ Solo — €19.99/month or €199/year — full AI analysis, all commands
69
+ Also available at $9.99/month via froggo.pro marketplace
70
+
71
+ Purchase at: https://ukkometa.fi or https://froggo.pro/seo-intel
72
+
73
+ ================================================================================
74
+
75
+ For questions: hello@froggo.pro
package/README.md ADDED
@@ -0,0 +1,243 @@
1
+ # SEO Intel
2
+
3
+ Local-first competitive SEO intelligence. Point it at your site + competitors, get keyword gaps, content audits, and visual dashboards. All data stays on your machine.
4
+
5
+ **Crawl → Extract (local AI) → Analyze (cloud AI) → Dashboard**
6
+
7
+ ```
8
+ Your site + competitors (Playwright crawler)
9
+ ↓ structured extraction
10
+ Qwen 3.5 via Ollama (local, free)
11
+ ↓ stored in
12
+ SQLite database (WAL mode)
13
+ ↓ competitive analysis
14
+ Gemini / Claude / GPT (your API key)
15
+ ↓ visual reports
16
+ Self-contained HTML dashboards (Chart.js)
17
+ ```
18
+
19
+ ## Quick Start
20
+
21
+ ```bash
22
+ # Install globally
23
+ npm install -g seo-intel
24
+
25
+ # Run the setup wizard (auto-detects OpenClaw for agent-powered setup)
26
+ seo-intel setup
27
+ ```
28
+
29
+ The setup wizard handles everything: dependency checks, model selection, API keys, project configuration, and pipeline validation.
30
+
31
+ ### Requirements
32
+
33
+ - **Node.js 22.5+** (uses built-in SQLite)
34
+ - **Ollama** with a Qwen model (for local extraction)
35
+ - **One API key** for analysis: Gemini (recommended), Claude, OpenAI, or DeepSeek
36
+
37
+ ### Manual Setup
38
+
39
+ ```bash
40
+ npm install -g seo-intel
41
+ seo-intel setup --classic # traditional CLI wizard
42
+ # or
43
+ seo-intel setup # agent-powered if OpenClaw is running
44
+ ```
45
+
46
+ ## Usage
47
+
48
+ ```bash
49
+ # Full pipeline
50
+ seo-intel crawl myproject # crawl target + competitors
51
+ seo-intel extract myproject # local AI extraction (Ollama)
52
+ seo-intel analyze myproject # competitive gap analysis
53
+ seo-intel html myproject # generate dashboard
54
+ seo-intel serve # open dashboard at localhost:3000
55
+
56
+ # Agentic exports — turn data into implementation briefs
57
+ seo-intel export-actions myproject --scope technical # free: broken links, missing schemas, orphans
58
+ seo-intel export-actions myproject --scope all # full: technical + competitive + suggestive
59
+ seo-intel competitive-actions myproject --vs rival.com # what competitors have that you don't
60
+ seo-intel suggest-usecases myproject --scope docs # infer what pages/docs should exist
61
+ ```
62
+
63
+ ## Commands
64
+
65
+ ### Free
66
+
67
+ | Command | Description |
68
+ |---------|-------------|
69
+ | `setup` | First-time wizard — auto-detects OpenClaw for agent-powered setup |
70
+ | `crawl <project>` | Crawl target + competitor sites |
71
+ | `status` | System status, crawl freshness, license info |
72
+ | `html <project>` | Generate crawl-only dashboard |
73
+ | `serve` | Start local dashboard server (port 3000) |
74
+ | `export-actions <project> --scope technical` | Technical SEO audit from crawl data |
75
+ | `schemas <project>` | Schema.org coverage analysis |
76
+ | `update` | Check for updates |
77
+
78
+ ### Solo (€19.99/mo · [ukkometa.fi/seo-intel](https://ukkometa.fi/seo-intel))
79
+
80
+ | Command | Description |
81
+ |---------|-------------|
82
+ | `extract <project>` | Local AI extraction via Ollama |
83
+ | `analyze <project>` | Full competitive gap analysis |
84
+ | `export-actions <project>` | All export scopes (technical + competitive + suggestive) |
85
+ | `competitive-actions <project>` | Competitive gap export with `--vs domain` filter |
86
+ | `suggest-usecases <project>` | Infer what pages/features to build from competitor data |
87
+ | `keywords <project>` | Keyword gap matrix |
88
+ | `run <project>` | Full pipeline: crawl → extract → analyze → report |
89
+ | `brief <project>` | AI content briefs for gap topics |
90
+ | `velocity <project>` | Content publishing velocity tracker |
91
+ | `shallow <project>` | Find "shallow champion" pages to outrank |
92
+ | `decay <project>` | Find stale competitor content |
93
+ | `headings-audit <project>` | H1-H6 structure analysis |
94
+ | `orphans <project>` | Orphaned pages detection |
95
+ | `entities <project>` | Entity coverage gap analysis |
96
+ | `friction <project>` | Conversion friction detection |
97
+ | `js-delta <project>` | JS-rendered vs raw HTML comparison |
98
+
99
+ ## Project Configuration
100
+
101
+ Create a project config in `config/`:
102
+
103
+ ```json
104
+ {
105
+ "project": "myproject",
106
+ "context": {
107
+ "siteName": "My Site",
108
+ "url": "https://example.com",
109
+ "industry": "Your industry description",
110
+ "audience": "Your target audience",
111
+ "goal": "Your SEO objective"
112
+ },
113
+ "target": {
114
+ "domain": "example.com",
115
+ "maxPages": 200,
116
+ "crawlMode": "standard"
117
+ },
118
+ "competitors": [
119
+ { "domain": "competitor1.com", "maxPages": 100 },
120
+ { "domain": "competitor2.com", "maxPages": 100 }
121
+ ]
122
+ }
123
+ ```
124
+
125
+ Or use the setup wizard: `seo-intel setup`
126
+
127
+ ### Managing Competitors
128
+
129
+ ```bash
130
+ seo-intel competitors myproject # list all
131
+ seo-intel competitors myproject --add new-rival.com
132
+ seo-intel competitors myproject --remove old-rival.com
133
+ ```
134
+
135
+ ## Web Setup Wizard
136
+
137
+ ```bash
138
+ seo-intel serve
139
+ # Open http://localhost:3000/setup
140
+ ```
141
+
142
+ The 6-step web wizard guides you through:
143
+ 1. **System Check** — Node, Ollama, Playwright, GPU detection
144
+ 2. **Models** — VRAM-based model recommendations
145
+ 3. **Project** — Target domain + competitors
146
+ 4. **Search Console** — CSV upload or OAuth API
147
+ 5. **Pipeline Test** — Validates the full pipeline
148
+ 6. **Done** — Your first CLI commands
149
+
150
+ If [OpenClaw](https://openclaw.ai) is running, you'll see an option for **agent-powered setup** that handles everything conversationally — including troubleshooting, dependency installation, and OAuth configuration.
151
+
152
+ ## Model Configuration
153
+
154
+ ### Extraction (local, free)
155
+
156
+ SEO Intel uses Ollama for local AI extraction. Edit `.env`:
157
+
158
+ ```bash
159
+ OLLAMA_URL=http://localhost:11434
160
+ OLLAMA_MODEL=qwen3.5:9b # recommended (needs 6GB+ VRAM)
161
+ OLLAMA_CTX=16384
162
+ ```
163
+
164
+ Model recommendations by VRAM:
165
+ - **3-4 GB** → `qwen3.5:4b`
166
+ - **6-8 GB** → `qwen3.5:9b` (recommended)
167
+ - **16+ GB** → `qwen3.5:27b`
168
+
169
+ ### Analysis (cloud, user's API key)
170
+
171
+ You need at least one API key in `.env`:
172
+
173
+ ```bash
174
+ GEMINI_API_KEY=your-key # recommended (~$0.01/analysis)
175
+ # or
176
+ ANTHROPIC_API_KEY=your-key # highest quality
177
+ # or
178
+ OPENAI_API_KEY=your-key # solid all-around
179
+ # or
180
+ DEEPSEEK_API_KEY=your-key # budget option
181
+ ```
182
+
183
+ ## Google Search Console
184
+
185
+ Upload your GSC data for ranking insights:
186
+
187
+ 1. Go to [Google Search Console](https://search.google.com/search-console)
188
+ 2. Export Performance data as CSV
189
+ 3. Upload via the web wizard or place CSVs in `gsc/<project>/`
190
+
191
+ ## License
192
+
193
+ ### Free Tier
194
+ - 1 project, 500 pages/domain
195
+ - Crawl, setup, basic reports
196
+
197
+ ### Solo Tier (€19.99/month or €199/year)
198
+ - Unlimited projects and pages
199
+ - All analysis commands, GSC insights, scheduling
200
+
201
+ ```bash
202
+ # Set your license key
203
+ echo "SEO_INTEL_LICENSE=SI-xxxx-xxxx-xxxx-xxxx" >> .env
204
+ ```
205
+
206
+ Get a key at [froggo.pro/seo-intel](https://froggo.pro/seo-intel)
207
+
208
+ ## Updates
209
+
210
+ ```bash
211
+ seo-intel update # check for updates
212
+ seo-intel update --apply # auto-apply via npm
213
+ ```
214
+
215
+ Updates are checked automatically in the background and shown at the end of `seo-intel status`.
216
+
217
+ ## Security
218
+
219
+ - All data stays local — no telemetry, no cloud sync
220
+ - Scraped content is HTML-stripped and sanitized before reaching any model
221
+ - Extraction outputs are validated against schema before DB insert
222
+ - API keys are stored in `.env` (gitignored)
223
+ - OAuth tokens stored in `.tokens/` (gitignored)
224
+
225
+ ## OpenClaw Integration
226
+
227
+ If you have [OpenClaw](https://openclaw.ai) installed:
228
+
229
+ ```bash
230
+ seo-intel setup # auto-detects gateway, uses agent
231
+ seo-intel setup --agent # require agent setup
232
+ seo-intel setup --classic # force manual wizard
233
+ ```
234
+
235
+ The OpenClaw agent provides:
236
+ - Conversational setup with real-time troubleshooting
237
+ - Automatic dependency installation
238
+ - Smart model recommendations
239
+ - Security update notifications
240
+
241
+ ---
242
+
243
+ Built by [froggo.pro](https://froggo.pro) — local-first SEO intelligence.
package/Start SEO Intel.bat ADDED
@@ -0,0 +1,9 @@
1
+ @echo off
2
+ title SEO Intel
3
+ echo.
4
+ echo Starting SEO Intel...
5
+ echo Dashboard will open in your browser.
6
+ echo.
7
+ cd /d "%~dp0"
8
+ node cli.js serve --open
9
+ pause
package/Start SEO Intel.command ADDED
@@ -0,0 +1,8 @@
1
+ #!/bin/bash
2
+ # Double-click this file to launch SEO Intel dashboard
3
+ cd "$(dirname "$0")"
4
+ echo ""
5
+ echo " Starting SEO Intel..."
6
+ echo " Dashboard will open in your browser."
7
+ echo ""
8
+ node cli.js serve --open