seo-intel 1.5.39 → 1.5.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,127 @@
1
+ /**
2
+ * Lightweight HTML extractor — pure string/regex parsing. No DOM, no browser.
3
+ *
4
+ * Powers the fetch-based light crawler (crawler/light.js) so ANY Claude user can
5
+ * crawl + analyze a site with zero browser environment installed. Consistent
6
+ * with schema-parser.js's regex approach ("no DOM parser needed").
7
+ *
8
+ * Trade-off: not as bulletproof as a full DOM parse on adversarial markup, but
9
+ * more than good enough for SEO/AEO metadata (title, meta, headings, links,
10
+ * JSON-LD, dates). The full Playwright crawler stays the heavyweight option.
11
+ */
12
+
13
+ import { stripHtml } from './sanitize.js';
14
+ import { parseJsonLd } from './schema-parser.js';
15
+
16
/**
 * Decode the small set of HTML character references that show up in SEO
 * metadata: the named entities amp/lt/gt/quot/apos/nbsp plus decimal and
 * hexadecimal numeric references. The result is trimmed.
 *
 * Decoding happens in a SINGLE pass so that already-escaped text is only
 * unescaped once: `&amp;lt;` becomes the literal text `&lt;`, not `<`.
 *
 * @param {string|null|undefined} s - raw text; falsy input yields ''
 * @returns {string} decoded, trimmed text
 */
function decodeEntities(s) {
  // &nbsp; is deliberately mapped to a plain space (not U+00A0).
  const NAMED = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'", nbsp: ' ' };
  return (s || '')
    .replace(/&(?:(amp|lt|gt|quot|apos|nbsp)|#(\d+)|#[xX]([0-9a-fA-F]+));/g, (_, name, dec, hex) => {
      if (name) return NAMED[name];
      const codePoint = dec !== undefined ? Number(dec) : parseInt(hex, 16);
      // Out-of-range code points make fromCodePoint throw; degrade to a space.
      try { return String.fromCodePoint(codePoint); } catch { return ' '; }
    })
    .trim();
}
25
+
26
// Tag-strip a fragment, squeeze every whitespace run to one space, then decode
// entities (decodeEntities also trims the result).
const collapse = (s) => {
  const withoutTags = stripHtml(s || '');
  const singleSpaced = withoutTags.replace(/\s+/g, ' ');
  return decodeEntities(singleSpaced);
};
27
+
28
/**
 * Return the text of the first <title> element, whitespace-collapsed and
 * entity-decoded, or '' when the document has no title element.
 * @param {string} html
 * @returns {string}
 */
export function extractTitle(html) {
  const found = html.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
  if (!found) return '';
  const singleSpaced = found[1].replace(/\s+/g, ' ');
  return decodeEntities(singleSpaced);
}
32
+
33
/**
 * Find the first <meta> tag whose `attr` (e.g. name | property) equals `value`
 * and return its entity-decoded `content` attribute.
 *
 * @param {string} html
 * @param {string} attr - attribute to match on, e.g. 'name' or 'property'
 * @param {string} value - required attribute value, e.g. 'description'
 * @returns {string} decoded content, or '' when the tag or its content is missing
 */
function metaContent(html, attr, value) {
  // attr/value are matched literally, so neutralise any regex metacharacters.
  const literal = (s) => String(s).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const tagRe = new RegExp(`<meta\\b[^>]*\\b${literal(attr)}\\s*=\\s*["']${literal(value)}["'][^>]*>`, 'i');
  const tag = html.match(tagRe);
  if (!tag) return '';
  // The closing quote must be the SAME character as the opening one; otherwise
  // content="Don't miss" would be cut off at the apostrophe.
  const content = tag[0].match(/\bcontent\s*=\s*(["'])([\s\S]*?)\1/i);
  return content ? decodeEntities(content[2]) : '';
}
41
+
42
/**
 * Meta description of the page: the standard `name="description"` tag wins,
 * with Open Graph's `og:description` used only when that one is empty/missing.
 * @param {string} html
 * @returns {string}
 */
export function extractMetaDescription(html) {
  const standard = metaContent(html, 'name', 'description');
  if (standard) return standard;
  return metaContent(html, 'property', 'og:description');
}
45
+
46
/**
 * Lower-cased content of the `<meta name="robots">` tag ('' when absent).
 * @param {string} html
 * @returns {string}
 */
export function extractMetaRobots(html) {
  const directives = metaContent(html, 'name', 'robots');
  return directives.toLowerCase();
}
49
+
50
/**
 * Resolve the page's `<link rel="canonical">` target to an absolute URL.
 *
 * The href is entity-decoded before resolution because HTML attribute values
 * may carry character references (`?a=1&amp;b=2` means `?a=1&b=2`).
 *
 * @param {string} html
 * @param {string} baseUrl - URL the document was fetched from; used to resolve relative hrefs
 * @returns {string} absolute canonical URL; the decoded href itself when it
 *   cannot be resolved against baseUrl; '' when there is no usable canonical
 */
export function extractCanonical(html, baseUrl) {
  const tag = html.match(/<link\b[^>]*\brel\s*=\s*["']canonical["'][^>]*>/i);
  if (!tag) return '';
  const hrefMatch = tag[0].match(/\bhref\s*=\s*["']([^"']+)["']/i);
  if (!hrefMatch) return '';
  const href = decodeEntities(hrefMatch[1]);
  // A whitespace-only href would otherwise resolve to baseUrl itself.
  if (!href) return '';
  try { return new URL(href, baseUrl).toString(); } catch { return href; }
}
57
+
58
/**
 * Collect h1–h6 headings in document order as `{ level, text }` records.
 * Headings whose collapsed text is empty are dropped, each text is cut to 300
 * characters, and collection stops once 300 headings have been gathered.
 * @param {string} html
 * @returns {Array<{level: number, text: string}>}
 */
export function extractHeadings(html) {
  const MAX_HEADINGS = 300;
  const MAX_TEXT_LENGTH = 300;
  const headingRe = /<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1>/gi;
  const headings = [];
  for (let hit = headingRe.exec(html); hit !== null; hit = headingRe.exec(html)) {
    const [, level, inner] = hit;
    const text = collapse(inner);
    if (text) {
      headings.push({ level: Number(level), text: text.slice(0, MAX_TEXT_LENGTH) });
    }
    if (headings.length >= MAX_HEADINGS) break;
  }
  return headings;
}
69
+
70
/**
 * Collect the unique outgoing links of a document.
 *
 * Each `<a href>` is entity-decoded (HTML attribute values may contain
 * character references, e.g. `?a=1&amp;b=2`), resolved against `baseUrl`,
 * stripped of its fragment and de-duplicated. Fragment-only links and
 * mailto:/tel:/javascript:/data: targets are ignored. At most 1000 links are
 * returned.
 *
 * @param {string} html
 * @param {string} baseUrl - URL the document was fetched from; when it cannot
 *   be parsed, hrefs are kept unresolved and `internal` is always false
 * @returns {Array<{href: string, text: string, internal: boolean}>}
 *   `internal` is true when the link's hostname equals the base hostname;
 *   `text` is the collapsed anchor text, cut to 120 characters
 */
export function extractLinks(html, baseUrl) {
  const out = [];
  const seen = new Set();
  let base;
  try { base = new URL(baseUrl); } catch { base = null; }
  const re = /<a\b[^>]*\bhref\s*=\s*["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi;
  let m;
  while ((m = re.exec(html)) !== null) {
    // Decode first so the scheme filter and URL resolution see the real target.
    const href = decodeEntities(m[1]);
    if (!href) continue;
    if (/^(#|mailto:|tel:|javascript:|data:)/i.test(href)) continue;
    let abs;
    try { abs = base ? new URL(href, base).toString() : href; } catch { continue; }
    abs = abs.split('#')[0];
    if (seen.has(abs)) continue;
    seen.add(abs);
    let internal = false;
    try { internal = !!base && new URL(abs).hostname === base.hostname; } catch { /* keep false */ }
    out.push({ href: abs, text: collapse(m[2]).slice(0, 120), internal });
    if (out.length >= 1000) break;
  }
  return out;
}
92
+
93
/**
 * Parse one fetched HTML document into the structured per-page object used by
 * the rest of SEO Intel (title, meta, canonical, robots, headings, links,
 * JSON-LD schemas, word count, body text and dates).
 *
 * Dates come from the first JSON-LD schema that carries `datePublished` /
 * `dateModified`. `body_text` is capped at 20000 characters; `word_count` is
 * computed on the uncapped text.
 *
 * @param {string} html
 * @param {string} url - the (final) URL this HTML was fetched from
 * @returns {object} page record; `is_indexable` is false when the robots meta
 *   tag contains `noindex` or the `none` directive
 */
export function extractPageData(html, url) {
  const schemas = parseJsonLd(html) || [];
  const schemaTypes = [...new Set(schemas.map(s => s.type).filter(Boolean))];
  let published = null;
  let modified = null;
  for (const s of schemas) {
    if (!published && s.datePublished) published = s.datePublished;
    if (!modified && s.dateModified) modified = s.dateModified;
  }
  // Normalise a falsy stripHtml result so the slice below can never throw.
  const bodyText = stripHtml(html) || '';
  const wordCount = bodyText.split(/\s+/).filter(Boolean).length;
  const robots = extractMetaRobots(html);
  // `none` is shorthand for "noindex, nofollow". It is matched as a whole
  // directive so values such as `max-image-preview:none` do not count.
  const blocksIndexing = /\bnoindex\b/.test(robots)
    || robots.split(',').some((directive) => directive.trim() === 'none');

  return {
    url,
    title: extractTitle(html),
    meta_desc: extractMetaDescription(html),
    canonical: extractCanonical(html, url),
    robots,
    is_indexable: !blocksIndexing,
    headings: extractHeadings(html),
    links: extractLinks(html, url),
    schema_types: schemaTypes,
    schemas,
    word_count: wordCount,
    body_text: bodyText.slice(0, 20000),
    published_date: published,
    modified_date: modified,
  };
}
@@ -0,0 +1,169 @@
1
+ /**
2
+ * Light crawler — fetch-based, zero-browser, zero-config, zero-signup.
3
+ *
4
+ * The "crawl for all Claude users" path: point it at a URL and it BFS-crawls
5
+ * same-origin pages with plain HTTP fetch (no Playwright, no browser download),
6
+ * returns structured SEO/AEO data entirely in memory. Nothing is persisted,
7
+ * nothing leaves the machine, no account required.
8
+ *
9
+ * Deliberately NOT a "massive crawl environment":
10
+ * - small page budget (default 10, hard cap 50)
11
+ * - same-origin only by default
12
+ * - honours robots.txt + crawl-delay (no tricks)
13
+ * - no JS rendering (use the full Playwright crawler for JS-heavy sites)
14
+ *
15
+ * For deep, persistent, JS-rendered crawls of a configured project, use the
16
+ * heavyweight crawler (`crawler/index.js` via `seo-intel crawl`).
17
+ */
18
+
19
+ import fetch from 'node-fetch';
20
+ import { checkRobots } from './robots.js';
21
+ import { extractPageData } from './html-extract.js';
22
+ import { scorePage } from '../analyses/aeo/scorer.js';
23
+
24
+ const HARD_CAP = 50;
25
+ const DEFAULT_UA = 'SEOIntelBot (+https://ukkometa.fi/seo-intel; light-crawl)';
26
+
27
// Promise-based pause: resolves (with no value) once `ms` milliseconds elapse.
function sleep(ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
}
28
+
29
// Turn user input into a canonical absolute URL string: surrounding whitespace
// is removed and `https://` is assumed when no http(s) scheme is given.
// Throws (from the URL constructor) when the result is not a valid URL.
function normalizeStart(url) {
  const trimmed = url.trim();
  const hasScheme = /^https?:\/\//i.test(trimmed);
  const candidate = hasScheme ? trimmed : 'https://' + trimmed;
  return new URL(candidate).toString();
}
34
+
35
// Same-site key for a URL: its lower-cased hostname without a leading "www.".
// The protocol is ignored, so http↔https and www↔non-www variants of a site
// share one key. Returns null when the input is not a parseable URL.
function siteKey(u) {
  let host;
  try {
    host = new URL(u).hostname;
  } catch {
    return null;
  }
  return host.replace(/^www\./i, '').toLowerCase();
}
40
+
41
/**
 * Breadth-first, fetch-based crawl starting at `startUrl`. Everything is kept
 * in memory and returned; nothing is persisted by this function.
 *
 * @param {string} startUrl
 * @param {object} [opts]
 * @param {number} [opts.maxPages=10] pages to fetch (clamped to 1..HARD_CAP; a
 *   value that is not a number falls back to 10)
 * @param {boolean} [opts.sameOrigin=true] only follow links on the start site
 *   (compared via siteKey, so www/non-www and http/https count as the same site)
 * @param {boolean} [opts.includeCitability=false] run the AEO scorer per page
 * @param {boolean} [opts.respectRobots=true] honour robots.txt + crawl-delay
 * @param {number} [opts.timeoutMs=10000] per-request timeout
 * @param {number} [opts.maxDelayMs=3000] cap on politeness delay between requests
 * @param {(msg:string)=>void} [opts.onProgress] called once per crawled page
 * @returns {Promise<object>} { start, origin, pages, skipped, stats }
 * @throws {Error} `Invalid URL: …` when startUrl cannot be normalised
 */
export async function lightCrawl(startUrl, opts = {}) {
  const {
    maxPages = 10,
    sameOrigin = true,
    includeCitability = false,
    respectRobots = true,
    timeoutMs = 10000,
    maxDelayMs = 3000,
    onProgress,
  } = opts;

  // A NaN budget would make `pages.length < budget` permanently false and the
  // crawl would silently return nothing, so fall back to the default instead.
  const requestedPages = Number(maxPages);
  const budget = Math.max(1, Math.min(Number.isNaN(requestedPages) ? 10 : requestedPages, HARD_CAP));

  let start;
  try { start = normalizeStart(startUrl); } catch { throw new Error(`Invalid URL: ${startUrl}`); }
  const origin = new URL(start).origin;

  const siteRoot = siteKey(start);
  const queue = [start];
  const queued = new Set([start]);
  const visited = new Set(); // FINAL (post-redirect) URLs actually processed
  const pages = [];
  const skipped = [];
  const t0 = Date.now();
  let requestsMade = 0;

  while (queue.length && pages.length < budget) {
    const url = queue.shift();

    // One robots lookup per URL supplies both the allow verdict and the delay.
    let crawlDelayMs = 0;
    if (respectRobots) {
      let robot;
      try { robot = await checkRobots(url); } catch { robot = null; } // fail open
      if (robot && !robot.allowed) { skipped.push({ url, reason: 'robots_disallow' }); continue; }
      crawlDelayMs = (robot && robot.crawlDelayMs) || 0;
    }

    // Politeness delay before every request after the first one (capped). It is
    // applied here, not after a successful parse, so that requests following a
    // skipped response (404, non-HTML, network error) are spaced out as well.
    if (requestsMade > 0 && crawlDelayMs) await sleep(Math.min(crawlDelayMs, maxDelayMs));
    requestsMade += 1;

    let res, finalUrl = url, status = 0, html = '';
    try {
      res = await fetch(url, { timeout: timeoutMs, redirect: 'follow', headers: { 'User-Agent': DEFAULT_UA, Accept: 'text/html,application/xhtml+xml' } });
      status = res.status;
      finalUrl = res.url || url;
      const ct = (res.headers.get('content-type') || '').toLowerCase();
      if (res.ok && ct.includes('html')) {
        html = await res.text();
      } else {
        skipped.push({ url, reason: res.ok ? `non_html (${ct || 'unknown'})` : `http_${status}`, status });
        continue;
      }
    } catch (e) {
      skipped.push({ url, reason: `fetch_error: ${e.message}` });
      continue;
    }

    // Dedupe on the FINAL url — a redirect may collapse onto a page we already
    // crawled (e.g. non-www start → www, then the page's own www self-link).
    if (visited.has(finalUrl)) continue;
    visited.add(finalUrl);
    queued.add(finalUrl);

    const data = extractPageData(html, finalUrl);
    data.status_code = status;

    if (includeCitability) {
      // Scoring is best-effort: a scorer failure is recorded on the page
      // instead of aborting the crawl.
      try {
        const cite = scorePage(
          { url: data.url, title: data.title, body_text: data.body_text, word_count: data.word_count, published_date: data.published_date, modified_date: data.modified_date },
          data.headings, [], data.schema_types, [], null
        );
        data.citability = { score: cite.score, tier: cite.tier, breakdown: cite.breakdown, ai_intents: cite.aiIntents };
      } catch (e) {
        data.citability = { error: e.message };
      }
    }

    pages.push(data);
    if (onProgress) onProgress(`[${pages.length}/${budget}] ${finalUrl} (${status}, ${data.word_count}w)`);

    // Enqueue internal links for BFS
    if (pages.length < budget) {
      for (const link of data.links) {
        if (!link.href || !/^https?:/i.test(link.href)) continue;
        if (queued.has(link.href)) continue;
        if (sameOrigin && siteKey(link.href) !== siteRoot) continue;
        // skip obvious non-page assets
        if (/\.(png|jpe?g|gif|svg|webp|ico|css|js|pdf|zip|mp4|woff2?|ttf)(\?|$)/i.test(link.href)) continue;
        queued.add(link.href);
        queue.push(link.href);
      }
    }
  }

  const indexable = pages.filter(p => p.is_indexable).length;
  const withSchema = pages.filter(p => p.schema_types.length).length;
  const missingTitle = pages.filter(p => !p.title).length;
  const missingMeta = pages.filter(p => !p.meta_desc).length;

  return {
    start,
    origin,
    pages,
    skipped,
    stats: {
      crawled: pages.length,
      skipped: skipped.length,
      queued_unvisited: Math.max(0, queue.length),
      indexable,
      with_schema: withSchema,
      missing_title: missingTitle,
      missing_meta_desc: missingMeta,
      elapsed_ms: Date.now() - t0,
    },
  };
}
package/db/db.js CHANGED
@@ -328,6 +328,72 @@ export function updateInsightStatus(db, id, status) {
328
328
  db.prepare('UPDATE insights SET status = ? WHERE id = ?').run(status, id);
329
329
  }
330
330
 
331
+ // ── Agentic loop write-back (F1, v1.5.42) ───────────────────────────────────
332
+ //
333
+ // Closes the loop's memory gap: when a draft is actually produced, the Ledger
334
+ // should remember it. Two moves:
335
+ // 1. recordDraftCreated — persist a `draft_created` insight (idempotent per
336
+ // topic/type/lang) so "I drafted X" is durable and visible.
337
+ // 2. markGapsInProgress — flip matching ACTIVE gap insights to 'in_progress'
338
+ // so the same gap stops resurfacing in the next blog-draft pass.
339
+ // Both are best-effort and must never break draft generation.
340
+
341
/**
 * Persist a `draft_created` insight for `project`.
 *
 * The row is keyed by a fingerprint built from content type, language and the
 * normalised topic, and written with an upsert: drafting the same
 * (topic, content_type, lang) again only refreshes `last_seen` and `data`.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...)`
 * @param {string} project
 * @param {object} [draft]
 * @param {string} [draft.topic] - falls back to 'auto' in the fingerprint and '(auto)' in the stored data
 * @param {number|null} [draft.score=null]
 * @param {string|null} [draft.tier=null]
 * @param {number|null} [draft.wordCount=null]
 * @param {string} [draft.lang='en']
 * @param {string} [draft.contentType='blog']
 * @param {string|null} [draft.savedPath=null]
 * @returns {string} the fingerprint used
 */
export function recordDraftCreated(db, project, { topic, score = null, tier = null, wordCount = null, lang = 'en', contentType = 'blog', savedPath = null } = {}) {
  const now = Date.now();

  // Fingerprint: lower-cased topic (max 120 chars), then every run of
  // characters outside [a-z0-9:_-] squashed to a single dash.
  const topicKey = (topic || 'auto').toLowerCase().trim().slice(0, 120);
  const rawFingerprint = `draft:${contentType}:${lang}:${topicKey}`;
  const fingerprint = rawFingerprint.replace(/[^a-z0-9:_-]+/g, '-');

  const payload = JSON.stringify({
    topic: topic || '(auto)',
    score,
    tier,
    word_count: wordCount,
    lang,
    content_type: contentType,
    saved_path: savedPath,
    created_at: now,
  });

  const upsert = db.prepare(`
    INSERT INTO insights (project, type, status, fingerprint, first_seen, last_seen, source_analysis_id, data)
    VALUES (?, 'draft_created', 'active', ?, ?, ?, NULL, ?)
    ON CONFLICT(project, type, fingerprint) DO UPDATE SET
      last_seen = excluded.last_seen,
      data = excluded.data
  `);
  upsert.run(project, fingerprint, now, now, payload);
  return fingerprint;
}
363
+
364
/**
 * Move ACTIVE gap insights that match `topic` to status 'in_progress'.
 *
 * Only rows of the drafting-relevant gap types listed in GAP_TYPES are
 * considered. A row matches when its key term (first non-empty of keyword,
 * phrase, topic, suggested_title, title, url) contains the topic or is
 * contained in it, or when the topic occurs in the concatenation of all those
 * fields. Matching is case-insensitive substring matching. Rows whose `data`
 * cannot be parsed or read are skipped.
 *
 * @param {object} db - database handle exposing `prepare(sql).all/run`
 * @param {string} project
 * @param {string} topic - blank/empty topics mark nothing
 * @returns {number} count of insights marked
 */
export function markGapsInProgress(db, project, topic) {
  if (!topic || !topic.trim()) return 0;

  const needle = topic.toLowerCase().trim();
  const GAP_TYPES = ['keyword_gap', 'long_tail', 'content_gap', 'citability_gap', 'keyword_inventor'];
  const KEY_FIELDS = ['keyword', 'phrase', 'topic', 'suggested_title', 'title', 'url'];

  // Decide whether one stored insight payload refers to the drafted topic.
  const matchesTopic = (rawData) => {
    let keyTerm;
    let fullText;
    try {
      const parsed = JSON.parse(rawData);
      const values = KEY_FIELDS.map((field) => parsed[field]);
      keyTerm = (values.find(Boolean) || '').toLowerCase().trim();
      fullText = values.filter(Boolean).join(' ').toLowerCase();
    } catch {
      return false;
    }
    if (keyTerm && (needle.includes(keyTerm) || keyTerm.includes(needle))) return true;
    return Boolean(fullText) && fullText.includes(needle);
  };

  const placeholders = GAP_TYPES.map(() => '?').join(',');
  const candidates = db.prepare(
    `SELECT id, data FROM insights WHERE project = ? AND status = 'active' AND type IN (${placeholders})`
  ).all(project, ...GAP_TYPES);

  const markStatement = db.prepare(`UPDATE insights SET status = 'in_progress', last_seen = ? WHERE id = ?`);
  const now = Date.now();

  let marked = 0;
  for (const row of candidates) {
    if (!matchesTopic(row.data)) continue;
    markStatement.run(now, row.id);
    marked += 1;
  }
  return marked;
}
396
+
331
397
  export function upsertDomain(db, { domain, project, role }) {
332
398
  const now = Date.now();
333
399
  return db.prepare(`
package/lib/cron.js ADDED
@@ -0,0 +1,108 @@
1
+ /**
2
+ * lib/cron.js — Install / remove the daily `seo-intel notify` cron entry.
3
+ *
4
+ * The "user forgets to check SEO" defense from v1.5.34's delivery brainstorm.
5
+ * Adds a single managed crontab line tagged with a marker comment so we can
6
+ * find and replace/remove our own entry without touching the user's other
7
+ * cron jobs.
8
+ *
9
+ * macOS + Linux: uses crontab(1). On macOS the first install will prompt the
10
+ * user to approve calendar/automation access via the system permission dialog
11
+ * — that's normal, nothing we can do about it.
12
+ *
13
+ * Windows: returns ok:false with a hint pointing at Task Scheduler. Out of
14
+ * scope for v1.5.40.
15
+ */
16
+
17
+ import { spawnSync } from 'child_process';
18
+ import { fileURLToPath } from 'url';
19
+ import { dirname, join } from 'path';
20
+
21
+ const __dirname = dirname(fileURLToPath(import.meta.url));
22
+ const ROOT = join(__dirname, '..');
23
+ const NODE_BIN = process.execPath;
24
+ const MARKER = '# managed-by-seo-intel';
25
+
26
+ export const DEFAULT_SCHEDULE = '0 9 * * *'; // 9am every day
27
+
28
// Return the current user's crontab as text via `crontab -l`. Any non-zero
// exit (typically "no crontab for user yet") is treated as an empty crontab.
function readCrontab() {
  const { status, stdout } = spawnSync('crontab', ['-l'], { encoding: 'utf8' });
  if (status !== 0) return '';
  return stdout || '';
}
34
+
35
/**
 * Replace the current user's crontab with `content` by piping it to
 * `crontab -`. The text is normalised to end with exactly one newline.
 *
 * @param {string} content - full crontab text to install
 * @throws {Error} when crontab exits non-zero or could not be started; the
 *   message carries crontab's stderr, or the spawn error (e.g. binary not
 *   found) when there is no stderr output
 */
function writeCrontab(content) {
  const text = (content || '').replace(/\n*$/, '\n'); // ensure single trailing newline
  const r = spawnSync('crontab', ['-'], { input: text, encoding: 'utf8' });
  if (r.status !== 0) {
    // When the process never started, stderr is empty and the reason is in r.error.
    const detail = (r.stderr && r.stderr.trim()) || (r.error && r.error.message) || 'unknown error';
    throw new Error(`crontab write failed: ${detail}`);
  }
}
42
+
43
// True when this process is running on Windows.
function isWindows() {
  const { platform } = process;
  return platform === 'win32';
}
44
+
45
/**
 * Report whether the managed notify entry is present in the user's crontab.
 * The managed entry is the first crontab line containing MARKER; its schedule
 * is taken from the line's first five whitespace-separated fields.
 * On Windows nothing is inspected and `installed` is always false.
 *
 * @returns {{ installed: boolean, line: string|null, schedule: string|null, platform: string }}
 */
export function getNotifyCronStatus() {
  if (isWindows()) {
    return { installed: false, line: null, schedule: null, platform: 'win32' };
  }

  const { platform } = process;
  const managedLine = readCrontab()
    .split('\n')
    .find((entry) => entry.includes(MARKER));

  if (managedLine === undefined) {
    return { installed: false, line: null, schedule: null, platform };
  }

  const fields = managedLine.trim().split(/\s+/);
  return {
    installed: true,
    line: managedLine,
    schedule: fields.slice(0, 5).join(' '),
    platform,
  };
}
58
+
59
/**
 * Install (or replace) the managed cron line.
 *
 * Every existing crontab line that does not contain MARKER is kept; any
 * previous managed line is dropped and the new one is appended. The install
 * directory and the node binary are single-quoted in the generated command so
 * paths containing spaces or other shell-special characters keep working.
 *
 * @param {object} [opts]
 * @param {string} [opts.schedule] Cron schedule, default DEFAULT_SCHEDULE (9am daily)
 * @param {boolean} [opts.openOnFire] Append `--open` flag so the dashboard opens when fired
 * @returns {{ ok: boolean, line?: string, schedule?: string, error?: string, hint?: string }}
 *   On success `schedule` is the whitespace-normalised schedule that was written.
 */
export function installNotifyCron({ schedule = DEFAULT_SCHEDULE, openOnFire = false } = {}) {
  if (isWindows()) {
    return {
      ok: false,
      error: 'Windows not supported — use Task Scheduler manually',
      hint: `Create a daily task running: ${NODE_BIN} "${join(ROOT, 'cli.js')}" notify`,
    };
  }
  // Sanity-check schedule (a string of 5 fields, no shell metachars)
  if (typeof schedule !== 'string' || !/^[-*\/0-9, ]+$/.test(schedule) || schedule.trim().split(/\s+/).length !== 5) {
    return { ok: false, error: `Invalid cron schedule "${schedule}". Expected 5 fields, e.g. "0 9 * * *".` };
  }
  const normalizedSchedule = schedule.trim().split(/\s+/).join(' ');
  // POSIX single-quoting: wrap in '…' and spell an embedded ' as '\''.
  const shellQuote = (value) => `'${String(value).replace(/'/g, "'\\''")}'`;
  const cmd = `cd ${shellQuote(ROOT)} && ${shellQuote(NODE_BIN)} cli.js notify${openOnFire ? ' --open' : ''}`;
  const newLine = `${normalizedSchedule} ${cmd} ${MARKER}`;
  const current = readCrontab();
  const kept = current.split('\n').filter(l => l && !l.includes(MARKER));
  kept.push(newLine);
  try {
    writeCrontab(kept.join('\n'));
    return { ok: true, line: newLine, schedule: normalizedSchedule };
  } catch (e) {
    return { ok: false, error: e.message };
  }
}
91
+
92
/**
 * Delete the managed cron line(s) — every crontab line containing MARKER.
 * Safe to call repeatedly: when no managed line exists the crontab is left
 * untouched. On Windows this is a no-op.
 *
 * @returns {{ ok: boolean, removed: boolean, error?: string }}
 */
export function removeNotifyCron() {
  if (isWindows()) return { ok: true, removed: false }; // nothing to remove

  const entries = readCrontab().split('\n').filter(Boolean);
  const survivors = entries.filter((entry) => !entry.includes(MARKER));
  const nothingManaged = survivors.length === entries.length;
  if (nothingManaged) return { ok: true, removed: false };

  try {
    writeCrontab(survivors.join('\n'));
  } catch (e) {
    return { ok: false, removed: false, error: e.message };
  }
  return { ok: true, removed: true };
}
package/lib/gate.js CHANGED
@@ -65,6 +65,36 @@ const FEATURE_NAMES = {
65
65
  'intel-competitor': 'Intel Competitor Digest (AI-agent-ready)',
66
66
  };
67
67
 
68
+ // ── Free features (v1.5.41 monetization line) ───────────────────────────────
69
+ //
70
+ // Analysis of YOUR OWN site is free: a capable agent commoditizes one-shot
71
+ // analysis anyway, so we give it away to make the free tier a genuine
72
+ // "complete SEO brain" — local, private, zero flagship tokens on grunt work.
73
+ //
74
+ // The paywall sits on what an agent structurally CANNOT do for itself:
75
+ // • Competitors — analyze, shallow, decay, headings-audit, entities,
76
+ // friction, competitive, gap-intel, intel-competitor
77
+ // • Automation — run (scheduler)
78
+ // • History — brief ("what changed"), velocity (publish-rate over time)
79
+ //
80
+ // Anything listed here passes requirePro() regardless of license tier.
81
// Feature keys that are free on every license tier.
const FREE_FEATURES = new Set([
  // Local Ollama labor — the grunt work that powers the free brain.
  'extract',
  // AI citability scoring (pure function, own site).
  'aeo',
  // Keyword intelligence (own site).
  'keywords',
  // Programmatic template detection (own site).
  'templates',
  // Orphan entity detection (own site).
  'orphans',
  // JS rendering delta (own site, technical).
  'js-delta',
  // AEO blog draft from the Ledger (own site, content).
  'blog-draft',
  // HTML dashboard.
  'html',
  // HTML dashboard (all projects).
  'html-all',
  // Search Console intelligence (own site).
  'gsc-insights',
  // Agent-ready audit digest (own site).
  'intel-audit',
  // Agent-ready blog digest (own site).
  'intel-blog',
  // One-shot single-domain audit — the first-touch "try it" command.
  'scan',
  // Content-loop orchestrator (own-site gaps → draft → queue).
  'loop',
]);
97
+
68
98
  // ── CLI Gate — blocks command and shows upgrade message ──────────────────────
69
99
 
70
100
  /**
@@ -75,6 +105,7 @@ const FEATURE_NAMES = {
75
105
  * @returns {boolean}
76
106
  */
77
107
  export function requirePro(feature) {
108
+ if (FREE_FEATURES.has(feature)) return true;
78
109
  if (isPro()) return true;
79
110
 
80
111
  const displayName = FEATURE_NAMES[feature] || feature;
@@ -178,7 +209,8 @@ export function printLicenseStatus() {
178
209
  if (license.stale) console.log(`\x1b[33m ⚠ License cache stale — will re-validate on next network access${RESET}`);
179
210
  } else {
180
211
  console.log(`${DIM} SEO Intel Free${RESET}`);
181
- console.log(`${DIM} Unlimited crawl · Raw SQLite data · No AI analysis · No dashboard${RESET}`);
212
+ console.log(`${DIM} Unlimited crawl · Full site analysis · Dashboard · Local & private${RESET}`);
213
+ console.log(`${DIM} Solo adds: competitors · scheduled crawls · history & trends${RESET}`);
182
214
  if (license.invalidKey) {
183
215
  console.log(`\x1b[33m ⚠ ${license.reason}${RESET}`);
184
216
  }
package/lib/intel.js CHANGED
@@ -8,10 +8,14 @@
8
8
  *
9
9
  * Slices:
10
10
  * raw (free) — page/keyword/heading inventory, no analysis
11
- * audit (paid) — citability + technical + active insights
12
- * blog (paid) — gaps + tone hints for drafting
11
+ * audit (free) — citability + technical + active insights (your own site)
12
+ * blog (free) — gaps + tone hints for drafting (your own site)
13
13
  * competitor (paid) — competitor summary + schema landscape
14
14
  *
15
+ * Monetization line (v1.5.41): analysis of YOUR OWN site is free — a smart
16
+ * agent commoditizes one-shot analysis anyway. The paywall sits on the things
17
+ * an agent structurally can't do for itself: competitors, automation, history.
18
+ *
15
19
  * Output is a stable structured object — agents should be able to chain calls
16
20
  * without prompt gymnastics. Keep the schema additive across versions.
17
21
  */
@@ -26,7 +30,9 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
26
30
  const VERSION = JSON.parse(readFileSync(join(__dirname, '..', 'package.json'), 'utf8')).version;
27
31
 
28
32
  export const INTEL_SLICES = ['raw', 'audit', 'blog', 'competitor'];
29
- export const FREE_SLICES = ['raw'];
33
+ // Own-site slices are free; only the competitor slice (data the agent can't
34
+ // gather on its own) requires Solo.
35
+ export const FREE_SLICES = ['raw', 'audit', 'blog'];
30
36
 
31
37
  /**
32
38
  * @param {import('node:sqlite').DatabaseSync} db