seo-intel 1.5.2 → 1.5.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Start SEO Intel.command +10 -0
- package/analyses/blog-draft/index.js +62 -10
- package/cli.js +239 -0
- package/lib/scan-export.js +180 -0
- package/package.json +1 -1
- package/reports/generate-html.js +490 -44
- package/server.js +294 -17
package/Start SEO Intel.command
CHANGED
|
@@ -5,4 +5,14 @@ echo ""
|
|
|
5
5
|
echo " Starting SEO Intel..."
|
|
6
6
|
echo " Dashboard will open in your browser."
|
|
7
7
|
echo ""
|
|
8
|
+
|
|
9
|
+
# Kill any stale server on the same port so new code is always loaded.
# Only LISTENing sockets are matched: a plain `lsof -ti :PORT` also reports
# client processes connected to the port (e.g. a browser tab showing the
# dashboard), and those must never be killed.
PORT="${SEO_INTEL_PORT:-3000}"
# lsof prints one PID per line; fold them onto a single space-separated line.
OLD_PID=$(lsof -ti tcp:"$PORT" -sTCP:LISTEN 2>/dev/null | tr '\n' ' ' | sed 's/ *$//')
if [ -n "$OLD_PID" ]; then
  echo " Restarting server (killing stale PID $OLD_PID on port $PORT)..."
  # Intentionally unquoted: several PIDs must be passed as separate arguments.
  kill $OLD_PID 2>/dev/null
  sleep 1
fi
|
|
17
|
+
|
|
8
18
|
node cli.js serve --open
|
|
@@ -89,9 +89,10 @@ export function gatherBlogDraftContext(db, project, topic = null) {
|
|
|
89
89
|
|
|
90
90
|
// ── Prompt Builder ──────────────────────────────────────────────────────────
|
|
91
91
|
|
|
92
|
-
export function buildBlogDraftPrompt(context, { config, lang = 'en', topic = null }) {
|
|
92
|
+
export function buildBlogDraftPrompt(context, { config, lang = 'en', topic = null, contentType = 'blog' }) {
|
|
93
93
|
const { longTails, keywordGaps, citabilityGaps, entityRows, topCitablePages, kwInventor, contentGaps, insights } = context;
|
|
94
94
|
const isFi = lang === 'fi';
|
|
95
|
+
const langName = isFi ? 'Finnish' : 'English';
|
|
95
96
|
|
|
96
97
|
// Extract unique entities from extraction data
|
|
97
98
|
const allEntities = new Set();
|
|
@@ -103,11 +104,19 @@ export function buildBlogDraftPrompt(context, { config, lang = 'en', topic = nul
|
|
|
103
104
|
}
|
|
104
105
|
const topEntities = [...allEntities].slice(0, 15);
|
|
105
106
|
|
|
106
|
-
// ── Section 1: Role ──
|
|
107
|
+
// ── Section 1: Role — adapts to content type ──
|
|
108
|
+
const typeInstructions = {
|
|
109
|
+
blog: `Your task: write a complete, publish-ready blog post draft in ${langName}.
|
|
110
|
+
The post must score 70+ on the AEO citability scale (entity authority, structured claims, answer density, Q&A proximity, freshness signals, schema coverage).`,
|
|
111
|
+
docs: `Your task: write a complete, publish-ready documentation page in ${langName}.
|
|
112
|
+
The page must be technically precise, well-structured, and scannable. Use step-by-step instructions where applicable. Optimise for developers and technical users searching for how-to answers.`,
|
|
113
|
+
social: `Your task: write a set of social media posts in ${langName}.
|
|
114
|
+
Create 5-7 distinct posts suitable for LinkedIn/X/Twitter. Each should be self-contained, engaging, and drive traffic to the site. Include hashtag suggestions. Vary formats: thread opener, hot take, stat-based, question-based, listicle.`,
|
|
115
|
+
};
|
|
116
|
+
|
|
107
117
|
let prompt = `You are an expert content strategist and copywriter specialising in AEO (Answer Engine Optimisation).
|
|
108
118
|
|
|
109
|
-
|
|
110
|
-
The post must score 70+ on the AEO citability scale (entity authority, structured claims, answer density, Q&A proximity, freshness signals, schema coverage).
|
|
119
|
+
${typeInstructions[contentType] || typeInstructions.blog}
|
|
111
120
|
|
|
112
121
|
`;
|
|
113
122
|
|
|
@@ -183,12 +192,46 @@ The post must score 70+ on the AEO citability scale (entity authority, structure
|
|
|
183
192
|
}
|
|
184
193
|
}
|
|
185
194
|
|
|
186
|
-
// ── Section 5:
|
|
187
|
-
|
|
195
|
+
// ── Section 5: Structural requirements — adapts to content type ──
|
|
196
|
+
const siteName = config.context?.siteName || config.target?.domain;
|
|
197
|
+
const today = new Date().toISOString().slice(0, 10);
|
|
198
|
+
|
|
199
|
+
if (contentType === 'social') {
|
|
200
|
+
prompt += `
|
|
201
|
+
## Social Media Requirements
|
|
202
|
+
|
|
203
|
+
1. Create 5-7 distinct posts, each separated by ---
|
|
204
|
+
2. Each post must be self-contained (not a thread unless marked as such)
|
|
205
|
+
3. Include a mix of: hot takes, statistics/data, questions, listicles, how-to snippets
|
|
206
|
+
4. Optimise for engagement: hooks in the first line, clear value proposition
|
|
207
|
+
5. Include 3-5 relevant hashtags per post
|
|
208
|
+
6. Keep posts under 280 characters for X/Twitter variants; LinkedIn variants can be longer (600-1,200 chars)
|
|
209
|
+
7. Reference ${siteName} naturally where appropriate (not in every post)
|
|
210
|
+
8. Include one thread idea (3-5 connected posts) marked with [THREAD]
|
|
211
|
+
9. Language: ${isFi ? 'Finnish' : 'English'}
|
|
212
|
+
`;
|
|
213
|
+
} else if (contentType === 'docs') {
|
|
214
|
+
prompt += `
|
|
215
|
+
## Documentation Requirements
|
|
216
|
+
|
|
217
|
+
The page MUST include:
|
|
218
|
+
1. YAML frontmatter with: title, slug, description (155 chars max), primary_keyword, secondary_keywords[], date (${today}), lang (${lang}), tags[]
|
|
219
|
+
2. An H1 that clearly states what this page covers
|
|
220
|
+
3. A 1-2 sentence overview immediately after the H1
|
|
221
|
+
4. Prerequisites section (if applicable)
|
|
222
|
+
5. Step-by-step instructions with numbered lists
|
|
223
|
+
6. Code examples with language-tagged fenced code blocks
|
|
224
|
+
7. At least one table for reference data (parameters, options, etc.)
|
|
225
|
+
8. A "Troubleshooting" or "Common Issues" section at the end
|
|
226
|
+
9. Word count: 800-2,000 words
|
|
227
|
+
10. Internal link suggestions: include 2-3 \`[anchor text](URL)\` links to related pages
|
|
228
|
+
`;
|
|
229
|
+
} else {
|
|
230
|
+
prompt += `
|
|
188
231
|
## AEO Structural Requirements
|
|
189
232
|
|
|
190
233
|
The draft MUST include:
|
|
191
|
-
1. YAML frontmatter with: title, slug, description (155 chars max), primary_keyword, secondary_keywords[], date (${
|
|
234
|
+
1. YAML frontmatter with: title, slug, description (155 chars max), primary_keyword, secondary_keywords[], date (${today}), updated (same), lang (${lang}), tags[]${!topic ? ', topic_selection_rationale' : ''}
|
|
192
235
|
2. An H1 that contains the primary keyword
|
|
193
236
|
3. A 2-3 sentence summary immediately after the H1 (answer-first structure — inverted pyramid). This paragraph will be cited by AI assistants.
|
|
194
237
|
4. Minimum 6 H2 subheadings
|
|
@@ -196,10 +239,11 @@ The draft MUST include:
|
|
|
196
239
|
6. At least one numbered or bulleted list with 4+ items
|
|
197
240
|
7. At least one "X is Y because Z" definitional sentence per major concept
|
|
198
241
|
8. A FAQ section at the end with minimum 4 Q&A pairs (### H3 questions, 2-4 sentence answers)
|
|
199
|
-
9. A closing CTA paragraph referencing ${
|
|
242
|
+
9. A closing CTA paragraph referencing ${siteName}
|
|
200
243
|
10. Word count: 1,200-2,000 words
|
|
201
244
|
11. Internal link suggestions: include 2-3 \`[anchor text](URL)\` links back to the site where natural
|
|
202
245
|
`;
|
|
246
|
+
}
|
|
203
247
|
|
|
204
248
|
// ── Section 6: Language ──
|
|
205
249
|
if (isFi) {
|
|
@@ -217,11 +261,19 @@ Write in clear, direct international English. No filler phrases. No "in today's
|
|
|
217
261
|
}
|
|
218
262
|
|
|
219
263
|
// ── Section 7: Output format ──
|
|
220
|
-
|
|
264
|
+
if (contentType === 'social') {
|
|
265
|
+
prompt += `
|
|
266
|
+
## Output Format
|
|
267
|
+
|
|
268
|
+
Respond with ONLY the social media posts. Separate each post with ---. No explanation before or after. No triple backticks wrapping the response.
|
|
269
|
+
`;
|
|
270
|
+
} else {
|
|
271
|
+
prompt += `
|
|
221
272
|
## Output Format
|
|
222
273
|
|
|
223
|
-
Respond with ONLY the complete markdown document. Start with --- (YAML frontmatter open fence). End with the FAQ section and CTA. No explanation before or after. No triple backticks wrapping the response.
|
|
274
|
+
Respond with ONLY the complete markdown document. Start with --- (YAML frontmatter open fence). End with the ${contentType === 'docs' ? 'Troubleshooting section' : 'FAQ section and CTA'}. No explanation before or after. No triple backticks wrapping the response.
|
|
224
275
|
`;
|
|
276
|
+
}
|
|
225
277
|
|
|
226
278
|
return prompt;
|
|
227
279
|
}
|
package/cli.js
CHANGED
|
@@ -4439,6 +4439,7 @@ program
|
|
|
4439
4439
|
.description('Generate an AEO-optimised blog post draft from Intelligence Ledger data')
|
|
4440
4440
|
.option('--topic <keyword>', 'Focus the post on a specific topic')
|
|
4441
4441
|
.option('--lang <code>', 'Language: en or fi', 'en')
|
|
4442
|
+
.option('--type <type>', 'Content type: blog, docs, or social', 'blog')
|
|
4442
4443
|
.option('--model <name>', 'Model to use for generation (gemini, claude, gpt, deepseek)', 'gemini')
|
|
4443
4444
|
.option('--save', 'Save the generated draft to reports/')
|
|
4444
4445
|
.action(async (project, opts) => {
|
|
@@ -4480,6 +4481,7 @@ program
|
|
|
4480
4481
|
config,
|
|
4481
4482
|
lang: opts.lang,
|
|
4482
4483
|
topic: opts.topic || null,
|
|
4484
|
+
contentType: opts.type || 'blog',
|
|
4483
4485
|
});
|
|
4484
4486
|
console.log(chalk.gray(` Prompt size: ${(prompt.length / 1024).toFixed(1)}KB`));
|
|
4485
4487
|
|
|
@@ -4771,6 +4773,243 @@ program.hook('preAction', async () => {
|
|
|
4771
4773
|
}
|
|
4772
4774
|
});
|
|
4773
4775
|
|
|
4776
|
+
// ── SCAN — One-shot full audit ────────────────────────────────────────────────
// Registers `scan <domain>`: builds a throw-away project config for the domain,
// crawls it (optionally extracting each page), runs the analysis model, then
// writes a markdown report to reports/. The temporary config file is removed
// when the command finishes, whether it succeeded or threw.
program
  .command('scan <domain>')
  .description('One-shot full audit: crawl → extract → analyze → export (no config needed)')
  .option('--pages <n>', 'Max pages to crawl', '100')
  .option('--no-ai', 'Skip AI-enriched export (deterministic only)')
  .option('--model <name>', 'Model for analysis + AI export (gemini, claude, gpt)', 'gemini')
  .option('--no-stealth', 'Disable stealth browser mode')
  .action(async (domainInput, opts) => {
    if (!requirePro('scan')) return;

    // ── Parse domain ──
    // Strip scheme, path and a leading "www." so only the bare host remains.
    const domain = domainInput.replace(/^https?:\/\//, '').replace(/\/.*$/, '').replace(/^www\./, '');
    const projectSlug = '_scan-' + domain.replace(/[^a-z0-9]/gi, '-').toLowerCase();
    const siteUrl = defaultSiteUrl(domain);
    const useStealth = opts.stealth !== false;
    const useAi = opts.ai !== false;
    // Fall back to 100 for missing, non-numeric, zero or negative values.
    const requestedPages = Number.parseInt(opts.pages, 10);
    const maxPages = Math.min(requestedPages > 0 ? requestedPages : 100, capPages(9999));

    console.log(chalk.bold.hex('#d4af37')(`\n${'═'.repeat(60)}`));
    console.log(chalk.bold.hex('#d4af37')(` ⚡ SCAN — Full SEO Audit`));
    console.log(chalk.bold.hex('#d4af37')(`${'═'.repeat(60)}`));
    console.log('');
    console.log(chalk.white(` Target: ${chalk.bold(domain)}`));
    console.log(chalk.white(` Pages: ${maxPages}`));
    console.log(chalk.white(` Stealth: ${useStealth ? chalk.green('yes') : chalk.gray('no')}`));
    console.log(chalk.white(` AI Export: ${useAi ? chalk.green('yes') : chalk.gray('no')}`));
    console.log(chalk.white(` Model: ${opts.model}`));
    console.log('');

    const scanStart = Date.now();
    const db = getDb();

    // ── Ephemeral config ──
    const config = {
      project: projectSlug,
      context: { siteName: domain, industry: '', audience: '', goal: '' },
      target: { domain, url: siteUrl, maxPages, crawlMode: 'standard' },
      competitors: [],
      owned: [],
    };
    // Save config so dashboard/export functions work
    const configPath = join(__dirname, `config/${projectSlug}.json`);
    writeFileSync(configPath, JSON.stringify(config, null, 2), 'utf8');

    try {
      applyExtractionRuntimeConfig(config);

      // ── Step 1: Crawl + Extract ──
      console.log(chalk.bold.cyan(' ⏱ Step 1/3 — Crawl + Extract'));
      console.log('');

      // Check extraction availability
      let doExtract = true;
      const ollamaAvailable = await checkOllamaAvailability(config);
      if (!ollamaAvailable) {
        console.log(chalk.yellow(' ⚠ No AI extraction available (Ollama unreachable)'));
        console.log(chalk.gray(' → Crawl-only mode — body text still captured for analysis'));
        console.log('');
        doExtract = false;
      }

      upsertDomain(db, { domain, project: projectSlug, role: 'target' });
      const domainId = db.prepare('SELECT id FROM domains WHERE domain = ? AND project = ?').get(domain, projectSlug)?.id;

      let pageCount = 0;
      let extracted = 0;
      let failed = 0;
      const tag = chalk.cyan(`[${domain.split('.')[0]}]`);

      for await (const page of crawlDomain(siteUrl, { maxPages, stealth: useStealth, tiered: true })) {
        if (page._blocked) {
          console.log(chalk.bold.red(` ${tag} ⛔ BLOCKED: ${page._blockReason}`));
          break;
        }

        const pageRes = upsertPage(db, {
          domainId, url: page.url, statusCode: page.status,
          wordCount: page.wordCount, loadMs: page.loadMs,
          isIndexable: page.isIndexable, clickDepth: page.depth ?? 0,
          publishedDate: page.publishedDate || null, modifiedDate: page.modifiedDate || null,
          contentHash: page.contentHash || null, title: page.title || null,
          metaDesc: page.metaDesc || null, bodyText: page.fullBodyText || page.bodyText || null,
        });
        const pageId = pageRes?.id;

        upsertTechnical(db, { pageId, hasCanonical: page.hasCanonical, hasOgTags: page.hasOgTags, hasSchema: page.hasSchema, hasRobots: page.hasRobots });
        insertHeadings(db, pageId, page.headings);
        insertLinks(db, pageId, page.links);
        if (page.parsedSchemas?.length) insertPageSchemas(db, pageId, page.parsedSchemas);

        if (doExtract) {
          process.stdout.write(chalk.gray(` ${tag} [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 60)} → extracting...`));
          try {
            // Resolved inside the try so a loader failure is counted per page
            // instead of aborting the whole crawl.
            const extractFn = await getExtractPage();
            const extraction = await extractFn(page);
            insertExtraction(db, { pageId, data: extraction });
            insertKeywords(db, pageId, extraction.keywords);
            process.stdout.write(chalk.green(` ✓\n`));
            extracted++;
          } catch (err) {
            process.stdout.write(chalk.red(` ✗ ${err.message}\n`));
            failed++;
          }
        } else {
          process.stdout.write(chalk.gray(` ${tag} [${pageCount + 1}] d${page.depth ?? 0} ${page.url.slice(0, 65)} ✓\n`));
        }
        pageCount++;
      }

      const crawlSec = ((Date.now() - scanStart) / 1000).toFixed(1);
      // Surface extraction failures; they were counted but never reported.
      const failedNote = failed ? `, ${failed} failed` : '';
      console.log(chalk.green(`\n ✅ Crawl done: ${pageCount} pages, ${extracted} extracted${failedNote} (${crawlSec}s)\n`));

      // ── Step 2: Analyze ──
      console.log(chalk.bold.cyan(' ⏱ Step 2/3 — Analyze'));
      console.log('');

      const summary = getCompetitorSummary(db, projectSlug);
      const target = summary.find(s => s.role === 'target');

      if (!target) {
        console.log(chalk.yellow(' ⚠ No target data found — skipping analysis'));
      } else {
        target.domain = domain;
        const keywordMatrix = getKeywordMatrix(db, projectSlug);
        const headings = getHeadingStructure(db, projectSlug);

        const buildPromptFn = await getBuildAnalysisPrompt();
        const prompt = buildPromptFn({
          project: projectSlug, target, competitors: [],
          keywordMatrix, headingStructure: headings, context: config.context,
        });

        console.log(chalk.gray(` Prompt: ~${Math.round(prompt.length / 4)} tokens → ${opts.model}...`));
        process.env._SEO_INTEL_PROJECT = projectSlug;
        const result = await callAnalysisModel(prompt, opts.model);

        if (result) {
          try {
            // Take the outermost {...} span of the reply as the JSON payload.
            const jsonMatch = result.match(/\{[\s\S]*\}/);
            if (!jsonMatch) throw new Error('no JSON object found in model response');
            const analysis = JSON.parse(jsonMatch[0]);

            // Save to DB
            const analysisTs = Date.now();
            db.prepare(`
              INSERT INTO analyses (project, generated_at, model, keyword_gaps, long_tails, quick_wins, new_pages, content_gaps, positioning, technical_gaps, raw)
              VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            `).run(
              projectSlug, analysisTs, opts.model,
              JSON.stringify(analysis.keyword_gaps || []),
              JSON.stringify(analysis.long_tails || []),
              JSON.stringify(analysis.quick_wins || []),
              JSON.stringify(analysis.new_pages || []),
              JSON.stringify(analysis.content_gaps || []),
              JSON.stringify(analysis.positioning || {}),
              JSON.stringify(analysis.technical_gaps || []),
              result,
            );
            const analysisRowId = db.prepare('SELECT last_insert_rowid() as id').get().id;
            upsertInsightsFromAnalysis(db, projectSlug, analysisRowId, analysis, analysisTs);

            printAnalysisSummary(analysis, projectSlug);
          } catch (parseErr) {
            // Keep the raw model output on disk so it is not lost.
            console.log(chalk.yellow(` ⚠ Could not parse analysis: ${parseErr.message}`));
            const rawPath = join(__dirname, `reports/${projectSlug}-raw-${new Date().toISOString().slice(0, 10)}.md`);
            writeFileSync(rawPath, result, 'utf8');
            console.log(chalk.gray(` Raw output saved: ${rawPath}`));
          }
        } else {
          console.log(chalk.yellow(' ⚠ No response from model — skipping analysis'));
        }
      }

      // ── Step 3: Export ──
      console.log(chalk.bold.cyan('\n ⏱ Step 3/3 — Export Report'));
      console.log('');

      // Generate dashboard (so gatherProjectData works). Best-effort: a failure
      // here must not abort the export, but it is reported rather than hidden.
      try {
        const allConfigs = loadAllConfigs();
        generateMultiDashboard(db, allConfigs);
      } catch (err) {
        console.log(chalk.gray(` Dashboard refresh skipped: ${err.message}`));
      }

      // Build export markdown via the server's export logic
      const { gatherProjectData } = await import('./reports/generate-html.js');
      const dash = gatherProjectData(db, projectSlug, config);

      // Inline the deterministic markdown builder from server.js
      const { buildScanMarkdown } = await import('./lib/scan-export.js');
      let md = buildScanMarkdown(dash, projectSlug, domain);

      // AI enrichment
      let aiApplied = false;
      if (useAi) {
        console.log(chalk.gray(` Enriching with AI (${opts.model})...`));
        const aiPrompt = `You are an SEO strategist reviewing a data export report. Your job is to ENRICH this report, NOT rewrite it.

Rules:
- Keep ALL existing data, tables, headers, and instruction blocks exactly as they are
- Fill any empty table cells (marked with empty | | columns) with concise, actionable content
- For empty "Parent" cells: infer the parent keyword cluster
- For empty "Opportunity" cells: classify as how-to guide, comparison, tutorial, landing page, etc.
- For empty "Gap"/"Suggestion"/"Rationale"/"Potential" cells: fill with concise actionable content
- After the last section, add "## AI Action Plan" with a numbered top-10 highest-impact actions
- Keep markdown format — tables, headers, blockquotes
- Be concise — table cells under 80 chars
- Do NOT add commentary outside the report

Here is the report:

${md}`;
        const aiResult = await callAnalysisModel(aiPrompt, opts.model);
        // Reject replies under half the input length: an enrichment should
        // only add content, so a much shorter reply means data was dropped.
        if (aiResult && aiResult.trim().length > md.length * 0.5) {
          md = aiResult;
          aiApplied = true;
          console.log(chalk.green(' ✓ AI enrichment applied'));
        } else {
          console.log(chalk.yellow(' ⚠ AI enrichment failed — using deterministic export'));
        }
      }

      // Save
      const dateStr = new Date().toISOString().slice(0, 10);
      const fileName = `scan-${domain.replace(/[^a-z0-9]/gi, '-')}-${dateStr}.md`;
      const outPath = join(__dirname, 'reports', fileName);
      writeFileSync(outPath, md, 'utf8');

      const totalSec = ((Date.now() - scanStart) / 1000).toFixed(1);
      console.log('');
      console.log(chalk.bold.hex('#d4af37')(`${'═'.repeat(60)}`));
      console.log(chalk.bold.hex('#d4af37')(` ✅ Scan Complete — ${totalSec}s`));
      console.log(chalk.bold.hex('#d4af37')(`${'═'.repeat(60)}`));
      console.log('');
      console.log(chalk.white(` Report: ${chalk.bold(outPath)}`));
      console.log(chalk.white(` Pages: ${pageCount} crawled, ${extracted} extracted`));
      // Report what was actually written, not what was requested.
      console.log(chalk.white(` Export: ${aiApplied ? 'AI-enriched' : 'deterministic'} markdown`));
      console.log('');
    } finally {
      // Clean up ephemeral config (keep the report) — also when a step above threw.
      try { unlinkSync(configPath); } catch { /* fine if already gone */ }
    }
  });
|
|
5012
|
+
|
|
4774
5013
|
// ── BUG-002: No-args getting-started handler ─────────────────────────────────
|
|
4775
5014
|
// When run with no command, show a friendly entry point instead of generic help
|
|
4776
5015
|
if (process.argv.length <= 2) {
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Scan Export — Deterministic markdown builder for the `scan` command.
|
|
3
|
+
* Mirrors the dashboardToMarkdown logic from server.js but works standalone.
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
/**
 * Pick the keyword-gap entry that best matches a long-tail phrase.
 *
 * Each candidate keyword is split into words; its score is the fraction of
 * those words that occur (as substrings) in the lower-cased phrase. The
 * highest-scoring keyword with a score of at least 0.5 wins; on ties the
 * earliest entry is kept.
 *
 * @param {string} phrase - Long-tail phrase to classify. Non-string or empty
 *   values yield `null` instead of throwing.
 * @param {Array<{keyword?: string}>} keywordGaps - Candidate parents; may be
 *   null/undefined. Entries without a string keyword of 3+ characters are skipped.
 * @returns {string|null} The matching entry's original `keyword`, or `null`.
 */
function inferLongTailParent(phrase, keywordGaps) {
  if (typeof phrase !== 'string' || phrase === '') return null;

  const haystack = phrase.toLowerCase();
  let best = null;
  let bestScore = 0;

  for (const gap of (keywordGaps || [])) {
    // Trim first: a leading space would otherwise produce an empty token,
    // and '' is "included" in every phrase, inflating the score.
    const keyword = typeof gap?.keyword === 'string' ? gap.keyword.trim().toLowerCase() : '';
    if (keyword.length < 3) continue;

    const words = keyword.split(/\s+/);
    const score = words.filter((word) => haystack.includes(word)).length / words.length;
    if (score >= 0.5 && score > bestScore) {
      bestScore = score;
      best = gap.keyword;
    }
  }
  return best;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Describe the content opportunity for a long-tail item from its phrase,
 * falling back to its `intent` / `page_type` fields.
 *
 * Phrase patterns are matched on word boundaries so that terms at the start or
 * end of the phrase are recognised ("stripe api") and look-alike substrings
 * are not ("desktop" is not "top"). Rules are checked in order; the first
 * match wins.
 *
 * @param {{phrase?: string, intent?: string, page_type?: string}} item
 * @returns {string} A short description, or '' when nothing can be inferred.
 */
function inferLongTailOpportunity(item) {
  const phrase = String(item?.phrase ?? '').toLowerCase();
  const intent = item?.intent || '';
  const pageType = item?.page_type || '';
  const has = (pattern) => pattern.test(phrase);

  if (has(/^how to\b/) || has(/\btutorial/)) return `How-to ${pageType || 'guide'} — ${intent || 'informational'} intent`;
  if (has(/\bvs\b/) || has(/\bcomparison/)) return `Comparison ${pageType || 'article'} — captures decision-stage traffic`;
  if (has(/\bbest\b/) || has(/\btop\b/)) return `Listicle / roundup — high commercial intent`;
  if (has(/\bwhat is\b/) || has(/explained/)) return `Explainer ${pageType || 'page'} — top-of-funnel awareness`;
  if (has(/\bapis?\b/) || has(/\bsdks?\b/)) return `Technical docs ${pageType || 'page'} — developer intent`;
  if (has(/\b(?:prices?|pricing|costs?)\b/)) return `Pricing / comparison page — transactional intent`;
  if (intent) return `${pageType || 'Content'} page — ${intent} intent`;
  return pageType ? `${pageType} page` : '';
}
|
|
32
|
+
|
|
33
|
+
/**
 * Rate a keyword idea as 'High', 'Medium' or 'Low'.
 *
 * An explicit `priority` wins (case-insensitive, surrounding whitespace
 * ignored; 'critical' maps to 'High'). Otherwise phrases that start with
 * "how" or contain " vs " / "best ", and items typed 'question' or
 * 'comparison', rate 'High'. Everything else rates 'Medium'.
 *
 * @param {{priority?: *, phrase?: *, type?: string}} item
 * @returns {'High'|'Medium'|'Low'}
 */
function inferPotential(item) {
  // Coerce before normalising: the item may carry a non-string priority,
  // which used to throw on .toLowerCase().
  const priority = String(item?.priority ?? '').trim().toLowerCase();
  if (priority === 'high' || priority === 'critical') return 'High';
  if (priority === 'medium') return 'Medium';
  if (priority === 'low') return 'Low';

  const phrase = String(item?.phrase ?? '').toLowerCase();
  if (phrase.startsWith('how') || phrase.includes(' vs ') || phrase.includes('best ')) return 'High';
  if (item?.type === 'question' || item?.type === 'comparison') return 'High';

  // Every remaining type (including 'ai_query') defaults to Medium.
  return 'Medium';
}
|
|
44
|
+
|
|
45
|
+
/**
 * Render a value as the content of one markdown table cell.
 * Null/undefined become '', arrays are comma-joined, and line breaks and
 * unescaped pipes are neutralised so they cannot split the row.
 */
function escapeTableCell(value) {
  if (value === null || value === undefined) return '';
  const text = Array.isArray(value) ? value.join(', ') : String(value);
  return text.replace(/\r?\n/g, ' ').replace(/(?<!\\)\|/g, '\\|');
}

/** Render a positioning field inline; objects are shown as JSON, not "[object Object]". */
function inlineText(value) {
  return typeof value === 'object' && value !== null ? JSON.stringify(value) : String(value);
}

/**
 * Build the deterministic markdown report for the `scan` command.
 *
 * Sections are emitted only when their data is present, in this order:
 * technical scorecard, technical gaps, quick wins, internal links, keyword
 * opportunities, top keyword gaps, long-tail opportunities, new pages,
 * content gaps, keyword ideas, positioning strategy, crawl info.
 *
 * @param {object} dash - Dashboard data. Fields read: `latestAnalysis`,
 *   `technicalScores`, `keywordGaps`, `internalLinks`, `crawlStats`.
 * @param {string} projectSlug - Project identifier (not used in the output;
 *   kept for interface compatibility).
 * @param {string} domain - Domain shown in the report title.
 * @returns {string} The markdown document.
 */
export function buildScanMarkdown(dash, projectSlug, domain) {
  const date = new Date().toISOString().slice(0, 10);
  const analysis = dash.latestAnalysis || {};
  const cell = escapeTableCell;

  let md = `# SEO Scan Report — ${domain}\n\n- Date: ${date}\n- Mode: One-shot audit (no competitors)\n\n`;

  const target = dash.technicalScores?.find((d) => d.isTarget);
  if (target) {
    md += `## Technical Scorecard\n\n`;
    md += `- Overall: **${target.score}/100**\n`;
    md += `- H1: ${target.h1Pct}% | Meta: ${target.metaPct}% | Schema: ${target.schemaPct}% | Title: ${target.titlePct}%\n\n`;
  }

  const technicalGaps = analysis.technical_gaps;
  if (technicalGaps?.length) {
    md += `## Technical Gaps (${technicalGaps.length})\n\n`;
    md += `> Implement these schema and markup fixes to qualify for rich results. Start with FAQ and HowTo schema.\n\n`;
    md += `| Issue | Affected | Fix |\n|-------|----------|-----|\n`;
    for (const g of technicalGaps) {
      md += `| ${cell(g.gap || g.issue)} | ${cell(g.affected || g.pages)} | ${cell(g.recommendation || g.fix)} |\n`;
    }
    md += '\n';
  }

  const quickWins = analysis.quick_wins;
  if (quickWins?.length) {
    const highCount = quickWins.filter((w) => w.impact === 'high').length;
    md += `## Quick Wins (${quickWins.length})\n\n`;
    md += `> **${highCount} high-impact items.** Pick the top 3 and implement this week — each takes <30 min.\n\n`;
    md += `| Page | Issue | Fix | Impact |\n|------|-------|-----|--------|\n`;
    for (const w of quickWins) {
      md += `| ${cell(w.page)} | ${cell(w.issue)} | ${cell(w.fix)} | ${cell(w.impact)} |\n`;
    }
    md += '\n';
  }

  const links = dash.internalLinks;
  if (links) {
    md += `## Internal Links\n\n- Total links: ${links.totalLinks}\n- Orphan pages: ${links.orphanCount}\n`;
    if (links.topPages?.length) {
      md += '\n| Page | Depth Score |\n|------|-------------|\n';
      for (const p of links.topPages) md += `| ${cell(p.url || p.label)} | ${cell(p.count)} |\n`;
    }
    md += '\n';
  }

  const keywordGaps = analysis.keyword_gaps;
  if (keywordGaps?.length) {
    md += `## Keyword Opportunities (${keywordGaps.length})\n\n`;
    md += `> Keywords identified from your content that could be targeted more aggressively.\n\n`;
    md += `| Keyword | Coverage | Priority |\n|---------|----------|----------|\n`;
    for (const g of keywordGaps) {
      md += `| ${cell(g.keyword)} | ${cell(g.your_coverage || g.target_count || 'low')} | ${cell(g.priority)} |\n`;
    }
    md += '\n';
  }

  if (dash.keywordGaps?.length) {
    md += `## Top Keyword Gaps\n\n`;
    md += `| Keyword | Frequency | Your Count | Gap |\n|---------|-----------|------------|-----|\n`;
    for (const g of dash.keywordGaps.slice(0, 50)) {
      const freq = g.total || g.competitor_count || '';
      const tgt = g.target || 0;
      let gap = '';
      if (freq !== '') {
        // A difference of 0 is a valid gap; only fall back to the raw
        // frequency when the subtraction is not a number.
        const diff = Number(freq) - Number(tgt);
        gap = Number.isFinite(diff) ? diff : freq;
      }
      md += `| ${cell(g.keyword)} | ${cell(freq)} | ${cell(tgt)} | ${cell(gap)} |\n`;
    }
    md += '\n';
  }

  const longTails = analysis.long_tails;
  if (longTails?.length) {
    md += `## Long-tail Opportunities (${longTails.length})\n\n`;
    md += `> Lower competition, higher conversion. Each maps to a parent cluster and content type.\n\n`;
    md += `| Phrase | Parent | Opportunity |\n|-------|--------|-------------|\n`;
    for (const l of longTails) {
      // Explicit fields win; the inference helpers only fill the blanks.
      const parent = l.parent || l.keyword || inferLongTailParent(l.phrase, keywordGaps?.length ? keywordGaps : undefined) || '';
      const opportunity = l.opportunity || l.rationale || inferLongTailOpportunity(l) || '';
      md += `| ${cell(l.phrase)} | ${cell(parent)} | ${cell(opportunity)} |\n`;
    }
    md += '\n';
  }

  const newPages = analysis.new_pages;
  if (newPages?.length) {
    md += `## New Pages to Create (${newPages.length})\n\n`;
    md += `> Each targets a keyword gap. Create with proper H1, schema, and internal links.\n\n`;
    md += `| Title | Target Keyword | Rationale |\n|-------|----------------|----------|\n`;
    for (const p of newPages) {
      md += `| ${cell(p.title)} | ${cell(p.target_keyword)} | ${cell(p.rationale || p.why || p.content_angle)} |\n`;
    }
    md += '\n';
  }

  const contentGaps = analysis.content_gaps;
  if (contentGaps?.length) {
    md += `## Content Gaps (${contentGaps.length})\n\n`;
    md += `> Topics you should cover but currently don't.\n\n`;
    md += `| Topic | Gap | Suggestion |\n|-------|-----|------------|\n`;
    for (const g of contentGaps) {
      const gap = g.gap || (g.covered_by?.length ? `Covered by ${g.covered_by.join(', ')}` : '') || g.why_it_matters || '';
      const suggestion = g.suggestion || g.suggested_title || (g.format ? `Create a ${g.format} covering this topic` : '') || '';
      md += `| ${cell(g.topic)} | ${cell(gap)} | ${cell(suggestion)} |\n`;
    }
    md += '\n';
  }

  const keywordIdeas = analysis.keyword_inventor;
  if (keywordIdeas?.length) {
    md += `## Keyword Ideas (${keywordIdeas.length})\n\n`;
    md += `| Phrase | Cluster | Potential |\n|-------|---------|----------|\n`;
    for (const k of keywordIdeas.slice(0, 50)) {
      md += `| ${cell(k.phrase)} | ${cell(k.cluster)} | ${cell(k.potential || k.volume || inferPotential(k) || '')} |\n`;
    }
    if (keywordIdeas.length > 50) md += `\n_...and ${keywordIdeas.length - 50} more._\n`;
    md += '\n';
  }

  // Skip the heading entirely when none of the three fields is set, so an
  // empty positioning object does not produce an empty section.
  const positioning = analysis.positioning;
  const positioningLines = positioning
    ? [
        ['Open angle', positioning.open_angle],
        ['Differentiator', positioning.target_differentiator],
        ['Competitor map', positioning.competitor_map],
      ].filter(([, value]) => value)
    : [];
  if (positioningLines.length) {
    md += `## Positioning Strategy\n\n`;
    for (const [label, value] of positioningLines) md += `**${label}:** ${inlineText(value)}\n\n`;
  }

  const crawlStats = dash.crawlStats;
  if (crawlStats) {
    md += `## Crawl Info\n\n- Last crawl: ${crawlStats.lastCrawl || date}\n- Extracted pages: ${crawlStats.extractedPages || 0}\n`;
  }

  return md;
}
|