seo-intel 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +41 -0
- package/LICENSE +75 -0
- package/README.md +243 -0
- package/Start SEO Intel.bat +9 -0
- package/Start SEO Intel.command +8 -0
- package/cli.js +3727 -0
- package/config/example.json +29 -0
- package/config/setup-wizard.js +522 -0
- package/crawler/index.js +566 -0
- package/crawler/robots.js +103 -0
- package/crawler/sanitize.js +124 -0
- package/crawler/schema-parser.js +168 -0
- package/crawler/sitemap.js +103 -0
- package/crawler/stealth.js +393 -0
- package/crawler/subdomain-discovery.js +341 -0
- package/db/db.js +213 -0
- package/db/schema.sql +120 -0
- package/exports/competitive.js +186 -0
- package/exports/heuristics.js +67 -0
- package/exports/queries.js +197 -0
- package/exports/suggestive.js +230 -0
- package/exports/technical.js +180 -0
- package/exports/templates.js +77 -0
- package/lib/gate.js +204 -0
- package/lib/license.js +369 -0
- package/lib/oauth.js +432 -0
- package/lib/updater.js +324 -0
- package/package.json +68 -0
- package/reports/generate-html.js +6194 -0
- package/reports/generate-site-graph.js +949 -0
- package/reports/gsc-loader.js +190 -0
- package/scheduler.js +142 -0
- package/seo-audit.js +619 -0
- package/seo-intel.png +0 -0
- package/server.js +602 -0
- package/setup/ROADMAP.md +109 -0
- package/setup/checks.js +483 -0
- package/setup/config-builder.js +227 -0
- package/setup/engine.js +65 -0
- package/setup/installers.js +197 -0
- package/setup/models.js +328 -0
- package/setup/openclaw-bridge.js +329 -0
- package/setup/validator.js +395 -0
- package/setup/web-routes.js +688 -0
- package/setup/wizard.html +2920 -0
- package/start-seo-intel.sh +8 -0
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
import { collectTop, makeAction, normalizeActionType, normalizePriority, sortActions } from './heuristics.js';
|
|
2
|
+
import { getEntityCoverage, getLatestAnalysis, getProjectDomains, getSchemaCoverage } from './queries.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Splits extracted entities into the set mentioned by non-competitor rows and
 * a map of entity -> competitor domains that mention it.
 *
 * `primary_entities` may already be an array or a JSON-encoded string.
 * Anything that does not decode to an array is ignored.
 *
 * @param {Array<{primary_entities: (string|Array|null), role: string, domain: string}>} rows
 * @returns {{target: Set<string>, competitors: Map<string, Set<string>>}}
 */
function aggregateEntityCoverage(rows) {
  const target = new Set();
  const competitors = new Map();

  for (const row of rows) {
    let parsed = row.primary_entities;
    if (!Array.isArray(parsed)) {
      try {
        parsed = JSON.parse(parsed || '[]');
      } catch {
        parsed = [];
      }
    }
    // Valid JSON is not necessarily an array: `null`, numbers and objects are
    // not iterable, and a JSON string would be walked character by character.
    if (!Array.isArray(parsed)) continue;

    for (const entity of parsed) {
      const key = String(entity || '').trim();
      if (!key) continue;
      if (row.role === 'competitor') {
        if (!competitors.has(key)) competitors.set(key, new Set());
        competitors.get(key).add(row.domain);
      } else {
        target.add(key);
      }
    }
  }

  return { target, competitors };
}
|
|
26
|
+
|
|
27
|
+
/**
 * Builds the prioritized list of competitive actions for a project.
 *
 * Sources, in order: the latest stored analysis (keyword, content and
 * technical gaps), schema types only competitors use, entities only
 * competitors mention, recommended new pages, and the positioning angle.
 *
 * @param {object} db - Database handle, passed through to the query helpers.
 * @param {string} project - Project identifier.
 * @param {{vsDomain?: (string|null)}} [options] - `vsDomain` limits the
 *   comparison to a single competitor domain.
 * @returns {Array<object>} Actions as ordered by `sortActions`; empty when no
 *   competitor domain matches.
 */
export function buildCompetitiveActions(db, project, options = {}) {
  const { vsDomain = null } = options;
  const analysis = getLatestAnalysis(db, project);
  const competitorDomains = getProjectDomains(db, project)
    .filter(d => d.role === 'competitor' && (!vsDomain || d.domain === vsDomain));
  if (!competitorDomains.length) return [];

  // Analysis fields are decoded JSON and are not guaranteed to be arrays of
  // objects; anything else is dropped instead of throwing or being iterated
  // character by character.
  const recordsOf = (value) => (Array.isArray(value)
    ? value.filter(item => item !== null && typeof item === 'object')
    : []);
  // Same id fragment rule for every generated action id.
  const slug = (value) => String(value || '').toLowerCase().replace(/[^a-z0-9]+/g, '-');

  const actions = [];

  if (analysis) {
    for (const gap of recordsOf(analysis.keyword_gaps)) {
      actions.push(makeAction({
        id: `competitive-keyword-${slug(gap.keyword)}`,
        type: normalizeActionType(gap.suggested_action === 'new_page' ? 'new_page' : 'improve', 'new_page'),
        priority: normalizePriority(gap.priority, 'medium'),
        area: 'content',
        title: `Close keyword gap: ${gap.keyword}`,
        why: `${gap.keyword} is covered by ${gap.competitor_count || competitorDomains.length} competitor domains and maps to ${gap.intent || 'search'} intent demand.`,
        evidence: collectTop([
          gap.suggested_page ? `Suggested page: ${gap.suggested_page}` : null,
          gap.difficulty ? `Difficulty: ${gap.difficulty}` : null,
          gap.intent ? `Intent: ${gap.intent}` : null,
        ].filter(Boolean), 5),
        implementationHints: [
          gap.suggested_page ? `Create or upgrade ${gap.suggested_page} around the target query.` : 'Create a dedicated landing page for this query cluster.',
          'Benchmark the top competitor pages for headings, examples, proof, and schema coverage.',
        ],
      }));
    }

    for (const gap of recordsOf(analysis.content_gaps)) {
      actions.push(makeAction({
        id: `competitive-content-${slug(gap.topic)}`,
        type: gap.format === 'comparison' || gap.format === 'landing' ? 'new_page' : 'improve',
        priority: 'high',
        area: 'content',
        title: `Cover topic gap: ${gap.topic}`,
        why: gap.why_it_matters || 'Competitors cover this topic and the target site currently does not.',
        evidence: collectTop([
          gap.suggested_title ? `Suggested title: ${gap.suggested_title}` : null,
          Array.isArray(gap.covered_by) && gap.covered_by.length ? `Covered by: ${gap.covered_by.join(', ')}` : null,
          gap.format ? `Format: ${gap.format}` : null,
        ].filter(Boolean), 5),
        implementationHints: [
          gap.suggested_title ? `Build the piece around: ${gap.suggested_title}` : 'Publish a dedicated piece for this topic.',
          'Include comparison tables, proof, examples, and intent-matched CTAs.',
        ],
      }));
    }

    for (const gap of recordsOf(analysis.technical_gaps)) {
      actions.push(makeAction({
        id: `competitive-schema-${slug(gap.gap)}`,
        type: 'add_schema',
        priority: 'medium',
        area: 'schema',
        title: `Match competitor schema pattern: ${gap.gap}`,
        why: gap.fix || 'Competitors have richer structured data coverage on relevant templates.',
        evidence: collectTop([
          Array.isArray(gap.competitors_with_it) && gap.competitors_with_it.length ? `Used by: ${gap.competitors_with_it.join(', ')}` : null,
        ].filter(Boolean), 3),
        implementationHints: [
          gap.fix || 'Add the schema type to matching target templates.',
          'Validate rich result eligibility in Google Rich Results Test after rollout.',
        ],
      }));
    }
  }

  // Schema types present on competitor rows but on no non-competitor row.
  const schemaCoverage = getSchemaCoverage(db, project, vsDomain);
  const targetSchemaTypes = new Set(schemaCoverage.filter(r => r.role !== 'competitor').map(r => r.schema_type));
  const competitorSchemaMap = new Map();
  for (const row of schemaCoverage.filter(r => r.role === 'competitor')) {
    if (!competitorSchemaMap.has(row.schema_type)) competitorSchemaMap.set(row.schema_type, { domains: new Set(), pages: 0 });
    const bucket = competitorSchemaMap.get(row.schema_type);
    bucket.domains.add(row.domain);
    bucket.pages += row.page_count || 0;
  }

  for (const [schemaType, info] of competitorSchemaMap.entries()) {
    if (targetSchemaTypes.has(schemaType)) continue;
    actions.push(makeAction({
      id: `competitive-schema-coverage-${String(schemaType).toLowerCase()}`,
      type: 'add_schema',
      priority: info.domains.size >= 2 ? 'high' : 'medium',
      area: 'schema',
      title: `Add ${schemaType} schema where competitors already do`,
      why: 'Competitors are enriching equivalent pages with schema types the target site has not deployed.',
      evidence: collectTop([
        `Competitors using it: ${[...info.domains].join(', ')}`,
        `Competitor pages with this schema: ${info.pages}`,
      ], 5),
      implementationHints: [
        `Map ${schemaType} to the closest target template and ship JSON-LD at render time.`,
        'Prioritize pages where this schema can improve CTR or eligibility for enhanced SERP features.',
      ],
    }));
  }

  // Entities only competitors mention, most widely shared first, capped at 8.
  const entityCoverage = aggregateEntityCoverage(getEntityCoverage(db, project, vsDomain));
  const missingEntities = [...entityCoverage.competitors.entries()]
    .filter(([entity]) => !entityCoverage.target.has(entity))
    .sort((a, b) => b[1].size - a[1].size)
    .slice(0, 8);

  for (const [entity, domains] of missingEntities) {
    actions.push(makeAction({
      id: `competitive-entity-${slug(entity)}`,
      type: 'improve',
      priority: domains.size >= 3 ? 'high' : 'medium',
      area: 'content',
      title: `Expand entity coverage around “${entity}”`,
      why: 'Competitors repeatedly mention this entity while the target domain set does not.',
      evidence: collectTop([
        `Competitors covering it: ${[...domains].join(', ')}`,
      ], 4),
      implementationHints: [
        'Add the entity to relevant product, docs, or comparison pages with supporting context, examples, and links.',
        'If the entity deserves dedicated intent coverage, create a focused landing page or guide.',
      ],
    }));
  }

  // At most six recommended pages are turned into actions.
  for (const page of recordsOf(analysis?.new_pages).slice(0, 6)) {
    actions.push(makeAction({
      id: `competitive-new-page-${slug(page.title)}`,
      type: 'new_page',
      priority: normalizePriority(page.priority, 'medium'),
      area: 'content',
      title: `Build page: ${page.title}`,
      why: page.why || 'Analysis recommends a dedicated page to win competitor demand.',
      evidence: collectTop(recordsOf(page.placement).map(p => `${p.property}: ${p.url}`), 5),
      implementationHints: [
        page.content_angle ? `Angle: ${page.content_angle}` : null,
        'Use the best-fit property and internal link it from high-authority hub pages.',
      ].filter(Boolean),
    }));
  }

  if (analysis?.positioning?.open_angle) {
    const { positioning } = analysis;
    actions.push(makeAction({
      id: 'competitive-positioning-open-angle',
      type: 'improve',
      priority: 'medium',
      area: 'content',
      title: 'Sharpen positioning around the open market angle',
      why: positioning.open_angle,
      evidence: collectTop([
        positioning.target_differentiator ? `Differentiator: ${positioning.target_differentiator}` : null,
        positioning.competitor_map ? `Competitor map: ${positioning.competitor_map}` : null,
      ].filter(Boolean), 4),
      implementationHints: [
        'Reflect the differentiator in homepage hero copy, solution pages, and comparison pages.',
        'Use repeated phrasing across title tags, H1s, and product proof sections to build topical association.',
      ],
    }));
  }

  return sortActions(actions);
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
/**
 * Converts a value into a lowercase, hyphen-separated slug of at most 80
 * characters, falling back to 'item' when nothing usable remains.
 *
 * @param {*} value - Anything; falsy values are treated as an empty string.
 * @returns {string}
 */
function slugify(value) {
  return String(value || '')
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-+/, '')
    .slice(0, 80)
    // Trim trailing hyphens after truncating: the cut can land right after a
    // separator and would otherwise leave a dangling '-'.
    .replace(/-+$/, '') || 'item';
}
|
|
8
|
+
|
|
9
|
+
/**
 * Creates an action record by layering `overrides` on top of the defaults.
 *
 * Overrides whose value is `null` or `undefined` are ignored so they cannot
 * erase a default or the generated id. `evidence` and `implementationHints`
 * always come back as arrays with falsy entries removed.
 *
 * @param {object} [overrides] - Partial action fields; extra keys are kept.
 * @returns {object} Action with id, type, priority, area, title, why,
 *   evidence and implementationHints populated.
 */
export function makeAction(overrides = {}) {
  const supplied = Object.fromEntries(
    Object.entries(overrides ?? {}).filter(([, value]) => value != null),
  );
  const cleanList = (value) => (Array.isArray(value) ? value.filter(Boolean) : []);
  const generatedId = `${supplied.area || 'general'}-${supplied.type || 'fix'}-${slugify(supplied.title)}`;

  return {
    id: generatedId,
    type: 'fix',
    priority: 'medium',
    area: 'content',
    title: '',
    why: '',
    ...supplied,
    // Re-applied after the spread so an empty id still falls back.
    id: supplied.id || generatedId,
    evidence: cleanList(supplied.evidence),
    implementationHints: cleanList(supplied.implementationHints),
  };
}
|
|
28
|
+
|
|
29
|
+
// A Map is used so that inherited object members such as 'constructor' or
// 'toString' can never be mistaken for a priority.
const PRIORITY_WEIGHTS = new Map([
  ['critical', 4],
  ['high', 3],
  ['medium', 2],
  ['low', 1],
]);

/**
 * Returns the numeric sort weight of a priority label.
 *
 * @param {string} priority - 'critical', 'high', 'medium' or 'low'.
 * @returns {number} 4..1 for known labels, 0 for anything else.
 */
export function priorityWeight(priority) {
  return PRIORITY_WEIGHTS.get(priority) ?? 0;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Counts actions per priority label.
 *
 * The four standard labels are always present in the result; any other label
 * found on an action gets its own counter.
 *
 * @param {Array<{priority: string}>} actions
 * @returns {Object<string, number>}
 */
export function summarizeActions(actions) {
  const tally = { critical: 0, high: 0, medium: 0, low: 0 };
  for (const { priority } of actions) {
    tally[priority] = (tally[priority] || 0) + 1;
  }
  return tally;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Returns a sorted copy of `actions`: highest priority weight first, ties
 * broken alphabetically by title. The input array is not modified.
 *
 * @param {Array<{priority: string, title?: string}>} actions
 * @returns {Array<object>}
 */
export function sortActions(actions) {
  // A missing or non-string title must not abort the whole sort.
  const titleOf = (action) => String(action.title ?? '');

  return [...actions].sort((a, b) => {
    const priorityDelta = priorityWeight(b.priority) - priorityWeight(a.priority);
    if (priorityDelta !== 0) return priorityDelta;
    return titleOf(a).localeCompare(titleOf(b));
  });
}
|
|
47
|
+
|
|
48
|
+
/**
 * Lower-cases a priority label and returns it when it is one of the four
 * recognized values; otherwise returns `fallback`.
 *
 * @param {*} priority
 * @param {string} [fallback='medium']
 * @returns {string}
 */
export function normalizePriority(priority, fallback = 'medium') {
  const candidate = String(priority || '').toLowerCase();
  switch (candidate) {
    case 'critical':
    case 'high':
    case 'medium':
    case 'low':
      return candidate;
    default:
      return fallback;
  }
}
|
|
52
|
+
|
|
53
|
+
/**
 * Lower-cases an action type and returns it when it is one of the recognized
 * types; otherwise returns `fallback`.
 *
 * @param {*} type
 * @param {string} [fallback='improve']
 * @returns {string}
 */
export function normalizeActionType(type, fallback = 'improve') {
  const candidate = String(type || '').toLowerCase();
  switch (candidate) {
    case 'fix':
    case 'new_page':
    case 'improve':
    case 'add_schema':
      return candidate;
    default:
      return fallback;
  }
}
|
|
57
|
+
|
|
58
|
+
/**
 * Maps a count onto a priority label using ascending thresholds.
 *
 * `thresholds` may be partial; any tier left out (or set to null/undefined)
 * keeps its default of critical 20, high 10, medium 3.
 *
 * @param {number} count
 * @param {{critical?: number, high?: number, medium?: number}} [thresholds]
 * @returns {'critical'|'high'|'medium'|'low'}
 */
export function inferPriorityFromCount(count, thresholds = { critical: 20, high: 10, medium: 3 }) {
  const critical = thresholds?.critical ?? 20;
  const high = thresholds?.high ?? 10;
  const medium = thresholds?.medium ?? 3;

  if (count >= critical) return 'critical';
  if (count >= high) return 'high';
  if (count >= medium) return 'medium';
  return 'low';
}
|
|
64
|
+
|
|
65
|
+
/**
 * Returns the first `limit` items of an iterable as a new array.
 *
 * @param {Iterable|null|undefined} values - Treated as empty when nullish.
 * @param {number} [limit=5]
 * @returns {Array}
 */
export function collectTop(values, limit = 5) {
  return [...(values ?? [])].slice(0, limit);
}
|
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
const SAFE_JSON_FALLBACK = [];

/**
 * Decodes a JSON string, returning `fallback` when the input is empty or
 * cannot be parsed.
 *
 * Non-string, non-empty values are returned unchanged. An array fallback is
 * shallow-copied on each use so callers never share one instance.
 *
 * @param {*} value
 * @param {*} [fallback=[]]
 * @returns {*}
 */
export function parseJson(value, fallback = SAFE_JSON_FALLBACK) {
  const useFallback = () => (Array.isArray(fallback) ? [...fallback] : fallback);

  if (value == null || value === '') return useFallback();
  if (typeof value !== 'string') return value;

  let decoded;
  try {
    decoded = JSON.parse(value);
  } catch {
    return useFallback();
  }
  return decoded;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Lists the domains of a project, ordered target first, then owned, then
 * everything else, alphabetically within each group.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with an `all()` method
 *   on the prepared statement.
 * @param {string} project
 * @returns {Array<{id: *, domain: string, role: string}>}
 */
export function getProjectDomains(db, project) {
  const sql = `
    SELECT id, domain, role
    FROM domains
    WHERE project = ?
    ORDER BY CASE role WHEN 'target' THEN 0 WHEN 'owned' THEN 1 ELSE 2 END, domain
  `;
  const statement = db.prepare(sql);
  return statement.all(project);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Keeps only the domains whose role is 'target' or 'owned'.
 *
 * @param {Array<{role: string}>} domains
 * @returns {Array<object>}
 */
export function getTargetDomains(domains) {
  return domains.filter(({ role }) => {
    if (role === 'target') return true;
    return role === 'owned';
  });
}
|
|
25
|
+
|
|
26
|
+
/**
 * Keeps only competitor domains, optionally narrowed to one domain name.
 *
 * @param {Array<{role: string, domain: string}>} domains
 * @param {string|null} [vsDomain=null] - When truthy, only this competitor
 *   domain is kept.
 * @returns {Array<object>}
 */
export function getCompetitorDomains(domains, vsDomain = null) {
  return domains.filter((entry) => {
    if (entry.role !== 'competitor') return false;
    if (!vsDomain) return true;
    return entry.domain === vsDomain;
  });
}
|
|
29
|
+
|
|
30
|
+
/**
 * Counts the pages stored across every domain of a project.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with a `get()` method
 *   on the prepared statement.
 * @param {string} project
 * @returns {number} The count, or 0 when no row or a falsy count comes back.
 */
export function getProjectPageCount(db, project) {
  const sql = `
    SELECT COUNT(*) AS count
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ?
  `;
  const row = db.prepare(sql).get(project);
  return row?.count || 0;
}
|
|
38
|
+
|
|
39
|
+
/**
 * Ensures the project has at least one crawled page.
 *
 * @param {object} db
 * @param {string} project
 * @returns {number} The page count when it is non-zero.
 * @throws {Error} With `code === 'NO_CRAWL_DATA'` when the count is falsy.
 */
export function assertHasCrawlData(db, project) {
  const pageTotal = getProjectPageCount(db, project);
  if (pageTotal) return pageTotal;

  const failure = new Error('No crawl data found. Run `crawl` first.');
  failure.code = 'NO_CRAWL_DATA';
  throw failure;
}
|
|
48
|
+
|
|
49
|
+
/**
 * Loads the most recently generated analysis row for a project and decodes
 * its JSON columns.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with a `get()` method
 *   on the prepared statement.
 * @param {string} project
 * @returns {object|null} The row with `keyword_gaps`, `content_gaps`,
 *   `technical_gaps`, `new_pages` and `positioning` passed through
 *   `parseJson`, or null when no analysis exists.
 */
export function getLatestAnalysis(db, project) {
  const sql = `
    SELECT *
    FROM analyses
    WHERE project = ?
    ORDER BY generated_at DESC
    LIMIT 1
  `;
  const latest = db.prepare(sql).get(project);
  if (!latest) return null;

  const decoded = { ...latest };
  // These four columns fall back to an empty list; positioning falls back to null.
  for (const column of ['keyword_gaps', 'content_gaps', 'technical_gaps', 'new_pages']) {
    decoded[column] = parseJson(latest[column], []);
  }
  decoded.positioning = parseJson(latest.positioning, null);
  return decoded;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Fetches one row per page on the project's target/owned domains, with the
 * technical signals the query computes alongside it: meta description, H1,
 * canonical/OG/schema flags, schema and breadcrumb counts, H1 count, inbound
 * internal links, and the number of redirecting pages the page links to.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with an `all()` method
 *   on the prepared statement.
 * @param {string} project
 * @returns {Array<object>} Rows ordered by role, domain, click depth and URL.
 */
export function getTechnicalDataset(db, project) {
  const sql = `
    SELECT
      p.id,
      p.url,
      p.status_code,
      p.word_count,
      p.click_depth,
      p.is_indexable,
      d.domain,
      d.role,
      COALESCE(e.meta_desc, '') AS meta_desc,
      COALESCE(e.h1, '') AS h1,
      COALESCE(t.has_canonical, 0) AS has_canonical,
      COALESCE(t.has_og_tags, 0) AS has_og_tags,
      COALESCE(t.has_schema, 0) AS has_schema,
      COALESCE((SELECT COUNT(*) FROM page_schemas ps WHERE ps.page_id = p.id), 0) AS schema_count,
      COALESCE((SELECT COUNT(*) FROM page_schemas ps WHERE ps.page_id = p.id AND LOWER(ps.schema_type) = 'breadcrumblist'), 0) AS breadcrumb_count,
      COALESCE((SELECT COUNT(*) FROM headings h WHERE h.page_id = p.id AND h.level = 1), 0) AS h1_count,
      COALESCE((
        SELECT COUNT(*)
        FROM links l
        JOIN pages src ON src.id = l.source_id
        JOIN domains srcd ON srcd.id = src.domain_id
        WHERE srcd.project = d.project
          AND l.is_internal = 1
          AND l.target_url = p.url
      ), 0) AS inbound_internal_links,
      COALESCE((
        SELECT COUNT(*)
        FROM pages child
        JOIN domains childd ON childd.id = child.domain_id
        WHERE childd.project = d.project
          AND child.status_code BETWEEN 300 AND 399
          AND EXISTS (
            SELECT 1 FROM links l2 WHERE l2.source_id = p.id AND l2.target_url = child.url
          )
      ), 0) AS redirects_linked_from_page
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    LEFT JOIN technical t ON t.page_id = p.id
    LEFT JOIN extractions e ON e.page_id = p.id
    WHERE d.project = ?
      AND d.role IN ('target', 'owned')
    ORDER BY d.role, d.domain, p.click_depth, p.url
  `;
  const statement = db.prepare(sql);
  return statement.all(project);
}
|
|
117
|
+
|
|
118
|
+
/**
 * Counts schema types per domain for a project.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with an `all()` method
 *   on the prepared statement.
 * @param {string} project
 * @param {string|null} [vsDomain=null] - When truthy, competitor rows are
 *   limited to this domain; non-competitor rows are always included.
 * @returns {Array<{schema_type: string, domain: string, role: string, count: number, page_count: number}>}
 */
export function getSchemaCoverage(db, project, vsDomain = null) {
  const narrowed = Boolean(vsDomain);
  const bindings = narrowed ? [project, vsDomain] : [project];
  const competitorClause = narrowed ? " AND (d.role != 'competitor' OR d.domain = ?)" : '';

  const sql = `
    SELECT
      ps.schema_type,
      d.domain,
      d.role,
      COUNT(*) AS count,
      COUNT(DISTINCT p.id) AS page_count
    FROM page_schemas ps
    JOIN pages p ON p.id = ps.page_id
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ?
    ${competitorClause}
    GROUP BY ps.schema_type, d.domain, d.role
    ORDER BY ps.schema_type, d.role, count DESC
  `;
  return db.prepare(sql).all(...bindings);
}
|
|
142
|
+
|
|
143
|
+
/**
 * Fetches the stored `primary_entities` value of every extracted page in a
 * project, together with the page URL, domain and domain role.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with an `all()` method
 *   on the prepared statement.
 * @param {string} project
 * @param {string|null} [vsDomain=null] - When truthy, competitor rows are
 *   limited to this domain; non-competitor rows are always included.
 * @returns {Array<{primary_entities: *, url: string, domain: string, role: string}>}
 */
export function getEntityCoverage(db, project, vsDomain = null) {
  const narrowed = Boolean(vsDomain);
  const bindings = narrowed ? [project, vsDomain] : [project];
  const competitorClause = narrowed ? " AND (d.role != 'competitor' OR d.domain = ?)" : '';

  const sql = `
    SELECT e.primary_entities, p.url, d.domain, d.role
    FROM extractions e
    JOIN pages p ON p.id = e.page_id
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ?
    ${competitorClause}
  `;
  return db.prepare(sql).all(...bindings);
}
|
|
160
|
+
|
|
161
|
+
/**
 * Fetches every level 1-3 heading in a project with its page URL, page word
 * count, domain and domain role.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with an `all()` method
 *   on the prepared statement.
 * @param {string} project
 * @param {string|null} [vsDomain=null] - When truthy, competitor rows are
 *   limited to this domain; non-competitor rows are always included.
 * @returns {Array<{level: number, text: string, url: string, word_count: number, domain: string, role: string}>}
 */
export function getHeadingClusterDataset(db, project, vsDomain = null) {
  const narrowed = Boolean(vsDomain);
  const bindings = narrowed ? [project, vsDomain] : [project];
  const competitorClause = narrowed ? " AND (d.role != 'competitor' OR d.domain = ?)" : '';

  const sql = `
    SELECT h.level, h.text, p.url, p.word_count, d.domain, d.role
    FROM headings h
    JOIN pages p ON p.id = h.page_id
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ?
      AND h.level IN (1, 2, 3)
    ${competitorClause}
    ORDER BY d.role, d.domain, p.url, h.level
  `;
  return db.prepare(sql).all(...bindings);
}
|
|
180
|
+
|
|
181
|
+
/**
 * Fetches URL, word count and click depth for every page in a project, along
 * with the owning domain and its role.
 *
 * @param {object} db - Handle exposing `prepare(sql)` with an `all()` method
 *   on the prepared statement.
 * @param {string} project
 * @param {string|null} [vsDomain=null] - When truthy, competitor rows are
 *   limited to this domain; non-competitor rows are always included.
 * @returns {Array<{url: string, word_count: number, click_depth: number, domain: string, role: string}>}
 */
export function getPagePatternDataset(db, project, vsDomain = null) {
  const narrowed = Boolean(vsDomain);
  const bindings = narrowed ? [project, vsDomain] : [project];
  const competitorClause = narrowed ? " AND (d.role != 'competitor' OR d.domain = ?)" : '';

  const sql = `
    SELECT p.url, p.word_count, p.click_depth, d.domain, d.role
    FROM pages p
    JOIN domains d ON d.id = p.domain_id
    WHERE d.project = ?
    ${competitorClause}
    ORDER BY d.role, d.domain, p.url
  `;
  return db.prepare(sql).all(...bindings);
}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
import { collectTop, makeAction, sortActions } from './heuristics.js';
|
|
2
|
+
import { getEntityCoverage, getHeadingClusterDataset, getPagePatternDataset, getSchemaCoverage } from './queries.js';
|
|
3
|
+
|
|
4
|
+
/**
 * Extracts the pathname of an absolute URL.
 *
 * @param {string} url
 * @returns {string} The pathname, or '/' when the URL cannot be parsed or has
 *   an empty path.
 */
function getUrlPath(url) {
  let pathname;
  try {
    ({ pathname } = new URL(url));
  } catch {
    return '/';
  }
  return pathname || '/';
}
|
|
11
|
+
|
|
12
|
+
/**
 * Assigns a URL path to a coarse page category by keyword.
 *
 * Rules are tried in order and the first pattern found anywhere in the
 * lower-cased path wins.
 *
 * @param {string} pathname
 * @returns {string} 'homepage', 'docs', 'trust', 'dashboards', 'comparison',
 *   'tutorials', 'product-pages', 'onboarding' or 'other'.
 */
function classifyPath(pathname) {
  const lowered = pathname.toLowerCase();
  if (!lowered || lowered === '/') return 'homepage';

  const rules = [
    [/docs|documentation|api|reference/, 'docs'],
    [/pricing|security|status|sla|compliance|trust|rate-limit/, 'trust'],
    [/dashboard|studio|app|console|portal/, 'dashboards'],
    [/compare|alternative|vs-/, 'comparison'],
    [/guide|tutorial|how-to|learn|academy/, 'tutorials'],
    [/product|products|platform|solutions|use-cases|features/, 'product-pages'],
    [/onboarding|get-started|quickstart|start-here|sign-up|signup/, 'onboarding'],
  ];

  const match = rules.find(([pattern]) => pattern.test(lowered));
  return match ? match[1] : 'other';
}
|
|
24
|
+
|
|
25
|
+
/**
 * Reduces heading text to a comparable topic string: lower-cased, stripped of
 * everything except a-z, 0-9 and whitespace, with a fixed list of filler
 * words removed and whitespace collapsed.
 *
 * @param {*} text
 * @returns {string}
 */
function normalizeHeadingTopic(text) {
  const lowered = String(text || '').toLowerCase();
  const alphanumeric = lowered.replace(/[^a-z0-9\s]/g, ' ');
  const withoutFillers = alphanumeric.replace(/\b(what|why|how|when|where|best|your|with|for|the|and|from|into|using|guide|tutorial|overview|introduction)\b/g, ' ');
  return withoutFillers.replace(/\s+/g, ' ').trim();
}
|
|
33
|
+
|
|
34
|
+
/**
 * Splits extracted entities into the set mentioned by non-competitor rows and
 * a map of entity -> competitor domains that mention it.
 *
 * `primary_entities` may already be an array or a JSON-encoded string.
 * Anything that does not decode to an array is ignored.
 *
 * @param {Array<{primary_entities: (string|Array|null), role: string, domain: string}>} rows
 * @returns {{target: Set<string>, competitors: Map<string, Set<string>>}}
 */
function buildEntitySets(rows) {
  const target = new Set();
  const competitors = new Map();

  for (const row of rows) {
    let values = row.primary_entities;
    if (!Array.isArray(values)) {
      try {
        values = JSON.parse(values || '[]');
      } catch {
        values = [];
      }
    }
    // Valid JSON is not necessarily an array: `null`, numbers and objects are
    // not iterable, and a JSON string would be walked character by character.
    if (!Array.isArray(values)) continue;

    for (const entity of values) {
      const key = String(entity || '').trim();
      if (!key) continue;
      if (row.role === 'competitor') {
        if (!competitors.has(key)) competitors.set(key, new Set());
        competitors.get(key).add(row.domain);
      } else {
        target.add(key);
      }
    }
  }

  return { target, competitors };
}
|
|
53
|
+
|
|
54
|
+
/**
 * Builds suggestive actions by comparing competitor pages with the target
 * domain set across page-type categories, heading topics, entities, schema
 * types and content depth.
 *
 * @param {object} db - Database handle, passed through to the query helpers.
 * @param {string} project - Project identifier.
 * @param {{vsDomain?: (string|null), scope?: string}} [options] - `vsDomain`
 *   limits the comparison to one competitor domain. `scope` ('docs',
 *   'product-pages', 'dashboards', 'onboarding' or 'all') restricts the
 *   category-based suggestions; unknown scopes behave like 'all'.
 * @returns {Array<object>} Actions as ordered by `sortActions`.
 */
export function buildSuggestiveActions(db, project, options = {}) {
  const { vsDomain = null, scope = 'all' } = options;
  const actions = [];
  const pages = getPagePatternDataset(db, project, vsDomain);
  const headings = getHeadingClusterDataset(db, project, vsDomain);
  const schemas = getSchemaCoverage(db, project, vsDomain);
  const entities = buildEntitySets(getEntityCoverage(db, project, vsDomain));

  // Bucket pages by URL-pattern category, keeping competitor pages in dataset order.
  const targetCategories = new Set();
  const competitorCategories = new Map();
  for (const page of pages) {
    const category = classifyPath(getUrlPath(page.url));
    if (page.role === 'competitor') {
      if (!competitorCategories.has(category)) competitorCategories.set(category, []);
      competitorCategories.get(category).push(page);
    } else {
      targetCategories.add(category);
    }
  }

  const categoryScopeMap = {
    docs: new Set(['docs', 'tutorials']),
    'product-pages': new Set(['product-pages', 'comparison', 'trust']),
    dashboards: new Set(['dashboards']),
    onboarding: new Set(['onboarding', 'trust']),
    all: null,
  };
  // Only accept real scope entries; inherited keys such as 'constructor' are
  // treated like an unknown scope.
  const scopedCategories = categoryScopeMap[scope];
  const allowedCategories = scopedCategories instanceof Set ? scopedCategories : null;

  const titleMap = {
    docs: 'Create documentation/reference pages competitors already use',
    trust: 'Publish trust pages competitors rely on for conversion',
    comparison: 'Launch competitor comparison pages',
    tutorials: 'Build tutorial and guide content clusters',
    'product-pages': 'Add dedicated product or solution landing pages',
    dashboards: 'Expose dashboards/console entry pages to capture tool intent',
    onboarding: 'Create onboarding and quickstart paths',
  };

  // 1. Page categories competitors have and the target set lacks entirely.
  for (const [category, compPages] of competitorCategories.entries()) {
    if (category === 'homepage' || category === 'other') continue;
    if (allowedCategories && !allowedCategories.has(category)) continue;
    if (targetCategories.has(category)) continue;

    const samplePaths = compPages.slice(0, 4).map(p => `${p.domain}${getUrlPath(p.url)}`);
    actions.push(makeAction({
      id: `suggestive-category-${category}`,
      type: 'new_page',
      priority: compPages.length >= 5 ? 'high' : 'medium',
      area: 'structure',
      title: titleMap[category] || `Add ${category} page type coverage`,
      why: `Competitors have ${compPages.length} ${category} pages while the target domain set has none in this pattern.`,
      evidence: collectTop(samplePaths, 6),
      implementationHints: [
        'Start with the highest commercial-intent template competitors repeat most often.',
        'Link the new section from navigation, footer, and relevant hub pages so it becomes crawlable fast.',
      ],
    }));
  }

  // 2. Heading topics used by competitors but absent from the target set.
  const competitorTopics = new Map();
  const targetTopics = new Set();
  for (const row of headings) {
    const topic = normalizeHeadingTopic(row.text);
    if (!topic || topic.length < 10) continue;
    if (row.role === 'competitor') {
      if (!competitorTopics.has(topic)) competitorTopics.set(topic, { domains: new Set(), pages: [] });
      const bucket = competitorTopics.get(topic);
      bucket.domains.add(row.domain);
      bucket.pages.push(`${row.domain}${getUrlPath(row.url)}`);
    } else {
      targetTopics.add(topic);
    }
  }

  // Drop covered topics before taking the top 10; otherwise topics the target
  // already has use up slots and genuine gaps are lost.
  const missingTopics = [...competitorTopics.entries()]
    .filter(([topic]) => !targetTopics.has(topic))
    .sort((a, b) => b[1].domains.size - a[1].domains.size)
    .slice(0, 10);

  for (const [topic, info] of missingTopics) {
    actions.push(makeAction({
      id: `suggestive-topic-${topic.replace(/[^a-z0-9]+/g, '-')}`,
      type: 'new_page',
      priority: info.domains.size >= 3 ? 'high' : 'medium',
      area: 'content',
      title: `Cover missing topic cluster: ${topic}`,
      why: 'Competitors repeatedly organize content around this heading cluster and the target site does not.',
      evidence: collectTop([
        `Competitors: ${[...info.domains].join(', ')}`,
        ...info.pages.slice(0, 4),
      ], 5),
      implementationHints: [
        'Turn this topic into a guide, docs page, or landing page depending on user intent.',
        'Reuse the subtopics competitors surface in H2/H3 structure, but add stronger proof and differentiation.',
      ],
    }));
  }

  // 3. Entities only competitors mention, most widely shared first.
  const missingEntities = [...entities.competitors.entries()]
    .filter(([entity]) => !entities.target.has(entity))
    .sort((a, b) => b[1].size - a[1].size)
    .slice(0, 6);

  for (const [entity, domains] of missingEntities) {
    actions.push(makeAction({
      id: `suggestive-entity-${entity.toLowerCase().replace(/[^a-z0-9]+/g, '-')}`,
      type: 'improve',
      priority: domains.size >= 3 ? 'high' : 'medium',
      area: 'content',
      title: `Add use-case coverage around entity: ${entity}`,
      why: 'Competitor pages keep referencing this entity, which suggests buyer or developer demand the target has not served yet.',
      evidence: collectTop([
        `Competitors mentioning it: ${[...domains].join(', ')}`,
      ], 4),
      implementationHints: [
        'Decide whether this belongs in docs, a comparison page, a feature page, or a tutorial.',
        'Add concrete examples, code snippets, or workflows tied to the entity.',
      ],
    }));
  }

  // 4. Schema types present only on competitor rows (first five encountered).
  const targetSchemaTypes = new Set(schemas.filter(s => s.role !== 'competitor').map(s => s.schema_type));
  const missingSchemaTypes = new Map();
  for (const row of schemas.filter(s => s.role === 'competitor')) {
    if (targetSchemaTypes.has(row.schema_type)) continue;
    if (!missingSchemaTypes.has(row.schema_type)) missingSchemaTypes.set(row.schema_type, new Set());
    missingSchemaTypes.get(row.schema_type).add(row.domain);
  }

  for (const [schemaType, domains] of [...missingSchemaTypes.entries()].slice(0, 5)) {
    actions.push(makeAction({
      // String() keeps a null/non-string schema type from throwing here.
      id: `suggestive-schema-${String(schemaType).toLowerCase()}`,
      type: 'add_schema',
      priority: domains.size >= 2 ? 'medium' : 'low',
      area: 'schema',
      title: `Plan content/templates that support ${schemaType} schema`,
      why: 'Competitors use this schema type on pages or features the target site likely has not built yet.',
      evidence: collectTop([
        `Competitors using it: ${[...domains].join(', ')}`,
      ], 3),
      implementationHints: [
        `Identify which future page template should emit ${schemaType} schema.`,
        'Design the page structure so the schema fields are naturally supported in the content model.',
      ],
    }));
  }

  // 5. Categories where competitor pages are markedly longer than the target
  // average. The per-category buckets built above are reused instead of
  // re-scanning and re-classifying every page.
  const targetAvgWords = average(pages.filter(p => p.role !== 'competitor').map(p => p.word_count || 0));

  for (const [category, compPages] of competitorCategories.entries()) {
    if (allowedCategories && !allowedCategories.has(category)) continue;
    const avg = average(compPages.map(p => p.word_count || 0));
    if (avg < 300 || avg <= targetAvgWords * 1.5) continue;
    actions.push(makeAction({
      id: `suggestive-depth-${category}`,
      type: 'improve',
      priority: avg > 1200 ? 'high' : 'medium',
      area: 'content',
      title: `Invest in deeper ${category} content`,
      why: `Competitors average ${Math.round(avg)} words on ${category} pages versus about ${Math.round(targetAvgWords || 0)} words across the target domain set.`,
      evidence: collectTop(compPages.slice(0, 4).map(p => `${p.domain}${getUrlPath(p.url)} (${p.word_count || 0} words)`), 4),
      implementationHints: [
        'Add examples, implementation details, FAQs, trust proof, and comparisons instead of pure feature fluff.',
        'Prioritize page types that map to high-intent keywords or repeated competitor templates.',
      ],
    }));
  }

  return sortActions(actions);
}
|
|
225
|
+
|
|
226
|
+
/**
 * Arithmetic mean of the finite, strictly positive numbers in `values`.
 * Zeros, negatives and non-numbers are left out of both sum and count.
 *
 * @param {Array<number>} values
 * @returns {number} The mean, or 0 when no value qualifies.
 */
function average(values) {
  let total = 0;
  let counted = 0;
  for (const value of values) {
    if (!Number.isFinite(value) || value <= 0) continue;
    total += value;
    counted += 1;
  }
  return counted === 0 ? 0 : total / counted;
}
|