seo-intel 1.4.7 → 1.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,25 @@
1
1
  # Changelog
2
2
 
3
+ ## 1.4.9 (2026-04-10)
4
+
5
+ ### Security
6
+ - Fixed arbitrary file write via `--out` query param in dashboard terminal API — write paths now server-controlled only
7
+ - Fixed path traversal in froggo config loader — project names validated to `[a-z0-9_-]`
8
+ - Added project name validation to export and terminal API endpoints
9
+
10
+ ### URL Normalization
11
+ - Pages are now normalized before storage: fragments stripped (`/#pricing` → `/`), `index.html` collapsed
12
+ - Internal link targets also normalized for consistent orphan/link analysis
13
+ - Re-crawl to clean up existing fragment duplicates in your database
14
+
15
+ ## 1.4.8 (2026-04-10)
16
+
17
+ ### Export: own site only, zero competitor bloat
18
+ - ALL profile sections now filter to own site (target/owned) — no competitor pages, links, headings, or AEO scores
19
+ - Keywords export shows gap summary only: keywords competitors use that you don't, with who uses them
20
+ - AEO export shows only low-scoring own pages (<60) that need improvement
21
+ - Technical export was already own-site; removed the AI pipeline exception that bypassed filtering
22
+
3
23
  ## 1.4.7 (2026-04-09)
4
24
 
5
25
  ### Export: profiles are actions only
package/db/db.js CHANGED
@@ -268,7 +268,19 @@ export function upsertDomain(db, { domain, project, role }) {
268
268
  `).run(domain, project, role, now, now);
269
269
  }
270
270
 
271
/**
 * Canonicalize a URL string so equivalent pages/links compare equal.
 *
 * Steps, in order: parse with the URL constructor, clear the fragment,
 * replace a trailing `/index.html` or `/index.htm` path segment
 * (case-insensitive) with `/`, then re-serialize. The query string is not
 * touched by this function.
 *
 * Module-private (no `export`); in this diff it is called from upsertPage
 * (on `url`) and from insertLinks (on each `l.url`).
 *
 * @param {string} rawUrl - URL to normalize.
 * @returns {string} The re-serialized URL, or `rawUrl` unchanged when the
 *   URL constructor throws (this function itself never throws).
 */
+ function normalizePageUrl(rawUrl) {
272
+ try {
273
+ const u = new URL(rawUrl); // throws on input the URL parser rejects; handled by the catch below
274
+ u.hash = ''; // strip fragments (#pricing, #faq, etc.)
275
+ let path = u.pathname;
276
+ path = path.replace(/\/index\.html?$/i, '/'); // /en/index.html → /en/
277
+ u.pathname = path;
278
+ return u.toString();
279
+ } catch { return rawUrl; } // best effort: unparseable input is passed through as-is
280
+ }
281
+
271
282
  export function upsertPage(db, { domainId, url, statusCode, wordCount, loadMs, isIndexable, clickDepth = 0, publishedDate = null, modifiedDate = null, contentHash = null, title = null, metaDesc = null, bodyText = null }) {
283
+ url = normalizePageUrl(url);
272
284
  const now = Date.now();
273
285
  db.prepare(`
274
286
  INSERT INTO pages (domain_id, url, crawled_at, first_seen_at, status_code, word_count, load_ms, is_indexable, click_depth, published_date, modified_date, content_hash, title, meta_desc, body_text)
@@ -350,7 +362,7 @@ export function insertLinks(db, sourceId, links) {
350
362
  const stmt = db.prepare(`INSERT INTO links (source_id, target_url, anchor_text, is_internal) VALUES (?, ?, ?, ?)`);
351
363
  db.exec('BEGIN');
352
364
  try {
353
- for (const l of links) stmt.run(sourceId, l.url, l.anchor, l.isInternal ? 1 : 0);
365
+ for (const l of links) stmt.run(sourceId, normalizePageUrl(l.url), l.anchor, l.isInternal ? 1 : 0);
354
366
  db.exec('COMMIT');
355
367
  } catch (e) { db.exec('ROLLBACK'); throw e; }
356
368
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "seo-intel",
3
- "version": "1.4.7",
3
+ "version": "1.4.9",
4
4
  "description": "Local Ahrefs-style SEO competitor intelligence. Crawl → SQLite → cloud analysis.",
5
5
  "type": "module",
6
6
  "license": "SEE LICENSE IN LICENSE",
package/server.js CHANGED
@@ -596,7 +596,7 @@ async function handleRequest(req, res) {
596
596
  const format = url.searchParams.get('format') || 'json';
597
597
  const profile = url.searchParams.get('profile'); // dev | content | ai-pipeline
598
598
 
599
- if (!project) { json(res, 400, { error: 'Missing project' }); return; }
599
+ if (!project || !/^[a-z0-9_-]+$/i.test(project)) { json(res, 400, { error: 'Invalid project name' }); return; }
600
600
 
601
601
  const { getDb } = await import('./db/db.js');
602
602
  const db = getDb(join(__dirname, 'seo-intel.db'));
@@ -758,7 +758,7 @@ async function handleRequest(req, res) {
758
758
  return p.insightTypes ? data.filter(r => p.insightTypes.includes(r._type)) : data;
759
759
  }
760
760
  case 'technical': {
761
- if (!Array.isArray(data) || prof === 'ai-pipeline') return data;
761
+ if (!Array.isArray(data)) return data;
762
762
  // Own site only, per-page issue summary
763
763
  const own = data.filter(r => r.role === 'target' || r.role === 'owned');
764
764
  const issues = [];
@@ -830,27 +830,30 @@ async function handleRequest(req, res) {
830
830
  case 'schemas': return data; // raw only — not in any profile
831
831
  case 'aeo': {
832
832
  if (!Array.isArray(data)) return data;
833
- if (prof === 'content') {
834
- // Content: only low-scoring pages (needs improvement)
835
- return data.filter(r => r.score < 60);
836
- }
837
- return data;
833
+ // Own site only, low-scoring pages that need work
834
+ const ownAeo = data.filter(r => r.role === 'target' || r.role === 'owned');
835
+ return ownAeo.filter(r => r.score < 60);
838
836
  }
839
837
  case 'keywords': {
840
838
  if (!Array.isArray(data)) return data;
841
- if (prof === 'content') {
842
- // Content: only competitor-dominated keywords (role != target/owned)
843
- const byKw = {};
844
- for (const r of data) { (byKw[r.keyword] ||= []).push(r); }
845
- const gapKws = new Set();
846
- for (const [kw, rows] of Object.entries(byKw)) {
847
- const hasTarget = rows.some(r => r.role === 'target' || r.role === 'owned');
848
- const hasCompetitor = rows.some(r => r.role === 'competitor');
849
- if (!hasTarget && hasCompetitor) gapKws.add(kw);
850
- }
851
- return data.filter(r => gapKws.has(r.keyword));
839
+ // Only keyword gaps: competitor has it, you don't
840
+ const byKw = {};
841
+ for (const r of data) { (byKw[r.keyword] ||= []).push(r); }
842
+ const gapKws = new Set();
843
+ for (const [kw, rows] of Object.entries(byKw)) {
844
+ const hasTarget = rows.some(r => r.role === 'target' || r.role === 'owned');
845
+ const hasCompetitor = rows.some(r => r.role === 'competitor');
846
+ if (!hasTarget && hasCompetitor) gapKws.add(kw);
852
847
  }
853
- return data;
848
+ // Return gap keywords with which competitors use them
849
+ const gaps = [];
850
+ for (const kw of gapKws) {
851
+ const rows = byKw[kw];
852
+ const competitors = rows.map(r => r.domain).join(', ');
853
+ const topFreq = Math.max(...rows.map(r => r.freq));
854
+ gaps.push({ keyword: kw, used_by: competitors, frequency: topFreq });
855
+ }
856
+ return gaps.sort((a, b) => b.frequency - a.frequency);
854
857
  }
855
858
  case 'watch': {
856
859
  // Keep only errors + warnings, drop notices
@@ -954,6 +957,15 @@ async function handleRequest(req, res) {
954
957
  return md;
955
958
  }
956
959
  case 'keywords': {
960
+ // Profile exports return gap summary; raw exports return full matrix
961
+ if (data[0] && data[0].used_by !== undefined) {
962
+ let md = header + `## Keyword Gaps (${data.length})\n\nKeywords competitors use that you don't.\n\n| Keyword | Used By | Frequency |\n|---------|---------|----------|\n`;
963
+ for (const r of data.slice(0, 200)) {
964
+ md += `| ${r.keyword} | ${r.used_by} | ${r.frequency} |\n`;
965
+ }
966
+ if (data.length > 200) md += `\n_...and ${data.length - 200} more._\n`;
967
+ return md;
968
+ }
957
969
  let md = header + '## Keyword Matrix\n\n| Keyword | Domain | Role | Location | Frequency |\n|---------|--------|------|----------|-----------|\n';
958
970
  for (const r of data.slice(0, 500)) {
959
971
  md += `| ${r.keyword} | ${r.domain} | ${r.role} | ${r.location || ''} | ${r.freq} |\n`;
@@ -1152,6 +1164,10 @@ async function handleRequest(req, res) {
1152
1164
  const params = url.searchParams;
1153
1165
  const command = params.get('command');
1154
1166
  const project = params.get('project') || '';
1167
+ if (project && !/^[a-z0-9_-]+$/i.test(project)) {
1168
+ json(res, 400, { error: 'Invalid project name' });
1169
+ return;
1170
+ }
1155
1171
 
1156
1172
  // Whitelist allowed commands
1157
1173
  const ALLOWED = ['crawl', 'extract', 'analyze', 'export-actions', 'competitive-actions',
@@ -1178,7 +1194,7 @@ async function handleRequest(req, res) {
1178
1194
  if (params.get('type')) args.push('--type', params.get('type'));
1179
1195
  if (params.get('limit')) args.push('--limit', params.get('limit'));
1180
1196
  if (params.has('raw')) args.push('--raw');
1181
- if (params.get('out')) args.push('--out', params.get('out'));
1197
+ // --out is NOT passed from dashboard — write paths are server-controlled only (see auto-save below)
1182
1198
 
1183
1199
  // Auto-save exports from dashboard to reports/
1184
1200
  const EXPORT_CMDS = ['export-actions', 'suggest-usecases', 'competitive-actions'];