@pseolint/core 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +84 -15
- package/dist/ai/prompt.d.ts +1 -1
- package/dist/ai/prompt.d.ts.map +1 -1
- package/dist/ai/prompt.js +13 -1
- package/dist/ai/prompt.js.map +1 -1
- package/dist/auditor.d.ts.map +1 -1
- package/dist/auditor.js +197 -63
- package/dist/auditor.js.map +1 -1
- package/dist/cache.d.ts.map +1 -1
- package/dist/cache.js +38 -2
- package/dist/cache.js.map +1 -1
- package/dist/formatters/console.d.ts +9 -0
- package/dist/formatters/console.d.ts.map +1 -1
- package/dist/formatters/console.js +53 -0
- package/dist/formatters/console.js.map +1 -1
- package/dist/formatters/html.d.ts.map +1 -1
- package/dist/formatters/html.js +363 -135
- package/dist/formatters/html.js.map +1 -1
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +9 -0
- package/dist/index.js.map +1 -1
- package/dist/rule-references.d.ts.map +1 -1
- package/dist/rule-references.js +8 -0
- package/dist/rule-references.js.map +1 -1
- package/dist/rules/aeo/answer-first.d.ts +18 -0
- package/dist/rules/aeo/answer-first.d.ts.map +1 -0
- package/dist/rules/aeo/answer-first.js +191 -0
- package/dist/rules/aeo/answer-first.js.map +1 -0
- package/dist/rules/aeo/citable-facts.d.ts +9 -0
- package/dist/rules/aeo/citable-facts.d.ts.map +1 -0
- package/dist/rules/aeo/citable-facts.js +90 -0
- package/dist/rules/aeo/citable-facts.js.map +1 -0
- package/dist/rules/aeo/content-modularity.d.ts +11 -0
- package/dist/rules/aeo/content-modularity.d.ts.map +1 -0
- package/dist/rules/aeo/content-modularity.js +107 -0
- package/dist/rules/aeo/content-modularity.js.map +1 -0
- package/dist/rules/aeo/crawler-access.d.ts +25 -0
- package/dist/rules/aeo/crawler-access.d.ts.map +1 -0
- package/dist/rules/aeo/crawler-access.js +116 -0
- package/dist/rules/aeo/crawler-access.js.map +1 -0
- package/dist/rules/aeo/faq-coverage.d.ts +9 -0
- package/dist/rules/aeo/faq-coverage.d.ts.map +1 -0
- package/dist/rules/aeo/faq-coverage.js +71 -0
- package/dist/rules/aeo/faq-coverage.js.map +1 -0
- package/dist/rules/aeo/freshness-signals.d.ts +9 -0
- package/dist/rules/aeo/freshness-signals.d.ts.map +1 -0
- package/dist/rules/aeo/freshness-signals.js +109 -0
- package/dist/rules/aeo/freshness-signals.js.map +1 -0
- package/dist/rules/aeo/llms-txt.d.ts +24 -0
- package/dist/rules/aeo/llms-txt.d.ts.map +1 -0
- package/dist/rules/aeo/llms-txt.js +93 -0
- package/dist/rules/aeo/llms-txt.js.map +1 -0
- package/dist/rules/aeo/non-replicable-value.d.ts +9 -0
- package/dist/rules/aeo/non-replicable-value.d.ts.map +1 -0
- package/dist/rules/aeo/non-replicable-value.js +95 -0
- package/dist/rules/aeo/non-replicable-value.js.map +1 -0
- package/dist/rules/scope.d.ts +12 -0
- package/dist/rules/scope.d.ts.map +1 -0
- package/dist/rules/scope.js +66 -0
- package/dist/rules/scope.js.map +1 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts +16 -0
- package/dist/rules/tech/robots-sitemap-presence.d.ts.map +1 -1
- package/dist/rules/tech/robots-sitemap-presence.js +26 -2
- package/dist/rules/tech/robots-sitemap-presence.js.map +1 -1
- package/dist/types.d.ts +29 -0
- package/dist/types.d.ts.map +1 -1
- package/package.json +91 -66
package/README.md
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# @pseolint/core
|
|
2
2
|
|
|
3
|
-
> Programmatic SEO audit engine
|
|
3
|
+
> Programmatic SEO audit engine for SpamBrain-risk detection across large template-generated sites.
|
|
4
4
|
|
|
5
|
-
The core engine behind [pseolint](https://www.npmjs.com/package/pseolint). Use this package to
|
|
5
|
+
The core engine behind [pseolint](https://www.npmjs.com/package/pseolint). Use this package to embed pSEO auditing into your own tools, CI pipelines, or SaaS products.
|
|
6
6
|
|
|
7
7
|
## Install
|
|
8
8
|
|
|
@@ -12,7 +12,7 @@ npm install @pseolint/core
|
|
|
12
12
|
|
|
13
13
|
## Usage
|
|
14
14
|
|
|
15
|
-
```
|
|
15
|
+
```ts
|
|
16
16
|
import { auditSource } from "@pseolint/core";
|
|
17
17
|
|
|
18
18
|
const summary = await auditSource("./out");
|
|
@@ -20,34 +20,103 @@ console.log(`Score: ${summary.score}/100`);
|
|
|
20
20
|
console.log(`Findings: ${summary.findings.length}`);
|
|
21
21
|
```
|
|
22
22
|
|
|
23
|
+
`auditSource` accepts a local directory, a single HTML file, a page URL, or a sitemap URL.
|
|
24
|
+
|
|
23
25
|
## What It Checks
|
|
24
26
|
|
|
25
|
-
|
|
27
|
+
42 rules across 8 categories. Seven categories feed the composite score; `data/*` is a separate data-binding family.
|
|
26
28
|
|
|
27
|
-
- **SpamBrain
|
|
28
|
-
- **
|
|
29
|
-
- **
|
|
30
|
-
- **
|
|
31
|
-
- **
|
|
32
|
-
- **
|
|
29
|
+
- **Spam / SpamBrain risk** (8) — near-duplicate (SimHash), entity-swap doorways, thin content, boilerplate ratio, template diversity, template coverage, publication velocity, doorway pattern
|
|
30
|
+
- **Technical SEO** (8) — canonical consistency, canonical/noindex and robots/noindex conflicts, sitemap completeness, robots compliance, redirect chains, soft 404s, Open Graph, hreflang
|
|
31
|
+
- **AEO / AI Overview citability** (8, v0.3.0) — `llms.txt` presence, AI-crawler access in robots.txt, freshness signals, FAQ coverage, answer-first opener, citable-fact density, non-replicable value, content modularity
|
|
32
|
+
- **Content** (5) — unique value, heading uniqueness, meta uniqueness, author attribution, E-E-A-T signals
|
|
33
|
+
- **Internal linking** (5) — orphan pages, dead ends, cluster connectivity, hub pages, link depth
|
|
34
|
+
- **Structured data** (3) — JSON-LD validity, required fields, cross-page schema consistency
|
|
35
|
+
- **Cannibalization** (3) — title overlap, keyword collision, URL pattern conflicts
|
|
36
|
+
- **Data binding** (2) — verify rendered pages expose values from a source dataset (missing or identical-across-pages bindings)
|
|
33
37
|
|
|
34
38
|
## API
|
|
35
39
|
|
|
36
40
|
### `auditSource(source, options?)`
|
|
37
41
|
|
|
38
|
-
|
|
42
|
+
Returns an `AuditSummary` with a composite score, per-category scores, enriched findings, and optional cache / state / AI-triage metadata.
|
|
43
|
+
|
|
44
|
+
Selected options (see `AuditOptions` in `types.ts` for the full surface):
|
|
45
|
+
|
|
46
|
+
```ts
|
|
47
|
+
await auditSource("https://example.com/sitemap.xml", {
|
|
48
|
+
concurrency: 5,
|
|
49
|
+
timeout: 30_000,
|
|
50
|
+
sampleSize: 200,
|
|
51
|
+
samplingStrategy: "stratified", // or "random"
|
|
52
|
+
ignore: ["**/api/**"],
|
|
53
|
+
maxFetchBytes: 52_428_800, // 50 MB hard cap per run
|
|
54
|
+
cache: { dir: ".pseolint/cache", ttlMs: 7 * 24 * 60 * 60 * 1000 },
|
|
55
|
+
state: { path: ".pseolint/state.json", since: true, exitOnRegression: true },
|
|
56
|
+
pageGroups: {
|
|
57
|
+
blog: { match: "**/blog/**", rules: ["content/*", "spam/*"] },
|
|
58
|
+
products: { match: "**/p/**", overrides: { "spam/thin-content": { thinContentMinWords: 200 } } },
|
|
59
|
+
},
|
|
60
|
+
dataSource: { records: [{ url: "/p/*", data: { price: "$19", stock: 12 } }] },
|
|
61
|
+
entityPatterns: [{ placeholder: "[CITY]", pattern: "\\b(NYC|LA|SF)\\b", flags: "gi" }],
|
|
62
|
+
ai: { enabled: true, provider: "anthropic", model: "claude-haiku-4-5-20251001", maxCostUsd: 0.1 },
|
|
63
|
+
telemetry: { enabled: true, path: ".pseolint/telemetry.jsonl" },
|
|
64
|
+
rules: {
|
|
65
|
+
nearDuplicateThreshold: 0.85,
|
|
66
|
+
thinContentMinWords: 300,
|
|
67
|
+
titleOverlapThreshold: 0.8,
|
|
68
|
+
// ...
|
|
69
|
+
},
|
|
70
|
+
});
|
|
71
|
+
```
|
|
39
72
|
|
|
40
73
|
### Formatters
|
|
41
74
|
|
|
42
|
-
```
|
|
75
|
+
```ts
|
|
43
76
|
import { formatConsole, formatJson, formatMarkdown, formatHtml } from "@pseolint/core";
|
|
44
77
|
|
|
45
|
-
const
|
|
78
|
+
const out = formatConsole(summary);
|
|
46
79
|
const json = formatJson(summary);
|
|
47
|
-
const md
|
|
80
|
+
const md = formatMarkdown(summary);
|
|
48
81
|
const html = formatHtml(summary);
|
|
49
82
|
```
|
|
50
83
|
|
|
84
|
+
### AI triage
|
|
85
|
+
|
|
86
|
+
When `ai.enabled` is set, an LLM clusters the findings into root causes. Providers are loaded lazily from optional peer deps — install only the one you need:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
npm install @ai-sdk/anthropic # or @ai-sdk/openai, @ai-sdk/google, @ai-sdk/mistral,
|
|
90
|
+
# @ai-sdk/groq, @ai-sdk/xai, @ai-sdk/cohere,
|
|
91
|
+
# ollama-ai-provider-v2
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
```ts
|
|
95
|
+
import { triageFindings, createLanguageModel, estimateCostUsd } from "@pseolint/core";
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
Cost and daily-budget caps are enforced pre-flight; results are cached on disk by default.
|
|
99
|
+
|
|
100
|
+
### Delta runs & regression gating
|
|
101
|
+
|
|
102
|
+
Pass `state.since: true` to audit only URLs whose content hash changed since the last run, and `state.exitOnRegression: true` to flag a run where a new rule ID fires on any previously clean URL (`summary.hasRegression`).
|
|
103
|
+
|
|
104
|
+
### Caching
|
|
105
|
+
|
|
106
|
+
Setting `cache` enables an ETag/Last-Modified-aware disk cache for HTTP fetches. `summary.cacheStats` reports `{ hits, total, bytesSavedEstimate }`.
|
|
107
|
+
|
|
108
|
+
### Page groups
|
|
109
|
+
|
|
110
|
+
Classify pages by glob and apply different rule subsets or threshold overrides per group. Results are surfaced in `summary.groupScores` / `summary.groupPageCounts`.
|
|
111
|
+
|
|
112
|
+
### Rendering
|
|
113
|
+
|
|
114
|
+
For client-rendered pages, install `playwright-core` and pass `render: { browserWsEndpoint }` to connect to an existing browser endpoint.
|
|
115
|
+
|
|
116
|
+
## Peer dependencies
|
|
117
|
+
|
|
118
|
+
All AI providers and `playwright-core` are optional peers — you only install the ones you actually use.
|
|
119
|
+
|
|
51
120
|
## License
|
|
52
121
|
|
|
53
|
-
MIT
|
|
122
|
+
MIT
|
package/dist/ai/prompt.d.ts
CHANGED
package/dist/ai/prompt.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAY,MAAM,aAAa,CAAC;AAExD,eAAO,MAAM,cAAc,UAAU,CAAC;AACtC,eAAO,MAAM,sBAAsB,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"prompt.d.ts","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,UAAU,EAAY,MAAM,aAAa,CAAC;AAExD,eAAO,MAAM,cAAc,UAAU,CAAC;AACtC,eAAO,MAAM,sBAAsB,MAAM,CAAC;AAqB1C,MAAM,WAAW,aAAa;IAC5B,MAAM,EAAE,MAAM,CAAC;IACf,IAAI,EAAE,MAAM,CAAC;CACd;AAED,wBAAgB,eAAe,CAAC,CAAC,EAAE,UAAU,GAAG,MAAM,CAMrD;AAqBD,wBAAgB,kBAAkB,CAAC,QAAQ,EAAE,UAAU,EAAE,EAAE,SAAS,EAAE,MAAM,GAAG,aAAa,CAqC3F"}
|
package/dist/ai/prompt.js
CHANGED
|
@@ -1,9 +1,15 @@
|
|
|
1
1
|
import { createHash } from "node:crypto";
|
|
2
|
-
export const PROMPT_VERSION = "1.
|
|
2
|
+
export const PROMPT_VERSION = "1.1.0";
|
|
3
3
|
export const MAX_FINDINGS_IN_PROMPT = 200;
|
|
4
4
|
const SEVERITY_ORDER = { info: 0, warning: 1, error: 2, critical: 3 };
|
|
5
5
|
const SYSTEM_PROMPT = `You are an SEO audit triage assistant. Given a list of pSEO linter findings, identify 1-5 underlying ROOT CAUSES driving the findings. Group findings by shared underlying problem, not by rule ID. Rank causes by likely SEO impact (highest first).
|
|
6
6
|
|
|
7
|
+
Findings fall into two distinct threat families — treat them as separate root causes, not one combined cause:
|
|
8
|
+
- SpamBrain penalty risk: spam/*, cannibal/*, content/*, data/*, tech/*, schema/*, links/* — these make Google penalize or demote the site.
|
|
9
|
+
- AI Overview invisibility: aeo/* — these make pages uncitable in AI answer engines (ChatGPT, Perplexity, Gemini, AI Overviews). Sites not cited lose ~68% of traffic vs ~12% for cited sites.
|
|
10
|
+
|
|
11
|
+
When both families are present, produce at least one root cause from each. Label AEO root causes clearly (e.g. "AI Overviews: ...") so the user can tell them apart from penalty risks.
|
|
12
|
+
|
|
7
13
|
Rules:
|
|
8
14
|
- Emit rootCauses FIRST, then narrative — do not reverse this order.
|
|
9
15
|
- Keep each rootCause label <= 80 chars and phrase it as a problem statement.
|
|
@@ -31,11 +37,17 @@ export function buildPromptRequest(findings, pageCount) {
|
|
|
31
37
|
pageUrl: f.pageUrl,
|
|
32
38
|
group: f.group,
|
|
33
39
|
}));
|
|
40
|
+
const countByCategory = {};
|
|
41
|
+
for (const f of findings) {
|
|
42
|
+
const cat = f.ruleId.split("/")[0];
|
|
43
|
+
countByCategory[cat] = (countByCategory[cat] ?? 0) + 1;
|
|
44
|
+
}
|
|
34
45
|
const payload = {
|
|
35
46
|
totalFindings: total,
|
|
36
47
|
pageCount,
|
|
37
48
|
truncated,
|
|
38
49
|
findings: projected,
|
|
50
|
+
findingCountByCategory: countByCategory,
|
|
39
51
|
};
|
|
40
52
|
if (truncated) {
|
|
41
53
|
const counts = {};
|
package/dist/ai/prompt.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,CAAC,MAAM,cAAc,GAAG,OAAO,CAAC;AACtC,MAAM,CAAC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAE1C,MAAM,cAAc,GAA6B,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;AAEhG,MAAM,aAAa,GAAG
|
|
1
|
+
{"version":3,"file":"prompt.js","sourceRoot":"","sources":["../../src/ai/prompt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAGzC,MAAM,CAAC,MAAM,cAAc,GAAG,OAAO,CAAC;AACtC,MAAM,CAAC,MAAM,sBAAsB,GAAG,GAAG,CAAC;AAE1C,MAAM,cAAc,GAA6B,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE,CAAC;AAEhG,MAAM,aAAa,GAAG;;;;;;;;;;;;;;;sDAegC,CAAC;AAOvD,MAAM,UAAU,eAAe,CAAC,CAAa;IAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC;SAC9B,MAAM,CAAC,CAAC,CAAC,CAAC,OAAO,IAAI,EAAE,CAAC,GAAG,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC;SAC3C,MAAM,CAAC,KAAK,CAAC;SACb,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;IACf,OAAO,GAAG,CAAC,CAAC,MAAM,IAAI,IAAI,EAAE,CAAC;AAC/B,CAAC;AAqBD,MAAM,UAAU,kBAAkB,CAAC,QAAsB,EAAE,SAAiB;IAC1E,MAAM,KAAK,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC9B,MAAM,MAAM,GAAG,CAAC,GAAG,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,GAAG,cAAc,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC;IACrG,MAAM,SAAS,GAAG,KAAK,GAAG,sBAAsB,CAAC;IACjD,MAAM,SAAS,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,sBAAsB,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAqB,EAAE,CAAC,CAAC;QACvF,EAAE,EAAE,eAAe,CAAC,CAAC,CAAC;QACtB,MAAM,EAAE,CAAC,CAAC,MAAM;QAChB,QAAQ,EAAE,CAAC,CAAC,QAAQ;QACpB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,OAAO,EAAE,CAAC,CAAC,OAAO;QAClB,KAAK,EAAE,CAAC,CAAC,KAAK;KACf,CAAC,CAAC,CAAC;IAEJ,MAAM,eAAe,GAA2B,EAAE,CAAC;IACnD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QACnC,eAAe,CAAC,GAAG,CAAC,GAAG,CAAC,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,OAAO,GAAkB;QAC7B,aAAa,EAAE,KAAK;QACpB,SAAS;QACT,SAAS;QACT,QAAQ,EAAE,SAAS;QACnB,sBAAsB,EAAE,eAAe;KACxC,CAAC;IAEF,IAAI,SAAS,EAAE,CAAC;QACd,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,KAAK,MAAM,CAAC,IAAI,QAAQ;YAAE,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QACzE,OAAO,CAAC,kBAAkB,GAAG,MAAM,CAAC;IACtC,CAAC;IAED,OAAO;QACL,MAAM,EAAE,aAAa;QACrB,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAC9B,CAAC;AACJ,CAAC"}
|
package/dist/auditor.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"auditor.d.ts","sourceRoot":"","sources":["../src/auditor.ts"],"names":[],"mappings":"AA6DA,OAAO,KAAK,EAAE,YAAY,EAAE,YAAY,EAAwG,MAAM,YAAY,CAAC;AAu0BnK,wBAAsB,WAAW,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,YAAY,CAAC,CAgf/F"}
|