@ibalzam/codejitsu-core 0.3.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
1
+ #!/usr/bin/env node
2
+ import { parseArgs } from 'node:util';
3
+ import { runBlog } from '../modules/cli/src/blog.mjs';
4
+ import { runAudit } from '../modules/audit/src/run.mjs';
5
+
6
// The first CLI argument selects the subcommand; everything after it is
// forwarded to that subcommand's own flag parser.
const subcommand = process.argv[2];
const rest = process.argv.slice(3);

// Dispatch table: subcommand name → handler. A handler may return a promise,
// which is awaited below.
const COMMANDS = {
  'blog:list': () => runBlog('blog:list'),
  'blog:drafts': () => runBlog('blog:drafts'),
  audit: () => {
    const { values } = parseArgs({
      args: rest,
      options: {
        live: { type: 'string' },
        a11y: { type: 'boolean' },
        ai: { type: 'boolean' },
      },
      allowPositionals: true,
    });
    return runAudit({ liveUrl: values.live, a11y: values.a11y, ai: values.ai });
  },
  // Aliases for the existing standalone bins
  llms: () => import('../modules/llms/bin/generate.mjs'),
  'optimize-images': () => import('../modules/images/bin/optimize.mjs'),
  check: () => import('../checklist/bin/run.mjs'),
};

if (!subcommand || subcommand === '--help' || subcommand === '-h') {
  printHelp();
  process.exit(0);
}

// Own-property lookup only: a plain `COMMANDS[subcommand]` would also resolve
// inherited members such as `toString` or `constructor`, which would then be
// "run" as if they were a valid subcommand and exit successfully.
const handler = Object.hasOwn(COMMANDS, subcommand) ? COMMANDS[subcommand] : undefined;
if (!handler) {
  console.error(`Unknown subcommand: ${subcommand}\n`);
  printHelp();
  process.exit(1);
}

try {
  await handler();
} catch (err) {
  // Print only the message (no stack trace) and signal failure to the shell.
  console.error(err instanceof Error ? err.message : String(err));
  process.exit(1);
}
48
+
49
/**
 * Print the CLI usage text to stdout, one `console.log` call per line.
 */
function printHelp() {
  const helpLines = [
    `\nUsage: codejitsu <subcommand> [flags]\n`,
    `Subcommands:`,
    ` blog:list List every non-draft post with URL + image check`,
    ` blog:drafts List future-dated (pending) posts only`,
    ``,
    ` audit Run pre-delivery audit. Flags:`,
    ` --live <url> Add live-URL checks (SSL, headers, 404, broken links)`,
    ` --a11y Add axe-core WCAG 2.1 AA scan (with --live)`,
    ` --ai Add Claude-powered content audit (uses 'claude -p')`,
    ``,
    ` llms Generate public/llms.txt and public/llms-full.txt`,
    ` optimize-images Optimize images per codejitsu.config`,
    ` check Run minimal pre-build checklist`,
    ``,
    `All commands read codejitsu.config.{ts,mjs,json} from the current directory.`,
  ];
  for (const line of helpLines) {
    console.log(line);
  }
}
@@ -0,0 +1,146 @@
1
+ import { spawn } from 'child_process';
2
+ import fs from 'fs';
3
+ import os from 'os';
4
+ import path from 'path';
5
+ import { pass, fail, warn, info } from '../util.mjs';
6
+
7
/**
 * Accessibility audit via `@axe-core/cli`. Spawns the CLI (through `npx --yes`)
 * against the live URL, parses the JSON report it saves, and groups violations
 * by impact (critical / serious / moderate / minor) for the audit summary.
 *
 * The report is written into a private temporary directory that is removed
 * before this function returns, so a leftover report from an earlier run can
 * never be mistaken for the current one and nothing accumulates in the tmpdir.
 *
 * @param {{ liveUrl?: string }} ctx - Audit context; only `liveUrl` is read.
 * @returns {Promise<Array>} Result entries built with the `pass` / `fail` /
 *   `warn` / `info` helpers. Failures to run or parse axe are reported as a
 *   single `warn` entry rather than thrown.
 */
export async function runA11y(ctx) {
  const { liveUrl } = ctx;
  if (!liveUrl) {
    return [info('a11y skipped — provide --live <url> to enable')];
  }

  let reportDir;
  try {
    reportDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codejitsu-axe-'));
  } catch (err) {
    return [warn('Could not run @axe-core/cli', String(err.message ?? err))];
  }

  try {
    const reportFile = path.join(reportDir, `codejitsu-axe-${Date.now()}.json`);

    let stderr = '';
    let exitCode;
    try {
      const result = await runCmd('npx', [
        '--yes',
        '@axe-core/cli',
        liveUrl,
        '--dir', reportDir,
        '--save', path.basename(reportFile),
        '--exit',
      ]);
      exitCode = result.code;
      stderr = result.stderr;
    } catch (err) {
      return [warn('Could not run @axe-core/cli', String(err.message ?? err))];
    }

    // Find the file axe wrote. The --save option names it; --dir specifies its dir.
    let parsed;
    try {
      let reportPath = reportFile;
      if (!fs.existsSync(reportPath)) {
        // axe sometimes writes a slightly different filename. The directory is
        // private to this run, so any JSON file in it belongs to this run.
        const candidates = fs.readdirSync(reportDir)
          .filter((n) => n.endsWith('.json'))
          .map((n) => path.join(reportDir, n));
        if (candidates.length === 0) {
          return [warn('axe-core ran but produced no JSON report', stderr.slice(0, 400) || `exit code: ${exitCode}`)];
        }
        candidates.sort((a, b) => fs.statSync(b).mtimeMs - fs.statSync(a).mtimeMs);
        reportPath = candidates[0];
      }
      parsed = JSON.parse(fs.readFileSync(reportPath, 'utf8'));
    } catch (err) {
      return [warn('Could not parse axe-core JSON output', err.message)];
    }

    return buildAxeResults(parsed, liveUrl);
  } finally {
    // Best-effort cleanup; `force` keeps this from throwing if the directory is already gone.
    fs.rmSync(reportDir, { recursive: true, force: true });
  }
}

/**
 * Turn a parsed axe report (one object, or an array of per-URL objects) into
 * audit result entries: one summary line plus one entry per impact level.
 */
function buildAxeResults(parsed, liveUrl) {
  const results = [];
  // axe report is an array of per-URL test results.
  const reports = Array.isArray(parsed) ? parsed : [parsed];

  // Aggregate counts (each violation counts once per affected node).
  const violationsByImpact = { critical: 0, serious: 0, moderate: 0, minor: 0 };
  const ruleHits = new Map(); // rule id → { impact, help, helpUrl, count, exampleUrl }
  let totalPasses = 0;
  let totalIncomplete = 0;

  for (const r of reports) {
    totalPasses += (r.passes ?? []).length;
    totalIncomplete += (r.incomplete ?? []).length;

    for (const v of r.violations ?? []) {
      // Missing or unrecognized impact values are filed under "moderate" so
      // they always show up in one of the reported buckets.
      const impact = Object.hasOwn(violationsByImpact, v.impact ?? '') ? v.impact : 'moderate';
      const nodeCount = v.nodes?.length ?? 1;
      violationsByImpact[impact] += nodeCount;

      const existing = ruleHits.get(v.id);
      if (existing) {
        existing.count += nodeCount;
      } else {
        ruleHits.set(v.id, {
          impact,
          help: v.help,
          helpUrl: v.helpUrl,
          count: nodeCount,
          exampleUrl: r.url ?? liveUrl,
        });
      }
    }
  }

  const totalViolations = Object.values(violationsByImpact).reduce((a, b) => a + b, 0);

  results.push(info(
    `axe-core checked ${reports.length} URL(s) — ${totalPasses} rules passed, ${totalViolations} violations, ${totalIncomplete} need manual review`
  ));

  // Per-impact severity buckets: critical/serious fail the audit, the rest warn.
  const impactMap = [
    { key: 'critical', label: 'critical', sev: 'fail' },
    { key: 'serious', label: 'serious', sev: 'fail' },
    { key: 'moderate', label: 'moderate', sev: 'warn' },
    { key: 'minor', label: 'minor', sev: 'warn' },
  ];

  for (const { key, label, sev } of impactMap) {
    const count = violationsByImpact[key];
    if (count === 0) {
      results.push(pass(`No ${label} a11y violations`));
      continue;
    }
    // Show at most five offending rules per bucket.
    const examples = [...ruleHits.entries()]
      .filter(([, hit]) => hit.impact === key)
      .slice(0, 5)
      .map(([id, hit]) => `${id} (×${hit.count}) — ${hit.help}`);
    const report = sev === 'fail' ? fail : warn;
    results.push(report(`${count} ${label} a11y violation${count === 1 ? '' : 's'}`, examples));
  }

  return results;
}
125
+
126
/**
 * Run a command without a shell and collect its output.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable as-is.
 * @param {number} [timeoutMs=180000] - Kill the child and reject after this many milliseconds.
 * @returns {Promise<{ code: number | null, stdout: string, stderr: string }>}
 *   Resolves when the child closes, whatever its exit code. Rejects if the
 *   process cannot be spawned or the timeout elapses.
 */
function runCmd(cmd, args, timeoutMs = 180_000) {
  return new Promise((resolve, reject) => {
    const proc = spawn(cmd, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    // Let the streams decode UTF-8 so a multibyte character split across two
    // chunks is reassembled instead of turning into replacement characters.
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk) => { stdout += chunk; });
    proc.stderr.on('data', (chunk) => { stderr += chunk; });
    const t = setTimeout(() => {
      proc.kill();
      reject(new Error(`${cmd} timed out after ${timeoutMs / 1000}s`));
    }, timeoutMs);
    proc.on('error', (err) => {
      clearTimeout(t);
      reject(err);
    });
    proc.on('close', (code) => {
      clearTimeout(t);
      resolve({ code, stdout, stderr });
    });
  });
}
@@ -0,0 +1,176 @@
1
+ import { spawn } from 'child_process';
2
+ import { pass, fail, warn, info } from '../util.mjs';
3
+
4
/**
 * AI-tier audit via headless `claude -p`. Samples up to three pages, asks
 * Claude to proofread each one, and reports the findings in the audit summary.
 *
 * The call is made with `--model sonnet`; `--bare` is intentionally not used
 * (see `callClaude`). The accumulated cost reported by the CLI is appended as
 * a final info line when it is non-zero.
 *
 * @param {{ htmlFiles?: Array<{ relPath: string, content: string }>, ai?: boolean }} ctx
 *   Audit context; the tier only runs when `ai` is truthy.
 * @returns {Promise<Array>} Result entries built with `pass` / `fail` / `warn` / `info`.
 */
export async function runAi(ctx) {
  const { htmlFiles, ai } = ctx;
  if (!ai) return [];

  // Verify claude is in PATH.
  const claudeAvailable = await commandExists('claude');
  if (!claudeAvailable) {
    return [warn(
      'AI tier skipped — `claude` CLI not in PATH',
      'Install Claude Code: https://claude.com/claude-code'
    )];
  }

  const results = [];
  // Tolerate a context without any HTML files: the sample is then simply empty.
  const sample = pickSample(htmlFiles ?? [], 3);

  results.push(info(`AI sampling ${sample.length} pages via claude -p (--model sonnet)`));

  let totalCost = 0;

  // Pages are proofread one after another, not in parallel.
  for (const f of sample) {
    const text = extractMainText(f.content);
    if (text.length < 200) {
      results.push(info(`Skipped ${f.relPath} (too short: ${text.length} chars)`));
      continue;
    }

    const proofread = await callClaude(buildProofreadPrompt(text), f.relPath);
    if (proofread.error) {
      results.push(warn(`Proofread failed: ${f.relPath}`, proofread.error));
      continue;
    }
    totalCost += proofread.costUsd ?? 0;

    if (proofread.issues.length === 0) {
      results.push(pass(`Proofread clean: ${f.relPath}`));
      continue;
    }

    // Show at most five findings; one "fail" finding makes the whole page a fail.
    const details = proofread.issues.slice(0, 5).map((i) => `${i.severity}: ${i.message}`);
    const report = proofread.issues.some((i) => i.severity === 'fail') ? fail : warn;
    results.push(report(
      `Proofread: ${proofread.issues.length} issues in ${f.relPath}`,
      details
    ));
  }

  if (totalCost > 0) {
    results.push(info(`AI tier total cost: ~$${totalCost.toFixed(3)}`));
  }

  return results;
}
66
+
67
/**
 * Build the proofreading prompt: fixed instructions and JSON schema, followed
 * by the page text wrapped in <PAGE> tags.
 *
 * @param {string} text - Plain text of the page to proofread.
 * @returns {string} The complete prompt.
 */
function buildProofreadPrompt(text) {
  const promptLines = [
    'Read the webpage content between <PAGE> tags. Identify ONLY:',
    '- Typos / misspellings',
    '- Grammar errors',
    '- Sentences that are objectively broken or incoherent',
    '',
    "DO NOT flag style preferences, alternative phrasings, or things you'd just personally prefer.",
    '',
    'Output STRICT JSON, no markdown, no preamble. Schema:',
    '{"issues": [{"severity": "fail" | "warn", "message": "specific issue with quoted text"}]}',
    '',
    'If no issues: {"issues": []}',
    '',
    '<PAGE>',
    `${text}`,
    '</PAGE>',
  ];
  return promptLines.join('\n');
}
84
+
85
/**
 * Send one prompt to the `claude` CLI in print mode and decode its reply.
 *
 * Never throws: every failure (spawn error, timeout, non-zero exit,
 * unparseable or mis-shaped output) is returned as `{ error }`.
 *
 * @param {string} prompt - Full prompt text, passed as a CLI argument.
 * @param {string} label - Identifier of the page being checked (currently unused here).
 * @returns {Promise<{ issues: Array<object>, costUsd: number | undefined } | { error: string }>}
 */
async function callClaude(prompt, label) {
  try {
    // Note: --bare requires ANTHROPIC_API_KEY env var. We don't use it so users
    // on OAuth (Claude Code subscribers) still work. Cost is higher per call
    // (~$0.07 with sonnet) but the audit only samples a few pages.
    const out = await runCmd(
      'claude',
      [
        '--model', 'sonnet',
        '--system-prompt', 'You are a senior copy editor. Output strict JSON only, no preamble.',
        '--disallowedTools', 'Bash,Edit,Write,Read,Glob,Grep,Agent',
        '-p',
        '--output-format', 'json',
        prompt,
      ],
      120_000
    );

    if (out.code !== 0) {
      return { error: `claude exit ${out.code}: ${out.stderr.slice(0, 200)}` };
    }

    // claude -p --output-format json returns an envelope { result, total_cost_usd, ... }
    let envelope;
    try {
      envelope = JSON.parse(out.stdout);
    } catch {
      return { error: 'Could not parse claude envelope JSON' };
    }

    const inner = (envelope.result ?? '').trim();
    const costUsd = envelope.total_cost_usd;

    // Strip code fences if Claude added them despite the instruction.
    const cleaned = inner.replace(/^```(?:json)?\s*/i, '').replace(/```\s*$/, '').trim();
    let parsed;
    try {
      parsed = JSON.parse(cleaned);
    } catch {
      return { error: `Claude returned non-JSON: ${cleaned.slice(0, 100)}` };
    }

    // Valid JSON is not enough: the caller iterates `issues`, so anything that
    // is not an object carrying an array (or no `issues` key at all) must be
    // reported as an error instead of crashing the audit or reading as "clean".
    if (parsed === null || typeof parsed !== 'object') {
      return { error: `Claude returned JSON without an "issues" array: ${cleaned.slice(0, 100)}` };
    }
    const issues = parsed.issues ?? [];
    if (!Array.isArray(issues)) {
      return { error: `Claude returned JSON without an "issues" array: ${cleaned.slice(0, 100)}` };
    }

    // Drop entries the caller could not read `severity` / `message` from.
    return {
      issues: issues.filter((entry) => entry !== null && typeof entry === 'object'),
      costUsd,
    };
  } catch (err) {
    return { error: err.message ?? String(err) };
  }
}
126
+
127
/**
 * Choose up to `n` pages to audit: the homepage (`index.html`) first when it
 * exists, then the remaining files in their given order, skipping `404.html`.
 *
 * @param {Array<{ relPath: string }>} files - Candidate pages; a missing list is treated as empty.
 * @param {number} n - Maximum number of pages to return; values below 1 yield an empty sample.
 * @returns {Array<{ relPath: string }>} The selected pages (never more than `n`).
 */
function pickSample(files, n) {
  const pool = files ?? [];
  // Without this guard the homepage would be returned even when n is 0.
  if (!(n > 0)) return [];

  const sample = [];
  const home = pool.find((f) => f.relPath === 'index.html');
  if (home) sample.push(home);

  for (const f of pool) {
    if (sample.length >= n) break;
    if (sample.includes(f)) continue;
    if (f.relPath === '404.html') continue;
    sample.push(f);
  }
  return sample;
}
139
+
140
/**
 * Reduce an HTML document to the plain text of its main content.
 *
 * Removes <head>, <script>, <style>, <nav>, <footer> and <header> elements,
 * narrows to the first <main> element when one exists, strips all remaining
 * tags, collapses whitespace, and keeps at most the first 2500 words.
 *
 * @param {string} html - Raw HTML; null/undefined is treated as an empty document.
 * @returns {string} Whitespace-normalized text (possibly empty).
 */
function extractMainText(html) {
  const boilerplateTags = ['head', 'script', 'style', 'nav', 'footer', 'header'];

  let cleaned = String(html ?? '');
  for (const tag of boilerplateTags) {
    // The lookahead pins the tag name: without it `<head` also matches the
    // start of `<header ...>` and could swallow everything up to a later
    // `</head>`, including the page body.
    const element = new RegExp(`<${tag}(?=[\\s>])[\\s\\S]*?<\\/${tag}>`, 'gi');
    cleaned = cleaned.replace(element, '');
  }

  const main = cleaned.match(/<main(?=[\s>])[\s\S]*?<\/main>/i);
  if (main) cleaned = main[0];

  const text = cleaned.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  // Cap the amount of text that is sent to the model.
  return text.split(/\s+/).slice(0, 2500).join(' ');
}
153
+
154
/**
 * Check whether an executable can be found on the PATH.
 *
 * Uses `where` on Windows and `which` everywhere else; the lookup tool's exit
 * code decides the answer. If the lookup tool itself cannot be started the
 * command is reported as missing.
 *
 * @param {string} cmd - Executable name to look up.
 * @returns {Promise<boolean>} `true` when the lookup tool exits with code 0.
 */
function commandExists(cmd) {
  // `which` does not exist on Windows, where the equivalent tool is `where`.
  const locator = process.platform === 'win32' ? 'where' : 'which';
  return new Promise((resolve) => {
    const proc = spawn(locator, [cmd], { stdio: 'ignore' });
    proc.on('close', (code) => resolve(code === 0));
    proc.on('error', () => resolve(false));
  });
}
161
+
162
/**
 * Run a command without a shell and collect its output.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable as-is.
 * @param {number} [timeoutMs=120000] - Kill the child and reject after this many milliseconds.
 * @returns {Promise<{ code: number | null, stdout: string, stderr: string }>}
 *   Resolves when the child closes, whatever its exit code. Rejects if the
 *   process cannot be spawned or the timeout elapses.
 */
function runCmd(cmd, args, timeoutMs = 120_000) {
  return new Promise((resolve, reject) => {
    const proc = spawn(cmd, args, { stdio: ['ignore', 'pipe', 'pipe'] });
    let stdout = '';
    let stderr = '';
    // Let the streams decode UTF-8 so a multibyte character split across two
    // chunks is reassembled instead of turning into replacement characters
    // (the output is JSON that may quote non-ASCII page text).
    proc.stdout.setEncoding('utf8');
    proc.stderr.setEncoding('utf8');
    proc.stdout.on('data', (chunk) => { stdout += chunk; });
    proc.stderr.on('data', (chunk) => { stderr += chunk; });
    const t = setTimeout(() => {
      proc.kill();
      reject(new Error(`${cmd} timed out`));
    }, timeoutMs);
    proc.on('error', (err) => {
      clearTimeout(t);
      reject(err);
    });
    proc.on('close', (code) => {
      clearTimeout(t);
      resolve({ code, stdout, stderr });
    });
  });
}
@@ -0,0 +1,51 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import { pass, fail, warn, info } from '../util.mjs';
4
+
5
// User-agent tokens of AI crawlers looked for in robots.txt.
const AI_BOTS = [
  'GPTBot', // OpenAI
  'ClaudeBot', // Anthropic
  'PerplexityBot', // Perplexity
  'CCBot', // Common Crawl (used by many AI training pipelines)
  'Google-Extended', // Google (Bard / Gemini)
];

/**
 * Static AI-readiness checks on the build output: does robots.txt address any
 * known AI crawler explicitly, and does llms.txt (when present) have at least
 * three `## ` sections.
 *
 * @param {{ distDir: string }} ctx - Audit context; only `distDir` is read.
 * @returns {Promise<Array>} Result entries built with `pass` / `warn` / `info`.
 */
export async function runAi(ctx) {
  const { distDir } = ctx;
  const results = [];

  const robotsPath = path.join(distDir, 'robots.txt');
  if (!fs.existsSync(robotsPath)) {
    results.push(warn('robots.txt missing — cannot check AI bot rules'));
    return results;
  }

  const robots = fs.readFileSync(robotsPath, 'utf8');
  const mentioned = AI_BOTS.filter((bot) => {
    const name = bot.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Match a real `User-agent:` line only: the directive must start the line
    // (so commented-out rules do not count) and the bot name must be the whole
    // value (so a longer agent name with the same prefix does not count). A
    // trailing `# comment` is allowed.
    const line = new RegExp(`^[ \\t]*User-agent:[ \\t]*${name}[ \\t]*(?:#.*)?$`, 'im');
    return line.test(robots);
  });

  if (mentioned.length === 0) {
    results.push(info(
      'robots.txt has no explicit AI crawler rules',
      'Default = allow. Add `User-agent: GPTBot` (etc.) with Allow/Disallow if you want explicit control.'
    ));
  } else {
    results.push(pass(`robots.txt mentions AI bots: ${mentioned.join(', ')}`));
  }

  // llms.txt presence checked in structure.mjs; here check content quality.
  const llmsPath = path.join(distDir, 'llms.txt');
  if (fs.existsSync(llmsPath)) {
    const llms = fs.readFileSync(llmsPath, 'utf8');
    // Count second-level markdown headings as "sections".
    const headings = llms.split('\n').filter((l) => l.startsWith('## ')).length;
    results.push(
      headings >= 3
        ? pass(`llms.txt has ${headings} sections`)
        : warn(`llms.txt has only ${headings} sections — consider expanding`)
    );
  }

  return results;
}
@@ -0,0 +1,54 @@
1
+ import { pass, fail, warn, info } from '../util.mjs';
2
+
3
// Analytics providers and the marker that identifies each one in page HTML.
// The leading \b keeps an ID from matching in the middle of a longer token
// (for example "IMG-ABCDEF1" must not count as a GA4 measurement ID).
const PROVIDERS = [
  { key: 'ga4', label: 'GA4', pattern: /\bG-[A-Z0-9]{6,}/ },
  { key: 'gtm', label: 'GTM', pattern: /\bGTM-[A-Z0-9]{6,}/ },
  { key: 'googleAds', label: 'Google Ads', pattern: /\bAW-\d{8,}/ },
  { key: 'ahrefs', label: 'Ahrefs', pattern: /analytics\.ahrefs\.com\/analytics\.js/ },
  { key: 'hotjar', label: 'Hotjar', pattern: /static\.hotjar\.com\/c\/hotjar-/ },
];

// Site-verification meta tags. The `name` attribute may appear anywhere inside
// the tag and is matched case-insensitively.
const VERIFICATION = [
  { key: 'googleSearchConsole', label: 'Google Search Console',
    pattern: /<meta\b[^>]*\sname=["']google-site-verification["']/i },
  { key: 'bingWebmaster', label: 'Bing Webmaster',
    pattern: /<meta\b[^>]*\sname=["']msvalidate\.01["']/i },
];

/**
 * Check which analytics providers and verification tags are present on the
 * homepage and compare that with the expectations in `config.audit`.
 *
 * `config.audit.analytics[key]` may be 'required', 'banned' or anything else
 * (treated as optional); `config.audit.verification[key] === true` makes a
 * verification tag mandatory.
 *
 * @param {{ htmlFiles: Array<{ relPath: string, content: string }>, config: object }} ctx
 * @returns {Promise<Array>} One result entry per provider and per verification
 *   tag, or a single warning when there is no HTML to inspect.
 */
export async function runAnalytics(ctx) {
  const { htmlFiles, config } = ctx;
  const results = [];
  const auditCfg = config?.audit ?? {};
  const analyticsCfg = auditCfg.analytics ?? {};
  const verificationCfg = auditCfg.verification ?? {};

  // Sample the homepage; analytics scripts should be on every page, but checking
  // one is enough — they're usually injected via a layout component.
  const pages = htmlFiles ?? [];
  const home = pages.find((f) => f.relPath === 'index.html') ?? pages[0];
  if (!home) {
    // Nothing to inspect; report that instead of crashing on `home.content`.
    return [warn('No HTML files found — analytics checks skipped')];
  }
  const html = home.content ?? '';

  for (const provider of PROVIDERS) {
    const present = provider.pattern.test(html);
    const requirement = analyticsCfg[provider.key] ?? 'optional';

    if (requirement === 'required') {
      results.push(present ? pass(`${provider.label} installed`) : fail(`${provider.label} required but not found`));
    } else if (requirement === 'banned') {
      results.push(present ? fail(`${provider.label} found but banned in config`) : pass(`${provider.label} not present (as configured)`));
    } else {
      // optional
      results.push(present ? pass(`${provider.label} installed`) : info(`${provider.label} not installed`));
    }
  }

  for (const v of VERIFICATION) {
    const present = v.pattern.test(html);
    const required = verificationCfg[v.key] === true;
    if (required) {
      results.push(present ? pass(`${v.label} verification tag`) : fail(`${v.label} verification tag missing`));
    } else {
      results.push(present ? pass(`${v.label} verification tag`) : info(`${v.label} verification tag not present`));
    }
  }

  return results;
}
@@ -0,0 +1,98 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import matter from 'gray-matter';
4
+ import { pass, fail, warn, info, summarize } from '../util.mjs';
5
+
6
/**
 * Quality checks over the blog's markdown sources: stale posts, missing or
 * short descriptions, missing images, short bodies, and duplicate titles or
 * descriptions. Posts flagged as drafts (when `blog.draftField` is configured)
 * are skipped.
 *
 * @param {{ cwd: string, config: object, enabled: { blog?: boolean } }} ctx
 *   Audit context. `config.blog` supplies `contentDir`, `dateField` and
 *   `draftField`; `config.audit.blog.staleMonths` sets the staleness threshold.
 * @returns {Promise<Array>} Result entries; empty when the blog module is disabled.
 */
export async function runBlogQuality(ctx) {
  const { cwd, config, enabled } = ctx;
  if (!enabled.blog) return [];

  const blogCfg = config.blog && typeof config.blog === 'object' ? config.blog : {};
  const contentDir = path.resolve(cwd, blogCfg.contentDir ?? 'src/content/blog');
  const dateField = blogCfg.dateField ?? 'date';
  const draftField = blogCfg.draftField ?? null;

  if (!fs.existsSync(contentDir)) {
    return [info('Blog content directory not found', `Looked at: ${contentDir}`)];
  }

  const results = [];
  const auditCfg = config.audit ?? {};
  const staleMonths = auditCfg.blog?.staleMonths ?? 12;

  // Directory order is platform-dependent; sort so the report is stable.
  const files = fs.readdirSync(contentDir).filter((n) => n.endsWith('.md')).sort();
  if (files.length === 0) {
    results.push(info('No blog posts found'));
    return results;
  }

  // Staleness cutoff: today's UTC date minus `staleMonths` months.
  const now = new Date();
  const today = new Date(Date.UTC(now.getUTCFullYear(), now.getUTCMonth(), now.getUTCDate()));
  const staleCutoff = new Date(today);
  staleCutoff.setUTCMonth(staleCutoff.getUTCMonth() - staleMonths);

  const stalePosts = [];
  const missingDescription = [];
  const missingImage = [];
  const shortBody = [];
  const dupTitle = new Map(); // title → slugs using it
  const dupDesc = new Map(); // description → slugs using it

  for (const fileName of files) {
    const raw = fs.readFileSync(path.join(contentDir, fileName), 'utf8');
    const { data, content } = matter(raw);
    if (draftField && data[draftField]) continue;

    const slug = fileName.replace(/\.md$/, '');
    const dateVal = data[dateField];
    const postDate = dateVal instanceof Date ? dateVal : (typeof dateVal === 'string' ? new Date(dateVal) : null);

    // An unparseable date compares false here, so it is never reported as stale.
    if (postDate && postDate < staleCutoff) {
      stalePosts.push(`${slug} (${postDate.toISOString().split('T')[0]})`);
    }
    if (!data.description || data.description.length < 50) {
      missingDescription.push(slug);
    }
    if (!data.image) missingImage.push(slug);
    if (content.length < 800) shortBody.push(`${slug} (${content.length} chars)`);

    if (data.title) {
      const list = dupTitle.get(data.title) ?? [];
      list.push(slug);
      dupTitle.set(data.title, list);
    }
    if (data.description) {
      const list = dupDesc.get(data.description) ?? [];
      list.push(slug);
      dupDesc.set(data.description, list);
    }
  }

  results.push(info(`${files.length} blog posts indexed`));
  results.push(summarize(
    `No posts older than ${staleMonths} months`,
    stalePosts,
    'warn'
  ));
  results.push(summarize('Posts have a description (≥ 50 chars)', missingDescription));
  results.push(summarize('Posts have an image', missingImage, 'warn'));
  results.push(summarize('Posts have substantial body (≥ 800 chars)', shortBody, 'warn'));

  // Front matter values are not guaranteed to be strings (`title: 2024` parses
  // as a number), so coerce before truncating for display.
  const dupTitles = [...dupTitle.entries()].filter(([, ps]) => ps.length > 1);
  results.push(
    dupTitles.length === 0
      ? pass('Blog post titles are unique')
      : warn(`${dupTitles.length} duplicate post titles`,
        dupTitles.map(([t, ps]) => `"${String(t).slice(0, 50)}" → ${ps.join(', ')}`))
  );

  const dupDescs = [...dupDesc.entries()].filter(([, ps]) => ps.length > 1);
  results.push(
    dupDescs.length === 0
      ? pass('Blog post descriptions are unique')
      : warn(`${dupDescs.length} duplicate post descriptions`,
        dupDescs.map(([d, ps]) => `"${String(d).slice(0, 50)}..." → ${ps.join(', ')}`))
  );

  return results;
}
@@ -0,0 +1,87 @@
1
+ import { pass, fail, warn, info, summarize } from '../util.mjs';
2
+
3
// Leftover placeholder copy. The markers end in ":" and are therefore matched
// without a trailing word boundary: a "\b" after the colon would only match
// when a letter follows immediately, so "TODO: fix this" would slip through.
const PLACEHOLDER_RE = /\b(?:lorem ipsum\b|(?:TODO|FIXME|XXX):)/i;

// Anchor text that's bad for SEO + accessibility. "Click here" doesn't tell
// screen readers (or search engines) what's on the other side of the link.
const GENERIC_ANCHOR_RE = /^(click here|read more|learn more|here|more info|details|continue|see more|find out more)$/i;

/**
 * Content checks over every built HTML page: placeholder text, PNG/JPG images
 * that have a WebP counterpart, images without `alt`, links without accessible
 * or meaningful text, and heading structure (one <h1>, no skipped levels).
 *
 * @param {{ htmlFiles: Array<{ relPath: string, content: string }>, webpSet: Set<string> }} ctx
 *   `webpSet` is looked up with the image path minus leading slash and extension.
 * @returns {Promise<Array>} One `summarize(...)` entry per check, in a fixed order.
 */
export async function runContent(ctx) {
  const { htmlFiles, webpSet } = ctx;
  const results = [];

  const placeholderHits = [];
  const pngWhereWebp = [];
  const imgsNoAlt = [];
  const aTagsNoText = [];
  const genericAnchors = [];

  // Heading hierarchy: every page must have exactly one <h1>, and headings
  // shouldn't skip levels (h1 → h3 without h2).
  const noH1 = [];
  const multiH1 = [];
  const skippedHeading = [];

  for (const f of htmlFiles) {
    // Placeholders
    if (PLACEHOLDER_RE.test(f.content)) placeholderHits.push(f.relPath);

    // PNG/JPG references where WebP exists
    for (const m of f.content.matchAll(/<img[^>]+src=["']([^"']+\.(?:png|jpe?g))["']/gi)) {
      const src = m[1].replace(/^\//, '');
      const noExt = src.replace(/\.(?:png|jpe?g)$/i, '');
      if (webpSet.has(noExt)) pngWhereWebp.push(`${f.relPath}: ${m[1]}`);
    }

    // <img> without alt (decorative images marked aria-hidden are exempt)
    for (const m of f.content.matchAll(/<img(?![^>]*\salt=)[^>]*>/gi)) {
      if (/aria-hidden=["']true["']/.test(m[0])) continue;
      const src = m[0].match(/src=["']([^"']+)["']/)?.[1] ?? '(no src)';
      imgsNoAlt.push(`${f.relPath}: ${src}`);
    }

    // <a> tags: empty content + generic anchor text. The lookahead restricts
    // the match to real anchors; a bare `<a` prefix would also start matching
    // at <article>, <aside>, <abbr>, <address>, ... and hide the links inside.
    for (const m of f.content.matchAll(/<a(?=[\s>])[^>]*>([\s\S]*?)<\/a>/gi)) {
      const inner = m[1].replace(/<[^>]+>/g, '').trim();
      const hasAriaLabel = /aria-label=["'][^"']+["']/.test(m[0]);
      const hasTitle = /title=["'][^"']+["']/.test(m[0]);
      const href = m[0].match(/href=["']([^"']+)["']/)?.[1] ?? '?';

      if (!inner && !hasAriaLabel && !hasTitle) {
        aTagsNoText.push(`${f.relPath}: <a href="${href}"> (empty)`);
      } else if (GENERIC_ANCHOR_RE.test(inner) && !hasAriaLabel) {
        genericAnchors.push(`${f.relPath}: "${inner}" → ${href}`);
      }
    }

    // Heading hierarchy
    const headings = [...f.content.matchAll(/<h([1-6])(?:\s[^>]*)?>/gi)].map((m) => Number.parseInt(m[1], 10));
    const h1Count = headings.filter((h) => h === 1).length;
    if (h1Count === 0) noH1.push(f.relPath);
    if (h1Count > 1) multiH1.push(`${f.relPath} (${h1Count} <h1>s)`);

    // Report only the first skipped level per page.
    let prev = 0;
    for (const h of headings) {
      if (prev > 0 && h - prev > 1) {
        skippedHeading.push(`${f.relPath}: h${prev} → h${h}`);
        break;
      }
      prev = h;
    }
  }

  results.push(summarize('No placeholder text (Lorem/TODO/FIXME)', placeholderHits));
  results.push(summarize('No raw PNG/JPG where WebP exists', pngWhereWebp));
  results.push(summarize('All images have alt text', dedupe(imgsNoAlt), 'warn'));
  results.push(summarize('All links have accessible text', dedupe(aTagsNoText), 'warn'));
  results.push(summarize('No generic anchor text ("click here" etc.)', dedupe(genericAnchors), 'warn'));
  results.push(summarize('Every page has exactly one <h1>', noH1));
  results.push(summarize('No multiple <h1>s', multiH1, 'warn'));
  results.push(summarize('Heading hierarchy is sequential', dedupe(skippedHeading), 'warn'));

  return results;
}
84
+
85
/**
 * Remove duplicate entries (keeping first occurrences in order) and cap the
 * list at 20 items.
 */
function dedupe(arr) {
  const unique = [...new Set(arr)];
  return unique.slice(0, 20);
}