@ibalzam/codejitsu-core 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/codejitsu.mjs +23 -3
- package/modules/audit/src/a11y/runner.mjs +146 -0
- package/modules/audit/src/ai/runner.mjs +176 -0
- package/modules/audit/src/groups/ai-discoverability.mjs +51 -0
- package/modules/audit/src/groups/analytics.mjs +54 -0
- package/modules/audit/src/groups/blog-quality.mjs +98 -0
- package/modules/audit/src/groups/content.mjs +87 -0
- package/modules/audit/src/groups/forms.mjs +112 -0
- package/modules/audit/src/groups/links.mjs +58 -0
- package/modules/audit/src/groups/performance.mjs +117 -0
- package/modules/audit/src/groups/seo.mjs +178 -0
- package/modules/audit/src/groups/structure.mjs +105 -0
- package/modules/audit/src/http/runner.mjs +185 -0
- package/modules/audit/src/run.mjs +168 -0
- package/modules/audit/src/util.mjs +72 -0
- package/modules/config/src/types.d.ts +21 -0
- package/modules/config/src/types.ts +23 -0
- package/modules/llms/src/generate.mjs +22 -5
- package/modules/rehype/CLAUDE.md +64 -0
- package/modules/rehype/src/trailing-slash.mjs +88 -0
- package/package.json +2 -1
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import { pass, fail, warn, info, summarize } from '../util.mjs';
|
|
2
|
+
|
|
3
|
+
/**
 * HTTP-tier audit. Hits a live URL (production or staging) and verifies:
 *   - HTTPS + HTTP→HTTPS redirect
 *   - Security headers
 *   - 404 behavior (custom styled page, correct status)
 *   - Broken internal links (bounded same-origin crawl)
 *
 * Uses Node's native fetch (no deps). Caller supplies the base URL.
 *
 * @param {object} ctx - Audit context. Reads `ctx.liveUrl` and, when present,
 *   `ctx.config.site.name` (used by the 404 branding heuristic).
 * @returns {Promise<Array<{status: string, label: string, detail?: unknown}>>}
 *   Check results; empty when no live URL was supplied.
 */
export async function runHttp(ctx) {
  const { liveUrl } = ctx;
  if (!liveUrl) return [];

  // A malformed --live value must surface as a failed check, not as an
  // uncaught TypeError that aborts the whole audit.
  let base;
  try {
    base = new URL(liveUrl);
  } catch {
    return [fail(`Invalid live URL: ${liveUrl}`)];
  }
  const origin = base.origin;

  const results = [...(await checkHttpsRedirect(base, liveUrl))];

  // Without a homepage response none of the remaining checks are meaningful.
  let homeResponse;
  try {
    homeResponse = await fetchWithTimeout(origin + '/');
  } catch (err) {
    results.push(fail(`Could not fetch ${origin}/`, err.message));
    return results;
  }

  if (!homeResponse.ok) {
    results.push(fail(`Homepage returned ${homeResponse.status}`));
  } else {
    results.push(pass(`Homepage returns ${homeResponse.status} ${homeResponse.statusText}`));
  }

  results.push(...checkSecurityHeaders(homeResponse.headers));
  results.push(...(await checkNotFoundPage(origin, ctx.config?.site?.name)));
  results.push(...summarizeCrawl(await crawl(origin, 30)));

  return results;
}

/** Security headers to look for on the homepage, with the severity used when one is missing. */
const SECURITY_HEADERS = [
  { key: 'strict-transport-security', label: 'HSTS', severity: 'fail' },
  { key: 'content-security-policy', label: 'Content-Security-Policy', severity: 'warn' },
  { key: 'x-frame-options', label: 'X-Frame-Options', severity: 'warn' },
  { key: 'x-content-type-options', label: 'X-Content-Type-Options (nosniff)', severity: 'warn' },
  { key: 'referrer-policy', label: 'Referrer-Policy', severity: 'warn' },
  { key: 'permissions-policy', label: 'Permissions-Policy', severity: 'info' },
];

/** True when a Location header, resolved against the request URL, targets https. */
function pointsToHttps(location, requestUrl) {
  try {
    // Resolving handles relative targets (which stay on http) and
    // scheme-case differences such as "HTTPS://".
    return new URL(location, requestUrl).protocol === 'https:';
  } catch {
    return false;
  }
}

/** Checks that the base URL is HTTPS and that its plain-HTTP twin redirects to HTTPS. */
async function checkHttpsRedirect(base, liveUrl) {
  if (base.protocol !== 'https:') {
    return [fail(`Base URL is not HTTPS: ${liveUrl}`)];
  }

  const results = [pass('Base URL is HTTPS')];
  const httpUrl = `http://${base.host}${base.pathname}`;
  try {
    // redirect: 'manual' so the 3xx itself is observed instead of followed.
    const r = await fetchWithTimeout(httpUrl, { redirect: 'manual' });
    if (r.status >= 300 && r.status < 400) {
      const location = r.headers.get('location') ?? '';
      if (pointsToHttps(location, httpUrl)) {
        results.push(pass(`HTTP → HTTPS redirect (${r.status} to ${location})`));
      } else {
        results.push(warn(`HTTP redirects but not to HTTPS`, `${r.status} → ${location}`));
      }
    } else {
      results.push(fail(`HTTP did not redirect (status ${r.status})`));
    }
  } catch (err) {
    // Port 80 may simply be closed; that is not a hard failure.
    results.push(warn(`Could not test HTTP→HTTPS redirect`, err.message));
  }
  return results;
}

/** Reports presence/absence of each header in SECURITY_HEADERS. */
function checkSecurityHeaders(headers) {
  return SECURITY_HEADERS.map((h) => {
    const value = headers.get(h.key);
    if (value) {
      // Long policies (CSP in particular) are truncated to keep output readable.
      return pass(`${h.label}: ${value.slice(0, 70)}${value.length > 70 ? '…' : ''}`);
    }
    if (h.severity === 'fail') return fail(`${h.label} header missing`);
    if (h.severity === 'warn') return warn(`${h.label} header missing`);
    return info(`${h.label} header missing`);
  });
}

/** Requests a URL that should not exist and checks the status and whether the page looks styled. */
async function checkNotFoundPage(origin, siteName) {
  // Timestamped path so the probe cannot collide with a real route.
  const probe = `${origin}/__codejitsu_audit_probe_${Date.now()}/`;
  try {
    const r = await fetchWithTimeout(probe);
    if (r.status !== 404) {
      return [fail(`Unknown URL returned ${r.status} (expected 404)`)];
    }

    const body = await r.text();
    const name = typeof siteName === 'string' ? siteName.trim().toLowerCase() : '';
    // Heuristic: the page mentions a known brand or the configured site name,
    // or it is a full HTML document of non-trivial size.
    const branded =
      /pearl|workzen|veteran|profix|codejitsu/i.test(body) ||
      (name !== '' && body.toLowerCase().includes(name)) ||
      (body.includes('<head>') && body.length > 1000);

    return [
      pass('Unknown URL returns 404'),
      branded
        ? pass('404 page is styled/branded')
        : warn('404 page returned but may be unstyled', `Body size: ${body.length} bytes`),
    ];
  } catch (err) {
    return [warn('Could not test 404 behavior', err.message)];
  }
}

/** Converts the crawl outcome into check results. */
function summarizeCrawl(crawlResults) {
  const results = [];
  if (crawlResults.broken.length === 0) {
    results.push(pass(`Crawled ${crawlResults.visited} pages — no broken links`));
  } else {
    results.push(fail(
      `${crawlResults.broken.length} broken links (crawled ${crawlResults.visited} pages)`,
      crawlResults.broken
    ));
  }
  if (crawlResults.redirected.length > 0) {
    results.push(warn(
      `${crawlResults.redirected.length} internal redirects (prefer direct links)`,
      crawlResults.redirected.slice(0, 5)
    ));
  }
  return results;
}
|
|
120
|
+
|
|
121
|
+
/**
 * `fetch` wrapper that aborts the request if no response arrives within
 * `timeoutMs` and identifies the audit via a default User-Agent.
 *
 * Caller-supplied `init.headers` are preserved; the default User-Agent is
 * only added when the caller has not set one. Any `init.signal` is replaced
 * by the internal timeout signal. The timer is cleared as soon as `fetch`
 * settles, so reading the body afterwards is not covered by the timeout.
 *
 * @param {string} url - URL to request.
 * @param {object} [init] - Standard fetch options.
 * @param {number} [timeoutMs] - Milliseconds before the request is aborted.
 * @returns {Promise<Response>} The fetch response.
 */
async function fetchWithTimeout(url, init = {}, timeoutMs = 10_000) {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);

  // Merge instead of overwrite so headers passed by the caller are not lost.
  const headers = new Headers(init.headers);
  if (!headers.has('User-Agent')) {
    headers.set('User-Agent', 'codejitsu-audit/0.5');
  }

  try {
    return await fetch(url, { ...init, headers, signal: controller.signal });
  } finally {
    clearTimeout(timer);
  }
}
|
|
130
|
+
|
|
131
|
+
/**
 * Bounded same-origin crawl. Starts at `origin/`, follows internal links
 * (extracted from <a href>), stops at `max` pages. Returns broken links
 * (4xx/5xx, network errors) and redirects encountered along the way.
 *
 * Links are compared by parsed origin, fragments are stripped before a link
 * is queued, and each URL is queued at most once so repeated navigation
 * links do not use up the `max` budget.
 *
 * @param {string} origin - Site origin without trailing slash, e.g. `https://example.com`.
 * @param {number} max - Maximum number of pages to request.
 * @returns {Promise<{visited: number, broken: string[], redirected: string[]}>}
 */
async function crawl(origin, max) {
  const assetPattern = /\.(?:webp|png|jpe?g|svg|pdf|woff2?|ico|css|js)(?:\?|$)/i;
  const start = origin + '/';
  const seen = new Set([start]); // every URL ever queued (visited or pending)
  const queue = [start];
  const broken = [];
  const redirected = [];
  let visited = 0;

  // Best-effort release of a response whose body is not needed; a failure
  // to cancel has no bearing on the audit result.
  const discardBody = (response) => {
    void response.body?.cancel().catch(() => {});
  };

  while (queue.length > 0 && visited < max) {
    const url = queue.shift();
    visited += 1;

    let response;
    try {
      // redirect: 'manual' so internal redirects are reported, not followed.
      response = await fetchWithTimeout(url, { redirect: 'manual' });
    } catch (err) {
      broken.push(`${url}: ${err.message}`);
      continue;
    }

    if (response.status >= 400) {
      broken.push(`${url}: ${response.status}`);
      discardBody(response);
      continue;
    }
    if (response.status >= 300 && response.status < 400) {
      const location = response.headers.get('location') ?? '';
      redirected.push(`${url} → ${response.status} → ${location}`);
      discardBody(response);
      continue;
    }

    const contentType = response.headers.get('content-type') ?? '';
    if (!contentType.includes('text/html')) {
      discardBody(response);
      continue;
    }

    let body;
    try {
      body = await response.text();
    } catch (err) {
      // The connection dropped mid-body; report it rather than abort the crawl.
      broken.push(`${url}: ${err.message}`);
      continue;
    }

    for (const m of body.matchAll(/<a[^>]+href=["']([^"']+)["']/gi)) {
      let target;
      try {
        target = new URL(m[1], url);
      } catch {
        continue; // skip invalid URLs
      }
      // Compare parsed origins: a string prefix test would also accept
      // hosts such as `example.com.evil.test`.
      if (target.origin !== origin) continue; // external, mailto:, tel:, …
      target.hash = ''; // the fragment does not change which page is fetched
      const absolute = target.toString();
      if (assetPattern.test(absolute)) continue;
      if (!seen.has(absolute) && seen.size < max) {
        seen.add(absolute);
        queue.push(absolute);
      }
    }
  }

  return { visited, broken, redirected };
}
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { loadConfig, isModuleEnabled } from '../../config/src/load.mjs';
|
|
4
|
+
import { c } from '../../cli/src/format.mjs';
|
|
5
|
+
import { runStructure } from './groups/structure.mjs';
|
|
6
|
+
import { runLinks } from './groups/links.mjs';
|
|
7
|
+
import { runSeo } from './groups/seo.mjs';
|
|
8
|
+
import { runAi } from './groups/ai-discoverability.mjs';
|
|
9
|
+
import { runAnalytics } from './groups/analytics.mjs';
|
|
10
|
+
import { runForms } from './groups/forms.mjs';
|
|
11
|
+
import { runContent } from './groups/content.mjs';
|
|
12
|
+
import { runPerformance } from './groups/performance.mjs';
|
|
13
|
+
import { runBlogQuality } from './groups/blog-quality.mjs';
|
|
14
|
+
import { runHttp } from './http/runner.mjs';
|
|
15
|
+
import { runA11y } from './a11y/runner.mjs';
|
|
16
|
+
import { runAi as runAiTier } from './ai/runner.mjs';
|
|
17
|
+
|
|
18
|
+
/**
 * Pre-delivery audit. Static (against dist/) + optional tiers:
 *   --live <url>   Hits the URL for security headers, redirects, 404, broken links.
 *   --a11y         Runs axe-core against --live URL (requires @axe-core/cli).
 *   --ai           Adds the "AI content review" group.
 *
 * Reads codejitsu.config for module enablement + audit preferences.
 * Prints results to stdout and terminates the process with exit code 1 when
 * the config cannot be loaded, dist/ is missing, or any check fails.
 *
 * @param {object} [opts]
 * @param {string} [opts.liveUrl]
 * @param {boolean} [opts.a11y]
 * @param {boolean} [opts.ai]
 */
export async function runAudit(opts = {}) {
  const cwd = process.cwd();
  const distDir = path.join(cwd, 'dist');

  let config;
  try {
    config = await loadConfig(cwd);
  } catch (err) {
    console.error(c.red('✗ No codejitsu.config found.'));
    console.error(' Run `codejitsu audit` from a Codejitsu site root.');
    // Show the underlying reason too: a config that exists but fails to load
    // would otherwise be indistinguishable from a missing one.
    if (err?.message) console.error(` ${c.gray(err.message)}`);
    process.exit(1);
  }

  if (!fs.existsSync(distDir)) {
    console.error(c.red('✗ No dist/ directory.'));
    console.error(' Run `npm run build` first.');
    process.exit(1);
  }

  // Index HTML files once; pass to all check groups.
  const htmlFiles = collectHtmlFiles(distDir).map((file) => ({
    relPath: path.relative(distDir, file),
    fullPath: file,
    content: fs.readFileSync(file, 'utf8'),
  }));

  // Index public assets for cross-reference checks: dist-relative paths of
  // every .webp file, stored without the extension.
  const webpSet = new Set();
  collectAssets(distDir).forEach((p) => {
    if (p.toLowerCase().endsWith('.webp')) {
      webpSet.add(path.relative(distDir, p).replace(/\.webp$/i, ''));
    }
  });

  const ctx = {
    cwd,
    distDir,
    config,
    htmlFiles,
    webpSet,
    liveUrl: opts.liveUrl ?? null,
    a11y: opts.a11y ?? false,
    ai: opts.ai ?? false,
    enabled: {
      blog: isModuleEnabled(config, 'blog'),
      seo: isModuleEnabled(config, 'seo'),
      images: isModuleEnabled(config, 'images'),
      llms: isModuleEnabled(config, 'llms'),
      deploy: isModuleEnabled(config, 'deploy'),
    },
  };

  const groups = [
    { name: 'Structure & Build', run: runStructure },
    { name: 'Links & URLs', run: runLinks },
    { name: 'SEO', run: runSeo },
    { name: 'AI Discoverability', run: runAi },
    { name: 'Analytics & Tags', run: runAnalytics },
    { name: 'Forms', run: runForms },
    { name: 'Content & A11y', run: runContent },
    { name: 'Performance', run: runPerformance },
    { name: 'Blog Quality', run: runBlogQuality },
  ];
  if (ctx.liveUrl) {
    groups.push({ name: `Live HTTP (${ctx.liveUrl})`, run: runHttp });
  }
  if (ctx.a11y) {
    groups.push({ name: 'Accessibility (axe-core WCAG 2.1 AA)', run: runA11y });
  }
  if (ctx.ai) {
    groups.push({ name: 'AI content review (claude -p)', run: runAiTier });
  }

  console.log(c.bold(`\nCodejitsu Audit · ${config.site.name} (${htmlFiles.length} pages)\n`));

  const totals = { pass: 0, warn: 0, fail: 0, info: 0 };

  for (const group of groups) {
    let results;
    try {
      results = await group.run(ctx);
    } catch (err) {
      // One crashing group must not hide the results of the others; record
      // the crash as a failed check so the exit code still reflects it.
      results = [{
        status: 'fail',
        label: `${group.name} check crashed`,
        detail: err?.message ?? String(err),
      }];
    }
    if (!results || results.length === 0) continue;
    console.log(c.bold(`◉ ${group.name}`));
    for (const r of results) {
      printResult(r);
      totals[r.status] = (totals[r.status] ?? 0) + 1;
    }
    console.log('');
  }

  const summary =
    `${c.green(totals.pass + ' pass')} ` +
    `${c.yellow(totals.warn + ' warn')} ` +
    `${c.red(totals.fail + ' fail')}` +
    (totals.info ? ` ${c.gray(totals.info + ' info')}` : '');
  console.log(summary);

  if (totals.fail > 0) process.exit(1);
}
|
|
126
|
+
|
|
127
|
+
/**
 * Prints one check result: a colored status icon with the label, followed by
 * up to five detail lines and a "+N more" marker when details were cut off.
 *
 * @param {{status: string, label: string, detail?: unknown}} r - Result to print.
 */
function printResult(r) {
  let icon;
  switch (r.status) {
    case 'pass':
      icon = c.green('✓');
      break;
    case 'warn':
      icon = c.yellow('!');
      break;
    case 'info':
      icon = c.gray('i');
      break;
    default:
      // Anything else (including 'fail') is rendered as a failure.
      icon = c.red('✗');
  }
  console.log(` ${icon} ${r.label}`);

  if (!r.detail) return;

  // A single detail value is treated as a one-line list.
  const detailLines = Array.isArray(r.detail) ? r.detail : [r.detail];
  const shown = detailLines.slice(0, 5);
  shown.forEach((text) => {
    console.log(` ${c.gray(text)}`);
  });

  const hidden = detailLines.length - 5;
  if (hidden > 0) {
    console.log(` ${c.gray(`… (+${detailLines.length - 5} more)`)}`);
  }
}
|
|
144
|
+
|
|
145
|
+
/**
 * Recursively lists every file under `distDir` whose name ends in `.html`.
 *
 * @param {string} distDir - Directory to scan; must exist.
 * @returns {string[]} Paths built by joining `distDir` with each entry name,
 *   in depth-first directory-listing order.
 */
function collectHtmlFiles(distDir) {
  const listHtml = (dir) =>
    fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) return listHtml(full);
      return entry.name.endsWith('.html') ? [full] : [];
    });

  return listHtml(distDir);
}
|
|
156
|
+
|
|
157
|
+
/**
 * Recursively lists every regular file under `distDir`.
 * Directories that do not exist are skipped, so a missing `distDir` yields
 * an empty list.
 *
 * @param {string} distDir - Directory to scan.
 * @returns {string[]} Paths of all regular files found.
 */
function collectAssets(distDir) {
  const files = [];

  const visit = (dir) => {
    if (!fs.existsSync(dir)) return;
    fs.readdirSync(dir, { withFileTypes: true }).forEach((entry) => {
      const full = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        visit(full);
        return;
      }
      // Only regular files count; other entry types are ignored.
      if (entry.isFile()) files.push(full);
    });
  };

  visit(distDir);
  return files;
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
// Shared helpers used across audit check groups.
|
|
2
|
+
|
|
3
|
+
/** Builds a check-result record; the four exported constructors differ only in `status`. */
function makeResult(status, label, detail) {
  return { status, label, detail };
}

/** Result for a check that succeeded. */
export function pass(label, detail) {
  return makeResult('pass', label, detail);
}

/** Result for a check that failed. */
export function fail(label, detail) {
  return makeResult('fail', label, detail);
}

/** Result for a check that raised a warning. */
export function warn(label, detail) {
  return makeResult('warn', label, detail);
}

/** Informational result. */
export function info(label, detail) {
  return makeResult('info', label, detail);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Folds a list of per-page issues into one check result.
 * No issues gives a pass with the bare label. Otherwise the result carries
 * the label with the issue count appended and the issues as its detail;
 * its status is fail, warn, or (for any other `severity`) info.
 */
export function summarize(label, issues, severity = 'fail') {
  const count = issues.length;
  if (count === 0) {
    return pass(label);
  }

  const countedLabel = `${label} (${issues.length})`;
  switch (severity) {
    case 'fail':
      return fail(countedLabel, issues);
    case 'warn':
      return warn(countedLabel, issues);
    default:
      return info(countedLabel, issues);
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Extract the text of the first `<title>` element.
 * Tag names are matched case-insensitively and attributes on the opening
 * tag are allowed.
 *
 * @param {string} html - HTML source.
 * @returns {string|null} Title text (may be empty), or null when no title is found.
 */
export function getTitle(html) {
  return html.match(/<title(?:\s[^>]*)?>([^<]*)<\/title\s*>/i)?.[1] ?? null;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Get content of a `<meta name|property="X" content="...">` tag.
 *
 * Returns the `content` of the first `<meta>` tag whose `name` or `property`
 * equals `key` (case-insensitive). Attributes may appear in any order, with
 * other attributes in between, and a value may contain the other kind of
 * quote (`content="Bob's shop"`).
 *
 * @param {string} html - HTML source.
 * @param {string} key - Value of the `name` or `property` attribute to look for.
 * @returns {string|null} The content value (may be empty), or null when no
 *   matching tag with a `content` attribute exists.
 */
export function getMeta(html, key) {
  const wanted = String(key).toLowerCase();
  // A tag ends at the first `>` that is not inside a quoted attribute value.
  const tagPattern = /<meta(?=[\s/>])(?:[^>"']|"[^"]*"|'[^']*')*>/gi;
  // name = "double" | 'single' | bare
  const attrPattern = /([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g;

  for (const [tag] of html.matchAll(tagPattern)) {
    const attrs = new Map();
    for (const [, rawName, dq, sq, bare] of tag.matchAll(attrPattern)) {
      const attrName = rawName.toLowerCase();
      // First occurrence wins, as in HTML parsing.
      if (!attrs.has(attrName)) attrs.set(attrName, dq ?? sq ?? bare);
    }

    const identifies = [attrs.get('name'), attrs.get('property')]
      .some((value) => value?.toLowerCase() === wanted);
    if (identifies && attrs.has('content')) {
      return attrs.get('content');
    }
  }
  return null;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Get href of a `<link rel="X">` tag.
 *
 * Returns the `href` of the first `<link>` tag whose `rel` attribute equals
 * `rel` (case-insensitive, whole value). Attributes may appear in any order,
 * with other attributes in between, and a value may contain the other kind
 * of quote.
 *
 * @param {string} html - HTML source.
 * @param {string} rel - Value of the `rel` attribute to look for.
 * @returns {string|null} The href value (may be empty), or null when no
 *   matching tag with an `href` attribute exists.
 */
export function getLinkHref(html, rel) {
  const wantedRel = String(rel).toLowerCase();
  // A tag ends at the first `>` that is not inside a quoted attribute value.
  const linkTags = html.matchAll(/<link(?=[\s/>])(?:[^>"']|"[^"]*"|'[^']*')*>/gi);

  for (const [tag] of linkTags) {
    let relValue;
    let hrefValue;
    const pairs = tag.matchAll(/([^\s"'<>\/=]+)\s*=\s*(?:"([^"]*)"|'([^']*)'|([^\s"'>]+))/g);
    for (const [, rawName, dq, sq, bare] of pairs) {
      const attrName = rawName.toLowerCase();
      const value = dq ?? sq ?? bare;
      // Keep the first occurrence of each attribute.
      if (attrName === 'rel' && relValue === undefined) relValue = value;
      if (attrName === 'href' && hrefValue === undefined) hrefValue = value;
    }

    if (relValue?.toLowerCase() === wantedRel && hrefValue !== undefined) {
      return hrefValue;
    }
  }
  return null;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Backslash-escapes every regular-expression metacharacter in `s` so the
 * result can be embedded in a `RegExp` source and match `s` literally.
 */
export function escapeRegex(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Lazily yields one `{ href, full }` record per `<a ... href="...">` opening
 * tag found in `html`: `href` is the attribute value, `full` the whole
 * opening tag as written.
 */
export function* anchorHrefs(html) {
  const openingTags = html.matchAll(/<a[^>]+href=["']([^"']+)["'][^>]*>/gi);
  for (const [full, href] of openingTags) {
    yield { href, full };
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Tells whether `href` points away from the site.
 *
 * - Protocol-relative URLs (`//host/...`) are always external.
 * - Absolute http(s) URLs are external unless their origin (scheme + host +
 *   port) equals the origin of `siteOrigin`. Comparing parsed origins avoids
 *   treating look-alike hosts such as `https://example.com.evil.test` as
 *   internal.
 * - Everything else (relative paths, `mailto:`, `tel:`, `#fragment`) is
 *   not external.
 *
 * @param {string} href - Link target as written in the HTML.
 * @param {string} siteOrigin - Absolute site URL, e.g. `https://example.com`.
 * @returns {boolean} True when the link leaves the site.
 */
export function isExternal(href, siteOrigin) {
  if (href.startsWith('//')) return true;
  if (!/^https?:\/\//i.test(href)) return false;

  try {
    return new URL(href).origin !== new URL(siteOrigin).origin;
  } catch {
    // Either value could not be parsed as a URL; fall back to the plain
    // prefix comparison.
    return !href.startsWith(siteOrigin);
  }
}
|
|
@@ -14,7 +14,28 @@ export interface CodejitsuConfig {
|
|
|
14
14
|
images?: ImagesConfig | false;
|
|
15
15
|
llms?: LlmsConfig | false;
|
|
16
16
|
deploy?: DeployConfig | false;
|
|
17
|
+
audit?: AuditConfig;
|
|
17
18
|
}
|
|
19
|
+
export interface AuditConfig {
|
|
20
|
+
/** Per-provider requirement. 'optional' = pass either way; 'required' = fail if absent; 'banned' = fail if present. */
|
|
21
|
+
analytics?: {
|
|
22
|
+
ga4?: AuditRequirement;
|
|
23
|
+
gtm?: AuditRequirement;
|
|
24
|
+
googleAds?: AuditRequirement;
|
|
25
|
+
ahrefs?: AuditRequirement;
|
|
26
|
+
hotjar?: AuditRequirement;
|
|
27
|
+
};
|
|
28
|
+
/** Site verification meta tags. true = required, false/missing = optional. */
|
|
29
|
+
verification?: {
|
|
30
|
+
googleSearchConsole?: boolean;
|
|
31
|
+
bingWebmaster?: boolean;
|
|
32
|
+
};
|
|
33
|
+
forms?: {
|
|
34
|
+
requireSpamProtection?: boolean;
|
|
35
|
+
requireConsent?: boolean;
|
|
36
|
+
};
|
|
37
|
+
}
|
|
38
|
+
export type AuditRequirement = 'required' | 'optional' | 'banned';
|
|
18
39
|
export interface SiteConfig {
|
|
19
40
|
/** Absolute site URL, no trailing slash. e.g. 'https://example.com'. */
|
|
20
41
|
url: string;
|
|
@@ -15,8 +15,31 @@ export interface CodejitsuConfig {
|
|
|
15
15
|
images?: ImagesConfig | false;
|
|
16
16
|
llms?: LlmsConfig | false;
|
|
17
17
|
deploy?: DeployConfig | false;
|
|
18
|
+
audit?: AuditConfig;
|
|
18
19
|
}
|
|
19
20
|
|
|
21
|
+
export interface AuditConfig {
|
|
22
|
+
/** Per-provider requirement. 'optional' = pass either way; 'required' = fail if absent; 'banned' = fail if present. */
|
|
23
|
+
analytics?: {
|
|
24
|
+
ga4?: AuditRequirement;
|
|
25
|
+
gtm?: AuditRequirement;
|
|
26
|
+
googleAds?: AuditRequirement;
|
|
27
|
+
ahrefs?: AuditRequirement;
|
|
28
|
+
hotjar?: AuditRequirement;
|
|
29
|
+
};
|
|
30
|
+
/** Site verification meta tags. true = required, false/missing = optional. */
|
|
31
|
+
verification?: {
|
|
32
|
+
googleSearchConsole?: boolean;
|
|
33
|
+
bingWebmaster?: boolean;
|
|
34
|
+
};
|
|
35
|
+
forms?: {
|
|
36
|
+
requireSpamProtection?: boolean;
|
|
37
|
+
requireConsent?: boolean;
|
|
38
|
+
};
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export type AuditRequirement = 'required' | 'optional' | 'banned';
|
|
42
|
+
|
|
20
43
|
export interface SiteConfig {
|
|
21
44
|
/** Absolute site URL, no trailing slash. e.g. 'https://example.com'. */
|
|
22
45
|
url: string;
|
|
@@ -198,6 +198,12 @@ async function generateContentScan({ config, cwd }) {
|
|
|
198
198
|
const llms = config.llms;
|
|
199
199
|
const scan = llms.contentScan ?? {};
|
|
200
200
|
|
|
201
|
+
// Date + draft field names come from the blog module config; CC schemas like
|
|
202
|
+
// pearl's use `pubDate` + `draft`, while simpler sites use `date` (+ no draft).
|
|
203
|
+
const blogCfg = config.blog && typeof config.blog === 'object' ? config.blog : {};
|
|
204
|
+
const dateField = blogCfg.dateField ?? 'date';
|
|
205
|
+
const draftField = blogCfg.draftField ?? null;
|
|
206
|
+
|
|
201
207
|
const servicesDir = scan.servicesDir ? path.resolve(cwd, scan.servicesDir) : null;
|
|
202
208
|
const locationsDir = scan.locationsDir ? path.resolve(cwd, scan.locationsDir) : null;
|
|
203
209
|
const pagesDir = scan.pagesDir ? path.resolve(cwd, scan.pagesDir) : null;
|
|
@@ -205,10 +211,7 @@ async function generateContentScan({ config, cwd }) {
|
|
|
205
211
|
|
|
206
212
|
const services = readContentDir(servicesDir);
|
|
207
213
|
const locations = readContentDir(locationsDir);
|
|
208
|
-
const blogPosts = readBlogPosts(blogDir,
|
|
209
|
-
// Also try 'date' field for fallback
|
|
210
|
-
blogDir && readBlogPosts(blogDir, 'date', 'draft').filter((p) => !p.pubDate) || []
|
|
211
|
-
);
|
|
214
|
+
const blogPosts = readBlogPosts(blogDir, dateField, draftField);
|
|
212
215
|
const pages = pagesDir ? collectStaticPages(pagesDir) : [];
|
|
213
216
|
|
|
214
217
|
const dynamicRoutes = scan.dynamicRoutes ?? [];
|
|
@@ -239,6 +242,7 @@ async function generateContentScan({ config, cwd }) {
|
|
|
239
242
|
business: site.business,
|
|
240
243
|
services,
|
|
241
244
|
locations,
|
|
245
|
+
blogPosts: blogPosts.slice(0, llms.blogFullLimit ?? 20),
|
|
242
246
|
aiGuidance: llms.aiGuidance,
|
|
243
247
|
today: isoDate(),
|
|
244
248
|
});
|
|
@@ -372,7 +376,7 @@ function renderContentScanConcise({ siteUrl, siteName, tagline, about, business,
|
|
|
372
376
|
return lines.join('\n') + '\n';
|
|
373
377
|
}
|
|
374
378
|
|
|
375
|
-
function renderContentScanFull({ siteUrl, siteName, tagline, about, business, services, locations, aiGuidance, today }) {
|
|
379
|
+
function renderContentScanFull({ siteUrl, siteName, tagline, about, business, services, locations, blogPosts, aiGuidance, today }) {
|
|
376
380
|
const lines = [];
|
|
377
381
|
lines.push(`# ${siteName} — Full Reference`);
|
|
378
382
|
lines.push(`Last Updated: ${today}`, '');
|
|
@@ -431,6 +435,19 @@ function renderContentScanFull({ siteUrl, siteName, tagline, about, business, se
|
|
|
431
435
|
lines.push('', '---', '');
|
|
432
436
|
}
|
|
433
437
|
|
|
438
|
+
if (blogPosts && blogPosts.length) {
|
|
439
|
+
lines.push('## Blog Posts', '');
|
|
440
|
+
for (const post of blogPosts) {
|
|
441
|
+
lines.push(`### ${post.title}`, '');
|
|
442
|
+
if (post.date) lines.push(`**Published**: ${post.date}`);
|
|
443
|
+
if (post.author) lines.push(`**Author**: ${post.author}`);
|
|
444
|
+
if (post.tags?.length) lines.push(`**Tags**: ${post.tags.join(', ')}`);
|
|
445
|
+
lines.push(`**URL**: ${siteUrl}/blog/${post.slug}/`, '');
|
|
446
|
+
if (post.description) lines.push(post.description, '');
|
|
447
|
+
lines.push('---', '');
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
|
|
434
451
|
lines.push(`## Optional`, '', `- Sitemap: ${siteUrl}/sitemap-index.xml`, '');
|
|
435
452
|
if (aiGuidance) lines.push('## For AI Assistants', '', aiGuidance, '');
|
|
436
453
|
return lines.join('\n') + '\n';
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Rehype module — instructions for Claude
|
|
2
|
+
|
|
3
|
+
When the user asks to **fix trailing-slash bugs in markdown content** (or pre-empt them across a Codejitsu site), wire up `rehypeTrailingSlash`.
|
|
4
|
+
|
|
5
|
+
## What this module provides
|
|
6
|
+
|
|
7
|
+
A single rehype plugin: `trailingSlash`. Runs during Astro's markdown→HTML conversion. Walks the HTML AST and rewrites internal `<a href="/foo">` to `<a href="/foo/">` (or vice versa with `policy: 'never'`).
|
|
8
|
+
|
|
9
|
+
**Why this exists:** Astro's `trailingSlash: 'always'` config covers route resolution and `Astro.url.pathname` but does NOT touch href strings written by humans in markdown or `.astro` files. This plugin closes that gap for markdown-rendered HTML.
|
|
10
|
+
|
|
11
|
+
It does **not** affect href strings inside `.astro` component source (Astro doesn't run rehype on those). Use the audit to catch those.
|
|
12
|
+
|
|
13
|
+
## Wiring it into a site
|
|
14
|
+
|
|
15
|
+
```ts
|
|
16
|
+
// astro.config.mjs
|
|
17
|
+
import { defineConfig } from 'astro/config';
|
|
18
|
+
import trailingSlash from '@ibalzam/codejitsu-core/rehype/trailing-slash';
|
|
19
|
+
|
|
20
|
+
export default defineConfig({
|
|
21
|
+
markdown: {
|
|
22
|
+
rehypePlugins: [trailingSlash],
|
|
23
|
+
},
|
|
24
|
+
});
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
With explicit options:
|
|
28
|
+
|
|
29
|
+
```ts
|
|
30
|
+
rehypePlugins: [
|
|
31
|
+
[trailingSlash, { policy: 'always' }],
|
|
32
|
+
],
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## What it does NOT touch
|
|
36
|
+
|
|
37
|
+
- External URLs (`http://`, `https://`, `//`)
|
|
38
|
+
- `mailto:`, `tel:`, `javascript:`
|
|
39
|
+
- Anchor-only links (`#section`)
|
|
40
|
+
- Paths ending in a file extension (`.pdf`, `.html`, `.webp`, etc.)
|
|
41
|
+
- Root path (`/`)
|
|
42
|
+
|
|
43
|
+
## What it DOES touch
|
|
44
|
+
|
|
45
|
+
- `<a href="/foo">` → `<a href="/foo/">`
|
|
46
|
+
- `<a href="/foo?bar=1">` → `<a href="/foo/?bar=1">`
|
|
47
|
+
- `<a href="/foo#section">` → `<a href="/foo/#section">`
|
|
48
|
+
|
|
49
|
+
Preserves query strings and fragments. Path-only modification.
|
|
50
|
+
|
|
51
|
+
## What must NOT be done
|
|
52
|
+
|
|
53
|
+
- **Don't apply this to `.astro` component files** — the plugin runs in the markdown rehype pipeline, not on Astro components. If an `<a href="/foo">` lives in a `.astro` file, the plugin can't see it.
|
|
54
|
+
- **Don't set `policy: 'never'` if `astro.config` has `trailingSlash: 'always'`** — they'd contradict each other. The audit will flag the inconsistency.
|
|
55
|
+
- **Don't run this with `policy: 'preserve'` and expect anything to change** — that mode is a no-op (registered as a placeholder for symmetry).
|
|
56
|
+
|
|
57
|
+
## Verify after wiring
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
npm run build
|
|
61
|
+
npx codejitsu audit
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
The audit's "All internal links end with /" check should now report 0 markdown-level offenders. Component-level offenders (in `.astro` files) still surface — those must be fixed by hand.
|