designlang 9.0.0 → 10.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/launch.json +11 -0
- package/CHANGELOG.md +62 -0
- package/README.md +16 -1
- package/bin/design-extract.js +107 -1
- package/package.json +2 -2
- package/src/classifiers/smart.js +130 -0
- package/src/extractors/component-library.js +193 -0
- package/src/extractors/component-screenshots.js +161 -0
- package/src/extractors/imagery-style.js +131 -0
- package/src/extractors/logo.js +142 -0
- package/src/extractors/material-language.js +152 -0
- package/src/extractors/page-intent.js +172 -0
- package/src/extractors/section-roles.js +135 -0
- package/src/formatters/markdown.js +109 -0
- package/src/formatters/prompt-pack.js +214 -0
- package/src/index.js +27 -0
- package/src/multipage.js +233 -0
package/src/index.js
CHANGED
|
@@ -28,6 +28,11 @@ import { extractInteractionStates } from './extractors/interaction-states.js';
|
|
|
28
28
|
import { extractMotion } from './extractors/motion.js';
|
|
29
29
|
import { extractComponentAnatomy, formatAnatomyStubs } from './extractors/component-anatomy.js';
|
|
30
30
|
import { extractVoice } from './extractors/voice.js';
|
|
31
|
+
import { extractPageIntent } from './extractors/page-intent.js';
|
|
32
|
+
import { extractSectionRoles } from './extractors/section-roles.js';
|
|
33
|
+
import { extractComponentLibrary } from './extractors/component-library.js';
|
|
34
|
+
import { extractMaterialLanguage } from './extractors/material-language.js';
|
|
35
|
+
import { extractImageryStyle } from './extractors/imagery-style.js';
|
|
31
36
|
import { formatDtcgTokens } from './formatters/dtcg-tokens.js';
|
|
32
37
|
import { formatMotionTokens } from './formatters/motion-tokens.js';
|
|
33
38
|
|
|
@@ -125,6 +130,17 @@ export async function extractDesignLanguage(url, options = {}) {
|
|
|
125
130
|
|
|
126
131
|
design.tokenSources = safeExtract(extractTokenSources, design, styles) || [];
|
|
127
132
|
|
|
133
|
+
// v10: page intent, section roles, component library, material language,
|
|
134
|
+
// imagery style. All additive — no existing field is modified.
|
|
135
|
+
design.pageIntent = safeExtract(extractPageIntent, rawData, { url: rawData.url, title: rawData.title }) || { type: 'unknown', confidence: 0, signals: [] };
|
|
136
|
+
design.sectionRoles = safeExtract(extractSectionRoles, rawData.light?.sections || [], design.regions, design.pageIntent) || { sections: [], counts: {}, readingOrder: [] };
|
|
137
|
+
design.componentLibrary = safeExtract(extractComponentLibrary, rawData.light?.stack || {}) || { library: 'unknown', confidence: 0, evidence: [], alternates: [] };
|
|
138
|
+
design.materialLanguage = safeExtract(extractMaterialLanguage, design) || { label: 'flat', confidence: 0, signals: [], metrics: {} };
|
|
139
|
+
design.imageryStyle = safeExtract(extractImageryStyle, rawData.light?.images || []) || { label: 'none', confidence: 0, counts: {}, signals: [] };
|
|
140
|
+
// Stash raw crawler output so downstream orchestration (multipage, smart)
|
|
141
|
+
// can rebuild the digest without re-crawling.
|
|
142
|
+
design._raw = rawData;
|
|
143
|
+
|
|
128
144
|
// Per-route token extraction (Tier 2 multi-page reconciliation).
|
|
129
145
|
if (Array.isArray(rawData.routes) && rawData.routes.length > 0) {
|
|
130
146
|
design.routes = rawData.routes.map(r => {
|
|
@@ -179,3 +195,14 @@ export { extractVoice } from './extractors/voice.js';
|
|
|
179
195
|
export { lintTokens } from './lint.js';
|
|
180
196
|
export { checkDrift, formatDriftMarkdown } from './drift.js';
|
|
181
197
|
export { visualDiff, formatVisualDiffHtml } from './visual-diff.js';
|
|
198
|
+
// v10
|
|
199
|
+
export { extractPageIntent } from './extractors/page-intent.js';
|
|
200
|
+
export { extractSectionRoles } from './extractors/section-roles.js';
|
|
201
|
+
export { extractComponentLibrary } from './extractors/component-library.js';
|
|
202
|
+
export { extractMaterialLanguage } from './extractors/material-language.js';
|
|
203
|
+
export { extractImageryStyle } from './extractors/imagery-style.js';
|
|
204
|
+
export { extractLogo } from './extractors/logo.js';
|
|
205
|
+
export { captureComponentScreenshotsV10 } from './extractors/component-screenshots.js';
|
|
206
|
+
export { refineWithSmart } from './classifiers/smart.js';
|
|
207
|
+
export { crawlCanonicalPages, computeCrossPageConsistency, discoverCanonicalPages } from './multipage.js';
|
|
208
|
+
export { buildPromptPack, formatV0Prompt, formatLovablePrompt, formatCursorPrompt, formatClaudeArtifactPrompt } from './formatters/prompt-pack.js';
|
package/src/multipage.js
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
// Multi-page crawl orchestrator (v10).
|
|
2
|
+
//
|
|
3
|
+
// Given a homepage result, discover canonical sub-pages from the site's own
|
|
4
|
+
// nav links (pricing / docs / blog / about / product / a representative blog
|
|
5
|
+
// post), score them by URL + anchor text to pick the top N unique page types,
|
|
6
|
+
// and run the full extractor pipeline against each. A `cross-page consistency`
|
|
7
|
+
// pass then diffs tokens across the crawled pages so agents can see which bits
|
|
8
|
+
// of the design language are site-wide vs per-page-type.
|
|
9
|
+
//
|
|
10
|
+
// Intentionally *orchestrates* the existing `extractDesignLanguage` — it does
|
|
11
|
+
// not re-implement extraction. That keeps this thin and lets every extractor
|
|
12
|
+
// improve automatically when v10+ gains more signals.
|
|
13
|
+
|
|
14
|
+
import { chromium } from 'playwright';
|
|
15
|
+
import { extractPageIntent } from './extractors/page-intent.js';
|
|
16
|
+
|
|
17
|
+
// Page types in the order they are considered when selecting pages to crawl.
// Selection walks this list front to back and stops once the page budget is
// used up, so earlier entries take precedence.
const PAGE_TYPE_RANK = [
  'pricing',
  'docs',
  'product',
  'about',
  'blog',
  'blog-post',
  'auth',
  'legal',
];
|
|
20
|
+
|
|
21
|
+
// Case-insensitive patterns tested against a link's visible text, keyed by the
// page type they hint at. Key order is significant to anything that iterates
// this object, so it is kept stable.
const ANCHOR_HINTS = {
  pricing: /\b(pricing|plans?|buy)\b/i,

  docs: /\b(docs?|documentation|guide|api|reference|developers?)\b/i,

  product: /\b(product|features?|solutions?|platform)\b/i,

  about: /\b(about|company|team|careers?)\b/i,

  blog: /\b(blog|news|updates|changelog)\b/i,

  // Matches any non-empty text: anchor text alone cannot identify a blog
  // post, so consumers are expected to special-case this key.
  'blog-post': /./,

  legal: /\b(privacy|terms|legal)\b/i,

  auth: /\b(sign.?in|log.?in|sign.?up|register)\b/i,
};
|
|
31
|
+
|
|
32
|
+
// Classify an absolute URL by its path alone.
//
// Returns the list of page types whose path pattern matches the lower-cased
// pathname, in a fixed order (pricing, docs, about, blog-post or blog,
// product, legal). A path that looks like an individual blog post is reported
// as 'blog-post' only, never additionally as 'blog'. Anything that cannot be
// parsed as a URL yields an empty list.
function rankCandidateByUrl(href) {
  let pathname;
  try {
    ({ pathname } = new URL(href));
  } catch {
    return [];
  }
  const path = (pathname || '/').toLowerCase();

  // Ordered rule table: output order follows table order.
  const rules = [
    ['pricing', /\/pricing(\/|$)|\/plans(\/|$)/],
    ['docs', /\/docs?(\/|$)|\/documentation|\/guide|\/reference/],
    ['about', /\/about|\/company|\/team|\/careers/],
    ['blog-post', /\/blog(\/[\w-]+)+/],
    ['blog', /\/blog(\/|$)|\/changelog/],
    ['product', /\/product(\/|$)|\/features?(\/|$)|\/solutions?(\/|$)/],
    ['legal', /\/terms|\/privacy|\/legal/],
  ];

  const matched = rules
    .filter(([, pattern]) => pattern.test(path))
    .map(([type]) => type);

  // 'blog-post' and 'blog' are mutually exclusive, with the post winning.
  if (matched.includes('blog-post')) {
    return matched.filter((type) => type !== 'blog');
  }
  return matched;
}
|
|
47
|
+
|
|
48
|
+
// Score a link's visible text against every anchor hint.
//
// Returns an object mapping each matching page type to a fixed score of 0.6.
// The 'blog-post' type is never produced here because it can only be derived
// from the URL.
function scoreAnchor(anchorText) {
  return Object.entries(ANCHOR_HINTS).reduce((found, [kind, pattern]) => {
    if (kind !== 'blog-post' && pattern.test(anchorText)) {
      found[kind] = 0.6;
    }
    return found;
  }, {});
}
|
|
56
|
+
|
|
57
|
+
// Choose which sub-pages of a site to crawl.
//
// links:       [{ href, text }] as collected from the homepage; `href` may be
//              relative and is resolved against `homepageUrl`. A non-array
//              value is treated as "no links".
// homepageUrl: absolute URL of the homepage (throws if it cannot be parsed).
// maxPages:    upper bound on the number of returned entries.
//
// Resolves to [{ type, url, anchor }], at most one entry per page type, in
// PAGE_TYPE_RANK order, with no URL appearing twice.
//
// Candidates are scored per type: 1 for a URL-path match plus the anchor-text
// score, so a link whose path says "/docs" beats one that merely mentions
// "developers" in its label. Ties keep document order.
export async function discoverCanonicalPages(links, homepageUrl, maxPages = 5) {
  const base = new URL(homepageUrl);
  const byType = new Map();

  for (const link of Array.isArray(links) ? links : []) {
    // Without this guard a missing href would resolve to "<base>/undefined".
    if (typeof link?.href !== 'string' || link.href === '') continue;

    let target;
    try {
      target = new URL(link.href, base);
    } catch {
      continue;
    }
    // Also drops mailto:/tel:/javascript: links, whose hostname is empty.
    if (target.hostname !== base.hostname) continue;

    // A fragment never identifies a different document, so "/#features" is
    // the homepage and "/pricing#faq" is the same page as "/pricing".
    target.hash = '';
    const isHomepage =
      target.pathname === base.pathname &&
      (!target.search || target.search === base.search);
    if (isHomepage) continue;

    const href = target.toString();
    const text = link.text || '';
    const urlHits = rankCandidateByUrl(href);
    const anchorScores = scoreAnchor(text);

    for (const type of new Set([...urlHits, ...Object.keys(anchorScores)])) {
      const score = (urlHits.includes(type) ? 1 : 0) + (anchorScores[type] ?? 0);
      if (!byType.has(type)) byType.set(type, []);
      byType.get(type).push({ href, text, score });
    }
  }

  if (byType.size === 0) return [];

  const chosen = [];
  const usedUrls = new Set();
  for (const type of PAGE_TYPE_RANK) {
    if (chosen.length >= maxPages) break;
    // Array#sort is stable, so equally scored candidates stay in DOM order.
    const best = [...(byType.get(type) ?? [])]
      .sort((a, b) => b.score - a.score)
      .find((candidate) => !usedUrls.has(candidate.href));
    if (!best) continue;
    usedUrls.add(best.href);
    chosen.push({ type, url: best.href, anchor: best.text });
  }
  return chosen;
}
|
|
86
|
+
|
|
87
|
+
// Collect candidate navigation links from a loaded page.
//
// page: an object exposing `evaluate(fn)`; the callback runs with the page's
//       `document` in scope. The callback is intentionally self-contained (it
//       references nothing from module scope) so it can be shipped to the
//       page as-is.
//
// Resolves to at most 60 entries of { href, text }, where `href` is the raw
// attribute value (possibly relative) and `text` is the trimmed link text cut
// to 80 characters. Pure "#fragment" links and repeated hrefs are dropped.
//
// Links inside header / nav / [role="navigation"] are preferred. If those
// containers are absent, or present but contain no links at all, every link
// in the document is considered instead.
export async function collectLinks(page) {
  return page.evaluate(() => {
    const MAX_LINKS = 60;
    const MAX_TEXT_LENGTH = 80;

    let anchors = [];
    for (const root of document.querySelectorAll('header, nav, [role="navigation"]')) {
      anchors.push(...root.querySelectorAll('a[href]'));
    }
    // An empty <header>/<nav> shell must not hide the rest of the page.
    if (anchors.length === 0) {
      anchors = [...document.querySelectorAll('a[href]')];
    }

    const out = [];
    const seen = new Set();
    for (const a of anchors) {
      const href = a.getAttribute('href') || '';
      if (!href || href.startsWith('#') || seen.has(href)) continue;
      seen.add(href);
      out.push({ href, text: (a.textContent || '').trim().slice(0, MAX_TEXT_LENGTH) });
      if (out.length >= MAX_LINKS) break;
    }
    return out;
  });
}
|
|
110
|
+
|
|
111
|
+
// Measure how much two token lists overlap.
//
// Both inputs are de-duplicated first. Returns:
//   overlap - number of distinct values present in both lists
//   union   - number of distinct values across both lists (reported as 1 when
//             both are empty, which keeps the division below well-defined)
//   jaccard - overlap / union
function pickChoices(aAll = [], bAll = []) {
  const left = new Set(aAll);
  const right = new Set(bAll);
  const overlap = [...left].filter((value) => right.has(value)).length;
  const union = new Set([...left, ...right]).size || 1;
  const jaccard = overlap / union;
  return { overlap, union, jaccard };
}
|
|
120
|
+
|
|
121
|
+
// Build the set of lower-cased hex strings found in `colors.all`.
// Entries without a `hex` value are ignored; a missing `colors` or
// `colors.all` yields an empty set.
function hexSet(colors) {
  const hexes = new Set();
  for (const color of colors?.all || []) {
    const hex = (color.hex || '').toLowerCase();
    if (hex) hexes.add(hex);
  }
  return hexes;
}
|
|
124
|
+
|
|
125
|
+
// Build the set of lower-cased font family names found in
// `typography.families`.
//
// Entries are normally plain strings. The exact entry shape is not visible
// from this module, so object entries carrying the name under `name` or
// `family` are accepted as well (NOTE(review): confirm against the typography
// extractor). Anything else, including empty names, is skipped instead of
// throwing, so one odd entry cannot abort a whole consistency report.
function typeSet(typography) {
  const families = new Set();
  for (const entry of typography?.families || []) {
    const name = typeof entry === 'string' ? entry : entry?.name ?? entry?.family;
    if (typeof name === 'string' && name !== '') {
      families.add(name.toLowerCase());
    }
  }
  return families;
}
|
|
128
|
+
|
|
129
|
+
// Build the set of spacing values (as strings) found in `spacing.scale`.
//
// Each entry may be a bare value or an object with a `value` property. The
// lookup uses `??` rather than `||` so that a legitimate `{ value: 0 }` is
// recorded as "0" instead of degrading to "[object Object]". Null/undefined
// entries are skipped.
function spaceSet(spacing) {
  const values = new Set();
  for (const step of spacing?.scale || []) {
    const raw = step?.value ?? step;
    if (raw != null) values.add(String(raw));
  }
  return values;
}
|
|
132
|
+
|
|
133
|
+
// Build the set of border-radius values (as strings) found in `borders.radii`.
//
// Each entry may be a bare value or an object with a `value` property. `??`
// is used instead of `||` so a radius of `{ value: 0 }` is kept as "0" rather
// than turning into "[object Object]". Null/undefined entries are skipped.
function radiusSet(borders) {
  const values = new Set();
  for (const radius of borders?.radii || []) {
    const raw = radius?.value ?? radius;
    if (raw != null) values.add(String(raw));
  }
  return values;
}
|
|
136
|
+
|
|
137
|
+
// Compare design tokens across crawled pages.
//
// pages: [{ url, type, design }]. A page without `design` is treated as
//        having no tokens. A non-array value is treated as an empty list.
//
// Returns:
//   pairwise      - one entry per unordered page pair with overlap / union /
//                   jaccard figures for colors, typography, spacing, borders
//   shared        - colors (max 50) and font families (max 10) present on
//                   every page
//   perPageUnique - per page, up to 20 colors that occur on no other page
//
// With fewer than two pages there is nothing to compare; the result then has
// the same keys with empty values (plus the legacy empty `drift` object).
export function computeCrossPageConsistency(pages) {
  const list = Array.isArray(pages) ? pages : [];
  if (list.length < 2) {
    return {
      pairwise: [],
      drift: {},
      shared: { colors: [], typography: [] },
      perPageUnique: [],
    };
  }

  // Derive each page's token sets once instead of once per comparison.
  const tokens = list.map((p) => ({
    colors: hexSet(p?.design?.colors),
    typography: typeSet(p?.design?.typography),
    spacing: spaceSet(p?.design?.spacing),
    borders: radiusSet(p?.design?.borders),
  }));

  const pairwise = [];
  for (let i = 0; i < list.length; i++) {
    for (let j = i + 1; j < list.length; j++) {
      const a = list[i];
      const b = list[j];
      pairwise.push({
        pair: [a.type || a.url, b.type || b.url],
        colors: pickChoices([...tokens[i].colors], [...tokens[j].colors]),
        typography: pickChoices([...tokens[i].typography], [...tokens[j].typography]),
        spacing: pickChoices([...tokens[i].spacing], [...tokens[j].spacing]),
        borders: pickChoices([...tokens[i].borders], [...tokens[j].borders]),
      });
    }
  }

  // Values present on every page, in first-page order.
  const intersectAll = (sets) =>
    sets.slice(1).reduce(
      (acc, set) => new Set([...acc].filter((x) => set.has(x))),
      new Set(sets[0]),
    );
  const sharedColors = intersectAll(tokens.map((t) => t.colors));
  const sharedTypes = intersectAll(tokens.map((t) => t.typography));

  // Each page contributes a color at most once (sets), so a color seen on
  // exactly one page is unique to that page.
  const pagesPerColor = new Map();
  for (const { colors } of tokens) {
    for (const hex of colors) {
      pagesPerColor.set(hex, (pagesPerColor.get(hex) ?? 0) + 1);
    }
  }
  const perPageUnique = list.map((p, idx) => ({
    url: p.url,
    type: p.type,
    uniqueColors: [...tokens[idx].colors]
      .filter((hex) => pagesPerColor.get(hex) === 1)
      .slice(0, 20),
  }));

  return {
    pairwise,
    shared: {
      colors: [...sharedColors].slice(0, 50),
      typography: [...sharedTypes].slice(0, 10),
    },
    perPageUnique,
  };
}
|
|
194
|
+
|
|
195
|
+
// End-to-end runner: opens its own browser, discovers nav links from homepage,
|
|
196
|
+
// crawls N more pages, returns per-page `design` + consistency report. It
|
|
197
|
+
// leaves the single-page code path alone so --full users get an additive bump.
|
|
198
|
+
// Discover canonical sub-pages from the homepage and run `extract` on each.
//
// homepageUrl:     URL whose header/nav links seed discovery.
// homepageRawData: accepted for interface compatibility; currently unused.
// maxPages:        maximum number of sub-pages to crawl.
// extract:         async (url, options) => design. Injected so the full
//                  single-page pipeline can be reused without a circular
//                  import (index.js -> multipage.js -> index.js).
// crawlerOptions:  forwarded to `extract`; `width` / `height` also size the
//                  discovery viewport.
//
// Resolves to { targets, pages, consistency }:
//   targets     - the discovered [{ type, url, anchor }]
//   pages       - per target: { url, type, intent, error }; `intent` is null
//                 when extraction or intent classification failed, `error`
//                 is the extraction error message or null
//   consistency - cross-page report over the successfully extracted pages.
//                 The homepage itself is NOT part of it, because no homepage
//                 design is available here.
export async function crawlCanonicalPages({ homepageUrl, homepageRawData, maxPages = 5, extract, crawlerOptions = {} }) {
  // Phase 1: link discovery. The browser is needed only for this phase, so
  // it is closed before the potentially long extraction runs start, and the
  // context/page are created inside the try so a failure there cannot leak it.
  let targets;
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext({
      viewport: { width: crawlerOptions.width || 1280, height: crawlerOptions.height || 800 },
      colorScheme: 'light',
    });
    const page = await context.newPage();
    // Best effort: a slow or partially failing homepage may still expose
    // usable nav links, so navigation errors are deliberately ignored.
    await page.goto(homepageUrl, { waitUntil: 'domcontentloaded', timeout: 30000 }).catch(() => {});
    await page.waitForLoadState('networkidle').catch(() => {});
    const links = await collectLinks(page).catch(() => []);
    targets = await discoverCanonicalPages(links, homepageUrl, maxPages);
  } finally {
    await browser.close();
  }

  // Phase 2: run the injected pipeline on each target, sequentially. A
  // failing page is recorded with its error and does not stop the others.
  const perPage = [];
  for (const t of targets) {
    try {
      const design = await extract(t.url, {
        ...crawlerOptions,
        depth: 0,
        dark: false,
        screenshots: false,
        responsive: false,
        interactions: false,
        deepInteract: false,
        _skipMultipage: true,
      });
      perPage.push({ url: t.url, type: t.type, design });
    } catch (e) {
      perPage.push({ url: t.url, type: t.type, error: e?.message ?? String(e) });
    }
  }

  const consistency = computeCrossPageConsistency(perPage.filter((p) => p.design));

  // The single-page pipeline stores the crawler output under `design._raw`;
  // `raw` is kept as a fallback for other `extract` implementations.
  const classifyIntent = (p) => {
    if (!p.design) return null;
    const raw = p.design._raw ?? p.design.raw ?? {};
    try {
      return extractPageIntent(raw, { url: p.url, title: p.design.meta?.title ?? raw.title });
    } catch {
      // One unclassifiable page must not discard the whole crawl result.
      return null;
    }
  };

  return {
    targets,
    pages: perPage.map((p) => ({
      url: p.url,
      type: p.type,
      intent: classifyIntent(p),
      error: p.error || null,
    })),
    consistency,
  };
}
|