webpeel 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +257 -0
- package/dist/cli.js.map +1 -1
- package/dist/core/branding.d.ts +55 -0
- package/dist/core/branding.d.ts.map +1 -0
- package/dist/core/branding.js +235 -0
- package/dist/core/branding.js.map +1 -0
- package/dist/core/change-tracking.d.ts +76 -0
- package/dist/core/change-tracking.d.ts.map +1 -0
- package/dist/core/change-tracking.js +267 -0
- package/dist/core/change-tracking.js.map +1 -0
- package/dist/core/map.d.ts +4 -0
- package/dist/core/map.d.ts.map +1 -1
- package/dist/core/map.js +60 -5
- package/dist/core/map.js.map +1 -1
- package/dist/index.d.ts +3 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +73 -0
- package/dist/index.js.map +1 -1
- package/dist/mcp/server.js +290 -0
- package/dist/mcp/server.js.map +1 -1
- package/dist/types.d.ts +21 -0
- package/dist/types.d.ts.map +1 -1
- package/dist/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Branding and design system extraction from web pages
|
|
3
|
+
* Extracts colors, fonts, typography, spacing, components, and CSS variables
|
|
4
|
+
*/
|
|
5
|
+
/**
 * Extract branding and design system information from a loaded web page.
 *
 * All style inspection happens inside `page.evaluate` callbacks, which run in
 * the browser. Those callbacks must therefore stay fully self-contained: they
 * cannot reference anything from this module's scope.
 *
 * @param page - Playwright Page object (only `page.evaluate` is used)
 * @returns Branding profile with `colorScheme`, `colors`, `fonts`,
 *   `typography`, `spacing`, `components` and `cssVariables`, plus `logo` and
 *   `favicon` URLs when found. If any step throws, the error is logged and an
 *   empty profile with the same shape (minus `logo`/`favicon`) is returned.
 *
 * @example
 * ```typescript
 * const browser = await chromium.launch();
 * const page = await browser.newPage();
 * await page.goto('https://example.com');
 * const branding = await extractBranding(page);
 * console.log(branding.colors.primary);
 * ```
 */
export async function extractBranding(page) {
    try {
        // Run extraction in browser context to access computed styles
        const extracted = await page.evaluate(() => {
            const result = {
                colorScheme: 'light',
                colors: {},
                fonts: [],
                typography: {
                    fontFamilies: {},
                    fontSizes: {},
                    fontWeights: {},
                    lineHeights: {},
                },
                spacing: {},
                components: {},
                cssVariables: {},
            };
            // Convert "rgb(r, g, b)" / "rgba(r, g, b, a)" to "#rrggbb"; any other
            // string is returned unchanged.
            function rgbToHex(rgb) {
                const match = rgb.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)(?:,\s*[\d.]+)?\)/);
                if (!match) {
                    return rgb;
                }
                const [, r, g, b] = match;
                const hex = [r, g, b]
                    .map((channel) => Number.parseInt(channel, 10).toString(16).padStart(2, '0'))
                    .join('');
                return `#${hex}`;
            }
            // True for an empty value, the keyword "transparent", or an rgba()
            // colour whose alpha channel is zero.
            function isTransparent(color) {
                return !color
                    || color === 'transparent'
                    || /^rgba\(\s*\d+,\s*\d+,\s*\d+,\s*0(?:\.0+)?\s*\)$/.test(color);
            }
            // Extract all CSS variables from :root
            const rootStyles = getComputedStyle(document.documentElement);
            for (let i = 0; i < rootStyles.length; i++) {
                const prop = rootStyles[i];
                if (!prop.startsWith('--')) {
                    continue;
                }
                const value = rootStyles.getPropertyValue(prop).trim();
                result.cssVariables[prop] = value;
                // Variables whose value looks like a colour are also exposed in `colors`
                if (value.match(/^#[0-9a-f]{3,8}$/i) || value.match(/^rgba?\(/i) || value.match(/^hsla?\(/i)) {
                    const colorKey = prop.replace(/^--/, '').replace(/-/g, '_');
                    result.colors[colorKey] = value.startsWith('rgb') ? rgbToHex(value) : value;
                }
            }
            // Detect color scheme (light/dark).
            // A transparent <html> background is a non-empty string, so it must be
            // rejected explicitly before falling back to <body>; otherwise it would
            // be read as black. When both are transparent the 'light' default stays
            // and no background colour is recorded.
            const rootBg = rootStyles.backgroundColor;
            const bodyBg = getComputedStyle(document.body).backgroundColor;
            let bgColor = '';
            if (!isTransparent(rootBg)) {
                bgColor = rootBg;
            }
            else if (!isTransparent(bodyBg)) {
                bgColor = bodyBg;
            }
            if (bgColor) {
                const rgb = bgColor.match(/rgba?\((\d+),\s*(\d+),\s*(\d+)/);
                if (rgb) {
                    const [, r, g, b] = rgb.map(Number);
                    // Weighted channel sum on a 0-255 scale; below 128 counts as dark
                    const brightness = (r * 299 + g * 587 + b * 114) / 1000;
                    result.colorScheme = brightness < 128 ? 'dark' : 'light';
                    result.colors.background = rgbToHex(bgColor);
                }
            }
            // Extract text colors
            const bodyStyles = getComputedStyle(document.body);
            result.colors.textPrimary = rgbToHex(bodyStyles.color);
            // The first heading supplies the secondary text colour when it differs from body text
            const heading = document.querySelector('h1, h2, h3, h4, h5, h6');
            if (heading) {
                const headingColor = getComputedStyle(heading).color;
                if (headingColor !== bodyStyles.color) {
                    result.colors.textSecondary = rgbToHex(headingColor);
                }
            }
            // Try to detect primary/accent colors from buttons, links, etc.
            const button = document.querySelector('button, .btn, [role="button"], a.button');
            if (button) {
                const btnBg = getComputedStyle(button).backgroundColor;
                if (btnBg && !btnBg.includes('rgba(0, 0, 0, 0)')) {
                    result.colors.primary = rgbToHex(btnBg);
                }
            }
            const link = document.querySelector('a');
            if (link) {
                const linkColor = getComputedStyle(link).color;
                // The link colour is only used as an accent when no primary colour was found
                if (!result.colors.primary && linkColor !== bodyStyles.color) {
                    result.colors.accent = rgbToHex(linkColor);
                }
            }
            // Extract fonts: record typography per tag name (later elements with the
            // same tag overwrite earlier ones) and collect the distinct families
            const fontFamiliesSet = new Set();
            const fontElements = [document.body, ...Array.from(document.querySelectorAll('h1, h2, h3, h4, h5, h6, p, button, input'))];
            for (const el of fontElements) {
                if (!el) {
                    continue;
                }
                const styles = getComputedStyle(el);
                // Only the first family of the font stack is kept, without quotes
                const family = styles.fontFamily.split(',')[0].replace(/['"]/g, '').trim();
                fontFamiliesSet.add(family);
                const tagName = el.tagName.toLowerCase();
                result.typography.fontFamilies[tagName] = family;
                result.typography.fontSizes[tagName] = styles.fontSize;
                result.typography.fontWeights[tagName] = Number.parseInt(styles.fontWeight, 10) || 400;
                result.typography.lineHeights[tagName] = styles.lineHeight;
            }
            // Build fonts array with sources. The stylesheet links do not depend on
            // the family, so they are queried once for all families.
            const googleFontLinks = document.querySelectorAll('link[href*="fonts.googleapis.com"]');
            fontFamiliesSet.forEach((family) => {
                const fontObj = { family };
                for (const fontLink of googleFontLinks) {
                    const href = fontLink.getAttribute('href') || '';
                    if (href.includes(family.replace(/\s+/g, '+'))) {
                        fontObj.source = 'Google Fonts';
                        // Extract weights from URL
                        const weightMatch = href.match(/wght@([0-9;]+)/);
                        if (weightMatch) {
                            fontObj.weights = weightMatch[1].split(';').map(Number);
                        }
                        break;
                    }
                }
                if (!fontObj.source) {
                    fontObj.source = 'system';
                }
                result.fonts.push(fontObj);
            });
            // Extract spacing values
            const container = document.querySelector('main, .container, .wrapper, #content, [class*="container"]');
            if (container) {
                const containerStyles = getComputedStyle(container);
                result.spacing.containerMaxWidth = containerStyles.maxWidth;
                result.spacing.borderRadius = containerStyles.borderRadius;
                // Try to detect base spacing unit from the first pixel value in the padding
                const paddingMatch = containerStyles.padding.match(/(\d+)px/);
                if (paddingMatch) {
                    const px = Number.parseInt(paddingMatch[1], 10);
                    // Common spacing systems use multiples of 4 or 8
                    if (px % 8 === 0) {
                        result.spacing.baseUnit = 8;
                    }
                    else if (px % 4 === 0) {
                        result.spacing.baseUnit = 4;
                    }
                }
            }
            // Extract common component patterns (first matching element per component)
            const componentSelectors = {
                button: 'button, .btn, [role="button"]',
                input: 'input[type="text"], input[type="email"], textarea',
                card: '.card, [class*="card"]',
                nav: 'nav, .nav, .navigation',
                header: 'header, .header',
                footer: 'footer, .footer',
            };
            for (const [name, selector] of Object.entries(componentSelectors)) {
                const el = document.querySelector(selector);
                if (!el) {
                    continue;
                }
                const styles = getComputedStyle(el);
                result.components[name] = {
                    backgroundColor: styles.backgroundColor.includes('rgba(0, 0, 0, 0)') ? 'transparent' : rgbToHex(styles.backgroundColor),
                    color: rgbToHex(styles.color),
                    borderRadius: styles.borderRadius,
                    padding: styles.padding,
                    fontSize: styles.fontSize,
                    fontWeight: styles.fontWeight,
                };
            }
            return result;
        });
        // Logo: the first selector (in priority order) that yields an image with a src
        const logo = await page.evaluate(() => {
            const logoSelectors = [
                'img[alt*="logo" i]',
                'img[class*="logo" i]',
                'img[id*="logo" i]',
                'a.logo img',
                'a[class*="logo"] img',
                'header img',
                '.header img',
                'nav img:first-of-type',
            ];
            for (const selector of logoSelectors) {
                const img = document.querySelector(selector);
                if (img?.src) {
                    return img.src;
                }
            }
            return undefined;
        });
        // Favicon: the first matching <link> that has an href
        const favicon = await page.evaluate(() => {
            const faviconSelectors = [
                'link[rel="icon"]',
                'link[rel="shortcut icon"]',
                'link[rel="apple-touch-icon"]',
            ];
            for (const selector of faviconSelectors) {
                const iconLink = document.querySelector(selector);
                if (iconLink?.href) {
                    return iconLink.href;
                }
            }
            return undefined;
        });
        return {
            ...extracted,
            logo,
            favicon,
        };
    }
    catch (error) {
        // Return minimal branding profile on error, with the same typography
        // keys as a successful result
        console.error('Branding extraction failed:', error);
        return {
            colorScheme: 'light',
            colors: {},
            fonts: [],
            typography: {
                fontFamilies: {},
                fontSizes: {},
                fontWeights: {},
                lineHeights: {},
            },
            spacing: {},
            components: {},
            cssVariables: {},
        };
    }
}
|
|
235
|
+
//# sourceMappingURL=branding.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"branding.js","sourceRoot":"","sources":["../../src/core/branding.ts"],"names":[],"mappings":"AAAA;;;GAGG;AA2CH;;;;;;;;;;;;;;;GAeG;AACH,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,IAAU;IAC9C,IAAI,CAAC;QACH,8DAA8D;QAC9D,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACzC,MAAM,MAAM,GAAQ;gBAClB,WAAW,EAAE,OAAO;gBACpB,MAAM,EAAE,EAAE;gBACV,KAAK,EAAE,EAAE;gBACT,UAAU,EAAE;oBACV,YAAY,EAAE,EAAE;oBAChB,SAAS,EAAE,EAAE;oBACb,WAAW,EAAE,EAAE;oBACf,WAAW,EAAE,EAAE;iBAChB;gBACD,OAAO,EAAE,EAAE;gBACX,UAAU,EAAE,EAAE;gBACd,YAAY,EAAE,EAAE;aACjB,CAAC;YAEF,kCAAkC;YAClC,SAAS,QAAQ,CAAC,GAAW;gBAC3B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,iDAAiD,CAAC,CAAC;gBAC3E,IAAI,CAAC,KAAK;oBAAE,OAAO,GAAG,CAAC;gBACvB,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,GAAG,KAAK,CAAC;gBAC1B,OAAO,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACtF,CAAC;YAED,uCAAuC;YACvC,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;YAC9D,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,MAAM,IAAI,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;gBAC3B,IAAI,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;oBAC1B,MAAM,KAAK,GAAG,UAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;oBACvD,MAAM,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC;oBAElC,yBAAyB;oBACzB,IAAI,KAAK,CAAC,KAAK,CAAC,mBAAmB,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,WAAW,CAAC,EAAE,CAAC;wBAC7F,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;wBAC5D,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,GAAG,KAAK,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC;oBAC9E,CAAC;gBACH,CAAC;YACH,CAAC;YAED,mCAAmC;YACnC,MAAM,OAAO,GAAG,UAAU,CAAC,eAAe,IAAI,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,eAAe,CAAC;YAC9F,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;gBAC5D,IAAI,GAAG,EAAE,CAAC;oBACR,MAAM,CAAC,EAAE,CAAC,EAAE,CAAC
,EAAE,CAAC,CAAC,GAAG,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBACpC,MAAM,UAAU,GAAG,CAAC,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,GAAG,CAAC,GAAG,GAAG,CAAC,GAAG,IAAI,CAAC;oBACxD,MAAM,CAAC,WAAW,GAAG,UAAU,GAAG,GAAG,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC;oBACzD,MAAM,CAAC,MAAM,CAAC,UAAU,GAAG,QAAQ,CAAC,OAAO,CAAC,CAAC;gBAC/C,CAAC;YACH,CAAC;YAED,sBAAsB;YACtB,MAAM,UAAU,GAAG,gBAAgB,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;YACnD,MAAM,CAAC,MAAM,CAAC,WAAW,GAAG,QAAQ,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC;YAEvD,yCAAyC;YACzC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,wBAAwB,CAAC,CAAC;YACjE,IAAI,OAAO,EAAE,CAAC;gBACZ,MAAM,YAAY,GAAG,gBAAgB,CAAC,OAAO,CAAC,CAAC,KAAK,CAAC;gBACrD,IAAI,YAAY,KAAK,UAAU,CAAC,KAAK,EAAE,CAAC;oBACtC,MAAM,CAAC,MAAM,CAAC,aAAa,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC;gBACvD,CAAC;YACH,CAAC;YAED,gEAAgE;YAChE,MAAM,MAAM,GAAG,QAAQ,CAAC,aAAa,CAAC,yCAAyC,CAAC,CAAC;YACjF,IAAI,MAAM,EAAE,CAAC;gBACX,MAAM,SAAS,GAAG,gBAAgB,CAAC,MAAM,CAAC,CAAC;gBAC3C,MAAM,KAAK,GAAG,SAAS,CAAC,eAAe,CAAC;gBACxC,IAAI,KAAK,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,kBAAkB,CAAC,EAAE,CAAC;oBACjD,MAAM,CAAC,MAAM,CAAC,OAAO,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC;gBAC1C,CAAC;YACH,CAAC;YAED,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,GAAG,CAAC,CAAC;YACzC,IAAI,IAAI,EAAE,CAAC;gBACT,MAAM,SAAS,GAAG,gBAAgB,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC;gBAC/C,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,OAAO,IAAI,SAAS,KAAK,UAAU,CAAC,KAAK,EAAE,CAAC;oBAC7D,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,CAAC;gBAC7C,CAAC;YACH,CAAC;YAED,gBAAgB;YAChB,MAAM,eAAe,GAAG,IAAI,GAAG,EAAU,CAAC;YAC1C,MAAM,YAAY,GAAG,CAAC,QAAQ,CAAC,IAAI,EAAE,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,0CAA0C,CAAC,CAAC,CAAC,CAAC;YAE3H,KAAK,MAAM,EAAE,IAAI,YAAY,EAAE,CAAC;gBAC9B,IAAI,EAAE,EAAE,CAAC;oBACP,MAAM,MAAM,GAAG,gBAAgB,CAAC,EAAE,CAAC,CAAC;oBACpC,MAAM,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;oBAC3E,eAAe,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;oBAE5B,MAAM,OAAO,GAAG,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC;oBACzC,MAAM,CAAC,UAAU,CAAC,YAAY,CAAC,OAAO,CAAC,GAAG,MAAM,CAAC;oBACjD,MAAM,CAAC,U
AAU,CAAC,SAAS,CAAC,OAAO,CAAC,GAAG,MAAM,CAAC,QAAQ,CAAC;oBACvD,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC,UAAU,CAAC,IAAI,GAAG,CAAC;oBAC5E,MAAM,CAAC,UAAU,CAAC,WAAW,CAAC,OAAO,CAAC,GAAG,MAAM,CAAC,UAAU,CAAC;gBAC7D,CAAC;YACH,CAAC;YAED,iCAAiC;YACjC,eAAe,CAAC,OAAO,CAAC,MAAM,CAAC,EAAE;gBAC/B,MAAM,OAAO,GAAQ,EAAE,MAAM,EAAE,CAAC;gBAEhC,sBAAsB;gBACtB,MAAM,KAAK,GAAG,QAAQ,CAAC,gBAAgB,CAAC,oCAAoC,CAAC,CAAC;gBAC9E,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;oBACzB,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;oBAC7C,IAAI,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,EAAE,CAAC;wBAC/C,OAAO,CAAC,MAAM,GAAG,cAAc,CAAC;wBAChC,2BAA2B;wBAC3B,MAAM,WAAW,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,CAAC;wBACjD,IAAI,WAAW,EAAE,CAAC;4BAChB,OAAO,CAAC,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;wBAC1D,CAAC;wBACD,MAAM;oBACR,CAAC;gBACH,CAAC;gBAED,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;oBACpB,OAAO,CAAC,MAAM,GAAG,QAAQ,CAAC;gBAC5B,CAAC;gBAED,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC7B,CAAC,CAAC,CAAC;YAEH,yBAAyB;YACzB,MAAM,SAAS,GAAG,QAAQ,CAAC,aAAa,CAAC,4DAA4D,CAAC,CAAC;YACvG,IAAI,SAAS,EAAE,CAAC;gBACd,MAAM,eAAe,GAAG,gBAAgB,CAAC,SAAS,CAAC,CAAC;gBACpD,MAAM,CAAC,OAAO,CAAC,iBAAiB,GAAG,eAAe,CAAC,QAAQ,CAAC;gBAC5D,MAAM,CAAC,OAAO,CAAC,YAAY,GAAG,eAAe,CAAC,YAAY,CAAC;gBAE3D,kCAAkC;gBAClC,MAAM,OAAO,GAAG,eAAe,CAAC,OAAO,CAAC;gBACxC,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;gBAC9C,IAAI,YAAY,EAAE,CAAC;oBACjB,MAAM,EAAE,GAAG,QAAQ,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC;oBACrC,iDAAiD;oBACjD,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC;wBAAE,MAAM,CAAC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;yBACzC,IAAI,EAAE,GAAG,CAAC,KAAK,CAAC;wBAAE,MAAM,CAAC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;gBACrD,CAAC;YACH,CAAC;YAED,oCAAoC;YACpC,MAAM,kBAAkB,GAAG;gBACzB,MAAM,EAAE,+BAA+B;gBACvC,KAAK,EAAE,mDAAmD;gBAC1D,IAAI,EAAE,wBAAwB;gBAC9B,GAAG,EAAE,wBAAwB;gBAC7B,MAAM,EAAE,iBAAiB;gBACzB,MAAM,EAAE,iBAAiB;aAC1B,CAAC;YAEF,KAAK,MAAM,CAAC,IAAI,EAAE,QAAQ,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,kBAAkB,CAAC,EAAE,CAAC;gB
AClE,MAAM,EAAE,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;gBAC5C,IAAI,EAAE,EAAE,CAAC;oBACP,MAAM,MAAM,GAAG,gBAAgB,CAAC,EAAE,CAAC,CAAC;oBACpC,MAAM,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG;wBACxB,eAAe,EAAE,MAAM,CAAC,eAAe,CAAC,QAAQ,CAAC,kBAAkB,CAAC,CAAC,CAAC,CAAC,aAAa,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,eAAe,CAAC;wBACvH,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC,KAAK,CAAC;wBAC7B,YAAY,EAAE,MAAM,CAAC,YAAY;wBACjC,OAAO,EAAE,MAAM,CAAC,OAAO;wBACvB,QAAQ,EAAE,MAAM,CAAC,QAAQ;wBACzB,UAAU,EAAE,MAAM,CAAC,UAAU;qBAC9B,CAAC;gBACJ,CAAC;YACH,CAAC;YAED,OAAO,MAAM,CAAC;QAChB,CAAC,CAAC,CAAC;QAEH,4CAA4C;QAC5C,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACpC,MAAM,aAAa,GAAG;gBACpB,oBAAoB;gBACpB,sBAAsB;gBACtB,mBAAmB;gBACnB,YAAY;gBACZ,sBAAsB;gBACtB,YAAY;gBACZ,aAAa;gBACb,uBAAuB;aACxB,CAAC;YAEF,KAAK,MAAM,QAAQ,IAAI,aAAa,EAAE,CAAC;gBACrC,MAAM,GAAG,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAQ,CAAC;gBACpD,IAAI,GAAG,EAAE,GAAG;oBAAE,OAAO,GAAG,CAAC,GAAG,CAAC;YAC/B,CAAC;YACD,OAAO,SAAS,CAAC;QACnB,CAAC,CAAC,CAAC;QAEH,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YACvC,MAAM,gBAAgB,GAAG;gBACvB,kBAAkB;gBAClB,2BAA2B;gBAC3B,8BAA8B;aAC/B,CAAC;YAEF,KAAK,MAAM,QAAQ,IAAI,gBAAgB,EAAE,CAAC;gBACxC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,QAAQ,CAAQ,CAAC;gBACrD,IAAI,IAAI,EAAE,IAAI;oBAAE,OAAO,IAAI,CAAC,IAAI,CAAC;YACnC,CAAC;YACD,OAAO,SAAS,CAAC;QACnB,CAAC,CAAC,CAAC;QAEH,OAAO;YACL,GAAG,SAAS;YACZ,IAAI;YACJ,OAAO;SACW,CAAC;IAEvB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,2CAA2C;QAC3C,OAAO,CAAC,KAAK,CAAC,6BAA6B,EAAE,KAAK,CAAC,CAAC;QACpD,OAAO;YACL,WAAW,EAAE,OAAO;YACpB,MAAM,EAAE,EAAE;YACV,KAAK,EAAE,EAAE;YACT,UAAU,EAAE;gBACV,YAAY,EAAE,EAAE;gBAChB,SAAS,EAAE,EAAE;gBACb,WAAW,EAAE,EAAE;aAChB;YACD,OAAO,EAAE,EAAE;YACX,UAAU,EAAE,EAAE;YACd,YAAY,EAAE,EAAE;SACjB,CAAC;IACJ,CAAC;AACH,CAAC"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local-first content change tracking
|
|
3
|
+
* Stores snapshots in ~/.webpeel/snapshots/ and provides diffing
|
|
4
|
+
*/
|
|
5
|
+
/** One stored capture of a URL's content. */
export interface Snapshot {
    /** URL the snapshot was taken for. */
    url: string;
    /** Content fingerprint supplied by the caller of `trackChange`. */
    fingerprint: string;
    /** Captured content. */
    content: string;
    /** Capture time as a number (usable with `new Date(timestamp)`). */
    timestamp: number;
    /** Optional free-form extra data. */
    metadata?: {
        [key: string]: any;
    };
}
|
|
12
|
+
/** Outcome of comparing freshly scraped content with the stored snapshot. */
export interface ChangeResult {
    /** Change classification for the URL. */
    changeStatus: 'new' | 'same' | 'changed' | 'removed';
    /** When the previous snapshot was taken, or null if there was none. */
    previousScrapeAt: string | null;
    /** Line diff details; optional. */
    diff?: {
        /** Diff rendered as text. */
        text: string;
        /** Number of added lines. */
        additions: number;
        /** Number of deleted lines. */
        deletions: number;
        /** Per-line entries of the diff. */
        changes: {
            type: 'add' | 'del' | 'normal';
            line: number;
            content: string;
        }[];
    };
}
|
|
26
|
+
/**
 * Look up the stored snapshot for a URL.
 *
 * @param url - URL whose snapshot should be returned
 * @returns The snapshot when one exists, otherwise null
 *
 * @example
 * ```typescript
 * const snapshot = await getSnapshot('https://example.com');
 * if (snapshot) {
 *   console.log('Last scraped:', new Date(snapshot.timestamp));
 * }
 * ```
 */
export declare function getSnapshot(url: string): Promise<Snapshot | null>;
|
|
41
|
+
/**
 * Detect content changes for a URL by comparing against the previous
 * snapshot, then store the new snapshot.
 *
 * @param url - URL being tracked
 * @param content - Current content
 * @param fingerprint - Content fingerprint (SHA256 hash)
 * @returns Change detection result
 *
 * @example
 * ```typescript
 * const result = await trackChange('https://example.com', content, fingerprint);
 * if (result.changeStatus === 'changed') {
 *   console.log('Content changed!');
 *   console.log(`+${result.diff.additions} -${result.diff.deletions}`);
 * }
 * ```
 */
export declare function trackChange(url: string, content: string, fingerprint: string): Promise<ChangeResult>;
|
|
60
|
+
/**
 * Remove stored snapshots, optionally limited to URLs matching a pattern.
 *
 * @param urlPattern - Optional regex pattern matched against snapshot URLs;
 *   when omitted, every snapshot is cleared
 * @returns Number of snapshots cleared
 *
 * @example
 * ```typescript
 * // Clear all snapshots
 * const count = await clearSnapshots();
 *
 * // Clear specific domain
 * const count = await clearSnapshots('example\\.com');
 * ```
 */
export declare function clearSnapshots(urlPattern?: string): Promise<number>;
|
|
76
|
+
//# sourceMappingURL=change-tracking.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"change-tracking.d.ts","sourceRoot":"","sources":["../../src/core/change-tracking.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,MAAM,WAAW,QAAQ;IACvB,GAAG,EAAE,MAAM,CAAC;IACZ,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,CAAC;IAChB,SAAS,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAChC;AAED,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,KAAK,GAAG,MAAM,GAAG,SAAS,GAAG,SAAS,CAAC;IACrD,gBAAgB,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,IAAI,CAAC,EAAE;QACL,IAAI,EAAE,MAAM,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,SAAS,EAAE,MAAM,CAAC;QAClB,OAAO,EAAE,KAAK,CAAC;YACb,IAAI,EAAE,KAAK,GAAG,KAAK,GAAG,QAAQ,CAAC;YAC/B,IAAI,EAAE,MAAM,CAAC;YACb,OAAO,EAAE,MAAM,CAAC;SACjB,CAAC,CAAC;KACJ,CAAC;CACH;AAwBD;;;;;;;;;;;;;GAaG;AACH,wBAAsB,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAC,CAQvE;AAoGD;;;;;;;;;;;;;;;;;GAiBG;AACH,wBAAsB,WAAW,CAC/B,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,WAAW,EAAE,MAAM,GAClB,OAAO,CAAC,YAAY,CAAC,CA8DvB;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,cAAc,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAqCzE"}
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local-first content change tracking
|
|
3
|
+
* Stores snapshots in ~/.webpeel/snapshots/ and provides diffing
|
|
4
|
+
*/
|
|
5
|
+
import { createHash } from 'crypto';
|
|
6
|
+
import { promises as fs } from 'fs';
|
|
7
|
+
import { join } from 'path';
|
|
8
|
+
import { homedir } from 'os';
|
|
9
|
+
// Snapshot storage directory
|
|
10
|
+
const SNAPSHOTS_DIR = join(homedir(), '.webpeel', 'snapshots');
|
|
11
|
+
/**
 * Map a URL to the JSON file that stores its snapshot.
 * The file name is the hex SHA-256 digest of the URL, inside SNAPSHOTS_DIR.
 */
function getSnapshotPath(url) {
    const hasher = createHash('sha256');
    hasher.update(url);
    const digest = hasher.digest('hex');
    const fileName = `${digest}.json`;
    return join(SNAPSHOTS_DIR, fileName);
}
|
|
18
|
+
/**
 * Ensure the snapshots directory exists.
 *
 * `fs.mkdir` with `recursive: true` resolves when the directory is already
 * present, so a rejection here is a genuine failure (for example a permission
 * problem or a file occupying the path). It is allowed to propagate instead of
 * being swallowed: both callers in this module (`saveSnapshot`, reached via
 * `trackChange`, and `clearSnapshots`) run inside their own try/catch.
 *
 * @returns Promise that resolves once the directory exists
 * @throws The underlying file-system error when the directory cannot be created
 */
async function ensureSnapshotsDir() {
    await fs.mkdir(SNAPSHOTS_DIR, { recursive: true });
}
|
|
29
|
+
/**
 * Load the stored snapshot for a URL.
 *
 * @param url - URL whose snapshot should be read
 * @returns The parsed snapshot file, or null when it cannot be read or parsed
 *
 * @example
 * ```typescript
 * const snapshot = await getSnapshot('https://example.com');
 * if (snapshot) {
 *   console.log('Last scraped:', new Date(snapshot.timestamp));
 * }
 * ```
 */
export async function getSnapshot(url) {
    try {
        const raw = await fs.readFile(getSnapshotPath(url), 'utf-8');
        const snapshot = JSON.parse(raw);
        return snapshot;
    }
    catch {
        // Any read or parse failure is reported as "no snapshot"
        return null;
    }
}
|
|
53
|
+
/**
 * Persist a snapshot as pretty-printed JSON at the path derived from its URL,
 * after making sure the snapshots directory is in place.
 */
async function saveSnapshot(snapshot) {
    await ensureSnapshotsDir();
    const target = getSnapshotPath(snapshot.url);
    const serialized = JSON.stringify(snapshot, null, 2);
    await fs.writeFile(target, serialized, 'utf-8');
}
|
|
61
|
+
/**
 * Line-based diff of two texts, built from a longest-common-subsequence table.
 *
 * @param oldContent - Previous text
 * @param newContent - Current text
 * @param contextLines - Unchanged lines shown around each change in `text`
 *   (default 3; any value that is not a non-negative integer falls back to 3)
 * @returns `{ text, additions, deletions, changes }` where:
 *   - `changes` lists every line in order; 'normal' and 'add' entries carry the
 *     1-based line number in the new text, 'del' entries the one in the old text
 *   - `additions` / `deletions` count the 'add' / 'del' entries
 *   - `text` is a unified-style diff: one `@@ -oldStart,oldCount +newStart,newCount @@`
 *     header per hunk, followed by lines prefixed with ' ', '+' or '-'. Hunks
 *     never overlap, and changes whose context would touch are merged into one
 *     hunk. It is the empty string when the texts are identical.
 */
function computeDiff(oldContent, newContent, contextLines = 3) {
    const oldLines = oldContent.split('\n');
    const newLines = newContent.split('\n');
    const m = oldLines.length;
    const n = newLines.length;
    // lcs[i][j] = LCS length of the first i old lines and the first j new lines
    const lcs = Array.from({ length: m + 1 }, () => new Array(n + 1).fill(0));
    for (let i = 1; i <= m; i++) {
        for (let j = 1; j <= n; j++) {
            if (oldLines[i - 1] === newLines[j - 1]) {
                lcs[i][j] = lcs[i - 1][j - 1] + 1;
            }
            else {
                lcs[i][j] = Math.max(lcs[i - 1][j], lcs[i][j - 1]);
            }
        }
    }
    // Backtrack from the end of both texts. Entries are collected back-to-front
    // and reversed once, which avoids a quadratic number of array shifts.
    const changes = [];
    let i = m;
    let j = n;
    while (i > 0 || j > 0) {
        if (i > 0 && j > 0 && oldLines[i - 1] === newLines[j - 1]) {
            changes.push({ type: 'normal', line: j, content: newLines[j - 1] });
            i--;
            j--;
        }
        else if (j > 0 && (i === 0 || lcs[i][j - 1] >= lcs[i - 1][j])) {
            changes.push({ type: 'add', line: j, content: newLines[j - 1] });
            j--;
        }
        else {
            // Only reachable with i > 0: a line present in the old text only
            changes.push({ type: 'del', line: i, content: oldLines[i - 1] });
            i--;
        }
    }
    changes.reverse();
    // Count additions/deletions and record, for every position in `changes`,
    // how many old and new lines precede it (needed for the hunk headers).
    let additions = 0;
    let deletions = 0;
    let oldCount = 0;
    let newCount = 0;
    const oldSeen = [0];
    const newSeen = [0];
    for (const change of changes) {
        if (change.type === 'add') {
            additions++;
            newCount++;
        }
        else if (change.type === 'del') {
            deletions++;
            oldCount++;
        }
        else {
            oldCount++;
            newCount++;
        }
        oldSeen.push(oldCount);
        newSeen.push(newCount);
    }
    const context = Number.isInteger(contextLines) && contextLines >= 0 ? contextLines : 3;
    const diffLines = [];
    // Append one hunk covering changes[start, end) with its header
    const emitHunk = (start, end) => {
        const oldLen = oldSeen[end] - oldSeen[start];
        const newLen = newSeen[end] - newSeen[start];
        // Unified-diff convention: an empty range is anchored on the line before it
        const oldStart = oldLen === 0 ? oldSeen[start] : oldSeen[start] + 1;
        const newStart = newLen === 0 ? newSeen[start] : newSeen[start] + 1;
        diffLines.push(`@@ -${oldStart},${oldLen} +${newStart},${newLen} @@`);
        for (let k = start; k < end; k++) {
            const c = changes[k];
            const prefix = c.type === 'add' ? '+' : c.type === 'del' ? '-' : ' ';
            diffLines.push(`${prefix}${c.content}`);
        }
    };
    let hunkStart = -1;
    let hunkEnd = -1;
    for (let idx = 0; idx < changes.length; idx++) {
        if (changes[idx].type === 'normal') {
            continue;
        }
        const wantedEnd = Math.min(changes.length, idx + 1 + context);
        if (hunkStart !== -1 && idx - context <= hunkEnd) {
            // Leading context would touch the open hunk: extend it instead
            hunkEnd = wantedEnd;
            continue;
        }
        if (hunkStart !== -1) {
            emitHunk(hunkStart, hunkEnd);
        }
        hunkStart = Math.max(0, idx - context);
        hunkEnd = wantedEnd;
    }
    if (hunkStart !== -1) {
        emitHunk(hunkStart, hunkEnd);
    }
    return {
        text: diffLines.join('\n'),
        additions,
        deletions,
        changes,
    };
}
|
|
141
|
+
/**
 * Compare the current content of a URL with its stored snapshot and store the
 * new state.
 *
 * - No stored snapshot: the content is saved and the status is 'new'.
 * - Same fingerprint: only the snapshot timestamp is refreshed; status 'same'.
 * - Different fingerprint: a line diff is computed, the snapshot is replaced
 *   (keeping the previous fingerprint and timestamp in `metadata`); status
 *   'changed'.
 * - Any error: it is logged and the result is reported as 'new'.
 *
 * @param url - URL being tracked
 * @param content - Current content
 * @param fingerprint - Content fingerprint (SHA256 hash)
 * @returns Change detection result
 *
 * @example
 * ```typescript
 * const result = await trackChange('https://example.com', content, fingerprint);
 * if (result.changeStatus === 'changed') {
 *   console.log('Content changed!');
 *   console.log(`+${result.diff.additions} -${result.diff.deletions}`);
 * }
 * ```
 */
export async function trackChange(url, content, fingerprint) {
    const reportNew = () => ({ changeStatus: 'new', previousScrapeAt: null });
    const toIso = (ms) => new Date(ms).toISOString();
    try {
        const prior = await getSnapshot(url);
        // First time seeing this URL
        if (!prior) {
            const initial = { url, fingerprint, content, timestamp: Date.now() };
            await saveSnapshot(initial);
            return reportNew();
        }
        // Content unchanged, just update timestamp
        if (prior.fingerprint === fingerprint) {
            const refreshed = { ...prior, timestamp: Date.now() };
            await saveSnapshot(refreshed);
            return { changeStatus: 'same', previousScrapeAt: toIso(prior.timestamp) };
        }
        // Content changed: diff against the old content, then replace the snapshot
        const diff = computeDiff(prior.content, content);
        const replacement = {
            url,
            fingerprint,
            content,
            timestamp: Date.now(),
            metadata: {
                previousFingerprint: prior.fingerprint,
                previousTimestamp: prior.timestamp,
            },
        };
        await saveSnapshot(replacement);
        return { changeStatus: 'changed', previousScrapeAt: toIso(prior.timestamp), diff };
    }
    catch (error) {
        console.error('Change tracking error:', error);
        // On error, treat as new
        return reportNew();
    }
}
|
|
215
|
+
/**
 * Delete stored snapshots, optionally only those whose URL matches a pattern.
 *
 * Only `.json` files in the snapshots directory are considered. Each file is
 * handled independently: a snapshot that cannot be read or parsed is skipped
 * in pattern mode, and a file that cannot be deleted is skipped (and logged,
 * unless it had already disappeared) without discarding the count of files
 * removed so far.
 *
 * @param urlPattern - Optional regex pattern to match URLs (if not provided, clears all)
 * @returns Number of snapshots actually deleted; 0 when the directory cannot
 *   be listed or the pattern is not a valid regular expression (the error is
 *   logged)
 *
 * @example
 * ```typescript
 * // Clear all snapshots
 * const count = await clearSnapshots();
 *
 * // Clear specific domain
 * const count = await clearSnapshots('example\\.com');
 * ```
 */
export async function clearSnapshots(urlPattern) {
    try {
        await ensureSnapshotsDir();
        const files = await fs.readdir(SNAPSHOTS_DIR);
        const pattern = urlPattern ? new RegExp(urlPattern) : null;
        let cleared = 0;
        for (const file of files) {
            if (!file.endsWith('.json')) {
                continue;
            }
            const path = join(SNAPSHOTS_DIR, file);
            if (pattern) {
                // Check if URL matches pattern
                let matches = false;
                try {
                    const snapshot = JSON.parse(await fs.readFile(path, 'utf-8'));
                    matches = pattern.test(snapshot.url);
                }
                catch {
                    // Skip malformed or unreadable snapshots
                }
                if (!matches) {
                    continue;
                }
            }
            try {
                await fs.unlink(path);
                cleared++;
            }
            catch (error) {
                // One undeletable entry must not abort the run or reset the count.
                // A file that vanished in the meantime is not worth reporting.
                if (error?.code !== 'ENOENT') {
                    console.error('Clear snapshots error:', error);
                }
            }
        }
        return cleared;
    }
    catch (error) {
        console.error('Clear snapshots error:', error);
        return 0;
    }
}
|
|
267
|
+
//# sourceMappingURL=change-tracking.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"change-tracking.js","sourceRoot":"","sources":["../../src/core/change-tracking.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AACpC,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,IAAI,CAAC;AACpC,OAAO,EAAE,IAAI,EAAE,MAAM,MAAM,CAAC;AAC5B,OAAO,EAAE,OAAO,EAAE,MAAM,IAAI,CAAC;AAyB7B,6BAA6B;AAC7B,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,WAAW,CAAC,CAAC;AAE/D;;GAEG;AACH,SAAS,eAAe,CAAC,GAAW;IAClC,MAAM,IAAI,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAC5D,OAAO,IAAI,CAAC,aAAa,EAAE,GAAG,IAAI,OAAO,CAAC,CAAC;AAC7C,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,kBAAkB;IAC/B,IAAI,CAAC;QACH,MAAM,EAAE,CAAC,KAAK,CAAC,aAAa,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IACrD,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,2BAA2B;IAC7B,CAAC;AACH,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,GAAW;IAC3C,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,eAAe,CAAC,GAAG,CAAC,CAAC;QAClC,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC9C,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAa,CAAC;IACtC,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED;;GAEG;AACH,KAAK,UAAU,YAAY,CAAC,QAAkB;IAC5C,MAAM,kBAAkB,EAAE,CAAC;IAC3B,MAAM,IAAI,GAAG,eAAe,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;IAC3C,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;AACvE,CAAC;AAED;;;GAGG;AACH,SAAS,WAAW,CAAC,UAAkB,EAAE,UAAkB;IACzD,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAExC,qEAAqE;IACrE,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC1B,MAAM,CAAC,GAAG,QAAQ,CAAC,MAAM,CAAC;IAC1B,MAAM,GAAG,GAAe,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;IAEhF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YAC5B,IAAI,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,GAAG,
CAAC,CAAC,EAAE,CAAC;gBACxC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC;YACpC,CAAC;iBAAM,CAAC;gBACN,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;YACrD,CAAC;QACH,CAAC;IACH,CAAC;IAED,0BAA0B;IAC1B,MAAM,OAAO,GAA6E,EAAE,CAAC;IAC7F,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;QACtB,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,IAAI,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC;YAC1D,OAAO,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;YACvE,CAAC,EAAE,CAAC;YACJ,CAAC,EAAE,CAAC;QACN,CAAC;aAAM,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAChE,OAAO,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;YACpE,CAAC,EAAE,CAAC;QACN,CAAC;aAAM,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACjB,OAAO,CAAC,OAAO,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,EAAE,OAAO,EAAE,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;YACpE,CAAC,EAAE,CAAC;QACN,CAAC;IACH,CAAC;IAED,gCAAgC;IAChC,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,IAAI,SAAS,GAAG,CAAC,CAAC;IAElB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK;YAAE,SAAS,EAAE,CAAC;QACvC,IAAI,MAAM,CAAC,IAAI,KAAK,KAAK;YAAE,SAAS,EAAE,CAAC;IACzC,CAAC;IAED,0BAA0B;IAC1B,MAAM,SAAS,GAAa,EAAE,CAAC;IAC/B,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,IAAI,GAAG,GAAG,CAAC,EAAE,GAAG,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC9C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC;QAE5B,yBAAyB;QACzB,IAAI,MAAM,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC7B,qBAAqB;YACrB,MAAM,UAAU,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,GAAG,GAAG,CAAC,CAAC,CAAC;YACxC,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,CAAC,OAAO,CAAC,MAAM,EAAE
,GAAG,GAAG,EAAE,CAAC,CAAC;YAEpD,+CAA+C;YAC/C,IAAI,GAAG,GAAG,YAAY,EAAE,CAAC;gBACvB,SAAS,CAAC,IAAI,CAAC,OAAO,UAAU,GAAG,CAAC,IAAI,QAAQ,GAAG,UAAU,KAAK,UAAU,GAAG,CAAC,IAAI,QAAQ,GAAG,UAAU,KAAK,CAAC,CAAC;YAClH,CAAC;YAED,cAAc;YACd,KAAK,IAAI,CAAC,GAAG,UAAU,EAAE,CAAC,GAAG,QAAQ,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC3C,MAAM,CAAC,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;gBACrB,MAAM,MAAM,GAAG,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,KAAK,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC;gBACrE,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC;YAC1C,CAAC;YAED,YAAY,GAAG,QAAQ,CAAC;YACxB,GAAG,GAAG,QAAQ,GAAG,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO;QACL,IAAI,EAAE,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC;QAC1B,SAAS;QACT,SAAS;QACT,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,CAAC,KAAK,UAAU,WAAW,CAC/B,GAAW,EACX,OAAe,EACf,WAAmB;IAEnB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,WAAW,CAAC,GAAG,CAAC,CAAC;QAExC,IAAI,CAAC,QAAQ,EAAE,CAAC;YACd,6BAA6B;YAC7B,MAAM,YAAY,CAAC;gBACjB,GAAG;gBACH,WAAW;gBACX,OAAO;gBACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC,CAAC;YAEH,OAAO;gBACL,YAAY,EAAE,KAAK;gBACnB,gBAAgB,EAAE,IAAI;aACvB,CAAC;QACJ,CAAC;QAED,uBAAuB;QACvB,IAAI,QAAQ,CAAC,WAAW,KAAK,WAAW,EAAE,CAAC;YACzC,2CAA2C;YAC3C,MAAM,YAAY,CAAC;gBACjB,GAAG,QAAQ;gBACX,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;aACtB,CAAC,CAAC;YAEH,OAAO;gBACL,YAAY,EAAE,MAAM;gBACpB,gBAAgB,EAAE,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;aAC7D,CAAC;QACJ,CAAC;QAED,iCAAiC;QACjC,MAAM,IAAI,GAAG,WAAW,CAAC,QAAQ,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC;QAEpD,oBAAoB;QACpB,MAAM,YAAY,CAAC;YACjB,GAAG;YACH,WAAW;YACX,OAAO;YACP,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;YACrB,QAAQ,EAAE;gBACR,mBAAmB,EAAE,QAAQ,CAAC,WAAW;gBACzC,iBAAiB,EAAE,QAAQ,CAAC,SAAS;aACtC;SACF,CAAC,CAAC;QAEH,OAAO;YACL,YAAY,EAAE,SAAS;YACvB,gBAAgB,EAAE,IAAI,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,WAAW,EAAE;YAC5D,IAAI;SACL,CAAC;IAEJ,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;QAC/C,yBAAyB;QACzB,OAAO;YACL,YAAY,EAAE,KAAK;YACnB,gBAAgB,EAAE,IAAI;SACvB,CAAC;IACJ,CAA
C;AACH,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,UAAmB;IACtD,IAAI,CAAC;QACH,MAAM,kBAAkB,EAAE,CAAC;QAC3B,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC;QAC9C,IAAI,OAAO,GAAG,CAAC,CAAC;QAEhB,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;QAE3D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC;gBAAE,SAAS;YAEtC,MAAM,IAAI,GAAG,IAAI,CAAC,aAAa,EAAE,IAAI,CAAC,CAAC;YAEvC,IAAI,OAAO,EAAE,CAAC;gBACZ,+BAA+B;gBAC/B,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;oBAC9C,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAa,CAAC;oBAC9C,IAAI,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;wBAC/B,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;wBACtB,OAAO,EAAE,CAAC;oBACZ,CAAC;gBACH,CAAC;gBAAC,MAAM,CAAC;oBACP,2BAA2B;gBAC7B,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,YAAY;gBACZ,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;gBACtB,OAAO,EAAE,CAAC;YACZ,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,OAAO,CAAC,KAAK,CAAC,wBAAwB,EAAE,KAAK,CAAC,CAAC;QAC/C,OAAO,CAAC,CAAC;IACX,CAAC;AACH,CAAC"}
|
package/dist/core/map.d.ts
CHANGED
|
@@ -15,6 +15,10 @@ export interface MapOptions {
|
|
|
15
15
|
includePatterns?: string[];
|
|
16
16
|
/** Exclude URL patterns matching these regexes */
|
|
17
17
|
excludePatterns?: string[];
|
|
18
|
+
/** Filter URLs by relevance to this search query */
|
|
19
|
+
search?: string;
|
|
20
|
+
/** Only return URLs matching these content types */
|
|
21
|
+
contentTypeFilter?: string[];
|
|
18
22
|
}
|
|
19
23
|
export interface MapResult {
|
|
20
24
|
/** All discovered URLs (deduplicated) */
|
package/dist/core/map.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"map.d.ts","sourceRoot":"","sources":["../../src/core/map.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,WAAW,UAAU;IACzB,2CAA2C;IAC3C,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,8DAA8D;IAC9D,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,kDAAkD;IAClD,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"map.d.ts","sourceRoot":"","sources":["../../src/core/map.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAKH,MAAM,WAAW,UAAU;IACzB,2CAA2C;IAC3C,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,8DAA8D;IAC9D,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,+CAA+C;IAC/C,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,iDAAiD;IACjD,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,uDAAuD;IACvD,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,kDAAkD;IAClD,eAAe,CAAC,EAAE,MAAM,EAAE,CAAC;IAC3B,oDAAoD;IACpD,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,oDAAoD;IACpD,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAC;CAC9B;AAED,MAAM,WAAW,SAAS;IACxB,yCAAyC;IACzC,IAAI,EAAE,MAAM,EAAE,CAAC;IACf,wBAAwB;IACxB,WAAW,EAAE,MAAM,EAAE,CAAC;IACtB,4BAA4B;IAC5B,KAAK,EAAE,MAAM,CAAC;IACd,yBAAyB;IACzB,OAAO,EAAE,MAAM,CAAC;CACjB;AAED,wBAAsB,SAAS,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,GAAE,UAAe,GAAG,OAAO,CAAC,SAAS,CAAC,CAuH9F"}
|